数据库中的博主粉丝数是静态的,每隔一段时间就需要更新,因此需要获取 TikTok 和 YouTube 上博主的最新粉丝数。数据库中已存有博主的主页链接,只需根据主页链接爬取粉丝数并更新数据库即可。没什么技术含量,关键是正则表达式要写得好。
TikTok:
import requests
import re
import mysql.connector
from mysql.connector import Error
import time
# Database connection settings, unpacked as keyword arguments into
# mysql.connector.connect(). All values below are placeholders.
DB_CONFIG = {
"host": "xxxx", # database host address
"database": "xxxx", # database (schema) name
"user": "xxxx", # user name
"password": "xxxx", # password
"port": xxxx # port -- NOTE(review): bare `xxxx` is a placeholder, not a name defined here; replace with the real port number
}
def get_tiktok_links(connection):
    """
    Fetch the profile URLs of all TikTok creators stored in the database.

    Selects ``profile_url`` from ``kol_base_info`` for every row whose URL
    contains ``tiktok.com``.

    :param connection: open database connection exposing ``cursor()``
    :return: list of profile URL values; an empty list if the query fails
    """
    # Bind the name before the try block so the finally clause can tell
    # whether a cursor was actually created. Without this, a failure inside
    # connection.cursor() would surface as UnboundLocalError during cleanup
    # and mask the original database error.
    cursor = None
    try:
        cursor = connection.cursor()
        sql_query = "SELECT profile_url FROM kol_base_info WHERE profile_url LIKE '%tiktok.com%'"
        cursor.execute(sql_query)
        # Each row is a 1-tuple; keep only the URL column.
        return [row[0] for row in cursor.fetchall()]
    except Error as e:
        print(f"查询失败: {e}")
        return []
    finally:
        if cursor is not None:
            cursor.close()
def get_tiktok_followers(url):
    """
    Download a TikTok profile page and extract the follower count from it.

    The page HTML is searched for the first ``"followerCount":<digits>``
    occurrence and the captured digits are handed to
    ``parse_follower_count``.

    :param url: TikTok profile page URL
    :return: follower count as an integer, or None when the request fails
             or the page contains no follower count
    """
    # Browser-like headers sent with the page request.
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.5",
        "Referer": "https://www.google.com/"
    }
    try:
        page = requests.get(url, headers=request_headers, timeout=10)
        page.raise_for_status()

        # The follower count is read from JSON embedded in the page source.
        found = re.search(r'"followerCount":(\d+)', page.text)
        if found is None:
            print(f"URL: {url} - 未找到粉丝数信息")
            return None
        return parse_follower_count(found.group(1))
    except requests.RequestException as e:
        print(f"请求失败: {e}")
        return None
def parse_follower_count(follower_text):
    """
    Convert a follower count in text form into an integer.

    Accepts plain digit strings (``'12345'``) as well as abbreviated values
    with a ``K`` (thousand) or ``M`` (million) suffix in either letter case
    (``'400.2K'``, ``'1.5m'``).

    :param follower_text: string such as '400.2K', '1.5M' or '12345'
    :return: follower count as an integer, or None if the text cannot be
             converted
    """
    multipliers = {"k": 1_000, "m": 1_000_000}
    try:
        # The suffix must directly follow the number and end a word, so a
        # letter that merely appears somewhere else in the text is ignored.
        match = re.search(r'(\d+(?:\.\d+)?)([KkMm]\b)?', follower_text)
        if match is None:
            raise ValueError("no numeric value found")
        number, suffix = match.groups()
        scale = multipliers[suffix.lower()] if suffix else 1
        # round() rather than int(): binary floating point can land just
        # below the intended value (4.02 * 1000 -> 4019.9999999999995) and
        # truncation would then lose one follower.
        return round(float(number) * scale)
    except (TypeError, ValueError) as e:
        # TypeError: non-string input; ValueError: nothing numeric to parse.
        print(f"粉丝数转换失败: {follower_text} - 错误: {e}")
        return None
def update_fans_num(connection, profile_url, fans_num):
    """
    Write a creator's follower count back to the database.

    Updates ``fans_num`` in ``kol_base_info`` for the row(s) whose
    ``profile_url`` equals the given URL, then commits. Database errors are
    reported on stdout and not re-raised.

    :param connection: open database connection exposing ``cursor()`` and
                       ``commit()``
    :param profile_url: creator profile URL used to select the row
    :param fans_num: follower count (integer) to store
    """
    # Bind the name first so cleanup in finally is safe even when
    # connection.cursor() itself fails; otherwise cursor.close() would raise
    # UnboundLocalError and hide the original database error.
    cursor = None
    try:
        cursor = connection.cursor()
        sql_update = """
            UPDATE kol_base_info
            SET fans_num = %s
            WHERE profile_url = %s
        """
        # Values are passed as parameters, never interpolated into the SQL.
        cursor.execute(sql_update, (fans_num, profile_url))
        connection.commit()
        print(f"成功更新: {profile_url} - 粉丝数: {fans_num}")
    except Error as e:
        print(f"数据库更新失败: {e}")
    finally:
        if cursor is not None:
            cursor.close()
def main():
    """
    Refresh the stored follower count of every TikTok profile in the database.

    Connects using ``DB_CONFIG``, loads the TikTok profile URLs, scrapes each
    page for its follower count and writes the value back. Connection errors
    are printed; the connection is closed on the way out if it is still open.
    """
    connection = None
    try:
        # Connect to the database.
        connection = mysql.connector.connect(**DB_CONFIG)
        if not connection.is_connected():
            return
        print("成功连接到数据库")

        # Load the TikTok profile links from the database.
        tiktok_links = get_tiktok_links(connection)
        print(f"找到 {len(tiktok_links)} 个 TikTok 链接")

        # Scrape each profile and store the result.
        for url in tiktok_links:
            follower_count = get_tiktok_followers(url)
            if follower_count is None:
                print(f"未能获取粉丝数: {url}")
            else:
                update_fans_num(connection, url, follower_count)
            # Pause between requests to avoid hitting the site too often.
            time.sleep(2)
    except Error as e:
        print(f"数据库连接失败: {e}")
    finally:
        if connection is not None and connection.is_connected():
            connection.close()
            print("数据库连接已关闭")


if __name__ == "__main__":
    main()
YouTube:
import requests
import re
import mysql.connector
from mysql.connector import Error
# Database connection settings, unpacked as keyword arguments into
# mysql.connector.connect(). All values below are placeholders.
DB_CONFIG = {
"host": "xxxx", # database host address
"database": "xxxx", # database (schema) name
"user": "xxxx", # user name
"password": "xxxxx", # password
"port": xxxx # port -- NOTE(review): bare `xxxx` is a placeholder, not a name defined here; replace with the real port number
}
def get_youtube_links(connection):
    """
    Fetch the profile URLs of all YouTube creators stored in the database.

    Selects ``profile_url`` from ``kol_base_info`` for every row whose URL
    contains ``youtube``.

    :param connection: open database connection exposing ``cursor()``
    :return: list of profile URL values; an empty list if the query fails
    """
    # Bind the name before the try block so the finally clause can tell
    # whether a cursor was actually created. Without this, a failure inside
    # connection.cursor() would surface as UnboundLocalError during cleanup
    # and mask the original database error.
    cursor = None
    try:
        cursor = connection.cursor()
        sql_query = "SELECT profile_url FROM kol_base_info WHERE profile_url like '%youtube%'"
        cursor.execute(sql_query)
        # Each row is a 1-tuple; keep only the URL column.
        return [row[0] for row in cursor.fetchall()]
    except Error as e:
        print(f"查询失败: {e}")
        return []
    finally:
        if cursor is not None:
            cursor.close()
def get_youtube_subscribers(url):
    """
    Download a YouTube channel page and extract the subscriber count from it.

    The page HTML is searched for a ``{"text":{"content":"<N> subscribers"}``
    fragment, where ``<N>`` is one to three digits, optionally followed by up
    to two decimals and a ``K``/``M`` suffix. The captured text is handed to
    ``parse_subscriber_count``.

    :param url: YouTube channel page URL
    :return: subscriber count as an integer, or None when the request fails
             or the page contains no subscriber text
    """
    headers = {
        # The final product token is "Safari/537.36" in real Chrome
        # user-agent strings; the previous "Safari/122.0.0.0" was malformed.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.5",
        "Referer": "https://www.google.com/"
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # The K/M suffix and the plural "s" are optional so that channels
        # below one thousand subscribers ("532 subscribers",
        # "1 subscriber") are matched as well.
        pattern = re.compile(
            r'\{"text":\{"content":"(\d{1,3}(?:\.\d{1,2})?[KM]? subscribers?)"\}'
        )
        match = pattern.search(response.text)
        if match is None:
            print(f"URL: {url} - 未找到粉丝数信息")
            return None
        return parse_subscriber_count(match.group(1))
    except requests.RequestException as e:
        print(f"请求失败: {e}")
        return None
def parse_subscriber_count(subscriber_text):
    """
    Convert a subscriber count in text form into an integer.

    Accepts values such as ``'32.9K subscribers'``, ``'1.5M subscribers'``
    and ``'532 subscribers'``. A ``K`` suffix means thousands and ``M`` means
    millions; either letter case is accepted.

    :param subscriber_text: string such as '32.9K subscribers'
    :return: subscriber count as an integer, or None if the text cannot be
             converted
    """
    multipliers = {"k": 1_000, "m": 1_000_000}
    try:
        # The suffix must directly follow the number and end a word, so a
        # letter that merely appears somewhere else in the text is ignored.
        match = re.search(r'(\d+(?:\.\d+)?)([KkMm]\b)?', subscriber_text)
        if match is None:
            raise ValueError("no numeric value found")
        number, suffix = match.groups()
        scale = multipliers[suffix.lower()] if suffix else 1
        # round() rather than int(): binary floating point can land just
        # below the intended value (4.02 * 1000 -> 4019.9999999999995) and
        # truncation would then lose one subscriber.
        return round(float(number) * scale)
    except (TypeError, ValueError) as e:
        # TypeError: non-string input; ValueError: nothing numeric to parse.
        print(f"粉丝数转换失败: {subscriber_text} - 错误: {e}")
        return None
def update_fans_num(connection, profile_url, fans_num):
    """
    Write a creator's subscriber count back to the database.

    Updates ``fans_num`` in ``kol_base_info`` for the row(s) whose
    ``profile_url`` equals the given URL, then commits. Database errors are
    reported on stdout and not re-raised.

    :param connection: open database connection exposing ``cursor()`` and
                       ``commit()``
    :param profile_url: creator profile URL used to select the row
    :param fans_num: subscriber count (integer) to store
    """
    # Bind the name first so cleanup in finally is safe even when
    # connection.cursor() itself fails; otherwise cursor.close() would raise
    # UnboundLocalError and hide the original database error.
    cursor = None
    try:
        cursor = connection.cursor()
        sql_update = """
            UPDATE kol_base_info
            SET fans_num = %s
            WHERE profile_url = %s
        """
        # Values are passed as parameters, never interpolated into the SQL.
        cursor.execute(sql_update, (fans_num, profile_url))
        connection.commit()
        print(f"成功更新: {profile_url} - 粉丝数: {fans_num}")
    except Error as e:
        print(f"数据库更新失败: {e}")
    finally:
        if cursor is not None:
            cursor.close()
def main():
    """
    Refresh the stored subscriber count of every YouTube channel in the database.

    Connects using ``DB_CONFIG``, loads the YouTube profile URLs, scrapes each
    page for its subscriber count and writes the value back. Connection errors
    are printed; the connection is closed on the way out if it is still open.
    """
    connection = None
    try:
        # Connect to the database.
        connection = mysql.connector.connect(**DB_CONFIG)
        if not connection.is_connected():
            return
        print("成功连接到数据库")

        # Load the YouTube profile links from the database.
        youtube_links = get_youtube_links(connection)
        print(f"找到 {len(youtube_links)} 个 YouTube 链接")

        # Scrape each channel and store the result.
        for url in youtube_links:
            subscriber_count = get_youtube_subscribers(url)
            if subscriber_count is None:
                print(f"未能获取粉丝数: {url}")
            else:
                update_fans_num(connection, url, subscriber_count)
    except Error as e:
        print(f"数据库连接失败: {e}")
    finally:
        if connection is not None and connection.is_connected():
            connection.close()
            print("数据库连接已关闭")


if __name__ == "__main__":
    main()