import logging
import os
import time
from concurrent.futures import ThreadPoolExecutor
from time import sleep
from typing import List, Tuple, Optional

import pymysql
import requests
from dbutils.pooled_db import PooledDB
# Configuration constants
MAX_WORKERS = 10  # maximum number of worker threads

# RapidAPI request headers for the imdb236 endpoint.
# The key may be supplied through the RAPIDAPI_KEY environment variable; the
# literal is kept only as a backward-compatible fallback.
# NOTE(review): a key committed to source should be rotated and removed.
headers = {
    "x-rapidapi-key": os.environ.get(
        "RAPIDAPI_KEY",
        "40eb93fa97mshba6ed80cd703eadp105b67jsn0fb3c77e09ea",
    ),
    "x-rapidapi-host": "imdb236.p.rapidapi.com",
}
def query_movie(imdb_id: str, vod_id: int) -> bool:
    """Fetch one title from the imdb236 RapidAPI endpoint and store it in ``mac_vod``.

    The response's ``primaryTitle`` and ``description`` (plus ``releaseDate``
    when it is present) are written to the row identified by ``vod_id``, and
    ``imdb_update`` is set to ``now()``.

    Args:
        imdb_id: IMDb identifier appended to the request URL.
        vod_id: ``vod_id`` of the ``mac_vod`` row to update.

    Returns:
        True when the UPDATE was executed and committed; False when the
        response was empty, none of title/release date/description was
        present, or any exception occurred (the exception is logged, not
        re-raised).
    """
    url = f"https://imdb236.p.rapidapi.com/api/imdb/{imdb_id}"
    print(url)
    try:
        # Without a timeout a stalled connection would block this worker
        # thread indefinitely.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        print(data)
        if not data:
            logging.warning(f"未找到IMDB编号: {imdb_id}")
            return False

        primaryTitle = data.get("primaryTitle", "")
        releaseDate = data.get("releaseDate", "")
        averageRating = data.get("averageRating", "")
        description = data.get("description", "")
        if not any([primaryTitle, releaseDate, description]):
            # Log the id that was requested rather than a field of the
            # response, which may not be present.
            logging.warning(f"关键字段全空: {imdb_id}")
            return False

        print(f"primaryTitle: {primaryTitle}")
        print(f"releaseDate: {releaseDate}")
        print(f"averageRating: {averageRating}")
        print(f"description: {description}")

        # Only the statement and its parameters depend on whether a release
        # date is available; vod_year is left untouched when it is missing.
        if releaseDate:
            sql = "UPDATE mac_vod SET vod_sub = %s, vod_year = %s, vod_blurb_en = %s, imdb_update=now() WHERE vod_id = %s"
            params = (primaryTitle, releaseDate, description, vod_id)
        else:
            sql = "UPDATE mac_vod SET vod_sub = %s, vod_blurb_en = %s, imdb_update=now() WHERE vod_id = %s"
            params = (primaryTitle, description, vod_id)

        with get_db_connection() as connection:
            with connection.cursor() as cursor:
                cursor.execute(sql, params)
            connection.commit()
        logging.info(f"Updated ID:{vod_id} Title:{primaryTitle}")
        return True
    except Exception as e:
        # Worker boundary: report the failure and let the batch continue.
        logging.error(f"处理ID:{imdb_id}时出错: {str(e)}")
        return False
def get_db_connection():
    """Open and return a new PyMySQL connection using ``DictCursor``.

    Each setting can be overridden with an environment variable
    (``FUSION_DB_HOST``, ``FUSION_DB_USER``, ``FUSION_DB_PASSWORD``,
    ``FUSION_DB_NAME``); when a variable is unset the previous hard-coded
    value is used, so existing deployments keep working.

    Returns:
        The connection object returned by ``pymysql.connect``. The caller is
        responsible for closing it.
    """
    # NOTE(review): the fallback credentials below are still in source; they
    # should be rotated and the defaults removed once the environment is set.
    return pymysql.connect(
        host=os.environ.get('FUSION_DB_HOST', '172.96.161.250'),
        user=os.environ.get('FUSION_DB_USER', 'root'),
        password=os.environ.get('FUSION_DB_PASSWORD', '66a811ee546b02a1'),
        database=os.environ.get('FUSION_DB_NAME', 'fusion'),
        charset='utf8mb4',
        cursorclass=pymysql.cursors.DictCursor,
    )
def query_sql(page: int, page_size: int) -> List[dict]:
    """Return one page of ``mac_vod`` rows that still need IMDb data.

    Selects ``vod_id`` and ``imdb_id`` for rows whose ``imdb_id`` is not null
    and whose ``imdb_update`` is null, ordered by ``vod_id`` descending.

    Args:
        page: 1-based page number.
        page_size: Number of rows per page.

    Returns:
        A list of row dicts with the keys ``vod_id`` and ``imdb_id`` (the
        connection uses ``DictCursor``), or an empty list when the query
        fails for any reason.
    """
    # Initialised up front so ``finally`` can tell whether a connection was
    # actually opened, without inspecting ``locals()``.
    connection = None
    try:
        offset = (page - 1) * page_size
        connection = get_db_connection()
        with connection.cursor() as cursor:
            sql = "SELECT vod_id, imdb_id FROM mac_vod where imdb_id is not null and imdb_update is null order by vod_id desc limit %s offset %s;"
            cursor.execute(sql, (page_size, offset))
            print(sql)
            # Normalise to a list so callers always get the same container
            # type as on the error path.
            return list(cursor.fetchall())
    except Exception as e:
        print(f"数据库查询出错: {e}")
        return []
    finally:
        if connection is not None:
            connection.close()
def process_item(item: dict, delay: float = 1) -> None:
    """Process one row: wait ``delay`` seconds, then call ``query_movie``.

    Args:
        item: Row mapping with the keys ``vod_id`` and ``imdb_id``.
        delay: Seconds to sleep before the lookup. Defaults to 1, the value
            that was previously hard-coded.

    Raises:
        KeyError: If ``item`` lacks ``vod_id`` or ``imdb_id``. Any exception
            raised after that point is printed and swallowed.
    """
    vod_id = item['vod_id']
    imdb_id = item['imdb_id']
    try:
        print(f"IMDBid:{imdb_id} vod_id:{vod_id}")
        sleep(delay)
        query_movie(imdb_id, vod_id)
    except Exception as e:
        print(f"处理ID:{imdb_id}时出错: {e}")
def start():
    """Process pending rows in batches of 50, for at most 959 batches.

    Every iteration requests page 1 again: ``query_sql`` only returns rows
    whose ``imdb_update`` is null, and ``query_movie`` sets that column on
    success, so processed rows drop out of the result. The loop stops as
    soon as a batch comes back empty (``query_sql`` also returns an empty
    list when the query fails).
    """
    for _ in range(1, 960):
        # Fetch the next batch of rows still awaiting an update.
        items = query_sql(1, 50)
        print(items)
        print(f"获取到{len(items)}条需要处理的记录")
        if not items:
            # Nothing left to do; avoid hammering the database with
            # hundreds of further empty queries.
            break
        # Process the batch in parallel; leaving the ``with`` block waits for
        # all submitted work to finish before the next query.
        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            executor.map(process_item, items)
if __name__ == '__main__':
    # Time the whole run with wall-clock timestamps and report the total.
    began = time.time()
    start()
    elapsed = time.time() - began
    print(f"任务完成,总执行时间: {elapsed:.2f} 秒")
修改上面的代码,改用下面的新 API(imdb8.p.rapidapi.com 的 title/v2/get-ratings 接口):
# Standalone sample request against the imdb8 "get-ratings" endpoint.
import requests

url = "https://imdb8.p.rapidapi.com/title/v2/get-ratings"
querystring = {"tconst": "tt32439604"}
headers = {
    "x-rapidapi-key": "40eb93fa97mshba6ed80cd703eadp105b67jsn0fb3c77e09ea",
    "x-rapidapi-host": "imdb8.p.rapidapi.com",
}
# A timeout keeps the script from blocking forever if the API never answers.
response = requests.get(url, headers=headers, params=querystring, timeout=10)
print(response.json())
请求返回示例:{'data': {'title': {'__typename': 'Title', 'id': 'tt32439604', 'titleText': {'text': 'I Am What I Am 2', 'isOriginalTitle': False}, 'originalTitleText': {'text': 'Xiong Shi Shao Nian 2', 'isOriginalTitle': True}, 'releaseYear': {'__typename': 'YearRange', 'year': 2024, 'endYear': None}, 'releaseDate': {'__typename': 'ReleaseDate', 'month': 12, 'day': 14, 'year': 2024, 'country': {'id': 'CN'}, 'restriction': None, 'attributes': [], 'displayableProperty': {'qualifiersInMarkdownList': None}}, 'titleType': {'__typename': 'TitleType', 'id': 'movie', 'text': 'Movie', 'categories': [{'id': 'movie', 'text': 'Movie', 'value': 'movie'}], 'canHaveEpisodes': False, 'isEpisode': False, 'isSeries': False, 'displayableProperty': {'value': {'plainText': ''}}}, 'primaryImage': {'__typename': 'Image', 'id': 'rm636131586', 'url': 'https://m.media-amazon.com/images/M/MV5BMDU4NjE0NmEtZjVkMi00N2IwLTliY2EtM2JmNjhjYTg3MjRkXkEyXkFqcGc@._V1_.jpg', 'height': 2500, 'width': 1786}, 'canRate': {'isRatable': True}, 'ratingsSummary': {'voteCount': 278, 'aggregateRating': 7.5, 'topRanking': None}}}}
将返回数据中 data.title.titleText.text 的值(示例中为 'I Am What I Am 2')更新到数据库的 vod_sub 字段。
将 data.title.releaseDate 中的 year、month、day(示例中为 2024、12、14)按 YYYY-MM-DD 格式拼接成 2024-12-14,更新到数据库的 vod_year 字段。
将 data.title.ratingsSummary.aggregateRating 的值(示例中为 7.5)更新到数据库的 vod_douban_score 字段。
最新发布