Fetching Douyin Product Detail Data via the API (douyin.item_get)

The Douyin product detail API is an interface provided by the Douyin platform for retrieving product details. By calling it, developers can obtain a product's basic information, price, stock, sales volume, and other related data.

The Douyin product detail API generally returns the following information:

  1. Basic product information: product name, product description, main image, etc.;
  2. Product price: original price, promotional price, discount details, etc.;
  3. Product stock: total stock, remaining stock, etc.;
  4. Product sales: total sales volume, daily sales volume, etc.;
  5. Product reviews: total review count, positive-review rate, etc.

Product detail data can be retrieved by product ID or by product link; the method is as follows:

douyin.item_get: return value description for the Douyin product detail API

1. Request method: HTTP GET/POST. Copy the code Taobaoapi2014 to obtain the API SDK files.

2. Request URL: c0b.cc/R4rbK2

3. Request parameters:

Request parameters: num_iid=3514453298386183303

Parameter description: num_iid is the Douyin product ID.
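
If you start from a product share link rather than a numeric ID, the item ID usually has to be extracted from the URL first. Below is a minimal sketch; the digit-run pattern and the example link are assumptions and should be verified against real Douyin product links:

import re

def extract_num_iid(product_url):
    # Assumption: the product ID is the long run of digits in the link,
    # e.g. ...detail?id=3514453298386183303 -- verify against real links.
    match = re.search(r"(\d{15,})", product_url)
    return match.group(1) if match else None

print(extract_num_iid("https://example.com/product/detail?id=3514453298386183303"))
# -> 3514453298386183303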

4. Request example:

# coding:utf-8
"""
Compatible with Python 2.x and 3.x
requirement: pip install requests
"""
from __future__ import print_function
import requests

# Example request URL; the default request parameters are already URL-encoded
# (the scheme is required, otherwise requests raises MissingSchema)
url = "https://api-gw.xxx.cn/douyin/item_get/?key=<your apiKey>&secret=<your apiSecret>&num_iid=3514453298386183303"
headers = {
    "Accept-Encoding": "gzip",
    "Connection": "close"
}
if __name__ == "__main__":
    r = requests.get(url, headers=headers)
    json_obj = r.json()
    print(json_obj)
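
In practice you will want a timeout and basic error handling around the call. A minimal sketch, reusing the placeholder gateway host from the example above (the response envelope is not specified in this post, so the function simply returns the decoded JSON):

import requests

def get_item(num_iid, api_key, api_secret):
    # Gateway host taken from the example above (a placeholder); replace with the real one.
    url = "https://api-gw.xxx.cn/douyin/item_get/"
    params = {"key": api_key, "secret": api_secret, "num_iid": num_iid}
    try:
        r = requests.get(url, params=params, timeout=10)
        r.raise_for_status()  # surface HTTP-level errors (4xx/5xx)
        return r.json()       # decode the JSON body
    except (requests.RequestException, ValueError) as e:
        print("request failed:", e)
        return None

if __name__ == "__main__":
    result = get_item("3514453298386183303", "<your apiKey>", "<your apiSecret>")
    if result is not None:
        print(result)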

5. Response example:
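
The original post does not include a response body, so purely as an illustration, a response covering the fields listed at the top of this article might look like the sketch below. The field names and nesting are assumptions, not the interface's documented schema:

{
    "item": {
        "num_iid": "3514453298386183303",
        "title": "Sample product title",
        "desc": "Sample product description",
        "pic_url": "https://example.com/main.jpg",
        "price": "59.00",
        "original_price": "99.00",
        "stock": 1200,
        "sales": 356,
        "comment_count": 88,
        "good_rate": "98%"
    }
}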

By calling the Douyin product detail API, developers can retrieve product information as needed, for example to render a product detail page or to compute prices. Douyin also provides interface and development documentation, and developers can make calls and retrieve data according to the endpoint addresses, parameters, and return values described there.
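
As a small example of the "compute prices" use case mentioned above, the sketch below derives the discount rate from the hypothetical response fields (the same assumed names as in the sample response):

def discount_rate(item):
    # Promotional price divided by original price; field names are assumed.
    return float(item["price"]) / float(item["original_price"])

item = {"price": "59.00", "original_price": "99.00"}
print("pays {:.0%} of the original price".format(discount_rate(item)))  # pays 60%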
