代码思路是这样的,刚开始直接进入目标主页,有个登录注册按钮,手动选择登录,然后输入用户名密码进去用户主页,需要再切换回目标主页获取所有的点赞人列表,这是目前的代码并未实现需求,直接修改:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time
import csv
import json
import logging
# 配置信息
CHROME_DRIVER_PATH = r'C:\Users\LENOVO\Desktop\tom\chromedriver-win64\chromedriver.exe'
INSTAGRAM_URL = "https://www.instagram.com/accounts/login/"
TARGET_URL = "https://www.instagram.com/p/DP1z9ZUDiAL"
USERNAME = "thomas_beltrami2014"
PASSWORD = "'cnk3%uu{~i4!@$waso>aSlxB"
OUTPUT_FILE = "instagram_post_data.csv"
# 配置日志记录
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[logging.FileHandler('instagram_scraper.log'), logging.StreamHandler()]
)
logger = logging.getLogger(__name__)
# 初始化浏览器
def init_browser():
options = webdriver.ChromeOptions()
options.add_argument("--disable-notifications")
options.add_argument("--lang=en-US")
options.add_experimental_option("excludeSwitches", ["enable-logging"])
service = webdriver.chrome.service.Service(executable_path=CHROME_DRIVER_PATH)
browser = webdriver.Chrome(service=service, options=options)
browser.maximize_window()
return browser
# 直接导航到DP1z9ZUDiAL主页
def open_target_url(browser, TARGET_URL):
browser.get(TARGET_URL)
time.sleep(5)
# 登录 Instagram
def login_instagram(browser, username, password):
browser.get(INSTAGRAM_URL)
time.sleep(3)
# 填写登录信息
username_field = WebDriverWait(browser, 10).until(
EC.presence_of_element_located((By.NAME, "username"))
)
username_field.send_keys(username)
password_field = browser.find_element(By.NAME, "password")
# 模拟人的输入
password_field.clear()
for char in password:
time.sleep(0.5)
password_field.send_keys(char)
time.sleep(2)
# 提交登录
password_field.send_keys(Keys.RETURN)
time.sleep(2)
# 获取点赞用户列表
def get_likers(browser):
# 点击点赞数打开点赞弹窗
likes_element = WebDriverWait(browser, 15).until(
EC.element_to_be_clickable((By.XPATH, "//section[contains(@class, 'x12nagc x182iqb8 xv54qhq xf7dkkf')]//a[contains(@href, 'liked_by')]"))
)
likes_element.click()
time.sleep(3)
# 滚动加载所有点赞用户
likers_popup = WebDriverWait(browser, 15).until(
EC.presence_of_element_located((By.XPATH, "//div[@role='dialog']"))
)
last_height = 0
likers = []
for _ in range(30): # 最多滚动30次
# 获取当前可见的点赞用户
current_likers = likers_popup.find_elements(By.XPATH, ".//a[@role='link']")
# 添加到结果集
for liker in current_likers:
username = liker.text.strip()
if username and username not in likers:
likers.append(username)
# 滚动
browser.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", likers_popup)
time.sleep(1)
# 检查是否滚动到底部
new_height = browser.execute_script("return arguments[0].scrollTop", likers_popup)
if new_height == last_height:
break
last_height = new_height
# 关闭弹窗
close_button = likers_popup.find_element(By.XPATH, ".//button")
close_button.click()
time.sleep(1)
return likers
# 保存数据到CSV
def save_to_csv(data, filename):
with open(filename, 'w', newline='', encoding='utf-8') as file:
writer = csv.writer(file)
writer.writerow(['Type', 'Username', 'Data'])
# 保存点赞数据
for liker in data['likers']:
writer.writerow(['Liker', liker, ''])
print(f"数据已保存到 {filename}")
# 主函数
def main():
browser = init_browser()
try:
# 打开目标主页
open_target_url(browser, TARGET_URL)
# 登录Instagram
login_instagram(browser, USERNAME, PASSWORD)
browser.execute_script('window.open("https://www.instagram.com/p/DP1z9ZUDiAL")')
# 获取点赞用户
likers = get_likers(browser)
print(f"获取到 {len(likers)} 位点赞用户")
# 保存数据
data = {
"likers": likers,
# "comments": comments
}
save_to_csv(data, OUTPUT_FILE)
# 可选:保存为JSON
with open('instagram_data.json', 'w', encoding='utf-8') as f:
json.dump(data, f, ensure_ascii=False, indent=2)
except Exception as e:
print(f"发生错误: {str(e)}")
finally:
browser.quit()
if __name__ == "__main__":
main()
最新发布