Python爬取动态网站-集信达案例

本文介绍了如何使用 Python 的 Selenium 库自动化登录集信达(JiXinDa)网站,进入“短信服务”菜单,按签名名称(示例为“黑马头条”)进行搜索,并将抓取到的数据以 JSON 格式保存到本地文件。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

from selenium import webdriver
import time
import json
from selenium.webdriver.support.wait import WebDriverWait
from  selenium.webdriver.support import  expected_conditions as EC
from  selenium.webdriver.common.by import By

class JiXinDa:
    """Drive the JiXinDa demo site with Selenium and save one search result.

    The workflow (see ``run``) clicks the login button, opens the SMS-service
    menu, searches for the signature "黑马头条", reads three text fields from
    the result area and appends them to a local file as JSON.
    """

    # Page locators. They are tied to the page's current DOM structure.
    _SMS_SERVICE_XPATH = '//*[@id="app"]/div/div[1]/ul/li[2]/div/div/span'
    _SERVICE_MANAGER_XPATH = (
        '/html/body/div[1]/div/div[1]/ul/li[2]/ul/li/ul/li[4]/span')
    _SIGNATURE_INPUT_XPATH = (
        '//*[@id="app"]/div/div[2]/div[1]/div/div/div/div/div[1]'
        '/form/div/div[1]/div/div/div/input')
    _SEARCH_BUTTON_XPATH = (
        '//*[@id="app"]/div/div[2]/div[1]/div/div/div/div/div[1]'
        '/form/div/div[2]/div/div/div/button[1]')
    # Result fields, in the order they appear in the saved record.
    # Presumably title, IP and creation time (names taken from the original
    # local variables) -- confirm against the live page.
    _RESULT_XPATHS = (
        '//*[@id="app"]/div/div[2]/div[1]/div/div/div/div/div[3]'
        '/div[1]/div/div/div[1]/span',
        '//*[@id="app"]/div/div[2]/div[1]/div/div/div/div/div[3]'
        '/div[1]/div/div/div[2]/p[1]/span[2]',
        '//*[@id="app"]/div/div[2]/div[1]/div/div/div/div/div[3]'
        '/div[1]/div/div/div[2]/p[4]/span[2]',
    )
    # File that save_data appends to (relative to the working directory).
    _OUTPUT_PATH = 'jixindata'

    def __init__(self):
        """Store the login URL and start a Chrome WebDriver session."""
        self.url = 'http://jxd.itheima.net/#/login'
        self.driver = webdriver.Chrome()

    def _hover_and_click(self, element):
        """Move the pointer onto *element* and click it in one action chain."""
        webdriver.ActionChains(self.driver).move_to_element(
            element).click(element).perform()

    def login_to_find(self):
        """Log in, open the SMS-service page and search for "黑马头条".

        Raises:
            NoSuchElementException: a menu/login element is not found within
                the implicit wait.
            TimeoutException: the search input or button does not become
                ready within 30 seconds.
        """
        # Load the login page.
        self.driver.get(self.url)
        # NOTE(review): implicit waits are kept so the plain find_element
        # calls (here and in get_data) keep their original timing and
        # exception type; Selenium's docs advise against mixing them with
        # explicit waits, so consider migrating everything to WebDriverWait.
        self.driver.implicitly_wait(10)
        self.driver.maximize_window()
        # Log in.
        self.driver.find_element(By.CLASS_NAME, 'el-button').click()
        self.driver.implicitly_wait(20)
        # Open the SMS-service menu entry, then its fourth sub-item.
        self._hover_and_click(
            self.driver.find_element(By.XPATH, self._SMS_SERVICE_XPATH))
        self._hover_and_click(
            self.driver.find_element(By.XPATH, self._SERVICE_MANAGER_XPATH))
        # Explicitly wait for the signature <input>, and reuse the element the
        # wait returns instead of looking it up a second time.
        signature_input = WebDriverWait(self.driver, 30).until(
            EC.presence_of_element_located(
                (By.XPATH, self._SIGNATURE_INPUT_XPATH)))
        signature_input.send_keys('黑马头条')
        # Click "search" only once the button is actually clickable; mere
        # presence in the DOM does not guarantee the click will land.
        search_button = WebDriverWait(self.driver, 30).until(
            EC.element_to_be_clickable(
                (By.XPATH, self._SEARCH_BUTTON_XPATH)))
        search_button.click()

    def get_data(self):
        """Read the result fields from the page.

        Returns:
            list[str]: the text of each element in ``_RESULT_XPATHS``, in
            that order.
        """
        return [
            self.driver.find_element(By.XPATH, xpath).text
            for xpath in self._RESULT_XPATHS
        ]

    def save_data(self, data):
        """Append *data* to the output file as one JSON document per line.

        Args:
            data: any JSON-serialisable value.

        Returns:
            bool: True if the record was written, False if serialisation or
            the file write failed (the error is printed, not raised).
        """
        try:
            # Serialise before opening the file so a non-serialisable value
            # never creates or touches the output file.
            record = json.dumps(data, ensure_ascii=False)
            with open(self._OUTPUT_PATH, mode='a', encoding='utf-8') as file:
                # Newline-delimited so repeated runs stay parseable.
                file.write(record + '\n')
        except (OSError, TypeError, ValueError) as e:
            print(e)
            return False
        return True

    def run(self):
        """Execute the whole workflow and always close the browser."""
        try:
            self.login_to_find()
            data = self.get_data()
            print('正在保存数据')
            self.save_data(data)
            print('剪辑也算是计算机专业就业')
            print(data)
        finally:
            # Release the browser and driver process even when a step fails.
            self.driver.quit()

# Script entry point: build the scraper and run the full workflow once.
if __name__ == '__main__':
    JiXinDa().run()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值