from datetime import datetime
import logging
# Timestamp string (day-hour-minute-second) captured ONCE, when this module is
# loaded; it is not refreshed afterwards.
current_time = datetime.now().strftime("%d-%H-%M-%S")
path = r'C:\Program Files\Google\Chrome\Application\chrome.exe' # Change this to the path of the Chrome executable on your machine
from DrissionPage import ChromiumPage, ChromiumOptions
# Running count of article pages handled; incremented by huoqu_3_ji_yumian.
shuju = 0
# URL accumulators, one independent list per crawl level. Each list is written
# to its own text file by the script entry point at the end of the run.
data_url_chushi, data_url_1ji, data_url_2ji, data_url_3ji = [], [], [], []
def log_error(error_message):
    """Append *error_message* to ``LOG.txt`` as an ERROR-level record.

    The record is emitted through the shared ``'error_logger'`` logger and
    formatted as ``asctime - name - levelname - message``.

    The file handler is attached only once. Attaching a new handler on every
    call (as the previous version did) makes the Nth call write N copies of
    the message and leaves N open file handles behind.

    NOTE: ``log_error1`` uses the same logger name, so a record is delivered
    to every handler currently attached to ``'error_logger'``.

    Args:
        error_message: The message (or any object ``logging`` can render).
    """
    import os  # local import: only needed to normalise the log path

    logger = logging.getLogger('error_logger')
    logger.setLevel(logging.ERROR)

    # FileHandler stores the absolute path in ``baseFilename``; compare against
    # it to find out whether this file is already being written to.
    log_path = os.path.abspath('LOG.txt')
    already_attached = any(
        getattr(handler, 'baseFilename', None) == log_path
        for handler in logger.handlers
    )
    if not already_attached:
        file_handler = logging.FileHandler('LOG.txt', encoding='utf-8')
        file_handler.setLevel(logging.ERROR)
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    logger.error(error_message)
def log_error1(error_message):
    """Append *error_message* to ``err_LOG.txt`` as an ERROR-level record.

    The record is emitted through the shared ``'error_logger'`` logger and
    formatted as ``asctime - name - levelname - message``. Callers in this
    file pass caught exception objects; ``logging`` renders them via ``str``.

    The file handler is attached only once. Attaching a new handler on every
    call (as the previous version did) makes the Nth call write N copies of
    the message and leaves N open file handles behind.

    NOTE: ``log_error`` uses the same logger name, so a record is delivered
    to every handler currently attached to ``'error_logger'``.

    Args:
        error_message: The message or exception to record.
    """
    import os  # local import: only needed to normalise the log path

    logger = logging.getLogger('error_logger')
    logger.setLevel(logging.ERROR)

    # FileHandler stores the absolute path in ``baseFilename``; compare against
    # it to find out whether this file is already being written to.
    log_path = os.path.abspath('err_LOG.txt')
    already_attached = any(
        getattr(handler, 'baseFilename', None) == log_path
        for handler in logger.handlers
    )
    if not already_attached:
        file_handler = logging.FileHandler('err_LOG.txt', encoding='utf-8')
        file_handler.setLevel(logging.ERROR)
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    logger.error(error_message)
def huoqu_3_ji_yumian(page, url):
    """Open one listing page and save the text of every article it links to.

    For each ``<h3>`` under the listing ``<ul>`` the article link is recorded
    in ``data_url_3ji``, the article is opened, the text of the element with
    id ``ff`` is appended to ``资料.txt`` and a progress line is logged via
    ``log_error``. Failures are reported through ``log_error1`` and never
    propagate to the caller.

    Args:
        page: Browser page object used for navigation and element lookup.
        url: Address of the listing page.
    """
    global data_url_3ji, shuju

    try:
        page.get(url)
        # page.get('https://www.21ic.com/rf')
        headings = page.ele('x:/html/body/div[1]/div[2]/div[1]/div[2]/ul').eles('tag:h3')
        # Resolve every link BEFORE leaving the listing page: the loop below
        # navigates `page`, after which these element handles belong to a
        # document that is no longer loaded.
        article_urls = [heading.ele('x:/a').link for heading in headings]
    except Exception as a:
        log_error1(a)
        return

    for article_url in article_urls:
        # Per-article handling so one broken article does not abort the rest.
        try:
            data_url_3ji.append(article_url)
            shuju += 1
            page.get(article_url)
            article_text = page.ele('x://*[@id="ff"]').text
            # Wrap in a list: xueru_dizi iterates its argument, and iterating
            # a bare string would write one character per line.
            xueru_dizi('资料.txt', [article_text])
            # Use the time of this log line, not the module-load timestamp.
            now = datetime.now().strftime("%d-%H-%M-%S")
            st1r1 = '完成{}个-------------------时间{}'.format(shuju, now)
            log_error(st1r1)
        except Exception as a:
            log_error1(a)
def huoqu_2ji_yemian_url(page, url):
    """Enumerate the paginated listing pages of a category and crawl each one.

    The last ``<a>`` inside the ``area_article list`` container supplies both
    a template URL (its link) and the page count (its ``data-page``
    attribute). A URL is built for every page number, handed to
    ``huoqu_3_ji_yumian`` and then recorded in ``data_url_2ji``. Any failure
    is reported through ``log_error1`` and does not propagate.

    Two URL shapes are handled:
      * no ``page`` in the link: the last path segment before the final
        ``.`` is the page number (``.../<n>.<ext>``);
      * otherwise: the value after the last ``=`` is the page number.

    Args:
        page: Browser page object used for navigation and element lookup.
        url: Address of the category page that carries the pager.
    """
    global data_url_2ji
    try:
        page.get(url)
        pager_links = page.ele('x://div[@class="area_article list"]').eles('tag:a')
        last_link = pager_links[-1]
        wei = str(last_link.link)
        yeshu = int(last_link.attr('data-page'))
        path_style = 'page' not in wei

        # Inclusive upper bound so the final page is crawled too.
        # NOTE(review): assumes data-page on the last pager link is the number
        # of the final page - confirm against the site markup.
        for page_no in range(1, yeshu + 1):
            if path_style:
                dot_parts = wei.split('.')
                segments = dot_parts[-2].split('/')
                segments[-1] = str(page_no)
                dot_parts[-2] = '/'.join(segments)
                # Re-join with the separator used for splitting; joining with
                # '/' here would turn the host name's dots into slashes.
                page_url = '.'.join(dot_parts)
            else:
                query_parts = wei.split('=')
                query_parts[-1] = str(page_no)
                page_url = '='.join(query_parts)
            huoqu_3_ji_yumian(page, page_url)
            data_url_2ji.append(page_url)
    except Exception as a:
        log_error1(a)
def huoqu_yiji_wangye(page, data_url):
    """Open a top-level page and crawl every category linked from it.

    Every ``<a>`` under the category ``<ul>`` is passed to
    ``huoqu_2ji_yemian_url`` and then recorded in ``data_url_1ji``.

    Args:
        page: Browser page object used for navigation and element lookup.
        data_url: Address of the top-level page.
    """
    global data_url_1ji
    page.get(data_url)
    anchors = page.ele('x:/html/body/div[1]/div[2]/div[1]/div[4]/ul').eles('tag:a')
    # Snapshot the hrefs first: huoqu_2ji_yemian_url navigates `page`, so the
    # element handles collected above must not be read again afterwards.
    category_urls = [anchor.link for anchor in anchors]
    for category_url in category_urls:
        huoqu_2ji_yemian_url(page, category_url)
        data_url_1ji.append(category_url)
def xueru_dizi(file_path, urls):
    """Append entries to *file_path*, one per line, encoded as UTF-8.

    Args:
        file_path: Destination file; created if it does not exist.
        urls: Iterable of strings to write. A single ``str`` is treated as
            one entry (iterating it would write one character per line).
            ``None`` entries are skipped.
    """
    if isinstance(urls, str):
        urls = [urls]
    # Explicit encoding: the platform default codec may be unable to encode
    # the Chinese text this script writes.
    with open(file_path, 'a', encoding='utf-8') as file:
        file.writelines(url + '\n' for url in urls if url is not None)
if __name__ == '__main__':
    # Start Chrome from the configured executable and crawl every section
    # linked from the second entry of the site's main navigation bar.
    co = ChromiumOptions().set_browser_path(path)
    page = ChromiumPage(co)
    try:
        page.get('https://www.21ic.com/rf')
        nav_links = page.ele('x://*[@id="main_nav"]/ul/li[2]/div/div').eles('tag:a')
        # Snapshot the hrefs first: the crawl navigates `page`, so these
        # element handles must not be read again afterwards.
        start_urls = [nav_link.link for nav_link in nav_links]
        for start_url in start_urls:
            huoqu_yiji_wangye(page, start_url)
            # Start URLs belong in the "initial" list; it was previously never
            # filled, leaving 初始网址.txt empty.
            data_url_chushi.append(start_url)
    finally:
        # Persist whatever was collected, even if the crawl stopped early.
        xueru_dizi('初始网址.txt', data_url_chushi)
        xueru_dizi('一级网址.txt', data_url_1ji)
        xueru_dizi('二级网址.txt', data_url_2ji)
        xueru_dizi('三级网址.txt', data_url_3ji)