技术方案
python
selenium
先下载Microsoft Edge WebDriver
先查看本机 Edge 浏览器的版本号
在下载页面找到与该版本号一致的 WebDriver 并下载
安装依赖
pip install selenium
import time
from selenium import webdriver
# Smoke test: start Edge through the downloaded driver binary, open a page,
# print its title, and keep the window up for ten seconds.
# NOTE(review): `executable_path` / `capabilities` are the older keyword
# arguments; the later script in this file passes `service=` / `options=`
# instead -- confirm which form the installed Selenium version accepts.
driver = webdriver.Edge(
    executable_path=r"/Users/xiesi/Documents/Project/PythonDCD/edgedriver_mac64/msedgedriver",
    capabilities={},
)
try:
    driver.get("https://www.baidu.com")
    print(driver.title)
    time.sleep(10)
finally:
    # Always shut the browser down, even if navigation fails.
    driver.quit()
页面数据已经抓到,但价格数字被替换成了私有区 Unicode 字符,需要按码点映射解码
import time
import os
from selenium import webdriver
from selenium.webdriver.edge.options import Options
from selenium.webdriver.edge.service import Service
from selenium.webdriver.common.by import By
import logging
def decode_string(encoded_str):
    """Decode an obfuscated price string into a number.

    Digits and the "万" (ten-thousand) unit arrive as the substitute code
    points listed in ``code_to_char`` (all but space and "." lie in the
    Unicode private-use area).  Characters that are not in the table are
    kept unchanged, the result is stripped, and then parsed.

    Args:
        encoded_str: Text made of substitute and/or ordinary characters.

    Returns:
        * If the decoded text contains "万": an ``int`` equal to the part
          before "万" times 10000 (10000 when that part is empty) plus the
          part after it, rounded to the nearest integer.
        * Otherwise a ``float`` when the text contains a decimal point, or
          an ``int`` when it does not.

    Raises:
        ValueError: If any numeric part of the decoded text cannot be parsed.
    """
    # Substitute code point -> real character.
    code_to_char = {
        32: " ",  # space
        46: ".",  # decimal point
        58378: "万",
        58425: "0",
        58700: "1",
        58467: "2",
        58525: "3",
        58397: "4",
        58385: "5",
        58676: "6",
        58347: "7",
        58595: "8",
        58461: "9",
    }

    # The table is keyed by code point, which is the form str.translate
    # expects; characters without an entry pass through untouched.
    decoded_str = encoded_str.translate(code_to_char).strip()

    if "万" not in decoded_str:
        try:
            # int() rejects text with a decimal point, so fractional values
            # have to go through float().
            if "." in decoded_str:
                return float(decoded_str)
            return int(decoded_str)
        except ValueError as err:
            raise ValueError("解码后的字符串无法转换为数值类型。") from err

    # "万" is present, so split() always yields at least two parts; only the
    # text directly before and after the first "万" is used.
    parts = decoded_str.split("万")
    before, after = parts[0], parts[1]

    if before:
        try:
            number = float(before) * 10000
        except ValueError as err:
            raise ValueError(f"无法解析'万'前的数值部分: {before}") from err
    else:
        # A bare "万" with nothing in front counts as one ten-thousand.
        number = 10000

    if after:
        try:
            number += float(after)
        except ValueError as err:
            raise ValueError(f"无法解析'万'后的数值部分: {after}") from err

    # round() rather than int(): a float product can land just below the
    # intended integer, and truncation would then lose a whole unit.
    return round(number)
# Collected (ratio, formatted line) pairs, one per successfully parsed listing.
data_list = []

# Write log records (INFO and above) to output.log.
logging.basicConfig(filename="output.log", level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Location of the Edge WebDriver binary.
edge_driver_path = r"/Users/banma-0779/Documents/tools/aoneauto/edgedriver_mac64_m1/msedgedriver"

# The default Edge profile directory is only printed for reference; the
# session below runs against the dedicated directory instead.
edge_user_data_dir = os.path.expanduser("~/Library/Application Support/Microsoft Edge")
print(edge_user_data_dir)

# Dedicated profile directory for this scraper; created on first run.
user_data_dir = "/Users/banma-0779/Documents/tools/aoneauto/user_data"
os.makedirs(user_data_dir, exist_ok=True)

edge_options = Options()
edge_options.add_argument(f"user-data-dir={user_data_dir}")

service = Service(edge_driver_path)
driver = webdriver.Edge(options=edge_options, service=service, keep_alive=True)
try:
    driver.get("https://www.dongchedi.com/usedcar/9,10-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-3,5-x-x-x-x-420100-1-6-x-x-x-x")
    print(driver.title)

    # Scroll to the bottom once per second for ten minutes.
    # NOTE(review): presumably this is to trigger lazy loading of further
    # listings -- confirm against the page behavior.
    for _ in range(60 * 10):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1)

    # Absolute XPaths of the two <ul> containers that are scanned for items.
    ul_xpath_list = [
        "/html/body/div[1]/div[1]/div[2]/div/div/div[2]/ul",
        "/html/body/div[1]/div[1]/div[2]/div/div/div[2]/div[4]/ul",
    ]

    try:
        for ul_xpath in ul_xpath_list:
            ul_element = driver.find_element(By.XPATH, ul_xpath)
            # Direct <li> children of the list.
            li_elements = ul_element.find_elements(By.XPATH, "li")
            for index, li in enumerate(li_elements, start=1):
                # Item-relative paths, e.g. .../ul/li[1]/a/dl/dt/p for the name.
                car_name = li.find_element(By.XPATH, "a/dl/dt/p").text
                # Read each element's text once; every .text is a driver call.
                span_raw = li.find_element(By.XPATH, "a/dl/dd[3]/span").text
                dd_raw = li.find_element(By.XPATH, "a/dl/dd[3]").text

                # The <span> carries the labelled guide price; the rest of
                # dd[3] is the second price figure.
                span_text = span_raw.replace("新车指导价: ", "")
                ddtext = dd_raw.replace(span_raw, "")

                try:
                    leftnumber = decode_string(span_text)
                    rightnumber = decode_string(ddtext)
                    ratio = rightnumber / leftnumber * 100
                except (ValueError, ZeroDivisionError) as err:
                    # One undecodable or zero-priced item must not discard
                    # everything collected so far.
                    logging.warning("Skipping item %s (%s): %s", index, car_name, err)
                    continue

                data_list.append(
                    (ratio, f"{index:>5} : {leftnumber:>10}/{rightnumber:>10} = {ratio:5.1f}% : {car_name}")
                )
    except Exception:
        # Top-level boundary for page-structure / driver failures: record the
        # traceback and still report whatever was collected.
        logging.exception("Failed while extracting listings")

    # Ascending by ratio; the formatted lines go to the log file.
    sorted_data = sorted(data_list, key=lambda x: x[0])
    print("按照浮点数从小到大排序后的字符串顺序为:")
    for item in sorted_data:
        logging.info(item[1])
finally:
    # Always release the browser, including when navigation or scrolling fails.
    driver.quit()
"""
32 -- 空格
46 -- 小数点
58378 -- 万
58425 -- 0
58700 -- 1
58467 -- 2
58525 -- 3
58397 -- 4
58385 -- 5
58676 -- 6
58347 -- 7
58595 -- 8
58461 -- 9
"""