1.项目目录
----Project
------venv
--------main.py
--------brickseek.py
--------database.py
2.main.py
import brickseek
# SKUs to scrape; each one is handed to brickseek.setZip in turn.
SKU = ["675353130", "543873356", "113247244", "259271016", "618763356"]

if __name__ == '__main__':
    # Process the SKUs sequentially, one setZip call per SKU.
    for sku in SKU:
        brickseek.setZip(sku)
3.brickseek.py
from pymysql.converters import escape_string
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from database import Database
import time
from selenium.webdriver import Chrome
from selenium.webdriver import ChromeOptions
def exlog(sku, zip):
    """Append a failed-scrape record to ``errorLog.txt``.

    The file is opened relative to the current working directory in append
    mode with UTF-8 encoding. Logging is best-effort: any error raised while
    building or writing the record is printed and never propagated, so a
    logging problem cannot abort the caller.

    Args:
        sku: SKU whose scrape failed; rendered with ``str()``.
        zip: ZIP code whose scrape failed; rendered with ``str()``. The name
            shadows the builtin but is kept so existing callers still work.
    """
    try:
        msg = "爬取失败 sku=" + str(sku) + " zip=" + str(zip) + "\n"
        # ``with`` closes the handle even when write() raises.
        with open("errorLog.txt", 'a', encoding='utf-8') as f:
            f.write(msg)
    except Exception as e:
        # Report the failure instead of silently printing an empty line.
        print("写入错误日志失败,Error:", e)
def saveData(id, store, price, quantity, sku):
    """Insert or update one store's record in the ``brickseek`` table.

    Looks up the row whose ``id`` matches; when one exists it is updated,
    otherwise a new row is inserted. The ``time`` column is set to the
    current local time. Any failure is printed and not re-raised.

    Args:
        id: Store identifier used as the lookup key. The name shadows the
            builtin but is kept for caller compatibility.
        store: Store description text.
        price: Price text.
        quantity: Quantity text (e.g. ``"0"``, ``"2"``, ``"6+"``).
        sku: SKU the record belongs to.
    """
    print("保存数据到mysql")
    try:
        # The Database helpers receive a finished SQL string, so every value
        # interpolated below is escaped first. The values originate from
        # scraped page text and must not be trusted.
        id_sql = escape_string(str(id))
        sku_sql = escape_string(str(sku))
        store_sql = escape_string(str(store))
        quantity_sql = escape_string(str(quantity))
        price_sql = escape_string(str(price))

        db = Database()
        # id is quoted here, as it already was in the UPDATE statement, so a
        # non-numeric id no longer produces invalid SQL.
        existing = db.queryOne("select * from brickseek where id='%s'" % id_sql)
        date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        if existing:
            print("更新")
            sql = "UPDATE brickseek SET sku = '%s',store='%s',quantity='%s',price='%s',time=str_to_date('%s','%%Y-%%m-%%d %%H:%%i:%%s') WHERE id = '%s'" % (
                sku_sql, store_sql, quantity_sql, price_sql, date, id_sql)
            db.save(sql)
        else:
            print("插入")
            sql = "insert into brickseek(id,sku,store,quantity,price,time) " \
                  "values('%s', '%s', '%s', '%s', '%s', str_to_date('%s','%%Y-%%m-%%d %%H:%%i:%%s'))" % (
                      id_sql, sku_sql, store_sql, quantity_sql, price_sql, date)
            db.save(sql)
        print("保存数据成功")
    except Exception as e:
        print("保存数据失败,Error:", e)
def getPage(d, zip, sku):
    """Submit a ZIP code on the loaded page and save every result row.

    Types ``zip`` into the inventory-checker form, submits it, waits up to
    30 seconds for the ``BrickseekVideoAdContainer`` element to be present,
    then reads each ``table__row`` and passes it to ``saveData``.

    Args:
        d: Selenium WebDriver with the product page already loaded.
        zip: ZIP code to search. The name shadows the builtin but is kept
            for caller compatibility.
        sku: SKU being scraped; forwarded unchanged to ``saveData``.

    Raises:
        Exception: Selenium lookup/timeout errors and ``IndexError`` from
            unexpected row layouts propagate to the caller.
    """
    zip_input = d.find_element(By.ID, 'inventory-checker-form-zip')
    # The caller reuses the same page for many ZIP codes; clear the field so
    # the new code is not appended to a previously typed one.
    zip_input.clear()
    zip_input.send_keys(zip)
    print("输入zip...")
    button = d.find_element(By.ID, 'main').find_element(By.CLASS_NAME, 'bs-button')
    button.submit()
    print("正在请求zip网页数据...")
    wait = WebDriverWait(d, 30)
    wait.until(EC.presence_of_element_located((By.ID, "BrickseekVideoAdContainer")))
    print("解析数据...")
    main = d.find_element(By.ID, 'main')
    rows = main.find_element(By.CLASS_NAME, 'table__body').find_elements(By.CLASS_NAME, 'table__row')
    print("数量=", len(rows))
    for row in rows:
        # The location name is expected to contain "#<store id>".
        store_id = row.find_element(By.CLASS_NAME, 'address-location-name').text.split("#")[1]
        cells = row.find_elements(By.CLASS_NAME, 'table__cell-content')
        store = cells[0].text
        price = cells[2].text
        status = cells[1].find_element(By.CLASS_NAME, 'availability-status-indicator__text').text
        if status == "In Stock":
            # Only the last character of the quantity cell is kept; a
            # trailing "+" is stored as "6+".
            # NOTE(review): presumably quantities are single digits - confirm.
            quantity = cells[1].find_element(By.CLASS_NAME, 'table__cell-quantity').text[-1]
            if quantity == "+":
                quantity = "6+"
        elif status == "Out of Stock":
            quantity = "0"
        else:
            # Any other status is recorded with the placeholder quantity "2".
            quantity = "2"
        saveData(store_id, store, price, quantity, sku)
def setZip(sku):
    """Scrape inventory for ``sku`` across 50 randomly chosen ZIP codes.

    Opens a Chrome session on the SKU's page, then 50 times picks a random
    row from the ``zip_code`` table and runs ``getPage`` with its first
    column. A failed attempt is printed and recorded through ``exlog``, and
    the loop moves on to the next ZIP code. The browser is always closed on
    exit.

    Args:
        sku: SKU string appended to the base URL.
    """
    url = "https://xxx.com/?sku=" + sku
    d = webdriver.Chrome()
    try:
        # Implicit wait of up to 30 seconds; set once for the whole session.
        d.implicitly_wait(30)
        d.set_window_size(480, 800)
        # Anti-bot measure: hide navigator.webdriver on every new document.
        d.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
            Object.defineProperty(navigator, 'webdriver', {
            get: () => undefined
            })
            """
        })
        print("正在打开网页...")
        d.get(url)
        print("正在获取网页数据...")
        for _ in range(50):
            zip_code = Database().queryOne("select * from zip_code order by rand() limit 1")[0]
            try:
                print("url=", url, "zip=", zip_code)
                getPage(d, zip_code, sku)
                print("爬取成功 sku=", sku, "zip=", zip_code)
            except Exception as e:
                # One bad ZIP code must not stop the remaining attempts.
                print("爬取失败 sku=", sku, "zip=", zip_code, "Error:", e)
                exlog(sku, zip_code)
    finally:
        # Without this, every call leaves a Chrome process running.
        d.quit()
4.database.py
import MySQLdb
class Database(object):
    """Thin wrapper around one MySQL connection shared by all instances.

    The connection lives in the class attribute ``connection``. The first
    instantiation opens it; later instantiations reuse it until ``closeDB``
    resets it.
    """

    # Shared connection; None until the first instance is created and again
    # after closeDB().
    connection = None

    def __init__(self):
        """Open the shared connection if none is currently held."""
        if not Database.connection:
            Database.connection = MySQLdb.connect(host="xxx", user="xxx", passwd="xxx", database="reptile", charset='utf8')
            print("get connection")

    def queryOne(self, sql, params=None):
        """Execute ``sql`` and return the first row (``None`` if no rows).

        Args:
            sql: Statement to execute.
            params: Optional parameters passed to ``cursor.execute`` so the
                driver can substitute them; omit for a plain SQL string.
        """
        cursor = Database.connection.cursor()
        try:
            cursor.execute(sql, params)
            rows = cursor.fetchone()
        finally:
            cursor.close()
        print(rows)
        return rows

    def queryAll(self, sql, params=None):
        """Execute ``sql`` and return all rows.

        Args:
            sql: Statement to execute.
            params: Optional parameters passed to ``cursor.execute``.
        """
        cursor = Database.connection.cursor()
        try:
            cursor.execute(sql, params)
            rows = cursor.fetchall()
        finally:
            cursor.close()
        print(rows)
        return rows

    def save(self, sql, params=None):
        """Execute a write statement and commit it.

        On failure the transaction is rolled back and the error re-raised.

        Args:
            sql: Statement to execute.
            params: Optional parameters passed to ``cursor.execute``.
        """
        cursor = Database.connection.cursor()
        try:
            cursor.execute(sql, params)
            Database.connection.commit()
        except Exception:
            Database.connection.rollback()
            raise
        finally:
            cursor.close()

    def closeDB(self):
        """Close the shared connection, if any, and reset it to ``None``."""
        conn = Database.connection
        # Reset first so a failing close() cannot leave a dead connection
        # registered for reuse.
        Database.connection = None
        if conn is not None:
            conn.close()
        print("close db ...")