import requests
import re
import redis
from lxml import etree
import pymysql
# Fetch the city data
class City:
    """Collect per-city listing URLs from xin.com and queue them in Redis.

    Calling an instance runs :meth:`get_city`. The Redis client is stored on
    ``self.r`` so subclasses can read the queued URLs back.
    """

    # Endpoint whose JSON response carries the city entries.
    CITY_API_URL = "https://www.xin.com/apis/Ajax_common/get_home_city/"
    # Listing page for one city; ``{}`` receives the city's ``ename`` value.
    CITY_URL_TEMPLATE = (
        "https://www.xin.com/{}/s/?channel=a49b117c44837d110753e751863f53"
    )
    # Redis list that receives the generated city URLs.
    REDIS_KEY = "city_url"
    # Seconds to wait on the network; without a timeout ``requests`` can
    # block forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Initialise the Redis connection.
        self.r = self.get_redis()

    def __call__(self, *args, **kwargs):
        """Run the city crawl; positional and keyword arguments are ignored."""
        self.get_city()

    def get_redis(self):
        """Return a Redis client for 127.0.0.1:6379, database 1.

        The original note on this method says the store exists to reduce
        the number of hits on the website.
        """
        return redis.Redis(host='127.0.0.1', port=6379, db=1)

    @staticmethod
    def _collect_enames(payload):
        """Return every string stored under an ``"ename"`` key in *payload*.

        The decoded JSON is walked depth-first in document order, so the
        result order matches the order of the entries in the response.
        Non-string ``ename`` values are not collected as names (containers
        are still searched for nested entries).
        """
        names = []

        def walk(node):
            if isinstance(node, dict):
                for key, value in node.items():
                    if key == "ename" and isinstance(value, str):
                        names.append(value)
                    else:
                        walk(value)
            elif isinstance(node, list):
                for item in node:
                    walk(item)

        walk(payload)
        return names

    def get_city(self):
        """Fetch the city list and push one listing URL per city to Redis.

        Raises whatever ``requests`` raises on network failure or timeout,
        and whatever ``Response.json()`` raises on a non-JSON body.
        """
        response = requests.get(self.CITY_API_URL, timeout=self.REQUEST_TIMEOUT)
        # Walk the parsed JSON instead of regex-matching its Python repr:
        # the repr changes quoting/escaping for values containing quotes.
        city_names = self._collect_enames(response.json())
        city_urls = [self.CITY_URL_TEMPLATE.format(name) for name in city_names]
        # RPUSH needs at least one value, so skip the call when no city was
        # found; otherwise push everything in a single round-trip.
        if city_urls:
            self.r.rpush(self.REDIS_KEY, *city_urls)

    def get_html(self, url):
        """Download *url* and return ``(html_text, parsed_tree)``.

        ``parsed_tree`` is the result of ``lxml.etree.HTML`` on the response
        text, ready for XPath queries.
        """
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        html = response.text
        return html, etree.HTML(html)
# Fetch every car brand:
class AutomobileBrand(City):
def __call__(self, *args, **kwargs):
    """Run the brand crawl; positional and keyword arguments are ignored."""
    self.get_brand()
def get_brand(self):
city_url_all=self.r.lrange('city_url',0,-1)
# print(city_url_all)
for city_url in city_url_all:
# 根据url获取所有的车的品牌:
# print(city_url.decode('utf-8'))
htnl,html_xml=self.get_html(city_url.decode('utf-8'))
car_brand_name=html_xml.xpath('//ul//li[position()>1]/dl/dd/a/text()')
car_brand_url=html_xml.xpath('//ul//li[position()>1]/dl/dd/a/@href')
# print(car_brand_url)
# print(car_brand_name)
for index in range(len(car_brand_url)):