import requests
from bs4 import BeautifulSoup
import xlwt
# 爬取园区数据
class YuanQu:
    """One industrial-park record built from a row of the scraped listing.

    Each constructor argument is stored unchanged on the attribute of the
    same name:

    index      -- serial number of the row
    name       -- park name
    province   -- province
    city       -- city
    qu         -- district
    address    -- detailed address
    area       -- approximate area
    num        -- number of enterprises
    url        -- link to the park's detail page
    coordinate -- map/coordinate value taken from the detail page
    """

    def __init__(self, index, name, province, city, qu, address, area, num, url, coordinate):
        # Identification and location of the park.
        self.index, self.name = index, name
        self.province, self.city, self.qu = province, city, qu
        self.address = address
        # Size figures.
        self.area, self.num = area, num
        # Detail-page link and the map value read from that page.
        self.url, self.coordinate = url, coordinate

    def disply(self):
        """Print all ten fields on a single line, separated by spaces."""
        fields = (
            self.index, self.name, self.province, self.city, self.qu,
            self.address, self.area, self.num, self.url, self.coordinate,
        )
        print(*fields)
# 1. Collect the basic information of every park on a listing page: serial
#    number, park name, province, city, district, detailed address,
#    approximate area (mu), number of enterprises, and detail-page link.
def getYuanQuInfo(url, timeout=10):
    """Scrape one listing page and return its parks as ``YuanQu`` objects.

    The listing table is read as a flat sequence of ``<td>`` cells, nine per
    row. For each row the detail page linked from the ninth cell is fetched
    as well, and the ``src`` attribute of its ``#iGMap`` element is stored as
    the park's ``coordinate``.

    Args:
        url: Address of the listing page to scrape.
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout a stalled server would hang the crawl forever.

    Returns:
        A list of ``YuanQu`` objects, one per complete table row, in page
        order. Empty when the page contains no matching table cells.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx response from the listing or a detail page.
        IndexError: If a row has no link in its ninth cell, or a detail page
            has no ``#iGMap`` element.
    """
    cells_per_row = 9

    # 1) Download and parse the listing page. Fail loudly on an HTTP error
    #    instead of parsing an error page and silently returning nothing.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    page = BeautifulSoup(response.text, 'lxml')

    # 2) Locate the table cells and decode them row by row.
    cells = page.select('body > div.wrap > div.container > div.box-s2.mt15 > table > tbody > tr > td')

    parks = []
    # Trailing cells that do not form a complete row are ignored.
    row_count = len(cells) // cells_per_row
    for row in range(row_count):
        row_cells = cells[row * cells_per_row:(row + 1) * cells_per_row]

        # The ninth cell holds the link to the park's detail page.
        detail_url = "https://y.qianzhan.com/" + row_cells[8].find_all('a')[0]['href']

        # Fetch the detail page to read the embedded map source
        # (presumably an Amap/Gaode URL carrying the coordinates -- verify).
        detail_response = requests.get(detail_url, timeout=timeout)
        detail_response.raise_for_status()
        detail_page = BeautifulSoup(detail_response.text, 'lxml')
        map_src = detail_page.select('#iGMap')[0].get('src')

        # The first eight cells map one-to-one onto the leading YuanQu fields.
        texts = [cell.get_text() for cell in row_cells[:8]]
        parks.append(YuanQu(*texts, detail_url, map_src))
    return parks
# 2. Crawl the first two listing pages as a demonstration.
listAll = []
for page in range(1, 3):
    # The per-page result is consumed directly rather than bound to a
    # module-level name `list`, which would shadow the builtin for the rest
    # of the file.
    listAll.extend(getYuanQuInfo('https://y.qianzhan.com/yuanqu/?pg=' + str(page)))

# 3. Write the collected data to an Excel workbook, one park per row and one
#    field per column (no header row).
workbook = xlwt.Workbook(encoding='utf-8')
worksheet = workbook.add_sheet("园区数据")
for row, yuanqu in enumerate(listAll):
    # Column order: index, name, province, city, district, address, area,
    # enterprise count, detail URL, coordinate.
    values = (
        yuanqu.index, yuanqu.name, yuanqu.province, yuanqu.city, yuanqu.qu,
        yuanqu.address, yuanqu.area, yuanqu.num, yuanqu.url, yuanqu.coordinate,
    )
    for col, value in enumerate(values):
        worksheet.write(row, col, value)
workbook.save('园区数据.xls')
print("爬取完成")