# Python 爬虫 (web crawler): scrapes industrial-park data from y.qianzhan.com

import requests
from bs4 import BeautifulSoup
import xlwt

# 爬取园区数据
# One scraped industrial-park record: serial number, park name, province,
# city, district, street address, approximate area (mu), company count,
# detail-page URL, and the map-iframe coordinate string.
class YuanQu:
    # Column order of the scraped table; one attribute per field.
    _FIELDS = ("index", "name", "province", "city", "qu",
               "address", "area", "num", "url", "coordinate")

    def __init__(self, index, name, province, city, qu, address, area, num, url, coordinate):
        values = (index, name, province, city, qu, address, area, num, url, coordinate)
        for field, value in zip(self._FIELDS, values):
            setattr(self, field, value)

    def disply(self):
        # Print every field on one line, in table-column order.
        # (Method name "disply" kept as-is: it is the class's public interface.)
        print(*(getattr(self, field) for field in self._FIELDS))

# 1.获取基本信息:序号、园区名称、省份、城市、地区、详细地址、约面积(亩)、企业数、链接
# 1. Fetch basic info: serial no., park name, province, city, district,
#    street address, approx. area (mu), company count, detail link.
def getYuanQuInfo(url):
    """Scrape one listing page of industrial-park data.

    Parameters:
        url: listing-page URL (e.g. https://y.qianzhan.com/yuanqu/?pg=N).

    Returns:
        list of YuanQu, one per table row.  The coordinate field holds the
        src of the detail page's '#iGMap' iframe, or '' if that iframe is
        missing.

    Raises:
        requests.RequestException on network failure or timeout.
    """
    # 1) Fetch and parse the listing page.  A timeout prevents the crawler
    #    from hanging forever on a stalled connection (original had none).
    page = requests.get(url, timeout=10)
    soup = BeautifulSoup(page.text, 'lxml')
    # 2) All <td> cells of the data table; every row has exactly 9 cells.
    cells = soup.select('body > div.wrap > div.container > div.box-s2.mt15 > table > tbody > tr > td')
    results = []  # renamed: original shadowed the builtin `list`
    # Walk the flat cell list 9 at a time; any trailing partial row is skipped
    # (same effect as the original's int(len(data) / 9) row count).
    for start in range(0, len(cells) - len(cells) % 9, 9):
        row = cells[start:start + 9]
        detail_url = "https://y.qianzhan.com/" + row[8].find_all('a')[0]['href']
        # Fetch the detail page to extract the map-iframe coordinate.
        detail_page = requests.get(detail_url, timeout=10)
        detail_soup = BeautifulSoup(detail_page.text, 'lxml')
        # Guard: original did select('#iGMap')[0], which raises IndexError
        # when a detail page has no map iframe; fall back to ''.
        map_frames = detail_soup.select('#iGMap')
        coordinate = map_frames[0].get('src') if map_frames else ''
        results.append(YuanQu(
            row[0].get_text(), row[1].get_text(), row[2].get_text(),
            row[3].get_text(), row[4].get_text(), row[5].get_text(),
            row[6].get_text(), row[7].get_text(),
            detail_url, coordinate))
    return results

# 模拟获取两页的数据
# 2. Fetch the first two listing pages and collect all records.
listAll = []
for page in range(1, 3):
    # renamed local: the original bound the result to `list`,
    # shadowing the builtin for the rest of the script
    page_records = getYuanQuInfo('https://y.qianzhan.com/yuanqu/?pg=' + str(page))
    listAll.extend(page_records)

# 3.将数据写入excel
# 3. Write the collected records into an .xls workbook, one row per park,
#    columns in the same order as the scraped table.
workbook = xlwt.Workbook(encoding='utf-8')
worksheet = workbook.add_sheet("园区数据")
# enumerate replaces the original `for data in range(len(listAll))`
# index-loop and its ten repeated `listAll[data]` subscripts.
for row_idx, record in enumerate(listAll):
    columns = (record.index, record.name, record.province, record.city,
               record.qu, record.address, record.area, record.num,
               record.url, record.coordinate)
    for col_idx, value in enumerate(columns):
        worksheet.write(row_idx, col_idx, value)
workbook.save('园区数据.xls')

print("爬取完成")
