#coding=gbk
# GBK is declared above because this file contains Chinese text and utf-8 raised an error.
### Environment: Python 3.6 ###
import requests
import re
import pandas as pd
import csv
from bs4 import BeautifulSoup
def generate_allurl(user_in_nub):
    """Yield Lianjia listing-page URLs for pages 1 through user_in_nub - 1.

    Args:
        user_in_nub: Exclusive upper bound on the page number. Anything
            accepted by ``int()`` works (e.g. ``35`` or ``"35"``).

    Yields:
        str: The URL template with its ``pg{}`` placeholder filled in by each
        page number, in ascending order. Nothing is yielded when the bound
        is 1 or less.
    """
    url = 'https://bj.lianjia.com/ditiefang/li647/pg{}/'
    # range() excludes its stop value, so the last page produced is
    # user_in_nub - 1 (callers pass 35 to get pages 1..34).
    for url_next in range(1, int(user_in_nub)):
        yield url.format(url_next)
def main():
    """Fetch every listing page and print how many house codes each contains.

    For each URL produced by ``generate_allurl(35)`` the page is requested.
    When the response status is 200, the number of ``data-housecode`` matches
    found in the raw HTML is printed. Responses with any other status are
    skipped silently.

    Raises:
        requests.exceptions.RequestException: If a request fails or exceeds
            the timeout.
    """
    # Compiled once, outside the loop: captures the data-housecode attribute
    # of each listing's image link.
    re_set = re.compile(
        r'<li.*?class="clear">.*?<a.*?class="img.*?".*?data-housecode="(.*?)"'
    )
    # NOTE(review): df is never filled in the visible code - presumably meant
    # to collect rows for later export; confirm.
    df = []
    for i in generate_allurl(35):  # stop value 35 -> 34 pages in total
        print("页码" + i)
        # A timeout keeps one stalled connection from hanging the whole crawl.
        res = requests.get(i, timeout=10)
        if res.status_code != 200:
            continue
        # Parsed HTML of the page.
        # NOTE(review): soup is not used in the visible code.
        soup = BeautifulSoup(res.text, 'lxml')
        # All house codes matched on this page; its length is the number of
        # listings found.
        re_get = re_set.findall(res.text)
        print(len(re_get))
        # NOTE(review): the original ended here with a dangling "for i" stub;
        # per-listing processing appears to be unfinished.
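# Python爬取链家地铁房数据 (Scraping Lianjia subway-adjacent housing data with Python)
# 最新推荐文章于 2024-04-04 20:13:44 发布 (web-page residue: "latest recommended article published 2024-04-04 20:13:44")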