基于 Python 爬取安居客房价数据

原创于 2024-01-26 09:38:56 发布 · 1k 阅读

22 ·

CC 4.0 BY-SA版权

文章标签：

#python #爬取数据 #安居客房价数据

pip install requests
pip install beautifulsoup4

import requests
from bs4 import BeautifulSoup
import csv

def _extract_listing(house):
    """Return the Title/Link/Price row for one listing tag, or None if incomplete."""
    title_tag = house.find('div', class_='house-title')
    link_tag = house.find('a', class_='houseListTitle')
    price_tag = house.find('span', class_='price-det')
    if title_tag is None or link_tag is None or price_tag is None:
        return None

    # .get() instead of ['href'] so an anchor without the attribute is
    # skipped rather than raising KeyError.
    link = link_tag.get('href')
    if link is None:
        return None

    return {
        'Title': title_tag.text.strip(),
        'Link': link,
        'Price': price_tag.text.strip(),
    }


def crawl_anjuke_house_prices(city, district, timeout=10):
    """Fetch one Anjuke sale-listing page and save its entries to a CSV file.

    Requests ``https://{city}.anjuke.com/sale/{district}/`` and, on an HTTP
    200 response, writes one row per ``<li class="list-item">`` element to
    ``{city}_{district}_house_prices.csv`` in the current working directory
    (columns: Title, Link, Price). The outcome is reported with ``print``.

    NOTE(review): the CSS class names are taken as-is from the original
    script; whether the live page still uses them has not been verified.

    Args:
        city: Subdomain segment of the URL (e.g. ``'shenzhen'``).
        district: Path segment of the URL (e.g. ``'longgang'``).
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        None. Failures (network error or non-200 status) are printed, not
        raised.
    """
    base_url = f'https://{city}.anjuke.com/sale/{district}/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    # Without a timeout requests waits forever on a stalled connection.
    try:
        response = requests.get(base_url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f'Failed to retrieve data for {city}, {district}: {exc}')
        return

    if response.status_code != 200:
        print(f'Failed to retrieve data for {city}, {district}.')
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    # Parse everything before opening the file so a parsing problem cannot
    # leave a half-written CSV behind.
    rows = []
    for house in soup.find_all('li', class_='list-item'):
        row = _extract_listing(house)
        if row is not None:
            rows.append(row)

    with open(f'{city}_{district}_house_prices.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['Title', 'Link', 'Price'])
        writer.writeheader()
        writer.writerows(rows)

    print(f'Data for {city}, {district} saved successfully.')

# Demo invocation: fetch the 'longgang' listing page on the 'shenzhen' subdomain.
crawl_anjuke_house_prices('shenzhen', 'longgang')