# 先查看网站发送信息的格式,发现可以通过 Ajax 来获取信息,而且没有 IP 访问限制。然后顺便爬取了杭州 5000 家酒店的信息。
import json
import time

import scrapy

from xiechen.items import XiechenItem
class XiecSpider(scrapy.Spider):
name = 'xiec'
allowed_domains = ['xiecheng.com']
# http://hotels.ctrip.com/Domestic/Tool/AjaxHotelList.aspx
def start_requests(self):
num = 0
for i in range(300):
time.sleep(0.5)
num += 1
yield scrapy.FormRequest(
method='POST',
url="http://hotels.ctrip.com/Domestic/Tool/AjaxHotelList.aspx",
formdata={