主程序hy.py:
# -*- coding: utf-8 -*-
import scrapy
from ..items import sortItem,gameInfo,gameSonSort,houseInfo
from scrapy import Request
import re
from time import sleep
class HySpider(scrapy.Spider):
name = 'hy'
allowed_domains = ['huya.com']
start_urls = ['http://huya.com/g']
def parse(self, response):
    """Yield one category item and one follow-up request per category link.

    The link targets and their labels are read from the anchors in the
    fifth ``dd`` of the first ``dl`` inside ``div.filter`` and paired up
    positionally. For every pair a ``sortItem`` carrying ``Surl`` and
    ``Sname`` is yielded, followed by a request for that URL whose
    response is handled by ``parseSort``.

    Args:
        response: The response being parsed (one of ``start_urls``).

    Yields:
        For each category link: a ``sortItem``, then the request returned
        by ``response.follow``.
    """
    anchor_xpath = "//div[@class='filter']/dl[1]/dd[position()=5]/a"
    urls = response.xpath(anchor_xpath + "/@href").extract()
    names = response.xpath(anchor_xpath + "/span/text()").extract()
    # zip() stops at the shorter list, so a missing label can no longer
    # raise IndexError the way indexing names[i] did.
    for url, name in zip(urls, names):
        # Build a fresh item per category. Re-yielding one shared item that
        # is mutated on every pass means anything still holding a reference
        # to an earlier item would see the values of a later category.
        sort_item = sortItem()
        sort_item['Surl'] = url
        sort_item['Sname'] = name
        yield sort_item
        yield response.follow(url, self.parseSort)
def parseSort(self,response):
game=gameInfo()
gameName=response.xpath('//ul[@id="js-game-list"]/li/@title').extract()
gameUrl=response.xpath('//ul[@id="js-game-list"]/li/a/@href').extract()
gameImg=response.xpath('//ul[@id="js-game-list"]/li/a/img/@src').extract()
gameGid=response.xpath('//ul[@id="js-game-list"]/li/a/@report').extract()
str=re.compile('"game_id":"(.*)"}')
for i in range(len(gameImg)-1):
game['gam