用Scrapy爬商品ID
首先要在项目的 settings.py 中设置 ROBOTSTXT_OBEY = False（否则 Scrapy 会遵守 robots.txt，不抓取被其禁止的页面）
base.py
# -*- coding: utf-8 -*-
import scrapy
import codecs
class BaseSpider(scrapy.Spider):
    """Base spider that opens ``<spider name>.txt`` for UTF-8 writing.

    The open handle is exposed as ``self.file``. It is closed from
    ``closed()``, with ``__del__`` kept as a fallback.
    """

    allowed_domains = ["taobao.com"]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The output file is named after the spider; mode 'w' truncates it
        # on every run.
        self.file = codecs.open(self.name + '.txt', 'w', 'utf-8')

    def closed(self, reason):
        """Close the output file when the spider is closed.

        Scrapy calls this hook on the ``spider_closed`` signal, which makes
        cleanup deterministic instead of depending on garbage collection.

        Args:
            reason: Close reason supplied by Scrapy; not used here.
        """
        self._close_file()

    def __del__(self):
        # Fallback only: finalizers are not guaranteed to run at exit.
        self._close_file()

    def _close_file(self):
        # getattr guards against __init__ failing before self.file was set,
        # which would otherwise raise AttributeError from the finalizer.
        output = getattr(self, 'file', None)
        if output is not None:
            # Closing an already-closed file is a no-op, so calling this
            # from both closed() and __del__ is safe.
            output.close()
tce_id.py 用来爬小分类的ID
# -*- coding: utf-8 -*-
from .base import BaseSpider
import json
CATEGORY_URLS = [
'https://www.taobao.com/markets/nvzhuang/taobaonvzhuang',
'https://www.taobao.com/markets/nanzhuang/2017new',
'https://neiyi.taobao.com',
'https://www.taobao.com/markets/xie/nvxie/index',
'https://www.taobao.com/markets/bao/xiangbao',
'https://pei.taobao.com',
'https://www.taobao.com/markets/qbb/index?spm=a21bo.50862.201879-item-1008.5.YrbXb6&pvid=b9f2df4c-6d60-4af4-b500-c5168009831f&scm=1007.12802.34660.100200300000000',
'https://www.taobao.com/markets/qbb/index?spm=a21bo.50862.201867-main.8.mL7cax&pvid=b9f2df4c-6d60-4af4-b500-c5168009831f&scm=1007.12802.34660.100200300000000',
'https://www.taobao.com/markets/qbb/index?spm=a21bo.50862.201867-main.8&pvid=b9f2df4c-6d60-4af4-b500-c5168009831f&scm=1007.12802.34660.100200300000000',
'https://www.taobao.com/markets/jiadian/index',
'https://www.taobao.com/markets/3c/shuma',
'https://www.taobao.com/markets/3c/sj',
'https://mei.taobao.com/',
'https://www.taobao.com/market/baihuo/xihuyongpin.php?spm=a217u.7383845.a214d5z-static.49.e8DQmz',
'https://g.taobao.com/brand_detail.htm?navigator=all&_input_charset=utf-8&q=%E8%90%A5%E5%85%BB%E5%93%81&spm=a21bo.50862.201867-links-4.54.oMw9IU',
'https://www.taobao.com/market/peishi/zhubao.php',
'https://www.taobao.com/market/peishi/yanjing.php?spm=a219r.lm5630.a214d69.14.CkLAJ7',
'https://www.taobao.com/market/peishi/shoubiao.php',
'https://www.taobao.com/markets/coolcity/coolcityHome',
'https://www.taobao.com/markets/coolcity/coolcityHome',
'https://www.taobao.com/markets/amusement/home',
'https://game.taobao.com',
'https://www.taobao.com/markets/acg/dongman',
'https://www.taobao.com/markets/acg/yingshi',
'https://chi.taobao.com',
'https://chi.taobao.com',
'https://chi.taobao.com',
'https://s.taobao.com/search?q=%E5%9B%AD%E8%89%BA&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.50862.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170419',