import scrapy
import os
from lxml import etree
import re
class QuotesSpider(scrapy.Spider):
    """Spider that prints delimiter-separated fields found in textarea text.

    Each response is parsed with lxml; for every text node of a
    ``<textarea>`` whose ``id`` starts with ``"txtare"``, five pieces of the
    text are printed on one line, separated by ``|``.
    """

    name = "start"

    # Greedy match: from the start of a line up to the last "。" on that line
    # ("." does not cross newlines).  Compiled once rather than per text node.
    _SENTENCE_RE = re.compile(r'(.*。)')

    def start_requests(self):
        """Yield one request per start URL, each handled by :meth:`parse`."""
        # NOTE(review): the only URL is a single space, which looks like a
        # placeholder -- confirm the real target URL before running.
        urls = [' ']
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Print the extracted fields of every matching textarea text node.

        Args:
            response: the downloaded page; only ``response.body`` is read.
        """
        page = etree.HTML(response.body)
        texts = page.xpath('//textarea[starts-with(@id,"txtare")]/text()')
        for text in texts:
            in_brackets, after_dash, after_dot, sentence, tail = (
                self._split_fields(text)
            )
            print(in_brackets, '|',
                  after_dash, '|',
                  after_dot, '|',
                  sentence, '|',
                  tail
                  )

    @classmethod
    def _split_fields(cls, text):
        """Split one textarea text into the five printed fields.

        Returns a 5-tuple of strings, in print order:

        1. the text between the first "《" and the first "》",
        2. the text from two characters after the first "—" up to the
           first "·" (presumably skipping a doubled dash -- TODO confirm),
        3. the text between the first "·" and the first "《",
        4. the regex match ending at the last "。" of a line, or ``""`` when
           the text contains no "。",
        5. everything after the first "》".

        A missing delimiter is not treated as an error: ``str.find`` then
        returns -1 and the affected slices are computed from that value.
        """
        match = cls._SENTENCE_RE.search(text)
        # Previously ``.group()`` was called unconditionally, so a text
        # without "。" raised AttributeError and aborted the whole callback.
        sentence = match.group() if match is not None else ''

        open_bracket = text.find("《")
        close_bracket = text.find("》")
        dash = text.find("—")
        dot = text.find("·")

        return (
            text[open_bracket + 1:close_bracket],
            text[dash + 2:dot],
            text[dot + 1:open_bracket],
            sentence,
            text[close_bracket + 1:],
        )
if __name__ == "__main__":
    # Running this file directly starts the crawl through the Scrapy CLI
    # by spawning a shell command for the spider named "start".
    crawl_command = 'scrapy crawl start'
    os.system(crawl_command)