import scrapy
from Scrapy.items import PositionItem
from bs4 import BeautifulSoup
import requests
class ShixisengSpider(scrapy.Spider):
    """Collect the job titles linked from page one of a shixiseng.com search.

    The spider loads a single search-result page (keyword ``Python``, type
    ``intern``), extracts the detail-page link of every listing matched by
    ``_DETAIL_LINK_XPATH``, downloads each detail page and reads the job
    title from the ``span`` inside the element with class ``new_job_name``.
    """

    name = 'shixiseng'
    # Scrapy expects bare domain names here; a full URL is not a valid entry.
    allowed_domains = ['shixiseng.com']
    # The original query string contained "days=°ree=", which is
    # "days=&degree=" after "&deg" was mis-decoded as an HTML entity.
    start_urls = [
        'https://www.shixiseng.com/interns?page=1&keyword=Python&type=intern'
        '&area=&months=&days=&degree=&official=&enterprise=&salary=-0'
        '&publishTime=&sortType=&city=%E8%BF%90%E5%9F%8E&internExtend='
    ]

    # Selects the href of each listing's title link on the search page.
    _DETAIL_LINK_XPATH = (
        '//*[@id="__layout"]/div/div[2]/div[2]/div[1]/div[1]/div[1]'
        '//div/div[1]/div[1]/p[1]/a/@href'
    )
    # Seconds to wait for a detail page before giving up on that listing.
    _DETAIL_TIMEOUT = 10

    def parse(self, response):
        """Yield one ``PositionItem`` per listing found in ``response``.

        Each yielded item has ``url_cur`` (the detail-page URL) and ``name``
        (the job title text). The 1-based listing index and title are also
        printed, as before. A listing whose detail page cannot be fetched or
        does not contain the expected title markup is logged and skipped
        instead of aborting the whole page.

        Args:
            response: The Scrapy response for the search-result page.

        Yields:
            PositionItem: one populated item per successfully parsed listing.
        """
        hrefs = response.xpath(self._DETAIL_LINK_XPATH).extract()
        for index, href in enumerate(hrefs, start=1):
            # Absolute hrefs pass through unchanged; relative ones are
            # resolved against the search page URL.
            detail_url = response.urljoin(href)

            # NOTE(review): requests.get is synchronous, so Scrapy cannot
            # overlap these downloads; consider yielding scrapy.Request with
            # a callback if throughput matters.
            try:
                detail = requests.get(
                    url=detail_url, timeout=self._DETAIL_TIMEOUT
                )
                detail.raise_for_status()
            except requests.RequestException as exc:
                self.logger.warning(
                    'Skipping listing %d (%s): %s', index, detail_url, exc
                )
                continue

            soup = BeautifulSoup(detail.text, "html.parser")
            heading = soup.find(attrs={'class': 'new_job_name'})
            title = heading.find('span') if heading is not None else None
            if title is None:
                self.logger.warning(
                    'Skipping listing %d (%s): job title not found',
                    index, detail_url,
                )
                continue

            # A fresh item per listing so yielded items never share state.
            position = PositionItem()
            position['url_cur'] = detail_url
            position['name'] = title.get_text()
            print(index, ': ', position['name'])
            yield position
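# Uses the Scrapy framework to scrape the names of all positions listed on
# the first page of Shixiseng's Python internship search results.
# Latest recommended article published on 2024-07-29 15:43:44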