import csv
import time

import requests
def save_data(row):
    # Append one row to the output CSV. A context manager guarantees the file
    # handle is closed, and utf-8-sig renders Chinese text correctly in Excel
    # without silently dropping characters the way GBK + errors='ignore' does.
    with open('dajie_jobs.csv', 'a', encoding='utf-8-sig', newline='') as f:
        csv.writer(f).writerow(row)
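
# A minimal sketch, not part of the original script: the CSV is written with
# no header row, so calling this once before the crawl labels the columns.
# The names mirror the field order assembled in parse_response below.
def write_header():
    save_data(['corpId', 'pubCity', 'jobName', 'compName',
               'salary', 'industryName', 'pubEx', 'pubEdu'])
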
def parse_response(response):
    # Iterate over however many postings the page actually returned rather
    # than assuming a fixed 30, so a short final page cannot raise IndexError.
    for job in response['data']['list']:
        corpId = job['corpId']
        jobName = job['jobName']
        compName = job['compName']
        pubCity = job['pubCity']
        salary = job['salary']
        # Some postings omit these fields; .get() supplies a default label
        # instead of a bare except swallowing every possible error.
        pubEdu = job.get('pubEdu', 'no education requirement')
        pubEx = job.get('pubEx', 'no experience requirement')
        industryName = job.get('industryName', 'no industry category')
        row = [corpId, pubCity, jobName, compName, salary,
               industryName, pubEx, pubEdu]
        save_data(row)
        print(row)
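
# For reference, the response shape parse_response relies on, inferred from
# the field accesses above rather than from any documented schema:
#
#   {
#       "data": {
#           "list": [
#               {"corpId": ..., "jobName": ..., "compName": ..., "pubCity": ...,
#                "salary": ..., "pubEdu": ..., "pubEx": ..., "industryName": ...},
#               ...
#           ]
#       }
#   }
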
def main():
    firstUrl = 'https://so.dajie.com/job/search'
    # One session for the whole crawl: the User-Agent and referer only need to
    # be set once, and the cookies from the landing page are then reused by
    # every AJAX call.
    session = requests.session()
    session.headers['User-Agent'] = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) '
                                     'AppleWebKit/537.36 (KHTML, like Gecko) '
                                     'Chrome/78.0.3904.87 Safari/537.36')
    session.get(firstUrl)  # warm-up request so the AJAX endpoint sees valid cookies
    session.headers['referer'] = firstUrl
    for page in range(23):
        url = ('https://so.dajie.com/job/ajax/search/filter?keyword=&order=0'
               '&city=441900&recruitType=&salary=&experience=&page={}'
               '&positionFunction=&_CSRFToken=&ajax=1').format(page)
        response = session.get(url).json()
        parse_response(response)
        time.sleep(1)  # small delay between pages; also why `time` is imported
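
# A hardening sketch under assumptions, not part of the crawl above: a single
# network hiccup currently aborts the whole run. A small retry wrapper with
# backoff could stand in for the bare session.get(url).json() call in main().
def fetch_json(session, url, retries=3):
    for attempt in range(retries):
        try:
            r = session.get(url, timeout=10)
            r.raise_for_status()  # surface HTTP errors such as 403/429
            return r.json()
        except requests.RequestException:
            time.sleep(2 ** attempt)  # simple exponential backoff: 1s, 2s, 4s
    raise RuntimeError('giving up on ' + url)
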
if __name__ == '__main__':
    main()