Scraping the Tencent Careers Website
An object-oriented crawler project. The careers site loads its listings through a JSON API, so the spider builds paged requests against the Query endpoint, follows each posting's PostId to the ByPostId detail endpoint, and writes every job's responsibilities and requirements to its own text file.
import os
import re
import time

import requests


class TencentSpider(object):
    def __init__(self):
        self.headers = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36"
        }
        # City names mapped to the cityId values the careers API expects.
        self.address = {"深圳": 1, "北京": 2, "广州": 5, "上海": 3}

    def run(self):
        url_job_list = self.get_url_inform()
        self.parse_url(url_job_list)

    def parse_url(self, url_job_list):
        for url in url_job_list:
            response = requests.get(url=url, headers=self.headers, timeout=10)
            data = response.json()
            for post in data["Data"]["Posts"]:
                # Each posting's full text sits behind a second endpoint, keyed by PostId.
                detail_url = (
                    "https://careers.tencent.com/tencentcareer/api/post/ByPostId"
                    "?timestamp={}&postId={}&language=zh-cn"
                ).format(int(time.time()), post["PostId"])
                detail = requests.get(detail_url, headers=self.headers, timeout=10).json()
                name = detail["Data"]["RecruitPostName"]
                responsibility = detail["Data"]["Responsibility"]
                requirement = detail["Data"]["Requirement"]
                self.write_data(name, responsibility, requirement)
        print("写入完毕")  # "writing finished"

    def write_data(self, name, responsibility, requirement):
        try:
            os.makedirs("./腾讯招聘信息", exist_ok=True)  # ensure the output folder exists
            # Drop characters that are not allowed in file names.
            name = re.sub(r'[\\/:*?"<>|]', "", name)
            with open("./腾讯招聘信息/" + name + ".txt", "a", encoding="utf-8") as f:
                f.write(name)
                f.write("\n工作职责\n")  # job responsibilities
                f.write(responsibility)
                f.write("\n工作要求\n")  # job requirements
                f.write(requirement)
        except Exception as e:
            print(e)

    def get_url_inform(self):
        address = input("请输入您选择的地点:")          # city, e.g. 深圳
        job_info = input("请输入您选择的工作岗位方向:")  # keyword, e.g. python
        page = input("您要查询多少页:")                 # number of pages to fetch
        url_list = []
        # Look up the cityId; an unknown city falls back to "" (search all cities).
        city_id = self.address.get(address, "")
        for i in range(1, int(page) + 1):
            url = (
                "https://careers.tencent.com/tencentcareer/api/post/Query"
                "?timestamp={}&cityId={}&keyword={}&pageIndex={}"
                "&pageSize=10&language=zh-cn&area=cn"
            ).format(int(time.time()), city_id, job_info, i)
            url_list.append(url)
        return url_list


if __name__ == "__main__":
    ts = TencentSpider()
    ts.run()
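
Before pointing the spider at many pages, it is worth probing a single page of the Query endpoint to confirm the JSON layout has not changed. The sketch below is a minimal example of that check; it assumes the endpoint still responds as it did when this was written, and the RecruitPostName field on the listing entries is an assumption carried over from the detail endpoint's response.

import time

import requests

# Probe one page of the listing API (cityId 1 = 深圳, keyword "python").
headers = {"user-agent": "Mozilla/5.0"}
url = (
    "https://careers.tencent.com/tencentcareer/api/post/Query"
    "?timestamp={}&cityId=1&keyword=python&pageIndex=1"
    "&pageSize=10&language=zh-cn&area=cn"
).format(int(time.time()))

resp = requests.get(url, headers=headers, timeout=10)
resp.raise_for_status()
data = resp.json()

for post in data["Data"]["Posts"]:
    # "PostId" is what the spider above relies on; "RecruitPostName" here is
    # an assumption, so fall back to "" if the listing entries omit it.
    print(post["PostId"], post.get("RecruitPostName", ""))

If the keys print as expected, the full spider above should run unchanged; if Tencent has reshaped the API, this small probe fails fast instead of half-writing dozens of files.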