获取某厂招聘岗位信息

今天分享一个爬虫案例:爬取某厂招聘岗位信息数据。通过这个程序可以学习 pymysql 的使用——在 PyCharm 中运行程序获取数据,并将数据导入 MySQL 数据库中。

1 导入必要的包

import requests
import pymysql

2 主体代码


class Baidu(object):
    """Fetch job postings from the position-search endpoint and store them in MySQL.

    The class name is kept for backward compatibility; the endpoint it
    queries is ``https://talent.alibaba.com/position/search``.

    Typical use is ``Baidu().run()``, which creates the destination table if
    needed, walks the result pages, inserts one row per posting and finally
    closes the database connection.
    """

    # Single source of truth for the destination table. The CREATE and INSERT
    # statements previously named two different tables, so every insert failed
    # on a fresh database. This is a trusted class constant (never user input),
    # which is why it may be interpolated into the SQL text below.
    table_name = 'ali'

    # Seconds to wait for the HTTP API before giving up; without a timeout
    # ``requests`` may block indefinitely on a stalled connection.
    request_timeout = 10

    def __init__(self):
        """Open the MySQL connection and prepare the static request settings."""
        self.db = pymysql.connect(host="127.0.0.1", user="root", password="88888888", db="test_db")
        self.cursor = self.db.cursor()
        self.url = 'https://talent.alibaba.com/position/search'
        self.headers = {
            # Placeholder: replace with the cookie of your own browser session.
            'cookie': '自己的cookie',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/547.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/547.36'
        }
        # NOTE(review): presumably this token must match the session cookie
        # above - confirm against the site before running.
        self.params = {
            "_csrf": "09d5fe8f-08a2-4d3c-a43f"
        }

    def get_data(self, page):
        """POST the search request for one result page and return the decoded JSON.

        Args:
            page: Value sent as ``pageIndex`` in the request body.

        Returns:
            The response body parsed with ``response.json()``.
        """
        data = {
            "channel": "group_official_site",
            "language": "zh",
            "batchId": "",
            "categories": "",
            "deptCodes": [],
            "key": "",
            "pageIndex": page,
            "pageSize": 19,
            "regions": "",
            "subCategories": ""
        }
        response = requests.post(
            url=self.url,
            params=self.params,
            headers=self.headers,
            json=data,
            timeout=self.request_timeout,
        )
        return response.json()

    def parse_data(self, response):
        """Extract the stored fields from one page of results and save each posting.

        Args:
            response: Decoded JSON as returned by ``get_data``; the postings
                are read from ``response["content"]["datas"]``.
        """
        for node in response["content"]['datas']:
            # A missing or null location list is stored as an empty string
            # instead of aborting the whole page.
            work_locations = ','.join(node.get('workLocations') or [])
            self.save_data(work_locations, node['name'], node['requirement'])

    def create_table(self):
        """Create the destination table if it does not exist yet.

        Failures are reported on stdout and not re-raised.
        """
        sql = f'''
                CREATE TABLE IF NOT EXISTS {self.table_name}(
                    id int primary key auto_increment not null,
                    workLocations VARCHAR(255) NOT NULL,
                    name VARCHAR(255) NOT NULL,
                    requirement TEXT)
        '''
        try:
            self.cursor.execute(sql)
            print("CREATE TABLE SUCCESS.")
        except Exception as ex:
            print(f"CREATE TABLE FAILED,CASE:{ex}")

    def save_data(self, workLocations, name, requirement):
        """Insert one posting and commit; roll back and report on failure.

        Args:
            workLocations: Comma-joined work locations.
            name: Position name.
            requirement: Position requirement text.
        """
        # ``id`` is left out so the auto-increment column always generates it;
        # sending an explicit 0 stores a literal 0 when the server runs with
        # the NO_AUTO_VALUE_ON_ZERO SQL mode. Values are bound as parameters.
        sql = f'INSERT INTO {self.table_name}(workLocations, name, requirement) values(%s, %s, %s)'
        try:
            self.cursor.execute(sql, (workLocations, name, requirement))
            self.db.commit()
            print('数据插入成功...')
        except Exception as e:
            print(f'数据插入失败: {e}')
            # Undo the partial work of the failed statement.
            self.db.rollback()

    def run(self):
        """Create the table, fetch and store pages 1 to 18, then close the connection."""
        try:
            self.create_table()
            for i in range(1, 19):
                response = self.get_data(i)
                self.parse_data(response)
        finally:
            # Release the cursor and connection even when a page fails.
            self.cursor.close()
            self.db.close()
if __name__ == '__main__':
    # Script entry point: build the scraper and run the full crawl once.
    Baidu().run()

结果:
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值