基于 bs4 和 XPath 的数据行提取 DEMO:逐行(按每条记录)提取,以确保数据的顺序和完整性

from bs4 import BeautifulSoup

# Sample listing markup used by both extraction demos below.
# It contains two "jobLeft" cards. Inside each card's <dl>:
#   - <dt> holds the job-title link (class "pop_jobName"),
#   - the first <dd> holds the salary span (class "salaryList"),
#   - the second <dd> holds two tag spans (class "tagBox_list").
html = """
<div class="jobsList">
    <div class="jobLeft">
        <dl>
            <dt class="mouseListen">
                <div class="jobName_text clearfix">
                    <a href="/jobs/pinzhiyuan/6b97cda7-9071-41fa-bc1f-38f0e6f11623.html" target="_blank" class="pop_jobName">品质员IPQC</a>
                </div>
            </dt>
            <dd>
                <span class="salaryList">5.5-8K</span>
            </dd>
            <dd>
                <span class="tagBox_list">年龄不限</span>
                <span class="tagBox_list">品质员</span>
            </dd>
        </dl>
    </div>
    <div class="jobLeft">
        <dl>
            <dt class="mouseListen">
                <div class="jobName_text clearfix">
                    <a href="/jobs/pinzhiyuan/6b97cda7-9071-41fa-bc1f-38f0e6f11623.html" target="_blank" class="pop_jobName">生产主任</a>
                </div>
            </dt>
            <dd>
                <span class="salaryList">8-12K</span>
            </dd>
            <dd>
                <span class="tagBox_list">年龄不限</span>
                <span class="tagBox_list">生产主任</span>
            </dd>
        </dl>
    </div>
</div>
"""
# Extract the data with BeautifulSoup.
# Each job card is located first and its fields are then read relative to
# that card, so the values of one row always belong to the same job and the
# rows come out in document order.
soup = BeautifulSoup(html, 'html.parser')

jobs = soup.find_all('div', class_='jobLeft')

for job in jobs:
    # Look every field up by its class inside the current card instead of
    # relying on tag position: `job.dd` is only the FIRST <dd> (the salary
    # one), so searching it for "tagBox_list" spans finds nothing and
    # indexing the empty result raises IndexError.
    job_name = job.find('a', class_='pop_jobName').string
    salary = job.find('span', class_='salaryList').string
    # The first "tagBox_list" span of the card is the age requirement.
    age = job.find('span', class_='tagBox_list').string
    print(f"岗位:{job_name},薪资:{salary},年龄要求:{age}")
# Extract the same data with XPath (lxml).
# `etree` was used without ever being imported, which raises NameError;
# import it here so this part of the script runs.
from lxml import etree

tree = etree.HTML(html)

jobs = tree.xpath('//div[@class="jobLeft"]')

for job in jobs:
    # All expressions start with "." so they are evaluated relative to the
    # current card; a leading "//" would search the whole document and mix
    # rows from different jobs.
    job_name = job.xpath('.//a[@class="pop_jobName"]/text()')[0]
    salary = job.xpath('.//span[@class="salaryList"]/text()')[0]
    # Parenthesised so that [1] picks the first tag span of the whole card
    # (the age requirement) rather than the first span of each <dd>.
    age = job.xpath('(.//span[@class="tagBox_list"])[1]/text()')[0]
    print(f"岗位:{job_name},薪资:{salary},年龄要求:{age}")
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值