
Web Scraping
poirotl
Daily Crawler 6 (2022-04-05)
import requests
from lxml import etree

url = 'https://kunming.zbj.com/search/f/?kw=sass'
resp = requests.get(url)
# print(resp.text)
html = etree.HTML(resp.text)

# Grab the block for each individual service provider
divs = html.xpath("/html/body/div[6]/div/div/div[2]/div[7]/div/div")

# The original snippet is cut off here; the loop body below is a hedged
# reconstruction -- the relative text lookup is illustrative, not zbj.com's real markup.
for div in divs:
    texts = [t.strip() for t in div.xpath(".//text()") if t.strip()]
    print(texts)
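Both this snippet and Daily Crawler 5 below rely on absolute XPaths copied from browser devtools (/html/body/div[6]/...), which break as soon as the page layout shifts. A minimal sketch of a more resilient variant, assuming a hypothetical class name and adding a User-Agent header (neither comes from the original post):

import requests
from lxml import etree

resp = requests.get(
    'https://kunming.zbj.com/search/f/?kw=sass',
    headers={'User-Agent': 'Mozilla/5.0'},  # many sites reject requests' default UA
)
html = etree.HTML(resp.text)
# the "search-result" class is an assumed selector, for illustration only
for card in html.xpath('//div[contains(@class, "search-result")]'):
    # relative './/' paths keep working even if the card moves within the page
    texts = [t.strip() for t in card.xpath('.//text()') if t.strip()]
    print(texts)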
Daily Crawler 5 (2022-04-02)
import requests
from lxml import etree

url = "https://beijing.zbj.com/search/f/?type=new&kw=saas"
resp = requests.get(url)
# print(resp.text)

# Parse the response
html = etree.HTML(resp.text)

# Grab each service provider's div. The XPath was truncated in the original;
# the trailing "/div/div" is a hedged guess, consistent with Daily Crawler 6.
divs = html.xpath("/html/body/div[6]/div/div/div[2]/div[4]/div/div")
for div in divs:
    print([t.strip() for t in div.xpath(".//text()") if t.strip()])
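Once the per-provider fields are extracted inside the loop, a common next step is persisting them. A minimal sketch using the stdlib csv module; the column names and the data.csv filename are assumptions, not part of the original post:

import csv

rows = [("company", "title", "price")]  # hypothetical header; append one tuple per div
with open("data.csv", "w", newline="", encoding="utf-8") as f:
    csv.writer(f).writerows(rows)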
Daily Crawler 4 (2022-03-31)
# 1. Fetch the main page's source, then extract the sub-page links (href).
# 2. Fetch each sub-page via its href and find the image's download address (img -> src).
# 3. Download the images.
import requests
from bs4 import BeautifulSoup
import time

url = "https://www.umei.cc/bizhitupian/weimeibizhi/"
resp = requests.get(url)
resp.encoding = 'utf-8'  # set the declared encoding to avoid mojibake

# The snippet is cut off here; the loop below is a hedged reconstruction of
# steps 1-3, and the tag/attribute lookups are assumptions about the markup.
main_page = BeautifulSoup(resp.text, "html.parser")
for a in main_page.find_all("a"):
    href = a.get("href")
    # ... fetch href, locate its <img>, download img["src"] (see the helper below) ...
    time.sleep(1)  # throttle so we don't hammer the server
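The helper referenced above, sketched under the assumption that each sub-page yields a direct image URL; the img folder name is arbitrary. Streaming via iter_content avoids buffering a whole image in memory:

import os
import requests

def download_image(src: str, folder: str = "img") -> None:
    os.makedirs(folder, exist_ok=True)   # create the target folder on first run
    name = src.split("/")[-1]            # use the last URL segment as the filename
    with requests.get(src, stream=True) as r:
        r.raise_for_status()
        with open(os.path.join(folder, name), "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)           # stream to disk chunk by chunk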
Daily Crawler 3 (2022-03-25)
import json
import requests

def get_content_list():
    from lxml import etree
    base_url = "http://news.4399.com/gonglue/kpct/kapai/"
    resp = requests.get(base_url)
    # Print the status code
    # print(resp)
    # Print the page source (the site is GBK-encoded)
    # print(resp.content.decode("gbk"))

    # The snippet is cut off here; a hedged continuation would parse the GBK
    # source with etree.HTML and return the extracted items, for example:
    html = etree.HTML(resp.content.decode("gbk"))
    # '//a/@href' is a generic illustrative XPath, not the page's real structure
    return html.xpath("//a/@href")
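The json import in the preview is never reached before the cutoff. One plausible use, continuing the snippet above, is dumping whatever get_content_list() returns to disk; the filename and layout here are assumptions:

def save_content_list(content_list, path="kapai.json"):
    # ensure_ascii=False keeps Chinese text readable in the output file
    with open(path, "w", encoding="utf-8") as f:
        json.dump(content_list, f, ensure_ascii=False, indent=2)

if __name__ == "__main__":
    save_content_list(get_content_list())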