
爬虫
尉迟海棠
这个作者很懒,什么都没留下…
展开
-
requests + bs4 爬取网易云音乐热门评论
""" 获取网易云音乐中的评论 """ import requests from bs4 import BeautifulSoup import json def comment(): url = r'https://music.163.com/weapi/comment/resource/comments/get?csrf_token=' headers = { 'user - agent': "Mozilla/5.0 (Windows NT 10.0; Win64;原创 2021-05-30 20:46:48 · 521 阅读 · 0 评论 -
requests + bs4 爬取贝壳房源
""" 爬取贝壳找房的房源 """ import json import requests from bs4 import BeautifulSoup import pandas as pd def get_data(keyword): """ 获取原始数据 :param keyword: :return: """ ip = '114.100.0.229:9999' proxy = {"http": ip} url = 'https://原创 2021-05-30 20:39:44 · 329 阅读 · 0 评论 -
requests + bs4 爬取豆瓣 top250 电影信息
""" 爬取豆瓣top250个电影 """ import requests import bs4 import re def open_url(url): headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'} res = requests.get(url,原创 2021-05-29 19:45:02 · 951 阅读 · 0 评论 -
Selenium 爬取百度图片
Selenium 爬取百度图片 # coding=utf-8 """ 获取10张百度图片 """ from selenium import webdriver from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.keys import Keys import time, requests def download_img(kw): # 打开浏览器原创 2021-05-29 19:43:13 · 424 阅读 · 0 评论