
爬虫
python小白努力中
自信,美好,永不止步。
chinacheng2023
展开
-
12306验证码
# !user/bin/env python3# -*-coding: utf-8 -*-__author__ = '闻名'from selenium import webdriverfrom selenium.webdriver.support.ui import WebDriverWaitfrom selenium.webdriver.support import expected_conditions as ECfrom selenium.webdriver.common.by impo… 原创 2020-05-26 13:57:28 · 426 阅读 · 0 评论 -
处理excel写入的问题
file = open(r'国产注册.txt','r', encoding='ISO-8859-1')data = file.read()content = eval(data)print(len(content))import xlwtwookbook = xlwt.Workbook(encoding='utf-8')wooksheet = wookbook.add_sheet('sheet')title = ['注册证编号', '注册人名称', '注册人住所 ', '生产地址 ', … 原创 2020-05-18 16:24:08 · 226 阅读 · 0 评论 -
医脉通数据爬取 http://disease.medlive.cn
import requestsimport timefrom lxml import etreeimport reimport xlwtimport randomimport xlrdfrom multiprocessing import Processclass Yimaitong(): def __init__(self): # 请求的url self.url = 'http://disease.medlive.cn/wiki/list/1.… 原创 2020-05-18 16:08:09 · 1107 阅读 · 0 评论 -
爬取豆瓣读书排行
import requests # 获取网页数据from bs4 import BeautifulSoup # 解析网页数据import time # 设置爬虫等待时间import xlwt# 获取豆瓣网址并解析数据def get_douban_books(url, num): headers = { 'user-agent': 'Mozilla/5.0...原创 2020-03-20 16:21:46 · 618 阅读 · 0 评论 -
爬取百度贴吧所有数据
# !/user/bin/python3# -*- coding:utf-8 -*-import requestsfrom lxml import etreeclass Tiebasprite(): def __init__(self, tieba_name): self.tieba_name = tieba_name self.post_url ...原创 2020-03-20 10:08:07 · 1323 阅读 · 0 评论 -
爬取百度图片
# !/user/bin/python3# -*- coding:utf-8 -*-import requestsimport refrom urllib import requestdef get_picture_list(keyword, biggest_pages): all_picture_list = [] for page in range(bigges...原创 2020-03-20 09:00:46 · 198 阅读 · 0 评论 -
请求头列表
USER_AGENT_LIST=[ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1", "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTM...原创 2020-03-20 08:56:28 · 436 阅读 · 0 评论 -
爬虫-获取百度图片壁纸保存至本地
# !/user/bin/python3# -*- coding:utf-8 -*-import requestsimport osimport re # 使用正则表达式def getManyPages(keyword, pages): params=[] # 分页请求地址拼接 每页显示60个图片 for i in range(0, 30*pages+30,...原创 2020-03-17 20:49:27 · 398 阅读 · 0 评论