from bs4 import BeautifulSoup
from lxml import etree
import requests
from splinter import Browser
from selenium import webdriver
import urllib
import time
import os
import re
import base64
import sys
import urllib.request
import xlwt
import sqlite3
# Example article pages to scrape:
#   http://www.gongyibaodao.com/zyff/16490.html
#   http://www.gongyibaodao.com/zyff/16545.html
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36 Edg/89.0.774.57"
}
# Page structure of each article:
#   <div id="article">
#     <h1 id="articleTitle">   -> article title
#     <div id="articleText">   -> article body (all <p> tags)
def main():
    # Scrape the article pages
    datalist = getData()
    # Save the scraped data
    savepath = "公益新闻.xls"
    saveData(datalist, savepath)


# Scrape the article pages
def getData():
    datalist = []
    num = 0  # counter of successfully scraped articles
    for i in range(16491, 16506):
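        # NOTE: the loop body was cut off in the original post. What follows is
        # a minimal sketch reconstructed from the hints above: article pages
        # live at /zyff/<id>.html, the title sits in <h1 id="articleTitle">,
        # and the body paragraphs sit inside <div id="articleText">.
        url = "http://www.gongyibaodao.com/zyff/" + str(i) + ".html"
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.encoding = response.apparent_encoding
        except requests.RequestException:
            continue  # skip pages that fail to load
        soup = BeautifulSoup(response.text, "html.parser")
        title_tag = soup.find("h1", id="articleTitle")
        text_div = soup.find("div", id="articleText")
        if title_tag is None or text_div is None:
            continue  # not a valid article page
        title = title_tag.get_text(strip=True)
        content = "\n".join(p.get_text(strip=True) for p in text_div.find_all("p"))
        datalist.append([title, url, content])
        num += 1
        time.sleep(1)  # be polite to the server
    return datalist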
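
# NOTE: saveData() was not included in the original post. Below is a minimal
# sketch of one possible implementation using xlwt (imported above): one row
# per article, with the [title, url, content] columns assumed in getData().
def saveData(datalist, savepath):
    workbook = xlwt.Workbook(encoding="utf-8")
    sheet = workbook.add_sheet("news")
    # header row
    for col, name in enumerate(("title", "url", "content")):
        sheet.write(0, col, name)
    # one row per scraped article
    for row, item in enumerate(datalist, start=1):
        for col, value in enumerate(item):
            sheet.write(row, col, value)
    workbook.save(savepath)


if __name__ == "__main__":
    main()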