目的:抓取网页上的人物和相关简介信息
代码分享:
import urllib.request
import ssl
# Swap the ssl module's default HTTPS context factory for the unverified one,
# so HTTPS requests opened through urllib skip certificate verification.
# NOTE(review): this assigns to private ssl attributes at module level, so it
# affects every later HTTPS request in the process - confirm that is intended.
ssl._create_default_https_context=ssl._create_unverified_context
def getHtml(url):
    """Fetch *url* and return its body decoded as UTF-8 text.

    Args:
        url: Anything accepted by ``urllib.request.urlopen`` (a URL string
            or a ``urllib.request.Request`` object).

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the resource cannot be opened.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response even when read() fails.
    with urllib.request.urlopen(url) as page:
        raw = page.read()
    return raw.decode('utf-8')
if name == “main”:
# from requests_html import HTMLSession
# import requests
import pandas as pd
import bs4
import re
# testurl1 = “http://dtcc.it168.com/2010/”
url_typ1_list = [“http://dtcc.it168.com/2010/",“http://dtcc.it168.com/2011/”,“http://dtcc.it168.com/2012/”,“http://dtcc.it168.com/2013/”,“http://dt