#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from urllib.request import urlopen
from bs4 import BeautifulSoup
import datetime
import random
import pymysql
import re
# Connect straight to the 'scraping' database; utf8mb4 covers the full
# Unicode range (MySQL's legacy 'utf8' charset is only three bytes wide)
conn = pymysql.connect(host='127.0.0.1', port=3306, user='root',
                       password='54951', db='scraping', charset='utf8mb4')
cur = conn.cursor()
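# Assumes a 'pages' table already exists in the database. The exact
# schema is an assumption; something along these lines would work:
#   CREATE TABLE pages (
#       id INT AUTO_INCREMENT PRIMARY KEY,
#       title VARCHAR(200),
#       content TEXT,
#       created TIMESTAMP DEFAULT CURRENT_TIMESTAMP
#   );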
# Seed with a numeric timestamp (recent Pythons reject datetime objects)
random.seed(datetime.datetime.now().timestamp())
def store(title, content):
    # Let pymysql quote and escape the values itself; quoting the
    # placeholders by hand would insert doubly quoted strings
    cur.execute('INSERT INTO pages (title, content) VALUES (%s, %s)',
                (title, content))
    conn.commit()
def getLinks(articleUrl):
    """Fetch an article, store its title and first paragraph, and
    return all internal article links found in the page body."""
    html = urlopen('https://en.wikipedia.org' + articleUrl)
    bs = BeautifulSoup(html, 'html.parser')
    title = bs.find('h1').get_text()
    # First paragraph of the article text (may be empty on some pages)
    content = bs.find('div', {'id': 'mw-content-text'}).find('p').get_text()
    store(title, content)
    # Internal article links look like /wiki/Title with no colon;
    # colons mark namespace pages such as Talk: or Category:
    return bs.find('div', {'id': 'bodyContent'}).findAll(
        'a', href=re.compile('^(/wiki/)((?!:).)*$'))
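# Random-walk crawl: start from the Kevin Bacon article and keep
# following randomly chosen internal links, storing each page visited.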
links = getLinks('/wiki/Kevin_Bacon')
try:
    while len(links) > 0:
        # Follow a randomly chosen link from the current page
        newArticle = random.choice(links).attrs['href']
        print(newArticle)
        links = getLinks(newArticle)
finally:
    # Always release the cursor and connection, even on error
    cur.close()
    conn.close()
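# The crawl runs until a page yields no links or an exception occurs
# (e.g. a network error or Ctrl-C); the finally block then closes the
# cursor and connection cleanly.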
