import requests
import json
import re
import time
import pymysql
# Connect to the database
class mysql_conn():
    """Small wrapper around a single PyMySQL connection and its cursor.

    Attributes:
        db: The open PyMySQL connection (set to ``None`` once closed).
        cursor: A cursor created from ``db``.
    """

    def __init__(self):
        # PyMySQL 1.0+ accepts keyword arguments only; the positional form
        # connect(host, user, password, database) raises TypeError there.
        self.db = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='****',
            database='wang',
        )
        self.cursor = self.db.cursor()

    def execute_modify_mysql(self, sql, args=None):
        """Execute a data-modifying statement and commit it.

        Args:
            sql: The SQL statement. May contain ``%s`` placeholders when
                ``args`` is given.
            args: Optional parameters bound to the placeholders by the
                driver, so callers do not have to format values into the
                SQL text themselves. ``None`` runs ``sql`` as-is.
        """
        self.cursor.execute(sql, args)
        self.db.commit()

    def close(self):
        """Close the connection if it is open; safe to call repeatedly."""
        # getattr: __init__ may have failed before self.db was assigned.
        db = getattr(self, 'db', None)
        if db is not None:
            self.db = None
            db.close()

    def __del__(self):
        self.close()
# Site layout and crawl settings.
BASE_URL = 'http://www.ygdy8.net'
LIST_URL = BASE_URL + '/html/gndy/dyzz/list_23_{}.html'
FIRST_PAGE = 1
LAST_PAGE = 177
REQUEST_TIMEOUT = 10  # seconds; without it a stalled server hangs the crawl
REQUEST_DELAY = 2  # seconds to wait after each detail-page request
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}
# Placeholders are bound by the driver, so quotes in a scraped title cannot
# break or inject into the statement.
INSERT_SQL = 'insert into dianying(name,url) values(%s,%s)'

# [^"]+ instead of a greedy .* so two links on one line are not merged.
DETAIL_LINK_RE = re.compile(r'<a href="([^"]+)" class="ulink">')
DOWNLOAD_LINK_RE = re.compile(
    r'<td style="WORD-WRAP: break-word" bgcolor="#fdfddf"><a href="([^"]+)"'
)
# Text between the first "<digit>/" and the final file extension.
MOVIE_NAME_RE = re.compile(r'\d/(.+)\.[A-Za-z0-9]+$')


def extract_detail_paths(html):
    """Return the detail-page paths linked from a list page's HTML."""
    return DETAIL_LINK_RE.findall(html)


def parse_download_link(html):
    """Return the download URL found in a detail page, or None if absent."""
    found = DOWNLOAD_LINK_RE.search(html)
    return found.group(1) if found else None


def parse_movie_name(link):
    """Return the file name (without extension) from a download URL.

    Returns None when the URL does not have the ``...<digit>/<name>.<ext>``
    shape, instead of raising on a failed match.
    """
    found = MOVIE_NAME_RE.search(link.strip())
    return found.group(1) if found else None


def fetch(url):
    """GET ``url`` with the crawl headers; return the response or None on error."""
    try:
        response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('request failed: {} ({})'.format(url, exc))
        return None
    return response


def save_movie(conn, name, url):
    """Insert one (name, url) row through ``conn`` and commit it.

    Uses the wrapper's cursor directly because a parameterized execute is
    needed; building the statement with str.format is unsafe for scraped text.
    """
    conn.cursor.execute(INSERT_SQL, (name, url))
    conn.db.commit()


def main():
    """Crawl every list page and store each movie's name and download link."""
    sq = mysql_conn()
    for k in range(FIRST_PAGE, LAST_PAGE + 1):
        # List page
        listing = fetch(LIST_URL.format(k))
        if listing is None:
            continue
        for path in extract_detail_paths(listing.text):
            # Detail page
            detail_url = BASE_URL + path
            detail = fetch(detail_url)
            # Throttle after every detail request, successful or not.
            time.sleep(REQUEST_DELAY)
            if detail is None:
                continue
            # errors='ignore': a stray invalid byte should not abort the crawl.
            html2 = detail.content.decode('gbk', errors='ignore')
            move_url = parse_download_link(html2)
            move_name = parse_movie_name(move_url) if move_url else None
            if move_name is None:
                print('skipped, no usable download link: {}'.format(detail_url))
                continue
            save_movie(sq, move_name, move_url)
        print('第{}页存储完毕'.format(k))


if __name__ == '__main__':
    main()
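# Store the latest movie names and viewing links from 电影天堂 (ygdy8.net) in the database (all pages).
# Blog page footer: "latest recommended article published 2022-11-13 14:45:01".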