Python爬虫与django框架开发小说网站
第三方包:MySQLdb、Django 1.10.8
Python版本2.7
先编写Python爬虫,爬取全书网所有小说的书名、章节、内容等信息并存入MySQL数据库;然后在Django框架中展示爬取到的小说内容。
先建库novel,再建立以下两张表(对应的Django模型如下):
class Novelcopy(models.Model):
    """Django model for one scraped novel, stored in the ``novel`` database.

    Every column except the primary key is optional (``blank=True,
    null=True``).
    """

    # Auto-generated primary key.
    novelid = models.AutoField(primary_key=True)
    # Category of the novel.
    sort = models.CharField(max_length=100, blank=True, null=True)
    # Title of the novel.
    novelname = models.CharField(max_length=100, blank=True, null=True)
    # Introduction text of the novel.
    novelintroduction = models.TextField(blank=True, null=True)
    # Serialization-status text scraped for the novel.
    toppicture = models.CharField(max_length=50, blank=True, null=True)
    # Author name.
    author = models.CharField(max_length=50, blank=True, null=True)
    # Cover image reference (presumably a URL or path -- confirm against the crawler).
    novelimge = models.CharField(max_length=255, blank=True, null=True)
class Chaptercopy(models.Model):
    """Django model for one scraped chapter of a novel.

    Every column except the primary key is optional (``blank=True,
    null=True``).
    """

    # Auto-generated primary key.
    chapterid = models.AutoField(primary_key=True)
    # Foreign key to Novelcopy, stored in the ``novelid`` column.
    # DO_NOTHING: Django itself takes no action on chapters when the
    # referenced novel is deleted.
    novelid = models.ForeignKey('Novelcopy', models.DO_NOTHING, db_column='novelid', blank=True, null=True)
    # Serialization-status text scraped for the novel.
    toppicture = models.CharField(max_length=100, blank=True, null=True)
    # Chapter body text.
    content = models.TextField(blank=True, null=True)
    # Chapter title.
    chaptername = models.CharField(max_length=100, blank=True, null=True)
    # Title of the novel this chapter belongs to (duplicated from Novelcopy).
    novelname = models.CharField(max_length=200, blank=True, null=True)
1. 先编写Python爬虫,爬取全书网所有小说的书名、章节、内容等信息并存入MySQL数据库。
import urllib2
import re
import MySQLdb
class Sql(object):
''' 连接数据库'''
conn = MySQLdb.connect(
    # If a run fails with an encoding-related error, strip these comments
    # and try again.
    # host="127.0.0.1",  (left disabled as in the original; no host is passed)
    port=3306,         # server port
    user='root',       # login user name
    passwd='123456',   # login password
    db='novel',        # database to use
    charset='utf8',    # character set for the connection
)
# Store the novel's category, introduction, name, author, serialization status and cover image in the novelcopy table.
def AddNovelcopy(self,sort,novelintroduction,novelname,author,toppicture,novelimge):
    """Insert one row into the ``novelcopy`` table and return its row id.

    The values are handed to the driver as query parameters instead of
    being interpolated into the SQL text, so scraped text that contains
    quotes or backslashes can neither break the statement nor inject SQL.

    Args:
        sort: Category of the novel.
        novelintroduction: Introduction text of the novel.
        novelname: Title of the novel.
        author: Author name.
        toppicture: Serialization-status text.
        novelimge: Cover image reference.

    Returns:
        The cursor's ``lastrowid`` after the insert.
    """
    sql = ("insert into novelcopy"
           "(sort,novelintroduction,novelname,author,toppicture,novelimge) "
           "values(%s,%s,%s,%s,%s,%s)")
    params = (sort, novelintroduction, novelname, author, toppicture, novelimge)
    cur = self.conn.cursor()
    try:
        cur.execute(sql, params)
        lastrowid = cur.lastrowid
    finally:
        # Release the cursor even when the insert raises.
        cur.close()
    self.conn.commit()
    return lastrowid
#将小说id,连载信息,内容,章节名字,小说名字存入AddChaptername表中
def AddChaptername(self,novelid,toppicture,content,chaptername,novelname):
cur = self.conn.cursor()