一、创建数据库和数据表
-- Create the `heixiu` database with a UTF-8 default character set.
-- IF NOT EXISTS keeps the script re-runnable without "database exists" errors.
CREATE DATABASE IF NOT EXISTS `heixiu`
    DEFAULT CHARACTER SET utf8
    COLLATE utf8_general_ci;

USE `heixiu`;

-- One row per movie: an auto-generated id, a title and the video URL.
CREATE TABLE IF NOT EXISTS heixiumovie (
    id        INT          NOT NULL AUTO_INCREMENT,
    title     VARCHAR(100) NOT NULL,
    video_url VARCHAR(100) NOT NULL,
    PRIMARY KEY (id)
);
二、配置 MySQL 账号信息（写入项目的 settings.py）
# MySQL connection settings; DBPipeline reads these keys from the project settings.
MYSQL_HOST = 'localhost'
# Must name the database created in step 1 (`heixiu`), which holds the heixiumovie table.
MYSQL_DBNAME = 'heixiu'
MYSQL_USER = 'root'
# NOTE(review): example credential only -- do not commit a real password here.
MYSQL_PASSWD = '123456'
# Connection character set; DBPipeline reads MYSQL_CHARSET, and utf8 matches the
# default character set the heixiu database was created with.
MYSQL_CHARSET = 'utf8'
三、创建管道（在 heihei/pipelines.py 中定义 DBPipeline）
class DBPipeline(object):
    """Scrapy item pipeline that stores each item's title and URL in MySQL.

    Connection parameters come from the project settings (MYSQL_HOST,
    MYSQL_USER, MYSQL_PASSWD, MYSQL_DBNAME, MYSQL_CHARSET). Rows are written
    to the ``heixiumovie`` table; an item whose URL is already stored is
    skipped.
    """

    # Both statements use %s placeholders so values are bound by the driver
    # instead of being interpolated into the SQL text.
    SELECT_BY_URL_SQL = "SELECT id FROM heixiumovie WHERE video_url = %s LIMIT 1"
    INSERT_SQL = "INSERT INTO heixiumovie (title, video_url) VALUES (%s, %s)"

    def __init__(self):
        """Read the MySQL settings and open the connection."""
        settings = get_project_settings()
        self.host = settings['MYSQL_HOST']
        self.user = settings['MYSQL_USER']
        self.pwd = settings['MYSQL_PASSWD']
        self.name = settings['MYSQL_DBNAME']
        # Fall back to utf8 when the project does not define MYSQL_CHARSET.
        self.charset = settings.get('MYSQL_CHARSET', 'utf8')
        self.connect()

    def connect(self):
        """Open the MySQL connection and create the cursor used for all queries."""
        self.conn = pymysql.connect(
            host=self.host,
            port=3306,
            user=self.user,
            password=self.pwd,
            db=self.name,
            charset=self.charset,
        )
        self.cursor = self.conn.cursor()

    def close_spider(self, spider):
        """Release database resources when the spider finishes.

        The cursor is closed before the connection it belongs to.
        """
        self.cursor.close()
        self.conn.close()

    def process_item(self, item, spider):
        """Insert the item unless a row with the same URL already exists.

        Args:
            item: mapping providing the 'title' and 'url' keys.
            spider: the spider that produced the item (unused).

        Returns:
            The item, unchanged, so later pipelines still receive it.

        Database errors are printed rather than raised, so one failed write
        does not stop the crawl.
        """
        try:
            self.cursor.execute(self.SELECT_BY_URL_SQL, (item['url'],))
            repetition = self.cursor.fetchone()
            if not repetition:
                self.cursor.execute(
                    self.INSERT_SQL,
                    (item['title'], item['url']),
                )
                # Without an explicit commit the insert is discarded when the
                # connection closes (autocommit is off by default).
                self.conn.commit()
        except Exception as error:
            print(error)
            # Discard any half-finished transaction; a broken connection must
            # not turn this best-effort path into a crash.
            try:
                self.conn.rollback()
            except Exception as rollback_error:
                print(rollback_error)
        return item
四、注册管道（在 settings.py 的 ITEM_PIPELINES 中启用 DBPipeline）
# Enabled item pipelines, keyed by dotted class path. The integer is the run
# order: Scrapy runs lower values first, so DBPipeline (50) runs before
# HeiheiPipeline (300).
ITEM_PIPELINES = {
    'heihei.pipelines.HeiheiPipeline': 300,
    'heihei.pipelines.DBPipeline': 50,
}