前面写了初试Scrapy,数据存入CSV,数据存入JSON文章。
这次整理一下数据存入MySQL数据库,整理了两个写法。
一、
1.修改pipelines.py文件,
新创建一个MysqlPipeline类,用于存储数据到数据库。
# Third-party MySQL driver (pip install pymysql).
import pymysql
# MySQL connection settings — replace the 'xx' placeholders with real values.
MY_HOST = 'xx'
MY_USER = 'xx'
MY_PASSWD = 'xx'
MY_DB = 'xx'
MY_PORT = 00  # placeholder — set the real integer port, e.g. 3306
class MysqlPipeline(object):
    """Scrapy item pipeline that stores items in MySQL.

    This variant reads the connection parameters from the module-level
    MY_* constants and opens the connection once at instantiation.
    """

    def __init__(self):
        # One connection for the whole crawl; reused for every item.
        self.conn = pymysql.connect(host=MY_HOST, user=MY_USER, password=MY_PASSWD,
                                    db=MY_DB, port=MY_PORT,
                                    use_unicode=True, charset='utf8')
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert one item into the doubanxs table and pass it downstream.

        Uses a parameterized query so pymysql escapes the values.
        """
        insert_sql = """
insert into doubanxs(name,author) values (%s,%s)
"""
        self.cursor.execute(insert_sql, (item['name'], item['author']))
        self.conn.commit()
        return item

    def close_spider(self, spider):
        """Release the cursor and connection when the spider finishes.

        Fix: the original never closed the connection (resource leak);
        Scrapy calls this hook automatically on shutdown.
        """
        self.cursor.close()
        self.conn.close()
2.修改settings.py文件,激活pipeline
# Activate the MySQL pipeline (the number 300 is its order; lower runs earlier).
ITEM_PIPELINES = {
#'doubanxs.pipelines.DoubanxsPipeline': 300,
'doubanxs.pipelines.MysqlPipeline': 300,
}
二、
1.修改pipelines.py文件
同样也是新创建一个MysqlPipeline类,用于存储数据到数据库。
import pymysql
class MysqlPipeline(object):
    """Scrapy item pipeline that stores items in MySQL.

    This variant reads the connection parameters from settings.py via
    from_crawler(), so no credentials are hard-coded in the pipeline.
    """

    def __init__(self, host, user, passwd, db, port):
        self.host = host
        self.user = user
        self.passwd = passwd
        self.db = db
        self.port = port

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the MY_* keys in the global settings.py."""
        return cls(
            host=crawler.settings.get('MY_HOST'),
            user=crawler.settings.get('MY_USER'),
            passwd=crawler.settings.get('MY_PASSWD'),
            db=crawler.settings.get('MY_DB'),
            # Fix: getint() — pymysql requires an int port, but a value
            # supplied on the command line (-s MY_PORT=3306) or from the
            # environment arrives as a string.
            port=crawler.settings.getint('MY_PORT'),
        )

    def open_spider(self, spider):
        """Called once when the spider opens: establish the DB connection."""
        print('连接mysql')
        self.conn = pymysql.connect(host=self.host, user=self.user, password=self.passwd,
                                    db=self.db, port=self.port,
                                    use_unicode=True, charset='utf8')
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert one item into the doubanxs table and pass it downstream.

        Uses a parameterized query so pymysql escapes the values.
        """
        insert_sql = """
insert into doubanxs(name,author) values (%s,%s)
"""
        self.cursor.execute(insert_sql, (item['name'], item['author']))
        self.conn.commit()
        return item

    def close_spider(self, spider):
        """Called once when the spider closes: release the DB resources."""
        print('关闭mysql')
        self.cursor.close()
        self.conn.close()
2.修改 settings.py文件,添加MySQL设置及激活pipeline
# MySQL connection settings consumed by MysqlPipeline.from_crawler().
MY_HOST = 'xx'
MY_USER = 'xx'
MY_PASSWD = 'xx'
MY_DB = 'xx'
MY_PORT = 00  # placeholder — set the real integer port, e.g. 3306
# Activate the MySQL pipeline (the number 300 is its order; lower runs earlier).
ITEM_PIPELINES = {
#'doubanxs.pipelines.DoubanxsPipeline': 300,
'doubanxs.pipelines.MysqlPipeline': 300,
}
以上就是把数据存入MySQL的两种方法。
三、 简单的建表语句
-- Target table for the scraped items.
-- Fix: utf8mb4 instead of MySQL's legacy 'utf8' (which is only 3 bytes per
-- character and rejects emoji / supplementary-plane CJK in scraped text).
create table if not exists doubanxs (
    id int(10) not null auto_increment primary key comment 'id',
    name varchar(255),
    author varchar(200)
) engine=INNODB default charset=utf8mb4;