管道下载写法:
同步写入:
import pymysql
from .items import Item
class CheshiPipeline:
def __init__(self):
self.conn = pymysql.connect(host='localhost', user='root', passwd='123456', db='mysql_db',
charset='utf8') # 连接mysql
self.cursor = self.conn.cursor() # 建立游标
def process_item(self, item, spider):
if isinstance(item, Item):
if item["title"] == '轻松玩':
pass
else:
self.insert(item)
else:
self.insert(item)
return item
def insert(self, item):
data = dict(item)
keys = ",".join(data.keys())
values = ",".join(['%s'] * len(data))
sql = 'replace into %s(%s) values (%s)' % (item.table, keys, values)
# 执行sql语句
try:
self.cursor.execute(sql, tuple(data.values()))
self.conn.commit()
except Exception as e:
print('[{}] insert 操作失败>>>>'.format(item.table), e)
self.conn.rollback()
else:
print('[{}]insert 操作成功'.format(item.table))
def close(self):
self.cursor.close()
self.conn.close()
写入方式根据自己的需求来判断是 insert 还是 replace , item 中需要加数据库表名的字段。
异步写入:
from pymysql import cursors
from twisted.enterprise import adbapi
class ToMysqlTwistedPipeline(object):
# 初始化函数
def __init__(self, db_pool):
self.db_pool = db_pool
# 从settings配置文件中读取参数
@classmethod
def from_settings(cls, settings):
# 用一个db_params接收连接数据库的参数
db_params = dict(
host=settings['MYSQL_HOST'],
user=settings['MYSQL_USER'],
password=settings['MYSQL_PASSWORD'],
port=settings['MYSQL_PORT'],
database=settings['MYSQL_DBNAME'],
charset=settings['MYSQL_CHARSET'],
use_unicode=True,
# 设置游标类型
cursorclass=cursors.DictCursor,
)
# 创建连接池
db_pool = adbapi.ConnectionPool('pymysql', **db_params)
# 返回一个pipeline对象
return cls(db_pool)
# 处理item函数
def process_item(self, item, spider):
# 把要执行的sql放入连接池
query = self.db_pool.runInteraction(self.insert, item)
# 如果sql执行发送错误,自动回调addErrBack()函数
query.addErrback(self.handle_error, item, spider)
# 返回Item
return item
# 处理sql函数
def insert(self, cursor, item):
data = dict(item)
keys = ",".join(data.keys())
values = ",".join(['%s'] * len(data))
sql = 'replace into %s(%s) values (%s)' % (item.table, keys, values)
# 执行sql语句
try:
cursor.execute(sql, tuple(data.values()))
except Exception as e:
print('[{}] insert 操作失败>>>>'.format(item.table), e)
else:
print('[{}]insert 操作成功'.format(item.table))
# 错误函数
def handle_error(self, failure, item, spider):
# #输出错误信息
print(failure)
setting 里面的配置数据库信息:
# 数据库地址
MYSQL_HOST = 'localhost'
# 数据库用户名:
MYSQL_USER = 'root'
# 数据库密码
MYSQL_PASSWORD = '123456'
# 数据库端口
MYSQL_PORT = 3306
# 数据库名称
MYSQL_DBNAME = 'mysql_db'
# 数据库编码
MYSQL_CHARSET = 'utf8'
配置数据库 名称、 端口、ip 就可以了