1. 将爬取到的数据上传到 MySQL 数据库
>>> SQL 语句的写法可参照下面的示例(先查重,再插入)
# Example statements: a duplicate check by title, and the insert to run when
# no row with that title exists yet.
# NOTE(review): the values are pasted straight into the SQL text, so a value
# containing a single quote yields a broken or injectable statement.
sql = (
    "select count(*) from categoryMovieTable "
    "where title='{}'"
).format(item.get('title'))
sql1 = (
    "insert into categoryMovieTable(category, url, title, source) "
    "value('{}', '{}', '{}', '{}')"
).format(*map(item.get, ('category', 'url', 'title', 'source')))
def put_to_mysql(sql, sql1, params=None, params1=None):
    """Insert a scraped record into MySQL unless it is already present.

    Executes ``sql`` and reads the first column of the first returned row as
    a count. When that count is falsy, executes ``sql1`` and commits;
    otherwise nothing is written. Errors raised while executing, fetching or
    committing are printed rather than re-raised. Errors raised while
    connecting or creating the cursor propagate to the caller.

    :param sql: duplicate-check statement (e.g. ``select count(*) ...``)
    :param sql1: insert statement run when the duplicate check returns 0
    :param params: optional values for the placeholders in ``sql``; they are
        handed to the driver for binding instead of being formatted into the
        statement text. ``None`` (the default) executes ``sql`` unchanged.
        Placeholder syntax is defined by the driver behind ``connect``
        (presumably ``%s`` for PyMySQL/MySQLdb -- confirm against the import).
    :param params1: optional values for the placeholders in ``sql1``, with
        the same meaning as ``params``
    :return: None
    """
    from contextlib import closing

    def _execute(cursor, statement, values):
        # Only pass values when the caller supplied some, so statements that
        # already embed their values are executed exactly as before.
        if values is None:
            cursor.execute(statement)
        else:
            cursor.execute(statement, values)

    # NOTE(review): connection settings are hard-coded here; consider moving
    # them to configuration.
    # closing() guarantees the cursor and then the connection are closed, even
    # when creating the cursor or closing it raises.
    with closing(connect(host='***', port=3306, database='MovieSpider', user='mysql', password='mysql', charset='utf8')) as conn:
        with closing(conn.cursor()) as cs1:
            try:
                _execute(cs1, sql, params)
                count = cs1.fetchone()[0]
                print(count)
                if count:
                    # A matching row already exists: skip the insert.
                    print("-"*100)
                    print("mysql已经存在"*10)
                else:
                    _execute(cs1, sql1, params1)
                    conn.commit()
                    print("-" * 100)
                    print("添加成功"*10)
            except Exception as e:
                # Best-effort upload: report the failure and carry on.
                print(e)
2. 将爬取到的图片上传到阿里云 OSS,并返回带签名的访问链接
def put_to_aliyun(data, path, key):
    """Upload an image to Aliyun OSS and return a signed URL for it.

    The object is named ``aiqiyi/<path>/<key>.jpg``. It is uploaded only when
    no object with that name exists in the bucket; the signed GET URL is
    generated either way.

    :param data: image content to store, e.g.
        ``requests.get(url, headers).content``
    :param path: middle component of the object name, under ``aiqiyi/``
    :param key: last component of the object name, without the ``.jpg`` suffix
    :return: signed GET URL for the object; the expiry passed to ``sign_url``
        is ``3600 * 24 * 10`` (ten days if the unit is seconds -- see the
        oss2 documentation)
    """
    # Placeholder credentials and bucket settings; replace with real values.
    access_key_id = 'your_key_id'
    access_key_secret = 'your_access_key_secret '
    bucket_name = 'your-bucket_name '
    endpoint = 'your-endpoint '

    auth = oss2.Auth(access_key_id, access_key_secret)
    bucket = oss2.Bucket(auth, endpoint, bucket_name)

    # Build the object name once and reuse it for every bucket call.
    object_name = "aiqiyi/{}/{}.jpg".format(path, key)

    already_stored = bucket.object_exists(object_name)
    if not already_stored:
        bucket.put_object(object_name, data)

    expires = 3600 * 24 * 10
    imagepath = bucket.sign_url('GET', object_name, expires)
    print("*"*100)
    print("添加aliyun成功"*10)
    return imagepath