Mysql 操作
"""
#连接
>mysql -u lauf - p
>show database;
>use db;
>show tables;
>select user();
>select cur_date();
>select database();
>create table stu(id int,name varchar(10))engine=innodb;
>insert into stu(id,name) values(1,"jack");
>select name from stu where id=20;
>select count(name) from stu;
>delete from stu where id=1;
>update stu set name="tom" where id=3;
>drop table stu;
>drop database lauf;
"""
MySQL
python+pymysql --> mysql
import pymysql
#1建立连接
conn = pymysql.connect(host="localhost",port=3306,user="lauf",password="lauf123",db="lauf",charset="utf8")
#django中
#参数全大写,值全部字符串 db--> "NAME"
#2 创建游标
cursor = conn.cursor()
#3 执行语句
#参数化的sql
sql = "insert into stu(id,name) values(%s,%s)"
#sqlite (?,?)
cursor.execute(sql,[1,"jack"])
#4 提交更改
conn.commit()/conn.rollback()
#5 关闭游标
cursor.close()
#6关闭连接
conn.close()
mysql数据存储的猫眼爬虫
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 25 19:40:55 2021
@author: lenovo
"""
import requests
import time
import random
import pymysql
url = "https://maoyan.com/board/4?offset={}"
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36"}
regex = '<dd>.*?<p class="name"><a.*?>(.*?)</a>.*?<p class="star">(.*?)</p>.*?<p class="releasetime">(.*?)</p>.*?<i class="integer">(.*?)</i><i class="fraction">(.*?)</i>.*?</dd>'
#</i><i class="fraction">(.*?)</i></p>
import re
class MaoyanSpider(object):
def __init__(self):
self.url = url
self.headers = headers
self.regex = re.compile(regex,re.S)
#初始化连接
self.conn = pymysql.connect(host="localhost",port=3306,user="lauf",password="lauf123",db="lauf",charset="utf8")
self.cursor = self.conn.cursor()
def get_html(self,url):
self.res = requests.get(url,headers=headers)
def parse_html(self):
r_list = self.regex.findall(self.res.content.decode("utf-8"))
return r_list
def save_data(self):
pass
def run(self):
#遍历页码
for i in range(0,10):
#拼接url
url = self.url.format(i*10)
#获取页面
self.get_html(url)
# print(self.res.content.decode("utf-8"))
#解析页面
r_list = self.parse_html()
#当前页的10个电影
for idx,m in enumerate(r_list):
temp = []
temp.append(m[0].strip())
temp.append(m[1].strip())
temp.append(m[2].strip())
temp.append(float(m[3].strip()+m[4].strip()))
print("current movie:",temp)
#插入到db
self.cursor.execute("insert into movies values(%s,%s,%s,%s)",temp)
self.conn.commit()
#程序随机休眠,防止被识别
time.sleep(random.uniform(0,3))
#
#关闭连接
self.cursor.close()
self.conn.close()
print("爬取完成。。。")
if __name__=="__main__":
spider = MaoyanSpider()
spider.run()
数据存入csv
import csv
#newline="" 解决空行的问题
#encoding 解决编码的问题 win默认采用gbk linux默认utf-8
with open("xx.csv","w",newline="",encoding="gb18030") as f:
writer = csv.writer(f)
writer.writerow(["a","b","c"])
writer.writerows([["a","b","c"],[]])