1.数据读取:
#方法一:对网页直接进行读取
# content = urllib2.urlopen(urllib2.Request("http://pythonscraping.com/files/inaugurationSpeech.txt")).read()
#方法二:对本地文件的读取,测试时候用,因为无需联网
# content = open("data\\English.txt").read()
2.数据库连接
import MySQLdb
conn = MySQLdb.connect(host='localhost', user='root', passwd='root', db='twitter', port=3306, charset='utf8')
cur = conn.cursor()
增:
sql = "replace into overview (topic_id,perNum,txtNum,fdNum,time) values (%s,%s,%s,%s,%s)"
cur.execute(sql, (str(topic[0]),perNum,txtNum,fdNum,time))
conn.commit()
删:
cur.execute("delete from overview")
conn.commit()
改:
cur.execute("UPDATE overview SET burst_id= -1 where topic_id>100")
conn.commit()
查:
cur.execute('SELECT DISTINCT topic_id FROM messageinfo order by topic_id')
topicdata = cur.fetchall()
for topic in topicdata:
print topic[0]
cur.execute("select create_time from messageinfo where topic_id= '"+str(topic[0])+"' order by create_time limit 1")
time1=cur.fetchone()
断开数据库:
cur.close()
conn.close()
3.时间戳
import time
print time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time()))
字符串与时间互转(需先 import datetime;注意下面把结果赋给变量 time 会覆盖已导入的 time 模块,实际使用时建议换一个变量名):
mindate= datetime.datetime.strptime("2016-01-01 23:59:59", "%Y-%m-%d %H:%M:%S")
time = datetime.datetime.strftime(selectdate,'%Y-%m-%d %H:%M:%S')
4.正则匹配
import re
print(re.findall('.*r(.*)b.*', 'www.runoob.com'))
判断是否为整数:re.match(r'[+-]?\d+$', s) 当 s 由可选的正负号加纯数字组成时,返回匹配对象(Match),否则返回 None ,
not re.match(r'[+-]?\d+$', s) 返回为True说明 s 不是整数字符串(并不表示"不含数字",例如 'a1' 也会返回True)
判断是否以英文字母开头: re.match(r'[a-z]+',s) 只从字符串开头匹配,s 以小写字母开头时返回匹配对象(Match),否则返回 None
re.match(r'[a-z]+',s,re.I) re.I 表示忽略大小写(大小写不敏感)。s 以英文字母开头时返回匹配对象(Match)
not re.match(r'[a-z]+',s,re.I) 返回为True说明 s 不以英文字母开头;若要判断整个字符串都不含英文字符,应使用 not re.search(r'[a-z]',s,re.I)
5. ipynb文件转.py文件
进入要转换的 demo.ipynb 所在目录,然后输入命令:
jupyter nbconvert --to script demo.ipynb