#! /usr/bin/python3
# coding=utf-8
import os
import sys
sys.path.append('..')
import code_pub.api_sqlite
#import urllib2
import time
#import pymongo
#import mechanize
import urllib.request as urllib2
import urllib
from datetime import datetime
from datetime import timedelta
# NOTE: `global` statements at module level have no effect. `stocks` is bound
# by init_db_sqlite() and `g_db_stock` by init_db_code(); `k_ri`, `conn_list`
# and `conn_k` are never assigned by the live code in this file.
global stocks
global g_db_stock
global k_ri
global list_code
global conn_list, conn_k
# Work list of download targets: dicts with "code", "name" and "date_time"
# keys, filled by init_db_sqlite().
list_code = []
# Parent of the directory that contains this file.
server_root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# DDL for the per-stock daily-quote table; executed by init_db_code().
create_teble_sql='''
create table if not exists stock(
[id] integer primary key autoincrement,
[date] datetime,
[open] float,
[high] float,
[low] float,
[close] float,
[volume] float,
[adj_close] float
)
'''
def init_db_sqlite():
    """Open the stock-list database and fill ``list_code`` with download targets.

    Binds the module-level ``stocks`` handle to the ``stock_code`` database,
    reads every row of its ``stock`` table and appends one dict per row to
    ``list_code`` with the keys ``"code"``, ``"name"`` and ``"date_time"``
    (taken from row columns 1, 2 and 3).  Three hard-coded index entries are
    appended afterwards.

    The connection is left open; the caller closes ``stocks`` when done.
    """
    global stocks
    stocks = code_pub.api_sqlite.c_create_db(db_name="stock_code")
    stocks.conn_db()

    sql = 'select * from stock'
    print(sql)

    cnt = 0
    for row in stocks.search_info(search_sql=sql):
        # Column 0 is not used here (presumably the row id -- confirm against
        # the stock_code schema).
        list_code.append({"code": row[1], "name": row[2], "date_time": row[3]})
        cnt += 1

    # Index entries that are always downloaded in addition to the table rows.
    list_code.extend([
        {"code": "0A0001", "name": "上证指数", "date_time": "1990-12-01"},
        {"code": "399001", "name": "深证指数", "date_time": "1990-12-01"},
        {"code": "399006", "name": "创业板指", "date_time": "1990-12-01"},
    ])
    # cnt counts only the rows read from the database, not the three indices.
    print("cnt", cnt, type("上证指数"))
def init_db_code(code, date_start):
    """Open (and create if needed) the database for one stock; return its resume date.

    Binds the module-level ``g_db_stock`` handle to a database named after
    *code*, makes sure the ``stock`` table exists, and looks up the row with
    the highest ``id``.

    Args:
        code: Stock code; used as the database name.
        date_start: Date string returned when the table holds no rows yet.

    Returns:
        Column 1 (``date``) of the most recently inserted row, or
        *date_start* when the table is empty.
    """
    global g_db_stock
    g_db_stock = code_pub.api_sqlite.c_create_db(db_name=code)
    g_db_stock.conn_db()
    g_db_stock.create_table(create_teble_sql)

    sql = 'select * from stock order by id desc limit 1'
    print(sql)

    str_day = date_start
    cnt = 0
    # The query is limited to one row, so this loop runs at most once.
    for row in g_db_stock.search_info(search_sql=sql):
        print(row, type(row[0]), row[0])
        str_day = row[1]
        cnt += 1

    print("day", cnt, str_day)
    return str_day
def day_plus(str):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime``.

    Despite the name, no day is added: the one-day increment that used to
    live here was disabled, so the parsed date is returned unchanged.

    Args:
        str: Date text in ``%Y-%m-%d`` format (the parameter name shadows the
            builtin and is kept only for interface compatibility).

    Returns:
        The corresponding ``datetime`` at midnight.

    Raises:
        ValueError: If the text does not match ``%Y-%m-%d``.
    """
    return datetime.strptime(str, "%Y-%m-%d")
def day_str_change(str):
    """Convert a ``YYYY-MM-DD`` date string to compact ``YYYYMMDD`` form.

    Args:
        str: Date text in ``%Y-%m-%d`` format (the parameter name shadows the
            builtin and is kept only for interface compatibility).

    Returns:
        The same date formatted as ``%Y%m%d``.

    Raises:
        ValueError: If the text does not match ``%Y-%m-%d``.
    """
    parsed = datetime.strptime(str, "%Y-%m-%d")
    return parsed.strftime('%Y%m%d')
# yahoo
# Disabled Yahoo-format variant of update_ri that upserts into `k_ri` with a
# "$setOnInsert" document (presumably a pymongo collection -- the pymongo
# import at the top of the file is commented out).  It lives inside a bare
# string literal, so it is never executed.
'''
def update_ri(code, date, open, high, low, close, volume, adj_close):
print code, date, open, high, low, close, volume, adj_close
k_ri.update({"code": code, "date": date}, {"$setOnInsert":{"open": open, "high": high,
"low": low, "close": close,
"volume": volume,
"adj_close": adj_close}}, upsert = True)
'''
# 163
def update_ri(code, date, close, high, low, open, pre_close, change_value, change_quote, turnover_rate,
              volume, turnover, total_market_value, market_capitalization, turnover_number):
    """Insert one daily quote into the current stock database if it is missing.

    Looks up rows of the ``stock`` table whose ``date`` equals *date* through
    the module-level ``g_db_stock`` handle:

    * no row   -> a new row is inserted;
    * one row  -> nothing happens;
    * >1 rows  -> an error banner is printed; the duplicates are left alone
      (a delete-and-reinsert repair used to exist here but was disabled).

    Args:
        code: Stock code; only printed.
        date: Quote date string; used as the lookup key and stored.
        close, high, low, open: Prices stored in the matching columns
            (``open`` shadows the builtin; kept for interface compatibility).
        change_quote: Stored in the ``volume`` column (see note below).
        turnover_rate: Stored in the ``adj_close`` column (see note below).
        pre_close, change_value, volume, turnover, total_market_value,
        market_capitalization, turnover_number: Accepted and printed but not
            stored.
    """
    print("sql", code, date, close, high, low, open, pre_close, change_value, change_quote, turnover_rate,
          volume, turnover, total_market_value, market_capitalization, turnover_number)

    # NOTE(review): the SQL is assembled by string formatting from downloaded
    # data.  The only visible helper API takes a complete SQL string, so it
    # cannot be parameterised from here -- confirm whether the helper offers a
    # bound-parameter variant.
    sql = 'select * from stock where date = "%s" ' % (date,)
    print(sql)
    cnt = len(list(g_db_stock.search_info(search_sql=sql)))
    print("cnt", cnt, date)

    if 0 == cnt:
        # NOTE(review): the `volume` column receives change_quote and the
        # `adj_close` column receives turnover_rate, while the `volume`
        # argument is ignored.  This looks unintended but is preserved so new
        # rows stay consistent with data already stored -- confirm.
        sql = 'insert into stock (date, open, high, low, close, volume, adj_close) values ("%s", %f, %f, %f, %f, %f, %f)' % \
            (date, open, high, low, close, change_quote, turnover_rate)
        print(sql)
        g_db_stock.update_info(update_sql=sql)
    elif 1 < cnt:
        print("---------------------error-----------------------", date)
# '''
def stock_header(code):
    """Build the NetEase (163) history-download URL for *code*.

    The service expects the stock code with a one-digit prefix; the prefix
    is chosen from the leading digits of *code*, checked in this order:

    * ``60``, ``900`` or exactly-prefixed ``000001``  -> ``0``
    * ``399001``, ``399006``, ``000``, ``002``, ``300``, ``200`` -> ``1``
    * anything else -> no prefix; the code is printed and passed through.

    Args:
        code: Stock or index code as a string.

    Returns:
        The download URL without any ``start``/``end`` date parameters.
    """
    url = 'http://quotes.money.163.com/service/chddata.html?code='
    # `000001` must be tested before the generic `000` prefix below, so it
    # stays in the first group.
    zero_prefixed = ('60', '900', '000001')
    one_prefixed = ('399001', '399006', '000', '002', '300', '200')

    if code.startswith(zero_prefixed):
        market_code = '0' + code
    elif code.startswith(one_prefixed):
        market_code = '1' + code
    else:
        # Unrecognised prefix: report it and use the code unchanged.
        market_code = code
        print(code)

    return url + market_code
# '''
# Disabled Yahoo Finance variant of stock_header (appends `.ss` / `.sz`
# suffixes instead of a numeric prefix).  It lives inside a bare string
# literal, so it is never executed.
'''
def stock_header(code):
url = 'http://table.finance.yahoo.com/table.csv?s='
t1 = ('60', '900')
t2 = ('000', '002', '300', '200')
t3 = ('399001','399006')
if code.startswith(t1):
str = code + '.ss'
elif code.startswith('000001'):
str = code + '.ss'
elif code.startswith(t3):
str = code + '.sz'
elif code.startswith(t2):
str = code + '.sz'
else:
str = code
print code
url = url + str
return url
'''
def deal_url(str_day, url):
    """Append the ``start``/``end`` date range to a download URL.

    Args:
        str_day: First day to request, as ``YYYY-MM-DD``; an empty string
            means "no range", in which case *url* is returned untouched.
        url: Base download URL (as built by ``stock_header``).

    Returns:
        * *url* unchanged when *str_day* is empty;
        * ``""`` when *str_day* is today or later (nothing new to fetch);
        * otherwise *url* followed by ``&start=YYYYMMDD&end=YYYYMMDD`` where
          the end date is today.

    Raises:
        ValueError: If a non-empty *str_day* is not in ``%Y-%m-%d`` format.
    """
    if "" == str_day:
        print(url)
        return url

    day = day_plus(str_day)
    now = datetime.now()
    start = day.strftime('%Y%m%d')
    end = now.strftime('%Y%m%d')
    # Fixed-width YYYYMMDD strings compare in the same order as the dates.
    if start >= end:
        print(start, end)
        return ""

    # 163 CSV columns: date, stock code, name, close, high, low, open,
    # previous close, change amount, change percent, turnover rate, volume,
    # turnover amount, total market value, circulating market value,
    # number of trades.
    return url + '&start=' + start + '&end=' + end
def file_to_sql(code, content):
    """Parse a downloaded 163 CSV document and store every data row.

    The text is split on newlines and the lines are sorted.  The first and
    the last line of the sorted list are skipped; per the original note these
    are the blank residue and the NetEase column-name row.  Every other line
    is split on commas, cleaned, and handed to ``update_ri``.

    Cleaning of each field: carriage returns are removed, the text ``None``
    is replaced by ``0`` and an empty field becomes ``0``.

    Processing stops at the first line that does not have exactly 16 fields.
    A line whose numeric fields (columns 3-15) cannot be parsed as floats is
    reported and skipped.

    Args:
        code: Stock code, forwarded to ``update_ri``.
        content: Decoded CSV text.
    """
    rows = sorted(content.split('\n'))
    last_index = len(rows) - 1

    # Positions come from enumerate() so that a skipped (unparsable) line can
    # never shift which line is treated as the final one.
    for index, row in enumerate(rows):
        if index == 0:
            continue
        if index == last_index:
            print("end file", index)
            break

        split_row = row.split(",")
        full_data = [
            field.replace("\r", '').replace('None', '0') or '0'
            for field in split_row
        ]
        if 16 != len(full_data):
            # Unexpected layout: abandon the rest of the document.
            break

        try:
            # Columns 3..15 are the 13 numeric values update_ri expects, in
            # order (close, high, low, open, ... number of trades).
            numbers = [float(value) for value in full_data[3:]]
        except ValueError:
            print('\033[1;31;40m')
            print(split_row)
            print("--------------------ValueError----------------------------------------------")
            print('\033[0m')
            continue

        update_ri(code, full_data[0], *numbers)
def get_day(code, url):
    """Download the CSV at *url* and store its rows for *code*.

    An empty *url* means the stored data is already up to date; a notice is
    printed and nothing is fetched.  Otherwise the document is requested with
    a browser-like ``User-Agent``, decoded as GB18030 and passed to
    ``file_to_sql``.  HTTP and connection errors are reported on stdout and
    swallowed so that the caller can continue with the next stock.

    Args:
        code: Stock code, forwarded to ``file_to_sql``.
        url: Complete download URL, or ``""`` to skip.
    """
    print(code, url)
    if "" == url:
        print("---newst---date---------------------------------")
        return

    req_timeout = 500
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    req = urllib2.Request(url=url, headers=headers)
    # If outbound traffic must go through a proxy, call
    # req.set_proxy('host:port', 'http') here before opening the request.

    try:
        # The context manager closes the response even when read()/decode()
        # raises.
        with urllib2.urlopen(req, None, req_timeout) as response:
            content = response.read().decode('GB18030')
    except urllib2.HTTPError as e:
        # HTTPError is a subclass of URLError, so it has to be caught first.
        print('\033[1;31;40m')
        print('The server couldn\'t fulfill the request.')
        print('Error code: ', e.code)
        print('Error reason: ', e.reason)
        print('\033[0m')
    except urllib2.URLError as e:
        print('\033[1;31;40m')
        print('We failed to reach a server.')
        print('Reason: ', e.reason)
        print('\033[0m')
    else:
        file_to_sql(code, content)
class NoHistory(object):
    """History stand-in whose methods do nothing.

    Passed as ``history=`` to ``mechanize.Browser`` in ``browser`` so that
    visited pages are not accumulated.
    """

    def add(self, *a, **k):
        """Accept any arguments and record nothing."""
        pass

    def clear(self):
        """Nothing is stored, so there is nothing to clear."""
        pass
def browser(url):
    """Fetch *url* with a mechanize browser and return the stripped body.

    Alternative to ``get_day``'s urllib download; no live code in this file
    calls it.

    Args:
        url: Address to open, or ``""`` to skip.

    Returns:
        The response body with surrounding whitespace stripped, or ``""``
        when *url* is empty or opening/reading the page fails.

    Raises:
        ImportError: If the third-party ``mechanize`` package is missing.
    """
    print(url)
    if "" == url:
        print("---newst---date---------------------------------")
        return ""

    # Imported lazily: the module-level import is commented out, so without
    # this the function would fail with NameError, and importing here keeps
    # the rest of the script usable when mechanize is not installed.
    import mechanize

    br = mechanize.Browser(history=NoHistory())
    # Handler options.
    br.set_handle_equiv(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    # Follow "refresh 0" but do not hang on refresh > 0.
    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    br.set_debug_http(True)
    br.set_debug_redirects(True)
    br.set_debug_responses(True)
    # Present a regular browser User-Agent to the server.
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    try:
        content = br.open(url).read().strip()
    except Exception:
        # Best effort: any failure to open or read is reported and turned
        # into an empty result.  `Exception` (not a bare except) so that
        # Ctrl-C and interpreter exit still propagate.
        print('\033[1;31;40m')
        print("open err ------------------------------------")
        print('\033[0m')
        return ""
    return content
def get_day_list(code, date_start):
    """Bring the daily-quote database of one stock up to date.

    Opens the stock's database, determines the date to resume from, builds
    the download URL for the missing range, downloads and stores the rows,
    and finally closes the database.

    Args:
        code: Stock or index code.
        date_start: Date (``YYYY-MM-DD``) to start from when the database
            holds no rows yet.
    """
    str_day = init_db_code(code, date_start)
    try:
        print(str_day)
        url = stock_header(code)
        print(url)
        url = deal_url(str_day, url)
        get_day(code, url)
    finally:
        # Close the per-stock database even when the download or import
        # raised, so the handle opened by init_db_code is not leaked.
        g_db_stock.close_db()
if __name__ == '__main__':
    # Load the list of stocks/indices, then update each one in turn.
    init_db_sqlite()
    try:
        print(len(list_code))
        for entry in list_code:
            get_day_list(entry["code"], entry["date_time"])
    finally:
        # Close the stock-list database even if one of the updates failed.
        stocks.close_db()
# python3.8 stock info day to db
# 最新推荐文章于 2024-08-25 11:08:49 发布