import baostock as bs
import pandas as pd
import os
import sys
from datetime import datetime
from tqdm import tqdm
import time
class CorrectFormatDownloader:
    """Download daily K-line data from baostock for stocks listed in a local CSV.

    Typical flow: ``login()`` -> ``load_stock_info_from_csv()`` ->
    ``download_all_stocks()``. Each stock is written to its own CSV file in
    ``output_dir`` with Chinese column headers, and later runs only request
    the dates that are missing from that file.
    """

    def __init__(self):
        """Set the default paths, create the output directory and the header map."""
        self.login_success = False
        self.stock_info = None
        self.csv_path = r"F:\TDX股票信息\全部A股基本信息.csv"
        self.output_dir = "F:/baostock_hdate"
        # Make sure the output directory exists.
        os.makedirs(self.output_dir, exist_ok=True)
        # Mapping from internal field names to Chinese column headers.
        self.chinese_columns = {
            'code': '股票代码', 'date': '交易日期', 'open': '开盘价', 'high': '最高价',
            'low': '最低价', 'close': '收盘价', 'preclose': '前收盘价', 'volume': '成交量',
            'amount': '成交额', 'adjustflag': '复权状态', 'turn': '换手率', 'tradestatus': '交易状态',
            'pctChg': '涨跌幅', 'peTTM': '市盈率TTM', 'pbMRQ': '市净率MRQ',
            'psTTM': '市销率TTM', 'pcfNcfTTM': '市现率TTM', 'isST': '是否ST',
            'industry_code': '行业代码', 'area': '所属地域',
            'list_date': '上市日期', 'market': '市场类型',
            'research1': '研究1', 'research2': '研究2', 'research3': '研究3',
            'research1_code': '研究1行业代码', 'research2_code': '研究2行业代码', 'research3_code': '研究3行业代码',
            'employee_count': '员工总数'
        }

    def login(self):
        """Log in to baostock.

        Returns:
            True when the login succeeded (``login_success`` is set as well),
            False when baostock reported an error or the call raised.
        """
        print("正在登录baostock...")
        try:
            lg = bs.login()
        except Exception as e:
            print(f"登录异常: {e}")
            return False
        if lg.error_code == '0':
            print("登录成功")
            self.login_success = True
            return True
        print(f"登录失败: {lg.error_msg}")
        return False

    def load_stock_info_from_csv(self):
        """Load the stock list from ``csv_path`` and normalise it.

        Returns:
            True when the file was read and processed, False when the file is
            missing, cannot be read, or processing failed.
        """
        print(f"正在从 {self.csv_path} 加载股票信息...")
        if not os.path.exists(self.csv_path):
            print(f"文件不存在: {self.csv_path}")
            return False
        try:
            # The file is read as UTF-8.
            self.stock_info = pd.read_csv(self.csv_path, encoding='utf-8')
            print(f"成功加载 {len(self.stock_info)} 条股票信息")
            # Strip stray whitespace around the column names.
            self.stock_info.columns = [col.strip() for col in self.stock_info.columns]
            return self.process_csv_data()
        except Exception as e:
            print(f"读取文件时出错: {e}")
            return False

    @staticmethod
    def _normalize_code(code):
        """Return the bare stock code as text, zero-padded to six characters.

        Anything after the first '.' is dropped, so a code that was parsed as
        a float (``600000.0``) comes out as ``'600000'``.
        """
        return str(code).strip().split('.')[0].zfill(6)

    @staticmethod
    def _market_of(short_code):
        """Map a six-digit code to its market prefix: 'sh', 'sz' or 'bj'."""
        if short_code.startswith(('6', '5')):  # Shanghai
            return 'sh'
        if short_code.startswith(('0', '3', '2')):  # Shenzhen
            return 'sz'
        return 'bj'  # everything else is treated as Beijing

    def process_csv_data(self):
        """Rename the CSV columns and derive the code, market and date columns.

        Adds ``bs_code`` (e.g. ``sh.600000``), ``short_code`` (six digits, used
        in file names) and ``market``, and rewrites ``list_date`` as
        ``YYYY-MM-DD`` text.

        Returns:
            True on success, False when no stock information is loaded.

        Raises:
            KeyError: when a required column is missing from the CSV.
        """
        if self.stock_info is None or self.stock_info.empty:
            return False
        # Chinese CSV headers -> internal column names.
        column_mapping = {
            '名称': 'name',
            '股票代码': 'code',
            '上市日期': 'list_date',
            '研究1': 'research1',
            '研究2': 'research2',
            '研究3': 'research3',
            '研究1行业代码': 'research1_code',
            '研究2行业代码': 'research2_code',
            '研究3行业代码': 'research3_code',
            '员工总数': 'employee_count'
        }
        self.stock_info = self.stock_info.rename(columns=column_mapping)

        # Both code columns come from the same normalised value so that the
        # file-name code can never disagree with the baostock code.
        short_codes = self.stock_info['code'].map(self._normalize_code)
        self.stock_info['bs_code'] = short_codes.map(
            lambda c: f"{self._market_of(c)}.{c}"
        )
        self.stock_info['short_code'] = short_codes
        self.stock_info['market'] = short_codes.map(self._market_of)

        # Normalise the listing date. A numeric column (e.g. 19991110) must be
        # parsed as YYYYMMDD text; handing the integers to to_datetime directly
        # would interpret them as nanoseconds since the epoch.
        raw_dates = self.stock_info['list_date']
        if pd.api.types.is_numeric_dtype(raw_dates):
            as_text = raw_dates.map(lambda v: str(int(v)) if pd.notna(v) else None)
            parsed = pd.to_datetime(as_text, format='%Y%m%d', errors='coerce')
        else:
            parsed = pd.to_datetime(raw_dates)
        self.stock_info['list_date'] = parsed.dt.strftime('%Y-%m-%d')

        print(f"处理后数据:")
        print(f"股票数量: {len(self.stock_info)}")
        print("示例数据:")
        print(self.stock_info[['short_code', 'bs_code', 'name', 'market', 'research1']].head(5))
        return True

    @staticmethod
    def _drain_result(result_set):
        """Collect every row of a baostock result set into a DataFrame.

        Returns None when the result set contains no rows.
        """
        rows = []
        while result_set.next():
            rows.append(result_set.get_row_data())
        if not rows:
            return None
        return pd.DataFrame(rows, columns=result_set.fields)

    @staticmethod
    def _read_existing(filepath):
        """Read a previously saved stock file.

        Returns the DataFrame, or None when the file does not exist, cannot be
        parsed, or has no '交易日期' column (and so cannot be merged by date).
        """
        if not os.path.exists(filepath):
            return None
        try:
            saved = pd.read_csv(filepath)
        except (OSError, ValueError) as e:  # pandas parser errors derive from ValueError
            print(f"读取现有文件失败: {e}")
            return None
        if '交易日期' not in saved.columns:
            print(f"读取现有文件失败: 缺少 交易日期 列 ({filepath})")
            return None
        return saved

    def download_single_stock(self, short_code, bs_code, stock_name, start_date="2010-01-01", end_date=None):
        """Download (or incrementally update) the daily data of one stock.

        Args:
            short_code: Six-digit code, used in the output file name.
            bs_code: baostock code such as ``sh.600000``; must be present in
                ``self.stock_info``.
            stock_name: Stock name, used in the output file name.
            start_date: First date to request when no usable file exists yet.
            end_date: Last date to request; defaults to today.

        Returns:
            The complete DataFrame for the stock. When the saved file is
            already up to date, the saved data is returned unchanged so that
            callers do not count the stock as a failure. None is returned when
            the stock is unknown, the download fails, or there is no data at
            all.
        """
        if end_date is None:
            end_date = datetime.now().strftime("%Y-%m-%d")

        # Look up the metadata row of this stock.
        matches = self.stock_info[self.stock_info['bs_code'] == bs_code]
        if matches.empty:
            print(f"未找到股票代码: {bs_code}")
            return None
        stock_row = matches.iloc[0]

        # Build the output path; characters that are awkward in file names are replaced.
        safe_name = str(stock_name).replace('*', '星').replace('/', '').replace('\\', '')
        filename = f"{stock_row['market']}_{short_code}_{safe_name}.csv"
        filepath = os.path.join(self.output_dir, filename)

        # Incremental update: the file is read once, and the same frame is used
        # both to find the resume date and for the final merge.
        existing_df = self._read_existing(filepath)
        query_start = start_date
        resumed = False
        if existing_df is not None and not existing_df.empty:
            try:
                newest = pd.to_datetime(existing_df['交易日期'].max())
                query_start = (newest + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
                resumed = True
            except (ValueError, TypeError) as e:
                # Unusable dates: fall back to a full download that replaces the file.
                print(f"读取现有文件失败: {e}")
                existing_df = None

        try:
            # Nothing to request when the saved data already reaches end_date.
            if resumed and pd.to_datetime(query_start) > pd.to_datetime(end_date):
                print(f"{bs_code} 无新数据")
                return existing_df

            print(f"正在下载 {bs_code} ({stock_name})...")
            # Price data. The original comment described this as forward-adjusted,
            # but baostock documents adjustflag "3" as unadjusted. The value is kept
            # so that new rows stay consistent with files that already exist.
            price_fields = "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg"
            rs_price = bs.query_history_k_data_plus(
                code=bs_code,
                fields=price_fields,
                start_date=query_start,
                end_date=end_date,
                frequency="d",
                adjustflag="3"
            )
            if rs_price.error_code != '0':
                print(f"价格数据下载失败: {rs_price.error_msg}")
                return None
            price_df = self._drain_result(rs_price)
            if price_df is None:
                print(f"{bs_code} 无新数据")
                # Up to date when a saved file exists; otherwise there is no data at all.
                return existing_df

            # Valuation indicators. A failure here is tolerated: the price rows
            # are still saved, just without these columns.
            # NOTE(review): "0" is not one of the adjustflag values listed in the
            # baostock documentation (1/2/3) -- confirm it is accepted as intended.
            finance_fields = "date,code,peTTM,pbMRQ,psTTM,pcfNcfTTM,isST"
            rs_finance = bs.query_history_k_data_plus(
                code=bs_code,
                fields=finance_fields,
                start_date=query_start,
                end_date=end_date,
                frequency="d",
                adjustflag="0"
            )
            finance_df = None
            if rs_finance.error_code == '0':
                finance_df = self._drain_result(rs_finance)

            # Merge prices with the indicators when they are available.
            if finance_df is not None:
                merged_df = pd.merge(price_df, finance_df, on=['date', 'code'], how='left')
            else:
                merged_df = price_df

            # Attach the per-stock metadata from the CSV to every row.
            metadata_columns = (
                ('市场类型', 'market'),
                ('股票名称', 'name'),
                ('上市日期', 'list_date'),
                ('研究1', 'research1'),
                ('研究1行业代码', 'research1_code'),
                ('研究2', 'research2'),
                ('研究2行业代码', 'research2_code'),
                ('研究3', 'research3'),
                ('研究3行业代码', 'research3_code'),
                ('员工总数', 'employee_count'),
            )
            for header, source_column in metadata_columns:
                merged_df[header] = stock_row[source_column]

            # Rename the baostock fields to Chinese headers.
            merged_df = merged_df.rename(columns={
                'date': '交易日期',
                'code': 'baostock代码',
                'open': '开盘价',
                'high': '最高价',
                'low': '最低价',
                'close': '收盘价',
                'preclose': '前收盘价',
                'volume': '成交量',
                'amount': '成交额',
                'adjustflag': '复权状态',
                'turn': '换手率',
                'tradestatus': '交易状态',
                'pctChg': '涨跌幅',
                'peTTM': '市盈率TTM',
                'pbMRQ': '市净率MRQ',
                'psTTM': '市销率TTM',
                'pcfNcfTTM': '市现率TTM',
                'isST': '是否ST'
            })

            # Append to the saved history; for a date present in both, the new row wins.
            # A failure here reaches the handler below and leaves the file untouched,
            # rather than overwriting the history with only the new rows.
            final_df = merged_df
            if existing_df is not None:
                final_df = pd.concat([existing_df, merged_df])
                final_df = final_df.drop_duplicates(subset=['交易日期'], keep='last')
                final_df = final_df.sort_values('交易日期')

            # utf-8-sig writes a BOM at the start of the file.
            final_df.to_csv(filepath, index=False, encoding='utf-8-sig')
            print(f"✓ {bs_code} ({stock_name}) 已更新,共 {len(final_df)} 条记录")
            return final_df
        except Exception as e:
            print(f"✗ 下载 {bs_code} 时出错: {e}")
            return None

    def download_all_stocks(self, start_date="2010-01-01", end_date=None, limit=None):
        """Download every loaded stock and write a summary report.

        Args:
            start_date: First date to request for stocks without a saved file.
            end_date: Last date to request; defaults to today.
            limit: When truthy, only the first ``limit`` stocks are processed.
        """
        if end_date is None:
            end_date = datetime.now().strftime("%Y-%m-%d")
        if self.stock_info is None or self.stock_info.empty:
            print("股票信息未加载")
            return

        # Select the stocks to process.
        stocks_to_download = self.stock_info
        if limit:
            stocks_to_download = self.stock_info.head(limit)
        print(f"\n准备下载 {len(stocks_to_download)} 只股票的数据...")

        success_count = 0
        failed_stocks = []
        for _, stock in tqdm(stocks_to_download.iterrows(), total=len(stocks_to_download)):
            name = stock['name']
            bs_code = stock['bs_code']
            result = self.download_single_stock(stock['short_code'], bs_code, name, start_date, end_date)
            if result is not None:
                success_count += 1
            else:
                failed_stocks.append(f"{bs_code}_{name}")
            time.sleep(0.1)  # throttle so requests are not sent too quickly

        self.generate_report(success_count, len(stocks_to_download), failed_stocks)

    def generate_report(self, success, total, failed):
        """Write the download summary to a text file in ``output_dir`` and print it.

        Args:
            success: Number of stocks downloaded successfully.
            total: Number of stocks attempted; zero yields a 0.00% success rate.
            failed: List of ``"<bs_code>_<name>"`` labels of the failed stocks.
        """
        now = datetime.now()
        timestamp = now.strftime("%Y%m%d_%H%M%S")
        # Guard the rate against an empty run instead of dividing by zero.
        success_rate = success / total * 100 if total else 0.0
        failed_listing = "\n".join(failed) if failed else "无"
        report = "\n".join([
            "股票数据下载完成报告",
            "=====================",
            f"下载时间: {now.strftime('%Y-%m-%d %H:%M:%S')}",
            f"总股票数: {total}",
            f"成功下载: {success}",
            f"失败数量: {len(failed)}",
            f"成功率: {success_rate:.2f}%",
            "失败股票列表:",
            failed_listing,
            f"数据保存目录: {self.output_dir}",
            "",
        ])
        report_file = os.path.join(self.output_dir, f"下载报告_{timestamp}.txt")
        with open(report_file, 'w', encoding='utf-8') as f:
            f.write(report)
        print("\n" + "=" * 60)
        print(report)
        print("=" * 60)
        print(f"详细报告已保存到: {report_file}")
def main():
    """Log in to baostock, load the stock list and download every stock.

    Exits early (after printing a message) when the login or the CSV load
    fails. Once a login has succeeded, the baostock session is always closed
    with ``bs.logout()``, including when a later step raises.
    """
    downloader = CorrectFormatDownloader()
    # Log in first; nothing else can work without a session.
    if not downloader.login():
        print("登录失败,程序退出")
        return
    try:
        # Load the stock list from the CSV file.
        if not downloader.load_stock_info_from_csv():
            print("无法加载股票信息,程序退出")
            return
        # Download the data of every stock.
        print("\n开始下载所有股票数据...")
        downloader.download_all_stocks(start_date="2010-01-01")
    finally:
        # Release the server-side session opened by login().
        bs.logout()
# Run the downloader only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()