swifter---你准备好了么?

这篇博客以 WWDC 2014 为背景,重点介绍 Swift 语言的到来,并引发开发者对自己是否已做好准备的思考;内容聚焦于信息技术领域中与移动开发相关的 Swift 语言和苹果全球开发者大会。

WWDC 2014

Swift 来了,你准备好了么?

import time
from pathlib import Path

import numpy as np
import pandas as pd

try:
    from tqdm import tqdm
except ImportError:
    # Progress bars are cosmetic: without tqdm, fall back to plain iteration
    # instead of refusing to run the analysis at all.
    def tqdm(iterable, **_kwargs):
        return iterable


_DEFAULT_ROOT = Path(r"F:\stock_data")
_DEFAULT_OUT = Path(r"F:\自定义数据源")
_DEFAULT_PERIODS = (5, 10, 15, 25, 30, 50, 90, 120, 250)
_QUANTILES = (0.25, 0.5, 0.75)
_START_DATE = pd.Timestamp('2000-01-04')
_MIN_ROWS = 5  # a file must keep MORE than this many rows to be used
# Output column name -> accepted lower-cased source column names.
# The bare 'pe' / 'pb' spellings are what the original lookup accepted;
# 'pettm' / 'pbmrq' match the names this script itself writes out.
_METRIC_ALIASES = {
    'turn': ('turn',),
    'peTTM': ('pettm', 'pe'),
    'pbMRQ': ('pbmrq', 'pb'),
}


def _pick_column(lowered, keyword, fallback):
    """Return the source column for *keyword*: exact name, then substring, then *fallback*."""
    if keyword in lowered:
        return lowered[keyword]
    return next((name for low, name in lowered.items() if keyword in low), fallback)


def _load_price_file(path):
    """Read one CSV into a cleaned frame, or return None if too few rows survive."""
    raw = pd.read_csv(path, encoding='gbk', low_memory=False)
    if raw.shape[1] < 2:
        raise ValueError(f"expected at least 2 columns, found {raw.shape[1]}")

    lowered = {str(col).lower(): col for col in raw.columns}
    date_col = _pick_column(lowered, 'date', raw.columns[0])
    close_col = _pick_column(lowered, 'close', raw.columns[1])

    # Build the frame by name so a missing optional column can never shift
    # the labels of the columns that follow it.
    data = {
        'date': pd.to_datetime(raw[date_col], errors='coerce'),
        'close': pd.to_numeric(raw[close_col], errors='coerce'),
    }
    for target, aliases in _METRIC_ALIASES.items():
        source = next((lowered[a] for a in aliases if a in lowered), None)
        if source is not None:
            data[target] = pd.to_numeric(raw[source], errors='coerce')

    frame = pd.DataFrame(data).dropna(subset=['date', 'close'])
    frame = frame[frame['date'] >= _START_DATE]
    if len(frame) <= _MIN_ROWS:
        return None
    return frame.assign(stock=path.stem)


def _make_demo_frame():
    """Build synthetic data for 50 stocks over 2000-2024 business days."""
    stocks = [f"S{i:04d}" for i in range(50)]
    dates = pd.date_range('2000-01-04', '2024-12-31', freq='B')
    shape = (len(dates), len(stocks))
    # One geometric random walk per stock (cumulate down the date axis) so
    # prices stay positive and stocks do not share a single walk.
    close = 100 * np.exp((np.random.randn(*shape) * 0.02).cumsum(axis=0))
    size = close.size
    return pd.DataFrame({
        'date': np.repeat(dates, len(stocks)),
        'stock': stocks * len(dates),
        'close': close.ravel(),  # row-major: date-major, stock-minor
        'turn': np.random.uniform(0.1, 15, size),
        'peTTM': np.random.uniform(1, 100, size),
        'pbMRQ': np.random.uniform(0.1, 10, size),
    })


def _daily_stats(df, periods):
    """Aggregate per-date quartiles and positive-return shares into one frame."""
    # Quartiles deliberately skip the first period, as the original did
    # (NOTE(review): confirm that omitting it is intended).
    quantile_cols = [f'r{p}' for p in periods[1:]]
    metric_cols = [m for m in _METRIC_ALIASES if m in df.columns]
    share_cols = [f'r{p}' for p in periods]

    parts = []
    if quantile_cols or metric_cols:
        quant = (
            df.groupby('date')[quantile_cols + metric_cols]
            .quantile(list(_QUANTILES))
            .unstack()
        )
        quant.columns = [f'{col}_{float(q)}' for col, q in quant.columns]
        parts.append(quant)

    # NaN returns (not enough history) compare as "not positive", so they
    # stay in the denominator exactly as in the original computation.
    share = df[share_cols].gt(0).groupby(df['date']).mean() * 100
    share.columns = [f'{p}日占比' for p in periods]
    parts.append(share)

    stats = pd.concat(parts, axis=1).rename_axis('date').reset_index()
    stats['日期'] = stats['date'].dt.strftime('%Y/%m/%d')

    ordered = ['日期']
    ordered += [f'{col}_{q}' for col in quantile_cols for q in _QUANTILES]
    ordered += [f'{m}_{q}' for m in metric_cols for q in _QUANTILES]
    ordered += [f'{p}日占比' for p in periods]
    return stats[ordered]


def progress_analyzer(root=None, out=None, periods=None):
    """Run the four-phase return analysis with progress bars and timings.

    Phases: scan ``root`` recursively for CSV files, read and clean them,
    compute N-day percentage returns per stock, then aggregate per trading
    date and save the table into ``out``.  If no usable file is found,
    synthetic demo data is analysed instead.

    Args:
        root: Directory scanned for ``*.csv``; defaults to ``F:\\stock_data``.
        out: Output directory (created with parents if missing); defaults to
            ``F:\\自定义数据源``.
        periods: Look-back periods in rows; defaults to
            5, 10, 15, 25, 30, 50, 90, 120, 250.

    Returns:
        The per-date statistics ``DataFrame`` that was written to disk.
    """
    t0 = time.time()
    root = Path(root) if root is not None else _DEFAULT_ROOT
    out = Path(out) if out is not None else _DEFAULT_OUT
    periods = list(periods) if periods is not None else list(_DEFAULT_PERIODS)
    out.mkdir(parents=True, exist_ok=True)

    # Phase 1: file scan
    print("🔍 阶段1: 扫描文件...")
    files = list(tqdm(root.rglob("*.csv"), desc="📂 扫描CSV"))
    scan_time = time.time() - t0

    # Phase 2: read and clean
    print(f"\n📊 阶段2: 读取数据 ({len(files)}个文件)...")
    t1 = time.time()
    frames, skipped = [], []
    for path in tqdm(files, desc="📥 读取"):
        try:
            frame = _load_price_file(path)
        except Exception as exc:
            # Best effort: one unreadable file must not abort the run, but
            # the failure is recorded instead of being silently dropped.
            skipped.append((path.name, exc))
            continue
        if frame is not None:
            frames.append(frame)
    if skipped:
        print(f"⚠️ 跳过 {len(skipped)} 个无法读取的文件")
        for name, exc in skipped[:5]:
            print(f"   {name}: {exc!r}")

    if frames:
        # ignore_index avoids duplicate row labels across files, which
        # would make later column assignment depend on index alignment.
        df = pd.concat(frames, ignore_index=True)
    else:
        print("🔄 生成测试数据...")
        df = _make_demo_frame()
    read_time = time.time() - t1

    # Phase 3: N-day returns per stock
    print(f"\n📈 阶段3: 计算涨幅 ({len(df)}条记录)...")
    t2 = time.time()
    df = df.sort_values(['stock', 'date'])
    for p in tqdm(periods, desc="⚡ 计算周期"):
        # Same value as pct_change(p) on NaN-free closes, without relying
        # on pct_change's fill behaviour.
        previous = df.groupby('stock')['close'].shift(p)
        df[f'r{p}'] = (df['close'] / previous - 1) * 100
    calc_time = time.time() - t2

    # Phase 4: per-date summary and save
    print("\n📊 阶段4: 统计汇总...")
    t3 = time.time()
    stats = _daily_stats(df, periods)

    filename = out / f"进度统计_{int(time.time())}.xlsx"
    try:
        stats.to_excel(filename, index=False)
    except ImportError:
        # No Excel writer engine installed: keep the results as CSV rather
        # than losing the whole computation.
        filename = filename.with_suffix('.csv')
        print(f"⚠️ 缺少Excel写入引擎, 改为保存CSV: {filename}")
        stats.to_csv(filename, index=False, encoding='utf-8-sig')
    stat_time = time.time() - t3
    total_time = time.time() - t0

    # Timing report
    print("\n" + "=" * 50)
    print("📊 完整耗时报告")
    print("=" * 50)
    print(f"📂 文件扫描: {scan_time:.2f}s")
    print(f"📥 数据读取: {read_time:.2f}s")
    print(f"⚡ 涨幅计算: {calc_time:.2f}s")
    print(f"📊 统计汇总: {stat_time:.2f}s")
    print(f"⏱️ 总耗时: {total_time:.1f}s")
    print(f"📈 交易日: {len(stats)}天")
    print(f"📁 文件: {filename}")
    print("=" * 50)
    return stats


if __name__ == "__main__":
    progress_analyzer()

# Reader request that followed this snippet on the page:
# "Explain every line of the code in detail, with examples."
08-19
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值