E. Median String

本文介绍了一种求解两个字符串间中位字符串的算法。针对长度为 n 的字符串 s 和 t,通过 26 进制加法和除二操作,找到所有位于 s 和 t 之间(含两端)的字符串的中位数。具体实现包括字符串到数值的转换、加法处理及结果输出。

You are given two strings s and t, both consisting of exactly k lowercase Latin letters; s is lexicographically less than t.

Let's consider the list of all strings consisting of exactly k lowercase Latin letters, lexicographically not less than s and not greater than t (including s and t), in lexicographical order. For example, for k = 2, s = "az" and t = "bf" the list will be ["az", "ba", "bb", "bc", "bd", "be", "bf"].

Your task is to print the median (the middle element) of this list. For the example above this will be "bc".

It is guaranteed that there is an odd number of strings lexicographically not less than s and not greater than t.

Input

The first line of the input contains one integer k (1 ≤ k ≤ 2⋅10^5) — the length of the strings.

The second line of the input contains one string s consisting of exactly k lowercase Latin letters.

The third line of the input contains one string t consisting of exactly k lowercase Latin letters.

It is guaranteed that s is lexicographically less than t.

It is guaranteed that there is an odd number of strings lexicographically not less than s and not greater than t.

Output

Print one string consisting of exactly k lowercase Latin letters — the median (the middle element) of the list of strings of length k lexicographically not less than s and not greater than t.

Examples

input

Copy

2
az
bf

output

Copy

bc

input

Copy

5
afogk
asdji

output

Copy

alvuw

input

Copy

6
nijfvj
tvqhwp

output

Copy

qoztvz

题意:给你两个长度为 n 的字符串,然后求位于它们中间(中位)的那个字符串。

思路:把字母转换成 26 进制表示,然后模拟大数加法和除法(准确地说是除二)。

代码:

#include<iostream>
#include<cstdio>
#include<cmath>
#include<cstring>
#define LL long long 

using namespace std;
// Max string length (2e5) plus slack for 1-based indexing and carry digits.
const int maxn=2e5+100;

char s[maxn],t[maxn];   // input strings, stored 1-based (read into s+1 / t+1)
int a[maxn],b[maxn];    // base-26 digits of s and t; a later also holds the sum s+t
int ans[maxn];          // base-26 digits of (s+t)/2, i.e. the median string
/*
 * Reads k, s and t from stdin and prints the median string of the
 * inclusive range [s, t].
 *
 * Each string is treated as a k-digit base-26 number ('a' == 0,
 * 'z' == 25), stored 1-based with index 0 reserved for a carry digit.
 * The median is (s + t) / 2, computed as schoolbook big-integer
 * addition followed by division by two.
 */
int main()
{
	int n;
	scanf("%d",&n);
	scanf("%s%s",s+1,t+1);
	
	// Convert letters to base-26 digit values.
	for(int i=1;i<=n;i++)
	{
		a[i]=s[i]-'a';
		b[i]=t[i]-'a';
	}
	
	// a := a + b, least significant digit (index n) first; a[0]
	// absorbs the final carry, so the sum occupies a[0..n].
	// Each digit is at most 25+25+1 = 51, so the carry is 0 or 1.
	for(int i=n;i>=1;i--)
	{
		a[i]+=b[i];
		if(a[i]>=26)
		{
			a[i-1]+=a[i]/26;
			a[i]%=26;
		}
	}
	// ans := a / 2, most significant digit first: an odd digit passes
	// its remainder down as +26 to the next lower digit.  The final
	// step may touch a[n+1], which maxn leaves room for.
	for(int i=0;i<=n;i++)
	{
		ans[i]=a[i]/2;
		a[i+1]+=a[i]%2*26;
	}
	// ans[0] is always 0 (the halved sum of two k-digit numbers fits
	// in k digits), so only digits 1..n are printed.
	for(int i=1;i<=n;i++)
	{
		printf("%c",ans[i]+'a');
	}
	printf("\n");
	return 0;
}

 

from maix import camera, display, image, nn, app, time, touchscreen, uart import numpy as np def is_in_button(x, y, btn_pos): return x > btn_pos[0] and x < btn_pos[0] + btn_pos[2] and y > btn_pos[1] and y < btn_pos[1] + btn_pos[3] def median_filter(image, kernel_size): height, width = image.shape padded_image = np.pad(image, ((kernel_size//2, kernel_size//2), (kernel_size//2, kernel_size//2)), mode='constant') filtered_image = np.zeros((height, width)) for i in range(height): for j in range(width): window = padded_image[i:i+kernel_size, j:j+kernel_size] median = np.median(window) filtered_image[i, j] = median return filtered_image def main(disp): model = "/root/models/pp_ocr.mud" ocr = nn.PP_OCR(model) device = "/dev/ttyS0" serial0 = uart.UART(device, 115200) #设备串口 cam = camera.Camera(ocr.input_width(), ocr.input_height(), ocr.input_format()) ts = touchscreen.TouchScreen() img_back = image.load("/maixapp/share/icon/ret.png") back_rect = [0, 0, 32, 32] back_rect_disp = image.resize_map_pos(cam.width(), cam.height(), disp.width(), disp.height(), image.Fit.FIT_CONTAIN, back_rect[0], back_rect[1], back_rect[2], back_rect[3]) image.load_font("ppocr", "/maixapp/share/font/ppocr_keys_v1.ttf", size = 20) image.set_default_font("ppocr") while not app.need_exit(): img = cam.read() objs = ocr.detect(img) for obj in objs: points = obj.box.to_list() img.draw_keypoints(points, image.COLOR_RED, 4, -1, 1) img.draw_string(obj.box.x4, obj.box.y4, obj.char_str(), image.COLOR_RED) if obj.char_str().isdigit() and len(obj.char_str())==1 and obj.char_str()!='7' : serial0.write_str(obj.char_str()) print(obj.char_str()) if obj.char_str()=='O': serial0.write_str('0') print('0') if obj.char_str()=='o': serial0.write_str('0') print('0') if obj.char_str()=='7': serial0.write_str('1') print('1') if obj.char_str()=='A': serial0.write_str('4') print('4') if obj.char_str().isdigit() and len(obj.char_str())>=2: serial0.write_str(obj.char_str()[0]) # print(obj.char_str()[0]) img.draw_image(0, 0, 
img_back) disp.show(img) x, y, pressed = ts.read() if is_in_button(x, y, back_rect_disp): app.set_exit_flag(True) if __name__ == '__main__': screen = display.Display() try: main(screen) except Exception: import traceback e = traceback.format_exc() print(e) img = image.Image(screen.width(), screen.height()) img.draw_string(2, 2, e, image.COLOR_WHITE, font="hershey_complex_small", scale=0.6) screen.show(img) while not app.need_exit(): time.sleep(0.2)为这段代码加一段中值滤波的代码
最新发布
10-10
#read png to excel # 定义要遍历的根路径 root_path = r"D:/workspace/image_collect/test" # 存储结果的列表 results = [] # 遍历根路径下的所有文件夹和文件 for root, dirs, files in os.walk(root_path): for file in files: if file.lower().endswith('.png') and "copy" not in file: # 构建图片的完整路径 image_path = os.path.join(root, file) try: # 打开图片 image = Image.open(image_path) # 获取图片的宽度和高度 width, height = image.size # 定义区域(可根据实际情况调整) left = 1300 top = 900 right = 1600 bottom = 1050 crop_box = (left, top, right, bottom) # 裁剪图片 cropped_img = image.crop(crop_box) # 获取原图片的文件名和扩展名 file_name, file_ext = os.path.splitext(image_path) if "copy" not in file_name: # 构造新的文件名 new_file_name = f"{file_name}-copy{file_ext}" else: new_file_name = f"{file_name}" # 图片预处理 # 灰度化 gray = cropped_img.convert('L') # 二值化 threshold = 127 binary = gray.point(lambda p: 255 if p > threshold else 0) # 去噪 img = np.array(binary) blur = cv2.medianBlur(img, 3) processed_img = Image.fromarray(blur) # 保存裁剪后的图片到原路径 processed_img.save(new_file_name) # 使用 pytesseract 提取文字 text = pytesseract.image_to_string(processed_img, lang='eng') # # 图片预处理 # # 灰度化 # gray = cropped_img.convert('L') # # 二值化 # threshold = 127 # binary = gray.point(lambda p: 255 if p > threshold else 0) # # 去噪 # img = np.array(binary) # blur = cv2.medianBlur(img, 3) # processed_img = Image.fromarray(blur) # # # # 保存裁剪后的图片到原路径 # # processed_img.save(new_file_name) # # 使用 pytesseract 提取文字 # text = pytesseract.image_to_string(processed_img, lang='eng') # 去除多余的空白字符 text = text.strip() # 将文件名和提取的文字以 - 相连 file_name, _ = os.path.splitext(file) result = f'{file_name}-{text}' results.append(result) print(results) except Exception as e: print(f"处理 {image_path} 时出错: {e}") file_path = r"D:/workspace/image_collect/excel_load/output.xlsx" # Excel文件路径 # 将结果保存到 Excel 文件 df = pd.DataFrame(results, columns=['结果']) df.to_excel(file_path, index=False) 输出的结果file_name如何加上每个文件夹的名字,以及输出的excel分为两列,text和file_name分别作为一列,在这个代码的基础上更改
09-11
cmd := fmt.Sprintf("%s/pt-query-digest --since %s --until %s %s", conf.ServerConfig.AnalyzeConf.PtToolPath, startTime.Format(time.DateOnly), endTime.Format(time.DateOnly), fmt.Sprintf("%s/%s/%s/%s.log", common.BaseSlowStatPath, svrId, currentDay, insName)) logger.Infof("定时执行推送任务:pt分析=>%s", cmd) out, err := tools.DoShellCmd(cmd) if err != nil { logger.Errorf("慢日志分析器:执行分析命令[%s]错误=>%s", cmd, err) return } logger.Infof(out) //查询统计日期 // statResult, err := dao.StatTotalQuotaByTimeRange(startTime.Format(time.DateOnly), endTime.Format(time.DateOnly), task.ConfigInsId) // if err != nil || statResult == nil { // logger.Warnf("推送任务新增-推送慢SQL:执行任务[%d]对应的当日统计查询出错=>%s; 跳过当前任务", task.ID, err) // dao.UpdateStatPushTaskOnlyStatus(task.ID, common.TaskStatus_Fail) // return // } title := fmt.Sprintf("实例[%s]单次推送任务[%s]统计任务", cftInstance.InstanceName, task.TaskName) var content strings.Builder content.WriteString(fmt.Sprintf("统计时间范围[%s - %s]<br/>", startTime.Format(time.DateTime), endTime.Format(time.DateTime))) // content.WriteString(fmt.Sprintf("慢SQL总数:%d,执行总时间:%f,执行平均时间:%f,最大执行时间:%f,最小执行时间%f<br/>", // statResult.TotalNum, statResult.ExeTotalTs, statResult.ExeAvgTs, statResult.ExeMaxTs, statResult.ExeMinTs)) 这是我的逻辑 下面是我的out的输出 # 620ms user time, 70ms system time, 41.38M rss, 106.85M vsz # Current date: Tue Aug 26 17:50:33 2025 # Hostname: ECS-hostname-20240410059 # Files: /home/finance/Data/sika-dbslow.msxf.lo/1/20250826/10.246.6.191_3306.log # Overall: 2 total, 2 unique, 0.00 QPS, 0.00x concurrency ________________ # Time range: 2025-06-23T15:50:27 to 2025-07-24T09:06:04 # Attribute total min max avg 95%!s(MISSING)tddev median # ============ ======= ======= ======= ======= ======= ======= ======= # Exec time 20s 10s 10s 10s 10s 3ms 10s # Lock time 11us 5us 6us 5us 6us 0 5us # Rows sent 0 0 0 0 0 0 0 # Rows examine 0 0 0 0 0 0 0 # Query size 259 47 212 129.50 212 116.67 129.50 # Profile # Rank Query ID Response time Calls R/Call V/M Item # ==== ================== ============= ===== 
======= ===== ====== # 1 0x402CFC71A27ECE45 10.0262 50.0%! (MISSING)10.0262 0.00 CREATE # 2 0x98B653F46BD7096E 10.0223 50.0%! (MISSING)10.0223 0.00 ALTER # Query 1: 0 QPS, 0x concurrency, ID 0x402CFC71A27ECE45 at byte 69755009 _ # This item is included in the report because it matches --limit. # Scores: V/M = 0.00 # Time range: all events occurred at 2025-06-23T15:50:27 # Attribute pct total min max avg 95%!s(MISSING)tddev median # ============ === ======= ======= ======= ======= ======= ======= ======= # Count 50 1 # Exec time 50 10s 10s 10s 10s 10s 0 10s # Lock time 45 5us 5us 5us 5us 5us 0 5us # Rows sent 0 0 0 0 0 0 0 0 # Rows examine 0 0 0 0 0 0 0 0 # Query size 81 212 212 212 212 212 0 212 # String: # Databases mysql # Hosts 127.0.0.1 # Users agentmsfinance # Query_time distribution # 1us # 10us # 100us # 1ms # 10ms # 100ms # 1s # 10s+ ################################################################ CREATE USER 'root'@'127.0.0.6' IDENTIFIED WITH 'caching_sha2_password' REQUIRE NONE PASSWORD EXPIRE DEFAULT ACCOUNT UNLOCK PASSWORD HISTORY DEFAULT PASSWORD REUSE INTERVAL DEFAULT PASSWORD REQUIRE CURRENT DEFAULT\G # Query 2: 0 QPS, 0x concurrency, ID 0x98B653F46BD7096E at byte 69755009 _ # This item is included in the report because it matches --limit. 
# Scores: V/M = 0.00 # Time range: all events occurred at 2025-07-24T09:06:04 # Attribute pct total min max avg 95%!s(MISSING)tddev median # ============ === ======= ======= ======= ======= ======= ======= ======= # Count 50 1 # Exec time 49 10s 10s 10s 10s 10s 0 10s # Lock time 54 6us 6us 6us 6us 6us 0 6us # Rows sent 0 0 0 0 0 0 0 0 # Rows examine 0 0 0 0 0 0 0 0 # Query size 18 47 47 47 47 47 0 47 # String: # Databases mysql # Hosts 127.0.0.1 # Users agentmsfinance # Query_time distribution # 1us # 10us # 100us # 1ms # 10ms # 100ms # 1s # 10s+ ################################################################ ALTER USER 'guodong'@'%!'(MISSING) IDENTIFIED BY <secret>\G 我需要解析 # Profile # Rank Query ID Response time Calls R/Call V/M Item # ==== ================== ============= ===== ======= ===== ====== # 1 0x402CFC71A27ECE45 10.0262 50.0% 1 10.0262 0.00 CREATE # 2 0x98B653F46BD7096E 10.0223 50.0% 1 10.0223 0.00 ALTER 这一段 然后放到我的content里面
08-27
def preprocess_data(self): """数据预处理对话框 - 整合全部预处理功能""" if self.df is None: messagebox.showwarning("警告", "请先选择Excel文件") return preprocess_window = tk.Toplevel(self.root) preprocess_window.title("数据预处理") preprocess_window.geometry("600x700") # 创建主容器 main_frame = ttk.Frame(preprocess_window) main_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10) # 1. 缺失值处理部分 missing_frame = ttk.LabelFrame(main_frame, text="缺失值处理", padding=10) missing_frame.pack(fill=tk.X, pady=5) # 缺失值统计显示 missing_stats = self.df.isnull().sum() missing_text = scrolledtext.ScrolledText(missing_frame, height=4) missing_text.pack(fill=tk.X) for col, count in missing_stats.items(): if count > 0: missing_text.insert(tk.END, f"{col}: {count}个缺失值\n") missing_text.config(state=tk.DISABLED) # 缺失值处理方法选择 ttk.Label(missing_frame, text="处理方法:").pack(anchor=tk.W) missing_method_var = tk.StringVar(value="fill") missing_method_frame = ttk.Frame(missing_frame) missing_method_frame.pack(fill=tk.X, pady=5) ttk.Radiobutton(missing_method_frame, text="删除缺失行", variable=missing_method_var, value="drop").pack( side=tk.LEFT) ttk.Radiobutton(missing_method_frame, text="固定值填充", variable=missing_method_var, value="fill").pack( side=tk.LEFT) ttk.Radiobutton(missing_method_frame, text="插值法", variable=missing_method_var, value="interpolate").pack( side=tk.LEFT) # 填充选项 fill_options_frame = ttk.Frame(missing_frame) fill_options_frame.pack(fill=tk.X, pady=5) ttk.Label(fill_options_frame, text="填充值:").pack(side=tk.LEFT) fill_value_entry = ttk.Entry(fill_options_frame, width=10) fill_value_entry.pack(side=tk.LEFT, padx=5) fill_value_entry.insert(0, "0") ttk.Label(fill_options_frame, text="或选择:").pack(side=tk.LEFT, padx=5) fill_type_var = tk.StringVar(value="fixed") ttk.Radiobutton(fill_options_frame, text="前值填充", variable=fill_type_var, value="ffill").pack(side=tk.LEFT) ttk.Radiobutton(fill_options_frame, text="后值填充", variable=fill_type_var, value="bfill").pack(side=tk.LEFT) ttk.Radiobutton(fill_options_frame, text="均值填充", 
variable=fill_type_var, value="mean").pack(side=tk.LEFT) # 2. 异常值处理部分 outlier_frame = ttk.LabelFrame(main_frame, text="异常值处理", padding=10) outlier_frame.pack(fill=tk.X, pady=5) # 异常值检测方法 ttk.Label(outlier_frame, text="检测方法:").pack(anchor=tk.W) outlier_method_var = tk.StringVar(value="3sigma") outlier_method_frame = ttk.Frame(outlier_frame) outlier_method_frame.pack(fill=tk.X) ttk.Radiobutton(outlier_method_frame, text="3σ原则", variable=outlier_method_var, value="3sigma").pack( side=tk.LEFT) ttk.Radiobutton(outlier_method_frame, text="IQR方法", variable=outlier_method_var, value="iqr").pack( side=tk.LEFT) # 异常值处理方式 ttk.Label(outlier_frame, text="处理方式:").pack(anchor=tk.W) outlier_action_var = tk.StringVar(value="remove") outlier_action_frame = ttk.Frame(outlier_frame) outlier_action_frame.pack(fill=tk.X) ttk.Radiobutton(outlier_action_frame, text="删除", variable=outlier_action_var, value="remove").pack( side=tk.LEFT) ttk.Radiobutton(outlier_action_frame, text="用中位数替换", variable=outlier_action_var, value="median").pack( side=tk.LEFT) ttk.Radiobutton(outlier_action_frame, text="用前后均值替换", variable=outlier_action_var, value="neighbor").pack(side=tk.LEFT) # 3. 数据类型转换部分 type_frame = ttk.LabelFrame(main_frame, text="数据类型转换", padding=10) type_frame.pack(fill=tk.X, pady=5) # 时间列转换 ttk.Label(type_frame, text="时间列转换:").pack(anchor=tk.W) time_col_var = tk.StringVar() time_col_combo = ttk.Combobox(type_frame, textvariable=time_col_var, width=20) time_col_combo['values'] = tuple(self.df.columns) time_col_combo.pack(anchor=tk.W, pady=5) # 4. 
特征工程部分 feature_frame = ttk.LabelFrame(main_frame, text="特征工程", padding=10) feature_frame.pack(fill=tk.X, pady=5) # 添加滞后特征 ttk.Label(feature_frame, text="滞后特征:").pack(anchor=tk.W) lag_frame = ttk.Frame(feature_frame) lag_frame.pack(fill=tk.X) ttk.Label(lag_frame, text="选择列:").pack(side=tk.LEFT) lag_col_var = tk.StringVar() lag_col_combo = ttk.Combobox(lag_frame, textvariable=lag_col_var, width=15) lag_col_combo['values'] = tuple(self.df.select_dtypes(include=['number']).columns) lag_col_combo.pack(side=tk.LEFT, padx=5) ttk.Label(lag_frame, text="滞后步数:").pack(side=tk.LEFT) lag_steps_entry = ttk.Entry(lag_frame, width=5) lag_steps_entry.pack(side=tk.LEFT) lag_steps_entry.insert(0, "1") # 执行预处理按钮 def apply_preprocessing(): try: original_shape = self.df.shape # 1. 处理缺失值 missing_method = missing_method_var.get() if missing_method == "drop": self.df = self.df.dropna() elif missing_method == "fill": fill_type = fill_type_var.get() if fill_type == "fixed": fill_value = fill_value_entry.get() self.df = self.df.fillna( float(fill_value) if self.df.select_dtypes(include=['number']).shape[1] > 0 else fill_value) elif fill_type == "ffill": self.df = self.df.ffill() elif fill_type == "bfill": self.df = self.df.bfill() elif fill_type == "mean": self.df = self.df.fillna(self.df.mean()) elif missing_method == "interpolate": self.df = self.df.interpolate() # 2. 
处理异常值 outlier_method = outlier_method_var.get() outlier_action = outlier_action_var.get() numeric_cols = self.df.select_dtypes(include=['number']).columns for col in numeric_cols: if outlier_method == "3sigma": mean, std = self.df[col].mean(), self.df[col].std() lower, upper = mean - 3 * std, mean + 3 * std else: # iqr q1, q3 = self.df[col].quantile(0.25), self.df[col].quantile(0.75) iqr = q3 - q1 lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr if outlier_action == "remove": self.df = self.df[(self.df[col] >= lower) & (self.df[col] <= upper)] elif outlier_action == "median": self.df.loc[(self.df[col] < lower) | (self.df[col] > upper), col] = self.df[col].median() elif outlier_action == "neighbor": mask = (self.df[col] < lower) | (self.df[col] > upper) self.df.loc[mask, col] = self.df[col].rolling(2, min_periods=1).mean()[mask] # 3. 时间列转换 time_col = time_col_var.get() if time_col and time_col in self.df.columns: try: self.df[time_col] = pd.to_datetime(self.df[time_col]) self.df['year'] = self.df[time_col].dt.year self.df['month'] = self.df[time_col].dt.month self.df['day'] = self.df[time_col].dt.day except Exception as e: messagebox.showwarning("时间转换警告", f"时间列转换失败: {str(e)}") # 4. 添加滞后特征 lag_col = lag_col_var.get() if lag_col and lag_col in self.df.columns: try: lag_steps = int(lag_steps_entry.get()) self.df[f'{lag_col}_lag{lag_steps}'] = self.df[lag_col].shift(lag_steps) except Exception as e: messagebox.showwarning("滞后特征警告", f"创建滞后特征失败: {str(e)}") # 更新显示 self.show_preview() preprocess_window.destroy() new_shape = self.df.shape self.status_var.set(f"预处理完成 | 原形状: {original_shape} | 新形状: {new_shape}") except Exception as e: messagebox.showerror("预处理错误", f"预处理过程中发生错误:\n{str(e)}") ttk.Button(main_frame, text="执行全部预处理", command=apply_preprocessing).pack(pady=10) 能不能对这个代码进行一些微调使得我可以自己决定每一项功能是否执行而不是一下子决定所有的是否预处理
07-16
# 导入操作系统相关功能模块,用于文件和目录操作 import os # 导入pandas库,用于数据处理和分析 import pandas as pd # 导入numpy库,用于数值计算 import numpy as np # 导入datetime模块,用于处理日期和时间 from datetime import datetime # 导入warnings模块,用于控制警告信息的显示 import warnings # 过滤numpy库中特定的RuntimeWarning警告,避免输出过多干扰信息 warnings.filterwarnings('ignore', category=RuntimeWarning, module='numpy') # 设置pandas显示选项,使中文显示正常 # 将模糊的Unicode字符显示为全宽字符 pd.set_option('display.unicode.ambiguous_as_wide', True) # 将东亚宽度的Unicode字符显示为全宽字符 pd.set_option('display.unicode.east_asian_width', True) # 定义输入文件夹路径,即包含周期涨幅CSV文件的文件夹 input_folder = 'F:\\周期涨幅' # 定义输出中位数数据的文件夹路径 output_median_folder = 'F:\\股票横向统计\\股票中位数' # 定义输出涨幅占比数据的文件夹路径 output_ratio_folder = 'F:\\股票横向统计\\周期涨幅占比' # 定义中位数结果输出文件的完整路径 output_median_file = os.path.join(output_median_folder, 'ZF_GZ_MED.csv') # 定义涨幅占比结果输出文件的完整路径 output_ratio_file = os.path.join(output_ratio_folder, 'ZFZB_0.csv') # 确认日期列的名称,用户需要根据实际数据调整此变量 _date_column = 'date' # 定义需要计算的周期列表,包含11个不同的时间周期涨幅字段 # 根据需求更新为包含25日,移除20日 periods = ['ZF1', 'ZF3', 'ZF5', 'ZF10', 'ZF15', 'ZF25', 'ZF30', 'ZF50', 'ZF90', 'ZF120', 'ZF250'] # 定义其他需要计算中位数的财务指标列 other_columns = ['turn', 'peTTM', 'pBMRQ'] # 创建输出文件夹,如果文件夹已存在则不报错 os.makedirs(output_median_folder, exist_ok=True) os.makedirs(output_ratio_folder, exist_ok=True) # 定义主处理函数,负责读取、处理数据并导出结果 def process_stock_data(): try: # 获取输入文件夹中所有CSV文件的列表 csv_files = [f for f in os.listdir(input_folder) if f.endswith('.csv')] # 检查是否找到CSV文件 if not csv_files: print(f'错误: 在{input_folder}中未找到CSV文件') return # 打印找到的CSV文件数量 print(f'找到{len(csv_files)}个CSV文件') # 初始化空列表,用于存储所有读取的数据 all_data = [] # 遍历每个CSV文件 for file in csv_files: # 构建文件的完整路径 file_path = os.path.join(input_folder, file) try: # 读取CSV文件数据到DataFrame df = pd.read_csv(file_path) # 打印文件名和其包含的列名,用于调试 print(f'读取文件: {file},列名: {df.columns.tolist()}') # 检查日期列是否存在于当前文件中 if _date_column not in df.columns: print(f'警告: 文件{file}中没有{_date_column}列,跳过该文件') continue # 尝试将日期列转换为datetime格式 try: df[_date_column] = pd.to_datetime(df[_date_column]) except Exception as e: 
print(f'警告: 文件{file}的日期格式转换失败: {e},跳过该文件') continue # 检查是否包含所有需要的周期列 missing_periods = [p for p in periods if p not in df.columns] if missing_periods: print(f'警告: 文件{file}中缺少周期列: {missing_periods}') # 检查是否包含其他所需的财务指标列 missing_other = [c for c in other_columns if c not in df.columns] if missing_other: print(f'警告: 文件{file}中缺少其他列: {missing_other}') # 将处理后的DataFrame添加到列表中 all_data.append(df) except Exception as e: # 捕获并打印读取文件时可能出现的错误 print(f'读取文件{file}时出错: {e}') # 检查是否成功读取到有效数据 if not all_data: print('错误: 没有找到有效的周期涨幅数据,请检查输入文件') return # 合并所有DataFrame数据,忽略原始索引 combined_df = pd.concat(all_data, ignore_index=True) # 打印合并后的数据量 print(f'合并后的数据量: {len(combined_df)}行') # 按日期列对数据进行分组 grouped = combined_df.groupby(_date_column) # 打印分组后的数据组数 print(f'按日期分组后的数据量: {len(grouped)}组') # 初始化存储中位数结果的列表 median_results_list = [] # 初始化存储涨幅占比结果的列表 ratio_results_list = [] # 遍历每个日期组 for date, group in grouped: # 初始化中位数结果行 median_row = {_date_column: date} # 计算每个周期的中位数 for period in periods: if period in group.columns: # 过滤掉NaN值,保留有效数据 valid_data = group[period].dropna() if len(valid_data) > 0: # 计算有效数据的中位数 median_row[period] = valid_data.median() else: # 如果没有有效数据,设为NaN median_row[period] = np.nan else: # 如果列不存在,设为NaN median_row[period] = np.nan # 计算其他财务指标的中位数 for col in other_columns: if col in group.columns: # 过滤掉NaN值,保留有效数据 valid_data = group[col].dropna() if len(valid_data) > 0: # 计算有效数据的中位数,并添加到结果行 median_row[f'{col}_median'] = valid_data.median() else: # 如果没有有效数据,设为NaN median_row[f'{col}_median'] = np.nan else: # 如果列不存在,设为NaN median_row[f'{col}_median'] = np.nan # 将当前日期的中位数结果添加到列表 median_results_list.append(median_row) # 初始化涨幅占比结果行 ratio_row = {_date_column: date} # 计算每个周期涨幅大于0的股票数量占比 for period in periods: if period in group.columns: # 过滤掉NaN值,保留有效数据 valid_data = group[period].dropna() total = len(valid_data) if total > 0: # 计算涨幅大于0的股票数量 positive = sum(valid_data > 0) # 计算占比 ratio_row[period] = positive / total else: # 如果没有有效数据,设为NaN ratio_row[period] = np.nan else: # 如果列不存在,设为NaN 
ratio_row[period] = np.nan # 将当前日期的涨幅占比结果添加到列表 ratio_results_list.append(ratio_row) # 将中位数结果列表转换为DataFrame median_results = pd.DataFrame(median_results_list) # 将涨幅占比结果列表转换为DataFrame ratio_results = pd.DataFrame(ratio_results_list) # 按日期对中位数结果进行排序 median_results = median_results.sort_values(by=_date_column) # 按日期对涨幅占比结果进行排序 ratio_results = ratio_results.sort_values(by=_date_column) # 导出中位数结果到CSV文件,不包含索引,使用UTF-8-SIG编码以支持中文 median_results.to_csv(output_median_file, index=False, encoding='utf-8-sig') # 导出涨幅占比结果到CSV文件,不包含索引,使用UTF-8-SIG编码以支持中文 ratio_results.to_csv(output_ratio_file, index=False, encoding='utf-8-sig') # 打印处理完成的提示信息 print(f'数据处理完成!结果已导出到:') print(output_median_file) print(output_ratio_file) except Exception as e: # 捕获并打印处理过程中可能出现的任何错误 print(f'处理数据时出错: {e}') # 导入traceback模块,打印详细的错误堆栈信息,便于调试 import traceback traceback.print_exc() # 如果该脚本作为主程序运行,则调用process_stock_data函数 if __name__ == '__main__': process_stock_data()详细注释每一行
08-27
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值