Data Stream Median

本文介绍了一种使用最大堆和最小堆的数据结构方法来实时计算一组连续输入整数的中位数。通过维持两个堆之间的平衡,可以在每次添加新元素后高效地获取当前集合的中位数。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Numbers keep coming; return the median of all numbers seen so far, every time a new number is added.

Clarification

What's the definition of Median?
- The median is the number in the middle of a sorted array. If there are n numbers in a sorted array A, the median is A[(n - 1) / 2]. For example, if A=[1,2,3], the median is 2. If A=[1,19], the median is 1.

Example

For numbers coming list: [1, 2, 3, 4, 5], return [1, 1, 2, 2, 3].

For numbers coming list: [4, 5, 1, 3, 2, 6, 0], return [4, 4, 4, 3, 3, 3, 3].

For numbers coming list: [2, 20, 100], return [2, 2, 20].

使用大小堆的方式进行处理

public class Solution {
    /*
     * @param nums: A list of integers
     * @return: the median of numbers
     */
    public int[] medianII(int[] nums) {
        // write your code here
        if (nums == null || nums.length == 0) {
            return new int[]{};
        }
        Comparator<Integer> cmp = new Comparator<Integer>() {
            public int compare(Integer a, Integer b) {
                return b - a;
            }
        };
        Queue<Integer> minHeap = new PriorityQueue<>();
        Queue<Integer> maxHeap = new PriorityQueue<>(1, cmp);
        int[] res = new int[nums.length];
        res[0] = nums[0];
        maxHeap.offer(nums[0]);
        for (int i = 1; i < nums.length; i++) {
            int val = maxHeap.peek();
            if (nums[i] < val) {
                maxHeap.offer(nums[i]);
            } else {
                minHeap.offer(nums[i]);
            }
            if (maxHeap.size() > minHeap.size() + 1) {
                minHeap.offer(maxHeap.poll());
            } else if (minHeap.size() > maxHeap.size()) {
                maxHeap.offer(minHeap.poll());
            }
            res[i] = maxHeap.peek();
        }
        return res;
    }
}


import os
import re

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.compose import ColumnTransformer
from sklearn.metrics import confusion_matrix  # FIX: used below but missing from original imports
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import (
    Input, Dense, Concatenate, Multiply, GlobalAveragePooling1D, Conv1D,
)
from tensorflow.keras.models import Model


# ---------------------------
# 1. Data loading and integrity validation
# ---------------------------
def find_gait_data_files(data_dir='gait_data'):
    """Auto-discover gait data files.

    Returns a dict mapping lower-cased patient id -> file path, matching the
    first of several filename patterns.
    """
    file_map = {}
    patterns = [
        r'koa_?(\w+)\.npy',      # koa_t25.npy or koat25.npy
        r'KOA_?(\w+)\.npy',      # KOA_T25.npy
        r'patient_?(\w+)\.npy',  # patient_t25.npy
        r'(\w+)_gait\.npy',      # t25_gait.npy
    ]
    for filename in os.listdir(data_dir):
        if filename.endswith('.npy'):
            for pattern in patterns:
                match = re.match(pattern, filename, re.IGNORECASE)
                if match:
                    patient_id = match.group(1)
                    file_map[patient_id.lower()] = os.path.join(data_dir, filename)
                    break
    return file_map


def load_and_validate_data(metadata_path='patient_metadata.csv', data_dir='gait_data'):
    """Load metadata and validate data completeness.

    Writes the incomplete records to 'incomplete_records.csv' and returns only
    the rows that have a matching gait data file.
    """
    # Load metadata
    metadata = pd.read_csv(metadata_path, encoding='utf-8-sig')
    # Clean patient ids.
    # FIX: original called `.str.lower()` on a plain str inside apply (the
    # pandas `.str` accessor does not exist on str) and had an unclosed paren.
    metadata['clean_id'] = metadata['patient_id'].apply(
        lambda x: re.sub(r'[^\w]', '', str(x)).lower()
    )
    # Map patient ids to gait files
    gait_file_map = find_gait_data_files(data_dir)
    # Attach file paths to metadata
    metadata['gait_path'] = metadata['clean_id'].map(gait_file_map)
    # Mark missing data
    metadata['data_complete'] = metadata['gait_path'].notnull()
    # Split complete vs. incomplete records
    complete_data = metadata[metadata['data_complete']].copy()
    incomplete_data = metadata[~metadata['data_complete']].copy()
    print(f"总患者数: {len(metadata)}")
    print(f"完整数据患者: {len(complete_data)}")
    print(f"缺失步态数据患者: {len(incomplete_data)}")
    if not incomplete_data.empty:
        print("\n缺失步态数据的患者:")
        print(incomplete_data[['patient_id', 'koa_grade']])
        incomplete_data.to_csv('incomplete_records.csv', index=False)
    return complete_data


# ---------------------------
# 2. Feature engineering
# ---------------------------
def preprocess_static_features(metadata):
    """Preprocess static (metadata) features.

    Returns (transformed feature matrix, list of output feature names).
    """
    # Feature groups
    numeric_features = ['age', 'height_cm', 'weight_kg', 'bmi']
    categorical_features = ['gender', 'dominant_leg', 'affected_side']
    # Ensure all expected columns exist
    for col in numeric_features + categorical_features:
        if col not in metadata.columns:
            metadata[col] = np.nan
    # Fill missing values
    metadata[numeric_features] = metadata[numeric_features].fillna(
        metadata[numeric_features].median())
    metadata[categorical_features] = metadata[categorical_features].fillna('unknown')
    # Preprocessing pipeline
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numeric_features),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ],
        remainder='drop'  # only the listed features are kept
    )
    # Apply preprocessing
    static_features = preprocessor.fit_transform(metadata)
    # FIX: original tuple was missing its closing parenthesis.
    feature_names = (
        numeric_features
        + list(preprocessor.named_transformers_['cat']
               .get_feature_names_out(categorical_features))
    )
    return static_features, feature_names


def extract_gait_features(gait_path):
    """Extract hand-crafted features from one gait recording.

    Returns (1-D feature vector, list of feature names).
    Assumes the array is (timesteps, 10 channels): right leg in columns 0-4,
    left leg in columns 5-9 -- TODO confirm against the data files.
    """
    gait_data = np.load(gait_path)
    # 1. Time-domain features
    mean_values = np.mean(gait_data, axis=0)
    std_values = np.std(gait_data, axis=0)
    max_values = np.max(gait_data, axis=0)
    min_values = np.min(gait_data, axis=0)
    # 2. Frequency-domain features (FFT)
    fft_values = np.abs(np.fft.rfft(gait_data, axis=0))
    dominant_freq = np.argmax(fft_values, axis=0)
    spectral_energy = np.sum(fft_values**2, axis=0)
    # 3. Biomechanical features
    # Left/right asymmetry
    asymmetry_index = np.mean(np.abs(gait_data[:, :5] - gait_data[:, 5:10]), axis=0)
    # Joint coordination (hip-knee phase difference)
    hip_knee_phase_diff = (np.arctan2(gait_data[:, 2], gait_data[:, 1])
                           - np.arctan2(gait_data[:, 7], gait_data[:, 6]))
    phase_diff_mean = np.mean(hip_knee_phase_diff)
    phase_diff_std = np.std(hip_knee_phase_diff)
    # Gait-cycle symmetry
    gait_cycle_symmetry = np.corrcoef(
        gait_data[:, :5].flatten(), gait_data[:, 5:10].flatten())[0, 1]
    # Concatenate all features
    gait_features = np.concatenate([
        mean_values, std_values, max_values, min_values,
        dominant_freq, spectral_energy, asymmetry_index,
        [phase_diff_mean, phase_diff_std, gait_cycle_symmetry],
    ])
    # Feature names
    base_names = [
        'R.Hip_AP', 'R.Knee_Moment', 'R.KneeFlex', 'R.Gluteus', 'R.Vastus',
        'L.Hip_AP', 'L.Knee_Moment', 'L.KneeFlex', 'L.Gluteus', 'L.Vastus',
    ]
    feature_names = []
    for prefix in ['mean_', 'std_', 'max_', 'min_']:
        feature_names.extend([prefix + name for name in base_names])
    feature_names.extend(['dom_freq_' + name for name in base_names])
    feature_names.extend(['spec_energy_' + name for name in base_names])
    feature_names.extend(['asym_' + name for name in base_names])
    feature_names.extend(['phase_diff_mean', 'phase_diff_std', 'gait_symmetry'])
    return gait_features, feature_names


def biomechanical_feature_enhancement(static_df, gait_features):
    """Clinically motivated feature enhancement (BMI and age adjustments)."""
    enhanced_features = gait_features.copy()
    # BMI affects muscle strength
    bmi = static_df['bmi'].values[0]
    bmi_factor = 1 + (bmi - 25) * 0.01  # +1% muscle strength per BMI unit over 25
    # Muscle-strength channel indices (R.Gluteus, R.Vastus, L.Gluteus, L.Vastus)
    muscle_indices = [3, 4, 8, 9]
    for idx in muscle_indices:
        # Apply BMI adjustment across the stat blocks of the feature vector
        enhanced_features[idx] *= bmi_factor        # mean
        enhanced_features[idx + 10] *= bmi_factor   # std
        enhanced_features[idx + 20] *= bmi_factor   # max
        enhanced_features[idx + 30] *= bmi_factor   # min
        enhanced_features[idx + 50] *= bmi_factor   # spectral energy
    # Age affects gait stability
    age = static_df['age'].values[0]
    stability_factor = max(0.8, 1 - (age - 60) * 0.005)  # -0.5%/year past 60, floor 0.8
    # Stability-related feature indices
    stability_indices = [
        1, 6,    # knee moments
        2, 7,    # knee flexion
        40, 45,
        50, 55,  # asymmetry and phase-difference features
        len(gait_features) - 3, len(gait_features) - 2, len(gait_features) - 1,  # phase & symmetry
    ]
    for idx in stability_indices:
        enhanced_features[idx] *= stability_factor
    return enhanced_features


# ---------------------------
# 3. Feature-fusion models
# ---------------------------
def create_fusion_model(static_dim, gait_dim, num_classes):
    """Build a dense feature-fusion model over extracted features."""
    # Inputs
    static_input = Input(shape=(static_dim,), name='static_input')
    gait_input = Input(shape=(gait_dim,), name='gait_input')
    # Static-feature stream
    static_stream = Dense(64, activation='relu')(static_input)
    static_stream = Dense(32, activation='relu')(static_stream)
    # Gait-feature stream
    gait_stream = Dense(128, activation='relu')(gait_input)
    gait_stream = Dense(64, activation='relu')(gait_stream)
    # Attention-style fusion: gait features gate the static stream
    attention_scores = Dense(64, activation='sigmoid')(gait_stream)
    attended_static = Multiply()([static_stream, attention_scores])
    # Fuse
    fused = Concatenate()([attended_static, gait_stream])
    # Classification head
    x = Dense(128, activation='relu')(fused)
    x = Dense(64, activation='relu')(x)
    output = Dense(num_classes, activation='softmax', name='koa_grade')(x)
    return Model(inputs=[static_input, gait_input], outputs=output)


def create_1d_cnn_fusion_model(static_dim, gait_timesteps, gait_channels, num_classes):
    """Build a fusion model that consumes the raw gait time series via 1-D CNN."""
    # Inputs
    static_input = Input(shape=(static_dim,), name='static_input')
    gait_input = Input(shape=(gait_timesteps, gait_channels), name='gait_input')
    # Static-feature stream
    static_stream = Dense(64, activation='relu')(static_input)
    static_stream = Dense(32, activation='relu')(static_stream)
    # Time-series feature extraction
    gait_stream = Conv1D(64, 7, activation='relu', padding='same')(gait_input)
    gait_stream = Conv1D(128, 5, activation='relu', padding='same')(gait_stream)
    gait_stream = GlobalAveragePooling1D()(gait_stream)
    # Fuse
    fused = Concatenate()([static_stream, gait_stream])
    # Classification head
    x = Dense(128, activation='relu')(fused)
    x = Dense(64, activation='relu')(x)
    output = Dense(num_classes, activation='softmax', name='koa_grade')(x)
    return Model(inputs=[static_input, gait_input], outputs=output)


# ---------------------------
# 4. Training and evaluation
# ---------------------------
def train_model(metadata, model_type='features'):
    """Train a fusion model; model_type is 'features' or a time-series variant."""
    # Preprocess static features
    static_features, static_feature_names = preprocess_static_features(metadata)
    # Extract gait features
    gait_features_list = []
    gait_feature_names = None
    gait_data_list = []  # raw time-series data (for the CNN model)
    for idx, row in metadata.iterrows():
        gait_path = row['gait_path']
        gait_features, gait_feature_names = extract_gait_features(gait_path)
        # Apply biomechanical enhancement
        enhanced_features = biomechanical_feature_enhancement(row.to_frame().T, gait_features)
        gait_features_list.append(enhanced_features)
        # Keep the raw series for the CNN model
        gait_data = np.load(gait_path)
        gait_data_list.append(gait_data)
    gait_features = np.array(gait_features_list)
    # Target variable
    y = metadata['koa_grade'].values
    num_classes = len(np.unique(y))
    # Train/test split
    X_static_train, X_static_test, X_gait_train, X_gait_test, y_train, y_test = train_test_split(
        static_features, gait_features, y, test_size=0.2, random_state=42, stratify=y
    )
    # Build the model
    if model_type == 'features':
        model = create_fusion_model(
            static_dim=static_features.shape[1],
            gait_dim=gait_features.shape[1],
            num_classes=num_classes,
        )
        train_input = [X_static_train, X_gait_train]
        test_input = [X_static_test, X_gait_test]
    else:  # use raw time series
        # Pad all series to a common length
        max_timesteps = max(len(data) for data in gait_data_list)
        X_gait_padded = np.array([
            np.pad(data, ((0, max_timesteps - len(data)), (0, 0)), 'constant')
            for data in gait_data_list
        ])
        # Re-split with the padded series
        X_static_train, X_static_test, X_gait_train, X_gait_test, y_train, y_test = train_test_split(
            static_features, X_gait_padded, y, test_size=0.2, random_state=42, stratify=y
        )
        model = create_1d_cnn_fusion_model(
            static_dim=static_features.shape[1],
            gait_timesteps=X_gait_padded.shape[1],
            gait_channels=X_gait_padded.shape[2],
            num_classes=num_classes,
        )
        train_input = [X_static_train, X_gait_train]
        test_input = [X_static_test, X_gait_test]
    # Compile
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    # Train
    history = model.fit(
        train_input, y_train,
        validation_split=0.1,
        epochs=100,
        batch_size=16,
        callbacks=[
            EarlyStopping(patience=20, restore_best_weights=True),
            ReduceLROnPlateau(factor=0.5, patience=10),
        ],
    )
    # Evaluate
    test_loss, test_acc = model.evaluate(test_input, y_test)
    print(f"\n测试准确率: {test_acc:.4f}")
    # Feature-importance analysis (feature model only)
    if model_type == 'features':
        analyze_feature_importance(model, X_static_test, X_gait_test, y_test,
                                   static_feature_names, gait_feature_names)
    return model, history


def analyze_feature_importance(model, X_static, X_gait, y_true, static_names, gait_names):
    """Plot a confusion matrix and (optionally) SHAP feature importances."""
    # Model predictions
    y_pred = model.predict([X_static, X_gait])
    y_pred_classes = np.argmax(y_pred, axis=1)
    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred_classes)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=["G0", "G1", "G2", "G3", "G4"],
                yticklabels=["G0", "G1", "G2", "G3", "G4"])
    plt.xlabel('预测分级')
    plt.ylabel('真实分级')
    plt.title('KL分级混淆矩阵')
    plt.savefig('confusion_matrix.png', dpi=300)
    plt.close()
    # SHAP feature importance (best-effort; shap is optional)
    try:
        import shap
        # Build the explainer
        explainer = shap.KernelExplainer(model.predict, [X_static[:50], X_gait[:50]])
        # Compute SHAP values
        shap_values = explainer.shap_values([X_static[:50], X_gait[:50]])
        # Combined feature names
        all_feature_names = static_names + gait_names
        # Visualize
        plt.figure(figsize=(12, 8))
        shap.summary_plot(shap_values, feature_names=all_feature_names, plot_type='bar')
        plt.tight_layout()
        plt.savefig('feature_importance.png', dpi=300)
        plt.close()
    except ImportError:
        print("SHAP库未安装,跳过特征重要性分析")


# ---------------------------
# 5. Main workflow
# ---------------------------
def main():
    print("="*50)
    print("KOA分级诊疗平台 - 特征融合系统")
    print("="*50)
    # Step 1: load and validate data
    print("\n[步骤1] 加载数据并验证完整性...")
    complete_data = load_and_validate_data()
    if len(complete_data) < 20:
        print("\n错误: 完整数据样本不足,无法训练模型")
        return
    # Step 2: train the fusion model
    print("\n[步骤2] 训练特征融合模型...")
    print("选项1: 使用提取的特征 (更快)")
    print("选项2: 使用原始时间序列 (更准确)")
    choice = input("请选择模型类型 (1/2): ").strip()
    model_type = 'features' if choice == '1' else 'time_series'
    model, history = train_model(complete_data, model_type)
    # Save the model
    model.save(f'koa_fusion_model_{model_type}.h5')
    print(f"模型已保存为 koa_fusion_model_{model_type}.h5")
    # Plot training history
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='训练准确率')
    plt.plot(history.history['val_accuracy'], label='验证准确率')
    plt.title('模型准确率')
    plt.ylabel('准确率')
    plt.xlabel('Epoch')
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='训练损失')
    plt.plot(history.history['val_loss'], label='验证损失')
    plt.title('模型损失')
    plt.ylabel('损失')
    plt.xlabel('Epoch')
    plt.legend()
    plt.tight_layout()
    plt.savefig('training_history.png', dpi=300)
    print("训练历史图已保存为 training_history.png")
    print("\n特征融合模型训练完成!")


if __name__ == "__main__":
    main()

# NOTE(review): the original paste ended with a user remark saying the gait
# data files are actually Excel, not .npy. If so, find_gait_data_files and
# the np.load calls need to switch to .xlsx patterns / pd.read_excel —
# confirm the actual file format before running.
07-24
1. 用户与身体信息管理模块 用户信息管理: 注册登录:支持手机号 / 邮箱注册,密码加密存储,提供第三方快捷登录(模拟) 个人资料:记录基本信息(姓名、年龄、性别、身高、体重、职业) 健康目标:用户设置目标(如 “减重 5kg”“增肌”“维持健康”)及期望周期 身体状态跟踪: 体重记录:定期录入体重数据,生成体重变化曲线(折线图) 身体指标:记录 BMI(自动计算)、体脂率(可选)、基础代谢率(根据身高体重估算) 健康状况:用户可填写特殊情况(如糖尿病、过敏食物、素食偏好),系统据此调整推荐 2. 膳食记录与食物数据库模块 食物数据库: 基础信息:包含常见食物(如米饭、鸡蛋、牛肉)的名称、类别(主食 / 肉类 / 蔬菜等)、每份重量 营养成分:记录每 100g 食物的热量(kcal)、蛋白质、脂肪、碳水化合物、维生素、矿物质含量 数据库维护:管理员可添加新食物、更新营养数据,支持按名称 / 类别检索 膳食记录功能: 快速记录:用户选择食物、输入食用量(克 / 份),系统自动计算摄入的营养成分 餐次分类:按早餐 / 午餐 / 晚餐 / 加餐分类记录,支持上传餐食照片(可选) 批量操作:提供常见套餐模板(如 “三明治 + 牛奶”),一键添加到记录 历史记录:按日期查看过往膳食记录,支持编辑 / 删除错误记录 3. 营养分析模块 每日营养摄入分析: 核心指标计算:统计当日摄入的总热量、蛋白质 / 脂肪 / 碳水化合物占比(按每日推荐量对比) 微量营养素分析:检查维生素(如维生素 C、钙、铁)的摄入是否达标 平衡评估:生成 “营养平衡度” 评分(0-100 分),指出摄入过剩或不足的营养素 趋势分析: 周 / 月营养趋势:用折线图展示近 7 天 / 30 天的热量、三大营养素摄入变化 对比分析:将实际摄入与推荐量对比(如 “蛋白质摄入仅达到推荐量的 70%”) 目标达成率:针对健
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

ncst

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值