【Java】((int) ((float) nums.length / 0.75F + 1.0F))是什么鬼?

了解如何为HashMap设置合理的初始容量,避免不必要的扩容,提高程序运行效率。通过expectedSize/0.75F+1.0F计算,平衡内存与性能。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Map<Integer, Integer> map = new HashMap<>((int) ((float) nums.length / 0.75F + 1.0F));

这句代码中后面的((int) ((float) nums.length / 0.75F + 1.0F))解释如下:

当我们使用HashMap(int initialCapacity)来初始化容量的时候,jdk会默认帮我们计算一个相对合理的值当做初始容量。但是这个值并没有参考loadFactor的值。

也就是说,如果我们设置的初始容量是7,经过Jdk处理之后,会被设置成8,但是,这个HashMap在元素个数超过 8*0.75 = 6(也就是放入第7个元素)的时候就会进行一次扩容,这明显是我们不希望见到的。

如果我们通过expectedSize / 0.75F + 1.0F计算,7/0.75 + 1 ≈ 10.33,强制转换成int后为10,10经过Jdk处理之后,会被设置成16(扩容阈值为 16*0.75 = 12),这就大大的减少了扩容的几率。

当HashMap中的元素个数超过内部哈希表容量的75%时(默认负载因子为0.75),会触发扩容和rehash,而rehash的过程是比较耗费时间的。所以把初始容量设置成expectedSize/0.75 + 1,可以有效地避免扩容、减少冲突,其中的 +1 也可以减小浮点除法取整带来的误差。

所以,我们可以认为,当我们明确知道HashMap中元素的个数的时候,把初始容量设置成expectedSize / 0.75F + 1.0F 是一个在性能上相对好的选择,但是,同时也会牺牲些内存。

总结

当我们想要在代码中创建一个HashMap的时候,如果我们已知这个Map中即将存放的元素个数,给HashMap设置初始容量可以在一定程度上提升效率。

但是,JDK并不会直接拿用户传进来的数字当做初始容量,而是会进行一番运算,最终得到一个2的幂。原因在《全网把Map中的hash()分析的最透彻的文章,别无二家。》一文中介绍过,得到这个数字的算法其实是使用了无符号右移和按位或运算来提升效率。

但是,为了最大程度的避免扩容带来的性能消耗,我们建议可以把默认容量的数字设置成expectedSize / 0.75F + 1.0F 。在日常开发中,可以使用

Map<String, String> map = Maps.newHashMapWithExpectedSize(10);

来创建一个HashMap,计算的过程guava会帮我们完成。

但是,以上的操作是一种用内存换性能的做法,真正使用的时候,要考虑到内存的影响。

# Question posted with this script (translated): "Please review and optimise the
# code, paying particular attention to avoiding data leakage and to making the
# weights adjustable."
#
# Review summary of what changed relative to the pasted script:
#   * Targets were paired with the draw they were computed from (off-by-one
#     slice), so the label leaked into the inputs.  Row k is now paired with
#     draw k + 1.
#   * The MinMaxScaler was fit on all rows, including every validation fold.
#     It is now fit only on rows covered by the first training fold.
#   * step2_process_data returned the unsorted frame; rolling/lag features need
#     chronological order, so data is sorted before any feature is computed.
#   * The `red_{n}_missing` feature referenced a column that never existed.
#   * Undefined names (Bidirectional, EarlyStopping, ReduceLROnPlateau, K,
#     red_output, X_val, y_val, total_weight) and stray module-level statements
#     were removed or replaced; duplicated definitions were merged.
#   * Loss weights and ensemble weights are parameters instead of constants.
#   * The pad_sequences suggestion was dropped: every window has exactly
#     `time_steps` rows, so padding would be a no-op.
import warnings

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import (
    Bidirectional,
    Dense,
    Dropout,
    Input,
    Layer,
    LSTM,
)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

warnings.filterwarnings('ignore')

DATE_COL = '日期'
RED_COLS = ['数字1', '数字2', '数字3', '数字4', '数字5', '数字6']
BLUE_COL = '数字7'
N_RED = 33          # red balls are numbered 1..33
N_BLUE = 16         # blue balls are numbered 1..16
WINDOW_SIZE = 10    # rolling-statistics window, in draws
N_LAGS = 9          # lag features cover 1..9 draws back
TIME_STEPS = 10     # length of each LSTM input window
N_SPLITS = 3        # TimeSeriesSplit folds shared by prepare_data/train_model
PRIME_NUMS = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31]
VAL_LOSS_FILE = 'fold_val_losses.pkl'


# ===== Custom attention layer =====
class AttentionLayer(Layer):
    """Additive attention pooling over the time axis.

    Takes a 3-D tensor indexed (batch, time, feature) -- `build` reads
    `input_shape[1]` and `input_shape[2]` -- and returns the attention-weighted
    sum over axis 1, i.e. a (batch, feature) tensor.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable projection, bias and scoring vector."""
        self.time_steps = input_shape[1]
        self.feature_dim = input_shape[2]
        self.W = self.add_weight(
            name='att_weight',
            shape=(self.feature_dim, self.feature_dim),
            initializer='glorot_uniform',
            trainable=True,
        )
        self.b = self.add_weight(
            name='att_bias',
            shape=(self.feature_dim,),
            initializer='zeros',
            trainable=True,
        )
        self.V = self.add_weight(
            name='att_v',
            shape=(self.feature_dim, 1),
            initializer='glorot_uniform',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        """Return the attention-weighted sum of `inputs` over axis 1."""
        score = tf.matmul(tf.tanh(tf.matmul(inputs, self.W) + self.b), self.V)
        score = tf.squeeze(score, axis=-1)
        # Normalise across time steps so the weights of one sample sum to 1.
        alpha = tf.nn.softmax(score, axis=1)
        alpha = tf.expand_dims(alpha, axis=-1)
        return tf.reduce_sum(alpha * inputs, axis=1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[2])


# ===== Model builders =====
def build_model(input_shape, red_weight=0.15, blue_weight=0.1):
    """Build the bidirectional variant with six per-position red heads.

    Args:
        input_shape: (time_steps, num_features) of one input window.
        red_weight: loss weight applied to each of the six `red_{i}` heads.
        blue_weight: loss weight applied to the `blue_output` head.

    Returns:
        A compiled Keras model with outputs `red_0`..`red_5` and `blue_output`,
        all softmax heads trained with categorical cross-entropy.
    """
    inputs = Input(shape=input_shape)
    lstm_out = Bidirectional(LSTM(128, return_sequences=True))(inputs)
    att_out = AttentionLayer()(lstm_out)

    red_outputs = []
    for i in range(6):
        branch = Dense(64, activation='relu')(att_out)
        red_outputs.append(
            Dense(N_RED, activation='softmax', name=f'red_{i}')(branch)
        )

    blue_branch = Dense(32, activation='relu')(att_out)
    blue_output = Dense(
        N_BLUE, activation='softmax', name='blue_output'
    )(blue_branch)

    model = Model(inputs, red_outputs + [blue_output])
    loss_weights = {f'red_{i}': red_weight for i in range(6)}
    loss_weights['blue_output'] = blue_weight
    model.compile(
        optimizer=Adam(0.001),
        loss='categorical_crossentropy',
        loss_weights=loss_weights,
    )
    return model


def build_attention_lstm_model(input_shape, red_weight=0.7, blue_weight=0.3,
                               learning_rate=0.001):
    """Build the LSTM + attention model used by `train_model`.

    The red head is a 33-way multi-label (sigmoid) output matching the
    multi-hot targets from `create_features`; the blue head is a 16-way
    sigmoid output.  Output names are `red_output` and `blue_output`, which
    `train_model` and `plot_training_history` rely on.

    Args:
        input_shape: (time_steps, num_features) of one input window.
        red_weight: loss weight of the red head.
        blue_weight: loss weight of the blue head.
        learning_rate: Adam learning rate.

    Returns:
        A compiled Keras model.
    """
    inputs = Input(shape=input_shape)

    lstm_out = LSTM(128, return_sequences=True)(inputs)
    lstm_out = Dropout(0.3)(lstm_out)
    attention_out = AttentionLayer()(lstm_out)

    red_branch = Dense(64, activation='relu')(attention_out)
    red_branch = Dropout(0.2)(red_branch)
    red_output = Dense(
        N_RED, activation='sigmoid', name='red_output'
    )(red_branch)

    blue_branch = Dense(32, activation='relu')(attention_out)
    blue_branch = Dropout(0.2)(blue_branch)
    blue_output = Dense(
        N_BLUE, activation='sigmoid', name='blue_output'
    )(blue_branch)

    model = Model(inputs=inputs, outputs=[red_output, blue_output])
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss={'red_output': 'binary_crossentropy',
              'blue_output': 'binary_crossentropy'},
        metrics={'red_output': 'binary_accuracy',
                 'blue_output': 'binary_accuracy'},
        loss_weights={'red_output': red_weight, 'blue_output': blue_weight},
    )
    return model


def coverage_rate(y_true, y_pred):
    """Return the mean number of hits among the top-6 predicted numbers.

    Args:
        y_true: (batch, n) multi-hot tensor of the numbers actually drawn.
        y_pred: (batch, n) tensor of predicted scores.

    Returns:
        Scalar tensor: per-sample count of top-6 predictions that were drawn,
        averaged over the batch.
    """
    top_indices = tf.math.top_k(y_pred, k=6).indices
    hits = tf.gather(y_true, top_indices, batch_dims=1)
    return tf.reduce_mean(tf.reduce_sum(tf.cast(hits, tf.float32), axis=-1))


# ===== Data preparation =====
def step1_format_data():
    """Extract columns A and C of '01hand.xlsx' into '01hand2.xlsx'."""
    print("===== 步骤1: 格式化原始数据 =====")
    df = pd.read_excel('01hand.xlsx', sheet_name='Sheet1', header=None)

    new_df = pd.DataFrame({
        'A': pd.to_numeric(df.iloc[:, 0], errors='coerce'),
        'B': pd.to_numeric(df.iloc[:, 2], errors='coerce'),
    }).dropna()

    new_df.to_excel('01hand2.xlsx', index=False, header=False)
    print(f"新表格 '01hand2.xlsx' 创建成功! 包含 {len(new_df)} 行数据")


def step2_process_data():
    """Regroup the flat values into draws, de-duplicate and sort them.

    Reads '01hand2.xlsx', takes the values in row-major order in groups of
    eight (date followed by seven numbers), drops duplicate dates and writes
    a descending and an ascending copy.

    Returns:
        The de-duplicated draws sorted by date ascending, with a fresh index.
    """
    print("\n===== 步骤2: 数据去重和排序 =====")
    input_file = "01hand2.xlsx"
    output_file1 = "02resultA.xlsx"  # descending output
    output_file2 = "02resultB.xlsx"  # ascending output

    df = pd.read_excel(input_file, header=None)
    all_values = df.stack().dropna().astype(str).tolist()

    valid_length = len(all_values) - (len(all_values) % 8)
    if len(all_values) != valid_length:
        print(f"警告: 数据总量 {len(all_values)} 不符合8的倍数, 截断至 {valid_length} 个元素")
        all_values = all_values[:valid_length]

    new_data = []
    skipped = 0
    for start in range(0, len(all_values), 8):
        group = all_values[start:start + 8]
        try:
            # Cells may arrive as "2023001.0" when a column was read as float,
            # so go through float() for the date as well as the numbers.
            new_data.append([int(float(value)) for value in group])
        except (ValueError, OverflowError):
            skipped += 1
    if skipped:
        print(f"警告: 跳过 {skipped} 组无法解析的数据")

    columns = [DATE_COL] + RED_COLS + [BLUE_COL]
    df = pd.DataFrame(new_data, columns=columns)
    df = df.drop_duplicates(subset=DATE_COL).dropna()

    df.sort_values(DATE_COL, ascending=False).to_excel(output_file1, index=False)
    print(f"降序文件保存至: {output_file1}")

    # Downstream feature engineering is order-sensitive, so return the
    # ascending frame rather than the unsorted one.
    df = df.sort_values(DATE_COL, ascending=True).reset_index(drop=True)
    df.to_excel(output_file2, index=False)
    print(f"升序文件保存至: {output_file2}")

    print(f"最终数据维度: {df.shape}")
    return df


# ===== Feature engineering =====
def _chronological(df):
    """Return `df` sorted by date ascending with a 0..n-1 index."""
    return df.sort_values(DATE_COL, kind='stable').reset_index(drop=True)


def _build_feature_frame(df):
    """Compute features for every row using only that row and earlier rows.

    `df` must already be in chronological order with a 0..n-1 index.  Rows
    without enough history contain NaN in the rolling and lag columns.
    """
    reds = df[RED_COLS]
    columns = {
        DATE_COL: df[DATE_COL],
        '红球和值': reds.sum(axis=1),
        '蓝球值': df[BLUE_COL],
        '奇偶比': (reds % 2).sum(axis=1),
        '大小比': (reds > 16).sum(axis=1),
    }

    # Draws since each red number last appeared (0 = drawn in this row).
    # Before a number's first appearance, count from the start of the data.
    position = pd.Series(np.arange(len(df), dtype=float), index=df.index)
    for num in range(1, N_RED + 1):
        drawn = reds.eq(num).any(axis=1)
        last_seen = position.where(drawn).ffill()
        columns[f'red_{num}_missing'] = (
            (position - last_seen).fillna(position + 1)
        )

    columns['prime_ratio'] = reds.isin(PRIME_NUMS).sum(axis=1)

    for col in ['红球和值', '奇偶比', '大小比']:
        rolling = columns[col].rolling(window=WINDOW_SIZE)
        columns[f'{col}_MA{WINDOW_SIZE}'] = rolling.mean()
        columns[f'{col}_STD{WINDOW_SIZE}'] = rolling.std()

    for lag in range(1, N_LAGS + 1):
        for col in RED_COLS + [BLUE_COL]:
            columns[f'{col}_lag{lag}'] = df[col].shift(lag)

    # Build the frame in one go to avoid fragmenting it column by column.
    return pd.DataFrame(columns)


def create_features(df, save_features=True):
    """Create model features and next-draw targets.

    Feature row k describes draws up to and including draw k; its target is
    draw k + 1.  The first `max(WINDOW_SIZE, N_LAGS)` rows (incomplete
    history) and the last row (no following draw) are dropped.

    Args:
        df: draws with columns 日期, 数字1..数字7.
        save_features: also write the feature table to '04_features.xlsx'.

    Returns:
        (features, red_targets, blue_targets): features still carries the
        date column; red_targets is (n, 33) multi-hot; blue_targets is
        (n, 16) one-hot.

    Raises:
        ValueError: if there are too few draws to produce a single sample.
    """
    print("\n===== 步骤3: 特征工程 =====")
    df = _chronological(df)
    first_valid = max(WINDOW_SIZE, N_LAGS)
    if len(df) <= first_valid + 1:
        raise ValueError(
            f"need more than {first_valid + 1} draws, got {len(df)}"
        )

    all_features = _build_feature_frame(df)

    # Targets indexed by the feature row they belong to: entry k is draw k+1.
    red_next = df[RED_COLS].to_numpy()[1:]
    blue_next = df[BLUE_COL].to_numpy()[1:]
    red_targets = (
        (red_next[:, :, None] == np.arange(1, N_RED + 1)).any(axis=1)
    ).astype(int)
    blue_targets = (
        blue_next[:, None] == np.arange(1, N_BLUE + 1)
    ).astype(int)

    # Same row range on both sides so features and targets stay aligned.
    features = all_features.iloc[first_valid:-1].reset_index(drop=True)
    red_targets = red_targets[first_valid:]
    blue_targets = blue_targets[first_valid:]

    feature_columns = features.drop(columns=[DATE_COL]).columns.tolist()
    joblib.dump(feature_columns, 'feature_columns.pkl')
    print(f"特征列名已保存: {len(feature_columns)}个特征")

    if save_features:
        features.to_excel('04_features.xlsx', index=False)
    print(f"特征工程完成, 维度: {features.shape}")

    return features, red_targets, blue_targets


def prepare_data(features, red_targets, blue_targets,
                 time_steps=TIME_STEPS, n_splits=N_SPLITS):
    """Scale the features and cut them into fixed-length windows.

    To keep validation data out of the scaler, it is fit only on the rows
    covered by the first training fold of `TimeSeriesSplit(n_splits)`.  Use
    the same (or a smaller) `n_splits` in `train_model`.

    Args:
        features: feature frame from `create_features` (with date column).
        red_targets: per-row red targets aligned with `features`.
        blue_targets: per-row blue targets aligned with `features`.
        time_steps: number of consecutive rows per input window.
        n_splits: number of time-series folds the data will be split into.

    Returns:
        (X_seq, y_red_seq, y_blue_seq, scaler_X).  Window j covers rows
        j..j+time_steps-1 and is labelled with the target of its last row.

    Raises:
        ValueError: if there are not enough rows for `n_splits` folds.
    """
    print("\n===== 步骤4: 数据准备 =====")
    feature_frame = features.drop(columns=[DATE_COL])
    n_seq = len(feature_frame) - time_steps + 1
    if n_seq <= n_splits:
        raise ValueError(
            f"only {max(n_seq, 0)} windows available for {n_splits} folds"
        )

    splitter = TimeSeriesSplit(n_splits=n_splits)
    first_train_idx, _ = next(splitter.split(np.arange(n_seq)))
    # Rows touched by the last window of the first training fold.
    fit_rows = int(first_train_idx[-1]) + time_steps

    scaler_X = MinMaxScaler()
    scaler_X.fit(feature_frame.iloc[:fit_rows])
    X_scaled = scaler_X.transform(feature_frame)

    joblib.dump(scaler_X, 'scaler_X.save')
    print("特征缩放器已保存")

    X_seq = np.stack([X_scaled[j:j + time_steps] for j in range(n_seq)])
    y_red_seq = np.asarray(red_targets)[time_steps - 1:]
    y_blue_seq = np.asarray(blue_targets)[time_steps - 1:]

    print(f"时间序列数据形状: X={X_seq.shape}, y_red={y_red_seq.shape}, y_blue={y_blue_seq.shape}")

    joblib.dump(X_scaled[-time_steps:], 'historical_data.pkl')
    print("历史数据已保存用于预测")

    return X_seq, y_red_seq, y_blue_seq, scaler_X


# ===== Training =====
def train_model(X, y_red, y_blue, n_splits=N_SPLITS, epochs=100,
                batch_size=32, patience=15):
    """Train one model per time-series fold.

    Each fold's best validation loss is written to 'fold_val_losses.pkl' so
    that `predict_next_period` can weight the ensemble.

    Args:
        X: (samples, time_steps, features) input windows.
        y_red: (samples, 33) red targets.
        y_blue: (samples, 16) blue targets.
        n_splits: number of TimeSeriesSplit folds.
        epochs: maximum epochs per fold.
        batch_size: mini-batch size.
        patience: early-stopping patience on `val_loss`.

    Returns:
        List of trained models, one per fold.
    """
    print("\n===== 步骤5: 模型训练 =====")
    best_models = []
    val_losses = []
    tscv = TimeSeriesSplit(n_splits=n_splits)

    for fold, (train_index, val_index) in enumerate(tscv.split(X), start=1):
        print(f"\n===== 训练 Fold {fold}/{n_splits} =====")

        model = build_attention_lstm_model((X.shape[1], X.shape[2]))
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=patience,
                          restore_best_weights=True, verbose=1),
            ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=7,
                              min_lr=1e-6, verbose=1),
        ]

        history = model.fit(
            X[train_index],
            {'red_output': y_red[train_index],
             'blue_output': y_blue[train_index]},
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(
                X[val_index],
                {'red_output': y_red[val_index],
                 'blue_output': y_blue[val_index]},
            ),
            callbacks=callbacks,
            verbose=1,
        )

        model.save(f'best_model_fold{fold}.h5')
        best_models.append(model)
        val_losses.append(float(min(history.history['val_loss'])))

        plot_training_history(history, fold)

    joblib.dump(val_losses, VAL_LOSS_FILE)
    return best_models


def plot_training_history(history, fold):
    """Save loss, accuracy and learning-rate curves for one fold as a PNG."""
    logs = history.history
    plt.figure(figsize=(15, 10))

    plt.subplot(2, 2, 1)
    plt.plot(logs['loss'], label='训练损失')
    plt.plot(logs['val_loss'], label='验证损失')
    plt.title(f'Fold {fold} - 总损失曲线')
    plt.ylabel('损失')
    plt.xlabel('Epoch')
    plt.legend()

    heads = [('red_output', '红球'), ('blue_output', '蓝球')]
    for slot, (output_name, label) in enumerate(heads, start=2):
        plt.subplot(2, 2, slot)
        train_key = f'{output_name}_binary_accuracy'
        val_key = f'val_{train_key}'
        # Metric key names depend on the Keras version; plot what exists.
        if train_key in logs and val_key in logs:
            plt.plot(logs[train_key], label=f'{label}训练准确率')
            plt.plot(logs[val_key], label=f'{label}验证准确率')
            plt.legend()
        plt.title(f'Fold {fold} - {label}准确率')
        plt.ylabel('准确率')
        plt.xlabel('Epoch')

    plt.subplot(2, 2, 4)
    # Older Keras logs the rate as 'lr', newer releases as 'learning_rate'.
    lr_key = next((k for k in ('lr', 'learning_rate') if k in logs), None)
    if lr_key is not None:
        plt.plot(logs[lr_key], label='学习率')
        plt.title(f'Fold {fold} - 学习率变化')
        plt.ylabel('学习率')
        plt.xlabel('Epoch')
        plt.legend()

    plt.tight_layout()
    plt.savefig(f'training_history_fold{fold}.png')
    plt.close()


# ===== Prediction =====
def prepare_prediction_input(df, features, scaler_X):
    """Return the scaled feature row for the most recent draw in `df`.

    Args:
        df: all draws (columns 日期, 数字1..数字7).
        features: unused; kept so existing callers do not break.
        scaler_X: the fitted feature scaler.

    Returns:
        Array of shape (1, n_features) in the column order stored in
        'feature_columns.pkl'.

    Raises:
        ValueError: if `df` is too short for the rolling/lag features.
    """
    print("\n===== 准备预测输入 =====")
    feature_columns = joblib.load('feature_columns.pkl')
    print(f"预期特征数量: {len(feature_columns)}")

    latest = _build_feature_frame(_chronological(df)).iloc[[-1]]
    # Columns the current feature builder does not produce default to 0.
    prediction_features = latest.reindex(columns=feature_columns, fill_value=0)
    if prediction_features.isna().to_numpy().any():
        raise ValueError("not enough history to build prediction features")

    X_pred = scaler_X.transform(prediction_features)
    print(f"预测输入形状: {X_pred.shape}")
    return X_pred


def _ensemble_weights(n_models, val_losses=None):
    """Return weights summing to 1, proportional to 1 / validation loss.

    Falls back to uniform weights when losses are missing, mismatched in
    length, non-finite or non-positive.
    """
    uniform = np.full(n_models, 1.0 / n_models)
    if val_losses is None:
        try:
            val_losses = joblib.load(VAL_LOSS_FILE)
        except FileNotFoundError:
            return uniform
    losses = np.asarray(val_losses, dtype=float)
    if (losses.shape != (n_models,)
            or not np.all(np.isfinite(losses))
            or np.any(losses <= 0)):
        return uniform
    inverse = 1.0 / losses
    return inverse / inverse.sum()


def predict_next_period(models, val_losses=None):
    """Predict the next draw with a validation-loss-weighted ensemble.

    Features are rebuilt from the full ascending history in '02resultB.xlsx'
    so that the input window ends at the most recent draw.

    Args:
        models: trained models from `train_model`.
        val_losses: optional per-model validation losses; when omitted they
            are read from 'fold_val_losses.pkl', else weights are uniform.

    Returns:
        (red_numbers, red_probs, blue_numbers, blue_probs): the six
        highest-scoring red numbers and three highest-scoring blue numbers
        with their ensemble scores.

    Raises:
        ValueError: if `models` is empty or the history is too short.
    """
    if not models:
        raise ValueError("at least one trained model is required")

    scaler = joblib.load('scaler_X.save')
    feature_columns = joblib.load('feature_columns.pkl')

    raw_data = _chronological(pd.read_excel('02resultB.xlsx'))
    latest = _build_feature_frame(raw_data)[feature_columns].iloc[-TIME_STEPS:]
    if len(latest) < TIME_STEPS or latest.isna().to_numpy().any():
        raise ValueError("not enough history to build a prediction window")
    batch = np.expand_dims(scaler.transform(latest), axis=0)

    weights = _ensemble_weights(len(models), val_losses)
    red_probs = np.zeros(N_RED)
    blue_probs = np.zeros(N_BLUE)
    for model, weight in zip(models, weights):
        red_pred, blue_pred = model.predict(batch, verbose=0)
        red_probs += weight * np.asarray(red_pred)[0]
        blue_probs += weight * np.asarray(blue_pred)[0]

    red_indices = np.argsort(red_probs)[::-1][:6]
    blue_indices = np.argsort(blue_probs)[::-1][:3]

    return (
        [int(i) + 1 for i in red_indices],
        [float(red_probs[i]) for i in red_indices],
        [int(i) + 1 for i in blue_indices],
        [float(blue_probs[i]) for i in blue_indices],
    )


# ===== Entry point =====
def main():
    """Run the full pipeline: format, featurise, train, predict, report."""
    step1_format_data()
    df = step2_process_data()

    features, red_targets, blue_targets = create_features(df)
    X, y_red, y_blue, scaler_X = prepare_data(features, red_targets, blue_targets)
    models = train_model(X, y_red, y_blue)

    red_nums, red_probs, blue_nums, blue_probs = predict_next_period(models)

    print("\n" + "=" * 50)
    print("双色球下一期预测结果")
    print("=" * 50)

    print("\n红球预测 (前6个):")
    for num, prob in zip(red_nums, red_probs):
        print(f"号码 {num:2d} : 概率 {prob:.4f}")

    print("\n蓝球预测 (前3个):")
    for num, prob in zip(blue_nums, blue_probs):
        print(f"号码 {num:2d} : 概率 {prob:.4f}")

    # The red and blue lists differ in length; Series align on the index and
    # pad the shorter columns with NaN instead of raising.
    result_df = pd.DataFrame({
        '红球预测': pd.Series(red_nums),
        '红球概率': pd.Series(red_probs),
        '蓝球预测': pd.Series(blue_nums),
        '蓝球概率': pd.Series(blue_probs),
    })
    result_df.to_excel('prediction_results.xlsx', index=False)
    print("\n预测结果已保存至: prediction_results.xlsx")


def _smoke_test_model_build():
    """Build the model on a dummy (10, 20) input shape and print its summary."""
    model = build_attention_lstm_model(input_shape=(10, 20))
    model.summary()
    print("模型构建成功!")


if __name__ == "__main__":
    main()
最新发布
07-17
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值