import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, Model
from sklearn.preprocessing import MinMaxScaler
import tensorflow_probability as tfp
tfd = tfp.distributions
# 1. Data loading and preprocessing
def load_and_preprocess_data(file_path):
    """Load the historical Double Color Ball (双色球) data and preprocess it."""
    df = pd.read_csv(file_path)
    # The CSV is assumed to contain: draw date, red balls 1-6, blue ball.
    # Select only the number columns (adjust the names to the actual CSV structure).
    number_cols = ['红球1', '红球2', '红球3', '红球4', '红球5', '红球6', '蓝球']
    data = df[number_cols].values.astype(np.float32)
    # Build time-series features: the raw numbers plus differences from earlier draws
    features = []
    for i in range(len(data)):
        row = data[i].tolist()  # the original 7 numbers
        # Statistical features: odd/even counts, mean and std of the red balls
        reds = data[i][:6]
        blue = data[i][6]
        odd_count = np.sum(reds % 2 == 1)   # number of odd red balls
        even_count = 6 - odd_count          # number of even red balls
        red_mean = np.mean(reds)
        red_std = np.std(reds)
        # Differences from the previous 3 draws (zero-padded when not available)
        if i >= 1:
            diff1 = data[i] - data[i-1]
            row.extend(diff1.tolist())
        else:
            row.extend([0.0] * 7)
        if i >= 2:
            diff2 = data[i] - data[i-2]
            row.extend(diff2.tolist())
        else:
            row.extend([0.0] * 7)
        if i >= 3:
            diff3 = data[i] - data[i-3]
            row.extend(diff3.tolist())
        else:
            row.extend([0.0] * 7)
        # Append the statistical features
        row.extend([odd_count, even_count, red_mean, red_std])
        features.append(row)
    features = np.array(features, dtype=np.float32)
    # Normalize to [-1, 1]
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaled_features = scaler.fit_transform(features)
    return scaled_features, scaler, data, features.shape[1]
# 2. Model components
class Encoder(layers.Layer):
    """Encoder: map an observation to a latent state distribution."""
    def __init__(self, latent_dim):
        super(Encoder, self).__init__()
        self.dense1 = layers.Dense(128, activation='elu')
        self.dense2 = layers.Dense(128, activation='elu')
        self.latent_mean = layers.Dense(latent_dim)
        self.latent_std = layers.Dense(latent_dim, activation='softplus')

    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.dense2(x)
        mean = self.latent_mean(x)
        std = self.latent_std(x) + 1e-5  # keep the scale strictly positive
        return tfd.MultivariateNormalDiag(loc=mean, scale_diag=std)
class GRUDynamics(layers.Layer):
    """Dynamics model: a GRU cell that predicts the next recurrent state."""
    def __init__(self, hidden_dim):
        super(GRUDynamics, self).__init__()
        self.gru = layers.GRUCell(hidden_dim)
        self.dense = layers.Dense(hidden_dim, activation='elu')

    def call(self, prev_state, action, prev_rnn_state):
        # Concatenate the previous latent state, the action, and the previous RNN state
        combined = tf.concat([prev_state, action, prev_rnn_state], axis=-1)
        x = self.dense(combined)
        # GRU update; GRUCell returns (output, [new_state]), so unwrap the state list
        _, new_states = self.gru(x, [prev_rnn_state])
        return new_states[0]
class Decoder(layers.Layer):
    """Decoder: reconstruct the observation from the recurrent state."""
    def __init__(self, output_dim):
        super(Decoder, self).__init__()
        self.dense1 = layers.Dense(128, activation='elu')
        self.dense2 = layers.Dense(128, activation='elu')
        self.output_layer = layers.Dense(output_dim)

    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.dense2(x)
        return self.output_layer(x)
class ActorNetwork(layers.Layer):
    """Actor network: produce the action distribution (the number prediction)."""
    def __init__(self, action_dim):
        super(ActorNetwork, self).__init__()
        self.dense1 = layers.Dense(128, activation='elu')
        self.dense2 = layers.Dense(128, activation='elu')
        self.mean_layer = layers.Dense(action_dim)
        self.std_layer = layers.Dense(action_dim, activation='softplus')

    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.dense2(x)
        mean = self.mean_layer(x)
        std = self.std_layer(x) + 1e-5
        return tfd.MultivariateNormalDiag(loc=mean, scale_diag=std)
class CriticNetwork(layers.Layer):
    """Critic network: estimate the state value."""
    def __init__(self):
        super(CriticNetwork, self).__init__()
        self.dense1 = layers.Dense(128, activation='elu')
        self.dense2 = layers.Dense(128, activation='elu')
        self.value_layer = layers.Dense(1)

    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.dense2(x)
        return self.value_layer(x)
# 3. DreamerV3 agent
class DreamerV3Agent:
    def __init__(self, obs_dim, action_dim, latent_dim=64, hidden_dim=128):
        # World-model components
        self.encoder = Encoder(latent_dim)
        self.dynamics = GRUDynamics(hidden_dim)
        self.decoder = Decoder(obs_dim)
        # Actor-critic components
        self.actor = ActorNetwork(action_dim)
        self.critic = CriticNetwork()
        # Optimizers
        self.world_model_optimizer = tf.keras.optimizers.Adam(1e-4)
        self.agent_optimizer = tf.keras.optimizers.Adam(1e-4)
        # Initial RNN state
        self.initial_rnn_state = tf.zeros((1, hidden_dim))
    @tf.function
    def train_world_model(self, obs_seq):
        """Train the world model: encoder, dynamics model, and decoder."""
        total_loss = 0.0
        batch_size = obs_seq.shape[0]
        seq_length = obs_seq.shape[1]
        for b in range(batch_size):
            rnn_state = self.initial_rnn_state
            with tf.GradientTape() as tape:
                seq_loss = 0.0
                for t in range(seq_length - 1):
                    # Encode the current observation (add a batch dimension)
                    obs_current = tf.expand_dims(obs_seq[b, t, :], 0)
                    posterior = self.encoder(obs_current)
                    latent_state = posterior.sample()
                    # Predict the next RNN state with the dynamics model
                    # (the lottery has no external action, so a zero action is used)
                    action = tf.zeros((1, 7))
                    rnn_state = self.dynamics(latent_state, action, rnn_state)
                    # Decode the predicted observation
                    decoded_obs = self.decoder(rnn_state)
                    # Reconstruction loss against the next observation
                    obs_next = obs_seq[b, t + 1, :]
                    recon_loss = tf.reduce_mean(tf.square(decoded_obs - tf.expand_dims(obs_next, 0)))
                    # KL divergence against a standard normal prior (regularization)
                    prior = tfd.MultivariateNormalDiag(loc=tf.zeros_like(posterior.mean()),
                                                       scale_diag=tf.ones_like(posterior.stddev()))
                    kl_loss = tf.reduce_mean(tfd.kl_divergence(posterior, prior))
                    seq_loss += recon_loss + 0.1 * kl_loss
            # Update the world-model parameters for this sequence
            variables = (self.encoder.trainable_variables
                         + self.dynamics.trainable_variables
                         + self.decoder.trainable_variables)
            grads = tape.gradient(seq_loss, variables)
            self.world_model_optimizer.apply_gradients(zip(grads, variables))
            total_loss += seq_loss
        return total_loss
    @tf.function
    def train_agent(self, states):
        """Train the actor and critic networks."""
        with tf.GradientTape() as tape:
            # Actor produces an action distribution
            action_dist = self.actor(states)
            actions = action_dist.sample()
            # Critic estimates the state value
            values = self.critic(states)
            # Reward design: ideally the reward would measure how close the predicted
            # numbers are to the actual next draw, but the next draw is unknown at
            # prediction time. Given the randomness of the lottery, an alternative is
            # a rule-based reward that only checks validity (red balls in 1-33 without
            # repeats, blue ball in 1-16) and a reasonable spread; a sketch of such a
            # reward follows this method.
            # Placeholder used here: the summed absolute distance of each predicted
            # element from 0.5 (illustration only).
            rewards = tf.reduce_sum(tf.abs(actions - 0.5), axis=1, keepdims=True)
            # Actor loss: maximize the expected reward (REINFORCE-style);
            # log_probs has shape (batch,), so squeeze the rewards to match
            log_probs = action_dist.log_prob(actions)
            actor_loss = -tf.reduce_mean(log_probs * tf.stop_gradient(tf.squeeze(rewards, axis=1)))
            # Critic loss: minimize the value prediction error
            critic_loss = tf.reduce_mean(tf.square(values - rewards))
            total_loss = actor_loss + critic_loss
        # Update actor and critic parameters
        variables = self.actor.trainable_variables + self.critic.trainable_variables
        grads = tape.gradient(total_loss, variables)
        self.agent_optimizer.apply_gradients(zip(grads, variables))
        return actor_loss, critic_loss
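    # The comments in train_agent mention a rule-based reward (valid ranges, no
    # repeated red balls) as an alternative to the placeholder above. The helper
    # below is one possible sketch of that idea and is NOT part of the original
    # pipeline: it assumes `predicted_numbers` are already denormalized to actual
    # ball values, and it runs in eager/numpy mode (wrap it with tf.numpy_function
    # if it needs to be called inside a @tf.function).
    @staticmethod
    def rule_based_reward(predicted_numbers):
        """Reward = fraction of rule checks satisfied (range and uniqueness)."""
        rewards = []
        for row in np.asarray(predicted_numbers):
            reds = np.round(row[:6]).astype(int)
            blue = int(round(float(row[6])))
            checks = [
                bool(np.all((reds >= 1) & (reds <= 33))),  # red balls in 1-33
                len(set(reds.tolist())) == 6,              # red balls are unique
                1 <= blue <= 16,                           # blue ball in 1-16
            ]
            rewards.append(sum(checks) / len(checks))
        return np.array(rewards, dtype=np.float32).reshape(-1, 1)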
    def predict_next(self, obs_history):
        """Predict the next draw from a history of observations."""
        # Initialize the RNN state
        rnn_state = self.initial_rnn_state
        # Roll the world model over the history
        for t in range(obs_history.shape[0]):
            obs_t = tf.expand_dims(obs_history[t], 0)
            # Encode
            posterior = self.encoder(obs_t)
            latent_state = posterior.sample()
            # Dynamics update (zero action)
            action = tf.zeros((1, 7))
            rnn_state = self.dynamics(latent_state, action, rnn_state)
        # Use the actor to generate the predicted action (i.e. the predicted numbers)
        action_dist = self.actor(rnn_state)
        prediction = action_dist.mean()
        return prediction[0].numpy()
# 4. Post-process the prediction
def postprocess_prediction(pred, scaler, feature_dim):
    """Convert the model output back into actual ball numbers."""
    # Build a dummy feature row so the scaler can be inverted (original feature width)
    dummy_features = np.zeros((1, feature_dim))
    # Place the 7 predicted numbers in the first 7 positions
    dummy_features[0, :7] = pred
    # Invert the normalization
    denorm_pred = scaler.inverse_transform(dummy_features)[0, :7]
    # Split into red balls and blue ball
    red_pred = denorm_pred[:6]
    blue_pred = denorm_pred[6]
    # Clip red balls to 1-33 and the blue ball to 1-16, then round to integers
    red_pred = np.clip(np.round(red_pred), 1, 33).astype(int)
    blue_pred = int(np.clip(round(float(blue_pred)), 1, 16))
    # Sort the red balls (duplicates may remain; see the helper below)
    red_pred_sorted = np.sort(red_pred)
    return red_pred_sorted, blue_pred
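# The clipping above can leave duplicate red balls, while a valid ticket needs six
# distinct numbers. This optional helper is a sketch (not in the original code) that
# replaces duplicates with the nearest unused numbers in 1-33; call it on the result
# of postprocess_prediction, e.g. red_balls = ensure_unique_reds(red_balls).
def ensure_unique_reds(red_balls):
    """Return six distinct red balls in 1-33, keeping the predictions where possible."""
    chosen = []
    for n in red_balls:
        n = int(np.clip(n, 1, 33))
        offset = 0
        # Search outward from the predicted value for an unused number
        while True:
            for candidate in (n - offset, n + offset):
                if 1 <= candidate <= 33 and candidate not in chosen:
                    chosen.append(candidate)
                    break
            else:
                offset += 1
                continue
            break
    return np.sort(np.array(chosen[:6]))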
# 5. Main entry point
def main():
    # Path to the data file
    file_path = r"D:\worker\lottery_results7.csv"
    # Hyperparameters
    seq_len = 10        # sequence length
    batch_size = 32
    epochs_world = 100
    epochs_agent = 50
    # Load and preprocess the data
    scaled_features, scaler, original_data, feature_dim = load_and_preprocess_data(file_path)
    # Build training sequences of shape (num_sequences, seq_len, feature_dim)
    sequences = []
    for i in range(len(scaled_features) - seq_len):
        sequences.append(scaled_features[i:i+seq_len])
    sequences = np.array(sequences)
    # Create the agent
    agent = DreamerV3Agent(
        obs_dim=feature_dim,
        action_dim=7,   # predict 7 numbers (6 red + 1 blue)
        latent_dim=64,
        hidden_dim=128
    )
    # Train the world model
    print("Training the world model...")
    for epoch in range(epochs_world):
        epoch_loss = 0.0
        for i in range(0, len(sequences), batch_size):
            batch = sequences[i:i+batch_size]
            loss = agent.train_world_model(batch)
            epoch_loss += loss.numpy()
        print(f"Epoch {epoch+1}/{epochs_world}, world-model loss: {epoch_loss/len(sequences):.4f}")
    # Train the actor-critic
    # Note: skip the first seq_len rows so each state corresponds to the last
    # step of one training sequence.
    print("Training the actor-critic...")
    states = scaled_features[seq_len:]
    for epoch in range(epochs_agent):
        actor_losses = []
        critic_losses = []
        for i in range(0, len(states), batch_size):
            batch_states = states[i:i+batch_size]
            actor_loss, critic_loss = agent.train_agent(batch_states)
            actor_losses.append(actor_loss.numpy())
            critic_losses.append(critic_loss.numpy())
        avg_actor_loss = np.mean(actor_losses)
        avg_critic_loss = np.mean(critic_losses)
        print(f"Epoch {epoch+1}/{epochs_agent}, actor loss: {avg_actor_loss:.4f}, critic loss: {avg_critic_loss:.4f}")
    # Predict the next draw
    print("Predicting the next draw...")
    # Use the most recent seq_len draws
    last_sequence = scaled_features[-seq_len:]
    raw_pred = agent.predict_next(last_sequence)
    red_balls, blue_ball = postprocess_prediction(raw_pred, scaler, feature_dim)
    # Print the prediction
    print("Predicted red balls:", " ".join(map(str, red_balls)))
    print("Predicted blue ball:", blue_ball)

if __name__ == "__main__":
    main()