import tensorflow as tf
from tensorflow.keras import layers, Model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import os
# 1. 数据加载与预处理
def load_and_preprocess_data(file_path):
    """Load and preprocess historical double-color-ball lottery data.

    Args:
        file_path: Path to a GBK-encoded CSV containing the columns
            红1..红6 (red balls, 1-33) and 蓝球 (blue ball, 1-16).

    Returns:
        Tuple (X_train, X_test, Y_train, Y_test, red_encoder, blue_encoder),
        where X is the one-hot-encoded previous draw and Y the current draw
        (both 214-dim float32), or six Nones on failure.
    """
    try:
        df = pd.read_csv(file_path, encoding="gbk")
        print(f"成功加载数据: {len(df)}条历史记录")
        # Validate that all required columns are present.
        required_columns = ['红1', '红2', '红3', '红4', '红5', '红6', '蓝球']
        if not all(col in df.columns for col in required_columns):
            missing = [col for col in required_columns if col not in df.columns]
            raise ValueError(f"CSV文件中缺少必要列: {missing}")
        # Extract ball values as float32. The original float64 cast propagated
        # into training and caused:
        #   TypeError: Tensors in list passed to 'values' of 'ConcatV2' Op
        #   have types [float32, float64] that don't all match.
        # because Keras layers compute in float32 by default.
        red_balls = df[['红1', '红2', '红3', '红4', '红5', '红6']].values.astype('float32')
        blue_balls = df[['蓝球']].values.astype('float32')
        # One-hot encoders with fixed category ranges: red 1-33, blue 1-16.
        red_encoder = OneHotEncoder(categories=[range(1, 34)], sparse_output=False)
        blue_encoder = OneHotEncoder(categories=[range(1, 17)], sparse_output=False)
        # Red encoding: 6 balls * 33 categories = 198 dims.
        red_encoded = red_encoder.fit_transform(red_balls.reshape(-1, 1))
        red_encoded = red_encoded.reshape(-1, 6 * 33)
        # Blue encoding: 16 dims.
        blue_encoded = blue_encoder.fit_transform(blue_balls)
        # Combined feature vector: 198 + 16 = 214 dims, kept in float32.
        combined = np.concatenate((red_encoded, blue_encoded), axis=1).astype('float32')
        # Build time-series pairs (X = previous draw, Y = current draw)
        # by offsetting the array instead of an explicit append loop.
        X = combined[:-1]
        Y = combined[1:]
        # Chronological split: shuffle=False preserves temporal order.
        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y, test_size=0.1, random_state=42, shuffle=False
        )
        print(f"训练集大小: {len(X_train)}, 测试集大小: {len(X_test)}")
        return X_train, X_test, Y_train, Y_test, red_encoder, blue_encoder
    except Exception as e:
        # Best-effort loader: report the problem and signal failure to main().
        print(f"数据处理错误: {e}")
        return None, None, None, None, None, None
# 2. CGAN模型构建
class CGAN(Model):
    """Conditional GAN: generates a draw conditioned on the previous draw."""

    def __init__(self, latent_dim=100):
        super(CGAN, self).__init__()
        self.latent_dim = latent_dim
        self.generator = self.build_generator()
        self.discriminator = self.build_discriminator()

    def build_generator(self):
        """Generator: (noise + previous draw, latent_dim+214) -> 214-dim sigmoid."""
        model = tf.keras.Sequential([
            layers.Dense(512, input_dim=self.latent_dim + 214),  # noise + previous draw
            layers.LeakyReLU(alpha=0.2),
            layers.BatchNormalization(),
            layers.Dense(1024),
            layers.LeakyReLU(alpha=0.2),
            layers.BatchNormalization(),
            layers.Dense(214, activation='sigmoid')  # 198 (red) + 16 (blue)
        ])
        return model

    def build_discriminator(self):
        """Discriminator: (candidate draw + previous draw, 428) -> real/fake prob."""
        model = tf.keras.Sequential([
            layers.Dense(1024, input_dim=214 * 2),  # current draw + previous draw
            layers.LeakyReLU(alpha=0.2),
            layers.Dropout(0.3),
            layers.Dense(512),
            layers.LeakyReLU(alpha=0.2),
            layers.Dropout(0.3),
            layers.Dense(1, activation='sigmoid')
        ])
        return model

    def compile(self, g_optimizer, d_optimizer, loss_fn):
        """Store separate optimizers for G and D plus the adversarial loss."""
        super(CGAN, self).compile()
        self.g_optimizer = g_optimizer
        self.d_optimizer = d_optimizer
        self.loss_fn = loss_fn

    def train_step(self, data):
        # Unpack (previous draw, current draw) and force float32 so every
        # tensor fed to tf.concat shares one dtype. The original code drew
        # noise with dtype=tf.float64, which raised:
        #   TypeError: Tensors in list passed to 'values' of 'ConcatV2' Op
        #   have types [float32, float64] that don't all match.
        prev_data, real_data = data
        prev_data = tf.cast(prev_data, tf.float32)
        real_data = tf.cast(real_data, tf.float32)
        batch_size = tf.shape(prev_data)[0]
        noise = tf.random.normal([batch_size, self.latent_dim])  # float32 by default
        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            # Generate a fake current draw conditioned on the previous draw.
            gen_input = tf.concat([noise, prev_data], axis=1)
            generated_data = self.generator(gen_input, training=True)
            # Pair each candidate with its condition for the discriminator.
            real_pairs = tf.concat([real_data, prev_data], axis=1)
            fake_pairs = tf.concat([generated_data, prev_data], axis=1)
            real_output = self.discriminator(real_pairs, training=True)
            fake_output = self.discriminator(fake_pairs, training=True)
            # Standard GAN losses: D separates real from fake, G fools D.
            d_real_loss = self.loss_fn(tf.ones_like(real_output), real_output)
            d_fake_loss = self.loss_fn(tf.zeros_like(fake_output), fake_output)
            d_loss = (d_real_loss + d_fake_loss) / 2
            g_loss = self.loss_fn(tf.ones_like(fake_output), fake_output)
        # Each network is updated from its own tape with its own optimizer.
        gen_grads = gen_tape.gradient(g_loss, self.generator.trainable_variables)
        disc_grads = disc_tape.gradient(d_loss, self.discriminator.trainable_variables)
        self.g_optimizer.apply_gradients(zip(gen_grads, self.generator.trainable_variables))
        self.d_optimizer.apply_gradients(zip(disc_grads, self.discriminator.trainable_variables))
        return {"d_loss": d_loss, "g_loss": g_loss}
# 3. 训练配置与执行
def train_gan(X_train, Y_train):
    """Train the CGAN on (previous draw, current draw) pairs.

    Args:
        X_train: Previous-draw features, shape (n, 214).
        Y_train: Current-draw features, shape (n, 214).

    Returns:
        The trained CGAN instance.

    Side effects: writes best-weights checkpoint, saves the generator to
    'double_color_generator.keras', and saves 'training_history.png'.
    """
    latent_dim = 128
    gan = CGAN(latent_dim=latent_dim)
    gan.compile(
        g_optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.5),
        d_optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.5),
        loss_fn=tf.keras.losses.BinaryCrossentropy()
    )
    # Cast to float32 up front so batches match the model's compute dtype
    # (float64 input triggers a ConcatV2 float32/float64 TypeError in train_step).
    dataset = tf.data.Dataset.from_tensor_slices(
        (X_train.astype('float32'), Y_train.astype('float32'))
    )
    dataset = dataset.shuffle(buffer_size=1024).batch(64).prefetch(tf.data.AUTOTUNE)
    # Checkpoint the weights whenever generator loss improves.
    checkpoint_path = "cgan_double_color.weights.h5"
    cp_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        save_weights_only=True,
        save_best_only=True,
        monitor='g_loss',
        mode='min'
    )
    # NOTE(review): ReduceLROnPlateau was removed — it reads `model.optimizer`,
    # which this custom two-optimizer compile() never sets, so it raised at
    # runtime. Adjust g_optimizer/d_optimizer learning rates directly if LR
    # scheduling is needed.
    history = gan.fit(
        dataset,
        epochs=500,
        callbacks=[
            tf.keras.callbacks.EarlyStopping(monitor='g_loss', patience=20, restore_best_weights=True),
            cp_callback
        ]
    )
    # Persist the generator for standalone inference.
    gan.generator.save('double_color_generator.keras')
    # Plot and save the training loss curves.
    plt.figure(figsize=(10, 6))
    plt.plot(history.history['d_loss'], label='判别器损失')
    plt.plot(history.history['g_loss'], label='生成器损失')
    plt.title('CGAN训练过程')
    plt.xlabel('训练轮次')
    plt.ylabel('损失值')
    plt.legend()
    plt.grid(True)
    plt.savefig('training_history.png')
    plt.close()
    return gan
# 4. 号码预测与解码
def predict_next_numbers(model, last_data, red_encoder, blue_encoder, num_predictions=5):
    """Generate candidate next-draw numbers with the trained generator.

    Args:
        model: Trained CGAN (uses model.generator and model.latent_dim).
        last_data: Encoded most recent draw, shape (1, 214).
        red_encoder, blue_encoder: Fitted encoders; unused here but kept so
            the call signature matches existing callers.
        num_predictions: Number of candidate draws to produce.

    Returns:
        List of (sorted_red_balls, blue_ball) tuples; red_balls always has
        6 distinct numbers in 1-33, blue_ball is in 1-16.
    """
    # Cast the condition to float32 so tf.concat with the float32 noise never
    # mixes dtypes (float64 input caused the ConcatV2 dtype-mismatch TypeError).
    last_data = tf.cast(tf.convert_to_tensor(last_data), tf.float32)
    predictions = []
    for _ in range(num_predictions):
        noise = tf.random.normal([1, model.latent_dim])
        gen_input = tf.concat([noise, last_data], axis=1)
        pred = model.generator(gen_input, training=False)
        # Split the 214-dim output into 6x33 red scores and 16 blue scores.
        red_pred = pred[0, :198].numpy().reshape(6, 33)
        blue_pred = pred[0, 198:].numpy()
        # Decode red balls: argmax per slot, de-duplicated and sorted.
        red_balls = sorted({int(np.argmax(row)) + 1 for row in red_pred})
        if len(red_balls) < 6:
            # Slots may collide on the same number; top up with the smallest
            # unused numbers so the result always has exactly 6 balls.
            missing = [b for b in range(1, 34) if b not in red_balls]
            red_balls.extend(missing[:6 - len(red_balls)])
            red_balls = sorted(red_balls[:6])
        # Decode the blue ball.
        blue_ball = int(np.argmax(blue_pred)) + 1
        predictions.append((red_balls, blue_ball))
        # Feed the prediction back as the condition for the next sample.
        last_data = pred
    return predictions
# 5. 主程序
def main():
    """Load data, train the CGAN, and print predicted lottery numbers."""
    file_path = r'D:\worker\lottery_results.csv'
    X_train, X_test, Y_train, Y_test, red_encoder, blue_encoder = load_and_preprocess_data(file_path)
    if X_train is None:
        print("数据加载失败,请检查文件路径和格式")
        return
    print("开始训练CGAN模型...")
    gan = train_gan(X_train, Y_train)
    print("模型训练完成")
    # Use the latest draw as the generation condition. Cast to float32: the
    # original float64 cast here produced the ConcatV2 float32/float64
    # dtype-mismatch TypeError inside predict_next_numbers.
    last_entry = X_test[-1:].astype('float32')
    predictions = predict_next_numbers(gan, last_entry, red_encoder, blue_encoder, num_predictions=5)
    # Print the candidate draws.
    print("\n双色球预测结果:")
    for i, (red, blue) in enumerate(predictions, 1):
        print(f"预测 {i}: 红球: {red}, 蓝球: {blue}")
if __name__ == "__main__":
    # Quiet TensorFlow's C++ and Python loggers before running the pipeline.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    tf.get_logger().setLevel('ERROR')
    main()
# NOTE: the original version of this script raised
#   TypeError: Tensors in list passed to 'values' of 'ConcatV2' Op have types
#   [float32, float64] that don't all match.
# Cause: data was cast to float64 and noise was drawn with dtype=tf.float64,
# while Keras layers compute in float32. Keep all tensors float32 (cast inputs
# and use default-dtype tf.random.normal) to avoid it.