import requests
from bs4 import BeautifulSoup
from collections import Counter
# Fetch the first results page; its pager holds the total page count.
res = requests.get('http://kaijiang.zhcw.com/zhcw/html/ssq/list_1.html', timeout=30)
res.encoding = 'utf-8'
htm = res.text
# Parse the page.
soup = BeautifulSoup(htm, 'html.parser')
# URL prefix shared by every results page (list_<n>.html).
prefix_url = 'http://kaijiang.zhcw.com/zhcw/html/ssq/list'
# Total number of pages: first <strong> inside <p class="pg">.
total = int(soup.find('p', attrs={"class": "pg"}).find_all('strong')[0].text)
red_num = []  # every red ball found in the scraped rows
blue_num = []  # every blue ball found in the scraped rows
# Write each draw to a text file.  The original called `local_file.close`
# without parentheses, so the file was never closed; the `with` block
# guarantees the handle is flushed and closed even if a request fails.
with open('双色球.txt', 'w', encoding='utf-8') as local_file:
    # Walk every results page and collect its draws.
    for page_num in range(1, total + 1):
        t_url = prefix_url + '_' + str(page_num) + '.html'
        print(t_url)
        res2 = requests.get(t_url, timeout=30)
        res2.encoding = 'utf-8'
        page_context = res2.text
        page_soup = BeautifulSoup(page_context, 'html.parser')
        if page_soup.table is None:
            continue
        table_rows = page_soup.table.find_all('tr')
        # The first two rows and the last row of the table are skipped.
        for row_num in range(2, len(table_rows) - 1):
            row_tds = table_rows[row_num].find_all('td')
            ems = row_tds[2].find_all('em')
            # Seven <em> cells per draw: six red balls followed by one blue.
            balls = [ems[position].string for position in range(7)]
            result = row_tds[0].string + ',' + row_tds[1].string + ', ' + ' '.join(balls)
            local_file.write(result + '\n')
            print(result)
            red_num.extend(balls[:6])
            blue_num.append(balls[6])
red_count = Counter(red_num)
blue_count = Counter(blue_num)
# First pass ranks by ascending frequency ("顺选"), second pass by
# descending frequency ("反选"); both print the same report layout.
for label, descending in (('顺选', False), ('反选', True)):
    if descending:
        print('------------------------------------------------------------------------------')
    red_count_sorted = sorted(red_count.items(), key=lambda pair: pair[1], reverse=descending)
    blue_count_sorted = sorted(blue_count.items(), key=lambda pair: pair[1], reverse=descending)
    print(red_count_sorted)
    print(blue_count_sorted)
    ssq_red = [item[0] for item in red_count_sorted[0:6]]
    ssq_blue = [item[0] for item in blue_count_sorted[0:3]]
    print(ssq_red)
    print(ssq_blue)
    ssq_red.sort()
    ssq_blue.sort()
    # One ticket per blue candidate, all sharing the same six reds.
    for pick in range(3):
        print(label + '-' + str(pick + 1) + ':' + str(ssq_red) + '|' + ssq_blue[pick])
import numpy as np
import pandas as pd
from collections import Counter
class DoubleColorBallPredictor:
    """Frequency-based number suggester built on a DataFrame of past draws."""

    def __init__(self, history_data):
        """
        Store the draw history and the full number pools.

        history_data: DataFrame of past draws with the columns
        ['red1','red2','red3','red4','red5','red6','blue'].
        """
        self.history = history_data
        self.all_reds = list(range(1, 34))
        self.all_blues = list(range(1, 17))

    def analyze_reds(self):
        """Return (10 most frequent reds, 10 least frequent reds, Counter)."""
        freq = Counter()
        # Feed the columns one after another so ties keep column-major order.
        for position in range(1, 7):
            freq.update(self.history[f'red{position}'].values)
        ranked = freq.most_common()
        hot_reds = [num for num, _ in ranked[:10]]
        # Least frequent first.
        cold_reds = [num for num, _ in ranked[::-1][:10]]
        return hot_reds, cold_reds, freq

    def analyze_blues(self):
        """Return (5 most frequent blues, 5 least frequent blues, Counter)."""
        freq = Counter(self.history['blue'].values)
        ranked = freq.most_common()
        hot_blues = [num for num, _ in ranked[:5]]
        cold_blues = [num for num, _ in ranked[::-1][:5]]
        return hot_blues, cold_blues, freq

    def predict(self):
        """Build the dictionary of suggestions (keys are the Chinese labels)."""
        hot_reds, cold_reds, _ = self.analyze_reds()
        hot_blues, cold_blues, _ = self.analyze_blues()

        # Candidate pools: everything except the coldest 3 reds / 2 blues.
        banned_reds = cold_reds[:3]
        rec_reds = [num for num in self.all_reds if num not in banned_reds]
        np.random.shuffle(rec_reds)
        banned_blues = cold_blues[:2]
        rec_blues = [num for num in self.all_blues if num not in banned_blues]
        np.random.shuffle(rec_blues)

        top_five = hot_reds[:5]
        top_six = hot_reds[:6]

        # Zone ratio of the six hottest reds (1-11, 12-22, 23-33).
        zone_counts = [0, 0, 0]
        for num in top_six:
            if num <= 11:
                zone_index = 0
            elif num <= 22:
                zone_index = 1
            else:
                zone_index = 2
            zone_counts[zone_index] += 1
        zone_ratio = ':'.join(str(count) for count in zone_counts)

        # Odd:even ratio of the six hottest reds.
        odd_count = len([num for num in top_six if num % 2 == 1])
        parity_ratio = f"{odd_count}:{6 - odd_count}"

        def generate_compound(red_count, blue_count):
            """Draw a sorted multi-number bet from the candidate pools."""
            reds = sorted(np.random.choice(rec_reds, red_count, replace=False))
            blues = sorted(np.random.choice(rec_blues, blue_count, replace=False))
            return reds, blues

        # Drawn in this order so the random stream is consumed as before.
        compound_12_4 = generate_compound(12, 4)
        compound_9_3 = generate_compound(9, 3)
        compound_8_3 = generate_compound(8, 3)
        compound_7_2 = generate_compound(7, 2)

        # Five single bets: six reds plus one blue each.
        single_bets = []
        for _ in range(5):
            reds = sorted(np.random.choice(rec_reds, 6, replace=False))
            blue = np.random.choice(rec_blues, 1)[0]
            single_bets.append((reds, blue))

        prediction = {}
        prediction["金胆"] = hot_reds[0]
        prediction["三胆"] = hot_reds[:3]
        prediction["杀3红"] = cold_reds[:3]
        prediction["杀6红"] = cold_reds[:6]
        prediction["龙头"] = min(top_five)
        prediction["凤尾"] = max(top_five)
        prediction["20码"] = sorted(rec_reds[:20])
        prediction["五码定蓝"] = hot_blues[:5]
        prediction["一码定蓝"] = hot_blues[0]
        prediction["杀蓝"] = cold_blues[0]
        prediction["三区比"] = zone_ratio
        prediction["奇偶比"] = parity_ratio
        prediction["12+4复式"] = compound_12_4
        prediction["9+3复式"] = compound_9_3
        prediction["8+3复式"] = compound_8_3
        prediction["7+2复式"] = compound_7_2
        prediction["单式推荐"] = single_bets
        return prediction
# Example usage
if __name__ == "__main__":
    # Simulated history (replace with real draw data in a real application).
    # Columns are generated in red1..red6, blue order.
    history_data = pd.DataFrame({
        **{f'red{position}': np.random.randint(1, 34, 100) for position in range(1, 7)},
        'blue': np.random.randint(1, 17, 100),
    })
    predictor = DoubleColorBallPredictor(history_data)
    prediction = predictor.predict()

    # Print the prediction report.
    print("双色球预测结果:")
    scalar_keys = ('金胆', '三胆', '杀3红', '杀6红', '龙头', '凤尾',
                   '20码', '五码定蓝', '一码定蓝', '杀蓝', '三区比', '奇偶比')
    for key in scalar_keys:
        print(f"{key}: {prediction[key]}")

    print("\n复式推荐:")
    for label in ('12+4', '9+3', '8+3', '7+2'):
        compound_reds, compound_blues = prediction[label + '复式']
        print(f"{label}: 红球={compound_reds} 蓝球={compound_blues}")

    print("\n单式推荐:")
    for i, (bet_reds, bet_blue) in enumerate(prediction['单式推荐'], 1):
        print(f"第{i}组: 红球={bet_reds} 蓝球={bet_blue}")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def frequency_analysis(data):
    """Count how often every red (1-33) and blue (1-16) number was drawn.

    Reads the columns '红1'..'红6' and '蓝球' of ``data`` and returns
    ``(red_freq, blue_freq)``: two Series indexed by ball number, with 0
    for numbers that never appear.
    """
    def counts_over(values, highest):
        """Tally ``values`` and pad the result to cover 1..highest."""
        tally = values.value_counts().sort_index()
        return tally.reindex(range(1, highest + 1), fill_value=0)

    red_columns = ['红1', '红2', '红3', '红4', '红5', '红6']
    # Flatten the six red columns into one long Series before counting.
    flat_reds = pd.Series(data[red_columns].values.flatten())
    red_freq = counts_over(flat_reds, 33)
    blue_freq = counts_over(data['蓝球'], 16)
    return red_freq, blue_freq
def plot_frequency(red_freq, blue_freq):
    """Draw side-by-side bar charts of the red and blue frequency Series."""
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    # (series, bar colour, title, x-axis label) for the left and right panels.
    panels = (
        (red_freq, 'red', '红球出现频率', '红球号码'),
        (blue_freq, 'blue', '蓝球出现频率', '蓝球号码'),
    )
    for axis, (freq, colour, title, x_label) in zip(axes, panels):
        freq.plot(kind='bar', ax=axis, color=colour, alpha=0.7)
        axis.set_title(title)
        axis.set_xlabel(x_label)
        axis.set_ylabel('出现次数')
    plt.tight_layout()
    plt.show()
# Example usage
if __name__ == "__main__":
    # Load the draw history (the CSV is assumed to exist already).
    data = pd.read_csv('ssq_history.csv')
    red_freq, blue_freq = frequency_analysis(data)
    plot_frequency(red_freq, blue_freq)
    # Report hot numbers (most frequent) and cold numbers (least frequent).
    report = (
        ("红球热号:", red_freq.nlargest(5)),
        ("红球冷号:", red_freq.nsmallest(5)),
        ("蓝球热号:", blue_freq.nlargest(3)),
        ("蓝球冷号:", blue_freq.nsmallest(3)),
    )
    for heading, ranking in report:
        print(heading, ranking.index.tolist())
def parity_analysis(data):
    """Add odd/even statistics to ``data`` and summarise the red ratios.

    Adds the columns '红球奇数', '红球偶数', '奇偶比' and '蓝球奇偶' to
    ``data`` in place.  Returns ``(data, ratio_dist)`` where ``ratio_dist``
    counts each odd:even ratio, ordered by ratio label.
    """
    def odd_total(row):
        """Number of odd values among one draw's red balls."""
        return sum(1 for num in row if num % 2 == 1)

    def parity_label(ball):
        """'奇' for an odd blue ball, '偶' otherwise."""
        if ball % 2 == 1:
            return '奇'
        return '偶'

    reds = data[['红1', '红2', '红3', '红4', '红5', '红6']]
    # Per-draw odd/even counts of the six reds.
    data['红球奇数'] = reds.apply(odd_total, axis=1)
    data['红球偶数'] = 6 - data['红球奇数']
    odd_text = data['红球奇数'].astype(str)
    even_text = data['红球偶数'].astype(str)
    data['奇偶比'] = odd_text + ':' + even_text
    # Parity of the blue ball.
    data['蓝球奇偶'] = data['蓝球'].apply(parity_label)
    # Distribution of the odd:even ratios.
    ratio_dist = data['奇偶比'].value_counts().sort_index()
    return data, ratio_dist
# Example usage
if __name__ == "__main__":
    data = pd.read_csv('ssq_history.csv')
    data, ratio_dist = parity_analysis(data)
    # Print the head of the odd:even distribution.
    # NOTE: ratio_dist is ordered by ratio label (sort_index), so head(5)
    # shows the first five labels rather than the five most frequent ones.
    print("最常见的奇偶比:")
    print(ratio_dist.head(5))
    # Bar chart of the full distribution.
    plt.figure(figsize=(10, 6))
    ratio_dist.plot.bar(color='green')
    plt.title('红球奇偶比分布')
    plt.xlabel('奇偶比')
    plt.ylabel('出现次数')
    plt.show()
def zone_analysis(data):
    """Add three-zone counts to ``data`` and tally the zone ratios.

    The zones are 1-11, 12-22 and 23-33.  Adds the columns '一区', '二区',
    '三区' and '三区比' to ``data`` in place and returns
    ``(data, zone_dist)``, where ``zone_dist`` counts each ratio string.
    """
    zone_bounds = ((1, 11), (12, 22), (23, 33))

    def count_zones(row):
        """Return how many of one draw's reds fall into each zone."""
        return tuple(
            sum(1 for num in row if low <= num <= high)
            for low, high in zone_bounds
        )

    reds = data[['红1', '红2', '红3', '红4', '红5', '红6']]
    # One row of three counts per draw, expanded into three columns.
    data[['一区', '二区', '三区']] = reds.apply(count_zones, axis=1, result_type='expand')
    # Ratio string such as "2:2:2".
    zone_texts = [data[column].astype(str) for column in ('一区', '二区', '三区')]
    data['三区比'] = zone_texts[0] + ':' + zone_texts[1] + ':' + zone_texts[2]
    # Distribution of the ratio strings.
    zone_dist = data['三区比'].value_counts()
    return data, zone_dist
# Example usage
if __name__ == "__main__":
    data = pd.read_csv('ssq_history.csv')
    data, zone_dist = zone_analysis(data)
    # Print the most common zone ratios.
    print("最常见的三区比:")
    print(zone_dist.head(5))
    # Horizontal bar chart of the ten most common ratios, largest on top.
    plt.figure(figsize=(10, 6))
    top_ratios = zone_dist.head(10)
    top_ratios.plot.barh(color='purple')
    plt.title('三区比分布')
    plt.xlabel('出现次数')
    plt.ylabel('三区比')
    plt.gca().invert_yaxis()
    plt.show()
def path_analysis(data):
    """Add remainder-mod-3 ("012 路") statistics to ``data``.

    Adds the columns '0路', '1路', '2路' (how many reds leave each
    remainder), '012路比' (the three counts joined by ':') and '蓝球012路'
    (the blue ball modulo 3) in place, then returns ``data``.
    """
    def count_paths(row):
        """Return the number of reds with remainder 0, 1 and 2."""
        return tuple(
            sum(1 for num in row if num % 3 == remainder)
            for remainder in (0, 1, 2)
        )

    reds = data[['红1', '红2', '红3', '红4', '红5', '红6']]
    data[['0路', '1路', '2路']] = reds.apply(count_paths, axis=1, result_type='expand')
    path_texts = [data[column].astype(str) for column in ('0路', '1路', '2路')]
    data['012路比'] = path_texts[0] + ':' + path_texts[1] + ':' + path_texts[2]
    # Remainder of the blue ball.
    data['蓝球012路'] = data['蓝球'].apply(lambda ball: ball % 3)
    return data
# Example usage
if __name__ == "__main__":
    data = pd.read_csv('ssq_history.csv')
    data = path_analysis(data)
    # Distribution of the red 0:1:2 ratios.
    path_dist = data['012路比'].value_counts()
    blue_path_dist = data['蓝球012路'].value_counts()
    # Left: ten most common red ratios; right: blue remainders as a pie.
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    path_dist.head(10).plot.bar(color='orange')
    plt.title('012路分布')
    plt.subplot(1, 2, 2)
    blue_path_dist.plot(
        kind='pie',
        autopct='%1.1f%%',
        colors=['gold', 'lightcoral', 'lightblue'],
    )
    plt.title('蓝球012路分布')
    plt.ylabel('')
    plt.tight_layout()
    plt.show()
def sum_and_span_analysis(data):
    """Add the red-ball sum ('和值') and span ('跨度') of every draw.

    The span is the largest red minus the smallest red.  Both columns are
    added to ``data`` in place and ``data`` is returned.
    """
    reds = data[['红1', '红2', '红3', '红4', '红5', '红6']]
    # Sum of the six reds.
    data['和值'] = reds.sum(axis=1)
    # Largest minus smallest red.
    highest = reds.max(axis=1)
    lowest = reds.min(axis=1)
    data['跨度'] = highest - lowest
    return data
# Example usage
if __name__ == "__main__":
    data = pd.read_csv('ssq_history.csv')
    data = sum_and_span_analysis(data)
    # Two histograms, each with a dashed red line at the mean.
    plt.figure(figsize=(14, 6))

    plt.subplot(1, 2, 1)
    sums = data['和值']
    plt.hist(sums, bins=30, color='teal', alpha=0.7)
    plt.axvline(sums.mean(), color='red', linestyle='dashed', linewidth=1)
    plt.title('红球和值分布')
    plt.xlabel('和值')
    plt.ylabel('频次')

    plt.subplot(1, 2, 2)
    spans = data['跨度']
    plt.hist(spans, bins=20, color='brown', alpha=0.7)
    plt.axvline(spans.mean(), color='red', linestyle='dashed', linewidth=1)
    plt.title('红球跨度分布')
    plt.xlabel('跨度')

    plt.tight_layout()
    plt.show()
def consecutive_and_repeat_analysis(data):
    """Add consecutive-number and repeated-number counts to ``data``.

    Adds two columns in place and returns ``data``:

    * '连号数量' - number of adjacent pairs (after sorting one draw's six
      reds) that differ by exactly 1.
    * '重号数量' - number of reds shared with the previous row; the first
      row gets 0.

    Both columns are computed by row position.  The previous version read
    rows with ``iloc`` but wrote with the label-based ``data.at[i, ...]``,
    which appended rows or hit the wrong ones whenever the frame's index
    was not 0..n-1.
    """
    red_columns = ['红1', '红2', '红3', '红4', '红5', '红6']
    # Plain Python lists: one list of six reds per draw, in row order.
    rows = data[red_columns].values.tolist()

    def count_consecutive(row):
        """Count adjacent sorted values that differ by exactly 1."""
        ordered = sorted(row)
        return sum(1 for low, high in zip(ordered, ordered[1:]) if high - low == 1)

    data['连号数量'] = [count_consecutive(row) for row in rows]

    # Overlap with the previous draw; the first draw has nothing to compare.
    repeats = [0] * len(rows)
    for position in range(1, len(rows)):
        repeats[position] = len(set(rows[position]) & set(rows[position - 1]))
    data['重号数量'] = repeats
    return data
# Example usage
if __name__ == "__main__":
    data = pd.read_csv('ssq_history.csv')
    data = consecutive_and_repeat_analysis(data)
    consecutive_dist = data['连号数量'].value_counts().sort_index()
    repeat_dist = data['重号数量'].value_counts().sort_index()
    # Left: consecutive-pair counts; right: repeats from the previous draw.
    plt.figure(figsize=(14, 6))

    plt.subplot(1, 2, 1)
    consecutive_dist.plot.bar(color='green')
    plt.title('连号数量分布')
    plt.xlabel('连号数量')
    plt.ylabel('期数')

    plt.subplot(1, 2, 2)
    repeat_dist.plot.bar(color='blue')
    plt.title('重号数量分布')
    plt.xlabel('重号数量')

    plt.tight_layout()
    plt.show()
def ac_value_analysis(data):
    """Add the AC value of every draw to ``data`` as column 'AC值'.

    The AC value is the number of distinct positive pairwise differences
    among the six reds, minus 5.  ``data`` is modified in place and
    returned.
    """
    def calculate_ac(row):
        """Return the AC value of one draw."""
        numbers = sorted(row)
        gaps = set()
        for position, earlier in enumerate(numbers):
            for later in numbers[position + 1:]:
                gap = abs(later - earlier)
                if gap > 0:
                    gaps.add(gap)
        return len(gaps) - 5

    reds = data[['红1', '红2', '红3', '红4', '红5', '红6']]
    data['AC值'] = reds.apply(calculate_ac, axis=1)
    return data
# Example usage
if __name__ == "__main__":
    data = pd.read_csv('ssq_history.csv')
    data = ac_value_analysis(data)
    # Histogram with one bar per AC value from 4 to 10.
    # The bin edges run to 11 because matplotlib closes only the last bin on
    # the right: with edges 4..10 the final bin was [9, 10], so AC values 9
    # and 10 were merged into a single bar.
    plt.figure(figsize=(10, 6))
    plt.hist(data['AC值'], bins=range(4, 12), align='left', rwidth=0.8, color='purple')
    plt.xticks(range(4, 11))
    plt.title('AC值分布')
    plt.xlabel('AC值')
    plt.ylabel('频次')
    plt.show()
def blue_ball_follow_analysis(data):
    """Build a row-to-next-row transition table for the blue ball.

    Walks the '蓝球' column in row order and counts, for each value, which
    value appears in the following row.  Returns
    ``(prob_matrix, follow_stats)``: the 16x16 table of counts divided by
    each row's total (rows with no transitions become NaN), and a dict
    mapping each number 1-16 to the index labels of the three largest
    entries in its row.
    """
    numbers = range(1, 17)
    # Counts: row label = blue in one row, column label = blue in the next.
    transition_matrix = pd.DataFrame(0, index=numbers, columns=numbers)
    blues = data['蓝球'].values
    for prev, current in zip(blues[:-1], blues[1:]):
        transition_matrix.at[prev, current] += 1
    # Normalise every row by its own total.
    row_totals = transition_matrix.sum(axis=1)
    prob_matrix = transition_matrix.div(row_totals, axis=0)
    # The three most likely followers of each number.
    follow_stats = {
        num: prob_matrix.loc[num].nlargest(3).index.tolist()
        for num in numbers
    }
    return prob_matrix, follow_stats
# Example usage
if __name__ == "__main__":
    data = pd.read_csv('ssq_history.csv')
    prob_matrix, follow_stats = blue_ball_follow_analysis(data)
    # Print, for every blue number, its most frequent followers.
    print("蓝球跟随分析:")
    for num in follow_stats:
        print(f"蓝球 {num} 后最常出现: {follow_stats[num]}")
    # Heat map of the transition table; ticks are relabelled 1..16.
    tick_positions = range(16)
    tick_labels = range(1, 17)
    plt.figure(figsize=(12, 10))
    plt.imshow(prob_matrix, cmap='Blues', interpolation='nearest')
    plt.colorbar(label='概率')
    plt.title('蓝球转移概率矩阵')
    plt.xlabel('当前蓝球')
    plt.ylabel('上期蓝球')
    plt.xticks(tick_positions, tick_labels)
    plt.yticks(tick_positions, tick_labels)
    plt.show()
def time_series_analysis(data, window_size=50):
    """Compute rolling appearance counts ("hot/cold index") for every red.

    Args:
        data: DataFrame with a '开奖日期' column and the red columns
            '红1'..'红6'.  It is modified in place: the date column is
            converted to datetime and becomes the index.
        window_size: number of consecutive rows in the rolling window.
            Defaults to 50, the value that used to be hard-coded.

    Returns:
        ``(hot_cold_index, avg_index)``: a DataFrame indexed like ``data``
        with one column per red number 1-33 holding the number of rows in
        the window that contain that number (NaN until the window is
        full), and the row-wise mean of that frame.
    """
    # Make sure the date column is datetime and use it as the index.
    data['开奖日期'] = pd.to_datetime(data['开奖日期'])
    data.set_index('开奖日期', inplace=True)

    all_reds = np.arange(1, 34)
    reds = data[['红1', '红2', '红3', '红4', '红5', '红6']]
    hot_cold_index = pd.DataFrame(index=data.index, columns=all_reds)
    for num in all_reds:
        # 1 where the draw contains `num`, else 0 - one vectorised comparison
        # per number instead of a Python-level pass over every row.
        drawn = (reds == num).any(axis=1).astype(int)
        hot_cold_index[num] = drawn.rolling(window=window_size).sum()

    # Average index across all 33 numbers for each draw.
    avg_index = hot_cold_index.mean(axis=1)
    return hot_cold_index, avg_index
# Example usage
if __name__ == "__main__":
    data = pd.read_csv('ssq_history.csv')
    hot_cold_index, avg_index = time_series_analysis(data)
    # Line chart of the average hot/cold index over time.
    plt.figure(figsize=(14, 7))
    avg_index.plot(color='darkred')
    plt.title('红球平均热冷指数变化 (50期滚动窗口)')
    plt.xlabel('日期')
    plt.ylabel('热冷指数')
    plt.grid(True)
    plt.show()
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
def association_analysis(data):
    """Mine association rules between red numbers drawn together.

    Each draw's six reds ('红1'..'红6') form one transaction.  Frequent
    itemsets are mined with ``apriori`` (min_support=0.05), rules are
    generated on lift (min_threshold=1.2), and only rules with
    confidence > 0.2 and lift > 1.5 are returned.
    """
    red_columns = ['红1', '红2', '红3', '红4', '红5', '红6']
    # One sorted list of reds per draw.
    transactions = data[red_columns].apply(sorted, axis=1).tolist()

    # One-hot encode the transactions.
    encoder = TransactionEncoder()
    encoded = encoder.fit(transactions).transform(transactions)
    one_hot = pd.DataFrame(encoded, columns=encoder.columns_)

    # Frequent itemsets, then the rules derived from them.
    frequent_itemsets = apriori(one_hot, min_support=0.05, use_colnames=True)
    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.2)

    # Keep only the rules that pass both thresholds.
    confident = rules['confidence'] > 0.2
    lifted = rules['lift'] > 1.5
    useful_rules = rules[confident & lifted]
    return useful_rules
# Example usage
if __name__ == "__main__":
    data = pd.read_csv('ssq_history.csv')
    rules = association_analysis(data)
    # Print the retained rules, strongest lift first.
    print("重要关联规则:")
    shown_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
    ranked_rules = rules[shown_columns].sort_values('lift', ascending=False)
    print(ranked_rules)
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.ensemble import RandomForestClassifier
class DoubleColorBallPredictor:
    """Heuristic picker driven by per-number frequency and omission stats.

    The history CSV must contain the columns '红球1'..'红球6' and '蓝球'.
    Omission values are accumulated in row order, so the "current omission"
    is the number of trailing rows without the number.
    """

    def __init__(self, history_data_path):
        """Load the history CSV and pre-compute statistics for every number."""
        self.df = pd.read_csv(history_data_path)
        self.red_balls = [f"红球{i}" for i in range(1, 7)]
        self.blue_ball = "蓝球"
        self.initialize_statistics()

    def initialize_statistics(self):
        """Fill ``red_stats`` (1-33) and ``blue_stats`` (1-16)."""
        self.red_stats = {num: self.compute_ball_stats(num, '红球') for num in range(1, 34)}
        self.blue_stats = {num: self.compute_ball_stats(num, '蓝球') for num in range(1, 17)}

    def compute_ball_stats(self, num, ball_type):
        """Return the statistics dict for one number.

        Args:
            num: the ball number.
            ball_type: '红球' to look in the six red columns; any other
                value looks in the blue column.

        Returns:
            dict with the keys '出现次数', '出现概率', '理论概率', '概率偏差',
            '平均遗漏', '最大遗漏', '上次遗漏', '本次遗漏' and '欲出机率'.
        """
        is_red = ball_type == '红球'
        col_mask = self.red_balls if is_red else [self.blue_ball]
        total_periods = len(self.df)

        # One vectorised pass: True for every row that contains `num`.
        # (Previously each row was fetched with iloc inside the loop.)
        hits = (self.df[col_mask] == num).any(axis=1).to_numpy()
        occur_count = int(hits.sum())

        # Omission bookkeeping: lengths of the gaps between appearances.
        last_occur = 0
        miss_records = []
        temp_miss = 0
        for hit in hits:
            if hit:
                if temp_miss > 0:
                    miss_records.append(temp_miss)
                    last_occur = temp_miss
                temp_miss = 0
            else:
                temp_miss += 1
        current_miss = temp_miss

        max_miss = max(miss_records) if miss_records else 0
        avg_miss = sum(miss_records) / len(miss_records) if miss_records else 0

        stats = {}
        stats['出现次数'] = occur_count
        # Guard the empty-history case instead of dividing by zero.
        stats['出现概率'] = occur_count / total_periods if total_periods else 0.0
        stats['理论概率'] = 1 / 33 if is_red else 1 / 16
        stats['概率偏差'] = stats['出现概率'] - stats['理论概率']
        stats['平均遗漏'] = avg_miss
        stats['最大遗漏'] = max_miss
        stats['上次遗漏'] = last_occur
        stats['本次遗漏'] = current_miss
        stats['欲出机率'] = current_miss / avg_miss if avg_miss > 0 else 0
        return stats

    def predict_gold_dan(self, top_n=1):
        """Return the ``top_n`` reds with the highest combined score."""
        scores = []
        for num in range(1, 34):
            s = self.red_stats[num]
            # Due-ness (current / average omission) plus probability bias.
            score = s['欲出机率'] * 0.7 + s['概率偏差'] * 0.3
            scores.append((num, score))
        scores.sort(key=lambda x: x[1], reverse=True)
        return [num for num, _ in scores[:top_n]]

    def predict_kill_reds(self, kill_count=3):
        """Return the ``kill_count`` reds with the lowest kill score."""
        scores = []
        for num in range(1, 34):
            s = self.red_stats[num]
            # Lower score = short current omission and high frequency.
            score = s['本次遗漏'] * 0.3 - s['出现概率'] * 0.7
            scores.append((num, score))
        scores.sort(key=lambda x: x[1])
        return [num for num, _ in scores[:kill_count]]

    def predict_blue_ball(self):
        """Return the five best-scoring blue numbers, best first."""
        scores = []
        for num in range(1, 17):
            s = self.blue_stats[num]
            score = (s['欲出机率'] * 0.5 +
                     s['概率偏差'] * 0.3 +
                     (1 - s['本次遗漏'] / max(1, s['平均遗漏'])) * 0.2)
            scores.append((num, score))
        scores.sort(key=lambda x: x[1], reverse=True)
        return [num for num, _ in scores[:5]]

    def predict_head_tail(self):
        """Return (head, tail): best candidates from the first/last red column."""
        # How often each value appeared in the first and last red column.
        head_values = self.df[self.red_balls[0]].value_counts().to_dict()
        tail_values = self.df[self.red_balls[-1]].value_counts().to_dict()
        # Blend the raw count with the number's due-ness.
        head_candidates = sorted(
            [(k, v * 0.6 + self.red_stats[k]['欲出机率'] * 0.4)
             for k, v in head_values.items()],
            key=lambda x: x[1], reverse=True)[:5]
        tail_candidates = sorted(
            [(k, v * 0.5 + self.red_stats[k]['欲出机率'] * 0.5)
             for k, v in tail_values.items()],
            key=lambda x: x[1], reverse=True)[:5]
        return head_candidates[0][0], tail_candidates[0][0]

    def generate_recommendation(self, num_count=20):
        """Return the ``num_count`` best-scoring reds, best first."""
        scores = []
        for num in range(1, 34):
            s = self.red_stats[num]
            score = (s['欲出机率'] * 0.4 +
                     s['概率偏差'] * 0.3 +
                     (1 - s['本次遗漏'] / max(1, s['平均遗漏'])) * 0.3)
            scores.append((num, score))
        scores.sort(key=lambda x: x[1], reverse=True)
        return [num for num, _ in scores[:num_count]]

    def predict_ratio(self, ratio_type='three_zone'):
        """Return a ratio string for the six top-scoring reds.

        ``ratio_type='three_zone'`` gives "z1:z2:z3" for the zones 1-11,
        12-22 and 23-33; any other value gives the "odd:even" ratio.

        The zone branch used to test only the first three picks against
        zone 1 and the next two against zone 2, assigning everything else
        to zone 3; every pick is now counted in the zone it belongs to.
        """
        picks = self.predict_gold_dan(6)
        if ratio_type == 'three_zone':
            z1 = sum(1 for n in picks if n <= 11)
            z2 = sum(1 for n in picks if 12 <= n <= 22)
            z3 = len(picks) - z1 - z2
            return f"{z1}:{z2}:{z3}"
        odds = sum(1 for n in picks if n % 2 == 1)
        return f"{odds}:{6 - odds}"
# Example usage
if __name__ == "__main__":
    predictor = DoubleColorBallPredictor("双色球历史数据.csv")
    # The blue ranking is deterministic, so compute it once and reuse it.
    blue_picks = predictor.predict_blue_ball()

    # "杀蓝" must come from the blue-ball statistics.  It used to print
    # predict_kill_reds(1), i.e. a red number in 1-33.  The score mirrors the
    # red kill rule: short current omission and high frequency rank first.
    blue_stats = predictor.blue_stats
    kill_blue = min(
        blue_stats,
        key=lambda num: blue_stats[num]['本次遗漏'] * 0.3 - blue_stats[num]['出现概率'] * 0.7,
    )

    print("金胆:", predictor.predict_gold_dan(1))
    print("三胆:", predictor.predict_gold_dan(3))
    print("杀3红:", predictor.predict_kill_reds(3))
    print("杀6红:", predictor.predict_kill_reds(6))
    print("龙头凤尾:", predictor.predict_head_tail())
    print("20码推荐:", predictor.generate_recommendation(20))
    print("五码定蓝:", blue_picks[:5])
    print("一码定蓝:", blue_picks[0])
    print("杀蓝:", [kill_blue])
    print("三区比:", predictor.predict_ratio('three_zone'))
    print("奇偶比:", predictor.predict_ratio('odd_even'))
    print("12+4复式:", predictor.generate_recommendation(12), blue_picks[:4])
def calculate_missing_values(history_data):
    """
    Compute the current omission value of every red and blue number.

    The omission value is the number of draws since the number last
    appeared: 0 if it is in the newest draw, and ``len(history_data)`` if it
    never appears.

    :param history_data: list of draws ordered newest first, e.g.
        [
            {"red": [1, 2, 3, 4, 5, 6], "blue": 16},    # newest draw
            {"red": [7, 8, 9, 10, 11, 12], "blue": 1},  # previous draw
            ...
        ]
    :return: (red_missing, blue_missing)
        red_missing: dict keyed by 1..33, e.g. {1: 5, 2: 0, ... 33: 10}
        blue_missing: dict keyed by 1..16, e.g. {1: 3, 2: 8, ... 16: 1}

    Only the first (most recent) occurrence of a number counts.  The earlier
    implementation overwrote the value on every later occurrence and so
    reported the index of the oldest appearance.
    """
    total_periods = len(history_data)
    # Default: the number never appeared in the supplied history.
    red_missing = {num: total_periods for num in range(1, 34)}
    blue_missing = {num: total_periods for num in range(1, 17)}

    pending_reds = set(red_missing)  # reds whose latest draw is not found yet
    seen_blues = set()
    # Walk from the newest draw towards the oldest.
    for period_idx, record in enumerate(history_data):
        for red_num in pending_reds.intersection(record["red"]):
            red_missing[red_num] = period_idx
        pending_reds.difference_update(record["red"])

        blue_num = record["blue"]
        if blue_num not in seen_blues:
            seen_blues.add(blue_num)
            blue_missing[blue_num] = period_idx
    return red_missing, blue_missing
# ===== Example usage =====
if __name__ == "__main__":
    # Simulated history, newest draw first.
    history = [
        {"red": [1, 15, 22, 23, 24, 33], "blue": 5},  # newest draw
        {"red": [2, 8, 12, 19, 21, 27], "blue": 9},
        {"red": [1, 7, 10, 16, 25, 32], "blue": 5},  # red 1 and blue 5 also appear here
        {"red": [3, 9, 11, 17, 18, 31], "blue": 14},
    ]
    red_miss, blue_miss = calculate_missing_values(history)
    # Print both tables in ascending number order.
    for heading, table in (("红球遗漏值:", red_miss), ("\n蓝球遗漏值:", blue_miss)):
        print(heading)
        for num in sorted(table):
            print(f"号码 {num:2d}: {table[num]}期")
# TODO: 好了,现在帮我修改,避免重复 (refactor this file to remove the duplicated code)