C. Phoenix and Distribution

探讨了如何将一个字符串分割成k个子串,使其中字典序最大的字符串达到最小化。通过排序和贪心策略,分析了不同场景下的最优分配方案。

Phoenix and Distribution

题意
给定一个长度为n的字符串,要求把它的所有字符分配到k个字符串中(字符可以任意重排),这k个字符串都不能为空。求在所有分配方案中,字典序最大的那个字符串的最小可能值。

思路
第一步肯定是对字符串排序。
考虑一下,如果排序后前k小的字符不全一样,那么大小结果已经确定:让第k个字符单独成串,就能使字典序最大的字符串最小。为什么?因为第一个字符就已经可以判断大小关系了。不管后面怎么分配剩余的字符,首字符最大的那个字符串一定是字典序最大的;想让它最小,只需要把剩余的字符都分配给其他k-1个字符串即可,这就是一个贪心的思想。
那么如果前k个字符都相同呢?这说明仅凭首字符暂时无法比出大小。
那么我们需要判断剩余的n-k个字符是不是都一样。
如果都一样,我们肯定是把这n-k个字符尽量平均地分给k个字符串,此时最长的字符串后面接了⌈(n-k)/k⌉个该字符。
如果存在不一样?直接把剩余的n-k个字符放在一个字符串后面。
为什么?考虑一下,如果剩余的n-k个不完全一样,那么必定有一个最小字符,一个最大字符
如果把这n-k个均分的话,字典序最大的会更大。
比如s=aabc ,k=2
先给两个字符串各分配一个字符a,如果均分的话字符串就是 ab、ac,最大字典序是ac。
如果把剩余的字符都接在同一个字符串后面的话就是 a、abc,最大字典序是abc,而 abc<ac,所以后者更优。
分析完后 代码模拟一下即可。

#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
// Solves Codeforces 1348C "Phoenix and Distribution".
//
// Input : t test cases; each gives len, k and a string s.
// Output: for every test case, one line holding the smallest possible value
//         of the lexicographically largest string when the characters of s
//         are distributed into k non-empty strings.
//
// Greedy on the sorted string:
//   1. If the k smallest characters are not all equal, the answer is the
//      k-th smallest character alone.
//   2. Otherwise every string starts with that common character, and
//      a) if all remaining characters are equal, they are spread as evenly
//         as possible, so the answer gets ceil((n-k)/k) copies of them;
//      b) if they differ, the whole sorted remainder is appended to a
//         single string.
//
// Processing stops quietly on malformed input (failed read or k outside
// [1, |s|]) instead of indexing out of range.
int main(){
    // Only iostreams are used, so decoupling from C stdio is safe.
    std::ios::sync_with_stdio(false);
    std::cin.tie(nullptr);

    int t;
    if(!(std::cin>>t)) return 0;

    while(t-- > 0){
        int len,k;
        std::string s;
        if(!(std::cin>>len>>k>>s)) break;

        // Trust the actual string length rather than the declared one.
        const int n=static_cast<int>(s.size());
        if(k<1 || k>n) break;

        std::sort(s.begin(),s.end());

        // Case 1: the first characters already decide the maximum.
        if(s[0]!=s[k-1]){
            std::cout<<s[k-1]<<'\n';
            continue;
        }

        // Case 2: every string starts with the same character.
        std::string answer(1,s[k-1]);
        const int rest=n-k;
        if(rest>0){  // explicit guard: nothing to append when k == n
            if(s[k]==s[n-1]){
                // All leftovers equal: the longest string gets ceil(rest/k).
                const std::string::size_type copies=
                    static_cast<std::string::size_type>((rest+k-1)/k);
                answer.append(copies,s[k]);
            }
            else{
                // Leftovers differ: put the whole sorted tail on one string.
                answer.append(s,static_cast<std::string::size_type>(k),
                              std::string::npos);
            }
        }
        std::cout<<answer<<'\n';
    }
    return 0;
}
import requests from bs4 import BeautifulSoup from collections import Counter # 获取内容 res = requests.get('http://kaijiang.zhcw.com/zhcw/html/ssq/list_1.html', timeout=30) res.encoding = 'utf-8' htm = res.text # 解析内容 soup = BeautifulSoup(htm, 'html.parser') # url前缀 prefix_url = 'http://kaijiang.zhcw.com/zhcw/html/ssq/list' # 获取总页数 total = int(soup.find('p', attrs={"class": "pg"}).find_all('strong')[0].text) # 将获取的信息,写进文件 local_file = open('双色球.txt', 'w') red_num = [] # 历史上开出的红球 blue_num = [] # 历史上开出的蓝球 # 分页获取每一页的开奖信息 for page_num in range(1, total + 1): t_url = prefix_url + '_' + str(page_num) + '.html' print(t_url) res2 = requests.get(t_url, timeout=30) res2.encoding = 'utf-8' page_context = res2.text page_soup = BeautifulSoup(page_context, 'html.parser') if page_soup.table is None: continue elif page_soup.table: table_rows = page_soup.table.find_all('tr') for row_num in range(2, len(table_rows) - 1): row_tds = table_rows[row_num].find_all('td') ems = row_tds[2].find_all('em') # result = '开奖日期:'+ row_tds[0].string +','+'期号:'+ row_tds[1].string +', '+ems[0].string+' '+ems[1].string+' '+ems[2].string+' '+ems[3].string+' '+ems[4].string+' '+ems[5].string+' '+ems[6].string result = row_tds[0].string + ',' + row_tds[1].string + ', ' + ems[0].string + ' ' + ems[1].string + ' ' + \ ems[2].string + ' ' + ems[3].string + ' ' + ems[4].string + ' ' + ems[5].string + ' ' + ems[ 6].string local_file.write(result + '\n') print(result) red_num.append(ems[0].string) # 红球1 red_num.append(ems[1].string) # 红球2 red_num.append(ems[2].string) # 红球3 red_num.append(ems[3].string) # 红球4 red_num.append(ems[4].string) # 红球5 red_num.append(ems[5].string) # 红球6 blue_num.append(ems[6].string) # 蓝球 else: continue local_file.close red_count = Counter(red_num) blue_count = Counter(blue_num) # 按照出现频率顺序 red_count_sorted = sorted(red_count.items(), key=lambda pair: pair[1], reverse=False) blue_count_sorted = sorted(blue_count.items(), key=lambda pair: pair[1], reverse=False) print(red_count_sorted) 
print(blue_count_sorted) ssq_red = red_count_sorted[0:6] ssq_blue = blue_count_sorted[0:3] print(list(map(lambda item: item[0], ssq_red))) print(list(map(lambda item: item[0], ssq_blue))) ssq_red = list(map(lambda item: item[0], ssq_red)) ssq_blue = list(map(lambda item: item[0], ssq_blue)) ssq_red.sort() ssq_blue.sort() print('顺选-1:' + str(ssq_red) + '|' + ssq_blue[0]) print('顺选-2:' + str(ssq_red) + '|' + ssq_blue[1]) print('顺选-3:' + str(ssq_red) + '|' + ssq_blue[2]) print('------------------------------------------------------------------------------') # 按照出现频率倒序 red_count_sorted = sorted(red_count.items(), key=lambda pair: pair[1], reverse=True) blue_count_sorted = sorted(blue_count.items(), key=lambda pair: pair[1], reverse=True) print(red_count_sorted) print(blue_count_sorted) ssq_red = red_count_sorted[0:6] ssq_blue = blue_count_sorted[0:3] print(list(map(lambda item: item[0], ssq_red))) print(list(map(lambda item: item[0], ssq_blue))) ssq_red = list(map(lambda item: item[0], ssq_red)) ssq_blue = list(map(lambda item: item[0], ssq_blue)) ssq_red.sort() ssq_blue.sort() print('反选-1:' + str(ssq_red) + '|' + ssq_blue[0]) print('反选-2:' + str(ssq_red) + '|' + ssq_blue[1]) print('反选-3:' + str(ssq_red) + '|' + ssq_blue[2]) import numpy as np import pandas as pd from collections import Counter class DoubleColorBallPredictor: def __init__(self, history_data): """ 初始化预测器 history_data: 历史开奖数据DataFrame,包含['red1','red2','red3','red4','red5','red6','blue']列 """ self.history = history_data self.all_reds = list(range(1, 34)) self.all_blues = list(range(1, 17)) def analyze_reds(self): """分析红球数据""" # 合并所有历史红球 all_red_numbers = [] for col in ['red1', 'red2', 'red3', 'red4', 'red5', 'red6']: all_red_numbers.extend(self.history[col].values) # 计算热号(出现频率最高的) freq = Counter(all_red_numbers) hot_reds = [num for num, _ in freq.most_common(10)] # 计算冷号(出现频率最低的) cold_reds = [num for num, _ in freq.most_common()[:-11:-1]] return hot_reds, cold_reds, freq def analyze_blues(self): 
"""分析蓝球数据""" blue_numbers = self.history['blue'].values freq = Counter(blue_numbers) hot_blues = [num for num, _ in freq.most_common(5)] cold_blues = [num for num, _ in freq.most_common()[:-6:-1]] return hot_blues, cold_blues, freq def predict(self): """生成预测结果""" # 分析数据 hot_reds, cold_reds, red_freq = self.analyze_reds() hot_blues, cold_blues, blue_freq = self.analyze_blues() # 生成推荐红球(排除冷号) rec_reds = [num for num in self.all_reds if num not in cold_reds[:3]] np.random.shuffle(rec_reds) # 生成推荐蓝球(排除冷号) rec_blues = [num for num in self.all_blues if num not in cold_blues[:2]] np.random.shuffle(rec_blues) # 1. 金胆预测 (最看好的红球) gold_red = hot_reds[0] # 2. 三胆预测 (三个最看好的红球) three_reds = hot_reds[:3] # 3. 杀3红 (冷门红球) kill_3_reds = cold_reds[:3] # 4. 杀6红 (冷门红球) kill_6_reds = cold_reds[:6] # 5. 龙头预测 (小号区热号) dragon_head = min(hot_reds[:5]) # 6. 凤尾预测 (大号区热号) phoenix_tail = max(hot_reds[:5]) # 7. 20码推荐 rec_20 = sorted(rec_reds[:20]) # 8. 五码定蓝 five_blues = hot_blues[:5] # 9. 一码定蓝 one_blue = hot_blues[0] # 10. 杀蓝 kill_blue = cold_blues[0] # 11. 三区比预测 (1-11, 12-22, 23-33) zone_counts = [0, 0, 0] for num in hot_reds[:6]: if num <= 11: zone_counts[0] += 1 elif num <= 22: zone_counts[1] += 1 else: zone_counts[2] += 1 zone_ratio = f"{zone_counts[0]}:{zone_counts[1]}:{zone_counts[2]}" # 12. 奇偶比预测 odd_count = sum(1 for num in hot_reds[:6] if num % 2 == 1) even_count = 6 - odd_count parity_ratio = f"{odd_count}:{even_count}" # 13-16. 复式推荐 def generate_compound(red_count, blue_count): reds = sorted(np.random.choice(rec_reds, red_count, replace=False)) blues = sorted(np.random.choice(rec_blues, blue_count, replace=False)) return reds, blues compound_12_4 = generate_compound(12, 4) compound_9_3 = generate_compound(9, 3) compound_8_3 = generate_compound(8, 3) compound_7_2 = generate_compound(7, 2) # 17. 
单式推荐 (5组) single_bets = [] for _ in range(5): reds = sorted(np.random.choice(rec_reds, 6, replace=False)) blue = np.random.choice(rec_blues, 1)[0] single_bets.append((reds, blue)) return { "金胆": gold_red, "三胆": three_reds, "杀3红": kill_3_reds, "杀6红": kill_6_reds, "龙头": dragon_head, "凤尾": phoenix_tail, "20码": rec_20, "五码定蓝": five_blues, "一码定蓝": one_blue, "杀蓝": kill_blue, "三区比": zone_ratio, "奇偶比": parity_ratio, "12+4复式": compound_12_4, "9+3复式": compound_9_3, "8+3复式": compound_8_3, "7+2复式": compound_7_2, "单式推荐": single_bets } # 示例使用 if __name__ == "__main__": # 模拟历史数据 (实际应用中应替换为真实数据) history_data = pd.DataFrame({ 'red1': np.random.randint(1, 34, 100), 'red2': np.random.randint(1, 34, 100), 'red3': np.random.randint(1, 34, 100), 'red4': np.random.randint(1, 34, 100), 'red5': np.random.randint(1, 34, 100), 'red6': np.random.randint(1, 34, 100), 'blue': np.random.randint(1, 17, 100) }) predictor = DoubleColorBallPredictor(history_data) prediction = predictor.predict() # 打印预测结果 print("双色球预测结果:") print(f"金胆: {prediction['金胆']}") print(f"三胆: {prediction['三胆']}") print(f"杀3红: {prediction['杀3红']}") print(f"杀6红: {prediction['杀6红']}") print(f"龙头: {prediction['龙头']}") print(f"凤尾: {prediction['凤尾']}") print(f"20码: {prediction['20码']}") print(f"五码定蓝: {prediction['五码定蓝']}") print(f"一码定蓝: {prediction['一码定蓝']}") print(f"杀蓝: {prediction['杀蓝']}") print(f"三区比: {prediction['三区比']}") print(f"奇偶比: {prediction['奇偶比']}") print("\n复式推荐:") print(f"12+4: 红球={prediction['12+4复式'][0]} 蓝球={prediction['12+4复式'][1]}") print(f"9+3: 红球={prediction['9+3复式'][0]} 蓝球={prediction['9+3复式'][1]}") print(f"8+3: 红球={prediction['8+3复式'][0]} 蓝球={prediction['8+3复式'][1]}") print(f"7+2: 红球={prediction['7+2复式'][0]} 蓝球={prediction['7+2复式'][1]}") print("\n单式推荐:") for i, bet in enumerate(prediction['单式推荐'], 1): print(f"第{i}组: 红球={bet[0]} 蓝球={bet[1]}") import pandas as pd import numpy as np import matplotlib.pyplot as plt def frequency_analysis(data): """计算红球和蓝球的频率分布""" # 提取所有红球号码 red_balls = data[['红1', '红2', '红3', '红4', 
'红5', '红6']].values.flatten() # 计算红球频率 red_freq = pd.Series(red_balls).value_counts().sort_index() red_freq = red_freq.reindex(range(1, 34), fill_value=0) # 确保1-33都有 # 计算蓝球频率 blue_freq = data['蓝球'].value_counts().sort_index() blue_freq = blue_freq.reindex(range(1, 17), fill_value=0) # 确保1-16都有 return red_freq, blue_freq def plot_frequency(red_freq, blue_freq): """可视化频率分布""" fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) # 红球频率图 red_freq.plot(kind='bar', ax=ax1, color='red', alpha=0.7) ax1.set_title('红球出现频率') ax1.set_xlabel('红球号码') ax1.set_ylabel('出现次数') # 蓝球频率图 blue_freq.plot(kind='bar', ax=ax2, color='blue', alpha=0.7) ax2.set_title('蓝球出现频率') ax2.set_xlabel('蓝球号码') ax2.set_ylabel('出现次数') plt.tight_layout() plt.show() # 使用示例 if __name__ == "__main__": # 加载数据 (假设已有数据) data = pd.read_csv('ssq_history.csv') red_freq, blue_freq = frequency_analysis(data) plot_frequency(red_freq, blue_freq) # 输出热号(前5)和冷号(后5) print("红球热号:", red_freq.nlargest(5).index.tolist()) print("红球冷号:", red_freq.nsmallest(5).index.tolist()) print("蓝球热号:", blue_freq.nlargest(3).index.tolist()) print("蓝球冷号:", blue_freq.nsmallest(3).index.tolist()) def parity_analysis(data): """分析奇偶比例""" # 提取红球 red_balls = data[['红1', '红2', '红3', '红4', '红5', '红6']] # 计算每期奇偶比 data['红球奇数'] = red_balls.apply(lambda row: sum(1 for num in row if num % 2 == 1), axis=1) data['红球偶数'] = 6 - data['红球奇数'] data['奇偶比'] = data['红球奇数'].astype(str) + ':' + data['红球偶数'].astype(str) # 蓝球奇偶分析 data['蓝球奇偶'] = data['蓝球'].apply(lambda x: '奇' if x % 2 == 1 else '偶') # 统计奇偶比分布 ratio_dist = data['奇偶比'].value_counts().sort_index() return data, ratio_dist # 使用示例 if __name__ == "__main__": data = pd.read_csv('ssq_history.csv') data, ratio_dist = parity_analysis(data) # 输出最常见的奇偶比 print("最常见的奇偶比:") print(ratio_dist.head(5)) # 可视化 plt.figure(figsize=(10, 6)) ratio_dist.plot(kind='bar', color='green') plt.title('红球奇偶比分布') plt.xlabel('奇偶比') plt.ylabel('出现次数') plt.show() def zone_analysis(data): """三区比分析""" # 定义区间:1-11(一区), 12-22(二区), 23-33(三区) 
def count_zones(row): zone1 = sum(1 for num in row if 1 <= num <= 11) zone2 = sum(1 for num in row if 12 <= num <= 22) zone3 = sum(1 for num in row if 23 <= num <= 33) return zone1, zone2, zone3 # 计算每期的三区分布 data[['一区', '二区', '三区']] = data[['红1', '红2', '红3', '红4', '红5', '红6']].apply( lambda row: count_zones(row), axis=1, result_type='expand' ) # 计算三区比 data['三区比'] = data['一区'].astype(str) + ':' + data['二区'].astype(str) + ':' + data['三区'].astype(str) # 统计三区比分布 zone_dist = data['三区比'].value_counts() return data, zone_dist # 使用示例 if __name__ == "__main__": data = pd.read_csv('ssq_history.csv') data, zone_dist = zone_analysis(data) # 输出最常见的三区比 print("最常见的三区比:") print(zone_dist.head(5)) # 可视化 plt.figure(figsize=(10, 6)) zone_dist.head(10).plot(kind='barh', color='purple') plt.title('三区比分布') plt.xlabel('出现次数') plt.ylabel('三区比') plt.gca().invert_yaxis() plt.show() def path_analysis(data): """012路分析""" # 红球012路分析 red_balls = data[['红1', '红2', '红3', '红4', '红5', '红6']] def count_paths(row): path0 = sum(1 for num in row if num % 3 == 0) path1 = sum(1 for num in row if num % 3 == 1) path2 = sum(1 for num in row if num % 3 == 2) return path0, path1, path2 data[['0路', '1路', '2路']] = red_balls.apply( lambda row: count_paths(row), axis=1, result_type='expand' ) data['012路比'] = data['0路'].astype(str) + ':' + data['1路'].astype(str) + ':' + data['2路'].astype(str) # 蓝球012路分析 data['蓝球012路'] = data['蓝球'].apply(lambda x: x % 3) return data # 使用示例 if __name__ == "__main__": data = pd.read_csv('ssq_history.csv') data = path_analysis(data) # 统计012路分布 path_dist = data['012路比'].value_counts() # 可视化 plt.figure(figsize=(12, 6)) plt.subplot(1, 2, 1) path_dist.head(10).plot(kind='bar', color='orange') plt.title('012路分布') plt.subplot(1, 2, 2) data['蓝球012路'].value_counts().plot(kind='pie', autopct='%1.1f%%', colors=['gold', 'lightcoral', 'lightblue']) plt.title('蓝球012路分布') plt.ylabel('') plt.tight_layout() plt.show() def sum_and_span_analysis(data): """和值与跨度分析""" # 计算和值(红球总和) data['和值'] = data[['红1', 
'红2', '红3', '红4', '红5', '红6']].sum(axis=1) # 计算跨度(最大值-最小值) data['跨度'] = data[['红1', '红2', '红3', '红4', '红5', '红6']].max(axis=1) - \ data[['红1', '红2', '红3', '红4', '红5', '红6']].min(axis=1) return data # 使用示例 if __name__ == "__main__": data = pd.read_csv('ssq_history.csv') data = sum_and_span_analysis(data) # 可视化 plt.figure(figsize=(14, 6)) plt.subplot(1, 2, 1) plt.hist(data['和值'], bins=30, color='teal', alpha=0.7) plt.axvline(data['和值'].mean(), color='red', linestyle='dashed', linewidth=1) plt.title('红球和值分布') plt.xlabel('和值') plt.ylabel('频次') plt.subplot(1, 2, 2) plt.hist(data['跨度'], bins=20, color='brown', alpha=0.7) plt.axvline(data['跨度'].mean(), color='red', linestyle='dashed', linewidth=1) plt.title('红球跨度分布') plt.xlabel('跨度') plt.tight_layout() plt.show() def consecutive_and_repeat_analysis(data): """连号与重号分析""" # 连号分析 def count_consecutive(row): sorted_row = sorted(row) count = 0 for i in range(1, len(sorted_row)): if sorted_row[i] - sorted_row[i - 1] == 1: count += 1 return count data['连号数量'] = data[['红1', '红2', '红3', '红4', '红5', '红6']].apply( lambda row: count_consecutive(row), axis=1 ) # 重号分析(与上期比较) data['重号数量'] = 0 for i in range(1, len(data)): current = set(data.iloc[i][['红1', '红2', '红3', '红4', '红5', '红6']]) previous = set(data.iloc[i - 1][['红1', '红2', '红3', '红4', '红5', '红6']]) data.at[i, '重号数量'] = len(current & previous) return data # 使用示例 if __name__ == "__main__": data = pd.read_csv('ssq_history.csv') data = consecutive_and_repeat_analysis(data) # 可视化 plt.figure(figsize=(14, 6)) plt.subplot(1, 2, 1) data['连号数量'].value_counts().sort_index().plot(kind='bar', color='green') plt.title('连号数量分布') plt.xlabel('连号数量') plt.ylabel('期数') plt.subplot(1, 2, 2) data['重号数量'].value_counts().sort_index().plot(kind='bar', color='blue') plt.title('重号数量分布') plt.xlabel('重号数量') plt.tight_layout() plt.show() def ac_value_analysis(data): """AC值分析""" def calculate_ac(row): numbers = sorted(row) diff_set = set() for i in range(len(numbers)): for j in range(i + 1, len(numbers)): diff 
= abs(numbers[j] - numbers[i]) if diff > 0: diff_set.add(diff) return len(diff_set) - (6 - 1) data['AC值'] = data[['红1', '红2', '红3', '红4', '红5', '红6']].apply( lambda row: calculate_ac(row), axis=1 ) return data # 使用示例 if __name__ == "__main__": data = pd.read_csv('ssq_history.csv') data = ac_value_analysis(data) # 可视化 plt.figure(figsize=(10, 6)) plt.hist(data['AC值'], bins=range(4, 11), align='left', rwidth=0.8, color='purple') plt.xticks(range(4, 11)) plt.title('AC值分布') plt.xlabel('AC值') plt.ylabel('频次') plt.show() def blue_ball_follow_analysis(data): """蓝球跟随分析""" # 创建蓝球转移矩阵 transition_matrix = pd.DataFrame(0, index=range(1, 17), columns=range(1, 17)) blues = data['蓝球'].values for i in range(1, len(blues)): prev = blues[i - 1] current = blues[i] transition_matrix.at[prev, current] += 1 # 计算概率 prob_matrix = transition_matrix.div(transition_matrix.sum(axis=1), axis=0) # 找出每个蓝球后面最常出现的号码 follow_stats = {} for num in range(1, 17): follow = prob_matrix.loc[num].nlargest(3) follow_stats[num] = follow.index.tolist() return prob_matrix, follow_stats # 使用示例 if __name__ == "__main__": data = pd.read_csv('ssq_history.csv') prob_matrix, follow_stats = blue_ball_follow_analysis(data) # 打印跟随关系 print("蓝球跟随分析:") for num, follows in follow_stats.items(): print(f"蓝球 {num} 后最常出现: {follows}") # 可视化转移矩阵 plt.figure(figsize=(12, 10)) plt.imshow(prob_matrix, cmap='Blues', interpolation='nearest') plt.colorbar(label='概率') plt.title('蓝球转移概率矩阵') plt.xlabel('当前蓝球') plt.ylabel('上期蓝球') plt.xticks(range(16), range(1, 17)) plt.yticks(range(16), range(1, 17)) plt.show() def time_series_analysis(data): """时间序列分析""" # 确保日期列是datetime类型 data['开奖日期'] = pd.to_datetime(data['开奖日期']) data.set_index('开奖日期', inplace=True) # 计算每期的热冷指数 all_reds = np.arange(1, 34) # 计算滚动窗口内的频率 window_size = 50 # 50期窗口 hot_cold_index = pd.DataFrame(index=data.index, columns=all_reds) for num in all_reds: # 计算每个号码在滚动窗口内的出现次数 hot_cold_index[num] = data[['红1', '红2', '红3', '红4', '红5', '红6']].apply( lambda row: num in row.values, 
axis=1 ).rolling(window=window_size).sum() # 计算平均热冷指数 avg_index = hot_cold_index.mean(axis=1) return hot_cold_index, avg_index # 使用示例 if __name__ == "__main__": data = pd.read_csv('ssq_history.csv') hot_cold_index, avg_index = time_series_analysis(data) # 可视化热冷指数变化 plt.figure(figsize=(14, 7)) avg_index.plot(color='darkred') plt.title('红球平均热冷指数变化 (50期滚动窗口)') plt.xlabel('日期') plt.ylabel('热冷指数') plt.grid(True) plt.show() from mlxtend.preprocessing import TransactionEncoder from mlxtend.frequent_patterns import apriori, association_rules def association_analysis(data): """关联规则挖掘""" # 准备交易数据 transactions = data[['红1', '红2', '红3', '红4', '红5', '红6']].apply( lambda row: sorted(row), axis=1 ).tolist() # 转换数据格式 te = TransactionEncoder() te_ary = te.fit(transactions).transform(transactions) df = pd.DataFrame(te_ary, columns=te.columns_) # 挖掘频繁项集 frequent_itemsets = apriori(df, min_support=0.05, use_colnames=True) # 生成关联规则 rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.2) # 过滤有用的规则 useful_rules = rules[(rules['confidence'] > 0.2) & (rules['lift'] > 1.5)] return useful_rules # 使用示例 if __name__ == "__main__": data = pd.read_csv('ssq_history.csv') rules = association_analysis(data) # 打印重要关联规则 print("重要关联规则:") print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].sort_values('lift', ascending=False)) import pandas as pd import numpy as np from collections import Counter from sklearn.ensemble import RandomForestClassifier class DoubleColorBallPredictor: def __init__(self, history_data_path): self.df = pd.read_csv(history_data_path) self.red_balls = [f"红球{i}" for i in range(1, 7)] self.blue_ball = "蓝球" self.initialize_statistics() def initialize_statistics(self): """初始化统计数据""" self.red_stats = {num: self.compute_ball_stats(num, '红球') for num in range(1, 34)} self.blue_stats = {num: self.compute_ball_stats(num, '蓝球') for num in range(1, 17)} def compute_ball_stats(self, num, ball_type): """计算单个号码的统计数据""" stats = {} if ball_type == 
'红球': col_mask = self.red_balls total_periods = len(self.df) else: col_mask = [self.blue_ball] total_periods = len(self.df) # 出现次数 occur_count = (self.df[col_mask] == num).any(axis=1).sum() # 遗漏值计算 last_occur = 0 current_miss = 0 miss_records = [] temp_miss = 0 for i in range(len(self.df)): row = self.df.iloc[i] if num in row[col_mask].values: if temp_miss > 0: miss_records.append(temp_miss) last_occur = temp_miss temp_miss = 0 else: temp_miss += 1 current_miss = temp_miss max_miss = max(miss_records) if miss_records else 0 avg_miss = sum(miss_records) / len(miss_records) if miss_records else 0 # 计算统计指标 stats['出现次数'] = occur_count stats['出现概率'] = occur_count / total_periods stats['理论概率'] = 1 / 33 if ball_type == '红球' else 1 / 16 stats['概率偏差'] = stats['出现概率'] - stats['理论概率'] stats['平均遗漏'] = avg_miss stats['最大遗漏'] = max_miss stats['上次遗漏'] = last_occur stats['本次遗漏'] = current_miss stats['欲出机率'] = current_miss / avg_miss if avg_miss > 0 else 0 return stats def predict_gold_dan(self, top_n=1): """预测金胆(红球)""" scores = [] for num in range(1, 34): s = self.red_stats[num] # 综合欲出机率+概率偏差进行评分 score = s['欲出机率'] * 0.7 + s['概率偏差'] * 0.3 scores.append((num, score)) # 按评分降序排序 scores.sort(key=lambda x: x[1], reverse=True) return [num for num, _ in scores[:top_n]] def predict_kill_reds(self, kill_count=3): """预测杀红球""" scores = [] for num in range(1, 34): s = self.red_stats[num] # 近期出现频繁+欲出机率低的优先排除 score = s['本次遗漏'] * 0.3 - s['出现概率'] * 0.7 scores.append((num, score)) scores.sort(key=lambda x: x[1]) return [num for num, _ in scores[:kill_count]] def predict_blue_ball(self): """预测蓝球""" scores = [] for num in range(1, 17): s = self.blue_stats[num] # 综合多个指标 score = (s['欲出机率'] * 0.5 + s['概率偏差'] * 0.3 + (1 - s['本次遗漏'] / max(1, s['平均遗漏'])) * 0.2) scores.append((num, score)) scores.sort(key=lambda x: x[1], reverse=True) return [num for num, _ in scores[:5]] # 返回五码定蓝 def predict_head_tail(self): """预测龙头凤尾""" # 分析历史首位和末位号码分布 head_values = self.df[self.red_balls[0]].value_counts().to_dict() 
tail_values = self.df[self.red_balls[-1]].value_counts().to_dict() # 结合遗漏值筛选 head_candidates = sorted( [(k, v * 0.6 + self.red_stats[k]['欲出机率'] * 0.4) for k, v in head_values.items()], key=lambda x: x[1], reverse=True)[:5] tail_candidates = sorted( [(k, v * 0.5 + self.red_stats[k]['欲出机率'] * 0.5) for k, v in tail_values.items()], key=lambda x: x[1], reverse=True)[:5] return head_candidates[0][0], tail_candidates[0][0] def generate_recommendation(self, num_count=20): """生成红球推荐""" scores = [] for num in range(1, 34): s = self.red_stats[num] # 综合指标评分 score = (s['欲出机率'] * 0.4 + s['概率偏差'] * 0.3 + (1 - s['本次遗漏'] / max(1, s['平均遗漏'])) * 0.3) scores.append((num, score)) scores.sort(key=lambda x: x[1], reverse=True) return [num for num, _ in scores[:num_count]] def predict_ratio(self, ratio_type='three_zone'): """预测三区比/奇偶比""" if ratio_type == 'three_zone': # 分区统计 (1-11, 12-22, 23-33) z1 = sum(1 for n in range(1, 12) if n in self.predict_gold_dan(6)[:3]) z2 = sum(1 for n in range(12, 23) if n in self.predict_gold_dan(6)[3:5]) z3 = 6 - z1 - z2 return f"{z1}:{z2}:{z3}" else: # 奇偶比预测 odds = sum(1 for n in self.predict_gold_dan(6) if n % 2 == 1) return f"{odds}:{6 - odds}" # 使用示例 if __name__ == "__main__": predictor = DoubleColorBallPredictor("双色球历史数据.csv") print("金胆:", predictor.predict_gold_dan(1)) print("三胆:", predictor.predict_gold_dan(3)) print("杀3红:", predictor.predict_kill_reds(3)) print("杀6红:", predictor.predict_kill_reds(6)) print("龙头凤尾:", predictor.predict_head_tail()) print("20码推荐:", predictor.generate_recommendation(20)) print("五码定蓝:", predictor.predict_blue_ball()[:5]) print("一码定蓝:", predictor.predict_blue_ball()[0]) print("杀蓝:", predictor.predict_kill_reds(1)) # 类似红球逻辑 print("三区比:", predictor.predict_ratio('three_zone')) print("奇偶比:", predictor.predict_ratio('odd_even')) print("12+4复式:", predictor.generate_recommendation(12), predictor.predict_blue_ball()[:4]) def calculate_missing_values(history_data): """ 计算双色球红球和蓝球的遗漏值 :param history_data: 历史开奖数据列表,格式示例: [ {"red": 
[1, 2, 3, 4, 5, 6], "blue": 16}, # 最新一期 {"red": [7, 8, 9, 10, 11, 12], "blue": 1}, # 上一期 ... # 按时间倒序排列(最新数据在前) ] :return: (red_missing, blue_missing) 红球遗漏值字典:{1: 5, 2: 0, ... 33: 10} 蓝球遗漏值字典:{1: 3, 2: 8, ... 16: 1} """ total_periods = len(history_data) # 初始化红球遗漏值(默认从未出现) red_missing = {num: total_periods for num in range(1, 34)} # 初始化蓝球遗漏值(默认从未出现) blue_missing = {num: total_periods for num in range(1, 17)} # 遍历历史数据(从最新一期开始) for period_idx, record in enumerate(history_data): # 处理红球(6个号码) for red_num in range(1, 34): if red_num in record["red"]: # 更新为当前期数的索引值(即遗漏期数) red_missing[red_num] = period_idx continue # 处理蓝球(1个号码) blue_num = record["blue"] blue_missing[blue_num] = period_idx return red_missing, blue_missing # ===== 使用示例 ===== if __name__ == "__main__": # 模拟历史数据(最新数据在前) history = [ {"red": [1, 15, 22, 23, 24, 33], "blue": 5}, # 最新一期 {"red": [2, 8, 12, 19, 21, 27], "blue": 9}, {"red": [1, 7, 10, 16, 25, 32], "blue": 5}, # 红球1和蓝球5在此期出现 {"red": [3, 9, 11, 17, 18, 31], "blue": 14} ] red_miss, blue_miss = calculate_missing_values(history) print("红球遗漏值:") for num in sorted(red_miss.keys()): print(f"号码 {num:2d}: {red_miss[num]}期") print("\n蓝球遗漏值:") for num in sorted(blue_miss.keys()): print(f"号码 {num:2d}: {blue_miss[num]}期") 好了,现在帮我修改,避免重复
11-30
java.sql.SQLException: ERROR 726 (43M10): Inconsistent namespace mapping properties. Ensure that config phoenix.schema.isNamespaceMappingEnabled is consistent on client and server. at org.apache.phoenix.exception.SQLExceptionCode$Factory$1.newException(SQLExceptionCode.java:607) at org.apache.phoenix.exception.SQLExceptionInfo.buildException(SQLExceptionInfo.java:217) at org.apache.phoenix.query.ConnectionQueryServicesImpl.checkClientServerCompatibility(ConnectionQueryServicesImpl.java:1731) at org.apache.phoenix.query.ConnectionQueryServicesImpl.ensureTableCreated(ConnectionQueryServicesImpl.java:1460) at org.apache.phoenix.query.ConnectionQueryServicesImpl.createTable(ConnectionQueryServicesImpl.java:1939) at org.apache.phoenix.schema.MetaDataClient.createTableInternal(MetaDataClient.java:3090) at org.apache.phoenix.schema.MetaDataClient.createTable(MetaDataClient.java:1117) at org.apache.phoenix.compile.CreateTableCompiler$CreateTableMutationPlan.execute(CreateTableCompiler.java:420) at org.apache.phoenix.jdbc.PhoenixStatement$2.call(PhoenixStatement.java:443) at org.apache.phoenix.jdbc.PhoenixStatement$2.call(PhoenixStatement.java:425) at org.apache.phoenix.call.CallRunner.run(CallRunner.java:53) at org.apache.phoenix.jdbc.PhoenixStatement.executeMutation(PhoenixStatement.java:424) at org.apache.phoenix.jdbc.PhoenixStatement.executeMutation(PhoenixStatement.java:412) at org.apache.phoenix.jdbc.PhoenixStatement.executeUpdate(PhoenixStatement.java:1990) at org.apache.phoenix.query.ConnectionQueryServicesImpl$12.call(ConnectionQueryServicesImpl.java:3312) at org.apache.phoenix.query.ConnectionQueryServicesImpl$12.call(ConnectionQueryServicesImpl.java:3266) at org.apache.phoenix.util.PhoenixContextExecutor.call(PhoenixContextExecutor.java:76) at org.apache.phoenix.query.ConnectionQueryServicesImpl.init(ConnectionQueryServicesImpl.java:3266) at org.apache.phoenix.jdbc.PhoenixDriver.getConnectionQueryServices(PhoenixDriver.java:255) at 
org.apache.phoenix.jdbc.PhoenixEmbeddedDriver.createConnection(PhoenixEmbeddedDriver.java:144) at org.apache.phoenix.jdbc.PhoenixDriver.connect(PhoenixDriver.java:221) at sqlline.DatabaseConnection.connect(DatabaseConnection.java:135) at sqlline.DatabaseConnection.getConnection(DatabaseConnection.java:192) at sqlline.Commands.connect(Commands.java:1364) at sqlline.Commands.connect(Commands.java:1244) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at sqlline.ReflectiveCommandHandler.execute(ReflectiveCommandHandler.java:38) at sqlline.SqlLine.dispatch(SqlLine.java:730) at sqlline.SqlLine.initArgs(SqlLine.java:410) at sqlline.SqlLine.begin(SqlLine.java:515) at sqlline.SqlLine.start(SqlLine.java:267) at sqlline.SqlLine.main(SqlLine.java:206) sqlline version 1.9.0
03-18
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

我不会c语言

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值