Solving "Find the Number That Appears More Than Half the Time in an Integer Array" in Python

Problem Description

小R has picked some students from the class, and each student gives a number. It is known that one of these numbers appears more than half of the total count. Your task is to help 小R find that number.

Test Cases

Sample 1:
Input: array = [1, 3, 8, 2, 3, 1, 3, 3, 3]
Output: 3

Sample 2:
Input: array = [5, 5, 5, 1, 2, 5, 5]
Output: 5

Sample 3:
Input: array = [9, 9, 9, 9, 8, 9, 8, 8]
Output: 9

Solution Approach

This problem combines hash tables with basic counting. We need the number that appears more than half the time in the array. Since the problem guarantees that such a number exists, it suffices to count how often each number occurs: a hash table (Counter in Python) does the counting efficiently, and a single pass over the table then yields the number whose count exceeds half of the array length.

Count occurrences: use Counter to tally every number in the array, producing a hash table whose keys are the numbers and whose values are their counts.
Find the majority number: scan the hash table and return the first number whose count times 2 exceeds the array length; see the short demonstration below.
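To make the two steps concrete, here is a minimal sketch (using sample 1; it is not part of the two solutions given later) of the hash table Counter builds and of the majority check that reads off the answer:

from collections import Counter

# Sample 1: 3 appears 5 times out of 9 numbers, and 5 * 2 > 9
counts = Counter([1, 3, 8, 2, 3, 1, 3, 3, 3])
print(counts)             # e.g. Counter({3: 5, 1: 2, 8: 1, 2: 1})
print(counts[3] * 2 > 9)  # True, so 3 is the majority number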

Time complexity: O(n), where n is the length of the array: one pass over the array to build the counts, and one pass over the hash table to find the target number.
Space complexity: O(n): the hash table grows with the number of distinct values, which is at most n in the worst case.

Code

Method 1

A direct implementation of the approach described above:

def solution(array):
    # Dictionary recording how many times each number appears
    count_dict = {}

    # Traverse the array and count occurrences of each number
    for num in array:
        # If the number is already in the dictionary, increment its count
        if num in count_dict:
            count_dict[num] += 1
        else:
            # Otherwise initialize its count to 1
            count_dict[num] = 1

    # Find the number whose count exceeds half of the array length
    half_length = len(array) // 2
    for num, count in count_dict.items():
        if count > half_length:
            return num

    # Fallback return; the problem guarantees a majority number exists,
    # so this line is never reached
    return 0

if __name__ == "__main__":
    # Test cases
    print(solution(array = [1, 3, 8, 2, 3, 1, 3, 3, 3]))
    print(solution(array = [5, 5, 5, 1, 2, 5, 5]))
    print(solution(array = [9, 9, 9, 9, 8, 9, 8, 8]))
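A small side note on the counting loop in Method 1 (a variant sketch, not from the original code): dict.get collapses the if/else branch into a single line while building the same dictionary:

array = [1, 3, 8, 2, 3, 1, 3, 3, 3]  # sample 1
count_dict = {}
for num in array:
    # dict.get(num, 0) returns 0 the first time num is seen
    count_dict[num] = count_dict.get(num, 0) + 1
print(count_dict)  # {1: 2, 3: 5, 8: 1, 2: 1}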

Method 2

A shorter version using Counter:

def solution(array: list) -> int:
    from collections import Counter
    c = Counter(array)
    # A number is the majority when its count times 2 exceeds the array length
    return next(k for k, v in c.items() if v * 2 > len(array))


if __name__ == '__main__':
    print(solution(array = [1, 3, 8, 2, 3, 1, 3, 3, 3]) == 3)
    print(solution(array = [5, 5, 5, 1, 2, 5, 5]) == 5)
    print(solution(array = [9, 9, 9, 9, 8, 9, 8, 8]) == 9)
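For completeness: the O(n) extra space noted in the complexity analysis can be avoided. The sketch below uses the Boyer-Moore voting technique, which is a different algorithm from the two methods above (the name solution_voting is only for illustration); like them, it relies on the problem's guarantee that a majority number exists:

def solution_voting(array):
    # Boyer-Moore voting: keep one candidate and a counter.
    # The majority number can never be fully cancelled out, because it
    # occurs more often than all other numbers combined.
    candidate, count = None, 0
    for num in array:
        if count == 0:
            candidate = num
        count += 1 if num == candidate else -1
    return candidate

if __name__ == '__main__':
    print(solution_voting([1, 3, 8, 2, 3, 1, 3, 3, 3]) == 3)
    print(solution_voting([5, 5, 5, 1, 2, 5, 5]) == 5)
    print(solution_voting([9, 9, 9, 9, 8, 9, 8, 8]) == 9)

This variant runs in O(n) time and O(1) extra space.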