D. Balanced Round

最小删除操作实现数组相邻元素间差值不超过k
文章讲述了如何使用贪心策略对数组进行排序,以减少需要删除的元素,使得数组中元素之间的差值不超过给定阈值k。通过计数符合条件的连续差值,得出最小删除数量。

题目:

输入
7
5 1
1 2 4 5 6
1 2
10
8 3
17 3 1 20 12 5 17 12
4 2
2 4 6 8
5 3
2 3 19 10 8
3 4
1 10 5
8 1
8 3 1 4 5 10 7 3

输出
2
0
5
0
3
1
4

 

 

 思路:

       题目的意思是:最少需要删除多少个元素,才能使剩下的元素经过重新排列后,任意相邻两个元素之差的绝对值都 <= k

这里采用贪心加排序:先将数组从小到大排序,使相邻元素的差值尽可能小;然后再遍历一遍,找出相邻两数之差 <= k 的最长连续段的长度;最后用元素总数减去这个最长连续段的长度,就是需要删除的最少元素数量

代码详解如下:

#include <iostream>
#include <vector>
#include <algorithm>
// All(c): expands to the iterator pair "c.begin(),c.end()" for range-taking algorithms.
#define All(x) x.begin(),x.end()
// Replaces every `endl` token with the character '\n', so output lines are
// terminated without the stream flush that std::endl would perform.
// NOTE(review): this redefines a standard-library name via the preprocessor;
// writing `std::endl` anywhere below this line will no longer compile.
#define endl '\n'
// Fast-I/O setup: turns off C stdio synchronisation and unties cin/cout.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C++ - consider renaming.
#define ___G std::ios::sync_with_stdio(false),cin.tie(0), cout.tie(0)
using namespace std;

// Solve one test case read from standard input.
//
// Input : n k, followed by n integers.
// Output: the minimum number of elements to delete so that the remaining
//         elements can be arranged with every adjacent difference <= k,
//         followed by a newline.
//
// Greedy idea: after sorting, adjacent gaps are as small as possible, so the
// elements we keep must form one contiguous run of the sorted array in which
// every neighbouring gap is <= k. The answer is n minus the longest such run.
inline void solve()
{
	int n = 0, k = 0;
	if (!(std::cin >> n >> k))
		return;	// truncated or malformed input: nothing to answer

	if (n <= 0)
	{
		// An empty array needs no deletions (the naive n - 1 would print -1).
		std::cout << 0 << '\n';
		return;
	}

	std::vector<int> a(n);
	for (int &value : a)
		std::cin >> value;

	// Sorting minimises the gaps between neighbours.
	std::sort(a.begin(), a.end());

	// `run` is the length of the current run whose neighbouring gaps are all
	// <= k; a single element is always a valid run, hence the initial 1.
	// `best` tracks the longest run seen so far, so no extra storage is needed.
	int run = 1;
	int best = 1;
	for (int i = 1; i < n; ++i)
	{
		// Widen before subtracting so extreme values cannot overflow int.
		const long long gap = static_cast<long long>(a[i]) - a[i - 1];
		run = (gap > k) ? 1 : run + 1;
		best = std::max(best, run);
	}

	// Everything outside the longest valid run has to be deleted.
	std::cout << n - best << '\n';
}

// Program entry point: reads the number of test cases, then runs solve()
// once per test case.
int main()
{
	// Fast I/O: decouple C++ streams from C stdio and stop cin from flushing
	// cout before every read.
	std::ios::sync_with_stdio(false);
	std::cin.tie(nullptr);

	int tests = 0;
	if (!(std::cin >> tests))
		return 0;	// no test count available: nothing to do

	while (tests-- > 0)
	{
		solve();
	}
	return 0;
}

最后提交:

D:\PythonProject\moxing1\.venv\Scripts\python.exe D:\PythonProject\moxing1\try.py 2025-08-14 12:37:47.897094: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 2025-08-14 12:37:48.651773: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. WARNING:tensorflow:From D:\PythonProject\moxing1\.venv\Lib\site-packages\tf_keras\src\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead. 原始数据集大小: 545 有效样本数: 545 类别权重: {0: np.float64(4.866071428571429), 1: np.float64(1.2976190476190477), 2: np.float64(1.792763157894737), 3: np.float64(0.4055059523809524)} Traceback (most recent call last): File "D:\PythonProject\moxing1\try.py", line 314, in <module> trained_model, history = train_and_evaluate() ^^^^^^^^^^^^^^^^^^^^ File "D:\PythonProject\moxing1\try.py", line 216, in train_and_evaluate y_train, y_test, class_weights) = load_and_preprocess_data() ^^^^^^^^^^^^^^^^^^^^^^^^^^ File "D:\PythonProject\moxing1\try.py", line 101, in load_and_preprocess_data y_train = utils.to_categorical(y_train, NUM_CLASSES) ^^^^^^^^^^^^^^^^^^^^ AttributeError: module 'tensorflow.python.keras.utils' has no attribute 'to_categorical' Process finished with exit code 1
08-15
# -*- coding: utf-8 -*- """ 📌 中国研究生数学建模竞赛 E题 · 任务三(增强版) 🔧 方法:CORAL + Random Forest + 伪标签迭代训练(Self-Training) 🎯 目标:提升目标域预测置信度与一致性 """ import numpy as np import pandas as pd from sklearn.preprocessing import StandardScaler from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import accuracy_score import matplotlib.pyplot as plt import seaborn as sns # ====================== 超参数设置 ====================== INPUT_DIM = 41 NUM_CLASSES = 4 N_TREES = 100 MAX_DEPTH = 10 RANDOM_STATE = 42 MAX_ITER = 3 # 最大伪标签迭代次数 CONFIDENCE_THRESHOLD = 0.6 # 置信度阈值:高于此值才加入训练 EARLY_STOPPING = True # 若无新样本加入则提前停止 class_names = ['Normal', 'Outer Race Fault', 'Inner Race Fault', 'Ball Fault'] feature_cols_ignore = ['filename', 'label', 'domain'] # ====================== CORAL 对齐函数(同前正确实现)====================== def coral_loss(source_features, target_features): Xs = (source_features - source_features.mean(0, keepdims=True)) / (source_features.std(0, keepdims=True) + 1e-6) Xt = (target_features - target_features.mean(0, keepdims=True)) / (target_features.std(0, keepdims=True) + 1e-6) Cs = np.cov(Xs.T) Ct = np.cov(Xt.T) return np.linalg.norm(Cs - Ct, 'fro') ** 2 def coral_align(Xs, Xt): d = Xs.shape[1] Xs_mean = Xs.mean(axis=0, keepdims=True) Xt_mean = Xt.mean(axis=0, keepdims=True) Xs_c = Xs - Xs_mean Xt_c = Xt - Xt_mean ns, nt = len(Xs), len(Xt) Cs = (Xs_c.T @ Xs_c) / (ns - 1) + 1e-6 * np.eye(d) Ct = (Xt_c.T @ Xt_c) / (nt - 1) + 1e-6 * np.eye(d) U_s, S_s, Vt_s = np.linalg.svd(Cs) U_t, S_t, Vt_t = np.linalg.svd(Ct) Cs_sqrt = U_s @ np.diag(np.sqrt(S_s)) @ U_s.T Ct_inv_sqrt = U_t @ np.diag(S_t ** -0.5) @ U_t.T Xt_aligned = Xt_c @ Ct_inv_sqrt @ Cs_sqrt + Xs_mean return Xt_aligned.astype(np.float32) # ====================== 主函数:带伪标签迭代训练 ====================== def main_with_self_training(): print("🚀 开始执行任务三(增强版):CORAL + RF + 伪标签迭代训练") # 1. 
加载数据 df = pd.read_csv('extracted_features_with_domain.csv') feature_cols = [col for col in df.columns if col not in feature_cols_ignore] source_data = df[df['domain'] == 'source'].copy() target_data = df[df['domain'] == 'target'].copy() Xs = source_data[feature_cols].values.astype(np.float32) ys = source_data['label'].values.astype(int) Xt = target_data[feature_cols].values.astype(np.float32) filenames_target = target_data['filename'].values print(f"✅ 源域样本数: {len(Xs)}") print(f"✅ 目标域样本数: {len(Xt)}") # 2. 数据标准化(使用源域标准) scaler = StandardScaler() X_all = np.vstack((Xs, Xt)) X_all_scaled = scaler.fit_transform(X_all) Xs_scaled = X_all_scaled[:len(Xs)] Xt_scaled = X_all_scaled[len(Xs):] # 3. CORAL 对齐(只做一次) print("🔄 正在使用 CORAL 对齐特征...") loss_before = coral_loss(Xs_scaled, Xt_scaled) Xt_aligned = coral_align(Xs_scaled, Xt_scaled) loss_after = coral_loss(Xs_scaled, Xt_aligned) print(f"📊 CORAL Loss Before: {loss_before:.4f} → After: {loss_after:.4f}") # 4. 初始化训练集 X_train = Xs_scaled.copy() y_train = ys.copy() initial_model_trained = False # 存储每轮结果 history = { 'iter': [], 'added': [], 'conf_avg': [], 'preds': [], 'probas': [] } for it in range(MAX_ITER): print(f"\n🔄 进行第 {it+1} 轮伪标签训练...") # 训练模型 rf_model = RandomForestClassifier( n_estimators=N_TREES, max_depth=MAX_DEPTH, random_state=RANDOM_STATE, class_weight='balanced' ) rf_model.fit(X_train, y_train) initial_model_trained = True # 在目标域上预测 probas = rf_model.predict_proba(Xt_aligned) predictions = rf_model.predict(Xt_aligned) confidences = np.max(probas, axis=1) avg_conf = confidences.mean() pred_labels = [class_names[i] for i in predictions] # 找出高置信样本(用于伪标签) high_conf_mask = confidences >= CONFIDENCE_THRESHOLD num_new = high_conf_mask.sum() print(f"📈 第{it+1}轮平均置信度: {avg_conf:.3f}") print(f"🟢 新增高置信样本数: {num_new}") # 记录历史 history['iter'].append(it+1) history['added'].append(num_new) history['conf_avg'].append(avg_conf) history['preds'].append(pred_labels.copy()) history['probas'].append(probas.copy()) # 如果没有新样本加入,提前终止 if 
num_new == 0 and EARLY_STOPPING: print("🔚 无新增高置信样本,提前结束迭代。") break # 将高置信样本加入训练集 X_pseudo = Xt_aligned[high_conf_mask] y_pseudo = predictions[high_conf_mask] # 更新训练集 X_train = np.vstack([X_train, X_pseudo]) y_train = np.hstack([y_train, y_pseudo]) print(f"🧠 当前训练集大小: {len(X_train)} (源域{len(Xs)}, 伪标签{len(X_pseudo)})") # ====================== 输出最终结果 ====================== final_probas = history['probas'][-1] final_preds = history['preds'][-1] final_conf = np.max(final_probas, axis=1) result_df = pd.DataFrame({ 'File': [f.split('.')[0] for f in filenames_target], 'Predicted_Label': final_preds, 'Confidence': final_conf.round(3) }) for i, cls_name in enumerate(class_names): result_df[f'Prob_{cls_name}'] = final_probas[:, i].round(3) result_df.to_csv('predicted_labels_CORAL_RF_SELFTRAIN.csv', index=False) print("\n📋 最终预测结果:") print(result_df.to_string(index=False)) print("💾 已保存至: predicted_labels_CORAL_RF_SELFTRAIN.csv") # ====================== 可视化:迭代过程 ====================== plt.figure(figsize=(12, 5)) # --- 左图:每轮新增样本与平均置信度 --- ax1 = plt.subplot(1, 2, 1) epochs = history['iter'] plt.plot(epochs, history['conf_avg'], 'bo-', label='Avg Confidence') plt.bar(epochs, history['added'], alpha=0.6, color='orange', label='New Pseudo Labels') plt.xlabel('Iteration') plt.ylabel('Value') plt.title('Self-Training Progress') plt.legend() plt.grid(True, alpha=0.3) # --- 右图:热力图展示概率变化 --- plt.subplot(1, 2, 2) first_probs = history['probas'][0] last_probs = history['probas'][-1] delta_probs = last_probs - first_probs # 变化量 sns.heatmap(delta_probs.T, annot=True, fmt=".2f", xticklabels=[f.split('.')[0] for f in filenames_target], yticklabels=class_names, cmap='RdBu_r', center=0) plt.title('Probability Change Before vs After Self-Training') plt.xlabel('Sample') plt.ylabel('Class') plt.tight_layout() plt.savefig('self_training_progress.png', dpi=150) plt.show() # ====================== 条形图对比置信度变化(可选)====================== if len(history['iter']) > 1: init_conf = 
history['probas'][0].max(axis=1) final_conf = history['probas'][-1].max(axis=1) diff_conf = final_conf - init_conf plt.figure(figsize=(10, 6)) colors = ['green' if x > 0 else 'red' for x in diff_conf] plt.barh(result_df['File'], diff_conf, color=colors, edgecolor='black', alpha=0.8) plt.axvline(0, color='gray', linestyle='--') plt.xlabel('Confidence Change (After - Before)') plt.title('Change in Prediction Confidence after Self-Training') plt.grid(True, axis='x', alpha=0.5) plt.tight_layout() plt.savefig('confidence_change_selftrain.png', dpi=150) plt.show() print("🎉 伪标签迭代训练完成!请查看输出文件与图表。") if __name__ == "__main__": main_with_self_training() 将代码中的可视化图改为展示迁移前后源域数据的四种故障情况和目标域数据的T-SNE图,以及使用CORAL对齐前后的差异表格和预测的置信区间分布图
09-24
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值