E. Experiment 8_6: Matrix Transformation x2

This post works through a concrete algorithm problem: given an n*n matrix, rearrange its rows so that the row averages appear in increasing order. It provides a C implementation and walks through the steps: reading the matrix, computing each row's sum, comparing and swapping adjacent rows to sort them, and finally printing the resulting matrix.

Time limit: 1000 ms    Memory limit: 65536 KB
Author: scshuanghai    Special judge: False
Problem statement
Problem description:
You are given an arbitrary n*n matrix (the number of rows equals the number of columns, both n). Your task is to rearrange its rows so that the average of each row's elements appears in increasing order. If several rows share the same average, output them in their original order.

Input and output:
First read an integer n, the number of rows (and columns) of the matrix, where 1 ≤ n ≤ 100. Then read the n*n matrix elements; each element's absolute value does not exceed 1000000. Output the matrix after the row transformation, with the elements of each row separated by spaces and a newline after the last element of each row.

Sample run:
Sample 1:
3↙
5 5 5↙
3 3 3↙
1 1 1↙

1 1 1
3 3 3
5 5 5

#include<stdio.h>

int main()
{
	int n;
	scanf("%d",&n);
	/* a[i][1..n] holds row i; a[i][n+1] accumulates the row sum.
	   Since every row has exactly n elements, comparing row sums is
	   equivalent to comparing row averages. */
	long long a[110][110]={0};
	for(int i=1;i<=n;i++)
	{
		for(int j=1;j<=n;j++)
		{
			scanf("%lld",&a[i][j]);
			a[i][n+1]+=a[i][j];
		}
	}
	/* Bubble-sort the rows by their sums. Bubble sort is stable, so rows
	   with equal averages keep their original order, as required.
	   The swap buffer is long long to match the element type. */
	long long t[110]={0};
	for(int i=1;i<n;i++)
	{
		for(int j=1;j<=n-i;j++)
		{
			if(a[j][n+1]>a[j+1][n+1])
			{
				for(int p=1;p<=n+1;p++)
				{
					t[p]=a[j][p];
					a[j][p]=a[j+1][p];
					a[j+1][p]=t[p];
				}
			}
		}
	}
	/* Print the sorted matrix: elements separated by single spaces,
	   with a newline after the last element of each row. */
	for(int i=1;i<=n;i++)
	{
		for(int j=1;j<=n;j++)
		{
			if(j<=n-1)
			{
				printf("%lld ",a[i][j]);
			}
			else
			{
				printf("%lld\n",a[i][j]);
			}
		}
	}
	return 0;
}
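
For comparison, here is a shorter sketch of the same idea (not from the original post): instead of physically swapping whole rows, sort a small array of per-row keys with qsort and print the rows in the resulting order. Because qsort is not guaranteed to be stable, the comparator falls back to the original row index when two sums are equal; the RowKey struct and cmp_rowkey names are introduced here purely for illustration.

#include<stdio.h>
#include<stdlib.h>

/* One key per row: its original index and its element sum. */
typedef struct {
	int idx;        /* original row index */
	long long sum;  /* sum of the row's elements */
} RowKey;

/* Order by sum; break ties by original index so rows with equal
   averages keep their input order (qsort itself is not stable). */
static int cmp_rowkey(const void *pa, const void *pb)
{
	const RowKey *a = pa, *b = pb;
	if (a->sum != b->sum)
		return (a->sum < b->sum) ? -1 : 1;
	return a->idx - b->idx;
}

int main()
{
	static long long a[110][110];
	RowKey key[110];
	int n;
	scanf("%d",&n);
	for(int i=0;i<n;i++)
	{
		key[i].idx = i;
		key[i].sum = 0;
		for(int j=0;j<n;j++)
		{
			scanf("%lld",&a[i][j]);
			key[i].sum += a[i][j];
		}
	}
	qsort(key, n, sizeof(RowKey), cmp_rowkey);
	for(int i=0;i<n;i++)
	{
		int r = key[i].idx;
		for(int j=0;j<n;j++)
			printf("%lld%c", a[r][j], j==n-1 ? '\n' : ' ');
	}
	return 0;
}

This keeps the comparisons at O(n log n) and avoids the O(n) element copies that each bubble-sort swap performs, although for n ≤ 100 both approaches run well within the time limit.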