%% Q1_analysis_final_fixed.m
% 问题一:完整增强版 + 警告处理(修正版,无 ternary)
clear; clc; close all;
%% 1. 加载与预处理
data = readtable('附件3-公交车到站延误时间统计.csv');
data.weather = categorical(data.weather, {'sunny','rainy'});
data.period = categorical(data.period, {'off-peak','peak'});
% IQR 异常值剔除
d = data.delay_time;
Q = quantile(d, [0.25, 0.75]); IQR = Q(2)-Q(1);
mask = d >= (Q(1)-1.5*IQR) & d <= (Q(2)+1.5*IQR);
data = data(mask, :);
% 分组统计
[G, W, P] = findgroups(data.weather, data.period);
gm = splitapply(@mean, data.delay_time, G);
gs = splitapply(@std, data.delay_time, G);
gn = splitapply(@numel, data.delay_time, G);
Tstats = table(W, P, gm, gs, gn, 'VariableNames',{'Weather','Period','Mean','Std','Count'});
disp('=== 分组统计 ===');
disp(Tstats);
%% 2. 可视化
labels = strcat(string(W), '-', string(P));
figure('Name','箱线图'); boxplot(data.delay_time, G, 'Labels',labels);
title('不同天气-时段组合延误分布(箱线图)');
ylabel('延误时间 (分钟)');
saveas(gcf,'Q1_boxplot.png');
figure('Name','核密度估计'); hold on;
cols = lines(max(G));
for k=1:max(G)
[f,xi] = ksdensity(data.delay_time(G==k));
plot(xi,f,'Color',cols(k,:),'LineWidth',1.5);
end
legend(labels,'Location','best');
title('不同组合延误时间核密度估计');
xlabel('延误时间 (分钟)');
ylabel('密度');
saveas(gcf,'Q1_kde.png');
hold off;
%% 3. 正态性检验(彻底去除 Lilliefors 低 p 值警告)
% 保存当前所有警告状态
warnStateAll = warning;
% 关闭仅与 lillietest 相关的警告
warning('off','stats:lillietest:LowPValue');
% 计算残差
residuals = data.delay_time - gm(G);
% 执行检验
[hN, pN] = lillietest(residuals);
% 恢复之前的所有警告设置
warning(warnStateAll);
% 输出结果
if hN
normMsg = '拒绝残差正态性';
else
normMsg = '未拒绝残差正态性';
end
fprintf('Lilliefors 正态性检验:p = %.4f,%s\n', pN, normMsg);
% Q–Q 图
figure('Name','残差 Q–Q 图');
qqplot(residuals);
title('残差 Q–Q 图');
saveas(gcf,'Q1_qqplot.png');
%% 4. 方差齐性检验
pL = vartestn(data.delay_time, G, 'TestType','LeveneAbsolute','Display','off');
fprintf('Levene 方差齐性检验:p = %.4f\n', pL);
% 若方差不齐,进行 log 变换
if pL < 0.05
fprintf('方差不齐,进行 log 变换后重做 ANOVA\n');
data.delay_time = log(data.delay_time - min(data.delay_time) + 1);
% 重新分组统计残差等需重新计算:这里只直接跑 ANOVA
[pA, tblA, statsA] = anovan(data.delay_time, ...
{data.weather, data.period}, ...
'model','interaction','varnames',{'Weather','Period'}, ...
'display','off');
labelStr = 'Log 变换后';
else
[pA, tblA, statsA] = anovan(data.delay_time, ...
{data.weather, data.period}, ...
'model','interaction','varnames',{'Weather','Period'}, ...
'display','off');
labelStr = '原始数据';
end
%% 5. 双因素 ANOVA & 贡献率
SS = cell2mat(tblA(2:4,2)); SSE = tblA{5,2}; SST = sum(SS) + SSE;
contri = SS ./ SST * 100;
fprintf('\n=== ANOVA 结果(%s)===\n', labelStr);
fprintf(' 天气主效应: F=%.2f, p=%.4f, 贡献率=%.2f%%\n', ...
tblA{2,6}, tblA{2,7}, contri(1));
fprintf(' 时段主效应: F=%.2f, p=%.4f, 贡献率=%.2f%%\n', ...
tblA{3,6}, tblA{3,7}, contri(2));
fprintf(' 交互作用效应: F=%.2f, p=%.4f, 贡献率=%.2f%%\n', ...
tblA{4,6}, tblA{4,7}, contri(3));
%% 6. 事后多重比较
[c,~,~,gnames] = multcompare(statsA, 'Dimension',[1 2], 'Display','off');
Tmc = array2table(c, 'VariableNames',{'Group1','Group2','Lower','Estimate','Upper','pValue'});
Tmc.Group1 = gnames(c(:,1));
Tmc.Group2 = gnames(c(:,2));
disp('=== 交互作用事后比较 ===');
disp(Tmc);