function audio_pitch_correction
% AUDIO_PITCH_CORRECTION  Build the main window of the pitch-correction tool.
%   Creates the figure, its buttons, the recording status label and three
%   waveform axes, then stores the shared application state in the
%   figure's UserData so that the callbacks can reach it.

main_win = uifigure('Name', '音频音准矫正系统', 'Position', [100 100 900 700]);

% --- Source audio: import from file or record from the microphone ---
uilabel(main_win, 'Position', [50 680 300 20], 'Text', '待矫正音频来源:', 'FontWeight', 'bold');
source_group = uibuttongroup(main_win, 'Position', [50 630 300 40], 'Title', '');
on_import_source = @(btn, event) select_audio(main_win, 'source');
uibutton(source_group, 'Position', [10 10 130 30], 'Text', '导入音频文件', ...
    'ButtonPushedFcn', on_import_source);
on_record = @(btn, event) record_audio(main_win);
uibutton(source_group, 'Position', [160 10 130 30], 'Text', '录制音频', ...
    'ButtonPushedFcn', on_record);

% --- Reference audio: import only ---
uilabel(main_win, 'Position', [400 680 300 20], 'Text', '参考音频来源:', 'FontWeight', 'bold');
on_import_reference = @(btn, event) select_audio(main_win, 'reference');
uibutton(main_win, 'Position', [400 630 150 30], 'Text', '导入参考音频', ...
    'ButtonPushedFcn', on_import_reference);

% --- "Start correction" button; disabled until both clips are present ---
on_process = @(btn, event) process_audio(main_win);
start_btn = uibutton(main_win, 'Position', [600 630 150 30], ...
    'Text', '开始矫正', 'Enable', 'off', ...
    'ButtonPushedFcn', on_process);

% --- Playback and save buttons ---
on_play_source = @(btn, event) play_audio(main_win, 'source');
uibutton(main_win, 'Position', [50 580 150 30], 'Text', '播放原始音频', ...
    'ButtonPushedFcn', on_play_source);
on_play_corrected = @(btn, event) play_audio(main_win, 'corrected');
uibutton(main_win, 'Position', [250 580 150 30], 'Text', '播放矫正音频', ...
    'ButtonPushedFcn', on_play_corrected);
on_save = @(btn, event) save_audio(main_win);
uibutton(main_win, 'Position', [450 580 150 30], 'Text', '保存矫正音频', ...
    'ButtonPushedFcn', on_save);

% --- Recording status text ---
status_label = uilabel(main_win, 'Position', [650 580 200 30], ...
    'Text', '准备录音', 'FontColor', [0 0.5 0]);

% --- Waveform axes, stacked top to bottom ---
source_axes = uiaxes(main_win, 'Position', [50 350 800 150]);
title(source_axes, '待矫正音频波形');
reference_axes = uiaxes(main_win, 'Position', [50 180 800 150]);
title(reference_axes, '参考音频波形');
corrected_axes = uiaxes(main_win, 'Position', [50 10 800 150]);
title(corrected_axes, '矫正后音频波形');

% --- Shared state consumed by the callbacks ---
state = struct();
state.source_audio = [];
state.reference_audio = [];
state.corrected_audio = [];
state.fs = 44100;                       % default sample rate
state.process_btn = start_btn;
state.axes = struct('source', source_axes, 'reference', reference_axes, 'corrected', corrected_axes);
state.recording_label = status_label;
state.recorder = [];                    % audiorecorder object, once created
state.timer = [];                       % countdown timer, once created
main_win.UserData = state;
end
function select_audio(fig, audio_type)
% SELECT_AUDIO  Let the user pick an audio file and store it as source or reference.
%   fig        - main application figure (state lives in fig.UserData)
%   audio_type - 'source' or 'reference'; selects the UserData field
%                ([audio_type '_audio']) and the axes to draw into
%
%   The clip is mixed down to mono and cut to its first 20 seconds. The
%   application keeps ONE sample rate (fig.UserData.fs) for every clip, so
%   when the other clip is already loaded at a different rate the new clip
%   is resampled to that rate instead of silently overwriting fs.
[file, path] = uigetfile({'*.wav;*.mp3;*.ogg;*.flac', ...
    '音频文件 (*.wav,*.mp3,*.ogg,*.flac)'});
if isequal(file, 0)
    return;                              % dialog cancelled
end
filename = fullfile(path, file);

% audioread throws on unreadable or unsupported files; report it
% instead of letting the callback fail.
try
    [audio, fs] = audioread(filename);
catch ME
    errordlg(['无法读取音频文件: ' ME.message], '读取错误');
    return;
end
if isempty(audio)
    errordlg('音频文件为空!', '读取错误');
    return;
end

% Stereo (or multi-channel) -> mono by averaging the channels
if size(audio, 2) > 1
    audio = mean(audio, 2);
end

% Keep only the first 20 seconds
max_samples = min(20*fs, length(audio));
audio = audio(1:max_samples);

% Match the sample rate of the clip that is already loaded, if any
if strcmp(audio_type, 'source')
    other_field = 'reference_audio';
else
    other_field = 'source_audio';
end
shared_fs = fig.UserData.fs;
if ~isempty(fig.UserData.(other_field)) && shared_fs ~= fs
    audio = resample(audio, shared_fs, fs);
    fs = shared_fs;
end

% Store the data
fig.UserData.([audio_type '_audio']) = audio;
fig.UserData.fs = fs;

% Refresh the waveform display
ax = fig.UserData.axes.(audio_type);
plot(ax, (1:length(audio))/fs, audio);
xlabel(ax, '时间 (s)'); ylabel(ax, '幅度');

% Enable processing once both clips are available
if ~isempty(fig.UserData.source_audio) && ~isempty(fig.UserData.reference_audio)
    fig.UserData.process_btn.Enable = 'on';
end
end
function record_audio(fig)
% RECORD_AUDIO  Open the small recording dialog.
%   fig - main application figure, handed on to the start/stop callbacks.
%   The dialog offers a duration field, a sample-rate drop-down and
%   start / stop / close buttons.
dlg = uifigure('Name', '音频录制', 'Position', [300 300 400 200]);

% Duration (seconds), limited to 1..30
uilabel(dlg, 'Position', [50 150 100 20], 'Text', '录音时长 (秒):');
duration_field = uieditfield(dlg, 'numeric', ...
    'Position', [160 150 100 20], 'Value', 5, 'Limits', [1 30]);

% Sample rate
uilabel(dlg, 'Position', [50 120 100 20], 'Text', '采样率:');
rate_choices = {'8000', '16000', '44100', '48000'};
rate_menu = uidropdown(dlg, ...
    'Position', [160 120 100 20], ...
    'Items', rate_choices, ...
    'Value', '44100');

% Control buttons. The widget values are read when the button is pushed,
% not when the dialog is built.
on_start = @(btn, event) start_recording(fig, duration_field.Value, str2double(rate_menu.Value));
uibutton(dlg, 'Position', [50 70 100 30], ...
    'Text', '开始录音', ...
    'ButtonPushedFcn', on_start);
on_stop = @(btn, event) stop_recording(fig);
uibutton(dlg, 'Position', [160 70 100 30], ...
    'Text', '停止录音', ...
    'ButtonPushedFcn', on_stop);
on_close = @(btn, event) close(dlg);
uibutton(dlg, 'Position', [270 70 100 30], ...
    'Text', '关闭', ...
    'ButtonPushedFcn', on_close);
end
function start_recording(fig, duration, fs)
% START_RECORDING  Start a timed mono 16-bit recording and a countdown timer.
%   fig      - main application figure (state lives in fig.UserData)
%   duration - requested length in seconds (may be fractional)
%   fs       - sample rate in Hz
%
%   Changes relative to the naive version:
%   * a session still in progress is stopped first, so no timer is leaked;
%   * the countdown uses a whole number of ticks and a 1 s start delay, so
%     the final tick (which triggers stop_recording) fires when the
%     recording is over instead of one second early;
%   * clips already held in UserData are resampled to the new rate,
%     because the application keeps a single shared fs for every clip;
%   * failure to open the audio device is reported to the user.
if ~isvalid(fig)
    return;
end

% Stop whatever is still running from a previous click
old_timer = fig.UserData.timer;
if ~isempty(old_timer) && isvalid(old_timer)
    stop(old_timer);
    delete(old_timer);
end
fig.UserData.timer = [];
old_recorder = fig.UserData.recorder;
if ~isempty(old_recorder) && isrecording(old_recorder)
    stop(old_recorder);
end

% Update the status label
fig.UserData.recording_label.Text = '录音中...';
fig.UserData.recording_label.FontColor = [1 0 0];
drawnow;

% Create the recorder and start recording (asynchronous, fixed length)
try
    recorder = audiorecorder(fs, 16, 1); % 16-bit, mono
    record(recorder, duration);
catch ME
    fig.UserData.recording_label.Text = '准备录音';
    fig.UserData.recording_label.FontColor = [0 0.5 0];
    errordlg(['无法启动录音: ' ME.message], '录音错误');
    return;
end

% Keep every stored clip consistent with the new shared sample rate
old_fs = fig.UserData.fs;
if old_fs ~= fs
    clip_fields = {'source_audio', 'reference_audio', 'corrected_audio'};
    for k = 1:numel(clip_fields)
        clip = fig.UserData.(clip_fields{k});
        if ~isempty(clip)
            fig.UserData.(clip_fields{k}) = resample(clip, fs, old_fs);
        end
    end
end
fig.UserData.recorder = recorder;
fig.UserData.fs = fs;

% Countdown timer: one tick per second, first tick after one second.
% TasksToExecute must be a positive integer, hence ceil().
n_ticks = max(1, ceil(duration));
t = timer('ExecutionMode', 'fixedRate', 'Period', 1, 'StartDelay', 1, ...
    'TasksToExecute', n_ticks, ...
    'TimerFcn', @(t,~) update_recording_timer(fig, t, n_ticks));
fig.UserData.timer = t;                  % store before start so callbacks can find it
start(t);
end
function update_recording_timer(fig, t, total_duration)
% UPDATE_RECORDING_TIMER  Timer tick: show the remaining seconds, stop at zero.
%   fig            - main application figure
%   t              - the timer object that fired
%   total_duration - number of ticks after which recording is finished
%
%   If the main window was closed while the timer is still running, the
%   timer is stopped instead of raising an error on every tick.
if ~isvalid(fig)
    stop(t);
    return;
end
elapsed = t.TasksExecuted;
remaining = total_duration - elapsed;
fig.UserData.recording_label.Text = sprintf('录音中: %d秒', remaining);
% Stop automatically once the countdown has run out
if remaining <= 0
    stop_recording(fig);
end
end
function stop_recording(fig)
% STOP_RECORDING  Stop recorder and countdown, then adopt the take as source audio.
%   fig - main application figure (state lives in fig.UserData)
%
%   Safe to call when nothing was ever recorded (stop pressed before
%   start) and after the main window has been closed; in those cases
%   no audio is fetched and the stored source clip is left untouched.
if ~isvalid(fig)
    return;
end

recorder = fig.UserData.recorder;
if ~isempty(recorder) && isrecording(recorder)
    stop(recorder);
end

% Stop and discard the countdown timer
if ~isempty(fig.UserData.timer) && isvalid(fig.UserData.timer)
    stop(fig.UserData.timer);
    delete(fig.UserData.timer);
    fig.UserData.timer = [];
end

% Nothing recorded: getaudiodata would throw, so just reset the label
if isempty(recorder) || recorder.TotalSamples == 0
    fig.UserData.recording_label.Text = '准备录音';
    fig.UserData.recording_label.FontColor = [0 0.5 0];
    return;
end

% Fetch the recorded samples
audio = getaudiodata(recorder);
fs = fig.UserData.fs;

% Update the status label
fig.UserData.recording_label.Text = '录音完成!';
fig.UserData.recording_label.FontColor = [0 0.5 0];

% Store as the clip to be corrected
fig.UserData.source_audio = audio;

% Refresh the waveform display
ax = fig.UserData.axes.source;
plot(ax, (1:length(audio))/fs, audio);
title(ax, '录制音频波形');
xlabel(ax, '时间 (s)'); ylabel(ax, '幅度');

% Enable processing once a reference clip is available as well
if ~isempty(fig.UserData.reference_audio)
    fig.UserData.process_btn.Enable = 'on';
end
end
function process_audio(fig)
% PROCESS_AUDIO  Run the pipeline: align -> extract pitch -> correct -> display.
%   fig - main application figure (state lives in fig.UserData)
%
%   On success the corrected clip is stored in fig.UserData.corrected_audio,
%   the three axes are redrawn with waveform plus pitch curve, a separate
%   comparison figure is opened and the average pitches are printed.
%   A failure in any stage closes the progress dialog and shows an error
%   dialog naming that stage.

% Check the window BEFORE touching its UserData
if ~isvalid(fig)
    errordlg('主窗口已关闭,无法处理音频!', '处理错误');
    return;
end
source = fig.UserData.source_audio;
reference = fig.UserData.reference_audio;
fs = fig.UserData.fs;
if isempty(source) || isempty(reference)
    errordlg('请先选择待矫正音频和参考音频!', '处理错误');
    return;
end

% Progress dialog
h = uiprogressdlg(fig, 'Title', '处理中', 'Message', '音频对齐...', 'Indeterminate', 'on');

% failure_prefix always names the stage currently running, so a single
% catch block can report where the pipeline broke.
failure_prefix = '音频对齐失败: ';
try
    % Step 1: alignment
    [aligned_source, aligned_ref] = improved_align_audio(source, reference, fs);

    % Step 2: fundamental-frequency extraction
    h.Message = '提取音高...';
    failure_prefix = '音高提取失败: ';
    [f0_source, time_source] = extract_pitch(aligned_source, fs);
    [f0_ref, time_ref] = extract_pitch(aligned_ref, fs);

    % Step 3: pitch correction
    h.Message = '矫正音调...';
    failure_prefix = '音高校正失败: ';
    [corrected, f0_corrected] = correct_pitch(fig, aligned_source, fs, f0_source, f0_ref, time_source, time_ref);
catch ME
    close(h);
    errordlg([failure_prefix ME.message], '处理错误');
    return;
end
close(h);

% Store the result and make sure the "play corrected" button is enabled
fig.UserData.corrected_audio = corrected;
play_btn = findobj(fig, 'Text', '播放矫正音频');
if ~isempty(play_btn)
    play_btn.Enable = 'on';
end

% Redraw the three axes: waveform on the left axis, pitch on the right
draw_wave_and_pitch(fig.UserData.axes.source, aligned_source, fs, ...
    time_source, f0_source, 'b', 'r', 1.5, '原始音频波形与音高');
draw_wave_and_pitch(fig.UserData.axes.reference, aligned_ref, fs, ...
    time_ref, f0_ref, 'g', 'm', 1.5, '参考音频波形与音高');
draw_wave_and_pitch(fig.UserData.axes.corrected, corrected, fs, ...
    time_source, f0_corrected, [0.5 0 0.5], [1 0.5 0], 2, '矫正后音频波形与音高');

% Combined comparison figure (waveforms and pitch tracks)
plot_pitch_comparison(time_source, f0_source, time_ref, f0_ref, f0_corrected,...
    aligned_source, aligned_ref, corrected, fs);
fprintf('原始音高平均: %.1f Hz\n', mean(f0_source(f0_source>0)));
fprintf('参考音高平均: %.1f Hz\n', mean(f0_ref(f0_ref>0)));
fprintf('矫正后音高平均: %.1f Hz\n', mean(f0_corrected(f0_corrected>0)));
end

function draw_wave_and_pitch(ax, wave, fs, pitch_time, pitch, wave_color, pitch_color, pitch_width, title_text)
% DRAW_WAVE_AND_PITCH  Draw a waveform (left y-axis) and its pitch track (right y-axis).
%   Helper of process_audio. The pitch vectors are trimmed to a common
%   length so a one-frame mismatch cannot make plot() fail.
cla(ax);
yyaxis(ax, 'left');
plot(ax, (1:length(wave))/fs, wave, 'Color', wave_color);
ylabel(ax, '幅度');
yyaxis(ax, 'right');
n = min(numel(pitch_time), numel(pitch));
plot(ax, pitch_time(1:n), pitch(1:n), 'Color', pitch_color, 'LineWidth', pitch_width);
ylabel(ax, '频率 (Hz)');
title(ax, title_text);
grid(ax, 'on');
end
function [aligned_src, aligned_ref] = improved_align_audio(src, ref, fs)
% IMPROVED_ALIGN_AUDIO  Time-align two clips and trim them to equal length.
%   src, ref - audio vectors sharing the sample rate fs (Hz)
%   Returns column vectors of identical length.
%
%   The delay is estimated by cross-correlating the spectral-magnitude
%   envelopes (100 ms frames, 50 ms hop) of both clips, so the resolution
%   of the estimate is one hop. The previous implementation compared
%   frame i of one clip only with frame i of the other, which always
%   produced a delay of zero.
src = src(:);
ref = ref(:);
win_size = round(0.1 * fs);   % 100 ms window
hop_size = round(0.05 * fs);  % 50 ms hop

sample_diff = 0;              % >0: src is late relative to ref
if numel(src) >= win_size && numel(ref) >= win_size
    S_src = spectrogram(src, win_size, win_size-hop_size, win_size, fs);
    S_ref = spectrogram(ref, win_size, win_size-hop_size, win_size, fs);

    % One value per frame: total spectral magnitude, mean removed so that
    % the correlation reacts to the envelope shape rather than its level
    env_src = sum(abs(S_src), 1);
    env_ref = sum(abs(S_ref), 1);
    env_src = env_src - mean(env_src);
    env_ref = env_ref - mean(env_ref);

    max_lag = max(1, min(numel(env_src), numel(env_ref)) - 1);
    [xc, lags] = xcorr(env_src, env_ref, max_lag);
    [peak, peak_idx] = max(xc);
    % A non-positive peak (e.g. both clips silent) carries no timing
    % information; keep the clips as they are in that case.
    if peak > 0
        sample_diff = lags(peak_idx) * hop_size;
    end
end

% Drop the leading samples of whichever clip starts later
if sample_diff > 0
    aligned_src = src(sample_diff+1:end);
    aligned_ref = ref;
elseif sample_diff < 0
    aligned_src = src;
    aligned_ref = ref(-sample_diff+1:end);
else
    aligned_src = src;
    aligned_ref = ref;
end

% Equal length
min_len = min(length(aligned_src), length(aligned_ref));
aligned_src = aligned_src(1:min_len);
aligned_ref = aligned_ref(1:min_len);
end
function mfcc = mfcc_feature(audio, fs, frame_size, hop_size)
% MFCC_FEATURE  13 mean-normalised MFCCs per frame.
%   audio      - audio vector
%   fs         - sample rate in Hz
%   frame_size - frame length in samples
%   hop_size   - hop in samples (optional, default frame_size/2)
%   Returns a 13-by-numFrames matrix. Row 1 holds the log frame energy
%   (it replaces the 0th cepstral coefficient); every row has its mean
%   over time removed (cepstral mean normalisation).
%
%   Fix: when two neighbouring mel band edges fall on the same FFT bin
%   (short frames / low frequencies) the triangle slopes used to evaluate
%   0/0 and filled the whole result with NaN. Such zero-width slopes are
%   now skipped and the filter degenerates to a single bin.
if nargin < 4
    hop_size = round(frame_size/2); % default: 50 % overlap
end

% Pre-emphasis
audio = filter([1 -0.97], 1, audio);

% Framing (last frame is zero-padded by buffer)
frames = buffer(audio, frame_size, frame_size - hop_size, 'nodelay');
num_frames = size(frames, 2);

% Hamming window
window = hamming(frame_size);
windowed_frames = frames .* repmat(window, 1, num_frames);

% Power spectrum (non-negative frequencies only)
nfft = 2^nextpow2(frame_size);
mag_frames = abs(fft(windowed_frames, nfft));
power_frames = (mag_frames(1:nfft/2+1, :)).^2;

% Mel filter bank: num_filters triangles, equally spaced on the mel scale
num_filters = 26;
mel_min = 0;
mel_max = 2595 * log10(1 + (fs/2)/700);
mel_points = linspace(mel_min, mel_max, num_filters + 2);
hz_points = 700 * (10.^(mel_points/2595) - 1);
bin_indices = floor((nfft+1) * hz_points / fs);   % zero-based FFT bins

filter_bank = zeros(num_filters, nfft/2+1);
for m = 1:num_filters
    left = bin_indices(m);
    center = bin_indices(m+1);
    right = bin_indices(m+2);
    % Rising slope (absent when left and center share a bin)
    if center > left
        k = left:center-1;
        filter_bank(m, k+1) = (k - left) / (center - left);
    end
    % Falling slope; with right == center only the peak bin remains
    if right > center
        k = center:right;
        filter_bank(m, k+1) = (right - k) / (right - center);
    else
        filter_bank(m, center+1) = 1;
    end
end

% Mel spectrum -> log -> DCT
mel_spectrum = filter_bank * power_frames;
log_mel = log(mel_spectrum + eps);
mfcc = dct(log_mel);

% Keep the first 13 coefficients
mfcc = mfcc(1:13, :);

% Replace the 0th coefficient by the log frame energy
energy = log(sum(power_frames) + eps);
mfcc(1, :) = energy;

% Cepstral mean normalisation
mfcc = mfcc - mean(mfcc, 2);
end
function [f0, time] = extract_pitch(audio, fs)
% EXTRACT_PITCH  Frame-wise fundamental frequency via normalised autocorrelation.
%   audio - audio vector
%   fs    - sample rate in Hz
%   f0    - 1-by-nFrames pitch track in Hz (50 ms frames, 25 ms hop);
%           NaN can remain where no pitch could be determined
%   time  - 1-by-nFrames frame-centre times in seconds
%   Both outputs are empty when the clip is shorter than one frame.
%
%   Fixes relative to the previous version:
%   * element k of the one-sided autocorrelation is lag k-1, so the period
%     is (index - 1) samples, not index samples;
%   * the parabolic peak refinement had its sign inverted;
%   * silent frames (normalised autocorrelation is 0/0) are marked
%     unvoiced instead of being passed to findpeaks;
%   * the band-pass upper edge is kept below Nyquist for low sample rates.
frame_size = round(0.05 * fs);
hop_size = round(0.025 * fs);
audio = audio(:);
n_frames = floor((length(audio) - frame_size) / hop_size) + 1;
if n_frames < 1
    f0 = zeros(1, 0);
    time = zeros(1, 0);
    return;
end
f0 = nan(1, n_frames);
time = (0:n_frames-1)*hop_size/fs + frame_size/(2*fs);

% Pre-processing: band-pass, then pre-emphasis
upper_hz = min(2000, 0.45 * fs);
[b, a] = butter(4, [80, upper_hz]/(fs/2), 'bandpass');
audio = filtfilt(b, a, audio);
audio = filter([1, -0.97], 1, audio);

for i = 1:n_frames
    start_idx = (i-1)*hop_size + 1;
    frame = audio(start_idx:start_idx+frame_size-1);
    if sum(frame.^2) <= eps
        continue;                         % silence: leave NaN
    end

    % Normalised autocorrelation, non-negative lags (element k = lag k-1)
    autocorr = xcorr(frame, 'normalized');
    autocorr = autocorr(frame_size:end);

    % First peak that reaches at least half the height of the tallest one
    [peaks, locs] = findpeaks(autocorr, 'MinPeakHeight', 0.3);
    if isempty(locs)
        continue;                         % no periodicity found: leave NaN
    end
    valid_locs = locs(peaks > 0.5*max(peaks));
    peak_idx = valid_locs(1);
    lag = peak_idx - 1;

    % Parabolic refinement of the peak position
    if peak_idx > 1 && peak_idx < length(autocorr)
        y = autocorr(peak_idx-1:peak_idx+1);
        curvature = y(1) - 2*y(2) + y(3);
        if curvature < 0                  % true local maximum
            lag = lag + 0.5*(y(1) - y(3))/curvature;
        end
    end

    if lag > 0
        f0(i) = fs / lag;
    end
end

% Post-processing: discard out-of-range values, then fill the gaps
valid = f0 > 80 & f0 < 1000;
f0(~valid) = NaN;
f0 = fillmissing(f0, 'movmedian', 10);
f0 = fillmissing(f0, 'pchip');

% Harmonic consistency: drop estimates whose second harmonic is weaker
% than half the fundamental in the frame spectrum
for i = 1:length(f0)
    if ~isnan(f0(i))
        harmonic_freq = 2*f0(i);
        harmonic_bin = round(harmonic_freq * frame_size / fs);
        if harmonic_bin <= frame_size/2
            frame_start = (i-1)*hop_size + 1;
            frame = audio(frame_start:frame_start+frame_size-1);
            spectrum = abs(fft(frame));
            harmonic_strength = spectrum(harmonic_bin+1);
            fundamental_strength = spectrum(round(f0(i)*frame_size/fs)+1);
            if harmonic_strength < 0.5*fundamental_strength
                f0(i) = NaN;
            end
        end
    end
end

% Final gap filling
f0 = fillmissing(f0, 'pchip');
end
function [corrected, f0_corrected] = correct_pitch(fig, audio, fs, f0_src, f0_ref, time_src, time_ref)
% CORRECT_PITCH  Shift the pitch of AUDIO segment by segment towards a reference.
%   fig                - figure that hosts the progress dialog
%   audio, fs          - clip to correct and its sample rate (Hz)
%   f0_src, time_src   - pitch track of AUDIO (Hz) and its frame times (s)
%   f0_ref, time_ref   - pitch track of the reference and its frame times
%   corrected          - corrected clip, same size as AUDIO, peak-normalised
%   f0_corrected       - pitch track re-extracted from CORRECTED
%
%   Fixes relative to the previous version:
%   * movmedian received a VECTOR as window length, which raises
%     "window length must be a finite positive real scalar"; a scalar
%     window derived from the median pitch error is used now;
%   * the per-segment frame mask compared pitch values (Hz) with segment
%     times (s), so no frame was ever selected and the ratio was always 1;
%   * f0_variation was undefined when the source had no voiced frame;
%   * without a usable reference the ratio stays 1 instead of being driven
%     towards 0;
%   * segment joins are cross-faded over a genuinely overlapping lead-in
%     rather than mixing samples that belong to different times;
%   * the progress dialog is closed on errors as well.
h = uiprogressdlg(fig, 'Title', '处理中', 'Message', '音高校正...');
try
    [corrected, f0_corrected] = correct_pitch_core(h, audio, fs, f0_src, f0_ref, time_src, time_ref);
catch ME
    close(h);
    rethrow(ME);
end
close(h);
end

function [corrected, f0_corrected] = correct_pitch_core(h, audio, fs, f0_src, f0_ref, time_src, time_ref)
% CORRECT_PITCH_CORE  Work horse of correct_pitch; H is the progress dialog to update.
out_size = size(audio);
audio = audio(:);
n_total = numel(audio);
f0_src = f0_src(:).';
time_src = time_src(:).';
f0_ref = f0_ref(:).';
time_ref = time_ref(:).';

% Segment length adapts to how fast the source pitch moves
voiced = f0_src > 0;
if nnz(voiced) > 1
    f0_variation = mean(abs(diff(f0_src(voiced))));
    segment_duration = max(0.1, min(0.5, 0.3/(f0_variation/50 + 0.1)));
else
    f0_variation = 0;
    segment_duration = 0.3;
end
segment_samples = round(segment_duration * fs);
n_segments = ceil(n_total / segment_samples);
corrected = zeros(n_total, 1);

% Reference pitch as a function of time (shape-preserving interpolation);
% NaN everywhere when the reference has fewer than two voiced frames
ref_voiced = f0_ref > 0;
if nnz(ref_voiced) >= 2
    ref_interp = @(t) interp1(time_ref(ref_voiced), f0_ref(ref_voiced), t, 'pchip', 'extrap');
else
    ref_interp = @(t) nan(size(t));
end
ref_at_src = ref_interp(time_src);
usable = voiced & ref_at_src > 0;        % frames where both pitches are known

% Weight 1..2 per frame: larger pitch errors count more
pitch_diff = abs(f0_src - ref_at_src);
pitch_diff(~usable | pitch_diff < 20) = 0;   % ignore small deviations
intensity_factor = min(2, 1 + pitch_diff/100);

% Cross-fade length (6-30 ms), limited to a third of a segment
fade_factor = min(1, max(0.2, f0_variation/100));
fade_samples = round(0.03 * fs * fade_factor);
fade_samples = min(fade_samples, floor(segment_samples/3));
if fade_samples <= 5
    fade_samples = 0;                    % too short to be worth fading
end

for seg = 1:n_segments
    h.Value = seg/n_segments;
    h.Message = sprintf('处理段 %d/%d (%.1f%%)', seg, n_segments, seg/n_segments*100);

    start_idx = (seg-1)*segment_samples + 1;
    end_idx = min(n_total, seg*segment_samples);

    % Weighted mean pitch of source and reference inside this segment
    in_seg = usable & time_src >= (start_idx-1)/fs & time_src <= end_idx/fs;
    if any(in_seg)
        weights = intensity_factor(in_seg);
        mean_src = sum(f0_src(in_seg).*weights) / sum(weights);
        mean_ref = sum(ref_at_src(in_seg).*weights) / sum(weights);
        ratio = (mean_ref / mean_src)^mean(weights);   % emphasise large errors
    else
        ratio = 1;
    end
    ratio = max(0.8, min(1.25, ratio));

    % Process the segment together with a short lead-in that overlaps the
    % end of the previous segment; the lead-in is used for the cross-fade
    if seg > 1
        lead = fade_samples;
    else
        lead = 0;
    end
    proc_start = start_idx - lead;
    segment_audio = audio(proc_start:end_idx);

    try
        shifted = enhanced_phase_vocoder(segment_audio, ratio, fs);
        if ~isreal(shifted)
            warning('段 %d 输出复数信号,强制转换为实数', seg);
            shifted = real(shifted);
        end
        shifted = shifted(:);
        if numel(shifted) ~= numel(segment_audio)
            error('correct_pitch:lengthMismatch', ...
                'Phase vocoder returned %d samples for a %d-sample segment.', ...
                numel(shifted), numel(segment_audio));
        end
    catch ME
        % Best effort: keep the unprocessed audio for this segment
        warning('段 %d 处理失败: %s', seg, ME.message);
        shifted = segment_audio;
    end

    if lead > 0
        ramp_in = (1 - cos(linspace(0, pi, lead).')) / 2;   % raised cosine 0 -> 1
        overlap = proc_start:(start_idx-1);
        corrected(overlap) = corrected(overlap).*(1 - ramp_in) + shifted(1:lead).*ramp_in;
    end
    corrected(start_idx:end_idx) = shifted(lead+1:end);
end

% Pitch track of the corrected clip
[f0_corrected, time_corr] = extract_pitch(corrected, fs);

% Median smoothing; the window must be a SCALAR, derived here from the
% typical remaining distance to the reference
if ~isempty(f0_corrected) && any(f0_corrected > 0)
    f0_diff = abs(f0_corrected - ref_interp(time_corr));
    typical_diff = median(f0_diff(isfinite(f0_diff)));
    if isempty(typical_diff) || isnan(typical_diff)
        typical_diff = 0;
    end
    smooth_window = max(3, min(15, round(typical_diff/5)));
    f0_corrected = movmedian(f0_corrected, smooth_window);
end

% Real-valued, peak-normalised output in the caller's orientation
corrected = real(corrected);
max_amp = max(abs(corrected));
if ~isempty(max_amp) && max_amp > 0
    corrected = corrected / max_amp;
else
    corrected = zeros(size(corrected));
end
corrected = reshape(corrected, out_size);
end
function y = enhanced_phase_vocoder(x, ratio, fs)
% ENHANCED_PHASE_VOCODER  Shift the pitch of X by RATIO, keeping its length.
%   x     - audio vector (treated as a column)
%   ratio - pitch ratio, >1 raises and <1 lowers the pitch
%   fs    - sample rate in Hz
%   y     - column vector with numel(x) samples
%   Throws 'enhanced_phase_vocoder:inputTooShort' when X is shorter than
%   one analysis frame.
%
%   Method: phase-vocoder time stretch by RATIO followed by resampling by
%   1/RATIO. The previous version only stretched and then truncated the
%   result, which left the pitch unchanged. Further fixes:
%   * a one-sided STFT is used, matching the bin-frequency layout that
%     enhanced_phase_processing assumes and yielding a real-valued inverse;
%   * the frame length scales with fs (the 1024..4096 range was tuned for
%     44.1 kHz and exceeded a whole segment at 8 kHz);
%   * the input is mirror-extended by one frame so that the output covers
%     the full length without a zero-padded tail.
x = x(:);
n_in = numel(x);

% Longer frames for low voices, shorter ones for high voices
try
    [f0_temp, ~] = extract_pitch(x, fs);
    avg_pitch = mean(f0_temp(f0_temp > 0));
    if isnan(avg_pitch) || avg_pitch < 80
        avg_pitch = 200; % fallback
    end
catch
    avg_pitch = 200;     % pitch estimate is optional; use the fallback
end
base_frame = min(4096, max(1024, 2048 * (200/avg_pitch)));   % samples at 44.1 kHz
frame_size = max(64, 4 * round(base_frame * fs / 44100 / 4)); % multiple of 4
if n_in < frame_size
    error('enhanced_phase_vocoder:inputTooShort', ...
        'Input has %d samples but the analysis frame needs %d.', n_in, frame_size);
end
hop_in = frame_size / 4;                 % 75 % overlap
overlap = frame_size - hop_in;
hop_out = min(frame_size - 1, max(1, round(hop_in * ratio)));

% Mirror-extend the tail by one frame (removed again by the final trim)
x_ext = [x; flipud(x(end-frame_size+1:end))];

% Analysis
win = hann(frame_size, 'periodic');
S = stft(x_ext, fs, 'Window', win, 'OverlapLength', overlap, ...
    'FFTLength', frame_size, 'FrequencyRange', 'onesided');

% Phase propagation for the new hop
Y = enhanced_phase_processing(S, hop_in, hop_out, fs);

% Synthesis with the output hop (weighted overlap-add): time stretch
y = istft(Y, fs, 'Window', win, 'OverlapLength', frame_size - hop_out, ...
    'FFTLength', frame_size, 'Method', 'wola', 'FrequencyRange', 'onesided');

% Defensive: drop a residual imaginary part, warn when it is not negligible
if ~isreal(y)
    imag_energy = sum(abs(imag(y)).^2);
    total_energy = sum(abs(y).^2);
    imag_ratio = imag_energy / (total_energy + eps);
    if imag_ratio > 1e-6
        warning('复数信号虚部能量占比: %.4f%%,强制转换为实数', imag_ratio*100);
    end
    y = real(y);
end
y = y(:);

% Resample back to the original duration: this is the actual pitch shift
if hop_out ~= hop_in
    y = resample(y, hop_in, hop_out);
end

% Length matching
if length(y) > n_in
    y = y(1:n_in);
elseif length(y) < n_in
    y = [y; zeros(n_in-length(y), 1)];
end

% Post-processing: smooth away high-frequency artefacts
y = spectral_smoothing(y, fs, ratio);
end
function y = spectral_smoothing(x, fs, ratio)
% SPECTRAL_SMOOTHING  Zero-phase low-pass to tame high-frequency artefacts.
%   x     - audio vector
%   fs    - sample rate in Hz
%   ratio - pitch ratio that was applied; a larger ratio lowers the cutoff
%   y     - filtered signal, same size as X
%
%   The cutoff is min(8000, 20000/sqrt(ratio)) Hz. When that is at or
%   close to the Nyquist frequency (8 kHz and 16 kHz material) there is
%   nothing to remove and butter() would reject the normalised cutoff,
%   so the signal is returned unchanged. Inputs too short for filtfilt
%   are returned unchanged as well.
cutoff = min(8000, 20000 / ratio^0.5);
nyquist = fs/2;
if cutoff >= 0.95 * nyquist
    y = x;
    return;
end
[b, a] = butter(4, cutoff/nyquist, 'low');
% filtfilt needs more than 3*(filter order) samples
if numel(x) <= 3 * (max(numel(a), numel(b)) - 1)
    y = x;
    return;
end
y = filtfilt(b, a, x);
end
function plot_pitch_comparison(time_src, f0_src, time_ref, f0_ref, f0_corrected, src_wave, ref_wave, corr_wave, fs)
% PLOT_PITCH_COMPARISON  Open a figure comparing waveforms and pitch tracks.
%   time_src, f0_src   - frame times (s) and pitch (Hz) of the original clip
%   time_ref, f0_ref   - the same for the reference clip
%   f0_corrected       - pitch (Hz) of the corrected clip, plotted on time_src
%   src_wave, ref_wave, corr_wave - the three waveforms
%   fs                 - sample rate in Hz
%   Three stacked panels (original / reference / corrected) each show the
%   waveform on the left y-axis and pitch on the right one. A text box
%   with the mean absolute pitch error before and after correction is
%   added when frames with positive pitch in all three tracks exist.

% Trim every pitch-related vector to a common length
n_common = min([length(time_src), length(time_ref), length(f0_corrected)]);
keep = 1:n_common;
time_src = time_src(keep);
f0_src = f0_src(keep);
time_ref = time_ref(keep);
f0_ref = f0_ref(keep);
f0_corrected = f0_corrected(keep);

pitch_fig = figure('Name', '音频波形与音高分析', 'Position', [100 100 900 800]);
amp_limits = [-1.1 1.1];   % fixed amplitude range for all panels

% ---- Panel 1: original waveform with original and reference pitch ----
subplot(3,1,1);
yyaxis left;
plot((1:length(src_wave)) / fs, src_wave, 'Color', [0.7 0.7 1], 'LineWidth', 0.5);
ylabel('幅度');
ylim(amp_limits);
yyaxis right;
plot(time_src, f0_src, 'b', 'LineWidth', 1.5);
hold on;
plot(time_ref, f0_ref, 'r--', 'LineWidth', 1.5);
hold off;
title('原始音频波形与音高');
xlabel('时间 (s)');
ylabel('频率 (Hz)');
legend('原始波形', '原始音高', '参考音高', 'Location', 'best');
grid on;

% ---- Panel 2: reference waveform with reference pitch ----
subplot(3,1,2);
yyaxis left;
plot((1:length(ref_wave)) / fs, ref_wave, 'Color', [1 0.7 0.7], 'LineWidth', 0.5);
ylabel('幅度');
ylim(amp_limits);
yyaxis right;
plot(time_ref, f0_ref, 'r', 'LineWidth', 1.5);
title('参考音频波形与音高');
xlabel('时间 (s)');
ylabel('频率 (Hz)');
legend('参考波形', '参考音高', 'Location', 'best');
grid on;

% ---- Panel 3: corrected waveform with all three pitch tracks ----
subplot(3,1,3);
yyaxis left;
plot((1:length(corr_wave)) / fs, corr_wave, 'Color', [0.7 1 0.7], 'LineWidth', 0.5);
ylabel('幅度');
ylim(amp_limits);
yyaxis right;
plot(time_src, f0_src, 'b:', 'LineWidth', 1);
hold on;
plot(time_ref, f0_ref, 'r--', 'LineWidth', 1);
plot(time_src, f0_corrected, 'g', 'LineWidth', 2);
hold off;
title('矫正后音频波形与音高');
xlabel('时间 (s)');
ylabel('频率 (Hz)');
legend('矫正波形', '原始音高', '参考音高', '矫正音高', 'Location', 'best');
grid on;

% ---- Pitch-error summary over frames that are positive in all tracks ----
all_voiced = (f0_src > 0) & (f0_ref > 0) & (f0_corrected > 0);
if ~any(all_voiced)
    return;
end
err_before = mean(abs(f0_src(all_voiced) - f0_ref(all_voiced)));
err_after = mean(abs(f0_corrected(all_voiced) - f0_ref(all_voiced)));
summary_text = sprintf('原始音高平均误差: %.2f Hz | 矫正后音高平均误差: %.2f Hz | 改进: %.1f%%', ...
    err_before, err_after, (err_before - err_after)/err_before*100);
annotation(pitch_fig, 'textbox', [0.15 0.05 0.7 0.05], ...
    'String', summary_text, ...
    'FitBoxToText', 'on', 'BackgroundColor', [0.9 0.9 0.9], ...
    'FontSize', 12, 'HorizontalAlignment', 'center');
end
function play_audio(fig, audio_type)
% PLAY_AUDIO  Open a small player window for the source or the corrected clip.
%   fig        - main application figure (clips live in fig.UserData)
%   audio_type - 'source' or 'corrected'; any other value is ignored
%
%   The window offers play / pause / stop buttons, a progress marker and
%   an elapsed / total time label. Playback does not start by itself.
%
%   Fixes: the progress callback referred to a function that is not
%   defined in this file, so it is now provided as a local helper; the
%   total time uses floor() so it can no longer be shown as "mm:60".
if ~isvalid(fig)
    errordlg('主窗口无效!', '播放错误');
    return;
end
switch audio_type
    case 'source'
        audio = fig.UserData.source_audio;
        title_text = '播放原始音频';
        if isempty(audio)
            errordlg('未找到原始音频数据!', '播放错误');
            return;
        end
    case 'corrected'
        audio = fig.UserData.corrected_audio;
        title_text = '播放矫正音频';
        if isempty(audio)
            errordlg('请先完成音高校正!', '播放错误');
            return;
        end
    otherwise
        return;
end
fs = fig.UserData.fs;
player = audioplayer(audio, fs);

% Player window
play_fig = uifigure('Name', title_text, 'Position', [500 500 300 150]);

% Progress display: a vertical marker that moves from x=0 to x=1
ax = uiaxes(play_fig, 'Position', [50 100 200 20]);
hold(ax, 'on');
prog_line = plot(ax, [0 0], [0 1], 'b', 'LineWidth', 2);
hold(ax, 'off');
xlim(ax, [0 1]);
ylim(ax, [0 1]);
set(ax, 'XTick', [], 'YTick', []);

% Total duration as mm:ss
total_time = length(audio)/fs;
mins = floor(total_time/60);
secs = floor(total_time - mins*60);
total_str = sprintf('%02d:%02d', mins, secs);

% Elapsed / total time label
time_label = uilabel(play_fig, 'Position', [50 80 200 20], ...
    'Text', ['00:00 / ' total_str], 'HorizontalAlignment', 'center');

% Control buttons (the anonymous functions keep the player object alive)
uibutton(play_fig, 'Position', [50 30 60 30], 'Text', '播放', ...
    'ButtonPushedFcn', @(btn,event) play(player));
uibutton(play_fig, 'Position', [120 30 60 30], 'Text', '暂停', ...
    'ButtonPushedFcn', @(btn,event) pause(player));
uibutton(play_fig, 'Position', [190 30 60 30], 'Text', '停止', ...
    'ButtonPushedFcn', @(btn,event) stop(player));

% Periodic progress update and end-of-playback handling
player.TimerFcn = {@refresh_playback_progress, play_fig, time_label, total_str, prog_line, length(audio)};
player.TimerPeriod = 0.1; % seconds between updates
player.StopFcn = @(src,event) stop_playback(src, event, play_fig);
end

function refresh_playback_progress(player, ~, play_fig, time_label, total_str, prog_line, n_samples)
% REFRESH_PLAYBACK_PROGRESS  audioplayer TimerFcn: move the marker, update the time label.
%   Does nothing once the player window or its widgets have been deleted.
if ~isvalid(play_fig) || ~isvalid(prog_line) || ~isvalid(time_label)
    return;
end
position = player.CurrentSample;
fraction = min(1, max(0, position / max(1, n_samples)));
prog_line.XData = [fraction fraction];
elapsed = floor(position / player.SampleRate);
time_label.Text = sprintf('%02d:%02d / %s', floor(elapsed/60), mod(elapsed, 60), total_str);
end
function stop_playback(src, ~, fig)
% STOP_PLAYBACK  Stop the player and close its window if the window still exists.
%   src - the audioplayer object
%   fig - the player window
stop(src);
if ~isvalid(fig)
    return;
end
close(fig);
end
function save_audio(fig)
% SAVE_AUDIO  Ask for a file name and write the corrected clip as WAV.
%   fig - main application figure; the clip is fig.UserData.corrected_audio
%         and is written at the rate fig.UserData.fs.
%   Shows an error dialog when there is nothing to save; does nothing
%   when the file dialog is cancelled.
has_result = isvalid(fig) && ~isempty(fig.UserData.corrected_audio);
if ~has_result
    errordlg('无有效音频数据可保存!', '保存错误');
    return;
end
[file, path] = uiputfile('*.wav', '保存矫正音频');
if isequal(file, 0)
    return;
end
target = fullfile(path, file);
audiowrite(target, fig.UserData.corrected_audio, fig.UserData.fs);
msgbox('音频保存成功!', '完成');
end
function Y = enhanced_phase_processing(X, hop_in, hop_out, fs)
% ENHANCED_PHASE_PROCESSING  Phase-vocoder phase propagation for a changed hop size.
%   X       - STFT matrix, bins-by-frames. The bin frequencies are taken
%             as (0:nBins-1)*fs/(2*(nBins-1)), i.e. a ONE-SIDED spectrum
%             from 0 Hz to fs/2 is assumed.
%   hop_in  - analysis hop in samples
%   hop_out - synthesis hop in samples
%   fs      - sample rate in Hz
%   Y       - STFT with the magnitudes of X and phases advanced so that
%             each bin keeps its instantaneous frequency at the new hop.
%
%   Fix: the previous version stored the OUTPUT phase in the variable that
%   was then used as the previous INPUT phase, so from the third frame on
%   the instantaneous-frequency estimate was wrong whenever
%   hop_out ~= hop_in. Input phase and accumulated output phase are now
%   tracked separately. The former "unwrapping" steps only added
%   multiples of 2*pi, which do not change exp(1j*phase), and were removed.
Y = zeros(size(X));
if isempty(X), return; end
n_bins = size(X, 1);
if n_bins > 1
    freq_bins = (0:n_bins-1)' * fs / (2*(n_bins-1));
else
    freq_bins = 0;
end
% Phase advance expected per analysis hop at each bin centre
bin_phase_inc = 2*pi * freq_bins * hop_in / fs;
stretch = hop_out / hop_in;

% First frame is copied; it seeds both phase trackers
phase_in_prev = angle(X(:,1));
phase_out = phase_in_prev;
Y(:,1) = abs(X(:,1)) .* exp(1j*phase_out);

for k = 2:size(X,2)
    mag = abs(X(:,k));
    phase_in = angle(X(:,k));

    % Deviation from the expected advance, wrapped to [-pi, pi]
    delta_phase = phase_in - phase_in_prev - bin_phase_inc;
    delta_phase = delta_phase - 2*pi*round(delta_phase/(2*pi));

    % True phase advance per analysis hop (instantaneous frequency)
    inst_advance = bin_phase_inc + delta_phase;

    % Accumulate at the synthesis hop; keep the value in [-pi, pi) so it
    % does not grow without bound
    phase_out = phase_out + inst_advance * stretch;
    phase_out = mod(phase_out + pi, 2*pi) - pi;

    Y(:,k) = mag .* exp(1j*phase_out);
    phase_in_prev = phase_in;
end
end
% 这是完整代码。运行时报错:"窗口长度必须为有限正实数标量"。
% 最新发布