function audio_pitch_correction
% AUDIO_PITCH_CORRECTION  Build the main window of the pitch-correction tool.
%
% Creates the uifigure with its import/record/process/play/save buttons,
% a recording status label and three waveform axes, then initialises the
% shared application state kept in fig.UserData.

fig = uifigure('Name', '音频音准矫正系统', 'Position', [100 100 900 700]);

% --- Source-audio selection (import from file or record) ---
uilabel(fig, 'Position', [50 680 300 20], 'Text', '待矫正音频来源:', 'FontWeight', 'bold');
source_btn_group = uibuttongroup(fig, 'Position', [50 630 300 40], 'Title', '');
on_import_source = @(~, ~) select_audio(fig, 'source');
uibutton(source_btn_group, 'Position', [10 10 130 30], 'Text', '导入音频文件', ...
    'ButtonPushedFcn', on_import_source);
on_record = @(~, ~) record_audio(fig);
uibutton(source_btn_group, 'Position', [160 10 130 30], 'Text', '录制音频', ...
    'ButtonPushedFcn', on_record);

% --- Reference-audio selection ---
uilabel(fig, 'Position', [400 680 300 20], 'Text', '参考音频来源:', 'FontWeight', 'bold');
on_import_reference = @(~, ~) select_audio(fig, 'reference');
uibutton(fig, 'Position', [400 630 150 30], 'Text', '导入参考音频', ...
    'ButtonPushedFcn', on_import_reference);

% --- Process button (disabled until both clips are available) ---
on_process = @(~, ~) process_audio(fig);
process_btn = uibutton(fig, 'Position', [600 630 150 30], ...
    'Text', '开始矫正', 'Enable', 'off', ...
    'ButtonPushedFcn', on_process);

% --- Playback and save buttons ---
on_play_source = @(~, ~) play_audio(fig, 'source');
uibutton(fig, 'Position', [50 580 150 30], 'Text', '播放原始音频', ...
    'ButtonPushedFcn', on_play_source);
on_play_corrected = @(~, ~) play_audio(fig, 'corrected');
uibutton(fig, 'Position', [250 580 150 30], 'Text', '播放矫正音频', ...
    'ButtonPushedFcn', on_play_corrected);
on_save = @(~, ~) save_audio(fig);
uibutton(fig, 'Position', [450 580 150 30], 'Text', '保存矫正音频', ...
    'ButtonPushedFcn', on_save);

% --- Recording status label ---
recording_label = uilabel(fig, 'Position', [650 580 200 30], ...
    'Text', '准备录音', 'FontColor', [0 0.5 0]);

% --- Waveform axes, top to bottom: source, reference, corrected ---
ax_source = uiaxes(fig, 'Position', [50 350 800 150]);
title(ax_source, '待矫正音频波形');
ax_reference = uiaxes(fig, 'Position', [50 180 800 150]);
title(ax_reference, '参考音频波形');
ax_corrected = uiaxes(fig, 'Position', [50 10 800 150]);
title(ax_corrected, '矫正后音频波形');

% --- Shared application state, assembled first and stored in one go ---
state = struct();
state.source_audio = [];
state.reference_audio = [];
state.corrected_audio = [];
state.fs = 44100;                 % default sample rate
state.process_btn = process_btn;
state.axes = struct('source', ax_source, 'reference', ax_reference, 'corrected', ax_corrected);
state.recording_label = recording_label;
state.recorder = [];              % audiorecorder object
state.timer = [];                 % countdown timer object
state.f0_source = [];             % pitch track of the source audio
state.f0_ref = [];                % pitch track of the reference audio
state.f0_corrected = [];          % pitch track of the corrected audio
fig.UserData = state;
end
function select_audio(fig, audio_type)
% SELECT_AUDIO  Let the user pick an audio file and store it as source/reference.
%
%   fig        - main uifigure whose UserData holds the application state
%   audio_type - 'source' or 'reference'; selects the UserData field
%                ([audio_type '_audio']) and the axes to draw into
%
% The clip is mixed down to mono and truncated to its first 20 seconds.
% All clips share the single sample rate fig.UserData.fs, so when the
% other clip is already loaded at a different rate the new clip is
% resampled to that rate instead of silently overwriting fs.
[file, path] = uigetfile({'*.wav;*.mp3;*.ogg;*.flac', ...
    '音频文件 (*.wav,*.mp3,*.ogg,*.flac)'});
if isequal(file, 0)
    return;   % dialog cancelled
end
filename = fullfile(path, file);

% Unsupported or corrupt files must produce a dialog, not an uncaught
% callback error.
try
    [audio, fs] = audioread(filename);
catch ME
    errordlg(['无法读取音频文件: ' ME.message], '读取错误');
    return;
end
if isempty(audio)
    errordlg('音频文件不包含任何数据!', '读取错误');
    return;
end

% Stereo -> mono
if size(audio, 2) > 1
    audio = mean(audio, 2);
end

% Keep only the first 20 seconds
max_samples = min(20*fs, length(audio));
audio = audio(1:max_samples);

% Bring the new clip to the sample rate of the clip that is already loaded
switch audio_type
    case 'source'
        other_audio = fig.UserData.reference_audio;
    case 'reference'
        other_audio = fig.UserData.source_audio;
    otherwise
        other_audio = [];
end
shared_fs = fig.UserData.fs;
if ~isempty(other_audio) && fs ~= shared_fs
    audio = resample(audio, shared_fs, fs);
    fs = shared_fs;
end

% Store data
fig.UserData.([audio_type '_audio']) = audio;
fig.UserData.fs = fs;

% Refresh the waveform plot
ax = fig.UserData.axes.(audio_type);
plot(ax, (1:length(audio))/fs, audio);
xlabel(ax, '时间 (s)'); ylabel(ax, '幅度');

% Processing needs both clips
if ~isempty(fig.UserData.source_audio) && ~isempty(fig.UserData.reference_audio)
    fig.UserData.process_btn.Enable = 'on';
end
end
function record_audio(fig)
% RECORD_AUDIO  Open the recording dialog (duration, sample rate, controls).
%
%   fig - main uifigure; passed through to start_recording/stop_recording
%
% The start callback reads the duration field and sample-rate dropdown at
% click time, so edits made after the dialog opened are honoured.
record_fig = uifigure('Name', '音频录制', 'Position', [300 300 400 200]);

% Recording duration (seconds), limited to 1..30 by the edit field
uilabel(record_fig, 'Position', [50 150 100 20], 'Text', '录音时长 (秒):');
duration_edit = uieditfield(record_fig, 'numeric', ...
    'Position', [160 150 100 20], 'Value', 5, 'Limits', [1 30]);

% Sample rate
uilabel(record_fig, 'Position', [50 120 100 20], 'Text', '采样率:');
fs_dropdown = uidropdown(record_fig, ...
    'Position', [160 120 100 20], ...
    'Items', {'8000', '16000', '44100', '48000'}, ...
    'Value', '44100');

% Control buttons (handles are not needed afterwards, so none are kept)
uibutton(record_fig, 'Position', [50 70 100 30], ...
    'Text', '开始录音', ...
    'ButtonPushedFcn', @(btn,event) start_recording(fig, duration_edit.Value, str2double(fs_dropdown.Value)));
uibutton(record_fig, 'Position', [160 70 100 30], ...
    'Text', '停止录音', ...
    'ButtonPushedFcn', @(btn,event) stop_recording(fig));
uibutton(record_fig, 'Position', [270 70 100 30], ...
    'Text', '关闭', ...
    'ButtonPushedFcn', @(btn,event) close(record_fig));
end
function start_recording(fig, duration, fs)
% START_RECORDING  Start a mono 16-bit recording plus a 1 Hz countdown timer.
%
%   fig      - main uifigure holding the application state in UserData
%   duration - requested recording length in seconds
%   fs       - recording sample rate in Hz
%
% Fixes relative to the earlier version:
%   * the timer is stored in UserData.timer (the field the rest of the file
%     initialises and checks) instead of an unrelated field 't';
%   * the first tick is delayed by one second, so the final tick, which
%     stops the recording, fires after `duration` seconds and not one
%     second early;
%   * a recorder/timer left over from a previous run is cleaned up first;
%   * an already loaded reference clip is resampled to the new rate because
%     the application keeps a single shared fs.

% Clean up anything left from a previous recording
prev_recorder = fig.UserData.recorder;
if ~isempty(prev_recorder) && isrecording(prev_recorder)
    stop(prev_recorder);
end
prev_timer = fig.UserData.timer;
if ~isempty(prev_timer) && isvalid(prev_timer)
    stop(prev_timer);
    delete(prev_timer);
end
fig.UserData.timer = [];

% Create the recorder; this fails when no input device is available
try
    recorder = audiorecorder(fs, 16, 1); % 16-bit, mono
catch ME
    fig.UserData.recording_label.Text = '录音失败';
    fig.UserData.recording_label.FontColor = [1 0 0];
    errordlg(['无法启动录音: ' ME.message], '录音错误');
    return;
end

% Update status
fig.UserData.recording_label.Text = '录音中...';
fig.UserData.recording_label.FontColor = [1 0 0];
drawnow;

% Keep the reference clip consistent with the shared sample rate
old_fs = fig.UserData.fs;
if ~isempty(fig.UserData.reference_audio) && old_fs ~= fs
    fig.UserData.reference_audio = resample(fig.UserData.reference_audio, fs, old_fs);
end

fig.UserData.recorder = recorder;
fig.UserData.fs = fs;

% Non-blocking: the recorder stops by itself after `duration` seconds
record(recorder, duration);

% Countdown timer: whole number of ticks, first tick after one second
n_ticks = max(1, ceil(duration));
t = timer('ExecutionMode', 'fixedRate', 'Period', 1, ...
    'StartDelay', 1, ...
    'TasksToExecute', n_ticks, ...
    'TimerFcn', @(t,~) update_recording_timer(fig, t, n_ticks));
fig.UserData.timer = t;
start(t);
end
function update_recording_timer(fig, t, total_duration)
% UPDATE_RECORDING_TIMER  Timer callback: show remaining seconds, stop at zero.
%
%   fig            - main uifigure (may have been closed in the meantime)
%   t              - the timer object invoking this callback
%   total_duration - total number of seconds/ticks of the recording

% If the main window was closed there is nothing left to update; stop the
% timer instead of throwing an error on every tick.
if ~isvalid(fig)
    stop(t);
    delete(t);
    return;
end

remaining = max(0, total_duration - t.TasksExecuted);
fig.UserData.recording_label.Text = sprintf('录音中: %d秒', remaining);

% Automatically finish the recording when the countdown reaches zero
if remaining <= 0
    stop_recording(fig);
end
end
function stop_recording(fig)
% STOP_RECORDING  Stop recorder and countdown timer, store the take as source.
%
%   fig - main uifigure holding the application state in UserData
%
% Safe to call when no recording was ever started (the "stop" button can be
% pressed at any time) and when the recorder holds no samples.
if ~isvalid(fig)
    return;
end

recorder = fig.UserData.recorder;
if ~isempty(recorder) && isrecording(recorder)
    stop(recorder);
end

% Stop the countdown timer. Both the 'timer' field and a field named 't'
% are checked, because the timer has been stored under either name.
timer_fields = {'timer', 't'};
for k = 1:numel(timer_fields)
    key = timer_fields{k};
    if isfield(fig.UserData, key)
        tm = fig.UserData.(key);
        if ~isempty(tm) && isvalid(tm)
            stop(tm);
            delete(tm);
        end
        fig.UserData.(key) = [];
    end
end

% Nothing to fetch if recording never started
if isempty(recorder)
    fig.UserData.recording_label.Text = '尚未开始录音';
    fig.UserData.recording_label.FontColor = [1 0 0];
    return;
end

% getaudiodata throws when the recorder holds no samples
try
    audio = getaudiodata(recorder);
catch
    fig.UserData.recording_label.Text = '未录到音频数据';
    fig.UserData.recording_label.FontColor = [1 0 0];
    return;
end
fs = fig.UserData.fs;

% Update status
fig.UserData.recording_label.Text = '录音完成!';
fig.UserData.recording_label.FontColor = [0 0.5 0];

% Store as the audio to be corrected
fig.UserData.source_audio = audio;

% Refresh the waveform plot
ax = fig.UserData.axes.source;
plot(ax, (1:length(audio))/fs, audio);
title(ax, '录制音频波形');
xlabel(ax, '时间 (s)'); ylabel(ax, '幅度');

% Processing needs the reference clip as well
if ~isempty(fig.UserData.reference_audio)
    fig.UserData.process_btn.Enable = 'on';
end
end
function process_audio(fig)
% PROCESS_AUDIO  Run the pipeline: align -> extract pitch -> correct -> plot.
%
%   fig - main uifigure holding source/reference audio and fs in UserData
%
% Results (corrected audio, pitch tracks and their time axes) are written
% back to fig.UserData. Each stage reports its failure in an error dialog
% and aborts the run.

% The window must be checked BEFORE touching fig.UserData; reading
% properties of a deleted figure throws.
if ~isvalid(fig)
    errordlg('主窗口已关闭,无法处理音频!', '处理错误');
    return;
end
source = fig.UserData.source_audio;
reference = fig.UserData.reference_audio;
fs = fig.UserData.fs;
if isempty(source) || isempty(reference)
    errordlg('请先导入待矫正音频和参考音频!', '处理错误');
    return;
end

% Progress dialog for the first two stages
h = uiprogressdlg(fig, 'Title', '处理中', 'Message', '音频对齐...', 'Indeterminate', 'on');

% Step 1: alignment
try
    [aligned_source, aligned_ref] = improved_align_audio(source, reference, fs);
catch ME
    close(h);
    errordlg(['音频对齐失败: ' ME.message], '处理错误');
    return;
end

% Step 2: fundamental-frequency extraction
h.Message = '提取音高...';
try
    [f0_source, time_source] = extract_pitch(aligned_source, fs);
    [f0_ref, time_ref] = extract_pitch(aligned_ref, fs);
catch ME
    close(h);
    errordlg(['音高提取失败: ' ME.message], '处理错误');
    return;
end

% Keep the pitch tracks for the comparison plots
fig.UserData.f0_source = f0_source;
fig.UserData.f0_ref = f0_ref;
fig.UserData.time_source = time_source;
fig.UserData.time_ref = time_ref;

% Step 3: pitch correction
h.Message = '矫正音调...';
try
    corrected = correct_pitch(fig, aligned_source, fs, f0_source, f0_ref, time_source, time_ref);
catch ME
    close(h);
    errordlg(['音高校正失败: ' ME.message], '处理错误');
    return;
end
close(h);

% Store the result and refresh the waveform plot
fig.UserData.corrected_audio = corrected;
ax_corrected = fig.UserData.axes.corrected;
plot(ax_corrected, (1:length(corrected))/fs, corrected);
xlabel(ax_corrected, '时间 (s)');
ylabel(ax_corrected, '幅度');

% Step 4: pitch track of the corrected audio. The corrected audio stays
% available for playback/saving even when this analysis step fails.
try
    [f0_corrected, time_corrected] = extract_pitch(corrected, fs);
catch ME
    errordlg(['音高提取失败: ' ME.message], '处理错误');
    return;
end
fig.UserData.f0_corrected = f0_corrected;
fig.UserData.time_corrected = time_corrected;

% Full pitch comparison figure
plot_pitch_comparison(fig);
end
function plot_pitch_comparison(fig)
% PLOT_PITCH_COMPARISON  Open a figure with three pitch-track comparisons.
%
%   fig - main uifigure; the pitch tracks (f0_*) and their time axes
%         (time_*) are read from fig.UserData
%
% Panels: source vs reference, corrected vs reference, source vs corrected.
% A push button at the bottom opens the deviation analysis.
pitch_fig = figure('Name', '音高对比分析', 'Position', [100 100 800 800]);

state = fig.UserData;

% Panel 1: source vs reference
draw_pitch_panel(1, ...
    state.time_source, state.f0_source, 'b', ...
    state.time_ref, state.f0_ref, 'r', ...
    '原始音高 vs 参考音高', '原始音高', '参考音高');

% Panel 2: corrected vs reference
draw_pitch_panel(2, ...
    state.time_corrected, state.f0_corrected, 'g', ...
    state.time_ref, state.f0_ref, 'r', ...
    '矫正后音高 vs 参考音高', '矫正后音高', '参考音高');

% Panel 3: source vs corrected
draw_pitch_panel(3, ...
    state.time_source, state.f0_source, 'b:', ...
    state.time_corrected, state.f0_corrected, 'g-', ...
    '原始音高 vs 矫正后音高', '原始音高', '矫正后音高');

% Button that opens the deviation analysis
uicontrol(pitch_fig, 'Style', 'pushbutton', 'String', '显示偏差分析', ...
    'Position', [350 20 100 30], ...
    'Callback', @(~, ~) show_deviation_analysis(fig));
end

function draw_pitch_panel(row, t_a, f0_a, spec_a, t_b, f0_b, spec_b, panel_title, label_a, label_b)
% Draw one of the three stacked panels: two pitch tracks with a legend and
% a y-range spanning 80 %..120 % of the combined value range.
subplot(3, 1, row);
plot(t_a, f0_a, spec_a, ...
    t_b, f0_b, spec_b);
title(panel_title);
legend(label_a, label_b, 'Location', 'best');
xlabel('时间 (s)'); ylabel('频率 (Hz)');
grid on;
both = [f0_a; f0_b];
ylim([min(both)*0.8, ...
    max(both)*1.2]);
end
function show_deviation_analysis(fig)
% SHOW_DEVIATION_ANALYSIS  Plot pitch deviation from the reference (Hz).
%
%   fig - main uifigure; pitch tracks and time axes are read from UserData
%
% Every source/corrected pitch frame is compared with the reference frame
% whose time stamp is closest. The mean absolute deviations are shown in a
% text box below the plots.
state = fig.UserData;

% Deviation of the original and of the corrected pitch track
orig_dev = nearest_frame_deviation(state.f0_source, state.time_source, ...
    state.f0_ref, state.time_ref);
corr_dev = nearest_frame_deviation(state.f0_corrected, state.time_corrected, ...
    state.f0_ref, state.time_ref);

analysis_fig = figure('Name', '音高偏差分析', 'Position', [150 150 800 600]);

% Upper panel: original deviation with a dashed zero line
subplot(2,1,1);
plot(state.time_source, orig_dev, 'b');
title('原始音高偏差');
xlabel('时间 (s)'); ylabel('偏差 (Hz)');
grid on;
hold on;
line([min(state.time_source), max(state.time_source)], [0, 0], 'Color', 'r', 'LineStyle', '--');

% Lower panel: corrected deviation with a dashed zero line
subplot(2,1,2);
plot(state.time_corrected, corr_dev, 'g');
title('矫正后音高偏差');
xlabel('时间 (s)'); ylabel('偏差 (Hz)');
grid on;
hold on;
line([min(state.time_corrected), max(state.time_corrected)], [0, 0], 'Color', 'r', 'LineStyle', '--');

% Summary statistics
summary_text = sprintf('原始平均偏差: %.2f Hz\n矫正后平均偏差: %.2f Hz', ...
    mean(abs(orig_dev)), mean(abs(corr_dev)));
uicontrol(analysis_fig, 'Style', 'text', ...
    'Position', [50 30 300 40], ...
    'String', summary_text, ...
    'FontSize', 10);
end

function dev = nearest_frame_deviation(f0, t, f0_ref, t_ref)
% For each frame k of (f0, t) subtract the reference pitch of the frame
% whose time stamp is nearest to t(k). Entries stay 0 when the nearest
% index lies beyond the end of f0_ref.
dev = zeros(size(f0));
n_ref = length(f0_ref);
for k = 1:length(f0)
    [~, nearest] = min(abs(t_ref - t(k)));
    if nearest <= n_ref
        dev(k) = f0(k) - f0_ref(nearest);
    end
end
end
function [aligned_src, aligned_ref] = improved_align_audio(src, ref, fs)
% IMPROVED_ALIGN_AUDIO  Time-align two clips and trim them to equal length.
%
%   src, ref - mono audio vectors sharing the sample rate fs
%   fs       - sample rate in Hz
%
%   aligned_src, aligned_ref - column vectors of identical length in which
%       the estimated offset between the clips has been removed
%
% The previous implementation compared frame i of one clip with frame i of
% the other and then used t_src(i) - t_ref(i) as the offset. Both
% spectrograms share the same window and hop, so that difference is always
% zero and no alignment ever took place. The offset is now estimated by
% cross-correlating the spectral-flux (onset) envelopes of the two clips.
% Resolution is one hop (50 ms); the lag search is limited to half of the
% shorter envelope so that the clips always keep a substantial overlap.
src = src(:);
ref = ref(:);
win_size = round(0.1 * fs);   % 100 ms window
hop_size = round(0.05 * fs);  % 50 ms hop

sample_diff = 0;  % > 0: src lags behind ref by this many samples
can_analyse = win_size >= 2 && hop_size >= 1 && ...
    numel(src) >= win_size && numel(ref) >= win_size;
if can_analyse
    env_src = align_onset_envelope(src, win_size, hop_size, fs);
    env_ref = align_onset_envelope(ref, win_size, hop_size, fs);
    max_lag = floor(min(numel(env_src), numel(env_ref)) / 2);
    if max_lag >= 1
        [c, lags] = xcorr(env_src, env_ref, max_lag);
        [peak, k] = max(c);
        % A non-positive peak (e.g. silent input) carries no information
        if peak > 0
            sample_diff = lags(k) * hop_size;
        end
    end
end

% Drop the leading samples of whichever clip starts later
if sample_diff > 0
    aligned_src = src(sample_diff+1:end);
    aligned_ref = ref;
else
    aligned_src = src;
    aligned_ref = ref(1-sample_diff:end);
end

% Equal length
min_len = min(length(aligned_src), length(aligned_ref));
aligned_src = aligned_src(1:min_len);
aligned_ref = aligned_ref(1:min_len);
end

function env = align_onset_envelope(x, win_size, hop_size, fs)
% Mean-removed spectral flux per spectrogram frame (column vector). The
% flux is the summed positive magnitude increase between adjacent frames.
S = abs(spectrogram(x, win_size, win_size - hop_size, win_size, fs));
flux = sum(max(diff(S, 1, 2), 0), 1);
env = [0, flux];          % one value per frame; the first frame has no predecessor
env = env(:) - mean(env);
end
function [f0, time] = extract_pitch(audio, fs)
% EXTRACT_PITCH  Estimate the fundamental frequency track of a mono signal.
%
%   audio - mono audio vector
%   fs    - sample rate in Hz
%
%   f0   - fundamental frequency estimates in Hz, one per analysis frame
%   time - centre time of each analysis frame in SECONDS
%
% Uses pitch() from Audio Toolbox with 50 ms frames, 75 % overlap, a search
% range of 50-1000 Hz and the normalised correlation function method.
%
% The second output of pitch() is a sample index (the last sample of each
% frame), not a time. Callers compare and plot it as seconds, so it is
% converted here; otherwise every nearest-frame lookup lands on frame 1.
frame_size = round(0.05 * fs);       % 50 ms frame
overlap = round(0.75 * frame_size);  % 75 % overlap
[f0, loc] = pitch(audio, fs, ...
    'WindowLength', frame_size, ...
    'OverlapLength', overlap, ...
    'Range', [50, 1000], ...
    'Method', 'NCF');
% Frame covers samples loc-frame_size+1 .. loc; report its centre in seconds
time = (loc - (frame_size - 1)/2) / fs;
end
function corrected = correct_pitch(fig, audio, fs, f0_src, f0_ref, time_src, time_ref)
% CORRECT_PITCH  Shift the pitch of `audio` frame by frame towards the reference.
%
%   fig               - uifigure that owns the progress dialog
%   audio             - mono audio vector to correct
%   fs                - sample rate in Hz
%   f0_src, f0_ref    - pitch tracks (Hz) of the source and the reference
%   time_src, time_ref- time stamps (s) belonging to the pitch tracks
%
%   corrected - corrected audio, same size as `audio`, peak-normalised to 1
%
% For every 50 ms frame (hop = 25 % of the frame) the ratio
% f0_ref/f0_src at the frame centre is limited to [0.5, 2.0] and handed to
% phase_vocoder. The shifted frames are combined by Hann-windowed
% overlap-add and divided by the accumulated window weight, which avoids
% the amplitude build-up and frame-boundary clicks of a plain sum. Samples
% not covered by any frame keep the original audio.
h = uiprogressdlg(fig, 'Title', '处理中', 'Message', '音高校正...');
dlg_guard = onCleanup(@() close_pitch_dialog(h)); %#ok<NASGU> closes the dialog on error too

frame_len = round(0.05 * fs);                  % 50 ms frame
hop_size = max(1, round(0.25 * frame_len));    % 25 % hop
n_frames = floor((length(audio) - frame_len)/hop_size) + 1;

% Too short to form a single frame: nothing to correct
if frame_len < 2 || n_frames < 1
    corrected = audio;
    return;
end

samples = audio(:);
win = hann(frame_len, 'periodic');
acc = zeros(numel(samples), 1);     % windowed sum of shifted frames
weight = zeros(numel(samples), 1);  % accumulated window weight
have_tracks = ~isempty(time_src) && ~isempty(time_ref);
update_every = max(1, floor(n_frames/50));  % ~50 dialog updates in total

for i = 1:n_frames
    % Position of the current frame
    start_idx = (i-1)*hop_size + 1;
    end_idx = start_idx + frame_len - 1;
    frame = samples(start_idx:end_idx);

    % Target ratio from the pitch frames nearest to the frame centre
    target_ratio = 1.0;   % no change when no valid pitch is available
    if have_tracks
        t_frame = mean([start_idx, end_idx]) / fs;
        [~, idx_src] = min(abs(time_src - t_frame));
        [~, idx_ref] = min(abs(time_ref - t_frame));
        if idx_ref <= length(f0_ref) && idx_src <= length(f0_src)
            src_hz = f0_src(idx_src);
            ref_hz = f0_ref(idx_ref);
            if isfinite(src_hz) && isfinite(ref_hz) && src_hz > 0 && ref_hz > 0
                % Limit the ratio to one octave down/up
                target_ratio = max(0.5, min(2.0, ref_hz / src_hz));
            end
        end
    end

    % Pitch-shift the frame and force it back to the frame length so that
    % no frame is ever dropped from the overlap-add
    shifted = phase_vocoder(frame, target_ratio, fs);
    shifted = shifted(:);
    n_out = numel(shifted);
    if n_out >= frame_len
        shifted = shifted(1:frame_len);
    else
        shifted = [shifted; zeros(frame_len - n_out, 1)]; %#ok<AGROW>
    end

    % Windowed overlap-add
    acc(start_idx:end_idx) = acc(start_idx:end_idx) + win .* shifted;
    weight(start_idx:end_idx) = weight(start_idx:end_idx) + win;

    % Updating the dialog is slow; do it only every few frames
    if mod(i, update_every) == 0 || i == n_frames
        h.Value = i/n_frames;
        h.Message = sprintf('处理进度: %d/%d 帧 (%.1f%%)', i, n_frames, i/n_frames*100);
    end
end

% Normalise by the window weight; uncovered samples keep the input audio
out = samples;
covered = weight > 1e-3;
out(covered) = acc(covered) ./ weight(covered);
corrected = reshape(out, size(audio));

% Peak-normalise to prevent clipping
max_amp = max(abs(corrected));
if max_amp > 0
    corrected = corrected / max_amp;
end
end

function close_pitch_dialog(dlg)
% Close the progress dialog if it still exists.
if isvalid(dlg)
    close(dlg);
end
end
function y = phase_vocoder(x, ratio, fs) %#ok<INUSD>
% PHASE_VOCODER  Shift the pitch of x by `ratio` while keeping its length.
%
%   x     - audio vector (row or column)
%   ratio - pitch ratio; > 1 raises, < 1 lowers the pitch
%   fs    - sample rate in Hz (unused, kept for interface compatibility)
%
%   y - pitch-shifted signal with the same length and orientation as x
%
% Method: time-stretch by `ratio` (STFT analysis hop hop_in, synthesis hop
% hop_out = round(hop_in*ratio)), then resample the stretched signal back
% to the original length, which scales all frequencies by about `ratio`.
%
% The earlier version only changed the synthesis hop. That alters the
% duration rather than the pitch, returned a signal whose length differed
% from the input, and used a fixed 2048-point FFT that does not fit into
% short frames. The FFT size now adapts to the input length (at most 2048).
% Because hop_out is an integer, the achieved ratio is hop_out/hop_in.
was_row = isrow(x);
x = x(:);
L = numel(x);
y = x;

% Invalid ratio or too few samples for a meaningful STFT: return x as is
usable = isfinite(ratio) && ratio > 0 && L >= 64;
if usable
    n = min(2048, 2^floor(log2(L/4)));   % FFT/window size
    hop_in = round(n/4);
    hop_out = min(max(round(hop_in * ratio), 1), n - 1);
    usable = hop_out ~= hop_in;          % equal hops mean "no change"
end

if usable
    % Zero-pad so that the analysis frames cover the whole input
    n_hops = ceil(max(L - n, 0) / hop_in);
    padded_len = n + n_hops*hop_in;
    xp = [x; zeros(padded_len - L, 1)];

    w = hann(n, 'periodic');
    X = stft(xp, 'Window', w, 'OverlapLength', n-hop_in, 'FFTLength', n);

    % Phase processing for the new hop size
    Y = phase_vocoder_process(X, hop_in, hop_out);

    % Time-stretched signal
    stretched = istft(Y, 'Window', w, 'OverlapLength', n-hop_out, 'FFTLength', n, ...
        'ConjugateSymmetric', true);
    stretched = stretched(:);

    % Resample back to the padded input length -> pitch shift
    [p, q] = rat(padded_len / numel(stretched), 1e-4);
    shifted = resample(stretched, p, q);

    % Exact output length
    if numel(shifted) >= L
        y = shifted(1:L);
    else
        y = [shifted; zeros(L - numel(shifted), 1)];
    end
end

if was_row
    y = y.';
end
end
function Y = phase_vocoder_process(X, hop_in, hop_out, freq_range)
% PHASE_VOCODER_PROCESS  Re-phase an STFT for a different synthesis hop.
%
%   X          - STFT matrix (bins x frames); the number of rows is taken
%                as the FFT length
%   hop_in     - analysis hop in samples
%   hop_out    - synthesis hop in samples
%   freq_range - (optional) bin layout of X: 'centered' (default, the
%                layout stft returns when no 'FrequencyRange' is given) or
%                'twosided'
%
%   Y - STFT with the magnitudes of X and phases advanced for hop_out
%
% Standard phase-vocoder propagation: for each bin the measured phase
% increment is compared with the increment expected from the bin centre
% frequency, the deviation is wrapped to [-pi, pi), and the synthesis phase
% is ACCUMULATED with the resulting true increment scaled by
% hop_out/hop_in. The earlier version neither unwrapped nor accumulated the
% phase, so even hop_out == hop_in altered the signal; now that case
% reproduces X.
if nargin < 4 || isempty(freq_range)
    freq_range = 'centered';
end

Y = zeros(size(X));
if isempty(X), return; end

% Bin centre frequencies in rad/sample
n_bins = size(X, 1);
switch lower(freq_range)
    case 'centered'
        % assumes stft's centered layout: (-pi, pi] for even lengths,
        % (-pi, pi) for odd lengths
        if mod(n_bins, 2) == 0
            k = (-n_bins/2 + 1 : n_bins/2).';
        else
            k = (-(n_bins - 1)/2 : (n_bins - 1)/2).';
        end
    case 'twosided'
        k = (0 : n_bins - 1).';
    otherwise
        error('phase_vocoder_process:freqRange', ...
            'Unsupported frequency range "%s".', freq_range);
end
expected_adv = (2*pi*k/n_bins) * hop_in;  % nominal phase advance per analysis hop
stretch = hop_out / hop_in;

% The first frame is copied; it defines the initial synthesis phase
prev_phase = angle(X(:, 1));
synth_phase = prev_phase;
Y(:, 1) = X(:, 1);

for i = 2:size(X, 2)
    mag = abs(X(:, i));
    phase = angle(X(:, i));
    % Deviation from the nominal advance, wrapped to [-pi, pi)
    deviation = phase - prev_phase - expected_adv;
    deviation = mod(deviation + pi, 2*pi) - pi;
    % Accumulate the true advance, rescaled to the synthesis hop
    synth_phase = synth_phase + (expected_adv + deviation) * stretch;
    % Synthesise the new frame
    Y(:, i) = mag .* exp(1j * synth_phase);
    prev_phase = phase;
end
end
function play_audio(fig, audio_type)
% PLAY_AUDIO  Open a small player window for the source or corrected audio.
%
%   fig        - main uifigure holding the audio and fs in UserData
%   audio_type - 'source' or 'corrected'; any other value is ignored
%
% The window offers play/pause/stop buttons, a progress line and an
% elapsed/total time label. Playback starts when the user presses play.
if ~isvalid(fig)
    errordlg('主窗口无效!', '播放错误');
    return;
end
switch audio_type
    case 'source'
        audio = fig.UserData.source_audio;
        title_text = '播放原始音频';
        if isempty(audio)
            errordlg('未找到原始音频数据!', '播放错误');
            return;
        end
    case 'corrected'
        audio = fig.UserData.corrected_audio;
        title_text = '播放矫正音频';
        if isempty(audio)
            errordlg('请先完成音高校正!', '播放错误');
            return;
        end
    otherwise
        return;
end
fs = fig.UserData.fs;
player = audioplayer(audio, fs);

% Player window
play_fig = uifigure('Name', title_text, 'Position', [500 500 300 150]);

% Progress bar: a line on a unit axes, extended by the timer callback
ax = uiaxes(play_fig, 'Position', [50 100 200 20]);
prog_line = line(ax, [0 0], [0 0], 'Color', 'b', 'LineWidth', 2);
xlim(ax, [0 1]);
ylim(ax, [0 1]);
set(ax, 'XTick', [], 'YTick', []);

% Elapsed / total time label
time_label = uilabel(play_fig, 'Position', [50 80 200 20], ...
    'Text', '00:00 / 00:00', 'HorizontalAlignment', 'center');

% Control buttons; the callbacks keep the player object alive
uibutton(play_fig, 'Position', [50 30 60 30], 'Text', '播放', ...
    'ButtonPushedFcn', @(btn,event) play(player));
uibutton(play_fig, 'Position', [120 30 60 30], 'Text', '暂停', ...
    'ButtonPushedFcn', @(btn,event) pause(player));
uibutton(play_fig, 'Position', [190 30 60 30], 'Text', '停止', ...
    'ButtonPushedFcn', @(btn,event) stop(player));

% Total duration as mm:ss. Round to whole seconds BEFORE splitting into
% minutes and seconds; rounding only the seconds part could print "00:60".
total_secs = round(length(audio)/fs);
total_str = sprintf('%02d:%02d', floor(total_secs/60), mod(total_secs, 60));

% Progress updates during playback and clean-up when playback stops
player.TimerFcn = {@update_playback, play_fig, time_label, total_str, prog_line, length(audio)};
player.StopFcn = {@stop_playback, play_fig};
end
function update_playback(player, ~, play_fig, time_label, total_str, prog_line, total_samples)
% UPDATE_PLAYBACK  Player timer callback: refresh time label and progress line.
%
%   player        - audioplayer invoking the callback
%   play_fig      - player window; nothing is updated once it is closed
%   time_label    - label showing 'elapsed / total'
%   total_str     - preformatted total duration (mm:ss)
%   prog_line     - line object used as progress bar (x from 0 to 1)
%   total_samples - number of samples of the clip being played
if ~isvalid(play_fig) || player.CurrentSample <= 0
    return;
end

% Elapsed time as mm:ss. Whole seconds are taken with floor before the
% split; rounding only the seconds part could print "00:60".
elapsed_secs = floor(player.CurrentSample / player.SampleRate);
current_str = sprintf('%02d:%02d', floor(elapsed_secs/60), mod(elapsed_secs, 60));

% Update the label
time_label.Text = [current_str ' / ' total_str];

% Update the progress bar, never beyond the right edge
progress = min(1, player.CurrentSample / total_samples);
prog_line.XData = [0 progress];
prog_line.YData = [0.5 0.5];
end
function stop_playback(player, ~, play_fig) %#ok<INUSL>
% STOP_PLAYBACK  Player StopFcn callback: close the player window.
%
%   player   - audioplayer invoking the callback (unused)
%   play_fig - player window to close, if it still exists
if ~isvalid(play_fig)
    return;   % window already gone
end
close(play_fig);
end
function save_audio(fig)
% SAVE_AUDIO  Ask for a file name and write the corrected audio as WAV.
%
%   fig - main uifigure holding corrected_audio and fs in UserData
%
% Write failures (no permission, invalid path, ...) are reported in an
% error dialog instead of surfacing as an uncaught callback error.
if ~isvalid(fig) || isempty(fig.UserData.corrected_audio)
    errordlg('无有效音频数据可保存!', '保存错误');
    return;
end
[file, path] = uiputfile('*.wav', '保存矫正音频');
if isequal(file, 0), return; end   % dialog cancelled
try
    audiowrite(fullfile(path, file), fig.UserData.corrected_audio, fig.UserData.fs);
catch ME
    errordlg(['音频保存失败: ' ME.message], '保存错误');
    return;
end
msgbox('音频保存成功!', '完成');
end
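% TODO(question): Which parameters can be adjusted to change how strongly the pitch is corrected? (原文: 调整哪些参数能够改变音准变化幅度)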