function audio_pitch_correction
% AUDIO_PITCH_CORRECTION  Launch the main window of the pitch-correction tool.
%
% Builds the uifigure holding the import/record controls, the process,
% play and save buttons, a recording status label and three waveform axes.
% The shared application state (audio buffers, sampling rate and handles
% needed by the callbacks) is stored in the figure's UserData.

main_win = uifigure('Name', '音频音准矫正系统', 'Position', [100 100 900 700]);

% Source audio: either imported from a file or recorded.
uilabel(main_win, 'Position', [50 680 300 20], 'Text', '待矫正音频来源:', 'FontWeight', 'bold');
source_panel = uibuttongroup(main_win, 'Position', [50 630 300 40], 'Title', '');
uibutton(source_panel, 'Position', [10 10 130 30], 'Text', '导入音频文件', ...
    'ButtonPushedFcn', @(~, ~) select_audio(main_win, 'source'));
uibutton(source_panel, 'Position', [160 10 130 30], 'Text', '录制音频', ...
    'ButtonPushedFcn', @(~, ~) record_audio(main_win));

% Reference audio: imported from a file only.
uilabel(main_win, 'Position', [400 680 300 20], 'Text', '参考音频来源:', 'FontWeight', 'bold');
uibutton(main_win, 'Position', [400 630 150 30], 'Text', '导入参考音频', ...
    'ButtonPushedFcn', @(~, ~) select_audio(main_win, 'reference'));

% The process button starts disabled; the import/record callbacks enable it.
run_btn = uibutton(main_win, 'Position', [600 630 150 30], ...
    'Text', '开始矫正', 'Enable', 'off', ...
    'ButtonPushedFcn', @(~, ~) process_audio(main_win));

% Playback and save buttons, described as {x-position, caption, callback}.
action_specs = { ...
    50,  '播放原始音频', @(~, ~) play_audio(main_win, 'source'); ...
    250, '播放矫正音频', @(~, ~) play_audio(main_win, 'corrected'); ...
    450, '保存矫正音频', @(~, ~) save_audio(main_win)};
for row = 1:size(action_specs, 1)
    uibutton(main_win, 'Position', [action_specs{row, 1} 580 150 30], ...
        'Text', action_specs{row, 2}, ...
        'ButtonPushedFcn', action_specs{row, 3});
end

% Recording status label.
status_label = uilabel(main_win, 'Position', [650 580 200 30], ...
    'Text', '准备录音', 'FontColor', [0 0.5 0]);

% Waveform axes, described as {field name, y-position, title}.
axes_specs = { ...
    'source',    350, '待矫正音频波形'; ...
    'reference', 180, '参考音频波形'; ...
    'corrected', 10,  '矫正后音频波形'};
wave_axes = struct();
for row = 1:size(axes_specs, 1)
    this_ax = uiaxes(main_win, 'Position', [50 axes_specs{row, 2} 800 150]);
    title(this_ax, axes_specs{row, 3});
    wave_axes.(axes_specs{row, 1}) = this_ax;
end

% Shared state read and written by the callbacks.
state = struct();
state.source_audio = [];
state.reference_audio = [];
state.corrected_audio = [];
state.fs = 44100;                 % default sampling rate
state.process_btn = run_btn;
state.axes = wave_axes;
state.recording_label = status_label;
state.recorder = [];              % audiorecorder object
state.timer = [];                 % countdown timer object
main_win.UserData = state;
end
function select_audio(fig, audio_type)
% SELECT_AUDIO  Import an audio file as the source or the reference signal.
%
%   fig        - main uifigure whose UserData holds the application state
%   audio_type - 'source' or 'reference'; selects the UserData field
%                ([audio_type '_audio']) and the axes to draw into
%
% The file is mixed down to mono and truncated to its first 20 seconds.
% UserData.fs is a single rate shared by both signals, so when the other
% signal is already loaded the new one is resampled to that rate instead
% of overwriting it.
[file, path] = uigetfile({'*.wav;*.mp3;*.ogg;*.flac', ...
    '音频文件 (*.wav,*.mp3,*.ogg,*.flac)'});
if isequal(file, 0)
    return;
end
filename = fullfile(path, file);

% Report unreadable/unsupported files instead of failing inside the callback.
try
    [audio, fs] = audioread(filename);
catch ME
    errordlg(['无法读取音频文件: ' ME.message], '读取错误');
    return;
end
if isempty(audio)
    errordlg('音频文件不包含任何数据!', '读取错误');
    return;
end

% Stereo -> mono
if size(audio, 2) > 1
    audio = mean(audio, 2);
end

% Keep the first 20 seconds only
max_samples = min(round(20 * fs), length(audio));
audio = audio(1:max_samples);

% Keep one common sampling rate for source and reference.
if strcmp(audio_type, 'source')
    other_field = 'reference_audio';
else
    other_field = 'source_audio';
end
shared_fs = fig.UserData.fs;
if ~isempty(fig.UserData.(other_field)) && fs ~= shared_fs
    audio = resample(audio, shared_fs, fs);
    fs = shared_fs;
end

% Store the data
fig.UserData.([audio_type '_audio']) = audio;
fig.UserData.fs = fs;

% Redraw the waveform; the title is set again after plotting so it stays
% visible even if plot resets the axes.
ax = fig.UserData.axes.(audio_type);
plot(ax, (1:length(audio)) / fs, audio);
axis_titles = struct('source', '待矫正音频波形', 'reference', '参考音频波形');
if isfield(axis_titles, audio_type)
    title(ax, axis_titles.(audio_type));
end
xlabel(ax, '时间 (s)'); ylabel(ax, '幅度');

% Enable processing once both signals are available
if ~isempty(fig.UserData.source_audio) && ~isempty(fig.UserData.reference_audio)
    fig.UserData.process_btn.Enable = 'on';
end
end
function record_audio(fig)
% RECORD_AUDIO  Open the small recording dialog.
%
%   fig - main uifigure; it is forwarded to start_recording/stop_recording
%
% The dialog offers a duration field (1-30 s), a sampling-rate drop-down
% and start / stop / close buttons. The duration and rate are read from
% the controls at the moment the start button is pressed.
dlg = uifigure('Name', '音频录制', 'Position', [300 300 400 200]);

% Recording duration
uilabel(dlg, 'Position', [50 150 100 20], 'Text', '录音时长 (秒):');
seconds_field = uieditfield(dlg, 'numeric', ...
    'Position', [160 150 100 20], 'Value', 5, 'Limits', [1 30]);

% Sampling rate
uilabel(dlg, 'Position', [50 120 100 20], 'Text', '采样率:');
rate_menu = uidropdown(dlg, ...
    'Position', [160 120 100 20], ...
    'Items', {'8000', '16000', '44100', '48000'}, ...
    'Value', '44100');

% Control buttons
on_start = @(~, ~) start_recording(fig, seconds_field.Value, str2double(rate_menu.Value));
on_stop = @(~, ~) stop_recording(fig);
on_close = @(~, ~) close(dlg);

uibutton(dlg, 'Position', [50 70 100 30], ...
    'Text', '开始录音', 'ButtonPushedFcn', on_start);
uibutton(dlg, 'Position', [160 70 100 30], ...
    'Text', '停止录音', 'ButtonPushedFcn', on_stop);
uibutton(dlg, 'Position', [270 70 100 30], ...
    'Text', '关闭', 'ButtonPushedFcn', on_close);
end
function start_recording(fig, duration, fs)
% START_RECORDING  Start a mono 16-bit recording and a one-second countdown.
%
%   fig      - main uifigure holding the application state in UserData
%   duration - recording length in seconds (may be fractional)
%   fs       - sampling rate in Hz
%
% The recorder and the countdown timer are stored in fig.UserData so that
% stop_recording can reach them.
%
% NOTE(review): UserData.fs is shared with the reference audio. Recording
% at a rate different from an already loaded reference leaves the
% reference at its old rate - confirm whether it should be resampled.
if ~isvalid(fig)
    return;
end

% Dispose of a countdown/recording that is still running from an earlier
% click, otherwise the old timer keeps firing and is never deleted.
old_timer = fig.UserData.timer;
if ~isempty(old_timer) && isvalid(old_timer)
    stop(old_timer);
    delete(old_timer);
end
fig.UserData.timer = [];
old_recorder = fig.UserData.recorder;
if ~isempty(old_recorder) && isrecording(old_recorder)
    stop(old_recorder);
end

% Update the status label
fig.UserData.recording_label.Text = '录音中...';
fig.UserData.recording_label.FontColor = [1 0 0];
drawnow;

% Create the recorder (16-bit, mono)
try
    recorder = audiorecorder(fs, 16, 1);
catch ME
    fig.UserData.recording_label.Text = '录音失败';
    errordlg(['无法启动录音: ' ME.message], '录音错误');
    return;
end
fig.UserData.recorder = recorder;
fig.UserData.fs = fs;

% Start the (non-blocking) recording
record(recorder, duration);

% Countdown timer. TasksToExecute must be an integer, and the first tick
% is delayed by one period so that the final tick (remaining == 0) lands
% at the end of the recording instead of one second before it.
n_ticks = max(1, ceil(duration));
t = timer('ExecutionMode', 'fixedRate', 'Period', 1, 'StartDelay', 1, ...
    'TasksToExecute', n_ticks, ...
    'TimerFcn', @(t, ~) update_recording_timer(fig, t, n_ticks));

% Store the timer before starting it so callbacks can always find it
fig.UserData.timer = t;
start(t);
end
function update_recording_timer(fig, t, total_duration)
% UPDATE_RECORDING_TIMER  Timer callback: refresh the countdown label.
%
%   fig            - main uifigure holding the recording label
%   t              - the timer object that fired
%   total_duration - total countdown length in seconds
%
% Calls stop_recording once the remaining time reaches zero.

% The main window may have been closed while the timer is still running.
if ~isvalid(fig)
    if isvalid(t)
        stop(t);
    end
    return;
end

elapsed = t.TasksExecuted;
% Whole, non-negative seconds so that '%d' always prints an integer.
remaining = max(0, ceil(total_duration - elapsed));
fig.UserData.recording_label.Text = sprintf('录音中: %d秒', remaining);

% Stop automatically when the countdown is over
if remaining <= 0
    stop_recording(fig);
end
end
function stop_recording(fig)
% STOP_RECORDING  Stop the recorder/timer and store the take as source audio.
%
%   fig - main uifigure holding recorder, timer, label and axes in UserData
%
% Safe to call when no recording was ever started (for example the stop
% button pressed first) or when nothing was captured: in those cases only
% the status label is updated.
if ~isvalid(fig)
    return;
end

recorder = fig.UserData.recorder;
if ~isempty(recorder) && isrecording(recorder)
    stop(recorder);
end

% Stop and dispose of the countdown timer
if ~isempty(fig.UserData.timer) && isvalid(fig.UserData.timer)
    stop(fig.UserData.timer);
    delete(fig.UserData.timer);
    fig.UserData.timer = [];
end

% Nothing to fetch if recording was never started
if isempty(recorder)
    fig.UserData.recording_label.Text = '尚未开始录音';
    fig.UserData.recording_label.FontColor = [0 0.5 0];
    return;
end

% Fetch the recorded samples; getaudiodata errors when nothing was captured
try
    audio = getaudiodata(recorder);
catch
    audio = [];
end
if isempty(audio)
    fig.UserData.recording_label.Text = '未录到音频';
    fig.UserData.recording_label.FontColor = [1 0 0];
    return;
end
fs = fig.UserData.fs;

% Update the status label
fig.UserData.recording_label.Text = '录音完成!';
fig.UserData.recording_label.FontColor = [0 0.5 0];

% Store as the audio to be corrected
fig.UserData.source_audio = audio;

% Redraw the waveform
ax = fig.UserData.axes.source;
plot(ax, (1:length(audio)) / fs, audio);
title(ax, '录制音频波形');
xlabel(ax, '时间 (s)'); ylabel(ax, '幅度');

% Enable processing once a reference is available as well
if ~isempty(fig.UserData.reference_audio)
    fig.UserData.process_btn.Enable = 'on';
end
end
function process_audio(fig)
% PROCESS_AUDIO  Run the alignment -> pitch extraction -> correction pipeline.
%
%   fig - main uifigure; source/reference audio and fs are read from its
%         UserData, the corrected audio is written back to it
%
% After processing, the corrected waveform is drawn in the main window and
% a separate figure compares the pitch tracks: the top panel shows the
% original against the reference, the bottom panel overlays original,
% reference and corrected pitch.

% Check the window before touching its UserData.
if ~isvalid(fig)
    errordlg('主窗口已关闭,无法处理音频!', '处理错误');
    return;
end
source = fig.UserData.source_audio;
reference = fig.UserData.reference_audio;
fs = fig.UserData.fs;
if isempty(source) || isempty(reference)
    errordlg('请先提供待矫正音频和参考音频!', '处理错误');
    return;
end

% Progress dialog
h = uiprogressdlg(fig, 'Title', '处理中', 'Message', '音频对齐...', 'Indeterminate', 'on');

% Step 1: alignment
try
    [aligned_source, aligned_ref] = improved_align_audio(source, reference, fs);
catch ME
    close(h);
    errordlg(['音频对齐失败: ' ME.message], '处理错误');
    return;
end

% Step 2: fundamental-frequency extraction
h.Message = '提取音高...';
try
    [f0_source, time_source] = extract_pitch(aligned_source, fs);
    [f0_ref, time_ref] = extract_pitch(aligned_ref, fs);
catch ME
    close(h);
    errordlg(['音高提取失败: ' ME.message], '处理错误');
    return;
end

% Step 3: pitch correction
h.Message = '矫正音调...';
try
    corrected = correct_pitch(fig, aligned_source, fs, f0_source, f0_ref, time_source, time_ref);
catch ME
    close(h);
    errordlg(['音高校正失败: ' ME.message], '处理错误');
    return;
end

% Step 4: pitch of the corrected signal, used only for the comparison plot.
% A failure here must not discard the corrected audio.
h.Message = '分析矫正结果...';
try
    [f0_corrected, time_corrected] = extract_pitch(corrected, fs);
catch
    f0_corrected = [];
    time_corrected = [];
end

close(h);

% Store the result and update the waveform display
fig.UserData.corrected_audio = corrected;
ax_out = fig.UserData.axes.corrected;
plot(ax_out, (1:length(corrected)) / fs, corrected);
title(ax_out, '矫正后音频波形');
xlabel(ax_out, '时间 (s)');
ylabel(ax_out, '幅度');

% Pitch comparison figure
figure('Name', '音高对比', 'Position', [100 100 800 600]);

subplot(2, 1, 1);
plot(time_source, f0_source, 'b', time_ref, f0_ref, 'r');
legend('原始音高', '参考音高');
title('矫正前音高对比');
xlabel('时间 (s)'); ylabel('频率 (Hz)');

subplot(2, 1, 2);
plot(time_source, f0_source, 'b:', time_ref, f0_ref, 'r-');
if ~isempty(f0_corrected)
    hold on;
    plot(time_corrected, f0_corrected, 'g-');
    hold off;
    legend('原始音高', '参考音高', '矫正后音高');
else
    legend('原始音高', '参考音高');
end
title('矫正前后音高对比');
xlabel('时间 (s)'); ylabel('频率 (Hz)');
end
function [aligned_src, aligned_ref] = improved_align_audio(src, ref, fs)
% IMPROVED_ALIGN_AUDIO  Time-align two recordings by spectral similarity.
%
%   src, ref - audio vectors sharing the sampling rate fs
%   fs       - sampling rate in Hz
%
%   aligned_src, aligned_ref - equally long excerpts of src and ref in
%   which the leading part of the later-starting signal has been removed
%
% Both signals are turned into magnitude spectrograms (100 ms window,
% 50 ms hop). Every frame-lag is scored by the mean cosine similarity of
% the frame pairs that overlap at that lag, and the best lag (resolution:
% one hop) is applied. Comparing frame i of one signal only with frame i
% of the other cannot detect any offset, because both spectrograms share
% the same time grid.
win_size = round(0.1 * fs);   % 100 ms window
hop_size = round(0.05 * fs);  % 50 ms hop

best_lag = 0;  % in frames; > 0 means the content appears later in ref

% Signals shorter than one window cannot be analysed: skip the lag search.
if win_size > 1 && hop_size >= 1 && numel(src) >= win_size && numel(ref) >= win_size
    mag_src = abs(spectrogram(src, win_size, win_size - hop_size, win_size, fs));
    mag_ref = abs(spectrogram(ref, win_size, win_size - hop_size, win_size, fs));

    % Unit-norm columns; eps keeps silent frames at zero instead of NaN.
    mag_src = mag_src ./ max(sqrt(sum(mag_src .^ 2, 1)), eps);
    mag_ref = mag_ref ./ max(sqrt(sum(mag_ref .^ 2, 1)), eps);

    % similarity(i, j): cosine similarity of src frame i and ref frame j.
    similarity = mag_src.' * mag_ref;
    [n_src, n_ref] = size(similarity);

    % Require a reasonable overlap so very short diagonals cannot win.
    min_overlap = max(1, floor(min(n_src, n_ref) / 2));

    % Visit small lags first so ties resolve towards "no shift".
    lags = -(n_src - 1):(n_ref - 1);
    [~, order] = sort(abs(lags));
    best_score = -Inf;
    for lag = lags(order)
        pairs = diag(similarity, lag);  % pairs (i, i + lag)
        if numel(pairs) < min_overlap
            continue;
        end
        score = mean(pairs);
        if score > best_score
            best_score = score;
            best_lag = lag;
        end
    end
end

% Drop the leading samples of whichever signal starts later.
shift = abs(best_lag) * hop_size;
if best_lag > 0
    aligned_src = src;
    aligned_ref = ref(shift + 1:end);
elseif best_lag < 0
    aligned_src = src(shift + 1:end);
    aligned_ref = ref;
else
    aligned_src = src;
    aligned_ref = ref;
end

% Make both excerpts equally long
min_len = min(length(aligned_src), length(aligned_ref));
aligned_src = aligned_src(1:min_len);
aligned_ref = aligned_ref(1:min_len);
end
function [f0, time] = extract_pitch(audio, fs)
% EXTRACT_PITCH  Estimate the fundamental frequency track of a signal.
%
%   audio - audio vector
%   fs    - sampling rate in Hz
%
%   f0    - fundamental frequency estimates in Hz (one per analysis frame)
%   time  - time stamp of every estimate in seconds
%
% Uses the Audio Toolbox pitch function with 50 ms frames, 75 % overlap,
% a 50-1000 Hz search range and the normalized correlation (NCF) method.
frame_size = round(0.05 * fs);       % 50 ms frame
overlap = round(0.75 * frame_size);  % 75 % overlap
[f0, loc] = pitch(audio, fs, ...
    'WindowLength', frame_size, ...
    'OverlapLength', overlap, ...
    'Range', [50, 1000], ...
    'Method', 'NCF');
% pitch reports locations as sample numbers; callers compare and plot
% them as seconds, so convert here.
% NOTE(review): per the pitch documentation the location refers to the
% last sample of each analysis window - confirm if frame centres are wanted.
time = loc / fs;
end
function corrected = correct_pitch(fig, audio, fs, f0_src, f0_ref, time_src, time_ref)
% CORRECT_PITCH  Shift the pitch of every frame towards the reference pitch.
%
%   fig                - parent uifigure for the progress dialog
%   audio              - signal to correct (already aligned with the reference)
%   fs                 - sampling rate in Hz
%   f0_src, f0_ref     - pitch tracks of the signal and of the reference (Hz)
%   time_src, time_ref - time stamps of those tracks; compared against
%                        frame centres expressed in seconds
%
%   corrected - corrected signal, same size as audio, peak-normalized to 1
%
% The signal is cut into 50 ms frames with 75 % overlap. For every frame
% the ratio f0_ref / f0_src (clamped to 0.5-2.0) is passed to
% phase_vocoder, and the results are recombined by Hann-windowed
% overlap-add, normalized by the accumulated window weight.
frame_len = round(0.05 * fs);                  % 50 ms frame
hop_size = max(1, round(0.25 * frame_len));    % 25 % hop
n_samples = numel(audio);
n_frames = floor((n_samples - frame_len) / hop_size) + 1;

% Too short to frame, or no pitch information: return the input unchanged
% rather than silence.
if frame_len < 2 || n_frames < 1 || isempty(f0_src) || isempty(f0_ref)
    corrected = audio;
    return;
end

x = audio(:);
acc = zeros(n_samples, 1);     % windowed overlap-add accumulator
weight = zeros(n_samples, 1);  % accumulated window weight per sample
win = 0.5 - 0.5 * cos(2 * pi * (0:frame_len - 1).' / frame_len);  % periodic Hann

% Refreshing the dialog is slow; do it roughly once per percent.
progress_step = max(1, floor(n_frames / 100));

h = uiprogressdlg(fig, 'Title', '处理中', 'Message', '音高校正...');
try
    for i = 1:n_frames
        % Frame position
        start_idx = (i - 1) * hop_size + 1;
        end_idx = start_idx + frame_len - 1;
        frame = x(start_idx:end_idx);

        % Nearest pitch estimates to the frame centre
        t_frame = (start_idx + end_idx) / 2 / fs;
        [~, idx_src] = min(abs(time_src - t_frame));
        [~, idx_ref] = min(abs(time_ref - t_frame));
        hz_src = f0_src(idx_src);
        hz_ref = f0_ref(idx_ref);

        if isfinite(hz_src) && isfinite(hz_ref) && hz_src > 0 && hz_ref > 0
            % Clamp the shift to one octave down / up
            target_ratio = max(0.5, min(2.0, hz_ref / hz_src));
        else
            target_ratio = 1.0;  % no usable pitch: leave the frame alone
        end

        % Pitch-shift the frame
        shifted = phase_vocoder(frame, target_ratio, fs);
        shifted = shifted(:);

        % Force the frame length so that no frame is silently dropped
        if numel(shifted) >= frame_len
            shifted = shifted(1:frame_len);
        else
            shifted(end + 1:frame_len, 1) = 0;
        end

        % Windowed overlap-add
        acc(start_idx:end_idx) = acc(start_idx:end_idx) + win .* shifted;
        weight(start_idx:end_idx) = weight(start_idx:end_idx) + win;

        % Progress
        if mod(i, progress_step) == 0 || i == n_frames
            h.Value = i / n_frames;
            h.Message = sprintf('处理进度: %d/%d 帧 (%.1f%%)', i, n_frames, i / n_frames * 100);
        end
    end
catch ME
    close(h);
    rethrow(ME);
end
close(h);

% Undo the window weighting. The floor avoids amplifying the first and
% last samples, where the accumulated weight approaches zero.
acc = acc ./ max(weight, 0.5);

% Samples not covered by any frame (the tail) keep the original signal.
uncovered = (weight == 0);
acc(uncovered) = x(uncovered);

% Peak-normalize to avoid clipping
max_amp = max(abs(acc));
if max_amp > 0
    acc = acc / max_amp;
end
corrected = reshape(acc, size(audio));
end
function y = phase_vocoder(x, ratio, fs) %#ok<INUSD>
% PHASE_VOCODER  Pitch-shift a signal by RATIO while keeping its length.
%
%   x     - input signal (vector)
%   ratio - desired pitch ratio (output frequency / input frequency);
%           limited to 0.25-2 here
%   fs    - sampling rate in Hz (unused, kept for interface compatibility)
%
%   y     - column vector with numel(x) samples
%
% The signal is time-stretched with a phase vocoder (analysis hop hop_in,
% synthesis hop hop_out) and then resampled by hop_in/hop_out, which
% restores the duration and scales all frequencies by hop_out/hop_in.
% Changing only the synthesis hop stretches the signal in time without
% changing its pitch.
x = x(:);
n_in = numel(x);
y = x;

% Too short to analyse, or nothing sensible to do: return the input.
if n_in < 64 || ~isfinite(ratio) || ratio <= 0
    return;
end
% Upper bound 2 keeps the synthesis overlap of the Hann window at >= 50 %.
ratio = min(2, max(0.25, ratio));

% FFT size: at most 2048, reduced for short inputs so that several STFT
% frames fit. Always a power of two >= 32, hence hop_in is an integer.
n = min(2048, 2 ^ floor(log2(n_in / 2)));
hop_in = n / 4;
hop_out = round(hop_in * ratio);
if hop_out == hop_in
    return;  % ratio rounds to 1: nothing to shift
end

% Zero-pad by one window on both sides so the edges of x are covered by
% full analysis windows.
w = hann(n, 'periodic');
padded = [zeros(n, 1); x; zeros(n, 1)];
X = stft(padded, 'Window', w, 'OverlapLength', n - hop_in, 'FFTLength', n, ...
    'FrequencyRange', 'centered');

% Phase propagation for the new hop
Y = phase_vocoder_process(X, hop_in, hop_out);

% Time-stretched signal (duration scaled by hop_out/hop_in)
stretched = istft(Y, 'Window', w, 'OverlapLength', n - hop_out, 'FFTLength', n, ...
    'ConjugateSymmetric', true, 'FrequencyRange', 'centered');

% Resample back to the original duration; this is the actual pitch change.
shifted = resample(stretched, hop_in, hop_out);

% Window centres move from j*hop_in + n/2 to j*hop_in + (n/2)*hop_in/hop_out
% through stretch + resample; compensate that constant offset when cutting
% out the part corresponding to x.
first = n + 1 - round((n / 2) * (1 - hop_in / hop_out));
last = first + n_in - 1;
if last > numel(shifted)
    shifted(end + 1:last, 1) = 0;
end
y = real(shifted(first:last));
end
function Y = phase_vocoder_process(X, hop_in, hop_out)
% PHASE_VOCODER_PROCESS  Re-time STFT phases from hop_in to hop_out.
%
%   X       - STFT matrix (frequency bins x frames)
%   hop_in  - analysis hop in samples
%   hop_out - synthesis hop in samples
%
%   Y       - STFT with the magnitudes of X and phases propagated so that
%             each bin keeps its instantaneous frequency at the new hop
%
% Rows are assumed to be in the "centered" two-sided layout that stft
% produces by default, which is how the caller in this file builds X:
% bins -n/2+1 ... n/2 for even n, -(n-1)/2 ... (n-1)/2 for odd n.
% NOTE(review): confirm the layout if X ever comes from another source.
Y = zeros(size(X));
if isempty(X), return; end

n = size(X, 1);
if mod(n, 2) == 0
    bins = (-n / 2 + 1:n / 2).';
else
    bins = (-(n - 1) / 2:(n - 1) / 2).';
end
omega = 2 * pi * bins / n;  % bin centre frequencies in rad/sample

% The first frame is copied; later frames accumulate synthesis phase.
prev_phase = angle(X(:, 1));
synth_phase = prev_phase;
Y(:, 1) = X(:, 1);

for i = 2:size(X, 2)
    mag = abs(X(:, i));
    phase = angle(X(:, i));

    % Deviation from the phase advance expected for the bin centre
    % frequency, wrapped to [-pi, pi]
    deviation = phase - prev_phase - omega * hop_in;
    deviation = deviation - 2 * pi * round(deviation / (2 * pi));

    % Instantaneous frequency in rad/sample
    inst_freq = omega + deviation / hop_in;

    % Advance the synthesis phase by the new hop
    synth_phase = synth_phase + inst_freq * hop_out;

    % Build the output frame
    Y(:, i) = mag .* exp(1j * synth_phase);
    prev_phase = phase;
end
end
function play_audio(fig, audio_type)
% PLAY_AUDIO  Play the source or the corrected audio with a status window.
%
%   fig        - main uifigure holding the audio buffers and fs in UserData
%   audio_type - 'source' or 'corrected'
%
% The audioplayer is stored in fig.UserData.player so that it stays alive
% after this function returns. The status window is closed by the
% player's StopFcn when playback ends or is stopped.
if ~isvalid(fig), return; end
switch audio_type
    case 'source'
        audio = fig.UserData.source_audio;
        title_text = '播放原始音频';
    case 'corrected'
        audio = fig.UserData.corrected_audio;
        title_text = '播放矫正音频';
    otherwise
        return;
end
if isempty(audio)
    errordlg('未找到音频数据!', '播放错误');
    return;
end
fs = fig.UserData.fs;

% Stop a playback that is still running; its StopFcn closes its window.
if isfield(fig.UserData, 'player') && ~isempty(fig.UserData.player) ...
        && isplaying(fig.UserData.player)
    stop(fig.UserData.player);
end

player = audioplayer(audio, fs);

% Playback status window
play_fig = uifigure('Name', title_text, 'Position', [500 500 300 100]);
uilabel(play_fig, 'Position', [50 50 200 30], 'Text', '正在播放音频...', ...
    'FontSize', 16, 'HorizontalAlignment', 'center');

% Close the window when playback stops. TimerFcn is deliberately not used:
% it fires repeatedly during playback and would close the window at once.
player.StopFcn = {@stop_playback, play_fig};

% Keep a reference beyond this function, then start playing.
fig.UserData.player = player;
play(player);
end
function stop_playback(~, ~, fig)
% STOP_PLAYBACK  Player callback: close the playback status window.
%
%   fig - the status window; nothing happens if it is no longer valid
%
% The first two (ignored) arguments are the callback source and event data.
if ~isvalid(fig)
    return;
end
close(fig);
end
function save_audio(fig)
% SAVE_AUDIO  Ask for a file name and write the corrected audio as WAV.
%
%   fig - main uifigure holding corrected_audio and fs in UserData
%
% Shows an error dialog when there is nothing to save or when writing
% fails; the success message is shown only after a successful write.
if ~isvalid(fig) || isempty(fig.UserData.corrected_audio)
    errordlg('无有效音频数据可保存!', '保存错误');
    return;
end
[file, path] = uiputfile('*.wav', '保存矫正音频');
if isequal(file, 0), return; end

target = fullfile(path, file);
try
    audiowrite(target, fig.UserData.corrected_audio, fig.UserData.fs);
catch ME
    % For example a read-only folder or an unsupported extension
    errordlg(['音频保存失败: ' ME.message], '保存错误');
    return;
end
msgbox('音频保存成功!', '完成');
end
这是完整代码。我希望增加调整前后以及与参考音高的对比综合图;并且,我希望在每一帧都能根据所提供的标准(参考)音频的音高,将待矫正音频的音高尽量调整到与标准音频相同的高度,从而实现校准。