function audio_pitch_correction_t3
% AUDIO_PITCH_CORRECTION_T3  Launch the audio pitch-correction GUI.
%
%   Builds the main uifigure (source/reference import, recording, processing,
%   playback and save buttons plus three waveform axes) and stores every
%   piece of shared state in fig.UserData, which is how the callbacks in this
%   file exchange data.
%
%   Takes no arguments and returns nothing.

    fig = uifigure('Name', '音频音准矫正系统', 'Position', [100 100 900 700]);

    % --- Source audio: import from file or record ---------------------------
    uilabel(fig, 'Position', [50 680 300 20], 'Text', '待矫正音频来源:', 'FontWeight', 'bold');
    source_btn_group = uibuttongroup(fig, 'Position', [50 630 300 40], 'Title', '');
    uibutton(source_btn_group, 'Position', [10 10 130 30], 'Text', '导入音频文件', ...
        'ButtonPushedFcn', @(btn,event) select_audio(fig, 'source'));
    uibutton(source_btn_group, 'Position', [160 10 130 30], 'Text', '录制音频', ...
        'ButtonPushedFcn', @(btn,event) record_audio(fig));

    % --- Reference audio import ---------------------------------------------
    uilabel(fig, 'Position', [400 680 300 20], 'Text', '参考音频来源:', 'FontWeight', 'bold');
    uibutton(fig, 'Position', [400 630 150 30], 'Text', '导入参考音频', ...
        'ButtonPushedFcn', @(btn,event) select_audio(fig, 'reference'));

    % --- Process button (disabled until both inputs are available) ----------
    process_btn = uibutton(fig, 'Position', [600 630 150 30], ...
        'Text', '开始矫正', 'Enable', 'off', ...
        'ButtonPushedFcn', @(btn,event) process_audio(fig));

    % --- Playback / save buttons (tagged so callbacks can find them) --------
    uibutton(fig, 'Position', [50 580 150 30], 'Text', '播放原始音频', ...
        'Tag', 'play_source_btn', ...
        'ButtonPushedFcn', @(btn,event) play_audio(fig, 'source'));
    play_corrected_btn = uibutton(fig, 'Position', [250 580 150 30], ...
        'Text', '播放矫正音频', ...
        'Tag', 'play_corrected_btn', ...
        'ButtonPushedFcn', @(btn,event) play_audio(fig, 'corrected'));
    play_corrected_btn.Enable = 'off';   % enabled once corrected audio exists
    uibutton(fig, 'Position', [450 580 150 30], 'Text', '保存矫正音频', ...
        'ButtonPushedFcn', @(btn,event) save_audio(fig));

    % --- Recording status label ---------------------------------------------
    recording_label = uilabel(fig, 'Position', [650 580 200 30], ...
        'Text', '准备录音', 'FontColor', [0 0.5 0]);

    % --- Waveform axes ------------------------------------------------------
    ax_source = uiaxes(fig, 'Position', [50 350 800 150]);
    title(ax_source, '待矫正音频波形');
    ax_reference = uiaxes(fig, 'Position', [50 180 800 150]);
    title(ax_reference, '参考音频波形');
    ax_corrected = uiaxes(fig, 'Position', [50 10 800 150]);
    title(ax_corrected, '矫正后音频波形');

    % --- Shared state -------------------------------------------------------
    % Every field that any callback reads is created here, so a callback never
    % hits a missing field regardless of the order in which buttons are used.
    state = struct();
    state.play_corrected_btn = play_corrected_btn;
    state.source_audio = [];
    state.reference_audio = [];
    state.corrected_audio = [];
    state.f0_corrected = [];     % pitch track of the corrected audio
    state.time_source = [];      % frame times belonging to f0_corrected
    state.original_fs = [];      % sample rate the corrected audio was produced at
    state.fs = 44100;            % default sample rate
    state.process_btn = process_btn;
    state.axes = struct('source', ax_source, 'reference', ax_reference, 'corrected', ax_corrected);
    state.recording_label = recording_label;
    state.recorder = [];         % audiorecorder object while/after recording
    state.timer = [];            % countdown timer while recording
    fig.UserData = state;

    % Timers and recorders are not children of the figure, so they must be
    % released explicitly when the window goes away.
    fig.DeleteFcn = @(src, event) apc_release_audio_resources(src);
end

function apc_release_audio_resources(fig)
% APC_RELEASE_AUDIO_RESOURCES  Stop the recording timer/recorder of a closing figure.
    try
        state = fig.UserData;
        if ~isstruct(state)
            return;
        end
        if isfield(state, 'timer') && ~isempty(state.timer) && isvalid(state.timer)
            stop(state.timer);
            delete(state.timer);
        end
        if isfield(state, 'recorder') && ~isempty(state.recorder) && isrecording(state.recorder)
            stop(state.recorder);
        end
    catch
        % Deliberate best effort: a cleanup failure must never prevent the
        % window from closing.
    end
end
function select_audio(fig, audio_type)
% SELECT_AUDIO  Let the user pick an audio (or MAT) file and load it into the GUI.
%
%   select_audio(fig, audio_type)
%
%   fig         main uifigure whose UserData holds the shared state.
%   audio_type  'source' or 'reference' (the buttons in this file use these);
%               'corrected' is also accepted and additionally tries to read
%               pitch metadata from the file comment.
%
%   A *.mat file is treated as a previously saved correction result and must
%   contain corrected_audio, f0_corrected, time_source and fs.
%   Any other file is read with audioread, mixed down to mono, truncated to
%   its first 20 seconds, stored in fig.UserData.<audio_type>_audio and
%   plotted. If the other input (source vs. reference) is already loaded at
%   a different sample rate, the new audio is resampled to the rate already
%   in use, because the rest of the pipeline works with a single
%   fig.UserData.fs.

    [file, path] = uigetfile({'*.wav;*.mp3;*.ogg;*.flac;*.mat', ...
        '音频文件 (*.wav,*.mp3,*.ogg,*.flac,*.mat)'});
    if isequal(file, 0), return; end
    filename = fullfile(path, file);
    [~, ~, ext] = fileparts(filename);

    if strcmpi(ext, '.mat')
        sa_load_corrected_mat(fig, filename);
        return;
    end

    % --- Regular audio file -------------------------------------------------
    try
        [audio, fs] = audioread(filename);
    catch ME
        errordlg(['无法读取音频文件: ' ME.message], '加载错误');
        return;
    end
    if isempty(audio)
        errordlg('音频文件为空!', '加载错误');
        return;
    end

    % Stereo -> mono
    if size(audio, 2) > 1
        audio = mean(audio, 2);
    end

    % Keep only the first 20 seconds
    max_samples = min(20*fs, length(audio));
    audio = audio(1:max_samples);

    % Keep source and reference on one common sample rate.
    switch audio_type
        case 'source'
            other_field = 'reference_audio';
        case 'reference'
            other_field = 'source_audio';
        otherwise
            other_field = '';
    end
    session_fs = fig.UserData.fs;
    other_loaded = ~isempty(other_field) && isfield(fig.UserData, other_field) && ...
        ~isempty(fig.UserData.(other_field));
    if other_loaded && fs ~= session_fs
        [up, down] = rat(session_fs / fs);
        audio = resample(audio, up, down);
        fs = session_fs;
    end

    % Store
    fig.UserData.([audio_type '_audio']) = audio;
    fig.UserData.fs = fs;

    % Plot waveform
    ax = fig.UserData.axes.(audio_type);
    cla(ax);
    plot(ax, (1:length(audio))/fs, audio);
    xlabel(ax, '时间 (s)'); ylabel(ax, '幅度');

    % For corrected audio, try to recover the pitch track from the file comment
    if strcmp(audio_type, 'corrected')
        fig.UserData.original_fs = fs;
        sa_enable_corrected_playback(fig);
        try
            info = audioinfo(filename);
            if isfield(info, 'Comment') && ~isempty(info.Comment)
                metadata = jsondecode(info.Comment);
                if isfield(metadata, 'f0_corrected')
                    fig.UserData.f0_corrected = metadata.f0_corrected;
                    fig.UserData.time_source = metadata.time_source;
                    yyaxis(ax, 'left');
                    plot(ax, (1:length(audio))/fs, audio);
                    ylabel(ax, '幅度');
                    yyaxis(ax, 'right');
                    plot(ax, metadata.time_source, metadata.f0_corrected, 'r', 'LineWidth', 1.5);
                    ylabel(ax, '频率 (Hz)');
                    title(ax, '矫正后音频波形与音高');
                    grid(ax, 'on');
                end
            end
        catch
            % Deliberate best effort: the metadata is optional, the audio
            % itself has already been loaded and plotted.
        end
    end

    % Processing becomes possible once both inputs are present
    if ~isempty(fig.UserData.source_audio) && ~isempty(fig.UserData.reference_audio)
        fig.UserData.process_btn.Enable = 'on';
    end
end

function sa_load_corrected_mat(fig, filename)
% SA_LOAD_CORRECTED_MAT  Load a saved correction result (MAT file) and display it.
    try
        data = load(filename);
    catch ME
        errordlg(['无法读取MAT文件: ' ME.message], '加载错误');
        return;
    end

    has_all = isfield(data, 'corrected_audio') && isfield(data, 'f0_corrected') && ...
        isfield(data, 'time_source') && isfield(data, 'fs');
    if ~has_all
        errordlg('MAT文件缺少必需的音高数据字段!', '加载错误');
        return;
    end

    fig.UserData.corrected_audio = data.corrected_audio;
    fig.UserData.f0_corrected = data.f0_corrected;
    fig.UserData.time_source = data.time_source;
    fig.UserData.fs = data.fs;
    % save_audio reads original_fs, so record the rate of the imported result.
    fig.UserData.original_fs = data.fs;

    ax = fig.UserData.axes.corrected;
    cla(ax);
    yyaxis(ax, 'left');
    plot(ax, (1:length(data.corrected_audio))/data.fs, data.corrected_audio);
    ylabel(ax, '幅度');
    yyaxis(ax, 'right');
    plot(ax, data.time_source, data.f0_corrected, 'Color', [1 0.5 0], 'LineWidth', 2);
    ylabel(ax, '频率 (Hz)');
    title(ax, '矫正后音频波形与音高');
    grid(ax, 'on');

    % Corrected audio is now available, so it must be playable.
    sa_enable_corrected_playback(fig);
end

function sa_enable_corrected_playback(fig)
% SA_ENABLE_CORRECTED_PLAYBACK  Enable the "play corrected audio" button.
    btn = [];
    if isfield(fig.UserData, 'play_corrected_btn')
        btn = fig.UserData.play_corrected_btn;
    end
    if isempty(btn) || ~all(isvalid(btn))
        btn = findobj(fig, 'Tag', 'play_corrected_btn');
    end
    if ~isempty(btn)
        set(btn, 'Enable', 'on');
    end
end
function record_audio(fig)
% RECORD_AUDIO  Open the small recording dialog.
%
%   record_audio(fig)
%
%   fig  main uifigure; it is handed on to start_recording / stop_recording,
%        which keep the recorder state in fig.UserData.
%
%   The dialog offers a duration field (1-30 s, default 5), a sample-rate
%   drop-down (default 44100) and start / stop / close buttons.

    dlg = uifigure('Name', '音频录制', 'Position', [300 300 400 200]);

    % Duration (seconds)
    uilabel(dlg, 'Position', [50 150 100 20], 'Text', '录音时长 (秒):');
    len_field = uieditfield(dlg, 'numeric', ...
        'Position', [160 150 100 20], 'Value', 5, 'Limits', [1 30]);

    % Sample rate
    uilabel(dlg, 'Position', [50 120 100 20], 'Text', '采样率:');
    rate_menu = uidropdown(dlg, ...
        'Position', [160 120 100 20], ...
        'Items', {'8000', '16000', '44100', '48000'}, ...
        'Value', '44100');

    % The three control buttons share size and row; only caption, x position
    % and callback differ. The field values are read when the button is
    % pressed, not when the dialog is built.
    captions = {'开始录音', '停止录音', '关闭'};
    left_edges = [50 160 270];
    handlers = { ...
        @(btn,event) start_recording(fig, len_field.Value, str2double(rate_menu.Value)), ...
        @(btn,event) stop_recording(fig), ...
        @(btn,event) close(dlg)};
    for k = 1:numel(captions)
        uibutton(dlg, 'Position', [left_edges(k) 70 100 30], ...
            'Text', captions{k}, ...
            'ButtonPushedFcn', handlers{k});
    end
end
function start_recording(fig, duration, fs)
% START_RECORDING  Begin a timed microphone recording with a countdown.
%
%   start_recording(fig, duration, fs)
%
%   fig       main uifigure (state is kept in fig.UserData).
%   duration  recording length in seconds (may be fractional).
%   fs        sample rate in Hz.
%
%   Creates a 16-bit mono audiorecorder, stores it together with fs in
%   fig.UserData, and starts a 1 Hz timer that drives
%   update_recording_timer. Note that fig.UserData.fs is overwritten with
%   the recording rate.

    if ~isvalid(fig)
        return;
    end

    % Abort a session that is still running so its timer is not leaked.
    old_timer = fig.UserData.timer;
    if ~isempty(old_timer) && isvalid(old_timer)
        stop(old_timer);
        delete(old_timer);
    end
    fig.UserData.timer = [];
    old_recorder = fig.UserData.recorder;
    if ~isempty(old_recorder) && isrecording(old_recorder)
        stop(old_recorder);
    end

    status = fig.UserData.recording_label;
    status.Text = '录音中...';
    status.FontColor = [1 0 0];
    drawnow;

    try
        recorder = audiorecorder(fs, 16, 1);   % 16-bit, mono
        fig.UserData.recorder = recorder;
        fig.UserData.fs = fs;
        record(recorder, duration);
    catch ME
        % e.g. no input device: restore the idle state instead of leaving
        % the label stuck on "recording".
        status.Text = '准备录音';
        status.FontColor = [0 0.5 0];
        errordlg(['无法开始录音: ' ME.message], '录音错误');
        return;
    end

    % TasksToExecute must be a whole number; round a fractional duration up
    % so the countdown never ends before the recorder does.
    n_ticks = max(1, ceil(duration));

    % StartDelay = 1 makes the first tick fire after one second. Without it
    % the fixedRate timer fires immediately, the countdown reaches zero one
    % second early and stop_recording truncates the recording.
    t = timer('ExecutionMode', 'fixedRate', 'Period', 1, 'StartDelay', 1, ...
        'TasksToExecute', n_ticks, ...
        'TimerFcn', @(t,~) update_recording_timer(fig, t, n_ticks));
    fig.UserData.timer = t;   % store before starting so callbacks can find it
    start(t);
end
function update_recording_timer(fig, t, total_duration)
% UPDATE_RECORDING_TIMER  Timer tick: refresh the countdown and stop at zero.
%
%   update_recording_timer(fig, t, total_duration)
%
%   fig             main uifigure holding the status label in UserData.
%   t               the timer object that fired; t.TasksExecuted is used as
%                   the number of elapsed ticks.
%   total_duration  total number of seconds to count down from.

    % The timer outlives the figure; if the main window was closed, stop
    % ticking instead of raising an error on every period.
    if ~isvalid(fig)
        stop(t);
        delete(t);
        return;
    end

    remaining = max(0, total_duration - t.TasksExecuted);

    status = fig.UserData.recording_label;
    if isvalid(status)
        % ceil keeps the value integral so '%d' never falls back to
        % exponent notation for fractional durations.
        status.Text = sprintf('录音中: %d秒', ceil(remaining));
    end

    % Finish the recording when the countdown has run out.
    if remaining <= 0
        stop_recording(fig);
    end
end
function stop_recording(fig)
% STOP_RECORDING  Stop recorder and countdown, then adopt the take as source audio.
%
%   stop_recording(fig)
%
%   fig  main uifigure; recorder, timer, status label and axes are taken
%        from fig.UserData.
%
%   Called both by the stop button and by the countdown timer. The recorded
%   samples are stored in fig.UserData.source_audio and plotted; the process
%   button is enabled when a reference is already loaded.

    if ~isvalid(fig)
        return;
    end

    recorder = fig.UserData.recorder;
    if ~isempty(recorder) && isrecording(recorder)
        stop(recorder);
    end

    % Stop and release the countdown timer
    countdown = fig.UserData.timer;
    if ~isempty(countdown) && isvalid(countdown)
        stop(countdown);
        delete(countdown);
    end
    fig.UserData.timer = [];

    status = fig.UserData.recording_label;

    % "Stop" pressed before any recording was started: nothing to fetch.
    if isempty(recorder)
        errordlg('尚未开始录音!', '录音错误');
        return;
    end

    try
        audio = getaudiodata(recorder);
    catch ME
        % getaudiodata throws when the recorder holds no samples.
        status.Text = '录音失败';
        status.FontColor = [1 0 0];
        errordlg(['无法获取录音数据: ' ME.message], '录音错误');
        return;
    end
    fs = fig.UserData.fs;

    status.Text = '录音完成!';
    status.FontColor = [0 0.5 0];

    % The recording becomes the audio to be corrected
    fig.UserData.source_audio = audio;

    ax = fig.UserData.axes.source;
    plot(ax, (1:length(audio))/fs, audio);
    title(ax, '录制音频波形');
    xlabel(ax, '时间 (s)'); ylabel(ax, '幅度');

    if ~isempty(fig.UserData.reference_audio)
        fig.UserData.process_btn.Enable = 'on';
    end
end
function process_audio(fig)
% PROCESS_AUDIO  Run the align -> pitch-extract -> pitch-correct pipeline.
%
%   process_audio(fig)
%
%   fig  main uifigure; source_audio, reference_audio and fs are read from
%        fig.UserData.
%
%   On success the corrected audio, its pitch track, the frame times and the
%   sample rate are stored in fig.UserData (corrected_audio, f0_corrected,
%   time_source, original_fs), the "play corrected" button is enabled, the
%   three axes are redrawn and a separate comparison figure is opened.
%   Any failure in a pipeline stage is reported in an error dialog and
%   leaves the stored state untouched.

    % The handle must be checked before anything is read from it.
    if ~isvalid(fig)
        errordlg('主窗口已关闭,无法处理音频!', '处理错误');
        return;
    end

    source = fig.UserData.source_audio;
    reference = fig.UserData.reference_audio;
    fs = fig.UserData.fs;
    if isempty(source) || isempty(reference)
        errordlg('请先导入待矫正音频和参考音频!', '处理错误');
        return;
    end

    h = uiprogressdlg(fig, 'Title', '处理中', 'Message', '音频对齐...', 'Indeterminate', 'on');

    % One set of frame parameters for both pitch tracks
    frame_len = round(0.05 * fs);    % 50 ms frame
    hop_size = round(0.025 * fs);    % 25 ms hop

    % failure_prefix always names the stage that is currently running, so a
    % single catch block can report the right message.
    failure_prefix = '音频对齐失败: ';
    try
        % Step 1: alignment
        [aligned_source, aligned_ref] = improved_align_audio(source, reference, fs);

        % Step 2: pitch extraction
        failure_prefix = '音高提取失败: ';
        h.Message = '提取音高...';
        [f0_source, time_source] = extract_pitch(aligned_source, fs, frame_len, hop_size);
        [f0_ref, time_ref] = extract_pitch(aligned_ref, fs, frame_len, hop_size);
        n_common = min(length(time_source), length(time_ref));
        time_source = time_source(1:n_common);
        time_ref = time_ref(1:n_common);
        f0_source = f0_source(1:n_common);
        f0_ref = f0_ref(1:n_common);

        % Step 3: pitch correction
        failure_prefix = '音高校正失败: ';
        h.Message = '矫正音调...';
        [corrected, f0_corrected] = correct_pitch(fig, aligned_source, fs, ...
            f0_source, f0_ref, time_source, time_ref);
    catch ME
        close(h);
        errordlg([failure_prefix ME.message], '处理错误');
        return;
    end
    close(h);

    % Make the result safe for audioplayer / audiowrite: real, finite, column.
    corrected = real(corrected(:));
    corrected(~isfinite(corrected)) = 0;
    if isempty(corrected) || all(corrected == 0)
        errordlg('矫正结果为空,无法生成可播放的音频!', '处理错误');
        return;
    end

    % Store everything BEFORE plotting so that a drawing problem can never
    % leave the playback/save state half-initialised.
    fig.UserData.corrected_audio = corrected;
    fig.UserData.f0_corrected = f0_corrected;
    fig.UserData.time_source = time_source;
    fig.UserData.original_fs = fs;

    % Enable playback of the corrected audio
    play_btn = [];
    if isfield(fig.UserData, 'play_corrected_btn')
        play_btn = fig.UserData.play_corrected_btn;
    end
    if isempty(play_btn) || ~all(isvalid(play_btn))
        play_btn = findobj(fig, 'Tag', 'play_corrected_btn');
    end
    if ~isempty(play_btn)
        set(play_btn, 'Enable', 'on');
    end

    % Redraw the three axes (waveform on the left axis, pitch on the right)
    proc_draw_wave_and_pitch(fig.UserData.axes.source, ...
        (0:length(aligned_source)-1)/fs, aligned_source, {'b'}, ...
        time_source, f0_source, {'r', 'LineWidth', 1.5}, '原始音频波形与音高');
    proc_draw_wave_and_pitch(fig.UserData.axes.reference, ...
        (0:length(aligned_ref)-1)/fs, aligned_ref, {'g'}, ...
        time_ref, f0_ref, {'m', 'LineWidth', 1.5}, '参考音频波形与音高');
    proc_draw_wave_and_pitch(fig.UserData.axes.corrected, ...
        (0:length(corrected)-1)/fs, corrected, {'Color', [0.5 0 0.5]}, ...
        time_source, f0_corrected, {'Color', [1 0.5 0], 'LineWidth', 2}, '矫正后音频波形与音高');

    % Combined comparison figure
    plot_pitch_comparison(time_source, f0_source, time_ref, f0_ref, f0_corrected,...
        aligned_source, aligned_ref, corrected, fs);
end

function proc_draw_wave_and_pitch(ax, t_wave, wave, wave_style, t_f0, f0, f0_style, caption)
% PROC_DRAW_WAVE_AND_PITCH  Plot a waveform (left axis) and a pitch track (right axis).
    cla(ax);
    yyaxis(ax, 'left');
    plot(ax, t_wave, wave, wave_style{:});
    ylabel(ax, '幅度');
    yyaxis(ax, 'right');
    % Guard against a pitch track and time vector of different length.
    n = min(length(t_f0), length(f0));
    plot(ax, t_f0(1:n), f0(1:n), f0_style{:});
    ylabel(ax, '频率 (Hz)');
    title(ax, caption);
    grid(ax, 'on');
end
function [aligned_src, aligned_ref] = improved_align_audio(src, ref, fs)
% IMPROVED_ALIGN_AUDIO  Time-align two recordings and cut them to equal length.
%
%   [aligned_src, aligned_ref] = improved_align_audio(src, ref, fs)
%
%   src, ref  audio vectors sampled at fs.
%   fs        sample rate in Hz.
%
%   The delay between the signals is estimated by cross-correlating their
%   mean-removed RMS envelopes (10 ms resolution). The signal that starts
%   late has its leading samples removed, then both outputs are truncated
%   to the same length. When no positive correlation peak exists (e.g.
%   constant or too short input) the signals are only truncated.
%
%   The previous implementation compared spectrogram frames with the same
%   index and then subtracted their time stamps; because both spectrograms
%   share one time grid the estimated offset was always zero.

    env_hop = max(1, round(0.01 * fs));          % 10 ms envelope resolution
    env_src = iaa_rms_envelope(src, env_hop);
    env_ref = iaa_rms_envelope(ref, env_hop);

    sample_diff = 0;
    if numel(env_src) >= 2 && numel(env_ref) >= 2
        % Limit the search to half of the shorter signal so that enough
        % overlapping material always remains after the shift.
        max_lag = max(1, floor(min(numel(env_src), numel(env_ref)) / 2));
        [xc, lags] = xcorr(env_src - mean(env_src), env_ref - mean(env_ref), max_lag);
        [peak, k] = max(xc);
        if peak > 0
            % A positive lag means src is delayed with respect to ref.
            sample_diff = lags(k) * env_hop;
        end
    end

    if sample_diff > 0
        aligned_src = src(sample_diff+1:end);
        aligned_ref = ref;
    elseif sample_diff < 0
        aligned_src = src;
        aligned_ref = ref(-sample_diff+1:end);
    else
        aligned_src = src;
        aligned_ref = ref;
    end

    % Equal length
    min_len = min(length(aligned_src), length(aligned_ref));
    aligned_src = aligned_src(1:min_len);
    aligned_ref = aligned_ref(1:min_len);
end

function env = iaa_rms_envelope(x, hop)
% IAA_RMS_ENVELOPE  RMS value of consecutive, non-overlapping blocks of hop samples.
    x = double(x(:));
    n_blocks = floor(numel(x) / hop);
    if n_blocks < 1
        env = zeros(0, 1);
        return;
    end
    blocks = reshape(x(1:n_blocks*hop), hop, n_blocks);
    env = sqrt(mean(blocks .^ 2, 1)).';
end
function mfcc = mfcc_feature(audio, fs, frame_size, hop_size)
% MFCC_FEATURE  Mel-frequency cepstral coefficients of an audio signal.
%
%   mfcc = mfcc_feature(audio, fs, frame_size, hop_size)
%
%   audio       audio vector.
%   fs          sample rate in Hz.
%   frame_size  frame length in samples.
%   hop_size    hop in samples (default: round(frame_size/2), 50% overlap).
%
%   Returns a 13-by-numFrames matrix. Row 1 holds the log frame energy
%   (replacing the 0th cepstral coefficient); cepstral mean normalisation is
%   applied to every row. An empty input yields a 13-by-0 matrix.

    if nargin < 4
        hop_size = round(frame_size/2);
    end
    if isempty(audio)
        mfcc = zeros(13, 0);
        return;
    end

    % Pre-emphasis
    audio = filter([1 -0.97], 1, audio);

    % Framing and Hamming window
    frames = buffer(audio, frame_size, frame_size - hop_size, 'nodelay');
    num_frames = size(frames, 2);
    window = hamming(frame_size);
    windowed_frames = frames .* repmat(window, 1, num_frames);

    % Power spectrum (non-negative frequencies only)
    nfft = 2^nextpow2(frame_size);
    mag_frames = abs(fft(windowed_frames, nfft));
    power_frames = (mag_frames(1:nfft/2+1, :)).^2;

    % Mel filter bank: num_filters triangles equally spaced on the mel scale
    num_filters = 26;
    mel_min = 0;
    mel_max = 2595 * log10(1 + (fs/2)/700);
    mel_points = linspace(mel_min, mel_max, num_filters + 2);
    hz_points = 700 * (10.^(mel_points/2595) - 1);
    bin_indices = floor((nfft+1) * hz_points / fs);

    filter_bank = zeros(num_filters, nfft/2+1);
    for m = 1:num_filters
        left = bin_indices(m);
        center = bin_indices(m+1);
        right = bin_indices(m+2);

        % Rising slope (empty when left and center share a bin)
        if center > left
            k = left:center-1;
            filter_bank(m, k+1) = (k - left) / (center - left);
        end

        % Falling slope. With short frames neighbouring mel points can fall
        % into the same FFT bin; the slope would then be 0/0 = NaN and turn
        % every coefficient into NaN, so the filter collapses to its peak.
        if right > center
            k = center:right;
            filter_bank(m, k+1) = (right - k) / (right - center);
        else
            filter_bank(m, center+1) = 1;
        end
    end

    % Log mel spectrum -> DCT -> first 13 coefficients
    mel_spectrum = filter_bank * power_frames;
    log_mel = log(mel_spectrum + eps);
    mfcc = dct(log_mel);
    mfcc = mfcc(1:13, :);

    % Replace the 0th coefficient by the log frame energy
    energy = log(sum(power_frames, 1) + eps);
    mfcc(1, :) = energy;

    % Cepstral mean normalisation
    mfcc = mfcc - mean(mfcc, 2);
end
function [f0, time] = extract_pitch(audio, fs, frame_len, hop_size)
% EXTRACT_PITCH  Frame-wise fundamental-frequency track (YIN-style).
%
%   [f0, time] = extract_pitch(audio, fs, frame_len, hop_size)
%
%   audio      audio vector.
%   fs         sample rate in Hz.
%   frame_len  frame length in samples (default 50 ms).
%   hop_size   hop in samples (default 25 ms).
%
%   f0    1-by-nFrames pitch in Hz; 0 marks frames without a usable estimate
%         (silent frames, frames too short for the 80-1000 Hz search range).
%   time  1-by-nFrames frame-centre times in seconds.
%
%   Steps: 80-1000 Hz zero-phase band-pass, per-frame difference function
%   from the biased autocorrelation, cumulative-mean normalisation, global
%   minimum with parabolic refinement, then outlier replacement and Gaussian
%   smoothing of the track. Audio shorter than one frame yields empty outputs.

    if nargin < 3
        frame_len = round(0.05 * fs);
    end
    if nargin < 4
        hop_size = round(0.025 * fs);
    end
    frame_len = max(1, round(frame_len));
    hop_size = max(1, round(hop_size));
    audio = double(audio(:));

    n_frames = floor((length(audio) - frame_len) / hop_size) + 1;
    if n_frames < 1
        f0 = zeros(1, 0);
        time = zeros(1, 0);
        return;
    end

    % Search range expressed as lags
    f0_min = 80;
    f0_max = 1000;
    tau_min = max(1, round(fs / f0_max));
    tau_max = max(tau_min + 1, round(fs / f0_min));
    tau_hi = min(tau_max, frame_len - 1);   % a lag cannot exceed the frame

    % Band-pass pre-filter. Second-order sections are used because the
    % transfer-function form of a narrow 8th-order band-pass is numerically
    % fragile at audio sample rates. filtfilt needs more than three times
    % the filter order (2*4 for a band-pass) in samples.
    filter_order = 4;
    if fs > 2000 && length(audio) > 3 * 2 * filter_order
        [z, p, k] = butter(filter_order, [f0_min, f0_max] / (fs / 2), 'bandpass');
        [sos, g] = zp2sos(z, p, k);
        audio = filtfilt(sos, g, audio);
    end

    % Frames whose power is 80 dB or more below the average power carry no
    % pitch information.
    energy_floor = 1e-8 * mean(audio .^ 2);

    f0 = zeros(1, n_frames);
    time = ((0:n_frames-1) * hop_size + 1 + frame_len / 2) / fs;

    if frame_len >= 10 && tau_hi > tau_min
        lag_axis = 1:tau_hi;
        for i = 1:n_frames
            start_idx = (i-1) * hop_size + 1;
            frame = audio(start_idx:start_idx + frame_len - 1);

            acf = xcorr(frame, 'biased');
            acf = reshape(acf(frame_len:end), 1, []);   % non-negative lags
            if ~(acf(1) > energy_floor)
                continue;                               % silent frame, f0 stays 0
            end

            % Difference function d(tau) ~ 2*(r(0) - r(tau)) for ALL lags
            % 1..tau_hi, so that the cumulative mean below is taken over
            % the full range.
            d = 2 * (acf(1) - acf(2:tau_hi + 1));

            % Cumulative-mean-normalised difference
            running = cumsum(d);
            cmnd = ones(1, tau_hi);
            defined = running > 0;
            cmnd(defined) = d(defined) .* lag_axis(defined) ./ running(defined);
            cmnd(1) = 1;

            % Global minimum inside the admissible lag range
            [best, rel] = min(cmnd(tau_min:tau_hi));
            if ~isfinite(best)
                continue;
            end
            tau_int = rel + tau_min - 1;

            % Parabolic refinement through the three points around the
            % minimum: offset = 0.5*(y(-1) - y(+1)) / (y(-1) - 2*y(0) + y(+1)).
            tau_est = tau_int;
            if tau_int > tau_min && tau_int < tau_hi
                y_prev = cmnd(tau_int - 1);
                y_mid = cmnd(tau_int);
                y_next = cmnd(tau_int + 1);
                curvature = y_prev - 2 * y_mid + y_next;
                if isfinite(curvature) && curvature > 0
                    offset = 0.5 * (y_prev - y_next) / curvature;
                    tau_est = tau_int + max(-0.5, min(0.5, offset));
                end
            end
            f0(i) = fs / tau_est;
        end
    end

    % --- Track post-processing (voiced frames only) -------------------------
    voiced = isfinite(f0) & f0 > 0;
    track = f0;
    track(~voiced) = NaN;

    % 1. Outliers: more than 20 % away from the 5-point running median
    med_track = medfilt1(track, 5, 'omitnan', 'truncate');
    deviation = abs(track - med_track) ./ med_track;
    is_outlier = voiced & med_track > 0 & deviation > 0.2;
    track(is_outlier) = NaN;

    % 2. Fill the gaps: spline inside the known range, nearest value outside
    %    (a spline extrapolates wildly beyond its data).
    known = find(~isnan(track));
    if numel(known) > 3
        filled = interp1(time(known), track(known), time, 'spline');
        filled(1:known(1)-1) = track(known(1));
        filled(known(end)+1:end) = track(known(end));
        track = max(filled, 0);
    elseif ~isempty(known)
        track = fillmissing(track, 'linear', 'EndValues', 'nearest');
    else
        track(:) = 0;
    end

    % 3. Gaussian smoothing with an odd window of 3..15 frames. The track is
    %    extended with its end values so the edges are not pulled towards 0.
    win_size = max(3, min(15, floor(n_frames / 10)));
    if mod(win_size, 2) == 0
        win_size = win_size + 1;
    end
    kernel = gausswin(win_size).';
    kernel = kernel / sum(kernel);
    half = (win_size - 1) / 2;
    padded = [repmat(track(1), 1, half), track, repmat(track(end), 1, half)];
    track = conv(padded, kernel, 'valid');

    % Frames without a pitch estimate keep the value 0.
    track(~voiced | ~isfinite(track)) = 0;
    f0 = track;
end
function [corrected, f0_corrected] = correct_pitch(fig, audio, fs, f0_src, f0_ref, time_src, time_ref)
% CORRECT_PITCH  Shift the pitch of each frame towards the reference pitch.
%
%   [corrected, f0_corrected] = correct_pitch(fig, audio, fs, f0_src, f0_ref, time_src, time_ref)
%
%   fig       main figure handle (not used; kept for interface compatibility).
%   audio     audio to correct.
%   fs        sample rate in Hz.
%   f0_src    per-frame pitch of audio in Hz.
%   f0_ref    per-frame pitch of the reference in Hz.
%   time_src  frame times of f0_src (not used; frame times are recomputed
%             from the frame index).
%   time_ref  frame times of f0_ref in seconds.
%
%   corrected     column vector, peak-normalised; empty (0-by-1) if nothing
%                 above the silence threshold was produced.
%   f0_corrected  1-by-nFrames pitch after correction.
%
%   Frames are 50 ms long with a 25 ms hop (at least 256 / 64 samples).
%   A frame is shifted only when both its own pitch and the interpolated
%   reference pitch exceed 50 Hz. Shifting is delegated to a PhaseVocoder
%   object, which is defined outside this function
%   (NOTE(review): pv.process is assumed to return one frame of samples).
%
%   The frames are recombined by weighted overlap-add: each frame is
%   multiplied by a Hann window, accumulated, and the sum is divided by the
%   accumulated window weight. The earlier blend-based recombination used
%   inconsistent index ranges and raised an indexing/size error on the
%   second frame, so no corrected audio was ever produced.

    audio = audio(:);

    frame_len = max(256, round(0.05 * fs));
    hop_size = max(64, round(0.025 * fs));

    n_frames_audio = max(1, floor((length(audio) - frame_len) / hop_size) + 1);
    n_frames = min(length(f0_src), n_frames_audio);

    out_acc = zeros(ceil(length(audio) * 1.2), 1);   % windowed sample sum
    win_acc = zeros(size(out_acc));                  % window weight sum
    f0_corrected = zeros(1, n_frames);
    last_written = 0;

    % Reference pitch as a function of time (voiced reference frames only)
    valid_ref = f0_ref > 50;
    if sum(valid_ref) > 3
        smooth_f0_ref = smooth_pitch(f0_ref, 7);
        ref_t = time_ref(valid_ref);
        ref_f = smooth_f0_ref(valid_ref);
        t_lo = min(ref_t);
        t_hi = max(ref_t);
        % Query times are clamped to the voiced range; polynomial
        % extrapolation beyond it can produce arbitrary target pitches.
        ref_interp = @(t) interp1(ref_t, ref_f, min(max(t, t_lo), t_hi), 'pchip');
    else
        ref_interp = @(t) 0;
    end

    pv = PhaseVocoder(fs, frame_len, hop_size);

    for i = 1:n_frames
        % --- Frame extraction (zero-padded at the end of the signal) -------
        start_idx = (i-1) * hop_size + 1;
        end_idx = min(start_idx + frame_len - 1, length(audio));
        frame = audio(start_idx:end_idx);
        if length(frame) < frame_len
            frame = [frame; zeros(frame_len - length(frame), 1)];
        end

        src_f0 = f0_src(i);
        t_frame = (start_idx + frame_len/2) / fs;
        target_f0 = ref_interp(t_frame);

        % --- Correction decision -------------------------------------------
        if ~isnan(src_f0) && src_f0 > 50 && target_f0 > 50
            semitone_diff = 12 * log2(target_f0 / src_f0);
            % Shift limit grows with the requested shift, capped at 24 semitones
            max_diff = min(24, 6 + 0.2*abs(semitone_diff));
            semitone_diff = max(-max_diff, min(max_diff, semitone_diff));
            corrected_frame = pv.process(frame, semitone_diff);
            f0_corrected(i) = src_f0 * 2^(semitone_diff/12);
        else
            corrected_frame = frame;
            f0_corrected(i) = src_f0;
        end

        % A frequency-domain processor may hand back a row vector, a tiny
        % imaginary residue or non-finite samples; none of these may reach
        % the output, because the player rejects complex data.
        corrected_frame = real(corrected_frame(:));
        corrected_frame(~isfinite(corrected_frame)) = 0;
        n_out = length(corrected_frame);
        if n_out == 0
            continue;
        end

        % --- Weighted overlap-add ------------------------------------------
        output_start = (i-1) * hop_size + 1;
        output_end = output_start + n_out - 1;
        if output_end > length(out_acc)
            out_acc(output_end) = 0;   % grow both accumulators
            win_acc(output_end) = 0;
        end
        win = hann(n_out, 'periodic');
        span = output_start:output_end;
        out_acc(span) = out_acc(span) + corrected_frame .* win;
        win_acc(span) = win_acc(span) + win;
        last_written = max(last_written, output_end);
    end

    % Divide by the accumulated window weight (a weighted average of the
    % overlapping frames, so the amplitude is never boosted).
    corrected = out_acc(1:last_written);
    win_acc = win_acc(1:last_written);
    covered = win_acc > 1e-8;
    corrected(covered) = corrected(covered) ./ win_acc(covered);
    corrected(~covered) = 0;

    % Drop trailing silence
    last_sample = find(abs(corrected) > 0.001, 1, 'last');
    if ~isempty(last_sample)
        corrected = corrected(1:last_sample);
    else
        corrected = zeros(0, 1);
    end

    % Peak normalisation
    if ~isempty(corrected)
        max_val = max(abs(corrected));
        if max_val > 0
            corrected = corrected / max_val;
        end
    end
end
function smoothed = smooth_pitch(pitch, window_size)
% SMOOTH_PITCH  Median + Gaussian smoothing of a pitch track.
%
%   smoothed = smooth_pitch(pitch, window_size)
%
%   pitch        pitch track (the padding below concatenates horizontally,
%                so a row vector is expected).
%   window_size  median window length; rounded and raised to at least 3.
%
%   The track is median filtered, convolved with a normalised Gaussian of
%   odd length max(5, 2*window_size-1) after replicating the end values,
%   and finally replaced by a 5-point moving mean wherever the local
%   variance exceeds twice the average local variance.

    % Integer window of at least 3 samples
    win = max(round(window_size), 3);

    % Stage 1: spike removal
    despiked = medfilt1(pitch, win, 'omitnan', 'truncate');

    % Stage 2: local variance of the despiked track and its average
    % (win is already an integer >= 3, so it serves as variance window too)
    variance_track = movvar(despiked, win);
    mean_variance = mean(variance_track, 'omitnan');

    % Stage 3: Gaussian kernel of odd length
    kernel_len = max(5, win * 2 - 1);
    kernel_len = kernel_len + mod(kernel_len + 1, 2);
    kernel = gausswin(kernel_len);
    kernel = kernel / sum(kernel);

    % Replicate the end values on both sides before convolving
    lead = ceil((kernel_len - 1) / 2);
    trail = floor((kernel_len - 1) / 2);
    head_pad = repmat(despiked(1), 1, lead);
    tail_pad = repmat(despiked(end), 1, trail);
    extended = [head_pad, despiked, tail_pad];

    filtered = conv(extended, kernel, 'same');
    smoothed = filtered(lead + 1:end - trail);

    % Stage 4: stronger smoothing where the track is locally unstable
    unstable = variance_track > 2 * mean_variance;
    if any(unstable)
        heavier = movmean(smoothed, 5);
        smoothed(unstable) = heavier(unstable);
    end

    % Same length as the input
    smoothed = smoothed(1:length(pitch));
end
% Safe frame extraction
function [frame, valid] = safe_frame(x, start, len)
% SAFE_FRAME  Extract len samples of x starting at start, zero-padding as needed.
%
%   [frame, valid] = safe_frame(x, start, len)
%
%   x      signal vector.
%   start  index of the first requested sample (may be < 1 or > length(x)).
%   len    number of samples requested.
%
%   frame  x(start:start+len-1) when that range lies inside x (valid = true).
%          Otherwise a len-by-1 column in which the samples that exist are
%          placed at their proper offset and everything else is zero
%          (valid = false).

    n = length(x);
    last = start + len - 1;

    if start >= 1 && last <= n
        frame = x(start:last);
        valid = true;
        return;
    end

    frame = zeros(len, 1);
    valid = false;

    % Part of the requested range that actually exists in x
    src_first = max(start, 1);
    src_last = min(last, n);
    if src_last >= src_first
        % Offset inside the frame at which the existing samples begin.
        % Computing both ranges from the same bounds keeps their sizes
        % equal even when x ends before the requested range does.
        dst_first = src_first - start + 1;
        dst_last = dst_first + (src_last - src_first);
        frame(dst_first:dst_last) = x(src_first:src_last);
    end
end
% function [corrected, f0_corrected] = correct_pitch(fig, audio, fs, f0_src, f0_ref, time_src, time_ref)
% % 创建进度条
% h = uiprogressdlg(fig, 'Title', '处理中', 'Message', '音高校正...');
%
% frame_len = round(0.05 * fs); % 50ms帧长
% hop_size = round(0.025 * fs); % 25ms跳跃
% n_frames = floor((length(audio)-frame_len)/hop_size) + 1;
% corrected = zeros(size(audio));
% f0_corrected = zeros(1, n_frames);
%
% % 创建参考音高插值函数
% valid_ref = f0_ref > 0;
% if any(valid_ref)
% ref_interp = @(t) interp1(time_ref(valid_ref), f0_ref(valid_ref), t, 'linear', 'extrap');
% else
% ref_interp = @(t) 0;
% end
%
% for i = 1:n_frames
% % 计算当前帧位置
% start_idx = (i-1)*hop_size + 1;
% end_idx = start_idx + frame_len - 1;
% frame = audio(start_idx:end_idx);
%
% % 查找当前帧对应的目标音高
% t_frame = (start_idx + frame_len/2) / fs;
% target_f0 = ref_interp(t_frame);
%
% if f0_src(i) > 0 && target_f0 > 0
% % 使用对数比例(音乐音高是几何级数)
% semitone_diff = 12 * log2(target_f0 / f0_src(i));
%
% % 限制最大校正范围(±12半音)
% semitone_diff = max(-12, min(12, semitone_diff));
%
% % 转换为频率比例
% target_ratio = 2^(semitone_diff/12);
%
% % 使用相位声码器
% corrected_frame = phase_vocoder(frame, target_ratio, fs);
%
% f0_corrected(i) = target_f0;
% else
% corrected_frame = frame;
% f0_corrected(i) = f0_src(i);
% end
%
%
% % 重叠相加
% frame_end_idx = start_idx + length(corrected_frame) - 1;
% if frame_end_idx <= length(corrected)
% corrected(start_idx:frame_end_idx) = ...
% corrected(start_idx:frame_end_idx) + corrected_frame .* hamming(length(corrected_frame));
% end
%
% % 更新进度条
% h.Value = i/n_frames;
% h.Message = sprintf('处理进度: %d/%d 帧 (%.1f%%)', i, n_frames, i/n_frames*100);
% end
%
% % === 关键修复 3: 数据格式处理 ===
% corrected = real(corrected); % 确保实数
% max_amp = max(abs(corrected));
% if max_amp > 0
% corrected = corrected / max_amp;
% else
% corrected = zeros(size(corrected)); % 处理全零情况
% end
% if ~isa(corrected, 'double')
% corrected = double(corrected);
% end
%
% % 归一化防止削波
% max_amp = max(abs(corrected));
% if max_amp > 0
% corrected = corrected / max_amp;
% end
%
% close(h);
% end
function plot_pitch_comparison(time_src, f0_src, time_ref, f0_ref, f0_corrected, src_wave, ref_wave, corr_wave, fs)
% PLOT_PITCH_COMPARISON  Three-panel figure comparing waveforms and pitch tracks.
%
%   plot_pitch_comparison(time_src, f0_src, time_ref, f0_ref, f0_corrected, ...
%                         src_wave, ref_wave, corr_wave, fs)
%
%   time_src, f0_src  frame times / pitch of the original audio.
%   time_ref, f0_ref  frame times / pitch of the reference audio.
%   f0_corrected      pitch after correction (on the time_src grid).
%   src_wave, ref_wave, corr_wave  the three waveforms, sampled at fs.
%
%   Panels: original, reference, corrected. Each shows the waveform on the
%   left axis and pitch on the right axis. A text box reports the mean
%   absolute pitch error (Hz) before and after correction over the frames
%   where all three pitch values are positive.

    % Truncate all five series to their common length
    min_length = min([length(time_src), length(f0_src), length(time_ref), ...
        length(f0_ref), length(f0_corrected)]);
    time_src = time_src(1:min_length);
    f0_src = f0_src(1:min_length);
    time_ref = time_ref(1:min_length);
    f0_ref = f0_ref(1:min_length);
    f0_corrected = f0_corrected(1:min_length);

    pitch_fig = figure('Name', '音频波形与音高分析', 'Position', [100 100 900 800]);

    % --- Panel 1: original ---------------------------------------------------
    ppc_open_panel(1, src_wave, fs, [0.7 0.7 1]);
    plot(time_src, f0_src, 'b', 'LineWidth', 1.5);
    hold on;
    plot(time_ref, f0_ref, 'r--', 'LineWidth', 1.5);
    hold off;
    ppc_close_panel('原始音频波形与音高', {'原始波形', '原始音高', '参考音高'});

    % --- Panel 2: reference --------------------------------------------------
    ppc_open_panel(2, ref_wave, fs, [1 0.7 0.7]);
    plot(time_ref, f0_ref, 'r', 'LineWidth', 1.5);
    ppc_close_panel('参考音频波形与音高', {'参考波形', '参考音高'});

    % --- Panel 3: corrected --------------------------------------------------
    ppc_open_panel(3, corr_wave, fs, [0.7 1 0.7]);
    plot(time_src, f0_src, 'b:', 'LineWidth', 1);
    hold on;
    plot(time_ref, f0_ref, 'r--', 'LineWidth', 1);
    plot(time_src, f0_corrected, 'g', 'LineWidth', 2);
    hold off;
    ppc_close_panel('矫正后音频波形与音高', {'矫正波形', '原始音高', '参考音高', '矫正音高'});

    % --- Pitch error summary -------------------------------------------------
    valid_idx = (f0_src > 0) & (f0_ref > 0) & (f0_corrected > 0);
    if any(valid_idx)
        src_error = mean(abs(f0_src(valid_idx) - f0_ref(valid_idx)));
        corr_error = mean(abs(f0_corrected(valid_idx) - f0_ref(valid_idx)));
        % A source that already matches the reference has zero error; the
        % relative improvement is then reported as 0 instead of NaN/Inf.
        if src_error > 0
            improvement = (src_error - corr_error) / src_error * 100;
        else
            improvement = 0;
        end
        annotation(pitch_fig, 'textbox', [0.15 0.05 0.7 0.05], ...
            'String', sprintf('原始音高平均误差: %.2f Hz | 矫正后音高平均误差: %.2f Hz | 改进: %.1f%%', ...
            src_error, corr_error, improvement), ...
            'FitBoxToText', 'on', 'BackgroundColor', [0.9 0.9 0.9], ...
            'FontSize', 12, 'HorizontalAlignment', 'center');
    end
end

function ppc_open_panel(row, wave, fs, wave_color)
% PPC_OPEN_PANEL  Start panel `row`: draw the waveform on the left axis, activate the right axis.
    subplot(3, 1, row);
    wave_time = (0:length(wave)-1) / fs;   % time axis starting at 0
    yyaxis left;
    plot(wave_time, wave, 'Color', wave_color, 'LineWidth', 0.5);
    ylabel('幅度');
    ylim([-1.1 1.1]);                      % fixed amplitude range
    yyaxis right;
end

function ppc_close_panel(caption, legend_entries)
% PPC_CLOSE_PANEL  Add title, axis labels, legend and grid to the current panel.
    title(caption);
    xlabel('时间 (s)');
    ylabel('频率 (Hz)');
    legend(legend_entries{:}, 'Location', 'best');
    grid on;
end
function play_audio(fig, audio_type)
% PLAY_AUDIO  Open a small player window for the source or corrected audio.
%
%   play_audio(fig, audio_type)
%
%   fig         main uifigure; audio and sample rate come from fig.UserData.
%   audio_type  'source' or 'corrected'; any other value does nothing.
%
%   The corrected audio is played at fig.UserData.original_fs (the rate it
%   was produced at) when that field is set, otherwise at fig.UserData.fs.
%   The window offers play / pause / stop buttons, a progress indicator and
%   a time display; closing the window stops playback.

    if ~isvalid(fig)
        errordlg('主窗口无效!', '播放错误');
        return;
    end

    fs = fig.UserData.fs;
    switch audio_type
        case 'source'
            audio = fig.UserData.source_audio;
            title_text = '播放原始音频';
            if isempty(audio)
                errordlg('未找到原始音频数据!', '播放错误');
                return;
            end
        case 'corrected'
            if ~isfield(fig.UserData, 'corrected_audio') || isempty(fig.UserData.corrected_audio)
                errordlg('请先完成音高校正!', '播放错误');
                return;
            end
            audio = fig.UserData.corrected_audio;
            title_text = '播放矫正音频';
            % fs may have been changed by a later import or recording; the
            % corrected audio belongs to the rate stored at processing time.
            if isfield(fig.UserData, 'original_fs') && ~isempty(fig.UserData.original_fs)
                fs = fig.UserData.original_fs;
            end
        otherwise
            return;
    end

    % Sanitise instead of rejecting: a frequency-domain result can carry a
    % negligible imaginary part or isolated non-finite samples.
    if isvector(audio)
        audio = audio(:);
    end
    audio = double(real(audio));
    audio(~isfinite(audio)) = 0;
    if isempty(audio) || all(audio(:) == 0)
        errordlg('音频数据无效,无法播放!', '播放错误');
        return;
    end

    try
        player = audioplayer(audio, fs);
    catch ME
        errordlg(['无法创建播放器: ' ME.message], '播放错误');
        return;
    end

    % --- Player window ------------------------------------------------------
    play_fig = uifigure('Name', title_text, 'Position', [500 500 300 150]);
    % Closing the window must silence the player and detach its callbacks.
    play_fig.CloseRequestFcn = @(src, event) play_close_player_window(src, player);

    % Progress indicator
    ax = uiaxes(play_fig, 'Position', [50 100 200 20]);
    hold(ax, 'on');
    prog_line = plot(ax, [0 0], [0 1], 'b', 'LineWidth', 2);
    hold(ax, 'off');
    xlim(ax, [0 1]);
    ylim(ax, [0 1]);
    set(ax, 'XTick', [], 'YTick', []);

    % Time display
    time_label = uilabel(play_fig, 'Position', [50 80 200 20], ...
        'Text', '00:00 / 00:00', 'HorizontalAlignment', 'center');

    % Control buttons
    uibutton(play_fig, 'Position', [50 30 60 30], 'Text', '播放', ...
        'ButtonPushedFcn', @(btn,event) play(player));
    uibutton(play_fig, 'Position', [120 30 60 30], 'Text', '暂停', ...
        'ButtonPushedFcn', @(btn,event) pause(player));
    uibutton(play_fig, 'Position', [190 30 60 30], 'Text', '停止', ...
        'ButtonPushedFcn', @(btn,event) stop(player));

    % Total duration as mm:ss (round the total first so 59.6 s shows as
    % 01:00 rather than 00:60)
    n_samples = size(audio, 1);
    total_secs = round(n_samples / fs);
    mins = floor(total_secs / 60);
    secs = total_secs - 60 * mins;
    total_str = sprintf('%02d:%02d', mins, secs);

    % Progress callbacks
    player.TimerFcn = {@update_playback, play_fig, time_label, total_str, prog_line, n_samples};
    player.TimerPeriod = 0.1;   % seconds between progress updates
    player.StopFcn = @(src,event) stop_playback(src, event, play_fig);
end

function play_close_player_window(win, player)
% PLAY_CLOSE_PLAYER_WINDOW  Stop playback, detach callbacks and delete the player window.
    try
        % Detach first so stopping cannot call back into a closing window.
        player.TimerFcn = [];
        player.StopFcn = [];
        if isplaying(player)
            stop(player);
        end
    catch
        % Deliberate best effort: the window must close even if the audio
        % device misbehaves.
    end
    if isvalid(win)
        delete(win);
    end
end
function update_playback(player, ~, fig, time_label, total_str, prog_line, total_samples)
% UPDATE_PLAYBACK  audioplayer TimerFcn: refresh progress indicator and time text.
%
%   player         the audioplayer that fired the callback.
%   fig            player window (not used here; part of the callback signature).
%   time_label     uilabel showing "elapsed / total".
%   total_str      preformatted total duration (mm:ss).
%   prog_line      line object used as the progress cursor.
%   total_samples  number of samples in the clip.

    if ~strcmp(player.Running, 'on')
        return;
    end
    % The window may already have been closed while the player still ticks.
    if ~isvalid(time_label) || ~isvalid(prog_line)
        return;
    end

    current_sample = player.CurrentSample;
    if current_sample <= 0 || total_samples <= 0
        return;
    end

    % Move the vertical cursor. Both x values must change; with
    % XData = [0 progress] and YData = [0 1] the line becomes a diagonal.
    progress = min(1, current_sample / total_samples);
    prog_line.XData = [progress progress];

    % Elapsed time as mm:ss (floor, so the seconds field never shows 60)
    elapsed = floor(current_sample / player.SampleRate);
    mins = floor(elapsed / 60);
    secs = elapsed - 60 * mins;
    time_label.Text = sprintf('%02d:%02d / %s', mins, secs, total_str);
end
function stop_playback(src, ~, fig)
% STOP_PLAYBACK  audioplayer StopFcn: stop the player and close its window.
%
%   src  the audioplayer that stopped.
%   fig  the player window; it is closed if it still exists.

    stop(src);

    % Nothing left to close when the window is already gone
    if ~isvalid(fig)
        return;
    end
    close(fig);
end
function save_audio(fig)
% SAVE_AUDIO  Save the corrected audio (and its pitch track) to WAV or MAT.
%
%   save_audio(fig)
%
%   fig  main uifigure; corrected_audio, f0_corrected, time_source and the
%        sample rate are read from fig.UserData.
%
%   WAV: 24-bit audio with the pitch data JSON-encoded in the file comment.
%   MAT: variables corrected_audio, f0_corrected, time_source and fs.
%   The sample rate is fig.UserData.original_fs when set, otherwise
%   fig.UserData.fs (e.g. for a result that was imported rather than
%   computed in this session).

    if ~isvalid(fig) || ~isfield(fig.UserData, 'corrected_audio') || ...
            isempty(fig.UserData.corrected_audio)
        errordlg('无有效音频数据可保存!', '保存错误');
        return;
    end

    state = fig.UserData;
    corrected_audio = real(state.corrected_audio);
    f0_corrected = sv_field_or(state, 'f0_corrected', []);
    time_source = sv_field_or(state, 'time_source', []);
    fs = sv_field_or(state, 'original_fs', []);
    if isempty(fs)
        fs = state.fs;
    end

    % Ask for the destination
    [file, path] = uiputfile({'*.wav', 'WAV文件 (*.wav)'; '*.mat', 'MATLAB数据文件 (*.mat)'}, ...
        '保存矫正音频和音高数据');
    if isequal(file, 0), return; end
    filename = fullfile(path, file);
    [~, ~, ext] = fileparts(filename);

    try
        if strcmpi(ext, '.wav')
            % Pitch data travels in the WAV comment as JSON
            metadata = struct();
            metadata.f0_corrected = f0_corrected;
            metadata.time_source = time_source;
            metadata.fs = fs;
            metadata.creation_date = datestr(now);
            metadata.pitch_correction_info = 'Generated by Audio Pitch Correction System';
            audiowrite(filename, corrected_audio, fs, ...
                'BitsPerSample', 24, ...
                'Comment', jsonencode(metadata));
            msgbox('音频和音高数据保存成功!', '完成');
        elseif strcmpi(ext, '.mat')
            save(filename, 'corrected_audio', 'f0_corrected', 'time_source', 'fs');
            msgbox('完整数据保存成功!', '完成');
        else
            % Previously an unknown extension did nothing without any notice.
            errordlg('不支持的文件格式!', '保存错误');
        end
    catch ME
        errordlg(['保存失败: ' ME.message], '保存错误');
    end
end

function value = sv_field_or(s, name, fallback)
% SV_FIELD_OR  Return s.(name) if that field exists, otherwise fallback.
    if isfield(s, name)
        value = s.(name);
    else
        value = fallback;
    end
end
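% Reported issue: the corrected audio still cannot be played back. Please
% check whether the data is not being passed/stored, or whether the
% processing step itself is failing.
% (Trailing web-page residue, originally "Latest release".)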