function timbre_transfer
% 创建主界面
fig = figure('Name', '高级音色转换系统 v3.2', 'Position', [50, 50, 1200, 900], ...
'NumberTitle', 'off', 'MenuBar', 'none', 'Resize', 'on', ...
'CloseRequestFcn', @close_gui, 'Color', [0.94, 0.94, 0.94]);
% 全局变量
fs = 44100; % 默认采样率
source_audio = []; % 源音频(提供音色)
target_audio = []; % 目标音频(提供内容)
converted_audio = []; % 转换后的音频
processing = false; % 处理状态标志
conversion_complete = false; % 转换完成标志
% STFT参数
stft_params.win_len = 2048; % 窗长
stft_params.overlap = 1536; % 重叠点数 (75%)
stft_params.nfft = 2048; % FFT点数
stft_params.window = hamming(stft_params.win_len, 'periodic'); % 汉明窗
stft_params.lifter_order = 30; % 包络阶数
stft_params.phase_iter = 5; % 相位迭代次数
stft_params.fs = fs; % 采样率参数
stft_params.hop_size = stft_params.win_len - stft_params.overlap; % 跳跃长度
% 计算合成窗 (确保完美重建)
stft_params.win_synthesis = stft_params.window / sum(stft_params.window.^2) * stft_params.hop_size;
% === 创建控件 ===
% 顶部控制面板
control_panel = uipanel('Title', '音频控制', 'Position', [0.02, 0.92, 0.96, 0.07], ...
'BackgroundColor', [0.9, 0.95, 1]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '导入源音频(音色)',...
'Position', [20, 10, 150, 30], 'Callback', @load_source, ...
'FontSize', 10, 'FontWeight', 'bold', 'BackgroundColor', [0.7, 0.9, 1]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '导入目标音频(内容)',...
'Position', [190, 10, 150, 30], 'Callback', @load_target, ...
'FontSize', 10, 'FontWeight', 'bold', 'BackgroundColor', [0.7, 0.9, 1]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '执行音色转换',...
'Position', [360, 10, 150, 30], 'Callback', @transfer_timbre, ...
'FontSize', 10, 'FontWeight', 'bold', 'BackgroundColor', [0.8, 1, 0.8]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '播放目标音频',...
'Position', [530, 10, 120, 30], 'Callback', @(src,evt) play_audio(target_audio, fs), ...
'FontSize', 10, 'BackgroundColor', [1, 0.95, 0.8]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '播放转换音频',...
'Position', [670, 10, 120, 30], 'Callback', @(src,evt) play_audio(converted_audio, fs), ...
'FontSize', 10, 'BackgroundColor', [1, 0.95, 0.8]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '保存转换音频',...
'Position', [810, 10, 120, 30], 'Callback', @save_audio, ...
'FontSize', 10, 'BackgroundColor', [0.9, 1, 0.8]);
% 参数控制面板
param_panel = uipanel('Title', 'STFT参数设置', 'Position', [0.02, 0.82, 0.96, 0.09], ...
'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
uicontrol('Parent', param_panel, 'Style', 'text', 'String', '窗长:',...
'Position', [20, 40, 50, 20], 'HorizontalAlignment', 'left',...
'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
win_len_edit = uicontrol('Parent', param_panel, 'Style', 'edit',...
'String', num2str(stft_params.win_len),...
'Position', [80, 40, 80, 25], 'Callback', @update_params, ...
'BackgroundColor', [1, 1, 1]);
uicontrol('Parent', param_panel, 'Style', 'text', 'String', '重叠率(%):',...
'Position', [180, 40, 70, 20], 'HorizontalAlignment', 'left',...
'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
overlap_edit = uicontrol('Parent', param_panel, 'Style', 'edit',...
'String', '75',...
'Position', [260, 40, 80, 25], 'Callback', @update_params, ...
'BackgroundColor', [1, 1, 1]);
uicontrol('Parent', param_panel, 'Style', 'text', 'String', 'FFT点数:',...
'Position', [360, 40, 60, 20], 'HorizontalAlignment', 'left',...
'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
nfft_edit = uicontrol('Parent', param_panel, 'Style', 'edit',...
'String', num2str(stft_params.nfft),...
'Position', [430, 40, 80, 25], 'Callback', @update_params, ...
'BackgroundColor', [1, 1, 1]);
uicontrol('Parent', param_panel, 'Style', 'text', 'String', '包络阶数:',...
'Position', [530, 40, 60, 20], 'HorizontalAlignment', 'left',...
'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
lifter_edit = uicontrol('Parent', param_panel, 'Style', 'edit',...
'String', num2str(stft_params.lifter_order),...
'Position', [600, 40, 80, 25], 'Callback', @update_params, ...
'BackgroundColor', [1, 1, 1]);
uicontrol('Parent', param_panel, 'Style', 'text', 'String', '相位迭代:',...
'Position', [700, 40, 60, 20], 'HorizontalAlignment', 'left',...
'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
iter_edit = uicontrol('Parent', param_panel, 'Style', 'edit',...
'String', num2str(stft_params.phase_iter),...
'Position', [770, 40, 80, 25], 'Callback', @update_params, ...
'BackgroundColor', [1, 1, 1]);
% 波形显示区域 - 使用选项卡
tabgp = uitabgroup(fig, 'Position', [0.02, 0.02, 0.96, 0.35]);
tab1 = uitab(tabgp, 'Title', '目标音频');
tab2 = uitab(tabgp, 'Title', '转换后音频');
tab3 = uitab(tabgp, 'Title', '源音频');
ax1 = axes('Parent', tab1, 'Position', [0.07, 0.15, 0.9, 0.75]);
title(ax1, '目标音频波形');
xlabel(ax1, '时间 (s)'); ylabel(ax1, '幅度');
grid(ax1, 'on');
ax2 = axes('Parent', tab2, 'Position', [0.07, 0.15, 0.9, 0.75]);
title(ax2, '转换后音频波形');
xlabel(ax2, '时间 (s)'); ylabel(ax2, '幅度');
grid(ax2, 'on');
ax3 = axes('Parent', tab3, 'Position', [0.07, 0.15, 0.9, 0.75]);
title(ax3, '源音频波形');
xlabel(ax3, '时间 (s)'); ylabel(ax3, '幅度');
grid(ax3, 'on');
% 频谱显示区域(只保留三个频谱图)
spec_panel = uipanel('Title', '频谱分析', 'Position', [0.02, 0.38, 0.96, 0.43], ...
'BackgroundColor', [0.98, 0.98, 0.98], 'FontWeight', 'bold');
% 增大频谱图尺寸(垂直方向)
ax4 = axes('Parent', spec_panel, 'Position', [0.03, 0.1, 0.3, 0.8]); % 高度增加到80%
title(ax4, '源音频频谱');
ax5 = axes('Parent', spec_panel, 'Position', [0.36, 0.1, 0.3, 0.8]); % 高度增加到80%
title(ax5, '目标音频频谱');
ax6 = axes('Parent', spec_panel, 'Position', [0.69, 0.1, 0.3, 0.8]); % 高度增加到80%
title(ax6, '转换后频谱');
% 状态文本
status_text = uicontrol('Style', 'text', 'Position', [50, 5, 900, 30],...
'String', '就绪', 'HorizontalAlignment', 'left',...
'FontSize', 10, 'FontWeight', 'bold', 'BackgroundColor', [1, 1, 1]);
% 进度条
progress_ax = axes('Position', [0.1, 0.97, 0.8, 0.02],...
'XLim', [0, 1], 'YLim', [0, 1], 'Box', 'on', 'Color', [0.9, 0.9, 0.9]);
progress_bar = patch(progress_ax, [0 0 0 0], [0 0 1 1], [0.2, 0.6, 1]);
axis(progress_ax, 'off');
progress_text = uicontrol('Style', 'text', 'Position', [500, 970, 200, 20],...
'String', '', 'HorizontalAlignment', 'center',...
'FontSize', 10, 'FontWeight', 'bold', 'BackgroundColor', [1, 1, 1]);
% 诊断信息面板
diag_panel = uipanel('Title', '处理日志', 'Position', [0.02, 0.02, 0.96, 0.35], ...
'BackgroundColor', [0.95, 0.95, 0.95], 'Visible', 'off');
diag_text = uicontrol('Parent', diag_panel, 'Style', 'listbox', ...
'Position', [10, 10, 1140, 250], 'String', {'系统已初始化'}, ...
'HorizontalAlignment', 'left', 'FontSize', 9, ...
'BackgroundColor', [1, 1, 1], 'Max', 100, 'Min', 0);
% 添加显示/隐藏日志按钮
uicontrol('Style', 'pushbutton', 'String', '显示日志',...
'Position', [1020, 920, 100, 30], 'Callback', @toggle_log, ...
'FontSize', 9, 'BackgroundColor', [0.9, 0.95, 1]);
% === 回调函数 ===
% 更新参数回调
function update_params(~, ~)
try
% 获取新参数值
new_win_len = str2double(get(win_len_edit, 'String'));
overlap_percent = str2double(get(overlap_edit, 'String'));
new_nfft = str2double(get(nfft_edit, 'String'));
lifter_order = str2double(get(lifter_edit, 'String'));
phase_iter = str2double(get(iter_edit, 'String'));
% 验证参数
if isnan(new_win_len) || new_win_len <= 0 || mod(new_win_len, 1) ~= 0
error('窗长必须是正整数');
end
if isnan(overlap_percent) || overlap_percent < 0 || overlap_percent > 100
error('重叠率必须是0-100之间的数字');
end
if isnan(new_nfft) || new_nfft <= 0 || mod(new_nfft, 1) ~= 0
error('FFT点数必须是正整数');
end
if isnan(lifter_order) || lifter_order <= 0 || mod(lifter_order, 1) ~= 0
error('包络阶数必须是正整数');
end
if isnan(phase_iter) || phase_iter <= 0 || mod(phase_iter, 1) ~= 0
error('相位迭代次数必须是正整数');
end
% 更新参数
stft_params.win_len = new_win_len;
stft_params.overlap = round(overlap_percent/100 * new_win_len);
stft_params.nfft = new_nfft;
stft_params.window = hamming(new_win_len, 'periodic');
stft_params.lifter_order = lifter_order;
stft_params.phase_iter = phase_iter;
update_diag(sprintf('参数更新: 窗长=%d, 重叠=%d(%.0f%%), FFT=%d', ...
new_win_len, stft_params.overlap, overlap_percent, new_nfft));
catch e
errordlg(['参数错误: ', e.message], '输入错误');
update_diag(['参数错误: ', e.message], true);
end
end
% 更新诊断信息
function update_diag(msg, force)
if nargin < 2, force = false; end
if ~conversion_complete || force
current = get(diag_text, 'String');
new_msg = sprintf('[%s] %s', datestr(now, 'HH:MM:SS'), msg);
set(diag_text, 'String', [current; {new_msg}]);
set(diag_text, 'Value', length(get(diag_text, 'String')));
end
end
% 切换日志显示
function toggle_log(~, ~)
if strcmp(get(diag_panel, 'Visible'), 'on')
set(diag_panel, 'Visible', 'off');
set(tabgp, 'Position', [0.02, 0.02, 0.96, 0.35]);
else
set(diag_panel, 'Visible', 'on');
set(tabgp, 'Position', [0.02, 0.38, 0.96, 0.35]);
end
end
% 关闭GUI回调
function close_gui(~, ~)
if processing
choice = questdlg('处理正在进行中,确定要关闭吗?', '确认关闭', '是', '否', '否');
if strcmp(choice, '否')
return;
end
end
delete(fig);
end
% 导入源音频
function load_source(~, ~)
if processing, return; end
[file, path] = uigetfile({'*.wav;*.mp3;*.ogg', '音频文件 (*.wav,*.mp3,*.ogg)'});
if isequal(file, 0), return; end
try
[audio, fs_in] = audioread(fullfile(path, file));
update_diag(['加载源音频: ', file, ' (', num2str(fs_in), 'Hz)']);
set(status_text, 'String', ['正在处理源音频: ', file]);
drawnow;
% 转换为单声道并归一化
if size(audio, 2) > 1
source_audio = mean(audio, 2);
update_diag('转换为单声道');
else
source_audio = audio;
end
source_audio = source_audio / max(abs(source_audio));
update_diag('归一化完成');
% 更新采样率参数
stft_params.fs = fs;
% 采样率处理
if fs == 0
fs = fs_in;
elseif fs ~= fs_in
update_diag(['重采样: ', num2str(fs_in), 'Hz -> ', num2str(fs), 'Hz']);
source_audio = resample(source_audio, fs, fs_in);
end
% 显示波形和频谱
plot(ax3, (0:length(source_audio)-1)/fs, source_audio);
title(ax3, ['源音频波形: ', file]);
xlabel(ax3, '时间 (s)'); ylabel(ax3, '幅度');
grid(ax3, 'on');
% 显示频谱(不再显示包络)
show_spectrum(ax4, source_audio, fs, stft_params, '源音频频谱');
set(status_text, 'String', ['已加载源音频: ', file, ' (', num2str(fs/1000), 'kHz)']);
update_diag(['源音频长度: ', num2str(length(source_audio)/fs), '秒']);
% 重置转换完成标志
conversion_complete = false;
catch e
errordlg(['加载源音频失败: ', e.message], '错误');
update_diag(['错误: ', e.message], true);
end
end
% 导入目标音频
function load_target(~, ~)
if processing, return; end
[file, path] = uigetfile({'*.wav;*.mp3;*.ogg', '音频文件 (*.wav,*.mp3,*.ogg)'});
if isequal(file, 0), return; end
try
[audio, fs_in] = audioread(fullfile(path, file));
update_diag(['加载目标音频: ', file, ' (', num2str(fs_in), 'Hz)']);
set(status_text, 'String', ['正在处理目标音频: ', file]);
drawnow;
% 转换为单声道并归一化
if size(audio, 2) > 1
target_audio = mean(audio, 2);
update_diag('转换为单声道');
else
target_audio = audio;
end
target_audio = target_audio / max(abs(target_audio));
update_diag('归一化完成');
% 更新采样率参数
stft_params.fs = fs;
% 采样率处理
if fs == 0
fs = fs_in;
elseif fs ~= fs_in
update_diag(['重采样: ', num2str(fs_in), 'Hz -> ', num2str(fs), 'Hz']);
target_audio = resample(target_audio, fs, fs_in);
end
% 显示波形和频谱
plot(ax1, (0:length(target_audio)-1)/fs, target_audio);
title(ax1, ['目标音频波形: ', file]);
xlabel(ax1, '时间 (s)'); ylabel(ax1, '幅度');
grid(ax1, 'on');
% 显示频谱(不再显示包络)
show_spectrum(ax5, target_audio, fs, stft_params, '目标音频频谱');
set(status_text, 'String', ['已加载目标音频: ', file, ' (', num2str(fs/1000), 'kHz)']);
update_diag(['目标音频长度: ', num2str(length(target_audio)/fs), '秒']);
% 重置转换完成标志
conversion_complete = false;
catch e
errordlg(['加载目标音频失败: ', e.message], '错误');
update_diag(['错误: ', e.message], true);
end
end
% 核心音色转换函数
function transfer_timbre(~, ~)
if processing, return; end
if isempty(source_audio) || isempty(target_audio)
errordlg('请先导入源音频和目标音频!', '错误');
return;
end
% 设置处理状态
processing = true;
conversion_complete = false;
set(status_text, 'String', '开始音色转换...');
update_diag('=== 开始音色转换 ===');
drawnow;
% 统一音频长度(以目标音频长度为基准)
target_length = length(target_audio);
source_length = length(source_audio);
if source_length < target_length
% 源音频较短,重复填充
num_repeat = ceil(target_length / source_length);
extended_source = repmat(source_audio, num_repeat, 1);
source_audio_adj = extended_source(1:target_length);
update_diag('源音频已扩展以匹配目标长度');
elseif source_length > target_length
% 源音频较长,截断
source_audio_adj = source_audio(1:target_length);
update_diag('源音频已截断以匹配目标长度');
else
source_audio_adj = source_audio;
end
% 确保长度兼容
target_audio_adj = target_audio(1:min(target_length, length(source_audio_adj)));
source_audio_adj = source_audio_adj(1:min(target_length, length(source_audio_adj)));
try
% === 目标音频STFT ===
update_diag('对目标音频进行STFT...');
update_progress(0.1, '目标音频STFT');
[S_target, ~, ~] = optimized_stft(target_audio_adj, stft_params, @update_progress);
mag_target = abs(S_target);
phase_target = angle(S_target);
update_diag(sprintf('目标音频STFT完成: %d帧', size(S_target,2)));
% === 源音频STFT ===
update_diag('对源音频进行STFT...');
update_progress(0.3, '源音频STFT');
[S_source, ~, ~] = optimized_stft(source_audio_adj, stft_params, @update_progress);
mag_source = abs(S_source);
update_diag(sprintf('源音频STFT完成: %d帧', size(S_source,2)));
% 确保频谱矩阵大小相同
if size(mag_target, 2) ~= size(mag_source, 2)
min_frames = min(size(mag_target, 2), size(mag_source, 2));
mag_target = mag_target(:, 1:min_frames);
mag_source = mag_source(:, 1:min_frames);
phase_target = phase_target(:, 1:min_frames);
update_diag(sprintf('调整频谱帧数: %d帧', min_frames));
end
% === 改进的频谱转换算法 ===
update_diag('应用改进的音色转换算法...');
update_progress(0.65, '频谱转换');
% 1. 计算源音频的频谱包络
mag_source_env = spectral_envelope(mag_source, stft_params.lifter_order, stft_params.nfft);
% 2. 计算目标音频的频谱包络
mag_target_env = spectral_envelope(mag_target, stft_params.lifter_order, stft_params.nfft);
% 3. 计算源音频的频谱细节
mag_source_detail = mag_source ./ (mag_source_env + eps);
% 4. 应用转换:目标包络 + 源细节
mag_new = mag_target_env .* mag_source_detail;
% 5. 相位重建(Griffin-Lim相位重建)
update_diag('相位重建...');
update_progress(0.80, '相位重建');
phase_new = phase_reconstruction(mag_new, phase_target, stft_params, stft_params.phase_iter);
% === 重建音频 ===
update_diag('重建音频(ISTFT)...');
update_progress(0.90, 'ISTFT重建');
converted_audio = optimized_istft(mag_new, phase_new, stft_params, @update_progress);
converted_audio = converted_audio / max(abs(converted_audio)); % 归一化
% 确保长度匹配
if length(converted_audio) > target_length
converted_audio = converted_audio(1:target_length);
elseif length(converted_audio) < target_length
converted_audio = [converted_audio; zeros(target_length - length(converted_audio), 1)];
end
% 显示结果
plot(ax2, (0:length(converted_audio)-1)/fs, converted_audio);
title(ax2, '转换后音频波形');
xlabel(ax2, '时间 (s)'); ylabel(ax2, '幅度');
grid(ax2, 'on');
% 显示转换后频谱(不再显示包络)
show_spectrum(ax6, converted_audio, fs, stft_params, '转换后频谱');
% 更新状态
update_progress(1.0, '转换完成');
set(status_text, 'String', '音色转换完成!');
update_diag('音色转换成功!', true);
% 设置完成标志
conversion_complete = true;
% 清理大内存变量
clear S_target S_source mag_target mag_source mag_new;
catch e
errordlg(['音色转换失败: ', e.message], '错误');
update_diag(['错误: ', e.message], true);
set(progress_bar, 'FaceColor', [1, 0.3, 0.3]);
set(progress_text, 'String', '处理失败');
end
% 重置处理状态
processing = false;
end
function phase = phase_reconstruction(mag, phase_init, params, progress_callback)
% 相位重建函数 - 使用参数指定的迭代次数
% 输入:
% mag - 目标幅度谱 (单边谱)
% phase_init - 初始相位谱 (单边谱)
% params - 参数结构体 (包含 phase_iter 等参数)
% progress_callback - 进度回调函数
% 输出:
% phase - 重建后的相位谱
% === 参数提取 ===
nfft = params.nfft;
griffin_lim_iters = params.phase_iter; % 使用参数中的迭代次数
[num_bins, num_frames] = size(mag);
% === 初始化 ===
current_phase = phase_init;
% 进度更新间隔
update_interval = max(1, floor(griffin_lim_iters/10));
% === Griffin-Lim 迭代相位重建 ===
for iter = 1:griffin_lim_iters
% 1. 创建复数频谱 (单边转双边)
S_complex = create_full_spectrum(mag .* exp(1i*current_phase), nfft);
% 2. ISTFT重建时域信号
x_recon = optimized_istft(mag, current_phase, params, []);
% 3. 对重建信号进行STFT
[~, ~, ~, S_new] = optimized_stft(x_recon, params, []);
% 4. 更新相位
current_phase = angle(S_new);
% 5. 进度更新
if ~isempty(progress_callback) && mod(iter, update_interval) == 0
progress_callback(iter/griffin_lim_iters * 0.2, ...
sprintf('相位重建: %d/%d', iter, griffin_lim_iters));
end
end
phase = current_phase;
% === 辅助函数: 创建完整频谱 ===
function S_full = create_full_spectrum(S_half, nfft)
% 从单边谱创建双边谱
num_bins = size(S_half, 1);
S_full = zeros(nfft, size(S_half, 2));
if rem(nfft, 2) % 奇数点FFT
S_full(1:num_bins, :) = S_half;
S_full(num_bins+1:end, :) = conj(S_half(end:-1:2, :));
else % 偶数点FFT
S_full(1:num_bins, :) = S_half;
S_full(num_bins+1:end, :) = conj(S_half(end-1:-1:2, :));
end
end
end
function env = spectral_envelope(mag, lifter_order, nfft)
% 计算频谱包络
% 输入:
% mag - 频谱幅度 (单边谱)
% lifter_order - 包络阶数
% nfft - FFT点数
% 输出:
% env - 频谱包络
[num_bins, num_frames] = size(mag);
% === 1. 参数验证 ===
if lifter_order >= nfft/2
lifter_order = floor(nfft/2) - 1;
warning('包络阶数过大,自动调整为 %d', lifter_order);
end
% === 2. 重建双边谱 ===
if rem(nfft, 2) % 奇数点FFT
full_mag = zeros(nfft, num_frames);
full_mag(1:num_bins, :) = mag;
full_mag(num_bins+1:end, :) = conj(mag(end:-1:2, :));
else % 偶数点FFT
full_mag = zeros(nfft, num_frames);
full_mag(1:num_bins, :) = mag;
full_mag(num_bins+1:end, :) = conj(mag(end-1:-1:2, :));
end
% === 3. 计算倒谱 ===
% 对数幅度谱
log_mag = log(full_mag + eps); % 避免log(0)
% 倒谱 = ifft(对数幅度谱)
cepstrum = ifft(log_mag, nfft, 'symmetric');
% === 4. 创建提升器窗口 ===
lifter = zeros(nfft, 1);
% 保留低频部分
lifter(1:lifter_order+1) = 1;
% 保留对称的高频部分
if nfft > 2*lifter_order+1
lifter(end-lifter_order+1:end) = 1;
end
% === 5. 应用提升器 ===
cepstrum_liftered = cepstrum .* lifter;
% === 6. 重建频谱包络 ===
log_env = real(fft(cepstrum_liftered, nfft));
env = exp(log_env(1:num_bins, :)); % 取回单边谱
% === 7. 数值稳定性处理 ===
env(env < eps) = eps; % 避免零值
end
% 进度更新函数
function update_progress(progress, message)
if nargin >= 1
set(progress_bar, 'XData', [0, progress, progress, 0]);
end
if nargin >= 2
set(progress_text, 'String', message);
set(status_text, 'String', message);
end
if nargin == 1
set(progress_text, 'String', sprintf('%.0f%%', progress*100));
end
% 强制刷新界面
drawnow limitrate;
end
% 播放音频函数
function play_audio(audio, fs)
if processing, return; end
if isempty(audio)
errordlg('音频数据为空!', '错误');
return;
end
try
player = audioplayer(audio, fs);
play(player);
set(status_text, 'String', '正在播放音频...');
update_diag(['播放音频: ', num2str(length(audio)/fs), '秒'], true);
catch e
errordlg(['播放失败: ', e.message], '极错误');
update_diag(['播放错误: ', e.message], true);
end
end
% 保存音频函数
function save_audio(~, ~)
if processing
errordlg('处理正在进行中,请稍后保存', '错误');
return;
end
if isempty(converted_audio)
errordlg('没有转换后的音频可保存!', '错误');
return;
end
[file, path] = uiputfile('*.wav', '保存转换音频');
if isequal(file, 0), return; end
set(status_text, 'String', '正在保存音频...');
update_diag(['开始保存: ', file], true);
try
% 直接保存音频
filename = fullfile(path, file);
audiowrite(filename, converted_audio, fs);
set(status_text, 'String', ['已保存: ', file]);
update_diag(['音频已保存: ', filename], true);
catch e
errordlg(['保存失败: ', e.message], '极错误');
update_diag(['保存错误: ', e.message], true);
end
end
% 简化版频谱显示函数(不再包含包络计算)
function show_spectrum(ax, audio, fs, params, title_str)
try
% 计算STFT
[S, f, t] = optimized_stft(audio, params, []);
% 处理频谱数据
mag = abs(S);
spec_data = 10*log10(mag + eps);
% === 增强的维度验证 ===
% 确保频率向量是列向量
if ~iscolumn(f)
f = f(:);
end
% 确保时间向量是行向量
if ~isrow(t)
t = t(:)';
end
% === 维度一致性检查 ===
if size(spec_data, 1) ~= length(f) || size(spec_data, 2) ~= length(t)
min_bins = min(size(spec_data, 1), length(f));
min_frames = min(size(spec_data, 2), length(t));
spec_data = spec_data(1:min_bins, 1:min_frames);
f = f(1:min_bins);
t = t(1:min_frames);
update_diag(sprintf('维度自动调整: spec_data(%d×%d), f(%d), t(%d)',...
size(spec_data,1), size(spec_data,2), length(f), length(t)));
end
% === 坐标值验证 ===
% 确保频率在合理范围内
nyquist = fs/2;
if any(f > nyquist)
f(f > nyquist) = nyquist;
end
% 清除旧内容
cla(ax);
% === 绘制频谱图 ===
imagesc(ax, t, f, spec_data);
% === 设置坐标轴属性 ===
set(ax, 'YDir', 'normal'); % 低频在底部
axis(ax, 'tight'); % 自动调整坐标轴范围
% === 设置显示属性 ===
title(ax, title_str);
xlabel(ax, '时间 (s)');
ylabel(ax, '频率 (Hz)');
colorbar(ax);
colormap(ax, 'jet');
% 设置频率范围
max_freq = min(fs/2, max(f));
ylim(ax, [min(f), max_freq]);
% 添加诊断信息
update_diag(sprintf('频谱显示成功: %s (尺寸: %d×%d)', title_str, size(spec_data,1), size(spec_data,2)));
catch e
% 详细的错误信息
dim_info = sprintf('维度: spec_data(%d×%d), f(%d), t(%d)',...
size(spec_data,1), size(spec_data,2), length(f), length(t));
err_msg = sprintf('频谱错误: %s\n%s', e.message, dim_info);
% 显示错误信息
cla(ax);
text(ax, 0.5, 0.5, err_msg, ...
'HorizontalAlignment', 'center', ...
'FontSize', 8, 'Color', 'red');
title(ax, [title_str, ' (错误)']);
% 在诊断信息中记录详细错误
update_diag(['频谱显示错误: ' err_msg], true);
end
end
end
%% === 核心音频处理函数 ===
function [mag, phase, f, t] = optimized_stft(x, params, progress_callback)
% 优化的短时傅里叶变换(STFT)实现
% 输入:
% x - 时域信号
% params - 参数结构体 (包含窗长、重叠、FFT点数、窗函数等)
% progress_callback - 进度回调函数
% 输出:
% mag - 幅度谱 (单边谱)
% phase - 相位谱 (单边谱)
% f - 频率向量 (Hz)
% t - 时间向量 (秒)
% === 参数提取 ===
win_len = params.win_len;
overlap = params.overlap;
nfft = params.nfft;
win_anal = params.window;
fs = params.fs;
hop_size = win_len - overlap;
% === 计算STFT的帧数 ===
num_samples = length(x);
num_frames = floor((num_samples - overlap) / hop_size);
% === 初始化STFT矩阵 ===
S = zeros(nfft, num_frames); % 完整的双边谱
% 进度更新间隔
update_interval = max(1, floor(num_frames/10));
% === 分帧处理 ===
for frame_idx = 1:num_frames
% 1. 计算当前帧的起始和结束索引
start_idx = (frame_idx - 1) * hop_size + 1;
end_idx = start_idx + win_len - 1;
% 边界处理:如果最后一帧超出信号长度,则截断
if end_idx > num_samples
frame = [x(start_idx:end); zeros(end_idx - num_samples, 1)];
else
frame = x(start_idx:end_idx);
end
% 2. 加窗
frame_win = frame .* win_anal;
% 3. FFT
S_frame = fft(frame_win, nfft);
S(:, frame_idx) = S_frame;
% 4. 进度更新
if ~isempty(progress_callback) && mod(frame_idx, update_interval) == 0
progress_callback(frame_idx/num_frames * 0.2, ...
sprintf('STFT计算: %d/%d', frame_idx, num_frames));
end
end
% === 计算单边谱 ===
if rem(nfft, 2) % 奇数点FFT
num_bins = (nfft+1)/2;
else
num_bins = nfft/2 + 1;
end
S_half = S(1:num_bins, :); % 单边谱
% 幅度和相位
mag = abs(S_half);
phase = angle(S_half);
% 频率向量
f = (0:num_bins-1) * fs / nfft;
% 时间向量
t = (0:num_frames-1) * hop_size / fs;
end
function x_recon = optimized_istft(mag, phase, params, progress_callback)
% 优化的逆短时傅里叶变换(ISTFT)实现
% 输入:
% mag - 幅度谱 (单边谱)
% phase - 相位谱 (单边谱)
% params - 参数结构体
% progress_callback - 进度回调函数
% 输出:
% x_recon - 重建的时域信号
% === 参数提取 ===
nfft = params.nfft;
win_len = params.win_len;
hop_size = win_len - params.overlap;
win_synth = params.win_synthesis;
[num_bins, num_frames] = size(mag);
% 计算信号总长度
total_samples = (num_frames - 1) * hop_size + win_len;
x_recon = zeros(total_samples, 1);
% 进度更新间隔
update_interval = max(1, floor(num_frames/10));
% === 重建复数频谱 ===
S_half = mag .* exp(1i * phase);
% === 创建双边谱 ===
S_full = zeros(nfft, num_frames);
if rem(nfft, 2) % 奇数点FFT
S_full(1:num_bins, :) = S_half;
S_full(num_bins+1:end, :) = conj(S_half(end:-1:2, :));
else % 偶数点FFT
S_full(1:num_bins, :) = S_half;
% 注意:Nyquist点处理
S_full(num_bins+1:end, :) = conj(S_half(end-1:-1:2, :));
end
% === 执行逆FFT和重叠相加 ===
for frame_idx = 1:num_frames
% 1. 逆FFT
frame = real(ifft(S_full(:, frame_idx), nfft));
% 2. 应用合成窗
frame_win = frame(1:win_len) .* win_synth;
% 3. 计算位置并叠加
start_idx = (frame_idx - 1) * hop_size + 1;
end_idx = start_idx + win_len - 1;
% 确保不越界
if end_idx > total_samples
end_idx = total_samples;
frame_win = frame_win(1:(end_idx - start_idx + 1));
end
% 重叠相加
x_recon(start_idx:end_idx) = x_recon(start_idx:end_idx) + frame_win;
% 4. 进度更新
if ~isempty(progress_callback) && mod(frame_idx, update_interval) == 0
progress_callback(frame_idx/num_frames * 0.2, ...
sprintf('ISTFT重建: %d/%d', frame_idx, num_frames));
end
end
% === 归一化处理 ===
% 计算重叠因子
overlap_factor = win_len / hop_size;
% 计算归一化窗口
norm_win = zeros(total_samples, 1);
for i = 1:num_frames
start_idx = (i - 1) * hop_size + 1;
end_idx = min(start_idx + win_len - 1, total_samples);
norm_win(start_idx:end_idx) = norm_win(start_idx:end_idx) + win_synth(1:(end_idx-start_idx+1)).^2;
end
% 避免除以零
norm_win(norm_win < eps) = eps;
% 应用归一化
x_recon = x_recon ./ norm_win;
end
以上是全部代码,但是运行有一些问题,主要是刚才测试时显示输出参数过多,并且STFT的调用有一些问题,请重写这些问题,其他的如控件结构等请不要更改,然后提供完整的包含所有部分的代码