function timbre_transfer
% Build the main window. Closing is routed through close_gui so that a
% running conversion can be confirmed before the figure is destroyed.
fig = figure('Name', '高级音色转换系统 v3.0', 'Position', [50, 50, 1200, 900], ...
    'NumberTitle', 'off', 'MenuBar', 'none', 'Resize', 'on', ...
    'CloseRequestFcn', @close_gui, 'Color', [0.94, 0.94, 0.94]);

% State shared with the nested callbacks below
fs = 44100;                   % working sample rate (Hz)
source_audio = [];            % source audio (provides the timbre)
target_audio = [];            % target audio (provides the content)
converted_audio = [];         % conversion result
processing = false;           % true while a conversion is running
conversion_complete = false;  % true once a conversion has finished

% STFT parameters
stft_params.win_len = 2048;       % window length (samples)
stft_params.overlap = 1536;       % overlap in samples (75%)
stft_params.nfft = 2048;          % FFT size
stft_params.window = hamming(stft_params.win_len, 'periodic');
stft_params.lifter_order = 30;    % cepstral lifter order used for the envelope
stft_params.phase_iter = 5;       % phase reconstruction iterations
stft_params.fs = fs;              % sample rate used for the time/frequency axes

% === Controls ===
% Top control panel
control_panel = uipanel('Title', '音频控制', 'Position', [0.02, 0.92, 0.96, 0.07], ...
    'BackgroundColor', [0.9, 0.95, 1]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '导入源音频(音色)', ...
    'Position', [20, 10, 150, 30], 'Callback', @load_source, ...
    'FontSize', 10, 'FontWeight', 'bold', 'BackgroundColor', [0.7, 0.9, 1]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '导入目标音频(内容)', ...
    'Position', [190, 10, 150, 30], 'Callback', @load_target, ...
    'FontSize', 10, 'FontWeight', 'bold', 'BackgroundColor', [0.7, 0.9, 1]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '执行音色转换', ...
    'Position', [360, 10, 150, 30], 'Callback', @transfer_timbre, ...
    'FontSize', 10, 'FontWeight', 'bold', 'BackgroundColor', [0.8, 1, 0.8]);
% The play buttons use nested-function callbacks. An anonymous function such
% as @(s,e) play_audio(target_audio, fs) would freeze the value target_audio
% had when the handle was created (the empty array), so playback would
% always report empty audio.
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '播放目标音频', ...
    'Position', [530, 10, 120, 30], 'Callback', @play_target_cb, ...
    'FontSize', 10, 'BackgroundColor', [1, 0.95, 0.8]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '播放转换音频', ...
    'Position', [670, 10, 120, 30], 'Callback', @play_converted_cb, ...
    'FontSize', 10, 'BackgroundColor', [1, 0.95, 0.8]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '保存转换音频', ...
    'Position', [810, 10, 120, 30], 'Callback', @save_audio, ...
    'FontSize', 10, 'BackgroundColor', [0.9, 1, 0.8]);
% Show/hide log button. It lives inside the control panel; its previous
% figure-level pixel position (y = 920) was above the 900 px tall window.
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '显示日志', ...
    'Position', [950, 10, 100, 30], 'Callback', @toggle_log, ...
    'FontSize', 9, 'BackgroundColor', [0.9, 0.95, 1]);

% Parameter panel
param_panel = uipanel('Title', 'STFT参数设置', 'Position', [0.02, 0.82, 0.96, 0.09], ...
    'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
uicontrol('Parent', param_panel, 'Style', 'text', 'String', '窗长:', ...
    'Position', [20, 40, 50, 20], 'HorizontalAlignment', 'left', ...
    'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
win_len_edit = uicontrol('Parent', param_panel, 'Style', 'edit', ...
    'String', num2str(stft_params.win_len), ...
    'Position', [80, 40, 80, 25], 'Callback', @update_params, ...
    'BackgroundColor', [1, 1, 1]);
uicontrol('Parent', param_panel, 'Style', 'text', 'String', '重叠率(%):', ...
    'Position', [180, 40, 70, 20], 'HorizontalAlignment', 'left', ...
    'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
overlap_edit = uicontrol('Parent', param_panel, 'Style', 'edit', ...
    'String', '75', ...
    'Position', [260, 40, 80, 25], 'Callback', @update_params, ...
    'BackgroundColor', [1, 1, 1]);
uicontrol('Parent', param_panel, 'Style', 'text', 'String', 'FFT点数:', ...
    'Position', [360, 40, 60, 20], 'HorizontalAlignment', 'left', ...
    'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
nfft_edit = uicontrol('Parent', param_panel, 'Style', 'edit', ...
    'String', num2str(stft_params.nfft), ...
    'Position', [430, 40, 80, 25], 'Callback', @update_params, ...
    'BackgroundColor', [1, 1, 1]);
uicontrol('Parent', param_panel, 'Style', 'text', 'String', '包络阶数:', ...
    'Position', [530, 40, 60, 20], 'HorizontalAlignment', 'left', ...
    'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
lifter_edit = uicontrol('Parent', param_panel, 'Style', 'edit', ...
    'String', num2str(stft_params.lifter_order), ...
    'Position', [600, 40, 80, 25], 'Callback', @update_params, ...
    'BackgroundColor', [1, 1, 1]);
uicontrol('Parent', param_panel, 'Style', 'text', 'String', '相位迭代:', ...
    'Position', [700, 40, 60, 20], 'HorizontalAlignment', 'left', ...
    'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
iter_edit = uicontrol('Parent', param_panel, 'Style', 'edit', ...
    'String', num2str(stft_params.phase_iter), ...
    'Position', [770, 40, 80, 25], 'Callback', @update_params, ...
    'BackgroundColor', [1, 1, 1]);

% Waveform area (one tab per signal)
tabgp = uitabgroup(fig, 'Position', [0.02, 0.02, 0.96, 0.35]);
tab1 = uitab(tabgp, 'Title', '目标音频');
tab2 = uitab(tabgp, 'Title', '转换后音频');
tab3 = uitab(tabgp, 'Title', '源音频');
ax1 = axes('Parent', tab1, 'Position', [0.07, 0.15, 0.9, 0.75]);
title(ax1, '目标音频波形');
xlabel(ax1, '时间 (s)'); ylabel(ax1, '幅度');
grid(ax1, 'on');
ax2 = axes('Parent', tab2, 'Position', [0.07, 0.15, 0.9, 0.75]);
title(ax2, '转换后音频波形');
xlabel(ax2, '时间 (s)'); ylabel(ax2, '幅度');
grid(ax2, 'on');
ax3 = axes('Parent', tab3, 'Position', [0.07, 0.15, 0.9, 0.75]);
title(ax3, '源音频波形');
xlabel(ax3, '时间 (s)'); ylabel(ax3, '幅度');
grid(ax3, 'on');

% Spectrum area: spectrograms on the top row, envelopes on the bottom row
spec_panel = uipanel('Title', '频谱分析', 'Position', [0.02, 0.38, 0.96, 0.43], ...
    'BackgroundColor', [0.98, 0.98, 0.98], 'FontWeight', 'bold');
ax4 = axes('Parent', spec_panel, 'Position', [0.03, 0.55, 0.3, 0.4]);
title(ax4, '源音频频谱');
ax5 = axes('Parent', spec_panel, 'Position', [0.36, 0.55, 0.3, 0.4]);
title(ax5, '目标音频频谱');
ax6 = axes('Parent', spec_panel, 'Position', [0.69, 0.55, 0.3, 0.4]);
title(ax6, '转换后频谱');
ax7 = axes('Parent', spec_panel, 'Position', [0.03, 0.05, 0.3, 0.4]);
title(ax7, '源音频包络');
ax8 = axes('Parent', spec_panel, 'Position', [0.36, 0.05, 0.3, 0.4]);
title(ax8, '目标音频包络');
ax9 = axes('Parent', spec_panel, 'Position', [0.69, 0.05, 0.3, 0.4]);
title(ax9, '转换后包络');

% Status line
status_text = uicontrol('Parent', fig, 'Style', 'text', 'Position', [50, 5, 900, 30], ...
    'String', '就绪', 'HorizontalAlignment', 'left', ...
    'FontSize', 10, 'FontWeight', 'bold', 'BackgroundColor', [1, 1, 1]);

% Progress bar
progress_ax = axes('Parent', fig, 'Position', [0.1, 0.97, 0.8, 0.02], ...
    'XLim', [0, 1], 'YLim', [0, 1], 'Box', 'on', 'Color', [0.9, 0.9, 0.9]);
progress_bar = patch(progress_ax, [0 0 0 0], [0 0 1 1], [0.2, 0.6, 1]);
axis(progress_ax, 'off');
% Progress caption, placed next to the status line. Its previous pixel
% position (y = 970) was above the 900 px tall window.
progress_text = uicontrol('Parent', fig, 'Style', 'text', 'Position', [960, 5, 200, 30], ...
    'String', '', 'HorizontalAlignment', 'center', ...
    'FontSize', 10, 'FontWeight', 'bold', 'BackgroundColor', [1, 1, 1]);

% Processing log panel (hidden until toggled)
diag_panel = uipanel('Title', '处理日志', 'Position', [0.02, 0.02, 0.96, 0.35], ...
    'BackgroundColor', [0.95, 0.95, 0.95], 'Visible', 'off');
diag_text = uicontrol('Parent', diag_panel, 'Style', 'listbox', ...
    'Position', [10, 10, 1140, 250], 'String', {'系统已初始化'}, ...
    'HorizontalAlignment', 'left', 'FontSize', 9, ...
    'BackgroundColor', [1, 1, 1], 'Max', 100, 'Min', 0);

% === Playback button callbacks ===
% Play the currently loaded target audio (reads the live shared variable).
    function play_target_cb(~, ~)
        play_audio(target_audio, fs);
    end
% Play the most recent conversion result (reads the live shared variable).
    function play_converted_cb(~, ~)
        play_audio(converted_audio, fs);
    end
% === Callbacks ===
% Edit-box callback: validate the five parameter fields and, only if all of
% them are valid, store them in stft_params. On invalid input nothing is
% changed and the reason is shown in a dialog and in the log.
function update_params(~, ~)
    try
        % Read the raw values from the edit boxes
        new_win_len = str2double(get(win_len_edit, 'String'));
        overlap_percent = str2double(get(overlap_edit, 'String'));
        new_nfft = str2double(get(nfft_edit, 'String'));
        lifter_order = str2double(get(lifter_edit, 'String'));
        phase_iter = str2double(get(iter_edit, 'String'));

        % Validate each field
        if isnan(new_win_len) || new_win_len <= 0 || mod(new_win_len, 1) ~= 0
            error('窗长必须是正整数');
        end
        % 100% overlap would give a hop size of zero samples.
        if isnan(overlap_percent) || overlap_percent < 0 || overlap_percent >= 100
            error('重叠率必须是0-100之间的数字(不含100)');
        end
        if isnan(new_nfft) || new_nfft <= 0 || mod(new_nfft, 1) ~= 0
            error('FFT点数必须是正整数');
        end
        if isnan(lifter_order) || lifter_order <= 0 || mod(lifter_order, 1) ~= 0
            error('包络阶数必须是正整数');
        end
        if isnan(phase_iter) || phase_iter <= 0 || mod(phase_iter, 1) ~= 0
            error('相位迭代次数必须是正整数');
        end

        % Cross-field checks
        new_overlap = round(overlap_percent/100 * new_win_len);
        if new_overlap >= new_win_len
            % Rounding can push e.g. 99.99% up to the full window length.
            error('重叠率过大: 帧移必须至少为1个采样点');
        end
        if new_nfft < new_win_len
            % The inverse transform slices win_len samples out of each
            % nfft-point frame, so nfft must cover the whole window.
            error('FFT点数不能小于窗长');
        end

        % Commit
        stft_params.win_len = new_win_len;
        stft_params.overlap = new_overlap;
        stft_params.nfft = new_nfft;
        stft_params.window = hamming(new_win_len, 'periodic');
        stft_params.lifter_order = lifter_order;
        stft_params.phase_iter = phase_iter;
        update_diag(sprintf('参数更新: 窗长=%d, 重叠=%d(%.0f%%), FFT=%d', ...
            new_win_len, stft_params.overlap, overlap_percent, new_nfft));
    catch e
        errordlg(['参数错误: ', e.message], '输入错误');
        update_diag(['参数错误: ', e.message], true);
    end
end
% Append a timestamped message to the log list box.
%   msg   - text to log
%   force - optional (default false); when true the message is logged even
%           after a conversion has completed
function update_diag(msg, force)
    if nargin < 2
        force = false;
    end
    % After a completed conversion only forced messages are recorded.
    if conversion_complete && ~force
        return;
    end
    stamped = sprintf('[%s] %s', datestr(now, 'HH:MM:SS'), msg);
    log_lines = [get(diag_text, 'String'); {stamped}];
    set(diag_text, 'String', log_lines);
    % Select the newest entry
    set(diag_text, 'Value', length(get(diag_text, 'String')));
end
% Toggle between the waveform tabs and the processing log.
% Both occupy the same region of the figure, so exactly one of them is
% visible at a time. (Moving the tab group up to y = 0.38 instead would
% place it on the region already used by the spectrum panel.)
function toggle_log(~, ~)
    log_is_shown = strcmp(get(diag_panel, 'Visible'), 'on');
    % Keep the tab group at its home position in either state.
    set(tabgp, 'Position', [0.02, 0.02, 0.96, 0.35]);
    if log_is_shown
        set(diag_panel, 'Visible', 'off');
        set(tabgp, 'Visible', 'on');
    else
        set(tabgp, 'Visible', 'off');
        set(diag_panel, 'Visible', 'on');
    end
end
% CloseRequestFcn of the main figure.
% While a conversion is running the user must explicitly confirm with
% '是'; any other outcome (choosing '否' or dismissing the dialog, which
% yields an empty answer) keeps the window open.
function close_gui(~, ~)
    if processing
        choice = questdlg('处理正在进行中,确定要关闭吗?', '确认关闭', '是', '否', '否');
        if ~strcmp(choice, '是')
            return;
        end
    end
    delete(fig);
end
% Load the source audio (the signal that provides the timbre).
% The file is mixed down to mono, peak-normalised, resampled to the working
% sample rate if needed, stored in source_audio and plotted. The shared
% variable is only overwritten once the whole pipeline has succeeded, so a
% failed load leaves the previously loaded audio intact.
function load_source(~, ~)
    if processing, return; end
    [file, folder] = uigetfile({'*.wav;*.mp3;*.ogg', '音频文件 (*.wav,*.mp3,*.ogg)'});
    if isequal(file, 0), return; end
    try
        [audio, fs_in] = audioread(fullfile(folder, file));
        update_diag(['加载源音频: ', file, ' (', num2str(fs_in), 'Hz)']);
        set(status_text, 'String', ['正在处理源音频: ', file]);
        drawnow;
        if isempty(audio)
            error('音频文件不包含任何采样数据');
        end
        % Mix down to mono
        if size(audio, 2) > 1
            signal = mean(audio, 2);
            update_diag('转换为单声道');
        else
            signal = audio;
        end
        % Peak-normalise; an all-zero file is left untouched because
        % dividing by a zero peak would turn every sample into NaN.
        peak = max(abs(signal));
        if peak > 0
            signal = signal / peak;
            update_diag('归一化完成');
        else
            update_diag('音频为静音,跳过归一化');
        end
        % Sample-rate handling
        if fs == 0
            fs = fs_in;
        elseif fs ~= fs_in
            update_diag(['重采样: ', num2str(fs_in), 'Hz -> ', num2str(fs), 'Hz']);
            signal = resample(signal, fs, fs_in);
        end
        % Record the rate only after it has been decided above
        stft_params.fs = fs;
        source_audio = signal;
        % Waveform
        plot(ax3, (0:length(source_audio)-1)/fs, source_audio);
        title(ax3, ['源音频波形: ', file]);
        xlabel(ax3, '时间 (s)'); ylabel(ax3, '幅度');
        grid(ax3, 'on');
        % Spectrogram and spectral envelope
        show_spectrum(ax4, source_audio, fs, stft_params, '源音频频谱');
        show_spectrum(ax7, source_audio, fs, stft_params, '源音频包络', true);
        set(status_text, 'String', ['已加载源音频: ', file, ' (', num2str(fs/1000), 'kHz)']);
        update_diag(['源音频长度: ', num2str(length(source_audio)/fs), '秒']);
        % New input invalidates any previous conversion
        conversion_complete = false;
    catch e
        errordlg(['加载源音频失败: ', e.message], '错误');
        update_diag(['错误: ', e.message], true);
    end
end
% Load the target audio (the signal that provides the content).
% The file is mixed down to mono, peak-normalised, resampled to the working
% sample rate if needed, stored in target_audio and plotted. The shared
% variable is only overwritten once the whole pipeline has succeeded, so a
% failed load leaves the previously loaded audio intact.
function load_target(~, ~)
    if processing, return; end
    [file, folder] = uigetfile({'*.wav;*.mp3;*.ogg', '音频文件 (*.wav,*.mp3,*.ogg)'});
    if isequal(file, 0), return; end
    try
        [audio, fs_in] = audioread(fullfile(folder, file));
        update_diag(['加载目标音频: ', file, ' (', num2str(fs_in), 'Hz)']);
        set(status_text, 'String', ['正在处理目标音频: ', file]);
        drawnow;
        if isempty(audio)
            error('音频文件不包含任何采样数据');
        end
        % Mix down to mono
        if size(audio, 2) > 1
            signal = mean(audio, 2);
            update_diag('转换为单声道');
        else
            signal = audio;
        end
        % Peak-normalise; an all-zero file is left untouched because
        % dividing by a zero peak would turn every sample into NaN.
        peak = max(abs(signal));
        if peak > 0
            signal = signal / peak;
            update_diag('归一化完成');
        else
            update_diag('音频为静音,跳过归一化');
        end
        % Sample-rate handling
        if fs == 0
            fs = fs_in;
        elseif fs ~= fs_in
            update_diag(['重采样: ', num2str(fs_in), 'Hz -> ', num2str(fs), 'Hz']);
            signal = resample(signal, fs, fs_in);
        end
        % Record the rate only after it has been decided above
        stft_params.fs = fs;
        target_audio = signal;
        % Waveform
        plot(ax1, (0:length(target_audio)-1)/fs, target_audio);
        title(ax1, ['目标音频波形: ', file]);
        xlabel(ax1, '时间 (s)'); ylabel(ax1, '幅度');
        grid(ax1, 'on');
        % Spectrogram and spectral envelope
        show_spectrum(ax5, target_audio, fs, stft_params, '目标音频频谱');
        show_spectrum(ax8, target_audio, fs, stft_params, '目标音频包络', true);
        set(status_text, 'String', ['已加载目标音频: ', file, ' (', num2str(fs/1000), 'kHz)']);
        update_diag(['目标音频长度: ', num2str(length(target_audio)/fs), '秒']);
        % New input invalidates any previous conversion
        conversion_complete = false;
    catch e
        errordlg(['加载目标音频失败: ', e.message], '错误');
        update_diag(['错误: ', e.message], true);
    end
end
% Cepstral-liftering spectral envelope.
%   mag          - one-sided magnitude spectrogram, floor(nfft/2)+1 rows,
%                  one column per frame
%   lifter_order - number of low-quefrency cepstral coefficients to keep
%   nfft         - FFT size that produced mag
%   env          - smoothed magnitude envelope, same size as mag
function env = spectral_envelope(mag, lifter_order, nfft)
    num_bins = size(mag, 1);
    expected_bins = floor(nfft/2) + 1;
    if num_bins ~= expected_bins
        error('频谱行数(%d)与FFT点数(%d)不匹配, 应为%d行', num_bins, nfft, expected_bins);
    end
    % Keep the lifter inside the non-redundant half of the cepstrum
    if lifter_order >= nfft/2
        lifter_order = max(floor(nfft/2) - 1, 0);
        update_diag(sprintf('包络阶数过大,自动调整为%d', lifter_order));
    end
    % Rebuild the two-sided spectrum with exactly nfft rows.
    % Odd nfft : there is no Nyquist bin, so every bin except DC is mirrored.
    % Even nfft: the last bin is Nyquist and, like DC, must not be mirrored.
    % (Mirroring the Nyquist bin for even nfft yields nfft+1 rows, which
    % cannot be multiplied element-wise with the nfft-row lifter.)
    if rem(nfft, 2)
        full_mag = [mag; mag(end:-1:2, :)];
    else
        full_mag = [mag; mag(end-1:-1:2, :)];
    end
    % Real cepstrum of every frame
    cepstrum = real(ifft(log(full_mag + eps)));
    % Symmetric low-quefrency lifter: quefrencies 0..L and -L..-1
    lifter = zeros(nfft, 1);
    lifter(1:lifter_order+1) = 1;
    if nfft > 2*lifter_order+1
        lifter(end-lifter_order+1:end) = 1;
    end
    cepstrum_liftered = cepstrum .* lifter;
    % Back to the log-magnitude domain; keep the one-sided part
    log_env = real(fft(cepstrum_liftered));
    env = exp(log_env(1:num_bins, :));
end
% Core conversion callback.
% Combines the spectral envelope of the source audio (timbre) with the
% spectral fine structure and phase of the target audio (content), refines
% the phase iteratively and resynthesises the result into converted_audio.
% The output has the same length as the target audio.
function transfer_timbre(~, ~)
    if processing, return; end
    if isempty(source_audio) || isempty(target_audio)
        errordlg('请先导入源音频和目标音频!', '错误');
        return;
    end
    % Enter the processing state
    processing = true;
    conversion_complete = false;
    % A previous failed run leaves the bar red; restore the normal colour.
    set(progress_bar, 'FaceColor', [0.2, 0.6, 1]);
    set(status_text, 'String', '开始音色转换...');
    update_diag('=== 开始音色转换 ===');
    drawnow;
    % Everything below runs inside try so that the processing flag is
    % always cleared, whatever goes wrong.
    try
        % Match the source length to the target length
        target_length = length(target_audio);
        source_length = length(source_audio);
        if source_length < target_length
            % Source is shorter: tile it, then cut to size
            num_repeat = ceil(target_length / source_length);
            extended_source = repmat(source_audio, num_repeat, 1);
            source_audio_adj = extended_source(1:target_length);
            update_diag('源音频已扩展以匹配目标长度');
        elseif source_length > target_length
            % Source is longer: truncate
            source_audio_adj = source_audio(1:target_length);
            update_diag('源音频已截断以匹配目标长度');
        else
            source_audio_adj = source_audio;
        end
        target_audio_adj = target_audio;

        % === Target STFT ===
        update_diag('对目标音频进行STFT...');
        update_progress(0.1, '目标音频STFT');
        [S_target, ~, ~] = optimized_stft(target_audio_adj, stft_params, @update_progress);
        mag_target = abs(S_target);
        phase_target = angle(S_target);
        update_diag(sprintf('目标音频STFT完成: %d帧', size(S_target,2)));

        % === Source STFT ===
        update_diag('对源音频进行STFT...');
        update_progress(0.3, '源音频STFT');
        [S_source, ~, ~] = optimized_stft(source_audio_adj, stft_params, @update_progress);
        mag_source = abs(S_source);
        update_diag(sprintf('源音频STFT完成: %d帧', size(S_source,2)));

        % Make sure both spectrograms have the same number of frames
        if size(mag_target, 2) ~= size(mag_source, 2)
            min_frames = min(size(mag_target, 2), size(mag_source, 2));
            mag_target = mag_target(:, 1:min_frames);
            mag_source = mag_source(:, 1:min_frames);
            phase_target = phase_target(:, 1:min_frames);
            update_diag(sprintf('调整频谱帧数: %d帧', min_frames));
        end

        % === Spectral conversion ===
        update_diag('应用改进的音色转换算法...');
        update_progress(0.65, '频谱转换');
        % 1. Envelope of the source (the timbre to impose)
        mag_source_env = spectral_envelope(mag_source, stft_params.lifter_order, stft_params.nfft);
        % 2. Envelope of the target (the timbre to remove)
        mag_target_env = spectral_envelope(mag_target, stft_params.lifter_order, stft_params.nfft);
        % 3. Fine structure of the target (its content), i.e. the target
        %    spectrum with its own envelope divided out
        mag_target_detail = mag_target ./ (mag_target_env + eps);
        % 4. Source envelope applied to the target fine structure. This
        %    matches the UI roles (source = timbre, target = content) and
        %    the use of the target phase below.
        mag_new = mag_source_env .* mag_target_detail;

        % 5. Iterative phase refinement, seeded with the target phase
        update_diag('相位重建...');
        update_progress(0.80, '相位重建');
        phase_new = phase_reconstruction(mag_new, phase_target, stft_params, stft_params.phase_iter);

        % === Resynthesis ===
        update_diag('重建音频(ISTFT)...');
        update_progress(0.90, 'ISTFT重建');
        result = optimized_istft(mag_new, phase_new, stft_params, @update_progress);
        if any(~isfinite(result))
            error('重建音频包含无效数值');
        end
        % Peak-normalise unless the result is silent
        peak = max(abs(result));
        if peak > 0
            result = result / peak;
        end
        % Force the output to the target length
        if length(result) > target_length
            result = result(1:target_length);
        elseif length(result) < target_length
            result = [result; zeros(target_length - length(result), 1)];
        end
        % Publish the result only once it is complete
        converted_audio = result;

        % Waveform of the result
        plot(ax2, (0:length(converted_audio)-1)/fs, converted_audio);
        title(ax2, '转换后音频波形');
        xlabel(ax2, '时间 (s)'); ylabel(ax2, '幅度');
        grid(ax2, 'on');
        % Spectrogram and envelope of the result
        show_spectrum(ax6, converted_audio, fs, stft_params, '转换后频谱');
        show_spectrum(ax9, converted_audio, fs, stft_params, '转换后包络', true);
        % Final status
        update_progress(1.0, '转换完成');
        set(status_text, 'String', '音色转换完成!');
        update_diag('音色转换成功!', true);
        conversion_complete = true;
    catch e
        errordlg(['音色转换失败: ', e.message], '错误');
        update_diag(['错误: ', e.message], true);
        set(progress_bar, 'FaceColor', [1, 0.3, 0.3]);
        set(progress_text, 'String', '处理失败');
    end
    % Leave the processing state
    processing = false;
end
% Iterative phase refinement in the style of Griffin-Lim.
%   mag        - desired one-sided magnitude spectrogram (bins x frames)
%   init_phase - initial phase estimate, same size as mag
%   params     - STFT parameter struct passed on to optimized_istft and
%                optimized_stft
%   iter       - number of iterations (0 returns init_phase unchanged)
%   phase      - refined phase, same size as mag
% Each iteration resynthesises a time signal with overlap-add and analyses
% it again, keeping only the new phase. Laying the inverse frames end to
% end without overlap-add would give a signal with a different frame count
% than mag, and the re-analysed phase could no longer be combined with it.
function phase = phase_reconstruction(mag, init_phase, params, iter)
    phase = init_phase;
    num_frames = size(mag, 2);
    for k = 1:iter
        % Magnitude + current phase -> time signal (overlap-add)
        signal = optimized_istft(mag, phase, params, []);
        if any(~isfinite(signal))
            % Nothing usable to analyse (e.g. an all-zero spectrum);
            % keep the current estimate.
            break;
        end
        % Time signal -> consistent STFT; keep only its phase
        [S_new, ~, ~] = optimized_stft(signal, params, []);
        estimate = angle(S_new);
        % Guard against a frame-count difference at the signal edges
        num_est = size(estimate, 2);
        if num_est >= num_frames
            phase = estimate(:, 1:num_frames);
        else
            phase(:, 1:num_est) = estimate;
        end
    end
end
% Update the progress bar and its captions.
%   progress - fraction used as the right edge of the bar
%   message  - optional caption; when given it is shown in both the
%              progress text and the status line, otherwise the progress
%              text shows the percentage
function update_progress(progress, message)
    switch nargin
        case 0
            % Nothing to change; only the redraw below happens
        case 1
            set(progress_bar, 'XData', [0, progress, progress, 0]);
            set(progress_text, 'String', sprintf('%.0f%%', progress*100));
        otherwise
            set(progress_bar, 'XData', [0, progress, progress, 0]);
            set(progress_text, 'String', message);
            set(status_text, 'String', message);
    end
    % Flush pending graphics updates
    drawnow limitrate;
end
% Start non-blocking playback of an audio vector.
%   audio - samples to play
%   fs    - sample rate in Hz
% play() returns immediately, and playback stops as soon as the audioplayer
% object is destroyed. The player is therefore stored in the figure's
% application data so that it outlives this function call. Any playback
% that is still running is stopped first.
function play_audio(audio, fs)
    if processing, return; end
    if isempty(audio)
        errordlg('音频数据为空!', '错误');
        return;
    end
    try
        if isappdata(fig, 'active_player')
            previous = getappdata(fig, 'active_player');
            if isa(previous, 'audioplayer') && isplaying(previous)
                stop(previous);
            end
        end
        player = audioplayer(audio, fs);
        setappdata(fig, 'active_player', player);
        play(player);
        set(status_text, 'String', '正在播放音频...');
        update_diag(['播放音频: ', num2str(length(audio)/fs), '秒'], true);
    catch e
        errordlg(['播放失败: ', e.message], '错误');
        update_diag(['播放错误: ', e.message], true);
    end
end
% Save the converted audio to a WAV file chosen by the user.
% Refuses to run while a conversion is in progress or when there is no
% conversion result yet.
function save_audio(~, ~)
    if processing
        errordlg('处理正在进行中,请稍后保存', '错误');
        return;
    end
    if isempty(converted_audio)
        errordlg('没有转换后的音频可保存!', '错误');
        return;
    end
    [out_name, out_dir] = uiputfile('*.wav', '保存转换音频');
    if isequal(out_name, 0)
        return; % dialog cancelled
    end
    set(status_text, 'String', '正在保存音频...');
    update_diag(['开始保存: ', out_name], true);
    try
        out_path = fullfile(out_dir, out_name);
        audiowrite(out_path, converted_audio, fs);
        set(status_text, 'String', ['已保存: ', out_name]);
        update_diag(['音频已保存: ', out_path], true);
    catch e
        errordlg(['保存失败: ', e.message], '错误');
        update_diag(['保存错误: ', e.message], true);
    end
end
% Draw a spectrogram, or its cepstral envelope, into an axes.
%   ax            - target axes
%   audio         - signal to analyse
%   fs            - sample rate of audio (Hz); used for the axis scaling
%   params        - STFT parameter struct
%   title_str     - axes title
%   show_envelope - optional (default false); true plots the spectral
%                   envelope instead of the raw magnitude
% Failures are reported inside the axes, on the console and in the log
% instead of being propagated to the caller.
function show_spectrum(ax, audio, fs, params, title_str, show_envelope)
    if nargin < 6, show_envelope = false; end
    try
        % params is a local copy; make the axes follow the fs argument
        params.fs = fs;
        [S, f, t] = optimized_stft(audio, params, []);
        mag = abs(S);
        if show_envelope
            mag = spectral_envelope(mag, params.lifter_order, params.nfft);
        end
        % Amplitude quantities convert to dB with 20*log10
        data = 20*log10(mag + eps);
        % Flat, top-down surface plot
        surf(ax, t, f, data, 'EdgeColor', 'none');
        view(ax, 2);
        axis(ax, 'tight');
        title(ax, title_str);
        xlabel(ax, '时间 (s)');
        ylabel(ax, '频率 (Hz)');
        colorbar(ax);
        colormap(ax, 'jet');
        shading(ax, 'interp');
    catch e
        cla(ax);
        text(ax, 0.5, 0.5, '频谱计算失败', ...
            'HorizontalAlignment', 'center', ...
            'FontSize', 12, 'Color', 'red');
        title(ax, [title_str, ' (错误)']);
        disp(['频谱显示错误: ', e.message]);
        % Also record the failure in the in-app log
        update_diag(['频谱显示错误: ', e.message], true);
    end
end
end
%% === Core audio processing functions ===
% Short-time Fourier transform.
%   x                 - input signal (any vector shape)
%   params            - struct with fields window (analysis window),
%                       overlap (samples), nfft (FFT size), fs (Hz)
%   progress_callback - optional function handle called as
%                       callback(fraction, message); pass [] to disable
%   S - one-sided complex spectrogram, floor(nfft/2)+1 rows x frames
%   f - column vector of bin frequencies in Hz (bin k-1 lies at (k-1)*fs/nfft)
%   t - row vector of frame-centre times in seconds
function [S, f, t] = optimized_stft(x, params, progress_callback)
    if nargin < 3
        progress_callback = [];
    end
    window = params.window(:);
    win_len = length(window);
    noverlap = params.overlap;
    nfft = params.nfft;
    fs = params.fs;
    % A hop below one sample would divide by zero or never advance
    step = win_len - noverlap;
    if step < 1
        error('optimized_stft:invalidOverlap', ...
            '重叠点数(%d)必须小于窗长(%d)', noverlap, win_len);
    end
    % Work on a column vector; pad short input up to one full window
    x = x(:);
    if length(x) < win_len
        x = [x; zeros(win_len - length(x), 1)];
    end
    num_frames = floor((length(x) - noverlap) / step);
    frames = zeros(win_len, num_frames);
    % Slice and window the frames, reporting progress in coarse steps
    last_progress = -1;
    for i = 1:num_frames
        start_idx = (i-1)*step + 1;
        frames(:, i) = x(start_idx:start_idx + win_len - 1) .* window;
        if ~isempty(progress_callback) && mod(i, 10) == 0
            current_progress = i/num_frames;
            if current_progress - last_progress >= 0.1
                progress_callback(current_progress * 0.3, ...
                    ['STFT: ', num2str(round(current_progress*100)), '%']);
                last_progress = current_progress;
            end
        end
    end
    % Column-wise FFT; keep DC up to the highest non-redundant bin
    S = fft(frames, nfft, 1);
    num_bins = floor(nfft/2) + 1;
    S = S(1:num_bins, :);
    % Bin spacing is fs/nfft for both odd and even nfft
    f = (0:num_bins-1)' * (fs / nfft);
    % Time of each frame centre
    t = ((0:num_frames-1) * step + win_len/2) / fs;
end
% Inverse short-time Fourier transform (weighted overlap-add).
%   mag               - one-sided magnitude spectrogram (bins x frames)
%   phase             - phase in radians, same size as mag
%   params            - struct with fields window, overlap (samples), nfft
%   progress_callback - optional function handle called as
%                       callback(fraction, message); pass [] to disable
%   x - reconstructed column signal, peak-normalised unless it is silent
function x = optimized_istft(mag, phase, params, progress_callback)
    if nargin < 4
        progress_callback = [];
    end
    window = params.window(:);
    win_len = length(window);
    noverlap = params.overlap;
    nfft = params.nfft;
    step = win_len - noverlap;
    num_frames = size(mag, 2);
    num_bins = size(mag, 1);
    % Complex one-sided spectrum
    S = mag .* exp(1i * phase);
    % Two-sided spectrum with conjugate symmetry
    if rem(nfft, 2) % odd FFT size: no Nyquist bin, mirror all but DC
        full_nfft = (num_bins-1)*2 + 1;
        S_full = [S; conj(S(end:-1:2, :))];
    else            % even FFT size: neither DC nor Nyquist is mirrored
        full_nfft = (num_bins-1)*2;
        S_full = [S; conj(S(end-1:-1:2, :))];
    end
    % Back to the time domain, one column per frame
    frames = real(ifft(S_full, full_nfft, 1));
    % Bring every frame to exactly win_len samples
    if full_nfft >= win_len
        frames = frames(1:win_len, :);
    else
        % FFT shorter than the window: the missing tail is zero
        frames = [frames; zeros(win_len - full_nfft, num_frames)];
    end
    % Synthesis window
    frames = frames .* window;
    % Output buffers
    x_len = (num_frames-1)*step + win_len;
    x = zeros(x_len, 1);
    win_sum = zeros(x_len, 1); % accumulated squared window for compensation
    win_sq = window.^2;
    % Overlap-add, reporting progress in coarse steps
    last_progress = -1;
    for i = 1:num_frames
        start_idx = (i-1)*step + 1;
        end_idx = start_idx + win_len - 1;
        if end_idx <= x_len
            x(start_idx:end_idx) = x(start_idx:end_idx) + frames(:, i);
            win_sum(start_idx:end_idx) = win_sum(start_idx:end_idx) + win_sq;
        else
            valid_len = x_len - start_idx + 1;
            x(start_idx:end) = x(start_idx:end) + frames(1:valid_len, i);
            win_sum(start_idx:end) = win_sum(start_idx:end) + win_sq(1:valid_len);
        end
        if ~isempty(progress_callback) && mod(i, 10) == 0
            current_progress = i/num_frames;
            if current_progress - last_progress >= 0.1
                progress_callback(0.85 + current_progress*0.15, ...
                    ['ISTFT: ', num2str(round(current_progress*100)), '%']);
                last_progress = current_progress;
            end
        end
    end
    % Window compensation (avoid dividing by zero)
    win_sum(win_sum < eps) = eps;
    x = x ./ win_sum;
    % Peak-normalise; a silent signal is returned as zeros, not NaN
    peak = max(abs(x));
    if ~isempty(peak) && peak > 0
        x = x / peak;
    end
end
% function phase = phase_reconstruction(mag, init_phase, params, iter)
% % Griffin-Lim相位重建算法
% phase = init_phase; % 使用目标音频相位作为初始值
% [num_bins, num_frames] = size(mag);
%
% % 重建双边频谱
% if rem(params.nfft, 2) % 奇数点FFT
% full_nfft = (num_bins-1)*2 + 1;
% else % 偶数点FFT
% full_nfft = (num_bins-1)*2;
% end
%
% for i = 1:iter
% % 重建复数频谱
% S = mag .* exp(1i * phase);
%
% % 重建双边频谱
% if rem(params.nfft, 2) % 奇数点FFT
% S_full = [S; conj(S(end:-1:2, :))];
% else % 偶数点FFT
% S_full = [S; conj(S(end-1:-1:2, :))];
% end
%
% % IFFT变换
% frames = real(ifft(S_full, full_nfft));
% frames = frames(1:params.win_len, :); % 取窗长部分
%
% % 重新计算STFT
% [S_new, ~, ~] = optimized_stft(frames(:), params, []);
%
% % 更新相位
% phase = angle(S_new);
% end
% end
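% NOTE (reported issue): please read the code above. It fails in the spectrum display with "Arrays have incompatible sizes for this operation". The request is to fix it and then provide the complete corrected code from the beginning, including every section.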