我现在再次提供全部的代码,运行时播放音频显示音频为空,播放失败,请给出修改建议,尽量不要修改函数接口。
function timbre_transfer
% TIMBRE_TRANSFER  Build the timbre-conversion GUI and initialise shared state.
%
% All variables assigned in this section (audio buffers, STFT parameters and
% graphics handles) live in the parent workspace and are shared with the
% nested callback functions defined further down in this function.
%
% Playback fix: the two "play" buttons previously used anonymous functions of
% the form @(src,evt) play_audio(target_audio, fs). An anonymous function
% stores the VALUES of the variables it references at the moment the handle
% is created, and at that moment both audio buffers are still []. Playback
% therefore always received an empty array. The buttons now call small nested
% functions, which read the shared variables at click time.

% Main window
fig = figure('Name', '高级音色转换系统 v3.2', 'Position', [50, 50, 1200, 900], ...
    'NumberTitle', 'off', 'MenuBar', 'none', 'Resize', 'on', ...
    'CloseRequestFcn', @close_gui, 'Color', [0.94, 0.94, 0.94]);

% Shared state
fs = 44100;                   % working sample rate (Hz)
source_audio = [];            % source audio (provides the timbre)
target_audio = [];            % target audio (provides the content)
converted_audio = [];         % result of the conversion
processing = false;           % true while a conversion is running
conversion_complete = false;  % true after a successful conversion

% STFT parameters
stft_params.win_len = 2048;        % window length (samples)
stft_params.overlap = 1536;        % overlap in samples (75%)
stft_params.nfft = 2048;           % FFT size
stft_params.window = hamming(stft_params.win_len, 'periodic');
stft_params.lifter_order = 30;     % envelope order
stft_params.phase_iter = 5;        % phase iteration count
stft_params.fs = fs;               % sample rate used for the time/frequency axes
stft_params.hop_size = stft_params.win_len - stft_params.overlap;
% Synthesis window: analysis window scaled by hop_size / sum(window.^2)
stft_params.win_synthesis = stft_params.window / sum(stft_params.window.^2) * stft_params.hop_size;

% === Controls ===
% Top control panel
control_panel = uipanel('Title', '音频控制', 'Position', [0.02, 0.92, 0.96, 0.07], ...
    'BackgroundColor', [0.9, 0.95, 1]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '导入源音频(音色)', ...
    'Position', [20, 10, 150, 30], 'Callback', @load_source, ...
    'FontSize', 10, 'FontWeight', 'bold', 'BackgroundColor', [0.7, 0.9, 1]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '导入目标音频(内容)', ...
    'Position', [190, 10, 150, 30], 'Callback', @load_target, ...
    'FontSize', 10, 'FontWeight', 'bold', 'BackgroundColor', [0.7, 0.9, 1]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '执行音色转换', ...
    'Position', [360, 10, 150, 30], 'Callback', @transfer_timbre, ...
    'FontSize', 10, 'FontWeight', 'bold', 'BackgroundColor', [0.8, 1, 0.8]);
% Play buttons: nested-function callbacks so the CURRENT audio is played
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '播放目标音频', ...
    'Position', [530, 10, 120, 30], 'Callback', @play_target_callback, ...
    'FontSize', 10, 'BackgroundColor', [1, 0.95, 0.8]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '播放转换音频', ...
    'Position', [670, 10, 120, 30], 'Callback', @play_converted_callback, ...
    'FontSize', 10, 'BackgroundColor', [1, 0.95, 0.8]);
uicontrol('Parent', control_panel, 'Style', 'pushbutton', 'String', '保存转换音频', ...
    'Position', [810, 10, 120, 30], 'Callback', @save_audio, ...
    'FontSize', 10, 'BackgroundColor', [0.9, 1, 0.8]);

% Parameter panel
param_panel = uipanel('Title', 'STFT参数设置', 'Position', [0.02, 0.82, 0.96, 0.09], ...
    'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
uicontrol('Parent', param_panel, 'Style', 'text', 'String', '窗长:', ...
    'Position', [20, 40, 50, 20], 'HorizontalAlignment', 'left', ...
    'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
win_len_edit = uicontrol('Parent', param_panel, 'Style', 'edit', ...
    'String', num2str(stft_params.win_len), ...
    'Position', [80, 40, 80, 25], 'Callback', @update_params, ...
    'BackgroundColor', [1, 1, 1]);
uicontrol('Parent', param_panel, 'Style', 'text', 'String', '重叠率(%):', ...
    'Position', [180, 40, 70, 20], 'HorizontalAlignment', 'left', ...
    'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
overlap_edit = uicontrol('Parent', param_panel, 'Style', 'edit', ...
    'String', '75', ...
    'Position', [260, 40, 80, 25], 'Callback', @update_params, ...
    'BackgroundColor', [1, 1, 1]);
uicontrol('Parent', param_panel, 'Style', 'text', 'String', 'FFT点数:', ...
    'Position', [360, 40, 60, 20], 'HorizontalAlignment', 'left', ...
    'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
nfft_edit = uicontrol('Parent', param_panel, 'Style', 'edit', ...
    'String', num2str(stft_params.nfft), ...
    'Position', [430, 40, 80, 25], 'Callback', @update_params, ...
    'BackgroundColor', [1, 1, 1]);
uicontrol('Parent', param_panel, 'Style', 'text', 'String', '包络阶数:', ...
    'Position', [530, 40, 60, 20], 'HorizontalAlignment', 'left', ...
    'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
lifter_edit = uicontrol('Parent', param_panel, 'Style', 'edit', ...
    'String', num2str(stft_params.lifter_order), ...
    'Position', [600, 40, 80, 25], 'Callback', @update_params, ...
    'BackgroundColor', [1, 1, 1]);
uicontrol('Parent', param_panel, 'Style', 'text', 'String', '相位迭代:', ...
    'Position', [700, 40, 60, 20], 'HorizontalAlignment', 'left', ...
    'BackgroundColor', [0.95, 0.97, 1], 'FontWeight', 'bold');
iter_edit = uicontrol('Parent', param_panel, 'Style', 'edit', ...
    'String', num2str(stft_params.phase_iter), ...
    'Position', [770, 40, 80, 25], 'Callback', @update_params, ...
    'BackgroundColor', [1, 1, 1]);

% Waveform display area (tabbed)
tabgp = uitabgroup(fig, 'Position', [0.02, 0.02, 0.96, 0.35]);
tab1 = uitab(tabgp, 'Title', '目标音频');
tab2 = uitab(tabgp, 'Title', '转换后音频');
tab3 = uitab(tabgp, 'Title', '源音频');
ax1 = axes('Parent', tab1, 'Position', [0.07, 0.15, 0.9, 0.75]);
title(ax1, '目标音频波形');
xlabel(ax1, '时间 (s)'); ylabel(ax1, '幅度');
grid(ax1, 'on');
ax2 = axes('Parent', tab2, 'Position', [0.07, 0.15, 0.9, 0.75]);
title(ax2, '转换后音频波形');
xlabel(ax2, '时间 (s)'); ylabel(ax2, '幅度');
grid(ax2, 'on');
ax3 = axes('Parent', tab3, 'Position', [0.07, 0.15, 0.9, 0.75]);
title(ax3, '源音频波形');
xlabel(ax3, '时间 (s)'); ylabel(ax3, '幅度');
grid(ax3, 'on');

% Spectrogram display area (three spectrograms side by side)
spec_panel = uipanel('Title', '频谱分析', 'Position', [0.02, 0.38, 0.96, 0.43], ...
    'BackgroundColor', [0.98, 0.98, 0.98], 'FontWeight', 'bold');
ax4 = axes('Parent', spec_panel, 'Position', [0.03, 0.1, 0.3, 0.8]);
title(ax4, '源音频频谱');
ax5 = axes('Parent', spec_panel, 'Position', [0.36, 0.1, 0.3, 0.8]);
title(ax5, '目标音频频谱');
ax6 = axes('Parent', spec_panel, 'Position', [0.69, 0.1, 0.3, 0.8]);
title(ax6, '转换后频谱');

% Status line
status_text = uicontrol('Style', 'text', 'Position', [50, 5, 900, 30], ...
    'String', '就绪', 'HorizontalAlignment', 'left', ...
    'FontSize', 10, 'FontWeight', 'bold', 'BackgroundColor', [1, 1, 1]);

% Progress bar (a patch whose XData is stretched from 0 to 1)
progress_ax = axes('Position', [0.1, 0.97, 0.8, 0.02], ...
    'XLim', [0, 1], 'YLim', [0, 1], 'Box', 'on', 'Color', [0.9, 0.9, 0.9]);
progress_bar = patch(progress_ax, [0 0 0 0], [0 0 1 1], [0.2, 0.6, 1]);
axis(progress_ax, 'off');
% NOTE(review): y = 970 px lies above the 900-px-high figure created above,
% so this label is probably not visible at the default size - confirm layout.
progress_text = uicontrol('Style', 'text', 'Position', [500, 970, 200, 20], ...
    'String', '', 'HorizontalAlignment', 'center', ...
    'FontSize', 10, 'FontWeight', 'bold', 'BackgroundColor', [1, 1, 1]);

% Processing-log panel (hidden until toggled)
diag_panel = uipanel('Title', '处理日志', 'Position', [0.02, 0.02, 0.96, 0.35], ...
    'BackgroundColor', [0.95, 0.95, 0.95], 'Visible', 'off');
diag_text = uicontrol('Parent', diag_panel, 'Style', 'listbox', ...
    'Position', [10, 10, 1140, 250], 'String', {'系统已初始化'}, ...
    'HorizontalAlignment', 'left', 'FontSize', 9, ...
    'BackgroundColor', [1, 1, 1], 'Max', 100, 'Min', 0);

% Show/hide log button
uicontrol('Style', 'pushbutton', 'String', '显示日志', ...
    'Position', [1020, 920, 100, 30], 'Callback', @toggle_log, ...
    'FontSize', 9, 'BackgroundColor', [0.9, 0.95, 1]);

% Play-button callbacks. As nested functions they see the parent's shared
% variables as they are when the button is pressed, not as they were when
% the button was created.
function play_target_callback(~, ~)
    play_audio(target_audio, fs);
end
function play_converted_callback(~, ~)
    play_audio(converted_audio, fs);
end
% === 回调函数 ===
% 更新参数回调
function update_params(~, ~)
% Callback of the five STFT parameter edit boxes.
% Reads every box, validates the values and, only if all of them are valid,
% rewrites the shared stft_params struct (window, hop size and synthesis
% window are recomputed). On any invalid value an error dialog is shown, the
% problem is logged and stft_params is left untouched.
    try
        % Read the current text of every edit box
        new_win_len = str2double(get(win_len_edit, 'String'));
        overlap_percent = str2double(get(overlap_edit, 'String'));
        new_nfft = str2double(get(nfft_edit, 'String'));
        lifter_order = str2double(get(lifter_edit, 'String'));
        phase_iter = str2double(get(iter_edit, 'String'));

        % Per-field validation
        if isnan(new_win_len) || new_win_len <= 0 || mod(new_win_len, 1) ~= 0
            error('窗长必须是正整数');
        end
        if isnan(overlap_percent) || overlap_percent < 0 || overlap_percent > 100
            error('重叠率必须是0-100之间的数字');
        end
        if isnan(new_nfft) || new_nfft <= 0 || mod(new_nfft, 1) ~= 0
            error('FFT点数必须是正整数');
        end
        if isnan(lifter_order) || lifter_order <= 0 || mod(lifter_order, 1) ~= 0
            error('包络阶数必须是正整数');
        end
        if isnan(phase_iter) || phase_iter <= 0 || mod(phase_iter, 1) ~= 0
            error('相位迭代次数必须是正整数');
        end

        % Cross-field validation.
        % A hop of 0 samples (e.g. 100% overlap) makes the frame count in
        % optimized_stft infinite, so it must be rejected here.
        new_overlap = round(overlap_percent/100 * new_win_len);
        new_hop = new_win_len - new_overlap;
        if new_hop < 1
            error('重叠率过大: 跳跃长度必须至少为1个采样点');
        end
        % optimized_istft indexes frame(1:win_len) out of an nfft-point IFFT,
        % which fails when nfft is shorter than the window.
        if new_nfft < new_win_len
            error('FFT点数不能小于窗长');
        end

        % Commit the new parameter set
        stft_params.win_len = new_win_len;
        stft_params.overlap = new_overlap;
        stft_params.nfft = new_nfft;
        stft_params.window = hamming(new_win_len, 'periodic');
        stft_params.lifter_order = lifter_order;
        stft_params.phase_iter = phase_iter;
        stft_params.hop_size = new_hop;
        stft_params.win_synthesis = stft_params.window / sum(stft_params.window.^2) * stft_params.hop_size;
        update_diag(sprintf('参数更新: 窗长=%d, 重叠=%d(%.0f%%), FFT=%d', ...
            new_win_len, stft_params.overlap, overlap_percent, new_nfft));
    catch e
        errordlg(['参数错误: ', e.message], '输入错误');
        update_diag(['参数错误: ', e.message], true);
    end
end
% 更新诊断信息
function update_diag(msg, force)
% Append a time-stamped line to the log list box and select it.
%   msg   - text to log
%   force - optional (default false). Once conversion_complete is true,
%           only messages with force == true are written.
    if nargin < 2
        force = false;
    end
    % Suppress non-forced messages after a completed conversion
    if conversion_complete && ~force
        return;
    end
    stamp = datestr(now, 'HH:MM:SS');
    entries = [get(diag_text, 'String'); {sprintf('[%s] %s', stamp, msg)}];
    set(diag_text, 'String', entries);
    % Move the selection to the newest entry
    set(diag_text, 'Value', numel(entries));
end
% 切换日志显示
function toggle_log(~, ~)
% Toggle the visibility of the log panel and move the waveform tab group:
% it sits at the bottom while the log is hidden and is shifted upwards
% while the log is shown.
    if strcmp(get(diag_panel, 'Visible'), 'on')
        next_visibility = 'off';
        tab_position = [0.02, 0.02, 0.96, 0.35];
    else
        next_visibility = 'on';
        tab_position = [0.02, 0.38, 0.96, 0.35];
    end
    set(diag_panel, 'Visible', next_visibility);
    set(tabgp, 'Position', tab_position);
end
% 关闭GUI回调
function close_gui(~, ~)
% CloseRequestFcn of the main figure.
% While a conversion is running the user is asked for confirmation; the
% window stays open only when the answer is '否'. In every other case the
% figure is deleted.
    keep_open = processing && ...
        strcmp(questdlg('处理正在进行中,确定要关闭吗?', '确认关闭', '是', '否', '否'), '否');
    if ~keep_open
        delete(fig);
    end
end
% 导入源音频
function load_source(~, ~)
% Callback: let the user pick an audio file and load it as the SOURCE audio
% (the signal that provides the timbre).
%
% The file is mixed down to mono, peak-normalised, resampled to the working
% sample rate fs when necessary, then plotted (waveform + spectrogram).
% The shared variable source_audio is only overwritten after reading,
% normalising and resampling have all succeeded, so a failed load keeps the
% previously loaded audio intact.
    if processing, return; end
    [file, folder] = uigetfile({'*.wav;*.mp3;*.ogg', '音频文件 (*.wav,*.mp3,*.ogg)'});
    if isequal(file, 0), return; end
    try
        [audio, fs_in] = audioread(fullfile(folder, file));
        update_diag(['加载源音频: ', file, ' (', num2str(fs_in), 'Hz)']);
        set(status_text, 'String', ['正在处理源音频: ', file]);
        drawnow;

        if isempty(audio)
            error('音频文件不包含任何采样数据');
        end

        % Mix down to mono
        if size(audio, 2) > 1
            loaded = mean(audio, 2);
            update_diag('转换为单声道');
        else
            loaded = audio;
        end

        % Peak-normalise; an all-zero file would otherwise become NaN (0/0)
        peak = max(abs(loaded));
        if peak > 0
            loaded = loaded / peak;
            update_diag('归一化完成');
        else
            update_diag('警告: 音频为全零(静音), 跳过归一化', true);
        end

        % Bring the audio to the working sample rate
        if fs == 0
            fs = fs_in;
        elseif fs ~= fs_in
            update_diag(['重采样: ', num2str(fs_in), 'Hz -> ', num2str(fs), 'Hz']);
            loaded = resample(loaded, fs, fs_in);
        end
        % Record the sample rate only after it has been resolved
        stft_params.fs = fs;

        % Commit to the shared state
        source_audio = loaded;

        % Waveform
        plot(ax3, (0:length(source_audio)-1)/fs, source_audio);
        title(ax3, ['源音频波形: ', file]);
        xlabel(ax3, '时间 (s)'); ylabel(ax3, '幅度');
        grid(ax3, 'on');
        % Spectrogram
        show_spectrum(ax4, source_audio, fs, stft_params, '源音频频谱');
        set(status_text, 'String', ['已加载源音频: ', file, ' (', num2str(fs/1000), 'kHz)']);
        update_diag(['源音频长度: ', num2str(length(source_audio)/fs), '秒']);
        % New input: re-enable normal logging
        conversion_complete = false;
    catch e
        errordlg(['加载源音频失败: ', e.message], '错误');
        update_diag(['错误: ', e.message], true);
    end
end
% 导入目标音频
function load_target(~, ~)
% Callback: let the user pick an audio file and load it as the TARGET audio
% (the signal that provides the content).
%
% The file is mixed down to mono, peak-normalised, resampled to the working
% sample rate fs when necessary, then plotted (waveform + spectrogram).
% The shared variable target_audio is only overwritten after reading,
% normalising and resampling have all succeeded, so a failed load keeps the
% previously loaded audio intact.
    if processing, return; end
    [file, folder] = uigetfile({'*.wav;*.mp3;*.ogg', '音频文件 (*.wav,*.mp3,*.ogg)'});
    if isequal(file, 0), return; end
    try
        [audio, fs_in] = audioread(fullfile(folder, file));
        update_diag(['加载目标音频: ', file, ' (', num2str(fs_in), 'Hz)']);
        set(status_text, 'String', ['正在处理目标音频: ', file]);
        drawnow;

        if isempty(audio)
            error('音频文件不包含任何采样数据');
        end

        % Mix down to mono
        if size(audio, 2) > 1
            loaded = mean(audio, 2);
            update_diag('转换为单声道');
        else
            loaded = audio;
        end

        % Peak-normalise; an all-zero file would otherwise become NaN (0/0)
        peak = max(abs(loaded));
        if peak > 0
            loaded = loaded / peak;
            update_diag('归一化完成');
        else
            update_diag('警告: 音频为全零(静音), 跳过归一化', true);
        end

        % Bring the audio to the working sample rate
        if fs == 0
            fs = fs_in;
        elseif fs ~= fs_in
            update_diag(['重采样: ', num2str(fs_in), 'Hz -> ', num2str(fs), 'Hz']);
            loaded = resample(loaded, fs, fs_in);
        end
        % Record the sample rate only after it has been resolved
        stft_params.fs = fs;

        % Commit to the shared state
        target_audio = loaded;

        % Waveform
        plot(ax1, (0:length(target_audio)-1)/fs, target_audio);
        title(ax1, ['目标音频波形: ', file]);
        xlabel(ax1, '时间 (s)'); ylabel(ax1, '幅度');
        grid(ax1, 'on');
        % Spectrogram
        show_spectrum(ax5, target_audio, fs, stft_params, '目标音频频谱');
        set(status_text, 'String', ['已加载目标音频: ', file, ' (', num2str(fs/1000), 'kHz)']);
        update_diag(['目标音频长度: ', num2str(length(target_audio)/fs), '秒']);
        % New input: re-enable normal logging
        conversion_complete = false;
    catch e
        errordlg(['加载目标音频失败: ', e.message], '错误');
        update_diag(['错误: ', e.message], true);
    end
end
function transfer_timbre(~, ~)
% Callback: run the timbre conversion on the loaded source/target audio.
%
% Steps: match the source length to the target length, STFT both signals,
% combine spectral envelope and detail, resynthesise with the target phase
% (ISTFT), normalise, and display the result. The shared variable
% converted_audio is only overwritten once a finite result has been
% produced, so a failed run keeps the previous conversion available for
% playback and saving.
    if processing, return; end
    if isempty(source_audio) || isempty(target_audio)
        errordlg('请先导入源音频和目标音频!', '错误');
        return;
    end

    % Enter the "busy" state
    processing = true;
    conversion_complete = false;
    % A previous failed run paints the bar red; restore its initial colour
    set(progress_bar, 'FaceColor', [0.2, 0.6, 1]);
    set(status_text, 'String', '开始音色转换...');
    update_diag('=== 开始音色转换 ===');
    drawnow;

    % Match the source length to the target length
    target_length = length(target_audio);
    source_length = length(source_audio);
    if source_length < target_length
        % Source too short: tile it and cut to length
        tiled = repmat(source_audio, ceil(target_length / source_length), 1);
        source_audio_adj = tiled(1:target_length);
        update_diag('源音频已扩展以匹配目标长度');
    elseif source_length > target_length
        % Source too long: truncate
        source_audio_adj = source_audio(1:target_length);
        update_diag('源音频已截断以匹配目标长度');
    else
        source_audio_adj = source_audio;
    end
    % Both signals now have target_length samples
    target_audio_adj = target_audio;

    try
        % === Target STFT ===
        update_diag('对目标音频进行STFT...');
        update_progress(0.1, '目标音频STFT');
        [mag_target, phase_target] = optimized_stft(target_audio_adj, stft_params, @update_progress);
        update_diag(sprintf('目标音频STFT完成: %d帧', size(mag_target,2)));

        % === Source STFT ===
        update_diag('对源音频进行STFT...');
        update_progress(0.3, '源音频STFT');
        mag_source = optimized_stft(source_audio_adj, stft_params, @update_progress);
        update_diag(sprintf('源音频STFT完成: %d帧', size(mag_source,2)));

        % Make sure both spectrograms have the same number of frames
        if size(mag_target, 2) ~= size(mag_source, 2)
            min_frames = min(size(mag_target, 2), size(mag_source, 2));
            mag_target = mag_target(:, 1:min_frames);
            mag_source = mag_source(:, 1:min_frames);
            phase_target = phase_target(:, 1:min_frames);
            update_diag(sprintf('调整频谱帧数: %d帧', min_frames));
        end

        % === Spectral conversion ===
        update_diag('应用改进的音色转换算法...');
        update_progress(0.65, '频谱转换');
        % 1. Spectral envelope of the source
        mag_source_env = spectral_envelope(mag_source, stft_params.lifter_order, stft_params.nfft);
        % 2. Spectral envelope of the target
        mag_target_env = spectral_envelope(mag_target, stft_params.lifter_order, stft_params.nfft);
        % 3. Spectral detail of the source
        mag_source_detail = spectral_detail(mag_source, mag_source_env);
        % 4. Combine: target envelope x source detail
        % NOTE(review): the UI labels the source as the timbre donor and the
        % target as the content donor; envelope/detail may be swapped here
        % relative to that intent - confirm before changing.
        mag_new = mag_target_env .* mag_source_detail;
        % 5. Spectral shaping
        mag_new = spectral_shaping(mag_new, mag_source_env, mag_target_env);
        % 6. Phase: reuse the target phase unchanged
        phase_new = phase_target;
        update_diag('使用目标音频相位');

        % === Resynthesis ===
        update_diag('重建音频(ISTFT)...');
        update_progress(0.90, 'ISTFT重建');
        result = optimized_istft(mag_new, phase_new, stft_params, @update_progress);

        % Reject unusable output instead of storing it
        if any(~isfinite(result))
            error('转换结果包含无效数值(NaN/Inf)');
        end
        % Peak-normalise; skip when the result is all zeros (avoids 0/0 = NaN)
        peak = max(abs(result));
        if peak > 0
            result = result / peak;
        else
            update_diag('警告: 转换结果为全零(静音), 跳过归一化', true);
        end

        % Trim or zero-pad to the target length
        if length(result) > target_length
            result = result(1:target_length);
        elseif length(result) < target_length
            result = [result; zeros(target_length - length(result), 1)];
        end

        % Commit to the shared state (read by the play/save callbacks)
        converted_audio = result;

        % Show the result
        plot(ax2, (0:length(converted_audio)-1)/fs, converted_audio);
        title(ax2, '转换后音频波形');
        xlabel(ax2, '时间 (s)'); ylabel(ax2, '幅度');
        grid(ax2, 'on');
        show_spectrum(ax6, converted_audio, fs, stft_params, '转换后频谱');

        % Final status
        update_progress(1.0, '转换完成');
        set(status_text, 'String', '音色转换完成!');
        update_diag('音色转换成功!', true);
        conversion_complete = true;
        % The spectrogram matrices are local variables and are released when
        % this function returns; no explicit clear is needed.
    catch e
        errordlg(['音色转换失败: ', e.message], '错误');
        update_diag(['错误: ', e.message], true);
        set(progress_bar, 'FaceColor', [1, 0.3, 0.3]);
        set(progress_text, 'String', '处理失败');
    end

    % Leave the "busy" state
    processing = false;
end
function phase = phase_reconstruction(mag, phase_init, params)
% Iteratively refine a phase spectrogram for a given magnitude spectrogram.
%
% Inputs:
%   mag        - magnitude spectrogram (one-sided)
%   phase_init - initial phase spectrogram (one-sided)
%   params     - STFT parameter struct; params.phase_iter is the number of
%                iterations, the whole struct is forwarded to
%                optimized_istft / optimized_stft
% Output:
%   phase      - phase after the last iteration (phase_init when
%                params.phase_iter is 0)
%
% Each iteration resynthesises a time signal from (mag, current phase) and
% takes the phase of that signal's STFT as the next estimate. The previous
% version additionally built a full two-sided complex spectrum on every
% iteration without ever using it; that dead computation has been removed.
    phase = phase_init;
    for iter = 1:params.phase_iter
        % Time signal implied by the fixed magnitude and the current phase
        x_recon = optimized_istft(mag, phase, params, []);
        % Keep only the phase of its STFT
        [~, phase] = optimized_stft(x_recon, params, []);
    end
end
% 替换原来的 spectral_envelope 函数
function env = spectral_envelope(mag, lifter_order, nfft) %#ok<INUSD>
% Estimate a smooth spectral envelope for every frame of a magnitude
% spectrogram.
%
%   mag          - magnitude spectrogram, one frame per column
%   lifter_order - accepted for interface compatibility, not used
%   nfft         - accepted for interface compatibility, not used
%   env          - envelope, same size as mag
%
% Per frame: the log magnitude is low-pass filtered along frequency with a
% zero-phase (filtfilt) 8th-order Butterworth filter at normalised cutoff
% 0.05, then mapped back with exp. Finally a 3-frame moving average is
% applied along the time axis.
    [lp_b, lp_a] = butter(8, 0.05, 'low');
    env = zeros(size(mag));
    for col = 1:size(mag, 2)
        % eps keeps log() finite for zero-magnitude bins
        env(:, col) = exp(filtfilt(lp_b, lp_a, log(mag(:, col) + eps)));
    end
    % Smooth across neighbouring frames
    env = movmean(env, 3, 2);
end
% 进度更新函数
function update_progress(progress, message)
% Update the progress bar and its labels, then refresh the GUI.
%   progress - bar length as a fraction (used as the patch's right edge)
%   message  - optional text; when given it is shown in both the progress
%              label and the status line, otherwise the progress label
%              shows the percentage
    if nargin >= 1
        set(progress_bar, 'XData', [0, progress, progress, 0]);
        if nargin == 1
            % No message supplied: show the percentage instead
            set(progress_text, 'String', sprintf('%.0f%%', progress*100));
        else
            set(progress_text, 'String', message);
            set(status_text, 'String', message);
        end
    end
    % Rate-limited refresh of the interface
    drawnow limitrate;
end
function play_audio(audio, fs)
% Play AUDIO through an audioplayer object at sample rate FS.
%   audio - samples to play; an empty value is reported as an error
%   fs    - sample rate in Hz; replaced by 44100 when empty, non-scalar
%           or not positive
% The player is kept in a persistent variable so it outlives this call;
% a player that is still playing is stopped and deleted first.
    persistent player;

    % Nothing to play: tell the user and log it
    if isempty(audio)
        errordlg('音频数据为空!', '播放错误');
        update_diag('播放失败: 音频数据为空', true);
        return;
    end

    % Fall back to the default sample rate when FS is unusable
    if isempty(fs) || ~isscalar(fs) || fs <= 0
        fs = 44100;
        update_diag(['警告: 使用默认采样率 ', num2str(fs), ' Hz'], false);
    end

    % Stop a playback that is still running
    try
        still_playing = ~isempty(player) && isplaying(player);
        if still_playing
            stop(player);
            delete(player);
            player = [];
        end
    catch
        % The stored handle is unusable; forget it
        player = [];
    end

    % Start the new playback
    try
        if ~isvector(audio)
            audio = audio(:); % flatten to a column vector
        end
        player = audioplayer(audio, fs);
        play(player);

        seconds_total = length(audio) / fs;
        set(status_text, 'String', sprintf('正在播放音频 (%.1f秒)', seconds_total));
        update_diag(sprintf('播放音频: %.2f秒 (采样率: %d Hz)', seconds_total, fs), true);
    catch play_err
        dims = size(audio);
        report = sprintf('播放失败: %s\n音频尺寸: %d×%d', play_err.message, dims(1), dims(2));
        errordlg(report, '播放错误');
        update_diag(['播放错误: ', report], true);
        % Discard a player that may be in an invalid state
        if exist('player', 'var') && ~isempty(player)
            try
                stop(player);
                delete(player);
            catch
            end
            player = [];
        end
    end
end
% 保存音频函数
function save_audio(~, ~)
% Callback: write the converted audio to a WAV file chosen by the user.
% Refuses to run while a conversion is in progress or when there is no
% converted audio yet. Failures of audiowrite are reported in a dialog and
% in the log.
    if processing
        errordlg('处理正在进行中,请稍后保存', '错误');
        return;
    end
    if isempty(converted_audio)
        errordlg('没有转换后的音频可保存!', '错误');
        return;
    end
    [file, folder] = uiputfile('*.wav', '保存转换音频');
    if isequal(file, 0), return; end   % dialog cancelled
    set(status_text, 'String', '正在保存音频...');
    update_diag(['开始保存: ', file], true);
    try
        filename = fullfile(folder, file);
        audiowrite(filename, converted_audio, fs);
        set(status_text, 'String', ['已保存: ', file]);
        update_diag(['音频已保存: ', filename], true);
    catch e
        % Dialog title corrected (was the typo '极错误')
        errordlg(['保存失败: ', e.message], '错误');
        update_diag(['保存错误: ', e.message], true);
    end
end
function show_spectrum(ax, audio, fs, params, title_str)
% Draw the spectrogram of AUDIO into the axes AX.
%   ax        - target axes
%   audio     - signal; must contain at least params.win_len samples
%   fs        - sample rate, used for the y-axis limit and the duration
%               shown in the title
%   params    - STFT parameter struct forwarded to optimized_stft
%   title_str - title prefix
% On any error the axes are cleared and the error text is drawn instead.
% The STFT was previously computed twice in a row with identical arguments;
% it is now computed once.
    try
        % Input check
        if isempty(audio) || length(audio) < params.win_len
            error('无效音频数据: 长度=%d, 需要≥%d', length(audio), params.win_len);
        end
        % Single STFT pass provides magnitude plus both axes
        [mag, ~, f, t] = optimized_stft(audio, params, []);
        spec_data = 10*log10(abs(mag) + eps);
        % Plot
        cla(ax);
        imagesc(ax, t, f, spec_data);
        % Axes setup
        set(ax, 'YDir', 'normal');
        axis(ax, 'tight');
        ylim(ax, [0, fs/2]); % limit to the Nyquist frequency
        title(ax, [title_str, sprintf(' (%.1f秒)', length(audio)/fs)]);
        xlabel(ax, '时间 (s)');
        ylabel(ax, '频率 (Hz)');
        colorbar(ax);
        colormap(ax, 'jet');
    catch e
        % Show the problem inside the axes instead of a spectrogram
        cla(ax);
        err_msg = sprintf('频谱错误: %s\n音频尺寸: %dx%d', e.message, size(audio,1), size(audio,2));
        text(ax, 0.5, 0.5, err_msg, ...
            'HorizontalAlignment', 'center', ...
            'Color', 'red', ...
            'FontSize', 9);
        title(ax, [title_str, ' (错误)']);
    end
end
end
function detail = spectral_detail(mag, env)
% Extract the smoothed spectral detail of a magnitude spectrogram.
%
%   mag    - magnitude spectrogram (frequency bins x frames)
%   env    - spectral envelope, same size as mag
%   detail - smoothed, soft-limited ratio mag ./ env, same size as mag
%
% Steps: divide by the envelope (regularised by 1% of the envelope maximum),
% compress with tanh, smooth along frequency with a Gaussian window of about
% 10% of the bin count, then apply a 3-frame moving average along time.
    alpha = 0.3; % strength of the tanh compression

    % Regularised ratio; the offset avoids division by very small envelopes
    base_detail = mag ./ (env + 0.01 * max(env(:)));

    % Soft limiting, scaled so that an input of 1 maps to 1
    detail = tanh(alpha * base_detail) / tanh(alpha);

    % Frequency smoothing of all frames at once (dimension 1).
    % The window is kept >= 1 because round(0.1 * nbins) is 0 for fewer
    % than 5 bins.
    smooth_len = max(1, round(0.1 * size(detail, 1)));
    detail = smoothdata(detail, 1, 'gaussian', smooth_len);

    % Time smoothing across neighbouring frames
    detail = movmean(detail, 3, 2);
end
function mag_out = spectral_shaping(mag, env_source, env_target)
% Re-weight a magnitude spectrogram by the source/target envelope ratio.
%
%   mag        - magnitude spectrogram to shape
%   env_source - spectral envelope of the source
%   env_target - spectral envelope of the target
%   mag_out    - shaped magnitude spectrogram
%
% The per-bin gain is (env_source ./ env_target) .^ 0.7, limited to the
% range [0.5, 2.0]. The result is then scaled by one global factor, the
% square root of the ratio of the summed squared envelopes.
    balance_factor = 0.7; % exponent applied to the envelope ratio

    % Per-bin gain, clamped to [0.5, 2.0]
    gain = min(max((env_source ./ (env_target + eps)) .^ balance_factor, 0.5), 2.0);

    % Global energy matching factor
    energy_ratio = sqrt(sum(env_source(:).^2) / (sum(env_target(:).^2) + eps));

    mag_out = (mag .* gain) * energy_ratio;
end
%% === 核心音频处理函数 ===
function [mag, phase, f, t] = optimized_stft(x, params, progress_callback)
% Short-time Fourier transform with a periodic Hann analysis window.
%
% Inputs:
%   x                 - signal (column or row vector) with at least
%                       params.win_len samples
%   params            - struct with fields win_len, hop_size, nfft, fs
%                       (params.window is NOT used; the analysis window is
%                       always hann(win_len, 'periodic'))
%   progress_callback - [] or a function handle called after every frame
%                       with the fraction of frames done
% Outputs:
%   mag   - magnitude of the one-sided spectrum (floor(nfft/2)+1 x frames)
%   phase - phase of the one-sided spectrum, same size as mag
%   f     - frequency of every bin in Hz (column vector)
%   t     - centre time of every frame in seconds (row vector)
    win_len = params.win_len;
    hop_size = params.hop_size;
    nfft = params.nfft;
    fs = params.fs;

    % Input validation
    if isempty(x) || length(x) < win_len
        error('无效输入: 信号长度(%d) < 窗长(%d)', length(x), win_len);
    end
    % A hop below one sample gives an infinite or undefined frame count
    if ~(hop_size >= 1)
        error('optimized_stft:invalidHop', '无效跳跃长度: %g (必须 >= 1)', hop_size);
    end
    % A row vector multiplied by the column window would expand into a
    % matrix; work on a column instead
    if isrow(x)
        x = x(:);
    end

    win = hann(win_len, 'periodic');

    % Only complete frames are analysed, so no frame needs zero padding
    num_frames = floor((length(x) - win_len) / hop_size) + 1;
    stft_matrix = zeros(nfft, num_frames);

    % Centre time of every frame (s)
    t = ((0:num_frames-1) * hop_size + win_len/2) / fs;

    for i = 1:num_frames
        start_idx = (i-1) * hop_size + 1;
        segment = x(start_idx:start_idx + win_len - 1) .* win;
        stft_matrix(:, i) = fft(segment, nfft);
        % Progress report
        if ~isempty(progress_callback)
            progress_callback(i / num_frames);
        end
    end

    % Keep the one-sided spectrum
    num_freq_bins = floor(nfft/2) + 1;
    stft_matrix = stft_matrix(1:num_freq_bins, :);

    mag = abs(stft_matrix);
    phase = angle(stft_matrix);

    % Frequency axis (Hz)
    f = (0:num_freq_bins-1)' * (fs / nfft);

    % Dimension checks
    assert(size(mag, 1) == length(f), ...
        '频率维度不匹配: mag行数=%d, f长度=%d', size(mag,1), length(f));
    assert(size(mag, 2) == length(t), ...
        '时间维度不匹配: mag列数=%d, t长度=%d', size(mag,2), length(t));
end
function x_recon = optimized_istft(mag, phase, params, progress_callback)
% Inverse short-time Fourier transform by windowed overlap-add.
%
% Inputs:
%   mag               - magnitude spectrogram (one-sided, bins x frames)
%   phase             - phase spectrogram (one-sided, bins x frames)
%   params            - struct with fields nfft, win_len, overlap and
%                       win_synthesis (column vector of win_len samples)
%   progress_callback - [] or a function handle called about ten times as
%                       progress_callback(fraction * 0.2, message)
% Output:
%   x_recon           - column vector of (frames-1)*hop + win_len samples
%
% Every frame is transformed back, multiplied by the synthesis window and
% overlap-added; the sum is divided by the overlap-added squared synthesis
% window. Inconsistent parameters now raise explicit errors instead of
% failing later with indexing or size-mismatch errors.
    if isempty(mag) || isempty(phase)
        error('ISTFT输入为空');
    end

    nfft = params.nfft;
    win_len = params.win_len;
    hop_size = win_len - params.overlap;
    win_synth = params.win_synthesis;
    [num_bins, num_frames] = size(mag);

    % Parameter consistency checks
    if nfft < win_len
        error('optimized_istft:nfftTooSmall', ...
            'FFT点数(%d)小于窗长(%d)', nfft, win_len);
    end
    if ~(hop_size >= 1)
        error('optimized_istft:invalidHop', ...
            '无效跳跃长度: %g (必须 >= 1)', hop_size);
    end
    if num_bins ~= floor(nfft/2) + 1
        error('optimized_istft:binMismatch', ...
            '频谱行数(%d)与FFT点数(%d)不匹配', num_bins, nfft);
    end

    total_samples = (num_frames - 1) * hop_size + win_len;
    x_recon = zeros(total_samples, 1);
    norm_win = zeros(total_samples, 1);
    update_interval = max(1, floor(num_frames/10));

    % Rebuild the two-sided spectrum from the one-sided one
    S_half = mag .* exp(1i * phase);
    S_full = zeros(nfft, num_frames);
    S_full(1:num_bins, :) = S_half;
    if rem(nfft, 2)
        % odd nfft: mirror every bin except DC
        S_full(num_bins+1:end, :) = conj(S_half(end:-1:2, :));
    else
        % even nfft: the last bin (Nyquist) is not mirrored
        S_full(num_bins+1:end, :) = conj(S_half(end-1:-1:2, :));
    end

    % Inverse FFT of all frames at once (column-wise)
    frames = real(ifft(S_full, nfft, 1));
    win_sq = win_synth.^2;

    % Overlap-add of the windowed frames and of the squared window
    for frame_idx = 1:num_frames
        start_idx = (frame_idx - 1) * hop_size + 1;
        span = start_idx:(start_idx + win_len - 1);
        x_recon(span) = x_recon(span) + frames(1:win_len, frame_idx) .* win_synth;
        norm_win(span) = norm_win(span) + win_sq;
        if ~isempty(progress_callback) && mod(frame_idx, update_interval) == 0
            progress_callback(frame_idx/num_frames * 0.2, ...
                sprintf('ISTFT重建: %d/%d', frame_idx, num_frames));
        end
    end

    % Normalise, avoiding division by zero
    norm_win(norm_win < eps) = eps;
    x_recon = x_recon ./ norm_win;
end
最新发布