描述算法10673 - Play with Floor and Ceil

欧几里得算法与程序员幽默
本文介绍了一个基于欧几里得算法的C++实现示例,该算法用于求解最大公约数,并通过有趣的程序员笑话为紧张的技术氛围增添轻松元素。

文章结束给大家来个程序员笑话:[M]

    每日一道理
成功的花朵开放在啊勤劳的枝头,失败的苦果孕育在懒惰的温床之中。
描述:欧几里得扩大算法
#include <cstdio>
#include <cmath>
void gcd(int a,int b,int &d,int &x,int &y)
{
    if(!b) d=a,x=1,y=0;
    else
    {
        gcd(b,a%b,d,y,x);
        y-=x*(a/b);
    }
}
int main()
{
    //freopen("a.txt","r",stdin);
    int n,m,t,a,b,x,y;
    scanf("%d",&t);
    while(t--)
    {
        scanf("%d%d",&n,&m);
        a=floor(n*1.0/m),b=ceil(n*1.0/m);
        gcd(a,b,m,x,y);
        printf("%d %d\n",x*(n/m),y*(n/m));
    }
    return 0;
}

文章结束给大家分享下程序员的一些笑话语录: 古鸽是一种搜索隐禽,在中国快绝迹了…初步的研究表明,古鸽的离去,很可能导致另一种长着熊爪,酷似古鸽,却又习性不同的猛禽类——犤毒鸟

--------------------------------- 原创文章 By
描述和算法
---------------------------------

function audio_pitch_correction_t3 % 创建主GUI界面 fig = uifigure('Name', '音频音准矫正系统', 'Position', [100 100 900 700]); % 创建音频选择区域 uilabel(fig, 'Position', [50 680 300 20], 'Text', '待矫正音频来源:', 'FontWeight', 'bold'); % 创建录音选项按钮组 source_btn_group = uibuttongroup(fig, 'Position', [50 630 300 40], 'Title', ''); uibutton(source_btn_group, 'Position', [10 10 130 30], 'Text', '导入音频文件', ... 'ButtonPushedFcn', @(btn,event) select_audio(fig, 'source')); uibutton(source_btn_group, 'Position', [160 10 130 30], 'Text', '录制音频', ... 'ButtonPushedFcn', @(btn,event) record_audio(fig)); % 创建参考音频选择按钮 uilabel(fig, 'Position', [400 680 300 20], 'Text', '参考音频来源:', 'FontWeight', 'bold'); uibutton(fig, 'Position', [400 630 150 30], 'Text', '导入参考音频', ... 'ButtonPushedFcn', @(btn,event) select_audio(fig, 'reference')); % 创建处理按钮 process_btn = uibutton(fig, 'Position', [600 630 150 30], ... 'Text', '开始矫正', 'Enable', 'off', ... 'ButtonPushedFcn', @(btn,event) process_audio(fig)); % 创建播放和保存按钮 uibutton(fig, 'Position', [50 580 150 30], 'Text', '播放原始音频', ... 'ButtonPushedFcn', @(btn,event) play_audio(fig, 'source')); uibutton(fig, 'Position', [250 580 150 30], 'Text', '播放矫正音频', ... 'ButtonPushedFcn', @(btn,event) play_audio(fig, 'corrected')); uibutton(fig, 'Position', [450 580 150 30], 'Text', '保存矫正音频', ... 'ButtonPushedFcn', @(btn,event) save_audio(fig)); % 创建录音状态显示 recording_label = uilabel(fig, 'Position', [650 580 200 30], ... 'Text', '准备录音', 'FontColor', [0 0.5 0]); % 创建波形显示区域 ax_source = uiaxes(fig, 'Position', [50 350 800 150]); title(ax_source, '待矫正音频波形'); ax_reference = uiaxes(fig, 'Position', [50 180 800 150]); title(ax_reference, '参考音频波形'); ax_corrected = uiaxes(fig, 'Position', [50 10 800 150]); title(ax_corrected, '矫正后音频波形'); % 存储数据 fig.UserData.source_audio = []; fig.UserData.reference_audio = []; fig.UserData.corrected_audio = []; fig.UserData.fs = 44100; % 默认采样率 fig.UserData.process_btn = process_btn; fig.UserData.axes = struct('source', ax_source, 'reference', ax_reference, 'corrected', ax_corrected); fig.UserData.recording_label = recording_label; fig.UserData.recorder = []; % 录音器对象 fig.UserData.timer = []; % 计时器对象 end function select_audio(fig, audio_type) [file, path] = uigetfile({'*.wav;*.mp3;*.ogg;*.flac;*.mat', ... '音频文件 (*.wav,*.mp3,*.ogg,*.flac,*.mat)'}); if isequal(file, 0), return; end filename = fullfile(path, file); [~, ~, ext] = fileparts(filename); if strcmpi(ext, '.mat') % 加载MAT文件 data = load(filename); % 检查必需字段 if isfield(data, 'corrected_audio') && isfield(data, 'f0_corrected') && ... isfield(data, 'time_source') && isfield(data, 'fs') % 存储数据 fig.UserData.corrected_audio = data.corrected_audio; fig.UserData.f0_corrected = data.f0_corrected; fig.UserData.time_source = data.time_source; fig.UserData.fs = data.fs; % 更新波形显示 ax = fig.UserData.axes.corrected; cla(ax); yyaxis(ax, 'left'); plot(ax, (1:length(data.corrected_audio))/data.fs, data.corrected_audio); ylabel(ax, '幅度'); yyaxis(ax, 'right'); plot(ax, data.time_source, data.f0_corrected, 'Color', [1 0.5 0], 'LineWidth', 2); ylabel(ax, '频率 (Hz)'); title(ax, '矫正后音频波形与音高'); grid(ax, 'on'); return; else errordlg('MAT文件缺少必需的音高数据字段!', '加载错误'); return; end end % 常规音频文件处理 [audio, fs] = audioread(filename); % 处理立体声:转换为单声道 if size(audio, 2) > 1 audio = mean(audio, 2); end % 截取前20秒 max_samples = min(20*fs, length(audio)); audio = audio(1:max_samples); % 存储数据 fig.UserData.([audio_type '_audio']) = audio; fig.UserData.fs = fs; % 更新波形显示 ax = fig.UserData.axes.(audio_type); cla(ax); plot(ax, (1:length(audio))/fs, audio); xlabel(ax, '时间 (s)'); ylabel(ax, '幅度'); % 如果是矫正音频,尝试读取元数据中的音高信息 if strcmp(audio_type, 'corrected') try info = audioinfo(filename); if isfield(info, 'Comment') && ~isempty(info.Comment) metadata = jsondecode(info.Comment); if isfield(metadata, 'f0_corrected') fig.UserData.f0_corrected = metadata.f0_corrected; fig.UserData.time_source = metadata.time_source; % 添加音高曲线 yyaxis(ax, 'left'); plot(ax, (1:length(audio))/fs, audio); ylabel(ax, '幅度'); yyaxis(ax, 'right'); plot(ax, metadata.time_source, metadata.f0_corrected, 'r', 'LineWidth', 1.5); ylabel(ax, '频率 (Hz)'); title(ax, '矫正后音频波形与音高'); grid(ax, 'on'); end end catch % 忽略元数据读取错误 end end % 启用处理按钮 if ~isempty(fig.UserData.source_audio) && ~isempty(fig.UserData.reference_audio) fig.UserData.process_btn.Enable = 'on'; end end function record_audio(fig) % 创建录音界面 record_fig = uifigure('Name', '音频录制', 'Position', [300 300 400 200]); % 录音时长设置 uilabel(record_fig, 'Position', [50 150 100 20], 'Text', '录音时长 (秒):'); duration_edit = uieditfield(record_fig, 'numeric', ... 'Position', [160 150 100 20], 'Value', 5, 'Limits', [1 30]); % 采样率设置 uilabel(record_fig, 'Position', [50 120 100 20], 'Text', '采样率:'); fs_dropdown = uidropdown(record_fig, ... 'Position', [160 120 100 20], ... 'Items', {'8000', '16000', '44100', '48000'}, ... 'Value', '44100'); % 控制按钮 record_btn = uibutton(record_fig, 'Position', [50 70 100 30], ... 'Text', '开始录音', ... 'ButtonPushedFcn', @(btn,event) start_recording(fig, duration_edit.Value, str2double(fs_dropdown.Value))); uibutton(record_fig, 'Position', [160 70 100 30], ... 'Text', '停止录音', ... 'ButtonPushedFcn', @(btn,event) stop_recording(fig)); uibutton(record_fig, 'Position', [270 70 100 30], ... 'Text', '关闭', ... 'ButtonPushedFcn', @(btn,event) close(record_fig)); end function start_recording(fig, duration, fs) % 更新状态 fig.UserData.recording_label.Text = '录音中...'; fig.UserData.recording_label.FontColor = [1 0 0]; drawnow; % 创建录音器对象 recorder = audiorecorder(fs, 16, 1); % 16-bit, 单声道 % 设置录音时长 fig.UserData.recorder = recorder; fig.UserData.fs = fs; % 开始录音 record(recorder, duration); % 创建计时器显示剩余时间 t = timer('ExecutionMode', 'fixedRate', 'Period', 1, ... 'TasksToExecute', duration, ... 'TimerFcn', @(t,~) update_recording_timer(fig, t, duration)); start(t); % 存储计时器 fig.UserData.timer = t; end function update_recording_timer(fig, t, total_duration) elapsed = t.TasksExecuted; remaining = total_duration - elapsed; fig.UserData.recording_label.Text = sprintf('录音中: %d秒', remaining); % 录音结束时自动停止 if remaining <= 0 stop_recording(fig); end end function stop_recording(fig) if ~isempty(fig.UserData.recorder) && isrecording(fig.UserData.recorder) stop(fig.UserData.recorder); end % 停止计时器 if ~isempty(fig.UserData.timer) && isvalid(fig.UserData.timer) stop(fig.UserData.timer); delete(fig.UserData.timer); fig.UserData.timer = []; end % 获取录音数据 audio = getaudiodata(fig.UserData.recorder); fs = fig.UserData.fs; % 更新状态 fig.UserData.recording_label.Text = '录音完成!'; fig.UserData.recording_label.FontColor = [0 0.5 0]; % 存储为待矫正音频 fig.UserData.source_audio = audio; % 更新波形显示 ax = fig.UserData.axes.source; plot(ax, (1:length(audio))/fs, audio); title(ax, '录制音频波形'); xlabel(ax, '时间 (s)'); ylabel(ax, '幅度'); % 启用处理按钮 if ~isempty(fig.UserData.reference_audio) fig.UserData.process_btn.Enable = 'on'; end end function process_audio(fig) source = fig.UserData.source_audio; reference = fig.UserData.reference_audio; fs = fig.UserData.fs; % 确保主图窗存在 if ~isvalid(fig) errordlg('主窗口已关闭,无法处理音频!', '处理错误'); return; end % 创建处理进度对话框 h = uiprogressdlg(fig, 'Title', '处理中', 'Message', '音频对齐...', 'Indeterminate', 'on'); % 步骤1:音频对齐 try [aligned_source, aligned_ref] = improved_align_audio(source, reference, fs); catch ME close(h); errordlg(['音频对齐失败: ' ME.message], '处理错误'); return; end % 步骤2:基频提取 h.Message = '提取音高...'; try [f0_source, time_source] = extract_pitch(aligned_source, fs); [f0_ref, time_ref] = extract_pitch(aligned_ref, fs); catch ME close(h); errordlg(['音高提取失败: ' ME.message], '处理错误'); return; end % 步骤3:音调矫正 h.Message = '矫正音调...'; try [corrected, f0_corrected] = correct_pitch(fig, aligned_source, fs, f0_source, f0_ref, time_source, time_ref); catch ME close(h); errordlg(['音高校正失败: ' ME.message], '处理错误'); return; end % 关闭进度对话框 close(h); % === 关键修复 1: 存储矫正结果 === fig.UserData.corrected_audio = corrected; % === 关键修复 2: 更新播放按钮状态 === play_btn = findobj(fig, 'Text', '播放矫正音频'); if ~isempty(play_btn) play_btn.Enable = 'on'; end % 保存结果并更新显示 % 更新原始音频波形图(添加音高曲线) ax_src = fig.UserData.axes.source; cla(ax_src); yyaxis(ax_src, 'left'); plot(ax_src, (1:length(aligned_source))/fs, aligned_source, 'b'); ylabel(ax_src, '幅度'); yyaxis(ax_src, 'right'); plot(ax_src, time_source, f0_source, 'r', 'LineWidth', 1.5); ylabel(ax_src, '频率 (Hz)'); title(ax_src, '原始音频波形与音高'); grid(ax_src, 'on'); % 更新参考音频波形图(添加音高曲线) ax_ref = fig.UserData.axes.reference; cla(ax_ref); yyaxis(ax_ref, 'left'); plot(ax_ref, (1:length(aligned_ref))/fs, aligned_ref, 'g'); ylabel(ax_ref, '幅度'); yyaxis(ax_ref, 'right'); plot(ax_ref, time_ref, f0_ref, 'm', 'LineWidth', 1.5); ylabel(ax_ref, '频率 (Hz)'); title(ax_ref, '参考音频波形与音高'); grid(ax_ref, 'on'); % 更新矫正后音频波形图(添加音高曲线) ax_corr = fig.UserData.axes.corrected; cla(ax_corr); yyaxis(ax_corr, 'left'); plot(ax_corr, (1:length(corrected))/fs, corrected, 'Color', [0.5 0 0.5]); ylabel(ax_corr, '幅度'); yyaxis(ax_corr, 'right'); plot(ax_corr, time_source, f0_corrected, 'Color', [1 0.5 0], 'LineWidth', 2); ylabel(ax_corr, '频率 (Hz)'); title(ax_corr, '矫正后音频波形与音高'); grid(ax_corr, 'on'); % 存储所有关键数据 fig.UserData.corrected_audio = corrected; fig.UserData.f0_corrected = f0_corrected; fig.UserData.time_source = time_source; fig.UserData.original_fs = fs; % 存储原始采样率 % 绘制综合音高对比图 % 修改后的调用:添加音频波形参数 plot_pitch_comparison(time_source, f0_source, time_ref, f0_ref, f0_corrected,... aligned_source, aligned_ref, corrected, fs); fprintf('原始音高平均: %.1f Hz\n', mean(f0_source(f0_source>0))); fprintf('参考音高平均: %.1f Hz\n', mean(f0_ref(f0_ref>0))); fprintf('矫正后音高平均: %.1f Hz\n', mean(f0_corrected(f0_corrected>0))); end function [aligned_src, aligned_ref] = improved_align_audio(src, ref, fs) % 改进的音频对齐方法:使用频谱互相关 win_size = round(0.1 * fs); % 100ms窗口 hop_size = round(0.05 * fs); % 50ms跳跃 % 计算源音频的频谱图 [S_src, ~, t_src] = spectrogram(src, win_size, win_size-hop_size, win_size, fs); % 计算参考音频的频谱图 [S_ref, ~, t_ref] = spectrogram(ref, win_size, win_size-hop_size, win_size, fs); % 计算互相关 n_frames = min(length(t_src), length(t_ref)); corr_vals = zeros(1, n_frames); for i = 1:n_frames spec_src = abs(S_src(:, i)); spec_ref = abs(S_ref(:, i)); corr_vals(i) = dot(spec_src, spec_ref) / (norm(spec_src) * norm(spec_ref)); end % 找到最大相关帧 [~, max_idx] = max(corr_vals); time_diff = t_src(max_idx) - t_ref(max_idx); sample_diff = round(time_diff * fs); % 对齐音频 if sample_diff > 0 aligned_src = src(1:end-sample_diff); aligned_ref = ref(sample_diff+1:end); else aligned_src = src(-sample_diff+1:end); aligned_ref = ref(1:end+sample_diff); end % 确保等长 min_len = min(length(aligned_src), length(aligned_ref)); aligned_src = aligned_src(1:min_len); aligned_ref = aligned_ref(1:min_len); end function mfcc = mfcc_feature(audio, fs, frame_size, hop_size) % 参数验证 if nargin < 4 hop_size = round(frame_size/2); % 默认50%重叠 end % 预处理:预加重 audio = filter([1 -0.97], 1, audio); % 分帧处理 frames = buffer(audio, frame_size, frame_size - hop_size, 'nodelay'); num_frames = size(frames, 2); % 加窗(汉明窗) window = hamming(frame_size); windowed_frames = frames .* repmat(window, 1, num_frames); % 计算功率谱 nfft = 2^nextpow2(frame_size); mag_frames = abs(fft(windowed_frames, nfft)); power_frames = (mag_frames(1:nfft/2+1, :)).^2; % 设计梅尔滤波器组 num_filters = 26; % 滤波器数量 mel_min = 0; % 最小Mel频率 mel_max = 2595 * log10(1 + (fs/2)/700); % 最大Mel频率 % 创建等间隔的Mel频率点 mel_points = linspace(mel_min, mel_max, num_filters + 2); % 将Mel频率转换为线性频率 hz_points = 700 * (10.^(mel_points/2595) - 1); % 转换为FFT bin索引 bin_indices = floor((nfft+1) * hz_points / fs); % 创建梅尔滤波器组 filter_bank = zeros(num_filters, nfft/2+1); for m = 2:num_filters+1 left = bin_indices(m-1); center = bin_indices(m); right = bin_indices(m+1); % 左侧斜坡 for k = left:center-1 filter_bank(m-1, k+1) = (k - left) / (center - left); end % 右侧斜坡 for k = center:right filter_bank(m-1, k+1) = (right - k) / (right - center); end end % 应用梅尔滤波器组 mel_spectrum = filter_bank * power_frames; % 取对数 log_mel = log(mel_spectrum + eps); % 计算DCT得到MFCC系数 mfcc = dct(log_mel); % 保留前13个系数(含能量系数) mfcc = mfcc(1:13, :); % 可选:添加能量特征 energy = log(sum(power_frames) + eps); mfcc(1, :) = energy; % 替换第0阶MFCC为对数能量 % 应用倒谱均值归一化 (CMN) mfcc = mfcc - mean(mfcc, 2); end function [f0, time] = extract_pitch(audio, fs) % 参数设置 frame_size = round(0.05 * fs); % 50ms帧 hop_size = round(0.025 * fs); % 25ms跳跃 n_frames = floor((length(audio) - frame_size) / hop_size) + 1; f0_min = 80; % 最低基频(Hz) f0_max = 1000; % 最高基频(Hz) tau_min = max(1, round(fs/f0_max)); % 确保至少为1 tau_max = max(tau_min+1, round(fs/f0_min)); % 确保大于tau_min f0 = zeros(1, n_frames); time = zeros(1, n_frames); % 预处理:带通滤波去除噪声 [b, a] = butter(4, [80, 1000]/(fs/2), 'bandpass'); audio = filtfilt(b, a, audio); for i = 1:n_frames start_idx = (i-1)*hop_size + 1; end_idx = min(start_idx + frame_size - 1, length(audio)); frame = audio(start_idx:end_idx); % === 修复1: 确保tau在有效范围内 === tau_min_valid = max(1, tau_min); tau_max_valid = min(tau_max, length(frame)-1); if tau_max_valid <= tau_min_valid f0(i) = 0; time(i) = (start_idx + frame_size/2) / fs; continue; end % 改进的YIN算法实现 diff = zeros(1, tau_max_valid); for tau = 1:tau_max_valid % 从1开始 for j = 1:(length(frame)-tau) diff(tau) = diff(tau) + (frame(j) - frame(j+tau))^2; end end % 累积均值归一化差分函数 (CMND) cmnd = zeros(1, tau_max_valid); cmnd(1) = 1; % 避免除以零 % === 修复2: 使用安全的索引范围 === for tau = 2:tau_max_valid % 确保分母不为零 if tau > 0 denominator = (1/tau) * sum(diff(1:tau)); if denominator > eps cmnd(tau) = diff(tau) / denominator; else cmnd(tau) = 1; % 安全值 end else cmnd(tau) = 1; % 安全值 end end % 寻找最小值 (考虑阈值) [min_val, min_idx] = min(cmnd(tau_min_valid:tau_max_valid)); tau_int = min_idx + tau_min_valid - 1; % 保持为整数索引 % === 关键修复:保持tau为整数,仅用插值修正基频 === if tau_int > 1 && tau_int < tau_max_valid cmnd_tau = cmnd(tau_int); cmnd_prev = cmnd(tau_int-1); cmnd_next = cmnd(tau_int+1); if min_val < 0.1 % 计算抛物线插值修正量 delta = 0.5 * (cmnd_next - cmnd_prev) / ... (2*cmnd_tau - cmnd_prev - cmnd_next); % 精确计算修正后的基频(tau_int保持整数) tau_true = tau_int + delta; f0(i) = fs / tau_true; else f0(i) = fs / tau_int; end else f0(i) = fs / tau_int; end time(i) = (start_idx + frame_size/2) / fs; end % 后处理:中值滤波和插值 valid_idx = f0 > f0_min & f0 < f0_max; f0(~valid_idx) = NaN; f0 = fillmissing(f0, 'movmedian', 5); % 5点移动中值 f0 = fillmissing(f0, 'linear'); % 线性插值填充 if any(isnan(cmnd)) || any(isinf(cmnd)) fprintf('帧 %d: 检测到无效CMND值!\n', i); fprintf('tau范围: %d 到 %d\n', tau_min_valid, tau_max_valid); fprintf('diff值: %s\n', mat2str(diff(1:min(10,end)))); end end function [corrected, f0_corrected] = correct_pitch(fig, audio, fs, f0_src, f0_ref, time_src, time_ref) % 创建进度条 h = uiprogressdlg(fig, 'Title', '处理中', 'Message', '音高校正...'); % 固定参数(全部整数化) frame_len = round(0.05 * fs); % 50ms帧长 hop_size = round(0.025 * fs); % 25ms跳跃 n_frames = floor((length(audio)-frame_len)/hop_size) + 1; % 预分配输出(基于最大可能长度) max_ratio = 2^(24/12); % 最大半音变化 estimated_length = ceil(length(audio) * max_ratio * 2); corrected = zeros(estimated_length, 1, 'like', audio); f0_corrected = zeros(1, n_frames, 'like', audio); % 参考音高插值 valid_ref = f0_ref > 0 & ~isnan(f0_ref); ref_interp = @(t) interp1(time_ref(valid_ref), f0_ref(valid_ref), t, 'nearest', 0); % 完全整数化位置跟踪 current_position = 1; for i = 1:n_frames % === 整数化帧提取 === start_idx = max(1, (i-1)*hop_size + 1); end_idx = min(start_idx + frame_len - 1, length(audio)); frame = audio(start_idx:end_idx); actual_frame_len = end_idx - start_idx + 1; % 目标音高 t_frame = (start_idx + floor(frame_len/2)) / fs; target_f0 = ref_interp(t_frame); if f0_src(i) > 50 && target_f0 > 50 % === 整数化音高调整 === semitone_diff = 12 * log2(target_f0 / f0_src(i)); semitone_diff = max(-24, min(24, semitone_diff)); [num, den] = rat(2^(semitone_diff/12), 0.01); % 有理数近似 % 重采样 resampled_frame = resample(frame, num, den); % === 长度受控的相位声码器 === input_len = length(resampled_frame); expected_len = round(input_len * den/num); corrected_frame = controlled_phase_vocoder(resampled_frame, den, num, expected_len); f0_corrected(i) = target_f0; else corrected_frame = frame; f0_corrected(i) = f0_src(i); num = 1; den = 1; end % === 关键修复:确保长度一致 === corrected_length = length(corrected_frame); output_start = current_position; output_end = output_start + corrected_length - 1; % === 安全写入机制 === if output_end > length(corrected) % 整数块扩展 block_size = 1024; needed = output_end - length(corrected); additional = ceil(needed / block_size) * block_size; corrected = [corrected; zeros(additional, 1, 'like', corrected)]; end % === 动态窗函数生成 === win = hamming(corrected_length); win_frame = corrected_frame .* win; % === 安全赋值(长度验证) === target_range = output_start:output_end; if length(target_range) ~= length(win_frame) % 自动调整到匹配长度 min_len = min(length(target_range), length(win_frame)); corrected(output_start:output_start+min_len-1) = ... corrected(output_start:output_start+min_len-1) + win_frame(1:min_len); % 记录警告 warning('帧%d: 长度不匹配 (目标:%d, 实际:%d)', i, length(target_range), length(win_frame)); else % 完美匹配时直接赋值 corrected(output_start:output_end) = ... corrected(output_start:output_end) + win_frame; end % === 整数化位置更新 === frame_hop = round(hop_size * den/num); current_position = current_position + frame_hop; % 调试信息 fprintf('帧 %d: 位置=%d, 输入长度=%d, 输出长度=%d\n', ... i, current_position, actual_frame_len, corrected_length); % 更新进度条 h.Value = i/n_frames; h.Message = sprintf('处理进度: %d/%d 帧 (%.1f%%)', i, n_frames, i/n_frames*100); end % 裁剪到实际长度 last_sample = find(corrected ~= 0, 1, 'last'); if ~isempty(last_sample) corrected = corrected(1:last_sample); end close(h); end % 长度受控的相位声码器 function y = controlled_phase_vocoder(x, den, num, expected_len) % 基本参数 n = 1024; % FFT大小 hop_in = round(n * 0.25); % 输入跳跃(25%重叠) hop_out = round(hop_in * den/num); % 输出跳跃 % 初始化 y = zeros(expected_len, 1, 'like', x); theta = zeros(n,1); y_pos = 1; % 处理所有帧 for start = 1:hop_in:length(x)-n % 当前帧 frame = x(start:start+n-1); X = fft(frame .* hamming(n)); % 相位处理 mag = abs(X); phase_diff = angle(X) - theta; theta = angle(X); % 相位累积 delta_phi = 2*pi*hop_in*(0:n-1)'/n; phase_diff = phase_diff - delta_phi; phase_diff = wrapToPi(phase_diff); % 频率估计 omega = (phase_diff + delta_phi) / hop_in; % 相位传播 theta = theta + hop_out * omega; % 重建帧 y_frame = real(ifft(mag .* exp(1i*theta))); % 安全写入 end_pos = min(y_pos+n-1, expected_len); valid_len = end_pos - y_pos + 1; if valid_len > 0 y(y_pos:end_pos) = y(y_pos:end_pos) + y_frame(1:valid_len) .* hamming(valid_len); end % 更新位置 y_pos = y_pos + hop_out; end end % 安全帧提取函数 function [frame, valid] = safe_frame(x, start, len) if start < 1 || start+len-1 > length(x) frame = zeros(len, 1); if start < 1 valid_part = x(1:min(len+start-1, length(x))); frame(1-start+1:end) = valid_part; else valid_part = x(start:min(start+len-1, length(x))); frame(1:length(valid_part)) = valid_part; end valid = false; else frame = x(start:start+len-1); valid = true; end end % function [corrected, f0_corrected] = correct_pitch(fig, audio, fs, f0_src, f0_ref, time_src, time_ref) % % 创建进度条 % h = uiprogressdlg(fig, 'Title', '处理中', 'Message', '音高校正...'); % % frame_len = round(0.05 * fs); % 50ms帧长 % hop_size = round(0.025 * fs); % 25ms跳跃 % n_frames = floor((length(audio)-frame_len)/hop_size) + 1; % corrected = zeros(size(audio)); % f0_corrected = zeros(1, n_frames); % % % 创建参考音高插值函数 % valid_ref = f0_ref > 0; % if any(valid_ref) % ref_interp = @(t) interp1(time_ref(valid_ref), f0_ref(valid_ref), t, 'linear', 'extrap'); % else % ref_interp = @(t) 0; % end % % for i = 1:n_frames % % 计算当前帧位置 % start_idx = (i-1)*hop_size + 1; % end_idx = start_idx + frame_len - 1; % frame = audio(start_idx:end_idx); % % % 查找当前帧对应的目标音高 % t_frame = (start_idx + frame_len/2) / fs; % target_f0 = ref_interp(t_frame); % % if f0_src(i) > 0 && target_f0 > 0 % % 使用对数比例(音乐音高是几何级数) % semitone_diff = 12 * log2(target_f0 / f0_src(i)); % % % 限制最大校正范围(±12半音) % semitone_diff = max(-12, min(12, semitone_diff)); % % % 转换为频率比例 % target_ratio = 2^(semitone_diff/12); % % % 使用相位声码器 % corrected_frame = phase_vocoder(frame, target_ratio, fs); % % f0_corrected(i) = target_f0; % else % corrected_frame = frame; % f0_corrected(i) = f0_src(i); % end % % % % 重叠相加 % frame_end_idx = start_idx + length(corrected_frame) - 1; % if frame_end_idx <= length(corrected) % corrected(start_idx:frame_end_idx) = ... % corrected(start_idx:frame_end_idx) + corrected_frame .* hamming(length(corrected_frame)); % end % % % 更新进度条 % h.Value = i/n_frames; % h.Message = sprintf('处理进度: %d/%d 帧 (%.1f%%)', i, n_frames, i/n_frames*100); % end % % % === 关键修复 3: 数据格式处理 === % corrected = real(corrected); % 确保实数 % max_amp = max(abs(corrected)); % if max_amp > 0 % corrected = corrected / max_amp; % else % corrected = zeros(size(corrected)); % 处理全零情况 % end % if ~isa(corrected, 'double') % corrected = double(corrected); % end % % % 归一化防止削波 % max_amp = max(abs(corrected)); % if max_amp > 0 % corrected = corrected / max_amp; % end % % close(h); % end function plot_pitch_comparison(time_src, f0_src, time_ref, f0_ref, f0_corrected, src_wave, ref_wave, corr_wave, fs) % 确保所有序列长度一致 min_length = min([length(time_src), length(time_ref), length(f0_corrected)]); time_src = time_src(1:min_length); f0_src = f0_src(1:min_length); time_ref = time_ref(1:min_length); f0_ref = f0_ref(1:min_length); f0_corrected = f0_corrected(1:min_length); % 创建综合音高对比图(包含波形和音高) pitch_fig = figure('Name', '音频波形与音高分析', 'Position', [100 100 900 800]); % 原始音频波形 + 音高 subplot(3,1,1); time_wave_src = (1:length(src_wave)) / fs; yyaxis left; plot(time_wave_src, src_wave, 'Color', [0.7 0.7 1], 'LineWidth', 0.5); ylabel('幅度'); ylim([-1.1 1.1]); % 固定幅度范围 yyaxis right; plot(time_src, f0_src, 'b', 'LineWidth', 1.5); hold on; plot(time_ref, f0_ref, 'r--', 'LineWidth', 1.5); hold off; title('原始音频波形与音高'); xlabel('时间 (s)'); ylabel('频率 (Hz)'); legend('原始波形', '原始音高', '参考音高', 'Location', 'best'); grid on; % 参考音频波形 + 音高 subplot(3,1,2); time_wave_ref = (1:length(ref_wave)) / fs; yyaxis left; plot(time_wave_ref, ref_wave, 'Color', [1 0.7 0.7], 'LineWidth', 0.5); ylabel('幅度'); ylim([-1.1 1.1]); % 固定幅度范围 yyaxis right; plot(time_ref, f0_ref, 'r', 'LineWidth', 1.5); title('参考音频波形与音高'); xlabel('时间 (s)'); ylabel('频率 (Hz)'); legend('参考波形', '参考音高', 'Location', 'best'); grid on; % 矫正后音频波形 + 音高 subplot(3,1,3); time_wave_corr = (1:length(corr_wave)) / fs; yyaxis left; plot(time_wave_corr, corr_wave, 'Color', [0.7 1 0.7], 'LineWidth', 0.5); ylabel('幅度'); ylim([-1.1 1.1]); % 固定幅度范围 yyaxis right; plot(time_src, f0_src, 'b:', 'LineWidth', 1); hold on; plot(time_ref, f0_ref, 'r--', 'LineWidth', 1); plot(time_src, f0_corrected, 'g', 'LineWidth', 2); hold off; title('矫正后音频波形与音高'); xlabel('时间 (s)'); ylabel('频率 (Hz)'); legend('矫正波形', '原始音高', '参考音高', '矫正音高', 'Location', 'best'); grid on; % 添加音高误差分析 valid_idx = (f0_src > 0) & (f0_ref > 0) & (f0_corrected > 0); if any(valid_idx) src_error = mean(abs(f0_src(valid_idx) - f0_ref(valid_idx))); corr_error = mean(abs(f0_corrected(valid_idx) - f0_ref(valid_idx))); annotation(pitch_fig, 'textbox', [0.15 0.05 0.7 0.05], ... 'String', sprintf('原始音高平均误差: %.2f Hz | 矫正后音高平均误差: %.2f Hz | 改进: %.1f%%', ... src_error, corr_error, (src_error - corr_error)/src_error*100), ... 'FitBoxToText', 'on', 'BackgroundColor', [0.9 0.9 0.9], ... 'FontSize', 12, 'HorizontalAlignment', 'center'); end end function play_audio(fig, audio_type) if ~isvalid(fig) errordlg('主窗口无效!', '播放错误'); return; end switch audio_type case 'source' audio = fig.UserData.source_audio; title_text = '播放原始音频'; if isempty(audio) errordlg('未找到原始音频数据!', '播放错误'); return; end case 'corrected' audio = fig.UserData.corrected_audio; title_text = '播放矫正音频'; if isempty(audio) errordlg('请先完成音高校正!', '播放错误'); return; end otherwise return; end fs = fig.UserData.fs; player = audioplayer(audio, fs); % 创建播放控制界面 play_fig = uifigure('Name', title_text, 'Position', [500 500 300 150]); % 播放进度条 ax = uiaxes(play_fig, 'Position', [50 100 200 20]); hold(ax, 'on'); prog_line = plot(ax, [0 0], [0 1], 'b', 'LineWidth', 2); % 垂直范围[0,1] hold(ax, 'off'); xlim(ax, [0 1]); ylim(ax, [0 1]); set(ax, 'XTick', [], 'YTick', []); % 播放时间显示 time_label = uilabel(play_fig, 'Position', [50 80 200 20], ... 'Text', '00:00 / 00:00', 'HorizontalAlignment', 'center'); % 控制按钮 uibutton(play_fig, 'Position', [50 30 60 30], 'Text', '播放', ... 'ButtonPushedFcn', @(btn,event) play(player)); uibutton(play_fig, 'Position', [120 30 60 30], 'Text', '暂停', ... 'ButtonPushedFcn', @(btn,event) pause(player)); uibutton(play_fig, 'Position', [190 30 60 30], 'Text', '停止', ... 'ButtonPushedFcn', @(btn,event) stop(player)); % 总时长计算 total_time = length(audio)/fs; mins = floor(total_time/60); secs = round(total_time - mins*60); total_str = sprintf('%02d:%02d', mins, secs); % 更新播放进度回调 player.TimerFcn = {@update_playback, play_fig, time_label, total_str, prog_line, length(audio)}; player.TimerPeriod = 0.1; % 更新频率(秒) player.StopFcn = @(src,event) stop_playback(src, event, play_fig); end function stop_playback(src, ~, fig) stop(src); if isvalid(fig) close(fig); end end function save_audio(fig) if ~isvalid(fig) || isempty(fig.UserData.corrected_audio) errordlg('无有效音频数据可保存!', '保存错误'); return; end % 获取所有相关数据 corrected_audio = fig.UserData.corrected_audio; f0_corrected = fig.UserData.f0_corrected; time_source = fig.UserData.time_source; fs = fig.UserData.original_fs; % 创建元数据结构 metadata = struct(); metadata.f0_corrected = f0_corrected; metadata.time_source = time_source; metadata.fs = fs; metadata.creation_date = datestr(now); metadata.pitch_correction_info = 'Generated by Audio Pitch Correction System'; % 提示用户保存 [file, path] = uiputfile({'*.wav', 'WAV文件 (*.wav)'; '*.mat', 'MATLAB数据文件 (*.mat)'}, ... '保存矫正音频和音高数据'); if isequal(file, 0), return; end filename = fullfile(path, file); [~, ~, ext] = fileparts(filename); if strcmpi(ext, '.wav') % 保存为WAV文件并嵌入元数据 audiowrite(filename, corrected_audio, fs, ... 'BitsPerSample', 24, ... 'Comment', jsonencode(metadata)); msgbox('音频和音高数据保存成功!', '完成'); elseif strcmpi(ext, '.mat') % 保存为MAT文件 save(filename, 'corrected_audio', 'f0_corrected', 'time_source', 'fs'); msgbox('完整数据保存成功!', '完成'); end end 这是完整的代码,但是生成的音调变化较大,重采样变形有点严重,如何修改,我希望重采样之后每一帧能和原来音频的大致内容对应上
06-16
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值