量化中max_bits重复计算之优化

本文介绍了一种在音频编码过程中通过避免重复计算max_bits来提高整体编码效率的方法。通过对原有代码逻辑进行调整,将计算过程移出循环,显著减少了计算时间。以41.5M PCM信号为例,优化前后编码时间从53s减少到51s。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

在量化的时候发现,每个Frame中max_bits被重复计算了4次(双声道时:2个granule × 2个声道)。

 

将①处的计算提到循环外部②的位置,每个Frame只需要计算一次就可以了,不必重复计算。

 

 

max_bits = mean_bits / config.wave.channels; ---- ②

/*  */

    for( gr=2; gr--; )

    {

        for( ch=config.wave.channels; ch--; )

        {

            cod_info = (gr_info *)&(side_info->gr[gr].ch[ch]);    

           

/* calculation of number of available bit( per granule )

 * 计算每个颗粒可用的bit, 量化成0的部分则不编码进去.

 * 在一个Frame中重复计算了4次, 可以提到for循环外面去

 */

            //max_bits = mean_bits / config.wave.channels; ---- ① 

   

    . . . . . .

 

}

}

以41.5M的PCM信号为例,编码原本耗时53s,经优化后耗时51s。

%% PCM译码质量优化系统 clear; clc; close all; % 1. 生成测试信号 fs = 8000; % 采样率8kHz t = 0:1/fs:1; % 1秒时长 f = [500, 1400, 2500]; % 多频测试信号 signal = 0.5*sin(2*pi*f(1)*t) + 0.3*sin(2*pi*f(2)*t) + 0.2*sin(2*pi*f(3)*t); % 2. 改进的PCM编码函数 function pcm_code = enhanced_pcm_encode(x, n_bits) % 动态范围自适应调整 max_val = max(abs(x)) * 1.2; % 增加20%动态余量 x_norm = x / max_val; % 非均匀量化 (A-law压缩) A = 87.6; % A-law参数 compressed = sign(x_norm) .* (log(1 + A*abs(x_norm)) / log(1+A)); % 量化电平数 L = 2^n_bits; step = 2 / L; levels = -1 + step/2 : step : 1 - step/2; % 矢量量化 (提高效率) [~, idx] = min(abs(compressed' - levels), [], 2); pcm_code = de2bi(idx-1, n_bits, 'left-msb'); end % 3. 优化的PCM译码函数 function y = enhanced_pcm_decode(pcm_code, max_val, n_bits) % 解码为量化电平 idx = bi2de(pcm_code, 'left-msb') + 1; L = 2^n_bits; step = 2 / L; levels = -1 + step/2 : step : 1 - step/2; quantized = levels(idx)'; % A-law扩展 A = 87.6; expanded = sign(quantized) .* (exp(abs(quantized)*log(1+A)) - 1) / A; % 幅值恢复 y = expanded * max_val; % 优化重建滤波器 (FIR低通滤波器) fc = 3400; % 截止频率3400Hz order = 128; % 高阶滤波器 b = fir1(order, fc/(fs/2), 'low', kaiser(order+1, 5)); y = filter(b, 1, y); end % 4. 主程序流程 n_bits = 8; % 量化位数 max_val = max(abs(signal)) * 1.2; % 动态范围估计 % PCM编码 pcm_encoded = enhanced_pcm_encode(signal, n_bits); % PCM译码 decoded = enhanced_pcm_decode(pcm_encoded, max_val, n_bits); % 5. 性能评估 % 计算信噪比(SNR) noise = signal - decoded(1:length(signal)); signal_power = sum(signal.^2)/length(signal); noise_power = sum(noise.^2)/length(noise); snr = 10*log10(signal_power/noise_power); fprintf('优化后SNR: %.2f dB\n', snr); % 频谱分析 figure; subplot(2,1,1); psd(spectrum.periodogram, signal, 'Fs', fs); title('原始信号频谱'); subplot(2,1,2); psd(spectrum.periodogram, decoded, 'Fs', fs); title('译码信号频谱'); % 时域波形对比 figure; plot(t(1:200), signal(1:200), 'b', t(1:200), decoded(1:200), 'r--'); legend('原始信号', 'PCM译码'); title('时域波形对比'); xlabel('时间(s)'); ylabel('幅度'); 上述代码PCM译码恢复效果质量差,再不改变原有功能基础上进行优化
06-27
clear all; close all; clc; %% 基本参数设置 M = 100; % 码元数量 L = 100; % 每码元采样点数 fs = 2000; % 模拟信号采样频率 (Hz) Rb = 1000; % 数字信号码元速率 (bps) Ts_digital = 1/Rb; % 数字码元间隔 dt = Ts_digital/L; % 系统采样间隔 % 载波参数 fc = 2000; % 载波频率 2kHz % 信道参数 SNR_dB = 15; % 信噪比 (dB) %% 1. 生成两路模拟信号 % 计算模拟信号时间向量 TotalT_analog = M * Ts_digital; % 模拟信号总时长 t_analog = 0:1/fs:TotalT_analog-1/fs; % 模拟信号时间向量 % 第一路信号: 正弦波 + 噪声 f1 = 50; % 信号频率 50Hz signal1 = 0.8*sin(2*pi*f1*t_analog) % 第二路信号: 方波 f2 = 40; % 信号频率 20Hz signal2 = 0.5*square(2*pi*f2*t_analog, 40) % 多路信号波形 figure('Name', '多路信源波形', 'NumberTitle', 'off'); subplot(2,1,1); plot(t_analog, signal1, 'b', 'LineWidth', 1.5); title('第一路模拟信号 (正弦波)'); xlabel('时间 (s)'); ylabel('幅度'); grid on; axis tight; subplot(2,1,2); plot(t_analog, signal2, 'r', 'LineWidth', 1.5); title('第二路模拟信号 (方波)'); xlabel('时间 (s)'); ylabel('幅度'); grid on; axis tight; %% 2. PCM编码 (8位) % 采样模拟信号 sample_indices = round(linspace(1, length(t_analog), M)); sampled_signal1 = signal1(sample_indices); sampled_signal2 = signal2(sample_indices); % 归一化到0-255范围 (8位) normalized1 = floor((sampled_signal1 + 1) * 127.5); normalized2 = floor((sampled_signal2 + 1) * 127.5); % 确保值在0-255范围内 normalized1(normalized1 < 0) = 0; normalized1(normalized1 > 255) = 255; normalized2(normalized2 < 0) = 0; normalized2(normalized2 > 255) = 255; % 转换为8位二进制 pcm_signal1 = dec2bin(normalized1, 8); pcm_signal2 = dec2bin(normalized2, 8); % 将PCM编码转换为二进制流 binary_stream1 = []; binary_stream2 = []; for i = 1:size(pcm_signal1, 1) binary_stream1 = [binary_stream1, pcm_signal1(i,:) - '0']; end for i = 1:size(pcm_signal2, 1) binary_stream2 = [binary_stream2, pcm_signal2(i,:) - '0']; end % 显示PCM编码波形 figure('Name', 'PCM编码波形', 'NumberTitle', 'off'); subplot(2,1,1); plot_bits = min(100, length(binary_stream1)); % 限制绘图点数 stairs(1:plot_bits, binary_stream1(1:plot_bits), 'LineWidth', 1.5); title('第一路信号PCM编码'); xlabel('比特位置'); ylabel('比特值'); ylim([-0.1 1.1]); grid on; subplot(2,1,2); plot_bits = min(100, length(binary_stream2)); stairs(1:plot_bits, 
binary_stream2(1:plot_bits), 'LineWidth', 1.5); title('第二路信号PCM编码'); xlabel('比特位置'); ylabel('比特值'); ylim([-0.1 1.1]); grid on; %% 3. 信道编码 (汉明码 7,4) % 汉明码编码函数 hamming_encoded1 = hamming_encode(binary_stream1); hamming_encoded2 = hamming_encode(binary_stream2); % 显示信道编码波形 figure('Name', '信道编码波形', 'NumberTitle', 'off'); subplot(2,1,1); plot_bits = min(100, length(hamming_encoded1)); stairs(1:plot_bits, hamming_encoded1(1:plot_bits), 'LineWidth', 1.5); title('第一路信号汉明码编码'); xlabel('比特位置'); ylabel('比特值'); ylim([-0.1 1.1]); grid on; subplot(2,1,2); plot_bits = min(100, length(hamming_encoded2)); stairs(1:plot_bits, hamming_encoded2(1:plot_bits), 'LineWidth', 1.5); title('第二路信号汉明码编码'); xlabel('比特位置'); ylabel('比特值'); ylim([-0.1 1.1]); grid on; %% 4. 时分多路复用 % 交替两路信号 multiplexed_signal = []; min_length = min(length(hamming_encoded1), length(hamming_encoded2)); for i = 1:min_length multiplexed_signal = [multiplexed_signal, hamming_encoded1(i), hamming_encoded2(i)]; end % 显示复用信号 figure('Name', '信道传输波形', 'NumberTitle', 'off'); plot_bits = min(200, length(multiplexed_signal)); stairs(1:plot_bits, multiplexed_signal(1:plot_bits), 'LineWidth', 1.5); title('时分多路复用信号'); xlabel('比特位置'); ylabel('比特值'); ylim([-0.1 1.1]); grid on; %% 5. 创建数字系统时间向量 total_bits = length(multiplexed_signal); TotalT_digital = total_bits * Ts_digital; % 数字信号总时长 t_digital = 0:dt:TotalT_digital; % 数字系统时间向量 if length(t_digital) > total_bits * L t_digital = t_digital(1:total_bits*L); % 确保时间向量长度匹配 end %% 6. 
2ASK调制 % 复制每个比特L次 baseband = repelem(multiplexed_signal, L); % 确保baseband长度与时间向量匹配 if length(baseband) > length(t_digital) baseband = baseband(1:length(t_digital)); elseif length(baseband) < length(t_digital) t_digital = t_digital(1:length(baseband)); end % 生成载波 carrier = cos(2*pi*fc*t_digital(1:length(baseband))); % 2ASK调制 ask_signal = baseband .* carrier; % 显示调制信号 figure('Name', '2ASK调制信号', 'NumberTitle', 'off'); subplot(2,1,1); plot(t_digital(1:min(2000, length(ask_signal))), ask_signal(1:min(2000, length(ask_signal))), 'b', 'LineWidth', 1); title('2ASK调制信号'); xlabel('时间 (s)'); ylabel('幅度'); grid on; subplot(2,1,2); zoom_samples = min(200, length(ask_signal)); % 只显示前200个点 plot(t_digital(1:zoom_samples), ask_signal(1:zoom_samples), 'b', 'LineWidth', 1); title('2ASK调制信号 (放大)'); xlabel('时间 (s)'); ylabel('幅度'); grid on; %% 7. 信道传输 (加入高斯白噪声) noisy_signal = awgn(ask_signal, SNR_dB, 'measured'); % 显示加噪信号 figure('Name', '信道加入噪声后波形', 'NumberTitle', 'off'); subplot(2,1,1); plot(t_digital(1:min(2000, length(noisy_signal))), noisy_signal(1:min(2000, length(noisy_signal))), 'b', 'LineWidth', 1); title('加入高斯白噪声后的信号'); xlabel('时间 (s)'); ylabel('幅度'); grid on; subplot(2,1,2); zoom_samples = min(200, length(noisy_signal)); plot(t_digital(1:zoom_samples), noisy_signal(1:zoom_samples), 'b', 'LineWidth', 1); title('加入高斯白噪声后的信号 (放大)'); xlabel('时间 (s)'); ylabel('幅度'); grid on; %% 8. 接收端处理 % 带通滤波器设计 bpFilt = designfilt('bandpassfir', 'FilterOrder', 100, ... 'CutoffFrequency1', fc-500, 'CutoffFrequency2', fc+500, ... 
'SampleRate', 1/dt); % 应用带通滤波器 filtered_signal = filtfilt(bpFilt, noisy_signal); % 显示带通滤波后的信号 figure('Name', '接收端滤波器后波形', 'NumberTitle', 'off'); subplot(2,1,1); plot(t_digital(1:min(2000, length(filtered_signal))), filtered_signal(1:min(2000, length(filtered_signal))), 'b', 'LineWidth', 1); title('带通滤波后的信号'); xlabel('时间 (s)'); ylabel('幅度'); grid on; subplot(2,1,2); zoom_samples = min(200, length(filtered_signal)); plot(t_digital(1:zoom_samples), filtered_signal(1:zoom_samples), 'b', 'LineWidth', 1); title('带通滤波后的信号 (放大)'); xlabel('时间 (s)'); ylabel('幅度'); grid on; % 相干解调 demod_signal = filtered_signal .* carrier(1:length(filtered_signal)); % 低通滤波器设计 lpFilt = designfilt('lowpassfir', 'FilterOrder', 100, ... 'PassbandFrequency', Rb/2, 'StopbandFrequency', Rb, ... 'SampleRate', 1/dt); % 应用低通滤波器 demod_filtered = filtfilt(lpFilt, demod_signal); % 显示解调信号 figure('Name', '解调后波形', 'NumberTitle', 'off'); subplot(3,1,1); plot(t_digital(1:min(2000, length(demod_signal))), demod_signal(1:min(2000, length(demod_signal))), 'b', 'LineWidth', 1); title('相干解调后的信号'); xlabel('时间 (s)'); ylabel('幅度'); grid on; subplot(3,1,2); plot(t_digital(1:min(2000, length(demod_filtered))), demod_filtered(1:min(2000, length(demod_filtered))), 'r', 'LineWidth', 1.5); title('低通滤波后的信号'); xlabel('时间 (s)'); ylabel('幅度'); grid on; % 抽样判决 sampled_signal = demod_filtered(round(L/2):L:end); received_bits = sampled_signal > 0.5; % 截取与发送相同长度的比特流 if length(received_bits) > length(multiplexed_signal) received_bits = received_bits(1:length(multiplexed_signal)); else multiplexed_signal = multiplexed_signal(1:length(received_bits)); end % 显示抽样判决后的信号 subplot(3,1,3); plot_bits = min(200, length(received_bits)); stairs(1:plot_bits, received_bits(1:plot_bits), 'm', 'LineWidth', 1.5); title('抽样判决后的数字信号'); xlabel('比特位置'); ylabel('比特值'); ylim([-0.1 1.1]); grid on; %% 9. 
分路 received_signal1 = received_bits(1:2:end); received_signal2 = received_bits(2:2:end); % 确保长度匹配 min_length = min(length(received_signal1), length(hamming_encoded1)); received_signal1 = received_signal1(1:min_length); hamming_encoded1 = hamming_encoded1(1:min_length); min_length = min(length(received_signal2), length(hamming_encoded2)); received_signal2 = received_signal2(1:min_length); hamming_encoded2 = hamming_encoded2(1:min_length); % 显示分路信号 figure('Name', '多路信源解调波形', 'NumberTitle', 'off'); subplot(2,1,1); plot_bits = min(100, length(received_signal1)); stairs(1:plot_bits, received_signal1(1:plot_bits), 'b', 'LineWidth', 1.5); title('第一路解调信号'); xlabel('比特位置'); ylabel('比特值'); ylim([-0.1 1.1]); grid on; subplot(2,1,2); plot_bits = min(100, length(received_signal2)); stairs(1:plot_bits, received_signal2(1:plot_bits), 'r', 'LineWidth', 1.5); title('第二路解调信号'); xlabel('比特位置'); ylabel('比特值'); ylim([-0.1 1.1]); grid on; %% 10. 信道译码 (汉明码 7,4) decoded_signal1 = hamming_decode(received_signal1); decoded_signal2 = hamming_decode(received_signal2); % 确保长度匹配 min_length = min(length(decoded_signal1), length(binary_stream1)); decoded_signal1 = decoded_signal1(1:min_length); binary_stream1 = binary_stream1(1:min_length); min_length = min(length(decoded_signal2), length(binary_stream2)); decoded_signal2 = decoded_signal2(1:min_length); binary_stream2 = binary_stream2(1:min_length); % 显示信道译码波形 figure('Name', '信道译码波形', 'NumberTitle', 'off'); subplot(2,1,1); plot_bits = min(100, length(decoded_signal1)); stairs(1:plot_bits, decoded_signal1(1:plot_bits), 'b', 'LineWidth', 1.5); title('第一路汉明码译码'); xlabel('比特位置'); ylabel('比特值'); ylim([-0.1 1.1]); grid on; subplot(2,1,2); plot_bits = min(100, length(decoded_signal2)); stairs(1:plot_bits, decoded_signal2(1:plot_bits), 'r', 'LineWidth', 1.5); title('第二路汉明码译码'); xlabel('比特位置'); ylabel('比特值'); ylim([-0.1 1.1]); grid on; %% 11. 
PCM译码 % 将二进制流转换为十进制 decoded_bytes1 = reshape(decoded_signal1(1:floor(length(decoded_signal1)/8)*8), 8, [])'; decoded_bytes2 = reshape(decoded_signal2(1:floor(length(decoded_signal2)/8)*8), 8, [])'; digital_values1 = bin2dec(char(decoded_bytes1 + '0')); digital_values2 = bin2dec(char(decoded_bytes2 + '0')); % 转换为模拟信号 recovered_signal1 = (digital_values1 / 127.5) - 1; recovered_signal2 = (digital_values2 / 127.5) - 1; % 插值恢复连续信号 recovery_time = (0:length(recovered_signal1)-1) * Ts_digital; continuous_time = t_analog; interp_signal1 = interp1(recovery_time, recovered_signal1, continuous_time, 'spline'); interp_signal2 = interp1(recovery_time, recovered_signal2, continuous_time, 'spline'); % 显示恢复信号 figure('Name', 'PCM译码波形', 'NumberTitle', 'off'); subplot(2,1,1); plot(continuous_time, interp_signal1, 'b', 'LineWidth', 1.5); hold on; stem(recovery_time, recovered_signal1, 'r', 'filled'); title('第一路信号PCM译码恢复'); xlabel('时间 (s)'); ylabel('幅度'); legend('恢复信号', '采样点'); grid on; axis tight; subplot(2,1,2); plot(continuous_time, interp_signal2, 'b', 'LineWidth', 1.5); hold on; stem(recovery_time, recovered_signal2, 'r', 'filled'); title('第二路信号PCM译码恢复'); xlabel('时间 (s)'); ylabel('幅度'); legend('恢复信号', '采样点'); grid on; axis tight; %% 12. 
误码率分析 % 计算原始信号与接收信号的比特差异 original_bits = [multiplexed_signal]; if length(original_bits) > length(received_bits) original_bits = original_bits(1:length(received_bits)); else received_bits = received_bits(1:length(original_bits)); end % 计算误码率 bit_errors = sum(original_bits ~= received_bits); total_bits = length(original_bits); bit_error_rate = bit_errors / total_bits; % 计算信道编码前后的误码率 original_data = [binary_stream1, binary_stream2]; received_data = [decoded_signal1(1:length(binary_stream1)), decoded_signal2(1:length(binary_stream2))]; if length(original_data) > length(received_data) original_data = original_data(1:length(received_data)); else received_data = received_data(1:length(original_data)); end data_errors = sum(original_data ~= received_data); data_error_rate = data_errors / length(original_data); % 显示结果 fprintf('通信系统性能分析:\n'); fprintf('总传输比特数: %d\n', total_bits); fprintf('信道编码前误码数: %d\n', data_errors); fprintf('信道编码前误码率: %.4f\n', data_error_rate); fprintf('信道编码后误码数: %d\n', bit_errors); fprintf('信道编码后误码率: %.4f\n', bit_error_rate); fprintf('信道编码纠错能力: %.2f%%\n', (data_errors - bit_errors)/max(1,data_errors)*100); % 显示误码位置 figure('Name', '误码分析', 'NumberTitle', 'off'); subplot(2,1,1); plot_bits = min(200, length(original_bits)); stem(1:plot_bits, original_bits(1:plot_bits), 'b', 'filled'); hold on; error_positions = find(original_bits(1:plot_bits) ~= received_bits(1:plot_bits)); stem(error_positions, original_bits(error_positions), 'r', 'filled'); title('原始发送比特流 (红色为误码位置)'); xlabel('比特位置'); ylabel('比特值'); ylim([-0.1 1.1]); legend('正确比特', '错误比特'); grid on; subplot(2,1,2); stem(1:plot_bits, received_bits(1:plot_bits), 'g', 'filled'); hold on; stem(error_positions, received_bits(error_positions), 'r', 'filled'); title('接收比特流 (红色为误码位置)'); xlabel('比特位置'); ylabel('比特值'); ylim([-0.1 1.1]); legend('接收比特', '错误比特'); grid on; %% 汉明码编码函数 function encoded = hamming_encode(data) % (7,4)汉明码编码 encoded = []; for i = 1:4:length(data) % 获取4位数据 start_idx = i; end_idx = min(i+3, 
length(data)); block = data(start_idx:end_idx); % 如果不足4位,补零 if length(block) < 4 block = [block, zeros(1, 4-length(block))]; end % 计算校验位 p1 = mod(block(1) + block(2) + block(3), 2); p2 = mod(block(1) + block(3) + block(4), 2); p3 = mod(block(2) + block(3) + block(4), 2); % 组合为7位码字 encoded = [encoded, block(1), block(2), block(3), block(4), p1, p2, p3]; end end %% 汉明码解码函数 function decoded = hamming_decode(data) % (7,4)汉明码解码 decoded = []; for i = 1:7:length(data) % 获取7位码字 start_idx = i; end_idx = min(i+6, length(data)); block = data(start_idx:end_idx); % 如果不足7位,补零 if length(block) < 7 block = [block, zeros(1, 7-length(block))]; end % 提取数据和校验位 d1 = block(1); d2 = block(2); d3 = block(3); d4 = block(4); p1 = block(5); p2 = block(6); p3 = block(7); % 计算校验子 s1 = mod(p1 + d1 + d2 + d3, 2); s2 = mod(p2 + d1 + d3 + d4, 2); s3 = mod(p3 + d2 + d3 + d4, 2); % 错误定位和纠正 error_pos = s1*1 + s2*2 + s3*4; if error_pos > 0 && error_pos <= 7 % 纠正错误 block(error_pos) = mod(block(error_pos) + 1, 2); end % 提取原始数据 decoded = [decoded, block(1:min(4, end_idx-start_idx+1))]; end end上述代码PCM译码恢复质量差,进行优化,给出优化后完整代码
06-27
class UniformAffineQuantizer(nn.Module): def __init__( self, n_bits: int = 8, symmetric: bool = False, per_channel_axes=[], metric="minmax", dynamic=False, dynamic_method="per_cluster", group_size=None, shape=None, lwc=False, disable_zero_point=False, ): """ support cluster quantize dynamic_method support per_token and per_cluster """ super().__init__() self.symmetric = symmetric self.disable_zero_point = disable_zero_point assert 2 <= n_bits <= 16, "bitwidth not supported" self.n_bits = n_bits if self.disable_zero_point: self.qmin = -(2 ** (n_bits - 1)) self.qmax = 2 ** (n_bits - 1) - 1 else: self.qmin = 0 self.qmax = 2 ** (n_bits) - 1 self.per_channel_axes = per_channel_axes self.metric = metric self.cluster_counts = None self.cluster_dim = None self.scale = None self.zero_point = None self.round_zero_point = None self.cached_xmin = None self.cached_xmax = None self.dynamic = dynamic self.dynamic_method = dynamic_method self.deficiency = 0 self.lwc = lwc init_value = 4. # inti value of learnable weight clipping if lwc: if group_size: dim1 = int(shape[0]*math.ceil(shape[1]/group_size)) self.deficiency = shape[-1]%group_size if self.deficiency > 0: self.deficiency = group_size - self.deficiency assert self.symmetric # support for mlc-llm symmetric quantization else: dim1 = shape[0] self.upbound_factor = nn.Parameter(torch.ones((dim1,1))*init_value) self.lowbound_factor = nn.Parameter(torch.ones((dim1,1))*init_value) self.sigmoid = nn.Sigmoid() self.enable = True self.group_size = group_size def change_n_bits(self, n_bits): self.n_bits = n_bits if self.disable_zero_point: self.qmin = -(2 ** (n_bits - 1)) self.qmax = 2 ** (n_bits - 1) - 1 else: self.qmin = 0 self.qmax = 2 ** (n_bits) - 1 def fake_quant(self, x, scale, round_zero_point): if self.deficiency > 0: pad_zeros = torch.zeros((x.shape[0],self.deficiency),dtype=x.dtype,device=x.device) x = torch.cat((x,pad_zeros),dim=1) if self.group_size: assert len(x.shape)==2, "only support linear layer now" dim1, dim2 = 
x.shape x = x.reshape(-1, self.group_size) x_int = round_ste(x / scale) if round_zero_point is not None: x_int = x_int.add(round_zero_point) x_int = x_int.clamp(self.qmin, self.qmax) x_dequant = x_int if round_zero_point is not None: x_dequant = x_dequant.sub(round_zero_point) x_dequant = x_dequant.mul(scale) if self.group_size: x_dequant = x_dequant.reshape(dim1, dim2) if self.deficiency > 0: x_dequant = x_dequant[:,:-self.deficiency] return x_dequant def forward(self, x: torch.Tensor): if self.n_bits >= 16 or not self.enable: return x if self.metric == "fix0to1": return x.mul_(2**self.n_bits-1).round_().div_(2**self.n_bits-1) if self.dynamic_method == "per_token" or self.dynamic_method == "per_channel": self.per_token_dynamic_calibration(x) else: raise NotImplementedError() x_dequant = self.fake_quant(x, self.scale, self.round_zero_point) return x_dequant def per_token_dynamic_calibration(self, x): if self.group_size: if self.deficiency == 0: x = x.reshape(-1,self.group_size) else: pad_zeros = torch.zeros((x.shape[0],self.deficiency),dtype=x.dtype,device=x.device) x = torch.cat((x,pad_zeros),dim=1) x = x.reshape(-1,self.group_size) reduce_shape = [-1] xmin = x.amin(reduce_shape, keepdim=True) xmax = x.amax(reduce_shape, keepdim=True) if self.lwc: xmax = self.sigmoid(self.upbound_factor)*xmax xmin = self.sigmoid(self.lowbound_factor)*xmin if self.symmetric: abs_max = torch.max(xmax.abs(),xmin.abs()) scale = abs_max / (2**(self.n_bits-1)-1) self.scale = scale.clamp(min=CLIPMIN, max=1e4) zero_point = (2**(self.n_bits-1)-1)*torch.ones_like(self.scale) else: range = xmax - xmin scale = range / (2**self.n_bits-1) self.scale = scale.clamp(min=CLIPMIN, max=1e4) zero_point = -(xmin) / (self.scale) if self.disable_zero_point: self.round_zero_point = None else: self.round_zero_point = zero_point.clamp(min=-1e4, max=1e4).round() def register_scales_and_zeros(self): self.register_buffer('scales', self.scale) self.register_buffer('zeros', self.round_zero_point) del self.scale 
del self.round_zero_point
07-24
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值