/**
* Copyright (C) 2024 TP-Link. All rights reserved.
*/
#ifdef NEON_NO_SUPPORT /* The 9851X series does not support NEON */
#undef AUDIO_NEON /* NEON unavailable: make sure the NEON-acceleration macro is NOT defined */
#else /* Targets that support NEON */
#define AUDIO_NEON /* When defined, NEON instruction acceleration is enabled for the computations */
#ifdef PC_DEBUG
#include <NEON_2_SSE.h>
#else
#include <arm_neon.h>
#endif
#endif /* NEON_NO_SUPPORT */
#include "audio_ns.h"
#include "ams_common.h"
// Selects one of the noise-suppression aggressiveness presets.
// Each preset writes self->overdrive, self->denoiseBound and self->gainmap:
//   mode 0: overdrive 1.00, denoiseBound 0.5,   gainmap 0
//   mode 1: overdrive 1.00, denoiseBound 0.25,  gainmap 1
//   mode 2: overdrive 1.10, denoiseBound 0.125, gainmap 1
//   mode 3: overdrive 1.15, denoiseBound 0.06,  gainmap 1
//   mode 4: overdrive 1.15, denoiseBound 0.01,  gainmap 1
//   mode 5: overdrive 1.20, denoiseBound 0.005, gainmap 1
// |gain| is accepted for interface compatibility but is not read here.
// Returns 0 on success (and records the mode in self->aggrMode); returns -1
// for any other mode, leaving |self| untouched.
int set_policy_core(NS_CONTEXT *self, int mode, float gain)
{
    (void)gain;

    switch (mode)
    {
    case 0:
        self->overdrive = 1.f;
        self->denoiseBound = 0.5f;
        self->gainmap = 0;
        break;
    case 1:
        self->overdrive = 1.f;
        self->denoiseBound = 0.25f;
        self->gainmap = 1;
        break;
    case 2:
        self->overdrive = 1.1f;
        self->denoiseBound = 0.125f;
        self->gainmap = 1;
        break;
    case 3:
        self->overdrive = 1.15f;
        self->denoiseBound = 0.06f;
        self->gainmap = 1;
        break;
    case 4:
        self->overdrive = 1.15f;
        self->denoiseBound = 0.01f;
        self->gainmap = 1;
        break;
    case 5:
        self->overdrive = 1.20f;
        self->denoiseBound = 0.005f;
        self->gainmap = 1;
        break;
    default:
        // Unknown preset: reject without modifying the context.
        return -1;
    }

    self->aggrMode = mode;
    return 0;
}
// Fills self->featureExtractionParams with the fixed constants used when
// deriving the prior-model thresholds from the feature histograms.
// The two peak-weight thresholds are derived from self->modelUpdatePars[1],
// so that value must already be set when this function is called.
void set_feature_extraction_parameters(NS_CONTEXT *self)
{
    // Minimum histogram-peak weight for a feature to be accepted:
    // 30% of self->modelUpdatePars[1], truncated to an integer.
    const int minPeakWeight = (int)(0.3 * (self->modelUpdatePars[1]));

    // Scale factors applied to the dominant histogram peaks to obtain the
    // prior-model thresholds.
    self->featureExtractionParams.factor1ModelPars = 1.2f; // LRT and spectral difference.
    self->featureExtractionParams.factor2ModelPars = 0.9f; // Spectral flatness (noise flatter than speech).

    // ---- LRT feature ----
    self->featureExtractionParams.binSizeLrt = 0.1f;      // Histogram bin width.
    self->featureExtractionParams.rangeAvgHistLrt = 1.f;  // Histogram range used for the LRT average.
    self->featureExtractionParams.thresFluctLrt = 0.05f;  // Fluctuation limit.
    self->featureExtractionParams.minLrt = 0.2f;          // Threshold clamp (low).
    self->featureExtractionParams.maxLrt = 1.f;           // Threshold clamp (high).

    // ---- Spectral flatness feature ----
    self->featureExtractionParams.binSizeSpecFlat = 0.05f;           // Histogram bin width.
    self->featureExtractionParams.thresPosSpecFlat = 0.6f;           // Peak-position limit (flatness lies in 0..1).
    self->featureExtractionParams.limitPeakWeightsSpecFlat = 0.5f;   // Relevance limit of the second peak.
    self->featureExtractionParams.minSpecFlat = 0.1f;                // Threshold clamp (low).
    self->featureExtractionParams.maxSpecFlat = 0.95f;               // Threshold clamp (high).
    self->featureExtractionParams.thresWeightSpecFlat = minPeakWeight;
    // Two peaks closer than two bins are considered mergeable.
    self->featureExtractionParams.limitPeakSpacingSpecFlat = 2 * self->featureExtractionParams.binSizeSpecFlat;

    // ---- Spectral difference feature ----
    self->featureExtractionParams.binSizeSpecDiff = 0.1f;            // Histogram bin width.
    self->featureExtractionParams.limitPeakWeightsSpecDiff = 0.5f;   // Relevance limit of the second peak.
    self->featureExtractionParams.minSpecDiff = 0.16f;               // Threshold clamp (low).
    self->featureExtractionParams.maxSpecDiff = 1.f;                 // Threshold clamp (high).
    self->featureExtractionParams.thresWeightSpecDiff = minPeakWeight;
    // Two peaks closer than two bins are considered mergeable.
    self->featureExtractionParams.limitPeakSpacingSpecDiff = 2 * self->featureExtractionParams.binSizeSpecDiff;
}
/********************************************* Noise estimation ***********************************************/
// Quantile-based noise estimation.
// Runs SIMULT interleaved quantile trackers over the spectrum and exports the
// current quantile (converted back with expf) as the noise estimate.
// Inputs:
//   * |lmagn| is compared against the log-domain quantile, so it is presumably
//     the log-magnitude spectrum (self->magnLen bins) — confirm with caller.
// Output:
//   * |noise| receives a copy of self->quantile (self->magnLen floats).
// Side effects: updates self->updates, self->counter, self->lquantile,
// self->density and self->quantile.
void NoiseEstimation(NS_CONTEXT *self, float *lmagn, float *noise)
{
    size_t bin;
    size_t est;
    size_t base = 0;

    // Frame counter saturates at END_STARTUP_LONG.
    if (self->updates < END_STARTUP_LONG)
    {
        self->updates++;
    }

    for (est = 0; est < SIMULT; est++)
    {
        // 1 / (counter + 1): the adaptation weight shrinks as this tracker ages.
        const float invCount = 1.f / (self->counter[est] + 1.f);
        const float densityStep = invCount / (2.f * WIDTH);

        base = est * self->magnLen;

        for (bin = 0; bin < self->magnLen; bin++)
        {
            const size_t k = base + bin;

            // Step size: full FACTOR by default, refined (divided by the
            // density) once the density estimate exceeds 1.
            float step = FACTOR;
            if (self->density[k] > 1.0)
            {
                step = FACTOR / self->density[k];
            }

            // Move the log-quantile up or down depending on which side of it
            // the current sample falls.
            if (lmagn[bin] > self->lquantile[k])
            {
                self->lquantile[k] += QUANTILE * step * invCount;
            }
            else
            {
                self->lquantile[k] -= (1.f - QUANTILE) * step * invCount;
            }

            // Density update, only when the sample lies within WIDTH of the
            // quantile. Algebraically this is density += densityStep.
            // NOTE(review): a running-average form would weight the old
            // density by counter rather than counter + 1 — confirm which is
            // intended before changing.
            if (fabsf(lmagn[bin] - self->lquantile[k]) < WIDTH)
            {
                self->density[k] = (self->counter[est] + 1) * self->density[k] * invCount + densityStep;
            }
        }

        // When a tracker reaches END_STARTUP_LONG it is restarted; once the
        // startup phase is over its quantile is published at that moment.
        if (self->counter[est] >= END_STARTUP_LONG)
        {
            self->counter[est] = 0;
            if (self->updates >= END_STARTUP_LONG)
            {
                for (bin = 0; bin < self->magnLen; bin++)
                {
                    self->quantile[bin] = expf(self->lquantile[base + bin]);
                }
            }
        }
        self->counter[est]++;
    }

    // During startup, publish the last tracker every frame so the noise
    // estimate is non-zero from the beginning.
    if (self->updates < END_STARTUP_LONG)
    {
        for (bin = 0; bin < self->magnLen; bin++)
        {
            self->quantile[bin] = expf(self->lquantile[base + bin]);
        }
    }

    memcpy(noise, self->quantile, self->magnLen * sizeof(*noise));
}
// Computes per-bin post and prior SNR for the current frame.
// Inputs:
//   * |magn| is the current magnitude spectrum.
//   * |noise| is the current noise magnitude estimate.
// Outputs (each self->magnLen entries):
//   * |snrLocPost| is (magn - noise) / noise, floored at 0.
//   * |snrLocPrior| is twice the decision-directed blend of the previous
//     frame's estimate and |snrLocPost|.
//   * |logSnrLocPrior| is log1pf(snrLocPrior).
void ComputeSnr(const NS_CONTEXT *self, const float *magn, const float *noise, float *snrLocPrior, float *logSnrLocPrior, float *snrLocPost)
{
    size_t k;

    for (k = 0; k < self->magnLen; k++)
    {
        // Estimate carried over from the previous frame: previous magnitude
        // after the gain filter, relative to the previous noise.
        const float prevStsa = (self->magnPrev[k] * self->smooth[k]) / (self->noisePrev[k] + EPSILON);

        // Instantaneous (post) SNR; zero when the signal is not above noise.
        snrLocPost[k] = (magn[k] > noise[k]) ? (magn[k] - noise[k]) / (noise[k] + EPSILON) : 0.f;

        // Decision-directed prior SNR, scaled by 2.
        snrLocPrior[k] = 2.0f * (DD_PR_SNR * prevStsa + (1.f - DD_PR_SNR) * snrLocPost[k]);
        logSnrLocPrior[k] = log1pf(snrLocPrior[k]);
    }
}
// Updates the spectral-flatness feature in self->featureData[0].
// Inputs:
//   * |magnIn| is the magnitude spectrum.
//   * |logmagnIn| supplies the per-bin values summed for the geometric mean
//     (presumably log of |magnIn| — confirm with caller).
// Flatness is exp(mean(log)) / mean(magnitude), skipping the first bin, and is
// blended into the feature with time constant SPECT_FL_TAVG. If any used bin
// is not strictly positive, the feature is decayed towards zero instead.
void ComputeSpectralFlatness(NS_CONTEXT *self, const float *magnIn, const float *logmagnIn)
{
    const size_t firstBin = 1; // Number of leading bins excluded from the measure.
    size_t k;
    float logSum = 0;
    float arithSum = self->sumMagn;
    float flatness;

    // Remove the excluded leading bin(s) from the arithmetic sum.
    for (k = 0; k < firstBin; k++)
    {
        arithSum -= magnIn[k];
    }

    // Accumulate the log terms; bail out on a non-positive magnitude, for
    // which the geometric mean is undefined.
    for (k = firstBin; k < self->magnLen; k++)
    {
        if (!(magnIn[k] > 0.0))
        {
            self->featureData[0] -= SPECT_FL_TAVG * self->featureData[0];
            return;
        }
        logSum += logmagnIn[k];
    }

    // Turn both sums into means.
    arithSum = arithSum * self->normMagnLen;
    logSum = logSum * self->normMagnLen;

    // Geometric mean over arithmetic mean.
    flatness = expf(logSum) / arithSum;

    // Time-averaged update of the feature.
    self->featureData[0] += SPECT_FL_TAVG * (flatness - self->featureData[0]);
}
// Updates the spectral-difference feature in self->featureData[4].
// The measure compares the input spectrum |magnIn| against the template
// spectrum self->magnAvgPause:
//   diff = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause)
// It is normalized by self->featureData[5] and blended in with time constant
// SPECT_DIFF_TAVG. Also accumulates self->signalEnergy into
// self->featureData[6].
// (Original author's observation: the feature stays below ~0.12 in white-noise
// segments and ranges roughly 0.2 to 2 in speech segments.)
void ComputeSpectralDifference(NS_CONTEXT *self, const float *magnIn)
{
    size_t k;
    float meanPause = 0;
    float meanMagn = self->sumMagn;
    float cov = 0;
    float varTemplate = 0;
    float varInput = 0;
    float diff;

    // Means of the template and of the input spectrum.
    for (k = 0; k < self->magnLen; k++)
    {
        meanPause += self->magnAvgPause[k];
    }
    meanPause *= self->normMagnLen;
    meanMagn *= self->normMagnLen;

    // Second-order statistics around those means.
    for (k = 0; k < self->magnLen; k++)
    {
        const float dPause = self->magnAvgPause[k] - meanPause;
        const float dMagn = magnIn[k] - meanMagn;

        cov += dMagn * dPause;
        varTemplate += dPause * dPause;
        varInput += dMagn * dMagn;
    }
    cov *= self->normMagnLen;
    varTemplate *= self->normMagnLen;
    varInput *= self->normMagnLen;

    // Accumulate signal energy for the normalization term.
    self->featureData[6] += self->signalEnergy;

    // Part of the input variance not explained by the template.
    diff = varInput - (cov * cov) / (varTemplate + EPSILON);

    // Normalize, then apply the time-averaged update.
    diff = diff / (self->featureData[5] + EPSILON);
    self->featureData[4] += SPECT_DIFF_TAVG * (diff - self->featureData[4]);
}
// Collects feature histograms and derives the prior-model thresholds/weights.
// Features used: LRT (self->featureData[3]), spectral flatness
// (self->featureData[0]) and spectral difference (self->featureData[4]).
// |flag| = 0: only record the current feature values in the histograms
//             (self->histLrt, self->histSpecFlat, self->histSpecDiff).
// |flag| = 1: compute thresholds and weights from the histograms and store
//             them in self->priorModelPars:
//               [0] LRT threshold, [1] flatness threshold,
//               [3] spectral-difference threshold,
//               [4]/[5]/[6] weights for LRT / flatness / difference.
//             The histograms are cleared afterwards when
//             self->modelUpdatePars[0] >= 1.
// Any other |flag| value does nothing.
void FeatureParameterExtraction(NS_CONTEXT *self, int flag)
{
    int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt;
    // Heights of the two largest histogram peaks.
    int maxPeak1, maxPeak2;
    int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, weightPeak2SpecDiff;
    float binMid, featureSum;
    float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff;
    float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl;

    // Update histograms.
    // The range test is done in float, so value / binSize can still round up
    // to exactly HIST_PAR_EST for a value just below the upper edge; the index
    // is therefore clamped to the last bin to avoid writing past the array.
    if (flag == 0)
    {
        // LRT
        if ((self->featureData[3] < HIST_PAR_EST * self->featureExtractionParams.binSizeLrt) && (self->featureData[3] >= 0.0))
        {
            i = (int)(self->featureData[3] / self->featureExtractionParams.binSizeLrt);
            if (i >= HIST_PAR_EST)
            {
                i = HIST_PAR_EST - 1;
            }
            self->histLrt[i]++;
        }
        // Spectral flatness
        if ((self->featureData[0] < HIST_PAR_EST * self->featureExtractionParams.binSizeSpecFlat) && (self->featureData[0] >= 0.0))
        {
            i = (int)(self->featureData[0] / self->featureExtractionParams.binSizeSpecFlat);
            if (i >= HIST_PAR_EST)
            {
                i = HIST_PAR_EST - 1;
            }
            self->histSpecFlat[i]++;
        }
        // Spectral difference.
        if ((self->featureData[4] < HIST_PAR_EST * self->featureExtractionParams.binSizeSpecDiff) && (self->featureData[4] >= 0.0))
        {
            i = (int)(self->featureData[4] / self->featureExtractionParams.binSizeSpecDiff);
            if (i >= HIST_PAR_EST)
            {
                i = HIST_PAR_EST - 1;
            }
            self->histSpecDiff[i]++;
        }
    }

    // Extract parameters for speech/noise probability.
    if (flag == 1)
    {
        // LRT feature: compute the average over
        // self->featureExtractionParams.rangeAvgHistLrt.
        avgHistLrt = 0;
        avgHistLrtCompl = 0;
        avgSquareHistLrt = 0;
        numHistLrt = 0;
        for (i = 0; i < HIST_PAR_EST; i++)
        {
            binMid = ((float)i + 0.5f) * self->featureExtractionParams.binSizeLrt;
            if (binMid <= self->featureExtractionParams.rangeAvgHistLrt)
            {
                avgHistLrt += self->histLrt[i] * binMid;
                numHistLrt += self->histLrt[i];
            }
            avgSquareHistLrt += self->histLrt[i] * binMid * binMid;
            avgHistLrtCompl += self->histLrt[i] * binMid;
        }
        if (numHistLrt > 0)
        {
            avgHistLrt = avgHistLrt / ((float)numHistLrt);
        }
        avgHistLrtCompl = avgHistLrtCompl / ((float)self->modelUpdatePars[1]);
        avgSquareHistLrt = avgSquareHistLrt / ((float)self->modelUpdatePars[1]);
        fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl;

        // Get threshold for LRT feature.
        if (fluctLrt < self->featureExtractionParams.thresFluctLrt)
        {
            // Very low fluctuation, so likely noise.
            self->priorModelPars[0] = self->featureExtractionParams.maxLrt;
        }
        else
        {
            self->priorModelPars[0] = self->featureExtractionParams.factor1ModelPars * avgHistLrt;
            // Check if value is within min/max range.
            if (self->priorModelPars[0] < self->featureExtractionParams.minLrt)
            {
                self->priorModelPars[0] = self->featureExtractionParams.minLrt;
            }
            if (self->priorModelPars[0] > self->featureExtractionParams.maxLrt)
            {
                self->priorModelPars[0] = self->featureExtractionParams.maxLrt;
            }
        }
        // Done with LRT feature.

        // For spectral flatness and spectral difference: compute the main
        // peaks of the histogram.
        maxPeak1 = 0;
        maxPeak2 = 0;
        posPeak1SpecFlat = 0;
        posPeak2SpecFlat = 0;
        weightPeak1SpecFlat = 0;
        weightPeak2SpecFlat = 0;
        // Peaks for flatness.
        for (i = 0; i < HIST_PAR_EST; i++)
        {
            binMid = (i + 0.5f) * self->featureExtractionParams.binSizeSpecFlat;
            if (self->histSpecFlat[i] > maxPeak1)
            {
                // Found new "first" peak; the old first peak becomes second.
                maxPeak2 = maxPeak1;
                weightPeak2SpecFlat = weightPeak1SpecFlat;
                posPeak2SpecFlat = posPeak1SpecFlat;
                maxPeak1 = self->histSpecFlat[i];
                weightPeak1SpecFlat = self->histSpecFlat[i];
                posPeak1SpecFlat = binMid;
            }
            else if (self->histSpecFlat[i] > maxPeak2)
            {
                // Found new "second" peak.
                maxPeak2 = self->histSpecFlat[i];
                weightPeak2SpecFlat = self->histSpecFlat[i];
                posPeak2SpecFlat = binMid;
            }
        }

        // Compute two peaks for spectral difference.
        maxPeak1 = 0;
        maxPeak2 = 0;
        posPeak1SpecDiff = 0;
        posPeak2SpecDiff = 0;
        weightPeak1SpecDiff = 0;
        weightPeak2SpecDiff = 0;
        // Peaks for spectral difference.
        for (i = 0; i < HIST_PAR_EST; i++)
        {
            binMid = ((float)i + 0.5f) * self->featureExtractionParams.binSizeSpecDiff;
            if (self->histSpecDiff[i] > maxPeak1)
            {
                // Found new "first" peak; the old first peak becomes second.
                maxPeak2 = maxPeak1;
                weightPeak2SpecDiff = weightPeak1SpecDiff;
                posPeak2SpecDiff = posPeak1SpecDiff;
                maxPeak1 = self->histSpecDiff[i];
                weightPeak1SpecDiff = self->histSpecDiff[i];
                posPeak1SpecDiff = binMid;
            }
            else if (self->histSpecDiff[i] > maxPeak2)
            {
                // Found new "second" peak.
                maxPeak2 = self->histSpecDiff[i];
                weightPeak2SpecDiff = self->histSpecDiff[i];
                posPeak2SpecDiff = binMid;
            }
        }

        // For spectrum flatness feature.
        useFeatureSpecFlat = 1;
        // Merge the two peaks if they are close.
        if ((fabsf(posPeak2SpecFlat - posPeak1SpecFlat) < self->featureExtractionParams.limitPeakSpacingSpecFlat) && (weightPeak2SpecFlat > self->featureExtractionParams.limitPeakWeightsSpecFlat * weightPeak1SpecFlat))
        {
            weightPeak1SpecFlat += weightPeak2SpecFlat;
            posPeak1SpecFlat = 0.5f * (posPeak1SpecFlat + posPeak2SpecFlat);
        }
        // Reject if weight of peaks is not large enough, or peak value too small.
        if (weightPeak1SpecFlat < self->featureExtractionParams.thresWeightSpecFlat || posPeak1SpecFlat < self->featureExtractionParams.thresPosSpecFlat)
        {
            useFeatureSpecFlat = 0;
        }
        // If selected, get the threshold.
        if (useFeatureSpecFlat == 1)
        {
            // Compute the threshold.
            self->priorModelPars[1] = self->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat;
            // Check if value is within min/max range.
            if (self->priorModelPars[1] < self->featureExtractionParams.minSpecFlat)
            {
                self->priorModelPars[1] = self->featureExtractionParams.minSpecFlat;
            }
            if (self->priorModelPars[1] > self->featureExtractionParams.maxSpecFlat)
            {
                self->priorModelPars[1] = self->featureExtractionParams.maxSpecFlat;
            }
        }
        // Done with flatness feature.

        // For template (spectral difference) feature.
        useFeatureSpecDiff = 1;
        // Merge the two peaks if they are close.
        if ((fabsf(posPeak2SpecDiff - posPeak1SpecDiff) < self->featureExtractionParams.limitPeakSpacingSpecDiff) && (weightPeak2SpecDiff > self->featureExtractionParams.limitPeakWeightsSpecDiff * weightPeak1SpecDiff))
        {
            weightPeak1SpecDiff += weightPeak2SpecDiff;
            posPeak1SpecDiff = 0.5f * (posPeak1SpecDiff + posPeak2SpecDiff);
        }
        // Get the threshold value.
        self->priorModelPars[3] = self->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff;
        // Reject if weight of peaks is not large enough.
        if (weightPeak1SpecDiff < self->featureExtractionParams.thresWeightSpecDiff)
        {
            useFeatureSpecDiff = 0;
        }
        // Check if value is within min/max range.
        if (self->priorModelPars[3] < self->featureExtractionParams.minSpecDiff)
        {
            self->priorModelPars[3] = self->featureExtractionParams.minSpecDiff;
        }
        if (self->priorModelPars[3] > self->featureExtractionParams.maxSpecDiff)
        {
            self->priorModelPars[3] = self->featureExtractionParams.maxSpecDiff;
        }
        // Done with spectral difference feature.

        // Don't use template feature if fluctuation of LRT feature is very
        // low: most likely just noise state.
        if (fluctLrt < self->featureExtractionParams.thresFluctLrt)
        {
            useFeatureSpecDiff = 0;
        }

        // Select the weights between the features.
        // self->priorModelPars[4] is weight for LRT: always selected.
        // self->priorModelPars[5] is weight for spectral flatness.
        // self->priorModelPars[6] is weight for spectral difference.
        featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff);
        self->priorModelPars[4] = 1.f / featureSum;
        self->priorModelPars[5] = ((float)useFeatureSpecFlat) * self->priorModelPars[4];
        self->priorModelPars[6] = ((float)useFeatureSpecDiff) * self->priorModelPars[4];

        // Set hists to zero for next update.
        if (self->modelUpdatePars[0] >= 1)
        {
            for (i = 0; i < HIST_PAR_EST; i++)
            {
                self->histLrt[i] = 0;
                self->histSpecFlat[i] = 0;
                self->histSpecDiff[i] = 0;
            }
        }
    } // End of flag == 1.
}
// Refreshes the noise features and, when enabled, the prior-model parameters.
// Inputs:
//   * |magn| is the signal magnitude spectrum estimate.
//   * |lmagn| is forwarded to ComputeSpectralFlatness together with |magn|.
//   * |updateParsFlag| < 1 skips the parameter update; 1 requests a single
//     update (self->modelUpdatePars[0] is cleared once it has happened); any
//     larger value updates every window.
// self->modelUpdatePars[3] is the countdown to the next parameter extraction
// and is reloaded from self->modelUpdatePars[1].
void FeatureUpdate(NS_CONTEXT *self, const float *magn, const float *lmagn, int updateParsFlag)
{
    // Spectral flatness -> self->featureData[0].
    ComputeSpectralFlatness(self, magn, lmagn);
    // Difference against the learned noise spectrum -> self->featureData[4]
    // (also accumulates self->featureData[6]).
    ComputeSpectralDifference(self, magn);

    if (updateParsFlag < 1)
    {
        return;
    }

    // One frame closer to the end of the window.
    self->modelUpdatePars[3]--;

    if (self->modelUpdatePars[3] > 0)
    {
        // Still inside the window: only record the features.
        FeatureParameterExtraction(self, 0);
        return;
    }
    if (self->modelUpdatePars[3] != 0)
    {
        return;
    }

    // Window complete: derive thresholds/weights and restart the countdown.
    FeatureParameterExtraction(self, 1);
    self->modelUpdatePars[3] = self->modelUpdatePars[1];

    if (updateParsFlag == 1)
    {
        // Single-shot mode: switch further updates off.
        self->modelUpdatePars[0] = 0;
        return;
    }

    // Continuous mode: average the accumulated energy over the window and
    // fold it into the spectral-difference normalization for the next window.
    self->featureData[6] = self->featureData[6] / ((float)self->modelUpdatePars[1]);
    self->featureData[5] = 0.5f * (self->featureData[6] + self->featureData[5]);
    self->featureData[6] = 0.f;
}
// Compute speech/noise probability.
// Inputs:
//   * |snrLocPrior| is the prior SNR for each frequency.
//   * |logSnrLocPrior| is subtracted from the LR term for each frequency.
//   * |snrLocPost| is the post SNR for each frequency.
// Output:
//   * |probSpeechFinal| receives the speech probability for each frequency.
// Side effects: updates self->logLrtTimeAvg, self->featureData[3] (mean of the
// smoothed log LRT) and self->priorSpeechProb.
void SpeechNoiseProb(NS_CONTEXT *self, float *probSpeechFinal, const float *snrLocPrior, const float *logSnrLocPrior, const float *snrLocPost)
{
    size_t i;
    int sgnMap;
    float invLrt, gainPrior, indPrior;
    float logLrtTimeAvgKsum, besselTmp;
    float indicator0, indicator1;
    float weightIndPrior0, weightIndPrior1;
    float threshPrior0, threshPrior1;
    float widthPrior, widthPrior0, widthPrior1;

    widthPrior0 = WIDTH_PR_MAP;
    // Width for pause region: lower range, so increase width in tanh map.
    widthPrior1 = 2.f * WIDTH_PR_MAP;

    // Threshold parameters for features.
    threshPrior0 = self->priorModelPars[0];
    threshPrior1 = self->priorModelPars[1];
    // Sign for flatness feature.
    sgnMap = (int)(self->priorModelPars[2]);
    // Weight parameters for features.
    weightIndPrior0 = self->priorModelPars[4];
    weightIndPrior1 = self->priorModelPars[5];

    // Compute feature based on average LR factor.
    // This is the average over all frequencies of the smooth log LRT.
    // Each bin is accumulated exactly once so that the result, after scaling
    // by self->normMagnLen, is the mean rather than twice the mean.
    logLrtTimeAvgKsum = 0;
    for (i = 0; i < self->magnLen; i++)
    {
        besselTmp = (snrLocPost[i] * snrLocPrior[i] + snrLocPrior[i]) / (snrLocPrior[i] + 1.f + EPSILON);
        self->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - logSnrLocPrior[i] - self->logLrtTimeAvg[i]);
        logLrtTimeAvgKsum += self->logLrtTimeAvg[i];
    }
    logLrtTimeAvgKsum = logLrtTimeAvgKsum * self->normMagnLen;
    self->featureData[3] = logLrtTimeAvgKsum;
    // Done with computation of LR factor.

    // Compute the indicator functions.
    // Average LRT feature.
    widthPrior = widthPrior0;
    // Use larger width in tanh map for pause regions.
    if (logLrtTimeAvgKsum < threshPrior0)
    {
        widthPrior = widthPrior1;
    }
    // Compute indicator function: sigmoid map.
    indicator0 = 0.5f * tanhf(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 0.5f;

    // Spectral flatness feature.
    widthPrior = widthPrior0;
    // Use larger width in tanh map for pause regions.
    if (sgnMap == 1 && (self->featureData[0] > threshPrior1))
    {
        widthPrior = widthPrior1;
    }
    if (sgnMap == -1 && (self->featureData[0] < threshPrior1))
    {
        widthPrior = widthPrior1;
    }
    // Compute indicator function: sigmoid map.
    indicator1 = 0.5f * tanhf((float)sgnMap * widthPrior * (threshPrior1 - self->featureData[0])) + 0.5f;

    // Combine the indicator functions with the feature weights.
    // The spectral-difference feature (self->featureData[4],
    // self->priorModelPars[3] / [6]) is not part of this combination, so no
    // indicator is computed for it.
    indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1;
    // Done with computing indicator function.

    // Compute the prior probability.
    self->priorSpeechProb += PRIOR_UPDATE * (indPrior - self->priorSpeechProb);
    // Make sure probabilities are within range: keep floor to 0.01.
    if (self->priorSpeechProb > 1.f)
    {
        self->priorSpeechProb = 1.f;
    }
    if (self->priorSpeechProb < 0.01f)
    {
        self->priorSpeechProb = 0.01f;
    }
    // A NaN fails both comparisons above, so it needs this explicit check:
    // anything not inside (0.009, 1] is reset to 0.
    if (!(self->priorSpeechProb <= 1 && self->priorSpeechProb > 0.009))
    {
        self->priorSpeechProb = 0.0;
    }

    // Final speech probability: combine prior model with LR factor.
    gainPrior = (1.f - self->priorSpeechProb) / (self->priorSpeechProb + EPSILON);
    for (i = 0; i < self->magnLen; i++)
    {
        invLrt = expf(-self->logLrtTimeAvg[i]);
        invLrt = gainPrior * invLrt;
        probSpeechFinal[i] = 1.f / (1.f + invLrt);
    }
}
// Updates the noise estimate from the per-bin speech probability.
// Inputs:
//   * |magn| is the signal magnitude spectrum estimate.
//   * self->speechProb and self->noisePrev are read for every bin.
// Output:
//   * |noise| is the updated noise magnitude spectrum estimate.
// Side effect: self->magnAvgPause is updated for bins judged to be noise.
void UpdateNoiseEstimate(NS_CONTEXT *self, const float *magn, float *noise)
{
    size_t k;
    // Smoothing factor in effect; it is carried from one bin to the next.
    float gammaCur = NOISE_UPDATE;

    for (k = 0; k < self->magnLen; k++)
    {
        const float pSpeech = self->speechProb[k];
        const float pNoise = 1.f - pSpeech;
        const float gammaPrev = gammaCur;

        // Candidate update using the factor inherited from the previous bin.
        const float candidate = gammaPrev * self->noisePrev[k] + (1.f - gammaPrev) * (pNoise * magn[k] + pSpeech * self->noisePrev[k]);

        // Factor for this bin: slower adaptation when speech is likely.
        gammaCur = (pSpeech > PROB_RANGE) ? SPEECH_UPDATE : NOISE_UPDATE;

        // Conservative template update on likely-noise bins.
        // NOTE(review): the original author questioned whether this update
        // has any effect — confirm.
        if (pSpeech < PROB_RANGE)
        {
            self->magnAvgPause[k] += GAMMA_PAUSE * (magn[k] - self->magnAvgPause[k]);
        }

        if (gammaCur == gammaPrev)
        {
            // Same factor as before: the candidate is already the answer.
            noise[k] = candidate;
            continue;
        }

        // Factor changed: recompute with the new one, but still accept the
        // candidate when it is lower, since lowering the noise is safe.
        noise[k] = gammaCur * self->noisePrev[k] + (1.f - gammaCur) * (pNoise * magn[k] + pSpeech * self->noisePrev[k]);
        if (candidate < noise[k])
        {
            noise[k] = candidate;
        }
    }
}
// Builds the Wiener gain from a decision-directed (DD) prior SNR estimate.
// Input:
//   * |magn| is the signal magnitude spectrum estimate.
//   * self->noise, self->noisePrev, self->magnPrev, self->smooth and
//     self->overdrive are read.
// Output:
//   * |theFilter| is the per-bin gain snrPrior / (overdrive + snrPrior).
void ComputeDdBasedWienerFilter(const NS_CONTEXT *self, const float *magn, float *theFilter)
{
    size_t k;

    for (k = 0; k < self->magnLen; k++)
    {
        // Contribution of the previous frame (after its gain filter).
        const float prevTerm = self->magnPrev[k] * self->smooth[k] / (self->noisePrev[k] + EPSILON);

        // Contribution of the current frame; zero unless above the noise.
        float curTerm = 0.f;
        if (magn[k] > self->noise[k])
        {
            curTerm = (magn[k] - self->noise[k]) / (self->noise[k] + EPSILON);
        }

        // DD blend of both contributions, then the gain itself.
        {
            const float prior = DD_PR_SNR * prevTerm + (1.f - DD_PR_SNR) * curTerm;
            theFilter[k] = prior / (self->overdrive + prior);
        }
    }
}
// Explain what this code does.