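/**
 * To avoid repeating expensive similarity score computations, this class
 * computes the maximum score (an upper bound) of a range once, from the
 * (freq, norm) list provided by {@link Impacts}, and puts the result in a cache.
 * Later requests for "the maximum score of this level / this range" are served
 * straight from the cache instead of re-running the similarity formula.
 * In short: compute once, store the result, reuse it many times, save CPU.
 *
 * <p>Compute maximum scores based on {@link Impacts} and keep them in a cache in
 * order not to run expensive similarity score computations multiple times on
 * the same data.
 *
 * @lucene.internal
 */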
final class MaxScoreCache {

  /** Source of the {@link Impacts} that every lookup in this class reads. */
  private final ImpactsSource impactsSource;

  /** Scorer that turns the (freq, norm) pair of an impact into a score. */
  private final SimScorer scorer;

  /** Cached maximum score, indexed by level. */
  private float[] maxScoreCache;

  /**
   * For each level, the {@code getDocIdUpTo} value the cached score was computed for.
   * A value of {@code -1} marks a slot that has never been filled.
   */
  private int[] maxScoreCacheUpTo;

  /**
   * Sole constructor. Both cache arrays start empty and are grown on demand.
   *
   * @param impactsSource where impacts are read from
   * @param scorer scorer applied to each impact
   */
  public MaxScoreCache(ImpactsSource impactsSource, SimScorer scorer) {
    this.impactsSource = impactsSource;
    this.scorer = scorer;
    this.maxScoreCache = new float[0];
    this.maxScoreCacheUpTo = new int[0];
  }

  /**
   * Grow the cache arrays when they currently hold fewer than {@code size} entries.
   * The up-to array is resized to the same length as the score array, and every
   * newly added up-to slot is set to {@code -1} so that it reads as "not computed yet".
   *
   * @param size number of entries that must be addressable
   */
  private void ensureCacheSize(int size) {
    final int previousLength = maxScoreCache.length;
    if (previousLength >= size) {
      return; // already large enough
    }
    maxScoreCache = ArrayUtil.grow(maxScoreCache, size);
    maxScoreCacheUpTo = ArrayUtil.growExact(maxScoreCacheUpTo, maxScoreCache.length);
    Arrays.fill(maxScoreCacheUpTo, previousLength, maxScoreCacheUpTo.length, -1);
  }

  /**
   * Score every (freq, norm) pair of {@code impacts} and keep the largest result.
   * The running maximum starts at 0.
   *
   * @param impacts the impacts to score
   * @return the maximum of 0 and all scores computed for {@code impacts}
   */
  private float computeMaxScore(List<Impact> impacts) {
    float best = 0;
    for (Impact candidate : impacts) {
      final float candidateScore = scorer.score(candidate.freq, candidate.norm);
      best = Math.max(candidateScore, best);
    }
    return best;
  }

  /**
   * Return the first level that includes all doc IDs up to {@code upTo},
   * or -1 if there is no such level.
   */
  int getLevel(int upTo) throws IOException {
    final Impacts impacts = impactsSource.getImpacts();
    final int levelCount = impacts.numLevels();
    int level = 0;
    while (level < levelCount) {
      if (impacts.getDocIdUpTo(level) >= upTo) {
        return level;
      }
      level++;
    }
    return -1;
  }

  /**
   * Return the maximum score for the given {@code level}. The cached value is
   * reused unless the level's current upper doc ID is greater than the one the
   * cached value was computed for, in which case it is recomputed and stored.
   */
  float getMaxScoreForLevel(int level) throws IOException {
    final Impacts impacts = impactsSource.getImpacts();
    ensureCacheSize(level + 1);
    final int levelUpTo = impacts.getDocIdUpTo(level);
    if (maxScoreCacheUpTo[level] >= levelUpTo) {
      return maxScoreCache[level]; // cache entry is still current
    }
    final float freshMaxScore = computeMaxScore(impacts.getImpacts(level));
    maxScoreCache[level] = freshMaxScore;
    maxScoreCacheUpTo[level] = levelUpTo;
    return freshMaxScore;
  }

  /**
   * Return the maximum level at which scores are all less than {@code minScore},
   * or -1 if none. Levels are scanned upwards from 0 and the scan stops at the
   * first level whose maximum score reaches {@code minScore}.
   */
  private int getSkipLevel(Impacts impacts, float minScore) throws IOException {
    final int levelCount = impacts.numLevels();
    int level = 0;
    while (level < levelCount) {
      if (getMaxScoreForLevel(level) >= minScore) {
        return level - 1;
      }
      level++;
    }
    return levelCount - 1;
  }

  /**
   * Return an inclusive upper bound of documents that all have a score that
   * is less than {@code minScore}, or {@code -1} if the current document may
   * be competitive.
   */
  int getSkipUpTo(float minScore) throws IOException {
    final Impacts impacts = impactsSource.getImpacts();
    final int skipLevel = getSkipLevel(impacts, minScore);
    return skipLevel == -1 ? -1 : impacts.getDocIdUpTo(skipLevel);
  }
}
01-16
2154
2154
05-18
2998
2998

被折叠的 条评论
为什么被折叠?



