【无标题】

原创于 2025-08-12 22:36:47 发布 · 339 阅读
CC 4.0 BY-SA版权
文章标签：
/**
 * Computes maximum scores from {@link Impacts} and remembers them per level, so that the
 * expensive similarity computation is not repeated on the same data.
 *
 * <p>For a given level, every {@code (freq, norm)} pair exposed by the impacts is scored once
 * and the largest result is stored as that level's upper bound. Later requests for the same
 * level are answered from the cache, unless the level's doc ID upper bound is now greater than
 * the one recorded when the entry was computed.
 *
 * @lucene.internal
 */
final class MaxScoreCache {

    private final ImpactsSource impactsSource;
    private final SimScorer scorer;
    /** Cached maximum score, indexed by level. */
    private float[] maxScoreCache;
    /** Doc ID upper bound each cached score was computed for; {@code -1} marks an unused slot. */
    private int[] maxScoreCacheUpTo;

    /**
     * Sole constructor. Both caches start out empty and grow on demand.
     *
     * @param impactsSource where the current {@link Impacts} are read from
     * @param scorer        scorer applied to each {@code (freq, norm)} pair
     */
    public MaxScoreCache(ImpactsSource impactsSource, SimScorer scorer) {
        this.maxScoreCache = new float[0];
        this.maxScoreCacheUpTo = new int[0];
        this.impactsSource = impactsSource;
        this.scorer = scorer;
    }

    /**
     * Make sure both cache arrays can hold at least {@code size} entries. Slots added to
     * {@code maxScoreCacheUpTo} are filled with {@code -1}.
     */
    private void ensureCacheSize(int size) {
        final int previousLength = maxScoreCache.length;
        if (previousLength >= size) {
            return;
        }
        maxScoreCache = ArrayUtil.grow(maxScoreCache, size);
        // Keep the two arrays the same length so that a level indexes both of them.
        maxScoreCacheUpTo = ArrayUtil.growExact(maxScoreCacheUpTo, maxScoreCache.length);
        Arrays.fill(maxScoreCacheUpTo, previousLength, maxScoreCacheUpTo.length, -1);
    }

    /**
     * Score every {@code (freq, norm)} pair in {@code impacts} and keep the largest result.
     *
     * @param impacts the impacts to score
     * @return the largest score found; the running maximum starts at {@code 0}, which is
     *         therefore the result for an empty list
     */
    private float computeMaxScore(List<Impact> impacts) {
        float best = 0f;
        for (Impact candidate : impacts) {
            final float score = scorer.score(candidate.freq, candidate.norm);
            best = Math.max(score, best);
        }
        return best;
    }

    /**
     * Return the first level that includes all doc IDs up to {@code upTo},
     * or -1 if there is no such level.
     */
    int getLevel(int upTo) throws IOException {
        final Impacts impacts = impactsSource.getImpacts();
        final int levelCount = impacts.numLevels();
        int level = 0;
        while (level < levelCount) {
            if (impacts.getDocIdUpTo(level) >= upTo) {
                return level;
            }
            level++;
        }
        return -1;
    }

    /**
     * Return the maximum score for the given {@code level}. The score is recomputed only when
     * the doc ID upper bound of the level is greater than the one stored with the cached value.
     */
    float getMaxScoreForLevel(int level) throws IOException {
        final Impacts impacts = impactsSource.getImpacts();
        ensureCacheSize(level + 1);
        final int currentUpTo = impacts.getDocIdUpTo(level);
        if (maxScoreCacheUpTo[level] >= currentUpTo) {
            // The cached entry already covers this upper bound.
            return maxScoreCache[level];
        }
        maxScoreCache[level] = computeMaxScore(impacts.getImpacts(level));
        maxScoreCacheUpTo[level] = currentUpTo;
        return maxScoreCache[level];
    }

    /**
     * Return the maximum level at which scores are all less than {@code minScore},
     * or -1 if none.
     */
    private int getSkipLevel(Impacts impacts, float minScore) throws IOException {
        final int levelCount = impacts.numLevels();
        // If no level reaches minScore, the last level is the answer.
        int skipLevel = levelCount - 1;
        for (int level = 0; level < levelCount; level++) {
            if (getMaxScoreForLevel(level) >= minScore) {
                // This level may be competitive; the one just below it is the last to skip.
                skipLevel = level - 1;
                break;
            }
        }
        return skipLevel;
    }

    /**
     * Return an inclusive upper bound of documents that all have a score that
     * is less than {@code minScore}, or {@code -1} if the current document may
     * be competitive.
     */
    int getSkipUpTo(float minScore) throws IOException {
        final Impacts impacts = impactsSource.getImpacts();
        final int skipLevel = getSkipLevel(impacts, minScore);
        return skipLevel == -1 ? -1 : impacts.getDocIdUpTo(skipLevel);
    }

}