【无标题】

/**
 * Compute maximum scores based on {@link Impacts} and keep them in a cache in
 * order not to run expensive similarity score computations multiple times on
 * the same data.
 *
 * <p>In short: the upper bound for a level is computed once from that level's
 * {@code (freq, norm)} impacts, stored, and handed back on later requests for
 * as long as the level's doc ID bound has not moved past the bound recorded
 * with the cached value.
 *
 * @lucene.internal
 */
final class MaxScoreCache {

    private final ImpactsSource impactsSource;
    private final SimScorer scorer;

    /** Cached maximum score, indexed by level. */
    private float[] maxScoreCache = new float[0];

    /**
     * For each level, the doc ID bound that was current when the matching entry
     * of {@link #maxScoreCache} was computed; slots added on growth hold -1.
     */
    private int[] maxScoreCacheUpTo = new int[0];

    /**
     * Sole constructor.
     *
     * @param impactsSource supplier of the {@link Impacts} that are scored
     * @param scorer        scorer applied to each {@code (freq, norm)} pair
     */
    public MaxScoreCache(ImpactsSource impactsSource, SimScorer scorer) {
        this.impactsSource = impactsSource;
        this.scorer = scorer;
    }

    /**
     * Make sure both cache arrays can be indexed up to {@code size - 1}.
     * Existing entries are kept; newly added bound slots are set to -1.
     */
    private void ensureCacheSize(int size) {
        final int previousLength = maxScoreCache.length;
        if (previousLength >= size) {
            return;
        }
        maxScoreCache = ArrayUtil.grow(maxScoreCache, size);
        // The bounds array is sized from the grown score array (not from the
        // requested size) so that both arrays always have the same length.
        maxScoreCacheUpTo = ArrayUtil.growExact(maxScoreCacheUpTo, maxScoreCache.length);
        Arrays.fill(maxScoreCacheUpTo, previousLength, maxScoreCacheUpTo.length, -1);
    }

    /**
     * Score every {@code (freq, norm)} pair of {@code impacts} and return the
     * largest result, used as the score upper bound for those impacts.
     *
     * @param impacts the impacts to score
     * @return the greatest score found, starting from a floor of 0
     */
    private float computeMaxScore(List<Impact> impacts) {
        float best = 0;
        for (Impact candidate : impacts) {
            final float candidateScore = scorer.score(candidate.freq, candidate.norm);
            best = Math.max(candidateScore, best);
        }
        return best;
    }

    /**
     * Return the first level that includes all doc IDs up to {@code upTo},
     * or -1 if there is no such level.
     */
    int getLevel(int upTo) throws IOException {
        final Impacts impacts = impactsSource.getImpacts();
        final int levelCount = impacts.numLevels();
        int level = 0;
        while (level < levelCount) {
            if (impacts.getDocIdUpTo(level) >= upTo) {
                return level;
            }
            level++;
        }
        return -1;
    }

    /**
     * Return the maximum score for the given {@code level}. The score is
     * recomputed only when the bound recorded for the cached value is smaller
     * than the level's current doc ID bound.
     */
    float getMaxScoreForLevel(int level) throws IOException {
        final Impacts impacts = impactsSource.getImpacts();
        ensureCacheSize(level + 1);
        final int currentUpTo = impacts.getDocIdUpTo(level);
        if (maxScoreCacheUpTo[level] >= currentUpTo) {
            // The cached value was computed for a bound at least this large.
            return maxScoreCache[level];
        }
        final float fresh = computeMaxScore(impacts.getImpacts(level));
        maxScoreCache[level] = fresh;
        maxScoreCacheUpTo[level] = currentUpTo;
        return fresh;
    }

    /**
     * Return the maximum level at which scores are all less than {@code minScore},
     * or -1 if none.
     */
    private int getSkipLevel(Impacts impacts, float minScore) throws IOException {
        final int levelCount = impacts.numLevels();
        int level = 0;
        // Written as a negated ">=" on purpose: comparisons involving NaN are
        // false, so "!(a >= b)" and "a < b" are not interchangeable here.
        while (level < levelCount && !(getMaxScoreForLevel(level) >= minScore)) {
            level++;
        }
        // Either the first level that may reach minScore, or levelCount when
        // no level does; the level just below is the last one safe to skip.
        return level - 1;
    }

    /**
     * Return an inclusive upper bound of documents that all have a score that
     * is less than {@code minScore}, or {@code -1} if the current document may
     * be competitive.
     */
    int getSkipUpTo(float minScore) throws IOException {
        final Impacts impacts = impactsSource.getImpacts();
        final int skipLevel = getSkipLevel(impacts, minScore);
        return skipLevel == -1 ? -1 : impacts.getDocIdUpTo(skipLevel);
    }

}
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值