关于B-K tree与k-d tree一些自己想法

最新推荐文章于 2025-06-09 15:47:29 发布

原创最新推荐文章于 2025-06-09 15:47:29 发布 · 2.5k 阅读

0 ·

CC 4.0 BY-SA版权

数据结构同时被 3 个专栏收录

83 篇文章

订阅专栏

思考&证明

15 篇文章

订阅专栏

实验室学习

4 篇文章

订阅专栏

本文探讨了B-K树和k-d树的相似之处，两者都用于寻找目标点附近的点。B-K树适用于给定距离k以内的查询，而k-d树则针对最近的k个点。B-K树在数据点距离范围大但k值小时效果显著，常用于单词纠错。k-d树需要数据点有向量描述，建树复杂度为O(K*nlgn)，插入和删除均为O(K*logn)，查询k近邻为O(K*lgk*T)。高维情况下，可以使用BBF优化提高查询效率。虽然在低维中效果有限，但在高维时表现更佳。B-K树与k-d树在一定条件下可以相互模拟。文章提供了二维曼哈顿距离的测试题目，说明BBF优化在低维场景下效果不明显。

以前用B-K tree做过一个题，最近学习k-d tree，感觉这两种树有某些相似之处：它们都用来寻找距目标点最近的点(B-K tree针对给定距离k以内的点，k-d tree针对最近的k个点)。

其中B-K tree只要求数据点之间定义可度量的距离，支持求距给定点距离k以内的数据点。相对于暴力法来说，是常数级别的提高，但有时会很有效（比如数据点之间距离范围很大，但阈值k很小），一个应用就是实现单词纠错程序。当距离范围较大时，还可以采用分段映射来实现。

k-d tree除了要求数据点之间定义可度量距离外，还要求数据点用向量来描述(因为要按照不同维度进行划分)。设K为维数、k为所求近邻个数，则建树时间复杂度为O(K*nlgn)，插入和删除都为O(K*logn)，查询k近邻为O(K*lgk*T)，其中T为一次查询访问的点数，T最差为O(n^(1-1/K))。维数较高时，效率明显下降，所以又有了BBF优化，大概就是把存储路径上节点的堆栈换成优先队列，按照查询点到节点区域距离最短者优先的顺序回溯并更新最近点，当队列为空或者当前最近点距离小于队首权重时，当前最近点即为答案。如果不要求解的最优性，可以通过提前返回的方式以更高的效率得到较优解。

在不考虑效率的情况下，B-K tree可以通过倍增二分的方法实现k-d tree的功能，如果数据点用向量描述，k-d tree也可以实现B-K tree的功能。

附上一道测试题目，二维曼哈顿距离，注释掉的代码为bbf优化，效率只提升了一点，看来bbf优化要在较高维度才有显著效果。

http://www.lydsy.com/JudgeOnline/problem.php?id=2648

<span style="font-size:14px;">#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <utility>
#include <cstring>
#include <map>
#include <climits>
#include <queue>
using namespace std;

// Shorthand integer type aliases.
typedef long long LL;
typedef unsigned long long ULL;
typedef unsigned UI;

const int MAXN(500010);  // capacity of po[]; the k-d tree node pool holds MAXN*2 nodes
const int MAXK(100010);  // not referenced by the code shown in this program
const int MAXL(10);  // not referenced by the code shown in this program
const int MAXC(2);  // number of coordinates per point (length of PO::co)
const int INF((INT_MAX-1)/2);  // initial "best distance" and the bound reported for a missing child
const int F(0);  // split axis passed to build/add at the root

// Raises `a` to `b` when `b` is strictly greater; reports whether `a` changed.
template<typename T>
inline bool checkmax(T &a, const T &b){
    if(b > a){
        a = b;
        return true;
    }
    return false;
}

// Lowers `a` to `b` when `b` is strictly smaller; reports whether `a` changed.
template<typename T>
inline bool checkmin(T &a, const T &b){
    if(b < a){
        a = b;
        return true;
    }
    return false;
}

// Absolute value of `a` (negates it when it compares below zero).
template<typename T>
inline T ABS(T a){
    if(a < 0)
        return -a;
    return a;
}

// A point with MAXC integer coordinates; po[] is the global input buffer.
struct PO{
    int co[MAXC];
    // Lexicographic order: the first differing coordinate decides.
    friend bool operator < (const PO &a, const PO &b){
        return std::lexicographical_compare(a.co, a.co+MAXC, b.co, b.co+MAXC);
    }
    // Two points are equal when every coordinate matches.
    friend bool operator == (const PO &a, const PO &b){
        return std::equal(a.co, a.co+MAXC, b.co);
    }
} po[MAXN];

// Axis that cmp compares on; KDT::build assigns it before each nth_element call.
int D;

// Orders two points by their coordinate on axis D only.
bool cmp(const PO &a, const PO &b){
    const int lhs = a.co[D];
    const int rhs = b.co[D];
    return lhs < rhs;
}

// Manhattan distance between two points: sum of per-axis absolute differences.
inline int dis(const PO &a, const PO &b){
    int total = 0;
    for(int axis = 0; axis != MAXC; ++axis){
        const int delta = a.co[axis]-b.co[axis];
        total += ABS(delta);
    }
    return total;
}

// Manhattan distance from point `a` to the axis-aligned box with per-axis
// minimum b1 and maximum b2: on each axis, adds how far `a` lies outside
// the [b1, b2] interval.
inline int dis2(const PO &a, const PO &b1, const PO &b2){
    int gap = 0;
    for(int axis = 0; axis != MAXC; ++axis){
        const int v = a.co[axis];
        const int lo = b1.co[axis];
        const int hi = b2.co[axis];
        if(v < lo) gap += lo-v;
        if(v > hi) gap += v-hi;
    }
    return gap;
}

// Returns `id` with its lowest bit flipped; build/add use it to alternate
// between split axes 0 and 1.
inline int tra(int id){
    const int flipped = id ^ 1;
    return flipped;
}

// k-d tree node: its own point (ke), the bounding box of its subtree
// (b1 = per-axis minimum, b2 = per-axis maximum) and two child links.
struct NO{
    PO ke, b1, b2;
    NO *ch[2];
    // Grows this node's bounding box so that it also covers p's box.
    void up(NO *p){
        for(int axis = 0; axis != MAXC; ++axis){
            if(p->b1.co[axis] < b1.co[axis]) b1.co[axis] = p->b1.co[axis];
            if(p->b2.co[axis] > b2.co[axis]) b2.co[axis] = p->b2.co[axis];
        }
    }
};

struct PNO{
    int d;
    NO *p;
    PNO(){}
    PNO(int d_, NO *p_): d(d_), p(p_){}
};

struct CMP{
    bool operator()(const PNO &a, const PNO &b){
        return a.d > b.d;
    }
};

// k-d tree over PO points that answers "distance to the nearest stored point"
// under the Manhattan metric computed by dis(). Nodes are handed out from a
// fixed pool and each node keeps the bounding box of its subtree, which the
// search uses (via dis2) to skip subtrees that cannot contain a closer point.
struct KDT{
    NO pool[MAXN*2], *rt, *re;  // node storage, tree root, next unused pool slot
    PO K;                       // point of the query in progress
    int mdis;                   // smallest distance found so far by that query

    // Resets the allocator and empties the tree.
    void init(){
        re = pool;
        rt = 0;
    }

    // Builds a subtree from po[l..r] (inclusive) into p, splitting on axis f
    // at this level and on the other axis one level down.
    void build(NO *&p, int l, int r, int f){
        if(l > r){
            p = 0;
            return;
        }
        int m = (l+r) >> 1;
        D = f;  // cmp reads the split axis from the global D
        nth_element(po+l, po+m, po+r+1, cmp);
        p = re++;
        // The original read `p->ch[0] = p->ch[1];`, which cleared neither link.
        p->ch[0] = p->ch[1] = 0;
        p->ke = p->b1 = p->b2 = po[m];
        build(p->ch[0], l, m-1, tra(f));
        build(p->ch[1], m+1, r, tra(f));
        if(p->ch[0]) p->up(p->ch[0]);
        if(p->ch[1]) p->up(p->ch[1]);
    }

    // Inserts point k below p (f is p's split axis). A point equal to a key met
    // on the way down is not inserted again. Bounding boxes along the path are
    // widened on the way back up.
    // NOTE(review): `re++` is not checked against the end of pool.
    void add(NO *&p, const PO &k, int f){
        if(!p){
            p = re++;
            p->ke = p->b1 = p->b2 = k;
            p->ch[0] = p->ch[1] = 0;
            return;
        }
        if(p->ke == k) return;
        if(k.co[f] < p->ke.co[f]){
            add(p->ch[0], k, tra(f));
            p->up(p->ch[0]);
        }
        else{
            add(p->ch[1], k, tra(f));
            p->up(p->ch[1]);
        }
    }

    // Returns the Manhattan distance from k to the nearest stored point,
    // or INF when the tree is empty.
    int query(const PO &k){
        K = k;
        mdis = INF;
        if(!rt) return mdis;  // empty tree: nothing to visit
        dfs(rt);
        return mdis;
    }

    // Depth-first search from non-null p: visits the child whose box is nearer
    // to K first, then the other child only if its box could still hold a
    // point closer than mdis.
    void dfs(NO *p){
        int d = dis(p->ke, K);
        checkmin(mdis, d);
        int ld = p->ch[0]? dis2(K, p->ch[0]->b1, p->ch[0]->b2): INF;
        int rd = p->ch[1]? dis2(K, p->ch[1]->b1, p->ch[1]->b2): INF;
        if(ld < rd){
            if(p->ch[0]) dfs(p->ch[0]);
            // Explicit null test so safety does not hinge on the INF sentinel.
            if(p->ch[1] && rd < mdis) dfs(p->ch[1]);
        }else{
            if(p->ch[1]) dfs(p->ch[1]);
            if(p->ch[0] && ld < mdis) dfs(p->ch[0]);
        }
    }
/*
    Disabled alternative (BBF, best-bin-first): the skipped sibling subtrees are
    kept in a priority queue keyed by box distance instead of being revisited
    through the recursion stack.

    int query(const PO &k){
        K = k;
        mdis = INF;
        priority_queue<PNO, vector<PNO>, CMP> que;
        dfs(rt, que);
        while(!que.empty() && mdis > que.top().d){
            PNO t = que.top();
            que.pop();
            int d = dis(t.p->ke, K);
            checkmin(mdis, d);
            int ld = t.p->ch[0]? dis2(K, t.p->ch[0]->b1, t.p->ch[0]->b2): INF;
            int rd = t.p->ch[1]? dis2(K, t.p->ch[1]->b1, t.p->ch[1]->b2): INF;
            if(ld < mdis) que.push(PNO(ld, t.p->ch[0]));
            if(rd < mdis) que.push(PNO(rd, t.p->ch[1]));
        }
        return mdis;
    }
    void dfs(NO *p,  priority_queue<PNO, vector<PNO>, CMP> &que){
        int d = dis(p->ke, K);
        checkmin(mdis, d);
        int ld = p->ch[0]? dis2(K, p->ch[0]->b1, p->ch[0]->b2): INF;
        int rd = p->ch[1]? dis2(K, p->ch[1]->b1, p->ch[1]->b2): INF;
        if(ld < rd){
            if(p->ch[0]) dfs(p->ch[0], que);
            if(rd < mdis) que.push(PNO(rd, p->ch[1]));
        }else{
            if(p->ch[1]) dfs(p->ch[1], que);
            if(ld < mdis) que.push(PNO(ld, p->ch[0]));
        }
    }*/
}kdt;

int main(){
    int n, m, t;
    scanf("%d%d", &n, &m);
    for(int i = 0; i < n; ++i)
        for(int j = 0; j < MAXC; ++j)
            scanf("%d", po[i].co+j);
    sort(po, po+n);
    int tn = unique(po, po+n)-po;
    kdt.init();
    kdt.build(kdt.rt, 0, tn-1, F);
    PO ke;
    for(int i = 0; i < m; ++i){
        scanf("%d%d%d", &t, ke.co, ke.co+1);
        if(t == 1)
            kdt.add(kdt.rt, ke, F);
        else
            printf("%d\n", kdt.query(ke));
    }
    return 0;
}
</span>

第K大，加了bbf反而效率更低

http://tsinsen.com/ViewGProblem.page?gpid=A1302

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <utility>
#include <cstring>
#include <map>
#include <climits>
#include <queue>
using namespace std;

// Shorthand integer type aliases.
typedef long long LL;
typedef unsigned long long ULL;
typedef unsigned UI;

const int MAXN(100010);  // capacity of po[], tpo[] and the k-d tree node pool
const int MAXK(100010);  // not referenced by the code shown in this program
const int MAXL(10);  // not referenced by the code shown in this program
const int MAXC(2);  // number of coordinates per point (length of PO::co)
const int INF(INT_MAX-1);  // not referenced by the code shown in this program
const int F(0);  // split axis passed to build at the root
const LL LIM(1LL << 62);  // -LIM is the bound the search assigns to a missing child

// Replaces `a` with `b` if `b` is strictly larger. Returns true on replacement.
template<typename T>
inline bool checkmax(T &a, const T &b){
    const bool grew = b > a;
    if(grew) a = b;
    return grew;
}

// Replaces `a` with `b` if `b` is strictly smaller. Returns true on replacement.
template<typename T>
inline bool checkmin(T &a, const T &b){
    const bool shrank = b < a;
    if(shrank) a = b;
    return shrank;
}

// Absolute value: returns -a for values below zero, a otherwise.
template<typename T>
inline T ABS(T a){
    if(!(a < 0))
        return a;
    return -a;
}

// A point with MAXC integer coordinates plus an id tag. Comparisons look at
// the coordinates only, never at id. po[] and tpo[] are global point buffers.
struct PO{
    int co[MAXC], id;
    // Lexicographic order on the coordinates.
    friend bool operator < (const PO &a, const PO &b){
        return std::lexicographical_compare(a.co, a.co+MAXC, b.co, b.co+MAXC);
    }
    // Equal when every coordinate matches.
    friend bool operator == (const PO &a, const PO &b){
        return std::equal(a.co, a.co+MAXC, b.co);
    }
} po[MAXN], tpo[MAXN];

// Axis that cmp compares on; KDT::build assigns it before each nth_element call.
int D;

// Orders two points by their coordinate on axis D only.
bool cmp(const PO &a, const PO &b){
    const int axis = D;
    return b.co[axis] > a.co[axis];
}

// Squared Euclidean distance between two points, accumulated in 64 bits.
// The per-axis difference is widened to LL before subtracting; the original
// subtracted in int first, which overflows when the coordinates are far apart.
// The squared sum can still overflow LL for extreme coordinate ranges.
inline LL dis(const PO &a, const PO &b){
    LL ret = 0;
    for(int i = 0; i < MAXC; ++i){
        const LL delta = (LL)a.co[i]-b.co[i];
        ret += delta*delta;
    }
    return ret;
}

// Manhattan distance from point `a` to the axis-aligned box with per-axis
// minimum b1 and maximum b2: on each axis, adds how far `a` lies outside
// the [b1, b2] interval.
inline int dis2(const PO &a, const PO &b1, const PO &b2){
    int outside = 0;
    int axis = 0;
    while(axis < MAXC){
        const int v = a.co[axis];
        if(v < b1.co[axis]) outside += b1.co[axis]-v;
        if(v > b2.co[axis]) outside += v-b2.co[axis];
        ++axis;
    }
    return outside;
}

// Upper bound on the squared Euclidean distance from `a` to any point inside
// the box [b1, b2]: on each axis it takes the farther of the two box faces.
// Differences are widened to LL before subtracting; the original subtracted
// in int first, which overflows when the coordinates are far apart.
inline LL dis3(const PO &a, const PO &b1, const PO &b2){
    LL ret = 0;
    for(int i = 0; i < MAXC; ++i){
        const LL toLow = ABS((LL)a.co[i]-b1.co[i]);
        const LL toHigh = ABS((LL)a.co[i]-b2.co[i]);
        const LL t = max(toLow, toHigh);
        ret += t*t;
    }
    return ret;
}

// Returns `id` with its lowest bit flipped; build/add use it to alternate
// between split axes 0 and 1.
inline int tra(int id){
    int other = id;
    other ^= 1;
    return other;
}

// k-d tree node: its own point (ke), the bounding box of its subtree
// (b1 = per-axis minimum, b2 = per-axis maximum) and two child links.
struct NO{
    PO ke, b1, b2;
    NO *ch[2];
    // Grows this node's bounding box so that it also covers p's box.
    void up(NO *p){
        for(int axis = 0; axis != MAXC; ++axis){
            if(p->b1.co[axis] < b1.co[axis]) b1.co[axis] = p->b1.co[axis];
            if(p->b2.co[axis] > b2.co[axis]) b2.co[axis] = p->b2.co[axis];
        }
    }
};

struct PNO{
    LL d;
    int id;
    PNO(){}
    PNO(LL d_, int i_): d(d_), id(i_){}
    friend bool operator >(const PNO &a, const PNO &b){
        return a.d == b.d? a.id < b.id: a.d > b.d;
    }
};

// priority_queue comparator for PNO: true when a has the larger distance, or
// the same distance and the smaller id. The queue's top is therefore the
// entry with the smallest distance (largest id among ties).
struct CMP{
    bool operator()(const PNO &a, const PNO &b){
        if(a.d == b.d)
            return a.id < b.id;
        return a.d > b.d;
    }
};

struct PNO2{
    LL d;
    NO *p;
    PNO2(){}
    PNO2(LL d_, NO *p_): d(d_), p(p_){}
    friend bool operator <(const PNO2 &a, const PNO2 &b){
        return a.d < b.d;
    }
};

struct KDT{
    NO pool[MAXN], *rt, *re;
    PO K;
    int C;
    int mdis;
    void init(){
        re = pool;
    }
    void build(NO *&p, int l, int r, int f){
        if(l > r){
            p = 0;
            return;
        }
        int m = (l+r) >> 1;
        D = f;
        nth_element(po+l, po+m, po+r+1, cmp);
        p = re++;
        p->ch[0] = p->ch[1];
        p->ke = p->b1 = p->b2 = po[m];
        build(p->ch[0], l, m-1, tra(f));
        build(p->ch[1], m+1, r, tra(f));
        if(p->ch[0]) p->up(p->ch[0]);
        if(p->ch[1]) p->up(p->ch[1]);
    }
    void add(NO *&p, const PO &k, int f){
        if(!p){
            p = re++;
            p->ke = p->b1 = p->b2 = k;
            p->ch[0] = p->ch[1] = 0;
            return;
        }
        if(p->ke == k) return;
        if(k.co[f] < p->ke.co[f]){
            add(p->ch[0], k, tra(f));
            p->up(p->ch[0]);
        }
        else{
            add(p->ch[1], k, tra(f));
            p->up(p->ch[1]);
        }
    }
    int query(const PO &k, int c){
        K = k;
        C = c;
        priority_queue<PNO, vector<PNO>, CMP> que;
        priority_queue<PNO2, vector<PNO2> > que2;
        dfs(rt, que, que2);
        while(!que2.empty() && (que.size() < C || que2.top().d >= que.top().d)){
            PNO2 cur = que2.top();
            que2.pop();
            PNO t(dis(cur.p->ke, K), cur.p->ke.id);
            if(que.size() < C)
                que.push(t);
            else
                if(t > que.top()){
                    que.pop();
                    que.push(t);
                }
            LL ld = cur.p->ch[0]? dis3(K, cur.p->ch[0]->b1, cur.p->ch[0]->b2): -LIM;
            LL rd = cur.p->ch[1]? dis3(K, cur.p->ch[1]->b1, cur.p->ch[1]->b2): -LIM;
            if(cur.p->ch[0] && (que.size() < C || ld >= que.top().d)) que2.push(PNO2(ld, cur.p->ch[0]));
            if(cur.p->ch[1] && (que.size() < C || rd >= que.top().d)) que2.push(PNO2(rd, cur.p->ch[1]));
        }
        return que.top().id;
    }
    void dfs(NO *p,  priority_queue<PNO, vector<PNO>, CMP> &que, priority_queue<PNO2, vector<PNO2> > &que2){
        PNO t(dis(p->ke, K), p->ke.id);
        if(que.size() < C)
            que.push(t);
        else
            if(t > que.top()){
                que.pop();
                que.push(t);
            }
        LL ld = p->ch[0]? dis3(K, p->ch[0]->b1, p->ch[0]->b2): -LIM;
        LL rd = p->ch[1]? dis3(K, p->ch[1]->b1, p->ch[1]->b2): -LIM;
        if(ld > rd){
            if(p->ch[0]) dfs(p->ch[0], que, que2);
            if(p->ch[1] && (que.size() < C || rd >= que.top().d)) que2.push(PNO2(rd, p->ch[1]));
        }else{
            if(p->ch[1]) dfs(p->ch[1], que, que2);
            if(p->ch[0] && (que.size() < C || ld >= que.top().d)) que2.push(PNO2(ld, p->ch[0]));
        }
    }
}kdt;

int main(){
    int n, m, c;
    scanf("%d", &n);
    for(int i = 0; i < n; ++i){
        scanf("%d%d", po[i].co, po[i].co+1);
        po[i].id = i;
        tpo[i] = po[i];
    }
    kdt.init();
    kdt.build(kdt.rt, 0, n-1, F);
    scanf("%d", &m);
    PO t;
    for(int i = 0; i < m; ++i){
        scanf("%d%d%d", t.co, t.co+1, &c);
        printf("%d\n", kdt.query(t, c)+1);
    }
    return 0;
}