一种快速的无监督的向量化方法做地标识别

本文介绍了一种基于Random Projection Trees的快速地标识别方法,利用随机投影在低维空间进行数据划分,形成森林结构以提高算法的泛化性。相比其他无监督方法,该算法在速度上有显著优势。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

这是我之前写的一篇文章《Extremely Fast Codebook Learning for Landmark Recognition》。它主要利用论文《Random projection trees and low dimensional manifolds》中的方法进行地标识别。论文的主要思想很简单:random projection tree 随机选择一个方向进行投影,再将投影后的数据根据阈值划分到左右子树。这一思想有点类似于 LSH 在欧几里得空间中的做法,只不过这里的 bin 只有左右两个。分裂到一定深度(比如 10)后停止,并用多棵树构成森林,以消除随机性带来的不确定性,提高算法的泛化性。该算法的速度非常快,在实际使用中也比其他无监督方法快得多。这里主要贴上个人写的主要 C++ 实现代码,完整代码放在 https://github.com/guoyilin/ERPF:

/**
 * One node of a random-projection tree.
 *
 * A node is a leaf when `index` is non-negative; otherwise it is an internal
 * node that routes points by comparing their projection onto `w` against
 * `thresholds`. Every scalar member has a default so that a node created
 * without value-initialization never holds indeterminate values.
 */
struct TreeNode {
        /// Row numbers (into the training matrix) of the points routed here.
        vector<int> indices;
        /// Leaf id (>= 0) for a leaf, -1 for an internal node.
        int index = -1;
        /// Levels still allowed below this node; 0 forces the node to be a leaf.
        int height = 0;
        /// [0] is the lower cut (projections >= it go right),
        /// [1] is the upper cut (projections below it go left).
        float thresholds[2] = {0.0f, 0.0f};
        /// Projection direction used by an internal node.
        vector<float> w;
        /// Children of an internal node; null until they are attached.
        TreeNode* left = nullptr;
        TreeNode* right = nullptr;
};
/**
 * A single random-projection tree over a set of float feature vectors.
 *
 * The tree is either built from training data with create_rpTree() or
 * restored from an XML file with load_rpTree(); retrievalLeaf() then maps a
 * feature vector to the ids of the leaves it falls into.
 *
 * All members carry default initializers so that a default-constructed tree
 * is in a well-defined (empty) state.
 */
class SpatialTree {
private:
        /// Training matrix, one row per point. Not owned: the caller must keep
        /// it alive for as long as create_rpTree() may be called.
        vector<vector<float> > *data = nullptr;
        /// Picks a projection direction for `node` and stores it in node->w.
        void splitF(TreeNode *node);
        void splitFbyFixed(TreeNode *node);
        /// Projects `v2` onto every row of `v1`.
        vector<float>
        dot(const vector<vector<float> > &v1, const vector<float> &v2);
        /// Inner product of two vectors.
        float dot(const vector<float> &v1, const vector<float> &v2);
        /// Interpolated percentile (`percentage` in [0, 1]) of the values in `w`.
        float find_percentile(float percentage, const vector<float> &w);
        /// Writes the whole tree to an XML file.
        void save_rpTree(const string& rptree_file);
        /// Recursive helper of save_rpTree() that serializes one subtree.
        void save_rpTree_sub(TreeNode *tree, ptree &pt);
public:
        /// Width of the overlap between the two children, as a fraction of the
        /// node's points (the cuts sit at the 0.5 -/+ spill/2 percentiles).
        float spill = 0.0f;
        /// Name of the splitting rule; stored and serialized with the tree.
        string rule;
        /// Root node, or null while no tree has been built or loaded.
        TreeNode *root = nullptr;
        /// Number of leaves in the tree.
        int leaf_count = 0;
        /// Nodes holding fewer points than this become leaves.
        int min_items = 64;
        /// Number of random directions sampled per split.
        int samples_rp = 10;
        /// Maximum number of levels of the tree.
        int height = 0;
        /// Dimensionality of the feature vectors.
        int dimension = 0;
        SpatialTree();
        /// Parses a comma-separated list of numbers.
        vector<float> stringTovector(string& w);
        /// Dumps the subtree rooted at `tree` to standard output.
        void print_rpTree(TreeNode *tree);
        SpatialTree(vector<vector<float> > &data, const string &rule, float spill,
                        int height);
        /// Builds the tree from the data passed to the constructor.
        void create_rpTree(const string& rptree_file);
        /// Restores a tree from an XML file.
        void load_rpTree(const string& rptree_file);
        /// Returns the ids of every leaf `feature` is routed to.
        vector<int> retrievalLeaf(vector<float> &feature);
};
#endif
<pre name="code" class="cpp">void SpatialTree::save_rpTree_sub(TreeNode *tree, ptree &pt) {
        if (tree->index > -1)//// leaf
                pt.put("xmlattr.index", tree->index);
        else {
                ////not leaf
                string w_string;
                for (std::vector<float>::const_iterator iter = tree->w.begin(); iter
                                != tree->w.end(); ++iter)
                        if (w_string.empty()) {
                                ostringstream ostr;
                                ostr << *iter;
                                w_string += ostr.str();
                                w_string += ",";
                        } else {
                                ostringstream ostr;
                                ostr << *iter;
                                w_string += ostr.str();
                                w_string += ",";
                        }
                w_string = w_string.substr(0, w_string.size() - 1);
                pt.put("xmlattr.w", w_string);
                pt.put("xmlattr.t0", tree->thresholds[0]);
                pt.put("xmlattr.t1", tree->thresholds[1]);
                ptree left_child;
                ptree right_child;
                left_child.put("xmlattr.height", tree->left->height);
                save_rpTree_sub(tree->left, left_child);
                pt.put_child("left", left_child);
                right_child.put("xmlattr.height", tree->right->height);
                save_rpTree_sub(tree->right, right_child);
                pt.put_child("right", right_child);
        }

}
void SpatialTree::save_rpTree(const string& rptree_file) {
        ptree pt;
        ptree root_tree;
        root_tree.put("xmlattr.height", root->height);
        root_tree.put("xmlattr.count", this->leaf_count);
        root_tree.put("xmlattr.dimension", this->dimension);
        root_tree.put("xmlattr.min_items", this->min_items);
        root_tree.put("xmlattr.samples_rp", this->samples_rp);
        root_tree.put("xmlattr.rule", this->rule);
        root_tree.put("xmlattr.spill", this->spill);
        if (root->height != 0) {
                save_rpTree_sub(root, root_tree);
        }
        pt.put_child("root", root_tree);
        boost::property_tree::write_xml(rptree_file.c_str(), pt);

}
/**
 * Creates an empty tree with the default settings (min_items = 64,
 * samples_rp = 10), typically to be filled by load_rpTree().
 *
 * Every member is initialized, in declaration order, so that no pointer or
 * counter is left indeterminate before a tree is built or loaded.
 */
SpatialTree::SpatialTree() :
        data(nullptr), spill(0), rule(), root(nullptr), leaf_count(0),
        min_items(64), samples_rp(10), height(0), dimension(0) {
}
/**
 * Returns the `percentage` percentile of `w` using the (n + 1) * p rank
 * definition with linear interpolation between neighbouring order statistics.
 *
 * The rank is 1-based: rank r with integer part k and fractional part g lies
 * between the k-th and (k+1)-th smallest values, i.e. sorted[k-1] and
 * sorted[k]. Ranks that fall outside [1, n] are clamped to the smallest or
 * largest value instead of reading past the ends of the array.
 *
 * @param percentage fraction in [0, 1] (0.5 is the median).
 * @param w          values to examine; not modified.
 * @return the interpolated percentile, or 0 (after printing an error) when
 *         `w` is empty.
 */
float SpatialTree::find_percentile(float percentage, const vector<float> &w) {
        if (w.empty()) {
                cout << "error in find_percentile: empty input!" << endl;
                return 0;
        }
        vector<float> sorted(w);
        std::sort(sorted.begin(), sorted.end());
        const int n = (int) sorted.size();
        const float rank = (n + 1) * percentage;

        // Written as negated comparisons so that a NaN rank is clamped too.
        if (!(rank > 1))
                return sorted.front();
        if (!(rank < n))
                return sorted.back();

        // Here 1 < rank < n, hence 1 <= below <= n - 1 and both reads are valid.
        const int below = (int) floor(rank);
        const float frac = rank - below;
        if (frac == 0)
                return sorted[below - 1]; // rank hits the below-th value exactly
        return (1 - frac) * sorted[below - 1] + frac * sorted[below];
}
vector<int> SpatialTree::retrievalLeaf(vector<float> &feature) {
        vector<int> leafs;
        queue<TreeNode *> queue;
        queue.push(root);
        while (!queue.empty()) {
                TreeNode *item = queue.front();
                if (item->index != -1)
                        leafs.push_back(item->index);
                else {
                        float wx = this->dot(item->w, feature);
                        if (wx >= item->thresholds[0])
                                queue.push(item->right);
                        if (wx < item->thresholds[1])
                                queue.push(item->left);
                }
                queue.pop();
        }
        return leafs;
}
/**
 * Prints the subtree rooted at `tree` to standard output, parent first.
 *
 * A leaf prints "leaf:<height>". An internal node prints its projection
 * weights, its height and the number of weights, followed by its left and
 * then its right subtree.
 *
 * @param tree root of the subtree to print.
 */
void SpatialTree::print_rpTree(TreeNode *tree) {
        const bool is_leaf = tree->index != -1;
        if (is_leaf) {
                cout << "leaf:" << tree->height << endl;
                return;
        }

        for (vector<float>::const_iterator weight = tree->w.begin(); weight
                        != tree->w.end(); ++weight)
                cout << *weight << " ";
        cout << "height:" << tree->height << "\t" << "w size:" << tree->w.size()
                        << endl;

        print_rpTree(tree->left);
        print_rpTree(tree->right);
}
/**
 * Parses a comma-separated list of numbers into a vector of floats.
 *
 * Every comma delimits a field and empty fields are kept, so an empty string
 * yields one element. Each field is converted with atof(), which gives 0 for
 * text that is not a number.
 *
 * @param w text to parse; not modified.
 * @return one float per field, in order.
 */
vector<float> SpatialTree::stringTovector(string& w) {
        vector<float> values;
        string::size_type begin = 0;
        for (;;) {
                const string::size_type comma = w.find(',', begin);
                const bool last_field = (comma == string::npos);
                const string field = last_field ? w.substr(begin)
                                : w.substr(begin, comma - begin);
                const float parsed = atof(field.c_str());
                values.push_back(parsed);
                if (last_field)
                        break;
                begin = comma + 1;
        }
        return values;
}
void SpatialTree::load_rpTree(const string& rptree_file) {
        ptree pt;
        read_xml(rptree_file, pt);
        this->root = new TreeNode();
        root->height = pt.get<int> ("root.xmlattr.height");
        this->min_items = pt.get<int> ("root.xmlattr.min_items");
        this->samples_rp = pt.get<int> ("root.xmlattr.samples_rp");
        this->dimension = pt.get<int> ("root.xmlattr.dimension");
        this->leaf_count = pt.get<int> ("root.xmlattr.count");
        queue<ptree> q;
        queue<TreeNode *> q2;
        ptree root_pt;
        root_pt = pt.get_child("root");
        q.push(root_pt);
        q2.push(root);
        while (!q.empty()) {
                ptree node = q.front();
                TreeNode *current = q2.front();
                int index = node.get<int> ("xmlattr.index", -1);
                if (index != -1) {
                        current->index = node.get<int> ("xmlattr.index", -1);
                } else {
                        current->index = -1;
                        current->thresholds[0] = node.get<float> ("xmlattr.t0");
                        current->thresholds[1] = node.get<float> ("xmlattr.t1");
                        string w = node.get<string> ("xmlattr.w");
                        current->w = stringTovector(w);

                        TreeNode *leftNode = new TreeNode();
                        current->left = leftNode;
                        TreeNode *rightNode = new TreeNode();
                        current->right = rightNode;

                        leftNode->height = node.get<int> ("left.xmlattr.height");
                        rightNode->height = node.get<int> ("right.xmlattr.height");

                        ptree left = node.get_child("left");
                        ptree right = node.get_child("right");

                        q.push(left);
                        q.push(right);
                        q2.push(current->left);
                        q2.push(current->right);
                }
                q.pop();
                q2.pop();
        }
}

/**
 * Multiplies the matrix `v1` (one vector per row) by the vector `v2`.
 *
 * Prints an error when `v2` is empty; the result then consists of zeros.
 * Each row of `v1` is read up to v2.size() elements without a bounds check.
 *
 * @param v1 matrix whose rows are projected onto.
 * @param v2 vector to project.
 * @return one inner product per row of `v1`.
 */
vector<float> SpatialTree::dot(const vector<vector<float> > &v1,
                const vector<float> &v2) {
        if (v2.empty())
                cout << "error in compute dot!" << endl;

        const size_t rows = v1.size();
        const size_t cols = v2.size();
        vector<float> products(rows, 0.0f);
        for (size_t r = 0; r < rows; ++r) {
                const vector<float> &row = v1[r];
                float &acc = products[r];
                for (size_t c = 0; c < cols; ++c)
                        acc += row[c] * v2[c];
        }
        return products;
}
/**
 * Inner product of `v1` and `v2`.
 *
 * Vectors of different length, or empty vectors, are reported on standard
 * output. The product is then taken over the common prefix only, so a shorter
 * `v2` is never read past its end.
 *
 * @param v1 first operand.
 * @param v2 second operand.
 * @return sum of the element-wise products; 0 when either vector is empty.
 */
float SpatialTree::dot(const vector<float> &v1, const vector<float> &v2) {
        if (v1.size() != v2.size() || v1.size() == 0) {
                cout << "error in compute dot!" << endl;
        }

        const size_t common = v1.size() < v2.size() ? v1.size() : v2.size();
        float result = 0;
        for (size_t i = 0; i < common; ++i) {
                result += v1[i] * v2[i];
        }

        return result;
}
/**
 * Chooses the projection direction for `node` and stores it in node->w.
 *
 * `samples_rp` candidate directions are drawn from a standard normal
 * distribution and scaled to unit Euclidean length. The node's own points are
 * projected onto every candidate, and the candidate with the largest spread
 * (maximum minus minimum projection) is kept.
 *
 * Unit length matters: the spread grows with the length of the direction, so
 * candidates are only comparable when they all have the same norm.
 *
 * @param node node to split; node->indices lists the rows of the training
 *             data that belong to it.
 */
void SpatialTree::splitF(TreeNode *node) {
        if (this->samples_rp <= 0 || this->dimension <= 0) {
                cout << "error in splitF: samples_rp and dimension must be positive!"
                                << endl;
                return;
        }

        std::random_device rd;
        std::mt19937 gen(rd());
        std::normal_distribution<float> distribution(0, 1);

        vector<vector<float> > W(this->samples_rp,
                        vector<float> (this->dimension));
        for (size_t i = 0; i < W.size(); ++i) {
                float squared_norm = 0;
                for (size_t j = 0; j < W[i].size(); ++j) {
                        W[i][j] = distribution(gen);
                        squared_norm += W[i][j] * W[i][j];
                }
                // Divide by the Euclidean norm, not by the sum of the
                // components: that sum can be close to zero or negative.
                const float norm = sqrt(squared_norm);
                if (norm > 0) {
                        for (size_t j = 0; j < W[i].size(); ++j)
                                W[i][j] /= norm;
                }
        }

        vector<float> min_val(W.size(), INFINITY);
        vector<float> max_val(W.size(), -INFINITY);
        for (size_t i = 0; i < node->indices.size(); ++i) {
                // Look the point up through the node's index list; position i
                // is only the right row number at the root.
                const vector<float> &point = (*this->data)[node->indices[i]];
                const vector<float> Wx = this->dot(W, point);
                for (size_t j = 0; j < Wx.size(); ++j) {
                        if (min_val[j] > Wx[j])
                                min_val[j] = Wx[j];
                        if (max_val[j] < Wx[j])
                                max_val[j] = Wx[j];
                }
        }

        // Start from candidate 0 so that a valid direction is chosen even when
        // no spread compares greater (no points, or NaN projections).
        size_t best = 0;
        float best_spread = -INFINITY;
        for (size_t i = 0; i < W.size(); ++i) {
                const float spread = max_val[i] - min_val[i];
                if (best_spread < spread) {
                        best_spread = spread;
                        best = i;
                }
        }
        node->w = W[best];
}

/**
 * Prepares a tree to be built from `data` by create_rpTree().
 *
 * Only the address of `data` is stored, so the caller must keep the matrix
 * alive until the tree has been built. `root` and `dimension` start out
 * empty and are filled in by create_rpTree().
 *
 * @param data   training matrix, one feature vector per row.
 * @param rule   name of the splitting rule.
 * @param spill  fraction of a node's points shared by both children.
 * @param height maximum number of levels of the tree.
 */
SpatialTree::SpatialTree(vector<vector<float> > &data, const string &rule,
                float spill, int height) :
        data(&data), spill(spill), rule(rule), root(nullptr), leaf_count(0),
        min_items(64), samples_rp(10), height(height), dimension(0) {
}
void SpatialTree::create_rpTree(const string& rptree_file) {
        vector<int> indices(data->size());
        for (vector<int>::iterator iter = indices.begin(); iter != indices.end(); ++iter) {
                *iter = leaf_count;
                leaf_count++;
        }
        root = new TreeNode();
        root->indices = indices;
        root->height = this->height - 1;
        root->index = -1;
        leaf_count = 0;
        this->dimension = (*data)[0].size();
        queue<TreeNode *> q;
        q.push(this->root);
        while (!q.empty()) {
                TreeNode *node = q.front();
                if (node->height == 0 || node->indices.size() < this->min_items) {
                        node->index = leaf_count;
                        leaf_count++;
                } else {
                        this->splitF(node);
                        vector<float> wx(node->indices.size());
                        for (int i = 0; i < node->indices.size(); i++) {
                                wx[i] = this->dot((*this->data)[node->indices[i]], node->w);
                        }
                        float low_percent = 0.5 - this->spill / 2;
                        float high_percent = 0.5 + this->spill / 2;
                        node->thresholds[0] = this->find_percentile(low_percent, wx);
                        node->thresholds[1] = this->find_percentile(high_percent, wx);
                        TreeNode *left = new TreeNode();
                        TreeNode *right = new TreeNode();
                        node->left = left;
                        node->right = right;
                        left->height = node->height - 1;
                        right->height = node->height - 1;
                        left->index = -1;
                        right->index = -1;
                        for (int i = 0; i < node->indices.size(); i++) {
                                if (wx[i] <= node->thresholds[1])
                                        left->indices.push_back(node->indices[i]);
                                if (wx[i] >= node->thresholds[0])
                                        right->indices.push_back(node->indices[i]);
                        }
                        q.push(left);
                        q.push(right);
                }
                q.pop();
        }
        //      this->save_rpTree(rptree_file);
}


 


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值