前缀树原理:前缀树又名字典树,是将每一个样本从头开始根据前缀字符或前缀数字构建出一棵树
使用场景:根据前缀信息来查询。比如当我们遍历完一些样本后,让你查询某一样本出现的次数,可以使用哈希表来查询,也可以使用前缀树来查询。但如果题目给你一个样本,让你查询以这个样本为前缀的样本的个数,这时就不能用哈希表来做了
优缺点:优点是可以根据前缀信息来选择分支,节省时间;缺点是比较浪费空间
前缀树的实现:前缀树可以通过类实现或静态数组实现
前缀树的类实现:前缀树通常将字符放在路径上,节点上包含pass(经过当前字符的样本的个数)和end(以当前字符为结尾的样本的个数) 前缀树的类实现
节点的构造:
// One node of a prefix tree (trie) over the lowercase letters 'a'..'z'.
// Characters live on the edges: next[c - 'a'] is the child reached by letter c.
// Members are public because the free functions that walk the tree read and
// update them directly.
class trienode {
public:
    // Number of inserted words whose path goes through (or ends at) this node.
    int pass = 0;
    // Number of inserted words that end exactly at this node.
    int end = 0;
    // Child links, one slot per letter; nullptr means "no such child".
    // Stored inline so that deleting a node cannot leak a separate table.
    trienode* next[26] = {};

    trienode() = default;
};
插入:
void insert(string word) {
trienode cur = root;
cur->pass++;
for (int i = 0; i < word.size(); i++) {
int path = word[i] - 'a';
if (cur->next[path] == null)
cur->next[path] = new trienode;
cur = cur->next[path];
cur->pass++;
}
cur->end++;
}
查询单词出现的次数:
int countstring(string word) {
trienode cur = root;
for (int i = 0; i < word.size(); i++) {
int path = word[i] - 'a';
if (cur->next[path] == nullptr)
return 0;
cur = cur->next[path];
}
return cur->end;
}
查询以某个字符串为前缀的字符串的个数:
int countprefix(string prefix) {
trienode* cur = root;
for (int i = 0; i < prefix.size(); i++) {
int path = prefix[i] - 'a';
if (cur->next[path] == nullptr)
return 0;
cur = cur->next[path];
}
return cur->pass;
}
删除字符串:
void erase(string word) {
trienode* cur = root;
if (countstring(word) > 0) {
for (int i = 0; i < word.size(); i++) {
int path = word[i] - 'a';
if (--(cur->next[path]->pass) == 0) {
cur = cur->next[path];
while (i < word.size()) {//手动释放动态开辟的内存
trienode* rem = cur;
cur = cur->next[word[++i] - 'a'];
delete rem;
}
delete cur;
cur = nullptr;
return;
}
cur = cur->next[path];
}
cur->end--;
}
}
前缀树静态数组的实现:开辟行数足够大,列数为26(还是以小写的字符串为例)的二维数组,每一行相当于类实现的一个节点,该行的26个元素记录这个节点各个子节点的编号(0表示没有对应的子节点);然后开辟两个足够大的pass和end数组,用来记录每个节点的pass和end信息。
测试链接前缀树
构造:
// Storage for the array-backed trie. Row i of trienode holds the 26 child
// indices of node i; the lookup code treats the value 0 as "no child".
// NOTE(review): Maxnum is not defined in this excerpt - confirm it is declared
// before these arrays.
int trienode[Maxnum][26];
// pass[i]: counter incremented for every inserted word whose path visits node i.
int pass[Maxnum];
// end1[i]: counter incremented for every inserted word that ends at node i.
int end1[Maxnum];
int cnt;// index of the most recently allocated node
// Resets the allocation counter so that node 1 is the only node in use.
void build() {
cnt = 1;// node 0 is deliberately not used here (although it could be)
}
插入:
// Adds one occurrence of `word` to the array-backed trie.
// Starting from node 1, each letter selects a child slot; an empty slot (0)
// is filled with a freshly numbered node. Every visited node gets its pass
// counter bumped and the final node gets its end1 counter bumped.
void insert(string word) {
    int node = 1;
    ++pass[node];
    for (char letter : word) {
        int& slot = trienode[node][letter - 'a'];
        if (slot == 0) {
            slot = ++cnt;  // allocate the next unused node index
        }
        node = slot;
        ++pass[node];
    }
    ++end1[node];
}
查询字符串的个数:
// Returns the end1 counter of the node reached by spelling out `word` from
// node 1, or 0 if some letter has no child slot on the way.
int search(string word) {
    int node = 1;
    for (char letter : word) {
        const int child = trienode[node][letter - 'a'];
        if (child == 0) {
            return 0;  // path breaks here: the word is not stored
        }
        node = child;
    }
    return end1[node];
}
查询前缀字符串的个数:
// Returns the pass counter of the node reached by spelling out `word` from
// node 1, i.e. 0 when the path is missing.
int prefixnum(string word) {
    int node = 1;
    auto it = word.begin();
    while (it != word.end()) {
        const int child = trienode[node][*it - 'a'];
        if (child == 0) {
            return 0;  // nothing stored continues along this prefix
        }
        node = child;
        ++it;
    }
    return pass[node];
}
删除字符串:
// Removes one occurrence of `word` from the array-backed trie.
//
// Does nothing when search(word) reports that the word is not stored.
// Otherwise the pass counter of every node on the path is decremented,
// starting with root node 1 so that the counters mirror what insert() added.
// When a child's counter drops to zero the link to it is cleared, which
// detaches the whole remaining path, and the function returns. If no counter
// reaches zero, only the end1 counter of the last node is decremented.
void deletestring(string word) {
    if (search(word) == 0)
        return;

    int cur = 1;
    pass[cur]--;  // insert() counted the word on the root as well
    for (char ch : word) {
        int path = ch - 'a';
        int child = trienode[cur][path];
        if (--pass[child] == 0) {
            trienode[cur][path] = 0;  // no word uses this branch any more
            return;
        }
        cur = child;
    }
    end1[cur]--;
}
清空:
// Zeroes the counters and all child links of nodes 1..cnt.
void clear(){
    int node = 1;
    while (node <= cnt) {
        for (int& link : trienode[node]) {
            link = 0;
        }
        pass[node] = 0;
        end1[node] = 0;
        ++node;
    }
}
前缀树经典题目:
接头密钥
// Upper bound on the number of trie nodes the arrays below can hold.
const static int maxnum = 10000001;
// Row i holds the child indices of node i; the lookup code treats 0 as
// "no child". get_path maps digits to 0-9, '-' to 10 and '#' to 11, so
// column 12 appears to be unused - TODO confirm.
int trienode[maxnum][13];
// pass[i]: counter incremented for every inserted string whose path visits node i.
int pass[maxnum];
// Index of the most recently allocated node.
int cnt;
// Resets the allocation counter so that node 1 is the only node in use.
void build() {
cnt = 1;
}
// Maps one character of an encoded key to its child-slot index:
// '-' -> 10, '#' -> 11, anything else -> its offset from '0'.
int get_path(char c) {
    switch (c) {
        case '-':
            return 10;
        case '#':
            return 11;
        default:
            return c - '0';
    }
}
// Adds the encoded key `s` to the trie: walks from node 1, allocating a new
// node index for every missing child, and bumps the pass counter of each
// visited node.
void insert(string s) {
    int node = 1;
    ++pass[node];
    for (char symbol : s) {
        int& slot = trienode[node][get_path(symbol)];
        if (slot == 0) {
            slot = ++cnt;  // allocate the next unused node index
        }
        node = slot;
        ++pass[node];
    }
}
// Returns the pass counter of the node reached by following the encoded key
// `s` from node 1, or 0 if a symbol on the way has no child.
int prefix(string s) {
    int node = 1;
    for (char symbol : s) {
        const int child = trienode[node][get_path(symbol)];
        if (child == 0) {
            return 0;  // no inserted key starts with s
        }
        node = child;
    }
    return pass[node];
}
vector<int> countConsistentKeys(vector<vector<int> >& b,
vector<vector<int> >& a) {
vector<int>ans;
string s;
build();
for (auto i : a) {
s.erase(0, s.size());
for (int j = 1; j < i.size(); j++) {
s += to_string(i[j] - i[j - 1]);
s += "#";
}
insert(s);
}
for (auto i : b) {
s.erase(0, s.size());
for (int j = 1; j < i.size(); j++) {
s += to_string(i[j] - i[j - 1]);
s += "#";
}
ans.push_back(prefix(s));
}
return ans;
}