字典树(Trie树)
/*==================================================*\
| Trie树(k叉)
| INIT: init();
| 注: tree[i][tk]>0时表示单词存在, 当然也可赋予它更多含义;
\*==================================================*/
// Storage for the k-ary trie. Node 0 is the root; a child link of 0 means "no child".
const int tk = 26, tb = 'a'; // tk: branching factor (alphabet size); tb: first letter of the alphabet
int top, tree[N][tk + 1]; // top: next unused node index; tree[i][0..tk-1]: child links, tree[i][tk]: word mark; N: maximum number of nodes
// Resets the k-ary trie to an empty root.
// Only the root row is cleared here; every other row is cleared by insert()
// at the moment the node is allocated.
void init() {
    memset(tree[0], 0, sizeof tree[0]);
    top = 1; // node 0 is the root, so the first free node is 1
}
// Looks up the word s in the k-ary trie.
// Returns the mark stored for s (tree[node][tk]), or 0 when the path for s
// does not exist. The empty string is answered from the root's mark, which
// is where insert("") stores it; no character is ever used as an index
// once the terminator has been reached.
int sear(char *s) { // returns 0 on failure
    int rt = 0;
    for (; *s; ++s) {
        rt = tree[rt][*s - tb];
        if (rt == 0) return 0; // link 0 means "no child": the word is absent
    }
    return tree[rt][tk];
}
// Inserts the word s into the k-ary trie and stores `rank` as its mark.
// rank: 1 means "present", 0 means "absent"; callers may give it another meaning.
// Missing nodes along the path are taken from tree[top] and cleared before use.
void insert(char *s, int rank = 1) {
    int cur = 0;
    while (*s) {
        int slot = *s - tb;
        if (tree[cur][slot] == 0) {
            // Allocate a fresh, zeroed node and hook it under the current one.
            memset(tree[top], 0, sizeof(tree[top]));
            tree[cur][slot] = top;
            ++top;
        }
        cur = tree[cur][slot];
        ++s;
    }
    tree[cur][tk] = rank;
}
// Removes the word s by clearing its mark; no nodes are freed.
// The caller must guarantee that s is present in the trie.
void delt(char *s) {
    int node = 0;
    while (*s != '\0') {
        node = tree[node][*s - tb];
        ++s;
    }
    tree[node][tk] = 0;
}
// Returns the length of the longest prefix of s that exists as a path in
// the trie (word marks are not consulted).
int prefix(char *s) {
    int node = 0;
    int matched = 0;
    while (s[matched] != '\0') {
        node = tree[node][s[matched] - tb];
        if (node == 0) return matched; // fell off the trie
        ++matched;
    }
    return matched;
}
/*==================================================*\
| Trie树(左儿子右兄弟)
| INIT: init();
\*==================================================*/
// Storage for the first-child / next-sibling trie. Node 0 is the root and
// index 0 doubles as the "no node" value in the l and r links.
int top; // index of the next unused slot in tree[]; insert() hands out nodes from here
struct trie {
char c; // character carried by this node
int l, r, rk; // l: first child, r: next sibling (0 = none); rk: mark/weight of the word ending here (0 = none)
} tree[N];
// Resets the first-child / next-sibling trie to an empty root.
// Only the root node is zeroed; insert() initialises every field of a node
// when it allocates it.
void init() {
    memset(&tree[0], 0, sizeof tree[0]);
    top = 1; // node 0 is the root, so allocation starts at 1
}
// Looks up the word s. Returns its stored mark (rk), or 0 when the path
// for s does not exist.
int sear(char *s) {
    int node = 0;
    while (*s) {
        // Scan the sibling list of node's children for the current character.
        int child = tree[node].l;
        while (child != 0 && tree[child].c != *s) child = tree[child].r;
        if (child == 0) return 0;
        node = child;
        ++s;
    }
    return tree[node].rk;
}
// Inserts the word s and stores rk (a weight or a presence mark) at its
// final node. Missing nodes are allocated from tree[top] and pushed onto
// the front of the parent's child list.
void insert(char *s, int rk = 1) {
    int node = 0;
    for (; *s; ++s) {
        int child = tree[node].l;
        while (child && tree[child].c != *s) child = tree[child].r;
        if (!child) {
            child = top++;
            tree[child].c = *s;
            tree[child].rk = 0;
            tree[child].l = 0;
            tree[child].r = tree[node].l; // old first child becomes the next sibling
            tree[node].l = child;
        }
        node = child;
    }
    tree[node].rk = rk;
}
// Removes the word s by clearing its mark; no nodes are freed.
// The caller must guarantee that s is present in the trie.
void delt(char *s) {
    int node = 0;
    while (*s) {
        node = tree[node].l;
        while (node != 0 && tree[node].c != *s) node = tree[node].r;
        ++s;
    }
    tree[node].rk = 0;
}
// Returns the length of the longest prefix of s that exists as a path in
// the trie (word marks are not consulted).
int profix(char *s) {
    int node = 0, depth = 0;
    while (s[depth]) {
        node = tree[node].l;
        while (node && tree[node].c != s[depth]) node = tree[node].r;
        if (!node) break; // no child carries this character
        ++depth;
    }
    return depth;
}
后缀数组
/*==================================================*\
| 后缀数组 O(N * log N)
| INIT: n = strlen(s) + 1;
| CALL: makesa(); lcp();
| 注: height[i] = lcp(sa[i], sa[i-1]);
\*==================================================*/
// Globals shared by makesa() and lcp().
char s[N]; // input text; N must be greater than 256
// n: number of characters processed (the section header sets it to strlen(s) + 1);
// sa: suffix array; rank: rank of each suffix; height: LCP of neighbouring suffixes in sa;
// tmp, top: scratch arrays for the bucket sort inside makesa().
int n, sa[N], height[N], rank[N], tmp[N], top[N];
// Builds the suffix array sa[] and the rank[] table of s[0..n-1] by prefix
// doubling: suffixes are first ordered by their first byte, then by keys of
// doubled length until every suffix has its own rank.
// Reads the globals s and n; writes sa, rank, tmp and top.
// Per the section header, n is expected to be strlen(s) + 1.
void makesa() { // O(N * log N)
int i, j, len, na;
na = (n < 256 ? 256 : n); // the counting table must cover all 256 byte values and up to n ranks
memset(top, 0, na * sizeof(int));
// Counting sort on the first byte: rank[i] is the byte value, top[] the cumulative counts.
for (i = 0; i < n; i++) top[rank[i] = s[i] & 0xff]++;
for (i = 1; i < na; i++) top[i] += top[i - 1];
for (i = 0; i < n; i++) sa[--top[rank[i]]] = i; // afterwards top[r] is the first slot of bucket r
for (len = 1; len < n; len <<= 1) {
// Walk the current order, shift each suffix start len positions to the left
// (wrapping around below 0) and drop it into the next free slot of the
// bucket of its own rank; tmp then holds the order for the doubled key.
for (i = 0; i < n; i++) {
j = sa[i] - len;
if (j < 0) j += n;
tmp[top[rank[j]]++] = j;
}
// sa is reused as scratch for the new ranks: the first suffix in tmp gets
// rank 0 and bucket 0 starts at slot 0.
sa[tmp[top[0] = 0]] = j = 0;
// A new rank (with its bucket start recorded in top[]) begins whenever
// either half of the key differs from the previous suffix in tmp.
// NOTE(review): the second-half index tmp[i] + len is not wrapped;
// presumably this relies on the terminator included via n = strlen(s) + 1
// so that it is only evaluated in range - confirm.
for (i = 1; i < n; i++) {
if (rank[tmp[i]] != rank[tmp[i - 1]]
|| rank[tmp[i] + len] != rank[tmp[i - 1] + len])
top[++j] = i;
sa[tmp[i]] = j;
}
memcpy(rank, sa, n * sizeof(int)); // publish the new ranks
memcpy(sa, tmp, n * sizeof(int)); // publish the new order
if (j >= n - 1) break; // j is the highest rank handed out; n - 1 means all ranks are distinct
}
}
// Fills height[] from s, sa and rank; must run after makesa().
// Per the section header, height[i] = lcp(sa[i], sa[i-1]).
// Bookkeeping: i is the text position currently being compared and k the
// number of characters already matched, so the suffix under consideration
// starts at i - k, which equals sa[j]. Both the initial state and every
// update below keep that relation.
void lcp() { // O(4 * N)
int i, j, k;
// Start with the suffix at position 0 (j = rank[0]) and set height[0] = 0.
for (j = rank[height[i = k = 0] = 0]; i < n - 1; i++, k++)
// On a mismatch with the predecessor in sa, k is the final LCP for rank j:
// store it, then move on to the suffix one position to the right
// (rank[sa[j] + 1]) while keeping k - 1 of the matched characters.
while (k >= 0 && s[i] != s[sa[j - 1] + k])
height[j] = (k--), j = rank[sa[j] + 1];
}
KMP(单模式匹配)
/*==================================================*\
| KMP匹配算法O(M+N)
| CALL: res=kmp(str, pat); 原串为str; 模式为pat(长为P);
\*==================================================*/
// Failure table filled by kmp(): fail[i] is the index of the last character
// of the longest proper border of pat[0..i], or -1 when there is none.
// P is defined elsewhere; the section header describes it as the pattern length.
int fail[P];
// Knuth-Morris-Pratt search for pat inside str.
// Returns the 0-based offset of the first occurrence, or -1 when pat does
// not occur. An empty pattern matches at offset 0.
// Side effect: rebuilds the global fail[] table for pat on every call.
int kmp(char *str, char *pat) {
    int i, j, k;
    memset(fail, -1, sizeof(fail));
    // An empty pattern has no table to build; returning here avoids reading
    // pat[1], which lies past the terminator. The result matches what the
    // matching loop below would yield (i - j == 0).
    if (pat[0] == '\0') return 0;
    for (i = 1; pat[i]; ++i) {
        // Shrink the border until it can be extended by pat[i] or runs out.
        k = fail[i - 1];
        while (k >= 0 && pat[i] != pat[k + 1]) k = fail[k];
        if (pat[k + 1] == pat[i]) fail[i] = k + 1;
    }
    i = j = 0;
    while (str[i] && pat[j]) { // By Fandywang
        if (pat[j] == str[i]) ++i, ++j;
        else if (j == 0) ++i; // mismatch on the first pattern character: advance in str
        else j = fail[j - 1] + 1; // fall back to the longest border of the matched part
    }
    if (pat[j]) return -1; else return i - j;
}
AC自动机(多模式匹配)
/*==================================================*\
| tire是字典树,fail是失败数组;
| 剩下的数组在用到的时候会说明 。
| 这里假定每个模式串的长度不超过44 (aux每行45字节, 含结尾的'\0');
\*==================================================*/
// Globals of the Aho-Corasick automaton.
// tire: trie / transition table (only columns 0..25 are used by the code below);
// fail: failure links; End[v]: number of patterns ending at node v;
// auxLen[v]: length of the pattern ending at node v.
int tire[INF][30], fail[INF], End[INF],auxLen[INF];
// aux[v]: copy of the pattern ending at node v (45 bytes per row, terminator included).
char aux[INF][45] ;
// root: index of the root node; index: highest node index handed out so far.
// NOTE(review): a global named `index` may clash with the POSIX index()
// function on some toolchains - confirm.
int root = 0, index = 0;
// Adds the pattern `data` (lowercase letters) to the trie of the automaton.
// data: NUL-terminated pattern.
// rt:   ignored on entry; it is overwritten with `root` and then used as
//       the walking cursor (kept for interface compatibility).
// On return the final node has its End counter incremented, auxLen set to
// the pattern length and aux holding a copy of the pattern. The copy is
// bounded by the row size of aux, so an over-long pattern is truncated in
// aux instead of overflowing into the neighbouring row.
void insert(char* data, int rt) {
    int len = strlen(data);
    rt = root;
    for (int i = 0; i < len; i++) { // walk / extend the trie
        int y = data[i] - 'a';
        if (tire[rt][y] == 0) {
            tire[rt][y] = ++index;
        }
        rt = tire[rt][y];
    }
    // rt now stands for the whole pattern rather than a proper prefix.
    // During a query, End[rt] > 0 identifies such a node.
    End[rt]++;
    auxLen[rt] = len; // length of the pattern represented by rt
    // Bounded copy: strncpy alone does not terminate on truncation, so the
    // last byte of the row is forced to NUL.
    strncpy(aux[rt], data, sizeof(aux[rt]) - 1);
    aux[rt][sizeof(aux[rt]) - 1] = '\0';
}
void build() { //通过bfs来建立失败数组
queue <int> que;
int rt = root;
for(int i = 0; i < 26; i++) {
if(tire[rt][i] != 0) {
que.push(tire[rt][i]); //初始化,将每个模式串的的首字符加入队列
}
}
while(!que.empty()) {
int now = que.front();
que.pop();
for(int i = 0; i < 26; i++) {
if(tire[now][i] == 0) {
tire[now][i] = tire[fail[now]][i];
} else {
fail[tire[now][i]] = tire[fail[now]][i];
que.push(tire[now][i]);
}
}
}
}
int query(char* data) { //查询过程
int len = strlen(data);
int rt = root;
int res = 0;
for(int i = 0; i < len; i++) {
int y = data[i] - 'a';
rt = tire[rt][y];
int jump = rt;
while(jump != root) { //如果能匹配就判断该jump结点代表的字符串是前缀还是模式串
if(End[jump] > 0) { //如果是模式串的话
res += End[jump] ;
printf("%d ",i - auxLen[jump] + 2);
printf("%s\n",aux[jump]);
End[jump] = 0;
}
// 将jump指向该结点的失败指针 ,
// 看一下该结点代表的字符串的最大后缀是不是模式串;
jump = fail[jump];
}
}
return res;
}
字符串hash
/*==================================================*\
| 字符串Hash
| 注意:mod选择足够大的质数(至少大于字符串个数)
\*==================================================*/
// XOR-folding hash: byte i of url is XOR-ed into byte (i % 4) of a 32-bit
// accumulator, which is then reduced modulo mod.
// The accumulator is addressed through its in-memory bytes, so the value
// depends on the machine's byte order; the code assumes unsigned int has
// at least 4 bytes. mod must be non-zero.
unsigned int hasha(char *url, int mod) {
    unsigned int acc = 0;
    char *bytes = (char *) &acc;
    int pos = 0;
    while (url[pos] != '\0') {
        bytes[pos % 4] ^= url[pos];
        ++pos;
    }
    return acc % mod;
}
// Shift-and-fold string hash: each character is added after shifting the
// state left by 4 bits; whenever the top nibble becomes non-zero it is
// folded back into bits 4..7 and cleared. The result is reduced modulo mod.
// mod must be non-zero.
unsigned int hashb(char *url, int mod) {
    unsigned int h = 0;
    for (char *p = url; *p != '\0'; ++p) {
        h = (h << 4) + *p;
        unsigned int high = h & 0xF0000000;
        if (high != 0) h ^= high >> 24;
        h &= ~high;
    }
    return h % mod;
}
// Shift-and-fold string hash with the same mixing as hashb: shift the state
// left by 4, add the character, and when the top nibble is non-zero fold it
// into bits 4..7 and clear it. The result is reduced modulo prime
// (default 25013) and returned as int.
int hashc(char *p, int prime = 25013) {
    unsigned int h = 0;
    while (*p != '\0') {
        h = (h << 4) + *p;
        unsigned int high = h & 0xf0000000;
        if (high != 0) {
            h ^= high >> 24;
            h ^= high; // the bits of high are set in h, so this clears them
        }
        ++p;
    }
    return h % prime;
}
Karp-Rabin字符串匹配
/*==================================================*\
| Karp-Rabin字符串匹配
| hash(w[0..m-1]) =
| (w[0] * 2^(m-1) + ... + w[m-1] * 2^0) % q;
| hash(y[j+1..j+m]) =
| rehash(y[j], y[j+m], hash(y[j..j+m-1]));
| rehash(a, b, h) = ((h - a * 2^(m-1) ) * 2 + b) % q;
| 可以用q = 2^32简化%运算
\*==================================================*/
// Rolling-hash update: removes the leading character a, shifts, and appends b.
// Expands to an expression that uses a variable named d (2^(m-1)) from the
// enclosing scope.
#define REHASH(a, b, h) ((((h) - (a)*d) << 1) + (b))
// Karp-Rabin search for x[0..m-1] inside y[0..n-1].
// Returns the 0-based offset of the first occurrence, or -1 when there is
// none (including m < 0 or m > n). An empty pattern (m == 0) matches at 0.
// All hash arithmetic is done in unsigned int, i.e. modulo 2^32 on the
// usual platforms, so shifts and products wrap instead of overflowing.
int krmatch(char *x, int m, char *y, int n) { // search x in y
    unsigned int d, hx, hy;
    int i, j;
    if (m < 0 || m > n) return -1; // no window of length m fits into y
    // d = 2^(m-1), the weight of the leading character of a window.
    for (d = 1, i = 1; i < m; ++i) d <<= 1;
    // Hash of the pattern and of the first window of the text.
    for (hx = hy = 0, i = 0; i < m; ++i) {
        hx = (hx << 1) + x[i];
        hy = (hy << 1) + y[i];
    }
    for (j = 0; j <= n - m; ++j) {
        // Equal hashes are only a hint; confirm with a real comparison.
        if (hx == hy && memcmp(x, y + j, m) == 0) return j;
        // Slide the window, but never read y[n] after the last window.
        if (j < n - m) hy = REHASH(y[j], y[j + m], hy);
    }
    return -1;
}
代码多源于网络,更多模板