Longest Common Substring
Time Limit: 8000/4000 MS (Java/Others) Memory Limit: 65536/32768 K (Java/Others)
Total Submission(s): 1453 Accepted Submission(s): 419
Problem Description
Given two strings, you have to tell the length of the Longest Common Substring of them.
For example:
str1 = banana
str2 = cianaic
So the Longest Common Substring is "ana", and the length is 3.
Input
The input contains several test cases. Each test case contains two strings, each string will have at most 100000 characters. All the characters are in lower-case.
Process to the end of file.
Output
For each test case, you have to tell the length of the Longest Common Substring of them.
Sample Input
banana cianaic
Sample Output
3

今次系第一次用倍增算法构建后缀数组,我写既呢个其实比较好理解,哈哈,因为我好老实甘对SA两个关键字排序鸟,无花巧野,好直接,不过自己写既效率真系不敢恭维,可能直接过头………………{= =}
3898619 2011-05-02 09:27:06 Accepted 1403 1015MS 8056K 2800 B G++ 10SGetEternal{(。)(。)}!
3898664 2011-05-02 09:45:07 Accepted 1403 968MS 8060K 2901 B G++ 10SGetEternal{(。)(。)}!
3898667 2011-05-02 09:45:32 Accepted 1403 843MS 6844K 2901 B G++ 10SGetEternal{(。)(。)}!
3898750 2011-05-02 10:09:18 Accepted 1403 828MS 6852K 3481 B G++ 10SGetEternal{(。)(。)}!
3898973 2011-05-02 11:06:51 Accepted 1403 750MS 6964K 2441 B G++ 10SGetEternal{(。)(。)}!
3900666 2011-05-02 20:02:15 Accepted 1403 734MS 6900K 2827 B C++ 10SGetEternal{(。)(。)}!
3900874 2011-05-02 20:42:53 Accepted 1403 484MS 6900K 2496 B C++ 10SGetEternal{(。)(。)}!
3900895 2011-05-02 20:45:23 Accepted 1403 468MS 6900K 2258 B C++ 10SGetEternal{(。)(。)}!
// 3900928 2011-05-02 20:49:18 Accepted 1403 93MS 6756K 2329 B C++ 10SGetEternal
//
// Author's note: 93 ms -- moved to tears. Below is the optimized code, which
// can finally serve as a template. The earlier, slower two-pass construction
// is kept further down as CSA_Baseline() together with its walkthrough notes.
//
// HDU 1403 "Longest Common Substring": join the two strings as
//   s1 + '#' + s2 + '\0'
// build the suffix array by prefix doubling, build the height (LCP) array,
// and take the largest height between adjacent suffixes that start in
// different strings.

#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>

// NOTE: no `using namespace std;` here. The globals `rank` and `hash` below
// would become ambiguous with std::rank / std::hash on modern compilers.

#define MAXI 200011   // 2 * 100000 characters + separator + terminator, plus slack

char str[MAXI];                 // joined text: s1 '#' s2 '\0'
int *sa   = new int[MAXI];      // suffix array: sa[r] = start of the suffix with rank r
int *tsa  = new int[MAXI];      // scratch buffer, swapped with sa
int *rank = new int[MAXI];      // inverse of sa: rank[sa[r]] = r
int *k1   = new int[MAXI];      // first sort key  (rank of the j-prefix at i)
int *k2   = new int[MAXI];      // second sort key / previous round's ranks
int *t;                         // temporary used for pointer swaps
int l;                          // length of str, terminator included
int sigm;                       // number of counting-sort buckets (keys are < sigm)
int hash[MAXI];                 // counting-sort bucket counters
int h[MAXI];                    // h[i] = LCP of suffix i with its predecessor in sa
int height[MAXI];               // height[r] = LCP(sa[r], sa[r - 1])

// Debug helper: print the first `l` entries of `s` on one line.
void Print(int *s, int l)
{
    for (int i = 0; i < l; i++)
        printf("%d ", s[i]);
    putchar('\n');
}

// Debug helper: print every suffix in suffix-array order.
void PrintSA()
{
    for (int i = 0; i < l; i++)
        printf("%4d : %s\n", sa[i], str + sa[i]);
}

// Radix-sort step: one stable counting sort of `sa` by key k[sa[i]].
// Keys must lie in [0, sigm). The result is written to tsa and the two
// buffers are swapped, so `sa` holds the sorted order on return.
// (Author's note: radix sort is just repeated counting sorts that adjust
// relative order -- think of it as reducing the dimension of the key.)
void RS(int *k)
{
    int i;
    for (i = 0; i < sigm; i++)
        hash[i] = 0;
    for (i = 0; i < l; i++)
        hash[k[sa[i]]]++;
    for (i = 1; i < sigm; i++)
        hash[i] += hash[i - 1];
    for (i = l - 1; i >= 0; i--)          // backwards keeps the sort stable
        tsa[--hash[k[sa[i]]]] = sa[i];
    t = sa; sa = tsa; tsa = t;
}

// Construct Suffix Array (optimized prefix doubling).
//
// Reads str[0..l) and fills sa[] and rank[]. Uses sigm as the bucket count;
// it is reset to 256 here, so callers need not (but may) set it beforehand.
//
// Author's notes, translated: my first version computed ranks first and SA
// second, which ruled out the early exit once every rank is distinct (adding
// it took the run time from 468 ms to 93 ms). The order by the second key
// (rank of i + j) can be read straight off the previous SA: suffixes with
// i + j >= l come first, followed by the previous SA order shifted left by j,
// so only the first key needs a counting sort.
void CSA()
{
    int i, j, k;

    sigm = 256;                            // first round sorts by raw character
    for (i = 0; i < l; i++) {
        sa[i] = i;
        k1[i] = static_cast<unsigned char>(str[i]);   // never a negative bucket index
    }
    RS(k1);

    // `classes` counts distinct ranks after each round. It is tracked
    // separately from the bucket count: the original reused sigm for both,
    // so with sigm starting at 256 the loop was skipped whenever l <= 256.
    int classes = 1;
    for (j = 1; j < l && classes < l; j *= 2) {
        // Order by second key without sorting: suffixes whose second half
        // lies past the end come first ...
        k = 0;
        for (i = l - j; i < l; i++)
            tsa[k++] = i;
        // ... then everything else, in previous-SA order shifted by j.
        for (i = 0; i < l; i++)
            if (sa[i] >= j)
                tsa[k++] = sa[i] - j;
        t = sa; sa = tsa; tsa = t;         // sa now holds the second-key order

        RS(k1);                            // stable sort by first key
        t = k1; k1 = k2; k2 = t;           // k2 = old ranks, k1 = ranks being built

        // Re-rank: equal (first, second) key pairs share a rank.
        // With the unique '\0' terminator, equal first keys already imply
        // that both i + j are in range (the author's original argument);
        // the explicit bounds checks make this hold for any input.
        classes = 1;
        k1[sa[0]] = 0;
        for (i = 1; i < l; i++) {
            const int cur  = sa[i];
            const int prev = sa[i - 1];
            const bool same = k2[cur] == k2[prev]
                           && cur + j < l && prev + j < l
                           && k2[cur + j] == k2[prev + j];
            k1[cur] = same ? classes - 1 : classes++;
        }
        sigm = classes;                    // next round needs exactly this many buckets
    }

    // SA was built first, so rank has to be derived separately.
    for (i = 0; i < l; i++)
        rank[sa[i]] = i;
}

// Construct Suffix Array (baseline: the author's first, slower version).
//
// Same inputs and outputs as CSA(). Every round does a full LSD radix sort:
// counting sort by the second key (rank of i + j), then by the first key
// (rank of i). The author suspected the extra pass is where the time goes.
// Relies on str[l - 1] being a unique smallest terminator, because an
// out-of-range second key is encoded as 0.
void CSA_Baseline()
{
    int i, j;

    sigm = 256;
    for (i = 0; i < l; i++) {
        sa[i] = i;                                     // SA stores suffix start indices
        k1[i] = static_cast<unsigned char>(str[i]);    // characters are the first-round keys
    }
    RS(k1);                                            // SA for 1-character prefixes

    rank[sa[0]] = 0;
    for (i = 1; i < l; i++)
        rank[sa[i]] = rank[sa[i - 1]] + (k1[sa[i]] != k1[sa[i - 1]] ? 1 : 0);

    // sigm tracks the largest key so each counting sort only clears the
    // buckets it needs (the author measured roughly 15 ms saved).
    for (j = 1; j < l; j <<= 1, sigm++) {
        // k1 / k2 are rank[i] and rank[i + j]. A second half that lies past
        // the end gets 0 so the shorter suffix sorts first.
        for (i = 0; i < l; i++)
            k2[i] = (i + j < l) ? rank[i + j] : 0;
        t = k1; k1 = rank; rank = t;       // heap buffers: just swap pointers

        RS(k2);                            // LSD: second key first ...
        RS(k1);                            // ... then first key

        // Assign ranks and record the largest one in sigm.
        rank[sa[0]] = 0;                   // explicit; the original relied on stale buffer contents
        for (sigm = 0, i = 1; i < l; i++) {
            if (k1[sa[i]] == k1[sa[i - 1]] && k2[sa[i]] == k2[sa[i - 1]])
                rank[sa[i]] = rank[sa[i - 1]];
            else
                rank[sa[i]] = rank[sa[i - 1]] + 1;
            if (sigm < rank[sa[i]])
                sigm = rank[sa[i]];
        }
    }
}

// Construct Height Array: height[r] = LCP(sa[r], sa[r - 1]).
//
// Walks suffixes in text order and uses h[i] >= h[i - 1] - 1, so each
// comparison resumes where the previous one left off (the author notes it
// feels a bit like KMP). height[rank[i]] = h[i] and height[r] = h[sa[r]]:
// the two arrays are related the same way sa and rank are.
void CHA()
{
    height[0] = 0;
    for (int i = 0; i < l; i++) {
        if (rank[i] == 0) {                // smallest suffix has no predecessor
            h[i] = 0;
            continue;
        }
        const int cur  = sa[rank[i]];
        const int prev = sa[rank[i] - 1];
        int add = (i == 0 || h[i - 1] <= 1) ? 0 : h[i - 1] - 1;
        // The terminator normally stops this scan; the bounds checks keep it
        // inside str even if the terminator is missing.
        while (cur + add < l && prev + add < l && str[cur + add] == str[prev + add])
            add++;
        height[rank[i]] = h[i] = add;
    }
}

// Reads pairs of whitespace-separated strings until end of input and prints,
// for each pair, the length of their longest common substring.
int main()
{
    static char tbuf[MAXI];

    // "%100000s": the problem guarantees at most 100000 characters per
    // string; the width stops a longer token from overrunning tbuf / str.
    while (scanf("%100000s", tbuf) == 1) {
        // Join the strings first, then build the suffix array.
        const int l1 = static_cast<int>(strlen(tbuf));
        memcpy(str, tbuf, l1);
        l = l1;
        str[l++] = '#';                    // separator: never part of either input

        if (scanf("%100000s", tbuf) != 1)  // odd number of tokens: nothing to compare
            break;
        const int l2 = static_cast<int>(strlen(tbuf));
        memcpy(str + l, tbuf, l2);
        l += l2;
        // Unique smallest terminator: guarantees every suffix comparison is
        // decided (the author's example: "AAAAAA" without it).
        str[l++] = 0;

        sigm = 256;
        CSA();
        CHA();

        int best = 0;
        for (int i = 1; i < l; i++) {
            // Only pairs with one suffix in each string count; position l1
            // is the separator itself and is excluded.
            const bool crosses = (sa[i] < l1 && sa[i - 1] > l1)
                              || (sa[i - 1] < l1 && sa[i] > l1);
            if (crosses && height[i] > best)
                best = height[i];
        }
        printf("%d\n", best);
    }
    return 0;
}

// Author's closing note: reading the suffix-array code found via Baidu was
// painful and hard to follow. I will write up my own understanding of suffix
// arrays separately in the data-structures section, with less theory and more
// pictures. After this optimization I suspect the key1 order might also be
// derivable directly, the way key2 is -- but other things come first, since
// the provincial contest is close.