string string string
Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others)
Total Submission(s): 2377 Accepted Submission(s): 748
Problem Description
Uncle Mao is a wonderful ACMER. One day he met an easy problem, but Uncle Mao was so lazy that he left the problem to you. I hope you can give him a solution.
Given a string s, we define a substring that occurs exactly k times as an important string, and you need to find out how many substrings are important strings.
Input
The first line contains an integer T (T≤100) implying the number of test cases.
For each test case, there are two lines:
the first line contains an integer k (k≥1) which is described above;
the second line contains a string s (length(s)≤10^5).
It's guaranteed that ∑length(s)≤2×10^6.
Output
For each test case, print the number of the important substrings in a line.
Sample Input
2
2
abcabc
3
abcabcabcabc
Sample Output
6
9
Source
2017 ACM/ICPC Asia Regional Shenyang Online
题意:给出一个字符串,找出恰好出现K次的子串的数量
思路:利用后缀数组,并不是太理解后缀数组的原理,仅仅知道那几个数组各自的含义:sa[i]表示排名为i的后缀的下标是哪一个,rank[i]表示后缀i的排名是什么,height[i]表示sa[i-1]后缀和sa[i]后缀的最长公共前缀,利用RMQ可以把任意两个后缀的最长公共前缀求出。这道题是先得出至少出现k次的子串:k个k个地枚举已经排好序的后缀,如果sa[i]到sa[i+k-1]的最长公共前缀是c的话,那么就有c个子串出现了至少k次,然后减去至少出现k+1次的子串,但这还要和这k个后缀的最长公共前缀有关系,所以把这个区间分别向左、向右扩大一个单位,减去这两个区间的最长公共前缀,但这又可能会把至少出现k+2次的子串多减了一遍,所以再加上这个区间同时向左向右扩大一个单位的最长公共前缀,注意要处理边界问题
#include <stdio.h>
#include <string.h>
#include <algorithm>
using namespace std;
typedef long long ll;
// Element count used to size the global arrays in this file.
const int MAXN = 100100;
// Scratch storage for da(): t1 and t2 back its x / y key arrays,
// c holds the counting-sort bucket counts.
int t1[MAXN],t2[MAXN],c[MAXN];
// Tells whether positions a and b of r carry the same pair of keys:
// the key stored at the position itself and the key stored l slots later.
// The second pair is only inspected when the first pair already matches.
bool cmp(int *r,int a,int b,int l)
{
    if (r[a] != r[b]) {
        return false;
    }
    return r[a + l] == r[b + l];
}
// Builds the suffix array of str by prefix doubling with counting sorts,
// then derives the rank and height arrays from it.
//
//   str    - input symbols; indices 0..n are read. Every value is used as an
//            index into c[], so it must lie in [0, m).
//            NOTE(review): the caller in this file stores 0 at str[n] and
//            values >= 1 elsewhere; the code appears to rely on str[n] being
//            a unique smallest terminator - confirm before reusing.
//   sa     - output; sa[i] is the start index of the suffix with rank i
//            (n + 1 entries, the terminator position included).
//   Rank   - output; Rank[sa[i]] = i for i in 0..n.
//   height - output; height[Rank[i]] is the number of leading symbols shared by
//            suffix i and the suffix ranked immediately before it (i in 0..n-1).
//   n      - number of symbols, not counting the entry at str[n].
//   m      - exclusive upper bound on the symbol values.
//
// Uses the file-level scratch arrays t1, t2 and c.
void da(int str[],int sa[],int Rank[],int height[],int n,int m)
{
// Include position n (the entry after the last symbol) in the sort.
n++;
int i,j,p,*x = t1,*y = t2;
// Initial counting sort of all positions by their single symbol; x[] keeps the keys.
for(i = 0; i < m; i++) c[i] = 0;
for(i = 0; i < n; i++) c[x[i] = str[i]]++;
for(i = 1; i < m; i++) c[i] += c[i - 1];
for(i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;
// Each round sorts by the pair (x[pos], x[pos + j]) and doubles j.
for(j = 1; j <= n; j <<= 1) {
p = 0;
// y[] lists positions ordered by second key: positions whose second half
// falls past the end come first, ...
for(i = n - j; i < n; i++) y[p++] = i;
// ... then the rest, in the order given by the current sa shifted back by j.
for(i = 0; i < n; i++) if(sa[i] >= j) y[p++] = sa[i] - j;
// Counting sort on the first key x[], walking y[] backwards so the
// second-key order is kept among equal first keys.
for(i = 0; i < m; i++) c[i] = 0;
for(i = 0; i < n; i++) c[x[y[i]]]++;
for(i = 1; i < m; i++) c[i] += c[i - 1];
for(i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];
// y now holds the previous keys; x receives the new ones.
swap(x,y);
// Assign new keys along sa: equal key pairs share a value, p counts distinct values.
// NOTE(review): cmp reads y at index sa[i] + j; presumably kept in range by
// the unique terminator - confirm.
p = 1; x[sa[0]] = 0;
for(i = 1; i < n; i++) {
x[sa[i]] = cmp(y,sa[i - 1],sa[i],j) ? p - 1 : p++;
}
// All keys distinct: the order is final.
if(p >= n) break;
// The next round only needs p buckets.
m = p;
}
int k = 0;
n--;
for(i = 0; i <= n; i++) Rank[sa[i]] = i;
// Height pass: walk suffixes in text order, carrying the match length k over
// from the previous suffix after reducing it by one.
for(i = 0; i < n; i++) {
if(k) k--;
// j is the suffix ranked just before suffix i.
// NOTE(review): requires Rank[i] >= 1 for every i < n, i.e. sa[0] == n - confirm.
j = sa[Rank[i] - 1];
while(str[i + k] == str[j + k]) k++;
height[Rank[i]] = k;
}
}
// Rank[] and height[] are the output arrays main() hands to da().
int Rank[MAXN],height[MAXN];
// Values compared by the sparse table; main() copies height[1..len] into it.
int RMQ[MAXN];
// mm[i] = floor(log2(i)) for i >= 1, with mm[0] = -1; filled by initRMQ().
int mm[MAXN];
// best[p][j] = index of a smallest RMQ value in [j, j + 2^p - 1]; filled by initRMQ().
int best[40][MAXN];
// Prepares the sparse table over RMQ[1..n].
//
// After the call:
//   mm[v]          = floor(log2(v)) for 1 <= v <= n, and mm[0] = -1;
//   best[level][s] = index of a minimum of RMQ over [s, s + 2^level - 1]
//                    (on equal values the index from the right half is kept).
void initRMQ(int n)
{
    mm[0] = -1;
    for (int v = 1; v <= n; ++v) {
        // The logarithm grows by one exactly at powers of two.
        const bool powerOfTwo = (v & (v - 1)) == 0;
        mm[v] = mm[v - 1] + (powerOfTwo ? 1 : 0);
        // Level 0: a window of length one is its own minimum.
        best[0][v] = v;
    }

    const int topLevel = mm[n];
    for (int level = 1; level <= topLevel; ++level) {
        const int half = 1 << (level - 1);
        const int span = half << 1;
        for (int start = 1; start + span - 1 <= n; ++start) {
            // Combine the two half-length windows from the level below.
            const int left = best[level - 1][start];
            const int right = best[level - 1][start + half];
            best[level][start] = (RMQ[left] < RMQ[right]) ? left : right;
        }
    }
}
// Returns the index of a minimum of RMQ over the inclusive range [a, b],
// using the tables filled by initRMQ(). The range is covered by two
// (possibly overlapping) windows of length 2^mm[b - a + 1]: one starting
// at a and one ending at b.
int askRMQ(int a,int b)
{
    const int level = mm[b - a + 1];
    const int left = best[level][a];
    const int right = best[level][b - (1 << level) + 1];
    return (RMQ[left] < RMQ[right]) ? left : right;
}
// Suffix array filled by da(): sa[i] is the start index of the suffix with rank i.
int sa[MAXN];
// Integer-coded copy of str built in main(): str[i] - 'a' + 1, plus a 0 at index len.
int r[MAXN];
// Input string of the current test case.
char str[MAXN];
// strlen(str) for the current test case.
int len;
// Length of the longest common prefix of the suffixes starting at text
// positions a and b.
//
// Relies on the file-level Rank / sa / height arrays and on the sparse table
// having been prepared for the current string. The arguments may be given in
// either order.
int lcp(int a,int b)
{
    // Work with ranks from here on.
    a = Rank[a];
    b = Rank[b];
    // Same suffix: the common prefix is the whole suffix. This case must be
    // handled separately because askRMQ(a + 1, a) would look up mm[0] == -1
    // (the original author notes that omitting this check caused runtime errors).
    if (sa[a] == sa[b]) return len - sa[a];
    // The range query needs its lower rank first; without this an inverted
    // pair would index mm[] at zero or a negative position.
    if (a > b) std::swap(a, b);
    // The answer is the minimum height over ranks a + 1 .. b.
    return height[askRMQ(a + 1, b)];
}
// Reads T test cases (an integer k and a string each) and prints, for every
// case, the number of distinct substrings that occur exactly k times.
//
// For each window of k consecutive suffixes in suffix-array order, the
// substrings occurring in exactly those k suffixes number
//   lcp(window) - lcp(window extended left) - lcp(window extended right)
//               + lcp(window extended both ways),
// where an extension that would leave the rank range 1..len is skipped.
int main()
{
    int T,k;
    // Stop quietly on missing or malformed input instead of reusing stale data.
    if (scanf("%d",&T) != 1) return 0;
    while (T--) {
        if (scanf("%d",&k) != 1) break;
        if (scanf("%s",str) != 1) break;
        len = static_cast<int>(strlen(str));
        // Map letters to 1..26 so that 0 can serve as the terminator at r[len].
        for (int i = 0; i < len; i++) {
            r[i] = str[i] - 'a' + 1;
        }
        r[len] = 0;
        da(r,sa,Rank,height,len,30);
        for (int i = 1; i <= len; i++) {
            RMQ[i] = height[i];
        }
        initRMQ(len);
        ll ans = 0;
        // Rank 0 belongs to the terminator, so windows cover ranks 1..len.
        for (int i = 1; i + k - 1 <= len; i++) {
            ans += lcp(sa[i],sa[i + k - 1]);
            if (i - 1 > 0) ans -= lcp(sa[i - 1],sa[i + k - 1]);
            if (i + k <= len) ans -= lcp(sa[i],sa[i + k]);
            if (i - 1 > 0 && i + k <= len) ans += lcp(sa[i - 1],sa[i + k]);
        }
        // %lld is the standard conversion for long long; %I64d is a
        // vendor extension that conforming libraries do not understand.
        printf("%lld\n",ans);
    }
    return 0;
}