后缀数组应用——多个字符串的相关问题

本文探讨如何利用后缀数组解决两个字符串问题:1) 求至少K个字符串中出现过的最长子串;2) 求每个字符串中至少出现两次且不重叠的最长子串。通过将字符串连接并构建后缀数组,再进行二分查找和分组,以达到O(nlogn)的时间复杂度解题。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

1、不小于 K 个字符串中的最长子串(POJ 3294)

给出 n 个字符串,求在至少 K 个字符串中出现过的最长的子串

虽然是多个字符串,但还是用老方法:将这 n 个字符串接在一起,中间用没有出现过且互不相同的字符分隔开。求出 SA、Height 数组。二分答案 x,然后把后缀按 Height ≥ x 分组,最后判断是否存在一个组,其中的后缀来自不少于 K 个不同的字符串即可(POJ 3294 要求超过半数,下面的代码取 K = ⌊n/2⌋ + 1)。时间复杂度 O(n log n)。

PS.开数组大小的时候不要忘记计算插入几个分隔符,不然 RE...

#include <cstdio>
#include <cstring>
#include <algorithm>

using namespace std;

// Capacity of every work array. It has to cover all input characters plus the
// separators inserted between strings and the terminating 0 appended by init().
const int MAX_N = 100111;

// T: number of input strings (read in main).
// n: number of symbols stored in a[] before the terminating 0.
// K: number of distinct strings a substring must occur in (set in doit()).
// l: length of the string most recently read by init().
// a: concatenation of all strings (characters shifted by +100) and separators.
// sa: suffix array filled by da(); r: rank array with r[sa[i]] == i;
// h[i]: length of the common prefix of the suffixes sa[i - 1] and sa[i].
int T, n, K, l, a[MAX_N], sa[MAX_N], r[MAX_N], h[MAX_N];
// Scratch buffers used by da(): ws = counting-sort buckets, wv = sort keys,
// wa / wb = the two rank buffers that are swapped after every doubling pass.
int ws[MAX_N], wv[MAX_N], wa[MAX_N], wb[MAX_N];
// Input buffer for one string.
char s[1005];
// who[i]: 1-based index of the string that position i of a[] belongs to,
//         0 for separators and for the terminator.
// g[t]:   value of cnt at the time string t was last counted in check().
// ans[1..sum]: start positions (in a[]) of the answers found by the most recent
//         successful check(); cnt: running group stamp used together with g[].
int who[MAX_N], g[105], ans[MAX_N], cnt = 0, sum = 0;
// Becomes true after the first test case so that doit() prints a blank line
// before every later case. Note that check() declares its own local `ok`.
bool ok = 0;

void da(int *a, int *sa, int n, int m) {
	int *x = wa, *y = wb;
	for (int i = 0; i < m; i ++) ws[i] = 0;
	for (int i = 0; i < n; i ++) ws[x[i] = a[i]] ++;
	for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
	for (int i = n - 1; i >= 0; i --) sa[-- ws[x[i]]] = i;
	for (int k = 1; k <= n; k <<= 1) {
		int p = 0;
		for (int i = n - k; i < n; i ++) y[p ++] = i;
		for (int i = 0; i < n; i ++) if (sa[i] >= k) y[p ++] = sa[i] - k;
		for (int i = 0; i < n; i ++) wv[i] = x[y[i]];
		for (int i = 0; i < m; i ++) ws[i] = 0;
		for (int i = 0; i < n; i ++) ws[wv[i]] ++;
		for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
		for (int i = n - 1; i >= 0; i --) sa[-- ws[wv[i]]] = y[i];
		swap(x, y); p = 1; x[sa[0]] = 0;
		for (int i = 1; i < n; i ++) x[sa[i]] = (y[sa[i - 1]] == y[sa[i]]) && (y[sa[i - 1] + k] == y[sa[i] + k]) ? p - 1 : p ++;
		if (p >= n) break; m = p;
	}
}
/**
 * Fills the rank array r and the height array h from sa.
 *
 * After the call r[sa[i]] == i for i in 1..n, and h[r[i]] is the length of the
 * common prefix of the suffix starting at i and the suffix that precedes it in
 * sa. Positions are visited in text order so the previous match length minus
 * one can be reused as the starting point.
 */
void calc() {
	for (int pos = 1; pos <= n; ++pos) {
		r[sa[pos]] = pos;
	}

	int match = 0;
	for (int i = 0; i < n; ++i) {
		if (match > 0) {
			--match;
		}
		const int before = sa[r[i] - 1];
		while (a[i + match] == a[before + match]) {
			++match;
		}
		h[r[i]] = match;
	}
}
/**
 * Tests whether some substring of length x occurs in at least K different
 * input strings.
 *
 * Suffixes that are adjacent in sa and share a prefix of at least x symbols
 * (h >= x) form a group. For every group the number of distinct source
 * strings is counted with the stamp array g and the counter cnt.
 *
 * On success ans[1..sum] is overwritten with one start position per qualifying
 * group. On failure ans and sum are left untouched, so they keep the result of
 * the last successful call.
 *
 * Both scans are bounded by n: h[n + 1] is never written by calc(), so it may
 * still hold a value from an earlier, longer test case.
 *
 * NOTE: only groups with at least one h[i] >= x are examined, i.e. groups of
 * two or more suffixes. A lone suffix is never a candidate, so K == 1 is not
 * handled by this routine.
 *
 * @param x  candidate substring length (>= 1)
 * @return   true if at least one group covers K or more different strings
 */
inline bool check(int x) {
	bool found = false;
	int j = 0;
	for (int i = 2; i <= n; i = j + 1) {
		// Skip to the first adjacent pair that shares at least x symbols.
		while (i <= n && h[i] < x) ++i;
		if (i > n) break;

		// Extend the group; it consists of sa[i - 1 .. j - 1].
		for (j = i; j <= n && h[j] >= x; ++j);

		// A group with fewer than K suffixes cannot cover K strings.
		if (j - i + 1 < K) continue;

		++cnt;  // fresh stamp for this group
		int tot = 0;
		for (int k = i - 1; k < j; ++k) {
			const int t = who[sa[k]];
			if (t != 0 && g[t] != cnt) {
				g[t] = cnt;
				++tot;
			}
		}
		if (tot < K) continue;

		// The first hit of this call discards the answers of earlier calls.
		if (!found) {
			found = true;
			sum = 0;
		}
		ans[++sum] = sa[i - 1];
	}
	return found;
}
void init() {
	n = 0; cnt = 0;
	for (int t = 1; t <= T; t ++) {
		scanf("%s", s); l = strlen(s);
		for (int i = 0; i < l; i ++) a[n] = s[i] + 100, who[n] = t, n ++;
		if (t != T) a[n] = t, who[n] = 0, n ++;
	}
	a[n] = 0; who[n] = 0;
	da(a, sa, n + 1, 228); calc();
//	for (int i = 1; i <= n; i ++) printf("%d ", h[i]); printf("\n");
}
/**
 * Solves one test case: binary-searches the largest length accepted by
 * check() and prints every substring recorded in ans[], or "?" when no
 * length is accepted. A blank line is printed before every case except
 * the first.
 */
void doit() {
	K = T / 2 + 1;  // the substring must occur in more than half of the strings

	int lo = 1;
	int hi = 1000;
	while (lo <= hi) {
		const int mid = (lo + hi) >> 1;
		if (check(mid)) {
			lo = mid + 1;
		} else {
			hi = mid - 1;
		}
	}
	// hi is now the largest accepted length, or 0 if none was accepted.

	if (ok) {
		printf("\n");
	} else {
		ok = 1;
	}

	if (hi == 0) {
		printf("?\n");
		return;
	}
	for (int idx = 1; idx <= sum; idx++) {
		const int start = ans[idx];
		for (int off = 0; off < hi; off++) {
			printf("%c", static_cast<char>(a[start + off] - 100));
		}
		printf("\n");
	}
}
/**
 * Processes test cases until the string count 0 or the end of input.
 *
 * The loop requires scanf to convert exactly one integer. Comparing against
 * EOF alone would treat a failed conversion (return value 0) as success and
 * rerun the solver on a stale T.
 */
int main() {
	while (scanf("%d", &T) == 1 && T != 0) {
		init();
		doit();
	}
	return 0;
}
2、每个字符串最少出现两次且不重叠的最长子串 (SPOJ 220)

给出 n 个字符串,求在每个字符串中最少出现两次且不重叠的最长子串。

和上一题的方法大同小异:接起来,求数组,二分答案 x,分组。唯一不同的是判断条件:组内必须包含来自全部 n 个字符串的后缀,并且对每个字符串而言,该组中属于它的后缀的起始位置(SA 值)的最大值与最小值之差都要大于等于 x(这样才能保证两次出现互不重叠)。

#include <cstdio>
#include <cstring>
#include <algorithm>

using namespace std;

// Capacity of every work array. It has to cover all input characters plus the
// separators inserted between strings and the terminating 0 appended by init().
const int MAX_N = 100105;

// C: number of test cases (read in main); T: number of strings in the current
// test case (read in init()).
// n: number of symbols stored in a[] before the terminating 0.
// l: length of the string most recently read by init().
// a: concatenation of all strings (characters shifted by +10) and separators.
// sa: suffix array filled by da(); r: rank array with r[sa[i]] == i;
// h[i]: length of the common prefix of the suffixes sa[i - 1] and sa[i].
int C, T, n, l, a[MAX_N], sa[MAX_N], r[MAX_N], h[MAX_N];
// Scratch buffers used by da(): ws = counting-sort buckets, wv = sort keys,
// wa / wb = the two rank buffers that are swapped after every doubling pass.
int ws[MAX_N], wv[MAX_N], wa[MAX_N], wb[MAX_N];
// Input buffer for one string.
char s[10005];
// who[i]: 1-based index of the string that position i of a[] belongs to,
//         0 for separators and for the terminator.
// mx[t] / mn[t]: largest / smallest start position of a suffix of string t
//         inside the group currently examined by check().
// g is not referenced by any function in this program, and cnt is only reset
// in init().
int who[MAX_N], g[MAX_N], cnt = 0, mx[MAX_N], mn[MAX_N];

void da(int *a, int *sa,int n, int m) {
	int *x = wa, *y = wb;
	for (int i = 0; i < m; i ++) ws[i] = 0;
	for (int i = 0; i < n; i ++) ws[x[i] = a[i]] ++;
	for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
	for (int i = n - 1; i >= 0; i --) sa[-- ws[x[i]]] = i;
	for (int k = 1; k <= n; k <<= 1) {
		int p = 0;
		for (int i = n - k; i < n; i ++) y[p ++] = i;
		for (int i = 0; i < n; i ++) if (sa[i] >= k) y[p ++] = sa[i] - k;
		for (int i = 0; i < n; i ++) wv[i] = x[y[i]];
		for (int i = 0; i < m; i ++) ws[i] = 0;
		for (int i = 0; i < n; i ++) ws[wv[i]] ++;
		for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
		for (int i = n - 1; i >= 0; i --) sa[-- ws[wv[i]]] = y[i];
		swap(x, y); p = 1; x[sa[0]] = 0;
		for (int i = 1; i < n; i ++) x[sa[i]] = (y[sa[i - 1]] == y[sa[i]]) && (y[sa[i - 1] + k] == y[sa[i] + k]) ? p - 1 : p ++;
		if (p >= n) break; m = p;
	}
}
/**
 * Fills the rank array r and the height array h from sa.
 *
 * After the call r[sa[i]] == i for i in 1..n, and h[r[i]] is the length of the
 * common prefix of the suffix starting at i and the suffix that precedes it in
 * sa. Positions are visited in text order so the previous match length minus
 * one can be reused as the starting point.
 */
void calc() {
	for (int pos = 1; pos <= n; ++pos) {
		r[sa[pos]] = pos;
	}

	int match = 0;
	for (int i = 0; i < n; ++i) {
		if (match > 0) {
			--match;
		}
		const int before = sa[r[i] - 1];
		while (a[i + match] == a[before + match]) {
			++match;
		}
		h[r[i]] = match;
	}
}
/**
 * Tests whether some substring of length x occurs at least twice without
 * overlap in every one of the T strings.
 *
 * Suffixes that are adjacent in sa and share a prefix of at least x symbols
 * (h >= x) form a group. A group qualifies when, for every string, the
 * largest and the smallest start position of its suffixes in the group are at
 * least x apart.
 *
 * The original code lacked the ';' after the group-end scan, which turned the
 * size test into the body of that scan and made its `continue` a no-op. The
 * pruning now applies to the outer loop as intended. Both scans are bounded
 * by n, so the routine does not rely on a sentinel in h[n + 1].
 *
 * @param x  candidate substring length (>= 1)
 * @return   true if at least one group qualifies
 */
inline bool check(int x) {
	int j = 0;
	for (int i = 2; i <= n; i = j + 1) {
		// Skip to the first adjacent pair that shares at least x symbols.
		while (i <= n && h[i] < x) ++i;
		if (i > n) break;

		// Extend the group; it consists of sa[i - 1 .. j - 1].
		for (j = i; j <= n && h[j] >= x; ++j);

		// A group with fewer than T suffixes cannot contain every string.
		if (j - i + 1 < T) continue;

		for (int k = 1; k <= T; k++) {
			mn[k] = 10000000;
			mx[k] = -1;
		}
		for (int k = i - 1; k < j; k++) {
			const int pos = sa[k];
			const int owner = who[pos];
			if (owner == 0) continue;  // separator or terminator
			if (pos > mx[owner]) mx[owner] = pos;
			if (pos < mn[owner]) mn[owner] = pos;
		}

		// A string without any suffix in the group keeps mx = -1, mn = 10000000,
		// which makes the difference negative and fails the test.
		bool every_string_ok = true;
		for (int k = 1; k <= T; k++) {
			if (mx[k] - mn[k] < x) {
				every_string_ok = false;
				break;
			}
		}
		if (every_string_ok) return true;
	}
	return false;
}
/**
 * Reads one test case (the string count T followed by T strings) and builds
 * the suffix and height arrays.
 *
 * Every character is stored in a[] shifted by +10. Between two consecutive
 * strings the separator value t (1 .. T - 1) is inserted, and a final 0 acts
 * as terminator. who[] records the 1-based string index of every position
 * (0 for separators and the terminator).
 *
 * Input errors are handled explicitly: an unreadable T is treated as 0 and an
 * unreadable string as empty. The field width keeps the read inside s[10005].
 */
void init() {
	if (scanf("%d", &T) != 1) T = 0;
	n = 0;
	cnt = 0;
	for (int t = 1; t <= T; t++) {
		if (scanf("%10004s", s) != 1) s[0] = '\0';
		l = strlen(s);
		for (int i = 0; i < l; i++) {
			a[n] = s[i] + 10;
			who[n] = t;
			n++;
		}
		if (t != T) {
			a[n] = t;  // unique separator between consecutive strings
			who[n] = 0;
			n++;
		}
	}
	a[n] = 0;
	who[n] = 0;
	da(a, sa, n + 1, 138);
	calc();
}
/**
 * Solves one test case: binary-searches the largest length accepted by
 * check() and prints it (0 when no length is accepted).
 */
void doit() {
	h[n + 1] = -1;  // sentinel that stops the group scan in check()

	int lo = 1;
	int hi = 10000;
	while (lo <= hi) {
		const int mid = (lo + hi) >> 1;
		if (check(mid)) {
			lo = mid + 1;
		} else {
			hi = mid - 1;
		}
	}
	printf("%d\n", hi);
}
/**
 * Reads the number of test cases and solves them one after another.
 *
 * The program stops quietly if the count cannot be read. The loop tests
 * `C-- > 0`, so a negative count runs no cases instead of counting down
 * through the whole int range.
 */
int main() {
	if (scanf("%d", &C) != 1) return 0;
	while (C-- > 0) {
		init();
		doit();
	}
	return 0;
}



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值