给定一个长度为10000的字符串,写一个算法,找出其中最长的重复子串。例如字符串abczzacbca,结果是bc。
提示:此题是后缀树/后缀数组的典型应用,即求后缀数组的height[]数组的最大值。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Capacity of the working buffers: room for the 10000-character input named
 * in the problem statement plus the terminating NUL.
 */
#define SIZE 10001
/*
 * str  - private copy of the input string.
 * pstr - one pointer per suffix of str (filled by initRearArray, then sorted
 *        lexicographically in main).
 */
char str[SIZE], *pstr[SIZE];
/*
 * Returns the length of the longest common prefix of the two
 * NUL-terminated strings p and q.
 */
int cmpLen(char* p, char* q) {
    int matched = 0;
    for (;;) {
        /* Stop at the end of p ... */
        if (p[matched] == '\0') {
            break;
        }
        /* ... or at the first mismatch (which also covers the end of q). */
        if (p[matched] != q[matched]) {
            break;
        }
        matched++;
    }
    return matched;
}
/*
 * qsort comparator for an array of C-string pointers.
 *
 * p, q - addresses of two array elements; each element is a char* pointing
 *        at a NUL-terminated string.
 * Returns the strcmp() ordering of the two pointed-to strings.
 *
 * The parameters are const void* because that is the exact comparator type
 * qsort requires; passing a function with any other parameter types is an
 * incompatible function-pointer conversion.
 */
int pstrcmp(const void* p, const void* q) {
    const char* lhs = *(char* const*)p;
    const char* rhs = *(char* const*)q;
    return strcmp(lhs, rhs);
}
//初始化尾序数组
void initRearArray(char* src) {
int i = 0;
while (*(src + i)) {
pstr[i] = &str[i];
str[i] = *(src + i);
i++;
}
str[i] = '\0';
}
/*
 * Finds the longest repeated substring of the global buffer str.
 *
 * Compares every adjacent pair pstr[j], pstr[j + 1] with cmpLen and keeps the
 * first pair with the longest common prefix. This yields the longest repeated
 * substring when pstr holds the suffixes of str in lexicographic order (main
 * sorts pstr with qsort before calling this function).
 *
 * Returns a newly allocated, NUL-terminated copy of that substring (an empty
 * string when nothing repeats), or NULL if the allocation fails. The caller
 * owns the buffer and must free() it.
 */
char* findMaxRepeatStr(void) {
    int len = (int)strlen(str);
    int maxLen = 0, maxId = 0;

    for (int j = 0; j < len - 1; ++j) {
        int common = cmpLen(pstr[j], pstr[j + 1]);
        /* Strict '>' keeps the earliest pair on ties. */
        if (common > maxLen) {
            maxId = j;
            maxLen = common;
        }
    }

    /* Extract the substring; calloc zero-fills, so the copy is terminated. */
    char* result = calloc((size_t)maxLen + 1, sizeof *result);
    if (result == NULL) {
        return NULL;
    }
    /* Skip the copy when empty: pstr[maxId] may be unset for an empty str. */
    if (maxLen > 0) {
        memcpy(result, pstr[maxId], (size_t)maxLen);
    }
    return result;
}
/*
 * Demo driver: builds the suffix-pointer array for a sample string, sorts it,
 * and prints the longest repeated substring.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if no result buffer could be obtained.
 */
int main(void) {
    char* src = "abcazzacbczacbc";
    initRearArray(src);
    /* Sort the suffix pointers lexicographically so that suffixes sharing a
     * prefix end up next to each other. */
    qsort(pstr, strlen(str), sizeof pstr[0], pstrcmp);

    char* longest = findMaxRepeatStr();
    if (longest == NULL) {
        /* Never hand a NULL pointer to puts(). */
        fputs("findMaxRepeatStr: out of memory\n", stderr);
        return EXIT_FAILURE;
    }
    printf("最大重复子串:");
    puts(longest);
    /* The result is heap-allocated by findMaxRepeatStr; release it here. */
    free(longest);
    return EXIT_SUCCESS;
}