Distinct Substrings

本文介绍了一种高效算法,用于求解给定字符串中不同的子串数量。通过构建后缀数组并计算高度数组,利用这些数据结构可以快速得出所有不重复子串的数量。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Given a string, we need to find the total number of its distinct substrings.

Input

T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000

Output

For each test case output one number saying the number of distinct substrings.

Example

Sample Input:
2
CCCCC
ABABA

Sample Output:
5
9

#include<cstdio>
#include<cstring>
#include<algorithm>
#define Rep(i,n) for(i=0;i<(n);i++)//宏定义,减少下面的代码量 
 
typedef long long ll;
using namespace std;

const int maxn=1010;
char str[maxn];
int s[maxn];

int sa[maxn],t1[maxn],t2[maxn],c[maxn];//c当做是桶 
int rank[maxn],height[maxn];

void get_sa(int s[],int n,int m){
	int i,j,p,*x=t1,*y=t2;
	Rep(i,m) c[i]=0;
	Rep(i,n) c[x[i]=s[i]]++;//同时x[i]=s[i]
	Rep(i,m) c[i+1]+=c[i];
	for(i=n-1;i>=0;i--)	sa[--c[x[i]]]=i;//第一关键字基数排序
	for(j=1;j<=n;j<<=1){
		p=0;
		for(i=n-j;i<n;i++) y[p++]=i;
		Rep(i,n) if(sa[i]>=j) y[p++]=sa[i]-j;
		
		Rep(i,m) c[i]=0;
		Rep(i,n) c[x[y[i]]]++;//同时x[i]=s[i]
		Rep(i,m) c[i+1]+=c[i];
		for(i=n-1;i>=0;i--)	sa[--c[x[y[i]]]]=y[i];//第一关键字基数排序
		
		swap(x,y);
		p=1;x[sa[0]]=0;
		for(i=1;i<n;i++)
			x[sa[i]]=y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+j]==y[sa[i]+j]?p-1:p++;
		if(p>=n) break;
		m=p;
	}	
	
}

void get_height(int s[],int n){
	int i,j,k=0;
	for(i=0;i<=n;i++) rank[sa[i]]=i;
	Rep(i,n){
		if(k) k--;
		j=sa[rank[i]-1];
		while(s[i+k]==s[j+k]) k++;
		height[rank[i]]=k;
	}
}

int main()
{
	int T;
	scanf("%d",&T);
	while(T--){
		scanf(" %s",&str);
		int len=strlen(str);
		for(int i=0;i<=len;i++)
			s[i]=str[i];
		get_sa(s,len+1,128);
		get_height(s,len);
		int ans=len*(len+1)/2;
		for(int i=2;i<=len;i++)
			ans-=height[i];
		printf("%d\n",ans);
		
	}
	return 0;
}
还有来自大神的板子 https://www.cnblogs.com/kuangbin/archive/2013/04/24/3039634.html

/*
 * SPOJ 694
 * 给定一个字符串,求不相同子串个数。
 * 每个子串一定是某个后缀的前缀,那么原问题等价于求所有后缀之间的不相同子串个数。
 * 总数为n*(n-1)/2,再减掉height[i]的和就是答案
 */

#include <iostream>
#include <string.h>
#include <algorithm>
#include <stdio.h>
using namespace std;
const int MAXN=1010;

/*
*suffix array
*倍增算法  O(n*logn)
*待排序数组长度为n,放在0~n-1中,在最后面补一个0
*build_sa( ,n+1, );//注意是n+1;
*getHeight(,n);
*例如:
*n   = 8;
*num[]   = { 1, 1, 2, 1, 1, 1, 1, 2, $ };注意num最后一位为0,其他大于0
*rank[]  = { 4, 6, 8, 1, 2, 3, 5, 7, 0 };rank[0~n-1]为有效值,rank[n]必定为0无效值
*sa[]    = { 8, 3, 4, 5, 0, 6, 1, 7, 2 };sa[1~n]为有效值,sa[0]必定为n是无效值
*height[]= { 0, 0, 3, 2, 3, 1, 2, 0, 1 };height[2~n]为有效值
*
*/

int sa[MAXN];//SA数组,表示将S的n个后缀从小到大排序后把排好序的
             //的后缀的开头位置顺次放入SA中
int t1[MAXN],t2[MAXN],c[MAXN];//求SA数组需要的中间变量,不需要赋值
int rank[MAXN],height[MAXN];
//待排序的字符串放在s数组中,从s[0]到s[n-1],长度为n,且最大值小于m,
//除s[n-1]外的所有s[i]都大于0,r[n-1]=0
//函数结束以后结果放在sa数组中
void build_sa(int s[],int n,int m)
{
    int i,j,p,*x=t1,*y=t2;
    //第一轮基数排序,如果s的最大值很大,可改为快速排序
    for(i=0;i<m;i++)c[i]=0;
    for(i=0;i<n;i++)c[x[i]=s[i]]++;
    for(i=1;i<m;i++)c[i]+=c[i-1];
    for(i=n-1;i>=0;i--)sa[--c[x[i]]]=i;
    for(j=1;j<=n;j<<=1)
    {
        p=0;
        //直接利用sa数组排序第二关键字
        for(i=n-j;i<n;i++)y[p++]=i;//后面的j个数第二关键字为空的最小
        for(i=0;i<n;i++)if(sa[i]>=j)y[p++]=sa[i]-j;
        //这样数组y保存的就是按照第二关键字排序的结果
        //基数排序第一关键字
        for(i=0;i<m;i++)c[i]=0;
        for(i=0;i<n;i++)c[x[y[i]]]++;
        for(i=1;i<m;i++)c[i]+=c[i-1];
        for(i=n-1;i>=0;i--)sa[--c[x[y[i]]]]=y[i];
        //根据sa和x数组计算新的x数组
        swap(x,y);
        p=1;x[sa[0]]=0;
        for(i=1;i<n;i++)
            x[sa[i]]=y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+j]==y[sa[i]+j]?p-1:p++;
        if(p>=n)break;
        m=p;//下次基数排序的最大值
    }
}
void getHeight(int s[],int n)
{
    int i,j,k=0;
    for(i=0;i<=n;i++)rank[sa[i]]=i;
    for(i=0;i<n;i++)
    {
        if(k)k--;
        j=sa[rank[i]-1];
        while(s[i+k]==s[j+k])k++;
        height[rank[i]]=k;
    }
}

char str[MAXN];
int s[MAXN];

int main()
{
    //freopen("in.txt","r",stdin);
    //freopen("out.txt","w",stdout);
    int T;
    scanf("%d",&T);
    while(T--)
    {
        scanf("%s",str);
        int n=strlen(str);
        for(int i=0;i<=n;i++)s[i]=str[i];
        build_sa(s,n+1,128);
        getHeight(s,n);
        int ans=n*(n+1)/2;
        for(int i=2;i<=n;i++)ans-=height[i];
        printf("%d\n",ans);
    }
    return 0;
}



with online_base as ( select a.session_id as xma_session_id , k.uniqueid as unique_id, a.customer_id as dz_customer_id, a.user_id , case when date(a.create_time)<'2025-05-22' then from_unixtime(unix_timestamp(a.create_time) - 3600) else a.create_time end as xma_begin_time , ---会话开始时间 case when date(a.end_time)<'2025-05-22' then from_unixtime(unix_timestamp(a.end_time) - 3600) else a.create_time end as end_time , --会话结束时间 a.total_time , --通话总时长 a.is_im , --是否转人工的意愿 是/否 a.is_closed , --是否关闭 是/否 a.is_artificial_first, --是否人工优先 是/否 a.to_user , --坐席用户号 g.name , --坐席名字 a.im_status , --im会话状态 a.distribute_status , --im分配类型 正式分配客服 接通看这个 a.offline_status, --im会话结束类型 是否自动结束 case when a.distribute_status ='正式分配客服' then a.im_start_time else null end ima_begin_time , --ima开始时间 case when a.distribute_status ='正式分配客服' then a.end_time else null end ima_end_time ,--ima结束时间 a.first_response_time /1000 as first_response_time, --首次响应时间 这是客户发了第一条消息到客服回复时间差值 单位毫秒 a.first_reply_time/1000 as first_reply_time , --人工首次回复时长 接通人工之后,客服发的第一条消息时间 单位毫秒 b.avg_response_time/1000 as avg_reply_dur , a.end_type , --结束类型 手动结束还是超时未回复结束 a.channel_type , a.first brief_sum1 , a.second brief_sum2 , a.third brief_sum3, a.ima_product_name , --来源渠道 k.app_code , a.dt from dm_opr.dmd_opr_lia_online_df a left join ( SELECT name from ods.ods_smartxma_basic_ai_basic_user_global_df where dt=replace (date_sub (current_date(), 1), '-', '') ) g on a.to_user=g.id left join ( select session_id, avg_response_time from ods.ods_smartxma_im_chat_im_chat_kpi_df where dt=replace (date_sub (current_date(), 1), '-', '') ) b on a.session_id=b.session_id left join report_csc_ana.xma_testuu x on x.customer_id=a.customer_id left join (select * from ods.ods_smartxma_basic_ai_basic_customer_identity_df where dt=replace (date_sub (current_date(), 1), '-', '') -- and app_code='pulsar' ) k on k.customer_id=a.customer_id left join (selECT distinct SUBSTRING(CAST (phone AS STRING), GREATEST(LENGTH(CAST(phone AS STRING)) - 9, 1), 10) as phone from report_csc_ana.testphone)pp on k.phone=pp.phone where a.dt=replace (date_sub (current_date(), 1), '-', '') and x.customer_id is null --剔除测试uuid and pp.phone is null ), t_xma as ( SELECT DISTINCT xma_begin_time, xma_session_id, unique_id, dz_customer_id, ima_product_name as xma_prod_name , app_code ,distribute_status from online_base where dt=replace(date_sub (current_date(), 1), '-', '') and to_date (xma_begin_time) >= '2025-05-08' ), t_xma_flow as ( --distribute_status 为空 表示排队没成功(crm系统显示的未转人工) distribute_status为非正常分配表示排队成功但是分配失败 select xma_session_id, 'xma未分流' as is_flow from online_base where dt=replace(date_sub (current_date(), 1), '-', '') and is_im='是' ), t_next_call as ( select *, lead (xma_begin_time) over ( partition by case when unique_id is null then dz_customer_id else unique_id end order by xma_begin_time ) as next_xma_time from ( select * from t_xma ) a ), t_ima as ( SELECT DISTINCT a.xma_session_id, a.xma_session_id as ima_session_id, a.name as to_cust_svc, a.ima_begin_time, a.ima_end_time, a.first_reply_time as 1st_resp_dur, avg_reply_dur, ---逻辑检查 first_value (a.brief_sum1) over ( PARTITION BY a.xma_session_id order by ima_begin_time ) as brief_sum1, first_value (a.brief_sum2) over ( PARTITION BY a.xma_session_id order by ima_begin_time ) as brief_sum2, first_value (a.brief_sum3) over ( PARTITION BY a.xma_session_id order by ima_begin_time ) as brief_sum3 from online_base a where a.dt=replace(date_sub (current_date(), 1), '-', '') and to_date (a.xma_begin_time) >= '2025-05-08' and a.distribute_status ='正式分配客服' and a.ima_begin_time is not null ), t_workorder as ( --提单判定 先模糊匹配 selECT distinct order_id as work_order_number, customer_id as uniqueid, session_id, --关联在线会话号 call_record_id, ---关联callid 这个目前关联不到有问题 rela_order_id , --关联工单号 order_desc ---描述 from dm_opr.dmd_opr_bso_wo_info_df where dt=replace (date_sub (current_date(), 1), '-', '') and order_status_desc !='草稿' ), t_tmp as ( select DISTINCT to_date (t_xma.xma_begin_time) as dt, t_xma.xma_begin_time, t_xma.xma_session_id, t_xma.xma_prod_name, t_xma.unique_id, t_xma.dz_customer_id, t_xma.app_code , t_xma.distribute_status if (t_xma_flow.is_flow is null,'xma分流',t_xma_flow.is_flow) is_flow, t_ima.ima_session_id, t_ima.ima_begin_time, t_ima.ima_end_time, t_ima.to_cust_svc, t_ima.1st_resp_dur, t_ima.avg_reply_dur, t_ima.brief_sum1, t_ima.brief_sum2, t_ima.brief_sum3, t_next_call.next_xma_time, t_workorder.work_order_number, first_value (a.begin_call_dt) over ( PARTITION BY t_xma.xma_session_id order by a.begin_call_dt ) as next_ivr_call from t_xma left join t_xma_flow on t_xma.xma_session_id = t_xma_flow.xma_session_id left join t_next_call on t_xma.xma_session_id = t_next_call.xma_session_id left join (selECT disTINCT case when uniqueid is null then participant_role_id else uniqueid end customer_id , case when date(a.room_start_time)<'2025-05-22' then from_unixtime(unix_timestamp(a.room_start_time) - 3600) else a.room_start_time end as begin_call_dt ,k.app_code from dm_opr.dmd_opr_lia_call_info_df a left join (select * from ods.ods_smartxma_basic_ai_basic_customer_identity_df where dt=replace (date_sub (current_date(), 1), '-', '') ) k on k.customer_id=a.participant_role_id where a.dt =replace (date_sub (current_date(), 1), '-', '') and a.participant_role='CUSTOMER' and a.call_type='呼入' ) a on (case when t_xma.unique_id is null then t_xma.dz_customer_id else t_xma.unique_id end) = a.customer_id and unix_timestamp (t_xma.xma_begin_time) < unix_timestamp (a.begin_call_dt) and t_xma.app_code=a.app_code left join t_ima on t_ima.xma_session_id = t_xma.xma_session_id left join t_workorder on t_xma.xma_session_id= t_workorder.session_id ) insert OVERWRITE dm_opr.online_base_df SELECT *, case when ( unix_timestamp (next_xma_time) - unix_timestamp (xma_begin_time) BETWEEN 0 and 3600 * 24 * 2 ) then '48h重复进线' else '48h未重复进线' end is_repeat_48h, case when ( unix_timestamp (next_xma_time) - unix_timestamp (xma_begin_time) BETWEEN 0 and 3600 * 24 * 3 ) then '72h重复进线' else '72h未重复进线' end is_repeat_72h, case when ( unix_timestamp (next_xma_time) - unix_timestamp (xma_begin_time) BETWEEN 0 and 3600 * 24 * 4 ) then '96h重复进线' else '96h未重复进线' end is_repeat_96h, case when ( unix_timestamp (next_xma_time) - unix_timestamp (xma_begin_time) BETWEEN 0 and 3600 * 24 * 7 ) then '7D重复进线' else '7D未重复进线' end is_repeat_7d, case when ( unix_timestamp (next_xma_time) - unix_timestamp (xma_begin_time) BETWEEN 0 and 3600 * 24 * 2 ) or ( unix_timestamp (next_ivr_call) - unix_timestamp (xma_begin_time) BETWEEN 0 and 3600 * 24 * 2 ) or work_order_number is not null then '48h未解决' else '48h解决' end is_solve_48h, case when ( unix_timestamp (next_xma_time) - unix_timestamp (xma_begin_time) BETWEEN 0 and 3600 * 24 * 3 ) or ( unix_timestamp (next_ivr_call) - unix_timestamp (xma_begin_time) BETWEEN 0 and 3600 * 24 * 3 ) or work_order_number is not null then '72h未解决' else '72h解决' end is_solve_72h, case when ( unix_timestamp (next_xma_time) - unix_timestamp (xma_begin_time) BETWEEN 0 and 3600 * 24 * 4 ) or ( unix_timestamp (next_ivr_call) - unix_timestamp (xma_begin_time) BETWEEN 0 and 3600 * 24 * 4 ) or work_order_number is not null then '96h未解决' else '96h解决' end is_solve_96h, case when ( unix_timestamp (next_xma_time) - unix_timestamp (xma_begin_time) BETWEEN 0 and 3600 * 24 * 7 ) or ( unix_timestamp (next_ivr_call) - unix_timestamp (xma_begin_time) BETWEEN 0 and 3600 * 24 * 7 ) or work_order_number is not null then '7D未解决' else '7D解决' end is_solve_7d from t_tmp; 这个sql,有什么问题
最新发布
07-10
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值