Gym - 101741K Consistent Occurrences

最新推荐文章于 2020-06-02 02:01:53 发布

夏奇火

最新推荐文章于 2020-06-02 02:01:53 发布

阅读量415

点赞数

CC 4.0 BY-SA版权

分类专栏： acm19年寒假集训 CodeForce 字符串

本文链接：https://blog.youkuaiyun.com/Only_Wolfy/article/details/87952181

CodeForce 同时被 3 个专栏收录

11 篇文章

订阅专栏

acm19年寒假集训

8 篇文章

订阅专栏

字符串

2 篇文章

订阅专栏

本文介绍了一种利用哈希算法进行字符串匹配的方法，特别适用于处理大量字符串且需避免重叠匹配的情况。通过计算不同长度的子串哈希值，可以高效地判断目标字符串在源字符串中的出现次数。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

去做了一点点职业规划。。
刚刚发现不用被强迫写csdn，wsl

传送门

题意
第一行给出n和m，下面一行给出字符串s，长度为n，再下面m行给出m个字符串，求这m个字符串在s里面有多少个，注意，计算的时候不可以重叠。

思路
哈希，不会ac自动机，队友想到一个暴力但是超时了（哈哈哈哈哈）。

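思路就是对于m个字符串中出现过的每一种长度（而不是1~n的所有长度），求出字符串s里头这个长度的所有子串的哈希值，以及每个哈希值不重叠出现的次数，然后把m个字符串中长度等于它的那些字符串的哈希值拿去查表，就可以得出答案。因为m个字符串的总长度不超过1e5，不同的长度最多只有大约 sqrt(2*1e5) ≈ 450 种，每种长度扫一遍s，所以总复杂度大约是 O(450 · n · log n)，可以接受。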

值得注意的是，因为有m个字符串t，且 | t | , m <= 1e5，开不了二维的 t [ ] [ ]；但因为它们的总长度不超过1e5，所以就用一个一维的 t [ 1e5 + 5 ] 把它们首尾相接地存起来，然后记录每个字符串在 t 中的左端点下标、右端点下标和长度 l [ i ] , r [ i ] , len [ i ]

最后采用双哈希，单哈希不知道过不过

#include  <algorithm>//           ¨|¨|¨|        ¨|¨|¨|¨|
#include   <iostream>//         ¨}¨}¨}¨}      ¨~¨~¨~
#include    <cstring>//        ¨~¨~ ¨~¨~    ¨~¨~
#include    <stdio.h>//       ¨~¨~  ¨~¨~   ¨~¨~
#include     <vector>//      ¨~¨~   ¨~¨~  ¨~¨~
#include      <cmath>//     ¨~¨~    ¨~¨~  ¨~¨~
#include      <queue>//    ¨~¨~ ¨~¨~¨~¨~  ¨~¨~
#include        <map>//   ¨~¨~      ¨~¨~   ¨~¨~
using namespace std ;//  ¨~¨~       ¨~¨~    ¨~¨~¨~
typedef long long ll;// ¨~¨~        ¨~¨~      ¨~¨~¨~¨~¨~
// Shorthand for a pair of 64-bit integers; below it carries the two hash
// values of a string (and, as a map value, a count plus a position).
#define pl pair<ll,ll>  ///////////////////////////////
// Shorthand for make_pair.
#define P(x,y) make_pair(x,y)
// Capacity of every global buffer/array in this file (limit 1e5 plus slack).
const int maxn = 100000 + 5;
// Moduli of the two polynomial hashes; index 0 / 1 selects the hash.
const ll mod[2] = {1000000000+7,1000000000+9};
// Bases (multipliers) of the two polynomial hashes, paired with mod[] by index.
const ll poww[2] = {13331,131};
/**
 * Reads the next decimal integer from stdin.
 *
 * Every non-digit character before the number is skipped; if a '-' is among
 * the skipped characters the result is negated. The character that follows
 * the last digit is consumed as well.
 *
 * @return the parsed value, or 0 if end of input is reached before any digit.
 */
ll read()
{
	// getchar() returns int: keeping it as int lets EOF be told apart from
	// every real character, so the skip loop below cannot spin forever.
	int ch = getchar();
	bool negative = false;
	// Look for the sign while skipping, so that leading blanks or newlines
	// in front of '-' no longer cause the sign to be dropped.
	while(ch != EOF && (ch < '0' || ch > '9')){
		if(ch == '-') negative = true;
		ch = getchar();
	}
	ll value = 0;
	while(ch >= '0' && ch <= '9'){
		value = value*10 + (ch - '0');
		ch = getchar();
	}
	return negative ? -value : value;
}
////////////////////////////////////////////////////////////
// Scratch table rebuilt by solve() for one substring length at a time.
// Key:   the (hash 0, hash 1) pair of a substring of s.
// Value: .first  = how many times that hash has been counted so far,
//        .second = end index in s of the most recently counted occurrence.
map<pl,pl>mp;
// s: the text, stored from index 1 (main reads it into s+1).
// t: all patterns stored back to back, also from index 1.
char s[maxn],t[maxn];
// h[k][i]: hash number k of the prefix s[1..i].
// p[k][i]: poww[k] raised to the i-th power, modulo mod[k].
ll h[2][maxn],p[2][maxn];
// n, m: the two integers read first in main (text length, pattern count).
// l[i], r[i], len[i]: first index, last index (in t) and length of pattern i.
// vis[x]: set to 1 once patterns of length x have been handed to solve().
// tot: not used anywhere in the visible code.
// ans[i]: answer printed for pattern i.
int n,m,l[maxn],r[maxn],len[maxn],vis[maxn],tot,ans[maxn];
/**
 * Hash number `op` of the substring s[L+1 .. r], taken from the prefix tables.
 *
 * @param op  which of the two hashes to use (0 or 1)
 * @param r   index of the last character of the substring
 * @param L   index just before the first character of the substring
 * @param len value used to pick the power p[op][len]; callers pass r - L
 * @return    the hash value, reduced into [0, mod[op])
 */
ll hashcode(int op,int r,int L,int len){
    const ll modulus = mod[op];
    // Prefix hash up to L, shifted left by `len` positions.
    const ll shiftedPrefix = 1ll*h[op][L]*p[op][len] % modulus;
    // Add the modulus before reducing so the difference cannot stay negative.
    const ll diff = h[op][r] - shiftedPrefix + modulus;
    return diff % modulus;
}
void _hash(){
    p[0][0] = p[1][0] = 1; // 别忘了这一句= =
    for(int k=0;k<2;k++)
        for(int i=1;i<=n;i++){
            h[k][i] = (1ll*h[k][i-1]*poww[k]+s[i]) % mod[k];
            p[k][i] = 1ll*p[k][i-1]*poww[k] % mod[k];
        }
}
void solve(int len){
    mp.clear();
    for(int i=len;i<=n;i++){
        pl tmp = P( hashcode(0,i,i-len,len),hashcode(1,i,i-len,len) );
        if(mp.find(tmp)==mp.end()) mp[tmp] = P(1,i);
        // 避免重复，比如aaaa里头，aa(12)，当检测到aa(23)的时候这对23的aa不行，所以continue，aa(34)这一对才行，
        // 此时，i-cnt[val].se == len ，是可以的
        else if(i-mp[tmp].second<len) continue;
        else { mp[tmp].first++; mp[tmp].second = i;} // 如果字符串没有覆盖，就到这一步
    }
    for(int i=1;i<=m;i++){
        if( r[i]-l[i]+1 == len ){ // 长度符合的字符串才需要遍历
            pl tmp = P(0,0);
            // 计算出这个字符串的hashcode
            for(int j=l[i];j<=r[i];j++)
                tmp = P( (tmp.first*poww[0]+t[j])%mod[0] , (tmp.second*poww[1]+t[j])%mod[1] );
            ans[i] = mp[tmp].first;
        }
    }
}
////////////////////////////////////////////////////////////
/**
 * Reads n, m and the text, then m patterns stored back to back in t; runs
 * solve() once per distinct pattern length and prints one answer per pattern.
 */
int main()
{
    // Local-debugging convenience: when in.txt exists, read from it instead
    // of stdin. The probing handle is closed again so it is not leaked.
    if(FILE *probe = fopen("in.txt","r")){
        fclose(probe);
        if(!freopen("in.txt","r",stdin)) return 1;
    }

    if(scanf("%d%d%s",&n,&m,s+1) != 3) return 0; // header missing or malformed
    _hash(); // prefix hashes and powers for the text s

    int used = 0; // number of cells of t occupied so far (t is filled from index 1)
    for(int i=1;i<=m;i++){
        if(scanf("%s",t+used+1) != 1){
            // Input ended early: keep only the patterns actually read.
            m = i-1;
            break;
        }
        len[i] = static_cast<int>(strlen(t+used+1));
        l[i] = used + 1;
        r[i] = used + len[i];
        used += len[i];
    }

    // One pass over s per distinct length; solve() fills ans[] for every
    // pattern of that length at once.
    for(int i=1;i<=m;i++){
        if(vis[len[i]]) continue;
        vis[len[i]] = 1;
        solve(len[i]);
    }

    for(int i=1;i<=m;i++) printf("%d\n",ans[i]);
    return 0;
}