Language:
DNA Sequence
Description
It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze segments of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, then it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is to count how many kinds of DNA sequences of a species don't contain any of those segments.
Suppose that the DNA sequences of a species consist of A, C, T and G, and that the length of each sequence is a given integer n.
Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains one genetic disease segment; the length of each segment is not larger than 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36
思路:首先建立ac自动机,然后构造转移矩阵,下面的博客讲得很清楚,加一点自己的理解在代码里。
http://hi.baidu.com/ccsu_010/item/7847a3c17f6fe2bc0d0a7b89
#include<iostream>
#include<cstdio>
#include<string>
#include<cstring>
#include<vector>
#include<cmath>
#include<queue>
#include<stack>
#include<map>
#include<set>
#include<algorithm>
using namespace std;
typedef long long LL;
const int maxn=15;        // not referenced anywhere in the visible code
const int maxm=15*10;     // capacity of the automaton's per-state arrays
const int SIGMA_SIZE=4;   // alphabet size (A, C, T, G)
const int MOD=100000;     // every count is kept modulo this value
char word[20];            // input buffer for one disease segment
// NOTE: main() reads the number of segments into n and the sequence length
// into m, i.e. the reverse of the naming used in the problem statement.
// sz is the number of automaton states; it is also the active dimension of
// every Matrix.
int n,m,sz;
int cnt[110][110];        // not referenced anywhere in the visible code

// Square matrix of residues modulo MOD.
// Storage is fixed at 110x110, but only the leading sz x sz block (sz being
// the global state count) takes part in multiplication; everything outside
// that block of a product is zero.
struct Matrix
{
    int mat[110][110];

    // Constructs the zero matrix.
    Matrix(){memset(mat,0,sizeof(mat));}

    // Returns (*this) * a restricted to the leading sz x sz block, with every
    // entry reduced modulo MOD.
    // The operand is taken by const reference (a by-value parameter would copy
    // ~48 KB on every call) and the operator is const so it can be used on
    // const matrices. Neither operand is modified, so a * a is safe.
    Matrix operator*(const Matrix& a) const
    {
        Matrix res;
        for(int i=0;i<sz;i++)
            for(int k=0;k<sz;k++)
            {
                const int lhs=mat[i][k];
                if(lhs==0)continue;      // zero factor contributes nothing
                for(int j=0;j<sz;j++)
                {
                    const int rhs=a.mat[k][j];
                    if(rhs==0)continue;
                    // Widen to LL: the product of two residues < MOD can
                    // reach ~1e10, which does not fit in a 32-bit int.
                    res.mat[i][j]=(res.mat[i][j]+(LL)lhs*rhs%MOD)%MOD;
                }
            }
        return res;
    }
};
// Aho-Corasick automaton over the alphabet {A, C, T, G}.
//
// State 0 is the root. The number of states lives in the global sz, which
// clear() resets and insert() grows. After getfail() every ch[state][c] is a
// complete transition (missing trie edges are redirected through the failure
// links), and val[state] is non-zero for every state whose string ends with
// one of the inserted words.
struct AC
{
    int ch[maxm][4],val[maxm];   // transitions / "ends with a word" flag
    int fail[maxm],last[maxm];   // failure links / nearest word state on the
                                 // failure chain (written by getfail(), not
                                 // read anywhere in the visible code)

    // Resets the automaton to a lone root state.
    void clear()
    {
        sz=1;
        val[0]=0;
        memset(ch[0],0,sizeof(ch[0]));
    }

    // Maps a nucleotide letter to its column; anything that is not
    // 'A', 'C' or 'T' is treated as the fourth symbol.
    int idx(char x)
    {
        switch(x)
        {
            case 'A': return 0;
            case 'C': return 1;
            case 'T': return 2;
            default:  return 3;
        }
    }

    // Adds the NUL-terminated word s to the trie and flags its final state.
    void insert(char *s)
    {
        int node=0;
        for(char *p=s;*p!='\0';++p)
        {
            const int c=idx(*p);
            if(ch[node][c]==0)
            {
                // Allocate a fresh, empty state at index sz.
                memset(ch[sz],0,sizeof(ch[sz]));
                val[sz]=0;
                ch[node][c]=sz;
                ++sz;
            }
            node=ch[node][c];
        }
        val[node]=1;
    }

    // Breadth-first construction of the failure links; also completes the
    // transition table and spreads the word flag along failure links.
    void getfail()
    {
        std::queue<int> pending;
        fail[0]=0;
        // Children of the root fail back to the root.
        for(int c=0;c<SIGMA_SIZE;c++)
        {
            const int child=ch[0][c];
            if(child==0)continue;
            fail[child]=0;
            last[child]=0;
            pending.push(child);
        }
        while(!pending.empty())
        {
            const int cur=pending.front();
            pending.pop();
            // The string of fail[cur] is a suffix of the string of cur, so if
            // the former ends with a word, the latter does as well.
            if(val[fail[cur]])val[cur]=1;
            for(int c=0;c<SIGMA_SIZE;c++)
            {
                const int child=ch[cur][c];
                if(child==0)
                {
                    // No trie edge: borrow the transition of the failure state.
                    ch[cur][c]=ch[fail[cur]][c];
                    continue;
                }
                pending.push(child);
                // Walk the failure chain until a state with a c-transition
                // (or the root) is found.
                int v=fail[cur];
                while(v!=0&&ch[v][c]==0)v=fail[v];
                const int link=ch[v][c];
                fail[child]=link;
                last[child]=val[link]?link:last[link];
            }
        }
    }

    // Builds the one-step transition-count matrix: entry [from][to] is the
    // number of letters that move state `from` to state `to`, counting only
    // moves whose target is not flagged as a word state.
    Matrix getMatrix()
    {
        Matrix trans;
        for(int from=0;from<sz;from++)
            for(int c=0;c<4;c++)
            {
                const int to=ch[from][c];
                if(val[to])continue;
                trans.mat[from][to]+=1;
            }
        return trans;
    }
}ac;
// Returns A raised to the x-th power by binary exponentiation, using
// Matrix::operator* (so only the leading sz x sz block is meaningful and all
// entries are reduced by that operator).
//
// A is taken by value on purpose: it is squared in place as the loop runs.
// For x <= 0 the sz x sz identity is returned.
Matrix pow_mul(Matrix A,int x)
{
    Matrix res;
    // Identity on the active block only; index sz itself is outside it.
    for(int i=0;i<sz;i++)res.mat[i][i]=1;
    // x>0 (rather than x!=0) guarantees termination: right-shifting a
    // negative int sticks at -1 on the usual arithmetic-shift platforms.
    while(x>0)
    {
        if(x&1)res=res*A;
        x>>=1;
        // Skip the squaring that would follow the last consumed bit.
        if(x>0)A=A*A;
    }
    return res;
}
// Processes test cases until the input no longer yields a complete header.
// For each case: builds the automaton from the disease segments, raises its
// transition matrix to the sequence length, and prints the sum of row 0
// (paths starting at the root) modulo MOD.
int main()
{
    // Despite the names, n receives the number of segments and m the
    // sequence length. Require both conversions: comparing against EOF alone
    // would accept a partial read and continue with stale values.
    while(scanf("%d%d",&n,&m)==2)
    {
        ac.clear();
        for(int i=1;i<=n;i++)
        {
            // Width-limited read: word holds 20 bytes including the NUL.
            // Stop on truncated input instead of re-inserting stale data.
            if(scanf("%19s",word)!=1)return 0;
            ac.insert(word);
        }
        ac.getfail();
        Matrix A=ac.getMatrix();
        A=pow_mul(A,m);
        int ans=0;
        for(int i=0;i<sz;i++)
            ans=(ans+A.mat[0][i])%MOD;
        printf("%d\n",ans);
    }
    return 0;
}