hdu 4691 Front compression(LCP+RMQ || hash,5级)

本文链接：https://blog.youkuaiyun.com/nealgavin/article/details/10212683

本文探讨了一种名为前端压缩的delta编码压缩算法，通过记录公共前缀及其长度来避免重复存储。详细介绍了算法原理、输入输出处理及具体实现案例，旨在优化字符串存储与传输效率。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

Front compression

Time Limit: 5000/5000 MS (Java/Others) Memory Limit: 102400/102400 K (Java/Others)
Total Submission(s): 678 Accepted Submission(s): 275

Problem Description

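Front compression is a type of delta encoding compression algorithm whereby common prefixes and their lengths are recorded so that they need not be duplicated. For example:

input      compressed output
myxophyta  0 myxophyta
myxopod    5 od
nab        0 nab
nabbed     3 bed
nabbing    4 ing
nabit      3 it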

The size of the input is 43 bytes, while the size of the compressed output is 40. Here, every space and newline is also counted as 1 byte.
Given the input, each line of which is a substring of a long string, what are the sizes of the input and of the corresponding compressed output?

Input

There are multiple test cases. Process to the End of File.
The first line of each test case is a long string S made up of lowercase letters, whose length doesn't exceed 100,000. The second line contains an integer N (1 ≤ N ≤ 100,000), which is the number of lines in the input. Each of the following N lines contains two integers 0 ≤ A < B ≤ length(S), indicating that that line of the input is substring [A, B) of S.

Output

For each test case, output the sizes of the input and corresponding compressed output.

Sample Input

  
frcode
2
0 6
0 6
unitedstatesofamerica
3
0 6
0 12
0 21
myxophytamyxopodnabnabbednabbingnabit
6
0 9
9 16
16 19
19 25
25 32
32 37

Sample Output

14 12
42 31
43 40

Author

Zejun Wu (watashi)

Source

2013 Multi-University Training Contest 9

Recommend

zhuyuanchen520

思路：后缀数组+LCP, RMQ查询

#include<cstdio>
#include<cstring>
#include<algorithm>
#define FOR(i,a,b) for(int i=a;i<=b;++i)
#define clr(f,z) memset(f,z,sizeof(f))
#define ll(x) (1<<x)
#define LL __int64
using namespace std;
const int max_node=1e5+9;
/*
 * Suffix array of a lowercase text plus a height (adjacent-LCP) table and a
 * sparse table over it, so the longest common prefix of any two suffixes can
 * be answered without loops.
 * Call order for each text: build_SA -> get_H -> init_RMQ -> LCP_RMQ.
 */
class SUFFIX_ARRAY
{ public:
  // sa[r]   : start position of the r-th smallest suffix (filled by build_SA).
  // rank[i] : position of suffix i inside sa (filled by get_H); build_SA also
  //           borrows this array as its second scratch buffer.
  // t1      : first scratch buffer of sort keys used by build_SA.
  int rank[max_node],sa[max_node],t1[max_node];
  // h[r] : common prefix length of suffixes sa[r-1] and sa[r] (filled by get_H).
  // c    : counting-sort buckets.  n : length of the text once build_SA returns.
  int h[max_node],c[max_node],n;
  // Maps a character to its initial sort key: the terminator gets 0 so it is
  // smaller than every letter, 'a'..'z' get 1..26.
  int idx(char x)
  {
    if(x=='\0')return 0;
    return x-'a'+1;
  }
  // Builds sa for s by prefix doubling with counting sort.
  // s : NUL-terminated text; the terminator is sorted as an extra character.
  // m : number of distinct initial keys (the caller passes 27).
  void build_SA(char*s,int m)
  { int*wx=t1,*wy=rank;
    n=strlen(s)+1; // include the terminator while sorting
    // Counting sort of all suffixes by their first character.
    FOR(i,0,m-1)c[i]=0;
    FOR(i,0,n-1)c[ wx[i]=idx(s[i]) ]++;
    FOR(i,1,m-1)c[i]+=c[i-1];
    for(int i=n-1;i>=0;--i)sa[ --c[ wx[i] ] ]=i;
    for(int k=1;k<=n;k<<=1)
    {
      // wy lists suffix starts ordered by their second half (the k characters
      // after the first k): suffixes too short to have one come first.
      int p=0;
      FOR(i,n-k,n-1)wy[p++]=i;
      FOR(i,0,n-1)if(sa[i]>=k)wy[p++]=sa[i]-k;
      // Stable counting sort of that order by the first-half key wx.
      FOR(i,0,m-1)c[i]=0;
      FOR(i,0,n-1)++c[ wx[ wy[i] ] ];
      FOR(i,1,m-1)c[i]+=c[i-1];
      for(int i=n-1;i>=0;--i)sa[ --c[ wx[ wy[i] ] ] ]=wy[i];
      // Re-key: wy now holds the old keys, wx receives the keys for length 2k.
      swap(wx,wy);
      wx[ sa[0] ]=0;
      p=1;
      FOR(i,1,n-1)wx[ sa[i] ]=cmp(wy,i,k)?p-1:p++;
      if(p>=n)break; // every suffix already has a distinct key
      m=p;
    }
    --n; // from here on n is the text length without the terminator
  }
  // True when the suffixes at sa[i] and sa[i-1] have equal keys r for both
  // their first and their second half of length k.
  bool cmp(int*r,int i,int k)
  {
    return r[ sa[i] ]==r[ sa[i-1] ]&&r[ sa[i]+k ]==r[ sa[i-1]+k ];
  }
  // Fills rank[] and h[] for the text s that build_SA was last called with.
  void get_H(char*s)
  { int k=0;
    FOR(i,0,n)rank[ sa[i] ]=i;
    FOR(i,0,n-1)
    {
      if(k)--k; // reuse the previous match length minus one as a starting point
      int j=sa[ rank[i]-1 ];
      while(s[i+k]==s[j+k])++k;
      h[ rank[i] ]=k;
    }
  }
  // Dumps sa, rank and h to stdout.
  void debug()
  { printf("sa=");
    FOR(i,0,n)printf("%d ",sa[i]);puts("");
    printf("rank=");
    FOR(i,0,n)printf("%d ",rank[i]);puts("");
    printf("h=");
    FOR(i,0,n)printf("%d ",h[i]);puts("");

  }
  // rmq[j][t] : minimum of h[j .. j+2^t-1];  bit[x] : floor(log2(x)), bit[0] = -1.
  int rmq[max_node][20],bit[max_node];
  // Builds bit[] and the sparse table over h[1..n].
  void init_RMQ()
  {
    bit[0]=-1;
    FOR(i,0,n-1)bit[i+1]=((i&(i+1))==0)?bit[i]+1:bit[i];
    FOR(i,1,n)rmq[i][0]=h[i];
    FOR(i,0,bit[n]-1)
    // Original author's note: with the tighter bound j+ll(i)<=n this was
    // reported to fail on part of the judge data, so the bound is left as is.
    for(int j=1;j+ll(i)<=n+1;++j)
      rmq[j][i+1]=min(rmq[j][i],rmq[j+ll(i)][i]);
  }
  // Longest common prefix of the suffixes starting at text positions l and r.
  int LCP_RMQ(int l,int r)
  {
    // A suffix shares its whole length with itself. Without this guard the
    // range below would be empty, bit[0] == -1 would be read and 1<<-1 is
    // undefined behaviour.
    if(l==r)return n-l;
    l=rank[l];r=rank[r];
    if(l>r)swap(l,r);
    ++l;int t=bit[r-l+1]; // the answer is min h[l+1 .. r] over sorted positions
    r-=ll(t)-1;
    return min(rmq[l][t],rmq[r][t]);
  }
};
// Number of decimal digits needed to print x (0 counts as one digit).
int ten(int x)
{
  int digits=0;
  // The do-while runs once even for x == 0, which yields the single digit "0".
  do
  {
    ++digits;
    x/=10;
  }while(x!=0);
  return digits;
}
// Suffix-array solver, rebuilt for the text of every test case.
SUFFIX_ARRAY tf;
// Buffer holding the long string S of the current test case.
char s[max_node];
// A[k] / B[k]: the two integers read for the k-th query line (1-based);
// main treats B[k]-A[k] as the length of that line's substring.
int A[max_node];
int B[max_node];
int main()
{ int N;
  //freopen("1006.in","r",stdin);
  while(~scanf("%s",s))
  {
    tf.build_SA(s,27);
     tf.get_H(s);
    tf.init_RMQ();
    //tf.debug();
    LL ans1=0,ans2=0;
    scanf("%d",&N);
    FOR(kk,1,N)
    {
      scanf("%d%d",&A[kk],&B[kk]);
      if(kk==1)
      {
        ans1+=B[kk]-A[kk]+1;
        ans2+=B[kk]-A[kk]+3;
        continue;
      }
      int ret;
      if(A[kk]^A[kk-1])ret=tf.LCP_RMQ(A[kk],A[kk-1]);
      else ret=max_node;
      ret=min(ret,min(B[kk]-A[kk],B[kk-1]-A[kk-1]));
      ans1+=B[kk]-A[kk]+1;
      ans2+=B[kk]-A[kk]-ret+1;
      ans2+=1+ten(ret);
    }
    printf("%I64d %I64d\n",ans1,ans2);
  }
  return 0;
}

解法二：字符串哈希 + 二分求 LCP，效率更高。

#include<cstdio>
#include<iostream>
#include<cstring>
#include<algorithm>
#define FOR(i,a,b) for(int i=a;i<=b;++i)
#define clr(f,z) memset(f,z,sizeof(f))
#define ll(x) (1<<x)
#define LL __int64
using namespace std;
const int max_node=1e5+9;
const int xhash=127;
class SUFFIX_ARRAY
{
  public:
    unsigned LL H[max_node],xp[max_node],hash[max_node];
    int n;
    void getHASH(char*s)
    { n=strlen(s);
      H[n]=0;
      for(int i=n-1;i>=0;--i)
      {
        H[i]=H[i+1]*xhash+(s[i]-'a');
      }
      xp[0]=1;
      FOR(i,1,n)xp[i]=xp[i-1]*xhash;
    }
    bool ok(int l,int r,int x)
    {
      unsigned LL lhash=H[l]-H[l+x]*xp[x];
      unsigned LL rhash=H[r]-H[r+x]*xp[x];
      return lhash==rhash;
    }
    int LCP(int a,int b,int len)
    {
      int l=0,r=len,mid;
      int x=0;
      while(l<=r)
      { mid=(l+r)/2;
        if(ok(a,b,mid))
        { x=max(x,mid);
          l=mid+1;
        }
        else r=mid-1;
      }
      return x;
    }
};
// Counts the decimal digits of x; zero is reported as one digit.
int ten(int x)
{
  int digits=1;
  // Every further division by ten that still leaves a non-zero value
  // accounts for one more digit.
  for(x/=10;x!=0;x/=10)
    ++digits;
  return digits;
}
// Hash helper, re-initialised for the text of every test case.
SUFFIX_ARRAY tf;
// Buffer holding the long string S of the current test case.
char s[max_node];
// A[k] / B[k]: the two integers read for the k-th query line (1-based);
// main treats B[k]-A[k] as the length of that line's substring.
int A[max_node];
int B[max_node];
/*
 * For every test case (text S, count N, then N pairs A B) prints two totals:
 * the byte size of the plain listing (each substring plus a newline) and the
 * byte size of the front-compressed listing (shared-prefix length, a space,
 * the remaining characters and a newline).
 * Stops at end of input; a truncated test case is abandoned instead of being
 * processed with uninitialised values.
 */
int main()
{
  //freopen("1006.in","r",stdin);
  while(scanf("%s",s)==1)
  {
    tf.getHASH(s);
    LL ans1=0,ans2=0;
    int N;
    if(scanf("%d",&N)!=1)break; // no query count: nothing sensible to report
    FOR(kk,1,N)
    {
      if(scanf("%d%d",&A[kk],&B[kk])!=2)return 0; // input ended mid-case
      if(kk==1)
      {
        // The first line has no predecessor: "0", a space, the text, a newline.
        ans1+=B[kk]-A[kk]+1;
        ans2+=B[kk]-A[kk]+3;
        continue;
      }
      // The search is capped at the shorter of the two substrings, so the
      // result never exceeds the current line's length.
      int ret=tf.LCP(A[kk],A[kk-1],min(B[kk]-A[kk],B[kk-1]-A[kk-1]));
      ans1+=B[kk]-A[kk]+1;
      ans2+=B[kk]-A[kk]-ret+1; // remaining characters plus the newline
      ans2+=1+ten(ret);        // the space plus the digits of the prefix length
    }
    printf("%I64d %I64d\n",ans1,ans2);
  }
  return 0;
}