问题 J: 简单的变位词 [hash]

最新推荐文章于 2022-08-02 04:06:14 发布

原创最新推荐文章于 2022-08-02 04:06:14 发布 · 720 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#hash

hash 专栏收录该内容

2 篇文章

订阅专栏

该博客介绍了如何解决寻找变位词并分组的问题。通过统计每个单词的字符并使用哈希来判断是否为变位词，然后根据变位词的数量和字典序对组进行排序。博客提供了样例输入和输出，并链接到具体的在线判题系统。

题目描述
变位词是指改变某个词的字母顺序后构成的新词。蔡老板最近沉迷研究变位词并给你扔了一道题：

给你一些单词，让你把里面的变位词分组找出来。互为变位词的归为一组，最后输出含有单词最多的前五组。如果有多组的单词数量相同，则按照各组中字典序最小的单词的字典序输出。

输入
输入包含由小写字母组成的单词，用换行分割，被EOF终止。输入数据不超过30000个单词。

输出
输出五组包含单词数量最多的变位词，如果少于五组，输出全部。对每组输出，写出它的大小和成员词，成员词按字典序排序用空格分隔，每组输出之间用换行分隔，相同词只输出一次，但算个数。

样例输入
neuq
tea
bate
beat
caret
trace
nueq
carte
cater
crate
abet
ate
eat
beta
eta
signal
样例输出
Group of size 5: caret carte cater crate trace .
Group of size 4: abet bate beat beta .
Group of size 4: ate eat eta tea .
Group of size 2: neuq nueq .
Group of size 1: signal .

link http://oj.acmclub.cn/problem.php?cid=1164&pid=9

题解

先将每个字符串的字符排序后计算hash，hash相同的字符串归为一组；组内排完序就确定了每组的个数以及一组中字典序最小的字符串。再根据个数以及该字符串对组进行排序。

#include<cstdio>
#include<cstring>
#include<map>
#include<vector>
#include<algorithm>

using namespace std;

// Hash value type; unsigned arithmetic on it wraps around on overflow.
typedef unsigned long long ull;
// Multiplier used by hhash() when folding characters into the hash.
const ull B=100000007;
// Capacity of the word and group tables (the problem promises at most 30000 words).
const int MAXN=3e4+5;

// o: number of groups created so far (group ids start at 1); u: number of words read.
int o=0,u=0;
// Maps the hash of a word's sorted letters to its group id; 0 means "not seen yet".
map<ull,int> mp;
// G[g] holds the indices into str[] of the words that belong to group g.
vector<int> G[MAXN];
// str[i] is the i-th input word; ss is a scratch copy whose letters get sorted for hashing.
char str[MAXN][50],ss[50];
// Per-group record used for ranking: cnt = number of words, idd = index into G.
struct node{int cnt,idd;};
node dat[MAXN];

// Computes an order-independent hash of the NUL-terminated string `s`.
// The characters of `s` are sorted IN PLACE first, so any two strings made of
// the same multiset of letters produce the same value. The hash is the
// polynomial sum of the sorted characters with base B, wrapping in `ull`.
// Callers that need the original spelling must pass a scratch copy.
ull hhash(char s[]){
    const size_t len=strlen(s);
    sort(s,s+len);
    ull acc=0;
    for(size_t k=0;k<len;++k){
        acc=acc*B+s[k];
    }
    return acc;
}

// Ordering for group records: larger groups come first. Groups with the same
// count are ordered by the word stored at the front of each group's member
// list (ascending strcmp order); the member lists are expected to be sorted
// already so that the front element is the group's smallest word.
// If either member list is empty, falls back to comparing list sizes.
bool cmp1(node x,node y){
    if(x.cnt!=y.cnt) return x.cnt>y.cnt;

    const vector<int>& lhs=G[x.idd];
    const vector<int>& rhs=G[y.idd];
    if(lhs.empty()||rhs.empty()) return lhs.size()>rhs.size();

    return strcmp(str[lhs[0]],str[rhs[0]])<0;
}

// Orders two word indices by the words they refer to in str[], ascending
// according to strcmp.
bool cmp2(int x,int y){
    const char* left=str[x];
    const char* right=str[y];
    return strcmp(left,right)<0;
}

int main()
{
    while(scanf("%s",str[u])!=EOF){
        strcpy(ss,str[u]);
        ull h=hhash(ss);
        if(!mp[h]) mp[h]=++o;
        G[mp[h]].push_back(u);
        dat[mp[h]].cnt++;
        u++;
    }
    for(int i=0;i<u;i++) dat[i].idd=i;
    for(int i=1;i<=o;i++){
        if(G[i].empty()) continue;
        sort(G[i].begin(),G[i].end(),cmp2);
    }
    sort(dat,dat+u,cmp1);
    for(int i=0;i<5;i++){
        if(dat[i].cnt==0) break;
        int idd=dat[i].idd;
        int size=G[idd].size();
        printf("Group of size %d:",size);
        for(int i=0;i<size;i++){
            if(i>0&&strcmp(str[G[idd][i]],str[G[idd][i-1]])==0) continue;
            printf(" %s",str[G[idd][i]]);
        }
        puts(" .");
    }
    return 0;
}