题意:给出一个长为 n 的 DNA 序列(字符串)和整数 k,求该序列中所有由连续 k 个碱基组成的子串里,出现次数最多的那一种的出现次数。
思路:字符串哈希。对每个长度为 k 的子串计算双哈希值,用二维数组统计每个哈希值对出现的次数,取最大值。
#include <bits/stdc++.h>
using namespace std;
// 64-bit unsigned type used for all hash arithmetic.
using ULL = unsigned long long;

// Capacity of the input buffer and of the per-position hash tables.
const int N = 5000010;
// Number of letters covered by the permutation table in Hash.
const int sigma = 26;
// Number of hashers kept side by side.
const int HASH_CNT = 2;

// n: length of the text held in s; k: window length read from the input.
int n;
int k;
// cnt[a][b]: how many windows produced the hash pair (a, b).
int cnt[3300][3300];
// Input text, stored 1-indexed (characters live in s[1..n]).
char s[N];

// Multipliers and moduli that Hash::Init / Hash::IndexInit select by index.
ULL Seed_Pool[] = {911, 146527, 19260817, 91815541};
ULL Mod_Pool[] = {3229, 998244353, 1000000009, 4294967291ull};
struct Hash {
ULL Seed, Mod, bas[N], sum[N];
int perm[sigma];
void Init(int sendindex, int modindex) {//普通哈希
Seed = Seed_Pool[sendindex], Mod = Mod_Pool[modindex];
bas[0] = 1;
for(int i = 1; i <= n; i++)
bas[i] = bas[i - 1] * Seed % Mod;
for(int i = 1; i <= n; i++)
sum[i] = (sum[i - 1] * Seed + s[i]) % Mod;
}
void IndexInit(int sendindex, int modindex) {
Seed = Seed_Pool[sendindex], Mod = Mod_Pool[modindex];
for(int i = 1; i <= sigma; i++)
perm[i] = i;
random_shuffle(perm + 1, perm + sigma + 1);
bas[0] = 1;
for(int i = 1; i <= n; i++)
bas[i] = bas[i - 1] * Seed % Mod;
for(int i = 1; i <= n; i++)
sum[i] = (sum[i - 1] * Seed % Mod + perm[s[i] - 'A' + 1]) % Mod;
}
ULL getHash(int l, int r) {
return (sum[r] - sum[l - 1] * bas[r - l + 1] % Mod + Mod) % Mod;
}
} hasher[HASH_CNT];//哈希模板
int main() {
int ans = 0;
scanf("%s %d", s + 1, &k);
n = strlen(s + 1);
hasher[0].Init(0, 0);
hasher[1].Init(1, 0);
for(int i = k; i <= n; i++) {
ULL a = hasher[0].getHash(i - k + 1, i);
ULL b = hasher[1].getHash(i - k + 1, i);
cnt[a][b]++;
ans = max(ans, cnt[a][b]);
}
printf("%d\n", ans);
return 0;
}
或者(改用 IndexInit:先把字母经随机置换表映射为编号,再做哈希):
#include <bits/stdc++.h>
using namespace std;
// 64-bit unsigned type used for all hash arithmetic.
using ULL = unsigned long long;

// Capacity of the input buffer and of the per-position hash tables.
const int N = 5000010;
// Number of letters covered by the permutation table in Hash.
const int sigma = 26;
// Number of hashers kept side by side.
const int HASH_CNT = 2;

// n: length of the text held in s; k: window length read from the input.
int n;
int k;
// cnt[a][b]: how many windows produced the hash pair (a, b).
int cnt[3300][3300];
// Input text, stored 1-indexed (characters live in s[1..n]).
char s[N];

// Multipliers and moduli that Hash::Init / Hash::IndexInit select by index.
ULL Seed_Pool[] = {911, 146527, 19260817, 91815541};
ULL Mod_Pool[] = {3229, 998244353, 1000000009, 4294967291ull};
struct Hash {
ULL Seed, Mod, bas[N], sum[N];
int perm[sigma];
void Init(int sendindex, int modindex) {
Seed = Seed_Pool[sendindex], Mod = Mod_Pool[modindex];
bas[0] = 1;
for(int i = 1; i <= n; i++)
bas[i] = bas[i - 1] * Seed % Mod;
for(int i = 1; i <= n; i++)
sum[i] = (sum[i - 1] * Seed + s[i]) % Mod;
}
void IndexInit(int sendindex, int modindex) {
Seed = Seed_Pool[sendindex], Mod = Mod_Pool[modindex];
for(int i = 1; i <= sigma; i++)
perm[i] = i;
random_shuffle(perm + 1, perm + sigma + 1);
bas[0] = 1;
for(int i = 1; i <= n; i++)
bas[i] = bas[i - 1] * Seed % Mod;
for(int i = 1; i <= n; i++)
sum[i] = (sum[i - 1] * Seed % Mod + perm[s[i] - 'A' + 1]) % Mod;
}
ULL getHash(int l, int r) {
return (sum[r] - sum[l - 1] * bas[r - l + 1] % Mod + Mod) % Mod;
}
} hasher[HASH_CNT];//哈希模板
int main() {
int ans = 0;
scanf("%s %d", s + 1, &k);
n = strlen(s + 1);
hasher[0].IndexInit(0, 0);
hasher[1].IndexInit(1, 0);
for(int i = k; i <= n; i++) {
ULL a = hasher[0].getHash(i - k + 1, i);
ULL b = hasher[1].getHash(i - k + 1, i);
cnt[a][b]++;
ans = max(ans, cnt[a][b]);
}
printf("%d\n", ans);
return 0;
}