【思路要点】
- 考虑 $r=1$,问题要求将排列分成若干段长度为 $k$ 的极长上升序列,这里假设 $N$ 是 $k$ 的倍数,$N$ 不为 $k$ 的倍数时只需要多一些细节处理。
- 若不考虑极长的限制,这里的排列数显然为 $\frac{N!}{(k!)^{\frac{N}{k}}}$,那么,我们可以用容斥原理计算考虑极长限制的方案数,即记 $dp_i$ 表示第 $1\sim i$ 段的分割方式数除去 $N!$ 后的数值,则有
$$dp_{i}=-\sum_{j=0}^{i-1}\frac{dp_j}{((i-j)\times k)!}$$
- 可以用多项式求逆优化上式,时间复杂度 $O(N\log N)$。
- 由上面的算法不难得到 $r>1$ 的转移
$$dp_{i}=-\sum_{j=0}^{i-1}\frac{dp_j}{\left(((i-j)\times k)!\right)^{r}}$$
- 同样可以用多项式求逆优化上式,时间复杂度 $O(N\log N)$。
【代码】
#include<bits/stdc++.h>
using namespace std;

const int MAXN = 2097152;   // 2^21: capacity of every global table below
const int P = 998244353;    // NTT-friendly prime modulus
typedef long long ll;
typedef long double ld;
typedef unsigned long long ull;

// x = max(x, y)
template <typename T> void chkmax(T &x, T y) { x = max(x, y); }
// x = min(x, y)
template <typename T> void chkmin(T &x, T y) { x = min(x, y); }

// Reads one (optionally negative) decimal integer from stdin into x.
// Every '-' seen before the first digit flips the sign.
// Stops cleanly at end of input; x is 0 if no digit was found.
template <typename T> void read(T &x) {
	x = 0;
	int sign = 1;
	// getchar() returns int; keeping it as int lets EOF be told apart from
	// real characters, so empty/missing input no longer loops forever.
	int c = getchar();
	for (; c != EOF && !isdigit(c); c = getchar())
		if (c == '-') sign = -sign;
	for (; c != EOF && isdigit(c); c = getchar())
		x = x * 10 + (c - '0');
	x *= sign;
}

// Writes x in decimal to stdout (no trailing newline).
template <typename T> void write(T x) {
	if (x < 0) x = -x, putchar('-');
	if (x > 9) write(x / 10);
	putchar(x % 10 + '0');
}

// Writes x in decimal followed by a newline.
template <typename T> void writeln(T x) {
	write(x);
	puts("");
}

// Polynomial arithmetic modulo P on coefficient vectors (index = degree),
// built on a number-theoretic transform with shared global scratch buffers.
// Poly::init() must be called once before any transform-based routine.
namespace Poly {
	const int MAXN = 2097152;   // maximum transform length (2^21)
	const int P = 998244353;
	const int LOG = 25;
	const int G = 3;            // primitive root used to build the twiddle table

	// Returns x^y mod P by recursive squaring (y >= 0).
	int power(int x, int y) {
		if (y == 0) return 1;
		int half = power(x, y / 2);
		int sq = 1ll * half * half % P;
		return (y % 2 == 0) ? sq : static_cast<int>(1ll * sq * x % P);
	}

	int invn[MAXN], tmpa[MAXN], tmpb[MAXN];   // inverses of 1..MAXN-1; two transform scratch buffers
	int N, Log, home[MAXN];                   // current transform length, its log2, bit-reversal table
	bool initialized;
	int forward[MAXN], bckward[MAXN], inv[LOG];   // root powers, their inverses, inverses of 2^lg

	// Fills invn[], inv[] and the twiddle tables forward[]/bckward[].
	void init() {
		initialized = true;
		forward[0] = bckward[0] = inv[0] = invn[1] = 1;
		for (int len = 2, lg = 1; len <= MAXN; len <<= 1, lg++)
			inv[lg] = power(len, P - 2);
		for (int i = 2; i < MAXN; i++)
			invn[i] = (P - 1ll * (P / i) * invn[P % i] % P) % P;
		// delta is a primitive MAXN-th root of unity; bckward[] stores the same
		// powers in reverse order, i.e. the inverse roots.
		int delta = power(G, (P - 1) / MAXN);
		for (int i = 1; i < MAXN; i++)
			forward[i] = bckward[MAXN - i] = 1ll * forward[i - 1] * delta % P;
	}

	// Builds the bit-reversal permutation home[] for the current N / Log.
	void NTTinit() {
		for (int i = 0; i < N; i++) {
			int rev = 0, rest = i;
			for (int bit = 1; bit <= Log; bit++) {
				rev = (rev << 1) + (rest & 1);
				rest >>= 1;
			}
			home[i] = rev;
		}
	}

	// In-place transform of a[0..N-1]; mode == 1 is the forward transform,
	// mode == -1 the inverse (including the division by N).
	void NTT(int *a, int mode) {
		assert(initialized);
		for (int i = 0; i < N; i++)
			if (home[i] < i) swap(a[i], a[home[i]]);
		const int *g = (mode == 1) ? forward : bckward;
		for (int len = 2; len <= N; len <<= 1) {
			const int half = len / 2;
			const int stride = MAXN / len;   // step through the length-MAXN root table
			for (int i = 0; i < N; i += len) {
				for (int j = 0; j < half; j++) {
					int lo = a[i + j];
					int hi = 1ll * a[i + j + half] * g[stride * j] % P;
					int sum = lo + hi, diff = lo - hi;
					// ">= P" (not "> P") so a sum of exactly P is stored as 0
					// and every coefficient stays in the canonical range [0, P).
					a[i + j] = (sum >= P) ? (sum - P) : sum;
					a[i + j + half] = (diff < 0) ? (diff + P) : diff;
				}
			}
		}
		if (mode == -1) {
			for (int i = 0; i < N; i++)
				a[i] = 1ll * a[i] * inv[Log] % P;
		}
	}

	// Sets N to the smallest power of two >= goal and Log to log2(N).
	void setLength(int goal) {
		N = 1, Log = 0;
		while (N < goal) {
			N <<= 1;
			Log++;
		}
		// The scratch buffers and twiddle tables only hold MAXN entries.
		assert(N <= MAXN);
	}

	// Copies src into dst[0..N-1], zero-padding the tail.
	void load(const vector <int> &src, int *dst) {
		copy(src.begin(), src.end(), dst);
		fill(dst + src.size(), dst + N, 0);
	}

	// c = a * b (full product, size a.size() + b.size() - 1).
	void times(vector <int> &a, vector <int> &b, vector <int> &c) {
		assert(a.size() >= 1), assert(b.size() >= 1);
		const int goal = static_cast<int>(a.size() + b.size()) - 1;
		setLength(goal);
		load(a, tmpa);
		load(b, tmpb);
		NTTinit();
		NTT(tmpa, 1);
		NTT(tmpb, 1);
		for (int i = 0; i < N; i++)
			tmpa[i] = 1ll * tmpa[i] * tmpb[i] % P;
		NTT(tmpa, -1);
		c.assign(tmpa, tmpa + goal);
	}

	// c = a * b * b (full product, size a.size() + 2 * b.size() - 2).
	void timesabb(vector <int> &a, vector <int> &b, vector <int> &c) {
		assert(a.size() >= 1), assert(b.size() >= 1);
		const int goal = static_cast<int>(a.size() + b.size() * 2) - 2;
		setLength(goal);
		load(a, tmpa);
		load(b, tmpb);
		NTTinit();
		NTT(tmpa, 1);
		NTT(tmpb, 1);
		for (int i = 0; i < N; i++)
			tmpa[i] = 1ll * tmpa[i] * tmpb[i] % P * tmpb[i] % P;
		NTT(tmpa, -1);
		c.assign(tmpa, tmpa + goal);
	}

	// b = 1 / a modulo x^{a.size()}; requires a[0] != 0.
	// Doubles the precision each round with b <- 2b - a*b*b.
	void getinv(vector <int> &a, vector <int> &b) {
		assert(a.size() >= 1), assert(a[0] != 0);
		b.clear(), b.push_back(power(a[0], P - 2));
		while (b.size() < a.size()) {
			vector <int> abb, head = a;
			head.resize(b.size() * 2);
			timesabb(head, b, abb);
			b.resize(b.size() * 2);
			for (size_t i = 0; i < b.size(); i++)
				b[i] = (2ll * b[i] - abb[i] + P) % P;
		}
		b.resize(a.size());
	}

	// b = derivative of a (a constant polynomial yields the single coefficient 0).
	void getder(vector <int> &a, vector <int> &b) {
		assert(a.size() >= 1);
		if (a.size() == 1) {
			b.clear();
			b.resize(1);
			return;
		}
		b.resize(a.size() - 1);
		for (size_t i = 0; i < b.size(); i++)
			b[i] = (i + 1ll) * a[i + 1] % P;
	}

	// b = antiderivative of a with constant term 0.
	void getint(vector <int> &a, vector <int> &b) {
		b.resize(a.size() + 1), b[0] = 0;
		for (size_t i = 0; i < a.size(); i++)
			b[i + 1] = 1ll * invn[i + 1] * a[i] % P;
	}

	// b = ln(a) modulo x^{a.size()}; requires a[0] == 1.
	void getlog(vector <int> &a, vector <int> &b) {
		assert(a.size() >= 1), assert(a[0] == 1);
		vector <int> da, inva, quot;
		getder(a, da), getinv(a, inva);
		times(da, inva, quot), getint(quot, b);
		b.resize(a.size());
	}

	// b = exp(a) modulo x^{a.size()}; requires a[0] == 0.
	// Doubles the precision each round with b <- b * (1 + a - ln b).
	void getexp(vector <int> &a, vector <int> &b) {
		assert(a.size() >= 1), assert(a[0] == 0);
		b.clear(), b.push_back(1);
		while (b.size() < a.size()) {
			vector <int> corr, next;
			b.resize(b.size() * 2), getlog(b, corr);
			// corr becomes 1 + a - ln(b); a is treated as 0 beyond its size.
			for (size_t i = 0; i < corr.size(); i++) {
				if (i == 0) corr[i] = (P + 1 + a[i] - corr[i]) % P;
				else if (i < a.size()) corr[i] = (P + a[i] - corr[i]) % P;
				else corr[i] = (P - corr[i]) % P;
			}
			times(corr, b, next);
			next.resize(b.size());
			swap(next, b);
		}
		b.resize(a.size());
	}
}

int n, m, r, k, s[MAXN], dp[MAXN];
int fac[MAXN], inv[MAXN], coef[MAXN];   // fac[i] = i!, inv[i] = 1 / i! (mod P)

// Returns x^y mod P; same routine as Poly::power, kept under its global name.
int power(int x, int y) {
	return Poly :: power(x, y);
}

// Binomial coefficient C(x, y) mod P, 0 when y > x; needs init() to have filled fac[]/inv[].
int getc(int x, int y) {
	if (y > x) return 0;
	else return 1ll * fac[x] * inv[y] % P * inv[x - y] % P;
}

// Fills fac[0..n] and inv[0..n] (factorials and inverse factorials mod P).
void init(int n) {
	fac[0] = 1;
	for (int i = 1; i <= n; i++)
		fac[i] = 1ll * fac[i - 1] * i % P;
	inv[n] = power(fac[n], P - 2);
	for (int i = n - 1; i >= 0; i--)
		inv[i] = inv[i + 1] * (i + 1ll) % P;
}

// x = (x + y) mod P for x in [0, P) and y in [0, P].
void update(int &x, int y) {
	x += y;
	if (x >= P) x -= P;
}

// Reads m, r, k from interstellar.in and writes the answer to interstellar.out.
// The dp recurrence is solved by inverting the power series whose i-th
// coefficient is (1 / (i*k)!)^r.
int main() {
	freopen("interstellar.in", "r", stdin);
	freopen("interstellar.out", "w", stdout);
	read(m), read(r), read(k);
	// Reject inputs that would divide by zero or index past the tables.
	assert(k >= 1);
	assert(m >= 0 && m < MAXN);
	init(m);
	n = (m - 1) / k + 1;   // number of segments: ceil(m / k) for m >= 1
	vector <int> invr, res;
	invr.reserve(n);
	for (int i = 0; i <= n - 1; i++)
		invr.push_back(power(inv[i * k], r));
	Poly :: init(), Poly :: getinv(invr, res);
	int ans = 0;
	// The last segment has length m - i*k when the first i segments are full.
	for (int i = 0; i <= n - 1; i++)
		update(ans, P - 1ll * res[i] * power(inv[m - i * k], r) % P);
	ans = 1ll * ans * power(fac[m], r) % P;
	if (n & 1) writeln((P - ans) % P);
	else writeln(ans);
	return 0;
}