话不多说
新建矩阵
$$
\left[
\begin{matrix}
sum[i] \\
f[i+1] \\
\vdots \\
f[i+m]
\end{matrix}
\right]
$$
不难看出,转移矩阵为
$$
\left[
\begin{matrix}
1 & 1 & 0 & \cdots & 0 \\
0 & 0 & 1 & \cdots & 0 \\
\vdots & \vdots & \vdots & \ddots & \vdots \\
0 & 0 & 0 & \cdots & 1 \\
1 & 0 & 0 & \cdots & 0
\end{matrix}
\right]
$$
这是一个 $(m+1)\times(m+1)$ 的矩阵。
那么直接对转移矩阵做 n 次矩阵快速幂,再乘上初始列向量,答案就是结果的 c[1][1]。(转移矩阵的最后一行只取第一个分量,所以这等价于做 n+1 次幂后取 c[m+1][1],下面的代码用的是后一种写法。)
// luogu-judger-enable-o2
// luogu-judger-enable-o2
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<cmath>
#include<queue>
using namespace std;
// 64-bit signed integer used for the inputs and for every matrix entry.
typedef long long ll;
// Side length of the backing array of a Mul matrix. Mul indexes from 1,
// so at most maxn-1 rows/columns are usable.
const int maxn = 25;
// Modulus applied to every entry produced by a matrix product.
const int mod = 1000000007;
// Problem inputs, filled in by main.
ll n;
ll m;
// Scratch array; not referenced by any code visible in this file.
ll f[maxn];
struct Mul
{
int x,y;
ll c[maxn][maxn];
friend Mul operator *(const Mul &a,const Mul &b)
{
Mul c;
c.x=a.x;
c.y=b.y;
for(int i=1;i<=a.x;i++)
{
for(int j=1;j<=b.y;j++)
{
c.c[i][j]=0;
for(int k=1;k<=a.y;k++)
{
c.c[i][j]=(c.c[i][j]+a.c[i][k]*b.c[k][j])%mod;
}
}
}
return c;
};
void clear()
{
x=y=0;
memset(c,0,sizeof(c));
}
void out()
{
for(int i=1;i<=x;i++,cout<<endl)
for(int j=1;j<=y;cout<<c[i][j]<<" ",j++);
cout<<endl;
};
};
// Raises the square matrix a to the k-th power by binary exponentiation,
// using Mul's operator* (entries reduced modulo mod).
//
// a : base matrix; a.x is used as its side length.
// k : exponent. k <= 0 yields the a.x-by-a.x identity matrix. Negative
//     exponents are not supported and are treated like 0.
// Returns a^k.
Mul fast(Mul a,ll k)
{
    if(k<=0)
    {
        // a^0 is the identity. Without this branch the loop below would run
        // once for k==0 (k-1 == -1 is odd and -1/2 truncates to 0) and
        // return a*a.
        Mul id;
        id.clear();
        id.x=id.y=a.x;
        for(int i=1;i<=a.x;i++)id.c[i][i]=1;
        return id;
    }
    // Start from a^1 and fold in the remaining k-1 factors.
    Mul res=a;
    --k;
    while(k>0)
    {
        if(k&1)res=a*res;
        k>>=1;
        // Square only if another bit remains to be processed.
        if(k>0)a=a*a;
    }
    return res;
}
int main()
{
scanf("%lld%lld",&n,&m);
if(n<=m){cout<<n+1;return 0;}
Mul temp,move;
temp.clear();
temp.x=m+1;
temp.y=1;
for(int i=1;i<=m+1;i++)temp.c[i][1]=1;
move.clear();
move.x=m+1,move.y=m+1;
move.c[1][1]=move.c[m+1][1]=1;
for(int i=1;i<=m;i++)move.c[i][i+1]=1;
Mul ans=fast(move,n+1);
ans=ans*temp;
//ans.out();
cout<<ans.c[m+1][1];
}