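/*
s[i] -> sum of the first i numbers;
Naive recurrence: dp[i]=min(dp[j]+s[i]-s[j]-(i-j)*a[j+1]);
Suppose i>j>k and the decision at k is at least as good as the one at j; then
dp[j]+s[i]-s[j]-(i-j)*a[j+1]>= dp[k]+s[i]-s[k]-(i-k)*a[k+1]
Simplifying gives:
dp[j]-dp[k]-s[j]+s[k]+j*a[j+1]-k*a[k+1]>=i*(a[j+1]-a[k+1])
Let G(j,k)= dp[j]-dp[k]-s[j]+s[k]+j*a[j+1]-k*a[k+1]
S(j,k)= a[j+1]-a[k+1]
Then the inequality above becomes G(j,k)>=i*S(j,k)
Now consider the restriction that every group has at least m elements. It is enough to delay the moment a candidate is added to the queue.
If the insertion is delayed by m-1 rounds, the previous group may still end up with fewer than m elements.
If the insertion is delayed by 2*m-1 rounds, that can no longer happen. In that case the index to add is i-m+1 (assuming the current round is round i).
*/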
#include<iostream>
#include<cstdio>
#include<cstring>
// Number of elements in each of the global arrays below.
#define MAXSIZE 505000
// Short aliases for the C stdio calls.
#define sf scanf
#define pf printf
// Maps the name __int64 to long long for the rest of the file.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation; a plain typedef with another name would be safer.
#define __int64 long long
using namespace std;
// dp[i]   : value computed in main for the first i elements (see get_dp).
// s[i]    : prefix sum a[1]+...+a[i]; s[0] is never written and stays 0.
// a[i]    : input values, stored 1-based.
// queue[] : array-backed queue of candidate indices j, consumed from `front`
//           and appended at `rear` inside main.
__int64 dp[MAXSIZE],s[MAXSIZE],a[MAXSIZE],queue[MAXSIZE];
// Candidate value for dp[i] when the last group is the elements j+1..i:
// dp[j] plus the sum over that group of (a[t] - a[j+1]).
// Reads the global arrays dp, s and a.
__int64 get_dp(int i,int j)
{
    const __int64 groupSum = s[i] - s[j];   // a[j+1] + ... + a[i]
    const __int64 groupBase = (i - j) * a[j + 1];
    return dp[j] + groupSum - groupBase;
}
// G(j,k) from the slope comparison: the difference between the two
// candidates' i-independent terms, where the term for an index t is
// dp[t] - s[t] + t*a[t+1].
__int64 get_G(int j,int k)
{
    const __int64 termJ = dp[j] - s[j] + j * a[j + 1];
    const __int64 termK = dp[k] - s[k] + k * a[k + 1];
    return termJ - termK;
}
// S(j,k) from the slope comparison: a[j+1] - a[k+1], i.e. the coefficient of
// i when candidates j and k are compared.
__int64 get_S(int j,int k)
{
    const __int64 headJ = a[j + 1];
    const __int64 headK = a[k + 1];
    return headJ - headK;
}
int main()
{
int test;
sf("%d",&test);
while(test--)
{
memset(dp,0,sizeof(dp));
int n,m;
sf("%d%d",&n,&m);
for(int i=1; i<=n; i++)
{
sf("%lld",&a[i]);
s[i]=s[i-1]+a[i];
}
int front=0,rear=0;
queue[rear++]=0;
dp[0]=0;
for(int i=1; i<=n; i++)
{
while(rear-1>front&&get_G(queue[front+1],queue[front])<=i*get_S(queue[front+1],queue[front])) front++;
dp[i]=get_dp(i,queue[front]);
if(i>=2*m-1)
{
int j=i-m+1;
while(rear-1>front&&(get_G(j,queue[rear-1])*get_S(queue[rear-1],queue[rear-2])<=get_G(queue[rear-1],queue[rear-2])*get_S(j,queue[rear-1])))
rear--;
queue[rear++]=i-m+1;
}
}
cout<<dp[n]<<endl;
}
}