gym100548F Color

This post describes how to solve a coloring-scheme counting problem with combinatorics and the inclusion-exclusion principle, and walks through the algorithm together with a code example.


Problem: there are n flower pots in a row and m colors. Adjacent pots must receive different colors, and exactly k of the m colors must be used. How many valid colorings are there?

Solution: counting the colorings that use at most k colors is easy. Choose the k colors in C(m, k) ways; a fixed set of k colors can then be placed on the row in f(k) = k*(k-1)^(n-1) ways, since each pot only has to differ from its left neighbour. The hard part is counting colorings that use exactly k colors. Simply taking f(k) - f(k-1) does not work: f(k) counts every coloring that stays inside some subset of the k colors, and subtracting a single f(k-1) does not remove those sub-colorings exactly once. For example, with n = 2 and a fixed set of 3 colors, f(3) = 6 and all six colorings use exactly 2 colors, yet f(3) - f(2) = 6 - 2 = 4, not 0.

The fix is inclusion-exclusion (binomial inversion). Let g(i) denote the number of colorings that use exactly i colors out of a fixed set of i colors. Grouping the colorings counted by f(k) according to the set of colors they actually use gives f(k) = sum_{i=1}^{k} C(k, i) * g(i). Inverting this relation gives g(k) = sum_{i=1}^{k} (-1)^(k-i) * C(k, i) * f(i), where f(i) = i*(i-1)^(n-1). The final answer is C(m, k) * g(k).
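
To sanity-check the inversion, here is a small brute-force sketch (my own addition, not part of the original post; the function names and the tiny bounds are illustrative): it enumerates every coloring of a short row, keeps those where adjacent pots differ and exactly k distinct colors appear, and compares that count with C(m, k) * g(k) from the formula above.

#include <bits/stdc++.h>
using namespace std;

// Brute force: colorings of n pots with m colors, adjacent pots different,
// using exactly k distinct colors.
long long brute(int n, int m, int k) {
    long long total = 1, cnt = 0;
    for (int i = 0; i < n; i++) total *= m;                    // m^n raw colorings
    for (long long code = 0; code < total; code++) {
        long long x = code;
        vector<int> col(n);
        for (int i = 0; i < n; i++) { col[i] = x % m; x /= m; }
        bool ok = true;
        for (int i = 1; i < n; i++) if (col[i] == col[i - 1]) ok = false;
        if (!ok) continue;
        if ((int)set<int>(col.begin(), col.end()).size() == k) cnt++;
    }
    return cnt;
}

// Closed form: C(m,k) * sum_{i=1..k} (-1)^(k-i) * C(k,i) * i * (i-1)^(n-1)
long long formula(int n, int m, int k) {
    auto C = [](long long a, long long b) {                    // exact small binomial
        long long r = 1;
        for (long long i = 1; i <= b; i++) r = r * (a - i + 1) / i;
        return r;
    };
    long long g = 0;
    for (int i = 1; i <= k; i++) {
        long long f = i;                                        // f(i) = i*(i-1)^(n-1)
        for (int j = 1; j < n; j++) f *= (i - 1);
        g += ((k - i) % 2 == 0 ? 1 : -1) * C(k, i) * f;         // (-1)^(k-i) * C(k,i) * f(i)
    }
    return C(m, k) * g;
}

int main() {
    for (int n = 1; n <= 5; n++)
        for (int m = 1; m <= 5; m++)
            for (int k = 1; k <= m; k++)
                assert(brute(n, m, k) == formula(n, m, k));
    puts("formula matches brute force on all small cases");
    return 0;
}

This only checks the formula on tiny inputs with exact integers; the actual solution below works modulo 1e9+7 and precomputes modular inverses.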

Note: precompute the modular inverses up front, otherwise the solution will TLE. (A linear-time alternative for this precomputation is sketched after the code.)

#include <bits/stdc++.h>
#define maxn 1000100
typedef long long ll;
const ll mod = 1e9 + 7;
using namespace std;

// fast modular exponentiation: a^b mod p
ll exp_mod(ll a, ll b, ll p){
    ll ans = 1;
    a %= p;
    while(b){
        if(b & 1) ans = ans * a % p;
        a = a * a % p;
        b >>= 1;
    }
    return ans;
}

ll inv[maxn], c[maxn];

int main(){
    ll T, n, m, k, ans, num = 1, flag, ans1;
    // precompute modular inverses of 1..1e6, used to build the binomials
    for(ll i = 1; i <= 1e6; i++) inv[i] = exp_mod(i, mod - 2, mod);
    scanf("%lld", &T);
    while(T--){
        flag = 1, ans1 = 0;
        scanf("%lld%lld%lld", &n, &m, &k);
        // c[i] = C(m, i); keep C(m, k), the number of ways to pick the k colors
        c[0] = 1;
        for(ll i = 1; i <= k; i++) c[i] = c[i-1] * (m - i + 1) % mod * inv[i] % mod;
        ans = c[k];
        // reuse c[] for C(k, i)
        for(ll i = 1; i <= k; i++) c[i] = c[i-1] * (k - i + 1) % mod * inv[i] % mod;
        // g(k) = sum_{i=1..k} (-1)^(k-i) * C(k, i) * i * (i-1)^(n-1)
        // flag carries the alternating sign, starting at +1 for i = k
        for(ll i = k; i >= 1; i--){
            ans1 = (ans1 + flag * c[i] % mod * i % mod * exp_mod(i - 1, n - 1, mod) % mod) % mod;
            flag *= -1;
        }
        // answer = C(m, k) * g(k); add mod to keep the printed result non-negative
        printf("Case #%lld: %lld\n", num++, (ans * ans1 % mod + mod) % mod);
    }
    return 0;
}
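
As an aside (my own addition, not part of the original solution): the one million modular inverses can also be built in linear time with the standard recurrence inv[i] = (mod - mod/i) * inv[mod % i] % mod, which avoids one fast-power call per value. A minimal sketch:

#include <bits/stdc++.h>
typedef long long ll;
const ll mod = 1e9 + 7;
const int maxn = 1000100;
ll inv[maxn];

// Linear-time table of modular inverses 1..maxn-1 (valid because mod is prime).
// Identity: inv[i] = -(mod / i) * inv[mod % i]  (mod mod)
void init_inverses(){
    inv[1] = 1;
    for(int i = 2; i < maxn; i++)
        inv[i] = (mod - mod / i) * inv[mod % i] % mod;
}

Either way, each test case then costs O(k) for the binomials plus O(k log n) for the k fast-power calls.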

 

Reposted from: https://www.cnblogs.com/Noevon/p/7823169.html
