import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np
import math
import random
import os
import gym
# Hyper Parameters
STATE_DIM = 4
ACTION_DIM = 2
STEP = 2000
SAMPLE_NUMS = 30
class ActorNetwork(nn.Module):
def __init__(self,input_size,hidden_size,action_size):
super(ActorNetwork, self).__init__()
self.fc1 = nn.Linear(input_size,hidden_size)
self.fc2 = nn.Linear(hidden_size,hidden_size)
self.fc3 = nn.Linear(hidden_size,action_size)
def forward(self,x):
out = F.relu(self.fc1(x))
out = F.relu(self.fc2(out))
out = F.log_softmax(self.fc3(out))
return out
class ValueNetwork(nn.Module):
def __init__(self,input_size,hidden_size,output_size):
super(ValueNetwork, self).__init__()
self.fc1 = nn.Linear(input_size,hidden_size)
self.fc2 = nn.Linear(hidden_size