import std/[random]
import arraymancer

randomize()
# Define all the hyperparameters used in DDQN
type
  HyperParams* = object
    # generic hyperparams
    batchSize: int = 64
    discountFactor: float = 0.9
    learningRate: float = 1e-4

    # ε-greedy policy stuff
    explorationRate: float = 1.0
    explorationRateDecay: float = 0.9999975
    explorationRateMinimum: float = 1e-2

    # ddqn specific (arbitrary defaults)
    burnin: int = 1000
    learnEvery: int = 3
    syncEvery: int = 100
    memoryLength: int = 10000

  OptimizerName = enum
    adam
    sgd
    sgdMomentum

  OptimizerParams* = object
    # optimizer selected via OptimizerName: adam, sgd, or sgdMomentum
    name: OptimizerName = adam
    learningRate: float = 1e-5
    momentum: float = 0.0
    decay: float = 0.0
    nesterov: bool = false
    beta1: float = 0.9
    beta2: float = 0.999
    epsilon: float = 1e-8

  ActivationFunction = enum
    relu
    sigmoid
    softmax
    tanh

  NetworkParams* = object
    hiddenLayersNum: int
    hiddenLayersSize: int
    activationFunction: ActivationFunction
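# Note: the `field: type = value` defaults above rely on Nim 2.0+ default
# object field values, so any field left out of a construction keeps its
# declared default, e.g. (illustrative only):
#   let hp = HyperParams(batchSize: 128)
#   assert hp.discountFactor == 0.9   # unspecified fields keep their defaults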
type
  Agent* = ref object
    inputDims: int
    actionDims: int
    hParams: HyperParams
    optimParams: OptimizerParams
    networkParams: NetworkParams
    saveDir: string = "chkpts"
    load: bool = false
proc act*(model: Agent, state: Tensor): int =
  # ε-greedy action selection: explore with probability explorationRate,
  # otherwise (once a network exists) exploit the highest Q-value.
  # rand(n) is inclusive of n, so sample indices in 0 .. actionDims - 1.
  var actionIndex = rand(model.actionDims - 1)
  if rand(1.0) > model.hParams.explorationRate:
    echo "The agent has acted."
    # TODO: Implement actual model
    #[var
      actionValues = model.forward(state)
      actionIndex = argmax(actionValues).item()]#
    actionIndex = 1

  # Decay ε and clamp it to the configured minimum.
  model.hParams.explorationRate *= model.hParams.explorationRateDecay
  model.hParams.explorationRate =
    max(model.hParams.explorationRate, model.hParams.explorationRateMinimum)

  return actionIndex
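# Note on the schedule used in `act` above: with explorationRateDecay =
# 0.9999975, ε first reaches explorationRateMinimum (0.01) after roughly
# ln(0.01) / ln(0.9999975) ≈ 1.84 million calls, and is clamped there afterwards.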
proc cache*(model: Agent, state: Tensor, nextState: Tensor, action: int, reward: float, done: bool) =
  # TODO: Implement memory (either in Agent type or find another way)
  discard
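# One possible shape for the replay memory (a sketch only; `Experience`, the
# `memory` field, and `remember` below are not part of the current code):
#
#   type Experience = object
#     state, nextState: Tensor[float]
#     action: int
#     reward: float
#     done: bool
#
#   # on Agent:  memory: seq[Experience]
#   proc remember(model: Agent, exp: Experience) =
#     if model.memory.len >= model.hParams.memoryLength:
#       model.memory.delete(0)      # drop the oldest transition once full
#     model.memory.add(exp)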
var
  model: Agent = Agent(inputDims: 3, actionDims: 3)
  testor = [1.0, 2.0].toTensor()

var action = model.act(testor)
echo "chosen action: ", action