started work on dqn
parent eacc3ce30a
commit 22bb3091c1
1 changed file with 86 additions and 0 deletions
86
dqn/agent.nim
Normal file
@@ -0,0 +1,86 @@
import std / [random]
import arraymancer

randomize()

# Define all the hyperparameters used in DDQN
type
  HyperParams* = object
    # generic hyperparams
    batchSize: int = 64
    discountFactor: float = 0.9
    learningRate: float = 1e-4

    # greedy-ε policy stuff
    explorationRate: float = 1
    explorationRateDecay: float = 0.9999975
    explorationRateMinimum: float = 1e-2

    # ddqn specific (arbitrary defaults)
    burnin: int = 1000
    learnEvery: int = 3
    syncEvery: int = 100
    memoryLength: int = 10000

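  # Editor's note (not part of the original commit): with the defaults above,
  # explorationRate decays geometrically, eps_n = 0.9999975^n, so it only
  # reaches the 1e-2 floor after ln(0.01)/ln(0.9999975) ≈ 1.84 million steps.
  # Note also that default values on object fields require Nim 2.0 or newer.
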
  OptimizerName = enum
    adam
    sgd
    sgdMomentum

  OptimizerParams* = object
    # which optimizer to use, selected via the OptimizerName enum above
    name: OptimizerName = adam
    learning_rate: float = 1e-5
    momentum: float = 0.0
    decay: float = 0.0
    nesterov: bool = false
    beta1: float = 0.9
    beta2: float = 0.999
    epsilon: float = 1e-8

  ActivationFunction = enum
    relu
    sigmoid
    softmax
    tanh

  NetworkParams* = object
    hiddenLayersNum: int
    hiddenLayersSize: int
    activationFunction: ActivationFunction

type
  Agent* = ref object
    inputDims: int
    actionDims: int
    hParams: HyperParams
    optimParams: OptimizerParams
    networkParams: NetworkParams
    save_dir: string = "chkpts"
    load: bool = false

proc act*(model: Agent, state: Tensor): int =
  # pick a random action index by default (rand(n) is inclusive, hence the - 1)
  var actionIndex = rand(model.actionDims - 1)
  if rand(1.0) > model.hParams.explorationRate:
    echo "The agent has acted."
    # TODO: Implement actual model
    #[var
      actionValues = model.forward(state)
      actionIndex = argmax(actionValues).item()]#
    actionIndex = 1

  # decay epsilon, but never let it fall below the configured minimum
  model.hParams.explorationRate *= model.hParams.explorationRateDecay
  model.hParams.explorationRate = max(model.hParams.explorationRate, model.hParams.explorationRateMinimum)

  return actionIndex

proc cache*(model: Agent, state: Tensor, nextState: Tensor, action: int, reward: float, done: bool) =
  # TODO: Implement memory (either in Agent type or find another way)
  discard

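# Sketch (editor's addition, not part of this commit): one way the replay
# memory mentioned in the TODO above could be stored, using a bounded
# std/deques ring buffer. `Experience` and `remember` are assumptions made
# for illustration only; the commit itself does not define them.
import std / [deques]

type
  Experience = object
    state, nextState: Tensor[float]
    action: int
    reward: float
    done: bool

proc remember(memory: var Deque[Experience], exp: Experience, maxLen: int) =
  # drop the oldest transition once the buffer is full, then append the new one
  if memory.len >= maxLen:
    memory.popFirst()
  memory.addLast(exp)
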
var
  model: Agent = Agent(inputDims: 3, actionDims: 3)
  testor = [1.0, 2.0].toTensor()

var action = model.act(testor)
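# Quick check (editor's addition): with explorationRate starting at 1.0 the
# greedy branch in act() is not taken yet, so this prints a random index
# in 0 .. actionDims - 1.
echo "chosen action: ", action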