import std / [random]
import arraymancer

randomize()

# Define all the hyperparameters used in DDQN
type
  HyperParams* = object
    # generic hyperparams
    batchSize: int = 64
    discountFactor: float = 0.9
    learningRate: float = 1e-4

    # greedy-ε policy stuff
    explorationRate: float = 1.0
    explorationRateDecay: float = 0.9999975
    explorationRateMinimum: float = 1e-2

    # ddqn specific (arbitrary defaults)
    burnin: int = 1000
    learnEvery: int = 3
    syncEvery: int = 100
    memoryLength: int = 10000
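
  # Added note on the epsilon-greedy defaults above (assuming explorationRate
  # is multiplied by explorationRateDecay once per act() call, as in act below):
  # decaying from 1.0 down to explorationRateMinimum = 1e-2 takes about
  # ln(0.01) / ln(0.9999975) ≈ 1.84 million calls.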

  OptimizerName = enum
    adam
    sgd
    sgdMomentum

  OptimizerParams* = object
    # optimizer selection: adam, sgd, or sgdMomentum (see OptimizerName above)
    name: OptimizerName = adam
    learning_rate: float = 1e-5
    momentum: float = 0.0
    decay: float = 0.0
    nesterov: bool = false
    beta1: float = 0.9
    beta2: float = 0.999
    epsilon: float = 1e-8

  ActivationFunction = enum
    relu
    sigmoid
    softmax
    tanh

  NetworkParams* = object
    hiddenLayersNum: int
    hiddenLayersSize: int
    activationFunction: ActivationFunction

type
  Agent* = ref object
    inputDims: int
    actionDims: int
    hParams: HyperParams
    optimParams: OptimizerParams
    networkParams: NetworkParams
    save_dir: string = "chkpts"
    load: bool = false
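
# Hypothetical usage sketch (an addition, not part of the original file):
# fields omitted from a constructor keep the defaults declared above, so the
# nested config objects can be overridden selectively. Object field defaults
# require Nim 2.0+.
let exampleAgent = Agent(
  inputDims: 4,
  actionDims: 2,
  hParams: HyperParams(batchSize: 32, discountFactor: 0.99),
  optimParams: OptimizerParams(name: sgdMomentum, momentum: 0.9))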

proc act*(model: Agent, state: Tensor[float]): int =
  ## Epsilon-greedy action selection: explore with probability
  ## explorationRate, otherwise exploit (the network forward pass is still TODO).
  # rand(n) is inclusive, so sample indices in 0 ..< actionDims
  var actionIndex = rand(model.actionDims - 1)
  if rand(1.0) > model.hParams.explorationRate:
    echo "The agent has acted."
    # TODO: Implement actual model
    #[var
      actionValues = model.forward(state)
      actionIndex = argmax(actionValues).item()]#
    actionIndex = 1

  # Decay the exploration rate, clamped at its minimum.
  model.hParams.explorationRate *= model.hParams.explorationRateDecay
  model.hParams.explorationRate = max(model.hParams.explorationRate,
                                      model.hParams.explorationRateMinimum)

  return actionIndex
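
# Hedged sketch for the TODO above (an addition, not the original design):
# once a forward pass exists and returns a rank-1 tensor of Q-values (one
# entry per action), the greedy branch could pick the best action with a
# plain scan, avoiding assumptions about any particular argmax overload.
proc greedyAction(qValues: Tensor[float]): int =
  ## Index of the largest entry in a rank-1 Q-value tensor.
  result = 0
  for i in 1 ..< qValues.shape[0]:
    if qValues[i] > qValues[result]:
      result = i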

proc cache*(model: Agent, state: Tensor[float], nextState: Tensor[float],
            action: int, reward: float, done: bool) =
  ## Store a transition for later replay.
  # TODO: Implement memory (either in Agent type or find another way)
  discard
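
# Hedged sketch of the missing replay memory (an assumption, not part of the
# original file): a Transition record plus a bounded Deque that the Agent
# could own, with capacity taken from hParams.memoryLength. Only std/deques
# and std/random are used.
import std / [deques]

type
  Transition = object
    state, nextState: Tensor[float]
    action: int
    reward: float
    done: bool

  ReplayMemory = object
    buffer: Deque[Transition]
    capacity: int

proc push(mem: var ReplayMemory, t: Transition) =
  # Drop the oldest transition once the buffer is at capacity.
  if mem.buffer.len >= mem.capacity:
    discard mem.buffer.popFirst()
  mem.buffer.addLast(t)

proc sample(mem: ReplayMemory, batchSize: int): seq[Transition] =
  # Uniform random sampling (with replacement) for a training batch.
  for _ in 0 ..< batchSize:
    result.add mem.buffer[rand(mem.buffer.len - 1)]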

var
  model: Agent = Agent(inputDims: 3, actionDims: 3)
  testor = [1.0, 2.0, 3.0].toTensor()  # 3 entries to match inputDims

var action = model.act(testor)
echo "chosen action: ", action
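
# Hedged sketch (an assumption about how the burnin / learnEvery / syncEvery
# hyperparameters are meant to drive training; the original file stops at the
# single act() call above):
#
#   for step in 0 ..< totalSteps:
#     let action = model.act(state)
#     # ... environment step yields nextState, reward, done ...
#     model.cache(state, nextState, action, reward, done)
#     if step >= model.hParams.burnin and step mod model.hParams.learnEvery == 0:
#       discard  # TODO: minimise TD error of the online network here
#     if step mod model.hParams.syncEvery == 0:
#       discard  # TODO: copy online-network weights into the target network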