# melite/network.nim

import arraymancer

#### The following three approaches still need to be combined into a single implementation.

let ctx = newContext Tensor[float32]

let
  SINGLETON = 1

type
  LinearLayer = object
    weight: Variable[Tensor[float32]]
    bias: Variable[Tensor[float32]]

  Nimertes = object
    hidden: LinearLayer
    output: LinearLayer

# Creates a trainable Variable of the given shape, initialised with the
# requested scheme (e.g. kaimingNormal).
template weightInit(shape: varargs[int], initKind: untyped): Variable =
  ctx.variable(
    initKind(shape, float32),
    requiresGrad = true
  )

proc newNimertesInstance*(ctx: Context[Tensor[float32]], hiddenSize: int, dimIn: int, dimOut: int): Nimertes =
  # Hidden layer: [hiddenSize, dimIn] weights with a [1, hiddenSize] bias row.
  result.hidden.weight = weightInit(hiddenSize, dimIn, kaimingNormal)
  result.hidden.bias = weightInit(SINGLETON, hiddenSize, kaimingNormal)
  # Output layer: [dimOut, hiddenSize] weights with a [1, dimOut] bias row.
  result.output.weight = weightInit(dimOut, hiddenSize, kaimingNormal)
  result.output.bias = weightInit(SINGLETON, dimOut, kaimingNormal)

proc forward*(network: Nimertes, x: Variable[Tensor[float32]]): Variable[Tensor[float32]] =
  # Hidden linear layer -> ReLU -> output linear layer.
  result = x
    .linear(network.hidden.weight, network.hidden.bias)
    .relu
    .linear(network.output.weight, network.output.bias)

proc saveModel*(network: Nimertes) =
  # This is a quick prototype, but you get the idea.
  # A better approach would be to save all of the model's weights and biases
  # into a single file (see the single-file sketch below).
  network.hidden.weight.value.writeNpy("hiddenweight.npy")
  network.hidden.bias.value.writeNpy("hiddenbias.npy")
  network.output.weight.value.writeNpy("outputweight.npy")
  network.output.bias.value.writeNpy("outputbias.npy")
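
# The comment in saveModel above suggests writing everything into one file.
# The proc below is a minimal sketch of that idea using std/streams; it is an
# assumption about how it could be done (fixed field order, rank-2 float32
# tensors), not part of the original module, and saveModelSingleFile is a
# hypothetical name.
import std/streams

proc saveModelSingleFile*(network: Nimertes, path: string) =
  let s = newFileStream(path, fmWrite)
  defer: s.close()
  for t in [network.hidden.weight.value, network.hidden.bias.value,
            network.output.weight.value, network.output.bias.value]:
    s.write(int64(t.shape[0]))  # every field here is rank-2, so write both dims
    s.write(int64(t.shape[1]))
    for x in t:                 # row-major traversal of the elements
      s.write(x)
# A matching loader would read the two shape values back, collect the elements
# into a seq[float32] and rebuild each tensor with toTensor(...).reshape(...).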

proc load*(ctx: Context[Tensor[float32]]): Nimertes =
  result.hidden.weight = ctx.variable(readNpy[float32]("hiddenweight.npy"), requiresGrad = true)
  result.hidden.bias = ctx.variable(readNpy[float32]("hiddenbias.npy"), requiresGrad = true)
  result.output.weight = ctx.variable(readNpy[float32]("outputweight.npy"), requiresGrad = true)
  result.output.bias = ctx.variable(readNpy[float32]("outputbias.npy"), requiresGrad = true)
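
# Minimal usage sketch for the first approach: build a model, run a dummy batch
# through it, then round-trip it through saveModel/load. The layer sizes and
# batch size are illustrative assumptions, not values used elsewhere in this module.
when isMainModule:
  let
    model1 = ctx.newNimertesInstance(hiddenSize = 100, dimIn = 64, dimOut = 64)
    demoInput = ctx.variable(zeros[float32](32, 64))  # dummy [batch, dimIn] batch
    demoPred = model1.forward(demoInput)
  echo demoPred.value.shape                 # [32, 64]
  model1.saveModel()
  let restored = ctx.load()
  echo restored.hidden.weight.value.shape   # [100, 64]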

#### Second way to implement it, using arraymancer's network DSL
let
  vocabSize = 64
  hiddenSize = 100
  nLayers = 2

network Nimertes2:
  layers:
    encoder: Embedding(vocabSize, vocabSize)
    gru: GRULayer(encoder.out_shape[0], hiddenSize, nLayers)
    decoder: Linear(hiddenSize, vocabSize)
  forward input, hidden0:
    let (output, hiddenN) = input.encoder.gru(hidden0)
    # output is of shape [Sequence, BatchSize, HiddenSize].
    # In our case the sequence length is 1, so we can simply flatten it.
    let flattened = output.reshape(output.value.shape[1], hiddenSize)
    (output: flattened.decoder, hidden: hiddenN)
export Nimertes2
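
# Hypothetical instantiation sketch for Nimertes2: `ctx.init` is the network-DSL
# constructor from arraymancer; the batch size and the zero-initialised hidden
# state below are assumptions for illustration only.
when isMainModule:
  let
    rnnBatchSize = 32
    model2 = ctx.init(Nimertes2)
    # GRU hidden state is [nLayers, batchSize, hiddenSize].
    hidden0Demo = ctx.variable(zeros[float32](nLayers, rnnBatchSize, hiddenSize))
  # A single step would then look roughly like:
  #   let (decoded, nextHidden) = model2.forward(tokenBatch, hidden0Demo)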

#### Third way, also with the network DSL
let
  dimIn = vocabSize
  dimOut = vocabSize

network Nimertes3:
  layers:
    encoder: Embedding(vocabSize, vocabSize)
    fc1: Linear(dimIn, hiddenSize)
    fc2: Linear(hiddenSize, dimOut)
  forward input, targets:
    let
      logits = input.encoder              # [batch, time, channels] embeddings
      batch = logits.value.shape[0]
      time = logits.value.shape[1]
      channels = logits.value.shape[2]
      reshapedLogits = logits.reshape(batch * time, channels)
      reshapedTargets = targets.reshape(batch * time)
    # TODO: route the activations through the hidden layers: x.fc1.relu.fc2
    reshapedLogits.softmax_cross_entropy(reshapedTargets)
export Nimertes3
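
# Hypothetical training-step outline for Nimertes3; the input/target batches and
# the optimizer are assumptions that depend on the arraymancer version in use.
when isMainModule:
  let model3 = ctx.init(Nimertes3)
  # One step would look roughly like:
  #   let loss = model3.forward(inputBatch, targetBatch)  # cross-entropy from forward
  #   loss.backprop()
  #   optim.update()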