melite/network.nim

import arraymancer

#### The following need to be combined gently

let ctx = newContext Tensor[float32]

let
  SINGLETON = 1

type
  LinearLayer = object
    weight: Variable[Tensor[float32]]
    bias: Variable[Tensor[float32]]
  Nimertes = object
    hidden: LinearLayer
    output: LinearLayer

template weightInit(shape: varargs[int], initKind: untyped): Variable =
  ctx.variable(
    initKind(shape, float32),
    requiresGrad = true
  )

proc newNimertesInstance*(ctx: Context[Tensor[float32]], hiddenSize: int, dimIn: int, dimOut: int): Nimertes =
  result.hidden.weight = weightInit(hiddenSize, dimIn, kaimingNormal)
  result.hidden.bias = weightInit(SINGLETON, hiddenSize, kaimingNormal)
  result.output.weight = weightInit(dimOut, hiddenSize, kaimingNormal)
  result.output.bias = weightInit(SINGLETON, dimOut, kaimingNormal)

proc forward*(network: Nimertes, x: Variable[Tensor[float32]]): Variable[Tensor[float32]] =
  result = x.linear(network.hidden.weight, network.hidden.bias)
    .relu
    .linear(network.output.weight, network.output.bias)

proc saveModel*(network: Nimertes) =
  # This is a quick prototype, but you get the idea.
  # Perhaps a better way to do this would be to save all weights/biases of
  # the model into a single file.
  network.hidden.weight.value.writeNpy("hiddenweight.npy")
  network.hidden.bias.value.writeNpy("hiddenbias.npy")
  network.output.weight.value.writeNpy("outputweight.npy")
  network.output.bias.value.writeNpy("outputbias.npy")
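
# A possible take on the "single file" idea from the comment above: flatten every
# parameter into one rank-1 tensor and write that with writeNpy. The proc name
# saveModelFlat and the file name "nimertes_flat.npy" are illustrative choices,
# not part of the original prototype; loading back would slice this tensor using
# the known layer shapes.
proc saveModelFlat*(network: Nimertes, path = "nimertes_flat.npy") =
  let flat = concat(
    network.hidden.weight.value.reshape(network.hidden.weight.value.size),
    network.hidden.bias.value.reshape(network.hidden.bias.value.size),
    network.output.weight.value.reshape(network.output.weight.value.size),
    network.output.bias.value.reshape(network.output.bias.value.size),
    axis = 0
  )
  flat.writeNpy(path)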

proc load*(ctx: Context[Tensor[float32]]): Nimertes =
  result.hidden.weight = ctx.variable(readNpy[float32]("hiddenweight.npy"), requiresGrad = true)
  result.hidden.bias = ctx.variable(readNpy[float32]("hiddenbias.npy"), requiresGrad = true)
  result.output.weight = ctx.variable(readNpy[float32]("outputweight.npy"), requiresGrad = true)
  result.output.bias = ctx.variable(readNpy[float32]("outputbias.npy"), requiresGrad = true)
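
# A minimal usage sketch of this first implementation, assuming dummy all-zero
# data; the sizes (dimIn = 8, dimOut = 4, batch of 16) and the
# sigmoid_cross_entropy loss are illustrative choices, not part of the original
# prototype.
when isMainModule:
  let
    model = ctx.newNimertesInstance(hiddenSize = 32, dimIn = 8, dimOut = 4)
    x = ctx.variable(zeros[float32](16, 8))    # [BatchSize, dimIn]
    target = zeros[float32](16, 4)             # [BatchSize, dimOut]
    pred = model.forward(x)                    # [BatchSize, dimOut]
    loss = pred.sigmoid_cross_entropy(target)
  loss.backprop()                              # gradients land in each Variable's .grad
  model.saveModel()
  let restored = ctx.load()
  echo restored.hidden.weight.value.shape      # should print [32, 8]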

##### Second Way to implement

let
  vocabSize = 64
  hiddenSize = 100
  nLayers = 2

network Nimertes2:
  layers:
    encoder: Embedding(vocabSize, vocabSize)
    gru: GRULayer(encoder.out_shape[0], hiddenSize, nLayers)
    decoder: Linear(hiddenSize, vocabSize)
  forward input, hidden0:
    let (output, hiddenN) = input.encoder.gru(hidden0)
    # result.output is of shape [Sequence, BatchSize, HiddenSize]
    # In our case the sequence is 1 so we can simply flatten
    let flattened = output.reshape(output.value.shape[1], hiddenSize)
    (output: flattened.decoder, hidden: hiddenN)

export Nimertes2
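
# A possible usage sketch for Nimertes2, assuming ctx.init builds the model and
# that the encoder takes a tensor of integer token ids with a sequence length
# of 1 (which is what the flatten step above expects); the batch size of 8 is
# illustrative.
when isMainModule:
  let
    model2 = ctx.init(Nimertes2)
    batchSize = 8
    tokens = zeros[int](1, batchSize)                                  # [SeqLen = 1, BatchSize]
    hidden0 = ctx.variable(zeros[float32](nLayers, batchSize, hiddenSize))
    (output2, hiddenN) = model2.forward(tokens, hidden0)
  echo output2.value.shape                                             # expected [BatchSize, vocabSize]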

#### Third Way

let
  dimIn = vocabSize
  dimOut = vocabSize

network Nimertes3:
  layers:
    encoder: Embedding(vocabSize, vocabSize)
    fc1: Linear(dimIn, hiddenSize)
    fc2: Linear(hiddenSize, dimOut)
  forward input, targets:
    # The embedding output is of shape [Batch, Time, Channels]
    let
      logits = input.encoder
      (batch, time, channels) = (logits.value.shape[0], logits.value.shape[1], logits.value.shape[2])
      # Flatten to [Batch*Time, Channels]; softmax_cross_entropy expects
      # targets of the same shape as the flattened logits
      reshapeLogits = logits.reshape(batch*time, channels)
      reshapeTargets = targets.reshape(batch*time, channels)
      loss = reshapeLogits.softmax_cross_entropy(reshapeTargets)
    # The hidden layers are not wired in yet: x.fc1.relu.fc2
    loss

export Nimertes3
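
# A possible single-step sketch for Nimertes3, assuming ctx.init builds the
# model and that targets are dense float tensors matching the embedding output
# shape [Batch, Time, Channels]; the batch/time sizes here are illustrative.
when isMainModule:
  let
    model3 = ctx.init(Nimertes3)
    ids = zeros[int](4, 8)                       # token ids, [Batch, Time]
    targets3 = zeros[float32](4, 8, vocabSize)   # dense targets, [Batch, Time, Channels]
    loss3 = model3.forward(ids, targets3)
  loss3.backprop()
  echo loss3.value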