import arraymancer

#### The following need to be combined gently

let ctx = newContext Tensor[float32]

let
  SINGLETON = 1  # leading broadcast dimension for the bias tensors

type
  LinearLayer = object
    weight: Variable[Tensor[float32]]
    bias: Variable[Tensor[float32]]

  Nimertes = object
    hidden: LinearLayer
    output: LinearLayer

template weightInit(shape: varargs[int], initKind: untyped): Variable =
  # Note: relies on the globally declared `ctx`.
  ctx.variable(
    initKind(shape, float32),
    requiresGrad = true
  )

proc newNimertesInstance*(ctx: Context[Tensor[float32]], hiddenSize: int, dimIn: int, dimOut: int): Nimertes =
  result.hidden.weight = weightInit(hiddenSize, dimIn, kaimingNormal)
  result.hidden.bias = weightInit(SINGLETON, hiddenSize, kaimingNormal)
  result.output.weight = weightInit(dimOut, hiddenSize, kaimingNormal)
  result.output.bias = weightInit(SINGLETON, dimOut, kaimingNormal)

proc forward*(network: Nimertes, x: Variable[Tensor[float32]]): Variable[Tensor[float32]] =
  # hidden linear -> ReLU -> output linear
  let hiddenAct = x.linear(network.hidden.weight, network.hidden.bias).relu
  result = hiddenAct.linear(network.output.weight, network.output.bias)

proc saveModel*(network: Nimertes) =
  # this is a quick prototype, but you get the idea.
  # perhaps a better way to do this would be to save all weights/biases of
  # the model into a single file.
  network.hidden.weight.value.writeNpy("hiddenweight.npy")
  network.hidden.bias.value.writeNpy("hiddenbias.npy")
  network.output.weight.value.writeNpy("outputweight.npy")
  network.output.bias.value.writeNpy("outputbias.npy")

proc load*(ctx: Context[Tensor[float32]]): Nimertes =
  result.hidden.weight = ctx.variable(readNpy[float32]("hiddenweight.npy"), requiresGrad = true)
  result.hidden.bias = ctx.variable(readNpy[float32]("hiddenbias.npy"), requiresGrad = true)
  result.output.weight = ctx.variable(readNpy[float32]("outputweight.npy"), requiresGrad = true)
  result.output.bias = ctx.variable(readNpy[float32]("outputbias.npy"), requiresGrad = true)
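
# A minimal usage sketch of the first variant (hypothetical sizes; assumes a
# float32 input batch of shape [batchSize, dimIn]).
when isMainModule:
  let
    model1 = ctx.newNimertesInstance(hiddenSize = 100, dimIn = 64, dimOut = 64)
    x1 = ctx.variable(randomTensor(8, 64, 1.0'f32))
    pred1 = model1.forward(x1)
  echo pred1.value.shape   # expected: [8, 64]
  model1.saveModel()
  discard ctx.load()       # round-trip the weights back in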

##### Second Way to implement

let
  vocabSize = 64
  hiddenSize = 100
  nLayers = 2

network Nimertes2:
  layers:
    encoder: Embedding(vocabSize, vocabSize)
    gru: GRULayer(encoder.out_shape[0], hiddenSize, nLayers)
    decoder: Linear(hiddenSize, vocabSize)
  forward input, hidden0:
    let (output, hiddenN) = input.encoder.gru(hidden0)
    # output is of shape [Sequence, BatchSize, HiddenSize].
    # In our case the sequence is 1, so we can simply flatten.
    let flattened = output.reshape(output.value.shape[1], hiddenSize)

    (output: flattened.decoder, hidden: hiddenN)

export Nimertes2
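
# A hypothetical smoke test for the second variant. Assumptions: `ctx.init` is
# the constructor generated by the `network` macro, the encoder accepts a plain
# tensor of integer token ids, and the initial GRU hidden state is zeros of
# shape [nLayers, BatchSize, hiddenSize].
when isMainModule:
  let
    model2 = ctx.init(Nimertes2)
    tokens = [[7]].toTensor                        # [Sequence = 1, BatchSize = 1]
    hidden0 = ctx.variable zeros[float32](nLayers, 1, hiddenSize)
    (decoded, hiddenN) = model2.forward(tokens, hidden0)
  echo decoded.value.shape                         # expected: [1, vocabSize]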

#### Third Way

let
  dimIn = vocabSize
  dimOut = vocabSize

network Nimertes3:
  layers:
    encoder: Embedding(vocabSize, vocabSize)
    fc1: Linear(dimIn, hiddenSize)
    fc2: Linear(hiddenSize, dimOut)
  forward input, targets:
    # `targets` is assumed to be a plain tensor of integer class ids,
    # flattened to line up with the [batch*time, channels] logits.
    let
      logits = input.encoder                       # [Batch, Time, Channels]
      batch = logits.value.shape[0]
      time = logits.value.shape[1]
      channels = logits.value.shape[2]
      reshapeLogits = logits.reshape(batch*time, channels)
      reshapeTargets = targets.reshape(batch*time)
      loss = reshapeLogits.sparse_softmax_cross_entropy(reshapeTargets)
    # x.fc1.relu.fc2
    loss

export Nimertes3
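
# A hypothetical single step with the third variant. Assumptions: `ctx.init` is
# the constructor generated by the `network` macro, the encoder accepts plain
# integer token ids of shape [Batch, Time], and the forward above returns the
# loss Variable.
when isMainModule:
  let
    model3 = ctx.init(Nimertes3)
    ids = randomTensor(4, 8, vocabSize - 1)        # [Batch = 4, Time = 8] token ids
    targetIds = randomTensor(4, 8, vocabSize - 1)  # next-token ids, same shape
    loss = model3.forward(ids, targetIds)
  loss.backprop()                                  # accumulate gradients in ctx
  echo loss.value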