import arraymancer

#### First way: the following pieces still need to be combined gently.

let ctx = newContext Tensor[float32]
let SINGLETON = 1

type
  LinearLayer = object
    weight: Variable[Tensor[float32]]
    bias: Variable[Tensor[float32]]

  Nimertes = object
    hidden: LinearLayer
    output: LinearLayer

template weightInit(shape: varargs[int], initKind: untyped): Variable =
  ctx.variable(
    initKind(shape, float32),
    requiresGrad = true
  )

proc newNimertesInstance*(ctx: Context[Tensor[float32]],
                          hiddenSize, dimIn, dimOut: int): Nimertes =
  result.hidden.weight = weightInit(hiddenSize, dimIn, kaimingNormal)
  result.hidden.bias   = weightInit(SINGLETON, hiddenSize, kaimingNormal)
  result.output.weight = weightInit(dimOut, hiddenSize, kaimingNormal)
  result.output.bias   = weightInit(SINGLETON, dimOut, kaimingNormal)

proc forward*(network: Nimertes, x: Variable[Tensor[float32]]): Variable[Tensor[float32]] =
  # Two linear layers with a ReLU in between.
  let hiddenActivation = x.linear(network.hidden.weight, network.hidden.bias).relu
  result = hiddenActivation.linear(network.output.weight, network.output.bias)

proc saveModel*(network: Nimertes) =
  # This is a quick prototype, but you get the idea.
  # A better approach would be to save all weights/biases of the model
  # into a single file.
  network.hidden.weight.value.writeNpy("hiddenweight.npy")
  network.hidden.bias.value.writeNpy("hiddenbias.npy")
  network.output.weight.value.writeNpy("outputweight.npy")
  network.output.bias.value.writeNpy("outputbias.npy")

proc load*(ctx: Context[Tensor[float32]]): Nimertes =
  result.hidden.weight = ctx.variable(readNpy[float32]("hiddenweight.npy"), requiresGrad = true)
  result.hidden.bias   = ctx.variable(readNpy[float32]("hiddenbias.npy"), requiresGrad = true)
  result.output.weight = ctx.variable(readNpy[float32]("outputweight.npy"), requiresGrad = true)
  result.output.bias   = ctx.variable(readNpy[float32]("outputbias.npy"), requiresGrad = true)

#### Second way to implement it

let
  vocabSize = 64
  hiddenSize = 100
  nLayers = 2

network Nimertes2:
  layers:
    encoder: Embedding(vocabSize, vocabSize)
    gru: GRULayer(encoder.out_shape[0], hiddenSize, nLayers)
    decoder: Linear(hiddenSize, vocabSize)
  forward input, hidden0:
    let (output, hiddenN) = input.encoder.gru(hidden0)
    # `output` is of shape [Sequence, BatchSize, HiddenSize].
    # In our case the sequence length is 1, so we can simply flatten it.
    let flattened = output.reshape(output.value.shape[1], hiddenSize)
    (output: flattened.decoder, hidden: hiddenN)

export Nimertes2

#### Third way

let
  dimIn = vocabSize
  dimOut = vocabSize

network Nimertes3:
  layers:
    encoder: Embedding(vocabSize, vocabSize)
    fc1: Linear(dimIn, hiddenSize)
    fc2: Linear(hiddenSize, dimOut)
  forward input, targets:
    let
      logits = input.encoder  # [Batch, Time, Channels]
      (batch, time, channels) = (logits.value.shape[0],
                                 logits.value.shape[1],
                                 logits.value.shape[2])
      reshapeLogits = logits.reshape(batch * time, channels)
      reshapeTargets = targets.reshape(batch * time)
    # The fully connected path (x.fc1.relu.fc2) is not wired in yet.
    # Return the cross-entropy between the flattened logits and the targets.
    reshapeLogits.softmax_cross_entropy(reshapeTargets)

export Nimertes3
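
#### Example usage of the first implementation (a minimal sketch).
# Everything below is illustrative and not part of the prototype above:
# the sizes, the random dummy data, and the `newSGD`/`sigmoid_cross_entropy`
# calls (in the style of Arraymancer's FizzBuzz example) are assumptions.

when isMainModule:
  let
    model = ctx.newNimertesInstance(hiddenSize = 16, dimIn = 8, dimOut = 4)
    x = ctx.variable(randomTensor(32, 8, 1.0'f32))  # dummy [batch, dimIn] input
    y = randomTensor(32, 4, 1.0'f32)                # dummy [batch, dimOut] target

  # Plain SGD over the four parameter tensors of the model.
  let optim = newSGD[float32](
    model.hidden.weight, model.hidden.bias,
    model.output.weight, model.output.bias,
    0.01'f32
  )

  for _ in 0 ..< 100:
    let
      pred = model.forward(x)
      loss = pred.sigmoid_cross_entropy(y)
    loss.backprop()
    optim.update()

  # Round-trip the parameters through the .npy files written by saveModel.
  model.saveModel()
  let restored = ctx.load()
  echo restored.forward(x).value.shape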