import std/[tables, os, strformat, random]
import arraymancer
import ./batcher
import ./hparams
import ./generator
import ./textEncoder

randomize()

###### Text encoding
let vocabSize: int = stringToInt.len()
var encodedText: seq[int] = encodeString(textContent, stringToInt)

###### Split corpus into training and validation sets
const percentageTraining = 80  # percentage of the corpus used for training
let trainingSetEnd: int = (percentageTraining * encodedText.len / 100).int
let trainingSet: seq[int] = encodedText[0 ..< trainingSetEnd]
let validationSet: seq[int] = encodedText[trainingSetEnd .. ^1]

###### Define NN
let ctx = newContext Tensor[float32]

network Nimertes:
  layers:
    encoder: Embedding(vocabSize, hiddenSize)
    hiddenLinear: Linear(hiddenSize, hiddenSize)
    outputLayer: Linear(hiddenSize, vocabSize)
  forward x:
    # note: hiddenLinear is applied twice, so both hidden steps share weights
    x.encoder.tanh.hiddenLinear.tanh.hiddenLinear.tanh.outputLayer

###### Save/Load Model
proc saveModel(ctx: Context[AnyTensor[float32]], model: Nimertes, dir: string) =
  echo "\nsaving model..."
  for layer, layerField in model.fieldPairs:
    var layerName = layer
    for field, tensorVariable in layerField.fieldPairs:
      var fieldName = field
      when tensorVariable is Variable[Tensor[float32]]:
        tensorVariable.value.writeNPY(dir / fmt"{layerName}_{fieldName}.npy")
      else:
        discard
  echo "model saved"

proc initModel(ctx: Context[AnyTensor[float32]], model: Nimertes, dir: string): Nimertes =
  echo "\nweights exist"
  echo "\nloading model..."
  for layer, _ in model.fieldPairs:
    var layerName = layer
    case layerName
    of "encoder":
      model.encoder.weight.value = readNPY[float32](dir / fmt"{layerName}_weight.npy")
    of "hiddenLinear":
      model.hiddenLinear.weight.value = readNPY[float32](dir / fmt"{layerName}_weight.npy")
      model.hiddenLinear.bias.value = readNPY[float32](dir / fmt"{layerName}_bias.npy")
    of "outputLayer":  # was "outputLinear", which never matched the field name, so these weights were never restored
      model.outputLayer.weight.value = readNPY[float32](dir / fmt"{layerName}_weight.npy")
      model.outputLayer.bias.value = readNPY[float32](dir / fmt"{layerName}_bias.npy")
    else:
      discard
  echo "model loaded\n"
  return model

###### Initialize NN
var
  model = ctx.init(Nimertes)
  optim = model.optimizer(Adam, learningRate = 3e-4'f32, beta1 = 0.9'f32, beta2 = 0.9'f32, eps = 1e-5'f32)

if fileExists("tinyBiGram/encoder_weight.npy"):
  model = ctx.initModel(model, "tinyBiGram")

###### Generate Text
proc generateText(ctx: Context[AnyTensor[float32]], model: Nimertes,
                  seedCharacters = "Wh", seqLen = blockSize,
                  temperature = 0.8'f32): string =
  ctx.no_grad_mode:
    let primer = encodeString(seedCharacters, stringToInt).toTensor.unsqueeze(1)
    result = seedCharacters
    var
      input = primer[^1, _]  # feed the model one character at a time
      output: Variable[Tensor[float32]]
    for _ in 0 ..< seqLen:
      output = model.forward(input.squeeze(0))
      var preds = output.value
      preds /.= temperature
      let probs = preds.softmax().squeeze(0)
      # Sample and append to result
      let encodedChar = probs.sample()
      result &= decodeString(encodedChar, intToString)
      input = newTensor[int](1, 1)
      input[0, 0] = encodedChar
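# `sample` above is provided by ./generator and is not shown in this file.
# For orientation, a multinomial draw from a rank-1 probability tensor could
# be implemented as below; `sampleMultinomial` is a hypothetical name for
# this illustrative sketch, not the actual ./generator implementation:
proc sampleMultinomial(probs: Tensor[float32]): int =
  ## Draws index i with probability probs[i], assuming probs sums to 1.
  let r = rand(1.0)              # uniform draw in [0, 1]
  var cumulative = 0.0
  for i in 0 ..< probs.shape[0]:
    cumulative += probs[i].float
    if r <= cumulative:
      return i
  result = probs.shape[0] - 1    # guard against floating-point round-off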
###### Training
var totalLoss: seq[float]
var plotidx: seq[float]

for epoch in 0 .. numEpochs:
  var
    (trainingBatch, trainingBatchNext) = getBatch("train", trainingSet, validationSet)
    output: Variable[Tensor[float32]]
    batchLoss: Variable[Tensor[float32]]
  if epoch mod evalIter == 0:
    # evaluation epoch: show a sample and checkpoint the weights (no training step)
    echo "\n", ctx.generateText(model), "\n"
    ctx.saveModel(model, "tinyBiGram")
  else:
    for i in 0 ..< batchSize:
      var
        inputTensor: Tensor[int] = trainingBatch[i, _]
        targetTensor: Tensor[int] = trainingBatchNext[i, _]
      output = model.forward(inputTensor.squeeze(0))
      batchLoss = output.sparseSoftmaxCrossEntropy(target = targetTensor.squeeze(0))
      batchLoss.backprop()
      optim.update()
    # record the last loss of the epoch for plotting
    totalLoss.add(batchLoss.value[0])
    plotidx.add(epoch.float)

###### Plot results and show final output
echo ctx.generateText(model)
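# Nothing above actually draws the curve collected in totalLoss/plotidx.
# A minimal plotting sketch, assuming the ggplotnim package is installed
# (nimble install ggplotnim); the import would normally sit with the others
# at the top of the file, and the output filename is illustrative:
import ggplotnim

let df = toDf({"epoch": plotidx, "loss": totalLoss})
ggplot(df, aes("epoch", "loss")) +
  geom_line() +
  ggsave("trainingLoss.png")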