Move project to nyrid melite
commit 3cd93f27b5
11 changed files with 6073 additions and 0 deletions
3  .gitignore  vendored  Executable file
@@ -0,0 +1,3 @@
nimcache/
nimblecache/
htmldocs/
61  LICENSE  Normal file
@@ -0,0 +1,61 @@
Don't Be Evil License (DBEL) 1.0

1. Acceptance

By using, copying, modifying, or distributing the source code, training data, training environment, or its associated machine learning model weights (collectively the "Software"), you agree to comply with all terms outlined in this license.

2. Copyright License

The Licensor (defined below) grants you a non-exclusive, worldwide, royalty-free, non-sublicensable, non-transferable license to use, copy, modify, and distribute the Software, including associated model weights, training data, and training environments, subject to the conditions set forth in this license. This includes the right to create and distribute derivative works of the Software, provided that the limitations below are observed.

3. Non-Commercial Use Only

You may use, copy, modify, and distribute the Software and derivative works solely for non-commercial purposes. Non-commercial purposes include, but are not limited to:
- Personal research and study.
- Educational and academic projects.
- Public knowledge and hobby projects.
- Religious observance.
- Non-commercial research, or AI and machine learning (ML) experimentation.

4. Distribution and Monetization Provisions

Any use of the Software or derivative works for profit, or in a business context, including in monetized services and products, requires explicit, separate permission from the Licensor. The restrictions on commercial use apply to both the source code and any model weights produced by the Software.

Any distribution must include this license, and the non-commercial restriction must be maintained. Weights resulting from use of the Software, including but not limited to training or fine-tuning models, must be shared under this same license, ensuring all restrictions and conditions are preserved.

5. Integrity of the Licensor's Software

You may not alter, remove, or obscure any functionality related to payment, donation, or attribution in any distributed version of the Licensed Materials. You must retain all notices of copyright, licensing, and attribution provided by the Licensor in any derivative works.

You may not alter or remove copyright, license, or trademark notices in the Software, and any public mention of the Software must include attribution to the Licensor.

6. Patents

This license grants you a patent license under any patents held by the Licensor that are directly related to the Software. If you or your company make any claim that the Software infringes on a patent, your rights under this license terminate immediately.

7. Distribution of Modifications

If you modify the Software, you must:
- Provide prominent and clear notice of any modifications.
- Retain all original notices of copyright, licensing, and attribution to the Licensor.
- Distribute modified versions under this license.

8. Fair Use

Nothing in this license restricts your rights under applicable laws regarding fair use of copyrighted material.

9. No Other Rights

These terms do not allow you to sublicense, assign, or transfer any of your rights to third parties, except as expressly allowed by the terms.

These terms do not prevent the Licensor from granting licenses to anyone else.

These terms do not imply any other licenses.

No other rights beyond those explicitly stated are granted.

10. Termination

Your rights under this license will automatically terminate if you breach any of its terms. The Licensor may provide you with a 30-day period to rectify any breach. If you fail to do so, or if you breach the terms again after rectification, your license will terminate permanently.

11. Disclaimer of Warranty

The Licensed Materials are provided "as is", without any warranties, express or implied, including but not limited to warranties of fitness for a particular purpose. The Licensor is not liable for any claims or damages arising from your use of the Licensed Materials.

12. Definitions

- "Licensor": The entity or individual offering the Licensed Materials under this license.
- "Licensed Materials": The software, source code, training data, training environment, model weights, and any associated AI/ML components provided under this license.
- "You": The individual or entity accepting the terms of this license, including any organization or entity that this individual or entity might work for or represent, including any entities under common control.
- "Your license": The license granted to you for the Software under these terms.
- "Model weights": The machine learning model parameters generated by training or fine-tuning models using the Licensed Materials.
- "Use": Anything you do with the Software that requires your license.
- "Trademark": Trademarks, service marks, and similar rights.
3  README.md  Executable file
@@ -0,0 +1,3 @@
# melite

An exploration of NLP bigram and n-gram models in Nim, to learn both the language and NLP.
34  batcher.nim  Executable file
@@ -0,0 +1,34 @@
import hparams

import arraymancer

### CPU Part Starts Here
# var trainingBlock: seq[int] = trainingSet[0..blockSize]
# var trainingBlockNext: seq[int] = trainingSet[1..blockSize+1]

# for i in 0..blockSize-1:
#   var context = trainingBlock[0..i+1]
#   var target = trainingBlockNext[i]
#   echo "when input is ", context, " target is ", target
#[
The above is done sequentially on the CPU, as a baseline since I can't afford a GPU.
Below is the implementation for the GPU, using batches. We can (and probably will) use the CPU for this too, but Arraymancer can target the device at compile time with a flag (-d:cuda), so we don't need PyTorch-style .to_device('cuda') calls. More testing is definitely needed.
]#
proc getBatch*(split: string, trainingSet: seq[int], validationSet: seq[int]): (Tensor[int], Tensor[int]) =
  var data: seq[int]
  if split == "train":
    data = trainingSet
  else:
    data = validationSet

  # random starting offsets, one per batch row
  let ix = randomTensor(shape=[batchSize], max=len(data)-blockSize)

  var
    x: Tensor[int] = [data[0..<blockSize-1]].toTensor()
    y: Tensor[int] = [data[1..<blockSize]].toTensor()

  for i in ix[1..len(ix)-1]:
    x = x.concat([data[i..<i+blockSize-1]].toTensor(), axis=0)
    y = y.concat([data[i+1..<i+blockSize]].toTensor(), axis=0)

  result = (x, y)
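A minimal usage sketch for getBatch (not part of the commit), assuming the hparams and textEncoder modules from this repository are compiled alongside batcher.nim; the variable names encoded and splitAt are illustrative:

import arraymancer
import ./batcher, ./hparams, ./textEncoder

let encoded = encodeString(textContent, stringToInt)
let splitAt = encoded.len * 80 div 100
let (xb, yb) = getBatch("train", encoded[0 ..< splitAt], encoded[splitAt .. ^1])
echo xb.shape   # should be [batchSize, blockSize-1]
echo yb.shape   # same shape; each row of yb is the matching row of xb shifted one character forward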
133  bigram.nim  Executable file
@@ -0,0 +1,133 @@
import std / [ tables, os, strformat ]
import random

import arraymancer

import ./batcher
import ./hparams
import ./generator
import ./textEncoder


randomize()

###### Text encoding
let vocabSize: int = stringToInt.len()

var encodedText: seq[int] = encodeString(textContent, stringToInt)

###### Split corpus into training and validation sets #######
const percentageTraining = 80 # how much % of the corpus is given for training.

let trainingSetEnd: int = (percentageTraining*encodedText.len/100).int

let trainingSet: seq[int] = encodedText[0..trainingSetEnd]

let validationSet: seq[int] = encodedText[trainingSetEnd..textContent.len-1]

###### Define NN
let ctx = newContext Tensor[float32]

network Nimertes:
  layers:
    encoder: Embedding(vocabSize, hiddenSize)
    hiddenLinear: Linear(hiddenSize, hiddenSize)
    outputLayer: Linear(hiddenSize, vocabSize)
  forward x:
    x.encoder.tanh.hiddenLinear.tanh.hiddenLinear.tanh.outputLayer

###### Save/Load Model
proc saveModel(ctx: Context[AnyTensor[float32]], model: Nimertes, dir: string) =
  echo "\nsaving model..."
  for layer, layerField in model.fieldPairs:
    var layerName = layer
    for field, tensorVariable in layerField.fieldPairs:
      var fieldName = field
      when tensorVariable is Variable[Tensor[float32]]:
        tensorVariable.value.writeNPY(dir / fmt"{layerName}_{fieldName}.npy")
      else:
        discard
  echo "model saved"

proc initModel(ctx: Context[AnyTensor[float32]], model: Nimertes, dir: string): Nimertes =
  echo "\nweights exist"
  echo "\nloading model..."
  for layer, _ in model.fieldPairs:
    var layerName = layer
    case layerName
    of "encoder":
      model.encoder.weight.value = readNPY[float32](dir / fmt"{layerName}_weight.npy")
    of "hiddenLinear":
      model.hiddenLinear.weight.value = readNPY[float32](dir / fmt"{layerName}_weight.npy")
      model.hiddenLinear.bias.value = readNPY[float32](dir / fmt"{layerName}_bias.npy")
    of "outputLayer":
      model.outputLayer.weight.value = readNPY[float32](dir / fmt"{layerName}_weight.npy")
      model.outputLayer.bias.value = readNPY[float32](dir / fmt"{layerName}_bias.npy")
    else:
      discard
  echo "model loaded\n"
  return model

#### Initialize NN
var
  model = ctx.init(Nimertes)
  optim = model.optimizer(Adam, learningRate=3e-4'f32, beta1=0.9'f32, beta2=0.9'f32, eps=1e-5'f32)

if fileExists("tinyBiGram/encoder_weight.npy"):
  model = ctx.initModel(model, "tinyBiGram")

###### Generate Text
proc generateText(ctx: Context[AnyTensor[float32]], model: Nimertes, seedCharacters="Wh", seqLen=blockSize, temperature=0.8'f32): string =

  ctx.no_grad_mode:
    let primer = encodeString(seedCharacters, stringToInt).toTensor.unsqueeze(1)

    result = seedCharacters

    var
      input = primer[^1, _]
      output: Variable[Tensor[float32]]

    for _ in 0 ..< seqLen:
      output = model.forward(input.squeeze(0))
      var preds = output.value

      preds /.= temperature  # temperature below 1 sharpens the distribution, above 1 flattens it
      let probs = preds.softmax().squeeze(0)

      # Sample and append to result
      let encodedChar = probs.sample()
      result &= decodeString(encodedChar, intToString)

      input = newTensor[int](1, 1)
      input[0, 0] = encodedChar

###### Training
var totalLoss: seq[float]
var plotidx: seq[float]

for i in 0..numEpochs:
  var
    (trainingBatch, trainingBatchNext) = getBatch("train", trainingSet, validationSet)
    output: Variable[Tensor[float32]]
    batchLoss: Variable[Tensor[float32]]

  if i mod evalIter == 0:
    # periodically sample some text and checkpoint the weights
    echo "\n", ctx.generateText(model), "\n"
    ctx.saveModel(model, "tinyBiGram")
  else:
    for i in 0 ..< batchSize:
      var
        inputTensor: Tensor[int] = trainingBatch[i, _]
        targetTensor: Tensor[int] = trainingBatchNext[i, _]

      output = model.forward(inputTensor.squeeze(0))
      batchLoss = output.sparseSoftmaxCrossEntropy(target=targetTensor.squeeze(0))

      batchLoss.backprop()
      optim.update()

    totalLoss.add(batchLoss.value[0])
    plotidx.add(i.float)

###### Plot results and show final output
echo ctx.generateText(model)
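For intuition on the temperature division used in generateText above, a small stdlib-only sketch (illustrative, not part of the commit; softmaxSeq is a hypothetical helper):

import std / [ math, sequtils ]

proc softmaxSeq(logits: seq[float]): seq[float] =
  # exponentiate and normalise so the values sum to 1
  let exps = logits.mapIt(exp(it))
  let total = exps.foldl(a + b)
  exps.mapIt(it / total)

let logits = @[2.0, 1.0, 0.0]
echo softmaxSeq(logits)                    # baseline distribution
echo softmaxSeq(logits.mapIt(it / 0.8))    # temperature 0.8 (the default above): sharper, more deterministic
echo softmaxSeq(logits.mapIt(it / 2.0))    # temperature 2.0: flatter, more random samples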
21  generator.nim  Executable file
@@ -0,0 +1,21 @@
import std/algorithm
import random

import arraymancer


proc searchsorted[T](x: openArray[T], value: T, leftSide: static bool = true): int =
  when leftSide:
    result = x.lowerBound(value)
  else:
    result = x.upperBound(value)

proc sample*(probs: Tensor[float32]): int =
  ## Draw an index from the categorical distribution given by `probs`
  ## by inverting its cumulative distribution function (CDF).
  var
    rng = initRand()
  let
    u = rng.rand(1.0'f32)
    cdf = cumsum(probs, axis=0)
    cdfA = cast[ptr UncheckedArray[float32]](cdf.unsafeRawOffset)
  result = cdfA.toOpenArray(0, cdf.size-1).searchsorted(u, leftSide=false)
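A small usage sketch for sample (illustrative, not part of the commit): with probabilities [0.1, 0.2, 0.7] the CDF is [0.1, 0.3, 1.0], so a uniform draw of, say, 0.25 lands in the second bucket and index 1 is returned.

import arraymancer
import ./generator

let probs = [0.1'f32, 0.2'f32, 0.7'f32].toTensor()
echo sample(probs)   # prints 0, 1 or 2, with index 2 most likely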
65  gpt.nim  Normal file
@@ -0,0 +1,65 @@
import std / [ tables, os ]
import random

import arraymancer
import plotly
import progress
import therapist

import ./batcher
import ./hparams
import ./textEncoder
import ./generator


randomize()

let spec = (
  dir: newStringArg(@["-d", "--dir"], defaultVal="defaultDir", help="Directory to save/load from."),

  help: newHelpArg(@["-h", "--help"], help="Show help message"),
)
spec.parseOrQuit("Nimertes")

let dirName = spec.dir.value

var
  bar = newProgressBar(total=numEpochs)
bar.start()

###### Text encoding
let vocabSize: int = stringToInt.len()

# var encodedText: seq[int] = encodeString(textContent, stringToInt)

# ###### Split corpus into training and validation sets #######
# const percentageTraining = 80 # how much % of the corpus is given for training.

# let trainingSetEnd: int = (percentageTraining*encodedText.len/100).int

# let trainingSet: seq[int] = encodedText[0..trainingSetEnd]

# let validationSet: seq[int] = encodedText[trainingSetEnd..textContent.len-1]

###### Define NN
let ctx = newContext Tensor[float32]

# TODO: make Block type for Nimertes
# type Block:

network NimertesGPT:
  layers:
    tokenEmbedder: Embedding(vocabSize, hiddenSize)
    positionEmbedder: Embedding(blockSize, nEmbed)
    # blockLayer: Block(nEmbed,)
    languageModelHead: Linear(nEmbed, vocabSize)
    hiddenLinear: Linear(hiddenSize, hiddenSize)
    outputLayer: Linear(hiddenSize, nEmbed)
  # NOTE: the forward pass below is still work in progress and does not compile yet
  forward x:
    tokenEmbedding = x.tokenEmbedder()
    positionEmbedding = .positionEmbedder()
    x.tokenEmbedding.positionEmbedding.tanh.hiddenLinear.tanh.hiddenLinear.tanh.outputLayer

###### Initialize NN
var
  model = ctx.init(NimertesGPT)
11  hparams.nim  Executable file
@@ -0,0 +1,11 @@
const
  blockSize*: int = 200
  batchSize*: int = 100
  numEpochs*: int = 1000
  evalIter*: int = 250

### Network architecture params
const
  hiddenSize* = 100
  numLayers* = 4
  nEmbed*: int = 300
90  network.nim  Executable file
@@ -0,0 +1,90 @@
import arraymancer

#### The following need to be combined gently
let ctx = newContext Tensor[float32]

let
  SINGLETON = 1


type
  LinearLayer = object
    weight: Variable[Tensor[float32]]
    bias: Variable[Tensor[float32]]
  Nimertes = object
    hidden: LinearLayer
    output: LinearLayer

template weightInit(shape: varargs[int], initKind: untyped): Variable =
  ctx.variable(
    initKind(shape, float32),
    requiresGrad = true
  )

proc newNimertesInstance*(ctx: Context[Tensor[float32]], hiddenSize: int, dimIn: int, dimOut: int): Nimertes =
  result.hidden.weight = weightInit(hiddenSize, dimIn, kaimingNormal)
  result.hidden.bias = weightInit(SINGLETON, hiddenSize, kaimingNormal)
  result.output.weight = weightInit(dimOut, hiddenSize, kaimingNormal)
  result.output.bias = weightInit(SINGLETON, dimOut, kaimingNormal)

proc forward*(network: Nimertes, x: Variable[Tensor[float32]]): Variable[Tensor[float32]] =
  result = x.linear(
    network.hidden.weight, network.hidden.bias).relu.linear(network.output.weight, network.output.bias)

proc saveModel*(network: Nimertes) =
  # This is a quick prototype, but you get the idea.
  # Perhaps a better way to do this would be to save all weights/biases of
  # the model into a single file.
  network.hidden.weight.value.writeNpy("hiddenweight.npy")
  network.hidden.bias.value.writeNpy("hiddenbias.npy")
  network.output.weight.value.writeNpy("outputweight.npy")
  network.output.bias.value.writeNpy("outputbias.npy")

proc load*(ctx: Context[Tensor[float32]]): Nimertes =
  result.hidden.weight = ctx.variable(readNpy[float32]("hiddenweight.npy"), requiresGrad = true)
  result.hidden.bias = ctx.variable(readNpy[float32]("hiddenbias.npy"), requiresGrad = true)
  result.output.weight = ctx.variable(readNpy[float32]("outputweight.npy"), requiresGrad = true)
  result.output.bias = ctx.variable(readNpy[float32]("outputbias.npy"), requiresGrad = true)

##### Second Way to implement
let
  vocabSize = 64
  hiddenSize = 100
  nLayers = 2

network Nimertes2:
  layers:
    encoder: Embedding(vocabSize, vocabSize)
    gru: GRULayer(encoder.out_shape[0], hiddenSize, nLayers)
    decoder: Linear(hiddenSize, vocabSize)
  forward input, hidden0:
    let (output, hiddenN) = input.encoder.gru(hidden0)
    # result.output is of shape [Sequence, BatchSize, HiddenSize]
    # In our case the sequence is 1 so we can simply flatten
    let flattened = output.reshape(output.value.shape[1], hiddenSize)

    (output: flattened.decoder, hidden: hiddenN)

export Nimertes2

#### Third Way
let
  dimIn = vocabSize
  dimOut = vocabSize

network Nimertes3:
  layers:
    encoder: Embedding(vocabSize, vocabSize)
    fc1: Linear(dimIn, hiddenSize)
    fc2: Linear(hiddenSize, dimOut)
  forward input, targets, output:
    let
      logits = input.encoder
      (batch, time, channels) = logits.shape()
      reshapeLogits = logits.reshape(batch*time, channels)
      reshapeTargets = targets.reshape(batch*time)
      loss = output.softmax_cross_entropy()
    # x.fc1.relu.fc2

export Nimertes3
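The first implementation above stores each weight as [outFeatures, inFeatures] and each bias as [1, outFeatures], so forward's linear calls take a [batch, dimIn] input to [batch, hiddenSize] and then to [batch, dimOut]. A standalone shape check of that layout (illustrative only; the concrete sizes 64 and 100 are just examples):

import arraymancer

let ctx = newContext Tensor[float32]
let
  w = ctx.variable(randomTensor(shape = [100, 64], max = 1.0'f32), requiresGrad = true)  # [hiddenSize, dimIn]
  b = ctx.variable(randomTensor(shape = [1, 100], max = 1.0'f32), requiresGrad = true)   # [1, hiddenSize]
  x = ctx.variable(randomTensor(shape = [1, 64], max = 1.0'f32))                         # [batch, dimIn]
echo x.linear(w, b).value.shape   # expected [1, 100]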
49  textEncoder.nim  Executable file
@@ -0,0 +1,49 @@
import
  std / [setutils, sequtils, tables]

const
  textContent* = readFile("wizard.txt")

########### Text Encoding/Decoding ############
proc createStringTable*(text: string): Table[char, int] =
  ## Map each distinct character in the corpus to an integer id.
  let charSet = text.toSet

  var stringToInt = initTable[char, int]()

  for id, glyph in charSet.toSeq:
    stringToInt[glyph] = id

  result = stringToInt

proc createIntTable*(text: string): Table[int, char] =
  ## Inverse mapping: integer id back to character.
  let charSet = text.toSet

  var intToString = initTable[int, char]()

  for id, glyph in charSet.toSeq:
    intToString[id] = glyph

  result = intToString

proc encodeString*(str: string, stringToInt: Table[char, int]): seq[int] =
  result = @[]

  for glyph in str:
    result.add(stringToInt[glyph])

proc decodeString*(list: seq[int], intToString: Table[int, char]): string =
  result = ""

  for item in list:
    result.add(intToString[item])

proc decodeString*(letter: int, intToString: Table[int, char]): char =
  result = intToString[letter]


let
  stringToInt*: Table[char, int] = createStringTable(textContent)
  intToString*: Table[int, char] = createIntTable(textContent)
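A round-trip sketch for the encoder (illustrative, not part of the commit); the concrete ids depend on which characters appear in wizard.txt:

import ./textEncoder

let ids = encodeString("Wizard", stringToInt)
echo ids                                 # e.g. @[17, 42, 3, 9, 28, 11] (corpus-dependent)
echo decodeString(ids, intToString)      # "Wizard"
echo decodeString(ids[0], intToString)   # 'W'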
5603  wizard.txt  Executable file
File diff suppressed because it is too large