import hparams       # assumed to define batchSize and blockSize
import arraymancer
### CPU Part Starts Here
# var trainingBlock: seq[int] = trainingSet[0..blockSize]        # first blockSize+1 tokens
# var trainingBlockNext: seq[int] = trainingSet[1..blockSize+1]  # same window, shifted right by one
# for i in 0..blockSize-1:
#   var context = trainingBlock[0..i]   # `..` is inclusive; 0..i+1 would leak the target into the context
#   var target = trainingBlockNext[i]   # i.e. trainingSet[i+1], the token to predict
#   echo "when input is ", context, " target is ", target
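
# Worked example (illustrative values): with blockSize = 4 and
# trainingSet = @[1, 2, 3, 4, 5, 6], the loop above prints:
#   when input is @[1] target is 2
#   when input is @[1, 2] target is 3
#   when input is @[1, 2, 3] target is 4
#   when input is @[1, 2, 3, 4] target is 5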
#[
The above runs sequentially on the CPU, as a baseline (since I can't afford a GPU).

Below is the batched implementation intended for the GPU. We can (and probably
will) still run it on the CPU, but Arraymancer can enable its CUDA backend at
compile time with a flag (-d:cuda), so we don't need PyTorch's runtime
.to('cuda') dance. More testing is definitely needed.
]#
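
# A minimal sketch of the compile-time switch (the file name is hypothetical):
#   nim c -d:release train.nim           # CPU backend
#   nim c -d:release -d:cuda train.nim   # build with Arraymancer's CUDA backend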
proc getBatch*(split: string, trainingSet: seq[int], validationSet: seq[int]): (Tensor[int], Tensor[int]) =
  ## Samples a random batch of (input, target) blocks from the chosen split.
  var data: seq[int]
  if split == "train":
    data = trainingSet
  else:
    data = validationSet

  # Random block offsets. randomTensor's integer upper bound is inclusive,
  # so subtract one more than blockSize to keep the shifted y slices in bounds.
  let ix = randomTensor([batchSize], len(data) - blockSize - 1)

  # Seed x and y with the first offset, then stack the remaining blocks along
  # axis 0. Each row holds blockSize tokens; y is x shifted right by one.
  var
    x: Tensor[int] = [data[ix[0] ..< ix[0] + blockSize]].toTensor()
    y: Tensor[int] = [data[ix[0] + 1 ..< ix[0] + blockSize + 1]].toTensor()

  for j in 1 ..< batchSize:
    let i = ix[j]
    x = x.concat([data[i ..< i + blockSize]].toTensor(), axis = 0)
    y = y.concat([data[i + 1 ..< i + blockSize + 1]].toTensor(), axis = 0)

  result = (x, y)
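
# A quick smoke test: a sketch, assuming hparams exports batchSize and blockSize
# and that both splits are longer than blockSize + 1. Token values are made up.
when isMainModule:
  import std/sequtils

  let
    toyTrain = toSeq(0 ..< 1000)  # stand-in for an encoded training text
    toyVal = toSeq(0 ..< 200)     # stand-in for an encoded validation text

  let (xb, yb) = getBatch("train", toyTrain, toyVal)
  echo "x shape: ", xb.shape  # expected: [batchSize, blockSize]
  echo "y shape: ", yb.shape  # expected: [batchSize, blockSize]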