import hparams       # assumed to define batchSize and blockSize
import arraymancer
### CPU Part Starts Here
# var trainingBlock: seq[int] = trainingSet[0..blockSize]        # first blockSize+1 tokens
# var trainingBlockNext: seq[int] = trainingSet[1..blockSize+1]  # same window, shifted right by one
# for i in 0..blockSize-1:
#   var context = trainingBlock[0..i]   # `..` is inclusive; 0..i+1 would leak the target into the context
#   var target = trainingBlockNext[i]   # i.e. trainingSet[i+1], the token to predict
#   echo "when input is ", context, " target is ", target
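
# Worked example (illustrative values): with blockSize = 4 and
# trainingSet = @[1, 2, 3, 4, 5, 6], the loop above prints:
#   when input is @[1] target is 2
#   when input is @[1, 2] target is 3
#   when input is @[1, 2, 3] target is 4
#   when input is @[1, 2, 3, 4] target is 5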
#[
The above runs sequentially on the CPU, as a baseline (since I can't afford a GPU).

Below is the batched implementation intended for the GPU. We can (and probably
will) still run it on the CPU, but Arraymancer can enable its CUDA backend at
compile time with a flag (-d:cuda), so we don't need PyTorch's runtime
.to('cuda') dance. More testing is definitely needed.
]#
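
# A minimal sketch of the compile-time switch (the file name is hypothetical):
#   nim c -d:release train.nim           # CPU backend
#   nim c -d:release -d:cuda train.nim   # build with Arraymancer's CUDA backend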
proc getBatch*(split: string, trainingSet: seq[int], validationSet: seq[int]): (Tensor[int], Tensor[int]) =
  ## Samples a random batch of (input, target) blocks from the chosen split.
  var data: seq[int]
  if split == "train":
    data = trainingSet
  else:
    data = validationSet

  # Random block offsets. randomTensor's integer upper bound is inclusive,
  # so subtract one more than blockSize to keep the shifted y slices in bounds.
  let ix = randomTensor([batchSize], len(data) - blockSize - 1)

  # Seed x and y with the first offset, then stack the remaining blocks along
  # axis 0. Each row holds blockSize tokens; y is x shifted right by one.
  var
    x: Tensor[int] = [data[ix[0] ..< ix[0] + blockSize]].toTensor()
    y: Tensor[int] = [data[ix[0] + 1 ..< ix[0] + blockSize + 1]].toTensor()

  for j in 1 ..< batchSize:
    let i = ix[j]
    x = x.concat([data[i ..< i + blockSize]].toTensor(), axis = 0)
    y = y.concat([data[i + 1 ..< i + blockSize + 1]].toTensor(), axis = 0)

  result = (x, y)
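
# A quick smoke test: a sketch, assuming hparams exports batchSize and blockSize
# and that both splits are longer than blockSize + 1. Token values are made up.
when isMainModule:
  import std/sequtils

  let
    toyTrain = toSeq(0 ..< 1000)  # stand-in for an encoded training text
    toyVal = toSeq(0 ..< 200)     # stand-in for an encoded validation text

  let (xb, yb) = getBatch("train", toyTrain, toyVal)
  echo "x shape: ", xb.shape  # expected: [batchSize, blockSize]
  echo "y shape: ", yb.shape  # expected: [batchSize, blockSize]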