import hparams
import arraymancer

### CPU Part Starts Here

# var trainingBlock: seq[int] = trainingSet[0..blockSize]
# var trainingBlockNext: seq[int] = trainingSet[1..blockSize+1]
# for i in 0..blockSize-1:
#   var context = trainingBlock[0..i]   # Nim slices are inclusive, so 0..i is the first i+1 tokens (and excludes the target)
#   var target = trainingBlockNext[i]
#   echo "when input is ", context, " target is ", target

#[
The above is done sequentially on the CPU, as a baseline, since I can't afford a GPU.
Below is the batched implementation intended for the GPU. We can (and probably will)
still run this on the CPU, but Arraymancer can target the device at compile time via
a flag (-d:cuda), so we don't need the PyTorch .to('cuda') stuff. More testing is
definitely needed.
]#

proc getBatch*(split: string, trainingSet: seq[int], validationSet: seq[int]): (Tensor[int], Tensor[int]) =
  var data: seq[int]
  if split == "train":
    data = trainingSet
  else:
    data = validationSet
  # Sample batchSize random starting offsets, leaving room for a full block.
  let ix = randomTensor(shape=[batchSize], max=len(data)-blockSize)
  var x: Tensor[int] = [data[0..