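When adding tokens to a batch, the loop index is zero-based, but the guard only stops once the index is greater than the max batch size (`j > s.batchSize`). The token at index `s.batchSize` therefore still gets added, one more than the batch can hold, which results in an out-of-bounds access when that final token is added. The check must be `j >= s.batchSize`.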
@@ -178,7 +178,7 @@ func (s *Server) run(ctx context.Context) {
for j, t := range seq.tokens {
// todo: make this n_batch
- if j > s.batchSize {
+ if j >= s.batchSize {
break
}
batch.Add(t, seq.nPast, []int{i}, !seq.prompt())