This doesn't have any impact currently because NUM_PARALLEL is forced to 1 for embeddings, so both indices (`i` and `seq.cache.Id`) will always be 0.
@@ -454,7 +454,7 @@ func (s *Server) processBatch(tokenBatch *llama.Batch, embedBatch *llama.Batch)
// if done processing the prompt, generate an embedding and return
if seq.embeddingOnly {
- embed := s.lc.GetEmbeddingsSeq(i)
+ embed := s.lc.GetEmbeddingsSeq(seq.cache.Id)
if embed == nil {
embed = s.lc.GetEmbeddingsIth(seq.iBatch)
}