numPredict is used to enforce a limit on the number of tokens to generate. It is passed in from Ollama, but it is never stored, so it cannot be checked.
@@ -91,6 +91,7 @@ func (s *Server) NewSequence(prompt string, numPredict int, stop []string, param
return &Sequence{
tokens: tokens,
n_prompt_tokens: len(tokens),
+ numPredict: numPredict,
responses: make(chan string, 1),
embedding: make(chan []float32, 1),
samplingCtx: sc,