Explorar el Código

better estimate scratch buffer size

Jeffrey Morgan hace 1 año
padre
commit
58ce2d8273
Se ha modificado 1 fichero con 2 adiciones y 2 borrados
  1. 2 2
      llm/llm.go

+ 2 - 2
llm/llm.go

@@ -62,8 +62,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 
 
 	// this amount is the overhead + tensors in memory
 	// this amount is the overhead + tensors in memory
 	// TODO: get this from the llama.cpp's graph calculations instead of
 	// TODO: get this from the llama.cpp's graph calculations instead of
-	// guessing it's ~1/7th of the kv cache times gqa
-	requiredAlloc := int64(ggml.NumGQA()) * requiredKv / 7
+	// estimating it's 1/6 * kv_cache_size * num_gqa
+	requiredAlloc := int64(ggml.NumGQA()) * requiredKv / 6
 
 
 	requiredTotal := requiredModel + requiredKv + requiredAlloc
 	requiredTotal := requiredModel + requiredKv + requiredAlloc