Explorar o código

llm: normalise kvct parameter handling (#7926)

Sam hai 5 meses
pai
achega
539be43640
Modificáronse 2 ficheiros con 2 adicións e 2 borrados
  1. 1 1
      llm/memory.go
  2. 1 1
      llm/server.go

+ 1 - 1
llm/memory.go

@@ -129,7 +129,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
 
 	var kvct string
 	if fa {
-		requested := envconfig.KvCacheType()
+		requested := strings.ToLower(envconfig.KvCacheType())
 		if requested != "" && ggml.SupportsKVCacheType(requested) {
 			kvct = requested
 		}

+ 1 - 1
llm/server.go

@@ -225,7 +225,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
 		fa = false
 	}
 
-	kvct := envconfig.KvCacheType()
+	kvct := strings.ToLower(envconfig.KvCacheType())
 
 	if fa {
 		slog.Info("enabling flash attention")