Explorar o código

fix: regression unsupported metal types

Omitting `--n-gpu-layers` means Metal is used on macOS, which isn't correct,
since Ollama uses `num_gpu=0` to explicitly disable the GPU for file types
that are not implemented in Metal.
Michael Yang hai 1 ano
pai
achega
cb4a80b693
Modificáronse 1 ficheiros con 1 adicións e 4 borrados
  1. 1 4
      llm/llama.go

+ 1 - 4
llm/llama.go

@@ -292,13 +292,10 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 		"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
 		"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
 		"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
+		"--n-gpu-layers", fmt.Sprintf("%d", numGPU),
 		"--embedding",
 	}
 
-	if numGPU > 0 {
-		params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU))
-	}
-
 	if opts.NumGQA > 0 {
 		params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
 	}