@@ -104,21 +104,22 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		var layers int
 		layers, estimatedVRAM, estimatedTotal = EstimateGPULayers(gpus, ggml, projectors, opts)

-		if gpus[0].Library == "metal" && estimatedVRAM > systemMemory {
+		switch {
+		case gpus[0].Library == "metal" && estimatedVRAM > systemMemory:
 			// disable partial offloading when model is greater than total system memory as this
 			// can lead to locking up the system
 			opts.NumGPU = 0
-		} else if gpus[0].Library != "metal" && layers == 0 {
+		case gpus[0].Library != "metal" && layers == 0:
 			// Don't bother loading into the GPU if no layers can fit
 			cpuRunner = serverForCpu()
 			gpuCount = 0
-		} else if opts.NumGPU < 0 && layers > 0 && gpus[0].Library != "cpu" {
+		case opts.NumGPU < 0 && layers > 0 && gpus[0].Library != "cpu":
 			opts.NumGPU = layers
 		}
 	}

 	// Loop through potential servers
-	finalErr := fmt.Errorf("no suitable llama servers found")
+	finalErr := errors.New("no suitable llama servers found")

 	if len(adapters) > 1 {
 		return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
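Note: the rewrite above is behavior-preserving. An expressionless `switch` in Go evaluates its `case` conditions top to bottom and executes the first one that is true, exactly like the `if`/`else if` chain it replaces. A minimal, self-contained sketch of the pattern (names are illustrative, not from this file):

    package main

    import "fmt"

    // classify shows an expressionless switch: each case is a boolean
    // expression, tried in order; the first true case runs, like if/else if.
    func classify(x int) string {
        switch {
        case x < 0:
            return "negative"
        case x == 0:
            return "zero"
        default:
            return "positive"
        }
    }

    func main() {
        fmt.Println(classify(-3)) // prints "negative"
    }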
@@ -284,7 +285,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		server := filepath.Join(dir, "ollama_llama_server")
 		if runtime.GOOS == "windows" {
-			server = server + ".exe"
+			server += ".exe"
 		}

 		// Detect tmp cleaners wiping out the file
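The `+=` change is purely stylistic; the surrounding code builds a platform-correct path for the runner binary. A small standalone sketch of the same pattern, assuming (as the diff does) that only Windows binaries carry an extension:

    package main

    import (
        "fmt"
        "path/filepath"
        "runtime"
    )

    // serverBinary joins the directory and binary name with the
    // OS-appropriate separator, appending ".exe" only on Windows.
    func serverBinary(dir string) string {
        server := filepath.Join(dir, "ollama_llama_server")
        if runtime.GOOS == "windows" {
            server += ".exe"
        }
        return server
    }

    func main() {
        fmt.Println(serverBinary("runners")) // e.g. "runners/ollama_llama_server"
    }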
@@ -459,7 +460,7 @@ func (s *llmServer) getServerStatus(ctx context.Context) (ServerStatus, error) {
 	resp, err := http.DefaultClient.Do(req)
 	if err != nil {
 		if errors.Is(err, context.DeadlineExceeded) {
-			return ServerStatusNotResponding, fmt.Errorf("server not responding")
+			return ServerStatusNotResponding, errors.New("server not responding")
 		}
 		return ServerStatusError, fmt.Errorf("health resp: %w", err)
 	}
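The `fmt.Errorf` → `errors.New` swaps in this patch apply only where the message is a fixed string with no format verbs, so `errors.New` skips a pointless formatting pass. Note that `fmt.Errorf("health resp: %w", err)` is kept as-is, since `%w` is what wraps the underlying error for `errors.Is`/`errors.As`. A minimal sketch of the distinction (hypothetical names, not from this file):

    package main

    import (
        "errors"
        "fmt"
    )

    // A fixed message needs no formatting, so errors.New suffices.
    var errNotResponding = errors.New("server not responding")

    // wrap must use fmt.Errorf with %w so callers can still match the
    // underlying error with errors.Is or errors.As.
    func wrap(err error) error {
        return fmt.Errorf("health resp: %w", err)
    }

    func main() {
        wrapped := wrap(errNotResponding)
        fmt.Println(errors.Is(wrapped, errNotResponding)) // true
    }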