|
@@ -421,16 +421,21 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
|
|
|
slog.Debug("evaluating already loaded", "model", req.model.ModelPath)
|
|
|
runner.refMu.Lock()
|
|
|
defer runner.refMu.Unlock()
|
|
|
- // Ignore the NumGPU settings for comparison
|
|
|
- optsExisting := runner.Options.Runner
|
|
|
- optsExisting.NumGPU = -1
|
|
|
- optsNew := req.opts.Runner
|
|
|
- optsNew.NumGPU = -1
|
|
|
+
|
|
|
timeout := 10 * time.Second
|
|
|
if runner.loading {
|
|
|
timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems...
|
|
|
}
|
|
|
- ctx, cancel := context.WithTimeout(ctx, timeout) // BUG -
|
|
|
+
|
|
|
+ // Don't reload runner if num_gpu=-1 was provided
|
|
|
+ optsExisting := runner.Options.Runner
|
|
|
+ optsNew := req.opts.Runner
|
|
|
+ if optsNew.NumGPU < 0 {
|
|
|
+ optsExisting.NumGPU = -1
|
|
|
+ optsNew.NumGPU = -1
|
|
|
+ }
|
|
|
+
|
|
|
+ ctx, cancel := context.WithTimeout(ctx, timeout)
|
|
|
defer cancel()
|
|
|
if !reflect.DeepEqual(runner.adapters, req.model.AdapterPaths) || // have the adapters changed?
|
|
|
!reflect.DeepEqual(runner.projectors, req.model.ProjectorPaths) || // have the projectors changed?
|
|
@@ -438,6 +443,7 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
|
|
|
runner.llama.Ping(ctx) != nil {
|
|
|
return true
|
|
|
}
|
|
|
+
|
|
|
return false
|
|
|
}
|
|
|
|