|
@@ -64,9 +64,6 @@ func extServerResponseToErr(resp C.ext_server_resp_t) error {
|
|
|
return fmt.Errorf(C.GoString(resp.msg))
|
|
|
}
|
|
|
|
|
|
-// Note: current implementation does not support concurrent instantiations
|
|
|
-var llm *dynExtServer
|
|
|
-
|
|
|
func newDynExtServer(library, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
|
|
|
if !mutex.TryLock() {
|
|
|
slog.Info("concurrent llm servers not yet supported, waiting for prior server to complete")
|
|
@@ -83,7 +80,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
|
|
|
mutex.Unlock()
|
|
|
return nil, fmt.Errorf("Unable to load dynamic library: %s", C.GoString(resp.msg))
|
|
|
}
|
|
|
- llm = &dynExtServer{
|
|
|
+ llm := dynExtServer{
|
|
|
s: srv,
|
|
|
options: opts,
|
|
|
}
|
|
@@ -161,7 +158,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
|
|
|
|
|
|
slog.Info("Starting llama main loop")
|
|
|
C.dyn_llama_server_start(llm.s)
|
|
|
- return llm, nil
|
|
|
+ return &llm, nil
|
|
|
}
|
|
|
|
|
|
func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn func(PredictResult)) error {
|