
relay load model errors to the client (#3065)

Bruce MacDonald, 1 year ago
commit b80661e8c7

+ 1 - 1
llm/dyn_ext_server.go

@@ -149,7 +149,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
 
 	slog.Info("Initializing llama server")
 	slog.Debug(fmt.Sprintf("server params: %+v", sparams))
-	initResp := newExtServerResp(128)
+	initResp := newExtServerResp(512)
 	defer freeExtServerResp(initResp)
 	C.dyn_llama_server_init(llm.s, &sparams, &initResp)
 	if initResp.id < 0 {
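
The only change in this file quadruples the error-response buffer from 128 to 512 bytes, so that the longer, more specific load errors introduced below are not truncated on their way back to the client. For orientation, a minimal sketch of the response struct shared across cgo; the field names are assumed from how err->id, err->msg and err->msg_len are used in these hunks, not copied from ext_server.h:

#include <cstddef>  // size_t

// Sketch only, not the real header. The Go side allocates msg (128 bytes
// before this change, 512 after) and the C side writes at most msg_len
// bytes into it with snprintf, so longer llama.cpp load errors were being
// silently cut off.
typedef struct ext_server_resp {
  int id;          // negative when initialization failed
  size_t msg_len;  // capacity of msg, set by the allocating caller
  char *msg;       // error text relayed back to the client
} ext_server_resp_t;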

+ 6 - 10
llm/ext_server/ext_server.cpp

@@ -114,16 +114,12 @@ void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err) {
     llama_backend_init();
     llama_numa_init(params.numa);
 
-    // load the model
-    if (!llama->load_model(params)) {
-      // TODO - consider modifying the logging logic or patching load_model so
-      // we can capture more detailed error messages and pass them back to the
-      // caller for better UX
-      err->id = -1;
-      snprintf(err->msg, err->msg_len, "error loading model %s",
-               params.model.c_str());
-      return;
-    }
+    if (!llama->load_model(params)) {
+      // an error occurred that was not thrown
+      err->id = -1;
+      snprintf(err->msg, err->msg_len, "error loading model %s", params.model.c_str());
+      return;
+    }
 
     llama->initialize();
   } catch (std::exception &e) {
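
This hunk now leans on the catch handler it ends on: once load_model rethrows (see the llama.cpp patch below), the detailed exception reaches llama_server_init instead of a bare false. The handler body lies past the end of the hunk, so the following is only a plausible sketch of the relay step; relay_exception is an invented name and ext_server_resp_t is the struct sketched above:

#include <cstdio>
#include <exception>

// Hypothetical helper showing the relay step; not code from the repository.
static void relay_exception(const std::exception &e, ext_server_resp_t *err) {
  err->id = -1;  // tells dyn_ext_server.go that initialization failed
  std::snprintf(err->msg, err->msg_len, "exception %s", e.what());
}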

+ 44 - 0
llm/patches/03-load_exception.diff

@@ -0,0 +1,44 @@
+diff --git a/llama.cpp b/llama.cpp
+index 4225f955..7b762f86 100644
+--- a/llama.cpp
++++ b/llama.cpp
+@@ -4756,7 +4756,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
+         }
+     } catch (const std::exception & err) {
+         LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
+-        return -1;
++        throw;
+     }
+ 
+     return 0;
+@@ -12102,16 +12102,22 @@ struct llama_model * llama_load_model_from_file(
+         };
+     }
+ 
+-    int status = llama_model_load(path_model, *model, params);
+-    GGML_ASSERT(status <= 0);
+-    if (status < 0) {
+-        if (status == -1) {
+-            LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
+-        } else if (status == -2) {
+-            LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
++    try {
++        int status = llama_model_load(path_model, *model, params);
++        GGML_ASSERT(status <= 0);
++        if (status < 0) {
++            if (status == -1) {
++                LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
++            } else if (status == -2) {
++                LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
++            }
++            delete model;
++            return nullptr;
+         }
++    } catch (...) {
++        LLAMA_LOG_ERROR("%s: exception loading model\n", __func__);
+         delete model;
+-        return nullptr;
++        throw;
+     }
+ 
+     return model;
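
With this patch, llama_load_model_from_file rethrows exception-style load failures after deleting the half-built model rather than mapping them to a nullptr return, so a caller that wants to surface the message has to wrap the call itself. A hypothetical caller sketch; everything other than llama.h, llama_model_default_params and llama_load_model_from_file is an illustrative name:

#include <exception>
#include <string>

#include "llama.h"

// Hypothetical wrapper: catch the rethrown load error and hand the text
// back to the serving layer instead of letting the exception unwind further.
static llama_model *load_or_capture(const std::string &path, std::string &err_out) {
    llama_model_params mparams = llama_model_default_params();
    try {
        return llama_load_model_from_file(path.c_str(), mparams);
    } catch (const std::exception &e) {
        err_out = e.what();  // e.g. the detailed GGUF parse error from llama.cpp
        return nullptr;
    }
}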