llm: fix seed value not being applied to requests (#4986)

Jeffrey Morgan, 10 months ago
commit ead259d877
2 changed files with 3 additions and 9 deletions
  1. docs/api.md (+2 −3)
  2. llm/ext_server/server.cpp (+1 −6)

docs/api.md (+2 −3)

@@ -250,7 +250,7 @@ curl http://localhost:11434/api/generate -d '{
 
 #### Request (Reproducible outputs)
 
-For reproducible outputs, set `temperature` to 0 and `seed` to a number:
+For reproducible outputs, set `seed` to a number:
 
 ##### Request
 
@@ -259,8 +259,7 @@ curl http://localhost:11434/api/generate -d '{
   "model": "mistral",
   "prompt": "Why is the sky blue?",
   "options": {
-    "seed": 123,
-    "temperature": 0
+    "seed": 123
   }
 }'
 ```
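
With the fix, `seed` alone is enough for reproducible outputs; pinning `temperature` to 0 is no longer required. As a quick end-to-end check, here is a minimal sketch (assuming a local Ollama server on its default port 11434, the `mistral` model pulled, and libcurl available; build with `-lcurl`) that sends the same seeded request twice and verifies the generated text matches:

```cpp
// Hypothetical verification sketch, not part of the change itself.
#include <curl/curl.h>
#include <iostream>
#include <string>

// libcurl write callback: append the response body to a std::string.
static size_t collect(char *data, size_t size, size_t nmemb, void *out) {
    static_cast<std::string *>(out)->append(data, size * nmemb);
    return size * nmemb;
}

// POST one generate request and return the raw JSON response.
static std::string generate_once(const std::string &body) {
    std::string response;
    CURL *curl = curl_easy_init();
    if (!curl) return response;
    curl_slist *hdrs = curl_slist_append(nullptr, "Content-Type: application/json");
    curl_easy_setopt(curl, CURLOPT_URL, "http://localhost:11434/api/generate");
    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str());
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, hdrs);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, collect);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);
    curl_easy_perform(curl);
    curl_slist_free_all(hdrs);
    curl_easy_cleanup(curl);
    return response;
}

// Crudely pull the "response" text out of the JSON body (timing fields such
// as total_duration differ between runs, so comparing the full body would
// always report a mismatch).
static std::string response_text(const std::string &json) {
    const std::string key = "\"response\":\"";
    size_t start = json.find(key);
    if (start == std::string::npos) return "";
    start += key.size();
    size_t end = start;
    while (end < json.size() && !(json[end] == '"' && json[end - 1] != '\\'))
        ++end;
    return json.substr(start, end - start);
}

int main() {
    // Same request as the docs example; "stream": false keeps it one JSON blob.
    const std::string body =
        R"({"model":"mistral","prompt":"Why is the sky blue?",)"
        R"("stream":false,"options":{"seed":123}})";
    const std::string a = generate_once(body);
    const std::string b = generate_once(body);
    std::cout << (response_text(a) == response_text(b)
                      ? "responses match" : "responses differ") << "\n";
}
```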

llm/ext_server/server.cpp (+1 −6)

@@ -359,7 +359,6 @@ struct llama_server_context
 
     // slots / clients
     std::vector<server_slot> slots;
-    json default_generation_settings_for_props;
 
     llama_server_queue    queue_tasks;
     llama_server_response queue_results;
@@ -483,9 +482,6 @@ struct llama_server_context
             slots.push_back(slot);
         }
 
-        default_generation_settings_for_props = get_formated_generation(slots.front());
-        default_generation_settings_for_props["seed"] = -1;
-
         batch = llama_batch_init(n_ctx, 0, params.n_parallel);
     }
 
@@ -584,7 +580,7 @@ struct llama_server_context
         slot->sparams.mirostat_eta      = json_value(data, "mirostat_eta",      default_sparams.mirostat_eta);
         slot->sparams.penalize_nl       = json_value(data, "penalize_nl",       default_sparams.penalize_nl);
         slot->params.n_keep             = json_value(data, "n_keep",            slot->params.n_keep);
-        slot->params.seed               = json_value(data, "seed",              default_params.seed);
+        slot->sparams.seed              = json_value(data, "seed",              default_params.seed);
         slot->sparams.grammar           = json_value(data, "grammar",           default_sparams.grammar);
         slot->sparams.n_probs           = json_value(data, "n_probs",           default_sparams.n_probs);
         slot->sparams.min_keep          = json_value(data, "min_keep",          default_sparams.min_keep);
@@ -811,7 +807,6 @@ struct llama_server_context
             llama_sampling_free(slot->ctx_sampling);
         }
         slot->ctx_sampling = llama_sampling_init(slot->sparams);
-        llama_set_rng_seed(ctx, slot->params.seed);
         slot->command = LOAD_PROMPT;
 
         all_slots_are_idle = false;
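
The server.cpp change is the actual fix: the request's seed was read into `slot->params.seed` and applied with `llama_set_rng_seed()` on the shared context, but each slot's sampler is built by `llama_sampling_init(slot->sparams)` from the sampling params, so the per-request seed never reached it. Moving the value into `slot->sparams.seed` puts it where the sampler actually looks. (The commit also removes the seemingly unused `default_generation_settings_for_props` member.) A simplified, standalone illustration of why the one-line move matters; the structs below are hypothetical stand-ins, not the real llama.cpp types:

```cpp
#include <cstdint>
#include <iostream>

// Stand-ins for the server's two parameter structs (hypothetical, simplified).
struct sampling_params { uint32_t seed = 0xFFFFFFFF; };  // plays the role of slot->sparams
struct request_params  { uint32_t seed = 0xFFFFFFFF; };  // plays the role of slot->params

// Mirrors the shape of llama_sampling_init(): the sampler's RNG is seeded
// from the sampling params only; a seed stored anywhere else is invisible here.
static uint32_t sampling_init_seed(const sampling_params &sp) { return sp.seed; }

int main() {
    const uint32_t request_seed = 123;  // the "seed" option from the JSON request

    sampling_params sparams;
    request_params  params;

    // Before the fix: the seed landed in params, which sampling_init ignores,
    // so the sampler kept its default and outputs were not reproducible.
    params.seed = request_seed;
    std::cout << "buggy sampler seed: " << sampling_init_seed(sparams) << "\n";

    // After the fix: the seed is stored where sampling_init actually reads it.
    sparams.seed = request_seed;
    std::cout << "fixed sampler seed: " << sampling_init_seed(sparams) << "\n";
}
```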