|
@@ -71,7 +71,7 @@ func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options
|
|
|
|
|
|
// scheduleRunner schedules a runner after validating inputs such as capabilities and model options.
|
|
|
// It returns the allocated runner, model instance, and consolidated options on success, or an error otherwise.
|
|
|
-func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (llm.LlamaServer, *Model, *api.Options, error) {
|
|
|
+func (s *Server) scheduleRunner(ctx context.Context, name string, mTemplate string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (llm.LlamaServer, *Model, *api.Options, error) {
|
|
|
if name == "" {
|
|
|
return nil, nil, nil, fmt.Errorf("model %w", errRequired)
|
|
|
}
|
|
@@ -81,6 +81,13 @@ func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capabil
|
|
|
return nil, nil, nil, err
|
|
|
}
|
|
|
|
|
|
+ if mTemplate != "" {
|
|
|
+ model.Template, err = template.Parse(mTemplate)
|
|
|
+ if err != nil {
|
|
|
+ return nil, nil, nil, err
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
if err := model.CheckCapabilities(caps...); err != nil {
|
|
|
return nil, nil, nil, fmt.Errorf("%s %w", name, err)
|
|
|
}
|
|
@@ -120,7 +127,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|
|
}
|
|
|
|
|
|
caps := []Capability{CapabilityCompletion}
|
|
|
- r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
|
|
|
+ r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, "", caps, req.Options, req.KeepAlive)
|
|
|
if errors.Is(err, errCapabilityCompletion) {
|
|
|
c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
|
|
|
return
|
|
@@ -256,7 +263,7 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
|
|
|
return
|
|
|
}
|
|
|
|
|
|
- r, _, _, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
|
|
|
+ r, _, _, err := s.scheduleRunner(c.Request.Context(), req.Model, "", []Capability{}, req.Options, req.KeepAlive)
|
|
|
if err != nil {
|
|
|
handleScheduleError(c, req.Model, err)
|
|
|
return
|
|
@@ -1132,7 +1139,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|
|
}
|
|
|
|
|
|
caps := []Capability{CapabilityCompletion}
|
|
|
- r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
|
|
|
+ r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, req.Template, caps, req.Options, req.KeepAlive)
|
|
|
if errors.Is(err, errCapabilityCompletion) {
|
|
|
c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
|
|
|
return
|