@@ -1,17 +1,11 @@
 package llm
 
 import (
-	"bytes"
-	"context"
 	_ "embed"
-	"errors"
 	"fmt"
-	"os"
-	"os/exec"
 	"time"
 
 	"github.com/jmorganca/ollama/api"
-	"github.com/jmorganca/ollama/format"
 )
 
 const jsonGrammar = `
@@ -42,51 +36,12 @@ number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
 ws ::= ([ \t\n] ws)?
 `
 
-type Running struct {
-	Port          int
-	Cmd           *exec.Cmd
-	Cancel        context.CancelFunc
-	*StatusWriter // captures error messages from the llama runner process
-}
-
 type ImageData struct {
 	Data []byte `json:"data"`
 	ID   int    `json:"id"`
 }
 
-var (
-	errNvidiaSMI     = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
-	errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
-	payloadMissing   = fmt.Errorf("expected dynamic library payloads not included in this build of ollama")
-)
-
-// StatusWriter is a writer that captures error messages from the llama runner process
-type StatusWriter struct {
-	ErrCh      chan error
-	LastErrMsg string
-}
-
-func NewStatusWriter() *StatusWriter {
-	return &StatusWriter{
-		ErrCh: make(chan error, 1),
-	}
-}
-
-func (w *StatusWriter) Write(b []byte) (int, error) {
-	var errMsg string
-	if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
-		errMsg = string(bytes.TrimSpace(after))
-	} else if _, after, ok := bytes.Cut(b, []byte("CUDA error")); ok {
-		errMsg = string(bytes.TrimSpace(after))
-	}
-
-	if errMsg != "" {
-		w.LastErrMsg = errMsg
-		w.ErrCh <- fmt.Errorf("llama runner: %s", errMsg)
-	}
-
-	return os.Stderr.Write(b)
-}
-
+var payloadMissing = fmt.Errorf("expected dynamic library payloads not included in this build of ollama")
 
 type prediction struct {
 	Content string `json:"content"`
@@ -102,9 +57,7 @@ type prediction struct {
 	}
 }
 
-const maxBufferSize = 512 * format.KiloByte
 const maxRetries = 3
-const retryDelay = 1 * time.Second
 
 type PredictOpts struct {
 	Prompt string