@@ -1,17 +1,11 @@
 package llm
 
 import (
-	"bytes"
-	"context"
 	_ "embed"
-	"errors"
 	"fmt"
-	"os"
-	"os/exec"
 	"time"
 
 	"github.com/jmorganca/ollama/api"
-	"github.com/jmorganca/ollama/format"
 )
 
 const jsonGrammar = `
@@ -42,51 +36,12 @@ number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
 ws ::= ([ \t\n] ws)?
 `
 
-type Running struct {
-	Port          int
-	Cmd           *exec.Cmd
-	Cancel        context.CancelFunc
-	*StatusWriter // captures error messages from the llama runner process
-}
-
 type ImageData struct {
 	Data []byte `json:"data"`
 	ID   int    `json:"id"`
 }
 
-var (
-	errNvidiaSMI     = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
-	errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
-	payloadMissing   = fmt.Errorf("expected dynamic library payloads not included in this build of ollama")
-)
-
-// StatusWriter is a writer that captures error messages from the llama runner process
-type StatusWriter struct {
-	ErrCh      chan error
-	LastErrMsg string
-}
-
-func NewStatusWriter() *StatusWriter {
-	return &StatusWriter{
-		ErrCh: make(chan error, 1),
-	}
-}
-
-func (w *StatusWriter) Write(b []byte) (int, error) {
-	var errMsg string
-	if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
-		errMsg = string(bytes.TrimSpace(after))
-	} else if _, after, ok := bytes.Cut(b, []byte("CUDA error")); ok {
-		errMsg = string(bytes.TrimSpace(after))
-	}
-
-	if errMsg != "" {
-		w.LastErrMsg = errMsg
-		w.ErrCh <- fmt.Errorf("llama runner: %s", errMsg)
-	}
-
-	return os.Stderr.Write(b)
-}
-
+var payloadMissing = fmt.Errorf("expected dynamic library payloads not included in this build of ollama")
 
 type prediction struct {
 	Content string `json:"content"`
@@ -102,9 +57,7 @@ type prediction struct {
 	}
 }
 
-const maxBufferSize = 512 * format.KiloByte
 const maxRetries = 3
-const retryDelay = 1 * time.Second
 
 type PredictOpts struct {
 	Prompt string