- package llm
import (
	"bytes"
	"context"
	_ "embed"
	"errors"
	"fmt"
	"os"
	"os/exec"
	"sync"
	"time"

	"github.com/jmorganca/ollama/api"
	"github.com/jmorganca/ollama/format"
)
// jsonGrammar is a GBNF (llama.cpp grammar) definition that constrains
// sampling to syntactically valid JSON. It is passed to the runner when a
// request asks for JSON-formatted output.
const jsonGrammar = `
root   ::= object
value  ::= object | array | string | number | ("true" | "false" | "null") ws

object ::=
  "{" ws (
            string ":" ws value
    ("," ws string ":" ws value)*
  )? "}" ws

array  ::=
  "[" ws (
            value
    ("," ws value)*
  )? "]" ws

string ::=
  "\"" (
    [^"\\] |
    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
  )* "\"" ws

number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws

# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= ([ \t\n] ws)?
`
// llamaModel describes a model in the llama family; its hyperparameters are
// read from the model file and used to derive family/size/quantization labels.
type llamaModel struct {
	hyperparameters llamaHyperparameters
}
- func (llm *llamaModel) ModelFamily() string {
- return "llama"
- }
// llamaModelType maps a llama layer count to its conventional parameter-size
// label (e.g. 32 layers -> "7B"). Unrecognized counts yield "unknown".
func llamaModelType(numLayer uint32) string {
	sizes := map[uint32]string{
		26: "3B",
		32: "7B",
		40: "13B",
		48: "34B",
		60: "30B",
		80: "65B",
	}
	if label, ok := sizes[numLayer]; ok {
		return label
	}
	return "unknown"
}
- func (llm *llamaModel) ModelType() string {
- return llamaModelType(llm.hyperparameters.NumLayer)
- }
- func (llm *llamaModel) FileType() string {
- return fileType(llm.hyperparameters.FileType)
- }
- func (llm *llamaModel) NumLayers() int64 {
- return int64(llm.hyperparameters.NumLayer)
- }
// llamaHyperparameters holds the fixed-size hyperparameter header read from a
// llama-family model file. Field order matters if this struct is decoded
// directly from the file — NOTE(review): confirm against the reader before
// reordering.
type llamaHyperparameters struct {
	// NumVocab is the size of the model's vocabulary.
	NumVocab uint32

	// NumEmbd is the size of the model's embedding layer.
	NumEmbd uint32

	// NumMult and NumHead — presumably the FFN multiple and attention head
	// count; not referenced in this file, verify against the loader.
	NumMult uint32
	NumHead uint32

	// NumLayer is the number of layers in the model.
	NumLayer uint32

	// NumRot — presumably rotary embedding dimension; verify against the loader.
	NumRot uint32

	// FileType describes the quantization level of the model, e.g. Q4_0, Q5_K, etc.
	FileType uint32
}
// Running tracks a live llama runner subprocess: the port it serves on, the
// process handle, and the plumbing used to observe its exit and errors.
type Running struct {
	Port   int
	Cmd    *exec.Cmd
	Cancel context.CancelFunc // cancels the subprocess's context

	exitOnce sync.Once  // guards one-time close/send on exitCh
	exitCh   chan error // channel to receive the exit status of the subprocess

	*StatusWriter // captures error messages from the llama runner process
}
// ImageData is a single image payload sent to the runner, identified by ID so
// the prompt can reference it.
type ImageData struct {
	Data []byte `json:"data"`
	ID   int    `json:"id"`
}
// Sentinel errors for GPU detection and runner payload setup.
var (
	errNvidiaSMI     = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
	errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
	// errors.New (not fmt.Errorf) — the message is fixed, with no format verbs;
	// this also matches the sibling declarations above.
	payloadMissing = errors.New("expected dynamic library payloads not included in this build of ollama")
)
// StatusWriter is a writer that captures error messages from the llama runner
// process. It is attached to the subprocess's stderr; recognized error lines
// are surfaced on ErrCh and remembered in LastErrMsg.
type StatusWriter struct {
	ErrCh      chan error // receives parsed runner errors (buffered, cap 1)
	LastErrMsg string     // most recent error message seen, if any
}
- func NewStatusWriter() *StatusWriter {
- return &StatusWriter{
- ErrCh: make(chan error, 1),
- }
- }
- func (w *StatusWriter) Write(b []byte) (int, error) {
- var errMsg string
- if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
- errMsg = string(bytes.TrimSpace(after))
- } else if _, after, ok := bytes.Cut(b, []byte("CUDA error")); ok {
- errMsg = string(bytes.TrimSpace(after))
- }
- if errMsg != "" {
- w.LastErrMsg = errMsg
- w.ErrCh <- fmt.Errorf("llama runner: %s", errMsg)
- }
- return os.Stderr.Write(b)
- }
// prediction is the JSON shape of a single streamed completion chunk from the
// llama runner's /completion endpoint.
type prediction struct {
	Content string `json:"content"`
	Model   string `json:"model"`
	Prompt  string `json:"prompt"`
	Stop    bool   `json:"stop"` // true on the final chunk of a response

	// Timings mirrors the runner's performance counters; only populated on
	// the final chunk — NOTE(review): confirm against the runner's output.
	Timings struct {
		PredictedN  int     `json:"predicted_n"`
		PredictedMS float64 `json:"predicted_ms"`
		PromptN     int     `json:"prompt_n"`
		PromptMS    float64 `json:"prompt_ms"`
	}
}
// maxBufferSize bounds the buffer used when reading the runner's streamed
// responses — presumably a bufio.Scanner limit; verify at the call site.
const maxBufferSize = 512 * format.KiloByte

// maxRetries and retryDelay govern retry attempts against the runner.
const maxRetries = 3
const retryDelay = 1 * time.Second
// PredictOpts bundles the inputs for a single prediction request: the rendered
// prompt, an optional response format (e.g. "json"), attached images, and the
// caller's generation options.
type PredictOpts struct {
	Prompt  string
	Format  string
	Images  []api.ImageData
	Options api.Options
}
// PredictResult is one unit of prediction output returned to the caller:
// incremental content plus, when Done, the evaluation counts and durations.
type PredictResult struct {
	Content            string
	Done               bool
	PromptEvalCount    int
	PromptEvalDuration time.Duration
	EvalCount          int
	EvalDuration       time.Duration
}
// TokenizeRequest is the JSON body for the runner's tokenize endpoint.
type TokenizeRequest struct {
	Content string `json:"content"`
}

// TokenizeResponse carries the token IDs produced for the submitted content.
type TokenizeResponse struct {
	Tokens []int `json:"tokens"`
}
// DetokenizeRequest is the JSON body for the runner's detokenize endpoint.
type DetokenizeRequest struct {
	Tokens []int `json:"tokens"`
}

// DetokenizeResponse carries the text reconstructed from the submitted tokens.
type DetokenizeResponse struct {
	Content string `json:"content"`
}
// EmbeddingRequest is the JSON body for the runner's embedding endpoint.
type EmbeddingRequest struct {
	Content string `json:"content"`
}

// EmbeddingResponse carries the embedding vector for the submitted content.
type EmbeddingResponse struct {
	Embedding []float64 `json:"embedding"`
}