llama.go

package llm

import (
    "bytes"
    "context"
    _ "embed"
    "errors"
    "fmt"
    "os"
    "os/exec"
    "time"

    "github.com/jmorganca/ollama/api"
    "github.com/jmorganca/ollama/format"
)

// jsonGrammar is a GBNF grammar (the llama.cpp grammar format) used to
// constrain sampling to well-formed JSON when JSON output is requested.
const jsonGrammar = `
root   ::= object
value  ::= object | array | string | number | ("true" | "false" | "null") ws

object ::=
  "{" ws (
            string ":" ws value
    ("," ws string ":" ws value)*
  )? "}" ws

array  ::=
  "[" ws (
            value
    ("," ws value)*
  )? "]" ws

string ::=
  "\"" (
    [^"\\] |
    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
  )* "\"" ws

number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws

# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= ([ \t\n] ws)?
`

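// completionBody is a minimal sketch, not this package's actual request
// builder: llama.cpp-style runners accept a "grammar" field alongside the
// prompt, so jsonGrammar can be attached when the caller asks for JSON.
// The map layout here is illustrative; the real wire format may differ.
func completionBody(opts PredictOpts) map[string]interface{} {
    body := map[string]interface{}{
        "prompt": opts.Prompt,
        "stream": true,
    }
    if opts.Format == "json" {
        // Constrain token sampling so the runner can only emit valid JSON.
        body["grammar"] = jsonGrammar
    }
    return body
}
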
// Running tracks a launched llama runner subprocess and how to reach and
// stop it.
type Running struct {
    Port          int
    Cmd           *exec.Cmd
    Cancel        context.CancelFunc
    *StatusWriter // captures error messages from the llama runner process
}

// ImageData carries one image payload to the runner; ID distinguishes
// multiple images attached to a single request.
type ImageData struct {
    Data []byte `json:"data"`
    ID   int    `json:"id"`
}

var (
    errNvidiaSMI     = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
    errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
    payloadMissing   = fmt.Errorf("expected dynamic library payloads not included in this build of ollama")
)

// StatusWriter is a writer that captures error messages from the llama runner process
type StatusWriter struct {
    ErrCh      chan error
    LastErrMsg string
}

func NewStatusWriter() *StatusWriter {
    return &StatusWriter{
        ErrCh: make(chan error, 1),
    }
}

// Write scans runner output for error markers, records and forwards the most
// recent error message, and mirrors all output to stderr.
func (w *StatusWriter) Write(b []byte) (int, error) {
    var errMsg string
    if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
        errMsg = string(bytes.TrimSpace(after))
    } else if _, after, ok := bytes.Cut(b, []byte("CUDA error")); ok {
        errMsg = string(bytes.TrimSpace(after))
    }

    if errMsg != "" {
        w.LastErrMsg = errMsg
        w.ErrCh <- fmt.Errorf("llama runner: %s", errMsg)
    }

    return os.Stderr.Write(b)
}

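// exampleStatusWriter is a usage sketch, not part of this file's original
// API: pointing the runner's stderr at a StatusWriter surfaces error lines
// on ErrCh while they still reach the terminal. The binary path is a
// placeholder.
func exampleStatusWriter(ctx context.Context) error {
    cmd := exec.CommandContext(ctx, "/path/to/llama-runner") // placeholder path
    statusWriter := NewStatusWriter()
    cmd.Stderr = statusWriter

    if err := cmd.Start(); err != nil {
        return err
    }

    select {
    case err := <-statusWriter.ErrCh:
        return err // first "error:" / "CUDA error" line seen in runner output
    case <-time.After(retryDelay):
        return nil // no early error within the startup window
    }
}
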
// prediction mirrors one streamed chunk of a completion response from the
// llama runner, including its token-count and latency timings.
type prediction struct {
    Content string `json:"content"`
    Model   string `json:"model"`
    Prompt  string `json:"prompt"`
    Stop    bool   `json:"stop"`

    Timings struct {
        PredictedN  int     `json:"predicted_n"`
        PredictedMS float64 `json:"predicted_ms"`
        PromptN     int     `json:"prompt_n"`
        PromptMS    float64 `json:"prompt_ms"`
    }
}

// maxBufferSize caps the buffer used when scanning the runner's streamed output.
const maxBufferSize = 512 * format.KiloByte

const maxRetries = 3
const retryDelay = 1 * time.Second

// PredictOpts are the caller-supplied inputs for a single completion request.
type PredictOpts struct {
    Prompt  string
    Format  string
    Images  []api.ImageData
    Options api.Options
}

// PredictResult is one streamed completion chunk, with eval counts and
// durations populated once generation stops.
type PredictResult struct {
    Content            string
    Done               bool
    PromptEvalCount    int
    PromptEvalDuration time.Duration
    EvalCount          int
    EvalDuration       time.Duration
}

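// toPredictResult and parseDurationMs are hedged sketches of the mapping
// between the two shapes above, introduced here for illustration; they assume
// the runner reports timings in milliseconds, as the JSON tags on
// prediction.Timings suggest.
func toPredictResult(p prediction) PredictResult {
    return PredictResult{
        Content:            p.Content,
        Done:               p.Stop,
        PromptEvalCount:    p.Timings.PromptN,
        PromptEvalDuration: parseDurationMs(p.Timings.PromptMS),
        EvalCount:          p.Timings.PredictedN,
        EvalDuration:       parseDurationMs(p.Timings.PredictedMS),
    }
}

func parseDurationMs(ms float64) time.Duration {
    return time.Duration(ms * float64(time.Millisecond))
}
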
// The request/response pairs below mirror the llama.cpp server's tokenize,
// detokenize, and embedding endpoints; the JSON tags match that wire format.

type TokenizeRequest struct {
    Content string `json:"content"`
}

type TokenizeResponse struct {
    Tokens []int `json:"tokens"`
}

type DetokenizeRequest struct {
    Tokens []int `json:"tokens"`
}

type DetokenizeResponse struct {
    Content string `json:"content"`
}

type EmbeddingRequest struct {
    Content string `json:"content"`
}

type EmbeddingResponse struct {
    Embedding []float64 `json:"embedding"`
}