// types.go (3.5 KB)

  1. package api
  2. import (
  3. "fmt"
  4. "net/http"
  5. "strings"
  6. )
  // Error is the error value exchanged over the API. It serializes to JSON
  // as {"code": ..., "message": ...} and satisfies the built-in error
  // interface through its Error method.
  type Error struct {
  	// Code is interpreted as an HTTP status code when a fallback message
  	// has to be generated (see Error).
  	Code int32 `json:"code"`
  	// Message is the human-readable description. When non-empty it is
  	// returned verbatim by Error.
  	Message string `json:"message"`
  }

  // Error implements the error interface.
  //
  // It returns Message when one is set. Otherwise it falls back to the
  // numeric code followed by the lower-cased HTTP status text for that code,
  // e.g. "404 not found". Codes without a registered status text produce
  // just the number (e.g. "999") instead of a string with a trailing space.
  func (e Error) Error() string {
  	if e.Message != "" {
  		return e.Message
  	}

  	// http.StatusText returns "" for codes it does not know about.
  	text := strings.ToLower(http.StatusText(int(e.Code)))
  	if text == "" {
  		return fmt.Sprintf("%d", e.Code)
  	}
  	return fmt.Sprintf("%d %s", e.Code, text)
  }
  // PullRequest carries a single model identifier, serialized under the
  // JSON key "model".
  //
  // NOTE(review): presumably the request body for a model pull operation;
  // confirm against the handler that consumes it.
  type PullRequest struct {
  	// Model is always emitted when marshaling (no omitempty).
  	Model string `json:"model"`
  }
  20. type PullProgress struct {
  21. Total int64 `json:"total"`
  22. Completed int64 `json:"completed"`
  23. Percent float64 `json:"percent"`
  24. Error Error `json:"error"`
  25. }
  26. type GenerateRequest struct {
  27. Model string `json:"model"`
  28. Prompt string `json:"prompt"`
  29. ModelOptions *ModelOptions `json:"model_opts,omitempty"`
  30. PredictOptions *PredictOptions `json:"predict_opts,omitempty"`
  31. }
  // ModelOptions groups the model-level settings that can accompany a
  // request.
  //
  // Every field is tagged omitempty, so zero values (0, false, "") are
  // dropped when marshaling.
  //
  // NOTE(review): DefaultModelOptions sets several booleans to true. If a
  // receiver decodes incoming JSON on top of those defaults, a sender using
  // this struct cannot switch such an option off, because an explicit false
  // is omitted from the output. Confirm how requests are merged with the
  // defaults.
  type ModelOptions struct {
  	// Integer settings.
  	ContextSize int `json:"context_size,omitempty"`
  	Seed        int `json:"seed,omitempty"`
  	NBatch      int `json:"n_batch,omitempty"`

  	// Boolean switches.
  	F16Memory  bool `json:"memory_f16,omitempty"`
  	MLock      bool `json:"mlock,omitempty"`
  	MMap       bool `json:"mmap,omitempty"`
  	VocabOnly  bool `json:"vocab_only,omitempty"`
  	LowVRAM    bool `json:"low_vram,omitempty"`
  	Embeddings bool `json:"embeddings,omitempty"`
  	NUMA       bool `json:"numa,omitempty"`

  	// GPU-related settings. MainGPU and TensorSplit are plain strings;
  	// their expected format is not defined in this file.
  	NGPULayers  int    `json:"gpu_layers,omitempty"`
  	MainGPU     string `json:"main_gpu,omitempty"`
  	TensorSplit string `json:"tensor_split,omitempty"`
  }
  // PredictOptions groups the per-prediction settings that can accompany a
  // request.
  //
  // Fields tagged omitempty are dropped from the JSON when they hold their
  // zero value. Eight fields carry no JSON tag at all (F16KV, DebugMode,
  // StopPrompts, PathPromptCache, PromptCacheAll, PromptCacheRO, MainGPU,
  // TensorSplit): encoding/json serializes those under their Go field names
  // and always emits them.
  //
  // NOTE(review): MainGPU and TensorSplit are therefore keyed "MainGPU" /
  // "TensorSplit" here but "main_gpu" / "tensor_split" in ModelOptions.
  // Adding tags would change the wire format, so this is only flagged.
  type PredictOptions struct {
  	// Integer settings.
  	Seed    int `json:"seed,omitempty"`
  	Threads int `json:"threads,omitempty"`
  	Tokens  int `json:"tokens,omitempty"`
  	TopK    int `json:"top_k,omitempty"`
  	Repeat  int `json:"repeat,omitempty"`
  	Batch   int `json:"batch,omitempty"`
  	NKeep   int `json:"nkeep,omitempty"`

  	// Floating-point settings.
  	TopP        float64 `json:"top_p,omitempty"`
  	Temperature float64 `json:"temp,omitempty"`
  	Penalty     float64 `json:"penalty,omitempty"`

  	// Untagged: serialized as "F16KV", "DebugMode" and "StopPrompts".
  	F16KV       bool
  	DebugMode   bool
  	StopPrompts []string

  	IgnoreEOS         bool    `json:"ignore_eos,omitempty"`
  	TailFreeSamplingZ float64 `json:"tfs_z,omitempty"`
  	TypicalP          float64 `json:"typical_p,omitempty"`
  	FrequencyPenalty  float64 `json:"freq_penalty,omitempty"`
  	PresencePenalty   float64 `json:"pres_penalty,omitempty"`

  	// Note the JSON names: ETA maps to "mirostat_lr", TAU to "mirostat_ent".
  	Mirostat    int     `json:"mirostat,omitempty"`
  	MirostatETA float64 `json:"mirostat_lr,omitempty"`
  	MirostatTAU float64 `json:"mirostat_ent,omitempty"`

  	PenalizeNL bool   `json:"penalize_nl,omitempty"`
  	LogitBias  string `json:"logit_bias,omitempty"`

  	// Untagged: serialized as "PathPromptCache".
  	PathPromptCache string

  	MLock bool `json:"mlock,omitempty"`
  	MMap  bool `json:"mmap,omitempty"`

  	// Untagged: serialized under their Go field names.
  	PromptCacheAll bool
  	PromptCacheRO  bool
  	MainGPU        string
  	TensorSplit    string
  }
  79. var DefaultModelOptions ModelOptions = ModelOptions{
  80. ContextSize: 512,
  81. Seed: 0,
  82. F16Memory: true,
  83. MLock: false,
  84. Embeddings: true,
  85. MMap: true,
  86. LowVRAM: false,
  87. }
  88. var DefaultPredictOptions PredictOptions = PredictOptions{
  89. Seed: -1,
  90. Threads: -1,
  91. Tokens: 512,
  92. Penalty: 1.1,
  93. Repeat: 64,
  94. Batch: 512,
  95. NKeep: 64,
  96. TopK: 90,
  97. TopP: 0.86,
  98. TailFreeSamplingZ: 1.0,
  99. TypicalP: 1.0,
  100. Temperature: 0.8,
  101. FrequencyPenalty: 0.0,
  102. PresencePenalty: 0.0,
  103. Mirostat: 0,
  104. MirostatTAU: 5.0,
  105. MirostatETA: 0.1,
  106. MMap: true,
  107. StopPrompts: []string{"llama"},
  108. }
  // GenerateResponse wraps a single string of output, serialized under the
  // JSON key "response".
  //
  // NOTE(review): whether one value holds a whole completion or one
  // streamed fragment is not visible in this file; confirm with the
  // producer.
  type GenerateResponse struct {
  	// Response is always emitted when marshaling (no omitempty), even
  	// when empty.
  	Response string `json:"response"`
  }