ggml.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. package llm
  2. import (
  3. "encoding/binary"
  4. "errors"
  5. "io"
  6. "path"
  7. "sync"
  8. )
  // ModelFamily is a string label naming the family a model belongs to.
  type ModelFamily string

  // ModelFamilyUnknown is the ModelFamily whose label is "unknown".
  const ModelFamilyUnknown = ModelFamily("unknown")
  // ModelType identifies a model size class by a numeric code.
  //
  // NOTE(review): the codes (26, 32, 40, ...) presumably mirror a value stored
  // in the model's hyperparameters — confirm against the code that sets them.
  type ModelType uint32

  // Known ModelType codes. Each one maps to the size label returned by String.
  const (
  	ModelType3B  = ModelType(26)
  	ModelType7B  = ModelType(32)
  	ModelType13B = ModelType(40)
  	ModelType34B = ModelType(48)
  	ModelType30B = ModelType(60)
  	ModelType65B = ModelType(80)
  )

  // String returns the size label for mt ("3B", "7B", "13B", "34B", "30B" or
  // "65B"). Any code that is not one of the declared constants yields "Unknown".
  func (mt ModelType) String() string {
  	label := "Unknown"

  	switch mt {
  	case ModelType3B:
  		label = "3B"
  	case ModelType7B:
  		label = "7B"
  	case ModelType13B:
  		label = "13B"
  	case ModelType34B:
  		label = "34B"
  	case ModelType30B:
  		label = "30B"
  	case ModelType65B:
  		label = "65B"
  	}

  	return label
  }
  // FileType is satisfied by any value that can describe itself as a string
  // through a String method.
  type FileType interface{ String() string }
  41. type GGML struct {
  42. magic uint32
  43. container
  44. model
  45. }
  46. type model interface {
  47. ModelFamily() ModelFamily
  48. ModelType() ModelType
  49. FileType() FileType
  50. }
  51. type container interface {
  52. Name() string
  53. Decode(io.Reader) (model, error)
  54. }
  55. type containerGGML struct{}
  56. func (c *containerGGML) Name() string {
  57. return "ggml"
  58. }
  59. func (c *containerGGML) Decode(r io.Reader) (model, error) {
  60. return nil, nil
  61. }
  62. type containerGGMF struct {
  63. version uint32
  64. }
  65. func (c *containerGGMF) Name() string {
  66. return "ggmf"
  67. }
  68. func (c *containerGGMF) Decode(r io.Reader) (model, error) {
  69. var version uint32
  70. binary.Read(r, binary.LittleEndian, &version)
  71. switch version {
  72. case 1:
  73. default:
  74. return nil, errors.New("invalid version")
  75. }
  76. c.version = version
  77. return nil, nil
  78. }
  79. type containerGGJT struct {
  80. version uint32
  81. }
  82. func (c *containerGGJT) Name() string {
  83. return "ggjt"
  84. }
  85. func (c *containerGGJT) Decode(r io.Reader) (model, error) {
  86. var version uint32
  87. binary.Read(r, binary.LittleEndian, &version)
  88. switch version {
  89. case 1, 2, 3:
  90. default:
  91. return nil, errors.New("invalid version")
  92. }
  93. c.version = version
  94. // different model types may have different layouts for hyperparameters
  95. var llama llamaModel
  96. binary.Read(r, binary.LittleEndian, &llama.hyperparameters)
  97. return &llama, nil
  98. }
  99. type containerLORA struct {
  100. version uint32
  101. }
  102. func (c *containerLORA) Name() string {
  103. return "ggla"
  104. }
  105. func (c *containerLORA) Decode(r io.Reader) (model, error) {
  106. var version uint32
  107. binary.Read(r, binary.LittleEndian, &version)
  108. switch version {
  109. case 1:
  110. default:
  111. return nil, errors.New("invalid version")
  112. }
  113. c.version = version
  114. return nil, nil
  115. }
  // ggmlGPU is the slash-separated path "llama.cpp/ggml/build/gpu/bin". It is
  // the first candidate ggmlRunner passes to chooseRunner.
  var ggmlGPU = path.Join("llama.cpp", "ggml", "build", "gpu", "bin")

  // ggmlCPU is the slash-separated path "llama.cpp/ggml/build/cpu/bin". It is
  // the second candidate ggmlRunner passes to chooseRunner.
  var ggmlCPU = path.Join("llama.cpp", "ggml", "build", "cpu", "bin")
  // ggmlInit ensures the runner path is chosen at most once by ggmlRunner.
  var ggmlInit sync.Once

  // ggmlRunnerPath holds the runner path chosen under ggmlInit. It is the empty
  // string until ggmlRunner has been called.
  var ggmlRunnerPath string
  124. func ggmlRunner() ModelRunner {
  125. ggmlInit.Do(func() {
  126. ggmlRunnerPath = chooseRunner(ggmlGPU, ggmlCPU)
  127. })
  128. return ModelRunner{Path: ggmlRunnerPath}
  129. }
  // File magic numbers, compared against the little-endian uint32 that
  // DecodeGGML reads from the start of a model file.
  const (
  	// FILE_MAGIC_GGML marks unversioned `ggml` files; the hex digits spell
  	// the ASCII characters "ggml".
  	FILE_MAGIC_GGML = 0x67676d6c

  	// FILE_MAGIC_GGMF marks versioned `ggmf` files; the hex digits spell
  	// the ASCII characters "ggmf".
  	FILE_MAGIC_GGMF = 0x67676d66

  	// FILE_MAGIC_GGJT marks versioned `ggjt` files; the hex digits spell
  	// the ASCII characters "ggjt".
  	FILE_MAGIC_GGJT = 0x67676a74

  	// FILE_MAGIC_GGLA marks `ggla` LoRA adapter files; the hex digits spell
  	// the ASCII characters "ggla".
  	FILE_MAGIC_GGLA = 0x67676c61

  	// FILE_MAGIC_GGUF marks versioned `gguf` files. Unlike the values above,
  	// its hex digits spell "FUGG": it is the value obtained when the bytes
  	// "GGUF" are read as a little-endian uint32.
  	FILE_MAGIC_GGUF = 0x46554747
  )
  142. func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
  143. var ggml GGML
  144. binary.Read(r, binary.LittleEndian, &ggml.magic)
  145. switch ggml.magic {
  146. case FILE_MAGIC_GGML:
  147. ggml.container = &containerGGML{}
  148. case FILE_MAGIC_GGMF:
  149. ggml.container = &containerGGMF{}
  150. case FILE_MAGIC_GGJT:
  151. ggml.container = &containerGGJT{}
  152. case FILE_MAGIC_GGLA:
  153. ggml.container = &containerLORA{}
  154. case FILE_MAGIC_GGUF:
  155. ggml.container = &containerGGUF{}
  156. default:
  157. return nil, errors.New("invalid file magic")
  158. }
  159. model, err := ggml.Decode(r)
  160. if err != nil {
  161. return nil, err
  162. }
  163. ggml.model = model
  164. // final model type
  165. return &ggml, nil
  166. }