// ggla.go
  1. package llm
  2. import (
  3. "encoding/binary"
  4. "errors"
  5. "io"
  6. "slices"
  7. )
  8. type ContainerGGLA struct {
  9. version uint32
  10. }
  11. func (c *ContainerGGLA) Name() string {
  12. return "ggla"
  13. }
  14. func (c *ContainerGGLA) Decode(rso *readSeekOffset) (model, error) {
  15. binary.Read(rso, binary.LittleEndian, &c.version)
  16. switch c.version {
  17. case 1:
  18. default:
  19. return nil, errors.New("invalid version")
  20. }
  21. model := newModelGGLA(c)
  22. err := model.decode(rso)
  23. return model, err
  24. }
  25. type ModelGGLA struct {
  26. *ContainerGGLA
  27. kv KV
  28. tensors []Tensor
  29. }
  30. func newModelGGLA(container *ContainerGGLA) *ModelGGLA {
  31. return &ModelGGLA{
  32. ContainerGGLA: container,
  33. kv: make(KV),
  34. }
  35. }
  36. func (m *ModelGGLA) decode(rso *readSeekOffset) error {
  37. var r uint32
  38. if err := binary.Read(rso, binary.LittleEndian, &r); err != nil {
  39. return err
  40. }
  41. m.kv["r"] = r
  42. var alpha uint32
  43. if err := binary.Read(rso, binary.LittleEndian, &alpha); err != nil {
  44. return err
  45. }
  46. m.kv["alpha"] = alpha
  47. for {
  48. var dims uint32
  49. if err := binary.Read(rso, binary.LittleEndian, &dims); err != nil {
  50. return err
  51. }
  52. var namesize uint32
  53. if err := binary.Read(rso, binary.LittleEndian, &namesize); err != nil {
  54. return err
  55. }
  56. var t Tensor
  57. if err := binary.Read(rso, binary.LittleEndian, &t.Kind); err != nil {
  58. return err
  59. }
  60. t.Shape = make([]uint64, dims)
  61. for i := 0; uint32(i) < dims; i++ {
  62. var shape32 uint32
  63. if err := binary.Read(rso, binary.LittleEndian, &shape32); err != nil {
  64. return err
  65. }
  66. t.Shape[i] = uint64(shape32)
  67. }
  68. // ggla tensor shape is reversed
  69. // ref: https://github.com/ggerganov/llama.cpp/blob/29ae62d2ae163e2b68aa0ad3bf2ab4636de0c957/convert-lora-to-ggml.py#L44
  70. slices.Reverse(t.Shape)
  71. name := make([]byte, namesize)
  72. if err := binary.Read(rso, binary.LittleEndian, &name); err != nil {
  73. return err
  74. }
  75. t.Name = string(name)
  76. if _, err := rso.Seek((rso.offset+31)&-32, io.SeekStart); err != nil {
  77. return err
  78. }
  79. t.Offset = uint64(rso.offset)
  80. if _, err := rso.Seek(int64(t.Size()), io.SeekCurrent); err != nil {
  81. return err
  82. }
  83. m.tensors = append(m.tensors, t)
  84. }
  85. }
  86. func (m *ModelGGLA) KV() KV {
  87. return m.kv
  88. }
  89. func (m *ModelGGLA) Tensor() []Tensor {
  90. return m.tensors
  91. }
  92. func (*ModelGGLA) ModelFamily() string {
  93. return "ggla"
  94. }
  95. func (*ModelGGLA) ModelType() string {
  96. panic("not implemented")
  97. }
  98. func (*ModelGGLA) FileType() string {
  99. panic("not implemented")
  100. }
  101. func (*ModelGGLA) NumLayers() uint32 {
  102. panic("not implemented")
  103. }
  104. func (*ModelGGLA) NumGQA() uint32 {
  105. panic("not implemented")
  106. }
  107. func (*ModelGGLA) NumEmbed() uint32 {
  108. panic("not implemented")
  109. }
  110. func (*ModelGGLA) NumHead() uint32 {
  111. panic("not implemented")
  112. }
  113. func (*ModelGGLA) NumHeadKv() uint32 {
  114. panic("not implemented")
  115. }
  116. func (*ModelGGLA) NumCtx() uint32 {
  117. panic("not implemented")
  118. }