ggla.go 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. package llm
  2. import (
  3. "encoding/binary"
  4. "errors"
  5. "io"
  6. "slices"
  7. )
  // containerGGLA is the container for the "ggla" file format.
  type containerGGLA struct {
  	// version is the format version that Decode reads from the start of
  	// the input. Decode accepts only version 1.
  	version uint32
  }
  11. func (c *containerGGLA) Name() string {
  12. return "ggla"
  13. }
  14. func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {
  15. if err := binary.Read(rs, binary.LittleEndian, &c.version); err != nil {
  16. return nil, err
  17. }
  18. switch c.version {
  19. case 1:
  20. default:
  21. return nil, errors.New("invalid version")
  22. }
  23. model := newGGLA(c)
  24. err := model.decode(rs)
  25. return model, err
  26. }
  27. type ggla struct {
  28. *containerGGLA
  29. kv KV
  30. tensors []Tensor
  31. }
  32. func newGGLA(container *containerGGLA) *ggla {
  33. return &ggla{
  34. containerGGLA: container,
  35. kv: make(KV),
  36. }
  37. }
  38. func (m *ggla) decode(rs io.ReadSeeker) error {
  39. var r uint32
  40. if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
  41. return err
  42. }
  43. m.kv["r"] = r
  44. var alpha uint32
  45. if err := binary.Read(rs, binary.LittleEndian, &alpha); err != nil {
  46. return err
  47. }
  48. m.kv["alpha"] = alpha
  49. for {
  50. var dims uint32
  51. if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
  52. return err
  53. }
  54. var namesize uint32
  55. if err := binary.Read(rs, binary.LittleEndian, &namesize); err != nil {
  56. return err
  57. }
  58. var t Tensor
  59. if err := binary.Read(rs, binary.LittleEndian, &t.Kind); err != nil {
  60. return err
  61. }
  62. t.Shape = make([]uint64, dims)
  63. for i := 0; uint32(i) < dims; i++ {
  64. var shape32 uint32
  65. if err := binary.Read(rs, binary.LittleEndian, &shape32); err != nil {
  66. return err
  67. }
  68. t.Shape[i] = uint64(shape32)
  69. }
  70. // ggla tensor shape is reversed
  71. // ref: https://github.com/ggerganov/llama.cpp/blob/29ae62d2ae163e2b68aa0ad3bf2ab4636de0c957/convert-lora-to-ggml.py#L44
  72. slices.Reverse(t.Shape)
  73. name := make([]byte, namesize)
  74. if err := binary.Read(rs, binary.LittleEndian, &name); err != nil {
  75. return err
  76. }
  77. t.Name = string(name)
  78. offset, err := rs.Seek(0, io.SeekCurrent)
  79. if err != nil {
  80. return err
  81. }
  82. if _, err := rs.Seek((offset+31)&-32, io.SeekStart); err != nil {
  83. return err
  84. }
  85. offset, err = rs.Seek(0, io.SeekCurrent)
  86. if err != nil {
  87. return err
  88. }
  89. t.Offset = uint64(offset)
  90. if _, err := rs.Seek(int64(t.size()), io.SeekCurrent); err != nil {
  91. return err
  92. }
  93. m.tensors = append(m.tensors, t)
  94. }
  95. }
  96. func (m *ggla) KV() KV {
  97. return m.kv
  98. }
  99. func (m *ggla) Tensor() []Tensor {
  100. return m.tensors
  101. }
  102. func (*ggla) ModelFamily() string {
  103. return "ggla"
  104. }
  105. func (*ggla) ModelType() string {
  106. panic("not implemented")
  107. }
  108. func (*ggla) FileType() string {
  109. panic("not implemented")
  110. }
  111. func (*ggla) NumLayers() uint32 {
  112. panic("not implemented")
  113. }
  114. func (*ggla) NumGQA() uint32 {
  115. panic("not implemented")
  116. }
  117. func (*ggla) NumEmbed() uint32 {
  118. panic("not implemented")
  119. }
  120. func (*ggla) NumHead() uint32 {
  121. panic("not implemented")
  122. }
  123. func (*ggla) NumHeadKv() uint32 {
  124. panic("not implemented")
  125. }
  126. func (*ggla) NumCtx() uint32 {
  127. panic("not implemented")
  128. }