torch.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. package convert
  2. import (
  3. "encoding/binary"
  4. "encoding/json"
  5. "fmt"
  6. "io"
  7. "log/slog"
  8. "os"
  9. "path/filepath"
  10. "regexp"
  11. "strings"
  12. "github.com/nlpodyssey/gopickle/pytorch"
  13. "github.com/nlpodyssey/gopickle/types"
  14. "github.com/x448/float16"
  15. "ollama.com/llm"
  16. )
  17. type torchWriterTo struct {
  18. t *llm.Tensor
  19. params *Params
  20. bo ByteOrder
  21. storage pytorch.StorageInterface
  22. handler func(w io.Writer, r torchWriterTo) error
  23. }
  24. type TorchFormat struct{}
  25. func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
  26. slog.Debug("getting torch tensors")
  27. files, err := filepath.Glob(filepath.Join(dirpath, "pytorch_model-*.bin"))
  28. if err != nil {
  29. slog.Error("didn't find any torch files")
  30. return nil, err
  31. }
  32. var offset uint64
  33. var tensors []llm.Tensor
  34. for _, fn := range files {
  35. m, err := pytorch.Load(fn)
  36. if err != nil {
  37. slog.Error(fmt.Sprintf("error unpickling: %q", err))
  38. return []llm.Tensor{}, err
  39. }
  40. for _, k := range m.(*types.Dict).Keys() {
  41. if strings.HasSuffix(k.(string), "self_attn.rotary_emb.inv_freq") {
  42. continue
  43. }
  44. t, _ := m.(*types.Dict).Get(k)
  45. tshape := t.(*pytorch.Tensor).Size
  46. var size uint64
  47. var kind uint32
  48. switch len(tshape) {
  49. case 0:
  50. continue
  51. case 1:
  52. // convert to float32
  53. kind = 0
  54. size = uint64(tshape[0] * 4)
  55. case 2:
  56. // convert to float16
  57. kind = 1
  58. size = uint64(tshape[0] * tshape[1] * 2)
  59. }
  60. ggufName, err := tf.GetLayerName(k.(string))
  61. if err != nil {
  62. slog.Error("%v", err)
  63. return nil, err
  64. }
  65. slog.Debug(fmt.Sprintf("finding name for '%s' -> '%s'", k.(string), ggufName))
  66. shape := []uint64{0, 0, 0, 0}
  67. for i := range tshape {
  68. shape[i] = uint64(tshape[i])
  69. }
  70. tensor := llm.Tensor{
  71. Name: ggufName,
  72. Kind: kind,
  73. Offset: offset, // calculate the offset
  74. Shape: shape[:],
  75. }
  76. tensor.WriterTo = torchWriterTo{
  77. t: &tensor,
  78. params: params,
  79. bo: params.ByteOrder,
  80. storage: t.(*pytorch.Tensor).Source,
  81. }
  82. tensors = append(tensors, tensor)
  83. offset += size
  84. }
  85. }
  86. return tensors, nil
  87. }
  88. func getAltParams(dirpath string) (*Params, error) {
  89. f, err := os.Open(filepath.Join(dirpath, "params.json"))
  90. if err != nil {
  91. slog.Error("no params.json")
  92. return nil, err
  93. }
  94. defer f.Close()
  95. type TorchParams struct {
  96. HiddenSize int `json:"dim"`
  97. AttentionHeads int `json:"n_heads"`
  98. KeyValHeads int `json:"n_kv_heads"`
  99. HiddenLayers int `json:"n_layers"`
  100. RopeTheta int `json:"rope_theta"`
  101. NormEPS float64 `json:"norm_eps"`
  102. }
  103. var tparams TorchParams
  104. d := json.NewDecoder(f)
  105. err = d.Decode(&tparams)
  106. if err != nil {
  107. return nil, err
  108. }
  109. params := &Params{
  110. HiddenSize: tparams.HiddenSize,
  111. AttentionHeads: tparams.AttentionHeads,
  112. KeyValHeads: tparams.KeyValHeads,
  113. HiddenLayers: tparams.HiddenLayers,
  114. NormEPS: tparams.NormEPS,
  115. }
  116. switch {
  117. case tparams.RopeTheta == 1000000:
  118. // Codellama
  119. params.ContextSize = 16384
  120. case tparams.NormEPS == 1e-06:
  121. // llama2
  122. slog.Debug("Found llama2 - setting context size to 4096")
  123. params.ContextSize = 4096
  124. default:
  125. params.ContextSize = 2048
  126. }
  127. params.ByteOrder = binary.LittleEndian
  128. return params, nil
  129. }
  130. func (m *TorchFormat) GetParams(dirpath string) (*Params, error) {
  131. f, err := os.Open(filepath.Join(dirpath, "config.json"))
  132. if err != nil {
  133. if os.IsNotExist(err) {
  134. // try params.json instead
  135. return getAltParams(dirpath)
  136. } else {
  137. return nil, err
  138. }
  139. }
  140. var params Params
  141. d := json.NewDecoder(f)
  142. err = d.Decode(&params)
  143. if err != nil {
  144. return nil, err
  145. }
  146. params.ByteOrder = binary.LittleEndian
  147. return &params, nil
  148. }
  149. func (m *TorchFormat) GetLayerName(n string) (string, error) {
  150. directMap := map[string]string{
  151. "tok_embeddings.weight": "token_embd.weight",
  152. "output.weight": "output.weight",
  153. "norm.weight": "output_norm.weight",
  154. "rope.freqs": "rope_freqs.weight",
  155. "model.embed_tokens.weight": "token_embd.weight",
  156. "lm_head.weight": "output.weight",
  157. "model.norm.weight": "output_norm.weight",
  158. }
  159. lMap := map[string]string{
  160. "layers.(\\d+).attention_norm.weight": "blk.$1.attn_norm.weight",
  161. "layers.(\\d+).attention_output_norm.weight": "blk.$1.attn_norm.weight",
  162. "layers.(\\d+).feed_forward.w2.weight": "blk.$1.ffn_down.weight",
  163. "layers.(\\d+).feed_forward.w1.weight": "blk.$1.ffn_gate.weight",
  164. "layers.(\\d+).feed_forward.w3.weight": "blk.$1.ffn_up.weight",
  165. "layers.(\\d+).ffn_norm.weight": "blk.$1.ffn_norm.weight",
  166. "layers.(\\d+).attention.wk.weight": "blk.$1.attn_k.weight",
  167. "layers.(\\d+).attention.wo.weight": "blk.$1.attn_output.weight",
  168. "layers.(\\d+).attention.wq.weight": "blk.$1.attn_q.weight",
  169. "layers.(\\d+).attention.wv.weight": "blk.$1.attn_v.weight",
  170. "model.layers.(\\d+).input_layernorm.weight": "blk.$1.attn_norm.weight",
  171. "model.layers.(\\d+).mlp.down_proj.weight": "blk.$1.ffn_down.weight",
  172. "model.layers.(\\d+).mlp.gate_proj.weight": "blk.$1.ffn_gate.weight",
  173. "model.layers.(\\d+).mlp.up_proj.weight": "blk.$1.ffn_up.weight",
  174. "model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
  175. "model.layers.(\\d+).self_attn.k_proj.weight": "blk.$1.attn_k.weight",
  176. "model.layers.(\\d+).self_attn.o_proj.weight": "blk.$1.attn_output.weight",
  177. "model.layers.(\\d+).self_attn.q_proj.weight": "blk.$1.attn_q.weight",
  178. "model.layers.(\\d+).self_attn.v_proj.weight": "blk.$1.attn_v.weight",
  179. }
  180. v, ok := directMap[n]
  181. if ok {
  182. return v, nil
  183. }
  184. // quick hack to rename the layers to gguf format
  185. for k, v := range lMap {
  186. re := regexp.MustCompile(k)
  187. newName := re.ReplaceAllString(n, v)
  188. if newName != n {
  189. return newName, nil
  190. }
  191. }
  192. return "", fmt.Errorf("couldn't find a layer name for '%s'", n)
  193. }
  194. func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
  195. // use the handler if one is present
  196. if r.handler != nil {
  197. return 0, r.handler(w, r)
  198. }
  199. switch r.storage.(type) {
  200. case *pytorch.FloatStorage:
  201. slog.Warn(fmt.Sprintf("unexpected storage found for layer '%s'; skipping", r.t.Name))
  202. return 0, nil
  203. case *pytorch.HalfStorage:
  204. switch r.t.Kind {
  205. case 0:
  206. data := r.storage.(*pytorch.HalfStorage).Data
  207. slog.Debug(fmt.Sprintf("%35s F32 (%d)", r.t.Name, len(data)))
  208. if err := binary.Write(w, r.bo, data); err != nil {
  209. return 0, err
  210. }
  211. case 1:
  212. data := r.storage.(*pytorch.HalfStorage).Data
  213. tData := make([]uint16, len(data))
  214. for cnt, v := range data {
  215. tData[cnt] = uint16(float16.Fromfloat32(v))
  216. }
  217. slog.Debug(fmt.Sprintf("%35s F16 (%d)", r.t.Name, len(tData)))
  218. if err := binary.Write(w, r.bo, tData); err != nil {
  219. return 0, err
  220. }
  221. }
  222. }
  223. return 0, nil
  224. }
  225. func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
  226. switch len(params.Architectures) {
  227. case 0:
  228. return nil, fmt.Errorf("No architecture specified to convert")
  229. case 1:
  230. switch params.Architectures[0] {
  231. case "LlamaForCausalLM":
  232. return &LlamaModel{
  233. ModelData{
  234. Name: name,
  235. Path: dirPath,
  236. Params: params,
  237. Format: m,
  238. },
  239. }, nil
  240. default:
  241. return nil, fmt.Errorf("Models based on '%s' are not yet supported", params.Architectures[0])
  242. }
  243. }
  244. return nil, fmt.Errorf("Unknown error")
  245. }