convert_phi3.go 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. package convert
  2. import (
  3. "cmp"
  4. "encoding/binary"
  5. "io"
  6. "math"
  7. "strings"
  8. "sync"
  9. "github.com/ollama/ollama/llm"
  10. )
  11. type phi3 struct {
  12. Parameters
  13. NumHiddenLayers uint32 `json:"num_hidden_layers"`
  14. NLayers uint32 `json:"n_layers"`
  15. HiddenSize uint32 `json:"hidden_size"`
  16. NEmbd uint32 `json:"n_embd"`
  17. IntermediateSize uint32 `json:"intermediate_size"`
  18. NumAttentionHeads uint32 `json:"num_attention_heads"`
  19. NHead uint32 `json:"n_head"`
  20. NumKeyValueHeads uint32 `json:"num_key_value_heads"`
  21. NHeadKV uint32 `json:"n_head_kv"`
  22. RopeTheta float32 `json:"rope_theta"`
  23. RopeScaling struct {
  24. Type string `json:"type"`
  25. LongFactor ropeFactor `json:"long_factor"`
  26. ShortFactor ropeFactor `json:"short_factor"`
  27. } `json:"rope_scaling"`
  28. RMSNormEPS float32 `json:"rms_norm_eps"`
  29. NPositions uint32 `json:"n_positions"`
  30. MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
  31. OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
  32. SlidingWindow uint32 `json:"sliding_window"`
  33. }
  34. var _ Converter = (*phi3)(nil)
  35. func (p *phi3) KV(t *Tokenizer) llm.KV {
  36. kv := p.Parameters.KV(t)
  37. kv["general.architecture"] = "phi3"
  38. kv["general.name"] = "phi3"
  39. kv["phi3.context_length"] = p.MaxPositionEmbeddings
  40. kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
  41. kv["phi3.feed_forward_length"] = p.IntermediateSize
  42. kv["phi3.block_count"] = cmp.Or(p.NumHiddenLayers, p.NLayers)
  43. kv["phi3.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
  44. kv["phi3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NHeadKV)
  45. kv["phi3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
  46. kv["phi3.rope.dimension_count"] = p.HiddenSize / cmp.Or(p.NumAttentionHeads, p.NHead)
  47. kv["phi3.rope.freq_base"] = p.RopeTheta
  48. kv["phi3.rope.scaling.original_context_length"] = p.OriginalMaxPositionEmbeddings
  49. kv["phi3.attention.sliding_window"] = p.SlidingWindow
  50. scale := float64(p.MaxPositionEmbeddings) / float64(p.OriginalMaxPositionEmbeddings)
  51. switch p.RopeScaling.Type {
  52. case "":
  53. // no scaling
  54. case "su", "longrope":
  55. kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
  56. case "yarn":
  57. kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
  58. default:
  59. panic("unknown rope scaling type")
  60. }
  61. return kv
  62. }
  63. func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
  64. var addRopeFactors sync.Once
  65. out := make([]llm.Tensor, 0, len(ts)+2)
  66. for _, t := range ts {
  67. name := p.tensorName(t.Name())
  68. if strings.HasPrefix(name, "blk.0.") {
  69. addRopeFactors.Do(func() {
  70. out = append(out, llm.Tensor{
  71. Name: "rope_factors_long.weight",
  72. Kind: 0,
  73. Shape: []uint64{uint64(len(p.RopeScaling.LongFactor))},
  74. WriterTo: p.RopeScaling.LongFactor,
  75. }, llm.Tensor{
  76. Name: "rope_factors_short.weight",
  77. Kind: 0,
  78. Shape: []uint64{uint64(len(p.RopeScaling.ShortFactor))},
  79. WriterTo: p.RopeScaling.ShortFactor,
  80. })
  81. })
  82. }
  83. out = append(out, llm.Tensor{
  84. Name: name,
  85. Kind: t.Kind(),
  86. Shape: t.Shape(),
  87. WriterTo: t,
  88. })
  89. }
  90. return out
  91. }
  92. func (p *phi3) tensorName(n string) string {
  93. return strings.NewReplacer(
  94. "lm_head", "output",
  95. "model.embed_tokens", "token_embd",
  96. "model.norm", "output_norm",
  97. "model.layers", "blk",
  98. "input_layernorm", "attn_norm",
  99. "self_attn.qkv_proj", "attn_qkv",
  100. "self_attn.o_proj", "attn_output",
  101. "mlp.down_proj", "ffn_down",
  102. "mlp.gate_up_proj", "ffn_up",
  103. "post_attention_layernorm", "ffn_norm",
  104. ).Replace(n)
  105. }
  // ropeFactor is a list of rope scaling factors that can write itself out as
  // raw float32 data.
  type ropeFactor []float32

  // WriteTo writes every factor to w, in order, as a little-endian float32,
  // using a single Write call. It returns the number of bytes w reported as
  // written, as the io.WriterTo contract requires, along with any error from w.
  func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
  	// Each float32 occupies four bytes.
  	buf := make([]byte, 0, 4*len(r))
  	for _, f := range r {
  		buf = binary.LittleEndian.AppendUint32(buf, math.Float32bits(f))
  	}

  	n, err := w.Write(buf)
  	return int64(n), err
  }