convert_phi3.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. package convert
  2. import (
  3. "cmp"
  4. "encoding/binary"
  5. "io"
  6. "math"
  7. "strings"
  8. "sync"
  9. "github.com/ollama/ollama/llm"
  10. )
  11. type phi3 struct {
  12. Parameters
  13. NumHiddenLayers uint32 `json:"num_hidden_layers"`
  14. NLayers uint32 `json:"n_layers"`
  15. HiddenSize uint32 `json:"hidden_size"`
  16. NEmbd uint32 `json:"n_embd"`
  17. IntermediateSize uint32 `json:"intermediate_size"`
  18. NumAttentionHeads uint32 `json:"num_attention_heads"`
  19. NHead uint32 `json:"n_head"`
  20. NumKeyValueHeads uint32 `json:"num_key_value_heads"`
  21. NHeadKV uint32 `json:"n_head_kv"`
  22. RopeTheta float32 `json:"rope_theta"`
  23. RopeScaling struct {
  24. Type string `json:"type"`
  25. LongFactor ropeFactor `json:"long_factor"`
  26. ShortFactor ropeFactor `json:"short_factor"`
  27. } `json:"rope_scaling"`
  28. RMSNormEPS float32 `json:"rms_norm_eps"`
  29. NPositions uint32 `json:"n_positions"`
  30. MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
  31. OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
  32. SlidingWindow uint32 `json:"sliding_window"`
  33. }
  34. var _ Converter = (*phi3)(nil)
  35. func (p *phi3) KV(t *Tokenizer) llm.KV {
  36. kv := p.Parameters.KV(t)
  37. kv["general.architecture"] = "phi3"
  38. kv["general.name"] = "phi3"
  39. kv["phi3.context_length"] = p.MaxPositionEmbeddings
  40. kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
  41. kv["phi3.feed_forward_length"] = p.IntermediateSize
  42. kv["phi3.block_count"] = cmp.Or(p.NumHiddenLayers, p.NLayers)
  43. kv["phi3.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
  44. kv["phi3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NHeadKV)
  45. kv["phi3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
  46. kv["phi3.rope.dimension_count"] = p.HiddenSize / cmp.Or(p.NumAttentionHeads, p.NHead)
  47. kv["phi3.rope.freq_base"] = p.RopeTheta
  48. kv["phi3.rope.scaling.original_context_length"] = p.OriginalMaxPositionEmbeddings
  49. kv["phi3.attention.sliding_window"] = p.SlidingWindow
  50. scale := float64(p.MaxPositionEmbeddings) / float64(p.OriginalMaxPositionEmbeddings)
  51. switch p.RopeScaling.Type {
  52. case "":
  53. // no scaling
  54. case "su", "longrope":
  55. kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
  56. case "yarn":
  57. kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
  58. default:
  59. panic("unknown rope scaling type")
  60. }
  61. return kv
  62. }
  63. func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
  64. var addRopeFactors sync.Once
  65. out := make([]llm.Tensor, 0, len(ts)+2)
  66. for _, t := range ts {
  67. if strings.HasPrefix(t.Name(), "blk.0.") {
  68. addRopeFactors.Do(func() {
  69. out = append(out, llm.Tensor{
  70. Name: "rope_factors_long.weight",
  71. Kind: 0,
  72. Shape: []uint64{uint64(len(p.RopeScaling.LongFactor))},
  73. WriterTo: p.RopeScaling.LongFactor,
  74. }, llm.Tensor{
  75. Name: "rope_factors_short.weight",
  76. Kind: 0,
  77. Shape: []uint64{uint64(len(p.RopeScaling.ShortFactor))},
  78. WriterTo: p.RopeScaling.ShortFactor,
  79. })
  80. })
  81. }
  82. out = append(out, llm.Tensor{
  83. Name: t.Name(),
  84. Kind: t.Kind(),
  85. Shape: t.Shape(),
  86. WriterTo: t,
  87. })
  88. }
  89. return out
  90. }
  91. func (p *phi3) Replacements() []string {
  92. return []string{
  93. "lm_head", "output",
  94. "model.embed_tokens", "token_embd",
  95. "model.norm", "output_norm",
  96. "model.layers", "blk",
  97. "input_layernorm", "attn_norm",
  98. "self_attn.qkv_proj", "attn_qkv",
  99. "self_attn.o_proj", "attn_output",
  100. "mlp.down_proj", "ffn_down",
  101. "mlp.gate_up_proj", "ffn_up",
  102. "post_attention_layernorm", "ffn_norm",
  103. }
  104. }
  105. type ropeFactor []float32
  106. func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
  107. err := binary.Write(w, binary.LittleEndian, r)
  108. return 0, err
  109. }