convert_phi3.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. package convert
  2. import (
  3. "cmp"
  4. "encoding/binary"
  5. "io"
  6. "math"
  7. "strings"
  8. "sync"
  9. "github.com/ollama/ollama/llm"
  10. )
  11. type phi3Model struct {
  12. ModelParameters
  13. NumHiddenLayers uint32 `json:"num_hidden_layers"`
  14. NLayers uint32 `json:"n_layers"`
  15. HiddenSize uint32 `json:"hidden_size"`
  16. NEmbd uint32 `json:"n_embd"`
  17. IntermediateSize uint32 `json:"intermediate_size"`
  18. NumAttentionHeads uint32 `json:"num_attention_heads"`
  19. NHead uint32 `json:"n_head"`
  20. NumKeyValueHeads uint32 `json:"num_key_value_heads"`
  21. NHeadKV uint32 `json:"n_head_kv"`
  22. RopeTheta float32 `json:"rope_theta"`
  23. RopeScaling struct {
  24. Type string `json:"type"`
  25. LongFactor ropeFactor `json:"long_factor"`
  26. ShortFactor ropeFactor `json:"short_factor"`
  27. } `json:"rope_scaling"`
  28. RMSNormEPS float32 `json:"rms_norm_eps"`
  29. NPositions uint32 `json:"n_positions"`
  30. MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
  31. OriginalMaxPositionEmbeddings uint32 `json:"original_max_position_embeddings"`
  32. SlidingWindow uint32 `json:"sliding_window"`
  33. }
  34. var _ ModelConverter = (*phi3Model)(nil)
  35. func (p *phi3Model) KV(t *Tokenizer) llm.KV {
  36. kv := p.ModelParameters.KV(t)
  37. kv["general.architecture"] = "phi3"
  38. kv["phi3.context_length"] = p.MaxPositionEmbeddings
  39. kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
  40. kv["phi3.feed_forward_length"] = p.IntermediateSize
  41. kv["phi3.block_count"] = cmp.Or(p.NumHiddenLayers, p.NLayers)
  42. kv["phi3.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
  43. kv["phi3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NHeadKV)
  44. kv["phi3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
  45. kv["phi3.rope.dimension_count"] = p.HiddenSize / cmp.Or(p.NumAttentionHeads, p.NHead)
  46. kv["phi3.rope.freq_base"] = p.RopeTheta
  47. kv["phi3.rope.scaling.original_context_length"] = p.OriginalMaxPositionEmbeddings
  48. kv["phi3.attention.sliding_window"] = p.SlidingWindow
  49. scale := float64(p.MaxPositionEmbeddings) / float64(p.OriginalMaxPositionEmbeddings)
  50. switch p.RopeScaling.Type {
  51. case "":
  52. // no scaling
  53. case "su", "longrope":
  54. kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
  55. case "yarn":
  56. kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
  57. default:
  58. panic("unknown rope scaling type")
  59. }
  60. return kv
  61. }
  62. func (p *phi3Model) Tensors(ts []Tensor) []llm.Tensor {
  63. var addRopeFactors sync.Once
  64. out := make([]llm.Tensor, 0, len(ts)+2)
  65. for _, t := range ts {
  66. if strings.HasPrefix(t.Name(), "blk.0.") {
  67. addRopeFactors.Do(func() {
  68. out = append(out, llm.Tensor{
  69. Name: "rope_factors_long.weight",
  70. Kind: 0,
  71. Shape: []uint64{uint64(len(p.RopeScaling.LongFactor))},
  72. WriterTo: p.RopeScaling.LongFactor,
  73. }, llm.Tensor{
  74. Name: "rope_factors_short.weight",
  75. Kind: 0,
  76. Shape: []uint64{uint64(len(p.RopeScaling.ShortFactor))},
  77. WriterTo: p.RopeScaling.ShortFactor,
  78. })
  79. })
  80. }
  81. out = append(out, llm.Tensor{
  82. Name: t.Name(),
  83. Kind: t.Kind(),
  84. Shape: t.Shape(),
  85. WriterTo: t,
  86. })
  87. }
  88. return out
  89. }
  90. func (p *phi3Model) Replacements() []string {
  91. return []string{
  92. "lm_head", "output",
  93. "model.embed_tokens", "token_embd",
  94. "model.norm", "output_norm",
  95. "model.layers", "blk",
  96. "input_layernorm", "attn_norm",
  97. "self_attn.qkv_proj", "attn_qkv",
  98. "self_attn.o_proj", "attn_output",
  99. "mlp.down_proj", "ffn_down",
  100. "mlp.gate_up_proj", "ffn_up",
  101. "post_attention_layernorm", "ffn_norm",
  102. }
  103. }
  104. type ropeFactor []float32
  105. func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
  106. err := binary.Write(w, binary.LittleEndian, r)
  107. return 0, err
  108. }