@@ -3,6 +3,7 @@ package convert
 import (
 	"cmp"
 	"fmt"
+	"math"
 	"strings"
 
 	"github.com/pdevine/tensor"
@@ -27,8 +28,14 @@ type llama struct {
 	NumKeyValueHeads uint32 `json:"num_key_value_heads"`
 	RopeTheta float32 `json:"rope_theta"`
 	RopeScaling struct {
-		Type string `json:"type"`
-		Factor float32 `json:"factor"`
+		Type string `json:"type"`
+		RopeType string `json:"rope_type"`
+		Factor float32 `json:"factor"`
+		LowFrequencyFactor float32 `json:"low_freq_factor"`
+		HighFrequencyFactor float32 `json:"high_freq_factor"`
+		OriginalMaxPositionalEmbeddings uint32 `json:"original_max_positional_embeddings"`
+
+		factors ropeFactor
 	} `json:"rope_scaling"`
 	RMSNormEPS float32 `json:"rms_norm_eps"`
 	LayerNormEPS float32 `json:"layer_norm_eps"`
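
The new fields mirror the `rope_scaling` block that Llama 3.1-style checkpoints carry in their config JSON. As a rough, standalone illustration (not part of this change), the tags above would pick up a fragment along these lines; the struct below simply mirrors the new fields, and the values echo the `cmp.Or` fallback defaults used further down in this diff:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Standalone mirror of the new rope_scaling fields, for illustration only.
type ropeScalingConfig struct {
	RopeType                        string  `json:"rope_type"`
	Factor                          float32 `json:"factor"`
	LowFrequencyFactor              float32 `json:"low_freq_factor"`
	HighFrequencyFactor             float32 `json:"high_freq_factor"`
	OriginalMaxPositionalEmbeddings uint32  `json:"original_max_positional_embeddings"`
}

func main() {
	// Hypothetical config fragment; values match the defaults in the KV hunk below.
	raw := `{"rope_type":"llama3","factor":8.0,"low_freq_factor":1.0,"high_freq_factor":4.0,"original_max_positional_embeddings":8192}`

	var rs ropeScalingConfig
	if err := json.Unmarshal([]byte(raw), &rs); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", rs)
}
```
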
@@ -42,7 +49,6 @@ var _ Converter = (*llama)(nil)
 func (p *llama) KV(t *Tokenizer) llm.KV {
 	kv := p.Parameters.KV(t)
 	kv["general.architecture"] = "llama"
-	kv["general.name"] = "llama"
 	kv["llama.vocab_size"] = p.VocabSize
 
 	kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
@@ -71,6 +77,27 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
 	if p.RopeScaling.Type == "linear" {
 		kv["llama.rope.scaling.type"] = p.RopeScaling.Type
 		kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor
+	} else if p.RopeScaling.RopeType == "llama3" {
+		dim := p.HiddenSize / p.NumAttentionHeads
+		for i := uint32(0); i < dim; i += 2 {
+			factor := cmp.Or(p.RopeScaling.Factor, 8.0)
+			factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0)
+			factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0)
+
+			original := cmp.Or(p.RopeScaling.OriginalMaxPositionalEmbeddings, 8192)
+			lambdaLow := float32(original) / factorLow
+			lambdaHigh := float32(original) / factorHigh
+
+			lambda := 2 * math.Pi * math.Pow(float64(p.RopeTheta), float64(i)/float64(dim))
+			if lambda < float64(lambdaHigh) {
+				p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0)
+			} else if lambda > float64(lambdaLow) {
+				p.RopeScaling.factors = append(p.RopeScaling.factors, factor)
+			} else {
+				smooth := (float32(original)/float32(lambda) - factorLow) / (factorHigh - factorLow)
+				p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0/((1-smooth)/factor+smooth))
+			}
+		}
 	}
 
 	if p.NumKeyValueHeads > 0 {
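
For each frequency pair the loop computes the rotation wavelength lambda = 2*pi*theta^(i/dim) and compares it against two thresholds derived from the original context length: pairs whose wavelength is below original/high_freq_factor keep a factor of 1.0, pairs above original/low_freq_factor get the full scale factor, and the band in between is blended smoothly. The resulting per-pair factors are what get written out as rope_freqs.weight. A minimal sketch of how a runtime might consume them, assuming each base inverse frequency is divided by its per-pair factor (the way GGML-style freq_factors are typically applied, not this converter's code):

```go
package main

import (
	"fmt"
	"math"
)

// scaledInvFreqs sketches how rope_freqs.weight factors could be applied:
// one float32 per frequency pair, dividing the plain RoPE inverse frequency
// so long-wavelength pairs rotate more slowly while high-frequency pairs
// are left untouched. Illustrative only.
func scaledInvFreqs(dim uint32, ropeTheta float64, factors []float32) []float64 {
	out := make([]float64, 0, dim/2)
	for i := uint32(0); i < dim; i += 2 {
		base := math.Pow(ropeTheta, -float64(i)/float64(dim)) // plain RoPE inverse frequency
		out = append(out, base/float64(factors[i/2]))         // scale by the per-pair factor
	}
	return out
}

func main() {
	// Toy example: dim=8, all factors 1.0 leaves the frequencies unchanged.
	fmt.Println(scaledInvFreqs(8, 500000, []float32{1, 1, 1, 1}))
}
```
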
@@ -95,6 +122,16 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
 
 func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
 	var out []llm.Tensor
+
+	if p.RopeScaling.factors != nil {
+		out = append(out, llm.Tensor{
+			Name: "rope_freqs.weight",
+			Kind: 0,
+			Shape: []uint64{uint64(len(p.RopeScaling.factors))},
+			WriterTo: p.RopeScaling.factors,
+		})
+	}
+
 	for _, t := range ts {
 		if strings.HasSuffix(t.Name(), "attn_q.weight") ||
 			strings.HasSuffix(t.Name(), "attn_k.weight") {
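
`Kind: 0` is the GGML/GGUF code for F32, and the tensor's single dimension is the number of per-pair factors. The `WriterTo` field hands serialization off to `ropeFactor` itself, whose definition is not part of this diff; presumably it is a `[]float32` that satisfies `io.WriterTo` so the GGUF writer can stream the values as the tensor's raw little-endian payload. A sketch under that assumption:

```go
package convert

import (
	"encoding/binary"
	"io"
)

// Assumption: ropeFactor is defined elsewhere in this package roughly like
// this. Sketch only; the real definition is not shown in the diff.
type ropeFactor []float32

// WriteTo streams the factors as raw little-endian float32 values, which is
// the payload format expected for an F32 (Kind 0) GGUF tensor.
func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
	if err := binary.Write(w, binary.LittleEndian, r); err != nil {
		return 0, err
	}
	return int64(len(r)) * 4, nil // 4 bytes per float32 written
}
```
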