Prechádzať zdrojové kódy

support new "longrope" attention factor

Bruce MacDonald pred 10 mesiacmi
rodič
commit
aec77d6a05
1 zmenený súbor: 1 pridanie a 1 odobranie
  1. 1 1
      convert/convert_phi3.go

+ 1 - 1
convert/convert_phi3.go

@@ -58,7 +58,7 @@ func (p *phi3) KV(t *Tokenizer) llm.KV {
 	switch p.RopeScaling.Type {
 	case "":
 		// no scaling
-	case "su":
+	case "su", "longrope":
 		kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
 	case "yarn":
 		kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))