瀏覽代碼

temporary workaround for converting spm

Patrick Devine 1 月之前
父節點
當前提交
631fecc6d9
共有 1 個文件被更改,包括 6 次插入和 0 次删除
  1. 6 0
      convert/tokenizer_spm.go

+ 6 - 0
convert/tokenizer_spm.go

@@ -47,6 +47,12 @@ func parseSentencePiece(fsys fs.FS) (*Vocabulary, error) {
 			v.Types = append(v.Types, int32(t))
 		default:
 			tt := int32(sentencepiece.ModelProto_SentencePiece_NORMAL)
+
+			// temporary fix to handle gemma3 broken configs
+			if slices.Contains([]string{"<end_of_turn>", "<start_of_turn>"}, piece.GetPiece()) {
+				tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)
+			}
+
 			for _, t := range ast {
 				if t.Content == piece.GetPiece() {
 					tt = int32(sentencepiece.ModelProto_SentencePiece_CONTROL)