Pārlūkot izejas kodu

model: validate left and right pairs before merging them

jmorganca 1 mēnesi atpakaļ
vecāks
revīzija
20e3593863
1 mainīts fails ar 4 papildinājumiem un 0 dzēšanām
  1. 4 0
      model/process_text_spm.go

+ 4 - 0
model/process_text_spm.go

@@ -169,6 +169,10 @@ func (spm SentencePieceModel) Encode(s string, addSpecial bool) ([]int32, error)
 					continue
 				}
 
+				if id := spm.vocab.Encode(string(left.runes) + string(right.runes)); id < 0 {
+					continue
+				}
+
 				merges[pair.a].runes = append(left.runes, right.runes...)
 				merges[pair.b].runes = nil
 				merges[pair.a].n = right.n