|
@@ -209,7 +209,7 @@ func TestLlama(t *testing.T) {
|
|
|
})
|
|
|
}
|
|
|
|
|
|
-func Benchmark(b *testing.B) {
|
|
|
+func BenchmarkBytePairEncoding(b *testing.B) {
|
|
|
tokenizer := llama(b)
|
|
|
bts, err := os.ReadFile(filepath.Join("testdata", "war-and-peace.txt"))
|
|
|
if err != nil {
|
|
@@ -243,5 +243,12 @@ func Benchmark(b *testing.B) {
|
|
|
}
|
|
|
}
|
|
|
})
|
|
|
+
|
|
|
+ b.Run("split"+strconv.Itoa(n), func(b *testing.B) {
|
|
|
+ b.ResetTimer()
|
|
|
+ for range b.N {
|
|
|
+ slices.Collect(tokenizer.split(string(bts)))
|
|
|
+ }
|
|
|
+ })
|
|
|
}
|
|
|
}
|