Jelajahi Sumber

llama.go: Use dynamic buffer for TokenToPiece

The cgo binding for llama_token_to_piece uses a fixed 12 byte buffer,
which is usually but not always enough to hold a token. This increases
the buffer size if needed, similar to what llama.cpp does internally.
Jesse Gross 8 bulan lalu
induk
melakukan
523d84c563
1 mengubah file dengan 18 tambahan dan 4 penghapusan
  1. 18 4
      llama/llama.go

+ 18 - 4
llama/llama.go

@@ -260,15 +260,29 @@ type Model struct {
 }
 
 func (m *Model) TokenToPiece(token int) string {
-	buf := make([]byte, 12)
-	C.llama_token_to_piece(
+	tokenLen := 12
+	buf := make([]byte, tokenLen)
+	tokenLen = int(C.llama_token_to_piece(
 		m.c,
 		C.int32_t(token),
 		(*C.char)(unsafe.Pointer(&buf[0])),
-		C.int32_t(12),
+		C.int32_t(tokenLen),
 		C.int32_t(0),
 		C.bool(true),
-	)
+	))
+	if tokenLen < 0 {
+		tokenLen = -tokenLen
+
+		buf = make([]byte, tokenLen)
+		C.llama_token_to_piece(
+			m.c,
+			C.int32_t(token),
+			(*C.char)(unsafe.Pointer(&buf[0])),
+			C.int32_t(tokenLen),
+			C.int32_t(0),
+			C.bool(true),
+		)
+	}
 	return strings.TrimRight(string(buf), "\x00")
 }