Procházet zdrojové kódy

llm: Fix array out-of-bounds memory access when tokenizing

tokenize() passes a string length longer than the actual data into
llama_tokenize(). This entire string length gets scanned in the
C++ code despite there being a NULL terminator in the correct
location (because it gets converted into std::string). The result
is a read of uninitialized memory, which, depending on the contents
of that memory fails the check for partial multi-byte UTF8
characters.

In addition, if there is not enough space in the passed buffer for
token output then llama_tokenize() returns the required space as
a negative number. We should convert this to a positive number
before reallocating.

The first problem results in the following splat:
libc++abi: terminating due to uncaught exception of type std::invalid_argument: failed to convert utf8 to codepoint
SIGABRT: abort
PC=0x193cd55f0 m=11 sigcode=0
signal arrived during cgo execution

goroutine 27 gp=0x14000708700 m=11 mp=0x14000584908 [syscall]:
runtime.cgocall(0x105549e68, 0x140000c6bf8)
	/opt/homebrew/Cellar/go/1.22.5/libexec/src/runtime/cgocall.go:157 +0x44 fp=0x140000c6bc0 sp=0x140000c6b80 pc=0x104b372c4
github.com/ollama/ollama/llm._Cfunc_llama_tokenize(0x15180f400, 0x152009a00, 0x5aa, 0x140002e8800, 0x5aa, 0x1, 0x1)
	_cgo_gotypes.go:270 +0x34 fp=0x140000c6bf0 sp=0x140000c6bc0 pc=0x104ef7664
github.com/ollama/ollama/llm.tokenize.func2(0x140001dd800?, 0x152009a00, 0x5aa, 0x1400012cdc0?)
	/Users/jesse/ollama/llm/llm.go:74 +0x8c fp=0x140000c6c50 sp=0x140000c6bf0 pc=0x104ef83cc
github.com/ollama/ollama/llm.tokenize(0x140003f7da0, {0x140001dd800, 0x5a8})
	/Users/jesse/ollama/llm/llm.go:74 +0xb4 fp=0x140000c6d90 sp=0x140000c6c50 pc=0x104ef7f94
github.com/ollama/ollama/llm.(*llmServer).Tokenize(0x140000c6df8?, {0x105516574?, 0x5a8?}, {0x140001dd800?, 0x140000c6d00?})
	/Users/jesse/ollama/llm/server.go:963 +0x2c fp=0x140000c6dc0 sp=0x140000c6d90 pc=0x104ef6b6c
github.com/ollama/ollama/llm.LlamaServer.Tokenize-fm({0x105e876f0?, 0x140001e5c70?}, {0x140001dd800?, 0x140000350e0?})
	<autogenerated>:1 +0x50 fp=0x140000c6e00 sp=0x140000c6dc0 pc=0x105532fc0
github.com/ollama/ollama/server.chatPrompt({0x105e876f0, 0x140001e5c70}, 0x14000616480, 0x140000c7508, 0x1400013e000, {0x1400014e008, 0x7, 0x7}, {0x0, 0x0, ...})
	/Users/jesse/ollama/server/prompt.go:36 +0x2a0 fp=0x140000c7100 sp=0x140000c6e00 pc=0x1055165a0
github.com/ollama/ollama/server.(*Server).ChatHandler(0x1400000e9c0, 0x1400011c100)
	/Users/jesse/ollama/server/routes.go:1340 +0x478 fp=0x140000c7610 sp=0x140000c7100 pc=0x105523318
github.com/ollama/ollama/server.(*Server).ChatHandler-fm(0x9?)
	<autogenerated>:1 +0x30 fp=0x140000c7630 sp=0x140000c7610 pc=0x105533130
Jesse Gross před 8 měsíci
rodič
revize
1c36f36c41
1 změnil soubory, kde provedl 13 přidání a 6 odebrání
  1. 13 6
      llm/llm.go

+ 13 - 6
llm/llm.go

@@ -69,19 +69,26 @@ func tokenize(model *loadedModel, content string) ([]int, error) {
 	ccontent := C.CString(content)
 	defer C.free(unsafe.Pointer(ccontent))
 
-	len := len(content) + 2
-	tokens := make([]C.int32_t, len)
+	tokenCount := len(content) + 2
+	tokens := make([]C.int32_t, tokenCount)
 
-	tokenCount := C.llama_tokenize(model.model, ccontent, C.int32_t(len), &tokens[0], C.int32_t(len), true, true)
+	tokenCount = int(C.llama_tokenize(model.model, ccontent, C.int32_t(len(content)),
+		&tokens[0], C.int32_t(tokenCount), true, true))
 	if tokenCount < 0 {
+		tokenCount = -tokenCount
 		slog.Info("XXX got negative response", "count", tokenCount)
-		tokens = make([]C.int32_t, int(tokenCount))
-		tokenCount = C.llama_tokenize(model.model, ccontent, C.int32_t(len), &tokens[0], tokenCount, true, true)
+		tokens = make([]C.int32_t, tokenCount)
+		tokenCount = int(C.llama_tokenize(model.model, ccontent, C.int32_t(len(content)), &tokens[0],
+			C.int32_t(tokenCount), true, true))
+
+		if tokenCount < 0 {
+			return nil, fmt.Errorf("failed to tokenize: %d", tokenCount)
+		}
 	} else if tokenCount == 0 {
 		return nil, nil
 	}
 	ret := make([]int, tokenCount)
-	for i := range int(tokenCount) {
+	for i := range tokenCount {
 		ret[i] = int(tokens[i])
 	}
 	slog.Debug("XXX tokenized", "tokens", tokens, "content", content)