8 hónapja · 90d25d3b0a
--- a/llama/runner/runner.go
+++ b/llama/runner/runner.go
@@ -167,6 +167,36 @@ func (s *Server) shiftContext(seqIndex int) {
 
				 	seq.nPast -= numDiscard
			
 
				 }
			
 
				 
			
 
				+func incompleteUnicode(token string) bool {
			
 
				+	incomplete := false
			
 
				+
			
 
				+	// check if there is incomplete UTF-8 character at the end
			
 
				+	for i := 1; i < 5 && i <= len(token); i++ {
			
 
				+		c := token[len(token)-i]
			
 
				+
			
 
				+		if (c & 0xc0) == 0x80 {
			
 
				+			// continuation byte: 10xxxxxx
			
 
				+			continue
			
 
				+		}
			
 
				+
			
 
				+		if (c & 0xe0) == 0xc0 {
			
 
				+			// 2-byte character: 110xxxxx ...
			
 
				+			incomplete = i < 2
			
 
				+		} else if (c & 0xf0) == 0xe0 {
			
 
				+			// 3-byte character: 1110xxxx ...
			
 
				+			incomplete = i < 3
			
 
				+		} else if (c & 0xf8) == 0xf0 {
			
 
				+			// 4-byte character: 11110xxx ...
			
 
				+			incomplete = i < 4
			
 
				+		}
			
 
				+
			
 
				+		// else 1-byte character or invalid byte
			
 
				+		break
			
 
				+	}
			
 
				+
			
 
				+	return incomplete
			
 
				+}
			
 
				+
			
 
				 func (s *Server) run(ctx context.Context) {
			
 
				 	// TODO - should this be n_ctx / parallel like the old server.cpp setup?
			
 
				 	batch := llama.NewBatch(s.batchSize, 0, s.parallel)
			
@@ -296,6 +326,11 @@ func (s *Server) run(ctx context.Context) {
 
				 
			
 
				 				pieces[i] = append(pieces[i], piece)
			
 
				 				sequence := strings.Join(pieces[i], "")
			
 
				+
			
 
				+				if incompleteUnicode(sequence) {
			
 
				+					continue
			
 
				+				}
			
 
				+
			
 
				 				if ok, stop := findStop(sequence, seq.stop); ok {
			
 
				 					slog.Info("hit stop token", "stop", seq.stop)