Browse source

remove prompt cache

Bruce MacDonald 1 year ago
parent
commit
da74384a3e
4 changed files with 12 additions and 41 deletions
  1. +3 -8   llama/binding/binding.cpp
  2. +2 -3   llama/binding/binding.h
  3. +4 -10  llama/llama.go
  4. +3 -20  llama/options.go

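In effect, the session-file path and both prompt-cache flags disappear from the C binding and from the Go options, while the rest of the predict API is untouched. Below is a minimal sketch of calling the surviving Go surface after this change, written as if inside the llama package so no module import path has to be guessed; the model handle l and its construction are outside this diff.

    package llama

    // examplePredict exercises Predict with options that remain after this
    // commit; the prompt-cache options are gone, everything else is unchanged.
    func examplePredict(l *LLama) (string, error) {
        return l.Predict(
            "Why is the sky blue?",
            SetTemperature(0.8), // sampling temperature
            SetPenalty(1.1),     // repetition penalty
            Debug,               // verbose prediction output
        )
    }
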
+ 3 - 8
llama/binding/binding.cpp

@@ -24,7 +24,7 @@
 #include <windows.h>
 #endif
 
-#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) ||          \
+#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || \
     defined(_WIN32)
 void sigint_handler(int signo) {
   if (signo == SIGINT) {
@@ -573,15 +573,13 @@ void *llama_allocate_params(
     const char **antiprompt, int antiprompt_count, float tfs_z, float typical_p,
     float frequency_penalty, float presence_penalty, int mirostat,
     float mirostat_eta, float mirostat_tau, bool penalize_nl,
-    const char *logit_bias, const char *session_file, bool prompt_cache_all,
-    bool mlock, bool mmap, const char *maingpu, const char *tensorsplit,
-    bool prompt_cache_ro) {
+    const char *logit_bias, bool mlock, bool mmap, const char *maingpu,
+    const char *tensorsplit) {
   gpt_params *params = new gpt_params;
   params->seed = seed;
   params->n_threads = threads;
   params->n_predict = tokens;
   params->repeat_last_n = repeat_last_n;
-  params->prompt_cache_ro = prompt_cache_ro;
   params->top_k = top_k;
   params->top_p = top_p;
   params->memory_f16 = memory_f16;
@@ -612,9 +610,6 @@ void *llama_allocate_params(
     }
   }
 
-  params->prompt_cache_all = prompt_cache_all;
-  params->path_prompt_cache = session_file;
-
   if (ignore_eos) {
     params->logit_bias[llama_token_eos()] = -INFINITY;
   }

+ 2 - 3
llama/binding/binding.h

@@ -31,9 +31,8 @@ void *llama_allocate_params(
     const char **antiprompt, int antiprompt_count, float tfs_z, float typical_p,
     float frequency_penalty, float presence_penalty, int mirostat,
     float mirostat_eta, float mirostat_tau, bool penalize_nl,
-    const char *logit_bias, const char *session_file, bool prompt_cache_all,
-    bool mlock, bool mmap, const char *maingpu, const char *tensorsplit,
-    bool prompt_cache_ro);
+    const char *logit_bias, bool mlock, bool mmap, const char *maingpu,
+    const char *tensorsplit);
 
 void llama_free_params(void *params_ptr);
 

+ 4 - 10
llama/llama.go

@@ -28,6 +28,7 @@ package llama
 // #include "binding/binding.h"
 // #include <stdlib.h>
 import "C"
+
 import (
 	"fmt"
 	"strings"
@@ -69,7 +70,7 @@ func (l *LLama) Eval(text string, opts ...PredictOption) error {
 		po.Tokens = 99999999
 	}
 	defer C.free(unsafe.Pointer(input))
-	
+
 	reverseCount := len(po.StopPrompts)
 	reversePrompt := make([]*C.char, reverseCount)
 	var pass **C.char
@@ -86,9 +87,7 @@ func (l *LLama) Eval(text string, opts ...PredictOption) error {
 		C.int(po.Batch), C.int(po.NKeep), pass, C.int(reverseCount),
 		C.float(po.TailFreeSamplingZ), C.float(po.TypicalP), C.float(po.FrequencyPenalty), C.float(po.PresencePenalty),
 		C.int(po.Mirostat), C.float(po.MirostatETA), C.float(po.MirostatTAU), C.bool(po.PenalizeNL), C.CString(po.LogitBias),
-		C.CString(po.PathPromptCache), C.bool(po.PromptCacheAll), C.bool(po.MLock), C.bool(po.MMap),
-		C.CString(po.MainGPU), C.CString(po.TensorSplit),
-		C.bool(po.PromptCacheRO),
+		C.bool(po.MLock), C.bool(po.MMap), C.CString(po.MainGPU), C.CString(po.TensorSplit),
 	)
 	defer C.llama_free_params(params)
 
@@ -128,9 +127,6 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
 	cLogitBias := C.CString(po.LogitBias)
 	defer C.free(unsafe.Pointer(cLogitBias))
 
-	cPathPromptCache := C.CString(po.PathPromptCache)
-	defer C.free(unsafe.Pointer(cPathPromptCache))
-
 	cMainGPU := C.CString(po.MainGPU)
 	defer C.free(unsafe.Pointer(cMainGPU))
 
@@ -143,9 +139,7 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
 		C.int(po.Batch), C.int(po.NKeep), pass, C.int(reverseCount),
 		C.float(po.TailFreeSamplingZ), C.float(po.TypicalP), C.float(po.FrequencyPenalty), C.float(po.PresencePenalty),
 		C.int(po.Mirostat), C.float(po.MirostatETA), C.float(po.MirostatTAU), C.bool(po.PenalizeNL), cLogitBias,
-		cPathPromptCache, C.bool(po.PromptCacheAll), C.bool(po.MLock), C.bool(po.MMap),
-		cMainGPU, cTensorSplit,
-		C.bool(po.PromptCacheRO),
+		C.bool(po.MLock), C.bool(po.MMap), cMainGPU, cTensorSplit,
 	)
 	defer C.llama_free_params(params)
 

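The strings handed to the binding in these hunks follow the usual cgo rule: C.CString allocates a copy with malloc, so each conversion is paired with a deferred C.free, as with cLogitBias and cMainGPU above. A standalone sketch of that pattern follows; the helper name withCString is illustrative and not taken from this repo.

    package main

    // #include <stdlib.h>
    import "C"

    import "unsafe"

    // withCString converts a Go string for C, runs the callback, and then
    // releases the malloc'd copy so the C memory is not leaked.
    func withCString(s string, use func(*C.char)) {
        cs := C.CString(s)
        defer C.free(unsafe.Pointer(cs))
        use(cs)
    }

    func main() {}
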
+ 3 - 20
llama/options.go

@@ -57,11 +57,9 @@ type PredictOptions struct {
 	LogitBias         string
 	TokenCallback     func(string) bool
 
-	PathPromptCache             string
-	MLock, MMap, PromptCacheAll bool
-	PromptCacheRO               bool
-	MainGPU                     string
-	TensorSplit                 string
+	MLock, MMap bool
+	MainGPU     string
+	TensorSplit string
 }
 
 type PredictOption func(p *PredictOptions)
@@ -182,14 +180,6 @@ var Debug PredictOption = func(p *PredictOptions) {
 	p.DebugMode = true
 }
 
-var EnablePromptCacheAll PredictOption = func(p *PredictOptions) {
-	p.PromptCacheAll = true
-}
-
-var EnablePromptCacheRO PredictOption = func(p *PredictOptions) {
-	p.PromptCacheRO = true
-}
-
 var EnableMLock ModelOption = func(p *ModelOptions) {
 	p.MLock = true
 }
@@ -284,13 +274,6 @@ func SetTemperature(temp float64) PredictOption {
 	}
 }
 
-// SetPathPromptCache sets the session file to store the prompt cache.
-func SetPathPromptCache(f string) PredictOption {
-	return func(p *PredictOptions) {
-		p.PathPromptCache = f
-	}
-}
-
 // SetPenalty sets the repetition penalty for text generation.
 func SetPenalty(penalty float64) PredictOption {
 	return func(p *PredictOptions) {