View source

some gocritic

Michael Yang, 11 months ago
Commit
c895a7d13f

+ 2 - 0
.golangci.yaml

@@ -14,4 +14,6 @@ linters:
     # - goimports
     - misspell
     - nilerr
+    - nolintlint
+    - nosprintfhostport
     - unused

+ 1 - 1
api/types.go

@@ -306,7 +306,7 @@ type GenerateResponse struct {
 	// Model is the model name that generated the response.
 	Model string `json:"model"`
 
-	//CreatedAt is the timestamp of the response.
+	// CreatedAt is the timestamp of the response.
 	CreatedAt time.Time `json:"created_at"`
 
 	// Response is the textual response itself.

+ 4 - 3
convert/llama.go

@@ -119,11 +119,12 @@ func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([
 	}
 
 	var heads int
-	if strings.HasSuffix(name, "attn_q.weight") {
+	switch {
+	case strings.HasSuffix(name, "attn_q.weight"):
 		heads = params.AttentionHeads
-	} else if strings.HasSuffix(name, "attn_k.weight") {
+	case strings.HasSuffix(name, "attn_k.weight"):
 		heads = cmp.Or(params.KeyValHeads, params.AttentionHeads)
-	} else {
+	default:
 		return nil, fmt.Errorf("unknown tensor name: %s", name)
 	}
 

+ 1 - 1
convert/safetensors.go

@@ -120,7 +120,7 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
 			Name:   name,
 			Kind:   kind,
 			Offset: offset,
-			Shape:  shape[:],
+			Shape:  shape,
 		}
 
 		t.WriterTo = safetensorWriterTo{

+ 2 - 5
convert/tokenizer.go

@@ -85,11 +85,8 @@ func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, e
 
 	sha256sum := sha256.New()
 	for _, pt := range t.PreTokenizer.PreTokenizers {
-		switch pt.Type {
-		case "Split":
-			if pt.Pattern.Regex != "" {
-				sha256sum.Write([]byte(pt.Pattern.Regex))
-			}
+		if pt.Type == "Split" && pt.Pattern.Regex != "" {
+			sha256sum.Write([]byte(pt.Pattern.Regex))
 		}
 	}
 

+ 1 - 1
convert/torch.go

@@ -88,7 +88,7 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor,
 				Name:   ggufName,
 				Kind:   kind,
 				Offset: offset, // calculate the offset
-				Shape:  shape[:],
+				Shape:  shape,
 			}
 
 			tensor.WriterTo = torchWriterTo{

+ 1 - 1
envconfig/config.go

@@ -127,7 +127,7 @@ func LoadConfig() {
 		var paths []string
 		for _, root := range []string{filepath.Dir(appExe), cwd} {
 			paths = append(paths,
-				filepath.Join(root),
+				root,
 				filepath.Join(root, "windows-"+runtime.GOARCH),
 				filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
 			)

+ 7 - 6
llm/server.go

@@ -104,21 +104,22 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		var layers int
 		layers, estimatedVRAM, estimatedTotal = EstimateGPULayers(gpus, ggml, projectors, opts)
 
-		if gpus[0].Library == "metal" && estimatedVRAM > systemMemory {
+		switch {
+		case gpus[0].Library == "metal" && estimatedVRAM > systemMemory:
 			// disable partial offloading when model is greater than total system memory as this
 			// can lead to locking up the system
 			opts.NumGPU = 0
-		} else if gpus[0].Library != "metal" && layers == 0 {
+		case gpus[0].Library != "metal" && layers == 0:
 			// Don't bother loading into the GPU if no layers can fit
 			cpuRunner = serverForCpu()
 			gpuCount = 0
-		} else if opts.NumGPU < 0 && layers > 0 && gpus[0].Library != "cpu" {
+		case opts.NumGPU < 0 && layers > 0 && gpus[0].Library != "cpu":
 			opts.NumGPU = layers
 		}
 	}
 
 	// Loop through potential servers
-	finalErr := fmt.Errorf("no suitable llama servers found")
+	finalErr := errors.New("no suitable llama servers found")
 
 	if len(adapters) > 1 {
 		return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
@@ -284,7 +285,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 
 		server := filepath.Join(dir, "ollama_llama_server")
 		if runtime.GOOS == "windows" {
-			server = server + ".exe"
+			server += ".exe"
 		}
 
 		// Detect tmp cleaners wiping out the file
@@ -459,7 +460,7 @@ func (s *llmServer) getServerStatus(ctx context.Context) (ServerStatus, error) {
 	resp, err := http.DefaultClient.Do(req)
 	if err != nil {
 		if errors.Is(err, context.DeadlineExceeded) {
-			return ServerStatusNotResponding, fmt.Errorf("server not responding")
+			return ServerStatusNotResponding, errors.New("server not responding")
 		}
 		return ServerStatusError, fmt.Errorf("health resp: %w", err)
 	}

+ 1 - 1
server/sched.go

@@ -66,7 +66,7 @@ func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options,
 		opts.NumCtx = 4
 	}
 
-	opts.NumCtx = opts.NumCtx * envconfig.NumParallel
+	opts.NumCtx *= envconfig.NumParallel
 
 	req := &LlmRequest{
 		ctx:             c,

+ 1 - 1
types/model/name_test.go

@@ -325,7 +325,7 @@ func TestParseNameFromFilepath(t *testing.T) {
 		filepath.Join("host:port", "namespace", "model", "tag"): {Host: "host:port", Namespace: "namespace", Model: "model", Tag: "tag"},
 		filepath.Join("namespace", "model", "tag"):              {},
 		filepath.Join("model", "tag"):                           {},
-		filepath.Join("model"):                                  {},
+		"model":                                                 {},
 		filepath.Join("..", "..", "model", "tag"):               {},
 		filepath.Join("", "namespace", ".", "tag"):              {},
 		filepath.Join(".", ".", ".", "."):                       {},