8 miesięcy temu · e7254617e3
--- a/main.go
+++ b/main.go
@@ -6,8 +6,15 @@ import (
 
															 	"github.com/spf13/cobra"
														
 
															 	"github.com/ollama/ollama/cmd"
														
 
															+
														
 
															+	"net/http"
														
 
															+	_ "net/http/pprof"
														
 
															 )
														
 
															 func main() {
														
 
															+	go func() {
														
 
															+		http.ListenAndServe("localhost:6060", nil)
														
 
															+	}()
														
 
															+
														
 
															 	cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
														
 
															 }
														
--- a/server/images.go
+++ b/server/images.go
@@ -21,6 +21,7 @@ import (
 
															 	"slices"
														
 
															 	"strconv"
														
 
															 	"strings"
														
 
															+	"sync"
														
 
															 	"github.com/ollama/ollama/api"
														
 
															 	"github.com/ollama/ollama/auth"
														
@@ -209,13 +210,25 @@ type RootFS struct {
 
															 	DiffIDs []string `json:"diff_ids"`
														
 
															 }
														
 
															+var manifestCache struct {
														
 
															+	sync.Mutex
														
 
															+	cache map[string]*Manifest
														
 
															+}
														
 
															+
														
 
															 func GetManifest(mp ModelPath) (*Manifest, string, error) {
														
 
															-	fp, err := mp.GetManifestPath()
														
 
															-	if err != nil {
														
 
															-		return nil, "", err
														
 
															+	manifestCache.Lock()
														
 
															+	defer manifestCache.Unlock()
														
 
															+
														
 
															+	if manifestCache.cache == nil {
														
 
															+		manifestCache.cache = make(map[string]*Manifest)
														
 
															 	}
														
 
															-	if _, err = os.Stat(fp); err != nil {
														
 
															+	if manifest, ok := manifestCache.cache[mp.GetFullTagname()]; ok {
														
 
															+		return manifest, "", nil
														
 
															+	}
														
 
															+
														
 
															+	fp, err := mp.GetManifestPath()
														
 
															+	if err != nil {
														
 
															 		return nil, "", err
														
 
															 	}
														
@@ -233,6 +246,8 @@ func GetManifest(mp ModelPath) (*Manifest, string, error) {
 
															 		return nil, "", err
														
 
															 	}
														
 
															+	manifestCache.cache[mp.GetFullTagname()] = manifest
														
 
															+
														
 
															 	return manifest, shaStr, nil
														
 
															 }
														
--- a/server/routes.go
+++ b/server/routes.go
@@ -18,6 +18,7 @@ import (
 
															 	"path/filepath"
														
 
															 	"slices"
														
 
															 	"strings"
														
 
															+	"sync"
														
 
															 	"syscall"
														
 
															 	"time"
														
@@ -42,6 +43,9 @@ var mode string = gin.DebugMode
 
															 type Server struct {
														
 
															 	addr  net.Addr
														
 
															 	sched *Scheduler
														
 
															+
														
 
															+	mu                  sync.Mutex
														
 
															+	contextLengthLookup map[string]int
														
 
															 }
														
 
															 func init() {
														
@@ -343,11 +347,24 @@ func (s *Server) EmbedHandler(c *gin.Context) {
 
															 		return
														
 
															 	}
														
 
															-	kvData, err := getKVData(m.ModelPath, false)
														
 
															-	if err != nil {
														
 
															-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
														
 
															-		return
														
 
															-	}
														
 
															+	contextLength, err := func() (int, error) {
														
 
															+		s.mu.Lock()
														
 
															+		defer s.mu.Unlock()
														
 
															+		if s.contextLengthLookup == nil {
														
 
															+			s.contextLengthLookup = make(map[string]int)
														
 
															+		}
														
 
															+		contextLength, ok := s.contextLengthLookup[m.ModelPath]
														
 
															+		if !ok {
														
 
															+			kvData, err := getKVData(m.ModelPath, false)
														
 
															+			if err != nil {
														
 
															+				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
														
 
															+				return 0, err
														
 
															+			}
														
 
															+			contextLength = int(kvData.ContextLength())
														
 
															+			s.contextLengthLookup[m.ModelPath] = int(kvData.ContextLength())
														
 
															+		}
														
 
															+		return contextLength, nil
														
 
															+	}()
														
 
															 	var count int
														
 
															 	for i, s := range input {
														
@@ -357,7 +374,7 @@ func (s *Server) EmbedHandler(c *gin.Context) {
 
															 			return
														
 
															 		}
														
 
															-		ctxLen := min(opts.NumCtx, int(kvData.ContextLength()))
														
 
															+		ctxLen := min(opts.NumCtx, int(contextLength))
														
 
															 		if len(tokens) > ctxLen {
														
 
															 			if !truncate {
														
 
															 				c.JSON(http.StatusBadRequest, gin.H{"error": "input length exceeds maximum context length"})
														
--- a/server/sched.go
+++ b/server/sched.go
@@ -66,7 +66,7 @@ func InitScheduler(ctx context.Context) *Scheduler {
 
															 		pendingReqCh:  make(chan *LlmRequest, maxQueue),
														
 
															 		finishedReqCh: make(chan *LlmRequest, maxQueue),
														
 
															 		expiredCh:     make(chan *runnerRef, maxQueue),
														
 
															-		unloadedCh:    make(chan interface{}, maxQueue),
														
 
															+		unloadedCh:    make(chan any, maxQueue),
														
 
															 		loaded:        make(map[string]*runnerRef),
														
 
															 		newServerFn:   llm.NewLlamaServer,
														
 
															 		getGpuFn:      gpu.GetGPUInfo,