Browse Source

Merge branch 'ollama:main' into arm64static

Jeremy 1 year ago
parent
commit
fd048f1367
5 changed files with 25 additions and 9 deletions
  1. +6 -0
      llm/ggml.go
  2. +13 -8
      llm/gguf.go
  3. +0 -1
      llm/server.go
  4. +5 -0
      types/model/name.go
  5. +1 -0
      types/model/name_test.go

+ 6 - 0
llm/ggml.go

@@ -381,6 +381,12 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
 		)
 		)
 
 
 		partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
 		partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
+	case "stablelm":
+		fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
+		partialOffload = max(
+			4*batch*(vocab+2*embedding),
+			fullOffload,
+		)
 	}
 	}
 
 
 	return
 	return

+ 13 - 8
llm/gguf.go

@@ -248,13 +248,17 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
 	}
 	}
 
 
 	padding := llm.padding(offset, int64(alignment))
 	padding := llm.padding(offset, int64(alignment))
-	if _, err := rs.Seek(padding-offset, io.SeekCurrent); err != nil {
+	if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
 		return err
 		return err
 	}
 	}
 
 
 	for _, tensor := range llm.tensors {
 	for _, tensor := range llm.tensors {
-		padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
-		if _, err := rs.Seek(padded, io.SeekCurrent); err != nil {
+		if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
+			return err
+		}
+
+		padding := llm.padding(int64(tensor.size()), int64(alignment))
+		if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
 			return err
 			return err
 		}
 		}
 	}
 	}
@@ -623,8 +627,9 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
 		return err
 		return err
 	}
 	}
 
 
-	padding := llm.padding(offset, 32)
-	if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
+	var alignment int64 = 32
+	padding := llm.padding(offset, alignment)
+	if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
 		return err
 		return err
 	}
 	}
 
 
@@ -638,8 +643,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
 			return err
 			return err
 		}
 		}
 
 
-		padding := llm.padding(offset, 32)
-		if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
+		padding := llm.padding(offset, alignment)
+		if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
 			return err
 			return err
 		}
 		}
 	}
 	}
@@ -648,5 +653,5 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
 }
 }
 
 
 func (gguf) padding(offset, align int64) int64 {
 func (gguf) padding(offset, align int64) int64 {
-	return (offset + align - 1) / align * align
+	return (align - offset%align) % align
 }
 }

+ 0 - 1
llm/server.go

@@ -112,7 +112,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 	var memoryLayerOutput uint64
 	var memoryLayerOutput uint64
 	for k, v := range layers {
 	for k, v := range layers {
 		if !strings.HasPrefix(k, "blk.") {
 		if !strings.HasPrefix(k, "blk.") {
-			slog.Info("aaa", "name", k, "size", format.HumanBytes2(v.size()))
 			memoryLayerOutput += v.size()
 			memoryLayerOutput += v.size()
 		}
 		}
 	}
 	}

+ 5 - 0
types/model/name.go

@@ -521,6 +521,8 @@ func parts(s string) iter_Seq2[PartKind, string] {
 						return
 						return
 					}
 					}
 					state, j, partLen = PartModel, i, 0
 					state, j, partLen = PartModel, i, 0
+				case PartHost:
+					// noop: support for host:port
 				default:
 				default:
 					yield(PartExtraneous, s[i+1:j])
 					yield(PartExtraneous, s[i+1:j])
 					return
 					return
@@ -678,6 +680,9 @@ func isValidByteFor(kind PartKind, c byte) bool {
 	if kind == PartNamespace && c == '.' {
 	if kind == PartNamespace && c == '.' {
 		return false
 		return false
 	}
 	}
+	if kind == PartHost && c == ':' {
+		return true
+	}
 	if c == '.' || c == '-' {
 	if c == '.' || c == '-' {
 		return true
 		return true
 	}
 	}

+ 1 - 0
types/model/name_test.go

@@ -40,6 +40,7 @@ var testNames = map[string]fields{
 	"user/model":                     {namespace: "user", model: "model"},
 	"user/model":                     {namespace: "user", model: "model"},
 	"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
 	"example.com/ns/mistral:7b+Q4_0": {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "Q4_0"},
 	"example.com/ns/mistral:7b+X":    {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
 	"example.com/ns/mistral:7b+X":    {host: "example.com", namespace: "ns", model: "mistral", tag: "7b", build: "X"},
+	"localhost:5000/ns/mistral":      {host: "localhost:5000", namespace: "ns", model: "mistral"},
 
 
 	// invalid digest
 	// invalid digest
 	"mistral:latest@invalid256-": {},
 	"mistral:latest@invalid256-": {},