浏览代码

Merge pull request #3712 from ollama/mxyng/mem

add stablelm graph calculation
Michael Yang 1 年之前
父节点
当前提交
8645076a71
共有 2 个文件被更改，包括 6 次插入和 1 次删除
  1. + 6 - 0
      llm/ggml.go
  2. + 0 - 1
      llm/server.go

+ 6 - 0
llm/ggml.go

@@ -381,6 +381,12 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
 		)
 		)
 
 
 		partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
 		partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
+	case "stablelm":
+		fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
+		partialOffload = max(
+			4*batch*(vocab+2*embedding),
+			fullOffload,
+		)
 	}
 	}
 
 
 	return
 	return

+ 0 - 1
llm/server.go

@@ -112,7 +112,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 	var memoryLayerOutput uint64
 	var memoryLayerOutput uint64
 	for k, v := range layers {
 	for k, v := range layers {
 		if !strings.HasPrefix(k, "blk.") {
 		if !strings.HasPrefix(k, "blk.") {
-			slog.Info("aaa", "name", k, "size", format.HumanBytes2(v.size()))
 			memoryLayerOutput += v.size()
 			memoryLayerOutput += v.size()
 		}
 		}
 	}
 	}