浏览代码

Handle models with divergent layer sizes

The recent refactoring of the memory prediction assumed all layers
are the same size, but for some models (like deepseek-coder-v2) this
is not the case, so our predictions were significantly off.
Daniel Hiltgen 10 月之前
父节点
当前提交
359b15a597
共有 1 个文件被更改,包括 6 次插入和 0 次删除
  1. 6 0
      llm/memory.go

+ 6 - 0
llm/memory.go

@@ -1,6 +1,7 @@
 package llm
 package llm
 
 
 import (
 import (
+	"fmt"
 	"log/slog"
 	"log/slog"
 	"strconv"
 	"strconv"
 	"strings"
 	"strings"
@@ -179,6 +180,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 
 
 	// For all the layers, find where they can fit on the GPU(s)
 	// For all the layers, find where they can fit on the GPU(s)
 	for i := range int(ggml.KV().BlockCount()) {
 	for i := range int(ggml.KV().BlockCount()) {
+		// Some models have inconsistent layer sizes
+		if blk, ok := layers[fmt.Sprintf("blk.%d", i)]; ok {
+			layerSize = blk.size()
+			layerSize += kv / ggml.KV().BlockCount()
+		}
 		memoryWeights += layerSize
 		memoryWeights += layerSize
 
 
 		if opts.NumGPU >= 0 && layerCount >= opts.NumGPU {
 		if opts.NumGPU >= 0 && layerCount >= opts.NumGPU {