|
@@ -5,7 +5,6 @@ import (
|
|
"log/slog"
|
|
"log/slog"
|
|
"os"
|
|
"os"
|
|
"strconv"
|
|
"strconv"
|
|
- "strings"
|
|
|
|
|
|
|
|
"github.com/ollama/ollama/api"
|
|
"github.com/ollama/ollama/api"
|
|
"github.com/ollama/ollama/format"
|
|
"github.com/ollama/ollama/format"
|
|
@@ -100,8 +99,22 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
|
return 0, 0
|
|
return 0, 0
|
|
}
|
|
}
|
|
|
|
|
|
- var layerCount int
|
|
|
|
layers := ggml.Tensors().Layers()
|
|
layers := ggml.Tensors().Layers()
|
|
|
|
+
|
|
|
|
+ var memoryLayerOutput uint64
|
|
|
|
+ for k, v := range layers {
|
|
|
|
+ if k == "output" || k == "output_norm" {
|
|
|
|
+ memoryLayerOutput += v.size()
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if gpus[0].Library == "metal" && opts.UseMMap {
|
|
|
|
+ // memory is preallocated for output tensors
|
|
|
|
+ memoryRequiredTotal += memoryLayerOutput
|
|
|
|
+ memoryRequiredPartial += memoryLayerOutput
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ var layerCount int
|
|
for i := 0; i < int(ggml.KV().BlockCount()); i++ {
|
|
for i := 0; i < int(ggml.KV().BlockCount()); i++ {
|
|
memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()
|
|
memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()
|
|
|
|
|
|
@@ -115,15 +128,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- var memoryLayerOutput uint64
|
|
|
|
- for k, v := range layers {
|
|
|
|
- if !strings.HasPrefix(k, "blk.") {
|
|
|
|
- memoryLayerOutput += v.size()
|
|
|
|
- }
|
|
|
|
|
|
+ if gpus[0].Library != "metal" || !opts.UseMMap {
|
|
|
|
+ // memory was not preallocated for output tensors
|
|
|
|
+ memoryRequiredTotal += memoryLayerOutput
|
|
}
|
|
}
|
|
|
|
|
|
- memoryRequiredTotal += memoryLayerOutput
|
|
|
|
-
|
|
|
|
if memoryAvailable > memoryRequiredTotal {
|
|
if memoryAvailable > memoryRequiredTotal {
|
|
layerCount = int(ggml.KV().BlockCount()) + 1
|
|
layerCount = int(ggml.KV().BlockCount()) + 1
|
|
memoryRequiredPartial = memoryRequiredTotal
|
|
memoryRequiredPartial = memoryRequiredTotal
|