1 miesiąc temu · 033cec232a
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -611,6 +611,14 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
 
				 			embeddingLength*numPatches*maxNumTiles +
			
 
				 			9*embeddingLength*numPaddedPatches*maxNumTiles +
			
 
				 			numPaddedPatches*maxNumTiles*numPaddedPatches*maxNumTiles*headCount)
			
 
				+	case "gemma3":
			
 
				+		for name, layer := range llm.Tensors().GroupLayers() {
			
 
				+			if strings.HasPrefix(name, "v.") {
			
 
				+				for _, tensor := range layer {
			
 
				+					weights += tensor.Size()
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				 	}
			
 
				 	return weights, graphSize
			
 
				 }
			
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -218,8 +218,8 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
 
				 		if blk, ok := layers[fmt.Sprintf("blk.%d", i)]; ok {
			
 
				 			layerSize = blk.Size()
			
 
				 			layerSize += kv / f.KV().BlockCount()
			
 
				+			memoryWeights += blk.Size()
			
 
				 		}
			
 
				-		memoryWeights += layerSize
			
 
				 
			
 
				 		if opts.NumGPU >= 0 && layerCount >= opts.NumGPU {
			
 
				 			// Stop allocating on GPU(s) once we hit the users target NumGPU
			
@@ -376,7 +376,7 @@ func (m MemoryEstimate) LogValue() slog.Value {
 
				 				// memory of the weights
			
 
				 				"total", format.HumanBytes2(m.memoryWeights),
			
 
				 				// memory of repeating layers
			
 
				-				"repeating", format.HumanBytes2(m.memoryWeights-m.memoryLayerOutput),
			
 
				+				"repeating", format.HumanBytes2(m.memoryWeights),
			
 
				 				// memory of non-repeating layers
			
 
				 				"nonrepeating", format.HumanBytes2(m.memoryLayerOutput),
			
 
				 			),