浏览代码

Merge pull request #5121 from ollama/mxyng/deepseekv2

deepseek v2 graph
Michael Yang 10 月之前
父节点
当前提交
21adf8b6d2
共有 1 个文件被更改,包括 11 次插入和 0 次删除
  1. 11 0
      llm/ggml.go

+ 11 - 0
llm/ggml.go

@@ -367,6 +367,17 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
 			4*batch*(vocab+2*embedding),
 			fullOffload,
 		)
+	case "deepseek2":
+		keys := uint64(llm.KV()["deepseek2.attention.key_length"].(uint32))
+		fullOffload = max(
+			4*batch*(3*embedding+vocab),
+			4*batch*(3*embedding+2+context*(1+headsKV)+2*keys*headsKV),
+		)
+
+		partialOffload = max(
+			4*batch*(3*embedding+vocab)+embedding*vocab*105/128,
+			4*batch*(2*embedding+1+2*keys*headsKV+context+context*headsKV)+4*keys*context*headsKV+embedding*keys*headsKV*9/16,
+		)
 	}
 
 	return