
ml/backend/ggml: handle user specified cpu offloading

Michael Yang 2 months ago
parent
commit
26c2e0bd35
1 changed file with 15 additions and 10 deletions

ml/backend/ggml/ggml.go  +15 -10

@@ -67,7 +67,7 @@ func New(r *os.File, params ml.BackendParams) (ml.Backend, error) {
 		"num_key_values", len(meta.KV()),
 	)
 
-	type dbt struct {
+	type deviceBufferType struct {
 		d   *C.struct_ggml_backend_device
 		bts []*C.struct_ggml_backend_buffer_type
 	}
@@ -96,7 +96,7 @@ func New(r *os.File, params ml.BackendParams) (ml.Backend, error) {
 	var sum uint64
 	var cumsum []uint64
 
-	var gpuBufferTypes []dbt
+	var gpuDeviceBufferTypes []deviceBufferType
 	for _, d := range gpus {
 		var free, total C.size_t
 		C.ggml_backend_dev_memory(d, &free, &total)
@@ -104,7 +104,7 @@ func New(r *os.File, params ml.BackendParams) (ml.Backend, error) {
 		cumsum = append(cumsum, sum)
 
 		bt := C.ggml_backend_dev_buffer_type(d)
-		gpuBufferTypes = append(gpuBufferTypes, dbt{
+		gpuDeviceBufferTypes = append(gpuDeviceBufferTypes, deviceBufferType{
 			d:   d,
 			bts: append([]*C.struct_ggml_backend_buffer_type{bt}, cpuBufferTypes...),
 		})
@@ -115,7 +115,8 @@ func New(r *os.File, params ml.BackendParams) (ml.Backend, error) {
 		splits[i] = float64(cumsum[i]) / float64(sum)
 	}
 
-	input := dbt{C.ggml_backend_dev_by_type(C.GGML_BACKEND_DEVICE_TYPE_CPU), cpuBufferTypes}
+	cpuDeviceBufferTypes := deviceBufferType{C.ggml_backend_dev_by_type(C.GGML_BACKEND_DEVICE_TYPE_CPU), cpuBufferTypes}
+	input := cpuDeviceBufferTypes
 
 	var blocks int
 	for key, value := range meta.KV() {
@@ -124,18 +125,22 @@ func New(r *os.File, params ml.BackendParams) (ml.Backend, error) {
 		}
 	}
 
-	indexFunc := func(i int) func(float64) bool {
-		return func(f float64) bool {
-			return float64(i)/float64(blocks+1) < f
+	assignLayer := func(i int) (temp deviceBufferType) {
+		if i >= params.NumGPULayers {
+			return cpuDeviceBufferTypes
 		}
+
+		return gpuDeviceBufferTypes[slices.IndexFunc(splits, func(f float64) bool {
+			return float64(i)/float64(blocks+1) < f
+		})]
 	}
 
-	layers := make([]dbt, blocks)
+	layers := make([]deviceBufferType, blocks)
 	for i := range layers {
-		layers[i] = gpuBufferTypes[slices.IndexFunc(splits, indexFunc(i))]
+		layers[i] = assignLayer(i)
 	}
 
-	output := gpuBufferTypes[slices.IndexFunc(splits, indexFunc(blocks))]
+	output := assignLayer(blocks)
 
 	maxTensors := len(meta.Tensors().Items())
 	maxTensors += 1
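
For context, the new assignLayer closure is what makes a user-specified GPU layer count take effect: any block index at or beyond params.NumGPULayers now falls back to the CPU device and buffer types, while the remaining blocks are still spread across GPUs according to the cumulative free-memory splits. Below is a minimal, self-contained Go sketch of that selection logic with the cgo types replaced by plain stand-ins; the device names, split values, and layer counts are hypothetical examples, and only the body of assignLayer mirrors the diff.

package main

import (
	"fmt"
	"slices"
)

// deviceBufferType is a plain stand-in for the cgo-backed struct in the diff,
// which pairs a ggml backend device with its preferred buffer types.
type deviceBufferType struct {
	name string
}

func main() {
	// Hypothetical setup: two GPUs whose free memory yields a 60/40 split,
	// a 32-block model, and a user request for at most 20 GPU layers.
	cpuDeviceBufferTypes := deviceBufferType{name: "CPU"}
	gpuDeviceBufferTypes := []deviceBufferType{{name: "GPU0"}, {name: "GPU1"}}
	splits := []float64{0.6, 1.0} // cumulative free-memory fractions per GPU
	blocks := 32
	numGPULayers := 20 // stands in for params.NumGPULayers

	// assignLayer mirrors the closure added by the patch: block indexes at or
	// past the user-specified limit go to the CPU; the rest pick the first GPU
	// whose cumulative split covers the block's position in the model.
	assignLayer := func(i int) deviceBufferType {
		if i >= numGPULayers {
			return cpuDeviceBufferTypes
		}
		return gpuDeviceBufferTypes[slices.IndexFunc(splits, func(f float64) bool {
			return float64(i)/float64(blocks+1) < f
		})]
	}

	layers := make([]deviceBufferType, blocks)
	for i := range layers {
		layers[i] = assignLayer(i)
	}
	output := assignLayer(blocks)

	fmt.Println("block 0  ->", layers[0].name)  // GPU0
	fmt.Println("block 19 ->", layers[19].name) // GPU0 (19/33 is still below the 0.6 split)
	fmt.Println("block 20 ->", layers[20].name) // CPU, past the 20-layer limit
	fmt.Println("output   ->", output.name)     // CPU
}

With these sample numbers, blocks 0 through 19 land on the GPUs and everything from block 20 onward, including the output layer, stays on the CPU, which is the observable effect of the patch when a user caps the GPU layer count.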