
ml: Empty tensor constructor for tensors

In cases where we allocate a tensor and then fully overwrite it with
copied data, it is wasteful to first zero out the memory.
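
A minimal sketch of the pattern this targets, assuming the Copy and Forward methods of ml.Context that appear in the diffs below; copyInto, ctx, and src are hypothetical names used only for illustration:

	// copyInto allocates a destination tensor and fills it entirely from src.
	func copyInto(ctx ml.Context, src ml.Tensor) ml.Tensor {
		// Previously: ctx.Zeros(src.DType(), src.Shape()...) cleared the
		// buffer first, only for Copy to overwrite every element.
		dst := ctx.Empty(src.DType(), src.Shape()...)

		// Copy writes the whole buffer, so skipping the memset is safe.
		return src.Copy(ctx, dst)
	}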

Jesse Gross
commit ee141cc821

4 changed files with 29 additions and 14 deletions:

  1. kvcache/causal_test.go (+8, -4)
  2. kvcache/encoder.go (+2, -2)
  3. ml/backend.go (+2, -1)
  4. ml/backend/ggml/ggml.go (+17, -7)

kvcache/causal_test.go (+8, -4)

@@ -309,7 +309,7 @@ func (b *testBackend) SystemInfo() string {
 
 type testContext struct{}
 
-func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
+func (c *testContext) Empty(dtype ml.DType, shape ...int) ml.Tensor {
 	total := 0
 
 	if len(shape) > 0 {
@@ -322,8 +322,12 @@ func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
 	return &testTensor{dtype: dtype, elementSize: 4, data: make([]float32, total), shape: shape}
 }
 
+func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
+	return c.Empty(dtype, shape...)
+}
+
 func (c *testContext) FromFloatSlice(s []float32, shape ...int) (ml.Tensor, error) {
-	t := c.Zeros(ml.DTypeF32, shape...).(*testTensor)
+	t := c.Empty(ml.DTypeF32, shape...).(*testTensor)
 
 	copy(t.data, s)
 
@@ -391,7 +395,7 @@ func (t *testTensor) Floats() []float32 {
 }
 
 func (t *testTensor) Add(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
-	out := ctx.Zeros(t.DType(), t.Shape()...).(*testTensor)
+	out := ctx.Empty(t.DType(), t.Shape()...).(*testTensor)
 
 	for i := range out.data {
 		out.data[i] = t.data[i] + t2.(*testTensor).data[i]
@@ -468,7 +472,7 @@ func (t *testTensor) View(ctx ml.Context, offset int, shape ...int) ml.Tensor {
 
 	context := &testContext{}
 
-	view := context.Zeros(t.dtype, s...).(*testTensor)
+	view := context.Empty(t.dtype, s...).(*testTensor)
 	view.data = t.data[offset : offset+len(view.data)]
 
 	return view

kvcache/encoder.go (+2, -2)

@@ -105,8 +105,8 @@ func (c *EncoderCache) Put(ctx ml.Context, key, value ml.Tensor) {
 	}
 
 	if c.keys[c.curLayer] == nil || c.values[c.curLayer] == nil {
-		c.keys[c.curLayer] = c.cacheCtx.Zeros(key.DType(), key.Shape()...)
-		c.values[c.curLayer] = c.cacheCtx.Zeros(value.DType(), value.Shape()...)
+		c.keys[c.curLayer] = c.cacheCtx.Empty(key.DType(), key.Shape()...)
+		c.values[c.curLayer] = c.cacheCtx.Empty(value.DType(), value.Shape()...)
 	}
 
 	ctx.Forward(
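
The switch to Empty is safe here: the Forward call whose opening appears above presumably copies key and value into the freshly allocated cache tensors, and since those were created with matching shapes, every element is written before anything reads it.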

ml/backend.go (+2, -1)

@@ -82,6 +82,7 @@ func NewBackend(f *os.File, params BackendParams) (Backend, error) {
 }
 
 type Context interface {
+	Empty(dtype DType, shape ...int) Tensor
 	Zeros(dtype DType, shape ...int) Tensor
 	FromFloatSlice(s []float32, shape ...int) (Tensor, error)
 	FromIntSlice(s []int32, shape ...int) (Tensor, error)
@@ -195,7 +196,7 @@ func Dump(ctx Context, t Tensor, opts ...DumpOptions) string {
 			return strconv.FormatFloat(float64(f), 'f', opts[0].Precision, 32)
 		})
 	case DTypeF16:
-		f32 := ctx.Zeros(DTypeF32, t.Shape()...)
+		f32 := ctx.Empty(DTypeF32, t.Shape()...)
 		f32 = t.Copy(ctx, f32)
 		return dump[[]float32](ctx, f32, opts[0].Items, func(f float32) string {
 			return strconv.FormatFloat(float64(f), 'f', opts[0].Precision, 32)
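
With Empty and Zeros both on the Context interface, call sites can choose per use. A hedged sketch of the intended contract; example, seqLen, and the mask are hypothetical placeholders, while the conversion mirrors the Dump change above:

	func example(ctx ml.Context, t ml.Tensor, seqLen int) (ml.Tensor, ml.Tensor) {
		// Zeros: the tensor may be read before every element is written,
		// for example a mask that is filled in incrementally.
		mask := ctx.Zeros(ml.DTypeF32, seqLen, seqLen)

		// Empty: the tensor is fully overwritten before any read, such as
		// the destination of a Copy, so zero-initialization is wasted work.
		f32 := ctx.Empty(ml.DTypeF32, t.Shape()...)
		f32 = t.Copy(ctx, f32)

		return mask, f32
	}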

ml/backend/ggml/ggml.go (+17, -7)

@@ -304,7 +304,7 @@ func shapeToGGML(shape []int) *C.int64_t {
 	return &sh[0]
 }
 
-func (c Context) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
+func newTensor(ctx Context, dtype ml.DType, zero bool, shape []int) ml.Tensor {
 	if len(shape) < 1 || len(shape) > 4 {
 		panic("unsupported number of dimensions")
 	}
@@ -318,19 +318,29 @@ func (c Context) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
 	var t *C.struct_ggml_tensor
 	switch dtype {
 	case ml.DTypeF32:
-		t = C.ggml_new_tensor(c.ctx, C.GGML_TYPE_F32, C.int(len(shape)), shapeToGGML(shape))
+		t = C.ggml_new_tensor(ctx.ctx, C.GGML_TYPE_F32, C.int(len(shape)), shapeToGGML(shape))
 	case ml.DTypeF16:
-		t = C.ggml_new_tensor(c.ctx, C.GGML_TYPE_F16, C.int(len(shape)), shapeToGGML(shape))
+		t = C.ggml_new_tensor(ctx.ctx, C.GGML_TYPE_F16, C.int(len(shape)), shapeToGGML(shape))
 	case ml.DTypeI32:
-		t = C.ggml_new_tensor(c.ctx, C.GGML_TYPE_I32, C.int(len(shape)), shapeToGGML(shape))
+		t = C.ggml_new_tensor(ctx.ctx, C.GGML_TYPE_I32, C.int(len(shape)), shapeToGGML(shape))
 	default:
 		panic("unsupported dtype")
 	}
 
-	b := C.ggml_backend_alloc_buffer(c.backend, C.ggml_nbytes(t))
+	b := C.ggml_backend_alloc_buffer(ctx.backend, C.ggml_nbytes(t))
 	C.ggml_backend_tensor_alloc(b, t, C.ggml_backend_buffer_get_base(b))
-	C.ggml_set_zero(t)
-	return &Tensor{b: c.b, t: t}
+	if zero {
+		C.ggml_set_zero(t)
+	}
+	return &Tensor{b: ctx.b, t: t}
+}
+
+func (c Context) Empty(dtype ml.DType, shape ...int) ml.Tensor {
+	return newTensor(c, dtype, false, shape)
+}
+
+func (c Context) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
+	return newTensor(c, dtype, true, shape)
 }
 
 func fromSlice[S ~[]E, E float32 | int32](ctx Context, s S, shape []int, dtype uint32) (ml.Tensor, error) {
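
Routing both constructors through newTensor keeps the dimension check, dtype switch, and buffer allocation in one place; Empty and Zeros now differ only in whether C.ggml_set_zero runs, which makes it easy to verify that Empty changes nothing besides skipping the memset.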