ml/backend/ggml: create tensor on specific backend

Some tensors should be created on specific backends to reduce the number of copies and improve performance.
2025-11-11 02:47:22 +01:00 · 2025-02-25 16:06:32 -08:00
parent 764e199d67
commit 7bae7fa5ce
6 changed files with 129 additions and 60 deletions
--- a/kvcache/causal.go
+++ b/kvcache/causal.go
@@ -237,13 +237,13 @@ func (c *Causal) buildMask(ctx ml.Context, positions []int32, seqs []int) (ml.Te
 		mask[i] = float32(math.Inf(-1))
 	}

-	maskTensor, err := ctx.FromFloatSlice(mask, length, batchSize)
+	maskTensor, err := ctx.Input().FromFloatSlice(mask, length, batchSize)
 	if err != nil {
 		return nil, err
 	}

 	if c.config.MaskDType != ml.DTypeF32 {
-		out := ctx.Empty(c.config.MaskDType, maskTensor.Shape()...)
+		out := ctx.Input().Empty(c.config.MaskDType, maskTensor.Shape()...)
 		ctx.Forward(maskTensor.Copy(ctx, out))
 		maskTensor = out
 	}
@@ -440,7 +440,7 @@ func (c *Causal) Put(ctx ml.Context, key, value ml.Tensor) {
 	}

 	if _, ok := c.ctxs[c.curLayer]; !ok {
-		c.ctxs[c.curLayer] = c.backend.NewContext()
+		c.ctxs[c.curLayer] = c.backend.NewContextSize(2).Layer(c.curLayer)
 	}

 	if _, ok := c.keys[c.curLayer]; !ok {