encoder.go 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. package kvcache
  2. import (
  3. "github.com/ollama/ollama/ml"
  4. )
  5. // Encoder cache stores K and V tensors that are position independent
  6. //
  7. // The tensors can be of any shape and will be returned as they were stored
  8. // The mask is currently always nil
  9. //
  10. // Not currently safe for multiple sequences
  11. type EncoderCache struct {
  12. // ** current forward pass **
  13. // the active layer for Get and Put
  14. curLayer int
  15. // if something is stored during this pass, this
  16. // will be the position (but there is no guarantee
  17. // anything will be stored)
  18. curPos int32
  19. // ** cache metadata **
  20. // was something stored in the cache?
  21. encoderCached bool
  22. // position of the cached data
  23. encoderPos int32
  24. // ** cache data storage **
  25. cacheCtx ml.Context
  26. keys, values []ml.Tensor
  27. }
  28. func NewEncoderCache() *EncoderCache {
  29. return &EncoderCache{}
  30. }
  31. func (c *EncoderCache) Init(backend ml.Backend, dtype ml.DType, capacity int32) {
  32. c.cacheCtx = backend.NewContext()
  33. }
  34. func (c *EncoderCache) Close() {
  35. c.cacheCtx.Close()
  36. }
  37. func (c *EncoderCache) StartForward(ctx ml.Context, positions []int32, seqs []int) error {
  38. // The image is always in the first position
  39. c.curPos = positions[0]
  40. return nil
  41. }
  42. func (c *EncoderCache) SetLayer(layer int) {
  43. if layer >= len(c.keys) {
  44. c.keys = append(c.keys, make([]ml.Tensor, layer-len(c.keys)+1)...)
  45. c.values = append(c.values, make([]ml.Tensor, layer-len(c.values)+1)...)
  46. }
  47. c.curLayer = layer
  48. }
  49. func (c *EncoderCache) EncoderCached() bool {
  50. return c.encoderCached
  51. }
  52. func (c *EncoderCache) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor) {
  53. return c.keys[c.curLayer], c.values[c.curLayer], nil
  54. }
  55. func (c *EncoderCache) Put(ctx ml.Context, key, value ml.Tensor) {
  56. c.encoderPos = c.curPos
  57. c.encoderCached = true
  58. if c.keys[c.curLayer] == nil || c.values[c.curLayer] == nil {
  59. c.keys[c.curLayer] = c.cacheCtx.Zeros(key.DType(), key.Shape()...)
  60. c.values[c.curLayer] = c.cacheCtx.Zeros(value.DType(), value.Shape()...)
  61. }
  62. ctx.Forward(
  63. key.Copy(ctx, c.keys[c.curLayer]),
  64. value.Copy(ctx, c.values[c.curLayer]),
  65. )
  66. }
  67. func (c *EncoderCache) CopyPrefix(srcSeq, dstSeq int, len int32) {
  68. panic("encoder cache does not support multiple sequences")
  69. }
  70. func (c *EncoderCache) Remove(seq int, beginIndex, endIndex int32) error {
  71. if c.encoderPos >= beginIndex && c.encoderPos < endIndex {
  72. c.encoderCached = false
  73. }
  74. return nil
  75. }