encoder.go 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. package kvcache
  2. import (
  3. "fmt"
  4. "github.com/ollama/ollama/ml"
  5. )
// Encoder cache stores K and V tensors that are position independent
//
// The tensors can be of any shape and will be returned as they were stored
// The mask is currently always nil
//
// Not currently safe for multiple sequences
type EncoderCache struct {
	// config controls mostly backend-specific optimizations
	config *ml.CacheConfig

	// ** current forward pass **

	// curLayer is the active layer for Get and Put
	curLayer int

	// curPos: if something is stored during this pass, this
	// will be the position (but there is no guarantee
	// anything will be stored)
	curPos int32

	// ** cache metadata **

	// encoderCached reports whether something was stored in the cache
	encoderCached bool

	// encoderPos is the position of the cached data
	encoderPos int32

	// ** cache data storage **

	// cacheCtx owns the backing tensors; released by Close
	cacheCtx ml.Context

	// keys and values are indexed by layer; entries are lazily
	// allocated on first Put for that layer
	keys, values []ml.Tensor
}
  31. func NewEncoderCache() *EncoderCache {
  32. return &EncoderCache{}
  33. }
  34. func (c *EncoderCache) Init(backend ml.Backend, dtype ml.DType, capacity int32) {
  35. if c.config == nil {
  36. var config ml.CacheConfig
  37. if cc, ok := backend.(ml.BackendCacheConfig); ok {
  38. config = cc.CacheConfig()
  39. }
  40. c.config = &config
  41. }
  42. if c.config.CachePadding != 0 && c.config.CachePadding != 1 {
  43. panic(fmt.Errorf("encoder cache is unable to enforce requested CachePadding (%v)", c.config.CachePadding))
  44. }
  45. c.cacheCtx = backend.NewContext()
  46. }
// SetConfig fixes the cache configuration before Init is called.
// It may be called at most once; a second call (or a call after the
// backend already supplied a config during Init) panics, since changing
// the config after tensors may have been laid out is unsafe.
func (c *EncoderCache) SetConfig(config ml.CacheConfig) {
	if c.config != nil {
		panic("config cannot be changed after being previously set, either by the model or backend")
	}

	c.config = &config
}
  53. func (c *EncoderCache) Close() {
  54. c.cacheCtx.Close()
  55. }
  56. func (c *EncoderCache) StartForward(ctx ml.Context, positions []int32, seqs []int) error {
  57. // The image is always in the first position
  58. c.curPos = positions[0]
  59. return nil
  60. }
  61. func (c *EncoderCache) SetLayer(layer int) {
  62. if layer >= len(c.keys) {
  63. c.keys = append(c.keys, make([]ml.Tensor, layer-len(c.keys)+1)...)
  64. c.values = append(c.values, make([]ml.Tensor, layer-len(c.values)+1)...)
  65. }
  66. c.curLayer = layer
  67. }
// EncoderCached reports whether any tensors have been stored in the cache
// (and not since invalidated by Remove).
func (c *EncoderCache) EncoderCached() bool {
	return c.encoderCached
}
// Get returns the key and value tensors stored for the current layer,
// exactly as they were stored. The third return (the mask) is always nil
// for the encoder cache.
func (c *EncoderCache) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor) {
	return c.keys[c.curLayer], c.values[c.curLayer], nil
}
  74. func (c *EncoderCache) Put(ctx ml.Context, key, value ml.Tensor) {
  75. c.encoderPos = c.curPos
  76. c.encoderCached = true
  77. if c.config.PermutedV {
  78. value = value.Permute(ctx, 1, 2, 0, 3)
  79. }
  80. if c.keys[c.curLayer] == nil || c.values[c.curLayer] == nil {
  81. c.keys[c.curLayer] = c.cacheCtx.Zeros(key.DType(), key.Shape()...)
  82. c.values[c.curLayer] = c.cacheCtx.Zeros(value.DType(), value.Shape()...)
  83. }
  84. ctx.Forward(
  85. key.Copy(ctx, c.keys[c.curLayer]),
  86. value.Copy(ctx, c.values[c.curLayer]),
  87. )
  88. }
  89. func (c *EncoderCache) CopyPrefix(srcSeq, dstSeq int, len int32) {
  90. panic("encoder cache does not support multiple sequences")
  91. }
  92. func (c *EncoderCache) Remove(seq int, beginIndex, endIndex int32) error {
  93. if c.encoderPos >= beginIndex && c.encoderPos < endIndex {
  94. c.encoderCached = false
  95. }
  96. return nil
  97. }