encoder.go

package kvcache

import (
	"fmt"

	"github.com/ollama/ollama/ml"
	"github.com/ollama/ollama/model/input"
)

// Encoder cache stores K and V tensors that are position independent
//
// The tensors can be of any shape and will be returned as they were stored
// The mask is currently always nil
//
// Not currently safe for multiple sequences
type EncoderCache struct {
	// config controls mostly backend-specific optimizations
	config *ml.CacheConfig

	// ** current forward pass **

	// the active layer for Get and Put
	curLayer int

	// if something is stored during this pass, this
	// will be the position (but there is no guarantee
	// anything will be stored)
	curPos int32

	// ** cache metadata **

	// was something stored in the cache?
	encoderCached bool

	// position of the cached data
	encoderPos int32

	// ** cache data storage **

	backend      ml.Backend
	ctxs         map[int]ml.Context
	keys, values map[int]ml.Tensor
}

func NewEncoderCache() *EncoderCache {
	return &EncoderCache{
		ctxs:   make(map[int]ml.Context),
		keys:   make(map[int]ml.Tensor),
		values: make(map[int]ml.Tensor),
	}
}
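
// A minimal usage sketch, not part of the upstream file: it illustrates the
// call order the methods below expect (Init once, then StartForward per
// forward pass, with SetLayer and Put/Get inside each layer). The backend,
// dtype, ctx, batch, layers, key, and value names are assumptions for
// illustration only.
//
//	cache := NewEncoderCache()
//	cache.Init(backend, dtype, 1, 2048, 512) // maxSequences above 1 panics
//
//	if err := cache.StartForward(ctx, batch); err != nil {
//		return err
//	}
//	for i := range layers {
//		cache.SetLayer(i)
//		if !cache.EncoderCached() {
//			cache.Put(ctx, key, value) // key/value come from the vision encoder
//		}
//		k, v, _ := cache.Get(ctx) // returned exactly as stored; mask is nil
//		_, _ = k, v               // feed into cross-attention
//	}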

func (c *EncoderCache) Init(backend ml.Backend, dtype ml.DType, maxSequences, capacity, maxBatch int) {
	if c.config == nil {
		var config ml.CacheConfig
		if cc, ok := backend.(ml.BackendCacheConfig); ok {
			config = cc.CacheConfig()
		}
		c.config = &config
	}

	if maxSequences > 1 {
		panic(fmt.Errorf("encoder cache does not support multiple sequences; requested: %v", maxSequences))
	}

	if c.config.CachePadding != 0 && c.config.CachePadding != 1 {
		panic(fmt.Errorf("encoder cache is unable to enforce requested CachePadding (%v)", c.config.CachePadding))
	}

	c.backend = backend
}

func (c *EncoderCache) SetConfig(config ml.CacheConfig) {
	if c.config != nil {
		panic("config cannot be changed after being previously set, either by the model or backend")
	}

	c.config = &config
}

func (c *EncoderCache) Close() {
	for _, ctx := range c.ctxs {
		ctx.Close()
	}
}

func (c *EncoderCache) StartForward(ctx ml.Context, batch input.Batch) error {
	// We work with the most recent image
	if len(batch.Multimodal) > 0 {
		c.curPos = batch.Positions[batch.Multimodal[len(batch.Multimodal)-1].Index]
	}

	return nil
}
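
// For example (illustrative values only): with batch.Positions = []int32{10, 11, 12}
// and a single batch.Multimodal entry whose Index is 1, StartForward above records
// curPos = 11, which Put later saves as encoderPos.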

func (c *EncoderCache) SetLayer(layer int) {
	c.curLayer = layer
}

func (c *EncoderCache) EncoderCached() bool {
	return c.encoderCached
}

func (c *EncoderCache) Get(ctx ml.Context) (ml.Tensor, ml.Tensor, ml.Tensor) {
	return c.keys[c.curLayer], c.values[c.curLayer], nil
}

func (c *EncoderCache) Put(ctx ml.Context, key, value ml.Tensor) {
	c.encoderPos = c.curPos
	c.encoderCached = true

	if c.config.PermutedV {
		value = value.Permute(ctx, 1, 2, 0, 3)
	}

	if _, ok := c.ctxs[c.curLayer]; !ok {
		c.ctxs[c.curLayer] = c.backend.NewContextSize(2).Layer(c.curLayer)
	}

	if _, ok := c.keys[c.curLayer]; !ok {
		c.keys[c.curLayer] = c.ctxs[c.curLayer].Empty(key.DType(), key.Shape()...)
	}

	if _, ok := c.values[c.curLayer]; !ok {
		c.values[c.curLayer] = c.ctxs[c.curLayer].Empty(value.DType(), value.Shape()...)
	}

	ctx.Forward(
		key.Copy(ctx, c.keys[c.curLayer]),
		value.Copy(ctx, c.values[c.curLayer]),
	)
}

func (c *EncoderCache) CopyPrefix(srcSeq, dstSeq int, len int32) {
	panic("encoder cache does not support multiple sequences")
}

func (c *EncoderCache) Remove(seq int, beginIndex, endIndex int32) error {
	if c.encoderPos >= beginIndex && c.encoderPos < endIndex {
		c.encoderCached = false
	}

	return nil
}
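
// A brief sketch of the invalidation contract (the caller-side usage is an
// assumption for illustration): when a removal range covers the stored encoder
// position, Remove clears the cached flag, so EncoderCached reports false and
// the image is expected to be re-encoded and stored again on a later Put.
//
//	if err := cache.Remove(seq, beginIndex, endIndex); err == nil && !cache.EncoderCached() {
//		// the cached encoder output was invalidated by this removal
//	}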