input.go 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. package input
  2. import "github.com/ollama/ollama/ml"
  3. // Input represents one token in the input stream
  4. type Input struct {
  5. // Token is a single element of text.
  6. Token int32
  7. // Multimodal is opaque data representing a non-text
  8. // element such as an image (or part of one if the image
  9. // can be processed in pieces). It may be either together
  10. // with Token or on its own.
  11. Multimodal any
  12. // MultimodalHash is a unique representation of the data
  13. // stored in Multimodal, used for caching and comparing
  14. // equality.
  15. MultimodalHash uint64
  16. // SameBatch forces the following number of tokens to be processed
  17. // in a single batch, breaking and extending batches as needed.
  18. // Useful for things like images that must be processed in one
  19. // shot.
  20. SameBatch int
  21. }
  22. // MultimodalIndex is a multimodal element (such as an image)
  23. // together with an index into the slice of Inputs with the
  24. // corresponding token. Note that the index is not the same
  25. // as the position - to find that use the index with the
  26. // Positions slice.
  27. type MultimodalIndex struct {
  28. Index int
  29. Multimodal any
  30. }
  31. // Batch contains the inputs for a model forward pass
  32. type Batch struct {
  33. // Inputs is the input tokens, including placeholders for multimodal inputs.
  34. Inputs ml.Tensor
  35. // Multimodal is a set of multimodal embeddings previously created by
  36. // EncodeMultimodal, along with an index into Inputs. Unused for text-only
  37. // models or for batches without multimodal elements.
  38. Multimodal []MultimodalIndex
  39. // Positions is the position for each Input, relative to its sequence. Equal
  40. // in length to Inputs.
  41. Positions []int32
  42. // Sequences is the sequence for each Input. Equal in length to Inputs.
  43. Sequences []int
  44. // Outputs are the set of indicies into Inputs for which output data should
  45. // be returned.
  46. Outputs []int32
  47. }