structured_outputs.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. package sample
  2. import (
  3. "fmt"
  4. "runtime"
  5. "time"
  6. "github.com/ollama/ollama/model"
  7. )
  8. type JSONSampler struct {
  9. schema *Schema
  10. propIdx int
  11. propToNodeMap map[string]*PDA
  12. pdaSampler *PushdownSampler
  13. decodedToks []string
  14. }
  15. func NewJSONSampler(proc model.TextProcessor, schema *Schema) (*JSONSampler, error) {
  16. if proc == nil {
  17. return nil, fmt.Errorf("TextProcessor cannot be nil")
  18. }
  19. pdaSampler, err := NewPushdownSampler(proc)
  20. if err != nil {
  21. return nil, fmt.Errorf("failed to create PushdownSampler: %w", err)
  22. }
  23. if schema == nil {
  24. return &JSONSampler{
  25. schema: nil,
  26. propIdx: -1,
  27. propToNodeMap: nil,
  28. pdaSampler: pdaSampler,
  29. }, nil
  30. }
  31. fmt.Println("schema not nil")
  32. so := &JSONSampler{
  33. schema: schema,
  34. propIdx: -1,
  35. propToNodeMap: make(map[string]*PDA),
  36. pdaSampler: pdaSampler,
  37. }
  38. so.schemaToGraph()
  39. // Benchmark token decoding
  40. start := time.Now()
  41. var m runtime.MemStats
  42. runtime.ReadMemStats(&m)
  43. before := m.Alloc
  44. vocab := proc.GetVocabulary()
  45. decodedToks := make([]string, len(vocab.Values))
  46. for i := range vocab.Values {
  47. token, err := proc.Decode([]int32{int32(i)})
  48. if err != nil {
  49. return nil, err
  50. }
  51. decodedToks[i] = token
  52. }
  53. so.decodedToks = decodedToks
  54. runtime.ReadMemStats(&m)
  55. after := m.Alloc
  56. fmt.Printf("Token decode memory usage = %.2f MB\n", float64(after-before)/(1024*1024))
  57. fmt.Printf("Token decode time = %v\n", time.Since(start))
  58. fmt.Println("--------------------------------")
  59. fmt.Println("SOSampler")
  60. fmt.Println("--------------------------------")
  61. // Benchmark this section
  62. start = time.Now()
  63. runtime.ReadMemStats(&m)
  64. before = m.Alloc
  65. // TODO: still messed up
  66. // TODO: recursion use case
  67. // key masks
  68. for _, prop := range so.schema.Properties {
  69. node := so.propToNodeMap[prop.Name]
  70. // propName -> node
  71. curState := node.State
  72. fromNode := node
  73. so.pdaSampler.CreateMask(fromNode)
  74. for curState == StateInStructuredKey {
  75. // there is only one edge
  76. for r, toNode := range fromNode.TransitionEdges {
  77. // fmt.Println("rune", r, "edge", toNode.State)
  78. so.pdaSampler.CreateMask(toNode)
  79. fmt.Printf("created mask for %c\n", r)
  80. curState = toNode.State
  81. fmt.Println("next state", curState)
  82. // TODO: theres an extra gen for " right now
  83. fromNode = toNode
  84. }
  85. }
  86. }
  87. runtime.ReadMemStats(&m)
  88. after = m.Alloc
  89. fmt.Printf("Mask creation memory usage = %.2f MB\n", float64(after-before)/(1024*1024))
  90. fmt.Printf("Mask creation time = %v\n", time.Since(start))
  91. fmt.Println("--------------------------------")
  92. return so, nil
  93. }
  94. func (s *JSONSampler) schemaToGraph() {
  95. schemaType := s.schema.EffectiveType()
  96. switch schemaType {
  97. case "object":
  98. // TODO: see if we need to connect these to the JSON graph
  99. // each prop is a key
  100. for _, prop := range s.schema.Properties {
  101. // name of key
  102. name := prop.Name
  103. keyNode := &PDA{
  104. State: StateInStructuredKey, // this is unchanging, will impact sampling
  105. TransitionEdges: make(map[rune]*PDA),
  106. MaskTokenIDToNode: make(map[int32]*PDA),
  107. }
  108. prevNode := keyNode
  109. for _, r := range name {
  110. runeNode := &PDA{
  111. State: StateInStructuredKey, // this is unchanging, will impact sampling
  112. TransitionEdges: make(map[rune]*PDA),
  113. MaskTokenIDToNode: make(map[int32]*PDA),
  114. }
  115. fmt.Println("runeNode created", runeNode.State)
  116. fmt.Printf("runeNode created %c\n", r)
  117. // since alloc on heap connections wil still map
  118. prevNode.TransitionEdges[r] = runeNode
  119. prevNode = runeNode
  120. }
  121. // point to end of object key node after all chars are done
  122. prevNode.TransitionEdges['"'] = s.pdaSampler.stateToNodeMap[StateInObjectKeyEnd]
  123. // points to start of the key
  124. s.propToNodeMap[name] = keyNode
  125. fmt.Println("name", name, "keyNode", keyNode.State)
  126. }
  127. }
  128. // TODO: do values + recursion
  129. }
  130. func (s *JSONSampler) Apply(logits []float64) ([]float64, error) {
  131. if s.schema == nil {
  132. return s.pdaSampler.Apply(logits)
  133. }
  134. switch s.pdaSampler.curNode.State {
  135. // doesnt account for multi rune case
  136. case StateInObjectKey:
  137. if s.propIdx > len(s.schema.Properties)-1 {
  138. return nil, fmt.Errorf("propIdx out of bounds")
  139. }
  140. // fmt.Println("in object key - structured outputs")
  141. // TODO: this tracking should probably be coming from a stack to track nested objects
  142. // simple case
  143. s.propIdx++
  144. fmt.Println("propIdx", s.propIdx)
  145. prop := s.schema.Properties[s.propIdx]
  146. fmt.Println("prop", prop.Name)
  147. s.pdaSampler.curNode = s.propToNodeMap[prop.Name]
  148. fmt.Println("changed curNode state to", s.pdaSampler.curNode.State)
  149. logits, err := s.pdaSampler.maskLogits(logits, s.pdaSampler.curNode)
  150. if err != nil {
  151. return nil, err
  152. }
  153. return logits, nil
  154. default:
  155. // Will only happen for the last prop - can also be precomputed.
  156. if s.propIdx == len(s.schema.Properties)-1 {
  157. // todo: if i incremenet propidx then i know im in last value as well
  158. switch s.pdaSampler.curNode.State {
  159. case StateInObjectEnd:
  160. fmt.Println("<<<<< in obj end - generating mask for", s.pdaSampler.curNode.State)
  161. s.pdaSampler.curNode.TransitionEdges = make(map[rune]*PDA)
  162. s.pdaSampler.curNode = NewPDANode(StateTerminate)
  163. s.propIdx++
  164. // TODO: this needs to be optimized in some way, computing mask on the fly is expensive
  165. case StateInNumber, StateInString, StateInBool, StateInNull, StateInListEnd:
  166. fmt.Println("<<<<< last prop - generating mask for", s.pdaSampler.curNode.State)
  167. delete(s.pdaSampler.curNode.TransitionEdges, ',')
  168. s.pdaSampler.curNode.MaskTokenIDToNode = make(map[int32]*PDA)
  169. s.pdaSampler.CreateMask(s.pdaSampler.curNode)
  170. s.propIdx++
  171. }
  172. }
  173. return s.pdaSampler.Apply(logits)
  174. }
  175. }
  176. func (s *JSONSampler) UpdateState(tokenSlice []int32) error {
  177. err := s.pdaSampler.UpdateState(tokenSlice)
  178. if err != nil {
  179. return err
  180. }
  181. if s.schema == nil {
  182. // Don't need to update state for unconstrained JSON sampling
  183. return nil
  184. }
  185. switch s.pdaSampler.curNode.State {
  186. case StateInObjectKey:
  187. s.propIdx++
  188. fmt.Println("propIdx", s.propIdx)
  189. prop := s.schema.Properties[s.propIdx]
  190. fmt.Println("prop", prop.Name)
  191. s.pdaSampler.curNode = s.propToNodeMap[prop.Name]
  192. str, err := s.pdaSampler.proc.Decode(tokenSlice)
  193. if err != nil {
  194. return err
  195. }
  196. fmt.Println("str", str)
  197. return nil
  198. default:
  199. return nil
  200. }
  201. }