grammar.go 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. package grammar
  2. import (
  3. "bytes"
  4. "encoding/json"
  5. "fmt"
  6. "iter"
  7. "strconv"
  8. "github.com/ollama/ollama/grammar/jsonschema"
  9. )
// jsonTerms is the fixed preamble that FromSchema writes at the start of
// every generated grammar, before any schema-specific rule. It defines the
// generic JSON rules (object, array, string, number, integer, boolean, value,
// ...) that the rules emitted by fromSchema refer to by name.
//
// NOTE(review): exp requires an explicit "+" or "-" sign, whereas the JSON
// RFC makes the exponent sign optional — confirm this is intended.
// NOTE(review): escape is written as a bracketed alternation and char does
// not appear to require a leading backslash before an escape — verify against
// the syntax accepted by whatever consumes this grammar.
const jsonTerms = `
# Unicode
#
# Unicode characters can be specified directly in the grammar, for example
# hiragana ::= [ぁ-ゟ], or with escapes: 8-bit (\xXX), 16-bit (\uXXXX) or 32-bit
# (\UXXXXXXXX).
unicode ::= \x{hex}{2} | \u{hex}{4} | \U{hex}{8}
# JSON grammar from RFC 7159
null ::= "null"
object ::= "{" (kv ("," kv)*)? "}"
array ::= "[" (value ("," value)*)? "]"
kv ::= string ":" value
integer ::= "0" | [1-9] [0-9]*
number ::= "-"? integer frac? exp?
frac ::= "." [0-9]+
exp ::= ("e" | "E") ("+" | "-") [0-9]+
string ::= "\"" char* "\""
escape ::= ["/" | "b" | "f" | "n" | "r" | "t" | unicode]
char ::= [^"\\] | escape
space ::= (" " | "\t" | "\n" | "\r")*
hex ::= [0-9] | [a-f] | [A-F]
boolean ::= "true" | "false"
value ::= object | array | string | number | boolean | "null"
# User-defined
`
  35. // FromSchema generates a grammar from a JSON schema.
  36. func FromSchema(buf []byte, jsonSchema []byte) ([]byte, error) {
  37. var s *jsonschema.Schema
  38. if err := json.Unmarshal(jsonSchema, &s); err != nil {
  39. return nil, err
  40. }
  41. var g builder
  42. // "root" is the only rule that is guaranteed to exist, so we start
  43. // with its length for padding, and then adjust it as we go.
  44. g.pad = len("root")
  45. for id := range dependencies("root", s) {
  46. g.pad = max(g.pad, len(id))
  47. }
  48. g.b.WriteString(jsonTerms)
  49. ids := make(map[*jsonschema.Schema]string)
  50. for id, s := range dependencies("root", s) {
  51. ids[s] = id
  52. g.define(id)
  53. if err := fromSchema(&g, ids, s); err != nil {
  54. return nil, err
  55. }
  56. }
  57. g.define("root")
  58. if err := fromSchema(&g, ids, s); err != nil {
  59. return nil, err
  60. }
  61. g.define("") // finalize the last rule
  62. return g.b.Bytes(), nil
  63. }
  64. func fromSchema(g *builder, ids map[*jsonschema.Schema]string, s *jsonschema.Schema) error {
  65. switch typ := s.EffectiveType(); typ {
  66. case "array":
  67. if len(s.PrefixItems) == 0 && s.Items == nil {
  68. g.u("array")
  69. } else {
  70. g.q("[")
  71. for i, s := range s.PrefixItems {
  72. if i > 0 {
  73. g.q(",")
  74. }
  75. g.u(ids[s])
  76. }
  77. if s.Items != nil {
  78. g.u("(")
  79. if len(s.PrefixItems) > 0 {
  80. g.q(",")
  81. }
  82. g.u(ids[s.Items])
  83. g.u(")*")
  84. }
  85. g.q("]")
  86. }
  87. case "object":
  88. if len(s.Properties) == 0 {
  89. g.u("object")
  90. } else {
  91. g.q("{")
  92. for i, p := range s.Properties {
  93. name := ids[p]
  94. if i > 0 {
  95. g.q(",")
  96. }
  97. g.q(p.Name)
  98. g.q(":")
  99. g.u(name)
  100. }
  101. g.q("}")
  102. }
  103. case "number":
  104. buildConstrainedNumber(g, s)
  105. case "string":
  106. if len(s.Enum) == 0 {
  107. g.u("string")
  108. } else {
  109. g.u("(")
  110. for i, e := range s.Enum {
  111. if i > 0 {
  112. g.q("|")
  113. }
  114. g.q(string(e))
  115. }
  116. g.u(")")
  117. }
  118. case "boolean", "value", "null", "integer":
  119. g.u(typ)
  120. default:
  121. return fmt.Errorf("%s: unsupported type %q", s.Name, typ)
  122. }
  123. return nil
  124. }
  125. // dependencies returns a sequence of all child dependencies of the schema in
  126. // post-order.
  127. //
  128. // The first value is the id/pointer to the dependency, and the second value
  129. // is the schema.
  130. func dependencies(id string, s *jsonschema.Schema) iter.Seq2[string, *jsonschema.Schema] {
  131. return func(yield func(string, *jsonschema.Schema) bool) {
  132. for i, p := range s.Properties {
  133. id := fmt.Sprintf("%s_%d", id, i)
  134. for did, d := range dependencies(id, p) {
  135. if !yield(did, d) {
  136. return
  137. }
  138. }
  139. if !yield(id, p) {
  140. return
  141. }
  142. }
  143. for i, p := range s.PrefixItems {
  144. id := fmt.Sprintf("tuple_%d", i)
  145. for did, d := range dependencies(id, p) {
  146. id := fmt.Sprintf("%s_%s", id, did)
  147. if !yield(id, d) {
  148. return
  149. }
  150. }
  151. if !yield(id, p) {
  152. return
  153. }
  154. }
  155. if s.Items != nil {
  156. id := fmt.Sprintf("%s_tuple_%d", id, len(s.PrefixItems))
  157. for did, d := range dependencies(id, s.Items) {
  158. if !yield(did, d) {
  159. return
  160. }
  161. }
  162. if !yield(id, s.Items) {
  163. return
  164. }
  165. }
  166. }
  167. }
// builder accumulates the text of a grammar, one rule at a time.
type builder struct {
	b     bytes.Buffer // grammar text written so far
	pad   int          // column width rule names are left-justified to by define
	rules int          // number of rules started via define
	items int          // reset to 0 by define; consulted by q and u when choosing spacing
}
  174. // define terminates the current rule, if any, and then either starts a new
  175. // rule or does nothing else if the name is empty.
  176. func (b *builder) define(name string) {
  177. if b.rules > 0 {
  178. b.b.WriteString(";\n")
  179. }
  180. if name == "" {
  181. return
  182. }
  183. fmt.Fprintf(&b.b, "% -*s", b.pad, name)
  184. b.b.WriteString(" ::=")
  185. b.rules++
  186. b.items = 0
  187. }
  188. // quote appends a terminal to the current rule.
  189. func (b *builder) q(s string) {
  190. if b.items > 0 {
  191. b.b.WriteString(" ")
  192. }
  193. b.b.WriteString(" ")
  194. b.b.WriteString(strconv.Quote(s))
  195. }
  196. // u appends a non-terminal to the current rule.
  197. func (b *builder) u(s string) {
  198. if b.items > 0 {
  199. b.b.WriteString(" ")
  200. }
  201. b.b.WriteString(" ")
  202. b.b.WriteString(s)
  203. }
  204. func buildConstrainedNumber(b *builder, s *jsonschema.Schema) {
  205. if s.Minimum == 0 && s.Maximum == 0 {
  206. b.u("TODO")
  207. } else {
  208. b.u("number")
  209. }
  210. }