123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227 |
- package grammar
- import (
- "bytes"
- "encoding/json"
- "fmt"
- "iter"
- "strconv"
- "github.com/ollama/ollama/grammar/jsonschema"
- )
// jsonTerms is the fixed preamble that FromSchema writes before any
// schema-specific rule. It defines the generic JSON rules (object, array,
// string, number, integer, boolean, null, value) that generated rules refer
// to by name.
//
// The exponent sign in the exp rule is optional, as in the JSON number
// grammar (exp = e [ minus / plus ] 1*DIGIT), so that values such as 1e5 are
// accepted.
//
// NOTE(review): the escape rule lists its alternatives inside square brackets
// and does not mention a leading backslash; confirm that the consumer of this
// grammar parses it as intended.
const jsonTerms = `
# Unicode
#
# Unicode characters can be specified directly in the grammar, for example
# hiragana ::= [ぁ-ゟ], or with escapes: 8-bit (\xXX), 16-bit (\uXXXX) or 32-bit
# (\UXXXXXXXX).
unicode ::= \x{hex}{2} | \u{hex}{4} | \U{hex}{8}
# JSON grammar from RFC 7159
null ::= "null"
object ::= "{" (kv ("," kv)*)? "}"
array ::= "[" (value ("," value)*)? "]"
kv ::= string ":" value
integer ::= "0" | [1-9] [0-9]*
number ::= "-"? integer frac? exp?
frac ::= "." [0-9]+
exp ::= ("e" | "E") ("+" | "-")? [0-9]+
string ::= "\"" char* "\""
escape ::= ["/" | "b" | "f" | "n" | "r" | "t" | unicode]
char ::= [^"\\] | escape
space ::= (" " | "\t" | "\n" | "\r")*
hex ::= [0-9] | [a-f] | [A-F]
boolean ::= "true" | "false"
value ::= object | array | string | number | boolean | "null"
# User-defined
`
- // FromSchema generates a grammar from a JSON schema.
- func FromSchema(buf []byte, jsonSchema []byte) ([]byte, error) {
- var s *jsonschema.Schema
- if err := json.Unmarshal(jsonSchema, &s); err != nil {
- return nil, err
- }
- var g builder
- // "root" is the only rule that is guaranteed to exist, so we start
- // with its length for padding, and then adjust it as we go.
- g.pad = len("root")
- for id := range dependencies("root", s) {
- g.pad = max(g.pad, len(id))
- }
- g.b.WriteString(jsonTerms)
- ids := make(map[*jsonschema.Schema]string)
- for id, s := range dependencies("root", s) {
- ids[s] = id
- g.define(id)
- if err := fromSchema(&g, ids, s); err != nil {
- return nil, err
- }
- }
- g.define("root")
- if err := fromSchema(&g, ids, s); err != nil {
- return nil, err
- }
- g.define("") // finalize the last rule
- return g.b.Bytes(), nil
- }
- func fromSchema(g *builder, ids map[*jsonschema.Schema]string, s *jsonschema.Schema) error {
- switch typ := s.EffectiveType(); typ {
- case "array":
- if len(s.PrefixItems) == 0 && s.Items == nil {
- g.u("array")
- } else {
- g.q("[")
- for i, s := range s.PrefixItems {
- if i > 0 {
- g.q(",")
- }
- g.u(ids[s])
- }
- if s.Items != nil {
- g.u("(")
- if len(s.PrefixItems) > 0 {
- g.q(",")
- }
- g.u(ids[s.Items])
- g.u(")*")
- }
- g.q("]")
- }
- case "object":
- if len(s.Properties) == 0 {
- g.u("object")
- } else {
- g.q("{")
- for i, p := range s.Properties {
- name := ids[p]
- if i > 0 {
- g.q(",")
- }
- g.q(p.Name)
- g.q(":")
- g.u(name)
- }
- g.q("}")
- }
- case "number":
- buildConstrainedNumber(g, s)
- case "string":
- if len(s.Enum) == 0 {
- g.u("string")
- } else {
- g.u("(")
- for i, e := range s.Enum {
- if i > 0 {
- g.q("|")
- }
- g.q(string(e))
- }
- g.u(")")
- }
- case "boolean", "value", "null", "integer":
- g.u(typ)
- default:
- return fmt.Errorf("%s: unsupported type %q", s.Name, typ)
- }
- return nil
- }
- // dependencies returns a sequence of all child dependencies of the schema in
- // post-order.
- //
- // The first value is the id/pointer to the dependency, and the second value
- // is the schema.
- func dependencies(id string, s *jsonschema.Schema) iter.Seq2[string, *jsonschema.Schema] {
- return func(yield func(string, *jsonschema.Schema) bool) {
- for i, p := range s.Properties {
- id := fmt.Sprintf("%s_%d", id, i)
- for did, d := range dependencies(id, p) {
- if !yield(did, d) {
- return
- }
- }
- if !yield(id, p) {
- return
- }
- }
- for i, p := range s.PrefixItems {
- id := fmt.Sprintf("tuple_%d", i)
- for did, d := range dependencies(id, p) {
- id := fmt.Sprintf("%s_%s", id, did)
- if !yield(id, d) {
- return
- }
- }
- if !yield(id, p) {
- return
- }
- }
- if s.Items != nil {
- id := fmt.Sprintf("%s_tuple_%d", id, len(s.PrefixItems))
- for did, d := range dependencies(id, s.Items) {
- if !yield(did, d) {
- return
- }
- }
- if !yield(id, s.Items) {
- return
- }
- }
- }
- }
// builder accumulates grammar text one rule at a time. The zero value is
// ready to use.
type builder struct {
	b     bytes.Buffer // grammar text written so far
	pad   int          // width that rule names are left-justified to
	rules int          // number of rules started with define
	items int          // reset to zero by define; not read by the methods below
}

// define terminates the current rule, if any, and then either starts a new
// rule or does nothing else if the name is empty.
func (b *builder) define(name string) {
	if b.rules > 0 {
		b.b.WriteString(";\n")
	}
	if name == "" {
		return
	}
	// Left-justify the name to pad columns so that "::=" lines up.
	fmt.Fprintf(&b.b, "% -*s", b.pad, name)
	b.b.WriteString(" ::=")
	b.rules++
	b.items = 0
}

// q appends a terminal to the current rule. The text is written as a quoted
// string literal (strconv.Quote), preceded by a single space.
func (b *builder) q(s string) {
	b.b.WriteString(" ")
	b.b.WriteString(strconv.Quote(s))
}

// u appends a non-terminal (or other unquoted grammar syntax) to the current
// rule, preceded by a single space.
func (b *builder) u(s string) {
	b.b.WriteString(" ")
	b.b.WriteString(s)
}
- func buildConstrainedNumber(b *builder, s *jsonschema.Schema) {
- if s.Minimum == 0 && s.Maximum == 0 {
- b.u("TODO")
- } else {
- b.u("number")
- }
- }
|