parser.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. package parser
  2. import (
  3. "bufio"
  4. "bytes"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strconv"
  9. "strings"
  10. "golang.org/x/mod/semver"
  11. "golang.org/x/text/encoding/unicode"
  12. "golang.org/x/text/transform"
  13. )
  14. type File struct {
  15. Commands []Command
  16. }
  17. func (f File) String() string {
  18. var sb strings.Builder
  19. for _, cmd := range f.Commands {
  20. fmt.Fprintln(&sb, cmd.String())
  21. }
  22. return sb.String()
  23. }
  24. type Command struct {
  25. Name string
  26. Args string
  27. }
  28. func (c Command) String() string {
  29. var sb strings.Builder
  30. switch c.Name {
  31. case "model":
  32. fmt.Fprintf(&sb, "FROM %s", c.Args)
  33. case "license", "template", "system", "adapter":
  34. fmt.Fprintf(&sb, "%s %s", strings.ToUpper(c.Name), quote(c.Args))
  35. case "message":
  36. role, message, _ := strings.Cut(c.Args, ": ")
  37. fmt.Fprintf(&sb, "MESSAGE %s %s", role, quote(message))
  38. case "ollama":
  39. fmt.Fprintf(&sb, "OLLAMA %s", c.Args)
  40. default:
  41. fmt.Fprintf(&sb, "PARAMETER %s %s", c.Name, quote(c.Args))
  42. }
  43. return sb.String()
  44. }
  45. type state int
  46. const (
  47. stateNil state = iota
  48. stateName
  49. stateValue
  50. stateParameter
  51. stateMessage
  52. stateComment
  53. stateVersion
  54. )
  55. var (
  56. errMissingFrom = errors.New("no FROM line")
  57. errInvalidMessageRole = errors.New("message role must be one of \"system\", \"user\", or \"assistant\"")
  58. errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"")
  59. errInvalidVersion = errors.New("invalid OLLAMA version")
  60. )
  61. func ParseFile(r io.Reader) (*File, error) {
  62. var cmd Command
  63. var curr state
  64. var b bytes.Buffer
  65. var role string
  66. var f File
  67. tr := unicode.BOMOverride(unicode.UTF8.NewDecoder())
  68. br := bufio.NewReader(transform.NewReader(r, tr))
  69. for {
  70. r, _, err := br.ReadRune()
  71. if errors.Is(err, io.EOF) {
  72. break
  73. } else if err != nil {
  74. return nil, err
  75. }
  76. next, r, err := parseRuneForState(r, curr)
  77. if errors.Is(err, io.ErrUnexpectedEOF) {
  78. return nil, fmt.Errorf("%w: %s", err, b.String())
  79. } else if err != nil {
  80. return nil, err
  81. }
  82. // process the state transition, some transitions need to be intercepted and redirected
  83. if next != curr {
  84. switch curr {
  85. case stateName:
  86. if !isValidCommand(b.String()) {
  87. return nil, errInvalidCommand
  88. }
  89. // next state sometimes depends on the current buffer value
  90. switch s := strings.ToLower(b.String()); s {
  91. case "from":
  92. cmd.Name = "model"
  93. case "parameter":
  94. // transition to stateParameter which sets command name
  95. next = stateParameter
  96. case "message":
  97. // transition to stateMessage which validates the message role
  98. next = stateMessage
  99. cmd.Name = s
  100. case "ollama":
  101. next = stateVersion
  102. fallthrough
  103. default:
  104. cmd.Name = s
  105. }
  106. case stateParameter:
  107. cmd.Name = b.String()
  108. case stateMessage:
  109. if !isValidMessageRole(b.String()) {
  110. return nil, errInvalidMessageRole
  111. }
  112. role = b.String()
  113. case stateComment, stateNil:
  114. // pass
  115. case stateVersion:
  116. s, ok := unquote(strings.TrimSpace(b.String()))
  117. if !ok {
  118. if _, err := b.WriteRune(r); err != nil {
  119. return nil, err
  120. }
  121. continue
  122. } else if isSpace(r) {
  123. return nil, errInvalidVersion
  124. }
  125. if s[0] != 'v' {
  126. s = "v" + s
  127. }
  128. if !semver.IsValid(s) {
  129. return nil, errInvalidVersion
  130. }
  131. cmd.Args = semver.Canonical(s)
  132. f.Commands = append(f.Commands, cmd)
  133. case stateValue:
  134. s, ok := unquote(strings.TrimSpace(b.String()))
  135. if !ok || isSpace(r) {
  136. if _, err := b.WriteRune(r); err != nil {
  137. return nil, err
  138. }
  139. continue
  140. }
  141. if role != "" {
  142. s = role + ": " + s
  143. role = ""
  144. }
  145. cmd.Args = s
  146. f.Commands = append(f.Commands, cmd)
  147. }
  148. b.Reset()
  149. curr = next
  150. }
  151. if strconv.IsPrint(r) {
  152. if _, err := b.WriteRune(r); err != nil {
  153. return nil, err
  154. }
  155. }
  156. }
  157. // flush the buffer
  158. switch curr {
  159. case stateComment, stateNil:
  160. // pass; nothing to flush
  161. case stateVersion:
  162. s, ok := unquote(strings.TrimSpace(b.String()))
  163. if !ok {
  164. return nil, io.ErrUnexpectedEOF
  165. }
  166. if s[0] != 'v' {
  167. s = "v" + s
  168. }
  169. if !semver.IsValid(s) {
  170. return nil, errInvalidVersion
  171. }
  172. cmd.Args = semver.Canonical(s)
  173. f.Commands = append(f.Commands, cmd)
  174. case stateValue:
  175. s, ok := unquote(strings.TrimSpace(b.String()))
  176. if !ok {
  177. return nil, io.ErrUnexpectedEOF
  178. }
  179. if role != "" {
  180. s = role + ": " + s
  181. }
  182. cmd.Args = s
  183. f.Commands = append(f.Commands, cmd)
  184. default:
  185. return nil, io.ErrUnexpectedEOF
  186. }
  187. for _, cmd := range f.Commands {
  188. if cmd.Name == "model" {
  189. return &f, nil
  190. }
  191. }
  192. return nil, errMissingFrom
  193. }
  194. func parseRuneForState(r rune, cs state) (state, rune, error) {
  195. switch cs {
  196. case stateNil:
  197. switch {
  198. case r == '#':
  199. return stateComment, 0, nil
  200. case isSpace(r), isNewline(r):
  201. return stateNil, 0, nil
  202. default:
  203. return stateName, r, nil
  204. }
  205. case stateName:
  206. switch {
  207. case isAlpha(r):
  208. return stateName, r, nil
  209. case isSpace(r):
  210. return stateValue, 0, nil
  211. default:
  212. return stateNil, 0, errInvalidCommand
  213. }
  214. case stateValue:
  215. switch {
  216. case isNewline(r):
  217. return stateNil, r, nil
  218. case isSpace(r):
  219. return stateNil, r, nil
  220. default:
  221. return stateValue, r, nil
  222. }
  223. case stateParameter:
  224. switch {
  225. case isAlpha(r), isNumber(r), r == '_':
  226. return stateParameter, r, nil
  227. case isSpace(r):
  228. return stateValue, 0, nil
  229. default:
  230. return stateNil, 0, io.ErrUnexpectedEOF
  231. }
  232. case stateMessage:
  233. switch {
  234. case isAlpha(r):
  235. return stateMessage, r, nil
  236. case isSpace(r):
  237. return stateValue, 0, nil
  238. default:
  239. return stateNil, 0, io.ErrUnexpectedEOF
  240. }
  241. case stateComment:
  242. switch {
  243. case isNewline(r):
  244. return stateNil, 0, nil
  245. default:
  246. return stateComment, 0, nil
  247. }
  248. case stateVersion:
  249. switch {
  250. case isNewline(r), isSpace(r):
  251. return stateNil, 0, nil
  252. case isAlpha(r), isNumber(r), r == '.', r == '+', r == '-':
  253. return stateVersion, r, nil
  254. default:
  255. return stateNil, r, nil
  256. }
  257. default:
  258. return stateNil, 0, errors.New("")
  259. }
  260. }
  261. func quote(s string) string {
  262. if strings.Contains(s, "\n") || strings.HasPrefix(s, " ") || strings.HasSuffix(s, " ") {
  263. if strings.Contains(s, "\"") {
  264. return `"""` + s + `"""`
  265. }
  266. return `"` + s + `"`
  267. }
  268. return s
  269. }
  270. func unquote(s string) (string, bool) {
  271. // TODO: single quotes
  272. if len(s) >= 3 && s[:3] == `"""` {
  273. if len(s) >= 6 && s[len(s)-3:] == `"""` {
  274. return s[3 : len(s)-3], true
  275. }
  276. return "", false
  277. }
  278. if len(s) >= 1 && s[0] == '"' {
  279. if len(s) >= 2 && s[len(s)-1] == '"' {
  280. return s[1 : len(s)-1], true
  281. }
  282. return "", false
  283. }
  284. return s, true
  285. }
  286. func isAlpha(r rune) bool {
  287. return r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z'
  288. }
  289. func isNumber(r rune) bool {
  290. return r >= '0' && r <= '9'
  291. }
  292. func isSpace(r rune) bool {
  293. return r == ' ' || r == '\t'
  294. }
  295. func isNewline(r rune) bool {
  296. return r == '\r' || r == '\n'
  297. }
  298. func isValidMessageRole(role string) bool {
  299. return role == "system" || role == "user" || role == "assistant"
  300. }
  301. func isValidCommand(cmd string) bool {
  302. switch strings.ToLower(cmd) {
  303. case "from", "license", "template", "system", "adapter", "parameter", "message", "ollama":
  304. return true
  305. default:
  306. return false
  307. }
  308. }