parser.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. package parser
  2. import (
  3. "bufio"
  4. "bytes"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strconv"
  9. "strings"
  10. "github.com/Masterminds/semver/v3"
  11. "golang.org/x/text/encoding/unicode"
  12. "golang.org/x/text/transform"
  13. )
// File holds the sequence of commands produced by ParseFile.
type File struct {
	// Commands contains one entry per parsed command, in the order they were
	// appended while reading the input.
	Commands []Command
}
  17. func (f File) String() string {
  18. var sb strings.Builder
  19. for _, cmd := range f.Commands {
  20. fmt.Fprintln(&sb, cmd.String())
  21. }
  22. return sb.String()
  23. }
// Command is a single parsed instruction.
type Command struct {
	// Name is the lower-cased command keyword as stored by ParseFile: "model"
	// for FROM, the parameter name for PARAMETER lines, otherwise the keyword
	// itself (for example "system" or "message").
	Name string
	// Args is the unquoted argument text. For "message" commands ParseFile
	// stores it as "<role>: <text>".
	Args string
}
  28. func (c Command) String() string {
  29. var sb strings.Builder
  30. switch c.Name {
  31. case "model":
  32. fmt.Fprintf(&sb, "FROM %s", c.Args)
  33. case "license", "template", "system", "adapter":
  34. fmt.Fprintf(&sb, "%s %s", strings.ToUpper(c.Name), quote(c.Args))
  35. case "message":
  36. role, message, _ := strings.Cut(c.Args, ": ")
  37. fmt.Fprintf(&sb, "MESSAGE %s %s", role, quote(message))
  38. case "ollama":
  39. fmt.Fprintf(&sb, "OLLAMA %s", quote(c.Args))
  40. default:
  41. fmt.Fprintf(&sb, "PARAMETER %s %s", c.Name, quote(c.Args))
  42. }
  43. return sb.String()
  44. }
// state identifies which part of the input the parser is currently reading.
type state int

const (
	// stateNil is the state between commands; spaces and newlines are skipped.
	stateNil state = iota
	// stateName is active while the command keyword is being read.
	stateName
	// stateValue is active while a command's (possibly quoted) argument is
	// being read.
	stateValue
	// stateParameter is active while the parameter name following PARAMETER
	// is being read.
	stateParameter
	// stateMessage is active while the role following MESSAGE is being read.
	stateMessage
	// stateComment is active from a '#' seen in stateNil until the next
	// newline.
	stateComment
	// stateVersion is active while the argument of an OLLAMA command is being
	// read.
	stateVersion
)
  55. var (
  56. errMissingFrom = errors.New("no FROM line")
  57. errInvalidMessageRole = errors.New("message role must be one of \"system\", \"user\", or \"assistant\"")
  58. errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"")
  59. errInvalidVersion = errors.New("invalid version")
  60. )
// ParseFile reads commands from r and returns them as a File.
//
// The input passes through a UTF-8 decoder wrapped in unicode.BOMOverride and
// is then consumed one rune at a time. parseRuneForState proposes the next
// state for each rune; whenever the state changes, the text buffered for the
// state being left is interpreted (command keyword, parameter name, message
// role, version or value) before the buffer is reset.
//
// Errors:
//   - any read or buffer-write error is returned as is;
//   - errInvalidCommand for a keyword rejected by isValidCommand or by
//     parseRuneForState;
//   - errInvalidMessageRole for a MESSAGE role rejected by isValidMessageRole;
//   - errInvalidVersion for an OLLAMA argument rejected by semver.NewVersion;
//   - io.ErrUnexpectedEOF when the input ends inside an unfinished command or
//     an unterminated quote (wrapped together with the buffered text when it
//     originates from parseRuneForState);
//   - errMissingFrom when no "model" (FROM) command was parsed.
func ParseFile(r io.Reader) (*File, error) {
	// cmd is reused for every command; Name and Args are overwritten before
	// each append.
	var cmd Command
	// curr is the state the parser is in; its zero value is stateNil.
	var curr state
	// b accumulates the runes belonging to the current state.
	var b bytes.Buffer
	// role remembers the MESSAGE role until its value has been read.
	var role string
	var f File
	tr := unicode.BOMOverride(unicode.UTF8.NewDecoder())
	br := bufio.NewReader(transform.NewReader(r, tr))
	for {
		// Inside the loop r is the current rune and shadows the reader
		// parameter.
		r, _, err := br.ReadRune()
		if errors.Is(err, io.EOF) {
			break
		} else if err != nil {
			return nil, err
		}
		// parseRuneForState hands back the rune to store; it returns 0 for
		// runes that must not be buffered.
		next, r, err := parseRuneForState(r, curr)
		if errors.Is(err, io.ErrUnexpectedEOF) {
			// Attach what has been buffered so far to the error.
			return nil, fmt.Errorf("%w: %s", err, b.String())
		} else if err != nil {
			return nil, err
		}
		// process the state transition, some transitions need to be intercepted and redirected
		if next != curr {
			switch curr {
			case stateName:
				if !isValidCommand(b.String()) {
					return nil, errInvalidCommand
				}
				// next state sometimes depends on the current buffer value
				switch s := strings.ToLower(b.String()); s {
				case "from":
					// FROM is stored under the name "model".
					cmd.Name = "model"
				case "parameter":
					// transition to stateParameter which sets command name
					next = stateParameter
				case "message":
					// transition to stateMessage which validates the message role
					next = stateMessage
					cmd.Name = s
				case "ollama":
					// The argument is read in stateVersion; the name is set
					// by falling through to the default branch.
					next = stateVersion
					fallthrough
				default:
					cmd.Name = s
				}
			case stateParameter:
				// The buffered text is the parameter name.
				cmd.Name = b.String()
			case stateMessage:
				if !isValidMessageRole(b.String()) {
					return nil, errInvalidMessageRole
				}
				role = b.String()
			case stateComment, stateNil:
				// pass
			case stateVersion:
				s, ok := unquote(strings.TrimSpace(b.String()))
				if !ok {
					// Unterminated quote: keep the rune and stay in
					// stateVersion (continue skips the state update below).
					if _, err := b.WriteRune(r); err != nil {
						return nil, err
					}
					continue
				// NOTE(review): parseRuneForState returns 0 for spaces in
				// stateVersion, so this branch looks unreachable — confirm.
				} else if isSpace(r){
					return nil, errInvalidVersion
				} else if _, err := semver.NewVersion(s); err != nil {
					return nil, errInvalidVersion
				}
				cmd.Args = s
				f.Commands = append(f.Commands, cmd)
			case stateValue:
				s, ok := unquote(strings.TrimSpace(b.String()))
				if !ok || isSpace(r) {
					// A space, or any terminator seen while a quote is still
					// open, belongs to the value: keep the rune and stay in
					// stateValue (continue skips the state update below).
					if _, err := b.WriteRune(r); err != nil {
						return nil, err
					}
					continue
				}
				if role != "" {
					// MESSAGE values are stored as "<role>: <text>".
					s = role + ": " + s
					role = ""
				}
				cmd.Args = s
				f.Commands = append(f.Commands, cmd)
			}
			b.Reset()
			curr = next
		}
		// Only printable runes are buffered; this drops the 0 placeholder
		// returned by parseRuneForState.
		if strconv.IsPrint(r) {
			if _, err := b.WriteRune(r); err != nil {
				return nil, err
			}
		}
	}
	// flush the buffer
	switch curr {
	case stateComment, stateNil:
		// pass; nothing to flush
	case stateVersion:
		s, ok := unquote(strings.TrimSpace(b.String()))
		if !ok {
			return nil, io.ErrUnexpectedEOF
		} else if _, err := semver.NewVersion(s); err != nil {
			return nil, errInvalidVersion
		}
		cmd.Args = s
		f.Commands = append(f.Commands, cmd)
	case stateValue:
		s, ok := unquote(strings.TrimSpace(b.String()))
		if !ok {
			return nil, io.ErrUnexpectedEOF
		}
		if role != "" {
			s = role + ": " + s
		}
		cmd.Args = s
		f.Commands = append(f.Commands, cmd)
	default:
		// Input ended while a keyword, parameter name or message role was
		// still being read.
		return nil, io.ErrUnexpectedEOF
	}
	// The result is only valid if it contains a FROM ("model") command.
	for _, cmd := range f.Commands {
		if cmd.Name == "model" {
			return &f, nil
		}
	}
	return nil, errMissingFrom
}
  186. func parseRuneForState(r rune, cs state) (state, rune, error) {
  187. switch cs {
  188. case stateNil:
  189. switch {
  190. case r == '#':
  191. return stateComment, 0, nil
  192. case isSpace(r), isNewline(r):
  193. return stateNil, 0, nil
  194. default:
  195. return stateName, r, nil
  196. }
  197. case stateName:
  198. switch {
  199. case isAlpha(r):
  200. return stateName, r, nil
  201. case isSpace(r):
  202. return stateValue, 0, nil
  203. default:
  204. return stateNil, 0, errInvalidCommand
  205. }
  206. case stateValue:
  207. switch {
  208. case isNewline(r):
  209. return stateNil, r, nil
  210. case isSpace(r):
  211. return stateNil, r, nil
  212. default:
  213. return stateValue, r, nil
  214. }
  215. case stateParameter:
  216. switch {
  217. case isAlpha(r), isNumber(r), r == '_':
  218. return stateParameter, r, nil
  219. case isSpace(r):
  220. return stateValue, 0, nil
  221. default:
  222. return stateNil, 0, io.ErrUnexpectedEOF
  223. }
  224. case stateMessage:
  225. switch {
  226. case isAlpha(r):
  227. return stateMessage, r, nil
  228. case isSpace(r):
  229. return stateValue, 0, nil
  230. default:
  231. return stateNil, 0, io.ErrUnexpectedEOF
  232. }
  233. case stateComment:
  234. switch {
  235. case isNewline(r):
  236. return stateNil, 0, nil
  237. default:
  238. return stateComment, 0, nil
  239. }
  240. case stateVersion:
  241. switch {
  242. case isNewline(r), isSpace(r):
  243. return stateNil, 0, nil
  244. case isAlpha(r), isNumber(r), r == '.':
  245. return stateVersion, r, nil
  246. default:
  247. return stateNil, r, nil
  248. }
  249. default:
  250. return stateNil, 0, errors.New("")
  251. }
  252. }
  253. func quote(s string) string {
  254. if strings.Contains(s, "\n") || strings.HasPrefix(s, " ") || strings.HasSuffix(s, " ") {
  255. if strings.Contains(s, "\"") {
  256. return `"""` + s + `"""`
  257. }
  258. return `"` + s + `"`
  259. }
  260. return s
  261. }
  262. func unquote(s string) (string, bool) {
  263. // TODO: single quotes
  264. if len(s) >= 3 && s[:3] == `"""` {
  265. if len(s) >= 6 && s[len(s)-3:] == `"""` {
  266. return s[3 : len(s)-3], true
  267. }
  268. return "", false
  269. }
  270. if len(s) >= 1 && s[0] == '"' {
  271. if len(s) >= 2 && s[len(s)-1] == '"' {
  272. return s[1 : len(s)-1], true
  273. }
  274. return "", false
  275. }
  276. return s, true
  277. }
  278. func isAlpha(r rune) bool {
  279. return r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z'
  280. }
  281. func isNumber(r rune) bool {
  282. return r >= '0' && r <= '9'
  283. }
  284. func isSpace(r rune) bool {
  285. return r == ' ' || r == '\t'
  286. }
  287. func isNewline(r rune) bool {
  288. return r == '\r' || r == '\n'
  289. }
  290. func isValidMessageRole(role string) bool {
  291. return role == "system" || role == "user" || role == "assistant"
  292. }
  293. func isValidCommand(cmd string) bool {
  294. switch strings.ToLower(cmd) {
  295. case "from", "license", "template", "system", "adapter", "parameter", "message", "ollama":
  296. return true
  297. default:
  298. return false
  299. }
  300. }