parser.go 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. package parser
  2. import (
  3. "bufio"
  4. "bytes"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strconv"
  9. "strings"
  10. "golang.org/x/text/encoding/unicode"
  11. "golang.org/x/text/transform"
  12. )
  13. type File struct {
  14. Commands []Command
  15. }
  16. func (f File) String() string {
  17. var sb strings.Builder
  18. for _, cmd := range f.Commands {
  19. fmt.Fprintln(&sb, cmd.String())
  20. }
  21. return sb.String()
  22. }
  23. type Command struct {
  24. Name string
  25. Args string
  26. }
  27. func (c Command) String() string {
  28. var sb strings.Builder
  29. switch c.Name {
  30. case "model":
  31. fmt.Fprintf(&sb, "FROM %s", c.Args)
  32. case "license", "template", "system", "adapter":
  33. fmt.Fprintf(&sb, "%s %s", strings.ToUpper(c.Name), quote(c.Args))
  34. case "message":
  35. role, message, _ := strings.Cut(c.Args, ": ")
  36. fmt.Fprintf(&sb, "MESSAGE %s %s", role, quote(message))
  37. default:
  38. fmt.Fprintf(&sb, "PARAMETER %s %s", c.Name, quote(c.Args))
  39. }
  40. return sb.String()
  41. }
  42. type state int
  43. const (
  44. stateNil state = iota
  45. stateName
  46. stateValue
  47. stateParameter
  48. stateMessage
  49. stateComment
  50. )
  51. var (
  52. errMissingFrom = errors.New("no FROM line")
  53. errInvalidMessageRole = errors.New("message role must be one of \"system\", \"user\", or \"assistant\"")
  54. errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"")
  55. )
  56. type ParserError struct {
  57. LineNumber int
  58. Msg string
  59. }
  60. func (e *ParserError) Error() string {
  61. if e.LineNumber > 0 {
  62. return fmt.Sprintf("(line %d): %s", e.LineNumber, e.Msg)
  63. }
  64. return e.Msg
  65. }
  66. func ParseFile(r io.Reader) (*File, error) {
  67. var cmd Command
  68. var curr state
  69. var currLine int = 1
  70. var b bytes.Buffer
  71. var role string
  72. var f File
  73. tr := unicode.BOMOverride(unicode.UTF8.NewDecoder())
  74. br := bufio.NewReader(transform.NewReader(r, tr))
  75. for {
  76. r, _, err := br.ReadRune()
  77. if errors.Is(err, io.EOF) {
  78. break
  79. } else if err != nil {
  80. return nil, err
  81. }
  82. if isNewline(r) {
  83. currLine++
  84. }
  85. next, r, err := parseRuneForState(r, curr)
  86. if errors.Is(err, io.ErrUnexpectedEOF) {
  87. return nil, fmt.Errorf("%w: %s", err, b.String())
  88. } else if err != nil {
  89. return nil, &ParserError{
  90. LineNumber: currLine,
  91. Msg: err.Error(),
  92. }
  93. }
  94. // process the state transition, some transitions need to be intercepted and redirected
  95. if next != curr {
  96. switch curr {
  97. case stateName:
  98. if !isValidCommand(b.String()) {
  99. return nil, &ParserError{
  100. LineNumber: currLine,
  101. Msg: errInvalidCommand.Error(),
  102. }
  103. }
  104. // next state sometimes depends on the current buffer value
  105. switch s := strings.ToLower(b.String()); s {
  106. case "from":
  107. cmd.Name = "model"
  108. case "parameter":
  109. // transition to stateParameter which sets command name
  110. next = stateParameter
  111. case "message":
  112. // transition to stateMessage which validates the message role
  113. next = stateMessage
  114. fallthrough
  115. default:
  116. cmd.Name = s
  117. }
  118. case stateParameter:
  119. cmd.Name = b.String()
  120. case stateMessage:
  121. if !isValidMessageRole(b.String()) {
  122. return nil, &ParserError{
  123. LineNumber: currLine,
  124. Msg: errInvalidMessageRole.Error(),
  125. }
  126. }
  127. role = b.String()
  128. case stateComment, stateNil:
  129. // pass
  130. case stateValue:
  131. s, ok := unquote(strings.TrimSpace(b.String()))
  132. if !ok || isSpace(r) {
  133. if _, err := b.WriteRune(r); err != nil {
  134. return nil, err
  135. }
  136. continue
  137. }
  138. if role != "" {
  139. s = role + ": " + s
  140. role = ""
  141. }
  142. cmd.Args = s
  143. f.Commands = append(f.Commands, cmd)
  144. }
  145. b.Reset()
  146. curr = next
  147. }
  148. if strconv.IsPrint(r) {
  149. if _, err := b.WriteRune(r); err != nil {
  150. return nil, err
  151. }
  152. }
  153. }
  154. // flush the buffer
  155. switch curr {
  156. case stateComment, stateNil:
  157. // pass; nothing to flush
  158. case stateValue:
  159. s, ok := unquote(strings.TrimSpace(b.String()))
  160. if !ok {
  161. return nil, io.ErrUnexpectedEOF
  162. }
  163. if role != "" {
  164. s = role + ": " + s
  165. }
  166. cmd.Args = s
  167. f.Commands = append(f.Commands, cmd)
  168. default:
  169. return nil, io.ErrUnexpectedEOF
  170. }
  171. for _, cmd := range f.Commands {
  172. if cmd.Name == "model" {
  173. return &f, nil
  174. }
  175. }
  176. return nil, errMissingFrom
  177. }
  178. func parseRuneForState(r rune, cs state) (state, rune, error) {
  179. switch cs {
  180. case stateNil:
  181. switch {
  182. case r == '#':
  183. return stateComment, 0, nil
  184. case isSpace(r), isNewline(r):
  185. return stateNil, 0, nil
  186. default:
  187. return stateName, r, nil
  188. }
  189. case stateName:
  190. switch {
  191. case isAlpha(r):
  192. return stateName, r, nil
  193. case isSpace(r):
  194. return stateValue, 0, nil
  195. default:
  196. return stateNil, 0, errInvalidCommand
  197. }
  198. case stateValue:
  199. switch {
  200. case isNewline(r):
  201. return stateNil, r, nil
  202. case isSpace(r):
  203. return stateNil, r, nil
  204. default:
  205. return stateValue, r, nil
  206. }
  207. case stateParameter:
  208. switch {
  209. case isAlpha(r), isNumber(r), r == '_':
  210. return stateParameter, r, nil
  211. case isSpace(r):
  212. return stateValue, 0, nil
  213. default:
  214. return stateNil, 0, io.ErrUnexpectedEOF
  215. }
  216. case stateMessage:
  217. switch {
  218. case isAlpha(r):
  219. return stateMessage, r, nil
  220. case isSpace(r):
  221. return stateValue, 0, nil
  222. default:
  223. return stateNil, 0, io.ErrUnexpectedEOF
  224. }
  225. case stateComment:
  226. switch {
  227. case isNewline(r):
  228. return stateNil, 0, nil
  229. default:
  230. return stateComment, 0, nil
  231. }
  232. default:
  233. return stateNil, 0, errors.New("")
  234. }
  235. }
  236. func quote(s string) string {
  237. if strings.Contains(s, "\n") || strings.HasPrefix(s, " ") || strings.HasSuffix(s, " ") {
  238. if strings.Contains(s, "\"") {
  239. return `"""` + s + `"""`
  240. }
  241. return `"` + s + `"`
  242. }
  243. return s
  244. }
  245. func unquote(s string) (string, bool) {
  246. // TODO: single quotes
  247. if len(s) >= 3 && s[:3] == `"""` {
  248. if len(s) >= 6 && s[len(s)-3:] == `"""` {
  249. return s[3 : len(s)-3], true
  250. }
  251. return "", false
  252. }
  253. if len(s) >= 1 && s[0] == '"' {
  254. if len(s) >= 2 && s[len(s)-1] == '"' {
  255. return s[1 : len(s)-1], true
  256. }
  257. return "", false
  258. }
  259. return s, true
  260. }
  261. func isAlpha(r rune) bool {
  262. return r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z'
  263. }
  264. func isNumber(r rune) bool {
  265. return r >= '0' && r <= '9'
  266. }
  267. func isSpace(r rune) bool {
  268. return r == ' ' || r == '\t'
  269. }
  270. func isNewline(r rune) bool {
  271. return r == '\r' || r == '\n'
  272. }
  273. func isValidMessageRole(role string) bool {
  274. return role == "system" || role == "user" || role == "assistant"
  275. }
  276. func isValidCommand(cmd string) bool {
  277. switch strings.ToLower(cmd) {
  278. case "from", "license", "template", "system", "adapter", "parameter", "message":
  279. return true
  280. default:
  281. return false
  282. }
  283. }