parser.go 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. package parser
  2. import (
  3. "bufio"
  4. "bytes"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "slices"
  9. "strconv"
  10. "strings"
  11. )
  12. type Command struct {
  13. Name string
  14. Args string
  15. }
  16. type state int
  17. const (
  18. stateNil state = iota
  19. stateName
  20. stateValue
  21. stateParameter
  22. stateMessage
  23. stateComment
  24. )
  25. var errInvalidRole = errors.New("role must be one of \"system\", \"user\", or \"assistant\"")
  26. func Parse(r io.Reader) (cmds []Command, err error) {
  27. var cmd Command
  28. var curr state
  29. var b bytes.Buffer
  30. var role string
  31. br := bufio.NewReader(r)
  32. for {
  33. r, _, err := br.ReadRune()
  34. if errors.Is(err, io.EOF) {
  35. break
  36. } else if err != nil {
  37. return nil, err
  38. }
  39. next, r, err := parseRuneForState(r, curr)
  40. if errors.Is(err, io.ErrUnexpectedEOF) {
  41. return nil, fmt.Errorf("%w: %s", err, b.String())
  42. } else if err != nil {
  43. return nil, err
  44. }
  45. if next != curr {
  46. switch curr {
  47. case stateName, stateParameter:
  48. switch s := strings.ToLower(b.String()); s {
  49. case "from":
  50. cmd.Name = "model"
  51. case "parameter":
  52. next = stateParameter
  53. case "message":
  54. next = stateMessage
  55. fallthrough
  56. default:
  57. cmd.Name = s
  58. }
  59. case stateMessage:
  60. if !slices.Contains([]string{"system", "user", "assistant"}, b.String()) {
  61. return nil, errInvalidRole
  62. }
  63. role = b.String()
  64. case stateComment, stateNil:
  65. // pass
  66. case stateValue:
  67. s := b.String()
  68. s, ok := unquote(b.String())
  69. if !ok || isSpace(r) {
  70. if _, err := b.WriteRune(r); err != nil {
  71. return nil, err
  72. }
  73. continue
  74. }
  75. if role != "" {
  76. s = role + ": " + s
  77. role = ""
  78. }
  79. cmd.Args = s
  80. cmds = append(cmds, cmd)
  81. }
  82. b.Reset()
  83. curr = next
  84. }
  85. if strconv.IsPrint(r) {
  86. if _, err := b.WriteRune(r); err != nil {
  87. return nil, err
  88. }
  89. }
  90. }
  91. // flush the buffer
  92. switch curr {
  93. case stateComment, stateNil:
  94. // pass; nothing to flush
  95. case stateValue:
  96. if _, ok := unquote(b.String()); !ok {
  97. return nil, io.ErrUnexpectedEOF
  98. }
  99. cmd.Args = b.String()
  100. cmds = append(cmds, cmd)
  101. default:
  102. return nil, io.ErrUnexpectedEOF
  103. }
  104. for _, cmd := range cmds {
  105. if cmd.Name == "model" {
  106. return cmds, nil
  107. }
  108. }
  109. return nil, errors.New("no FROM line")
  110. }
  111. func parseRuneForState(r rune, cs state) (state, rune, error) {
  112. switch cs {
  113. case stateNil:
  114. switch {
  115. case r == '#':
  116. return stateComment, 0, nil
  117. case isSpace(r), isNewline(r):
  118. return stateNil, 0, nil
  119. default:
  120. return stateName, r, nil
  121. }
  122. case stateName:
  123. switch {
  124. case isAlpha(r):
  125. return stateName, r, nil
  126. case isSpace(r):
  127. return stateValue, 0, nil
  128. default:
  129. return stateNil, 0, errors.New("invalid")
  130. }
  131. case stateValue:
  132. switch {
  133. case isNewline(r):
  134. return stateNil, r, nil
  135. case isSpace(r):
  136. return stateNil, r, nil
  137. default:
  138. return stateValue, r, nil
  139. }
  140. case stateParameter:
  141. switch {
  142. case isAlpha(r), isNumber(r), r == '_':
  143. return stateParameter, r, nil
  144. case isSpace(r):
  145. return stateValue, 0, nil
  146. default:
  147. return stateNil, 0, io.ErrUnexpectedEOF
  148. }
  149. case stateMessage:
  150. switch {
  151. case isAlpha(r):
  152. return stateMessage, r, nil
  153. case isSpace(r):
  154. return stateValue, 0, nil
  155. default:
  156. return stateNil, 0, io.ErrUnexpectedEOF
  157. }
  158. case stateComment:
  159. switch {
  160. case isNewline(r):
  161. return stateNil, 0, nil
  162. default:
  163. return stateComment, 0, nil
  164. }
  165. default:
  166. return stateNil, 0, errors.New("")
  167. }
  168. }
  169. func unquote(s string) (string, bool) {
  170. if len(s) == 0 {
  171. return "", false
  172. }
  173. // TODO: single quotes
  174. if len(s) >= 3 && s[:3] == `"""` {
  175. if len(s) >= 6 && s[len(s)-3:] == `"""` {
  176. return s[3 : len(s)-3], true
  177. }
  178. return "", false
  179. }
  180. if len(s) >= 1 && s[0] == '"' {
  181. if len(s) >= 2 && s[len(s)-1] == '"' {
  182. return s[1 : len(s)-1], true
  183. }
  184. return "", false
  185. }
  186. return s, true
  187. }
  188. func isAlpha(r rune) bool {
  189. return r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z'
  190. }
  191. func isNumber(r rune) bool {
  192. return r >= '0' && r <= '9'
  193. }
  194. func isSpace(r rune) bool {
  195. return r == ' ' || r == '\t'
  196. }
  197. func isNewline(r rune) bool {
  198. return r == '\r' || r == '\n'
  199. }
  200. func isValidRole(role string) bool {
  201. return role == "system" || role == "user" || role == "assistant"
  202. }