parser.go 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. package parser
  2. import (
  3. "bufio"
  4. "bytes"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strconv"
  9. "strings"
  10. )
  11. type Command struct {
  12. Name string
  13. Args string
  14. }
  15. type state int
  16. const (
  17. stateNil state = iota
  18. stateName
  19. stateValue
  20. stateParameter
  21. stateMessage
  22. stateComment
  23. )
  24. var (
  25. errMissingFrom = errors.New("no FROM line")
  26. errInvalidRole = errors.New("role must be one of \"system\", \"user\", or \"assistant\"")
  27. )
  28. func Parse(r io.Reader) (cmds []Command, err error) {
  29. var cmd Command
  30. var curr state
  31. var b bytes.Buffer
  32. var role string
  33. br := bufio.NewReader(r)
  34. for {
  35. r, _, err := br.ReadRune()
  36. if errors.Is(err, io.EOF) {
  37. break
  38. } else if err != nil {
  39. return nil, err
  40. }
  41. next, r, err := parseRuneForState(r, curr)
  42. if errors.Is(err, io.ErrUnexpectedEOF) {
  43. return nil, fmt.Errorf("%w: %s", err, b.String())
  44. } else if err != nil {
  45. return nil, err
  46. }
  47. if next != curr {
  48. switch curr {
  49. case stateName, stateParameter:
  50. switch s := strings.ToLower(b.String()); s {
  51. case "from":
  52. cmd.Name = "model"
  53. case "parameter":
  54. next = stateParameter
  55. case "message":
  56. next = stateMessage
  57. fallthrough
  58. default:
  59. cmd.Name = s
  60. }
  61. case stateMessage:
  62. if !isValidRole(b.String()) {
  63. return nil, errInvalidRole
  64. }
  65. role = b.String()
  66. case stateComment, stateNil:
  67. // pass
  68. case stateValue:
  69. s, ok := unquote(b.String())
  70. if !ok || isSpace(r) {
  71. if _, err := b.WriteRune(r); err != nil {
  72. return nil, err
  73. }
  74. continue
  75. }
  76. if role != "" {
  77. s = role + ": " + s
  78. role = ""
  79. }
  80. cmd.Args = s
  81. cmds = append(cmds, cmd)
  82. }
  83. b.Reset()
  84. curr = next
  85. }
  86. if strconv.IsPrint(r) {
  87. if _, err := b.WriteRune(r); err != nil {
  88. return nil, err
  89. }
  90. }
  91. }
  92. // flush the buffer
  93. switch curr {
  94. case stateComment, stateNil:
  95. // pass; nothing to flush
  96. case stateValue:
  97. if _, ok := unquote(b.String()); !ok {
  98. return nil, io.ErrUnexpectedEOF
  99. }
  100. cmd.Args = b.String()
  101. cmds = append(cmds, cmd)
  102. default:
  103. return nil, io.ErrUnexpectedEOF
  104. }
  105. for _, cmd := range cmds {
  106. if cmd.Name == "model" {
  107. return cmds, nil
  108. }
  109. }
  110. return nil, errMissingFrom
  111. }
  112. func parseRuneForState(r rune, cs state) (state, rune, error) {
  113. switch cs {
  114. case stateNil:
  115. switch {
  116. case r == '#':
  117. return stateComment, 0, nil
  118. case isSpace(r), isNewline(r):
  119. return stateNil, 0, nil
  120. default:
  121. return stateName, r, nil
  122. }
  123. case stateName:
  124. switch {
  125. case isAlpha(r):
  126. return stateName, r, nil
  127. case isSpace(r):
  128. return stateValue, 0, nil
  129. default:
  130. return stateNil, 0, errors.New("invalid")
  131. }
  132. case stateValue:
  133. switch {
  134. case isNewline(r):
  135. return stateNil, r, nil
  136. case isSpace(r):
  137. return stateNil, r, nil
  138. default:
  139. return stateValue, r, nil
  140. }
  141. case stateParameter:
  142. switch {
  143. case isAlpha(r), isNumber(r), r == '_':
  144. return stateParameter, r, nil
  145. case isSpace(r):
  146. return stateValue, 0, nil
  147. default:
  148. return stateNil, 0, io.ErrUnexpectedEOF
  149. }
  150. case stateMessage:
  151. switch {
  152. case isAlpha(r):
  153. return stateMessage, r, nil
  154. case isSpace(r):
  155. return stateValue, 0, nil
  156. default:
  157. return stateNil, 0, io.ErrUnexpectedEOF
  158. }
  159. case stateComment:
  160. switch {
  161. case isNewline(r):
  162. return stateNil, 0, nil
  163. default:
  164. return stateComment, 0, nil
  165. }
  166. default:
  167. return stateNil, 0, errors.New("")
  168. }
  169. }
  170. func unquote(s string) (string, bool) {
  171. if len(s) == 0 {
  172. return "", false
  173. }
  174. // TODO: single quotes
  175. if len(s) >= 3 && s[:3] == `"""` {
  176. if len(s) >= 6 && s[len(s)-3:] == `"""` {
  177. return s[3 : len(s)-3], true
  178. }
  179. return "", false
  180. }
  181. if len(s) >= 1 && s[0] == '"' {
  182. if len(s) >= 2 && s[len(s)-1] == '"' {
  183. return s[1 : len(s)-1], true
  184. }
  185. return "", false
  186. }
  187. return s, true
  188. }
  189. func isAlpha(r rune) bool {
  190. return r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z'
  191. }
  192. func isNumber(r rune) bool {
  193. return r >= '0' && r <= '9'
  194. }
  195. func isSpace(r rune) bool {
  196. return r == ' ' || r == '\t'
  197. }
  198. func isNewline(r rune) bool {
  199. return r == '\r' || r == '\n'
  200. }
  201. func isValidRole(role string) bool {
  202. return role == "system" || role == "user" || role == "assistant"
  203. }