parser.go 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. package parser
  2. import (
  3. "bufio"
  4. "bytes"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strconv"
  9. "strings"
  10. )
  11. type Command struct {
  12. Name string
  13. Args string
  14. }
  15. type state int
  16. const (
  17. stateNil state = iota
  18. stateName
  19. stateValue
  20. stateParameter
  21. stateMessage
  22. stateComment
  23. )
  24. var (
  25. errMissingFrom = errors.New("no FROM line")
  26. errInvalidRole = errors.New("role must be one of \"system\", \"user\", or \"assistant\"")
  27. )
  28. func Format(cmds []Command) string {
  29. var sb strings.Builder
  30. for _, cmd := range cmds {
  31. name := cmd.Name
  32. args := cmd.Args
  33. switch cmd.Name {
  34. case "model":
  35. name = "from"
  36. args = cmd.Args
  37. case "license", "template", "system", "adapter":
  38. args = quote(args)
  39. case "message":
  40. role, message, _ := strings.Cut(cmd.Args, ": ")
  41. args = role + " " + quote(message)
  42. default:
  43. name = "parameter"
  44. args = cmd.Name + " " + quote(cmd.Args)
  45. }
  46. fmt.Fprintln(&sb, strings.ToUpper(name), args)
  47. }
  48. return sb.String()
  49. }
  50. func Parse(r io.Reader) (cmds []Command, err error) {
  51. var cmd Command
  52. var curr state
  53. var b bytes.Buffer
  54. var role string
  55. br := bufio.NewReader(r)
  56. for {
  57. r, _, err := br.ReadRune()
  58. if errors.Is(err, io.EOF) {
  59. break
  60. } else if err != nil {
  61. return nil, err
  62. }
  63. next, r, err := parseRuneForState(r, curr)
  64. if errors.Is(err, io.ErrUnexpectedEOF) {
  65. return nil, fmt.Errorf("%w: %s", err, b.String())
  66. } else if err != nil {
  67. return nil, err
  68. }
  69. // process the state transition, some transitions need to be intercepted and redirected
  70. if next != curr {
  71. switch curr {
  72. case stateName, stateParameter:
  73. // next state sometimes depends on the current buffer value
  74. switch s := strings.ToLower(b.String()); s {
  75. case "from":
  76. cmd.Name = "model"
  77. case "parameter":
  78. // transition to stateParameter which sets command name
  79. next = stateParameter
  80. case "message":
  81. // transition to stateMessage which validates the message role
  82. next = stateMessage
  83. fallthrough
  84. default:
  85. cmd.Name = s
  86. }
  87. case stateMessage:
  88. if !isValidMessageRole(b.String()) {
  89. return nil, errInvalidRole
  90. }
  91. role = b.String()
  92. case stateComment, stateNil:
  93. // pass
  94. case stateValue:
  95. s, ok := unquote(b.String())
  96. if !ok || isSpace(r) {
  97. if _, err := b.WriteRune(r); err != nil {
  98. return nil, err
  99. }
  100. continue
  101. }
  102. if role != "" {
  103. s = role + ": " + s
  104. role = ""
  105. }
  106. cmd.Args = s
  107. cmds = append(cmds, cmd)
  108. }
  109. b.Reset()
  110. curr = next
  111. }
  112. if strconv.IsPrint(r) {
  113. if _, err := b.WriteRune(r); err != nil {
  114. return nil, err
  115. }
  116. }
  117. }
  118. // flush the buffer
  119. switch curr {
  120. case stateComment, stateNil:
  121. // pass; nothing to flush
  122. case stateValue:
  123. s, ok := unquote(b.String())
  124. if !ok {
  125. return nil, io.ErrUnexpectedEOF
  126. }
  127. if role != "" {
  128. s = role + ": " + s
  129. }
  130. cmd.Args = s
  131. cmds = append(cmds, cmd)
  132. default:
  133. return nil, io.ErrUnexpectedEOF
  134. }
  135. for _, cmd := range cmds {
  136. if cmd.Name == "model" {
  137. return cmds, nil
  138. }
  139. }
  140. return nil, errMissingFrom
  141. }
  142. func parseRuneForState(r rune, cs state) (state, rune, error) {
  143. switch cs {
  144. case stateNil:
  145. switch {
  146. case r == '#':
  147. return stateComment, 0, nil
  148. case isSpace(r), isNewline(r):
  149. return stateNil, 0, nil
  150. default:
  151. return stateName, r, nil
  152. }
  153. case stateName:
  154. switch {
  155. case isAlpha(r):
  156. return stateName, r, nil
  157. case isSpace(r):
  158. return stateValue, 0, nil
  159. default:
  160. return stateNil, 0, errors.New("invalid")
  161. }
  162. case stateValue:
  163. switch {
  164. case isNewline(r):
  165. return stateNil, r, nil
  166. case isSpace(r):
  167. return stateNil, r, nil
  168. default:
  169. return stateValue, r, nil
  170. }
  171. case stateParameter:
  172. switch {
  173. case isAlpha(r), isNumber(r), r == '_':
  174. return stateParameter, r, nil
  175. case isSpace(r):
  176. return stateValue, 0, nil
  177. default:
  178. return stateNil, 0, io.ErrUnexpectedEOF
  179. }
  180. case stateMessage:
  181. switch {
  182. case isAlpha(r):
  183. return stateMessage, r, nil
  184. case isSpace(r):
  185. return stateValue, 0, nil
  186. default:
  187. return stateNil, 0, io.ErrUnexpectedEOF
  188. }
  189. case stateComment:
  190. switch {
  191. case isNewline(r):
  192. return stateNil, 0, nil
  193. default:
  194. return stateComment, 0, nil
  195. }
  196. default:
  197. return stateNil, 0, errors.New("")
  198. }
  199. }
  200. func quote(s string) string {
  201. if strings.Contains(s, "\n") || strings.HasPrefix(s, " ") || strings.HasSuffix(s, " ") {
  202. if strings.Contains(s, "\"") {
  203. return `"""` + s + `"""`
  204. }
  205. return `"` + s + `"`
  206. }
  207. return s
  208. }
  209. func unquote(s string) (string, bool) {
  210. if len(s) == 0 {
  211. return "", false
  212. }
  213. // TODO: single quotes
  214. if len(s) >= 3 && s[:3] == `"""` {
  215. if len(s) >= 6 && s[len(s)-3:] == `"""` {
  216. return s[3 : len(s)-3], true
  217. }
  218. return "", false
  219. }
  220. if len(s) >= 1 && s[0] == '"' {
  221. if len(s) >= 2 && s[len(s)-1] == '"' {
  222. return s[1 : len(s)-1], true
  223. }
  224. return "", false
  225. }
  226. return s, true
  227. }
  228. func isAlpha(r rune) bool {
  229. return r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z'
  230. }
  231. func isNumber(r rune) bool {
  232. return r >= '0' && r <= '9'
  233. }
  234. func isSpace(r rune) bool {
  235. return r == ' ' || r == '\t'
  236. }
  237. func isNewline(r rune) bool {
  238. return r == '\r' || r == '\n'
  239. }
  240. func isValidMessageRole(role string) bool {
  241. return role == "system" || role == "user" || role == "assistant"
  242. }