parser.go 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. package parser
  2. import (
  3. "bufio"
  4. "bytes"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strconv"
  9. "strings"
  10. )
  11. type Command struct {
  12. Name string
  13. Args string
  14. }
  15. type state int
  16. const (
  17. stateNil state = iota
  18. stateName
  19. stateValue
  20. stateParameter
  21. stateMessage
  22. stateComment
  23. )
  24. var (
  25. errMissingFrom = errors.New("no FROM line")
  26. errInvalidRole = errors.New("role must be one of \"system\", \"user\", or \"assistant\"")
  27. )
  28. func Format(cmds []Command) string {
  29. var b bytes.Buffer
  30. for _, cmd := range cmds {
  31. name := cmd.Name
  32. args := cmd.Args
  33. switch cmd.Name {
  34. case "model":
  35. name = "from"
  36. args = cmd.Args
  37. case "license", "template", "system", "adapter":
  38. args = quote(args)
  39. // pass
  40. case "message":
  41. role, message, _ := strings.Cut(cmd.Args, ": ")
  42. args = role + " " + quote(message)
  43. default:
  44. name = "parameter"
  45. args = cmd.Name + " " + cmd.Args
  46. }
  47. fmt.Fprintln(&b, strings.ToUpper(name), args)
  48. }
  49. return b.String()
  50. }
  51. func Parse(r io.Reader) (cmds []Command, err error) {
  52. var cmd Command
  53. var curr state
  54. var b bytes.Buffer
  55. var role string
  56. br := bufio.NewReader(r)
  57. for {
  58. r, _, err := br.ReadRune()
  59. if errors.Is(err, io.EOF) {
  60. break
  61. } else if err != nil {
  62. return nil, err
  63. }
  64. next, r, err := parseRuneForState(r, curr)
  65. if errors.Is(err, io.ErrUnexpectedEOF) {
  66. return nil, fmt.Errorf("%w: %s", err, b.String())
  67. } else if err != nil {
  68. return nil, err
  69. }
  70. // process the state transition, some transitions need to be intercepted and redirected
  71. if next != curr {
  72. switch curr {
  73. case stateName, stateParameter:
  74. // next state sometimes depends on the current buffer value
  75. switch s := strings.ToLower(b.String()); s {
  76. case "from":
  77. cmd.Name = "model"
  78. case "parameter":
  79. // transition to stateParameter which sets command name
  80. next = stateParameter
  81. case "message":
  82. // transition to stateMessage which validates the message role
  83. next = stateMessage
  84. fallthrough
  85. default:
  86. cmd.Name = s
  87. }
  88. case stateMessage:
  89. if !isValidMessageRole(b.String()) {
  90. return nil, errInvalidRole
  91. }
  92. role = b.String()
  93. case stateComment, stateNil:
  94. // pass
  95. case stateValue:
  96. s, ok := unquote(b.String())
  97. if !ok || isSpace(r) {
  98. if _, err := b.WriteRune(r); err != nil {
  99. return nil, err
  100. }
  101. continue
  102. }
  103. if role != "" {
  104. s = role + ": " + s
  105. role = ""
  106. }
  107. cmd.Args = s
  108. cmds = append(cmds, cmd)
  109. }
  110. b.Reset()
  111. curr = next
  112. }
  113. if strconv.IsPrint(r) {
  114. if _, err := b.WriteRune(r); err != nil {
  115. return nil, err
  116. }
  117. }
  118. }
  119. // flush the buffer
  120. switch curr {
  121. case stateComment, stateNil:
  122. // pass; nothing to flush
  123. case stateValue:
  124. s, ok := unquote(b.String())
  125. if !ok {
  126. return nil, io.ErrUnexpectedEOF
  127. }
  128. if role != "" {
  129. s = role + ": " + s
  130. }
  131. cmd.Args = s
  132. cmds = append(cmds, cmd)
  133. default:
  134. return nil, io.ErrUnexpectedEOF
  135. }
  136. for _, cmd := range cmds {
  137. if cmd.Name == "model" {
  138. return cmds, nil
  139. }
  140. }
  141. return nil, errMissingFrom
  142. }
  143. func parseRuneForState(r rune, cs state) (state, rune, error) {
  144. switch cs {
  145. case stateNil:
  146. switch {
  147. case r == '#':
  148. return stateComment, 0, nil
  149. case isSpace(r), isNewline(r):
  150. return stateNil, 0, nil
  151. default:
  152. return stateName, r, nil
  153. }
  154. case stateName:
  155. switch {
  156. case isAlpha(r):
  157. return stateName, r, nil
  158. case isSpace(r):
  159. return stateValue, 0, nil
  160. default:
  161. return stateNil, 0, errors.New("invalid")
  162. }
  163. case stateValue:
  164. switch {
  165. case isNewline(r):
  166. return stateNil, r, nil
  167. case isSpace(r):
  168. return stateNil, r, nil
  169. default:
  170. return stateValue, r, nil
  171. }
  172. case stateParameter:
  173. switch {
  174. case isAlpha(r), isNumber(r), r == '_':
  175. return stateParameter, r, nil
  176. case isSpace(r):
  177. return stateValue, 0, nil
  178. default:
  179. return stateNil, 0, io.ErrUnexpectedEOF
  180. }
  181. case stateMessage:
  182. switch {
  183. case isAlpha(r):
  184. return stateMessage, r, nil
  185. case isSpace(r):
  186. return stateValue, 0, nil
  187. default:
  188. return stateNil, 0, io.ErrUnexpectedEOF
  189. }
  190. case stateComment:
  191. switch {
  192. case isNewline(r):
  193. return stateNil, 0, nil
  194. default:
  195. return stateComment, 0, nil
  196. }
  197. default:
  198. return stateNil, 0, errors.New("")
  199. }
  200. }
  201. func quote(s string) string {
  202. if strings.Contains(s, "\n") || strings.HasSuffix(s, " ") {
  203. if strings.Contains(s, "\"") {
  204. return `"""` + s + `"""`
  205. }
  206. return strconv.Quote(s)
  207. }
  208. return s
  209. }
  210. func unquote(s string) (string, bool) {
  211. if len(s) == 0 {
  212. return "", false
  213. }
  214. // TODO: single quotes
  215. if len(s) >= 3 && s[:3] == `"""` {
  216. if len(s) >= 6 && s[len(s)-3:] == `"""` {
  217. return s[3 : len(s)-3], true
  218. }
  219. return "", false
  220. }
  221. if len(s) >= 1 && s[0] == '"' {
  222. if len(s) >= 2 && s[len(s)-1] == '"' {
  223. return s[1 : len(s)-1], true
  224. }
  225. return "", false
  226. }
  227. return s, true
  228. }
  229. func isAlpha(r rune) bool {
  230. return r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z'
  231. }
  232. func isNumber(r rune) bool {
  233. return r >= '0' && r <= '9'
  234. }
  235. func isSpace(r rune) bool {
  236. return r == ' ' || r == '\t'
  237. }
  238. func isNewline(r rune) bool {
  239. return r == '\r' || r == '\n'
  240. }
  241. func isValidMessageRole(role string) bool {
  242. return role == "system" || role == "user" || role == "assistant"
  243. }