parser.go 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. package parser
  2. import (
  3. "bufio"
  4. "bytes"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strconv"
  9. "strings"
  10. )
  11. type Command struct {
  12. Name string
  13. Args string
  14. }
  15. type state int
  16. const (
  17. stateNil state = iota
  18. stateName
  19. stateValue
  20. stateParameter
  21. stateMessage
  22. stateComment
  23. )
  24. var (
  25. errMissingFrom = errors.New("no FROM line")
  26. errInvalidMessageRole = errors.New("message role must be one of \"system\", \"user\", or \"assistant\"")
  27. errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"")
  28. )
  29. func Format(cmds []Command) string {
  30. var sb strings.Builder
  31. for _, cmd := range cmds {
  32. name := cmd.Name
  33. args := cmd.Args
  34. switch cmd.Name {
  35. case "model":
  36. name = "from"
  37. args = cmd.Args
  38. case "license", "template", "system", "adapter":
  39. args = quote(args)
  40. case "message":
  41. role, message, _ := strings.Cut(cmd.Args, ": ")
  42. args = role + " " + quote(message)
  43. default:
  44. name = "parameter"
  45. args = cmd.Name + " " + quote(cmd.Args)
  46. }
  47. fmt.Fprintln(&sb, strings.ToUpper(name), args)
  48. }
  49. return sb.String()
  50. }
  51. func Parse(r io.Reader) (cmds []Command, err error) {
  52. var cmd Command
  53. var curr state
  54. var b bytes.Buffer
  55. var role string
  56. br := bufio.NewReader(r)
  57. for {
  58. r, _, err := br.ReadRune()
  59. if errors.Is(err, io.EOF) {
  60. break
  61. } else if err != nil {
  62. return nil, err
  63. }
  64. next, r, err := parseRuneForState(r, curr)
  65. if errors.Is(err, io.ErrUnexpectedEOF) {
  66. return nil, fmt.Errorf("%w: %s", err, b.String())
  67. } else if err != nil {
  68. return nil, err
  69. }
  70. // process the state transition, some transitions need to be intercepted and redirected
  71. if next != curr {
  72. switch curr {
  73. case stateName:
  74. if !isValidCommand(b.String()) {
  75. return nil, errInvalidCommand
  76. }
  77. // next state sometimes depends on the current buffer value
  78. switch s := strings.ToLower(b.String()); s {
  79. case "from":
  80. cmd.Name = "model"
  81. case "parameter":
  82. // transition to stateParameter which sets command name
  83. next = stateParameter
  84. case "message":
  85. // transition to stateMessage which validates the message role
  86. next = stateMessage
  87. fallthrough
  88. default:
  89. cmd.Name = s
  90. }
  91. case stateParameter:
  92. cmd.Name = b.String()
  93. case stateMessage:
  94. if !isValidMessageRole(b.String()) {
  95. return nil, errInvalidMessageRole
  96. }
  97. role = b.String()
  98. case stateComment, stateNil:
  99. // pass
  100. case stateValue:
  101. s, ok := unquote(b.String())
  102. if !ok || isSpace(r) {
  103. if _, err := b.WriteRune(r); err != nil {
  104. return nil, err
  105. }
  106. continue
  107. }
  108. if role != "" {
  109. s = role + ": " + s
  110. role = ""
  111. }
  112. cmd.Args = s
  113. cmds = append(cmds, cmd)
  114. }
  115. b.Reset()
  116. curr = next
  117. }
  118. if strconv.IsPrint(r) {
  119. if _, err := b.WriteRune(r); err != nil {
  120. return nil, err
  121. }
  122. }
  123. }
  124. // flush the buffer
  125. switch curr {
  126. case stateComment, stateNil:
  127. // pass; nothing to flush
  128. case stateValue:
  129. s, ok := unquote(b.String())
  130. if !ok {
  131. return nil, io.ErrUnexpectedEOF
  132. }
  133. if role != "" {
  134. s = role + ": " + s
  135. }
  136. cmd.Args = s
  137. cmds = append(cmds, cmd)
  138. default:
  139. return nil, io.ErrUnexpectedEOF
  140. }
  141. for _, cmd := range cmds {
  142. if cmd.Name == "model" {
  143. return cmds, nil
  144. }
  145. }
  146. return nil, errMissingFrom
  147. }
  148. func parseRuneForState(r rune, cs state) (state, rune, error) {
  149. switch cs {
  150. case stateNil:
  151. switch {
  152. case r == '#':
  153. return stateComment, 0, nil
  154. case isSpace(r), isNewline(r):
  155. return stateNil, 0, nil
  156. default:
  157. return stateName, r, nil
  158. }
  159. case stateName:
  160. switch {
  161. case isAlpha(r):
  162. return stateName, r, nil
  163. case isSpace(r):
  164. return stateValue, 0, nil
  165. default:
  166. return stateNil, 0, errInvalidCommand
  167. }
  168. case stateValue:
  169. switch {
  170. case isNewline(r):
  171. return stateNil, r, nil
  172. case isSpace(r):
  173. return stateNil, r, nil
  174. default:
  175. return stateValue, r, nil
  176. }
  177. case stateParameter:
  178. switch {
  179. case isAlpha(r), isNumber(r), r == '_':
  180. return stateParameter, r, nil
  181. case isSpace(r):
  182. return stateValue, 0, nil
  183. default:
  184. return stateNil, 0, io.ErrUnexpectedEOF
  185. }
  186. case stateMessage:
  187. switch {
  188. case isAlpha(r):
  189. return stateMessage, r, nil
  190. case isSpace(r):
  191. return stateValue, 0, nil
  192. default:
  193. return stateNil, 0, io.ErrUnexpectedEOF
  194. }
  195. case stateComment:
  196. switch {
  197. case isNewline(r):
  198. return stateNil, 0, nil
  199. default:
  200. return stateComment, 0, nil
  201. }
  202. default:
  203. return stateNil, 0, errors.New("")
  204. }
  205. }
  206. func quote(s string) string {
  207. if strings.Contains(s, "\n") || strings.HasPrefix(s, " ") || strings.HasSuffix(s, " ") {
  208. if strings.Contains(s, "\"") {
  209. return `"""` + s + `"""`
  210. }
  211. return `"` + s + `"`
  212. }
  213. return s
  214. }
  215. func unquote(s string) (string, bool) {
  216. if len(s) == 0 {
  217. return "", false
  218. }
  219. // TODO: single quotes
  220. if len(s) >= 3 && s[:3] == `"""` {
  221. if len(s) >= 6 && s[len(s)-3:] == `"""` {
  222. return s[3 : len(s)-3], true
  223. }
  224. return "", false
  225. }
  226. if len(s) >= 1 && s[0] == '"' {
  227. if len(s) >= 2 && s[len(s)-1] == '"' {
  228. return s[1 : len(s)-1], true
  229. }
  230. return "", false
  231. }
  232. return s, true
  233. }
  234. func isAlpha(r rune) bool {
  235. return r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z'
  236. }
  237. func isNumber(r rune) bool {
  238. return r >= '0' && r <= '9'
  239. }
  240. func isSpace(r rune) bool {
  241. return r == ' ' || r == '\t'
  242. }
  243. func isNewline(r rune) bool {
  244. return r == '\r' || r == '\n'
  245. }
  246. func isValidMessageRole(role string) bool {
  247. return role == "system" || role == "user" || role == "assistant"
  248. }
  249. func isValidCommand(cmd string) bool {
  250. switch strings.ToLower(cmd) {
  251. case "from", "license", "template", "system", "adapter", "parameter", "message":
  252. return true
  253. default:
  254. return false
  255. }
  256. }