llm.go 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. package llm
  2. // #cgo CFLAGS: -Illama.cpp -Illama.cpp/include -Illama.cpp/ggml/include
  3. // #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/src/libllama.a ${SRCDIR}/build/darwin/arm64_static/ggml/src/libggml.a -framework Accelerate -lstdc++
  4. // #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/src/libllama.a ${SRCDIR}/build/darwin/x86_64_static/ggml/src/libggml.a -framework Accelerate -lstdc++
  5. // #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/src/libllama.a ${SRCDIR}/build/windows/amd64_static/ggml/src/libggml.a -static -lstdc++
  6. // #cgo windows,arm64 LDFLAGS: ${SRCDIR}/build/windows/arm64_static/src/libllama.a ${SRCDIR}/build/windows/arm64_static/ggml/src/libggml.a -static -lstdc++
  7. // #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/src/libllama.a ${SRCDIR}/build/linux/x86_64_static/ggml/src/libggml.a -lstdc++
  8. // #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/src/libllama.a ${SRCDIR}/build/linux/arm64_static/ggml/src/libggml.a -lstdc++
  9. // #include <stdlib.h>
  10. // #include "llama.h"
  11. import "C"
  12. import (
  13. "fmt"
  14. "log/slog"
  15. "unsafe"
  16. )
  17. // SystemInfo is an unused example of calling llama.cpp functions using CGo
  18. func SystemInfo() string {
  19. return C.GoString(C.llama_print_system_info())
  20. }
  21. func Quantize(infile, outfile string, ftype fileType) error {
  22. cinfile := C.CString(infile)
  23. defer C.free(unsafe.Pointer(cinfile))
  24. coutfile := C.CString(outfile)
  25. defer C.free(unsafe.Pointer(coutfile))
  26. params := C.llama_model_quantize_default_params()
  27. params.nthread = -1
  28. params.ftype = ftype.Value()
  29. if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
  30. return fmt.Errorf("llama_model_quantize: %d", rc)
  31. }
  32. return nil
  33. }
// loadedModel wraps a pointer to a llama.cpp model (struct llama_model).
type loadedModel struct {
	// model is the raw C model pointer. In this file it is populated by
	// loadModel and passed to llama_free_model by freeModel.
	model *C.struct_llama_model
}
  37. func loadModel(modelfile string, vocabOnly bool) (*loadedModel, error) {
  38. // TODO figure out how to quiet down the logging so we don't have 2 copies of the model metadata showing up
  39. slog.Info("XXX initializing default model params")
  40. params := C.llama_model_default_params()
  41. params.vocab_only = C.bool(vocabOnly)
  42. cmodelfile := C.CString(modelfile)
  43. defer C.free(unsafe.Pointer(cmodelfile))
  44. slog.Info("XXX loading model", "model", modelfile)
  45. model := C.llama_load_model_from_file(cmodelfile, params)
  46. if model == nil {
  47. return nil, fmt.Errorf("failed to load model %s", modelfile)
  48. }
  49. return &loadedModel{model}, nil
  50. }
  51. func freeModel(model *loadedModel) {
  52. C.llama_free_model(model.model)
  53. }
  54. func tokenize(model *loadedModel, content string) ([]int, error) {
  55. ccontent := C.CString(content)
  56. defer C.free(unsafe.Pointer(ccontent))
  57. tokenCount := len(content) + 2
  58. tokens := make([]C.int32_t, tokenCount)
  59. tokenCount = int(C.llama_tokenize(model.model, ccontent, C.int32_t(len(content)),
  60. &tokens[0], C.int32_t(tokenCount), true, true))
  61. if tokenCount < 0 {
  62. tokenCount = -tokenCount
  63. slog.Info("XXX got negative response", "count", tokenCount)
  64. tokens = make([]C.int32_t, tokenCount)
  65. tokenCount = int(C.llama_tokenize(model.model, ccontent, C.int32_t(len(content)), &tokens[0],
  66. C.int32_t(tokenCount), true, true))
  67. if tokenCount < 0 {
  68. return nil, fmt.Errorf("failed to tokenize: %d", tokenCount)
  69. }
  70. } else if tokenCount == 0 {
  71. return nil, nil
  72. }
  73. ret := make([]int, tokenCount)
  74. for i := range tokenCount {
  75. ret[i] = int(tokens[i])
  76. }
  77. slog.Debug("XXX tokenized", "tokens", tokens, "content", content)
  78. return ret, nil
  79. }
  80. func detokenize(model *loadedModel, tokens []int) string {
  81. slog.Info("XXX in CGO detokenize")
  82. var resp string
  83. for _, token := range tokens {
  84. buf := make([]C.char, 8)
  85. nTokens := C.llama_token_to_piece(model.model, C.int(token), &buf[0], 8, 0, true)
  86. if nTokens < 0 {
  87. buf = make([]C.char, -nTokens)
  88. nTokens = C.llama_token_to_piece(model.model, C.int(token), &buf[0], -nTokens, 0, true)
  89. }
  90. tokString := C.GoStringN(&buf[0], nTokens)
  91. resp += tokString
  92. }
  93. slog.Debug("XXX detokenized", "tokens", tokens, "content", resp)
  94. return resp
  95. }