package llm

// #cgo CFLAGS: -Illama.cpp
// #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/libllama.a -lstdc++
// #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/libllama.a -lstdc++
// #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
// #cgo windows,arm64 LDFLAGS: ${SRCDIR}/build/windows/arm64_static/libllama.a -static -lstdc++
// #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
// #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
// #include <stdlib.h>
// #include "llama.h"
import "C"

import (
	"fmt"
	"strings"
	"unsafe"
)

// SystemInfo is an unused example of calling llama.cpp functions using CGo.
func SystemInfo() string {
	return C.GoString(C.llama_print_system_info())
}
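
// Quantize converts the model at infile to the quantization type ftype and
// writes the result to outfile using llama.cpp's model quantizer.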
func Quantize(infile, outfile string, ftype fileType) error {
	cinfile := C.CString(infile)
	defer C.free(unsafe.Pointer(cinfile))

	coutfile := C.CString(outfile)
	defer C.free(unsafe.Pointer(coutfile))

	params := C.llama_model_quantize_default_params()
	params.nthread = -1 // a non-positive value lets llama.cpp pick the thread count
	params.ftype = ftype.Value()

	if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
		return fmt.Errorf("llama_model_quantize: %d", rc)
	}

	return nil
}
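
// llamaModel wraps a llama.cpp model handle that is loaded for its
// vocabulary, backing Tokenize and Detokenize.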
type llamaModel struct {
	m *C.struct_llama_model
}
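
// newLlamaModel loads the model file at p with vocab_only set, which is
// sufficient for tokenization but not for inference.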
func newLlamaModel(p string) *llamaModel {
	cs := C.CString(p)
	defer C.free(unsafe.Pointer(cs))

	params := C.llama_model_default_params()
	params.vocab_only = true // load only the vocabulary, not the tensor weights

	return &llamaModel{
		C.llama_load_model_from_file(cs, params),
	}
}
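
// Close frees the underlying llama.cpp model.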
func (llm *llamaModel) Close() {
	C.llama_free_model(llm.m)
}
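
// Tokenize converts s into the model's token IDs, returning nil if
// tokenization fails.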
func (llm *llamaModel) Tokenize(s string) []int {
	cs := C.CString(s)
	defer C.free(unsafe.Pointer(cs))

	// Worst case is one token per input byte, plus room for special tokens.
	ltokens := make([]C.llama_token, len(s)+2)
	n := C.llama_tokenize(
		llm.m,
		cs,
		C.int32_t(len(s)),
		&ltokens[0],
		C.int32_t(len(ltokens)),
		false,
		true,
	)
	if n < 0 {
		return nil
	}

	tokens := make([]int, n)
	for i := 0; i < int(n); i++ {
		tokens[i] = int(ltokens[i])
	}

	return tokens
}
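
// Detokenize converts token IDs back into text by concatenating the piece
// for each token.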
func (llm *llamaModel) Detokenize(i32s []int) string {
	var sb strings.Builder
	for _, i32 := range i32s {
		// 512 bytes is a generous buffer for the text of a single token.
		c := make([]byte, 512)
		if n := C.llama_token_to_piece(llm.m, C.llama_token(i32), (*C.char)(unsafe.Pointer(&c[0])), C.int(len(c)), false); n > 0 {
			sb.WriteString(unsafe.String(&c[0], n))
		}
	}

	return sb.String()
}
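
// A minimal usage sketch (illustrative only; the model path below is
// hypothetical). The model is loaded vocab-only, so it supports tokenization
// round-trips but not inference:
//
//	m := newLlamaModel("/path/to/model.gguf")
//	defer m.Close()
//	ids := m.Tokenize("hello world")
//	fmt.Println(ids, m.Detokenize(ids))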