llm.go 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. package llm
  2. // #cgo CFLAGS: -Illama.cpp
  3. // #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/libllama.a -lstdc++
  4. // #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/libllama.a -lstdc++
  5. // #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
  6. // #cgo windows,arm64 LDFLAGS: ${SRCDIR}/build/windows/arm64_static/libllama.a -static -lstdc++
  7. // #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
  8. // #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
  9. // #include <stdlib.h>
  10. // #include "llama.h"
  11. import "C"
  12. import (
  13. "fmt"
  14. "unsafe"
  15. )
  16. // SystemInfo is an unused example of calling llama.cpp functions using CGo
  17. func SystemInfo() string {
  18. return C.GoString(C.llama_print_system_info())
  19. }
  20. func Quantize(infile, outfile, filetype string) error {
  21. cinfile := C.CString(infile)
  22. defer C.free(unsafe.Pointer(cinfile))
  23. coutfile := C.CString(outfile)
  24. defer C.free(unsafe.Pointer(coutfile))
  25. params := C.llama_model_quantize_default_params()
  26. params.nthread = -1
  27. switch filetype {
  28. case "F32":
  29. params.ftype = fileTypeF32
  30. case "F16":
  31. params.ftype = fileTypeF16
  32. case "Q4_0":
  33. params.ftype = fileTypeQ4_0
  34. case "Q4_1":
  35. params.ftype = fileTypeQ4_1
  36. case "Q4_1_F16":
  37. params.ftype = fileTypeQ4_1_F16
  38. case "Q8_0":
  39. params.ftype = fileTypeQ8_0
  40. case "Q5_0":
  41. params.ftype = fileTypeQ5_0
  42. case "Q5_1":
  43. params.ftype = fileTypeQ5_1
  44. case "Q2_K":
  45. params.ftype = fileTypeQ2_K
  46. case "Q3_K_S":
  47. params.ftype = fileTypeQ3_K_S
  48. case "Q3_K_M":
  49. params.ftype = fileTypeQ3_K_M
  50. case "Q3_K_L":
  51. params.ftype = fileTypeQ3_K_L
  52. case "Q4_K_S":
  53. params.ftype = fileTypeQ4_K_S
  54. case "Q4_K_M":
  55. params.ftype = fileTypeQ4_K_M
  56. case "Q5_K_S":
  57. params.ftype = fileTypeQ5_K_S
  58. case "Q5_K_M":
  59. params.ftype = fileTypeQ5_K_M
  60. case "Q6_K":
  61. params.ftype = fileTypeQ6_K
  62. case "IQ2_XXS":
  63. params.ftype = fileTypeIQ2_XXS
  64. case "IQ2_XS":
  65. params.ftype = fileTypeIQ2_XS
  66. case "Q2_K_S":
  67. params.ftype = fileTypeQ2_K_S
  68. case "Q3_K_XS":
  69. params.ftype = fileTypeQ3_K_XS
  70. case "IQ3_XXS":
  71. params.ftype = fileTypeIQ3_XXS
  72. default:
  73. return fmt.Errorf("unknown filetype: %s", filetype)
  74. }
  75. if retval := C.llama_model_quantize(cinfile, coutfile, &params); retval != 0 {
  76. return fmt.Errorf("llama_model_quantize: %d", retval)
  77. }
  78. return nil
  79. }