llm.go 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. package llm
  2. // #cgo CFLAGS: -Illama.cpp
  3. // #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/libllama.a -lstdc++
  4. // #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/libllama.a -lstdc++
  5. // #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
  6. // #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
  7. // #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
  8. // #include <stdlib.h>
  9. // #include "llama.h"
  10. import "C"
  11. import (
  12. "fmt"
  13. "unsafe"
  14. )
  15. // SystemInfo is an unused example of calling llama.cpp functions using CGo
  16. func SystemInfo() string {
  17. return C.GoString(C.llama_print_system_info())
  18. }
  19. func Quantize(infile, outfile, filetype string) error {
  20. cinfile := C.CString(infile)
  21. defer C.free(unsafe.Pointer(cinfile))
  22. coutfile := C.CString(outfile)
  23. defer C.free(unsafe.Pointer(coutfile))
  24. params := C.llama_model_quantize_default_params()
  25. params.nthread = -1
  26. switch filetype {
  27. case "F32":
  28. params.ftype = fileTypeF32
  29. case "F16":
  30. params.ftype = fileTypeF16
  31. case "Q4_0":
  32. params.ftype = fileTypeQ4_0
  33. case "Q4_1":
  34. params.ftype = fileTypeQ4_1
  35. case "Q4_1_F16":
  36. params.ftype = fileTypeQ4_1_F16
  37. case "Q8_0":
  38. params.ftype = fileTypeQ8_0
  39. case "Q5_0":
  40. params.ftype = fileTypeQ5_0
  41. case "Q5_1":
  42. params.ftype = fileTypeQ5_1
  43. case "Q2_K":
  44. params.ftype = fileTypeQ2_K
  45. case "Q3_K_S":
  46. params.ftype = fileTypeQ3_K_S
  47. case "Q3_K_M":
  48. params.ftype = fileTypeQ3_K_M
  49. case "Q3_K_L":
  50. params.ftype = fileTypeQ3_K_L
  51. case "Q4_K_S":
  52. params.ftype = fileTypeQ4_K_S
  53. case "Q4_K_M":
  54. params.ftype = fileTypeQ4_K_M
  55. case "Q5_K_S":
  56. params.ftype = fileTypeQ5_K_S
  57. case "Q5_K_M":
  58. params.ftype = fileTypeQ5_K_M
  59. case "Q6_K":
  60. params.ftype = fileTypeQ6_K
  61. case "IQ2_XXS":
  62. params.ftype = fileTypeIQ2_XXS
  63. case "IQ2_XS":
  64. params.ftype = fileTypeIQ2_XS
  65. case "Q2_K_S":
  66. params.ftype = fileTypeQ2_K_S
  67. case "Q3_K_XS":
  68. params.ftype = fileTypeQ3_K_XS
  69. case "IQ3_XXS":
  70. params.ftype = fileTypeIQ3_XXS
  71. default:
  72. return fmt.Errorf("unknown filetype: %s", filetype)
  73. }
  74. if retval := C.llama_model_quantize(cinfile, coutfile, &params); retval != 0 {
  75. return fmt.Errorf("llama_model_quantize: %d", retval)
  76. }
  77. return nil
  78. }