- package llm
- // #cgo CFLAGS: -Illama.cpp -Illama.cpp/include -Illama.cpp/ggml/include
- // #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/src/libllama.a ${SRCDIR}/build/darwin/arm64_static/ggml/src/libggml.a -framework Accelerate -lstdc++
- // #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/src/libllama.a ${SRCDIR}/build/darwin/x86_64_static/ggml/src/libggml.a -framework Accelerate -lstdc++
- // #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/src/libllama.a ${SRCDIR}/build/windows/amd64_static/ggml/src/libggml.a -static -lstdc++
- // #cgo windows,arm64 LDFLAGS: ${SRCDIR}/build/windows/arm64_static/src/libllama.a ${SRCDIR}/build/windows/arm64_static/ggml/src/libggml.a -static -lstdc++
- // #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/src/libllama.a ${SRCDIR}/build/linux/x86_64_static/ggml/src/libggml.a -lstdc++
- // #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/src/libllama.a ${SRCDIR}/build/linux/arm64_static/ggml/src/libggml.a -lstdc++
- // #include <stdlib.h>
- // #include "llama.h"
- import "C"
- import (
- "fmt"
- "log/slog"
- "unsafe"
- )
- // SystemInfo is an unused example of calling llama.cpp functions using CGo
- func SystemInfo() string {
- return C.GoString(C.llama_print_system_info())
- }
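- // Quantize converts the GGUF model at infile to the quantization type given by ftype and writes the result to outfile using llama.cpp's quantizer.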
- func Quantize(infile, outfile string, ftype fileType) error {
- cinfile := C.CString(infile)
- defer C.free(unsafe.Pointer(cinfile))
- coutfile := C.CString(outfile)
- defer C.free(unsafe.Pointer(coutfile))
- params := C.llama_model_quantize_default_params()
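- // nthread <= 0 lets llama.cpp choose the thread count automatically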
- params.nthread = -1
- params.ftype = ftype.Value()
- if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
- return fmt.Errorf("llama_model_quantize: %d", rc)
- }
- return nil
- }
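- // loadedModel wraps the C llama_model pointer so it can be managed from the Go side.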
- type loadedModel struct {
- model *C.struct_llama_model
- }
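- // loadModel loads a GGUF model file via llama.cpp; when vocabOnly is true only the
- // vocabulary is loaded, which is enough for tokenize/detokenize. A minimal usage
- // sketch (the path is illustrative):
- //
- //	m, err := loadModel("/path/to/model.gguf", true)
- //	if err != nil {
- //		return err
- //	}
- //	defer freeModel(m)
- //	ids, _ := tokenize(m, "hello world")
- //	text := detokenize(m, ids)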
- func loadModel(modelfile string, vocabOnly bool) (*loadedModel, error) {
- // TODO figure out how to quiet down the logging so we don't have 2 copies of the model metadata showing up
- slog.Info("XXX initializing default model params")
- params := C.llama_model_default_params()
- params.vocab_only = C.bool(vocabOnly)
- cmodelfile := C.CString(modelfile)
- defer C.free(unsafe.Pointer(cmodelfile))
- slog.Info("XXX loading model", "model", modelfile)
- model := C.llama_load_model_from_file(cmodelfile, params)
- if model == nil {
- return nil, fmt.Errorf("failed to load model %s", modelfile)
- }
- return &loadedModel{model}, nil
- }
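- // freeModel releases the C-side memory held by the model; the model must not be used afterwards.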
- func freeModel(model *loadedModel) {
- C.llama_free_model(model.model)
- }
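- // tokenize converts content into llama.cpp token IDs for the given model, retrying with a larger buffer if the first attempt is too small.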
- func tokenize(model *loadedModel, content string) ([]int, error) {
- ccontent := C.CString(content)
- defer C.free(unsafe.Pointer(ccontent))
- // reserve room for special tokens (e.g. BOS/EOS) added during tokenization
- maxTokens := len(content) + 2
- tokens := make([]C.int32_t, maxTokens)
- tokenCount := C.llama_tokenize(model.model, ccontent, C.int32_t(len(content)), &tokens[0], C.int32_t(maxTokens), true, true)
- if tokenCount < 0 {
- // a negative result is the required buffer size; grow the buffer and retry
- slog.Info("XXX got negative response", "count", tokenCount)
- tokens = make([]C.int32_t, -tokenCount)
- tokenCount = C.llama_tokenize(model.model, ccontent, C.int32_t(len(content)), &tokens[0], -tokenCount, true, true)
- } else if tokenCount == 0 {
- return nil, nil
- }
- ret := make([]int, tokenCount)
- for i := range int(tokenCount) {
- ret[i] = int(tokens[i])
- }
- slog.Debug("XXX tokenized", "tokens", tokens, "content", content)
- return ret, nil
- }
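- // detokenize converts token IDs back into a string by concatenating each token's text piece.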
- func detokenize(model *loadedModel, tokens []int) string {
- slog.Info("XXX in CGO detokenize")
- var resp string
- for _, token := range tokens {
- buf := make([]C.char, 8)
- nTokens := C.llama_token_to_piece(model.model, C.int(token), &buf[0], 8, 0, true)
- if nTokens < 0 {
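- // a negative result is the required buffer length; grow the buffer and retry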
- buf = make([]C.char, -nTokens)
- nTokens = C.llama_token_to_piece(model.model, C.int(token), &buf[0], -nTokens, 0, true)
- }
- tokString := C.GoStringN(&buf[0], nTokens)
- resp += tokString
- }
- slog.Debug("XXX detokenized", "tokens", tokens, "content", resp)
- return resp
- }