123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233 |
- package llm
- import (
- "compress/gzip"
- "errors"
- "fmt"
- "io"
- "io/fs"
- "log/slog"
- "os"
- "path/filepath"
- "runtime"
- "strings"
- "sync"
- "golang.org/x/exp/slices"
- "golang.org/x/sync/errgroup"
- "github.com/ollama/ollama/gpu"
- )
- // availableDynLibs maps each library name to the path of its extracted dynamic library. Library names may contain an optional variant separated by '_'.
- // For example, "rocm_v6" and "rocm_v5" or "cpu" and "cpu_avx2".
- // Any library without a variant is the lowest common denominator.
- var availableDynLibs = map[string]string{}
- const pathComponentCount = 7 // number of '/'-separated components expected in an embedded payload path: llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
- // getDynLibs returns an ordered list of LLM libraries to try, starting with the best
- func getDynLibs(gpuInfo gpu.GpuInfo) []string {
- // Short circuit if we know we're using the default built-in (darwin only)
- if gpuInfo.Library == "default" {
- return []string{"default"}
- }
- // TODO - temporary until we have multiple CPU variations for Darwin
- // Short circuit on darwin with metal only
- if len(availableDynLibs) == 1 {
- if _, onlyMetal := availableDynLibs["metal"]; onlyMetal {
- return []string{availableDynLibs["metal"]}
- }
- }
- exactMatch := ""
- dynLibs := []string{}
- altDynLibs := []string{}
- requested := gpuInfo.Library
- if gpuInfo.Variant != "" {
- requested += "_" + gpuInfo.Variant
- }
- // Try to find an exact match
- for cmp := range availableDynLibs {
- if requested == cmp {
- exactMatch = cmp
- dynLibs = []string{availableDynLibs[cmp]}
- break
- }
- }
- // Then for GPUs load alternates and sort the list for consistent load ordering
- if gpuInfo.Library != "cpu" {
- for cmp := range availableDynLibs {
- if gpuInfo.Library == strings.Split(cmp, "_")[0] && cmp != exactMatch {
- altDynLibs = append(altDynLibs, cmp)
- }
- }
- slices.Sort(altDynLibs)
- for _, altDynLib := range altDynLibs {
- dynLibs = append(dynLibs, availableDynLibs[altDynLib])
- }
- }
- // Load up the best CPU variant if not primary requested
- if gpuInfo.Library != "cpu" {
- variant := gpu.GetCPUVariant()
- // If no variant, then we fall back to default
- // If we have a variant, try that if we find an exact match
- // Attempting to run the wrong CPU instructions will panic the
- // process
- if variant != "" {
- for cmp := range availableDynLibs {
- if cmp == "cpu_"+variant {
- dynLibs = append(dynLibs, availableDynLibs[cmp])
- break
- }
- }
- } else {
- dynLibs = append(dynLibs, availableDynLibs["cpu"])
- }
- }
- // Finally, if we didn't find any matches, LCD CPU FTW
- if len(dynLibs) == 0 {
- dynLibs = []string{availableDynLibs["cpu"]}
- }
- slog.Debug(fmt.Sprintf("ordered list of LLM libraries to try %v", dynLibs))
- return dynLibs
- }
- func rocmDynLibPresent() bool {
- for dynLibName := range availableDynLibs {
- if strings.HasPrefix(dynLibName, "rocm") {
- return true
- }
- }
- return false
- }
- func nativeInit() error {
- payloadsDir, err := gpu.PayloadsDir()
- if err != nil {
- return err
- }
- slog.Info(fmt.Sprintf("Extracting dynamic libraries to %s ...", payloadsDir))
- libs, err := extractDynamicLibs(payloadsDir, "llama.cpp/build/*/*/*/lib/*")
- if err != nil {
- if errors.Is(err, payloadMissing) {
- slog.Info(fmt.Sprintf("%s", payloadMissing))
- return nil
- }
- return err
- }
- for _, lib := range libs {
- // The last dir component is the variant name
- variant := filepath.Base(filepath.Dir(lib))
- availableDynLibs[variant] = lib
- }
- if err := verifyDriverAccess(); err != nil {
- return err
- }
- // Report which dynamic libraries we have loaded to assist troubleshooting
- variants := make([]string, len(availableDynLibs))
- i := 0
- for variant := range availableDynLibs {
- variants[i] = variant
- i++
- }
- slog.Info(fmt.Sprintf("Dynamic LLM libraries %v", variants))
- slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
- return nil
- }
- func extractDynamicLibs(payloadsDir, glob string) ([]string, error) {
- files, err := fs.Glob(libEmbed, glob)
- if err != nil || len(files) == 0 {
- return nil, payloadMissing
- }
- var mu sync.Mutex
- var libs []string
- var g errgroup.Group
- for _, file := range files {
- pathComps := strings.Split(file, "/")
- if len(pathComps) != pathComponentCount {
- slog.Error(fmt.Sprintf("unexpected payload components: %v", pathComps))
- continue
- }
- file := file
- g.Go(func() error {
- // llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
- // Include the variant in the path to avoid conflicts between multiple server libs
- targetDir := filepath.Join(payloadsDir, pathComps[pathComponentCount-3])
- srcFile, err := libEmbed.Open(file)
- if err != nil {
- return fmt.Errorf("read payload %s: %v", file, err)
- }
- defer srcFile.Close()
- if err := os.MkdirAll(targetDir, 0o755); err != nil {
- return fmt.Errorf("create payload lib dir %s: %v", payloadsDir, err)
- }
- src := io.Reader(srcFile)
- filename := file
- if strings.HasSuffix(file, ".gz") {
- src, err = gzip.NewReader(src)
- if err != nil {
- return fmt.Errorf("decompress payload %s: %v", file, err)
- }
- filename = strings.TrimSuffix(filename, ".gz")
- }
- destFile := filepath.Join(targetDir, filepath.Base(filename))
- if strings.Contains(destFile, "server") {
- mu.Lock()
- libs = append(libs, destFile)
- mu.Unlock()
- }
- destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
- if err != nil {
- return fmt.Errorf("write payload %s: %v", file, err)
- }
- defer destFp.Close()
- if _, err := io.Copy(destFp, src); err != nil {
- return fmt.Errorf("copy payload %s: %v", file, err)
- }
- return nil
- })
- }
- err = g.Wait()
- if err != nil {
- // If we fail to extract, the payload dir is unusable, so cleanup whatever we extracted
- gpu.Cleanup()
- return nil, err
- }
- return libs, nil
- }
- func verifyDriverAccess() error {
- if runtime.GOOS != "linux" {
- return nil
- }
- // Only check ROCm access if we have the dynamic lib loaded
- if rocmDynLibPresent() {
- // Verify we have permissions - either running as root, or we have group access to the driver
- fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0666)
- if err != nil {
- if errors.Is(err, fs.ErrPermission) {
- return fmt.Errorf("Radeon card detected, but permissions not set up properly. Either run ollama as root, or add you user account to the render group.")
- } else if errors.Is(err, fs.ErrNotExist) {
- // expected behavior without a radeon card
- return nil
- }
- return fmt.Errorf("failed to check permission on /dev/kfd: %w", err)
- }
- fd.Close()
- }
- return nil
- }
|