123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578 |
- /**
- * llama.cpp - commit 081b29bd2a3d91e7772e3910ce223dd63b8d7d26 - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
#include <algorithm>
#include <cctype>
#include <codecvt>
#include <cstring>
#include <filesystem>
#include <locale>
#include <memory>
#include <string>
#include <system_error>
#include <type_traits>
#include <vector>
- #ifdef _WIN32
- # define WIN32_LEAN_AND_MEAN
- # ifndef NOMINMAX
- # define NOMINMAX
- # endif
- # include <windows.h>
- #elif defined(__APPLE__)
- # include <mach-o/dyld.h>
- # include <dlfcn.h>
- #else
- # include <dlfcn.h>
- # include <unistd.h>
- #endif
- // Backend registry
- #ifdef GGML_USE_CPU
- #include "ggml-cpu.h"
- #endif
- #ifdef GGML_USE_CUDA
- #include "ggml-cuda.h"
- #endif
- #ifdef GGML_USE_METAL
- #include "ggml-metal.h"
- #endif
- #ifdef GGML_USE_SYCL
- #include "ggml-sycl.h"
- #endif
- #ifdef GGML_USE_VULKAN
- #include "ggml-vulkan.h"
- #endif
- #ifdef GGML_USE_OPENCL
- #include "ggml-opencl.h"
- #endif
- #ifdef GGML_USE_BLAS
- #include "ggml-blas.h"
- #endif
- #ifdef GGML_USE_RPC
- #include "ggml-rpc.h"
- #endif
- #ifdef GGML_USE_CANN
- #include "ggml-cann.h"
- #endif
- #ifdef GGML_USE_KOMPUTE
- #include "ggml-kompute.h"
- #endif
#ifdef _WIN32

// Pointee type of HMODULE, so that `dl_handle *` is the native library handle.
using dl_handle = std::remove_pointer_t<HMODULE>;

// Deleter for dl_handle_ptr: releases the library with FreeLibrary.
struct dl_handle_deleter {
    void operator()(HMODULE handle) {
        FreeLibrary(handle);
    }
};

// Loads the library at the given UTF-16 path.
// Returns the module handle, or NULL when LoadLibraryW fails.
static dl_handle * dl_load_library(const std::wstring & path) {
    // suppress error dialogs for missing DLLs
    DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

    HMODULE handle = LoadLibraryW(path.c_str());

    // restore the error mode that was active before the call
    SetErrorMode(old_mode);

    return handle;
}

// Loads the library at the given UTF-8 path.
// The path is converted to UTF-16 with MultiByteToWideChar instead of the
// std::wstring_convert / std::codecvt_utf8_utf16 facilities, which are
// deprecated since C++17 and throw on malformed input.
// Returns NULL when the path is empty, is not valid UTF-8, or cannot be loaded.
static dl_handle * dl_load_library(const std::string & path) {
    if (path.empty()) {
        return nullptr;
    }

    const int n_bytes = static_cast<int>(path.size());

    // first call: query the number of UTF-16 code units required
    const int n_wide = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, path.data(), n_bytes, nullptr, 0);
    if (n_wide <= 0) {
        return nullptr;
    }

    // second call: perform the conversion into a buffer of exactly that size
    std::wstring wide_path(static_cast<size_t>(n_wide), L'\0');
    if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, path.data(), n_bytes, &wide_path[0], n_wide) != n_wide) {
        return nullptr;
    }

    return dl_load_library(wide_path);
}

// Looks up the exported symbol `name` in a loaded library.
// Returns NULL when GetProcAddress does not find it.
static void * dl_get_sym(dl_handle * handle, const char * name) {
    DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

    void * p = (void *) GetProcAddress(handle, name);

    SetErrorMode(old_mode);

    return p;
}

#else

// dlopen() hands out plain `void *` handles, so the pointee type is void.
using dl_handle = void;

// Deleter for dl_handle_ptr: releases the library with dlclose.
struct dl_handle_deleter {
    void operator()(void * handle) {
        dlclose(handle);
    }
};

// Loads the library at `path` with dlopen(RTLD_NOW | RTLD_LOCAL).
// Returns the handle, or NULL when dlopen fails.
static dl_handle * dl_load_library(const std::string & path) {
    return dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
}

// Looks up the symbol `name` in a loaded library; returns whatever dlsym returns.
static void * dl_get_sym(dl_handle * handle, const char * name) {
    return dlsym(handle, name);
}

#endif

// Owning handle to a dynamically loaded library; the library is released when
// the pointer is destroyed or reset (unless release() was called first).
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
- struct ggml_backend_reg_entry {
- ggml_backend_reg_t reg;
- dl_handle_ptr handle;
- };
- struct ggml_backend_registry {
- std::vector<ggml_backend_reg_entry> backends;
- std::vector<ggml_backend_dev_t> devices;
- ggml_backend_registry() {
- #ifdef GGML_USE_CUDA
- register_backend(ggml_backend_cuda_reg());
- #endif
- #ifdef GGML_USE_METAL
- register_backend(ggml_backend_metal_reg());
- #endif
- #ifdef GGML_USE_SYCL
- register_backend(ggml_backend_sycl_reg());
- #endif
- #ifdef GGML_USE_VULKAN
- register_backend(ggml_backend_vk_reg());
- #endif
- #ifdef GGML_USE_OPENCL
- register_backend(ggml_backend_opencl_reg());
- #endif
- #ifdef GGML_USE_CANN
- register_backend(ggml_backend_cann_reg());
- #endif
- #ifdef GGML_USE_BLAS
- register_backend(ggml_backend_blas_reg());
- #endif
- #ifdef GGML_USE_RPC
- register_backend(ggml_backend_rpc_reg());
- #endif
- #ifdef GGML_USE_KOMPUTE
- register_backend(ggml_backend_kompute_reg());
- #endif
- #ifdef GGML_USE_CPU
- register_backend(ggml_backend_cpu_reg());
- #endif
- }
- ~ggml_backend_registry() {
- // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
- // since backend threads may still be running and accessing resources from the dynamic library
- for (auto & entry : backends) {
- if (entry.handle) {
- entry.handle.release(); // NOLINT
- }
- }
- }
- void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
- if (!reg) {
- return;
- }
- #ifndef NDEBUG
- GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
- __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
- #endif
- backends.push_back({ reg, std::move(handle) });
- for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
- register_device(ggml_backend_reg_dev_get(reg, i));
- }
- }
- void register_device(ggml_backend_dev_t device) {
- #ifndef NDEBUG
- GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
- #endif
- devices.push_back(device);
- }
- ggml_backend_reg_t load_backend(const char * path, bool silent) {
- dl_handle_ptr handle { dl_load_library(path) };
- if (!handle) {
- if (!silent) {
- GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path);
- }
- return nullptr;
- }
- auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
- if (score_fn && score_fn() == 0) {
- if (!silent) {
- GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path);
- }
- return nullptr;
- }
- auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
- if (!backend_init_fn) {
- if (!silent) {
- GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path);
- }
- return nullptr;
- }
- ggml_backend_reg_t reg = backend_init_fn();
- if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
- if (!silent) {
- if (!reg) {
- GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path);
- } else {
- GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
- __func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
- }
- }
- return nullptr;
- }
- GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);
- register_backend(reg, std::move(handle));
- return reg;
- }
- void unload_backend(ggml_backend_reg_t reg, bool silent) {
- auto it = std::find_if(backends.begin(), backends.end(),
- [reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });
- if (it == backends.end()) {
- if (!silent) {
- GGML_LOG_ERROR("%s: backend not found\n", __func__);
- }
- return;
- }
- if (!silent) {
- GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
- }
- // remove devices
- devices.erase(
- std::remove_if(devices.begin(), devices.end(),
- [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
- devices.end());
- // remove backend
- backends.erase(it);
- }
- };
- static ggml_backend_registry & get_reg() {
- static ggml_backend_registry reg;
- return reg;
- }
- // Internal API
- void ggml_backend_register(ggml_backend_reg_t reg) {
- get_reg().register_backend(reg);
- }
- void ggml_backend_device_register(ggml_backend_dev_t device) {
- get_reg().register_device(device);
- }
- // Backend (reg) enumeration
// Case-insensitive comparison of two C strings.
// Returns true when both strings have the same length and every pair of
// characters is equal after std::tolower. A null pointer only equals another
// null pointer.
static bool striequals(const char * a, const char * b) {
    if (a == nullptr || b == nullptr) {
        return a == b;
    }
    for (; *a && *b; a++, b++) {
        // std::tolower requires a value representable as unsigned char (or EOF);
        // passing a negative plain char (e.g. a UTF-8 byte) is undefined behavior
        const int lower_a = std::tolower(static_cast<unsigned char>(*a));
        const int lower_b = std::tolower(static_cast<unsigned char>(*b));
        if (lower_a != lower_b) {
            return false;
        }
    }
    // equal only if both strings ended at the same position
    return *a == *b;
}
- size_t ggml_backend_reg_count() {
- return get_reg().backends.size();
- }
- ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
- GGML_ASSERT(index < ggml_backend_reg_count());
- return get_reg().backends[index].reg;
- }
- ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
- for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
- ggml_backend_reg_t reg = ggml_backend_reg_get(i);
- if (striequals(ggml_backend_reg_name(reg), name)) {
- return reg;
- }
- }
- return nullptr;
- }
- // Device enumeration
- size_t ggml_backend_dev_count() {
- return get_reg().devices.size();
- }
- ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
- GGML_ASSERT(index < ggml_backend_dev_count());
- return get_reg().devices[index];
- }
- ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
- for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
- ggml_backend_dev_t dev = ggml_backend_dev_get(i);
- if (striequals(ggml_backend_dev_name(dev), name)) {
- return dev;
- }
- }
- return nullptr;
- }
- ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
- for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
- ggml_backend_dev_t dev = ggml_backend_dev_get(i);
- if (ggml_backend_dev_type(dev) == type) {
- return dev;
- }
- }
- return nullptr;
- }
- // Convenience functions
- ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
- ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
- if (!dev) {
- return nullptr;
- }
- return ggml_backend_dev_init(dev, params);
- }
- ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
- ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
- if (!dev) {
- return nullptr;
- }
- return ggml_backend_dev_init(dev, params);
- }
- ggml_backend_t ggml_backend_init_best(void) {
- ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
- if (!dev) {
- dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
- }
- if (!dev) {
- return nullptr;
- }
- return ggml_backend_dev_init(dev, nullptr);
- }
- // Dynamic loading
- ggml_backend_reg_t ggml_backend_load(const char * path) {
- return get_reg().load_backend(path, false);
- }
- void ggml_backend_unload(ggml_backend_reg_t reg) {
- get_reg().unload_backend(reg, true);
- }
// Returns the directory that contains the running executable, including a
// trailing path separator.
//   - macOS:   queried with _NSGetExecutablePath
//   - Linux:   read from /proc/self/exe; "./" when the link cannot be read
//   - Windows: queried with GetModuleFileNameA; "" when the call fails
//   - other platforms: "" (not implemented)
static std::string get_executable_path() {
#if defined(__APPLE__)
    // get executable path
    std::vector<char> path;
    uint32_t size;
    while (true) {
        size = path.size();
        // on failure the call stores the required buffer size in `size`
        if (_NSGetExecutablePath(path.data(), &size) == 0) {
            break;
        }
        path.resize(size);
    }
    std::string base_path(path.data(), size);
    // remove executable name
    auto last_slash = base_path.find_last_of('/');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + "/";
#elif defined(__linux__)
    std::string base_path = ".";
    std::vector<char> path(1024);
    while (true) {
        // get executable path
        ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
        if (len == -1) {
            break;
        }
        // a result that fills the whole buffer may be truncated: grow and retry
        if (len < (ssize_t) path.size()) {
            base_path = std::string(path.data(), len);
            // remove executable name
            auto last_slash = base_path.find_last_of('/');
            if (last_slash != std::string::npos) {
                base_path = base_path.substr(0, last_slash);
            }
            break;
        }
        path.resize(path.size() * 2);
    }

    return base_path + "/";
#elif defined(_WIN32)
    std::vector<char> path(MAX_PATH);
    DWORD len = 0;
    while (true) {
        len = GetModuleFileNameA(NULL, path.data(), (DWORD) path.size());
        if (len == 0) {
            return "";
        }
        // a return value equal to the buffer size means the path was truncated
        if (len < path.size()) {
            break;
        }
        path.resize(path.size() * 2);
    }
    std::string base_path(path.data(), len);
    // remove executable name
    auto last_slash = base_path.find_last_of('\\');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + "\\";
#else
    // no implementation for this platform; without this branch control would
    // reach the end of a non-void function
    return "";
#endif
}
// File name prefix of backend libraries: "ggml-" on Windows, "libggml-" elsewhere.
static std::string backend_filename_prefix() {
#ifdef _WIN32
    const char * const prefix = "ggml-";
#else
    const char * const prefix = "libggml-";
#endif
    return std::string(prefix);
}
// File name extension of backend libraries: ".dll" on Windows, ".so" elsewhere.
static std::string backend_filename_suffix() {
#ifdef _WIN32
    const char * const suffix = ".dll";
#else
    const char * const suffix = ".so";
#endif
    return std::string(suffix);
}
- static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
- // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
- // TODO: search system paths
- std::string file_prefix = backend_filename_prefix() + name + "-";
- std::vector<std::string> search_paths;
- if (user_search_path == nullptr) {
- search_paths.push_back("./");
- search_paths.push_back(get_executable_path());
- } else {
- #if defined(_WIN32)
- search_paths.push_back(std::string(user_search_path) + "\\");
- #else
- search_paths.push_back(std::string(user_search_path) + "/");
- #endif
- }
- int best_score = 0;
- std::string best_path;
- namespace fs = std::filesystem;
- for (const auto & search_path : search_paths) {
- if (!fs::exists(search_path)) {
- continue;
- }
- fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
- for (const auto & entry : dir_it) {
- if (entry.is_regular_file()) {
- std::string filename = entry.path().filename().string();
- std::string ext = entry.path().extension().string();
- if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
- dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
- if (!handle && !silent) {
- GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
- }
- if (handle) {
- auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
- if (score_fn) {
- int s = score_fn();
- #ifndef NDEBUG
- GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
- #endif
- if (s > best_score) {
- best_score = s;
- best_path = entry.path().string();
- }
- } else {
- if (!silent) {
- GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
- }
- }
- }
- }
- }
- }
- }
- if (best_score == 0) {
- // try to load the base backend
- for (const auto & search_path : search_paths) {
- std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
- if (fs::exists(path)) {
- return get_reg().load_backend(path.c_str(), silent);
- }
- }
- return nullptr;
- }
- return get_reg().load_backend(best_path.c_str(), silent);
- }
- void ggml_backend_load_all() {
- ggml_backend_load_all_from_path(nullptr);
- }
- void ggml_backend_load_all_from_path(const char * dir_path) {
- #ifdef NDEBUG
- bool silent = true;
- #else
- bool silent = false;
- #endif
- ggml_backend_load_best("blas", silent, dir_path);
- ggml_backend_load_best("cann", silent, dir_path);
- ggml_backend_load_best("cuda", silent, dir_path);
- ggml_backend_load_best("hip", silent, dir_path);
- ggml_backend_load_best("kompute", silent, dir_path);
- ggml_backend_load_best("metal", silent, dir_path);
- ggml_backend_load_best("rpc", silent, dir_path);
- ggml_backend_load_best("sycl", silent, dir_path);
- ggml_backend_load_best("vulkan", silent, dir_path);
- ggml_backend_load_best("opencl", silent, dir_path);
- ggml_backend_load_best("musa", silent, dir_path);
- ggml_backend_load_best("cpu", silent, dir_path);
- }
|