ggml-backend-reg.cpp

/**
 * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"

#include <algorithm>
#include <cctype>   // std::tolower, used by striequals below
#include <codecvt>
#include <cstring>
#include <filesystem>
#include <locale>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>

#ifdef _WIN32
#    define WIN32_LEAN_AND_MEAN
#    ifndef NOMINMAX
#        define NOMINMAX
#    endif
#    include <windows.h>
#elif defined(__APPLE__)
#    include <mach-o/dyld.h>
#    include <dlfcn.h>
#else
#    include <dlfcn.h>
#    include <unistd.h>
#endif

// Backend registry
#ifdef GGML_USE_CPU
#include "ggml-cpu.h"
#endif
#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
#ifdef GGML_USE_METAL
#include "ggml-metal.h"
#endif
#ifdef GGML_USE_SYCL
#include "ggml-sycl.h"
#endif
#ifdef GGML_USE_VULKAN
#include "ggml-vulkan.h"
#endif
#ifdef GGML_USE_BLAS
#include "ggml-blas.h"
#endif
#ifdef GGML_USE_RPC
#include "ggml-rpc.h"
#endif
#ifdef GGML_USE_CANN
#include "ggml-cann.h"
#endif
#ifdef GGML_USE_KOMPUTE
#include "ggml-kompute.h"
#endif

#ifdef _WIN32

using dl_handle = std::remove_pointer_t<HMODULE>;

struct dl_handle_deleter {
    void operator()(HMODULE handle) {
        FreeLibrary(handle);
    }
};

static dl_handle * dl_load_library(const std::wstring & path) {
    // suppress error dialogs for missing DLLs
    DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

    HMODULE handle = LoadLibraryW(path.c_str());

    SetErrorMode(old_mode);

    return handle;
}

static dl_handle * dl_load_library(const std::string & path) {
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
    return dl_load_library(converter.from_bytes(path));
}

static void * dl_get_sym(dl_handle * handle, const char * name) {
    DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

    void * p = (void *) GetProcAddress(handle, name);

    SetErrorMode(old_mode);

    return p;
}

#else

using dl_handle = void;

struct dl_handle_deleter {
    void operator()(void * handle) {
        dlclose(handle);
    }
};

static void * dl_load_library(const std::string & path) {
    dl_handle * handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);

    return handle;
}

static void * dl_get_sym(dl_handle * handle, const char * name) {
    return dlsym(handle, name);
}

#endif
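
// RAII wrapper for a loaded dynamic library: the handle is released via
// FreeLibrary/dlclose by dl_handle_deleter when the owning pointer is destroyed.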
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;

struct ggml_backend_reg_entry {
    ggml_backend_reg_t reg;
    dl_handle_ptr handle;
};
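
// Registry of all known backends and their devices. Each entry pairs a backend
// registration with the dynamic library handle it came from (null for statically
// linked backends), so the library stays loaded while the backend is registered.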
struct ggml_backend_registry {
    std::vector<ggml_backend_reg_entry> backends;
    std::vector<ggml_backend_dev_t> devices;
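
    // Statically linked backends are registered up front; the CPU backend is registered
    // last, presumably so that accelerator devices come first in the enumeration order.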
    ggml_backend_registry() {
#ifdef GGML_USE_CUDA
        register_backend(ggml_backend_cuda_reg());
#endif
#ifdef GGML_USE_METAL
        register_backend(ggml_backend_metal_reg());
#endif
#ifdef GGML_USE_SYCL
        register_backend(ggml_backend_sycl_reg());
#endif
#ifdef GGML_USE_VULKAN
        register_backend(ggml_backend_vk_reg());
#endif
#ifdef GGML_USE_CANN
        register_backend(ggml_backend_cann_reg());
#endif
#ifdef GGML_USE_BLAS
        register_backend(ggml_backend_blas_reg());
#endif
#ifdef GGML_USE_RPC
        register_backend(ggml_backend_rpc_reg());
#endif
#ifdef GGML_USE_KOMPUTE
        register_backend(ggml_backend_kompute_reg());
#endif
#ifdef GGML_USE_CPU
        register_backend(ggml_backend_cpu_reg());
#endif
    }

    ~ggml_backend_registry() {
        // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources,
        // since backend threads may still be running and accessing resources from the dynamic library
        for (auto & entry : backends) {
            if (entry.handle) {
                entry.handle.release(); // NOLINT
            }
        }
    }

    void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
        if (!reg) {
            return;
        }

#ifndef NDEBUG
        GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
                       __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
#endif
        backends.push_back({ reg, std::move(handle) });
        for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
            register_device(ggml_backend_reg_dev_get(reg, i));
        }
    }

    void register_device(ggml_backend_dev_t device) {
#ifndef NDEBUG
        GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
#endif
        devices.push_back(device);
    }
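
    // Loads a backend from a dynamic library. The library must export ggml_backend_init
    // and may export ggml_backend_score; a score of 0 means the backend is not supported
    // on this system. The returned registration must report GGML_BACKEND_API_VERSION,
    // otherwise it is rejected.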
    ggml_backend_reg_t load_backend(const char * path, bool silent) {
        dl_handle_ptr handle { dl_load_library(path) };
        if (!handle) {
            if (!silent) {
                GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path);
            }
            return nullptr;
        }

        auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
        if (score_fn && score_fn() == 0) {
            if (!silent) {
                GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path);
            }
            return nullptr;
        }

        auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init");
        if (!backend_init_fn) {
            if (!silent) {
                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path);
            }
            return nullptr;
        }

        ggml_backend_reg_t reg = backend_init_fn();
        if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
            if (!silent) {
                if (!reg) {
                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path);
                } else {
                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
                                   __func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
                }
            }
            return nullptr;
        }

        GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);

        register_backend(reg, std::move(handle));

        return reg;
    }
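
    // Removes a previously registered backend: its devices are dropped from the device
    // list and the registry entry is erased, which also releases the library handle.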
    void unload_backend(ggml_backend_reg_t reg, bool silent) {
        auto it = std::find_if(backends.begin(), backends.end(),
                               [reg](const ggml_backend_reg_entry & entry) { return entry.reg == reg; });

        if (it == backends.end()) {
            if (!silent) {
                GGML_LOG_ERROR("%s: backend not found\n", __func__);
            }
            return;
        }

        if (!silent) {
            GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
        }

        // remove devices
        devices.erase(
            std::remove_if(devices.begin(), devices.end(),
                           [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
            devices.end());

        // remove backend
        backends.erase(it);
    }
};
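
// Lazily constructed singleton: the registry (and with it the statically linked
// backends) is initialized on first use.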
static ggml_backend_registry & get_reg() {
    static ggml_backend_registry reg;
    return reg;
}

// Internal API
void ggml_backend_register(ggml_backend_reg_t reg) {
    get_reg().register_backend(reg);
}

void ggml_backend_device_register(ggml_backend_dev_t device) {
    get_reg().register_device(device);
}

// Backend (reg) enumeration
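
// Case-insensitive comparison, used for looking up backends and devices by name.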
static bool striequals(const char * a, const char * b) {
    for (; *a && *b; a++, b++) {
        if (std::tolower(*a) != std::tolower(*b)) {
            return false;
        }
    }
    return *a == *b;
}

size_t ggml_backend_reg_count() {
    return get_reg().backends.size();
}

ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
    GGML_ASSERT(index < ggml_backend_reg_count());
    return get_reg().backends[index].reg;
}

ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
    for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
        ggml_backend_reg_t reg = ggml_backend_reg_get(i);
        if (striequals(ggml_backend_reg_name(reg), name)) {
            return reg;
        }
    }
    return nullptr;
}

// Device enumeration
size_t ggml_backend_dev_count() {
    return get_reg().devices.size();
}

ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
    GGML_ASSERT(index < ggml_backend_dev_count());
    return get_reg().devices[index];
}

ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        if (striequals(ggml_backend_dev_name(dev), name)) {
            return dev;
        }
    }
    return nullptr;
}

ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
    for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        if (ggml_backend_dev_type(dev) == type) {
            return dev;
        }
    }
    return nullptr;
}

// Convenience functions
ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
    ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
    if (!dev) {
        return nullptr;
    }
    return ggml_backend_dev_init(dev, params);
}

ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
    ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
    if (!dev) {
        return nullptr;
    }
    return ggml_backend_dev_init(dev, params);
}
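
// Picks the "best" available device: the first GPU device if one exists, otherwise the CPU.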
ggml_backend_t ggml_backend_init_best(void) {
    ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
    if (!dev) {
        dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    }
    if (!dev) {
        return nullptr;
    }
    return ggml_backend_dev_init(dev, nullptr);
}

// Dynamic loading
ggml_backend_reg_t ggml_backend_load(const char * path) {
    return get_reg().load_backend(path, false);
}

void ggml_backend_unload(ggml_backend_reg_t reg) {
    get_reg().unload_backend(reg, true);
}
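
// Returns the directory containing the running executable (with a trailing path
// separator); falls back to "./" on Linux and "" on Windows if the path cannot be
// determined. The result is used as a search path for backend libraries.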
static std::string get_executable_path() {
#if defined(__APPLE__)
    // get executable path
    std::vector<char> path;
    uint32_t size;
    while (true) {
        size = path.size();
        if (_NSGetExecutablePath(path.data(), &size) == 0) {
            break;
        }
        path.resize(size);
    }
    std::string base_path(path.data(), size);
    // remove executable name
    auto last_slash = base_path.find_last_of('/');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + "/";
#elif defined(__linux__)
    std::string base_path = ".";
    std::vector<char> path(1024);
    while (true) {
        // get executable path
        ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
        if (len == -1) {
            break;
        }
        if (len < (ssize_t) path.size()) {
            base_path = std::string(path.data(), len);
            // remove executable name
            auto last_slash = base_path.find_last_of('/');
            if (last_slash != std::string::npos) {
                base_path = base_path.substr(0, last_slash);
            }
            break;
        }
        path.resize(path.size() * 2);
    }

    return base_path + "/";
#elif defined(_WIN32)
    std::vector<char> path(MAX_PATH);
    DWORD len = GetModuleFileNameA(NULL, path.data(), path.size());
    if (len == 0) {
        return "";
    }
    std::string base_path(path.data(), len);
    // remove executable name
    auto last_slash = base_path.find_last_of('\\');
    if (last_slash != std::string::npos) {
        base_path = base_path.substr(0, last_slash);
    }
    return base_path + "\\";
#endif
}
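
// Backend libraries follow the platform naming convention used below:
// ggml-<name>.dll on Windows and libggml-<name>.so elsewhere (the .so suffix is
// also used on macOS here).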
static std::string backend_filename_prefix() {
#ifdef _WIN32
    return "ggml-";
#else
    return "libggml-";
#endif
}

static std::string backend_filename_suffix() {
#ifdef _WIN32
    return ".dll";
#else
    return ".so";
#endif
}
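
// Probes every [lib]ggml-<name>-*.[so|dll] variant found in the search paths (e.g. builds
// of the same backend targeting different instruction sets), queries each one's
// ggml_backend_score, and loads the highest-scoring supported variant. If no scored
// variant is found, it falls back to the plain [lib]ggml-<name>.[so|dll] library.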
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent) {
    // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
    // TODO: search system paths
    std::vector<std::string> search_paths = { "./", get_executable_path() };
    std::string file_prefix = backend_filename_prefix() + name + "-";

    int best_score = 0;
    std::string best_path;

    namespace fs = std::filesystem;
    for (const auto & search_path : search_paths) {
        if (!fs::exists(search_path)) {
            continue;
        }
        for (const auto & entry : fs::directory_iterator(search_path)) {
            if (entry.is_regular_file()) {
                std::string filename = entry.path().filename().string();
                std::string ext = entry.path().extension().string();
                if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
                    dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
                    if (!handle && !silent) {
                        GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
                    }
                    if (handle) {
                        auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
                        if (score_fn) {
                            int s = score_fn();
#ifndef NDEBUG
                            GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
#endif
                            if (s > best_score) {
                                best_score = s;
                                best_path = entry.path().string();
                            }
                        }
                    }
                }
            }
        }
    }

    if (best_score == 0) {
        // try to load the base backend
        for (const auto & search_path : search_paths) {
            std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
            if (fs::exists(path)) {
                return get_reg().load_backend(path.c_str(), silent);
            }
        }
        return nullptr;
    }

    return get_reg().load_backend(best_path.c_str(), silent);
}

void ggml_backend_load_all() {
    ggml_backend_load_best("blas", true);
    ggml_backend_load_best("cann", true);
    ggml_backend_load_best("cuda", true);
    ggml_backend_load_best("hip", true);
    ggml_backend_load_best("kompute", true);
    ggml_backend_load_best("metal", true);
    ggml_backend_load_best("rpc", true);
    ggml_backend_load_best("sycl", true);
    ggml_backend_load_best("vulkan", true);
    ggml_backend_load_best("musa", true);
    ggml_backend_load_best("cpu", true);
}
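
// Illustrative usage sketch (not part of the upstream file): a typical application
// loads whatever backend libraries are available and then initializes the best device.
//
//     ggml_backend_load_all();
//     ggml_backend_t backend = ggml_backend_init_best();
//     if (backend != nullptr) {
//         // ... build and compute graphs on this backend ...
//         ggml_backend_free(backend);  // declared in ggml-backend.h
//     }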