@@ -2335,9 +2335,9 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
                 invalid_param = true;
                 break;
             }
-#ifndef GGML_USE_CUBLAS
-            fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. Setting the split mode has no effect.\n");
-#endif // GGML_USE_CUBLAS
+#ifndef GGML_USE_CUDA
+            fprintf(stderr, "warning: llama.cpp was compiled without CUDA. Setting the split mode has no effect.\n");
+#endif // GGML_USE_CUDA
         }
         else if (arg == "--tensor-split" || arg == "-ts")
         {
@@ -2346,7 +2346,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
                 invalid_param = true;
                 break;
             }
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_SYCL)
+#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)
             std::string arg_next = argv[i];
 
             // split string by , and /
@@ -2367,8 +2367,8 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
                 }
             }
 #else
-            LOG_WARNING("llama.cpp was compiled without cuBLAS. It is not possible to set a tensor split.\n", {});
-#endif // GGML_USE_CUBLAS
+            LOG_WARNING("llama.cpp was compiled without CUDA. It is not possible to set a tensor split.\n", {});
+#endif // GGML_USE_CUDA
         }
         else if (arg == "--main-gpu" || arg == "-mg")
         {
@@ -2377,7 +2377,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
                 invalid_param = true;
                 break;
             }
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_SYCL)
+#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)
             params.main_gpu = std::stoi(argv[i]);
 #else
             LOG_WARNING("llama.cpp was compiled without cuBLAS. It is not possible to set a main GPU.", {});
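
Note: the idiom this diff renames is a compile-time feature guard. The GPU-related flags are always accepted by the argument parser, but they only take effect when the corresponding backend macro was defined at build time; otherwise the server just prints a warning. A minimal self-contained sketch of the same pattern follows; the my_params struct and parse_main_gpu helper are hypothetical illustrations of the idiom, not the real server code.

#include <cstdio>
#include <cstring>
#include <string>

// Hypothetical stand-in for the server's parameter struct.
struct my_params {
    int main_gpu = 0;
};

// Sketch of the guard idiom from the diff: the flag is parsed unconditionally,
// but its effect is gated on backend macros set at compile time
// (e.g. -DGGML_USE_CUDA or -DGGML_USE_SYCL).
static void parse_main_gpu(int argc, char ** argv, my_params & params) {
    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "--main-gpu") == 0 || strcmp(argv[i], "-mg") == 0) {
            if (++i >= argc) {
                fprintf(stderr, "error: missing value for --main-gpu\n");
                return;
            }
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)
            params.main_gpu = std::stoi(argv[i]);
#else
            fprintf(stderr, "warning: compiled without CUDA/SYCL, --main-gpu has no effect\n");
#endif
        }
    }
}

int main(int argc, char ** argv) {
    my_params params;
    parse_main_gpu(argc, argv, params);
    printf("main_gpu = %d\n", params.main_gpu);
    return 0;
}

Accepting the flag in every build keeps command lines portable across CPU-only and GPU builds; only the effect (and the warning text, which this change updates from "cuBLAS" to "CUDA") is gated.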