@@ -2335,9 +2335,9 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
                 invalid_param = true;
                 break;
             }
-#ifndef GGML_USE_CUBLAS
-            fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. Setting the split mode has no effect.\n");
-#endif // GGML_USE_CUBLAS
+#ifndef GGML_USE_CUDA
+            fprintf(stderr, "warning: llama.cpp was compiled without CUDA. Setting the split mode has no effect.\n");
+#endif // GGML_USE_CUDA
         }
         else if (arg == "--tensor-split" || arg == "-ts")
         {
@@ -2346,7 +2346,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
                 invalid_param = true;
                 break;
             }
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_SYCL)
+#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)
             std::string arg_next = argv[i];
 
             // split string by , and /
@@ -2367,8 +2367,8 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
                 }
             }
 #else
-            LOG_WARNING("llama.cpp was compiled without cuBLAS. It is not possible to set a tensor split.\n", {});
-#endif // GGML_USE_CUBLAS
+            LOG_WARNING("llama.cpp was compiled without CUDA. It is not possible to set a tensor split.\n", {});
+#endif // GGML_USE_CUDA
         }
         else if (arg == "--main-gpu" || arg == "-mg")
         {
@@ -2377,7 +2377,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
                 invalid_param = true;
                 break;
             }
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_SYCL)
+#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)
             params.main_gpu = std::stoi(argv[i]);
 #else
             LOG_WARNING("llama.cpp was compiled without cuBLAS. It is not possible to set a main GPU.", {});
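
Note: the idiom this diff renames is a compile-time feature guard. The GPU-related flags are always accepted by the argument parser, but they only take effect when the corresponding backend macro was defined at build time; otherwise the server just prints a warning. A minimal self-contained sketch of the same pattern follows; the my_params struct and parse_main_gpu helper are hypothetical illustrations of the idiom, not the real server code.

#include <cstdio>
#include <cstring>
#include <string>

// Hypothetical stand-in for the server's parameter struct.
struct my_params {
    int main_gpu = 0;
};

// Sketch of the guard idiom from the diff: the flag is parsed unconditionally,
// but its effect is gated on backend macros set at compile time
// (e.g. -DGGML_USE_CUDA or -DGGML_USE_SYCL).
static void parse_main_gpu(int argc, char ** argv, my_params & params) {
    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "--main-gpu") == 0 || strcmp(argv[i], "-mg") == 0) {
            if (++i >= argc) {
                fprintf(stderr, "error: missing value for --main-gpu\n");
                return;
            }
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)
            params.main_gpu = std::stoi(argv[i]);
#else
            fprintf(stderr, "warning: compiled without CUDA/SYCL, --main-gpu has no effect\n");
#endif
        }
    }
}

int main(int argc, char ** argv) {
    my_params params;
    parse_main_gpu(argc, argv, params);
    printf("main_gpu = %d\n", params.main_gpu);
    return 0;
}

Accepting the flag in every build keeps command lines portable across CPU-only and GPU builds; only the effect (and the warning text, which this change updates from "cuBLAS" to "CUDA") is gated.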