Browse Source

enable metal gpu acceleration

ggml-metal.metal must be in the same directory as the ollama binary;
otherwise, llama.cpp will not be able to find and load it.

1. go generate llama/llama_metal.go
2. go build .
3. ./ollama serve
Michael Yang 1 year ago
parent
commit
1b7183c5a1
3 changed files with 15 additions and 15 deletions
  1. 9 14
      llama/CMakeLists.txt
  2. 5 0
      llama/llama_metal.go
  3. 1 1
      server/routes.go

+ 9 - 14
llama/CMakeLists.txt

@@ -1,4 +1,6 @@
-cmake_minimum_required(VERSION 3.10) 
+cmake_minimum_required(VERSION 3.12)
+project(binding)
+
 include(FetchContent)
 
 FetchContent_Declare(
@@ -9,20 +11,13 @@ FetchContent_Declare(
 
 FetchContent_MakeAvailable(llama_cpp)
 
-if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-    set(LLAMA_METAL ON)
-    add_compile_definitions(GGML_USE_METAL)
-endif()
-
-project(binding)
-
 add_library(binding ${CMAKE_CURRENT_SOURCE_DIR}/binding/binding.cpp ${llama_cpp_SOURCE_DIR}/examples/common.cpp)
-target_compile_features(binding PRIVATE cxx_std_11)
-target_include_directories(binding PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
-target_include_directories(binding PRIVATE ${llama_cpp_SOURCE_DIR})
 target_include_directories(binding PRIVATE ${llama_cpp_SOURCE_DIR}/examples)
 target_link_libraries(binding llama ggml_static)
 
-configure_file(${llama_cpp_SOURCE_DIR}/ggml-metal.metal ${CMAKE_CURRENT_BINARY_DIR}/ggml-metal.metal COPYONLY)
-add_custom_target(copy_libllama ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different ${llama_cpp_BINARY_DIR}/libllama.a ${CMAKE_CURRENT_BINARY_DIR})
-add_custom_target(copy_libggml_static ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different ${llama_cpp_BINARY_DIR}/libggml_static.a ${CMAKE_CURRENT_BINARY_DIR})
+if (LLAMA_METAL)
+    configure_file(${llama_cpp_SOURCE_DIR}/ggml-metal.metal ${CMAKE_CURRENT_BINARY_DIR}/../../ggml-metal.metal COPYONLY)
+endif()
+
+add_custom_target(copy_libllama ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:llama> ${CMAKE_CURRENT_BINARY_DIR})
+add_custom_target(copy_libggml_static ALL COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:ggml_static> ${CMAKE_CURRENT_BINARY_DIR})

+ 5 - 0
llama/llama_metal.go

@@ -0,0 +1,5 @@
+//go:build metal
+package llama
+
+//go:generate cmake -S . -B build --fresh -DLLAMA_METAL=on
+//go:generate cmake --build build

+ 1 - 1
server/routes.go

@@ -22,7 +22,7 @@ func pull(c *gin.Context) {
 
 func generate(c *gin.Context) {
 	// TODO: these should be request parameters
-	gpulayers := 0
+	gpulayers := 1
 	tokens := 512
 	threads := runtime.NumCPU()
 	// TODO: set prompt from template