
Code shuffle to clean up the llm dir

Daniel Hiltgen, 1 year ago
commit 77d96da94b

+ 1 - 1
.dockerignore

@@ -2,7 +2,7 @@
 ollama
 app
 dist
-llm/llama.cpp/gguf
+llm/llama.cpp
 .env
 .cache
 test_data

+ 1 - 1
.gitmodules

@@ -1,5 +1,5 @@
 [submodule "llm/llama.cpp/gguf"]
-    path = llm/llama.cpp/gguf
+	path = llm/llama.cpp
     url = https://github.com/ggerganov/llama.cpp.git
     ignore = dirty
     shallow = true

+ 1 - 1
llm/llama.cpp/CMakeLists.txt → llm/ext_server/CMakeLists.txt

@@ -2,7 +2,7 @@
 
 set(TARGET ext_server)
 option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
-add_library(${TARGET} STATIC ../../../ext_server.cpp)
+add_library(${TARGET} STATIC ../../../ext_server/ext_server.cpp)
 target_include_directories(${TARGET} PRIVATE ../../common)
 target_include_directories(${TARGET} PRIVATE ../..)
 target_include_directories(${TARGET} PRIVATE ../../..)

+ 4 - 0
llm/ext_server/README.md

@@ -0,0 +1,4 @@
+# Extern C Server
+
+This directory contains a thin facade we layer on top of the llama.cpp server
+to expose `extern "C"` interfaces, so the functionality can be reached through direct, in-process API calls.
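For orientation, a hedged sketch of how this in-process surface is consumed from Go. Only `llama_server_init`, `ext_server_params_t`, and `ext_server_resp_t` are confirmed by this commit (see `llm/shim_ext_server.go` below); the file scaffolding around them is illustrative, not the repository's actual code.

```go
package llm

/*
#include "ext_server.h" // the facade header this commit moves to llm/ext_server/
*/
import "C"

// initSketch makes the direct, in-process call: no HTTP round trip, just an
// extern "C" symbol resolved at link time against the static server library.
func initSketch(sparams *C.ext_server_params_t) C.ext_server_resp_t {
	var resp C.ext_server_resp_t
	C.llama_server_init(sparams, &resp)
	return resp
}
```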

+ 0 - 0
llm/llama.cpp/ext_server.cpp → llm/ext_server/ext_server.cpp


+ 0 - 0
llm/llama.cpp/ext_server.h → llm/ext_server/ext_server.h


+ 9 - 9
llm/ext_server_common.go

@@ -1,7 +1,7 @@
 package llm
 
 /*
-#cgo CFLAGS: -I${SRCDIR}/llama.cpp -I${SRCDIR}/llama.cpp/gguf -I${SRCDIR}/llama.cpp/gguf/common -I${SRCDIR}/llama.cpp/gguf/examples/server
+#cgo CFLAGS: -I${SRCDIR}/ext_server -I${SRCDIR}/llama.cpp -I${SRCDIR}/llama.cpp/common -I${SRCDIR}/llama.cpp/examples/server
 #cgo CFLAGS: -DNDEBUG -DLLAMA_SERVER_LIBRARY=1 -D_XOPEN_SOURCE=600 -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
 #cgo CFLAGS: -Wmissing-noreturn -Wall -Wextra -Wcast-qual -Wno-unused-function -Wno-array-bounds
 #cgo CPPFLAGS: -Ofast -Wall -Wextra -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-unused-but-set-variable
@@ -10,17 +10,17 @@ package llm
 #cgo darwin CPPFLAGS: -DGGML_USE_METAL -DGGML_METAL_NDEBUG
 #cgo darwin LDFLAGS: -lc++ -framework Accelerate
 #cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
-#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libcommon.a
-#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libext_server.a
-#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libllama.a
-#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/darwin/metal/lib/libggml_static.a
+#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libcommon.a
+#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libext_server.a
+#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libllama.a
+#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libggml_static.a
 #cgo linux CFLAGS: -D_GNU_SOURCE
 #cgo linux windows CFLAGS: -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_USE_CUBLAS
 #cgo linux LDFLAGS: -L/usr/local/cuda/targets/x86_64-linux/lib -L/usr/local/cuda/lib64 -L/usr/local/cuda/targets/x86_64-linux/lib/stubs
-#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libext_server.a
-#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libcommon.a
-#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libllama.a
-#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/linux/cpu/lib/libggml_static.a
+#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libext_server.a
+#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libcommon.a
+#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libllama.a
+#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libggml_static.a
 #cgo linux LDFLAGS: -lrt -ldl -lstdc++ -lm
 #cgo linux windows LDFLAGS: -lpthread
 
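A note on the paths above: cgo expands `${SRCDIR}` to the absolute directory of the Go source file, so these flags resolve relative to `llm/`. The `gguf/` segment drops out everywhere because the submodule now sits directly at `llm/llama.cpp` rather than nested one level deeper.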

+ 7 - 7
llm/llama.cpp/gen_common.sh → llm/generate/gen_common.sh

@@ -1,7 +1,7 @@
 # common logic across linux and darwin
 
 init_vars() {
-    LLAMACPP_DIR=gguf
+    LLAMACPP_DIR=../llama.cpp
     PATCHES="0001-Expose-callable-API-for-server.patch"
     CMAKE_DEFS=""
     CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
@@ -19,18 +19,18 @@ git_module_setup() {
         return
     fi
     git submodule init
-    git submodule update --force gguf
+    git submodule update --force ${LLAMACPP_DIR}
 
 }
 
 apply_patches() {
     # Wire up our CMakefile
-    if ! grep ollama gguf/examples/server/CMakeLists.txt; then
-        echo 'include (../../../CMakeLists.txt) # ollama' >>gguf/examples/server/CMakeLists.txt
+    if ! grep ollama ${LLAMACPP_DIR}/examples/server/CMakeLists.txt; then
+        echo 'include (../../../ext_server/CMakeLists.txt) # ollama' >>${LLAMACPP_DIR}/examples/server/CMakeLists.txt
     fi
     # Avoid duplicate main symbols when we link into the cgo binary
-    sed -e 's/int main(/int __main(/g' <./gguf/examples/server/server.cpp >./gguf/examples/server/server.cpp.tmp &&
-        mv ./gguf/examples/server/server.cpp.tmp ./gguf/examples/server/server.cpp
+    sed -e 's/int main(/int __main(/g' <${LLAMACPP_DIR}/examples/server/server.cpp >${LLAMACPP_DIR}/examples/server/server.cpp.tmp &&
+        mv ${LLAMACPP_DIR}/examples/server/server.cpp.tmp ${LLAMACPP_DIR}/examples/server/server.cpp
 }
 
 build() {
@@ -49,5 +49,5 @@ install() {
 
 # Keep the local tree clean after we're done with the build
 cleanup() {
-    (cd gguf/examples/server/ && git checkout CMakeLists.txt server.cpp)
+    (cd ${LLAMACPP_DIR}/examples/server/ && git checkout CMakeLists.txt server.cpp)
 }
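Why the `__main` rename in `apply_patches`: a cgo binary already provides the process's C `main` via the Go runtime, so linking llama.cpp's server code unmodified would fail with a duplicate-symbol error; renaming it to `__main` lets `server.cpp` link in as an ordinary library object.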

+ 2 - 2
llm/llama.cpp/gen_darwin.sh → llm/generate/gen_darwin.sh

@@ -1,6 +1,6 @@
 #!/bin/bash
 # This script is intended to run inside the go generate
-# working directory must be ../llm/llama.cpp
+# working directory must be ./llm/generate/
 
 # TODO - add hardening to detect missing tools (cmake, etc.)
 
@@ -10,7 +10,7 @@ echo "Starting darwin generate script"
 source $(dirname $0)/gen_common.sh
 init_vars
 CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}"
-BUILD_DIR="gguf/build/darwin/metal"
+BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal"
 case "${GOARCH}" in
 "amd64")
     CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"

+ 4 - 4
llm/llama.cpp/gen_linux.sh → llm/generate/gen_linux.sh

@@ -1,6 +1,6 @@
 #!/bin/bash
 # This script is intended to run inside the go generate
-# working directory must be llm/llama.cpp
+# working directory must be llm/generate/
 
 # First we build our default built-in library which will be linked into the CGO
 # binary as a normal dependency. This default build is CPU based.
@@ -52,7 +52,7 @@ apply_patches
 # CPU first for the default library
 #
 CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
-BUILD_DIR="gguf/build/linux/cpu"
+BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
 
 build
 install
@@ -64,7 +64,7 @@ if [ -d /usr/local/cuda/lib64/ ]; then
     echo "CUDA libraries detected - building dynamic CUDA library"
     init_vars
     CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
-    BUILD_DIR="gguf/build/linux/cuda"
+    BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda"
     CUDA_LIB_DIR=/usr/local/cuda/lib64
     build
     install
@@ -98,7 +98,7 @@ if [ -d "${ROCM_PATH}" ]; then
     echo "ROCm libraries detected - building dynamic ROCm library"
     init_vars
     CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
-    BUILD_DIR="gguf/build/linux/rocm"
+    BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm"
     build
     install
     gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
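Design note: the CPU build is the baseline that gets linked statically into the Go binary through the cgo `LDFLAGS` shown earlier, while the CUDA and ROCm variants are packaged as shared libraries (`libext_server.so`) that the shim code loads at runtime only when matching hardware is detected.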

+ 12 - 11
llm/llama.cpp/gen_windows.ps1 → llm/generate/gen_windows.ps1

@@ -3,6 +3,7 @@
 $ErrorActionPreference = "Stop"
 
 function init_vars {
+    $script:llamacppDir = "../llama.cpp"
     $script:patches = @("0001-Expose-callable-API-for-server.patch")
     $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-DLLAMA_F16C=off", "-DLLAMA_FMA=off", "-DLLAMA_AVX512=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX=on", "-A","x64")
     $script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static")
@@ -19,25 +20,25 @@ function git_module_setup {
     # TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo
     & git submodule init
     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-    & git submodule update --force gguf
+    & git submodule update --force "${script:llamacppDir}"
     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
 }
 
 function apply_patches {
     # Wire up our CMakefile
-    if (!(Select-String -Path "gguf/examples/server/CMakeLists.txt" -Pattern 'ollama')) {
-        Add-Content -Path "gguf/examples/server/CMakeLists.txt" -Value 'include (../../../CMakeLists.txt) # ollama'
+    if (!(Select-String -Path "${script:llamacppDir}/examples/server/CMakeLists.txt" -Pattern 'ollama')) {
+        Add-Content -Path "${script:llamacppDir}/examples/server/CMakeLists.txt" -Value 'include (../../../ext_server/CMakeLists.txt) # ollama'
     }
     # Avoid duplicate main symbols when we link into the cgo binary
-    $content = Get-Content -Path "./gguf/examples/server/server.cpp"
+    $content = Get-Content -Path "${script:llamacppDir}/examples/server/server.cpp"
     $content = $content -replace 'int main\(', 'int __main('
-    Set-Content -Path "./gguf/examples/server/server.cpp" -Value $content
+    Set-Content -Path "${script:llamacppDir}/examples/server/server.cpp" -Value $content
 }
 
 function build {
-    write-host "generating config with: cmake -S gguf -B $script:buildDir $script:cmakeDefs"
+    write-host "generating config with: cmake -S ${script:llamacppDir} -B $script:buildDir $script:cmakeDefs"
     & cmake --version
-    & cmake -S gguf -B $script:buildDir $script:cmakeDefs
+    & cmake -S "${script:llamacppDir}" -B $script:buildDir $script:cmakeDefs
     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
     write-host "building with: cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })"
     & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })
@@ -55,7 +56,7 @@ function install {
 }
 
 function cleanup {
-    Set-Location "gguf/examples/server"
+    Set-Location "${script:llamacppDir}/examples/server"
     git checkout CMakeLists.txt server.cpp
 }
 
@@ -64,20 +65,20 @@ git_module_setup
 apply_patches
 
 # first build CPU based
-$script:buildDir="gguf/build/windows/cpu"
+$script:buildDir="${script:llamacppDir}/build/windows/cpu"
 
 build
 install
 
 # Then build cuda as a dynamically loaded library
 init_vars
-$script:buildDir="gguf/build/windows/cuda"
+$script:buildDir="${script:llamacppDir}/build/windows/cuda"
 $script:cmakeDefs += @("-DLLAMA_CUBLAS=ON")
 build
 install
 
 # TODO - actually implement ROCm support on windows
-$script:buildDir="gguf/build/windows/rocm"
+$script:buildDir="${script:llamacppDir}/build/windows/rocm"
 
 rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
 md "${script:buildDir}/lib" -ea 0 > $null

+ 1 - 1
llm/llama.cpp/generate_darwin.go → llm/generate/generate_darwin.go

@@ -1,3 +1,3 @@
-package llm
+package generate
 
 //go:generate sh ./gen_darwin.sh

+ 1 - 1
llm/llama.cpp/generate_linux.go → llm/generate/generate_linux.go

@@ -1,3 +1,3 @@
-package llm
+package generate
 
 //go:generate bash ./gen_linux.sh

+ 1 - 1
llm/llama.cpp/generate_windows.go → llm/generate/generate_windows.go

@@ -1,3 +1,3 @@
-package llm
+package generate
 
 //go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1
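Usage note (inferred from the `//go:generate` directives and the working-directory comments in the scripts, so treat the exact invocation as an assumption): `go generate` runs each directive in the directory of the file declaring it, so `go generate ./llm/generate/` from the repository root executes the gen scripts with `llm/generate/` as the working directory, which is exactly what the scripts now require.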

+ 2 - 2
llm/shim_darwin.go

@@ -13,7 +13,7 @@ import (
 	"github.com/jmorganca/ollama/api"
 )
 
-//go:embed llama.cpp/gguf/ggml-metal.metal
+//go:embed llama.cpp/ggml-metal.metal
 var libEmbed embed.FS
 
 func newDynamicShimExtServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
@@ -22,7 +22,7 @@ func newDynamicShimExtServer(library, model string, adapters, projectors []strin
 }
 
 func nativeInit(workdir string) error {
-	err := extractPayloadFiles(workdir, "llama.cpp/gguf/ggml-metal.metal")
+	err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
 	if err != nil {
 		if err == payloadMissing {
 			// TODO perhaps consider this a hard failure on arm macs?
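For context on what these `//go:embed` payloads are for, a hedged sketch of the embed-then-extract pattern; the real `extractPayloadFiles` body is not part of this diff, and `extractOneSketch` is a hypothetical stand-in:

```go
package llm

import (
	"embed"
	"io"
	"os"
	"path/filepath"
)

//go:embed llama.cpp/ggml-metal.metal
var payload embed.FS

// extractOneSketch copies one embedded payload into workdir so the runtime
// can load it from disk (hypothetical stand-in for extractPayloadFiles).
func extractOneSketch(workdir, name string) error {
	src, err := payload.Open(name)
	if err != nil {
		return err
	}
	defer src.Close()
	dst, err := os.Create(filepath.Join(workdir, filepath.Base(name)))
	if err != nil {
		return err
	}
	defer dst.Close()
	_, err = io.Copy(dst, src)
	return err
}
```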

+ 6 - 4
llm/shim_ext_server.go

@@ -34,6 +34,8 @@ type shimExtServer struct {
 var shimMutex sync.Mutex
 var llm *shimExtServer
 
+const pathComponentCount = 6
+
 func (llm *shimExtServer) llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t) {
 	C.dynamic_shim_llama_server_init(llm.s, sparams, err)
 }
@@ -112,7 +114,7 @@ func (llm *shimExtServer) Close() {
 }
 
 func nativeInit(workdir string) error {
-	libs, err := extractDynamicLibs(workdir, "llama.cpp/gguf/build/*/*/lib/*")
+	libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*")
 	if err != nil {
 		if err == payloadMissing {
 			log.Printf("%s", payloadMissing)
@@ -151,13 +153,13 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
 
 	for _, file := range files {
 		pathComps := strings.Split(file, "/")
-		if len(pathComps) != 7 {
+		if len(pathComps) != pathComponentCount {
 			log.Printf("unexpected payload components: %v", pathComps)
 			continue
 		}
-		// llama.cpp/gguf/build/$OS/$VARIANT/lib/$LIBRARY
+		// llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY
 		// Include the variant in the path to avoid conflicts between multiple server libs
-		targetDir := filepath.Join(workDir, pathComps[4])
+		targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
 		srcFile, err := libEmbed.Open(file)
 		if err != nil {
 			return nil, fmt.Errorf("read payload %s: %v", file, err)
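The arithmetic above is easier to see with a concrete path. A minimal, runnable sketch (the sample filename is assumed, matching the embed glob):

```go
package main

import (
	"fmt"
	"strings"
)

// Mirrors the constant introduced in this commit:
// llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY has six slash-separated parts.
const pathComponentCount = 6

func main() {
	file := "llama.cpp/build/linux/cpu/lib/libext_server.so"
	pathComps := strings.Split(file, "/")
	if len(pathComps) != pathComponentCount {
		fmt.Println("unexpected payload components:", pathComps)
		return
	}
	// pathComponentCount-3 indexes $VARIANT ("cpu" here), which names the
	// per-variant target directory so multiple server libs don't collide.
	fmt.Println(pathComps[pathComponentCount-3])
}
```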

+ 1 - 1
llm/shim_ext_server_linux.go

@@ -10,7 +10,7 @@ import (
 	"strings"
 )
 
-//go:embed llama.cpp/gguf/build/*/*/lib/*.so
+//go:embed llama.cpp/build/*/*/lib/*.so
 var libEmbed embed.FS
 
 func updatePath(dir string) {

+ 1 - 1
llm/shim_ext_server_windows.go

@@ -8,7 +8,7 @@ import (
 	"strings"
 )
 
-//go:embed llama.cpp/gguf/build/windows/*/lib/*.dll
+//go:embed llama.cpp/build/windows/*/lib/*.dll
 var libEmbed embed.FS
 
 func updatePath(dir string) {

+ 0 - 0
llm/llama.cpp/gguf → llm/llama.cpp