10 月之前 · 8ed51cac37
--- a/llm/generate/gen_linux.sh
+++ b/llm/generate/gen_linux.sh
@@ -178,7 +178,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
 
				         CMAKE_CUDA_DEFS="-DLLAMA_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}"
			
 
				         echo "Building custom CUDA GPU"
			
 
				     else
			
 
				-        CMAKE_CUDA_DEFS="-DLLAMA_CUDA=on -DLLAMA_CUDA_FORCE_MMQ=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
			
 
				+        CMAKE_CUDA_DEFS="-DLLAMA_CUDA=on -DCMAKE_CUDA_FLAGS=-t8 -DLLAMA_CUDA_FORCE_MMQ=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
			
 
				     fi
			
 
				     CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS}"
			
 
				     BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
			
--- a/llm/generate/gen_windows.ps1
+++ b/llm/generate/gen_windows.ps1
@@ -1,5 +1,7 @@
 
				 #!powershell
			
 
				 
			
 
				+$ErrorActionPreference = "Stop"
			
 
				+
			
 
				 function amdGPUs {
			
 
				     if ($env:AMDGPU_TARGETS) {
			
 
				         return $env:AMDGPU_TARGETS
			
@@ -84,9 +86,9 @@ function init_vars {
 
				 function git_module_setup {
			
 
				     # TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo
			
 
				     & git submodule init
			
 
				-    if ($LASTEXITCODE -ne 0) { throw($LASTEXITCODE)}
			
 
				+    if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
			
 
				     & git submodule update --force "${script:llamacppDir}"
			
 
				-    if ($LASTEXITCODE -ne 0) { throw($LASTEXITCODE)}
			
 
				+    if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
			
 
				 }
			
 
				 
			
 
				 function apply_patches {
			
@@ -120,7 +122,7 @@ function build {
 
				     write-host "generating config with: cmake -S ${script:llamacppDir} -B $script:buildDir $script:cmakeDefs"
			
 
				     & cmake --version
			
 
				     & cmake -S "${script:llamacppDir}" -B $script:buildDir $script:cmakeDefs
			
 
				-    if ($LASTEXITCODE -ne 0) { throw($LASTEXITCODE)}
			
 
				+    if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
			
 
				     if ($cmakeDefs -contains "-G") {
			
 
				         $extra=@("-j8")
			
 
				     } else {
			
@@ -128,7 +130,7 @@ function build {
 
				     }
			
 
				     write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra"
			
 
				     & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra
			
 
				-    if ($LASTEXITCODE -ne 0) { write-host "cmake build exit status $LASTEXITCODE"; throw($LASTEXITCODE)}
			
 
				+    if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
			
 
				     # Rearrange output to be consistent between different generators
			
 
				     if ($null -ne ${script:config} -And (test-path -path "${script:buildDir}/bin/${script:config}" ) ) {
			
 
				         mv -force "${script:buildDir}/bin/${script:config}/*" "${script:buildDir}/bin/"
			
@@ -142,7 +144,7 @@ function sign {
 
				         foreach ($file in @(get-childitem "${script:buildDir}/bin/*.exe") + @(get-childitem "${script:buildDir}/bin/*.dll")){
			
 
				             & "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" `
			
 
				                 /csp "Google Cloud KMS Provider" /kc "${env:KEY_CONTAINER}" $file
			
 
				-            if ($LASTEXITCODE -ne 0) { throw($LASTEXITCODE)}
			
 
				+            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
			
 
				         }
			
 
				     }
			
 
				 }
			
@@ -218,13 +220,7 @@ function build_static() {
 
				     }
			
 
				 }
			
 
				 
			
 
				-function build_cpu() {
			
 
				-    if ($script:ARCH -eq "arm64") {
			
 
				-        $gen_arch = "ARM64"
			
 
				-    } else { # amd64
			
 
				-        $gen_arch = "x64"
			
 
				-    }
			
 
				-
			
 
				+function build_cpu($gen_arch) {
			
 
				     if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
			
 
				         # remaining llama.cpp builds use MSVC 
			
 
				         init_vars
			
@@ -287,7 +283,7 @@ function build_cuda() {
 
				             "-DLLAMA_AVX=on",
			
 
				             "-DLLAMA_AVX2=off",
			
 
				             "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR",
			
 
				-            "-DCMAKE_CUDA_FLAGS=-t8"
			
 
				+            "-DCMAKE_CUDA_FLAGS=-t8",
			
 
				             "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}"
			
 
				             )
			
 
				         if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
			
@@ -410,29 +406,16 @@ init_vars
 
				 if ($($args.count) -eq 0) {
			
 
				     git_module_setup
			
 
				     apply_patches
			
 
				-
			
 
				-    $tasks = @("build_static", "build_cpu")
			
 
				-    $jobs = @()
			
 
				-    if ($script:ARCH -ne "arm64") {
			
 
				-        $tasks += $("build_cpu_avx", "build_cpu_avx2", "build_cuda", "build_oneapi", "build_rocm")
			
 
				-    }
			
 
				-    foreach ($t in $tasks) {
			
 
				-        $jobs += @(Start-ThreadJob -ThrottleLimit 12 -FilePath .\gen_windows.ps1 -ArgumentList $t -Name $t)
			
 
				-    }
			
 
				-    get-job
			
 
				-    foreach ($job in $jobs) {
			
 
				-        write-host "----" $job.Name output follows
			
 
				-        receive-job -wait -job $job
			
 
				-        write-host "----" $job.Name $job.State
			
 
				-        write-host ""
			
 
				-        if ($job.State -contains 'Failed') {
			
 
				-            cleanup
			
 
				-            write-host "Terminating remaining jobs (this takes a while, you can ^C)"
			
 
				-            # TODO find some way to kill the spawned cmake processes faster
			
 
				-            remove-job -force -job $jobs
			
 
				-            exit(-1)
			
 
				-        }
			
 
				-        get-job
			
 
				+    build_static
			
 
				+    if ($script:ARCH -eq "arm64") {
			
 
				+        build_cpu("ARM64")
			
 
				+    } else { # amd64
			
 
				+        build_cpu("x64")
			
 
				+        build_cpu_avx
			
 
				+        build_cpu_avx2
			
 
				+        build_cuda
			
 
				+        build_oneapi
			
 
				+        build_rocm
			
 
				     }
			
 
				 
			
 
				     cleanup