Prechádzať zdrojové kódy

move `runner` package down

jmorganca pred 11 mesiacmi
rodič
commit
4dd63c1fef
7 zmenených súborov: 100 pridaní a 51 odobraní
  1. 2 2
      llama/README.md
  2. 1 0
      llama/ggml-metal.m
  3. 2 1
      llama/llama.go
  4. 70 48
      llama/sync.sh
  5. 0 0
      runner/README.md
  6. 0 0
      runner/runner.go
  7. 25 0
      scripts/build_llama_darwin.sh

+ 2 - 2
llama/README.md

@@ -58,9 +58,9 @@ go build -tags=cuda .
 
 
 ### ROCm
 ### ROCm
 
 
-Install [ROCm 5.7.1](https://rocm.docs.amd.com/en/docs-5.7.1/) and [Strawberry Perl](https://strawberryperl.com/):
+Install [ROCm 5.7.1](https://rocm.docs.amd.com/en/docs-5.7.1/) and [Strawberry Perl](https://strawberryperl.com/).
 
 
-Build `ggml-hipblas.dll`:
+Then, build `ggml-hipblas.dll`:
 
 
 ```shell
 ```shell
 ./hipblas.sh
 ./hipblas.sh

+ 1 - 0
llama/ggml-metal.m

@@ -1,3 +1,4 @@
+// Build constraint: compile this file only for macOS on arm64. The modern
+// constraint line takes a boolean expression (&&, ||, !); the comma form is
+// only accepted by legacy "+build" lines.
+//go:build darwin && arm64
 #import "ggml-metal.h"
 #import "ggml-metal.h"
 
 
 #import "ggml-backend-impl.h"
 #import "ggml-backend-impl.h"

+ 2 - 1
llama/llama.go

@@ -2,8 +2,9 @@ package llama
 
 
 // #cgo darwin,arm64 CFLAGS: -std=c11 -DGGML_USE_METAL -DGGML_METAL_EMBED_LIBRARY -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
 // #cgo darwin,arm64 CFLAGS: -std=c11 -DGGML_USE_METAL -DGGML_METAL_EMBED_LIBRARY -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
 // #cgo darwin,arm64 CXXFLAGS: -std=c++11 -DGGML_USE_METAL -DGGML_METAL_EMBED_LIBRARY -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
 // #cgo darwin,arm64 CXXFLAGS: -std=c++11 -DGGML_USE_METAL -DGGML_METAL_EMBED_LIBRARY -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
-// #cgo darwin,amd64 CXXFLAGS: -std=c++11
 // #cgo darwin,arm64 LDFLAGS: -ld_classic ${SRCDIR}/ggml-metal.o -framework Foundation -framework Metal -framework MetalKit -framework Accelerate
 // #cgo darwin,arm64 LDFLAGS: -ld_classic ${SRCDIR}/ggml-metal.o -framework Foundation -framework Metal -framework MetalKit -framework Accelerate
+// #cgo darwin,amd64 CFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers
+// #cgo darwin,amd64 CXXFLAGS: -std=c++11 -Wno-incompatible-pointer-types-discards-qualifiers
 // #cgo darwin,amd64 LDFLAGS: -ld_classic -framework Foundation -framework Accelerate
 // #cgo darwin,amd64 LDFLAGS: -ld_classic -framework Foundation -framework Accelerate
 // #cgo windows LDFLAGS: -lmsvcrt
 // #cgo windows LDFLAGS: -lmsvcrt
 // #cgo avx CFLAGS: -mavx
 // #cgo avx CFLAGS: -mavx

+ 70 - 48
llama/sync.sh

@@ -6,51 +6,73 @@ src_dir=$1
 # Set the destination directory (current directory)
 # Set the destination directory (current directory)
 dst_dir="."
 dst_dir="."
 
 
-# llama.cpp
-cp $src_dir/unicode.cpp $dst_dir/unicode.cpp
-cp $src_dir/unicode.h $dst_dir/unicode.h
-cp $src_dir/unicode-data.cpp $dst_dir/unicode-data.cpp
-cp $src_dir/unicode-data.h $dst_dir/unicode-data.h
-cp $src_dir/llama.cpp $dst_dir/llama.cpp
-cp $src_dir/llama.h $dst_dir/llama.h
-cp $src_dir/sgemm.cpp $dst_dir/sgemm.cpp
-cp $src_dir/sgemm.h $dst_dir/sgemm.h
-
-# ggml
-cp $src_dir/ggml.c $dst_dir/ggml.c
-cp $src_dir/ggml.h $dst_dir/ggml.h
-cp $src_dir/ggml-quants.c $dst_dir/ggml-quants.c
-cp $src_dir/ggml-quants.h $dst_dir/ggml-quants.h
-cp $src_dir/ggml-metal.metal $dst_dir/ggml-metal.metal
-cp $src_dir/ggml-metal.h $dst_dir/ggml-metal.h
-cp $src_dir/ggml-metal.m $dst_dir/ggml-metal.m
-cp $src_dir/ggml-impl.h $dst_dir/ggml-impl.h
-cp $src_dir/ggml-cuda.h $dst_dir/ggml-cuda.h
-cp $src_dir/ggml-cuda.cu $dst_dir/ggml-cuda.cu
-cp $src_dir/ggml-common.h $dst_dir/ggml-common.h
-cp $src_dir/ggml-backend.h $dst_dir/ggml-backend.h
-cp $src_dir/ggml-backend.c $dst_dir/ggml-backend.c
-cp $src_dir/ggml-backend-impl.h $dst_dir/ggml-backend-impl.h
-cp $src_dir/ggml-alloc.h $dst_dir/ggml-alloc.h
-cp $src_dir/ggml-alloc.c $dst_dir/ggml-alloc.c
-
-# ggml-cuda
-mkdir -p $dst_dir/ggml-cuda
-cp $src_dir/ggml-cuda/*.cu $dst_dir/ggml-cuda/
-cp $src_dir/ggml-cuda/*.cuh $dst_dir/ggml-cuda/
-
-sed -i 's/extern "C" GGML_CALL int ggml_backend_cuda_reg_devices();/\/\/ extern "C" GGML_CALL int ggml_backend_cuda_reg_devices();/' ggml-cuda.cu
-sed -i '34iGGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);' ggml-cuda.h
-
-# ggml-metal
-sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > temp.metal
-TEMP_ASSEMBLY=$(mktemp)
-echo ".section __DATA, __ggml_metallib"   >  $TEMP_ASSEMBLY
-echo ".globl _ggml_metallib_start"        >> $TEMP_ASSEMBLY
-echo "_ggml_metallib_start:"              >> $TEMP_ASSEMBLY
-echo ".incbin \"temp.metal\"" >> $TEMP_ASSEMBLY
-echo ".globl _ggml_metallib_end"          >> $TEMP_ASSEMBLY
-echo "_ggml_metallib_end:"                >> $TEMP_ASSEMBLY
-as -mmacosx-version-min=11.3 $TEMP_ASSEMBLY -o ggml-metal.o
-rm -f $TEMP_ASSEMBLY
-rm -rf temp.metal
+# # llama.cpp
+# cp $src_dir/unicode.cpp $dst_dir/unicode.cpp
+# cp $src_dir/unicode.h $dst_dir/unicode.h
+# cp $src_dir/unicode-data.cpp $dst_dir/unicode-data.cpp
+# cp $src_dir/unicode-data.h $dst_dir/unicode-data.h
+# cp $src_dir/llama.cpp $dst_dir/llama.cpp
+# cp $src_dir/llama.h $dst_dir/llama.h
+# cp $src_dir/sgemm.cpp $dst_dir/sgemm.cpp
+# cp $src_dir/sgemm.h $dst_dir/sgemm.h
+
+# # ggml
+# cp $src_dir/ggml.c $dst_dir/ggml.c
+# cp $src_dir/ggml.h $dst_dir/ggml.h
+# cp $src_dir/ggml-quants.c $dst_dir/ggml-quants.c
+# cp $src_dir/ggml-quants.h $dst_dir/ggml-quants.h
+# cp $src_dir/ggml-metal.metal $dst_dir/ggml-metal.metal
+# cp $src_dir/ggml-metal.h $dst_dir/ggml-metal.h
+# cp $src_dir/ggml-metal.m $dst_dir/ggml-metal.m
+# cp $src_dir/ggml-impl.h $dst_dir/ggml-impl.h
+# cp $src_dir/ggml-cuda.h $dst_dir/ggml-cuda.h
+# cp $src_dir/ggml-cuda.cu $dst_dir/ggml-cuda.cu
+# cp $src_dir/ggml-common.h $dst_dir/ggml-common.h
+# cp $src_dir/ggml-backend.h $dst_dir/ggml-backend.h
+# cp $src_dir/ggml-backend.c $dst_dir/ggml-backend.c
+# cp $src_dir/ggml-backend-impl.h $dst_dir/ggml-backend-impl.h
+# cp $src_dir/ggml-alloc.h $dst_dir/ggml-alloc.h
+# cp $src_dir/ggml-alloc.c $dst_dir/ggml-alloc.c
+
+# # ggml-cuda
+# mkdir -p $dst_dir/ggml-cuda
+# cp $src_dir/ggml-cuda/*.cu $dst_dir/ggml-cuda/
+# cp $src_dir/ggml-cuda/*.cuh $dst_dir/ggml-cuda/
+
+# sed -i 's/extern "C" GGML_CALL int ggml_backend_cuda_reg_devices();/\/\/ extern "C" GGML_CALL int ggml_backend_cuda_reg_devices();/' ggml-cuda.cu
+# sed -i '34iGGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);' ggml-cuda.h
+
+# # ggml-metal
+# sed -i '' '1s;^;//go:build darwin,arm64\n;' ggml-metal.m
+# sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > temp.metal
+# TEMP_ASSEMBLY=$(mktemp)
+# echo ".section __DATA, __ggml_metallib"   >  $TEMP_ASSEMBLY
+# echo ".globl _ggml_metallib_start"        >> $TEMP_ASSEMBLY
+# echo "_ggml_metallib_start:"              >> $TEMP_ASSEMBLY
+# echo ".incbin \"temp.metal\"" >> $TEMP_ASSEMBLY
+# echo ".globl _ggml_metallib_end"          >> $TEMP_ASSEMBLY
+# echo "_ggml_metallib_end:"                >> $TEMP_ASSEMBLY
+# as -mmacosx-version-min=11.3 $TEMP_ASSEMBLY -o ggml-metal.o
+# rm -f $TEMP_ASSEMBLY
+# rm -rf temp.metal
+
+# add license info
+LICENSE=$(mktemp)
+cleanup() {
+    rm -f $LICENSE
+}
+trap cleanup 0
+
+cat <<EOF | sed 's/ *$//' >$LICENSE
+/**
+ * llama.cpp - git $SHA1
+ *
+$(sed 's/^/ * /' <$1/$src_dir/LICENSE)
+ */
+
+for IN in $OUT/*.{c,h,cpp,m,metal,cu}; do
+    TMP=$(mktemp)
+    status "updating license $IN"
+    cat $LICENSE $IN >$TMP
+    mv $TMP $IN
+done

+ 0 - 0
llama/runner/README.md → runner/README.md


+ 0 - 0
llama/runner/runner.go → runner/runner.go


+ 25 - 0
scripts/build_llama_darwin.sh

@@ -0,0 +1,25 @@
+#!/bin/sh
+
+set -e
+
+export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
+export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
+
+mkdir -p dist
+
+# amd64 runners
+export CGO_CFLAGS_ALLOW=-mfma
+export CGO_CXXFLAGS_ALLOW=-mfma
+CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build -x -ldflags="-s -w" -trimpath -o dist/ollama_llama_runner_darwin_amd64 ./runner &
+CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build -x -ldflags="-s -w" -tags avx -trimpath -o dist/ollama_llama_runner_darwin_amd64_avx ./runner &
+CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build -x -ldflags="-s -w" -tags avx,avx2 -trimpath -o dist/ollama_llama_runner_darwin_amd64_avx2 ./runner &
+wait
+
+# amd64
+CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build -x -ldflags="-s -w" -o dist/ollama_darwin_amd64 .
+
+# arm64 runner
+CGO_ENABLED=1 GOOS=darwin GOARCH=arm64 go build -ldflags="-s -w" -trimpath -o dist/ollama_llama_runner_darwin_arm64 ./runner
+
+# arm64
+CGO_ENABLED=1 GOOS=darwin GOARCH=arm64 go build -ldflags="-s -w" -trimpath -o dist/ollama_darwin_arm64 .