
move sync script back in for now

jmorganca · 11 months ago · commit 6dab2a9d3a
3 changed files with 12 additions and 8 deletions:

  1. llama/README.md (+4 -4)
  2. llama/runner/README.md (+6 -2)
  3. llama/sync_llama.sh (+2 -2)

llama/README.md (+4 -4)

@@ -12,7 +12,6 @@ Supported:
 - [x] Linux CUDA
 - [x] Linux ROCm
 - [x] Llava
-- [x] Parallel Requests
 
 Extra build steps are required for CUDA and ROCm on Windows, since `nvcc` and `hipcc` both require MSVC as the host compiler. For these, small DLLs are created:
 
@@ -61,6 +60,8 @@ go build -tags=cuda .
 
 ## Windows
 
+Download [w64devkit](https://github.com/skeeto/w64devkit/releases/latest) for a simple MinGW development environment.
+
 ### CUDA
 
 Install the [CUDA toolkit v11.3.1](https://developer.nvidia.com/cuda-11-3-1-download-archive) then build the cuda code:
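
The build command itself falls outside this hunk. As a rough sketch, and assuming the Windows CUDA build uses the same `go build -tags=cuda .` command that appears in the earlier hunk header, it would be run from a w64devkit shell inside `llama/`:

```
# Sketch only: assumes the Windows build mirrors the Linux CUDA build
# (go build -tags=cuda .) and is run from a w64devkit shell in llama/.
go build -tags=cuda .
```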
@@ -95,9 +96,8 @@ go build -tags=rocm .
 
 ## Syncing with llama.cpp
 
-To update this package to the latest llama.cpp code, use the `scripts/sync_llama.sh` script from the root of this repo:
+To update this package to the latest llama.cpp code, use the `sync_llama.sh` script from the `llama` directory of this repo:
 
 ```
-cd ollama
-./scripts/sync_llama.sh ../llama.cpp
+./sync_llama.sh ../../llama.cpp
 ```
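
As a usage sketch (the llama.cpp clone location and the surrounding steps are assumptions for illustration, not part of this commit), a full sync might look like:

```
# Sketch: assumes llama.cpp is checked out as a sibling of the ollama repo
# and that the script is run from the llama/ directory (where dst_dir is ".").
git clone https://github.com/ggerganov/llama.cpp ../llama.cpp   # from the ollama repo root
cd llama
./sync_llama.sh ../../llama.cpp
```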

llama/runner/README.md (+6 -2)

@@ -1,11 +1,15 @@
 # `runner`
 
-A subprocess runner for loading a model and running inference via a small http web server.
+A minimal runner for loading a model and running inference via an HTTP server.
 
 ```
 ./runner -model <model binary>
 ```
 
+### Completion
+
 ```
-curl -X POST -H "Content-Type: application/json" -d '{"prompt": "hi"}' http://localhost:8080/
+curl -X POST -H "Content-Type: application/json" -d '{"prompt": "hi"}' http://localhost:8080/completion
 ```
+
+### Embeddings
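
The body of the new Embeddings section is not shown in this hunk. As a sketch, and assuming the runner exposes an embeddings endpoint that mirrors the completion example above (the `/embedding` path and the `content` field are assumptions), a request might look like:

```
# Sketch only: the /embedding path and the "content" field are assumptions,
# not taken from this commit.
curl -X POST -H "Content-Type: application/json" -d '{"content": "hi"}' http://localhost:8080/embedding
```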

scripts/sync_llama.sh → llama/sync_llama.sh (+2 -2)

@@ -11,7 +11,7 @@ if [ -z "$src_dir" ]; then
 fi
 
 # Set the destination directory
-dst_dir=./llama
+dst_dir=.
 
 # llama.cpp
 cp $src_dir/unicode.cpp $dst_dir/unicode.cpp
@@ -106,7 +106,7 @@ for IN in $dst_dir/*.{c,h,cpp,m,metal,cu}; do
 done
 
 # ggml-metal
-sed -e '/#include "ggml-common.h"/r llama/ggml-common.h' -e '/#include "ggml-common.h"/d' < $dst_dir/ggml-metal.metal > temp.metal
+sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < $dst_dir/ggml-metal.metal > temp.metal
 TEMP_ASSEMBLY=$(mktemp)
 echo ".section __DATA, __ggml_metallib"   >  $TEMP_ASSEMBLY
 echo ".globl _ggml_metallib_start"        >> $TEMP_ASSEMBLY