Browse code

Merge branch 'main' into archlinux

Alexander F. Rødseth 1 year ago
parent
commit
cbe2adc78a
11 changed files with 151 additions and 372 deletions
  1. 28 3
      .github/workflows/test.yaml
  2. 4 0
      README.md
  3. 0 284
      api/client.py
  4. 1 2
      llm/dyn_ext_server.go
  5. 3 0
      llm/generate/gen_darwin.sh
  6. 41 28
      llm/generate/gen_linux.sh
  7. 1 1
      llm/llama.cpp
  8. 31 34
      server/images.go
  9. 1 0
      server/modelpath.go
  10. 3 19
      server/routes.go
  11. 38 1
      server/routes_test.go

+ 28 - 3
.github/workflows/test.yaml

@@ -8,7 +8,15 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
+        arch: [amd64, arm64]
+        exclude:
+          - os: ubuntu-latest
+            arch: arm64
+          - os: windows-latest
+            arch: arm64
     runs-on: ${{ matrix.os }}
+    env:
+      GOARCH: ${{ matrix.arch }}
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-go@v4
@@ -33,7 +41,7 @@ jobs:
       - run: go generate -x ./...
       - uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.os }}-libraries
+          name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
           path: |
             llm/llama.cpp/build/**/lib/*
   lint:
@@ -41,7 +49,18 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
+        arch: [amd64, arm64]
+        exclude:
+          - os: ubuntu-latest
+            arch: arm64
+          - os: windows-latest
+            arch: arm64
+          - os: macos-latest
+            arch: amd64
     runs-on: ${{ matrix.os }}
+    env:
+      GOARCH: ${{ matrix.arch }}
+      CGO_ENABLED: "1"
     steps:
       - uses: actions/checkout@v4
         with:
@@ -52,7 +71,7 @@ jobs:
           cache: false
       - uses: actions/download-artifact@v4
         with:
-          name: ${{ matrix.os }}-libraries
+          name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
           path: llm/llama.cpp/build
       - uses: golangci/golangci-lint-action@v3
   test:
@@ -60,6 +79,12 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
+        arch: [amd64, arm64]
+        exclude:
+          - os: ubuntu-latest
+            arch: arm64
+          - os: windows-latest
+            arch: arm64
     runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v4
@@ -72,7 +97,7 @@ jobs:
       - run: go get
       - uses: actions/download-artifact@v4
         with:
-          name: ${{ matrix.os }}-libraries
+          name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
           path: llm/llama.cpp/build
       - run: go build
       - run: go test -v ./...

+ 4 - 0
README.md

@@ -248,6 +248,10 @@ curl http://localhost:11434/api/chat -d '{
 
 See the [API documentation](./docs/api.md) for all endpoints.
 
+## Integrations
+
+- [ollama-python](https://github.com/jmorganca/ollama-python)
+
 ## Community Integrations
 
 ### Web & Desktop

+ 0 - 284
api/client.py

@@ -1,284 +0,0 @@
-import os
-import json
-import requests
-import os
-import hashlib
-import json
-from pathlib import Path
-
-BASE_URL = os.environ.get('OLLAMA_HOST', 'http://localhost:11434')
-
-# Generate a response for a given prompt with a provided model. This is a streaming endpoint, so will be a series of responses.
-# The final response object will include statistics and additional data from the request. Use the callback function to override
-# the default handler.
-def generate(model_name, prompt, system=None, template=None, format="", context=None, options=None, callback=None):
-    try:
-        url = f"{BASE_URL}/api/generate"
-        payload = {
-            "model": model_name, 
-            "prompt": prompt, 
-            "system": system, 
-            "template": template, 
-            "context": context, 
-            "options": options,
-            "format": format,
-        }
-        
-        # Remove keys with None values
-        payload = {k: v for k, v in payload.items() if v is not None}
-        
-        with requests.post(url, json=payload, stream=True) as response:
-            response.raise_for_status()
-            
-            # Creating a variable to hold the context history of the final chunk
-            final_context = None
-            
-            # Variable to hold concatenated response strings if no callback is provided
-            full_response = ""
-
-            # Iterating over the response line by line and displaying the details
-            for line in response.iter_lines():
-                if line:
-                    # Parsing each line (JSON chunk) and extracting the details
-                    chunk = json.loads(line)
-                    
-                    # If a callback function is provided, call it with the chunk
-                    if callback:
-                        callback(chunk)
-                    else:
-                        # If this is not the last chunk, add the "response" field value to full_response and print it
-                        if not chunk.get("done"):
-                            response_piece = chunk.get("response", "")
-                            full_response += response_piece
-                            print(response_piece, end="", flush=True)
-                    
-                    # Check if it's the last chunk (done is true)
-                    if chunk.get("done"):
-                        final_context = chunk.get("context")
-            
-            # Return the full response and the final context
-            return full_response, final_context
-    except requests.exceptions.RequestException as e:
-        print(f"An error occurred: {e}")
-        return None, None
-    
-
-# Create a blob file on the server if it doesn't exist.
-def create_blob(digest, file_path):
-    url = f"{BASE_URL}/api/blobs/{digest}"
-
-    # Check if the blob exists
-    response = requests.head(url)
-    if response.status_code != 404:
-        return  # Blob already exists, no need to upload
-    response.raise_for_status()
-
-    # Upload the blob
-    with open(file_path, 'rb') as file_data:
-        requests.post(url, data=file_data)
-
-
-# Create a model from a Modelfile. Use the callback function to override the default handler.
-def create(model_name, filename, callback=None):
-    try:
-        file_path = Path(filename).expanduser().resolve()
-        processed_lines = []
-
-        # Read and process the modelfile
-        with open(file_path, 'r') as f:
-            for line in f:            
-                # Skip empty or whitespace-only lines
-                if not line.strip():
-                    continue
-            
-                command, args = line.split(maxsplit=1)
-
-                if command.upper() in ["FROM", "ADAPTER"]:
-                    path = Path(args.strip()).expanduser()
-
-                    # Check if path is relative and resolve it
-                    if not path.is_absolute():
-                        path = (file_path.parent / path)
-
-                    # Skip if file does not exist for "model", this is handled by the server
-                    if not path.exists():
-                        processed_lines.append(line)
-                        continue
-
-                    # Calculate SHA-256 hash
-                    with open(path, 'rb') as bin_file:
-                        hash = hashlib.sha256()
-                        hash.update(bin_file.read())
-                        blob = f"sha256:{hash.hexdigest()}"
-                
-                    # Add the file to the remote server
-                    create_blob(blob, path)
-
-                    # Replace path with digest in the line
-                    line = f"{command} @{blob}\n"
-
-                processed_lines.append(line)
-
-        # Combine processed lines back into a single string
-        modelfile_content = '\n'.join(processed_lines)
-
-        url = f"{BASE_URL}/api/create"
-        payload = {"name": model_name, "modelfile": modelfile_content}
-
-        # Making a POST request with the stream parameter set to True to handle streaming responses
-        with requests.post(url, json=payload, stream=True) as response:
-            response.raise_for_status()
-            # Iterating over the response line by line and displaying the status
-            for line in response.iter_lines():
-                if line:
-                    chunk = json.loads(line)
-                    if callback:
-                        callback(chunk)
-                    else:
-                        print(f"Status: {chunk.get('status')}")
-
-    except Exception as e:
-        print(f"An error occurred: {e}")
-
-
-# Pull a model from a the model registry. Cancelled pulls are resumed from where they left off, and multiple
-# calls to will share the same download progress. Use the callback function to override the default handler.
-def pull(model_name, insecure=False, callback=None):
-    try:
-        url = f"{BASE_URL}/api/pull"
-        payload = {
-            "name": model_name,
-            "insecure": insecure
-        }
-
-        # Making a POST request with the stream parameter set to True to handle streaming responses
-        with requests.post(url, json=payload, stream=True) as response:
-            response.raise_for_status()
-
-            # Iterating over the response line by line and displaying the details
-            for line in response.iter_lines():
-                if line:
-                    # Parsing each line (JSON chunk) and extracting the details
-                    chunk = json.loads(line)
-
-                    # If a callback function is provided, call it with the chunk
-                    if callback:
-                        callback(chunk)
-                    else:
-                        # Print the status message directly to the console
-                        print(chunk.get('status', ''), end='', flush=True)
-                    
-                    # If there's layer data, you might also want to print that (adjust as necessary)
-                    if 'digest' in chunk:
-                        print(f" - Digest: {chunk['digest']}", end='', flush=True)
-                        print(f" - Total: {chunk['total']}", end='', flush=True)
-                        print(f" - Completed: {chunk['completed']}", end='\n', flush=True)
-                    else:
-                        print()
-    except requests.exceptions.RequestException as e:
-        print(f"An error occurred: {e}")
-
-# Push a model to the model registry. Use the callback function to override the default handler.
-def push(model_name, insecure=False, callback=None):
-    try:
-        url = f"{BASE_URL}/api/push"
-        payload = {
-            "name": model_name,
-            "insecure": insecure
-        }
-
-        # Making a POST request with the stream parameter set to True to handle streaming responses
-        with requests.post(url, json=payload, stream=True) as response:
-            response.raise_for_status()
-
-            # Iterating over the response line by line and displaying the details
-            for line in response.iter_lines():
-                if line:
-                    # Parsing each line (JSON chunk) and extracting the details
-                    chunk = json.loads(line)
-
-                    # If a callback function is provided, call it with the chunk
-                    if callback:
-                        callback(chunk)
-                    else:
-                        # Print the status message directly to the console
-                        print(chunk.get('status', ''), end='', flush=True)
-                    
-                    # If there's layer data, you might also want to print that (adjust as necessary)
-                    if 'digest' in chunk:
-                        print(f" - Digest: {chunk['digest']}", end='', flush=True)
-                        print(f" - Total: {chunk['total']}", end='', flush=True)
-                        print(f" - Completed: {chunk['completed']}", end='\n', flush=True)
-                    else:
-                        print()
-    except requests.exceptions.RequestException as e:
-        print(f"An error occurred: {e}")
-
-# List models that are available locally.
-def list():
-    try:
-        response = requests.get(f"{BASE_URL}/api/tags")
-        response.raise_for_status()
-        data = response.json()
-        models = data.get('models', [])
-        return models
-
-    except requests.exceptions.RequestException as e:
-        print(f"An error occurred: {e}")
-        return None
-
-# Copy a model. Creates a model with another name from an existing model.
-def copy(source, destination):
-    try:
-        # Create the JSON payload
-        payload = {
-            "source": source,
-            "destination": destination
-        }
-        
-        response = requests.post(f"{BASE_URL}/api/copy", json=payload)
-        response.raise_for_status()
-        
-        # If the request was successful, return a message indicating that the copy was successful
-        return "Copy successful"
-
-    except requests.exceptions.RequestException as e:
-        print(f"An error occurred: {e}")
-        return None
-
-# Delete a model and its data.
-def delete(model_name):
-    try:
-        url = f"{BASE_URL}/api/delete"
-        payload = {"name": model_name}
-        response = requests.delete(url, json=payload)
-        response.raise_for_status()
-        return "Delete successful"
-    except requests.exceptions.RequestException as e:
-        print(f"An error occurred: {e}")
-        return None
-
-# Show info about a model.
-def show(model_name):
-    try:
-        url = f"{BASE_URL}/api/show"
-        payload = {"name": model_name}
-        response = requests.post(url, json=payload)
-        response.raise_for_status()
-        
-        # Parse the JSON response and return it
-        data = response.json()
-        return data
-    except requests.exceptions.RequestException as e:
-        print(f"An error occurred: {e}")
-        return None
-
-def heartbeat():
-    try:
-        url = f"{BASE_URL}/"
-        response = requests.head(url)
-        response.raise_for_status()
-        return "Ollama is running"
-    except requests.exceptions.RequestException as e:
-        print(f"An error occurred: {e}")
-        return "Ollama is not running"

+ 1 - 2
llm/dyn_ext_server.go

@@ -75,7 +75,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
 	updatePath(filepath.Dir(library))
 	libPath := C.CString(library)
 	defer C.free(unsafe.Pointer(libPath))
-	resp := newExtServerResp(128)
+	resp := newExtServerResp(512)
 	defer freeExtServerResp(resp)
 	var srv C.struct_dynamic_llama_server
 	C.dyn_init(libPath, &srv, &resp)
@@ -181,7 +181,6 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
 		"seed":              predict.Options.Seed,
 		"stop":              predict.Options.Stop,
 		"image_data":        imageData,
-		"cache_prompt":      true,
 	}
 
 	if predict.Format == "json" {

+ 3 - 0
llm/generate/gen_darwin.sh

@@ -14,9 +14,11 @@ BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal"
 case "${GOARCH}" in
 "amd64")
     CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_METAL=off -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
+    ARCH="x86_64"
     ;;
 "arm64")
     CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DLLAMA_METAL=on ${CMAKE_DEFS}"
+    ARCH="arm64"
     ;;
 *)
     echo "GOARCH must be set"
@@ -30,6 +32,7 @@ apply_patches
 build
 install
 gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
+    -arch ${ARCH} \
     -Wl,-force_load ${BUILD_DIR}/lib/libext_server.a \
     ${BUILD_DIR}/lib/libcommon.a \
     ${BUILD_DIR}/lib/libllama.a \

+ 41 - 28
llm/generate/gen_linux.sh

@@ -39,8 +39,13 @@ amdGPUs() {
 }
 
 echo "Starting linux generate script"
-if [ -z "${CUDACXX}" -a -x /usr/local/cuda/bin/nvcc ]; then
-    export CUDACXX=/usr/local/cuda/bin/nvcc
+if [ -z "${CUDACXX}" ]; then
+    if [ -x /usr/local/cuda/bin/nvcc ]; then
+        export CUDACXX=/usr/local/cuda/bin/nvcc
+    else
+        # Try the default location in case it exists
+        export CUDACXX=$(command -v nvcc)
+    fi
 fi
 COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"
 source $(dirname $0)/gen_common.sh
@@ -109,33 +114,41 @@ else
     echo "Skipping CPU generation step as requested"
 fi
 
-for cudalibpath in "/usr/local/cuda/lib64" "/opt/cuda/targets/x86_64-linux/lib"; do
-    if [ -d "$cudalibpath" ]; then
-        echo "CUDA libraries detected - building dynamic CUDA library"
-        init_vars
-        CUDA_MAJOR=$(find "$cudalibpath" -name 'libcudart.so.*' -print | head -1 | cut -f3 -d. || true)
-        if [ -n "${CUDA_MAJOR}" ]; then
-            CUDA_VARIANT="_v${CUDA_MAJOR}"
-        fi
-        CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
-        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda${CUDA_VARIANT}"
-        CUDA_LIB_DIR="$cudalibpath"
-        build
-        install
-        gcc -fPIC -g -shared -o "${BUILD_DIR}/lib/libext_server.so" \
-            -Wl,--whole-archive \
-            "${BUILD_DIR}/lib/libext_server.a" \
-            "${BUILD_DIR}/lib/libcommon.a" \
-            "${BUILD_DIR}/lib/libllama.a" \
-            -Wl,--no-whole-archive \
-            "${CUDA_LIB_DIR}/libcudart_static.a" \
-            "${CUDA_LIB_DIR}/libcublas_static.a" \
-            "${CUDA_LIB_DIR}/libcublasLt_static.a" \
-            "${CUDA_LIB_DIR}/libcudadevrt.a" \
-            "${CUDA_LIB_DIR}/libculibos.a" \
-            -lrt -lpthread -ldl -lstdc++ -lm
+# If needed, look for the default CUDA toolkit location
+if [ -z "${CUDA_LIB_DIR}" ] && [ -d /usr/local/cuda/lib64 ]; then
+    CUDA_LIB_DIR=/usr/local/cuda/lib64
+fi
+
+# If needed, look for CUDA on Arch Linux
+if [ -z "${CUDA_LIB_DIR}" ] && [ -d /opt/cuda/targets/x86_64-linux/lib ]; then
+    CUDA_LIB_DIR=/opt/cuda/targets/x86_64-linux/lib
+fi
+
+if [ -d "${CUDA_LIB_DIR}" ]; then
+    echo "CUDA libraries detected - building dynamic CUDA library"
+    init_vars
+    CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
+    if [ -n "${CUDA_MAJOR}" ]; then
+        CUDA_VARIANT=_v${CUDA_MAJOR}
     fi
-done
+    CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
+    BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda${CUDA_VARIANT}"
+    build
+    install
+    gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
+        -Wl,--whole-archive \
+        ${BUILD_DIR}/lib/libext_server.a \
+        ${BUILD_DIR}/lib/libcommon.a \
+        ${BUILD_DIR}/lib/libllama.a \
+        -Wl,--no-whole-archive \
+        ${CUDA_LIB_DIR}/libcudart_static.a \
+        ${CUDA_LIB_DIR}/libcublas_static.a \
+        ${CUDA_LIB_DIR}/libcublasLt_static.a \
+        ${CUDA_LIB_DIR}/libcudadevrt.a \
+        ${CUDA_LIB_DIR}/libculibos.a \
+        -lcuda \
+        -lrt -lpthread -ldl -lstdc++ -lm
+fi
 
 if [ -z "${ROCM_PATH}" ]; then
     # Try the default location in case it exists

+ 1 - 1
llm/llama.cpp

@@ -1 +1 @@
-Subproject commit 328b83de23b33240e28f4e74900d1d06726f5eb1
+Subproject commit 584d674be622fbf1578694ada6e62eebedbfd377

+ 31 - 34
server/images.go

@@ -1132,49 +1132,46 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
 var errUnauthorized = fmt.Errorf("unauthorized")
 
 func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *RegistryOptions) (*http.Response, error) {
-	resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
-	if err != nil {
-		if !errors.Is(err, context.Canceled) {
-			log.Printf("request failed: %v", err)
-		}
-
-		return nil, err
-	}
-
-	switch {
-	case resp.StatusCode == http.StatusUnauthorized:
-		// Handle authentication error with one retry
-		auth := resp.Header.Get("www-authenticate")
-		authRedir := ParseAuthRedirectString(auth)
-		token, err := getAuthToken(ctx, authRedir)
+	for i := 0; i < 2; i++ {
+		resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
 		if err != nil {
+			if !errors.Is(err, context.Canceled) {
+				log.Printf("request failed: %v", err)
+			}
+
 			return nil, err
 		}
-		regOpts.Token = token
-		if body != nil {
-			_, err = body.Seek(0, io.SeekStart)
+
+		switch {
+		case resp.StatusCode == http.StatusUnauthorized:
+			// Handle authentication error with one retry
+			auth := resp.Header.Get("www-authenticate")
+			authRedir := ParseAuthRedirectString(auth)
+			token, err := getAuthToken(ctx, authRedir)
 			if err != nil {
 				return nil, err
 			}
+			regOpts.Token = token
+			if body != nil {
+				_, err = body.Seek(0, io.SeekStart)
+				if err != nil {
+					return nil, err
+				}
+			}
+		case resp.StatusCode == http.StatusNotFound:
+			return nil, os.ErrNotExist
+		case resp.StatusCode >= http.StatusBadRequest:
+			responseBody, err := io.ReadAll(resp.Body)
+			if err != nil {
+				return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
+			}
+			return nil, fmt.Errorf("%d: %s", resp.StatusCode, responseBody)
+		default:
+			return resp, nil
 		}
-
-		resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
-		if resp.StatusCode == http.StatusUnauthorized {
-			return nil, errUnauthorized
-		}
-
-		return resp, err
-	case resp.StatusCode == http.StatusNotFound:
-		return nil, os.ErrNotExist
-	case resp.StatusCode >= http.StatusBadRequest:
-		responseBody, err := io.ReadAll(resp.Body)
-		if err != nil {
-			return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
-		}
-		return nil, fmt.Errorf("%d: %s", resp.StatusCode, responseBody)
 	}
 
-	return resp, nil
+	return nil, errUnauthorized
 }
 
 func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) {

+ 1 - 0
server/modelpath.go

@@ -46,6 +46,7 @@ func ParseModelPath(name string) ModelPath {
 		name = after
 	}
 
+	name = strings.ReplaceAll(name, string(os.PathSeparator), "/")
 	parts := strings.Split(name, "/")
 	switch len(parts) {
 	case 3:

+ 3 - 19
server/routes.go

@@ -15,7 +15,6 @@ import (
 	"path/filepath"
 	"reflect"
 	"runtime"
-	"strconv"
 	"strings"
 	"sync"
 	"syscall"
@@ -668,27 +667,12 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
 	cs := 30
 	for k, v := range model.Options {
 		switch val := v.(type) {
-		case string:
-			params = append(params, fmt.Sprintf("%-*s %s", cs, k, val))
-		case int:
-			params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.Itoa(val)))
-		case float64:
-			params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatFloat(val, 'f', 0, 64)))
-		case bool:
-			params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatBool(val)))
 		case []interface{}:
 			for _, nv := range val {
-				switch nval := nv.(type) {
-				case string:
-					params = append(params, fmt.Sprintf("%-*s %s", cs, k, nval))
-				case int:
-					params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.Itoa(nval)))
-				case float64:
-					params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatFloat(nval, 'f', 0, 64)))
-				case bool:
-					params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatBool(nval)))
-				}
+				params = append(params, fmt.Sprintf("%-*s %#v", cs, k, nv))
 			}
+		default:
+			params = append(params, fmt.Sprintf("%-*s %#v", cs, k, v))
 		}
 	}
 	resp.Parameters = strings.Join(params, "\n")

+ 38 - 1
server/routes_test.go

@@ -9,6 +9,7 @@ import (
 	"net/http"
 	"net/http/httptest"
 	"os"
+	"sort"
 	"strings"
 	"testing"
 
@@ -50,7 +51,7 @@ func Test_Routes(t *testing.T) {
 	createTestModel := func(t *testing.T, name string) {
 		fname := createTestFile(t, "ollama-model")
 
-		modelfile := strings.NewReader(fmt.Sprintf("FROM %s", fname))
+		modelfile := strings.NewReader(fmt.Sprintf("FROM %s\nPARAMETER seed 42\nPARAMETER top_p 0.9\nPARAMETER stop foo\nPARAMETER stop bar", fname))
 		commands, err := parser.Parse(modelfile)
 		assert.Nil(t, err)
 		fn := func(resp api.ProgressResponse) {
@@ -167,6 +168,42 @@ func Test_Routes(t *testing.T) {
 				assert.Equal(t, "beefsteak:latest", model.ShortName)
 			},
 		},
+		{
+			Name:   "Show Model Handler",
+			Method: http.MethodPost,
+			Path:   "/api/show",
+			Setup: func(t *testing.T, req *http.Request) {
+				createTestModel(t, "show-model")
+				showReq := api.ShowRequest{Model: "show-model"}
+				jsonData, err := json.Marshal(showReq)
+				assert.Nil(t, err)
+				req.Body = io.NopCloser(bytes.NewReader(jsonData))
+			},
+			Expected: func(t *testing.T, resp *http.Response) {
+				contentType := resp.Header.Get("Content-Type")
+				assert.Equal(t, contentType, "application/json; charset=utf-8")
+				body, err := io.ReadAll(resp.Body)
+				assert.Nil(t, err)
+
+				var showResp api.ShowResponse
+				err = json.Unmarshal(body, &showResp)
+				assert.Nil(t, err)
+
+				var params []string
+				paramsSplit := strings.Split(showResp.Parameters, "\n")
+				for _, p := range paramsSplit {
+					params = append(params, strings.Join(strings.Fields(p), " "))
+				}
+				sort.Strings(params)
+				expectedParams := []string{
+					"seed 42",
+					"stop \"bar\"",
+					"stop \"foo\"",
+					"top_p 0.9",
+				}
+				assert.Equal(t, expectedParams, params)
+			},
+		},
 	}
 
 	s, err := setupServer(t)