1 vuosi sitten · c7047d7353
--- a/examples/kubernetes/README.md
+++ b/examples/kubernetes/README.md
@@ -0,0 +1,36 @@
 
				+# Deploy Ollama to Kubernetes
			
 
				+
			
 
				+## Prerequisites
			
 
				+
			
 
				+- Ollama: https://ollama.ai/download
			
 
				+- Kubernetes cluster. This example will use Google Kubernetes Engine.
			
 
				+
			
 
				+## Steps
			
 
				+
			
 
				+1. Create the Ollama namespace, deployment, and service
			
 
				+
			
 
				+    ```bash
			
 
				+    kubectl apply -f cpu.yaml
			
 
				+    ```
			
 
				+
			
 
				+1. Port-forward the Ollama service so you can connect to it and use it locally
			
 
				+
			
 
				+    ```bash
			
 
				+    kubectl -n ollama port-forward service/ollama 11434:80
			
 
				+    ```
			
 
				+
			
 
				+1. Pull and run a model, for example `orca-mini:3b`
			
 
				+
			
 
				+    ```bash
			
 
				+    ollama run orca-mini:3b
			
 
				+    ```
			
 
				+
			
 
				+## (Optional) Hardware Acceleration
			
 
				+
			
 
				+Hardware acceleration in Kubernetes requires NVIDIA's [`k8s-device-plugin`](https://github.com/NVIDIA/k8s-device-plugin). Follow the link for more details.
			
 
				+
			
 
				+Once configured, create a GPU-enabled Ollama deployment.
			
 
				+
			
 
				+```bash
			
 
				+kubectl apply -f gpu.yaml
			
 
				+```
			
--- a/examples/kubernetes/cpu.yaml
+++ b/examples/kubernetes/cpu.yaml
@@ -0,0 +1,42 @@
 
				+---
			
 
				+apiVersion: v1
			
 
				+kind: Namespace
			
 
				+metadata:
			
 
				+  name: ollama
			
 
				+---
			
 
				+apiVersion: apps/v1
			
 
				+kind: Deployment
			
 
				+metadata:
			
 
				+  name: ollama
			
 
				+  namespace: ollama
			
 
				+spec:
			
 
				+  selector:
			
 
				+    matchLabels:
			
 
				+      name: ollama
			
 
				+  template:
			
 
				+    metadata:
			
 
				+      labels:
			
 
				+        name: ollama
			
 
				+    spec:
			
 
				+      containers:
			
 
				+      - name: ollama
			
 
				+        image: ollama/ollama:latest
			
 
				+        ports:
			
 
				+        - name: http
			
 
				+          containerPort: 11434
			
 
				+          protocol: TCP
			
 
				+---
			
 
				+apiVersion: v1
			
 
				+kind: Service
			
 
				+metadata:
			
 
				+  name: ollama
			
 
				+  namespace: ollama
			
 
				+spec:
			
 
				+  type: ClusterIP
			
 
				+  selector:
			
 
				+    name: ollama
			
 
				+  ports:
			
 
				+  - port: 80
			
 
				+    name: http
			
 
				+    targetPort: http
			
 
				+    protocol: TCP
			
--- a/examples/kubernetes/gpu.yaml
+++ b/examples/kubernetes/gpu.yaml
@@ -0,0 +1,56 @@
 
				+---
			
 
				+apiVersion: v1
			
 
				+kind: Namespace
			
 
				+metadata:
			
 
				+  name: ollama
			
 
				+---
			
 
				+apiVersion: apps/v1
			
 
				+kind: Deployment
			
 
				+metadata:
			
 
				+  name: ollama
			
 
				+  namespace: ollama
			
 
				+spec:
			
 
				+  strategy:
			
 
				+    type: Recreate
			
 
				+  selector:
			
 
				+    matchLabels:
			
 
				+      name: ollama
			
 
				+  template:
			
 
				+    metadata:
			
 
				+      labels:
			
 
				+        name: ollama
			
 
				+    spec:
			
 
				+      containers:
			
 
				+      - name: ollama
			
 
				+        image: ollama/ollama:latest
			
 
				+        env:
			
 
				+        - name: PATH
			
 
				+          value: /usr/local/nvidia/bin:/usr/local/nvidia/lib64:/usr/bin:/usr/sbin:/bin:/sbin
			
 
				+        - name: LD_LIBRARY_PATH
			
 
				+          value: /usr/local/nvidia/lib64
			
 
				+        ports:
			
 
				+        - name: http
			
 
				+          containerPort: 11434
			
 
				+          protocol: TCP
			
 
				+        resources:
			
 
				+          limits:
			
 
				+            nvidia.com/gpu: 1
			
 
				+      tolerations:
			
 
				+      - key: nvidia.com/gpu
			
 
				+        operator: Exists
			
 
				+        effect: NoSchedule
			
 
				+---
			
 
				+apiVersion: v1
			
 
				+kind: Service
			
 
				+metadata:
			
 
				+  name: ollama
			
 
				+  namespace: ollama
			
 
				+spec:
			
 
				+  type: ClusterIP
			
 
				+  selector:
			
 
				+    name: ollama
			
 
				+  ports:
			
 
				+  - port: 80
			
 
				+    name: http
			
 
				+    targetPort: http
			
 
				+    protocol: TCP