Added support for Kubernetes, Kustomize, and Helm

Daniele Viti · 1 year ago
commit d14d26bdfd

+ 0 - 0
kubernetes/helm/.helmignore


+ 5 - 0
kubernetes/helm/Chart.yaml

@@ -0,0 +1,5 @@
+apiVersion: v2
+name: ollama-webui
+description: "Ollama Web UI: A User-Friendly Web Interface for Chat Interactions 👋"
+version: 0.1.0
+icon: https://raw.githubusercontent.com/ollama-webui/ollama-webui/main/static/favicon.png
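
Once this chart metadata is in place, the chart can be linted and installed from the repository root. A minimal sketch, assuming the chart directory is kubernetes/helm; the release name ollama-webui is arbitrary:

    helm lint ./kubernetes/helm
    helm install ollama-webui ./kubernetes/helm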

+ 4 - 0
kubernetes/helm/templates/ollama-namespace.yaml

@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: {{ .Values.namespace }}
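
The namespace name comes from values.yaml rather than being hardcoded. One way to sanity-check the substitution, sketched here with the chart's default values, is to render the templates locally:

    helm template ollama-webui ./kubernetes/helm
    # the rendered Namespace should read: name: ollama-namespace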

+ 12 - 0
kubernetes/helm/templates/ollama-service.yaml

@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama-service
+  namespace: {{ .Values.namespace }}
+spec:
+  selector:
+    app: ollama
+  ports:
+  - protocol: TCP
+    port: {{ .Values.ollama.servicePort }}
+    targetPort: {{ .Values.ollama.servicePort }}
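
This Service gives Ollama a stable in-cluster DNS name, ollama-service.<namespace>.svc.cluster.local, on port 11434. A rough connectivity check from a throwaway pod; the curlimages/curl image and the /api/tags endpoint are assumptions, not part of this commit:

    kubectl run -it --rm curl --image=curlimages/curl --restart=Never \
      -n ollama-namespace -- curl http://ollama-service:11434/api/tags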

+ 38 - 0
kubernetes/helm/templates/ollama-statefulset.yaml

@@ -0,0 +1,38 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: ollama
+  namespace: {{ .Values.namespace }}
+spec:
+  serviceName: "ollama"
+  replicas: {{ .Values.ollama.replicaCount }}
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      containers:
+      - name: ollama
+        image: {{ .Values.ollama.image }}
+        ports:
+        - containerPort: {{ .Values.ollama.servicePort }}
+        resources:
+          limits:
+            cpu: {{ .Values.ollama.resources.limits.cpu }}
+            memory: {{ .Values.ollama.resources.limits.memory }}
+            nvidia.com/gpu: {{ .Values.ollama.resources.limits.gpu }}
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+        tty: true
+  volumeClaimTemplates:
+  - metadata:
+      name: ollama-volume
+    spec:
+      accessModes: [ "ReadWriteOnce" ]
+      resources:
+        requests:
+          storage: {{ .Values.ollama.volumeSize }}
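
The nvidia.com/gpu limit means the pod only schedules on nodes that advertise NVIDIA GPUs (typically via the NVIDIA device plugin), and the volumeClaimTemplate provisions a PersistentVolumeClaim per replica for model storage. A quick post-install check, sketched:

    kubectl get statefulset,pvc -n ollama-namespace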

+ 28 - 0
kubernetes/helm/templates/webui-deployment.yaml

@@ -0,0 +1,28 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama-webui-deployment
+  namespace: {{ .Values.namespace }}
+spec:
+  replicas: {{ .Values.webui.replicaCount }}
+  selector:
+    matchLabels:
+      app: ollama-webui
+  template:
+    metadata:
+      labels:
+        app: ollama-webui
+    spec:
+      containers:
+      - name: ollama-webui
+        image: {{ .Values.webui.image }}
+        ports:
+        - containerPort: 8080
+        resources:
+          limits:
+            cpu: {{ .Values.webui.resources.limits.cpu }}
+            memory: {{ .Values.webui.resources.limits.memory }}
+        env:
+        - name: OLLAMA_API_BASE_URL
+          value: "http://ollama-service.{{ .Values.namespace }}.svc.cluster.local:{{ .Values.ollama.servicePort }}/api"
+        tty: true
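
OLLAMA_API_BASE_URL is assembled from the same values as the Ollama Service, so the web UI finds the backend through cluster DNS. To reach the UI without Ingress or NodePort, port-forwarding works; a sketch assuming the default namespace:

    kubectl port-forward -n ollama-namespace deployment/ollama-webui-deployment 8080:8080
    # then browse to http://localhost:8080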

+ 20 - 0
kubernetes/helm/templates/webui-ingress.yaml

@@ -0,0 +1,20 @@
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: ollama-webui-ingress
+  namespace: {{ .Values.namespace }}
+  #annotations:
+    # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
+    # nginx.ingress.kubernetes.io/rewrite-target: /
+spec:
+  rules:
+  - host: {{ .Values.webui.ingress.host }}
+    http:
+      paths:
+      - path: /
+        pathType: Prefix
+        backend:
+          service:
+            name: ollama-webui-service
+            port:
+              number: {{ .Values.webui.servicePort }}
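
For the default host to resolve against a minikube cluster, an ingress controller must be running and the hostname mapped to the cluster IP. One common approach, sketched (editing /etc/hosts is an assumption about the local setup):

    minikube addons enable ingress
    echo "$(minikube ip) ollama.minikube.local" | sudo tee -a /etc/hosts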

+ 15 - 0
kubernetes/helm/templates/webui-service.yaml

@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama-webui-service
+  namespace: {{ .Values.namespace }}
+spec:
+  type: NodePort  # Use LoadBalancer if you're on a cloud that supports it
+  selector:
+    app: ollama-webui
+  ports:
+    - protocol: TCP
+      port: {{ .Values.webui.servicePort }}
+      targetPort: {{ .Values.webui.servicePort }}
+      # If using NodePort, you can optionally specify the nodePort:
+      # nodePort: 30000
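
With type NodePort the UI is also reachable on a high port of any node, independent of the Ingress. On minikube, a sketch:

    minikube service ollama-webui-service -n ollama-namespace
    # or look up the assigned port:
    kubectl get svc ollama-webui-service -n ollama-namespace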

+ 23 - 0
kubernetes/helm/values.yaml

@@ -0,0 +1,23 @@
+namespace: ollama-namespace
+
+ollama:
+  replicaCount: 1
+  image: ollama/ollama:latest
+  servicePort: 11434
+  resources:
+    limits:
+      cpu: "2000m"
+      memory: "2Gi"
+      gpu: "1"  # rendered as nvidia.com/gpu in the StatefulSet template
+  volumeSize: 1Gi
+
+webui:
+  replicaCount: 1
+  image: ghcr.io/ollama-webui/ollama-webui:main
+  servicePort: 8080
+  resources:
+    limits:
+      cpu: "500m"
+      memory: "500Mi"
+  ingress:
+    host: ollama.minikube.local
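
Any of these defaults can be overridden per install without editing the file. A sketch using --set; the namespace and host shown are illustrative, not defaults from this commit:

    helm install ollama-webui ./kubernetes/helm \
      --set namespace=my-ollama \
      --set webui.ingress.host=ollama.example.com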

+ 4 - 0
kubernetes/manifest/base/ollama-namespace.yaml

@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: ollama-namespace

+ 12 - 0
kubernetes/manifest/base/ollama-service.yaml

@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama-service
+  namespace: ollama-namespace
+spec:
+  selector:
+    app: ollama
+  ports:
+  - protocol: TCP
+    port: 11434
+    targetPort: 11434

+ 37 - 0
kubernetes/manifest/base/ollama-statefulset.yaml

@@ -0,0 +1,37 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: ollama
+  namespace: ollama-namespace
+spec:
+  serviceName: "ollama"
+  replicas: 1
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      containers:
+      - name: ollama
+        image: ollama/ollama:latest
+        ports:
+        - containerPort: 11434
+        resources:
+          limits:
+            cpu: "2000m"
+            memory: "2Gi"
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+        tty: true
+  volumeClaimTemplates:
+  - metadata:
+      name: ollama-volume
+    spec:
+      accessModes: [ "ReadWriteOnce" ]
+      resources:
+        requests:
+          storage: 1Gi

+ 28 - 0
kubernetes/manifest/base/webui-deployment.yaml

@@ -0,0 +1,28 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama-webui-deployment
+  namespace: ollama-namespace
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: ollama-webui
+  template:
+    metadata:
+      labels:
+        app: ollama-webui
+    spec:
+      containers:
+      - name: ollama-webui
+        image: ghcr.io/ollama-webui/ollama-webui:main
+        ports:
+        - containerPort: 8080
+        resources:
+          limits:
+            cpu: "500m"
+            memory: "500Mi"
+        env:
+        - name: OLLAMA_API_BASE_URL
+          value: "http://ollama-service.ollama-namespace.svc.cluster.local:11434/api"
+        tty: true

+ 20 - 0
kubernetes/manifest/base/webui-ingress.yaml

@@ -0,0 +1,20 @@
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: ollama-webui-ingress
+  namespace: ollama-namespace
+  #annotations:
+    # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
+    # nginx.ingress.kubernetes.io/rewrite-target: /
+spec:
+  rules:
+  - host: ollama.minikube.local
+    http:
+      paths:
+      - path: /
+        pathType: Prefix
+        backend:
+          service:
+            name: ollama-webui-service
+            port:
+              number: 8080

+ 15 - 0
kubernetes/manifest/base/webui-service.yaml

@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama-webui-service
+  namespace: ollama-namespace
+spec:
+  type: NodePort  # Use LoadBalancer if you're on a cloud that supports it
+  selector:
+    app: ollama-webui
+  ports:
+    - protocol: TCP
+      port: 8080
+      targetPort: 8080
+      # If using NodePort, you can optionally specify the nodePort:
+      # nodePort: 30000

+ 12 - 0
kubernetes/manifest/kustomization.yaml

@@ -0,0 +1,12 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+- base/ollama-namespace.yaml
+- base/ollama-service.yaml
+- base/ollama-statefulset.yaml
+- base/webui-deployment.yaml
+- base/webui-service.yaml
+- base/webui-ingress.yaml
+patches:
+- path: patches/ollama-statefulset-gpu.yaml
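
With this kustomization, the whole stack can be previewed and applied using kubectl's built-in kustomize support; a sketch, run from the repository root:

    kubectl kustomize kubernetes/manifest   # preview the merged output
    kubectl apply -k kubernetes/manifest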

+ 17 - 0
kubernetes/manifest/patches/ollama-statefulset-gpu.yaml

@@ -0,0 +1,17 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: ollama
+  namespace: ollama-namespace
+spec:
+  selector:
+    matchLabels:
+      app: ollama
+  serviceName: "ollama"
+  template:
+    spec:
+      containers:
+      - name: ollama
+        resources:
+          limits:
+            nvidia.com/gpu: "1"
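
This is a strategic merge patch: kustomize matches it to the base StatefulSet by kind and name and merges only the GPU limit in, so clusters without NVIDIA GPUs can simply drop the patch entry from kustomization.yaml. The merged result can be inspected before applying; a sketch:

    kubectl kustomize kubernetes/manifest | grep -B 2 "nvidia.com/gpu"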