# syntax=docker/dockerfile:1
# Note: once we have fully transitioned to the Go server, this will replace the old Dockerfile at the top of the tree
# Toolchain and SDK versions shared by the stages below.
# Global ARGs are only visible in FROM lines; each stage redeclares the ones it uses.
ARG GOLANG_VERSION=1.22.5
ARG CMAKE_VERSION=3.22.1
# Two CUDA toolkits are installed side by side so runners can target both driver generations.
ARG CUDA_VERSION_11=11.3.1
ARG CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
ARG CUDA_VERSION_12=12.4.0
ARG CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
ARG ROCM_VERSION=6.1.2
  9. ### To create a local image for building linux binaries on mac or windows with efficient incremental builds
  10. #
  11. # docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 .
  12. # docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
  13. #
  14. ### Then incremental builds will be much faster in this container
  15. #
  16. # make -C llama -j 10 && go build -trimpath -o dist/linux-amd64/ollama .
  17. #
  18. FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
  19. ARG CMAKE_VERSION
  20. ARG GOLANG_VERSION
  21. ARG CUDA_VERSION_11
  22. ARG CUDA_VERSION_12
  23. COPY ./scripts/rh_linux_deps.sh /
  24. ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
  25. ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
  26. ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
  27. RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
  28. RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
  29. dnf clean all && \
  30. dnf install -y \
  31. zsh \
  32. cuda-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
  33. cuda-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
  34. # TODO intel oneapi goes here...
  35. ENV GOARCH amd64
  36. ENV CGO_ENABLED 1
  37. WORKDIR /go/src/github.com/ollama/ollama/
  38. ENTRYPOINT [ "zsh" ]
  39. ### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
  40. # Note: this does not contain jetson variants
  41. #
  42. # docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 .
  43. # docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
  44. #
  45. FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
  46. ARG CMAKE_VERSION
  47. ARG GOLANG_VERSION
  48. ARG CUDA_VERSION_11
  49. ARG CUDA_VERSION_12
  50. COPY ./scripts/rh_linux_deps.sh /
  51. RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
  52. RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
  53. dnf config-manager --set-enabled appstream && \
  54. dnf clean all && \
  55. dnf install -y \
  56. zsh \
  57. cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
  58. cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
  59. ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
  60. ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
  61. ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
  62. ENV GOARCH amd64
  63. ENV CGO_ENABLED 1
  64. WORKDIR /go/src/github.com/ollama/ollama/
  65. ENTRYPOINT [ "zsh" ]
  66. FROM --platform=linux/amd64 unified-builder-amd64 AS runners-amd64
  67. COPY . .
  68. ARG OLLAMA_SKIP_CUDA_GENERATE
  69. ARG OLLAMA_SKIP_CUDA_11_GENERATE
  70. ARG OLLAMA_SKIP_CUDA_12_GENERATE
  71. ARG OLLAMA_SKIP_ROCM_GENERATE
  72. ARG CUDA_V11_ARCHITECTURES
  73. ARG CUDA_V12_ARCHITECTURES
  74. ARG OLLAMA_FAST_BUILD
  75. RUN --mount=type=cache,target=/root/.ccache \
  76. if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \
  77. make -C llama -j $(expr $(nproc) / 2 ) ; \
  78. else \
  79. make -C llama -j 5 ; \
  80. fi
# Build all arm64 runners inside the unified arm64 builder image.
FROM --platform=linux/arm64 unified-builder-arm64 AS runners-arm64
COPY . .
ARG OLLAMA_SKIP_CUDA_GENERATE
ARG OLLAMA_SKIP_CUDA_11_GENERATE
ARG OLLAMA_SKIP_CUDA_12_GENERATE
ARG CUDA_V11_ARCHITECTURES
ARG CUDA_V12_ARCHITECTURES
ARG OLLAMA_FAST_BUILD
# The ccache cache mount keeps incremental rebuilds fast without bloating the image.
RUN --mount=type=cache,target=/root/.ccache \
make -C llama -j 8
  91. # Intermediate stages used for ./scripts/build_linux.sh
  92. FROM --platform=linux/amd64 centos:7 AS builder-amd64
  93. ARG CMAKE_VERSION
  94. ARG GOLANG_VERSION
  95. COPY ./scripts/rh_linux_deps.sh /
  96. RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
  97. ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
  98. ENV CGO_ENABLED 1
  99. ENV GOARCH amd64
  100. WORKDIR /go/src/github.com/ollama/ollama
  101. FROM --platform=linux/amd64 builder-amd64 AS build-amd64
  102. COPY . .
  103. COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
  104. COPY --from=runners-amd64 /go/src/github.com/ollama/ollama/build/ build/
  105. ARG GOFLAGS
  106. ARG CGO_CFLAGS
  107. ARG OLLAMA_SKIP_ROCM_GENERATE
  108. RUN --mount=type=cache,target=/root/.ccache \
  109. go build -trimpath -o dist/linux-amd64/bin/ollama .
  110. RUN cd dist/linux-$GOARCH && \
  111. tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
  112. RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \
  113. cd dist/linux-$GOARCH-rocm && \
  114. tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
  115. fi
  116. FROM --platform=linux/arm64 rockylinux:8 AS builder-arm64
  117. ARG CMAKE_VERSION
  118. ARG GOLANG_VERSION
  119. COPY ./scripts/rh_linux_deps.sh /
  120. RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
  121. ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
  122. ENV CGO_ENABLED 1
  123. ENV GOARCH arm64
  124. WORKDIR /go/src/github.com/ollama/ollama
# Link the arm64 ollama binary against the prebuilt runners and package the release tarball.
FROM --platform=linux/arm64 builder-arm64 AS build-arm64
COPY . .
COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/build/ build/
ARG GOFLAGS
ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-arm64/bin/ollama .
# Runners are excluded from the tarball; they ship inside the lib tree instead.
RUN cd dist/linux-$GOARCH && \
tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
# Artifact-only stages: `docker build --target dist` with --output extracts the tarballs.
FROM --platform=linux/amd64 scratch AS dist-amd64
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
FROM --platform=linux/arm64 scratch AS dist-arm64
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
# TARGETARCH is a BuildKit automatic platform ARG, available in FROM without declaration.
FROM dist-$TARGETARCH AS dist
# Optimized container images do not carry nested payloads
# Rebuild just the ollama binary (no packaged dist tarballs) for the runtime images below.
FROM --platform=linux/amd64 builder-amd64 AS container-build-amd64
WORKDIR /go/src/github.com/ollama/ollama
COPY . .
ARG GOFLAGS
ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-amd64/bin/ollama .
FROM --platform=linux/arm64 builder-arm64 AS container-build-arm64
WORKDIR /go/src/github.com/ollama/ollama
COPY . .
ARG GOFLAGS
ARG CGO_CFLAGS
RUN --mount=type=cache,target=/root/.ccache \
go build -trimpath -o dist/linux-arm64/bin/ollama .
# For amd64 container images, filter out cuda/rocm to minimize size
# CUDA-only variant: drop the ROCm hipblas backend and rocm runners.
FROM runners-amd64 AS runners-cuda-amd64
RUN rm -rf \
./dist/linux-amd64/lib/ollama/libggml_hipblas.so \
./dist/linux-amd64/lib/ollama/runners/rocm*
# ROCm-only variant: drop the CUDA backend libs, CUDA runtime libs, and cuda runners.
FROM runners-amd64 AS runners-rocm-amd64
RUN rm -rf \
./dist/linux-amd64/lib/ollama/libggml_cuda*.so \
./dist/linux-amd64/lib/ollama/libcu*.so* \
./dist/linux-amd64/lib/ollama/runners/cuda*
  165. FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
  166. RUN apt-get update && \
  167. apt-get install -y ca-certificates && \
  168. rm -rf /var/lib/apt/lists/*
  169. COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
  170. COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
  171. FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
  172. RUN apt-get update && \
  173. apt-get install -y ca-certificates && \
  174. rm -rf /var/lib/apt/lists/*
  175. COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
  176. COPY --from=runners-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
  177. # ROCm libraries larger so we keep it distinct from the CPU/CUDA image
  178. FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
  179. # Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer
  180. # across releases
  181. COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
  182. RUN apt-get update && \
  183. apt-get install -y ca-certificates && \
  184. rm -rf /var/lib/apt/lists/*
  185. COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
  186. COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
  187. EXPOSE 11434
  188. ENV OLLAMA_HOST 0.0.0.0
  189. ENTRYPOINT ["/bin/ollama"]
  190. CMD ["serve"]
  191. FROM runtime-$TARGETARCH
  192. EXPOSE 11434
  193. ENV OLLAMA_HOST 0.0.0.0
  194. ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
  195. ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
  196. ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
  197. ENV NVIDIA_VISIBLE_DEVICES=all
  198. ENTRYPOINT ["/bin/ollama"]
  199. CMD ["serve"]