Browse Source

Fix rocm windows build and clean up dependency gathering (#7305)

On windows ensure windows version define is properly set for rocm.
Remove duplicate rocm arch flags.
Resolve wildcards in the targets so parallel builds don't race.
Use readlink to resolve rocm dependencies since wildcards omit libelf
Keep windows rocm deps aligned with unified packaging model
Daniel Hiltgen 6 months ago
parent
commit
5c44461ccf
3 changed files with 18 additions and 14 deletions
  1. 2 2
      llama/Dockerfile
  2. 12 8
      llama/make/Makefile.rocm
  3. 4 4
      llama/make/gpu.make

+ 2 - 2
llama/Dockerfile

@@ -9,7 +9,7 @@ ARG ROCM_VERSION=6.1.2
 
 ### To create a local image for building linux binaries on mac or windows with efficient incremental builds
 #
-# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile.new --target unified-builder-amd64 .
+# docker build --platform linux/amd64 -t builder-amd64 -f llama/Dockerfile --target unified-builder-amd64 .
 # docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
 #
 ### Then incremental builds will be much faster in this container
@@ -41,7 +41,7 @@ ENTRYPOINT [ "zsh" ]
 ### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
 # Note: this does not contain jetson variants
 #
-# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile.new --target unified-builder-arm64 .
+# docker build --platform linux/arm64 -t builder-arm64 -f llama/Dockerfile --target unified-builder-arm64 .
 # docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
 #
 FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64

+ 12 - 8
llama/make/Makefile.rocm

@@ -21,7 +21,8 @@ else ifeq ($(OS),linux)
 	GPU_LIB_DIR_LINUX := $(HIP_PATH)/lib
 	GPU_COMPILER_LINUX := $(shell X=$$(which hipcc 2>/dev/null) && echo $$X)
 	GPU_COMPILER:=$(GPU_COMPILER_LINUX)
-	ROCM_TRANSITIVE_LIBS = $(shell ldd $(ROCM_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' '  | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf | sort -u )
+	ROCM_TRANSITIVE_LIBS_INITIAL = $(sort $(shell ldd $(ROCM_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' '  | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf))
+	ROCM_TRANSITIVE_LIBS = $(sort $(shell readlink -f $(ROCM_TRANSITIVE_LIBS_INITIAL)) $(ROCM_TRANSITIVE_LIBS_INITIAL))
 endif
 
 # TODO future multi-variant support for ROCm
@@ -36,14 +37,18 @@ GPU_RUNNER_DRIVER_LIB_LINK := -lamdhip64
 GPU_RUNNER_LIBS_SHORT := hipblas rocblas
 GPU_PATH_ROOT_WIN=$(dir $(GPU_LIB_DIR_WIN))
 GPU_PATH_ROOT_LINUX=$(dir $(GPU_LIB_DIR_LINUX))
-GPU_COMPILER_CFLAGS_WIN = $(CFLAGS)
+GPU_COMPILER_CFLAGS_WIN = $(CFLAGS) -D_WIN32_WINNT=0x602
 GPU_COMPILER_CFLAGS_LINUX = $(CFLAGS) -fPIC -D_GNU_SOURCE
-GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS)
+GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS) -D_WIN32_WINNT=0x602
 GPU_COMPILER_CXXFLAGS_LINUX = $(CXXFLAGS) -fPIC -D_GNU_SOURCE
 
 ROCM_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT),$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))
-ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH)-rocm)/lib/ollama
-ROCM_DIST_DEPS_LIBS = $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(ROCM_LIBS)) $(notdir $(ROCM_TRANSITIVE_LIBS)))
+ifeq ($(OS),windows)
+	ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH))/lib/ollama
+else ifeq ($(OS),linux)
+	ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH)-rocm)/lib/ollama
+endif
+ROCM_DIST_DEPS_LIBS= $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(ROCM_LIBS)) $(notdir $(ROCM_TRANSITIVE_LIBS))))
 ROCBLAS_DIST_DEP_MANIFEST = $(ROCM_DIST_DEPS_DIR)/rocblas/library/TensileManifest.txt
 
 ifeq ($(OS),linux)
@@ -84,8 +89,7 @@ GPU_COMPILER_CUFLAGS = \
 	-Wno-pass-failed \
 	-Wno-deprecated-declarations \
 	-Wno-unused-result \
-	-I. \
-	$(foreach arch, $(HIP_ARCHS_COMMON), --offload-arch=$(arch))
+	-I.
 
 include make/gpu.make
 
@@ -98,4 +102,4 @@ $(ROCBLAS_DIST_DEP_MANIFEST):
 	@echo "rocblas library copy complete"
 $(ROCM_DIST_DEPS_LIBS): 
 	@-mkdir -p $(dir $@)
-	$(CP) $(dir $(filter %$(notdir $@),$(ROCM_LIBS) $(ROCM_TRANSITIVE_LIBS)))/$(notdir $@)* $(dir $@)
+	$(CP) $(dir $(filter %$(notdir $@),$(ROCM_LIBS) $(ROCM_TRANSITIVE_LIBS)))/$(notdir $@) $(dir $@)

+ 4 - 4
llama/make/gpu.make

@@ -79,7 +79,7 @@ $(GPU_RUNNER_NAME): $(BUILD_RUNNERS) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS)
 # Build targets
 $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cu
 	@-mkdir -p $(dir $@)
-	$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CUFLAGS) $(GPU_RUNNER_ARCH_FLAGS) -o $@ $<
+	$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) $(GPU_COMPILER_CUFLAGS) $(GPU_RUNNER_ARCH_FLAGS) -o $@ $<
 $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.c
 	@-mkdir -p $(dir $@)
 	$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) -o $@ $<
@@ -97,14 +97,14 @@ $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).
 # Distribution targets
 $(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%
 	@-mkdir -p $(dir $@)
-	cp $< $@
+	$(CP) $< $@
 $(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
 $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
 	@-mkdir -p $(dir $@)
-	cp $< $@
+	$(CP) $< $@
 $(DIST_GPU_RUNNER_LIB_DEPS): 
 	@-mkdir -p $(dir $@)
-	$(CP) $(GPU_LIB_DIR)/$(notdir $@)* $(dir $@)
+	$(CP) $(GPU_LIB_DIR)/$(notdir $@) $(dir $@)
 
 # Payload targets
 $(RUNNERS_PAYLOAD_DIR)/%/ollama_llama_server.gz: $(RUNNERS_BUILD_DIR)/%/ollama_llama_server