소스 검색

Fix rocm windows build and clean up dependency gathering (#7305)

On Windows, ensure the Windows version define (_WIN32_WINNT) is properly set for ROCm.
Remove duplicate ROCm arch flags.
Resolve wildcards in the targets so parallel builds don't race.
Use readlink to resolve ROCm dependencies, since wildcards omit libelf.
Keep Windows ROCm deps aligned with the unified packaging model.
Daniel Hiltgen 6 달 전
부모
커밋
5c44461ccf
3개의 변경된 파일, 18줄 추가 및 14줄 삭제
  1. 2 2
      llama/Dockerfile
  2. 12 8
      llama/make/Makefile.rocm
  3. 4 4
      llama/make/gpu.make

+ 2 - 2
llama/Dockerfile

@@ -9,7 +9,7 @@ ARG ROCM_VERSION=6.1.2
 
 
 ### To create a local image for building linux binaries on mac or windows with efficient incremental builds
 ### To create a local image for building linux binaries on mac or windows with efficient incremental builds
 #
 #
-# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile.new --target unified-builder-amd64 .
+# docker build --platform linux/amd64 -t builder-amd64 -f llama/Dockerfile --target unified-builder-amd64 .
 # docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
 # docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
 #
 #
 ### Then incremental builds will be much faster in this container
 ### Then incremental builds will be much faster in this container
@@ -41,7 +41,7 @@ ENTRYPOINT [ "zsh" ]
 ### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
 ### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
 # Note: this does not contain jetson variants
 # Note: this does not contain jetson variants
 #
 #
-# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile.new --target unified-builder-arm64 .
+# docker build --platform linux/arm64 -t builder-arm64 -f llama/Dockerfile --target unified-builder-arm64 .
 # docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
 # docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
 #
 #
 FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
 FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64

+ 12 - 8
llama/make/Makefile.rocm

@@ -21,7 +21,8 @@ else ifeq ($(OS),linux)
 	GPU_LIB_DIR_LINUX := $(HIP_PATH)/lib
 	GPU_LIB_DIR_LINUX := $(HIP_PATH)/lib
 	GPU_COMPILER_LINUX := $(shell X=$$(which hipcc 2>/dev/null) && echo $$X)
 	GPU_COMPILER_LINUX := $(shell X=$$(which hipcc 2>/dev/null) && echo $$X)
 	GPU_COMPILER:=$(GPU_COMPILER_LINUX)
 	GPU_COMPILER:=$(GPU_COMPILER_LINUX)
-	ROCM_TRANSITIVE_LIBS = $(shell ldd $(ROCM_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' '  | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf | sort -u )
+	ROCM_TRANSITIVE_LIBS_INITIAL = $(sort $(shell ldd $(ROCM_LIBS) | grep "=>" | cut -f2 -d= | cut -f2 -d' '  | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf))
+	ROCM_TRANSITIVE_LIBS = $(sort $(shell readlink -f $(ROCM_TRANSITIVE_LIBS_INITIAL)) $(ROCM_TRANSITIVE_LIBS_INITIAL))
 endif
 endif
 
 
 # TODO future multi-variant support for ROCm
 # TODO future multi-variant support for ROCm
@@ -36,14 +37,18 @@ GPU_RUNNER_DRIVER_LIB_LINK := -lamdhip64
 GPU_RUNNER_LIBS_SHORT := hipblas rocblas
 GPU_RUNNER_LIBS_SHORT := hipblas rocblas
 GPU_PATH_ROOT_WIN=$(dir $(GPU_LIB_DIR_WIN))
 GPU_PATH_ROOT_WIN=$(dir $(GPU_LIB_DIR_WIN))
 GPU_PATH_ROOT_LINUX=$(dir $(GPU_LIB_DIR_LINUX))
 GPU_PATH_ROOT_LINUX=$(dir $(GPU_LIB_DIR_LINUX))
-GPU_COMPILER_CFLAGS_WIN = $(CFLAGS)
+GPU_COMPILER_CFLAGS_WIN = $(CFLAGS) -D_WIN32_WINNT=0x602
 GPU_COMPILER_CFLAGS_LINUX = $(CFLAGS) -fPIC -D_GNU_SOURCE
 GPU_COMPILER_CFLAGS_LINUX = $(CFLAGS) -fPIC -D_GNU_SOURCE
-GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS)
+GPU_COMPILER_CXXFLAGS_WIN = $(CXXFLAGS) -D_WIN32_WINNT=0x602
 GPU_COMPILER_CXXFLAGS_LINUX = $(CXXFLAGS) -fPIC -D_GNU_SOURCE
 GPU_COMPILER_CXXFLAGS_LINUX = $(CXXFLAGS) -fPIC -D_GNU_SOURCE
 
 
 ROCM_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT),$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))
 ROCM_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT),$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))
-ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH)-rocm)/lib/ollama
-ROCM_DIST_DEPS_LIBS = $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(ROCM_LIBS)) $(notdir $(ROCM_TRANSITIVE_LIBS)))
+ifeq ($(OS),windows)
+	ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH))/lib/ollama
+else ifeq ($(OS),linux)
+	ROCM_DIST_DEPS_DIR = $(abspath $(SRC_DIR)/../dist/$(OS)-$(ARCH)-rocm)/lib/ollama
+endif
+ROCM_DIST_DEPS_LIBS= $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(ROCM_LIBS)) $(notdir $(ROCM_TRANSITIVE_LIBS))))
 ROCBLAS_DIST_DEP_MANIFEST = $(ROCM_DIST_DEPS_DIR)/rocblas/library/TensileManifest.txt
 ROCBLAS_DIST_DEP_MANIFEST = $(ROCM_DIST_DEPS_DIR)/rocblas/library/TensileManifest.txt
 
 
 ifeq ($(OS),linux)
 ifeq ($(OS),linux)
@@ -84,8 +89,7 @@ GPU_COMPILER_CUFLAGS = \
 	-Wno-pass-failed \
 	-Wno-pass-failed \
 	-Wno-deprecated-declarations \
 	-Wno-deprecated-declarations \
 	-Wno-unused-result \
 	-Wno-unused-result \
-	-I. \
-	$(foreach arch, $(HIP_ARCHS_COMMON), --offload-arch=$(arch))
+	-I.
 
 
 include make/gpu.make
 include make/gpu.make
 
 
@@ -98,4 +102,4 @@ $(ROCBLAS_DIST_DEP_MANIFEST):
 	@echo "rocblas library copy complete"
 	@echo "rocblas library copy complete"
 $(ROCM_DIST_DEPS_LIBS): 
 $(ROCM_DIST_DEPS_LIBS): 
 	@-mkdir -p $(dir $@)
 	@-mkdir -p $(dir $@)
-	$(CP) $(dir $(filter %$(notdir $@),$(ROCM_LIBS) $(ROCM_TRANSITIVE_LIBS)))/$(notdir $@)* $(dir $@)
+	$(CP) $(dir $(filter %$(notdir $@),$(ROCM_LIBS) $(ROCM_TRANSITIVE_LIBS)))/$(notdir $@) $(dir $@)

+ 4 - 4
llama/make/gpu.make

@@ -79,7 +79,7 @@ $(GPU_RUNNER_NAME): $(BUILD_RUNNERS) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS)
 # Build targets
 # Build targets
 $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cu
 $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cu
 	@-mkdir -p $(dir $@)
 	@-mkdir -p $(dir $@)
-	$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CUFLAGS) $(GPU_RUNNER_ARCH_FLAGS) -o $@ $<
+	$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) $(GPU_COMPILER_CUFLAGS) $(GPU_RUNNER_ARCH_FLAGS) -o $@ $<
 $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.c
 $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.c
 	@-mkdir -p $(dir $@)
 	@-mkdir -p $(dir $@)
 	$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) -o $@ $<
 	$(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) -o $@ $<
@@ -97,14 +97,14 @@ $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).
 # Distribution targets
 # Distribution targets
 $(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%
 $(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%
 	@-mkdir -p $(dir $@)
 	@-mkdir -p $(dir $@)
-	cp $< $@
+	$(CP) $< $@
 $(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
 $(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
 $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
 $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
 	@-mkdir -p $(dir $@)
 	@-mkdir -p $(dir $@)
-	cp $< $@
+	$(CP) $< $@
 $(DIST_GPU_RUNNER_LIB_DEPS): 
 $(DIST_GPU_RUNNER_LIB_DEPS): 
 	@-mkdir -p $(dir $@)
 	@-mkdir -p $(dir $@)
-	$(CP) $(GPU_LIB_DIR)/$(notdir $@)* $(dir $@)
+	$(CP) $(GPU_LIB_DIR)/$(notdir $@) $(dir $@)
 
 
 # Payload targets
 # Payload targets
 $(RUNNERS_PAYLOAD_DIR)/%/ollama_llama_server.gz: $(RUNNERS_BUILD_DIR)/%/ollama_llama_server 
 $(RUNNERS_PAYLOAD_DIR)/%/ollama_llama_server.gz: $(RUNNERS_BUILD_DIR)/%/ollama_llama_server