Browse Source

Prefix all build artifacts with an OS/ARCH dir

This will help keep incremental builds from stomping on each other and will
make it easier to stitch together the final runner payloads.
Daniel Hiltgen 10 months ago
parent
commit
5152a430f5
3 changed files with 47 additions and 35 deletions
  1. 1 7
      llama/.gitignore
  2. 38 25
      llama/Makefile
  3. 8 3
      llama/llama.go

+ 1 - 7
llama/.gitignore

@@ -1,9 +1,3 @@
 *.bin
 *.gguf
-*.lib
-*.exp
-*.dll
-*.so
-*.o
-*.obj
-ollama_runner*
+build/

+ 38 - 25
llama/Makefile

@@ -1,15 +1,20 @@
 OS := $(shell uname -s)
 ARCH := $(or $(ARCH), $(shell uname -m))
+ifeq ($(ARCH),x86_64)
+	ARCH := amd64
+endif
+ifneq (,$(findstring MINGW,$(OS))$(findstring MSYS,$(OS)))
+	OS := Windows
+endif
 
 export CGO_CFLAGS_ALLOW = -mfma|-mf16c
 export CGO_CXXFLAGS_ALLOW = -mfma|-mf16c
 export HIP_PLATFORM = amd
 
-ifeq ($(ARCH),x86_64)
-	ARCH := amd64
-endif
+SRC_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
+BUILD_DIR := $(SRC_DIR)/build/$(OS)/$(ARCH)/
 
-ifneq (,$(findstring MINGW,$(OS))$(findstring MSYS,$(OS)))
+ifeq ($(OS),Windows)
 	OBJ_EXT := obj
 	SHARED_EXT := dll
 	EXE_EXT := .exe
@@ -48,11 +53,11 @@ CUDA_SRCS := \
 
 CUDA_OBJS := $(CUDA_SRCS:.cu=.cuda.$(OBJ_EXT))
 CUDA_OBJS := $(CUDA_OBJS:.c=.cuda.$(OBJ_EXT))
-CUDA_OBJS := $(CUDA_OBJS:.cpp=.cuda.$(OBJ_EXT))
+CUDA_OBJS := $(addprefix $(BUILD_DIR),$(CUDA_OBJS:.cpp=.cuda.$(OBJ_EXT)))
 
 HIP_OBJS := $(CUDA_SRCS:.cu=.hip.$(OBJ_EXT))
 HIP_OBJS := $(HIP_OBJS:.c=.hip.$(OBJ_EXT))
-HIP_OBJS := $(HIP_OBJS:.cpp=.hip.$(OBJ_EXT))
+HIP_OBJS := $(addprefix $(BUILD_DIR),$(HIP_OBJS:.cpp=.hip.$(OBJ_EXT)))
 
 CUDA_FLAGS := \
 	--generate-code=arch=compute_50,code=[compute_50,sm_50] \
@@ -111,64 +116,72 @@ ifeq ($(OS), Linux)
 	CUDA_FLAGS += -fPIC 
 	CFLAGS += -Xcompiler -fPIC -D_GNU_SOURCE
 	CXXFLAGS += -Xcompiler -fPIC -D_GNU_SOURCE
-else ifneq (,$(findstring MINGW,$(OS)))
+else ifeq ($(OS),Windows)
 	HIP_FLAGS += -Xclang --dependent-lib=msvcrt
 endif
 
-RUNNERS := ollama_runner$(EXE_EXT)
+RUNNERS := $(BUILD_DIR)ollama_runner$(EXE_EXT)
 ifeq ($(ARCH),amd64)
-	RUNNERS += ollama_runner_avx$(EXE_EXT) ollama_runner_avx2$(EXE_EXT)
+	RUNNERS += $(BUILD_DIR)ollama_runner_avx$(EXE_EXT) $(BUILD_DIR)ollama_runner_avx2$(EXE_EXT)
 endif
 ifneq ($(NVCC),)
-	RUNNERS += ollama_runner_cuda$(EXE_EXT)
+	RUNNERS += $(BUILD_DIR)ollama_runner_cuda$(EXE_EXT)
 endif
 ifneq ($(HIPCC),)
-	RUNNERS += ollama_runner_rocm$(EXE_EXT)
+	RUNNERS += $(BUILD_DIR)ollama_runner_rocm$(EXE_EXT)
 endif
 
 runners: $(RUNNERS)
 
-%.cuda.$(OBJ_EXT): %.cu
+$(BUILD_DIR)%.cuda.$(OBJ_EXT): %.cu
+	@-mkdir -p $(dir $@)
 	$(NVCC) -c $(CUDA_FLAGS) -o $@ $<
 
-%.cuda.$(OBJ_EXT): %.c
+$(BUILD_DIR)%.cuda.$(OBJ_EXT): %.c
+	@-mkdir -p $(dir $@)
 	$(NVCC) -c $(CFLAGS) -o $@ $<
 
-%.cuda.$(OBJ_EXT): %.cpp
+$(BUILD_DIR)%.cuda.$(OBJ_EXT): %.cpp
+	@-mkdir -p $(dir $@)
 	$(NVCC) -c $(CXXFLAGS) -o $@ $<
 
-$(SHARED_PREFIX)ggml_cuda.$(SHARED_EXT): $(CUDA_OBJS)
+$(BUILD_DIR)$(SHARED_PREFIX)ggml_cuda.$(SHARED_EXT): $(CUDA_OBJS)
+	@-mkdir -p $(dir $@)
 	$(NVCC) --shared -lcuda -lcublas -lcudart -lcublasLt $(CUDA_FLAGS) $(CUDA_OBJS) -o $@
 
-%.hip.$(OBJ_EXT): %.cu
+$(BUILD_DIR)%.hip.$(OBJ_EXT): %.cu
+	@-mkdir -p $(dir $@)
 	$(HIPCC) -c $(HIP_FLAGS) -o $@ $<
 
-%.hip.$(OBJ_EXT): %.c
+$(BUILD_DIR)%.hip.$(OBJ_EXT): %.c
+	@-mkdir -p $(dir $@)
 	$(HIPCC) -c $(CFLAGS) -o $@ $<
 
-%.hip.$(OBJ_EXT): %.cpp
+$(BUILD_DIR)%.hip.$(OBJ_EXT): %.cpp
+	@-mkdir -p $(dir $@)
 	$(HIPCC) -c $(CXXFLAGS) -o $@ $<
 
-$(SHARED_PREFIX)ggml_hipblas.$(SHARED_EXT): $(HIP_OBJS)
+$(BUILD_DIR)$(SHARED_PREFIX)ggml_hipblas.$(SHARED_EXT): $(HIP_OBJS)
+	@-mkdir -p $(dir $@)
 	$(HIPCC) --shared -lhipblas -lamdhip64 -lrocblas $(HIP_OBJS) -o $@
 
-ollama_runner$(EXE_EXT):
+$(BUILD_DIR)ollama_runner$(EXE_EXT):
 	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -o $@ ./runner
 
-ollama_runner_avx$(EXE_EXT):
+$(BUILD_DIR)ollama_runner_avx$(EXE_EXT):
 	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -tags avx -o $@ ./runner
 
-ollama_runner_avx2$(EXE_EXT):
+$(BUILD_DIR)ollama_runner_avx2$(EXE_EXT):
 	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -tags avx,avx2 -o $@ ./runner
 
-ollama_runner_cuda$(EXE_EXT): $(SHARED_PREFIX)ggml_cuda.$(SHARED_EXT)
+$(BUILD_DIR)ollama_runner_cuda$(EXE_EXT): $(BUILD_DIR)$(SHARED_PREFIX)ggml_cuda.$(SHARED_EXT)
 	CGO_ENABLED=1 GOARCH=$(ARCH) CGO_LDFLAGS=-L"$(CUDA_LIB)" go build -ldflags "-s -w" -tags avx,cuda -o $@ ./runner
 
-ollama_runner_rocm$(EXE_EXT): $(SHARED_PREFIX)ggml_hipblas.$(SHARED_EXT)
+$(BUILD_DIR)ollama_runner_rocm$(EXE_EXT): $(BUILD_DIR)$(SHARED_PREFIX)ggml_hipblas.$(SHARED_EXT)
 	CGO_ENABLED=1 GOARCH=$(ARCH) CGO_LDFLAGS=-L"$(HIP_LIB)" go build -ldflags "-s -w" -tags avx,rocm -o $@ ./runner
 
 clean:
-	rm -f $(CUDA_OBJS) $(HIP_OBJS) $(SHARED_PREFIX)ggml_cuda.$(SHARED_EXT) ggml_cuda.* $(SHARED_PREFIX)ggml_hipblas.* ollama_runner*
+	rm -rf $(BUILD_DIR)
 
 .PHONY: runners clean ollama_runner$(EXE_EXT) ollama_runner_avx$(EXE_EXT) ollama_runner_avx2$(EXE_EXT) ollama_runner_cuda$(EXE_EXT) ollama_runner_rocm$(EXE_EXT)
 

+ 8 - 3
llama/llama.go

@@ -9,13 +9,18 @@ package llama
 #cgo darwin,amd64 CFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers
 #cgo darwin,amd64 CXXFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers
 #cgo darwin,amd64 LDFLAGS: -framework Foundation
+#cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/Darwin/amd64
 #cgo darwin,amd64,avx2 CFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
 #cgo darwin,amd64,avx2 CXXFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
 #cgo darwin,amd64,avx2 LDFLAGS: -framework Accelerate
 #cgo linux CFLAGS: -D_GNU_SOURCE
 #cgo linux CXXFLAGS: -D_GNU_SOURCE
+#cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/Linux/arm64
+#cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/Linux/amd64
 #cgo windows CFLAGS: -Wno-discarded-qualifiers
 #cgo windows LDFLAGS: -lmsvcrt
+#cgo windows,arm64 LDFLAGS: -L${SRCDIR}/build/Windows/arm64
+#cgo windows,amd64 LDFLAGS: -L${SRCDIR}/build/Windows/amd64
 #cgo avx CFLAGS: -mavx
 #cgo avx CXXFLAGS: -mavx
 #cgo avx2 CFLAGS: -mavx2 -mfma
@@ -25,9 +30,9 @@ package llama
 #cgo rocm CFLAGS: -DGGML_USE_CUDA -DGGML_USE_HIPBLAS -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
 #cgo rocm CXXFLAGS: -DGGML_USE_CUDA -DGGML_USE_HIPBLAS -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
 #cgo rocm LDFLAGS: -L${SRCDIR} -lggml_hipblas -lhipblas -lamdhip64 -lrocblas
-#cgo windows,cuda LDFLAGS: -L${SRCDIR} -lggml_cuda -lcuda -lcudart -lcublas -lcublasLt
-#cgo windows,rocm LDFLAGS: -L${SRCDIR} -lggml_hipblas -lhipblas -lamdhip64 -lrocblas
-#cgo linux,cuda LDFLAGS: -L${SRCDIR} -L/usr/local/cuda/lib64 -lggml_cuda -lcuda -lcudart -lcublas -lcublasLt -lpthread -ldl -lrt
+#cgo windows,cuda LDFLAGS: -lggml_cuda -lcuda -lcudart -lcublas -lcublasLt
+#cgo windows,rocm LDFLAGS: -lggml_hipblas -lhipblas -lamdhip64 -lrocblas
+#cgo linux,cuda LDFLAGS: -L/usr/local/cuda/lib64 -lggml_cuda -lcuda -lcudart -lcublas -lcublasLt -lpthread -ldl -lrt
 #cgo linux,rocm LDFLAGS: -L/opt/rocm/lib
 
 #include <stdlib.h>