瀏覽代碼

Prefix all build artifacts with an OS/ARCH dir

This will help keep incremental builds from stomping on each other and make it
easier to stitch together the final runner payloads.
Daniel Hiltgen 10 月之前
父節點
當前提交
5152a430f5
共有 3 個文件被更改，包括 47 次插入 和 35 次删除
  1. 1 7
      llama/.gitignore
  2. 38 25
      llama/Makefile
  3. 8 3
      llama/llama.go

+ 1 - 7
llama/.gitignore

@@ -1,9 +1,3 @@
 *.bin
 *.bin
 *.gguf
 *.gguf
-*.lib
-*.exp
-*.dll
-*.so
-*.o
-*.obj
-ollama_runner*
+build/

+ 38 - 25
llama/Makefile

@@ -1,15 +1,20 @@
 OS := $(shell uname -s)
 OS := $(shell uname -s)
 ARCH := $(or $(ARCH), $(shell uname -m))
 ARCH := $(or $(ARCH), $(shell uname -m))
+ifeq ($(ARCH),x86_64)
+	ARCH := amd64
+endif
+ifneq (,$(findstring MINGW,$(OS))$(findstring MSYS,$(OS)))
+	OS := Windows
+endif
 
 
 export CGO_CFLAGS_ALLOW = -mfma|-mf16c
 export CGO_CFLAGS_ALLOW = -mfma|-mf16c
 export CGO_CXXFLAGS_ALLOW = -mfma|-mf16c
 export CGO_CXXFLAGS_ALLOW = -mfma|-mf16c
 export HIP_PLATFORM = amd
 export HIP_PLATFORM = amd
 
 
-ifeq ($(ARCH),x86_64)
-	ARCH := amd64
-endif
+SRC_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
+BUILD_DIR := $(SRC_DIR)/build/$(OS)/$(ARCH)/
 
 
-ifneq (,$(findstring MINGW,$(OS))$(findstring MSYS,$(OS)))
+ifeq ($(OS),Windows)
 	OBJ_EXT := obj
 	OBJ_EXT := obj
 	SHARED_EXT := dll
 	SHARED_EXT := dll
 	EXE_EXT := .exe
 	EXE_EXT := .exe
@@ -48,11 +53,11 @@ CUDA_SRCS := \
 
 
 CUDA_OBJS := $(CUDA_SRCS:.cu=.cuda.$(OBJ_EXT))
 CUDA_OBJS := $(CUDA_SRCS:.cu=.cuda.$(OBJ_EXT))
 CUDA_OBJS := $(CUDA_OBJS:.c=.cuda.$(OBJ_EXT))
 CUDA_OBJS := $(CUDA_OBJS:.c=.cuda.$(OBJ_EXT))
-CUDA_OBJS := $(CUDA_OBJS:.cpp=.cuda.$(OBJ_EXT))
+CUDA_OBJS := $(addprefix $(BUILD_DIR),$(CUDA_OBJS:.cpp=.cuda.$(OBJ_EXT)))
 
 
 HIP_OBJS := $(CUDA_SRCS:.cu=.hip.$(OBJ_EXT))
 HIP_OBJS := $(CUDA_SRCS:.cu=.hip.$(OBJ_EXT))
 HIP_OBJS := $(HIP_OBJS:.c=.hip.$(OBJ_EXT))
 HIP_OBJS := $(HIP_OBJS:.c=.hip.$(OBJ_EXT))
-HIP_OBJS := $(HIP_OBJS:.cpp=.hip.$(OBJ_EXT))
+HIP_OBJS := $(addprefix $(BUILD_DIR),$(HIP_OBJS:.cpp=.hip.$(OBJ_EXT)))
 
 
 CUDA_FLAGS := \
 CUDA_FLAGS := \
 	--generate-code=arch=compute_50,code=[compute_50,sm_50] \
 	--generate-code=arch=compute_50,code=[compute_50,sm_50] \
@@ -111,64 +116,72 @@ ifeq ($(OS), Linux)
 	CUDA_FLAGS += -fPIC 
 	CUDA_FLAGS += -fPIC 
 	CFLAGS += -Xcompiler -fPIC -D_GNU_SOURCE
 	CFLAGS += -Xcompiler -fPIC -D_GNU_SOURCE
 	CXXFLAGS += -Xcompiler -fPIC -D_GNU_SOURCE
 	CXXFLAGS += -Xcompiler -fPIC -D_GNU_SOURCE
-else ifneq (,$(findstring MINGW,$(OS)))
+else ifeq ($(OS),Windows)
 	HIP_FLAGS += -Xclang --dependent-lib=msvcrt
 	HIP_FLAGS += -Xclang --dependent-lib=msvcrt
 endif
 endif
 
 
-RUNNERS := ollama_runner$(EXE_EXT)
+RUNNERS := $(BUILD_DIR)ollama_runner$(EXE_EXT)
 ifeq ($(ARCH),amd64)
 ifeq ($(ARCH),amd64)
-	RUNNERS += ollama_runner_avx$(EXE_EXT) ollama_runner_avx2$(EXE_EXT)
+	RUNNERS += $(BUILD_DIR)ollama_runner_avx$(EXE_EXT) $(BUILD_DIR)ollama_runner_avx2$(EXE_EXT)
 endif
 endif
 ifneq ($(NVCC),)
 ifneq ($(NVCC),)
-	RUNNERS += ollama_runner_cuda$(EXE_EXT)
+	RUNNERS += $(BUILD_DIR)ollama_runner_cuda$(EXE_EXT)
 endif
 endif
 ifneq ($(HIPCC),)
 ifneq ($(HIPCC),)
-	RUNNERS += ollama_runner_rocm$(EXE_EXT)
+	RUNNERS += $(BUILD_DIR)ollama_runner_rocm$(EXE_EXT)
 endif
 endif
 
 
 runners: $(RUNNERS)
 runners: $(RUNNERS)
 
 
-%.cuda.$(OBJ_EXT): %.cu
+$(BUILD_DIR)%.cuda.$(OBJ_EXT): %.cu
+	@-mkdir -p $(dir $@)
 	$(NVCC) -c $(CUDA_FLAGS) -o $@ $<
 	$(NVCC) -c $(CUDA_FLAGS) -o $@ $<
 
 
-%.cuda.$(OBJ_EXT): %.c
+$(BUILD_DIR)%.cuda.$(OBJ_EXT): %.c
+	@-mkdir -p $(dir $@)
 	$(NVCC) -c $(CFLAGS) -o $@ $<
 	$(NVCC) -c $(CFLAGS) -o $@ $<
 
 
-%.cuda.$(OBJ_EXT): %.cpp
+$(BUILD_DIR)%.cuda.$(OBJ_EXT): %.cpp
+	@-mkdir -p $(dir $@)
 	$(NVCC) -c $(CXXFLAGS) -o $@ $<
 	$(NVCC) -c $(CXXFLAGS) -o $@ $<
 
 
-$(SHARED_PREFIX)ggml_cuda.$(SHARED_EXT): $(CUDA_OBJS)
+$(BUILD_DIR)$(SHARED_PREFIX)ggml_cuda.$(SHARED_EXT): $(CUDA_OBJS)
+	@-mkdir -p $(dir $@)
 	$(NVCC) --shared -lcuda -lcublas -lcudart -lcublasLt $(CUDA_FLAGS) $(CUDA_OBJS) -o $@
 	$(NVCC) --shared -lcuda -lcublas -lcudart -lcublasLt $(CUDA_FLAGS) $(CUDA_OBJS) -o $@
 
 
-%.hip.$(OBJ_EXT): %.cu
+$(BUILD_DIR)%.hip.$(OBJ_EXT): %.cu
+	@-mkdir -p $(dir $@)
 	$(HIPCC) -c $(HIP_FLAGS) -o $@ $<
 	$(HIPCC) -c $(HIP_FLAGS) -o $@ $<
 
 
-%.hip.$(OBJ_EXT): %.c
+$(BUILD_DIR)%.hip.$(OBJ_EXT): %.c
+	@-mkdir -p $(dir $@)
 	$(HIPCC) -c $(CFLAGS) -o $@ $<
 	$(HIPCC) -c $(CFLAGS) -o $@ $<
 
 
-%.hip.$(OBJ_EXT): %.cpp
+$(BUILD_DIR)%.hip.$(OBJ_EXT): %.cpp
+	@-mkdir -p $(dir $@)
 	$(HIPCC) -c $(CXXFLAGS) -o $@ $<
 	$(HIPCC) -c $(CXXFLAGS) -o $@ $<
 
 
-$(SHARED_PREFIX)ggml_hipblas.$(SHARED_EXT): $(HIP_OBJS)
+$(BUILD_DIR)$(SHARED_PREFIX)ggml_hipblas.$(SHARED_EXT): $(HIP_OBJS)
+	@-mkdir -p $(dir $@)
 	$(HIPCC) --shared -lhipblas -lamdhip64 -lrocblas $(HIP_OBJS) -o $@
 	$(HIPCC) --shared -lhipblas -lamdhip64 -lrocblas $(HIP_OBJS) -o $@
 
 
-ollama_runner$(EXE_EXT):
+$(BUILD_DIR)ollama_runner$(EXE_EXT):
 	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -o $@ ./runner
 	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -o $@ ./runner
 
 
-ollama_runner_avx$(EXE_EXT):
+$(BUILD_DIR)ollama_runner_avx$(EXE_EXT):
 	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -tags avx -o $@ ./runner
 	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -tags avx -o $@ ./runner
 
 
-ollama_runner_avx2$(EXE_EXT):
+$(BUILD_DIR)ollama_runner_avx2$(EXE_EXT):
 	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -tags avx,avx2 -o $@ ./runner
 	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -tags avx,avx2 -o $@ ./runner
 
 
-ollama_runner_cuda$(EXE_EXT): $(SHARED_PREFIX)ggml_cuda.$(SHARED_EXT)
+$(BUILD_DIR)ollama_runner_cuda$(EXE_EXT): $(BUILD_DIR)$(SHARED_PREFIX)ggml_cuda.$(SHARED_EXT)
 	CGO_ENABLED=1 GOARCH=$(ARCH) CGO_LDFLAGS=-L"$(CUDA_LIB)" go build -ldflags "-s -w" -tags avx,cuda -o $@ ./runner
 	CGO_ENABLED=1 GOARCH=$(ARCH) CGO_LDFLAGS=-L"$(CUDA_LIB)" go build -ldflags "-s -w" -tags avx,cuda -o $@ ./runner
 
 
-ollama_runner_rocm$(EXE_EXT): $(SHARED_PREFIX)ggml_hipblas.$(SHARED_EXT)
+$(BUILD_DIR)ollama_runner_rocm$(EXE_EXT): $(BUILD_DIR)$(SHARED_PREFIX)ggml_hipblas.$(SHARED_EXT)
 	CGO_ENABLED=1 GOARCH=$(ARCH) CGO_LDFLAGS=-L"$(HIP_LIB)" go build -ldflags "-s -w" -tags avx,rocm -o $@ ./runner
 	CGO_ENABLED=1 GOARCH=$(ARCH) CGO_LDFLAGS=-L"$(HIP_LIB)" go build -ldflags "-s -w" -tags avx,rocm -o $@ ./runner
 
 
 clean:
 clean:
-	rm -f $(CUDA_OBJS) $(HIP_OBJS) $(SHARED_PREFIX)ggml_cuda.$(SHARED_EXT) ggml_cuda.* $(SHARED_PREFIX)ggml_hipblas.* ollama_runner*
+	rm -rf $(BUILD_DIR)
 
 
 .PHONY: runners clean ollama_runner$(EXE_EXT) ollama_runner_avx$(EXE_EXT) ollama_runner_avx2$(EXE_EXT) ollama_runner_cuda$(EXE_EXT) ollama_runner_rocm$(EXE_EXT)
 .PHONY: runners clean ollama_runner$(EXE_EXT) ollama_runner_avx$(EXE_EXT) ollama_runner_avx2$(EXE_EXT) ollama_runner_cuda$(EXE_EXT) ollama_runner_rocm$(EXE_EXT)
 
 

+ 8 - 3
llama/llama.go

@@ -9,13 +9,18 @@ package llama
 #cgo darwin,amd64 CFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers
 #cgo darwin,amd64 CFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers
 #cgo darwin,amd64 CXXFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers
 #cgo darwin,amd64 CXXFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers
 #cgo darwin,amd64 LDFLAGS: -framework Foundation
 #cgo darwin,amd64 LDFLAGS: -framework Foundation
+#cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/build/Darwin/amd64
 #cgo darwin,amd64,avx2 CFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
 #cgo darwin,amd64,avx2 CFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
 #cgo darwin,amd64,avx2 CXXFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
 #cgo darwin,amd64,avx2 CXXFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
 #cgo darwin,amd64,avx2 LDFLAGS: -framework Accelerate
 #cgo darwin,amd64,avx2 LDFLAGS: -framework Accelerate
 #cgo linux CFLAGS: -D_GNU_SOURCE
 #cgo linux CFLAGS: -D_GNU_SOURCE
 #cgo linux CXXFLAGS: -D_GNU_SOURCE
 #cgo linux CXXFLAGS: -D_GNU_SOURCE
+#cgo linux,arm64 LDFLAGS: -L${SRCDIR}/build/Linux/arm64
+#cgo linux,amd64 LDFLAGS: -L${SRCDIR}/build/Linux/amd64
 #cgo windows CFLAGS: -Wno-discarded-qualifiers
 #cgo windows CFLAGS: -Wno-discarded-qualifiers
 #cgo windows LDFLAGS: -lmsvcrt
 #cgo windows LDFLAGS: -lmsvcrt
+#cgo windows,arm64 LDFLAGS: -L${SRCDIR}/build/Windows/arm64
+#cgo windows,amd64 LDFLAGS: -L${SRCDIR}/build/Windows/amd64
 #cgo avx CFLAGS: -mavx
 #cgo avx CFLAGS: -mavx
 #cgo avx CXXFLAGS: -mavx
 #cgo avx CXXFLAGS: -mavx
 #cgo avx2 CFLAGS: -mavx2 -mfma
 #cgo avx2 CFLAGS: -mavx2 -mfma
@@ -25,9 +30,9 @@ package llama
 #cgo rocm CFLAGS: -DGGML_USE_CUDA -DGGML_USE_HIPBLAS -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
 #cgo rocm CFLAGS: -DGGML_USE_CUDA -DGGML_USE_HIPBLAS -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
 #cgo rocm CXXFLAGS: -DGGML_USE_CUDA -DGGML_USE_HIPBLAS -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
 #cgo rocm CXXFLAGS: -DGGML_USE_CUDA -DGGML_USE_HIPBLAS -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
 #cgo rocm LDFLAGS: -L${SRCDIR} -lggml_hipblas -lhipblas -lamdhip64 -lrocblas
 #cgo rocm LDFLAGS: -L${SRCDIR} -lggml_hipblas -lhipblas -lamdhip64 -lrocblas
-#cgo windows,cuda LDFLAGS: -L${SRCDIR} -lggml_cuda -lcuda -lcudart -lcublas -lcublasLt
-#cgo windows,rocm LDFLAGS: -L${SRCDIR} -lggml_hipblas -lhipblas -lamdhip64 -lrocblas
-#cgo linux,cuda LDFLAGS: -L${SRCDIR} -L/usr/local/cuda/lib64 -lggml_cuda -lcuda -lcudart -lcublas -lcublasLt -lpthread -ldl -lrt
+#cgo windows,cuda LDFLAGS: -lggml_cuda -lcuda -lcudart -lcublas -lcublasLt
+#cgo windows,rocm LDFLAGS: -lggml_hipblas -lhipblas -lamdhip64 -lrocblas
+#cgo linux,cuda LDFLAGS: -L/usr/local/cuda/lib64 -lggml_cuda -lcuda -lcudart -lcublas -lcublasLt -lpthread -ldl -lrt
 #cgo linux,rocm LDFLAGS: -L/opt/rocm/lib
 #cgo linux,rocm LDFLAGS: -L/opt/rocm/lib
 
 
 #include <stdlib.h>
 #include <stdlib.h>