# gpu.make
  1. # Generalized GPU runner build
  2. ifndef GPU_RUNNER_NAME
  3. dummy:
  4. $(error This makefile is not meant to build directly, but instead included in other Makefiles that set required variables)
  5. endif
  6. ifeq ($(OS),windows)
  7. GPU_COMPILER:=$(GPU_COMPILER_WIN)
  8. GPU_LIB_DIR:=$(GPU_LIB_DIR_WIN)
  9. CGO_EXTRA_LDFLAGS:=$(CGO_EXTRA_LDFLAGS_WIN)
  10. GPU_COMPILER_CFLAGS = $(GPU_COMPILER_CFLAGS_WIN)
  11. GPU_COMPILER_CXXFLAGS = $(GPU_COMPILER_CXXFLAGS_WIN)
  12. else ifeq ($(OS),linux)
  13. GPU_COMPILER:=$(GPU_COMPILER_LINUX)
  14. GPU_LIB_DIR:=$(GPU_LIB_DIR_LINUX)
  15. CGO_EXTRA_LDFLAGS:=$(CGO_EXTRA_LDFLAGS_LINUX)
  16. GPU_COMPILER_CFLAGS = $(GPU_COMPILER_CFLAGS_LINUX)
  17. GPU_COMPILER_CXXFLAGS = $(GPU_COMPILER_CXXFLAGS_LINUX)
  18. endif
  19. GPU_GOFLAGS="-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$(VERSION)\" \"-X=github.com/ollama/ollama/llama.CpuFeatures=$(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS))\" $(TARGET_LDFLAGS)"
  20. # TODO Unify how we handle dependencies in the dist/packaging and install flow
  21. # today, cuda is bundled, but rocm is split out. Should split them each out by runner
  22. DIST_GPU_RUNNER_DEPS_DIR = $(DIST_LIB_DIR)
  23. ifeq ($(OS),windows)
  24. _OS_GPU_RUNNER_CPU_FLAGS=$(call uc,$(GPU_RUNNER_CPU_FLAGS))
  25. else ifeq ($(OS),linux)
  26. _OS_GPU_RUNNER_CPU_FLAGS=$(GPU_RUNNER_CPU_FLAGS)
  27. endif
  28. GPU_RUNNER_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT).*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))
  29. DIST_GPU_RUNNER_LIB_DEPS = $(addprefix $(DIST_GPU_RUNNER_DEPS_DIR)/,$(notdir $(GPU_RUNNER_LIBS)))
  30. COMMON_SRCS := \
  31. $(wildcard *.c) \
  32. $(wildcard *.cpp)
  33. COMMON_HDRS := \
  34. $(wildcard *.h) \
  35. $(wildcard *.hpp)
  36. GPU_RUNNER_SRCS := \
  37. ggml-cuda.cu \
  38. $(filter-out $(wildcard ggml-cuda/fattn*.cu),$(wildcard ggml-cuda/*.cu)) \
  39. $(wildcard ggml-cuda/template-instances/mmq*.cu) \
  40. ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c sgemm.cpp ggml-aarch64.c
  41. GPU_RUNNER_HDRS := \
  42. $(wildcard ggml-cuda/*.cuh)
  43. # Conditional flags and components to speed up developer builds
  44. ifneq ($(OLLAMA_FAST_BUILD),)
  45. GPU_COMPILER_CUFLAGS += \
  46. -DGGML_DISABLE_FLASH_ATTN
  47. else
  48. GPU_RUNNER_SRCS += \
  49. $(wildcard ggml-cuda/fattn*.cu) \
  50. $(wildcard ggml-cuda/template-instances/fattn-wmma*.cu) \
  51. $(wildcard ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu) \
  52. $(wildcard ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu) \
  53. $(wildcard ggml-cuda/template-instances/fattn-vec*f16-f16.cu)
  54. endif
  55. GPU_RUNNER_OBJS := $(GPU_RUNNER_SRCS:.cu=.$(GPU_RUNNER_NAME).$(OBJ_EXT))
  56. GPU_RUNNER_OBJS := $(GPU_RUNNER_OBJS:.c=.$(GPU_RUNNER_NAME).$(OBJ_EXT))
  57. GPU_RUNNER_OBJS := $(addprefix $(BUILD_DIR)/,$(GPU_RUNNER_OBJS:.cpp=.$(GPU_RUNNER_NAME).$(OBJ_EXT)))
  58. DIST_RUNNERS = $(addprefix $(RUNNERS_DIST_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(GPU_RUNNER_NAME)))
  59. ifneq ($(OS),windows)
  60. PAYLOAD_RUNNERS = $(addprefix $(RUNNERS_PAYLOAD_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT).gz,$(GPU_RUNNER_NAME)))
  61. endif
  62. BUILD_RUNNERS = $(addprefix $(RUNNERS_BUILD_DIR)/,$(addsuffix /ollama_llama_server$(EXE_EXT),$(GPU_RUNNER_NAME)))
  63. $(GPU_RUNNER_NAME): $(BUILD_RUNNERS) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS)
  64. # Build targets
  65. $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cu
  66. @-mkdir -p $(dir $@)
  67. $(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) $(GPU_COMPILER_CUFLAGS) $(GPU_RUNNER_ARCH_FLAGS) -o $@ $<
  68. $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.c
  69. @-mkdir -p $(dir $@)
  70. $(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CFLAGS) -o $@ $<
  71. $(BUILD_DIR)/%.$(GPU_RUNNER_NAME).$(OBJ_EXT): %.cpp
  72. @-mkdir -p $(dir $@)
  73. $(CCACHE) $(GPU_COMPILER) -c $(GPU_COMPILER_CXXFLAGS) -o $@ $<
  74. $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): TARGET_CGO_LDFLAGS = -L"$(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/" $(CGO_EXTRA_LDFLAGS)
  75. $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) *.go ./runner/*.go $(COMMON_SRCS) $(COMMON_HDRS)
  76. @-mkdir -p $(dir $@)
  77. GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner
  78. $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(DIST_GPU_RUNNER_LIB_DEPS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS)
  79. @-mkdir -p $(dir $@)
  80. $(CCACHE) $(GPU_COMPILER) --shared $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@
  81. # Distribution targets
  82. $(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%
  83. @-mkdir -p $(dir $@)
  84. $(CP) $< $@
  85. $(RUNNERS_DIST_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT) $(GPU_DIST_DEPS_LIBS)
  86. $(DIST_LIB_DIR)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT)
  87. @-mkdir -p $(dir $@)
  88. $(CP) $< $@
  89. $(DIST_GPU_RUNNER_LIB_DEPS):
  90. @-mkdir -p $(dir $@)
  91. $(CP) $(GPU_LIB_DIR)/$(notdir $@) $(dir $@)
  92. $(GPU_DIST_DEPS_LIBS):
  93. @-mkdir -p $(dir $@)
  94. $(CP) $(dir $(filter %$(notdir $@),$(GPU_LIBS) $(GPU_TRANSITIVE_LIBS)))/$(notdir $@) $(dir $@)
  95. # Payload targets
  96. $(RUNNERS_PAYLOAD_DIR)/%/ollama_llama_server.gz: $(RUNNERS_BUILD_DIR)/%/ollama_llama_server
  97. @-mkdir -p $(dir $@)
  98. ${GZIP} --best -c $< > $@
  99. $(RUNNERS_PAYLOAD_DIR)/$(GPU_RUNNER_NAME)/%.gz: $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/%
  100. @-mkdir -p $(dir $@)
  101. ${GZIP} --best -c $< > $@
  102. clean:
  103. rm -f $(GPU_RUNNER_OBJS) $(BUILD_RUNNERS) $(DIST_RUNNERS) $(PAYLOAD_RUNNERS)
  104. .PHONY: clean $(GPU_RUNNER_NAME)
  105. # Handy debugging for make variables
  106. print-%:
  107. @echo '$*=$($*)'