build: Make target improvements (#7499)
* llama: wire up builtin runner
This adds a new entrypoint into the ollama CLI to run the cgo built runner.
On Mac arm64, this will have GPU support, but on all other platforms it will
be the lowest common denominator CPU build. After we fully transition
to the new Go runners more tech-debt can be removed and we can stop building
the "default" runner via make and rely on the builtin always.
* build: Make target improvements
Add a few new targets and help for building locally.
This also adjusts the runner lookup to favor local builds, then
runners relative to the executable, and finally payloads.
* Support customized CPU flags for runners
This implements a simplified custom CPU flags pattern for the runners.
When built without overrides, the runner name contains the vector flag
we check for (AVX) to ensure we don't try to run on unsupported systems
and crash. If the user builds a customized set, we omit the naming
scheme and don't check for compatibility. This avoids checking
requirements at runtime, so that logic has been removed as well. This
can be used to build GPU runners with no vector flags, or CPU/GPU
runners with additional flags (e.g. AVX512) enabled.
* Use relative paths
If the user checks out the repo in a path that contains spaces, make gets
really confused so use relative paths for everything in-repo to avoid breakage.
* Remove payloads from main binary
* install: clean up prior libraries
This removes support for v0.3.6 and older versions (before the tar bundle)
and ensures we clean up prior libraries before extracting the bundle(s).
Without this change, runners and dependent libraries could leak when we
update and lead to subtle runtime errors.
2024-12-10 09:47:19 -08:00
|
|
|
# top level makefile for Ollama
|
|
|
|
include make/common-defs.make
|
|
|
|
|
|
|
|
|
|
|
|
# Determine which if any GPU runners we should build
|
|
|
|
include make/cuda-v11-defs.make
|
|
|
|
include make/cuda-v12-defs.make
|
|
|
|
include make/rocm-defs.make
|
|
|
|
|
|
|
|
# Select the CPU and CUDA runner targets.
#
#   CUSTOM_CPU_FLAGS empty -> on amd64 start from the stock "cpu" runner, then
#                             add every CUDA version whose compiler variable
#                             (CUDA_11_COMPILER / CUDA_12_COMPILER) is non-empty.
#   CUSTOM_CPU_FLAGS set   -> no stock "cpu" runner; add only the newest CUDA
#                             version whose compiler variable is non-empty.
#
# A non-empty OLLAMA_SKIP_CUDA_GENERATE disables the CUDA runners in BOTH modes,
# matching how OLLAMA_SKIP_ROCM_GENERATE is honoured regardless of
# CUSTOM_CPU_FLAGS.
ifeq ($(CUSTOM_CPU_FLAGS),)
ifeq ($(ARCH),amd64)
RUNNER_TARGETS=cpu
endif
endif

ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),)
ifeq ($(CUSTOM_CPU_FLAGS),)
# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present
ifneq ($(CUDA_11_COMPILER),)
RUNNER_TARGETS += cuda_v11
endif
ifneq ($(CUDA_12_COMPILER),)
RUNNER_TARGETS += cuda_v12
endif
else # CUSTOM_CPU_FLAGS is set, we'll build only the latest cuda version detected
ifneq ($(CUDA_12_COMPILER),)
RUNNER_TARGETS += cuda_v12
else ifneq ($(CUDA_11_COMPILER),)
RUNNER_TARGETS += cuda_v11
endif
endif
endif
|
|
|
|
|
|
|
|
# Add the ROCm runner when HIP_COMPILER is non-empty, unless the user opted
# out by setting OLLAMA_SKIP_ROCM_GENERATE to any non-empty value.
ifneq "$(HIP_COMPILER)" ""
ifeq "$(OLLAMA_SKIP_ROCM_GENERATE)" ""
RUNNER_TARGETS += rocm
endif
endif
|
|
|
|
|
|
|
|
|
|
|
|
# Build the runners and then the primary executable ("make help" documents
# this as the default target).
all: runners
all: exe
|
|
|
|
|
|
|
|
# Distribution build: one dist_<runner> per enabled runner, plus dist_exe.
dist: $(RUNNER_TARGETS:%=dist_%) dist_exe
|
|
|
|
|
|
|
|
# dist_<name>: run the "dist" goal of make/Makefile.<name> in a sub-make.
# $* is the stem matched by %, i.e. the runner name.
dist_%:
	@$(MAKE) --no-print-directory --file=make/Makefile.$* dist
|
|
|
|
|
|
|
|
# Build every runner selected in RUNNER_TARGETS.
runners: ${RUNNER_TARGETS}
|
|
|
|
|
|
|
|
# Each enabled runner (e.g. cpu, cuda_v12, rocm) is built by the default goal
# of its own makefile, make/Makefile.<runner>, in a sub-make.
$(RUNNER_TARGETS):
	@$(MAKE) --no-print-directory --file=make/Makefile.$@
|
|
|
|
|
|
|
|
# Forward the requested goal ($@ is "exe" or "dist_exe") to
# make/Makefile.ollama.
exe dist_exe:
	@$(MAKE) --no-print-directory --file=make/Makefile.ollama $@
|
|
|
|
|
|
|
|
# Vendor-sync goals are implemented in make/Makefile.sync; forward the goal
# name unchanged via $@.
help-sync apply-patches create-patches sync sync-clean:
	@$(MAKE) --no-print-directory --file=make/Makefile.sync $@
|
|
|
|
|
|
|
|
# Test, integration and lint goals are implemented in make/Makefile.test;
# forward the goal name unchanged via $@.
test integration lint:
	@$(MAKE) --no-print-directory --file=make/Makefile.test $@
|
|
|
|
|
|
|
|
# Remove build outputs, then clear the Go build cache.
# NOTE(review): `go clean -cache` clears Go's build cache, which is not
# specific to this checkout -- confirm that is intended.
clean:
	rm -rf \
		$(BUILD_DIR) \
		$(DIST_LIB_DIR) \
		$(OLLAMA_EXE) \
		$(DIST_OLLAMA_EXE)
	go clean -cache
|
|
|
|
|
|
|
|
# Print a summary of the main build and test targets.
# $(RUNNER_TARGETS) is expanded by make, so the "Enabled" list shows the
# runners selected for this invocation.
help:
	@echo "The following make targets will help you build Ollama"
	@echo ""
	@echo " make all # (default target) Build Ollama llm subprocess runners, and the primary ollama executable"
	@echo " make runners # Build Ollama llm subprocess runners; after you may use 'go build .' to build the primary ollama executable"
	@echo " make <runner> # Build specific runners. Enabled: '$(RUNNER_TARGETS)'"
	@echo " make dist # Build the runners and primary ollama executable for distribution"
	@echo " make help-sync # Help information on vendor update targets"
	@echo " make help-runners # Help information on runner targets"
	@echo ""
	@echo "The following make targets will help you test Ollama"
	@echo ""
	@echo " make test # Run unit tests"
	@echo " make integration # Run integration tests. You must 'make all' first"
	@echo " make lint # Run lint and style tests"
	@echo ""
	@echo "For more information see 'docs/development.md'"
	@echo ""
|
|
|
|
|
|
|
|
|
|
|
|
# Print the runner selection and the toolkit variables that drive it, which
# helps diagnose why a GPU runner was or was not enabled.
help-runners:
	@echo "The following runners will be built based on discovered GPU libraries: '${RUNNER_TARGETS}'"
	@echo ""
	@echo "GPU Runner CPU Flags: '${GPU_RUNNER_CPU_FLAGS}' (Override with CUSTOM_CPU_FLAGS)"
	@echo ""
	@echo "# CUDA_PATH sets the location where CUDA toolkits are present"
	@echo "CUDA_PATH=${CUDA_PATH}"
	@echo " CUDA_11_PATH=${CUDA_11_PATH}"
	@echo " CUDA_11_COMPILER=${CUDA_11_COMPILER}"
	@echo " CUDA_12_PATH=${CUDA_12_PATH}"
	@echo " CUDA_12_COMPILER=${CUDA_12_COMPILER}"
	@echo ""
	@echo "# HIP_PATH sets the location where the ROCm toolkit is present"
	@echo "HIP_PATH=${HIP_PATH}"
	@echo " HIP_COMPILER=${HIP_COMPILER}"
|
|
|
|
|
|
|
|
# Every explicit command-style target is phony, so a file or directory with the
# same name (e.g. "sync" or "dist_exe") can never make it look up to date.
# The per-runner dist_<runner> goals are deliberately NOT listed: phony targets
# skip implicit rule search, which would stop them matching the dist_% rule.
.PHONY: all exe dist dist_exe runners clean $(RUNNER_TARGETS)
.PHONY: help help-sync help-runners
.PHONY: apply-patches create-patches sync sync-clean
.PHONY: test integration lint
|
|
|
|
|
|
|
|
# Handy debugging for make variables
|
|
|
|
# Usage: make print-VAR  -> prints "VAR=<expanded value of VAR>"
print-%: ; @echo '$*=$($*)'
|