ARG GOLANG_VERSION=1.22.8
ARG CUDA_VERSION_11=11.3.1
ARG CUDA_VERSION_12=12.4.0
ARG ROCM_VERSION=6.1.2
ARG JETPACK_6=r36.2.0
ARG JETPACK_5=r35.4.1

### To create a local image for building linux binaries on mac or windows with efficient incremental builds
#
# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 .
# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
#
### Then incremental builds will be much faster in this container
#
# make -j 10 dist
#
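### Optional: the build stages below cache compiler output in /root/.ccache. If ccache is
### available inside your builder image (an assumption, not guaranteed), you can persist that
### cache across container runs by adding one more bind mount to the docker run above, e.g.:
#
# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ -v $HOME/.ccache:/root/.ccache builder-amd64
#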
FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
ARG GOLANG_VERSION
ARG CUDA_VERSION_11
ARG CUDA_VERSION_12
COPY ./scripts/rh_linux_deps.sh /
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
    dnf clean all && \
    dnf install -y \
    zsh \
    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
# TODO intel oneapi goes here...
ENV GOARCH amd64
ENV CGO_ENABLED 1
WORKDIR /go/src/github.com/ollama/ollama/
ENTRYPOINT [ "zsh" ]

### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
# Note: this does not contain jetson variants
#
# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 .
# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
#
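### As with the amd64 builder, incremental builds are much faster inside this container, e.g.
#
# make -j 5 dist
#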
FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
ARG GOLANG_VERSION
ARG CUDA_VERSION_11
ARG CUDA_VERSION_12
COPY ./scripts/rh_linux_deps.sh /
RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
    dnf config-manager --set-enabled appstream && \
    dnf clean all && \
    dnf install -y \
    zsh \
    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
ENV GOARCH arm64
ENV CGO_ENABLED 1
WORKDIR /go/src/github.com/ollama/ollama/
ENTRYPOINT [ "zsh" ]

FROM --platform=linux/amd64 unified-builder-amd64 AS build-amd64
COPY . .
ARG OLLAMA_SKIP_CUDA_GENERATE
ARG OLLAMA_SKIP_ROCM_GENERATE
ARG OLLAMA_FAST_BUILD
ARG VERSION
RUN --mount=type=cache,target=/root/.ccache \
    if grep "^flags" /proc/cpuinfo | grep avx >/dev/null; then \
        make -j $(nproc) dist ; \
    else \
        make -j 5 dist ; \
    fi
RUN cd dist/linux-$GOARCH && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \
        cd dist/linux-$GOARCH-rocm && \
        tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
    fi

# Jetsons need to be built in discrete stages
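# (For reference, the Jetson runner stages below can also be built directly, e.g.
#  docker build --platform linux/arm64 --target runners-jetpack5-arm64 -f Dockerfile .
#  although they are normally consumed by the build-arm64 stage further down.)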
FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64
ARG GOLANG_VERSION
RUN apt-get update && apt-get install -y git curl ccache && \
    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
WORKDIR /go/src/github.com/ollama/ollama/
COPY . .
ARG CGO_CFLAGS
ENV GOARCH arm64
ARG VERSION
RUN --mount=type=cache,target=/root/.ccache \
    make -j 5 dist_cuda_v11 \
        CUDA_ARCHITECTURES="72;87" \
        GPU_RUNNER_VARIANT=_jetpack5 \
        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama \
        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama/cuda_jetpack5

FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS runners-jetpack6-arm64
ARG GOLANG_VERSION
RUN apt-get update && apt-get install -y git curl ccache && \
    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
WORKDIR /go/src/github.com/ollama/ollama/
COPY . .
ARG CGO_CFLAGS
ENV GOARCH arm64
ARG VERSION
RUN --mount=type=cache,target=/root/.ccache \
    make -j 5 dist_cuda_v12 \
        CUDA_ARCHITECTURES="87" \
        GPU_RUNNER_VARIANT=_jetpack6 \
        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama \
        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama/cuda_jetpack6

FROM --platform=linux/arm64 unified-builder-arm64 AS build-arm64
COPY . .
ARG OLLAMA_SKIP_CUDA_GENERATE
ARG OLLAMA_FAST_BUILD
ARG VERSION
RUN --mount=type=cache,target=/root/.ccache \
    make -j 5 dist
COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
RUN cd dist/linux-$GOARCH && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
RUN cd dist/linux-$GOARCH-jetpack5 && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack5.tgz
RUN cd dist/linux-$GOARCH-jetpack6 && \
    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack6.tgz

FROM --platform=linux/amd64 scratch AS dist-amd64
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
FROM --platform=linux/arm64 scratch AS dist-arm64
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
FROM dist-$TARGETARCH AS dist
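
# With BuildKit, the dist stage above can be used to export the release archives to the host,
# for example (output destination is illustrative):
#
# docker buildx build --target dist --output type=local,dest=./dist .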

# For amd64 container images, filter out cuda/rocm to minimize size
FROM build-amd64 AS runners-cuda-amd64
RUN rm -rf \
    ./dist/linux-amd64/lib/ollama/libggml_hipblas.so \
    ./dist/linux-amd64/lib/ollama/runners/rocm*

FROM build-amd64 AS runners-rocm-amd64
RUN rm -rf \
    ./dist/linux-amd64/lib/ollama/libggml_cuda*.so \
    ./dist/linux-amd64/lib/ollama/libcu*.so* \
    ./dist/linux-amd64/lib/ollama/runners/cuda*

FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
RUN apt-get update && \
    apt-get install -y ca-certificates && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/

FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
RUN apt-get update && \
    apt-get install -y ca-certificates && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/
COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/

# The ROCm libraries are larger, so we keep them in an image distinct from the CPU/CUDA image
FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
# Front-load the ROCm libraries, which are large and rarely change, to increase the chance of a common layer
# across releases
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
RUN apt-get update && \
    apt-get install -y ca-certificates && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/

EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0

ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]
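
# To build only the ROCm image locally, one option (illustrative) is to target this stage directly:
#
# docker build --platform linux/amd64 --target runtime-rocm -t ollama-rocm .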

FROM runtime-$TARGETARCH
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0
ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_VISIBLE_DEVICES=all

ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]
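
### Example of running the resulting image (flags and volume name are illustrative; this assumes
### models are kept under /root/.ollama inside the container and that GPU passthrough is desired):
#
# docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama <image>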