diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 50177050c..c6ab84fe0 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -478,243 +478,77 @@ jobs:
             dist/OllamaSetup.exe
             dist/ollama-windows-*.zip
 
-  # Linux x86 assets built using the container based build
-  build-linux-amd64:
+  build-linux:
     environment: release
     runs-on: linux
-    env:
-      PLATFORM: linux/amd64
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - run: |
-          ./scripts/build_linux.sh
-      - uses: actions/upload-artifact@v4
-        with:
-          name: dist-linux-amd64
-          path: |
-            dist/*linux*
-            !dist/*-cov
-
-  # Linux ARM assets built using the container based build
-  # (at present, docker isn't pre-installed on arm ubunutu images)
-  build-linux-arm64:
-    environment: release
-    runs-on: linux-arm64
-    env:
-      PLATFORM: linux/arm64
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - name: 'Install Docker'
-        run: |
-          # Add Docker's official GPG key:
-          env
-          uname -a
-          sudo apt-get update
-          sudo apt-get install -y ca-certificates curl
-          sudo install -m 0755 -d /etc/apt/keyrings
-          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
-          sudo chmod a+r /etc/apt/keyrings/docker.asc
-
-          # Add the repository to Apt sources:
-          echo \
-            "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
-            $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
-            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
-          sudo apt-get update
-          sudo apt-get install -y docker-ce docker-ce-cli containerd.io
-          sudo usermod -aG docker $USER
-          sudo apt-get install acl
-          sudo setfacl --modify user:$USER:rw /var/run/docker.sock
-      - run: |
-          ./scripts/build_linux.sh
-      - uses: actions/upload-artifact@v4
-        with:
-          name: dist-linux-arm64
-          path: |
-            dist/*linux*
-            !dist/*-cov
-
-  # Container image build
-  build-container-image:
-    environment: release
     strategy:
       matrix:
-        runner:
-          - linux
-          - linux-arm64
-    runs-on: ${{ matrix.runner }}
-    env:
-      FINAL_IMAGE_REPO: ollama/ollama
+        include:
+          - os: linux
+            arch: amd64
+            targets: [archive, rocm]
+          - os: linux
+            arch: arm64
+            targets: [archive]
     steps:
       - uses: actions/checkout@v4
+      - uses: docker/setup-qemu-action@v3
+      - uses: docker/setup-buildx-action@v3
+      - run: |
+          sudo apt-get update && sudo apt-get install -y pigz
+          for TARGET in ${{ join(matrix.targets, ' ') }}; do docker buildx build --platform $PLATFORM --target $TARGET --output type=local,dest=dist/$PLATFORM .; done
+          tar c -C dist/$PLATFORM . | pigz -9cv >dist/ollama-${PLATFORM//\//-}.tar.gz
+        env:
+          PLATFORM: ${{ matrix.os }}/${{ matrix.arch }}
+      - uses: actions/upload-artifact@v4
         with:
-          submodules: recursive
-      - name: 'Install Docker'
-        if: ${{ startsWith(matrix.runner, 'linux-arm64') }}
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y ca-certificates curl
-          sudo install -m 0755 -d /etc/apt/keyrings
-          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
-          sudo chmod a+r /etc/apt/keyrings/docker.asc
-          echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
-            $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
-            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
-          sudo apt-get update
-          sudo apt-get install -y docker-ce docker-ce-cli containerd.io
-          sudo usermod -aG docker $USER
-          sudo apt-get install acl
-          sudo setfacl --modify user:$USER:rw /var/run/docker.sock
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.FINAL_IMAGE_REPO }}
-          flavor: |
-            latest=false
-          tags: |
-            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
-            type=semver,pattern={{version}}
-      - name: Set Version
-        shell: bash
-        run: |
-          machine=$(uname -m)
-          case ${machine} in
-            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
-            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
-          esac >>$GITHUB_ENV
-          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - name: Build and push by digest
-        id: build
-        uses: docker/build-push-action@v6
-        with:
-          context: "."
-          platforms: linux/${{ env.ARCH }}
-          build-args: |
-            GOFLAGS
-          outputs: type=image,name=${{ env.FINAL_IMAGE_REPO }},push-by-digest=true,name-canonical=true,push=true
-      - name: Export digest
-        run: |
-          mkdir -p /tmp/digests
-          digest="${{ steps.build.outputs.digest }}"
-          touch "/tmp/digests/${digest#sha256:}"
-      - name: Upload digest
-        uses: actions/upload-artifact@v4
-        with:
-          name: digests-${{ env.PLATFORM_PAIR }}
-          path: /tmp/digests/*
-          if-no-files-found: error
-          retention-days: 1
-  merge:
+          name: dist-${{ matrix.os }}-${{ matrix.arch }}
+          path: |
+            dist/ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.gz
+
+  build-docker:
     environment: release
-    runs-on: linux
-    needs:
-      - build-container-image
-    env:
-      FINAL_IMAGE_REPO: ollama/ollama
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        include:
+          - flavor: |
+              latest=auto
+            platforms: linux/amd64,linux/arm64
+            build-args: "GOFLAGS"  # newline-delimited string; a YAML list does not expand inside ${{ }}
+          - flavor: |
+              suffix=-rocm,onlatest=false
+            platforms: linux/amd64
+            build-args: "GOFLAGS\nFLAVOR=rocm"
     steps:
       - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Download digests
-        uses: actions/download-artifact@v4
-        with:
-          path: /tmp/digests
-          pattern: digests-*
-          merge-multiple: true
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.FINAL_IMAGE_REPO }}
-          flavor: |
-            latest=false
-          tags: |
-            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
-            type=semver,pattern={{version}}
-      - name: Set Version
-        shell: bash
-        run: |
-          machine=$(uname -m)
-          case ${machine} in
-            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
-            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
-          esac >>$GITHUB_ENV
-          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
+      - uses: docker/setup-qemu-action@v3
+      - uses: docker/setup-buildx-action@v3
+      - uses: docker/login-action@v3
         with:
           username: ${{ vars.DOCKER_USER }}
           password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - name: Create manifest list and push
-        working-directory: /tmp/digests
-        run: |
-          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
-            $(printf '${{ env.FINAL_IMAGE_REPO }}@sha256:%s ' *)
-      - name: Inspect image
-        run: |
-          docker buildx imagetools inspect ${{ env.FINAL_IMAGE_REPO }}:${{ steps.meta.outputs.version }}
-  build-container-image-rocm:
-    environment: release
-    runs-on: linux
-    env:
-      FINAL_IMAGE_REPO: ollama/ollama
-      ARCH: amd64
-      PLATFORM_PAIR: linux-amd64
-    steps:
-      - uses: actions/checkout@v4
+      - id: metadata
+        uses: docker/metadata-action@v5
         with:
-          submodules: recursive
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.FINAL_IMAGE_REPO }}
-          flavor: |
-            latest=false
+          flavor: ${{ matrix.flavor }}
+          images: |
+            ollama/ollama
           tags: |
-            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
             type=semver,pattern={{version}}
-      - name: Set Version
-        shell: bash
-        run: |
-          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
+      - uses: docker/build-push-action@v6
         with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - name: Build and push by digest
-        id: build
-        uses: docker/build-push-action@v6
-        with:
-          context: "."
-          target: runtime-rocm
-          build-args: |
-            GOFLAGS
-          tags: ${{ env.FINAL_IMAGE_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION}}-rocm
+          context: .
+          push: true
+          platforms: ${{ matrix.platforms }}
+          build-args: ${{ matrix.build-args }}
+          tags: ${{ steps.metadata.outputs.tags }}
+          labels: ${{ steps.metadata.outputs.labels }}
+          cache-from: type=registry,ref=ollama/ollama:latest
+          cache-to: type=inline
+          provenance: false
+        env:
+          GOFLAGS: "'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ steps.metadata.outputs.version }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
 
   # Aggregate all the assets and ship a release
   release:
diff --git a/CMakePresets.json b/CMakePresets.json
index f1b6d41b4..2bda4eb10 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -3,7 +3,7 @@
   "configurePresets": [
     {
       "name": "Default",
-      "binaryDir": "${sourceDir}/build",
+      "binaryDir": "${sourceDir}/dist/build",
       "cacheVariables": {
         "CMAKE_BUILD_TYPE": "Release"
       }
diff --git a/Dockerfile b/Dockerfile
index 47228df61..eb741fdf5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,201 +1,161 @@
-ARG GOLANG_VERSION=1.22.8
-ARG CUDA_VERSION_11=11.3.1
-ARG CUDA_VERSION_12=12.4.0
-ARG ROCM_VERSION=6.1.2
-ARG JETPACK_6=r36.2.0
-ARG JETPACK_5=r35.4.1
+# vim: filetype=dockerfile
 
-### To create a local image for building linux binaries on mac or windows with efficient incremental builds
-#
-# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 .
-# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
-#
-### Then incremental builds will be much faster in this container
-#
-# make -j 10 dist
-#
-FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
-ARG GOLANG_VERSION
-ARG CUDA_VERSION_11
-ARG CUDA_VERSION_12
-COPY ./scripts/rh_linux_deps.sh /
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
-RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
-    dnf clean all && \
-    dnf install -y \
-    zsh \
-    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
-    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
-# TODO intel oneapi goes here...
-ENV GOARCH amd64
-ENV CGO_ENABLED 1
-WORKDIR /go/src/github.com/ollama/ollama/
-ENTRYPOINT [ "zsh" ]
+ARG FLAVOR=${TARGETARCH}
 
-### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
-# Note: this does not contain jetson variants
-#
-# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 .
-# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
-#
-FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
-ARG GOLANG_VERSION
-ARG CUDA_VERSION_11
-ARG CUDA_VERSION_12
-COPY ./scripts/rh_linux_deps.sh /
-RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
-    dnf config-manager --set-enabled appstream && \
-    dnf clean all && \
-    dnf install -y \
-    zsh \
-    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
-    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
-ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
-ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
-ENV GOARCH arm64
-ENV CGO_ENABLED 1
-WORKDIR /go/src/github.com/ollama/ollama/
-ENTRYPOINT [ "zsh" ]
+ARG ROCMVERSION=6.1.2
+ARG JETPACK5VERSION=r35.4.1
+ARG JETPACK6VERSION=r36.2.0
+ARG CMAKEVERSION=3.31.2
 
-FROM --platform=linux/amd64 unified-builder-amd64 AS build-amd64
-COPY . .
-ARG OLLAMA_SKIP_CUDA_GENERATE
-ARG OLLAMA_SKIP_ROCM_GENERATE
-ARG OLLAMA_FAST_BUILD
-ARG VERSION
-ARG CUSTOM_CPU_FLAGS
+FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCMVERSION}-complete AS base-amd64
+RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \
+    && yum install -y yum-utils devtoolset-10-gcc devtoolset-10-gcc-c++ \
+    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo \
+    && curl -s -L https://github.com/ccache/ccache/releases/download/v4.10.2/ccache-4.10.2-linux-x86_64.tar.xz | tar -Jx -C /usr/local/bin --strip-components 1
+ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:/opt/rh/devtoolset-11/root/usr/bin:$PATH
+
+FROM --platform=linux/arm64 rockylinux:8 AS base-arm64
+# install epel-release for ccache
+RUN yum install -y yum-utils epel-release \
+    && yum install -y clang ccache \
+    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
+ENV CC=clang CXX=clang++
+
+FROM base-${TARGETARCH} AS base
+ARG CMAKEVERSION
+RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
+ENV LDFLAGS=-s
+
+FROM base AS cpu
+# amd64 uses gcc which requires devtoolset-11 for AVX extensions while arm64 uses clang
+RUN if [ "$(uname -m)" = "x86_64" ]; then yum install -y devtoolset-11-gcc devtoolset-11-gcc-c++; fi
+ENV PATH=/opt/rh/devtoolset-11/root/usr/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
-    if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \
-        make -j $(nproc) dist ; \
-    else \
-        make -j 5 dist ; \
-    fi
-RUN cd dist/linux-$GOARCH && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
-RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \
-    cd dist/linux-$GOARCH-rocm && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
-    fi
+    cmake --preset 'Default' && cmake --build --parallel --preset 'Default'
 
-# Jetsons need to be built in discrete stages
-FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64
-ARG GOLANG_VERSION
-RUN apt-get update && apt-get install -y git curl ccache && \
-    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
-    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
-    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-WORKDIR /go/src/github.com/ollama/ollama/
-COPY . .
-ARG CGO_CFLAGS
-ENV GOARCH arm64
-ARG VERSION
+FROM base AS cuda-11
+ARG CUDA11VERSION=11.3
+RUN yum install -y cuda-toolkit-${CUDA11VERSION//./-}
+ENV PATH=/usr/local/cuda-11/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5 dist_cuda_v11 \
-    CUDA_ARCHITECTURES="72;87" \
-    GPU_RUNNER_VARIANT=_jetpack5 \
-    DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama \
-    DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama/cuda_jetpack5
+    cmake --preset 'CUDA 11' && cmake --build --parallel --preset 'CUDA 11'
 
-FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS runners-jetpack6-arm64
-ARG GOLANG_VERSION
-RUN apt-get update && apt-get install -y git curl ccache && \
-    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
-    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
-    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-WORKDIR /go/src/github.com/ollama/ollama/
-COPY . .
-ARG OLLAMA_SKIP_CUDA_GENERATE -ARG OLLAMA_FAST_BUILD -ARG VERSION +FROM base AS rocm-6 RUN --mount=type=cache,target=/root/.ccache \ - make -j 5 dist -COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ -COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/ -RUN cd dist/linux-$GOARCH && \ - tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz -RUN cd dist/linux-$GOARCH-jetpack5 && \ - tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack5.tgz -RUN cd dist/linux-$GOARCH-jetpack6 && \ - tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack6.tgz + cmake --preset 'ROCm 6' && cmake --build --parallel --preset 'ROCm 6' -FROM --platform=linux/amd64 scratch AS dist-amd64 -COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz / -FROM --platform=linux/arm64 scratch AS dist-arm64 -COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz / -FROM dist-$TARGETARCH AS dist +FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK5VERSION} AS jetpack-5 +ARG CMAKEVERSION +RUN apt-get update && apt-get install -y curl ccache \ + && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 +COPY CMakeLists.txt CMakePresets.json . +COPY ml/backend/ggml/ggml ml/backend/ggml/ggml +RUN --mount=type=cache,target=/root/.ccache \ + cmake --preset 'JetPack 5' && cmake --build --parallel --preset 'JetPack 5' +FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK6VERSION} AS jetpack-6 +ARG CMAKEVERSION +RUN apt-get update && apt-get install -y curl ccache \ + && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 +COPY CMakeLists.txt CMakePresets.json . 
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml +RUN --mount=type=cache,target=/root/.ccache \ + cmake --preset 'JetPack 6' && cmake --build --parallel --preset 'JetPack 6' -# For amd64 container images, filter out cuda/rocm to minimize size -FROM build-amd64 AS runners-cuda-amd64 -RUN rm -rf \ - ./dist/linux-amd64/lib/ollama/libggml_hipblas.so \ - ./dist/linux-amd64/lib/ollama/runners/rocm* +FROM base AS build +ARG GOVERSION=1.23.4 +RUN curl -fsSL https://golang.org/dl/go${GOVERSION}.linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local +ENV PATH=/usr/local/go/bin:$PATH +WORKDIR /go/src/github.com/ollama/ollama +COPY . . +ARG GOFLAGS="'-ldflags=-w -s'" +ENV CGO_ENABLED=1 +RUN --mount=type=cache,target=/root/.cache/go-build \ + go build -trimpath -buildmode=pie -o /bin/ollama . -FROM build-amd64 AS runners-rocm-amd64 -RUN rm -rf \ - ./dist/linux-amd64/lib/ollama/libggml_cuda*.so \ - ./dist/linux-amd64/lib/ollama/libcu*.so* \ - ./dist/linux-amd64/lib/ollama/runners/cuda* +FROM --platform=linux/amd64 scratch AS amd64 +COPY --from=cuda-11 --chmod=644 \ + dist/build/lib/libggml-cuda.so \ + /usr/local/cuda/lib64/libcublas.so.11 \ + /usr/local/cuda/lib64/libcublasLt.so.11 \ + /usr/local/cuda/lib64/libcudart.so.11.0 \ + /lib/ollama/cuda_v11/ +COPY --from=cuda-12 --chmod=644 \ + dist/build/lib/libggml-cuda.so \ + /usr/local/cuda/lib64/libcublas.so.12 \ + /usr/local/cuda/lib64/libcublasLt.so.12 \ + /usr/local/cuda/lib64/libcudart.so.12 \ + /lib/ollama/cuda_v12/ -FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64 -RUN apt-get update && \ - apt-get install -y ca-certificates && \ - apt-get clean && rm -rf /var/lib/apt/lists/* -COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/ -COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/ +FROM --platform=linux/arm64 scratch AS arm64 +COPY --from=cuda-11 --chmod=644 \ + dist/build/lib/libggml-cuda.so \ 
+    /usr/local/cuda/lib64/libcublas.so.11 \
+    /usr/local/cuda/lib64/libcublasLt.so.11 \
+    /usr/local/cuda/lib64/libcudart.so.11.0 \
+    /lib/ollama/cuda_v11/
+COPY --from=cuda-12 --chmod=644 \
+    dist/build/lib/libggml-cuda.so \
+    /usr/local/cuda/lib64/libcublas.so.12 \
+    /usr/local/cuda/lib64/libcublasLt.so.12 \
+    /usr/local/cuda/lib64/libcudart.so.12 \
+    /lib/ollama/cuda_v12/
+COPY --from=jetpack-5 --chmod=644 \
+    dist/build/lib/libggml-cuda.so \
+    /usr/local/cuda/lib64/libcublas.so.11 \
+    /usr/local/cuda/lib64/libcublasLt.so.11 \
+    /usr/local/cuda/lib64/libcudart.so.11.0 \
+    /lib/ollama/cuda_jetpack5/
+COPY --from=jetpack-6 --chmod=644 \
+    dist/build/lib/libggml-cuda.so \
+    /usr/local/cuda/lib64/libcublas.so.12 \
+    /usr/local/cuda/lib64/libcublasLt.so.12 \
+    /usr/local/cuda/lib64/libcudart.so.12 \
+    /lib/ollama/cuda_jetpack6/
 
-FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
-RUN apt-get update && \
-    apt-get install -y ca-certificates && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
-COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/
-COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/
+FROM --platform=linux/amd64 scratch AS rocm
+COPY --from=rocm-6 --chmod=644 \
+    dist/build/lib/libggml-hip.so \
+    /opt/rocm/lib/libamdhip64.so.6 \
+    /opt/rocm/lib/libhipblas.so.2 \
+    /opt/rocm/lib/librocblas.so.4 \
+    /opt/rocm/lib/libamd_comgr.so.2 \
+    /opt/rocm/lib/libhsa-runtime64.so.1 \
+    /opt/rocm/lib/librocprofiler-register.so.0 \
+    /opt/amdgpu/lib64/libdrm_amdgpu.so.1 \
+    /opt/amdgpu/lib64/libdrm.so.2 \
+    /usr/lib64/libnuma.so.1 \
+    /lib/ollama/rocm/
+COPY --from=rocm-6 /opt/rocm/lib/rocblas/ /lib/ollama/rocm/rocblas/
 
+FROM ${FLAVOR} AS archive
+COPY --from=cpu --chmod=644 \
+    dist/build/lib/libggml-base.so \
+    dist/build/lib/libggml-cpu-*.so \
+    /lib/ollama/
+COPY --from=build /bin/ollama /bin/ollama
 
-# ROCm libraries larger so we keep it distinct from the CPU/CUDA image
-FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
-# Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer
-# across releases
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
-RUN apt-get update && \
-    apt-get install -y ca-certificates && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
-COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
-
-EXPOSE 11434
-ENV OLLAMA_HOST 0.0.0.0
-
-ENTRYPOINT ["/bin/ollama"]
-CMD ["serve"]
-
-FROM runtime-$TARGETARCH
-EXPOSE 11434
-ENV OLLAMA_HOST 0.0.0.0
+FROM ubuntu:20.04
+RUN apt-get update \
+    && apt-get install -y ca-certificates \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+COPY --from=archive /bin/ /usr/bin/
 ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
-ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
+COPY --from=archive /lib/ollama/ /usr/lib/ollama/
+ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/lib/ollama
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_VISIBLE_DEVICES=all
-
+ENV OLLAMA_HOST=0.0.0.0:11434
+EXPOSE 11434
 ENTRYPOINT ["/bin/ollama"]
 CMD ["serve"]
diff --git a/Dockerfile2 b/Dockerfile2
deleted file mode 100644
index e1008c1b9..000000000
--- a/Dockerfile2
+++ /dev/null
@@ -1,59 +0,0 @@
-ARG CUDA_11_VERSION=11.3
-ARG CUDA_12_VERSION=12.4
-ARG ROCM_VERSION=6.1.2
-ARG JETPACK_5_VERSION=r35.4.1
-ARG JETPACK_6_VERSION=r36.2.0
-ARG CMAKE_VERSION=3.31.2
-
-FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS base
-ARG CMAKE_VERSION
-RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz | tar xz -C /usr --strip-components 1
-RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \
-    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
-
-# FROM --platform=linux/arm64 rockylinux:8 AS base
-# ARG CMAKE_VERSION
-# RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1
-# RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
-
-FROM base AS amd64
-ARG CUDA_11_VERSION
-ARG CUDA_12_VERSION
-RUN yum install -y cuda-toolkit-${CUDA_11_VERSION//./-} \
-    && yum install -y cuda-toolkit-${CUDA_12_VERSION//./-}
-COPY CMakeLists.txt CMakeLists.txt
-COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
-
-FROM --platform=linux/amd64 amd64 AS cuda_11
-ENV PATH=/usr/local/cuda-${CUDA_11_VERSION}/bin:$PATH
-RUN cmake --build --parallel --preset 'CUDA 11'
-
-FROM --platform=linux/amd64 amd64 AS cuda_12
-ENV PATH=/usr/local/cuda-${CUDA_12_VERSION}/bin:$PATH
-RUN cmake --build --parallel --preset 'CUDA 11'
-
-FROM --platform=linux/amd64 amd64 AS rocm
-RUN cmake --build --parallel --preset 'ROCm 6'
-
-FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5_VERSION} AS jetpack_5
-ARG CMAKE_VERSION
-RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1
-COPY CMakeLists.txt .
-COPY ml/backend/ggml/ggml .
-RUN cmake --build --parallel --preset 'JetPack 5'
-
-FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6_VERSION} AS jetpack_6
-ARG CMAKE_VERSION
-RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.tar.gz | tar xz -C /usr --strip-components 1
-COPY CMakeLists.txt .
-COPY ml/backend/ggml/ggml .
-RUN cmake --build --parallel --preset 'JetPack 6'
-
-FROM --platform=linux/amd64 golang:1.23
-COPY --from=cuda_11 build/libggml-cuda.so libggml-cuda-11.so
-COPY --from=cuda_12 build/libggml-cuda.so libggml-cuda-12.so
-COPY --from=rocm build/libggml-hip.so libggml-hip.so
-
-# FROM --platform=linux/arm64 golang:1.23
-# COPY --from=jetpack_5 build/libggml-cuda.so libggml-cuda-jetpack-5.so
-# COPY --from=jetpack_6 build/libggml-cuda.so libggml-cuda-jetpack-6.so
diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh
index 894d9dd2e..a0c3d2f00 100755
--- a/scripts/build_linux.sh
+++ b/scripts/build_linux.sh
@@ -18,7 +18,7 @@ docker buildx build \
     --output type=local,dest=./dist/ \
     --platform=${PLATFORM} \
     ${OLLAMA_COMMON_BUILD_ARGS} \
-    --target dist \
+    --target archive \
     -f Dockerfile \
     .
 
@@ -26,4 +26,4 @@ docker buildx build \
 if echo $PLATFORM | grep "," > /dev/null ; then
     mv -f ./dist/linux_*64/ollama* ./dist/
     rmdir ./dist/linux_*64
-fi
\ No newline at end of file
+fi