From e91ae3d47d8153c4b7c10dba031b77d7ae408ef0 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 25 Feb 2025 13:47:36 -0800 Subject: [PATCH] Update ROCm (6.3 linux, 6.2 windows) and CUDA v12.8 (#9304) * Bump cuda and rocm versions Update ROCm to linux:6.3 win:6.2 and CUDA v12 to 12.8. Yum has some silent failure modes, so largely switch to dnf. * Fix windows build script --- .github/workflows/release.yaml | 8 +-- .github/workflows/test.yaml | 2 +- Dockerfile | 28 +++++----- scripts/build_docker.sh | 2 +- scripts/build_linux.sh | 34 ++++++++++-- scripts/build_windows.ps1 | 94 +++++++++++++++++++--------------- 6 files changed, 104 insertions(+), 64 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 37d525e91..12f361408 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -111,13 +111,13 @@ jobs: - os: windows arch: amd64 preset: 'CUDA 12' - install: https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe - cuda-version: '12.4' + install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe + cuda-version: '12.8' - os: windows arch: amd64 preset: 'ROCm 6' - install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe - rocm-version: '6.1' + install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe + rocm-version: '6.2' runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }} environment: release env: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 56a2cc4fd..431bc3282 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -81,7 +81,7 @@ jobs: install: https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_522.06_windows.exe flags: '-DCMAKE_CUDA_ARCHITECTURES=87' - preset: ROCm - install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe + install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe flags: '-DAMDGPU_TARGETS=gfx1010' runs-on: windows steps: diff --git a/Dockerfile b/Dockerfile index 09612824b..46d4713e7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,20 +4,22 @@ ARG FLAVOR=${TARGETARCH} ARG ROCMVERSION=6.3.3 ARG JETPACK5VERSION=r35.4.1 -ARG JETPACK6VERSION=r36.2.0 +ARG JETPACK6VERSION=r36.4.0 ARG CMAKEVERSION=3.31.2 +# CUDA v11 requires gcc v10. 
v10.3 has regressions, so the Rocky Linux 8.5 AppStream repo carries the newest compatible release (10.2.1)
 FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
-RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \
-    && yum install -y yum-utils gcc-toolset-11-gcc gcc-toolset-11-gcc-c++ \
-    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
-    && curl -s -L https://github.com/ccache/ccache/releases/download/v4.10.2/ccache-4.10.2-linux-x86_64.tar.xz | tar -Jx -C /usr/local/bin --strip-components 1
-ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
+RUN yum install -y yum-utils \
+    && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
+    && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
+    && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 \
+    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
+ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
 
-FROM --platform=linux/arm64 rockylinux:8 AS base-arm64
+FROM --platform=linux/arm64 almalinux:8 AS base-arm64
 # install epel-release for ccache
 RUN yum install -y yum-utils epel-release \
-    && yum install -y clang ccache \
+    && dnf install -y clang ccache \
     && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
 ENV CC=clang CXX=clang++
 
@@ -29,7 +31,8 @@ COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 ENV LDFLAGS=-s
 
 FROM base AS cpu
-# amd64 uses gcc which requires gcc-toolset-11 for AVX extensions while arm64 uses clang
+RUN dnf install -y gcc-toolset-11-gcc gcc-toolset-11-gcc-c++
+ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'CPU' \
     && cmake --build --parallel --preset 'CPU' \
@@ -37,7 +40,7 @@ RUN --mount=type=cache,target=/root/.ccache \
 
 FROM base AS cuda-11
 ARG CUDA11VERSION=11.3
-RUN yum install -y cuda-toolkit-${CUDA11VERSION//./-}
+RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
 ENV PATH=/usr/local/cuda-11/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'CUDA 11' \
@@ -45,8 +48,8 @@ RUN --mount=type=cache,target=/root/.ccache \
     && cmake --install build --component CUDA --strip --parallel 8
 
 FROM base AS cuda-12
-ARG CUDA12VERSION=12.4
-RUN yum install -y cuda-toolkit-${CUDA12VERSION//./-}
+ARG CUDA12VERSION=12.8
+RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
 ENV PATH=/usr/local/cuda-12/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'CUDA 12' \
@@ -54,6 +57,7 @@ RUN --mount=type=cache,target=/root/.ccache \
     && cmake --install build --component CUDA --strip --parallel 8
 
 FROM base AS rocm-6
+ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'ROCm 6' \
     && cmake --build --parallel --preset 'ROCm 6' \
diff --git a/scripts/build_docker.sh b/scripts/build_docker.sh
index 567eb7c7a..1dd8d1f68 100755
--- a/scripts/build_docker.sh
+++ b/scripts/build_docker.sh
@@ -28,7 +28,7 @@ if echo $PLATFORM | grep "amd64" > /dev/null; then
         ${LOAD_OR_PUSH} \
         --platform=linux/amd64 \
         ${OLLAMA_COMMON_BUILD_ARGS} \
-        --target runtime-rocm \
+        --build-arg FLAVOR=rocm \
         -f Dockerfile \
         -t ${FINAL_IMAGE_REPO}:$VERSION-rocm \
         .
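
With the dedicated runtime-rocm image target gone, the ROCm variant is selected entirely by the FLAVOR build argument (the Dockerfile defaults it to ${TARGETARCH}). For reference, a standalone build of that flavor looks roughly like the sketch below; the tag name is a placeholder, not one the scripts use:

    # Build the ROCm image flavor directly (illustrative tag)
    docker buildx build \
        --platform=linux/amd64 \
        --build-arg FLAVOR=rocm \
        -f Dockerfile \
        -t ollama-rocm:dev \
        .
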
diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh
index a0c3d2f00..618722d11 100755
--- a/scripts/build_linux.sh
+++ b/scripts/build_linux.sh
@@ -22,8 +22,34 @@ docker buildx build \
     -f Dockerfile \
     .
 
-# buildx behavior changes for single vs. multiplatform
-if echo $PLATFORM | grep "," > /dev/null ; then
-    mv -f ./dist/linux_*64/ollama* ./dist/
-    rmdir ./dist/linux_*64
+if echo $PLATFORM | grep "amd64" > /dev/null; then
+    outDir="./dist"
+    if echo $PLATFORM | grep "," > /dev/null ; then
+        outDir="./dist/linux_amd64"
+    fi
+    docker buildx build \
+        --output type=local,dest=${outDir} \
+        --platform=linux/amd64 \
+        ${OLLAMA_COMMON_BUILD_ARGS} \
+        --build-arg FLAVOR=rocm \
+        --target archive \
+        -f Dockerfile \
+        .
+fi
+
+# buildx output layout differs for single- vs. multi-platform builds, so bundle each case separately
+echo "Compressing linux tar bundles..."
+if echo $PLATFORM | grep "," > /dev/null ; then
+    tar c -C ./dist/linux_arm64 --exclude cuda_jetpack5 --exclude cuda_jetpack6 . | pigz -9vc >./dist/ollama-linux-arm64.tgz
+    tar c -C ./dist/linux_arm64 ./lib/ollama/cuda_jetpack5 | pigz -9vc >./dist/ollama-linux-arm64-jetpack5.tgz
+    tar c -C ./dist/linux_arm64 ./lib/ollama/cuda_jetpack6 | pigz -9vc >./dist/ollama-linux-arm64-jetpack6.tgz
+    tar c -C ./dist/linux_amd64 --exclude rocm . | pigz -9vc >./dist/ollama-linux-amd64.tgz
+    tar c -C ./dist/linux_amd64 ./lib/ollama/rocm | pigz -9vc >./dist/ollama-linux-amd64-rocm.tgz
+elif echo $PLATFORM | grep "arm64" > /dev/null ; then
+    tar c -C ./dist/ --exclude cuda_jetpack5 --exclude cuda_jetpack6 bin lib | pigz -9vc >./dist/ollama-linux-arm64.tgz
+    tar c -C ./dist/ ./lib/ollama/cuda_jetpack5 | pigz -9vc >./dist/ollama-linux-arm64-jetpack5.tgz
+    tar c -C ./dist/ ./lib/ollama/cuda_jetpack6 | pigz -9vc >./dist/ollama-linux-arm64-jetpack6.tgz
+elif echo $PLATFORM | grep "amd64" > /dev/null ; then
+    tar c -C ./dist/ --exclude rocm bin lib | pigz -9vc >./dist/ollama-linux-amd64.tgz
+    tar c -C ./dist/ ./lib/ollama/rocm | pigz -9vc >./dist/ollama-linux-amd64-rocm.tgz
 fi
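
The bundles above are ordinary gzip streams (pigz is parallel gzip), so they can be checked or unpacked with standard tools. A minimal sketch, assuming the dist/ layout produced by this script:

    # List the contents of the amd64 bundle without extracting it
    pigz -dc ./dist/ollama-linux-amd64.tgz | tar -tf - | head

    # Unpack over a prefix; bin/ollama and lib/ollama land under /usr
    sudo tar -C /usr -xzf ./dist/ollama-linux-amd64.tgz

Shipping ROCm and the Jetson backends as separate add-on tarballs keeps the default download small; the add-ons unpack over the same prefix.
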
diff --git a/scripts/build_windows.ps1 b/scripts/build_windows.ps1
index 68f3b11d4..465cc5518 100644
--- a/scripts/build_windows.ps1
+++ b/scripts/build_windows.ps1
@@ -26,6 +26,9 @@ function checkEnv() {
         $MSVC_INSTALL=(Get-CimInstance MSFT_VSInstance -Namespace root/cimv2/vs)[0].InstallLocation
         $env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0]
     }
+    if (-Not (get-command -ErrorAction silent ninja)) {
+        $script:NINJA_DIR=(gci -path (Get-CimInstance MSFT_VSInstance -Namespace root/cimv2/vs)[0].InstallLocation -r -fi ninja.exe) | split-path -parent
+    }
     # Locate CUDA versions
     # Note: this assumes every version found will be built
     $cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue')
@@ -75,6 +78,7 @@ function checkEnv() {
     } else {
         write-host "Code signing disabled - please set KEY_CONTAINERS to sign and copy ollama_inc.crt to the top of the source tree"
     }
+    $script:JOBS=((Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors)
 }
 
@@ -83,51 +87,57 @@ function buildOllama() {
     Remove-Item -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}"
     New-Item "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" -ItemType Directory -ea 0
-
-    # Default first, then conditionall ROCm and cuda v11
-    write-host "Building Default native backend libraries"
-    $env:CMAKE_GENERATOR="ninja"
-    & cmake --preset Default
+    & cmake --fresh --preset CPU --install-prefix $script:DIST_DIR
     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-    & cmake --build --preset Default -j 12
+    & cmake --build --preset CPU --parallel $script:JOBS
+    if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+    & cmake --install build --component CPU --strip
     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-    & cmake --install build -j 12
-
-    # TODO - add steps for v11 and ROCm
-    #
-    # if ("$script:CUDA_DIRS".Contains("v11") -and "$script:CUDA_DIRS".Contains("v12")) {
-    #     # We assume the default is v12, so override for v11
-    #     $origCUDA_PATH=$env:CUDA_PATH
-    #     $hashEnv = @{}
-    #     Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value }
-    #     $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V11")) { $v11="$_" }}
-    #     write-host "$v11"
-    #     # $env:CUDA_PATH=$hashEnv[$v11]
-    #     # $env:CUDACXX=$hashEnv[$v11]+"\bin\nvcc.exe"
-    #     $env:CUDAToolkit_ROOT=$hashEnv[$v11]
-    #     # ls env:
-    #     write-host "Building CUDA v11 backend libraries"
-    #     & cmake --preset "CUDA 11"
-    #     $env:CUDA_PATH=$origCUDA_PATH
-    #     exit(1)
-    #     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-    #     # & cmake --build --preset "CUDA 11" -j 12
-    #     # if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-    # }
-    # if ($env:HIP_PATH) {
-    #     write-host "Building ROCm backend libraries"
-    #     $env:HIPCXX="${env:HIP_PATH}\bin\clang++.exe"
-    #     $env:HIP_PLATFORM="amd"
-    #     $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
-    #     & cmake --preset "ROCm"
-    #     $env:HIPCXX=""
-    #     $env:HIP_PLATFORM=""
-    #     $env:CMAKE_PREFIX_PATH=""
-    #     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-    #     & cmake --build --preset "ROCm" -j 12
-    #     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-    # }
+    $hashEnv = @{}
+    Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value }
+    if ("$script:CUDA_DIRS".Contains("v11")) {
+        $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V11")) { $v11="$_" }}
+        $env:CUDAToolkit_ROOT=$hashEnv[$v11]
+        write-host "Building CUDA v11 backend libraries"
+        # Note: cuda v11 requires msvc 2019 so force the older generator
+        # to avoid 2022 (or newer) from being used as the default
+        & cmake --fresh --preset "CUDA 11" -G "Visual Studio 16 2019" --install-prefix $script:DIST_DIR
+        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+        & cmake --build --preset "CUDA 11" --parallel $script:JOBS
+        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+        & cmake --install build --component "CUDA" --strip
+        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+    }
+    if ("$script:CUDA_DIRS".Contains("v12")) {
+        $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V12")) { $v12="$_" }}
+        $env:CUDAToolkit_ROOT=$hashEnv[$v12]
+        write-host "Building CUDA v12 backend libraries"
+        & cmake --fresh --preset "CUDA 12" --install-prefix $script:DIST_DIR
+        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+        & cmake --build --preset "CUDA 12" --parallel $script:JOBS
+        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+        & cmake --install build --component "CUDA" --strip
+        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+    }
+    if ($env:HIP_PATH) {
+        write-host "Building ROCm backend libraries"
+        if ($null -ne $script:NINJA_DIR) {
+            $env:PATH="$script:NINJA_DIR;$env:PATH"
+        }
+        $env:HIPCXX="${env:HIP_PATH}\bin\clang++.exe"
+        $env:HIP_PLATFORM="amd"
+        $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
+        & cmake --fresh --preset "ROCm 6" -G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ --install-prefix $script:DIST_DIR
+        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+        $env:HIPCXX=""
+        $env:HIP_PLATFORM=""
+        $env:CMAKE_PREFIX_PATH=""
+        & cmake --build --preset "ROCm 6" --parallel $script:JOBS
+        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
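+        # Install just the HIP component; the CPU and CUDA components were
+        # installed by the earlier passes into the same $script:DIST_DIR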
+ & cmake --install build --component "HIP" --strip + if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} + } } else { write-host "Skipping generate step with OLLAMA_SKIP_GENERATE set" }
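
A final note on the Dockerfile stages: cuda-toolkit-${CUDA12VERSION//./-} uses shell parameter expansion (/bin/sh is bash on these EL8 base images) to derive the RPM package name, replacing every dot in the version with a dash. A quick illustration:

    # ${VAR//./-} replaces every "." with "-"
    CUDA12VERSION=12.8
    echo "cuda-toolkit-${CUDA12VERSION//./-}"   # prints: cuda-toolkit-12-8

So bumping the ARG from 12.4 to 12.8 is enough to select the matching cuda-toolkit-12-8 package from NVIDIA's rhel8 repository.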