Update ROCm (6.3 linux, 6.2 windows) and CUDA v12.8 (#9304)

* Bump cuda and rocm versions

Update ROCm to linux:6.3 win:6.2 and CUDA v12 to 12.8.
Yum has some silent failure modes, so largely switch to dnf.

* Fix windows build script
This commit is contained in:
Daniel Hiltgen 2025-02-25 13:47:36 -08:00 committed by GitHub
parent 6ecd7f64ba
commit e91ae3d47d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 104 additions and 64 deletions

View File

@ -111,13 +111,13 @@ jobs:
- os: windows
arch: amd64
preset: 'CUDA 12'
install: https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe
cuda-version: '12.4'
install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe
cuda-version: '12.8'
- os: windows
arch: amd64
preset: 'ROCm 6'
install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe
rocm-version: '6.1'
install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
rocm-version: '6.2'
runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
environment: release
env:

View File

@ -81,7 +81,7 @@ jobs:
install: https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_522.06_windows.exe
flags: '-DCMAKE_CUDA_ARCHITECTURES=87'
- preset: ROCm
install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe
install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
flags: '-DAMDGPU_TARGETS=gfx1010'
runs-on: windows
steps:

View File

@ -4,20 +4,22 @@ ARG FLAVOR=${TARGETARCH}
ARG ROCMVERSION=6.3.3
ARG JETPACK5VERSION=r35.4.1
ARG JETPACK6VERSION=r36.2.0
ARG JETPACK6VERSION=r36.4.0
ARG CMAKEVERSION=3.31.2
# CUDA v11 requires gcc v10. v10.3 has regressions, so use the rockylinux 8.5 AppStream, which has the latest compatible version
FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \
&& yum install -y yum-utils gcc-toolset-11-gcc gcc-toolset-11-gcc-c++ \
&& yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
&& curl -s -L https://github.com/ccache/ccache/releases/download/v4.10.2/ccache-4.10.2-linux-x86_64.tar.xz | tar -Jx -C /usr/local/bin --strip-components 1
ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
RUN yum install -y yum-utils \
&& yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
&& rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
&& dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 \
&& yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
FROM --platform=linux/arm64 rockylinux:8 AS base-arm64
FROM --platform=linux/arm64 almalinux:8 AS base-arm64
# install epel-release for ccache
RUN yum install -y yum-utils epel-release \
&& yum install -y clang ccache \
&& dnf install -y clang ccache \
&& yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
ENV CC=clang CXX=clang++
@ -29,7 +31,8 @@ COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
ENV LDFLAGS=-s
FROM base AS cpu
# amd64 uses gcc which requires gcc-toolset-11 for AVX extensions while arm64 uses clang
RUN dnf install -y gcc-toolset-11-gcc gcc-toolset-11-gcc-c++
ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'CPU' \
&& cmake --build --parallel --preset 'CPU' \
@ -37,7 +40,7 @@ RUN --mount=type=cache,target=/root/.ccache \
FROM base AS cuda-11
ARG CUDA11VERSION=11.3
RUN yum install -y cuda-toolkit-${CUDA11VERSION//./-}
RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
ENV PATH=/usr/local/cuda-11/bin:$PATH
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'CUDA 11' \
@ -45,8 +48,8 @@ RUN --mount=type=cache,target=/root/.ccache \
&& cmake --install build --component CUDA --strip --parallel 8
FROM base AS cuda-12
ARG CUDA12VERSION=12.4
RUN yum install -y cuda-toolkit-${CUDA12VERSION//./-}
ARG CUDA12VERSION=12.8
RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
ENV PATH=/usr/local/cuda-12/bin:$PATH
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'CUDA 12' \
@ -54,6 +57,7 @@ RUN --mount=type=cache,target=/root/.ccache \
&& cmake --install build --component CUDA --strip --parallel 8
FROM base AS rocm-6
ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:/opt/rocm/hcc/bin:$PATH
RUN --mount=type=cache,target=/root/.ccache \
cmake --preset 'ROCm 6' \
&& cmake --build --parallel --preset 'ROCm 6' \

View File

@ -28,7 +28,7 @@ if echo $PLATFORM | grep "amd64" > /dev/null; then
${LOAD_OR_PUSH} \
--platform=linux/amd64 \
${OLLAMA_COMMON_BUILD_ARGS} \
--target runtime-rocm \
--build-arg FLAVOR=rocm \
-f Dockerfile \
-t ${FINAL_IMAGE_REPO}:$VERSION-rocm \
.

View File

@ -22,8 +22,34 @@ docker buildx build \
-f Dockerfile \
.
# buildx behavior changes for single vs. multiplatform
if echo $PLATFORM | grep "," > /dev/null ; then
mv -f ./dist/linux_*64/ollama* ./dist/
rmdir ./dist/linux_*64
if echo $PLATFORM | grep "amd64" > /dev/null; then
outDir="./dist"
if echo $PLATFORM | grep "," > /dev/null ; then
outDir="./dist/linux_amd64"
fi
docker buildx build \
--output type=local,dest=${outDir} \
--platform=linux/amd64 \
${OLLAMA_COMMON_BUILD_ARGS} \
--build-arg FLAVOR=rocm \
--target archive \
-f Dockerfile \
.
fi
# buildx behavior changes for single vs. multiplatform
echo "Compressing linux tar bundles..."
if echo $PLATFORM | grep "," > /dev/null ; then
tar c -C ./dist/linux_arm64 --exclude cuda_jetpack5 --exclude cuda_jetpack6 . | pigz -9vc >./dist/ollama-linux-arm64.tgz
tar c -C ./dist/linux_arm64 ./lib/ollama/cuda_jetpack5 | pigz -9vc >./dist/ollama-linux-arm64-jetpack5.tgz
tar c -C ./dist/linux_arm64 ./lib/ollama/cuda_jetpack6 | pigz -9vc >./dist/ollama-linux-arm64-jetpack6.tgz
tar c -C ./dist/linux_amd64 --exclude rocm . | pigz -9vc >./dist/ollama-linux-amd64.tgz
tar c -C ./dist/linux_amd64 ./lib/ollama/rocm | pigz -9vc >./dist/ollama-linux-amd64-rocm.tgz
elif echo $PLATFORM | grep "arm64" > /dev/null ; then
tar c -C ./dist/ --exclude cuda_jetpack5 --exclude cuda_jetpack6 bin lib | pigz -9vc >./dist/ollama-linux-arm64.tgz
tar c -C ./dist/ ./lib/ollama/cuda_jetpack5 | pigz -9vc >./dist/ollama-linux-arm64-jetpack5.tgz
tar c -C ./dist/ ./lib/ollama/cuda_jetpack6 | pigz -9vc >./dist/ollama-linux-arm64-jetpack6.tgz
elif echo $PLATFORM | grep "amd64" > /dev/null ; then
tar c -C ./dist/ --exclude rocm bin lib | pigz -9vc >./dist/ollama-linux-amd64.tgz
tar c -C ./dist/ ./lib/ollama/rocm | pigz -9vc >./dist/ollama-linux-amd64-rocm.tgz
fi

View File

@ -26,6 +26,9 @@ function checkEnv() {
$MSVC_INSTALL=(Get-CimInstance MSFT_VSInstance -Namespace root/cimv2/vs)[0].InstallLocation
$env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0]
}
if (-Not (get-command -ErrorAction silent ninja)) {
$script:NINJA_DIR=(gci -path (Get-CimInstance MSFT_VSInstance -Namespace root/cimv2/vs)[0].InstallLocation -r -fi ninja.exe) | split-path -parent
}
# Locate CUDA versions
# Note: this assumes every version found will be built
$cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue')
@ -75,6 +78,7 @@ function checkEnv() {
} else {
write-host "Code signing disabled - please set KEY_CONTAINERS to sign and copy ollama_inc.crt to the top of the source tree"
}
$script:JOBS=((Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors)
}
@ -83,51 +87,57 @@ function buildOllama() {
Remove-Item -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}"
New-Item "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" -ItemType Directory -ea 0
# Default first, then conditionally ROCm and cuda v11
write-host "Building Default native backend libraries"
$env:CMAKE_GENERATOR="ninja"
& cmake --preset Default
& cmake --fresh --preset CPU --install-prefix $script:DIST_DIR
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& cmake --build --preset Default -j 12
& cmake --build --preset CPU --parallel $script:JOBS
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& cmake --install build --component CPU --strip
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& cmake --install build -j 12
# TODO - add steps for v11 and ROCm
#
# if ("$script:CUDA_DIRS".Contains("v11") -and "$script:CUDA_DIRS".Contains("v12")) {
# # We assume the default is v12, so override for v11
# $origCUDA_PATH=$env:CUDA_PATH
# $hashEnv = @{}
# Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value }
# $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V11")) { $v11="$_" }}
# write-host "$v11"
# # $env:CUDA_PATH=$hashEnv[$v11]
# # $env:CUDACXX=$hashEnv[$v11]+"\bin\nvcc.exe"
# $env:CUDAToolkit_ROOT=$hashEnv[$v11]
# # ls env:
# write-host "Building CUDA v11 backend libraries"
# & cmake --preset "CUDA 11"
# $env:CUDA_PATH=$origCUDA_PATH
# exit(1)
# if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
# # & cmake --build --preset "CUDA 11" -j 12
# # if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
# }
# if ($env:HIP_PATH) {
# write-host "Building ROCm backend libraries"
# $env:HIPCXX="${env:HIP_PATH}\bin\clang++.exe"
# $env:HIP_PLATFORM="amd"
# $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
# & cmake --preset "ROCm"
# $env:HIPCXX=""
# $env:HIP_PLATFORM=""
# $env:CMAKE_PREFIX_PATH=""
# if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
# & cmake --build --preset "ROCm" -j 12
# if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
# }
$hashEnv = @{}
Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value }
if ("$script:CUDA_DIRS".Contains("v11")) {
$hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V11")) { $v11="$_" }}
$env:CUDAToolkit_ROOT=$hashEnv[$v11]
write-host "Building CUDA v11 backend libraries"
# Note: cuda v11 requires msvc 2019 so force the older generator
# to prevent 2022 (or newer) from being used as the default
& cmake --fresh --preset "CUDA 11" -G "Visual Studio 16 2019" --install-prefix $script:DIST_DIR
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& cmake --build --preset "CUDA 11" --parallel $script:JOBS
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& cmake --install build --component "CUDA" --strip
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
if ("$script:CUDA_DIRS".Contains("v12")) {
$hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V12")) { $v12="$_" }}
$env:CUDAToolkit_ROOT=$hashEnv[$v12]
write-host "Building CUDA v12 backend libraries"
& cmake --fresh --preset "CUDA 12" --install-prefix $script:DIST_DIR
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& cmake --build --preset "CUDA 12" --parallel $script:JOBS
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& cmake --install build --component "CUDA" --strip
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
if ($env:HIP_PATH) {
write-host "Building ROCm backend libraries"
if ($null -ne $script:NINJA_DIR) {
$env:PATH="$script:NINJA_DIR;$env:PATH"
}
$env:HIPCXX="${env:HIP_PATH}\bin\clang++.exe"
$env:HIP_PLATFORM="amd"
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
& cmake --fresh --preset "ROCm 6" -G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ --install-prefix $script:DIST_DIR
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
$env:HIPCXX=""
$env:HIP_PLATFORM=""
$env:CMAKE_PREFIX_PATH=""
& cmake --build --preset "ROCm" --parallel $script:JOBS
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& cmake --install build --component "HIP" --strip
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
} else {
write-host "Skipping generate step with OLLAMA_SKIP_GENERATE set"
}