From 1c6669e64cc8a482fbf1e35c0249f17b35a4e87a Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Mon, 23 Jun 2025 14:07:00 -0700 Subject: [PATCH 1/3] Re-remove cuda v11 (#10694) * Re-remove cuda v11 Revert the revert - drop v11 support requiring drivers newer than Feb 23 This reverts commit c6bcdc4223c50071b59a19c42cc54ec9932f696f. * Simplify layout With only one version of the GPU libraries, we can simplify things down somewhat. (Jetsons still require special handling) * distinct sbsa variant for linux arm64 This avoids accidentally trying to load the sbsa cuda libraries on a jetson system which results in crashes. * temporary prevent rocm+cuda mixed loading --- .github/workflows/release.yaml | 7 ---- .github/workflows/test.yaml | 6 ++-- CMakeLists.txt | 11 ++++--- CMakePresets.json | 13 -------- Dockerfile | 24 ++++---------- discover/cuda_common.go | 4 +++ discover/path.go | 2 +- docs/gpu.md | 2 +- docs/troubleshooting.md | 2 +- ...rary-prevent-rocm-cuda-mixed-loading.patch | 32 +++++++++++++++++++ llm/server.go | 2 +- ml/backend/ggml/ggml/src/ggml-backend-reg.cpp | 12 +++++-- scripts/build_windows.ps1 | 14 -------- scripts/env.sh | 2 -- 14 files changed, 67 insertions(+), 66 deletions(-) create mode 100644 llama/patches/0018-temporary-prevent-rocm-cuda-mixed-loading.patch diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index f423106e7..4e5a5d476 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -103,11 +103,6 @@ jobs: arch: [amd64] preset: ['CPU'] include: - - os: windows - arch: amd64 - preset: 'CUDA 11' - install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe - cuda-version: '11.3' - os: windows arch: amd64 preset: 'CUDA 12' @@ -324,8 +319,6 @@ jobs: case "$COMPONENT" in bin/ollama) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;; lib/ollama/*.so) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;; - lib/ollama/cuda_v11) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;; - lib/ollama/cuda_v12) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;; lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;; lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;; lib/ollama/rocm) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-rocm.tar.in ;; diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 27e229fcf..2e7093391 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -46,7 +46,7 @@ jobs: include: - preset: CPU - preset: CUDA - container: nvidia/cuda:11.8.0-devel-ubuntu22.04 + container: nvidia/cuda:12.8.1-devel-ubuntu22.04 flags: '-DCMAKE_CUDA_ARCHITECTURES=87' - preset: ROCm container: rocm/dev-ubuntu-22.04:6.1.2 @@ -78,7 +78,7 @@ jobs: include: - preset: CPU - preset: CUDA - install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe + install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe flags: '-DCMAKE_CUDA_ARCHITECTURES=80' - preset: ROCm install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe @@ -102,7 +102,7 @@ jobs: $ErrorActionPreference = "Stop" if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') { Invoke-WebRequest -Uri "${{ matrix.install }}" 
-OutFile "install.exe" - Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_11.3", "nvcc_11.3", "cublas_11.3", "cublas_dev_11.3")) -NoNewWindow -Wait + Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_12.8", "nvcc_12.8", "cublas_12.8", "cublas_dev_12.8")) -NoNewWindow -Wait } $cudaPath = (Resolve-Path "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*").path diff --git a/CMakeLists.txt b/CMakeLists.txt index c005d0140..b3b5438a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -78,14 +78,13 @@ if(CMAKE_CUDA_COMPILER) find_package(CUDAToolkit) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda) - set(OLLAMA_CUDA_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/cuda_v${CUDAToolkit_VERSION_MAJOR}) install(TARGETS ggml-cuda RUNTIME_DEPENDENCIES DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_LIBRARY_DIR} PRE_INCLUDE_REGEXES cublas cublasLt cudart PRE_EXCLUDE_REGEXES ".*" - RUNTIME DESTINATION ${OLLAMA_CUDA_INSTALL_DIR} COMPONENT CUDA - LIBRARY DESTINATION ${OLLAMA_CUDA_INSTALL_DIR} COMPONENT CUDA + RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CUDA + LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CUDA ) endif() @@ -116,7 +115,11 @@ if(CMAKE_HIP_COMPILER) set(OLLAMA_HIP_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/rocm) install(TARGETS ggml-hip - RUNTIME_DEPENDENCIES + RUNTIME_DEPENDENCY_SET rocm + RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP + LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP + ) + install(RUNTIME_DEPENDENCY_SET rocm DIRECTORIES ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR} PRE_INCLUDE_REGEXES hipblas rocblas amdhip64 rocsolver amd_comgr hsa-runtime64 rocsparse tinfo rocprofiler-register drm drm_amdgpu numa elf PRE_EXCLUDE_REGEXES ".*" diff --git a/CMakePresets.json b/CMakePresets.json index 3234ce2c6..9a4dcc06b 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -17,14 +17,6 @@ "name": "CUDA", "inherits": [ "Default" ] }, - { - "name": "CUDA 11", - "inherits": [ "CUDA" ], - "cacheVariables": { - "CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;70;75;80;86", - "CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets -t 2" - } - }, { "name": "CUDA 12", "inherits": [ "CUDA" ], @@ -79,11 +71,6 @@ "configurePreset": "CUDA", "targets": [ "ggml-cuda" ] }, - { - "name": "CUDA 11", - "inherits": [ "CUDA" ], - "configurePreset": "CUDA 11" - }, { "name": "CUDA 12", "inherits": [ "CUDA" ], diff --git a/Dockerfile b/Dockerfile index 4c6619e77..da2ae3db0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,12 +7,13 @@ ARG JETPACK5VERSION=r35.4.1 ARG JETPACK6VERSION=r36.4.0 ARG CMAKEVERSION=3.31.2 -# CUDA v11 requires gcc v10. v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version +# We require gcc v10 minimum. 
v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64 RUN yum install -y yum-utils \ && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \ && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \ && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-binutils-2.35-11.el8 \ + && dnf install -y ccache \ && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH @@ -38,15 +39,6 @@ RUN --mount=type=cache,target=/root/.ccache \ && cmake --build --parallel --preset 'CPU' \ && cmake --install build --component CPU --strip --parallel 8 -FROM base AS cuda-11 -ARG CUDA11VERSION=11.3 -RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-} -ENV PATH=/usr/local/cuda-11/bin:$PATH -RUN --mount=type=cache,target=/root/.ccache \ - cmake --preset 'CUDA 11' \ - && cmake --build --parallel --preset 'CUDA 11' \ - && cmake --install build --component CUDA --strip --parallel 8 - FROM base AS cuda-12 ARG CUDA12VERSION=12.8 RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-} @@ -98,17 +90,15 @@ RUN --mount=type=cache,target=/root/.cache/go-build \ go build -trimpath -buildmode=pie -o /bin/ollama . FROM --platform=linux/amd64 scratch AS amd64 -COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11 -COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12 +COPY --from=cuda-12 dist/lib/ollama /lib/ollama FROM --platform=linux/arm64 scratch AS arm64 -COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11 -COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12 -COPY --from=jetpack-5 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_jetpack5 -COPY --from=jetpack-6 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_jetpack6 +COPY --from=cuda-12 dist/lib/ollama /lib/ollama/cuda_sbsa +COPY --from=jetpack-5 dist/lib/ollama /lib/ollama/cuda_jetpack5 +COPY --from=jetpack-6 dist/lib/ollama /lib/ollama/cuda_jetpack6 FROM scratch AS rocm -COPY --from=rocm-6 dist/lib/ollama/rocm /lib/ollama/rocm +COPY --from=rocm-6 dist/lib/ollama /lib/ollama FROM ${FLAVOR} AS archive COPY --from=cpu dist/lib/ollama /lib/ollama diff --git a/discover/cuda_common.go b/discover/cuda_common.go index 048295297..3c7cb6698 100644 --- a/discover/cuda_common.go +++ b/discover/cuda_common.go @@ -3,6 +3,7 @@ package discover import ( + "fmt" "log/slog" "os" "regexp" @@ -55,10 +56,13 @@ func cudaVariant(gpuInfo CudaGPUInfo) string { } } } + return "sbsa" } // driver 12.0 has problems with the cuda v12 library, so run v11 on those older drivers if gpuInfo.DriverMajor < 12 || (gpuInfo.DriverMajor == 12 && gpuInfo.DriverMinor == 0) { + // The detected driver is older than Feb 2023 + slog.Warn("old CUDA driver detected - please upgrade to a newer driver", "version", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor)) return "v11" } return "v12" diff --git a/discover/path.go b/discover/path.go index 8a20d8c21..68e63009a 100644 --- a/discover/path.go +++ b/discover/path.go @@ -12,7 +12,7 @@ import ( // '../lib/ollama' on Linux and the executable's directory on macOS // note: distribution builds, additional GPU-specific libraries are // found in subdirectories of the returned path, such as -// 'cuda_v11', 'cuda_v12', 'rocm', etc. +// 'cuda_v12', 'rocm', etc. 
var LibOllamaPath string = func() string { exe, err := os.Executable() if err != nil { diff --git a/docs/gpu.md b/docs/gpu.md index b54c66ab6..61ff6e458 100644 --- a/docs/gpu.md +++ b/docs/gpu.md @@ -1,6 +1,6 @@ # GPU ## Nvidia -Ollama supports Nvidia GPUs with compute capability 5.0+. +Ollama supports Nvidia GPUs with compute capability 5.0+ and driver version 531 and newer. Check your compute compatibility to see if your card is supported: [https://developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus) diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index ba5487fef..995b33aca 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -43,7 +43,7 @@ Ollama includes multiple LLM libraries compiled for different GPUs and CPU vecto In the server log, you will see a message that looks something like this (varies from release to release): ``` -Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v11 rocm_v5] +Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v12 rocm_v5] ``` **Experimental LLM Library Override** diff --git a/llama/patches/0018-temporary-prevent-rocm-cuda-mixed-loading.patch b/llama/patches/0018-temporary-prevent-rocm-cuda-mixed-loading.patch new file mode 100644 index 000000000..205dc64ae --- /dev/null +++ b/llama/patches/0018-temporary-prevent-rocm-cuda-mixed-loading.patch @@ -0,0 +1,32 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Daniel Hiltgen +Date: Sun, 22 Jun 2025 09:22:05 -0700 +Subject: [PATCH] temporary prevent rocm+cuda mixed loading + +--- + ggml/src/ggml-backend-reg.cpp | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp +index 4e67d243..8f49f084 100644 +--- a/ggml/src/ggml-backend-reg.cpp ++++ b/ggml/src/ggml-backend-reg.cpp +@@ -573,8 +573,16 @@ void ggml_backend_load_all_from_path(const char * dir_path) { + + ggml_backend_load_best("blas", silent, dir_path); + ggml_backend_load_best("cann", silent, dir_path); +- ggml_backend_load_best("cuda", silent, dir_path); +- ggml_backend_load_best("hip", silent, dir_path); ++ ++ // Avoid mixed hip+cuda configurations ++ const char * hip_devices = std::getenv("HIP_VISIBLE_DEVICES"); ++ const char * rocr_devices = std::getenv("ROCR_VISIBLE_DEVICES"); ++ if (!hip_devices && !rocr_devices) { ++ ggml_backend_load_best("cuda", silent, dir_path); ++ } else { ++ ggml_backend_load_best("hip", silent, dir_path); ++ } ++ + ggml_backend_load_best("kompute", silent, dir_path); + ggml_backend_load_best("metal", silent, dir_path); + ggml_backend_load_best("rpc", silent, dir_path); diff --git a/llm/server.go b/llm/server.go index 373f6faef..373eaf1f2 100644 --- a/llm/server.go +++ b/llm/server.go @@ -311,7 +311,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a params = append(params, "--mmproj", projectors[0]) } - // iterate through compatible GPU libraries such as 'cuda_v12', 'cuda_v11', 'rocm', etc. + // iterate through compatible GPU libraries such as 'cuda_v12', 'rocm', etc. 
// adding each library's respective path to the LD_LIBRARY_PATH, until finally running // without any LD_LIBRARY_PATH flags for { diff --git a/ml/backend/ggml/ggml/src/ggml-backend-reg.cpp b/ml/backend/ggml/ggml/src/ggml-backend-reg.cpp index 4e67d243a..8f49f0846 100644 --- a/ml/backend/ggml/ggml/src/ggml-backend-reg.cpp +++ b/ml/backend/ggml/ggml/src/ggml-backend-reg.cpp @@ -573,8 +573,16 @@ void ggml_backend_load_all_from_path(const char * dir_path) { ggml_backend_load_best("blas", silent, dir_path); ggml_backend_load_best("cann", silent, dir_path); - ggml_backend_load_best("cuda", silent, dir_path); - ggml_backend_load_best("hip", silent, dir_path); + + // Avoid mixed hip+cuda configurations + const char * hip_devices = std::getenv("HIP_VISIBLE_DEVICES"); + const char * rocr_devices = std::getenv("ROCR_VISIBLE_DEVICES"); + if (!hip_devices && !rocr_devices) { + ggml_backend_load_best("cuda", silent, dir_path); + } else { + ggml_backend_load_best("hip", silent, dir_path); + } + ggml_backend_load_best("kompute", silent, dir_path); ggml_backend_load_best("metal", silent, dir_path); ggml_backend_load_best("rpc", silent, dir_path); diff --git a/scripts/build_windows.ps1 b/scripts/build_windows.ps1 index e4c0b3d93..eaac2c600 100644 --- a/scripts/build_windows.ps1 +++ b/scripts/build_windows.ps1 @@ -27,7 +27,6 @@ function checkEnv() { $env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0] } # Locate CUDA versions - # Note: this assumes every version found will be built $cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue') if ($cudaList.length -eq 0) { $d=(get-command -ea 'silentlycontinue' nvcc).path @@ -94,19 +93,6 @@ function buildOllama() { $hashEnv = @{} Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value } - if ("$script:CUDA_DIRS".Contains("v11")) { - $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V11")) { $v11="$_" }} - $env:CUDAToolkit_ROOT=$hashEnv[$v11] - write-host "Building CUDA v11 backend libraries" - # Note: cuda v11 requires msvc 2019 so force the older generator - # to avoid 2022 (or newer) from being used as the default - & cmake --fresh --preset "CUDA 11" -G "Visual Studio 16 2019" --install-prefix $script:DIST_DIR - if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} - & cmake --build --preset "CUDA 11" --config Release --parallel $script:JOBS - if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} - & cmake --install build --component "CUDA" --strip - if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} - } if ("$script:CUDA_DIRS".Contains("v12")) { $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V12")) { $v12="$_" }} $env:CUDAToolkit_ROOT=$hashEnv[$v12] diff --git a/scripts/env.sh b/scripts/env.sh index c5e6f530a..65a970bdc 100644 --- a/scripts/env.sh +++ b/scripts/env.sh @@ -10,9 +10,7 @@ OLLAMA_COMMON_BUILD_ARGS="--build-arg=VERSION \ --build-arg=GOFLAGS \ --build-arg=OLLAMA_CUSTOM_CPU_DEFS \ --build-arg=OLLAMA_SKIP_CUDA_GENERATE \ - --build-arg=OLLAMA_SKIP_CUDA_11_GENERATE \ --build-arg=OLLAMA_SKIP_CUDA_12_GENERATE \ - --build-arg=CUDA_V11_ARCHITECTURES \ --build-arg=CUDA_V12_ARCHITECTURES \ --build-arg=OLLAMA_SKIP_ROCM_GENERATE \ --build-arg=OLLAMA_FAST_BUILD \ From 10a8e04a8dcdd06de4ccaa3c2fe19452d2714b15 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Mon, 23 Jun 2025 15:52:50 -0700 Subject: [PATCH 2/3] avoid context overflow (#11175) For smaller context models, make sure we do not exceed the training size. 
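As a standalone sketch, the guard below mirrors the check this patch adds
inline to NewLlamaServer (the clampContext helper and the sample numbers are
illustrative only; in the patch itself, trainCtx comes from
f.KV().ContextLength() and the fields live on opts):

    package main

    import (
        "fmt"
        "log/slog"
    )

    // clampContext caps the requested context so that each of the
    // numParallel sequences (which split the total context evenly)
    // stays within the model's training context. Assumes
    // numParallel >= 1; trainCtx == 0 means the training size is
    // unknown, in which case nothing is clamped.
    func clampContext(numCtx, numParallel int, trainCtx uint64) int {
        if trainCtx > 0 && numCtx/numParallel > int(trainCtx) {
            slog.Warn("requested context size too large for model",
                "num_ctx", numCtx, "num_parallel", numParallel, "n_ctx_train", trainCtx)
            return int(trainCtx) * numParallel
        }
        return numCtx
    }

    func main() {
        // A model trained on 2048 tokens with 4 parallel sequences:
        // 16384/4 = 4096 > 2048, so the total is clamped to 2048*4.
        fmt.Println(clampContext(16384, 4, 2048)) // prints 8192
    }

The clamp compares and multiplies per numParallel because the server carves
the total context into numParallel slots, so it is the per-sequence window,
not the total, that must fit within the training size.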
--- llm/server.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llm/server.go b/llm/server.go index 373eaf1f2..7d921f144 100644 --- a/llm/server.go +++ b/llm/server.go @@ -139,6 +139,13 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a gpus = discover.GetCPUInfo() } + // Verify the requested context size is <= the model training size + trainCtx := f.KV().ContextLength() + if opts.NumCtx/numParallel > int(trainCtx) && trainCtx > 0 { + slog.Warn("requested context size too large for model", "num_ctx", opts.NumCtx, "num_parallel", numParallel, "n_ctx_train", trainCtx) + opts.NumCtx = int(trainCtx) * numParallel + } + estimate := EstimateGPULayers(gpus, f, projectors, opts, numParallel) if len(gpus) > 1 || gpus[0].Library != "cpu" { switch { From c85c0ebf895016c36bab10be4dd92f594c400df3 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 24 Jun 2025 13:26:55 -0700 Subject: [PATCH 3/3] CI: switch windows to vs 2022 (#11184) * CI: switch windows to vs 2022 * ci: fix regex match --- .github/workflows/release.yaml | 9 ++++++--- .github/workflows/test.yaml | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 4e5a5d476..97c07ccc5 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -155,6 +155,9 @@ jobs: echo "$hipPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append echo "CC=$hipPath\bin\clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append echo "CXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append + echo "HIPCXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append + echo "HIP_PLATFORM=amd" | Out-File -FilePath $env:GITHUB_ENV -Append + echo "CMAKE_PREFIX_PATH=$hipPath" | Out-File -FilePath $env:GITHUB_ENV -Append - if: matrix.preset == 'CPU' run: | echo "CC=clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append @@ -173,8 +176,8 @@ jobs: key: ccache-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.preset }} - name: Build target "${{ matrix.preset }}" run: | - Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll' - Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo' + Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll' + Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo' cmake --preset "${{ matrix.preset }}" cmake --build --parallel --preset "${{ matrix.preset }}" cmake --install build --component "${{ startsWith(matrix.preset, 'CUDA ') && 'CUDA' || startsWith(matrix.preset, 'ROCm ') && 'HIP' || 'CPU' }}" --strip --parallel 8 @@ -241,7 +244,7 @@ jobs: dist\${{ matrix.os }}-${{ matrix.arch }}-app.exe windows-sign: - runs-on: windows-2022 + runs-on: windows environment: release needs: [windows-depends, windows-build] steps: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2e7093391..00b2ad791 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -36,7 +36,7 @@ jobs: | xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))" } - echo changed=$(changed 'llama/llama.cpp/**' 
'ml/backend/ggml/ggml/**') | tee -a $GITHUB_OUTPUT + echo changed=$(changed 'llama/llama.cpp/**/*' 'ml/backend/ggml/ggml/**/*') | tee -a $GITHUB_OUTPUT linux: needs: [changes] @@ -120,6 +120,9 @@ jobs: echo "$hipPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append echo "CC=$hipPath\bin\clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append echo "CXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append + echo "HIPCXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append + echo "HIP_PLATFORM=amd" | Out-File -FilePath $env:GITHUB_ENV -Append + echo "CMAKE_PREFIX_PATH=$hipPath" | Out-File -FilePath $env:GITHUB_ENV -Append - if: ${{ !cancelled() && steps.cache-install.outputs.cache-hit != 'true' }} uses: actions/cache/save@v4 with: @@ -133,8 +136,8 @@ jobs: path: ${{ github.workspace }}\.ccache key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }} - run: | - Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll' - Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo' + Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll' + Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo' cmake --preset "${{ matrix.preset }}" ${{ matrix.flags }} cmake --build --parallel --preset "${{ matrix.preset }}" env: