diff --git a/.github/workflows/bitcoin-core-ci.yml b/.github/workflows/bitcoin-core-ci.yml new file mode 100644 index 00000000000..e6ac83f0bd4 --- /dev/null +++ b/.github/workflows/bitcoin-core-ci.yml @@ -0,0 +1,300 @@ +# Copyright (c) The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or https://opensource.org/license/mit. + +# Test libmultiprocess inside Bitcoin Core by replacing the subtree copy +# with the version from this PR, then building and running IPC-related +# unit & functional tests. + +name: Bitcoin Core CI + +on: + push: + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + BITCOIN_REPO: bitcoin/bitcoin + LLVM_VERSION: 22 + LIBCXX_DIR: /tmp/libcxx-build/ + +jobs: + bitcoin-core: + name: ${{ matrix.name }} + runs-on: ${{ matrix.runner }} + timeout-minutes: 120 + + strategy: + fail-fast: false + matrix: + include: + - name: 'ASan + UBSan' + unit_test_runs: 15 + functional_test_runs: 20 + nproc_multiplier: 2 + functional_timeout_factor: 40 + runner: ubuntu-24.04 + apt-llvm: true + packages: >- + ccache + clang-22 + llvm-22 + libclang-rt-22-dev + libevent-dev + libboost-dev + libsqlite3-dev + libcapnp-dev + capnproto + ninja-build + pkgconf + python3-pip + pip-packages: --break-system-packages pycapnp + cmake-args: |- + -DSANITIZERS=address,float-divide-by-zero,integer,undefined + -DCMAKE_C_COMPILER=clang + -DCMAKE_CXX_COMPILER=clang++ + -DCMAKE_C_FLAGS=-ftrivial-auto-var-init=pattern + -DCMAKE_CXX_FLAGS=-ftrivial-auto-var-init=pattern + + - name: 'macOS' + unit_test_runs: 50 + functional_test_runs: 20 + nproc_multiplier: 2 + functional_timeout_factor: 40 + runner: macos-15 + brew-packages: ccache capnp boost libevent sqlite pkgconf ninja + pip-packages: --break-system-packages pycapnp + cmake-args: |- + -DREDUCE_EXPORTS=ON + + env: + CCACHE_MAXSIZE: 400M + CCACHE_DIR: ${{ github.workspace }}/.ccache + + steps: + - name: Checkout 
Bitcoin Core + uses: actions/checkout@v4 + with: + repository: ${{ env.BITCOIN_REPO }} + fetch-depth: 1 + + - name: Checkout libmultiprocess + uses: actions/checkout@v4 + with: + path: _libmultiprocess + + - name: Replace libmultiprocess subtree + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh replace_subtree + + - name: Add LLVM apt repository + if: matrix.apt-llvm + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh add_llvm_apt_repository + + - name: Install APT packages + if: matrix.packages + run: _libmultiprocess/ci/scripts/ci_helpers.sh install_apt_packages ${{ matrix.packages }} + + - name: Configure LLVM alternatives + if: matrix.packages + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh install_llvm_alternatives + + - name: Install Homebrew packages + if: matrix.brew-packages + run: _libmultiprocess/ci/scripts/ci_helpers.sh install_homebrew_packages ${{ matrix.brew-packages }} + + - name: Install pip packages + if: matrix.pip-packages + run: _libmultiprocess/ci/scripts/ci_helpers.sh install_pip_packages ${{ matrix.pip-packages }} + + - name: Determine parallelism + run: _libmultiprocess/ci/scripts/ci_helpers.sh determine_parallelism "${{ matrix.nproc_multiplier }}" + + - name: Restore ccache + id: ccache-restore + uses: actions/cache/restore@v4 + with: + path: ${{ env.CCACHE_DIR }} + key: ccache-${{ matrix.name }}-${{ github.ref }}-${{ github.sha }} + restore-keys: | + ccache-${{ matrix.name }}-${{ github.ref }}- + ccache-${{ matrix.name }}- + + - name: Reset ccache stats + if: matrix.packages || matrix.brew-packages + run: _libmultiprocess/ci/scripts/ci_helpers.sh reset_ccache_stats + + - name: CMake configure + env: + BITCOIN_CORE_CMAKE_ARGS: ${{ matrix.cmake-args }} + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh configure_bitcoin_core + + - name: Build + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh build_bitcoin_core + + - name: Show ccache stats + if: matrix.packages || matrix.brew-packages + run: 
_libmultiprocess/ci/scripts/ci_helpers.sh show_ccache_stats + + - name: Run IPC unit tests + env: + ASAN_OPTIONS: detect_leaks=1:detect_stack_use_after_return=1:check_initialization_order=1:strict_init_order=1 + LSAN_OPTIONS: suppressions=${{ github.workspace }}/test/sanitizer_suppressions/lsan + UBSAN_OPTIONS: suppressions=${{ github.workspace }}/test/sanitizer_suppressions/ubsan:print_stacktrace=1:halt_on_error=1:report_error_type=1 + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh run_ipc_unit_tests "${{ matrix.unit_test_runs }}" + + - name: Run IPC functional tests + env: + ASAN_OPTIONS: detect_leaks=1:detect_stack_use_after_return=1:check_initialization_order=1:strict_init_order=1 + LSAN_OPTIONS: suppressions=${{ github.workspace }}/test/sanitizer_suppressions/lsan + UBSAN_OPTIONS: suppressions=${{ github.workspace }}/test/sanitizer_suppressions/ubsan:print_stacktrace=1:halt_on_error=1:report_error_type=1 + CI_FAILFAST_TEST_LEAVE_DANGLING: 1 + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh run_ipc_functional_tests "${{ matrix.functional_test_runs }}" "${{ matrix.functional_timeout_factor }}" + + - name: Save ccache + uses: actions/cache/save@v4 + if: github.ref == 'refs/heads/master' || steps.ccache-restore.outputs.cache-hit != 'true' + with: + path: ${{ env.CCACHE_DIR }} + key: ccache-${{ matrix.name }}-${{ github.ref }}-${{ github.sha }} + + bitcoin-core-tsan: + name: ${{ matrix.name }} + runs-on: ubuntu-24.04 + timeout-minutes: 180 + + strategy: + matrix: + include: + - name: TSan + unit_test_runs: 8 + functional_test_runs: 25 + nproc_multiplier: 2 + functional_timeout_factor: 40 + + env: + CCACHE_MAXSIZE: 400M + CCACHE_DIR: ${{ github.workspace }}/.ccache + LIBCXX_FLAGS: >- + -fsanitize=thread + -nostdinc++ + -nostdlib++ + -isystem /tmp/libcxx-build/include/c++/v1 + -L/tmp/libcxx-build/lib + -Wl,-rpath,/tmp/libcxx-build/lib + -lc++ + -lc++abi + -lpthread + -Wno-unused-command-line-argument + TSAN_OPTIONS: suppressions=${{ github.workspace 
}}/test/sanitizer_suppressions/tsan:halt_on_error=1:second_deadlock_stack=1 + + steps: + - name: Checkout Bitcoin Core + uses: actions/checkout@v4 + with: + repository: ${{ env.BITCOIN_REPO }} + fetch-depth: 1 + + - name: Checkout libmultiprocess + uses: actions/checkout@v4 + with: + path: _libmultiprocess + + - name: Add LLVM apt repository + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh add_llvm_apt_repository + + - name: Install packages + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh install_tsan_packages + + - name: Determine parallelism + run: _libmultiprocess/ci/scripts/ci_helpers.sh determine_parallelism "${{ matrix.nproc_multiplier }}" + + - name: Restore instrumented libc++ cache + id: libcxx-cache + uses: actions/cache@v4 + with: + path: ${{ env.LIBCXX_DIR }} + key: libcxx-Thread-llvmorg-${{ env.LLVM_VERSION }}.1.0 + + - name: Build instrumented libc++ + if: steps.libcxx-cache.outputs.cache-hit != 'true' + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh build_instrumented_libcxx + + - name: Determine host + id: host + run: _libmultiprocess/ci/scripts/ci_helpers.sh determine_host + + - name: Restore depends cache + id: depends-cache + uses: actions/cache/restore@v4 + with: + path: | + depends/built + depends/${{ steps.host.outputs.host }} + key: depends-tsan-${{ hashFiles('depends/packages/*.mk') }}-${{ env.LLVM_VERSION }} + + - name: Build depends (stage 1, without IPC) + if: steps.depends-cache.outputs.cache-hit != 'true' + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh build_depends_without_ipc + + - name: Save depends cache + uses: actions/cache/save@v4 + if: steps.depends-cache.outputs.cache-hit != 'true' + with: + path: | + depends/built + depends/${{ steps.host.outputs.host }} + key: depends-tsan-${{ hashFiles('depends/packages/*.mk') }}-${{ env.LLVM_VERSION }} + + - name: Replace libmultiprocess subtree + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh replace_subtree + + - name: Build depends (stage 2, IPC packages 
including libmultiprocess) + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh build_depends_with_ipc + + - name: Restore ccache + id: ccache-restore + uses: actions/cache/restore@v4 + with: + path: ${{ env.CCACHE_DIR }} + key: ccache-TSan-${{ github.ref }}-${{ github.sha }} + restore-keys: | + ccache-TSan-${{ github.ref }}- + ccache-TSan- + + - name: Reset ccache stats + run: _libmultiprocess/ci/scripts/ci_helpers.sh reset_ccache_stats + + - name: CMake configure + env: + BITCOIN_CORE_CMAKE_ARGS: |- + -DSANITIZERS=thread + -DAPPEND_CPPFLAGS=-DARENA_DEBUG -DDEBUG_LOCKCONTENTION -D_LIBCPP_REMOVE_TRANSITIVE_INCLUDES + -DCMAKE_TOOLCHAIN_FILE=depends/${{ steps.host.outputs.host }}/toolchain.cmake + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh configure_bitcoin_core + + - name: Build + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh build_bitcoin_core + + - name: Show ccache stats + run: _libmultiprocess/ci/scripts/ci_helpers.sh show_ccache_stats + + - name: Run IPC unit tests + env: + LD_LIBRARY_PATH: depends/${{ steps.host.outputs.host }}/lib + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh run_ipc_unit_tests "${{ matrix.unit_test_runs }}" + + - name: Run IPC functional tests + env: + LD_LIBRARY_PATH: depends/${{ steps.host.outputs.host }}/lib + CI_FAILFAST_TEST_LEAVE_DANGLING: 1 + run: _libmultiprocess/ci/scripts/bitcoin_core_ci.sh run_ipc_functional_tests "${{ matrix.functional_test_runs }}" "${{ matrix.functional_timeout_factor }}" + + - name: Save ccache + uses: actions/cache/save@v4 + if: github.ref == 'refs/heads/master' || steps.ccache-restore.outputs.cache-hit != 'true' + with: + path: ${{ env.CCACHE_DIR }} + key: ccache-TSan-${{ github.ref }}-${{ github.sha }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f4a6623416c..ffaca1cac93 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -120,16 +120,21 @@ jobs: env: HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK: 1 run: | - brew install --quiet ninja capnp + 
brew install --quiet ninja capnp llvm - name: Run CI script run: | + export PATH="$(brew --prefix llvm)/bin:$PATH" CI_CONFIG="ci/configs/macos.bash" bash ci/scripts/ci.sh build: runs-on: ubuntu-latest env: + NIXPKGS_CHANNEL: nixos-25.05 + NIX_EXTRA_CONFIG: | + keep-env-derivations = true + keep-outputs = true NIX_EXTRA_CONFIG_ACT: | sandbox = false filter-syscalls = false @@ -144,14 +149,33 @@ jobs: steps: - uses: actions/checkout@v5 + - name: Determine CI configuration + id: config + env: + CI_CONFIG: ci/configs/${{ matrix.config }}.bash + run: ci/scripts/config.sh + - name: Install Nix uses: cachix/install-nix-action@v31 # 2025-05-27, from https://github.com/cachix/install-nix-action/tags with: - nix_path: nixpkgs=channel:nixos-25.05 # latest release + nix_path: nixpkgs=https://github.com/NixOS/nixpkgs/archive/${{ steps.config.outputs.nixpkgs_rev }}.tar.gz # Act executes inside an unprivileged container (Docker or Podman), # so KVM support isn't available. enable_kvm: "${{ github.actor != 'nektos/act' }}" - extra_nix_config: ${{ github.actor == 'nektos/act' && env.NIX_EXTRA_CONFIG_ACT || '' }} + extra_nix_config: | + ${{ env.NIX_EXTRA_CONFIG }} + ${{ github.actor == 'nektos/act' && env.NIX_EXTRA_CONFIG_ACT || '' }} + + - name: Cache Nix store + if: steps.config.outputs.cache_nix_store == 'true' + uses: nix-community/cache-nix-action@v7 + with: + primary-key: nix-${{ runner.os }}-${{ matrix.config }}-${{ steps.config.outputs.nixpkgs_rev }}-${{ hashFiles('shell.nix', 'ci/patches/*.patch', format('ci/configs/{0}.bash', matrix.config)) }} + restore-prefixes-first-match: | + nix-${{ runner.os }}-${{ matrix.config }}-${{ steps.config.outputs.nixpkgs_rev }}- + nix-${{ runner.os }}-${{ matrix.config }}- + nix-${{ runner.os }}- + gc-max-store-size-linux: 10G - name: Run CI script env: diff --git a/ci/configs/gnu32.bash b/ci/configs/gnu32.bash index 961821ce8ce..04d78a24fc5 100644 --- a/ci/configs/gnu32.bash +++ b/ci/configs/gnu32.bash @@ -1,5 +1,7 @@ CI_DESC="CI job 
cross-compiling to 32-bit" CI_DIR=build-gnu32 +# Cache the heaviest Nix job to stay within GitHub's cache budget. +CI_CACHE_NIX_STORE=true NIX_ARGS=( --arg minimal true --arg crossPkgs 'import <nixpkgs> { crossSystem = { config = "i686-unknown-linux-gnu"; }; }' diff --git a/ci/configs/macos.bash b/ci/configs/macos.bash index a444bacd51e..895d795f891 100644 --- a/ci/configs/macos.bash +++ b/ci/configs/macos.bash @@ -1,5 +1,5 @@ CI_DESC="CI config for macOS" CI_DIR=build-macos export CXXFLAGS="-Werror -Wall -Wextra -Wpedantic -Wno-unused-parameter" -CMAKE_ARGS=(-G Ninja) +CMAKE_ARGS=(-G Ninja -DMP_ENABLE_CLANG_TIDY=ON) BUILD_ARGS=(-k 0) diff --git a/ci/scripts/bitcoin_core_ci.sh b/ci/scripts/bitcoin_core_ci.sh new file mode 100755 index 00000000000..7f3c5cbd4b9 --- /dev/null +++ b/ci/scripts/bitcoin_core_ci.sh @@ -0,0 +1,173 @@ +#!/usr/bin/env bash + +export LC_ALL=C + +set -o errexit -o nounset -o pipefail -o xtrace + +readonly SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" + +source "${SCRIPT_DIR}/ci_helpers.sh" + +replace_subtree() { + rm -rf src/ipc/libmultiprocess + cp -a _libmultiprocess src/ipc/libmultiprocess + rm -rf src/ipc/libmultiprocess/.git +} + +add_llvm_apt_repository() { + curl -s "https://apt.llvm.org/llvm-snapshot.gpg.key" | sudo tee "/etc/apt/trusted.gpg.d/apt.llvm.org.asc" > /dev/null + source /etc/os-release + echo "deb http://apt.llvm.org/${VERSION_CODENAME}/ llvm-toolchain-${VERSION_CODENAME}-${LLVM_VERSION} main" | sudo tee "/etc/apt/sources.list.d/llvm.list" + sudo apt-get update +} + +install_llvm_alternatives() { + sudo update-alternatives --install /usr/bin/clang++ clang++ "/usr/bin/clang++-${LLVM_VERSION}" 100 + sudo update-alternatives --install /usr/bin/clang clang "/usr/bin/clang-${LLVM_VERSION}" 100 + sudo update-alternatives --install /usr/bin/llvm-symbolizer llvm-symbolizer "/usr/bin/llvm-symbolizer-${LLVM_VERSION}" 100 +} + +set_llvm_alternatives() { + sudo update-alternatives --set clang "/usr/bin/clang-${LLVM_VERSION}" +
sudo update-alternatives --set clang++ "/usr/bin/clang++-${LLVM_VERSION}" + sudo update-alternatives --set llvm-symbolizer "/usr/bin/llvm-symbolizer-${LLVM_VERSION}" +} + +install_tsan_packages() { + install_apt_packages \ + ccache \ + "clang-${LLVM_VERSION}" \ + "llvm-${LLVM_VERSION}" \ + "llvm-${LLVM_VERSION}-dev" \ + "libclang-${LLVM_VERSION}-dev" \ + "libclang-rt-${LLVM_VERSION}-dev" \ + ninja-build \ + pkgconf \ + python3-pip \ + bison + install_llvm_alternatives + set_llvm_alternatives + install_pip_packages --break-system-packages pycapnp +} + +build_instrumented_libcxx() { + export PATH="/usr/lib/llvm-${LLVM_VERSION}/bin:${PATH}" + + ls -l /usr/bin/clang /usr/bin/clang++ /usr/bin/llvm-symbolizer + which clang clang++ llvm-symbolizer + clang --version + clang++ --version + "/usr/bin/clang-${LLVM_VERSION}" --version + "/usr/bin/clang++-${LLVM_VERSION}" --version + git clone --depth=1 https://github.com/llvm/llvm-project -b "llvmorg-${LLVM_VERSION}.1.0" /tmp/llvm-project + cmake -G Ninja -B "${LIBCXX_DIR}" \ + -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi;libunwind" \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_USE_SANITIZER=Thread \ + -DCMAKE_C_COMPILER="/usr/bin/clang-${LLVM_VERSION}" \ + -DCMAKE_CXX_COMPILER="/usr/bin/clang++-${LLVM_VERSION}" \ + -DLLVM_TARGETS_TO_BUILD=Native \ + -DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=OFF \ + -DLIBCXX_INCLUDE_TESTS=OFF \ + -DLIBCXXABI_INCLUDE_TESTS=OFF \ + -DLIBUNWIND_INCLUDE_TESTS=OFF \ + -DLIBCXXABI_USE_LLVM_UNWINDER=OFF \ + -S /tmp/llvm-project/runtimes + grep -E 'CMAKE_(C|CXX)_COMPILER' "${LIBCXX_DIR}/CMakeCache.txt" + ninja -C "${LIBCXX_DIR}" -j "${BUILD_PARALLEL}" -v + rm -rf /tmp/llvm-project +} + +configure_bitcoin_core() { + local cmake_arg + local cmake_args=() + + if [[ -n "${BITCOIN_CORE_CMAKE_ARGS:-}" ]]; then + while IFS= read -r cmake_arg; do + [[ -n "${cmake_arg}" ]] || continue + cmake_args+=("${cmake_arg}") + done <<< "${BITCOIN_CORE_CMAKE_ARGS}" + fi + + cmake -S . 
-B build \ + --preset=dev-mode \ + -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_GUI=OFF \ + -DBUILD_GUI_TESTS=OFF \ + -DWITH_ZMQ=OFF \ + -DWITH_USDT=OFF \ + -DBUILD_BENCH=OFF \ + -DBUILD_FUZZ_BINARY=OFF \ + -DWITH_QRENCODE=OFF \ + -G Ninja \ + "${cmake_args[@]}" +} + +build_bitcoin_core() { + cmake --build build --parallel "${BUILD_PARALLEL}" +} + +build_depends_without_ipc() { + make -C depends -j "${BUILD_PARALLEL}" \ + CC=clang \ + CXX=clang++ \ + CXXFLAGS="${LIBCXX_FLAGS}" \ + NO_QT=1 \ + NO_ZMQ=1 \ + NO_USDT=1 \ + NO_QR=1 \ + NO_IPC=1 +} + +build_depends_with_ipc() { + make -C depends -j "${BUILD_PARALLEL}" \ + CC=clang \ + CXX=clang++ \ + CXXFLAGS="${LIBCXX_FLAGS}" \ + NO_QT=1 \ + NO_ZMQ=1 \ + NO_USDT=1 \ + NO_QR=1 +} + +run_ipc_unit_tests() { + local runs="$1" + + for _ in $(seq 1 "${runs}"); do + build/bin/test_bitcoin --run_test=ipc_tests,miner_tests --catch_system_error=no --log_level=nothing --report_level=no + done +} + +run_ipc_functional_tests() { + local runs="$1" + local timeout_factor="$2" + local test_scripts + local test_args=() + + test_scripts=$(python3 -c "import sys; import os; sys.path.append(os.path.abspath('build/test/functional')); from test_runner import ALL_SCRIPTS; print(' '.join(s for s in ALL_SCRIPTS if s.startswith('interface_ipc')))") + for _ in $(seq 1 "${runs}"); do + for script in $test_scripts; do + test_args+=("$script") + done + done + build/test/functional/test_runner.py "${test_args[@]}" --jobs "${PARALLEL}" --timeout-factor="${timeout_factor}" --failfast --combinedlogslen=99999999 +} + +main() { + local command="${1:?missing command}" + shift + + [[ "${command}" =~ ^[a-z_][a-z0-9_]*$ ]] || { + echo "Invalid command: ${command}" >&2 + exit 1 + } + + if declare -F "${command}" >/dev/null; then + "${command}" "$@" + else + echo "Unknown command: ${command}" >&2 + exit 1 + fi +} + +main "$@" diff --git a/ci/scripts/ci_helpers.sh b/ci/scripts/ci_helpers.sh new file mode 100755 index 00000000000..1b6d0121969 --- /dev/null +++ 
b/ci/scripts/ci_helpers.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env bash + +export LC_ALL=C + +set -o errexit -o nounset -o pipefail -o xtrace + +write_env_var() { + local key="$1" + local value="$2" + + if [[ -n "${GITHUB_ENV:-}" ]]; then + echo "${key}=${value}" >> "${GITHUB_ENV}" + else + export "${key}=${value}" + fi +} + +write_output_var() { + local key="$1" + local value="$2" + + if [[ -n "${GITHUB_OUTPUT:-}" ]]; then + echo "${key}=${value}" >> "${GITHUB_OUTPUT}" + else + echo "${key}=${value}" + fi +} + +available_nproc() { + if command -v nproc >/dev/null 2>&1; then + nproc + else + sysctl -n hw.logicalcpu + fi +} + +determine_parallelism() { + local multiplier="$1" + local available + + available="$(available_nproc)" + write_env_var BUILD_PARALLEL "${available}" + write_env_var PARALLEL "$((available * multiplier))" +} + +reset_ccache_stats() { + which ccache + ccache --version + ccache --zero-stats +} + +show_ccache_stats() { + ccache --show-stats +} + +install_apt_packages() { + sudo apt-get install --no-install-recommends -y "$@" +} + +install_homebrew_packages() { + HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 brew install --quiet "$@" +} + +install_pip_packages() { + pip3 install "$@" +} + +determine_host() { + local config_guess="${1:-./depends/config.guess}" + local output_key="${2:-host}" + + write_output_var "${output_key}" "$("${config_guess}")" +} + +ci_helpers_main() { + local command="${1:?missing command}" + shift + + [[ "${command}" =~ ^[a-z_][a-z0-9_]*$ ]] || { + echo "Invalid command: ${command}" >&2 + exit 1 + } + + if declare -F "${command}" >/dev/null; then + "${command}" "$@" + else + echo "Unknown command: ${command}" >&2 + exit 1 + fi +} + +if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then + ci_helpers_main "$@" +fi diff --git a/ci/scripts/config.sh b/ci/scripts/config.sh new file mode 100755 index 00000000000..7fe8118d7be --- /dev/null +++ b/ci/scripts/config.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# +# Copyright (c) The Bitcoin Core developers +# 
Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +# +# Source CI configuration and output variables needed by the workflow. + +export LC_ALL=C + +set -o errexit -o nounset -o pipefail -o xtrace + +readonly SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" + +source "${SCRIPT_DIR}/ci_helpers.sh" + +[ "${CI_CONFIG+x}" ] && source "$CI_CONFIG" + +# Resolve the nixpkgs channel to a specific revision for use in cache keys. +if [[ -n "${NIXPKGS_CHANNEL:-}" ]]; then + rev="$(curl --fail --location --silent --show-error "https://channels.nixos.org/${NIXPKGS_CHANNEL}/git-revision")" + test -n "${rev}" + write_output_var nixpkgs_rev "${rev}" +fi + +write_output_var cache_nix_store "${CI_CACHE_NIX_STORE:-false}" diff --git a/ci/scripts/run.sh b/ci/scripts/run.sh index 4b7808314ce..04ac8596d49 100755 --- a/ci/scripts/run.sh +++ b/ci/scripts/run.sh @@ -11,3 +11,14 @@ set -o errexit -o nounset -o pipefail -o xtrace [ "${CI_CONFIG+x}" ] && source "$CI_CONFIG" nix develop --ignore-environment --keep CI_CONFIG --keep CI_CLEAN "${NIX_ARGS[@]+"${NIX_ARGS[@]}"}" -f shell.nix --command ci/scripts/ci.sh + +# Create a GC root for the shell closure so the cache-nix-action save step +# does not garbage-collect it. +if [ -n "${CI_CACHE_NIX_STORE-}" ]; then + nix-build shell.nix \ + -o "$CI_DIR/gcroot" \ + "${NIX_ARGS[@]+"${NIX_ARGS[@]}"}" + # Verify the closure is complete so the cache-nix-action save step has + # everything it needs. 
+ nix-store --query --requisites "$CI_DIR/gcroot" >/dev/null +fi diff --git a/cmake/pthread_checks.cmake b/cmake/pthread_checks.cmake index 241978d5677..877114165a6 100644 --- a/cmake/pthread_checks.cmake +++ b/cmake/pthread_checks.cmake @@ -25,7 +25,7 @@ check_cxx_source_compiles(" int main(int argc, char** argv) { uint64_t tid; - pthread_threadid_np(NULL, &tid); + pthread_threadid_np(nullptr, &tid); return 0; }" HAVE_PTHREAD_THREADID_NP) diff --git a/doc/versions.md b/doc/versions.md index 9a7b12238f0..db5647df807 100644 --- a/doc/versions.md +++ b/doc/versions.md @@ -7,55 +7,68 @@ Library versions are tracked with simple Versioning policy is described in the [version.h](../include/mp/version.h) include. -## v8 -- Current unstable version in master branch. +## v9 +- Current unstable version. -## v7.0 -- Adds SpawnProcess race fix, cmake `target_capnp_sources` option, ci and documentation improvements. -- Used in Bitcoin Core master branch, pulled in by [#34363](https://github.com/bitcoin/bitcoin/pull/34363). +## [v8.0](https://github.com/bitcoin-core/libmultiprocess/commits/v8.0) +- Better support for non-libmultiprocess IPC clients: avoiding errors on unclean disconnects, and allowing simultaneous requests to worker threads which previously triggered "thread busy" errors. +- Used in Bitcoin Core, pulled in by [#34422](https://github.com/bitcoin/bitcoin/pull/34422). -## v7.0-pre1 +## [v7.0](https://github.com/bitcoin-core/libmultiprocess/commits/v7.0) +- Adds SpawnProcess race fix, cmake `target_capnp_sources` option, ci and documentation improvements. Adds `version.h` header declaring major and minor version numbers. +- Used in Bitcoin Core, pulled in by [#34363](https://github.com/bitcoin/bitcoin/pull/34363). -- Adds support for log levels to reduce logging and "thread busy" error to avoid a crash on misuse. Fixes intermittent mptest hang and makes other minor improvements. -- Used in Bitcoin Core 30.1 and 30.2 releases and 30.x branch. 
Pulled in by [#33412](https://github.com/bitcoin/bitcoin/pull/33412), [#33518](https://github.com/bitcoin/bitcoin/pull/33518), and [#33519](https://github.com/bitcoin/bitcoin/pull/33519). +## [v7.0-pre2](https://github.com/bitcoin-core/libmultiprocess/commits/v7.0-pre2) +- Fixes intermittent mptest hang and makes other minor improvements. +- Used in Bitcoin Core 30.1 and 30.2 releases and 30.x branch, pulled in by [#33518](https://github.com/bitcoin/bitcoin/pull/33518) and [#33519](https://github.com/bitcoin/bitcoin/pull/33519). -## v6.0 +## [v7.0-pre1](https://github.com/bitcoin-core/libmultiprocess/commits/v7.0-pre1) -- Adds fixes for unclean shutdowns and thread sanitizer issues and adds CI scripts. +- Adds support for log levels to reduce logging and "thread busy" error to avoid a crash on misuse. +- Minimum required version for Bitcoin Core 30.1 and 30.2 releases and 30.x branch, pulled in by [#33412](https://github.com/bitcoin/bitcoin/pull/33412), [#33518](https://github.com/bitcoin/bitcoin/pull/33518), and [#33519](https://github.com/bitcoin/bitcoin/pull/33519). + +## [v6.0](https://github.com/bitcoin-core/libmultiprocess/commits/v6.0) + +- Adds CI scripts and build and test fixes. +- Used in Bitcoin Core 30.0 release, pulled in by [#32345](https://github.com/bitcoin/bitcoin/pull/32345), [#33241](https://github.com/bitcoin/bitcoin/pull/33241), and [#33322](https://github.com/bitcoin/bitcoin/pull/33322). + +## [v6.0-pre1](https://github.com/bitcoin-core/libmultiprocess/commits/v6.0-pre1) + +- Adds fixes for unclean shutdowns and thread sanitizer issues. - Drops `EventLoop::addClient` and `EventLoop::removeClient` methods, requiring clients to use new `EventLoopRef` class instead. -- Used in Bitcoin Core 30.0 release. 
Pulled in by [#31741](https://github.com/bitcoin/bitcoin/pull/31741), [#32641](https://github.com/bitcoin/bitcoin/pull/32641), [#32345](https://github.com/bitcoin/bitcoin/pull/32345), [#33241](https://github.com/bitcoin/bitcoin/pull/33241), and [#33322](https://github.com/bitcoin/bitcoin/pull/33322). +- Minimum required version for Bitcoin Core 30.0 release, pulled in by [#31741](https://github.com/bitcoin/bitcoin/pull/31741), [#32641](https://github.com/bitcoin/bitcoin/pull/32641), and [#32345](https://github.com/bitcoin/bitcoin/pull/32345). -## v5.0 +## [v5.0](https://github.com/bitcoin-core/libmultiprocess/commits/v5.0) - Adds build improvements and tidy/warning fixes. - Used in Bitcoin Core 29 releases, pulled in by [#31945](https://github.com/bitcoin/bitcoin/pull/31945). -## v5.0-pre1 +## [v5.0-pre1](https://github.com/bitcoin-core/libmultiprocess/commits/v5.0-pre1) - Adds many improvements to Bitcoin Core mining interface: splitting up type headers, fixing shutdown bugs, adding subtree build support. - Broke up `proxy-types.h` into `type-*.h` files requiring clients to explicitly include overloads needed for C++ ↔️ Cap'n Proto type conversions. - Now requires C++ 20 support. - Minimum required version for Bitcoin Core 29 releases, pulled in by [#30509](https://github.com/bitcoin/bitcoin/pull/30509), [#30510](https://github.com/bitcoin/bitcoin/pull/30510), [#31105](https://github.com/bitcoin/bitcoin/pull/31105), [#31740](https://github.com/bitcoin/bitcoin/pull/31740). -## v4.0 +## [v4.0](https://github.com/bitcoin-core/libmultiprocess/commits/v4.0) - Added better cmake support, installing cmake package files so clients do not need to use pkgconfig. - Used in Bitcoin Core 28 releases, pulled in by [#30490](https://github.com/bitcoin/bitcoin/pull/30490) and [#30513](https://github.com/bitcoin/bitcoin/pull/30513). -## v3.0 +## [v3.0](https://github.com/bitcoin-core/libmultiprocess/commits/v3.0) - Dropped compatibility with Cap'n Proto versions before 0.7. 
- Used in Bitcoin Core 27 releases, pulled in by [#28735](https://github.com/bitcoin/bitcoin/pull/28735), [#28907](https://github.com/bitcoin/bitcoin/pull/28907), and [#29276](https://github.com/bitcoin/bitcoin/pull/29276). -## v2.0 +## [v2.0](https://github.com/bitcoin-core/libmultiprocess/commits/v2.0) - Changed `PassField` function signature. - Now requires C++17 support. - Used in Bitcoin Core 25 and 26 releases, pulled in by [#26672](https://github.com/bitcoin/bitcoin/pull/26672). -## v1.0 +## [v1.0](https://github.com/bitcoin-core/libmultiprocess/commits/v1.0) - Dropped hardcoded includes in generated files, now requiring `include` and `includeTypes` annotations. - Used in Bitcoin Core 22, 23, and 24 releases, pulled in by [#19160](https://github.com/bitcoin/bitcoin/pull/19160). -## v0.0 +## [v0.0](https://github.com/bitcoin-core/libmultiprocess/commits/v0.0) - Initial version used in a downstream release. - Used in Bitcoin Core 21 releases, pulled in by [#16367](https://github.com/bitcoin/bitcoin/pull/16367), [#18588](https://github.com/bitcoin/bitcoin/pull/18588), and [#18677](https://github.com/bitcoin/bitcoin/pull/18677). diff --git a/include/mp/proxy-io.h b/include/mp/proxy-io.h index c298257c4da..d7b9f0e5468 100644 --- a/include/mp/proxy-io.h +++ b/include/mp/proxy-io.h @@ -340,6 +340,22 @@ public: //! External context pointer. void* m_context; + + //! Hook called when ProxyServer::makeThread() is called. + std::function<void()> testing_hook_makethread; + + //! Hook called on the worker thread inside makeThread(), after the thread + //! context is set up and thread_context promise is fulfilled, but before it + //! starts waiting for requests. + std::function<void()> testing_hook_makethread_created; + + //! Hook called on the worker thread when it starts to execute an async + //! request. Used by tests to control timing or inject behavior at this + //! point in execution. + std::function<void()> testing_hook_async_request_start; + + //!
Hook called on the worker thread just before returning results. + std::function testing_hook_async_request_done; }; //! Single element task queue used to handle recursive capnp calls. (If the @@ -814,6 +830,7 @@ void _Serve(EventLoop& loop, kj::Own&& stream, InitImpl& init return kj::heap>(std::shared_ptr(&init, [](InitImpl*){}), connection); }); auto it = loop.m_incoming_connections.begin(); + MP_LOG(loop, Log::Info) << "IPC server: socket connected."; it->onDisconnect([&loop, it] { MP_LOG(loop, Log::Info) << "IPC server: socket disconnected."; loop.m_incoming_connections.erase(it); diff --git a/include/mp/proxy-types.h b/include/mp/proxy-types.h index 5d38048498f..c13debf361d 100644 --- a/include/mp/proxy-types.h +++ b/include/mp/proxy-types.h @@ -23,9 +23,10 @@ public: ValueField(Value&& value) : m_value(value) {} Value& m_value; + const Value& get() const { return m_value; } Value& get() { return m_value; } Value& init() { return m_value; } - bool has() { return true; } + bool has() const { return true; } }; template @@ -166,10 +167,52 @@ struct ReadDestUpdate Value& m_value; }; -template -decltype(auto) ReadField(TypeList, Args&&... args) +//! Return whether to read a C++ value from a Cap'n Proto field. Returning +//! false can be useful to interpret certain Cap'n Proto field values as null +//! C++ values when initializing nullable C++ std::optional / std::unique_ptr / +//! std::shared_ptr types. +//! +//! For example, when reading from a `List(Data)` field into a +//! `std::vector>` value, it's useful to be +//! able to interpret empty `Data` values as null pointers. This is useful +//! because the Cap'n Proto C++ API does not currently provide a way to +//! distinguish between null and empty Data values in a List[*], so we need to +//! choose some Data value to represent null if we want to allow passing null +//! pointers. Since no CTransaction is ever serialized as empty Data, it's safe +//! to use empty Data values to represent null pointers. +//! +//! 
[*] The Cap'n Proto wire format actually does distinguish between null and +//! empty Data values inside Lists, and the C++ API does allow distinguishing +//! between null and empty Data values in other contexts, just not the List +//! context, so this limitation could be removed in the future. +//! +//! Design note: CustomHasField() and CustomHasValue() are inverses of each +//! other. CustomHasField() allows leaving Cap'n Proto fields unset when C++ +//! types have certain values, and CustomHasValue() allows leaving C++ values +//! unset when Cap'n Proto fields have certain values. But internally the +//! functions get called in different ways. This is because in C++, unlike in +//! Cap'n Proto not every C++ type is default constructible, and it may be +//! impossible to leave certain C++ values unset. For example if a C++ method +//! requires function parameters, there's no way to call the function without +//! constructing values for each of the parameters. Similarly there's no way to +//! add values to C++ vectors or maps without initializing those values. This +//! is not the case in Cap'n Proto where all values are optional and it's +//! possible to skip initializing parameters and list elements. +//! +//! Because of this difference, CustomHasValue() works universally and can be +//! used to disable BuildField() calls in every context, while CustomHasField() +//! can only be used to disable ReadField() calls in certain contexts like +//! std::optional and pointer contexts. +template +bool CustomHasField(TypeList, InvokeContext& invoke_context, const Input& input) { - return CustomReadField(TypeList...>(), Priority<2>(), std::forward(args)...); + return input.has(); +} + +template +decltype(auto) ReadField(TypeList, InvokeContext& invoke_context, Input&& input, Args&&... 
args) +{ + return CustomReadField(TypeList...>(), Priority<2>(), invoke_context, std::forward(input), std::forward(args)...); } template @@ -190,6 +233,13 @@ void ThrowField(TypeList, InvokeContext& invoke_context, Input&& throw std::runtime_error(std::string(CharCast(data.begin()), data.size())); } +//! Return whether to write a C++ value into a Cap'n Proto field. Returning +//! false can be useful to map certain C++ values to unset Cap'n Proto fields. +//! +//! For example the bitcoin `Coin` class asserts false when a spent coin is +//! serialized. But some C++ methods return these coins, so there needs to be a +//! way to represent them in Cap'n Proto and a null Data field is a convenient +//! representation. template bool CustomHasValue(InvokeContext& invoke_context, const Values&... value) { @@ -372,7 +422,7 @@ struct ClientException void handleField(InvokeContext& invoke_context, Results& results, ParamList) { StructField input(results); - if (input.has()) { + if (CustomHasField(TypeList(), invoke_context, input)) { ThrowField(TypeList(), invoke_context, input); } } @@ -595,7 +645,7 @@ template void clientDestroy(Client& client) { if (client.m_context.connection) { - MP_LOG(*client.m_context.loop, Log::Info) << "IPC client destroy " << typeid(client).name(); + MP_LOG(*client.m_context.loop, Log::Debug) << "IPC client destroy " << typeid(client).name(); } else { KJ_LOG(INFO, "IPC interrupted client destroy", typeid(client).name()); } @@ -604,7 +654,7 @@ void clientDestroy(Client& client) template void serverDestroy(Server& server) { - MP_LOG(*server.m_context.loop, Log::Info) << "IPC server destroy " << typeid(server).name(); + MP_LOG(*server.m_context.loop, Log::Debug) << "IPC server destroy " << typeid(server).name(); } //! 
Entry point called by generated client code that looks like: diff --git a/include/mp/type-context.h b/include/mp/type-context.h index 72c39630d00..8efd4fa767f 100644 --- a/include/mp/type-context.h +++ b/include/mp/type-context.h @@ -61,9 +61,8 @@ auto PassField(Priority<1>, TypeList<>, ServerContext& server_context, const Fn& std::is_same::value, kj::Promise>::type { - const auto& params = server_context.call_context.getParams(); - Context::Reader context_arg = Accessor::get(params); auto& server = server_context.proxy_server; + EventLoop& loop = *server.m_context.loop; int req = server_context.req; // Keep a reference to the ProxyServer instance by assigning it to the self // variable. ProxyServer instances are reference-counted and if the client @@ -72,136 +71,148 @@ auto PassField(Priority<1>, TypeList<>, ServerContext& server_context, const Fn& // needs to be destroyed on the event loop thread so it is freed in a sync() // call below. auto self = server.thisCap(); - auto invoke = [self = kj::mv(self), call_context = kj::mv(server_context.call_context), &server, req, fn, args...](CancelMonitor& cancel_monitor) mutable { - MP_LOG(*server.m_context.loop, Log::Debug) << "IPC server executing request #" << req; - const auto& params = call_context.getParams(); - Context::Reader context_arg = Accessor::get(params); - ServerContext server_context{server, call_context, req}; - { - // Before invoking the function, store a reference to the - // callbackThread provided by the client in the - // thread_local.request_threads map. This way, if this - // server thread needs to execute any RPCs that call back to - // the client, they will happen on the same client thread - // that is waiting for this function, just like what would - // happen if this were a normal function call made on the - // local stack. 
- // - // If the request_threads map already has an entry for this - // connection, it will be left unchanged, and it indicates - // that the current thread is an RPC client thread which is - // in the middle of an RPC call, and the current RPC call is - // a nested call from the remote thread handling that RPC - // call. In this case, the callbackThread value should point - // to the same thread already in the map, so there is no - // need to update the map. - auto& thread_context = g_thread_context; - auto& request_threads = thread_context.request_threads; - ConnThread request_thread; - bool inserted{false}; - Mutex cancel_mutex; - Lock cancel_lock{cancel_mutex}; - server_context.cancel_lock = &cancel_lock; - server.m_context.loop->sync([&] { - // Detect request being canceled before it executes. - if (cancel_monitor.m_canceled) { - server_context.request_canceled = true; - return; - } - // Detect request being canceled while it executes. - assert(!cancel_monitor.m_on_cancel); - cancel_monitor.m_on_cancel = [&server, &server_context, &cancel_mutex, req]() { - MP_LOG(*server.m_context.loop, Log::Info) << "IPC server request #" << req << " canceled while executing."; - // Lock cancel_mutex here to block the event loop - // thread and prevent it from deleting the request's - // params and response structs while the execution - // thread is accessing them. Because this lock is - // released before the event loop thread does delete - // the structs, the mutex does not provide any - // protection from the event loop deleting the - // structs _before_ the execution thread acquires - // it. So in addition to locking the mutex, the - // execution thread always checks request_canceled - // as well before accessing the structs. - Lock cancel_lock{cancel_mutex}; - server_context.request_canceled = true; - }; - // Update requests_threads map if not canceled. 
- std::tie(request_thread, inserted) = SetThread( - GuardedRef{thread_context.waiter->m_mutex, request_threads}, server.m_context.connection, - [&] { return context_arg.getCallbackThread(); }); - }); - - // If an entry was inserted into the request_threads map, - // remove it after calling fn.invoke. If an entry was not - // inserted, one already existed, meaning this must be a - // recursive call (IPC call calling back to the caller which - // makes another IPC call), so avoid modifying the map. - const bool erase_thread{inserted}; - KJ_DEFER( - // Release the cancel lock before calling loop->sync and - // waiting for the event loop thread, because if a - // cancellation happened, it needs to run the on_cancel - // callback above. It's safe to release cancel_lock at - // this point because the fn.invoke() call below will be - // finished and no longer accessing the params or - // results structs. - cancel_lock.m_lock.unlock(); - // Erase the request_threads entry on the event loop - // thread with loop->sync(), so if the connection is - // broken there is not a race between this thread and - // the disconnect handler trying to destroy the thread - // client object. - server.m_context.loop->sync([&] { - auto self_dispose{kj::mv(self)}; - if (erase_thread) { - // Look up the thread again without using existing - // iterator since entry may no longer be there after - // a disconnect. Destroy node after releasing - // Waiter::m_mutex, so the ProxyClient - // destructor is able to use EventLoop::mutex - // without violating lock order. 
- ConnThreads::node_type removed; - { - Lock lock(thread_context.waiter->m_mutex); - removed = request_threads.extract(server.m_context.connection); - } - } - }); - ); - if (server_context.request_canceled) { - MP_LOG(*server.m_context.loop, Log::Info) << "IPC server request #" << req << " canceled before it could be executed"; - } else KJ_IF_MAYBE(exception, kj::runCatchingExceptions([&]{ - try { - fn.invoke(server_context, args...); - } catch (const InterruptException& e) { - MP_LOG(*server.m_context.loop, Log::Info) << "IPC server request #" << req << " interrupted (" << e.what() << ")"; - } - })) { - MP_LOG(*server.m_context.loop, Log::Error) << "IPC server request #" << req << " uncaught exception (" << kj::str(*exception).cStr() << ")"; - throw kj::mv(*exception); - } - // End of scope: if KJ_DEFER was reached, it runs here - } - return call_context; + auto invoke = [self = kj::mv(self), call_context = kj::mv(server_context.call_context), &server, &loop, req, fn, args...](CancelMonitor& cancel_monitor) mutable { + MP_LOG(loop, Log::Debug) << "IPC server executing request #" << req; + if (loop.testing_hook_async_request_start) loop.testing_hook_async_request_start(); + KJ_DEFER(if (loop.testing_hook_async_request_done) loop.testing_hook_async_request_done()); + ServerContext server_context{server, call_context, req}; + // Before invoking the function, store a reference to the + // callbackThread provided by the client in the + // thread_local.request_threads map. This way, if this + // server thread needs to execute any RPCs that call back to + // the client, they will happen on the same client thread + // that is waiting for this function, just like what would + // happen if this were a normal function call made on the + // local stack. 
+ // + // If the request_threads map already has an entry for this + // connection, it will be left unchanged, and it indicates + // that the current thread is an RPC client thread which is + // in the middle of an RPC call, and the current RPC call is + // a nested call from the remote thread handling that RPC + // call. In this case, the callbackThread value should point + // to the same thread already in the map, so there is no + // need to update the map. + auto& thread_context = g_thread_context; + auto& request_threads = thread_context.request_threads; + ConnThread request_thread; + bool inserted{false}; + Mutex cancel_mutex; + Lock cancel_lock{cancel_mutex}; + server_context.cancel_lock = &cancel_lock; + loop.sync([&] { + // Detect request being canceled before it executes. + if (cancel_monitor.m_canceled) { + server_context.request_canceled = true; + return; + } + // Detect request being canceled while it executes. + assert(!cancel_monitor.m_on_cancel); + cancel_monitor.m_on_cancel = [&loop, &server_context, &cancel_mutex, req]() { + MP_LOG(loop, Log::Info) << "IPC server request #" << req << " canceled while executing."; + // Lock cancel_mutex here to block the event loop + // thread and prevent it from deleting the request's + // params and response structs while the execution + // thread is accessing them. Because this lock is + // released before the event loop thread does delete + // the structs, the mutex does not provide any + // protection from the event loop deleting the + // structs _before_ the execution thread acquires + // it. So in addition to locking the mutex, the + // execution thread always checks request_canceled + // as well before accessing the structs. + Lock cancel_lock{cancel_mutex}; + server_context.request_canceled = true; }; + // Update requests_threads map if not canceled. 
We know + // the request is not canceled currently because + // cancel_monitor.m_canceled was checked above and this + // code is running on the event loop thread. + std::tie(request_thread, inserted) = SetThread( + GuardedRef{thread_context.waiter->m_mutex, request_threads}, server.m_context.connection, + [&] { return Accessor::get(call_context.getParams()).getCallbackThread(); }); + }); + + // If an entry was inserted into the request_threads map, + // remove it after calling fn.invoke. If an entry was not + // inserted, one already existed, meaning this must be a + // recursive call (IPC call calling back to the caller which + // makes another IPC call), so avoid modifying the map. + const bool erase_thread{inserted}; + KJ_DEFER( + // Release the cancel lock before calling loop->sync and + // waiting for the event loop thread, because if a + // cancellation happened, it needs to run the on_cancel + // callback above. It's safe to release cancel_lock at + // this point because the fn.invoke() call below will be + // finished and no longer accessing the params or + // results structs. + cancel_lock.m_lock.unlock(); + // Erase the request_threads entry on the event loop + // thread with loop->sync(), so if the connection is + // broken there is not a race between this thread and + // the disconnect handler trying to destroy the thread + // client object. + loop.sync([&] { + // Clear cancellation callback. At this point the + // method invocation finished and the result is + // either being returned, or discarded if a + // cancellation happened. So we do not need to be + // notified of cancellations after this point. Also + // we do not want to be notified because + // cancel_mutex and server_context could be out of + // scope when it happens. + cancel_monitor.m_on_cancel = nullptr; + auto self_dispose{kj::mv(self)}; + if (erase_thread) { + // Look up the thread again without using existing + // iterator since entry may no longer be there after + // a disconnect. 
Destroy node after releasing + // Waiter::m_mutex, so the ProxyClient + // destructor is able to use EventLoop::mutex + // without violating lock order. + ConnThreads::node_type removed; + { + Lock lock(thread_context.waiter->m_mutex); + removed = request_threads.extract(server.m_context.connection); + } + } + }); + ); + if (server_context.request_canceled) { + MP_LOG(loop, Log::Info) << "IPC server request #" << req << " canceled before it could be executed"; + } else KJ_IF_MAYBE(exception, kj::runCatchingExceptions([&]{ + try { + fn.invoke(server_context, args...); + } catch (const InterruptException& e) { + MP_LOG(loop, Log::Info) << "IPC server request #" << req << " interrupted (" << e.what() << ")"; + } + })) { + MP_LOG(loop, Log::Error) << "IPC server request #" << req << " uncaught exception (" << kj::str(*exception).cStr() << ")"; + throw kj::mv(*exception); + } + return call_context; + // End of scope: if KJ_DEFER was reached, it runs here + }; // Lookup Thread object specified by the client. The specified thread should // be a local Thread::Server object, but it needs to be looked up // asynchronously with getLocalServer(). + const auto& params = server_context.call_context.getParams(); + Context::Reader context_arg = Accessor::get(params); auto thread_client = context_arg.getThread(); auto result = server.m_context.connection->m_threads.getLocalServer(thread_client) - .then([&server, invoke = kj::mv(invoke), req](const kj::Maybe& perhaps) mutable { + .then([&loop, invoke = kj::mv(invoke), req](const kj::Maybe& perhaps) mutable { // Assuming the thread object is found, pass it a pointer to the // `invoke` lambda above which will invoke the function on that // thread. 
KJ_IF_MAYBE (thread_server, perhaps) { auto& thread = static_cast&>(*thread_server); - MP_LOG(*server.m_context.loop, Log::Debug) + MP_LOG(loop, Log::Debug) << "IPC server post request #" << req << " {" << thread.m_thread_context.thread_name << "}"; return thread.template post(std::move(invoke)); } else { - MP_LOG(*server.m_context.loop, Log::Error) + MP_LOG(loop, Log::Error) << "IPC server error request #" << req << ", missing thread to execute request"; throw std::runtime_error("invalid thread handle"); } diff --git a/include/mp/type-data.h b/include/mp/type-data.h index 5da4cfce5f4..cdcdd30eb4c 100644 --- a/include/mp/type-data.h +++ b/include/mp/type-data.h @@ -7,6 +7,9 @@ #include +#include +#include + namespace mp { template concept IsSpanOf = diff --git a/include/mp/type-function.h b/include/mp/type-function.h index 47d3b30be29..951e08852d3 100644 --- a/include/mp/type-function.h +++ b/include/mp/type-function.h @@ -48,13 +48,13 @@ struct ProxyCallFn }; template -decltype(auto) CustomReadField(TypeList>, +decltype(auto) CustomReadField(TypeList> types, Priority<1>, InvokeContext& invoke_context, Input&& input, ReadDest&& read_dest) { - if (input.has()) { + if (CustomHasField(types, invoke_context, input)) { using Interface = typename Decay::Calls; auto client = std::make_shared>( input.get(), &invoke_context.connection, /* destroy_connection= */ false); diff --git a/include/mp/type-interface.h b/include/mp/type-interface.h index 7a98b4afab4..a32c53d268f 100644 --- a/include/mp/type-interface.h +++ b/include/mp/type-interface.h @@ -85,7 +85,7 @@ decltype(auto) CustomReadField(TypeList>, typename Decay::Calls* enable = nullptr) { using Interface = typename Decay::Calls; - if (input.has()) { + if (CustomHasField(TypeList(), invoke_context, input)) { return read_dest.construct( CustomMakeProxyClient(invoke_context, std::move(input.get()))); } @@ -101,7 +101,7 @@ decltype(auto) CustomReadField(TypeList>, typename Decay::Calls* enable = nullptr) { using Interface 
= typename Decay::Calls; - if (input.has()) { + if (CustomHasField(TypeList(), invoke_context, input)) { return read_dest.construct( CustomMakeProxyClient(invoke_context, std::move(input.get()))); } diff --git a/include/mp/type-map.h b/include/mp/type-map.h index 6fa5623e510..50a590cbc3f 100644 --- a/include/mp/type-map.h +++ b/include/mp/type-map.h @@ -27,6 +27,35 @@ void CustomBuildField(TypeList>, } } +// Replacement for `m.emplace(piecewise_construct, t1, t2)` to work around a +// Clang 22 regression triggered by libc++'s std::map piecewise emplace: when +// the key constructor argument tuple is empty (std::tuple<>), libc++'s internal +// "try key extraction" SFINAE probe instantiates std::tuple_element<0, +// std::tuple<>>, which Clang 22 diagnoses as an out-of-bounds pack access ("a +// parameter pack may not be accessed at an out of bounds index") instead of +// treating it as substitution failure. See LLVM issue #167709 and the upstream +// fix in llvm/llvm-project PR #183614. +// https://github.com/llvm/llvm-project/issues/167709 +// https://github.com/llvm/llvm-project/pull/183614 +template +auto EmplacePiecewiseSafe( + Map& m, + const std::piecewise_construct_t&, + Tuple1&& t1, + Tuple2&& t2) +{ + if constexpr (std::tuple_size_v> == 0) { + // Avoid tuple<> / tuple<> (LLVM 22 libc++ regression path) + return m.emplace(std::piecewise_construct, + std::forward_as_tuple(typename Map::key_type{}), + std::forward(t2)); + } else { + return m.emplace(std::piecewise_construct, + std::forward(t1), + std::forward(t2)); + } +} + template decltype(auto) CustomReadField(TypeList>, Priority<1>, @@ -42,7 +71,7 @@ decltype(auto) CustomReadField(TypeList>, Make(item), ReadDestEmplace( TypeList>(), [&](auto&&... 
args) -> auto& { - return *value.emplace(std::forward(args)...).first; + return *EmplacePiecewiseSafe(value, std::forward(args)...).first; })); } }); diff --git a/include/mp/type-message.h b/include/mp/type-message.h index baa46eb0cad..b1cec13d6c1 100644 --- a/include/mp/type-message.h +++ b/include/mp/type-message.h @@ -23,13 +23,13 @@ void CustomBuildField(TypeList, Priority<2>, InvokeContext& invoke_co //! overloads. Defining a CustomReadMessage overload is simpler than defining a //! CustomReadField overload because it only requires defining a normal //! function, not a template function, but less flexible. -template -decltype(auto) CustomReadField(TypeList, Priority<2>, InvokeContext& invoke_context, Reader&& reader, +template +decltype(auto) CustomReadField(TypeList, Priority<2>, InvokeContext& invoke_context, Input&& input, ReadDest&& read_dest, - decltype(CustomReadMessage(invoke_context, reader.get(), + decltype(CustomReadMessage(invoke_context, input.get(), std::declval()))* enable = nullptr) { - return read_dest.update([&](auto& value) { if (reader.has()) CustomReadMessage(invoke_context, reader.get(), value); }); + return read_dest.update([&](auto& value) { if (CustomHasField(TypeList(), invoke_context, input)) CustomReadMessage(invoke_context, input.get(), value); }); } //! Helper for CustomPassField below. Call Accessor::init method if it has one, diff --git a/include/mp/type-optional.h b/include/mp/type-optional.h index 6f23fd4d521..80a8f4f3026 100644 --- a/include/mp/type-optional.h +++ b/include/mp/type-optional.h @@ -17,8 +17,7 @@ void CustomBuildField(TypeList>, { if (value) { output.setHas(); - // FIXME: should std::move value if destvalue is rref? 
- BuildField(TypeList(), invoke_context, output, *value); + BuildField(TypeList(), invoke_context, output, *std::forward(value)); } } @@ -30,7 +29,7 @@ decltype(auto) CustomReadField(TypeList>, ReadDest&& read_dest) { return read_dest.update([&](auto& value) { - if (!input.has()) { + if (!CustomHasField(TypeList(), invoke_context, input)) { value.reset(); } else if (value) { ReadField(TypeList(), invoke_context, input, ReadDestUpdate(*value)); diff --git a/include/mp/type-pointer.h b/include/mp/type-pointer.h index 98b7aa817fe..192e1b034a3 100644 --- a/include/mp/type-pointer.h +++ b/include/mp/type-pointer.h @@ -50,7 +50,7 @@ decltype(auto) CustomReadField(TypeList>, ReadDest&& read_dest) { return read_dest.update([&](auto& value) { - if (!input.has()) { + if (!CustomHasField(TypeList(), invoke_context, input)) { value.reset(); } else if (value) { ReadField(TypeList(), invoke_context, input, ReadDestUpdate(*value)); @@ -72,7 +72,7 @@ decltype(auto) CustomReadField(TypeList>, ReadDest&& read_dest) { return read_dest.update([&](auto& value) { - if (!input.has()) { + if (!CustomHasField(TypeList(), invoke_context, input)) { value.reset(); return; } diff --git a/include/mp/version.h b/include/mp/version.h index a6b0096feb8..5c8753cf837 100644 --- a/include/mp/version.h +++ b/include/mp/version.h @@ -24,7 +24,7 @@ //! pointing at the prior merge commit. The /doc/versions.md file should also be //! updated, noting any significant or incompatible changes made since the //! previous version. -#define MP_MAJOR_VERSION 8 +#define MP_MAJOR_VERSION 9 //! Minor version number. Should be incremented in stable branches after //! backporting changes. 
The /doc/versions.md file should also be updated to diff --git a/src/mp/gen.cpp b/src/mp/gen.cpp index 53d3fd36efa..603f9ccb720 100644 --- a/src/mp/gen.cpp +++ b/src/mp/gen.cpp @@ -228,6 +228,7 @@ static void Generate(kj::StringPtr src_prefix, cpp_client << "#include <" << include_path << ".proxy-types.h>\n"; cpp_client << "#include \n"; cpp_client << "#include \n"; + cpp_client << "#include \n"; cpp_client << "#include \n"; cpp_client << "#include \n"; cpp_client << "#include \n"; diff --git a/src/mp/proxy.cpp b/src/mp/proxy.cpp index da22ae62c95..d24208dbfd8 100644 --- a/src/mp/proxy.cpp +++ b/src/mp/proxy.cpp @@ -411,13 +411,16 @@ ProxyServer::ProxyServer(Connection& connection) : m_connection(conne kj::Promise ProxyServer::makeThread(MakeThreadContext context) { + EventLoop& loop{*m_connection.m_loop}; + if (loop.testing_hook_makethread) loop.testing_hook_makethread(); const std::string from = context.getParams().getName(); std::promise thread_context; - std::thread thread([&thread_context, from, this]() { - g_thread_context.thread_name = ThreadName(m_connection.m_loop->m_exe_name) + " (from " + from + ")"; + std::thread thread([&loop, &thread_context, from]() { + g_thread_context.thread_name = ThreadName(loop.m_exe_name) + " (from " + from + ")"; g_thread_context.waiter = std::make_unique(); - thread_context.set_value(&g_thread_context); Lock lock(g_thread_context.waiter->m_mutex); + thread_context.set_value(&g_thread_context); + if (loop.testing_hook_makethread_created) loop.testing_hook_makethread_created(); // Wait for shutdown signal from ProxyServer destructor (signal // is just waiter getting set to null.) 
g_thread_context.waiter->wait(lock, [] { return !g_thread_context.waiter; }); diff --git a/src/mp/util.cpp b/src/mp/util.cpp index 7b486082a06..463947b9bbb 100644 --- a/src/mp/util.cpp +++ b/src/mp/util.cpp @@ -81,7 +81,7 @@ std::string ThreadName(const char* exe_name) buffer << syscall(SYS_gettid); #elif defined(HAVE_PTHREAD_THREADID_NP) uint64_t tid = 0; - pthread_threadid_np(NULL, &tid); + pthread_threadid_np(nullptr, &tid); buffer << tid; #elif defined(HAVE_PTHREAD_GETTHREADID_NP) buffer << pthread_getthreadid_np(); @@ -102,7 +102,7 @@ std::string LogEscape(const kj::StringTree& string, size_t max_size) result.append("\\\\"); } else if (c < 0x20 || c > 0x7e) { char escape[4]; - snprintf(escape, 4, "\\%02x", c); + snprintf(escape, sizeof(escape), "\\%02x", static_cast(c)); result.append(escape); } else { result.push_back(c); diff --git a/test/mp/test/foo-types.h b/test/mp/test/foo-types.h index e70bc4c100b..735adb7689c 100644 --- a/test/mp/test/foo-types.h +++ b/test/mp/test/foo-types.h @@ -13,12 +13,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -56,6 +58,14 @@ decltype(auto) CustomReadField(TypeList, Priority<1>, InvokeContext& } // namespace test +template +bool CustomHasField(TypeList, InvokeContext& invoke_context, const Input& input) +{ + // Cap'n Proto C++ cannot distinguish null vs empty Data in List(Data), so + // interpret empty Data as null for this specific type. 
+ return input.get().size() != 0; +} + inline void CustomBuildMessage(InvokeContext& invoke_context, const test::FooMessage& src, test::messages::FooMessage::Builder&& builder) diff --git a/test/mp/test/foo.capnp b/test/mp/test/foo.capnp index 5bdb5754bde..5bdee2575b5 100644 --- a/test/mp/test/foo.capnp +++ b/test/mp/test/foo.capnp @@ -34,6 +34,7 @@ interface FooInterface $Proxy.wrap("mp::test::FooImplementation") { callFn @17 () -> (); callFnAsync @18 (context :Proxy.Context) -> (); callIntFnAsync @21 (context :Proxy.Context, arg :Int32) -> (result :Int32); + passDataPointers @22 (arg :List(Data)) -> (result :List(Data)); } interface FooCallback $Proxy.wrap("mp::test::FooCallback") { diff --git a/test/mp/test/foo.h b/test/mp/test/foo.h index 73f43da3531..317af025eb6 100644 --- a/test/mp/test/foo.h +++ b/test/mp/test/foo.h @@ -45,6 +45,9 @@ struct FooMutable std::string message; }; +using FooData = std::vector; +using FooDataRef = std::shared_ptr; + class FooCallback { public: @@ -80,6 +83,7 @@ public: void passMutable(FooMutable& foo) { foo.message += " call"; } FooEnum passEnum(FooEnum foo) { return foo; } int passFn(std::function fn) { return fn(); } + std::vector passDataPointers(std::vector values) { return values; } std::shared_ptr m_callback; void callFn() { assert(m_fn); m_fn(); } void callFnAsync() { assert(m_fn); m_fn(); } diff --git a/test/mp/test/test.cpp b/test/mp/test/test.cpp index b8df4677ca0..d91edb40dce 100644 --- a/test/mp/test/test.cpp +++ b/test/mp/test/test.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -49,8 +50,8 @@ static_assert(std::is_integral_v, "MP_MINOR_VERSION * Test setup class creating a two way connection between a * ProxyServer object and a ProxyClient. * - * Provides client_disconnect and server_disconnect lambdas that can be used to - * trigger disconnects and test handling of broken and closed connections. 
+ * Provides disconnection lambdas that can be used to trigger + * disconnects and test handling of broken and closed connections. * * Accepts a client_owns_connection option to test different ProxyClient * destroy_connection values and control whether destroying the ProxyClient @@ -63,6 +64,7 @@ class TestSetup { public: std::function server_disconnect; + std::function server_disconnect_later; std::function client_disconnect; std::promise>> client_promise; std::unique_ptr> client; @@ -88,6 +90,10 @@ public: return capnp::Capability::Client(kj::mv(server_proxy)); }); server_disconnect = [&] { loop.sync([&] { server_connection.reset(); }); }; + server_disconnect_later = [&] { + assert(std::this_thread::get_id() == loop.m_thread_id); + loop.m_task_set->add(kj::evalLater([&] { server_connection.reset(); })); + }; // Set handler to destroy the server when the client disconnects. This // is ignored if server_disconnect() is called instead. server_connection->onDisconnect([&] { server_connection.reset(); }); @@ -211,6 +217,15 @@ KJ_TEST("Call FooInterface methods") KJ_EXPECT(mut.message == "init build pass call return read"); KJ_EXPECT(foo->passFn([]{ return 10; }) == 10); + + std::vector data_in; + data_in.push_back(std::make_shared(FooData{'H', 'i'})); + data_in.push_back(nullptr); + std::vector data_out{foo->passDataPointers(data_in)}; + KJ_EXPECT(data_out.size() == 2); + KJ_REQUIRE(data_out[0] != nullptr); + KJ_EXPECT(*data_out[0] == *data_in[0]); + KJ_EXPECT(!data_out[1]); } KJ_TEST("Call IPC method after client connection is closed") @@ -316,6 +331,102 @@ KJ_TEST("Calling IPC method, disconnecting and blocking during the call") signal.set_value(); } +KJ_TEST("Worker thread destroyed before it is initialized") +{ + // Regression test for bitcoin/bitcoin#34711, bitcoin/bitcoin#34756 where a + // worker thread is destroyed before it starts waiting for work. 
+ // + // The test uses the `makethread` hook to trigger a disconnect as soon as + // ProxyServer::makeThread is called, so without the bugfix, + // ProxyServer::~ProxyServer would run and destroy the waiter before + // the worker thread started waiting, causing a SIGSEGV when it did start. + TestSetup setup; + ProxyClient* foo = setup.client.get(); + foo->initThreadMap(); + setup.server->m_impl->m_fn = [] {}; + + EventLoop& loop = *setup.server->m_context.connection->m_loop; + loop.testing_hook_makethread = [&] { + // Use disconnect_later to queue the disconnect, because the makethread + // hook is called on the event loop thread. The disconnect should happen + // as soon as the event loop is idle. + setup.server_disconnect_later(); + }; + loop.testing_hook_makethread_created = [&] { + // Sleep to allow event loop to run and process the queued disconnect + // before the worker thread starts waiting. + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + }; + + bool disconnected{false}; + try { + foo->callFnAsync(); + } catch (const std::runtime_error& e) { + KJ_EXPECT(std::string_view{e.what()} == "IPC client method call interrupted by disconnect."); + disconnected = true; + } + KJ_EXPECT(disconnected); +} + +KJ_TEST("Calling async IPC method, with server disconnect racing the call") +{ + // Regression test for bitcoin/bitcoin#34777 heap-use-after-free where + // an async request is canceled before it starts to execute. + // + // Use testing_hook_async_request_start to trigger a disconnect from the + // worker thread as soon as it begins to execute an async request. Without + // the bugfix, the worker thread would trigger a SIGSEGV after this by + // calling call_context.getParams(). 
+ TestSetup setup; + ProxyClient* foo = setup.client.get(); + foo->initThreadMap(); + setup.server->m_impl->m_fn = [] {}; + + EventLoop& loop = *setup.server->m_context.connection->m_loop; + loop.testing_hook_async_request_start = [&] { + setup.server_disconnect(); + // Sleep is necessary to let the event loop fully clean up after the + // disconnect and trigger the SIGSEGV. + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + }; + + try { + foo->callFnAsync(); + KJ_EXPECT(false); + } catch (const std::runtime_error& e) { + KJ_EXPECT(std::string_view{e.what()} == "IPC client method call interrupted by disconnect."); + } +} + +KJ_TEST("Calling async IPC method, with server disconnect after cleanup") +{ + // Regression test for bitcoin/bitcoin#34782 stack-use-after-return where + // an async request is canceled after it finishes executing but before the + // response is sent. + // + // Use testing_hook_async_request_done to trigger a disconnect from the + // worker thread after it executes an async request but before it returns. + // Without the bugfix, the m_on_cancel callback would be called at this + // point, accessing the cancel_mutex stack variable that had gone out of + // scope. + TestSetup setup; + ProxyClient* foo = setup.client.get(); + foo->initThreadMap(); + setup.server->m_impl->m_fn = [] {}; + + EventLoop& loop = *setup.server->m_context.connection->m_loop; + loop.testing_hook_async_request_done = [&] { + setup.server_disconnect(); + }; + + try { + foo->callFnAsync(); + KJ_EXPECT(false); + } catch (const std::runtime_error& e) { + KJ_EXPECT(std::string_view{e.what()} == "IPC client method call interrupted by disconnect."); + } +} + KJ_TEST("Make simultaneous IPC calls on single remote thread") { TestSetup setup;