diff --git a/.bumpversion.toml b/.bumpversion.toml
index 00b4f083..b10711ab 100644
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "1.0.0-beta.5"
+current_version = "1.0.0-beta.7"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.
@@ -64,3 +64,8 @@ filename = "CMakeLists.txt"
 serialize = ["{major}.{minor}.{patch}"]
 search = "project(roofer VERSION {current_version}"
 replace = "project(roofer VERSION {new_version}"
+
+[[tool.bumpversion.files]]
+filename = "apps/roofer-app/version.hpp"
+search = "RF_VERSION \"{current_version}\""
+replace = "RF_VERSION \"{new_version}\""
diff --git a/.dockerignore b/.dockerignore
index 02aca4ca..ce952dcc 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -4,7 +4,5 @@
 
 docs
 experiments
-external/vcpkg
 external/val3dity
 tests
-vcpkg_installed
diff --git a/.github/workflows/build-push-docker.yml b/.github/workflows/build-push-docker.yml
index 8ed1053d..b9723de9 100644
--- a/.github/workflows/build-push-docker.yml
+++ b/.github/workflows/build-push-docker.yml
@@ -7,8 +7,6 @@ on:
     paths-ignore:
       - "**.md"
       - "**.rst"
-    branches:
-      - develop
   pull_request:
     branches:
       - develop
@@ -16,21 +14,46 @@ on:
 jobs:
   build:
     runs-on: ubuntu-latest
+    permissions:
+      contents: read
+
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v5
+
       - name: Set docker image tag to 'develop'
         run: echo "DOCKER_TAG=develop" >> $GITHUB_ENV
+
       - name: Set docker image tag to git tag
         if: startsWith(github.ref, 'refs/tags/v')
         run: echo "DOCKER_TAG=${{ github.ref_name }}" >> $GITHUB_ENV
+
       - name: Login to Docker Hub
+        if: github.event_name != 'pull_request'
         uses: docker/login-action@v3
         with:
           username: ${{ secrets.DOCKER_HUB_USERNAME }}
           password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
+
       - name: Set up Docker Buildx
+        id: buildx
         uses: docker/setup-buildx-action@v3
+
+      - name: Build
+        if: github.event_name == 'pull_request'
+        uses: docker/build-push-action@v6
+        with:
+          context: ./
+          file: ./docker/Dockerfile
+          builder: ${{ steps.buildx.outputs.name }}
+          build-args: |
+            JOBS=2
+            VERSION=${{ env.DOCKER_TAG }}
+          push: false
+          tags: 3dgi/roofer:${{ env.DOCKER_TAG }}
+          cache-from: type=registry,ref=3dgi/roofer:buildcache
+
       - name: Build and push
+        if: github.event_name != 'pull_request'
         uses: docker/build-push-action@v6
         with:
           context: ./
diff --git a/.github/workflows/build_install_conan.yml b/.github/workflows/build_install_conan.yml
new file mode 100644
index 00000000..b883f340
--- /dev/null
+++ b/.github/workflows/build_install_conan.yml
@@ -0,0 +1,41 @@
+name: Build and Install (Conan)
+
+on:
+  push:
+    tags:
+      - "v*"
+  pull_request:
+    branches: ["develop"]
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: ubuntu-24.04
+            build_type: Release
+            build_jobs: 4
+          - os: windows-2022
+            build_type: Release
+            build_jobs: 4
+          - os: macos-15
+            build_type: Release
+            build_jobs: 3
+    uses: ./.github/workflows/conan-build.yml
+    with:
+      runs_on: ${{ matrix.os }}
+      build_type: ${{ matrix.build_type }}
+      build_jobs: ${{ matrix.build_jobs }}
+      build_apps: true
+      use_spdlog: true
+      use_val3dity: false
+      build_bindings: false
+      build_testing: true
+      build_doc_helper: false
+      run_tests: true
+      artifact_suffix: build
diff --git a/.github/workflows/build_install_test_vcpkg.yml b/.github/workflows/build_install_test_vcpkg.yml
deleted file mode 100644
index 5002dbcb..00000000
--- a/.github/workflows/build_install_test_vcpkg.yml
+++ /dev/null
@@ -1,232 +0,0 @@
-# This starter workflow is for a CMake project running on multiple platforms. There is a different starter workflow if you just want a single platform.
-# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-single-platform.yml
-name: Build and Test (vcpkg)
-
-on:
-  # push to main branch
-  push:
-    tags:
-      - "v*"
-  pull_request:
-    branches: ["develop"]
-
-  workflow_dispatch:
-
-jobs:
-  build:
-    runs-on: ${{ matrix.os }}
-
-    strategy:
-      # Set fail-fast to false to ensure that feedback is delivered for all matrix combinations. Consider changing this to true when your workflow is stable.
-      fail-fast: false
-
-      # To add more build types (Release, Debug, RelWithDebInfo, etc.) customize the build_type list.
-      # See: https://github.com/actions/runner-images?tab=readme-ov-file#available-images
-      matrix:
-        os: [ubuntu-24.04, windows-2022, macos-15]
-        build_type: [Release]
-        # c_compiler: [gcc, clang, cl]
-        # Default compilers:
-        #   - Ubuntu 22.04 (jammy): gcc-11, clang-14
-        #   - Ubuntu 24.04 (noble): gcc-13, clang-18
-        #   - macOS 14: clang-15
-        # include:
-        #   - os: ubuntu-24.04
-        #     c_compiler: gcc
-        #     cpp_compiler: g++
-        #   - os: ubuntu-24.04
-        #     c_compiler: clang
-        #     cpp_compiler: clang++
-        #   - os: windows-2022
-        #     c_compiler: cl
-        #     cpp_compiler: cl
-        #   - os: macos-15
-        #     c_compiler: clang
-        #     cpp_compiler: clang++
-        # exclude:
-        #   - os: ubuntu-24.04
-        #     c_compiler: cl
-        #   - os: windows-2022
-        #     c_compiler: gcc
-        #   - os: windows-2022
-        #     c_compiler: clang
-        #   - os: macos-15
-        #     c_compiler: cl
-        #   - os: macos-15
-        #     c_compiler: gcc
-
-    env:
-      USERNAME: 3DBAG
-      FEED_URL: https://nuget.pkg.github.com/3DBAG/index.json
-      VCPKG_BINARY_SOURCES: "clear;nuget,https://nuget.pkg.github.com/3DBAG/index.json,readwrite"
-
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/github-script@v7
-        with:
-          script: |
-            core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
-            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
-      # Need python for running the tests on the installed exes
-      - uses: actions/setup-python@v5
-        if: runner.os == 'Windows'
-        with:
-          python-version: "3.13"
-      - name: Install the latest version of uv and set the python version to 3.13
-        if: runner.os == 'Windows'
-        uses: astral-sh/setup-uv@v6
-        with:
-          activate-environment: true
-          enable-cache: true
-      - name: Install python dependencies
-        if: runner.os == 'Windows'
-        run: uv sync
-
-      - name: Set reusable strings
-        if: runner.os == 'Windows'
-        # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.
-        shell: bash
-        run: |
-          echo "VCPKG_ROOT=$VCPKG_INSTALLATION_ROOT" >> $GITHUB_ENV
-          echo "pybind11_ROOT=$(python -m pybind11 --cmakedir)" >> $GITHUB_ENV
-
-      - if: runner.os == 'Windows'
-        name: "Install vcpkg"
-        shell: bash
-        run: |
-          git clone https://github.com/microsoft/vcpkg.git
-          cd vcpkg
-          git checkout dbe35ceb30c688bf72e952ab23778e009a578f18
-          echo "VCPKG_ROOT=${{ github.workspace }}/vcpkg" >> "$GITHUB_ENV"
-          echo "VCPKG_EXE=${{ github.workspace }}/vcpkg/vcpkg" >> "$GITHUB_ENV"
-          echo "${{ github.workspace }}/vcpkg" >> $GITHUB_PATH
-
-      - if: runner.os == 'Windows'
-        name: "Bootstrap vcpkg"
-        run: |
-          vcpkg/bootstrap-vcpkg.bat
-
-      - if: runner.os == 'Windows'
-        name: Add NuGet sources
-        shell: pwsh
-        run: |
-          .$(${{ env.VCPKG_EXE }} fetch nuget) `
-            sources add `
-            -Source "${{ env.FEED_URL }}" `
-            -StorePasswordInClearText `
-            -Name GitHubPackages `
-            -UserName "${{ env.USERNAME }}" `
-            -Password "${{ secrets.GH_PACKAGES_TOKEN }}"
-          .$(${{ env.VCPKG_EXE }} fetch nuget) `
-            setapikey "${{ secrets.GH_PACKAGES_TOKEN }}" `
-            -Source "${{ env.FEED_URL }}"
-
-      - if: runner.os != 'Windows'
-        uses: nixbuild/nix-quick-install-action@v30
-        # uses: cachix/install-nix-action@v31
-        # with:
-        #   nix_path: nixpkgs=channel:nixos-unstable
-        name: "install nix"
-
-      - if: runner.os != 'Windows'
-        name: Restore uv cache
-        uses: actions/cache@v4
-        with:
-          path: /tmp/.uv-cache
-          key: uv-${{ runner.os }}-${{ hashFiles('uv.lock') }}
-          restore-keys: |
-            uv-${{ runner.os }}-${{ hashFiles('uv.lock') }}
-            uv-${{ runner.os }}
-
-      - if: runner.os != 'Windows'
-        uses: nicknovitski/nix-develop@v1
-        env:
-          # Configure a constant location for the uv cache
-          UV_CACHE_DIR: /tmp/.uv-cache
-        with:
-          arguments: "--ignore-environment --keep ACTIONS_CACHE_URL --keep ACTIONS_RUNTIME_TOKEN --keep VCPKG_BINARY_SOURCES --keep UV_CACHE_DIR --impure"
-        name: "nix develop"
-
-      - if: runner.os != 'Windows'
-        name: Add NuGet sources
-        shell: bash
-        run: |
-          mono `vcpkg fetch nuget | tail -n 1` \
-            sources add \
-            -Source "${{ env.FEED_URL }}" \
-            -StorePasswordInClearText \
-            -Name GitHubPackages \
-            -UserName "${{ env.USERNAME }}" \
-            -Password "${{ secrets.GH_PACKAGES_TOKEN }}"
-          mono `vcpkg fetch nuget | tail -n 1` \
-            setapikey "${{ secrets.GH_PACKAGES_TOKEN }}" \
-            -Source "${{ env.FEED_URL }}"
-
-      - name: Configure CMake
-        # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
-        # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
-        # -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }}
-        # -DCMAKE_C_COMPILER=${{ matrix.c_compiler }}
-        # -DVCPKG_INSTALL_OPTIONS="--debug"
-        run: >
-          cmake -B build
-          -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/install
-          --preset gh-${{ runner.os }}
-          -S ${{ github.workspace }}
-
-      # - name: Show error logs
-      #   if: ${{ failure() }}
-      #   shell: bash
-      #   run: >
-      #     cat /Users/runner/work/roofer/roofer/build/vcpkg-manifest-install.log
-
-      - name: Build
-        # Build your program with the given configuration. Note that --config is needed because the default Windows generator is a multi-config generator (Visual Studio generator).
-        run: cmake --build build --config ${{ matrix.build_type }} --verbose --preset gh-${{ runner.os }}
-
-      - name: Test
-        # Execute tests defined by the CMake configuration. Note that --build-config is needed because the default Windows generator is a multi-config generator (Visual Studio generator).
-        # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
-        run: ctest --preset test-built --build-config ${{ matrix.build_type }} --test-dir build --output-on-failure
-
-      - name: Install
-        run: cmake --install build --config ${{ matrix.build_type }} --verbose
-
-      - if: runner.os == 'Windows'
-        name: show contents install dir
-        run: |
-          cd install
-          tree /F
-      - if: runner.os != 'Windows'
-        name: show contents install dir
-        run: |
-          cd install
-          ls -R
-      - if: runner.os == 'macOS'
-        name: show contents install dir
-        run: |
-          cd install/bin
-          bundle_libcxx.sh roofer
-
-      - name: Test installed
-        shell: bash
-        run: ctest --preset test-installed --build-config ${{ matrix.build_type }} --test-dir build --output-on-failure
-
-      - if: runner.os != 'Windows'
-        name: Minimize uv cache
-        run: uv cache prune --ci
-
-      - name: Set lowercase runner variables
-        shell: bash
-        run: |
-          echo "RUNNER_OS_LC=$(echo '${{ runner.os }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV
-          echo "RUNNER_ARCH_LC=$(echo '${{ runner.arch }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_ENV
-
-      - name: Upload binaries
-        if: startsWith(github.ref, 'refs/tags/')
-        uses: actions/upload-artifact@v4
-        with:
-          name: roofer-${{ env.RUNNER_OS_LC }}-${{ env.RUNNER_ARCH_LC }}-${{ github.ref_name }}
-          path: |
-            install
-          retention-days: 7
diff --git a/.github/workflows/conan-build.yml b/.github/workflows/conan-build.yml
new file mode 100644
index 00000000..36625311
--- /dev/null
+++ b/.github/workflows/conan-build.yml
@@ -0,0 +1,136 @@
+name: Conan Build
+
+on:
+  workflow_call:
+    inputs:
+      runs_on:
+        description: Runner label for the build job.
+        required: true
+        type: string
+      build_type:
+        description: CMake build type.
+        required: false
+        type: string
+        default: Release
+      build_jobs:
+        description: Number of parallel build jobs.
+        required: true
+        type: number
+      build_apps:
+        description: Build the roofer CLI applications.
+        required: false
+        type: boolean
+        default: true
+      use_spdlog:
+        description: Enable the spdlog logging backend.
+        required: false
+        type: boolean
+        default: true
+      use_val3dity:
+        description: Enable val3dity support.
+        required: false
+        type: boolean
+        default: false
+      build_bindings:
+        description: Build the Python bindings.
+        required: false
+        type: boolean
+        default: false
+      build_testing:
+        description: Build the test suite (use run_tests to execute it).
+        required: false
+        type: boolean
+        default: false
+      build_doc_helper:
+        description: Build the documentation helper executable.
+        required: false
+        type: boolean
+        default: false
+      run_tests:
+        description: Run ctest after building.
+        required: false
+        type: boolean
+        default: false
+      artifact_suffix:
+        description: Suffix used for the uploaded artifact name.
+        required: false
+        type: string
+        default: build
+    outputs:
+      artifact_name:
+        description: Uploaded build artifact name.
+        value: ${{ jobs.build.outputs.artifact_name }}
+
+jobs:
+  build:
+    runs-on: ${{ inputs.runs_on }}
+    permissions:
+      contents: read
+
+    outputs:
+      artifact_name: ${{ steps.meta.outputs.artifact_name }}
+
+    steps:
+      - uses: actions/checkout@v5
+
+      - name: Install and setup Conan
+        uses: conan-io/setup-conan@v1
+        with:
+          home: ${{ runner.temp }}/conan
+          cache_packages: true
+          use_venv: true
+
+      - name: Detect Conan profile
+        run: conan profile detect --force
+
+      - name: Install dependencies
+        run: >
+          conan install .
+          --output-folder=build
+          --build=missing
+          --settings=build_type=${{ inputs.build_type }}
+          --settings=compiler.cppstd=20
+          --options="&:build_apps=${{ inputs.build_apps && 'True' || 'False' }}"
+          --options="&:use_spdlog=${{ inputs.use_spdlog && 'True' || 'False' }}"
+          --options="&:use_val3dity=${{ inputs.use_val3dity && 'True' || 'False' }}"
+          --options="&:build_bindings=${{ inputs.build_bindings && 'True' || 'False' }}"
+          --options="&:build_testing=${{ inputs.build_testing && 'True' || 'False' }}"
+
+      - name: Configure CMake
+        # Conan forwards package options to the matching RF_* CMake options.
+        run: >
+          cmake -B build
+          -S ${{ github.workspace }}
+          -DCMAKE_TOOLCHAIN_FILE="${{ github.workspace }}/build/conan_toolchain.cmake"
+          -DCMAKE_INSTALL_PREFIX="${{ github.workspace }}/install"
+          -DCMAKE_BUILD_TYPE=${{ inputs.build_type }}
+          -DRF_BUILD_DOC_HELPER=${{ inputs.build_doc_helper && 'ON' || 'OFF' }}
+          -DRF_TEST_INSTALL=${{ inputs.run_tests && 'ON' || 'OFF' }}
+
+      - name: Build
+        run: cmake --build build --config ${{ inputs.build_type }} --parallel ${{ inputs.build_jobs }} --verbose
+
+      - name: Install
+        run: cmake --install build --config ${{ inputs.build_type }} --verbose
+
+      - name: Test
+        if: ${{ inputs.run_tests }}
+        env:
+          GDAL_DATA: ${{ github.workspace }}/install/share/gdal
+          PROJ_DATA: ${{ github.workspace }}/install/share/proj
+        run: ctest --test-dir build --build-config ${{ inputs.build_type }} --output-on-failure
+
+      - name: Set artifact name
+        id: meta
+        shell: bash
+        run: |
+          echo "artifact_name=roofer-conan-${{ runner.os }}-${{ runner.arch }}-${{ inputs.artifact_suffix }}" >> "$GITHUB_OUTPUT"
+
+      - name: Upload build artifact
+        uses: actions/upload-artifact@v6
+        with:
+          name: ${{ steps.meta.outputs.artifact_name }}
+          path: |
+            build
+            install
+          retention-days: 7
diff --git a/.github/workflows/cpp-linter.yml b/.github/workflows/cpp-linter.yml
index 5e96619d..51792ba4 100644
--- a/.github/workflows/cpp-linter.yml
+++ b/.github/workflows/cpp-linter.yml
@@ -2,32 +2,31 @@ name: cpp-linter
 
 on:
   pull_request:
-    branches: [ develop ]
-    paths: ['**.cpp', '**.h', '**.hpp']
+    branches: [develop]
+    paths: ["**.cpp", "**.h", "**.hpp"]
 
 jobs:
   cpp-linter:
     runs-on: ubuntu-latest
 
     permissions:
-      contents: write
+      contents: read
+      pull-requests: read
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
       - uses: cpp-linter/cpp-linter-action@main
         id: linter
         continue-on-error: true
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         with:
-          style: 'file' # Use .clang-format config file
-          tidy-checks: '' # Use .clang-tidy config file
+          style: "file" # Use .clang-format config file
+          tidy-checks: "" # Use .clang-tidy config file
           files-changed-only: false
           thread-comments: false
 
-      - name: Fail fast?!
+      - name: Report linter issues
         if: steps.linter.outputs.checks-failed != 0
         run: |
           echo "some linter checks failed. ${{ steps.linter.outputs.checks-failed }}"
-        # for actual deployment
-        # run: exit 1
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index a8b8a717..f297208c 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -3,39 +3,42 @@ name: Documentation
 on:
   push:
     branches:
-      - 'main'
-    # pull_request:
-    #   branches: ["develop"]
+      - "main"
+  pull_request:
+    branches: ["develop"]
 
 jobs:
-  deploy:
-    name: Build and publish documentation
-    runs-on: ubuntu-latest
+  build:
+    uses: ./.github/workflows/conan-build.yml
+    with:
+      runs_on: ubuntu-24.04
+      build_type: Release
+      build_jobs: 4
+      build_apps: false
+      use_spdlog: false
+      use_val3dity: false
+      build_bindings: true
+      build_testing: false
+      build_doc_helper: true
+      run_tests: false
+      artifact_suffix: docs
 
-    permissions:
-      contents: write
+  generate:
+    name: Generate documentation
+    runs-on: ubuntu-latest
+    needs: build
 
     env:
-      USERNAME: 3DBAG
-      FEED_URL: https://nuget.pkg.github.com/3DBAG/index.json
-      VCPKG_BINARY_SOURCES: "clear;nuget,https://nuget.pkg.github.com/3DBAG/index.json,readwrite"
+      UV_CACHE_DIR: /tmp/.uv-cache
 
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/github-script@v7
-        with:
-          script: |
-            core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
-            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
+    permissions:
+      contents: read
 
-      - uses: nixbuild/nix-quick-install-action@v30
-        # uses: cachix/install-nix-action@v31
-        # with:
-        #   nix_path: nixpkgs=channel:nixos-unstable
-        name: "install nix"
+    steps:
+      - uses: actions/checkout@v5
 
       - name: Restore uv cache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
         with:
           path: /tmp/.uv-cache
           key: uv-${{ runner.os }}-${{ hashFiles('uv.lock') }}
@@ -43,52 +46,64 @@ jobs:
             uv-${{ runner.os }}-${{ hashFiles('uv.lock') }}
             uv-${{ runner.os }}
 
-      - uses: nicknovitski/nix-develop@v1
-        env:
-          # Configure a constant location for the uv cache
-          UV_CACHE_DIR: /tmp/.uv-cache
+      - name: Download Conan build artifact
+        uses: actions/download-artifact@v4
         with:
-          arguments: "--ignore-environment --keep ACTIONS_CACHE_URL --keep ACTIONS_RUNTIME_TOKEN --keep VCPKG_BINARY_SOURCES --keep UV_CACHE_DIR --impure"
-        name: "nix develop"
+          name: ${{ needs.build.outputs.artifact_name }}
+          path: ${{ github.workspace }}
 
-      - name: Add NuGet sources
+      - name: Restore doc-helper execute bit
+        shell: bash
+        run: chmod +x build/apps/roofer-app/doc-helper
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+
+      - name: Install docs tooling
         shell: bash
         run: |
-          mono `vcpkg fetch nuget | tail -n 1` \
-            sources add \
-            -Source "${{ env.FEED_URL }}" \
-            -StorePasswordInClearText \
-            -Name GitHubPackages \
-            -UserName "${{ env.USERNAME }}" \
-            -Password "${{ secrets.GH_PACKAGES_TOKEN }}"
-          mono `vcpkg fetch nuget | tail -n 1` \
-            setapikey "${{ secrets.GH_PACKAGES_TOKEN }}" \
-            -Source "${{ env.FEED_URL }}"
-
-      - name: Configure CMake
-        # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
-        # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
-        run: >
-          cmake -B build
-          --preset vcpkg-docs
-          -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/install
-          -S ${{ github.workspace }}
-
-      - name: Build bindings
-        # Build your program with the given configuration. Note that --config is needed because the default Windows generator is a multi-config generator (Visual Studio generator).
-        run: |
-          cmake --build build --config Release --verbose --preset gh-${{ runner.os }}
-          cmake --install build --config Release --verbose
+          sudo apt-get update
+          sudo apt-get install -y doxygen
+          python -m pip install --upgrade pip uv
+
+      - name: Sync Python dependencies
+        shell: bash
+        run: uv sync --frozen
 
       - name: Generate Docs
+        shell: bash
         run: |
-          cd docs
-          make html
+          uv run -- make -C docs html
+
+      - name: Upload documentation artifact
+        uses: actions/upload-artifact@v6
+        with:
+          name: docs-html
+          path: ${{ github.workspace }}/docs/_build/html
+          retention-days: 7
+
+  deploy:
+    name: Publish documentation
+    runs-on: ubuntu-latest
+    needs: generate
+    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+
+    permissions:
+      contents: write
+
+    steps:
+      - name: Download documentation artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: docs-html
+          path: ${{ github.workspace }}/docs-html
 
       - name: Deploy to GitHub Pages
         uses: peaceiris/actions-gh-pages@v4
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
-          publish_dir: ${{ github.workspace }}/docs/_build/html
+          publish_dir: ${{ github.workspace }}/docs-html
           # destination_dir: dev
           publish_branch: gh-pages
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 372f3101..0c3a3b4f 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -2,15 +2,18 @@ name: Formatting
 
 on:
   pull_request:
-    branches: [ "develop" ]
+    branches: ["develop"]
+
+permissions:
+  contents: read
 
 jobs:
   pre-commit:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v4
-    - uses: actions/setup-python@v5
-      with:
-        python-version: '3.12'
-        cache: 'pip'
-    - uses: pre-commit/action@v3.0.1
+      - uses: actions/checkout@v5
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: "pip"
+      - uses: pre-commit/action@v3.0.1
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 50fc9300..066d4324 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Floating point exception when the first and last point are the same in LinearRing holes
 - potential segfault with roofer --help
 
+### Changed
+- Switch the recommended build system from vcpkg to Conan, which is easier to set up and maintain, produces smaller build artifacts, and builds faster.
+- Add support for Nix builds
+
 ## [1.0.0-beta.5] - 2025-08-27
 
 ### Fixed
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2977cf82..420d48ab 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,6 +2,10 @@
 # FetchContent-related changes
 cmake_minimum_required(VERSION 3.25)
 
+# Call project() first: it loads the toolchain file, so Conan-provided variables
+# are set before the option() calls below and override their defaults (CMP0077).
+project(roofer VERSION 1.0.0 LANGUAGES C CXX)
+
 # Options
 option(RF_USE_LOGGER_SPDLOG "Use spdlog as logging backend." OFF)
 option(RF_USE_RERUN "Enable features dependent on Rerun" OFF)
@@ -12,29 +16,7 @@ option(RF_BUILD_BINDINGS "Build python bindings with pybind" OFF)
 option(BUILD_SHARED_LIBS "Build using shared libraries (may not work)" OFF)
 option(RF_BUILD_TESTING "Enable tests for roofer" OFF)
 option(RF_ENABLE_HEAP_TRACING "Enable heap allocation overloads" OFF)
-
-# Enable the vcpkg features that are required by the options
-if(RF_USE_LOGGER_SPDLOG)
-  list(APPEND VCPKG_MANIFEST_FEATURES "spdlog")
-endif()
-if(RF_BUILD_TESTING)
-  list(APPEND VCPKG_MANIFEST_FEATURES "test")
-endif()
-if(RF_BUILD_APPS)
-  list(APPEND VCPKG_MANIFEST_FEATURES "apps")
-endif()
-if(RF_USE_VAL3DITY)
-  list(APPEND VCPKG_MANIFEST_FEATURES "val3dity")
-endif()
-if(RF_USE_RERUN)
-  list(APPEND VCPKG_MANIFEST_FEATURES "app-rerun")
-endif()
-# if(RF_BUILD_BINDINGS)
-#   list(APPEND VCPKG_MANIFEST_FEATURES "python")
-# endif()
-
-# TODO: add version number
-project(roofer VERSION 1.0.0 LANGUAGES C CXX)
+option(RF_USE_CPM "Use CPM to fetch dependencies" ON)
 
 # Global CMake variables are set here We use C++20, with the assumption that we
 # only implement features that are supported by GCC, Clang, MSVC, Apple Clang
@@ -79,20 +61,22 @@ else()
   message(STATUS "Logging backend: internal")
 endif()
 
-# We have to use CPM (or FetchContent) even with vcpkg, because of
-# cmake-git-version-tracking, val3dity, rerun
-include(CPM)
-cpmaddpackage(
-  "gh:andrew-hardin/cmake-git-version-tracking#6c0cb87edd029ddfb403a8e24577c144a03605a6"
-)
+# Include CPM only when needed; it can fetch val3dity and (as a fallback) rerun
+if(RF_USE_CPM OR RF_USE_RERUN)
+  include(CPM)
+endif()
+
 set(GIT_IGNORE_UNTRACKED TRUE)
 
 if(RF_USE_RERUN)
   set(RERUN_DOWNLOAD_AND_BUILD_ARROW OFF)
-  cpmaddpackage(
-    NAME rerun_sdk URL
-    https://github.com/rerun-io/rerun/releases/latest/download/rerun_cpp_sdk.zip
-  )
+  find_package(rerun_sdk QUIET)
+  if(NOT rerun_sdk_FOUND)
+    CPMAddPackage(
+      NAME rerun_sdk
+      URL https://github.com/rerun-io/rerun/releases/latest/download/rerun_cpp_sdk.zip
+    )
+  endif()
 endif()
 
 if(MSVC)
diff --git a/CMakePresets.json b/CMakePresets.json
index 784d557f..515563d6 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -2,23 +2,8 @@
   "version": 6,
   "configurePresets": [
     {
-      "name": "vcpkg-full-test",
-      "description": "Build with vcpkg with all dependencies, to test a full build.",
-      "toolchainFile": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake",
-      "cacheVariables": {
-        "CMAKE_BUILD_TYPE": "Release",
-        "RF_USE_VAL3DITY": "ON",
-        "RF_USE_RERUN": "OFF",
-        "RF_USE_LOGGER_SPDLOG": "OFF",
-        "RF_BUILD_TESTING": "ON",
-        "RF_BUILD_APPS": "ON",
-        "RF_BUILD_BINDINGS": "ON"
-      }
-    },
-    {
-      "name": "vcpkg-minimal",
-      "description": "Build with vcpkg without any optional dependencies, in order to get the quickest build possible.",
-      "toolchainFile": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake",
+      "name": "nix-minimal",
+      "description": "Build with pure nix just the apps",
       "cacheVariables": {
         "CMAKE_BUILD_TYPE": "Release",
         "RF_USE_VAL3DITY": "OFF",
@@ -29,16 +14,6 @@
         "RF_BUILD_BINDINGS": "OFF"
       }
     },
-    {
-      "name": "vcpkg-minimal-test",
-      "description": "Build with vcpkg without any optional dependencies, in order to get the quickest build possible for running tests.",
-      "inherits": "vcpkg-minimal",
-      "cacheVariables": {
-        "RF_BUILD_TESTING": "ON",
-        "RF_BUILD_APPS": "ON",
-        "RF_BUILD_BINDINGS": "ON"
-      }
-    },
     {
       "name": "system-minimal-test",
       "description": "Build with system packages without any optional dependencies, in order to get the quickest build possible for running tests.",
@@ -53,73 +28,18 @@
       }
     },
     {
-      "name": "vcpkg-with-bindings",
-      "description": "Build python bindings using vcpkg and pybind as a dependency",
-      "inherits": "vcpkg-minimal",
+      "name": "gh-conan-release",
+      "binaryDir": "${sourceDir}/build",
       "cacheVariables": {
-        "RF_BUILD_BINDINGS": "ON"
+        "CMAKE_BUILD_TYPE": "Release"
       }
-    },
-    {
-      "name": "vcpkg-docs",
-      "description": "Build python bindings needed for documentation generation",
-      "toolchainFile": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake",
-      "cacheVariables": {
-        "CMAKE_BUILD_TYPE": "Release",
-        "RF_USE_VAL3DITY": "OFF",
-        "RF_USE_RERUN": "OFF",
-        "RF_USE_LOGGER_SPDLOG": "OFF",
-        "RF_BUILD_TESTING": "OFF",
-        "RF_BUILD_APPS": "OFF",
-        "RF_BUILD_DOC_HELPER": "ON",
-        "RF_BUILD_BINDINGS": "ON"
-      }
-    },
-    {
-      "name": "gh-Linux",
-      "description": "Preset for github actions CI Linux configure",
-      "inherits": "vcpkg-full-test"
-    },
-    {
-      "name": "gh-macOS",
-      "description": "Preset for github actions CI macOS configure",
-      "inherits": "vcpkg-full-test"
-    },
-    {
-      "name": "gh-Windows",
-      "description": "Preset for github actions CI Windows configure",
-      "inherits": "vcpkg-full-test",
-      "cacheVariables": {
-        "X_VCPKG_APPLOCAL_DEPS_INSTALL": "ON"
-      }
-    }
-  ],
-  "buildPresets": [
-    {
-      "name": "gh-Linux",
-      "description": "Preset for github actions CI Linux build",
-      "configurePreset": "gh-Linux",
-      "jobs": 4
-    },
-    {
-      "name": "gh-macOS",
-      "description": "Preset for github actions CI macOS build",
-      "configurePreset": "gh-macOS",
-      "jobs": 3
-    },
-    {
-      "name": "gh-Windows",
-      "description": "Preset for github actions CI Windows build",
-      "configurePreset": "gh-Windows",
-      "jobs": 4,
-      "configuration": "Release"
     }
   ],
   "testPresets": [
     {
       "name": "test-built",
       "description": "Test the compiled targets in their build directory",
-      "configurePreset": "vcpkg-minimal-test",
+      "configurePreset": "gh-conan-release",
       "output": {
         "outputOnFailure": true,
         "verbosity": "verbose"
@@ -133,7 +53,7 @@
     {
       "name": "test-installed",
       "description": "Test the installed artifacts",
-      "configurePreset": "vcpkg-minimal-test",
+      "configurePreset": "gh-conan-release",
       "output": {
         "outputOnFailure": true,
         "verbosity": "verbose"
diff --git a/README-dev.md b/README-dev.md
index 75555bba..64318109 100644
--- a/README-dev.md
+++ b/README-dev.md
@@ -108,13 +108,60 @@ add_test(
 
 ## Documentation
 
-To build the documentation locally:
+To build the documentation locally, first build the `rooferpy` module and `doc-helper` executable.
+The recommended path is Conan:
 
 ```shell
+conan profile detect --force
+conan install . \
+  --output-folder=build \
+  --build=missing \
+  --settings=build_type=Release \
+  --settings=compiler.cppstd=20 \
+  --options="&:build_apps=False" \
+  --options="&:use_spdlog=False" \
+  --options="&:use_val3dity=False" \
+  --options="&:build_bindings=True" \
+  --options="&:build_testing=False"
+# Conan forwards the package options above to the matching RF_* CMake options.
+cmake -S . -B build \
+  -G Ninja \
+  -DCMAKE_TOOLCHAIN_FILE=build/conan_toolchain.cmake \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_INSTALL_PREFIX=$PWD/install \
+  -DRF_BUILD_DOC_HELPER=ON
+cmake --build build --target rooferpy doc-helper
+cmake --install build
 cd docs
 make html
 ```
 
+If you prefer Nix, you can do the same with the `nix develop` shell:
+
+```shell
+nix develop
+cmake -S . -B build \
+  -G Ninja \
+  -DRF_BUILD_APPS=OFF \
+  -DRF_USE_LOGGER_SPDLOG=OFF \
+  -DRF_USE_VAL3DITY=OFF \
+  -DRF_BUILD_BINDINGS=ON \
+  -DRF_BUILD_TESTING=OFF \
+  -DRF_BUILD_DOC_HELPER=ON \
+  -DRF_USE_CPM=OFF
+cmake --build build --target rooferpy doc-helper
+cmake --install build
+cd docs
+make html
+```
+
+If you want the packaged Nix outputs instead of a local build tree, use:
+
+```shell
+nix build .#default
+nix build .#rooferpy
+```
+
 The rendered documentation is in the `docs/html` directory, and the main page is `docs/html/index.html`.
 
 #### Documenting the code
diff --git a/apps/external/BS_thread_pool.hpp b/apps/external/BS_thread_pool.hpp
new file mode 100644
index 00000000..636b6c3a
--- /dev/null
+++ b/apps/external/BS_thread_pool.hpp
@@ -0,0 +1,2870 @@
+/**
+ * ██████  ███████       ████████ ██   ██ ██████  ███████  █████  ██████ ██████
+ * ██████   ██████  ██ ██   ██ ██      ██ ██    ██    ██   ██ ██   ██ ██      ██
+ * ██ ██   ██         ██   ██ ██    ██ ██    ██ ██ ██████  ███████          ██
+ * ███████ ██████  █████   ███████ ██   ██         ██████  ██    ██ ██    ██ ██
+ * ██   ██      ██ ██ ██    ██    ██   ██ ██   ██ ██      ██   ██ ██   ██ ██ ██
+ * ██ ██    ██ ██ ██████  ███████          ██    ██   ██ ██   ██ ███████ ██   ██
+ * ██████  ███████ ██       ██████   ██████  ███████
+ *
+ * @file BS_thread_pool.hpp
+ * @author Barak Shoshany (baraksh@gmail.com) (https://baraksh.com/)
+ * @version 5.0.0
+ * @date 2024-12-19
+ * @copyright Copyright (c) 2024 Barak Shoshany. Licensed under the MIT license.
+ * If you found this project useful, please consider starring it on GitHub! If
+ * you use this library in software of any kind, please provide a link to the
+ * GitHub repository https://github.com/bshoshany/thread-pool in the source code
+ * and documentation. If you use this library in published research, please cite
+ * it as follows: Barak Shoshany, "A C++17 Thread Pool for High-Performance
+ * Scientific Computing", doi:10.1016/j.softx.2024.101687, SoftwareX 26 (2024)
+ * 101687, arXiv:2105.00613
+ *
+ * @brief `BS::thread_pool`: a fast, lightweight, modern, and easy-to-use
+ * C++17/C++20/C++23 thread pool library. This header file contains the entire
+ * library, and is the only file needed to use the library.
+ */
+
+#ifndef BS_THREAD_POOL_HPP
+#define BS_THREAD_POOL_HPP
+
+// We need to include <version> since if we're using `import std` it will not
+// define any feature-test macros, including `__cpp_lib_modules`, which we need
+// to check if `import std` is supported in the first place.
+#ifdef __has_include
+#if __has_include(<version>)
+#include <version>  // NOLINT(misc-include-cleaner)
+#endif
+#endif
+
+// If the macro `BS_THREAD_POOL_IMPORT_STD` is defined, import the C++ Standard
+// Library as a module. Otherwise, include the relevant Standard Library header
+// files. This is currently only officially supported by MSVC with Microsoft STL
+// and LLVM Clang (NOT Apple Clang) with LLVM libc++. It is not supported by GCC
+// with any standard library, or any compiler with GNU libstdc++. We also check
+// that the feature is enabled by checking `__cpp_lib_modules`. However, MSVC
+// defines this macro even in C++20 mode, which is not standards-compliant, so
+// we check that we are in C++23 mode; MSVC currently reports `__cplusplus` as
+// `202004L` for C++23 mode, so we use that value.
+#if defined(BS_THREAD_POOL_IMPORT_STD) && defined(__cpp_lib_modules) &&      \
+    (__cplusplus >= 202004L) &&                                              \
+    (defined(_MSC_VER) || (defined(__clang__) && defined(_LIBCPP_VERSION) && \
+                           !defined(__apple_build_version__)))
+// Only allow importing the `std` module if the library itself is imported as a
+// module. If the library is included as a header file, this will force the
+// program that included the header file to also import `std`, which is not
+// desirable and can lead to compilation errors if the program `#include`s any
+// Standard Library header files.
+#ifdef BS_THREAD_POOL_MODULE
+import std;
+#else
+#error \
+    "The thread pool library cannot import the C++ Standard Library as a module using `import std` if the library itself is not imported as a module. Either use `import BS.thread_pool` to import the libary, or remove the `BS_THREAD_POOL_IMPORT_STD` macro. Aborting compilation."
+#endif
+#else
+#undef BS_THREAD_POOL_IMPORT_STD
+
+#include <algorithm>
+#include <chrono>
+#include <condition_variable>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <future>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <queue>
+#include <string>
+#include <thread>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <variant>
+#include <vector>
+
+#ifdef __cpp_concepts
+#include <concepts>
+#endif
+#ifdef __cpp_exceptions
+#include <exception>
+#include <stdexcept>
+#endif
+#ifdef __cpp_impl_three_way_comparison
+#include <compare>
+#endif
+#ifdef __cpp_lib_int_pow2
+#include <bit>
+#endif
+#ifdef __cpp_lib_semaphore
+#include <semaphore>
+#endif
+#ifdef __cpp_lib_jthread
+#include <stop_token>
+#endif
+#endif
+
+#ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS
+#if defined(_WIN32)
+#include <windows.h>
+#undef min
+#undef max
+#elif defined(__linux__) || defined(__APPLE__)
+#include <pthread.h>
+#include <sched.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#if defined(__linux__)
+#include <sys/syscall.h>
+#include <sys/sysinfo.h>
+#endif
+#else
+#undef BS_THREAD_POOL_NATIVE_EXTENSIONS
+#endif
+#endif
+
+#if defined(__linux__)
+// On Linux, <sys/sysmacros.h> defines macros called `major` and `minor`. We
+// undefine them here so the `version` struct can work.
+#ifdef major
+#undef major
+#endif
+#ifdef minor
+#undef minor
+#endif
+#endif
+
+/**
+ * @brief A namespace used by Barak Shoshany's projects.
+ */
+namespace BS {
+// Macros indicating the version of the thread pool library.
+#define BS_THREAD_POOL_VERSION_MAJOR 5
+#define BS_THREAD_POOL_VERSION_MINOR 0
+#define BS_THREAD_POOL_VERSION_PATCH 0
+
+  /**
+   * @brief A struct used to store a version number, which can be checked and
+   * compared at compilation time.
+   */
+  struct version {
+    constexpr version(const std::uint64_t major_, const std::uint64_t minor_,
+                      const std::uint64_t patch_) noexcept
+        : major(major_), minor(minor_), patch(patch_) {}
+
+// In C++20 and later we can use the spaceship operator `<=>` to automatically
+// generate comparison operators. In C++17 we have to define them manually.
+#ifdef __cpp_impl_three_way_comparison
+    std::strong_ordering operator<=>(const version&) const = default;
+#else
+    [[nodiscard]] constexpr friend bool operator==(
+        const version& lhs, const version& rhs) noexcept {
+      return std::tuple(lhs.major, lhs.minor, lhs.patch) ==
+             std::tuple(rhs.major, rhs.minor, rhs.patch);
+    }
+
+    [[nodiscard]] constexpr friend bool operator!=(
+        const version& lhs, const version& rhs) noexcept {
+      return !(lhs == rhs);
+    }
+
+    [[nodiscard]] constexpr friend bool operator<(const version& lhs,
+                                                  const version& rhs) noexcept {
+      return std::tuple(lhs.major, lhs.minor, lhs.patch) <
+             std::tuple(rhs.major, rhs.minor, rhs.patch);
+    }
+
+    [[nodiscard]] constexpr friend bool operator>=(
+        const version& lhs, const version& rhs) noexcept {
+      return !(lhs < rhs);
+    }
+
+    [[nodiscard]] constexpr friend bool operator>(const version& lhs,
+                                                  const version& rhs) noexcept {
+      return std::tuple(lhs.major, lhs.minor, lhs.patch) >
+             std::tuple(rhs.major, rhs.minor, rhs.patch);
+    }
+
+    [[nodiscard]] constexpr friend bool operator<=(
+        const version& lhs, const version& rhs) noexcept {
+      return !(lhs > rhs);
+    }
+#endif
+
+    [[nodiscard]] std::string to_string() const {
+      return std::to_string(major) + '.' + std::to_string(minor) + '.' +
+             std::to_string(patch);
+    }
+
+    friend std::ostream& operator<<(std::ostream& stream, const version& ver) {
+      stream << ver.to_string();
+      return stream;
+    }
+
+    std::uint64_t major;
+    std::uint64_t minor;
+    std::uint64_t patch;
+  };  // struct version
+
+  /**
+   * @brief The version of the thread pool library.
+   */
+  inline constexpr version thread_pool_version(BS_THREAD_POOL_VERSION_MAJOR,
+                                               BS_THREAD_POOL_VERSION_MINOR,
+                                               BS_THREAD_POOL_VERSION_PATCH);
+
+#ifdef BS_THREAD_POOL_MODULE
+  // If the library is being compiled as a module, ensure that the version of
+  // the module file matches the version of the header file.
+  static_assert(thread_pool_version == version(BS_THREAD_POOL_MODULE),
+                "The versions of BS.thread_pool.cppm and BS_thread_pool.hpp do "
+                "not match. Aborting compilation.");
+  /**
+   * @brief A flag indicating whether the thread pool library was compiled as a
+   * C++20 module.
+   */
+  inline constexpr bool thread_pool_module = true;
+#else
+  /**
+   * @brief A flag indicating whether the thread pool library was compiled as a
+   * C++20 module.
+   */
+  inline constexpr bool thread_pool_module = false;
+#endif
+
+#ifdef BS_THREAD_POOL_IMPORT_STD
+  /**
+   * @brief A flag indicating whether the thread pool library imported the C++23
+   * Standard Library module using `import std`.
+   */
+  inline constexpr bool thread_pool_import_std = true;
+#else
+  /**
+   * @brief A flag indicating whether the thread pool library imported the C++23
+   * Standard Library module using `import std`.
+   */
+  inline constexpr bool thread_pool_import_std = false;
+#endif
+
+#ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS
+  /**
+   * @brief A flag indicating whether the thread pool library's native
+   * extensions are enabled.
+   */
+  inline constexpr bool thread_pool_native_extensions = true;
+#else
+  /**
+   * @brief A flag indicating whether the thread pool library's native
+   * extensions are enabled.
+   */
+  inline constexpr bool thread_pool_native_extensions = false;
+#endif
+
+  /**
+   * @brief The type used for the bitmask template parameter of the thread pool.
+   */
+  using opt_t = std::uint8_t;
+
+  template <opt_t>
+  class thread_pool;
+
+#ifdef __cpp_lib_move_only_function
+  /**
+   * @brief The template to use to store functions in the task queue and other
+   * places. In C++23 and later we use `std::move_only_function`.
+   */
+  template <typename... S>
+  using function_t = std::move_only_function<S...>;
+#else
+  /**
+   * @brief The template to use to store functions in the task queue and other
+   * places. In C++17 we use `std::function`.
+   */
+  template <typename... S>
+  using function_t = std::function<S...>;
+#endif
+
+  /**
+   * @brief The type of tasks in the task queue.
+   */
+  using task_t = function_t<void()>;
+
+#ifdef __cpp_lib_jthread
+  /**
+   * @brief The type of threads to use. In C++20 and later we use
+   * `std::jthread`.
+   */
+  using thread_t = std::jthread;
+// The following macros are used to determine how to stop the workers. In C++20
+// and later we can use `std::stop_token`.
+#define BS_THREAD_POOL_WORKER_TOKEN const std::stop_token &stop_token,
+#define BS_THREAD_POOL_WAIT_TOKEN , stop_token
+#define BS_THREAD_POOL_STOP_CONDITION stop_token.stop_requested()
+#define BS_THREAD_POOL_OR_STOP_CONDITION
+#else
+  /**
+   * @brief The type of threads to use. In C++17 we use `std::thread`.
+   */
+  using thread_t = std::thread;
+// The following macros are used to determine how to stop the workers. In C++17
+// we use a manual flag `workers_running`.
+#define BS_THREAD_POOL_WORKER_TOKEN
+#define BS_THREAD_POOL_WAIT_TOKEN
+#define BS_THREAD_POOL_STOP_CONDITION !workers_running
+#define BS_THREAD_POOL_OR_STOP_CONDITION || !workers_running
+#endif
+
+  /**
+   * @brief A type used to indicate the priority of a task. Defined to be a
+   * signed integer with a width of exactly 8 bits (-128 to +127).
+   */
+  using priority_t = std::int8_t;
+
+  /**
+   * @brief An enum containing some pre-defined priorities for convenience.
+   */
+  enum pr : priority_t {
+    lowest = -128,
+    low = -64,
+    normal = 0,
+    high = +64,
+    highest = +127
+  };
+
+  /**
+   * @brief A helper struct to store a task with an assigned priority.
+   */
+  struct [[nodiscard]] pr_task {
+    /**
+     * @brief Construct a new task with an assigned priority.
+     *
+     * @param task_ The task.
+     * @param priority_ The desired priority.
+     */
+    explicit pr_task(task_t&& task_, const priority_t priority_ = 0) noexcept(
+        std::is_nothrow_move_constructible_v<task_t>)
+        : task(std::move(task_)), priority(priority_) {}
+
+    /**
+     * @brief Compare the priority of two tasks.
+     *
+     * @param lhs The first task.
+     * @param rhs The second task.
+     * @return `true` if the first task has a lower priority than the second
+     * task, `false` otherwise.
+     */
+    [[nodiscard]] friend bool operator<(const pr_task& lhs,
+                                        const pr_task& rhs) noexcept {
+      return lhs.priority < rhs.priority;
+    }
+
+    /**
+     * @brief The task.
+     */
+    task_t task;
+
+    /**
+     * @brief The priority of the task.
+     */
+    priority_t priority = 0;
+  };  // struct pr_task
+
+// In C++20 and later we can use concepts. In C++17 we instead use SFINAE
+// ("Substitution Failure Is Not An Error") with `std::enable_if_t`.
+#ifdef __cpp_concepts
+#define BS_THREAD_POOL_IF_PAUSE_ENABLED \
+  template <bool P = pause_enabled>     \
+    requires(P)
+  template <typename F>
+  concept init_func_c = std::invocable<F> || std::invocable<F, std::size_t>;
+#define BS_THREAD_POOL_INIT_FUNC_CONCEPT(F) init_func_c F
+#else
+#define BS_THREAD_POOL_IF_PAUSE_ENABLED \
+  template <bool P = pause_enabled, typename = std::enable_if_t<P>>
+#define BS_THREAD_POOL_INIT_FUNC_CONCEPT(F)                   \
+  typename F,                                                 \
+      typename = std::enable_if_t < std::is_invocable_v<F> || \
+                 std::is_invocable_v < F,                     \
+      std::size_t >>  // NOLINT(bugprone-macro-parentheses)
+#endif
+
+  /**
+   * @brief A helper class to facilitate waiting for and/or getting the results
+   * of multiple futures at once.
+   *
+   * @tparam T The return type of the futures.
+   */
+  template <typename T>
+  class [[nodiscard]] multi_future : public std::vector<std::future<T>> {
+   public:
+    // Inherit all constructors from the base class `std::vector`.
+    using std::vector<std::future<T>>::vector;
+
+    /**
+     * @brief Get the results from all the futures stored in this
+     * `BS::multi_future`, rethrowing any stored exceptions.
+     *
+     * @return If the futures return `void`, this function returns `void` as
+     * well. Otherwise, it returns a vector containing the results.
+     */
+    [[nodiscard]] std::conditional_t<std::is_void_v<T>, void, std::vector<T>>
+    get() {
+      if constexpr (std::is_void_v<T>) {
+        for (std::future<T>& future : *this) future.get();
+        return;
+      } else {
+        std::vector<T> results;
+        results.reserve(this->size());
+        for (std::future<T>& future : *this) results.push_back(future.get());
+        return results;
+      }
+    }
+
+    /**
+     * @brief Check how many of the futures stored in this `BS::multi_future`
+     * are ready.
+     *
+     * @return The number of ready futures.
+     */
+    [[nodiscard]] std::size_t ready_count() const {
+      std::size_t count = 0;
+      for (const std::future<T>& future : *this) {
+        if (future.wait_for(std::chrono::duration<double>::zero()) ==
+            std::future_status::ready)
+          ++count;
+      }
+      return count;
+    }
+
+    /**
+     * @brief Check if all the futures stored in this `BS::multi_future` are
+     * valid.
+     *
+     * @return `true` if all futures are valid, `false` if at least one of the
+     * futures is not valid.
+     */
+    [[nodiscard]] bool valid() const noexcept {
+      bool is_valid = true;
+      for (const std::future<T>& future : *this)
+        is_valid = is_valid && future.valid();
+      return is_valid;
+    }
+
+    /**
+     * @brief Wait for all the futures stored in this `BS::multi_future`.
+     */
+    void wait() const {
+      for (const std::future<T>& future : *this) future.wait();
+    }
+
+    /**
+     * @brief Wait for all the futures stored in this `BS::multi_future`, but
+     * stop waiting after the specified duration has passed. This function first
+     * waits for the first future for the desired duration. If that future is
+     * ready before the duration expires, this function waits for the second
+     * future for whatever remains of the duration. It continues similarly until
+     * the duration expires.
+     *
+     * @tparam R An arithmetic type representing the number of ticks to wait.
+     * @tparam P An `std::ratio` representing the length of each tick in
+     * seconds.
+     * @param duration The amount of time to wait.
+     * @return `true` if all futures have been waited for before the duration
+     * expired, `false` otherwise.
+     */
+    template <typename R, typename P>
+    bool wait_for(const std::chrono::duration<R, P>& duration) const {
+      const std::chrono::time_point<std::chrono::steady_clock> start_time =
+          std::chrono::steady_clock::now();
+      for (const std::future<T>& future : *this) {
+        future.wait_for(duration -
+                        (std::chrono::steady_clock::now() - start_time));
+        if (duration < std::chrono::steady_clock::now() - start_time)
+          return false;
+      }
+      return true;
+    }
+
+    /**
+     * @brief Wait for all the futures stored in this `BS::multi_future`, but
+     * stop waiting after the specified time point has been reached. This
+     * function first waits for the first future until the desired time point.
+     * If that future is ready before the time point is reached, this function
+     * waits for the second future until the desired time point. It continues
+     * similarly until the time point is reached.
+     *
+     * @tparam C The clock of `timeout_time`; also used to check the deadline.
+     * @tparam D An `std::chrono::duration` type used to indicate the time
+     * point.
+     * @param timeout_time The time point at which to stop waiting.
+     * @return `true` if all futures have been waited for before the time point
+     * was reached, `false` otherwise.
+     */
+    template <typename C, typename D>
+    bool wait_until(const std::chrono::time_point<C, D>& timeout_time) const {
+      for (const std::future<T>& future : *this) {
+        future.wait_until(timeout_time);
+        if (timeout_time < C::now()) return false;
+      }
+      return true;
+    }
+  };  // class multi_future
+
+  /**
+   * @brief A helper class to divide a range into blocks. Used by
+   * `detach_blocks()`, `submit_blocks()`, `detach_loop()`, and `submit_loop()`.
+   *
+   * @tparam T The type of the indices. Should be a signed or unsigned integer.
+   */
+  template <typename T>
+  class [[nodiscard]] blocks {
+   public:
+    /**
+     * @brief Construct a `blocks` object with the given specifications.
+     *
+     * @param first_index_ The first index in the range.
+     * @param index_after_last_ The index after the last index in the range.
+     * @param num_blocks_ The desired number of blocks; 0 is treated as 1.
+     */
+    blocks(const T first_index_, const T index_after_last_,
+           const std::size_t num_blocks_) noexcept
+        : first_index(first_index_),
+          index_after_last(index_after_last_),
+          num_blocks(num_blocks_) {
+      if (index_after_last > first_index) {
+        const std::size_t total_size =
+            static_cast<std::size_t>(index_after_last - first_index);
+        // Clamp the request to `[1, total_size]`. A request for zero blocks
+        // would otherwise divide by zero below, and asking for more blocks
+        // than there are indices would leave some of them empty. After
+        // clamping, `block_size` is always at least 1.
+        num_blocks = std::clamp<std::size_t>(num_blocks, 1, total_size);
+        block_size = total_size / num_blocks;
+        remainder = total_size % num_blocks;
+      } else {
+        num_blocks = 0;
+      }
+    }
+
+    /**
+     * @brief Get the index after the last index of a block.
+     *
+     * @param block The block number.
+     * @return The index after the last index.
+     */
+    [[nodiscard]] T end(const std::size_t block) const noexcept {
+      return (block == num_blocks - 1) ? index_after_last : start(block + 1);
+    }
+
+    /**
+     * @brief Get the number of blocks. Note that this may be different than the
+     * desired number of blocks that was passed to the constructor.
+     *
+     * @return The number of blocks.
+     */
+    [[nodiscard]] std::size_t get_num_blocks() const noexcept {
+      return num_blocks;
+    }
+
+    /**
+     * @brief Get the first index of a block.
+     *
+     * @param block The block number.
+     * @return The first index.
+     */
+    [[nodiscard]] T start(const std::size_t block) const noexcept {
+      return first_index + static_cast<T>(block * block_size) +
+             static_cast<T>(block < remainder ? block : remainder);
+    }
+
+   private:
+    /**
+     * @brief The base block size; the first `remainder` blocks hold one more.
+     */
+    std::size_t block_size = 0;
+
+    /**
+     * @brief The first index in the range.
+     */
+    T first_index = 0;
+
+    /**
+     * @brief The index after the last index in the range.
+     */
+    T index_after_last = 0;
+
+    /**
+     * @brief The number of blocks.
+     */
+    std::size_t num_blocks = 0;
+
+    /**
+     * @brief The remainder obtained after dividing the total size by the number
+     * of blocks.
+     */
+    std::size_t remainder = 0;
+  };  // class blocks
+
+#ifdef __cpp_exceptions
+  /**
+   * @brief An exception that will be thrown by `wait()`, `wait_for()`, and
+   * `wait_until()` if the user tries to call them from within a thread of the
+   * same pool, which would result in a deadlock. Only used if the flag
+   * `BS::tp::wait_deadlock_checks` is enabled in the template parameter of
+   * `BS::thread_pool`.
+   */
+  struct wait_deadlock : public std::runtime_error {
+    wait_deadlock() : std::runtime_error("BS::wait_deadlock"){};
+  };
+#endif
+
+#ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS
+#if defined(_WIN32)
+  /**
+   * @brief An enum containing pre-defined OS-specific process priority values
+   * for portability.
+   */
+  enum class os_process_priority {
+    idle = IDLE_PRIORITY_CLASS,
+    below_normal = BELOW_NORMAL_PRIORITY_CLASS,
+    normal = NORMAL_PRIORITY_CLASS,
+    above_normal = ABOVE_NORMAL_PRIORITY_CLASS,
+    high = HIGH_PRIORITY_CLASS,
+    realtime = REALTIME_PRIORITY_CLASS
+  };
+
+  /**
+   * @brief An enum containing pre-defined OS-specific thread priority values
+   * for portability.
+   */
+  enum class os_thread_priority {
+    idle = THREAD_PRIORITY_IDLE,
+    lowest = THREAD_PRIORITY_LOWEST,
+    below_normal = THREAD_PRIORITY_BELOW_NORMAL,
+    normal = THREAD_PRIORITY_NORMAL,
+    above_normal = THREAD_PRIORITY_ABOVE_NORMAL,
+    highest = THREAD_PRIORITY_HIGHEST,
+    realtime = THREAD_PRIORITY_TIME_CRITICAL
+  };
+#elif defined(__linux__) || defined(__APPLE__)
+  /**
+   * @brief An enum containing pre-defined OS-specific process priority values
+   * for portability.
+   */
+  enum class os_process_priority {
+    idle = PRIO_MAX - 2,
+    below_normal = PRIO_MAX / 2,
+    normal = 0,
+    above_normal = PRIO_MIN / 3,
+    high = PRIO_MIN * 2 / 3,
+    realtime = PRIO_MIN
+  };
+
+  /**
+   * @brief An enum containing pre-defined OS-specific thread priority values
+   * for portability.
+   */
+  enum class os_thread_priority {
+    idle,
+    lowest,
+    below_normal,
+    normal,
+    above_normal,
+    highest,
+    realtime
+  };
+#endif
+
+  /**
+   * @brief Get the processor affinity of the current process using the current
+   * platform's native API. This should work on Windows and Linux, but is not
+   * possible on macOS as the native API does not allow it.
+   *
+   * @return An `std::optional` object, optionally containing the processor
+   * affinity of the current process as an `std::vector<bool>` where each
+   * element corresponds to a logical processor. If the returned object does not
+   * contain a value, then the affinity could not be determined. On macOS, this
+   * function always returns `std::nullopt`.
+   */
+  [[nodiscard]] inline std::optional<std::vector<bool>>
+  get_os_process_affinity() {
+#if defined(_WIN32)
+    DWORD_PTR process_mask = 0;
+    DWORD_PTR system_mask = 0;
+    if (GetProcessAffinityMask(GetCurrentProcess(), &process_mask,
+                               &system_mask) == 0)
+      return std::nullopt;
+#ifdef __cpp_lib_int_pow2
+    const std::size_t num_cpus =
+        static_cast<std::size_t>(std::bit_width(system_mask));
+#else
+    std::size_t num_cpus = 0;
+    if (system_mask != 0) {
+      num_cpus = 1;
+      while ((system_mask >>= 1U) != 0U) ++num_cpus;
+    }
+#endif
+    std::vector<bool> affinity(num_cpus);
+    for (std::size_t i = 0; i < num_cpus; ++i)
+      affinity[i] = ((process_mask & (1ULL << i)) != 0ULL);
+    return affinity;
+#elif defined(__linux__)
+    cpu_set_t cpu_set;
+    CPU_ZERO(&cpu_set);
+    if (sched_getaffinity(getpid(), sizeof(cpu_set_t), &cpu_set) != 0)
+      return std::nullopt;
+    const int num_cpus = get_nprocs();
+    if (num_cpus < 1) return std::nullopt;
+    std::vector<bool> affinity(static_cast<std::size_t>(num_cpus));
+    for (std::size_t i = 0; i < affinity.size(); ++i)
+      affinity[i] = CPU_ISSET(i, &cpu_set);
+    return affinity;
+#elif defined(__APPLE__)
+    return std::nullopt;
+#endif
+  }
+
+  /**
+   * @brief Set the processor affinity of the current process using the current
+   * platform's native API. This should work on Windows and Linux, but is not
+   * possible on macOS as the native API does not allow it.
+   *
+   * @param affinity The processor affinity to set, as an `std::vector<bool>`
+   * where each element corresponds to a logical processor.
+   * @return `true` if the affinity was set successfully, `false` otherwise. On
+   * macOS, this function always returns `false`.
+   */
+  inline bool set_os_process_affinity(const std::vector<bool>& affinity) {
+#if defined(_WIN32)
+    DWORD_PTR process_mask = 0;
+    for (std::size_t i = 0;
+         i < std::min<std::size_t>(affinity.size(), sizeof(DWORD_PTR) * 8); ++i)
+      process_mask |= (affinity[i] ? (1ULL << i) : 0ULL);
+    return SetProcessAffinityMask(GetCurrentProcess(), process_mask) != 0;
+#elif defined(__linux__)
+    cpu_set_t cpu_set;
+    CPU_ZERO(&cpu_set);
+    for (std::size_t i = 0;
+         i < std::min<std::size_t>(affinity.size(), CPU_SETSIZE); ++i) {
+      if (affinity[i]) CPU_SET(i, &cpu_set);
+    }
+    return sched_setaffinity(getpid(), sizeof(cpu_set_t), &cpu_set) == 0;
+#elif defined(__APPLE__)
+    return affinity[0] &&
+           false;  // NOLINT(readability-simplify-boolean-expr) // Using
+                   // `affinity` to suppress unused parameter warning.
+#endif
+  }
+
+  /**
+   * @brief Get the priority of the current process using the current platform's
+   * native API. This should work on Windows, Linux, and macOS.
+   *
+   * @return An `std::optional` object, optionally containing the priority of
+   * the current process, as a member of the enum `BS::os_process_priority`. If
+   * the returned object does not contain a value, then either the priority
+   * could not be determined, or it is not one of the pre-defined values and
+   * therefore cannot be represented in a portable way.
+   */
+  [[nodiscard]] inline std::optional<os_process_priority>
+  get_os_process_priority() {
+#if defined(_WIN32)
+    // On Windows, this is straightforward.
+    const DWORD priority = GetPriorityClass(GetCurrentProcess());
+    if (priority == 0) return std::nullopt;
+    return static_cast<os_process_priority>(priority);
+#elif defined(__linux__) || defined(__APPLE__)
+    // On Linux/macOS there is no direct analogue of `GetPriorityClass()` on
+    // Windows, so instead we get the "nice" value. The usual range is -20 to 19
+    // or 20, with higher values corresponding to lower priorities. However, we
+    // are only using 6 pre-defined values for portability, so if the value was
+    // set via any means other than `BS::set_os_process_priority()`, it may not
+    // match one of our pre-defined values. Note that `getpriority()` returns -1
+    // on error, but since this does not correspond to any of our pre-defined
+    // values, this function will return `std::nullopt` anyway.
+    const int nice_val = getpriority(PRIO_PROCESS, static_cast<id_t>(getpid()));
+    switch (nice_val) {
+      case static_cast<int>(os_process_priority::idle):
+        return os_process_priority::idle;
+      case static_cast<int>(os_process_priority::below_normal):
+        return os_process_priority::below_normal;
+      case static_cast<int>(os_process_priority::normal):
+        return os_process_priority::normal;
+      case static_cast<int>(os_process_priority::above_normal):
+        return os_process_priority::above_normal;
+      case static_cast<int>(os_process_priority::high):
+        return os_process_priority::high;
+      case static_cast<int>(os_process_priority::realtime):
+        return os_process_priority::realtime;
+      default:
+        return std::nullopt;
+    }
+#endif
+  }
+
+  /**
+   * @brief Set the priority of the current process using the current platform's
+   * native API. This should work on Windows, Linux, and macOS. However, note
+   * that higher priorities might require elevated permissions.
+   *
+   * @param priority The priority to set. Must be a value from the enum
+   * `BS::os_process_priority`.
+   * @return `true` if the priority was set successfully, `false` otherwise.
+   * Usually, `false` means that the user does not have the necessary
+   * permissions to set the desired priority.
+   */
+  inline bool set_os_process_priority(const os_process_priority priority) {
+#if defined(_WIN32)
+    // On Windows, this is straightforward.
+    return SetPriorityClass(GetCurrentProcess(),
+                            static_cast<DWORD>(priority)) != 0;
+#elif defined(__linux__) || defined(__APPLE__)
+    // On Linux/macOS there is no direct analogue of `SetPriorityClass()` on
+    // Windows, so instead we set the "nice" value. The usual range is -20 to 19
+    // or 20, with higher values corresponding to lower priorities. However, we
+    // are only using 6 pre-defined values for portability. Note that the "nice"
+    // values are only relevant for the `SCHED_OTHER` policy, but we do not set
+    // that policy here, as it is per-thread rather than per-process. Also, it's
+    // important to note that a non-root user cannot decrease the nice value
+    // (i.e. increase the process priority), only increase it. This can cause
+    // confusing behavior. For example, if the current priority is
+    // `BS::os_process_priority::normal` and the user sets it to
+    // `BS::os_process_priority::idle`, they cannot change it back to
+    // `BS::os_process_priority::normal`.
+    return setpriority(PRIO_PROCESS, static_cast<id_t>(getpid()),
+                       static_cast<int>(priority)) == 0;
+#endif
+  }
+#endif
+
+  /**
+   * @brief A class used to obtain information about the current thread and, if
+   * native extensions are enabled, set its priority and affinity.
+   */
+  class [[nodiscard]] this_thread {
+    template <opt_t>
+    friend class thread_pool;
+
+   public:
+    /**
+     * @brief Get the index of the current thread. If this thread belongs to a
+     * `BS::thread_pool` object, the return value will be an index in the range
+     * `[0, N)` where `N == BS::thread_pool::get_thread_count()`. Otherwise, for
+     * example if this thread is the main thread or an independent thread not in
+     * any pools, `std::nullopt` will be returned.
+     *
+     * @return An `std::optional` object, optionally containing a thread index.
+     */
+    [[nodiscard]] static std::optional<std::size_t> get_index() noexcept {
+      return my_index;
+    }
+
+    /**
+     * @brief Get a pointer to the thread pool that owns the current thread. If
+     * this thread belongs to a `BS::thread_pool` object, the return value will
+     * be a `void` pointer to that object. Otherwise, for example if this thread
+     * is the main thread or an independent thread not in any pools,
+     * `std::nullopt` will be returned.
+     *
+     * @return An `std::optional` object, optionally containing a pointer to a
+     * thread pool. Note that this will be a `void` pointer, so it must be cast
+     * to the desired instantiation of the `BS::thread_pool` template in order
+     * to use any member functions.
+     */
+    [[nodiscard]] static std::optional<void*> get_pool() noexcept {
+      return my_pool;
+    }
+
+#ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS
+    /**
+     * @brief Get the processor affinity of the current thread using the current
+     * platform's native API. This should work on Windows and Linux, but is not
+     * possible on macOS as the native API does not allow it.
+     *
+     * @return An `std::optional` object, optionally containing the processor
+     * affinity of the current thread as an `std::vector<bool>` where each
+     * element corresponds to a logical processor. If the returned object does
+     * not contain a value, then the affinity could not be determined. On macOS,
+     * this function always returns `std::nullopt`.
+     */
+    [[nodiscard]] static std::optional<std::vector<bool>>
+    get_os_thread_affinity() {
+#if defined(_WIN32)
+      // Windows does not have a `GetThreadAffinityMask()` function, but
+      // `SetThreadAffinityMask()` returns the previous affinity mask, so we can
+      // use that to get the current affinity and then restore it. It's a bit of
+      // a hack, but it works. Since the thread affinity must be a subset of the
+      // process affinity, we use the process affinity as the temporary value.
+      DWORD_PTR process_mask = 0;
+      DWORD_PTR system_mask = 0;
+      if (GetProcessAffinityMask(GetCurrentProcess(), &process_mask,
+                                 &system_mask) == 0)
+        return std::nullopt;
+      const DWORD_PTR previous_mask =
+          SetThreadAffinityMask(GetCurrentThread(), process_mask);
+      if (previous_mask == 0) return std::nullopt;
+      SetThreadAffinityMask(GetCurrentThread(), previous_mask);
+#ifdef __cpp_lib_int_pow2
+      const std::size_t num_cpus =
+          static_cast<std::size_t>(std::bit_width(system_mask));
+#else
+      std::size_t num_cpus = 0;
+      if (system_mask != 0) {
+        num_cpus = 1;
+        while ((system_mask >>= 1U) != 0U) ++num_cpus;
+      }
+#endif
+      std::vector<bool> affinity(num_cpus);
+      for (std::size_t i = 0; i < num_cpus; ++i)
+        affinity[i] = ((previous_mask & (1ULL << i)) != 0ULL);
+      return affinity;
+#elif defined(__linux__)
+      cpu_set_t cpu_set;
+      CPU_ZERO(&cpu_set);
+      if (pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_set) !=
+          0)
+        return std::nullopt;
+      const int num_cpus = get_nprocs();
+      if (num_cpus < 1) return std::nullopt;
+      std::vector<bool> affinity(static_cast<std::size_t>(num_cpus));
+      for (std::size_t i = 0; i < affinity.size(); ++i)
+        affinity[i] = CPU_ISSET(i, &cpu_set);
+      return affinity;
+#elif defined(__APPLE__)
+      return std::nullopt;
+#endif
+    }
+
+    /**
+     * @brief Set the processor affinity of the current thread using the current
+     * platform's native API. This should work on Windows and Linux, but is not
+     * possible on macOS as the native API does not allow it. Note that the
+     * thread affinity must be a subset of the process affinity (as obtained
+     * using `BS::get_os_process_affinity()`) for the containing process of a
+     * thread.
+     *
+     * @param affinity The processor affinity to set, as an `std::vector<bool>`
+     * where each element corresponds to a logical processor.
+     * @return `true` if the affinity was set successfully, `false` otherwise.
+     * On macOS, this function always returns `false`.
+     */
+    static bool set_os_thread_affinity(const std::vector<bool>& affinity) {
+#if defined(_WIN32)
+      DWORD_PTR thread_mask = 0;
+      for (std::size_t i = 0;
+           i < std::min<std::size_t>(affinity.size(), sizeof(DWORD_PTR) * 8);
+           ++i)
+        thread_mask |= (affinity[i] ? (1ULL << i) : 0ULL);
+      return SetThreadAffinityMask(GetCurrentThread(), thread_mask) != 0;
+#elif defined(__linux__)
+      cpu_set_t cpu_set;
+      CPU_ZERO(&cpu_set);
+      for (std::size_t i = 0;
+           i < std::min<std::size_t>(affinity.size(), CPU_SETSIZE); ++i) {
+        if (affinity[i]) CPU_SET(i, &cpu_set);
+      }
+      return pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t),
+                                    &cpu_set) == 0;
+#elif defined(__APPLE__)
+      // `affinity` is unused here. It must not be indexed: it may be empty.
+      static_cast<void>(affinity);
+      return false;
+#endif
+    }
+
+    /**
+     * @brief Get the name of the current thread using the current platform's
+     * native API. This should work on Windows, Linux, and macOS.
+     *
+     * @return An `std::optional` object, optionally containing the name of the
+     * current thread. If the returned object does not contain a value, then the
+     * name could not be determined.
+     */
+    [[nodiscard]] static std::optional<std::string> get_os_thread_name() {
+#if defined(_WIN32)
+      // On Windows thread names are wide strings, so we need to convert them to
+      // normal strings.
+      PWSTR data = nullptr;
+      const HRESULT hr = GetThreadDescription(GetCurrentThread(), &data);
+      if (FAILED(hr)) return std::nullopt;
+      if (data == nullptr) return std::nullopt;
+      const int size = WideCharToMultiByte(CP_UTF8, 0, data, -1, nullptr, 0,
+                                           nullptr, nullptr);
+      if (size == 0) {
+        LocalFree(data);
+        return std::nullopt;
+      }
+      std::string name(static_cast<std::size_t>(size) - 1, 0);
+      const int result = WideCharToMultiByte(CP_UTF8, 0, data, -1, name.data(),
+                                             size, nullptr, nullptr);
+      LocalFree(data);
+      if (result == 0) return std::nullopt;
+      return name;
+#elif defined(__linux__) || defined(__APPLE__)
+#ifdef __linux__
+      // On Linux thread names are limited to 16 characters, including the null
+      // terminator.
+      constexpr std::size_t buffer_size = 16;
+#else
+      // On macOS thread names are limited to 64 characters, including the null
+      // terminator.
+      constexpr std::size_t buffer_size = 64;
+#endif
+      char name[buffer_size] = {};
+      if (pthread_getname_np(pthread_self(), name, buffer_size) != 0)
+        return std::nullopt;
+      return std::string(name);
+#endif
+    }
+
+    /**
+     * @brief Set the name of the current thread using the current platform's
+     * native API. This should work on Windows, Linux, and macOS. Note that on
+     * Linux thread names are limited to 16 characters, including the null
+     * terminator.
+     *
+     * @param name The name to set.
+     * @return `true` if the name was set successfully, `false` otherwise.
+     */
+    static bool set_os_thread_name(const std::string& name) {
+#if defined(_WIN32)
+      // On Windows thread names are wide strings, so we need to convert them
+      // from normal strings.
+      const int size =
+          MultiByteToWideChar(CP_UTF8, 0, name.data(), -1, nullptr, 0);
+      if (size == 0) return false;
+      std::wstring wide(static_cast<std::size_t>(size), 0);
+      if (MultiByteToWideChar(CP_UTF8, 0, name.data(), -1, wide.data(), size) ==
+          0)
+        return false;
+      const HRESULT hr = SetThreadDescription(GetCurrentThread(), wide.data());
+      return SUCCEEDED(hr);
+#elif defined(__linux__)
+      // On Linux this is straightforward.
+      return pthread_setname_np(pthread_self(), name.data()) == 0;
+#elif defined(__APPLE__)
+      // On macOS, unlike Linux, a thread can only set a name for itself, so the
+      // signature is different.
+      return pthread_setname_np(name.data()) == 0;
+#endif
+    }
+
+    /**
+     * @brief Get the priority of the current thread using the current
+     * platform's native API. This should work on Windows, Linux, and macOS.
+     *
+     * @return An `std::optional` object, optionally containing the priority of
+     * the current thread, as a member of the enum `BS::os_thread_priority`. If
+     * the returned object does not contain a value, then either the priority
+     * could not be determined, or it is not one of the pre-defined values.
+     */
+    [[nodiscard]] static std::optional<os_thread_priority>
+    get_os_thread_priority() {
+#if defined(_WIN32)
+      // On Windows, this is straightforward.
+      const int priority = GetThreadPriority(GetCurrentThread());
+      if (priority == THREAD_PRIORITY_ERROR_RETURN) return std::nullopt;
+      return static_cast<os_thread_priority>(priority);
+#elif defined(__linux__)
+      // On Linux, we distill the choices of scheduling policy, priority, and
+      // "nice" value into 7 pre-defined levels, for simplicity and portability.
+      // The total number of possible combinations of policies and priorities is
+      // much larger, so if the value was set via any means other than
+      // `BS::this_thread::set_os_thread_priority()`, it may not match one of
+      // our pre-defined values.
+      int policy = 0;
+      struct sched_param param = {};
+      if (pthread_getschedparam(pthread_self(), &policy, &param) != 0)
+        return std::nullopt;
+      if (policy == SCHED_FIFO &&
+          param.sched_priority == sched_get_priority_max(SCHED_FIFO)) {
+        // The only pre-defined priority that uses SCHED_FIFO and the maximum
+        // available priority value is the "realtime" priority.
+        return os_thread_priority::realtime;
+      }
+      if (policy == SCHED_RR &&
+          param.sched_priority == sched_get_priority_min(SCHED_RR) +
+                                      (sched_get_priority_max(SCHED_RR) -
+                                       sched_get_priority_min(SCHED_RR)) /
+                                          2) {
+        // The only pre-defined priority that uses SCHED_RR and a priority in
+        // the middle of the available range is the "highest" priority.
+        return os_thread_priority::highest;
+      }
+#ifdef __linux__
+      if (policy == SCHED_IDLE) {
+        // The only pre-defined priority that uses SCHED_IDLE is the "idle"
+        // priority. Note that this scheduling policy is not available on macOS.
+        return os_thread_priority::idle;
+      }
+#endif
+      if (policy == SCHED_OTHER) {
+        // For SCHED_OTHER, the result depends on the "nice" value. The usual
+        // range is -20 to 19 or 20, with higher values corresponding to lower
+        // priorities. Note that `getpriority()` returns -1 on error, but since
+        // this does not correspond to any of our pre-defined values, this
+        // function will return `std::nullopt` anyway.
+        const int nice_val =
+            getpriority(PRIO_PROCESS, static_cast<id_t>(syscall(SYS_gettid)));
+        switch (nice_val) {
+          case PRIO_MIN + 2:
+            return os_thread_priority::above_normal;
+          case 0:
+            return os_thread_priority::normal;
+          case (PRIO_MAX / 2) + (PRIO_MAX % 2):
+            return os_thread_priority::below_normal;
+          case PRIO_MAX - 3:
+            return os_thread_priority::lowest;
+#ifdef __APPLE__
+          // `SCHED_IDLE` doesn't exist on macOS, so we use the policy
+          // `SCHED_OTHER` with a "nice" value of `PRIO_MAX - 2`.
+          case PRIO_MAX - 2:
+            return os_thread_priority::idle;
+#endif
+          default:
+            return std::nullopt;
+        }
+      }
+      return std::nullopt;
+#elif defined(__APPLE__)
+      // On macOS, we distill the choices of scheduling policy and priority into
+      // 7 pre-defined levels, for simplicity and portability. The total number
+      // of possible combinations of policies and priorities is much larger, so
+      // if the value was set via any means other than
+      // `BS::this_thread::set_os_thread_priority()`, it may not match one of
+      // our pre-defined values.
+      int policy = 0;
+      struct sched_param param = {};
+      if (pthread_getschedparam(pthread_self(), &policy, &param) != 0)
+        return std::nullopt;
+      if (policy == SCHED_FIFO &&
+          param.sched_priority == sched_get_priority_max(SCHED_FIFO)) {
+        // The only pre-defined priority that uses SCHED_FIFO and the maximum
+        // available priority value is the "realtime" priority.
+        return os_thread_priority::realtime;
+      }
+      if (policy == SCHED_RR &&
+          param.sched_priority == sched_get_priority_min(SCHED_RR) +
+                                      (sched_get_priority_max(SCHED_RR) -
+                                       sched_get_priority_min(SCHED_RR)) /
+                                          2) {
+        // The only pre-defined priority that uses SCHED_RR and a priority in
+        // the middle of the available range is the "highest" priority.
+        return os_thread_priority::highest;
+      }
+      if (policy == SCHED_OTHER) {
+        // For SCHED_OTHER, the result depends on the specific value of the
+        // priority. These values must mirror `set_os_thread_priority()`.
+        if (param.sched_priority == sched_get_priority_max(SCHED_OTHER))
+          return os_thread_priority::above_normal;
+        if (param.sched_priority == sched_get_priority_min(SCHED_OTHER) +
+                                        (sched_get_priority_max(SCHED_OTHER) -
+                                         sched_get_priority_min(SCHED_OTHER)) /
+                                            2)
+          return os_thread_priority::normal;
+        if (param.sched_priority == sched_get_priority_min(SCHED_OTHER) +
+                                        (sched_get_priority_max(SCHED_OTHER) -
+                                         sched_get_priority_min(SCHED_OTHER)) /
+                                            3)
+          return os_thread_priority::below_normal;
+        if (param.sched_priority == sched_get_priority_min(SCHED_OTHER) +
+                                        (sched_get_priority_max(SCHED_OTHER) -
+                                         sched_get_priority_min(SCHED_OTHER)) /
+                                            6)
+          return os_thread_priority::lowest;
+        if (param.sched_priority == sched_get_priority_min(SCHED_OTHER))
+          return os_thread_priority::idle;
+        return std::nullopt;
+      }
+      return std::nullopt;
+#endif
+    }
+
+    /**
+     * @brief Set the priority of the current thread using the current
+     * platform's native API. This should work on Windows, Linux, and macOS.
+     * However, note that higher priorities might require elevated permissions.
+     *
+     * @param priority The priority to set. Must be a value from the enum
+     * `BS::os_thread_priority`.
+     * @return `true` if the priority was set successfully, `false` otherwise.
+     * Usually, `false` means that the user does not have the necessary
+     * permissions to set the desired priority.
+     */
+    static bool set_os_thread_priority(const os_thread_priority priority) {
+#if defined(_WIN32)
+      // On Windows, this is straightforward.
+      return SetThreadPriority(GetCurrentThread(),
+                               static_cast<int>(priority)) != 0;
+#elif defined(__linux__)
+      // On Linux, we distill the choices of scheduling policy, priority, and
+      // "nice" value into 7 pre-defined levels, for simplicity and portability.
+      // The total number of possible combinations of policies and priorities is
+      // much larger, but allowing more fine-grained control would not be
+      // portable.
+      int policy = 0;
+      struct sched_param param = {};
+      std::optional<int> nice_val = std::nullopt;
+      switch (priority) {
+        case os_thread_priority::realtime:
+          // "Realtime" pre-defined priority: We use the policy `SCHED_FIFO`
+          // with the highest possible priority.
+          policy = SCHED_FIFO;
+          param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+          break;
+        case os_thread_priority::highest:
+          // "Highest" pre-defined priority: We use the policy `SCHED_RR`
+          // ("round-robin") with a priority in the middle of the available
+          // range.
+          policy = SCHED_RR;
+          param.sched_priority = sched_get_priority_min(SCHED_RR) +
+                                 (sched_get_priority_max(SCHED_RR) -
+                                  sched_get_priority_min(SCHED_RR)) /
+                                     2;
+          break;
+        case os_thread_priority::above_normal:
+          // "Above normal" pre-defined priority: We use the policy
+          // `SCHED_OTHER` (the default). This policy does not accept a priority
+          // value, so priority must be 0. However, we set the "nice" value to
+          // the minimum value as given by `PRIO_MIN`, plus 2 (which should
+          // evaluate to -18). The usual range is -20 to 19 or 20, with higher
+          // values corresponding to lower priorities.
+          policy = SCHED_OTHER;
+          param.sched_priority = 0;
+          nice_val = PRIO_MIN + 2;
+          break;
+        case os_thread_priority::normal:
+          // "Normal" pre-defined priority: We use the policy `SCHED_OTHER`,
+          // priority must be 0, and we set the "nice" value to 0 (the default).
+          policy = SCHED_OTHER;
+          param.sched_priority = 0;
+          nice_val = 0;
+          break;
+        case os_thread_priority::below_normal:
+          // "Below normal" pre-defined priority: We use the policy
+          // `SCHED_OTHER`, priority must be 0, and we set the "nice" value to
+          // half the maximum value as given by `PRIO_MAX`, rounded up (which
+          // should evaluate to 10).
+          policy = SCHED_OTHER;
+          param.sched_priority = 0;
+          nice_val = (PRIO_MAX / 2) + (PRIO_MAX % 2);
+          break;
+        case os_thread_priority::lowest:
+          // "Lowest" pre-defined priority: We use the policy `SCHED_OTHER`,
+          // priority must be 0, and we set the "nice" value to the maximum
+          // value as given by `PRIO_MAX`, minus 3 (which should evaluate to
+          // 17).
+          policy = SCHED_OTHER;
+          param.sched_priority = 0;
+          nice_val = PRIO_MAX - 3;
+          break;
+        case os_thread_priority::idle:
+          // "Idle" pre-defined priority on Linux: We use the policy
+          // `SCHED_IDLE`, priority must be 0, and we don't touch the "nice"
+          // value.
+          policy = SCHED_IDLE;
+          param.sched_priority = 0;
+          break;
+        default:
+          return false;
+      }
+      bool success =
+          (pthread_setschedparam(pthread_self(), policy, &param) == 0);
+      if (nice_val.has_value())
+        success =
+            success &&
+            (setpriority(PRIO_PROCESS, static_cast<id_t>(syscall(SYS_gettid)),
+                         nice_val.value()) == 0);
+      return success;
+#elif defined(__APPLE__)
+      // On macOS, unlike Linux, the "nice" value is per-process, not per-thread
+      // (in compliance with the POSIX standard). However, unlike Linux,
+      // `SCHED_OTHER` on macOS does have a range of priorities. So for
+      // `realtime` and `highest` priorities we use `SCHED_FIFO` and `SCHED_RR`
+      // respectively as for Linux, but for the other priorities we use
+      // `SCHED_OTHER` with a priority in the range given by
+      // `sched_get_priority_min(SCHED_OTHER)` to
+      // `sched_get_priority_max(SCHED_OTHER)`.
+      int policy = 0;
+      struct sched_param param = {};
+      switch (priority) {
+        case os_thread_priority::realtime:
+          // "Realtime" pre-defined priority: We use the policy `SCHED_FIFO`
+          // with the highest possible priority.
+          policy = SCHED_FIFO;
+          param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+          break;
+        case os_thread_priority::highest:
+          // "Highest" pre-defined priority: We use the policy `SCHED_RR`
+          // ("round-robin") with a priority in the middle of the available
+          // range.
+          policy = SCHED_RR;
+          param.sched_priority = sched_get_priority_min(SCHED_RR) +
+                                 (sched_get_priority_max(SCHED_RR) -
+                                  sched_get_priority_min(SCHED_RR)) /
+                                     2;
+          break;
+        case os_thread_priority::above_normal:
+          // "Above normal" pre-defined priority: We use the policy
+          // `SCHED_OTHER` (the default) with the highest possible priority.
+          policy = SCHED_OTHER;
+          param.sched_priority = sched_get_priority_max(SCHED_OTHER);
+          break;
+        case os_thread_priority::normal:
+          // "Normal" pre-defined priority: We use the policy `SCHED_OTHER` (the
+          // default) with a priority in the middle of the available range
+          // (which appears to be the default?).
+          policy = SCHED_OTHER;
+          param.sched_priority = sched_get_priority_min(SCHED_OTHER) +
+                                 (sched_get_priority_max(SCHED_OTHER) -
+                                  sched_get_priority_min(SCHED_OTHER)) /
+                                     2;
+          break;
+        case os_thread_priority::below_normal:
+          // "Below normal" pre-defined priority: We use the policy
+          // `SCHED_OTHER` (the default) with a priority 1/3rd of the way up the
+          // available range, i.e. 2/3rds of the "normal" (mid-range) offset.
+          policy = SCHED_OTHER;
+          param.sched_priority = sched_get_priority_min(SCHED_OTHER) +
+                                 (sched_get_priority_max(SCHED_OTHER) -
+                                  sched_get_priority_min(SCHED_OTHER)) /
+                                     3;
+          break;
+        case os_thread_priority::lowest:
+          // "Lowest" pre-defined priority: We use the policy `SCHED_OTHER` (the
+          // default) with a priority 1/6th of the way up the available range.
+          policy = SCHED_OTHER;
+          param.sched_priority = sched_get_priority_min(SCHED_OTHER) +
+                                 (sched_get_priority_max(SCHED_OTHER) -
+                                  sched_get_priority_min(SCHED_OTHER)) /
+                                     6;
+          break;
+        case os_thread_priority::idle:
+          // "Idle" pre-defined priority on macOS: We use the policy
+          // `SCHED_OTHER` (the default) with the lowest possible priority.
+          policy = SCHED_OTHER;
+          param.sched_priority = sched_get_priority_min(SCHED_OTHER);
+          break;
+        default:
+          return false;
+      }
+      return pthread_setschedparam(pthread_self(), policy, &param) == 0;
+#endif
+    }
+#endif
+
+   private:
+    inline static thread_local std::optional<std::size_t> my_index =
+        std::nullopt;
+    inline static thread_local std::optional<void*> my_pool = std::nullopt;
+  };  // class this_thread
+
+  /**
+   * @brief A meta-programming template to determine the common type of two
+   * integer types. Unlike `std::common_type`, this template maintains correct
+   * signedness.
+   *
+   * @tparam T1 The first type.
+   * @tparam T2 The second type.
+   * @tparam Enable A dummy parameter to enable SFINAE in specializations.
+   */
+  template <typename T1, typename T2, typename Enable = void>
+  struct common_index_type {
+    // Fallback to `std::common_type_t` if no specialization matches.
+    using type = std::common_type_t<T1, T2>;
+  };
+
+  // The common type of two signed integers is the larger of the integers, with
+  // the same signedness.
+  template <typename T1, typename T2>
+  struct common_index_type<
+      T1, T2, std::enable_if_t<std::is_signed_v<T1> && std::is_signed_v<T2>>> {
+    using type = std::conditional_t<(sizeof(T1) >= sizeof(T2)), T1, T2>;
+  };
+
+  // The common type of two unsigned integers is the larger of the integers,
+  // with the same signedness.
+  template <typename T1, typename T2>
+  struct common_index_type<
+      T1, T2,
+      std::enable_if_t<std::is_unsigned_v<T1> && std::is_unsigned_v<T2>>> {
+    using type = std::conditional_t<(sizeof(T1) >= sizeof(T2)), T1, T2>;
+  };
+
+  // The common type of a signed and an unsigned integer is a signed integer
+  // that can hold the full ranges of both integers.
+  template <typename T1, typename T2>
+  struct common_index_type<
+      T1, T2,
+      std::enable_if_t<(std::is_signed_v<T1> && std::is_unsigned_v<T2>) ||
+                       (std::is_unsigned_v<T1> && std::is_signed_v<T2>)>> {
+    using S = std::conditional_t<std::is_signed_v<T1>, T1, T2>;
+    using U = std::conditional_t<std::is_unsigned_v<T1>, T1, T2>;
+    static constexpr std::size_t larger_size =
+        (sizeof(S) > sizeof(U)) ? sizeof(S) : sizeof(U);
+    using type = std::conditional_t<
+        larger_size <= 4,
+        // If both integers are 32 bits or less, the common type should be a
+        // signed type that can hold both of them. If both are 8 bits, or the
+        // signed type is 16 bits and the unsigned type is 8 bits, the common
+        // type is `std::int16_t`. Otherwise, if the larger is 16 bits, or the
+        // signed type is 32 bits and the unsigned type is smaller, the common
+        // type is `std::int32_t`. Otherwise, the unsigned type must be 32 bits,
+        // so the common type is `std::int64_t`.
+        std::conditional_t<
+            larger_size == 1 || (sizeof(S) == 2 && sizeof(U) == 1),
+            std::int16_t,
+            std::conditional_t<larger_size == 2 ||
+                                   (sizeof(S) == 4 && sizeof(U) < 4),
+                               std::int32_t, std::int64_t>>,
+        // If the unsigned integer is 64 bits, the common type should also be an
+        // unsigned 64-bit integer, that is, `std::uint64_t`. The reason is that
+        // the most common scenario where this might happen is where the indices
+        // go from 0 to `x` where `x` has been previously defined as
+        // `std::size_t`, e.g. the size of a vector. Note that this will fail if
+        // the first index is negative; in that case, the user must cast the
+        // indices explicitly to the desired common type. If the unsigned
+        // integer is not 64 bits, then the signed integer must be 64 bits,
+        // hence the common type is `std::int64_t`.
+        std::conditional_t<sizeof(U) == 8, std::uint64_t, std::int64_t>>;
+  };
+
+  /**
+   * @brief A helper type alias to obtain the common type from the template
+   * `BS::common_index_type`.
+   *
+   * @tparam T1 The first type.
+   * @tparam T2 The second type.
+   */
+  template <typename T1, typename T2>
+  using common_index_type_t = typename common_index_type<T1, T2>::type;
+
+  /**
+   * @brief An enumeration of flags to be used in the bitmask template parameter
+   * of `BS::thread_pool` to enable optional features.
+   */
+  enum tp : opt_t {
+    /**
+     * @brief No optional features enabled.
+     */
+    none = 0,
+
+    /**
+     * @brief Enable task priority.
+     */
+    priority = 1 << 0,
+
+    /**
+     * @brief Enable pausing. Note: no flag in this enum uses `1 << 1`.
+     */
+    pause = 1 << 2,
+
+    /**
+     * @brief Enable wait deadlock checks.
+     */
+    wait_deadlock_checks = 1 << 3
+  };
+
+  /**
+   * @brief A fast, lightweight, modern, and easy-to-use C++17/C++20/C++23
+   * thread pool class. This alias defines a thread pool with all optional
+   * features disabled.
+   */
+  using light_thread_pool = thread_pool<tp::none>;
+
+  /**
+   * @brief A fast, lightweight, modern, and easy-to-use C++17/C++20/C++23
+   * thread pool class. This alias defines a thread pool with task priority
+   * enabled.
+   */
+  using priority_thread_pool = thread_pool<tp::priority>;
+
+  /**
+   * @brief A fast, lightweight, modern, and easy-to-use C++17/C++20/C++23
+   * thread pool class. This alias defines a thread pool with pausing enabled.
+   */
+  using pause_thread_pool = thread_pool<tp::pause>;
+
+  /**
+   * @brief A fast, lightweight, modern, and easy-to-use C++17/C++20/C++23
+   * thread pool class. This alias defines a thread pool with wait deadlock
+   * checks enabled, which requires exception handling to be enabled.
+   */
+  using wdc_thread_pool = thread_pool<tp::wait_deadlock_checks>;
+
+  /**
+   * @brief A fast, lightweight, modern, and easy-to-use C++17/C++20/C++23
+   * thread pool class.
+   *
+   * @tparam OptFlags A bitmask of flags which can be used to enable optional
+   * features. The flags are members of the `BS::tp` enumeration:
+   * `BS::tp::priority`, `BS::tp::pause`, and `BS::tp::wait_deadlock_checks`.
+   * The default is `BS::tp::none`, which disables all optional features. To
+   * enable multiple features, use the bitwise OR operator `|`, e.g.
+   * `BS::tp::priority | BS::tp::pause`.
+   */
+  template <opt_t OptFlags = tp::none>
+  class [[nodiscard]] thread_pool {
+   public:
+    /**
+     * @brief True when the `tp::priority` flag is set in `OptFlags`.
+     */
+    static constexpr bool priority_enabled = (OptFlags & tp::priority) != 0;
+
+    /**
+     * @brief True when the `tp::pause` flag is set in `OptFlags`.
+     */
+    static constexpr bool pause_enabled = (OptFlags & tp::pause) != 0;
+
+    /**
+     * @brief True when the `tp::wait_deadlock_checks` flag is set in `OptFlags`.
+     */
+    static constexpr bool wait_deadlock_checks_enabled =
+        (OptFlags & tp::wait_deadlock_checks) != 0;
+
+#ifndef __cpp_exceptions
+    static_assert(!wait_deadlock_checks_enabled,
+                  "Wait deadlock checks cannot be enabled if exception "
+                  "handling is disabled.");
+#endif
+
+    // ============================
+    // Constructors and destructors
+    // ============================
+
+    /**
+     * @brief Construct a new thread pool. The number of threads will be the
+     * total number of hardware threads available, as reported by the
+     * implementation. This is usually determined by the number of cores in the
+     * CPU. If a core is hyperthreaded, it will count as two threads.
+     */
+    thread_pool() : thread_pool(0, [] {}) {}  // count 0, no-op init function
+
+    /**
+     * @brief Construct a new thread pool with the specified number of threads.
+     *
+     * @param num_threads The number of threads to use.
+     */
+    explicit thread_pool(const std::size_t num_threads)
+        : thread_pool(num_threads, [] {}) {}  // no-op init function
+
+    /**
+     * @brief Construct a new thread pool with the specified initialization
+     * function.
+     *
+     * @param init An initialization function to run in each thread before it
+     * starts executing any submitted tasks. The function must have no return
+     * value, and can either take one argument, the thread index of type
+     * `std::size_t`, or zero arguments. It will be executed exactly once per
+     * thread, when the thread is first constructed. The initialization function
+     * must not throw any exceptions, as that will result in program
+     * termination. Any exceptions must be handled explicitly within the
+     * function.
+     */
+    template <BS_THREAD_POOL_INIT_FUNC_CONCEPT(F)>  // delegates with count 0
+    explicit thread_pool(F&& init) : thread_pool(0, std::forward<F>(init)) {}
+
+    /**
+     * @brief Construct a new thread pool with the specified number of threads
+     * and initialization function.
+     *
+     * @param num_threads The number of threads to use.
+     * @param init An initialization function to run in each thread before it
+     * starts executing any submitted tasks. The function must have no return
+     * value, and can either take one argument, the thread index of type
+     * `std::size_t`, or zero arguments. It will be executed exactly once per
+     * thread, when the thread is first constructed. The initialization function
+     * must not throw any exceptions, as that will result in program
+     * termination. Any exceptions must be handled explicitly within the
+     * function.
+     */
+    template <BS_THREAD_POOL_INIT_FUNC_CONCEPT(F)>  // other ctors delegate here
+    thread_pool(const std::size_t num_threads, F&& init) {
+      create_threads(num_threads, std::forward<F>(init));
+    }
+
+    // Copying and moving are both disabled: the copy/move constructors and the
+    // copy/move assignment operators are all explicitly deleted.
+    thread_pool(const thread_pool&) = delete;
+    thread_pool(thread_pool&&) = delete;
+    thread_pool& operator=(const thread_pool&) = delete;
+    thread_pool& operator=(thread_pool&&) = delete;
+
+    /**
+     * @brief Destruct the thread pool. Waits for all tasks to complete, then
+     * destroys all threads. If a cleanup function was set, it will run in each
+     * thread right before it is destroyed. Note that if the pool is paused,
+     * then any tasks still in the queue will never be executed.
+     */
+    ~thread_pool() noexcept {
+#ifdef __cpp_exceptions
+      try {
+#endif
+        wait();  // let outstanding tasks finish first
+#ifndef __cpp_lib_jthread
+        destroy_threads();  // only called when std::jthread is unavailable
+#endif
+#ifdef __cpp_exceptions
+      } catch (...) {  // swallow everything: this destructor is noexcept
+      }
+#endif
+    }
+
+    // =======================
+    // Public member functions
+    // =======================
+
+    /**
+     * @brief Parallelize a loop by automatically splitting it into blocks and
+     * submitting each block separately to the queue, with the specified
+     * priority. The block function takes two arguments, the start and end of
+     * the block, so that it is only called once per block, but it is up to the
+     * user to make sure the block function correctly handles all the indices in
+     * each block. Does not return a `BS::multi_future`, so the user must use
+     * `wait()` or some other method to ensure that the loop finishes executing,
+     * otherwise bad things will happen.
+     *
+     * @tparam T1 The type of the first index. Should be a signed or unsigned
+     * integer.
+     * @tparam T2 The type of the index after the last index. Should be a signed
+     * or unsigned integer.
+     * @tparam F The type of the function to loop through.
+     * @param first_index The first index in the loop.
+     * @param index_after_last The index after the last index in the loop. The
+     * loop will iterate from `first_index` to `(index_after_last - 1)`
+     * inclusive. In other words, it will be equivalent to `for (T i =
+     * first_index; i < index_after_last; ++i)`. Note that if `index_after_last
+     * <= first_index`, no blocks will be submitted.
+     * @param block A function that will be called once per block. Should take
+     * exactly two arguments: the first index in the block and the index after
+     * the last index in the block. `block(start, end)` should typically involve
+     * a loop of the form `for (T i = start; i < end; ++i)`.
+     * @param num_blocks The maximum number of blocks to split the loop into.
+     * The default is 0, which means the number of blocks will be equal to the
+     * number of threads in the pool.
+     * @param priority The priority of the tasks. Should be between -128 and
+     * +127 (a signed 8-bit integer). The default is 0. Only taken into account
+     * if the flag `BS::tp::priority` is enabled in the template parameter,
+     * otherwise has no effect.
+     */
+    template <typename T1, typename T2,
+              typename T = common_index_type_t<T1, T2>, typename F>
+    void detach_blocks(const T1 first_index, const T2 index_after_last,
+                       F&& block, const std::size_t num_blocks = 0,
+                       const priority_t priority = 0) {
+      const T first = static_cast<T>(first_index);
+      const T last = static_cast<T>(index_after_last);
+      if (last <= first) return;  // empty range: nothing to submit
+      const auto fn =
+          std::make_shared<std::decay_t<F>>(std::forward<F>(block));
+      const blocks chunks(first, last,
+                          num_blocks != 0 ? num_blocks : thread_count);
+      for (std::size_t idx = 0; idx < chunks.get_num_blocks(); ++idx) {
+        detach_task([fn, lo = chunks.start(idx),
+                     hi = chunks.end(idx)] { (*fn)(lo, hi); },
+                    priority);
+      }
+    }
+
+    /**
+     * @brief Parallelize a loop by automatically splitting it into blocks and
+     * submitting each block separately to the queue, with the specified
+     * priority. The loop function takes one argument, the loop index, so that
+     * it is called many times per block. Does not return a `BS::multi_future`,
+     * so the user must use `wait()` or some other method to ensure that the
+     * loop finishes executing, otherwise bad things will happen.
+     *
+     * @tparam T1 The type of the first index. Should be a signed or unsigned
+     * integer.
+     * @tparam T2 The type of the index after the last index. Should be a signed
+     * or unsigned integer.
+     * @tparam F The type of the function to loop through.
+     * @param first_index The first index in the loop.
+     * @param index_after_last The index after the last index in the loop. The
+     * loop will iterate from `first_index` to `(index_after_last - 1)`
+     * inclusive. In other words, it will be equivalent to `for (T i =
+     * first_index; i < index_after_last; ++i)`. Note that if `index_after_last
+     * <= first_index`, no blocks will be submitted.
+     * @param loop The function to loop through. Will be called once per index,
+     * many times per block. Should take exactly one argument: the loop index.
+     * @param num_blocks The maximum number of blocks to split the loop into.
+     * The default is 0, which means the number of blocks will be equal to the
+     * number of threads in the pool.
+     * @param priority The priority of the tasks. Should be between -128 and
+     * +127 (a signed 8-bit integer). The default is 0. Only taken into account
+     * if the flag `BS::tp::priority` is enabled in the template parameter,
+     * otherwise has no effect.
+     */
+    template <typename T1, typename T2,
+              typename T = common_index_type_t<T1, T2>, typename F>
+    void detach_loop(const T1 first_index, const T2 index_after_last, F&& loop,
+                     const std::size_t num_blocks = 0,
+                     const priority_t priority = 0) {
+      const T first = static_cast<T>(first_index);
+      const T last = static_cast<T>(index_after_last);
+      if (last <= first) return;  // empty range: nothing to submit
+      const auto body =
+          std::make_shared<std::decay_t<F>>(std::forward<F>(loop));
+      const blocks chunks(first, last,
+                          num_blocks != 0 ? num_blocks : thread_count);
+      for (std::size_t idx = 0; idx < chunks.get_num_blocks(); ++idx) {
+        detach_task(
+            [body, lo = chunks.start(idx), hi = chunks.end(idx)] {
+              for (T i = lo; i < hi; ++i) (*body)(i);
+            },
+            priority);
+      }
+    }
+
+    /**
+     * @brief Submit a sequence of tasks enumerated by indices to the queue,
+     * with the specified priority. The sequence function takes one argument,
+     * the task index, and will be called once per index. Does not return a
+     * `BS::multi_future`, so the user must use `wait()` or some other method to
+     * ensure that the sequence finishes executing, otherwise bad things will
+     * happen.
+     *
+     * @tparam T1 The type of the first index. Should be a signed or unsigned
+     * integer.
+     * @tparam T2 The type of the index after the last index. Should be a signed
+     * or unsigned integer.
+     * @tparam F The type of the function used to define the sequence.
+     * @param first_index The first index in the sequence.
+     * @param index_after_last The index after the last index in the sequence.
+     * The sequence will iterate from `first_index` to `(index_after_last - 1)`
+     * inclusive. In other words, it will be equivalent to `for (T i =
+     * first_index; i < index_after_last; ++i)`. Note that if `index_after_last
+     * <= first_index`, no tasks will be submitted.
+     * @param sequence The function used to define the sequence. Will be called
+     * once per index. Should take exactly one argument, the index.
+     * @param priority The priority of the tasks. Should be between -128 and
+     * +127 (a signed 8-bit integer). The default is 0. Only taken into account
+     * if the flag `BS::tp::priority` is enabled in the template parameter,
+     * otherwise has no effect.
+     */
+    template <typename T1, typename T2,
+              typename T = common_index_type_t<T1, T2>, typename F>
+    void detach_sequence(const T1 first_index, const T2 index_after_last,
+                         F&& sequence, const priority_t priority = 0) {
+      const T first = static_cast<T>(first_index);
+      const T last = static_cast<T>(index_after_last);
+      if (last <= first) return;  // empty range: nothing to submit
+      const auto fn =
+          std::make_shared<std::decay_t<F>>(std::forward<F>(sequence));
+      for (T idx = first; idx < last; ++idx) {
+        detach_task([fn, idx] { (*fn)(idx); }, priority);
+      }
+    }
+
+    /**
+     * @brief Submit a function with no arguments and no return value into the
+     * task queue, with the specified priority. To submit a function with
+     * arguments, enclose it in a lambda expression. Does not return a future,
+     * so the user must use `wait()` or some other method to ensure that the
+     * task finishes executing, otherwise bad things will happen.
+     *
+     * @tparam F The type of the function.
+     * @param task The function to submit.
+     * @param priority The priority of the task. Should be between -128 and +127
+     * (a signed 8-bit integer). The default is 0. Only taken into account if
+     * the flag `BS::tp::priority` is enabled in the template parameter,
+     * otherwise has no effect.
+     */
+    template <typename F>
+    void detach_task(F&& task, const priority_t priority = 0) {
+      {  // lock only while the queue is modified
+        const std::scoped_lock tasks_lock(tasks_mutex);
+        if constexpr (priority_enabled)
+          tasks.emplace(std::forward<F>(task), priority);
+        else
+          tasks.emplace(std::forward<F>(task));
+      }
+      task_available_cv.notify_one();  // notify after the lock is released
+    }
+
+#ifdef BS_THREAD_POOL_NATIVE_EXTENSIONS
+    /**
+     * @brief Get a vector containing the underlying implementation-defined
+     * thread handles for each of the pool's threads, as obtained by
+     * `std::thread::native_handle()` (or `std::jthread::native_handle()` in
+     * C++20 and later).
+     *
+     * @return The native thread handles.
+     */
+    [[nodiscard]] std::vector<thread_t::native_handle_type> get_native_handles()
+        const {
+      std::vector<thread_t::native_handle_type> handles(thread_count);
+      for (std::size_t idx = 0; idx != thread_count; ++idx)
+        handles[idx] = threads[idx].native_handle();
+      return handles;
+    }
+#endif
+
+    /**
+     * @brief Get the number of tasks currently waiting in the queue to be
+     * executed by the threads.
+     *
+     * @return The number of queued tasks.
+     */
+    [[nodiscard]] std::size_t get_tasks_queued() const {
+      const std::scoped_lock tasks_lock(tasks_mutex);  // held for the read
+      return tasks.size();
+    }
+
+    /**
+     * @brief Get the number of tasks currently being executed by the threads.
+     *
+     * @return The number of running tasks.
+     */
+    [[nodiscard]] std::size_t get_tasks_running() const {
+      const std::scoped_lock tasks_lock(tasks_mutex);  // held for the read
+      return tasks_running;
+    }
+
+    /**
+     * @brief Get the total number of unfinished tasks: either still waiting in
+     * the queue, or running in a thread. Note that `get_tasks_total() ==
+     * get_tasks_queued() + get_tasks_running()`.
+     *
+     * @return The total number of tasks.
+     */
+    [[nodiscard]] std::size_t get_tasks_total() const {
+      const std::scoped_lock tasks_lock(tasks_mutex);  // covers both reads
+      return tasks_running + tasks.size();
+    }
+
+    /**
+     * @brief Get the number of threads in the pool.
+     *
+     * @return The number of threads.
+     */
+    [[nodiscard]] std::size_t get_thread_count() const noexcept {
+      return thread_count;  // read without taking tasks_mutex
+    }
+
+    /**
+     * @brief Get a vector containing the unique identifiers for each of the
+     * pool's threads, as obtained by `std::thread::get_id()` (or
+     * `std::jthread::get_id()` in C++20 and later).
+     *
+     * @return The unique thread identifiers.
+     */
+    [[nodiscard]] std::vector<thread_t::id> get_thread_ids() const {
+      std::vector<thread_t::id> ids(thread_count);
+      for (std::size_t idx = 0; idx != thread_count; ++idx)
+        ids[idx] = threads[idx].get_id();
+      return ids;
+    }
+
+    /**
+     * @brief Check whether the pool is currently paused. Only enabled if the
+     * flag `BS::tp::pause` is enabled in the template parameter.
+     *
+     * @return `true` if the pool is paused, `false` if it is not paused.
+     */
+    BS_THREAD_POOL_IF_PAUSE_ENABLED
+    [[nodiscard]] bool is_paused() const {
+      const std::scoped_lock tasks_lock(tasks_mutex);
+      return paused;
+    }
+
+    /**
+     * @brief Pause the pool. The workers will temporarily stop retrieving new
+     * tasks out of the queue, although any tasks already executing will keep
+     * running until they are finished. Only enabled if the flag `BS::tp::pause`
+     * is enabled in the template parameter.
+     */
+    BS_THREAD_POOL_IF_PAUSE_ENABLED
+    void pause() {
+      const std::scoped_lock tasks_lock(tasks_mutex);
+      paused = true;
+    }
+
+    /**
+     * @brief Purge all the tasks waiting in the queue. Tasks that are currently
+     * running will not be affected, but any tasks still waiting in the queue
+     * will be discarded, and will never be executed by the threads. Please note
+     * that there is no way to restore the purged tasks.
+     */
+    void purge() {
+      const std::scoped_lock tasks_lock(tasks_mutex);
+      tasks = {};  // replace the queue with an empty one
+    }
+
+    /**
+     * @brief Reset the pool with the total number of hardware threads
+     * available, as reported by the implementation. Waits for all currently
+     * running tasks to be completed, then destroys all threads in the pool and
+     * creates a new thread pool with the new number of threads. Any tasks that
+     * were waiting in the queue before the pool was reset will then be executed
+     * by the new threads. If the pool was paused before resetting it, the new
+     * pool will be paused as well.
+     */
+    void reset() {
+      reset(0, [](std::size_t) {});  // count 0, no-op init function
+    }
+
+    /**
+     * @brief Reset the pool with a new number of threads. Waits for all
+     * currently running tasks to be completed, then destroys all threads in the
+     * pool and creates a new thread pool with the new number of threads. Any
+     * tasks that were waiting in the queue before the pool was reset will then
+     * be executed by the new threads. If the pool was paused before resetting
+     * it, the new pool will be paused as well.
+     *
+     * @param num_threads The number of threads to use.
+     */
+    void reset(const std::size_t num_threads) {
+      reset(num_threads, [](std::size_t) {});  // no-op init function
+    }
+
+    /**
+     * @brief Reset the pool with the total number of hardware threads
+     * available, as reported by the implementation, and a new initialization
+     * function. Waits for all currently running tasks to be completed, then
+     * destroys all threads in the pool and creates a new thread pool with the
+     * new number of threads and initialization function. Any tasks that were
+     * waiting in the queue before the pool was reset will then be executed by
+     * the new threads. If the pool was paused before resetting it, the new pool
+     * will be paused as well.
+     *
+     * @param init An initialization function to run in each thread before it
+     * starts executing any submitted tasks. The function must have no return
+     * value, and can either take one argument, the thread index of type
+     * `std::size_t`, or zero arguments. It will be executed exactly once per
+     * thread, when the thread is first constructed. The initialization function
+     * must not throw any exceptions, as that will result in program
+     * termination. Any exceptions must be handled explicitly within the
+     * function.
+     */
+    template <BS_THREAD_POOL_INIT_FUNC_CONCEPT(F)>
+    void reset(F&& init) {
+      reset(0, std::forward<F>(init));  // count 0, caller-supplied init
+    }
+
+    /**
+     * @brief Reset the pool with a new number of threads and a new
+     * initialization function. Waits for all currently running tasks to be
+     * completed, then destroys all threads in the pool and creates a new thread
+     * pool with the new number of threads and initialization function. Any
+     * tasks that were waiting in the queue before the pool was reset will then
+     * be executed by the new threads. If the pool was paused before resetting
+     * it, the new pool will be paused as well.
+     *
+     * @param num_threads The number of threads to use.
+     * @param init An initialization function to run in each thread before it
+     * starts executing any submitted tasks. The function must have no return
+     * value, and can either take one argument, the thread index of type
+     * `std::size_t`, or zero arguments. It will be executed exactly once per
+     * thread, when the thread is first constructed. The initialization function
+     * must not throw any exceptions, as that will result in program
+     * termination. Any exceptions must be handled explicitly within the
+     * function.
+     */
+    template <BS_THREAD_POOL_INIT_FUNC_CONCEPT(F)>
+    void reset(const std::size_t num_threads, F&& init) {
+      if constexpr (pause_enabled) {
+        std::unique_lock tasks_lock(tasks_mutex);
+        const bool was_paused = paused;  // remembered so it can be restored
+        paused = true;  // pool stays paused while it is being rebuilt
+        tasks_lock.unlock();  // reset_pool() runs without the lock held
+        reset_pool(num_threads, std::forward<F>(init));
+        tasks_lock.lock();
+        paused = was_paused;  // NOTE(review): no cv notify here -- confirm OK
+      } else {
+        reset_pool(num_threads, std::forward<F>(init));
+      }
+    }
+
+    /**
+     * @brief Set the thread pool's cleanup function.
+     *
+     * @param cleanup A cleanup function to run in each thread right before it
+     * is destroyed, which will happen when the pool is destructed or reset. The
+     * function must have no return value, and can either take one argument, the
+     * thread index of type `std::size_t`, or zero arguments. The cleanup
+     * function must not throw any exceptions, as that will result in program
+     * termination. Any exceptions must be handled explicitly within the
+     * function.
+     */
+    template <BS_THREAD_POOL_INIT_FUNC_CONCEPT(F)>
+    void set_cleanup_func(F&& cleanup) {
+      if constexpr (!std::is_invocable_v<F, std::size_t>) {
+        // Zero-argument callable: wrap it so the thread index is ignored.
+        cleanup_func = [fn = std::forward<F>(cleanup)](std::size_t) { fn(); };
+      } else {
+        // Callable already accepts the thread index: store it directly.
+        cleanup_func = std::forward<F>(cleanup);
+      }
+    }
+
+    /**
+     * @brief Parallelize a loop by automatically splitting it into blocks and
+     * submitting each block separately to the queue, with the specified
+     * priority. The block function takes two arguments, the start and end of
+     * the block, so that it is only called once per block, but it is up to the
+     * user to make sure the block function correctly handles all the indices in
+     * each block. Returns a `BS::multi_future` that contains the futures for
+     * all of the blocks.
+     *
+     * @tparam T1 The type of the first index. Should be a signed or unsigned
+     * integer.
+     * @tparam T2 The type of the index after the last index. Should be a signed
+     * or unsigned integer.
+     * @tparam F The type of the function to loop through.
+     * @tparam R The return type of the function to loop through (can be
+     * `void`).
+     * @param first_index The first index in the loop.
+     * @param index_after_last The index after the last index in the loop. The
+     * loop will iterate from `first_index` to `(index_after_last - 1)`
+     * inclusive. In other words, it will be equivalent to `for (T i =
+     * first_index; i < index_after_last; ++i)`. Note that if `index_after_last
+     * <= first_index`, no blocks will be submitted, and an empty
+     * `BS::multi_future` will be returned.
+     * @param block A function that will be called once per block. Should take
+     * exactly two arguments: the first index in the block and the index after
+     * the last index in the block. `block(start, end)` should typically involve
+     * a loop of the form `for (T i = start; i < end; ++i)`.
+     * @param num_blocks The maximum number of blocks to split the loop into.
+     * The default is 0, which means the number of blocks will be equal to the
+     * number of threads in the pool.
+     * @param priority The priority of the tasks. Should be between -128 and
+     * +127 (a signed 8-bit integer). The default is 0. Only taken into account
+     * if the flag `BS::tp::priority` is enabled in the template parameter,
+     * otherwise has no effect.
+     * @return A `BS::multi_future` that can be used to wait for all the blocks
+     * to finish. If the block function returns a value, the `BS::multi_future`
+     * can also be used to obtain the values returned by each block.
+     */
+    template <typename T1, typename T2,
+              typename T = common_index_type_t<T1, T2>, typename F,
+              typename R = std::invoke_result_t<std::decay_t<F>, T, T>>
+    [[nodiscard]] multi_future<R> submit_blocks(
+        const T1 first_index, const T2 index_after_last, F&& block,
+        const std::size_t num_blocks = 0, const priority_t priority = 0) {
+      const T first = static_cast<T>(first_index);
+      const T last = static_cast<T>(index_after_last);
+      if (last <= first) return {};  // empty range: nothing to submit
+      const auto fn =
+          std::make_shared<std::decay_t<F>>(std::forward<F>(block));
+      const blocks chunks(first, last,
+                          num_blocks != 0 ? num_blocks : thread_count);
+      const std::size_t total = chunks.get_num_blocks();
+      multi_future<R> results;
+      results.reserve(total);
+      for (std::size_t idx = 0; idx < total; ++idx) {
+        results.push_back(submit_task(
+            [fn, lo = chunks.start(idx), hi = chunks.end(idx)] {
+              return (*fn)(lo, hi);
+            },
+            priority));
+      }
+      return results;
+    }
+
+    /**
+     * @brief Parallelize a loop by automatically splitting it into blocks and
+     * submitting each block separately to the queue, with the specified
+     * priority. The loop function takes one argument, the loop index, so that
+     * it is called many times per block. It must have no return value. Returns
+     * a `BS::multi_future` that contains the futures for all of the blocks.
+     *
+     * @tparam T1 The type of the first index. Should be a signed or unsigned
+     * integer.
+     * @tparam T2 The type of the index after the last index. Should be a signed
+     * or unsigned integer.
+     * @tparam F The type of the function to loop through.
+     * @param first_index The first index in the loop.
+     * @param index_after_last The index after the last index in the loop. The
+     * loop will iterate from `first_index` to `(index_after_last - 1)`
+     * inclusive. In other words, it will be equivalent to `for (T i =
+     * first_index; i < index_after_last; ++i)`. Note that if `index_after_last
+     * <= first_index`, no tasks will be submitted, and an empty
+     * `BS::multi_future` will be returned.
+     * @param loop The function to loop through. Will be called once per index,
+     * many times per block. Should take exactly one argument: the loop index.
+     * It cannot have a return value.
+     * @param num_blocks The maximum number of blocks to split the loop into.
+     * The default is 0, which means the number of blocks will be equal to the
+     * number of threads in the pool.
+     * @param priority The priority of the tasks. Should be between -128 and
+     * +127 (a signed 8-bit integer). The default is 0. Only taken into account
+     * if the flag `BS::tp::priority` is enabled in the template parameter,
+     * otherwise has no effect.
+     * @return A `BS::multi_future` that can be used to wait for all the blocks
+     * to finish.
+     */
+    template <typename T1, typename T2,
+              typename T = common_index_type_t<T1, T2>, typename F>
+    [[nodiscard]] multi_future<void> submit_loop(
+        const T1 first_index, const T2 index_after_last, F&& loop,
+        const std::size_t num_blocks = 0, const priority_t priority = 0) {
+      const T first = static_cast<T>(first_index);
+      const T last = static_cast<T>(index_after_last);
+      if (last <= first) return {};  // empty range: nothing to submit
+      const auto body =
+          std::make_shared<std::decay_t<F>>(std::forward<F>(loop));
+      const blocks chunks(first, last,
+                          num_blocks != 0 ? num_blocks : thread_count);
+      const std::size_t total = chunks.get_num_blocks();
+      multi_future<void> results;
+      results.reserve(total);
+      for (std::size_t idx = 0; idx < total; ++idx) {
+        results.push_back(submit_task(
+            [body, lo = chunks.start(idx), hi = chunks.end(idx)] {
+              for (T i = lo; i < hi; ++i) (*body)(i);
+            },
+            priority));
+      }
+      return results;
+    }
+
+    /**
+     * @brief Submit a sequence of tasks enumerated by indices to the queue,
+     * with the specified priority. The sequence function takes one argument,
+     * the task index, and will be called once per index. Returns a
+     * `BS::multi_future` that contains the futures for all of the tasks.
+     *
+     * @tparam T1 The type of the first index. Should be a signed or unsigned
+     * integer.
+     * @tparam T2 The type of the index after the last index. Should be a signed
+     * or unsigned integer.
+     * @tparam F The type of the function used to define the sequence.
+     * @tparam R The return type of the function used to define the sequence
+     * (can be `void`).
+     * @param first_index The first index in the sequence.
+     * @param index_after_last The index after the last index in the sequence.
+     * The sequence will iterate from `first_index` to `(index_after_last - 1)`
+     * inclusive. In other words, it will be equivalent to `for (T i =
+     * first_index; i < index_after_last; ++i)`. Note that if `index_after_last
+     * <= first_index`, no tasks will be submitted, and an empty
+     * `BS::multi_future` will be returned.
+     * @param sequence The function used to define the sequence. Will be called
+     * once per index. Should take exactly one argument, the index.
+     * @param priority The priority of the tasks. Should be between -128 and
+     * +127 (a signed 8-bit integer). The default is 0. Only taken into account
+     * if the flag `BS::tp::priority` is enabled in the template parameter,
+     * otherwise has no effect.
+     * @return A `BS::multi_future` that can be used to wait for all the tasks
+     * to finish. If the sequence function returns a value, the
+     * `BS::multi_future` can also be used to obtain the values returned by each
+     * task.
+     */
+    template <typename T1, typename T2,
+              typename T = common_index_type_t<T1, T2>, typename F,
+              typename R = std::invoke_result_t<std::decay_t<F>, T>>
+    [[nodiscard]] multi_future<R> submit_sequence(
+        const T1 first_index, const T2 index_after_last, F&& sequence,
+        const priority_t priority = 0) {
+      const T first = static_cast<T>(first_index);
+      const T last = static_cast<T>(index_after_last);
+      if (last <= first) return {};  // empty range: nothing to submit
+      const auto fn =
+          std::make_shared<std::decay_t<F>>(std::forward<F>(sequence));
+      multi_future<R> results;
+      // Reserve one slot per index; unsigned math avoids signed overflow.
+      results.reserve(static_cast<std::size_t>(last) -
+                      static_cast<std::size_t>(first));
+      for (T idx = first; idx < last; ++idx) {
+        results.push_back(
+            submit_task([fn, idx] { return (*fn)(idx); }, priority));
+      }
+      return results;
+    }
+
+    /**
+     * @brief Submit a function with no arguments into the task queue, with the
+     * specified priority. To submit a function with arguments, enclose it in a
+     * lambda expression. If the function has a return value, get a future for
+     * the eventual returned value. If the function has no return value, get an
+     * `std::future<void>` which can be used to wait until the task finishes.
+     *
+     * @tparam F The type of the function.
+     * @tparam R The return type of the function (can be `void`).
+     * @param task The function to submit.
+     * @param priority The priority of the task. Should be between -128 and +127
+     * (a signed 8-bit integer). The default is 0. Only taken into account if
+     * the flag `BS::tp::priority` is enabled in the template parameter,
+     * otherwise has no effect.
+     * @return A future to be used later to wait for the function to finish
+     * executing and/or obtain its returned value if it has one.
+     */
+    template <typename F, typename R = std::invoke_result_t<std::decay_t<F>>>
+    [[nodiscard]] std::future<R> submit_task(F&& task,
+                                             const priority_t priority = 0) {
+#ifdef __cpp_lib_move_only_function
+      std::promise<R> promise;
+#define BS_THREAD_POOL_PROMISE_MEMBER_ACCESS promise.
+#else
+      const std::shared_ptr<std::promise<R>> promise =
+          std::make_shared<std::promise<R>>();
+#define BS_THREAD_POOL_PROMISE_MEMBER_ACCESS promise->
+#endif
+      std::future<R> future = BS_THREAD_POOL_PROMISE_MEMBER_ACCESS get_future();
+      detach_task(
+          [task = std::forward<F>(task),
+           promise = std::move(promise)]() mutable {
+#ifdef __cpp_exceptions
+            try {
+#endif
+              if constexpr (std::is_void_v<R>) {
+                task();
+                BS_THREAD_POOL_PROMISE_MEMBER_ACCESS set_value();
+              } else {
+                BS_THREAD_POOL_PROMISE_MEMBER_ACCESS set_value(task());
+              }
+#ifdef __cpp_exceptions
+            } catch (...) {
+              try {
+                BS_THREAD_POOL_PROMISE_MEMBER_ACCESS set_exception(
+                    std::current_exception());
+              } catch (...) {
+              }
+            }
+#endif
+          },
+          priority);
+      return future;
+    }
+
+    /**
+     * @brief Resume a paused pool: clear the pause flag and wake the workers
+     * so they start retrieving tasks from the queue again. Only enabled if
+     * the flag `BS::tp::pause` is enabled in the template parameter.
+     */
+    BS_THREAD_POOL_IF_PAUSE_ENABLED
+    void unpause() {
+      std::unique_lock guard(tasks_mutex);
+      paused = false;
+      // Drop the lock before notifying, as woken workers must reacquire it.
+      guard.unlock();
+      task_available_cv.notify_all();
+    }
+
+    /**
+     * @brief Block until the pool's tasks are done. Ordinarily that means
+     * every task, whether currently running in a thread or still waiting in
+     * the queue. If the pool is paused, only the currently running tasks are
+     * waited for, since the queued ones would never start and the wait would
+     * never end. Note: To wait for one specific task, submit it with
+     * `submit_task()` and call the `wait()` member function of the returned
+     * future instead.
+     *
+     * @throws `wait_deadlock` if called from within a thread of the same pool,
+     * which would result in a deadlock. Only enabled if the flag
+     * `BS::tp::wait_deadlock_checks` is enabled in the template parameter.
+     */
+    void wait() {
+#ifdef __cpp_exceptions
+      if constexpr (wait_deadlock_checks_enabled) {
+        if (this_thread::get_pool() == this) throw wait_deadlock();
+      }
+#endif
+      std::unique_lock tasks_lock(tasks_mutex);
+      waiting = true;
+      tasks_done_cv.wait(tasks_lock, [this] {
+        if (tasks_running != 0) return false;
+        if constexpr (pause_enabled)
+          if (paused) return true;
+        return tasks.empty();
+      });
+      waiting = false;
+    }
+
+    /**
+     * @brief Wait for tasks to be completed, giving up once the specified
+     * duration has passed.
+     *
+     * @tparam R An arithmetic type representing the number of ticks to wait.
+     * @tparam P An `std::ratio` representing the length of each tick in
+     * seconds.
+     * @param duration The maximum amount of time to wait.
+     * @return `true` if all tasks finished running, `false` if the duration
+     * expired but some tasks are still running.
+     * @throws `wait_deadlock` if called from within a thread of the same pool,
+     * which would result in a deadlock. Only enabled if the flag
+     * `BS::tp::wait_deadlock_checks` is enabled in the template parameter.
+     */
+    template <typename R, typename P>
+    bool wait_for(const std::chrono::duration<R, P>& duration) {
+#ifdef __cpp_exceptions
+      if constexpr (wait_deadlock_checks_enabled) {
+        if (this_thread::get_pool() == this) throw wait_deadlock();
+      }
+#endif
+      std::unique_lock tasks_lock(tasks_mutex);
+      waiting = true;
+      const bool done = tasks_done_cv.wait_for(tasks_lock, duration, [this] {
+        if (tasks_running != 0) return false;
+        if constexpr (pause_enabled)
+          if (paused) return true;
+        return tasks.empty();
+      });
+      waiting = false;
+      return done;
+    }
+
+    /**
+     * @brief Wait for tasks to be completed, giving up once the specified
+     * time point has been reached.
+     *
+     * @tparam C The type of the clock used to measure time.
+     * @tparam D An `std::chrono::duration` type used to indicate the time
+     * point.
+     * @param timeout_time The time point at which to stop waiting.
+     * @return `true` if all tasks finished running, `false` if the time point
+     * was reached but some tasks are still running.
+     * @throws `wait_deadlock` if called from within a thread of the same pool,
+     * which would result in a deadlock. Only enabled if the flag
+     * `BS::tp::wait_deadlock_checks` is enabled in the template parameter.
+     */
+    template <typename C, typename D>
+    bool wait_until(const std::chrono::time_point<C, D>& timeout_time) {
+#ifdef __cpp_exceptions
+      if constexpr (wait_deadlock_checks_enabled) {
+        if (this_thread::get_pool() == this) throw wait_deadlock();
+      }
+#endif
+      std::unique_lock tasks_lock(tasks_mutex);
+      waiting = true;
+      const bool done =
+          tasks_done_cv.wait_until(tasks_lock, timeout_time, [this] {
+            if (tasks_running != 0) return false;
+            if constexpr (pause_enabled)
+              if (paused) return true;
+            return tasks.empty();
+          });
+      waiting = false;
+      return done;
+    }
+
+   private:
+    // ========================
+    // Private member functions
+    // ========================
+
+    /**
+     * @brief Create the threads in the pool and assign a worker to each thread.
+     *
+     * @param num_threads The number of threads to use.
+     * @param init An initialization function to run in each thread before it
+     * starts executing any submitted tasks.
+     */
+    template <typename F>
+    void create_threads(const std::size_t num_threads, F&& init) {
+      if constexpr (std::is_invocable_v<F, std::size_t>) {
+        init_func = std::forward<F>(init);
+      } else {
+        init_func = [init = std::forward<F>(init)](std::size_t) { init(); };
+      }
+      thread_count = determine_thread_count(num_threads);
+      threads = std::make_unique<thread_t[]>(thread_count);
+      {
+        const std::scoped_lock tasks_lock(tasks_mutex);
+        tasks_running = thread_count;
+#ifndef __cpp_lib_jthread
+        workers_running = true;
+#endif
+      }
+      for (std::size_t i = 0; i < thread_count; ++i) {
+        threads[i] = thread_t(
+            [this, i]
+#ifdef __cpp_lib_jthread
+            (const std::stop_token& stop_token) { worker(stop_token, i); }
+#else
+            { worker(i); }
+#endif
+        );
+      }
+    }
+
+#ifndef __cpp_lib_jthread
+    /**
+     * @brief Stop the workers and join every thread in the pool.
+     */
+    void destroy_threads() {
+      std::unique_lock guard(tasks_mutex);
+      workers_running = false;
+      guard.unlock();
+      // Wake all workers blocked on the condition variable before joining.
+      task_available_cv.notify_all();
+      for (std::size_t n = 0; n != thread_count; ++n) threads[n].join();
+    }
+#endif
+
+    /**
+     * @brief Determine how many threads the pool should have, based on the
+     * parameter passed to the constructor or `reset()`.
+     *
+     * @param num_threads The parameter passed to the constructor or `reset()`.
+     * A positive value is used as is. Zero (the type is unsigned, so there
+     * are no negative values; zero is also the default when no parameter was
+     * supplied) selects the total number of hardware threads available, as
+     * obtained from `thread_t::hardware_concurrency()`. If the latter returns
+     * zero for some reason, then the pool will be created with just one
+     * thread.
+     * @return The number of threads to use for constructing the pool; never
+     * zero.
+     */
+    [[nodiscard]] static std::size_t determine_thread_count(
+        const std::size_t num_threads) noexcept {
+      if (num_threads != 0) return num_threads;
+      // Query once and fall back to a single thread if it reports zero.
+      const std::size_t detected = thread_t::hardware_concurrency();
+      return (detected != 0) ? detected : 1;
+    }
+
+    /**
+     * @brief Remove the next task from the queue and hand it to the caller.
+     *
+     * @return The task that was removed.
+     */
+    [[nodiscard]] task_t pop_task() {
+      task_t next;
+      if constexpr (!priority_enabled)
+        next = std::move(tasks.front());
+      else  // `top()` is read-only, so cast away const to move the task out.
+        next = std::move(const_cast<pr_task&>(tasks.top()).task);
+      tasks.pop();
+      return next;
+    }
+
+    /**
+     * @brief Reset the pool with a new number of threads and a new
+     * initialization function. This member function implements the actual
+     * reset, while the public member function `reset()` also handles the case
+     * where the pool is paused.
+     *
+     * @param num_threads The number of threads to use.
+     * @param init An initialization function to run in each thread before it
+     * starts executing any submitted tasks.
+     */
+    template <typename F>
+    void reset_pool(const std::size_t num_threads, F&& init) {
+      wait();
+#ifndef __cpp_lib_jthread
+      destroy_threads();
+#endif
+      create_threads(num_threads, std::forward<F>(init));
+    }
+
+    /**
+     * @brief The function run by each thread. Calls `init_func`, then loops:
+     * waits for a task, pops it and runs it unlocked, discarding any exception
+     * it throws; wakes an active `wait()` once no task is running and the queue
+     * is empty (or the pool is paused). Calls `cleanup_func` after the loop.
+     *
+     * @param idx The index of this thread.
+     */
+    void worker(BS_THREAD_POOL_WORKER_TOKEN const std::size_t idx) {
+      this_thread::my_pool = this;
+      this_thread::my_index = idx;
+      init_func(idx);
+      while (true) {
+        std::unique_lock tasks_lock(tasks_mutex);
+        --tasks_running;  // This thread is not running a task at this point.
+        if constexpr (pause_enabled) {
+          if (waiting && (tasks_running == 0) && (paused || tasks.empty()))
+            tasks_done_cv.notify_all();
+        } else {
+          if (waiting && (tasks_running == 0) && tasks.empty())
+            tasks_done_cv.notify_all();
+        }
+        task_available_cv.wait(tasks_lock BS_THREAD_POOL_WAIT_TOKEN, [this] {
+          if constexpr (pause_enabled)
+            return !(paused || tasks.empty()) BS_THREAD_POOL_OR_STOP_CONDITION;
+          else
+            return !tasks.empty() BS_THREAD_POOL_OR_STOP_CONDITION;
+        });
+        if (BS_THREAD_POOL_STOP_CONDITION) break;
+        {
+          task_t task = pop_task();  // NOLINT(misc-const-correctness) In C++23
+                                     // this cannot be const since
+                                     // `std::move_only_function::operator()` is
+                                     // not a const member function.
+          ++tasks_running;
+          tasks_lock.unlock();  // Run the task without holding the queue lock.
+#ifdef __cpp_exceptions
+          try {
+#endif
+            task();
+#ifdef __cpp_exceptions
+          } catch (...) {  // Swallowed; must not escape the thread.
+          }
+#endif
+        }
+      }
+      cleanup_func(idx);
+      this_thread::my_index = std::nullopt;
+      this_thread::my_pool = std::nullopt;
+    }
+
+    // ============
+    // Private data
+    // ============
+
+    /**
+     * @brief A cleanup function to run in each thread right before it is
+     * destroyed, which will happen when the pool is destructed or reset. The
+     * function must have no return value, and can either take one argument, the
+     * thread index of type `std::size_t`, or zero arguments. The cleanup
+     * function must not throw any exceptions, as that will result in program
+     * termination. Any exceptions must be handled explicitly within the
+     * function. The default is an empty function, i.e., no cleanup will be
+     * performed.
+     */
+    function_t<void(std::size_t)> cleanup_func = [](std::size_t) {};
+
+    /**
+     * @brief An initialization function to run in each thread before it starts
+     * executing any submitted tasks. The function must have no return value,
+     * and can either take one argument, the thread index of type `std::size_t`,
+     * or zero arguments. It will be executed exactly once per thread, when the
+     * thread is first constructed. The initialization function must not throw
+     * any exceptions, as that will result in program termination. Any
+     * exceptions must be handled explicitly within the function. The default is
+     * an empty function, i.e., no initialization will be performed.
+     */
+    function_t<void(std::size_t)> init_func = [](std::size_t) {};
+
+    /**
+     * @brief A flag indicating whether the workers should pause. When set to
+     * `true`, the workers temporarily stop retrieving new tasks out of the
+     * queue, although any tasks already executing will keep running until they
+     * are finished. When set to `false` again, the workers resume retrieving
+     * tasks. Only enabled if the flag `BS::tp::pause` is enabled in the
+     * template parameter.
+     */
+    std::conditional_t<pause_enabled, bool, std::monostate> paused = {};
+
+/**
+ * @brief A condition variable to notify `worker()` that a new task has become
+ * available.
+ */
+#ifdef __cpp_lib_jthread
+    std::condition_variable_any
+#else
+    std::condition_variable
+#endif
+        task_available_cv;
+
+    /**
+     * @brief A condition variable to notify `wait()` that the tasks are done.
+     */
+    std::condition_variable tasks_done_cv;
+
+    /**
+     * @brief A queue of tasks to be executed by the threads.
+     */
+    std::conditional_t<priority_enabled, std::priority_queue<pr_task>,
+                       std::queue<task_t>>
+        tasks;
+
+    /**
+     * @brief A mutex to synchronize access to the task queue by different
+     * threads.
+     */
+    mutable std::mutex tasks_mutex;
+
+    /**
+     * @brief A counter for the total number of currently running tasks.
+     */
+    std::size_t tasks_running = 0;
+
+    /**
+     * @brief The number of threads in the pool.
+     */
+    std::size_t thread_count = 0;
+
+    /**
+     * @brief A smart pointer to manage the memory allocated for the threads.
+     */
+    std::unique_ptr<thread_t[]> threads = nullptr;
+
+    /**
+     * @brief A flag indicating that `wait()` is active and expects to be
+     * notified whenever a task is done.
+     */
+    bool waiting = false;
+
+#ifndef __cpp_lib_jthread
+    /**
+     * @brief A flag indicating to the workers to keep running. When set to
+     * `false`, the workers terminate permanently.
+     */
+    bool workers_running = false;
+#endif
+  };  // class thread_pool
+
+  /**
+   * @brief A utility class to synchronize printing to an output stream by
+   * different threads.
+   */
+  class [[nodiscard]] synced_stream {
+   public:
+    /**
+     * @brief Construct a new synced stream which prints to `std::cout`.
+     */
+    explicit synced_stream() { add_stream(std::cout); }
+
+    /**
+     * @brief Construct a new synced stream which prints to the given output
+     * stream(s).
+     *
+     * @tparam T The types of the output streams to print to.
+     * @param streams The output streams to print to.
+     */
+    template <typename... T>
+    explicit synced_stream(T&... streams) {
+      (add_stream(streams), ...);
+    }
+
+    /**
+     * @brief Add a stream to the list of output streams to print to.
+     *
+     * @param stream The stream.
+     */
+    void add_stream(std::ostream& stream) { out_streams.push_back(&stream); }
+
+    /**
+     * @brief Get a reference to a vector containing pointers to the output
+     * streams to print to.
+     *
+     * @return The output streams.
+     */
+    std::vector<std::ostream*>& get_streams() noexcept { return out_streams; }
+
+    /**
+     * @brief Print any number of items into the output stream. Ensures that no
+     * other threads print to this stream simultaneously, as long as they all
+     * exclusively use the same `BS::synced_stream` object to print.
+     *
+     * @tparam T The types of the items.
+     * @param items The items to print.
+     */
+    template <typename... T>
+    void print(const T&... items) {
+      const std::scoped_lock stream_lock(stream_mutex);
+      for (std::ostream* const stream : out_streams) (*stream << ... << items);
+    }
+
+    /**
+     * @brief Print any number of items into the output stream, followed by a
+     * newline character. Ensures that no other threads print to this stream
+     * simultaneously, as long as they all exclusively use the same
+     * `BS::synced_stream` object to print.
+     *
+     * @tparam T The types of the items.
+     * @param items The items to print.
+     */
+    template <typename... T>
+    void println(T&&... items) {
+      print(std::forward<T>(items)..., '\n');
+    }
+
+    /**
+     * @brief Remove a stream from the list of output streams to print to.
+     *
+     * @param stream The stream.
+     */
+    void remove_stream(std::ostream& stream) {
+      out_streams.erase(
+          std::remove(out_streams.begin(), out_streams.end(), &stream),
+          out_streams.end());
+    }
+
+    /**
+     * @brief A stream manipulator to pass to a `BS::synced_stream` (an explicit
+     * cast of `std::endl`). Prints a newline character to the stream, and then
+     * flushes it. Should only be used if flushing is desired, otherwise a
+     * newline character should be used instead.
+     */
+    inline static std::ostream& (&endl)(std::ostream&) =
+        static_cast<std::ostream& (&)(std::ostream&)>(std::endl);
+
+    /**
+     * @brief A stream manipulator to pass to a `BS::synced_stream` (an explicit
+     * cast of `std::flush`). Used to flush the stream.
+     */
+    inline static std::ostream& (&flush)(std::ostream&) =
+        static_cast<std::ostream& (&)(std::ostream&)>(std::flush);
+
+   private:
+    /**
+     * @brief The output streams to print to.
+     */
+    std::vector<std::ostream*> out_streams;
+
+    /**
+     * @brief A mutex to synchronize printing.
+     */
+    mutable std::mutex stream_mutex;
+  };  // class synced_stream
+
+#ifdef __cpp_lib_semaphore
+  using binary_semaphore = std::binary_semaphore;
+  template <std::ptrdiff_t LeastMaxValue = std::counting_semaphore<>::max()>
+  using counting_semaphore = std::counting_semaphore<LeastMaxValue>;
+#else
+  /**
+   * @brief A polyfill for `std::counting_semaphore`, to be used if C++20
+   * features are not available. A `counting_semaphore` is a synchronization
+   * primitive that allows more than one concurrent access to the same resource.
+   * The number of concurrent accessors is limited by the semaphore's counter,
+   * which is decremented when a thread acquires the semaphore and incremented
+   * when a thread releases the semaphore. If the counter is zero, a thread
+   * trying to acquire the semaphore will be blocked until another thread
+   * releases the semaphore.
+   *
+   * @tparam LeastMaxValue The least maximum value of the counter. (In this
+   * implementation, it is also the actual maximum value.)
+   */
+  template <std::ptrdiff_t LeastMaxValue =
+                std::numeric_limits<std::ptrdiff_t>::max()>
+  class [[nodiscard]] counting_semaphore {
+    static_assert(LeastMaxValue >= 0,
+                  "The least maximum value for a counting semaphore must not "
+                  "be negative.");
+
+   public:
+    /**
+     * @brief Construct a new counting semaphore with the given initial counter
+     * value.
+     *
+     * @param desired The initial counter value.
+     */
+    constexpr explicit counting_semaphore(const std::ptrdiff_t desired)
+        : counter(desired) {}
+
+    // The copy and move constructors and assignment operators are deleted. The
+    // semaphore cannot be copied or moved.
+    counting_semaphore(const counting_semaphore&) = delete;
+    counting_semaphore(counting_semaphore&&) = delete;
+    counting_semaphore& operator=(const counting_semaphore&) = delete;
+    counting_semaphore& operator=(counting_semaphore&&) = delete;
+    ~counting_semaphore() = default;
+
+    /**
+     * @brief Returns the internal counter's maximum possible value, which in
+     * this implementation is equal to `LeastMaxValue`.
+     *
+     * @return The internal counter's maximum possible value.
+     */
+    [[nodiscard]] static constexpr std::ptrdiff_t max() noexcept {
+      return LeastMaxValue;
+    }
+
+    /**
+     * @brief Atomically decrements the internal counter by 1 if it is greater
+     * than 0; otherwise blocks until it is greater than 0 and can successfully
+     * decrement the internal counter.
+     */
+    void acquire() {
+      std::unique_lock lock(mutex);
+      cv.wait(lock, [this] { return counter > 0; });
+      --counter;
+    }
+
+    /**
+     * @brief Atomically increments the internal counter. Any thread(s) waiting
+     * for the counter to be greater than 0, such as due to being blocked in
+     * `acquire()`, will subsequently be unblocked.
+     *
+     * @param update The amount to increment the internal counter by. Defaults
+     * to 1.
+     */
+    void release(const std::ptrdiff_t update = 1) {
+      {
+        const std::scoped_lock lock(mutex);
+        counter += update;
+      }
+      cv.notify_all();
+    }
+
+    /**
+     * @brief Tries to atomically decrement the internal counter by 1 if it is
+     * greater than 0; no blocking occurs regardless.
+     *
+     * @return `true` if decremented the internal counter, `false` otherwise.
+     */
+    bool try_acquire() {
+      std::scoped_lock lock(mutex);
+      if (counter > 0) {
+        --counter;
+        return true;
+      }
+      return false;
+    }
+
+    /**
+     * @brief Tries to atomically decrement the internal counter by 1 if it is
+     * greater than 0; otherwise blocks until it is greater than 0 and can
+     * successfully decrement the internal counter, or the `rel_time` duration
+     * has been exceeded.
+     *
+     * @tparam Rep An arithmetic type representing the number of ticks to wait.
+     * @tparam Period An `std::ratio` representing the length of each tick in
+     * seconds.
+     * @param rel_time The duration the function must wait. Note that the
+     * function may wait for longer.
+     * @return `true` if decremented the internal counter, `false` otherwise.
+     */
+    template <class Rep, class Period>
+    bool try_acquire_for(const std::chrono::duration<Rep, Period>& rel_time) {
+      std::unique_lock lock(mutex);
+      if (!cv.wait_for(lock, rel_time, [this] { return counter > 0; }))
+        return false;
+      --counter;
+      return true;
+    }
+
+    /**
+     * @brief Tries to atomically decrement the internal counter by 1 if it is
+     * greater than 0; otherwise blocks until it is greater than 0 and can
+     * successfully decrement the internal counter, or the `abs_time` time point
+     * has been passed.
+     *
+     * @tparam Clock The type of the clock used to measure time.
+     * @tparam Duration An `std::chrono::duration` type used to indicate the
+     * time point.
+     * @param abs_time The earliest time the function must wait until. Note that
+     * the function may wait for longer.
+     * @return `true` if decremented the internal counter, `false` otherwise.
+     */
+    template <class Clock, class Duration>
+    bool try_acquire_until(
+        const std::chrono::time_point<Clock, Duration>& abs_time) {
+      std::unique_lock lock(mutex);
+      if (!cv.wait_until(lock, abs_time, [this] { return counter > 0; }))
+        return false;
+      --counter;
+      return true;
+    }
+
+   private:
+    /**
+     * @brief The semaphore's counter.
+     */
+    std::ptrdiff_t counter;
+
+    /**
+     * @brief A condition variable used to wait for the counter.
+     */
+    std::condition_variable cv;
+
+    /**
+     * @brief A mutex used to synchronize access to the counter.
+     */
+    mutable std::mutex mutex;
+  };
+
+  /**
+   * @brief A polyfill for `std::binary_semaphore`, to be used if C++20 features
+   * are not available.
+   */
+  using binary_semaphore = counting_semaphore<1>;
+#endif
+}  // namespace BS
+#endif  // BS_THREAD_POOL_HPP
diff --git a/apps/roofer-app/CMakeLists.txt b/apps/roofer-app/CMakeLists.txt
index 59d24c7a..140465ec 100644
--- a/apps/roofer-app/CMakeLists.txt
+++ b/apps/roofer-app/CMakeLists.txt
@@ -1,12 +1,7 @@
 if(RF_BUILD_APPS)
   set(APP_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/roofer-app.cpp")
 
-  set(ROOFER_LINK_LIBRARIES roofer-extra fmt::fmt cmake_git_version_tracking)
-
-  if(NOT CMAKE_SYSTEM_NAME STREQUAL "Windows")
-    find_package(mimalloc CONFIG REQUIRED)
-    list(APPEND ROOFER_LINK_LIBRARIES $<IF:$<TARGET_EXISTS:mimalloc-static>,mimalloc-static,mimalloc>)
-  endif()
+  set(ROOFER_LINK_LIBRARIES roofer-extra fmt::fmt)
 
   add_executable("roofer" ${APP_SOURCES})
 
@@ -43,23 +38,48 @@ if(RF_BUILD_APPS)
     ARCHIVE DESTINATION lib
     LIBRARY DESTINATION lib
     RUNTIME DESTINATION bin)
-  if(DEFINED CMAKE_TOOLCHAIN_FILE)
-    install (DIRECTORY ${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/share/proj
-            DESTINATION "share"
-            FILES_MATCHING PATTERN "*" PATTERN "*.cmake" EXCLUDE)
-    install(DIRECTORY ${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/share/gdal
-            DESTINATION "share"
-            FILES_MATCHING PATTERN "*" PATTERN "*.cmake" EXCLUDE)
+  if(RF_GIT_HASH)
+    target_compile_definitions(roofer PRIVATE RF_GIT_HASH="${RF_GIT_HASH}")
   endif()
 endif()
 
-if(RF_BUILD_DOC_HELPER OR RF_BUILD_APPS)
+if(RF_BUILD_DOC_HELPER)
   add_executable("doc-helper" "doc-helper.cpp")
   set_target_properties("doc-helper" PROPERTIES CXX_STANDARD 20)
-  target_link_libraries("doc-helper" PRIVATE roofer-core fmt::fmt cmake_git_version_tracking)
-  # install(
-  #   TARGETS "doc-helper"
-  #   ARCHIVE DESTINATION lib
-  #   LIBRARY DESTINATION lib
-  #   RUNTIME DESTINATION bin)
+  target_link_libraries("doc-helper" PRIVATE roofer-core fmt::fmt)
+  install(
+     TARGETS "doc-helper"
+     ARCHIVE DESTINATION lib
+     LIBRARY DESTINATION lib
+     RUNTIME DESTINATION bin)
+  if(RF_GIT_HASH)
+    target_compile_definitions(doc-helper PRIVATE RF_GIT_HASH="${RF_GIT_HASH}")
+  endif()
+endif()
+
+# Get the --version flag working: use RF_GIT_HASH if it is defined (e.g. by the nix build); otherwise fall back to the CMake helper script that generates git_hash.h.
+if(RF_BUILD_DOC_HELPER OR RF_BUILD_APPS)
+  if(NOT RF_GIT_HASH)
+    set(GIT_HASH_HEADER ${CMAKE_BINARY_DIR}/git_hash.h)
+
+    add_custom_command(
+        OUTPUT ${GIT_HASH_HEADER}
+        COMMAND ${CMAKE_COMMAND} -DOUT=${GIT_HASH_HEADER} -P ${CMAKE_SOURCE_DIR}/cmake/gen_git_hash_header.cmake
+        WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+        COMMENT "Generating git_hash.h"
+        VERBATIM
+    )
+
+    add_custom_target(update_git_hash ALL DEPENDS ${GIT_HASH_HEADER})
+
+    if (RF_BUILD_APPS)
+      target_include_directories(roofer PRIVATE ${CMAKE_BINARY_DIR})
+      add_dependencies(roofer update_git_hash)
+    endif()
+
+    if (RF_BUILD_DOC_HELPER)
+      target_include_directories("doc-helper" PRIVATE ${CMAKE_BINARY_DIR})
+      add_dependencies("doc-helper" update_git_hash)
+    endif()
+  endif()
 endif()
diff --git a/apps/roofer-app/allocators.hpp b/apps/roofer-app/allocators.hpp
index 5bf4ed60..efde1699 100644
--- a/apps/roofer-app/allocators.hpp
+++ b/apps/roofer-app/allocators.hpp
@@ -12,134 +12,18 @@ namespace {
   };
   HeapAllocationCounter heap_allocation_counter;
 }  // namespace
-#endif
-
-/*
- * Code snippet below is taken from
- * https://github.com/microsoft/mimalloc/blob/dev/include/mimalloc-new-delete.h
- * and modified to work with the roofer trace feature for heap memory usage.
- */
-#if defined(IS_LINUX) || defined(IS_MACOS)
-#if defined(_MSC_VER) && defined(_Ret_notnull_) && \
-    defined(_Post_writable_byte_size_)
-   // stay consistent with VCRT definitions
-#define mi_decl_new(n) \
-  mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n)
-#define mi_decl_new_nothrow(n)                                                 \
-  mi_decl_nodiscard mi_decl_restrict _Ret_maybenull_ _Success_(return != NULL) \
-      _Post_writable_byte_size_(n)
-#else
-#define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict
-#define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict
-#endif
-
-void operator delete(void* p) noexcept { mi_free(p); };
-void operator delete[](void* p) noexcept { mi_free(p); };
 
-void operator delete(void* p, const std::nothrow_t&) noexcept { mi_free(p); }
-void operator delete[](void* p, const std::nothrow_t&) noexcept { mi_free(p); }
-
-mi_decl_new(n) void* operator new(std::size_t n) noexcept(false) {
-#ifdef RF_ENABLE_HEAP_TRACING
-  heap_allocation_counter.total_allocated += n;
-#endif
-  return mi_new(n);
-}
-mi_decl_new(n) void* operator new[](std::size_t n) noexcept(false) {
-#ifdef RF_ENABLE_HEAP_TRACING
-  heap_allocation_counter.total_allocated += n;
-#endif
-  return mi_new(n);
+void* operator new(size_t size) {
+  heap_allocation_counter.total_allocated += size;
+  return malloc(size);
 }
-
-mi_decl_new_nothrow(n) void* operator new(std::size_t n,
-                                          const std::nothrow_t& tag) noexcept {
-  (void)(tag);
-#ifdef RF_ENABLE_HEAP_TRACING
-  heap_allocation_counter.total_allocated += n;
-#endif
-  return mi_new_nothrow(n);
-}
-mi_decl_new_nothrow(n) void* operator new[](
-    std::size_t n, const std::nothrow_t& tag) noexcept {
-  (void)(tag);
-#ifdef RF_ENABLE_HEAP_TRACING
-  heap_allocation_counter.total_allocated += n;
-#endif
-  return mi_new_nothrow(n);
-}
-
-#if (__cplusplus >= 201402L || _MSC_VER >= 1916)
-void operator delete(void* p, std::size_t n) noexcept {
-#ifdef RF_ENABLE_HEAP_TRACING
-  heap_allocation_counter.total_freed += n;
-#endif
-  mi_free_size(p, n);
-};
-void operator delete[](void* p, std::size_t n) noexcept {
-#ifdef RF_ENABLE_HEAP_TRACING
-  heap_allocation_counter.total_freed += n;
-#endif
-  mi_free_size(p, n);
+void operator delete(void* memory, size_t size) noexcept {
+  heap_allocation_counter.total_freed += size;
+  free(memory);
 };
-#endif
 
-#if (__cplusplus > 201402L || defined(__cpp_aligned_new))
-void operator delete(void* p, std::align_val_t al) noexcept {
-  mi_free_aligned(p, static_cast<size_t>(al));
-}
-void operator delete[](void* p, std::align_val_t al) noexcept {
-  mi_free_aligned(p, static_cast<size_t>(al));
-}
-void operator delete(void* p, std::size_t n, std::align_val_t al) noexcept {
-#ifdef RF_ENABLE_HEAP_TRACING
-  heap_allocation_counter.total_freed += n;
 #endif
-  mi_free_size_aligned(p, n, static_cast<size_t>(al));
-};
-void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept {
-#ifdef RF_ENABLE_HEAP_TRACING
-  heap_allocation_counter.total_freed += n;
-#endif
-  mi_free_size_aligned(p, n, static_cast<size_t>(al));
-};
-void operator delete(void* p, std::align_val_t al,
-                     const std::nothrow_t&) noexcept {
-  mi_free_aligned(p, static_cast<size_t>(al));
-}
-void operator delete[](void* p, std::align_val_t al,
-                       const std::nothrow_t&) noexcept {
-  mi_free_aligned(p, static_cast<size_t>(al));
-}
 
-void* operator new(std::size_t n, std::align_val_t al) noexcept(false) {
-#ifdef RF_ENABLE_HEAP_TRACING
-  heap_allocation_counter.total_allocated += n;
-#endif
-  return mi_new_aligned(n, static_cast<size_t>(al));
-}
-void* operator new[](std::size_t n, std::align_val_t al) noexcept(false) {
-#ifdef RF_ENABLE_HEAP_TRACING
-  heap_allocation_counter.total_allocated += n;
-#endif
-  return mi_new_aligned(n, static_cast<size_t>(al));
-}
-void* operator new(std::size_t n, std::align_val_t al,
-                   const std::nothrow_t&) noexcept {
-#ifdef RF_ENABLE_HEAP_TRACING
-  heap_allocation_counter.total_allocated += n;
-#endif
-  return mi_new_aligned_nothrow(n, static_cast<size_t>(al));
-}
-void* operator new[](std::size_t n, std::align_val_t al,
-                     const std::nothrow_t&) noexcept {
-#ifdef RF_ENABLE_HEAP_TRACING
-  heap_allocation_counter.total_allocated += n;
-#endif
-  return mi_new_aligned_nothrow(n, static_cast<size_t>(al));
-}
-#endif
-#endif
 /*
  * Author:  David Robert Nadeau
  * Site:    http://NadeauSoftware.com/
diff --git a/apps/roofer-app/config.hpp b/apps/roofer-app/config.hpp
index e199a2e1..c802f583 100644
--- a/apps/roofer-app/config.hpp
+++ b/apps/roofer-app/config.hpp
@@ -38,7 +38,7 @@
 #include <list>
 #include <filesystem>
 #include <utility>
-#include "git.h"
+#include "version.hpp"
 
 namespace roofer::enums {
   enum TerrainStrategy {
@@ -912,11 +912,7 @@ struct RooferConfigHandler {
   }
 
   void print_version() {
-    std::cout << std::format(
-        "roofer {} ({}{}{})\n", git_Describe(),
-        std::strcmp(git_Branch(), "main") ? ""
-                                          : std::format("{}, ", git_Branch()),
-        git_AnyUncommittedChanges() ? "dirty, " : "", git_CommitDate());
+    std::cout << std::format("roofer {} ({})\n", RF_VERSION, RF_GIT_HASH);
   }
 
   void parse_cli_first_pass(CLIArgs& c) {
diff --git a/apps/roofer-app/roofer-app.cpp b/apps/roofer-app/roofer-app.cpp
index 7fa8e67b..30ae9d7c 100644
--- a/apps/roofer-app/roofer-app.cpp
+++ b/apps/roofer-app/roofer-app.cpp
@@ -81,10 +81,7 @@ namespace fs = std::filesystem;
 #include <rerun.hpp>
 #endif
 
-#if defined(IS_LINUX) || defined(IS_MACOS)
-#include <new>
-#include <mimalloc-override.h>
-#else
+#if defined(IS_WINDOWS)
 #undef RF_ENABLE_HEAP_TRACING
 #endif
 #include "allocators.hpp"
diff --git a/apps/roofer-app/version.hpp b/apps/roofer-app/version.hpp
new file mode 100644
index 00000000..e0f7d4af
--- /dev/null
+++ b/apps/roofer-app/version.hpp
@@ -0,0 +1,9 @@
+// Version information for the roofer application.
+// The RF_VERSION literal is located through the search pattern configured in
+// .bumpversion.toml, so keep it in the exact form RF_VERSION "<version>".
+#pragma once
+#define RF_VERSION "1.0.0-beta.7"
+// Pull in the generated header only when RF_GIT_HASH is not already defined.
+#ifndef RF_GIT_HASH
+#include "git_hash.h"
+#endif
diff --git a/cmake/gen_git_hash_header.cmake b/cmake/gen_git_hash_header.cmake
new file mode 100644
index 00000000..ae2c15d4
--- /dev/null
+++ b/cmake/gen_git_hash_header.cmake
@@ -0,0 +1,41 @@
+# gen_git_hash_header.cmake
+#
+# Writes a header that defines RF_GIT_HASH as the abbreviated commit hash of
+# the source tree (with a "-dirty" suffix when there are uncommitted changes).
+#
+# Input:
+#   OUT - path of the header to write; must be set by the caller.
+
+if(NOT DEFINED OUT OR "${OUT}" STREQUAL "")
+  message(FATAL_ERROR "gen_git_hash_header.cmake: OUT is not set")
+endif()
+
+# --match=NeVeRmAtCh is meant to match no tag, so --always makes git print only
+# the abbreviated hash; --dirty appends "-dirty" for a modified work tree.
+# Run from this script's directory so git finds the repository regardless of
+# the directory the script was started from.
+execute_process(
+    COMMAND git describe --match=NeVeRmAtCh --always --dirty
+    WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}"
+    RESULT_VARIABLE GIT_RESULT
+    OUTPUT_VARIABLE HASH
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    ERROR_QUIET
+)
+
+# No git executable or no repository (e.g. an exported source tree): use a
+# placeholder instead of emitting an empty string.
+if(NOT "${GIT_RESULT}" STREQUAL "0" OR "${HASH}" STREQUAL "")
+  set(HASH "unknown")
+endif()
+
+# Rewrite the header only when its content changes, so that an unchanged hash
+# does not touch the file and force dependents to recompile.
+set(HEADER_CONTENT "#pragma once\n#define RF_GIT_HASH \"${HASH}\"\n")
+set(PREVIOUS_CONTENT "")
+if(EXISTS "${OUT}")
+  file(READ "${OUT}" PREVIOUS_CONTENT)
+endif()
+if(NOT "${PREVIOUS_CONTENT}" STREQUAL "${HEADER_CONTENT}")
+  file(WRITE "${OUT}" "${HEADER_CONTENT}")
+endif()
diff --git a/conanfile.py b/conanfile.py
new file mode 100644
index 00000000..a269b619
--- /dev/null
+++ b/conanfile.py
@@ -0,0 +1,105 @@
+"""Conan recipe that resolves roofer's third-party dependencies."""
+
+from conan import ConanFile
+from conan.tools.cmake import CMakeDeps, CMakeToolchain
+
+
+class RooferRecipe(ConanFile):
+    """Declares roofer's requirements and generates the CMake integration files."""
+
+    name = "roofer"
+    version = "1.0.0"
+    settings = ("os", "compiler", "build_type", "arch")
+
+    # Feature switches; generate() copies each one into an RF_* CMake variable.
+    options = {
+        "build_apps": [True, False],
+        "use_spdlog": [True, False],
+        "use_val3dity": [True, False],
+        "build_bindings": [True, False],
+        "build_testing": [True, False],
+    }
+    default_options = {
+        "build_apps": True,
+        "use_spdlog": True,
+        "use_val3dity": False,
+        "build_bindings": False,
+        "build_testing": False,
+    }
+
+    def requirements(self):
+        """Request the packages needed by the enabled options."""
+        opts = self.options
+
+        # Always required, whatever the options are.
+        for ref in ("cgal/6.1.1", "eigen/3.4.0", "fmt/11.1.3"):
+            self.requires(ref)
+
+        if opts.use_spdlog or opts.use_val3dity:
+            self.requires("spdlog/1.15.1")
+
+        if opts.build_apps:
+            self.requires("geos/3.13.0", override=True)
+            for ref in ("gdal/3.12.1", "nlohmann_json/3.11.3", "laslib/2.0.2"):
+                self.requires(ref)
+            # bshoshany-thread-pool is header-only and not yet in ConanCenter;
+            # it is left as a CPM fallback for now (RF_USE_CPM=ON).
+
+        if opts.build_bindings:
+            self.requires("pybind11/3.0.1")
+
+        if opts.use_val3dity:
+            for ref in ("pugixml/1.15", "tclap/1.2.5"):
+                self.requires(ref)
+
+        if opts.build_testing:
+            self.requires("catch2/3.7.1")
+
+    def configure(self):
+        """Set option values on the gdal and libtiff dependencies."""
+        # Keep GDAL close to the minimum feature set roofer uses:
+        # GeoPackage, PostgreSQL/PostGIS, and GeoTIFF.
+        gdal_settings = {
+            "with_arrow": False,
+            "with_curl": False,
+            "with_expat": False,
+            "with_geos": True,
+            "with_gif": False,
+            "with_hdf4": False,
+            "with_hdf5": False,
+            "with_jpeg": False,
+            "with_lerc": False,
+            "with_libdeflate": False,
+            "with_opencl": False,
+            "with_pg": True,  # postgresql
+            "with_png": False,
+            "with_qhull": False,
+            "with_sqlite3": True,
+            "gdal_optional_drivers": False,
+            # Keep OGR optional drivers enabled: the PG driver depends on
+            # PGDump, and ConanCenter does not expose per-driver toggles here.
+            "ogr_optional_drivers": True,
+            "tools": False,
+        }
+        for option_name, value in gdal_settings.items():
+            setattr(self.options["gdal"], option_name, value)
+
+        for option_name in ("cxx", "jpeg"):
+            setattr(self.options["libtiff"], option_name, False)
+
+    def generate(self):
+        """Write the CMakeDeps files and a toolchain that carries the RF_* switches."""
+        CMakeDeps(self).generate()
+
+        toolchain = CMakeToolchain(self)
+        toolchain.variables["RF_USE_CPM"] = False
+        cmake_switches = {
+            "RF_BUILD_APPS": self.options.build_apps,
+            "RF_USE_LOGGER_SPDLOG": self.options.use_spdlog,
+            "RF_USE_VAL3DITY": self.options.use_val3dity,
+            "RF_BUILD_BINDINGS": self.options.build_bindings,
+            "RF_BUILD_TESTING": self.options.build_testing,
+        }
+        for cmake_name, option in cmake_switches.items():
+            toolchain.variables[cmake_name] = bool(option)
+        toolchain.generate()
diff --git a/distribution/macOS/bundle_libcxx.sh b/distribution/macOS/bundle_libcxx.sh
deleted file mode 100755
index 3bec91ef..00000000
--- a/distribution/macOS/bundle_libcxx.sh
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/bin/bash
-
-# Script to bundle dynamic libraries for roofer binary into ../lib and update paths
-
-set -e
-
-# Define paths
-BINARY="roofer"
-LIB_DIR="$(dirname "$BINARY")/../lib"
-mkdir -p "$LIB_DIR"
-
-# Function to check if a library is a system library
-is_system_lib() {
-    local lib_path="$1"
-    if [[ "$lib_path" == /usr/lib/* || "$lib_path" == /System/Library/* ]]; then
-        return 0 # True (is system library)
-    else
-        return 1 # False (not system library)
-    fi
-}
-
-# Function to get dependencies of a binary or library
-get_deps() {
-    local file="$1"
-    otool -L "$file" | tail -n +2 | awk '{print $1}' | while read -r dep; do
-        echo "$dep"
-    done
-}
-
-# Copy non-system libraries to LIB_DIR and update paths
-copy_and_update_lib() {
-    local lib_path="$1"
-    local target_file="$2" # Binary or library to update
-    local lib_name=$(basename "$lib_path")
-    local new_path="@executable_path/../lib/$lib_name"
-
-    if ! is_system_lib "$lib_path"; then
-        # Copy library to LIB_DIR
-        cp "$lib_path" "$LIB_DIR/$lib_name"
-        chmod +w "$LIB_DIR/$lib_name"
-
-        # Update the library's own ID
-        install_name_tool -id "$new_path" "$LIB_DIR/$lib_name"
-
-        # Update the path in the target file
-        install_name_tool -change "$lib_path" "$new_path" "$target_file"
-
-        # Process dependencies of the copied library
-        for dep in $(get_deps "$lib_path"); do
-            local dep_name=$(basename "$dep")
-            local new_dep_path="@executable_path/../lib/$dep_name"
-            if ! is_system_lib "$dep"; then
-                cp "$dep" "$LIB_DIR/$dep_name"
-                chmod +w "$LIB_DIR/$dep_name"
-                install_name_tool -id "$new_dep_path" "$LIB_DIR/$dep_name"
-                install_name_tool -change "$dep" "$new_dep_path" "$LIB_DIR/$lib_name"
-            fi
-        done
-    fi
-}
-
-# Ensure the binary is writable
-chmod +w "$BINARY"
-
-# Process dependencies of the roofer binary
-for dep in $(get_deps "$BINARY"); do
-    copy_and_update_lib "$dep" "$BINARY"
-done
-
-echo "Bundling complete. Libraries copied to $LIB_DIR and paths updated."
diff --git a/docker/Dockerfile b/docker/Dockerfile
index e71832d2..9c52a5b5 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,95 +1,77 @@
-FROM ubuntu:24.04 AS vcpkg-build
-
-# Set the location for the vcpkg download cache
-RUN mkdir -p /var/cache/vcpkg
-ENV VCPKG_ROOT=/opt/vcpkg
-ENV VCPKG_DOWNLOADS=/var/cache/vcpkg
-ENV VCPKG_DEFAULT_TRIPLET=x64-linux
-ENV VCPKG_BUILD_TYPE=release
-
-# Install dependencies for building vcpkg and packages
-RUN apt-get update \
-    && apt-get install -y g++ gcc cmake git curl zip unzip tar openssl ca-certificates \
-      pkg-config autoconf-archive autoconf libtool autotools-dev automake
-
-# Clone vcpkg
-WORKDIR /opt
-RUN git clone https://github.com/microsoft/vcpkg.git
-
-# Bootstrap vcpkg
-WORKDIR /opt/vcpkg
-RUN ./bootstrap-vcpkg.sh
-
-
 FROM ubuntu:24.04 AS builder
+
 ARG JOBS=2
+ARG VERSION=unknown
 
 ARG ROOFER_ROOT=/opt/roofer
 ARG ROOFER_INSTALL=$ROOFER_ROOT/install
 ARG ROOFER_BUILD=$ROOFER_ROOT/build
-# Set the VCPKG_ROOT environment variable so your build system can find it
-ENV VCPKG_ROOT=/opt/vcpkg
-# Cache the downloads from vcpkg in the builder stage as well
-ENV VCPKG_DOWNLOADS=/var/cache/vcpkg
-ENV VCPKG_DEFAULT_TRIPLET=x64-linux
-ENV VCPKG_BUILD_TYPE=release
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PATH=/opt/conan-venv/bin:$PATH
 
 RUN --mount=target=/var/lib/apt/lists,type=cache,sharing=locked \
     --mount=target=/var/cache/apt,type=cache,sharing=locked \
     rm -f /etc/apt/apt.conf.d/docker-clean \
     && apt-get update \
-    && apt-get -y install g++ gcc cmake git curl zip unzip tar openssl ca-certificates \
-      pkg-config ninja-build autoconf-archive autoconf libtool bison flex autotools-dev automake linux-libc-dev \
-      python3 bison flex \
+    && apt-get -y install \
+      ca-certificates \
+      cmake \
+      curl \
+      g++ \
+      gcc \
+      git \
+      ninja-build \
+      python3 \
+      python3-venv \
     && apt-get clean  \
     && rm -rf /var/lib/apt/lists/*
 
-# Copy vcpkg artifacts from the previous stage
-COPY --from=vcpkg-build /opt/vcpkg /opt/vcpkg
-COPY --from=vcpkg-build /var/cache/vcpkg /var/cache/vcpkg
+RUN python3 -m venv /opt/conan-venv \
+    && /opt/conan-venv/bin/pip install --upgrade pip \
+    && /opt/conan-venv/bin/pip install "conan>=2,<3"
 
-RUN mkdir -p $ROOFER_ROOT/install $ROOFER_ROOT/build/vcpkg_installed
+RUN mkdir -p $ROOFER_ROOT/install $ROOFER_ROOT/build
 WORKDIR $ROOFER_ROOT
-# We only need .git because of the cmake-git-version-tracking
-COPY --link ./.git $ROOFER_ROOT/.git
+
 COPY --link ./apps $ROOFER_ROOT/apps
 COPY --link ./cmake $ROOFER_ROOT/cmake
 COPY --link ./include $ROOFER_ROOT/include
-COPY --link ./rooferpy $ROOFER_ROOT/rooferpy
 COPY --link ./src $ROOFER_ROOT/src
 COPY --link ./CMakeLists.txt $ROOFER_ROOT/CMakeLists.txt
-COPY --link ./CMakePresets.json $ROOFER_ROOT/CMakePresets.json
 COPY --link ./LICENSE $ROOFER_ROOT/LICENSE
 COPY --link ./README.md $ROOFER_ROOT/README.md
-# COPY --link ./requirements.txt $ROOFER_ROOT/requirements.txt
-COPY --link ./vcpkg.json $ROOFER_ROOT/vcpkg.json
-
-WORKDIR $ROOFER_ROOT
-RUN $VCPKG_ROOT/vcpkg x-update-baseline
-
-RUN --mount=target=$ROOFER_BUILD/vcpkg_installed,type=cache cmake \
-    --preset vcpkg-minimal \
-    -DRF_USE_LOGGER_SPDLOG=ON \
-    -DRF_BUILD_APPS=ON \
-    -DRF_USE_VAL3DITY=ON \
-    -DCMAKE_INSTALL_PREFIX=$ROOFER_INSTALL \
-    -S $ROOFER_ROOT \
-    -B $ROOFER_BUILD \
-    -G Ninja \
-    && cmake --build $ROOFER_BUILD -j $JOBS --target install --config Release \
-    && cp -r $ROOFER_BUILD/vcpkg_installed/x64-linux/share/gdal $ROOFER_INSTALL/gdal \
-    && cp -r $ROOFER_BUILD/vcpkg_installed/x64-linux/share/proj $ROOFER_INSTALL/proj
+COPY --link ./conanfile.py $ROOFER_ROOT/conanfile.py
+# Resolve dependencies with Conan, then configure with Ninja, build and install roofer.
+RUN --mount=type=cache,target=/root/.conan2 \
+    conan profile detect --force \
+    && conan install . \
+      --output-folder=$ROOFER_BUILD \
+      --build=missing \
+      --settings=build_type=Release \
+      --settings=compiler.cppstd=20 \
+      --options="&:build_apps=True" \
+      --options="&:use_spdlog=True" \
+      --options="&:use_val3dity=False" \
+      --options="&:build_bindings=False" \
+      --options="&:build_testing=False" \
+    && cmake -S $ROOFER_ROOT -B $ROOFER_BUILD \
+      -G Ninja \
+      -DCMAKE_TOOLCHAIN_FILE=$ROOFER_BUILD/conan_toolchain.cmake \
+      -DCMAKE_BUILD_TYPE=Release \
+      -DCMAKE_INSTALL_PREFIX=$ROOFER_INSTALL \
+      -DRF_GIT_HASH=$VERSION \
+    && cmake --build $ROOFER_BUILD -j $JOBS --target install --config Release
 
 
 FROM ubuntu:24.04 AS production
+
 ARG ROOFER_ROOT=/opt/roofer
-ARG ROOFER_INSTALL=$ROOFER_ROOT/install
-ARG ROOFER_BUILD=$ROOFER_ROOT/build
 
-COPY --link --from=builder $ROOFER_INSTALL/bin /opt/roofer/bin
-COPY --link --from=builder $ROOFER_INSTALL/gdal /opt/roofer/share/gdal
-COPY --link --from=builder $ROOFER_INSTALL/proj /opt/roofer/share/proj
+COPY --link --from=builder $ROOFER_ROOT/install/ /opt/roofer/
 
 ENV GDAL_DATA=/opt/roofer/share/gdal
 ENV PROJ_DATA=/opt/roofer/share/proj
 ENV PATH=/opt/roofer/bin:$PATH
+
+ENTRYPOINT ["roofer"]
diff --git a/docker/nix.dockerfile b/docker/nix.dockerfile
new file mode 100644
index 00000000..ece127cb
--- /dev/null
+++ b/docker/nix.dockerfile
@@ -0,0 +1,30 @@
+# Two-stage build adapted from https://mitchellh.com/writing/nix-with-dockerfiles
+
+# Stage 1: Nix builder
+FROM nixos/nix:latest AS builder
+
+# Copy the whole build context and make it the working directory.
+COPY . /tmp/build
+WORKDIR /tmp/build
+
+# Build the flake in the working dir; later steps read the output via ./result
+RUN nix \
+  --extra-experimental-features "nix-command flakes" \
+  --option filter-syscalls false \
+  build
+
+# Copy the Nix store closure into a directory. The Nix store closure is the
+# entire set of Nix store values that we need for our build.
+RUN mkdir /tmp/nix-store-closure
+RUN cp -R $(nix-store -qR result/) /tmp/nix-store-closure
+
+# Stage 2: the final image is based on scratch. We copy a bunch of Nix
+# dependencies, but they're fully self-contained so we don't need Nix anymore.
+FROM scratch
+
+WORKDIR /app
+
+# Copy the store closure to /nix/store and the build result to /app
+COPY --from=builder /tmp/nix-store-closure /nix/store
+COPY --from=builder /tmp/build/result /app
+ENTRYPOINT [ "/app/bin/roofer" ]
diff --git a/docs/developers.md b/docs/developers.md
index 17c2c3bd..d2b17283 100644
--- a/docs/developers.md
+++ b/docs/developers.md
@@ -1,51 +1,113 @@
 # Building from source
 
-## Compilation with Nix
+## Compilation with Conan
 
-The easiest way to get all the required dependencies to build roofer is to use [Nix](https://nixos.org). To install nix you can use the [install script from Determinate Systems](https://zero-to-nix.com/start/install/#run). At this moment Nix only works on Linux and macOS.
+Conan is the recommended way to build roofer from source.
 
-Once Nix is installed you can setup the development environment and build roofer like this:
+Install Conan 2 and then configure and build the project like this:
 
 ```sh
 git clone https://github.com/3DBAG/roofer.git
 cd roofer
-nix develop
-mkdir build
-cmake --preset vcpkg-minimal -S . -B build
+conan profile detect --force
+conan install . \
+  --output-folder=build \
+  --build=missing \
+  --settings=build_type=Release \
+  --settings=compiler.cppstd=20 \
+  --options="&:build_apps=True" \
+  --options="&:use_spdlog=True" \
+  --options="&:use_val3dity=False" \
+  --options="&:build_bindings=False" \
+  --options="&:build_testing=False"
+# Conan forwards the package options above to the matching RF_* CMake options.
+cmake -S . -B build \
+  -G Ninja \
+  -DCMAKE_TOOLCHAIN_FILE=build/conan_toolchain.cmake \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_INSTALL_PREFIX=$PWD/install
 cmake --build build
 # Optionally, install roofer
 cmake --install build
 ```
 
-## Compilation without Nix
+## Compilation with Nix
 
-It is recommended to use [vcpkg](https://vcpkg.io) to build **roofer**.
+If you prefer Nix, you can use the provided development shell. At the moment, Nix is only available on Linux and macOS.
 
-Follow the [vcpkg instructions](https://learn.microsoft.com/en-gb/vcpkg/get_started/get-started?pivots=shell-cmd) to set it up.
+Once Nix is installed you can set up the development environment and build roofer like this:
 
-After *vcpkg* is set up, set the ``VCPKG_ROOT`` environment variable to point to the directory where vcpkg is installed.
+```sh
+git clone https://github.com/3DBAG/roofer.git
+cd roofer
+nix develop
+cmake -S . -B build \
+  -G Ninja \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DRF_BUILD_APPS=ON \
+  -DRF_USE_LOGGER_SPDLOG=ON \
+  -DRF_USE_VAL3DITY=OFF \
+  -DRF_BUILD_BINDINGS=OFF \
+  -DRF_BUILD_TESTING=OFF \
+  -DRF_USE_CPM=OFF
+cmake --build build
+# Optionally, install roofer
+cmake --install build
+```
 
-On *macOS* you need to install additional build tools:
+If you just want the packaged build outputs, `nix build` also works:
 
-```{code-block} shell
-brew install autoconf autoconf-archive automake libtool
-export PATH="/opt/homebrew/opt/m4/bin:$PATH"
+```sh
+nix build .#default
+nix build .#rooferpy
 ```
 
-On *Ubuntu* you need to install additional build tools:
+## Documentation
+
+To build the documentation locally, first build the documentation helper and Python bindings.
+
+### With Conan
 
-```{code-block} shell
-apt install autoconf bison flex libtool
+```sh
+conan profile detect --force
+conan install . \
+  --output-folder=build \
+  --build=missing \
+  --settings=build_type=Release \
+  --settings=compiler.cppstd=20 \
+  --options="&:build_apps=False" \
+  --options="&:use_spdlog=False" \
+  --options="&:use_val3dity=False" \
+  --options="&:build_bindings=True" \
+  --options="&:build_testing=False"
+# Conan forwards the package options above to the matching RF_* CMake options.
+cmake -S . -B build \
+  -G Ninja \
+  -DCMAKE_TOOLCHAIN_FILE=build/conan_toolchain.cmake \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_INSTALL_PREFIX=$PWD/install \
+  -DRF_BUILD_DOC_HELPER=ON
+cmake --build build --target rooferpy doc-helper
+cmake --install build
+cd docs
+make html
 ```
 
-Clone the roofer repository and use one of the CMake presets to build the roofer.
+### With Nix
 
-```{code-block} shell
-git clone https://github.com/3DBAG/roofer.git
-cd roofer
-mkdir build
-cmake --preset vcpkg-minimal -S . -B build
-cmake --build build
-# Optionally, install roofer
+```sh
+nix develop
+cmake -S . -B build \
+  -G Ninja \
+  -DRF_BUILD_APPS=OFF \
+  -DRF_USE_LOGGER_SPDLOG=OFF \
+  -DRF_USE_VAL3DITY=OFF \
+  -DRF_BUILD_BINDINGS=ON \
+  -DRF_BUILD_TESTING=OFF \
+  -DRF_BUILD_DOC_HELPER=ON \
+  -DRF_USE_CPM=OFF
+cmake --build build --target rooferpy doc-helper
 cmake --install build
+cd docs
+make html
 ```
diff --git a/flake.lock b/flake.lock
index d7eb991d..5268aed5 100644
--- a/flake.lock
+++ b/flake.lock
@@ -2,23 +2,40 @@
   "nodes": {
     "nixpkgs": {
       "locked": {
-        "lastModified": 1746904237,
-        "narHash": "sha256-3e+AVBczosP5dCLQmMoMEogM57gmZ2qrVSrmq9aResQ=",
-        "owner": "NixOS",
+        "lastModified": 1765934234,
+        "narHash": "sha256-pJjWUzNnjbIAMIc5gRFUuKCDQ9S1cuh3b2hKgA7Mc4A=",
+        "owner": "nixos",
         "repo": "nixpkgs",
-        "rev": "d89fc19e405cb2d55ce7cc114356846a0ee5e956",
+        "rev": "af84f9d270d404c17699522fab95bbf928a2d92f",
         "type": "github"
       },
       "original": {
-        "owner": "NixOS",
-        "ref": "nixos-unstable",
+        "owner": "nixos",
+        "ref": "nixpkgs-unstable",
         "repo": "nixpkgs",
         "type": "github"
       }
     },
     "root": {
       "inputs": {
-        "nixpkgs": "nixpkgs"
+        "nixpkgs": "nixpkgs",
+        "val3dity-src": "val3dity-src"
+      }
+    },
+    "val3dity-src": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1757079329,
+        "narHash": "sha256-ovrJ6Oqx3F8QECoWJJLHRaOczfqZ0jNV0TP99BL93M4=",
+        "owner": "ylannl",
+        "repo": "val3dity",
+        "rev": "54f7350a31ce30e0801b64bef5e9efdda1f358c3",
+        "type": "github"
+      },
+      "original": {
+        "owner": "ylannl",
+        "repo": "val3dity",
+        "type": "github"
       }
     }
   },
diff --git a/flake.nix b/flake.nix
index de600033..bcd7ab3d 100644
--- a/flake.nix
+++ b/flake.nix
@@ -1,74 +1,232 @@
 {
   description = "Development environment for Roofer";
 
-  inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+  inputs.nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable";
+  # inputs.nixpkgs.url = "github:ylannl/nixpkgs/gdalMinimal";
+  inputs.val3dity-src.url = "github:ylannl/val3dity";
+  inputs.val3dity-src.flake = false;
 
-  outputs = { nixpkgs, ... }:
+  outputs = { self, nixpkgs, val3dity-src, ... }:
     let
-      supportedSystems = [ "aarch64-darwin" "x86_64-linux" ];
+      supportedSystems = [ "x86_64-darwin" "aarch64-darwin" "x86_64-linux" "aarch64-linux" ];
       forAllSystems = nixpkgs.lib.genAttrs supportedSystems;
     in {
+      packages = forAllSystems (system:
+        let
+          pkgs = import nixpkgs { system = system; config.allowUnfree = true; };
+          apple_sdk = pkgs.apple-sdk_15;
+          py = pkgs.python313;
+          shortRev = self.shortRev or self.dirtyShortRev or "unknown";
+
+          val3dity = pkgs.stdenv.mkDerivation {
+            pname = "val3dity";
+            version = "2.5.3";
+            src = val3dity-src;
+
+            nativeBuildInputs = with pkgs; [ cmake ninja ];
+            buildInputs = with pkgs; [
+              cgal gmp mpfr eigen
+              geos
+              spdlog
+              pugixml
+              tclap
+              boost
+              nlohmann_json
+            ];
+
+            cmakeFlags = [ "-DVAL3DITY_LIBRARY=ON" "-DVAL3DITY_USE_INTERNAL_DEPS=OFF" "-G Ninja" ];
+          };
+
+          rerun-sdk = pkgs.stdenv.mkDerivation {
+            pname = "rerun-sdk";
+            version = "0.27.3";
+            src = pkgs.fetchurl {
+              url = "https://github.com/rerun-io/rerun/releases/download/0.27.3/rerun_cpp_sdk.zip";
+              sha256 = "1aysn1jsl58vxaakv8j9awnnxll06ay4pwczfs8gi63w3w4yj920";
+            };
+            nativeBuildInputs = [ pkgs.unzip pkgs.cmake ];
+            buildInputs = [ pkgs.arrow-cpp ];
+            cmakeFlags = [
+              "-DRERUN_DOWNLOAD_AND_BUILD_ARROW=OFF"
+            ];
+          };
+
+          rooferDerivation = { withBindings ? false, withApps ? true, withRerun ? false }: # flags select apps / Python bindings / rerun support
+            pkgs.stdenv.mkDerivation ({
+              pname = "roofer" + pkgs.lib.optionalString withBindings "py"; # "roofer", or "rooferpy" when bindings are built
+              version = "1.0.0-beta.7"; # keep in sync with current_version in .bumpversion.toml
+
+              src = ./.;
+
+              nativeBuildInputs = with pkgs; [
+                cmake
+                ninja
+              ] ++ lib.optionals stdenv.isDarwin [ darwin.DarwinTools apple_sdk ]
+                ++ lib.optionals withBindings [ python313Packages.pybind11 ];
+
+              buildInputs = with pkgs; [
+                # core roofer deps
+                cgal gmp mpfr boost eigen
+                fmt
+              ] ++ lib.optionals stdenv.isDarwin [ apple_sdk ]
+                ++ lib.optionals withBindings [
+                  py
+                ]
+                ++ lib.optionals withApps [
+                  val3dity
+                  spdlog
+                  mimalloc
+                  nlohmann_json
+                  LAStools
+                  geos
+                  gdal
+                  proj
+                  sqlite
+                ]
+                ++ lib.optionals withRerun [
+                  rerun
+                  rerun-sdk
+                ];
+
+              cmakeFlags = [
+                "-DCMAKE_BUILD_TYPE=Release"
+                "-DRF_BUILD_APPS=${if withApps then "ON" else "OFF"}"
+                "-DRF_BUILD_BINDINGS=${if withBindings then "ON" else "OFF"}"
+                "-DRF_USE_VAL3DITY=${if withApps then "ON" else "OFF"}"
+                "-DRF_BUILD_TESTING=OFF"
+                "-DRF_GIT_HASH=${shortRev}"
+                "-DRF_USE_CPM=OFF"
+                "-DRF_USE_LOGGER_SPDLOG=${if withBindings then "OFF" else "ON"}"
+                # there is no nix package for rerun_cpp atm
+                "-DRF_USE_RERUN=${if withRerun then "ON" else "OFF"}"
+                "-G Ninja"
+                "-DRF_BUILD_DOC_HELPER=ON"
+              ];
+
+              preConfigure = pkgs.lib.optionalString withBindings ''
+                export pybind11_DIR="$(${py}/bin/python -c "import pybind11; print(pybind11.get_cmake_dir())")"
+              '';
+
+              meta = with pkgs.lib; {
+                description = "3D building reconstruction from point clouds";
+                homepage = "https://github.com/3DBAG/roofer";
+                license = licenses.lgpl3;
+                platforms = platforms.unix;
+                mainProgram = "roofer";
+              };
+            });
+        in {
+          default = rooferDerivation { withApps = true; withBindings = false; };
+          rooferpy = rooferDerivation { withApps = false; withBindings = true; };
+          roofer-rerun = rooferDerivation { withApps = true; withRerun = true; };
+        });
+
+      dockerImage = forAllSystems (system:
+        let
+          pkgs = import nixpkgs { system = system; config.allowUnfree = true; };
+        in {
+          roofer = pkgs.dockerTools.buildImage {
+            name = "roofer";
+            tag = self.packages.${system}.default.version;
+            copyToRoot = pkgs.buildEnv {
+                name = "image-root";
+                paths = [ self.packages.${system}.default ];
+                pathsToLink = [ "/bin" ];
+              };
+            config = {
+              Entrypoint = [ "roofer" ];
+            };
+          };
+        });
+
       devShells = forAllSystems (system:
         let
-          pkgs = nixpkgs.legacyPackages.${system};
+          pkgs = import nixpkgs { system = system; config.allowUnfree = true; };
           apple_sdk = pkgs.apple-sdk_15;
           py = pkgs.python313;
         in {
+          conan = pkgs.mkShell { # shell for the Conan-based build: CMake, Ninja and Conan, plus patchelf on Linux
+            buildInputs = with pkgs; [
+              cmakeCurses
+              ninja
+              conan
+            ] ++ lib.optionals stdenv.isDarwin [ darwin.DarwinTools apple_sdk ]
+              ++ lib.optionals stdenv.isLinux [ patchelf ];
+
+            shellHook = ''
+              ${pkgs.lib.optionalString pkgs.stdenv.isLinux ''
+                export CMAKE_LIBRARY_PATH="${pkgs.glibc.out}/lib''${CMAKE_LIBRARY_PATH:+:$CMAKE_LIBRARY_PATH}"
+              ''}
+              echo "Conan dev shell ready. Run 'conan profile detect' if you haven't set up a profile yet."
+              echo ""
+              echo "Conan build steps (replace Release with Debug for debug build):"
+              echo "  conan install . --build=missing --output-folder=build-conan -s build_type=Release"
+              echo "  cd build-conan"
+              echo "  cmake .. -DCMAKE_TOOLCHAIN_FILE=conan_toolchain.cmake -DCMAKE_BUILD_TYPE=Release"
+              echo "  cmake --build ."
+              echo ""
+              ${pkgs.lib.optionalString pkgs.stdenv.isLinux ''
+                roofer-patch() {
+                  patchelf --set-interpreter ${if pkgs.stdenv.hostPlatform.isAarch64 then "/lib/ld-linux-aarch64.so.1" else "/lib64/ld-linux-x86-64.so.2"} "$1"
+                  echo "Patched interpreter on $1"
+                }
+                echo "Tip: run 'roofer-patch <binary>' to fix the ELF interpreter for deployment on regular linux systems"
+              ''}
+            '';
+          };
+
           default = pkgs.mkShell.override {
             # Use stdenvNoCC to avoid compiler contamination
             # stdenv = if pkgs.stdenv.isDarwin then pkgs.stdenvNoCC else pkgs.stdenv;
           } {
             buildInputs = with pkgs; [
               cmakeCurses
-              vcpkg
               ninja
 
-              # to make vcpkg work
-              autoconf
-              automake
-              autoconf-archive
-              pkg-config-unwrapped
-              bash
-              cacert
-              coreutils
-              curl
-              gnumake
-              gzip
-              openssh
-              perl
-              pkg-config
-              libtool
-              zip
-              zstd
-              bison # thrift/arrow/rerun
+              # roofer core deps
+              cgal
+              gmp
+              mpfr
+              pkgsStatic.boost # need static for val3dity
+              eigen
+              fmt
+
+              # val3dity
+              pugixml
+              tclap
+
+              # apps
+              spdlog
+              mimalloc
+              gdal
+              proj
+              sqlite
+              nlohmann_json
+              LAStools
+              geos
 
               # python tools
               py
               uv
-              geos # for shapely
+
+              # linting
+              llvmPackages_18.clang
+              llvmPackages_18.clang-tools
 
               # docs
               doxygen
-            ] ++ lib.optionals stdenv.isDarwin [ darwin.DarwinTools apple_sdk ]
-              ++ lib.optionals (builtins.getEnv "GITHUB_ACTIONS" == "true") [mono]; # this is needed to make gh actions binary caching work with vcpkg
+            ] ++ lib.optionals stdenv.isDarwin [ darwin.DarwinTools apple_sdk ];
 
-            hardeningDisable = [ "fortify" ];
-            VCPKG_ROOT = "${pkgs.vcpkg}/share/vcpkg";
             UV_NO_BINARY = 1;
             # VCPKG_FORCE_SYSTEM_BINARIES = 1;
             scm_version = "unknown";
 
             shellHook = ''
-              ${pkgs.lib.optionalString pkgs.stdenv.isDarwin ''
-                # distribution script for macOS
-                chmod +x distribution/macOS/bundle_libcxx.sh
-                export PATH="$(pwd)/distribution/macOS:$PATH"
-              ''}
               echo "Updating and activating python environment..."
               uv sync
               source .venv/bin/activate
               export pybind11_DIR="$(python -m pybind11 --cmakedir)"
-              echo "Roofer dev shell with vcpkg is ready"
+              echo "Roofer dev shell with Nix is ready"
             '';
           };
         });
diff --git a/include/roofer/common/common.hpp b/include/roofer/common/common.hpp
index 117e3de2..66771566 100644
--- a/include/roofer/common/common.hpp
+++ b/include/roofer/common/common.hpp
@@ -22,6 +22,7 @@
 #pragma once
 
 #include <array>
+#include <functional>
 #include <optional>
 #include <unordered_map>
 #include <vector>
diff --git a/rooferpy/CMakeLists.txt b/rooferpy/CMakeLists.txt
index 8e7e91e2..f5b74901 100644
--- a/rooferpy/CMakeLists.txt
+++ b/rooferpy/CMakeLists.txt
@@ -13,4 +13,9 @@ target_link_libraries(
   PRIVATE ${RECONSTRUCT_LINK_LIBS}
   pybind11::module)
 
-  set_target_properties(rooferpy PROPERTIES OUTPUT_NAME "roofer")
+set_target_properties(rooferpy PROPERTIES OUTPUT_NAME "roofer")
+
+find_package(Python REQUIRED COMPONENTS Interpreter Development)
+install(TARGETS rooferpy
+        DESTINATION "lib/python${Python_VERSION_MAJOR}.${Python_VERSION_MINOR}/site-packages"
+)
diff --git a/rooferpy/README.md b/rooferpy/README.md
index ee6158cd..e337d192 100644
--- a/rooferpy/README.md
+++ b/rooferpy/README.md
@@ -1,11 +1,48 @@
 # Python bindings for roofer C++ API
-We use pybind11 for python bindings. To use the bindings, make sure to [install pybind11](https://pybind11.readthedocs.io/en/latest/installing.html#include-with-pypi) and compile the source using the `vcpkg-with-bindings` preset, i.e.
+We use pybind11 for the Python bindings. To use the bindings, build roofer with either Conan or Nix.
+
+With Conan:
+
+```
+cd roofer-dev
+conan profile detect --force
+conan install . \
+  --output-folder=build_python \
+  --build=missing \
+  --settings=build_type=Release \
+  --settings=compiler.cppstd=20 \
+  --options="&:build_apps=False" \
+  --options="&:use_spdlog=False" \
+  --options="&:use_val3dity=False" \
+  --options="&:build_bindings=True" \
+  --options="&:build_testing=False"
+cmake -S . -B build_python \
+  -G Ninja \
+  -DCMAKE_TOOLCHAIN_FILE=build_python/conan_toolchain.cmake \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DRF_BUILD_APPS=OFF \
+  -DRF_USE_LOGGER_SPDLOG=OFF \
+  -DRF_USE_VAL3DITY=OFF \
+  -DRF_BUILD_BINDINGS=ON \
+  -DRF_BUILD_TESTING=OFF \
+  -DRF_USE_CPM=OFF
+cmake --build build_python --target rooferpy
+```
+
+With Nix:
 
 ```
 cd roofer-dev
-mkdir build_python
-cmake --preset vcpkg-with-bindings -S . -B build_python
-cmake --build build_python
+nix develop
+cmake -S . -B build_python \
+  -G Ninja \
+  -DRF_BUILD_APPS=OFF \
+  -DRF_USE_LOGGER_SPDLOG=OFF \
+  -DRF_USE_VAL3DITY=OFF \
+  -DRF_BUILD_BINDINGS=ON \
+  -DRF_BUILD_TESTING=OFF \
+  -DRF_USE_CPM=OFF
+cmake --build build_python --target rooferpy
 ```
 
-The rooferpy library will be located in `build_python/rooferpy/roofer.cpyton-<version-and-system>.so`. Import the .so file (e.g. place it in the same folder as .py script) to use roofer python API.
+The rooferpy library will be located at `build_python/rooferpy/roofer.cpython-<version-and-system>.so`. Import the `.so` file (e.g. by placing it in the same folder as your `.py` script) to use the roofer Python API.
diff --git a/src/core/logger/CMakeLists.txt b/src/core/logger/CMakeLists.txt
index df514201..7aadc598 100644
--- a/src/core/logger/CMakeLists.txt
+++ b/src/core/logger/CMakeLists.txt
@@ -6,8 +6,8 @@ add_library("logger" OBJECT ${LIBRARY_SOURCES} ${LIBRARY_HEADERS})
 target_include_directories("logger" PUBLIC ${LIBRARY_INCLUDES})
 
 if(${RF_USE_LOGGER_SPDLOG})
-  target_link_libraries("logger" PRIVATE fmt::fmt spdlog::spdlog)
+  target_link_libraries("logger" PUBLIC fmt::fmt PRIVATE spdlog::spdlog)
   target_compile_definitions("logger" PRIVATE RF_USE_LOGGER_SPDLOG)
 else ()
-  target_link_libraries("logger" PRIVATE fmt::fmt)
+  target_link_libraries("logger" PUBLIC fmt::fmt)
 endif()
diff --git a/src/extra/io/CMakeLists.txt b/src/extra/io/CMakeLists.txt
index 98a71292..05d48e67 100644
--- a/src/extra/io/CMakeLists.txt
+++ b/src/extra/io/CMakeLists.txt
@@ -21,9 +21,30 @@ set(LIBRARY_INCLUDES
 
 find_package(laslib CONFIG REQUIRED)
 find_package(nlohmann_json CONFIG REQUIRED)
+find_package(PROJ CONFIG REQUIRED)
 # GDAL
 find_package(GDAL CONFIG REQUIRED)
 
+if(TARGET laslib::laslib)
+  set(LASLIB_TARGET laslib::laslib)
+elseif(TARGET LASlib)
+  set(LASLIB_TARGET LASlib)
+else()
+  message(FATAL_ERROR "Could not find a usable LASlib target")
+endif()
+
 add_library("io" OBJECT ${LIBRARY_SOURCES} ${LIBRARY_HEADERS})
 target_include_directories("io" PUBLIC ${LIBRARY_INCLUDES})
-target_link_libraries("io" PUBLIC GDAL::GDAL LASlib nlohmann_json::nlohmann_json)
+target_link_libraries("io" PUBLIC GDAL::GDAL ${LASLIB_TARGET} nlohmann_json::nlohmann_json fmt::fmt)
+
+if(DEFINED gdal_PACKAGE_FOLDER_RELEASE AND EXISTS "${gdal_PACKAGE_FOLDER_RELEASE}/res/gdal")
+  install(
+    DIRECTORY "${gdal_PACKAGE_FOLDER_RELEASE}/res/gdal"
+    DESTINATION "share")
+endif()
+
+if(DEFINED proj_PACKAGE_FOLDER_RELEASE AND EXISTS "${proj_PACKAGE_FOLDER_RELEASE}/res")
+  install(
+    DIRECTORY "${proj_PACKAGE_FOLDER_RELEASE}/res/"
+    DESTINATION "share/proj")
+endif()
diff --git a/src/extra/io/PointCloudReaderLASlib.cpp b/src/extra/io/PointCloudReaderLASlib.cpp
index f7d389be..eaad9e11 100644
--- a/src/extra/io/PointCloudReaderLASlib.cpp
+++ b/src/extra/io/PointCloudReaderLASlib.cpp
@@ -23,9 +23,23 @@
 
 #include <array>
 #include <iomanip>
+#include <roofer/io/PointCloudReader.hpp>
+
+#if __has_include(<LASlib/lasreader.hpp>)
+#include <LASlib/lasreader.hpp>
+#elif __has_include(<lasreader.hpp>)
 #include <lasreader.hpp>
+#else
+#error "LASlib header lasreader.hpp not found"
+#endif
+
+#if __has_include(<LASlib/laswriter.hpp>)
+#include <LASlib/laswriter.hpp>
+#elif __has_include(<laswriter.hpp>)
 #include <laswriter.hpp>
-#include <roofer/io/PointCloudReader.hpp>
+#else
+#error "LASlib header laswriter.hpp not found"
+#endif
 
 namespace roofer::io {
 
diff --git a/src/extra/io/PointCloudWriterLASlib.cpp b/src/extra/io/PointCloudWriterLASlib.cpp
index 62c49f91..ff16c22e 100644
--- a/src/extra/io/PointCloudWriterLASlib.cpp
+++ b/src/extra/io/PointCloudWriterLASlib.cpp
@@ -23,9 +23,16 @@
 #include <roofer/logger/logger.h>
 
 #include <filesystem>
-#include <laswriter.hpp>
 #include <roofer/io/PointCloudWriter.hpp>
 
+#if __has_include(<LASlib/laswriter.hpp>)
+#include <LASlib/laswriter.hpp>
+#elif __has_include(<laswriter.hpp>)
+#include <laswriter.hpp>
+#else
+#error "LASlib header laswriter.hpp not found"
+#endif
+
 namespace roofer::io {
 
   namespace fs = std::filesystem;
diff --git a/src/extra/io/StreamCropper.cpp b/src/extra/io/StreamCropper.cpp
index bf059b5a..bfe978a2 100644
--- a/src/extra/io/StreamCropper.cpp
+++ b/src/extra/io/StreamCropper.cpp
@@ -25,11 +25,19 @@
 #include <ctime>
 #include <filesystem>
 #include <iostream>
-#include <lasreader.hpp>
+
 #include <roofer/common/Raster.hpp>
 #include <roofer/common/GridPIPTester.hpp>
 #include <roofer/io/StreamCropper.hpp>
 
+#if __has_include(<LASlib/lasreader.hpp>)  // probe the header actually included
+#include <LASlib/lasreader.hpp>
+#elif __has_include(<lasreader.hpp>)
+#include <lasreader.hpp>
+#else
+#error "LASlib header lasreader.hpp not found"
+#endif
+
 namespace roofer::io {
 
   namespace fs = std::filesystem;
diff --git a/src/extra/misc/CMakeLists.txt b/src/extra/misc/CMakeLists.txt
index 7b679fed..ee94999a 100644
--- a/src/extra/misc/CMakeLists.txt
+++ b/src/extra/misc/CMakeLists.txt
@@ -7,7 +7,7 @@ set(LIBRARY_SOURCES
     "Vector2DOpsGEOS.cpp"
     "MeshPropertyCalculator.cpp")
 if(RF_USE_VAL3DITY)
-    list(APPEND LIBRARY_SOURCES "Val3dator.cpp")
+  list(APPEND LIBRARY_SOURCES "Val3dator.cpp")
 endif()
 set(LIBRARY_HEADERS
     "${ROOFER_INCLUDE_DIR}/roofer/misc/NodataCircleComputer.hpp"
@@ -18,12 +18,17 @@ set(LIBRARY_HEADERS
     "${ROOFER_INCLUDE_DIR}/roofer/misc/Vector2DOps.hpp"
     "${ROOFER_INCLUDE_DIR}/roofer/misc/MeshPropertyCalculator.hpp")
 if(RF_USE_VAL3DITY)
-    list(APPEND LIBRARY_HEADERS "${ROOFER_INCLUDE_DIR}/roofer/misc/Val3dator.hpp")
+  list(APPEND LIBRARY_HEADERS "${ROOFER_INCLUDE_DIR}/roofer/misc/Val3dator.hpp")
 endif()
 set(LIBRARY_INCLUDES
     "${ROOFER_INCLUDE_DIR}")
 
 find_package(GEOS CONFIG REQUIRED)
+# Workaround: Conan's geos recipe sets an empty generator expression for
+# INTERFACE_INCLUDE_DIRECTORIES on geos_c. Append the real path via find_path,
+# which searches CMAKE_INCLUDE_PATH that Conan correctly populates.
+find_path(GEOS_C_INCLUDE_DIR geos_c.h REQUIRED)
+set_property(TARGET GEOS::geos_c APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${GEOS_C_INCLUDE_DIR}")
 
 add_library("misc" OBJECT ${LIBRARY_SOURCES} ${LIBRARY_HEADERS})
 target_include_directories("misc" PUBLIC ${LIBRARY_INCLUDES})
@@ -31,15 +36,20 @@ target_include_directories("misc" PUBLIC ${LIBRARY_INCLUDES})
 set(MISC_LINK_LIBS
     CGAL::CGAL
     GEOS::geos_c
+    fmt::fmt
 )
 if(RF_USE_VAL3DITY)
+  if(RF_USE_CPM)
     cpmaddpackage(
-        NAME val3dity
-        GITHUB_REPOSITORY "ylannl/val3dity"
-        GIT_TAG "master"
-        OPTIONS "VAL3DITY_LIBRARY ON"
-                "VAL3DITY_USE_INTERNAL_DEPS OFF")
-    list(APPEND MISC_LINK_LIBS val3dity)
+            NAME val3dity
+            GITHUB_REPOSITORY "ylannl/val3dity"
+            GIT_TAG "master"
+            OPTIONS "VAL3DITY_LIBRARY ON"
+                    "VAL3DITY_USE_INTERNAL_DEPS OFF")
+  else()
+    find_package(val3dity REQUIRED)
+  endif()
+  list(APPEND MISC_LINK_LIBS val3dity)
 endif()
 
 target_link_libraries("misc" PUBLIC ${MISC_LINK_LIBS})
diff --git a/src/extra/misc/Val3dator.cpp b/src/extra/misc/Val3dator.cpp
index bfc640e2..a67cbd5e 100644
--- a/src/extra/misc/Val3dator.cpp
+++ b/src/extra/misc/Val3dator.cpp
@@ -22,7 +22,7 @@
 // val3dity
 #include <roofer/misc/Val3dator.hpp>
 
-#include "val3dity.h"
+#include <val3dity/val3dity.h>
 
 namespace roofer::misc {
 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index e3fe05f1..902d2425 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -6,11 +6,9 @@ include(FetchContent)
 set(CONFIG_DIR "${CMAKE_CURRENT_SOURCE_DIR}/config")
 set(DATA_URL_ROOT "https://data.3dbag.nl/testdata/roofer")
 set(DATA_DIR "${CMAKE_CURRENT_SOURCE_DIR}/data")
-if(DEFINED VCPKG_TOOLCHAIN)
-  set(TEST_ENVIRONMENT
-      "PROJ_DATA=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/share/proj;GDAL_DATA=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/share/gdal"
-  )
-endif()
+set(TEST_ENVIRONMENT
+    "PROJ_DATA=${CMAKE_INSTALL_PREFIX}/share/proj;GDAL_DATA=${CMAKE_INSTALL_PREFIX}/share/gdal"
+)
 
 FetchContent_Declare(
   wippolder
@@ -59,8 +57,7 @@ include_directories("${PROJECT_SOURCE_DIR}/apps/external")
 add_executable("reconstruct_api"
                "${CMAKE_CURRENT_SOURCE_DIR}/test_reconstruct_api.cpp")
 set_target_properties("reconstruct_api" PROPERTIES CXX_STANDARD 20)
-target_link_libraries("reconstruct_api" PRIVATE roofer-extra
-                                                cmake_git_version_tracking)
+target_link_libraries("reconstruct_api" PRIVATE roofer-extra)
 add_test(
   NAME "reconstruct-api-wippolder"
   COMMAND $<TARGET_FILE:reconstruct_api>
@@ -103,9 +100,7 @@ if(RF_BUILD_APPS)
 
   # Integration tests that are run on the installed artifacts must be prefixed
   # with "installed-". Note that these tests must be invoked *after* the
-  # artifacts were installed. We don't set the TEST_ENVIRONMENT for the
-  # installed apps, because the required paths are supposed to be set by the
-  # install process.
+  # artifacts were installed.
   if(RF_TEST_INSTALL)
     add_test(
       NAME "installed-roofer-version"
@@ -117,27 +112,11 @@ if(RF_BUILD_APPS)
       COMMAND roofer --config "${CONFIG_DIR}/roofer-wippolder.toml"
       WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
 
-    add_test(
-      NAME "installed-reconstruct-version"
-      COMMAND reconstruct --version
-      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
-
-    add_test(
-      NAME "installed-reconstruct-wippolder"
-      COMMAND reconstruct --config "${CONFIG_DIR}/roofer-wippolder.toml"
-      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
-
     set(tests_installed
-        "installed-roofer-version;installed-reconstruct-version;installed-roofer-wippolder;installed-reconstruct-wippolder"
+        "installed-roofer-version;installed-roofer-wippolder"
     )
-    set(WINDOWS_INSTALLED_ENVIRONMENT
-        "GDAL_DATA=${CMAKE_INSTALL_PREFIX}\\share\\gdal;PATH=C:\\Program Files (x86)\\roofer\\bin\;$ENV{PATH}"
-    )
-    if(WIN32)
-      set_tests_properties(
-        ${tests_installed} PROPERTIES ENVIRONMENT
-                                      "${WINDOWS_INSTALLED_ENVIRONMENT}")
-    endif()
+    set_tests_properties(
+      ${tests_installed} PROPERTIES ENVIRONMENT "${TEST_ENVIRONMENT}")
   endif()
 endif()
 
diff --git a/tests/test_reconstruct_api.cpp b/tests/test_reconstruct_api.cpp
index b667f606..db2ac7e2 100644
--- a/tests/test_reconstruct_api.cpp
+++ b/tests/test_reconstruct_api.cpp
@@ -29,7 +29,6 @@
 
 #include "argh.h"
 #include "fmt/format.h"
-#include "git.h"
 
 typedef CGAL::Exact_predicates_inexact_constructions_kernel K;
 typedef CGAL::Point_3<K> Point_3;
@@ -44,13 +43,7 @@ void print_help(std::string program_name) {
   std::cout << "   -v, --verbose                Be more verbose" << "\n";
 }
 // ... get the input pointcloud and footprint polygon for your building
-void print_version() {
-  std::cout << fmt::format(
-      "roofer {} ({}{}{})\n", git_Describe(),
-      std::strcmp(git_Branch(), "main") ? ""
-                                        : fmt::format("{}, ", git_Branch()),
-      git_AnyUncommittedChanges() ? "dirty, " : "", git_CommitDate());
-}
+void print_version() { std::cout << "roofer test_reconstruct_api\n"; }
 
 int main(int argc, const char* argv[]) {
   auto cmdl = argh::parser();
diff --git a/uv.lock b/uv.lock
index 3d317f9f..f1068c26 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.13"
 
 [[package]]
diff --git a/vcpkg.json b/vcpkg.json
deleted file mode 100644
index c460e7bb..00000000
--- a/vcpkg.json
+++ /dev/null
@@ -1,75 +0,0 @@
-{
-  "name": "roofer",
-  "version-string": "1.0.0",
-  "features": {
-    "spdlog": {
-      "description": "Use spdlog as logging backend.",
-      "dependencies": [
-        "spdlog"
-      ]
-    },
-    "test": {
-      "description": "Build tests.",
-      "dependencies": [
-        "catch2"
-      ]
-    },
-    "val3dity": {
-      "description": "Use val3dity for 3D validation.",
-      "dependencies": [
-        "pugixml",
-        "spdlog",
-        "tclap"
-      ]
-    },
-    "apps": {
-      "description": "Build apps.",
-      "dependencies": [
-        "geos",
-        "lastools",
-        {
-          "name": "mimalloc",
-          "features": []
-        },
-        {
-          "name": "gdal",
-          "default-features": false,
-          "features": [
-            "sqlite3",
-            "postgresql",
-            "geos"
-          ]
-        },
-        {
-          "name": "bshoshany-thread-pool",
-          "version>=": "4.1.0"
-        },
-        {
-          "name": "nlohmann-json",
-          "version>=": "3.11.3"
-        }
-      ]
-    },
-    "app-rerun": {
-      "description": "Build with rerun support.",
-      "dependencies": [
-        "arrow"
-      ]
-    }
-  },
-  "dependencies": [
-    {
-      "name": "cgal",
-      "version>=": "6.0"
-    },
-    "eigen3",
-    "fmt"
-  ],
-  "overrides": [
-    {
-      "name": "nlohmann-json",
-      "version": "3.11.3"
-    }
-  ],
-  "builtin-baseline": "dbe35ceb30c688bf72e952ab23778e009a578f18"
-}