diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..ed5e6cc4 --- /dev/null +++ b/.clang-format @@ -0,0 +1,3 @@ +BasedOnStyle: LLVM +IndentWidth: 4 +ColumnLimit: 120 diff --git a/.dockerignore b/.dockerignore index a0e0fba6..7707bfc4 100644 --- a/.dockerignore +++ b/.dockerignore @@ -29,4 +29,4 @@ LICENSE # Docker files (not needed in the build context) Dockerfile docker-compose.yml -.dockerignore \ No newline at end of file +.dockerignore diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml new file mode 100644 index 00000000..bd9675c9 --- /dev/null +++ b/.github/workflows/build-test.yml @@ -0,0 +1,93 @@ +name: Build and Test + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build-and-test: + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + runs-on: ${{ matrix.os }} + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Install dependencies (Linux) + if: runner.os == 'Linux' + run: | + sudo apt-get update + sudo apt-get install -y build-essential cmake libz-dev + + - name: Install dependencies (macOS) + if: runner.os == 'macOS' + run: | + brew update + brew install cmake zlib bash + echo "$(brew --prefix)/bin" >> $GITHUB_PATH + + - name: Configure + run: cmake -S . 
-B build + shell: bash + - name: Build + run: cmake --build build --parallel + shell: bash + + - name: Run tests + run: | + cd build + ctest --output-on-failure + shell: bash + + python-wheels: + needs: build-and-test + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + runs-on: ${{ matrix.os }} + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Install dependencies (Linux) + if: runner.os == 'Linux' + run: | + sudo apt-get update + sudo apt-get install -y build-essential cmake libz-dev python3-pip + + - name: Install dependencies (macOS) + if: runner.os == 'macOS' + run: | + brew update + brew install cmake zlib python@3 bash + echo "$(brew --prefix)/bin" >> $GITHUB_PATH + + - name: Build project + run: | + cmake -S . -B build -DPYTHON_BINDINGS=ON + cmake --build build --parallel + cmake --install build --prefix $PWD/install + shell: bash + + - name: Build wheel + run: | + python3 -m pip install --upgrade pip wheel + pip wheel ./python -w dist + shell: bash + + - name: Test Python wheel + run: | + pip install dist/*.whl + echo "$PWD/install/bin" >> $GITHUB_PATH + cat <<' EOF' | sed 's/^ //' | python3 - + import vcfx + print('version:', vcfx.get_version()) + tools = vcfx.available_tools() + print('tools:', len(tools)) + if tools: + vcfx.run_tool(tools[0], '--help', check=False) + EOF + shell: bash diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index d217e59d..3e740efd 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -57,4 +57,4 @@ jobs: labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64,linux/arm64 cache-from: type=gha - cache-to: type=gha,mode=max \ No newline at end of file + cache-to: type=gha,mode=max diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index acb29d11..609db83a 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -29,23 +29,23 @@ jobs: uses: actions/checkout@v3 with: 
fetch-depth: 0 - + - name: Set up Python uses: actions/setup-python@v4 with: python-version: '3.x' - + - name: Install dependencies run: | python -m pip install --upgrade pip pip install mkdocs-material pymdown-extensions - + - name: Deploy to GitHub Pages run: | git config --global user.name "${GITHUB_ACTOR}" git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com" mkdocs gh-deploy --force - + # Only for pull requests - just build to validate build: runs-on: ubuntu-latest @@ -53,16 +53,16 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v3 - + - name: Set up Python uses: actions/setup-python@v4 with: python-version: '3.x' - + - name: Install dependencies run: | python -m pip install --upgrade pip pip install mkdocs-material pymdown-extensions - + - name: Build documentation - run: mkdocs build \ No newline at end of file + run: mkdocs build diff --git a/.gitignore b/.gitignore index 37beb001..40b05829 100644 --- a/.gitignore +++ b/.gitignore @@ -41,4 +41,22 @@ Thumbs.db # Other tools.md prompt.md -names.md \ No newline at end of file +names.md + +# Temporary outputs from genotype_query tests +tests/tmp/genotype_query/ +tests/data/genotype_query/missing_malformed.vcf +tests/data/genotype_query/multi_sample.vcf +tests/data/genotype_query/single_sample.vcf +tests/expected/genotype_query/missing_malformed_01.vcf +tests/expected/genotype_query/multi_11_flexible.vcf +tests/expected/genotype_query/multi_11_strict.vcf +tests/expected/genotype_query/multi_12_flexible.vcf +tests/expected/genotype_query/no_match.vcf +tests/expected/genotype_query/single_sample_flex_01.vcf +tests/expected/genotype_query/single_sample_strict_01.vcf + +# General temporary test output directories +tests/tmp/ +tests/out/ +tmp/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 662b06cc..cf4d4459 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,8 @@ set(VCFX_VERSION_MINOR 0) set(VCFX_VERSION_PATCH 2) set(VCFX_VERSION 
"${VCFX_VERSION_MAJOR}.${VCFX_VERSION_MINOR}.${VCFX_VERSION_PATCH}") +add_compile_definitions(VCFX_VERSION="${VCFX_VERSION}") + project(VCFX VERSION ${VCFX_VERSION} DESCRIPTION "A Comprehensive VCF Manipulation Toolkit" @@ -14,9 +16,21 @@ project(VCFX # Optionally allow building for WebAssembly via Emscripten option(BUILD_WASM "Build with emscripten toolchain" OFF) +option(PYTHON_BINDINGS "Build Python bindings" ON) if(BUILD_WASM) - set(CMAKE_TOOLCHAIN_FILE "/path/to/emscripten.cmake" CACHE FILEPATH "Emscripten toolchain" FORCE) + if(NOT CMAKE_TOOLCHAIN_FILE) + if(DEFINED ENV{EMSDK} AND EXISTS "$ENV{EMSDK}/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake") + set(CMAKE_TOOLCHAIN_FILE "$ENV{EMSDK}/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake" CACHE FILEPATH "Emscripten toolchain" FORCE) + elseif(DEFINED ENV{EMSCRIPTEN} AND EXISTS "$ENV{EMSCRIPTEN}/cmake/Modules/Platform/Emscripten.cmake") + set(CMAKE_TOOLCHAIN_FILE "$ENV{EMSCRIPTEN}/cmake/Modules/Platform/Emscripten.cmake" CACHE FILEPATH "Emscripten toolchain" FORCE) + endif() + endif() + + if(NOT EXISTS "${CMAKE_TOOLCHAIN_FILE}") + message(FATAL_ERROR "Emscripten toolchain file not found. 
Please set CMAKE_TOOLCHAIN_FILE or EMSDK.") + endif() + message(STATUS "Building for WebAssembly (Emscripten).") endif() @@ -42,9 +56,12 @@ enable_testing() # Add top-level 'src' subdirectory, which in turn references each tool subdirectory add_subdirectory(src) -# Add a tests subdir if you have tests -# Comment out this line since we don't have a CMakeLists.txt file in the tests directory -# add_subdirectory(tests) +if(PYTHON_BINDINGS) + add_subdirectory(python) +endif() + +# Add the test suite +add_subdirectory(tests) # Installation configuration include(GNUInstallDirs) diff --git a/DOCKER.md b/DOCKER.md index c206c53c..2b0f34c8 100644 --- a/DOCKER.md +++ b/DOCKER.md @@ -8,16 +8,16 @@ VCFX is available as a pre-built Docker image on GitHub Container Registry: ```bash # Pull the image (only needed once) -docker pull ghcr.io/ieeta-pt/vcfx:latest +docker pull ghcr.io/jorgemfs/vcfx:latest # Run a VCFX tool -docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Mount a directory with your data -docker run --rm -v /path/to/your/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v /path/to/your/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Example: Process a VCF file (using tests/data/valid.vcf as an example) -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' ``` Using the pre-built image is recommended for most users as it: @@ -65,19 +65,19 @@ There are several ways to run VCFX tools with Docker: ```bash # With the pre-built image -docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # With a locally built image docker run 
--rm vcfx:local VCFX_tool_name [options] # Mount the tests/data directory to access test files -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Process files in the tests/data directory -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_validator' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_validator' # Example: Calculate allele frequencies for a VCF file -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' ``` ### Using Docker Compose @@ -98,7 +98,7 @@ docker-compose run --rm vcfx 'cat /data/valid.vcf | VCFX_allele_freq_calc > /dat When using Docker directly, you need to mount a directory to access your files: ```bash -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] ``` When using Docker Compose, the `tests/data` directory is mounted by default: @@ -115,7 +115,7 @@ You can modify the docker-compose.yml file to mount a different directory if nee You can create complex pipelines by chaining VCFX tools: ```bash -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/classifier_mixed.vcf | VCFX_variant_classifier --append-info | grep "VCF_CLASS=SNP" | VCFX_allele_freq_calc > /data/snp_frequencies.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/classifier_mixed.vcf | VCFX_variant_classifier --append-info | grep "VCF_CLASS=SNP" | VCFX_allele_freq_calc > /data/snp_frequencies.tsv' ``` ### 
Creating Shell Scripts @@ -126,7 +126,7 @@ For complex workflows, consider creating a shell script: #!/bin/bash # save as vcfx_workflow.sh -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | \ +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | \ VCFX_validator | \ VCFX_variant_classifier --append-info | \ VCFX_allele_freq_calc > /data/pipeline_output.tsv' @@ -147,7 +147,7 @@ If you encounter permission issues with files created in the container: ```bash # Run the container with your user ID -docker run --rm -v $(pwd)/tests/data:/data -u $(id -u):$(id -g) ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data -u $(id -u):$(id -g) ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] ``` ### Container Not Finding Commands @@ -156,5 +156,5 @@ If the container can't find VCFX commands, ensure they were properly built in th ```bash # List available VCFX tools in the container -docker run --rm ghcr.io/ieeta-pt/vcfx:latest 'ls -1 /usr/local/bin/VCFX_*' +docker run --rm ghcr.io/jorgemfs/vcfx:latest 'ls -1 /usr/local/bin/VCFX_*' ``` \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index a68f4b73..9e242e1a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,6 +9,8 @@ RUN apt-get update && apt-get install -y \ cmake \ git \ libz-dev \ + python3 \ + python3-dev \ && rm -rf /var/lib/apt/lists/* # Create a working directory @@ -46,14 +48,15 @@ COPY --from=builder /app/build/src /usr/local/bin/ # Create a directory for data WORKDIR /data -# Add the script that adds tools to PATH +# Add the helper scripts COPY add_vcfx_tools_to_path.sh /usr/local/bin/ +COPY docker_entrypoint.sh /usr/local/bin/ -# Make the script executable -RUN chmod +x /usr/local/bin/add_vcfx_tools_to_path.sh +# Make them executable +RUN chmod +x /usr/local/bin/add_vcfx_tools_to_path.sh /usr/local/bin/docker_entrypoint.sh -# Set the entry point -ENTRYPOINT ["/bin/bash", 
"-c"] +# Use a custom entrypoint that sets up PATH for the tools +ENTRYPOINT ["/usr/local/bin/docker_entrypoint.sh"] # Default command shows available tools -CMD ["echo 'VCFX Toolkit is ready. Run any VCFX tool by name, for example:' && ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename"] \ No newline at end of file +CMD ["bash", "-c", "echo 'VCFX Toolkit is ready. Run any VCFX tool by name, for example:' && ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename"] diff --git a/README.md b/README.md index 10301dd0..891d0695 100644 --- a/README.md +++ b/README.md @@ -75,21 +75,40 @@ cat input.vcf | \ VCFX_allele_freq_calc > snp_frequencies.tsv ``` +### Listing Available Tools + +```bash +vcfx list +``` + +### Show Tool Documentation + +```bash +vcfx help allele_counter +``` + ## Building for WebAssembly If you have [Emscripten](https://emscripten.org/) installed: ```bash mkdir build_wasm && cd build_wasm -cmake -DBUILD_WASM=ON .. +emcmake cmake -DBUILD_WASM=ON .. cmake --build . ``` ## Running Tests +From your build directory, run: + +```bash +ctest --output-on-failure +``` + +You can also execute all shell scripts directly with: + ```bash -cd build -ctest --verbose +bash ../tests/test_all.sh ``` ## Contributing diff --git a/add_vcfx_tools_to_path.sh b/add_vcfx_tools_to_path.sh index 41787a66..43a647b0 100644 --- a/add_vcfx_tools_to_path.sh +++ b/add_vcfx_tools_to_path.sh @@ -6,37 +6,41 @@ # Usage: # source ./add_vcfx_tools_to_path.sh -# Where is the root of this script? (i.e., your VCFX repository root) -# Adjust if needed; for example if you keep this script in the top-level dir: +# Determine potential base directories that may contain VCFX tools. +# When running from the build tree this will be build/src, but inside the +# Docker image the tools reside in /usr/local/bin/VCFX_*/. 
REPO_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -# Our compiled tools should be under build/src +BASE_DIRS=() BUILD_SRC_DIR="${REPO_ROOT}/build/src" +if [ -d "${BUILD_SRC_DIR}" ]; then + BASE_DIRS+=("${BUILD_SRC_DIR}") +fi -# Check that this path exists: -if [ ! -d "${BUILD_SRC_DIR}" ]; then - echo "Error: build/src directory not found at: ${BUILD_SRC_DIR}" - echo "Make sure you have run 'cmake .. && make' inside ./build" - return 1 +# Also check the standard installation prefix used in the Docker image +if compgen -G "/usr/local/bin/VCFX_*" > /dev/null; then + BASE_DIRS+=("/usr/local/bin") fi -# We'll gather a list of directories under build/src/VCFX_* -# that actually contain an executable matching the pattern "VCFX_*" -# Then add those directories to PATH. +if [ ${#BASE_DIRS[@]} -eq 0 ]; then + echo "Warning: No VCFX tool directories found." + return 1 +fi +# Gather directories containing executables named VCFX_* TOOL_DIRS="" -while IFS= read -r -d '' toolExec; do - # 'toolExec' is something like: build/src/VCFX_af_subsetter/VCFX_af_subsetter - toolDir=$(dirname "$toolExec") - # Only add it once if not present - if [[ ":$TOOL_DIRS:" != *":$toolDir:"* ]]; then - TOOL_DIRS="${TOOL_DIRS}:${toolDir}" - fi -done < <(find "${BUILD_SRC_DIR}" -type f -perm +111 -name 'VCFX_*' -print0 2>/dev/null) +for base in "${BASE_DIRS[@]}"; do + while IFS= read -r -d '' toolExec; do + toolDir=$(dirname "$toolExec") + if [[ ":$TOOL_DIRS:" != *":$toolDir:"* ]]; then + TOOL_DIRS="${TOOL_DIRS}:${toolDir}" + fi + done < <(find "$base" -type f -perm /111 -name 'VCFX_*' -print0 2>/dev/null) +done # If empty (no tools found), bail out if [ -z "$TOOL_DIRS" ]; then - echo "Warning: No VCFX tools found in ${BUILD_SRC_DIR}. Did you run 'make'?" + echo "Warning: No VCFX tools found." 
else # Remove leading colon TOOL_DIRS="${TOOL_DIRS#:}" diff --git a/compile_wasm.sh b/compile_wasm.sh index 6dbfb8a8..25d8cd6a 100644 --- a/compile_wasm.sh +++ b/compile_wasm.sh @@ -4,10 +4,14 @@ set -e mkdir -p build_wasm cd build_wasm -# Turn on BUILD_WASM -cmake -DBUILD_WASM=ON .. +# Turn on BUILD_WASM using emcmake if available +if command -v emcmake >/dev/null 2>&1; then + emcmake cmake -DBUILD_WASM=ON .. +else + cmake -DBUILD_WASM=ON .. +fi cmake --build . -echo "All VCFX tools built for WebAssembly in build_wasm/." +echo "All VCFX tools and the vcfx wrapper built for WebAssembly in build_wasm/." echo "Use 'ls -R build_wasm' to see output. If you want .html or .js from Emscripten, you can adjust linking flags or suffixes." diff --git a/docker_entrypoint.sh b/docker_entrypoint.sh new file mode 100755 index 00000000..18f1316a --- /dev/null +++ b/docker_entrypoint.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +# Entrypoint for VCFX Docker image. +# It adds VCFX tool directories to the PATH and then executes the given command. + +# Source the helper script if available +if [ -f /usr/local/bin/add_vcfx_tools_to_path.sh ]; then + source /usr/local/bin/add_vcfx_tools_to_path.sh +fi + +exec "$@" diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 9029abf4..354ca654 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -32,6 +32,7 @@ We welcome suggestions for new features or improvements to existing functionalit 1. Fork the repository 2. Create a new branch for your feature or bug fix 3. Write your code, following our coding standards + (run `clang-format -i ` before committing) 4. Add tests for your changes 5. Ensure all tests pass 6. Update documentation as needed @@ -40,6 +41,17 @@ We welcome suggestions for new features or improvements to existing functionalit ## Development Setup +### Code Formatting + +We use `clang-format` to keep the C++ code style consistent. A basic configuration +is provided in `.clang-format` at the repository root. 
Please run: + +```bash +clang-format -i path/to/changed_file.cpp +``` + +before committing changes. + ### Prerequisites - CMake (version 3.10 or higher) @@ -58,11 +70,16 @@ make ### Running Tests -After building the project, run the tests to ensure everything is working correctly: +After building the project, run the test suite from the `build` directory: + +```bash +ctest --output-on-failure +``` + +You can still run all shell tests directly if needed: ```bash -cd build -ctest --verbose +bash ../tests/test_all.sh ``` ## Coding Standards diff --git a/docs/VCFX_af_subsetter.md b/docs/VCFX_af_subsetter.md index 91871cda..59b79b24 100644 --- a/docs/VCFX_af_subsetter.md +++ b/docs/VCFX_af_subsetter.md @@ -13,6 +13,7 @@ VCFX_af_subsetter --af-filter "MIN-MAX" < input.vcf > filtered.vcf |--------|-------------| | `-a`, `--af-filter ` | Required. Allele frequency range for filtering (e.g., `0.01-0.05`) | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description `VCFX_af_subsetter` processes VCF files line by line and filters variants based on their allele frequency (AF) values from the INFO field. The tool: diff --git a/docs/VCFX_alignment_checker.md b/docs/VCFX_alignment_checker.md index 62c5f264..f7c9dc48 100644 --- a/docs/VCFX_alignment_checker.md +++ b/docs/VCFX_alignment_checker.md @@ -13,6 +13,7 @@ VCFX_alignment_checker --alignment-discrepancy > di |--------|-------------| | `-a`, `--alignment-discrepancy` | Enable alignment discrepancy checking mode | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description `VCFX_alignment_checker` compares VCF variants against a reference genome to validate sequence consistency. 
The tool: diff --git a/docs/VCFX_allele_balance_calc.md b/docs/VCFX_allele_balance_calc.md index 567e0c83..2ce451e7 100644 --- a/docs/VCFX_allele_balance_calc.md +++ b/docs/VCFX_allele_balance_calc.md @@ -13,6 +13,7 @@ VCFX_allele_balance_calc [OPTIONS] < input.vcf > allele_balance.tsv |--------|-------------| | `-s`, `--samples "Sample1 Sample2..."` | Optional. Specify sample names to calculate allele balance for (space-separated). If omitted, all samples are processed. | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description `VCFX_allele_balance_calc` processes a VCF file and calculates the allele balance for each variant in each specified sample. The tool: diff --git a/docs/VCFX_allele_balance_filter.md b/docs/VCFX_allele_balance_filter.md index bf9c6227..34ca1c27 100644 --- a/docs/VCFX_allele_balance_filter.md +++ b/docs/VCFX_allele_balance_filter.md @@ -16,6 +16,7 @@ VCFX_allele_balance_filter --filter-allele-balance < input.vcf > fil |--------|-------------| | `-f`, `--filter-allele-balance` | Required. Allele balance threshold between 0.0 and 1.0 | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_allele_counter.md b/docs/VCFX_allele_counter.md index 7031632a..492354df 100644 --- a/docs/VCFX_allele_counter.md +++ b/docs/VCFX_allele_counter.md @@ -13,6 +13,7 @@ VCFX_allele_counter [OPTIONS] < input.vcf > allele_counts.tsv |--------|-------------| | `-s`, `--samples "Sample1 Sample2..."` | Optional. Specify sample names to calculate allele counts for (space-separated). If omitted, all samples are processed. | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description `VCFX_allele_counter` processes a VCF file and counts reference and alternate alleles for each variant in each specified sample. 
The tool: diff --git a/docs/VCFX_allele_freq_calc.md b/docs/VCFX_allele_freq_calc.md index dbc04afa..fcfbd8a8 100644 --- a/docs/VCFX_allele_freq_calc.md +++ b/docs/VCFX_allele_freq_calc.md @@ -15,6 +15,7 @@ VCFX_allele_freq_calc [OPTIONS] < input.vcf > allele_frequencies.tsv | Option | Description | |-------------|--------------------------------------------| | `--help`, `-h` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_ancestry_assigner.md b/docs/VCFX_ancestry_assigner.md index b80d0a04..40bfba1c 100644 --- a/docs/VCFX_ancestry_assigner.md +++ b/docs/VCFX_ancestry_assigner.md @@ -16,6 +16,7 @@ VCFX_ancestry_assigner --assign-ancestry < input.vcf > ancestry_resu |--------|-------------| | `-a`, `--assign-ancestry ` | Required. Path to a file containing population-specific allele frequencies | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_ancestry_inferrer.md b/docs/VCFX_ancestry_inferrer.md index 8ee59361..33ab8fda 100644 --- a/docs/VCFX_ancestry_inferrer.md +++ b/docs/VCFX_ancestry_inferrer.md @@ -16,6 +16,7 @@ VCFX_ancestry_inferrer --frequency [OPTIONS] < input.vcf > ancestry_ |--------|-------------| | `--frequency ` | Required. Path to a file containing population-specific allele frequencies | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_annotation_extractor.md b/docs/VCFX_annotation_extractor.md index aa4deda3..c492e918 100644 --- a/docs/VCFX_annotation_extractor.md +++ b/docs/VCFX_annotation_extractor.md @@ -16,6 +16,7 @@ VCFX_annotation_extractor --annotation-extract "FIELD1,FIELD2,..." < input.vcf > |--------|-------------| | `-a`, `--annotation-extract ` | Required. 
Comma-separated list of INFO field annotations to extract (e.g., "ANN,Gene,Impact") | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_compressor.md b/docs/VCFX_compressor.md index 4f31f8e4..36060995 100644 --- a/docs/VCFX_compressor.md +++ b/docs/VCFX_compressor.md @@ -17,6 +17,7 @@ VCFX_compressor [OPTIONS] < input_file > output_file | `-c`, `--compress` | Compress the input VCF file (read from stdin, write to stdout) | | `-d`, `--decompress` | Decompress the input VCF.gz file (read from stdin, write to stdout) | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_concordance_checker.md b/docs/VCFX_concordance_checker.md index 10aef346..11b55786 100644 --- a/docs/VCFX_concordance_checker.md +++ b/docs/VCFX_concordance_checker.md @@ -13,6 +13,7 @@ VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" < input.vcf > concordance_r |--------|-------------| | `-s`, `--samples "SAMPLE1 SAMPLE2"` | Required. Names of the two samples to compare, separated by a space | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description `VCFX_concordance_checker` analyzes a VCF file and compares the genotypes of two specified samples for each variant. 
The tool: diff --git a/docs/VCFX_cross_sample_concordance.md b/docs/VCFX_cross_sample_concordance.md index 2bfbf0d9..aa77e1bb 100644 --- a/docs/VCFX_cross_sample_concordance.md +++ b/docs/VCFX_cross_sample_concordance.md @@ -12,6 +12,7 @@ VCFX_cross_sample_concordance [OPTIONS] < input.vcf > concordance_results.tsv | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description `VCFX_cross_sample_concordance` examines each variant in a multi-sample VCF file and determines if all samples with valid genotypes have the same normalized genotype. The tool: diff --git a/docs/VCFX_custom_annotator.md b/docs/VCFX_custom_annotator.md index f238dc28..058f583e 100644 --- a/docs/VCFX_custom_annotator.md +++ b/docs/VCFX_custom_annotator.md @@ -16,6 +16,7 @@ VCFX_custom_annotator --add-annotation [OPTIONS] < input.vcf > |--------|-------------| | `-a`, `--add-annotation ` | Required. Path to the annotation file containing the custom annotations | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_diff_tool.md b/docs/VCFX_diff_tool.md index 4aa79332..eb204367 100644 --- a/docs/VCFX_diff_tool.md +++ b/docs/VCFX_diff_tool.md @@ -17,6 +17,7 @@ VCFX_diff_tool --file1 --file2 | `-a`, `--file1 ` | Required. Path to the first VCF file | | `-b`, `--file2 ` | Required. 
Path to the second VCF file | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_distance_calculator.md b/docs/VCFX_distance_calculator.md index 4eeea7ac..c178715c 100644 --- a/docs/VCFX_distance_calculator.md +++ b/docs/VCFX_distance_calculator.md @@ -15,6 +15,7 @@ VCFX_distance_calculator [OPTIONS] < input.vcf > variant_distances.tsv | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_dosage_calculator.md b/docs/VCFX_dosage_calculator.md index 227bc9ec..56239e97 100644 --- a/docs/VCFX_dosage_calculator.md +++ b/docs/VCFX_dosage_calculator.md @@ -15,6 +15,7 @@ VCFX_dosage_calculator [OPTIONS] < input.vcf > dosage_output.txt | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_duplicate_remover.md b/docs/VCFX_duplicate_remover.md index 7075e956..6bad844d 100644 --- a/docs/VCFX_duplicate_remover.md +++ b/docs/VCFX_duplicate_remover.md @@ -15,6 +15,7 @@ VCFX_duplicate_remover [OPTIONS] < input.vcf > deduplicated.vcf | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_fasta_converter.md b/docs/VCFX_fasta_converter.md index 64ee6ed4..7ea0fbd6 100644 --- a/docs/VCFX_fasta_converter.md +++ b/docs/VCFX_fasta_converter.md @@ -15,6 +15,7 @@ VCFX_fasta_converter [OPTIONS] < input.vcf > output.fasta | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_field_extractor.md b/docs/VCFX_field_extractor.md index d1fb88e3..d51c8186 
100644 --- a/docs/VCFX_field_extractor.md +++ b/docs/VCFX_field_extractor.md @@ -13,6 +13,7 @@ VCFX_field_extractor --fields "FIELD1,FIELD2,..." [OPTIONS] < input.vcf > output |--------|-------------| | `-f`, `--fields` | Required. Comma-separated list of fields to extract (no spaces between fields) | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description `VCFX_field_extractor` processes a VCF file and extracts only the specified fields for each variant. The tool: diff --git a/docs/VCFX_file_splitter.md b/docs/VCFX_file_splitter.md index 5072cbbe..652c7de9 100644 --- a/docs/VCFX_file_splitter.md +++ b/docs/VCFX_file_splitter.md @@ -16,6 +16,7 @@ VCFX_file_splitter [OPTIONS] < input.vcf |--------|-------------| | `-p`, `--prefix ` | Output file prefix (default: "split") | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_format_converter.md b/docs/VCFX_format_converter.md index 50c25a3d..e10f3891 100644 --- a/docs/VCFX_format_converter.md +++ b/docs/VCFX_format_converter.md @@ -14,6 +14,7 @@ VCFX_format_converter [OPTIONS] < input.vcf > output.file | `--to-bed` | Convert the input VCF file to BED format | | `--to-csv` | Convert the input VCF file to CSV format | | `--help`, `-h` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description `VCFX_format_converter` reads a VCF file from standard input and converts it to the specified output format. 
The tool: diff --git a/docs/VCFX_genotype_query.md b/docs/VCFX_genotype_query.md index 1e923076..aab43902 100644 --- a/docs/VCFX_genotype_query.md +++ b/docs/VCFX_genotype_query.md @@ -17,6 +17,7 @@ VCFX_genotype_query [OPTIONS] < input.vcf > filtered.vcf | `--genotype-query`, `-g` "GENOTYPE" | Specify the genotype to query (e.g., "0/1", "1/1") | | `--strict` | Use strict string comparison (no phasing unification or allele sorting) | | `--help`, `-h` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_gl_filter.md b/docs/VCFX_gl_filter.md index d6d7b0fc..ade46e71 100644 --- a/docs/VCFX_gl_filter.md +++ b/docs/VCFX_gl_filter.md @@ -14,6 +14,7 @@ VCFX_gl_filter --filter "" [--mode ] < input.vcf > filtered. | `-f`, `--filter ` | Required. Filter condition (e.g., `GQ>20`, `DP>=10`, `PL<50`) | | `-m`, `--mode ` | Optional. Determines if all samples must pass the condition (`all`, default) or at least one sample must pass (`any`) | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description `VCFX_gl_filter` examines numeric fields in the FORMAT column of a VCF file and filters variant records based on whether the samples satisfy the specified condition. 
The tool: diff --git a/docs/VCFX_haplotype_extractor.md b/docs/VCFX_haplotype_extractor.md index 07d70904..a8bd9d96 100644 --- a/docs/VCFX_haplotype_extractor.md +++ b/docs/VCFX_haplotype_extractor.md @@ -17,6 +17,7 @@ VCFX_haplotype_extractor [OPTIONS] < input.vcf > haplotypes.tsv | `--block-size ` | Maximum distance in base pairs between consecutive variants to be included in the same block (default: 100,000) | | `--check-phase-consistency` | Enable checks for phase consistency between adjacent variants in a block | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_haplotype_phaser.md b/docs/VCFX_haplotype_phaser.md index 36481ceb..de4bda15 100644 --- a/docs/VCFX_haplotype_phaser.md +++ b/docs/VCFX_haplotype_phaser.md @@ -16,6 +16,7 @@ VCFX_haplotype_phaser [OPTIONS] < input.vcf > blocks.txt |--------|-------------| | `-l`, `--ld-threshold ` | r² threshold for LD-based grouping (0.0-1.0, default: 0.8) | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_header_parser.md b/docs/VCFX_header_parser.md index 8848873f..f31edc34 100644 --- a/docs/VCFX_header_parser.md +++ b/docs/VCFX_header_parser.md @@ -15,6 +15,7 @@ VCFX_header_parser [OPTIONS] < input.vcf > header.txt | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description @@ -87,7 +88,7 @@ The tool implements simple strategies for handling edge cases: 1. **Empty files**: If the input file is empty, no output is produced 2. **Files without headers**: If the file has no header lines, no output is produced 3. **Malformed headers**: All lines starting with "#" are considered header lines, even if they don't follow VCF specifications -4. 
**Line endings**: Both Unix (LF) and Windows (CRLF) line endings are handled correctly +4. **Line endings**: LF and CRLF line endings are handled correctly 5. **Partial headers**: If the file ends in the middle of the header section, all header lines up to that point are output ## Performance diff --git a/docs/VCFX_hwe_tester.md b/docs/VCFX_hwe_tester.md index d6feb009..e370b974 100644 --- a/docs/VCFX_hwe_tester.md +++ b/docs/VCFX_hwe_tester.md @@ -15,6 +15,7 @@ VCFX_hwe_tester [OPTIONS] < input.vcf > hwe_results.txt | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_impact_filter.md b/docs/VCFX_impact_filter.md index 897d5698..13a7b3fa 100644 --- a/docs/VCFX_impact_filter.md +++ b/docs/VCFX_impact_filter.md @@ -16,6 +16,7 @@ VCFX_impact_filter --filter-impact < input.vcf > filtered.vcf |--------|-------------| | `-i`, `--filter-impact ` | Required. Impact level threshold. 
Must be one of: HIGH, MODERATE, LOW, MODIFIER | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_inbreeding_calculator.md b/docs/VCFX_inbreeding_calculator.md index def1abfa..cbbedd07 100644 --- a/docs/VCFX_inbreeding_calculator.md +++ b/docs/VCFX_inbreeding_calculator.md @@ -18,6 +18,7 @@ VCFX_inbreeding_calculator [OPTIONS] < input.vcf > output.txt | `--skip-boundary` | Skip sites with boundary frequencies (p=0 or p=1) | | `--count-boundary-as-used` | Count boundary sites in usedCount even when skipping them | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_indel_normalizer.md b/docs/VCFX_indel_normalizer.md index 47c48fa2..661d05c7 100644 --- a/docs/VCFX_indel_normalizer.md +++ b/docs/VCFX_indel_normalizer.md @@ -12,6 +12,7 @@ VCFX_indel_normalizer [OPTIONS] < input.vcf > normalized.vcf | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description `VCFX_indel_normalizer` processes a VCF file and normalizes indel variants by: diff --git a/docs/VCFX_indexer.md b/docs/VCFX_indexer.md index 7d85ddf0..65f69f38 100644 --- a/docs/VCFX_indexer.md +++ b/docs/VCFX_indexer.md @@ -1,7 +1,7 @@ # VCFX_indexer ## Overview -`VCFX_indexer` is a utility tool for creating a byte-offset index of a VCF file. It generates a simple tab-delimited index file that maps chromosome and position to the exact byte offset in the original file, enabling efficient random access to variants without scanning the entire file. +`VCFX_indexer` is a utility tool for creating a byte-offset index of a VCF file. 
It generates a simple tab-delimited index file that maps chromosome and position to the exact byte offset in the original file, enabling efficient random access to variants without scanning the entire file. The index uses 64-bit integers for both the position and the byte offset so very large coordinates are fully supported. ## Usage @@ -14,6 +14,7 @@ VCFX_indexer [OPTIONS] < input.vcf > index.tsv | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description @@ -25,9 +26,9 @@ VCFX_indexer [OPTIONS] < input.vcf > index.tsv - Extracting the chromosome (CHROM) and position (POS) values - Calculating the precise byte offset from the start of the file 4. Writing a three-column index to standard output with: - - CHROM: The chromosome identifier from the VCF - - POS: The position value from the VCF - - FILE_OFFSET: The byte offset to the start of the line in the source file + - CHROM: The chromosome identifier from the VCF + - POS: The position value from the VCF (stored as a 64-bit integer) + - FILE_OFFSET: The byte offset to the start of the line in the source file (also 64-bit) This index enables efficient random access to specific variants in large VCF files by allowing tools to seek directly to a byte offset rather than scanning the entire file. It's particularly useful for building tools that need to query specific regions of a VCF file. 
@@ -44,8 +45,8 @@ CHROM POS FILE_OFFSET Where: - `CHROM` is the chromosome identifier from the VCF -- `POS` is the genomic position from the VCF -- `FILE_OFFSET` is the byte offset from the start of the VCF file +- `POS` is the genomic position from the VCF (64-bit integer) +- `FILE_OFFSET` is the byte offset from the start of the VCF file (64-bit integer) ## Examples @@ -71,7 +72,7 @@ tail -c +23456 input.vcf | head -1 ### File Format Detection -- The tool automatically handles both Unix (LF) and Windows (CRLF) line endings +- The tool automatically handles LF and CRLF line endings - Byte offsets are calculated correctly regardless of the line ending style ### Malformed VCF Files diff --git a/docs/VCFX_info_aggregator.md b/docs/VCFX_info_aggregator.md index 1f7857ef..79fa7a9f 100644 --- a/docs/VCFX_info_aggregator.md +++ b/docs/VCFX_info_aggregator.md @@ -14,6 +14,7 @@ VCFX_info_aggregator [OPTIONS] < input.vcf > output.vcf - `-a`, `--aggregate-info `: Comma-separated list of INFO fields to aggregate (required). - `-h`, `--help`: Display help message and exit. +- `-v`, `--version`: Show program version and exit. ## Description @@ -79,7 +80,7 @@ The tool implements several strategies for handling edge cases: 2. **Missing fields**: If a specified INFO field is not present in a particular variant, it is simply skipped for that variant. 3. **Empty input**: The tool will process empty files correctly, reporting zeros for sums and averages. 4. **Malformed VCF**: If a data line is encountered before the `#CHROM` header, an error is reported. -5. **Line endings**: The tool correctly handles both Unix (LF) and Windows (CRLF) line endings. +5. **Line endings**: The tool correctly handles LF and CRLF line endings. 6. **Partial final line**: The tool properly processes files that do not end with a newline character. 
## Performance diff --git a/docs/VCFX_info_parser.md b/docs/VCFX_info_parser.md index a93a64a8..2ce74055 100644 --- a/docs/VCFX_info_parser.md +++ b/docs/VCFX_info_parser.md @@ -16,6 +16,7 @@ VCFX_info_parser --info "FIELD1,FIELD2,..." < input.vcf > extracted_info.tsv |--------|-------------| | `-i`, `--info ` | Required. Comma-separated list of INFO fields to extract (e.g., "DP,AF,SOMATIC") | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description @@ -82,7 +83,7 @@ The tool implements several strategies for handling edge cases: 3. **Malformed lines**: Lines that don't conform to VCF format are skipped with a warning message 4. **Empty input**: The tool correctly handles empty input files 5. **Header lines**: VCF header lines (starting with #) are skipped -6. **Line endings**: Both Unix (LF) and Windows (CRLF) line endings are supported +6. **Line endings**: LF and CRLF line endings are supported 7. **Partial final line**: Files without a final newline character are processed correctly ## Performance diff --git a/docs/VCFX_info_summarizer.md b/docs/VCFX_info_summarizer.md index 50b4566d..f1d8b061 100644 --- a/docs/VCFX_info_summarizer.md +++ b/docs/VCFX_info_summarizer.md @@ -16,6 +16,7 @@ VCFX_info_summarizer --info "FIELD1,FIELD2,..." < input.vcf > summary_stats.tsv |--------|-------------| | `-i`, `--info ` | Required. 
Comma-separated list of INFO fields to analyze (e.g., "DP,AF,MQ") | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_ld_calculator.md b/docs/VCFX_ld_calculator.md index 7a40bbbe..1dc5440a 100644 --- a/docs/VCFX_ld_calculator.md +++ b/docs/VCFX_ld_calculator.md @@ -16,6 +16,7 @@ VCFX_ld_calculator [OPTIONS] < input.vcf > ld_matrix.txt |--------|-------------| | `--region ` | Only compute LD for variants in the specified region | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description @@ -77,7 +78,7 @@ VCFX_ld_calculator --region chr1:10000-20000 < input.vcf > ld_matrix.txt Filter for common variants first, then calculate LD: ```bash -cat input.vcf | VCFX_af_subsetter --min-af 0.05 | VCFX_ld_calculator > common_variants_ld.txt +cat input.vcf | VCFX_af_subsetter --af-filter '0.05-1.0' | VCFX_ld_calculator > common_variants_ld.txt ``` ## Handling Special Cases diff --git a/docs/VCFX_merger.md b/docs/VCFX_merger.md index 27124ab5..6fb790ff 100644 --- a/docs/VCFX_merger.md +++ b/docs/VCFX_merger.md @@ -16,6 +16,7 @@ VCFX_merger --merge file1.vcf,file2.vcf,... 
[options] > merged.vcf |--------|-------------| | `-m, --merge` | Comma-separated list of VCF files to merge | | `-h, --help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_metadata_summarizer.md b/docs/VCFX_metadata_summarizer.md index 111bcf06..26eb18f2 100644 --- a/docs/VCFX_metadata_summarizer.md +++ b/docs/VCFX_metadata_summarizer.md @@ -15,6 +15,7 @@ VCFX_metadata_summarizer [options] < input.vcf | Option | Description | |--------|-------------| | `-h, --help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_missing_data_handler.md b/docs/VCFX_missing_data_handler.md index b12cf9b0..9df50524 100644 --- a/docs/VCFX_missing_data_handler.md +++ b/docs/VCFX_missing_data_handler.md @@ -17,6 +17,7 @@ VCFX_missing_data_handler [OPTIONS] [files...] > processed.vcf | `--fill-missing`, `-f` | Impute missing genotypes with a default value | | `--default-genotype`, `-d` | Specify the default genotype for imputation (default: "./.") | | `--help`, `-h` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_missing_detector.md b/docs/VCFX_missing_detector.md index 53325d21..7cb368b9 100644 --- a/docs/VCFX_missing_detector.md +++ b/docs/VCFX_missing_detector.md @@ -15,6 +15,7 @@ VCFX_missing_detector [OPTIONS] < input.vcf > flagged.vcf | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_multiallelic_splitter.md b/docs/VCFX_multiallelic_splitter.md index e7f4692b..870dca12 100644 --- a/docs/VCFX_multiallelic_splitter.md +++ b/docs/VCFX_multiallelic_splitter.md @@ -15,6 +15,7 @@ VCFX_multiallelic_splitter [OPTIONS] < input.vcf > biallelic_output.vcf | Option | Description | |--------|-------------| | `--help`, 
`-h` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_nonref_filter.md b/docs/VCFX_nonref_filter.md index 278c082e..eda94165 100644 --- a/docs/VCFX_nonref_filter.md +++ b/docs/VCFX_nonref_filter.md @@ -15,6 +15,7 @@ VCFX_nonref_filter [OPTIONS] < input.vcf > filtered.vcf | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description @@ -70,7 +71,7 @@ VCFX_nonref_filter > high_quality_nonref.vcf # Create a pipeline of filters cat input.vcf | \ VCFX_nonref_filter | \ -VCFX_phred_filter --min-quality 30 > filtered.vcf +VCFX_phred_filter --phred-filter 30 > filtered.vcf ``` ## Homozygous Reference Detection diff --git a/docs/VCFX_outlier_detector.md b/docs/VCFX_outlier_detector.md index 5cff70a6..32bc614c 100644 --- a/docs/VCFX_outlier_detector.md +++ b/docs/VCFX_outlier_detector.md @@ -16,9 +16,12 @@ VCFX_outlier_detector --metric --threshold [--variant|--sample] < in |--------|-------------| | `--metric`, `-m` | Name of the metric to use (e.g., AF, DP, GQ) | | `--threshold`, `-t` | Numeric threshold value for outlier detection | -| `--variant`, `-v` | Variant mode: identify variants with INFO field metrics above threshold | +| `--variant`, `-V` | Variant mode: identify variants with INFO field metrics above threshold | | `--sample`, `-s` | Sample mode: identify samples with average genotype metrics above threshold | | `--help`, `-h` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | + +**Note:** `-v` shows the version information. Use `--variant` or the short option `-V` to run in variant mode. 
## Description diff --git a/docs/VCFX_phase_checker.md b/docs/VCFX_phase_checker.md index df0bf65d..4bf62779 100644 --- a/docs/VCFX_phase_checker.md +++ b/docs/VCFX_phase_checker.md @@ -15,6 +15,7 @@ VCFX_phase_checker [OPTIONS] < input.vcf > phased_output.vcf | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_phase_quality_filter.md b/docs/VCFX_phase_quality_filter.md index 80a9a427..c6e18231 100644 --- a/docs/VCFX_phase_quality_filter.md +++ b/docs/VCFX_phase_quality_filter.md @@ -15,6 +15,7 @@ VCFX_phase_quality_filter --filter-pq "PQ" < input.vcf > output.v | Option | Description | |--------|-------------| | `-h, --help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | | `-f, --filter-pq` | Condition like 'PQ>30', 'PQ>=20', 'PQ!=10', etc. | ## Description diff --git a/docs/VCFX_phred_filter.md b/docs/VCFX_phred_filter.md index ad7e73ae..b662f91b 100644 --- a/docs/VCFX_phred_filter.md +++ b/docs/VCFX_phred_filter.md @@ -17,6 +17,7 @@ VCFX_phred_filter [OPTIONS] < input.vcf > filtered.vcf | `-p`, `--phred-filter` | Set PHRED quality score threshold (default: 30.0) | | `-k`, `--keep-missing-qual` | Keep variants with missing quality values (represented as ".") | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_population_filter.md b/docs/VCFX_population_filter.md index cabfdeba..e98db84e 100644 --- a/docs/VCFX_population_filter.md +++ b/docs/VCFX_population_filter.md @@ -12,6 +12,7 @@ VCFX_population_filter [OPTIONS] < input.vcf > output.vcf | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | | `-p`, `--population ` | **Required**: Population tag to keep (e.g., 'EUR', 'AFR', 'EAS') | | 
`-m`, `--pop-map ` | **Required**: Tab-delimited file mapping sample names to populations | diff --git a/docs/VCFX_position_subsetter.md b/docs/VCFX_position_subsetter.md index fcd33045..727e78f4 100644 --- a/docs/VCFX_position_subsetter.md +++ b/docs/VCFX_position_subsetter.md @@ -13,6 +13,7 @@ VCFX_position_subsetter --region "CHR:START-END" < input.vcf > filtered.vcf |--------|-------------| | `-r`, `--region ` | Required. Genomic region to extract in the format "chromosome:start-end" | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description `VCFX_position_subsetter` reads a VCF file from standard input and outputs only those variants that fall within the specified genomic region. The tool: diff --git a/docs/VCFX_probability_filter.md b/docs/VCFX_probability_filter.md index 8978e5d9..10d0a434 100644 --- a/docs/VCFX_probability_filter.md +++ b/docs/VCFX_probability_filter.md @@ -13,6 +13,7 @@ VCFX_probability_filter --filter-probability "" < input.vcf > filtere |--------|-------------| | `-f, --filter-probability ` | Specify the probability filter condition (e.g., `GP>0.9`) | | `-h, --help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description `VCFX_probability_filter` analyzes the genotype probability fields in the FORMAT column of a VCF file and filters variants based on a user-defined condition. The tool: diff --git a/docs/VCFX_quality_adjuster.md b/docs/VCFX_quality_adjuster.md index 80d7b8b4..56ff873d 100644 --- a/docs/VCFX_quality_adjuster.md +++ b/docs/VCFX_quality_adjuster.md @@ -17,6 +17,7 @@ VCFX_quality_adjuster [OPTIONS] < input.vcf > output.vcf | `-a`, `--adjust-qual ` | Required. The transformation function to apply. Must be one of: `log`, `sqrt`, `square`, or `identity`. | | `-n`, `--no-clamp` | Do not clamp negative or extremely large values resulting from transformations. | | `-h`, `--help` | Display help message and exit. 
| +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_record_filter.md b/docs/VCFX_record_filter.md index 8ae0b8f1..c406e4eb 100644 --- a/docs/VCFX_record_filter.md +++ b/docs/VCFX_record_filter.md @@ -14,6 +14,7 @@ VCFX_record_filter --filter "CRITERIA" [OPTIONS] < input.vcf > filtered.vcf | `-f`, `--filter ` | Required. One or more filtering criteria separated by semicolons (e.g., `"POS>10000;QUAL>=30;AF<0.05"`) | | `-l`, `--logic ` | Logic for combining multiple criteria: `and` (default) requires all criteria to pass, `or` requires any criterion to pass | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description `VCFX_record_filter` evaluates each variant in a VCF file against specified criteria and outputs only variants that satisfy these criteria. The tool: diff --git a/docs/VCFX_ref_comparator.md b/docs/VCFX_ref_comparator.md index fa2eba48..a3db6c76 100644 --- a/docs/VCFX_ref_comparator.md +++ b/docs/VCFX_ref_comparator.md @@ -16,6 +16,7 @@ VCFX_ref_comparator --reference < input.vcf > annotated.vcf |--------|-------------| | `-r`, `--reference` | Required. 
Path to reference genome in FASTA format | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_reformatter.md b/docs/VCFX_reformatter.md index 2934839f..05e8f96f 100644 --- a/docs/VCFX_reformatter.md +++ b/docs/VCFX_reformatter.md @@ -15,6 +15,7 @@ VCFX_reformatter [options] < input.vcf > output.vcf | Option | Description | |--------|-------------| | `-h, --help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | | `-c, --compress-info ` | Remove specified INFO keys (comma-separated) | | `-f, --compress-format ` | Remove specified FORMAT keys (comma-separated) | | `-i, --reorder-info ` | Reorder INFO keys (comma-separated) | diff --git a/docs/VCFX_region_subsampler.md b/docs/VCFX_region_subsampler.md index 0ff4eaaa..c8d4396c 100644 --- a/docs/VCFX_region_subsampler.md +++ b/docs/VCFX_region_subsampler.md @@ -15,6 +15,7 @@ VCFX_region_subsampler --region-bed FILE < input.vcf > output.vcf | Option | Description | |--------|-------------| | `-h, --help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | | `-b, --region-bed FILE` | BED file listing regions to keep | ## Description diff --git a/docs/VCFX_sample_extractor.md b/docs/VCFX_sample_extractor.md index da470f56..96b2ca06 100644 --- a/docs/VCFX_sample_extractor.md +++ b/docs/VCFX_sample_extractor.md @@ -16,6 +16,7 @@ VCFX_sample_extractor [OPTIONS] < input.vcf > subset.vcf |--------|-------------| | `-s`, `--samples` LIST | Comma or space separated list of sample names to extract | | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_sorter.md b/docs/VCFX_sorter.md index 561978df..d66708e1 100644 --- a/docs/VCFX_sorter.md +++ b/docs/VCFX_sorter.md @@ -12,6 +12,7 @@ VCFX_sorter [OPTIONS] < input.vcf > output.vcf | Option | Description | 
|--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | | `-n`, `--natural-chr` | Use natural chromosome sorting (chr1 < chr2 < chr10) instead of lexicographic sorting | ## Description diff --git a/docs/VCFX_subsampler.md b/docs/VCFX_subsampler.md index 4094b9e9..0ba828ca 100644 --- a/docs/VCFX_subsampler.md +++ b/docs/VCFX_subsampler.md @@ -17,6 +17,7 @@ VCFX_subsampler [options] < input.vcf > output.vcf | `-s, --subsample ` | Required: Number of variants to keep in the output | | `--seed ` | Optional: Use a specific random seed for reproducible results | | `-h, --help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | ## Description diff --git a/docs/VCFX_sv_handler.md b/docs/VCFX_sv_handler.md index 14f90490..67ff1dcb 100644 --- a/docs/VCFX_sv_handler.md +++ b/docs/VCFX_sv_handler.md @@ -12,6 +12,7 @@ VCFX_sv_handler [OPTIONS] < input.vcf > output.vcf | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | | `-f`, `--sv-filter-only` | Keep only lines that have 'SVTYPE=' in their INFO field | | `-m`, `--sv-modify` | Modify the INFO field of structural variants to add additional annotations | diff --git a/docs/VCFX_validator.md b/docs/VCFX_validator.md index bb46f329..dde07d1c 100644 --- a/docs/VCFX_validator.md +++ b/docs/VCFX_validator.md @@ -14,7 +14,8 @@ VCFX_validator [OPTIONS] < input.vcf | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | -| `-s`, `--strict` | Enable stricter validation checks (reserved for future implementation) | +| `-v`, `--version` | Show program version and exit | +| `-s`, `--strict` | Enable stricter validation checks | ## Description `VCFX_validator` processes a VCF file to verify its structural validity by: @@ -59,22 +60,35 @@ This tool is useful for validating VCF files 
before processing them with other t - INFO: Must be '.' or contain valid key-value pairs or flags: - If not '.', must contain at least one valid entry - Key-value pairs must have a non-empty key + - Flags (without '=') are allowed +### Strict Mode +When `--strict` is used, additional checks are applied: +- The number of columns in every data line must exactly match the `#CHROM` header. +- If FORMAT/sample columns are present, each sample field must contain the same + number of sub-fields as specified in the FORMAT column. +- Any warning that would normally be emitted is treated as an error and causes + the validator to exit with a non-zero status. + ## Examples ### Basic Validation Check if a VCF file is valid: ```bash -VCFX_validator < input.vcf +VCFX_validator < input.vcf > validated.vcf ``` ### Using Strict Mode -Enable stricter validation (note: additional strict checks are reserved for future implementation): +Enable stricter validation with additional checks: ```bash -VCFX_validator --strict < input.vcf +VCFX_validator --strict < input.vcf > validated.vcf ``` +When the input is valid, the original VCF is written unchanged to standard output, +allowing `VCFX_validator` to be used as a filter in processing pipelines. Informational +messages such as `VCF file is valid.` are printed to standard error. 
+ ### Redirecting Error Messages Save validation errors to a file: ```bash diff --git a/docs/VCFX_variant_classifier.md b/docs/VCFX_variant_classifier.md index 48304688..b2b5be15 100644 --- a/docs/VCFX_variant_classifier.md +++ b/docs/VCFX_variant_classifier.md @@ -15,6 +15,7 @@ VCFX_variant_classifier [OPTIONS] < input.vcf > output.vcf_or_tsv | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | | `-a`, `--append-info` | Instead of producing a TSV, output a valid VCF with a new 'VCF_CLASS' subfield in the INFO column | ## Description diff --git a/docs/VCFX_variant_counter.md b/docs/VCFX_variant_counter.md index 47e6e9dd..29839c7b 100644 --- a/docs/VCFX_variant_counter.md +++ b/docs/VCFX_variant_counter.md @@ -12,6 +12,7 @@ VCFX_variant_counter [OPTIONS] < input.vcf | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | +| `-v`, `--version` | Show program version and exit | | `-s`, `--strict` | Fail on any data line with fewer than 8 columns | ## Description diff --git a/docs/citation.md b/docs/citation.md index 035f7aaf..18c83beb 100644 --- a/docs/citation.md +++ b/docs/citation.md @@ -24,7 +24,7 @@ The VCFX toolkit was presented at the 12th International Work-Conference on Bioi ### Abstract -VCFX is a collection of specialized C/C++ command-line tools designed for efficient manipulation, analysis, and transformation of VCF (Variant Call Format) files used in genomic research and bioinformatics. Each tool is an independent executable that follows the Unix philosophy: do one thing well and work seamlessly with other tools through standard input/output streams. The toolkit includes over 60 specialized utilities covering the entire lifecycle of variant data analysis from filtering and annotation to merging, phasing, and structural variant manipulation. 
+VCFX is a collection of specialized C/C++ command-line tools designed for efficient manipulation, analysis, and transformation of VCF (Variant Call Format) files used in genomic research and bioinformatics. Each tool is an independent executable that follows the Unix philosophy: do one thing well and work seamlessly with other tools through standard input/output streams. The toolkit includes 60 specialized utilities covering the entire lifecycle of variant data analysis from filtering and annotation to merging, phasing, and structural variant manipulation. ## How to Cite in Different Formats diff --git a/docs/docker.md b/docs/docker.md index e71a060c..1e8ebcdb 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -8,16 +8,16 @@ VCFX is available as a pre-built Docker image on GitHub Container Registry: ```bash # Pull the image (only needed once) -docker pull ghcr.io/ieeta-pt/vcfx:latest +docker pull ghcr.io/jorgemfs/vcfx:latest # Run a VCFX tool -docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Mount a directory with your data -docker run --rm -v /path/to/your/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v /path/to/your/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Example: Process a VCF file (using tests/data/valid.vcf as an example) -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' ``` Using the pre-built image is recommended for most users as it: @@ -65,19 +65,19 @@ There are several ways to run VCFX tools with Docker: ```bash # With the pre-built image -docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name 
[options] # With a locally built image docker run --rm vcfx:local VCFX_tool_name [options] # Mount the tests/data directory to access test files -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Process files in the tests/data directory -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_validator' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_validator' # Example: Calculate allele frequencies for a VCF file -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' ``` ### Using Docker Compose @@ -98,7 +98,7 @@ docker-compose run --rm vcfx 'cat /data/valid.vcf | VCFX_allele_freq_calc > /dat When using Docker directly, you need to mount a directory to access your files: ```bash -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] ``` When using Docker Compose, the `tests/data` directory is mounted by default: @@ -115,7 +115,7 @@ You can modify the docker-compose.yml file to mount a different directory if nee You can create complex pipelines by chaining VCFX tools: ```bash -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/classifier_mixed.vcf | VCFX_variant_classifier --append-info | grep "VCF_CLASS=SNP" | VCFX_allele_freq_calc > /data/snp_frequencies.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/classifier_mixed.vcf | VCFX_variant_classifier --append-info | grep "VCF_CLASS=SNP" | 
VCFX_allele_freq_calc > /data/snp_frequencies.tsv' ``` ### Creating Shell Scripts @@ -126,7 +126,7 @@ For complex workflows, consider creating a shell script: #!/bin/bash # save as vcfx_workflow.sh -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | \ +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | \ VCFX_validator | \ VCFX_variant_classifier --append-info | \ VCFX_allele_freq_calc > /data/pipeline_output.tsv' @@ -147,7 +147,7 @@ If you encounter permission issues with files created in the container: ```bash # Run the container with your user ID -docker run --rm -v $(pwd)/tests/data:/data -u $(id -u):$(id -g) ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data -u $(id -u):$(id -g) ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] ``` ### Container Not Finding Commands @@ -156,7 +156,7 @@ If the container can't find VCFX commands, ensure they were properly built in th ```bash # List available VCFX tools in the container -docker run --rm ghcr.io/ieeta-pt/vcfx:latest 'ls -1 /usr/local/bin/VCFX_*' +docker run --rm ghcr.io/jorgemfs/vcfx:latest 'ls -1 /usr/local/bin/VCFX_*' ``` ## Citation diff --git a/docs/index.md b/docs/index.md index 60144f62..2eb30e9b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -84,7 +84,7 @@ Tools for converting or reformatting VCF data: Tools for validating and checking data quality: -- [VCFX_concordance_checker](VCFX_concordance_checker.md) - Check concordance between VCF files +- [VCFX_concordance_checker](VCFX_concordance_checker.md) - Check concordance between samples in a VCF file - [VCFX_missing_detector](VCFX_missing_detector.md) - Detect and report missing data - [VCFX_validator](VCFX_validator.md) - Validate VCF format compliance - [View all quality control tools...](tools_overview.md#quality-control) @@ -98,6 +98,25 @@ Tools for handling VCF files: - [VCFX_compressor](VCFX_compressor.md) - 
Compress VCF files efficiently - [View all file management tools...](tools_overview.md#file-management) +### Annotation and Reporting + +Tools for annotating and extracting information from VCF files: + +- [VCFX_custom_annotator](VCFX_custom_annotator.md) - Add custom annotations to VCF files +- [VCFX_info_summarizer](VCFX_info_summarizer.md) - Summarize INFO fields +- [VCFX_info_parser](VCFX_info_parser.md) - Extract selected INFO fields to TSV +- [View all annotation tools...](tools_overview.md#annotation-and-reporting) + +### Data Processing + +Tools for processing variants and samples: + +- [VCFX_missing_data_handler](VCFX_missing_data_handler.md) - Handle missing data +- [VCFX_quality_adjuster](VCFX_quality_adjuster.md) - Adjust quality scores +- [VCFX_haplotype_phaser](VCFX_haplotype_phaser.md) - Phase haplotypes +- [VCFX_haplotype_extractor](VCFX_haplotype_extractor.md) - Extract haplotype information +- [View all processing tools...](tools_overview.md#data-processing) + For a complete list of all tools and detailed usage examples, see the [tools overview](tools_overview.md). ## Who Should Use VCFX? @@ -127,19 +146,15 @@ VCFX tools are designed to be used in pipelines. Here are some common usage patt # Extract phased variants, filter by quality, and calculate allele frequencies cat input.vcf | \ VCFX_phase_checker | \ - VCFX_phred_filter --min-qual 30 | \ + VCFX_phred_filter --phred-filter 30 | \ VCFX_allele_freq_calc > result.tsv ``` -### Sample Selection and Comparison +### Sample Comparison ```bash -# Extract samples and check concordance -cat input.vcf | \ - VCFX_sample_extractor --samples SAMPLE1,SAMPLE2 > samples.vcf - -cat samples.vcf reference.vcf | \ - VCFX_concordance_checker > concordance_report.tsv +# Check concordance between two samples in a single VCF +cat input.vcf | VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" > concordance_report.tsv ``` See the [tools overview page](tools_overview.md#common-usage-patterns) for more usage examples. 
diff --git a/docs/installation.md b/docs/installation.md index 4c697df9..6f14121d 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -35,13 +35,13 @@ The simplest way to use VCFX is with Docker, which requires no compilation: ```bash # Pull the VCFX Docker image (only needed once) -docker pull ghcr.io/ieeta-pt/vcfx:latest +docker pull ghcr.io/jorgemfs/vcfx:latest # Run a VCFX tool -docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Process files by mounting a directory with your data -docker run --rm -v /path/to/your/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/input.vcf | VCFX_tool_name > /data/output.tsv' +docker run --rm -v /path/to/your/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/input.vcf | VCFX_tool_name > /data/output.tsv' ``` This method is ideal for: diff --git a/docs/python_api.md b/docs/python_api.md new file mode 100644 index 00000000..d1672aab --- /dev/null +++ b/docs/python_api.md @@ -0,0 +1,76 @@ +# Python API + +VCFX provides optional Python bindings exposing a subset of helper +functions from the C++ `vcfx_core` library. The bindings are built as a +native Python extension and can be enabled through CMake. + +## Installation + +Build the project with the `PYTHON_BINDINGS` option enabled: + +```bash +mkdir build && cd build +cmake -DPYTHON_BINDINGS=ON .. +make -j +``` + +The compiled module will be placed in the `build/python` directory. +You can also install the package via `pip` which will invoke CMake +automatically: + +```bash +pip install ./python +``` + +## Available Functions + +The module exposes the following helpers: + +- `trim(text)` – remove leading and trailing whitespace. +- `split(text, delimiter)` – split `text` by the given delimiter and + return a list of strings. +- `read_file_maybe_compressed(path)` – read a plain or gzip/BGZF + compressed file and return its contents as a string. 
+- `read_maybe_compressed(data)` – decompress a bytes object if it is + gzip/BGZF compressed and return the resulting bytes. +- `get_version()` – return the VCFX version string. + +## Example Usage + +```python +import vcfx + +print(vcfx.trim(" abc ")) +# 'abc' + +print(vcfx.split("A,B,C", ",")) +# ['A', 'B', 'C'] + +data = vcfx.read_maybe_compressed(b"hello") +print(data) + +version = vcfx.get_version() +print("VCFX version:", version) +``` + +## Tool Wrappers + +Besides the helper functions, the package provides lightweight wrappers for +all command line tools shipped with VCFX. The wrappers simply invoke the +corresponding ``VCFX_*`` executable via ``subprocess``. + +Use ``vcfx.available_tools()`` to see which tools are accessible on your +``PATH`` and call them either via ``vcfx.run_tool(name, *args)`` or by using +the tool name as a function: + +```python +import vcfx + +print(vcfx.available_tools()) + +# run through the generic helper +vcfx.run_tool("alignment_checker", "--help") + +# or directly by name (if available) +vcfx.alignment_checker("--help") +``` diff --git a/docs/quickstart.md b/docs/quickstart.md index 261901bd..22504065 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -37,7 +37,7 @@ Filter for high-quality SNPs: cat input.vcf | \ VCFX_variant_classifier --append-info | \ grep 'VCF_CLASS=SNP' | \ - VCFX_phred_filter --min-qual 30 > high_quality_snps.vcf + VCFX_phred_filter --phred-filter 30 > high_quality_snps.vcf ``` ### Example 2: Population Analysis @@ -62,10 +62,10 @@ cat input.vcf | \ ### Example 4: Quality Control -Check concordance between two VCF files: +Check concordance between two samples in a single VCF file: ```bash -VCFX_concordance_checker --vcf1 sample1.vcf --vcf2 sample2.vcf > concordance_report.tsv +cat sample.vcf | VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" > concordance_report.tsv ``` ## Working with Compressed Files @@ -120,18 +120,14 @@ cat input.vcf | \ VCFX_validator | \ VCFX_variant_classifier 
--append-info | \ VCFX_missing_detector --max-missing 0.1 | \ - VCFX_phred_filter --min-qual 20 > qc_passed.vcf + VCFX_phred_filter --phred-filter 20 > qc_passed.vcf ``` ### Sample Comparison ```bash -# Extract common samples -VCFX_sample_extractor --samples SAMPLE1,SAMPLE2 < input1.vcf > samples1.vcf -VCFX_sample_extractor --samples SAMPLE1,SAMPLE2 < input2.vcf > samples2.vcf - -# Check concordance -VCFX_concordance_checker --vcf1 samples1.vcf --vcf2 samples2.vcf > concordance.tsv +# Check concordance between two samples in a single VCF +cat input.vcf | VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" > concordance.tsv ``` ### Population Structure Analysis diff --git a/docs/styles/custom.css b/docs/styles/custom.css new file mode 100644 index 00000000..74717038 --- /dev/null +++ b/docs/styles/custom.css @@ -0,0 +1,6 @@ +[data-md-color-scheme="slate"] { + --md-primary-fg-color: #90caf9; + --md-accent-fg-color: #90caf9; + --md-default-fg-color: #e0e0e0; + --md-default-bg-color: #121212; +} diff --git a/docs/tools_overview.md b/docs/tools_overview.md index 76c3b152..abeb3af1 100644 --- a/docs/tools_overview.md +++ b/docs/tools_overview.md @@ -2,6 +2,9 @@ VCFX is a collection of C/C++ tools for processing and analyzing VCF (Variant Call Format) files, with optional WebAssembly compatibility. Each tool is an independent command-line executable that can parse input from `stdin` and write to `stdout`, enabling flexible piping and integration into bioinformatics pipelines. +The suite also includes a convenience wrapper `vcfx` so you can run commands as `vcfx `. For example, `vcfx variant_counter` is equivalent to running `VCFX_variant_counter`. Use `vcfx --list` or the alias `vcfx list` to see available subcommands. To view Markdown documentation for a tool, run `vcfx help `. All individual `VCFX_*` binaries remain available if you prefer calling them directly. +Every tool also accepts `--version` to display the build version. 
+ ## Tool Categories ### Data Analysis @@ -57,7 +60,7 @@ Tools for converting or reformatting VCF data: Tools for validating and checking data quality: -- [VCFX_concordance_checker](VCFX_concordance_checker.md) - Check concordance between VCF files +- [VCFX_concordance_checker](VCFX_concordance_checker.md) - Check concordance between samples in a VCF file - [VCFX_missing_detector](VCFX_missing_detector.md) - Detect and report missing data - [VCFX_outlier_detector](VCFX_outlier_detector.md) - Detect outlier samples or variants - [VCFX_alignment_checker](VCFX_alignment_checker.md) - Check alignment of variants @@ -109,7 +112,7 @@ VCFX tools are designed to be combined in pipelines. Here are some common usage # Extract phased variants, filter by quality, and calculate allele frequencies cat input.vcf | \ VCFX_phase_checker | \ - VCFX_phred_filter --min-qual 30 | \ + VCFX_phred_filter --phred-filter 30 | \ VCFX_allele_freq_calc > result.tsv ``` @@ -120,15 +123,14 @@ cat input.vcf | \ cat input.vcf | \ VCFX_variant_classifier --append-info | \ grep 'VCF_CLASS=SNP' | \ - VCFX_phred_filter --min-qual 30 > high_quality_snps.vcf + VCFX_phred_filter --phred-filter 30 > high_quality_snps.vcf ``` -### Sample Extraction and Comparison +### Sample Comparison ```bash -# Extract samples and check concordance -cat input.vcf | VCFX_sample_extractor --samples SAMPLE1,SAMPLE2 > samples.vcf -cat samples.vcf reference.vcf | VCFX_concordance_checker > concordance_report.tsv +# Check concordance between two samples in a single VCF +cat input.vcf | VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" > concordance.tsv ``` ### Linkage Disequilibrium Analysis @@ -136,7 +138,7 @@ cat samples.vcf reference.vcf | VCFX_concordance_checker > concordance_report.ts ```bash # Calculate LD in a specific region after filtering for common variants cat input.vcf | \ - VCFX_af_subsetter --min-af 0.05 | \ + VCFX_af_subsetter --af-filter '0.05-1.0' | \ VCFX_ld_calculator --region chr1:10000-20000 > 
ld_matrix.txt ``` @@ -165,5 +167,5 @@ cat input.vcf | \ VCFX_validator | \ VCFX_variant_classifier --append-info | \ VCFX_missing_detector --max-missing 0.1 | \ - VCFX_phred_filter --min-qual 20 > qc_passed.vcf + VCFX_phred_filter --phred-filter 20 > qc_passed.vcf ``` \ No newline at end of file diff --git a/include/vcfx_core.h b/include/vcfx_core.h index ff542a10..80516071 100644 --- a/include/vcfx_core.h +++ b/include/vcfx_core.h @@ -3,7 +3,52 @@ #include #include +#include +#include -// Core functionalities for VCFX tools +namespace vcfx { + +// Trim leading and trailing whitespace from a string +std::string trim(const std::string& str); + +// Split a string on the given delimiter +std::vector split(const std::string& str, char delimiter); + +// Convenience helpers for printing common messages +void print_error(const std::string& msg, std::ostream& os = std::cerr); +void print_version(const std::string& tool, const std::string& version, + std::ostream& os = std::cout); + +inline std::string get_version() { +#ifdef VCFX_VERSION + return VCFX_VERSION; +#else + return "unknown"; +#endif +} + +inline bool handle_version_flag(int argc, char* argv[], const std::string& tool, + std::ostream& os = std::cout) { + for (int i = 1; i < argc; ++i) { + if (std::strcmp(argv[i], "--version") == 0 || + std::strcmp(argv[i], "-v") == 0) { + print_version(tool, get_version(), os); + return true; + } + } + return false; +} + +// Read entire input stream, automatically decompressing if gzip/BGZF +// compressed. Returns true on success and stores the resulting text in +// 'out'. +bool read_maybe_compressed(std::istream& in, std::string& out); + +// Convenience helper to read a file that may be gzip/BGZF compressed. The file +// is loaded completely into memory and stored in 'out'. Returns true on +// success. 
+bool read_file_maybe_compressed(const std::string& path, std::string& out); + +} // namespace vcfx #endif // VCFX_CORE_H diff --git a/mkdocs.yml b/mkdocs.yml index 3a683ca9..41f2d1b0 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -29,8 +29,8 @@ theme: icon: material/brightness-7 name: Switch to dark mode - scheme: slate - primary: indigo - accent: indigo + primary: blue grey + accent: light blue toggle: icon: material/brightness-4 name: Switch to light mode @@ -44,6 +44,8 @@ extra: link: https://github.com/ieeta-pt/VCFX - icon: fontawesome/brands/twitter link: https://twitter.com/MiguelFSilva1 +extra_css: + - styles/custom.css # Extensions markdown_extensions: @@ -135,7 +137,8 @@ nav: - VCFX_missing_data_handler: VCFX_missing_data_handler.md - VCFX_quality_adjuster: VCFX_quality_adjuster.md - VCFX_haplotype_phaser: VCFX_haplotype_phaser.md - - VCFX_haplotype_extractor: VCFX_haplotype_extractor.md + - VCFX_haplotype_extractor: VCFX_haplotype_extractor.md + - Python API: python_api.md - Contributing: CONTRIBUTING.md - Citation: citation.md - - License: LICENSE.md \ No newline at end of file + - License: LICENSE.md diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt new file mode 100644 index 00000000..40a4efcc --- /dev/null +++ b/python/CMakeLists.txt @@ -0,0 +1,25 @@ +cmake_minimum_required(VERSION 3.14) + +if(NOT PYTHON_BINDINGS) + return() +endif() + +find_package(Python3 COMPONENTS Development REQUIRED) + +add_library(_vcfx MODULE bindings.cpp) +target_link_libraries(_vcfx PRIVATE vcfx_core Python3::Python) + +# Place the compiled module into the build/python directory +set_target_properties(_vcfx PROPERTIES + PREFIX "" + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/python/vcfx" + ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/python/vcfx" +) + +configure_file(__init__.py "${CMAKE_BINARY_DIR}/python/vcfx/__init__.py" COPYONLY) +configure_file(tools.py "${CMAKE_BINARY_DIR}/python/vcfx/tools.py" COPYONLY) + +install(TARGETS _vcfx + LIBRARY DESTINATION 
${CMAKE_INSTALL_LIBDIR}/vcfx + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx) +install(FILES __init__.py tools.py DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx) diff --git a/python/__init__.py b/python/__init__.py new file mode 100644 index 00000000..2f2ae974 --- /dev/null +++ b/python/__init__.py @@ -0,0 +1,14 @@ +"""Python bindings for the VCFX toolkit.""" + +from ._vcfx import * # noqa: F401,F403 +from . import tools as _tools + +# Re-export helper functions for convenience +available_tools = _tools.available_tools +run_tool = _tools.run_tool + + +def __getattr__(name): + """Provide access to tool wrappers as attributes.""" + return getattr(_tools, name) + diff --git a/python/bindings.cpp b/python/bindings.cpp new file mode 100644 index 00000000..1e2d0577 --- /dev/null +++ b/python/bindings.cpp @@ -0,0 +1,92 @@ +#include +#include "vcfx_core.h" +#include +#include +#include + +// Helper to convert std::vector to Python list +static PyObject* to_py_list(const std::vector& vec) { + PyObject* list = PyList_New(vec.size()); + if (!list) return nullptr; + for (size_t i = 0; i < vec.size(); ++i) { + PyObject* item = PyUnicode_FromString(vec[i].c_str()); + if (!item) { + Py_DECREF(list); + return nullptr; + } + PyList_SET_ITEM(list, i, item); // steals reference + } + return list; +} + +static PyObject* py_trim(PyObject*, PyObject* args) { + const char* text; + if (!PyArg_ParseTuple(args, "s", &text)) + return nullptr; + std::string result = vcfx::trim(text); + return PyUnicode_FromString(result.c_str()); +} + +static PyObject* py_split(PyObject*, PyObject* args) { + const char* text; + const char* delim; + if (!PyArg_ParseTuple(args, "ss", &text, &delim)) + return nullptr; + std::vector parts = vcfx::split(text, delim[0]); + return to_py_list(parts); +} + +static PyObject* py_read_file(PyObject*, PyObject* args) { + const char* path; + if (!PyArg_ParseTuple(args, "s", &path)) + return nullptr; + std::string out; + if (!vcfx::read_file_maybe_compressed(path, out)) { + 
PyErr_SetString(PyExc_RuntimeError, "Failed to read file"); + return nullptr; + } + return PyBytes_FromStringAndSize(out.data(), out.size()); +} + +static PyObject* py_get_version(PyObject*, PyObject*) { + std::string ver = vcfx::get_version(); + return PyUnicode_FromString(ver.c_str()); +} + +static PyObject* py_read_stream(PyObject*, PyObject* args) { + Py_buffer buf; + if (!PyArg_ParseTuple(args, "y*", &buf)) + return nullptr; + std::string data(static_cast(buf.buf), buf.len); + PyBuffer_Release(&buf); + std::istringstream ss(data); + std::string out; + if (!vcfx::read_maybe_compressed(ss, out)) { + PyErr_SetString(PyExc_RuntimeError, "Failed to read data"); + return nullptr; + } + return PyBytes_FromStringAndSize(out.data(), out.size()); +} + +static PyMethodDef VcfxMethods[] = { + {"trim", py_trim, METH_VARARGS, "Trim leading and trailing whitespace"}, + {"split", py_split, METH_VARARGS, "Split a string on the given delimiter"}, + {"read_file_maybe_compressed", py_read_file, METH_VARARGS, + "Read a (possibly compressed) file and return its contents"}, + {"read_maybe_compressed", py_read_stream, METH_VARARGS, + "Decompress bytes if needed and return the contents"}, + {"get_version", py_get_version, METH_NOARGS, "Return VCFX version string"}, + {nullptr, nullptr, 0, nullptr} +}; + +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_vcfx", + "Python bindings for VCFX helper functions", + -1, + VcfxMethods +}; + +PyMODINIT_FUNC PyInit__vcfx(void) { + return PyModule_Create(&moduledef); +} diff --git a/python/setup.py b/python/setup.py new file mode 100644 index 00000000..c247405f --- /dev/null +++ b/python/setup.py @@ -0,0 +1,44 @@ +import pathlib +import re +import subprocess +from setuptools import setup, Extension +from setuptools.command.build_ext import build_ext + +def read_version(): + root = pathlib.Path(__file__).resolve().parent.parent / "CMakeLists.txt" + text = root.read_text() + m = re.search(r"set\(VCFX_VERSION\s+\"([0-9.]+)\"\)", 
text) + return m.group(1) if m else "0.0.0" + +class CMakeExtension(Extension): + def __init__(self, name): + super().__init__(name, sources=[]) + +class CMakeBuild(build_ext): + def build_extension(self, ext): + extdir = pathlib.Path(self.get_ext_fullpath(ext.name)).parent.resolve() + cmake_args = [ + f'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}', + f'-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY={extdir}', + '-DPYTHON_BINDINGS=ON' + ] + build_temp = pathlib.Path(self.build_temp) + build_temp.mkdir(parents=True, exist_ok=True) + source_dir = pathlib.Path(__file__).resolve().parent.parent + subprocess.check_call(['cmake', str(source_dir)] + cmake_args, cwd=build_temp) + subprocess.check_call(['cmake', '--build', '.', '--target', '_vcfx'], cwd=build_temp) + +setup( + name='vcfx', + version=read_version(), + packages=['vcfx'], + package_dir={'vcfx': '.'}, + ext_modules=[CMakeExtension('_vcfx')], + cmdclass={'build_ext': CMakeBuild}, + zip_safe=False, + classifiers=[ + 'Programming Language :: Python :: 3', + 'Operating System :: MacOS :: MacOS X', + 'Operating System :: POSIX :: Linux', + ], +) diff --git a/python/tools.py b/python/tools.py new file mode 100644 index 00000000..13e61626 --- /dev/null +++ b/python/tools.py @@ -0,0 +1,52 @@ +import subprocess +import shutil +import functools + +__all__ = ["available_tools", "run_tool"] + + +def available_tools(): + """Return a list of VCFX tools available on the PATH.""" + result = subprocess.run(["vcfx", "--list"], capture_output=True, text=True) + if result.returncode != 0: + return [] + return [line.strip() for line in result.stdout.splitlines() if line.strip()] + + +def run_tool(tool, *args, check=True, capture_output=False, text=True, **kwargs): + """Run a VCFX tool using subprocess. + + Parameters + ---------- + tool : str + Name of the tool without the ``VCFX_`` prefix. + *args : list + Arguments passed to the tool. + check : bool, optional + If ``True`` (default) raise ``CalledProcessError`` on non-zero + return code. 
+ capture_output : bool, optional + If ``True`` capture stdout/stderr and return them on the returned + ``CompletedProcess`` object. + text : bool, optional + If ``True`` decode output as text. Defaults to ``True``. + **kwargs : dict + Additional keyword arguments forwarded to ``subprocess.run``. + + Returns + ------- + subprocess.CompletedProcess + """ + exe = shutil.which(f"VCFX_{tool}") + if exe is None: + raise FileNotFoundError(f"VCFX tool '{tool}' not found in PATH") + cmd = [exe, *map(str, args)] + return subprocess.run(cmd, check=check, capture_output=capture_output, text=text, **kwargs) + + +# Lazy attribute access for tool wrappers + +def __getattr__(name): + if name in available_tools(): + return functools.partial(run_tool, name) + raise AttributeError(f"module 'vcfx' has no attribute '{name}'") diff --git a/site/VCFX_validator/index.html b/site/VCFX_validator/index.html index 68d9582c..f68f8549 100644 --- a/site/VCFX_validator/index.html +++ b/site/VCFX_validator/index.html @@ -2884,12 +2884,13 @@

Data LinesExamples

Basic Validation

Check if a VCF file is valid: -

VCFX_validator < input.vcf
+
VCFX_validator < input.vcf > validated.vcf
 

Using Strict Mode

Enable stricter validation (note: additional strict checks are reserved for future implementation): -

VCFX_validator --strict < input.vcf
+
VCFX_validator --strict < input.vcf > validated.vcf
 

+

When the input is valid, the VCF contents are echoed to standard output so the tool can be used in pipelines. Informational messages such as VCF file is valid. are written to standard error.

Redirecting Error Messages

Save validation errors to a file:

VCFX_validator < input.vcf 2> validation_errors.txt
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 7f29f6a4..9408aaa3 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,142 +2,36 @@ cmake_minimum_required(VERSION 3.14)
 
 # Build the core library from your shared code
 add_library(vcfx_core STATIC vcfx_core.cpp)
+set_property(TARGET vcfx_core PROPERTY POSITION_INDEPENDENT_CODE ON)
 target_include_directories(vcfx_core PUBLIC ${CMAKE_CURRENT_LIST_DIR}/../include)
+target_link_libraries(vcfx_core PUBLIC ZLIB::ZLIB)
+if(WIN32)
+    target_link_libraries(vcfx_core PUBLIC getopt)
+endif()
 
 # Add all tool subdirectories
-add_subdirectory(VCFX_header_parser)
-add_subdirectory(VCFX_record_filter)
-add_subdirectory(VCFX_field_extractor)
-add_subdirectory(VCFX_format_converter)
-add_subdirectory(VCFX_variant_counter)
-add_subdirectory(VCFX_sample_extractor)
-add_subdirectory(VCFX_sorter)
-add_subdirectory(VCFX_validator)
-add_subdirectory(VCFX_subsampler)
-add_subdirectory(VCFX_genotype_query)
-add_subdirectory(VCFX_allele_freq_calc)
-add_subdirectory(VCFX_indexer)
-add_subdirectory(VCFX_compressor)
-add_subdirectory(VCFX_position_subsetter)
-add_subdirectory(VCFX_haplotype_extractor)
-add_subdirectory(VCFX_info_parser)
-add_subdirectory(VCFX_variant_classifier)
-add_subdirectory(VCFX_duplicate_remover)
-add_subdirectory(VCFX_info_summarizer)
-add_subdirectory(VCFX_distance_calculator)
-add_subdirectory(VCFX_multiallelic_splitter)
-add_subdirectory(VCFX_missing_data_handler)
-add_subdirectory(VCFX_concordance_checker)
-add_subdirectory(VCFX_allele_balance_calc)
-add_subdirectory(VCFX_allele_counter)
-add_subdirectory(VCFX_phase_checker)
-add_subdirectory(VCFX_annotation_extractor)
-add_subdirectory(VCFX_phred_filter)
-add_subdirectory(VCFX_merger)
-add_subdirectory(VCFX_metadata_summarizer)
-add_subdirectory(VCFX_hwe_tester)
-add_subdirectory(VCFX_fasta_converter)
-add_subdirectory(VCFX_nonref_filter)
-add_subdirectory(VCFX_dosage_calculator)
-add_subdirectory(VCFX_population_filter)
-add_subdirectory(VCFX_file_splitter)
-add_subdirectory(VCFX_gl_filter)
-add_subdirectory(VCFX_ref_comparator)
-add_subdirectory(VCFX_ancestry_inferrer)
-add_subdirectory(VCFX_impact_filter)
-add_subdirectory(VCFX_info_aggregator)
-add_subdirectory(VCFX_probability_filter)
-add_subdirectory(VCFX_diff_tool)
-add_subdirectory(VCFX_cross_sample_concordance)
-add_subdirectory(VCFX_phase_quality_filter)
-add_subdirectory(VCFX_indel_normalizer)
-add_subdirectory(VCFX_custom_annotator)
-add_subdirectory(VCFX_region_subsampler)
-add_subdirectory(VCFX_allele_balance_filter)
-add_subdirectory(VCFX_missing_detector)
-add_subdirectory(VCFX_haplotype_phaser)
-add_subdirectory(VCFX_af_subsetter)
-add_subdirectory(VCFX_sv_handler)
-add_subdirectory(VCFX_reformatter)
-add_subdirectory(VCFX_quality_adjuster)
-add_subdirectory(VCFX_inbreeding_calculator)
-add_subdirectory(VCFX_outlier_detector)
-add_subdirectory(VCFX_alignment_checker)
-add_subdirectory(VCFX_ancestry_assigner)
-add_subdirectory(VCFX_ld_calculator)
+add_subdirectory(vcfx_wrapper)
+
+# Discover every tool directory named "VCFX_*" that ships a CMakeLists.txt and build/install it.
+# CONFIGURE_DEPENDS makes the build re-run CMake when a tool directory is added or removed.
+set(VCFX_TOOLS vcfx)
+file(GLOB TOOL_DIRS RELATIVE "${CMAKE_CURRENT_LIST_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_LIST_DIR}/VCFX_*")
+list(REMOVE_ITEM TOOL_DIRS vcfx_wrapper) # avoid duplicate on case-insensitive filesystems
+foreach(dir ${TOOL_DIRS})
+    if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/${dir}/CMakeLists.txt") # skip stray files/dirs that are not tools
+        add_subdirectory(${dir})
+        list(APPEND VCFX_TOOLS ${dir})
+    endif()
+endforeach()
 
 # Install the core library
 install(TARGETS vcfx_core
         ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
 )
 
-# Define a list of all tool executables for installation
-set(VCFX_TOOLS
-    VCFX_header_parser
-    VCFX_record_filter
-    VCFX_field_extractor
-    VCFX_format_converter
-    VCFX_variant_counter
-    VCFX_sample_extractor
-    VCFX_sorter
-    VCFX_validator
-    VCFX_subsampler
-    VCFX_genotype_query
-    VCFX_allele_freq_calc
-    VCFX_indexer
-    VCFX_compressor
-    VCFX_position_subsetter
-    VCFX_haplotype_extractor
-    VCFX_info_parser
-    VCFX_variant_classifier
-    VCFX_duplicate_remover
-    VCFX_info_summarizer
-    VCFX_distance_calculator
-    VCFX_multiallelic_splitter
-    VCFX_missing_data_handler
-    VCFX_concordance_checker
-    VCFX_allele_balance_calc
-    VCFX_allele_counter
-    VCFX_phase_checker
-    VCFX_annotation_extractor
-    VCFX_phred_filter
-    VCFX_merger
-    VCFX_metadata_summarizer
-    VCFX_hwe_tester
-    VCFX_fasta_converter
-    VCFX_nonref_filter
-    VCFX_dosage_calculator
-    VCFX_population_filter
-    VCFX_file_splitter
-    VCFX_gl_filter
-    VCFX_ref_comparator
-    VCFX_ancestry_inferrer
-    VCFX_impact_filter
-    VCFX_info_aggregator
-    VCFX_probability_filter
-    VCFX_diff_tool
-    VCFX_cross_sample_concordance
-    VCFX_phase_quality_filter
-    VCFX_indel_normalizer
-    VCFX_custom_annotator
-    VCFX_region_subsampler
-    VCFX_allele_balance_filter
-    VCFX_missing_detector
-    VCFX_haplotype_phaser
-    VCFX_af_subsetter
-    VCFX_sv_handler
-    VCFX_reformatter
-    VCFX_quality_adjuster
-    VCFX_inbreeding_calculator
-    VCFX_outlier_detector
-    VCFX_alignment_checker
-    VCFX_ancestry_assigner
-    VCFX_ld_calculator
-)
-
 # Install all tool executables
 install(TARGETS ${VCFX_TOOLS}
         RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
 )
 
-message(STATUS "VCFX tools to be installed: ${VCFX_TOOLS}")
\ No newline at end of file
+message(STATUS "VCFX tools to be installed: ${VCFX_TOOLS}")
diff --git a/src/VCFX_af_subsetter/VCFX_af_subsetter.cpp b/src/VCFX_af_subsetter/VCFX_af_subsetter.cpp
index 2a6b0579..8110b7eb 100644
--- a/src/VCFX_af_subsetter/VCFX_af_subsetter.cpp
+++ b/src/VCFX_af_subsetter/VCFX_af_subsetter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_af_subsetter.h"
 #include 
 #include 
@@ -156,6 +157,7 @@ void VCFXAfSubsetter::subsetByAlleleFrequency(std::istream& in, std::ostream& ou
 // Typical main():
 //
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_af_subsetter")) return 0;
     VCFXAfSubsetter afSubsetter;
     return afSubsetter.run(argc, argv);
 }
diff --git a/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp b/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp
index 8973a089..6e407b46 100644
--- a/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp
+++ b/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_alignment_checker.h"
 #include 
 #include 
@@ -55,15 +56,8 @@ int VCFXAlignmentChecker::run(int argc, char* argv[]) {
         return 1;
     }
 
-    // Open reference genome file
-    std::ifstream refStream(refFile);
-    if (!refStream.is_open()) {
-        std::cerr << "Error: Unable to open reference genome file: " << refFile << "\n";
-        return 1;
-    }
-
-    // Load reference genome into memory
-    if (!loadReferenceGenome(refStream)) {
+    // Load reference genome index
+    if (!loadReferenceGenome(refFile)) {
         std::cerr << "Error: Failed to load reference genome.\n";
         return 1;
     }
@@ -85,70 +79,108 @@ void VCFXAlignmentChecker::displayHelp() {
               << "  VCFX_alignment_checker --alignment-discrepancy input.vcf reference.fasta > discrepancies.txt\n";
 }
 
-bool VCFXAlignmentChecker::loadReferenceGenome(std::istream& in) {
+bool VCFXAlignmentChecker::loadReferenceGenome(const std::string& path) {
+    referencePath = path;
+    referenceIndex.clear();
+
+    referenceStream.open(path, std::ios::in);
+    if (!referenceStream.is_open()) {
+        std::cerr << "Error: Unable to open reference genome file: " << path << "\n";
+        return false;
+    }
+
     std::string line;
     std::string currentChrom;
-    std::string seq;
+    FastaIndexEntry entry;
+    std::size_t seqLen = 0;
 
-    while (std::getline(in, line)) {
+    // Scan the FASTA once: for each sequence record where its bases start, its line layout and its length
+    while (std::getline(referenceStream, line)) {
         if (line.empty()) {
             continue;
         }
         if (line[0] == '>') {
-            // If we already had a chromosome loaded, store its sequence
             if (!currentChrom.empty()) {
-                referenceGenome[normalizeChromosome(currentChrom)] = seq;
+                entry.length = seqLen;
+                referenceIndex[normalizeChromosome(currentChrom)] = entry;
             }
-            // Start a new chromosome
-            seq.clear();
-            // Grab chromosome name (up to first space)
-            size_t pos = line.find(' ');
+
+            currentChrom.clear();
+            seqLen = 0;
+            entry = FastaIndexEntry();
+
+            std::size_t pos = line.find(' ');
             if (pos != std::string::npos) {
                 currentChrom = line.substr(1, pos - 1);
             } else {
                 currentChrom = line.substr(1);
             }
+
+            entry.offset = referenceStream.tellg();
+            entry.basesPerLine = 0;
+            entry.bytesPerLine = 0;
         } else {
-            // Append this line to the sequence (uppercase)
-            std::transform(line.begin(), line.end(), line.begin(), ::toupper);
-            seq += line;
+            if (entry.basesPerLine == 0) {
+                entry.basesPerLine = line.size();
+                entry.bytesPerLine = line.size() + 1; // assume single '\n'
+            }
+            seqLen += line.size();
         }
     }
 
-    // Store the last chromosome read
     if (!currentChrom.empty()) {
-        referenceGenome[normalizeChromosome(currentChrom)] = seq;
+        entry.length = seqLen;
+        referenceIndex[normalizeChromosome(currentChrom)] = entry;
     }
 
+    referenceStream.clear();
+    referenceStream.seekg(0);
     return true;
 }
 
 std::string VCFXAlignmentChecker::normalizeChromosome(const std::string& chrom) {
-    // NOTE: This logic may cause mismatches if your reference is named "1" but your VCF says "chr1".
-    // You may want to adjust this to match your actual naming conventions.
     std::string norm = chrom;
-    if (norm.find("chr") != 0 && 
-        !(norm == "X" || norm == "Y" || norm == "MT" ||
-          std::all_of(norm.begin(), norm.end(), ::isdigit))) 
-    {
-        norm = "chr" + norm;
+    // Upper-case first so that every spelling of the prefix ("chr", "Chr", "CHR") is dropped.
+    std::transform(norm.begin(), norm.end(), norm.begin(), ::toupper);
+    if (norm.rfind("CHR", 0) == 0) {
+        norm = norm.substr(3);
     }
     return norm;
 }
 
 std::string VCFXAlignmentChecker::getReferenceBases(const std::string& chrom, int pos, int length) {
-    auto it = referenceGenome.find(normalizeChromosome(chrom));
-    if (it == referenceGenome.end()) {
+    auto it = referenceIndex.find(normalizeChromosome(chrom));
+    if (it == referenceIndex.end()) {
         return "";
     }
 
-    const std::string& seq = it->second;
-    // Convert VCF 1-based 'pos' to a 0-based index into the string
-    size_t startIndex = static_cast(pos - 1);
-    if (pos < 1 || (startIndex + length) > seq.size()) {
+    const FastaIndexEntry& entry = it->second;
+    if (pos < 1 || static_cast(pos - 1) >= entry.length) {
         return "";
     }
-    return seq.substr(startIndex, length);
+
+    // Clamp the request to the bases actually present so that a read on the final
+    // (possibly short) line never runs into the newline or the next FASTA record.
+    std::size_t currPos = static_cast<std::size_t>(pos - 1);
+    std::size_t remaining = length > 0 ? static_cast<std::size_t>(length) : 0; // non-positive length yields ""
+    remaining = std::min(remaining, entry.length - currPos);
+    std::string result;
+    result.reserve(remaining);
+    while (remaining > 0) {
+        std::size_t lineIdx = currPos / entry.basesPerLine;
+        std::size_t lineOffset = currPos % entry.basesPerLine;
+        std::size_t chunk = std::min(entry.basesPerLine - lineOffset, remaining); // stay within one line
+        std::streampos filePos = entry.offset + static_cast<std::streamoff>(lineIdx * entry.bytesPerLine + lineOffset);
+        referenceStream.clear(); // drop any EOF/fail state left by a previous read
+        referenceStream.seekg(filePos);
+        std::string buf(chunk, '\0');
+        if (!referenceStream.read(&buf[0], static_cast<std::streamsize>(chunk))) return ""; // short or failed read
+        result += buf;
+        currPos += chunk;
+        remaining -= chunk;
+    }
+    std::transform(result.begin(), result.end(), result.begin(), ::toupper); // soft-masked (lower-case) bases
+    return result;
 }
 
 void VCFXAlignmentChecker::checkDiscrepancies(std::istream& vcfIn, std::ostream& out) {
@@ -176,6 +208,9 @@ void VCFXAlignmentChecker::checkDiscrepancies(std::istream& vcfIn, std::ostream&
                 while (std::getline(ss, field, '\t')) {
                     headers.push_back(field);
                 }
+                if (!headers.empty() && !headers[0].empty() && headers[0][0] == '#') {
+                    headers[0].erase(0, 1); // drop leading '#'
+                }
                 for (size_t i = 0; i < headers.size(); ++i) {
                     if (headers[i] == "CHROM") chrIndex = static_cast(i);
                     else if (headers[i] == "POS")   posIndex = static_cast(i);
@@ -248,11 +283,11 @@ void VCFXAlignmentChecker::checkDiscrepancies(std::istream& vcfIn, std::ostream&
                         << "\t" << allele << "\t" << "REF_MISMATCH"
                         << "\t" << ref_base << "\t" << ref << "\n";
                 }
-                // Compare ALT in VCF vs reference genome's same position
-                // (Often for a standard SNP, the reference base is the only thing in the FASTA.)
-                // This is somewhat conceptual: we're checking if the ALT base is the same as reference at that position.
+                // Compare ALT to the reference base at the same position.
+                // Here we flag a mismatch when the ALT allele is actually the
+                // same as the reference (i.e. not a true variant).
                 std::string alt_base = ref_base; // The reference at that position
-                if (allele != alt_base) {
+                if (allele == alt_base) {
                     out << chrom << "\t" << posVal << "\t" << id << "\t" << ref
                         << "\t" << allele << "\t" << "ALT_MISMATCH"
                         << "\t" << alt_base << "\t" << allele << "\n";
@@ -290,6 +325,7 @@ void VCFXAlignmentChecker::checkDiscrepancies(std::istream& vcfIn, std::ostream&
 
 // Typical main(), linking to run()
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_alignment_checker")) return 0;
     VCFXAlignmentChecker alignmentChecker;
     return alignmentChecker.run(argc, argv);
 }
diff --git a/src/VCFX_alignment_checker/VCFX_alignment_checker.h b/src/VCFX_alignment_checker/VCFX_alignment_checker.h
index 6ff8e1a9..f62201c4 100644
--- a/src/VCFX_alignment_checker/VCFX_alignment_checker.h
+++ b/src/VCFX_alignment_checker/VCFX_alignment_checker.h
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include 
 
 // VCFXAlignmentChecker: Header file for Reference Alignment Discrepancy Finder Tool
 class VCFXAlignmentChecker {
@@ -17,7 +18,7 @@ class VCFXAlignmentChecker {
     void displayHelp();
 
     // Loads the reference genome from a FASTA file
-    bool loadReferenceGenome(std::istream& in);
+    bool loadReferenceGenome(const std::string& path);
 
     // Checks discrepancies between VCF variants and the in-memory reference genome
     void checkDiscrepancies(std::istream& vcfIn, std::ostream& out);
@@ -26,7 +27,16 @@ class VCFXAlignmentChecker {
     std::string getReferenceBases(const std::string& chrom, int pos, int length = 1);
 
     // Stores the reference genome sequences, keyed by normalized chromosome name
-    std::unordered_map referenceGenome;
+    struct FastaIndexEntry {
+        std::streampos offset = 0;     // file offset to first base
+        std::size_t   length = 0;      // total bases in sequence
+        std::size_t   basesPerLine = 0;    // number of bases per line in FASTA
+        std::size_t   bytesPerLine = 0;    // bytes per line including newline
+    };
+
+    std::unordered_map referenceIndex;
+    std::ifstream referenceStream;
+    std::string referencePath;
 
     // Helper function to convert chromosome names to a consistent format
     std::string normalizeChromosome(const std::string& chrom);
diff --git a/src/VCFX_allele_balance_calc/VCFX_allele_balance_calc.cpp b/src/VCFX_allele_balance_calc/VCFX_allele_balance_calc.cpp
index 71acdeeb..27e8bfd7 100644
--- a/src/VCFX_allele_balance_calc/VCFX_allele_balance_calc.cpp
+++ b/src/VCFX_allele_balance_calc/VCFX_allele_balance_calc.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 // VCFX_allele_balance_calc.cpp
 
 #include 
@@ -224,6 +225,7 @@ bool calculateAlleleBalance(std::istream& in, std::ostream& out, const AlleleBal
 // main()
 // ---------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_allele_balance_calc")) return 0;
     AlleleBalanceArguments args;
     parseArguments(argc, argv, args);
 
diff --git a/src/VCFX_allele_balance_filter/VCFX_allele_balance_filter.cpp b/src/VCFX_allele_balance_filter/VCFX_allele_balance_filter.cpp
index 88c963d2..ad7fbe43 100644
--- a/src/VCFX_allele_balance_filter/VCFX_allele_balance_filter.cpp
+++ b/src/VCFX_allele_balance_filter/VCFX_allele_balance_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -197,6 +198,7 @@ double VCFXAlleleBalanceFilter::calculateAlleleBalance(const std::string& genoty
 // main() linking to class
 // ------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_allele_balance_filter")) return 0;
     VCFXAlleleBalanceFilter alleleBalanceFilter;
     return alleleBalanceFilter.run(argc, argv);
 }
diff --git a/src/VCFX_allele_counter/VCFX_allele_counter.cpp b/src/VCFX_allele_counter/VCFX_allele_counter.cpp
index 2e6ac5e9..b3ad6f4d 100644
--- a/src/VCFX_allele_counter/VCFX_allele_counter.cpp
+++ b/src/VCFX_allele_counter/VCFX_allele_counter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -232,6 +233,7 @@ static bool countAlleles(std::istream& in, std::ostream& out, const AlleleCounte
 // main()
 // ---------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_allele_counter")) return 0;
     AlleleCounterArguments args;
     parseArguments(argc, argv, args);
 
diff --git a/src/VCFX_allele_freq_calc/VCFX_allele_freq_calc.cpp b/src/VCFX_allele_freq_calc/VCFX_allele_freq_calc.cpp
index 2344ac6d..f25dccb1 100644
--- a/src/VCFX_allele_freq_calc/VCFX_allele_freq_calc.cpp
+++ b/src/VCFX_allele_freq_calc/VCFX_allele_freq_calc.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -183,6 +184,7 @@ static void calculateAlleleFrequency(std::istream& in, std::ostream& out) {
 // main()
 // ---------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_allele_freq_calc")) return 0;
     // Parse arguments for help
     for (int i = 1; i < argc; ++i) {
         std::string arg = argv[i];
diff --git a/src/VCFX_ancestry_assigner/VCFX_ancestry_assigner.cpp b/src/VCFX_ancestry_assigner/VCFX_ancestry_assigner.cpp
index ee21cce4..22a2e1ff 100644
--- a/src/VCFX_ancestry_assigner/VCFX_ancestry_assigner.cpp
+++ b/src/VCFX_ancestry_assigner/VCFX_ancestry_assigner.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -408,6 +409,7 @@ void VCFXAncestryAssigner::assignAncestry(std::istream& vcfIn, std::ostream& out
 // main() - just instantiate and run
 // ---------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_ancestry_assigner")) return 0;
     VCFXAncestryAssigner assigner;
     return assigner.run(argc, argv);
 }
diff --git a/src/VCFX_ancestry_inferrer/VCFX_ancestry_inferrer.cpp b/src/VCFX_ancestry_inferrer/VCFX_ancestry_inferrer.cpp
index 1c72b384..00725dcd 100644
--- a/src/VCFX_ancestry_inferrer/VCFX_ancestry_inferrer.cpp
+++ b/src/VCFX_ancestry_inferrer/VCFX_ancestry_inferrer.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -58,6 +59,7 @@ class VCFXAncestryInferrer {
 // main() - create the inferrer and run
 // ----------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_ancestry_inferrer")) return 0;
     VCFXAncestryInferrer inferrer;
     return inferrer.run(argc, argv);
 }
diff --git a/src/VCFX_annotation_extractor/VCFX_annotation_extractor.cpp b/src/VCFX_annotation_extractor/VCFX_annotation_extractor.cpp
index 2245d156..544a2ea5 100644
--- a/src/VCFX_annotation_extractor/VCFX_annotation_extractor.cpp
+++ b/src/VCFX_annotation_extractor/VCFX_annotation_extractor.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -273,6 +274,7 @@ static void processVCF(std::istream &in, const AnnotationOptions &opts) {
 // main()
 // --------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_annotation_extractor")) return 0;
     AnnotationOptions opts;
     if (!parseArguments(argc, argv, opts)) {
         // parseArguments already printed help if needed
diff --git a/src/VCFX_compressor/VCFX_compressor.cpp b/src/VCFX_compressor/VCFX_compressor.cpp
index e2476577..3ffd05a5 100644
--- a/src/VCFX_compressor/VCFX_compressor.cpp
+++ b/src/VCFX_compressor/VCFX_compressor.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -149,6 +150,7 @@ static bool compressDecompressVCF(std::istream& in, std::ostream& out, bool comp
 // main
 // ---------------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_compressor")) return 0;
     bool compress = false;
     bool decompress = false;
 
diff --git a/src/VCFX_concordance_checker/VCFX_concordance_checker.cpp b/src/VCFX_concordance_checker/VCFX_concordance_checker.cpp
index c6b8df79..ae6f694c 100644
--- a/src/VCFX_concordance_checker/VCFX_concordance_checker.cpp
+++ b/src/VCFX_concordance_checker/VCFX_concordance_checker.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -275,6 +276,7 @@ static bool calculateConcordance(std::istream &in, std::ostream &out, const Conc
 // main
 // ---------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_concordance_checker")) return 0;
     ConcordanceArguments args;
     if (!parseArguments(argc, argv, args)) {
         // parseArguments prints error/help if needed
diff --git a/src/VCFX_cross_sample_concordance/VCFX_cross_sample_concordance.cpp b/src/VCFX_cross_sample_concordance/VCFX_cross_sample_concordance.cpp
index 50456e7e..1a9b694e 100644
--- a/src/VCFX_cross_sample_concordance/VCFX_cross_sample_concordance.cpp
+++ b/src/VCFX_cross_sample_concordance/VCFX_cross_sample_concordance.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -238,6 +239,7 @@ static void calculateConcordance(std::istream &in, std::ostream &out) {
 // Command-line parsing + main
 // --------------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_cross_sample_concordance")) return 0;
     bool showHelp = false;
 
     static struct option longOpts[] = {
diff --git a/src/VCFX_custom_annotator/VCFX_custom_annotator.cpp b/src/VCFX_custom_annotator/VCFX_custom_annotator.cpp
index 28e9ea29..a5319263 100644
--- a/src/VCFX_custom_annotator/VCFX_custom_annotator.cpp
+++ b/src/VCFX_custom_annotator/VCFX_custom_annotator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -282,6 +283,7 @@ int VCFXCustomAnnotator::run(int argc, char* argv[]) {
 // main
 // ---------------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_custom_annotator")) return 0;
     VCFXCustomAnnotator annotator;
     return annotator.run(argc, argv);
 }
diff --git a/src/VCFX_diff_tool/VCFX_diff_tool.cpp b/src/VCFX_diff_tool/VCFX_diff_tool.cpp
index 8f8f4611..ea984daa 100644
--- a/src/VCFX_diff_tool/VCFX_diff_tool.cpp
+++ b/src/VCFX_diff_tool/VCFX_diff_tool.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -205,6 +206,7 @@ int VCFXDiffTool::run(int argc, char* argv[]) {
 // main
 // ----------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_diff_tool")) return 0;
     VCFXDiffTool diffTool;
     return diffTool.run(argc, argv);
 }
diff --git a/src/VCFX_distance_calculator/VCFX_distance_calculator.cpp b/src/VCFX_distance_calculator/VCFX_distance_calculator.cpp
index 0fd2552c..67e717a5 100644
--- a/src/VCFX_distance_calculator/VCFX_distance_calculator.cpp
+++ b/src/VCFX_distance_calculator/VCFX_distance_calculator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 // VCFX_distance_calculator.cpp
 #include "VCFX_distance_calculator.h"
 #include 
@@ -160,6 +161,7 @@ bool calculateDistances(std::istream& in, std::ostream& out) {
 // main: Parses command-line arguments and calls calculateDistances.
 // --------------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_distance_calculator")) return 0;
     // Check for help option.
     for (int i = 1; i < argc; ++i) {
         std::string arg = argv[i];
diff --git a/src/VCFX_dosage_calculator/VCFX_dosage_calculator.cpp b/src/VCFX_dosage_calculator/VCFX_dosage_calculator.cpp
index 0dfa7f30..e41f5c0b 100644
--- a/src/VCFX_dosage_calculator/VCFX_dosage_calculator.cpp
+++ b/src/VCFX_dosage_calculator/VCFX_dosage_calculator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_dosage_calculator.h"
 #include 
 #include 
@@ -216,6 +217,7 @@ std::vector VCFXDosageCalculator::split(const std::string& str, cha
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_dosage_calculator")) return 0;
     VCFXDosageCalculator dosageCalculator;
     return dosageCalculator.run(argc, argv);
 }
diff --git a/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp b/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp
index 77c5ce1a..9a75cc9a 100644
--- a/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp
+++ b/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_duplicate_remover.h"
 #include 
 #include 
@@ -64,26 +65,20 @@ static VariantKey generateVariantKey(const std::string& chrom,
         key.pos = 0;
     }
     key.ref = ref;
-    key.alt = "";  // Will be set to normalized ALT.
-    // Normalize ALT: sort multi-allelic entries.
-    key.alt = generateNormalizedVariantKey(chrom, pos, ref, alt).substr(chrom.size() + pos.size() + ref.size() + 3); // skip prefix "chrom:pos:ref:"
-    // Alternatively, simply:
-    key.alt = generateNormalizedVariantKey(chrom, pos, ref, alt);
-    // However, since generateNormalizedVariantKey already concatenates chrom:pos:ref:normalizedAlt,
-    // we extract the normalizedAlt portion if needed. For simplicity, we can just store the full key.
-    // For our VariantKey, we want: chrom, pos, ref, normalizedAlt.
-    // We'll do that by re-parsing:
-    std::vector parts = splitString(generateNormalizedVariantKey(chrom, pos, ref, alt), ':');
-    if (parts.size() >= 4) {
-        key.chrom = parts[0];
-        try {
-            key.pos = std::stoi(parts[1]);
-        } catch (...) {
-            key.pos = 0;
+
+    // Normalize ALT: split multi-allelic values, sort them, then rejoin.  This
+    // avoids parsing the generated key string, which could break for ALT
+    // alleles containing ':' such as breakend notation.
+    std::vector alts = splitString(alt, ',');
+    std::sort(alts.begin(), alts.end());
+    std::ostringstream oss;
+    for (size_t i = 0; i < alts.size(); ++i) {
+        if (i > 0) {
+            oss << ',';
         }
-        key.ref = parts[2];
-        key.alt = parts[3];
+        oss << alts[i];
     }
+    key.alt = oss.str();
     return key;
 }
 
@@ -129,6 +124,7 @@ bool removeDuplicates(std::istream& in, std::ostream& out) {
 // main: Parse command-line arguments and call removeDuplicates.
 // ----------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_duplicate_remover")) return 0;
     // Simple argument parsing: if --help or -h is provided, print help.
     for (int i = 1; i < argc; ++i) {
         std::string arg = argv[i];
diff --git a/src/VCFX_fasta_converter/VCFX_fasta_converter.cpp b/src/VCFX_fasta_converter/VCFX_fasta_converter.cpp
index 110df41f..84c79222 100644
--- a/src/VCFX_fasta_converter/VCFX_fasta_converter.cpp
+++ b/src/VCFX_fasta_converter/VCFX_fasta_converter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_fasta_converter.h"
 #include 
 #include 
@@ -320,6 +321,7 @@ void VCFXFastaConverter::convertVCFtoFasta(std::istream& in, std::ostream& out)
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_fasta_converter")) return 0;
     VCFXFastaConverter app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_field_extractor/VCFX_field_extractor.cpp b/src/VCFX_field_extractor/VCFX_field_extractor.cpp
index b7fe05cd..31359024 100644
--- a/src/VCFX_field_extractor/VCFX_field_extractor.cpp
+++ b/src/VCFX_field_extractor/VCFX_field_extractor.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_field_extractor.h"
 #include 
 #include 
@@ -245,6 +246,7 @@ void extractFields(std::istream& in, std::ostream& out, const std::vector fields;
     bool showHelp = false;
 
diff --git a/src/VCFX_file_splitter/VCFX_file_splitter.cpp b/src/VCFX_file_splitter/VCFX_file_splitter.cpp
index 8d5c21f5..87c7b264 100644
--- a/src/VCFX_file_splitter/VCFX_file_splitter.cpp
+++ b/src/VCFX_file_splitter/VCFX_file_splitter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_file_splitter.h"
 #include 
 #include 
@@ -159,6 +160,7 @@ void VCFXFileSplitter::splitVCFByChromosome(std::istream& in,
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_file_splitter")) return 0;
     VCFXFileSplitter splitter;
     return splitter.run(argc, argv);
 }
diff --git a/src/VCFX_format_converter/VCFX_format_converter.cpp b/src/VCFX_format_converter/VCFX_format_converter.cpp
index 9998a63b..e374d165 100644
--- a/src/VCFX_format_converter/VCFX_format_converter.cpp
+++ b/src/VCFX_format_converter/VCFX_format_converter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_format_converter.h"
 #include 
 #include 
@@ -111,8 +112,9 @@ static std::string csvEscape(const std::string &field) {
     tmp.push_back('"');
     for (char c : field) {
         if (c == '"') {
-            // double it
-            tmp += "\"\"";
+            // double it by writing two quotes
+            tmp.push_back('"');
+            tmp.push_back('"');
         } else {
             tmp.push_back(c);
         }
@@ -184,6 +186,7 @@ void convertVCFtoCSV(std::istream& in, std::ostream& out) {
 // main
 // -----------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_format_converter")) return 0;
     OutputFormat format;
     bool valid = parseArguments(argc, argv, format);
 
diff --git a/src/VCFX_genotype_query/VCFX_genotype_query.cpp b/src/VCFX_genotype_query/VCFX_genotype_query.cpp
index e472b6af..f1451c23 100644
--- a/src/VCFX_genotype_query/VCFX_genotype_query.cpp
+++ b/src/VCFX_genotype_query/VCFX_genotype_query.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_genotype_query.h"
 #include 
 #include 
@@ -239,6 +240,7 @@ void genotypeQuery(std::istream& in, std::ostream& out,
 // main
 // ------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_genotype_query")) return 0;
     std::string genotypeQueryStr;
     bool strictCompare = false;
     if (!parseArguments(argc, argv, genotypeQueryStr, strictCompare)) {
diff --git a/src/VCFX_gl_filter/VCFX_gl_filter.cpp b/src/VCFX_gl_filter/VCFX_gl_filter.cpp
index 8360fc9e..769b8502 100644
--- a/src/VCFX_gl_filter/VCFX_gl_filter.cpp
+++ b/src/VCFX_gl_filter/VCFX_gl_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_gl_filter.h"
 #include 
 #include 
@@ -263,6 +264,7 @@ void VCFXGLFilter::filterByGL(std::istream& in,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_gl_filter")) return 0;
     VCFXGLFilter app;
     return app.run(argc, argv);
 }
\ No newline at end of file
diff --git a/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp b/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp
index e1bbf55f..9475ba6e 100644
--- a/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp
+++ b/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_haplotype_extractor.h"
 #include 
 #include 
@@ -16,9 +17,10 @@ void printHelp() {
               << "Usage: VCFX_haplotype_extractor [OPTIONS]\n\n"
               << "Options:\n"
               << "  --help, -h                 Display this help message and exit.\n"
-              << "  --block-size          Maximum distance for grouping consecutive variants (default 100000).\n"
-              << "  --check-phase-consistency  If set, try a minimal check across variants.\n\n"
-              << "Description:\n"
+              << "  --block-size          Maximum distance for grouping consecutive variants (default 100000).\n"
+              << "  --check-phase-consistency  If set, try a minimal check across variants.\n"
+              << "  --debug                   Output verbose debug information.\n\n"
+              << "Description:\n"
               << "  Extracts phased haplotype blocks from genotype data in a VCF file. "
               << "It reconstructs haplotypes for each sample by analyzing phased genotype fields.\n\n"
               << "Examples:\n"
@@ -86,8 +88,10 @@ bool HaplotypeExtractor::phaseIsConsistent(const HaplotypeBlock& block,
         return false;
     }
     
-    // Debug the whole process
-    std::cerr << "Checking phase consistency\n";
+    // Optional debugging output
+    if (debugMode) {
+        std::cerr << "Checking phase consistency\n";
+    }
 
     for (size_t s=0; s inconsistent
         // Check for phase flips - when both alleles flip positions
         if (lastAllele1 != newAllele1 && lastAllele2 != newAllele2 && 
             lastAllele1 == newAllele2 && lastAllele2 == newAllele1) {
-            std::cerr << "Phase flip detected in sample " << s << "\n";
+            if (debugMode) {
+                std::cerr << "Phase flip detected in sample " << s << "\n";
+            }
             return false;
         }
     }
 
-    std::cerr << "All phases consistent\n";
+    if (debugMode) {
+        std::cerr << "All phases consistent\n";
+    }
     return true;
 }
 
@@ -316,8 +328,10 @@ bool HaplotypeExtractor::extractHaplotypes(std::istream& in, std::ostream& out)
 // main
 // ---------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_haplotype_extractor")) return 0;
     int blockSize = 100000;
     bool doCheck = false;
+    bool debug = false;
 
     // simple arg parse
     for (int i=1; i sampleNames;
     size_t numSamples = 0;
@@ -41,6 +44,9 @@ class HaplotypeExtractor {
     // If true, we do a simplistic cross-variant check for consistent phasing
     bool checkPhaseConsistency = false;
 
+    // If true, print verbose debugging information
+    bool debugMode = false;
+
     // Parses the #CHROM line to extract sample names
     bool parseHeader(const std::string& headerLine);
 
diff --git a/src/VCFX_haplotype_phaser/VCFX_haplotype_phaser.cpp b/src/VCFX_haplotype_phaser/VCFX_haplotype_phaser.cpp
index 9b2eba87..6cbfa965 100644
--- a/src/VCFX_haplotype_phaser/VCFX_haplotype_phaser.cpp
+++ b/src/VCFX_haplotype_phaser/VCFX_haplotype_phaser.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_haplotype_phaser.h"
 #include 
 #include 
@@ -318,6 +319,7 @@ std::vector> VCFXHaplotypePhaser::groupVariants(const std::vect
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_haplotype_phaser")) return 0;
     VCFXHaplotypePhaser hp;
     return hp.run(argc, argv);
 }
diff --git a/src/VCFX_header_parser/VCFX_header_parser.cpp b/src/VCFX_header_parser/VCFX_header_parser.cpp
index 97164a43..45ee87ed 100644
--- a/src/VCFX_header_parser/VCFX_header_parser.cpp
+++ b/src/VCFX_header_parser/VCFX_header_parser.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_header_parser.h"
 #include 
 #include 
@@ -26,6 +27,7 @@ void processHeader(std::istream& in, std::ostream& out) {
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_header_parser")) return 0;
     // Simple argument parsing
     for (int i = 1; i < argc; ++i) {
         std::string arg = argv[i];
diff --git a/src/VCFX_hwe_tester/VCFX_hwe_tester.cpp b/src/VCFX_hwe_tester/VCFX_hwe_tester.cpp
index 64fbc9d8..d54e7f24 100644
--- a/src/VCFX_hwe_tester/VCFX_hwe_tester.cpp
+++ b/src/VCFX_hwe_tester/VCFX_hwe_tester.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_hwe_tester.h"
 #include 
 #include 
@@ -255,6 +256,7 @@ void VCFXHWETester::performHWE(std::istream& in){
 
 // actual main
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_hwe_tester")) return 0;
     VCFXHWETester tester;
     return tester.run(argc, argv);
 }
diff --git a/src/VCFX_impact_filter/VCFX_impact_filter.cpp b/src/VCFX_impact_filter/VCFX_impact_filter.cpp
index 86034924..51bfea85 100644
--- a/src/VCFX_impact_filter/VCFX_impact_filter.cpp
+++ b/src/VCFX_impact_filter/VCFX_impact_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_impact_filter.h"
 #include 
 #include 
@@ -200,6 +201,7 @@ void VCFXImpactFilter::filterByImpact(std::istream& in,
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_impact_filter")) return 0;
     VCFXImpactFilter filt;
     return filt.run(argc, argv);
 }
diff --git a/src/VCFX_inbreeding_calculator/VCFX_inbreeding_calculator.cpp b/src/VCFX_inbreeding_calculator/VCFX_inbreeding_calculator.cpp
index 4b095239..4353d7b3 100644
--- a/src/VCFX_inbreeding_calculator/VCFX_inbreeding_calculator.cpp
+++ b/src/VCFX_inbreeding_calculator/VCFX_inbreeding_calculator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_inbreeding_calculator.h"
 #include 
 #include 
@@ -351,6 +352,7 @@ int VCFXInbreedingCalculator::run(int argc, char* argv[]){
 // -------------------------------------------------------------------------
 // main entry point
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_inbreeding_calculator")) return 0;
     VCFXInbreedingCalculator calc;
     return calc.run(argc, argv);
 }
diff --git a/src/VCFX_indel_normalizer/VCFX_indel_normalizer.cpp b/src/VCFX_indel_normalizer/VCFX_indel_normalizer.cpp
index 6c4f2fc7..0a60d6bb 100644
--- a/src/VCFX_indel_normalizer/VCFX_indel_normalizer.cpp
+++ b/src/VCFX_indel_normalizer/VCFX_indel_normalizer.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_indel_normalizer.h"
 #include 
 #include 
@@ -253,6 +254,7 @@ void VCFXIndelNormalizer::normalizeIndels(std::istream& in, std::ostream& out) {
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_indel_normalizer")) return 0;
     VCFXIndelNormalizer norm;
     return norm.run(argc, argv);
 }
diff --git a/src/VCFX_indexer/VCFX_indexer.cpp b/src/VCFX_indexer/VCFX_indexer.cpp
index a99c185c..1eeac8d1 100644
--- a/src/VCFX_indexer/VCFX_indexer.cpp
+++ b/src/VCFX_indexer/VCFX_indexer.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_indexer.h"
 #include 
 #include 
@@ -129,9 +130,9 @@ void VCFXIndexer::createVCFIndex(std::istream &in, std::ostream &out) {
         const std::string &chrom = fields[0];
         const std::string &posStr = fields[1];
 
-        int posVal = 0;
+        std::int64_t posVal = 0;
         try {
-            posVal = std::stoi(posStr);
+            posVal = std::stoll(posStr);
         } catch (...) {
             // Not a valid integer => skip
             return;
@@ -189,6 +190,7 @@ void VCFXIndexer::createVCFIndex(std::istream &in, std::ostream &out) {
 
 // Optional main if you build as a single executable
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_indexer")) return 0;
     VCFXIndexer idx;
     return idx.run(argc, argv);
 }
diff --git a/src/VCFX_info_aggregator/VCFX_info_aggregator.cpp b/src/VCFX_info_aggregator/VCFX_info_aggregator.cpp
index f15d8825..91deee46 100644
--- a/src/VCFX_info_aggregator/VCFX_info_aggregator.cpp
+++ b/src/VCFX_info_aggregator/VCFX_info_aggregator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_info_aggregator.h"
 #include 
 #include 
@@ -214,6 +215,7 @@ void VCFXInfoAggregator::aggregateInfo(std::istream& in,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_info_aggregator")) return 0;
     VCFXInfoAggregator app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_info_parser/VCFX_info_parser.cpp b/src/VCFX_info_parser/VCFX_info_parser.cpp
index 286ef5a0..51cc6461 100644
--- a/src/VCFX_info_parser/VCFX_info_parser.cpp
+++ b/src/VCFX_info_parser/VCFX_info_parser.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_info_parser.h"
 #include 
 #include 
@@ -139,6 +140,7 @@ bool parseInfoFields(std::istream& in, std::ostream& out, const std::vector info_fields;
 
     // parse arguments
diff --git a/src/VCFX_info_summarizer/VCFX_info_summarizer.cpp b/src/VCFX_info_summarizer/VCFX_info_summarizer.cpp
index 79d43a81..0917bcd9 100644
--- a/src/VCFX_info_summarizer/VCFX_info_summarizer.cpp
+++ b/src/VCFX_info_summarizer/VCFX_info_summarizer.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_info_summarizer.h"
 #include 
 #include 
@@ -225,6 +226,7 @@ bool summarizeInfoFields(std::istream& in, std::ostream& out, const std::vector<
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_info_summarizer")) return 0;
     std::vector info_fields;
 
     // parse arguments
diff --git a/src/VCFX_ld_calculator/VCFX_ld_calculator.cpp b/src/VCFX_ld_calculator/VCFX_ld_calculator.cpp
index ef5bc318..9076d375 100644
--- a/src/VCFX_ld_calculator/VCFX_ld_calculator.cpp
+++ b/src/VCFX_ld_calculator/VCFX_ld_calculator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_ld_calculator.h"
 #include 
 #include 
@@ -345,6 +346,7 @@ int VCFXLDCalculator::run(int argc, char* argv[]) {
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_ld_calculator")) return 0;
     VCFXLDCalculator calc;
     return calc.run(argc, argv);
 }
diff --git a/src/VCFX_merger/VCFX_merger.cpp b/src/VCFX_merger/VCFX_merger.cpp
index 639bf593..c81634c1 100644
--- a/src/VCFX_merger/VCFX_merger.cpp
+++ b/src/VCFX_merger/VCFX_merger.cpp
@@ -1,8 +1,10 @@
+#include "vcfx_core.h"
 #include "VCFX_merger.h"
 #include 
 #include 
+#include 
+#include 
 #include 
-#include 
 #include 
 #include 
 
@@ -62,87 +64,64 @@ void VCFXMerger::displayHelp() {
 }
 
 void VCFXMerger::mergeVCF(const std::vector& inputFiles, std::ostream& out) {
-    std::vector> allVariants;
-    std::vector allHeaders;
+    struct Variant {
+        std::string chrom;
+        long pos = 0;
+        std::string line;
+    };
 
-    for (const auto& file : inputFiles) {
-        std::vector> variants;
-        std::vector headerLines;
-        parseVCF(file, variants, headerLines);
+    std::vector variants;
+    std::vector headers;
+    bool headersCaptured = false;
 
-        // If no headers yet, copy the first file's headers
-        if (allHeaders.empty()) {
-            allHeaders = headerLines;
+    for (const auto& file : inputFiles) {
+        std::ifstream stream(file);
+        if (!stream.is_open()) {
+            std::cerr << "Failed to open file: " << file << "\n";
+            continue;
         }
 
-        // Append all variants
-        allVariants.insert(allVariants.end(), variants.begin(), variants.end());
-    }
-
-    // Sort all variants by chromosome and position
-    std::sort(
-        allVariants.begin(),
-        allVariants.end(),
-        [this](const std::vector& a, const std::vector& b) {
-            if (a[0] == b[0]) {
-                return std::stoi(a[1]) < std::stoi(b[1]);
+        std::string line;
+        while (std::getline(stream, line)) {
+            if (line.empty())
+                continue;
+            if (line[0] == '#') {
+                if (!headersCaptured)
+                    headers.push_back(line);
+                continue;
             }
-            return a[0] < b[0];
-        }
-    );
-
-    // Output headers
-    for (const auto& header : allHeaders) {
-        out << header << "\n";
-    }
 
-    // Output merged variants
-    for (const auto& variant : allVariants) {
-        for (size_t i = 0; i < variant.size(); ++i) {
-            out << variant[i];
-            if (i < variant.size() - 1) {
-                out << "\t";
-            }
+            std::istringstream ss(line);
+            Variant v;
+            std::getline(ss, v.chrom, '\t');
+            std::string pos_str;
+            std::getline(ss, pos_str, '\t');
+            v.pos = std::strtol(pos_str.c_str(), nullptr, 10);
+            v.line = line;
+            variants.push_back(std::move(v));
         }
-        out << "\n";
-    }
-}
 
-void VCFXMerger::parseVCF(const std::string& filename,
-                          std::vector>& variants,
-                          std::vector& headerLines) {
-    std::ifstream infile(filename);
-    if (!infile.is_open()) {
-        std::cerr << "Failed to open file: " << filename << "\n";
-        return;
+        if (!headersCaptured && !headers.empty())
+            headersCaptured = true;
     }
 
-    std::string line;
-    while (std::getline(infile, line)) {
-        if (line.empty()) continue;
-
-        if (line[0] == '#') {
-            headerLines.push_back(line);
-            continue;
-        }
+    for (const auto& h : headers) {
+        out << h << '\n';
+    }
 
-        // Split by tab
-        std::vector fields;
-        std::string field;
-        size_t pos = 0;
-        while ((pos = line.find('\t')) != std::string::npos) {
-            field = line.substr(0, pos);
-            fields.push_back(field);
-            line.erase(0, pos + 1);
-        }
-        fields.push_back(line);
+    // Stable sort keeps records with equal CHROM and POS in input order, so merged output is deterministic.
+    std::stable_sort(variants.begin(), variants.end(), [](const Variant& a, const Variant& b) {
+        return a.chrom == b.chrom ? a.pos < b.pos : a.chrom < b.chrom;
+    });
 
-        variants.push_back(fields);
+    for (const auto& v : variants) {
+        out << v.line << '\n';
     }
-    infile.close();
 }
 
+
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_merger")) return 0;
     VCFXMerger merger;
     return merger.run(argc, argv);
 }
diff --git a/src/VCFX_merger/VCFX_merger.h b/src/VCFX_merger/VCFX_merger.h
index 637abba2..cfc7abb1 100644
--- a/src/VCFX_merger/VCFX_merger.h
+++ b/src/VCFX_merger/VCFX_merger.h
@@ -18,11 +18,6 @@ class VCFXMerger {
     // Processes and merges VCF files
     void mergeVCF(const std::vector& inputFiles, std::ostream& out);
 
-    // Parses a VCF file and stores variants
-    void parseVCF(const std::string& filename, std::vector>& variants, std::vector& headerLines);
-
-    // Compares variants based on chromosome and position
-    bool compareVariants(const std::vector& a, const std::vector& b);
 };
 
 #endif // VCFX_MERGER_H
diff --git a/src/VCFX_metadata_summarizer/VCFX_metadata_summarizer.cpp b/src/VCFX_metadata_summarizer/VCFX_metadata_summarizer.cpp
index 795e8e0b..b8be9bc7 100644
--- a/src/VCFX_metadata_summarizer/VCFX_metadata_summarizer.cpp
+++ b/src/VCFX_metadata_summarizer/VCFX_metadata_summarizer.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_metadata_summarizer.h"
 #include 
 #include 
@@ -155,6 +156,7 @@ void VCFXMetadataSummarizer::printSummary() const {
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_metadata_summarizer")) return 0;
     VCFXMetadataSummarizer summarizer;
     return summarizer.run(argc, argv);
 }
diff --git a/src/VCFX_missing_data_handler/VCFX_missing_data_handler.cpp b/src/VCFX_missing_data_handler/VCFX_missing_data_handler.cpp
index 83191947..7812f65a 100644
--- a/src/VCFX_missing_data_handler/VCFX_missing_data_handler.cpp
+++ b/src/VCFX_missing_data_handler/VCFX_missing_data_handler.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_missing_data_handler.h"
 #include 
 #include 
@@ -259,6 +260,7 @@ bool handleMissingDataAll(const Arguments& args) {
  * @return int Exit status.
  */
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_missing_data_handler")) return 0;
     Arguments args;
     parseArguments(argc, argv, args);
 
diff --git a/src/VCFX_missing_detector/VCFX_missing_detector.cpp b/src/VCFX_missing_detector/VCFX_missing_detector.cpp
index 3ae391a1..78033e3d 100644
--- a/src/VCFX_missing_detector/VCFX_missing_detector.cpp
+++ b/src/VCFX_missing_detector/VCFX_missing_detector.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_missing_detector.h"
 #include 
 #include 
@@ -208,6 +209,7 @@ void VCFXMissingDetector::detectMissingGenotypes(std::istream& in, std::ostream&
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_missing_detector")) return 0;
     VCFXMissingDetector missingDetector;
     return missingDetector.run(argc, argv);
 }
diff --git a/src/VCFX_multiallelic_splitter/VCFX_multiallelic_splitter.cpp b/src/VCFX_multiallelic_splitter/VCFX_multiallelic_splitter.cpp
index 5f3a280a..8922df62 100644
--- a/src/VCFX_multiallelic_splitter/VCFX_multiallelic_splitter.cpp
+++ b/src/VCFX_multiallelic_splitter/VCFX_multiallelic_splitter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_multiallelic_splitter.h"
 #include 
 #include 
@@ -288,6 +289,7 @@ bool splitMultiAllelicVariants(std::istream &in, std::ostream &out){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_multiallelic_splitter")) return 0;
     for(int i=1; i< argc; i++){
         std::string arg= argv[i];
         if(arg=="--help"|| arg=="-h"){
diff --git a/src/VCFX_nonref_filter/VCFX_nonref_filter.cpp b/src/VCFX_nonref_filter/VCFX_nonref_filter.cpp
index ab9bf18c..10ec7601 100644
--- a/src/VCFX_nonref_filter/VCFX_nonref_filter.cpp
+++ b/src/VCFX_nonref_filter/VCFX_nonref_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_nonref_filter.h"
 #include 
 #include 
@@ -132,6 +133,7 @@ void VCFXNonRefFilter::filterNonRef(std::istream& in, std::ostream& out){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_nonref_filter")) return 0;
     VCFXNonRefFilter app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp b/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp
index 16710d8d..ae146354 100644
--- a/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp
+++ b/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_outlier_detector.h"
 #include 
 #include 
@@ -27,12 +28,12 @@ int VCFXOutlierDetector::run(int argc, char* argv[]){
         {"help", no_argument, 0, 'h'},
         {"metric", required_argument, 0, 'm'},
         {"threshold", required_argument, 0, 't'},
-        {"variant", no_argument, 0, 'v'},
+        {"variant", no_argument, 0, 'V'},
         {"sample", no_argument, 0, 's'},
         {0,0,0,0}
     };
     while(true){
-        int c= getopt_long(argc, argv, "hm:t:vs", long_opts, nullptr);
+        int c= getopt_long(argc, argv, "hm:t:Vs", long_opts, nullptr);
         if(c==-1) break;
         switch(c){
             case 'h':
@@ -49,7 +50,7 @@ int VCFXOutlierDetector::run(int argc, char* argv[]){
                     return 1;
                 }
                 break;
-            case 'v':
+            case 'V':
                 isVariantMode= true;
                 break;
             case 's':
@@ -77,7 +78,7 @@ void VCFXOutlierDetector::displayHelp(){
 "  --help, -h           Print this help.\n"
 "  --metric, -m    Name of the metric to use (e.g. AF, DP, GQ...).\n"
 "  --threshold, -t  Numeric threshold.\n"
-"  --variant, -v        Evaluate each variant's  in INFO>threshold => print.\n"
+"  --variant, -V        Evaluate each variant's  in INFO>threshold => print.\n"
 "  --sample, -s         Evaluate sample averages of  in genotype subfield => print outliers.\n\n"
 "Examples:\n"
 "  1) Outlier variants with AF>0.05:\n"
@@ -304,6 +305,7 @@ void VCFXOutlierDetector::detectOutliers(std::istream &in,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_outlier_detector")) return 0;
     VCFXOutlierDetector app;
     return app.run(argc, argv);
 }
\ No newline at end of file
diff --git a/src/VCFX_phase_checker/VCFX_phase_checker.cpp b/src/VCFX_phase_checker/VCFX_phase_checker.cpp
index 052da607..2d613cfa 100644
--- a/src/VCFX_phase_checker/VCFX_phase_checker.cpp
+++ b/src/VCFX_phase_checker/VCFX_phase_checker.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_phase_checker.h"
 #include 
 #include 
@@ -164,6 +165,7 @@ void VCFXPhaseChecker::processVCF(std::istream &in, std::ostream &out) {
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_phase_checker")) return 0;
     VCFXPhaseChecker checker;
     return checker.run(argc, argv);
 }
diff --git a/src/VCFX_phase_quality_filter/VCFX_phase_quality_filter.cpp b/src/VCFX_phase_quality_filter/VCFX_phase_quality_filter.cpp
index 87f6a835..19bcd240 100644
--- a/src/VCFX_phase_quality_filter/VCFX_phase_quality_filter.cpp
+++ b/src/VCFX_phase_quality_filter/VCFX_phase_quality_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_phase_quality_filter.h"
 #include 
 #include 
@@ -201,6 +202,7 @@ double VCFXPhaseQualityFilter::parsePQScore(const std::string &info) {
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_phase_quality_filter")) return 0;
     VCFXPhaseQualityFilter f;
     return f.run(argc, argv);
 }
diff --git a/src/VCFX_phred_filter/VCFX_phred_filter.cpp b/src/VCFX_phred_filter/VCFX_phred_filter.cpp
index d7d01711..0ea89008 100644
--- a/src/VCFX_phred_filter/VCFX_phred_filter.cpp
+++ b/src/VCFX_phred_filter/VCFX_phred_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_phred_filter.h"
 #include 
 #include 
@@ -119,6 +120,7 @@ double VCFXPhredFilter::parseQUAL(const std::string &qualStr, bool keepMissingAs
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_phred_filter")) return 0;
     VCFXPhredFilter pf;
     return pf.run(argc,argv);
 }
diff --git a/src/VCFX_population_filter/VCFX_population_filter.cpp b/src/VCFX_population_filter/VCFX_population_filter.cpp
index e1b33280..d1ebf032 100644
--- a/src/VCFX_population_filter/VCFX_population_filter.cpp
+++ b/src/VCFX_population_filter/VCFX_population_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_population_filter.h"
 #include 
 #include 
@@ -190,6 +191,7 @@ void VCFXPopulationFilter::filterPopulation(std::istream &in,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_population_filter")) return 0;
     VCFXPopulationFilter pf;
     return pf.run(argc, argv);
 }
diff --git a/src/VCFX_position_subsetter/VCFX_position_subsetter.cpp b/src/VCFX_position_subsetter/VCFX_position_subsetter.cpp
index 70258c7b..0437d160 100644
--- a/src/VCFX_position_subsetter/VCFX_position_subsetter.cpp
+++ b/src/VCFX_position_subsetter/VCFX_position_subsetter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_position_subsetter.h"
 #include 
 #include 
@@ -146,6 +147,7 @@ bool VCFXPositionSubsetter::subsetVCFByPosition(std::istream &in,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_position_subsetter")) return 0;
     VCFXPositionSubsetter subsetter;
     return subsetter.run(argc, argv);
 }
diff --git a/src/VCFX_probability_filter/VCFX_probability_filter.cpp b/src/VCFX_probability_filter/VCFX_probability_filter.cpp
index 770f6c8b..ee082f26 100644
--- a/src/VCFX_probability_filter/VCFX_probability_filter.cpp
+++ b/src/VCFX_probability_filter/VCFX_probability_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_probability_filter.h"
 #include 
 #include 
@@ -210,6 +211,7 @@ void VCFXProbabilityFilter::filterByProbability(std::istream& in, std::ostream&
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_probability_filter")) return 0;
     VCFXProbabilityFilter probabilityFilter;
     return probabilityFilter.run(argc, argv);
 }
\ No newline at end of file
diff --git a/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp b/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp
index b09fe473..9cccdcf1 100644
--- a/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp
+++ b/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_quality_adjuster.h"
 #include 
 #include 
@@ -157,7 +158,14 @@ void VCFXQualityAdjuster::adjustQualityScores(std::istream &in, std::ostream &ou
             // clamp large values
             if(newQual>1e12) newQual= 1e12;
         }
-        fields[5]= std::to_string(newQual);
+        std::string qualStr;
+        if(std::isnan(newQual)){
+            // ensure consistent representation for NaN
+            qualStr = "nan";
+        } else {
+            qualStr = std::to_string(newQual);
+        }
+        fields[5]= qualStr;
         std::ostringstream oss;
         for(size_t i=0; i0) oss<<"\t";
@@ -168,6 +176,7 @@ void VCFXQualityAdjuster::adjustQualityScores(std::istream &in, std::ostream &ou
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_quality_adjuster")) return 0;
     VCFXQualityAdjuster app;
     return app.run(argc, argv);
 }
\ No newline at end of file
diff --git a/src/VCFX_record_filter/VCFX_record_filter.cpp b/src/VCFX_record_filter/VCFX_record_filter.cpp
index 6b0273e9..871ffb82 100644
--- a/src/VCFX_record_filter/VCFX_record_filter.cpp
+++ b/src/VCFX_record_filter/VCFX_record_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_record_filter.h"
 #include 
 #include 
@@ -320,6 +321,7 @@ void printHelp(){
 
 // main with typical argument parse
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_record_filter")) return 0;
     if(argc==1){
         printHelp();
         return 0;
diff --git a/src/VCFX_ref_comparator/VCFX_ref_comparator.cpp b/src/VCFX_ref_comparator/VCFX_ref_comparator.cpp
index bf121cab..1b26b073 100644
--- a/src/VCFX_ref_comparator/VCFX_ref_comparator.cpp
+++ b/src/VCFX_ref_comparator/VCFX_ref_comparator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_ref_comparator.h"
 #include 
 #include 
@@ -273,6 +274,7 @@ void VCFXRefComparator::compareVCF(std::istream &vcfIn, std::ostream &vcfOut){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_ref_comparator")) return 0;
     VCFXRefComparator refComp;
     return refComp.run(argc, argv);
 }
diff --git a/src/VCFX_reformatter/VCFX_reformatter.cpp b/src/VCFX_reformatter/VCFX_reformatter.cpp
index 070cca04..642b2b31 100644
--- a/src/VCFX_reformatter/VCFX_reformatter.cpp
+++ b/src/VCFX_reformatter/VCFX_reformatter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_reformatter.h"
 #include 
 #include 
@@ -461,6 +462,7 @@ std::string VCFXReformatter::applyFormatReorderToSample(const std::string &sampl
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_reformatter")) return 0;
     VCFXReformatter reformatter;
     return reformatter.run(argc, argv);
 }
diff --git a/src/VCFX_region_subsampler/VCFX_region_subsampler.cpp b/src/VCFX_region_subsampler/VCFX_region_subsampler.cpp
index 31d35247..60d5081e 100644
--- a/src/VCFX_region_subsampler/VCFX_region_subsampler.cpp
+++ b/src/VCFX_region_subsampler/VCFX_region_subsampler.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_region_subsampler.h"
 #include 
 #include 
@@ -254,6 +255,7 @@ void VCFXRegionSubsampler::processVCF(std::istream &in, std::ostream &out) {
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_region_subsampler")) return 0;
     VCFXRegionSubsampler app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_sample_extractor/VCFX_sample_extractor.cpp b/src/VCFX_sample_extractor/VCFX_sample_extractor.cpp
index 877f22f1..52f65eda 100644
--- a/src/VCFX_sample_extractor/VCFX_sample_extractor.cpp
+++ b/src/VCFX_sample_extractor/VCFX_sample_extractor.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_sample_extractor.h"
 #include 
 #include 
@@ -215,6 +216,7 @@ void VCFXSampleExtractor::extractSamples(std::istream &in, std::ostream &out,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_sample_extractor")) return 0;
     VCFXSampleExtractor app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_sorter/VCFX_sorter.cpp b/src/VCFX_sorter/VCFX_sorter.cpp
index 5627f9ad..681c2a8b 100644
--- a/src/VCFX_sorter/VCFX_sorter.cpp
+++ b/src/VCFX_sorter/VCFX_sorter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_sorter.h"
 #include 
 #include 
@@ -218,6 +219,7 @@ void VCFXSorter::outputVCF(std::ostream &out){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_sorter")) return 0;
     VCFXSorter app;
     return app.run(argc, argv);
 }
\ No newline at end of file
diff --git a/src/VCFX_subsampler/VCFX_subsampler.cpp b/src/VCFX_subsampler/VCFX_subsampler.cpp
index dfab860c..f1a8ec78 100644
--- a/src/VCFX_subsampler/VCFX_subsampler.cpp
+++ b/src/VCFX_subsampler/VCFX_subsampler.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_subsampler.h"
 #include 
 #include 
@@ -162,6 +163,7 @@ void VCFXSubsampler::subsampleLines(std::istream &in,
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_subsampler")) return 0;
     VCFXSubsampler app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_sv_handler/VCFX_sv_handler.cpp b/src/VCFX_sv_handler/VCFX_sv_handler.cpp
index 0ab3f1bd..c22f7c27 100644
--- a/src/VCFX_sv_handler/VCFX_sv_handler.cpp
+++ b/src/VCFX_sv_handler/VCFX_sv_handler.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_sv_handler.h"
 #include 
 #include 
@@ -205,6 +206,7 @@ void VCFXSvHandler::handleStructuralVariants(std::istream &in, std::ostream &out
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_sv_handler")) return 0;
     VCFXSvHandler app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_validator/VCFX_validator.cpp b/src/VCFX_validator/VCFX_validator.cpp
index 24c0bd22..e70fc3e2 100644
--- a/src/VCFX_validator/VCFX_validator.cpp
+++ b/src/VCFX_validator/VCFX_validator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_validator.h"
 #include 
 #include 
@@ -7,7 +8,7 @@
 #include 
 
 static std::string trim(const std::string &s){
-    size_t start=0; 
+    size_t start=0;
     while(start split(const std::string &s, char delim){
+    std::vector out;
+    std::stringstream ss(s);
+    std::string item;
+    while(std::getline(ss, item, delim)) out.push_back(item);
+    return out;
+}
+
 int VCFXValidator::run(int argc, char* argv[]){
     bool hasStdin = !isatty(fileno(stdin));
     if(argc==1 && !hasStdin){
@@ -51,14 +60,18 @@ void VCFXValidator::displayHelp(){
 "  VCFX_validator [options] < input.vcf\n\n"
 "Options:\n"
 "  -h, --help     Show this help.\n"
-"  -s, --strict   Enable stricter checks (not fully implemented, but reserved).\n\n"
+"  -s, --strict   Enable stricter checks.\n\n"
 "Description:\n"
 "  Validates:\n"
 "   * All '##' lines are recognized as meta lines.\n"
-"   * #CHROM line is present, has at least 8 columns.\n"
+"   * #CHROM line is present and well formed.\n"
 "   * Each data line has >=8 columns, checks CHROM non-empty, POS>0,\n"
 "     REF/ALT non-empty, QUAL is '.' or non-negative float, FILTER non-empty,\n"
-"     INFO is minimal check. Logs errors/warnings.\n"
+"     INFO is minimally checked.\n"
+"  In strict mode additional checks are performed:\n"
+"   * Data line column count must match the #CHROM header.\n"
+"   * Sample columns must match the FORMAT field structure.\n"
+"   * Any warning is treated as an error.\n"
 "  Exits 0 if pass, 1 if fail.\n";
 }
 
@@ -86,11 +99,24 @@ bool VCFXValidator::validateChromHeader(const std::string &line, int lineNumber)
         std::cerr<<"Error: #CHROM line at "<< lineNumber <<" has <8 columns.\n";
         return false;
     }
-    // typically #CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, (FORMAT?), ...
     if(f[0]!="#CHROM"){
         std::cerr<<"Error: #CHROM line doesn't start with '#CHROM' at line "<< lineNumber <<".\n";
         return false;
     }
+
+    headerColumnCount = static_cast(f.size());
+    headerHasFormat = (headerColumnCount > 8);
+    sampleCount = headerHasFormat ? headerColumnCount - 9 : 0;
+
+    if(headerHasFormat && f[8] != "FORMAT"){
+        std::string msg = "Warning: column 9 of #CHROM header is not 'FORMAT'.";
+        if(strictMode){
+            std::cerr << "Error: " << msg << "\n";
+            return false;
+        } else {
+            std::cerr << msg << "\n";
+        }
+    }
     return true;
 }
 
@@ -108,6 +134,16 @@ bool VCFXValidator::validateDataLine(const std::string &line, int lineNumber){
         std::cerr<<"Error: line "<< lineNumber <<" has <8 columns.\n";
         return false;
     }
+    if(headerColumnCount>0){
+        if(strictMode && static_cast(f.size()) != headerColumnCount){
+            std::cerr << "Error: line "<(f.size()) != headerColumnCount){
+            std::cerr << "Warning: line "< formatParts = split(f[8], ':');
+        for(size_t i=9;i sampleParts = split(f[i], ':');
+            if(sampleParts.size()!=formatParts.size()){
+                std::string msg = "Warning: sample column " + std::to_string(i-8) +
+                    " does not match FORMAT field";
+                if(strictMode){
+                    std::cerr<<"Error: "<8){
+        std::string msg = "Warning: data line has sample columns but header lacks FORMAT";
+        if(strictMode){
+            std::cerr<<"Error: "< lines;
 
     while(true){
         if(!std::getline(in, line)) break;
@@ -224,16 +292,21 @@ bool VCFXValidator::validateVCF(std::istream &in){
             }
             if(!validateDataLine(line, lineNum)) return false;
         }
+        lines.push_back(line);
     }
     if(!foundChromLine){
         std::cerr<<"Error: no #CHROM line found in file.\n";
         return false;
     }
-    std::cout<<"VCF file is valid.\n";
+    for(const auto &l : lines){
+        std::cout << l << '\n';
+    }
+    std::cerr<<"VCF file is valid.\n";
     return true;
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_validator")) return 0;
     VCFXValidator validator;
     return validator.run(argc, argv);
 }
diff --git a/src/VCFX_validator/VCFX_validator.h b/src/VCFX_validator/VCFX_validator.h
index c4d13d93..d908dc9d 100644
--- a/src/VCFX_validator/VCFX_validator.h
+++ b/src/VCFX_validator/VCFX_validator.h
@@ -11,6 +11,12 @@ class VCFXValidator {
 private:
     // If we add advanced checks for e.g. "strict" mode, we store a bool
     bool strictMode = false;
+    // Number of columns in the #CHROM header line
+    int headerColumnCount = 0;
+    // Whether the header includes FORMAT/sample columns
+    bool headerHasFormat = false;
+    // Number of sample columns
+    int sampleCount = 0;
 
     // Show usage
     void displayHelp();
diff --git a/src/VCFX_variant_classifier/VCFX_variant_classifier.cpp b/src/VCFX_variant_classifier/VCFX_variant_classifier.cpp
index 14d9d511..1e4d16d5 100644
--- a/src/VCFX_variant_classifier/VCFX_variant_classifier.cpp
+++ b/src/VCFX_variant_classifier/VCFX_variant_classifier.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_variant_classifier.h"
 #include 
 #include 
@@ -326,6 +327,7 @@ void VCFXVariantClassifier::classifyStream(std::istream &in, std::ostream &out){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_variant_classifier")) return 0;
     VCFXVariantClassifier app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_variant_counter/VCFX_variant_counter.cpp b/src/VCFX_variant_counter/VCFX_variant_counter.cpp
index 37e037ca..55549d47 100644
--- a/src/VCFX_variant_counter/VCFX_variant_counter.cpp
+++ b/src/VCFX_variant_counter/VCFX_variant_counter.cpp
@@ -4,6 +4,9 @@
 #include 
 #include 
 #include 
+#include "vcfx_core.h"
+#include 
+#include 
 
 void VCFXVariantCounter::displayHelp(){
     std::cout <<
@@ -55,7 +58,28 @@ int VCFXVariantCounter::run(int argc, char* argv[]){
         return 0;
     }
     
-    int total= countVariants(std::cin);
+    auto peek1 = std::cin.peek();
+    bool isEmpty = (peek1 == EOF);
+    bool isGzip = false;
+    if(!isEmpty){
+        int c1 = std::cin.get();
+        int c2 = std::cin.get();
+        if(c2 != EOF){
+            isGzip = (static_cast(c1) == 0x1f &&
+                      static_cast(c2) == 0x8b);
+            std::cin.putback(static_cast(c2));
+        }
+        std::cin.putback(static_cast(c1));
+    }
+
+    int total = -1;
+    if(isEmpty){
+        total = 0;
+    } else if(isGzip){
+        total = countVariantsGzip(std::cin);
+    } else {
+        total = countVariants(std::cin);
+    }
     if(total<0){
         // indicates an error if strict
         return 1;
@@ -64,40 +88,95 @@ int VCFXVariantCounter::run(int argc, char* argv[]){
     return 0;
 }
 
+bool VCFXVariantCounter::processLine(const std::string &line, int lineNumber, int &count){
+    if(line.empty()) return true;
+    if(line[0]=='#') return true;
+    std::stringstream ss(line);
+    std::vector fields;
+    {
+        std::string col;
+        while(std::getline(ss,col,'\t')){
+            fields.push_back(col);
+        }
+    }
+    if(fields.size()<8){
+        if(strictMode){
+            std::cerr<<"Error: line "<< lineNumber <<" has <8 columns.\n";
+            return false;
+        } else {
+            std::cerr<<"Warning: skipping line "< fields;
-        {
-            std::string col;
-            while(std::getline(ss,col,'\t')){
-                fields.push_back(col);
+        if(!processLine(line, lineNumber, count)) return -1;
+    }
+    return count;
+}
+
+int VCFXVariantCounter::countVariantsGzip(std::istream &in){
+    constexpr int CHUNK = 16384;
+    char inBuf[CHUNK];
+    char outBuf[CHUNK];
+    z_stream strm; std::memset(&strm,0,sizeof(strm));
+    if(inflateInit2(&strm,15+32)!=Z_OK){
+        std::cerr<<"Error: inflateInit2 failed.\n";
+        return -1;
+    }
+    int count=0; int lineNumber=0; std::string buffer; int ret=Z_OK;
+    do {
+        in.read(inBuf, CHUNK);
+        strm.avail_in = static_cast(in.gcount());
+        if(strm.avail_in==0 && in.eof()) break;
+        strm.next_in = reinterpret_cast(inBuf);
+        do {
+            strm.avail_out = CHUNK;
+            strm.next_out = reinterpret_cast(outBuf);
+            ret = inflate(&strm, Z_NO_FLUSH);
+            if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT || ret == Z_DATA_ERROR || ret == Z_MEM_ERROR){
+                std::cerr<<"Error: decompression failed.\n";
+                inflateEnd(&strm);
+                return -1;
             }
-        }
-        if(fields.size()<8){
-            if(strictMode){
-                std::cerr<<"Error: line "<< lineNumber <<" has <8 columns.\n";
-                return -1; // indicates error
-            } else {
-                std::cerr<<"Warning: skipping line "<0){
+                buffer.append(outBuf, have);
+                size_t pos;
+                while((pos = buffer.find('\n')) != std::string::npos){
+                    std::string line = buffer.substr(0,pos);
+                    buffer.erase(0,pos+1);
+                    lineNumber++;
+                    if(!processLine(line,lineNumber,count)){
+                        inflateEnd(&strm);
+                        return -1;
+                    }
+                }
             }
+        } while(strm.avail_out==0);
+    } while(ret != Z_STREAM_END);
+
+    if(!buffer.empty()){
+        lineNumber++;
+        if(!processLine(buffer,lineNumber,count)){
+            inflateEnd(&strm);
+            return -1;
         }
-        // if we get here => count it
-        count++;
     }
+    inflateEnd(&strm);
     return count;
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_variant_counter")) return 0;
     VCFXVariantCounter app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_variant_counter/VCFX_variant_counter.h b/src/VCFX_variant_counter/VCFX_variant_counter.h
index c53b8358..6c5b05c0 100644
--- a/src/VCFX_variant_counter/VCFX_variant_counter.h
+++ b/src/VCFX_variant_counter/VCFX_variant_counter.h
@@ -17,6 +17,8 @@ class VCFXVariantCounter {
 
     // The actual counting function
     int countVariants(std::istream &in);
+    int countVariantsGzip(std::istream &in);
+    bool processLine(const std::string &line, int lineNumber, int &count);
 
 };
 
diff --git a/src/vcfx_core.cpp b/src/vcfx_core.cpp
index bbfcaa5c..43f06a5f 100644
--- a/src/vcfx_core.cpp
+++ b/src/vcfx_core.cpp
@@ -1,4 +1,138 @@
 #include "vcfx_core.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
 
-// Implementation of core functionalities
-// Add actual implementations as needed
+namespace vcfx {
+
+std::string trim(const std::string& str) {
+    // Strip leading/trailing spaces, tabs, CR and LF.
+    const char* const blanks = " \t\n\r";
+    const auto begin = str.find_first_not_of(blanks);
+    const auto end = str.find_last_not_of(blanks);
+    // 'begin' is npos exactly when the input is empty or all whitespace.
+    return begin == std::string::npos ? "" : str.substr(begin, end - begin + 1);
+}
+
+std::vector<std::string> split(const std::string& str, char delimiter) {
+    // std::getline semantics: no empty token after a trailing delimiter.
+    std::vector<std::string> tokens;
+    std::istringstream stream(str);
+    for (std::string token; std::getline(stream, token, delimiter);) {
+        tokens.push_back(token);
+    }
+    return tokens;
+}
+
+void print_error(const std::string& msg, std::ostream& os) {
+    os << "Error: " << msg << '\n';  // one "Error: <msg>" line; '\n' does not flush
+}
+
+void print_version(const std::string& tool, const std::string& version,
+                   std::ostream& os) {
+    os << tool << " version " << version << '\n';  // one "<tool> version <version>" line
+}
+
+// ------------------------------------------------------------
+// Internal helper: decompress gzip/BGZF data from 'in', appending to 'out'.
+// Returns true only if >=1 member was decoded and input ended on a member end.
+// ------------------------------------------------------------
+static bool decompress_gzip_stream(std::istream& in, std::string& out) {
+    constexpr int CHUNK = 16384;
+    char inBuf[CHUNK];
+    char outBuf[CHUNK];
+
+    z_stream strm;
+    std::memset(&strm, 0, sizeof(strm));
+    if (inflateInit2(&strm, 15 + 32) != Z_OK) {  // 15+32: auto-detect gzip/zlib
+        return false;
+    }
+
+    // BGZF and concatenated gzip files hold several members back to back, so
+    // Z_STREAM_END is final only when no input follows; a failed read ends the loop.
+    bool memberDone = false;
+    while (in.read(inBuf, CHUNK) || in.gcount() > 0) {
+        strm.avail_in = static_cast<unsigned int>(in.gcount());
+        strm.next_in = reinterpret_cast<unsigned char*>(inBuf);
+        do {
+            if (memberDone && inflateReset(&strm) != Z_OK) {
+                inflateEnd(&strm);
+                return false;
+            }
+            strm.avail_out = CHUNK;
+            strm.next_out = reinterpret_cast<unsigned char*>(outBuf);
+            const int ret = inflate(&strm, Z_NO_FLUSH);
+            if (ret != Z_OK && ret != Z_STREAM_END && ret != Z_BUF_ERROR) {
+                inflateEnd(&strm);
+                return false;
+            }
+            out.append(outBuf, CHUNK - strm.avail_out);
+            memberDone = (ret == Z_STREAM_END);
+            // Keep going while input remains or output may still be pending.
+        } while (strm.avail_in > 0 || (strm.avail_out == 0 && !memberDone));
+    }
+
+    inflateEnd(&strm);
+    return memberDone;
+}
+
+// ------------------------------------------------------------
+// Detect the gzip magic bytes (0x1f 0x8b) without consuming input.
+// Needs a single character of push-back; pushing back two is not
+// reliable on every stream buffer (e.g. the one behind std::cin).
+// ------------------------------------------------------------
+static bool stream_has_gzip_magic(std::istream& in) {
+    // peek() never extracts, so on a mismatch restoring the previous
+    // stream state is all that is required.
+    const std::ios_base::iostate saved = in.rdstate();
+    if (in.peek() != 0x1f) {
+        in.clear(saved);
+        return false;
+    }
+    in.get();  // consume 0x1f so the second byte can be inspected
+    const bool isGz = (in.peek() == 0x8b);
+    in.clear(saved);  // drop eofbit set by a one-byte input before unget()
+    in.unget();
+    return isGz;
+}
+
+bool read_maybe_compressed(std::istream& in, std::string& out) {
+    out.clear();
+    if (!stream_has_gzip_magic(in)) {
+        std::ostringstream plain;  // plain text: copy the stream verbatim
+        plain << in.rdbuf();
+        out = plain.str();
+        return true;
+    }
+    return decompress_gzip_stream(in, out);  // gzip/BGZF input
+}
+
+bool read_file_maybe_compressed(const std::string& path, std::string& out) {
+    // Reset 'out' like read_maybe_compressed(); decompression only appends.
+    out.clear();
+    std::ifstream file(path, std::ios::binary);
+    if (!file.is_open()) {
+        return false;
+    }
+    // True when 'path' ends with the given extension.
+    auto endsWith = [&path](const std::string& ext) {
+        return path.size() >= ext.size() &&
+               path.compare(path.size() - ext.size(), ext.size(), ext) == 0;
+    };
+    // Trust a gzip/BGZF extension first, then fall back to the magic bytes.
+    const bool isGz = endsWith(".gz") || endsWith(".bgz") ||
+                      endsWith(".bgzf") || stream_has_gzip_magic(file);
+    if (isGz) {
+        return decompress_gzip_stream(file, out);
+    }
+    // Plain text: copy the whole file verbatim.
+    std::ostringstream oss;
+    oss << file.rdbuf();
+    out = oss.str();
+    return true;
+}
+
+}  // namespace vcfx
diff --git a/src/vcfx_wrapper/CMakeLists.txt b/src/vcfx_wrapper/CMakeLists.txt
new file mode 100644
index 00000000..d9824862
--- /dev/null
+++ b/src/vcfx_wrapper/CMakeLists.txt
@@ -0,0 +1 @@
+add_executable(vcfx vcfx.cpp)  # "vcfx" front-end binary, built from vcfx.cpp alone
diff --git a/src/vcfx_wrapper/vcfx.cpp b/src/vcfx_wrapper/vcfx.cpp
new file mode 100644
index 00000000..2625136f
--- /dev/null
+++ b/src/vcfx_wrapper/vcfx.cpp
@@ -0,0 +1,153 @@
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <dirent.h>
+#include <fstream>
+#include <getopt.h>
+#include <iostream>
+#include <limits.h>
+#include <set>
+#include <string>
+#include <unistd.h>
+#include <vector>
+
+static void print_usage(){  // Print the wrapper's usage summary to stdout.
+    std::cout << "vcfx - unified interface for VCFX tools\n"
+              << "Usage: vcfx [--help] [--list] <subcommand> [args]\n\n"
+              << "  <subcommand>  Name of a VCFX tool without the 'VCFX_' prefix\n"
+              << "  list          Alias for --list\n"
+              << "  help <tool>   Show Markdown documentation for a tool if available\n"
+              << "  --list        List available subcommands found in PATH\n"
+              << "  --help        Show this help message\n";
+}
+
+static void list_commands(){  // Print the sorted, prefix-stripped names of executable VCFX_* entries on PATH.
+    const char* path_env = std::getenv("PATH");
+    if(!path_env) return;  // no PATH: nothing to scan, print nothing
+    std::string paths(path_env);
+    std::set<std::string> cmds;  // ordered and unique: a tool found in several directories is listed once
+    size_t start=0;
+    while(true){
+        size_t end = paths.find(':', start);  // npos on the last entry
+        std::string dir = paths.substr(start, end - start);  // substr clamps the length for the final entry
+        DIR* d = opendir(dir.c_str());
+        if(d){  // entries that cannot be opened as a directory are skipped silently
+            struct dirent* e;
+            while((e = readdir(d)) != nullptr){
+                if(std::strncmp(e->d_name, "VCFX_", 5)==0){
+                    std::string name = e->d_name + 5;  // drop the "VCFX_" prefix
+                    std::string full = dir + "/" + e->d_name;
+                    if(access(full.c_str(), X_OK)==0){
+                        cmds.insert(name);
+                    }
+                }
+            }
+            closedir(d);
+        }
+        if(end == std::string::npos) break;
+        start = end + 1;
+    }
+    for(const auto& c : cmds){
+        std::cout << c << '\n';
+    }
+}
+
+static std::vector<std::string> get_doc_dirs(){  // Candidate documentation directories, in search order.
+    std::vector<std::string> dirs;
+    const char* env = std::getenv("VCFX_DOCS_DIR");
+    if(env) dirs.emplace_back(env);  // an explicit override is searched first
+    // /proc/self/exe is Linux procfs; where it is missing readlink fails and only the other entries remain.
+    char buf[PATH_MAX];
+    ssize_t len = readlink("/proc/self/exe", buf, sizeof(buf)-1);
+    if(len > 0){
+        buf[len] = '\0';  // readlink does not NUL-terminate; one byte was reserved above
+        std::string exe(buf);
+        auto pos = exe.find_last_of('/');
+        if(pos != std::string::npos){
+            std::string base = exe.substr(0,pos);  // directory containing the running executable
+            dirs.push_back(base + "/../share/doc/VCFX");
+            dirs.push_back(base + "/../share/vcfx/docs");
+            dirs.push_back(base + "/../docs");
+            dirs.push_back(base + "/../../docs");
+        }
+    }
+    dirs.push_back("docs");  // last resort: relative to the current working directory
+    return dirs;
+}
+
+// Write the Markdown page VCFX_<tool>.md to stdout from the first directory
+// returned by get_doc_dirs() that has it. Returns 0 on success, 1 when not found.
+static int print_tool_doc(const std::string& tool){
+    const std::string doc_name = "VCFX_" + tool + ".md";
+    for(const auto& dir : get_doc_dirs()){
+        std::ifstream doc(dir + "/" + doc_name);
+        if(!doc) continue;
+        std::cout << doc.rdbuf();
+        return 0;
+    }
+    std::cerr << "Documentation for '" << tool << "' not found." << std::endl;
+    return 1;
+}
+
+int main(int argc, char* argv[]){  // Handle the wrapper's own options, then exec VCFX_<subcommand>.
+    bool show_help = false;
+    bool show_list = false;
+    static struct option long_opts[] = {
+        {"help", no_argument, 0, 'h'},
+        {"list", no_argument, 0, 'l'},
+        {0,0,0,0}
+    };
+    // Leading '+': stop at the first non-option so the subcommand's own flags reach the tool untouched.
+    int opt;
+    while((opt = getopt_long(argc, argv, "+hl", long_opts, nullptr)) != -1){
+        if(opt == 'h') show_help = true;
+        else if(opt == 'l') show_list = true;
+        else {
+            print_usage();
+            return 1;
+        }
+    }
+
+    if(show_help){
+        print_usage();
+        return 0;
+    }
+    if(show_list){
+        list_commands();
+        return 0;
+    }
+
+    if(optind >= argc){  // no subcommand given
+        print_usage();
+        return 1;
+    }
+
+    std::string sub = argv[optind];
+
+    if(sub == "list"){
+        list_commands();
+        return 0;
+    }
+
+    if(sub == "help"){
+        if(optind + 1 >= argc){  // bare "help" behaves like --help
+            print_usage();
+            return 0;
+        }
+        return print_tool_doc(argv[optind + 1]);
+    }
+
+    std::string exec_name = "VCFX_" + sub;
+    // argv for execvp: the tool name, every remaining argument, then the terminating null pointer.
+    std::vector<char*> exec_args;
+    exec_args.push_back(const_cast<char*>(exec_name.c_str()));
+    for(int i = optind + 1; i < argc; ++i){
+        exec_args.push_back(argv[i]);
+    }
+    exec_args.push_back(nullptr);
+    // execvp searches PATH and only returns on failure.
+    execvp(exec_name.c_str(), exec_args.data());
+    std::perror(exec_name.c_str());
+    return 1;
+}
+
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode 100644
index 00000000..78fc4a59
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,80 @@
+# Register each VCFX shell test script with CTest.
+# The test name is the script name without its extension; every script is run
+# with bash from this directory.
+set(TEST_SCRIPTS
+    test_af_subsetter.sh
+    test_alignment_checker.sh
+    test_allele_balance_calc.sh
+    test_allele_balance_filter.sh
+    test_allele_counter.sh
+    test_allele_freq_calc.sh
+    test_ancestry_assigner.sh
+    test_ancestry_inferrer.sh
+    test_annotation_extractor.sh
+    test_compressor.sh
+    test_concordance_checker.sh
+    test_cross_sample_concordance.sh
+    test_custom_annotator.sh
+    test_diff_tool.sh
+    test_distance_calculator.sh
+    test_dosage_calculator.sh
+    test_duplicate_remover.sh
+    test_fasta_converter.sh
+    test_field_extractor.sh
+    test_file_splitter.sh
+    test_format_converter.sh
+    test_genotype_query.sh
+    test_gl_filter.sh
+    test_haplotype_extractor.sh
+    test_header_parser.sh
+    test_hwe_tester.sh
+    test_impact_filter.sh
+    test_indel_normalizer.sh
+    test_indexer.sh
+    test_info_aggregator.sh
+    test_info_summarizer.sh
+    test_inbreeding_calculator.sh
+    test_ld_calculator.sh
+    test_metadata_summarizer.sh
+    test_merger.sh
+    test_missing_data_handler.sh
+    test_missing_detector.sh
+    test_multiallelic_splitter.sh
+    test_nonref_filter.sh
+    test_outlier_detector.sh
+    test_phase_checker.sh
+    test_phase_quality_filter.sh
+    test_phred_filter.sh
+    test_population_filter.sh
+    test_position_subsetter.sh
+    test_probability_filter.sh
+    test_quality_adjuster.sh
+    test_record_filter.sh
+    test_ref_comparator.sh
+    test_reformatter.sh
+    test_region_subsampler.sh
+    test_sample_extractor.sh
+    test_sorter.sh
+    test_sv_handler.sh
+    test_subsampler.sh
+    test_validator.sh
+    test_variant_classifier.sh
+    test_variant_counter.sh
+    test_python_bindings.sh
+)
+
+# NOTE(review): test_vcfx_wrapper.sh is run by tests/test_all.sh but is not listed above - confirm that is intended.
+foreach(script IN LISTS TEST_SCRIPTS)
+    get_filename_component(name "${script}" NAME_WE)
+    add_test(NAME "${name}"
+             COMMAND bash "${CMAKE_CURRENT_SOURCE_DIR}/${script}"
+             WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
+endforeach()
+
+# Optional Docker-based tests: registered only when a docker executable is found
+find_program(DOCKER_EXECUTABLE docker)
+if(DOCKER_EXECUTABLE)
+    add_test(NAME test_docker
+             COMMAND bash "${CMAKE_CURRENT_SOURCE_DIR}/test_docker.sh"
+             WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
+endif()
diff --git a/tests/expected/align_Y.txt b/tests/expected/align_Y.txt
index fe7b6ec7..2a84859b 100644
--- a/tests/expected/align_Y.txt
+++ b/tests/expected/align_Y.txt
@@ -1 +1,2 @@
 CHROM	POS	ID	REF	ALT	Discrepancy_Type	Reference_Value	VCF_Value
+chr2	5	.	T	T	ALT_MISMATCH	T	T
diff --git a/tests/out/align_Y.txt b/tests/out/align_Y.txt
index fe7b6ec7..2a84859b 100644
--- a/tests/out/align_Y.txt
+++ b/tests/out/align_Y.txt
@@ -1 +1,2 @@
 CHROM	POS	ID	REF	ALT	Discrepancy_Type	Reference_Value	VCF_Value
+chr2	5	.	T	T	ALT_MISMATCH	T	T
diff --git a/tests/test_all.sh b/tests/test_all.sh
index 75cbb912..86921dea 100755
--- a/tests/test_all.sh
+++ b/tests/test_all.sh
@@ -81,6 +81,8 @@ TEST_SCRIPTS=(
     "test_validator.sh"
     "test_variant_classifier.sh"
     "test_variant_counter.sh"
+    "test_vcfx_wrapper.sh"
+    "test_python_bindings.sh"
 )
 
 # Run all tests
diff --git a/tests/test_custom_annotator.sh b/tests/test_custom_annotator.sh
index 7da1d4a3..cf7b45e8 100755
--- a/tests/test_custom_annotator.sh
+++ b/tests/test_custom_annotator.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 # Exit on error
 set -e
@@ -125,10 +125,19 @@ for i in $(seq 1 1000); do
     echo "1	$i	A	G	Annotation$i"
 done > "$SCRIPT_DIR/data/large_annotations.txt"
 # Add VCF header
-sed -i '' '1i\
+if [[ "$(uname)" == "Darwin" ]]; then
+    sed -i '' '1i\
 ##fileformat=VCFv4.2\
 ##contig=\
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1' "$SCRIPT_DIR/data/large_input.vcf"
+#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  SAMPLE1\
+' "$SCRIPT_DIR/data/large_input.vcf"
+else
+    sed -i '1i\
+##fileformat=VCFv4.2\
+##contig=\
+#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  SAMPLE1\
+' "$SCRIPT_DIR/data/large_input.vcf"
+fi
 
 time "$ROOT_DIR/build/src/VCFX_custom_annotator/VCFX_custom_annotator" --add-annotation "$SCRIPT_DIR/data/large_annotations.txt" < "$SCRIPT_DIR/data/large_input.vcf" > "$SCRIPT_DIR/data/large_output.vcf"
 if [ $? -eq 0 ]; then
@@ -138,4 +147,5 @@ else
     exit 1
 fi
 
-echo "All tests for VCFX_custom_annotator passed!" 
\ No newline at end of file
+echo "All tests for VCFX_custom_annotator passed!" 
+
diff --git a/tests/test_docker.sh b/tests/test_docker.sh
index 6c9085da..58f9158d 100755
--- a/tests/test_docker.sh
+++ b/tests/test_docker.sh
@@ -1,5 +1,12 @@
 #!/bin/bash
 # This script tests the VCFX Docker image using the existing test files from the tests directory
+# Docker image to use for the tests. CI may override this when using a locally
+# built image.
+VCFX_IMAGE="${VCFX_IMAGE:-ghcr.io/jorgemfs/vcfx:latest}"
+
+# Directory paths
+TESTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+REPO_ROOT="$(dirname "$TESTS_DIR")"
 
 # Function to check if command succeeded
 check_success() {
@@ -13,19 +20,22 @@ check_success() {
 
 # Check if Docker is installed
 if ! command -v docker &> /dev/null; then
-  echo "❌ Docker is not installed. Please install Docker first."
-  exit 1
+  echo "⚠️ Docker is not installed. Skipping Docker tests."
+  exit 0
 fi
 
 echo "🧬 Testing VCFX Docker image with official test files..."
 
 # Pull the latest VCFX image
-echo "📥 Pulling the latest VCFX Docker image..."
-docker pull ghcr.io/ieeta-pt/vcfx:latest
-check_success "Pulled VCFX Docker image"
-
-# Get the directory of this script (tests directory)
-TESTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+echo "📥 Pulling the latest VCFX Docker image ($VCFX_IMAGE)..."
+if docker pull "$VCFX_IMAGE"; then
+  check_success "Pulled VCFX Docker image"
+else
+  echo "⚠️  Unable to pull $VCFX_IMAGE. Building Docker image locally..."
+  docker build -t vcfx:local "${REPO_ROOT}"
+  check_success "Built local Docker image"
+  VCFX_IMAGE="vcfx:local"
+fi
 
 echo "🔍 Using test files from: ${TESTS_DIR}"
 
@@ -36,36 +46,36 @@ check_success "Created temporary output directory"
 
 # Test 1: List available tools
 echo "📋 Listing available VCFX tools..."
-docker run --rm ghcr.io/ieeta-pt/vcfx:latest 'ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename'
+docker run --rm $VCFX_IMAGE bash -c 'ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename'
 check_success "Listed available tools"
 
 # Test 2: Validator test
 echo "🔍 Testing VCFX_validator..."
-docker run --rm -v "${TESTS_DIR}:/tests" ghcr.io/ieeta-pt/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_validator'
+docker run --rm -v "${TESTS_DIR}:/tests" $VCFX_IMAGE bash -c 'cat /tests/data/valid.vcf | VCFX_validator'
 check_success "Validated valid.vcf file"
 
 # Test 3: Allele frequency calculator test
 echo "🧮 Testing VCFX_allele_freq_calc..."
 docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \
-  ghcr.io/ieeta-pt/vcfx:latest 'cat /tests/data/allele_freq_calc/test_input.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv'
+  $VCFX_IMAGE bash -c 'cat /tests/data/allele_freq_calc/simple.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv'
 check_success "Calculated allele frequencies"
 
 # Test 4: Sample extractor test
 echo "👥 Testing VCFX_sample_extractor..."
 docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \
-  ghcr.io/ieeta-pt/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_sample_extractor --samples SAMPLE1 > /output/sample1.vcf'
+  $VCFX_IMAGE bash -c 'cat /tests/data/valid.vcf | VCFX_sample_extractor --samples SAMPLE1 > /output/sample1.vcf'
 check_success "Extracted sample"
 
 # Test 5: Variant classifier test
 echo "🔬 Testing VCFX_variant_classifier..."
 docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \
-  ghcr.io/ieeta-pt/vcfx:latest 'cat /tests/data/classifier_mixed.vcf | VCFX_variant_classifier --append-info > /output/classified.vcf'
+  $VCFX_IMAGE bash -c 'cat /tests/data/classifier_mixed.vcf | VCFX_variant_classifier --append-info > /output/classified.vcf'
 check_success "Classified variants"
 
 # Test 6: Testing a pipeline of commands
 echo "🔄 Testing a pipeline of VCFX tools..."
 docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \
-  ghcr.io/ieeta-pt/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_validator | VCFX_variant_classifier --append-info | VCFX_allele_freq_calc > /output/pipeline_output.tsv'
+  $VCFX_IMAGE bash -c 'cat /tests/data/valid.vcf | VCFX_validator | VCFX_variant_classifier --append-info | VCFX_allele_freq_calc > /output/pipeline_output.tsv'
 check_success "Executed pipeline of tools"
 
 echo "🎉 All Docker tests completed successfully!"
@@ -77,4 +87,4 @@ echo "📚 For more information on how to use VCFX with Docker, see the document
 # Clean up temporary files
 echo "🧹 Cleaning up..."
 rm -rf "${TEMP_OUTPUT}"
-check_success "Cleaned up temporary files" 
\ No newline at end of file
+check_success "Cleaned up temporary files" 
diff --git a/tests/test_genotype_query.sh b/tests/test_genotype_query.sh
index 363e8beb..3d74d5f6 100755
--- a/tests/test_genotype_query.sh
+++ b/tests/test_genotype_query.sh
@@ -23,9 +23,9 @@ TOOL="../build/src/VCFX_genotype_query/VCFX_genotype_query"
 
 # Directories for test data, expected outputs, and actual output:
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-TMP_DATA_DIR="${SCRIPT_DIR}/tests/data/genotype_query"
-TMP_EXP_DIR="${SCRIPT_DIR}/tests/expected/genotype_query"
-TMP_OUT_DIR="${SCRIPT_DIR}/tests/tmp_genoquout/genotype_query"
+TMP_DATA_DIR="${SCRIPT_DIR}/data/genotype_query"
+TMP_EXP_DIR="${SCRIPT_DIR}/expected/genotype_query"
+TMP_OUT_DIR="${SCRIPT_DIR}/tmp/genotype_query"
 
 mkdir -p "$TMP_DATA_DIR" "$TMP_EXP_DIR" "$TMP_OUT_DIR"
 
diff --git a/tests/test_indexer.sh b/tests/test_indexer.sh
index 61d080e4..68626e08 100755
--- a/tests/test_indexer.sh
+++ b/tests/test_indexer.sh
@@ -160,9 +160,9 @@ fi
 echo "✓ Test 6 passed"
 
 ###############################################################################
-# Test 7: Windows (CRLF) line endings
+# Test 7: CRLF line endings
 ###############################################################################
-echo "Test 7: Windows CRLF line endings"
+echo "Test 7: CRLF line endings"
 cat > "${SCRIPT_DIR}/data/indexer/crlf_unix.vcf" < "${SCRIPT_DIR}/data/indexer/crlf_unix.vcf" << 'EOF'
 ##fileformat=VCFv4.2
diff --git a/tests/test_info_aggregator.sh b/tests/test_info_aggregator.sh
index 3e9bb472..3926f23a 100755
--- a/tests/test_info_aggregator.sh
+++ b/tests/test_info_aggregator.sh
@@ -178,9 +178,9 @@ echo "✓ Test 7 passed"
 
 
 ###############################################################################
-# Test 8: Windows CRLF line endings
+# Test 8: CRLF line endings
 ###############################################################################
-echo "Test 8: Windows CRLF line endings"
+echo "Test 8: CRLF line endings"
 cat > "${SCRIPT_DIR}/data/aggregator/crlf_unix.vcf" << EOF
 ##fileformat=VCFv4.2
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
diff --git a/tests/test_info_parser.sh b/tests/test_info_parser.sh
index e6efdba5..af431d65 100755
--- a/tests/test_info_parser.sh
+++ b/tests/test_info_parser.sh
@@ -172,7 +172,7 @@ grep -q "^1[[:space:]]200" "${SCRIPT_DIR}/data/info_parser/invalid.out" || (echo
 echo "✓ Test 7 passed"
 
 ###############################################################################
-# Test 8: Windows CRLF line endings
+# Test 8: CRLF line endings
 ###############################################################################
 echo "Test 8: CRLF line endings"
 cat > "${SCRIPT_DIR}/data/info_parser/crlf_unix.vcf" <&1)
+    output=$($EXEC $opts < "$input_file" 2>&1)
     exit_code=$?
     
     if [ $exit_code -eq 0 ]; then
@@ -43,13 +44,14 @@ run_test_failure() {
     local description=$2
     local input_file=$3
     local expected_error=$4
+    local opts="$5"
     
     echo -n "Test $test_num: $description... "
     
     # Run the command using process substitution
     local output
     local exit_code
-    output=$($EXEC < "$input_file" 2>&1)
+    output=$($EXEC $opts < "$input_file" 2>&1)
     exit_code=$?
     
     if [ $exit_code -ne 0 ]; then
@@ -171,6 +173,23 @@ chr1	100	.	A	T	.	PASS	.
 chr2	200	rs456	G	C	80	PASS	NS=2;DP=15
 EOF
 
+# Header has one sample column but a data line includes two sample columns
+cat > data/mismatched_columns.vcf << EOF
+##fileformat=VCFv4.2
+##FORMAT=
+EOF
+printf '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE1\n' >> data/mismatched_columns.vcf
+printf 'chr1\t100\t.\tA\tT\t60\tPASS\t.\tGT\t0/1\t0/0\n' >> data/mismatched_columns.vcf
+
+# FORMAT expects two entries but sample has three
+cat > data/format_mismatch.vcf << EOF
+##fileformat=VCFv4.2
+##FORMAT=
+##FORMAT=
+EOF
+printf '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE1\n' >> data/format_mismatch.vcf
+printf 'chr1\t100\t.\tA\tT\t60\tPASS\t.\tGT:DP\t0/1:30:7\n' >> data/format_mismatch.vcf
+
 # Run each test separately and track failures
 failures=0
 
@@ -228,6 +247,18 @@ else
     failures=$((failures + 1))
 fi
 
+# Test 13 - strict mode valid file
+run_test_success 13 "Strict valid VCF" "data/valid.vcf" "--strict"
+[ $? -ne 0 ] && failures=$((failures + 1))
+
+# Test 14 - mismatched columns in strict mode
+run_test_failure 14 "Strict mismatched columns" "data/mismatched_columns.vcf" "columns" "--strict"
+[ $? -ne 0 ] && failures=$((failures + 1))
+
+# Test 15 - FORMAT/sample mismatch in strict mode
+run_test_failure 15 "Strict format mismatch" "data/format_mismatch.vcf" "FORMAT" "--strict"
+[ $? -ne 0 ] && failures=$((failures + 1))
+
 if [ $failures -eq 0 ]; then
     echo "All tests for VCFX_validator passed!"
     exit 0
diff --git a/tests/test_variant_counter.sh b/tests/test_variant_counter.sh
index bf60b9a0..587f7656 100755
--- a/tests/test_variant_counter.sh
+++ b/tests/test_variant_counter.sh
@@ -3,8 +3,16 @@ set -e
 
 echo "=== Testing VCFX_variant_counter ==="
 
-# Executable paths
-VCFX_EXECUTABLE="../build/src/VCFX_variant_counter/VCFX_variant_counter"
+# Determine script and repository locations so the test can be run from
+# anywhere.  This mirrors the approach used by other test scripts.
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+ROOT_DIR="$( cd "$SCRIPT_DIR/.." && pwd )"
+
+# Ensure we run inside the script directory for predictable paths
+cd "$SCRIPT_DIR"
+
+# Path to the built executable
+VCFX_EXECUTABLE="$ROOT_DIR/build/src/VCFX_variant_counter/VCFX_variant_counter"
 
 # Check if executable exists
 if [ ! -f "$VCFX_EXECUTABLE" ]; then
@@ -155,6 +163,17 @@ if [ ! -f data/variant_counter_empty.vcf ]; then
 EOF
 fi
 
+# Create gzipped versions of VCFs
+if [ ! -f data/variant_counter_normal.vcf.gz ]; then
+  gzip -c data/variant_counter_normal.vcf > data/variant_counter_normal.vcf.gz
+fi
+if [ ! -f data/variant_counter_invalid.vcf.gz ]; then
+  gzip -c data/variant_counter_invalid.vcf > data/variant_counter_invalid.vcf.gz
+fi
+if [ ! -f data/variant_counter_empty.vcf.gz ]; then
+  gzip -c data/variant_counter_empty.vcf > data/variant_counter_empty.vcf.gz
+fi
+
 # Test 1: Count variants in a normal VCF file (strict mode)
 run_test 1 "Counting variants in a normal VCF file (strict mode)" \
   "cat data/variant_counter_normal.vcf | $VCFX_EXECUTABLE --strict" \
@@ -212,4 +231,10 @@ diff -u expected/variant_counter_large.txt out/variant_counter_large.txt || {
 }
 echo "  Test 8 passed."
 
-echo "All VCFX_variant_counter tests passed!" 
\ No newline at end of file
+# Test 9: Gzipped normal VCF
+run_test 9 "Counting variants in a gzipped VCF file" \
+  "cat data/variant_counter_normal.vcf.gz | $VCFX_EXECUTABLE" \
+  "expected/variant_counter_normal_nonstrict.txt" \
+  "out/variant_counter_normal_gz.txt"
+
+echo "All VCFX_variant_counter tests passed!"
diff --git a/tests/test_vcfx_wrapper.sh b/tests/test_vcfx_wrapper.sh
new file mode 100755
index 00000000..e2c766e5
--- /dev/null
+++ b/tests/test_vcfx_wrapper.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+# Smoke test for the vcfx wrapper: `--list` and `list` must agree, and `help <tool>` must print the tool's docs.
+set -e
+set -o pipefail
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+ROOT_DIR="$( cd "$SCRIPT_DIR/.." && pwd )"
+VCFX_BIN="$ROOT_DIR/build/src/vcfx_wrapper/vcfx"
+
+# Ensure built tools are in PATH so the wrapper can locate them
+source "$ROOT_DIR/add_vcfx_tools_to_path.sh"
+
+if [ ! -x "$VCFX_BIN" ]; then
+  echo "vcfx executable not found: $VCFX_BIN"
+  exit 1
+fi
+
+LIST_LONG="$("$VCFX_BIN" --list)"
+LIST_ALIAS="$("$VCFX_BIN" list)"
+if [ "$LIST_LONG" != "$LIST_ALIAS" ]; then
+  echo "Output of 'vcfx list' does not match '--list'"
+  diff <(echo "$LIST_LONG") <(echo "$LIST_ALIAS") || true
+  exit 1
+fi
+
+# Read line one via process substitution: with pipefail, `... | head -n 1` can abort the script before the check below.
+IFS= read -r DOC_FIRST_LINE < <("$VCFX_BIN" help allele_counter) || true
+if ! echo "$DOC_FIRST_LINE" | grep -q "VCFX_allele_counter"; then
+  echo "Help output for allele_counter does not show documentation"
+  echo "First line was: $DOC_FIRST_LINE"
+  exit 1
+fi
+
+echo "✓ vcfx wrapper tests passed"
diff --git a/tests/tests/data/genotype_query/missing_malformed.vcf b/tests/tests/data/genotype_query/missing_malformed.vcf
deleted file mode 100644
index fc65872a..00000000
--- a/tests/tests/data/genotype_query/missing_malformed.vcf
+++ /dev/null
@@ -1,8 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	150	rsM	C	G	.	PASS	.	GT:DP	0/1:12	1/1:30
-1	200	rsN	A	T	.	PASS	.	GT	0/1	.
-1	250	rsO	A	G	.	PASS	.		1/1	1/1
-chr1	300  # <10 fields on purpose
-1	400	rsQ	G	A	99	PASS	.	DP	10	15
-
diff --git a/tests/tests/data/genotype_query/multi_sample.vcf b/tests/tests/data/genotype_query/multi_sample.vcf
deleted file mode 100644
index 18425526..00000000
--- a/tests/tests/data/genotype_query/multi_sample.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-1	100	rsX	A	G	.	PASS	.	GT	0/0	0|1	1/1
-1	200	rsY	A	G,T	.	PASS	.	GT	1/2	2/2	0/2
-2	300	rsZ	C	T	.	PASS	.	GT	1|1	1/1	1/0
-2	400	.	G	A	.	PASS	.	GT	.	.	0/1
-
diff --git a/tests/tests/data/genotype_query/single_sample.vcf b/tests/tests/data/genotype_query/single_sample.vcf
deleted file mode 100644
index 233fa7d6..00000000
--- a/tests/tests/data/genotype_query/single_sample.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	100	rsA	A	G	50	PASS	.	GT	0/1
-1	200	rsB	A	G	50	PASS	.	GT	0|1
-1	300	rsC	A	G	50	PASS	.	GT	1|1
-
diff --git a/tests/tests/expected/genotype_query/missing_malformed_01.vcf b/tests/tests/expected/genotype_query/missing_malformed_01.vcf
deleted file mode 100644
index 3c6ba4cc..00000000
--- a/tests/tests/expected/genotype_query/missing_malformed_01.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	150	rsM	C	G	.	PASS	.	GT:DP	0/1:12	1/1:30
-1	200	rsN	A	T	.	PASS	.	GT	0/1	.
-
diff --git a/tests/tests/expected/genotype_query/multi_11_flexible.vcf b/tests/tests/expected/genotype_query/multi_11_flexible.vcf
deleted file mode 100644
index fe2af3a8..00000000
--- a/tests/tests/expected/genotype_query/multi_11_flexible.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-1	100	rsX	A	G	.	PASS	.	GT	0/0	0|1	1/1
-2	300	rsZ	C	T	.	PASS	.	GT	1|1	1/1	1/0
-
diff --git a/tests/tests/expected/genotype_query/multi_11_strict.vcf b/tests/tests/expected/genotype_query/multi_11_strict.vcf
deleted file mode 100644
index add030a4..00000000
--- a/tests/tests/expected/genotype_query/multi_11_strict.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-2	300	rsZ	C	T	.	PASS	.	GT	1|1	1/1	1/0
-
diff --git a/tests/tests/expected/genotype_query/multi_12_flexible.vcf b/tests/tests/expected/genotype_query/multi_12_flexible.vcf
deleted file mode 100644
index 01424f8e..00000000
--- a/tests/tests/expected/genotype_query/multi_12_flexible.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-1	200	rsY	A	G,T	.	PASS	.	GT	1/2	2/2	0/2
-
diff --git a/tests/tests/expected/genotype_query/no_match.vcf b/tests/tests/expected/genotype_query/no_match.vcf
deleted file mode 100644
index 9d46ba3c..00000000
--- a/tests/tests/expected/genotype_query/no_match.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-
diff --git a/tests/tests/expected/genotype_query/single_sample_flex_01.vcf b/tests/tests/expected/genotype_query/single_sample_flex_01.vcf
deleted file mode 100644
index 52ce2d29..00000000
--- a/tests/tests/expected/genotype_query/single_sample_flex_01.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	100	rsA	A	G	50	PASS	.	GT	0/1
-1	200	rsB	A	G	50	PASS	.	GT	0|1
-
diff --git a/tests/tests/expected/genotype_query/single_sample_strict_01.vcf b/tests/tests/expected/genotype_query/single_sample_strict_01.vcf
deleted file mode 100644
index 625472f0..00000000
--- a/tests/tests/expected/genotype_query/single_sample_strict_01.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	200	rsB	A	G	50	PASS	.	GT	0|1
-
diff --git a/tests/tests/tmp_genoquout/genotype_query/help_message.txt b/tests/tests/tmp_genoquout/genotype_query/help_message.txt
deleted file mode 100644
index 1d446ddf..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/help_message.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-VCFX_genotype_query
-Usage: VCFX_genotype_query [OPTIONS]
-
-Options:
-  --genotype-query, -g "GENOTYPE"  Specify the genotype to query (e.g., "0/1", "1/1").
-  --strict                        Use strict string compare (no phasing unify or allele sorting).
-  --help, -h                      Display this help message and exit.
-
-Description:
-  Reads a VCF from stdin, outputs only the lines (plus all header lines) where
-  at least one sample has the specified genotype in the 'GT' subfield.
-
-Examples:
-  # Flexible matching 0/1 or 0|1 => both become 0/1
-  ./VCFX_genotype_query --genotype-query "0/1" < input.vcf > out.vcf
-
-  # Strict matching => "0|1" won't match "0/1"
-  ./VCFX_genotype_query --genotype-query "0|1" --strict < input.vcf > out.vcf
diff --git a/tests/tests/tmp_genoquout/genotype_query/long_equals_output.vcf b/tests/tests/tmp_genoquout/genotype_query/long_equals_output.vcf
deleted file mode 100644
index 27e716b6..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/long_equals_output.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	100	rsA	A	G	50	PASS	.	GT	0/1
-1	200	rsB	A	G	50	PASS	.	GT	0|1
diff --git a/tests/tests/tmp_genoquout/genotype_query/missing_args.txt b/tests/tests/tmp_genoquout/genotype_query/missing_args.txt
deleted file mode 100644
index c8c85b1b..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/missing_args.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Usage: ../build/src/VCFX_genotype_query/VCFX_genotype_query --genotype-query "0/1" [--strict] < input.vcf > output.vcf
-Use --help for usage.
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_1_single_flex_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_1_single_flex_output.vcf
deleted file mode 100644
index 27e716b6..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_1_single_flex_output.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	100	rsA	A	G	50	PASS	.	GT	0/1
-1	200	rsB	A	G	50	PASS	.	GT	0|1
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_2_single_strict_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_2_single_strict_output.vcf
deleted file mode 100644
index 314ce18f..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_2_single_strict_output.vcf
+++ /dev/null
@@ -1,3 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	200	rsB	A	G	50	PASS	.	GT	0|1
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_3_multi_11_flex_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_3_multi_11_flex_output.vcf
deleted file mode 100644
index 0b19c2aa..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_3_multi_11_flex_output.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-1	100	rsX	A	G	.	PASS	.	GT	0/0	0|1	1/1
-2	300	rsZ	C	T	.	PASS	.	GT	1|1	1/1	1/0
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_4_multi_11_strict_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_4_multi_11_strict_output.vcf
deleted file mode 100644
index f07c0d2b..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_4_multi_11_strict_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-2	300	rsZ	C	T	.	PASS	.	GT	1|1	1/1	1/0
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_5_multi_12_flex_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_5_multi_12_flex_output.vcf
deleted file mode 100644
index 4212ca27..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_5_multi_12_flex_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-1	200	rsY	A	G,T	.	PASS	.	GT	1/2	2/2	0/2
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_6_missing_malformed_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_6_missing_malformed_output.vcf
deleted file mode 100644
index a900c879..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_6_missing_malformed_output.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	150	rsM	C	G	.	PASS	.	GT:DP	0/1:12	1/1:30
-1	200	rsN	A	T	.	PASS	.	GT	0/1	.
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_7_no_match_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_7_no_match_output.vcf
deleted file mode 100644
index 3c654126..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_7_no_match_output.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
diff --git a/tests/tmp/afr_output.vcf b/tests/tmp/afr_output.vcf
deleted file mode 100644
index 6d48cfc7..00000000
--- a/tests/tmp/afr_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE3_AFR	SAMPLE4_AFR
-1	100	rs123	A	T	50	PASS	AF=0.1	GT:DP	1|1:20	0|1:22
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP	0|0:19	0|1:21
-2	150	rs789	T	C	70	PASS	AF=0.3	GT:DP	0|0:18	0|1:24
-2	250	rs012	G	A	80	PASS	AF=0.4	GT:DP	1|1:25	0|1:20
diff --git a/tests/tmp/afr_samples_err.log b/tests/tmp/afr_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/afr_samples_output.tsv b/tests/tmp/afr_samples_output.tsv
deleted file mode 100644
index 09be6da5..00000000
--- a/tests/tmp/afr_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-AFR_SAMPLE1	AFR
-AFR_SAMPLE2	AFR
-AFR_SAMPLE3	AFR
diff --git a/tests/tmp/basic_threshold_20.vcf b/tests/tmp/basic_threshold_20.vcf
deleted file mode 100644
index 82661320..00000000
--- a/tests/tmp/basic_threshold_20.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
-1	200	.	C	T	20	PASS	DP=25
-1	300	.	G	A	30	PASS	DP=40
diff --git a/tests/tmp/basic_threshold_30.vcf b/tests/tmp/basic_threshold_30.vcf
deleted file mode 100644
index 3ace7755..00000000
--- a/tests/tmp/basic_threshold_30.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
-1	300	.	G	A	30	PASS	DP=40
diff --git a/tests/tmp/basic_threshold_30_keep_missing.vcf b/tests/tmp/basic_threshold_30_keep_missing.vcf
deleted file mode 100644
index ed3b35f8..00000000
--- a/tests/tmp/basic_threshold_30_keep_missing.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
-1	300	.	G	A	30	PASS	DP=40
-1	400	.	T	C	.	PASS	DP=35
diff --git a/tests/tmp/complex_err.log b/tests/tmp/complex_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/complex_output.tsv b/tests/tmp/complex_output.tsv
deleted file mode 100644
index 189c397a..00000000
--- a/tests/tmp/complex_output.tsv
+++ /dev/null
@@ -1,3 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G	0.5000
-1	200	rs2	C	T	0.3333
diff --git a/tests/tmp/dp_ge_20_all_err.log b/tests/tmp/dp_ge_20_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/dp_ge_20_all_output.vcf b/tests/tmp/dp_ge_20_all_output.vcf
deleted file mode 100644
index 85eebfcf..00000000
--- a/tests/tmp/dp_ge_20_all_output.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/dp_ge_20_any_err.log b/tests/tmp/dp_ge_20_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/dp_ge_20_any_output.vcf b/tests/tmp/dp_ge_20_any_output.vcf
deleted file mode 100644
index d9b7ce08..00000000
--- a/tests/tmp/dp_ge_20_any_output.vcf
+++ /dev/null
@@ -1,12 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GQ:DP:PL	0/1:15:18:20,0,30	0/1:10:25:15,0,20	0/0:35:22:0,35,45
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP:PL	0/0:45:30:0,45,60	0/0:50:20:0,50,65	1/1:5:8:25,5,0
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GQ:DP:PL	0/1:20:25:30,0,35	1/1:30:18:40,30,0	0/1:25:22:32,0,38
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/dp_le_20_all_err.log b/tests/tmp/dp_le_20_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/dp_le_20_all_output.vcf b/tests/tmp/dp_le_20_all_output.vcf
deleted file mode 100644
index 87339394..00000000
--- a/tests/tmp/dp_le_20_all_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/dp_lt_19_5_any_err.log b/tests/tmp/dp_lt_19_5_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/dp_lt_19_5_any_output.vcf b/tests/tmp/dp_lt_19_5_any_output.vcf
deleted file mode 100644
index d9b7ce08..00000000
--- a/tests/tmp/dp_lt_19_5_any_output.vcf
+++ /dev/null
@@ -1,12 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GQ:DP:PL	0/1:15:18:20,0,30	0/1:10:25:15,0,20	0/0:35:22:0,35,45
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP:PL	0/0:45:30:0,45,60	0/0:50:20:0,50,65	1/1:5:8:25,5,0
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GQ:DP:PL	0/1:20:25:30,0,35	1/1:30:18:40,30,0	0/1:25:22:32,0,38
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/eas_output.vcf b/tests/tmp/eas_output.vcf
deleted file mode 100644
index 58a5bca7..00000000
--- a/tests/tmp/eas_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE5_EAS
-1	100	rs123	A	T	50	PASS	AF=0.1	GT:DP	0|0:18
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP	1|1:26
-2	150	rs789	T	C	70	PASS	AF=0.3	GT:DP	1|1:27
-2	250	rs012	G	A	80	PASS	AF=0.4	GT:DP	0|0:19
diff --git a/tests/tmp/eas_samples_err.log b/tests/tmp/eas_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/eas_samples_output.tsv b/tests/tmp/eas_samples_output.tsv
deleted file mode 100644
index 3363108c..00000000
--- a/tests/tmp/eas_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-EAS_SAMPLE1	EAS
-EAS_SAMPLE2	EAS
-EAS_SAMPLE3	EAS
diff --git a/tests/tmp/empty_map_error.txt b/tests/tmp/empty_map_error.txt
deleted file mode 100644
index f3c371ac..00000000
--- a/tests/tmp/empty_map_error.txt
+++ /dev/null
@@ -1 +0,0 @@
-Warning: No samples found for population tag: EUR
diff --git a/tests/tmp/empty_map_output.vcf b/tests/tmp/empty_map_output.vcf
deleted file mode 100644
index ae84bda2..00000000
--- a/tests/tmp/empty_map_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT
-1	100	rs123	A	T	50	PASS	AF=0.1	GT:DP
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP
-2	150	rs789	T	C	70	PASS	AF=0.3	GT:DP
-2	250	rs012	G	A	80	PASS	AF=0.4	GT:DP
diff --git a/tests/tmp/eq_operator_err.log b/tests/tmp/eq_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/eq_operator_output.vcf b/tests/tmp/eq_operator_output.vcf
deleted file mode 100644
index 0969a3e1..00000000
--- a/tests/tmp/eq_operator_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/equals_format_cleaned.vcf b/tests/tmp/equals_format_cleaned.vcf
deleted file mode 100644
index 2bc29661..00000000
--- a/tests/tmp/equals_format_cleaned.vcf
+++ /dev/null
@@ -1 +0,0 @@
-##fileformat=VCFv4.2##INFO=##FORMAT=##FORMAT=#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE31	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:221	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:101	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:221	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:121	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/equals_format_expected_cleaned.vcf b/tests/tmp/equals_format_expected_cleaned.vcf
deleted file mode 100644
index 2bc29661..00000000
--- a/tests/tmp/equals_format_expected_cleaned.vcf
+++ /dev/null
@@ -1 +0,0 @@
-##fileformat=VCFv4.2##INFO=##FORMAT=##FORMAT=#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE31	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:221	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:101	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:221	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:121	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/equals_format_output.vcf b/tests/tmp/equals_format_output.vcf
deleted file mode 100644
index 696571e0..00000000
--- a/tests/tmp/equals_format_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:10
-1	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:22
-1	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:12
-1	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/error.txt b/tests/tmp/error.txt
deleted file mode 100644
index 44150a0b..00000000
--- a/tests/tmp/error.txt
+++ /dev/null
@@ -1 +0,0 @@
-Error: --mode must be 'any' or 'all'.
diff --git a/tests/tmp/eur_output.vcf b/tests/tmp/eur_output.vcf
deleted file mode 100644
index 00af6e9b..00000000
--- a/tests/tmp/eur_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1_EUR	SAMPLE2_EUR
-1	100	rs123	A	T	50	PASS	AF=0.1	GT:DP	0|0:30	0|1:25
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP	0|1:28	1|1:32
-2	150	rs789	T	C	70	PASS	AF=0.3	GT:DP	1|1:35	0|1:29
-2	250	rs012	G	A	80	PASS	AF=0.4	GT:DP	0|1:31	0|0:27
diff --git a/tests/tmp/eur_samples_err.log b/tests/tmp/eur_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/eur_samples_output.tsv b/tests/tmp/eur_samples_output.tsv
deleted file mode 100644
index 15c9445e..00000000
--- a/tests/tmp/eur_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-EUR_SAMPLE1	EUR
-EUR_SAMPLE2	EUR
-EUR_SAMPLE3	EUR
diff --git a/tests/tmp/ge_operator_err.log b/tests/tmp/ge_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/ge_operator_output.vcf b/tests/tmp/ge_operator_output.vcf
deleted file mode 100644
index 0969a3e1..00000000
--- a/tests/tmp/ge_operator_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/gq_eq_30_all_err.log b/tests/tmp/gq_eq_30_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_eq_30_all_output.vcf b/tests/tmp/gq_eq_30_all_output.vcf
deleted file mode 100644
index 85eebfcf..00000000
--- a/tests/tmp/gq_eq_30_all_output.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/gq_gt_20_all_err.log b/tests/tmp/gq_gt_20_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_gt_20_all_output.vcf b/tests/tmp/gq_gt_20_all_output.vcf
deleted file mode 100644
index 87339394..00000000
--- a/tests/tmp/gq_gt_20_all_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/gq_gt_20_any_err.log b/tests/tmp/gq_gt_20_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_gt_20_any_output.vcf b/tests/tmp/gq_gt_20_any_output.vcf
deleted file mode 100644
index d9b7ce08..00000000
--- a/tests/tmp/gq_gt_20_any_output.vcf
+++ /dev/null
@@ -1,12 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GQ:DP:PL	0/1:15:18:20,0,30	0/1:10:25:15,0,20	0/0:35:22:0,35,45
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP:PL	0/0:45:30:0,45,60	0/0:50:20:0,50,65	1/1:5:8:25,5,0
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GQ:DP:PL	0/1:20:25:30,0,35	1/1:30:18:40,30,0	0/1:25:22:32,0,38
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/gq_gt_24_5_all_err.log b/tests/tmp/gq_gt_24_5_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_gt_24_5_all_output.vcf b/tests/tmp/gq_gt_24_5_all_output.vcf
deleted file mode 100644
index d6ae73c3..00000000
--- a/tests/tmp/gq_gt_24_5_all_output.vcf
+++ /dev/null
@@ -1,8 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
diff --git a/tests/tmp/gq_lt_30_all_err.log b/tests/tmp/gq_lt_30_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_lt_30_all_output.vcf b/tests/tmp/gq_lt_30_all_output.vcf
deleted file mode 100644
index 85eebfcf..00000000
--- a/tests/tmp/gq_lt_30_all_output.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/gq_lt_30_any_err.log b/tests/tmp/gq_lt_30_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_lt_30_any_output.vcf b/tests/tmp/gq_lt_30_any_output.vcf
deleted file mode 100644
index d9b7ce08..00000000
--- a/tests/tmp/gq_lt_30_any_output.vcf
+++ /dev/null
@@ -1,12 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GQ:DP:PL	0/1:15:18:20,0,30	0/1:10:25:15,0,20	0/0:35:22:0,35,45
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP:PL	0/0:45:30:0,45,60	0/0:50:20:0,50,65	1/1:5:8:25,5,0
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GQ:DP:PL	0/1:20:25:30,0,35	1/1:30:18:40,30,0	0/1:25:22:32,0,38
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/gq_ne_30_all_err.log b/tests/tmp/gq_ne_30_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_ne_30_all_output.vcf b/tests/tmp/gq_ne_30_all_output.vcf
deleted file mode 100644
index a802d0a6..00000000
--- a/tests/tmp/gq_ne_30_all_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GQ:DP:PL	0/1:15:18:20,0,30	0/1:10:25:15,0,20	0/0:35:22:0,35,45
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP:PL	0/0:45:30:0,45,60	0/0:50:20:0,50,65	1/1:5:8:25,5,0
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/gt_operator_err.log b/tests/tmp/gt_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gt_operator_output.vcf b/tests/tmp/gt_operator_output.vcf
deleted file mode 100644
index 0969a3e1..00000000
--- a/tests/tmp/gt_operator_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/help_message.txt b/tests/tmp/help_message.txt
deleted file mode 100644
index 178322ae..00000000
--- a/tests/tmp/help_message.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-VCFX_phred_filter: Filter VCF lines by their QUAL field.
-
-Usage:
-  VCFX_phred_filter [options] < input.vcf > output.vcf
-
-Options:
-  -p, --phred-filter       Phred QUAL threshold (default=30)
-  -k, --keep-missing-qual       Treat '.' (missing QUAL) as pass
-  -h, --help                    Display this help and exit
-
-Description:
-  Reads VCF lines from stdin. For each data line, parse the QUAL field.
-  If QUAL >= threshold => print line. Otherwise, skip. By default, missing
-  QUAL ('.') is treated as 0. Use --keep-missing-qual to treat '.' as pass.
-
-Examples:
-  1) Keep variants with QUAL>=30:
-     VCFX_phred_filter -p 30 < in.vcf > out.vcf
-  2) Keep missing QUAL lines:
-     VCFX_phred_filter -p 30 --keep-missing-qual < in.vcf > out.vcf
diff --git a/tests/tmp/help_output.txt b/tests/tmp/help_output.txt
deleted file mode 100644
index 651909da..00000000
--- a/tests/tmp/help_output.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-VCFX_quality_adjuster: Apply a transformation to the QUAL field of a VCF.
-
-Usage:
-  VCFX_quality_adjuster [options] < input.vcf > output.vcf
-
-Options:
-  -h, --help               Show this help.
-  -a, --adjust-qual  Required. One of: log, sqrt, square, identity.
-  -n, --no-clamp           Do not clamp negative or large values.
-
-Description:
-  Reads each line from VCF. If it's a data line with >=8 columns, we parse
-  the QUAL field (6th col). We transform it with , e.g.:
-    log => log(QUAL + 1e-10)
-    sqrt=> sqrt(QUAL)
-    square=> (QUAL * QUAL)
-    identity=> no change
-  By default, negative results from e.g. log are clamped to 0, and large
-  results are capped at 1e12. If you do not want clamping, use --no-clamp.
-
-Examples:
-  1) Log-transform:
-     VCFX_quality_adjuster --adjust-qual log < in.vcf > out.vcf
-  2) Square, keep negative or big values as is:
-     VCFX_quality_adjuster --adjust-qual square --no-clamp < in.vcf > out.vcf
diff --git a/tests/tmp/identity_transform_err.log b/tests/tmp/identity_transform_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/identity_transform_output.vcf b/tests/tmp/identity_transform_output.vcf
deleted file mode 100644
index d7594c78..00000000
--- a/tests/tmp/identity_transform_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	30.000000	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	0.000000	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	100.000000	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	10.000000	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	0.000000	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/invalid_condition_err.log b/tests/tmp/invalid_condition_err.log
deleted file mode 100644
index 01efc7fc..00000000
--- a/tests/tmp/invalid_condition_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Error: Invalid filter condition format. Expected format like "GP>0.9".
diff --git a/tests/tmp/invalid_condition_output.vcf b/tests/tmp/invalid_condition_output.vcf
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/invalid_err.log b/tests/tmp/invalid_err.log
deleted file mode 100644
index 34167a32..00000000
--- a/tests/tmp/invalid_err.log
+++ /dev/null
@@ -1,4 +0,0 @@
-Warning: Data line encountered before #CHROM header. Skipping line:
-1	100	rs1	A	G	30	PASS	AF=0.25	GT	0/1	0/0	1/1
-Warning: Skipping invalid VCF line (fewer than 9 fields):
-1	300
diff --git a/tests/tmp/invalid_error.log b/tests/tmp/invalid_error.log
deleted file mode 100644
index 9c4f2a0e..00000000
--- a/tests/tmp/invalid_error.log
+++ /dev/null
@@ -1 +0,0 @@
-Error: unsupported transformation 'invalid_transform'.
diff --git a/tests/tmp/invalid_mode_err.log b/tests/tmp/invalid_mode_err.log
deleted file mode 100644
index 63e70083..00000000
--- a/tests/tmp/invalid_mode_err.log
+++ /dev/null
@@ -1,2 +0,0 @@
-==== START OF TEST: invalid_mode ====
-Error: --mode must be 'any' or 'all'.
diff --git a/tests/tmp/invalid_mode_out.vcf b/tests/tmp/invalid_mode_out.vcf
deleted file mode 100644
index cebc4ad1..00000000
--- a/tests/tmp/invalid_mode_out.vcf
+++ /dev/null
@@ -1,18 +0,0 @@
-VCFX_gl_filter: Filter VCF based on a numeric genotype-likelihood field.
-
-Usage:
-  VCFX_gl_filter --filter "" [--mode ] < input.vcf > output.vcf
-
-Options:
-  -h, --help                Display this help message and exit
-  -f, --filter   e.g. "GQ>20" or "DP>=10.5" or "PL==50"
-  -m, --mode       'all' => all samples must pass (default), 'any' => at least one sample passes.
-
-Example:
-  VCFX_gl_filter --filter "GQ>20.5" --mode any < input.vcf > filtered.vcf
-
-Description:
-  The filter condition is a simple expression: ,
-  e.g. GQ>20 or DP!=10 or RGQ<=5.2.
-  The 'mode' determines if all samples must satisfy the condition or
-  if at least one sample satisfying is enough to keep the record.
diff --git a/tests/tmp/invalid_mode_output.vcf b/tests/tmp/invalid_mode_output.vcf
deleted file mode 100644
index cabcfbb6..00000000
--- a/tests/tmp/invalid_mode_output.vcf
+++ /dev/null
@@ -1,19 +0,0 @@
-==== START OF TEST: invalid_mode ====
-VCFX_gl_filter: Filter VCF based on a numeric genotype-likelihood field.
-
-Usage:
-  VCFX_gl_filter --filter "" [--mode ] < input.vcf > output.vcf
-
-Options:
-  -h, --help                Display this help message and exit
-  -f, --filter   e.g. "GQ>20" or "DP>=10.5" or "PL==50"
-  -m, --mode       'all' => all samples must pass (default), 'any' => at least one sample passes.
-
-Example:
-  VCFX_gl_filter --filter "GQ>20.5" --mode any < input.vcf > filtered.vcf
-
-Description:
-  The filter condition is a simple expression: ,
-  e.g. GQ>20 or DP!=10 or RGQ<=5.2.
-  The 'mode' determines if all samples must satisfy the condition or
-  if at least one sample satisfying is enough to keep the record.
diff --git a/tests/tmp/invalid_out.vcf b/tests/tmp/invalid_out.vcf
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/invalid_output.tsv b/tests/tmp/invalid_output.tsv
deleted file mode 100644
index 7267884f..00000000
--- a/tests/tmp/invalid_output.tsv
+++ /dev/null
@@ -1,2 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	200	rs2	C	T	0.0000
diff --git a/tests/tmp/invalid_output.vcf b/tests/tmp/invalid_output.vcf
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/invalid_records_threshold_30.vcf b/tests/tmp/invalid_records_threshold_30.vcf
deleted file mode 100644
index 687eda1c..00000000
--- a/tests/tmp/invalid_records_threshold_30.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL
-1	100	.	A	G	50
-1	300	.	G	A	30	PASS	DP=40
diff --git a/tests/tmp/invalid_vcf_err.log b/tests/tmp/invalid_vcf_err.log
deleted file mode 100644
index 4878b8e6..00000000
--- a/tests/tmp/invalid_vcf_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Error: Encountered VCF data before #CHROM header.
diff --git a/tests/tmp/invalid_vcf_output.tsv b/tests/tmp/invalid_vcf_output.tsv
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/le_operator_err.log b/tests/tmp/le_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/le_operator_output.vcf b/tests/tmp/le_operator_output.vcf
deleted file mode 100644
index 0946e0ef..00000000
--- a/tests/tmp/le_operator_output.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GP	0/1:0.1,0.7,0.2	1/1:0,0.1,0.9	0/1:0.1,0.7,0.2
diff --git a/tests/tmp/log_transform_edge_err.log b/tests/tmp/log_transform_edge_err.log
deleted file mode 100644
index 55d1ada7..00000000
--- a/tests/tmp/log_transform_edge_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Warning: invalid QUAL 'invalid'. Skipping.
diff --git a/tests/tmp/log_transform_edge_output.vcf b/tests/tmp/log_transform_edge_output.vcf
deleted file mode 100644
index 4259e741..00000000
--- a/tests/tmp/log_transform_edge_output.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1
-1	100	rs1	A	G	0.000000	PASS	.	GT	0/1
-1	200	rs2	C	T	13.815511	PASS	.	GT	0/1
-1	400	rs4	T	C	0.000000	PASS	.	GT	0/1
-1	500	rs5	G	C	nan	PASS	.	GT	0/1
diff --git a/tests/tmp/log_transform_err.log b/tests/tmp/log_transform_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/log_transform_no_clamp_err.log b/tests/tmp/log_transform_no_clamp_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/log_transform_no_clamp_output.vcf b/tests/tmp/log_transform_no_clamp_output.vcf
deleted file mode 100644
index a5a7c33f..00000000
--- a/tests/tmp/log_transform_no_clamp_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	3.401197	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	-23.025851	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	4.605170	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	2.302585	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	-23.025851	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/log_transform_output.vcf b/tests/tmp/log_transform_output.vcf
deleted file mode 100644
index 4e8bce84..00000000
--- a/tests/tmp/log_transform_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	3.401197	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	0.000000	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	4.605170	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	2.302585	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	0.000000	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/lt_operator_err.log b/tests/tmp/lt_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/lt_operator_output.vcf b/tests/tmp/lt_operator_output.vcf
deleted file mode 100644
index 0969a3e1..00000000
--- a/tests/tmp/lt_operator_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/malformed_err.log b/tests/tmp/malformed_err.log
deleted file mode 100644
index b34c3cda..00000000
--- a/tests/tmp/malformed_err.log
+++ /dev/null
@@ -1,2 +0,0 @@
-Warning: invalid VCF line (<9 fields): 1	100	rs1	A	G	30	PASS	AF=0.25
-Warning: invalid VCF line (<9 fields): 1	200	rs2	C	T	40	PASS	AF=0.5
diff --git a/tests/tmp/malformed_freq_err.log b/tests/tmp/malformed_freq_err.log
deleted file mode 100644
index bf3b8a5c..00000000
--- a/tests/tmp/malformed_freq_err.log
+++ /dev/null
@@ -1,3 +0,0 @@
-Warning: Invalid line in frequency file (#1): malformed data
-Error: No valid population frequencies loaded.
-Error: Failed to load population frequencies from tmp/malformed_freqs.txt
diff --git a/tests/tmp/malformed_freq_output.tsv b/tests/tmp/malformed_freq_output.tsv
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/malformed_freqs.txt b/tests/tmp/malformed_freqs.txt
deleted file mode 100644
index b8833029..00000000
--- a/tests/tmp/malformed_freqs.txt
+++ /dev/null
@@ -1 +0,0 @@
-malformed data
diff --git a/tests/tmp/malformed_input_err.log b/tests/tmp/malformed_input_err.log
deleted file mode 100644
index 8fa27b03..00000000
--- a/tests/tmp/malformed_input_err.log
+++ /dev/null
@@ -1,2 +0,0 @@
-Warning: line with <8 fields => skipping.
-Warning: line with <8 fields => skipping.
diff --git a/tests/tmp/malformed_input_output.vcf b/tests/tmp/malformed_input_output.vcf
deleted file mode 100644
index 9068bd6e..00000000
--- a/tests/tmp/malformed_input_output.vcf
+++ /dev/null
@@ -1,2 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL
diff --git a/tests/tmp/malformed_out.vcf b/tests/tmp/malformed_out.vcf
deleted file mode 100644
index 68d81478..00000000
--- a/tests/tmp/malformed_out.vcf
+++ /dev/null
@@ -1,2 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
diff --git a/tests/tmp/malformed_output.vcf b/tests/tmp/malformed_output.vcf
deleted file mode 100644
index 0fe19f4e..00000000
--- a/tests/tmp/malformed_output.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1_EUR	SAMPLE2_EUR
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP	0|1:28	1|1:32
-2	150	rs012	T	C	80	PASS	AF=0.4	GT:DP	1|1:35	0|1:29
diff --git a/tests/tmp/malformed_query_01_flexible_output.vcf b/tests/tmp/malformed_query_01_flexible_output.vcf
deleted file mode 100644
index 36f4bf31..00000000
--- a/tests/tmp/malformed_query_01_flexible_output.vcf
+++ /dev/null
@@ -1,11 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	200	.	C	T	40	PASS	AF=0.5	GT	0/1	0/1	0/0
-1	400	rs4	T	C	45	PASS	.	GT:DP	0|1:25	.|.:--	0|0:12
-1	500	rs5	G	A,T	50	PASS	AF=0.2,0.05	GT:DP	0/1:22	0/.:15	1/1:28
-1	700	rs7	A	G	60	PASS	AF=0.15	GT:DP	./.:--	0|1:22	1|1:25
-1	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	0/1:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/malformed_threshold_30.vcf b/tests/tmp/malformed_threshold_30.vcf
deleted file mode 100644
index 006c6381..00000000
--- a/tests/tmp/malformed_threshold_30.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
diff --git a/tests/tmp/malformed_threshold_30_keep_missing.vcf b/tests/tmp/malformed_threshold_30_keep_missing.vcf
deleted file mode 100644
index 14ab42f1..00000000
--- a/tests/tmp/malformed_threshold_30_keep_missing.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
-1	400	.	T	C		PASS	DP=35
diff --git a/tests/tmp/malformed_threshold_5.vcf b/tests/tmp/malformed_threshold_5.vcf
deleted file mode 100644
index 92517348..00000000
--- a/tests/tmp/malformed_threshold_5.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
-1	500	.	A	C	10.5	PASS	DP=15
diff --git a/tests/tmp/malformed_vcf_err.log b/tests/tmp/malformed_vcf_err.log
deleted file mode 100644
index 6b11a01f..00000000
--- a/tests/tmp/malformed_vcf_err.log
+++ /dev/null
@@ -1,3 +0,0 @@
-==== START OF TEST: malformed_vcf ====
-Warning: invalid VCF line (<9 fields): 1	100	rs1	A	G	30	PASS	AF=0.25
-Warning: invalid VCF line (<9 fields): 1	200	rs2	C	T	40	PASS	AF=0.5
diff --git a/tests/tmp/malformed_vcf_output.vcf b/tests/tmp/malformed_vcf_output.vcf
deleted file mode 100644
index 3f71f6c2..00000000
--- a/tests/tmp/malformed_vcf_output.vcf
+++ /dev/null
@@ -1,3 +0,0 @@
-==== START OF TEST: malformed_vcf ====
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
diff --git a/tests/tmp/missing_arg_error.log b/tests/tmp/missing_arg_error.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_arg_output.vcf b/tests/tmp/missing_arg_output.vcf
deleted file mode 100644
index 651909da..00000000
--- a/tests/tmp/missing_arg_output.vcf
+++ /dev/null
@@ -1,25 +0,0 @@
-VCFX_quality_adjuster: Apply a transformation to the QUAL field of a VCF.
-
-Usage:
-  VCFX_quality_adjuster [options] < input.vcf > output.vcf
-
-Options:
-  -h, --help               Show this help.
-  -a, --adjust-qual  Required. One of: log, sqrt, square, identity.
-  -n, --no-clamp           Do not clamp negative or large values.
-
-Description:
-  Reads each line from VCF. If it's a data line with >=8 columns, we parse
-  the QUAL field (6th col). We transform it with , e.g.:
-    log => log(QUAL + 1e-10)
-    sqrt=> sqrt(QUAL)
-    square=> (QUAL * QUAL)
-    identity=> no change
-  By default, negative results from e.g. log are clamped to 0, and large
-  results are capped at 1e12. If you do not want clamping, use --no-clamp.
-
-Examples:
-  1) Log-transform:
-     VCFX_quality_adjuster --adjust-qual log < in.vcf > out.vcf
-  2) Square, keep negative or big values as is:
-     VCFX_quality_adjuster --adjust-qual square --no-clamp < in.vcf > out.vcf
diff --git a/tests/tmp/missing_args_output.txt b/tests/tmp/missing_args_output.txt
deleted file mode 100644
index c8c85b1b..00000000
--- a/tests/tmp/missing_args_output.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Usage: ../build/src/VCFX_genotype_query/VCFX_genotype_query --genotype-query "0/1" [--strict] < input.vcf > output.vcf
-Use --help for usage.
diff --git a/tests/tmp/missing_err.log b/tests/tmp/missing_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_field_err.log b/tests/tmp/missing_field_err.log
deleted file mode 100644
index 9bd03884..00000000
--- a/tests/tmp/missing_field_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Error: Specified field "GP" not found in FORMAT column.
diff --git a/tests/tmp/missing_field_gq_gt_20_err.log b/tests/tmp/missing_field_gq_gt_20_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_field_gq_gt_20_output.vcf b/tests/tmp/missing_field_gq_gt_20_output.vcf
deleted file mode 100644
index b10fb63a..00000000
--- a/tests/tmp/missing_field_gq_gt_20_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1
diff --git a/tests/tmp/missing_field_output.vcf b/tests/tmp/missing_field_output.vcf
deleted file mode 100644
index b7a7e7ab..00000000
--- a/tests/tmp/missing_field_output.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1
diff --git a/tests/tmp/missing_filter_err.log b/tests/tmp/missing_filter_err.log
deleted file mode 100644
index 2e60ae42..00000000
--- a/tests/tmp/missing_filter_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Error: --filter must be specified.
diff --git a/tests/tmp/missing_filter_output.vcf b/tests/tmp/missing_filter_output.vcf
deleted file mode 100644
index cebc4ad1..00000000
--- a/tests/tmp/missing_filter_output.vcf
+++ /dev/null
@@ -1,18 +0,0 @@
-VCFX_gl_filter: Filter VCF based on a numeric genotype-likelihood field.
-
-Usage:
-  VCFX_gl_filter --filter "" [--mode ] < input.vcf > output.vcf
-
-Options:
-  -h, --help                Display this help message and exit
-  -f, --filter   e.g. "GQ>20" or "DP>=10.5" or "PL==50"
-  -m, --mode       'all' => all samples must pass (default), 'any' => at least one sample passes.
-
-Example:
-  VCFX_gl_filter --filter "GQ>20.5" --mode any < input.vcf > filtered.vcf
-
-Description:
-  The filter condition is a simple expression: ,
-  e.g. GQ>20 or DP!=10 or RGQ<=5.2.
-  The 'mode' determines if all samples must satisfy the condition or
-  if at least one sample satisfying is enough to keep the record.
diff --git a/tests/tmp/missing_freq_err.log b/tests/tmp/missing_freq_err.log
deleted file mode 100644
index 9c007b94..00000000
--- a/tests/tmp/missing_freq_err.log
+++ /dev/null
@@ -1,2 +0,0 @@
-Error: Cannot open frequency file: /nonexistent/file.txt
-Error: Failed to load population frequencies from /nonexistent/file.txt
diff --git a/tests/tmp/missing_freq_output.tsv b/tests/tmp/missing_freq_output.tsv
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_output.tsv b/tests/tmp/missing_output.tsv
deleted file mode 100644
index f47e8fb6..00000000
--- a/tests/tmp/missing_output.tsv
+++ /dev/null
@@ -1,3 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G	0.7500
-1	200	rs2	C	T	0.3333
diff --git a/tests/tmp/missing_samples_err.log b/tests/tmp/missing_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_samples_output.tsv b/tests/tmp/missing_samples_output.tsv
deleted file mode 100644
index cbeeb4c9..00000000
--- a/tests/tmp/missing_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-EUR_MISS	EUR
-AFR_MISS	AFR
-EAS_MISS	EAS
diff --git a/tests/tmp/missing_value_gq_gt_20_all_err.log b/tests/tmp/missing_value_gq_gt_20_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_value_gq_gt_20_all_output.vcf b/tests/tmp/missing_value_gq_gt_20_all_output.vcf
deleted file mode 100644
index d9a8ffe1..00000000
--- a/tests/tmp/missing_value_gq_gt_20_all_output.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP	0/1:25:20	0/0:30:15
diff --git a/tests/tmp/missing_value_gq_gt_20_any_err.log b/tests/tmp/missing_value_gq_gt_20_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_value_gq_gt_20_any_output.vcf b/tests/tmp/missing_value_gq_gt_20_any_output.vcf
deleted file mode 100644
index 900ca329..00000000
--- a/tests/tmp/missing_value_gq_gt_20_any_output.vcf
+++ /dev/null
@@ -1,8 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP	0/1:25:20	0/0:30:15
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP	0/0:45:30	0/0::20
diff --git a/tests/tmp/mixed_population_check_err.log b/tests/tmp/mixed_population_check_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/mixed_population_check_output.tsv b/tests/tmp/mixed_population_check_output.tsv
deleted file mode 100644
index 52ec9b21..00000000
--- a/tests/tmp/mixed_population_check_output.tsv
+++ /dev/null
@@ -1,5 +0,0 @@
-Sample	Inferred_Population
-EUR_SAM	EUR
-AFR_SAM	AFR
-EAS_SAM	EAS
-MIX_SAM	EUR
diff --git a/tests/tmp/mixed_population_check_results.tmp b/tests/tmp/mixed_population_check_results.tmp
deleted file mode 100644
index 29eafc2a..00000000
--- a/tests/tmp/mixed_population_check_results.tmp
+++ /dev/null
@@ -1,4 +0,0 @@
-EUR_SAM	EUR
-AFR_SAM	AFR
-EAS_SAM	EAS
-MIX_SAM	EUR
diff --git a/tests/tmp/mixed_samples_err.log b/tests/tmp/mixed_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/mixed_samples_output.tsv b/tests/tmp/mixed_samples_output.tsv
deleted file mode 100644
index 52ec9b21..00000000
--- a/tests/tmp/mixed_samples_output.tsv
+++ /dev/null
@@ -1,5 +0,0 @@
-Sample	Inferred_Population
-EUR_SAM	EUR
-AFR_SAM	AFR
-EAS_SAM	EAS
-MIX_SAM	EUR
diff --git a/tests/tmp/multiallelic_err.log b/tests/tmp/multiallelic_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/multiallelic_output.tsv b/tests/tmp/multiallelic_output.tsv
deleted file mode 100644
index 71fd0ab4..00000000
--- a/tests/tmp/multiallelic_output.tsv
+++ /dev/null
@@ -1,3 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G,T	0.6667
-1	200	rs2	C	T,G,A	0.8333
diff --git a/tests/tmp/multiallelic_samples_err.log b/tests/tmp/multiallelic_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/multiallelic_samples_output.tsv b/tests/tmp/multiallelic_samples_output.tsv
deleted file mode 100644
index a0251e8c..00000000
--- a/tests/tmp/multiallelic_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-EUR_MULTI	EUR
-AFR_MULTI	AFR
-EAS_MULTI	EUR
diff --git a/tests/tmp/ne_operator_err.log b/tests/tmp/ne_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/ne_operator_output.vcf b/tests/tmp/ne_operator_output.vcf
deleted file mode 100644
index 2b906f70..00000000
--- a/tests/tmp/ne_operator_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GP	0/1:0.01,0.98,0.01	0/0:0.99,0.01,0	1/1:0,0.02,0.98
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GP	0/1:0.05,0.9,0.05	0/1:0.1,0.8,0.1	0/0:0.95,0.04,0.01
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GP	0/0:0.85,0.15,0	0/0:0.92,0.08,0	1/1:0,0.05,0.95
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GP	0/1:0.1,0.7,0.2	1/1:0,0.1,0.9	0/1:0.1,0.7,0.2
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GP	0/0:0.94,0.05,0.01	1/1:0.01,0.05,0.94	0/1:0.2,0.75,0.05
diff --git a/tests/tmp/no_args_output.txt b/tests/tmp/no_args_output.txt
deleted file mode 100644
index 4f075489..00000000
--- a/tests/tmp/no_args_output.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-VCFX_population_filter: Subset VCF to samples in specified population.
-
-Usage:
-  VCFX_population_filter [options] < input.vcf > output.vcf
-
-Options:
-  --help, -h               Print this help.
-  --population, -p    Population tag to keep (e.g. 'EUR','AFR', etc.)
-  --pop-map, -m      Tab-delimited file: 'SampleName  Population'
-
-Description:
-  Reads the pop map, finds samples that match the chosen population.
-  Then reads the VCF from stdin and prints lines with only those sample columns.
-  If a sample is not in that population, it's dropped from the #CHROM header and data columns.
-
-Example:
-  VCFX_population_filter --population AFR --pop-map pops.txt < input.vcf > out.vcf
diff --git a/tests/tmp/no_gt_err.log b/tests/tmp/no_gt_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/no_gt_output.tsv b/tests/tmp/no_gt_output.tsv
deleted file mode 100644
index 453ebe59..00000000
--- a/tests/tmp/no_gt_output.tsv
+++ /dev/null
@@ -1 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
diff --git a/tests/tmp/output.txt b/tests/tmp/output.txt
deleted file mode 100644
index cebc4ad1..00000000
--- a/tests/tmp/output.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-VCFX_gl_filter: Filter VCF based on a numeric genotype-likelihood field.
-
-Usage:
-  VCFX_gl_filter --filter "" [--mode ] < input.vcf > output.vcf
-
-Options:
-  -h, --help                Display this help message and exit
-  -f, --filter   e.g. "GQ>20" or "DP>=10.5" or "PL==50"
-  -m, --mode       'all' => all samples must pass (default), 'any' => at least one sample passes.
-
-Example:
-  VCFX_gl_filter --filter "GQ>20.5" --mode any < input.vcf > filtered.vcf
-
-Description:
-  The filter condition is a simple expression: ,
-  e.g. GQ>20 or DP!=10 or RGQ<=5.2.
-  The 'mode' determines if all samples must satisfy the condition or
-  if at least one sample satisfying is enough to keep the record.
diff --git a/tests/tmp/phased_err.log b/tests/tmp/phased_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/phased_output.tsv b/tests/tmp/phased_output.tsv
deleted file mode 100644
index 189c397a..00000000
--- a/tests/tmp/phased_output.tsv
+++ /dev/null
@@ -1,3 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G	0.5000
-1	200	rs2	C	T	0.3333
diff --git a/tests/tmp/phased_samples_err.log b/tests/tmp/phased_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/phased_samples_output.tsv b/tests/tmp/phased_samples_output.tsv
deleted file mode 100644
index deb18772..00000000
--- a/tests/tmp/phased_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-EUR_PHASED	EUR
-AFR_PHASED	AFR
-EAS_PHASED	EAS
diff --git a/tests/tmp/pl_gt_40_any_err.log b/tests/tmp/pl_gt_40_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/pl_gt_40_any_output.vcf b/tests/tmp/pl_gt_40_any_output.vcf
deleted file mode 100644
index 87339394..00000000
--- a/tests/tmp/pl_gt_40_any_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/query_01_flexible_cleaned.vcf b/tests/tmp/query_01_flexible_cleaned.vcf
deleted file mode 100644
index 2bc29661..00000000
--- a/tests/tmp/query_01_flexible_cleaned.vcf
+++ /dev/null
@@ -1 +0,0 @@
-##fileformat=VCFv4.2##INFO=##FORMAT=##FORMAT=#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE31	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:221	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:101	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:221	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:121	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/query_01_flexible_expected_cleaned.vcf b/tests/tmp/query_01_flexible_expected_cleaned.vcf
deleted file mode 100644
index 2bc29661..00000000
--- a/tests/tmp/query_01_flexible_expected_cleaned.vcf
+++ /dev/null
@@ -1 +0,0 @@
-##fileformat=VCFv4.2##INFO=##FORMAT=##FORMAT=#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE31	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:221	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:101	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:221	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:121	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/query_01_flexible_output.vcf b/tests/tmp/query_01_flexible_output.vcf
deleted file mode 100644
index 696571e0..00000000
--- a/tests/tmp/query_01_flexible_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:10
-1	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:22
-1	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:12
-1	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/query_01_pipe_flexible_output.vcf b/tests/tmp/query_01_pipe_flexible_output.vcf
deleted file mode 100644
index e41d6402..00000000
--- a/tests/tmp/query_01_pipe_flexible_output.vcf
+++ /dev/null
@@ -1,19 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3	SAMPLE4
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:10
-1	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:22
-1	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:12
-1	500	rs5	G	A,T	50	PASS	AF=0.2,0.05	GT:DP	0/1:22	0/2:15	1/1:28
-1	700	rs7	A	G	60	PASS	AF=0.15	GT:DP	1|0:20	0|1:22	1|1:25
-1	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22
-2	150	rs3	T	C	80	PASS	AF=0.3	GT:DP:GQ	1/1:35:99	1/0:29:99	0|1:18:99	0/0:24:99
-2	250	rs4	G	A,T	90	PASS	AF=0.4,0.1	GT:DP:GQ	1/2:31:99	2/2:27:99	0/2:25:99	0/1:20:99
-3	300	rs5	C	G	100	PASS	AF=0.5	GT:DP:GQ	./.:30:99	0/0:25:99	0/1:20:99	1/1:22:99
-3	400	rs6	T	A	60	FAIL	AF=0.1	GT:DP:GQ	0/1:30:10	0/1:25:20	0/1:20:30	0/1:22:40
-4	100	rs7	A	G	70	PASS	AF=0.2	GT:DP	0/1:30	0/1:25	0/1:20	0/1:22
-4	200	rs8	C	T	80	PASS	AF=0.3	DP:GT	30:0/1	25:0/1	20:0/1	22:0/1 
\ No newline at end of file
diff --git a/tests/tmp/query_01_pipe_strict_output.vcf b/tests/tmp/query_01_pipe_strict_output.vcf
deleted file mode 100644
index 8a82c11b..00000000
--- a/tests/tmp/query_01_pipe_strict_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3	SAMPLE4
-1	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:22
-1	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:12
-1	700	rs7	A	G	60	PASS	AF=0.15	GT:DP	1|0:20	0|1:22	1|1:25
-2	150	rs3	T	C	80	PASS	AF=0.3	GT:DP:GQ	1/1:35:99	1/0:29:99	0|1:18:99	0/0:24:99
\ No newline at end of file
diff --git a/tests/tmp/query_01_strict_output.vcf b/tests/tmp/query_01_strict_output.vcf
deleted file mode 100644
index b09fd625..00000000
--- a/tests/tmp/query_01_strict_output.vcf
+++ /dev/null
@@ -1,15 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3	SAMPLE4
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:10
-1	500	rs5	G	A,T	50	PASS	AF=0.2,0.05	GT:DP	0/1:22	0/2:15	1/1:28
-1	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22
-2	250	rs4	G	A,T	90	PASS	AF=0.4,0.1	GT:DP:GQ	1/2:31:99	2/2:27:99	0/2:25:99	0/1:20:99
-3	300	rs5	C	G	100	PASS	AF=0.5	GT:DP:GQ	./.:30:99	0/0:25:99	0/1:20:99	1/1:22:99
-3	400	rs6	T	A	60	FAIL	AF=0.1	GT:DP:GQ	0/1:30:10	0/1:25:20	0/1:20:30	0/1:22:40
-4	100	rs7	A	G	70	PASS	AF=0.2	GT:DP	0/1:30	0/1:25	0/1:20	0/1:22
-4	200	rs8	C	T	80	PASS	AF=0.3	DP:GT	30:0/1	25:0/1	20:0/1	22:0/1 
\ No newline at end of file
diff --git a/tests/tmp/query_11_flexible_output.vcf b/tests/tmp/query_11_flexible_output.vcf
deleted file mode 100644
index 742184ce..00000000
--- a/tests/tmp/query_11_flexible_output.vcf
+++ /dev/null
@@ -1,13 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3	SAMPLE4
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:22
-1	500	rs5	G	A,T	50	PASS	AF=0.2,0.05	GT:DP	0/1:22	0/2:15	1/1:28
-1	600	rs6	C	G	55	PASS	AF=0.4	GT:DP	1/1:30	1/1:25	0/0:15
-1	700	rs7	A	G	60	PASS	AF=0.15	GT:DP	1|0:20	0|1:22	1|1:25
-2	150	rs3	T	C	80	PASS	AF=0.3	GT:DP:GQ	1/1:35:99	1/0:29:99	0|1:18:99	0/0:24:99
-3	300	rs5	C	G	100	PASS	AF=0.5	GT:DP:GQ	./.:30:99	0/0:25:99	0/1:20:99	1/1:22:99
\ No newline at end of file
diff --git a/tests/tmp/query_multi_02_flexible_output.vcf b/tests/tmp/query_multi_02_flexible_output.vcf
deleted file mode 100644
index 1fea55ad..00000000
--- a/tests/tmp/query_multi_02_flexible_output.vcf
+++ /dev/null
@@ -1,8 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3	SAMPLE4
-1	500	rs5	G	A,T	50	PASS	AF=0.2,0.05	GT:DP	0/1:22	0/2:15	1/1:28
-2	250	rs4	G	A,T	90	PASS	AF=0.4,0.1	GT:DP:GQ	1/2:31:99	2/2:27:99	0/2:25:99	0/1:20:99
\ No newline at end of file
diff --git a/tests/tmp/simple_err.log b/tests/tmp/simple_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/simple_freqs_err.log b/tests/tmp/simple_freqs_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/simple_freqs_output.tsv b/tests/tmp/simple_freqs_output.tsv
deleted file mode 100644
index 7ab39b3b..00000000
--- a/tests/tmp/simple_freqs_output.tsv
+++ /dev/null
@@ -1,5 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G	0.5000
-1	200	rs2	C	T	0.3333
-1	300	rs3	G	A	0.3333
-1	400	rs4	T	C	0.8333
diff --git a/tests/tmp/simple_output.tsv b/tests/tmp/simple_output.tsv
deleted file mode 100644
index 7ab39b3b..00000000
--- a/tests/tmp/simple_output.tsv
+++ /dev/null
@@ -1,5 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G	0.5000
-1	200	rs2	C	T	0.3333
-1	300	rs3	G	A	0.3333
-1	400	rs4	T	C	0.8333
diff --git a/tests/tmp/sqrt_transform_edge_err.log b/tests/tmp/sqrt_transform_edge_err.log
deleted file mode 100644
index 55d1ada7..00000000
--- a/tests/tmp/sqrt_transform_edge_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Warning: invalid QUAL 'invalid'. Skipping.
diff --git a/tests/tmp/sqrt_transform_edge_output.vcf b/tests/tmp/sqrt_transform_edge_output.vcf
deleted file mode 100644
index 4067f73c..00000000
--- a/tests/tmp/sqrt_transform_edge_output.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1
-1	100	rs1	A	G	0.001000	PASS	.	GT	0/1
-1	200	rs2	C	T	1000.000000	PASS	.	GT	0/1
-1	400	rs4	T	C	0.000000	PASS	.	GT	0/1
-1	500	rs5	G	C	0.000000	PASS	.	GT	0/1
diff --git a/tests/tmp/sqrt_transform_err.log b/tests/tmp/sqrt_transform_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/sqrt_transform_output.vcf b/tests/tmp/sqrt_transform_output.vcf
deleted file mode 100644
index 5ba0fbd6..00000000
--- a/tests/tmp/sqrt_transform_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	5.477226	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	0.000000	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	10.000000	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	3.162278	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	0.000000	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/square_transform_edge_err.log b/tests/tmp/square_transform_edge_err.log
deleted file mode 100644
index 55d1ada7..00000000
--- a/tests/tmp/square_transform_edge_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Warning: invalid QUAL 'invalid'. Skipping.
diff --git a/tests/tmp/square_transform_edge_output.vcf b/tests/tmp/square_transform_edge_output.vcf
deleted file mode 100644
index 3f277fb3..00000000
--- a/tests/tmp/square_transform_edge_output.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1
-1	100	rs1	A	G	0.000000	PASS	.	GT	0/1
-1	200	rs2	C	T	1000000000000.000000	PASS	.	GT	0/1
-1	400	rs4	T	C	0.000000	PASS	.	GT	0/1
-1	500	rs5	G	C	100.000000	PASS	.	GT	0/1
diff --git a/tests/tmp/square_transform_err.log b/tests/tmp/square_transform_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/square_transform_no_clamp_err.log b/tests/tmp/square_transform_no_clamp_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/square_transform_no_clamp_output.vcf b/tests/tmp/square_transform_no_clamp_output.vcf
deleted file mode 100644
index a718db16..00000000
--- a/tests/tmp/square_transform_no_clamp_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	900.000000	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	0.000000	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	10000.000000	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	100.000000	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	0.000000	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/square_transform_output.vcf b/tests/tmp/square_transform_output.vcf
deleted file mode 100644
index a718db16..00000000
--- a/tests/tmp/square_transform_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	900.000000	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	0.000000	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	10000.000000	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	100.000000	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	0.000000	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/unknown_output.vcf b/tests/tmp/unknown_output.vcf
deleted file mode 100644
index ae84bda2..00000000
--- a/tests/tmp/unknown_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT
-1	100	rs123	A	T	50	PASS	AF=0.1	GT:DP
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP
-2	150	rs789	T	C	70	PASS	AF=0.3	GT:DP
-2	250	rs012	G	A	80	PASS	AF=0.4	GT:DP