From ae0b22aab8888bc15a3c46205c829df90cf99b47 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 12:08:57 +0100 Subject: [PATCH 01/54] Fix NaN output in quality adjuster --- src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp | 9 ++++++++- tests/test_custom_annotator.sh | 8 +++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp b/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp index b09fe473..68632f96 100644 --- a/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp +++ b/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp @@ -157,7 +157,14 @@ void VCFXQualityAdjuster::adjustQualityScores(std::istream &in, std::ostream &ou // clamp large values if(newQual>1e12) newQual= 1e12; } - fields[5]= std::to_string(newQual); + std::string qualStr; + if(std::isnan(newQual)){ + // ensure consistent representation for NaN + qualStr = "nan"; + } else { + qualStr = std::to_string(newQual); + } + fields[5]= qualStr; std::ostringstream oss; for(size_t i=0; i0) oss<<"\t"; diff --git a/tests/test_custom_annotator.sh b/tests/test_custom_annotator.sh index 7da1d4a3..ce42c649 100755 --- a/tests/test_custom_annotator.sh +++ b/tests/test_custom_annotator.sh @@ -125,10 +125,11 @@ for i in $(seq 1 1000); do echo "1 $i A G Annotation$i" done > "$SCRIPT_DIR/data/large_annotations.txt" # Add VCF header -sed -i '' '1i\ +sed -i '1i\ ##fileformat=VCFv4.2\ ##contig=\ -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1' "$SCRIPT_DIR/data/large_input.vcf" +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1\ +' "$SCRIPT_DIR/data/large_input.vcf" time "$ROOT_DIR/build/src/VCFX_custom_annotator/VCFX_custom_annotator" --add-annotation "$SCRIPT_DIR/data/large_annotations.txt" < "$SCRIPT_DIR/data/large_input.vcf" > "$SCRIPT_DIR/data/large_output.vcf" if [ $? -eq 0 ]; then @@ -138,4 +139,5 @@ else exit 1 fi -echo "All tests for VCFX_custom_annotator passed!" 
\ No newline at end of file +echo "All tests for VCFX_custom_annotator passed!" + From d9097e7486f63cc51330b93242d6d9c28a010511 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 15:37:57 +0100 Subject: [PATCH 02/54] Fix Emscripten build detection --- CMakeLists.txt | 13 ++++++++++++- README.md | 2 +- compile_wasm.sh | 8 ++++++-- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 662b06cc..17f372a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,18 @@ project(VCFX option(BUILD_WASM "Build with emscripten toolchain" OFF) if(BUILD_WASM) - set(CMAKE_TOOLCHAIN_FILE "/path/to/emscripten.cmake" CACHE FILEPATH "Emscripten toolchain" FORCE) + if(NOT CMAKE_TOOLCHAIN_FILE) + if(DEFINED ENV{EMSDK} AND EXISTS "$ENV{EMSDK}/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake") + set(CMAKE_TOOLCHAIN_FILE "$ENV{EMSDK}/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake" CACHE FILEPATH "Emscripten toolchain" FORCE) + elseif(DEFINED ENV{EMSCRIPTEN} AND EXISTS "$ENV{EMSCRIPTEN}/cmake/Modules/Platform/Emscripten.cmake") + set(CMAKE_TOOLCHAIN_FILE "$ENV{EMSCRIPTEN}/cmake/Modules/Platform/Emscripten.cmake" CACHE FILEPATH "Emscripten toolchain" FORCE) + endif() + endif() + + if(NOT EXISTS "${CMAKE_TOOLCHAIN_FILE}") + message(FATAL_ERROR "Emscripten toolchain file not found. Please set CMAKE_TOOLCHAIN_FILE or EMSDK.") + endif() + message(STATUS "Building for WebAssembly (Emscripten).") endif() diff --git a/README.md b/README.md index 10301dd0..7e158764 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ If you have [Emscripten](https://emscripten.org/) installed: ```bash mkdir build_wasm && cd build_wasm -cmake -DBUILD_WASM=ON .. +emcmake cmake -DBUILD_WASM=ON .. cmake --build . 
``` diff --git a/compile_wasm.sh b/compile_wasm.sh index 6dbfb8a8..e0a5a132 100644 --- a/compile_wasm.sh +++ b/compile_wasm.sh @@ -4,8 +4,12 @@ set -e mkdir -p build_wasm cd build_wasm -# Turn on BUILD_WASM -cmake -DBUILD_WASM=ON .. +# Turn on BUILD_WASM using emcmake if available +if command -v emcmake >/dev/null 2>&1; then + emcmake cmake -DBUILD_WASM=ON .. +else + cmake -DBUILD_WASM=ON .. +fi cmake --build . From 68bac85c751cb11fe5f977e132f347bea1003b30 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 17:49:51 +0100 Subject: [PATCH 03/54] fix: support modern find in path script --- add_vcfx_tools_to_path.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/add_vcfx_tools_to_path.sh b/add_vcfx_tools_to_path.sh index 41787a66..0a68c655 100644 --- a/add_vcfx_tools_to_path.sh +++ b/add_vcfx_tools_to_path.sh @@ -32,7 +32,7 @@ while IFS= read -r -d '' toolExec; do if [[ ":$TOOL_DIRS:" != *":$toolDir:"* ]]; then TOOL_DIRS="${TOOL_DIRS}:${toolDir}" fi -done < <(find "${BUILD_SRC_DIR}" -type f -perm +111 -name 'VCFX_*' -print0 2>/dev/null) +done < <(find "${BUILD_SRC_DIR}" -type f -perm /111 -name 'VCFX_*' -print0 2>/dev/null) # If empty (no tools found), bail out if [ -z "$TOOL_DIRS" ]; then From 894e7770d1f283ce4a2c65282b8637cd13ec2cb6 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 18:03:59 +0100 Subject: [PATCH 04/54] fix: ensure newline at EOF for .dockerignore --- .dockerignore | 2 +- .gitignore | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.dockerignore b/.dockerignore index a0e0fba6..7707bfc4 100644 --- a/.dockerignore +++ b/.dockerignore @@ -29,4 +29,4 @@ LICENSE # Docker files (not needed in the build context) Dockerfile docker-compose.yml -.dockerignore \ No newline at end of file +.dockerignore diff --git a/.gitignore b/.gitignore index 37beb001..9053a952 100644 --- a/.gitignore +++ b/.gitignore @@ -41,4 +41,4 @@ Thumbs.db # Other tools.md prompt.md -names.md \ No newline 
at end of file +names.md From 1c4a27ae0311a61271ec01318cfe31f203ad61bc Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 18:34:08 +0100 Subject: [PATCH 05/54] Optimize alignment checker memory usage --- .../VCFX_alignment_checker.cpp | 105 ++++++++++++------ .../VCFX_alignment_checker.h | 14 ++- 2 files changed, 80 insertions(+), 39 deletions(-) diff --git a/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp b/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp index 8973a089..b4898ee3 100644 --- a/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp +++ b/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp @@ -55,15 +55,8 @@ int VCFXAlignmentChecker::run(int argc, char* argv[]) { return 1; } - // Open reference genome file - std::ifstream refStream(refFile); - if (!refStream.is_open()) { - std::cerr << "Error: Unable to open reference genome file: " << refFile << "\n"; - return 1; - } - - // Load reference genome into memory - if (!loadReferenceGenome(refStream)) { + // Load reference genome index + if (!loadReferenceGenome(refFile)) { std::cerr << "Error: Failed to load reference genome.\n"; return 1; } @@ -85,70 +78,108 @@ void VCFXAlignmentChecker::displayHelp() { << " VCFX_alignment_checker --alignment-discrepancy input.vcf reference.fasta > discrepancies.txt\n"; } -bool VCFXAlignmentChecker::loadReferenceGenome(std::istream& in) { +bool VCFXAlignmentChecker::loadReferenceGenome(const std::string& path) { + referencePath = path; + referenceIndex.clear(); + + referenceStream.open(path, std::ios::in); + if (!referenceStream.is_open()) { + std::cerr << "Error: Unable to open reference genome file: " << path << "\n"; + return false; + } + std::string line; std::string currentChrom; - std::string seq; + FastaIndexEntry entry; + std::size_t seqLen = 0; - while (std::getline(in, line)) { + // record file offset where we will read sequence lines + while (std::getline(referenceStream, line)) { if (line.empty()) { continue; } if (line[0] 
== '>') { - // If we already had a chromosome loaded, store its sequence if (!currentChrom.empty()) { - referenceGenome[normalizeChromosome(currentChrom)] = seq; + entry.length = seqLen; + referenceIndex[normalizeChromosome(currentChrom)] = entry; } - // Start a new chromosome - seq.clear(); - // Grab chromosome name (up to first space) - size_t pos = line.find(' '); + + currentChrom.clear(); + seqLen = 0; + entry = FastaIndexEntry(); + + std::size_t pos = line.find(' '); if (pos != std::string::npos) { currentChrom = line.substr(1, pos - 1); } else { currentChrom = line.substr(1); } + + entry.offset = referenceStream.tellg(); + entry.basesPerLine = 0; + entry.bytesPerLine = 0; } else { - // Append this line to the sequence (uppercase) - std::transform(line.begin(), line.end(), line.begin(), ::toupper); - seq += line; + if (entry.basesPerLine == 0) { + entry.basesPerLine = line.size(); + entry.bytesPerLine = line.size() + 1; // assume single '\n' + } + seqLen += line.size(); } } - // Store the last chromosome read if (!currentChrom.empty()) { - referenceGenome[normalizeChromosome(currentChrom)] = seq; + entry.length = seqLen; + referenceIndex[normalizeChromosome(currentChrom)] = entry; } + referenceStream.clear(); + referenceStream.seekg(0); return true; } std::string VCFXAlignmentChecker::normalizeChromosome(const std::string& chrom) { - // NOTE: This logic may cause mismatches if your reference is named "1" but your VCF says "chr1". - // You may want to adjust this to match your actual naming conventions. 
std::string norm = chrom; - if (norm.find("chr") != 0 && - !(norm == "X" || norm == "Y" || norm == "MT" || - std::all_of(norm.begin(), norm.end(), ::isdigit))) - { - norm = "chr" + norm; + // convert to upper and drop leading "CHR" if present + if (norm.size() >= 3 && (norm.rfind("chr", 0) == 0 || norm.rfind("CHR", 0) == 0)) { + norm = norm.substr(3); } + std::transform(norm.begin(), norm.end(), norm.begin(), ::toupper); return norm; } std::string VCFXAlignmentChecker::getReferenceBases(const std::string& chrom, int pos, int length) { - auto it = referenceGenome.find(normalizeChromosome(chrom)); - if (it == referenceGenome.end()) { + auto it = referenceIndex.find(normalizeChromosome(chrom)); + if (it == referenceIndex.end()) { return ""; } - const std::string& seq = it->second; - // Convert VCF 1-based 'pos' to a 0-based index into the string - size_t startIndex = static_cast(pos - 1); - if (pos < 1 || (startIndex + length) > seq.size()) { + const FastaIndexEntry& entry = it->second; + if (pos < 1 || static_cast(pos - 1) >= entry.length) { return ""; } - return seq.substr(startIndex, length); + + int remaining = length; + std::size_t currPos = static_cast(pos - 1); + std::string result; + result.reserve(length); + + while (remaining > 0 && currPos < entry.length) { + std::size_t lineIdx = currPos / entry.basesPerLine; + std::size_t lineOffset = currPos % entry.basesPerLine; + std::size_t chunk = std::min(entry.basesPerLine - lineOffset, remaining); + + std::streampos filePos = entry.offset + static_cast(lineIdx * entry.bytesPerLine + lineOffset); + referenceStream.clear(); + referenceStream.seekg(filePos); + std::string buf(chunk, '\0'); + referenceStream.read(&buf[0], chunk); + result += buf; + + currPos += chunk; + remaining -= static_cast(chunk); + } + + return result; } void VCFXAlignmentChecker::checkDiscrepancies(std::istream& vcfIn, std::ostream& out) { diff --git a/src/VCFX_alignment_checker/VCFX_alignment_checker.h 
b/src/VCFX_alignment_checker/VCFX_alignment_checker.h index 6ff8e1a9..f62201c4 100644 --- a/src/VCFX_alignment_checker/VCFX_alignment_checker.h +++ b/src/VCFX_alignment_checker/VCFX_alignment_checker.h @@ -5,6 +5,7 @@ #include #include #include +#include // VCFXAlignmentChecker: Header file for Reference Alignment Discrepancy Finder Tool class VCFXAlignmentChecker { @@ -17,7 +18,7 @@ class VCFXAlignmentChecker { void displayHelp(); // Loads the reference genome from a FASTA file - bool loadReferenceGenome(std::istream& in); + bool loadReferenceGenome(const std::string& path); // Checks discrepancies between VCF variants and the in-memory reference genome void checkDiscrepancies(std::istream& vcfIn, std::ostream& out); @@ -26,7 +27,16 @@ class VCFXAlignmentChecker { std::string getReferenceBases(const std::string& chrom, int pos, int length = 1); // Stores the reference genome sequences, keyed by normalized chromosome name - std::unordered_map referenceGenome; + struct FastaIndexEntry { + std::streampos offset = 0; // file offset to first base + std::size_t length = 0; // total bases in sequence + std::size_t basesPerLine = 0; // number of bases per line in FASTA + std::size_t bytesPerLine = 0; // bytes per line including newline + }; + + std::unordered_map referenceIndex; + std::ifstream referenceStream; + std::string referencePath; // Helper function to convert chromosome names to a consistent format std::string normalizeChromosome(const std::string& chrom); From 7ad0a7ca9616059d4510265c39c764b111f3d574 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 18:46:17 +0100 Subject: [PATCH 06/54] Make haplotype extractor debug output optional --- .../VCFX_haplotype_extractor.cpp | 35 +++++++++++++------ .../VCFX_haplotype_extractor.h | 6 ++++ 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp b/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp index e1bbf55f..87fd0c79 
100644 --- a/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp +++ b/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp @@ -16,9 +16,10 @@ void printHelp() { << "Usage: VCFX_haplotype_extractor [OPTIONS]\n\n" << "Options:\n" << " --help, -h Display this help message and exit.\n" - << " --block-size Maximum distance for grouping consecutive variants (default 100000).\n" - << " --check-phase-consistency If set, try a minimal check across variants.\n\n" - << "Description:\n" + << " --block-size Maximum distance for grouping consecutive variants (default 100000).\n" + << " --check-phase-consistency If set, try a minimal check across variants.\n" + << " --debug Output verbose debug information.\n\n" + << "Description:\n" << " Extracts phased haplotype blocks from genotype data in a VCF file. " << "It reconstructs haplotypes for each sample by analyzing phased genotype fields.\n\n" << "Examples:\n" @@ -86,8 +87,10 @@ bool HaplotypeExtractor::phaseIsConsistent(const HaplotypeBlock& block, return false; } - // Debug the whole process - std::cerr << "Checking phase consistency\n"; + // Optional debugging output + if (debugMode) { + std::cerr << "Checking phase consistency\n"; + } for (size_t s=0; s inconsistent // Check for phase flips - when both alleles flip positions if (lastAllele1 != newAllele1 && lastAllele2 != newAllele2 && lastAllele1 == newAllele2 && lastAllele2 == newAllele1) { - std::cerr << "Phase flip detected in sample " << s << "\n"; + if (debugMode) { + std::cerr << "Phase flip detected in sample " << s << "\n"; + } return false; } } - std::cerr << "All phases consistent\n"; + if (debugMode) { + std::cerr << "All phases consistent\n"; + } return true; } @@ -318,6 +329,7 @@ bool HaplotypeExtractor::extractHaplotypes(std::istream& in, std::ostream& out) int main(int argc, char* argv[]) { int blockSize = 100000; bool doCheck = false; + bool debug = false; // simple arg parse for (int i=1; i sampleNames; size_t numSamples = 0; @@ -41,6 +44,9 @@ 
class HaplotypeExtractor { // If true, we do a simplistic cross-variant check for consistent phasing bool checkPhaseConsistency = false; + // If true, print verbose debugging information + bool debugMode = false; + // Parses the #CHROM line to extract sample names bool parseHeader(const std::string& headerLine); From fdf67403086d887d0925bc8b3b7d7d355c04c5db Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 19:06:47 +0100 Subject: [PATCH 07/54] Add trailing newline to Dockerfile and CMakeLists --- Dockerfile | 2 +- src/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index a68f4b73..be69319f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -56,4 +56,4 @@ RUN chmod +x /usr/local/bin/add_vcfx_tools_to_path.sh ENTRYPOINT ["/bin/bash", "-c"] # Default command shows available tools -CMD ["echo 'VCFX Toolkit is ready. Run any VCFX tool by name, for example:' && ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename"] \ No newline at end of file +CMD ["echo 'VCFX Toolkit is ready. 
Run any VCFX tool by name, for example:' && ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename"] diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7f29f6a4..26efae66 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -140,4 +140,4 @@ install(TARGETS ${VCFX_TOOLS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) -message(STATUS "VCFX tools to be installed: ${VCFX_TOOLS}") \ No newline at end of file +message(STATUS "VCFX tools to be installed: ${VCFX_TOOLS}") From 42053ff60069f36d228fcf0bab03d385982fe806 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 19:08:44 +0100 Subject: [PATCH 08/54] Add basic utilities to vcfx_core --- include/vcfx_core.h | 16 +++++++++++++++- src/vcfx_core.cpp | 36 ++++++++++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/include/vcfx_core.h b/include/vcfx_core.h index ff542a10..5d2663f5 100644 --- a/include/vcfx_core.h +++ b/include/vcfx_core.h @@ -3,7 +3,21 @@ #include #include +#include -// Core functionalities for VCFX tools +namespace vcfx { + +// Trim leading and trailing whitespace from a string +std::string trim(const std::string& str); + +// Split a string on the given delimiter +std::vector split(const std::string& str, char delimiter); + +// Convenience helpers for printing common messages +void print_error(const std::string& msg, std::ostream& os = std::cerr); +void print_version(const std::string& tool, const std::string& version, + std::ostream& os = std::cout); + +} // namespace vcfx #endif // VCFX_CORE_H diff --git a/src/vcfx_core.cpp b/src/vcfx_core.cpp index bbfcaa5c..c56fd8b6 100644 --- a/src/vcfx_core.cpp +++ b/src/vcfx_core.cpp @@ -1,4 +1,36 @@ #include "vcfx_core.h" +#include +#include +#include -// Implementation of core functionalities -// Add actual implementations as needed +namespace vcfx { + +std::string trim(const std::string& str) { + auto first = str.find_first_not_of(" \t\n\r"); + if (first == std::string::npos) { + return ""; + } + 
auto last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, last - first + 1); +} + +std::vector split(const std::string& str, char delimiter) { + std::vector result; + std::istringstream iss(str); + std::string item; + while (std::getline(iss, item, delimiter)) { + result.push_back(item); + } + return result; +} + +void print_error(const std::string& msg, std::ostream& os) { + os << "Error: " << msg << '\n'; +} + +void print_version(const std::string& tool, const std::string& version, + std::ostream& os) { + os << tool << " version " << version << '\n'; +} + +} // namespace vcfx From 9cb099c1589e83568f4afd36c3a757e5285b4ad2 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 19:09:51 +0100 Subject: [PATCH 09/54] refactor merger to stream --- src/VCFX_merger/VCFX_merger.cpp | 137 +++++++++++++++++--------------- src/VCFX_merger/VCFX_merger.h | 5 -- 2 files changed, 73 insertions(+), 69 deletions(-) diff --git a/src/VCFX_merger/VCFX_merger.cpp b/src/VCFX_merger/VCFX_merger.cpp index 639bf593..a6b59987 100644 --- a/src/VCFX_merger/VCFX_merger.cpp +++ b/src/VCFX_merger/VCFX_merger.cpp @@ -1,8 +1,8 @@ #include "VCFX_merger.h" #include #include -#include -#include +#include +#include #include #include @@ -62,86 +62,95 @@ void VCFXMerger::displayHelp() { } void VCFXMerger::mergeVCF(const std::vector& inputFiles, std::ostream& out) { - std::vector> allVariants; - std::vector allHeaders; + struct FileState { + std::ifstream stream; + std::string currentLine; + std::string chrom; + long pos = 0; + bool hasVariant = false; + }; - for (const auto& file : inputFiles) { - std::vector> variants; - std::vector headerLines; - parseVCF(file, variants, headerLines); + std::vector states; + std::vector headers; + bool headersCaptured = false; - // If no headers yet, copy the first file's headers - if (allHeaders.empty()) { - allHeaders = headerLines; + for (const auto& file : inputFiles) { + FileState fs; + fs.stream.open(file); + if 
(!fs.stream.is_open()) { + std::cerr << "Failed to open file: " << file << "\n"; + continue; } - // Append all variants - allVariants.insert(allVariants.end(), variants.begin(), variants.end()); - } - - // Sort all variants by chromosome and position - std::sort( - allVariants.begin(), - allVariants.end(), - [this](const std::vector& a, const std::vector& b) { - if (a[0] == b[0]) { - return std::stoi(a[1]) < std::stoi(b[1]); + std::string line; + while (std::getline(fs.stream, line)) { + if (line.empty()) + continue; + if (line[0] == '#') { + if (!headersCaptured) + headers.push_back(line); + continue; } - return a[0] < b[0]; + + std::istringstream ss(line); + std::getline(ss, fs.chrom, '\t'); + std::string pos_str; + std::getline(ss, pos_str, '\t'); + fs.pos = std::strtol(pos_str.c_str(), nullptr, 10); + fs.currentLine = line; + fs.hasVariant = true; + break; } - ); - // Output headers - for (const auto& header : allHeaders) { - out << header << "\n"; - } + if (fs.hasVariant) + states.push_back(std::move(fs)); - // Output merged variants - for (const auto& variant : allVariants) { - for (size_t i = 0; i < variant.size(); ++i) { - out << variant[i]; - if (i < variant.size() - 1) { - out << "\t"; - } - } - out << "\n"; + if (!headersCaptured && !headers.empty()) + headersCaptured = true; } -} -void VCFXMerger::parseVCF(const std::string& filename, - std::vector>& variants, - std::vector& headerLines) { - std::ifstream infile(filename); - if (!infile.is_open()) { - std::cerr << "Failed to open file: " << filename << "\n"; - return; + for (const auto& h : headers) { + out << h << '\n'; } - std::string line; - while (std::getline(infile, line)) { - if (line.empty()) continue; + auto cmp = [&](size_t a, size_t b) { + const auto& sa = states[a]; + const auto& sb = states[b]; + if (sa.chrom == sb.chrom) return sa.pos > sb.pos; + return sa.chrom > sb.chrom; + }; + std::priority_queue, decltype(cmp)> pq(cmp); - if (line[0] == '#') { - headerLines.push_back(line); - 
continue; - } + for (size_t i = 0; i < states.size(); ++i) { + if (states[i].hasVariant) + pq.push(i); + } - // Split by tab - std::vector fields; - std::string field; - size_t pos = 0; - while ((pos = line.find('\t')) != std::string::npos) { - field = line.substr(0, pos); - fields.push_back(field); - line.erase(0, pos + 1); + while (!pq.empty()) { + size_t idx = pq.top(); + pq.pop(); + out << states[idx].currentLine << '\n'; + + std::string line; + while (std::getline(states[idx].stream, line)) { + if (line.empty()) + continue; + if (line[0] == '#') + continue; + + std::istringstream ss(line); + std::getline(ss, states[idx].chrom, '\t'); + std::string pos_str; + std::getline(ss, pos_str, '\t'); + states[idx].pos = std::strtol(pos_str.c_str(), nullptr, 10); + states[idx].currentLine = line; + pq.push(idx); + break; } - fields.push_back(line); - - variants.push_back(fields); } - infile.close(); } + int main(int argc, char* argv[]) { VCFXMerger merger; return merger.run(argc, argv); diff --git a/src/VCFX_merger/VCFX_merger.h b/src/VCFX_merger/VCFX_merger.h index 637abba2..cfc7abb1 100644 --- a/src/VCFX_merger/VCFX_merger.h +++ b/src/VCFX_merger/VCFX_merger.h @@ -18,11 +18,6 @@ class VCFXMerger { // Processes and merges VCF files void mergeVCF(const std::vector& inputFiles, std::ostream& out); - // Parses a VCF file and stores variants - void parseVCF(const std::string& filename, std::vector>& variants, std::vector& headerLines); - - // Compares variants based on chromosome and position - bool compareVariants(const std::vector& a, const std::vector& b); }; #endif // VCFX_MERGER_H From 5dbfd211043a4d7e19c9e6341ebbed34ca88e092 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 23:53:45 +0100 Subject: [PATCH 10/54] Fix merger sort for unsorted inputs --- .github/workflows/docker-publish.yml | 2 +- .github/workflows/docs.yml | 16 +++---- src/VCFX_merger/VCFX_merger.cpp | 68 ++++++++-------------------- 3 files changed, 27 insertions(+), 59 
deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index d217e59d..3e740efd 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -57,4 +57,4 @@ jobs: labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64,linux/arm64 cache-from: type=gha - cache-to: type=gha,mode=max \ No newline at end of file + cache-to: type=gha,mode=max diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index acb29d11..609db83a 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -29,23 +29,23 @@ jobs: uses: actions/checkout@v3 with: fetch-depth: 0 - + - name: Set up Python uses: actions/setup-python@v4 with: python-version: '3.x' - + - name: Install dependencies run: | python -m pip install --upgrade pip pip install mkdocs-material pymdown-extensions - + - name: Deploy to GitHub Pages run: | git config --global user.name "${GITHUB_ACTOR}" git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com" mkdocs gh-deploy --force - + # Only for pull requests - just build to validate build: runs-on: ubuntu-latest @@ -53,16 +53,16 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v3 - + - name: Set up Python uses: actions/setup-python@v4 with: python-version: '3.x' - + - name: Install dependencies run: | python -m pip install --upgrade pip pip install mkdocs-material pymdown-extensions - + - name: Build documentation - run: mkdocs build \ No newline at end of file + run: mkdocs build diff --git a/src/VCFX_merger/VCFX_merger.cpp b/src/VCFX_merger/VCFX_merger.cpp index a6b59987..b8628062 100644 --- a/src/VCFX_merger/VCFX_merger.cpp +++ b/src/VCFX_merger/VCFX_merger.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -62,28 +63,25 @@ void VCFXMerger::displayHelp() { } void VCFXMerger::mergeVCF(const std::vector& inputFiles, std::ostream& out) { - struct FileState { - std::ifstream stream; - std::string 
currentLine; + struct Variant { std::string chrom; long pos = 0; - bool hasVariant = false; + std::string line; }; - std::vector states; + std::vector variants; std::vector headers; bool headersCaptured = false; for (const auto& file : inputFiles) { - FileState fs; - fs.stream.open(file); - if (!fs.stream.is_open()) { + std::ifstream stream(file); + if (!stream.is_open()) { std::cerr << "Failed to open file: " << file << "\n"; continue; } std::string line; - while (std::getline(fs.stream, line)) { + while (std::getline(stream, line)) { if (line.empty()) continue; if (line[0] == '#') { @@ -93,18 +91,15 @@ void VCFXMerger::mergeVCF(const std::vector& inputFiles, std::ostre } std::istringstream ss(line); - std::getline(ss, fs.chrom, '\t'); + Variant v; + std::getline(ss, v.chrom, '\t'); std::string pos_str; std::getline(ss, pos_str, '\t'); - fs.pos = std::strtol(pos_str.c_str(), nullptr, 10); - fs.currentLine = line; - fs.hasVariant = true; - break; + v.pos = std::strtol(pos_str.c_str(), nullptr, 10); + v.line = line; + variants.push_back(std::move(v)); } - if (fs.hasVariant) - states.push_back(std::move(fs)); - if (!headersCaptured && !headers.empty()) headersCaptured = true; } @@ -113,40 +108,13 @@ void VCFXMerger::mergeVCF(const std::vector& inputFiles, std::ostre out << h << '\n'; } - auto cmp = [&](size_t a, size_t b) { - const auto& sa = states[a]; - const auto& sb = states[b]; - if (sa.chrom == sb.chrom) return sa.pos > sb.pos; - return sa.chrom > sb.chrom; - }; - std::priority_queue, decltype(cmp)> pq(cmp); - - for (size_t i = 0; i < states.size(); ++i) { - if (states[i].hasVariant) - pq.push(i); - } - - while (!pq.empty()) { - size_t idx = pq.top(); - pq.pop(); - out << states[idx].currentLine << '\n'; + std::sort(variants.begin(), variants.end(), [](const Variant& a, const Variant& b) { + if (a.chrom == b.chrom) return a.pos < b.pos; + return a.chrom < b.chrom; + }); - std::string line; - while (std::getline(states[idx].stream, line)) { - if (line.empty()) - 
continue; - if (line[0] == '#') - continue; - - std::istringstream ss(line); - std::getline(ss, states[idx].chrom, '\t'); - std::string pos_str; - std::getline(ss, pos_str, '\t'); - states[idx].pos = std::strtol(pos_str.c_str(), nullptr, 10); - states[idx].currentLine = line; - pq.push(idx); - break; - } + for (const auto& v : variants) { + out << v.line << '\n'; } } From cffeab02fec50fdd5df90c79b5d1f4d7f547fcc0 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 00:12:46 +0100 Subject: [PATCH 11/54] Add CI workflow to build and test --- .github/workflows/build-test.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/build-test.yml diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml new file mode 100644 index 00000000..e7495aea --- /dev/null +++ b/.github/workflows/build-test.yml @@ -0,0 +1,23 @@ +name: Build and Test + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build-and-test: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential cmake libz-dev + + - name: Run shell tests + run: | + bash tests/test_all.sh From 0d8ff78e081a1811bad00bc6cb9a00e9b67bd06d Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 12:01:19 +0100 Subject: [PATCH 12/54] docs: reference public container --- DOCKER.md | 26 +++++++++++++------------- docs/docker.md | 26 +++++++++++++------------- docs/installation.md | 6 +++--- tests/test_docker.sh | 14 +++++++------- 4 files changed, 36 insertions(+), 36 deletions(-) diff --git a/DOCKER.md b/DOCKER.md index c206c53c..2b0f34c8 100644 --- a/DOCKER.md +++ b/DOCKER.md @@ -8,16 +8,16 @@ VCFX is available as a pre-built Docker image on GitHub Container Registry: ```bash # Pull the image (only needed once) -docker pull ghcr.io/ieeta-pt/vcfx:latest +docker 
pull ghcr.io/jorgemfs/vcfx:latest # Run a VCFX tool -docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Mount a directory with your data -docker run --rm -v /path/to/your/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v /path/to/your/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Example: Process a VCF file (using tests/data/valid.vcf as an example) -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' ``` Using the pre-built image is recommended for most users as it: @@ -65,19 +65,19 @@ There are several ways to run VCFX tools with Docker: ```bash # With the pre-built image -docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # With a locally built image docker run --rm vcfx:local VCFX_tool_name [options] # Mount the tests/data directory to access test files -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Process files in the tests/data directory -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_validator' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_validator' # Example: Calculate allele frequencies for a VCF file -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | 
VCFX_allele_freq_calc > /data/output.tsv' ``` ### Using Docker Compose @@ -98,7 +98,7 @@ docker-compose run --rm vcfx 'cat /data/valid.vcf | VCFX_allele_freq_calc > /dat When using Docker directly, you need to mount a directory to access your files: ```bash -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] ``` When using Docker Compose, the `tests/data` directory is mounted by default: @@ -115,7 +115,7 @@ You can modify the docker-compose.yml file to mount a different directory if nee You can create complex pipelines by chaining VCFX tools: ```bash -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/classifier_mixed.vcf | VCFX_variant_classifier --append-info | grep "VCF_CLASS=SNP" | VCFX_allele_freq_calc > /data/snp_frequencies.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/classifier_mixed.vcf | VCFX_variant_classifier --append-info | grep "VCF_CLASS=SNP" | VCFX_allele_freq_calc > /data/snp_frequencies.tsv' ``` ### Creating Shell Scripts @@ -126,7 +126,7 @@ For complex workflows, consider creating a shell script: #!/bin/bash # save as vcfx_workflow.sh -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | \ +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | \ VCFX_validator | \ VCFX_variant_classifier --append-info | \ VCFX_allele_freq_calc > /data/pipeline_output.tsv' @@ -147,7 +147,7 @@ If you encounter permission issues with files created in the container: ```bash # Run the container with your user ID -docker run --rm -v $(pwd)/tests/data:/data -u $(id -u):$(id -g) ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data -u $(id -u):$(id -g) ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] ``` ### Container Not 
Finding Commands @@ -156,5 +156,5 @@ If the container can't find VCFX commands, ensure they were properly built in th ```bash # List available VCFX tools in the container -docker run --rm ghcr.io/ieeta-pt/vcfx:latest 'ls -1 /usr/local/bin/VCFX_*' +docker run --rm ghcr.io/jorgemfs/vcfx:latest 'ls -1 /usr/local/bin/VCFX_*' ``` \ No newline at end of file diff --git a/docs/docker.md b/docs/docker.md index e71a060c..1e8ebcdb 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -8,16 +8,16 @@ VCFX is available as a pre-built Docker image on GitHub Container Registry: ```bash # Pull the image (only needed once) -docker pull ghcr.io/ieeta-pt/vcfx:latest +docker pull ghcr.io/jorgemfs/vcfx:latest # Run a VCFX tool -docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Mount a directory with your data -docker run --rm -v /path/to/your/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v /path/to/your/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Example: Process a VCF file (using tests/data/valid.vcf as an example) -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' ``` Using the pre-built image is recommended for most users as it: @@ -65,19 +65,19 @@ There are several ways to run VCFX tools with Docker: ```bash # With the pre-built image -docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # With a locally built image docker run --rm vcfx:local VCFX_tool_name [options] # Mount the tests/data directory to access test files -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker 
run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Process files in the tests/data directory -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_validator' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_validator' # Example: Calculate allele frequencies for a VCF file -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' ``` ### Using Docker Compose @@ -98,7 +98,7 @@ docker-compose run --rm vcfx 'cat /data/valid.vcf | VCFX_allele_freq_calc > /dat When using Docker directly, you need to mount a directory to access your files: ```bash -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] ``` When using Docker Compose, the `tests/data` directory is mounted by default: @@ -115,7 +115,7 @@ You can modify the docker-compose.yml file to mount a different directory if nee You can create complex pipelines by chaining VCFX tools: ```bash -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/classifier_mixed.vcf | VCFX_variant_classifier --append-info | grep "VCF_CLASS=SNP" | VCFX_allele_freq_calc > /data/snp_frequencies.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/classifier_mixed.vcf | VCFX_variant_classifier --append-info | grep "VCF_CLASS=SNP" | VCFX_allele_freq_calc > /data/snp_frequencies.tsv' ``` ### Creating Shell Scripts @@ -126,7 +126,7 @@ For complex workflows, consider creating a shell script: #!/bin/bash # save as vcfx_workflow.sh -docker run --rm -v $(pwd)/tests/data:/data 
ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | \ +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | \ VCFX_validator | \ VCFX_variant_classifier --append-info | \ VCFX_allele_freq_calc > /data/pipeline_output.tsv' @@ -147,7 +147,7 @@ If you encounter permission issues with files created in the container: ```bash # Run the container with your user ID -docker run --rm -v $(pwd)/tests/data:/data -u $(id -u):$(id -g) ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data -u $(id -u):$(id -g) ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] ``` ### Container Not Finding Commands @@ -156,7 +156,7 @@ If the container can't find VCFX commands, ensure they were properly built in th ```bash # List available VCFX tools in the container -docker run --rm ghcr.io/ieeta-pt/vcfx:latest 'ls -1 /usr/local/bin/VCFX_*' +docker run --rm ghcr.io/jorgemfs/vcfx:latest 'ls -1 /usr/local/bin/VCFX_*' ``` ## Citation diff --git a/docs/installation.md b/docs/installation.md index 4c697df9..6f14121d 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -35,13 +35,13 @@ The simplest way to use VCFX is with Docker, which requires no compilation: ```bash # Pull the VCFX Docker image (only needed once) -docker pull ghcr.io/ieeta-pt/vcfx:latest +docker pull ghcr.io/jorgemfs/vcfx:latest # Run a VCFX tool -docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Process files by mounting a directory with your data -docker run --rm -v /path/to/your/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/input.vcf | VCFX_tool_name > /data/output.tsv' +docker run --rm -v /path/to/your/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/input.vcf | VCFX_tool_name > /data/output.tsv' ``` This method is ideal for: diff --git a/tests/test_docker.sh b/tests/test_docker.sh index 6c9085da..b22eed29 100755 --- 
a/tests/test_docker.sh +++ b/tests/test_docker.sh @@ -21,7 +21,7 @@ echo "๐Ÿงฌ Testing VCFX Docker image with official test files..." # Pull the latest VCFX image echo "๐Ÿ“ฅ Pulling the latest VCFX Docker image..." -docker pull ghcr.io/ieeta-pt/vcfx:latest +docker pull ghcr.io/jorgemfs/vcfx:latest check_success "Pulled VCFX Docker image" # Get the directory of this script (tests directory) @@ -36,36 +36,36 @@ check_success "Created temporary output directory" # Test 1: List available tools echo "๐Ÿ“‹ Listing available VCFX tools..." -docker run --rm ghcr.io/ieeta-pt/vcfx:latest 'ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename' +docker run --rm ghcr.io/jorgemfs/vcfx:latest 'ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename' check_success "Listed available tools" # Test 2: Validator test echo "๐Ÿ” Testing VCFX_validator..." -docker run --rm -v "${TESTS_DIR}:/tests" ghcr.io/ieeta-pt/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_validator' +docker run --rm -v "${TESTS_DIR}:/tests" ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_validator' check_success "Validated valid.vcf file" # Test 3: Allele frequency calculator test echo "๐Ÿงฎ Testing VCFX_allele_freq_calc..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/ieeta-pt/vcfx:latest 'cat /tests/data/allele_freq_calc/test_input.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' + ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/allele_freq_calc/test_input.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' check_success "Calculated allele frequencies" # Test 4: Sample extractor test echo "๐Ÿ‘ฅ Testing VCFX_sample_extractor..." 
docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/ieeta-pt/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_sample_extractor --samples SAMPLE1 > /output/sample1.vcf' + ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_sample_extractor --samples SAMPLE1 > /output/sample1.vcf' check_success "Extracted sample" # Test 5: Variant classifier test echo "๐Ÿ”ฌ Testing VCFX_variant_classifier..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/ieeta-pt/vcfx:latest 'cat /tests/data/classifier_mixed.vcf | VCFX_variant_classifier --append-info > /output/classified.vcf' + ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/classifier_mixed.vcf | VCFX_variant_classifier --append-info > /output/classified.vcf' check_success "Classified variants" # Test 6: Testing a pipeline of commands echo "๐Ÿ”„ Testing a pipeline of VCFX tools..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/ieeta-pt/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_validator | VCFX_variant_classifier --append-info | VCFX_allele_freq_calc > /output/pipeline_output.tsv' + ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_validator | VCFX_variant_classifier --append-info | VCFX_allele_freq_calc > /output/pipeline_output.tsv' check_success "Executed pipeline of tools" echo "๐ŸŽ‰ All Docker tests completed successfully!" 
From 270cff74133666ad4965a0878f0d769888507289 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 12:01:52 +0100 Subject: [PATCH 13/54] Add gzip support and tests --- include/vcfx_core.h | 10 ++ src/CMakeLists.txt | 1 + .../VCFX_variant_counter.cpp | 9 +- src/vcfx_core.cpp | 102 ++++++++++++++++++ tests/test_variant_counter.sh | 19 +++- 5 files changed, 139 insertions(+), 2 deletions(-) diff --git a/include/vcfx_core.h b/include/vcfx_core.h index 5d2663f5..c3a62895 100644 --- a/include/vcfx_core.h +++ b/include/vcfx_core.h @@ -18,6 +18,16 @@ void print_error(const std::string& msg, std::ostream& os = std::cerr); void print_version(const std::string& tool, const std::string& version, std::ostream& os = std::cout); +// Read entire input stream, automatically decompressing if gzip/BGZF +// compressed. Returns true on success and stores the resulting text in +// 'out'. +bool read_maybe_compressed(std::istream& in, std::string& out); + +// Convenience helper to read a file that may be gzip/BGZF compressed. The file +// is loaded completely into memory and stored in 'out'. Returns true on +// success. 
+bool read_file_maybe_compressed(const std::string& path, std::string& out); + } // namespace vcfx #endif // VCFX_CORE_H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 26efae66..ed756a9f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.14) # Build the core library from your shared code add_library(vcfx_core STATIC vcfx_core.cpp) target_include_directories(vcfx_core PUBLIC ${CMAKE_CURRENT_LIST_DIR}/../include) +target_link_libraries(vcfx_core PUBLIC ZLIB::ZLIB) # Add all tool subdirectories add_subdirectory(VCFX_header_parser) diff --git a/src/VCFX_variant_counter/VCFX_variant_counter.cpp b/src/VCFX_variant_counter/VCFX_variant_counter.cpp index 37e037ca..26b21175 100644 --- a/src/VCFX_variant_counter/VCFX_variant_counter.cpp +++ b/src/VCFX_variant_counter/VCFX_variant_counter.cpp @@ -4,6 +4,7 @@ #include #include #include +#include "vcfx_core.h" void VCFXVariantCounter::displayHelp(){ std::cout << @@ -55,7 +56,13 @@ int VCFXVariantCounter::run(int argc, char* argv[]){ return 0; } - int total= countVariants(std::cin); + std::string plainInput; + if(!vcfx::read_maybe_compressed(std::cin, plainInput)){ + std::cerr << "Error: failed to read input" << std::endl; + return 1; + } + std::istringstream inStream(plainInput); + int total= countVariants(inStream); if(total<0){ // indicates an error if strict return 1; diff --git a/src/vcfx_core.cpp b/src/vcfx_core.cpp index c56fd8b6..43f06a5f 100644 --- a/src/vcfx_core.cpp +++ b/src/vcfx_core.cpp @@ -2,6 +2,9 @@ #include #include #include +#include +#include +#include namespace vcfx { @@ -33,4 +36,103 @@ void print_version(const std::string& tool, const std::string& version, os << tool << " version " << version << '\n'; } +// ------------------------------------------------------------ +// Internal helper: decompress gzip/BGZF data from 'in' into 'out' +// ------------------------------------------------------------ +static bool 
decompress_gzip_stream(std::istream& in, std::string& out) { + constexpr int CHUNK = 16384; + char inBuf[CHUNK]; + char outBuf[CHUNK]; + + z_stream strm; + std::memset(&strm, 0, sizeof(strm)); + if (inflateInit2(&strm, 15 + 32) != Z_OK) { + return false; + } + + int ret = Z_OK; + do { + in.read(inBuf, CHUNK); + strm.avail_in = static_cast(in.gcount()); + if (strm.avail_in == 0 && in.eof()) { + break; + } + strm.next_in = reinterpret_cast(inBuf); + + do { + strm.avail_out = CHUNK; + strm.next_out = reinterpret_cast(outBuf); + ret = inflate(&strm, Z_NO_FLUSH); + if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT || + ret == Z_DATA_ERROR || ret == Z_MEM_ERROR) { + inflateEnd(&strm); + return false; + } + size_t have = CHUNK - strm.avail_out; + if (have > 0) { + out.append(outBuf, have); + } + } while (strm.avail_out == 0); + } while (ret != Z_STREAM_END); + + inflateEnd(&strm); + return ret == Z_STREAM_END; +} + +// ------------------------------------------------------------ +// Detect gzip magic numbers on a stream without consuming them +// ------------------------------------------------------------ +static bool stream_has_gzip_magic(std::istream& in) { + int c1 = in.get(); + if (c1 == EOF) { + return false; + } + int c2 = in.get(); + if (c2 == EOF) { + in.unget(); + return false; + } + bool isGz = (static_cast(c1) == 0x1f && + static_cast(c2) == 0x8b); + in.putback(static_cast(c2)); + in.putback(static_cast(c1)); + return isGz; +} + +bool read_maybe_compressed(std::istream& in, std::string& out) { + out.clear(); + if (stream_has_gzip_magic(in)) { + return decompress_gzip_stream(in, out); + } + std::ostringstream oss; + oss << in.rdbuf(); + out = oss.str(); + return true; +} + +bool read_file_maybe_compressed(const std::string& path, std::string& out) { + std::ifstream file(path, std::ios::binary); + if (!file.is_open()) { + return false; + } + bool isGz = false; + if (path.size() >= 3 && + (path.compare(path.size() - 3, 3, ".gz") == 0)) { + isGz = true; + } else if 
(path.size() >= 4 && + (path.compare(path.size() - 4, 4, ".bgz") == 0)) { + isGz = true; + } else if (path.size() >= 5 && + (path.compare(path.size() - 5, 5, ".bgzf") == 0)) { + isGz = true; + } + if (isGz || stream_has_gzip_magic(file)) { + return decompress_gzip_stream(file, out); + } + std::ostringstream oss; + oss << file.rdbuf(); + out = oss.str(); + return true; +} + } // namespace vcfx diff --git a/tests/test_variant_counter.sh b/tests/test_variant_counter.sh index bf60b9a0..ae78b320 100755 --- a/tests/test_variant_counter.sh +++ b/tests/test_variant_counter.sh @@ -155,6 +155,17 @@ if [ ! -f data/variant_counter_empty.vcf ]; then EOF fi +# Create gzipped versions of VCFs +if [ ! -f data/variant_counter_normal.vcf.gz ]; then + gzip -c data/variant_counter_normal.vcf > data/variant_counter_normal.vcf.gz +fi +if [ ! -f data/variant_counter_invalid.vcf.gz ]; then + gzip -c data/variant_counter_invalid.vcf > data/variant_counter_invalid.vcf.gz +fi +if [ ! -f data/variant_counter_empty.vcf.gz ]; then + gzip -c data/variant_counter_empty.vcf > data/variant_counter_empty.vcf.gz +fi + # Test 1: Count variants in a normal VCF file (strict mode) run_test 1 "Counting variants in a normal VCF file (strict mode)" \ "cat data/variant_counter_normal.vcf | $VCFX_EXECUTABLE --strict" \ @@ -212,4 +223,10 @@ diff -u expected/variant_counter_large.txt out/variant_counter_large.txt || { } echo " Test 8 passed." -echo "All VCFX_variant_counter tests passed!" \ No newline at end of file +# Test 9: Gzipped normal VCF +run_test 9 "Counting variants in a gzipped VCF file" \ + "cat data/variant_counter_normal.vcf.gz | $VCFX_EXECUTABLE" \ + "expected/variant_counter_normal_nonstrict.txt" \ + "out/variant_counter_normal_gz.txt" + +echo "All VCFX_variant_counter tests passed!" 
From dd90a091bef993d21cdfefee1130acaf759d2f31 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 14:35:30 +0100 Subject: [PATCH 14/54] test: skip Docker tests if Docker missing --- tests/test_docker.sh | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/test_docker.sh b/tests/test_docker.sh index b22eed29..998b9401 100755 --- a/tests/test_docker.sh +++ b/tests/test_docker.sh @@ -1,5 +1,8 @@ #!/bin/bash # This script tests the VCFX Docker image using the existing test files from the tests directory +# Docker image to use for the tests. CI may override this when using a locally +# built image. +VCFX_IMAGE="${VCFX_IMAGE:-ghcr.io/ieeta-pt/vcfx:latest}" # Function to check if command succeeded check_success() { @@ -13,15 +16,15 @@ check_success() { # Check if Docker is installed if ! command -v docker &> /dev/null; then - echo "โŒ Docker is not installed. Please install Docker first." - exit 1 + echo "โš ๏ธ Docker is not installed. Skipping Docker tests." + exit 0 fi echo "๐Ÿงฌ Testing VCFX Docker image with official test files..." # Pull the latest VCFX image echo "๐Ÿ“ฅ Pulling the latest VCFX Docker image..." -docker pull ghcr.io/jorgemfs/vcfx:latest +docker pull $VCFX_IMAGE check_success "Pulled VCFX Docker image" # Get the directory of this script (tests directory) @@ -36,36 +39,36 @@ check_success "Created temporary output directory" # Test 1: List available tools echo "๐Ÿ“‹ Listing available VCFX tools..." -docker run --rm ghcr.io/jorgemfs/vcfx:latest 'ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename' +docker run --rm $VCFX_IMAGE 'ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename' check_success "Listed available tools" # Test 2: Validator test echo "๐Ÿ” Testing VCFX_validator..." 
-docker run --rm -v "${TESTS_DIR}:/tests" ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_validator' +docker run --rm -v "${TESTS_DIR}:/tests" $VCFX_IMAGE 'cat /tests/data/valid.vcf | VCFX_validator' check_success "Validated valid.vcf file" # Test 3: Allele frequency calculator test echo "๐Ÿงฎ Testing VCFX_allele_freq_calc..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/allele_freq_calc/test_input.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' + $VCFX_IMAGE 'cat /tests/data/allele_freq_calc/test_input.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' check_success "Calculated allele frequencies" # Test 4: Sample extractor test echo "๐Ÿ‘ฅ Testing VCFX_sample_extractor..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_sample_extractor --samples SAMPLE1 > /output/sample1.vcf' + $VCFX_IMAGE 'cat /tests/data/valid.vcf | VCFX_sample_extractor --samples SAMPLE1 > /output/sample1.vcf' check_success "Extracted sample" # Test 5: Variant classifier test echo "๐Ÿ”ฌ Testing VCFX_variant_classifier..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/classifier_mixed.vcf | VCFX_variant_classifier --append-info > /output/classified.vcf' + $VCFX_IMAGE 'cat /tests/data/classifier_mixed.vcf | VCFX_variant_classifier --append-info > /output/classified.vcf' check_success "Classified variants" # Test 6: Testing a pipeline of commands echo "๐Ÿ”„ Testing a pipeline of VCFX tools..." 
docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_validator | VCFX_variant_classifier --append-info | VCFX_allele_freq_calc > /output/pipeline_output.tsv' + $VCFX_IMAGE 'cat /tests/data/valid.vcf | VCFX_validator | VCFX_variant_classifier --append-info | VCFX_allele_freq_calc > /output/pipeline_output.tsv' check_success "Executed pipeline of tools" echo "๐ŸŽ‰ All Docker tests completed successfully!" @@ -77,4 +80,4 @@ echo "๐Ÿ“š For more information on how to use VCFX with Docker, see the document # Clean up temporary files echo "๐Ÿงน Cleaning up..." rm -rf "${TEMP_OUTPUT}" -check_success "Cleaned up temporary files" \ No newline at end of file +check_success "Cleaned up temporary files" From 8cf4960845358b7156737d4a518b071e5f13b3de Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 14:39:38 +0100 Subject: [PATCH 15/54] Update test_docker.sh --- tests/test_docker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_docker.sh b/tests/test_docker.sh index 998b9401..01c89815 100755 --- a/tests/test_docker.sh +++ b/tests/test_docker.sh @@ -2,7 +2,7 @@ # This script tests the VCFX Docker image using the existing test files from the tests directory # Docker image to use for the tests. CI may override this when using a locally # built image. 
-VCFX_IMAGE="${VCFX_IMAGE:-ghcr.io/ieeta-pt/vcfx:latest}" +VCFX_IMAGE="${VCFX_IMAGE:-ghcr.io/jorgeMFS/vcfx:latest}" # Function to check if command succeeded check_success() { From 2f321f44a31f91fa9a31468ee84cceaacd0402de Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 14:43:57 +0100 Subject: [PATCH 16/54] Update test_docker.sh --- tests/test_docker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_docker.sh b/tests/test_docker.sh index 01c89815..1bb1116d 100755 --- a/tests/test_docker.sh +++ b/tests/test_docker.sh @@ -2,7 +2,7 @@ # This script tests the VCFX Docker image using the existing test files from the tests directory # Docker image to use for the tests. CI may override this when using a locally # built image. -VCFX_IMAGE="${VCFX_IMAGE:-ghcr.io/jorgeMFS/vcfx:latest}" +VCFX_IMAGE="${VCFX_IMAGE:-ghcr.io/jorgemfs/vcfx:latest}" # Function to check if command succeeded check_success() { From c848b21d2c29b6e8e3244176a398824fc99fcf82 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 23:09:48 +0100 Subject: [PATCH 17/54] Add strict mode validation for VCFX_validator --- docs/VCFX_validator.md | 13 ++++- src/VCFX_validator/VCFX_validator.cpp | 76 +++++++++++++++++++++++++-- src/VCFX_validator/VCFX_validator.h | 6 +++ tests/test_validator.sh | 35 +++++++++++- 4 files changed, 121 insertions(+), 9 deletions(-) diff --git a/docs/VCFX_validator.md b/docs/VCFX_validator.md index bb46f329..3b850697 100644 --- a/docs/VCFX_validator.md +++ b/docs/VCFX_validator.md @@ -14,7 +14,7 @@ VCFX_validator [OPTIONS] < input.vcf | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | -| `-s`, `--strict` | Enable stricter validation checks (reserved for future implementation) | +| `-s`, `--strict` | Enable stricter validation checks | ## Description `VCFX_validator` processes a VCF file to verify its structural validity by: @@ -59,8 +59,17 @@ This tool is useful for 
validating VCF files before processing them with other t - INFO: Must be '.' or contain valid key-value pairs or flags: - If not '.', must contain at least one valid entry - Key-value pairs must have a non-empty key + - Flags (without '=') are allowed +### Strict Mode +When `--strict` is used, additional checks are applied: +- The number of columns in every data line must exactly match the `#CHROM` header. +- If FORMAT/sample columns are present, each sample field must contain the same + number of sub-fields as specified in the FORMAT column. +- Any warning that would normally be emitted is treated as an error and causes + the validator to exit with a non-zero status. + ## Examples ### Basic Validation @@ -70,7 +79,7 @@ VCFX_validator < input.vcf ``` ### Using Strict Mode -Enable stricter validation (note: additional strict checks are reserved for future implementation): +Enable stricter validation with additional checks: ```bash VCFX_validator --strict < input.vcf ``` diff --git a/src/VCFX_validator/VCFX_validator.cpp b/src/VCFX_validator/VCFX_validator.cpp index 24c0bd22..bc5f1c90 100644 --- a/src/VCFX_validator/VCFX_validator.cpp +++ b/src/VCFX_validator/VCFX_validator.cpp @@ -7,7 +7,7 @@ #include static std::string trim(const std::string &s){ - size_t start=0; + size_t start=0; while(start split(const std::string &s, char delim){ + std::vector out; + std::stringstream ss(s); + std::string item; + while(std::getline(ss, item, delim)) out.push_back(item); + return out; +} + int VCFXValidator::run(int argc, char* argv[]){ bool hasStdin = !isatty(fileno(stdin)); if(argc==1 && !hasStdin){ @@ -51,14 +59,18 @@ void VCFXValidator::displayHelp(){ " VCFX_validator [options] < input.vcf\n\n" "Options:\n" " -h, --help Show this help.\n" -" -s, --strict Enable stricter checks (not fully implemented, but reserved).\n\n" +" -s, --strict Enable stricter checks.\n\n" "Description:\n" " Validates:\n" " * All '##' lines are recognized as meta lines.\n" -" * #CHROM line is 
present, has at least 8 columns.\n" +" * #CHROM line is present and well formed.\n" " * Each data line has >=8 columns, checks CHROM non-empty, POS>0,\n" " REF/ALT non-empty, QUAL is '.' or non-negative float, FILTER non-empty,\n" -" INFO is minimal check. Logs errors/warnings.\n" +" INFO is minimally checked.\n" +" In strict mode additional checks are performed:\n" +" * Data line column count must match the #CHROM header.\n" +" * Sample columns must match the FORMAT field structure.\n" +" * Any warning is treated as an error.\n" " Exits 0 if pass, 1 if fail.\n"; } @@ -86,11 +98,24 @@ bool VCFXValidator::validateChromHeader(const std::string &line, int lineNumber) std::cerr<<"Error: #CHROM line at "<< lineNumber <<" has <8 columns.\n"; return false; } - // typically #CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, (FORMAT?), ... if(f[0]!="#CHROM"){ std::cerr<<"Error: #CHROM line doesn't start with '#CHROM' at line "<< lineNumber <<".\n"; return false; } + + headerColumnCount = static_cast(f.size()); + headerHasFormat = (headerColumnCount > 8); + sampleCount = headerHasFormat ? 
headerColumnCount - 9 : 0; + + if(headerHasFormat && f[8] != "FORMAT"){ + std::string msg = "Warning: column 9 of #CHROM header is not 'FORMAT'."; + if(strictMode){ + std::cerr << "Error: " << msg << "\n"; + return false; + } else { + std::cerr << msg << "\n"; + } + } return true; } @@ -108,6 +133,16 @@ bool VCFXValidator::validateDataLine(const std::string &line, int lineNumber){ std::cerr<<"Error: line "<< lineNumber <<" has <8 columns.\n"; return false; } + if(headerColumnCount>0){ + if(strictMode && static_cast(f.size()) != headerColumnCount){ + std::cerr << "Error: line "<(f.size()) != headerColumnCount){ + std::cerr << "Warning: line "< formatParts = split(f[8], ':'); + for(size_t i=9;i sampleParts = split(f[i], ':'); + if(sampleParts.size()!=formatParts.size()){ + std::string msg = "Warning: sample column " + std::to_string(i-8) + + " does not match FORMAT field"; + if(strictMode){ + std::cerr<<"Error: "<8){ + std::string msg = "Warning: data line has sample columns but header lacks FORMAT"; + if(strictMode){ + std::cerr<<"Error: "<&1) + output=$($EXEC $opts < "$input_file" 2>&1) exit_code=$? if [ $exit_code -eq 0 ]; then @@ -43,13 +44,14 @@ run_test_failure() { local description=$2 local input_file=$3 local expected_error=$4 + local opts="$5" echo -n "Test $test_num: $description... " # Run the command using process substitution local output local exit_code - output=$($EXEC < "$input_file" 2>&1) + output=$($EXEC $opts < "$input_file" 2>&1) exit_code=$? if [ $exit_code -ne 0 ]; then @@ -171,6 +173,23 @@ chr1 100 . A T . PASS . 
chr2 200 rs456 G C 80 PASS NS=2;DP=15 EOF +# Header has one sample column but a data line includes two sample columns +cat > data/mismatched_columns.vcf << EOF +##fileformat=VCFv4.2 +##FORMAT= +EOF +printf '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE1\n' >> data/mismatched_columns.vcf +printf 'chr1\t100\t.\tA\tT\t60\tPASS\t.\tGT\t0/1\t0/0\n' >> data/mismatched_columns.vcf + +# FORMAT expects two entries but sample has three +cat > data/format_mismatch.vcf << EOF +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +EOF +printf '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE1\n' >> data/format_mismatch.vcf +printf 'chr1\t100\t.\tA\tT\t60\tPASS\t.\tGT:DP\t0/1:30:7\n' >> data/format_mismatch.vcf + # Run each test separately and track failures failures=0 @@ -228,6 +247,18 @@ else failures=$((failures + 1)) fi +# Test 13 - strict mode valid file +run_test_success 13 "Strict valid VCF" "data/valid.vcf" "--strict" +[ $? -ne 0 ] && failures=$((failures + 1)) + +# Test 14 - mismatched columns in strict mode +run_test_failure 14 "Strict mismatched columns" "data/mismatched_columns.vcf" "columns" "--strict" +[ $? -ne 0 ] && failures=$((failures + 1)) + +# Test 15 - FORMAT/sample mismatch in strict mode +run_test_failure 15 "Strict format mismatch" "data/format_mismatch.vcf" "FORMAT" "--strict" +[ $? -ne 0 ] && failures=$((failures + 1)) + if [ $failures -eq 0 ]; then echo "All tests for VCFX_validator passed!" 
exit 0 From b8dbc8770c4d31751f6b5e255fc77487d867a76b Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 23:26:19 +0100 Subject: [PATCH 18/54] Integrate shell scripts with CTest --- .github/workflows/build-test.yml | 11 ++++- CMakeLists.txt | 5 +- README.md | 11 ++++- docs/CONTRIBUTING.md | 11 +++-- tests/CMakeLists.txt | 79 ++++++++++++++++++++++++++++++++ 5 files changed, 107 insertions(+), 10 deletions(-) create mode 100644 tests/CMakeLists.txt diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index e7495aea..9fea25be 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -18,6 +18,13 @@ jobs: sudo apt-get update sudo apt-get install -y build-essential cmake libz-dev - - name: Run shell tests + - name: Configure + run: cmake -S . -B build + + - name: Build + run: cmake --build build -- -j + + - name: Run tests run: | - bash tests/test_all.sh + cd build + ctest --output-on-failure diff --git a/CMakeLists.txt b/CMakeLists.txt index 17f372a2..20f51dca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,9 +53,8 @@ enable_testing() # Add top-level 'src' subdirectory, which in turn references each tool subdirectory add_subdirectory(src) -# Add a tests subdir if you have tests -# Comment out this line since we don't have a CMakeLists.txt file in the tests directory -# add_subdirectory(tests) +# Add the test suite +add_subdirectory(tests) # Installation configuration include(GNUInstallDirs) diff --git a/README.md b/README.md index 7e158764..906803db 100644 --- a/README.md +++ b/README.md @@ -87,9 +87,16 @@ cmake --build . 
## Running Tests +From your build directory, run: + +```bash +ctest --output-on-failure +``` + +You can also execute all shell scripts directly with: + ```bash -cd build -ctest --verbose +bash ../tests/test_all.sh ``` ## Contributing diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 9029abf4..ed264c7d 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -58,11 +58,16 @@ make ### Running Tests -After building the project, run the tests to ensure everything is working correctly: +After building the project, run the test suite from the `build` directory: ```bash -cd build -ctest --verbose +ctest --output-on-failure +``` + +You can still run all shell tests directly if needed: + +```bash +bash ../tests/test_all.sh ``` ## Coding Standards diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 00000000..558897dc --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,79 @@ +# CMake tests for VCFX shell scripts + +set(TEST_SCRIPTS + test_af_subsetter.sh + test_alignment_checker.sh + test_allele_balance_calc.sh + test_allele_balance_filter.sh + test_allele_counter.sh + test_allele_freq_calc.sh + test_ancestry_assigner.sh + test_ancestry_inferrer.sh + test_annotation_extractor.sh + test_compressor.sh + test_concordance_checker.sh + test_cross_sample_concordance.sh + test_custom_annotator.sh + test_diff_tool.sh + test_distance_calculator.sh + test_dosage_calculator.sh + test_duplicate_remover.sh + test_fasta_converter.sh + test_field_extractor.sh + test_file_splitter.sh + test_format_converter.sh + test_genotype_query.sh + test_gl_filter.sh + test_haplotype_extractor.sh + test_header_parser.sh + test_hwe_tester.sh + test_impact_filter.sh + test_indel_normalizer.sh + test_indexer.sh + test_info_aggregator.sh + test_info_summarizer.sh + test_inbreeding_calculator.sh + test_ld_calculator.sh + test_metadata_summarizer.sh + test_merger.sh + test_missing_data_handler.sh + test_missing_detector.sh + test_multiallelic_splitter.sh + 
test_nonref_filter.sh + test_outlier_detector.sh + test_phase_checker.sh + test_phase_quality_filter.sh + test_phred_filter.sh + test_population_filter.sh + test_position_subsetter.sh + test_probability_filter.sh + test_quality_adjuster.sh + test_record_filter.sh + test_ref_comparator.sh + test_reformatter.sh + test_region_subsampler.sh + test_sample_extractor.sh + test_sorter.sh + test_sv_handler.sh + test_subsampler.sh + test_validator.sh + test_variant_classifier.sh + test_variant_counter.sh +) + +foreach(script ${TEST_SCRIPTS}) + get_filename_component(name ${script} NAME_WE) + add_test(NAME ${name} + COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/${script}) + set_tests_properties(${name} PROPERTIES + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) +endforeach() + +# Optional Docker-based tests +find_program(DOCKER_EXECUTABLE docker) +if(DOCKER_EXECUTABLE) + add_test(NAME test_docker + COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/test_docker.sh) + set_tests_properties(test_docker PROPERTIES + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) +endif() From 0563fb225fdc9488bb2f0d8264f5ad95cfec4a8b Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 23:33:32 +0100 Subject: [PATCH 19/54] Improve Docker test script with local build fallback --- tests/test_docker.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/test_docker.sh b/tests/test_docker.sh index 1bb1116d..18e5b9b6 100755 --- a/tests/test_docker.sh +++ b/tests/test_docker.sh @@ -23,9 +23,15 @@ fi echo "๐Ÿงฌ Testing VCFX Docker image with official test files..." # Pull the latest VCFX image -echo "๐Ÿ“ฅ Pulling the latest VCFX Docker image..." -docker pull $VCFX_IMAGE -check_success "Pulled VCFX Docker image" +echo "๐Ÿ“ฅ Pulling the latest VCFX Docker image ($VCFX_IMAGE)..." +if docker pull "$VCFX_IMAGE"; then + check_success "Pulled VCFX Docker image" +else + echo "โš ๏ธ Unable to pull $VCFX_IMAGE. Building Docker image locally..." + docker build -t vcfx:local . 
+ check_success "Built local Docker image" + VCFX_IMAGE="vcfx:local" +fi # Get the directory of this script (tests directory) TESTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" From 5b47e82e016ff841e94f750ea635386c0538ec01 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 23:57:16 +0100 Subject: [PATCH 20/54] Fix Docker test input path --- tests/test_docker.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/test_docker.sh b/tests/test_docker.sh index 18e5b9b6..2de967e7 100755 --- a/tests/test_docker.sh +++ b/tests/test_docker.sh @@ -4,6 +4,10 @@ # built image. VCFX_IMAGE="${VCFX_IMAGE:-ghcr.io/jorgemfs/vcfx:latest}" +# Directory paths +TESTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +REPO_ROOT="$(dirname "$TESTS_DIR")" + # Function to check if command succeeded check_success() { if [ $? -ne 0 ]; then @@ -28,14 +32,11 @@ if docker pull "$VCFX_IMAGE"; then check_success "Pulled VCFX Docker image" else echo "โš ๏ธ Unable to pull $VCFX_IMAGE. Building Docker image locally..." - docker build -t vcfx:local . + docker build -t vcfx:local "${REPO_ROOT}" check_success "Built local Docker image" VCFX_IMAGE="vcfx:local" fi -# Get the directory of this script (tests directory) -TESTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" - echo "๐Ÿ” Using test files from: ${TESTS_DIR}" # Create temporary output directory in tests/out @@ -56,7 +57,7 @@ check_success "Validated valid.vcf file" # Test 3: Allele frequency calculator test echo "๐Ÿงฎ Testing VCFX_allele_freq_calc..." 
docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - $VCFX_IMAGE 'cat /tests/data/allele_freq_calc/test_input.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' + $VCFX_IMAGE 'cat /tests/data/allele_freq_calc/simple.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' check_success "Calculated allele frequencies" # Test 4: Sample extractor test From e051302c583cf7b19f8f580be65894e7971cac25 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Fri, 23 May 2025 00:09:28 +0100 Subject: [PATCH 21/54] Add vcfx wrapper --- compile_wasm.sh | 2 +- docs/tools_overview.md | 2 + src/CMakeLists.txt | 2 + src/vcfx_wrapper/CMakeLists.txt | 1 + src/vcfx_wrapper/vcfx.cpp | 97 +++++++++++++++++++++++++++++++++ 5 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 src/vcfx_wrapper/CMakeLists.txt create mode 100644 src/vcfx_wrapper/vcfx.cpp diff --git a/compile_wasm.sh b/compile_wasm.sh index e0a5a132..25d8cd6a 100644 --- a/compile_wasm.sh +++ b/compile_wasm.sh @@ -13,5 +13,5 @@ fi cmake --build . -echo "All VCFX tools built for WebAssembly in build_wasm/." +echo "All VCFX tools and the vcfx wrapper built for WebAssembly in build_wasm/." echo "Use 'ls -R build_wasm' to see output. If you want .html or .js from Emscripten, you can adjust linking flags or suffixes." diff --git a/docs/tools_overview.md b/docs/tools_overview.md index 76c3b152..898124b8 100644 --- a/docs/tools_overview.md +++ b/docs/tools_overview.md @@ -2,6 +2,8 @@ VCFX is a collection of C/C++ tools for processing and analyzing VCF (Variant Call Format) files, with optional WebAssembly compatibility. Each tool is an independent command-line executable that can parse input from `stdin` and write to `stdout`, enabling flexible piping and integration into bioinformatics pipelines. +The suite also includes a convenience wrapper `vcfx` so you can run commands as `vcfx `. For example, `vcfx variant_counter` is equivalent to running `VCFX_variant_counter`. 
Use `vcfx --list` to see available subcommands. All individual `VCFX_*` binaries remain available if you prefer calling them directly. + ## Tool Categories ### Data Analysis diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ed756a9f..2fadba0a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,6 +6,7 @@ target_include_directories(vcfx_core PUBLIC ${CMAKE_CURRENT_LIST_DIR}/../include target_link_libraries(vcfx_core PUBLIC ZLIB::ZLIB) # Add all tool subdirectories +add_subdirectory(vcfx_wrapper) add_subdirectory(VCFX_header_parser) add_subdirectory(VCFX_record_filter) add_subdirectory(VCFX_field_extractor) @@ -74,6 +75,7 @@ install(TARGETS vcfx_core # Define a list of all tool executables for installation set(VCFX_TOOLS + vcfx VCFX_header_parser VCFX_record_filter VCFX_field_extractor diff --git a/src/vcfx_wrapper/CMakeLists.txt b/src/vcfx_wrapper/CMakeLists.txt new file mode 100644 index 00000000..d9824862 --- /dev/null +++ b/src/vcfx_wrapper/CMakeLists.txt @@ -0,0 +1 @@ +add_executable(vcfx vcfx.cpp) diff --git a/src/vcfx_wrapper/vcfx.cpp b/src/vcfx_wrapper/vcfx.cpp new file mode 100644 index 00000000..70093a13 --- /dev/null +++ b/src/vcfx_wrapper/vcfx.cpp @@ -0,0 +1,97 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void print_usage(){ + std::cout << "vcfx - unified interface for VCFX tools\n" + << "Usage: vcfx [--help] [--list] [args]\n\n" + << " Name of a VCFX tool without the 'VCFX_' prefix\n" + << " --list List available subcommands found in PATH\n" + << " --help Show this help message\n"; +} + +static void list_commands(){ + const char* path_env = std::getenv("PATH"); + if(!path_env) return; + std::string paths(path_env); + std::set cmds; + size_t start=0; + while(true){ + size_t end = paths.find(':', start); + std::string dir = paths.substr(start, end - start); + DIR* d = opendir(dir.c_str()); + if(d){ + struct dirent* e; + while((e = readdir(d)) != nullptr){ + 
if(std::strncmp(e->d_name, "VCFX_", 5)==0){ + std::string name = e->d_name + 5; + std::string full = dir + "/" + e->d_name; + if(access(full.c_str(), X_OK)==0){ + cmds.insert(name); + } + } + } + closedir(d); + } + if(end == std::string::npos) break; + start = end + 1; + } + for(const auto& c : cmds){ + std::cout << c << '\n'; + } +} + +int main(int argc, char* argv[]){ + bool show_help = false; + bool show_list = false; + static struct option long_opts[] = { + {"help", no_argument, 0, 'h'}, + {"list", no_argument, 0, 'l'}, + {0,0,0,0} + }; + + int opt; + while((opt = getopt_long(argc, argv, "hl", long_opts, nullptr)) != -1){ + if(opt == 'h') show_help = true; + else if(opt == 'l') show_list = true; + else { + print_usage(); + return 1; + } + } + + if(show_help){ + print_usage(); + return 0; + } + if(show_list){ + list_commands(); + return 0; + } + + if(optind >= argc){ + print_usage(); + return 1; + } + + std::string sub = argv[optind]; + std::string exec_name = "VCFX_" + sub; + + std::vector exec_args; + exec_args.push_back(const_cast(exec_name.c_str())); + for(int i = optind + 1; i < argc; ++i){ + exec_args.push_back(argv[i]); + } + exec_args.push_back(nullptr); + + execvp(exec_name.c_str(), exec_args.data()); + std::perror(exec_name.c_str()); + return 1; +} + From 9de9437093063601c02eefb18a48655bc0c629ed Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Fri, 23 May 2025 00:19:25 +0100 Subject: [PATCH 22/54] Add Docker entrypoint and path setup --- Dockerfile | 13 ++++++------ add_vcfx_tools_to_path.sh | 44 +++++++++++++++++++++------------------ docker_entrypoint.sh | 10 +++++++++ 3 files changed, 41 insertions(+), 26 deletions(-) create mode 100755 docker_entrypoint.sh diff --git a/Dockerfile b/Dockerfile index be69319f..857fa1fe 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,14 +46,15 @@ COPY --from=builder /app/build/src /usr/local/bin/ # Create a directory for data WORKDIR /data -# Add the script that adds tools to PATH +# Add the helper scripts COPY 
add_vcfx_tools_to_path.sh /usr/local/bin/ +COPY docker_entrypoint.sh /usr/local/bin/ -# Make the script executable -RUN chmod +x /usr/local/bin/add_vcfx_tools_to_path.sh +# Make them executable +RUN chmod +x /usr/local/bin/add_vcfx_tools_to_path.sh /usr/local/bin/docker_entrypoint.sh -# Set the entry point -ENTRYPOINT ["/bin/bash", "-c"] +# Use a custom entrypoint that sets up PATH for the tools +ENTRYPOINT ["/usr/local/bin/docker_entrypoint.sh"] # Default command shows available tools -CMD ["echo 'VCFX Toolkit is ready. Run any VCFX tool by name, for example:' && ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename"] +CMD ["bash", "-c", "echo 'VCFX Toolkit is ready. Run any VCFX tool by name, for example:' && ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename"] diff --git a/add_vcfx_tools_to_path.sh b/add_vcfx_tools_to_path.sh index 0a68c655..43a647b0 100644 --- a/add_vcfx_tools_to_path.sh +++ b/add_vcfx_tools_to_path.sh @@ -6,37 +6,41 @@ # Usage: # source ./add_vcfx_tools_to_path.sh -# Where is the root of this script? (i.e., your VCFX repository root) -# Adjust if needed; for example if you keep this script in the top-level dir: +# Determine potential base directories that may contain VCFX tools. +# When running from the build tree this will be build/src, but inside the +# Docker image the tools reside in /usr/local/bin/VCFX_*/. REPO_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -# Our compiled tools should be under build/src +BASE_DIRS=() BUILD_SRC_DIR="${REPO_ROOT}/build/src" +if [ -d "${BUILD_SRC_DIR}" ]; then + BASE_DIRS+=("${BUILD_SRC_DIR}") +fi -# Check that this path exists: -if [ ! -d "${BUILD_SRC_DIR}" ]; then - echo "Error: build/src directory not found at: ${BUILD_SRC_DIR}" - echo "Make sure you have run 'cmake .. 
&& make' inside ./build" - return 1 +# Also check the standard installation prefix used in the Docker image +if compgen -G "/usr/local/bin/VCFX_*" > /dev/null; then + BASE_DIRS+=("/usr/local/bin") fi -# We'll gather a list of directories under build/src/VCFX_* -# that actually contain an executable matching the pattern "VCFX_*" -# Then add those directories to PATH. +if [ ${#BASE_DIRS[@]} -eq 0 ]; then + echo "Warning: No VCFX tool directories found." + return 1 +fi +# Gather directories containing executables named VCFX_* TOOL_DIRS="" -while IFS= read -r -d '' toolExec; do - # 'toolExec' is something like: build/src/VCFX_af_subsetter/VCFX_af_subsetter - toolDir=$(dirname "$toolExec") - # Only add it once if not present - if [[ ":$TOOL_DIRS:" != *":$toolDir:"* ]]; then - TOOL_DIRS="${TOOL_DIRS}:${toolDir}" - fi -done < <(find "${BUILD_SRC_DIR}" -type f -perm /111 -name 'VCFX_*' -print0 2>/dev/null) +for base in "${BASE_DIRS[@]}"; do + while IFS= read -r -d '' toolExec; do + toolDir=$(dirname "$toolExec") + if [[ ":$TOOL_DIRS:" != *":$toolDir:"* ]]; then + TOOL_DIRS="${TOOL_DIRS}:${toolDir}" + fi + done < <(find "$base" -type f -perm /111 -name 'VCFX_*' -print0 2>/dev/null) +done # If empty (no tools found), bail out if [ -z "$TOOL_DIRS" ]; then - echo "Warning: No VCFX tools found in ${BUILD_SRC_DIR}. Did you run 'make'?" + echo "Warning: No VCFX tools found." else # Remove leading colon TOOL_DIRS="${TOOL_DIRS#:}" diff --git a/docker_entrypoint.sh b/docker_entrypoint.sh new file mode 100755 index 00000000..18f1316a --- /dev/null +++ b/docker_entrypoint.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +# Entrypoint for VCFX Docker image. +# It adds VCFX tool directories to the PATH and then executes the given command. 
+ +# Source the helper script if available +if [ -f /usr/local/bin/add_vcfx_tools_to_path.sh ]; then + source /usr/local/bin/add_vcfx_tools_to_path.sh +fi + +exec "$@" From 594f332bb0a97edc9c9421d3c56725063a3ee7b7 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Fri, 23 May 2025 00:53:19 +0100 Subject: [PATCH 23/54] fix docker tests to use bash --- tests/test_docker.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_docker.sh b/tests/test_docker.sh index 2de967e7..58f9158d 100755 --- a/tests/test_docker.sh +++ b/tests/test_docker.sh @@ -46,36 +46,36 @@ check_success "Created temporary output directory" # Test 1: List available tools echo "๐Ÿ“‹ Listing available VCFX tools..." -docker run --rm $VCFX_IMAGE 'ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename' +docker run --rm $VCFX_IMAGE bash -c 'ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename' check_success "Listed available tools" # Test 2: Validator test echo "๐Ÿ” Testing VCFX_validator..." -docker run --rm -v "${TESTS_DIR}:/tests" $VCFX_IMAGE 'cat /tests/data/valid.vcf | VCFX_validator' +docker run --rm -v "${TESTS_DIR}:/tests" $VCFX_IMAGE bash -c 'cat /tests/data/valid.vcf | VCFX_validator' check_success "Validated valid.vcf file" # Test 3: Allele frequency calculator test echo "๐Ÿงฎ Testing VCFX_allele_freq_calc..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - $VCFX_IMAGE 'cat /tests/data/allele_freq_calc/simple.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' + $VCFX_IMAGE bash -c 'cat /tests/data/allele_freq_calc/simple.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' check_success "Calculated allele frequencies" # Test 4: Sample extractor test echo "๐Ÿ‘ฅ Testing VCFX_sample_extractor..." 
docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - $VCFX_IMAGE 'cat /tests/data/valid.vcf | VCFX_sample_extractor --samples SAMPLE1 > /output/sample1.vcf' + $VCFX_IMAGE bash -c 'cat /tests/data/valid.vcf | VCFX_sample_extractor --samples SAMPLE1 > /output/sample1.vcf' check_success "Extracted sample" # Test 5: Variant classifier test echo "๐Ÿ”ฌ Testing VCFX_variant_classifier..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - $VCFX_IMAGE 'cat /tests/data/classifier_mixed.vcf | VCFX_variant_classifier --append-info > /output/classified.vcf' + $VCFX_IMAGE bash -c 'cat /tests/data/classifier_mixed.vcf | VCFX_variant_classifier --append-info > /output/classified.vcf' check_success "Classified variants" # Test 6: Testing a pipeline of commands echo "๐Ÿ”„ Testing a pipeline of VCFX tools..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - $VCFX_IMAGE 'cat /tests/data/valid.vcf | VCFX_validator | VCFX_variant_classifier --append-info | VCFX_allele_freq_calc > /output/pipeline_output.tsv' + $VCFX_IMAGE bash -c 'cat /tests/data/valid.vcf | VCFX_validator | VCFX_variant_classifier --append-info | VCFX_allele_freq_calc > /output/pipeline_output.tsv' check_success "Executed pipeline of tools" echo "๐ŸŽ‰ All Docker tests completed successfully!" 
From 22052bdf724f0c3ce874062df5d964a7fe1190c4 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Fri, 23 May 2025 10:52:24 +0100 Subject: [PATCH 24/54] Fix validator passthrough --- docs/VCFX_validator.md | 8 ++++++-- site/VCFX_validator/index.html | 5 +++-- src/VCFX_validator/VCFX_validator.cpp | 7 ++++++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/VCFX_validator.md b/docs/VCFX_validator.md index 3b850697..e3d8bcac 100644 --- a/docs/VCFX_validator.md +++ b/docs/VCFX_validator.md @@ -75,15 +75,19 @@ When `--strict` is used, additional checks are applied: ### Basic Validation Check if a VCF file is valid: ```bash -VCFX_validator < input.vcf +VCFX_validator < input.vcf > validated.vcf ``` ### Using Strict Mode Enable stricter validation with additional checks: ```bash -VCFX_validator --strict < input.vcf +VCFX_validator --strict < input.vcf > validated.vcf ``` +When the input is valid, the original VCF is written unchanged to standard output, +allowing `VCFX_validator` to be used as a filter in processing pipelines. Informational +messages such as `VCF file is valid.` are printed to standard error. + ### Redirecting Error Messages Save validation errors to a file: ```bash diff --git a/site/VCFX_validator/index.html b/site/VCFX_validator/index.html index 68d9582c..f68f8549 100644 --- a/site/VCFX_validator/index.html +++ b/site/VCFX_validator/index.html @@ -2884,12 +2884,13 @@

Data LinesExamples

Basic Validation

Check if a VCF file is valid: -

VCFX_validator < input.vcf
+
VCFX_validator < input.vcf > validated.vcf
 

Using Strict Mode

Enable stricter validation with additional checks: -

VCFX_validator --strict < input.vcf
+
VCFX_validator --strict < input.vcf > validated.vcf
 

+

When the input is valid, the VCF contents are echoed to standard output so the tool can be used in pipelines. Informational messages such as VCF file is valid. are written to standard error.

Redirecting Error Messages

Save validation errors to a file:

VCFX_validator < input.vcf 2> validation_errors.txt
diff --git a/src/VCFX_validator/VCFX_validator.cpp b/src/VCFX_validator/VCFX_validator.cpp
index bc5f1c90..c360636b 100644
--- a/src/VCFX_validator/VCFX_validator.cpp
+++ b/src/VCFX_validator/VCFX_validator.cpp
@@ -260,6 +260,7 @@ bool VCFXValidator::validateVCF(std::istream &in){
     std::string line;
     int lineNum=0;
     bool foundChromLine= false;
+    std::vector<std::string> lines;
 
     while(true){
         if(!std::getline(in, line)) break;
@@ -290,12 +291,16 @@ bool VCFXValidator::validateVCF(std::istream &in){
             }
             if(!validateDataLine(line, lineNum)) return false;
         }
+        lines.push_back(line);
     }
     if(!foundChromLine){
         std::cerr<<"Error: no #CHROM line found in file.\n";
         return false;
     }
-    std::cout<<"VCF file is valid.\n";
+    for(const auto &l : lines){
+        std::cout << l << '\n';
+    }
+    std::cerr<<"VCF file is valid.\n";
     return true;
 }
 

From 83ef866f40d3c1d9eb439b784a0cba453764af66 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 11:34:04 +0100
Subject: [PATCH 25/54] Simplify tool discovery in CMake

---
 src/CMakeLists.txt | 136 ++++-----------------------------------------
 1 file changed, 11 insertions(+), 125 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 2fadba0a..ef6dc6ef 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -7,137 +7,23 @@ target_link_libraries(vcfx_core PUBLIC ZLIB::ZLIB)
 
 # Add all tool subdirectories
 add_subdirectory(vcfx_wrapper)
-add_subdirectory(VCFX_header_parser)
-add_subdirectory(VCFX_record_filter)
-add_subdirectory(VCFX_field_extractor)
-add_subdirectory(VCFX_format_converter)
-add_subdirectory(VCFX_variant_counter)
-add_subdirectory(VCFX_sample_extractor)
-add_subdirectory(VCFX_sorter)
-add_subdirectory(VCFX_validator)
-add_subdirectory(VCFX_subsampler)
-add_subdirectory(VCFX_genotype_query)
-add_subdirectory(VCFX_allele_freq_calc)
-add_subdirectory(VCFX_indexer)
-add_subdirectory(VCFX_compressor)
-add_subdirectory(VCFX_position_subsetter)
-add_subdirectory(VCFX_haplotype_extractor)
-add_subdirectory(VCFX_info_parser)
-add_subdirectory(VCFX_variant_classifier)
-add_subdirectory(VCFX_duplicate_remover)
-add_subdirectory(VCFX_info_summarizer)
-add_subdirectory(VCFX_distance_calculator)
-add_subdirectory(VCFX_multiallelic_splitter)
-add_subdirectory(VCFX_missing_data_handler)
-add_subdirectory(VCFX_concordance_checker)
-add_subdirectory(VCFX_allele_balance_calc)
-add_subdirectory(VCFX_allele_counter)
-add_subdirectory(VCFX_phase_checker)
-add_subdirectory(VCFX_annotation_extractor)
-add_subdirectory(VCFX_phred_filter)
-add_subdirectory(VCFX_merger)
-add_subdirectory(VCFX_metadata_summarizer)
-add_subdirectory(VCFX_hwe_tester)
-add_subdirectory(VCFX_fasta_converter)
-add_subdirectory(VCFX_nonref_filter)
-add_subdirectory(VCFX_dosage_calculator)
-add_subdirectory(VCFX_population_filter)
-add_subdirectory(VCFX_file_splitter)
-add_subdirectory(VCFX_gl_filter)
-add_subdirectory(VCFX_ref_comparator)
-add_subdirectory(VCFX_ancestry_inferrer)
-add_subdirectory(VCFX_impact_filter)
-add_subdirectory(VCFX_info_aggregator)
-add_subdirectory(VCFX_probability_filter)
-add_subdirectory(VCFX_diff_tool)
-add_subdirectory(VCFX_cross_sample_concordance)
-add_subdirectory(VCFX_phase_quality_filter)
-add_subdirectory(VCFX_indel_normalizer)
-add_subdirectory(VCFX_custom_annotator)
-add_subdirectory(VCFX_region_subsampler)
-add_subdirectory(VCFX_allele_balance_filter)
-add_subdirectory(VCFX_missing_detector)
-add_subdirectory(VCFX_haplotype_phaser)
-add_subdirectory(VCFX_af_subsetter)
-add_subdirectory(VCFX_sv_handler)
-add_subdirectory(VCFX_reformatter)
-add_subdirectory(VCFX_quality_adjuster)
-add_subdirectory(VCFX_inbreeding_calculator)
-add_subdirectory(VCFX_outlier_detector)
-add_subdirectory(VCFX_alignment_checker)
-add_subdirectory(VCFX_ancestry_assigner)
-add_subdirectory(VCFX_ld_calculator)
+
+# Automatically detect tool directories named "VCFX_*" and
+# build/install them.
+set(VCFX_TOOLS vcfx)
+file(GLOB TOOL_DIRS RELATIVE ${CMAKE_CURRENT_LIST_DIR} VCFX_*)
+foreach(dir ${TOOL_DIRS})
+    if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/${dir}")
+        add_subdirectory(${dir})
+        list(APPEND VCFX_TOOLS ${dir})
+    endif()
+endforeach()
 
 # Install the core library
 install(TARGETS vcfx_core
         ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
 )
 
-# Define a list of all tool executables for installation
-set(VCFX_TOOLS
-    vcfx
-    VCFX_header_parser
-    VCFX_record_filter
-    VCFX_field_extractor
-    VCFX_format_converter
-    VCFX_variant_counter
-    VCFX_sample_extractor
-    VCFX_sorter
-    VCFX_validator
-    VCFX_subsampler
-    VCFX_genotype_query
-    VCFX_allele_freq_calc
-    VCFX_indexer
-    VCFX_compressor
-    VCFX_position_subsetter
-    VCFX_haplotype_extractor
-    VCFX_info_parser
-    VCFX_variant_classifier
-    VCFX_duplicate_remover
-    VCFX_info_summarizer
-    VCFX_distance_calculator
-    VCFX_multiallelic_splitter
-    VCFX_missing_data_handler
-    VCFX_concordance_checker
-    VCFX_allele_balance_calc
-    VCFX_allele_counter
-    VCFX_phase_checker
-    VCFX_annotation_extractor
-    VCFX_phred_filter
-    VCFX_merger
-    VCFX_metadata_summarizer
-    VCFX_hwe_tester
-    VCFX_fasta_converter
-    VCFX_nonref_filter
-    VCFX_dosage_calculator
-    VCFX_population_filter
-    VCFX_file_splitter
-    VCFX_gl_filter
-    VCFX_ref_comparator
-    VCFX_ancestry_inferrer
-    VCFX_impact_filter
-    VCFX_info_aggregator
-    VCFX_probability_filter
-    VCFX_diff_tool
-    VCFX_cross_sample_concordance
-    VCFX_phase_quality_filter
-    VCFX_indel_normalizer
-    VCFX_custom_annotator
-    VCFX_region_subsampler
-    VCFX_allele_balance_filter
-    VCFX_missing_detector
-    VCFX_haplotype_phaser
-    VCFX_af_subsetter
-    VCFX_sv_handler
-    VCFX_reformatter
-    VCFX_quality_adjuster
-    VCFX_inbreeding_calculator
-    VCFX_outlier_detector
-    VCFX_alignment_checker
-    VCFX_ancestry_assigner
-    VCFX_ld_calculator
-)
-
 # Install all tool executables
 install(TARGETS ${VCFX_TOOLS}
         RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}

From 7144d0ada18557ec0c3fb1a91604a7c3835b2ae5 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 13:10:33 +0100
Subject: [PATCH 26/54] Fix macOS test failures

---
 .github/workflows/build-test.yml | 23 +++++++++++++++++++++--
 tests/test_custom_annotator.sh   | 12 ++++++++++--
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 9fea25be..95820fb3 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -8,23 +8,42 @@ on:
 
 jobs:
   build-and-test:
-    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest, windows-latest]
+    runs-on: ${{ matrix.os }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@v3
 
-      - name: Install dependencies
+      - name: Install dependencies (Linux)
+        if: runner.os == 'Linux'
         run: |
           sudo apt-get update
           sudo apt-get install -y build-essential cmake libz-dev
 
+      - name: Install dependencies (macOS)
+        if: runner.os == 'macOS'
+        run: |
+          brew update
+          brew install cmake zlib bash
+          echo "$(brew --prefix)/bin" >> $GITHUB_PATH
+
+      - name: Install dependencies (Windows)
+        if: runner.os == 'Windows'
+        run: |
+          choco install -y cmake zlib
+
       - name: Configure
         run: cmake -S . -B build
+        shell: bash
 
       - name: Build
         run: cmake --build build -- -j
+        shell: bash
 
       - name: Run tests
         run: |
           cd build
           ctest --output-on-failure
+        shell: bash
diff --git a/tests/test_custom_annotator.sh b/tests/test_custom_annotator.sh
index ce42c649..cf7b45e8 100755
--- a/tests/test_custom_annotator.sh
+++ b/tests/test_custom_annotator.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 # Exit on error
 set -e
@@ -125,11 +125,19 @@ for i in $(seq 1 1000); do
     echo "1	$i	A	G	Annotation$i"
 done > "$SCRIPT_DIR/data/large_annotations.txt"
 # Add VCF header
-sed -i '1i\
+if [[ "$(uname)" == "Darwin" ]]; then
+    sed -i '' '1i\
 ##fileformat=VCFv4.2\
 ##contig=\
 #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  SAMPLE1\
 ' "$SCRIPT_DIR/data/large_input.vcf"
+else
+    sed -i '1i\
+##fileformat=VCFv4.2\
+##contig=\
+#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  SAMPLE1\
+' "$SCRIPT_DIR/data/large_input.vcf"
+fi
 
 time "$ROOT_DIR/build/src/VCFX_custom_annotator/VCFX_custom_annotator" --add-annotation "$SCRIPT_DIR/data/large_annotations.txt" < "$SCRIPT_DIR/data/large_input.vcf" > "$SCRIPT_DIR/data/large_output.vcf"
 if [ $? -eq 0 ]; then

From 5d7a1e8899c2c439a24dc9e5d572a6c44ca62615 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 15:27:25 +0100
Subject: [PATCH 27/54] fix: avoid duplicate add_subdirectory on
 case-insensitive FS

---
 src/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index ef6dc6ef..4816d1fc 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -12,6 +12,7 @@ add_subdirectory(vcfx_wrapper)
 # build/install them.
 set(VCFX_TOOLS vcfx)
 file(GLOB TOOL_DIRS RELATIVE ${CMAKE_CURRENT_LIST_DIR} VCFX_*)
+list(REMOVE_ITEM TOOL_DIRS vcfx_wrapper) # avoid duplicate on case-insensitive filesystems
 foreach(dir ${TOOL_DIRS})
     if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/${dir}")
         add_subdirectory(${dir})

From 0c7d31543bd6ab1baf8cc874e857b4f7d3a616eb Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 16:29:11 +0100
Subject: [PATCH 28/54] Fix Windows getopt

---
 .github/workflows/build-test.yml | 11 +++++++++--
 src/CMakeLists.txt               |  3 +++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 95820fb3..3863410f 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -31,12 +31,19 @@ jobs:
 
       - name: Install dependencies (Windows)
         if: runner.os == 'Windows'
+        shell: pwsh
         run: |
-          choco install -y cmake zlib
+          choco install -y cmake
+          & "$env:VCPKG_INSTALLATION_ROOT\vcpkg.exe" install zlib getopt-win32
 
       - name: Configure
-        run: cmake -S . -B build
         shell: bash
+        run: |
+          if [ "$RUNNER_OS" = "Windows" ]; then
+            cmake -S . -B build -DCMAKE_TOOLCHAIN_FILE="$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"
+          else
+            cmake -S . -B build
+          fi
 
       - name: Build
         run: cmake --build build -- -j
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4816d1fc..93d6adfc 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -4,6 +4,9 @@ cmake_minimum_required(VERSION 3.14)
 add_library(vcfx_core STATIC vcfx_core.cpp)
 target_include_directories(vcfx_core PUBLIC ${CMAKE_CURRENT_LIST_DIR}/../include)
 target_link_libraries(vcfx_core PUBLIC ZLIB::ZLIB)
+if(WIN32)
+    target_link_libraries(vcfx_core PUBLIC getopt)
+endif()
 
 # Add all tool subdirectories
 add_subdirectory(vcfx_wrapper)

From 9fb2be02408906df5cb1044232f4608437201a12 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 16:56:16 +0100
Subject: [PATCH 29/54] fix: use cmake parallel build

---
 .github/workflows/build-test.yml | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 95820fb3..4124f850 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -31,15 +31,22 @@ jobs:
 
       - name: Install dependencies (Windows)
         if: runner.os == 'Windows'
+        shell: pwsh
         run: |
-          choco install -y cmake zlib
+          choco install -y cmake
+          & "$env:VCPKG_INSTALLATION_ROOT\vcpkg.exe" install zlib
 
       - name: Configure
-        run: cmake -S . -B build
         shell: bash
+        run: |
+          if [ "$RUNNER_OS" = "Windows" ]; then
+            cmake -S . -B build -DCMAKE_TOOLCHAIN_FILE="$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"
+          else
+            cmake -S . -B build
+          fi
 
       - name: Build
-        run: cmake --build build -- -j
+        run: cmake --build build --parallel
         shell: bash
 
       - name: Run tests

From b8a6b8fc36ecfb0a2d06ad08537affaf3dfc8f59 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 17:10:32 +0100
Subject: [PATCH 30/54] Fix Windows build by installing getopt

---
 .github/workflows/build-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index fec9b043..2bb744d3 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -34,7 +34,7 @@ jobs:
         shell: pwsh
         run: |
           choco install -y cmake
-          & "$env:VCPKG_INSTALLATION_ROOT\vcpkg.exe" install zlib
+          & "$env:VCPKG_INSTALLATION_ROOT\vcpkg.exe" install zlib getopt
 
 
       - name: Configure

From 06803785ff73af78142905bd1527363b1100b130 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 17:45:21 +0100
Subject: [PATCH 31/54] Remove Windows references

---
 .github/workflows/build-test.yml | 18 ++----------------
 docs/VCFX_header_parser.md       |  2 +-
 docs/VCFX_indexer.md             |  2 +-
 docs/VCFX_info_aggregator.md     |  2 +-
 docs/VCFX_info_parser.md         |  2 +-
 tests/test_indexer.sh            |  4 ++--
 tests/test_indexer_fixed.sh      |  4 ++--
 tests/test_info_aggregator.sh    |  4 ++--
 tests/test_info_parser.sh        |  2 +-
 tests/test_info_summarizer.sh    |  2 +-
 10 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 2bb744d3..a95241db 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -10,7 +10,7 @@ jobs:
   build-and-test:
     strategy:
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-latest, macos-latest]
     runs-on: ${{ matrix.os }}
     steps:
       - name: Checkout repository
@@ -29,23 +29,9 @@ jobs:
           brew install cmake zlib bash
           echo "$(brew --prefix)/bin" >> $GITHUB_PATH
 
-      - name: Install dependencies (Windows)
-        if: runner.os == 'Windows'
-        shell: pwsh
-        run: |
-          choco install -y cmake
-          & "$env:VCPKG_INSTALLATION_ROOT\vcpkg.exe" install zlib getopt
-
-
       - name: Configure
+        run: cmake -S . -B build
         shell: bash
-        run: |
-          if [ "$RUNNER_OS" = "Windows" ]; then
-            cmake -S . -B build -DCMAKE_TOOLCHAIN_FILE="$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"
-          else
-            cmake -S . -B build
-          fi
-
       - name: Build
         run: cmake --build build --parallel
         shell: bash
diff --git a/docs/VCFX_header_parser.md b/docs/VCFX_header_parser.md
index 8848873f..9706f9cd 100644
--- a/docs/VCFX_header_parser.md
+++ b/docs/VCFX_header_parser.md
@@ -87,7 +87,7 @@ The tool implements simple strategies for handling edge cases:
 1. **Empty files**: If the input file is empty, no output is produced
 2. **Files without headers**: If the file has no header lines, no output is produced
 3. **Malformed headers**: All lines starting with "#" are considered header lines, even if they don't follow VCF specifications
-4. **Line endings**: Both Unix (LF) and Windows (CRLF) line endings are handled correctly
+4. **Line endings**: LF and CRLF line endings are handled correctly
 5. **Partial headers**: If the file ends in the middle of the header section, all header lines up to that point are output
 
 ## Performance
diff --git a/docs/VCFX_indexer.md b/docs/VCFX_indexer.md
index 7d85ddf0..7be8c2c9 100644
--- a/docs/VCFX_indexer.md
+++ b/docs/VCFX_indexer.md
@@ -71,7 +71,7 @@ tail -c +23456 input.vcf | head -1
 
 ### File Format Detection
 
-- The tool automatically handles both Unix (LF) and Windows (CRLF) line endings
+- The tool automatically handles LF and CRLF line endings
 - Byte offsets are calculated correctly regardless of the line ending style
 
 ### Malformed VCF Files
diff --git a/docs/VCFX_info_aggregator.md b/docs/VCFX_info_aggregator.md
index 1f7857ef..a1debf52 100644
--- a/docs/VCFX_info_aggregator.md
+++ b/docs/VCFX_info_aggregator.md
@@ -79,7 +79,7 @@ The tool implements several strategies for handling edge cases:
 2. **Missing fields**: If a specified INFO field is not present in a particular variant, it is simply skipped for that variant.
 3. **Empty input**: The tool will process empty files correctly, reporting zeros for sums and averages.
 4. **Malformed VCF**: If a data line is encountered before the `#CHROM` header, an error is reported.
-5. **Line endings**: The tool correctly handles both Unix (LF) and Windows (CRLF) line endings.
+5. **Line endings**: The tool correctly handles LF and CRLF line endings.
 6. **Partial final line**: The tool properly processes files that do not end with a newline character.
 
 ## Performance
diff --git a/docs/VCFX_info_parser.md b/docs/VCFX_info_parser.md
index a93a64a8..9a4e8e64 100644
--- a/docs/VCFX_info_parser.md
+++ b/docs/VCFX_info_parser.md
@@ -82,7 +82,7 @@ The tool implements several strategies for handling edge cases:
 3. **Malformed lines**: Lines that don't conform to VCF format are skipped with a warning message
 4. **Empty input**: The tool correctly handles empty input files
 5. **Header lines**: VCF header lines (starting with #) are skipped
-6. **Line endings**: Both Unix (LF) and Windows (CRLF) line endings are supported
+6. **Line endings**: LF and CRLF line endings are supported
 7. **Partial final line**: Files without a final newline character are processed correctly
 
 ## Performance
diff --git a/tests/test_indexer.sh b/tests/test_indexer.sh
index 61d080e4..68626e08 100755
--- a/tests/test_indexer.sh
+++ b/tests/test_indexer.sh
@@ -160,9 +160,9 @@ fi
 echo "✓ Test 6 passed"
 
 ###############################################################################
-# Test 7: Windows (CRLF) line endings
+# Test 7: CRLF line endings
 ###############################################################################
-echo "Test 7: Windows CRLF line endings"
+echo "Test 7: CRLF line endings"
 cat > "${SCRIPT_DIR}/data/indexer/crlf_unix.vcf" < "${SCRIPT_DIR}/data/indexer/crlf_unix.vcf" << 'EOF'
 ##fileformat=VCFv4.2
diff --git a/tests/test_info_aggregator.sh b/tests/test_info_aggregator.sh
index 3e9bb472..3926f23a 100755
--- a/tests/test_info_aggregator.sh
+++ b/tests/test_info_aggregator.sh
@@ -178,9 +178,9 @@ echo "✓ Test 7 passed"
 
 
 ###############################################################################
-# Test 8: Windows CRLF line endings
+# Test 8: CRLF line endings
 ###############################################################################
-echo "Test 8: Windows CRLF line endings"
+echo "Test 8: CRLF line endings"
 cat > "${SCRIPT_DIR}/data/aggregator/crlf_unix.vcf" << EOF
 ##fileformat=VCFv4.2
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
diff --git a/tests/test_info_parser.sh b/tests/test_info_parser.sh
index e6efdba5..af431d65 100755
--- a/tests/test_info_parser.sh
+++ b/tests/test_info_parser.sh
@@ -172,7 +172,7 @@ grep -q "^1[[:space:]]200" "${SCRIPT_DIR}/data/info_parser/invalid.out" || (echo
 echo "✓ Test 7 passed"
 
 ###############################################################################
-# Test 8: Windows CRLF line endings
+# Test 8: CRLF line endings
 ###############################################################################
 echo "Test 8: CRLF line endings"
 cat > "${SCRIPT_DIR}/data/info_parser/crlf_unix.vcf" <
Date: Fri, 23 May 2025 18:09:33 +0100
Subject: [PATCH 32/54] feat: add --version support across tools

---
 .clang-format                                 |  3 +++
 CMakeLists.txt                                |  2 ++
 docs/CONTRIBUTING.md                          | 12 +++++++++++
 docs/VCFX_af_subsetter.md                     |  1 +
 docs/VCFX_alignment_checker.md                |  1 +
 docs/VCFX_allele_balance_calc.md              |  1 +
 docs/VCFX_allele_balance_filter.md            |  1 +
 docs/VCFX_allele_counter.md                   |  1 +
 docs/VCFX_ancestry_assigner.md                |  1 +
 docs/VCFX_ancestry_inferrer.md                |  1 +
 docs/VCFX_annotation_extractor.md             |  1 +
 docs/VCFX_compressor.md                       |  1 +
 docs/VCFX_concordance_checker.md              |  1 +
 docs/VCFX_cross_sample_concordance.md         |  1 +
 docs/VCFX_custom_annotator.md                 |  1 +
 docs/VCFX_diff_tool.md                        |  1 +
 docs/VCFX_distance_calculator.md              |  1 +
 docs/VCFX_dosage_calculator.md                |  1 +
 docs/VCFX_duplicate_remover.md                |  1 +
 docs/VCFX_fasta_converter.md                  |  1 +
 docs/VCFX_field_extractor.md                  |  1 +
 docs/VCFX_file_splitter.md                    |  1 +
 docs/VCFX_gl_filter.md                        |  1 +
 docs/VCFX_haplotype_extractor.md              |  1 +
 docs/VCFX_haplotype_phaser.md                 |  1 +
 docs/VCFX_header_parser.md                    |  1 +
 docs/VCFX_hwe_tester.md                       |  1 +
 docs/VCFX_impact_filter.md                    |  1 +
 docs/VCFX_inbreeding_calculator.md            |  1 +
 docs/VCFX_indel_normalizer.md                 |  1 +
 docs/VCFX_indexer.md                          |  1 +
 docs/VCFX_info_aggregator.md                  |  1 +
 docs/VCFX_info_parser.md                      |  1 +
 docs/VCFX_info_summarizer.md                  |  1 +
 docs/VCFX_ld_calculator.md                    |  1 +
 docs/VCFX_missing_detector.md                 |  1 +
 docs/VCFX_nonref_filter.md                    |  1 +
 docs/VCFX_phase_checker.md                    |  1 +
 docs/VCFX_phred_filter.md                     |  1 +
 docs/VCFX_population_filter.md                |  1 +
 docs/VCFX_position_subsetter.md               |  1 +
 docs/VCFX_quality_adjuster.md                 |  1 +
 docs/VCFX_record_filter.md                    |  1 +
 docs/VCFX_ref_comparator.md                   |  1 +
 docs/VCFX_sample_extractor.md                 |  1 +
 docs/VCFX_sorter.md                           |  1 +
 docs/VCFX_sv_handler.md                       |  1 +
 docs/VCFX_validator.md                        |  1 +
 docs/VCFX_variant_classifier.md               |  1 +
 docs/VCFX_variant_counter.md                  |  1 +
 docs/tools_overview.md                        |  1 +
 include/vcfx_core.h                           | 21 +++++++++++++++++++
 src/VCFX_af_subsetter/VCFX_af_subsetter.cpp   |  2 ++
 .../VCFX_alignment_checker.cpp                |  2 ++
 .../VCFX_allele_balance_calc.cpp              |  2 ++
 .../VCFX_allele_balance_filter.cpp            |  2 ++
 .../VCFX_allele_counter.cpp                   |  2 ++
 .../VCFX_allele_freq_calc.cpp                 |  2 ++
 .../VCFX_ancestry_assigner.cpp                |  2 ++
 .../VCFX_ancestry_inferrer.cpp                |  2 ++
 .../VCFX_annotation_extractor.cpp             |  2 ++
 src/VCFX_compressor/VCFX_compressor.cpp       |  2 ++
 .../VCFX_concordance_checker.cpp              |  2 ++
 .../VCFX_cross_sample_concordance.cpp         |  2 ++
 .../VCFX_custom_annotator.cpp                 |  2 ++
 src/VCFX_diff_tool/VCFX_diff_tool.cpp         |  2 ++
 .../VCFX_distance_calculator.cpp              |  2 ++
 .../VCFX_dosage_calculator.cpp                |  2 ++
 .../VCFX_duplicate_remover.cpp                |  2 ++
 .../VCFX_fasta_converter.cpp                  |  2 ++
 .../VCFX_field_extractor.cpp                  |  2 ++
 src/VCFX_file_splitter/VCFX_file_splitter.cpp |  2 ++
 .../VCFX_format_converter.cpp                 |  2 ++
 .../VCFX_genotype_query.cpp                   |  2 ++
 src/VCFX_gl_filter/VCFX_gl_filter.cpp         |  2 ++
 .../VCFX_haplotype_extractor.cpp              |  2 ++
 .../VCFX_haplotype_phaser.cpp                 |  2 ++
 src/VCFX_header_parser/VCFX_header_parser.cpp |  2 ++
 src/VCFX_hwe_tester/VCFX_hwe_tester.cpp       |  2 ++
 src/VCFX_impact_filter/VCFX_impact_filter.cpp |  2 ++
 .../VCFX_inbreeding_calculator.cpp            |  2 ++
 .../VCFX_indel_normalizer.cpp                 |  2 ++
 src/VCFX_indexer/VCFX_indexer.cpp             |  2 ++
 .../VCFX_info_aggregator.cpp                  |  2 ++
 src/VCFX_info_parser/VCFX_info_parser.cpp     |  2 ++
 .../VCFX_info_summarizer.cpp                  |  2 ++
 src/VCFX_ld_calculator/VCFX_ld_calculator.cpp |  2 ++
 src/VCFX_merger/VCFX_merger.cpp               |  2 ++
 .../VCFX_metadata_summarizer.cpp              |  2 ++
 .../VCFX_missing_data_handler.cpp             |  2 ++
 .../VCFX_missing_detector.cpp                 |  2 ++
 .../VCFX_multiallelic_splitter.cpp            |  2 ++
 src/VCFX_nonref_filter/VCFX_nonref_filter.cpp |  2 ++
 .../VCFX_outlier_detector.cpp                 |  2 ++
 src/VCFX_phase_checker/VCFX_phase_checker.cpp |  2 ++
 .../VCFX_phase_quality_filter.cpp             |  2 ++
 src/VCFX_phred_filter/VCFX_phred_filter.cpp   |  2 ++
 .../VCFX_population_filter.cpp                |  2 ++
 .../VCFX_position_subsetter.cpp               |  2 ++
 .../VCFX_probability_filter.cpp               |  2 ++
 .../VCFX_quality_adjuster.cpp                 |  2 ++
 src/VCFX_record_filter/VCFX_record_filter.cpp |  2 ++
 .../VCFX_ref_comparator.cpp                   |  2 ++
 src/VCFX_reformatter/VCFX_reformatter.cpp     |  2 ++
 .../VCFX_region_subsampler.cpp                |  2 ++
 .../VCFX_sample_extractor.cpp                 |  2 ++
 src/VCFX_sorter/VCFX_sorter.cpp               |  2 ++
 src/VCFX_subsampler/VCFX_subsampler.cpp       |  2 ++
 src/VCFX_sv_handler/VCFX_sv_handler.cpp       |  2 ++
 src/VCFX_validator/VCFX_validator.cpp         |  2 ++
 .../VCFX_variant_classifier.cpp               |  2 ++
 .../VCFX_variant_counter.cpp                  |  1 +
 112 files changed, 205 insertions(+)
 create mode 100644 .clang-format

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 00000000..ed5e6cc4
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,3 @@
+BasedOnStyle: LLVM
+IndentWidth: 4
+ColumnLimit: 120
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 20f51dca..043cf974 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,6 +6,8 @@ set(VCFX_VERSION_MINOR 0)
 set(VCFX_VERSION_PATCH 2)
 set(VCFX_VERSION "${VCFX_VERSION_MAJOR}.${VCFX_VERSION_MINOR}.${VCFX_VERSION_PATCH}")
 
+add_compile_definitions(VCFX_VERSION="${VCFX_VERSION}")
+
 project(VCFX 
   VERSION ${VCFX_VERSION}
   DESCRIPTION "A Comprehensive VCF Manipulation Toolkit"
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
index ed264c7d..354ca654 100644
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -32,6 +32,7 @@ We welcome suggestions for new features or improvements to existing functionalit
 1. Fork the repository
 2. Create a new branch for your feature or bug fix
 3. Write your code, following our coding standards
+   (run `clang-format -i <file>` before committing)
 4. Add tests for your changes
 5. Ensure all tests pass
 6. Update documentation as needed
@@ -40,6 +41,17 @@ We welcome suggestions for new features or improvements to existing functionalit
 
 ## Development Setup
 
+### Code Formatting
+
+We use `clang-format` to keep the C++ code style consistent. A basic configuration
+is provided in `.clang-format` at the repository root. Please run:
+
+```bash
+clang-format -i path/to/changed_file.cpp
+```
+
+before committing changes.
+
 ### Prerequisites
 
 - CMake (version 3.10 or higher)
diff --git a/docs/VCFX_af_subsetter.md b/docs/VCFX_af_subsetter.md
index 91871cda..59b79b24 100644
--- a/docs/VCFX_af_subsetter.md
+++ b/docs/VCFX_af_subsetter.md
@@ -13,6 +13,7 @@ VCFX_af_subsetter --af-filter "MIN-MAX" < input.vcf > filtered.vcf
 |--------|-------------|
 | `-a`, `--af-filter ` | Required. Allele frequency range for filtering (e.g., `0.01-0.05`) |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_af_subsetter` processes VCF files line by line and filters variants based on their allele frequency (AF) values from the INFO field. The tool:
diff --git a/docs/VCFX_alignment_checker.md b/docs/VCFX_alignment_checker.md
index 62c5f264..f7c9dc48 100644
--- a/docs/VCFX_alignment_checker.md
+++ b/docs/VCFX_alignment_checker.md
@@ -13,6 +13,7 @@ VCFX_alignment_checker --alignment-discrepancy   > di
 |--------|-------------|
 | `-a`, `--alignment-discrepancy` | Enable alignment discrepancy checking mode |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_alignment_checker` compares VCF variants against a reference genome to validate sequence consistency. The tool:
diff --git a/docs/VCFX_allele_balance_calc.md b/docs/VCFX_allele_balance_calc.md
index 567e0c83..2ce451e7 100644
--- a/docs/VCFX_allele_balance_calc.md
+++ b/docs/VCFX_allele_balance_calc.md
@@ -13,6 +13,7 @@ VCFX_allele_balance_calc [OPTIONS] < input.vcf > allele_balance.tsv
 |--------|-------------|
 | `-s`, `--samples "Sample1 Sample2..."` | Optional. Specify sample names to calculate allele balance for (space-separated). If omitted, all samples are processed. |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_allele_balance_calc` processes a VCF file and calculates the allele balance for each variant in each specified sample. The tool:
diff --git a/docs/VCFX_allele_balance_filter.md b/docs/VCFX_allele_balance_filter.md
index bf9c6227..34ca1c27 100644
--- a/docs/VCFX_allele_balance_filter.md
+++ b/docs/VCFX_allele_balance_filter.md
@@ -16,6 +16,7 @@ VCFX_allele_balance_filter --filter-allele-balance  < input.vcf > fil
 |--------|-------------|
 | `-f`, `--filter-allele-balance`  | Required. Allele balance threshold between 0.0 and 1.0 |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_allele_counter.md b/docs/VCFX_allele_counter.md
index 7031632a..492354df 100644
--- a/docs/VCFX_allele_counter.md
+++ b/docs/VCFX_allele_counter.md
@@ -13,6 +13,7 @@ VCFX_allele_counter [OPTIONS] < input.vcf > allele_counts.tsv
 |--------|-------------|
 | `-s`, `--samples "Sample1 Sample2..."` | Optional. Specify sample names to calculate allele counts for (space-separated). If omitted, all samples are processed. |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_allele_counter` processes a VCF file and counts reference and alternate alleles for each variant in each specified sample. The tool:
diff --git a/docs/VCFX_ancestry_assigner.md b/docs/VCFX_ancestry_assigner.md
index b80d0a04..40bfba1c 100644
--- a/docs/VCFX_ancestry_assigner.md
+++ b/docs/VCFX_ancestry_assigner.md
@@ -16,6 +16,7 @@ VCFX_ancestry_assigner --assign-ancestry  < input.vcf > ancestry_resu
 |--------|-------------|
 | `-a`, `--assign-ancestry ` | Required. Path to a file containing population-specific allele frequencies |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_ancestry_inferrer.md b/docs/VCFX_ancestry_inferrer.md
index 8ee59361..33ab8fda 100644
--- a/docs/VCFX_ancestry_inferrer.md
+++ b/docs/VCFX_ancestry_inferrer.md
@@ -16,6 +16,7 @@ VCFX_ancestry_inferrer --frequency  [OPTIONS] < input.vcf > ancestry_
 |--------|-------------|
 | `--frequency ` | Required. Path to a file containing population-specific allele frequencies |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_annotation_extractor.md b/docs/VCFX_annotation_extractor.md
index aa4deda3..c492e918 100644
--- a/docs/VCFX_annotation_extractor.md
+++ b/docs/VCFX_annotation_extractor.md
@@ -16,6 +16,7 @@ VCFX_annotation_extractor --annotation-extract "FIELD1,FIELD2,..." < input.vcf >
 |--------|-------------|
 | `-a`, `--annotation-extract ` | Required. Comma-separated list of INFO field annotations to extract (e.g., "ANN,Gene,Impact") |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_compressor.md b/docs/VCFX_compressor.md
index 4f31f8e4..36060995 100644
--- a/docs/VCFX_compressor.md
+++ b/docs/VCFX_compressor.md
@@ -17,6 +17,7 @@ VCFX_compressor [OPTIONS] < input_file > output_file
 | `-c`, `--compress` | Compress the input VCF file (read from stdin, write to stdout) |
 | `-d`, `--decompress` | Decompress the input VCF.gz file (read from stdin, write to stdout) |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_concordance_checker.md b/docs/VCFX_concordance_checker.md
index 10aef346..11b55786 100644
--- a/docs/VCFX_concordance_checker.md
+++ b/docs/VCFX_concordance_checker.md
@@ -13,6 +13,7 @@ VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" < input.vcf > concordance_r
 |--------|-------------|
 | `-s`, `--samples "SAMPLE1 SAMPLE2"` | Required. Names of the two samples to compare, separated by a space |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_concordance_checker` analyzes a VCF file and compares the genotypes of two specified samples for each variant. The tool:
diff --git a/docs/VCFX_cross_sample_concordance.md b/docs/VCFX_cross_sample_concordance.md
index 2bfbf0d9..aa77e1bb 100644
--- a/docs/VCFX_cross_sample_concordance.md
+++ b/docs/VCFX_cross_sample_concordance.md
@@ -12,6 +12,7 @@ VCFX_cross_sample_concordance [OPTIONS] < input.vcf > concordance_results.tsv
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_cross_sample_concordance` examines each variant in a multi-sample VCF file and determines if all samples with valid genotypes have the same normalized genotype. The tool:
diff --git a/docs/VCFX_custom_annotator.md b/docs/VCFX_custom_annotator.md
index f238dc28..058f583e 100644
--- a/docs/VCFX_custom_annotator.md
+++ b/docs/VCFX_custom_annotator.md
@@ -16,6 +16,7 @@ VCFX_custom_annotator --add-annotation  [OPTIONS] < input.vcf >
 |--------|-------------|
 | `-a`, `--add-annotation ` | Required. Path to the annotation file containing the custom annotations |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_diff_tool.md b/docs/VCFX_diff_tool.md
index 4aa79332..eb204367 100644
--- a/docs/VCFX_diff_tool.md
+++ b/docs/VCFX_diff_tool.md
@@ -17,6 +17,7 @@ VCFX_diff_tool --file1  --file2 
 | `-a`, `--file1 ` | Required. Path to the first VCF file |
 | `-b`, `--file2 ` | Required. Path to the second VCF file |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_distance_calculator.md b/docs/VCFX_distance_calculator.md
index 4eeea7ac..c178715c 100644
--- a/docs/VCFX_distance_calculator.md
+++ b/docs/VCFX_distance_calculator.md
@@ -15,6 +15,7 @@ VCFX_distance_calculator [OPTIONS] < input.vcf > variant_distances.tsv
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_dosage_calculator.md b/docs/VCFX_dosage_calculator.md
index 227bc9ec..56239e97 100644
--- a/docs/VCFX_dosage_calculator.md
+++ b/docs/VCFX_dosage_calculator.md
@@ -15,6 +15,7 @@ VCFX_dosage_calculator [OPTIONS] < input.vcf > dosage_output.txt
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_duplicate_remover.md b/docs/VCFX_duplicate_remover.md
index 7075e956..6bad844d 100644
--- a/docs/VCFX_duplicate_remover.md
+++ b/docs/VCFX_duplicate_remover.md
@@ -15,6 +15,7 @@ VCFX_duplicate_remover [OPTIONS] < input.vcf > deduplicated.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_fasta_converter.md b/docs/VCFX_fasta_converter.md
index 64ee6ed4..7ea0fbd6 100644
--- a/docs/VCFX_fasta_converter.md
+++ b/docs/VCFX_fasta_converter.md
@@ -15,6 +15,7 @@ VCFX_fasta_converter [OPTIONS] < input.vcf > output.fasta
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_field_extractor.md b/docs/VCFX_field_extractor.md
index d1fb88e3..d51c8186 100644
--- a/docs/VCFX_field_extractor.md
+++ b/docs/VCFX_field_extractor.md
@@ -13,6 +13,7 @@ VCFX_field_extractor --fields "FIELD1,FIELD2,..." [OPTIONS] < input.vcf > output
 |--------|-------------|
 | `-f`, `--fields` | Required. Comma-separated list of fields to extract (no spaces between fields) |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_field_extractor` processes a VCF file and extracts only the specified fields for each variant. The tool:
diff --git a/docs/VCFX_file_splitter.md b/docs/VCFX_file_splitter.md
index 5072cbbe..652c7de9 100644
--- a/docs/VCFX_file_splitter.md
+++ b/docs/VCFX_file_splitter.md
@@ -16,6 +16,7 @@ VCFX_file_splitter [OPTIONS] < input.vcf
 |--------|-------------|
 | `-p`, `--prefix ` | Output file prefix (default: "split") |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_gl_filter.md b/docs/VCFX_gl_filter.md
index d6d7b0fc..ade46e71 100644
--- a/docs/VCFX_gl_filter.md
+++ b/docs/VCFX_gl_filter.md
@@ -14,6 +14,7 @@ VCFX_gl_filter --filter "" [--mode ] < input.vcf > filtered.
 | `-f`, `--filter ` | Required. Filter condition (e.g., `GQ>20`, `DP>=10`, `PL<50`) |
 | `-m`, `--mode ` | Optional. Determines if all samples must pass the condition (`all`, default) or at least one sample must pass (`any`) |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_gl_filter` examines numeric fields in the FORMAT column of a VCF file and filters variant records based on whether the samples satisfy the specified condition. The tool:
diff --git a/docs/VCFX_haplotype_extractor.md b/docs/VCFX_haplotype_extractor.md
index 07d70904..a8bd9d96 100644
--- a/docs/VCFX_haplotype_extractor.md
+++ b/docs/VCFX_haplotype_extractor.md
@@ -17,6 +17,7 @@ VCFX_haplotype_extractor [OPTIONS] < input.vcf > haplotypes.tsv
 | `--block-size ` | Maximum distance in base pairs between consecutive variants to be included in the same block (default: 100,000) |
 | `--check-phase-consistency` | Enable checks for phase consistency between adjacent variants in a block |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_haplotype_phaser.md b/docs/VCFX_haplotype_phaser.md
index 36481ceb..de4bda15 100644
--- a/docs/VCFX_haplotype_phaser.md
+++ b/docs/VCFX_haplotype_phaser.md
@@ -16,6 +16,7 @@ VCFX_haplotype_phaser [OPTIONS] < input.vcf > blocks.txt
 |--------|-------------|
 | `-l`, `--ld-threshold ` | r² threshold for LD-based grouping (0.0-1.0, default: 0.8) |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_header_parser.md b/docs/VCFX_header_parser.md
index 9706f9cd..f31edc34 100644
--- a/docs/VCFX_header_parser.md
+++ b/docs/VCFX_header_parser.md
@@ -15,6 +15,7 @@ VCFX_header_parser [OPTIONS] < input.vcf > header.txt
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_hwe_tester.md b/docs/VCFX_hwe_tester.md
index d6feb009..e370b974 100644
--- a/docs/VCFX_hwe_tester.md
+++ b/docs/VCFX_hwe_tester.md
@@ -15,6 +15,7 @@ VCFX_hwe_tester [OPTIONS] < input.vcf > hwe_results.txt
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_impact_filter.md b/docs/VCFX_impact_filter.md
index 897d5698..13a7b3fa 100644
--- a/docs/VCFX_impact_filter.md
+++ b/docs/VCFX_impact_filter.md
@@ -16,6 +16,7 @@ VCFX_impact_filter --filter-impact  < input.vcf > filtered.vcf
 |--------|-------------|
 | `-i`, `--filter-impact ` | Required. Impact level threshold. Must be one of: HIGH, MODERATE, LOW, MODIFIER |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_inbreeding_calculator.md b/docs/VCFX_inbreeding_calculator.md
index def1abfa..cbbedd07 100644
--- a/docs/VCFX_inbreeding_calculator.md
+++ b/docs/VCFX_inbreeding_calculator.md
@@ -18,6 +18,7 @@ VCFX_inbreeding_calculator [OPTIONS] < input.vcf > output.txt
 | `--skip-boundary` | Skip sites with boundary frequencies (p=0 or p=1) |
 | `--count-boundary-as-used` | Count boundary sites in usedCount even when skipping them |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_indel_normalizer.md b/docs/VCFX_indel_normalizer.md
index 47c48fa2..661d05c7 100644
--- a/docs/VCFX_indel_normalizer.md
+++ b/docs/VCFX_indel_normalizer.md
@@ -12,6 +12,7 @@ VCFX_indel_normalizer [OPTIONS] < input.vcf > normalized.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_indel_normalizer` processes a VCF file and normalizes indel variants by:
diff --git a/docs/VCFX_indexer.md b/docs/VCFX_indexer.md
index 7be8c2c9..10d870d8 100644
--- a/docs/VCFX_indexer.md
+++ b/docs/VCFX_indexer.md
@@ -14,6 +14,7 @@ VCFX_indexer [OPTIONS] < input.vcf > index.tsv
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_info_aggregator.md b/docs/VCFX_info_aggregator.md
index a1debf52..79fa7a9f 100644
--- a/docs/VCFX_info_aggregator.md
+++ b/docs/VCFX_info_aggregator.md
@@ -14,6 +14,7 @@ VCFX_info_aggregator [OPTIONS] < input.vcf > output.vcf
 
 - `-a`, `--aggregate-info `: Comma-separated list of INFO fields to aggregate (required).
 - `-h`, `--help`: Display help message and exit.
+- `-v`, `--version`: Show program version and exit.
 
 ## Description
 
diff --git a/docs/VCFX_info_parser.md b/docs/VCFX_info_parser.md
index 9a4e8e64..2ce74055 100644
--- a/docs/VCFX_info_parser.md
+++ b/docs/VCFX_info_parser.md
@@ -16,6 +16,7 @@ VCFX_info_parser --info "FIELD1,FIELD2,..." < input.vcf > extracted_info.tsv
 |--------|-------------|
 | `-i`, `--info ` | Required. Comma-separated list of INFO fields to extract (e.g., "DP,AF,SOMATIC") |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_info_summarizer.md b/docs/VCFX_info_summarizer.md
index 50b4566d..f1d8b061 100644
--- a/docs/VCFX_info_summarizer.md
+++ b/docs/VCFX_info_summarizer.md
@@ -16,6 +16,7 @@ VCFX_info_summarizer --info "FIELD1,FIELD2,..." < input.vcf > summary_stats.tsv
 |--------|-------------|
 | `-i`, `--info ` | Required. Comma-separated list of INFO fields to analyze (e.g., "DP,AF,MQ") |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_ld_calculator.md b/docs/VCFX_ld_calculator.md
index 7a40bbbe..34fa081e 100644
--- a/docs/VCFX_ld_calculator.md
+++ b/docs/VCFX_ld_calculator.md
@@ -16,6 +16,7 @@ VCFX_ld_calculator [OPTIONS] < input.vcf > ld_matrix.txt
 |--------|-------------|
 | `--region ` | Only compute LD for variants in the specified region |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_missing_detector.md b/docs/VCFX_missing_detector.md
index 53325d21..7cb368b9 100644
--- a/docs/VCFX_missing_detector.md
+++ b/docs/VCFX_missing_detector.md
@@ -15,6 +15,7 @@ VCFX_missing_detector [OPTIONS] < input.vcf > flagged.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_nonref_filter.md b/docs/VCFX_nonref_filter.md
index 278c082e..aa7f15e9 100644
--- a/docs/VCFX_nonref_filter.md
+++ b/docs/VCFX_nonref_filter.md
@@ -15,6 +15,7 @@ VCFX_nonref_filter [OPTIONS] < input.vcf > filtered.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_phase_checker.md b/docs/VCFX_phase_checker.md
index df0bf65d..4bf62779 100644
--- a/docs/VCFX_phase_checker.md
+++ b/docs/VCFX_phase_checker.md
@@ -15,6 +15,7 @@ VCFX_phase_checker [OPTIONS] < input.vcf > phased_output.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_phred_filter.md b/docs/VCFX_phred_filter.md
index ad7e73ae..b662f91b 100644
--- a/docs/VCFX_phred_filter.md
+++ b/docs/VCFX_phred_filter.md
@@ -17,6 +17,7 @@ VCFX_phred_filter [OPTIONS] < input.vcf > filtered.vcf
 | `-p`, `--phred-filter`  | Set PHRED quality score threshold (default: 30.0) |
 | `-k`, `--keep-missing-qual` | Keep variants with missing quality values (represented as ".") |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_population_filter.md b/docs/VCFX_population_filter.md
index cabfdeba..e98db84e 100644
--- a/docs/VCFX_population_filter.md
+++ b/docs/VCFX_population_filter.md
@@ -12,6 +12,7 @@ VCFX_population_filter [OPTIONS] < input.vcf > output.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-p`, `--population ` | **Required**: Population tag to keep (e.g., 'EUR', 'AFR', 'EAS') |
 | `-m`, `--pop-map ` | **Required**: Tab-delimited file mapping sample names to populations |
 
diff --git a/docs/VCFX_position_subsetter.md b/docs/VCFX_position_subsetter.md
index fcd33045..727e78f4 100644
--- a/docs/VCFX_position_subsetter.md
+++ b/docs/VCFX_position_subsetter.md
@@ -13,6 +13,7 @@ VCFX_position_subsetter --region "CHR:START-END" < input.vcf > filtered.vcf
 |--------|-------------|
 | `-r`, `--region ` | Required. Genomic region to extract in the format "chromosome:start-end" |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_position_subsetter` reads a VCF file from standard input and outputs only those variants that fall within the specified genomic region. The tool:
diff --git a/docs/VCFX_quality_adjuster.md b/docs/VCFX_quality_adjuster.md
index 80d7b8b4..56ff873d 100644
--- a/docs/VCFX_quality_adjuster.md
+++ b/docs/VCFX_quality_adjuster.md
@@ -17,6 +17,7 @@ VCFX_quality_adjuster [OPTIONS] < input.vcf > output.vcf
 | `-a`, `--adjust-qual ` | Required. The transformation function to apply. Must be one of: `log`, `sqrt`, `square`, or `identity`. |
 | `-n`, `--no-clamp` | Do not clamp negative or extremely large values resulting from transformations. |
 | `-h`, `--help` | Display help message and exit. |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_record_filter.md b/docs/VCFX_record_filter.md
index 8ae0b8f1..c406e4eb 100644
--- a/docs/VCFX_record_filter.md
+++ b/docs/VCFX_record_filter.md
@@ -14,6 +14,7 @@ VCFX_record_filter --filter "CRITERIA" [OPTIONS] < input.vcf > filtered.vcf
 | `-f`, `--filter ` | Required. One or more filtering criteria separated by semicolons (e.g., `"POS>10000;QUAL>=30;AF<0.05"`) |
 | `-l`, `--logic ` | Logic for combining multiple criteria: `and` (default) requires all criteria to pass, `or` requires any criterion to pass |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_record_filter` evaluates each variant in a VCF file against specified criteria and outputs only variants that satisfy these criteria. The tool:
diff --git a/docs/VCFX_ref_comparator.md b/docs/VCFX_ref_comparator.md
index fa2eba48..a3db6c76 100644
--- a/docs/VCFX_ref_comparator.md
+++ b/docs/VCFX_ref_comparator.md
@@ -16,6 +16,7 @@ VCFX_ref_comparator --reference  < input.vcf > annotated.vcf
 |--------|-------------|
 | `-r`, `--reference`  | Required. Path to reference genome in FASTA format |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_sample_extractor.md b/docs/VCFX_sample_extractor.md
index da470f56..96b2ca06 100644
--- a/docs/VCFX_sample_extractor.md
+++ b/docs/VCFX_sample_extractor.md
@@ -16,6 +16,7 @@ VCFX_sample_extractor [OPTIONS] < input.vcf > subset.vcf
 |--------|-------------|
 | `-s`, `--samples` LIST | Comma or space separated list of sample names to extract |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_sorter.md b/docs/VCFX_sorter.md
index 561978df..d66708e1 100644
--- a/docs/VCFX_sorter.md
+++ b/docs/VCFX_sorter.md
@@ -12,6 +12,7 @@ VCFX_sorter [OPTIONS] < input.vcf > output.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-n`, `--natural-chr` | Use natural chromosome sorting (chr1 < chr2 < chr10) instead of lexicographic sorting |
 
 ## Description
diff --git a/docs/VCFX_sv_handler.md b/docs/VCFX_sv_handler.md
index 14f90490..67ff1dcb 100644
--- a/docs/VCFX_sv_handler.md
+++ b/docs/VCFX_sv_handler.md
@@ -12,6 +12,7 @@ VCFX_sv_handler [OPTIONS] < input.vcf > output.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-f`, `--sv-filter-only` | Keep only lines that have 'SVTYPE=' in their INFO field |
 | `-m`, `--sv-modify` | Modify the INFO field of structural variants to add additional annotations |
 
diff --git a/docs/VCFX_validator.md b/docs/VCFX_validator.md
index e3d8bcac..dde07d1c 100644
--- a/docs/VCFX_validator.md
+++ b/docs/VCFX_validator.md
@@ -14,6 +14,7 @@ VCFX_validator [OPTIONS] < input.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-s`, `--strict` | Enable stricter validation checks |
 
 ## Description
diff --git a/docs/VCFX_variant_classifier.md b/docs/VCFX_variant_classifier.md
index 48304688..b2b5be15 100644
--- a/docs/VCFX_variant_classifier.md
+++ b/docs/VCFX_variant_classifier.md
@@ -15,6 +15,7 @@ VCFX_variant_classifier [OPTIONS] < input.vcf > output.vcf_or_tsv
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-a`, `--append-info` | Instead of producing a TSV, output a valid VCF with a new 'VCF_CLASS' subfield in the INFO column |
 
 ## Description
diff --git a/docs/VCFX_variant_counter.md b/docs/VCFX_variant_counter.md
index 47e6e9dd..29839c7b 100644
--- a/docs/VCFX_variant_counter.md
+++ b/docs/VCFX_variant_counter.md
@@ -12,6 +12,7 @@ VCFX_variant_counter [OPTIONS] < input.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-s`, `--strict` | Fail on any data line with fewer than 8 columns |
 
 ## Description
diff --git a/docs/tools_overview.md b/docs/tools_overview.md
index 898124b8..4361453d 100644
--- a/docs/tools_overview.md
+++ b/docs/tools_overview.md
@@ -3,6 +3,7 @@
 VCFX is a collection of C/C++ tools for processing and analyzing VCF (Variant Call Format) files, with optional WebAssembly compatibility. Each tool is an independent command-line executable that can parse input from `stdin` and write to `stdout`, enabling flexible piping and integration into bioinformatics pipelines.
 
 The suite also includes a convenience wrapper `vcfx` so you can run commands as `vcfx `. For example, `vcfx variant_counter` is equivalent to running `VCFX_variant_counter`. Use `vcfx --list` to see available subcommands. All individual `VCFX_*` binaries remain available if you prefer calling them directly.
+Every tool also accepts `--version` to display the build version.
 
 ## Tool Categories
 
diff --git a/include/vcfx_core.h b/include/vcfx_core.h
index c3a62895..80516071 100644
--- a/include/vcfx_core.h
+++ b/include/vcfx_core.h
@@ -4,6 +4,7 @@
 #include 
 #include 
 #include 
+#include 
 
 namespace vcfx {
 
@@ -18,6 +19,26 @@ void print_error(const std::string& msg, std::ostream& os = std::cerr);
 void print_version(const std::string& tool, const std::string& version,
                    std::ostream& os = std::cout);
 
+inline std::string get_version() {  // Returns the version string baked in at compile time.
+#ifdef VCFX_VERSION
+    return VCFX_VERSION;  // Macro must expand to something convertible to std::string.
+#else
+    return "unknown";  // Fallback when the VCFX_VERSION macro is not defined.
+#endif
+}
+
+inline bool handle_version_flag(int argc, char* argv[], const std::string& tool,
+                                std::ostream& os = std::cout) {
+    for (int i = 1; i < argc; ++i) {  // Scan every argument after argv[0] for a version flag.
+        if (std::strcmp(argv[i], "--version") == 0 ||
+            std::strcmp(argv[i], "-v") == 0) {  // NOTE(review): exact match at ANY position, even where -v could be another option's value; confirm no tool uses -v differently.
+            print_version(tool, get_version(), os);  // Report `tool` together with get_version() on `os`.
+            return true;  // First match wins; the caller is expected to exit.
+        }
+    }
+    return false;  // No version flag present; the caller continues normal argument parsing.
+}
+
 // Read entire input stream, automatically decompressing if gzip/BGZF
 // compressed. Returns true on success and stores the resulting text in
 // 'out'.
diff --git a/src/VCFX_af_subsetter/VCFX_af_subsetter.cpp b/src/VCFX_af_subsetter/VCFX_af_subsetter.cpp
index 2a6b0579..8110b7eb 100644
--- a/src/VCFX_af_subsetter/VCFX_af_subsetter.cpp
+++ b/src/VCFX_af_subsetter/VCFX_af_subsetter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_af_subsetter.h"
 #include 
 #include 
@@ -156,6 +157,7 @@ void VCFXAfSubsetter::subsetByAlleleFrequency(std::istream& in, std::ostream& ou
 // Typical main():
 //
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_af_subsetter")) return 0;
     VCFXAfSubsetter afSubsetter;
     return afSubsetter.run(argc, argv);
 }
diff --git a/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp b/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp
index b4898ee3..f9df880d 100644
--- a/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp
+++ b/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_alignment_checker.h"
 #include 
 #include 
@@ -321,6 +322,7 @@ void VCFXAlignmentChecker::checkDiscrepancies(std::istream& vcfIn, std::ostream&
 
 // Typical main(), linking to run()
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_alignment_checker")) return 0;
     VCFXAlignmentChecker alignmentChecker;
     return alignmentChecker.run(argc, argv);
 }
diff --git a/src/VCFX_allele_balance_calc/VCFX_allele_balance_calc.cpp b/src/VCFX_allele_balance_calc/VCFX_allele_balance_calc.cpp
index 71acdeeb..27e8bfd7 100644
--- a/src/VCFX_allele_balance_calc/VCFX_allele_balance_calc.cpp
+++ b/src/VCFX_allele_balance_calc/VCFX_allele_balance_calc.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 // VCFX_allele_balance_calc.cpp
 
 #include 
@@ -224,6 +225,7 @@ bool calculateAlleleBalance(std::istream& in, std::ostream& out, const AlleleBal
 // main()
 // ---------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_allele_balance_calc")) return 0;
     AlleleBalanceArguments args;
     parseArguments(argc, argv, args);
 
diff --git a/src/VCFX_allele_balance_filter/VCFX_allele_balance_filter.cpp b/src/VCFX_allele_balance_filter/VCFX_allele_balance_filter.cpp
index 88c963d2..ad7fbe43 100644
--- a/src/VCFX_allele_balance_filter/VCFX_allele_balance_filter.cpp
+++ b/src/VCFX_allele_balance_filter/VCFX_allele_balance_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -197,6 +198,7 @@ double VCFXAlleleBalanceFilter::calculateAlleleBalance(const std::string& genoty
 // main() linking to class
 // ------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_allele_balance_filter")) return 0;
     VCFXAlleleBalanceFilter alleleBalanceFilter;
     return alleleBalanceFilter.run(argc, argv);
 }
diff --git a/src/VCFX_allele_counter/VCFX_allele_counter.cpp b/src/VCFX_allele_counter/VCFX_allele_counter.cpp
index 2e6ac5e9..b3ad6f4d 100644
--- a/src/VCFX_allele_counter/VCFX_allele_counter.cpp
+++ b/src/VCFX_allele_counter/VCFX_allele_counter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -232,6 +233,7 @@ static bool countAlleles(std::istream& in, std::ostream& out, const AlleleCounte
 // main()
 // ---------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_allele_counter")) return 0;
     AlleleCounterArguments args;
     parseArguments(argc, argv, args);
 
diff --git a/src/VCFX_allele_freq_calc/VCFX_allele_freq_calc.cpp b/src/VCFX_allele_freq_calc/VCFX_allele_freq_calc.cpp
index 2344ac6d..f25dccb1 100644
--- a/src/VCFX_allele_freq_calc/VCFX_allele_freq_calc.cpp
+++ b/src/VCFX_allele_freq_calc/VCFX_allele_freq_calc.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -183,6 +184,7 @@ static void calculateAlleleFrequency(std::istream& in, std::ostream& out) {
 // main()
 // ---------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_allele_freq_calc")) return 0;
     // Parse arguments for help
     for (int i = 1; i < argc; ++i) {
         std::string arg = argv[i];
diff --git a/src/VCFX_ancestry_assigner/VCFX_ancestry_assigner.cpp b/src/VCFX_ancestry_assigner/VCFX_ancestry_assigner.cpp
index ee21cce4..22a2e1ff 100644
--- a/src/VCFX_ancestry_assigner/VCFX_ancestry_assigner.cpp
+++ b/src/VCFX_ancestry_assigner/VCFX_ancestry_assigner.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -408,6 +409,7 @@ void VCFXAncestryAssigner::assignAncestry(std::istream& vcfIn, std::ostream& out
 // main() - just instantiate and run
 // ---------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_ancestry_assigner")) return 0;
     VCFXAncestryAssigner assigner;
     return assigner.run(argc, argv);
 }
diff --git a/src/VCFX_ancestry_inferrer/VCFX_ancestry_inferrer.cpp b/src/VCFX_ancestry_inferrer/VCFX_ancestry_inferrer.cpp
index 1c72b384..00725dcd 100644
--- a/src/VCFX_ancestry_inferrer/VCFX_ancestry_inferrer.cpp
+++ b/src/VCFX_ancestry_inferrer/VCFX_ancestry_inferrer.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -58,6 +59,7 @@ class VCFXAncestryInferrer {
 // main() - create the inferrer and run
 // ----------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_ancestry_inferrer")) return 0;
     VCFXAncestryInferrer inferrer;
     return inferrer.run(argc, argv);
 }
diff --git a/src/VCFX_annotation_extractor/VCFX_annotation_extractor.cpp b/src/VCFX_annotation_extractor/VCFX_annotation_extractor.cpp
index 2245d156..544a2ea5 100644
--- a/src/VCFX_annotation_extractor/VCFX_annotation_extractor.cpp
+++ b/src/VCFX_annotation_extractor/VCFX_annotation_extractor.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -273,6 +274,7 @@ static void processVCF(std::istream &in, const AnnotationOptions &opts) {
 // main()
 // --------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_annotation_extractor")) return 0;
     AnnotationOptions opts;
     if (!parseArguments(argc, argv, opts)) {
         // parseArguments already printed help if needed
diff --git a/src/VCFX_compressor/VCFX_compressor.cpp b/src/VCFX_compressor/VCFX_compressor.cpp
index e2476577..3ffd05a5 100644
--- a/src/VCFX_compressor/VCFX_compressor.cpp
+++ b/src/VCFX_compressor/VCFX_compressor.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -149,6 +150,7 @@ static bool compressDecompressVCF(std::istream& in, std::ostream& out, bool comp
 // main
 // ---------------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_compressor")) return 0;
     bool compress = false;
     bool decompress = false;
 
diff --git a/src/VCFX_concordance_checker/VCFX_concordance_checker.cpp b/src/VCFX_concordance_checker/VCFX_concordance_checker.cpp
index c6b8df79..ae6f694c 100644
--- a/src/VCFX_concordance_checker/VCFX_concordance_checker.cpp
+++ b/src/VCFX_concordance_checker/VCFX_concordance_checker.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -275,6 +276,7 @@ static bool calculateConcordance(std::istream &in, std::ostream &out, const Conc
 // main
 // ---------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_concordance_checker")) return 0;
     ConcordanceArguments args;
     if (!parseArguments(argc, argv, args)) {
         // parseArguments prints error/help if needed
diff --git a/src/VCFX_cross_sample_concordance/VCFX_cross_sample_concordance.cpp b/src/VCFX_cross_sample_concordance/VCFX_cross_sample_concordance.cpp
index 50456e7e..1a9b694e 100644
--- a/src/VCFX_cross_sample_concordance/VCFX_cross_sample_concordance.cpp
+++ b/src/VCFX_cross_sample_concordance/VCFX_cross_sample_concordance.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -238,6 +239,7 @@ static void calculateConcordance(std::istream &in, std::ostream &out) {
 // Command-line parsing + main
 // --------------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_cross_sample_concordance")) return 0;
     bool showHelp = false;
 
     static struct option longOpts[] = {
diff --git a/src/VCFX_custom_annotator/VCFX_custom_annotator.cpp b/src/VCFX_custom_annotator/VCFX_custom_annotator.cpp
index 28e9ea29..a5319263 100644
--- a/src/VCFX_custom_annotator/VCFX_custom_annotator.cpp
+++ b/src/VCFX_custom_annotator/VCFX_custom_annotator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -282,6 +283,7 @@ int VCFXCustomAnnotator::run(int argc, char* argv[]) {
 // main
 // ---------------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_custom_annotator")) return 0;
     VCFXCustomAnnotator annotator;
     return annotator.run(argc, argv);
 }
diff --git a/src/VCFX_diff_tool/VCFX_diff_tool.cpp b/src/VCFX_diff_tool/VCFX_diff_tool.cpp
index 8f8f4611..ea984daa 100644
--- a/src/VCFX_diff_tool/VCFX_diff_tool.cpp
+++ b/src/VCFX_diff_tool/VCFX_diff_tool.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -205,6 +206,7 @@ int VCFXDiffTool::run(int argc, char* argv[]) {
 // main
 // ----------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_diff_tool")) return 0;
     VCFXDiffTool diffTool;
     return diffTool.run(argc, argv);
 }
diff --git a/src/VCFX_distance_calculator/VCFX_distance_calculator.cpp b/src/VCFX_distance_calculator/VCFX_distance_calculator.cpp
index 0fd2552c..67e717a5 100644
--- a/src/VCFX_distance_calculator/VCFX_distance_calculator.cpp
+++ b/src/VCFX_distance_calculator/VCFX_distance_calculator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 // VCFX_distance_calculator.cpp
 #include "VCFX_distance_calculator.h"
 #include 
@@ -160,6 +161,7 @@ bool calculateDistances(std::istream& in, std::ostream& out) {
 // main: Parses command-line arguments and calls calculateDistances.
 // --------------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_distance_calculator")) return 0;
     // Check for help option.
     for (int i = 1; i < argc; ++i) {
         std::string arg = argv[i];
diff --git a/src/VCFX_dosage_calculator/VCFX_dosage_calculator.cpp b/src/VCFX_dosage_calculator/VCFX_dosage_calculator.cpp
index 0dfa7f30..e41f5c0b 100644
--- a/src/VCFX_dosage_calculator/VCFX_dosage_calculator.cpp
+++ b/src/VCFX_dosage_calculator/VCFX_dosage_calculator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_dosage_calculator.h"
 #include 
 #include 
@@ -216,6 +217,7 @@ std::vector VCFXDosageCalculator::split(const std::string& str, cha
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_dosage_calculator")) return 0;
     VCFXDosageCalculator dosageCalculator;
     return dosageCalculator.run(argc, argv);
 }
diff --git a/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp b/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp
index 77c5ce1a..72625213 100644
--- a/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp
+++ b/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_duplicate_remover.h"
 #include 
 #include 
@@ -129,6 +130,7 @@ bool removeDuplicates(std::istream& in, std::ostream& out) {
 // main: Parse command-line arguments and call removeDuplicates.
 // ----------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_duplicate_remover")) return 0;
     // Simple argument parsing: if --help or -h is provided, print help.
     for (int i = 1; i < argc; ++i) {
         std::string arg = argv[i];
diff --git a/src/VCFX_fasta_converter/VCFX_fasta_converter.cpp b/src/VCFX_fasta_converter/VCFX_fasta_converter.cpp
index 110df41f..84c79222 100644
--- a/src/VCFX_fasta_converter/VCFX_fasta_converter.cpp
+++ b/src/VCFX_fasta_converter/VCFX_fasta_converter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_fasta_converter.h"
 #include 
 #include 
@@ -320,6 +321,7 @@ void VCFXFastaConverter::convertVCFtoFasta(std::istream& in, std::ostream& out)
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_fasta_converter")) return 0;
     VCFXFastaConverter app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_field_extractor/VCFX_field_extractor.cpp b/src/VCFX_field_extractor/VCFX_field_extractor.cpp
index b7fe05cd..31359024 100644
--- a/src/VCFX_field_extractor/VCFX_field_extractor.cpp
+++ b/src/VCFX_field_extractor/VCFX_field_extractor.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_field_extractor.h"
 #include 
 #include 
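@@ -245,6 +246,7 @@ void extractFields(std::istream& in, std::ostream& out, const std::vector<std::s
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_field_extractor")) return 0;
     std::vector<std::string> fields;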
     bool showHelp = false;
 
diff --git a/src/VCFX_file_splitter/VCFX_file_splitter.cpp b/src/VCFX_file_splitter/VCFX_file_splitter.cpp
index 8d5c21f5..87c7b264 100644
--- a/src/VCFX_file_splitter/VCFX_file_splitter.cpp
+++ b/src/VCFX_file_splitter/VCFX_file_splitter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_file_splitter.h"
 #include 
 #include 
@@ -159,6 +160,7 @@ void VCFXFileSplitter::splitVCFByChromosome(std::istream& in,
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_file_splitter")) return 0;
     VCFXFileSplitter splitter;
     return splitter.run(argc, argv);
 }
diff --git a/src/VCFX_format_converter/VCFX_format_converter.cpp b/src/VCFX_format_converter/VCFX_format_converter.cpp
index 9998a63b..668b4f67 100644
--- a/src/VCFX_format_converter/VCFX_format_converter.cpp
+++ b/src/VCFX_format_converter/VCFX_format_converter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_format_converter.h"
 #include 
 #include 
@@ -184,6 +185,7 @@ void convertVCFtoCSV(std::istream& in, std::ostream& out) {
 // main
 // -----------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_format_converter")) return 0;
     OutputFormat format;
     bool valid = parseArguments(argc, argv, format);
 
diff --git a/src/VCFX_genotype_query/VCFX_genotype_query.cpp b/src/VCFX_genotype_query/VCFX_genotype_query.cpp
index e472b6af..f1451c23 100644
--- a/src/VCFX_genotype_query/VCFX_genotype_query.cpp
+++ b/src/VCFX_genotype_query/VCFX_genotype_query.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_genotype_query.h"
 #include 
 #include 
@@ -239,6 +240,7 @@ void genotypeQuery(std::istream& in, std::ostream& out,
 // main
 // ------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_genotype_query")) return 0;
     std::string genotypeQueryStr;
     bool strictCompare = false;
     if (!parseArguments(argc, argv, genotypeQueryStr, strictCompare)) {
diff --git a/src/VCFX_gl_filter/VCFX_gl_filter.cpp b/src/VCFX_gl_filter/VCFX_gl_filter.cpp
index 8360fc9e..769b8502 100644
--- a/src/VCFX_gl_filter/VCFX_gl_filter.cpp
+++ b/src/VCFX_gl_filter/VCFX_gl_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_gl_filter.h"
 #include 
 #include 
@@ -263,6 +264,7 @@ void VCFXGLFilter::filterByGL(std::istream& in,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_gl_filter")) return 0;
     VCFXGLFilter app;
     return app.run(argc, argv);
 }
\ No newline at end of file
diff --git a/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp b/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp
index 87fd0c79..9475ba6e 100644
--- a/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp
+++ b/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_haplotype_extractor.h"
 #include 
 #include 
@@ -327,6 +328,7 @@ bool HaplotypeExtractor::extractHaplotypes(std::istream& in, std::ostream& out)
 // main
 // ---------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_haplotype_extractor")) return 0;
     int blockSize = 100000;
     bool doCheck = false;
     bool debug = false;
diff --git a/src/VCFX_haplotype_phaser/VCFX_haplotype_phaser.cpp b/src/VCFX_haplotype_phaser/VCFX_haplotype_phaser.cpp
index 9b2eba87..6cbfa965 100644
--- a/src/VCFX_haplotype_phaser/VCFX_haplotype_phaser.cpp
+++ b/src/VCFX_haplotype_phaser/VCFX_haplotype_phaser.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_haplotype_phaser.h"
 #include 
 #include 
@@ -318,6 +319,7 @@ std::vector> VCFXHaplotypePhaser::groupVariants(const std::vect
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_haplotype_phaser")) return 0;
     VCFXHaplotypePhaser hp;
     return hp.run(argc, argv);
 }
diff --git a/src/VCFX_header_parser/VCFX_header_parser.cpp b/src/VCFX_header_parser/VCFX_header_parser.cpp
index 97164a43..45ee87ed 100644
--- a/src/VCFX_header_parser/VCFX_header_parser.cpp
+++ b/src/VCFX_header_parser/VCFX_header_parser.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_header_parser.h"
 #include 
 #include 
@@ -26,6 +27,7 @@ void processHeader(std::istream& in, std::ostream& out) {
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_header_parser")) return 0;
     // Simple argument parsing
     for (int i = 1; i < argc; ++i) {
         std::string arg = argv[i];
diff --git a/src/VCFX_hwe_tester/VCFX_hwe_tester.cpp b/src/VCFX_hwe_tester/VCFX_hwe_tester.cpp
index 64fbc9d8..d54e7f24 100644
--- a/src/VCFX_hwe_tester/VCFX_hwe_tester.cpp
+++ b/src/VCFX_hwe_tester/VCFX_hwe_tester.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_hwe_tester.h"
 #include 
 #include 
@@ -255,6 +256,7 @@ void VCFXHWETester::performHWE(std::istream& in){
 
 // actual main
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_hwe_tester")) return 0;
     VCFXHWETester tester;
     return tester.run(argc, argv);
 }
diff --git a/src/VCFX_impact_filter/VCFX_impact_filter.cpp b/src/VCFX_impact_filter/VCFX_impact_filter.cpp
index 86034924..51bfea85 100644
--- a/src/VCFX_impact_filter/VCFX_impact_filter.cpp
+++ b/src/VCFX_impact_filter/VCFX_impact_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_impact_filter.h"
 #include 
 #include 
@@ -200,6 +201,7 @@ void VCFXImpactFilter::filterByImpact(std::istream& in,
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_impact_filter")) return 0;
     VCFXImpactFilter filt;
     return filt.run(argc, argv);
 }
diff --git a/src/VCFX_inbreeding_calculator/VCFX_inbreeding_calculator.cpp b/src/VCFX_inbreeding_calculator/VCFX_inbreeding_calculator.cpp
index 4b095239..4353d7b3 100644
--- a/src/VCFX_inbreeding_calculator/VCFX_inbreeding_calculator.cpp
+++ b/src/VCFX_inbreeding_calculator/VCFX_inbreeding_calculator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_inbreeding_calculator.h"
 #include 
 #include 
@@ -351,6 +352,7 @@ int VCFXInbreedingCalculator::run(int argc, char* argv[]){
 // -------------------------------------------------------------------------
 // main entry point
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_inbreeding_calculator")) return 0;
     VCFXInbreedingCalculator calc;
     return calc.run(argc, argv);
 }
diff --git a/src/VCFX_indel_normalizer/VCFX_indel_normalizer.cpp b/src/VCFX_indel_normalizer/VCFX_indel_normalizer.cpp
index 6c4f2fc7..0a60d6bb 100644
--- a/src/VCFX_indel_normalizer/VCFX_indel_normalizer.cpp
+++ b/src/VCFX_indel_normalizer/VCFX_indel_normalizer.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_indel_normalizer.h"
 #include 
 #include 
@@ -253,6 +254,7 @@ void VCFXIndelNormalizer::normalizeIndels(std::istream& in, std::ostream& out) {
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_indel_normalizer")) return 0;
     VCFXIndelNormalizer norm;
     return norm.run(argc, argv);
 }
diff --git a/src/VCFX_indexer/VCFX_indexer.cpp b/src/VCFX_indexer/VCFX_indexer.cpp
index a99c185c..2011871e 100644
--- a/src/VCFX_indexer/VCFX_indexer.cpp
+++ b/src/VCFX_indexer/VCFX_indexer.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_indexer.h"
 #include 
 #include 
@@ -189,6 +190,7 @@ void VCFXIndexer::createVCFIndex(std::istream &in, std::ostream &out) {
 
 // Optional main if you build as a single executable
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_indexer")) return 0;
     VCFXIndexer idx;
     return idx.run(argc, argv);
 }
diff --git a/src/VCFX_info_aggregator/VCFX_info_aggregator.cpp b/src/VCFX_info_aggregator/VCFX_info_aggregator.cpp
index f15d8825..91deee46 100644
--- a/src/VCFX_info_aggregator/VCFX_info_aggregator.cpp
+++ b/src/VCFX_info_aggregator/VCFX_info_aggregator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_info_aggregator.h"
 #include 
 #include 
@@ -214,6 +215,7 @@ void VCFXInfoAggregator::aggregateInfo(std::istream& in,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_info_aggregator")) return 0;
     VCFXInfoAggregator app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_info_parser/VCFX_info_parser.cpp b/src/VCFX_info_parser/VCFX_info_parser.cpp
index 286ef5a0..51cc6461 100644
--- a/src/VCFX_info_parser/VCFX_info_parser.cpp
+++ b/src/VCFX_info_parser/VCFX_info_parser.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_info_parser.h"
 #include 
 #include 
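@@ -139,6 +140,7 @@ bool parseInfoFields(std::istream& in, std::ostream& out, const std::vector<std:
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_info_parser")) return 0;
     std::vector<std::string> info_fields;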
 
     // parse arguments
diff --git a/src/VCFX_info_summarizer/VCFX_info_summarizer.cpp b/src/VCFX_info_summarizer/VCFX_info_summarizer.cpp
index 79d43a81..0917bcd9 100644
--- a/src/VCFX_info_summarizer/VCFX_info_summarizer.cpp
+++ b/src/VCFX_info_summarizer/VCFX_info_summarizer.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_info_summarizer.h"
 #include 
 #include 
@@ -225,6 +226,7 @@ bool summarizeInfoFields(std::istream& in, std::ostream& out, const std::vector<
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_info_summarizer")) return 0;
     std::vector<std::string> info_fields;
 
     // parse arguments
diff --git a/src/VCFX_ld_calculator/VCFX_ld_calculator.cpp b/src/VCFX_ld_calculator/VCFX_ld_calculator.cpp
index ef5bc318..9076d375 100644
--- a/src/VCFX_ld_calculator/VCFX_ld_calculator.cpp
+++ b/src/VCFX_ld_calculator/VCFX_ld_calculator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_ld_calculator.h"
 #include 
 #include 
@@ -345,6 +346,7 @@ int VCFXLDCalculator::run(int argc, char* argv[]) {
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_ld_calculator")) return 0;
     VCFXLDCalculator calc;
     return calc.run(argc, argv);
 }
diff --git a/src/VCFX_merger/VCFX_merger.cpp b/src/VCFX_merger/VCFX_merger.cpp
index b8628062..c81634c1 100644
--- a/src/VCFX_merger/VCFX_merger.cpp
+++ b/src/VCFX_merger/VCFX_merger.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_merger.h"
 #include 
 #include 
@@ -120,6 +121,7 @@ void VCFXMerger::mergeVCF(const std::vector& inputFiles, std::ostre
 
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_merger")) return 0;
     VCFXMerger merger;
     return merger.run(argc, argv);
 }
diff --git a/src/VCFX_metadata_summarizer/VCFX_metadata_summarizer.cpp b/src/VCFX_metadata_summarizer/VCFX_metadata_summarizer.cpp
index 795e8e0b..b8be9bc7 100644
--- a/src/VCFX_metadata_summarizer/VCFX_metadata_summarizer.cpp
+++ b/src/VCFX_metadata_summarizer/VCFX_metadata_summarizer.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_metadata_summarizer.h"
 #include 
 #include 
@@ -155,6 +156,7 @@ void VCFXMetadataSummarizer::printSummary() const {
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_metadata_summarizer")) return 0;
     VCFXMetadataSummarizer summarizer;
     return summarizer.run(argc, argv);
 }
diff --git a/src/VCFX_missing_data_handler/VCFX_missing_data_handler.cpp b/src/VCFX_missing_data_handler/VCFX_missing_data_handler.cpp
index 83191947..7812f65a 100644
--- a/src/VCFX_missing_data_handler/VCFX_missing_data_handler.cpp
+++ b/src/VCFX_missing_data_handler/VCFX_missing_data_handler.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_missing_data_handler.h"
 #include 
 #include 
@@ -259,6 +260,7 @@ bool handleMissingDataAll(const Arguments& args) {
  * @return int Exit status.
  */
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_missing_data_handler")) return 0;
     Arguments args;
     parseArguments(argc, argv, args);
 
diff --git a/src/VCFX_missing_detector/VCFX_missing_detector.cpp b/src/VCFX_missing_detector/VCFX_missing_detector.cpp
index 3ae391a1..78033e3d 100644
--- a/src/VCFX_missing_detector/VCFX_missing_detector.cpp
+++ b/src/VCFX_missing_detector/VCFX_missing_detector.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_missing_detector.h"
 #include 
 #include 
@@ -208,6 +209,7 @@ void VCFXMissingDetector::detectMissingGenotypes(std::istream& in, std::ostream&
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_missing_detector")) return 0;
     VCFXMissingDetector missingDetector;
     return missingDetector.run(argc, argv);
 }
diff --git a/src/VCFX_multiallelic_splitter/VCFX_multiallelic_splitter.cpp b/src/VCFX_multiallelic_splitter/VCFX_multiallelic_splitter.cpp
index 5f3a280a..8922df62 100644
--- a/src/VCFX_multiallelic_splitter/VCFX_multiallelic_splitter.cpp
+++ b/src/VCFX_multiallelic_splitter/VCFX_multiallelic_splitter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_multiallelic_splitter.h"
 #include 
 #include 
@@ -288,6 +289,7 @@ bool splitMultiAllelicVariants(std::istream &in, std::ostream &out){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_multiallelic_splitter")) return 0;
     for(int i=1; i< argc; i++){
         std::string arg= argv[i];
         if(arg=="--help"|| arg=="-h"){
diff --git a/src/VCFX_nonref_filter/VCFX_nonref_filter.cpp b/src/VCFX_nonref_filter/VCFX_nonref_filter.cpp
index ab9bf18c..10ec7601 100644
--- a/src/VCFX_nonref_filter/VCFX_nonref_filter.cpp
+++ b/src/VCFX_nonref_filter/VCFX_nonref_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_nonref_filter.h"
 #include 
 #include 
@@ -132,6 +133,7 @@ void VCFXNonRefFilter::filterNonRef(std::istream& in, std::ostream& out){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_nonref_filter")) return 0;
     VCFXNonRefFilter app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp b/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp
index 16710d8d..d8f383c3 100644
--- a/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp
+++ b/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_outlier_detector.h"
 #include 
 #include 
@@ -304,6 +305,7 @@ void VCFXOutlierDetector::detectOutliers(std::istream &in,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_outlier_detector")) return 0;
     VCFXOutlierDetector app;
     return app.run(argc, argv);
 }
\ No newline at end of file
diff --git a/src/VCFX_phase_checker/VCFX_phase_checker.cpp b/src/VCFX_phase_checker/VCFX_phase_checker.cpp
index 052da607..2d613cfa 100644
--- a/src/VCFX_phase_checker/VCFX_phase_checker.cpp
+++ b/src/VCFX_phase_checker/VCFX_phase_checker.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_phase_checker.h"
 #include 
 #include 
@@ -164,6 +165,7 @@ void VCFXPhaseChecker::processVCF(std::istream &in, std::ostream &out) {
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_phase_checker")) return 0;
     VCFXPhaseChecker checker;
     return checker.run(argc, argv);
 }
diff --git a/src/VCFX_phase_quality_filter/VCFX_phase_quality_filter.cpp b/src/VCFX_phase_quality_filter/VCFX_phase_quality_filter.cpp
index 87f6a835..19bcd240 100644
--- a/src/VCFX_phase_quality_filter/VCFX_phase_quality_filter.cpp
+++ b/src/VCFX_phase_quality_filter/VCFX_phase_quality_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_phase_quality_filter.h"
 #include 
 #include 
@@ -201,6 +202,7 @@ double VCFXPhaseQualityFilter::parsePQScore(const std::string &info) {
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_phase_quality_filter")) return 0;
     VCFXPhaseQualityFilter f;
     return f.run(argc, argv);
 }
diff --git a/src/VCFX_phred_filter/VCFX_phred_filter.cpp b/src/VCFX_phred_filter/VCFX_phred_filter.cpp
index d7d01711..0ea89008 100644
--- a/src/VCFX_phred_filter/VCFX_phred_filter.cpp
+++ b/src/VCFX_phred_filter/VCFX_phred_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_phred_filter.h"
 #include 
 #include 
@@ -119,6 +120,7 @@ double VCFXPhredFilter::parseQUAL(const std::string &qualStr, bool keepMissingAs
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_phred_filter")) return 0;
     VCFXPhredFilter pf;
     return pf.run(argc,argv);
 }
diff --git a/src/VCFX_population_filter/VCFX_population_filter.cpp b/src/VCFX_population_filter/VCFX_population_filter.cpp
index e1b33280..d1ebf032 100644
--- a/src/VCFX_population_filter/VCFX_population_filter.cpp
+++ b/src/VCFX_population_filter/VCFX_population_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_population_filter.h"
 #include 
 #include 
@@ -190,6 +191,7 @@ void VCFXPopulationFilter::filterPopulation(std::istream &in,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_population_filter")) return 0;
     VCFXPopulationFilter pf;
     return pf.run(argc, argv);
 }
diff --git a/src/VCFX_position_subsetter/VCFX_position_subsetter.cpp b/src/VCFX_position_subsetter/VCFX_position_subsetter.cpp
index 70258c7b..0437d160 100644
--- a/src/VCFX_position_subsetter/VCFX_position_subsetter.cpp
+++ b/src/VCFX_position_subsetter/VCFX_position_subsetter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_position_subsetter.h"
 #include 
 #include 
@@ -146,6 +147,7 @@ bool VCFXPositionSubsetter::subsetVCFByPosition(std::istream &in,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_position_subsetter")) return 0;
     VCFXPositionSubsetter subsetter;
     return subsetter.run(argc, argv);
 }
diff --git a/src/VCFX_probability_filter/VCFX_probability_filter.cpp b/src/VCFX_probability_filter/VCFX_probability_filter.cpp
index 770f6c8b..ee082f26 100644
--- a/src/VCFX_probability_filter/VCFX_probability_filter.cpp
+++ b/src/VCFX_probability_filter/VCFX_probability_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_probability_filter.h"
 #include 
 #include 
@@ -210,6 +211,7 @@ void VCFXProbabilityFilter::filterByProbability(std::istream& in, std::ostream&
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_probability_filter")) return 0;
     VCFXProbabilityFilter probabilityFilter;
     return probabilityFilter.run(argc, argv);
 }
\ No newline at end of file
diff --git a/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp b/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp
index 68632f96..9cccdcf1 100644
--- a/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp
+++ b/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_quality_adjuster.h"
 #include 
 #include 
@@ -175,6 +176,7 @@ void VCFXQualityAdjuster::adjustQualityScores(std::istream &in, std::ostream &ou
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_quality_adjuster")) return 0;
     VCFXQualityAdjuster app;
     return app.run(argc, argv);
 }
\ No newline at end of file
diff --git a/src/VCFX_record_filter/VCFX_record_filter.cpp b/src/VCFX_record_filter/VCFX_record_filter.cpp
index 6b0273e9..871ffb82 100644
--- a/src/VCFX_record_filter/VCFX_record_filter.cpp
+++ b/src/VCFX_record_filter/VCFX_record_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_record_filter.h"
 #include 
 #include 
@@ -320,6 +321,7 @@ void printHelp(){
 
 // main with typical argument parse
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_record_filter")) return 0;
     if(argc==1){
         printHelp();
         return 0;
diff --git a/src/VCFX_ref_comparator/VCFX_ref_comparator.cpp b/src/VCFX_ref_comparator/VCFX_ref_comparator.cpp
index bf121cab..1b26b073 100644
--- a/src/VCFX_ref_comparator/VCFX_ref_comparator.cpp
+++ b/src/VCFX_ref_comparator/VCFX_ref_comparator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_ref_comparator.h"
 #include 
 #include 
@@ -273,6 +274,7 @@ void VCFXRefComparator::compareVCF(std::istream &vcfIn, std::ostream &vcfOut){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_ref_comparator")) return 0;
     VCFXRefComparator refComp;
     return refComp.run(argc, argv);
 }
diff --git a/src/VCFX_reformatter/VCFX_reformatter.cpp b/src/VCFX_reformatter/VCFX_reformatter.cpp
index 070cca04..642b2b31 100644
--- a/src/VCFX_reformatter/VCFX_reformatter.cpp
+++ b/src/VCFX_reformatter/VCFX_reformatter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_reformatter.h"
 #include 
 #include 
@@ -461,6 +462,7 @@ std::string VCFXReformatter::applyFormatReorderToSample(const std::string &sampl
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_reformatter")) return 0;
     VCFXReformatter reformatter;
     return reformatter.run(argc, argv);
 }
diff --git a/src/VCFX_region_subsampler/VCFX_region_subsampler.cpp b/src/VCFX_region_subsampler/VCFX_region_subsampler.cpp
index 31d35247..60d5081e 100644
--- a/src/VCFX_region_subsampler/VCFX_region_subsampler.cpp
+++ b/src/VCFX_region_subsampler/VCFX_region_subsampler.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_region_subsampler.h"
 #include 
 #include 
@@ -254,6 +255,7 @@ void VCFXRegionSubsampler::processVCF(std::istream &in, std::ostream &out) {
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_region_subsampler")) return 0;
     VCFXRegionSubsampler app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_sample_extractor/VCFX_sample_extractor.cpp b/src/VCFX_sample_extractor/VCFX_sample_extractor.cpp
index 877f22f1..52f65eda 100644
--- a/src/VCFX_sample_extractor/VCFX_sample_extractor.cpp
+++ b/src/VCFX_sample_extractor/VCFX_sample_extractor.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_sample_extractor.h"
 #include 
 #include 
@@ -215,6 +216,7 @@ void VCFXSampleExtractor::extractSamples(std::istream &in, std::ostream &out,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_sample_extractor")) return 0;
     VCFXSampleExtractor app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_sorter/VCFX_sorter.cpp b/src/VCFX_sorter/VCFX_sorter.cpp
index 5627f9ad..681c2a8b 100644
--- a/src/VCFX_sorter/VCFX_sorter.cpp
+++ b/src/VCFX_sorter/VCFX_sorter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_sorter.h"
 #include 
 #include 
@@ -218,6 +219,7 @@ void VCFXSorter::outputVCF(std::ostream &out){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_sorter")) return 0;
     VCFXSorter app;
     return app.run(argc, argv);
 }
\ No newline at end of file
diff --git a/src/VCFX_subsampler/VCFX_subsampler.cpp b/src/VCFX_subsampler/VCFX_subsampler.cpp
index dfab860c..f1a8ec78 100644
--- a/src/VCFX_subsampler/VCFX_subsampler.cpp
+++ b/src/VCFX_subsampler/VCFX_subsampler.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_subsampler.h"
 #include 
 #include 
@@ -162,6 +163,7 @@ void VCFXSubsampler::subsampleLines(std::istream &in,
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_subsampler")) return 0;
     VCFXSubsampler app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_sv_handler/VCFX_sv_handler.cpp b/src/VCFX_sv_handler/VCFX_sv_handler.cpp
index 0ab3f1bd..c22f7c27 100644
--- a/src/VCFX_sv_handler/VCFX_sv_handler.cpp
+++ b/src/VCFX_sv_handler/VCFX_sv_handler.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_sv_handler.h"
 #include 
 #include 
@@ -205,6 +206,7 @@ void VCFXSvHandler::handleStructuralVariants(std::istream &in, std::ostream &out
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_sv_handler")) return 0;
     VCFXSvHandler app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_validator/VCFX_validator.cpp b/src/VCFX_validator/VCFX_validator.cpp
index c360636b..e70fc3e2 100644
--- a/src/VCFX_validator/VCFX_validator.cpp
+++ b/src/VCFX_validator/VCFX_validator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_validator.h"
 #include 
 #include 
@@ -305,6 +306,7 @@ bool VCFXValidator::validateVCF(std::istream &in){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_validator")) return 0;
     VCFXValidator validator;
     return validator.run(argc, argv);
 }
diff --git a/src/VCFX_variant_classifier/VCFX_variant_classifier.cpp b/src/VCFX_variant_classifier/VCFX_variant_classifier.cpp
index 14d9d511..1e4d16d5 100644
--- a/src/VCFX_variant_classifier/VCFX_variant_classifier.cpp
+++ b/src/VCFX_variant_classifier/VCFX_variant_classifier.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_variant_classifier.h"
 #include 
 #include 
@@ -326,6 +327,7 @@ void VCFXVariantClassifier::classifyStream(std::istream &in, std::ostream &out){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_variant_classifier")) return 0;
     VCFXVariantClassifier app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_variant_counter/VCFX_variant_counter.cpp b/src/VCFX_variant_counter/VCFX_variant_counter.cpp
index 26b21175..72828643 100644
--- a/src/VCFX_variant_counter/VCFX_variant_counter.cpp
+++ b/src/VCFX_variant_counter/VCFX_variant_counter.cpp
@@ -105,6 +105,7 @@ int VCFXVariantCounter::countVariants(std::istream &in){
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_variant_counter")) return 0;
     VCFXVariantCounter app;
     return app.run(argc, argv);
 }

From ca6485d04d2a40b1631490925fba733c4bb6c564 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 18:50:05 +0100
Subject: [PATCH 33/54] fix alt mismatch logic

---
 src/VCFX_alignment_checker/VCFX_alignment_checker.cpp | 11 +++++++----
 tests/expected/align_Y.txt                            |  1 +
 tests/out/align_Y.txt                                 |  1 +
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp b/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp
index f9df880d..6e407b46 100644
--- a/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp
+++ b/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp
@@ -208,6 +208,9 @@ void VCFXAlignmentChecker::checkDiscrepancies(std::istream& vcfIn, std::ostream&
                 while (std::getline(ss, field, '\t')) {
                     headers.push_back(field);
                 }
+                if (!headers.empty() && !headers[0].empty() && headers[0][0] == '#') {
+                    headers[0].erase(0, 1); // drop leading '#'
+                }
                 for (size_t i = 0; i < headers.size(); ++i) {
                     if (headers[i] == "CHROM") chrIndex = static_cast(i);
                     else if (headers[i] == "POS")   posIndex = static_cast(i);
@@ -280,11 +283,11 @@ void VCFXAlignmentChecker::checkDiscrepancies(std::istream& vcfIn, std::ostream&
                         << "\t" << allele << "\t" << "REF_MISMATCH"
                         << "\t" << ref_base << "\t" << ref << "\n";
                 }
-                // Compare ALT in VCF vs reference genome's same position
-                // (Often for a standard SNP, the reference base is the only thing in the FASTA.)
-                // This is somewhat conceptual: we're checking if the ALT base is the same as reference at that position.
+                // Compare ALT to the reference base at the same position.
+                // Here we flag a mismatch when the ALT allele is actually the
+                // same as the reference (i.e. not a true variant).
                 std::string alt_base = ref_base; // The reference at that position
-                if (allele != alt_base) {
+                if (allele == alt_base) {
                     out << chrom << "\t" << posVal << "\t" << id << "\t" << ref
                         << "\t" << allele << "\t" << "ALT_MISMATCH"
                         << "\t" << alt_base << "\t" << allele << "\n";
diff --git a/tests/expected/align_Y.txt b/tests/expected/align_Y.txt
index fe7b6ec7..2a84859b 100644
--- a/tests/expected/align_Y.txt
+++ b/tests/expected/align_Y.txt
@@ -1 +1,2 @@
 CHROM	POS	ID	REF	ALT	Discrepancy_Type	Reference_Value	VCF_Value
+chr2	5	.	T	T	ALT_MISMATCH	T	T
diff --git a/tests/out/align_Y.txt b/tests/out/align_Y.txt
index fe7b6ec7..2a84859b 100644
--- a/tests/out/align_Y.txt
+++ b/tests/out/align_Y.txt
@@ -1 +1,2 @@
 CHROM	POS	ID	REF	ALT	Discrepancy_Type	Reference_Value	VCF_Value
+chr2	5	.	T	T	ALT_MISMATCH	T	T

From 38c99fd0fe441f09aa9bcc9a384335cebf557088 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 19:00:46 +0100
Subject: [PATCH 34/54] Ignore genotype query test artifacts

---
 .gitignore                                     | 13 +++++++++++++
 tests/test_genotype_query.sh                   |  6 +++---
 .../data/genotype_query/missing_malformed.vcf  |  8 --------
 .../tests/data/genotype_query/multi_sample.vcf |  9 ---------
 .../data/genotype_query/single_sample.vcf      |  6 ------
 .../genotype_query/missing_malformed_01.vcf    |  5 -----
 .../genotype_query/multi_11_flexible.vcf       |  7 -------
 .../genotype_query/multi_11_strict.vcf         |  6 ------
 .../genotype_query/multi_12_flexible.vcf       |  6 ------
 .../tests/expected/genotype_query/no_match.vcf |  5 -----
 .../genotype_query/single_sample_flex_01.vcf   |  5 -----
 .../genotype_query/single_sample_strict_01.vcf |  4 ----
 .../genotype_query/help_message.txt            | 18 ------------------
 .../genotype_query/long_equals_output.vcf      |  4 ----
 .../genotype_query/missing_args.txt            |  2 --
 .../test_1_single_flex_output.vcf              |  4 ----
 .../test_2_single_strict_output.vcf            |  3 ---
 .../test_3_multi_11_flex_output.vcf            |  6 ------
 .../test_4_multi_11_strict_output.vcf          |  5 -----
 .../test_5_multi_12_flex_output.vcf            |  5 -----
 .../test_6_missing_malformed_output.vcf        |  4 ----
 .../genotype_query/test_7_no_match_output.vcf  |  4 ----
 22 files changed, 16 insertions(+), 119 deletions(-)
 delete mode 100644 tests/tests/data/genotype_query/missing_malformed.vcf
 delete mode 100644 tests/tests/data/genotype_query/multi_sample.vcf
 delete mode 100644 tests/tests/data/genotype_query/single_sample.vcf
 delete mode 100644 tests/tests/expected/genotype_query/missing_malformed_01.vcf
 delete mode 100644 tests/tests/expected/genotype_query/multi_11_flexible.vcf
 delete mode 100644 tests/tests/expected/genotype_query/multi_11_strict.vcf
 delete mode 100644 tests/tests/expected/genotype_query/multi_12_flexible.vcf
 delete mode 100644 tests/tests/expected/genotype_query/no_match.vcf
 delete mode 100644 tests/tests/expected/genotype_query/single_sample_flex_01.vcf
 delete mode 100644 tests/tests/expected/genotype_query/single_sample_strict_01.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/help_message.txt
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/long_equals_output.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/missing_args.txt
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/test_1_single_flex_output.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/test_2_single_strict_output.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/test_3_multi_11_flex_output.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/test_4_multi_11_strict_output.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/test_5_multi_12_flex_output.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/test_6_missing_malformed_output.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/test_7_no_match_output.vcf

diff --git a/.gitignore b/.gitignore
index 9053a952..135e939d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,3 +42,16 @@ Thumbs.db
 tools.md
 prompt.md
 names.md
+
+# Artifacts of the genotype_query tests (test data, expected outputs, temporary output)
+tests/tmp/genotype_query/
+tests/data/genotype_query/missing_malformed.vcf
+tests/data/genotype_query/multi_sample.vcf
+tests/data/genotype_query/single_sample.vcf
+tests/expected/genotype_query/missing_malformed_01.vcf
+tests/expected/genotype_query/multi_11_flexible.vcf
+tests/expected/genotype_query/multi_11_strict.vcf
+tests/expected/genotype_query/multi_12_flexible.vcf
+tests/expected/genotype_query/no_match.vcf
+tests/expected/genotype_query/single_sample_flex_01.vcf
+tests/expected/genotype_query/single_sample_strict_01.vcf
diff --git a/tests/test_genotype_query.sh b/tests/test_genotype_query.sh
index 363e8beb..3d74d5f6 100755
--- a/tests/test_genotype_query.sh
+++ b/tests/test_genotype_query.sh
@@ -23,9 +23,9 @@ TOOL="../build/src/VCFX_genotype_query/VCFX_genotype_query"
 
 # Directories for test data, expected outputs, and actual output:
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-TMP_DATA_DIR="${SCRIPT_DIR}/tests/data/genotype_query"
-TMP_EXP_DIR="${SCRIPT_DIR}/tests/expected/genotype_query"
-TMP_OUT_DIR="${SCRIPT_DIR}/tests/tmp_genoquout/genotype_query"
+TMP_DATA_DIR="${SCRIPT_DIR}/data/genotype_query"
+TMP_EXP_DIR="${SCRIPT_DIR}/expected/genotype_query"
+TMP_OUT_DIR="${SCRIPT_DIR}/tmp/genotype_query"
 
 mkdir -p "$TMP_DATA_DIR" "$TMP_EXP_DIR" "$TMP_OUT_DIR"
 
diff --git a/tests/tests/data/genotype_query/missing_malformed.vcf b/tests/tests/data/genotype_query/missing_malformed.vcf
deleted file mode 100644
index fc65872a..00000000
--- a/tests/tests/data/genotype_query/missing_malformed.vcf
+++ /dev/null
@@ -1,8 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	150	rsM	C	G	.	PASS	.	GT:DP	0/1:12	1/1:30
-1	200	rsN	A	T	.	PASS	.	GT	0/1	.
-1	250	rsO	A	G	.	PASS	.		1/1	1/1
-chr1	300  # <10 fields on purpose
-1	400	rsQ	G	A	99	PASS	.	DP	10	15
-
diff --git a/tests/tests/data/genotype_query/multi_sample.vcf b/tests/tests/data/genotype_query/multi_sample.vcf
deleted file mode 100644
index 18425526..00000000
--- a/tests/tests/data/genotype_query/multi_sample.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-1	100	rsX	A	G	.	PASS	.	GT	0/0	0|1	1/1
-1	200	rsY	A	G,T	.	PASS	.	GT	1/2	2/2	0/2
-2	300	rsZ	C	T	.	PASS	.	GT	1|1	1/1	1/0
-2	400	.	G	A	.	PASS	.	GT	.	.	0/1
-
diff --git a/tests/tests/data/genotype_query/single_sample.vcf b/tests/tests/data/genotype_query/single_sample.vcf
deleted file mode 100644
index 233fa7d6..00000000
--- a/tests/tests/data/genotype_query/single_sample.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	100	rsA	A	G	50	PASS	.	GT	0/1
-1	200	rsB	A	G	50	PASS	.	GT	0|1
-1	300	rsC	A	G	50	PASS	.	GT	1|1
-
diff --git a/tests/tests/expected/genotype_query/missing_malformed_01.vcf b/tests/tests/expected/genotype_query/missing_malformed_01.vcf
deleted file mode 100644
index 3c6ba4cc..00000000
--- a/tests/tests/expected/genotype_query/missing_malformed_01.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	150	rsM	C	G	.	PASS	.	GT:DP	0/1:12	1/1:30
-1	200	rsN	A	T	.	PASS	.	GT	0/1	.
-
diff --git a/tests/tests/expected/genotype_query/multi_11_flexible.vcf b/tests/tests/expected/genotype_query/multi_11_flexible.vcf
deleted file mode 100644
index fe2af3a8..00000000
--- a/tests/tests/expected/genotype_query/multi_11_flexible.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-1	100	rsX	A	G	.	PASS	.	GT	0/0	0|1	1/1
-2	300	rsZ	C	T	.	PASS	.	GT	1|1	1/1	1/0
-
diff --git a/tests/tests/expected/genotype_query/multi_11_strict.vcf b/tests/tests/expected/genotype_query/multi_11_strict.vcf
deleted file mode 100644
index add030a4..00000000
--- a/tests/tests/expected/genotype_query/multi_11_strict.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-2	300	rsZ	C	T	.	PASS	.	GT	1|1	1/1	1/0
-
diff --git a/tests/tests/expected/genotype_query/multi_12_flexible.vcf b/tests/tests/expected/genotype_query/multi_12_flexible.vcf
deleted file mode 100644
index 01424f8e..00000000
--- a/tests/tests/expected/genotype_query/multi_12_flexible.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-1	200	rsY	A	G,T	.	PASS	.	GT	1/2	2/2	0/2
-
diff --git a/tests/tests/expected/genotype_query/no_match.vcf b/tests/tests/expected/genotype_query/no_match.vcf
deleted file mode 100644
index 9d46ba3c..00000000
--- a/tests/tests/expected/genotype_query/no_match.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-
diff --git a/tests/tests/expected/genotype_query/single_sample_flex_01.vcf b/tests/tests/expected/genotype_query/single_sample_flex_01.vcf
deleted file mode 100644
index 52ce2d29..00000000
--- a/tests/tests/expected/genotype_query/single_sample_flex_01.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	100	rsA	A	G	50	PASS	.	GT	0/1
-1	200	rsB	A	G	50	PASS	.	GT	0|1
-
diff --git a/tests/tests/expected/genotype_query/single_sample_strict_01.vcf b/tests/tests/expected/genotype_query/single_sample_strict_01.vcf
deleted file mode 100644
index 625472f0..00000000
--- a/tests/tests/expected/genotype_query/single_sample_strict_01.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	200	rsB	A	G	50	PASS	.	GT	0|1
-
diff --git a/tests/tests/tmp_genoquout/genotype_query/help_message.txt b/tests/tests/tmp_genoquout/genotype_query/help_message.txt
deleted file mode 100644
index 1d446ddf..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/help_message.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-VCFX_genotype_query
-Usage: VCFX_genotype_query [OPTIONS]
-
-Options:
-  --genotype-query, -g "GENOTYPE"  Specify the genotype to query (e.g., "0/1", "1/1").
-  --strict                        Use strict string compare (no phasing unify or allele sorting).
-  --help, -h                      Display this help message and exit.
-
-Description:
-  Reads a VCF from stdin, outputs only the lines (plus all header lines) where
-  at least one sample has the specified genotype in the 'GT' subfield.
-
-Examples:
-  # Flexible matching 0/1 or 0|1 => both become 0/1
-  ./VCFX_genotype_query --genotype-query "0/1" < input.vcf > out.vcf
-
-  # Strict matching => "0|1" won't match "0/1"
-  ./VCFX_genotype_query --genotype-query "0|1" --strict < input.vcf > out.vcf
diff --git a/tests/tests/tmp_genoquout/genotype_query/long_equals_output.vcf b/tests/tests/tmp_genoquout/genotype_query/long_equals_output.vcf
deleted file mode 100644
index 27e716b6..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/long_equals_output.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	100	rsA	A	G	50	PASS	.	GT	0/1
-1	200	rsB	A	G	50	PASS	.	GT	0|1
diff --git a/tests/tests/tmp_genoquout/genotype_query/missing_args.txt b/tests/tests/tmp_genoquout/genotype_query/missing_args.txt
deleted file mode 100644
index c8c85b1b..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/missing_args.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Usage: ../build/src/VCFX_genotype_query/VCFX_genotype_query --genotype-query "0/1" [--strict] < input.vcf > output.vcf
-Use --help for usage.
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_1_single_flex_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_1_single_flex_output.vcf
deleted file mode 100644
index 27e716b6..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_1_single_flex_output.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	100	rsA	A	G	50	PASS	.	GT	0/1
-1	200	rsB	A	G	50	PASS	.	GT	0|1
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_2_single_strict_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_2_single_strict_output.vcf
deleted file mode 100644
index 314ce18f..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_2_single_strict_output.vcf
+++ /dev/null
@@ -1,3 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	200	rsB	A	G	50	PASS	.	GT	0|1
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_3_multi_11_flex_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_3_multi_11_flex_output.vcf
deleted file mode 100644
index 0b19c2aa..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_3_multi_11_flex_output.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-1	100	rsX	A	G	.	PASS	.	GT	0/0	0|1	1/1
-2	300	rsZ	C	T	.	PASS	.	GT	1|1	1/1	1/0
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_4_multi_11_strict_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_4_multi_11_strict_output.vcf
deleted file mode 100644
index f07c0d2b..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_4_multi_11_strict_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-2	300	rsZ	C	T	.	PASS	.	GT	1|1	1/1	1/0
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_5_multi_12_flex_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_5_multi_12_flex_output.vcf
deleted file mode 100644
index 4212ca27..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_5_multi_12_flex_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-1	200	rsY	A	G,T	.	PASS	.	GT	1/2	2/2	0/2
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_6_missing_malformed_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_6_missing_malformed_output.vcf
deleted file mode 100644
index a900c879..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_6_missing_malformed_output.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	150	rsM	C	G	.	PASS	.	GT:DP	0/1:12	1/1:30
-1	200	rsN	A	T	.	PASS	.	GT	0/1	.
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_7_no_match_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_7_no_match_output.vcf
deleted file mode 100644
index 3c654126..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_7_no_match_output.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3

From 13d42cbb9adfb63c9235deba4358054a98bba613 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 19:28:10 +0100
Subject: [PATCH 35/54] Ignore temporary test outputs

---
 .gitignore                                    |  5 ++++
 tests/tmp/afr_output.vcf                      |  9 -------
 tests/tmp/afr_samples_err.log                 |  0
 tests/tmp/afr_samples_output.tsv              |  4 ---
 tests/tmp/basic_threshold_20.vcf              |  6 -----
 tests/tmp/basic_threshold_30.vcf              |  5 ----
 tests/tmp/basic_threshold_30_keep_missing.vcf |  6 -----
 tests/tmp/complex_err.log                     |  0
 tests/tmp/complex_output.tsv                  |  3 ---
 tests/tmp/dp_ge_20_all_err.log                |  0
 tests/tmp/dp_ge_20_all_output.vcf             |  7 ------
 tests/tmp/dp_ge_20_any_err.log                |  0
 tests/tmp/dp_ge_20_any_output.vcf             | 12 ---------
 tests/tmp/dp_le_20_all_err.log                |  0
 tests/tmp/dp_le_20_all_output.vcf             |  9 -------
 tests/tmp/dp_lt_19_5_any_err.log              |  0
 tests/tmp/dp_lt_19_5_any_output.vcf           | 12 ---------
 tests/tmp/eas_output.vcf                      |  9 -------
 tests/tmp/eas_samples_err.log                 |  0
 tests/tmp/eas_samples_output.tsv              |  4 ---
 tests/tmp/empty_map_error.txt                 |  1 -
 tests/tmp/empty_map_output.vcf                |  9 -------
 tests/tmp/eq_operator_err.log                 |  0
 tests/tmp/eq_operator_output.vcf              |  5 ----
 tests/tmp/equals_format_cleaned.vcf           |  1 -
 tests/tmp/equals_format_expected_cleaned.vcf  |  1 -
 tests/tmp/equals_format_output.vcf            | 10 --------
 tests/tmp/error.txt                           |  1 -
 tests/tmp/eur_output.vcf                      |  9 -------
 tests/tmp/eur_samples_err.log                 |  0
 tests/tmp/eur_samples_output.tsv              |  4 ---
 tests/tmp/ge_operator_err.log                 |  0
 tests/tmp/ge_operator_output.vcf              |  5 ----
 tests/tmp/gq_eq_30_all_err.log                |  0
 tests/tmp/gq_eq_30_all_output.vcf             |  7 ------
 tests/tmp/gq_gt_20_all_err.log                |  0
 tests/tmp/gq_gt_20_all_output.vcf             |  9 -------
 tests/tmp/gq_gt_20_any_err.log                |  0
 tests/tmp/gq_gt_20_any_output.vcf             | 12 ---------
 tests/tmp/gq_gt_24_5_all_err.log              |  0
 tests/tmp/gq_gt_24_5_all_output.vcf           |  8 ------
 tests/tmp/gq_lt_30_all_err.log                |  0
 tests/tmp/gq_lt_30_all_output.vcf             |  7 ------
 tests/tmp/gq_lt_30_any_err.log                |  0
 tests/tmp/gq_lt_30_any_output.vcf             | 12 ---------
 tests/tmp/gq_ne_30_all_err.log                |  0
 tests/tmp/gq_ne_30_all_output.vcf             | 10 --------
 tests/tmp/gt_operator_err.log                 |  0
 tests/tmp/gt_operator_output.vcf              |  5 ----
 tests/tmp/help_message.txt                    | 20 ---------------
 tests/tmp/help_output.txt                     | 25 -------------------
 tests/tmp/identity_transform_err.log          |  0
 tests/tmp/identity_transform_output.vcf       | 10 --------
 tests/tmp/invalid_condition_err.log           |  1 -
 tests/tmp/invalid_condition_output.vcf        |  0
 tests/tmp/invalid_err.log                     |  4 ---
 tests/tmp/invalid_error.log                   |  1 -
 tests/tmp/invalid_mode_err.log                |  2 --
 tests/tmp/invalid_mode_out.vcf                | 18 -------------
 tests/tmp/invalid_mode_output.vcf             | 19 --------------
 tests/tmp/invalid_out.vcf                     |  0
 tests/tmp/invalid_output.tsv                  |  2 --
 tests/tmp/invalid_output.vcf                  |  0
 tests/tmp/invalid_records_threshold_30.vcf    |  5 ----
 tests/tmp/invalid_vcf_err.log                 |  1 -
 tests/tmp/invalid_vcf_output.tsv              |  0
 tests/tmp/le_operator_err.log                 |  0
 tests/tmp/le_operator_output.vcf              |  6 -----
 tests/tmp/log_transform_edge_err.log          |  1 -
 tests/tmp/log_transform_edge_output.vcf       |  6 -----
 tests/tmp/log_transform_err.log               |  0
 tests/tmp/log_transform_no_clamp_err.log      |  0
 tests/tmp/log_transform_no_clamp_output.vcf   | 10 --------
 tests/tmp/log_transform_output.vcf            | 10 --------
 tests/tmp/lt_operator_err.log                 |  0
 tests/tmp/lt_operator_output.vcf              |  5 ----
 tests/tmp/malformed_err.log                   |  2 --
 tests/tmp/malformed_freq_err.log              |  3 ---
 tests/tmp/malformed_freq_output.tsv           |  0
 tests/tmp/malformed_freqs.txt                 |  1 -
 tests/tmp/malformed_input_err.log             |  2 --
 tests/tmp/malformed_input_output.vcf          |  2 --
 tests/tmp/malformed_out.vcf                   |  2 --
 tests/tmp/malformed_output.vcf                |  7 ------
 .../malformed_query_01_flexible_output.vcf    | 11 --------
 tests/tmp/malformed_threshold_30.vcf          |  4 ---
 .../malformed_threshold_30_keep_missing.vcf   |  5 ----
 tests/tmp/malformed_threshold_5.vcf           |  5 ----
 tests/tmp/malformed_vcf_err.log               |  3 ---
 tests/tmp/malformed_vcf_output.vcf            |  3 ---
 tests/tmp/missing_arg_error.log               |  0
 tests/tmp/missing_arg_output.vcf              | 25 -------------------
 tests/tmp/missing_args_output.txt             |  2 --
 tests/tmp/missing_err.log                     |  0
 tests/tmp/missing_field_err.log               |  1 -
 tests/tmp/missing_field_gq_gt_20_err.log      |  0
 tests/tmp/missing_field_gq_gt_20_output.vcf   |  5 ----
 tests/tmp/missing_field_output.vcf            |  4 ---
 tests/tmp/missing_filter_err.log              |  1 -
 tests/tmp/missing_filter_output.vcf           | 18 -------------
 tests/tmp/missing_freq_err.log                |  2 --
 tests/tmp/missing_freq_output.tsv             |  0
 tests/tmp/missing_output.tsv                  |  3 ---
 tests/tmp/missing_samples_err.log             |  0
 tests/tmp/missing_samples_output.tsv          |  4 ---
 tests/tmp/missing_value_gq_gt_20_all_err.log  |  0
 .../tmp/missing_value_gq_gt_20_all_output.vcf |  7 ------
 tests/tmp/missing_value_gq_gt_20_any_err.log  |  0
 .../tmp/missing_value_gq_gt_20_any_output.vcf |  8 ------
 tests/tmp/mixed_population_check_err.log      |  0
 tests/tmp/mixed_population_check_output.tsv   |  5 ----
 tests/tmp/mixed_population_check_results.tmp  |  4 ---
 tests/tmp/mixed_samples_err.log               |  0
 tests/tmp/mixed_samples_output.tsv            |  5 ----
 tests/tmp/multiallelic_err.log                |  0
 tests/tmp/multiallelic_output.tsv             |  3 ---
 tests/tmp/multiallelic_samples_err.log        |  0
 tests/tmp/multiallelic_samples_output.tsv     |  4 ---
 tests/tmp/ne_operator_err.log                 |  0
 tests/tmp/ne_operator_output.vcf              | 10 --------
 tests/tmp/no_args_output.txt                  | 17 -------------
 tests/tmp/no_gt_err.log                       |  0
 tests/tmp/no_gt_output.tsv                    |  1 -
 tests/tmp/output.txt                          | 18 -------------
 tests/tmp/phased_err.log                      |  0
 tests/tmp/phased_output.tsv                   |  3 ---
 tests/tmp/phased_samples_err.log              |  0
 tests/tmp/phased_samples_output.tsv           |  4 ---
 tests/tmp/pl_gt_40_any_err.log                |  0
 tests/tmp/pl_gt_40_any_output.vcf             |  9 -------
 tests/tmp/query_01_flexible_cleaned.vcf       |  1 -
 .../query_01_flexible_expected_cleaned.vcf    |  1 -
 tests/tmp/query_01_flexible_output.vcf        | 10 --------
 tests/tmp/query_01_pipe_flexible_output.vcf   | 19 --------------
 tests/tmp/query_01_pipe_strict_output.vcf     | 10 --------
 tests/tmp/query_01_strict_output.vcf          | 15 -----------
 tests/tmp/query_11_flexible_output.vcf        | 13 ----------
 tests/tmp/query_multi_02_flexible_output.vcf  |  8 ------
 tests/tmp/simple_err.log                      |  0
 tests/tmp/simple_freqs_err.log                |  0
 tests/tmp/simple_freqs_output.tsv             |  5 ----
 tests/tmp/simple_output.tsv                   |  5 ----
 tests/tmp/sqrt_transform_edge_err.log         |  1 -
 tests/tmp/sqrt_transform_edge_output.vcf      |  6 -----
 tests/tmp/sqrt_transform_err.log              |  0
 tests/tmp/sqrt_transform_output.vcf           | 10 --------
 tests/tmp/square_transform_edge_err.log       |  1 -
 tests/tmp/square_transform_edge_output.vcf    |  6 -----
 tests/tmp/square_transform_err.log            |  0
 tests/tmp/square_transform_no_clamp_err.log   |  0
 .../tmp/square_transform_no_clamp_output.vcf  | 10 --------
 tests/tmp/square_transform_output.vcf         | 10 --------
 tests/tmp/unknown_output.vcf                  |  9 -------
 153 files changed, 5 insertions(+), 701 deletions(-)
 delete mode 100644 tests/tmp/afr_output.vcf
 delete mode 100644 tests/tmp/afr_samples_err.log
 delete mode 100644 tests/tmp/afr_samples_output.tsv
 delete mode 100644 tests/tmp/basic_threshold_20.vcf
 delete mode 100644 tests/tmp/basic_threshold_30.vcf
 delete mode 100644 tests/tmp/basic_threshold_30_keep_missing.vcf
 delete mode 100644 tests/tmp/complex_err.log
 delete mode 100644 tests/tmp/complex_output.tsv
 delete mode 100644 tests/tmp/dp_ge_20_all_err.log
 delete mode 100644 tests/tmp/dp_ge_20_all_output.vcf
 delete mode 100644 tests/tmp/dp_ge_20_any_err.log
 delete mode 100644 tests/tmp/dp_ge_20_any_output.vcf
 delete mode 100644 tests/tmp/dp_le_20_all_err.log
 delete mode 100644 tests/tmp/dp_le_20_all_output.vcf
 delete mode 100644 tests/tmp/dp_lt_19_5_any_err.log
 delete mode 100644 tests/tmp/dp_lt_19_5_any_output.vcf
 delete mode 100644 tests/tmp/eas_output.vcf
 delete mode 100644 tests/tmp/eas_samples_err.log
 delete mode 100644 tests/tmp/eas_samples_output.tsv
 delete mode 100644 tests/tmp/empty_map_error.txt
 delete mode 100644 tests/tmp/empty_map_output.vcf
 delete mode 100644 tests/tmp/eq_operator_err.log
 delete mode 100644 tests/tmp/eq_operator_output.vcf
 delete mode 100644 tests/tmp/equals_format_cleaned.vcf
 delete mode 100644 tests/tmp/equals_format_expected_cleaned.vcf
 delete mode 100644 tests/tmp/equals_format_output.vcf
 delete mode 100644 tests/tmp/error.txt
 delete mode 100644 tests/tmp/eur_output.vcf
 delete mode 100644 tests/tmp/eur_samples_err.log
 delete mode 100644 tests/tmp/eur_samples_output.tsv
 delete mode 100644 tests/tmp/ge_operator_err.log
 delete mode 100644 tests/tmp/ge_operator_output.vcf
 delete mode 100644 tests/tmp/gq_eq_30_all_err.log
 delete mode 100644 tests/tmp/gq_eq_30_all_output.vcf
 delete mode 100644 tests/tmp/gq_gt_20_all_err.log
 delete mode 100644 tests/tmp/gq_gt_20_all_output.vcf
 delete mode 100644 tests/tmp/gq_gt_20_any_err.log
 delete mode 100644 tests/tmp/gq_gt_20_any_output.vcf
 delete mode 100644 tests/tmp/gq_gt_24_5_all_err.log
 delete mode 100644 tests/tmp/gq_gt_24_5_all_output.vcf
 delete mode 100644 tests/tmp/gq_lt_30_all_err.log
 delete mode 100644 tests/tmp/gq_lt_30_all_output.vcf
 delete mode 100644 tests/tmp/gq_lt_30_any_err.log
 delete mode 100644 tests/tmp/gq_lt_30_any_output.vcf
 delete mode 100644 tests/tmp/gq_ne_30_all_err.log
 delete mode 100644 tests/tmp/gq_ne_30_all_output.vcf
 delete mode 100644 tests/tmp/gt_operator_err.log
 delete mode 100644 tests/tmp/gt_operator_output.vcf
 delete mode 100644 tests/tmp/help_message.txt
 delete mode 100644 tests/tmp/help_output.txt
 delete mode 100644 tests/tmp/identity_transform_err.log
 delete mode 100644 tests/tmp/identity_transform_output.vcf
 delete mode 100644 tests/tmp/invalid_condition_err.log
 delete mode 100644 tests/tmp/invalid_condition_output.vcf
 delete mode 100644 tests/tmp/invalid_err.log
 delete mode 100644 tests/tmp/invalid_error.log
 delete mode 100644 tests/tmp/invalid_mode_err.log
 delete mode 100644 tests/tmp/invalid_mode_out.vcf
 delete mode 100644 tests/tmp/invalid_mode_output.vcf
 delete mode 100644 tests/tmp/invalid_out.vcf
 delete mode 100644 tests/tmp/invalid_output.tsv
 delete mode 100644 tests/tmp/invalid_output.vcf
 delete mode 100644 tests/tmp/invalid_records_threshold_30.vcf
 delete mode 100644 tests/tmp/invalid_vcf_err.log
 delete mode 100644 tests/tmp/invalid_vcf_output.tsv
 delete mode 100644 tests/tmp/le_operator_err.log
 delete mode 100644 tests/tmp/le_operator_output.vcf
 delete mode 100644 tests/tmp/log_transform_edge_err.log
 delete mode 100644 tests/tmp/log_transform_edge_output.vcf
 delete mode 100644 tests/tmp/log_transform_err.log
 delete mode 100644 tests/tmp/log_transform_no_clamp_err.log
 delete mode 100644 tests/tmp/log_transform_no_clamp_output.vcf
 delete mode 100644 tests/tmp/log_transform_output.vcf
 delete mode 100644 tests/tmp/lt_operator_err.log
 delete mode 100644 tests/tmp/lt_operator_output.vcf
 delete mode 100644 tests/tmp/malformed_err.log
 delete mode 100644 tests/tmp/malformed_freq_err.log
 delete mode 100644 tests/tmp/malformed_freq_output.tsv
 delete mode 100644 tests/tmp/malformed_freqs.txt
 delete mode 100644 tests/tmp/malformed_input_err.log
 delete mode 100644 tests/tmp/malformed_input_output.vcf
 delete mode 100644 tests/tmp/malformed_out.vcf
 delete mode 100644 tests/tmp/malformed_output.vcf
 delete mode 100644 tests/tmp/malformed_query_01_flexible_output.vcf
 delete mode 100644 tests/tmp/malformed_threshold_30.vcf
 delete mode 100644 tests/tmp/malformed_threshold_30_keep_missing.vcf
 delete mode 100644 tests/tmp/malformed_threshold_5.vcf
 delete mode 100644 tests/tmp/malformed_vcf_err.log
 delete mode 100644 tests/tmp/malformed_vcf_output.vcf
 delete mode 100644 tests/tmp/missing_arg_error.log
 delete mode 100644 tests/tmp/missing_arg_output.vcf
 delete mode 100644 tests/tmp/missing_args_output.txt
 delete mode 100644 tests/tmp/missing_err.log
 delete mode 100644 tests/tmp/missing_field_err.log
 delete mode 100644 tests/tmp/missing_field_gq_gt_20_err.log
 delete mode 100644 tests/tmp/missing_field_gq_gt_20_output.vcf
 delete mode 100644 tests/tmp/missing_field_output.vcf
 delete mode 100644 tests/tmp/missing_filter_err.log
 delete mode 100644 tests/tmp/missing_filter_output.vcf
 delete mode 100644 tests/tmp/missing_freq_err.log
 delete mode 100644 tests/tmp/missing_freq_output.tsv
 delete mode 100644 tests/tmp/missing_output.tsv
 delete mode 100644 tests/tmp/missing_samples_err.log
 delete mode 100644 tests/tmp/missing_samples_output.tsv
 delete mode 100644 tests/tmp/missing_value_gq_gt_20_all_err.log
 delete mode 100644 tests/tmp/missing_value_gq_gt_20_all_output.vcf
 delete mode 100644 tests/tmp/missing_value_gq_gt_20_any_err.log
 delete mode 100644 tests/tmp/missing_value_gq_gt_20_any_output.vcf
 delete mode 100644 tests/tmp/mixed_population_check_err.log
 delete mode 100644 tests/tmp/mixed_population_check_output.tsv
 delete mode 100644 tests/tmp/mixed_population_check_results.tmp
 delete mode 100644 tests/tmp/mixed_samples_err.log
 delete mode 100644 tests/tmp/mixed_samples_output.tsv
 delete mode 100644 tests/tmp/multiallelic_err.log
 delete mode 100644 tests/tmp/multiallelic_output.tsv
 delete mode 100644 tests/tmp/multiallelic_samples_err.log
 delete mode 100644 tests/tmp/multiallelic_samples_output.tsv
 delete mode 100644 tests/tmp/ne_operator_err.log
 delete mode 100644 tests/tmp/ne_operator_output.vcf
 delete mode 100644 tests/tmp/no_args_output.txt
 delete mode 100644 tests/tmp/no_gt_err.log
 delete mode 100644 tests/tmp/no_gt_output.tsv
 delete mode 100644 tests/tmp/output.txt
 delete mode 100644 tests/tmp/phased_err.log
 delete mode 100644 tests/tmp/phased_output.tsv
 delete mode 100644 tests/tmp/phased_samples_err.log
 delete mode 100644 tests/tmp/phased_samples_output.tsv
 delete mode 100644 tests/tmp/pl_gt_40_any_err.log
 delete mode 100644 tests/tmp/pl_gt_40_any_output.vcf
 delete mode 100644 tests/tmp/query_01_flexible_cleaned.vcf
 delete mode 100644 tests/tmp/query_01_flexible_expected_cleaned.vcf
 delete mode 100644 tests/tmp/query_01_flexible_output.vcf
 delete mode 100644 tests/tmp/query_01_pipe_flexible_output.vcf
 delete mode 100644 tests/tmp/query_01_pipe_strict_output.vcf
 delete mode 100644 tests/tmp/query_01_strict_output.vcf
 delete mode 100644 tests/tmp/query_11_flexible_output.vcf
 delete mode 100644 tests/tmp/query_multi_02_flexible_output.vcf
 delete mode 100644 tests/tmp/simple_err.log
 delete mode 100644 tests/tmp/simple_freqs_err.log
 delete mode 100644 tests/tmp/simple_freqs_output.tsv
 delete mode 100644 tests/tmp/simple_output.tsv
 delete mode 100644 tests/tmp/sqrt_transform_edge_err.log
 delete mode 100644 tests/tmp/sqrt_transform_edge_output.vcf
 delete mode 100644 tests/tmp/sqrt_transform_err.log
 delete mode 100644 tests/tmp/sqrt_transform_output.vcf
 delete mode 100644 tests/tmp/square_transform_edge_err.log
 delete mode 100644 tests/tmp/square_transform_edge_output.vcf
 delete mode 100644 tests/tmp/square_transform_err.log
 delete mode 100644 tests/tmp/square_transform_no_clamp_err.log
 delete mode 100644 tests/tmp/square_transform_no_clamp_output.vcf
 delete mode 100644 tests/tmp/square_transform_output.vcf
 delete mode 100644 tests/tmp/unknown_output.vcf

diff --git a/.gitignore b/.gitignore
index 135e939d..40b05829 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,3 +55,8 @@ tests/expected/genotype_query/multi_12_flexible.vcf
 tests/expected/genotype_query/no_match.vcf
 tests/expected/genotype_query/single_sample_flex_01.vcf
 tests/expected/genotype_query/single_sample_strict_01.vcf
+
+# General temporary test output directories
+tests/tmp/
+tests/out/
+tmp/
diff --git a/tests/tmp/afr_output.vcf b/tests/tmp/afr_output.vcf
deleted file mode 100644
index 6d48cfc7..00000000
--- a/tests/tmp/afr_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE3_AFR	SAMPLE4_AFR
-1	100	rs123	A	T	50	PASS	AF=0.1	GT:DP	1|1:20	0|1:22
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP	0|0:19	0|1:21
-2	150	rs789	T	C	70	PASS	AF=0.3	GT:DP	0|0:18	0|1:24
-2	250	rs012	G	A	80	PASS	AF=0.4	GT:DP	1|1:25	0|1:20
diff --git a/tests/tmp/afr_samples_err.log b/tests/tmp/afr_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/afr_samples_output.tsv b/tests/tmp/afr_samples_output.tsv
deleted file mode 100644
index 09be6da5..00000000
--- a/tests/tmp/afr_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-AFR_SAMPLE1	AFR
-AFR_SAMPLE2	AFR
-AFR_SAMPLE3	AFR
diff --git a/tests/tmp/basic_threshold_20.vcf b/tests/tmp/basic_threshold_20.vcf
deleted file mode 100644
index 82661320..00000000
--- a/tests/tmp/basic_threshold_20.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
-1	200	.	C	T	20	PASS	DP=25
-1	300	.	G	A	30	PASS	DP=40
diff --git a/tests/tmp/basic_threshold_30.vcf b/tests/tmp/basic_threshold_30.vcf
deleted file mode 100644
index 3ace7755..00000000
--- a/tests/tmp/basic_threshold_30.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
-1	300	.	G	A	30	PASS	DP=40
diff --git a/tests/tmp/basic_threshold_30_keep_missing.vcf b/tests/tmp/basic_threshold_30_keep_missing.vcf
deleted file mode 100644
index ed3b35f8..00000000
--- a/tests/tmp/basic_threshold_30_keep_missing.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
-1	300	.	G	A	30	PASS	DP=40
-1	400	.	T	C	.	PASS	DP=35
diff --git a/tests/tmp/complex_err.log b/tests/tmp/complex_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/complex_output.tsv b/tests/tmp/complex_output.tsv
deleted file mode 100644
index 189c397a..00000000
--- a/tests/tmp/complex_output.tsv
+++ /dev/null
@@ -1,3 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G	0.5000
-1	200	rs2	C	T	0.3333
diff --git a/tests/tmp/dp_ge_20_all_err.log b/tests/tmp/dp_ge_20_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/dp_ge_20_all_output.vcf b/tests/tmp/dp_ge_20_all_output.vcf
deleted file mode 100644
index 85eebfcf..00000000
--- a/tests/tmp/dp_ge_20_all_output.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/dp_ge_20_any_err.log b/tests/tmp/dp_ge_20_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/dp_ge_20_any_output.vcf b/tests/tmp/dp_ge_20_any_output.vcf
deleted file mode 100644
index d9b7ce08..00000000
--- a/tests/tmp/dp_ge_20_any_output.vcf
+++ /dev/null
@@ -1,12 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GQ:DP:PL	0/1:15:18:20,0,30	0/1:10:25:15,0,20	0/0:35:22:0,35,45
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP:PL	0/0:45:30:0,45,60	0/0:50:20:0,50,65	1/1:5:8:25,5,0
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GQ:DP:PL	0/1:20:25:30,0,35	1/1:30:18:40,30,0	0/1:25:22:32,0,38
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/dp_le_20_all_err.log b/tests/tmp/dp_le_20_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/dp_le_20_all_output.vcf b/tests/tmp/dp_le_20_all_output.vcf
deleted file mode 100644
index 87339394..00000000
--- a/tests/tmp/dp_le_20_all_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/dp_lt_19_5_any_err.log b/tests/tmp/dp_lt_19_5_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/dp_lt_19_5_any_output.vcf b/tests/tmp/dp_lt_19_5_any_output.vcf
deleted file mode 100644
index d9b7ce08..00000000
--- a/tests/tmp/dp_lt_19_5_any_output.vcf
+++ /dev/null
@@ -1,12 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GQ:DP:PL	0/1:15:18:20,0,30	0/1:10:25:15,0,20	0/0:35:22:0,35,45
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP:PL	0/0:45:30:0,45,60	0/0:50:20:0,50,65	1/1:5:8:25,5,0
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GQ:DP:PL	0/1:20:25:30,0,35	1/1:30:18:40,30,0	0/1:25:22:32,0,38
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/eas_output.vcf b/tests/tmp/eas_output.vcf
deleted file mode 100644
index 58a5bca7..00000000
--- a/tests/tmp/eas_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE5_EAS
-1	100	rs123	A	T	50	PASS	AF=0.1	GT:DP	0|0:18
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP	1|1:26
-2	150	rs789	T	C	70	PASS	AF=0.3	GT:DP	1|1:27
-2	250	rs012	G	A	80	PASS	AF=0.4	GT:DP	0|0:19
diff --git a/tests/tmp/eas_samples_err.log b/tests/tmp/eas_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/eas_samples_output.tsv b/tests/tmp/eas_samples_output.tsv
deleted file mode 100644
index 3363108c..00000000
--- a/tests/tmp/eas_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-EAS_SAMPLE1	EAS
-EAS_SAMPLE2	EAS
-EAS_SAMPLE3	EAS
diff --git a/tests/tmp/empty_map_error.txt b/tests/tmp/empty_map_error.txt
deleted file mode 100644
index f3c371ac..00000000
--- a/tests/tmp/empty_map_error.txt
+++ /dev/null
@@ -1 +0,0 @@
-Warning: No samples found for population tag: EUR
diff --git a/tests/tmp/empty_map_output.vcf b/tests/tmp/empty_map_output.vcf
deleted file mode 100644
index ae84bda2..00000000
--- a/tests/tmp/empty_map_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT
-1	100	rs123	A	T	50	PASS	AF=0.1	GT:DP
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP
-2	150	rs789	T	C	70	PASS	AF=0.3	GT:DP
-2	250	rs012	G	A	80	PASS	AF=0.4	GT:DP
diff --git a/tests/tmp/eq_operator_err.log b/tests/tmp/eq_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/eq_operator_output.vcf b/tests/tmp/eq_operator_output.vcf
deleted file mode 100644
index 0969a3e1..00000000
--- a/tests/tmp/eq_operator_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/equals_format_cleaned.vcf b/tests/tmp/equals_format_cleaned.vcf
deleted file mode 100644
index 2bc29661..00000000
--- a/tests/tmp/equals_format_cleaned.vcf
+++ /dev/null
@@ -1 +0,0 @@
-##fileformat=VCFv4.2##INFO=##FORMAT=##FORMAT=#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE31	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:221	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:101	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:221	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:121	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/equals_format_expected_cleaned.vcf b/tests/tmp/equals_format_expected_cleaned.vcf
deleted file mode 100644
index 2bc29661..00000000
--- a/tests/tmp/equals_format_expected_cleaned.vcf
+++ /dev/null
@@ -1 +0,0 @@
-##fileformat=VCFv4.2##INFO=##FORMAT=##FORMAT=#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE31	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:221	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:101	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:221	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:121	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/equals_format_output.vcf b/tests/tmp/equals_format_output.vcf
deleted file mode 100644
index 696571e0..00000000
--- a/tests/tmp/equals_format_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:10
-1	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:22
-1	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:12
-1	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/error.txt b/tests/tmp/error.txt
deleted file mode 100644
index 44150a0b..00000000
--- a/tests/tmp/error.txt
+++ /dev/null
@@ -1 +0,0 @@
-Error: --mode must be 'any' or 'all'.
diff --git a/tests/tmp/eur_output.vcf b/tests/tmp/eur_output.vcf
deleted file mode 100644
index 00af6e9b..00000000
--- a/tests/tmp/eur_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1_EUR	SAMPLE2_EUR
-1	100	rs123	A	T	50	PASS	AF=0.1	GT:DP	0|0:30	0|1:25
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP	0|1:28	1|1:32
-2	150	rs789	T	C	70	PASS	AF=0.3	GT:DP	1|1:35	0|1:29
-2	250	rs012	G	A	80	PASS	AF=0.4	GT:DP	0|1:31	0|0:27
diff --git a/tests/tmp/eur_samples_err.log b/tests/tmp/eur_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/eur_samples_output.tsv b/tests/tmp/eur_samples_output.tsv
deleted file mode 100644
index 15c9445e..00000000
--- a/tests/tmp/eur_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-EUR_SAMPLE1	EUR
-EUR_SAMPLE2	EUR
-EUR_SAMPLE3	EUR
diff --git a/tests/tmp/ge_operator_err.log b/tests/tmp/ge_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/ge_operator_output.vcf b/tests/tmp/ge_operator_output.vcf
deleted file mode 100644
index 0969a3e1..00000000
--- a/tests/tmp/ge_operator_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/gq_eq_30_all_err.log b/tests/tmp/gq_eq_30_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_eq_30_all_output.vcf b/tests/tmp/gq_eq_30_all_output.vcf
deleted file mode 100644
index 85eebfcf..00000000
--- a/tests/tmp/gq_eq_30_all_output.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/gq_gt_20_all_err.log b/tests/tmp/gq_gt_20_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_gt_20_all_output.vcf b/tests/tmp/gq_gt_20_all_output.vcf
deleted file mode 100644
index 87339394..00000000
--- a/tests/tmp/gq_gt_20_all_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/gq_gt_20_any_err.log b/tests/tmp/gq_gt_20_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_gt_20_any_output.vcf b/tests/tmp/gq_gt_20_any_output.vcf
deleted file mode 100644
index d9b7ce08..00000000
--- a/tests/tmp/gq_gt_20_any_output.vcf
+++ /dev/null
@@ -1,12 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GQ:DP:PL	0/1:15:18:20,0,30	0/1:10:25:15,0,20	0/0:35:22:0,35,45
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP:PL	0/0:45:30:0,45,60	0/0:50:20:0,50,65	1/1:5:8:25,5,0
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GQ:DP:PL	0/1:20:25:30,0,35	1/1:30:18:40,30,0	0/1:25:22:32,0,38
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/gq_gt_24_5_all_err.log b/tests/tmp/gq_gt_24_5_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_gt_24_5_all_output.vcf b/tests/tmp/gq_gt_24_5_all_output.vcf
deleted file mode 100644
index d6ae73c3..00000000
--- a/tests/tmp/gq_gt_24_5_all_output.vcf
+++ /dev/null
@@ -1,8 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
diff --git a/tests/tmp/gq_lt_30_all_err.log b/tests/tmp/gq_lt_30_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_lt_30_all_output.vcf b/tests/tmp/gq_lt_30_all_output.vcf
deleted file mode 100644
index 85eebfcf..00000000
--- a/tests/tmp/gq_lt_30_all_output.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/gq_lt_30_any_err.log b/tests/tmp/gq_lt_30_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_lt_30_any_output.vcf b/tests/tmp/gq_lt_30_any_output.vcf
deleted file mode 100644
index d9b7ce08..00000000
--- a/tests/tmp/gq_lt_30_any_output.vcf
+++ /dev/null
@@ -1,12 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GQ:DP:PL	0/1:15:18:20,0,30	0/1:10:25:15,0,20	0/0:35:22:0,35,45
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP:PL	0/0:45:30:0,45,60	0/0:50:20:0,50,65	1/1:5:8:25,5,0
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GQ:DP:PL	0/1:20:25:30,0,35	1/1:30:18:40,30,0	0/1:25:22:32,0,38
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/gq_ne_30_all_err.log b/tests/tmp/gq_ne_30_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_ne_30_all_output.vcf b/tests/tmp/gq_ne_30_all_output.vcf
deleted file mode 100644
index a802d0a6..00000000
--- a/tests/tmp/gq_ne_30_all_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GQ:DP:PL	0/1:15:18:20,0,30	0/1:10:25:15,0,20	0/0:35:22:0,35,45
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP:PL	0/0:45:30:0,45,60	0/0:50:20:0,50,65	1/1:5:8:25,5,0
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/gt_operator_err.log b/tests/tmp/gt_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gt_operator_output.vcf b/tests/tmp/gt_operator_output.vcf
deleted file mode 100644
index 0969a3e1..00000000
--- a/tests/tmp/gt_operator_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/help_message.txt b/tests/tmp/help_message.txt
deleted file mode 100644
index 178322ae..00000000
--- a/tests/tmp/help_message.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-VCFX_phred_filter: Filter VCF lines by their QUAL field.
-
-Usage:
-  VCFX_phred_filter [options] < input.vcf > output.vcf
-
-Options:
-  -p, --phred-filter       Phred QUAL threshold (default=30)
-  -k, --keep-missing-qual       Treat '.' (missing QUAL) as pass
-  -h, --help                    Display this help and exit
-
-Description:
-  Reads VCF lines from stdin. For each data line, parse the QUAL field.
-  If QUAL >= threshold => print line. Otherwise, skip. By default, missing
-  QUAL ('.') is treated as 0. Use --keep-missing-qual to treat '.' as pass.
-
-Examples:
-  1) Keep variants with QUAL>=30:
-     VCFX_phred_filter -p 30 < in.vcf > out.vcf
-  2) Keep missing QUAL lines:
-     VCFX_phred_filter -p 30 --keep-missing-qual < in.vcf > out.vcf
diff --git a/tests/tmp/help_output.txt b/tests/tmp/help_output.txt
deleted file mode 100644
index 651909da..00000000
--- a/tests/tmp/help_output.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-VCFX_quality_adjuster: Apply a transformation to the QUAL field of a VCF.
-
-Usage:
-  VCFX_quality_adjuster [options] < input.vcf > output.vcf
-
-Options:
-  -h, --help               Show this help.
-  -a, --adjust-qual  Required. One of: log, sqrt, square, identity.
-  -n, --no-clamp           Do not clamp negative or large values.
-
-Description:
-  Reads each line from VCF. If it's a data line with >=8 columns, we parse
-  the QUAL field (6th col). We transform it with , e.g.:
-    log => log(QUAL + 1e-10)
-    sqrt=> sqrt(QUAL)
-    square=> (QUAL * QUAL)
-    identity=> no change
-  By default, negative results from e.g. log are clamped to 0, and large
-  results are capped at 1e12. If you do not want clamping, use --no-clamp.
-
-Examples:
-  1) Log-transform:
-     VCFX_quality_adjuster --adjust-qual log < in.vcf > out.vcf
-  2) Square, keep negative or big values as is:
-     VCFX_quality_adjuster --adjust-qual square --no-clamp < in.vcf > out.vcf
diff --git a/tests/tmp/identity_transform_err.log b/tests/tmp/identity_transform_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/identity_transform_output.vcf b/tests/tmp/identity_transform_output.vcf
deleted file mode 100644
index d7594c78..00000000
--- a/tests/tmp/identity_transform_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	30.000000	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	0.000000	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	100.000000	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	10.000000	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	0.000000	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/invalid_condition_err.log b/tests/tmp/invalid_condition_err.log
deleted file mode 100644
index 01efc7fc..00000000
--- a/tests/tmp/invalid_condition_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Error: Invalid filter condition format. Expected format like "GP>0.9".
diff --git a/tests/tmp/invalid_condition_output.vcf b/tests/tmp/invalid_condition_output.vcf
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/invalid_err.log b/tests/tmp/invalid_err.log
deleted file mode 100644
index 34167a32..00000000
--- a/tests/tmp/invalid_err.log
+++ /dev/null
@@ -1,4 +0,0 @@
-Warning: Data line encountered before #CHROM header. Skipping line:
-1	100	rs1	A	G	30	PASS	AF=0.25	GT	0/1	0/0	1/1
-Warning: Skipping invalid VCF line (fewer than 9 fields):
-1	300
diff --git a/tests/tmp/invalid_error.log b/tests/tmp/invalid_error.log
deleted file mode 100644
index 9c4f2a0e..00000000
--- a/tests/tmp/invalid_error.log
+++ /dev/null
@@ -1 +0,0 @@
-Error: unsupported transformation 'invalid_transform'.
diff --git a/tests/tmp/invalid_mode_err.log b/tests/tmp/invalid_mode_err.log
deleted file mode 100644
index 63e70083..00000000
--- a/tests/tmp/invalid_mode_err.log
+++ /dev/null
@@ -1,2 +0,0 @@
-==== START OF TEST: invalid_mode ====
-Error: --mode must be 'any' or 'all'.
diff --git a/tests/tmp/invalid_mode_out.vcf b/tests/tmp/invalid_mode_out.vcf
deleted file mode 100644
index cebc4ad1..00000000
--- a/tests/tmp/invalid_mode_out.vcf
+++ /dev/null
@@ -1,18 +0,0 @@
-VCFX_gl_filter: Filter VCF based on a numeric genotype-likelihood field.
-
-Usage:
-  VCFX_gl_filter --filter "" [--mode ] < input.vcf > output.vcf
-
-Options:
-  -h, --help                Display this help message and exit
-  -f, --filter   e.g. "GQ>20" or "DP>=10.5" or "PL==50"
-  -m, --mode       'all' => all samples must pass (default), 'any' => at least one sample passes.
-
-Example:
-  VCFX_gl_filter --filter "GQ>20.5" --mode any < input.vcf > filtered.vcf
-
-Description:
-  The filter condition is a simple expression: ,
-  e.g. GQ>20 or DP!=10 or RGQ<=5.2.
-  The 'mode' determines if all samples must satisfy the condition or
-  if at least one sample satisfying is enough to keep the record.
diff --git a/tests/tmp/invalid_mode_output.vcf b/tests/tmp/invalid_mode_output.vcf
deleted file mode 100644
index cabcfbb6..00000000
--- a/tests/tmp/invalid_mode_output.vcf
+++ /dev/null
@@ -1,19 +0,0 @@
-==== START OF TEST: invalid_mode ====
-VCFX_gl_filter: Filter VCF based on a numeric genotype-likelihood field.
-
-Usage:
-  VCFX_gl_filter --filter "" [--mode ] < input.vcf > output.vcf
-
-Options:
-  -h, --help                Display this help message and exit
-  -f, --filter   e.g. "GQ>20" or "DP>=10.5" or "PL==50"
-  -m, --mode       'all' => all samples must pass (default), 'any' => at least one sample passes.
-
-Example:
-  VCFX_gl_filter --filter "GQ>20.5" --mode any < input.vcf > filtered.vcf
-
-Description:
-  The filter condition is a simple expression: ,
-  e.g. GQ>20 or DP!=10 or RGQ<=5.2.
-  The 'mode' determines if all samples must satisfy the condition or
-  if at least one sample satisfying is enough to keep the record.
diff --git a/tests/tmp/invalid_out.vcf b/tests/tmp/invalid_out.vcf
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/invalid_output.tsv b/tests/tmp/invalid_output.tsv
deleted file mode 100644
index 7267884f..00000000
--- a/tests/tmp/invalid_output.tsv
+++ /dev/null
@@ -1,2 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	200	rs2	C	T	0.0000
diff --git a/tests/tmp/invalid_output.vcf b/tests/tmp/invalid_output.vcf
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/invalid_records_threshold_30.vcf b/tests/tmp/invalid_records_threshold_30.vcf
deleted file mode 100644
index 687eda1c..00000000
--- a/tests/tmp/invalid_records_threshold_30.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL
-1	100	.	A	G	50
-1	300	.	G	A	30	PASS	DP=40
diff --git a/tests/tmp/invalid_vcf_err.log b/tests/tmp/invalid_vcf_err.log
deleted file mode 100644
index 4878b8e6..00000000
--- a/tests/tmp/invalid_vcf_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Error: Encountered VCF data before #CHROM header.
diff --git a/tests/tmp/invalid_vcf_output.tsv b/tests/tmp/invalid_vcf_output.tsv
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/le_operator_err.log b/tests/tmp/le_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/le_operator_output.vcf b/tests/tmp/le_operator_output.vcf
deleted file mode 100644
index 0946e0ef..00000000
--- a/tests/tmp/le_operator_output.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GP	0/1:0.1,0.7,0.2	1/1:0,0.1,0.9	0/1:0.1,0.7,0.2
diff --git a/tests/tmp/log_transform_edge_err.log b/tests/tmp/log_transform_edge_err.log
deleted file mode 100644
index 55d1ada7..00000000
--- a/tests/tmp/log_transform_edge_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Warning: invalid QUAL 'invalid'. Skipping.
diff --git a/tests/tmp/log_transform_edge_output.vcf b/tests/tmp/log_transform_edge_output.vcf
deleted file mode 100644
index 4259e741..00000000
--- a/tests/tmp/log_transform_edge_output.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1
-1	100	rs1	A	G	0.000000	PASS	.	GT	0/1
-1	200	rs2	C	T	13.815511	PASS	.	GT	0/1
-1	400	rs4	T	C	0.000000	PASS	.	GT	0/1
-1	500	rs5	G	C	nan	PASS	.	GT	0/1
diff --git a/tests/tmp/log_transform_err.log b/tests/tmp/log_transform_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/log_transform_no_clamp_err.log b/tests/tmp/log_transform_no_clamp_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/log_transform_no_clamp_output.vcf b/tests/tmp/log_transform_no_clamp_output.vcf
deleted file mode 100644
index a5a7c33f..00000000
--- a/tests/tmp/log_transform_no_clamp_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	3.401197	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	-23.025851	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	4.605170	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	2.302585	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	-23.025851	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/log_transform_output.vcf b/tests/tmp/log_transform_output.vcf
deleted file mode 100644
index 4e8bce84..00000000
--- a/tests/tmp/log_transform_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	3.401197	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	0.000000	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	4.605170	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	2.302585	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	0.000000	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/lt_operator_err.log b/tests/tmp/lt_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/lt_operator_output.vcf b/tests/tmp/lt_operator_output.vcf
deleted file mode 100644
index 0969a3e1..00000000
--- a/tests/tmp/lt_operator_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/malformed_err.log b/tests/tmp/malformed_err.log
deleted file mode 100644
index b34c3cda..00000000
--- a/tests/tmp/malformed_err.log
+++ /dev/null
@@ -1,2 +0,0 @@
-Warning: invalid VCF line (<9 fields): 1	100	rs1	A	G	30	PASS	AF=0.25
-Warning: invalid VCF line (<9 fields): 1	200	rs2	C	T	40	PASS	AF=0.5
diff --git a/tests/tmp/malformed_freq_err.log b/tests/tmp/malformed_freq_err.log
deleted file mode 100644
index bf3b8a5c..00000000
--- a/tests/tmp/malformed_freq_err.log
+++ /dev/null
@@ -1,3 +0,0 @@
-Warning: Invalid line in frequency file (#1): malformed data
-Error: No valid population frequencies loaded.
-Error: Failed to load population frequencies from tmp/malformed_freqs.txt
diff --git a/tests/tmp/malformed_freq_output.tsv b/tests/tmp/malformed_freq_output.tsv
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/malformed_freqs.txt b/tests/tmp/malformed_freqs.txt
deleted file mode 100644
index b8833029..00000000
--- a/tests/tmp/malformed_freqs.txt
+++ /dev/null
@@ -1 +0,0 @@
-malformed data
diff --git a/tests/tmp/malformed_input_err.log b/tests/tmp/malformed_input_err.log
deleted file mode 100644
index 8fa27b03..00000000
--- a/tests/tmp/malformed_input_err.log
+++ /dev/null
@@ -1,2 +0,0 @@
-Warning: line with <8 fields => skipping.
-Warning: line with <8 fields => skipping.
diff --git a/tests/tmp/malformed_input_output.vcf b/tests/tmp/malformed_input_output.vcf
deleted file mode 100644
index 9068bd6e..00000000
--- a/tests/tmp/malformed_input_output.vcf
+++ /dev/null
@@ -1,2 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL
diff --git a/tests/tmp/malformed_out.vcf b/tests/tmp/malformed_out.vcf
deleted file mode 100644
index 68d81478..00000000
--- a/tests/tmp/malformed_out.vcf
+++ /dev/null
@@ -1,2 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
diff --git a/tests/tmp/malformed_output.vcf b/tests/tmp/malformed_output.vcf
deleted file mode 100644
index 0fe19f4e..00000000
--- a/tests/tmp/malformed_output.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1_EUR	SAMPLE2_EUR
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP	0|1:28	1|1:32
-2	150	rs012	T	C	80	PASS	AF=0.4	GT:DP	1|1:35	0|1:29
diff --git a/tests/tmp/malformed_query_01_flexible_output.vcf b/tests/tmp/malformed_query_01_flexible_output.vcf
deleted file mode 100644
index 36f4bf31..00000000
--- a/tests/tmp/malformed_query_01_flexible_output.vcf
+++ /dev/null
@@ -1,11 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	200	.	C	T	40	PASS	AF=0.5	GT	0/1	0/1	0/0
-1	400	rs4	T	C	45	PASS	.	GT:DP	0|1:25	.|.:--	0|0:12
-1	500	rs5	G	A,T	50	PASS	AF=0.2,0.05	GT:DP	0/1:22	0/.:15	1/1:28
-1	700	rs7	A	G	60	PASS	AF=0.15	GT:DP	./.:--	0|1:22	1|1:25
-1	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	0/1:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/malformed_threshold_30.vcf b/tests/tmp/malformed_threshold_30.vcf
deleted file mode 100644
index 006c6381..00000000
--- a/tests/tmp/malformed_threshold_30.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
diff --git a/tests/tmp/malformed_threshold_30_keep_missing.vcf b/tests/tmp/malformed_threshold_30_keep_missing.vcf
deleted file mode 100644
index 14ab42f1..00000000
--- a/tests/tmp/malformed_threshold_30_keep_missing.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
-1	400	.	T	C		PASS	DP=35
diff --git a/tests/tmp/malformed_threshold_5.vcf b/tests/tmp/malformed_threshold_5.vcf
deleted file mode 100644
index 92517348..00000000
--- a/tests/tmp/malformed_threshold_5.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
-1	500	.	A	C	10.5	PASS	DP=15
diff --git a/tests/tmp/malformed_vcf_err.log b/tests/tmp/malformed_vcf_err.log
deleted file mode 100644
index 6b11a01f..00000000
--- a/tests/tmp/malformed_vcf_err.log
+++ /dev/null
@@ -1,3 +0,0 @@
-==== START OF TEST: malformed_vcf ====
-Warning: invalid VCF line (<9 fields): 1	100	rs1	A	G	30	PASS	AF=0.25
-Warning: invalid VCF line (<9 fields): 1	200	rs2	C	T	40	PASS	AF=0.5
diff --git a/tests/tmp/malformed_vcf_output.vcf b/tests/tmp/malformed_vcf_output.vcf
deleted file mode 100644
index 3f71f6c2..00000000
--- a/tests/tmp/malformed_vcf_output.vcf
+++ /dev/null
@@ -1,3 +0,0 @@
-==== START OF TEST: malformed_vcf ====
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
diff --git a/tests/tmp/missing_arg_error.log b/tests/tmp/missing_arg_error.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_arg_output.vcf b/tests/tmp/missing_arg_output.vcf
deleted file mode 100644
index 651909da..00000000
--- a/tests/tmp/missing_arg_output.vcf
+++ /dev/null
@@ -1,25 +0,0 @@
-VCFX_quality_adjuster: Apply a transformation to the QUAL field of a VCF.
-
-Usage:
-  VCFX_quality_adjuster [options] < input.vcf > output.vcf
-
-Options:
-  -h, --help               Show this help.
-  -a, --adjust-qual  Required. One of: log, sqrt, square, identity.
-  -n, --no-clamp           Do not clamp negative or large values.
-
-Description:
-  Reads each line from VCF. If it's a data line with >=8 columns, we parse
-  the QUAL field (6th col). We transform it with , e.g.:
-    log => log(QUAL + 1e-10)
-    sqrt=> sqrt(QUAL)
-    square=> (QUAL * QUAL)
-    identity=> no change
-  By default, negative results from e.g. log are clamped to 0, and large
-  results are capped at 1e12. If you do not want clamping, use --no-clamp.
-
-Examples:
-  1) Log-transform:
-     VCFX_quality_adjuster --adjust-qual log < in.vcf > out.vcf
-  2) Square, keep negative or big values as is:
-     VCFX_quality_adjuster --adjust-qual square --no-clamp < in.vcf > out.vcf
diff --git a/tests/tmp/missing_args_output.txt b/tests/tmp/missing_args_output.txt
deleted file mode 100644
index c8c85b1b..00000000
--- a/tests/tmp/missing_args_output.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Usage: ../build/src/VCFX_genotype_query/VCFX_genotype_query --genotype-query "0/1" [--strict] < input.vcf > output.vcf
-Use --help for usage.
diff --git a/tests/tmp/missing_err.log b/tests/tmp/missing_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_field_err.log b/tests/tmp/missing_field_err.log
deleted file mode 100644
index 9bd03884..00000000
--- a/tests/tmp/missing_field_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Error: Specified field "GP" not found in FORMAT column.
diff --git a/tests/tmp/missing_field_gq_gt_20_err.log b/tests/tmp/missing_field_gq_gt_20_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_field_gq_gt_20_output.vcf b/tests/tmp/missing_field_gq_gt_20_output.vcf
deleted file mode 100644
index b10fb63a..00000000
--- a/tests/tmp/missing_field_gq_gt_20_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1
diff --git a/tests/tmp/missing_field_output.vcf b/tests/tmp/missing_field_output.vcf
deleted file mode 100644
index b7a7e7ab..00000000
--- a/tests/tmp/missing_field_output.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1
diff --git a/tests/tmp/missing_filter_err.log b/tests/tmp/missing_filter_err.log
deleted file mode 100644
index 2e60ae42..00000000
--- a/tests/tmp/missing_filter_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Error: --filter must be specified.
diff --git a/tests/tmp/missing_filter_output.vcf b/tests/tmp/missing_filter_output.vcf
deleted file mode 100644
index cebc4ad1..00000000
--- a/tests/tmp/missing_filter_output.vcf
+++ /dev/null
@@ -1,18 +0,0 @@
-VCFX_gl_filter: Filter VCF based on a numeric genotype-likelihood field.
-
-Usage:
-  VCFX_gl_filter --filter "" [--mode ] < input.vcf > output.vcf
-
-Options:
-  -h, --help                Display this help message and exit
-  -f, --filter   e.g. "GQ>20" or "DP>=10.5" or "PL==50"
-  -m, --mode       'all' => all samples must pass (default), 'any' => at least one sample passes.
-
-Example:
-  VCFX_gl_filter --filter "GQ>20.5" --mode any < input.vcf > filtered.vcf
-
-Description:
-  The filter condition is a simple expression: ,
-  e.g. GQ>20 or DP!=10 or RGQ<=5.2.
-  The 'mode' determines if all samples must satisfy the condition or
-  if at least one sample satisfying is enough to keep the record.
diff --git a/tests/tmp/missing_freq_err.log b/tests/tmp/missing_freq_err.log
deleted file mode 100644
index 9c007b94..00000000
--- a/tests/tmp/missing_freq_err.log
+++ /dev/null
@@ -1,2 +0,0 @@
-Error: Cannot open frequency file: /nonexistent/file.txt
-Error: Failed to load population frequencies from /nonexistent/file.txt
diff --git a/tests/tmp/missing_freq_output.tsv b/tests/tmp/missing_freq_output.tsv
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_output.tsv b/tests/tmp/missing_output.tsv
deleted file mode 100644
index f47e8fb6..00000000
--- a/tests/tmp/missing_output.tsv
+++ /dev/null
@@ -1,3 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G	0.7500
-1	200	rs2	C	T	0.3333
diff --git a/tests/tmp/missing_samples_err.log b/tests/tmp/missing_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_samples_output.tsv b/tests/tmp/missing_samples_output.tsv
deleted file mode 100644
index cbeeb4c9..00000000
--- a/tests/tmp/missing_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-EUR_MISS	EUR
-AFR_MISS	AFR
-EAS_MISS	EAS
diff --git a/tests/tmp/missing_value_gq_gt_20_all_err.log b/tests/tmp/missing_value_gq_gt_20_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_value_gq_gt_20_all_output.vcf b/tests/tmp/missing_value_gq_gt_20_all_output.vcf
deleted file mode 100644
index d9a8ffe1..00000000
--- a/tests/tmp/missing_value_gq_gt_20_all_output.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP	0/1:25:20	0/0:30:15
diff --git a/tests/tmp/missing_value_gq_gt_20_any_err.log b/tests/tmp/missing_value_gq_gt_20_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_value_gq_gt_20_any_output.vcf b/tests/tmp/missing_value_gq_gt_20_any_output.vcf
deleted file mode 100644
index 900ca329..00000000
--- a/tests/tmp/missing_value_gq_gt_20_any_output.vcf
+++ /dev/null
@@ -1,8 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP	0/1:25:20	0/0:30:15
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP	0/0:45:30	0/0::20
diff --git a/tests/tmp/mixed_population_check_err.log b/tests/tmp/mixed_population_check_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/mixed_population_check_output.tsv b/tests/tmp/mixed_population_check_output.tsv
deleted file mode 100644
index 52ec9b21..00000000
--- a/tests/tmp/mixed_population_check_output.tsv
+++ /dev/null
@@ -1,5 +0,0 @@
-Sample	Inferred_Population
-EUR_SAM	EUR
-AFR_SAM	AFR
-EAS_SAM	EAS
-MIX_SAM	EUR
diff --git a/tests/tmp/mixed_population_check_results.tmp b/tests/tmp/mixed_population_check_results.tmp
deleted file mode 100644
index 29eafc2a..00000000
--- a/tests/tmp/mixed_population_check_results.tmp
+++ /dev/null
@@ -1,4 +0,0 @@
-EUR_SAM	EUR
-AFR_SAM	AFR
-EAS_SAM	EAS
-MIX_SAM	EUR
diff --git a/tests/tmp/mixed_samples_err.log b/tests/tmp/mixed_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/mixed_samples_output.tsv b/tests/tmp/mixed_samples_output.tsv
deleted file mode 100644
index 52ec9b21..00000000
--- a/tests/tmp/mixed_samples_output.tsv
+++ /dev/null
@@ -1,5 +0,0 @@
-Sample	Inferred_Population
-EUR_SAM	EUR
-AFR_SAM	AFR
-EAS_SAM	EAS
-MIX_SAM	EUR
diff --git a/tests/tmp/multiallelic_err.log b/tests/tmp/multiallelic_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/multiallelic_output.tsv b/tests/tmp/multiallelic_output.tsv
deleted file mode 100644
index 71fd0ab4..00000000
--- a/tests/tmp/multiallelic_output.tsv
+++ /dev/null
@@ -1,3 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G,T	0.6667
-1	200	rs2	C	T,G,A	0.8333
diff --git a/tests/tmp/multiallelic_samples_err.log b/tests/tmp/multiallelic_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/multiallelic_samples_output.tsv b/tests/tmp/multiallelic_samples_output.tsv
deleted file mode 100644
index a0251e8c..00000000
--- a/tests/tmp/multiallelic_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-EUR_MULTI	EUR
-AFR_MULTI	AFR
-EAS_MULTI	EUR
diff --git a/tests/tmp/ne_operator_err.log b/tests/tmp/ne_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/ne_operator_output.vcf b/tests/tmp/ne_operator_output.vcf
deleted file mode 100644
index 2b906f70..00000000
--- a/tests/tmp/ne_operator_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GP	0/1:0.01,0.98,0.01	0/0:0.99,0.01,0	1/1:0,0.02,0.98
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GP	0/1:0.05,0.9,0.05	0/1:0.1,0.8,0.1	0/0:0.95,0.04,0.01
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GP	0/0:0.85,0.15,0	0/0:0.92,0.08,0	1/1:0,0.05,0.95
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GP	0/1:0.1,0.7,0.2	1/1:0,0.1,0.9	0/1:0.1,0.7,0.2
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GP	0/0:0.94,0.05,0.01	1/1:0.01,0.05,0.94	0/1:0.2,0.75,0.05
diff --git a/tests/tmp/no_args_output.txt b/tests/tmp/no_args_output.txt
deleted file mode 100644
index 4f075489..00000000
--- a/tests/tmp/no_args_output.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-VCFX_population_filter: Subset VCF to samples in specified population.
-
-Usage:
-  VCFX_population_filter [options] < input.vcf > output.vcf
-
-Options:
-  --help, -h               Print this help.
-  --population, -p    Population tag to keep (e.g. 'EUR','AFR', etc.)
-  --pop-map, -m      Tab-delimited file: 'SampleName  Population'
-
-Description:
-  Reads the pop map, finds samples that match the chosen population.
-  Then reads the VCF from stdin and prints lines with only those sample columns.
-  If a sample is not in that population, it's dropped from the #CHROM header and data columns.
-
-Example:
-  VCFX_population_filter --population AFR --pop-map pops.txt < input.vcf > out.vcf
diff --git a/tests/tmp/no_gt_err.log b/tests/tmp/no_gt_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/no_gt_output.tsv b/tests/tmp/no_gt_output.tsv
deleted file mode 100644
index 453ebe59..00000000
--- a/tests/tmp/no_gt_output.tsv
+++ /dev/null
@@ -1 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
diff --git a/tests/tmp/output.txt b/tests/tmp/output.txt
deleted file mode 100644
index cebc4ad1..00000000
--- a/tests/tmp/output.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-VCFX_gl_filter: Filter VCF based on a numeric genotype-likelihood field.
-
-Usage:
-  VCFX_gl_filter --filter "" [--mode ] < input.vcf > output.vcf
-
-Options:
-  -h, --help                Display this help message and exit
-  -f, --filter   e.g. "GQ>20" or "DP>=10.5" or "PL==50"
-  -m, --mode       'all' => all samples must pass (default), 'any' => at least one sample passes.
-
-Example:
-  VCFX_gl_filter --filter "GQ>20.5" --mode any < input.vcf > filtered.vcf
-
-Description:
-  The filter condition is a simple expression: ,
-  e.g. GQ>20 or DP!=10 or RGQ<=5.2.
-  The 'mode' determines if all samples must satisfy the condition or
-  if at least one sample satisfying is enough to keep the record.
diff --git a/tests/tmp/phased_err.log b/tests/tmp/phased_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/phased_output.tsv b/tests/tmp/phased_output.tsv
deleted file mode 100644
index 189c397a..00000000
--- a/tests/tmp/phased_output.tsv
+++ /dev/null
@@ -1,3 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G	0.5000
-1	200	rs2	C	T	0.3333
diff --git a/tests/tmp/phased_samples_err.log b/tests/tmp/phased_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/phased_samples_output.tsv b/tests/tmp/phased_samples_output.tsv
deleted file mode 100644
index deb18772..00000000
--- a/tests/tmp/phased_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-EUR_PHASED	EUR
-AFR_PHASED	AFR
-EAS_PHASED	EAS
diff --git a/tests/tmp/pl_gt_40_any_err.log b/tests/tmp/pl_gt_40_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/pl_gt_40_any_output.vcf b/tests/tmp/pl_gt_40_any_output.vcf
deleted file mode 100644
index 87339394..00000000
--- a/tests/tmp/pl_gt_40_any_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/query_01_flexible_cleaned.vcf b/tests/tmp/query_01_flexible_cleaned.vcf
deleted file mode 100644
index 2bc29661..00000000
--- a/tests/tmp/query_01_flexible_cleaned.vcf
+++ /dev/null
@@ -1 +0,0 @@
-##fileformat=VCFv4.2##INFO=##FORMAT=##FORMAT=#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE31	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:221	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:101	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:221	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:121	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/query_01_flexible_expected_cleaned.vcf b/tests/tmp/query_01_flexible_expected_cleaned.vcf
deleted file mode 100644
index 2bc29661..00000000
--- a/tests/tmp/query_01_flexible_expected_cleaned.vcf
+++ /dev/null
@@ -1 +0,0 @@
-##fileformat=VCFv4.2##INFO=##FORMAT=##FORMAT=#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE31	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:221	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:101	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:221	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:121	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/query_01_flexible_output.vcf b/tests/tmp/query_01_flexible_output.vcf
deleted file mode 100644
index 696571e0..00000000
--- a/tests/tmp/query_01_flexible_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:10
-1	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:22
-1	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:12
-1	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/query_01_pipe_flexible_output.vcf b/tests/tmp/query_01_pipe_flexible_output.vcf
deleted file mode 100644
index e41d6402..00000000
--- a/tests/tmp/query_01_pipe_flexible_output.vcf
+++ /dev/null
@@ -1,19 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3	SAMPLE4
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:10
-1	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:22
-1	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:12
-1	500	rs5	G	A,T	50	PASS	AF=0.2,0.05	GT:DP	0/1:22	0/2:15	1/1:28
-1	700	rs7	A	G	60	PASS	AF=0.15	GT:DP	1|0:20	0|1:22	1|1:25
-1	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22
-2	150	rs3	T	C	80	PASS	AF=0.3	GT:DP:GQ	1/1:35:99	1/0:29:99	0|1:18:99	0/0:24:99
-2	250	rs4	G	A,T	90	PASS	AF=0.4,0.1	GT:DP:GQ	1/2:31:99	2/2:27:99	0/2:25:99	0/1:20:99
-3	300	rs5	C	G	100	PASS	AF=0.5	GT:DP:GQ	./.:30:99	0/0:25:99	0/1:20:99	1/1:22:99
-3	400	rs6	T	A	60	FAIL	AF=0.1	GT:DP:GQ	0/1:30:10	0/1:25:20	0/1:20:30	0/1:22:40
-4	100	rs7	A	G	70	PASS	AF=0.2	GT:DP	0/1:30	0/1:25	0/1:20	0/1:22
-4	200	rs8	C	T	80	PASS	AF=0.3	DP:GT	30:0/1	25:0/1	20:0/1	22:0/1 
\ No newline at end of file
diff --git a/tests/tmp/query_01_pipe_strict_output.vcf b/tests/tmp/query_01_pipe_strict_output.vcf
deleted file mode 100644
index 8a82c11b..00000000
--- a/tests/tmp/query_01_pipe_strict_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3	SAMPLE4
-1	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:22
-1	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:12
-1	700	rs7	A	G	60	PASS	AF=0.15	GT:DP	1|0:20	0|1:22	1|1:25
-2	150	rs3	T	C	80	PASS	AF=0.3	GT:DP:GQ	1/1:35:99	1/0:29:99	0|1:18:99	0/0:24:99
\ No newline at end of file
diff --git a/tests/tmp/query_01_strict_output.vcf b/tests/tmp/query_01_strict_output.vcf
deleted file mode 100644
index b09fd625..00000000
--- a/tests/tmp/query_01_strict_output.vcf
+++ /dev/null
@@ -1,15 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3	SAMPLE4
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:10
-1	500	rs5	G	A,T	50	PASS	AF=0.2,0.05	GT:DP	0/1:22	0/2:15	1/1:28
-1	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22
-2	250	rs4	G	A,T	90	PASS	AF=0.4,0.1	GT:DP:GQ	1/2:31:99	2/2:27:99	0/2:25:99	0/1:20:99
-3	300	rs5	C	G	100	PASS	AF=0.5	GT:DP:GQ	./.:30:99	0/0:25:99	0/1:20:99	1/1:22:99
-3	400	rs6	T	A	60	FAIL	AF=0.1	GT:DP:GQ	0/1:30:10	0/1:25:20	0/1:20:30	0/1:22:40
-4	100	rs7	A	G	70	PASS	AF=0.2	GT:DP	0/1:30	0/1:25	0/1:20	0/1:22
-4	200	rs8	C	T	80	PASS	AF=0.3	DP:GT	30:0/1	25:0/1	20:0/1	22:0/1 
\ No newline at end of file
diff --git a/tests/tmp/query_11_flexible_output.vcf b/tests/tmp/query_11_flexible_output.vcf
deleted file mode 100644
index 742184ce..00000000
--- a/tests/tmp/query_11_flexible_output.vcf
+++ /dev/null
@@ -1,13 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3	SAMPLE4
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:22
-1	500	rs5	G	A,T	50	PASS	AF=0.2,0.05	GT:DP	0/1:22	0/2:15	1/1:28
-1	600	rs6	C	G	55	PASS	AF=0.4	GT:DP	1/1:30	1/1:25	0/0:15
-1	700	rs7	A	G	60	PASS	AF=0.15	GT:DP	1|0:20	0|1:22	1|1:25
-2	150	rs3	T	C	80	PASS	AF=0.3	GT:DP:GQ	1/1:35:99	1/0:29:99	0|1:18:99	0/0:24:99
-3	300	rs5	C	G	100	PASS	AF=0.5	GT:DP:GQ	./.:30:99	0/0:25:99	0/1:20:99	1/1:22:99
\ No newline at end of file
diff --git a/tests/tmp/query_multi_02_flexible_output.vcf b/tests/tmp/query_multi_02_flexible_output.vcf
deleted file mode 100644
index 1fea55ad..00000000
--- a/tests/tmp/query_multi_02_flexible_output.vcf
+++ /dev/null
@@ -1,8 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3	SAMPLE4
-1	500	rs5	G	A,T	50	PASS	AF=0.2,0.05	GT:DP	0/1:22	0/2:15	1/1:28
-2	250	rs4	G	A,T	90	PASS	AF=0.4,0.1	GT:DP:GQ	1/2:31:99	2/2:27:99	0/2:25:99	0/1:20:99
\ No newline at end of file
diff --git a/tests/tmp/simple_err.log b/tests/tmp/simple_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/simple_freqs_err.log b/tests/tmp/simple_freqs_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/simple_freqs_output.tsv b/tests/tmp/simple_freqs_output.tsv
deleted file mode 100644
index 7ab39b3b..00000000
--- a/tests/tmp/simple_freqs_output.tsv
+++ /dev/null
@@ -1,5 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G	0.5000
-1	200	rs2	C	T	0.3333
-1	300	rs3	G	A	0.3333
-1	400	rs4	T	C	0.8333
diff --git a/tests/tmp/simple_output.tsv b/tests/tmp/simple_output.tsv
deleted file mode 100644
index 7ab39b3b..00000000
--- a/tests/tmp/simple_output.tsv
+++ /dev/null
@@ -1,5 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G	0.5000
-1	200	rs2	C	T	0.3333
-1	300	rs3	G	A	0.3333
-1	400	rs4	T	C	0.8333
diff --git a/tests/tmp/sqrt_transform_edge_err.log b/tests/tmp/sqrt_transform_edge_err.log
deleted file mode 100644
index 55d1ada7..00000000
--- a/tests/tmp/sqrt_transform_edge_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Warning: invalid QUAL 'invalid'. Skipping.
diff --git a/tests/tmp/sqrt_transform_edge_output.vcf b/tests/tmp/sqrt_transform_edge_output.vcf
deleted file mode 100644
index 4067f73c..00000000
--- a/tests/tmp/sqrt_transform_edge_output.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1
-1	100	rs1	A	G	0.001000	PASS	.	GT	0/1
-1	200	rs2	C	T	1000.000000	PASS	.	GT	0/1
-1	400	rs4	T	C	0.000000	PASS	.	GT	0/1
-1	500	rs5	G	C	0.000000	PASS	.	GT	0/1
diff --git a/tests/tmp/sqrt_transform_err.log b/tests/tmp/sqrt_transform_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/sqrt_transform_output.vcf b/tests/tmp/sqrt_transform_output.vcf
deleted file mode 100644
index 5ba0fbd6..00000000
--- a/tests/tmp/sqrt_transform_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	5.477226	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	0.000000	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	10.000000	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	3.162278	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	0.000000	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/square_transform_edge_err.log b/tests/tmp/square_transform_edge_err.log
deleted file mode 100644
index 55d1ada7..00000000
--- a/tests/tmp/square_transform_edge_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Warning: invalid QUAL 'invalid'. Skipping.
diff --git a/tests/tmp/square_transform_edge_output.vcf b/tests/tmp/square_transform_edge_output.vcf
deleted file mode 100644
index 3f277fb3..00000000
--- a/tests/tmp/square_transform_edge_output.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1
-1	100	rs1	A	G	0.000000	PASS	.	GT	0/1
-1	200	rs2	C	T	1000000000000.000000	PASS	.	GT	0/1
-1	400	rs4	T	C	0.000000	PASS	.	GT	0/1
-1	500	rs5	G	C	100.000000	PASS	.	GT	0/1
diff --git a/tests/tmp/square_transform_err.log b/tests/tmp/square_transform_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/square_transform_no_clamp_err.log b/tests/tmp/square_transform_no_clamp_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/square_transform_no_clamp_output.vcf b/tests/tmp/square_transform_no_clamp_output.vcf
deleted file mode 100644
index a718db16..00000000
--- a/tests/tmp/square_transform_no_clamp_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	900.000000	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	0.000000	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	10000.000000	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	100.000000	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	0.000000	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/square_transform_output.vcf b/tests/tmp/square_transform_output.vcf
deleted file mode 100644
index a718db16..00000000
--- a/tests/tmp/square_transform_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	900.000000	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	0.000000	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	10000.000000	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	100.000000	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	0.000000	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/unknown_output.vcf b/tests/tmp/unknown_output.vcf
deleted file mode 100644
index ae84bda2..00000000
--- a/tests/tmp/unknown_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT
-1	100	rs123	A	T	50	PASS	AF=0.1	GT:DP
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP
-2	150	rs789	T	C	70	PASS	AF=0.3	GT:DP
-2	250	rs012	G	A	80	PASS	AF=0.4	GT:DP

From f63dce5f9b1f2e6334c5cf9209c3e148ad197996 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 19:41:13 +0100
Subject: [PATCH 36/54] Fix ALT normalization with colon in duplicate remover

---
 .../VCFX_duplicate_remover.cpp                | 30 ++++++++-----------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp b/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp
index 72625213..9a75cc9a 100644
--- a/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp
+++ b/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp
@@ -65,26 +65,20 @@ static VariantKey generateVariantKey(const std::string& chrom,
         key.pos = 0;
     }
     key.ref = ref;
-    key.alt = "";  // Will be set to normalized ALT.
-    // Normalize ALT: sort multi-allelic entries.
-    key.alt = generateNormalizedVariantKey(chrom, pos, ref, alt).substr(chrom.size() + pos.size() + ref.size() + 3); // skip prefix "chrom:pos:ref:"
-    // Alternatively, simply:
-    key.alt = generateNormalizedVariantKey(chrom, pos, ref, alt);
-    // However, since generateNormalizedVariantKey already concatenates chrom:pos:ref:normalizedAlt,
-    // we extract the normalizedAlt portion if needed. For simplicity, we can just store the full key.
-    // For our VariantKey, we want: chrom, pos, ref, normalizedAlt.
-    // We'll do that by re-parsing:
-    std::vector parts = splitString(generateNormalizedVariantKey(chrom, pos, ref, alt), ':');
-    if (parts.size() >= 4) {
-        key.chrom = parts[0];
-        try {
-            key.pos = std::stoi(parts[1]);
-        } catch (...) {
-            key.pos = 0;
+
+    // Normalize ALT: split multi-allelic values, sort them, then rejoin.  This
+    // avoids parsing the generated key string, which could break for ALT
+    // alleles containing ':' such as breakend notation.
+    std::vector alts = splitString(alt, ',');
+    std::sort(alts.begin(), alts.end());
+    std::ostringstream oss;
+    for (size_t i = 0; i < alts.size(); ++i) {
+        if (i > 0) {
+            oss << ',';
         }
-        key.ref = parts[2];
-        key.alt = parts[3];
+        oss << alts[i];
     }
+    key.alt = oss.str();
     return key;
 }
 

From dd832843f2d4b2c5527b7a8cafd8371a9f39bbf5 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 19:42:00 +0100
Subject: [PATCH 37/54] Support 64-bit positions in indexer

---
 docs/VCFX_indexer.md              | 12 ++++++------
 src/VCFX_indexer/VCFX_indexer.cpp |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/VCFX_indexer.md b/docs/VCFX_indexer.md
index 10d870d8..65f69f38 100644
--- a/docs/VCFX_indexer.md
+++ b/docs/VCFX_indexer.md
@@ -1,7 +1,7 @@
 # VCFX_indexer
 
 ## Overview
-`VCFX_indexer` is a utility tool for creating a byte-offset index of a VCF file. It generates a simple tab-delimited index file that maps chromosome and position to the exact byte offset in the original file, enabling efficient random access to variants without scanning the entire file.
+`VCFX_indexer` is a utility tool for creating a byte-offset index of a VCF file. It generates a simple tab-delimited index file that maps chromosome and position to the exact byte offset in the original file, enabling efficient random access to variants without scanning the entire file. The index uses 64-bit integers for both the position and the byte offset so very large coordinates are fully supported.
 
 ## Usage
 
@@ -26,9 +26,9 @@ VCFX_indexer [OPTIONS] < input.vcf > index.tsv
    - Extracting the chromosome (CHROM) and position (POS) values
    - Calculating the precise byte offset from the start of the file
 4. Writing a three-column index to standard output with:
-   - CHROM: The chromosome identifier from the VCF
-   - POS: The position value from the VCF
-   - FILE_OFFSET: The byte offset to the start of the line in the source file
+    - CHROM: The chromosome identifier from the VCF
+    - POS: The position value from the VCF (stored as a 64-bit integer)
+    - FILE_OFFSET: The byte offset to the start of the line in the source file (also 64-bit)
 
 This index enables efficient random access to specific variants in large VCF files by allowing tools to seek directly to a byte offset rather than scanning the entire file. It's particularly useful for building tools that need to query specific regions of a VCF file.
 
@@ -45,8 +45,8 @@ CHROM   POS    FILE_OFFSET
 Where:
 
 - `CHROM` is the chromosome identifier from the VCF
-- `POS` is the genomic position from the VCF
-- `FILE_OFFSET` is the byte offset from the start of the VCF file
+- `POS` is the genomic position from the VCF (64-bit integer)
+- `FILE_OFFSET` is the byte offset from the start of the VCF file (64-bit integer)
 
 ## Examples
 
diff --git a/src/VCFX_indexer/VCFX_indexer.cpp b/src/VCFX_indexer/VCFX_indexer.cpp
index 2011871e..1eeac8d1 100644
--- a/src/VCFX_indexer/VCFX_indexer.cpp
+++ b/src/VCFX_indexer/VCFX_indexer.cpp
@@ -130,9 +130,9 @@ void VCFXIndexer::createVCFIndex(std::istream &in, std::ostream &out) {
         const std::string &chrom = fields[0];
         const std::string &posStr = fields[1];
 
-        int posVal = 0;
+        std::int64_t posVal = 0;
         try {
-            posVal = std::stoi(posStr);
+            posVal = std::stoll(posStr);
         } catch (...) {
             // Not a valid integer => skip
             return;

From 7e96e4255426a79add9bb5d98d5e384235e09056 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 20:01:39 +0100
Subject: [PATCH 38/54] Refactor variant counter to stream input

---
 .../VCFX_variant_counter.cpp                  | 125 ++++++++++++++----
 .../VCFX_variant_counter.h                    |   2 +
 2 files changed, 100 insertions(+), 27 deletions(-)

diff --git a/src/VCFX_variant_counter/VCFX_variant_counter.cpp b/src/VCFX_variant_counter/VCFX_variant_counter.cpp
index 72828643..55549d47 100644
--- a/src/VCFX_variant_counter/VCFX_variant_counter.cpp
+++ b/src/VCFX_variant_counter/VCFX_variant_counter.cpp
@@ -5,6 +5,8 @@
 #include 
 #include 
 #include "vcfx_core.h"
+#include 
+#include 
 
 void VCFXVariantCounter::displayHelp(){
     std::cout <<
@@ -56,13 +58,28 @@ int VCFXVariantCounter::run(int argc, char* argv[]){
         return 0;
     }
     
-    std::string plainInput;
-    if(!vcfx::read_maybe_compressed(std::cin, plainInput)){
-        std::cerr << "Error: failed to read input" << std::endl;
-        return 1;
+    auto peek1 = std::cin.peek();
+    bool isEmpty = (peek1 == EOF);
+    bool isGzip = false;
+    if(!isEmpty){
+        int c1 = std::cin.get();
+        int c2 = std::cin.get();
+        if(c2 != EOF){
+            isGzip = (static_cast(c1) == 0x1f &&
+                      static_cast(c2) == 0x8b);
+            std::cin.putback(static_cast(c2));
+        }
+        std::cin.putback(static_cast(c1));
+    }
+
+    int total = -1;
+    if(isEmpty){
+        total = 0;
+    } else if(isGzip){
+        total = countVariantsGzip(std::cin);
+    } else {
+        total = countVariants(std::cin);
     }
-    std::istringstream inStream(plainInput);
-    int total= countVariants(inStream);
     if(total<0){
         // indicates an error if strict
         return 1;
@@ -71,36 +88,90 @@ int VCFXVariantCounter::run(int argc, char* argv[]){
     return 0;
 }
 
+bool VCFXVariantCounter::processLine(const std::string &line, int lineNumber, int &count){
+    if(line.empty()) return true;
+    if(line[0]=='#') return true;
+    std::stringstream ss(line);
+    std::vector fields;
+    {
+        std::string col;
+        while(std::getline(ss,col,'\t')){
+            fields.push_back(col);
+        }
+    }
+    if(fields.size()<8){
+        if(strictMode){
+            std::cerr<<"Error: line "<< lineNumber <<" has <8 columns.\n";
+            return false;
+        } else {
+            std::cerr<<"Warning: skipping line "< fields;
-        {
-            std::string col;
-            while(std::getline(ss,col,'\t')){
-                fields.push_back(col);
+        if(!processLine(line, lineNumber, count)) return -1;
+    }
+    return count;
+}
+
+int VCFXVariantCounter::countVariantsGzip(std::istream &in){
+    constexpr int CHUNK = 16384;
+    char inBuf[CHUNK];
+    char outBuf[CHUNK];
+    z_stream strm; std::memset(&strm,0,sizeof(strm));
+    if(inflateInit2(&strm,15+32)!=Z_OK){
+        std::cerr<<"Error: inflateInit2 failed.\n";
+        return -1;
+    }
+    int count=0; int lineNumber=0; std::string buffer; int ret=Z_OK;
+    do {
+        in.read(inBuf, CHUNK);
+        strm.avail_in = static_cast(in.gcount());
+        if(strm.avail_in==0 && in.eof()) break;
+        strm.next_in = reinterpret_cast(inBuf);
+        do {
+            strm.avail_out = CHUNK;
+            strm.next_out = reinterpret_cast(outBuf);
+            ret = inflate(&strm, Z_NO_FLUSH);
+            if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT || ret == Z_DATA_ERROR || ret == Z_MEM_ERROR){
+                std::cerr<<"Error: decompression failed.\n";
+                inflateEnd(&strm);
+                return -1;
             }
-        }
-        if(fields.size()<8){
-            if(strictMode){
-                std::cerr<<"Error: line "<< lineNumber <<" has <8 columns.\n";
-                return -1; // indicates error
-            } else {
-                std::cerr<<"Warning: skipping line "<0){
+                buffer.append(outBuf, have);
+                size_t pos;
+                while((pos = buffer.find('\n')) != std::string::npos){
+                    std::string line = buffer.substr(0,pos);
+                    buffer.erase(0,pos+1);
+                    lineNumber++;
+                    if(!processLine(line,lineNumber,count)){
+                        inflateEnd(&strm);
+                        return -1;
+                    }
+                }
             }
+        } while(strm.avail_out==0);
+    } while(ret != Z_STREAM_END);
+
+    if(!buffer.empty()){
+        lineNumber++;
+        if(!processLine(buffer,lineNumber,count)){
+            inflateEnd(&strm);
+            return -1;
         }
-        // if we get here => count it
-        count++;
     }
+    inflateEnd(&strm);
     return count;
 }
 
diff --git a/src/VCFX_variant_counter/VCFX_variant_counter.h b/src/VCFX_variant_counter/VCFX_variant_counter.h
index c53b8358..6c5b05c0 100644
--- a/src/VCFX_variant_counter/VCFX_variant_counter.h
+++ b/src/VCFX_variant_counter/VCFX_variant_counter.h
@@ -17,6 +17,8 @@ class VCFXVariantCounter {
 
     // The actual counting function
     int countVariants(std::istream &in);
+    int countVariantsGzip(std::istream &in);
+    bool processLine(const std::string &line, int lineNumber, int &count);
 
 };
 

From 6c37b6fdff871ba96a1be575fbfbcaef655369a5 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 20:07:50 +0100
Subject: [PATCH 39/54] fix CSV quote escaping

---
 src/VCFX_format_converter/VCFX_format_converter.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/VCFX_format_converter/VCFX_format_converter.cpp b/src/VCFX_format_converter/VCFX_format_converter.cpp
index 668b4f67..e374d165 100644
--- a/src/VCFX_format_converter/VCFX_format_converter.cpp
+++ b/src/VCFX_format_converter/VCFX_format_converter.cpp
@@ -112,8 +112,9 @@ static std::string csvEscape(const std::string &field) {
     tmp.push_back('"');
     for (char c : field) {
         if (c == '"') {
-            // double it
-            tmp += "\"\"";
+            // double it by writing two quotes
+            tmp.push_back('"');
+            tmp.push_back('"');
         } else {
             tmp.push_back(c);
         }

From dbda79692655263dc2ed003ba0529d57c584a32a Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 20:15:47 +0100
Subject: [PATCH 40/54] docs: update phred_filter usage

---
 docs/VCFX_nonref_filter.md | 2 +-
 docs/index.md              | 2 +-
 docs/quickstart.md         | 4 ++--
 docs/tools_overview.md     | 6 +++---
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/VCFX_nonref_filter.md b/docs/VCFX_nonref_filter.md
index aa7f15e9..eda94165 100644
--- a/docs/VCFX_nonref_filter.md
+++ b/docs/VCFX_nonref_filter.md
@@ -71,7 +71,7 @@ VCFX_nonref_filter > high_quality_nonref.vcf
 # Create a pipeline of filters
 cat input.vcf | \
 VCFX_nonref_filter | \
-VCFX_phred_filter --min-quality 30 > filtered.vcf
+VCFX_phred_filter --phred-filter 30 > filtered.vcf
 ```
 
 ## Homozygous Reference Detection
diff --git a/docs/index.md b/docs/index.md
index 60144f62..57ffc818 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -127,7 +127,7 @@ VCFX tools are designed to be used in pipelines. Here are some common usage patt
 # Extract phased variants, filter by quality, and calculate allele frequencies
 cat input.vcf | \
   VCFX_phase_checker | \
-  VCFX_phred_filter --min-qual 30 | \
+  VCFX_phred_filter --phred-filter 30 | \
   VCFX_allele_freq_calc > result.tsv
 ```
 
diff --git a/docs/quickstart.md b/docs/quickstart.md
index 261901bd..d99d39a2 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -37,7 +37,7 @@ Filter for high-quality SNPs:
 cat input.vcf | \
   VCFX_variant_classifier --append-info | \
   grep 'VCF_CLASS=SNP' | \
-  VCFX_phred_filter --min-qual 30 > high_quality_snps.vcf
+  VCFX_phred_filter --phred-filter 30 > high_quality_snps.vcf
 ```
 
 ### Example 2: Population Analysis
@@ -120,7 +120,7 @@ cat input.vcf | \
   VCFX_validator | \
   VCFX_variant_classifier --append-info | \
   VCFX_missing_detector --max-missing 0.1 | \
-  VCFX_phred_filter --min-qual 20 > qc_passed.vcf
+  VCFX_phred_filter --phred-filter 20 > qc_passed.vcf
 ```
 
 ### Sample Comparison
diff --git a/docs/tools_overview.md b/docs/tools_overview.md
index 4361453d..26b5ba69 100644
--- a/docs/tools_overview.md
+++ b/docs/tools_overview.md
@@ -112,7 +112,7 @@ VCFX tools are designed to be combined in pipelines. Here are some common usage
 # Extract phased variants, filter by quality, and calculate allele frequencies
 cat input.vcf | \
   VCFX_phase_checker | \
-  VCFX_phred_filter --min-qual 30 | \
+  VCFX_phred_filter --phred-filter 30 | \
   VCFX_allele_freq_calc > result.tsv
 ```
 
@@ -123,7 +123,7 @@ cat input.vcf | \
 cat input.vcf | \
   VCFX_variant_classifier --append-info | \
   grep 'VCF_CLASS=SNP' | \
-  VCFX_phred_filter --min-qual 30 > high_quality_snps.vcf
+  VCFX_phred_filter --phred-filter 30 > high_quality_snps.vcf
 ```
 
 ### Sample Extraction and Comparison
@@ -168,5 +168,5 @@ cat input.vcf | \
   VCFX_validator | \
   VCFX_variant_classifier --append-info | \
   VCFX_missing_detector --max-missing 0.1 | \
-  VCFX_phred_filter --min-qual 20 > qc_passed.vcf
+  VCFX_phred_filter --phred-filter 20 > qc_passed.vcf
 ``` 
\ No newline at end of file

From bb9ca6dae16b5fc81ec7776ecb0abb167a025cff Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 20:55:18 +0100
Subject: [PATCH 41/54] fix test_variant_counter script path

---
 tests/test_variant_counter.sh | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tests/test_variant_counter.sh b/tests/test_variant_counter.sh
index ae78b320..587f7656 100755
--- a/tests/test_variant_counter.sh
+++ b/tests/test_variant_counter.sh
@@ -3,8 +3,16 @@ set -e
 
 echo "=== Testing VCFX_variant_counter ==="
 
-# Executable paths
-VCFX_EXECUTABLE="../build/src/VCFX_variant_counter/VCFX_variant_counter"
+# Determine script and repository locations so the test can be run from
+# anywhere.  This mirrors the approach used by other test scripts.
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+ROOT_DIR="$( cd "$SCRIPT_DIR/.." && pwd )"
+
+# Ensure we run inside the script directory for predictable paths
+cd "$SCRIPT_DIR"
+
+# Path to the built executable
+VCFX_EXECUTABLE="$ROOT_DIR/build/src/VCFX_variant_counter/VCFX_variant_counter"
 
 # Check if executable exists
 if [ ! -f "$VCFX_EXECUTABLE" ]; then

From e592624fb7f11f539ed4c342838a92a7407bd05e Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 20:56:34 +0100
Subject: [PATCH 42/54] Merge main

---
 docs/VCFX_ld_calculator.md |  2 +-
 docs/VCFX_nonref_filter.md |  2 +-
 docs/index.md              |  2 +-
 docs/quickstart.md         |  7 ++++---
 docs/tools_overview.md     | 11 ++++++-----
 5 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/docs/VCFX_ld_calculator.md b/docs/VCFX_ld_calculator.md
index 34fa081e..1dc5440a 100644
--- a/docs/VCFX_ld_calculator.md
+++ b/docs/VCFX_ld_calculator.md
@@ -78,7 +78,7 @@ VCFX_ld_calculator --region chr1:10000-20000 < input.vcf > ld_matrix.txt
 Filter for common variants first, then calculate LD:
 
 ```bash
-cat input.vcf | VCFX_af_subsetter --min-af 0.05 | VCFX_ld_calculator > common_variants_ld.txt
+cat input.vcf | VCFX_af_subsetter --af-filter '0.05-1.0' | VCFX_ld_calculator > common_variants_ld.txt
 ```
 
 ## Handling Special Cases
diff --git a/docs/VCFX_nonref_filter.md b/docs/VCFX_nonref_filter.md
index aa7f15e9..eda94165 100644
--- a/docs/VCFX_nonref_filter.md
+++ b/docs/VCFX_nonref_filter.md
@@ -71,7 +71,7 @@ VCFX_nonref_filter > high_quality_nonref.vcf
 # Create a pipeline of filters
 cat input.vcf | \
 VCFX_nonref_filter | \
-VCFX_phred_filter --min-quality 30 > filtered.vcf
+VCFX_phred_filter --phred-filter 30 > filtered.vcf
 ```
 
 ## Homozygous Reference Detection
diff --git a/docs/index.md b/docs/index.md
index 60144f62..57ffc818 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -127,7 +127,7 @@ VCFX tools are designed to be used in pipelines. Here are some common usage patt
 # Extract phased variants, filter by quality, and calculate allele frequencies
 cat input.vcf | \
   VCFX_phase_checker | \
-  VCFX_phred_filter --min-qual 30 | \
+  VCFX_phred_filter --phred-filter 30 | \
   VCFX_allele_freq_calc > result.tsv
 ```
 
diff --git a/docs/quickstart.md b/docs/quickstart.md
index 261901bd..57edc6d3 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -37,7 +37,7 @@ Filter for high-quality SNPs:
 cat input.vcf | \
   VCFX_variant_classifier --append-info | \
   grep 'VCF_CLASS=SNP' | \
-  VCFX_phred_filter --min-qual 30 > high_quality_snps.vcf
+  VCFX_phred_filter --phred-filter 30 > high_quality_snps.vcf
 ```
 
 ### Example 2: Population Analysis
@@ -119,8 +119,9 @@ Here are some common workflows that combine multiple VCFX tools:
 cat input.vcf | \
   VCFX_validator | \
   VCFX_variant_classifier --append-info | \
-  VCFX_missing_detector --max-missing 0.1 | \
-  VCFX_phred_filter --min-qual 20 > qc_passed.vcf
+  VCFX_missing_detector | \
+  grep -v 'MISSING_GENOTYPES=1' | \
+  VCFX_phred_filter --phred-filter 20 > qc_passed.vcf
 ```
 
 ### Sample Comparison
diff --git a/docs/tools_overview.md b/docs/tools_overview.md
index 4361453d..4b66b839 100644
--- a/docs/tools_overview.md
+++ b/docs/tools_overview.md
@@ -112,7 +112,7 @@ VCFX tools are designed to be combined in pipelines. Here are some common usage
 # Extract phased variants, filter by quality, and calculate allele frequencies
 cat input.vcf | \
   VCFX_phase_checker | \
-  VCFX_phred_filter --min-qual 30 | \
+  VCFX_phred_filter --phred-filter 30 | \
   VCFX_allele_freq_calc > result.tsv
 ```
 
@@ -123,7 +123,7 @@ cat input.vcf | \
 cat input.vcf | \
   VCFX_variant_classifier --append-info | \
   grep 'VCF_CLASS=SNP' | \
-  VCFX_phred_filter --min-qual 30 > high_quality_snps.vcf
+  VCFX_phred_filter --phred-filter 30 > high_quality_snps.vcf
 ```
 
 ### Sample Extraction and Comparison
@@ -139,7 +139,7 @@ cat samples.vcf reference.vcf | VCFX_concordance_checker > concordance_report.ts
 ```bash
 # Calculate LD in a specific region after filtering for common variants
 cat input.vcf | \
-  VCFX_af_subsetter --min-af 0.05 | \
+  VCFX_af_subsetter --af-filter '0.05-1.0' | \
   VCFX_ld_calculator --region chr1:10000-20000 > ld_matrix.txt
 ```
 
@@ -167,6 +167,7 @@ cat eur.vcf | VCFX_allele_freq_calc > eur_afs.tsv
 cat input.vcf | \
   VCFX_validator | \
   VCFX_variant_classifier --append-info | \
-  VCFX_missing_detector --max-missing 0.1 | \
-  VCFX_phred_filter --min-qual 20 > qc_passed.vcf
+  VCFX_missing_detector | \
+  grep -v 'MISSING_GENOTYPES=1' | \
+  VCFX_phred_filter --phred-filter 20 > qc_passed.vcf
 ``` 
\ No newline at end of file

From 5022ec35a84032bf630c7e47e573c8eede948d4f Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 21:06:52 +0100
Subject: [PATCH 43/54] docs: document --version flag

---
 docs/VCFX_allele_freq_calc.md      | 1 +
 docs/VCFX_format_converter.md      | 1 +
 docs/VCFX_genotype_query.md        | 1 +
 docs/VCFX_merger.md                | 1 +
 docs/VCFX_metadata_summarizer.md   | 1 +
 docs/VCFX_missing_data_handler.md  | 1 +
 docs/VCFX_multiallelic_splitter.md | 1 +
 docs/VCFX_outlier_detector.md      | 1 +
 docs/VCFX_phase_quality_filter.md  | 1 +
 docs/VCFX_probability_filter.md    | 1 +
 docs/VCFX_reformatter.md           | 1 +
 docs/VCFX_region_subsampler.md     | 1 +
 docs/VCFX_subsampler.md            | 1 +
 13 files changed, 13 insertions(+)

diff --git a/docs/VCFX_allele_freq_calc.md b/docs/VCFX_allele_freq_calc.md
index dbc04afa..fcfbd8a8 100644
--- a/docs/VCFX_allele_freq_calc.md
+++ b/docs/VCFX_allele_freq_calc.md
@@ -15,6 +15,7 @@ VCFX_allele_freq_calc [OPTIONS] < input.vcf > allele_frequencies.tsv
 | Option      | Description                                |
 |-------------|--------------------------------------------|
 | `--help`, `-h` | Display help message and exit              |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_format_converter.md b/docs/VCFX_format_converter.md
index 50c25a3d..e10f3891 100644
--- a/docs/VCFX_format_converter.md
+++ b/docs/VCFX_format_converter.md
@@ -14,6 +14,7 @@ VCFX_format_converter [OPTIONS] < input.vcf > output.file
 | `--to-bed` | Convert the input VCF file to BED format |
 | `--to-csv` | Convert the input VCF file to CSV format |
 | `--help`, `-h` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_format_converter` reads a VCF file from standard input and converts it to the specified output format. The tool:
diff --git a/docs/VCFX_genotype_query.md b/docs/VCFX_genotype_query.md
index 1e923076..aab43902 100644
--- a/docs/VCFX_genotype_query.md
+++ b/docs/VCFX_genotype_query.md
@@ -17,6 +17,7 @@ VCFX_genotype_query [OPTIONS] < input.vcf > filtered.vcf
 | `--genotype-query`, `-g` "GENOTYPE" | Specify the genotype to query (e.g., "0/1", "1/1") |
 | `--strict` | Use strict string comparison (no phasing unification or allele sorting) |
 | `--help`, `-h` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_merger.md b/docs/VCFX_merger.md
index 27124ab5..6fb790ff 100644
--- a/docs/VCFX_merger.md
+++ b/docs/VCFX_merger.md
@@ -16,6 +16,7 @@ VCFX_merger --merge file1.vcf,file2.vcf,... [options] > merged.vcf
 |--------|-------------|
 | `-m, --merge` | Comma-separated list of VCF files to merge |
 | `-h, --help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_metadata_summarizer.md b/docs/VCFX_metadata_summarizer.md
index 111bcf06..26eb18f2 100644
--- a/docs/VCFX_metadata_summarizer.md
+++ b/docs/VCFX_metadata_summarizer.md
@@ -15,6 +15,7 @@ VCFX_metadata_summarizer [options] < input.vcf
 | Option | Description |
 |--------|-------------|
 | `-h, --help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_missing_data_handler.md b/docs/VCFX_missing_data_handler.md
index b12cf9b0..9df50524 100644
--- a/docs/VCFX_missing_data_handler.md
+++ b/docs/VCFX_missing_data_handler.md
@@ -17,6 +17,7 @@ VCFX_missing_data_handler [OPTIONS] [files...] > processed.vcf
 | `--fill-missing`, `-f` | Impute missing genotypes with a default value |
 | `--default-genotype`, `-d`  | Specify the default genotype for imputation (default: "./.")  |
 | `--help`, `-h` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_multiallelic_splitter.md b/docs/VCFX_multiallelic_splitter.md
index e7f4692b..870dca12 100644
--- a/docs/VCFX_multiallelic_splitter.md
+++ b/docs/VCFX_multiallelic_splitter.md
@@ -15,6 +15,7 @@ VCFX_multiallelic_splitter [OPTIONS] < input.vcf > biallelic_output.vcf
 | Option | Description |
 |--------|-------------|
 | `--help`, `-h` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_outlier_detector.md b/docs/VCFX_outlier_detector.md
index 5cff70a6..1f007bb7 100644
--- a/docs/VCFX_outlier_detector.md
+++ b/docs/VCFX_outlier_detector.md
@@ -19,6 +19,7 @@ VCFX_outlier_detector --metric  --threshold  [--variant|--sample] < in
 | `--variant`, `-v` | Variant mode: identify variants with INFO field metrics above threshold |
 | `--sample`, `-s` | Sample mode: identify samples with average genotype metrics above threshold |
 | `--help`, `-h` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_phase_quality_filter.md b/docs/VCFX_phase_quality_filter.md
index 80a9a427..c6e18231 100644
--- a/docs/VCFX_phase_quality_filter.md
+++ b/docs/VCFX_phase_quality_filter.md
@@ -15,6 +15,7 @@ VCFX_phase_quality_filter --filter-pq "PQ" < input.vcf > output.v
 | Option | Description |
 |--------|-------------|
 | `-h, --help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-f, --filter-pq` | Condition like 'PQ>30', 'PQ>=20', 'PQ!=10', etc. |
 
 ## Description
diff --git a/docs/VCFX_probability_filter.md b/docs/VCFX_probability_filter.md
index 8978e5d9..10d0a434 100644
--- a/docs/VCFX_probability_filter.md
+++ b/docs/VCFX_probability_filter.md
@@ -13,6 +13,7 @@ VCFX_probability_filter --filter-probability "" < input.vcf > filtere
 |--------|-------------|
 | `-f, --filter-probability ` | Specify the probability filter condition (e.g., `GP>0.9`) |
 | `-h, --help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_probability_filter` analyzes the genotype probability fields in the FORMAT column of a VCF file and filters variants based on a user-defined condition. The tool:
diff --git a/docs/VCFX_reformatter.md b/docs/VCFX_reformatter.md
index 2934839f..05e8f96f 100644
--- a/docs/VCFX_reformatter.md
+++ b/docs/VCFX_reformatter.md
@@ -15,6 +15,7 @@ VCFX_reformatter [options] < input.vcf > output.vcf
 | Option | Description |
 |--------|-------------|
 | `-h, --help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-c, --compress-info ` | Remove specified INFO keys (comma-separated) |
 | `-f, --compress-format ` | Remove specified FORMAT keys (comma-separated) |
 | `-i, --reorder-info ` | Reorder INFO keys (comma-separated) |
diff --git a/docs/VCFX_region_subsampler.md b/docs/VCFX_region_subsampler.md
index 0ff4eaaa..c8d4396c 100644
--- a/docs/VCFX_region_subsampler.md
+++ b/docs/VCFX_region_subsampler.md
@@ -15,6 +15,7 @@ VCFX_region_subsampler --region-bed FILE < input.vcf > output.vcf
 | Option | Description |
 |--------|-------------|
 | `-h, --help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-b, --region-bed FILE` | BED file listing regions to keep |
 
 ## Description
diff --git a/docs/VCFX_subsampler.md b/docs/VCFX_subsampler.md
index 4094b9e9..0ba828ca 100644
--- a/docs/VCFX_subsampler.md
+++ b/docs/VCFX_subsampler.md
@@ -17,6 +17,7 @@ VCFX_subsampler [options] < input.vcf > output.vcf
 | `-s, --subsample ` | Required: Number of variants to keep in the output |
 | `--seed ` | Optional: Use a specific random seed for reproducible results |
 | `-h, --help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 

From 6a0f0d05d6c593f43402a68f87cee6ce35ece4e1 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 22:23:43 +0100
Subject: [PATCH 44/54] Clarify version flag note

---
 docs/VCFX_outlier_detector.md                       | 4 +++-
 src/VCFX_outlier_detector/VCFX_outlier_detector.cpp | 8 ++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/docs/VCFX_outlier_detector.md b/docs/VCFX_outlier_detector.md
index 1f007bb7..32bc614c 100644
--- a/docs/VCFX_outlier_detector.md
+++ b/docs/VCFX_outlier_detector.md
@@ -16,11 +16,13 @@ VCFX_outlier_detector --metric  --threshold  [--variant|--sample] < in
 |--------|-------------|
 | `--metric`, `-m`  | Name of the metric to use (e.g., AF, DP, GQ) |
 | `--threshold`, `-t`  | Numeric threshold value for outlier detection |
-| `--variant`, `-v` | Variant mode: identify variants with INFO field metrics above threshold |
+| `--variant`, `-V` | Variant mode: identify variants with INFO field metrics above threshold |
 | `--sample`, `-s` | Sample mode: identify samples with average genotype metrics above threshold |
 | `--help`, `-h` | Display help message and exit |
 | `-v`, `--version` | Show program version and exit |
 
+**Note:** `-v` shows the version information. Use `--variant` or the short option `-V` to run in variant mode.
+
 ## Description
 
 VCFX_outlier_detector analyzes VCF files to identify outliers based on numeric metrics. The tool operates in two distinct modes:
diff --git a/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp b/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp
index d8f383c3..ae146354 100644
--- a/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp
+++ b/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp
@@ -28,12 +28,12 @@ int VCFXOutlierDetector::run(int argc, char* argv[]){
         {"help", no_argument, 0, 'h'},
         {"metric", required_argument, 0, 'm'},
         {"threshold", required_argument, 0, 't'},
-        {"variant", no_argument, 0, 'v'},
+        {"variant", no_argument, 0, 'V'},
         {"sample", no_argument, 0, 's'},
         {0,0,0,0}
     };
     while(true){
-        int c= getopt_long(argc, argv, "hm:t:vs", long_opts, nullptr);
+        int c= getopt_long(argc, argv, "hm:t:Vs", long_opts, nullptr);
         if(c==-1) break;
         switch(c){
             case 'h':
@@ -50,7 +50,7 @@ int VCFXOutlierDetector::run(int argc, char* argv[]){
                     return 1;
                 }
                 break;
-            case 'v':
+            case 'V':
                 isVariantMode= true;
                 break;
             case 's':
@@ -78,7 +78,7 @@ void VCFXOutlierDetector::displayHelp(){
 "  --help, -h           Print this help.\n"
 "  --metric, -m    Name of the metric to use (e.g. AF, DP, GQ...).\n"
 "  --threshold, -t  Numeric threshold.\n"
-"  --variant, -v        Evaluate each variant's  in INFO>threshold => print.\n"
+"  --variant, -V        Evaluate each variant's  in INFO>threshold => print.\n"
 "  --sample, -s         Evaluate sample averages of  in genotype subfield => print outliers.\n\n"
 "Examples:\n"
 "  1) Outlier variants with AF>0.05:\n"

From a43f028bec01b287182f68054e2d0e985600538d Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 22:25:18 +0100
Subject: [PATCH 45/54] docs: adjust toolkit utility count

---
 docs/citation.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/citation.md b/docs/citation.md
index 035f7aaf..18c83beb 100644
--- a/docs/citation.md
+++ b/docs/citation.md
@@ -24,7 +24,7 @@ The VCFX toolkit was presented at the 12th International Work-Conference on Bioi
 
 ### Abstract
 
-VCFX is a collection of specialized C/C++ command-line tools designed for efficient manipulation, analysis, and transformation of VCF (Variant Call Format) files used in genomic research and bioinformatics. Each tool is an independent executable that follows the Unix philosophy: do one thing well and work seamlessly with other tools through standard input/output streams. The toolkit includes over 60 specialized utilities covering the entire lifecycle of variant data analysis from filtering and annotation to merging, phasing, and structural variant manipulation.
+VCFX is a collection of specialized C/C++ command-line tools designed for efficient manipulation, analysis, and transformation of VCF (Variant Call Format) files used in genomic research and bioinformatics. Each tool is an independent executable that follows the Unix philosophy: do one thing well and work seamlessly with other tools through standard input/output streams. The toolkit includes 60 specialized utilities covering the entire lifecycle of variant data analysis from filtering and annotation to merging, phasing, and structural variant manipulation.
 
 ## How to Cite in Different Formats
 

From de32788702f90fd63a6691e70c5675a17b0032ee Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 22:27:03 +0100
Subject: [PATCH 46/54] docs: add annotation and processing sections

---
 docs/index.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/docs/index.md b/docs/index.md
index 57ffc818..92ad5135 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -98,6 +98,25 @@ Tools for handling VCF files:
 - [VCFX_compressor](VCFX_compressor.md) - Compress VCF files efficiently
 - [View all file management tools...](tools_overview.md#file-management)
 
+### Annotation and Reporting
+
+Tools for annotating and extracting information from VCF files:
+
+- [VCFX_custom_annotator](VCFX_custom_annotator.md) - Add custom annotations to VCF files
+- [VCFX_info_summarizer](VCFX_info_summarizer.md) - Summarize INFO fields
+- [VCFX_metadata_summarizer](VCFX_metadata_summarizer.md) - Summarize VCF metadata
+- [View all annotation tools...](tools_overview.md#annotation-and-reporting)
+
+### Data Processing
+
+Tools for processing variants and samples:
+
+- [VCFX_missing_data_handler](VCFX_missing_data_handler.md) - Handle missing data
+- [VCFX_quality_adjuster](VCFX_quality_adjuster.md) - Adjust quality scores
+- [VCFX_haplotype_phaser](VCFX_haplotype_phaser.md) - Phase haplotypes
+- [VCFX_haplotype_extractor](VCFX_haplotype_extractor.md) - Extract haplotype information
+- [View all processing tools...](tools_overview.md#data-processing)
+
 For a complete list of all tools and detailed usage examples, see the [tools overview](tools_overview.md).
 
 ## Who Should Use VCFX?

From 0a54a83902d634df80be86e8f9cd6d082b790201 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 22:30:43 +0100
Subject: [PATCH 47/54] docs: rename heading for sample comparison

---
 docs/index.md          | 12 ++++--------
 docs/quickstart.md     | 12 ++++--------
 docs/tools_overview.md |  9 ++++-----
 3 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/docs/index.md b/docs/index.md
index 57ffc818..27b76be0 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -84,7 +84,7 @@ Tools for converting or reformatting VCF data:
 
 Tools for validating and checking data quality:
 
-- [VCFX_concordance_checker](VCFX_concordance_checker.md) - Check concordance between VCF files
+- [VCFX_concordance_checker](VCFX_concordance_checker.md) - Check concordance between samples in a VCF file
 - [VCFX_missing_detector](VCFX_missing_detector.md) - Detect and report missing data
 - [VCFX_validator](VCFX_validator.md) - Validate VCF format compliance
 - [View all quality control tools...](tools_overview.md#quality-control)
@@ -131,15 +131,11 @@ cat input.vcf | \
   VCFX_allele_freq_calc > result.tsv
 ```
 
-### Sample Selection and Comparison
+### Sample Comparison
 
 ```bash
-# Extract samples and check concordance
-cat input.vcf | \
-  VCFX_sample_extractor --samples SAMPLE1,SAMPLE2 > samples.vcf
-
-cat samples.vcf reference.vcf | \
-  VCFX_concordance_checker > concordance_report.tsv
+# Check concordance between two samples in a single VCF
+cat input.vcf | VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" > concordance_report.tsv
 ```
 
 See the [tools overview page](tools_overview.md#common-usage-patterns) for more usage examples.
diff --git a/docs/quickstart.md b/docs/quickstart.md
index d99d39a2..22504065 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -62,10 +62,10 @@ cat input.vcf | \
 
 ### Example 4: Quality Control
 
-Check concordance between two VCF files:
+Check concordance between two samples in a single VCF file:
 
 ```bash
-VCFX_concordance_checker --vcf1 sample1.vcf --vcf2 sample2.vcf > concordance_report.tsv
+cat sample.vcf | VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" > concordance_report.tsv
 ```
 
 ## Working with Compressed Files
@@ -126,12 +126,8 @@ cat input.vcf | \
 ### Sample Comparison
 
 ```bash
-# Extract common samples
-VCFX_sample_extractor --samples SAMPLE1,SAMPLE2 < input1.vcf > samples1.vcf
-VCFX_sample_extractor --samples SAMPLE1,SAMPLE2 < input2.vcf > samples2.vcf
-
-# Check concordance
-VCFX_concordance_checker --vcf1 samples1.vcf --vcf2 samples2.vcf > concordance.tsv
+# Check concordance between two samples in a single VCF
+cat input.vcf | VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" > concordance.tsv
 ```
 
 ### Population Structure Analysis
diff --git a/docs/tools_overview.md b/docs/tools_overview.md
index de707ce7..2da498ad 100644
--- a/docs/tools_overview.md
+++ b/docs/tools_overview.md
@@ -60,7 +60,7 @@ Tools for converting or reformatting VCF data:
 
 Tools for validating and checking data quality:
 
-- [VCFX_concordance_checker](VCFX_concordance_checker.md) - Check concordance between VCF files
+- [VCFX_concordance_checker](VCFX_concordance_checker.md) - Check concordance between samples in a VCF file
 - [VCFX_missing_detector](VCFX_missing_detector.md) - Detect and report missing data
 - [VCFX_outlier_detector](VCFX_outlier_detector.md) - Detect outlier samples or variants
 - [VCFX_alignment_checker](VCFX_alignment_checker.md) - Check alignment of variants
@@ -126,12 +126,11 @@ cat input.vcf | \
   VCFX_phred_filter --phred-filter 30 > high_quality_snps.vcf
 ```
 
-### Sample Extraction and Comparison
+### Sample Comparison
 
 ```bash
-# Extract samples and check concordance
-cat input.vcf | VCFX_sample_extractor --samples SAMPLE1,SAMPLE2 > samples.vcf
-cat samples.vcf reference.vcf | VCFX_concordance_checker > concordance_report.tsv
+# Check concordance between two samples in a single VCF
+cat input.vcf | VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" > concordance.tsv
 ```
 
 ### Linkage Disequilibrium Analysis

From c2d1e5363ca59c6d3ccfabf38921d9b488799e7b Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 23:13:56 +0100
Subject: [PATCH 48/54] Improve dark mode

---
 docs/styles/custom.css | 6 ++++++
 mkdocs.yml             | 6 ++++--
 2 files changed, 10 insertions(+), 2 deletions(-)
 create mode 100644 docs/styles/custom.css

diff --git a/docs/styles/custom.css b/docs/styles/custom.css
new file mode 100644
index 00000000..74717038
--- /dev/null
+++ b/docs/styles/custom.css
@@ -0,0 +1,6 @@
+[data-md-color-scheme="slate"] {
+  --md-primary-fg-color: #90caf9;
+  --md-accent-fg-color: #90caf9;
+  --md-default-fg-color: #e0e0e0;
+  --md-default-bg-color: #121212;
+}
diff --git a/mkdocs.yml b/mkdocs.yml
index 3a683ca9..705e6b99 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -29,8 +29,8 @@ theme:
         icon: material/brightness-7
         name: Switch to dark mode
     - scheme: slate
-      primary: indigo
-      accent: indigo
+      primary: blue grey
+      accent: light blue
       toggle:
         icon: material/brightness-4
         name: Switch to light mode
@@ -44,6 +44,8 @@ extra:
       link: https://github.com/ieeta-pt/VCFX
     - icon: fontawesome/brands/twitter
       link: https://twitter.com/MiguelFSilva1
+extra_css:
+  - styles/custom.css
 
 # Extensions
 markdown_extensions:

From d6aa26174757d6f7e77196c5e3a255a251aaed42 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Sat, 24 May 2025 00:28:28 +0100
Subject: [PATCH 49/54] Fix Python bindings

---
 CMakeLists.txt                |  5 +++
 docs/python_api.md            | 49 +++++++++++++++++++++++
 mkdocs.yml                    |  5 ++-
 python/CMakeLists.txt         | 24 ++++++++++++
 python/__init__.py            |  3 ++
 python/bindings.cpp           | 74 +++++++++++++++++++++++++++++++++++
 python/setup.py               | 32 +++++++++++++++
 src/CMakeLists.txt            |  1 +
 tests/CMakeLists.txt          |  1 +
 tests/test_all.sh             |  1 +
 tests/test_python_bindings.sh | 24 ++++++++++++
 11 files changed, 217 insertions(+), 2 deletions(-)
 create mode 100644 docs/python_api.md
 create mode 100644 python/CMakeLists.txt
 create mode 100644 python/__init__.py
 create mode 100644 python/bindings.cpp
 create mode 100644 python/setup.py
 create mode 100755 tests/test_python_bindings.sh

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 043cf974..cf4d4459 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,6 +16,7 @@ project(VCFX
 
 # Optionally allow building for WebAssembly via Emscripten
 option(BUILD_WASM "Build with emscripten toolchain" OFF)
+option(PYTHON_BINDINGS "Build Python bindings" ON)
 
 if(BUILD_WASM)
     if(NOT CMAKE_TOOLCHAIN_FILE)
@@ -55,6 +56,10 @@ enable_testing()
 # Add top-level 'src' subdirectory, which in turn references each tool subdirectory
 add_subdirectory(src)
 
+if(PYTHON_BINDINGS)
+  add_subdirectory(python)
+endif()
+
 # Add the test suite
 add_subdirectory(tests)
 
diff --git a/docs/python_api.md b/docs/python_api.md
new file mode 100644
index 00000000..713e9191
--- /dev/null
+++ b/docs/python_api.md
@@ -0,0 +1,49 @@
+# Python API
+
+VCFX provides optional Python bindings exposing a subset of helper
+functions from the C++ `vcfx_core` library. The bindings are built as a
+native Python extension and can be enabled through CMake.
+
+## Installation
+
+Build the project with the `PYTHON_BINDINGS` option enabled:
+
+```bash
+mkdir build && cd build
+cmake -DPYTHON_BINDINGS=ON ..
+make -j
+```
+
+The compiled module will be placed in the `build/python/vcfx` directory.
+You can also install the package via `pip` which will invoke CMake
+automatically:
+
+```bash
+pip install ./python
+```
+
+## Available Functions
+
+The module exposes the following helpers:
+
+- `trim(text)` – remove leading and trailing whitespace.
+- `split(text, delimiter)` – split `text` by the given delimiter and
+  return a list of strings.
+- `read_file_maybe_compressed(path)` – read a plain or gzip/BGZF
+  compressed file and return its contents as a string.
+- `get_version()` – return the VCFX version string.
+
+## Example Usage
+
+```python
+import vcfx
+
+print(vcfx.trim("  abc  "))
+# 'abc'
+
+print(vcfx.split("A,B,C", ","))
+# ['A', 'B', 'C']
+
+version = vcfx.get_version()
+print("VCFX version:", version)
+```
diff --git a/mkdocs.yml b/mkdocs.yml
index 705e6b99..41f2d1b0 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -137,7 +137,8 @@ nav:
       - VCFX_missing_data_handler: VCFX_missing_data_handler.md
       - VCFX_quality_adjuster: VCFX_quality_adjuster.md
       - VCFX_haplotype_phaser: VCFX_haplotype_phaser.md
-      - VCFX_haplotype_extractor: VCFX_haplotype_extractor.md
+  - VCFX_haplotype_extractor: VCFX_haplotype_extractor.md
+  - Python API: python_api.md
   - Contributing: CONTRIBUTING.md
   - Citation: citation.md
-  - License: LICENSE.md 
\ No newline at end of file
+  - License: LICENSE.md
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
new file mode 100644
index 00000000..ff86f970
--- /dev/null
+++ b/python/CMakeLists.txt
@@ -0,0 +1,24 @@
+cmake_minimum_required(VERSION 3.14)
+
+if(NOT PYTHON_BINDINGS)
+    return()
+endif()
+
+find_package(Python3 COMPONENTS Development REQUIRED)
+
+add_library(_vcfx MODULE bindings.cpp)
+target_link_libraries(_vcfx PRIVATE vcfx_core Python3::Python)
+
+# Place the compiled module into the build/python directory
+set_target_properties(_vcfx PROPERTIES
+    PREFIX ""
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/python/vcfx"
+    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/python/vcfx"
+)
+
+configure_file(__init__.py "${CMAKE_BINARY_DIR}/python/vcfx/__init__.py" COPYONLY)
+
+install(TARGETS _vcfx
+        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx
+        ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx)
+install(FILES __init__.py DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx)
diff --git a/python/__init__.py b/python/__init__.py
new file mode 100644
index 00000000..8c70352a
--- /dev/null
+++ b/python/__init__.py
@@ -0,0 +1,3 @@
+"""Python bindings for the VCFX toolkit."""
+
+from ._vcfx import *  # noqa: F401,F403
diff --git a/python/bindings.cpp b/python/bindings.cpp
new file mode 100644
index 00000000..a8b18785
--- /dev/null
+++ b/python/bindings.cpp
@@ -0,0 +1,74 @@
+#include 
+#include "vcfx_core.h"
+#include 
+#include 
+
+// Helper to convert std::vector to Python list
+static PyObject* to_py_list(const std::vector& vec) {
+    PyObject* list = PyList_New(vec.size());
+    if (!list) return nullptr;
+    for (size_t i = 0; i < vec.size(); ++i) {
+        PyObject* item = PyUnicode_FromString(vec[i].c_str());
+        if (!item) {
+            Py_DECREF(list);
+            return nullptr;
+        }
+        PyList_SET_ITEM(list, i, item); // steals reference
+    }
+    return list;
+}
+
+static PyObject* py_trim(PyObject*, PyObject* args) {
+    const char* text;
+    if (!PyArg_ParseTuple(args, "s", &text))
+        return nullptr;
+    std::string result = vcfx::trim(text);
+    return PyUnicode_FromString(result.c_str());
+}
+
+static PyObject* py_split(PyObject*, PyObject* args) {
+    const char* text;
+    const char* delim;
+    if (!PyArg_ParseTuple(args, "ss", &text, &delim))
+        return nullptr;
+    std::vector parts = vcfx::split(text, delim[0]);
+    return to_py_list(parts);
+}
+
+static PyObject* py_read_file(PyObject*, PyObject* args) {
+    const char* path;
+    if (!PyArg_ParseTuple(args, "s", &path))
+        return nullptr;
+    std::string out;
+    if (!vcfx::read_file_maybe_compressed(path, out)) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to read file");
+        return nullptr;
+    }
+    return PyBytes_FromStringAndSize(out.data(), out.size());
+}
+
+static PyObject* py_get_version(PyObject*, PyObject*) {
+    std::string ver = vcfx::get_version();
+    return PyUnicode_FromString(ver.c_str());
+}
+
+static PyMethodDef VcfxMethods[] = {
+    {"trim", py_trim, METH_VARARGS, "Trim leading and trailing whitespace"},
+    {"split", py_split, METH_VARARGS, "Split a string on the given delimiter"},
+    {"read_file_maybe_compressed", py_read_file, METH_VARARGS,
+     "Read a (possibly compressed) file and return its contents"},
+    {"get_version", py_get_version, METH_NOARGS, "Return VCFX version string"},
+    {nullptr, nullptr, 0, nullptr}
+};
+
+static struct PyModuleDef moduledef = {
+    PyModuleDef_HEAD_INIT,
+    "_vcfx",
+    "Python bindings for VCFX helper functions",
+    -1,
+    VcfxMethods
+};
+
+PyMODINIT_FUNC PyInit__vcfx(void) {
+    return PyModule_Create(&moduledef);
+}
diff --git a/python/setup.py b/python/setup.py
new file mode 100644
index 00000000..7ad5494e
--- /dev/null
+++ b/python/setup.py
@@ -0,0 +1,32 @@
+import pathlib
+import subprocess
+from setuptools import setup, Extension
+from setuptools.command.build_ext import build_ext
+
+class CMakeExtension(Extension):
+    def __init__(self, name):
+        super().__init__(name, sources=[])
+
+class CMakeBuild(build_ext):
+    def build_extension(self, ext):
+        extdir = pathlib.Path(self.get_ext_fullpath(ext.name)).parent.resolve()
+        cmake_args = [
+            f'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}',
+            f'-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY={extdir}',
+            '-DPYTHON_BINDINGS=ON'
+        ]
+        build_temp = pathlib.Path(self.build_temp)
+        build_temp.mkdir(parents=True, exist_ok=True)
+        source_dir = pathlib.Path(__file__).resolve().parent.parent
+        subprocess.check_call(['cmake', str(source_dir)] + cmake_args, cwd=build_temp)
+        subprocess.check_call(['cmake', '--build', '.', '--target', '_vcfx'], cwd=build_temp)
+
+setup(
+    name='vcfx',
+    version='0.0.0',
+    packages=['vcfx'],
+    package_dir={'vcfx': '.'},
+    ext_modules=[CMakeExtension('_vcfx')],
+    cmdclass={'build_ext': CMakeBuild},
+    zip_safe=False,
+)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 93d6adfc..9408aaa3 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.14)
 
 # Build the core library from your shared code
 add_library(vcfx_core STATIC vcfx_core.cpp)
+set_property(TARGET vcfx_core PROPERTY POSITION_INDEPENDENT_CODE ON)
 target_include_directories(vcfx_core PUBLIC ${CMAKE_CURRENT_LIST_DIR}/../include)
 target_link_libraries(vcfx_core PUBLIC ZLIB::ZLIB)
 if(WIN32)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 558897dc..78fc4a59 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -59,6 +59,7 @@ set(TEST_SCRIPTS
     test_validator.sh
     test_variant_classifier.sh
     test_variant_counter.sh
+    test_python_bindings.sh
 )
 
 foreach(script ${TEST_SCRIPTS})
diff --git a/tests/test_all.sh b/tests/test_all.sh
index 75cbb912..f226aff1 100755
--- a/tests/test_all.sh
+++ b/tests/test_all.sh
@@ -81,6 +81,7 @@ TEST_SCRIPTS=(
     "test_validator.sh"
     "test_variant_classifier.sh"
     "test_variant_counter.sh"
+    "test_python_bindings.sh"
 )
 
 # Run all tests
diff --git a/tests/test_python_bindings.sh b/tests/test_python_bindings.sh
new file mode 100755
index 00000000..fc6f6350
--- /dev/null
+++ b/tests/test_python_bindings.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+set -e
+set -o pipefail
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+ROOT_DIR="$( cd "$SCRIPT_DIR/.." && pwd )"
+BUILD_DIR="${ROOT_DIR}/build/python_bindings"
+
+mkdir -p "$BUILD_DIR"
+cd "$BUILD_DIR"
+
+cmake -DPYTHON_BINDINGS=ON ../..
+make -j
+
+cd "$SCRIPT_DIR"
+
+PYTHONPATH="${BUILD_DIR}/python" python3 - <<'PY'
+import vcfx
+out = vcfx.trim("  hello  ")
+if out != "hello":
+    raise SystemExit('trim failed')
+print('Python bindings OK:', out)
+PY

From 2d103671518aa8174c8102b801b22f2beb86b478 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Sat, 24 May 2025 01:50:33 +0100
Subject: [PATCH 50/54] Fix Docker build by installing Python dev deps

---
 Dockerfile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 857fa1fe..9e242e1a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -9,6 +9,8 @@ RUN apt-get update && apt-get install -y \
     cmake \
     git \
     libz-dev \
+    python3 \
+    python3-dev \
     && rm -rf /var/lib/apt/lists/*
 
 # Create a working directory

From a8e1ca50683600f48f635af35de1efcf766fd1ad Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Sat, 24 May 2025 02:17:12 +0100
Subject: [PATCH 51/54] Add Python wrappers and wheel workflow

---
 .github/workflows/build-test.yml | 50 ++++++++++++++++++++++++++++++
 docs/python_api.md               | 22 ++++++++++++++
 python/CMakeLists.txt            |  3 +-
 python/__init__.py               | 11 +++++++
 python/setup.py                  | 14 ++++++++-
 python/tools.py                  | 52 ++++++++++++++++++++++++++++++++
 6 files changed, 150 insertions(+), 2 deletions(-)
 create mode 100644 python/tools.py

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index a95241db..3b87ad6c 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -41,3 +41,53 @@ jobs:
           cd build
           ctest --output-on-failure
         shell: bash
+
+  python-wheels:
+    needs: build-and-test
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Install dependencies (Linux)
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential cmake libz-dev python3-pip
+
+      - name: Install dependencies (macOS)
+        if: runner.os == 'macOS'
+        run: |
+          brew update
+          brew install cmake zlib python@3 bash
+          echo "$(brew --prefix)/bin" >> $GITHUB_PATH
+
+      - name: Build project
+        run: |
+          cmake -S . -B build -DPYTHON_BINDINGS=ON
+          cmake --build build --parallel
+          cmake --install build --prefix $PWD/install
+        shell: bash
+
+      - name: Build wheel
+        run: |
+          python3 -m pip install --upgrade pip wheel
+          pip wheel ./python -w dist
+        shell: bash
+
+      - name: Test Python wheel
+        run: |
+          pip install dist/*.whl
+          echo "$PWD/install/bin" >> $GITHUB_PATH
+          python3 - <<'EOF'
+import vcfx
+print('version:', vcfx.get_version())
+tools = vcfx.available_tools()
+print('tools:', len(tools))
+if tools:
+    vcfx.run_tool(tools[0], '--help', check=False)
+EOF
+        shell: bash
diff --git a/docs/python_api.md b/docs/python_api.md
index 713e9191..0db775ce 100644
--- a/docs/python_api.md
+++ b/docs/python_api.md
@@ -47,3 +47,25 @@ print(vcfx.split("A,B,C", ","))
 version = vcfx.get_version()
 print("VCFX version:", version)
 ```
+
+## Tool Wrappers
+
+Besides the helper functions, the package provides lightweight wrappers for
+all command line tools shipped with VCFX.  The wrappers simply invoke the
+corresponding ``VCFX_*`` executable via ``subprocess``.
+
+Use ``vcfx.available_tools()`` to see which tools are accessible on your
+``PATH`` and call them either via ``vcfx.run_tool(name, *args)`` or by using
+the tool name as a function:
+
+```python
+import vcfx
+
+print(vcfx.available_tools())
+
+# run through the generic helper
+vcfx.run_tool("alignment_checker", "--help")
+
+# or directly by name (if available)
+vcfx.alignment_checker("--help")
+```
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index ff86f970..40a4efcc 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -17,8 +17,9 @@ set_target_properties(_vcfx PROPERTIES
 )
 
 configure_file(__init__.py "${CMAKE_BINARY_DIR}/python/vcfx/__init__.py" COPYONLY)
+configure_file(tools.py "${CMAKE_BINARY_DIR}/python/vcfx/tools.py" COPYONLY)
 
 install(TARGETS _vcfx
         LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx
         ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx)
-install(FILES __init__.py DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx)
+install(FILES __init__.py tools.py DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx)
diff --git a/python/__init__.py b/python/__init__.py
index 8c70352a..2f2ae974 100644
--- a/python/__init__.py
+++ b/python/__init__.py
@@ -1,3 +1,14 @@
 """Python bindings for the VCFX toolkit."""
 
 from ._vcfx import *  # noqa: F401,F403
+from . import tools as _tools
+
+# Re-export helper functions for convenience
+available_tools = _tools.available_tools
+run_tool = _tools.run_tool
+
+
+def __getattr__(name):
+    """Provide access to tool wrappers as attributes."""
+    return getattr(_tools, name)
+
diff --git a/python/setup.py b/python/setup.py
index 7ad5494e..c247405f 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -1,8 +1,15 @@
 import pathlib
+import re
 import subprocess
 from setuptools import setup, Extension
 from setuptools.command.build_ext import build_ext
 
+def read_version():
+    root = pathlib.Path(__file__).resolve().parent.parent / "CMakeLists.txt"
+    text = root.read_text()
+    m = re.search(r"set\(VCFX_VERSION\s+\"([0-9.]+)\"\)", text)
+    return m.group(1) if m else "0.0.0"
+
 class CMakeExtension(Extension):
     def __init__(self, name):
         super().__init__(name, sources=[])
@@ -23,10 +30,15 @@ def build_extension(self, ext):
 
 setup(
     name='vcfx',
-    version='0.0.0',
+    version=read_version(),
     packages=['vcfx'],
     package_dir={'vcfx': '.'},
     ext_modules=[CMakeExtension('_vcfx')],
     cmdclass={'build_ext': CMakeBuild},
     zip_safe=False,
+    classifiers=[
+        'Programming Language :: Python :: 3',
+        'Operating System :: MacOS :: MacOS X',
+        'Operating System :: POSIX :: Linux',
+    ],
 )
diff --git a/python/tools.py b/python/tools.py
new file mode 100644
index 00000000..13e61626
--- /dev/null
+++ b/python/tools.py
@@ -0,0 +1,52 @@
+import subprocess
+import shutil
+import functools
+
+__all__ = ["available_tools", "run_tool"]
+
+
+def available_tools():
+    """Return a list of VCFX tools available on the PATH."""
+    result = subprocess.run(["vcfx", "--list"], capture_output=True, text=True)
+    if result.returncode != 0:
+        return []
+    return [line.strip() for line in result.stdout.splitlines() if line.strip()]
+
+
+def run_tool(tool, *args, check=True, capture_output=False, text=True, **kwargs):
+    """Run a VCFX tool using subprocess.
+
+    Parameters
+    ----------
+    tool : str
+        Name of the tool without the ``VCFX_`` prefix.
+    *args : list
+        Arguments passed to the tool.
+    check : bool, optional
+        If ``True`` (default) raise ``CalledProcessError`` on non-zero
+        return code.
+    capture_output : bool, optional
+        If ``True`` capture stdout/stderr and return them on the returned
+        ``CompletedProcess`` object.
+    text : bool, optional
+        If ``True`` decode output as text. Defaults to ``True``.
+    **kwargs : dict
+        Additional keyword arguments forwarded to ``subprocess.run``.
+
+    Returns
+    -------
+    subprocess.CompletedProcess
+    """
+    exe = shutil.which(f"VCFX_{tool}")
+    if exe is None:
+        raise FileNotFoundError(f"VCFX tool '{tool}' not found in PATH")
+    cmd = [exe, *map(str, args)]
+    return subprocess.run(cmd, check=check, capture_output=capture_output, text=text, **kwargs)
+
+
+# Lazy attribute access for tool wrappers
+
+def __getattr__(name):
+    if name in available_tools():
+        return functools.partial(run_tool, name)
+    raise AttributeError(f"module 'vcfx' has no attribute '{name}'")

From 1e8653f5b5b8dfa834f132e95c70b131a42cdf9e Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Sat, 24 May 2025 09:58:00 +0100
Subject: [PATCH 52/54] Extend Python bindings

---
 docs/python_api.md            |  5 +++++
 python/bindings.cpp           | 18 ++++++++++++++++++
 tests/test_python_bindings.sh |  3 +++
 3 files changed, 26 insertions(+)

diff --git a/docs/python_api.md b/docs/python_api.md
index 0db775ce..d1672aab 100644
--- a/docs/python_api.md
+++ b/docs/python_api.md
@@ -31,6 +31,8 @@ The module exposes the following helpers:
   return a list of strings.
 - `read_file_maybe_compressed(path)` – read a plain or gzip/BGZF
   compressed file and return its contents as a string.
+- `read_maybe_compressed(data)` – decompress a bytes object if it is
+  gzip/BGZF compressed and return the resulting bytes.
 - `get_version()` – return the VCFX version string.
 
 ## Example Usage
@@ -44,6 +46,9 @@ print(vcfx.trim("  abc  "))
 print(vcfx.split("A,B,C", ","))
 # ['A', 'B', 'C']
 
+data = vcfx.read_maybe_compressed(b"hello")
+print(data)
+
 version = vcfx.get_version()
 print("VCFX version:", version)
 ```
diff --git a/python/bindings.cpp b/python/bindings.cpp
index a8b18785..1e2d0577 100644
--- a/python/bindings.cpp
+++ b/python/bindings.cpp
@@ -2,6 +2,7 @@
 #include "vcfx_core.h"
 #include 
 #include 
+#include 
 
 // Helper to convert std::vector to Python list
 static PyObject* to_py_list(const std::vector& vec) {
@@ -52,11 +53,28 @@ static PyObject* py_get_version(PyObject*, PyObject*) {
     return PyUnicode_FromString(ver.c_str());
 }
 
+static PyObject* py_read_stream(PyObject*, PyObject* args) {
+    Py_buffer buf;
+    if (!PyArg_ParseTuple(args, "y*", &buf))
+        return nullptr;
+    std::string data(static_cast(buf.buf), buf.len);
+    PyBuffer_Release(&buf);
+    std::istringstream ss(data);
+    std::string out;
+    if (!vcfx::read_maybe_compressed(ss, out)) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to read data");
+        return nullptr;
+    }
+    return PyBytes_FromStringAndSize(out.data(), out.size());
+}
+
 static PyMethodDef VcfxMethods[] = {
     {"trim", py_trim, METH_VARARGS, "Trim leading and trailing whitespace"},
     {"split", py_split, METH_VARARGS, "Split a string on the given delimiter"},
     {"read_file_maybe_compressed", py_read_file, METH_VARARGS,
      "Read a (possibly compressed) file and return its contents"},
+    {"read_maybe_compressed", py_read_stream, METH_VARARGS,
+     "Decompress bytes if needed and return the contents"},
     {"get_version", py_get_version, METH_NOARGS, "Return VCFX version string"},
     {nullptr, nullptr, 0, nullptr}
 };
diff --git a/tests/test_python_bindings.sh b/tests/test_python_bindings.sh
index fc6f6350..24947b74 100755
--- a/tests/test_python_bindings.sh
+++ b/tests/test_python_bindings.sh
@@ -20,5 +20,8 @@ import vcfx
 out = vcfx.trim("  hello  ")
 if out != "hello":
     raise SystemExit('trim failed')
+compressed = vcfx.read_maybe_compressed(b"hello")
+if compressed != b"hello":
+    raise SystemExit('read_maybe_compressed failed')
 print('Python bindings OK:', out)
 PY

From e47145dca4c61037070681bde07817be804cbcd2 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Sat, 24 May 2025 10:26:31 +0100
Subject: [PATCH 53/54] Add vcfx wrapper test

---
 README.md                  | 12 ++++++++
 docs/tools_overview.md     |  2 +-
 src/vcfx_wrapper/vcfx.cpp  | 56 ++++++++++++++++++++++++++++++++++++++
 tests/test_all.sh          |  1 +
 tests/test_vcfx_wrapper.sh | 34 +++++++++++++++++++++++
 5 files changed, 104 insertions(+), 1 deletion(-)
 create mode 100755 tests/test_vcfx_wrapper.sh

diff --git a/README.md b/README.md
index 906803db..891d0695 100644
--- a/README.md
+++ b/README.md
@@ -75,6 +75,18 @@ cat input.vcf | \
   VCFX_allele_freq_calc > snp_frequencies.tsv
 ```
 
+### Listing Available Tools
+
+```bash
+vcfx list
+```
+
+### Show Tool Documentation
+
+```bash
+vcfx help allele_counter
+```
+
 ## Building for WebAssembly
 
 If you have [Emscripten](https://emscripten.org/) installed:
diff --git a/docs/tools_overview.md b/docs/tools_overview.md
index 2da498ad..abeb3af1 100644
--- a/docs/tools_overview.md
+++ b/docs/tools_overview.md
@@ -2,7 +2,7 @@
 
 VCFX is a collection of C/C++ tools for processing and analyzing VCF (Variant Call Format) files, with optional WebAssembly compatibility. Each tool is an independent command-line executable that can parse input from `stdin` and write to `stdout`, enabling flexible piping and integration into bioinformatics pipelines.
 
-The suite also includes a convenience wrapper `vcfx` so you can run commands as `vcfx <subcommand>`. For example, `vcfx variant_counter` is equivalent to running `VCFX_variant_counter`. Use `vcfx --list` to see available subcommands. All individual `VCFX_*` binaries remain available if you prefer calling them directly.
+The suite also includes a convenience wrapper `vcfx` so you can run commands as `vcfx <subcommand>`. For example, `vcfx variant_counter` is equivalent to running `VCFX_variant_counter`. Use `vcfx --list` or the alias `vcfx list` to see available subcommands. To view Markdown documentation for a tool, run `vcfx help <tool>`. All individual `VCFX_*` binaries remain available if you prefer calling them directly.
 Every tool also accepts `--version` to display the build version.
 
 ## Tool Categories
diff --git a/src/vcfx_wrapper/vcfx.cpp b/src/vcfx_wrapper/vcfx.cpp
index 70093a13..2625136f 100644
--- a/src/vcfx_wrapper/vcfx.cpp
+++ b/src/vcfx_wrapper/vcfx.cpp
@@ -6,12 +6,17 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 #include 
 
 static void print_usage(){
     std::cout << "vcfx - unified interface for VCFX tools\n"
               << "Usage: vcfx [--help] [--list] <subcommand> [args]\n\n"
               << "  <subcommand>  Name of a VCFX tool without the 'VCFX_' prefix\n"
+              << "  list          Alias for --list\n"
+              << "  help <tool>   Show Markdown documentation for a tool if available\n"
               << "  --list        List available subcommands found in PATH\n"
               << "  --help        Show this help message\n";
 }
@@ -47,6 +52,43 @@ static void list_commands(){
     }
 }
 
+static std::vector get_doc_dirs(){
+    std::vector dirs;
+    const char* env = std::getenv("VCFX_DOCS_DIR");
+    if(env) dirs.emplace_back(env);
+
+    char buf[PATH_MAX];
+    ssize_t len = readlink("/proc/self/exe", buf, sizeof(buf)-1);
+    if(len > 0){
+        buf[len] = '\0';
+        std::string exe(buf);
+        auto pos = exe.find_last_of('/');
+        if(pos != std::string::npos){
+            std::string base = exe.substr(0,pos);
+            dirs.push_back(base + "/../share/doc/VCFX");
+            dirs.push_back(base + "/../share/vcfx/docs");
+            dirs.push_back(base + "/../docs");
+            dirs.push_back(base + "/../../docs");
+        }
+    }
+    dirs.push_back("docs");
+    return dirs;
+}
+
+static int print_tool_doc(const std::string& tool){
+    std::string fname = "VCFX_" + tool + ".md";
+    for(const auto& dir : get_doc_dirs()){
+        std::string path = dir + "/" + fname;
+        std::ifstream in(path);
+        if(in){
+            std::cout << in.rdbuf();
+            return 0;
+        }
+    }
+    std::cerr << "Documentation for '" << tool << "' not found." << std::endl;
+    return 1;
+}
+
 int main(int argc, char* argv[]){
     bool show_help = false;
     bool show_list = false;
@@ -81,6 +123,20 @@ int main(int argc, char* argv[]){
     }
 
     std::string sub = argv[optind];
+
+    if(sub == "list"){
+        list_commands();
+        return 0;
+    }
+
+    if(sub == "help"){
+        if(optind + 1 >= argc){
+            print_usage();
+            return 0;
+        }
+        return print_tool_doc(argv[optind + 1]);
+    }
+
     std::string exec_name = "VCFX_" + sub;
 
     std::vector exec_args;
diff --git a/tests/test_all.sh b/tests/test_all.sh
index f226aff1..86921dea 100755
--- a/tests/test_all.sh
+++ b/tests/test_all.sh
@@ -81,6 +81,7 @@ TEST_SCRIPTS=(
     "test_validator.sh"
     "test_variant_classifier.sh"
     "test_variant_counter.sh"
+    "test_vcfx_wrapper.sh"
     "test_python_bindings.sh"
 )
 
diff --git a/tests/test_vcfx_wrapper.sh b/tests/test_vcfx_wrapper.sh
new file mode 100755
index 00000000..e2c766e5
--- /dev/null
+++ b/tests/test_vcfx_wrapper.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+set -e
+set -o pipefail
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+ROOT_DIR="$( cd "$SCRIPT_DIR/.." && pwd )"
+VCFX_BIN="$ROOT_DIR/build/src/vcfx_wrapper/vcfx"
+
+# Ensure built tools are in PATH so the wrapper can locate them
+source "$ROOT_DIR/add_vcfx_tools_to_path.sh"
+
+if [ ! -x "$VCFX_BIN" ]; then
+  echo "vcfx executable not found: $VCFX_BIN"
+  exit 1
+fi
+
+LIST_LONG="$($VCFX_BIN --list)"
+LIST_ALIAS="$($VCFX_BIN list)"
+if [ "$LIST_LONG" != "$LIST_ALIAS" ]; then
+  echo "Output of 'vcfx list' does not match '--list'"
+  diff <(echo "$LIST_LONG") <(echo "$LIST_ALIAS") || true
+  exit 1
+fi
+
+echo "$LIST_LONG" > /dev/null # quiet shellcheck complaining about unused var
+
+DOC_FIRST_LINE="$($VCFX_BIN help allele_counter | head -n 1)"
+if ! echo "$DOC_FIRST_LINE" | grep -q "VCFX_allele_counter"; then
+  echo "Help output for allele_counter does not show documentation"
+  echo "First line was: $DOC_FIRST_LINE"
+  exit 1
+fi
+
+echo "✓ vcfx wrapper tests passed"

From 86a75113cd860a84297979dd7c5ff116d630095f Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Sat, 24 May 2025 11:07:34 +0100
Subject: [PATCH 54/54] Fix YAML syntax in workflow

---
 .github/workflows/build-test.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 3b87ad6c..bd9675c9 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -82,12 +82,12 @@ jobs:
         run: |
           pip install dist/*.whl
           echo "$PWD/install/bin" >> $GITHUB_PATH
-          python3 - <<'EOF'
-import vcfx
-print('version:', vcfx.get_version())
-tools = vcfx.available_tools()
-print('tools:', len(tools))
-if tools:
-    vcfx.run_tool(tools[0], '--help', check=False)
-EOF
+          cat <<'          EOF' | sed 's/^          //' | python3 -
+          import vcfx
+          print('version:', vcfx.get_version())
+          tools = vcfx.available_tools()
+          print('tools:', len(tools))
+          if tools:
+              vcfx.run_tool(tools[0], '--help', check=False)
+          EOF
         shell: bash