From ae0b22aab8888bc15a3c46205c829df90cf99b47 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 12:08:57 +0100 Subject: [PATCH 01/63] Fix NaN output in quality adjuster --- src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp | 9 ++++++++- tests/test_custom_annotator.sh | 8 +++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp b/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp index b09fe473..68632f96 100644 --- a/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp +++ b/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp @@ -157,7 +157,14 @@ void VCFXQualityAdjuster::adjustQualityScores(std::istream &in, std::ostream &ou // clamp large values if(newQual>1e12) newQual= 1e12; } - fields[5]= std::to_string(newQual); + std::string qualStr; + if(std::isnan(newQual)){ + // ensure consistent representation for NaN + qualStr = "nan"; + } else { + qualStr = std::to_string(newQual); + } + fields[5]= qualStr; std::ostringstream oss; for(size_t i=0; i0) oss<<"\t"; diff --git a/tests/test_custom_annotator.sh b/tests/test_custom_annotator.sh index 7da1d4a3..ce42c649 100755 --- a/tests/test_custom_annotator.sh +++ b/tests/test_custom_annotator.sh @@ -125,10 +125,11 @@ for i in $(seq 1 1000); do echo "1 $i A G Annotation$i" done > "$SCRIPT_DIR/data/large_annotations.txt" # Add VCF header -sed -i '' '1i\ +sed -i '1i\ ##fileformat=VCFv4.2\ ##contig=\ -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1' "$SCRIPT_DIR/data/large_input.vcf" +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1\ +' "$SCRIPT_DIR/data/large_input.vcf" time "$ROOT_DIR/build/src/VCFX_custom_annotator/VCFX_custom_annotator" --add-annotation "$SCRIPT_DIR/data/large_annotations.txt" < "$SCRIPT_DIR/data/large_input.vcf" > "$SCRIPT_DIR/data/large_output.vcf" if [ $? -eq 0 ]; then @@ -138,4 +139,5 @@ else exit 1 fi -echo "All tests for VCFX_custom_annotator passed!" 
\ No newline at end of file +echo "All tests for VCFX_custom_annotator passed!" + From d9097e7486f63cc51330b93242d6d9c28a010511 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 15:37:57 +0100 Subject: [PATCH 02/63] Fix Emscripten build detection --- CMakeLists.txt | 13 ++++++++++++- README.md | 2 +- compile_wasm.sh | 8 ++++++-- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 662b06cc..17f372a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,18 @@ project(VCFX option(BUILD_WASM "Build with emscripten toolchain" OFF) if(BUILD_WASM) - set(CMAKE_TOOLCHAIN_FILE "/path/to/emscripten.cmake" CACHE FILEPATH "Emscripten toolchain" FORCE) + if(NOT CMAKE_TOOLCHAIN_FILE) + if(DEFINED ENV{EMSDK} AND EXISTS "$ENV{EMSDK}/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake") + set(CMAKE_TOOLCHAIN_FILE "$ENV{EMSDK}/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake" CACHE FILEPATH "Emscripten toolchain" FORCE) + elseif(DEFINED ENV{EMSCRIPTEN} AND EXISTS "$ENV{EMSCRIPTEN}/cmake/Modules/Platform/Emscripten.cmake") + set(CMAKE_TOOLCHAIN_FILE "$ENV{EMSCRIPTEN}/cmake/Modules/Platform/Emscripten.cmake" CACHE FILEPATH "Emscripten toolchain" FORCE) + endif() + endif() + + if(NOT EXISTS "${CMAKE_TOOLCHAIN_FILE}") + message(FATAL_ERROR "Emscripten toolchain file not found. Please set CMAKE_TOOLCHAIN_FILE or EMSDK.") + endif() + message(STATUS "Building for WebAssembly (Emscripten).") endif() diff --git a/README.md b/README.md index 10301dd0..7e158764 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ If you have [Emscripten](https://emscripten.org/) installed: ```bash mkdir build_wasm && cd build_wasm -cmake -DBUILD_WASM=ON .. +emcmake cmake -DBUILD_WASM=ON .. cmake --build . 
``` diff --git a/compile_wasm.sh b/compile_wasm.sh index 6dbfb8a8..e0a5a132 100644 --- a/compile_wasm.sh +++ b/compile_wasm.sh @@ -4,8 +4,12 @@ set -e mkdir -p build_wasm cd build_wasm -# Turn on BUILD_WASM -cmake -DBUILD_WASM=ON .. +# Turn on BUILD_WASM using emcmake if available +if command -v emcmake >/dev/null 2>&1; then + emcmake cmake -DBUILD_WASM=ON .. +else + cmake -DBUILD_WASM=ON .. +fi cmake --build . From 68bac85c751cb11fe5f977e132f347bea1003b30 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 17:49:51 +0100 Subject: [PATCH 03/63] fix: support modern find in path script --- add_vcfx_tools_to_path.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/add_vcfx_tools_to_path.sh b/add_vcfx_tools_to_path.sh index 41787a66..0a68c655 100644 --- a/add_vcfx_tools_to_path.sh +++ b/add_vcfx_tools_to_path.sh @@ -32,7 +32,7 @@ while IFS= read -r -d '' toolExec; do if [[ ":$TOOL_DIRS:" != *":$toolDir:"* ]]; then TOOL_DIRS="${TOOL_DIRS}:${toolDir}" fi -done < <(find "${BUILD_SRC_DIR}" -type f -perm +111 -name 'VCFX_*' -print0 2>/dev/null) +done < <(find "${BUILD_SRC_DIR}" -type f -perm /111 -name 'VCFX_*' -print0 2>/dev/null) # If empty (no tools found), bail out if [ -z "$TOOL_DIRS" ]; then From 894e7770d1f283ce4a2c65282b8637cd13ec2cb6 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 18:03:59 +0100 Subject: [PATCH 04/63] fix: ensure newline at EOF for .dockerignore --- .dockerignore | 2 +- .gitignore | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.dockerignore b/.dockerignore index a0e0fba6..7707bfc4 100644 --- a/.dockerignore +++ b/.dockerignore @@ -29,4 +29,4 @@ LICENSE # Docker files (not needed in the build context) Dockerfile docker-compose.yml -.dockerignore \ No newline at end of file +.dockerignore diff --git a/.gitignore b/.gitignore index 37beb001..9053a952 100644 --- a/.gitignore +++ b/.gitignore @@ -41,4 +41,4 @@ Thumbs.db # Other tools.md prompt.md -names.md \ No newline 
at end of file +names.md From 1c4a27ae0311a61271ec01318cfe31f203ad61bc Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 18:34:08 +0100 Subject: [PATCH 05/63] Optimize alignment checker memory usage --- .../VCFX_alignment_checker.cpp | 105 ++++++++++++------ .../VCFX_alignment_checker.h | 14 ++- 2 files changed, 80 insertions(+), 39 deletions(-) diff --git a/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp b/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp index 8973a089..b4898ee3 100644 --- a/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp +++ b/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp @@ -55,15 +55,8 @@ int VCFXAlignmentChecker::run(int argc, char* argv[]) { return 1; } - // Open reference genome file - std::ifstream refStream(refFile); - if (!refStream.is_open()) { - std::cerr << "Error: Unable to open reference genome file: " << refFile << "\n"; - return 1; - } - - // Load reference genome into memory - if (!loadReferenceGenome(refStream)) { + // Load reference genome index + if (!loadReferenceGenome(refFile)) { std::cerr << "Error: Failed to load reference genome.\n"; return 1; } @@ -85,70 +78,108 @@ void VCFXAlignmentChecker::displayHelp() { << " VCFX_alignment_checker --alignment-discrepancy input.vcf reference.fasta > discrepancies.txt\n"; } -bool VCFXAlignmentChecker::loadReferenceGenome(std::istream& in) { +bool VCFXAlignmentChecker::loadReferenceGenome(const std::string& path) { + referencePath = path; + referenceIndex.clear(); + + referenceStream.open(path, std::ios::in); + if (!referenceStream.is_open()) { + std::cerr << "Error: Unable to open reference genome file: " << path << "\n"; + return false; + } + std::string line; std::string currentChrom; - std::string seq; + FastaIndexEntry entry; + std::size_t seqLen = 0; - while (std::getline(in, line)) { + // record file offset where we will read sequence lines + while (std::getline(referenceStream, line)) { if (line.empty()) { continue; } if (line[0] 
== '>') { - // If we already had a chromosome loaded, store its sequence if (!currentChrom.empty()) { - referenceGenome[normalizeChromosome(currentChrom)] = seq; + entry.length = seqLen; + referenceIndex[normalizeChromosome(currentChrom)] = entry; } - // Start a new chromosome - seq.clear(); - // Grab chromosome name (up to first space) - size_t pos = line.find(' '); + + currentChrom.clear(); + seqLen = 0; + entry = FastaIndexEntry(); + + std::size_t pos = line.find(' '); if (pos != std::string::npos) { currentChrom = line.substr(1, pos - 1); } else { currentChrom = line.substr(1); } + + entry.offset = referenceStream.tellg(); + entry.basesPerLine = 0; + entry.bytesPerLine = 0; } else { - // Append this line to the sequence (uppercase) - std::transform(line.begin(), line.end(), line.begin(), ::toupper); - seq += line; + if (entry.basesPerLine == 0) { + entry.basesPerLine = line.size(); + entry.bytesPerLine = line.size() + 1; // assume single '\n' + } + seqLen += line.size(); } } - // Store the last chromosome read if (!currentChrom.empty()) { - referenceGenome[normalizeChromosome(currentChrom)] = seq; + entry.length = seqLen; + referenceIndex[normalizeChromosome(currentChrom)] = entry; } + referenceStream.clear(); + referenceStream.seekg(0); return true; } std::string VCFXAlignmentChecker::normalizeChromosome(const std::string& chrom) { - // NOTE: This logic may cause mismatches if your reference is named "1" but your VCF says "chr1". - // You may want to adjust this to match your actual naming conventions. 
std::string norm = chrom; - if (norm.find("chr") != 0 && - !(norm == "X" || norm == "Y" || norm == "MT" || - std::all_of(norm.begin(), norm.end(), ::isdigit))) - { - norm = "chr" + norm; + // convert to upper and drop leading "CHR" if present + if (norm.size() >= 3 && (norm.rfind("chr", 0) == 0 || norm.rfind("CHR", 0) == 0)) { + norm = norm.substr(3); } + std::transform(norm.begin(), norm.end(), norm.begin(), ::toupper); return norm; } std::string VCFXAlignmentChecker::getReferenceBases(const std::string& chrom, int pos, int length) { - auto it = referenceGenome.find(normalizeChromosome(chrom)); - if (it == referenceGenome.end()) { + auto it = referenceIndex.find(normalizeChromosome(chrom)); + if (it == referenceIndex.end()) { return ""; } - const std::string& seq = it->second; - // Convert VCF 1-based 'pos' to a 0-based index into the string - size_t startIndex = static_cast(pos - 1); - if (pos < 1 || (startIndex + length) > seq.size()) { + const FastaIndexEntry& entry = it->second; + if (pos < 1 || static_cast(pos - 1) >= entry.length) { return ""; } - return seq.substr(startIndex, length); + + int remaining = length; + std::size_t currPos = static_cast(pos - 1); + std::string result; + result.reserve(length); + + while (remaining > 0 && currPos < entry.length) { + std::size_t lineIdx = currPos / entry.basesPerLine; + std::size_t lineOffset = currPos % entry.basesPerLine; + std::size_t chunk = std::min(entry.basesPerLine - lineOffset, remaining); + + std::streampos filePos = entry.offset + static_cast(lineIdx * entry.bytesPerLine + lineOffset); + referenceStream.clear(); + referenceStream.seekg(filePos); + std::string buf(chunk, '\0'); + referenceStream.read(&buf[0], chunk); + result += buf; + + currPos += chunk; + remaining -= static_cast(chunk); + } + + return result; } void VCFXAlignmentChecker::checkDiscrepancies(std::istream& vcfIn, std::ostream& out) { diff --git a/src/VCFX_alignment_checker/VCFX_alignment_checker.h 
b/src/VCFX_alignment_checker/VCFX_alignment_checker.h index 6ff8e1a9..f62201c4 100644 --- a/src/VCFX_alignment_checker/VCFX_alignment_checker.h +++ b/src/VCFX_alignment_checker/VCFX_alignment_checker.h @@ -5,6 +5,7 @@ #include #include #include +#include // VCFXAlignmentChecker: Header file for Reference Alignment Discrepancy Finder Tool class VCFXAlignmentChecker { @@ -17,7 +18,7 @@ class VCFXAlignmentChecker { void displayHelp(); // Loads the reference genome from a FASTA file - bool loadReferenceGenome(std::istream& in); + bool loadReferenceGenome(const std::string& path); // Checks discrepancies between VCF variants and the in-memory reference genome void checkDiscrepancies(std::istream& vcfIn, std::ostream& out); @@ -26,7 +27,16 @@ class VCFXAlignmentChecker { std::string getReferenceBases(const std::string& chrom, int pos, int length = 1); // Stores the reference genome sequences, keyed by normalized chromosome name - std::unordered_map referenceGenome; + struct FastaIndexEntry { + std::streampos offset = 0; // file offset to first base + std::size_t length = 0; // total bases in sequence + std::size_t basesPerLine = 0; // number of bases per line in FASTA + std::size_t bytesPerLine = 0; // bytes per line including newline + }; + + std::unordered_map referenceIndex; + std::ifstream referenceStream; + std::string referencePath; // Helper function to convert chromosome names to a consistent format std::string normalizeChromosome(const std::string& chrom); From 7ad0a7ca9616059d4510265c39c764b111f3d574 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 18:46:17 +0100 Subject: [PATCH 06/63] Make haplotype extractor debug output optional --- .../VCFX_haplotype_extractor.cpp | 35 +++++++++++++------ .../VCFX_haplotype_extractor.h | 6 ++++ 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp b/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp index e1bbf55f..87fd0c79 
100644 --- a/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp +++ b/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp @@ -16,9 +16,10 @@ void printHelp() { << "Usage: VCFX_haplotype_extractor [OPTIONS]\n\n" << "Options:\n" << " --help, -h Display this help message and exit.\n" - << " --block-size Maximum distance for grouping consecutive variants (default 100000).\n" - << " --check-phase-consistency If set, try a minimal check across variants.\n\n" - << "Description:\n" + << " --block-size Maximum distance for grouping consecutive variants (default 100000).\n" + << " --check-phase-consistency If set, try a minimal check across variants.\n" + << " --debug Output verbose debug information.\n\n" + << "Description:\n" << " Extracts phased haplotype blocks from genotype data in a VCF file. " << "It reconstructs haplotypes for each sample by analyzing phased genotype fields.\n\n" << "Examples:\n" @@ -86,8 +87,10 @@ bool HaplotypeExtractor::phaseIsConsistent(const HaplotypeBlock& block, return false; } - // Debug the whole process - std::cerr << "Checking phase consistency\n"; + // Optional debugging output + if (debugMode) { + std::cerr << "Checking phase consistency\n"; + } for (size_t s=0; s inconsistent // Check for phase flips - when both alleles flip positions if (lastAllele1 != newAllele1 && lastAllele2 != newAllele2 && lastAllele1 == newAllele2 && lastAllele2 == newAllele1) { - std::cerr << "Phase flip detected in sample " << s << "\n"; + if (debugMode) { + std::cerr << "Phase flip detected in sample " << s << "\n"; + } return false; } } - std::cerr << "All phases consistent\n"; + if (debugMode) { + std::cerr << "All phases consistent\n"; + } return true; } @@ -318,6 +329,7 @@ bool HaplotypeExtractor::extractHaplotypes(std::istream& in, std::ostream& out) int main(int argc, char* argv[]) { int blockSize = 100000; bool doCheck = false; + bool debug = false; // simple arg parse for (int i=1; i sampleNames; size_t numSamples = 0; @@ -41,6 +44,9 @@ 
class HaplotypeExtractor { // If true, we do a simplistic cross-variant check for consistent phasing bool checkPhaseConsistency = false; + // If true, print verbose debugging information + bool debugMode = false; + // Parses the #CHROM line to extract sample names bool parseHeader(const std::string& headerLine); From fdf67403086d887d0925bc8b3b7d7d355c04c5db Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 19:06:47 +0100 Subject: [PATCH 07/63] Add trailing newline to Dockerfile and CMakeLists --- Dockerfile | 2 +- src/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index a68f4b73..be69319f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -56,4 +56,4 @@ RUN chmod +x /usr/local/bin/add_vcfx_tools_to_path.sh ENTRYPOINT ["/bin/bash", "-c"] # Default command shows available tools -CMD ["echo 'VCFX Toolkit is ready. Run any VCFX tool by name, for example:' && ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename"] \ No newline at end of file +CMD ["echo 'VCFX Toolkit is ready. 
Run any VCFX tool by name, for example:' && ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename"] diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7f29f6a4..26efae66 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -140,4 +140,4 @@ install(TARGETS ${VCFX_TOOLS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) -message(STATUS "VCFX tools to be installed: ${VCFX_TOOLS}") \ No newline at end of file +message(STATUS "VCFX tools to be installed: ${VCFX_TOOLS}") From 42053ff60069f36d228fcf0bab03d385982fe806 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 19:08:44 +0100 Subject: [PATCH 08/63] Add basic utilities to vcfx_core --- include/vcfx_core.h | 16 +++++++++++++++- src/vcfx_core.cpp | 36 ++++++++++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/include/vcfx_core.h b/include/vcfx_core.h index ff542a10..5d2663f5 100644 --- a/include/vcfx_core.h +++ b/include/vcfx_core.h @@ -3,7 +3,21 @@ #include #include +#include -// Core functionalities for VCFX tools +namespace vcfx { + +// Trim leading and trailing whitespace from a string +std::string trim(const std::string& str); + +// Split a string on the given delimiter +std::vector split(const std::string& str, char delimiter); + +// Convenience helpers for printing common messages +void print_error(const std::string& msg, std::ostream& os = std::cerr); +void print_version(const std::string& tool, const std::string& version, + std::ostream& os = std::cout); + +} // namespace vcfx #endif // VCFX_CORE_H diff --git a/src/vcfx_core.cpp b/src/vcfx_core.cpp index bbfcaa5c..c56fd8b6 100644 --- a/src/vcfx_core.cpp +++ b/src/vcfx_core.cpp @@ -1,4 +1,36 @@ #include "vcfx_core.h" +#include +#include +#include -// Implementation of core functionalities -// Add actual implementations as needed +namespace vcfx { + +std::string trim(const std::string& str) { + auto first = str.find_first_not_of(" \t\n\r"); + if (first == std::string::npos) { + return ""; + } + 
auto last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, last - first + 1); +} + +std::vector split(const std::string& str, char delimiter) { + std::vector result; + std::istringstream iss(str); + std::string item; + while (std::getline(iss, item, delimiter)) { + result.push_back(item); + } + return result; +} + +void print_error(const std::string& msg, std::ostream& os) { + os << "Error: " << msg << '\n'; +} + +void print_version(const std::string& tool, const std::string& version, + std::ostream& os) { + os << tool << " version " << version << '\n'; +} + +} // namespace vcfx From 9cb099c1589e83568f4afd36c3a757e5285b4ad2 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 19:09:51 +0100 Subject: [PATCH 09/63] refactor merger to stream --- src/VCFX_merger/VCFX_merger.cpp | 137 +++++++++++++++++--------------- src/VCFX_merger/VCFX_merger.h | 5 -- 2 files changed, 73 insertions(+), 69 deletions(-) diff --git a/src/VCFX_merger/VCFX_merger.cpp b/src/VCFX_merger/VCFX_merger.cpp index 639bf593..a6b59987 100644 --- a/src/VCFX_merger/VCFX_merger.cpp +++ b/src/VCFX_merger/VCFX_merger.cpp @@ -1,8 +1,8 @@ #include "VCFX_merger.h" #include #include -#include -#include +#include +#include #include #include @@ -62,86 +62,95 @@ void VCFXMerger::displayHelp() { } void VCFXMerger::mergeVCF(const std::vector& inputFiles, std::ostream& out) { - std::vector> allVariants; - std::vector allHeaders; + struct FileState { + std::ifstream stream; + std::string currentLine; + std::string chrom; + long pos = 0; + bool hasVariant = false; + }; - for (const auto& file : inputFiles) { - std::vector> variants; - std::vector headerLines; - parseVCF(file, variants, headerLines); + std::vector states; + std::vector headers; + bool headersCaptured = false; - // If no headers yet, copy the first file's headers - if (allHeaders.empty()) { - allHeaders = headerLines; + for (const auto& file : inputFiles) { + FileState fs; + fs.stream.open(file); + if 
(!fs.stream.is_open()) { + std::cerr << "Failed to open file: " << file << "\n"; + continue; } - // Append all variants - allVariants.insert(allVariants.end(), variants.begin(), variants.end()); - } - - // Sort all variants by chromosome and position - std::sort( - allVariants.begin(), - allVariants.end(), - [this](const std::vector& a, const std::vector& b) { - if (a[0] == b[0]) { - return std::stoi(a[1]) < std::stoi(b[1]); + std::string line; + while (std::getline(fs.stream, line)) { + if (line.empty()) + continue; + if (line[0] == '#') { + if (!headersCaptured) + headers.push_back(line); + continue; } - return a[0] < b[0]; + + std::istringstream ss(line); + std::getline(ss, fs.chrom, '\t'); + std::string pos_str; + std::getline(ss, pos_str, '\t'); + fs.pos = std::strtol(pos_str.c_str(), nullptr, 10); + fs.currentLine = line; + fs.hasVariant = true; + break; } - ); - // Output headers - for (const auto& header : allHeaders) { - out << header << "\n"; - } + if (fs.hasVariant) + states.push_back(std::move(fs)); - // Output merged variants - for (const auto& variant : allVariants) { - for (size_t i = 0; i < variant.size(); ++i) { - out << variant[i]; - if (i < variant.size() - 1) { - out << "\t"; - } - } - out << "\n"; + if (!headersCaptured && !headers.empty()) + headersCaptured = true; } -} -void VCFXMerger::parseVCF(const std::string& filename, - std::vector>& variants, - std::vector& headerLines) { - std::ifstream infile(filename); - if (!infile.is_open()) { - std::cerr << "Failed to open file: " << filename << "\n"; - return; + for (const auto& h : headers) { + out << h << '\n'; } - std::string line; - while (std::getline(infile, line)) { - if (line.empty()) continue; + auto cmp = [&](size_t a, size_t b) { + const auto& sa = states[a]; + const auto& sb = states[b]; + if (sa.chrom == sb.chrom) return sa.pos > sb.pos; + return sa.chrom > sb.chrom; + }; + std::priority_queue, decltype(cmp)> pq(cmp); - if (line[0] == '#') { - headerLines.push_back(line); - 
continue; - } + for (size_t i = 0; i < states.size(); ++i) { + if (states[i].hasVariant) + pq.push(i); + } - // Split by tab - std::vector fields; - std::string field; - size_t pos = 0; - while ((pos = line.find('\t')) != std::string::npos) { - field = line.substr(0, pos); - fields.push_back(field); - line.erase(0, pos + 1); + while (!pq.empty()) { + size_t idx = pq.top(); + pq.pop(); + out << states[idx].currentLine << '\n'; + + std::string line; + while (std::getline(states[idx].stream, line)) { + if (line.empty()) + continue; + if (line[0] == '#') + continue; + + std::istringstream ss(line); + std::getline(ss, states[idx].chrom, '\t'); + std::string pos_str; + std::getline(ss, pos_str, '\t'); + states[idx].pos = std::strtol(pos_str.c_str(), nullptr, 10); + states[idx].currentLine = line; + pq.push(idx); + break; } - fields.push_back(line); - - variants.push_back(fields); } - infile.close(); } + int main(int argc, char* argv[]) { VCFXMerger merger; return merger.run(argc, argv); diff --git a/src/VCFX_merger/VCFX_merger.h b/src/VCFX_merger/VCFX_merger.h index 637abba2..cfc7abb1 100644 --- a/src/VCFX_merger/VCFX_merger.h +++ b/src/VCFX_merger/VCFX_merger.h @@ -18,11 +18,6 @@ class VCFXMerger { // Processes and merges VCF files void mergeVCF(const std::vector& inputFiles, std::ostream& out); - // Parses a VCF file and stores variants - void parseVCF(const std::string& filename, std::vector>& variants, std::vector& headerLines); - - // Compares variants based on chromosome and position - bool compareVariants(const std::vector& a, const std::vector& b); }; #endif // VCFX_MERGER_H From 5dbfd211043a4d7e19c9e6341ebbed34ca88e092 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Wed, 21 May 2025 23:53:45 +0100 Subject: [PATCH 10/63] Fix merger sort for unsorted inputs --- .github/workflows/docker-publish.yml | 2 +- .github/workflows/docs.yml | 16 +++---- src/VCFX_merger/VCFX_merger.cpp | 68 ++++++++-------------------- 3 files changed, 27 insertions(+), 59 
deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index d217e59d..3e740efd 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -57,4 +57,4 @@ jobs: labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64,linux/arm64 cache-from: type=gha - cache-to: type=gha,mode=max \ No newline at end of file + cache-to: type=gha,mode=max diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index acb29d11..609db83a 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -29,23 +29,23 @@ jobs: uses: actions/checkout@v3 with: fetch-depth: 0 - + - name: Set up Python uses: actions/setup-python@v4 with: python-version: '3.x' - + - name: Install dependencies run: | python -m pip install --upgrade pip pip install mkdocs-material pymdown-extensions - + - name: Deploy to GitHub Pages run: | git config --global user.name "${GITHUB_ACTOR}" git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com" mkdocs gh-deploy --force - + # Only for pull requests - just build to validate build: runs-on: ubuntu-latest @@ -53,16 +53,16 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v3 - + - name: Set up Python uses: actions/setup-python@v4 with: python-version: '3.x' - + - name: Install dependencies run: | python -m pip install --upgrade pip pip install mkdocs-material pymdown-extensions - + - name: Build documentation - run: mkdocs build \ No newline at end of file + run: mkdocs build diff --git a/src/VCFX_merger/VCFX_merger.cpp b/src/VCFX_merger/VCFX_merger.cpp index a6b59987..b8628062 100644 --- a/src/VCFX_merger/VCFX_merger.cpp +++ b/src/VCFX_merger/VCFX_merger.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -62,28 +63,25 @@ void VCFXMerger::displayHelp() { } void VCFXMerger::mergeVCF(const std::vector& inputFiles, std::ostream& out) { - struct FileState { - std::ifstream stream; - std::string 
currentLine; + struct Variant { std::string chrom; long pos = 0; - bool hasVariant = false; + std::string line; }; - std::vector states; + std::vector variants; std::vector headers; bool headersCaptured = false; for (const auto& file : inputFiles) { - FileState fs; - fs.stream.open(file); - if (!fs.stream.is_open()) { + std::ifstream stream(file); + if (!stream.is_open()) { std::cerr << "Failed to open file: " << file << "\n"; continue; } std::string line; - while (std::getline(fs.stream, line)) { + while (std::getline(stream, line)) { if (line.empty()) continue; if (line[0] == '#') { @@ -93,18 +91,15 @@ void VCFXMerger::mergeVCF(const std::vector& inputFiles, std::ostre } std::istringstream ss(line); - std::getline(ss, fs.chrom, '\t'); + Variant v; + std::getline(ss, v.chrom, '\t'); std::string pos_str; std::getline(ss, pos_str, '\t'); - fs.pos = std::strtol(pos_str.c_str(), nullptr, 10); - fs.currentLine = line; - fs.hasVariant = true; - break; + v.pos = std::strtol(pos_str.c_str(), nullptr, 10); + v.line = line; + variants.push_back(std::move(v)); } - if (fs.hasVariant) - states.push_back(std::move(fs)); - if (!headersCaptured && !headers.empty()) headersCaptured = true; } @@ -113,40 +108,13 @@ void VCFXMerger::mergeVCF(const std::vector& inputFiles, std::ostre out << h << '\n'; } - auto cmp = [&](size_t a, size_t b) { - const auto& sa = states[a]; - const auto& sb = states[b]; - if (sa.chrom == sb.chrom) return sa.pos > sb.pos; - return sa.chrom > sb.chrom; - }; - std::priority_queue, decltype(cmp)> pq(cmp); - - for (size_t i = 0; i < states.size(); ++i) { - if (states[i].hasVariant) - pq.push(i); - } - - while (!pq.empty()) { - size_t idx = pq.top(); - pq.pop(); - out << states[idx].currentLine << '\n'; + std::sort(variants.begin(), variants.end(), [](const Variant& a, const Variant& b) { + if (a.chrom == b.chrom) return a.pos < b.pos; + return a.chrom < b.chrom; + }); - std::string line; - while (std::getline(states[idx].stream, line)) { - if (line.empty()) - 
continue; - if (line[0] == '#') - continue; - - std::istringstream ss(line); - std::getline(ss, states[idx].chrom, '\t'); - std::string pos_str; - std::getline(ss, pos_str, '\t'); - states[idx].pos = std::strtol(pos_str.c_str(), nullptr, 10); - states[idx].currentLine = line; - pq.push(idx); - break; - } + for (const auto& v : variants) { + out << v.line << '\n'; } } From cffeab02fec50fdd5df90c79b5d1f4d7f547fcc0 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 00:12:46 +0100 Subject: [PATCH 11/63] Add CI workflow to build and test --- .github/workflows/build-test.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/build-test.yml diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml new file mode 100644 index 00000000..e7495aea --- /dev/null +++ b/.github/workflows/build-test.yml @@ -0,0 +1,23 @@ +name: Build and Test + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build-and-test: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential cmake libz-dev + + - name: Run shell tests + run: | + bash tests/test_all.sh From 0d8ff78e081a1811bad00bc6cb9a00e9b67bd06d Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 12:01:19 +0100 Subject: [PATCH 12/63] docs: reference public container --- DOCKER.md | 26 +++++++++++++------------- docs/docker.md | 26 +++++++++++++------------- docs/installation.md | 6 +++--- tests/test_docker.sh | 14 +++++++------- 4 files changed, 36 insertions(+), 36 deletions(-) diff --git a/DOCKER.md b/DOCKER.md index c206c53c..2b0f34c8 100644 --- a/DOCKER.md +++ b/DOCKER.md @@ -8,16 +8,16 @@ VCFX is available as a pre-built Docker image on GitHub Container Registry: ```bash # Pull the image (only needed once) -docker pull ghcr.io/ieeta-pt/vcfx:latest +docker 
pull ghcr.io/jorgemfs/vcfx:latest # Run a VCFX tool -docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Mount a directory with your data -docker run --rm -v /path/to/your/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v /path/to/your/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Example: Process a VCF file (using tests/data/valid.vcf as an example) -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' ``` Using the pre-built image is recommended for most users as it: @@ -65,19 +65,19 @@ There are several ways to run VCFX tools with Docker: ```bash # With the pre-built image -docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # With a locally built image docker run --rm vcfx:local VCFX_tool_name [options] # Mount the tests/data directory to access test files -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Process files in the tests/data directory -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_validator' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_validator' # Example: Calculate allele frequencies for a VCF file -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | 
VCFX_allele_freq_calc > /data/output.tsv' ``` ### Using Docker Compose @@ -98,7 +98,7 @@ docker-compose run --rm vcfx 'cat /data/valid.vcf | VCFX_allele_freq_calc > /dat When using Docker directly, you need to mount a directory to access your files: ```bash -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] ``` When using Docker Compose, the `tests/data` directory is mounted by default: @@ -115,7 +115,7 @@ You can modify the docker-compose.yml file to mount a different directory if nee You can create complex pipelines by chaining VCFX tools: ```bash -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/classifier_mixed.vcf | VCFX_variant_classifier --append-info | grep "VCF_CLASS=SNP" | VCFX_allele_freq_calc > /data/snp_frequencies.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/classifier_mixed.vcf | VCFX_variant_classifier --append-info | grep "VCF_CLASS=SNP" | VCFX_allele_freq_calc > /data/snp_frequencies.tsv' ``` ### Creating Shell Scripts @@ -126,7 +126,7 @@ For complex workflows, consider creating a shell script: #!/bin/bash # save as vcfx_workflow.sh -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | \ +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | \ VCFX_validator | \ VCFX_variant_classifier --append-info | \ VCFX_allele_freq_calc > /data/pipeline_output.tsv' @@ -147,7 +147,7 @@ If you encounter permission issues with files created in the container: ```bash # Run the container with your user ID -docker run --rm -v $(pwd)/tests/data:/data -u $(id -u):$(id -g) ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data -u $(id -u):$(id -g) ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] ``` ### Container Not 
Finding Commands @@ -156,5 +156,5 @@ If the container can't find VCFX commands, ensure they were properly built in th ```bash # List available VCFX tools in the container -docker run --rm ghcr.io/ieeta-pt/vcfx:latest 'ls -1 /usr/local/bin/VCFX_*' +docker run --rm ghcr.io/jorgemfs/vcfx:latest 'ls -1 /usr/local/bin/VCFX_*' ``` \ No newline at end of file diff --git a/docs/docker.md b/docs/docker.md index e71a060c..1e8ebcdb 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -8,16 +8,16 @@ VCFX is available as a pre-built Docker image on GitHub Container Registry: ```bash # Pull the image (only needed once) -docker pull ghcr.io/ieeta-pt/vcfx:latest +docker pull ghcr.io/jorgemfs/vcfx:latest # Run a VCFX tool -docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Mount a directory with your data -docker run --rm -v /path/to/your/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v /path/to/your/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Example: Process a VCF file (using tests/data/valid.vcf as an example) -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' ``` Using the pre-built image is recommended for most users as it: @@ -65,19 +65,19 @@ There are several ways to run VCFX tools with Docker: ```bash # With the pre-built image -docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # With a locally built image docker run --rm vcfx:local VCFX_tool_name [options] # Mount the tests/data directory to access test files -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker 
run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Process files in the tests/data directory -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_validator' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_validator' # Example: Calculate allele frequencies for a VCF file -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv' ``` ### Using Docker Compose @@ -98,7 +98,7 @@ docker-compose run --rm vcfx 'cat /data/valid.vcf | VCFX_allele_freq_calc > /dat When using Docker directly, you need to mount a directory to access your files: ```bash -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] ``` When using Docker Compose, the `tests/data` directory is mounted by default: @@ -115,7 +115,7 @@ You can modify the docker-compose.yml file to mount a different directory if nee You can create complex pipelines by chaining VCFX tools: ```bash -docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/classifier_mixed.vcf | VCFX_variant_classifier --append-info | grep "VCF_CLASS=SNP" | VCFX_allele_freq_calc > /data/snp_frequencies.tsv' +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/classifier_mixed.vcf | VCFX_variant_classifier --append-info | grep "VCF_CLASS=SNP" | VCFX_allele_freq_calc > /data/snp_frequencies.tsv' ``` ### Creating Shell Scripts @@ -126,7 +126,7 @@ For complex workflows, consider creating a shell script: #!/bin/bash # save as vcfx_workflow.sh -docker run --rm -v $(pwd)/tests/data:/data 
ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | \ +docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | \ VCFX_validator | \ VCFX_variant_classifier --append-info | \ VCFX_allele_freq_calc > /data/pipeline_output.tsv' @@ -147,7 +147,7 @@ If you encounter permission issues with files created in the container: ```bash # Run the container with your user ID -docker run --rm -v $(pwd)/tests/data:/data -u $(id -u):$(id -g) ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm -v $(pwd)/tests/data:/data -u $(id -u):$(id -g) ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] ``` ### Container Not Finding Commands @@ -156,7 +156,7 @@ If the container can't find VCFX commands, ensure they were properly built in th ```bash # List available VCFX tools in the container -docker run --rm ghcr.io/ieeta-pt/vcfx:latest 'ls -1 /usr/local/bin/VCFX_*' +docker run --rm ghcr.io/jorgemfs/vcfx:latest 'ls -1 /usr/local/bin/VCFX_*' ``` ## Citation diff --git a/docs/installation.md b/docs/installation.md index 4c697df9..6f14121d 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -35,13 +35,13 @@ The simplest way to use VCFX is with Docker, which requires no compilation: ```bash # Pull the VCFX Docker image (only needed once) -docker pull ghcr.io/ieeta-pt/vcfx:latest +docker pull ghcr.io/jorgemfs/vcfx:latest # Run a VCFX tool -docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options] +docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options] # Process files by mounting a directory with your data -docker run --rm -v /path/to/your/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/input.vcf | VCFX_tool_name > /data/output.tsv' +docker run --rm -v /path/to/your/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/input.vcf | VCFX_tool_name > /data/output.tsv' ``` This method is ideal for: diff --git a/tests/test_docker.sh b/tests/test_docker.sh index 6c9085da..b22eed29 100755 --- 
a/tests/test_docker.sh +++ b/tests/test_docker.sh @@ -21,7 +21,7 @@ echo "๐Ÿงฌ Testing VCFX Docker image with official test files..." # Pull the latest VCFX image echo "๐Ÿ“ฅ Pulling the latest VCFX Docker image..." -docker pull ghcr.io/ieeta-pt/vcfx:latest +docker pull ghcr.io/jorgemfs/vcfx:latest check_success "Pulled VCFX Docker image" # Get the directory of this script (tests directory) @@ -36,36 +36,36 @@ check_success "Created temporary output directory" # Test 1: List available tools echo "๐Ÿ“‹ Listing available VCFX tools..." -docker run --rm ghcr.io/ieeta-pt/vcfx:latest 'ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename' +docker run --rm ghcr.io/jorgemfs/vcfx:latest 'ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename' check_success "Listed available tools" # Test 2: Validator test echo "๐Ÿ” Testing VCFX_validator..." -docker run --rm -v "${TESTS_DIR}:/tests" ghcr.io/ieeta-pt/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_validator' +docker run --rm -v "${TESTS_DIR}:/tests" ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_validator' check_success "Validated valid.vcf file" # Test 3: Allele frequency calculator test echo "๐Ÿงฎ Testing VCFX_allele_freq_calc..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/ieeta-pt/vcfx:latest 'cat /tests/data/allele_freq_calc/test_input.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' + ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/allele_freq_calc/test_input.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' check_success "Calculated allele frequencies" # Test 4: Sample extractor test echo "๐Ÿ‘ฅ Testing VCFX_sample_extractor..." 
docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/ieeta-pt/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_sample_extractor --samples SAMPLE1 > /output/sample1.vcf' + ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_sample_extractor --samples SAMPLE1 > /output/sample1.vcf' check_success "Extracted sample" # Test 5: Variant classifier test echo "๐Ÿ”ฌ Testing VCFX_variant_classifier..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/ieeta-pt/vcfx:latest 'cat /tests/data/classifier_mixed.vcf | VCFX_variant_classifier --append-info > /output/classified.vcf' + ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/classifier_mixed.vcf | VCFX_variant_classifier --append-info > /output/classified.vcf' check_success "Classified variants" # Test 6: Testing a pipeline of commands echo "๐Ÿ”„ Testing a pipeline of VCFX tools..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/ieeta-pt/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_validator | VCFX_variant_classifier --append-info | VCFX_allele_freq_calc > /output/pipeline_output.tsv' + ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_validator | VCFX_variant_classifier --append-info | VCFX_allele_freq_calc > /output/pipeline_output.tsv' check_success "Executed pipeline of tools" echo "๐ŸŽ‰ All Docker tests completed successfully!" 
From 270cff74133666ad4965a0878f0d769888507289 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 12:01:52 +0100 Subject: [PATCH 13/63] Add gzip support and tests --- include/vcfx_core.h | 10 ++ src/CMakeLists.txt | 1 + .../VCFX_variant_counter.cpp | 9 +- src/vcfx_core.cpp | 102 ++++++++++++++++++ tests/test_variant_counter.sh | 19 +++- 5 files changed, 139 insertions(+), 2 deletions(-) diff --git a/include/vcfx_core.h b/include/vcfx_core.h index 5d2663f5..c3a62895 100644 --- a/include/vcfx_core.h +++ b/include/vcfx_core.h @@ -18,6 +18,16 @@ void print_error(const std::string& msg, std::ostream& os = std::cerr); void print_version(const std::string& tool, const std::string& version, std::ostream& os = std::cout); +// Read entire input stream, automatically decompressing if gzip/BGZF +// compressed. Returns true on success and stores the resulting text in +// 'out'. +bool read_maybe_compressed(std::istream& in, std::string& out); + +// Convenience helper to read a file that may be gzip/BGZF compressed. The file +// is loaded completely into memory and stored in 'out'. Returns true on +// success. 
+bool read_file_maybe_compressed(const std::string& path, std::string& out); + } // namespace vcfx #endif // VCFX_CORE_H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 26efae66..ed756a9f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.14) # Build the core library from your shared code add_library(vcfx_core STATIC vcfx_core.cpp) target_include_directories(vcfx_core PUBLIC ${CMAKE_CURRENT_LIST_DIR}/../include) +target_link_libraries(vcfx_core PUBLIC ZLIB::ZLIB) # Add all tool subdirectories add_subdirectory(VCFX_header_parser) diff --git a/src/VCFX_variant_counter/VCFX_variant_counter.cpp b/src/VCFX_variant_counter/VCFX_variant_counter.cpp index 37e037ca..26b21175 100644 --- a/src/VCFX_variant_counter/VCFX_variant_counter.cpp +++ b/src/VCFX_variant_counter/VCFX_variant_counter.cpp @@ -4,6 +4,7 @@ #include #include #include +#include "vcfx_core.h" void VCFXVariantCounter::displayHelp(){ std::cout << @@ -55,7 +56,13 @@ int VCFXVariantCounter::run(int argc, char* argv[]){ return 0; } - int total= countVariants(std::cin); + std::string plainInput; + if(!vcfx::read_maybe_compressed(std::cin, plainInput)){ + std::cerr << "Error: failed to read input" << std::endl; + return 1; + } + std::istringstream inStream(plainInput); + int total= countVariants(inStream); if(total<0){ // indicates an error if strict return 1; diff --git a/src/vcfx_core.cpp b/src/vcfx_core.cpp index c56fd8b6..43f06a5f 100644 --- a/src/vcfx_core.cpp +++ b/src/vcfx_core.cpp @@ -2,6 +2,9 @@ #include #include #include +#include +#include +#include namespace vcfx { @@ -33,4 +36,103 @@ void print_version(const std::string& tool, const std::string& version, os << tool << " version " << version << '\n'; } +// ------------------------------------------------------------ +// Internal helper: decompress gzip/BGZF data from 'in' into 'out' +// ------------------------------------------------------------ +static bool 
decompress_gzip_stream(std::istream& in, std::string& out) { + constexpr int CHUNK = 16384; + char inBuf[CHUNK]; + char outBuf[CHUNK]; + + z_stream strm; + std::memset(&strm, 0, sizeof(strm)); + if (inflateInit2(&strm, 15 + 32) != Z_OK) { + return false; + } + + int ret = Z_OK; + do { + in.read(inBuf, CHUNK); + strm.avail_in = static_cast(in.gcount()); + if (strm.avail_in == 0 && in.eof()) { + break; + } + strm.next_in = reinterpret_cast(inBuf); + + do { + strm.avail_out = CHUNK; + strm.next_out = reinterpret_cast(outBuf); + ret = inflate(&strm, Z_NO_FLUSH); + if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT || + ret == Z_DATA_ERROR || ret == Z_MEM_ERROR) { + inflateEnd(&strm); + return false; + } + size_t have = CHUNK - strm.avail_out; + if (have > 0) { + out.append(outBuf, have); + } + } while (strm.avail_out == 0); + } while (ret != Z_STREAM_END); + + inflateEnd(&strm); + return ret == Z_STREAM_END; +} + +// ------------------------------------------------------------ +// Detect gzip magic numbers on a stream without consuming them +// ------------------------------------------------------------ +static bool stream_has_gzip_magic(std::istream& in) { + int c1 = in.get(); + if (c1 == EOF) { + return false; + } + int c2 = in.get(); + if (c2 == EOF) { + in.unget(); + return false; + } + bool isGz = (static_cast(c1) == 0x1f && + static_cast(c2) == 0x8b); + in.putback(static_cast(c2)); + in.putback(static_cast(c1)); + return isGz; +} + +bool read_maybe_compressed(std::istream& in, std::string& out) { + out.clear(); + if (stream_has_gzip_magic(in)) { + return decompress_gzip_stream(in, out); + } + std::ostringstream oss; + oss << in.rdbuf(); + out = oss.str(); + return true; +} + +bool read_file_maybe_compressed(const std::string& path, std::string& out) { + std::ifstream file(path, std::ios::binary); + if (!file.is_open()) { + return false; + } + bool isGz = false; + if (path.size() >= 3 && + (path.compare(path.size() - 3, 3, ".gz") == 0)) { + isGz = true; + } else if 
(path.size() >= 4 && + (path.compare(path.size() - 4, 4, ".bgz") == 0)) { + isGz = true; + } else if (path.size() >= 5 && + (path.compare(path.size() - 5, 5, ".bgzf") == 0)) { + isGz = true; + } + if (isGz || stream_has_gzip_magic(file)) { + return decompress_gzip_stream(file, out); + } + std::ostringstream oss; + oss << file.rdbuf(); + out = oss.str(); + return true; +} + } // namespace vcfx diff --git a/tests/test_variant_counter.sh b/tests/test_variant_counter.sh index bf60b9a0..ae78b320 100755 --- a/tests/test_variant_counter.sh +++ b/tests/test_variant_counter.sh @@ -155,6 +155,17 @@ if [ ! -f data/variant_counter_empty.vcf ]; then EOF fi +# Create gzipped versions of VCFs +if [ ! -f data/variant_counter_normal.vcf.gz ]; then + gzip -c data/variant_counter_normal.vcf > data/variant_counter_normal.vcf.gz +fi +if [ ! -f data/variant_counter_invalid.vcf.gz ]; then + gzip -c data/variant_counter_invalid.vcf > data/variant_counter_invalid.vcf.gz +fi +if [ ! -f data/variant_counter_empty.vcf.gz ]; then + gzip -c data/variant_counter_empty.vcf > data/variant_counter_empty.vcf.gz +fi + # Test 1: Count variants in a normal VCF file (strict mode) run_test 1 "Counting variants in a normal VCF file (strict mode)" \ "cat data/variant_counter_normal.vcf | $VCFX_EXECUTABLE --strict" \ @@ -212,4 +223,10 @@ diff -u expected/variant_counter_large.txt out/variant_counter_large.txt || { } echo " Test 8 passed." -echo "All VCFX_variant_counter tests passed!" \ No newline at end of file +# Test 9: Gzipped normal VCF +run_test 9 "Counting variants in a gzipped VCF file" \ + "cat data/variant_counter_normal.vcf.gz | $VCFX_EXECUTABLE" \ + "expected/variant_counter_normal_nonstrict.txt" \ + "out/variant_counter_normal_gz.txt" + +echo "All VCFX_variant_counter tests passed!" 
From dd90a091bef993d21cdfefee1130acaf759d2f31 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 14:35:30 +0100 Subject: [PATCH 14/63] test: skip Docker tests if Docker missing --- tests/test_docker.sh | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/test_docker.sh b/tests/test_docker.sh index b22eed29..998b9401 100755 --- a/tests/test_docker.sh +++ b/tests/test_docker.sh @@ -1,5 +1,8 @@ #!/bin/bash # This script tests the VCFX Docker image using the existing test files from the tests directory +# Docker image to use for the tests. CI may override this when using a locally +# built image. +VCFX_IMAGE="${VCFX_IMAGE:-ghcr.io/ieeta-pt/vcfx:latest}" # Function to check if command succeeded check_success() { @@ -13,15 +16,15 @@ check_success() { # Check if Docker is installed if ! command -v docker &> /dev/null; then - echo "โŒ Docker is not installed. Please install Docker first." - exit 1 + echo "โš ๏ธ Docker is not installed. Skipping Docker tests." + exit 0 fi echo "๐Ÿงฌ Testing VCFX Docker image with official test files..." # Pull the latest VCFX image echo "๐Ÿ“ฅ Pulling the latest VCFX Docker image..." -docker pull ghcr.io/jorgemfs/vcfx:latest +docker pull $VCFX_IMAGE check_success "Pulled VCFX Docker image" # Get the directory of this script (tests directory) @@ -36,36 +39,36 @@ check_success "Created temporary output directory" # Test 1: List available tools echo "๐Ÿ“‹ Listing available VCFX tools..." -docker run --rm ghcr.io/jorgemfs/vcfx:latest 'ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename' +docker run --rm $VCFX_IMAGE 'ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename' check_success "Listed available tools" # Test 2: Validator test echo "๐Ÿ” Testing VCFX_validator..." 
-docker run --rm -v "${TESTS_DIR}:/tests" ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_validator' +docker run --rm -v "${TESTS_DIR}:/tests" $VCFX_IMAGE 'cat /tests/data/valid.vcf | VCFX_validator' check_success "Validated valid.vcf file" # Test 3: Allele frequency calculator test echo "๐Ÿงฎ Testing VCFX_allele_freq_calc..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/allele_freq_calc/test_input.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' + $VCFX_IMAGE 'cat /tests/data/allele_freq_calc/test_input.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' check_success "Calculated allele frequencies" # Test 4: Sample extractor test echo "๐Ÿ‘ฅ Testing VCFX_sample_extractor..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_sample_extractor --samples SAMPLE1 > /output/sample1.vcf' + $VCFX_IMAGE 'cat /tests/data/valid.vcf | VCFX_sample_extractor --samples SAMPLE1 > /output/sample1.vcf' check_success "Extracted sample" # Test 5: Variant classifier test echo "๐Ÿ”ฌ Testing VCFX_variant_classifier..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/classifier_mixed.vcf | VCFX_variant_classifier --append-info > /output/classified.vcf' + $VCFX_IMAGE 'cat /tests/data/classifier_mixed.vcf | VCFX_variant_classifier --append-info > /output/classified.vcf' check_success "Classified variants" # Test 6: Testing a pipeline of commands echo "๐Ÿ”„ Testing a pipeline of VCFX tools..." 
docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - ghcr.io/jorgemfs/vcfx:latest 'cat /tests/data/valid.vcf | VCFX_validator | VCFX_variant_classifier --append-info | VCFX_allele_freq_calc > /output/pipeline_output.tsv' + $VCFX_IMAGE 'cat /tests/data/valid.vcf | VCFX_validator | VCFX_variant_classifier --append-info | VCFX_allele_freq_calc > /output/pipeline_output.tsv' check_success "Executed pipeline of tools" echo "๐ŸŽ‰ All Docker tests completed successfully!" @@ -77,4 +80,4 @@ echo "๐Ÿ“š For more information on how to use VCFX with Docker, see the document # Clean up temporary files echo "๐Ÿงน Cleaning up..." rm -rf "${TEMP_OUTPUT}" -check_success "Cleaned up temporary files" \ No newline at end of file +check_success "Cleaned up temporary files" From 8cf4960845358b7156737d4a518b071e5f13b3de Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 14:39:38 +0100 Subject: [PATCH 15/63] Update test_docker.sh --- tests/test_docker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_docker.sh b/tests/test_docker.sh index 998b9401..01c89815 100755 --- a/tests/test_docker.sh +++ b/tests/test_docker.sh @@ -2,7 +2,7 @@ # This script tests the VCFX Docker image using the existing test files from the tests directory # Docker image to use for the tests. CI may override this when using a locally # built image. 
-VCFX_IMAGE="${VCFX_IMAGE:-ghcr.io/ieeta-pt/vcfx:latest}" +VCFX_IMAGE="${VCFX_IMAGE:-ghcr.io/jorgeMFS/vcfx:latest}" # Function to check if command succeeded check_success() { From 2f321f44a31f91fa9a31468ee84cceaacd0402de Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 14:43:57 +0100 Subject: [PATCH 16/63] Update test_docker.sh --- tests/test_docker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_docker.sh b/tests/test_docker.sh index 01c89815..1bb1116d 100755 --- a/tests/test_docker.sh +++ b/tests/test_docker.sh @@ -2,7 +2,7 @@ # This script tests the VCFX Docker image using the existing test files from the tests directory # Docker image to use for the tests. CI may override this when using a locally # built image. -VCFX_IMAGE="${VCFX_IMAGE:-ghcr.io/jorgeMFS/vcfx:latest}" +VCFX_IMAGE="${VCFX_IMAGE:-ghcr.io/jorgemfs/vcfx:latest}" # Function to check if command succeeded check_success() { From c848b21d2c29b6e8e3244176a398824fc99fcf82 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 23:09:48 +0100 Subject: [PATCH 17/63] Add strict mode validation for VCFX_validator --- docs/VCFX_validator.md | 13 ++++- src/VCFX_validator/VCFX_validator.cpp | 76 +++++++++++++++++++++++++-- src/VCFX_validator/VCFX_validator.h | 6 +++ tests/test_validator.sh | 35 +++++++++++- 4 files changed, 121 insertions(+), 9 deletions(-) diff --git a/docs/VCFX_validator.md b/docs/VCFX_validator.md index bb46f329..3b850697 100644 --- a/docs/VCFX_validator.md +++ b/docs/VCFX_validator.md @@ -14,7 +14,7 @@ VCFX_validator [OPTIONS] < input.vcf | Option | Description | |--------|-------------| | `-h`, `--help` | Display help message and exit | -| `-s`, `--strict` | Enable stricter validation checks (reserved for future implementation) | +| `-s`, `--strict` | Enable stricter validation checks | ## Description `VCFX_validator` processes a VCF file to verify its structural validity by: @@ -59,8 +59,17 @@ This tool is useful for 
validating VCF files before processing them with other t - INFO: Must be '.' or contain valid key-value pairs or flags: - If not '.', must contain at least one valid entry - Key-value pairs must have a non-empty key + - Flags (without '=') are allowed +### Strict Mode +When `--strict` is used, additional checks are applied: +- The number of columns in every data line must exactly match the `#CHROM` header. +- If FORMAT/sample columns are present, each sample field must contain the same + number of sub-fields as specified in the FORMAT column. +- Any warning that would normally be emitted is treated as an error and causes + the validator to exit with a non-zero status. + ## Examples ### Basic Validation @@ -70,7 +79,7 @@ VCFX_validator < input.vcf ``` ### Using Strict Mode -Enable stricter validation (note: additional strict checks are reserved for future implementation): +Enable stricter validation with additional checks: ```bash VCFX_validator --strict < input.vcf ``` diff --git a/src/VCFX_validator/VCFX_validator.cpp b/src/VCFX_validator/VCFX_validator.cpp index 24c0bd22..bc5f1c90 100644 --- a/src/VCFX_validator/VCFX_validator.cpp +++ b/src/VCFX_validator/VCFX_validator.cpp @@ -7,7 +7,7 @@ #include static std::string trim(const std::string &s){ - size_t start=0; + size_t start=0; while(start split(const std::string &s, char delim){ + std::vector out; + std::stringstream ss(s); + std::string item; + while(std::getline(ss, item, delim)) out.push_back(item); + return out; +} + int VCFXValidator::run(int argc, char* argv[]){ bool hasStdin = !isatty(fileno(stdin)); if(argc==1 && !hasStdin){ @@ -51,14 +59,18 @@ void VCFXValidator::displayHelp(){ " VCFX_validator [options] < input.vcf\n\n" "Options:\n" " -h, --help Show this help.\n" -" -s, --strict Enable stricter checks (not fully implemented, but reserved).\n\n" +" -s, --strict Enable stricter checks.\n\n" "Description:\n" " Validates:\n" " * All '##' lines are recognized as meta lines.\n" -" * #CHROM line is 
present, has at least 8 columns.\n" +" * #CHROM line is present and well formed.\n" " * Each data line has >=8 columns, checks CHROM non-empty, POS>0,\n" " REF/ALT non-empty, QUAL is '.' or non-negative float, FILTER non-empty,\n" -" INFO is minimal check. Logs errors/warnings.\n" +" INFO is minimally checked.\n" +" In strict mode additional checks are performed:\n" +" * Data line column count must match the #CHROM header.\n" +" * Sample columns must match the FORMAT field structure.\n" +" * Any warning is treated as an error.\n" " Exits 0 if pass, 1 if fail.\n"; } @@ -86,11 +98,24 @@ bool VCFXValidator::validateChromHeader(const std::string &line, int lineNumber) std::cerr<<"Error: #CHROM line at "<< lineNumber <<" has <8 columns.\n"; return false; } - // typically #CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, (FORMAT?), ... if(f[0]!="#CHROM"){ std::cerr<<"Error: #CHROM line doesn't start with '#CHROM' at line "<< lineNumber <<".\n"; return false; } + + headerColumnCount = static_cast(f.size()); + headerHasFormat = (headerColumnCount > 8); + sampleCount = headerHasFormat ? 
headerColumnCount - 9 : 0; + + if(headerHasFormat && f[8] != "FORMAT"){ + std::string msg = "Warning: column 9 of #CHROM header is not 'FORMAT'."; + if(strictMode){ + std::cerr << "Error: " << msg << "\n"; + return false; + } else { + std::cerr << msg << "\n"; + } + } return true; } @@ -108,6 +133,16 @@ bool VCFXValidator::validateDataLine(const std::string &line, int lineNumber){ std::cerr<<"Error: line "<< lineNumber <<" has <8 columns.\n"; return false; } + if(headerColumnCount>0){ + if(strictMode && static_cast(f.size()) != headerColumnCount){ + std::cerr << "Error: line "<(f.size()) != headerColumnCount){ + std::cerr << "Warning: line "< formatParts = split(f[8], ':'); + for(size_t i=9;i sampleParts = split(f[i], ':'); + if(sampleParts.size()!=formatParts.size()){ + std::string msg = "Warning: sample column " + std::to_string(i-8) + + " does not match FORMAT field"; + if(strictMode){ + std::cerr<<"Error: "<8){ + std::string msg = "Warning: data line has sample columns but header lacks FORMAT"; + if(strictMode){ + std::cerr<<"Error: "<&1) + output=$($EXEC $opts < "$input_file" 2>&1) exit_code=$? if [ $exit_code -eq 0 ]; then @@ -43,13 +44,14 @@ run_test_failure() { local description=$2 local input_file=$3 local expected_error=$4 + local opts="$5" echo -n "Test $test_num: $description... " # Run the command using process substitution local output local exit_code - output=$($EXEC < "$input_file" 2>&1) + output=$($EXEC $opts < "$input_file" 2>&1) exit_code=$? if [ $exit_code -ne 0 ]; then @@ -171,6 +173,23 @@ chr1 100 . A T . PASS . 
chr2 200 rs456 G C 80 PASS NS=2;DP=15 EOF +# Header has one sample column but a data line includes two sample columns +cat > data/mismatched_columns.vcf << EOF +##fileformat=VCFv4.2 +##FORMAT= +EOF +printf '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE1\n' >> data/mismatched_columns.vcf +printf 'chr1\t100\t.\tA\tT\t60\tPASS\t.\tGT\t0/1\t0/0\n' >> data/mismatched_columns.vcf + +# FORMAT expects two entries but sample has three +cat > data/format_mismatch.vcf << EOF +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +EOF +printf '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE1\n' >> data/format_mismatch.vcf +printf 'chr1\t100\t.\tA\tT\t60\tPASS\t.\tGT:DP\t0/1:30:7\n' >> data/format_mismatch.vcf + # Run each test separately and track failures failures=0 @@ -228,6 +247,18 @@ else failures=$((failures + 1)) fi +# Test 13 - strict mode valid file +run_test_success 13 "Strict valid VCF" "data/valid.vcf" "--strict" +[ $? -ne 0 ] && failures=$((failures + 1)) + +# Test 14 - mismatched columns in strict mode +run_test_failure 14 "Strict mismatched columns" "data/mismatched_columns.vcf" "columns" "--strict" +[ $? -ne 0 ] && failures=$((failures + 1)) + +# Test 15 - FORMAT/sample mismatch in strict mode +run_test_failure 15 "Strict format mismatch" "data/format_mismatch.vcf" "FORMAT" "--strict" +[ $? -ne 0 ] && failures=$((failures + 1)) + if [ $failures -eq 0 ]; then echo "All tests for VCFX_validator passed!" 
exit 0 From b8dbc8770c4d31751f6b5e255fc77487d867a76b Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 23:26:19 +0100 Subject: [PATCH 18/63] Integrate shell scripts with CTest --- .github/workflows/build-test.yml | 11 ++++- CMakeLists.txt | 5 +- README.md | 11 ++++- docs/CONTRIBUTING.md | 11 +++-- tests/CMakeLists.txt | 79 ++++++++++++++++++++++++++++++++ 5 files changed, 107 insertions(+), 10 deletions(-) create mode 100644 tests/CMakeLists.txt diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index e7495aea..9fea25be 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -18,6 +18,13 @@ jobs: sudo apt-get update sudo apt-get install -y build-essential cmake libz-dev - - name: Run shell tests + - name: Configure + run: cmake -S . -B build + + - name: Build + run: cmake --build build -- -j + + - name: Run tests run: | - bash tests/test_all.sh + cd build + ctest --output-on-failure diff --git a/CMakeLists.txt b/CMakeLists.txt index 17f372a2..20f51dca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,9 +53,8 @@ enable_testing() # Add top-level 'src' subdirectory, which in turn references each tool subdirectory add_subdirectory(src) -# Add a tests subdir if you have tests -# Comment out this line since we don't have a CMakeLists.txt file in the tests directory -# add_subdirectory(tests) +# Add the test suite +add_subdirectory(tests) # Installation configuration include(GNUInstallDirs) diff --git a/README.md b/README.md index 7e158764..906803db 100644 --- a/README.md +++ b/README.md @@ -87,9 +87,16 @@ cmake --build . 
## Running Tests +From your build directory, run: + +```bash +ctest --output-on-failure +``` + +You can also execute all shell scripts directly with: + ```bash -cd build -ctest --verbose +bash ../tests/test_all.sh ``` ## Contributing diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 9029abf4..ed264c7d 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -58,11 +58,16 @@ make ### Running Tests -After building the project, run the tests to ensure everything is working correctly: +After building the project, run the test suite from the `build` directory: ```bash -cd build -ctest --verbose +ctest --output-on-failure +``` + +You can still run all shell tests directly if needed: + +```bash +bash ../tests/test_all.sh ``` ## Coding Standards diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 00000000..558897dc --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,79 @@ +# CMake tests for VCFX shell scripts + +set(TEST_SCRIPTS + test_af_subsetter.sh + test_alignment_checker.sh + test_allele_balance_calc.sh + test_allele_balance_filter.sh + test_allele_counter.sh + test_allele_freq_calc.sh + test_ancestry_assigner.sh + test_ancestry_inferrer.sh + test_annotation_extractor.sh + test_compressor.sh + test_concordance_checker.sh + test_cross_sample_concordance.sh + test_custom_annotator.sh + test_diff_tool.sh + test_distance_calculator.sh + test_dosage_calculator.sh + test_duplicate_remover.sh + test_fasta_converter.sh + test_field_extractor.sh + test_file_splitter.sh + test_format_converter.sh + test_genotype_query.sh + test_gl_filter.sh + test_haplotype_extractor.sh + test_header_parser.sh + test_hwe_tester.sh + test_impact_filter.sh + test_indel_normalizer.sh + test_indexer.sh + test_info_aggregator.sh + test_info_summarizer.sh + test_inbreeding_calculator.sh + test_ld_calculator.sh + test_metadata_summarizer.sh + test_merger.sh + test_missing_data_handler.sh + test_missing_detector.sh + test_multiallelic_splitter.sh + 
test_nonref_filter.sh + test_outlier_detector.sh + test_phase_checker.sh + test_phase_quality_filter.sh + test_phred_filter.sh + test_population_filter.sh + test_position_subsetter.sh + test_probability_filter.sh + test_quality_adjuster.sh + test_record_filter.sh + test_ref_comparator.sh + test_reformatter.sh + test_region_subsampler.sh + test_sample_extractor.sh + test_sorter.sh + test_sv_handler.sh + test_subsampler.sh + test_validator.sh + test_variant_classifier.sh + test_variant_counter.sh +) + +foreach(script ${TEST_SCRIPTS}) + get_filename_component(name ${script} NAME_WE) + add_test(NAME ${name} + COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/${script}) + set_tests_properties(${name} PROPERTIES + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) +endforeach() + +# Optional Docker-based tests +find_program(DOCKER_EXECUTABLE docker) +if(DOCKER_EXECUTABLE) + add_test(NAME test_docker + COMMAND bash ${CMAKE_CURRENT_SOURCE_DIR}/test_docker.sh) + set_tests_properties(test_docker PROPERTIES + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) +endif() From 0563fb225fdc9488bb2f0d8264f5ad95cfec4a8b Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 23:33:32 +0100 Subject: [PATCH 19/63] Improve Docker test script with local build fallback --- tests/test_docker.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/test_docker.sh b/tests/test_docker.sh index 1bb1116d..18e5b9b6 100755 --- a/tests/test_docker.sh +++ b/tests/test_docker.sh @@ -23,9 +23,15 @@ fi echo "๐Ÿงฌ Testing VCFX Docker image with official test files..." # Pull the latest VCFX image -echo "๐Ÿ“ฅ Pulling the latest VCFX Docker image..." -docker pull $VCFX_IMAGE -check_success "Pulled VCFX Docker image" +echo "๐Ÿ“ฅ Pulling the latest VCFX Docker image ($VCFX_IMAGE)..." +if docker pull "$VCFX_IMAGE"; then + check_success "Pulled VCFX Docker image" +else + echo "โš ๏ธ Unable to pull $VCFX_IMAGE. Building Docker image locally..." + docker build -t vcfx:local . 
+ check_success "Built local Docker image" + VCFX_IMAGE="vcfx:local" +fi # Get the directory of this script (tests directory) TESTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" From 5b47e82e016ff841e94f750ea635386c0538ec01 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Thu, 22 May 2025 23:57:16 +0100 Subject: [PATCH 20/63] Fix Docker test input path --- tests/test_docker.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/test_docker.sh b/tests/test_docker.sh index 18e5b9b6..2de967e7 100755 --- a/tests/test_docker.sh +++ b/tests/test_docker.sh @@ -4,6 +4,10 @@ # built image. VCFX_IMAGE="${VCFX_IMAGE:-ghcr.io/jorgemfs/vcfx:latest}" +# Directory paths +TESTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +REPO_ROOT="$(dirname "$TESTS_DIR")" + # Function to check if command succeeded check_success() { if [ $? -ne 0 ]; then @@ -28,14 +32,11 @@ if docker pull "$VCFX_IMAGE"; then check_success "Pulled VCFX Docker image" else echo "โš ๏ธ Unable to pull $VCFX_IMAGE. Building Docker image locally..." - docker build -t vcfx:local . + docker build -t vcfx:local "${REPO_ROOT}" check_success "Built local Docker image" VCFX_IMAGE="vcfx:local" fi -# Get the directory of this script (tests directory) -TESTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" - echo "๐Ÿ” Using test files from: ${TESTS_DIR}" # Create temporary output directory in tests/out @@ -56,7 +57,7 @@ check_success "Validated valid.vcf file" # Test 3: Allele frequency calculator test echo "๐Ÿงฎ Testing VCFX_allele_freq_calc..." 
docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - $VCFX_IMAGE 'cat /tests/data/allele_freq_calc/test_input.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' + $VCFX_IMAGE 'cat /tests/data/allele_freq_calc/simple.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' check_success "Calculated allele frequencies" # Test 4: Sample extractor test From e051302c583cf7b19f8f580be65894e7971cac25 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Fri, 23 May 2025 00:09:28 +0100 Subject: [PATCH 21/63] Add vcfx wrapper --- compile_wasm.sh | 2 +- docs/tools_overview.md | 2 + src/CMakeLists.txt | 2 + src/vcfx_wrapper/CMakeLists.txt | 1 + src/vcfx_wrapper/vcfx.cpp | 97 +++++++++++++++++++++++++++++++++ 5 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 src/vcfx_wrapper/CMakeLists.txt create mode 100644 src/vcfx_wrapper/vcfx.cpp diff --git a/compile_wasm.sh b/compile_wasm.sh index e0a5a132..25d8cd6a 100644 --- a/compile_wasm.sh +++ b/compile_wasm.sh @@ -13,5 +13,5 @@ fi cmake --build . -echo "All VCFX tools built for WebAssembly in build_wasm/." +echo "All VCFX tools and the vcfx wrapper built for WebAssembly in build_wasm/." echo "Use 'ls -R build_wasm' to see output. If you want .html or .js from Emscripten, you can adjust linking flags or suffixes." diff --git a/docs/tools_overview.md b/docs/tools_overview.md index 76c3b152..898124b8 100644 --- a/docs/tools_overview.md +++ b/docs/tools_overview.md @@ -2,6 +2,8 @@ VCFX is a collection of C/C++ tools for processing and analyzing VCF (Variant Call Format) files, with optional WebAssembly compatibility. Each tool is an independent command-line executable that can parse input from `stdin` and write to `stdout`, enabling flexible piping and integration into bioinformatics pipelines. +The suite also includes a convenience wrapper `vcfx` so you can run commands as `vcfx `. For example, `vcfx variant_counter` is equivalent to running `VCFX_variant_counter`. 
Use `vcfx --list` to see available subcommands. All individual `VCFX_*` binaries remain available if you prefer calling them directly. + ## Tool Categories ### Data Analysis diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ed756a9f..2fadba0a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,6 +6,7 @@ target_include_directories(vcfx_core PUBLIC ${CMAKE_CURRENT_LIST_DIR}/../include target_link_libraries(vcfx_core PUBLIC ZLIB::ZLIB) # Add all tool subdirectories +add_subdirectory(vcfx_wrapper) add_subdirectory(VCFX_header_parser) add_subdirectory(VCFX_record_filter) add_subdirectory(VCFX_field_extractor) @@ -74,6 +75,7 @@ install(TARGETS vcfx_core # Define a list of all tool executables for installation set(VCFX_TOOLS + vcfx VCFX_header_parser VCFX_record_filter VCFX_field_extractor diff --git a/src/vcfx_wrapper/CMakeLists.txt b/src/vcfx_wrapper/CMakeLists.txt new file mode 100644 index 00000000..d9824862 --- /dev/null +++ b/src/vcfx_wrapper/CMakeLists.txt @@ -0,0 +1 @@ +add_executable(vcfx vcfx.cpp) diff --git a/src/vcfx_wrapper/vcfx.cpp b/src/vcfx_wrapper/vcfx.cpp new file mode 100644 index 00000000..70093a13 --- /dev/null +++ b/src/vcfx_wrapper/vcfx.cpp @@ -0,0 +1,97 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void print_usage(){ + std::cout << "vcfx - unified interface for VCFX tools\n" + << "Usage: vcfx [--help] [--list] [args]\n\n" + << " Name of a VCFX tool without the 'VCFX_' prefix\n" + << " --list List available subcommands found in PATH\n" + << " --help Show this help message\n"; +} + +static void list_commands(){ + const char* path_env = std::getenv("PATH"); + if(!path_env) return; + std::string paths(path_env); + std::set cmds; + size_t start=0; + while(true){ + size_t end = paths.find(':', start); + std::string dir = paths.substr(start, end - start); + DIR* d = opendir(dir.c_str()); + if(d){ + struct dirent* e; + while((e = readdir(d)) != nullptr){ + 
if(std::strncmp(e->d_name, "VCFX_", 5)==0){ + std::string name = e->d_name + 5; + std::string full = dir + "/" + e->d_name; + if(access(full.c_str(), X_OK)==0){ + cmds.insert(name); + } + } + } + closedir(d); + } + if(end == std::string::npos) break; + start = end + 1; + } + for(const auto& c : cmds){ + std::cout << c << '\n'; + } +} + +int main(int argc, char* argv[]){ + bool show_help = false; + bool show_list = false; + static struct option long_opts[] = { + {"help", no_argument, 0, 'h'}, + {"list", no_argument, 0, 'l'}, + {0,0,0,0} + }; + + int opt; + while((opt = getopt_long(argc, argv, "hl", long_opts, nullptr)) != -1){ + if(opt == 'h') show_help = true; + else if(opt == 'l') show_list = true; + else { + print_usage(); + return 1; + } + } + + if(show_help){ + print_usage(); + return 0; + } + if(show_list){ + list_commands(); + return 0; + } + + if(optind >= argc){ + print_usage(); + return 1; + } + + std::string sub = argv[optind]; + std::string exec_name = "VCFX_" + sub; + + std::vector exec_args; + exec_args.push_back(const_cast(exec_name.c_str())); + for(int i = optind + 1; i < argc; ++i){ + exec_args.push_back(argv[i]); + } + exec_args.push_back(nullptr); + + execvp(exec_name.c_str(), exec_args.data()); + std::perror(exec_name.c_str()); + return 1; +} + From 9de9437093063601c02eefb18a48655bc0c629ed Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Fri, 23 May 2025 00:19:25 +0100 Subject: [PATCH 22/63] Add Docker entrypoint and path setup --- Dockerfile | 13 ++++++------ add_vcfx_tools_to_path.sh | 44 +++++++++++++++++++++------------------ docker_entrypoint.sh | 10 +++++++++ 3 files changed, 41 insertions(+), 26 deletions(-) create mode 100755 docker_entrypoint.sh diff --git a/Dockerfile b/Dockerfile index be69319f..857fa1fe 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,14 +46,15 @@ COPY --from=builder /app/build/src /usr/local/bin/ # Create a directory for data WORKDIR /data -# Add the script that adds tools to PATH +# Add the helper scripts COPY 
add_vcfx_tools_to_path.sh /usr/local/bin/ +COPY docker_entrypoint.sh /usr/local/bin/ -# Make the script executable -RUN chmod +x /usr/local/bin/add_vcfx_tools_to_path.sh +# Make them executable +RUN chmod +x /usr/local/bin/add_vcfx_tools_to_path.sh /usr/local/bin/docker_entrypoint.sh -# Set the entry point -ENTRYPOINT ["/bin/bash", "-c"] +# Use a custom entrypoint that sets up PATH for the tools +ENTRYPOINT ["/usr/local/bin/docker_entrypoint.sh"] # Default command shows available tools -CMD ["echo 'VCFX Toolkit is ready. Run any VCFX tool by name, for example:' && ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename"] +CMD ["bash", "-c", "echo 'VCFX Toolkit is ready. Run any VCFX tool by name, for example:' && ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename"] diff --git a/add_vcfx_tools_to_path.sh b/add_vcfx_tools_to_path.sh index 0a68c655..43a647b0 100644 --- a/add_vcfx_tools_to_path.sh +++ b/add_vcfx_tools_to_path.sh @@ -6,37 +6,41 @@ # Usage: # source ./add_vcfx_tools_to_path.sh -# Where is the root of this script? (i.e., your VCFX repository root) -# Adjust if needed; for example if you keep this script in the top-level dir: +# Determine potential base directories that may contain VCFX tools. +# When running from the build tree this will be build/src, but inside the +# Docker image the tools reside in /usr/local/bin/VCFX_*/. REPO_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -# Our compiled tools should be under build/src +BASE_DIRS=() BUILD_SRC_DIR="${REPO_ROOT}/build/src" +if [ -d "${BUILD_SRC_DIR}" ]; then + BASE_DIRS+=("${BUILD_SRC_DIR}") +fi -# Check that this path exists: -if [ ! -d "${BUILD_SRC_DIR}" ]; then - echo "Error: build/src directory not found at: ${BUILD_SRC_DIR}" - echo "Make sure you have run 'cmake .. 
&& make' inside ./build" - return 1 +# Also check the standard installation prefix used in the Docker image +if compgen -G "/usr/local/bin/VCFX_*" > /dev/null; then + BASE_DIRS+=("/usr/local/bin") fi -# We'll gather a list of directories under build/src/VCFX_* -# that actually contain an executable matching the pattern "VCFX_*" -# Then add those directories to PATH. +if [ ${#BASE_DIRS[@]} -eq 0 ]; then + echo "Warning: No VCFX tool directories found." + return 1 +fi +# Gather directories containing executables named VCFX_* TOOL_DIRS="" -while IFS= read -r -d '' toolExec; do - # 'toolExec' is something like: build/src/VCFX_af_subsetter/VCFX_af_subsetter - toolDir=$(dirname "$toolExec") - # Only add it once if not present - if [[ ":$TOOL_DIRS:" != *":$toolDir:"* ]]; then - TOOL_DIRS="${TOOL_DIRS}:${toolDir}" - fi -done < <(find "${BUILD_SRC_DIR}" -type f -perm /111 -name 'VCFX_*' -print0 2>/dev/null) +for base in "${BASE_DIRS[@]}"; do + while IFS= read -r -d '' toolExec; do + toolDir=$(dirname "$toolExec") + if [[ ":$TOOL_DIRS:" != *":$toolDir:"* ]]; then + TOOL_DIRS="${TOOL_DIRS}:${toolDir}" + fi + done < <(find "$base" -type f -perm /111 -name 'VCFX_*' -print0 2>/dev/null) +done # If empty (no tools found), bail out if [ -z "$TOOL_DIRS" ]; then - echo "Warning: No VCFX tools found in ${BUILD_SRC_DIR}. Did you run 'make'?" + echo "Warning: No VCFX tools found." else # Remove leading colon TOOL_DIRS="${TOOL_DIRS#:}" diff --git a/docker_entrypoint.sh b/docker_entrypoint.sh new file mode 100755 index 00000000..18f1316a --- /dev/null +++ b/docker_entrypoint.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +# Entrypoint for VCFX Docker image. +# It adds VCFX tool directories to the PATH and then executes the given command. 
+ +# Source the helper script if available +if [ -f /usr/local/bin/add_vcfx_tools_to_path.sh ]; then + source /usr/local/bin/add_vcfx_tools_to_path.sh +fi + +exec "$@" From 594f332bb0a97edc9c9421d3c56725063a3ee7b7 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Fri, 23 May 2025 00:53:19 +0100 Subject: [PATCH 23/63] fix docker tests to use bash --- tests/test_docker.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_docker.sh b/tests/test_docker.sh index 2de967e7..58f9158d 100755 --- a/tests/test_docker.sh +++ b/tests/test_docker.sh @@ -46,36 +46,36 @@ check_success "Created temporary output directory" # Test 1: List available tools echo "๐Ÿ“‹ Listing available VCFX tools..." -docker run --rm $VCFX_IMAGE 'ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename' +docker run --rm $VCFX_IMAGE bash -c 'ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename' check_success "Listed available tools" # Test 2: Validator test echo "๐Ÿ” Testing VCFX_validator..." -docker run --rm -v "${TESTS_DIR}:/tests" $VCFX_IMAGE 'cat /tests/data/valid.vcf | VCFX_validator' +docker run --rm -v "${TESTS_DIR}:/tests" $VCFX_IMAGE bash -c 'cat /tests/data/valid.vcf | VCFX_validator' check_success "Validated valid.vcf file" # Test 3: Allele frequency calculator test echo "๐Ÿงฎ Testing VCFX_allele_freq_calc..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - $VCFX_IMAGE 'cat /tests/data/allele_freq_calc/simple.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' + $VCFX_IMAGE bash -c 'cat /tests/data/allele_freq_calc/simple.vcf | VCFX_allele_freq_calc > /output/allele_freqs.tsv' check_success "Calculated allele frequencies" # Test 4: Sample extractor test echo "๐Ÿ‘ฅ Testing VCFX_sample_extractor..." 
docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - $VCFX_IMAGE 'cat /tests/data/valid.vcf | VCFX_sample_extractor --samples SAMPLE1 > /output/sample1.vcf' + $VCFX_IMAGE bash -c 'cat /tests/data/valid.vcf | VCFX_sample_extractor --samples SAMPLE1 > /output/sample1.vcf' check_success "Extracted sample" # Test 5: Variant classifier test echo "๐Ÿ”ฌ Testing VCFX_variant_classifier..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - $VCFX_IMAGE 'cat /tests/data/classifier_mixed.vcf | VCFX_variant_classifier --append-info > /output/classified.vcf' + $VCFX_IMAGE bash -c 'cat /tests/data/classifier_mixed.vcf | VCFX_variant_classifier --append-info > /output/classified.vcf' check_success "Classified variants" # Test 6: Testing a pipeline of commands echo "๐Ÿ”„ Testing a pipeline of VCFX tools..." docker run --rm -v "${TESTS_DIR}:/tests" -v "${TEMP_OUTPUT}:/output" \ - $VCFX_IMAGE 'cat /tests/data/valid.vcf | VCFX_validator | VCFX_variant_classifier --append-info | VCFX_allele_freq_calc > /output/pipeline_output.tsv' + $VCFX_IMAGE bash -c 'cat /tests/data/valid.vcf | VCFX_validator | VCFX_variant_classifier --append-info | VCFX_allele_freq_calc > /output/pipeline_output.tsv' check_success "Executed pipeline of tools" echo "๐ŸŽ‰ All Docker tests completed successfully!" 
From 22052bdf724f0c3ce874062df5d964a7fe1190c4 Mon Sep 17 00:00:00 2001 From: Jorge Miguel Silva Date: Fri, 23 May 2025 10:52:24 +0100 Subject: [PATCH 24/63] Fix validator passthrough --- docs/VCFX_validator.md | 8 ++++++-- site/VCFX_validator/index.html | 5 +++-- src/VCFX_validator/VCFX_validator.cpp | 7 ++++++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/VCFX_validator.md b/docs/VCFX_validator.md index 3b850697..e3d8bcac 100644 --- a/docs/VCFX_validator.md +++ b/docs/VCFX_validator.md @@ -75,15 +75,19 @@ When `--strict` is used, additional checks are applied: ### Basic Validation Check if a VCF file is valid: ```bash -VCFX_validator < input.vcf +VCFX_validator < input.vcf > validated.vcf ``` ### Using Strict Mode Enable stricter validation with additional checks: ```bash -VCFX_validator --strict < input.vcf +VCFX_validator --strict < input.vcf > validated.vcf ``` +When the input is valid, the original VCF is written unchanged to standard output, +allowing `VCFX_validator` to be used as a filter in processing pipelines. Informational +messages such as `VCF file is valid.` are printed to standard error. + ### Redirecting Error Messages Save validation errors to a file: ```bash diff --git a/site/VCFX_validator/index.html b/site/VCFX_validator/index.html index 68d9582c..f68f8549 100644 --- a/site/VCFX_validator/index.html +++ b/site/VCFX_validator/index.html @@ -2884,12 +2884,13 @@

Data LinesExamples

Basic Validation

Check if a VCF file is valid: -

VCFX_validator < input.vcf
+
VCFX_validator < input.vcf > validated.vcf
 

Using Strict Mode

Enable stricter validation (note: additional strict checks are reserved for future implementation): -

VCFX_validator --strict < input.vcf
+
VCFX_validator --strict < input.vcf > validated.vcf
 

+

When the input is valid, the VCF contents are echoed to standard output so the tool can be used in pipelines. Informational messages such as VCF file is valid. are written to standard error.

Redirecting Error Messages

Save validation errors to a file:

VCFX_validator < input.vcf 2> validation_errors.txt
diff --git a/src/VCFX_validator/VCFX_validator.cpp b/src/VCFX_validator/VCFX_validator.cpp
index bc5f1c90..c360636b 100644
--- a/src/VCFX_validator/VCFX_validator.cpp
+++ b/src/VCFX_validator/VCFX_validator.cpp
@@ -260,6 +260,7 @@ bool VCFXValidator::validateVCF(std::istream &in){
     std::string line;
     int lineNum=0;
     bool foundChromLine= false;
+    std::vector<std::string> lines;
 
     while(true){
         if(!std::getline(in, line)) break;
@@ -290,12 +291,16 @@ bool VCFXValidator::validateVCF(std::istream &in){
             }
             if(!validateDataLine(line, lineNum)) return false;
         }
+        lines.push_back(line);
     }
     if(!foundChromLine){
         std::cerr<<"Error: no #CHROM line found in file.\n";
         return false;
     }
-    std::cout<<"VCF file is valid.\n";
+    for(const auto &l : lines){
+        std::cout << l << '\n';
+    }
+    std::cerr<<"VCF file is valid.\n";
     return true;
 }
 

From 83ef866f40d3c1d9eb439b784a0cba453764af66 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 11:34:04 +0100
Subject: [PATCH 25/63] Simplify tool discovery in CMake

---
 src/CMakeLists.txt | 136 ++++-----------------------------------------
 1 file changed, 11 insertions(+), 125 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 2fadba0a..ef6dc6ef 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -7,137 +7,23 @@ target_link_libraries(vcfx_core PUBLIC ZLIB::ZLIB)
 
 # Add all tool subdirectories
 add_subdirectory(vcfx_wrapper)
-add_subdirectory(VCFX_header_parser)
-add_subdirectory(VCFX_record_filter)
-add_subdirectory(VCFX_field_extractor)
-add_subdirectory(VCFX_format_converter)
-add_subdirectory(VCFX_variant_counter)
-add_subdirectory(VCFX_sample_extractor)
-add_subdirectory(VCFX_sorter)
-add_subdirectory(VCFX_validator)
-add_subdirectory(VCFX_subsampler)
-add_subdirectory(VCFX_genotype_query)
-add_subdirectory(VCFX_allele_freq_calc)
-add_subdirectory(VCFX_indexer)
-add_subdirectory(VCFX_compressor)
-add_subdirectory(VCFX_position_subsetter)
-add_subdirectory(VCFX_haplotype_extractor)
-add_subdirectory(VCFX_info_parser)
-add_subdirectory(VCFX_variant_classifier)
-add_subdirectory(VCFX_duplicate_remover)
-add_subdirectory(VCFX_info_summarizer)
-add_subdirectory(VCFX_distance_calculator)
-add_subdirectory(VCFX_multiallelic_splitter)
-add_subdirectory(VCFX_missing_data_handler)
-add_subdirectory(VCFX_concordance_checker)
-add_subdirectory(VCFX_allele_balance_calc)
-add_subdirectory(VCFX_allele_counter)
-add_subdirectory(VCFX_phase_checker)
-add_subdirectory(VCFX_annotation_extractor)
-add_subdirectory(VCFX_phred_filter)
-add_subdirectory(VCFX_merger)
-add_subdirectory(VCFX_metadata_summarizer)
-add_subdirectory(VCFX_hwe_tester)
-add_subdirectory(VCFX_fasta_converter)
-add_subdirectory(VCFX_nonref_filter)
-add_subdirectory(VCFX_dosage_calculator)
-add_subdirectory(VCFX_population_filter)
-add_subdirectory(VCFX_file_splitter)
-add_subdirectory(VCFX_gl_filter)
-add_subdirectory(VCFX_ref_comparator)
-add_subdirectory(VCFX_ancestry_inferrer)
-add_subdirectory(VCFX_impact_filter)
-add_subdirectory(VCFX_info_aggregator)
-add_subdirectory(VCFX_probability_filter)
-add_subdirectory(VCFX_diff_tool)
-add_subdirectory(VCFX_cross_sample_concordance)
-add_subdirectory(VCFX_phase_quality_filter)
-add_subdirectory(VCFX_indel_normalizer)
-add_subdirectory(VCFX_custom_annotator)
-add_subdirectory(VCFX_region_subsampler)
-add_subdirectory(VCFX_allele_balance_filter)
-add_subdirectory(VCFX_missing_detector)
-add_subdirectory(VCFX_haplotype_phaser)
-add_subdirectory(VCFX_af_subsetter)
-add_subdirectory(VCFX_sv_handler)
-add_subdirectory(VCFX_reformatter)
-add_subdirectory(VCFX_quality_adjuster)
-add_subdirectory(VCFX_inbreeding_calculator)
-add_subdirectory(VCFX_outlier_detector)
-add_subdirectory(VCFX_alignment_checker)
-add_subdirectory(VCFX_ancestry_assigner)
-add_subdirectory(VCFX_ld_calculator)
+
+# Automatically detect tool directories named "VCFX_*" and
+# build/install them.
+set(VCFX_TOOLS vcfx)
+file(GLOB TOOL_DIRS RELATIVE ${CMAKE_CURRENT_LIST_DIR} VCFX_*)
+foreach(dir ${TOOL_DIRS})
+    if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/${dir}")
+        add_subdirectory(${dir})
+        list(APPEND VCFX_TOOLS ${dir})
+    endif()
+endforeach()
 
 # Install the core library
 install(TARGETS vcfx_core
         ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
 )
 
-# Define a list of all tool executables for installation
-set(VCFX_TOOLS
-    vcfx
-    VCFX_header_parser
-    VCFX_record_filter
-    VCFX_field_extractor
-    VCFX_format_converter
-    VCFX_variant_counter
-    VCFX_sample_extractor
-    VCFX_sorter
-    VCFX_validator
-    VCFX_subsampler
-    VCFX_genotype_query
-    VCFX_allele_freq_calc
-    VCFX_indexer
-    VCFX_compressor
-    VCFX_position_subsetter
-    VCFX_haplotype_extractor
-    VCFX_info_parser
-    VCFX_variant_classifier
-    VCFX_duplicate_remover
-    VCFX_info_summarizer
-    VCFX_distance_calculator
-    VCFX_multiallelic_splitter
-    VCFX_missing_data_handler
-    VCFX_concordance_checker
-    VCFX_allele_balance_calc
-    VCFX_allele_counter
-    VCFX_phase_checker
-    VCFX_annotation_extractor
-    VCFX_phred_filter
-    VCFX_merger
-    VCFX_metadata_summarizer
-    VCFX_hwe_tester
-    VCFX_fasta_converter
-    VCFX_nonref_filter
-    VCFX_dosage_calculator
-    VCFX_population_filter
-    VCFX_file_splitter
-    VCFX_gl_filter
-    VCFX_ref_comparator
-    VCFX_ancestry_inferrer
-    VCFX_impact_filter
-    VCFX_info_aggregator
-    VCFX_probability_filter
-    VCFX_diff_tool
-    VCFX_cross_sample_concordance
-    VCFX_phase_quality_filter
-    VCFX_indel_normalizer
-    VCFX_custom_annotator
-    VCFX_region_subsampler
-    VCFX_allele_balance_filter
-    VCFX_missing_detector
-    VCFX_haplotype_phaser
-    VCFX_af_subsetter
-    VCFX_sv_handler
-    VCFX_reformatter
-    VCFX_quality_adjuster
-    VCFX_inbreeding_calculator
-    VCFX_outlier_detector
-    VCFX_alignment_checker
-    VCFX_ancestry_assigner
-    VCFX_ld_calculator
-)
-
 # Install all tool executables
 install(TARGETS ${VCFX_TOOLS}
         RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}

From 7144d0ada18557ec0c3fb1a91604a7c3835b2ae5 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 13:10:33 +0100
Subject: [PATCH 26/63] Fix macOS test failures

---
 .github/workflows/build-test.yml | 23 +++++++++++++++++++++--
 tests/test_custom_annotator.sh   | 12 ++++++++++--
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 9fea25be..95820fb3 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -8,23 +8,42 @@ on:
 
 jobs:
   build-and-test:
-    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest, windows-latest]
+    runs-on: ${{ matrix.os }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@v3
 
-      - name: Install dependencies
+      - name: Install dependencies (Linux)
+        if: runner.os == 'Linux'
         run: |
           sudo apt-get update
           sudo apt-get install -y build-essential cmake libz-dev
 
+      - name: Install dependencies (macOS)
+        if: runner.os == 'macOS'
+        run: |
+          brew update
+          brew install cmake zlib bash
+          echo "$(brew --prefix)/bin" >> $GITHUB_PATH
+
+      - name: Install dependencies (Windows)
+        if: runner.os == 'Windows'
+        run: |
+          choco install -y cmake zlib
+
       - name: Configure
         run: cmake -S . -B build
+        shell: bash
 
       - name: Build
         run: cmake --build build -- -j
+        shell: bash
 
       - name: Run tests
         run: |
           cd build
           ctest --output-on-failure
+        shell: bash
diff --git a/tests/test_custom_annotator.sh b/tests/test_custom_annotator.sh
index ce42c649..cf7b45e8 100755
--- a/tests/test_custom_annotator.sh
+++ b/tests/test_custom_annotator.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 # Exit on error
 set -e
@@ -125,11 +125,19 @@ for i in $(seq 1 1000); do
     echo "1	$i	A	G	Annotation$i"
 done > "$SCRIPT_DIR/data/large_annotations.txt"
 # Add VCF header
-sed -i '1i\
+if [[ "$(uname)" == "Darwin" ]]; then
+    sed -i '' '1i\
 ##fileformat=VCFv4.2\
 ##contig=\
 #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  SAMPLE1\
 ' "$SCRIPT_DIR/data/large_input.vcf"
+else
+    sed -i '1i\
+##fileformat=VCFv4.2\
+##contig=\
+#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  SAMPLE1\
+' "$SCRIPT_DIR/data/large_input.vcf"
+fi
 
 time "$ROOT_DIR/build/src/VCFX_custom_annotator/VCFX_custom_annotator" --add-annotation "$SCRIPT_DIR/data/large_annotations.txt" < "$SCRIPT_DIR/data/large_input.vcf" > "$SCRIPT_DIR/data/large_output.vcf"
 if [ $? -eq 0 ]; then

From 5d7a1e8899c2c439a24dc9e5d572a6c44ca62615 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 15:27:25 +0100
Subject: [PATCH 27/63] fix: avoid duplicate add_subdirectory on
 case-insensitive FS

---
 src/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index ef6dc6ef..4816d1fc 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -12,6 +12,7 @@ add_subdirectory(vcfx_wrapper)
 # build/install them.
 set(VCFX_TOOLS vcfx)
 file(GLOB TOOL_DIRS RELATIVE ${CMAKE_CURRENT_LIST_DIR} VCFX_*)
+list(REMOVE_ITEM TOOL_DIRS vcfx_wrapper) # avoid duplicate on case-insensitive filesystems
 foreach(dir ${TOOL_DIRS})
     if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/${dir}")
         add_subdirectory(${dir})

From 0c7d31543bd6ab1baf8cc874e857b4f7d3a616eb Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 16:29:11 +0100
Subject: [PATCH 28/63] Fix Windows getopt

---
 .github/workflows/build-test.yml | 11 +++++++++--
 src/CMakeLists.txt               |  3 +++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 95820fb3..3863410f 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -31,12 +31,19 @@ jobs:
 
       - name: Install dependencies (Windows)
         if: runner.os == 'Windows'
+        shell: pwsh
         run: |
-          choco install -y cmake zlib
+          choco install -y cmake
+          & "$env:VCPKG_INSTALLATION_ROOT\vcpkg.exe" install zlib getopt-win32
 
       - name: Configure
-        run: cmake -S . -B build
         shell: bash
+        run: |
+          if [ "$RUNNER_OS" = "Windows" ]; then
+            cmake -S . -B build -DCMAKE_TOOLCHAIN_FILE="$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"
+          else
+            cmake -S . -B build
+          fi
 
       - name: Build
         run: cmake --build build -- -j
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4816d1fc..93d6adfc 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -4,6 +4,9 @@ cmake_minimum_required(VERSION 3.14)
 add_library(vcfx_core STATIC vcfx_core.cpp)
 target_include_directories(vcfx_core PUBLIC ${CMAKE_CURRENT_LIST_DIR}/../include)
 target_link_libraries(vcfx_core PUBLIC ZLIB::ZLIB)
+if(WIN32)
+    target_link_libraries(vcfx_core PUBLIC getopt)
+endif()
 
 # Add all tool subdirectories
 add_subdirectory(vcfx_wrapper)

From 9fb2be02408906df5cb1044232f4608437201a12 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 16:56:16 +0100
Subject: [PATCH 29/63] fix: use cmake parallel build

---
 .github/workflows/build-test.yml | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 95820fb3..4124f850 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -31,15 +31,22 @@ jobs:
 
       - name: Install dependencies (Windows)
         if: runner.os == 'Windows'
+        shell: pwsh
         run: |
-          choco install -y cmake zlib
+          choco install -y cmake
+          & "$env:VCPKG_INSTALLATION_ROOT\vcpkg.exe" install zlib
 
       - name: Configure
-        run: cmake -S . -B build
         shell: bash
+        run: |
+          if [ "$RUNNER_OS" = "Windows" ]; then
+            cmake -S . -B build -DCMAKE_TOOLCHAIN_FILE="$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"
+          else
+            cmake -S . -B build
+          fi
 
       - name: Build
-        run: cmake --build build -- -j
+        run: cmake --build build --parallel
         shell: bash
 
       - name: Run tests

From b8a6b8fc36ecfb0a2d06ad08537affaf3dfc8f59 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 17:10:32 +0100
Subject: [PATCH 30/63] Fix Windows build by installing getopt

---
 .github/workflows/build-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index fec9b043..2bb744d3 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -34,7 +34,7 @@ jobs:
         shell: pwsh
         run: |
           choco install -y cmake
-          & "$env:VCPKG_INSTALLATION_ROOT\vcpkg.exe" install zlib
+          & "$env:VCPKG_INSTALLATION_ROOT\vcpkg.exe" install zlib getopt
 
 
       - name: Configure

From 06803785ff73af78142905bd1527363b1100b130 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 17:45:21 +0100
Subject: [PATCH 31/63] Remove Windows references

---
 .github/workflows/build-test.yml | 18 ++----------------
 docs/VCFX_header_parser.md       |  2 +-
 docs/VCFX_indexer.md             |  2 +-
 docs/VCFX_info_aggregator.md     |  2 +-
 docs/VCFX_info_parser.md         |  2 +-
 tests/test_indexer.sh            |  4 ++--
 tests/test_indexer_fixed.sh      |  4 ++--
 tests/test_info_aggregator.sh    |  4 ++--
 tests/test_info_parser.sh        |  2 +-
 tests/test_info_summarizer.sh    |  2 +-
 10 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 2bb744d3..a95241db 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -10,7 +10,7 @@ jobs:
   build-and-test:
     strategy:
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-latest, macos-latest]
     runs-on: ${{ matrix.os }}
     steps:
       - name: Checkout repository
@@ -29,23 +29,9 @@ jobs:
           brew install cmake zlib bash
           echo "$(brew --prefix)/bin" >> $GITHUB_PATH
 
-      - name: Install dependencies (Windows)
-        if: runner.os == 'Windows'
-        shell: pwsh
-        run: |
-          choco install -y cmake
-          & "$env:VCPKG_INSTALLATION_ROOT\vcpkg.exe" install zlib getopt
-
-
       - name: Configure
+        run: cmake -S . -B build
         shell: bash
-        run: |
-          if [ "$RUNNER_OS" = "Windows" ]; then
-            cmake -S . -B build -DCMAKE_TOOLCHAIN_FILE="$VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"
-          else
-            cmake -S . -B build
-          fi
-
       - name: Build
         run: cmake --build build --parallel
         shell: bash
diff --git a/docs/VCFX_header_parser.md b/docs/VCFX_header_parser.md
index 8848873f..9706f9cd 100644
--- a/docs/VCFX_header_parser.md
+++ b/docs/VCFX_header_parser.md
@@ -87,7 +87,7 @@ The tool implements simple strategies for handling edge cases:
 1. **Empty files**: If the input file is empty, no output is produced
 2. **Files without headers**: If the file has no header lines, no output is produced
 3. **Malformed headers**: All lines starting with "#" are considered header lines, even if they don't follow VCF specifications
-4. **Line endings**: Both Unix (LF) and Windows (CRLF) line endings are handled correctly
+4. **Line endings**: LF and CRLF line endings are handled correctly
 5. **Partial headers**: If the file ends in the middle of the header section, all header lines up to that point are output
 
 ## Performance
diff --git a/docs/VCFX_indexer.md b/docs/VCFX_indexer.md
index 7d85ddf0..7be8c2c9 100644
--- a/docs/VCFX_indexer.md
+++ b/docs/VCFX_indexer.md
@@ -71,7 +71,7 @@ tail -c +23456 input.vcf | head -1
 
 ### File Format Detection
 
-- The tool automatically handles both Unix (LF) and Windows (CRLF) line endings
+- The tool automatically handles LF and CRLF line endings
 - Byte offsets are calculated correctly regardless of the line ending style
 
 ### Malformed VCF Files
diff --git a/docs/VCFX_info_aggregator.md b/docs/VCFX_info_aggregator.md
index 1f7857ef..a1debf52 100644
--- a/docs/VCFX_info_aggregator.md
+++ b/docs/VCFX_info_aggregator.md
@@ -79,7 +79,7 @@ The tool implements several strategies for handling edge cases:
 2. **Missing fields**: If a specified INFO field is not present in a particular variant, it is simply skipped for that variant.
 3. **Empty input**: The tool will process empty files correctly, reporting zeros for sums and averages.
 4. **Malformed VCF**: If a data line is encountered before the `#CHROM` header, an error is reported.
-5. **Line endings**: The tool correctly handles both Unix (LF) and Windows (CRLF) line endings.
+5. **Line endings**: The tool correctly handles LF and CRLF line endings.
 6. **Partial final line**: The tool properly processes files that do not end with a newline character.
 
 ## Performance
diff --git a/docs/VCFX_info_parser.md b/docs/VCFX_info_parser.md
index a93a64a8..9a4e8e64 100644
--- a/docs/VCFX_info_parser.md
+++ b/docs/VCFX_info_parser.md
@@ -82,7 +82,7 @@ The tool implements several strategies for handling edge cases:
 3. **Malformed lines**: Lines that don't conform to VCF format are skipped with a warning message
 4. **Empty input**: The tool correctly handles empty input files
 5. **Header lines**: VCF header lines (starting with #) are skipped
-6. **Line endings**: Both Unix (LF) and Windows (CRLF) line endings are supported
+6. **Line endings**: LF and CRLF line endings are supported
 7. **Partial final line**: Files without a final newline character are processed correctly
 
 ## Performance
diff --git a/tests/test_indexer.sh b/tests/test_indexer.sh
index 61d080e4..68626e08 100755
--- a/tests/test_indexer.sh
+++ b/tests/test_indexer.sh
@@ -160,9 +160,9 @@ fi
 echo "✓ Test 6 passed"
 
 ###############################################################################
-# Test 7: Windows (CRLF) line endings
+# Test 7: CRLF line endings
 ###############################################################################
-echo "Test 7: Windows CRLF line endings"
+echo "Test 7: CRLF line endings"
 cat > "${SCRIPT_DIR}/data/indexer/crlf_unix.vcf" < "${SCRIPT_DIR}/data/indexer/crlf_unix.vcf" << 'EOF'
 ##fileformat=VCFv4.2
diff --git a/tests/test_info_aggregator.sh b/tests/test_info_aggregator.sh
index 3e9bb472..3926f23a 100755
--- a/tests/test_info_aggregator.sh
+++ b/tests/test_info_aggregator.sh
@@ -178,9 +178,9 @@ echo "✓ Test 7 passed"
 
 
 ###############################################################################
-# Test 8: Windows CRLF line endings
+# Test 8: CRLF line endings
 ###############################################################################
-echo "Test 8: Windows CRLF line endings"
+echo "Test 8: CRLF line endings"
 cat > "${SCRIPT_DIR}/data/aggregator/crlf_unix.vcf" << EOF
 ##fileformat=VCFv4.2
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
diff --git a/tests/test_info_parser.sh b/tests/test_info_parser.sh
index e6efdba5..af431d65 100755
--- a/tests/test_info_parser.sh
+++ b/tests/test_info_parser.sh
@@ -172,7 +172,7 @@ grep -q "^1[[:space:]]200" "${SCRIPT_DIR}/data/info_parser/invalid.out" || (echo
 echo "✓ Test 7 passed"
 
 ###############################################################################
-# Test 8: Windows CRLF line endings
+# Test 8: CRLF line endings
 ###############################################################################
 echo "Test 8: CRLF line endings"
 cat > "${SCRIPT_DIR}/data/info_parser/crlf_unix.vcf" <
Date: Fri, 23 May 2025 18:09:33 +0100
Subject: [PATCH 32/63] feat: add --version support across tools

---
 .clang-format                                 |  3 +++
 CMakeLists.txt                                |  2 ++
 docs/CONTRIBUTING.md                          | 12 +++++++++++
 docs/VCFX_af_subsetter.md                     |  1 +
 docs/VCFX_alignment_checker.md                |  1 +
 docs/VCFX_allele_balance_calc.md              |  1 +
 docs/VCFX_allele_balance_filter.md            |  1 +
 docs/VCFX_allele_counter.md                   |  1 +
 docs/VCFX_ancestry_assigner.md                |  1 +
 docs/VCFX_ancestry_inferrer.md                |  1 +
 docs/VCFX_annotation_extractor.md             |  1 +
 docs/VCFX_compressor.md                       |  1 +
 docs/VCFX_concordance_checker.md              |  1 +
 docs/VCFX_cross_sample_concordance.md         |  1 +
 docs/VCFX_custom_annotator.md                 |  1 +
 docs/VCFX_diff_tool.md                        |  1 +
 docs/VCFX_distance_calculator.md              |  1 +
 docs/VCFX_dosage_calculator.md                |  1 +
 docs/VCFX_duplicate_remover.md                |  1 +
 docs/VCFX_fasta_converter.md                  |  1 +
 docs/VCFX_field_extractor.md                  |  1 +
 docs/VCFX_file_splitter.md                    |  1 +
 docs/VCFX_gl_filter.md                        |  1 +
 docs/VCFX_haplotype_extractor.md              |  1 +
 docs/VCFX_haplotype_phaser.md                 |  1 +
 docs/VCFX_header_parser.md                    |  1 +
 docs/VCFX_hwe_tester.md                       |  1 +
 docs/VCFX_impact_filter.md                    |  1 +
 docs/VCFX_inbreeding_calculator.md            |  1 +
 docs/VCFX_indel_normalizer.md                 |  1 +
 docs/VCFX_indexer.md                          |  1 +
 docs/VCFX_info_aggregator.md                  |  1 +
 docs/VCFX_info_parser.md                      |  1 +
 docs/VCFX_info_summarizer.md                  |  1 +
 docs/VCFX_ld_calculator.md                    |  1 +
 docs/VCFX_missing_detector.md                 |  1 +
 docs/VCFX_nonref_filter.md                    |  1 +
 docs/VCFX_phase_checker.md                    |  1 +
 docs/VCFX_phred_filter.md                     |  1 +
 docs/VCFX_population_filter.md                |  1 +
 docs/VCFX_position_subsetter.md               |  1 +
 docs/VCFX_quality_adjuster.md                 |  1 +
 docs/VCFX_record_filter.md                    |  1 +
 docs/VCFX_ref_comparator.md                   |  1 +
 docs/VCFX_sample_extractor.md                 |  1 +
 docs/VCFX_sorter.md                           |  1 +
 docs/VCFX_sv_handler.md                       |  1 +
 docs/VCFX_validator.md                        |  1 +
 docs/VCFX_variant_classifier.md               |  1 +
 docs/VCFX_variant_counter.md                  |  1 +
 docs/tools_overview.md                        |  1 +
 include/vcfx_core.h                           | 21 +++++++++++++++++++
 src/VCFX_af_subsetter/VCFX_af_subsetter.cpp   |  2 ++
 .../VCFX_alignment_checker.cpp                |  2 ++
 .../VCFX_allele_balance_calc.cpp              |  2 ++
 .../VCFX_allele_balance_filter.cpp            |  2 ++
 .../VCFX_allele_counter.cpp                   |  2 ++
 .../VCFX_allele_freq_calc.cpp                 |  2 ++
 .../VCFX_ancestry_assigner.cpp                |  2 ++
 .../VCFX_ancestry_inferrer.cpp                |  2 ++
 .../VCFX_annotation_extractor.cpp             |  2 ++
 src/VCFX_compressor/VCFX_compressor.cpp       |  2 ++
 .../VCFX_concordance_checker.cpp              |  2 ++
 .../VCFX_cross_sample_concordance.cpp         |  2 ++
 .../VCFX_custom_annotator.cpp                 |  2 ++
 src/VCFX_diff_tool/VCFX_diff_tool.cpp         |  2 ++
 .../VCFX_distance_calculator.cpp              |  2 ++
 .../VCFX_dosage_calculator.cpp                |  2 ++
 .../VCFX_duplicate_remover.cpp                |  2 ++
 .../VCFX_fasta_converter.cpp                  |  2 ++
 .../VCFX_field_extractor.cpp                  |  2 ++
 src/VCFX_file_splitter/VCFX_file_splitter.cpp |  2 ++
 .../VCFX_format_converter.cpp                 |  2 ++
 .../VCFX_genotype_query.cpp                   |  2 ++
 src/VCFX_gl_filter/VCFX_gl_filter.cpp         |  2 ++
 .../VCFX_haplotype_extractor.cpp              |  2 ++
 .../VCFX_haplotype_phaser.cpp                 |  2 ++
 src/VCFX_header_parser/VCFX_header_parser.cpp |  2 ++
 src/VCFX_hwe_tester/VCFX_hwe_tester.cpp       |  2 ++
 src/VCFX_impact_filter/VCFX_impact_filter.cpp |  2 ++
 .../VCFX_inbreeding_calculator.cpp            |  2 ++
 .../VCFX_indel_normalizer.cpp                 |  2 ++
 src/VCFX_indexer/VCFX_indexer.cpp             |  2 ++
 .../VCFX_info_aggregator.cpp                  |  2 ++
 src/VCFX_info_parser/VCFX_info_parser.cpp     |  2 ++
 .../VCFX_info_summarizer.cpp                  |  2 ++
 src/VCFX_ld_calculator/VCFX_ld_calculator.cpp |  2 ++
 src/VCFX_merger/VCFX_merger.cpp               |  2 ++
 .../VCFX_metadata_summarizer.cpp              |  2 ++
 .../VCFX_missing_data_handler.cpp             |  2 ++
 .../VCFX_missing_detector.cpp                 |  2 ++
 .../VCFX_multiallelic_splitter.cpp            |  2 ++
 src/VCFX_nonref_filter/VCFX_nonref_filter.cpp |  2 ++
 .../VCFX_outlier_detector.cpp                 |  2 ++
 src/VCFX_phase_checker/VCFX_phase_checker.cpp |  2 ++
 .../VCFX_phase_quality_filter.cpp             |  2 ++
 src/VCFX_phred_filter/VCFX_phred_filter.cpp   |  2 ++
 .../VCFX_population_filter.cpp                |  2 ++
 .../VCFX_position_subsetter.cpp               |  2 ++
 .../VCFX_probability_filter.cpp               |  2 ++
 .../VCFX_quality_adjuster.cpp                 |  2 ++
 src/VCFX_record_filter/VCFX_record_filter.cpp |  2 ++
 .../VCFX_ref_comparator.cpp                   |  2 ++
 src/VCFX_reformatter/VCFX_reformatter.cpp     |  2 ++
 .../VCFX_region_subsampler.cpp                |  2 ++
 .../VCFX_sample_extractor.cpp                 |  2 ++
 src/VCFX_sorter/VCFX_sorter.cpp               |  2 ++
 src/VCFX_subsampler/VCFX_subsampler.cpp       |  2 ++
 src/VCFX_sv_handler/VCFX_sv_handler.cpp       |  2 ++
 src/VCFX_validator/VCFX_validator.cpp         |  2 ++
 .../VCFX_variant_classifier.cpp               |  2 ++
 .../VCFX_variant_counter.cpp                  |  1 +
 112 files changed, 205 insertions(+)
 create mode 100644 .clang-format

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 00000000..ed5e6cc4
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,3 @@
+BasedOnStyle: LLVM
+IndentWidth: 4
+ColumnLimit: 120
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 20f51dca..043cf974 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,6 +6,8 @@ set(VCFX_VERSION_MINOR 0)
 set(VCFX_VERSION_PATCH 2)
 set(VCFX_VERSION "${VCFX_VERSION_MAJOR}.${VCFX_VERSION_MINOR}.${VCFX_VERSION_PATCH}")
 
+add_compile_definitions(VCFX_VERSION="${VCFX_VERSION}")
+
 project(VCFX 
   VERSION ${VCFX_VERSION}
   DESCRIPTION "A Comprehensive VCF Manipulation Toolkit"
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
index ed264c7d..354ca654 100644
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -32,6 +32,7 @@ We welcome suggestions for new features or improvements to existing functionalit
 1. Fork the repository
 2. Create a new branch for your feature or bug fix
 3. Write your code, following our coding standards
+   (run `clang-format -i <file>` before committing)
 4. Add tests for your changes
 5. Ensure all tests pass
 6. Update documentation as needed
@@ -40,6 +41,17 @@ We welcome suggestions for new features or improvements to existing functionalit
 
 ## Development Setup
 
+### Code Formatting
+
+We use `clang-format` to keep the C++ code style consistent. A basic configuration
+is provided in `.clang-format` at the repository root. Please run:
+
+```bash
+clang-format -i path/to/changed_file.cpp
+```
+
+before committing changes.
+
 ### Prerequisites
 
 - CMake (version 3.10 or higher)
diff --git a/docs/VCFX_af_subsetter.md b/docs/VCFX_af_subsetter.md
index 91871cda..59b79b24 100644
--- a/docs/VCFX_af_subsetter.md
+++ b/docs/VCFX_af_subsetter.md
@@ -13,6 +13,7 @@ VCFX_af_subsetter --af-filter "MIN-MAX" < input.vcf > filtered.vcf
 |--------|-------------|
 | `-a`, `--af-filter ` | Required. Allele frequency range for filtering (e.g., `0.01-0.05`) |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_af_subsetter` processes VCF files line by line and filters variants based on their allele frequency (AF) values from the INFO field. The tool:
diff --git a/docs/VCFX_alignment_checker.md b/docs/VCFX_alignment_checker.md
index 62c5f264..f7c9dc48 100644
--- a/docs/VCFX_alignment_checker.md
+++ b/docs/VCFX_alignment_checker.md
@@ -13,6 +13,7 @@ VCFX_alignment_checker --alignment-discrepancy   > di
 |--------|-------------|
 | `-a`, `--alignment-discrepancy` | Enable alignment discrepancy checking mode |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_alignment_checker` compares VCF variants against a reference genome to validate sequence consistency. The tool:
diff --git a/docs/VCFX_allele_balance_calc.md b/docs/VCFX_allele_balance_calc.md
index 567e0c83..2ce451e7 100644
--- a/docs/VCFX_allele_balance_calc.md
+++ b/docs/VCFX_allele_balance_calc.md
@@ -13,6 +13,7 @@ VCFX_allele_balance_calc [OPTIONS] < input.vcf > allele_balance.tsv
 |--------|-------------|
 | `-s`, `--samples "Sample1 Sample2..."` | Optional. Specify sample names to calculate allele balance for (space-separated). If omitted, all samples are processed. |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_allele_balance_calc` processes a VCF file and calculates the allele balance for each variant in each specified sample. The tool:
diff --git a/docs/VCFX_allele_balance_filter.md b/docs/VCFX_allele_balance_filter.md
index bf9c6227..34ca1c27 100644
--- a/docs/VCFX_allele_balance_filter.md
+++ b/docs/VCFX_allele_balance_filter.md
@@ -16,6 +16,7 @@ VCFX_allele_balance_filter --filter-allele-balance  < input.vcf > fil
 |--------|-------------|
 | `-f`, `--filter-allele-balance`  | Required. Allele balance threshold between 0.0 and 1.0 |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_allele_counter.md b/docs/VCFX_allele_counter.md
index 7031632a..492354df 100644
--- a/docs/VCFX_allele_counter.md
+++ b/docs/VCFX_allele_counter.md
@@ -13,6 +13,7 @@ VCFX_allele_counter [OPTIONS] < input.vcf > allele_counts.tsv
 |--------|-------------|
 | `-s`, `--samples "Sample1 Sample2..."` | Optional. Specify sample names to calculate allele counts for (space-separated). If omitted, all samples are processed. |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_allele_counter` processes a VCF file and counts reference and alternate alleles for each variant in each specified sample. The tool:
diff --git a/docs/VCFX_ancestry_assigner.md b/docs/VCFX_ancestry_assigner.md
index b80d0a04..40bfba1c 100644
--- a/docs/VCFX_ancestry_assigner.md
+++ b/docs/VCFX_ancestry_assigner.md
@@ -16,6 +16,7 @@ VCFX_ancestry_assigner --assign-ancestry  < input.vcf > ancestry_resu
 |--------|-------------|
 | `-a`, `--assign-ancestry ` | Required. Path to a file containing population-specific allele frequencies |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_ancestry_inferrer.md b/docs/VCFX_ancestry_inferrer.md
index 8ee59361..33ab8fda 100644
--- a/docs/VCFX_ancestry_inferrer.md
+++ b/docs/VCFX_ancestry_inferrer.md
@@ -16,6 +16,7 @@ VCFX_ancestry_inferrer --frequency  [OPTIONS] < input.vcf > ancestry_
 |--------|-------------|
 | `--frequency ` | Required. Path to a file containing population-specific allele frequencies |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_annotation_extractor.md b/docs/VCFX_annotation_extractor.md
index aa4deda3..c492e918 100644
--- a/docs/VCFX_annotation_extractor.md
+++ b/docs/VCFX_annotation_extractor.md
@@ -16,6 +16,7 @@ VCFX_annotation_extractor --annotation-extract "FIELD1,FIELD2,..." < input.vcf >
 |--------|-------------|
 | `-a`, `--annotation-extract ` | Required. Comma-separated list of INFO field annotations to extract (e.g., "ANN,Gene,Impact") |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_compressor.md b/docs/VCFX_compressor.md
index 4f31f8e4..36060995 100644
--- a/docs/VCFX_compressor.md
+++ b/docs/VCFX_compressor.md
@@ -17,6 +17,7 @@ VCFX_compressor [OPTIONS] < input_file > output_file
 | `-c`, `--compress` | Compress the input VCF file (read from stdin, write to stdout) |
 | `-d`, `--decompress` | Decompress the input VCF.gz file (read from stdin, write to stdout) |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_concordance_checker.md b/docs/VCFX_concordance_checker.md
index 10aef346..11b55786 100644
--- a/docs/VCFX_concordance_checker.md
+++ b/docs/VCFX_concordance_checker.md
@@ -13,6 +13,7 @@ VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" < input.vcf > concordance_r
 |--------|-------------|
 | `-s`, `--samples "SAMPLE1 SAMPLE2"` | Required. Names of the two samples to compare, separated by a space |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_concordance_checker` analyzes a VCF file and compares the genotypes of two specified samples for each variant. The tool:
diff --git a/docs/VCFX_cross_sample_concordance.md b/docs/VCFX_cross_sample_concordance.md
index 2bfbf0d9..aa77e1bb 100644
--- a/docs/VCFX_cross_sample_concordance.md
+++ b/docs/VCFX_cross_sample_concordance.md
@@ -12,6 +12,7 @@ VCFX_cross_sample_concordance [OPTIONS] < input.vcf > concordance_results.tsv
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_cross_sample_concordance` examines each variant in a multi-sample VCF file and determines if all samples with valid genotypes have the same normalized genotype. The tool:
diff --git a/docs/VCFX_custom_annotator.md b/docs/VCFX_custom_annotator.md
index f238dc28..058f583e 100644
--- a/docs/VCFX_custom_annotator.md
+++ b/docs/VCFX_custom_annotator.md
@@ -16,6 +16,7 @@ VCFX_custom_annotator --add-annotation  [OPTIONS] < input.vcf >
 |--------|-------------|
 | `-a`, `--add-annotation ` | Required. Path to the annotation file containing the custom annotations |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_diff_tool.md b/docs/VCFX_diff_tool.md
index 4aa79332..eb204367 100644
--- a/docs/VCFX_diff_tool.md
+++ b/docs/VCFX_diff_tool.md
@@ -17,6 +17,7 @@ VCFX_diff_tool --file1  --file2 
 | `-a`, `--file1 ` | Required. Path to the first VCF file |
 | `-b`, `--file2 ` | Required. Path to the second VCF file |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_distance_calculator.md b/docs/VCFX_distance_calculator.md
index 4eeea7ac..c178715c 100644
--- a/docs/VCFX_distance_calculator.md
+++ b/docs/VCFX_distance_calculator.md
@@ -15,6 +15,7 @@ VCFX_distance_calculator [OPTIONS] < input.vcf > variant_distances.tsv
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_dosage_calculator.md b/docs/VCFX_dosage_calculator.md
index 227bc9ec..56239e97 100644
--- a/docs/VCFX_dosage_calculator.md
+++ b/docs/VCFX_dosage_calculator.md
@@ -15,6 +15,7 @@ VCFX_dosage_calculator [OPTIONS] < input.vcf > dosage_output.txt
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_duplicate_remover.md b/docs/VCFX_duplicate_remover.md
index 7075e956..6bad844d 100644
--- a/docs/VCFX_duplicate_remover.md
+++ b/docs/VCFX_duplicate_remover.md
@@ -15,6 +15,7 @@ VCFX_duplicate_remover [OPTIONS] < input.vcf > deduplicated.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_fasta_converter.md b/docs/VCFX_fasta_converter.md
index 64ee6ed4..7ea0fbd6 100644
--- a/docs/VCFX_fasta_converter.md
+++ b/docs/VCFX_fasta_converter.md
@@ -15,6 +15,7 @@ VCFX_fasta_converter [OPTIONS] < input.vcf > output.fasta
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_field_extractor.md b/docs/VCFX_field_extractor.md
index d1fb88e3..d51c8186 100644
--- a/docs/VCFX_field_extractor.md
+++ b/docs/VCFX_field_extractor.md
@@ -13,6 +13,7 @@ VCFX_field_extractor --fields "FIELD1,FIELD2,..." [OPTIONS] < input.vcf > output
 |--------|-------------|
 | `-f`, `--fields` | Required. Comma-separated list of fields to extract (no spaces between fields) |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_field_extractor` processes a VCF file and extracts only the specified fields for each variant. The tool:
diff --git a/docs/VCFX_file_splitter.md b/docs/VCFX_file_splitter.md
index 5072cbbe..652c7de9 100644
--- a/docs/VCFX_file_splitter.md
+++ b/docs/VCFX_file_splitter.md
@@ -16,6 +16,7 @@ VCFX_file_splitter [OPTIONS] < input.vcf
 |--------|-------------|
 | `-p`, `--prefix ` | Output file prefix (default: "split") |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_gl_filter.md b/docs/VCFX_gl_filter.md
index d6d7b0fc..ade46e71 100644
--- a/docs/VCFX_gl_filter.md
+++ b/docs/VCFX_gl_filter.md
@@ -14,6 +14,7 @@ VCFX_gl_filter --filter "" [--mode ] < input.vcf > filtered.
 | `-f`, `--filter ` | Required. Filter condition (e.g., `GQ>20`, `DP>=10`, `PL<50`) |
 | `-m`, `--mode ` | Optional. Determines if all samples must pass the condition (`all`, default) or at least one sample must pass (`any`) |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_gl_filter` examines numeric fields in the FORMAT column of a VCF file and filters variant records based on whether the samples satisfy the specified condition. The tool:
diff --git a/docs/VCFX_haplotype_extractor.md b/docs/VCFX_haplotype_extractor.md
index 07d70904..a8bd9d96 100644
--- a/docs/VCFX_haplotype_extractor.md
+++ b/docs/VCFX_haplotype_extractor.md
@@ -17,6 +17,7 @@ VCFX_haplotype_extractor [OPTIONS] < input.vcf > haplotypes.tsv
 | `--block-size ` | Maximum distance in base pairs between consecutive variants to be included in the same block (default: 100,000) |
 | `--check-phase-consistency` | Enable checks for phase consistency between adjacent variants in a block |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_haplotype_phaser.md b/docs/VCFX_haplotype_phaser.md
index 36481ceb..de4bda15 100644
--- a/docs/VCFX_haplotype_phaser.md
+++ b/docs/VCFX_haplotype_phaser.md
@@ -16,6 +16,7 @@ VCFX_haplotype_phaser [OPTIONS] < input.vcf > blocks.txt
 |--------|-------------|
 | `-l`, `--ld-threshold ` | r² threshold for LD-based grouping (0.0-1.0, default: 0.8) |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_header_parser.md b/docs/VCFX_header_parser.md
index 9706f9cd..f31edc34 100644
--- a/docs/VCFX_header_parser.md
+++ b/docs/VCFX_header_parser.md
@@ -15,6 +15,7 @@ VCFX_header_parser [OPTIONS] < input.vcf > header.txt
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_hwe_tester.md b/docs/VCFX_hwe_tester.md
index d6feb009..e370b974 100644
--- a/docs/VCFX_hwe_tester.md
+++ b/docs/VCFX_hwe_tester.md
@@ -15,6 +15,7 @@ VCFX_hwe_tester [OPTIONS] < input.vcf > hwe_results.txt
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_impact_filter.md b/docs/VCFX_impact_filter.md
index 897d5698..13a7b3fa 100644
--- a/docs/VCFX_impact_filter.md
+++ b/docs/VCFX_impact_filter.md
@@ -16,6 +16,7 @@ VCFX_impact_filter --filter-impact  < input.vcf > filtered.vcf
 |--------|-------------|
 | `-i`, `--filter-impact ` | Required. Impact level threshold. Must be one of: HIGH, MODERATE, LOW, MODIFIER |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_inbreeding_calculator.md b/docs/VCFX_inbreeding_calculator.md
index def1abfa..cbbedd07 100644
--- a/docs/VCFX_inbreeding_calculator.md
+++ b/docs/VCFX_inbreeding_calculator.md
@@ -18,6 +18,7 @@ VCFX_inbreeding_calculator [OPTIONS] < input.vcf > output.txt
 | `--skip-boundary` | Skip sites with boundary frequencies (p=0 or p=1) |
 | `--count-boundary-as-used` | Count boundary sites in usedCount even when skipping them |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_indel_normalizer.md b/docs/VCFX_indel_normalizer.md
index 47c48fa2..661d05c7 100644
--- a/docs/VCFX_indel_normalizer.md
+++ b/docs/VCFX_indel_normalizer.md
@@ -12,6 +12,7 @@ VCFX_indel_normalizer [OPTIONS] < input.vcf > normalized.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_indel_normalizer` processes a VCF file and normalizes indel variants by:
diff --git a/docs/VCFX_indexer.md b/docs/VCFX_indexer.md
index 7be8c2c9..10d870d8 100644
--- a/docs/VCFX_indexer.md
+++ b/docs/VCFX_indexer.md
@@ -14,6 +14,7 @@ VCFX_indexer [OPTIONS] < input.vcf > index.tsv
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_info_aggregator.md b/docs/VCFX_info_aggregator.md
index a1debf52..79fa7a9f 100644
--- a/docs/VCFX_info_aggregator.md
+++ b/docs/VCFX_info_aggregator.md
@@ -14,6 +14,7 @@ VCFX_info_aggregator [OPTIONS] < input.vcf > output.vcf
 
 - `-a`, `--aggregate-info `: Comma-separated list of INFO fields to aggregate (required).
 - `-h`, `--help`: Display help message and exit.
+- `-v`, `--version`: Show program version and exit.
 
 ## Description
 
diff --git a/docs/VCFX_info_parser.md b/docs/VCFX_info_parser.md
index 9a4e8e64..2ce74055 100644
--- a/docs/VCFX_info_parser.md
+++ b/docs/VCFX_info_parser.md
@@ -16,6 +16,7 @@ VCFX_info_parser --info "FIELD1,FIELD2,..." < input.vcf > extracted_info.tsv
 |--------|-------------|
 | `-i`, `--info ` | Required. Comma-separated list of INFO fields to extract (e.g., "DP,AF,SOMATIC") |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_info_summarizer.md b/docs/VCFX_info_summarizer.md
index 50b4566d..f1d8b061 100644
--- a/docs/VCFX_info_summarizer.md
+++ b/docs/VCFX_info_summarizer.md
@@ -16,6 +16,7 @@ VCFX_info_summarizer --info "FIELD1,FIELD2,..." < input.vcf > summary_stats.tsv
 |--------|-------------|
 | `-i`, `--info ` | Required. Comma-separated list of INFO fields to analyze (e.g., "DP,AF,MQ") |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_ld_calculator.md b/docs/VCFX_ld_calculator.md
index 7a40bbbe..34fa081e 100644
--- a/docs/VCFX_ld_calculator.md
+++ b/docs/VCFX_ld_calculator.md
@@ -16,6 +16,7 @@ VCFX_ld_calculator [OPTIONS] < input.vcf > ld_matrix.txt
 |--------|-------------|
 | `--region ` | Only compute LD for variants in the specified region |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_missing_detector.md b/docs/VCFX_missing_detector.md
index 53325d21..7cb368b9 100644
--- a/docs/VCFX_missing_detector.md
+++ b/docs/VCFX_missing_detector.md
@@ -15,6 +15,7 @@ VCFX_missing_detector [OPTIONS] < input.vcf > flagged.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_nonref_filter.md b/docs/VCFX_nonref_filter.md
index 278c082e..aa7f15e9 100644
--- a/docs/VCFX_nonref_filter.md
+++ b/docs/VCFX_nonref_filter.md
@@ -15,6 +15,7 @@ VCFX_nonref_filter [OPTIONS] < input.vcf > filtered.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_phase_checker.md b/docs/VCFX_phase_checker.md
index df0bf65d..4bf62779 100644
--- a/docs/VCFX_phase_checker.md
+++ b/docs/VCFX_phase_checker.md
@@ -15,6 +15,7 @@ VCFX_phase_checker [OPTIONS] < input.vcf > phased_output.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_phred_filter.md b/docs/VCFX_phred_filter.md
index ad7e73ae..b662f91b 100644
--- a/docs/VCFX_phred_filter.md
+++ b/docs/VCFX_phred_filter.md
@@ -17,6 +17,7 @@ VCFX_phred_filter [OPTIONS] < input.vcf > filtered.vcf
 | `-p`, `--phred-filter`  | Set PHRED quality score threshold (default: 30.0) |
 | `-k`, `--keep-missing-qual` | Keep variants with missing quality values (represented as ".") |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_population_filter.md b/docs/VCFX_population_filter.md
index cabfdeba..e98db84e 100644
--- a/docs/VCFX_population_filter.md
+++ b/docs/VCFX_population_filter.md
@@ -12,6 +12,7 @@ VCFX_population_filter [OPTIONS] < input.vcf > output.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-p`, `--population ` | **Required**: Population tag to keep (e.g., 'EUR', 'AFR', 'EAS') |
 | `-m`, `--pop-map ` | **Required**: Tab-delimited file mapping sample names to populations |
 
diff --git a/docs/VCFX_position_subsetter.md b/docs/VCFX_position_subsetter.md
index fcd33045..727e78f4 100644
--- a/docs/VCFX_position_subsetter.md
+++ b/docs/VCFX_position_subsetter.md
@@ -13,6 +13,7 @@ VCFX_position_subsetter --region "CHR:START-END" < input.vcf > filtered.vcf
 |--------|-------------|
 | `-r`, `--region ` | Required. Genomic region to extract in the format "chromosome:start-end" |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_position_subsetter` reads a VCF file from standard input and outputs only those variants that fall within the specified genomic region. The tool:
diff --git a/docs/VCFX_quality_adjuster.md b/docs/VCFX_quality_adjuster.md
index 80d7b8b4..56ff873d 100644
--- a/docs/VCFX_quality_adjuster.md
+++ b/docs/VCFX_quality_adjuster.md
@@ -17,6 +17,7 @@ VCFX_quality_adjuster [OPTIONS] < input.vcf > output.vcf
 | `-a`, `--adjust-qual ` | Required. The transformation function to apply. Must be one of: `log`, `sqrt`, `square`, or `identity`. |
 | `-n`, `--no-clamp` | Do not clamp negative or extremely large values resulting from transformations. |
 | `-h`, `--help` | Display help message and exit. |
+| `-v`, `--version` | Show program version and exit. |
 
 ## Description
 
diff --git a/docs/VCFX_record_filter.md b/docs/VCFX_record_filter.md
index 8ae0b8f1..c406e4eb 100644
--- a/docs/VCFX_record_filter.md
+++ b/docs/VCFX_record_filter.md
@@ -14,6 +14,7 @@ VCFX_record_filter --filter "CRITERIA" [OPTIONS] < input.vcf > filtered.vcf
 | `-f`, `--filter ` | Required. One or more filtering criteria separated by semicolons (e.g., `"POS>10000;QUAL>=30;AF<0.05"`) |
 | `-l`, `--logic ` | Logic for combining multiple criteria: `and` (default) requires all criteria to pass, `or` requires any criterion to pass |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_record_filter` evaluates each variant in a VCF file against specified criteria and outputs only variants that satisfy these criteria. The tool:
diff --git a/docs/VCFX_ref_comparator.md b/docs/VCFX_ref_comparator.md
index fa2eba48..a3db6c76 100644
--- a/docs/VCFX_ref_comparator.md
+++ b/docs/VCFX_ref_comparator.md
@@ -16,6 +16,7 @@ VCFX_ref_comparator --reference  < input.vcf > annotated.vcf
 |--------|-------------|
 | `-r`, `--reference`  | Required. Path to reference genome in FASTA format |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_sample_extractor.md b/docs/VCFX_sample_extractor.md
index da470f56..96b2ca06 100644
--- a/docs/VCFX_sample_extractor.md
+++ b/docs/VCFX_sample_extractor.md
@@ -16,6 +16,7 @@ VCFX_sample_extractor [OPTIONS] < input.vcf > subset.vcf
 |--------|-------------|
 | `-s`, `--samples` LIST | Comma or space separated list of sample names to extract |
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_sorter.md b/docs/VCFX_sorter.md
index 561978df..d66708e1 100644
--- a/docs/VCFX_sorter.md
+++ b/docs/VCFX_sorter.md
@@ -12,6 +12,7 @@ VCFX_sorter [OPTIONS] < input.vcf > output.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-n`, `--natural-chr` | Use natural chromosome sorting (chr1 < chr2 < chr10) instead of lexicographic sorting |
 
 ## Description
diff --git a/docs/VCFX_sv_handler.md b/docs/VCFX_sv_handler.md
index 14f90490..67ff1dcb 100644
--- a/docs/VCFX_sv_handler.md
+++ b/docs/VCFX_sv_handler.md
@@ -12,6 +12,7 @@ VCFX_sv_handler [OPTIONS] < input.vcf > output.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-f`, `--sv-filter-only` | Keep only lines that have 'SVTYPE=' in their INFO field |
 | `-m`, `--sv-modify` | Modify the INFO field of structural variants to add additional annotations |
 
diff --git a/docs/VCFX_validator.md b/docs/VCFX_validator.md
index e3d8bcac..dde07d1c 100644
--- a/docs/VCFX_validator.md
+++ b/docs/VCFX_validator.md
@@ -14,6 +14,7 @@ VCFX_validator [OPTIONS] < input.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-s`, `--strict` | Enable stricter validation checks |
 
 ## Description
diff --git a/docs/VCFX_variant_classifier.md b/docs/VCFX_variant_classifier.md
index 48304688..b2b5be15 100644
--- a/docs/VCFX_variant_classifier.md
+++ b/docs/VCFX_variant_classifier.md
@@ -15,6 +15,7 @@ VCFX_variant_classifier [OPTIONS] < input.vcf > output.vcf_or_tsv
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-a`, `--append-info` | Instead of producing a TSV, output a valid VCF with a new 'VCF_CLASS' subfield in the INFO column |
 
 ## Description
diff --git a/docs/VCFX_variant_counter.md b/docs/VCFX_variant_counter.md
index 47e6e9dd..29839c7b 100644
--- a/docs/VCFX_variant_counter.md
+++ b/docs/VCFX_variant_counter.md
@@ -12,6 +12,7 @@ VCFX_variant_counter [OPTIONS] < input.vcf
 | Option | Description |
 |--------|-------------|
 | `-h`, `--help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-s`, `--strict` | Fail on any data line with fewer than 8 columns |
 
 ## Description
diff --git a/docs/tools_overview.md b/docs/tools_overview.md
index 898124b8..4361453d 100644
--- a/docs/tools_overview.md
+++ b/docs/tools_overview.md
@@ -3,6 +3,7 @@
 VCFX is a collection of C/C++ tools for processing and analyzing VCF (Variant Call Format) files, with optional WebAssembly compatibility. Each tool is an independent command-line executable that can parse input from `stdin` and write to `stdout`, enabling flexible piping and integration into bioinformatics pipelines.
 
 The suite also includes a convenience wrapper `vcfx` so you can run commands as `vcfx `. For example, `vcfx variant_counter` is equivalent to running `VCFX_variant_counter`. Use `vcfx --list` to see available subcommands. All individual `VCFX_*` binaries remain available if you prefer calling them directly.
+Every tool also accepts `-v` or `--version`, which prints the tool's build version and exits.
 
 ## Tool Categories
 
diff --git a/include/vcfx_core.h b/include/vcfx_core.h
index c3a62895..80516071 100644
--- a/include/vcfx_core.h
+++ b/include/vcfx_core.h
@@ -4,6 +4,7 @@
 #include 
 #include 
 #include 
+#include <cstring>
 
 namespace vcfx {
 
@@ -18,6 +19,26 @@ void print_error(const std::string& msg, std::ostream& os = std::cerr);
 void print_version(const std::string& tool, const std::string& version,
                    std::ostream& os = std::cout);
 
+inline std::string get_version() {
+#ifdef VCFX_VERSION
+    return VCFX_VERSION;
+#else
+    return "unknown";
+#endif
+}
+
+inline bool handle_version_flag(int argc, char* argv[], const std::string& tool,
+                                std::ostream& os = std::cout) {
+    for (int i = 1; i < argc; ++i) {
+        if (std::strcmp(argv[i], "--version") == 0 ||
+            std::strcmp(argv[i], "-v") == 0) {
+            print_version(tool, get_version(), os);
+            return true;
+        }
+    }
+    return false;
+}
+
 // Read entire input stream, automatically decompressing if gzip/BGZF
 // compressed. Returns true on success and stores the resulting text in
 // 'out'.
diff --git a/src/VCFX_af_subsetter/VCFX_af_subsetter.cpp b/src/VCFX_af_subsetter/VCFX_af_subsetter.cpp
index 2a6b0579..8110b7eb 100644
--- a/src/VCFX_af_subsetter/VCFX_af_subsetter.cpp
+++ b/src/VCFX_af_subsetter/VCFX_af_subsetter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_af_subsetter.h"
 #include 
 #include 
@@ -156,6 +157,7 @@ void VCFXAfSubsetter::subsetByAlleleFrequency(std::istream& in, std::ostream& ou
 // Typical main():
 //
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_af_subsetter")) return 0;
     VCFXAfSubsetter afSubsetter;
     return afSubsetter.run(argc, argv);
 }
diff --git a/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp b/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp
index b4898ee3..f9df880d 100644
--- a/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp
+++ b/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_alignment_checker.h"
 #include 
 #include 
@@ -321,6 +322,7 @@ void VCFXAlignmentChecker::checkDiscrepancies(std::istream& vcfIn, std::ostream&
 
 // Typical main(), linking to run()
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_alignment_checker")) return 0;
     VCFXAlignmentChecker alignmentChecker;
     return alignmentChecker.run(argc, argv);
 }
diff --git a/src/VCFX_allele_balance_calc/VCFX_allele_balance_calc.cpp b/src/VCFX_allele_balance_calc/VCFX_allele_balance_calc.cpp
index 71acdeeb..27e8bfd7 100644
--- a/src/VCFX_allele_balance_calc/VCFX_allele_balance_calc.cpp
+++ b/src/VCFX_allele_balance_calc/VCFX_allele_balance_calc.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 // VCFX_allele_balance_calc.cpp
 
 #include 
@@ -224,6 +225,7 @@ bool calculateAlleleBalance(std::istream& in, std::ostream& out, const AlleleBal
 // main()
 // ---------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_allele_balance_calc")) return 0;
     AlleleBalanceArguments args;
     parseArguments(argc, argv, args);
 
diff --git a/src/VCFX_allele_balance_filter/VCFX_allele_balance_filter.cpp b/src/VCFX_allele_balance_filter/VCFX_allele_balance_filter.cpp
index 88c963d2..ad7fbe43 100644
--- a/src/VCFX_allele_balance_filter/VCFX_allele_balance_filter.cpp
+++ b/src/VCFX_allele_balance_filter/VCFX_allele_balance_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -197,6 +198,7 @@ double VCFXAlleleBalanceFilter::calculateAlleleBalance(const std::string& genoty
 // main() linking to class
 // ------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_allele_balance_filter")) return 0;
     VCFXAlleleBalanceFilter alleleBalanceFilter;
     return alleleBalanceFilter.run(argc, argv);
 }
diff --git a/src/VCFX_allele_counter/VCFX_allele_counter.cpp b/src/VCFX_allele_counter/VCFX_allele_counter.cpp
index 2e6ac5e9..b3ad6f4d 100644
--- a/src/VCFX_allele_counter/VCFX_allele_counter.cpp
+++ b/src/VCFX_allele_counter/VCFX_allele_counter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -232,6 +233,7 @@ static bool countAlleles(std::istream& in, std::ostream& out, const AlleleCounte
 // main()
 // ---------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_allele_counter")) return 0;
     AlleleCounterArguments args;
     parseArguments(argc, argv, args);
 
diff --git a/src/VCFX_allele_freq_calc/VCFX_allele_freq_calc.cpp b/src/VCFX_allele_freq_calc/VCFX_allele_freq_calc.cpp
index 2344ac6d..f25dccb1 100644
--- a/src/VCFX_allele_freq_calc/VCFX_allele_freq_calc.cpp
+++ b/src/VCFX_allele_freq_calc/VCFX_allele_freq_calc.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -183,6 +184,7 @@ static void calculateAlleleFrequency(std::istream& in, std::ostream& out) {
 // main()
 // ---------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_allele_freq_calc")) return 0;
     // Parse arguments for help
     for (int i = 1; i < argc; ++i) {
         std::string arg = argv[i];
diff --git a/src/VCFX_ancestry_assigner/VCFX_ancestry_assigner.cpp b/src/VCFX_ancestry_assigner/VCFX_ancestry_assigner.cpp
index ee21cce4..22a2e1ff 100644
--- a/src/VCFX_ancestry_assigner/VCFX_ancestry_assigner.cpp
+++ b/src/VCFX_ancestry_assigner/VCFX_ancestry_assigner.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -408,6 +409,7 @@ void VCFXAncestryAssigner::assignAncestry(std::istream& vcfIn, std::ostream& out
 // main() - just instantiate and run
 // ---------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_ancestry_assigner")) return 0;
     VCFXAncestryAssigner assigner;
     return assigner.run(argc, argv);
 }
diff --git a/src/VCFX_ancestry_inferrer/VCFX_ancestry_inferrer.cpp b/src/VCFX_ancestry_inferrer/VCFX_ancestry_inferrer.cpp
index 1c72b384..00725dcd 100644
--- a/src/VCFX_ancestry_inferrer/VCFX_ancestry_inferrer.cpp
+++ b/src/VCFX_ancestry_inferrer/VCFX_ancestry_inferrer.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -58,6 +59,7 @@ class VCFXAncestryInferrer {
 // main() - create the inferrer and run
 // ----------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_ancestry_inferrer")) return 0;
     VCFXAncestryInferrer inferrer;
     return inferrer.run(argc, argv);
 }
diff --git a/src/VCFX_annotation_extractor/VCFX_annotation_extractor.cpp b/src/VCFX_annotation_extractor/VCFX_annotation_extractor.cpp
index 2245d156..544a2ea5 100644
--- a/src/VCFX_annotation_extractor/VCFX_annotation_extractor.cpp
+++ b/src/VCFX_annotation_extractor/VCFX_annotation_extractor.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -273,6 +274,7 @@ static void processVCF(std::istream &in, const AnnotationOptions &opts) {
 // main()
 // --------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_annotation_extractor")) return 0;
     AnnotationOptions opts;
     if (!parseArguments(argc, argv, opts)) {
         // parseArguments already printed help if needed
diff --git a/src/VCFX_compressor/VCFX_compressor.cpp b/src/VCFX_compressor/VCFX_compressor.cpp
index e2476577..3ffd05a5 100644
--- a/src/VCFX_compressor/VCFX_compressor.cpp
+++ b/src/VCFX_compressor/VCFX_compressor.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -149,6 +150,7 @@ static bool compressDecompressVCF(std::istream& in, std::ostream& out, bool comp
 // main
 // ---------------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_compressor")) return 0;
     bool compress = false;
     bool decompress = false;
 
diff --git a/src/VCFX_concordance_checker/VCFX_concordance_checker.cpp b/src/VCFX_concordance_checker/VCFX_concordance_checker.cpp
index c6b8df79..ae6f694c 100644
--- a/src/VCFX_concordance_checker/VCFX_concordance_checker.cpp
+++ b/src/VCFX_concordance_checker/VCFX_concordance_checker.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -275,6 +276,7 @@ static bool calculateConcordance(std::istream &in, std::ostream &out, const Conc
 // main
 // ---------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_concordance_checker")) return 0;
     ConcordanceArguments args;
     if (!parseArguments(argc, argv, args)) {
         // parseArguments prints error/help if needed
diff --git a/src/VCFX_cross_sample_concordance/VCFX_cross_sample_concordance.cpp b/src/VCFX_cross_sample_concordance/VCFX_cross_sample_concordance.cpp
index 50456e7e..1a9b694e 100644
--- a/src/VCFX_cross_sample_concordance/VCFX_cross_sample_concordance.cpp
+++ b/src/VCFX_cross_sample_concordance/VCFX_cross_sample_concordance.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -238,6 +239,7 @@ static void calculateConcordance(std::istream &in, std::ostream &out) {
 // Command-line parsing + main
 // --------------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_cross_sample_concordance")) return 0;
     bool showHelp = false;
 
     static struct option longOpts[] = {
diff --git a/src/VCFX_custom_annotator/VCFX_custom_annotator.cpp b/src/VCFX_custom_annotator/VCFX_custom_annotator.cpp
index 28e9ea29..a5319263 100644
--- a/src/VCFX_custom_annotator/VCFX_custom_annotator.cpp
+++ b/src/VCFX_custom_annotator/VCFX_custom_annotator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -282,6 +283,7 @@ int VCFXCustomAnnotator::run(int argc, char* argv[]) {
 // main
 // ---------------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_custom_annotator")) return 0;
     VCFXCustomAnnotator annotator;
     return annotator.run(argc, argv);
 }
diff --git a/src/VCFX_diff_tool/VCFX_diff_tool.cpp b/src/VCFX_diff_tool/VCFX_diff_tool.cpp
index 8f8f4611..ea984daa 100644
--- a/src/VCFX_diff_tool/VCFX_diff_tool.cpp
+++ b/src/VCFX_diff_tool/VCFX_diff_tool.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include 
 #include 
 #include 
@@ -205,6 +206,7 @@ int VCFXDiffTool::run(int argc, char* argv[]) {
 // main
 // ----------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_diff_tool")) return 0;
     VCFXDiffTool diffTool;
     return diffTool.run(argc, argv);
 }
diff --git a/src/VCFX_distance_calculator/VCFX_distance_calculator.cpp b/src/VCFX_distance_calculator/VCFX_distance_calculator.cpp
index 0fd2552c..67e717a5 100644
--- a/src/VCFX_distance_calculator/VCFX_distance_calculator.cpp
+++ b/src/VCFX_distance_calculator/VCFX_distance_calculator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 // VCFX_distance_calculator.cpp
 #include "VCFX_distance_calculator.h"
 #include 
@@ -160,6 +161,7 @@ bool calculateDistances(std::istream& in, std::ostream& out) {
 // main: Parses command-line arguments and calls calculateDistances.
 // --------------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_distance_calculator")) return 0;
     // Check for help option.
     for (int i = 1; i < argc; ++i) {
         std::string arg = argv[i];
diff --git a/src/VCFX_dosage_calculator/VCFX_dosage_calculator.cpp b/src/VCFX_dosage_calculator/VCFX_dosage_calculator.cpp
index 0dfa7f30..e41f5c0b 100644
--- a/src/VCFX_dosage_calculator/VCFX_dosage_calculator.cpp
+++ b/src/VCFX_dosage_calculator/VCFX_dosage_calculator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_dosage_calculator.h"
 #include 
 #include 
@@ -216,6 +217,7 @@ std::vector<std::string> VCFXDosageCalculator::split(const std::string& str, cha
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_dosage_calculator")) return 0;
     VCFXDosageCalculator dosageCalculator;
     return dosageCalculator.run(argc, argv);
 }
diff --git a/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp b/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp
index 77c5ce1a..72625213 100644
--- a/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp
+++ b/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_duplicate_remover.h"
 #include 
 #include 
@@ -129,6 +130,7 @@ bool removeDuplicates(std::istream& in, std::ostream& out) {
 // main: Parse command-line arguments and call removeDuplicates.
 // ----------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_duplicate_remover")) return 0;
     // Simple argument parsing: if --help or -h is provided, print help.
     for (int i = 1; i < argc; ++i) {
         std::string arg = argv[i];
diff --git a/src/VCFX_fasta_converter/VCFX_fasta_converter.cpp b/src/VCFX_fasta_converter/VCFX_fasta_converter.cpp
index 110df41f..84c79222 100644
--- a/src/VCFX_fasta_converter/VCFX_fasta_converter.cpp
+++ b/src/VCFX_fasta_converter/VCFX_fasta_converter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_fasta_converter.h"
 #include 
 #include 
@@ -320,6 +321,7 @@ void VCFXFastaConverter::convertVCFtoFasta(std::istream& in, std::ostream& out)
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_fasta_converter")) return 0;
     VCFXFastaConverter app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_field_extractor/VCFX_field_extractor.cpp b/src/VCFX_field_extractor/VCFX_field_extractor.cpp
index b7fe05cd..31359024 100644
--- a/src/VCFX_field_extractor/VCFX_field_extractor.cpp
+++ b/src/VCFX_field_extractor/VCFX_field_extractor.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_field_extractor.h"
 #include 
 #include 
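@@ -245,6 +246,7 @@ void extractFields(std::istream& in, std::ostream& out, const std::vector<std::s
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_field_extractor")) return 0;
     std::vector<std::string> fields;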
     bool showHelp = false;
 
diff --git a/src/VCFX_file_splitter/VCFX_file_splitter.cpp b/src/VCFX_file_splitter/VCFX_file_splitter.cpp
index 8d5c21f5..87c7b264 100644
--- a/src/VCFX_file_splitter/VCFX_file_splitter.cpp
+++ b/src/VCFX_file_splitter/VCFX_file_splitter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_file_splitter.h"
 #include 
 #include 
@@ -159,6 +160,7 @@ void VCFXFileSplitter::splitVCFByChromosome(std::istream& in,
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_file_splitter")) return 0;
     VCFXFileSplitter splitter;
     return splitter.run(argc, argv);
 }
diff --git a/src/VCFX_format_converter/VCFX_format_converter.cpp b/src/VCFX_format_converter/VCFX_format_converter.cpp
index 9998a63b..668b4f67 100644
--- a/src/VCFX_format_converter/VCFX_format_converter.cpp
+++ b/src/VCFX_format_converter/VCFX_format_converter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_format_converter.h"
 #include 
 #include 
@@ -184,6 +185,7 @@ void convertVCFtoCSV(std::istream& in, std::ostream& out) {
 // main
 // -----------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_format_converter")) return 0;
     OutputFormat format;
     bool valid = parseArguments(argc, argv, format);
 
diff --git a/src/VCFX_genotype_query/VCFX_genotype_query.cpp b/src/VCFX_genotype_query/VCFX_genotype_query.cpp
index e472b6af..f1451c23 100644
--- a/src/VCFX_genotype_query/VCFX_genotype_query.cpp
+++ b/src/VCFX_genotype_query/VCFX_genotype_query.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_genotype_query.h"
 #include 
 #include 
@@ -239,6 +240,7 @@ void genotypeQuery(std::istream& in, std::ostream& out,
 // main
 // ------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_genotype_query")) return 0;
     std::string genotypeQueryStr;
     bool strictCompare = false;
     if (!parseArguments(argc, argv, genotypeQueryStr, strictCompare)) {
diff --git a/src/VCFX_gl_filter/VCFX_gl_filter.cpp b/src/VCFX_gl_filter/VCFX_gl_filter.cpp
index 8360fc9e..769b8502 100644
--- a/src/VCFX_gl_filter/VCFX_gl_filter.cpp
+++ b/src/VCFX_gl_filter/VCFX_gl_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_gl_filter.h"
 #include 
 #include 
@@ -263,6 +264,7 @@ void VCFXGLFilter::filterByGL(std::istream& in,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_gl_filter")) return 0;
     VCFXGLFilter app;
     return app.run(argc, argv);
 }
\ No newline at end of file
diff --git a/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp b/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp
index 87fd0c79..9475ba6e 100644
--- a/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp
+++ b/src/VCFX_haplotype_extractor/VCFX_haplotype_extractor.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_haplotype_extractor.h"
 #include 
 #include 
@@ -327,6 +328,7 @@ bool HaplotypeExtractor::extractHaplotypes(std::istream& in, std::ostream& out)
 // main
 // ---------------------------------------------------------------------
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_haplotype_extractor")) return 0;
     int blockSize = 100000;
     bool doCheck = false;
     bool debug = false;
diff --git a/src/VCFX_haplotype_phaser/VCFX_haplotype_phaser.cpp b/src/VCFX_haplotype_phaser/VCFX_haplotype_phaser.cpp
index 9b2eba87..6cbfa965 100644
--- a/src/VCFX_haplotype_phaser/VCFX_haplotype_phaser.cpp
+++ b/src/VCFX_haplotype_phaser/VCFX_haplotype_phaser.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_haplotype_phaser.h"
 #include 
 #include 
@@ -318,6 +319,7 @@ std::vector<std::vector<int>> VCFXHaplotypePhaser::groupVariants(const std::vect
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_haplotype_phaser")) return 0;
     VCFXHaplotypePhaser hp;
     return hp.run(argc, argv);
 }
diff --git a/src/VCFX_header_parser/VCFX_header_parser.cpp b/src/VCFX_header_parser/VCFX_header_parser.cpp
index 97164a43..45ee87ed 100644
--- a/src/VCFX_header_parser/VCFX_header_parser.cpp
+++ b/src/VCFX_header_parser/VCFX_header_parser.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_header_parser.h"
 #include 
 #include 
@@ -26,6 +27,7 @@ void processHeader(std::istream& in, std::ostream& out) {
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_header_parser")) return 0;
     // Simple argument parsing
     for (int i = 1; i < argc; ++i) {
         std::string arg = argv[i];
diff --git a/src/VCFX_hwe_tester/VCFX_hwe_tester.cpp b/src/VCFX_hwe_tester/VCFX_hwe_tester.cpp
index 64fbc9d8..d54e7f24 100644
--- a/src/VCFX_hwe_tester/VCFX_hwe_tester.cpp
+++ b/src/VCFX_hwe_tester/VCFX_hwe_tester.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_hwe_tester.h"
 #include 
 #include 
@@ -255,6 +256,7 @@ void VCFXHWETester::performHWE(std::istream& in){
 
 // actual main
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_hwe_tester")) return 0;
     VCFXHWETester tester;
     return tester.run(argc, argv);
 }
diff --git a/src/VCFX_impact_filter/VCFX_impact_filter.cpp b/src/VCFX_impact_filter/VCFX_impact_filter.cpp
index 86034924..51bfea85 100644
--- a/src/VCFX_impact_filter/VCFX_impact_filter.cpp
+++ b/src/VCFX_impact_filter/VCFX_impact_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_impact_filter.h"
 #include 
 #include 
@@ -200,6 +201,7 @@ void VCFXImpactFilter::filterByImpact(std::istream& in,
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_impact_filter")) return 0;
     VCFXImpactFilter filt;
     return filt.run(argc, argv);
 }
diff --git a/src/VCFX_inbreeding_calculator/VCFX_inbreeding_calculator.cpp b/src/VCFX_inbreeding_calculator/VCFX_inbreeding_calculator.cpp
index 4b095239..4353d7b3 100644
--- a/src/VCFX_inbreeding_calculator/VCFX_inbreeding_calculator.cpp
+++ b/src/VCFX_inbreeding_calculator/VCFX_inbreeding_calculator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_inbreeding_calculator.h"
 #include 
 #include 
@@ -351,6 +352,7 @@ int VCFXInbreedingCalculator::run(int argc, char* argv[]){
 // -------------------------------------------------------------------------
 // main entry point
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_inbreeding_calculator")) return 0;
     VCFXInbreedingCalculator calc;
     return calc.run(argc, argv);
 }
diff --git a/src/VCFX_indel_normalizer/VCFX_indel_normalizer.cpp b/src/VCFX_indel_normalizer/VCFX_indel_normalizer.cpp
index 6c4f2fc7..0a60d6bb 100644
--- a/src/VCFX_indel_normalizer/VCFX_indel_normalizer.cpp
+++ b/src/VCFX_indel_normalizer/VCFX_indel_normalizer.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_indel_normalizer.h"
 #include 
 #include 
@@ -253,6 +254,7 @@ void VCFXIndelNormalizer::normalizeIndels(std::istream& in, std::ostream& out) {
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_indel_normalizer")) return 0;
     VCFXIndelNormalizer norm;
     return norm.run(argc, argv);
 }
diff --git a/src/VCFX_indexer/VCFX_indexer.cpp b/src/VCFX_indexer/VCFX_indexer.cpp
index a99c185c..2011871e 100644
--- a/src/VCFX_indexer/VCFX_indexer.cpp
+++ b/src/VCFX_indexer/VCFX_indexer.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_indexer.h"
 #include 
 #include 
@@ -189,6 +190,7 @@ void VCFXIndexer::createVCFIndex(std::istream &in, std::ostream &out) {
 
 // Optional main if you build as a single executable
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_indexer")) return 0;
     VCFXIndexer idx;
     return idx.run(argc, argv);
 }
diff --git a/src/VCFX_info_aggregator/VCFX_info_aggregator.cpp b/src/VCFX_info_aggregator/VCFX_info_aggregator.cpp
index f15d8825..91deee46 100644
--- a/src/VCFX_info_aggregator/VCFX_info_aggregator.cpp
+++ b/src/VCFX_info_aggregator/VCFX_info_aggregator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_info_aggregator.h"
 #include 
 #include 
@@ -214,6 +215,7 @@ void VCFXInfoAggregator::aggregateInfo(std::istream& in,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_info_aggregator")) return 0;
     VCFXInfoAggregator app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_info_parser/VCFX_info_parser.cpp b/src/VCFX_info_parser/VCFX_info_parser.cpp
index 286ef5a0..51cc6461 100644
--- a/src/VCFX_info_parser/VCFX_info_parser.cpp
+++ b/src/VCFX_info_parser/VCFX_info_parser.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_info_parser.h"
 #include 
 #include 
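@@ -139,6 +140,7 @@ bool parseInfoFields(std::istream& in, std::ostream& out, const std::vector<std:
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_info_parser")) return 0;
     std::vector<std::string> info_fields;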
 
     // parse arguments
diff --git a/src/VCFX_info_summarizer/VCFX_info_summarizer.cpp b/src/VCFX_info_summarizer/VCFX_info_summarizer.cpp
index 79d43a81..0917bcd9 100644
--- a/src/VCFX_info_summarizer/VCFX_info_summarizer.cpp
+++ b/src/VCFX_info_summarizer/VCFX_info_summarizer.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_info_summarizer.h"
 #include 
 #include 
@@ -225,6 +226,7 @@ bool summarizeInfoFields(std::istream& in, std::ostream& out, const std::vector<
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_info_summarizer")) return 0;
     std::vector<std::string> info_fields;
 
     // parse arguments
diff --git a/src/VCFX_ld_calculator/VCFX_ld_calculator.cpp b/src/VCFX_ld_calculator/VCFX_ld_calculator.cpp
index ef5bc318..9076d375 100644
--- a/src/VCFX_ld_calculator/VCFX_ld_calculator.cpp
+++ b/src/VCFX_ld_calculator/VCFX_ld_calculator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_ld_calculator.h"
 #include 
 #include 
@@ -345,6 +346,7 @@ int VCFXLDCalculator::run(int argc, char* argv[]) {
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_ld_calculator")) return 0;
     VCFXLDCalculator calc;
     return calc.run(argc, argv);
 }
diff --git a/src/VCFX_merger/VCFX_merger.cpp b/src/VCFX_merger/VCFX_merger.cpp
index b8628062..c81634c1 100644
--- a/src/VCFX_merger/VCFX_merger.cpp
+++ b/src/VCFX_merger/VCFX_merger.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_merger.h"
 #include 
 #include 
@@ -120,6 +121,7 @@ void VCFXMerger::mergeVCF(const std::vector<std::string>& inputFiles, std::ostre
 
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_merger")) return 0;
     VCFXMerger merger;
     return merger.run(argc, argv);
 }
diff --git a/src/VCFX_metadata_summarizer/VCFX_metadata_summarizer.cpp b/src/VCFX_metadata_summarizer/VCFX_metadata_summarizer.cpp
index 795e8e0b..b8be9bc7 100644
--- a/src/VCFX_metadata_summarizer/VCFX_metadata_summarizer.cpp
+++ b/src/VCFX_metadata_summarizer/VCFX_metadata_summarizer.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_metadata_summarizer.h"
 #include 
 #include 
@@ -155,6 +156,7 @@ void VCFXMetadataSummarizer::printSummary() const {
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_metadata_summarizer")) return 0;
     VCFXMetadataSummarizer summarizer;
     return summarizer.run(argc, argv);
 }
diff --git a/src/VCFX_missing_data_handler/VCFX_missing_data_handler.cpp b/src/VCFX_missing_data_handler/VCFX_missing_data_handler.cpp
index 83191947..7812f65a 100644
--- a/src/VCFX_missing_data_handler/VCFX_missing_data_handler.cpp
+++ b/src/VCFX_missing_data_handler/VCFX_missing_data_handler.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_missing_data_handler.h"
 #include 
 #include 
@@ -259,6 +260,7 @@ bool handleMissingDataAll(const Arguments& args) {
  * @return int Exit status.
  */
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_missing_data_handler")) return 0;
     Arguments args;
     parseArguments(argc, argv, args);
 
diff --git a/src/VCFX_missing_detector/VCFX_missing_detector.cpp b/src/VCFX_missing_detector/VCFX_missing_detector.cpp
index 3ae391a1..78033e3d 100644
--- a/src/VCFX_missing_detector/VCFX_missing_detector.cpp
+++ b/src/VCFX_missing_detector/VCFX_missing_detector.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_missing_detector.h"
 #include 
 #include 
@@ -208,6 +209,7 @@ void VCFXMissingDetector::detectMissingGenotypes(std::istream& in, std::ostream&
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_missing_detector")) return 0;
     VCFXMissingDetector missingDetector;
     return missingDetector.run(argc, argv);
 }
diff --git a/src/VCFX_multiallelic_splitter/VCFX_multiallelic_splitter.cpp b/src/VCFX_multiallelic_splitter/VCFX_multiallelic_splitter.cpp
index 5f3a280a..8922df62 100644
--- a/src/VCFX_multiallelic_splitter/VCFX_multiallelic_splitter.cpp
+++ b/src/VCFX_multiallelic_splitter/VCFX_multiallelic_splitter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_multiallelic_splitter.h"
 #include 
 #include 
@@ -288,6 +289,7 @@ bool splitMultiAllelicVariants(std::istream &in, std::ostream &out){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_multiallelic_splitter")) return 0;
     for(int i=1; i< argc; i++){
         std::string arg= argv[i];
         if(arg=="--help"|| arg=="-h"){
diff --git a/src/VCFX_nonref_filter/VCFX_nonref_filter.cpp b/src/VCFX_nonref_filter/VCFX_nonref_filter.cpp
index ab9bf18c..10ec7601 100644
--- a/src/VCFX_nonref_filter/VCFX_nonref_filter.cpp
+++ b/src/VCFX_nonref_filter/VCFX_nonref_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_nonref_filter.h"
 #include 
 #include 
@@ -132,6 +133,7 @@ void VCFXNonRefFilter::filterNonRef(std::istream& in, std::ostream& out){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_nonref_filter")) return 0;
     VCFXNonRefFilter app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp b/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp
index 16710d8d..d8f383c3 100644
--- a/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp
+++ b/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_outlier_detector.h"
 #include 
 #include 
@@ -304,6 +305,7 @@ void VCFXOutlierDetector::detectOutliers(std::istream &in,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_outlier_detector")) return 0;
     VCFXOutlierDetector app;
     return app.run(argc, argv);
 }
\ No newline at end of file
diff --git a/src/VCFX_phase_checker/VCFX_phase_checker.cpp b/src/VCFX_phase_checker/VCFX_phase_checker.cpp
index 052da607..2d613cfa 100644
--- a/src/VCFX_phase_checker/VCFX_phase_checker.cpp
+++ b/src/VCFX_phase_checker/VCFX_phase_checker.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_phase_checker.h"
 #include 
 #include 
@@ -164,6 +165,7 @@ void VCFXPhaseChecker::processVCF(std::istream &in, std::ostream &out) {
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_phase_checker")) return 0;
     VCFXPhaseChecker checker;
     return checker.run(argc, argv);
 }
diff --git a/src/VCFX_phase_quality_filter/VCFX_phase_quality_filter.cpp b/src/VCFX_phase_quality_filter/VCFX_phase_quality_filter.cpp
index 87f6a835..19bcd240 100644
--- a/src/VCFX_phase_quality_filter/VCFX_phase_quality_filter.cpp
+++ b/src/VCFX_phase_quality_filter/VCFX_phase_quality_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_phase_quality_filter.h"
 #include 
 #include 
@@ -201,6 +202,7 @@ double VCFXPhaseQualityFilter::parsePQScore(const std::string &info) {
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_phase_quality_filter")) return 0;
     VCFXPhaseQualityFilter f;
     return f.run(argc, argv);
 }
diff --git a/src/VCFX_phred_filter/VCFX_phred_filter.cpp b/src/VCFX_phred_filter/VCFX_phred_filter.cpp
index d7d01711..0ea89008 100644
--- a/src/VCFX_phred_filter/VCFX_phred_filter.cpp
+++ b/src/VCFX_phred_filter/VCFX_phred_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_phred_filter.h"
 #include 
 #include 
@@ -119,6 +120,7 @@ double VCFXPhredFilter::parseQUAL(const std::string &qualStr, bool keepMissingAs
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_phred_filter")) return 0;
     VCFXPhredFilter pf;
     return pf.run(argc,argv);
 }
diff --git a/src/VCFX_population_filter/VCFX_population_filter.cpp b/src/VCFX_population_filter/VCFX_population_filter.cpp
index e1b33280..d1ebf032 100644
--- a/src/VCFX_population_filter/VCFX_population_filter.cpp
+++ b/src/VCFX_population_filter/VCFX_population_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_population_filter.h"
 #include 
 #include 
@@ -190,6 +191,7 @@ void VCFXPopulationFilter::filterPopulation(std::istream &in,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_population_filter")) return 0;
     VCFXPopulationFilter pf;
     return pf.run(argc, argv);
 }
diff --git a/src/VCFX_position_subsetter/VCFX_position_subsetter.cpp b/src/VCFX_position_subsetter/VCFX_position_subsetter.cpp
index 70258c7b..0437d160 100644
--- a/src/VCFX_position_subsetter/VCFX_position_subsetter.cpp
+++ b/src/VCFX_position_subsetter/VCFX_position_subsetter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_position_subsetter.h"
 #include 
 #include 
@@ -146,6 +147,7 @@ bool VCFXPositionSubsetter::subsetVCFByPosition(std::istream &in,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_position_subsetter")) return 0;
     VCFXPositionSubsetter subsetter;
     return subsetter.run(argc, argv);
 }
diff --git a/src/VCFX_probability_filter/VCFX_probability_filter.cpp b/src/VCFX_probability_filter/VCFX_probability_filter.cpp
index 770f6c8b..ee082f26 100644
--- a/src/VCFX_probability_filter/VCFX_probability_filter.cpp
+++ b/src/VCFX_probability_filter/VCFX_probability_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_probability_filter.h"
 #include 
 #include 
@@ -210,6 +211,7 @@ void VCFXProbabilityFilter::filterByProbability(std::istream& in, std::ostream&
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_probability_filter")) return 0;
     VCFXProbabilityFilter probabilityFilter;
     return probabilityFilter.run(argc, argv);
 }
\ No newline at end of file
diff --git a/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp b/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp
index 68632f96..9cccdcf1 100644
--- a/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp
+++ b/src/VCFX_quality_adjuster/VCFX_quality_adjuster.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_quality_adjuster.h"
 #include 
 #include 
@@ -175,6 +176,7 @@ void VCFXQualityAdjuster::adjustQualityScores(std::istream &in, std::ostream &ou
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_quality_adjuster")) return 0;
     VCFXQualityAdjuster app;
     return app.run(argc, argv);
 }
\ No newline at end of file
diff --git a/src/VCFX_record_filter/VCFX_record_filter.cpp b/src/VCFX_record_filter/VCFX_record_filter.cpp
index 6b0273e9..871ffb82 100644
--- a/src/VCFX_record_filter/VCFX_record_filter.cpp
+++ b/src/VCFX_record_filter/VCFX_record_filter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_record_filter.h"
 #include 
 #include 
@@ -320,6 +321,7 @@ void printHelp(){
 
 // main with typical argument parse
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_record_filter")) return 0;
     if(argc==1){
         printHelp();
         return 0;
diff --git a/src/VCFX_ref_comparator/VCFX_ref_comparator.cpp b/src/VCFX_ref_comparator/VCFX_ref_comparator.cpp
index bf121cab..1b26b073 100644
--- a/src/VCFX_ref_comparator/VCFX_ref_comparator.cpp
+++ b/src/VCFX_ref_comparator/VCFX_ref_comparator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_ref_comparator.h"
 #include 
 #include 
@@ -273,6 +274,7 @@ void VCFXRefComparator::compareVCF(std::istream &vcfIn, std::ostream &vcfOut){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_ref_comparator")) return 0;
     VCFXRefComparator refComp;
     return refComp.run(argc, argv);
 }
diff --git a/src/VCFX_reformatter/VCFX_reformatter.cpp b/src/VCFX_reformatter/VCFX_reformatter.cpp
index 070cca04..642b2b31 100644
--- a/src/VCFX_reformatter/VCFX_reformatter.cpp
+++ b/src/VCFX_reformatter/VCFX_reformatter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_reformatter.h"
 #include 
 #include 
@@ -461,6 +462,7 @@ std::string VCFXReformatter::applyFormatReorderToSample(const std::string &sampl
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_reformatter")) return 0;
     VCFXReformatter reformatter;
     return reformatter.run(argc, argv);
 }
diff --git a/src/VCFX_region_subsampler/VCFX_region_subsampler.cpp b/src/VCFX_region_subsampler/VCFX_region_subsampler.cpp
index 31d35247..60d5081e 100644
--- a/src/VCFX_region_subsampler/VCFX_region_subsampler.cpp
+++ b/src/VCFX_region_subsampler/VCFX_region_subsampler.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_region_subsampler.h"
 #include 
 #include 
@@ -254,6 +255,7 @@ void VCFXRegionSubsampler::processVCF(std::istream &in, std::ostream &out) {
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_region_subsampler")) return 0;
     VCFXRegionSubsampler app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_sample_extractor/VCFX_sample_extractor.cpp b/src/VCFX_sample_extractor/VCFX_sample_extractor.cpp
index 877f22f1..52f65eda 100644
--- a/src/VCFX_sample_extractor/VCFX_sample_extractor.cpp
+++ b/src/VCFX_sample_extractor/VCFX_sample_extractor.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_sample_extractor.h"
 #include 
 #include 
@@ -215,6 +216,7 @@ void VCFXSampleExtractor::extractSamples(std::istream &in, std::ostream &out,
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_sample_extractor")) return 0;
     VCFXSampleExtractor app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_sorter/VCFX_sorter.cpp b/src/VCFX_sorter/VCFX_sorter.cpp
index 5627f9ad..681c2a8b 100644
--- a/src/VCFX_sorter/VCFX_sorter.cpp
+++ b/src/VCFX_sorter/VCFX_sorter.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_sorter.h"
 #include 
 #include 
@@ -218,6 +219,7 @@ void VCFXSorter::outputVCF(std::ostream &out){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_sorter")) return 0;
     VCFXSorter app;
     return app.run(argc, argv);
 }
\ No newline at end of file
diff --git a/src/VCFX_subsampler/VCFX_subsampler.cpp b/src/VCFX_subsampler/VCFX_subsampler.cpp
index dfab860c..f1a8ec78 100644
--- a/src/VCFX_subsampler/VCFX_subsampler.cpp
+++ b/src/VCFX_subsampler/VCFX_subsampler.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_subsampler.h"
 #include 
 #include 
@@ -162,6 +163,7 @@ void VCFXSubsampler::subsampleLines(std::istream &in,
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_subsampler")) return 0;
     VCFXSubsampler app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_sv_handler/VCFX_sv_handler.cpp b/src/VCFX_sv_handler/VCFX_sv_handler.cpp
index 0ab3f1bd..c22f7c27 100644
--- a/src/VCFX_sv_handler/VCFX_sv_handler.cpp
+++ b/src/VCFX_sv_handler/VCFX_sv_handler.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_sv_handler.h"
 #include 
 #include 
@@ -205,6 +206,7 @@ void VCFXSvHandler::handleStructuralVariants(std::istream &in, std::ostream &out
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_sv_handler")) return 0;
     VCFXSvHandler app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_validator/VCFX_validator.cpp b/src/VCFX_validator/VCFX_validator.cpp
index c360636b..e70fc3e2 100644
--- a/src/VCFX_validator/VCFX_validator.cpp
+++ b/src/VCFX_validator/VCFX_validator.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_validator.h"
 #include 
 #include 
@@ -305,6 +306,7 @@ bool VCFXValidator::validateVCF(std::istream &in){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_validator")) return 0;
     VCFXValidator validator;
     return validator.run(argc, argv);
 }
diff --git a/src/VCFX_variant_classifier/VCFX_variant_classifier.cpp b/src/VCFX_variant_classifier/VCFX_variant_classifier.cpp
index 14d9d511..1e4d16d5 100644
--- a/src/VCFX_variant_classifier/VCFX_variant_classifier.cpp
+++ b/src/VCFX_variant_classifier/VCFX_variant_classifier.cpp
@@ -1,3 +1,4 @@
+#include "vcfx_core.h"
 #include "VCFX_variant_classifier.h"
 #include 
 #include 
@@ -326,6 +327,7 @@ void VCFXVariantClassifier::classifyStream(std::istream &in, std::ostream &out){
 }
 
 int main(int argc, char* argv[]){
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_variant_classifier")) return 0;
     VCFXVariantClassifier app;
     return app.run(argc, argv);
 }
diff --git a/src/VCFX_variant_counter/VCFX_variant_counter.cpp b/src/VCFX_variant_counter/VCFX_variant_counter.cpp
index 26b21175..72828643 100644
--- a/src/VCFX_variant_counter/VCFX_variant_counter.cpp
+++ b/src/VCFX_variant_counter/VCFX_variant_counter.cpp
@@ -105,6 +105,7 @@ int VCFXVariantCounter::countVariants(std::istream &in){
 }
 
 int main(int argc, char* argv[]) {
+    if (vcfx::handle_version_flag(argc, argv, "VCFX_variant_counter")) return 0;
     VCFXVariantCounter app;
     return app.run(argc, argv);
 }

From ca6485d04d2a40b1631490925fba733c4bb6c564 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 18:50:05 +0100
Subject: [PATCH 33/63] fix alt mismatch logic

---
 src/VCFX_alignment_checker/VCFX_alignment_checker.cpp | 11 +++++++----
 tests/expected/align_Y.txt                            |  1 +
 tests/out/align_Y.txt                                 |  1 +
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp b/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp
index f9df880d..6e407b46 100644
--- a/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp
+++ b/src/VCFX_alignment_checker/VCFX_alignment_checker.cpp
@@ -208,6 +208,9 @@ void VCFXAlignmentChecker::checkDiscrepancies(std::istream& vcfIn, std::ostream&
                 while (std::getline(ss, field, '\t')) {
                     headers.push_back(field);
                 }
+                if (!headers.empty() && !headers[0].empty() && headers[0][0] == '#') {
+                    headers[0].erase(0, 1); // drop leading '#'
+                }
                 for (size_t i = 0; i < headers.size(); ++i) {
                     if (headers[i] == "CHROM") chrIndex = static_cast(i);
                     else if (headers[i] == "POS")   posIndex = static_cast(i);
@@ -280,11 +283,11 @@ void VCFXAlignmentChecker::checkDiscrepancies(std::istream& vcfIn, std::ostream&
                         << "\t" << allele << "\t" << "REF_MISMATCH"
                         << "\t" << ref_base << "\t" << ref << "\n";
                 }
-                // Compare ALT in VCF vs reference genome's same position
-                // (Often for a standard SNP, the reference base is the only thing in the FASTA.)
-                // This is somewhat conceptual: we're checking if the ALT base is the same as reference at that position.
+                // Compare ALT to the reference base at the same position.
+                // Here we flag a mismatch when the ALT allele is actually the
+                // same as the reference (i.e. not a true variant).
                 std::string alt_base = ref_base; // The reference at that position
-                if (allele != alt_base) {
+                if (allele == alt_base) {
                     out << chrom << "\t" << posVal << "\t" << id << "\t" << ref
                         << "\t" << allele << "\t" << "ALT_MISMATCH"
                         << "\t" << alt_base << "\t" << allele << "\n";
diff --git a/tests/expected/align_Y.txt b/tests/expected/align_Y.txt
index fe7b6ec7..2a84859b 100644
--- a/tests/expected/align_Y.txt
+++ b/tests/expected/align_Y.txt
@@ -1 +1,2 @@
 CHROM	POS	ID	REF	ALT	Discrepancy_Type	Reference_Value	VCF_Value
+chr2	5	.	T	T	ALT_MISMATCH	T	T
diff --git a/tests/out/align_Y.txt b/tests/out/align_Y.txt
index fe7b6ec7..2a84859b 100644
--- a/tests/out/align_Y.txt
+++ b/tests/out/align_Y.txt
@@ -1 +1,2 @@
 CHROM	POS	ID	REF	ALT	Discrepancy_Type	Reference_Value	VCF_Value
+chr2	5	.	T	T	ALT_MISMATCH	T	T

From 38c99fd0fe441f09aa9bcc9a384335cebf557088 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 19:00:46 +0100
Subject: [PATCH 34/63] Ignore genotype query test artifacts

---
 .gitignore                                     | 13 +++++++++++++
 tests/test_genotype_query.sh                   |  6 +++---
 .../data/genotype_query/missing_malformed.vcf  |  8 --------
 .../tests/data/genotype_query/multi_sample.vcf |  9 ---------
 .../data/genotype_query/single_sample.vcf      |  6 ------
 .../genotype_query/missing_malformed_01.vcf    |  5 -----
 .../genotype_query/multi_11_flexible.vcf       |  7 -------
 .../genotype_query/multi_11_strict.vcf         |  6 ------
 .../genotype_query/multi_12_flexible.vcf       |  6 ------
 .../tests/expected/genotype_query/no_match.vcf |  5 -----
 .../genotype_query/single_sample_flex_01.vcf   |  5 -----
 .../genotype_query/single_sample_strict_01.vcf |  4 ----
 .../genotype_query/help_message.txt            | 18 ------------------
 .../genotype_query/long_equals_output.vcf      |  4 ----
 .../genotype_query/missing_args.txt            |  2 --
 .../test_1_single_flex_output.vcf              |  4 ----
 .../test_2_single_strict_output.vcf            |  3 ---
 .../test_3_multi_11_flex_output.vcf            |  6 ------
 .../test_4_multi_11_strict_output.vcf          |  5 -----
 .../test_5_multi_12_flex_output.vcf            |  5 -----
 .../test_6_missing_malformed_output.vcf        |  4 ----
 .../genotype_query/test_7_no_match_output.vcf  |  4 ----
 22 files changed, 16 insertions(+), 119 deletions(-)
 delete mode 100644 tests/tests/data/genotype_query/missing_malformed.vcf
 delete mode 100644 tests/tests/data/genotype_query/multi_sample.vcf
 delete mode 100644 tests/tests/data/genotype_query/single_sample.vcf
 delete mode 100644 tests/tests/expected/genotype_query/missing_malformed_01.vcf
 delete mode 100644 tests/tests/expected/genotype_query/multi_11_flexible.vcf
 delete mode 100644 tests/tests/expected/genotype_query/multi_11_strict.vcf
 delete mode 100644 tests/tests/expected/genotype_query/multi_12_flexible.vcf
 delete mode 100644 tests/tests/expected/genotype_query/no_match.vcf
 delete mode 100644 tests/tests/expected/genotype_query/single_sample_flex_01.vcf
 delete mode 100644 tests/tests/expected/genotype_query/single_sample_strict_01.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/help_message.txt
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/long_equals_output.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/missing_args.txt
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/test_1_single_flex_output.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/test_2_single_strict_output.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/test_3_multi_11_flex_output.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/test_4_multi_11_strict_output.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/test_5_multi_12_flex_output.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/test_6_missing_malformed_output.vcf
 delete mode 100644 tests/tests/tmp_genoquout/genotype_query/test_7_no_match_output.vcf

diff --git a/.gitignore b/.gitignore
index 9053a952..135e939d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,3 +42,16 @@ Thumbs.db
 tools.md
 prompt.md
 names.md
+
+# Temporary outputs from genotype_query tests
+tests/tmp/genotype_query/
+tests/data/genotype_query/missing_malformed.vcf
+tests/data/genotype_query/multi_sample.vcf
+tests/data/genotype_query/single_sample.vcf
+tests/expected/genotype_query/missing_malformed_01.vcf
+tests/expected/genotype_query/multi_11_flexible.vcf
+tests/expected/genotype_query/multi_11_strict.vcf
+tests/expected/genotype_query/multi_12_flexible.vcf
+tests/expected/genotype_query/no_match.vcf
+tests/expected/genotype_query/single_sample_flex_01.vcf
+tests/expected/genotype_query/single_sample_strict_01.vcf
diff --git a/tests/test_genotype_query.sh b/tests/test_genotype_query.sh
index 363e8beb..3d74d5f6 100755
--- a/tests/test_genotype_query.sh
+++ b/tests/test_genotype_query.sh
@@ -23,9 +23,9 @@ TOOL="../build/src/VCFX_genotype_query/VCFX_genotype_query"
 
 # Directories for test data, expected outputs, and actual output:
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-TMP_DATA_DIR="${SCRIPT_DIR}/tests/data/genotype_query"
-TMP_EXP_DIR="${SCRIPT_DIR}/tests/expected/genotype_query"
-TMP_OUT_DIR="${SCRIPT_DIR}/tests/tmp_genoquout/genotype_query"
+TMP_DATA_DIR="${SCRIPT_DIR}/data/genotype_query"
+TMP_EXP_DIR="${SCRIPT_DIR}/expected/genotype_query"
+TMP_OUT_DIR="${SCRIPT_DIR}/tmp/genotype_query"
 
 mkdir -p "$TMP_DATA_DIR" "$TMP_EXP_DIR" "$TMP_OUT_DIR"
 
diff --git a/tests/tests/data/genotype_query/missing_malformed.vcf b/tests/tests/data/genotype_query/missing_malformed.vcf
deleted file mode 100644
index fc65872a..00000000
--- a/tests/tests/data/genotype_query/missing_malformed.vcf
+++ /dev/null
@@ -1,8 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	150	rsM	C	G	.	PASS	.	GT:DP	0/1:12	1/1:30
-1	200	rsN	A	T	.	PASS	.	GT	0/1	.
-1	250	rsO	A	G	.	PASS	.		1/1	1/1
-chr1	300  # <10 fields on purpose
-1	400	rsQ	G	A	99	PASS	.	DP	10	15
-
diff --git a/tests/tests/data/genotype_query/multi_sample.vcf b/tests/tests/data/genotype_query/multi_sample.vcf
deleted file mode 100644
index 18425526..00000000
--- a/tests/tests/data/genotype_query/multi_sample.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-1	100	rsX	A	G	.	PASS	.	GT	0/0	0|1	1/1
-1	200	rsY	A	G,T	.	PASS	.	GT	1/2	2/2	0/2
-2	300	rsZ	C	T	.	PASS	.	GT	1|1	1/1	1/0
-2	400	.	G	A	.	PASS	.	GT	.	.	0/1
-
diff --git a/tests/tests/data/genotype_query/single_sample.vcf b/tests/tests/data/genotype_query/single_sample.vcf
deleted file mode 100644
index 233fa7d6..00000000
--- a/tests/tests/data/genotype_query/single_sample.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	100	rsA	A	G	50	PASS	.	GT	0/1
-1	200	rsB	A	G	50	PASS	.	GT	0|1
-1	300	rsC	A	G	50	PASS	.	GT	1|1
-
diff --git a/tests/tests/expected/genotype_query/missing_malformed_01.vcf b/tests/tests/expected/genotype_query/missing_malformed_01.vcf
deleted file mode 100644
index 3c6ba4cc..00000000
--- a/tests/tests/expected/genotype_query/missing_malformed_01.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	150	rsM	C	G	.	PASS	.	GT:DP	0/1:12	1/1:30
-1	200	rsN	A	T	.	PASS	.	GT	0/1	.
-
diff --git a/tests/tests/expected/genotype_query/multi_11_flexible.vcf b/tests/tests/expected/genotype_query/multi_11_flexible.vcf
deleted file mode 100644
index fe2af3a8..00000000
--- a/tests/tests/expected/genotype_query/multi_11_flexible.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-1	100	rsX	A	G	.	PASS	.	GT	0/0	0|1	1/1
-2	300	rsZ	C	T	.	PASS	.	GT	1|1	1/1	1/0
-
diff --git a/tests/tests/expected/genotype_query/multi_11_strict.vcf b/tests/tests/expected/genotype_query/multi_11_strict.vcf
deleted file mode 100644
index add030a4..00000000
--- a/tests/tests/expected/genotype_query/multi_11_strict.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-2	300	rsZ	C	T	.	PASS	.	GT	1|1	1/1	1/0
-
diff --git a/tests/tests/expected/genotype_query/multi_12_flexible.vcf b/tests/tests/expected/genotype_query/multi_12_flexible.vcf
deleted file mode 100644
index 01424f8e..00000000
--- a/tests/tests/expected/genotype_query/multi_12_flexible.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-1	200	rsY	A	G,T	.	PASS	.	GT	1/2	2/2	0/2
-
diff --git a/tests/tests/expected/genotype_query/no_match.vcf b/tests/tests/expected/genotype_query/no_match.vcf
deleted file mode 100644
index 9d46ba3c..00000000
--- a/tests/tests/expected/genotype_query/no_match.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-
diff --git a/tests/tests/expected/genotype_query/single_sample_flex_01.vcf b/tests/tests/expected/genotype_query/single_sample_flex_01.vcf
deleted file mode 100644
index 52ce2d29..00000000
--- a/tests/tests/expected/genotype_query/single_sample_flex_01.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	100	rsA	A	G	50	PASS	.	GT	0/1
-1	200	rsB	A	G	50	PASS	.	GT	0|1
-
diff --git a/tests/tests/expected/genotype_query/single_sample_strict_01.vcf b/tests/tests/expected/genotype_query/single_sample_strict_01.vcf
deleted file mode 100644
index 625472f0..00000000
--- a/tests/tests/expected/genotype_query/single_sample_strict_01.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	200	rsB	A	G	50	PASS	.	GT	0|1
-
diff --git a/tests/tests/tmp_genoquout/genotype_query/help_message.txt b/tests/tests/tmp_genoquout/genotype_query/help_message.txt
deleted file mode 100644
index 1d446ddf..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/help_message.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-VCFX_genotype_query
-Usage: VCFX_genotype_query [OPTIONS]
-
-Options:
-  --genotype-query, -g "GENOTYPE"  Specify the genotype to query (e.g., "0/1", "1/1").
-  --strict                        Use strict string compare (no phasing unify or allele sorting).
-  --help, -h                      Display this help message and exit.
-
-Description:
-  Reads a VCF from stdin, outputs only the lines (plus all header lines) where
-  at least one sample has the specified genotype in the 'GT' subfield.
-
-Examples:
-  # Flexible matching 0/1 or 0|1 => both become 0/1
-  ./VCFX_genotype_query --genotype-query "0/1" < input.vcf > out.vcf
-
-  # Strict matching => "0|1" won't match "0/1"
-  ./VCFX_genotype_query --genotype-query "0|1" --strict < input.vcf > out.vcf
diff --git a/tests/tests/tmp_genoquout/genotype_query/long_equals_output.vcf b/tests/tests/tmp_genoquout/genotype_query/long_equals_output.vcf
deleted file mode 100644
index 27e716b6..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/long_equals_output.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	100	rsA	A	G	50	PASS	.	GT	0/1
-1	200	rsB	A	G	50	PASS	.	GT	0|1
diff --git a/tests/tests/tmp_genoquout/genotype_query/missing_args.txt b/tests/tests/tmp_genoquout/genotype_query/missing_args.txt
deleted file mode 100644
index c8c85b1b..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/missing_args.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Usage: ../build/src/VCFX_genotype_query/VCFX_genotype_query --genotype-query "0/1" [--strict] < input.vcf > output.vcf
-Use --help for usage.
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_1_single_flex_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_1_single_flex_output.vcf
deleted file mode 100644
index 27e716b6..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_1_single_flex_output.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	100	rsA	A	G	50	PASS	.	GT	0/1
-1	200	rsB	A	G	50	PASS	.	GT	0|1
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_2_single_strict_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_2_single_strict_output.vcf
deleted file mode 100644
index 314ce18f..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_2_single_strict_output.vcf
+++ /dev/null
@@ -1,3 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ONLYSAMPLE
-1	200	rsB	A	G	50	PASS	.	GT	0|1
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_3_multi_11_flex_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_3_multi_11_flex_output.vcf
deleted file mode 100644
index 0b19c2aa..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_3_multi_11_flex_output.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-1	100	rsX	A	G	.	PASS	.	GT	0/0	0|1	1/1
-2	300	rsZ	C	T	.	PASS	.	GT	1|1	1/1	1/0
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_4_multi_11_strict_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_4_multi_11_strict_output.vcf
deleted file mode 100644
index f07c0d2b..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_4_multi_11_strict_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-2	300	rsZ	C	T	.	PASS	.	GT	1|1	1/1	1/0
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_5_multi_12_flex_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_5_multi_12_flex_output.vcf
deleted file mode 100644
index 4212ca27..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_5_multi_12_flex_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3
-1	200	rsY	A	G,T	.	PASS	.	GT	1/2	2/2	0/2
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_6_missing_malformed_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_6_missing_malformed_output.vcf
deleted file mode 100644
index a900c879..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_6_missing_malformed_output.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	150	rsM	C	G	.	PASS	.	GT:DP	0/1:12	1/1:30
-1	200	rsN	A	T	.	PASS	.	GT	0/1	.
diff --git a/tests/tests/tmp_genoquout/genotype_query/test_7_no_match_output.vcf b/tests/tests/tmp_genoquout/genotype_query/test_7_no_match_output.vcf
deleted file mode 100644
index 3c654126..00000000
--- a/tests/tests/tmp_genoquout/genotype_query/test_7_no_match_output.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-##contig=
-##contig=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	S1	S2	S3

From 13d42cbb9adfb63c9235deba4358054a98bba613 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 19:28:10 +0100
Subject: [PATCH 35/63] Ignore temporary test outputs

---
 .gitignore                                    |  5 ++++
 tests/tmp/afr_output.vcf                      |  9 -------
 tests/tmp/afr_samples_err.log                 |  0
 tests/tmp/afr_samples_output.tsv              |  4 ---
 tests/tmp/basic_threshold_20.vcf              |  6 -----
 tests/tmp/basic_threshold_30.vcf              |  5 ----
 tests/tmp/basic_threshold_30_keep_missing.vcf |  6 -----
 tests/tmp/complex_err.log                     |  0
 tests/tmp/complex_output.tsv                  |  3 ---
 tests/tmp/dp_ge_20_all_err.log                |  0
 tests/tmp/dp_ge_20_all_output.vcf             |  7 ------
 tests/tmp/dp_ge_20_any_err.log                |  0
 tests/tmp/dp_ge_20_any_output.vcf             | 12 ---------
 tests/tmp/dp_le_20_all_err.log                |  0
 tests/tmp/dp_le_20_all_output.vcf             |  9 -------
 tests/tmp/dp_lt_19_5_any_err.log              |  0
 tests/tmp/dp_lt_19_5_any_output.vcf           | 12 ---------
 tests/tmp/eas_output.vcf                      |  9 -------
 tests/tmp/eas_samples_err.log                 |  0
 tests/tmp/eas_samples_output.tsv              |  4 ---
 tests/tmp/empty_map_error.txt                 |  1 -
 tests/tmp/empty_map_output.vcf                |  9 -------
 tests/tmp/eq_operator_err.log                 |  0
 tests/tmp/eq_operator_output.vcf              |  5 ----
 tests/tmp/equals_format_cleaned.vcf           |  1 -
 tests/tmp/equals_format_expected_cleaned.vcf  |  1 -
 tests/tmp/equals_format_output.vcf            | 10 --------
 tests/tmp/error.txt                           |  1 -
 tests/tmp/eur_output.vcf                      |  9 -------
 tests/tmp/eur_samples_err.log                 |  0
 tests/tmp/eur_samples_output.tsv              |  4 ---
 tests/tmp/ge_operator_err.log                 |  0
 tests/tmp/ge_operator_output.vcf              |  5 ----
 tests/tmp/gq_eq_30_all_err.log                |  0
 tests/tmp/gq_eq_30_all_output.vcf             |  7 ------
 tests/tmp/gq_gt_20_all_err.log                |  0
 tests/tmp/gq_gt_20_all_output.vcf             |  9 -------
 tests/tmp/gq_gt_20_any_err.log                |  0
 tests/tmp/gq_gt_20_any_output.vcf             | 12 ---------
 tests/tmp/gq_gt_24_5_all_err.log              |  0
 tests/tmp/gq_gt_24_5_all_output.vcf           |  8 ------
 tests/tmp/gq_lt_30_all_err.log                |  0
 tests/tmp/gq_lt_30_all_output.vcf             |  7 ------
 tests/tmp/gq_lt_30_any_err.log                |  0
 tests/tmp/gq_lt_30_any_output.vcf             | 12 ---------
 tests/tmp/gq_ne_30_all_err.log                |  0
 tests/tmp/gq_ne_30_all_output.vcf             | 10 --------
 tests/tmp/gt_operator_err.log                 |  0
 tests/tmp/gt_operator_output.vcf              |  5 ----
 tests/tmp/help_message.txt                    | 20 ---------------
 tests/tmp/help_output.txt                     | 25 -------------------
 tests/tmp/identity_transform_err.log          |  0
 tests/tmp/identity_transform_output.vcf       | 10 --------
 tests/tmp/invalid_condition_err.log           |  1 -
 tests/tmp/invalid_condition_output.vcf        |  0
 tests/tmp/invalid_err.log                     |  4 ---
 tests/tmp/invalid_error.log                   |  1 -
 tests/tmp/invalid_mode_err.log                |  2 --
 tests/tmp/invalid_mode_out.vcf                | 18 -------------
 tests/tmp/invalid_mode_output.vcf             | 19 --------------
 tests/tmp/invalid_out.vcf                     |  0
 tests/tmp/invalid_output.tsv                  |  2 --
 tests/tmp/invalid_output.vcf                  |  0
 tests/tmp/invalid_records_threshold_30.vcf    |  5 ----
 tests/tmp/invalid_vcf_err.log                 |  1 -
 tests/tmp/invalid_vcf_output.tsv              |  0
 tests/tmp/le_operator_err.log                 |  0
 tests/tmp/le_operator_output.vcf              |  6 -----
 tests/tmp/log_transform_edge_err.log          |  1 -
 tests/tmp/log_transform_edge_output.vcf       |  6 -----
 tests/tmp/log_transform_err.log               |  0
 tests/tmp/log_transform_no_clamp_err.log      |  0
 tests/tmp/log_transform_no_clamp_output.vcf   | 10 --------
 tests/tmp/log_transform_output.vcf            | 10 --------
 tests/tmp/lt_operator_err.log                 |  0
 tests/tmp/lt_operator_output.vcf              |  5 ----
 tests/tmp/malformed_err.log                   |  2 --
 tests/tmp/malformed_freq_err.log              |  3 ---
 tests/tmp/malformed_freq_output.tsv           |  0
 tests/tmp/malformed_freqs.txt                 |  1 -
 tests/tmp/malformed_input_err.log             |  2 --
 tests/tmp/malformed_input_output.vcf          |  2 --
 tests/tmp/malformed_out.vcf                   |  2 --
 tests/tmp/malformed_output.vcf                |  7 ------
 .../malformed_query_01_flexible_output.vcf    | 11 --------
 tests/tmp/malformed_threshold_30.vcf          |  4 ---
 .../malformed_threshold_30_keep_missing.vcf   |  5 ----
 tests/tmp/malformed_threshold_5.vcf           |  5 ----
 tests/tmp/malformed_vcf_err.log               |  3 ---
 tests/tmp/malformed_vcf_output.vcf            |  3 ---
 tests/tmp/missing_arg_error.log               |  0
 tests/tmp/missing_arg_output.vcf              | 25 -------------------
 tests/tmp/missing_args_output.txt             |  2 --
 tests/tmp/missing_err.log                     |  0
 tests/tmp/missing_field_err.log               |  1 -
 tests/tmp/missing_field_gq_gt_20_err.log      |  0
 tests/tmp/missing_field_gq_gt_20_output.vcf   |  5 ----
 tests/tmp/missing_field_output.vcf            |  4 ---
 tests/tmp/missing_filter_err.log              |  1 -
 tests/tmp/missing_filter_output.vcf           | 18 -------------
 tests/tmp/missing_freq_err.log                |  2 --
 tests/tmp/missing_freq_output.tsv             |  0
 tests/tmp/missing_output.tsv                  |  3 ---
 tests/tmp/missing_samples_err.log             |  0
 tests/tmp/missing_samples_output.tsv          |  4 ---
 tests/tmp/missing_value_gq_gt_20_all_err.log  |  0
 .../tmp/missing_value_gq_gt_20_all_output.vcf |  7 ------
 tests/tmp/missing_value_gq_gt_20_any_err.log  |  0
 .../tmp/missing_value_gq_gt_20_any_output.vcf |  8 ------
 tests/tmp/mixed_population_check_err.log      |  0
 tests/tmp/mixed_population_check_output.tsv   |  5 ----
 tests/tmp/mixed_population_check_results.tmp  |  4 ---
 tests/tmp/mixed_samples_err.log               |  0
 tests/tmp/mixed_samples_output.tsv            |  5 ----
 tests/tmp/multiallelic_err.log                |  0
 tests/tmp/multiallelic_output.tsv             |  3 ---
 tests/tmp/multiallelic_samples_err.log        |  0
 tests/tmp/multiallelic_samples_output.tsv     |  4 ---
 tests/tmp/ne_operator_err.log                 |  0
 tests/tmp/ne_operator_output.vcf              | 10 --------
 tests/tmp/no_args_output.txt                  | 17 -------------
 tests/tmp/no_gt_err.log                       |  0
 tests/tmp/no_gt_output.tsv                    |  1 -
 tests/tmp/output.txt                          | 18 -------------
 tests/tmp/phased_err.log                      |  0
 tests/tmp/phased_output.tsv                   |  3 ---
 tests/tmp/phased_samples_err.log              |  0
 tests/tmp/phased_samples_output.tsv           |  4 ---
 tests/tmp/pl_gt_40_any_err.log                |  0
 tests/tmp/pl_gt_40_any_output.vcf             |  9 -------
 tests/tmp/query_01_flexible_cleaned.vcf       |  1 -
 .../query_01_flexible_expected_cleaned.vcf    |  1 -
 tests/tmp/query_01_flexible_output.vcf        | 10 --------
 tests/tmp/query_01_pipe_flexible_output.vcf   | 19 --------------
 tests/tmp/query_01_pipe_strict_output.vcf     | 10 --------
 tests/tmp/query_01_strict_output.vcf          | 15 -----------
 tests/tmp/query_11_flexible_output.vcf        | 13 ----------
 tests/tmp/query_multi_02_flexible_output.vcf  |  8 ------
 tests/tmp/simple_err.log                      |  0
 tests/tmp/simple_freqs_err.log                |  0
 tests/tmp/simple_freqs_output.tsv             |  5 ----
 tests/tmp/simple_output.tsv                   |  5 ----
 tests/tmp/sqrt_transform_edge_err.log         |  1 -
 tests/tmp/sqrt_transform_edge_output.vcf      |  6 -----
 tests/tmp/sqrt_transform_err.log              |  0
 tests/tmp/sqrt_transform_output.vcf           | 10 --------
 tests/tmp/square_transform_edge_err.log       |  1 -
 tests/tmp/square_transform_edge_output.vcf    |  6 -----
 tests/tmp/square_transform_err.log            |  0
 tests/tmp/square_transform_no_clamp_err.log   |  0
 .../tmp/square_transform_no_clamp_output.vcf  | 10 --------
 tests/tmp/square_transform_output.vcf         | 10 --------
 tests/tmp/unknown_output.vcf                  |  9 -------
 153 files changed, 5 insertions(+), 701 deletions(-)
 delete mode 100644 tests/tmp/afr_output.vcf
 delete mode 100644 tests/tmp/afr_samples_err.log
 delete mode 100644 tests/tmp/afr_samples_output.tsv
 delete mode 100644 tests/tmp/basic_threshold_20.vcf
 delete mode 100644 tests/tmp/basic_threshold_30.vcf
 delete mode 100644 tests/tmp/basic_threshold_30_keep_missing.vcf
 delete mode 100644 tests/tmp/complex_err.log
 delete mode 100644 tests/tmp/complex_output.tsv
 delete mode 100644 tests/tmp/dp_ge_20_all_err.log
 delete mode 100644 tests/tmp/dp_ge_20_all_output.vcf
 delete mode 100644 tests/tmp/dp_ge_20_any_err.log
 delete mode 100644 tests/tmp/dp_ge_20_any_output.vcf
 delete mode 100644 tests/tmp/dp_le_20_all_err.log
 delete mode 100644 tests/tmp/dp_le_20_all_output.vcf
 delete mode 100644 tests/tmp/dp_lt_19_5_any_err.log
 delete mode 100644 tests/tmp/dp_lt_19_5_any_output.vcf
 delete mode 100644 tests/tmp/eas_output.vcf
 delete mode 100644 tests/tmp/eas_samples_err.log
 delete mode 100644 tests/tmp/eas_samples_output.tsv
 delete mode 100644 tests/tmp/empty_map_error.txt
 delete mode 100644 tests/tmp/empty_map_output.vcf
 delete mode 100644 tests/tmp/eq_operator_err.log
 delete mode 100644 tests/tmp/eq_operator_output.vcf
 delete mode 100644 tests/tmp/equals_format_cleaned.vcf
 delete mode 100644 tests/tmp/equals_format_expected_cleaned.vcf
 delete mode 100644 tests/tmp/equals_format_output.vcf
 delete mode 100644 tests/tmp/error.txt
 delete mode 100644 tests/tmp/eur_output.vcf
 delete mode 100644 tests/tmp/eur_samples_err.log
 delete mode 100644 tests/tmp/eur_samples_output.tsv
 delete mode 100644 tests/tmp/ge_operator_err.log
 delete mode 100644 tests/tmp/ge_operator_output.vcf
 delete mode 100644 tests/tmp/gq_eq_30_all_err.log
 delete mode 100644 tests/tmp/gq_eq_30_all_output.vcf
 delete mode 100644 tests/tmp/gq_gt_20_all_err.log
 delete mode 100644 tests/tmp/gq_gt_20_all_output.vcf
 delete mode 100644 tests/tmp/gq_gt_20_any_err.log
 delete mode 100644 tests/tmp/gq_gt_20_any_output.vcf
 delete mode 100644 tests/tmp/gq_gt_24_5_all_err.log
 delete mode 100644 tests/tmp/gq_gt_24_5_all_output.vcf
 delete mode 100644 tests/tmp/gq_lt_30_all_err.log
 delete mode 100644 tests/tmp/gq_lt_30_all_output.vcf
 delete mode 100644 tests/tmp/gq_lt_30_any_err.log
 delete mode 100644 tests/tmp/gq_lt_30_any_output.vcf
 delete mode 100644 tests/tmp/gq_ne_30_all_err.log
 delete mode 100644 tests/tmp/gq_ne_30_all_output.vcf
 delete mode 100644 tests/tmp/gt_operator_err.log
 delete mode 100644 tests/tmp/gt_operator_output.vcf
 delete mode 100644 tests/tmp/help_message.txt
 delete mode 100644 tests/tmp/help_output.txt
 delete mode 100644 tests/tmp/identity_transform_err.log
 delete mode 100644 tests/tmp/identity_transform_output.vcf
 delete mode 100644 tests/tmp/invalid_condition_err.log
 delete mode 100644 tests/tmp/invalid_condition_output.vcf
 delete mode 100644 tests/tmp/invalid_err.log
 delete mode 100644 tests/tmp/invalid_error.log
 delete mode 100644 tests/tmp/invalid_mode_err.log
 delete mode 100644 tests/tmp/invalid_mode_out.vcf
 delete mode 100644 tests/tmp/invalid_mode_output.vcf
 delete mode 100644 tests/tmp/invalid_out.vcf
 delete mode 100644 tests/tmp/invalid_output.tsv
 delete mode 100644 tests/tmp/invalid_output.vcf
 delete mode 100644 tests/tmp/invalid_records_threshold_30.vcf
 delete mode 100644 tests/tmp/invalid_vcf_err.log
 delete mode 100644 tests/tmp/invalid_vcf_output.tsv
 delete mode 100644 tests/tmp/le_operator_err.log
 delete mode 100644 tests/tmp/le_operator_output.vcf
 delete mode 100644 tests/tmp/log_transform_edge_err.log
 delete mode 100644 tests/tmp/log_transform_edge_output.vcf
 delete mode 100644 tests/tmp/log_transform_err.log
 delete mode 100644 tests/tmp/log_transform_no_clamp_err.log
 delete mode 100644 tests/tmp/log_transform_no_clamp_output.vcf
 delete mode 100644 tests/tmp/log_transform_output.vcf
 delete mode 100644 tests/tmp/lt_operator_err.log
 delete mode 100644 tests/tmp/lt_operator_output.vcf
 delete mode 100644 tests/tmp/malformed_err.log
 delete mode 100644 tests/tmp/malformed_freq_err.log
 delete mode 100644 tests/tmp/malformed_freq_output.tsv
 delete mode 100644 tests/tmp/malformed_freqs.txt
 delete mode 100644 tests/tmp/malformed_input_err.log
 delete mode 100644 tests/tmp/malformed_input_output.vcf
 delete mode 100644 tests/tmp/malformed_out.vcf
 delete mode 100644 tests/tmp/malformed_output.vcf
 delete mode 100644 tests/tmp/malformed_query_01_flexible_output.vcf
 delete mode 100644 tests/tmp/malformed_threshold_30.vcf
 delete mode 100644 tests/tmp/malformed_threshold_30_keep_missing.vcf
 delete mode 100644 tests/tmp/malformed_threshold_5.vcf
 delete mode 100644 tests/tmp/malformed_vcf_err.log
 delete mode 100644 tests/tmp/malformed_vcf_output.vcf
 delete mode 100644 tests/tmp/missing_arg_error.log
 delete mode 100644 tests/tmp/missing_arg_output.vcf
 delete mode 100644 tests/tmp/missing_args_output.txt
 delete mode 100644 tests/tmp/missing_err.log
 delete mode 100644 tests/tmp/missing_field_err.log
 delete mode 100644 tests/tmp/missing_field_gq_gt_20_err.log
 delete mode 100644 tests/tmp/missing_field_gq_gt_20_output.vcf
 delete mode 100644 tests/tmp/missing_field_output.vcf
 delete mode 100644 tests/tmp/missing_filter_err.log
 delete mode 100644 tests/tmp/missing_filter_output.vcf
 delete mode 100644 tests/tmp/missing_freq_err.log
 delete mode 100644 tests/tmp/missing_freq_output.tsv
 delete mode 100644 tests/tmp/missing_output.tsv
 delete mode 100644 tests/tmp/missing_samples_err.log
 delete mode 100644 tests/tmp/missing_samples_output.tsv
 delete mode 100644 tests/tmp/missing_value_gq_gt_20_all_err.log
 delete mode 100644 tests/tmp/missing_value_gq_gt_20_all_output.vcf
 delete mode 100644 tests/tmp/missing_value_gq_gt_20_any_err.log
 delete mode 100644 tests/tmp/missing_value_gq_gt_20_any_output.vcf
 delete mode 100644 tests/tmp/mixed_population_check_err.log
 delete mode 100644 tests/tmp/mixed_population_check_output.tsv
 delete mode 100644 tests/tmp/mixed_population_check_results.tmp
 delete mode 100644 tests/tmp/mixed_samples_err.log
 delete mode 100644 tests/tmp/mixed_samples_output.tsv
 delete mode 100644 tests/tmp/multiallelic_err.log
 delete mode 100644 tests/tmp/multiallelic_output.tsv
 delete mode 100644 tests/tmp/multiallelic_samples_err.log
 delete mode 100644 tests/tmp/multiallelic_samples_output.tsv
 delete mode 100644 tests/tmp/ne_operator_err.log
 delete mode 100644 tests/tmp/ne_operator_output.vcf
 delete mode 100644 tests/tmp/no_args_output.txt
 delete mode 100644 tests/tmp/no_gt_err.log
 delete mode 100644 tests/tmp/no_gt_output.tsv
 delete mode 100644 tests/tmp/output.txt
 delete mode 100644 tests/tmp/phased_err.log
 delete mode 100644 tests/tmp/phased_output.tsv
 delete mode 100644 tests/tmp/phased_samples_err.log
 delete mode 100644 tests/tmp/phased_samples_output.tsv
 delete mode 100644 tests/tmp/pl_gt_40_any_err.log
 delete mode 100644 tests/tmp/pl_gt_40_any_output.vcf
 delete mode 100644 tests/tmp/query_01_flexible_cleaned.vcf
 delete mode 100644 tests/tmp/query_01_flexible_expected_cleaned.vcf
 delete mode 100644 tests/tmp/query_01_flexible_output.vcf
 delete mode 100644 tests/tmp/query_01_pipe_flexible_output.vcf
 delete mode 100644 tests/tmp/query_01_pipe_strict_output.vcf
 delete mode 100644 tests/tmp/query_01_strict_output.vcf
 delete mode 100644 tests/tmp/query_11_flexible_output.vcf
 delete mode 100644 tests/tmp/query_multi_02_flexible_output.vcf
 delete mode 100644 tests/tmp/simple_err.log
 delete mode 100644 tests/tmp/simple_freqs_err.log
 delete mode 100644 tests/tmp/simple_freqs_output.tsv
 delete mode 100644 tests/tmp/simple_output.tsv
 delete mode 100644 tests/tmp/sqrt_transform_edge_err.log
 delete mode 100644 tests/tmp/sqrt_transform_edge_output.vcf
 delete mode 100644 tests/tmp/sqrt_transform_err.log
 delete mode 100644 tests/tmp/sqrt_transform_output.vcf
 delete mode 100644 tests/tmp/square_transform_edge_err.log
 delete mode 100644 tests/tmp/square_transform_edge_output.vcf
 delete mode 100644 tests/tmp/square_transform_err.log
 delete mode 100644 tests/tmp/square_transform_no_clamp_err.log
 delete mode 100644 tests/tmp/square_transform_no_clamp_output.vcf
 delete mode 100644 tests/tmp/square_transform_output.vcf
 delete mode 100644 tests/tmp/unknown_output.vcf

diff --git a/.gitignore b/.gitignore
index 135e939d..40b05829 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,3 +55,8 @@ tests/expected/genotype_query/multi_12_flexible.vcf
 tests/expected/genotype_query/no_match.vcf
 tests/expected/genotype_query/single_sample_flex_01.vcf
 tests/expected/genotype_query/single_sample_strict_01.vcf
+
+# General temporary test output directories
+tests/tmp/
+tests/out/
+tmp/
diff --git a/tests/tmp/afr_output.vcf b/tests/tmp/afr_output.vcf
deleted file mode 100644
index 6d48cfc7..00000000
--- a/tests/tmp/afr_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE3_AFR	SAMPLE4_AFR
-1	100	rs123	A	T	50	PASS	AF=0.1	GT:DP	1|1:20	0|1:22
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP	0|0:19	0|1:21
-2	150	rs789	T	C	70	PASS	AF=0.3	GT:DP	0|0:18	0|1:24
-2	250	rs012	G	A	80	PASS	AF=0.4	GT:DP	1|1:25	0|1:20
diff --git a/tests/tmp/afr_samples_err.log b/tests/tmp/afr_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/afr_samples_output.tsv b/tests/tmp/afr_samples_output.tsv
deleted file mode 100644
index 09be6da5..00000000
--- a/tests/tmp/afr_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-AFR_SAMPLE1	AFR
-AFR_SAMPLE2	AFR
-AFR_SAMPLE3	AFR
diff --git a/tests/tmp/basic_threshold_20.vcf b/tests/tmp/basic_threshold_20.vcf
deleted file mode 100644
index 82661320..00000000
--- a/tests/tmp/basic_threshold_20.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
-1	200	.	C	T	20	PASS	DP=25
-1	300	.	G	A	30	PASS	DP=40
diff --git a/tests/tmp/basic_threshold_30.vcf b/tests/tmp/basic_threshold_30.vcf
deleted file mode 100644
index 3ace7755..00000000
--- a/tests/tmp/basic_threshold_30.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
-1	300	.	G	A	30	PASS	DP=40
diff --git a/tests/tmp/basic_threshold_30_keep_missing.vcf b/tests/tmp/basic_threshold_30_keep_missing.vcf
deleted file mode 100644
index ed3b35f8..00000000
--- a/tests/tmp/basic_threshold_30_keep_missing.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
-1	300	.	G	A	30	PASS	DP=40
-1	400	.	T	C	.	PASS	DP=35
diff --git a/tests/tmp/complex_err.log b/tests/tmp/complex_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/complex_output.tsv b/tests/tmp/complex_output.tsv
deleted file mode 100644
index 189c397a..00000000
--- a/tests/tmp/complex_output.tsv
+++ /dev/null
@@ -1,3 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G	0.5000
-1	200	rs2	C	T	0.3333
diff --git a/tests/tmp/dp_ge_20_all_err.log b/tests/tmp/dp_ge_20_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/dp_ge_20_all_output.vcf b/tests/tmp/dp_ge_20_all_output.vcf
deleted file mode 100644
index 85eebfcf..00000000
--- a/tests/tmp/dp_ge_20_all_output.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/dp_ge_20_any_err.log b/tests/tmp/dp_ge_20_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/dp_ge_20_any_output.vcf b/tests/tmp/dp_ge_20_any_output.vcf
deleted file mode 100644
index d9b7ce08..00000000
--- a/tests/tmp/dp_ge_20_any_output.vcf
+++ /dev/null
@@ -1,12 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GQ:DP:PL	0/1:15:18:20,0,30	0/1:10:25:15,0,20	0/0:35:22:0,35,45
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP:PL	0/0:45:30:0,45,60	0/0:50:20:0,50,65	1/1:5:8:25,5,0
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GQ:DP:PL	0/1:20:25:30,0,35	1/1:30:18:40,30,0	0/1:25:22:32,0,38
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/dp_le_20_all_err.log b/tests/tmp/dp_le_20_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/dp_le_20_all_output.vcf b/tests/tmp/dp_le_20_all_output.vcf
deleted file mode 100644
index 87339394..00000000
--- a/tests/tmp/dp_le_20_all_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/dp_lt_19_5_any_err.log b/tests/tmp/dp_lt_19_5_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/dp_lt_19_5_any_output.vcf b/tests/tmp/dp_lt_19_5_any_output.vcf
deleted file mode 100644
index d9b7ce08..00000000
--- a/tests/tmp/dp_lt_19_5_any_output.vcf
+++ /dev/null
@@ -1,12 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GQ:DP:PL	0/1:15:18:20,0,30	0/1:10:25:15,0,20	0/0:35:22:0,35,45
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP:PL	0/0:45:30:0,45,60	0/0:50:20:0,50,65	1/1:5:8:25,5,0
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GQ:DP:PL	0/1:20:25:30,0,35	1/1:30:18:40,30,0	0/1:25:22:32,0,38
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/eas_output.vcf b/tests/tmp/eas_output.vcf
deleted file mode 100644
index 58a5bca7..00000000
--- a/tests/tmp/eas_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE5_EAS
-1	100	rs123	A	T	50	PASS	AF=0.1	GT:DP	0|0:18
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP	1|1:26
-2	150	rs789	T	C	70	PASS	AF=0.3	GT:DP	1|1:27
-2	250	rs012	G	A	80	PASS	AF=0.4	GT:DP	0|0:19
diff --git a/tests/tmp/eas_samples_err.log b/tests/tmp/eas_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/eas_samples_output.tsv b/tests/tmp/eas_samples_output.tsv
deleted file mode 100644
index 3363108c..00000000
--- a/tests/tmp/eas_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-EAS_SAMPLE1	EAS
-EAS_SAMPLE2	EAS
-EAS_SAMPLE3	EAS
diff --git a/tests/tmp/empty_map_error.txt b/tests/tmp/empty_map_error.txt
deleted file mode 100644
index f3c371ac..00000000
--- a/tests/tmp/empty_map_error.txt
+++ /dev/null
@@ -1 +0,0 @@
-Warning: No samples found for population tag: EUR
diff --git a/tests/tmp/empty_map_output.vcf b/tests/tmp/empty_map_output.vcf
deleted file mode 100644
index ae84bda2..00000000
--- a/tests/tmp/empty_map_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT
-1	100	rs123	A	T	50	PASS	AF=0.1	GT:DP
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP
-2	150	rs789	T	C	70	PASS	AF=0.3	GT:DP
-2	250	rs012	G	A	80	PASS	AF=0.4	GT:DP
diff --git a/tests/tmp/eq_operator_err.log b/tests/tmp/eq_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/eq_operator_output.vcf b/tests/tmp/eq_operator_output.vcf
deleted file mode 100644
index 0969a3e1..00000000
--- a/tests/tmp/eq_operator_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/equals_format_cleaned.vcf b/tests/tmp/equals_format_cleaned.vcf
deleted file mode 100644
index 2bc29661..00000000
--- a/tests/tmp/equals_format_cleaned.vcf
+++ /dev/null
@@ -1 +0,0 @@
-##fileformat=VCFv4.2##INFO=##FORMAT=##FORMAT=#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE31	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:221	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:101	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:221	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:121	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/equals_format_expected_cleaned.vcf b/tests/tmp/equals_format_expected_cleaned.vcf
deleted file mode 100644
index 2bc29661..00000000
--- a/tests/tmp/equals_format_expected_cleaned.vcf
+++ /dev/null
@@ -1 +0,0 @@
-##fileformat=VCFv4.2##INFO=##FORMAT=##FORMAT=#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE31	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:221	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:101	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:221	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:121	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/equals_format_output.vcf b/tests/tmp/equals_format_output.vcf
deleted file mode 100644
index 696571e0..00000000
--- a/tests/tmp/equals_format_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:10
-1	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:22
-1	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:12
-1	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/error.txt b/tests/tmp/error.txt
deleted file mode 100644
index 44150a0b..00000000
--- a/tests/tmp/error.txt
+++ /dev/null
@@ -1 +0,0 @@
-Error: --mode must be 'any' or 'all'.
diff --git a/tests/tmp/eur_output.vcf b/tests/tmp/eur_output.vcf
deleted file mode 100644
index 00af6e9b..00000000
--- a/tests/tmp/eur_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1_EUR	SAMPLE2_EUR
-1	100	rs123	A	T	50	PASS	AF=0.1	GT:DP	0|0:30	0|1:25
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP	0|1:28	1|1:32
-2	150	rs789	T	C	70	PASS	AF=0.3	GT:DP	1|1:35	0|1:29
-2	250	rs012	G	A	80	PASS	AF=0.4	GT:DP	0|1:31	0|0:27
diff --git a/tests/tmp/eur_samples_err.log b/tests/tmp/eur_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/eur_samples_output.tsv b/tests/tmp/eur_samples_output.tsv
deleted file mode 100644
index 15c9445e..00000000
--- a/tests/tmp/eur_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-EUR_SAMPLE1	EUR
-EUR_SAMPLE2	EUR
-EUR_SAMPLE3	EUR
diff --git a/tests/tmp/ge_operator_err.log b/tests/tmp/ge_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/ge_operator_output.vcf b/tests/tmp/ge_operator_output.vcf
deleted file mode 100644
index 0969a3e1..00000000
--- a/tests/tmp/ge_operator_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/gq_eq_30_all_err.log b/tests/tmp/gq_eq_30_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_eq_30_all_output.vcf b/tests/tmp/gq_eq_30_all_output.vcf
deleted file mode 100644
index 85eebfcf..00000000
--- a/tests/tmp/gq_eq_30_all_output.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/gq_gt_20_all_err.log b/tests/tmp/gq_gt_20_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_gt_20_all_output.vcf b/tests/tmp/gq_gt_20_all_output.vcf
deleted file mode 100644
index 87339394..00000000
--- a/tests/tmp/gq_gt_20_all_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/gq_gt_20_any_err.log b/tests/tmp/gq_gt_20_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_gt_20_any_output.vcf b/tests/tmp/gq_gt_20_any_output.vcf
deleted file mode 100644
index d9b7ce08..00000000
--- a/tests/tmp/gq_gt_20_any_output.vcf
+++ /dev/null
@@ -1,12 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GQ:DP:PL	0/1:15:18:20,0,30	0/1:10:25:15,0,20	0/0:35:22:0,35,45
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP:PL	0/0:45:30:0,45,60	0/0:50:20:0,50,65	1/1:5:8:25,5,0
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GQ:DP:PL	0/1:20:25:30,0,35	1/1:30:18:40,30,0	0/1:25:22:32,0,38
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/gq_gt_24_5_all_err.log b/tests/tmp/gq_gt_24_5_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_gt_24_5_all_output.vcf b/tests/tmp/gq_gt_24_5_all_output.vcf
deleted file mode 100644
index d6ae73c3..00000000
--- a/tests/tmp/gq_gt_24_5_all_output.vcf
+++ /dev/null
@@ -1,8 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
diff --git a/tests/tmp/gq_lt_30_all_err.log b/tests/tmp/gq_lt_30_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_lt_30_all_output.vcf b/tests/tmp/gq_lt_30_all_output.vcf
deleted file mode 100644
index 85eebfcf..00000000
--- a/tests/tmp/gq_lt_30_all_output.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/gq_lt_30_any_err.log b/tests/tmp/gq_lt_30_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_lt_30_any_output.vcf b/tests/tmp/gq_lt_30_any_output.vcf
deleted file mode 100644
index d9b7ce08..00000000
--- a/tests/tmp/gq_lt_30_any_output.vcf
+++ /dev/null
@@ -1,12 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GQ:DP:PL	0/1:15:18:20,0,30	0/1:10:25:15,0,20	0/0:35:22:0,35,45
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP:PL	0/0:45:30:0,45,60	0/0:50:20:0,50,65	1/1:5:8:25,5,0
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GQ:DP:PL	0/1:20:25:30,0,35	1/1:30:18:40,30,0	0/1:25:22:32,0,38
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/gq_ne_30_all_err.log b/tests/tmp/gq_ne_30_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gq_ne_30_all_output.vcf b/tests/tmp/gq_ne_30_all_output.vcf
deleted file mode 100644
index a802d0a6..00000000
--- a/tests/tmp/gq_ne_30_all_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GQ:DP:PL	0/1:15:18:20,0,30	0/1:10:25:15,0,20	0/0:35:22:0,35,45
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP:PL	0/0:45:30:0,45,60	0/0:50:20:0,50,65	1/1:5:8:25,5,0
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/gt_operator_err.log b/tests/tmp/gt_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/gt_operator_output.vcf b/tests/tmp/gt_operator_output.vcf
deleted file mode 100644
index 0969a3e1..00000000
--- a/tests/tmp/gt_operator_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/help_message.txt b/tests/tmp/help_message.txt
deleted file mode 100644
index 178322ae..00000000
--- a/tests/tmp/help_message.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-VCFX_phred_filter: Filter VCF lines by their QUAL field.
-
-Usage:
-  VCFX_phred_filter [options] < input.vcf > output.vcf
-
-Options:
-  -p, --phred-filter       Phred QUAL threshold (default=30)
-  -k, --keep-missing-qual       Treat '.' (missing QUAL) as pass
-  -h, --help                    Display this help and exit
-
-Description:
-  Reads VCF lines from stdin. For each data line, parse the QUAL field.
-  If QUAL >= threshold => print line. Otherwise, skip. By default, missing
-  QUAL ('.') is treated as 0. Use --keep-missing-qual to treat '.' as pass.
-
-Examples:
-  1) Keep variants with QUAL>=30:
-     VCFX_phred_filter -p 30 < in.vcf > out.vcf
-  2) Keep missing QUAL lines:
-     VCFX_phred_filter -p 30 --keep-missing-qual < in.vcf > out.vcf
diff --git a/tests/tmp/help_output.txt b/tests/tmp/help_output.txt
deleted file mode 100644
index 651909da..00000000
--- a/tests/tmp/help_output.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-VCFX_quality_adjuster: Apply a transformation to the QUAL field of a VCF.
-
-Usage:
-  VCFX_quality_adjuster [options] < input.vcf > output.vcf
-
-Options:
-  -h, --help               Show this help.
-  -a, --adjust-qual  Required. One of: log, sqrt, square, identity.
-  -n, --no-clamp           Do not clamp negative or large values.
-
-Description:
-  Reads each line from VCF. If it's a data line with >=8 columns, we parse
-  the QUAL field (6th col). We transform it with , e.g.:
-    log => log(QUAL + 1e-10)
-    sqrt=> sqrt(QUAL)
-    square=> (QUAL * QUAL)
-    identity=> no change
-  By default, negative results from e.g. log are clamped to 0, and large
-  results are capped at 1e12. If you do not want clamping, use --no-clamp.
-
-Examples:
-  1) Log-transform:
-     VCFX_quality_adjuster --adjust-qual log < in.vcf > out.vcf
-  2) Square, keep negative or big values as is:
-     VCFX_quality_adjuster --adjust-qual square --no-clamp < in.vcf > out.vcf
diff --git a/tests/tmp/identity_transform_err.log b/tests/tmp/identity_transform_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/identity_transform_output.vcf b/tests/tmp/identity_transform_output.vcf
deleted file mode 100644
index d7594c78..00000000
--- a/tests/tmp/identity_transform_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	30.000000	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	0.000000	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	100.000000	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	10.000000	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	0.000000	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/invalid_condition_err.log b/tests/tmp/invalid_condition_err.log
deleted file mode 100644
index 01efc7fc..00000000
--- a/tests/tmp/invalid_condition_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Error: Invalid filter condition format. Expected format like "GP>0.9".
diff --git a/tests/tmp/invalid_condition_output.vcf b/tests/tmp/invalid_condition_output.vcf
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/invalid_err.log b/tests/tmp/invalid_err.log
deleted file mode 100644
index 34167a32..00000000
--- a/tests/tmp/invalid_err.log
+++ /dev/null
@@ -1,4 +0,0 @@
-Warning: Data line encountered before #CHROM header. Skipping line:
-1	100	rs1	A	G	30	PASS	AF=0.25	GT	0/1	0/0	1/1
-Warning: Skipping invalid VCF line (fewer than 9 fields):
-1	300
diff --git a/tests/tmp/invalid_error.log b/tests/tmp/invalid_error.log
deleted file mode 100644
index 9c4f2a0e..00000000
--- a/tests/tmp/invalid_error.log
+++ /dev/null
@@ -1 +0,0 @@
-Error: unsupported transformation 'invalid_transform'.
diff --git a/tests/tmp/invalid_mode_err.log b/tests/tmp/invalid_mode_err.log
deleted file mode 100644
index 63e70083..00000000
--- a/tests/tmp/invalid_mode_err.log
+++ /dev/null
@@ -1,2 +0,0 @@
-==== START OF TEST: invalid_mode ====
-Error: --mode must be 'any' or 'all'.
diff --git a/tests/tmp/invalid_mode_out.vcf b/tests/tmp/invalid_mode_out.vcf
deleted file mode 100644
index cebc4ad1..00000000
--- a/tests/tmp/invalid_mode_out.vcf
+++ /dev/null
@@ -1,18 +0,0 @@
-VCFX_gl_filter: Filter VCF based on a numeric genotype-likelihood field.
-
-Usage:
-  VCFX_gl_filter --filter "" [--mode ] < input.vcf > output.vcf
-
-Options:
-  -h, --help                Display this help message and exit
-  -f, --filter   e.g. "GQ>20" or "DP>=10.5" or "PL==50"
-  -m, --mode       'all' => all samples must pass (default), 'any' => at least one sample passes.
-
-Example:
-  VCFX_gl_filter --filter "GQ>20.5" --mode any < input.vcf > filtered.vcf
-
-Description:
-  The filter condition is a simple expression: ,
-  e.g. GQ>20 or DP!=10 or RGQ<=5.2.
-  The 'mode' determines if all samples must satisfy the condition or
-  if at least one sample satisfying is enough to keep the record.
diff --git a/tests/tmp/invalid_mode_output.vcf b/tests/tmp/invalid_mode_output.vcf
deleted file mode 100644
index cabcfbb6..00000000
--- a/tests/tmp/invalid_mode_output.vcf
+++ /dev/null
@@ -1,19 +0,0 @@
-==== START OF TEST: invalid_mode ====
-VCFX_gl_filter: Filter VCF based on a numeric genotype-likelihood field.
-
-Usage:
-  VCFX_gl_filter --filter "" [--mode ] < input.vcf > output.vcf
-
-Options:
-  -h, --help                Display this help message and exit
-  -f, --filter   e.g. "GQ>20" or "DP>=10.5" or "PL==50"
-  -m, --mode       'all' => all samples must pass (default), 'any' => at least one sample passes.
-
-Example:
-  VCFX_gl_filter --filter "GQ>20.5" --mode any < input.vcf > filtered.vcf
-
-Description:
-  The filter condition is a simple expression: ,
-  e.g. GQ>20 or DP!=10 or RGQ<=5.2.
-  The 'mode' determines if all samples must satisfy the condition or
-  if at least one sample satisfying is enough to keep the record.
diff --git a/tests/tmp/invalid_out.vcf b/tests/tmp/invalid_out.vcf
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/invalid_output.tsv b/tests/tmp/invalid_output.tsv
deleted file mode 100644
index 7267884f..00000000
--- a/tests/tmp/invalid_output.tsv
+++ /dev/null
@@ -1,2 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	200	rs2	C	T	0.0000
diff --git a/tests/tmp/invalid_output.vcf b/tests/tmp/invalid_output.vcf
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/invalid_records_threshold_30.vcf b/tests/tmp/invalid_records_threshold_30.vcf
deleted file mode 100644
index 687eda1c..00000000
--- a/tests/tmp/invalid_records_threshold_30.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL
-1	100	.	A	G	50
-1	300	.	G	A	30	PASS	DP=40
diff --git a/tests/tmp/invalid_vcf_err.log b/tests/tmp/invalid_vcf_err.log
deleted file mode 100644
index 4878b8e6..00000000
--- a/tests/tmp/invalid_vcf_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Error: Encountered VCF data before #CHROM header.
diff --git a/tests/tmp/invalid_vcf_output.tsv b/tests/tmp/invalid_vcf_output.tsv
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/le_operator_err.log b/tests/tmp/le_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/le_operator_output.vcf b/tests/tmp/le_operator_output.vcf
deleted file mode 100644
index 0946e0ef..00000000
--- a/tests/tmp/le_operator_output.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GP	0/1:0.1,0.7,0.2	1/1:0,0.1,0.9	0/1:0.1,0.7,0.2
diff --git a/tests/tmp/log_transform_edge_err.log b/tests/tmp/log_transform_edge_err.log
deleted file mode 100644
index 55d1ada7..00000000
--- a/tests/tmp/log_transform_edge_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Warning: invalid QUAL 'invalid'. Skipping.
diff --git a/tests/tmp/log_transform_edge_output.vcf b/tests/tmp/log_transform_edge_output.vcf
deleted file mode 100644
index 4259e741..00000000
--- a/tests/tmp/log_transform_edge_output.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1
-1	100	rs1	A	G	0.000000	PASS	.	GT	0/1
-1	200	rs2	C	T	13.815511	PASS	.	GT	0/1
-1	400	rs4	T	C	0.000000	PASS	.	GT	0/1
-1	500	rs5	G	C	nan	PASS	.	GT	0/1
diff --git a/tests/tmp/log_transform_err.log b/tests/tmp/log_transform_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/log_transform_no_clamp_err.log b/tests/tmp/log_transform_no_clamp_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/log_transform_no_clamp_output.vcf b/tests/tmp/log_transform_no_clamp_output.vcf
deleted file mode 100644
index a5a7c33f..00000000
--- a/tests/tmp/log_transform_no_clamp_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	3.401197	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	-23.025851	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	4.605170	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	2.302585	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	-23.025851	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/log_transform_output.vcf b/tests/tmp/log_transform_output.vcf
deleted file mode 100644
index 4e8bce84..00000000
--- a/tests/tmp/log_transform_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	3.401197	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	0.000000	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	4.605170	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	2.302585	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	0.000000	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/lt_operator_err.log b/tests/tmp/lt_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/lt_operator_output.vcf b/tests/tmp/lt_operator_output.vcf
deleted file mode 100644
index 0969a3e1..00000000
--- a/tests/tmp/lt_operator_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
diff --git a/tests/tmp/malformed_err.log b/tests/tmp/malformed_err.log
deleted file mode 100644
index b34c3cda..00000000
--- a/tests/tmp/malformed_err.log
+++ /dev/null
@@ -1,2 +0,0 @@
-Warning: invalid VCF line (<9 fields): 1	100	rs1	A	G	30	PASS	AF=0.25
-Warning: invalid VCF line (<9 fields): 1	200	rs2	C	T	40	PASS	AF=0.5
diff --git a/tests/tmp/malformed_freq_err.log b/tests/tmp/malformed_freq_err.log
deleted file mode 100644
index bf3b8a5c..00000000
--- a/tests/tmp/malformed_freq_err.log
+++ /dev/null
@@ -1,3 +0,0 @@
-Warning: Invalid line in frequency file (#1): malformed data
-Error: No valid population frequencies loaded.
-Error: Failed to load population frequencies from tmp/malformed_freqs.txt
diff --git a/tests/tmp/malformed_freq_output.tsv b/tests/tmp/malformed_freq_output.tsv
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/malformed_freqs.txt b/tests/tmp/malformed_freqs.txt
deleted file mode 100644
index b8833029..00000000
--- a/tests/tmp/malformed_freqs.txt
+++ /dev/null
@@ -1 +0,0 @@
-malformed data
diff --git a/tests/tmp/malformed_input_err.log b/tests/tmp/malformed_input_err.log
deleted file mode 100644
index 8fa27b03..00000000
--- a/tests/tmp/malformed_input_err.log
+++ /dev/null
@@ -1,2 +0,0 @@
-Warning: line with <8 fields => skipping.
-Warning: line with <8 fields => skipping.
diff --git a/tests/tmp/malformed_input_output.vcf b/tests/tmp/malformed_input_output.vcf
deleted file mode 100644
index 9068bd6e..00000000
--- a/tests/tmp/malformed_input_output.vcf
+++ /dev/null
@@ -1,2 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL
diff --git a/tests/tmp/malformed_out.vcf b/tests/tmp/malformed_out.vcf
deleted file mode 100644
index 68d81478..00000000
--- a/tests/tmp/malformed_out.vcf
+++ /dev/null
@@ -1,2 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
diff --git a/tests/tmp/malformed_output.vcf b/tests/tmp/malformed_output.vcf
deleted file mode 100644
index 0fe19f4e..00000000
--- a/tests/tmp/malformed_output.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1_EUR	SAMPLE2_EUR
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP	0|1:28	1|1:32
-2	150	rs012	T	C	80	PASS	AF=0.4	GT:DP	1|1:35	0|1:29
diff --git a/tests/tmp/malformed_query_01_flexible_output.vcf b/tests/tmp/malformed_query_01_flexible_output.vcf
deleted file mode 100644
index 36f4bf31..00000000
--- a/tests/tmp/malformed_query_01_flexible_output.vcf
+++ /dev/null
@@ -1,11 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	200	.	C	T	40	PASS	AF=0.5	GT	0/1	0/1	0/0
-1	400	rs4	T	C	45	PASS	.	GT:DP	0|1:25	.|.:--	0|0:12
-1	500	rs5	G	A,T	50	PASS	AF=0.2,0.05	GT:DP	0/1:22	0/.:15	1/1:28
-1	700	rs7	A	G	60	PASS	AF=0.15	GT:DP	./.:--	0|1:22	1|1:25
-1	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	0/1:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/malformed_threshold_30.vcf b/tests/tmp/malformed_threshold_30.vcf
deleted file mode 100644
index 006c6381..00000000
--- a/tests/tmp/malformed_threshold_30.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
diff --git a/tests/tmp/malformed_threshold_30_keep_missing.vcf b/tests/tmp/malformed_threshold_30_keep_missing.vcf
deleted file mode 100644
index 14ab42f1..00000000
--- a/tests/tmp/malformed_threshold_30_keep_missing.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
-1	400	.	T	C		PASS	DP=35
diff --git a/tests/tmp/malformed_threshold_5.vcf b/tests/tmp/malformed_threshold_5.vcf
deleted file mode 100644
index 92517348..00000000
--- a/tests/tmp/malformed_threshold_5.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
-1	100	.	A	G	50	PASS	DP=30
-1	500	.	A	C	10.5	PASS	DP=15
diff --git a/tests/tmp/malformed_vcf_err.log b/tests/tmp/malformed_vcf_err.log
deleted file mode 100644
index 6b11a01f..00000000
--- a/tests/tmp/malformed_vcf_err.log
+++ /dev/null
@@ -1,3 +0,0 @@
-==== START OF TEST: malformed_vcf ====
-Warning: invalid VCF line (<9 fields): 1	100	rs1	A	G	30	PASS	AF=0.25
-Warning: invalid VCF line (<9 fields): 1	200	rs2	C	T	40	PASS	AF=0.5
diff --git a/tests/tmp/malformed_vcf_output.vcf b/tests/tmp/malformed_vcf_output.vcf
deleted file mode 100644
index 3f71f6c2..00000000
--- a/tests/tmp/malformed_vcf_output.vcf
+++ /dev/null
@@ -1,3 +0,0 @@
-==== START OF TEST: malformed_vcf ====
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
diff --git a/tests/tmp/missing_arg_error.log b/tests/tmp/missing_arg_error.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_arg_output.vcf b/tests/tmp/missing_arg_output.vcf
deleted file mode 100644
index 651909da..00000000
--- a/tests/tmp/missing_arg_output.vcf
+++ /dev/null
@@ -1,25 +0,0 @@
-VCFX_quality_adjuster: Apply a transformation to the QUAL field of a VCF.
-
-Usage:
-  VCFX_quality_adjuster [options] < input.vcf > output.vcf
-
-Options:
-  -h, --help               Show this help.
-  -a, --adjust-qual  Required. One of: log, sqrt, square, identity.
-  -n, --no-clamp           Do not clamp negative or large values.
-
-Description:
-  Reads each line from VCF. If it's a data line with >=8 columns, we parse
-  the QUAL field (6th col). We transform it with , e.g.:
-    log => log(QUAL + 1e-10)
-    sqrt=> sqrt(QUAL)
-    square=> (QUAL * QUAL)
-    identity=> no change
-  By default, negative results from e.g. log are clamped to 0, and large
-  results are capped at 1e12. If you do not want clamping, use --no-clamp.
-
-Examples:
-  1) Log-transform:
-     VCFX_quality_adjuster --adjust-qual log < in.vcf > out.vcf
-  2) Square, keep negative or big values as is:
-     VCFX_quality_adjuster --adjust-qual square --no-clamp < in.vcf > out.vcf
diff --git a/tests/tmp/missing_args_output.txt b/tests/tmp/missing_args_output.txt
deleted file mode 100644
index c8c85b1b..00000000
--- a/tests/tmp/missing_args_output.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Usage: ../build/src/VCFX_genotype_query/VCFX_genotype_query --genotype-query "0/1" [--strict] < input.vcf > output.vcf
-Use --help for usage.
diff --git a/tests/tmp/missing_err.log b/tests/tmp/missing_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_field_err.log b/tests/tmp/missing_field_err.log
deleted file mode 100644
index 9bd03884..00000000
--- a/tests/tmp/missing_field_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Error: Specified field "GP" not found in FORMAT column.
diff --git a/tests/tmp/missing_field_gq_gt_20_err.log b/tests/tmp/missing_field_gq_gt_20_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_field_gq_gt_20_output.vcf b/tests/tmp/missing_field_gq_gt_20_output.vcf
deleted file mode 100644
index b10fb63a..00000000
--- a/tests/tmp/missing_field_gq_gt_20_output.vcf
+++ /dev/null
@@ -1,5 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1
diff --git a/tests/tmp/missing_field_output.vcf b/tests/tmp/missing_field_output.vcf
deleted file mode 100644
index b7a7e7ab..00000000
--- a/tests/tmp/missing_field_output.vcf
+++ /dev/null
@@ -1,4 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1
diff --git a/tests/tmp/missing_filter_err.log b/tests/tmp/missing_filter_err.log
deleted file mode 100644
index 2e60ae42..00000000
--- a/tests/tmp/missing_filter_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Error: --filter must be specified.
diff --git a/tests/tmp/missing_filter_output.vcf b/tests/tmp/missing_filter_output.vcf
deleted file mode 100644
index cebc4ad1..00000000
--- a/tests/tmp/missing_filter_output.vcf
+++ /dev/null
@@ -1,18 +0,0 @@
-VCFX_gl_filter: Filter VCF based on a numeric genotype-likelihood field.
-
-Usage:
-  VCFX_gl_filter --filter "" [--mode ] < input.vcf > output.vcf
-
-Options:
-  -h, --help                Display this help message and exit
-  -f, --filter   e.g. "GQ>20" or "DP>=10.5" or "PL==50"
-  -m, --mode       'all' => all samples must pass (default), 'any' => at least one sample passes.
-
-Example:
-  VCFX_gl_filter --filter "GQ>20.5" --mode any < input.vcf > filtered.vcf
-
-Description:
-  The filter condition is a simple expression: ,
-  e.g. GQ>20 or DP!=10 or RGQ<=5.2.
-  The 'mode' determines if all samples must satisfy the condition or
-  if at least one sample satisfying is enough to keep the record.
diff --git a/tests/tmp/missing_freq_err.log b/tests/tmp/missing_freq_err.log
deleted file mode 100644
index 9c007b94..00000000
--- a/tests/tmp/missing_freq_err.log
+++ /dev/null
@@ -1,2 +0,0 @@
-Error: Cannot open frequency file: /nonexistent/file.txt
-Error: Failed to load population frequencies from /nonexistent/file.txt
diff --git a/tests/tmp/missing_freq_output.tsv b/tests/tmp/missing_freq_output.tsv
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_output.tsv b/tests/tmp/missing_output.tsv
deleted file mode 100644
index f47e8fb6..00000000
--- a/tests/tmp/missing_output.tsv
+++ /dev/null
@@ -1,3 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G	0.7500
-1	200	rs2	C	T	0.3333
diff --git a/tests/tmp/missing_samples_err.log b/tests/tmp/missing_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_samples_output.tsv b/tests/tmp/missing_samples_output.tsv
deleted file mode 100644
index cbeeb4c9..00000000
--- a/tests/tmp/missing_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-EUR_MISS	EUR
-AFR_MISS	AFR
-EAS_MISS	EAS
diff --git a/tests/tmp/missing_value_gq_gt_20_all_err.log b/tests/tmp/missing_value_gq_gt_20_all_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_value_gq_gt_20_all_output.vcf b/tests/tmp/missing_value_gq_gt_20_all_output.vcf
deleted file mode 100644
index d9a8ffe1..00000000
--- a/tests/tmp/missing_value_gq_gt_20_all_output.vcf
+++ /dev/null
@@ -1,7 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP	0/1:25:20	0/0:30:15
diff --git a/tests/tmp/missing_value_gq_gt_20_any_err.log b/tests/tmp/missing_value_gq_gt_20_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/missing_value_gq_gt_20_any_output.vcf b/tests/tmp/missing_value_gq_gt_20_any_output.vcf
deleted file mode 100644
index 900ca329..00000000
--- a/tests/tmp/missing_value_gq_gt_20_any_output.vcf
+++ /dev/null
@@ -1,8 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP	0/1:25:20	0/0:30:15
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GQ:DP	0/0:45:30	0/0::20
diff --git a/tests/tmp/mixed_population_check_err.log b/tests/tmp/mixed_population_check_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/mixed_population_check_output.tsv b/tests/tmp/mixed_population_check_output.tsv
deleted file mode 100644
index 52ec9b21..00000000
--- a/tests/tmp/mixed_population_check_output.tsv
+++ /dev/null
@@ -1,5 +0,0 @@
-Sample	Inferred_Population
-EUR_SAM	EUR
-AFR_SAM	AFR
-EAS_SAM	EAS
-MIX_SAM	EUR
diff --git a/tests/tmp/mixed_population_check_results.tmp b/tests/tmp/mixed_population_check_results.tmp
deleted file mode 100644
index 29eafc2a..00000000
--- a/tests/tmp/mixed_population_check_results.tmp
+++ /dev/null
@@ -1,4 +0,0 @@
-EUR_SAM	EUR
-AFR_SAM	AFR
-EAS_SAM	EAS
-MIX_SAM	EUR
diff --git a/tests/tmp/mixed_samples_err.log b/tests/tmp/mixed_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/mixed_samples_output.tsv b/tests/tmp/mixed_samples_output.tsv
deleted file mode 100644
index 52ec9b21..00000000
--- a/tests/tmp/mixed_samples_output.tsv
+++ /dev/null
@@ -1,5 +0,0 @@
-Sample	Inferred_Population
-EUR_SAM	EUR
-AFR_SAM	AFR
-EAS_SAM	EAS
-MIX_SAM	EUR
diff --git a/tests/tmp/multiallelic_err.log b/tests/tmp/multiallelic_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/multiallelic_output.tsv b/tests/tmp/multiallelic_output.tsv
deleted file mode 100644
index 71fd0ab4..00000000
--- a/tests/tmp/multiallelic_output.tsv
+++ /dev/null
@@ -1,3 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G,T	0.6667
-1	200	rs2	C	T,G,A	0.8333
diff --git a/tests/tmp/multiallelic_samples_err.log b/tests/tmp/multiallelic_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/multiallelic_samples_output.tsv b/tests/tmp/multiallelic_samples_output.tsv
deleted file mode 100644
index a0251e8c..00000000
--- a/tests/tmp/multiallelic_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-EUR_MULTI	EUR
-AFR_MULTI	AFR
-EAS_MULTI	EUR
diff --git a/tests/tmp/ne_operator_err.log b/tests/tmp/ne_operator_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/ne_operator_output.vcf b/tests/tmp/ne_operator_output.vcf
deleted file mode 100644
index 2b906f70..00000000
--- a/tests/tmp/ne_operator_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GP	0/1:0.01,0.98,0.01	0/0:0.99,0.01,0	1/1:0,0.02,0.98
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:GP	0/1:0.05,0.9,0.05	0/1:0.1,0.8,0.1	0/0:0.95,0.04,0.01
-1	300	rs3	G	A	50	PASS	AF=0.1	GT:GP	0/0:0.85,0.15,0	0/0:0.92,0.08,0	1/1:0,0.05,0.95
-1	400	rs4	T	C	60	PASS	AF=0.3	GT:GP	0/1:0.1,0.7,0.2	1/1:0,0.1,0.9	0/1:0.1,0.7,0.2
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GP	0/0:0.94,0.05,0.01	1/1:0.01,0.05,0.94	0/1:0.2,0.75,0.05
diff --git a/tests/tmp/no_args_output.txt b/tests/tmp/no_args_output.txt
deleted file mode 100644
index 4f075489..00000000
--- a/tests/tmp/no_args_output.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-VCFX_population_filter: Subset VCF to samples in specified population.
-
-Usage:
-  VCFX_population_filter [options] < input.vcf > output.vcf
-
-Options:
-  --help, -h               Print this help.
-  --population, -p    Population tag to keep (e.g. 'EUR','AFR', etc.)
-  --pop-map, -m      Tab-delimited file: 'SampleName  Population'
-
-Description:
-  Reads the pop map, finds samples that match the chosen population.
-  Then reads the VCF from stdin and prints lines with only those sample columns.
-  If a sample is not in that population, it's dropped from the #CHROM header and data columns.
-
-Example:
-  VCFX_population_filter --population AFR --pop-map pops.txt < input.vcf > out.vcf
diff --git a/tests/tmp/no_gt_err.log b/tests/tmp/no_gt_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/no_gt_output.tsv b/tests/tmp/no_gt_output.tsv
deleted file mode 100644
index 453ebe59..00000000
--- a/tests/tmp/no_gt_output.tsv
+++ /dev/null
@@ -1 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
diff --git a/tests/tmp/output.txt b/tests/tmp/output.txt
deleted file mode 100644
index cebc4ad1..00000000
--- a/tests/tmp/output.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-VCFX_gl_filter: Filter VCF based on a numeric genotype-likelihood field.
-
-Usage:
-  VCFX_gl_filter --filter "" [--mode ] < input.vcf > output.vcf
-
-Options:
-  -h, --help                Display this help message and exit
-  -f, --filter   e.g. "GQ>20" or "DP>=10.5" or "PL==50"
-  -m, --mode       'all' => all samples must pass (default), 'any' => at least one sample passes.
-
-Example:
-  VCFX_gl_filter --filter "GQ>20.5" --mode any < input.vcf > filtered.vcf
-
-Description:
-  The filter condition is a simple expression: ,
-  e.g. GQ>20 or DP!=10 or RGQ<=5.2.
-  The 'mode' determines if all samples must satisfy the condition or
-  if at least one sample satisfying is enough to keep the record.
diff --git a/tests/tmp/phased_err.log b/tests/tmp/phased_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/phased_output.tsv b/tests/tmp/phased_output.tsv
deleted file mode 100644
index 189c397a..00000000
--- a/tests/tmp/phased_output.tsv
+++ /dev/null
@@ -1,3 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G	0.5000
-1	200	rs2	C	T	0.3333
diff --git a/tests/tmp/phased_samples_err.log b/tests/tmp/phased_samples_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/phased_samples_output.tsv b/tests/tmp/phased_samples_output.tsv
deleted file mode 100644
index deb18772..00000000
--- a/tests/tmp/phased_samples_output.tsv
+++ /dev/null
@@ -1,4 +0,0 @@
-Sample	Inferred_Population
-EUR_PHASED	EUR
-AFR_PHASED	AFR
-EAS_PHASED	EAS
diff --git a/tests/tmp/pl_gt_40_any_err.log b/tests/tmp/pl_gt_40_any_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/pl_gt_40_any_output.vcf b/tests/tmp/pl_gt_40_any_output.vcf
deleted file mode 100644
index 87339394..00000000
--- a/tests/tmp/pl_gt_40_any_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:GQ:DP:PL	0/1:25:20:35,0,40	0/0:30:15:0,30,50	1/1:40:18:50,40,0
-1	500	rs5	G	C	70	PASS	AF=0.35	GT:GQ:DP:PL	0/0:55:15:0,55,70	1/1:60:18:75,60,0	0/1:22:20:28,0,32
diff --git a/tests/tmp/query_01_flexible_cleaned.vcf b/tests/tmp/query_01_flexible_cleaned.vcf
deleted file mode 100644
index 2bc29661..00000000
--- a/tests/tmp/query_01_flexible_cleaned.vcf
+++ /dev/null
@@ -1 +0,0 @@
-##fileformat=VCFv4.2##INFO=##FORMAT=##FORMAT=#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE31	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:221	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:101	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:221	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:121	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/query_01_flexible_expected_cleaned.vcf b/tests/tmp/query_01_flexible_expected_cleaned.vcf
deleted file mode 100644
index 2bc29661..00000000
--- a/tests/tmp/query_01_flexible_expected_cleaned.vcf
+++ /dev/null
@@ -1 +0,0 @@
-##fileformat=VCFv4.2##INFO=##FORMAT=##FORMAT=#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE31	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:221	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:101	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:221	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:121	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/query_01_flexible_output.vcf b/tests/tmp/query_01_flexible_output.vcf
deleted file mode 100644
index 696571e0..00000000
--- a/tests/tmp/query_01_flexible_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:10
-1	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:22
-1	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:12
-1	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22  
\ No newline at end of file
diff --git a/tests/tmp/query_01_pipe_flexible_output.vcf b/tests/tmp/query_01_pipe_flexible_output.vcf
deleted file mode 100644
index e41d6402..00000000
--- a/tests/tmp/query_01_pipe_flexible_output.vcf
+++ /dev/null
@@ -1,19 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3	SAMPLE4
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:10
-1	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:22
-1	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:12
-1	500	rs5	G	A,T	50	PASS	AF=0.2,0.05	GT:DP	0/1:22	0/2:15	1/1:28
-1	700	rs7	A	G	60	PASS	AF=0.15	GT:DP	1|0:20	0|1:22	1|1:25
-1	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22
-2	150	rs3	T	C	80	PASS	AF=0.3	GT:DP:GQ	1/1:35:99	1/0:29:99	0|1:18:99	0/0:24:99
-2	250	rs4	G	A,T	90	PASS	AF=0.4,0.1	GT:DP:GQ	1/2:31:99	2/2:27:99	0/2:25:99	0/1:20:99
-3	300	rs5	C	G	100	PASS	AF=0.5	GT:DP:GQ	./.:30:99	0/0:25:99	0/1:20:99	1/1:22:99
-3	400	rs6	T	A	60	FAIL	AF=0.1	GT:DP:GQ	0/1:30:10	0/1:25:20	0/1:20:30	0/1:22:40
-4	100	rs7	A	G	70	PASS	AF=0.2	GT:DP	0/1:30	0/1:25	0/1:20	0/1:22
-4	200	rs8	C	T	80	PASS	AF=0.3	DP:GT	30:0/1	25:0/1	20:0/1	22:0/1 
\ No newline at end of file
diff --git a/tests/tmp/query_01_pipe_strict_output.vcf b/tests/tmp/query_01_pipe_strict_output.vcf
deleted file mode 100644
index 8a82c11b..00000000
--- a/tests/tmp/query_01_pipe_strict_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3	SAMPLE4
-1	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:22
-1	400	rs4	T	C	45	PASS	AF=0.3	GT:DP	0|1:25	1|0:18	0|0:12
-1	700	rs7	A	G	60	PASS	AF=0.15	GT:DP	1|0:20	0|1:22	1|1:25
-2	150	rs3	T	C	80	PASS	AF=0.3	GT:DP:GQ	1/1:35:99	1/0:29:99	0|1:18:99	0/0:24:99
\ No newline at end of file
diff --git a/tests/tmp/query_01_strict_output.vcf b/tests/tmp/query_01_strict_output.vcf
deleted file mode 100644
index b09fd625..00000000
--- a/tests/tmp/query_01_strict_output.vcf
+++ /dev/null
@@ -1,15 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3	SAMPLE4
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	200	rs2	C	T	40	PASS	AF=0.5	GT:DP	0/1:18	0/1:25	0/0:10
-1	500	rs5	G	A,T	50	PASS	AF=0.2,0.05	GT:DP	0/1:22	0/2:15	1/1:28
-1	800	rs8	G	C	65	PASS	AF=0.35	GT:DP	0/0:15	1/0:18	0/1:22
-2	250	rs4	G	A,T	90	PASS	AF=0.4,0.1	GT:DP:GQ	1/2:31:99	2/2:27:99	0/2:25:99	0/1:20:99
-3	300	rs5	C	G	100	PASS	AF=0.5	GT:DP:GQ	./.:30:99	0/0:25:99	0/1:20:99	1/1:22:99
-3	400	rs6	T	A	60	FAIL	AF=0.1	GT:DP:GQ	0/1:30:10	0/1:25:20	0/1:20:30	0/1:22:40
-4	100	rs7	A	G	70	PASS	AF=0.2	GT:DP	0/1:30	0/1:25	0/1:20	0/1:22
-4	200	rs8	C	T	80	PASS	AF=0.3	DP:GT	30:0/1	25:0/1	20:0/1	22:0/1 
\ No newline at end of file
diff --git a/tests/tmp/query_11_flexible_output.vcf b/tests/tmp/query_11_flexible_output.vcf
deleted file mode 100644
index 742184ce..00000000
--- a/tests/tmp/query_11_flexible_output.vcf
+++ /dev/null
@@ -1,13 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3	SAMPLE4
-1	100	rs1	A	G	30	PASS	AF=0.25	GT:DP	0/1:20	0/0:15	1/1:22
-1	300	rs3	G	A	35	PASS	AF=0.1	GT:DP	0|1:30	0|0:20	1|1:22
-1	500	rs5	G	A,T	50	PASS	AF=0.2,0.05	GT:DP	0/1:22	0/2:15	1/1:28
-1	600	rs6	C	G	55	PASS	AF=0.4	GT:DP	1/1:30	1/1:25	0/0:15
-1	700	rs7	A	G	60	PASS	AF=0.15	GT:DP	1|0:20	0|1:22	1|1:25
-2	150	rs3	T	C	80	PASS	AF=0.3	GT:DP:GQ	1/1:35:99	1/0:29:99	0|1:18:99	0/0:24:99
-3	300	rs5	C	G	100	PASS	AF=0.5	GT:DP:GQ	./.:30:99	0/0:25:99	0/1:20:99	1/1:22:99
\ No newline at end of file
diff --git a/tests/tmp/query_multi_02_flexible_output.vcf b/tests/tmp/query_multi_02_flexible_output.vcf
deleted file mode 100644
index 1fea55ad..00000000
--- a/tests/tmp/query_multi_02_flexible_output.vcf
+++ /dev/null
@@ -1,8 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2	SAMPLE3	SAMPLE4
-1	500	rs5	G	A,T	50	PASS	AF=0.2,0.05	GT:DP	0/1:22	0/2:15	1/1:28
-2	250	rs4	G	A,T	90	PASS	AF=0.4,0.1	GT:DP:GQ	1/2:31:99	2/2:27:99	0/2:25:99	0/1:20:99
\ No newline at end of file
diff --git a/tests/tmp/simple_err.log b/tests/tmp/simple_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/simple_freqs_err.log b/tests/tmp/simple_freqs_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/simple_freqs_output.tsv b/tests/tmp/simple_freqs_output.tsv
deleted file mode 100644
index 7ab39b3b..00000000
--- a/tests/tmp/simple_freqs_output.tsv
+++ /dev/null
@@ -1,5 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G	0.5000
-1	200	rs2	C	T	0.3333
-1	300	rs3	G	A	0.3333
-1	400	rs4	T	C	0.8333
diff --git a/tests/tmp/simple_output.tsv b/tests/tmp/simple_output.tsv
deleted file mode 100644
index 7ab39b3b..00000000
--- a/tests/tmp/simple_output.tsv
+++ /dev/null
@@ -1,5 +0,0 @@
-CHROM	POS	ID	REF	ALT	Allele_Frequency
-1	100	rs1	A	G	0.5000
-1	200	rs2	C	T	0.3333
-1	300	rs3	G	A	0.3333
-1	400	rs4	T	C	0.8333
diff --git a/tests/tmp/sqrt_transform_edge_err.log b/tests/tmp/sqrt_transform_edge_err.log
deleted file mode 100644
index 55d1ada7..00000000
--- a/tests/tmp/sqrt_transform_edge_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Warning: invalid QUAL 'invalid'. Skipping.
diff --git a/tests/tmp/sqrt_transform_edge_output.vcf b/tests/tmp/sqrt_transform_edge_output.vcf
deleted file mode 100644
index 4067f73c..00000000
--- a/tests/tmp/sqrt_transform_edge_output.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1
-1	100	rs1	A	G	0.001000	PASS	.	GT	0/1
-1	200	rs2	C	T	1000.000000	PASS	.	GT	0/1
-1	400	rs4	T	C	0.000000	PASS	.	GT	0/1
-1	500	rs5	G	C	0.000000	PASS	.	GT	0/1
diff --git a/tests/tmp/sqrt_transform_err.log b/tests/tmp/sqrt_transform_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/sqrt_transform_output.vcf b/tests/tmp/sqrt_transform_output.vcf
deleted file mode 100644
index 5ba0fbd6..00000000
--- a/tests/tmp/sqrt_transform_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	5.477226	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	0.000000	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	10.000000	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	3.162278	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	0.000000	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/square_transform_edge_err.log b/tests/tmp/square_transform_edge_err.log
deleted file mode 100644
index 55d1ada7..00000000
--- a/tests/tmp/square_transform_edge_err.log
+++ /dev/null
@@ -1 +0,0 @@
-Warning: invalid QUAL 'invalid'. Skipping.
diff --git a/tests/tmp/square_transform_edge_output.vcf b/tests/tmp/square_transform_edge_output.vcf
deleted file mode 100644
index 3f277fb3..00000000
--- a/tests/tmp/square_transform_edge_output.vcf
+++ /dev/null
@@ -1,6 +0,0 @@
-##fileformat=VCFv4.2
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1
-1	100	rs1	A	G	0.000000	PASS	.	GT	0/1
-1	200	rs2	C	T	1000000000000.000000	PASS	.	GT	0/1
-1	400	rs4	T	C	0.000000	PASS	.	GT	0/1
-1	500	rs5	G	C	100.000000	PASS	.	GT	0/1
diff --git a/tests/tmp/square_transform_err.log b/tests/tmp/square_transform_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/square_transform_no_clamp_err.log b/tests/tmp/square_transform_no_clamp_err.log
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/tmp/square_transform_no_clamp_output.vcf b/tests/tmp/square_transform_no_clamp_output.vcf
deleted file mode 100644
index a718db16..00000000
--- a/tests/tmp/square_transform_no_clamp_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	900.000000	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	0.000000	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	10000.000000	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	100.000000	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	0.000000	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/square_transform_output.vcf b/tests/tmp/square_transform_output.vcf
deleted file mode 100644
index a718db16..00000000
--- a/tests/tmp/square_transform_output.vcf
+++ /dev/null
@@ -1,10 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	SAMPLE1	SAMPLE2
-1	100	rs1	A	G	900.000000	PASS	AF=0.25	GT:DP	0/1:20	0/0:15
-1	200	rs2	C	T	0.000000	PASS	AF=0.5	GT:DP	0/1:18	0/1:25
-1	300	rs3	G	A	10000.000000	PASS	AF=0.1	GT:DP	0/1:30	0/0:20
-1	400	rs4	T	C	100.000000	PASS	AF=0.3	GT:DP	0/1:25	1/1:18
-1	500	rs5	G	C	0.000000	PASS	AF=0.35	GT:DP	0/0:15	1/1:18
diff --git a/tests/tmp/unknown_output.vcf b/tests/tmp/unknown_output.vcf
deleted file mode 100644
index ae84bda2..00000000
--- a/tests/tmp/unknown_output.vcf
+++ /dev/null
@@ -1,9 +0,0 @@
-##fileformat=VCFv4.2
-##INFO=
-##FORMAT=
-##FORMAT=
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT
-1	100	rs123	A	T	50	PASS	AF=0.1	GT:DP
-1	200	rs456	G	C	60	PASS	AF=0.2	GT:DP
-2	150	rs789	T	C	70	PASS	AF=0.3	GT:DP
-2	250	rs012	G	A	80	PASS	AF=0.4	GT:DP

From f63dce5f9b1f2e6334c5cf9209c3e148ad197996 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 19:41:13 +0100
Subject: [PATCH 36/63] Fix ALT normalization with colon in duplicate remover

---
 .../VCFX_duplicate_remover.cpp                | 30 ++++++++-----------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp b/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp
index 72625213..9a75cc9a 100644
--- a/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp
+++ b/src/VCFX_duplicate_remover/VCFX_duplicate_remover.cpp
@@ -65,26 +65,20 @@ static VariantKey generateVariantKey(const std::string& chrom,
         key.pos = 0;
     }
     key.ref = ref;
-    key.alt = "";  // Will be set to normalized ALT.
-    // Normalize ALT: sort multi-allelic entries.
-    key.alt = generateNormalizedVariantKey(chrom, pos, ref, alt).substr(chrom.size() + pos.size() + ref.size() + 3); // skip prefix "chrom:pos:ref:"
-    // Alternatively, simply:
-    key.alt = generateNormalizedVariantKey(chrom, pos, ref, alt);
-    // However, since generateNormalizedVariantKey already concatenates chrom:pos:ref:normalizedAlt,
-    // we extract the normalizedAlt portion if needed. For simplicity, we can just store the full key.
-    // For our VariantKey, we want: chrom, pos, ref, normalizedAlt.
-    // We'll do that by re-parsing:
-    std::vector parts = splitString(generateNormalizedVariantKey(chrom, pos, ref, alt), ':');
-    if (parts.size() >= 4) {
-        key.chrom = parts[0];
-        try {
-            key.pos = std::stoi(parts[1]);
-        } catch (...) {
-            key.pos = 0;
+
+    // Normalize ALT: split multi-allelic values, sort them, then rejoin.  This
+    // avoids parsing the generated key string, which could break for ALT
+    // alleles containing ':' such as breakend notation.
+    std::vector alts = splitString(alt, ',');
+    std::sort(alts.begin(), alts.end());
+    std::ostringstream oss;
+    for (size_t i = 0; i < alts.size(); ++i) {
+        if (i > 0) {
+            oss << ',';
         }
-        key.ref = parts[2];
-        key.alt = parts[3];
+        oss << alts[i];
     }
+    key.alt = oss.str();
     return key;
 }
 

From dd832843f2d4b2c5527b7a8cafd8371a9f39bbf5 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 19:42:00 +0100
Subject: [PATCH 37/63] Support 64-bit positions in indexer

---
 docs/VCFX_indexer.md              | 12 ++++++------
 src/VCFX_indexer/VCFX_indexer.cpp |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/VCFX_indexer.md b/docs/VCFX_indexer.md
index 10d870d8..65f69f38 100644
--- a/docs/VCFX_indexer.md
+++ b/docs/VCFX_indexer.md
@@ -1,7 +1,7 @@
 # VCFX_indexer
 
 ## Overview
-`VCFX_indexer` is a utility tool for creating a byte-offset index of a VCF file. It generates a simple tab-delimited index file that maps chromosome and position to the exact byte offset in the original file, enabling efficient random access to variants without scanning the entire file.
+`VCFX_indexer` is a utility tool for creating a byte-offset index of a VCF file. It generates a simple tab-delimited index file that maps chromosome and position to the exact byte offset in the original file, enabling efficient random access to variants without scanning the entire file. The index uses 64-bit integers for both the position and the byte offset so very large coordinates are fully supported.
 
 ## Usage
 
@@ -26,9 +26,9 @@ VCFX_indexer [OPTIONS] < input.vcf > index.tsv
    - Extracting the chromosome (CHROM) and position (POS) values
    - Calculating the precise byte offset from the start of the file
 4. Writing a three-column index to standard output with:
-   - CHROM: The chromosome identifier from the VCF
-   - POS: The position value from the VCF
-   - FILE_OFFSET: The byte offset to the start of the line in the source file
+    - CHROM: The chromosome identifier from the VCF
+    - POS: The position value from the VCF (stored as a 64-bit integer)
+    - FILE_OFFSET: The byte offset to the start of the line in the source file (also 64-bit)
 
 This index enables efficient random access to specific variants in large VCF files by allowing tools to seek directly to a byte offset rather than scanning the entire file. It's particularly useful for building tools that need to query specific regions of a VCF file.
 
@@ -45,8 +45,8 @@ CHROM   POS    FILE_OFFSET
 Where:
 
 - `CHROM` is the chromosome identifier from the VCF
-- `POS` is the genomic position from the VCF
-- `FILE_OFFSET` is the byte offset from the start of the VCF file
+- `POS` is the genomic position from the VCF (64-bit integer)
+- `FILE_OFFSET` is the byte offset from the start of the VCF file (64-bit integer)
 
 ## Examples
 
diff --git a/src/VCFX_indexer/VCFX_indexer.cpp b/src/VCFX_indexer/VCFX_indexer.cpp
index 2011871e..1eeac8d1 100644
--- a/src/VCFX_indexer/VCFX_indexer.cpp
+++ b/src/VCFX_indexer/VCFX_indexer.cpp
@@ -130,9 +130,9 @@ void VCFXIndexer::createVCFIndex(std::istream &in, std::ostream &out) {
         const std::string &chrom = fields[0];
         const std::string &posStr = fields[1];
 
-        int posVal = 0;
+        std::int64_t posVal = 0;
         try {
-            posVal = std::stoi(posStr);
+            posVal = std::stoll(posStr);
         } catch (...) {
             // Not a valid integer => skip
             return;

From 7e96e4255426a79add9bb5d98d5e384235e09056 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 20:01:39 +0100
Subject: [PATCH 38/63] Refactor variant counter to stream input

---
 .../VCFX_variant_counter.cpp                  | 125 ++++++++++++++----
 .../VCFX_variant_counter.h                    |   2 +
 2 files changed, 100 insertions(+), 27 deletions(-)

diff --git a/src/VCFX_variant_counter/VCFX_variant_counter.cpp b/src/VCFX_variant_counter/VCFX_variant_counter.cpp
index 72828643..55549d47 100644
--- a/src/VCFX_variant_counter/VCFX_variant_counter.cpp
+++ b/src/VCFX_variant_counter/VCFX_variant_counter.cpp
@@ -5,6 +5,8 @@
 #include 
 #include 
 #include "vcfx_core.h"
+#include 
+#include 
 
 void VCFXVariantCounter::displayHelp(){
     std::cout <<
@@ -56,13 +58,28 @@ int VCFXVariantCounter::run(int argc, char* argv[]){
         return 0;
     }
     
-    std::string plainInput;
-    if(!vcfx::read_maybe_compressed(std::cin, plainInput)){
-        std::cerr << "Error: failed to read input" << std::endl;
-        return 1;
+    auto peek1 = std::cin.peek();
+    bool isEmpty = (peek1 == EOF);
+    bool isGzip = false;
+    if(!isEmpty){
+        int c1 = std::cin.get();
+        int c2 = std::cin.get();
+        if(c2 != EOF){
+            isGzip = (static_cast(c1) == 0x1f &&
+                      static_cast(c2) == 0x8b);
+            std::cin.putback(static_cast(c2));
+        }
+        std::cin.putback(static_cast(c1));
+    }
+
+    int total = -1;
+    if(isEmpty){
+        total = 0;
+    } else if(isGzip){
+        total = countVariantsGzip(std::cin);
+    } else {
+        total = countVariants(std::cin);
     }
-    std::istringstream inStream(plainInput);
-    int total= countVariants(inStream);
     if(total<0){
         // indicates an error if strict
         return 1;
@@ -71,36 +88,90 @@ int VCFXVariantCounter::run(int argc, char* argv[]){
     return 0;
 }
 
+bool VCFXVariantCounter::processLine(const std::string &line, int lineNumber, int &count){
+    if(line.empty()) return true;
+    if(line[0]=='#') return true;
+    std::stringstream ss(line);
+    std::vector<std::string> fields;
+    {
+        std::string col;
+        while(std::getline(ss,col,'\t')){
+            fields.push_back(col);
+        }
+    }
+    if(fields.size()<8){
+        if(strictMode){
+            std::cerr<<"Error: line "<< lineNumber <<" has <8 columns.\n";
+            return false;
+        } else {
+            std::cerr<<"Warning: skipping line "< fields;
-        {
-            std::string col;
-            while(std::getline(ss,col,'\t')){
-                fields.push_back(col);
+        if(!processLine(line, lineNumber, count)) return -1;
+    }
+    return count;
+}
+
+int VCFXVariantCounter::countVariantsGzip(std::istream &in){
+    constexpr int CHUNK = 16384;
+    char inBuf[CHUNK];
+    char outBuf[CHUNK];
+    z_stream strm; std::memset(&strm,0,sizeof(strm));
+    if(inflateInit2(&strm,15+32)!=Z_OK){
+        std::cerr<<"Error: inflateInit2 failed.\n";
+        return -1;
+    }
+    int count=0; int lineNumber=0; std::string buffer; int ret=Z_OK;
+    do {
+        in.read(inBuf, CHUNK);
+        strm.avail_in = static_cast<uInt>(in.gcount());
+        if(strm.avail_in==0 && in.eof()) break;
+        strm.next_in = reinterpret_cast<Bytef*>(inBuf);
+        do {
+            strm.avail_out = CHUNK;
+            strm.next_out = reinterpret_cast<Bytef*>(outBuf);
+            ret = inflate(&strm, Z_NO_FLUSH);
+            if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT || ret == Z_DATA_ERROR || ret == Z_MEM_ERROR){
+                std::cerr<<"Error: decompression failed.\n";
+                inflateEnd(&strm);
+                return -1;
             }
-        }
-        if(fields.size()<8){
-            if(strictMode){
-                std::cerr<<"Error: line "<< lineNumber <<" has <8 columns.\n";
-                return -1; // indicates error
-            } else {
-                std::cerr<<"Warning: skipping line "<0){
+                buffer.append(outBuf, have);
+                size_t pos;
+                while((pos = buffer.find('\n')) != std::string::npos){
+                    std::string line = buffer.substr(0,pos);
+                    buffer.erase(0,pos+1);
+                    lineNumber++;
+                    if(!processLine(line,lineNumber,count)){
+                        inflateEnd(&strm);
+                        return -1;
+                    }
+                }
             }
+        } while(strm.avail_out==0);
+    } while(ret != Z_STREAM_END);
+
+    if(!buffer.empty()){
+        lineNumber++;
+        if(!processLine(buffer,lineNumber,count)){
+            inflateEnd(&strm);
+            return -1;
         }
-        // if we get here => count it
-        count++;
     }
+    inflateEnd(&strm);
     return count;
 }
 
diff --git a/src/VCFX_variant_counter/VCFX_variant_counter.h b/src/VCFX_variant_counter/VCFX_variant_counter.h
index c53b8358..6c5b05c0 100644
--- a/src/VCFX_variant_counter/VCFX_variant_counter.h
+++ b/src/VCFX_variant_counter/VCFX_variant_counter.h
@@ -17,6 +17,8 @@ class VCFXVariantCounter {
 
     // The actual counting function
     int countVariants(std::istream &in);
+    int countVariantsGzip(std::istream &in);
+    bool processLine(const std::string &line, int lineNumber, int &count);
 
 };
 

From 6c37b6fdff871ba96a1be575fbfbcaef655369a5 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 20:07:50 +0100
Subject: [PATCH 39/63] fix CSV quote escaping

---
 src/VCFX_format_converter/VCFX_format_converter.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/VCFX_format_converter/VCFX_format_converter.cpp b/src/VCFX_format_converter/VCFX_format_converter.cpp
index 668b4f67..e374d165 100644
--- a/src/VCFX_format_converter/VCFX_format_converter.cpp
+++ b/src/VCFX_format_converter/VCFX_format_converter.cpp
@@ -112,8 +112,9 @@ static std::string csvEscape(const std::string &field) {
     tmp.push_back('"');
     for (char c : field) {
         if (c == '"') {
-            // double it
-            tmp += "\"\"";
+            // double it by writing two quotes
+            tmp.push_back('"');
+            tmp.push_back('"');
         } else {
             tmp.push_back(c);
         }

From dbda79692655263dc2ed003ba0529d57c584a32a Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 20:15:47 +0100
Subject: [PATCH 40/63] docs: update phred_filter usage

---
 docs/VCFX_nonref_filter.md | 2 +-
 docs/index.md              | 2 +-
 docs/quickstart.md         | 4 ++--
 docs/tools_overview.md     | 6 +++---
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/VCFX_nonref_filter.md b/docs/VCFX_nonref_filter.md
index aa7f15e9..eda94165 100644
--- a/docs/VCFX_nonref_filter.md
+++ b/docs/VCFX_nonref_filter.md
@@ -71,7 +71,7 @@ VCFX_nonref_filter > high_quality_nonref.vcf
 # Create a pipeline of filters
 cat input.vcf | \
 VCFX_nonref_filter | \
-VCFX_phred_filter --min-quality 30 > filtered.vcf
+VCFX_phred_filter --phred-filter 30 > filtered.vcf
 ```
 
 ## Homozygous Reference Detection
diff --git a/docs/index.md b/docs/index.md
index 60144f62..57ffc818 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -127,7 +127,7 @@ VCFX tools are designed to be used in pipelines. Here are some common usage patt
 # Extract phased variants, filter by quality, and calculate allele frequencies
 cat input.vcf | \
   VCFX_phase_checker | \
-  VCFX_phred_filter --min-qual 30 | \
+  VCFX_phred_filter --phred-filter 30 | \
   VCFX_allele_freq_calc > result.tsv
 ```
 
diff --git a/docs/quickstart.md b/docs/quickstart.md
index 261901bd..d99d39a2 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -37,7 +37,7 @@ Filter for high-quality SNPs:
 cat input.vcf | \
   VCFX_variant_classifier --append-info | \
   grep 'VCF_CLASS=SNP' | \
-  VCFX_phred_filter --min-qual 30 > high_quality_snps.vcf
+  VCFX_phred_filter --phred-filter 30 > high_quality_snps.vcf
 ```
 
 ### Example 2: Population Analysis
@@ -120,7 +120,7 @@ cat input.vcf | \
   VCFX_validator | \
   VCFX_variant_classifier --append-info | \
   VCFX_missing_detector --max-missing 0.1 | \
-  VCFX_phred_filter --min-qual 20 > qc_passed.vcf
+  VCFX_phred_filter --phred-filter 20 > qc_passed.vcf
 ```
 
 ### Sample Comparison
diff --git a/docs/tools_overview.md b/docs/tools_overview.md
index 4361453d..26b5ba69 100644
--- a/docs/tools_overview.md
+++ b/docs/tools_overview.md
@@ -112,7 +112,7 @@ VCFX tools are designed to be combined in pipelines. Here are some common usage
 # Extract phased variants, filter by quality, and calculate allele frequencies
 cat input.vcf | \
   VCFX_phase_checker | \
-  VCFX_phred_filter --min-qual 30 | \
+  VCFX_phred_filter --phred-filter 30 | \
   VCFX_allele_freq_calc > result.tsv
 ```
 
@@ -123,7 +123,7 @@ cat input.vcf | \
 cat input.vcf | \
   VCFX_variant_classifier --append-info | \
   grep 'VCF_CLASS=SNP' | \
-  VCFX_phred_filter --min-qual 30 > high_quality_snps.vcf
+  VCFX_phred_filter --phred-filter 30 > high_quality_snps.vcf
 ```
 
 ### Sample Extraction and Comparison
@@ -168,5 +168,5 @@ cat input.vcf | \
   VCFX_validator | \
   VCFX_variant_classifier --append-info | \
   VCFX_missing_detector --max-missing 0.1 | \
-  VCFX_phred_filter --min-qual 20 > qc_passed.vcf
+  VCFX_phred_filter --phred-filter 20 > qc_passed.vcf
 ``` 
\ No newline at end of file

From bb9ca6dae16b5fc81ec7776ecb0abb167a025cff Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 20:55:18 +0100
Subject: [PATCH 41/63] fix test_variant_counter script path

---
 tests/test_variant_counter.sh | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tests/test_variant_counter.sh b/tests/test_variant_counter.sh
index ae78b320..587f7656 100755
--- a/tests/test_variant_counter.sh
+++ b/tests/test_variant_counter.sh
@@ -3,8 +3,16 @@ set -e
 
 echo "=== Testing VCFX_variant_counter ==="
 
-# Executable paths
-VCFX_EXECUTABLE="../build/src/VCFX_variant_counter/VCFX_variant_counter"
+# Determine script and repository locations so the test can be run from
+# anywhere.  This mirrors the approach used by other test scripts.
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+ROOT_DIR="$( cd "$SCRIPT_DIR/.." && pwd )"
+
+# Ensure we run inside the script directory for predictable paths
+cd "$SCRIPT_DIR"
+
+# Path to the built executable
+VCFX_EXECUTABLE="$ROOT_DIR/build/src/VCFX_variant_counter/VCFX_variant_counter"
 
 # Check if executable exists
 if [ ! -f "$VCFX_EXECUTABLE" ]; then

From e592624fb7f11f539ed4c342838a92a7407bd05e Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 20:56:34 +0100
Subject: [PATCH 42/63] Merge main

---
 docs/VCFX_ld_calculator.md |  2 +-
 docs/VCFX_nonref_filter.md |  2 +-
 docs/index.md              |  2 +-
 docs/quickstart.md         |  7 ++++---
 docs/tools_overview.md     | 11 ++++++-----
 5 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/docs/VCFX_ld_calculator.md b/docs/VCFX_ld_calculator.md
index 34fa081e..1dc5440a 100644
--- a/docs/VCFX_ld_calculator.md
+++ b/docs/VCFX_ld_calculator.md
@@ -78,7 +78,7 @@ VCFX_ld_calculator --region chr1:10000-20000 < input.vcf > ld_matrix.txt
 Filter for common variants first, then calculate LD:
 
 ```bash
-cat input.vcf | VCFX_af_subsetter --min-af 0.05 | VCFX_ld_calculator > common_variants_ld.txt
+cat input.vcf | VCFX_af_subsetter --af-filter '0.05-1.0' | VCFX_ld_calculator > common_variants_ld.txt
 ```
 
 ## Handling Special Cases
diff --git a/docs/VCFX_nonref_filter.md b/docs/VCFX_nonref_filter.md
index aa7f15e9..eda94165 100644
--- a/docs/VCFX_nonref_filter.md
+++ b/docs/VCFX_nonref_filter.md
@@ -71,7 +71,7 @@ VCFX_nonref_filter > high_quality_nonref.vcf
 # Create a pipeline of filters
 cat input.vcf | \
 VCFX_nonref_filter | \
-VCFX_phred_filter --min-quality 30 > filtered.vcf
+VCFX_phred_filter --phred-filter 30 > filtered.vcf
 ```
 
 ## Homozygous Reference Detection
diff --git a/docs/index.md b/docs/index.md
index 60144f62..57ffc818 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -127,7 +127,7 @@ VCFX tools are designed to be used in pipelines. Here are some common usage patt
 # Extract phased variants, filter by quality, and calculate allele frequencies
 cat input.vcf | \
   VCFX_phase_checker | \
-  VCFX_phred_filter --min-qual 30 | \
+  VCFX_phred_filter --phred-filter 30 | \
   VCFX_allele_freq_calc > result.tsv
 ```
 
diff --git a/docs/quickstart.md b/docs/quickstart.md
index 261901bd..57edc6d3 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -37,7 +37,7 @@ Filter for high-quality SNPs:
 cat input.vcf | \
   VCFX_variant_classifier --append-info | \
   grep 'VCF_CLASS=SNP' | \
-  VCFX_phred_filter --min-qual 30 > high_quality_snps.vcf
+  VCFX_phred_filter --phred-filter 30 > high_quality_snps.vcf
 ```
 
 ### Example 2: Population Analysis
@@ -119,8 +119,9 @@ Here are some common workflows that combine multiple VCFX tools:
 cat input.vcf | \
   VCFX_validator | \
   VCFX_variant_classifier --append-info | \
-  VCFX_missing_detector --max-missing 0.1 | \
-  VCFX_phred_filter --min-qual 20 > qc_passed.vcf
+  VCFX_missing_detector | \
+  grep -v 'MISSING_GENOTYPES=1' | \
+  VCFX_phred_filter --phred-filter 20 > qc_passed.vcf
 ```
 
 ### Sample Comparison
diff --git a/docs/tools_overview.md b/docs/tools_overview.md
index 4361453d..4b66b839 100644
--- a/docs/tools_overview.md
+++ b/docs/tools_overview.md
@@ -112,7 +112,7 @@ VCFX tools are designed to be combined in pipelines. Here are some common usage
 # Extract phased variants, filter by quality, and calculate allele frequencies
 cat input.vcf | \
   VCFX_phase_checker | \
-  VCFX_phred_filter --min-qual 30 | \
+  VCFX_phred_filter --phred-filter 30 | \
   VCFX_allele_freq_calc > result.tsv
 ```
 
@@ -123,7 +123,7 @@ cat input.vcf | \
 cat input.vcf | \
   VCFX_variant_classifier --append-info | \
   grep 'VCF_CLASS=SNP' | \
-  VCFX_phred_filter --min-qual 30 > high_quality_snps.vcf
+  VCFX_phred_filter --phred-filter 30 > high_quality_snps.vcf
 ```
 
 ### Sample Extraction and Comparison
@@ -139,7 +139,7 @@ cat samples.vcf reference.vcf | VCFX_concordance_checker > concordance_report.ts
 ```bash
 # Calculate LD in a specific region after filtering for common variants
 cat input.vcf | \
-  VCFX_af_subsetter --min-af 0.05 | \
+  VCFX_af_subsetter --af-filter '0.05-1.0' | \
   VCFX_ld_calculator --region chr1:10000-20000 > ld_matrix.txt
 ```
 
@@ -167,6 +167,7 @@ cat eur.vcf | VCFX_allele_freq_calc > eur_afs.tsv
 cat input.vcf | \
   VCFX_validator | \
   VCFX_variant_classifier --append-info | \
-  VCFX_missing_detector --max-missing 0.1 | \
-  VCFX_phred_filter --min-qual 20 > qc_passed.vcf
+  VCFX_missing_detector | \
+  grep -v 'MISSING_GENOTYPES=1' | \
+  VCFX_phred_filter --phred-filter 20 > qc_passed.vcf
 ``` 
\ No newline at end of file

From 5022ec35a84032bf630c7e47e573c8eede948d4f Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 21:06:52 +0100
Subject: [PATCH 43/63] docs: document --version flag

---
 docs/VCFX_allele_freq_calc.md      | 1 +
 docs/VCFX_format_converter.md      | 1 +
 docs/VCFX_genotype_query.md        | 1 +
 docs/VCFX_merger.md                | 1 +
 docs/VCFX_metadata_summarizer.md   | 1 +
 docs/VCFX_missing_data_handler.md  | 1 +
 docs/VCFX_multiallelic_splitter.md | 1 +
 docs/VCFX_outlier_detector.md      | 1 +
 docs/VCFX_phase_quality_filter.md  | 1 +
 docs/VCFX_probability_filter.md    | 1 +
 docs/VCFX_reformatter.md           | 1 +
 docs/VCFX_region_subsampler.md     | 1 +
 docs/VCFX_subsampler.md            | 1 +
 13 files changed, 13 insertions(+)

diff --git a/docs/VCFX_allele_freq_calc.md b/docs/VCFX_allele_freq_calc.md
index dbc04afa..fcfbd8a8 100644
--- a/docs/VCFX_allele_freq_calc.md
+++ b/docs/VCFX_allele_freq_calc.md
@@ -15,6 +15,7 @@ VCFX_allele_freq_calc [OPTIONS] < input.vcf > allele_frequencies.tsv
 | Option      | Description                                |
 |-------------|--------------------------------------------|
 | `--help`, `-h` | Display help message and exit              |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_format_converter.md b/docs/VCFX_format_converter.md
index 50c25a3d..e10f3891 100644
--- a/docs/VCFX_format_converter.md
+++ b/docs/VCFX_format_converter.md
@@ -14,6 +14,7 @@ VCFX_format_converter [OPTIONS] < input.vcf > output.file
 | `--to-bed` | Convert the input VCF file to BED format |
 | `--to-csv` | Convert the input VCF file to CSV format |
 | `--help`, `-h` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_format_converter` reads a VCF file from standard input and converts it to the specified output format. The tool:
diff --git a/docs/VCFX_genotype_query.md b/docs/VCFX_genotype_query.md
index 1e923076..aab43902 100644
--- a/docs/VCFX_genotype_query.md
+++ b/docs/VCFX_genotype_query.md
@@ -17,6 +17,7 @@ VCFX_genotype_query [OPTIONS] < input.vcf > filtered.vcf
 | `--genotype-query`, `-g` "GENOTYPE" | Specify the genotype to query (e.g., "0/1", "1/1") |
 | `--strict` | Use strict string comparison (no phasing unification or allele sorting) |
 | `--help`, `-h` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_merger.md b/docs/VCFX_merger.md
index 27124ab5..6fb790ff 100644
--- a/docs/VCFX_merger.md
+++ b/docs/VCFX_merger.md
@@ -16,6 +16,7 @@ VCFX_merger --merge file1.vcf,file2.vcf,... [options] > merged.vcf
 |--------|-------------|
 | `-m, --merge` | Comma-separated list of VCF files to merge |
 | `-h, --help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_metadata_summarizer.md b/docs/VCFX_metadata_summarizer.md
index 111bcf06..26eb18f2 100644
--- a/docs/VCFX_metadata_summarizer.md
+++ b/docs/VCFX_metadata_summarizer.md
@@ -15,6 +15,7 @@ VCFX_metadata_summarizer [options] < input.vcf
 | Option | Description |
 |--------|-------------|
 | `-h, --help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_missing_data_handler.md b/docs/VCFX_missing_data_handler.md
index b12cf9b0..9df50524 100644
--- a/docs/VCFX_missing_data_handler.md
+++ b/docs/VCFX_missing_data_handler.md
@@ -17,6 +17,7 @@ VCFX_missing_data_handler [OPTIONS] [files...] > processed.vcf
 | `--fill-missing`, `-f` | Impute missing genotypes with a default value |
 | `--default-genotype`, `-d`  | Specify the default genotype for imputation (default: "./.")  |
 | `--help`, `-h` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_multiallelic_splitter.md b/docs/VCFX_multiallelic_splitter.md
index e7f4692b..870dca12 100644
--- a/docs/VCFX_multiallelic_splitter.md
+++ b/docs/VCFX_multiallelic_splitter.md
@@ -15,6 +15,7 @@ VCFX_multiallelic_splitter [OPTIONS] < input.vcf > biallelic_output.vcf
 | Option | Description |
 |--------|-------------|
 | `--help`, `-h` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_outlier_detector.md b/docs/VCFX_outlier_detector.md
index 5cff70a6..1f007bb7 100644
--- a/docs/VCFX_outlier_detector.md
+++ b/docs/VCFX_outlier_detector.md
@@ -19,6 +19,7 @@ VCFX_outlier_detector --metric  --threshold  [--variant|--sample] < in
 | `--variant`, `-v` | Variant mode: identify variants with INFO field metrics above threshold |
 | `--sample`, `-s` | Sample mode: identify samples with average genotype metrics above threshold |
 | `--help`, `-h` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 
diff --git a/docs/VCFX_phase_quality_filter.md b/docs/VCFX_phase_quality_filter.md
index 80a9a427..c6e18231 100644
--- a/docs/VCFX_phase_quality_filter.md
+++ b/docs/VCFX_phase_quality_filter.md
@@ -15,6 +15,7 @@ VCFX_phase_quality_filter --filter-pq "PQ" < input.vcf > output.v
 | Option | Description |
 |--------|-------------|
 | `-h, --help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-f, --filter-pq` | Condition like 'PQ>30', 'PQ>=20', 'PQ!=10', etc. |
 
 ## Description
diff --git a/docs/VCFX_probability_filter.md b/docs/VCFX_probability_filter.md
index 8978e5d9..10d0a434 100644
--- a/docs/VCFX_probability_filter.md
+++ b/docs/VCFX_probability_filter.md
@@ -13,6 +13,7 @@ VCFX_probability_filter --filter-probability "" < input.vcf > filtere
 |--------|-------------|
 | `-f, --filter-probability ` | Specify the probability filter condition (e.g., `GP>0.9`) |
 | `-h, --help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 `VCFX_probability_filter` analyzes the genotype probability fields in the FORMAT column of a VCF file and filters variants based on a user-defined condition. The tool:
diff --git a/docs/VCFX_reformatter.md b/docs/VCFX_reformatter.md
index 2934839f..05e8f96f 100644
--- a/docs/VCFX_reformatter.md
+++ b/docs/VCFX_reformatter.md
@@ -15,6 +15,7 @@ VCFX_reformatter [options] < input.vcf > output.vcf
 | Option | Description |
 |--------|-------------|
 | `-h, --help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-c, --compress-info ` | Remove specified INFO keys (comma-separated) |
 | `-f, --compress-format ` | Remove specified FORMAT keys (comma-separated) |
 | `-i, --reorder-info ` | Reorder INFO keys (comma-separated) |
diff --git a/docs/VCFX_region_subsampler.md b/docs/VCFX_region_subsampler.md
index 0ff4eaaa..c8d4396c 100644
--- a/docs/VCFX_region_subsampler.md
+++ b/docs/VCFX_region_subsampler.md
@@ -15,6 +15,7 @@ VCFX_region_subsampler --region-bed FILE < input.vcf > output.vcf
 | Option | Description |
 |--------|-------------|
 | `-h, --help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 | `-b, --region-bed FILE` | BED file listing regions to keep |
 
 ## Description
diff --git a/docs/VCFX_subsampler.md b/docs/VCFX_subsampler.md
index 4094b9e9..0ba828ca 100644
--- a/docs/VCFX_subsampler.md
+++ b/docs/VCFX_subsampler.md
@@ -17,6 +17,7 @@ VCFX_subsampler [options] < input.vcf > output.vcf
 | `-s, --subsample ` | Required: Number of variants to keep in the output |
 | `--seed ` | Optional: Use a specific random seed for reproducible results |
 | `-h, --help` | Display help message and exit |
+| `-v`, `--version` | Show program version and exit |
 
 ## Description
 

From 6a0f0d05d6c593f43402a68f87cee6ce35ece4e1 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 22:23:43 +0100
Subject: [PATCH 44/63] Clarify version flag note

---
 docs/VCFX_outlier_detector.md                       | 4 +++-
 src/VCFX_outlier_detector/VCFX_outlier_detector.cpp | 8 ++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/docs/VCFX_outlier_detector.md b/docs/VCFX_outlier_detector.md
index 1f007bb7..32bc614c 100644
--- a/docs/VCFX_outlier_detector.md
+++ b/docs/VCFX_outlier_detector.md
@@ -16,11 +16,13 @@ VCFX_outlier_detector --metric  --threshold  [--variant|--sample] < in
 |--------|-------------|
 | `--metric`, `-m`  | Name of the metric to use (e.g., AF, DP, GQ) |
 | `--threshold`, `-t`  | Numeric threshold value for outlier detection |
-| `--variant`, `-v` | Variant mode: identify variants with INFO field metrics above threshold |
+| `--variant`, `-V` | Variant mode: identify variants with INFO field metrics above threshold |
 | `--sample`, `-s` | Sample mode: identify samples with average genotype metrics above threshold |
 | `--help`, `-h` | Display help message and exit |
 | `-v`, `--version` | Show program version and exit |
 
+**Note:** `-v` shows the version information. Use `--variant` or the short option `-V` to run in variant mode.
+
 ## Description
 
 VCFX_outlier_detector analyzes VCF files to identify outliers based on numeric metrics. The tool operates in two distinct modes:
diff --git a/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp b/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp
index d8f383c3..ae146354 100644
--- a/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp
+++ b/src/VCFX_outlier_detector/VCFX_outlier_detector.cpp
@@ -28,12 +28,12 @@ int VCFXOutlierDetector::run(int argc, char* argv[]){
         {"help", no_argument, 0, 'h'},
         {"metric", required_argument, 0, 'm'},
         {"threshold", required_argument, 0, 't'},
-        {"variant", no_argument, 0, 'v'},
+        {"variant", no_argument, 0, 'V'},
         {"sample", no_argument, 0, 's'},
         {0,0,0,0}
     };
     while(true){
-        int c= getopt_long(argc, argv, "hm:t:vs", long_opts, nullptr);
+        int c= getopt_long(argc, argv, "hm:t:Vs", long_opts, nullptr);
         if(c==-1) break;
         switch(c){
             case 'h':
@@ -50,7 +50,7 @@ int VCFXOutlierDetector::run(int argc, char* argv[]){
                     return 1;
                 }
                 break;
-            case 'v':
+            case 'V':
                 isVariantMode= true;
                 break;
             case 's':
@@ -78,7 +78,7 @@ void VCFXOutlierDetector::displayHelp(){
 "  --help, -h           Print this help.\n"
 "  --metric, -m    Name of the metric to use (e.g. AF, DP, GQ...).\n"
 "  --threshold, -t  Numeric threshold.\n"
-"  --variant, -v        Evaluate each variant's  in INFO>threshold => print.\n"
+"  --variant, -V        Evaluate each variant's  in INFO>threshold => print.\n"
 "  --sample, -s         Evaluate sample averages of  in genotype subfield => print outliers.\n\n"
 "Examples:\n"
 "  1) Outlier variants with AF>0.05:\n"

From a43f028bec01b287182f68054e2d0e985600538d Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 22:25:18 +0100
Subject: [PATCH 45/63] docs: adjust toolkit utility count

---
 docs/citation.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/citation.md b/docs/citation.md
index 035f7aaf..18c83beb 100644
--- a/docs/citation.md
+++ b/docs/citation.md
@@ -24,7 +24,7 @@ The VCFX toolkit was presented at the 12th International Work-Conference on Bioi
 
 ### Abstract
 
-VCFX is a collection of specialized C/C++ command-line tools designed for efficient manipulation, analysis, and transformation of VCF (Variant Call Format) files used in genomic research and bioinformatics. Each tool is an independent executable that follows the Unix philosophy: do one thing well and work seamlessly with other tools through standard input/output streams. The toolkit includes over 60 specialized utilities covering the entire lifecycle of variant data analysis from filtering and annotation to merging, phasing, and structural variant manipulation.
+VCFX is a collection of specialized C/C++ command-line tools designed for efficient manipulation, analysis, and transformation of VCF (Variant Call Format) files used in genomic research and bioinformatics. Each tool is an independent executable that follows the Unix philosophy: do one thing well and work seamlessly with other tools through standard input/output streams. The toolkit includes 60 specialized utilities covering the entire lifecycle of variant data analysis from filtering and annotation to merging, phasing, and structural variant manipulation.
 
 ## How to Cite in Different Formats
 

From de32788702f90fd63a6691e70c5675a17b0032ee Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 22:27:03 +0100
Subject: [PATCH 46/63] docs: add annotation and processing sections

---
 docs/index.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/docs/index.md b/docs/index.md
index 57ffc818..92ad5135 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -98,6 +98,25 @@ Tools for handling VCF files:
 - [VCFX_compressor](VCFX_compressor.md) - Compress VCF files efficiently
 - [View all file management tools...](tools_overview.md#file-management)
 
+### Annotation and Reporting
+
+Tools for annotating and extracting information from VCF files:
+
+- [VCFX_custom_annotator](VCFX_custom_annotator.md) - Add custom annotations to VCF files
+- [VCFX_info_summarizer](VCFX_info_summarizer.md) - Summarize INFO fields
+- [VCFX_metadata_summarizer](VCFX_metadata_summarizer.md) - Summarize key metadata from a VCF file
+- [View all annotation tools...](tools_overview.md#annotation-and-reporting)
+
+### Data Processing
+
+Tools for processing variants and samples:
+
+- [VCFX_missing_data_handler](VCFX_missing_data_handler.md) - Handle missing data
+- [VCFX_quality_adjuster](VCFX_quality_adjuster.md) - Adjust quality scores
+- [VCFX_haplotype_phaser](VCFX_haplotype_phaser.md) - Phase haplotypes
+- [VCFX_haplotype_extractor](VCFX_haplotype_extractor.md) - Extract haplotype information
+- [View all processing tools...](tools_overview.md#data-processing)
+
 For a complete list of all tools and detailed usage examples, see the [tools overview](tools_overview.md).
 
 ## Who Should Use VCFX?

From 0a54a83902d634df80be86e8f9cd6d082b790201 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 22:30:43 +0100
Subject: [PATCH 47/63] docs: rename heading for sample comparison

---
 docs/index.md          | 12 ++++--------
 docs/quickstart.md     | 12 ++++--------
 docs/tools_overview.md |  9 ++++-----
 3 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/docs/index.md b/docs/index.md
index 57ffc818..27b76be0 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -84,7 +84,7 @@ Tools for converting or reformatting VCF data:
 
 Tools for validating and checking data quality:
 
-- [VCFX_concordance_checker](VCFX_concordance_checker.md) - Check concordance between VCF files
+- [VCFX_concordance_checker](VCFX_concordance_checker.md) - Check concordance between samples in a VCF file
 - [VCFX_missing_detector](VCFX_missing_detector.md) - Detect and report missing data
 - [VCFX_validator](VCFX_validator.md) - Validate VCF format compliance
 - [View all quality control tools...](tools_overview.md#quality-control)
@@ -131,15 +131,11 @@ cat input.vcf | \
   VCFX_allele_freq_calc > result.tsv
 ```
 
-### Sample Selection and Comparison
+### Sample Comparison
 
 ```bash
-# Extract samples and check concordance
-cat input.vcf | \
-  VCFX_sample_extractor --samples SAMPLE1,SAMPLE2 > samples.vcf
-
-cat samples.vcf reference.vcf | \
-  VCFX_concordance_checker > concordance_report.tsv
+# Check concordance between two samples in a single VCF
+cat input.vcf | VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" > concordance_report.tsv
 ```
 
 See the [tools overview page](tools_overview.md#common-usage-patterns) for more usage examples.
diff --git a/docs/quickstart.md b/docs/quickstart.md
index d99d39a2..22504065 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -62,10 +62,10 @@ cat input.vcf | \
 
 ### Example 4: Quality Control
 
-Check concordance between two VCF files:
+Check concordance between two samples in a single VCF file:
 
 ```bash
-VCFX_concordance_checker --vcf1 sample1.vcf --vcf2 sample2.vcf > concordance_report.tsv
+cat sample.vcf | VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" > concordance_report.tsv
 ```
 
 ## Working with Compressed Files
@@ -126,12 +126,8 @@ cat input.vcf | \
 ### Sample Comparison
 
 ```bash
-# Extract common samples
-VCFX_sample_extractor --samples SAMPLE1,SAMPLE2 < input1.vcf > samples1.vcf
-VCFX_sample_extractor --samples SAMPLE1,SAMPLE2 < input2.vcf > samples2.vcf
-
-# Check concordance
-VCFX_concordance_checker --vcf1 samples1.vcf --vcf2 samples2.vcf > concordance.tsv
+# Check concordance between two samples in a single VCF
+cat input.vcf | VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" > concordance.tsv
 ```
 
 ### Population Structure Analysis
diff --git a/docs/tools_overview.md b/docs/tools_overview.md
index de707ce7..2da498ad 100644
--- a/docs/tools_overview.md
+++ b/docs/tools_overview.md
@@ -60,7 +60,7 @@ Tools for converting or reformatting VCF data:
 
 Tools for validating and checking data quality:
 
-- [VCFX_concordance_checker](VCFX_concordance_checker.md) - Check concordance between VCF files
+- [VCFX_concordance_checker](VCFX_concordance_checker.md) - Check concordance between samples in a VCF file
 - [VCFX_missing_detector](VCFX_missing_detector.md) - Detect and report missing data
 - [VCFX_outlier_detector](VCFX_outlier_detector.md) - Detect outlier samples or variants
 - [VCFX_alignment_checker](VCFX_alignment_checker.md) - Check alignment of variants
@@ -126,12 +126,11 @@ cat input.vcf | \
   VCFX_phred_filter --phred-filter 30 > high_quality_snps.vcf
 ```
 
-### Sample Extraction and Comparison
+### Sample Comparison
 
 ```bash
-# Extract samples and check concordance
-cat input.vcf | VCFX_sample_extractor --samples SAMPLE1,SAMPLE2 > samples.vcf
-cat samples.vcf reference.vcf | VCFX_concordance_checker > concordance_report.tsv
+# Check concordance between two samples in a single VCF
+cat input.vcf | VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" > concordance.tsv
 ```
 
 ### Linkage Disequilibrium Analysis

From c2d1e5363ca59c6d3ccfabf38921d9b488799e7b Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Fri, 23 May 2025 23:13:56 +0100
Subject: [PATCH 48/63] Improve dark mode

---
 docs/styles/custom.css | 6 ++++++
 mkdocs.yml             | 6 ++++--
 2 files changed, 10 insertions(+), 2 deletions(-)
 create mode 100644 docs/styles/custom.css

diff --git a/docs/styles/custom.css b/docs/styles/custom.css
new file mode 100644
index 00000000..74717038
--- /dev/null
+++ b/docs/styles/custom.css
@@ -0,0 +1,6 @@
+[data-md-color-scheme="slate"] {
+  --md-primary-fg-color: #90caf9;
+  --md-accent-fg-color: #90caf9;
+  --md-default-fg-color: #e0e0e0;
+  --md-default-bg-color: #121212;
+}
diff --git a/mkdocs.yml b/mkdocs.yml
index 3a683ca9..705e6b99 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -29,8 +29,8 @@ theme:
         icon: material/brightness-7
         name: Switch to dark mode
     - scheme: slate
-      primary: indigo
-      accent: indigo
+      primary: blue grey
+      accent: light blue
       toggle:
         icon: material/brightness-4
         name: Switch to light mode
@@ -44,6 +44,8 @@ extra:
       link: https://github.com/ieeta-pt/VCFX
     - icon: fontawesome/brands/twitter
       link: https://twitter.com/MiguelFSilva1
+extra_css:
+  - styles/custom.css
 
 # Extensions
 markdown_extensions:

From d6aa26174757d6f7e77196c5e3a255a251aaed42 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Sat, 24 May 2025 00:28:28 +0100
Subject: [PATCH 49/63] Fix Python bindings

---
 CMakeLists.txt                |  5 +++
 docs/python_api.md            | 49 +++++++++++++++++++++++
 mkdocs.yml                    |  5 ++-
 python/CMakeLists.txt         | 24 ++++++++++++
 python/__init__.py            |  3 ++
 python/bindings.cpp           | 74 +++++++++++++++++++++++++++++++++++
 python/setup.py               | 32 +++++++++++++++
 src/CMakeLists.txt            |  1 +
 tests/CMakeLists.txt          |  1 +
 tests/test_all.sh             |  1 +
 tests/test_python_bindings.sh | 24 ++++++++++++
 11 files changed, 217 insertions(+), 2 deletions(-)
 create mode 100644 docs/python_api.md
 create mode 100644 python/CMakeLists.txt
 create mode 100644 python/__init__.py
 create mode 100644 python/bindings.cpp
 create mode 100644 python/setup.py
 create mode 100755 tests/test_python_bindings.sh

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 043cf974..cf4d4459 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,6 +16,7 @@ project(VCFX
 
 # Optionally allow building for WebAssembly via Emscripten
 option(BUILD_WASM "Build with emscripten toolchain" OFF)
+option(PYTHON_BINDINGS "Build Python bindings" ON)
 
 if(BUILD_WASM)
     if(NOT CMAKE_TOOLCHAIN_FILE)
@@ -55,6 +56,10 @@ enable_testing()
 # Add top-level 'src' subdirectory, which in turn references each tool subdirectory
 add_subdirectory(src)
 
+if(PYTHON_BINDINGS)
+  add_subdirectory(python)
+endif()
+
 # Add the test suite
 add_subdirectory(tests)
 
diff --git a/docs/python_api.md b/docs/python_api.md
new file mode 100644
index 00000000..713e9191
--- /dev/null
+++ b/docs/python_api.md
@@ -0,0 +1,49 @@
+# Python API
+
+VCFX provides optional Python bindings exposing a subset of helper
+functions from the C++ `vcfx_core` library. The bindings are built as a
+native Python extension and can be enabled through CMake.
+
+## Installation
+
+Build the project with the `PYTHON_BINDINGS` option enabled:
+
+```bash
+mkdir build && cd build
+cmake -DPYTHON_BINDINGS=ON ..
+make -j
+```
+
+The compiled module will be placed in the `build/python` directory.
+You can also install the package via `pip` which will invoke CMake
+automatically:
+
+```bash
+pip install ./python
+```
+
+## Available Functions
+
+The module exposes the following helpers:
+
+- `trim(text)` – remove leading and trailing whitespace.
+- `split(text, delimiter)` – split `text` by the given delimiter and
+  return a list of strings.
+- `read_file_maybe_compressed(path)` – read a plain or gzip/BGZF
+  compressed file and return its contents as a string.
+- `get_version()` – return the VCFX version string.
+
+## Example Usage
+
+```python
+import vcfx
+
+print(vcfx.trim("  abc  "))
+# 'abc'
+
+print(vcfx.split("A,B,C", ","))
+# ['A', 'B', 'C']
+
+version = vcfx.get_version()
+print("VCFX version:", version)
+```
diff --git a/mkdocs.yml b/mkdocs.yml
index 705e6b99..41f2d1b0 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -137,7 +137,8 @@ nav:
       - VCFX_missing_data_handler: VCFX_missing_data_handler.md
       - VCFX_quality_adjuster: VCFX_quality_adjuster.md
       - VCFX_haplotype_phaser: VCFX_haplotype_phaser.md
-      - VCFX_haplotype_extractor: VCFX_haplotype_extractor.md
+  - VCFX_haplotype_extractor: VCFX_haplotype_extractor.md
+  - Python API: python_api.md
   - Contributing: CONTRIBUTING.md
   - Citation: citation.md
-  - License: LICENSE.md 
\ No newline at end of file
+  - License: LICENSE.md
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
new file mode 100644
index 00000000..ff86f970
--- /dev/null
+++ b/python/CMakeLists.txt
@@ -0,0 +1,24 @@
+cmake_minimum_required(VERSION 3.14)
+
+if(NOT PYTHON_BINDINGS)
+    return()
+endif()
+
+find_package(Python3 COMPONENTS Development REQUIRED)
+
+add_library(_vcfx MODULE bindings.cpp)
+target_link_libraries(_vcfx PRIVATE vcfx_core Python3::Python)
+
+# Place the compiled module into the build/python directory
+set_target_properties(_vcfx PROPERTIES
+    PREFIX ""
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/python/vcfx"
+    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/python/vcfx"
+)
+
+configure_file(__init__.py "${CMAKE_BINARY_DIR}/python/vcfx/__init__.py" COPYONLY)
+
+install(TARGETS _vcfx
+        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx
+        ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx)
+install(FILES __init__.py DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx)
diff --git a/python/__init__.py b/python/__init__.py
new file mode 100644
index 00000000..8c70352a
--- /dev/null
+++ b/python/__init__.py
@@ -0,0 +1,3 @@
+"""Python bindings for the VCFX toolkit."""
+
+from ._vcfx import *  # noqa: F401,F403
diff --git a/python/bindings.cpp b/python/bindings.cpp
new file mode 100644
index 00000000..a8b18785
--- /dev/null
+++ b/python/bindings.cpp
@@ -0,0 +1,74 @@
+#include 
+#include "vcfx_core.h"
+#include 
+#include 
+
+// Helper to convert std::vector to Python list
+static PyObject* to_py_list(const std::vector& vec) {
+    PyObject* list = PyList_New(vec.size());
+    if (!list) return nullptr;
+    for (size_t i = 0; i < vec.size(); ++i) {
+        PyObject* item = PyUnicode_FromString(vec[i].c_str());
+        if (!item) {
+            Py_DECREF(list);
+            return nullptr;
+        }
+        PyList_SET_ITEM(list, i, item); // steals reference
+    }
+    return list;
+}
+
+static PyObject* py_trim(PyObject*, PyObject* args) {
+    const char* text;
+    if (!PyArg_ParseTuple(args, "s", &text))
+        return nullptr;
+    std::string result = vcfx::trim(text);
+    return PyUnicode_FromString(result.c_str());
+}
+
+static PyObject* py_split(PyObject*, PyObject* args) {
+    const char* text;
+    const char* delim;
+    if (!PyArg_ParseTuple(args, "ss", &text, &delim))
+        return nullptr;
+    std::vector parts = vcfx::split(text, delim[0]);
+    return to_py_list(parts);
+}
+
+static PyObject* py_read_file(PyObject*, PyObject* args) {
+    const char* path;
+    if (!PyArg_ParseTuple(args, "s", &path))
+        return nullptr;
+    std::string out;
+    if (!vcfx::read_file_maybe_compressed(path, out)) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to read file");
+        return nullptr;
+    }
+    return PyBytes_FromStringAndSize(out.data(), out.size());
+}
+
+static PyObject* py_get_version(PyObject*, PyObject*) {
+    std::string ver = vcfx::get_version();
+    return PyUnicode_FromString(ver.c_str());
+}
+
+static PyMethodDef VcfxMethods[] = {
+    {"trim", py_trim, METH_VARARGS, "Trim leading and trailing whitespace"},
+    {"split", py_split, METH_VARARGS, "Split a string on the given delimiter"},
+    {"read_file_maybe_compressed", py_read_file, METH_VARARGS,
+     "Read a (possibly compressed) file and return its contents"},
+    {"get_version", py_get_version, METH_NOARGS, "Return VCFX version string"},
+    {nullptr, nullptr, 0, nullptr}
+};
+
+static struct PyModuleDef moduledef = {
+    PyModuleDef_HEAD_INIT,
+    "_vcfx",
+    "Python bindings for VCFX helper functions",
+    -1,
+    VcfxMethods
+};
+
+PyMODINIT_FUNC PyInit__vcfx(void) {
+    return PyModule_Create(&moduledef);
+}
diff --git a/python/setup.py b/python/setup.py
new file mode 100644
index 00000000..7ad5494e
--- /dev/null
+++ b/python/setup.py
@@ -0,0 +1,32 @@
+import pathlib
+import subprocess
+from setuptools import setup, Extension
+from setuptools.command.build_ext import build_ext
+
+class CMakeExtension(Extension):
+    def __init__(self, name):
+        super().__init__(name, sources=[])
+
+class CMakeBuild(build_ext):
+    def build_extension(self, ext):
+        extdir = pathlib.Path(self.get_ext_fullpath(ext.name)).parent.resolve()
+        cmake_args = [
+            f'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}',
+            f'-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY={extdir}',
+            '-DPYTHON_BINDINGS=ON'
+        ]
+        build_temp = pathlib.Path(self.build_temp)
+        build_temp.mkdir(parents=True, exist_ok=True)
+        source_dir = pathlib.Path(__file__).resolve().parent.parent
+        subprocess.check_call(['cmake', str(source_dir)] + cmake_args, cwd=build_temp)
+        subprocess.check_call(['cmake', '--build', '.', '--target', '_vcfx'], cwd=build_temp)
+
+setup(
+    name='vcfx',
+    version='0.0.0',
+    packages=['vcfx'],
+    package_dir={'vcfx': '.'},
+    ext_modules=[CMakeExtension('_vcfx')],
+    cmdclass={'build_ext': CMakeBuild},
+    zip_safe=False,
+)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 93d6adfc..9408aaa3 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.14)
 
 # Build the core library from your shared code
 add_library(vcfx_core STATIC vcfx_core.cpp)
+set_property(TARGET vcfx_core PROPERTY POSITION_INDEPENDENT_CODE ON)
 target_include_directories(vcfx_core PUBLIC ${CMAKE_CURRENT_LIST_DIR}/../include)
 target_link_libraries(vcfx_core PUBLIC ZLIB::ZLIB)
 if(WIN32)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 558897dc..78fc4a59 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -59,6 +59,7 @@ set(TEST_SCRIPTS
     test_validator.sh
     test_variant_classifier.sh
     test_variant_counter.sh
+    test_python_bindings.sh
 )
 
 foreach(script ${TEST_SCRIPTS})
diff --git a/tests/test_all.sh b/tests/test_all.sh
index 75cbb912..f226aff1 100755
--- a/tests/test_all.sh
+++ b/tests/test_all.sh
@@ -81,6 +81,7 @@ TEST_SCRIPTS=(
     "test_validator.sh"
     "test_variant_classifier.sh"
     "test_variant_counter.sh"
+    "test_python_bindings.sh"
 )
 
 # Run all tests
diff --git a/tests/test_python_bindings.sh b/tests/test_python_bindings.sh
new file mode 100755
index 00000000..fc6f6350
--- /dev/null
+++ b/tests/test_python_bindings.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+set -e
+set -o pipefail
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+ROOT_DIR="$( cd "$SCRIPT_DIR/.." && pwd )"
+BUILD_DIR="${ROOT_DIR}/build/python_bindings"
+
+mkdir -p "$BUILD_DIR"
+cd "$BUILD_DIR"
+
+cmake -DPYTHON_BINDINGS=ON ../..
+make -j
+
+cd "$SCRIPT_DIR"
+
+PYTHONPATH="${BUILD_DIR}/python" python3 - <<'PY'
+import vcfx
+out = vcfx.trim("  hello  ")
+if out != "hello":
+    raise SystemExit('trim failed')
+print('Python bindings OK:', out)
+PY

From 2d103671518aa8174c8102b801b22f2beb86b478 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Sat, 24 May 2025 01:50:33 +0100
Subject: [PATCH 50/63] Fix Docker build by installing Python dev deps

---
 Dockerfile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 857fa1fe..9e242e1a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -9,6 +9,8 @@ RUN apt-get update && apt-get install -y \
     cmake \
     git \
     libz-dev \
+    python3 \
+    python3-dev \
     && rm -rf /var/lib/apt/lists/*
 
 # Create a working directory

From a8e1ca50683600f48f635af35de1efcf766fd1ad Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Sat, 24 May 2025 02:17:12 +0100
Subject: [PATCH 51/63] Add Python wrappers and wheel workflow

---
 .github/workflows/build-test.yml | 50 ++++++++++++++++++++++++++++++
 docs/python_api.md               | 22 ++++++++++++++
 python/CMakeLists.txt            |  3 +-
 python/__init__.py               | 11 +++++++
 python/setup.py                  | 14 ++++++++-
 python/tools.py                  | 52 ++++++++++++++++++++++++++++++++
 6 files changed, 150 insertions(+), 2 deletions(-)
 create mode 100644 python/tools.py

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index a95241db..3b87ad6c 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -41,3 +41,53 @@ jobs:
           cd build
           ctest --output-on-failure
         shell: bash
+
+  python-wheels:
+    needs: build-and-test
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Install dependencies (Linux)
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential cmake libz-dev python3-pip
+
+      - name: Install dependencies (macOS)
+        if: runner.os == 'macOS'
+        run: |
+          brew update
+          brew install cmake zlib python@3 bash
+          echo "$(brew --prefix)/bin" >> $GITHUB_PATH
+
+      - name: Build project
+        run: |
+          cmake -S . -B build -DPYTHON_BINDINGS=ON
+          cmake --build build --parallel
+          cmake --install build --prefix $PWD/install
+        shell: bash
+
+      - name: Build wheel
+        run: |
+          python3 -m pip install --upgrade pip wheel
+          pip wheel ./python -w dist
+        shell: bash
+
+      - name: Test Python wheel
+        run: |
+          pip install dist/*.whl
+          echo "$PWD/install/bin" >> $GITHUB_PATH
+          python3 - <<'EOF'
+import vcfx
+print('version:', vcfx.get_version())
+tools = vcfx.available_tools()
+print('tools:', len(tools))
+if tools:
+    vcfx.run_tool(tools[0], '--help', check=False)
+EOF
+        shell: bash
diff --git a/docs/python_api.md b/docs/python_api.md
index 713e9191..0db775ce 100644
--- a/docs/python_api.md
+++ b/docs/python_api.md
@@ -47,3 +47,25 @@ print(vcfx.split("A,B,C", ","))
 version = vcfx.get_version()
 print("VCFX version:", version)
 ```
+
+## Tool Wrappers
+
+Besides the helper functions, the package provides lightweight wrappers for
+all command line tools shipped with VCFX.  The wrappers simply invoke the
+corresponding ``VCFX_*`` executable via ``subprocess``.
+
+Use ``vcfx.available_tools()`` to see which tools are accessible on your
+``PATH`` and call them either via ``vcfx.run_tool(name, *args)`` or by using
+the tool name as a function:
+
+```python
+import vcfx
+
+print(vcfx.available_tools())
+
+# run through the generic helper
+vcfx.run_tool("alignment_checker", "--help")
+
+# or directly by name (if available)
+vcfx.alignment_checker("--help")
+```
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index ff86f970..40a4efcc 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -17,8 +17,9 @@ set_target_properties(_vcfx PROPERTIES
 )
 
 configure_file(__init__.py "${CMAKE_BINARY_DIR}/python/vcfx/__init__.py" COPYONLY)
+configure_file(tools.py "${CMAKE_BINARY_DIR}/python/vcfx/tools.py" COPYONLY)
 
 install(TARGETS _vcfx
         LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx
         ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx)
-install(FILES __init__.py DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx)
+install(FILES __init__.py tools.py DESTINATION ${CMAKE_INSTALL_LIBDIR}/vcfx)
diff --git a/python/__init__.py b/python/__init__.py
index 8c70352a..2f2ae974 100644
--- a/python/__init__.py
+++ b/python/__init__.py
@@ -1,3 +1,14 @@
 """Python bindings for the VCFX toolkit."""
 
 from ._vcfx import *  # noqa: F401,F403
+from . import tools as _tools
+
+# Re-export helper functions for convenience
+available_tools = _tools.available_tools
+run_tool = _tools.run_tool
+
+
+def __getattr__(name):
+    """Provide access to tool wrappers as attributes."""
+    return getattr(_tools, name)
+
diff --git a/python/setup.py b/python/setup.py
index 7ad5494e..c247405f 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -1,8 +1,15 @@
 import pathlib
+import re
 import subprocess
 from setuptools import setup, Extension
 from setuptools.command.build_ext import build_ext
 
+def read_version():
+    root = pathlib.Path(__file__).resolve().parent.parent / "CMakeLists.txt"
+    text = root.read_text()
+    m = re.search(r"set\(VCFX_VERSION\s+\"([0-9.]+)\"\)", text)
+    return m.group(1) if m else "0.0.0"
+
 class CMakeExtension(Extension):
     def __init__(self, name):
         super().__init__(name, sources=[])
@@ -23,10 +30,15 @@ def build_extension(self, ext):
 
 setup(
     name='vcfx',
-    version='0.0.0',
+    version=read_version(),
     packages=['vcfx'],
     package_dir={'vcfx': '.'},
     ext_modules=[CMakeExtension('_vcfx')],
     cmdclass={'build_ext': CMakeBuild},
     zip_safe=False,
+    classifiers=[
+        'Programming Language :: Python :: 3',
+        'Operating System :: MacOS :: MacOS X',
+        'Operating System :: POSIX :: Linux',
+    ],
 )
diff --git a/python/tools.py b/python/tools.py
new file mode 100644
index 00000000..13e61626
--- /dev/null
+++ b/python/tools.py
@@ -0,0 +1,52 @@
+import subprocess
+import shutil
+import functools
+
+__all__ = ["available_tools", "run_tool"]
+
+
+def available_tools():
+    """Return a list of VCFX tools available on the PATH."""
+    result = subprocess.run(["vcfx", "--list"], capture_output=True, text=True)
+    if result.returncode != 0:
+        return []
+    return [line.strip() for line in result.stdout.splitlines() if line.strip()]
+
+
+def run_tool(tool, *args, check=True, capture_output=False, text=True, **kwargs):
+    """Run a VCFX tool using subprocess.
+
+    Parameters
+    ----------
+    tool : str
+        Name of the tool without the ``VCFX_`` prefix.
+    *args : list
+        Arguments passed to the tool.
+    check : bool, optional
+        If ``True`` (default) raise ``CalledProcessError`` on non-zero
+        return code.
+    capture_output : bool, optional
+        If ``True`` capture stdout/stderr and return them on the returned
+        ``CompletedProcess`` object.
+    text : bool, optional
+        If ``True`` decode output as text. Defaults to ``True``.
+    **kwargs : dict
+        Additional keyword arguments forwarded to ``subprocess.run``.
+
+    Returns
+    -------
+    subprocess.CompletedProcess
+    """
+    exe = shutil.which(f"VCFX_{tool}")
+    if exe is None:
+        raise FileNotFoundError(f"VCFX tool '{tool}' not found in PATH")
+    cmd = [exe, *map(str, args)]
+    return subprocess.run(cmd, check=check, capture_output=capture_output, text=text, **kwargs)
+
+
+# Lazy attribute access for tool wrappers
+
+def __getattr__(name):
+    if name in available_tools():
+        return functools.partial(run_tool, name)
+    raise AttributeError(f"module 'vcfx' has no attribute '{name}'")

From 1e8653f5b5b8dfa834f132e95c70b131a42cdf9e Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Sat, 24 May 2025 09:58:00 +0100
Subject: [PATCH 52/63] Extend Python bindings

---
 docs/python_api.md            |  5 +++++
 python/bindings.cpp           | 18 ++++++++++++++++++
 tests/test_python_bindings.sh |  3 +++
 3 files changed, 26 insertions(+)

diff --git a/docs/python_api.md b/docs/python_api.md
index 0db775ce..d1672aab 100644
--- a/docs/python_api.md
+++ b/docs/python_api.md
@@ -31,6 +31,8 @@ The module exposes the following helpers:
   return a list of strings.
 - `read_file_maybe_compressed(path)` – read a plain or gzip/BGZF
   compressed file and return its contents as a string.
+- `read_maybe_compressed(data)` – decompress a bytes object if it is
+  gzip/BGZF compressed and return the resulting bytes.
 - `get_version()` – return the VCFX version string.
 
 ## Example Usage
@@ -44,6 +46,9 @@ print(vcfx.trim("  abc  "))
 print(vcfx.split("A,B,C", ","))
 # ['A', 'B', 'C']
 
+data = vcfx.read_maybe_compressed(b"hello")
+print(data)
+
 version = vcfx.get_version()
 print("VCFX version:", version)
 ```
diff --git a/python/bindings.cpp b/python/bindings.cpp
index a8b18785..1e2d0577 100644
--- a/python/bindings.cpp
+++ b/python/bindings.cpp
@@ -2,6 +2,7 @@
 #include "vcfx_core.h"
 #include 
 #include 
+#include 
 
 // Helper to convert std::vector to Python list
 static PyObject* to_py_list(const std::vector& vec) {
@@ -52,11 +53,28 @@ static PyObject* py_get_version(PyObject*, PyObject*) {
     return PyUnicode_FromString(ver.c_str());
 }
 
+static PyObject* py_read_stream(PyObject*, PyObject* args) {
+    Py_buffer buf;
+    if (!PyArg_ParseTuple(args, "y*", &buf))
+        return nullptr;
+    std::string data(static_cast(buf.buf), buf.len);
+    PyBuffer_Release(&buf);
+    std::istringstream ss(data);
+    std::string out;
+    if (!vcfx::read_maybe_compressed(ss, out)) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to read data");
+        return nullptr;
+    }
+    return PyBytes_FromStringAndSize(out.data(), out.size());
+}
+
 static PyMethodDef VcfxMethods[] = {
     {"trim", py_trim, METH_VARARGS, "Trim leading and trailing whitespace"},
     {"split", py_split, METH_VARARGS, "Split a string on the given delimiter"},
     {"read_file_maybe_compressed", py_read_file, METH_VARARGS,
      "Read a (possibly compressed) file and return its contents"},
+    {"read_maybe_compressed", py_read_stream, METH_VARARGS,
+     "Decompress bytes if needed and return the contents"},
     {"get_version", py_get_version, METH_NOARGS, "Return VCFX version string"},
     {nullptr, nullptr, 0, nullptr}
 };
diff --git a/tests/test_python_bindings.sh b/tests/test_python_bindings.sh
index fc6f6350..24947b74 100755
--- a/tests/test_python_bindings.sh
+++ b/tests/test_python_bindings.sh
@@ -20,5 +20,8 @@ import vcfx
 out = vcfx.trim("  hello  ")
 if out != "hello":
     raise SystemExit('trim failed')
+compressed = vcfx.read_maybe_compressed(b"hello")
+if compressed != b"hello":
+    raise SystemExit('read_maybe_compressed failed')
 print('Python bindings OK:', out)
 PY

From e47145dca4c61037070681bde07817be804cbcd2 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Sat, 24 May 2025 10:26:31 +0100
Subject: [PATCH 53/63] Add vcfx wrapper test

---
 README.md                  | 12 ++++++++
 docs/tools_overview.md     |  2 +-
 src/vcfx_wrapper/vcfx.cpp  | 56 ++++++++++++++++++++++++++++++++++++++
 tests/test_all.sh          |  1 +
 tests/test_vcfx_wrapper.sh | 34 +++++++++++++++++++++++
 5 files changed, 104 insertions(+), 1 deletion(-)
 create mode 100755 tests/test_vcfx_wrapper.sh

diff --git a/README.md b/README.md
index 906803db..891d0695 100644
--- a/README.md
+++ b/README.md
@@ -75,6 +75,18 @@ cat input.vcf | \
   VCFX_allele_freq_calc > snp_frequencies.tsv
 ```
 
+### Listing Available Tools
+
+```bash
+vcfx list
+```
+
+### Show Tool Documentation
+
+```bash
+vcfx help allele_counter
+```
+
 ## Building for WebAssembly
 
 If you have [Emscripten](https://emscripten.org/) installed:
diff --git a/docs/tools_overview.md b/docs/tools_overview.md
index 2da498ad..abeb3af1 100644
--- a/docs/tools_overview.md
+++ b/docs/tools_overview.md
@@ -2,7 +2,7 @@
 
 VCFX is a collection of C/C++ tools for processing and analyzing VCF (Variant Call Format) files, with optional WebAssembly compatibility. Each tool is an independent command-line executable that can parse input from `stdin` and write to `stdout`, enabling flexible piping and integration into bioinformatics pipelines.
 
-The suite also includes a convenience wrapper `vcfx` so you can run commands as `vcfx <subcommand>`. For example, `vcfx variant_counter` is equivalent to running `VCFX_variant_counter`. Use `vcfx --list` to see available subcommands. All individual `VCFX_*` binaries remain available if you prefer calling them directly.
+The suite also includes a convenience wrapper `vcfx` so you can run commands as `vcfx <subcommand>`. For example, `vcfx variant_counter` is equivalent to running `VCFX_variant_counter`. Use `vcfx --list` or the alias `vcfx list` to see available subcommands. To view Markdown documentation for a tool, run `vcfx help <tool>`. All individual `VCFX_*` binaries remain available if you prefer calling them directly.
 Every tool also accepts `--version` to display the build version.
 
 ## Tool Categories
diff --git a/src/vcfx_wrapper/vcfx.cpp b/src/vcfx_wrapper/vcfx.cpp
index 70093a13..2625136f 100644
--- a/src/vcfx_wrapper/vcfx.cpp
+++ b/src/vcfx_wrapper/vcfx.cpp
@@ -6,12 +6,17 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 #include 
 
 static void print_usage(){
     std::cout << "vcfx - unified interface for VCFX tools\n"
               << "Usage: vcfx [--help] [--list] <subcommand> [args]\n\n"
               << "  <subcommand>  Name of a VCFX tool without the 'VCFX_' prefix\n"
+              << "  list          Alias for --list\n"
+              << "  help <tool>   Show Markdown documentation for a tool if available\n"
               << "  --list        List available subcommands found in PATH\n"
               << "  --help        Show this help message\n";
 }
@@ -47,6 +52,43 @@ static void list_commands(){
     }
 }
 
+static std::vector get_doc_dirs(){
+    std::vector dirs;
+    const char* env = std::getenv("VCFX_DOCS_DIR");
+    if(env) dirs.emplace_back(env);
+
+    char buf[PATH_MAX];
+    ssize_t len = readlink("/proc/self/exe", buf, sizeof(buf)-1);
+    if(len > 0){
+        buf[len] = '\0';
+        std::string exe(buf);
+        auto pos = exe.find_last_of('/');
+        if(pos != std::string::npos){
+            std::string base = exe.substr(0,pos);
+            dirs.push_back(base + "/../share/doc/VCFX");
+            dirs.push_back(base + "/../share/vcfx/docs");
+            dirs.push_back(base + "/../docs");
+            dirs.push_back(base + "/../../docs");
+        }
+    }
+    dirs.push_back("docs");
+    return dirs;
+}
+
+static int print_tool_doc(const std::string& tool){
+    std::string fname = "VCFX_" + tool + ".md";
+    for(const auto& dir : get_doc_dirs()){
+        std::string path = dir + "/" + fname;
+        std::ifstream in(path);
+        if(in){
+            std::cout << in.rdbuf();
+            return 0;
+        }
+    }
+    std::cerr << "Documentation for '" << tool << "' not found." << std::endl;
+    return 1;
+}
+
 int main(int argc, char* argv[]){
     bool show_help = false;
     bool show_list = false;
@@ -81,6 +123,20 @@ int main(int argc, char* argv[]){
     }
 
     std::string sub = argv[optind];
+
+    if(sub == "list"){
+        list_commands();
+        return 0;
+    }
+
+    if(sub == "help"){
+        if(optind + 1 >= argc){
+            print_usage();
+            return 0;
+        }
+        return print_tool_doc(argv[optind + 1]);
+    }
+
     std::string exec_name = "VCFX_" + sub;
 
     std::vector exec_args;
diff --git a/tests/test_all.sh b/tests/test_all.sh
index f226aff1..86921dea 100755
--- a/tests/test_all.sh
+++ b/tests/test_all.sh
@@ -81,6 +81,7 @@ TEST_SCRIPTS=(
     "test_validator.sh"
     "test_variant_classifier.sh"
     "test_variant_counter.sh"
+    "test_vcfx_wrapper.sh"
     "test_python_bindings.sh"
 )
 
diff --git a/tests/test_vcfx_wrapper.sh b/tests/test_vcfx_wrapper.sh
new file mode 100755
index 00000000..e2c766e5
--- /dev/null
+++ b/tests/test_vcfx_wrapper.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+set -e
+set -o pipefail
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+ROOT_DIR="$( cd "$SCRIPT_DIR/.." && pwd )"
+VCFX_BIN="$ROOT_DIR/build/src/vcfx_wrapper/vcfx"
+
+# Ensure built tools are in PATH so the wrapper can locate them
+source "$ROOT_DIR/add_vcfx_tools_to_path.sh"
+
+if [ ! -x "$VCFX_BIN" ]; then
+  echo "vcfx executable not found: $VCFX_BIN"
+  exit 1
+fi
+
+LIST_LONG="$($VCFX_BIN --list)"
+LIST_ALIAS="$($VCFX_BIN list)"
+if [ "$LIST_LONG" != "$LIST_ALIAS" ]; then
+  echo "Output of 'vcfx list' does not match '--list'"
+  diff <(echo "$LIST_LONG") <(echo "$LIST_ALIAS") || true
+  exit 1
+fi
+
+echo "$LIST_LONG" > /dev/null # quiet shellcheck complaining about unused var
+
+DOC_FIRST_LINE="$($VCFX_BIN help allele_counter | head -n 1)"
+if ! echo "$DOC_FIRST_LINE" | grep -q "VCFX_allele_counter"; then
+  echo "Help output for allele_counter does not show documentation"
+  echo "First line was: $DOC_FIRST_LINE"
+  exit 1
+fi
+
+echo "✓ vcfx wrapper tests passed"

From 86a75113cd860a84297979dd7c5ff116d630095f Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Sat, 24 May 2025 11:07:34 +0100
Subject: [PATCH 54/63] Fix YAML syntax in workflow

---
 .github/workflows/build-test.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 3b87ad6c..bd9675c9 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -82,12 +82,12 @@ jobs:
         run: |
           pip install dist/*.whl
           echo "$PWD/install/bin" >> $GITHUB_PATH
-          python3 - <<'EOF'
-import vcfx
-print('version:', vcfx.get_version())
-tools = vcfx.available_tools()
-print('tools:', len(tools))
-if tools:
-    vcfx.run_tool(tools[0], '--help', check=False)
-EOF
+          python3 - <<'EOF' # the YAML block scalar strips this indent, so the plain EOF terminator below matches
+          import vcfx
+          print('version:', vcfx.get_version())
+          tools = vcfx.available_tools()
+          print('tools:', len(tools))
+          if tools:
+              vcfx.run_tool(tools[0], '--help', check=False)
+          EOF
         shell: bash

From fa3bbf53493b4352f708406951443790a0786140 Mon Sep 17 00:00:00 2001
From: Jorge Miguel Silva 
Date: Sat, 24 May 2025 11:27:52 +0100
Subject: [PATCH 55/63] docs: Remove site/ directory from version control and
 fix documentation - Removed generated site/ directory (build artifact) -
 Fixed broken link in docs/docker.md - Fixed navigation structure in
 mkdocs.yml (moved VCFX_haplotype_extractor to proper Data Processing section)
 - Documentation builds successfully with mkdocs

---
 docs/docker.md                                |    2 +-
 mkdocs.yml                                    |    2 +-
 site/404.html                                 | 2352 ------
 site/CONTRIBUTING/index.html                  | 2773 -------
 site/LICENSE/index.html                       | 2410 ------
 site/VCFX_af_subsetter/index.html             | 2986 --------
 site/VCFX_alignment_checker/index.html        | 2984 --------
 site/VCFX_allele_balance_calc/index.html      | 3005 --------
 site/VCFX_allele_balance_filter/index.html    | 2835 -------
 site/VCFX_allele_counter/index.html           | 3007 --------
 site/VCFX_allele_freq_calc/index.html         | 2724 -------
 site/VCFX_ancestry_assigner/index.html        | 2818 -------
 site/VCFX_ancestry_inferrer/index.html        | 2809 -------
 site/VCFX_annotation_extractor/index.html     | 2858 -------
 site/VCFX_compressor/index.html               | 2807 -------
 site/VCFX_concordance_checker/index.html      | 3044 --------
 site/VCFX_cross_sample_concordance/index.html | 3018 --------
 site/VCFX_custom_annotator/index.html         | 2837 -------
 site/VCFX_diff_tool/index.html                | 2776 -------
 site/VCFX_distance_calculator/index.html      | 2821 -------
 site/VCFX_dosage_calculator/index.html        | 2765 -------
 site/VCFX_duplicate_remover/index.html        | 2805 -------
 site/VCFX_fasta_converter/index.html          | 2845 -------
 site/VCFX_field_extractor/index.html          | 2993 --------
 site/VCFX_file_splitter/index.html            | 2783 -------
 site/VCFX_format_converter/index.html         | 2929 -------
 site/VCFX_genotype_query/index.html           | 2778 -------
 site/VCFX_gl_filter/index.html                | 3021 --------
 site/VCFX_haplotype_extractor/index.html      | 2832 -------
 site/VCFX_haplotype_phaser/index.html         | 2829 -------
 site/VCFX_header_parser/index.html            | 2795 -------
 site/VCFX_hwe_tester/index.html               | 2796 -------
 site/VCFX_impact_filter/index.html            | 2762 -------
 site/VCFX_inbreeding_calculator/index.html    | 2878 -------
 site/VCFX_indel_normalizer/index.html         | 2961 --------
 site/VCFX_indexer/index.html                  | 2851 -------
 site/VCFX_info_aggregator/index.html          | 2758 -------
 site/VCFX_info_parser/index.html              | 2793 -------
 site/VCFX_info_summarizer/index.html          | 2804 -------
 site/VCFX_ld_calculator/index.html            | 2776 -------
 site/VCFX_merger/index.html                   | 2837 -------
 site/VCFX_metadata_summarizer/index.html      | 2872 -------
 site/VCFX_missing_data_handler/index.html     | 2844 -------
 site/VCFX_missing_detector/index.html         | 2821 -------
 site/VCFX_multiallelic_splitter/index.html    | 2783 -------
 site/VCFX_nonref_filter/index.html            | 2826 -------
 site/VCFX_outlier_detector/index.html         | 2903 -------
 site/VCFX_phase_checker/index.html            | 2754 -------
 site/VCFX_phase_quality_filter/index.html     | 2897 -------
 site/VCFX_phred_filter/index.html             | 2844 -------
 site/VCFX_population_filter/index.html        | 2934 -------
 site/VCFX_position_subsetter/index.html       | 2956 --------
 site/VCFX_probability_filter/index.html       | 2960 --------
 site/VCFX_quality_adjuster/index.html         | 2823 -------
 site/VCFX_record_filter/index.html            | 3008 --------
 site/VCFX_ref_comparator/index.html           | 2829 -------
 site/VCFX_reformatter/index.html              | 2983 --------
 site/VCFX_region_subsampler/index.html        | 3007 --------
 site/VCFX_sample_extractor/index.html         | 2780 -------
 site/VCFX_sorter/index.html                   | 2994 --------
 site/VCFX_subsampler/index.html               | 2869 -------
 site/VCFX_sv_handler/index.html               | 2970 --------
 site/VCFX_validator/index.html                | 3032 --------
 site/VCFX_variant_classifier/index.html       | 2808 -------
 site/VCFX_variant_counter/index.html          | 2829 -------
 site/assets/images/favicon.png                |  Bin 1870 -> 0 bytes
 .../assets/javascripts/bundle.c8b220af.min.js |   16 -
 .../javascripts/bundle.c8b220af.min.js.map    |    7 -
 .../javascripts/lunr/min/lunr.ar.min.js       |    1 -
 .../javascripts/lunr/min/lunr.da.min.js       |   18 -
 .../javascripts/lunr/min/lunr.de.min.js       |   18 -
 .../javascripts/lunr/min/lunr.du.min.js       |   18 -
 .../javascripts/lunr/min/lunr.el.min.js       |    1 -
 .../javascripts/lunr/min/lunr.es.min.js       |   18 -
 .../javascripts/lunr/min/lunr.fi.min.js       |   18 -
 .../javascripts/lunr/min/lunr.fr.min.js       |   18 -
 .../javascripts/lunr/min/lunr.he.min.js       |    1 -
 .../javascripts/lunr/min/lunr.hi.min.js       |    1 -
 .../javascripts/lunr/min/lunr.hu.min.js       |   18 -
 .../javascripts/lunr/min/lunr.hy.min.js       |    1 -
 .../javascripts/lunr/min/lunr.it.min.js       |   18 -
 .../javascripts/lunr/min/lunr.ja.min.js       |    1 -
 .../javascripts/lunr/min/lunr.jp.min.js       |    1 -
 .../javascripts/lunr/min/lunr.kn.min.js       |    1 -
 .../javascripts/lunr/min/lunr.ko.min.js       |    1 -
 .../javascripts/lunr/min/lunr.multi.min.js    |    1 -
 .../javascripts/lunr/min/lunr.nl.min.js       |   18 -
 .../javascripts/lunr/min/lunr.no.min.js       |   18 -
 .../javascripts/lunr/min/lunr.pt.min.js       |   18 -
 .../javascripts/lunr/min/lunr.ro.min.js       |   18 -
 .../javascripts/lunr/min/lunr.ru.min.js       |   18 -
 .../javascripts/lunr/min/lunr.sa.min.js       |    1 -
 .../lunr/min/lunr.stemmer.support.min.js      |    1 -
 .../javascripts/lunr/min/lunr.sv.min.js       |   18 -
 .../javascripts/lunr/min/lunr.ta.min.js       |    1 -
 .../javascripts/lunr/min/lunr.te.min.js       |    1 -
 .../javascripts/lunr/min/lunr.th.min.js       |    1 -
 .../javascripts/lunr/min/lunr.tr.min.js       |   18 -
 .../javascripts/lunr/min/lunr.vi.min.js       |    1 -
 .../javascripts/lunr/min/lunr.zh.min.js       |    1 -
 site/assets/javascripts/lunr/tinyseg.js       |  206 -
 site/assets/javascripts/lunr/wordcut.js       | 6708 -----------------
 .../workers/search.f8cc74c7.min.js            |   42 -
 .../workers/search.f8cc74c7.min.js.map        |    7 -
 site/assets/stylesheets/main.4af4bdda.min.css |    1 -
 .../stylesheets/main.4af4bdda.min.css.map     |    1 -
 .../stylesheets/palette.06af60db.min.css      |    1 -
 .../stylesheets/palette.06af60db.min.css.map  |    1 -
 site/index.html                               | 2954 --------
 site/installation/index.html                  | 2817 -------
 site/quickstart/index.html                    | 2934 -------
 site/search/search_index.json                 |    1 -
 site/sitemap.xml                              |  271 -
 site/sitemap.xml.gz                           |  Bin 740 -> 0 bytes
 site/tool_template/index.html                 | 2572 -------
 site/tools_overview/index.html                | 2907 -------
 116 files changed, 2 insertions(+), 201516 deletions(-)
 delete mode 100644 site/404.html
 delete mode 100644 site/CONTRIBUTING/index.html
 delete mode 100644 site/LICENSE/index.html
 delete mode 100644 site/VCFX_af_subsetter/index.html
 delete mode 100644 site/VCFX_alignment_checker/index.html
 delete mode 100644 site/VCFX_allele_balance_calc/index.html
 delete mode 100644 site/VCFX_allele_balance_filter/index.html
 delete mode 100644 site/VCFX_allele_counter/index.html
 delete mode 100644 site/VCFX_allele_freq_calc/index.html
 delete mode 100644 site/VCFX_ancestry_assigner/index.html
 delete mode 100644 site/VCFX_ancestry_inferrer/index.html
 delete mode 100644 site/VCFX_annotation_extractor/index.html
 delete mode 100644 site/VCFX_compressor/index.html
 delete mode 100644 site/VCFX_concordance_checker/index.html
 delete mode 100644 site/VCFX_cross_sample_concordance/index.html
 delete mode 100644 site/VCFX_custom_annotator/index.html
 delete mode 100644 site/VCFX_diff_tool/index.html
 delete mode 100644 site/VCFX_distance_calculator/index.html
 delete mode 100644 site/VCFX_dosage_calculator/index.html
 delete mode 100644 site/VCFX_duplicate_remover/index.html
 delete mode 100644 site/VCFX_fasta_converter/index.html
 delete mode 100644 site/VCFX_field_extractor/index.html
 delete mode 100644 site/VCFX_file_splitter/index.html
 delete mode 100644 site/VCFX_format_converter/index.html
 delete mode 100644 site/VCFX_genotype_query/index.html
 delete mode 100644 site/VCFX_gl_filter/index.html
 delete mode 100644 site/VCFX_haplotype_extractor/index.html
 delete mode 100644 site/VCFX_haplotype_phaser/index.html
 delete mode 100644 site/VCFX_header_parser/index.html
 delete mode 100644 site/VCFX_hwe_tester/index.html
 delete mode 100644 site/VCFX_impact_filter/index.html
 delete mode 100644 site/VCFX_inbreeding_calculator/index.html
 delete mode 100644 site/VCFX_indel_normalizer/index.html
 delete mode 100644 site/VCFX_indexer/index.html
 delete mode 100644 site/VCFX_info_aggregator/index.html
 delete mode 100644 site/VCFX_info_parser/index.html
 delete mode 100644 site/VCFX_info_summarizer/index.html
 delete mode 100644 site/VCFX_ld_calculator/index.html
 delete mode 100644 site/VCFX_merger/index.html
 delete mode 100644 site/VCFX_metadata_summarizer/index.html
 delete mode 100644 site/VCFX_missing_data_handler/index.html
 delete mode 100644 site/VCFX_missing_detector/index.html
 delete mode 100644 site/VCFX_multiallelic_splitter/index.html
 delete mode 100644 site/VCFX_nonref_filter/index.html
 delete mode 100644 site/VCFX_outlier_detector/index.html
 delete mode 100644 site/VCFX_phase_checker/index.html
 delete mode 100644 site/VCFX_phase_quality_filter/index.html
 delete mode 100644 site/VCFX_phred_filter/index.html
 delete mode 100644 site/VCFX_population_filter/index.html
 delete mode 100644 site/VCFX_position_subsetter/index.html
 delete mode 100644 site/VCFX_probability_filter/index.html
 delete mode 100644 site/VCFX_quality_adjuster/index.html
 delete mode 100644 site/VCFX_record_filter/index.html
 delete mode 100644 site/VCFX_ref_comparator/index.html
 delete mode 100644 site/VCFX_reformatter/index.html
 delete mode 100644 site/VCFX_region_subsampler/index.html
 delete mode 100644 site/VCFX_sample_extractor/index.html
 delete mode 100644 site/VCFX_sorter/index.html
 delete mode 100644 site/VCFX_subsampler/index.html
 delete mode 100644 site/VCFX_sv_handler/index.html
 delete mode 100644 site/VCFX_validator/index.html
 delete mode 100644 site/VCFX_variant_classifier/index.html
 delete mode 100644 site/VCFX_variant_counter/index.html
 delete mode 100644 site/assets/images/favicon.png
 delete mode 100644 site/assets/javascripts/bundle.c8b220af.min.js
 delete mode 100644 site/assets/javascripts/bundle.c8b220af.min.js.map
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.ar.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.da.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.de.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.du.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.el.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.es.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.fi.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.fr.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.he.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.hi.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.hu.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.hy.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.it.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.ja.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.jp.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.kn.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.ko.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.multi.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.nl.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.no.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.pt.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.ro.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.ru.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.sa.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.stemmer.support.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.sv.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.ta.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.te.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.th.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.tr.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.vi.min.js
 delete mode 100644 site/assets/javascripts/lunr/min/lunr.zh.min.js
 delete mode 100644 site/assets/javascripts/lunr/tinyseg.js
 delete mode 100644 site/assets/javascripts/lunr/wordcut.js
 delete mode 100644 site/assets/javascripts/workers/search.f8cc74c7.min.js
 delete mode 100644 site/assets/javascripts/workers/search.f8cc74c7.min.js.map
 delete mode 100644 site/assets/stylesheets/main.4af4bdda.min.css
 delete mode 100644 site/assets/stylesheets/main.4af4bdda.min.css.map
 delete mode 100644 site/assets/stylesheets/palette.06af60db.min.css
 delete mode 100644 site/assets/stylesheets/palette.06af60db.min.css.map
 delete mode 100644 site/index.html
 delete mode 100644 site/installation/index.html
 delete mode 100644 site/quickstart/index.html
 delete mode 100644 site/search/search_index.json
 delete mode 100644 site/sitemap.xml
 delete mode 100644 site/sitemap.xml.gz
 delete mode 100644 site/tool_template/index.html
 delete mode 100644 site/tools_overview/index.html

diff --git a/docs/docker.md b/docs/docker.md
index 1e8ebcdb..70d1755d 100644
--- a/docs/docker.md
+++ b/docs/docker.md
@@ -173,4 +173,4 @@ If you use VCFX with Docker in your research, please cite:
 }
 ```
 
-For more citation formats and information, see the [Citation page](../citation.md). 
\ No newline at end of file
+For more citation formats and information, see the [Citation page](citation.md). 
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
index 41f2d1b0..bb55acbb 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -137,7 +137,7 @@ nav:
       - VCFX_missing_data_handler: VCFX_missing_data_handler.md
       - VCFX_quality_adjuster: VCFX_quality_adjuster.md
       - VCFX_haplotype_phaser: VCFX_haplotype_phaser.md
-  - VCFX_haplotype_extractor: VCFX_haplotype_extractor.md
+      - VCFX_haplotype_extractor: VCFX_haplotype_extractor.md
   - Python API: python_api.md
   - Contributing: CONTRIBUTING.md
   - Citation: citation.md
diff --git a/site/404.html b/site/404.html
deleted file mode 100644
index 7396de83..00000000
--- a/site/404.html
+++ /dev/null
@@ -1,2352 +0,0 @@
-
-
-
-  
-    
-      
-      
-      
-        
-      
-      
-        
-      
-      
-      
-      
-      
-      
-      
-    
-    
-      
-        VCFX Documentation
-      
-    
-    
-      
-      
-        
-        
-      
-      
-
-
-    
-    
-      
-    
-    
-      
-        
-        
-        
-        
-        
-      
-    
-    
-    
-    
-      
-
-    
-    
-    
-  
-  
-  
-    
-    
-      
-    
-    
-    
-    
-    
-  
-    
-    
-    
-    
-    
- -
-
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- -

404 - Not found

- -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/CONTRIBUTING/index.html b/site/CONTRIBUTING/index.html deleted file mode 100644 index 5171e9d0..00000000 --- a/site/CONTRIBUTING/index.html +++ /dev/null @@ -1,2773 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Contributing - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

Contributing to VCFX

-

Thank you for your interest in contributing to VCFX! This document provides guidelines and instructions for contributing to the project.

-

Code of Conduct

-

Please be respectful and considerate of others when contributing to this project. We aim to foster an inclusive and welcoming community.

-

How to Contribute

-

Reporting Bugs

-

If you find a bug in VCFX, please report it by creating an issue in our GitHub repository. When reporting a bug, please include:

-
    -
  • A clear, descriptive title
  • -
  • A detailed description of the issue, including steps to reproduce
  • -
  • The expected behavior
  • -
  • The actual behavior observed
  • -
  • Any relevant error messages or logs
  • -
  • Your system information (OS, compiler version, etc.)
  • -
-

Suggesting Enhancements

-

We welcome suggestions for new features or improvements to existing functionality. To suggest an enhancement:

-
    -
  1. Check if the enhancement has already been suggested or implemented
  2. -
  3. Create a new issue with a clear description of the enhancement
  4. -
  5. Explain why this enhancement would be useful to VCFX users
  6. -
-

Contributing Code

-
    -
  1. Fork the repository
  2. -
  3. Create a new branch for your feature or bug fix
  4. -
  5. Write your code, following our coding standards
  6. -
  7. Add tests for your changes
  8. -
  9. Ensure all tests pass
  10. -
  11. Update documentation as needed
  12. -
  13. Commit your changes with clear, descriptive commit messages
  14. -
  15. Submit a pull request
  16. -
-

Development Setup

-

Prerequisites

-
    -
  • CMake (version 3.10 or higher)
  • -
  • C++11 compatible compiler (GCC, Clang, MSVC)
  • -
  • Git
  • -
-

Building for Development

-
git clone https://github.com/jorgeMFS/VCFX.git
-cd VCFX
-mkdir build && cd build
-cmake ..
-make
-
-

Running Tests

-

After building the project, run the tests to ensure everything is working correctly:

-
cd build
-ctest --verbose
-
-

Coding Standards

-
    -
  • Use consistent indentation (4 spaces)
  • -
  • Follow naming conventions:
  • -
  • Class names: CamelCase
  • -
  • Functions and methods: camelCase
  • -
  • Variables: snake_case
  • -
  • Constants: UPPER_SNAKE_CASE
  • -
  • Write clear, descriptive comments
  • -
  • Document public API methods
  • -
  • Keep lines to a reasonable length (around 80-100 characters)
  • -
  • Use descriptive variable and function names
  • -
-

Documentation

-
    -
  • Update the appropriate documentation when changing functionality
  • -
  • Document all public methods and classes
  • -
  • Provide usage examples when relevant
  • -
  • Use clear, concise language
  • -
  • Check for spelling and grammar errors
  • -
-

Pull Request Process

-
    -
  1. Update the README.md or relevant documentation with details of changes
  2. -
  3. Update the version number if applicable, following Semantic Versioning
  4. -
  5. The pull request will be merged once it has been reviewed and approved by a maintainer
  6. -
-

License

-

By contributing to VCFX, you agree that your contributions will be licensed under the project's MIT license.

- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/LICENSE/index.html b/site/LICENSE/index.html deleted file mode 100644 index dee56808..00000000 --- a/site/LICENSE/index.html +++ /dev/null @@ -1,2410 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - License - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - - -

MIT License

-

Copyright (c) 2024

-

Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions:

-

The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software.

-

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE.

- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_af_subsetter/index.html b/site/VCFX_af_subsetter/index.html deleted file mode 100644 index 38adeab9..00000000 --- a/site/VCFX_af_subsetter/index.html +++ /dev/null @@ -1,2986 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_af_subsetter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_af_subsetter

-

Overview

-

VCFX_af_subsetter filters variants in a VCF file based on allele frequency (AF) values, allowing selection of variants within a specified frequency range. This tool is useful for focusing analysis on variants of specific population prevalence.

-

Usage

-
VCFX_af_subsetter --af-filter "MIN-MAX" < input.vcf > filtered.vcf
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-a, --af-filter <MIN-MAX>Required. Allele frequency range for filtering (e.g., 0.01-0.05)
-h, --helpDisplay help message and exit
-

Description

-

VCFX_af_subsetter processes VCF files line by line and filters variants based on their allele frequency (AF) values from the INFO field. The tool:

-
    -
  1. Reads the VCF file from standard input
  2. -
  3. Parses the AF values from the INFO field of each variant record
  4. -
  5. Compares these values against the specified minimum and maximum thresholds
  6. -
  7. Retains variants with at least one allele frequency value within the range [MIN, MAX]
  8. -
  9. Outputs the filtered variants to standard output
  10. -
-

This tool is particularly useful for: -- Isolating rare variants (e.g., AF < 0.01) -- Focusing on common variants (e.g., AF > 0.05) -- Selecting variants with specific population frequencies -- Removing extremely rare or fixed variants from analysis

-

Output Format

-

The output is a standard VCF file containing: -- All original header lines from the input VCF -- Only those variant records with AF values within the specified range -- No modification to the content or format of the retained lines

-

Examples

-

Basic Usage

-

Filter for rare variants with frequency between 1% and 5%: -

VCFX_af_subsetter --af-filter "0.01-0.05" < input.vcf > rare_variants.vcf
-

-

Common Variants

-

Filter for common variants with frequency above 5%: -

VCFX_af_subsetter --af-filter "0.05-1.0" < input.vcf > common_variants.vcf
-

-

Extremely Rare Variants

-

Filter for extremely rare variants: -

VCFX_af_subsetter --af-filter "0.0001-0.001" < input.vcf > very_rare_variants.vcf
-

-

Specific Frequency Band

-

Filter for variants with a specific frequency band: -

VCFX_af_subsetter --af-filter "0.4-0.6" < input.vcf > mid_frequency_variants.vcf
-

-

In Pipeline

-

Use in a pipeline with other VCFX tools: -

cat input.vcf | VCFX_af_subsetter --af-filter "0.01-0.05" | VCFX_phred_filter -p 30 > high_quality_rare_variants.vcf
-

-

AF Value Parsing

-

Format Requirements

-

The tool expects AF values in the INFO field in standard VCF format: -- As a key-value pair in the INFO column: AF=0.123 -- For multi-allelic sites, as comma-separated values: AF=0.01,0.05,0.1

-

Range Specification

-

The AF range must be specified as: -- Two numeric values between 0.0 and 1.0 -- Connected by a hyphen (-) -- With the first value (minimum) less than or equal to the second value (maximum)

-

For example: 0.01-0.05, 0.0-0.1, 0.4-0.6

-

Handling Special Cases

-

Multi-allelic Variants

-

For variants with multiple alternate alleles (multi-allelic): -- The INFO field may contain multiple AF values (comma-separated) -- The variant is retained if ANY of the AF values fall within the specified range -- This behavior allows for selective filtering of multi-allelic sites

-

Missing AF Values

-

Variants without an AF annotation in the INFO field: -- Are skipped with a warning message -- Are not included in the output -- Can indicate variants where frequency information is unavailable

-

Full Range

-

Using the range 0.0-1.0 will: -- Keep all variants with valid AF values -- Still skip variants lacking AF annotations -- Effectively function as a filter for "has valid AF information"

-

Malformed Values

-

The tool handles several edge cases: -- Invalid range format: Reports an error if not in MIN-MAX format -- Out-of-range values: Ensures MIN and MAX are between 0.0 and 1.0 -- Inverted ranges: Reports an error if MIN > MAX -- Non-numeric AF values: Skips variants where AF cannot be parsed as a number

-

Performance Considerations

-
    -
  • Processes VCF files line by line for memory efficiency
  • -
  • No preprocessing or indexing required
  • -
  • Linear time complexity with respect to input file size
  • -
  • Minimal CPU and memory usage
  • -
-

Limitations

-
    -
  • Requires AF field to be present in the INFO column
  • -
  • No way to customize the AF field name (hardcoded to "AF")
  • -
  • Cannot filter based on other frequency metrics (e.g., MAF, AC, AN)
  • -
  • No option to include variants with missing AF values
  • -
  • No statistics provided on the number of variants filtered
  • -
  • Cannot combine with other filtering criteria in a single command
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_alignment_checker/index.html b/site/VCFX_alignment_checker/index.html deleted file mode 100644 index 047aa446..00000000 --- a/site/VCFX_alignment_checker/index.html +++ /dev/null @@ -1,2984 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_alignment_checker - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_alignment_checker

-

Overview

-

VCFX_alignment_checker identifies discrepancies between VCF variant entries and a reference genome FASTA file. This tool helps validate that the reference alleles in a VCF file match the corresponding positions in the reference genome.

-

Usage

-
VCFX_alignment_checker --alignment-discrepancy <vcf_file> <reference.fasta> > discrepancies.txt
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-a, --alignment-discrepancyEnable alignment discrepancy checking mode
-h, --helpDisplay help message and exit
-

Description

-

VCFX_alignment_checker compares VCF variants against a reference genome to validate sequence consistency. The tool:

-
    -
  1. Loads a reference genome from a FASTA file into memory
  2. -
  3. Parses each variant in the input VCF file
  4. -
  5. For each variant:
  6. -
  7. Retrieves the corresponding sequence from the reference genome
  8. -
  9. Compares it with the REF and ALT values from the VCF
  10. -
  11. Reports any discrepancies found
  12. -
  13. Outputs a tab-separated report of all detected discrepancies
  14. -
-

This tool is particularly useful for: -- Validating VCF files against their reference genome -- Identifying potential errors in variant calling -- Detecting misalignments in variant positions -- Quality control of VCF data before downstream analysis

-

Input Requirements

-
    -
  • A VCF file with variant records
  • -
  • A FASTA file containing the reference genome sequences
  • -
  • VCF must contain standard CHROM, POS, REF, and ALT fields
  • -
-

Output Format

-

The tool produces a tab-separated values (TSV) file with the following columns:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ColumnDescription
CHROMChromosome of the variant
POSPosition of the variant
IDVariant identifier
REFReference allele in the VCF
ALTAlternate allele in the VCF
Discrepancy_TypeType of discrepancy detected (REF_DISCREPANCY or ALT_DISCREPANCY)
Reference_ValueThe actual sequence from the reference genome
VCF_ValueThe value from the VCF that differs from the reference
-

Examples

-

Basic Usage

-

Check for discrepancies between variants in a VCF file and a reference genome: -

VCFX_alignment_checker --alignment-discrepancy variants.vcf reference.fa > discrepancies.txt
-

-

Pipeline Integration

-

Integrate with other tools for comprehensive validation: -

VCFX_alignment_checker --alignment-discrepancy variants.vcf reference.fa | grep "REF_DISCREPANCY" > ref_errors.tsv
-

-

Discrepancy Types

-

REF_DISCREPANCY

-

Indicates that the REF field in the VCF doesn't match the reference genome at the specified position. This type of discrepancy suggests potential issues with: -- Incorrect variant calling -- Reference genome version mismatch -- Coordinate system errors

-

ALT_DISCREPANCY

-

Indicates that the ALT field doesn't correspond to an expected variation from the reference. For SNPs, this can happen when: -- The variant quality is low -- The variant caller made an error -- There are assembly or alignment issues

-

Handling Special Cases

-

Chromosome Naming

-

The tool attempts to normalize chromosome names between the VCF and reference FASTA by: -- Adding 'chr' prefix when appropriate -- Checking for standard naming conventions (1-22, X, Y, MT) -- This normalization helps handle common mismatches between different naming conventions

-

Indels and Complex Variants

-

For insertions, deletions, and complex variants: -- The tool compares the available bases (minimum length of REF and ALT) -- It checks if the REF allele matches the reference genome -- It also checks if the ALT allele differs from the reference as expected

-

Missing Reference Sequences

-

If a chromosome in the VCF isn't found in the reference genome: -- A warning is issued to stderr -- The variant is skipped from discrepancy checking -- This helps identify potential naming mismatches between files

-

Out-of-Range Positions

-

For positions beyond the reference sequence length: -- The tool issues a warning -- The variant is excluded from the discrepancy report -- This can help identify coordinate system issues

-

Performance Considerations

-
    -
  • Loads the entire reference genome into memory for faster lookups
  • -
  • Processes the VCF file sequentially, line by line
  • -
  • Memory usage scales with the size of the reference genome
  • -
  • Discrepancy checking is computationally efficient after loading the reference
  • -
-

Limitations

-
    -
  • Requires loading the entire reference genome into memory, which can be memory-intensive for large genomes
  • -
  • Chromosome name normalization may not handle all naming conventions
  • -
  • Doesn't account for circularity in mitochondrial or bacterial genomes
  • -
  • No specialized handling for structural variants
  • -
  • Minimal sanity checks for FASTA format integrity
  • -
  • Cannot check variants spanning multiple chromosomes
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_allele_balance_calc/index.html b/site/VCFX_allele_balance_calc/index.html deleted file mode 100644 index 3de6f325..00000000 --- a/site/VCFX_allele_balance_calc/index.html +++ /dev/null @@ -1,3005 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_allele_balance_calc - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_allele_balance_calc

-

Overview

-

VCFX_allele_balance_calc calculates the allele balance for each sample in a VCF file, which is the ratio of reference alleles to alternate alleles in heterozygous genotypes. This metric is useful for assessing potential allelic bias in sequencing data.

-

Usage

-
VCFX_allele_balance_calc [OPTIONS] < input.vcf > allele_balance.tsv
-
-

Options

- - - - - - - - - - - - - - - - - -
Option | Description
-s, --samples "Sample1 Sample2..." | Optional. Specify sample names to calculate allele balance for (space-separated). If omitted, all samples are processed.
-h, --help | Display help message and exit
-

Description

-

VCFX_allele_balance_calc processes a VCF file and calculates the allele balance for each variant in each specified sample. The tool:

-
    -
  1. Reads a VCF file from standard input
  2. -
  3. Identifies sample columns from the VCF header
  4. -
  5. For each variant and each sample:
  6. -
  7. Extracts the genotype information
  8. -
  9. Counts reference (0) and alternate (non-0) alleles
  10. -
  11. Calculates the allele balance as: reference allele count / alternate allele count
  12. -
  13. Outputs a tab-separated file with allele balance values for each variant-sample combination
  14. -
-

This tool is particularly useful for: -- Identifying potential allele-specific biases in sequencing -- Quality control of variant calls -- Assessing imbalanced expression of alleles -- Detecting potential sample contamination

-

Output Format

-

The tool produces a tab-separated values (TSV) file with the following columns:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Column | Description
CHROM | Chromosome of the variant
POS | Position of the variant
ID | Variant identifier
REF | Reference allele
ALT | Alternate allele(s)
Sample | Sample name
Allele_Balance | Calculated allele balance value or "NA" for missing/invalid genotypes
-

Examples

-

Basic Usage (All Samples)

-

Calculate allele balance for all samples in a VCF file: -

VCFX_allele_balance_calc < input.vcf > allele_balance_all.tsv
-

-

Specific Samples

-

Calculate allele balance for specific samples: -

VCFX_allele_balance_calc --samples "SAMPLE1 SAMPLE2" < input.vcf > allele_balance_subset.tsv
-

-

Filtering Results

-

Process the output to focus on imbalanced variants: -

VCFX_allele_balance_calc < input.vcf | awk -F'\t' '$7 != "NA" && $7 < 0.4' > imbalanced_variants.tsv
-

-

Allele Balance Calculation

-

Formula

-

The allele balance is calculated as: -

Allele Balance = Number of Reference Alleles / Number of Alternate Alleles
-

-

Where: -- Reference alleles are those with value "0" in the genotype field -- Alternate alleles are any non-zero value (e.g., "1", "2", etc.) in the genotype field

-

Interpretation

-
    -
  • Value of 0.0: No reference alleles (e.g., "1/1", "1/2")
  • -
  • Value of 1.0: Equal number of reference and alternate alleles (e.g., "0/1")
  • -
  • Value > 1.0: More reference than alternate alleles (unusual for diploid organisms)
  • -
  • "NA": Missing or invalid genotype
  • -
-

Special Cases

-
    -
  • Homozygous reference (e.g., "0/0"): Returns 0.0 (technically it would be undefined due to division by zero)
  • -
  • Missing genotypes (e.g., "./.", ".|."): Returns "NA" in the output
  • -
  • Partial missing (e.g., "0/."): Only valid alleles are counted
  • -
  • Invalid formats: Returns "NA" in the output
  • -
-

Handling Special Cases

-

Missing Data

-
    -
  • Genotypes with missing values (./., .) return "NA" for allele balance
  • -
  • Partial missing genotypes only count the valid alleles present
  • -
-

Multi-allelic Sites

-
    -
  • All non-reference alleles are treated as "alternate" regardless of their specific number
  • -
  • For example, in a genotype "1/2", both alleles are counted as alternate alleles
  • -
-

Phased Genotypes

-
    -
  • Phasing information is ignored for allele balance calculation
  • -
  • Phased genotypes (e.g., "0|1") are treated the same as unphased (e.g., "0/1")
  • -
-

Haploid Genotypes

-
    -
  • Not explicitly handled; the tool expects diploid or polyploid genotypes with separators
  • -
-

Performance Considerations

-
    -
  • Processes VCF files line by line, with minimal memory requirements
  • -
  • Scales linearly with input file size and number of samples
  • -
  • For very large VCF files with many samples, specifying a subset of samples can improve performance
  • -
-

Limitations

-
    -
  • No option to customize the allele balance formula
  • -
  • Simplified handling of multi-allelic sites (all non-reference alleles are grouped)
  • -
  • No automatic filtering based on allele balance values
  • -
  • Cannot account for read depth or genotype quality in calculations
  • -
  • Limited to processing standard VCF genotype fields
  • -
  • Does not produce summary statistics across all variants
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_allele_balance_filter/index.html b/site/VCFX_allele_balance_filter/index.html deleted file mode 100644 index 2c683df2..00000000 --- a/site/VCFX_allele_balance_filter/index.html +++ /dev/null @@ -1,2835 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_allele_balance_filter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_allele_balance_filter

-

Overview

-

VCFX_allele_balance_filter filters a VCF file to keep only variants where all samples have an allele balance ratio (reference alleles / total alleles) above a specified threshold, allowing for quality control and bias detection in variant calls.

-

Usage

-
VCFX_allele_balance_filter --filter-allele-balance <THRESHOLD> < input.vcf > filtered.vcf
-
-

Options

- - - - - - - - - - - - - - - - - -
Option | Description
-f, --filter-allele-balance <THRESHOLD> | Required. Allele balance threshold between 0.0 and 1.0
-h, --help | Display help message and exit
-

Description

-

VCFX_allele_balance_filter examines the genotypes in each variant record and calculates an allele balance ratio for each sample. The tool:

-
    -
  1. Processes a VCF file line by line
  2. -
  3. For each variant, calculates the allele balance for each sample's genotype
  4. -
  5. Filters out variants where any sample has an allele balance below the specified threshold
  6. -
  7. Passes through all header lines unchanged
  8. -
  9. Outputs a filtered VCF file with only the passing variants
  10. -
-

Allele balance is calculated as: -

AB = (number of reference alleles) / (total number of alleles)
-

-

Where: -- Reference alleles are those with value "0" in the genotype field -- All other numeric alleles (1, 2, 3, etc.) are counted as alternate alleles -- Missing or non-numeric alleles are excluded from the calculation

-

This tool is useful for: -- Detecting potential sequencing or mapping biases -- Quality control of variant calls -- Filtering out variants with skewed allele representation -- Identifying potential sample contamination or mixed samples

-

Output Format

-

The output is a standard VCF file with the same format as the input, but containing only the variant lines that pass the allele balance filter. All header lines are preserved.

-

Examples

-

Basic Usage

-
# Keep variants where all samples have allele balance >= 0.3
-VCFX_allele_balance_filter --filter-allele-balance 0.3 < input.vcf > balanced.vcf
-
-

Stringent Filtering

-
# Very stringent filtering (close to 50/50 balance required)
-VCFX_allele_balance_filter --filter-allele-balance 0.45 < input.vcf > highly_balanced.vcf
-
-

Counting Filtered Variants

-
# Count how many variants were filtered out
-input_count=$(grep -v "^#" input.vcf | wc -l)
-output_count=$(grep -v "^#" filtered.vcf | wc -l)
-filtered_count=$((input_count - output_count))
-echo "Filtered out $filtered_count variants based on allele balance"
-
-

In a Pipeline

-
# Filter by quality then by allele balance
-grep -v "^#" input.vcf | grep "PASS" | grep "QUAL>30" | \
-VCFX_allele_balance_filter --filter-allele-balance 0.4 > high_quality_balanced.vcf
-
-

Genotype Interpretation

-

The tool examines the GT field of each sample's genotype:

-
    -
  1. Extracts the GT field (before the first colon if present)
  2. -
  3. Treats both phased ('|') and unphased ('/') genotypes the same
  4. -
  5. For each allele:
  6. -
  7. '0' is counted as a reference allele
  8. -
  9. Any other number (1, 2, 3, etc.) is counted as an alternate allele
  10. -
  11. Non-numeric values are ignored
  12. -
-

For example: -- "0/0" → AB = 2/2 = 1.0 (all reference) -- "0/1" → AB = 1/2 = 0.5 (half reference, half alternate) -- "1/1" → AB = 0/2 = 0.0 (all alternate) -- "0/2" → AB = 1/2 = 0.5 (half reference, half alternate) -- "./1" → AB = 0/1 = 0.0 (missing allele ignored, only the alternate allele counted) -- "./." → AB = 0/0, treated as 0.0 (no valid alleles)

-

Handling Special Cases

-
    -
  • Missing genotypes ("./.") are treated as having AB = 0.0
  • -
  • Partial missing ("./1") counts only the present alleles
  • -
  • Non-diploid genotypes (e.g., "0/1/2") are handled correctly by counting alleles individually
  • -
  • Complex genotypes (non-numeric) are skipped when calculating AB
  • -
  • Empty lines are ignored
  • -
  • Header lines are preserved unchanged
  • -
  • Malformed VCF lines with insufficient columns are skipped with a warning
  • -
  • Multi-allelic variants have all non-reference alleles (1, 2, 3, etc.) treated as alternate
  • -
-

Performance

-

The tool is optimized for efficiency: -- Processes the VCF file in a single pass -- Minimal memory usage as it processes one variant at a time -- Constant-time computation of allele balance -- Stops calculating AB for a variant as soon as any sample fails the threshold

-

Limitations

-
    -
  1. Uses a simple all-or-nothing approach (variant passes only if ALL samples pass)
  2. -
  3. No option to specify which samples to include in the filtering
  4. -
  5. Cannot handle sample-specific threshold values
  6. -
  7. No detailed reporting on which samples/variants failed and by how much
  8. -
  9. No option to annotate variants with their allele balance rather than filtering
  10. -
  11. Limited to the strict definition of allele balance (ref/total), not accounting for strand bias
  12. -
  13. Treats all alternate alleles equally, regardless of their identity
  14. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_allele_counter/index.html b/site/VCFX_allele_counter/index.html deleted file mode 100644 index 8d0ceb70..00000000 --- a/site/VCFX_allele_counter/index.html +++ /dev/null @@ -1,3007 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_allele_counter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_allele_counter

-

Overview

-

VCFX_allele_counter counts the number of reference and alternate alleles in each sample for each variant in a VCF file. This tool provides a simple way to quantify allele occurrences across samples.

-

Usage

-
VCFX_allele_counter [OPTIONS] < input.vcf > allele_counts.tsv
-
-

Options

- - - - - - - - - - - - - - - - - -
Option | Description
-s, --samples "Sample1 Sample2..." | Optional. Specify sample names to calculate allele counts for (space-separated). If omitted, all samples are processed.
-h, --help | Display help message and exit
-

Description

-

VCFX_allele_counter processes a VCF file and counts reference and alternate alleles for each variant in each specified sample. The tool:

-
    -
  1. Reads a VCF file from standard input
  2. -
  3. Identifies sample columns from the VCF header
  4. -
  5. For each variant and each sample:
  6. -
  7. Extracts the genotype information
  8. -
  9. Counts reference alleles (0) and alternate alleles (non-0)
  10. -
  11. Outputs both counts in a tabular format
  12. -
  13. Outputs a tab-separated file with allele counts for each variant-sample combination
  14. -
-

This tool is particularly useful for: -- Analyzing allele distribution across samples -- Quantifying the presence of specific alleles -- Preparing data for population genetics analyses -- Validating genotype calls across samples

-

Output Format

-

The tool produces a tab-separated values (TSV) file with the following columns:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Column | Description
CHROM | Chromosome of the variant
POS | Position of the variant
ID | Variant identifier
REF | Reference allele
ALT | Alternate allele(s)
Sample | Sample name
Ref_Count | Number of reference alleles (0) in the sample's genotype
Alt_Count | Number of alternate alleles (non-0) in the sample's genotype
-

Examples

-

Basic Usage (All Samples)

-

Count alleles for all samples in a VCF file: -

VCFX_allele_counter < input.vcf > allele_counts_all.tsv
-

-

Specific Samples

-

Count alleles for specific samples: -

VCFX_allele_counter --samples "SAMPLE1 SAMPLE2" < input.vcf > allele_counts_subset.tsv
-

-

Using with Other Tools

-

Process the output for further analysis: -

VCFX_allele_counter < input.vcf | awk -F'\t' '$8 > 0' > samples_with_alt_alleles.tsv
-

-

Allele Counting Method

-

Reference Alleles

-

The tool counts an allele as a reference allele when it has the value "0" in the genotype field. For example: -- In genotype "0/0", there are 2 reference alleles -- In genotype "0/1", there is 1 reference allele -- In genotype "1/2", there are 0 reference alleles

-

Alternate Alleles

-

The tool counts an allele as an alternate allele when it has any non-zero numeric value in the genotype field. For example: -- In genotype "0/0", there are 0 alternate alleles -- In genotype "0/1", there is 1 alternate allele -- In genotype "1/2", there are 2 alternate alleles -- In genotype "1/1", there are 2 alternate alleles

-

Handling Special Cases

-
    -
  • Missing genotypes (e.g., "./.", ".|."): No counts are recorded for these samples
  • -
  • Partial missing (e.g., "0/."): Only the valid allele is counted
  • -
  • Non-numeric alleles: These are skipped and not counted
  • -
-

Handling Special Cases

-

Missing Data

-
    -
  • Genotypes with missing values (./., .) are skipped
  • -
  • Partial missing genotypes only count the valid alleles present
  • -
-

Multi-allelic Sites

-
    -
  • All non-reference alleles are counted as "alternate" regardless of their specific number
  • -
  • For example, in a genotype "1/2", both alleles count as alternate alleles
  • -
  • The tool does not differentiate between different alternate alleles
  • -
-

Phased Genotypes

-
    -
  • Phasing information is ignored for allele counting
  • -
  • Phased genotypes (e.g., "0|1") are treated the same as unphased (e.g., "0/1")
  • -
-

Invalid Genotypes

-
    -
  • Non-numeric allele values are skipped
  • -
  • Empty genotype fields are skipped
  • -
-

Performance Considerations

-
    -
  • Processes VCF files line by line, with minimal memory requirements
  • -
  • Scales linearly with input file size and number of samples
  • -
  • For very large VCF files with many samples, specifying a subset of samples can improve performance
  • -
-

Limitations

-
    -
  • Does not distinguish between different alternate alleles (e.g., "1" vs "2")
  • -
  • No options for filtering by allele count thresholds
  • -
  • Cannot account for genotype quality or read depth
  • -
  • Limited to processing standard VCF genotype fields
  • -
  • Does not produce summary statistics or aggregate counts
  • -
  • No direct integration with population genetics metrics
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_allele_freq_calc/index.html b/site/VCFX_allele_freq_calc/index.html deleted file mode 100644 index d948f476..00000000 --- a/site/VCFX_allele_freq_calc/index.html +++ /dev/null @@ -1,2724 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_allele_freq_calc - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - - -

VCFX_allele_freq_calc

-

Overview

-

The VCFX_allele_freq_calc tool calculates allele frequencies for variants in a VCF file. It reads a VCF file from standard input and outputs a TSV file with chromosome, position, ID, reference allele, alternate allele, and the calculated allele frequency.

-

Usage

-
VCFX_allele_freq_calc [OPTIONS] < input.vcf > allele_frequencies.tsv
-
-

Options

- - - - - - - - - - - - - -
Option | Description
--help, -h | Display help message and exit
-

Description

-

VCFX_allele_freq_calc computes the allele frequency for each variant in a VCF file. The allele frequency is calculated as the number of alternate alleles divided by the total number of alleles (reference + alternate) across all samples, considering only non-missing genotypes.

-

The tool: -- Parses the GT (genotype) field for each sample -- Counts reference (0) and alternate (non-zero) alleles -- Calculates frequency as: alternate_count / (reference_count + alternate_count) -- Outputs results in a clean TSV format

-

Output Format

-

The output is a tab-separated file with the following columns:

-
CHROM  POS  ID  REF  ALT  Allele_Frequency
-
-

Where Allele_Frequency is a value between 0.0 and 1.0, formatted with 4 decimal places.

-

Examples

-

Basic Usage

-
./VCFX_allele_freq_calc < input.vcf > allele_frequencies.tsv
-
-

Pipe with Other Commands

-
# Filter variants and calculate allele frequencies
-grep -v "^#" input.vcf | grep "PASS" | ./VCFX_allele_freq_calc > filtered_allele_frequencies.tsv
-
-

Handling Special Cases

-
    -
  • Phased genotypes: Both phased (|) and unphased (/) genotypes are handled the same way
  • -
  • Missing genotypes (./.): Missing genotypes are skipped in the frequency calculation
  • -
  • Multiallelic sites: All non-reference alleles are counted as "alternate" regardless of the specific ALT index
  • -
  • No GT field: Variants without a GT field are skipped
  • -
-

Performance

-

This tool processes VCF files line by line, with minimal memory requirements. It can handle large VCF files efficiently.

-

Limitations

-
    -
  • Requires the GT field to be present in the FORMAT column
  • -
  • Does not distinguish between different alternate alleles in multiallelic sites (all non-reference alleles are counted together)
  • -
  • Cannot handle malformed VCF files, though it will attempt to skip invalid lines with a warning
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_ancestry_assigner/index.html b/site/VCFX_ancestry_assigner/index.html deleted file mode 100644 index 6143fc4b..00000000 --- a/site/VCFX_ancestry_assigner/index.html +++ /dev/null @@ -1,2818 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_ancestry_assigner - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_ancestry_assigner

-

Overview

-

VCFX_ancestry_assigner assigns samples in a VCF file to ancestral populations using a likelihood-based approach based on population-specific allele frequencies.

-

Usage

-
VCFX_ancestry_assigner --assign-ancestry <freq_file> < input.vcf > ancestry_results.txt
-
-

Options

- - - - - - - - - - - - - - - - - -
Option | Description
-a, --assign-ancestry <FILE> | Required. Path to a file containing population-specific allele frequencies
-h, --help | Display help message and exit
-

Description

-

VCFX_ancestry_assigner determines the most likely ancestral population for each sample in a VCF file by calculating genotype likelihoods across multiple populations. The tool:

-
    -
  1. Reads a tab-delimited file containing allele frequencies for different populations
  2. -
  3. Processes the genotypes for each sample in the VCF file
  4. -
  5. Computes likelihood scores for each possible ancestral population
  6. -
  7. Assigns each sample to the population with the highest likelihood score
  8. -
  9. Outputs a simple mapping of sample names to assigned populations
  10. -
-

The tool uses a statistical approach that considers the probability of observing each genotype given the population-specific allele frequencies. For each genotype: -- Homozygous reference (0/0): P = (1-f)² -- Heterozygous (0/1): P = 2f(1-f) -- Homozygous alternate (1/1): P = f²

-

Where f is the frequency of the alternate allele in a given population.

-

Output Format

-

The output is a tab-delimited text file with the following columns:

-
Sample  Assigned_Population
-
-

Where: -- Sample is the sample name from the VCF file -- Assigned_Population is the ancestral population with the highest likelihood score

-

Examples

-

Basic Usage

-
./VCFX_ancestry_assigner --assign-ancestry population_freqs.tsv < samples.vcf > ancestry_assignments.txt
-
-

Creating a Frequency Reference File

-

The frequency file should have the following tab-delimited format: -

CHROM  POS  REF  ALT  EUR  ASN  AFR
-chr1   10000  A    G    0.1  0.2  0.3
-chr1   20000  C    T    0.2  0.3  0.4
-chr2   15000  T    C    0.4  0.5  0.6
-

-

Using in a Pipeline

-
# Process VCF and append ancestry information as a new column in a metadata file
-cat input.vcf | ./VCFX_ancestry_assigner --assign-ancestry freq.tsv | \
-  join -t $'\t' metadata.txt - > metadata_with_ancestry.txt
-
-

Algorithm

-

The ancestry assignment uses a maximum likelihood approach:

-
    -
  1. For each sample and each variant:
  2. -
  3. Determine the sample's genotype (0/0, 0/1, or 1/1)
  4. -
  5. For each population, calculate the log-likelihood of observing that genotype:
      -
    • Log(P(0/0|pop)) = 2 * log(1-f)
    • -
    • Log(P(0/1|pop)) = log(2) + log(f) + log(1-f)
    • -
    • Log(P(1/1|pop)) = 2 * log(f)
    • -
    -
  6. -
  7. -

    Add this log-likelihood to the population's cumulative score

    -
  8. -
  9. -

    After processing all variants:

    -
  10. -
  11. For each sample, identify the population with the highest cumulative log-likelihood
  12. -
  13. Assign the sample to that population
  14. -
-

This approach is statistically sound and accounts for the probability distribution of genotypes under Hardy-Weinberg equilibrium.

-

Handling Special Cases

-
    -
  • Missing genotypes: Genotypes denoted as "./." are skipped and don't contribute to likelihood calculations
  • -
  • Multi-allelic variants: Treated as biallelic by considering only the first alternate allele
  • -
  • Missing variants: Variants present in the VCF but not in the frequency file are skipped
  • -
  • Phased genotypes: Phase information is ignored; both "0|1" and "0/1" are treated identically
  • -
  • Equal likelihoods: If two populations have exactly the same likelihood (rare), the first one is assigned
  • -
  • No matching variants: If a sample has no variants that match the frequency file, it's assigned to a default population
  • -
  • Non-standard genotypes: Any genotype other than 0/0, 0/1, or 1/1 is skipped
  • -
  • Empty VCF: Will produce no output rows (empty output file)
  • -
-

Performance

-

The tool is optimized for efficiency: -- Uses hash maps for fast lookup of variant frequency data -- Single-pass processing of the VCF file -- Calculates log-likelihoods to avoid numerical underflow with many variants -- Memory usage scales with: - - The number of variants in the frequency file - - The number of reference populations - - The number of samples in the VCF

-

Limitations

-
    -
  • Requires a pre-existing set of population-specific allele frequencies
  • -
  • Assumes Hardy-Weinberg equilibrium for probability calculations
  • -
  • Does not account for linkage disequilibrium between variants
  • -
  • Cannot detect admixed individuals (assigns to a single population)
  • -
  • No confidence metrics for population assignment
  • -
  • Not designed for structural variants or complex multi-allelic sites
  • -
  • No support for non-diploid genotypes or unusual ploidy
  • -
  • Performance depends on the number and informativeness of the variants in the frequency file
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_ancestry_inferrer/index.html b/site/VCFX_ancestry_inferrer/index.html deleted file mode 100644 index 7e34315e..00000000 --- a/site/VCFX_ancestry_inferrer/index.html +++ /dev/null @@ -1,2809 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_ancestry_inferrer - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_ancestry_inferrer

-

Overview

-

VCFX_ancestry_inferrer infers the likely population ancestry for each sample in a VCF file by comparing sample genotypes to known population allele frequencies.

-

Usage

-
VCFX_ancestry_inferrer --frequency <freq_file> [OPTIONS] < input.vcf > ancestry_results.txt
-
-

Options

- - - - - - - - - - - - - - - - - -
Option | Description
--frequency <FILE> | Required. Path to a file containing population-specific allele frequencies
-h, --help | Display help message and exit
-

Description

-

VCFX_ancestry_inferrer analyzes the genotypes of samples in a VCF file and compares them to known population-specific allele frequencies to determine the most likely ancestry for each sample. The tool:

-
    -
  1. Reads a frequency reference file containing population-specific allele frequencies
  2. -
  3. Processes the VCF file, examining each biallelic or multiallelic variant
  4. -
  5. For each sample, calculates ancestry scores by comparing observed genotypes to population frequency data
  6. -
  7. Assigns each sample to the population with the highest cumulative score
  8. -
  9. Outputs a simple table mapping each sample to its inferred population
  10. -
-

The ancestry inference is based on the principle that individuals from a specific population are more likely to carry alleles at frequencies matching that population's known frequency distribution.

-

Output Format

-

The output is a tab-delimited text file with the following columns:

-
Sample  Inferred_Population
-
-

Where: -- Sample is the sample name from the VCF file -- Inferred_Population is the population with the highest ancestry score

-

Examples

-

Basic Usage

-
./VCFX_ancestry_inferrer --frequency population_freqs.txt < samples.vcf > ancestry_results.txt
-
-

Creating a Frequency Reference File

-

The frequency file should have the following tab-delimited format: -

CHROM  POS  REF  ALT  POPULATION  FREQUENCY
-1      100  A    G    EUR         0.75
-1      100  A    G    AFR         0.10
-1      100  A    G    EAS         0.25
-

-

Using with Multi-Population Data

-
# Combine ancestry results with other data
-./VCFX_ancestry_inferrer --frequency global_freqs.txt < diverse_cohort.vcf | \
-  join -t $'\t' -1 1 -2 1 - phenotype_data.txt > annotated_results.tsv
-
-

Algorithm

-

The ancestry inference algorithm works as follows:

-
    -
  1. For each variant in the VCF file:
  2. -
  3. -

    For each sample with a non-reference genotype:

    -
      -
    • Look up the frequency of that allele in each reference population
    • -
    • Add the frequency value to that population's score for the sample
    • -
    -
  4. -
  5. -

    After processing all variants:

    -
  6. -
  7. For each sample, find the population with the highest cumulative score
  8. -
  9. Assign the sample to that population
  10. -
-

This approach assigns more weight to alleles that are common in a specific population but rare in others, making them more informative for ancestry inference.

-

Handling Special Cases

-
    -
  • Multi-allelic variants: Each alternate allele is treated separately and looked up in the frequency reference
  • -
  • Phased genotypes: Phase information is ignored; both "0|1" and "0/1" are treated identically
  • -
  • Missing genotypes: Missing genotypes ("./.") are skipped and don't contribute to ancestry scores
  • -
  • Missing frequency data: Variants without corresponding frequency data are skipped
  • -
  • Identical scores: If two populations have identical scores for a sample, the first one alphabetically is assigned
  • -
  • Diploid genotypes: Both alleles contribute independently to the ancestry score
  • -
  • Empty VCF: Will produce no output rows (empty output file)
  • -
  • Unknown populations: Only populations defined in the frequency file will be considered
  • -
-

Performance

-

The tool is optimized for efficiency: -- Uses hash maps for constant-time lookups of frequency data -- Single-pass processing of the VCF file -- Memory usage scales with: - - The number of variants in the frequency file - - The number of reference populations - - The number of samples in the VCF

-

Limitations

-
    -
  • Accuracy depends on the quality and relevance of the population frequency data
  • -
  • Works best with large numbers of variants (hundreds to thousands)
  • -
  • Not designed for detecting admixed individuals (reports only the highest-scoring population)
  • -
  • Assumes independence between variants (does not account for linkage disequilibrium)
  • -
  • No confidence scores or statistical measures of assignment certainty
  • -
  • Cannot handle non-biallelic complex variants (e.g., structural variants)
  • -
  • Doesn't account for sample relatedness within the input VCF
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_annotation_extractor/index.html b/site/VCFX_annotation_extractor/index.html deleted file mode 100644 index 49cc1336..00000000 --- a/site/VCFX_annotation_extractor/index.html +++ /dev/null @@ -1,2858 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_annotation_extractor - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_annotation_extractor

-

Overview

-

VCFX_annotation_extractor extracts annotation fields from a VCF file's INFO column and converts them into a tabular format. The tool is particularly useful for extracting specific annotations (such as functional impact, gene name, or any custom annotation) from VCF files into a more analysis-friendly TSV format.

-

Usage

-
VCFX_annotation_extractor --annotation-extract "FIELD1,FIELD2,..." < input.vcf > extracted.tsv
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-a, --annotation-extract <FIELDS>Required. Comma-separated list of INFO field annotations to extract (e.g., "ANN,Gene,Impact")
-h, --helpDisplay help message and exit
-

Description

-

VCFX_annotation_extractor simplifies the extraction and analysis of variant annotations by:

-
    -
  1. Reading a VCF file from standard input
  2. -
  3. Parsing the INFO column to extract user-specified annotation fields
  4. -
  5. Handling multi-allelic variants by creating separate rows for each ALT allele
  6. -
  7. Aligning per-allele annotations (such as ANN) with the corresponding ALT allele
  8. -
  9. Producing a clean tab-delimited output with standardized columns
  10. -
-

This tool is particularly useful for: -- Converting complex VCF annotations into a format suitable for spreadsheet applications -- Extracting specific annotation fields for focused analysis -- Preparing variant annotation data for visualization or reporting -- Working with multi-allelic variants where annotations correspond to specific alleles

-

Output Format

-

The output is a tab-separated (TSV) file with the following columns:

-
CHROM  POS  ID  REF  ALT  <ANNOTATION1>  <ANNOTATION2>  ...
-
-

Where: -- The first five columns are standard VCF fields (chromosome, position, ID, reference allele, alternate allele) -- Each subsequent column contains the value of a requested annotation field -- Missing values are represented by "NA" -- Multi-allelic variants are split into multiple rows, one for each ALT allele -- Per-allele annotations (like ANN) are properly aligned with their corresponding ALT allele

-

Examples

-

Basic Usage - Extract Gene Annotations

-
./VCFX_annotation_extractor --annotation-extract "Gene" < input.vcf > genes.tsv
-
-

Extract Multiple Annotation Fields

-
./VCFX_annotation_extractor --annotation-extract "ANN,Gene,Impact,DP" < input.vcf > annotations.tsv
-
-

Process and Filter in a Pipeline

-
# Extract annotations from only PASS variants
-grep -e "^#" -e "PASS" input.vcf | ./VCFX_annotation_extractor --annotation-extract "ANN,Gene,Impact" > pass_annotations.tsv
-
-

Analyze Impact Distribution

-
# Extract impact annotations and count occurrences
-./VCFX_annotation_extractor --annotation-extract "Impact" < input.vcf | tail -n +2 | cut -f6 | sort | uniq -c
-
-

Multi-allelic Variant Handling

-

The tool handles multi-allelic variants specially:

-
    -
  1. Each ALT allele in a multi-allelic variant gets its own row in the output
  2. -
  3. For Number=A annotations (like ANN) that have multiple comma-separated values, each value is aligned with the corresponding ALT allele
  4. -
  5. For single-value annotations (like Gene, Impact), the same value is used for all ALT alleles of a variant
  6. -
  7. If there are more ALT alleles than annotation values, "NA" is used for the excess ALT alleles
  8. -
-

Example

-

For a variant line with ALT=T,G,C and ANN=missense,stop_gained,intergenic: -- Three rows will be generated in the output (one for each ALT) -- The annotations will be properly aligned: T→missense, G→stop_gained, C→intergenic

-

Handling Special Cases

-

The tool implements several strategies for handling edge cases:

-
    -
  1. Missing annotations: If a requested annotation is not found, "NA" is output
  2. -
  3. Malformed VCF lines: Lines with fewer than 8 columns are skipped with a warning
  4. -
  5. Empty annotations: Empty annotation values are preserved and not replaced with "NA"
  6. -
  7. Multi-value annotations: Currently, only the ANN field is treated as multi-value and split by commas
  8. -
  9. Header parsing: The tool checks for proper VCF headers before processing data
  10. -
  11. Empty input: The tool correctly handles empty input files, producing only the header line
  12. -
  13. Invalid characters: The tool preserves all characters in annotation values, including special characters
  14. -
-

Performance

-

VCFX_annotation_extractor is designed for efficiency:

-
    -
  1. Single-pass processing reads the VCF file line-by-line without loading the entire file into memory
  2. -
  3. Efficient string parsing with optimized splitting functions
  4. -
  5. Uses hash maps for quick annotation lookups
  6. -
  7. Memory usage scales with the size of individual variant lines rather than the whole file
  8. -
  9. Output is streamed directly without intermediate storage
  10. -
-

Limitations

-
    -
  1. Currently, only the ANN field is recognized as a per-allele (Number=A) field that needs to be split; other Number=A fields are not automatically detected
  2. -
  3. No VCF header parsing to automatically determine which fields are Number=A vs. Number=1
  4. -
  5. Cannot extract FORMAT fields or sample-specific information
  6. -
  7. The output does not include QUAL or FILTER columns from the input VCF
  8. -
  9. No wildcard or regex support for selecting annotation fields
  10. -
  11. Annotation fields with embedded tab or newline characters may cause issues in the output format
  12. -
  13. Limited error recovery for malformed INFO fields
  14. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_compressor/index.html b/site/VCFX_compressor/index.html deleted file mode 100644 index 7948d5b1..00000000 --- a/site/VCFX_compressor/index.html +++ /dev/null @@ -1,2807 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_compressor - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_compressor

-

Overview

-

VCFX_compressor provides simple compression and decompression functionality for VCF files using the zlib library. It allows users to compress VCF files for storage or transfer, and decompress them for analysis.

-

Usage

-
VCFX_compressor [OPTIONS] < input_file > output_file
-
-

Options

- - - - - - - - - - - - - - - - - - - - - -
OptionDescription
-c, --compressCompress the input VCF file (read from stdin, write to stdout)
-d, --decompressDecompress the input VCF.gz file (read from stdin, write to stdout)
-h, --helpDisplay help message and exit
-

Description

-

VCFX_compressor is a straightforward utility that enables compression and decompression of VCF files using zlib's DEFLATE algorithm. The tool:

-
    -
  1. Reads input data from standard input (stdin)
  2. -
  3. Processes the data in memory-efficient chunks
  4. -
  5. Applies compression or decompression based on the specified mode
  6. -
  7. Writes the processed data to standard output (stdout)
  8. -
-

The compression mode produces output compatible with gzip, while the decompression mode can handle standard gzip-compressed files. This makes the tool interoperable with widely used genomics software that expects gzip-compressed VCF files.

-

Output Format

-

The output format depends on the chosen mode:

-
    -
  • Compression mode: Produces a gzip-compatible compressed binary file
  • -
  • Decompression mode: Produces a plain text VCF file
  • -
-

Examples

-

Compressing a VCF File

-
# Basic compression
-./VCFX_compressor --compress < input.vcf > output.vcf.gz
-
-# Compress and view file size reduction
-./VCFX_compressor --compress < input.vcf > output.vcf.gz
-echo "Original size: $(wc -c < input.vcf) bytes"
-echo "Compressed size: $(wc -c < output.vcf.gz) bytes"
-
-

Decompressing a VCF File

-
# Basic decompression
-./VCFX_compressor --decompress < input.vcf.gz > output.vcf
-
-# Decompress for analysis
-./VCFX_compressor --decompress < input.vcf.gz | head -n 20
-
-

In a Pipeline

-
# Filter a VCF file, compress it, then decompress for viewing
-cat input.vcf | grep -v "^#" | grep "PASS" | ./VCFX_compressor --compress > filtered.vcf.gz
-./VCFX_compressor --decompress < filtered.vcf.gz | head
-
-

Data Processing

-

VCFX_compressor processes data in chunks to maintain memory efficiency. The default chunk size is 16KB, which provides a good balance between memory usage and processing efficiency. The tool:

-
    -
  1. Reads input data in 16KB chunks
  2. -
  3. Processes each chunk using zlib's compression/decompression functions
  4. -
  5. Writes processed data to output immediately as each chunk is completed
  6. -
  7. Continues until all input data has been processed
  8. -
-

This streaming approach allows the tool to handle files of any size without loading the entire file into memory.

-

Handling Special Cases

-

The tool implements several strategies for handling edge cases:

-
    -
  1. Empty files: Properly handles empty input, producing valid empty output
  2. -
  3. Truncated inputs: When decompressing, detects and warns about truncated or incomplete compressed data
  4. -
  5. Invalid compressed data: Reports errors when attempting to decompress invalid or corrupted data
  6. -
  7. I/O errors: Provides error messages for issues with reading input or writing output
  8. -
  9. Incorrect usage: Enforces mutually exclusive selection of compression or decompression mode
  10. -
-

Performance

-

VCFX_compressor is designed for efficiency:

-
    -
  1. Processes data in chunks, maintaining a low and consistent memory footprint
  2. -
  3. Uses zlib's optimized compression/decompression algorithms
  4. -
  5. Avoids unnecessary memory copying or buffering of the entire file
  6. -
  7. Provides reasonable compression ratios typical of gzip compression
  8. -
  9. Handles large files efficiently due to its streaming architecture
  10. -
-

Limitations

-
    -
  1. Not BGZF compatible: Does not produce block-gzipped format required for indexed access via tabix
  2. -
  3. No compression level control: Uses zlib's default compression level with no user-configurable options
  4. -
  5. Single-threaded: Does not utilize multi-threading for potentially faster processing
  6. -
  7. No integrity verification: Does not verify the integrity of decompressed data
  8. -
  9. Limited format support: Only handles gzip compression, not other formats like bzip2 or xz
  10. -
  11. No indexing support: Does not maintain or generate indices for compressed files
  12. -
  13. Standard I/O only: Cannot directly specify input and output filenames (uses stdin/stdout)
  14. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_concordance_checker/index.html b/site/VCFX_concordance_checker/index.html deleted file mode 100644 index 46a88efa..00000000 --- a/site/VCFX_concordance_checker/index.html +++ /dev/null @@ -1,3044 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_concordance_checker - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_concordance_checker

-

Overview

-

VCFX_concordance_checker compares genotypes between two specified samples within a VCF file to determine concordance (agreement) or discordance (disagreement) for each variant. This tool is useful for comparing genotype calls between different samples, such as technical replicates or related individuals.

-

Usage

-
VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" < input.vcf > concordance_report.tsv
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-s, --samples "SAMPLE1 SAMPLE2"Required. Names of the two samples to compare, separated by a space
-h, --helpDisplay help message and exit
-

Description

-

VCFX_concordance_checker analyzes a VCF file and compares the genotypes of two specified samples for each variant. The tool:

-
    -
  1. Normalizes genotypes by:
  2. -
  3. Converting phased genotypes (|) to unphased (/)
  4. -
  5. Sorting alleles numerically (e.g., 1/0 becomes 0/1)
  6. -
  7. Validating against available alternate alleles
  8. -
  9. Compares the normalized genotypes between the two samples
  10. -
  11. Classifies each variant as:
  12. -
  13. Concordant: Both samples have identical normalized genotypes
  14. -
  15. Discordant: Samples have different normalized genotypes
  16. -
  17. Outputs detailed per-variant results to standard output
  18. -
  19. Provides a summary of concordance statistics to standard error
  20. -
-

This tool is particularly useful for: -- Quality control of technical replicates -- Comparing genotype calls between related samples -- Validating sample identity -- Assessing reproducibility of variant calling pipelines

-

Output Format

-

The tool produces a tab-separated values (TSV) file with the following columns:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ColumnDescription
CHROMChromosome of the variant
POSPosition of the variant
IDVariant identifier
REFReference allele
ALTAlternate allele(s)
SAMPLE1_GTNormalized genotype of the first sample
SAMPLE2_GTNormalized genotype of the second sample
ConcordanceStatus: Concordant or Discordant
-

Additionally, a summary of statistics is printed to standard error, including: -- Total number of variants compared -- Number of concordant genotypes -- Number of discordant genotypes

-

Examples

-

Basic Usage

-

Check concordance between two samples in a VCF file: -

VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" < input.vcf > concordance_report.tsv
-

-

Filtering for Discordant Variants

-

Identify only variants with discordant genotypes: -

VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" < input.vcf | grep "Discordant" > discordant_variants.tsv
-

-

Calculating Concordance Rate

-

Count concordant variants and calculate rate: -

VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" < input.vcf > report.tsv 2> stats.txt
-grep -c "Concordant" report.tsv > concordant_count.txt
-

-

Integration with Other Tools

-

Use as part of a larger analysis pipeline: -

cat input.vcf | VCFX_record_filter --filter "QUAL>30" | VCFX_concordance_checker --samples "SAMPLE1 SAMPLE2" > high_quality_concordance.tsv
-

-

Genotype Normalization

-

Process

-

For each sample's genotype, the tool performs the following normalization steps: -1. Extracts the genotype field (GT) -2. Converts all phase separators (|) to unphased separators (/) -3. Splits the genotype into individual allele indices -4. Validates each allele (skips if missing or invalid) -5. Sorts allele indices in ascending order (e.g., 1/0 → 0/1) -6. Rejoins the allele indices with / separators

-

Example Normalizations

-
    -
  • 0|1 → 0/1
  • -
  • 1/0 → 0/1
  • -
  • 1|2 → 1/2
  • -
  • 2/1 → 1/2
  • -
  • ./1 → (skipped as invalid)
  • -
  • 0/0 → 0/0 (unchanged)
  • -
-

Handling Special Cases

-

Missing Genotypes

-
    -
  • Genotypes with missing values (./., .) are excluded from comparison
  • -
  • Variants where either sample has missing genotypes are skipped
  • -
-

Multi-allelic Variants

-
    -
  • Alt alleles are parsed from the ALT column (comma-separated)
  • -
  • Genotype allele indices are validated against the number of alt alleles
  • -
  • For multi-allelic variants, the tool correctly compares numerically sorted genotypes
  • -
-

Phased Genotypes

-
    -
  • Phasing information is ignored for concordance calculation
  • -
  • Genotypes that differ only in phasing are considered concordant (e.g., 0|1 and 0/1)
  • -
-

Invalid Genotypes

-
    -
  • Genotypes with non-numeric allele indices are skipped
  • -
  • Allele indices that exceed the number of alternate alleles are treated as invalid
  • -
-

Malformed VCF Lines

-
    -
  • Lines with insufficient columns are skipped
  • -
  • Lines encountered before the #CHROM header cause an error
  • -
  • VCF files without both specified samples cause an error and program termination
  • -
-

Performance Considerations

-
    -
  • Processes the VCF file line by line, requiring minimal memory
  • -
  • No preprocessing or indexing of the VCF file is required
  • -
  • Linear time complexity with respect to file size
  • -
  • Provides a quick summary of concordance statistics for rapid quality assessment
  • -
-

Limitations

-
    -
  • Limited to exactly two samples for comparison
  • -
  • Only processes diploid genotypes (e.g., 0/1, not haploid 0 or polyploid genotypes)
  • -
  • Ignores all FORMAT fields except for GT (genotype)
  • -
  • No consideration of genotype quality or other metrics in concordance assessment
  • -
  • Cannot generate detailed statistics on concordance by variant type
  • -
  • Does not allow customizing concordance criteria beyond exact genotype match
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_cross_sample_concordance/index.html b/site/VCFX_cross_sample_concordance/index.html deleted file mode 100644 index cd9786cc..00000000 --- a/site/VCFX_cross_sample_concordance/index.html +++ /dev/null @@ -1,3018 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_cross_sample_concordance - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_cross_sample_concordance

-

Overview

-

VCFX_cross_sample_concordance analyzes a multi-sample VCF file to determine if genotypes are consistent across all samples for each variant. It identifies variants where samples show concordance (agreement) or discordance (disagreement) in their genotype calls.

-

Usage

-
VCFX_cross_sample_concordance [OPTIONS] < input.vcf > concordance_results.tsv
-
-

Options

- - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-

Description

-

VCFX_cross_sample_concordance examines each variant in a multi-sample VCF file and determines if all samples with valid genotypes have the same normalized genotype. The tool:

-
    -
  1. Normalizes genotypes across samples by:
  2. -
  3. Converting phased genotypes (|) to unphased (/)
  4. -
  5. Handling multi-allelic variants properly
  6. -
  7. Sorting alleles numerically for consistent comparison (e.g., 1/0 becomes 0/1)
  8. -
  9. Compares the normalized genotypes across all samples for each variant
  10. -
  11. Classifies each variant as:
  12. -
  13. CONCORDANT: All samples with valid genotypes have the same normalized genotype
  14. -
  15. DISCORDANT: Samples have different normalized genotypes
  16. -
  17. NO_GENOTYPES: No samples have valid genotypes for this variant
  18. -
  19. Outputs detailed per-variant results to standard output
  20. -
  21. Provides a summary of concordance statistics to standard error
  22. -
-

This tool is particularly useful for: -- Quality control of multi-sample VCF files -- Identifying potential sample mix-ups or contamination -- Validating genotype calling consistency across technical replicates -- Assessing reliability of variant calls across different sequencing or analysis methods

-

Output Format

-

The tool produces a tab-separated values (TSV) file with the following columns:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ColumnDescription
CHROMChromosome of the variant
POSPosition of the variant
IDVariant identifier
REFReference allele
ALTAlternate allele(s)
Num_SamplesNumber of samples with valid genotypes
Unique_Normalized_GenotypesNumber of distinct normalized genotypes observed
Concordance_StatusStatus: CONCORDANT, DISCORDANT, or NO_GENOTYPES
-

Additionally, a summary of statistics is printed to standard error, including: -- Total number of variants processed -- Number of concordant variants -- Number of discordant variants -- Number of variants with no valid genotypes

-

Examples

-

Basic Usage

-

Check concordance in a multi-sample VCF file: -

VCFX_cross_sample_concordance < input.vcf > concordance_results.tsv
-

-

Filtering for Discordant Variants

-

Identify only variants with discordant genotypes: -

VCFX_cross_sample_concordance < input.vcf | grep "DISCORDANT" > discordant_variants.tsv
-

-

Concordance Analysis in a Pipeline

-

Use as part of a larger analysis pipeline: -

cat input.vcf | VCFX_cross_sample_concordance | awk -F'\t' '{if($8=="CONCORDANT") print $0}' > consistent_variants.tsv
-

-

Saving Both Results and Summary

-

Capture both the detailed results and summary statistics: -

VCFX_cross_sample_concordance < input.vcf > concordance_results.tsv 2> concordance_summary.txt
-

-

Genotype Normalization

-

Process

-

For each sample's genotype, the tool performs the following normalization steps: -1. Extracts the genotype field (first field in the FORMAT column) -2. Converts all phase separators (|) to unphased separators (/) -3. Splits the genotype into individual allele indices -4. Validates each allele (skips if missing or invalid) -5. Sorts allele indices in ascending order (e.g., 1/0 → 0/1) -6. Rejoins the allele indices with / separators

-

Example Normalizations

-
    -
  • 0|1 → 0/1
  • -
  • 1/0 → 0/1
  • -
  • 1|2 → 1/2
  • -
  • 2/1 → 1/2
  • -
  • ./1 → (skipped as invalid)
  • -
  • 0/0 → 0/0 (unchanged)
  • -
-

Handling Special Cases

-

Missing Genotypes

-
    -
  • Genotypes with missing values (./., .) are excluded from concordance calculation
  • -
  • If no samples have valid genotypes for a variant, it is classified as NO_GENOTYPES
  • -
-

Multi-allelic Variants

-
    -
  • Alt alleles are parsed from the ALT column (comma-separated)
  • -
  • Genotype allele indices are validated against the number of alt alleles
  • -
  • Invalid indices (exceeding the number of alt alleles) are treated as missing
  • -
-

Phased Genotypes

-
    -
  • Phasing information is ignored for concordance calculation
  • -
  • Genotypes that differ only in phasing are considered concordant
  • -
-

Malformed VCF Lines

-
    -
  • Lines with insufficient columns are skipped
  • -
  • Lines encountered before the #CHROM header are skipped with a warning
  • -
  • Genotypes that cannot be parsed are treated as missing
  • -
-

Performance Considerations

-
    -
  • Processes the VCF file line by line, requiring minimal memory
  • -
  • Time complexity scales linearly with file size and number of samples
  • -
  • Efficient handling of large multi-sample VCF files with normalized genotype comparisons
  • -
  • No file indexing or preprocessing required
  • -
-

Limitations

-
    -
  • Only handles diploid genotypes (e.g., 0/1, not haploid 0 or polyploid genotypes)
  • -
  • Ignores genotype phasing information in concordance assessment
  • -
  • Does not consider genotype quality or other FORMAT fields in the assessment
  • -
  • Cannot specify a subset of samples to check (uses all samples in the VCF)
  • -
  • No option to adjust concordance thresholds (e.g., requiring 90% sample agreement)
  • -
  • Cannot output detailed per-sample information for discordant variants
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_custom_annotator/index.html b/site/VCFX_custom_annotator/index.html deleted file mode 100644 index 06b86c03..00000000 --- a/site/VCFX_custom_annotator/index.html +++ /dev/null @@ -1,2837 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_custom_annotator - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_custom_annotator

-

Overview

-

VCFX_custom_annotator adds custom annotations to VCF files by matching variants from a user-provided annotation file and inserting them into the INFO field. This tool is particularly useful for incorporating external annotations, functional predictions, or custom labels into your VCF files.

-

Usage

-
VCFX_custom_annotator --add-annotation <annotations.txt> [OPTIONS] < input.vcf > annotated.vcf
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-a, --add-annotation <file>Required. Path to the annotation file containing the custom annotations
-h, --helpDisplay help message and exit
-

Description

-

VCFX_custom_annotator is designed to enhance VCF files with custom annotations from an external source. The tool:

-
    -
  1. Reads an annotation file where each line contains: CHROM POS REF ALT annotation_value
  2. -
  3. Creates a lookup map using the variant coordinates and alleles as keys
  4. -
  5. Processes the input VCF file line by line
  6. -
  7. For each variant, generates a key from its coordinates and alleles
  8. -
  9. Looks up any matching annotations
  10. -
  11. Adds the annotation to the INFO field as CustomAnnotation=value
  12. -
  13. For multi-allelic variants, handles each alternate allele separately
  14. -
  15. Outputs the annotated VCF to standard output
  16. -
-

This streamlined process allows for efficient annotation of VCF files with minimal computational overhead.

-

Annotation File Format

-

The annotation file should contain tab-separated fields with the following columns:

-
CHROM POS REF ALT annotation_value
-
-

Example: -

1  100  A  G  HighImpact
-1  200  T  C  ModerateImpact
-2  300  G  A  LowImpact
-

-
    -
  • CHROM: Chromosome name (must match the VCF)
  • -
  • POS: Position (1-based, must match the VCF)
  • -
  • REF: Reference allele (must match the VCF)
  • -
  • ALT: Alternate allele
  • -
  • annotation_value: The annotation text to add (can include spaces after the first 4 fields)
  • -
-

Output Format

-

The output is a valid VCF file that includes:

-
    -
  1. All original header lines
  2. -
  3. A new INFO field definition: ##INFO=<ID=CustomAnnotation,Number=.,Type=String,Description="Custom annotations added by VCFX_custom_annotator (multi-allelic)">
  4. -
  5. All original variant lines with the added CustomAnnotation=value in the INFO column
  6. -
  7. For multi-allelic variants, comma-separated annotation values corresponding to each ALT allele
  8. -
-

For variants without a matching annotation, the value "NA" is used.

-

Examples

-

Basic Usage

-
# Annotate a VCF file with functional impact predictions
-./VCFX_custom_annotator --add-annotation impact_predictions.txt < input.vcf > annotated.vcf
-
-

Viewing Annotated Results

-
# Annotate and view the first few variants
-./VCFX_custom_annotator --add-annotation annotations.txt < input.vcf | head -n 20
-
-

Filtering Based on Annotations

-
# Annotate variants and filter to keep only those with "HighImpact"
-./VCFX_custom_annotator --add-annotation annotations.txt < input.vcf | grep "CustomAnnotation=HighImpact" > high_impact_variants.vcf
-
-

Multi-allelic Variant Handling

-

VCFX_custom_annotator properly handles multi-allelic variants by:

-
    -
  1. Parsing and splitting the ALT field in the VCF on commas
  2. -
  3. Looking up annotations for each REF→ALT pair separately
  4. -
  5. Combining the annotations into a comma-separated list in the same order as the ALT alleles
  6. -
  7. Using "NA" as a placeholder when no annotation is found for a specific allele
  8. -
-

For example, if a variant has ALT=G,C,T and annotations exist for the G and T alleles but not C, the result will be CustomAnnotation=annotation_G,NA,annotation_T.

-

Handling Special Cases

-

The tool implements several strategies for handling edge cases:

-
    -
  1. Missing annotations: Uses "NA" when no annotation is found for a variant
  2. -
  3. Empty annotation file: Results in "NA" for all variants
  4. -
  5. Malformed annotation lines: Invalid lines in the annotation file are skipped with a warning
  6. -
  7. Missing annotation file: Reports an error if the annotation file cannot be opened
  8. -
  9. Empty INFO field: If the original INFO field is "." (missing), it's replaced with the new annotation
  10. -
  11. Existing INFO content: If the INFO field has existing content, annotations are appended
  12. -
  13. Multi-allelic variants: Each allele is handled separately with proper ordering
  14. -
-

Performance

-

VCFX_custom_annotator is designed for efficiency:

-
    -
  1. Annotation file is loaded into memory as a hash map for O(1) lookups
  2. -
  3. VCF file is processed in a streaming fashion, with minimal memory overhead
  4. -
  5. Line-by-line processing allows handling of arbitrarily large VCF files
  6. -
  7. String operations are optimized to minimize unnecessary copies
  8. -
  9. Capable of processing thousands of variants per second on typical hardware
  10. -
-

Limitations

-
    -
  1. Requires exact matching of CHROM, POS, REF, and ALT fields
  2. -
  3. Cannot perform fuzzy matching or coordinate-based lookups
  4. -
  5. All annotations are stored in memory, which may be a limitation for extremely large annotation files
  6. -
  7. Limited to a single annotation per variant (although annotations can contain multiple pieces of information)
  8. -
  9. No built-in option to modify the name of the added INFO field from the default "CustomAnnotation"
  10. -
  11. No support for annotating based on overlapping regions rather than exact positions
  12. -
  13. Annotations with internal commas may cause parsing issues in multi-allelic contexts
  14. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_diff_tool/index.html b/site/VCFX_diff_tool/index.html deleted file mode 100644 index 69042614..00000000 --- a/site/VCFX_diff_tool/index.html +++ /dev/null @@ -1,2776 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_diff_tool - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_diff_tool

-

Overview

-

VCFX_diff_tool compares two VCF files and identifies variants that are unique to each file, providing a simple way to detect differences between variant sets.

-

Usage

-
VCFX_diff_tool --file1 <file1.vcf> --file2 <file2.vcf>
-
-

Options

- - - - - - - - - - - - - - - - - - - - - -
OptionDescription
-a, --file1 <FILE>Required. Path to the first VCF file
-b, --file2 <FILE>Required. Path to the second VCF file
-h, --helpDisplay help message and exit
-

Description

-

VCFX_diff_tool analyzes two VCF files, compares their variant content, and identifies differences between them. The tool:

-
    -
  1. Loads variants from both VCF files, ignoring header lines
  2. -
  3. Creates a normalized key for each variant based on chromosome, position, reference allele, and sorted alternate alleles
  4. -
  5. Identifies variants that are unique to each file by comparing these keys
  6. -
  7. Reports the differences in a readable format
  8. -
-

This tool is particularly useful for: -- Validating VCF file transformations -- Checking tool outputs against expected results -- Comparing variant calls between different callers or pipelines -- Verifying that VCF manipulations haven't inadvertently altered variant content

-

Output Format

-

The output consists of two sections:

-
Variants unique to file1.vcf:
-chrom:pos:ref:alt
-chrom:pos:ref:alt
-...
-
-Variants unique to file2.vcf:
-chrom:pos:ref:alt
-chrom:pos:ref:alt
-...
-
-

Where each variant is represented as a colon-separated string with chromosome, position, reference allele, and sorted alternate alleles.

-

Examples

-

Basic Usage

-
./VCFX_diff_tool --file1 original.vcf --file2 modified.vcf
-
-

Comparing Variant Caller Outputs

-
./VCFX_diff_tool --file1 caller1_output.vcf --file2 caller2_output.vcf > caller_differences.txt
-
-

Validate Processing Results

-
# Check that filtering didn't remove variants it shouldn't have
-./VCFX_diff_tool --file1 expected_filtered.vcf --file2 actual_filtered.vcf
-
-

Handling Special Cases

-
    -
  • Multi-allelic variants: Alternate alleles are sorted alphabetically to ensure consistent comparison even if the order differs between files (e.g., "A,G" and "G,A" are treated as identical)
  • -
  • Header differences: Header lines (starting with #) are ignored, so differences in metadata don't affect the comparison
  • -
  • Malformed VCF lines: Invalid lines are skipped with a warning
  • -
  • Empty files: Properly handled; will show all variants from the non-empty file as unique
  • -
  • Missing files: Reports an error if either file cannot be opened
  • -
  • Large files: Efficiently processes files with thousands of variants using hash-based comparison
  • -
-

Performance

-

The tool is optimized for efficiency: -- Uses hash sets for O(1) lookups when comparing variants -- Single-pass processing of each input file -- Memory usage scales with the number of unique variants in both files -- Can handle large VCF files with minimal overhead

-

Limitations

-
    -
  • Compares only chromosome, position, reference, and alternate alleles; ignores other fields like quality, filter, and INFO
  • -
  • Cannot detect differences in sample genotypes
  • -
  • No support for partial matches or fuzzy comparisons (e.g., variants that differ only in quality)
  • -
  • Not designed to handle VCF files with extremely large numbers of variants (hundreds of millions)
  • -
  • Doesn't consider changes in INFO or FORMAT fields as differences
  • -
  • Cannot compare complex structural variants represented in different ways
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_distance_calculator/index.html b/site/VCFX_distance_calculator/index.html deleted file mode 100644 index d624afa7..00000000 --- a/site/VCFX_distance_calculator/index.html +++ /dev/null @@ -1,2821 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_distance_calculator - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_distance_calculator

-

Overview

-

VCFX_distance_calculator analyzes a VCF file and calculates the distance (in base pairs) between consecutive variants along each chromosome, providing insights into variant density and spacing across the genome.

-

Usage

-
VCFX_distance_calculator [OPTIONS] < input.vcf > variant_distances.tsv
-
-

Options

- - - - - - - - - - - - - -
Option | Description
-h, --help | Display help message and exit
-

Description

-

VCFX_distance_calculator processes a VCF file to measure the distance between variants on the same chromosome. The tool:

-
    -
  1. Reads a VCF file line-by-line
  2. -
  3. Extracts chromosome (CHROM) and position (POS) information from each valid variant
  4. -
  5. For each chromosome, tracks the position of the previous variant
  6. -
  7. Calculates the distance from the previous variant to the current one
  8. -
  9. Outputs a tab-delimited file with the results
  10. -
  11. Provides summary statistics to stderr, including minimum, maximum, and average distances per chromosome
  12. -
-

This tool is useful for: -- Analyzing variant density across the genome -- Identifying regions with unusually sparse or dense variant coverage -- Quality control to detect potential issues with variant calling -- Understanding the distribution of variants in targeted sequencing

-

Output Format

-

The output is a tab-delimited text file with the following columns:

-
CHROM  POS  PREV_POS  DISTANCE
-
-

Where: -- CHROM: The chromosome name -- POS: The position of the current variant -- PREV_POS: The position of the previous variant on the same chromosome (or "NA" for the first variant) -- DISTANCE: The distance in base pairs between current and previous positions (or "NA" for the first variant)

-

Examples

-

Basic Usage

-
./VCFX_distance_calculator < input.vcf > variant_distances.tsv
-
-

Analyzing Specific Chromosomes

-
# Extract only chromosome 1 data
-grep -P "^chr1\t|^CHROM" variant_distances.tsv > chr1_distances.tsv
-
-

Identifying Large Gaps

-
# Find regions with large gaps (>100,000 bp)
-./VCFX_distance_calculator < input.vcf | awk -F'\t' '$4 > 100000 {print}' > large_gaps.tsv
-
-

Visualizing Distance Distribution

-
# Process output for visualization (e.g., with R or Python)
-./VCFX_distance_calculator < input.vcf | \
-  grep -v "NA" | cut -f1,4 > distances_for_plotting.tsv
-
-

Summary Statistics

-

In addition to the main output file, VCFX_distance_calculator prints summary statistics to stderr:

-
=== Summary Statistics ===
-Chromosome: chr1
-  Variants compared: 501
-  Distances computed: 500
-  Total distance: 10000000
-  Min distance: 1
-  Max distance: 150000
-  Average distance: 20000
-
-

This provides a quick overview of variant distribution patterns for each chromosome.

-

Handling Special Cases

-
    -
  • First variant on a chromosome: Marked with "NA" for PREV_POS and DISTANCE
  • -
  • Unsorted VCF files: Processes variants in the order they appear, which may result in negative distances
  • -
  • Duplicate positions: Correctly calculates a distance of 0 between variants at the same position
  • -
  • Malformed lines: Warns about and skips lines that don't follow VCF format
  • -
  • Missing header: Requires a proper VCF header (#CHROM line) before processing variant records
  • -
  • Invalid chromosome names: Skips variants with obviously invalid chromosome names
  • -
  • Non-numeric positions: Skips variants where the position cannot be parsed as an integer
  • -
-

Performance

-

The tool is optimized for efficiency: -- Processes VCF files line-by-line with minimal memory overhead -- Uses hash maps for O(1) lookups of previous positions -- Can handle very large VCF files (tested with millions of variants) -- Memory usage scales with the number of distinct chromosomes, not with file size

-

Limitations

-
    -
  • Does not account for chromosome lengths (cannot detect missing regions)
  • -
  • Does not distinguish between different types of variants
  • -
  • Assumes variants are properly formatted according to VCF specifications
  • -
  • No built-in filtering for quality or other variant attributes
  • -
  • Distances are calculated based on the reference genome coordinates, not actual sequence lengths
  • -
  • Does not handle structural variants in any special way (uses only the position field)
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_dosage_calculator/index.html b/site/VCFX_dosage_calculator/index.html deleted file mode 100644 index 09f160cb..00000000 --- a/site/VCFX_dosage_calculator/index.html +++ /dev/null @@ -1,2765 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_dosage_calculator - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - -
-
- -
-
- - - -
-
- - - - - - - - -

VCFX_dosage_calculator

-

Overview

-

VCFX_dosage_calculator computes the genetic dosage (count of alternate alleles) for each sample at each variant position in a VCF file, outputting a tab-delimited summary in a convenient format for downstream analysis.

-

Usage

-
VCFX_dosage_calculator [OPTIONS] < input.vcf > dosage_output.txt
-
-

Options

- - - - - - - - - - - - - -
Option | Description
-h, --help | Display help message and exit
-

Description

-

VCFX_dosage_calculator reads a VCF file from standard input and calculates the genetic dosage for each sample at each variant position. Dosage is defined as the number of alternate alleles in a genotype, regardless of the specific alternate allele identifier.

-

The dosage calculation follows these rules: -- Reference allele (0): Counts as 0 -- Any alternate allele (1, 2, 3, etc.): Counts as 1 -- Missing or invalid alleles: Reported as "NA"

-

For diploid genotypes, this results in: -- 0/0 → dosage 0 (no alternate alleles) -- 0/1 → dosage 1 (one alternate allele) -- 1/1 → dosage 2 (two alternate alleles) -- 1/2 → dosage 2 (two alternate alleles, even though they're different alternates) -- ./. → dosage NA (missing genotype)

-

The tool is useful for: -- Preparing genetic data for association tests -- Quantifying genetic burden -- Converting genotypes to a simple numeric format for statistical analyses -- Creating matrix-like representations of genetic data

-

Output Format

-

The output is a tab-delimited text file with the following columns:

-
CHROM  POS  ID  REF  ALT  Dosages
-
-

Where Dosages is a comma-separated list of dosage values for each sample, in the same order as they appear in the VCF. Values can be 0, 1, 2 (for diploid organisms), or NA for missing/invalid data.

-

Examples

-

Basic Usage

-
./VCFX_dosage_calculator < input.vcf > dosage_results.txt
-
-

Viewing Results

-
# Show the first few lines of results with column headers
-head -n 5 dosage_results.txt | column -t
-
-

Integration with Other Tools

-
# Calculate dosage and use for statistical analysis
-./VCFX_dosage_calculator < input.vcf | \
-  awk -F'\t' '{split($6,d,","); sum=0; count=0; for(i in d) if(d[i]!="NA") {sum+=d[i]; count++} if(count>0) print $1,$2,$3,sum/count}' > avg_dosage.txt
-
-

Handling Special Cases

-
    -
  • Phased genotypes: Phasing information is ignored; "0|1" and "0/1" both result in dosage 1
  • -
  • Missing genotypes: Genotypes represented as "./.", ".|.", or "." are reported as NA
  • -
  • Multi-allelic variants: All non-reference alleles count equally:
  • -
  • 1/2 → dosage 2
  • -
  • 0/3 → dosage 1
  • -
  • 2/3 → dosage 2
  • -
  • Malformed genotypes: Any genotype that doesn't follow the expected format (e.g., "0/X", "ABC") is reported as NA
  • -
  • Non-diploid organisms: The tool assumes diploid genotypes; other ploidy levels may produce unexpected results
  • -
  • No GT field: If the FORMAT column doesn't include GT, a warning is issued and NA is reported for all samples
  • -
  • Missing header: If the #CHROM header line isn't found, processing stops with an error
  • -
-

Performance

-

The tool processes VCF files line by line with minimal memory requirements. Performance is primarily dependent on the number of samples in the VCF file, as each sample's genotype must be processed for every variant.

-

Limitations

-
    -
  • Designed primarily for diploid organisms; may not be suitable for polyploid data
  • -
  • Cannot handle probabilities or fractional dosages (e.g., from imputed data)
  • -
  • Treats all alternate alleles equally; cannot distinguish between different alternate alleles
  • -
  • No special handling for sex chromosomes (X, Y) which may have different ploidy in males and females
  • -
  • No filtering options within the tool (use other VCFX tools for pre-filtering)
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_duplicate_remover/index.html b/site/VCFX_duplicate_remover/index.html deleted file mode 100644 index d6449f5f..00000000 --- a/site/VCFX_duplicate_remover/index.html +++ /dev/null @@ -1,2805 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_duplicate_remover - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - -
-
- -
-
- - - -
-
- - - - - - - - -

VCFX_duplicate_remover

-

Overview

-

VCFX_duplicate_remover identifies and removes duplicate variant records from a VCF file based on their essential coordinates (chromosome, position, reference allele, and alternate allele), ensuring that each unique variant is represented only once in the output.

-

Usage

-
VCFX_duplicate_remover [OPTIONS] < input.vcf > deduplicated.vcf
-
-

Options

- - - - - - - - - - - - - -
Option | Description
-h, --help | Display help message and exit
-

Description

-

VCFX_duplicate_remover processes a VCF file to detect and remove duplicate variant records. The tool:

-
    -
  1. Reads a VCF file from standard input line by line
  2. -
  3. Passes header lines (beginning with '#') through unchanged
  4. -
  5. For each data line, creates a normalized variant key consisting of:
  6. -
  7. Chromosome name (CHROM)
  8. -
  9. Position (POS)
  10. -
  11. Reference allele (REF)
  12. -
  13. Normalized alternate alleles (ALT)
  14. -
  15. Normalizes multi-allelic variants by sorting the comma-separated ALT alleles alphabetically
  16. -
  17. Tracks seen variants using a hash-based data structure
  18. -
  19. Outputs only the first occurrence of each unique variant, discarding subsequent duplicates
  20. -
  21. Writes the deduplicated VCF to standard output
  22. -
-

This tool is particularly useful for: -- Merging VCF files that may contain overlapping variants -- Cleaning up datasets that have inadvertently duplicated records -- Ensuring downstream analysis tools don't process the same variant multiple times

-

Output Format

-

The output is a valid VCF file with the same format as the input, but with duplicate variant records removed. The first occurrence of each unique variant is preserved, maintaining all original fields (ID, QUAL, FILTER, INFO, FORMAT, samples).

-

Examples

-

Basic Usage

-
# Remove duplicate variants from a VCF file
-./VCFX_duplicate_remover < input.vcf > deduplicated.vcf
-
-

In a Pipeline

-
# Filter a VCF file and then remove duplicates
-./VCFX_record_filter --quality ">20" < input.vcf | \
-./VCFX_duplicate_remover > filtered_unique.vcf
-
-

Checking Results

-
# Count variants before and after deduplication
-echo "Before: $(grep -v '^#' input.vcf | wc -l) variants"
-echo "After: $(grep -v '^#' deduplicated.vcf | wc -l) variants"
-
-

Duplicate Detection

-

The tool determines uniqueness based on four key attributes:

-
    -
  1. Chromosome (CHROM): Exact string match of chromosome name
  2. -
  3. Position (POS): Numerical position on the chromosome
  4. -
  5. Reference Allele (REF): The reference sequence at that position
  6. -
  7. Alternate Alleles (ALT): The alternate alleles, normalized by sorting
  8. -
-

For multi-allelic variants, the ALT field is normalized by: -- Splitting the comma-separated list of alternate alleles -- Sorting the alleles alphabetically -- Re-joining them with commas

-

This normalization ensures that variants with the same alleles but in different order are correctly identified as duplicates. For example, "A,G,T" and "T,A,G" are treated as the same variant after normalization.

-

Handling Special Cases

-

The tool implements several strategies for handling edge cases:

-
    -
  1. Multi-allelic variants: Normalizes ALT fields by sorting to handle different orderings
  2. -
  3. Empty lines: Skipped without affecting output
  4. -
  5. Malformed lines: Lines that can't be parsed are skipped with a warning
  6. -
  7. Position parsing errors: If a POS field can't be parsed as an integer, it's set to 0
  8. -
  9. Header lines: All header lines are preserved in the output
  10. -
  11. Empty files: Properly handles empty input files, producing empty output
  12. -
  13. Files with only headers: Header lines are passed through correctly
  14. -
  15. Sample columns: Maintains all sample genotype data in the output
  16. -
-

Performance

-

VCFX_duplicate_remover is designed for efficiency:

-
    -
  1. Single-pass processing with O(n) time complexity where n is the number of variants
  2. -
  3. Uses an optimized hash-based data structure for fast variant lookups
  4. -
  5. Minimal memory overhead, proportional to the number of unique variants
  6. -
  7. Handles large VCF files with millions of variants efficiently
  8. -
  9. String processing optimized for performance with minimal copying
  10. -
-

Limitations

-
    -
  1. Retains the first occurrence of duplicate variants; quality or information in subsequent duplicates is discarded
  2. -
  3. No option to select which duplicate to keep (e.g., the one with highest quality)
  4. -
  5. No facility to annotate output variants with duplicate counts
  6. -
  7. Limited to exact matching; doesn't detect overlapping variants that might represent the same event
  8. -
  9. Doesn't consider INFO fields in uniqueness determination
  10. -
  11. Cannot handle duplicates based on sample-specific criteria
  12. -
  13. No option to only report duplicate variants without removing them
  14. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_fasta_converter/index.html b/site/VCFX_fasta_converter/index.html deleted file mode 100644 index b0a0447e..00000000 --- a/site/VCFX_fasta_converter/index.html +++ /dev/null @@ -1,2845 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_fasta_converter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_fasta_converter

-

Overview

-

VCFX_fasta_converter transforms VCF files into FASTA format, converting variant information into a multiple sequence alignment where each sample's sequence represents its genotypes across all variants.

-

Usage

-
VCFX_fasta_converter [OPTIONS] < input.vcf > output.fasta
-
-

Options

- - - - - - - - - - - - - -
Option | Description
-h, --help | Display help message and exit
-

Description

-

VCFX_fasta_converter converts variant information from VCF format into a multiple sequence alignment in FASTA format. The tool:

-
    -
  1. Reads a VCF file with variant data and sample genotypes
  2. -
  3. Creates one FASTA entry for each sample in the VCF
  4. -
  5. Generates one position in the alignment for each variant in the VCF
  6. -
  7. Represents each genotype as a single character:
  8. -
  9. Homozygous genotypes (0/0, 1/1, etc.) are represented by the corresponding base
  10. -
  11. Heterozygous genotypes (0/1, 1/2, etc.) are represented by IUPAC ambiguity codes when possible
  12. -
  13. Complex genotypes (indels, multi-base variants) are represented as 'N'
  14. -
  15. Outputs a FASTA file with one sequence per sample, where each position corresponds to a variant in the VCF
  16. -
-

This tool is useful for: -- Creating alignments for phylogenetic analysis -- Visualizing genetic variation across samples -- Converting VCF data for use with tools that require FASTA format -- Simplifying the representation of genetic variation

-

Output Format

-

The output is a standard FASTA file with one entry per sample:

-
>SAMPLE1
-AGCTYRMKSW
->SAMPLE2
-ATCGYRMNNA
->SAMPLE3
-GACTYRSWNN
-
-

Each position in the sequence corresponds to a variant in the input VCF, with genotypes encoded as follows: -- Homozygous reference (0/0): The reference base (e.g., 'A') -- Homozygous alternate (1/1): The alternate base (e.g., 'G') -- Heterozygous (0/1): IUPAC ambiguity code (e.g., 'R' for A/G) -- Missing genotypes (./.): 'N' -- Complex or unrepresentable genotypes: 'N'

-

Examples

-

Basic Usage

-
./VCFX_fasta_converter < variants.vcf > alignment.fasta
-
-

Viewing the Alignment

-
# Convert to FASTA and view with alignment viewer
-./VCFX_fasta_converter < variants.vcf > alignment.fasta
-aliview alignment.fasta
-
-

Building a Phylogenetic Tree

-
# Create a FASTA alignment from VCF and build a tree
-./VCFX_fasta_converter < variants.vcf > alignment.fasta
-iqtree -s alignment.fasta
-
-

IUPAC Ambiguity Codes

-

The tool uses standard IUPAC nucleotide ambiguity codes to represent heterozygous genotypes:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Code | Bases | Meaning
R | A/G | puRine
Y | C/T | pYrimidine
M | A/C | aMino
K | G/T | Keto
S | C/G | Strong (3 H-bonds)
W | A/T | Weak (2 H-bonds)
N | Any | aNy base or missing data
-

Handling Special Cases

-
    -
  • Indels and multi-base variants: Represented as 'N' since they can't be unambiguously encoded as a single nucleotide
  • -
  • Multi-allelic sites: Processed using the appropriate IUPAC codes when possible
  • -
  • Phased vs. unphased genotypes: Treated identically (e.g., "0|1" and "0/1" both map to the same IUPAC code)
  • -
  • Missing genotypes: Represented as 'N' in the output sequence
  • -
  • Missing GT field: Variants without a genotype field are skipped
  • -
  • Malformed VCF lines: Skipped with a warning
  • -
  • Invalid nucleotide combinations: Represented as 'N' when no IUPAC code exists
  • -
-

Performance

-

The converter is optimized for efficiency: -- Single-pass processing of the VCF file -- Efficient string handling for sequence construction -- Scales linearly with the number of variants and samples -- Maintains a small memory footprint proportional to the number of samples

-

Limitations

-
    -
  • Cannot represent structural variants, indels, or multi-base substitutions
  • -
  • Loss of information (quality scores, filters, etc.) from the original VCF
  • -
  • No support for non-diploid genotypes
  • -
  • Limited to the standard IUPAC ambiguity codes for representing heterozygosity
  • -
  • Not suitable for variants with complex ALT alleles
  • -
  • No option to include position information in the output
  • -
  • Cannot handle extremely large VCF files due to memory constraints (sequence storage)
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_field_extractor/index.html b/site/VCFX_field_extractor/index.html deleted file mode 100644 index 7aff1d98..00000000 --- a/site/VCFX_field_extractor/index.html +++ /dev/null @@ -1,2993 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_field_extractor - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_field_extractor

-

Overview

-

VCFX_field_extractor is a tool designed to extract and format specific fields from VCF (Variant Call Format) files. It allows users to select and output particular fields from VCF records, including standard fields, INFO subfields, and sample-specific genotype fields in a tabular format.

-

Usage

-
VCFX_field_extractor --fields "FIELD1,FIELD2,..." [OPTIONS] < input.vcf > output.tsv
-
-

Options

- - - - - - - - - - - - - - - - - -
Option | Description
-f, --fields | Required. Comma-separated list of fields to extract (no spaces between fields)
-h, --help | Display help message and exit
-

Description

-

VCFX_field_extractor processes a VCF file and extracts only the specified fields for each variant. The tool:

-
    -
  1. Reads a VCF file from standard input
  2. -
  3. Identifies and parses the VCF header
  4. -
  5. For each variant line, extracts the requested fields
  6. -
  7. Outputs the extracted fields in a tab-separated format
  8. -
-

The tool can extract three types of fields: -- Standard VCF fields: CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO -- INFO subfields: Any key that appears in the INFO column (e.g., DP, AF, TYPE) -- Sample-specific fields: Fields from the genotype columns, specified as either: - - SampleName:Subfield (e.g., SAMPLE1:GT for the genotype of SAMPLE1) - - S<number>:Subfield (e.g., S1:DP for the depth of the first sample)

-

Field Types

-

Standard VCF Fields

-

These are the eight fixed columns in the VCF format: -- CHROM: Chromosome -- POS: Position -- ID: Variant identifier -- REF: Reference allele -- ALT: Alternate allele(s) -- QUAL: Quality score -- FILTER: Filter status -- INFO: Additional information

-

INFO Subfields

-

Any key found in the INFO column can be extracted directly by name. For example: -- DP: Read depth -- AF: Allele frequency -- TYPE: Variant type

-

Sample Fields

-

Sample fields are specified using one of these formats: -- SampleName:Subfield where SampleName is the exact sample name from the VCF header -- S<number>:Subfield where <number> is the 1-based index of the sample column

-

Common sample subfields include: -- GT: Genotype -- DP: Read depth -- GQ: Genotype quality -- Other format fields defined in the VCF

-

Output Format

-

The tool produces a tab-separated values (TSV) file with: -- A header row containing the requested field names -- One row per variant, with each requested field value -- Missing or invalid fields represented as .

-

Examples

-

Basic Standard Fields

-

Extract chromosome, position, ID, reference, and alternate alleles: -

VCFX_field_extractor --fields "CHROM,POS,ID,REF,ALT" < input.vcf > basic_fields.tsv
-

-

INFO Fields

-

Extract depth and allele frequency: -

VCFX_field_extractor --fields "CHROM,POS,DP,AF" < input.vcf > info_fields.tsv
-

-

Sample Genotype Fields

-

Extract genotypes for specific samples: -

VCFX_field_extractor --fields "CHROM,POS,SAMPLE1:GT,SAMPLE2:GT" < input.vcf > genotypes.tsv
-

-

Sample Fields by Index

-

Extract genotypes using sample indices: -

VCFX_field_extractor --fields "CHROM,POS,S1:GT,S2:GT" < input.vcf > genotypes_by_index.tsv
-

-

Mixed Field Types

-

Combine different field types: -

VCFX_field_extractor --fields "CHROM,POS,DP,AF,SAMPLE1:GT,SAMPLE1:DP" < input.vcf > mixed_fields.tsv
-

-

Handling Special Cases

-

Missing Fields

-
    -
  • If a requested field is not found in the VCF record, a . is output
  • -
  • This applies to missing INFO fields, invalid sample names, or non-existent format fields
  • -
-

Malformed Records

-
    -
  • The tool attempts to handle malformed VCF lines gracefully
  • -
  • For lines with too few columns, missing fields are filled with .
  • -
  • Invalid data types in numeric fields are preserved as they appear in the input
  • -
-

Header-only Files

-
    -
  • If a VCF file contains only headers and no variant records, the tool outputs just the header row
  • -
-

Performance Considerations

-
    -
  • The tool processes VCF files line-by-line, with minimal memory overhead
  • -
  • Extraction scales linearly with input size and number of requested fields
  • -
  • For large VCF files, consider extracting only the necessary fields to improve performance
  • -
-

Limitations

-
    -
  • Cannot filter records (only extracts fields from all records)
  • -
  • Cannot perform operations or calculations on the extracted fields
  • -
  • Does not support complex expressions or conditionals
  • -
  • Limited to tab-separated output format
  • -
  • Cannot output field descriptions or metadata from the VCF header
  • -
  • No direct support for multi-allelic splitting or normalization
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_file_splitter/index.html b/site/VCFX_file_splitter/index.html deleted file mode 100644 index 0b0be25a..00000000 --- a/site/VCFX_file_splitter/index.html +++ /dev/null @@ -1,2783 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_file_splitter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - - -

VCFX_file_splitter

-

Overview

-

VCFX_file_splitter divides a VCF file into multiple smaller files based on chromosome, creating separate output files for each chromosome present in the input.

-

Usage

-
VCFX_file_splitter [OPTIONS] < input.vcf
-
-

Options

- - - - - - - - - - - - - - - - - -
Option | Description
-p, --prefix <PREFIX> | Output file prefix (default: "split")
-h, --help | Display help message and exit
-

Description

-

VCFX_file_splitter reads a VCF file and separates its contents into multiple files, with one file per chromosome. The tool:

-
    -
  1. Reads a VCF file from standard input
  2. -
  3. Extracts the chromosome (CHROM) information from each variant line
  4. -
  5. Creates a separate output file for each unique chromosome encountered
  6. -
  7. Writes all header lines to each output file
  8. -
  9. Distributes variant records to the appropriate chromosome file
  10. -
  11. Produces output files named using the pattern <PREFIX>_<CHROM>.vcf
  12. -
-

This tool is useful for: -- Parallelizing variant processing by chromosome -- Reducing memory requirements when handling large VCF files -- Organizing variant data by chromosome for downstream analysis -- Creating chromosome-specific VCF files for targeted analysis -- Preparing data for tools that work on individual chromosomes

-

Output Format

-

The output consists of multiple VCF files, one for each chromosome in the input. Each file contains: -- All original header lines from the input VCF -- Only the variant records for the corresponding chromosome -- The same format and structure as the original VCF

-

Output files are named following the pattern: -

<PREFIX>_<CHROM>.vcf
-

-

For example, using the default prefix "split", the tool will generate files like: -- split_1.vcf (chromosome 1) -- split_2.vcf (chromosome 2) -- split_X.vcf (chromosome X) -- etc.

-

Examples

-

Basic Usage

-
./VCFX_file_splitter < input.vcf
-
-

This will create files like split_1.vcf, split_2.vcf, etc.

-

Custom Prefix

-
./VCFX_file_splitter --prefix "chr" < input.vcf
-
-

This will create files like chr_1.vcf, chr_2.vcf, etc.

-

Processing Multiple Files

-
# Split multiple VCF files
-for file in *.vcf; do
-  output_prefix="${file%.vcf}"
-  ./VCFX_file_splitter --prefix "$output_prefix" < "$file"
-done
-
-

Handling Special Cases

-
    -
  • Header Lines: All header lines (starting with #) are included in each output file
  • -
  • Additional Headers: If header lines appear after data lines in the input, they are replicated to all open chromosome files
  • -
  • Empty Input: If the input file is empty or contains only headers, a warning message is displayed
  • -
  • Chromosome Naming: Preserves chromosome names exactly as they appear in the input file, including any prefixes or special characters
  • -
  • Malformed Lines: Lines that can't be parsed for chromosome information are skipped with a warning
  • -
  • File Creation Failures: Reports an error if an output file cannot be created (due to permissions, disk space, etc.)
  • -
  • Large Numbers of Chromosomes: Can handle arbitrarily many chromosomes, creating one file for each
  • -
-

Performance

-

The splitter is optimized for efficiency: -- Single-pass processing of the input file -- Streams data directly to output files without storing records in memory -- Uses smart pointers for automatic resource management -- Efficiently handles very large VCF files with minimal memory overhead -- Output files are written incrementally as the input is processed

-

Limitations

-
    -
  • Requires sufficient disk space to store all output files
  • -
  • No built-in compression of output files
  • -
  • Cannot split by other criteria (e.g., position ranges, sample names)
  • -
  • Does not check for duplicate variant entries in the input
  • -
  • No option to merge small chromosomes into a single output file
  • -
  • Cannot control the order of variants within output files (maintains the order from the input)
  • -
  • Files with many chromosomes will generate many output files
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_format_converter/index.html b/site/VCFX_format_converter/index.html deleted file mode 100644 index 419a22d7..00000000 --- a/site/VCFX_format_converter/index.html +++ /dev/null @@ -1,2929 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_format_converter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_format_converter

-

Overview

-

VCFX_format_converter is a tool for converting VCF (Variant Call Format) files to other common file formats. It currently supports conversion to BED (Browser Extensible Data) and CSV (Comma-Separated Values) formats.

-

Usage

-
VCFX_format_converter [OPTIONS] < input.vcf > output.file
-
-

Options

- - - - - - - - - - - - - - - - - - - - - -
OptionDescription
--to-bedConvert the input VCF file to BED format
--to-csvConvert the input VCF file to CSV format
--help, -hDisplay help message and exit
-

Description

-

VCFX_format_converter reads a VCF file from standard input and converts it to the specified output format. The tool:

-
    -
  1. Parses command-line arguments to determine the desired output format
  2. -
  3. Reads the VCF file line by line
  4. -
  5. Skips header lines (starting with #) in the output
  6. -
  7. Converts each variant record according to the selected format
  8. -
  9. Writes the converted data to standard output
  10. -
-

Conversion Details

-

VCF to BED Conversion

-

BED format requires at least 3 columns: -1. chrom - The chromosome name -2. start - 0-based start position (VCF positions are 1-based) -3. end - 0-based end position (exclusive)

-

The converter implements the following mapping: -- chrom: Direct copy from VCF's CHROM column -- start: VCF POS - 1 (clamped to ≥ 0) -- end: start + length of REF allele -- name: Direct copy from VCF's ID column

-

For example, a VCF record chr1 10000 rs123 A G ... becomes a BED record chr1 9999 10000 rs123.

-

VCF to CSV Conversion

-

The CSV conversion preserves all columns from the VCF file but changes the delimiter from tabs to commas. The tool:

-
    -
  1. Includes a header row from the #CHROM line (after removing the # character)
  2. -
  3. Properly escapes fields containing commas or quotes according to CSV standards:
  4. -
  5. Fields containing commas or quotes are enclosed in double quotes
  6. -
  7. Double quotes within fields are doubled (e.g., " becomes "")
  8. -
  9. Preserves all columns and their order from the original VCF
  10. -
-

Examples

-

Converting to BED Format

-
VCFX_format_converter --to-bed < input.vcf > output.bed
-
-

This command converts each variant record in input.vcf to BED format, writing the results to output.bed.

-

Converting to CSV Format

-
VCFX_format_converter --to-csv < input.vcf > output.csv
-
-

This command converts the tab-delimited input.vcf to comma-separated format in output.csv, preserving all columns.

-

Handling Special Cases

-

Multi-allelic Variants

-
    -
  • In BED format, multi-allelic variants are represented by a single interval based on the REF allele length
  • -
  • In CSV format, multi-allelic variants are preserved as in the original VCF, with commas in the ALT field properly escaped
  • -
-

Structural Variants

-
    -
  • For BED conversion, the REF allele length is used to calculate the end position
  • -
  • If a structural variant includes an END tag in the INFO field, this is not currently used for BED conversion
  • -
-

Special Characters in CSV

-

Fields containing special characters are handled according to CSV standards: -- Commas: Field is enclosed in double quotes -- Double quotes: Field is enclosed in double quotes and internal quotes are doubled -- Newlines: Preserved within quoted fields -- Fields with ID values containing commas are properly quoted

-

Malformed VCF Files

-

The tool attempts to handle malformed input gracefully: -- Lines with too few columns are skipped -- Invalid position values are skipped -- Non-numeric QUAL values are preserved as-is in CSV output and handled appropriately for BED

-

Empty Files or Headers-Only Files

-
    -
  • For files containing only headers (no variant records), the tool produces:
  • -
  • An empty file for BED output
  • -
  • A header row only for CSV output
  • -
-

Performance Considerations

-
    -
  • The tool processes VCF files line by line, with minimal memory requirements
  • -
  • Performance scales linearly with input file size
  • -
  • No indexing is performed, allowing efficient streaming processing
  • -
-

Limitations

-
    -
  • The BED conversion uses a simple interval representation based on REF allele length
  • -
  • Structural variants with END tags are not specially handled for BED intervals
  • -
  • No specific handling for insertions (which technically have zero length in reference coordinates)
  • -
  • Does not create or use specialized indices
  • -
  • Does not support other output formats such as GFF, TXT, or JSON
  • -
  • Cannot perform filtering operations during conversion
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_genotype_query/index.html b/site/VCFX_genotype_query/index.html deleted file mode 100644 index 3321ed62..00000000 --- a/site/VCFX_genotype_query/index.html +++ /dev/null @@ -1,2778 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_genotype_query - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_genotype_query

-

Overview

-

VCFX_genotype_query filters VCF files to keep only those variant lines where at least one sample has a specified genotype. It's a powerful tool for extracting variants with specific genotypic patterns.

-

Usage

-
VCFX_genotype_query [OPTIONS] < input.vcf > filtered.vcf
-
-

Options

- - - - - - - - - - - - - - - - - - - - - -
OptionDescription
--genotype-query, -g "GENOTYPE"Specify the genotype to query (e.g., "0/1", "1/1")
--strictUse strict string comparison (no phasing unification or allele sorting)
--help, -hDisplay help message and exit
-

Description

-

VCFX_genotype_query reads a VCF file from standard input and outputs variants (plus all header lines) where at least one sample has a genotype matching the specified pattern in the 'GT' field.

-

By default, the tool uses flexible matching: -- Unifies phasing separators ('|' and '/' are treated the same) -- Sorts alleles numerically (e.g., "1/0" is treated the same as "0/1")

-

When the --strict option is used, the tool performs exact string matching on genotypes, maintaining phase status and allele order.

-

Output Format

-

The output is a standard VCF file containing: -- All header lines from the input file (unchanged) -- Only variant lines where at least one sample matches the specified genotype pattern

-

Examples

-

Basic Usage (Flexible Matching)

-
# Find all variants where any sample is heterozygous (0/1)
-./VCFX_genotype_query --genotype-query "0/1" < input.vcf > heterozygous.vcf
-
-

Strict Matching

-
# Find variants where any sample has a specifically phased heterozygous genotype (0|1)
-./VCFX_genotype_query --genotype-query "0|1" --strict < input.vcf > phased_heterozygous.vcf
-
-

Finding Homozygous Variants

-
# Find variants where any sample is homozygous for the alternate allele
-./VCFX_genotype_query --genotype-query "1/1" < input.vcf > homozygous_alt.vcf
-
-

Finding Multi-allelic Genotypes

-
# Find variants where any sample has specific multi-allelic genotype
-./VCFX_genotype_query --genotype-query "1/2" < input.vcf > multi_allelic.vcf
-
-

Handling Special Cases

-
    -
  • Phased genotypes: By default, "0|1" and "0/1" are considered equivalent; use --strict to differentiate them
  • -
  • Multi-allelic variants: Can be queried with appropriate genotypes (e.g., "1/2", "0/2")
  • -
  • Missing genotypes: Can match against "./." or "."
  • -
  • Non-diploid genotypes: Supported in both flexible and strict modes
  • -
  • Malformed VCF lines: Lines with fewer than 10 columns are skipped with a warning
  • -
  • Missing GT field: Lines without a GT field in the FORMAT column are skipped
  • -
-

Performance

-

The tool processes VCF files line by line with minimal memory requirements, making it efficient for large files. Performance scales linearly with the number of samples in the VCF file.

-

Limitations

-
    -
  • Only filters based on the presence of the target genotype in at least one sample
  • -
  • Cannot filter based on multiple genotype patterns in a single run
  • -
  • No support for complex queries (e.g., specific samples with specific genotypes)
  • -
  • Skips variants where the GT field is not present in the FORMAT column
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_gl_filter/index.html b/site/VCFX_gl_filter/index.html deleted file mode 100644 index 82ba1dfe..00000000 --- a/site/VCFX_gl_filter/index.html +++ /dev/null @@ -1,3021 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_gl_filter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_gl_filter

-

Overview

-

VCFX_gl_filter filters VCF records based on numeric genotype-likelihood fields in the FORMAT column, such as genotype quality (GQ), read depth (DP), or phred-scaled likelihoods (PL). This tool helps focus analysis on variants with sufficient genotype quality or other sample-level metrics.

-

Usage

-
VCFX_gl_filter --filter "<CONDITION>" [--mode <any|all>] < input.vcf > filtered.vcf
-
-

Options

- - - - - - - - - - - - - - - - - - - - - -
OptionDescription
-f, --filter <CONDITION>Required. Filter condition (e.g., GQ>20, DP>=10, PL<50)
-m, --mode <any|all>Optional. Determines if all samples must pass the condition (all, default) or at least one sample must pass (any)
-h, --helpDisplay help message and exit
-

Description

-

VCFX_gl_filter examines numeric fields in the FORMAT column of a VCF file and filters variant records based on whether the samples satisfy the specified condition. The tool:

-
    -
  1. Parses the filter condition into field name, operator, and threshold value
  2. -
  3. Locates the specified field in the FORMAT column
  4. -
  5. For each variant record, evaluates sample values against the condition
  6. -
  7. Applies the filtering logic based on the specified mode:
  8. -
  9. In all mode (default): keeps variants where ALL samples pass the condition
  10. -
  11. In any mode: keeps variants where AT LEAST ONE sample passes the condition
  12. -
  13. Outputs passing records to standard output
  14. -
-

This tool is particularly useful for: -- Removing variants with low genotype quality -- Filtering based on read depth or coverage -- Filtering on phred-scaled likelihoods or other numeric likelihood measures -- Applying consistent quality thresholds across samples

-

Output Format

-

The output is a standard VCF file containing: -- All original header lines from the input VCF -- Only those variant records where samples satisfy the specified condition according to the mode -- No modification to the content or format of the retained lines

-

Examples

-

Basic Usage with Default Mode

-

Filter variants where all samples have genotype quality (GQ) above 20: -

VCFX_gl_filter --filter "GQ>20" < input.vcf > high_quality.vcf
-

-

Using 'Any' Mode

-

Filter variants where at least one sample has read depth (DP) of 30 or higher: -

VCFX_gl_filter --filter "DP>=30" --mode any < input.vcf > high_depth.vcf
-

-

Exact Value Matching

-

Filter variants where all samples have an exact phred-scaled likelihood value: -

VCFX_gl_filter --filter "PL==50" < input.vcf > specific_pl.vcf
-

-

Negative Filtering with Not Equal

-

Filter variants where all samples have a non-zero genotype quality: -

VCFX_gl_filter --filter "GQ!=0" < input.vcf > non_zero_gq.vcf
-

-

In a Pipeline

-

Use with other VCFX tools in a pipeline: -

cat input.vcf | VCFX_gl_filter --filter "GQ>30" | VCFX_record_filter --filter "QUAL>40" > high_quality_variants.vcf
-

-

Filter Condition Syntax

-

Format

-

The filter condition must follow this syntax: -

FIELD OPERATOR VALUE
-
-Where: -- FIELD: Any numeric field from the FORMAT column (e.g., GQ, DP, PL) -- OPERATOR: One of >, <, >=, <=, ==, != -- VALUE: A numeric threshold (integer or decimal)

-

Examples of valid conditions: -- GQ>20: Genotype quality greater than 20 -- DP>=10.5: Read depth greater than or equal to 10.5 -- PL<30: Phred-scaled likelihood less than 30 -- GL!=0: Genotype likelihood not equal to 0

-

Comparison Operators

-

The tool supports the following comparison operators: -- >: Greater than -- <: Less than -- >=: Greater than or equal to -- <=: Less than or equal to -- ==: Equal to -- !=: Not equal to

-

Handling Special Cases

-

Missing Fields

-

If the specified field is not found in the FORMAT column: -- In all mode: The variant is filtered out -- In any mode: The variant is filtered out

-

Missing Values

-

If a sample has a missing value (.) for the specified field: -- In all mode: The variant is filtered out -- In any mode: The sample is treated as not passing, but the variant may be kept if other samples pass

-

Empty Values

-

Empty values are treated similarly to missing values: -- In all mode: The variant is filtered out -- In any mode: The sample is treated as not passing

-

Non-Numeric Values

-

If a field value cannot be converted to a number: -- In all mode: The variant is filtered out -- In any mode: The sample is treated as not passing

-

Malformed VCF

-
    -
  • For lines with insufficient fields (less than the standard VCF format requires), the tool produces a warning and skips the line
  • -
  • For data lines before the header (#CHROM) line, the tool produces a warning and skips the line
  • -
-

Performance Considerations

-
    -
  • The tool processes VCF files line by line, requiring minimal memory
  • -
  • Regex pattern matching is used for efficient parsing of filter conditions
  • -
  • No preprocessing or indexing of the VCF file is required
  • -
  • Linear time complexity with respect to file size
  • -
-

Limitations

-
    -
  • Only works with numeric fields in the FORMAT column
  • -
  • No support for filtering on string-valued FORMAT fields
  • -
  • Cannot apply different conditions to different samples
  • -
  • Cannot combine multiple conditions in a single filter
  • -
  • No special handling for multi-allelic sites with multiple values per field
  • -
  • Only evaluates the first value when a field contains multiple values
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_haplotype_extractor/index.html b/site/VCFX_haplotype_extractor/index.html deleted file mode 100644 index a5bf192c..00000000 --- a/site/VCFX_haplotype_extractor/index.html +++ /dev/null @@ -1,2832 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_haplotype_extractor - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_haplotype_extractor

-

Overview

-

VCFX_haplotype_extractor reconstructs phased haplotype blocks from genotype data in a VCF file. It identifies stretches of phased variants on the same chromosome and outputs them as continuous haplotype blocks for each sample.

-

Usage

-
VCFX_haplotype_extractor [OPTIONS] < input.vcf > haplotypes.tsv
-
-

Options

- - - - - - - - - - - - - - - - - - - - - -
OptionDescription
--block-size <SIZE>Maximum distance in base pairs between consecutive variants to be included in the same block (default: 100,000)
--check-phase-consistencyEnable checks for phase consistency between adjacent variants in a block
-h, --helpDisplay help message and exit
-

Description

-

VCFX_haplotype_extractor analyzes phased genotype data in a VCF file to reconstruct continuous haplotype blocks. The tool:

-
    -
  1. Reads a VCF file from standard input
  2. -
  3. Extracts phased genotype (GT) fields for each sample at each variant position
  4. -
  5. Groups consecutive phased variants into blocks based on:
  6. -
  7. Chromosome continuity (variants must be on the same chromosome)
  8. -
  9. Maximum distance threshold (default 100kb between adjacent variants)
  10. -
  11. Optional phase consistency checks across variants
  12. -
  13. Constructs haplotype strings representing the sequence of alleles on each chromosome
  14. -
  15. Outputs blocks of phased haplotypes in a tab-delimited format
  16. -
-

This tool is valuable for: -- Identifying regions of continuous phasing in VCF files -- Preparing haplotype data for downstream analyses -- Reconstructing parental chromosomes from phased variant data -- Quality control of phasing algorithms

-

Output Format

-

The output is a tab-delimited text file with columns:

-
CHROM  START  END  SAMPLE_1_HAPLOTYPES  SAMPLE_2_HAPLOTYPES  ...
-
-

Where: -- CHROM: Chromosome name -- START: Start position of the haplotype block -- END: End position of the haplotype block -- SAMPLE_X_HAPLOTYPES: A pipe-delimited string representing the phased genotypes for that sample

-

Each sample's haplotype column contains a string of pipe-separated genotypes where each genotype is itself pipe-separated (e.g., "0|1|1|0|0|1"). This represents the sequence of alleles in the phased block.

-

Examples

-

Basic Usage

-
./VCFX_haplotype_extractor < phased.vcf > haplotype_blocks.tsv
-
-

Custom Block Size

-
# Use a smaller maximum distance (50kb) to generate more, smaller blocks
-./VCFX_haplotype_extractor --block-size 50000 < phased.vcf > small_blocks.tsv
-
-

With Phase Consistency Checking

-
# Enable checks for phase consistency between variants
-./VCFX_haplotype_extractor --check-phase-consistency < phased.vcf > consistent_blocks.tsv
-
-

Filtering for Large Blocks

-
# Extract only blocks spanning at least 10 variants
-./VCFX_haplotype_extractor < phased.vcf | awk -F'|' '{if (NF >= 10) print}' > large_blocks.tsv
-
-

Phase Consistency

-

When the --check-phase-consistency option is enabled, the tool performs a basic check to detect potential phase inconsistencies:

-
    -
  1. For each new variant, the tool examines its phased alleles for each sample
  2. -
  3. It compares these with the last variant added to the current block
  4. -
  5. If it detects a phase "flip" (e.g., changing from "0|1" to "1|0"), it may start a new block
  6. -
  7. This helps identify regions where phasing may be inconsistent
  8. -
-

This basic consistency checking is useful for identifying phase switches that might indicate errors in the phasing process or real recombination events.

-

Handling Special Cases

-

The tool implements several strategies for handling edge cases:

-
    -
  1. Unphased genotypes: Any variant with unphased genotypes ("/" delimiter) is skipped and will not be included in haplotype blocks
  2. -
  3. Missing genotypes: Variants with missing genotypes (".") are handled, but may affect block formation
  4. -
  5. Multiallelic sites: Properly processed with the actual allele codes in the haplotype strings
  6. -
  7. Chromosome changes: Automatically starts a new block when the chromosome changes
  8. -
  9. Large distances: Starts a new block when the distance between consecutive variants exceeds the threshold
  10. -
  11. Empty input: Produces no output blocks but exits cleanly
  12. -
  13. Malformed VCF: Attempts to skip malformed lines with warnings
  14. -
-

Performance

-

VCFX_haplotype_extractor is designed for efficiency:

-
    -
  1. Single-pass processing with O(n) time complexity where n is the number of variants
  2. -
  3. Memory usage scales primarily with the number of samples and the size of the largest haplotype block
  4. -
  5. Streaming architecture allows processing large files without loading them entirely into memory
  6. -
  7. Block-based approach prevents excessive memory usage for very long chromosomes
  8. -
-

Limitations

-
    -
  1. Requires phased genotypes - variants with unphased genotypes are skipped
  2. -
  3. Cannot join blocks across different chromosomes
  4. -
  5. Simple distance-based blocking may not align with biological recombination patterns
  6. -
  7. Basic phase consistency checking may not detect all inconsistencies
  8. -
  9. No ability to export or visualize the relationship between blocks
  10. -
  11. Does not account for potential errors in the original phasing
  12. -
  13. No special handling for reference gaps or known problematic regions
  14. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_haplotype_phaser/index.html b/site/VCFX_haplotype_phaser/index.html deleted file mode 100644 index 919bde97..00000000 --- a/site/VCFX_haplotype_phaser/index.html +++ /dev/null @@ -1,2829 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_haplotype_phaser - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_haplotype_phaser

-

Overview

-

VCFX_haplotype_phaser analyzes genotype data to identify variants in linkage disequilibrium (LD) and groups them into haplotype blocks based on an LD threshold. This tool is useful for identifying sets of genetic variants that tend to be inherited together.

-

Usage

-
VCFX_haplotype_phaser [OPTIONS] < input.vcf > blocks.txt
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-l, --ld-threshold <VALUE>r² threshold for LD-based grouping (0.0-1.0, default: 0.8)
-h, --helpDisplay help message and exit
-

Description

-

VCFX_haplotype_phaser identifies haplotype blocks by grouping variants that exhibit high linkage disequilibrium (LD). The tool:

-
    -
  1. Reads a VCF file from standard input
  2. -
  3. Extracts genotype information for each variant across all samples
  4. -
  5. Calculates pairwise LD (specifically r²) between consecutive variants
  6. -
  7. Groups variants into blocks when they exceed the specified LD threshold
  8. -
  9. Outputs block information showing which variants belong to the same haplotype block
  10. -
-

This is valuable for: -- Identifying haplotypes without requiring family data -- Understanding the correlation structure of variants in a genomic region -- Reducing the dimensionality of genetic data for association testing -- Planning genotyping strategies by selecting tag SNPs from different blocks

-

Output Format

-

The output preserves the original VCF header, appends the haplotype block information, and then includes the original VCF data. The haplotype block section is formatted as follows:

-
#HAPLOTYPE_BLOCKS_START
-Block 1: 0:(chr1:1000), 1:(chr1:1050), 2:(chr1:1100)
-Block 2: 3:(chr1:2000), 4:(chr1:2050)
-Block 3: 5:(chr2:1000), 6:(chr2:1050)
-#HAPLOTYPE_BLOCKS_END
-
-

Each block line contains: -- Block number (1-indexed) -- List of grouped variants with their 0-indexed position in the VCF file -- Chromosome and position of each variant in parentheses

-

Examples

-

Basic Usage

-
./VCFX_haplotype_phaser < input.vcf > haplotype_blocks.txt
-
-

Custom LD Threshold

-
# Use a higher LD threshold for more stringent block definition
-./VCFX_haplotype_phaser --ld-threshold 0.95 < input.vcf > strict_blocks.txt
-
-

Extract Top Blocks

-
# Extract just the first 10 blocks
-./VCFX_haplotype_phaser < input.vcf | grep -A 10 "#HAPLOTYPE_BLOCKS_START" | grep "Block" > top_blocks.txt
-
-

Count Number of Blocks

-
# Count the number of blocks identified
-./VCFX_haplotype_phaser < input.vcf | grep "Block" | wc -l
-
-

LD Calculation

-

The tool calculates LD between variants using the r² statistic:

-
    -
  1. For each pair of variants, it computes:
  2. -
  3. Genotype correlation coefficient (r) across samples
  4. -
  5. Squared correlation coefficient (r²)
  6. -
  7. The r² value ranges from 0 (no LD) to 1 (perfect LD)
  8. -
  9. Variants are grouped into the same block if they have r² ≥ threshold with the last variant in the block
  10. -
-

For chromosome 1 specifically, the tool also requires the correlation coefficient (r) to be positive, ensuring that the variants are in positive LD (alleles tend to be inherited together rather than showing repulsion).

-

Handling Special Cases

-

The tool implements several strategies for handling edge cases:

-
    -
  1. Chromosome boundaries: New blocks always start when the chromosome changes
  2. -
  3. Missing genotypes: Genotypes coded as "./." or with any missing allele are excluded from LD calculations
  4. -
  5. Low LD variants: Start a new block when LD falls below the threshold
  6. -
  7. No variant data: Reports an error if no variants are found
  8. -
  9. Invalid positions: Skips variants with non-numeric positions
  10. -
  11. Malformed genotypes: Handles and skips variants with improperly formatted genotypes
  12. -
  13. Empty VCF: Reports an error and exits cleanly
  14. -
-

Performance

-

VCFX_haplotype_phaser is designed for efficiency:

-
    -
  1. Single-pass processing of variants with minimal memory usage
  2. -
  3. Optimized LD calculation that handles missing data appropriately
  4. -
  5. Scales efficiently with the number of samples and variants
  6. -
  7. Processes large VCF files with thousands of variants in reasonable time
  8. -
  9. Minimal computational overhead by using simplified LD calculations
  10. -
-

Limitations

-
    -
  1. Uses a naive approach to LD calculation that may not account for population structure
  2. -
  3. Genotypes are treated as simple sums (0/0=0, 0/1=1, 1/1=2) without considering phase
  4. -
  5. Cannot merge blocks that are separated by a single low-LD variant
  6. -
  7. No sliding window approach for finding blocks across non-adjacent variants
  8. -
  9. Does not handle multi-allelic variants specially (treats them as bi-allelic)
  10. -
  11. Does not incorporate physical distance in the blocking algorithm
  12. -
  13. No output option for visualizing blocks graphically
  14. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_header_parser/index.html b/site/VCFX_header_parser/index.html deleted file mode 100644 index f4ed87e3..00000000 --- a/site/VCFX_header_parser/index.html +++ /dev/null @@ -1,2795 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_header_parser - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_header_parser

-

Overview

-

VCFX_header_parser is a simple utility that extracts and displays all header lines from a VCF file. This tool makes it easy to examine metadata and structural information without processing the variant data.

-

Usage

-
VCFX_header_parser [OPTIONS] < input.vcf > header.txt
-
-

Options

- - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-

Description

-

VCFX_header_parser reads a VCF file from standard input and outputs only the header lines (lines starting with "#"). The tool:

-
    -
  1. Reads the VCF file line by line
  2. -
  3. Extracts all lines beginning with "#", which include:
  4. -
  5. VCF version information (##fileformat=VCFv4.2)
  6. -
  7. Reference genome information (##reference=file:///path/to/reference.fa)
  8. -
  9. Contig definitions (##contig=<ID=chr1,length=248956422>)
  10. -
  11. INFO field definitions (##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">)
  12. -
  13. FILTER definitions (##FILTER=<ID=PASS,Description="All filters passed">)
  14. -
  15. FORMAT definitions (##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">)
  16. -
  17. Sample column header line (#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2)
  18. -
  19. Stops reading when it encounters the first non-header line (any line not starting with "#")
  20. -
  21. Outputs all collected header lines to standard output
  22. -
-

This tool is useful for: -- Examining metadata without processing large variant datasets -- Extracting sample names from a VCF file -- Checking VCF file structure and compliance with specifications -- Creating header templates for new VCF files -- Documenting file provenance and contents

-

Output Format

-

The output consists of all header lines from the input VCF file, in the same order they appeared in the original file:

-
##fileformat=VCFv4.2
-##source=VCFX
-##FILTER=<ID=PASS,Description="All filters passed">
-##contig=<ID=chr1,length=248956422>
-#CHROM  POS  ID  REF  ALT  QUAL  FILTER  INFO  FORMAT  SAMPLE1  SAMPLE2
-
-

Examples

-

Basic Usage

-
./VCFX_header_parser < input.vcf > header.txt
-
-

Extracting Sample Names

-
# Extract the sample names (all columns after FORMAT in the #CHROM line)
-./VCFX_header_parser < input.vcf | grep "^#CHROM" | cut -f10- > sample_names.txt
-
-

Counting Contigs

-
# Count the number of contigs defined in the header
-./VCFX_header_parser < input.vcf | grep "##contig" | wc -l
-
-

Verifying VCF Version

-
# Check the VCF file format version
-./VCFX_header_parser < input.vcf | grep "##fileformat" | cut -d= -f2
-
-

Handling Special Cases

-

The tool implements simple strategies for handling edge cases:

-
    -
  1. Empty files: If the input file is empty, no output is produced
  2. -
  3. Files without headers: If the file has no header lines, no output is produced
  4. -
  5. Malformed headers: All lines starting with "#" are considered header lines, even if they don't follow VCF specifications
  6. -
  7. Line endings: Both Unix (LF) and Windows (CRLF) line endings are handled correctly
  8. -
  9. Partial headers: If the file ends in the middle of the header section, all header lines up to that point are output
  10. -
-

Performance

-

VCFX_header_parser is designed for simplicity and efficiency:

-
    -
  1. Processes input line-by-line without loading the entire file into memory
  2. -
  3. Stops processing as soon as it encounters the first non-header line
  4. -
  5. Highly efficient for large VCF files where headers constitute a small portion of the total file size
  6. -
  7. Minimal memory footprint since only the current line being processed is stored in memory
  8. -
-

Limitations

-
    -
  1. No validation of header syntax or compliance with VCF specifications
  2. -
  3. Cannot modify or filter specific header lines
  4. -
  5. No ability to sort or organize header lines
  6. -
  7. No special handling for duplicate header entries
  8. -
  9. Cannot add or merge headers from multiple files
  10. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_hwe_tester/index.html b/site/VCFX_hwe_tester/index.html deleted file mode 100644 index 10c79d5a..00000000 --- a/site/VCFX_hwe_tester/index.html +++ /dev/null @@ -1,2796 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_hwe_tester - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_hwe_tester

-

Overview

-

VCFX_hwe_tester performs Hardy-Weinberg Equilibrium (HWE) testing on biallelic variants in a VCF file, calculating and reporting exact p-values that measure the degree of deviation from expected genotype frequencies.

-

Usage

-
VCFX_hwe_tester [OPTIONS] < input.vcf > hwe_results.txt
-
-

Options

- - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-

Description

-

VCFX_hwe_tester analyzes each biallelic variant in a VCF file to determine whether its genotype frequencies conform to Hardy-Weinberg Equilibrium expectations. The tool:

-
    -
  1. Reads the VCF file line by line
  2. -
  3. Filters for biallelic variants (skips sites with multiple ALT alleles)
  4. -
  5. Counts homozygous reference (0/0), heterozygous (0/1), and homozygous alternate (1/1) genotypes
  6. -
  7. Calculates an exact p-value for Hardy-Weinberg Equilibrium
  8. -
  9. Reports results in a simple tab-delimited format
  10. -
-

The exact test uses a full enumeration of the probability distribution to obtain an accurate p-value, rather than relying on chi-square approximations. A low p-value indicates significant deviation from Hardy-Weinberg Equilibrium, which might suggest: -- Population stratification -- Selection pressure -- Non-random mating -- Genotyping errors

-

Output Format

-

The output is a tab-delimited text file with the following columns:

-
CHROM  POS  ID  REF  ALT  HWE_pvalue
-
-

Where: -- CHROM, POS, ID, REF, ALT are copied from the input VCF -- HWE_pvalue is the calculated p-value for Hardy-Weinberg Equilibrium

-

Examples

-

Basic Usage

-
./VCFX_hwe_tester < input.vcf > hwe_results.txt
-
-

Filter by HWE p-value

-
# Extract variants with significant HWE deviation (p < 0.05)
-./VCFX_hwe_tester < input.vcf | awk -F'\t' '{if(NR==1 || ($6!="HWE_pvalue" && $6<0.05)) print}' > hwe_significant.txt
-
-

Check for Genotyping Errors

-
# Find potential genotyping errors (extremely low HWE p-values)
-./VCFX_hwe_tester < input.vcf | awk -F'\t' '{if(NR==1 || ($6!="HWE_pvalue" && $6<0.0001)) print}' > potential_errors.txt
-
-

Mathematical Details

-

The tool uses an exact test based on the multinomial distribution of genotypes. For each variant:

-
    -
  1. The observed counts of genotypes (homRef, het, homAlt) are calculated
  2. -
  3. The expected frequencies under HWE are computed as:
  4. -
  5. Expected homRef = p² × N
  6. -
  7. Expected het = 2pq × N
  8. -
  9. Expected homAlt = q² × N
  10. -
-

where p = (2×homRef + het)/(2×N), q = 1-p, and N = total number of individuals

-
    -
  1. The p-value is calculated by summing the probabilities of all possible genotype configurations that are equally or less likely than the observed configuration
  2. -
-

This approach provides accurate p-values even for low minor allele frequencies or small sample sizes.

-

Handling Special Cases

-
    -
  • Multi-allelic variants: Skipped entirely (only biallelic variants are considered)
  • -
  • Missing genotypes: Excluded from counts when calculating HWE
  • -
  • Phased genotypes: Phase information is ignored; "0|1" is treated the same as "0/1"
  • -
  • Non-standard genotypes: Any genotype other than 0/0, 0/1, or 1/1 is excluded
  • -
  • No valid genotypes: If no valid genotypes are found, the p-value is reported as 1.0
  • -
  • Perfect equilibrium: For variants with genotype frequencies perfectly matching HWE expectations, the p-value is 1.0
  • -
-

Performance

-

The tool is optimized for efficiency: -- Processes one variant at a time, keeping memory usage low -- Caches logarithmic factorial values to speed up calculations -- Uses numerical optimizations to handle large sample sizes -- Scales linearly with the number of variants in the VCF file

-

Limitations

-
    -
  • Only works with biallelic variants
  • -
  • Assumes diploid genotypes
  • -
  • No stratification by population or other groupings
  • -
  • No correction for multiple testing
  • -
  • May be less accurate for extremely rare variants with very few non-reference genotypes
  • -
  • No built-in filtering for variant quality or missing data threshold
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_impact_filter/index.html b/site/VCFX_impact_filter/index.html deleted file mode 100644 index 06c05e82..00000000 --- a/site/VCFX_impact_filter/index.html +++ /dev/null @@ -1,2762 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_impact_filter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_impact_filter

-

Overview

-

VCFX_impact_filter filters VCF variants based on their predicted functional impact level (HIGH, MODERATE, LOW, or MODIFIER) found in the INFO field of VCF records.

-

Usage

-
VCFX_impact_filter --filter-impact <LEVEL> < input.vcf > filtered.vcf
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-i, --filter-impact <LEVEL>Required. Impact level threshold. Must be one of: HIGH, MODERATE, LOW, MODIFIER
-h, --helpDisplay help message and exit
-

Description

-

VCFX_impact_filter analyzes variant annotations in a VCF file and filters them based on a specified impact level threshold. The tool:

-
    -
  1. Reads a VCF file line-by-line
  2. -
  3. For each variant, extracts the impact level from the INFO field (looks for IMPACT=...)
  4. -
  5. Classifies the impact into one of four levels: HIGH, MODERATE, LOW, or MODIFIER
  6. -
  7. Keeps only variants with impact level greater than or equal to the specified threshold
  8. -
  9. Adds an EXTRACTED_IMPACT field to the INFO column of retained variants
  10. -
  11. Outputs the filtered VCF with the same format as the input
  12. -
-

The impact level hierarchy used for filtering is: -HIGH > MODERATE > LOW > MODIFIER > UNKNOWN

-

Output Format

-

The output is a valid VCF file containing only variants that meet or exceed the specified impact threshold. Each retained variant will have an additional INFO field:

-
EXTRACTED_IMPACT=<value>
-
-

Where <value> is the original impact value extracted from the INFO field.

-

Examples

-

Filter for HIGH impact variants only

-
./VCFX_impact_filter --filter-impact HIGH < input.vcf > high_impact_variants.vcf
-
-

Filter for MODERATE or higher impact variants

-
./VCFX_impact_filter --filter-impact MODERATE < input.vcf > functional_variants.vcf
-
-

Combining with other tools

-
# Filter by impact then convert to another format
-./VCFX_impact_filter --filter-impact HIGH < input.vcf | \
-  ./VCFX_format_converter --format=bed > high_impact.bed
-
-

Handling Special Cases

-
    -
  • Case insensitivity: Impact values are case insensitive (e.g., "high" and "HIGH" are treated the same)
  • -
  • Extended impact values: Values like "HIGH_MISSENSE" are recognized by looking for the presence of standard impact keywords
  • -
  • Missing IMPACT field: Variants without an IMPACT field in the INFO column are treated as "UNKNOWN" and filtered out by default
  • -
  • Empty INFO field: Properly handled by adding the EXTRACTED_IMPACT field as the only INFO attribute
  • -
  • Multiple impact annotations: If multiple IMPACT fields are present, only the first one is considered
  • -
  • Invalid impact values: Any impact value not recognized as one of the four standard levels is classified as "UNKNOWN"
  • -
-

Performance

-

The tool is optimized for efficiency: -- Processes VCF files line-by-line with minimal memory overhead -- Uses regular expressions for reliable pattern matching -- Processes very large VCF files with linear time complexity

-

Limitations

-
    -
  • Only extracts and analyzes the first IMPACT field found in the INFO column
  • -
  • Cannot differentiate between more detailed impact subclassifications (relies on basic HIGH/MODERATE/LOW/MODIFIER keywords)
  • -
  • Assumes that functional impact annotations follow standard convention with one of the four recognized impact levels
  • -
  • Does not account for the specific variant type (SNP, indel, etc.) when filtering
  • -
  • No built-in options to combine impact filtering with other criteria (e.g., allele frequency)
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_inbreeding_calculator/index.html b/site/VCFX_inbreeding_calculator/index.html deleted file mode 100644 index 04985e61..00000000 --- a/site/VCFX_inbreeding_calculator/index.html +++ /dev/null @@ -1,2878 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_inbreeding_calculator - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_inbreeding_calculator

-

Overview

-

VCFX_inbreeding_calculator computes the inbreeding coefficient (F) for each sample in a VCF file, providing a measure of homozygosity relative to Hardy-Weinberg equilibrium expectations.

-

Usage

-
VCFX_inbreeding_calculator [OPTIONS] < input.vcf > output.txt
-
-

Options

- - - - - - - - - - - - - - - - - - - - - - - - - -
OptionDescription
--freq-mode MODEHow to calculate allele frequencies: 'excludeSample' (default) or 'global'
--skip-boundarySkip sites with boundary frequencies (p=0 or p=1)
--count-boundary-as-usedCount boundary sites in usedCount even when skipping them
-h, --helpDisplay help message and exit
-

Description

-

VCFX_inbreeding_calculator analyzes biallelic variants in a VCF file to calculate the inbreeding coefficient (F) for each sample. The inbreeding coefficient measures the reduction in heterozygosity compared to expectations under Hardy-Weinberg equilibrium.

-

The tool: -1. Reads the VCF file in a single pass -2. Filters for biallelic variants (ignores sites with multiple ALT alleles) -3. Encodes genotypes as 0 (0/0), 1 (0/1), 2 (1/1), or -1 (missing/invalid) -4. Calculates allele frequencies using the specified method -5. Computes the inbreeding coefficient for each sample

-

Frequency Modes

-

The tool offers two methods for calculating allele frequencies:

-
    -
  • excludeSample: Each sample's inbreeding coefficient is calculated using allele frequencies derived from all other samples (excluding itself)
  • -
  • global: A single global allele frequency is calculated using all samples, and this same frequency is used for all samples
  • -
-

Boundary Handling

-

For sites where the allele frequency (p) is 0 or 1, you have three options:

-
    -
  1. Use these sites normally (default)
  2. -
  3. Skip boundary sites completely (--skip-boundary)
  4. -
  5. Skip boundary sites for calculations but count them as used sites (--skip-boundary --count-boundary-as-used)
  6. -
-

Output Format

-

The output is a tab-delimited text file with the following columns:

-
Sample  InbreedingCoefficient
-
-

Where: -- Sample is the sample name from the VCF file -- InbreedingCoefficient is the calculated F value, or "NA" if no usable sites were found

-

Examples

-

Basic Usage (Default Settings)

-
./VCFX_inbreeding_calculator < input.vcf > inbreeding_coefficients.txt
-
-

Using Global Frequency Mode

-
./VCFX_inbreeding_calculator --freq-mode global < input.vcf > global_inbreeding.txt
-
-

Skip Boundary Frequencies

-
./VCFX_inbreeding_calculator --skip-boundary < input.vcf > non_boundary_inbreeding.txt
-
-

Custom Boundary Handling

-
./VCFX_inbreeding_calculator --skip-boundary --count-boundary-as-used < input.vcf > custom_boundary.txt
-
-

Formula and Calculation

-

The inbreeding coefficient is calculated as:

-

F = (O - E) / (T - E)

-

Where: -- O = Observed homozygosity (count of 0/0 and 1/1 genotypes) -- E = Expected homozygosity under HWE (∑(p²+q²) across sites) -- T = Total number of sites (after filtering)

-

In boundary cases: -- If total sites used = 0, F = NA -- If expected = observed, F = 0 -- If expected = total, F = 1

-

Handling Special Cases

-
    -
  • Single-sample VCFs: Always produces "NA" as inbreeding requires population context
  • -
  • Multi-allelic sites: Skipped entirely (only biallelic variants are considered)
  • -
  • Missing genotypes: Coded as -1 and excluded from calculations
  • -
  • Non-diploid genotypes: Treated as missing and excluded
  • -
  • Boundary frequencies: Special handling available via command-line options
  • -
  • Zero usable sites: Returns "NA" for the sample
  • -
  • Small sample sizes: May produce unreliable estimates
  • -
-

Performance

-

The tool performs a single pass through the VCF file, giving it linear complexity with respect to file size. Memory usage scales with: -- Number of samples in the VCF -- Number of biallelic variants

-

Limitations

-
    -
  • Only works with biallelic variants (multiallelic sites are skipped)
  • -
  • Assumes diploid genotypes
  • -
  • May produce unexpected results with very small sample sizes
  • -
  • No built-in filtering for variant quality or other metrics
  • -
  • No chromosome or region-specific analysis
  • -
  • Cannot handle populations with substructure (assumes random mating)
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_indel_normalizer/index.html b/site/VCFX_indel_normalizer/index.html deleted file mode 100644 index 32ef1cdd..00000000 --- a/site/VCFX_indel_normalizer/index.html +++ /dev/null @@ -1,2961 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_indel_normalizer - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_indel_normalizer

-

Overview

-

VCFX_indel_normalizer is a tool for normalizing indel variants in VCF files. It performs left-alignment of variants by removing common prefixes and suffixes between reference and alternate alleles, and splits multi-allelic variants into separate records. This normalization is done without requiring an external reference genome.

-

Usage

-
VCFX_indel_normalizer [OPTIONS] < input.vcf > normalized.vcf
-
-

Options

- - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-

Description

-

VCFX_indel_normalizer processes a VCF file and normalizes indel variants by:

-
    -
  1. Reading the VCF file from standard input
  2. -
  3. Preserving all header lines without modification
  4. -
  5. For variants with multiple alternate alleles (comma-separated ALT values):
  6. -
  7. Splitting them into separate lines, one per alternate allele
  8. -
  9. For each variant:
  10. -
  11. Removing the longest common prefix from REF and ALT, keeping at least one base
  12. -
  13. Removing the longest common suffix from REF and ALT, keeping at least one base
  14. -
  15. Adjusting the position (POS) to account for removed leading bases
  16. -
  17. Writing the normalized variants to standard output
  18. -
-

This normalization ensures that variants are represented in a consistent, minimal left-aligned form, which is important for variant comparison, annotation, and analysis.

-

Normalization Process

-

Left Alignment Algorithm

-

The tool implements a simple but effective left-alignment approach:

-
    -
  1. Prefix Removal:
  2. -
  3. Identify the longest common prefix between REF and ALT
  4. -
  5. Remove all but one base of this common prefix
  6. -
  7. -

    Adjust the variant position to account for the removed bases

    -
  8. -
  9. -

    Suffix Removal:

    -
  10. -
  11. Identify the longest common suffix between REF and ALT
  12. -
  13. -

    Remove all but one base of this common suffix

    -
  14. -
  15. -

    Special Case Handling:

    -
  16. -
  17. If after normalization REF or ALT is empty, the variant is considered invalid
  18. -
  19. If after normalization REF and ALT are identical, the variant is considered invalid
  20. -
-

Multi-allelic Variant Handling

-

For variants with multiple alternate alleles: -- Each alternate allele is processed separately -- A new VCF line is generated for each alternate allele -- Each new line undergoes the same normalization process

-

Examples

-

Basic Usage

-

Normalize indels in a VCF file: -

VCFX_indel_normalizer < input.vcf > normalized.vcf
-

-

Example Transformations

-

Prefix Removal

-
Before: chr1 100 . ACTG AC   (deletion of TG)
-After:  chr1 101 . CTG  C    (adjusted position, one shared leading base kept as anchor)
-
-

Suffix Removal

-
Before: chr1 100 . ACTG ACCC (substitution of TG with CC)
-After:  chr1 100 . AC   ACC  (common suffix 'C' removed)
-
-

Multi-allelic Splitting

-
Before: chr1 100 . A    C,G,T
-After:  chr1 100 . A    C
-        chr1 100 . A    G
-        chr1 100 . A    T
-
-

Handling Special Cases

-

Invalid Variants

-
    -
  • If after normalization a variant would have an empty REF or ALT, the original line is preserved
  • -
  • If after normalization REF equals ALT (no actual variant), the original line is preserved
  • -
-

Empty Lines

-
    -
  • Empty lines in the input are preserved as empty lines in the output
  • -
-

Malformed Lines

-
    -
  • Lines with fewer than 10 columns (minimum for a VCF with samples) are output unchanged
  • -
  • Lines with invalid position values are output unchanged
  • -
-

Performance Considerations

-
    -
  • The tool processes VCF files line by line, with minimal memory requirements
  • -
  • Performance scales linearly with input file size
  • -
  • No external reference genome is required, making it lightweight and portable
  • -
-

Limitations

-
    -
  • This tool performs simple left-alignment without checking for sequence repeats in a reference genome
  • -
  • Full left-alignment of variants in repetitive regions requires a reference sequence
  • -
  • Cannot handle complex structural variants beyond simple indels
  • -
  • Limited to the information available in the VCF file itself
  • -
  • No automatic breakup of complex variants (substitutions that could be represented as indels)
  • -
  • No variant filtering capabilities
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_indexer/index.html b/site/VCFX_indexer/index.html deleted file mode 100644 index 2c6f8b50..00000000 --- a/site/VCFX_indexer/index.html +++ /dev/null @@ -1,2851 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_indexer - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_indexer

-

Overview

-

VCFX_indexer is a utility tool for creating a byte-offset index of a VCF file. It generates a simple tab-delimited index file that maps chromosome and position to the exact byte offset in the original file, enabling efficient random access to variants without scanning the entire file.

-

Usage

-
VCFX_indexer [OPTIONS] < input.vcf > index.tsv
-
-

Options

- - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-

Description

-

VCFX_indexer processes a VCF file to create a position-based index by:

-
    -
  1. Reading the VCF file from standard input as a raw byte stream
  2. -
  3. Locating the #CHROM header line to determine the start of data lines
  4. -
  5. For each variant line:
  6. -
  7. Extracting the chromosome (CHROM) and position (POS) values
  8. -
  9. Calculating the precise byte offset from the start of the file
  10. -
  11. Writing a three-column index to standard output with:
  12. -
  13. CHROM: The chromosome identifier from the VCF
  14. -
  15. POS: The position value from the VCF
  16. -
  17. FILE_OFFSET: The byte offset to the start of the line in the source file
  18. -
-

This index enables efficient random access to specific variants in large VCF files by allowing tools to seek directly to a byte offset rather than scanning the entire file. It's particularly useful for building tools that need to query specific regions of a VCF file.

-

Output Format

-

The index file is a tab-delimited text file with the following format:

-
CHROM   POS    FILE_OFFSET
-1       100    542
-1       200    621
-1       300    702
-
-

Where:

-
    -
  • CHROM is the chromosome identifier from the VCF
  • -
  • POS is the genomic position from the VCF
  • -
  • FILE_OFFSET is the byte offset from the start of the VCF file
  • -
-

Examples

-

Basic Usage

-

Create an index for a VCF file: -

VCFX_indexer < input.vcf > input.vcf.idx
-

-

Using with Other Tools

-

Use the index to quickly extract a specific variant:

-
# Find the offset for position 1:12345
-grep -P "^1\t12345\t" input.vcf.idx
-
-# Use the offset (e.g., 23456) to seek directly to that variant
-tail -c +23456 input.vcf | head -1
-
-

Special Case Handling

-

File Format Detection

-
    -
  • The tool automatically handles both Unix (LF) and Windows (CRLF) line endings
  • -
  • Byte offsets are calculated correctly regardless of the line ending style
  • -
-

Malformed VCF Files

-
    -
  • Lines with unparseable position values are skipped
  • -
  • If the #CHROM header is missing, an error is reported and no index entries are generated
  • -
  • The tool requires variants to be tab-delimited; space-delimited files are not properly indexed
  • -
-

Stream Processing

-
    -
  • The tool can process files from pipes as well as regular files
  • -
  • For piped input, offsets are calculated relative to the beginning of the piped stream
  • -
-

Empty Lines and Comments

-
    -
  • Empty lines and comment lines (starting with #) are properly handled and do not generate index entries
  • -
-

Performance Considerations

-
    -
  • The tool processes the VCF file in a single pass
  • -
  • Memory usage is minimal as the file is processed line-by-line
  • -
  • Large files can be indexed efficiently as the tool uses a buffered reading approach
  • -
  • For very large files, the index itself will be much smaller than the original VCF
  • -
  • The index file size scales with the number of variants, not the file size
  • -
-

Limitations

-
    -
  • No support for indexing other fields besides CHROM and POS
  • -
  • Does not validate the VCF format beyond basic column checking
  • -
  • No built-in compression of the index file
  • -
  • Cannot add new entries to an existing index (must regenerate the full index)
  • -
  • Does not directly support query operations (must be used with other tools)
  • -
  • Cannot handle compressed (gzipped) VCF files directly
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_info_aggregator/index.html b/site/VCFX_info_aggregator/index.html deleted file mode 100644 index 5e7bb645..00000000 --- a/site/VCFX_info_aggregator/index.html +++ /dev/null @@ -1,2758 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_info_aggregator - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - -
-
- -
-
- - - -
-
- - - - - - - - -

VCFX_info_aggregator

-

Overview

-

VCFX_info_aggregator is a tool that reads a VCF file from standard input, outputs it unmodified, and appends a summary section containing aggregated statistics (sum and average) for specified numeric INFO fields. The summary section is formatted as VCF header-like comments to maintain compatibility with VCF parsers.

-

Usage

-
VCFX_info_aggregator [OPTIONS] < input.vcf > output.vcf
-
-

Options

-
    -
  • -a, --aggregate-info <fields>: Comma-separated list of INFO fields to aggregate (required).
  • -
  • -h, --help: Display help message and exit.
  • -
-

Description

-

VCFX_info_aggregator processes a VCF file line by line, performing the following operations:

-
    -
  1. Parse command-line arguments to determine which INFO fields to aggregate.
  2. -
  3. Read each line from standard input.
  4. -
  5. Output each line unmodified to standard output.
  6. -
  7. For data lines (non-header lines):
  8. -
  9. Parse the INFO column (8th column).
  10. -
  11. Extract specified INFO fields if they exist and contain numeric values.
  12. -
  13. Accumulate these numeric values for later calculation.
  14. -
  15. After processing all lines, append a summary section that includes:
  16. -
  17. A line starting with #AGGREGATION_SUMMARY.
  18. -
  19. For each specified INFO field, a line reporting the sum and average of all valid numeric values found.
  20. -
-

The tool is particularly useful for calculating summary statistics across an entire VCF file, such as average depth of coverage (DP) or average allele frequency (AF).

-

Output Format

-

The output is identical to the input VCF file, with additional summary lines appended at the end:

-
... original VCF content ...
-#AGGREGATION_SUMMARY
-FIELD1: Sum=<total>, Average=<mean>
-FIELD2: Sum=<total>, Average=<mean>
-
-

The summary lines start with # to ensure they are treated as comments by standard VCF parsers, maintaining compatibility.

-

Examples

-

Basic Usage

-

Calculate summary statistics for the depth (DP) field:

-
VCFX_info_aggregator --aggregate-info "DP" < input.vcf > output.vcf
-
-

Multiple Fields

-

Calculate statistics for both depth (DP) and allele frequency (AF):

-
VCFX_info_aggregator --aggregate-info "DP,AF" < input.vcf > output.vcf
-
-

Combining with Other Tools

-

Used in a pipeline to analyze filtered variants:

-
VCFX_record_filter --quality ">30" < input.vcf | VCFX_info_aggregator --aggregate-info "DP,AF,MQ" > filtered_with_stats.vcf
-
-

Handling Special Cases

-

The tool implements several strategies for handling edge cases:

-
    -
  1. Non-numeric values: If a field cannot be parsed as a numeric value (e.g., "DP=abc"), it is skipped and not included in the aggregation.
  2. -
  3. Missing fields: If a specified INFO field is not present in a particular variant, it is simply skipped for that variant.
  4. -
  5. Empty input: The tool will process empty files correctly, reporting zeros for sums and averages.
  6. -
  7. Malformed VCF: If a data line is encountered before the #CHROM header, an error is reported.
  8. -
  9. Line endings: The tool correctly handles both Unix (LF) and Windows (CRLF) line endings.
  10. -
  11. Partial final line: The tool properly processes files that do not end with a newline character.
  12. -
-

Performance

-

VCFX_info_aggregator is designed to be memory-efficient and performant:

-
    -
  1. It processes the VCF file in a single pass, with O(1) memory usage relative to the file size.
  2. -
  3. Only the specified INFO fields are parsed and accumulated, minimizing unnecessary processing.
  4. -
  5. The tool streams data directly from input to output, without storing the entire file in memory.
  6. -
-

Limitations

-
    -
  1. The tool only processes numeric values in INFO fields. String, flag, or complex values are not aggregated.
  2. -
  3. For multi-allelic variants, INFO fields like AF that may contain multiple values (comma-separated) are processed as a single value.
  4. -
  5. The tool does not modify or annotate the original variants; it only appends summary statistics at the end of the file.
  6. -
  7. The summary section, while formatted as VCF comments, may not be recognized by all downstream tools that expect a strictly-conforming VCF file.
  8. -
  9. There is no option to calculate additional statistics beyond sum and average (e.g., median, standard deviation).
  10. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_info_parser/index.html b/site/VCFX_info_parser/index.html deleted file mode 100644 index 837ade9a..00000000 --- a/site/VCFX_info_parser/index.html +++ /dev/null @@ -1,2793 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_info_parser - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_info_parser

-

Overview

-

VCFX_info_parser extracts and formats specific INFO fields from a VCF file into a tabular format for easier analysis. The tool processes a VCF file line by line, parses the INFO column, and outputs only the requested fields in a clean TSV format.

-

Usage

-
VCFX_info_parser --info "FIELD1,FIELD2,..." < input.vcf > extracted_info.tsv
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-i, --info <FIELDS>Required. Comma-separated list of INFO fields to extract (e.g., "DP,AF,SOMATIC")
-h, --helpDisplay help message and exit
-

Description

-

VCFX_info_parser simplifies the process of extracting specific information from VCF files by:

-
    -
  1. Reading VCF data from standard input
  2. -
  3. Parsing the INFO column to extract user-specified fields
  4. -
  5. Producing a clean, tabular output with standardized headers
  6. -
  7. Properly handling flags, missing values, and malformed entries
  8. -
-

This tool is particularly useful for: -- Extracting numeric values like depth (DP) or allele frequency (AF) for statistical analysis -- Converting complex VCF INFO fields into a format suitable for spreadsheet applications -- Creating simplified datasets focused on specific annotations -- Preparing data for visualization or report generation

-

Output Format

-

The output is a tab-separated file with the following columns:

-
CHROM  POS  ID  REF  ALT  FIELD1  FIELD2  ...
-
-

Where: -- The first five columns are standard VCF fields (chromosome, position, ID, reference allele, alternate allele) -- Each subsequent column contains the value of a requested INFO field -- Missing values are represented by a dot (.) -- Flag fields (INFO fields with no value) are also represented by a dot (.)

-

Examples

-

Basic Usage - Extract Depth Information

-
./VCFX_info_parser --info "DP" < input.vcf > depth_data.tsv
-
-

Extract Multiple Fields

-
./VCFX_info_parser --info "DP,AF,MQ" < input.vcf > key_metrics.tsv
-
-

Working with Annotation Data

-
./VCFX_info_parser --info "Gene,IMPACT,Consequence" < annotated.vcf > gene_impacts.tsv
-
-

Pipeline Example

-
# Filter a VCF file and extract specific INFO fields
-cat input.vcf | grep "PASS" | ./VCFX_info_parser --info "DP,AF,SOMATIC" > filtered_annotations.tsv
-
-

Handling Special Cases

-

The tool implements several strategies for handling edge cases:

-
    -
  1. Flag fields: INFO fields without values (flags like 'SOMATIC') are represented by a dot in the output
  2. -
  3. Missing fields: If a requested INFO field is not present in a specific variant, a dot is printed
  4. -
  5. Malformed lines: Lines that don't conform to VCF format are skipped with a warning message
  6. -
  7. Empty input: The tool correctly handles empty input files
  8. -
  9. Header lines: VCF header lines (starting with #) are skipped
  10. -
  11. Line endings: Both Unix (LF) and Windows (CRLF) line endings are supported
  12. -
  13. Partial final line: Files without a final newline character are processed correctly
  14. -
-

Performance

-

VCFX_info_parser is designed for efficiency:

-
    -
  1. Single-pass processing with line-by-line reading, allowing for streaming of very large files
  2. -
  3. Minimal memory footprint regardless of input file size
  4. -
  5. Efficient string parsing with no complex regular expressions
  6. -
  7. Fast lookup of INFO fields using hash maps
  8. -
-

Limitations

-
    -
  1. Cannot handle multi-allelic variants specially (each row is processed independently)
  2. -
  3. No built-in filtering capabilities (use in conjunction with other filtering tools)
  4. -
  5. Cannot split INFO fields with multiple values (e.g., CSQ fields from VEP)
  6. -
  7. Doesn't preserve VCF headers in the output
  8. -
  9. No option to include additional VCF columns (QUAL, FILTER) in the output
  10. -
  11. Cannot extract FORMAT fields or sample-specific information
  12. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_info_summarizer/index.html b/site/VCFX_info_summarizer/index.html deleted file mode 100644 index b0e05169..00000000 --- a/site/VCFX_info_summarizer/index.html +++ /dev/null @@ -1,2804 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_info_summarizer - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_info_summarizer

-

Overview

-

VCFX_info_summarizer analyzes numeric fields in the INFO column of a VCF file and calculates summary statistics (mean, median, and mode) for each specified field. This tool enables researchers to quickly understand the distribution and central tendencies of key metrics across variants.

-

Usage

-
VCFX_info_summarizer --info "FIELD1,FIELD2,..." < input.vcf > summary_stats.tsv
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-i, --info <FIELDS>Required. Comma-separated list of INFO fields to analyze (e.g., "DP,AF,MQ")
-h, --helpDisplay help message and exit
-

Description

-

VCFX_info_summarizer processes a VCF file to generate statistical summaries of specified numeric INFO fields. The tool:

-
    -
  1. Reads a VCF file from standard input
  2. -
  3. Parses the INFO column from each variant record
  4. -
  5. Extracts numeric values for the specified INFO fields
  6. -
  7. Calculates three key statistics for each field:
  8. -
  9. Mean (average value)
  10. -
  11. Median (middle value)
  12. -
  13. Mode (most frequently occurring value)
  14. -
  15. Outputs the results in a clean, tabular format
  16. -
-

This tool is valuable for: -- Quality control assessment of sequencing data -- Understanding the distribution of metrics like depth, allele frequency, or mapping quality -- Identifying potential biases or anomalies in variant calling -- Summarizing large VCF files for reports or visualizations

-

Output Format

-

The output is a tab-separated file with the following columns:

-
INFO_Field  Mean  Median  Mode
-
-

Where: -- INFO_Field: The name of the INFO field being summarized -- Mean: The arithmetic mean of all numeric values for that field -- Median: The middle value when all values are sorted -- Mode: The most frequently occurring value -- "NA" is displayed if no valid numeric values were found for a field

-

All numeric values are formatted with four decimal places of precision.

-

Examples

-

Basic Usage - Analyze Depth Statistics

-
./VCFX_info_summarizer --info "DP" < input.vcf > depth_stats.tsv
-
-

Analyze Multiple Fields

-
./VCFX_info_summarizer --info "DP,AF,MQ" < input.vcf > variant_stats.tsv
-
-

Analyze Complex Input with Filtering

-
# Get summary stats for only PASS variants
-grep -e "^#" -e "PASS" input.vcf | ./VCFX_info_summarizer --info "DP,QD,FS" > pass_variant_stats.tsv
-
-

Filter and Compare Multiple VCF Files

-
# Create a summary comparison script for multiple files
-for vcf in sample1.vcf sample2.vcf sample3.vcf; do
-  echo "=== $vcf ===" >> summary.txt
-  ./VCFX_info_summarizer --info "DP,AF" < $vcf >> summary.txt
-  echo "" >> summary.txt
-done
-
-

Handling Special Cases

-

The tool implements several strategies for handling edge cases:

-
    -
  1. Non-numeric values: Skipped with a warning to stderr, without affecting the calculations for other values
  2. -
  3. Missing fields: If a specified field is not present in a variant's INFO column, it's simply skipped
  4. -
  5. Multi-value fields: Each comma-separated value is processed individually (e.g., AF=0.1,0.2,0.3)
  6. -
  7. Empty input: Outputs "NA" for all statistics if no valid values are found
  8. -
  9. Malformed VCF: Lines that don't conform to VCF format are skipped with a warning
  10. -
  11. Header validation: Checks for the presence of a proper #CHROM header line before processing records
  12. -
  13. Flag fields: INFO flags without values are treated as having a value of "1" for statistical calculations
  14. -
-

Performance

-

VCFX_info_summarizer is designed for efficiency with large VCF files:

-
    -
  1. Single-pass processing with O(n) time complexity where n is the number of variants
  2. -
  3. O(m) memory usage where m is the number of numeric values for the specified fields
  4. -
  5. Efficient string parsing using streams
  6. -
  7. Fast statistical calculations with minimal sorting operations
  8. -
-

Limitations

-
    -
  1. Cannot process non-numeric INFO fields (strings, flags, etc.) except for converting flags to "1"
  2. -
  3. No ability to filter variants based on their values (must be combined with other tools for filtering)
  4. -
  5. Limited to basic statistics (mean, median, mode); no advanced statistics like standard deviation, quartiles, etc.
  6. -
  7. Does not support weighted statistics for multi-allelic variants
  8. -
  9. Cannot process FORMAT fields or perform sample-specific statistical summaries
  10. -
  11. No support for histograms or graphical representations of the data distribution
  12. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_ld_calculator/index.html b/site/VCFX_ld_calculator/index.html deleted file mode 100644 index f108da64..00000000 --- a/site/VCFX_ld_calculator/index.html +++ /dev/null @@ -1,2776 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_ld_calculator - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - -
- -
- - - -
-
- - - - - - - - -

VCFX_ld_calculator

-

Overview

-

VCFX_ld_calculator calculates pairwise linkage disequilibrium (LD) statistics between genetic variants in a VCF file, expressed as r² values. It can analyze variants across an entire file or within a specified genomic region.

-

Usage

-
VCFX_ld_calculator [OPTIONS] < input.vcf > ld_matrix.txt
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
--region <chr:start-end>Only compute LD for variants in the specified region
-h, --helpDisplay help message and exit
-

Description

-

VCFX_ld_calculator reads a VCF file and computes the pairwise linkage disequilibrium (r²) between genetic variants. Linkage disequilibrium is a measure of the non-random association between alleles at different loci, which is important for understanding genetic structure, identifying haplotype blocks, and designing association studies.

-

The tool operates as follows:

-
    -
  1. It reads the VCF file from standard input
  2. -
  3. It collects diploid genotypes for each variant, encoding them as:
  4. -
  5. 0: Homozygous reference (0/0)
  6. -
  7. 1: Heterozygous (0/1 or 1/0)
  8. -
  9. 2: Homozygous alternate (1/1)
  10. -
  11. -1: Missing data or other scenarios (including multi-allelic variants)
  12. -
  13. For each pair of variants within the specified region (or the entire file if no region is specified), it computes pairwise r² values, ignoring samples with missing genotypes
  14. -
  15. It outputs a matrix of r² values along with variant identifiers
  16. -
-

The r² calculation uses the standard formula: -- Let X and Y be the genotype arrays for two variants -- Calculate means of X and Y (meanX, meanY) -- Calculate covariance: cov = average(XY) - meanX * meanY -- Calculate variances: varX = average(X²) - meanX², varY similarly -- r = cov / sqrt(varX * varY) -- r² = r * r

-

Output Format

-

The output is a tab-delimited matrix of r² values with a header identifying the variants:

-
#LD_MATRIX_START
-         chr1:100 chr1:200 chr1:300
-chr1:100      1.0     0.4     0.2
-chr1:200     0.4      1.0     0.6
-chr1:300     0.2     0.6      1.0
-
-

If only one or no variants are found in the region, the tool outputs a message indicating that no pairwise LD could be calculated.

-

Examples

-

Basic Usage

-

Calculate LD for all variants in a VCF file:

-
VCFX_ld_calculator < input.vcf > ld_matrix.txt
-
-

Region-Specific LD

-

Calculate LD only for variants in a specific genomic region:

-
VCFX_ld_calculator --region chr1:10000-20000 < input.vcf > ld_matrix.txt
-
-

Integration with Other Tools

-

Filter for common variants first, then calculate LD:

-
cat input.vcf | VCFX_af_subsetter --min-af 0.05 | VCFX_ld_calculator > common_variants_ld.txt
-
-

Handling Special Cases

-
    -
  • Missing Genotypes: Samples with missing genotypes (./. or .|.) are skipped when calculating LD between variant pairs
  • -
  • Multi-allelic Variants: Genotypes involving alleles beyond the reference and first alternate (e.g., 1/2, 2/2) are treated as missing data
  • -
  • Single Variant: If only one variant is found in the region, the tool outputs a message stating that no pairwise LD can be calculated
  • -
  • Empty Region: If no variants are found in the specified region, the tool outputs a message stating that no pairwise LD can be calculated
  • -
  • Invalid Region Format: If the region format is invalid, the tool will display an error message
  • -
-

Performance

-
    -
  • Time complexity is O(n²m) where n is the number of variants and m is the number of samples
  • -
  • Memory usage scales linearly with the number of variants and samples
  • -
  • For large datasets with many variants, consider using the --region option to limit the analysis to specific genomic regions
  • -
  • The tool processes the VCF file line by line, so it can handle large files without loading the entire file into memory
  • -
-

Limitations

-
    -
  • Only supports biallelic variants; multi-allelic variants are treated as missing data
  • -
  • Requires diploid genotypes; haploid genotypes will be treated as missing data
  • -
  • Assumes standard VCF format with GT field in the FORMAT column
  • -
  • Does not support phased vs. unphased distinction; both "/" and "|" separators are treated the same
  • -
  • No built-in visualization of LD patterns; additional tools would be needed for heatmap creation
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_merger/index.html b/site/VCFX_merger/index.html deleted file mode 100644 index fbe33b2b..00000000 --- a/site/VCFX_merger/index.html +++ /dev/null @@ -1,2837 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_merger - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_merger

-

Overview

-

VCFX_merger is a tool for merging multiple VCF files by variant position. It combines multiple VCF files while maintaining proper sorting by chromosome and position, and preserves the VCF header information from the first input file.

-

Usage

-
VCFX_merger --merge file1.vcf,file2.vcf,... [options] > merged.vcf
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-m, --mergeComma-separated list of VCF files to merge
-h, --helpDisplay help message and exit
-

Description

-

VCFX_merger reads multiple VCF files and combines them into a single output file while:

-
    -
  1. Preserving the VCF header information from the first input file
  2. -
  3. Sorting all variants by chromosome and position
  4. -
  5. Maintaining the original VCF format and field structure
  6. -
  7. Handling multiple input files efficiently
  8. -
-

The tool processes the input files sequentially and merges all variants while ensuring proper sorting. It is particularly useful when you need to combine multiple VCF files from different sources or samples into a single, properly sorted VCF file.

-

Input Requirements

-
    -
  • All input files must be in valid VCF format
  • -
  • Files should have consistent header structures
  • -
  • The first file's header information will be used in the output
  • -
  • Files can contain any number of variants
  • -
-

Output Format

-

The output is a standard VCF file with: -- Header information from the first input file -- All variants sorted by chromosome and position -- Original VCF format preserved -- Tab-delimited fields maintained

-

Examples

-

Basic Usage

-

Merge two VCF files:

-
VCFX_merger --merge sample1.vcf,sample2.vcf > merged.vcf
-
-

Multiple Files

-

Merge three or more VCF files:

-
VCFX_merger --merge file1.vcf,file2.vcf,file3.vcf > combined.vcf
-
-

Integration with Other Tools

-

Merge files and then process the result:

-
VCFX_merger --merge sample1.vcf,sample2.vcf | VCFX_sorter | VCFX_validator > final.vcf
-
-

Error Handling

-

The tool handles various error conditions:

-
    -
  • Missing input files: Reports an error if any specified input file cannot be opened
  • -
  • Invalid VCF format: Preserves the original format but does not validate it
  • -
  • Empty files: Handles empty input files gracefully
  • -
  • Missing --merge argument: Displays help message
  • -
-

Performance Considerations

-
    -
  • Memory usage scales with the number of variants across all input files
  • -
  • Processing time depends on the total number of variants and the number of input files
  • -
  • Files are processed sequentially to minimize memory usage
  • -
  • Sorting is performed in memory after all variants are collected
  • -
-

Limitations

-
    -
  • Only supports standard VCF format files
  • -
  • Does not perform VCF validation (use VCFX_validator for validation)
  • -
  • Preserves only the header information from the first input file
  • -
  • Requires all input files to have consistent field structures
  • -
-

Common Use Cases

-
    -
  1. Combining multiple sample VCFs into a single file
  2. -
  3. Merging region-specific VCF files
  4. -
  5. Combining results from different variant callers
  6. -
  7. Creating a unified VCF file from multiple analysis runs
  8. -
-

Best Practices

-
    -
  1. Validate input files before merging
  2. -
  3. Use consistent VCF versions across input files
  4. -
  5. Consider file sizes and available memory when merging many files
  6. -
  7. Verify the output with VCFX_validator after merging
  8. -
  9. Use VCFX_sorter if additional sorting is needed
  10. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_metadata_summarizer/index.html b/site/VCFX_metadata_summarizer/index.html deleted file mode 100644 index a38705ee..00000000 --- a/site/VCFX_metadata_summarizer/index.html +++ /dev/null @@ -1,2872 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_metadata_summarizer - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_metadata_summarizer

-

Overview

-

VCFX_metadata_summarizer is a tool that analyzes and summarizes key metadata from a VCF file. It provides a comprehensive overview of the file's structure, including counts of contigs, INFO fields, FILTER fields, FORMAT fields, samples, and variants.

-

Usage

-
VCFX_metadata_summarizer [options] < input.vcf
-
-

Options

- - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-

Description

-

VCFX_metadata_summarizer reads a VCF file and generates a summary of its key structural elements:

-
    -
  1. Counts unique contigs defined in the header
  2. -
  3. Counts unique INFO fields
  4. -
  5. Counts unique FILTER fields
  6. -
  7. Counts unique FORMAT fields
  8. -
  9. Counts the number of samples
  10. -
  11. Counts the total number of variants
  12. -
-

The tool processes the VCF file line by line, parsing both header metadata and variant records to build a complete summary of the file's structure.

-

Input Requirements

-
    -
  • Input must be a valid VCF file
  • -
  • File can be piped through stdin
  • -
  • Supports both VCFv4.0 and VCFv4.2 formats
  • -
  • Handles both single-sample and multi-sample VCFs
  • -
-

Output Format

-

The output is a formatted text summary with the following structure:

-
VCF Metadata Summary:
----------------------
-Number of unique contigs: <count>
-Number of unique INFO fields: <count>
-Number of unique FILTER fields: <count>
-Number of unique FORMAT fields: <count>
-Number of samples: <count>
-Number of variants: <count>
-
-

Examples

-

Basic Usage

-

Summarize metadata from a VCF file:

-
VCFX_metadata_summarizer < input.vcf
-
-

Integration with Other Tools

-

Combine with other VCFX tools:

-
cat input.vcf | VCFX_validator | VCFX_metadata_summarizer
-
-

Example Output

-

For a minimal VCF file with one contig, one INFO field, and a single variant:

-
VCF Metadata Summary:
----------------------
-Number of unique contigs: 1
-Number of unique INFO fields: 1
-Number of unique FILTER fields: 0
-Number of unique FORMAT fields: 0
-Number of samples: 0
-Number of variants: 1
-
-

Header Parsing

-

The tool parses the following types of header lines: -- ##contig=<ID=...> - Contig definitions -- ##INFO=<ID=...> - INFO field definitions -- ##FILTER=<ID=...> - FILTER field definitions -- ##FORMAT=<ID=...> - FORMAT field definitions -- #CHROM... - Column header line (for sample counting)

-

Error Handling

-

The tool handles various input scenarios: -- Empty files -- Files with no header -- Files with no variants -- Files with missing metadata fields -- Files with inconsistent header structures

-

Performance Considerations

-
    -
  • Processes input as a stream
  • -
  • Memory usage scales with the number of unique metadata fields
  • -
  • Efficient for both small and large VCF files
  • -
  • No need to load entire file into memory
  • -
-

Limitations

-
    -
  • Only counts presence of fields, not their values
  • -
  • Does not validate field definitions
  • -
  • Does not check for field consistency across variants
  • -
  • Does not analyze variant content beyond counting
  • -
-

Common Use Cases

-
    -
  1. Quick assessment of VCF file structure
  2. -
  3. Quality control of VCF file completeness
  4. -
  5. Verification of expected metadata presence
  6. -
  7. Sample count verification
  8. -
  9. Variant count verification
  10. -
-

Best Practices

-
    -
  1. Run on VCF files before processing
  2. -
  3. Use in combination with VCFX_validator
  4. -
  5. Check for expected field counts
  6. -
  7. Verify sample counts match expectations
  8. -
  9. Use as part of quality control pipelines
  10. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_missing_data_handler/index.html b/site/VCFX_missing_data_handler/index.html deleted file mode 100644 index 9de3195c..00000000 --- a/site/VCFX_missing_data_handler/index.html +++ /dev/null @@ -1,2844 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_missing_data_handler - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_missing_data_handler

-

Overview

-

VCFX_missing_data_handler identifies and processes missing genotype data in VCF files. It can either flag missing genotypes (default behavior) or impute them with a specified default value, ensuring consistent data representation for downstream analysis.

-

Usage

-
VCFX_missing_data_handler [OPTIONS] [files...] > processed.vcf
-
-

Options

- - - - - - - - - - - - - - - - - - - - - -
OptionDescription
--fill-missing, -fImpute missing genotypes with a default value
--default-genotype, -d Specify the default genotype for imputation (default: "./.")
--help, -hDisplay help message and exit
-

Description

-

VCFX_missing_data_handler processes VCF files to identify and handle missing genotype data. The tool:

-
    -
  1. Reads one or more VCF files (or standard input if no files specified)
  2. -
  3. Identifies missing genotypes in each variant (empty, ".", "./.", or ".|.")
  4. -
  5. Either:
  6. -
  7. Leaves missing genotypes unchanged (default behavior)
  8. -
  9. Replaces missing genotypes with a user-specified value
  10. -
  11. Outputs the processed VCF data to standard output
  12. -
-

This tool is particularly useful for: -- Preparing VCF files for tools that don't handle missing genotypes well -- Standardizing the representation of missing data -- Imputing missing genotypes with reference (0/0) or other default values -- Processing multiple VCF files with consistent handling of missing data

-

Output Format

-

The output is a valid VCF file with the same format as the input, but with missing genotypes either left as-is or replaced with the specified default value. All header lines are preserved.

-

Examples

-

Basic Usage (Flag Only)

-
# Process a single file, leaving missing genotypes as-is
-VCFX_missing_data_handler < input.vcf > flagged_output.vcf
-
-

Impute Missing Data with Default Value

-
# Replace missing genotypes with the default value (./.):
-VCFX_missing_data_handler --fill-missing < input.vcf > imputed_output.vcf
-
-

Impute with Custom Genotype

-
# Replace missing genotypes with homozygous reference (0/0):
-VCFX_missing_data_handler --fill-missing --default-genotype "0/0" < input.vcf > ref_imputed.vcf
-
-

Process Multiple Files

-
# Process multiple files at once:
-VCFX_missing_data_handler --fill-missing file1.vcf file2.vcf > combined_output.vcf
-
-

In a Pipeline

-
# Filter a VCF file and then handle missing data:
-grep -v "^#" input.vcf | grep "PASS" | \
-VCFX_missing_data_handler --fill-missing --default-genotype "0/0" > filtered_imputed.vcf
-
-

Missing Genotype Detection

-

The tool identifies the following representations of missing data:

-
    -
  1. Empty genotype field
  2. -
  3. Single dot: "."
  4. -
  5. Pair of dots with slash: "./."
  6. -
  7. Pair of dots with pipe: ".|."
  8. -
-

Handling Special Cases

-
    -
  • No GT field in FORMAT: If the FORMAT column does not include a GT field, the variant line is left unchanged
  • -
  • Invalid variant lines: Lines with fewer than 9 columns are passed through unchanged
  • -
  • Multiple input files: Processes each file in sequence, properly handling headers
  • -
  • Sample columns structure: Carefully preserves the structure of sample columns, only modifying the GT field
  • -
  • Empty lines: Preserved with a single newline
  • -
  • Header lines: Passed through unchanged
  • -
  • Data before header: Able to handle invalid VCF files where data appears before the header (with a warning)
  • -
-

Performance

-

The tool is designed for efficiency:

-
    -
  1. Line-by-line processing allows handling of arbitrarily large files
  2. -
  3. No need to load the entire file into memory
  4. -
  5. Efficient string splitting and joining operations
  6. -
  7. Handles multiple files in a single run
  8. -
-

Limitations

-
    -
  1. No option to specify which samples should have their missing data imputed
  2. -
  3. Cannot handle phased vs. unphased genotype distinction in imputation
  4. -
  5. No support for probabilistic imputation based on population frequencies
  6. -
  7. No ability to flag sites with a high proportion of missing data
  8. -
  9. Cannot process only specific regions of a VCF file
  10. -
  11. Imputes with the same value regardless of context or neighboring genotypes
  12. -
  13. No reporting of the number or percentage of imputed genotypes
  14. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_missing_detector/index.html b/site/VCFX_missing_detector/index.html deleted file mode 100644 index 7b94290b..00000000 --- a/site/VCFX_missing_detector/index.html +++ /dev/null @@ -1,2821 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_missing_detector - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_missing_detector

-

Overview

-

VCFX_missing_detector identifies and flags variants in a VCF file that contain missing genotype data in any sample. This tool helps researchers identify potentially problematic variants or samples with incomplete data that may require special handling in downstream analyses.

-

Usage

-
VCFX_missing_detector [OPTIONS] < input.vcf > flagged.vcf
-
-

Options

- - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-

Description

-

VCFX_missing_detector analyzes a VCF file to identify variants with missing genotype data. The tool:

-
    -
  1. Reads a VCF file from standard input line by line
  2. -
  3. For each variant, examines the genotype (GT) field of all samples
  4. -
  5. Identifies missing genotypes where:
  6. -
  7. The entire genotype is missing (e.g., ./., .|., or .)
  8. -
  9. Either allele in a diploid genotype is missing (e.g., ./0, 1/.)
  10. -
  11. Adds a flag MISSING_GENOTYPES=1 to the INFO field of variants with any missing data
  12. -
  13. Writes the processed VCF to standard output
  14. -
-

This simple annotation allows researchers to easily: -- Filter variants based on missing data presence using standard VCF tools -- Identify data completeness issues that might affect analysis results -- Implement different handling strategies for variants with missing data

-

Output Format

-

The output is a valid VCF file with the same format as the input, but with an additional INFO field annotation for variants containing missing genotypes:

-
MISSING_GENOTYPES=1
-
-

This annotation is appended to the existing INFO field, or replaces the . placeholder if the INFO field is empty.

-

Examples

-

Basic Usage

-
# Flag variants with missing genotypes
-./VCFX_missing_detector < input.vcf > flagged.vcf
-
-

In a Pipeline with Filtering

-
# Flag variants with missing genotypes, then filter to keep only complete variants
-./VCFX_missing_detector < input.vcf | grep -v "MISSING_GENOTYPES=1" > complete_variants.vcf
-
-

Counting Missing Variants

-
# Count variants with missing genotypes
-./VCFX_missing_detector < input.vcf | grep "MISSING_GENOTYPES=1" | wc -l
-
-

Counting All Variants Before Summary

-
# Count total variants and those with missing data
-./VCFX_missing_detector < input.vcf > flagged.vcf
-echo "Total variants: $(grep -v "^#" flagged.vcf | wc -l)"
-echo "Variants with missing data: $(grep "MISSING_GENOTYPES=1" flagged.vcf | wc -l)"
-
-

Missing Genotype Detection

-

The tool uses comprehensive logic to identify various forms of missing genotype data:

-
    -
  1. Completely missing genotypes: Formats like ./., .|., or just .
  2. -
  3. Partially missing diploid genotypes: When one allele is missing, like ./1 or 0/.
  4. -
  5. Multi-field format handling: Properly extracts just the GT portion when other fields (like DP, GQ) are present
  6. -
  7. Format field awareness: Correctly identifies the GT position in the FORMAT string
  8. -
-

The tool examines each sample column independently and flags a variant if any sample has missing data.

-

Handling Special Cases

-

The tool implements several strategies for handling edge cases:

-
    -
  1. Missing FORMAT field: If GT is not included in the FORMAT column, the variant is passed through unchanged
  2. -
  3. No sample columns: Variants with fewer than 9 columns are passed through unchanged
  4. -
  5. Empty INFO field: If the original INFO is "." (missing), it's replaced with "MISSING_GENOTYPES=1"
  6. -
  7. Non-empty INFO field: The missing flag is appended with a semicolon separator
  8. -
  9. Empty lines: Preserved with a single newline
  10. -
  11. Header lines: Passed through unchanged
  12. -
  13. Non-diploid genotypes: The tool focuses on diploid genotypes with a single delimiter ('/' or '|')
  14. -
-

Performance

-

VCFX_missing_detector is designed for efficiency:

-
    -
  1. Single-pass processing with O(n) time complexity where n is the number of variants
  2. -
  3. Minimal memory usage, with no requirement to load the entire file
  4. -
  5. String operations optimized for performance
  6. -
  7. Line-by-line processing enabling streaming workflow
  8. -
  9. Disk I/O limited only to reading input and writing output
  10. -
-

Limitations

-
    -
  1. Primarily designed for diploid genotypes; may not correctly identify missing data in haploid or polyploid contexts
  2. -
  3. Limited to checking the GT field; does not evaluate other potential indicators of missing data
  4. -
  5. No built-in functionality to annotate the percentage or count of samples with missing data
  6. -
  7. No option to customize the INFO field tag name from the default "MISSING_GENOTYPES"
  8. -
  9. Cannot perform sample-specific missing data analysis, such as identifying which samples contribute most to missingness
  10. -
  11. No threshold options (e.g., flag only if more than X% of samples have missing data)
  12. -
  13. Limited to binary detection (missing/not missing) without quantifying the degree of missingness
  14. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_multiallelic_splitter/index.html b/site/VCFX_multiallelic_splitter/index.html deleted file mode 100644 index da49a90e..00000000 --- a/site/VCFX_multiallelic_splitter/index.html +++ /dev/null @@ -1,2783 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_multiallelic_splitter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - - -

VCFX_multiallelic_splitter

-

Overview

-

VCFX_multiallelic_splitter takes a VCF file with multi-allelic variants (variants with multiple ALT alleles) and splits them into multiple bi-allelic variant lines, while properly handling genotypes and FORMAT/INFO fields with various number specifications.

-

Usage

-
VCFX_multiallelic_splitter [OPTIONS] < input.vcf > biallelic_output.vcf
-
-

Options

- - - - - - - - - - - - - -
OptionDescription
--help, -hDisplay help message and exit
-

Description

-

Multi-allelic variants in VCF files (where multiple alternate alleles are specified in a comma-separated list) can complicate analysis and be incompatible with tools that require bi-allelic variants. This tool converts multi-allelic variants into the equivalent set of bi-allelic variants.

-

Key features: -- Maintains original VCF header information -- Correctly processes INFO fields tagged with different Number attributes (A, R, G) -- Properly adjusts genotypes and FORMAT fields for each resulting bi-allelic variant -- Preserves phasing information in genotypes -- Handles complex symbolic variants (e.g., <DEL>, <INS>) -- Correctly manages missing or malformed fields

-

Output Format

-

The output is a standard VCF file containing: -- All header lines from the input file (unchanged) -- Bi-allelic variants only, with each multi-allelic variant split into multiple lines -- Each split variant maintains the same CHROM, POS, ID, REF, QUAL, and FILTER values -- INFO and FORMAT fields properly adjusted for each alternate allele

-

Examples

-

Basic Usage

-
./VCFX_multiallelic_splitter < multi_allelic.vcf > biallelic.vcf
-
-

Integration with Other Tools

-
# Split multi-allelic variants, then run analysis requiring bi-allelic variants
-cat input.vcf | \
-  ./VCFX_multiallelic_splitter | \
-  ./vcf_analysis_tool > results.txt
-
-

Validation

-
# Validate that all variants in the output are indeed bi-allelic
-./VCFX_multiallelic_splitter < input.vcf | \
-  grep -v "^#" | awk -F'\t' '{print $5}' | grep -c ","
-# Should output 0 if all variants are bi-allelic
-
-

Handling Special Cases

-
    -
  • INFO fields:
  • -
  • Number=A fields (one value per alternate allele): Each split variant gets the corresponding value
  • -
  • Number=R fields (one value per allele including reference): Values are preserved properly
  • -
  • Number=G fields (one value per genotype): Recalculated for bi-allelic case
  • -
  • -

    Number=1 or other fixed numbers: These values are copied unchanged

    -
  • -
  • -

    FORMAT fields:

    -
  • -
  • AD (allelic depth): Properly subset for each resulting variant
  • -
  • PL (genotype likelihoods): Recalculated for each bi-allelic output
  • -
  • -

    GT (genotype): Adjusted to reflect the new allele indices (0/2 may become 0/1 in split variant)

    -
  • -
  • -

    Genotype conversion:

    -
  • -
  • For each variant, genotypes are only preserved if they involve the specific alt allele
  • -
  • Genotypes not involving the current alternate allele are set to missing (./.)
  • -
  • -

    Phased genotypes maintain their phase information

    -
  • -
  • -

    Edge cases:

    -
  • -
  • Missing data in FORMAT fields is properly handled
  • -
  • Symbolic alternate alleles are processed correctly
  • -
  • Star alleles (*) and non-ref symbolic alleles are supported
  • -
-

Performance

-

The tool processes VCF files line by line with minimal memory requirements, with performance primarily dependent on: -- Number of samples in the VCF -- Number of multi-allelic sites -- Complexity of INFO and FORMAT fields

-

For very large VCF files with many samples, processing time scales linearly with file size.

-

Limitations

-
    -
  • No command-line options to control the splitting behavior
  • -
  • Cannot selectively split only certain multi-allelic variants
  • -
  • May produce large output files when the input contains many multi-allelic variants with many samples
  • -
  • Cannot reconstruct the original multi-allelic variants from the split output in all cases
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_nonref_filter/index.html b/site/VCFX_nonref_filter/index.html deleted file mode 100644 index cb6d4547..00000000 --- a/site/VCFX_nonref_filter/index.html +++ /dev/null @@ -1,2826 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_nonref_filter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_nonref_filter

-

Overview

-

VCFX_nonref_filter removes variants from a VCF file where all samples are homozygous reference (0/0), retaining only variants where at least one sample has an alternate allele or a missing genotype.

-

Usage

-
VCFX_nonref_filter [OPTIONS] < input.vcf > filtered.vcf
-
-

Options

- - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-

Description

-

VCFX_nonref_filter examines each variant in a VCF file and filters out those where all samples are homozygous reference (0/0). The tool:

-
    -
  1. Processes a VCF file line by line
  2. -
  3. For each variant, examines the genotype (GT) field of all samples
  4. -
  5. Determines if every sample is definitively homozygous reference
  6. -
  7. Retains variants where at least one sample has a non-reference allele or missing data
  8. -
  9. Outputs a filtered VCF with only the retained variants
  10. -
  11. Passes through all header lines unchanged
  12. -
-

This tool is particularly useful for: -- Removing uninformative variants where no sample has an alternate allele -- Reducing VCF file size by filtering out invariant sites -- Focusing analysis on polymorphic sites -- Preparing variant files for downstream analysis tools that expect polymorphic sites

-

Output Format

-

The output is a standard VCF file with the same format as the input, but containing only variants where at least one sample has a non-reference allele or a missing genotype. All header lines are preserved.

-

Examples

-

Basic Usage

-
# Filter out variants where all samples are homozygous reference
-VCFX_nonref_filter < input.vcf > filtered.vcf
-
-

Counting Filtered Variants

-
# Count how many variants were removed/retained
-input_count=$(grep -v "^#" input.vcf | wc -l)
-output_count=$(grep -v "^#" filtered.vcf | wc -l)
-removed_count=$((input_count - output_count))
-echo "Removed $removed_count homozygous reference variants out of $input_count total variants"
-
-

In a Pipeline

-
# Keep only lines marked "PASS", then filter by non-reference status
-grep -v "^#" input.vcf | grep "PASS" | \
-VCFX_nonref_filter > high_quality_nonref.vcf
-
-

Combining with Other Filters

-
# Create a pipeline of filters
-cat input.vcf | \
-VCFX_nonref_filter | \
-VCFX_phred_filter --min-quality 30 > filtered.vcf
-
-

Homozygous Reference Detection

-

The tool uses comprehensive logic to identify homozygous reference genotypes:

-
    -
  1. If a genotype is missing (e.g., "./.", ".|.", or "."), it's considered NOT homozygous reference, and the variant is retained
  2. -
  3. For each specified allele in a genotype:
  4. -
  5. The allele must be "0" for it to be considered reference
  6. -
  7. Any non-"0" allele (including "1", "2", etc.) is considered alternate
  8. -
-

For example: -- "0/0" → Homozygous reference (filtered if all samples have this) -- "0/1" → Heterozygous (variant retained) -- "1/1" → Homozygous alternate (variant retained) -- "./." → Missing genotype (variant retained) -- "0/." → Partially missing (variant retained) -- "0/0/0" → Polyploid homozygous reference (filtered if all samples have reference)

-

Handling Special Cases

-
    -
  • Missing genotypes: Variants with samples having missing genotypes ("./.") are retained
  • -
  • Partial missing: Genotypes with some missing alleles (e.g., "0/.") are considered not definitively homozygous reference, so the variant is retained
  • -
  • No GT field: If the GT field is not present in the FORMAT column, the variant is retained
  • -
  • Empty lines: Skipped in output
  • -
  • Header lines: Preserved unchanged
  • -
  • Malformed VCF lines: Lines with fewer than 10 columns (required for at least one sample) are passed through unchanged
  • -
  • Data before header: Warning issued and lines passed through unchanged
  • -
-

Performance

-

The tool is designed for efficiency:

-
    -
  1. Processes the VCF file line by line, with minimal memory requirements
  2. -
  3. Fast determination of homozygous reference status using string parsing
  4. -
  5. Early exit when a non-homozygous sample is found
  6. -
  7. No requirements to load the entire file into memory
  8. -
-

Limitations

-
    -
  1. No option to filter based on a subset of samples
  2. -
  3. Cannot retain specific homozygous reference variants based on other criteria
  4. -
  5. No support for filtering by percentage of non-reference samples
  6. -
  7. Missing data is always treated as "not definitely homozygous reference"
  8. -
  9. No built-in option to keep variants where the reference allele might be incorrect
  10. -
  11. Cannot incorporate quality values in the filtering decision
  12. -
  13. No reporting on the number of variants removed or statistics about filtered variants
  14. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_outlier_detector/index.html b/site/VCFX_outlier_detector/index.html deleted file mode 100644 index 5d50c2cb..00000000 --- a/site/VCFX_outlier_detector/index.html +++ /dev/null @@ -1,2903 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_outlier_detector - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_outlier_detector

-

Overview

-

VCFX_outlier_detector identifies outliers in VCF data based on numeric metrics, operating in two modes: variant mode to detect outlier variants exceeding a threshold for a specified INFO field, and sample mode to identify samples with average metrics above a threshold.

-

Usage

-
VCFX_outlier_detector --metric <KEY> --threshold <VAL> [--variant|--sample] < input.vcf > outliers.txt
-
-

Options

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
OptionDescription
--metric, -m Name of the metric to use (e.g., AF, DP, GQ)
--threshold, -t Numeric threshold value for outlier detection
--variant, -vVariant mode: identify variants with INFO field metrics above threshold
--sample, -sSample mode: identify samples with average genotype metrics above threshold
--help, -hDisplay help message and exit
-

Description

-

VCFX_outlier_detector analyzes VCF files to identify outliers based on numeric metrics. The tool operates in two distinct modes:

-
    -
  1. Variant Mode (default):
  2. -
  3. Examines each variant's specified INFO field metric
  4. -
  5. Reports variants where the metric exceeds the specified threshold
  6. -
  7. -

    Useful for finding variants with unusual characteristics (e.g., high allele frequency, depth)

    -
  8. -
  9. -

    Sample Mode:

    -
  10. -
  11. Calculates the average value of a specified genotype metric for each sample
  12. -
  13. Reports samples where the average metric exceeds the specified threshold
  14. -
  15. Useful for identifying samples with unusual quality characteristics
  16. -
-

The tool processes VCF files line by line, extracting the relevant metric from either the INFO field (variant mode) or the FORMAT/genotype fields (sample mode). For sample mode, it accumulates values across all variants to calculate the per-sample averages.

-

Output Format

-

Variant Mode

-

The output is a tab-delimited file with the following columns: -

#CHROM  POS  ID  METRIC
-
-Where: -- CHROM, POS, ID: Standard VCF fields for variant identification -- METRIC: The value of the specified metric for each outlier variant

-

Sample Mode

-

The output is a tab-delimited file with the following columns: -

#Sample  Average_METRIC
-
-Where: -- Sample: The sample name -- Average_METRIC: The average value of the metric across all variants (or "NA" if below threshold)

-

Examples

-

Identify Variants with High Allele Frequency

-
# Find variants with allele frequency > 0.05
-VCFX_outlier_detector --metric AF --threshold 0.05 --variant < input.vcf > high_af_variants.txt
-
-

Identify High-Quality Samples

-
# Find samples with average genotype quality > 30
-VCFX_outlier_detector --metric GQ --threshold 30 --sample < input.vcf > high_quality_samples.txt
-
-

Detect Unusual Depth Variants

-
# Find variants with unusually high depth
-VCFX_outlier_detector --metric DP --threshold 100 --variant < input.vcf > high_depth_variants.txt
-
-

Identify Samples with High Missing Rate

-
# Find samples with high average missing rate
-VCFX_outlier_detector --metric MISSING --threshold 0.2 --sample < input.vcf > high_missing_samples.txt
-
-

Metric Extraction

-

The tool implements two strategies for extracting metrics:

-
    -
  1. INFO field parsing (variant mode):
  2. -
  3. Extracts fields with format KEY=VALUE from the INFO column
  4. -
  5. -

    Converts the value to a numeric type for comparison

    -
  6. -
  7. -

    Genotype field parsing (sample mode):

    -
  8. -
  9. First checks if the metric is directly specified with KEY=VALUE in the genotype field
  10. -
  11. Otherwise, locates the metric position in the FORMAT field and extracts the corresponding value for each sample
  12. -
-

Handling Special Cases

-
    -
  • Missing values: In sample mode, metrics that can't be parsed or are missing are skipped in the average calculation
  • -
  • Invalid numeric values: Non-numeric values are ignored with appropriate warnings
  • -
  • Empty files: Properly handled, producing an appropriate output header
  • -
  • Malformed VCF lines: Lines with too few columns are skipped
  • -
  • Non-standard FORMAT fields: Both standard colon-delimited formats and custom KEY=VALUE formats are supported
  • -
  • No matching metrics: If no instances of the metric are found, a warning is issued
  • -
-

Performance

-

VCFX_outlier_detector is designed for efficiency:

-
    -
  1. Variant Mode:
  2. -
  3. Single pass through the file with O(n) time complexity where n is the number of variants
  4. -
  5. -

    Minimal memory usage, regardless of file size

    -
  6. -
  7. -

    Sample Mode:

    -
  8. -
  9. Requires a single pass but tracks running sums and counts for each sample
  10. -
  11. Memory usage scales with the number of samples, not the number of variants
  12. -
-

Limitations

-
    -
  1. Only supports thresholding in one direction (greater than threshold)
  2. -
  3. No support for statistical outlier detection (e.g., z-scores or percentile-based methods)
  4. -
  5. Cannot filter based on multiple metrics in a single run
  6. -
  7. Sample mode requires the entire file to be processed before producing output
  8. -
  9. No built-in options for handling multi-allelic sites differently
  10. -
  11. Cannot detect outliers based on metadata not present in the VCF file
  12. -
  13. Only numeric metrics are supported; cannot detect categorical outliers
  14. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_phase_checker/index.html b/site/VCFX_phase_checker/index.html deleted file mode 100644 index 25371f69..00000000 --- a/site/VCFX_phase_checker/index.html +++ /dev/null @@ -1,2754 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_phase_checker - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - -
- -
- - - -
-
- - - - - - - - -

VCFX_phase_checker

-

Overview

-

The VCFX_phase_checker tool filters a VCF file to retain only those variant lines where all sample genotypes are fully phased (using the pipe '|' phasing separator). This is particularly useful for downstream analyses that require complete phasing information.

-

Usage

-
VCFX_phase_checker [OPTIONS] < input.vcf > phased_output.vcf
-
-

Options

- - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-

Description

-

VCFX_phase_checker reads a VCF file from standard input and examines the GT (genotype) field for every sample in each variant line. It determines whether each genotype is fully phased using the following criteria:

-
    -
  • A genotype is considered fully phased only if:
  • -
  • It uses the pipe character '|' as the separator between alleles (e.g., "0|1")
  • -
  • It contains no missing alleles (no ".")
  • -
  • It contains no unphased separators ("/")
  • -
-

The tool outputs only those variant lines where all sample genotypes meet these criteria. Lines that don't meet these criteria are skipped, and warnings are written to standard error.

-

Output

-

The output is a valid VCF file containing: -- All header lines from the input file (unchanged) -- Only the variant lines where all samples have fully phased genotypes -- Warnings (to stderr) for each line that was skipped due to unphased genotypes

-

Examples

-

Basic Usage

-
./VCFX_phase_checker < input.vcf > phased_output.vcf
-
-

Capturing Warnings

-
./VCFX_phase_checker < input.vcf > phased_output.vcf 2> unphased_warnings.log
-
-

Counting Phased vs. Unphased Variants

-
# Count total variants
-total=$(grep -v "^#" input.vcf | wc -l)
-# Count phased variants
-phased=$(./VCFX_phase_checker < input.vcf | grep -v "^#" | wc -l)
-# Calculate percentage
-echo "Phased variants: $phased / $total ($(echo "scale=2; 100*$phased/$total" | bc)%)"
-
-

Handling Special Cases

-
    -
  • Haploid genotypes (e.g., "0" or "1"): These are not considered phased; the line will be skipped
  • -
  • Missing genotypes (e.g., "./." or ".|."): These are not considered phased; the line will be skipped
  • -
  • Missing GT field: Lines without a GT field in the FORMAT column are skipped with a warning
  • -
  • Multiallelic variants: These are treated the same as biallelic variants, as long as all alleles are phased
  • -
  • Non-VCF-compliant genotype notation: Any genotype that doesn't follow standard VCF format is not considered phased
  • -
  • Header lines: All header lines (starting with "#") are preserved in the output
  • -
  • Samples with different ploidy levels: Each sample is checked independently; if all are phased, the line is kept
  • -
-

Performance

-

The tool processes files line by line with minimal memory requirements, allowing it to handle very large VCF files efficiently.

-

Limitations

-
    -
  • No option to make a best-effort phasing assumption
  • -
  • Cannot output partially phased lines or filter specific samples
  • -
  • Designed to be used as part of a pipeline, not as a standalone phasing tool
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_phase_quality_filter/index.html b/site/VCFX_phase_quality_filter/index.html deleted file mode 100644 index 95ac9aa6..00000000 --- a/site/VCFX_phase_quality_filter/index.html +++ /dev/null @@ -1,2897 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_phase_quality_filter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_phase_quality_filter

-

Overview

-

VCFX_phase_quality_filter is a tool for filtering variants based on their phasing quality (PQ) scores in the INFO field. It allows users to specify custom conditions for filtering variants based on their PQ values, supporting various comparison operators.

-

Usage

-
VCFX_phase_quality_filter --filter-pq "PQ<OP><THRESHOLD>" < input.vcf > output.vcf
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-f, --filter-pqCondition like 'PQ>30', 'PQ>=20', 'PQ!=10', etc.
-

Description

-

VCFX_phase_quality_filter reads a VCF file and filters variants based on their phasing quality scores:

-
    -
  1. Extracts the PQ value from the INFO field of each variant
  2. -
  3. Applies the specified comparison condition to the PQ value
  4. -
  5. Keeps variants that satisfy the condition
  6. -
  7. Discards variants that don't meet the criteria
  8. -
-

The tool handles missing or invalid PQ values by treating them as 0.0, ensuring robust processing of incomplete data.

-

Input Requirements

-
    -
  • Input must be a valid VCF file
  • -
  • File can be piped through stdin
  • -
  • PQ values should be present in the INFO field (optional)
  • -
  • Supports both VCFv4.0 and VCFv4.2 formats
  • -
-

Output Format

-

The output is a VCF file containing only the variants that satisfy the specified PQ condition: -- Preserves all header lines -- Maintains original VCF format -- Includes only passing variants -- Preserves all fields and annotations

-

Examples

-

Basic Usage

-

Filter variants with PQ > 30:

-
VCFX_phase_quality_filter --filter-pq "PQ>30" < input.vcf > high_pq.vcf
-
-

Different Operators

-

Various comparison operators are supported:

-
# Keep variants with PQ >= 20
-VCFX_phase_quality_filter --filter-pq "PQ>=20" < input.vcf > pq_ge_20.vcf
-
-# Keep variants with PQ <= 15
-VCFX_phase_quality_filter --filter-pq "PQ<=15" < input.vcf > pq_le_15.vcf
-
-# Keep variants with PQ != 10
-VCFX_phase_quality_filter --filter-pq "PQ!=10" < input.vcf > pq_ne_10.vcf
-
-

Integration with Other Tools

-

Combine with other VCFX tools:

-
cat input.vcf | \
-  VCFX_validator | \
-  VCFX_phase_quality_filter --filter-pq "PQ>30" | \
-  VCFX_metadata_summarizer
-
-

Supported Operators

-

The tool supports the following comparison operators: -- > - Greater than -- >= - Greater than or equal to -- < - Less than -- <= - Less than or equal to -- == - Equal to -- != - Not equal to

-

PQ Value Handling

-

The tool handles PQ values in the following ways: -- Extracts PQ values from INFO field (e.g., "PQ=30") -- Treats missing PQ values as 0.0 -- Handles invalid PQ values gracefully -- Supports decimal values

-

Error Handling

-

The tool handles various error conditions: -- Missing --filter-pq argument -- Invalid condition syntax -- Malformed VCF lines -- Missing or invalid PQ values -- Invalid comparison operators

-

Performance Considerations

-
    -
  • Processes input as a stream
  • -
  • Minimal memory usage
  • -
  • Efficient for both small and large VCF files
  • -
  • No need to load entire file into memory
  • -
-

Limitations

-
    -
  • Only filters based on PQ values
  • -
  • Does not modify PQ values
  • -
  • Does not validate VCF format (use VCFX_validator for validation)
  • -
  • Treats missing PQ values as 0.0
  • -
-

Common Use Cases

-
    -
  1. Filtering low-quality phasing results
  2. -
  3. Quality control of phased variants
  4. -
  5. Selecting high-confidence phased variants
  6. -
  7. Removing poorly phased variants
  8. -
  9. Quality-based subsetting of phased data
  10. -
-

Best Practices

-
    -
  1. Validate input VCF before filtering
  2. -
  3. Use appropriate PQ thresholds based on your data
  4. -
  5. Consider missing PQ values in your analysis
  6. -
  7. Combine with other quality filters
  8. -
  9. Document your filtering criteria
  10. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_phred_filter/index.html b/site/VCFX_phred_filter/index.html deleted file mode 100644 index ec473bb8..00000000 --- a/site/VCFX_phred_filter/index.html +++ /dev/null @@ -1,2844 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_phred_filter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_phred_filter

-

Overview

-

VCFX_phred_filter filters a VCF file based on the PHRED quality scores (QUAL column), removing variants that fall below a specified quality threshold to focus analysis on higher confidence variant calls.

-

Usage

-
VCFX_phred_filter [OPTIONS] < input.vcf > filtered.vcf
-
-

Options

- - - - - - - - - - - - - - - - - - - - - -
OptionDescription
-p, --phred-filter Set PHRED quality score threshold (default: 30.0)
-k, --keep-missing-qualKeep variants with missing quality values (represented as ".")
-h, --helpDisplay help message and exit
-

Description

-

VCFX_phred_filter examines the QUAL column of each variant record in a VCF file and filters out variants with quality scores below a specified threshold. The tool:

-
    -
  1. Processes a VCF file line by line
  2. -
  3. Extracts the PHRED quality score from the QUAL column (position 6) of each variant
  4. -
  5. Compares the quality score against the specified threshold
  6. -
  7. Retains variants where the quality meets or exceeds the threshold
  8. -
  9. Passes through all header lines unchanged
  10. -
-

This tool is particularly useful for: -- Removing low-confidence variant calls for downstream analysis -- Quality control of variant data -- Reducing false positives in variant datasets -- Applying consistent quality standards across multiple VCF files

-

Output Format

-

The output is a standard VCF file with the same format as the input, but containing only variants with PHRED quality scores at or above the specified threshold. All header lines are preserved.

-

Examples

-

Basic Usage with Default Threshold

-
# Filter variants using the default quality threshold (30)
-VCFX_phred_filter < input.vcf > filtered.vcf
-
-

Setting a Custom Threshold

-
# Keep only variants with quality scores of 20 or higher
-VCFX_phred_filter -p 20 < input.vcf > q20_filtered.vcf
-
-

Keeping Variants with Missing Quality

-
# Keep variants with quality ≥ 30 or missing quality values
-VCFX_phred_filter -p 30 -k < input.vcf > high_quality_with_missing.vcf
-
-

In a Pipeline

-
# Combine with other filters in a pipeline
-cat input.vcf | \
-VCFX_phred_filter -p 40 | \
-grep "PASS" > high_quality_pass.vcf
-
-

Using Long Option Format

-
# Using long options for clarity
-VCFX_phred_filter --phred-filter 25 --keep-missing-qual < input.vcf > filtered.vcf
-
-

Quality Score Handling

-

The tool processes quality scores as follows:

-
    -
  1. Standard numeric values: Directly compared to the threshold
  2. -
  3. Missing quality values (represented as "."):
  4. -
  5. By default, treated as 0.0 (filtered out)
  6. -
  7. With the -k option, treated as extremely high (effectively infinity) to ensure they pass
  8. -
  9. Invalid quality values: Logged with a warning and treated as 0.0 (filtered out)
  10. -
-

Handling Special Cases

-
    -
  • Missing quality values: Can be retained with the -k option
  • -
  • Invalid quality formats: Treated as 0.0 with a warning
  • -
  • Empty lines: Preserved with a single newline
  • -
  • Header lines: Preserved unchanged
  • -
  • Malformed VCF lines: Lines with fewer than 6 columns are skipped with a warning
  • -
  • Data before header: Skipped with a warning
  • -
-

Performance

-

The tool is designed for efficiency:

-
    -
  1. Processes VCF files line by line, with minimal memory requirements
  2. -
  3. Simple numeric comparison for fast filtering decisions
  4. -
  5. No requirement to load the entire file into memory
  6. -
  7. Fast string parsing for quality values
  8. -
-

Limitations

-
    -
  1. Focuses only on the QUAL column, not on per-sample or per-genotype quality metrics
  2. -
  3. No support for filtering based on other quality-related fields (e.g., GQ in the genotype fields)
  4. -
  5. Cannot apply different thresholds to different variant types
  6. -
  7. No option to exclude variants based on upper quality bounds
  8. -
  9. Doesn't provide statistics on filtered variants
  10. -
  11. Cannot filter based on multiple quality metrics simultaneously
  12. -
  13. Lacks region-specific filtering capabilities
  14. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_population_filter/index.html b/site/VCFX_population_filter/index.html deleted file mode 100644 index 506692ee..00000000 --- a/site/VCFX_population_filter/index.html +++ /dev/null @@ -1,2934 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_population_filter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_population_filter

-

Overview

-

VCFX_population_filter is a utility tool for subsetting VCF files to include only samples belonging to a specified population. It filters out samples that don't belong to the chosen population group while preserving the variant data and format information.

-

Usage

-
VCFX_population_filter [OPTIONS] < input.vcf > output.vcf
-
-

Options

- - - - - - - - - - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-p, --population <TAG>Required: Population tag to keep (e.g., 'EUR', 'AFR', 'EAS')
-m, --pop-map <FILE>Required: Tab-delimited file mapping sample names to populations
-

Description

-

VCFX_population_filter processes a VCF file to create a population-specific subset by:

-
    -
  1. Reading a population map file that associates each sample with a population
  2. -
  3. Identifying samples that belong to the specified population
  4. -
  5. Reading the VCF file from standard input
  6. -
  7. Preserving all meta-information lines (starting with '##') without modification
  8. -
  9. Modifying the #CHROM header line to include only samples from the specified population
  10. -
  11. For each data line:
  12. -
  13. Keeping the first 9 mandatory columns (CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT)
  14. -
  15. Including only genotype columns for samples from the specified population
  16. -
  17. Writing the filtered VCF to standard output
  18. -
-

This tool is particularly useful for: -- Creating population-specific VCF files for population genetics analysis -- Reducing file size by excluding irrelevant samples -- Focusing on specific ancestral groups for targeted studies -- Preparing data for population-stratified association studies

-

Population Map Format

-

The population map file should be a simple tab-delimited text file with: -- Each line containing a sample name and its population designation -- The first column containing the exact sample name as it appears in the VCF header -- The second column containing the population identifier -- No header row (just data rows)

-

Example population map file: -

SAMPLE1  EUR
-SAMPLE2  EUR
-SAMPLE3  AFR
-SAMPLE4  AFR
-SAMPLE5  EAS
-

-

Examples

-

Basic Usage

-

Filter a VCF file to include only European (EUR) samples: -

VCFX_population_filter --population EUR --pop-map population_map.txt < input.vcf > eur_only.vcf
-

-

Different Populations

-

Create separate files for different populations: -

VCFX_population_filter --population AFR --pop-map population_map.txt < input.vcf > afr_only.vcf
-VCFX_population_filter --population EAS --pop-map population_map.txt < input.vcf > eas_only.vcf
-

-

Example Transformations

-

Input VCF (with multiple populations)

-
##fileformat=VCFv4.2
-#CHROM  POS  ID  REF  ALT  QUAL  FILTER  INFO  FORMAT  SAMPLE1_EUR  SAMPLE2_EUR  SAMPLE3_AFR  SAMPLE4_AFR  SAMPLE5_EAS
-1  100  rs123  A  T  50  PASS  AF=0.1  GT:DP  0|0:30  0|1:25  1|1:20  0|1:22  0|0:18
-
-

Output VCF (filtered for EUR)

-
##fileformat=VCFv4.2
-#CHROM  POS  ID  REF  ALT  QUAL  FILTER  INFO  FORMAT  SAMPLE1_EUR  SAMPLE2_EUR
-1  100  rs123  A  T  50  PASS  AF=0.1  GT:DP  0|0:30  0|1:25
-
-

Special Case Handling

-

Missing Samples

-
    -
  • If no samples match the specified population, a warning is issued
  • -
  • The output VCF will contain only the first 9 mandatory columns without any sample data
  • -
-

Malformed Lines

-
    -
  • Lines with fewer than 9 columns are skipped with a warning
  • -
  • Data lines encountered before the #CHROM header are skipped with a warning
  • -
-

Missing #CHROM Header

-
    -
  • If no #CHROM header is found in the file, an error is reported
  • -
-

Invalid Population Map

-
    -
  • If the population map file cannot be opened, an error is reported
  • -
  • Lines in the population map that don't follow the expected format are skipped with a warning
  • -
-

Performance Considerations

-
    -
  • The tool processes the VCF file line by line, with minimal memory requirements
  • -
  • Memory usage is primarily determined by the number of samples in the VCF file
  • -
  • Performance scales linearly with the size of the input file
  • -
  • No external dependencies or reference files are required beyond the population map
  • -
-

Limitations

-
    -
  • Cannot filter variants based on population-specific criteria
  • -
  • Does not update INFO fields like AC/AN to reflect the reduced sample set
  • -
  • No support for more complex population filtering (e.g., including multiple populations)
  • -
  • Cannot handle compressed (gzipped) VCF files directly
  • -
  • Does not validate sample consistency between the VCF and the population map
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_position_subsetter/index.html b/site/VCFX_position_subsetter/index.html deleted file mode 100644 index 6b5a4c2a..00000000 --- a/site/VCFX_position_subsetter/index.html +++ /dev/null @@ -1,2956 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_position_subsetter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_position_subsetter

-

Overview

-

VCFX_position_subsetter extracts variants from a VCF file that fall within a specified genomic region, allowing for targeted analysis of specific chromosomal segments.

-

Usage

-
VCFX_position_subsetter --region "CHR:START-END" < input.vcf > filtered.vcf
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-r, --region <CHR:START-END>Required. Genomic region to extract in the format "chromosome:start-end"
-h, --helpDisplay help message and exit
-

Description

-

VCFX_position_subsetter reads a VCF file from standard input and outputs only those variants that fall within the specified genomic region. The tool:

-
    -
  1. Processes a VCF file line by line
  2. -
  3. Extracts the chromosome and position from each variant
  4. -
  5. Compares them to the user-specified region
  6. -
  7. Retains variants where:
  8. -
  9. The chromosome matches exactly with the specified chromosome
  10. -
  11. The position falls within the specified start and end coordinates (inclusive)
  12. -
  13. Outputs all header lines unchanged
  14. -
  15. Outputs only variant lines that meet the criteria
  16. -
-

This tool is particularly useful for: -- Focusing analysis on specific genomic regions of interest -- Extracting variants in a gene or regulatory region -- Reducing file size by narrowing down to relevant regions -- Preparing data for local analysis or visualization

-

Output Format

-

The output is a standard VCF file containing: -- All original header lines from the input VCF -- Only the variant records that fall within the specified region -- The same format and structure as the original VCF

-

Examples

-

Basic Usage

-

Extract variants on chromosome 1 between positions 1,000,000 and 2,000,000: -

VCFX_position_subsetter --region "chr1:1000000-2000000" < input.vcf > chr1_region.vcf
-

-

Small Region

-

Extract variants in a small region, such as a specific exon: -

VCFX_position_subsetter --region "chr17:41245000-41245500" < input.vcf > brca1_exon.vcf
-

-

Different Chromosome Format

-

Extract variants using a different chromosome naming format (numeric): -

VCFX_position_subsetter --region "2:150000-250000" < input.vcf > chr2_region.vcf
-

-

Pipeline Integration

-

Use as part of a longer analysis pipeline: -

cat input.vcf | VCFX_position_subsetter --region "chrX:5000000-6000000" | another_tool > final_output.vcf
-

-

Region Parsing

-

Format Requirements

-

The region must be specified in the format "CHR:START-END" where: -- CHR is the chromosome name, exactly as it appears in the VCF -- START is the beginning position (inclusive) -- END is the ending position (inclusive) -- The colon and dash are required separators

-

For example: chr1:10000-20000, X:50000-100000, 22:30500000-31000000

-

Coordinate System

-

The position coordinates use the same 1-based system as VCF files, where the first base of a chromosome is position 1.

-

Handling Special Cases

-

Empty Results

-

If no variants in the input VCF fall within the specified region, the output will contain only the header lines.

-

Non-Existent Chromosome

-

If the specified chromosome does not exist in the input VCF, the output will contain only header lines.

-

Invalid Region Format

-

If the region is not properly formatted (missing colon, missing dash, or end smaller than start), an error is reported and no filtering is performed.

-

Malformed Lines

-
    -
  • Lines with fewer than 2 columns are skipped with a warning
  • -
  • Lines where the position cannot be parsed as an integer are skipped with a warning
  • -
  • Data lines encountered before the #CHROM header are skipped with a warning
  • -
-

Performance Considerations

-
    -
  • Processes the VCF file line by line, with minimal memory requirements
  • -
  • No preprocessing or indexing is done
  • -
  • Performance scales linearly with input file size
  • -
  • More efficient than manual parsing when extracting small regions from large files
  • -
-

Limitations

-
    -
  • Can only extract one continuous region at a time
  • -
  • No support for multiple regions in a single run
  • -
  • Cannot extract by gene name or other genomic features directly
  • -
  • Requires exact chromosome name matching
  • -
  • No special handling of complex structural variants that may span region boundaries
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_probability_filter/index.html b/site/VCFX_probability_filter/index.html deleted file mode 100644 index 31a63ac4..00000000 --- a/site/VCFX_probability_filter/index.html +++ /dev/null @@ -1,2960 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_probability_filter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_probability_filter

-

Overview

-

VCFX_probability_filter filters a VCF file based on specified genotype probability values. It allows you to keep only variants where samples meet certain probability thresholds, using various comparison operators.

-

Usage

-
VCFX_probability_filter --filter-probability "<CONDITION>" < input.vcf > filtered.vcf
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-f, --filter-probability <condition>Specify the probability filter condition (e.g., GP>0.9)
-h, --helpDisplay help message and exit
-

Description

-

VCFX_probability_filter analyzes the genotype probability fields in the FORMAT column of a VCF file and filters variants based on a user-defined condition. The tool:

-
    -
  1. Reads the VCF file line by line from standard input
  2. -
  3. Parses the specified condition (e.g., GP>0.9)
  4. -
  5. For each variant, examines the probability values in the specified field
  6. -
  7. Keeps variants where all samples meet the condition
  8. -
  9. Outputs the filtered variants to standard output
  10. -
-

The condition must be specified in the format FIELD OPERATOR VALUE, where: -- FIELD is a valid field in the FORMAT column (e.g., GP for genotype probabilities) -- OPERATOR is one of: >, <, >=, <=, ==, != -- VALUE is a numeric threshold

-

Output Format

-

The output is a standard VCF file with the same format as the input, but containing only variants that meet the specified probability condition. All header lines are preserved.

-

Examples

-

Basic Filtering

-

Filter for variants where all samples have a GP (genotype probability) value greater than 0.9: -

VCFX_probability_filter --filter-probability "GP>0.9" < input.vcf > filtered.vcf
-

-

Using Different Operators

-

Filter for variants where all samples have a GP value less than 0.1: -

VCFX_probability_filter --filter-probability "GP<0.1" < input.vcf > filtered.vcf
-

-

Exact Match

-

Filter for variants where all samples have a GP value exactly equal to 0.9: -

VCFX_probability_filter --filter-probability "GP==0.9" < input.vcf > filtered.vcf
-

-

Different Probability Fields

-

Filter using a different probability field (e.g., PP for posterior probability): -

VCFX_probability_filter --filter-probability "PP>=0.8" < input.vcf > filtered.vcf
-

-

In a Pipeline

-

Use as part of a processing pipeline: -

cat input.vcf | VCFX_probability_filter --filter-probability "GP>0.95" | other_vcf_tool > output.vcf
-

-

Probability Value Handling

-

Field Parsing

-

The tool locates the specified probability field (e.g., GP) in the FORMAT column of each variant and extracts the corresponding values for each sample.

-

Value Comparison

-

The extracted probability values are compared to the specified threshold using the given operator. A variant passes the filter only if all samples meet the condition.

-

Multiple Probability Values

-

If a field contains multiple values (e.g., GP often contains three values for a biallelic variant), the filter is applied to the first value that can be successfully extracted and converted to a number.

-

Handling Special Cases

-

Missing Values

-

If a sample has a missing value (.) for the specified probability field, the variant is filtered out.

-

Malformed Values

-

If a probability value cannot be converted to a number, the variant is filtered out with a warning message.

-

Missing Fields

-

If the specified field is not found in the FORMAT column, the tool reports an error and exits.

-

Performance Considerations

-
    -
  • The tool processes the VCF file line by line, requiring minimal memory.
  • -
  • No sorting or indexing is performed, preserving the original order of variants.
  • -
  • Time complexity is linear with respect to the size of the input file.
  • -
-

Limitations

-
    -
  • The tool requires that the specified field exists in the FORMAT column of the VCF file.
  • -
  • It only supports standard comparison operators and cannot handle complex conditions or multiple conditions.
  • -
  • The tool does not handle cases where different samples might need different thresholds.
  • -
  • Missing or malformed values cause the variant to be filtered out, which might not be desired in all cases.
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_quality_adjuster/index.html b/site/VCFX_quality_adjuster/index.html deleted file mode 100644 index 1c942dcf..00000000 --- a/site/VCFX_quality_adjuster/index.html +++ /dev/null @@ -1,2823 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_quality_adjuster - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_quality_adjuster

-

Overview

-

VCFX_quality_adjuster transforms the QUAL field values in a VCF file by applying mathematical functions such as logarithm, square root, or square. This tool is useful for scaling quality scores to make them more interpretable or to prepare them for downstream analysis.

-

Usage

-
VCFX_quality_adjuster [OPTIONS] < input.vcf > output.vcf
-
-

Options

- - - - - - - - - - - - - - - - - - - - - -
OptionDescription
-a, --adjust-qual <FUNC>Required. The transformation function to apply. Must be one of: log, sqrt, square, or identity.
-n, --no-clampDo not clamp negative or extremely large values resulting from transformations.
-h, --helpDisplay help message and exit.
-

Description

-

VCFX_quality_adjuster processes a VCF file line by line, applying a specified mathematical transformation to the QUAL field (6th column) of each variant record. The tool:

-
    -
  1. Reads the VCF file from standard input.
  2. -
  3. Identifies header lines (beginning with #) and passes them through unchanged.
  4. -
  5. For data lines, extracts the QUAL value and applies the specified transformation.
  6. -
  7. By default, clamps negative values to 0 and extremely large values to 10^12.
  8. -
  9. Writes the modified VCF to standard output.
  10. -
-

Supported Transformations

-
    -
  • log: Applies the natural logarithm (ln) to the quality score. A small constant (10^-10) is added to prevent log(0).
  • -
  • sqrt: Applies square root to the quality score. Negative values are treated as 0.
  • -
  • square: Multiplies the quality score by itself.
  • -
  • identity: No transformation, passes the quality score unchanged.
  • -
-

Output Format

-

The output is a VCF file with the same format as the input, but with transformed QUAL values. All other fields remain unchanged, maintaining full compatibility with standard VCF parsers and tools.

-

Examples

-

Logarithmic Transformation

-
# Transform quality scores using natural logarithm
-VCFX_quality_adjuster --adjust-qual log < input.vcf > log_transformed.vcf
-
-

Square Root Transformation

-
# Apply square root to quality scores
-VCFX_quality_adjuster --adjust-qual sqrt < input.vcf > sqrt_transformed.vcf
-
-

Square Transformation without Clamping

-
# Square quality scores without clamping large values
-VCFX_quality_adjuster --adjust-qual square --no-clamp < input.vcf > squared_unclamped.vcf
-
-

In a Pipeline

-
# Filter variants and then transform quality scores
-VCFX_record_filter --quality ">20" < input.vcf | VCFX_quality_adjuster --adjust-qual log > filtered_log_transformed.vcf
-
-

Handling Special Cases

-

The tool implements several strategies for handling edge cases:

-
    -
  1. Missing QUAL values: Fields marked with . or empty fields are treated as 0.
  2. -
  3. Non-numeric QUAL values: Lines with non-numeric QUAL values generate a warning and are skipped.
  4. -
  5. Negative results: By default, negative values resulting from transformations (e.g., log of a value < 1) are clamped to 0. This behavior can be disabled with --no-clamp.
  6. -
  7. Very large values: Values above 10^12 are clamped to prevent numerical issues. This behavior can be disabled with --no-clamp.
  8. -
  9. Malformed lines: Lines with fewer than 8 fields generate a warning and are skipped.
  10. -
  11. Empty lines: Empty lines are preserved in the output.
  12. -
-

Performance

-

VCFX_quality_adjuster is designed to be efficient:

-
    -
  1. It processes the VCF file in a single pass, requiring minimal memory footprint.
  2. -
  3. All transformations are simple mathematical functions with constant-time complexity.
  4. -
  5. The tool streams data directly from input to output without storing the entire file in memory.
  6. -
-

Limitations

-
    -
  1. Only the QUAL field is transformed; other numeric fields (like INFO fields) remain unchanged.
  2. -
  3. No facility to define custom transformation functions beyond the four built-in options.
  4. -
  5. Cannot apply different transformations to different variants or regions in a single run.
  6. -
  7. Lack of options for formatting the transformed values (e.g., number of decimal places).
  8. -
  9. No built-in option to back-transform values to their original scale.
  10. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_record_filter/index.html b/site/VCFX_record_filter/index.html deleted file mode 100644 index 0a24cf9f..00000000 --- a/site/VCFX_record_filter/index.html +++ /dev/null @@ -1,3008 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_record_filter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_record_filter

-

Overview

-

VCFX_record_filter filters VCF files based on flexible criteria applied to standard fields (POS, QUAL, FILTER) and INFO fields. It allows for complex filtering with multiple conditions using AND/OR logic.

-

Usage

-
VCFX_record_filter --filter "CRITERIA" [OPTIONS] < input.vcf > filtered.vcf
-
-

Options

- - - - - - - - - - - - - - - - - - - - - -
OptionDescription
-f, --filter <CRITERIA>Required. One or more filtering criteria separated by semicolons (e.g., "POS>10000;QUAL>=30;AF<0.05")
-l, --logic <and|or>Logic for combining multiple criteria: and (default) requires all criteria to pass, or requires any criterion to pass
-h, --helpDisplay help message and exit
-

Description

-

VCFX_record_filter evaluates each variant in a VCF file against specified criteria and outputs only variants that satisfy these criteria. The tool:

-
    -
  1. Reads a VCF file line by line from standard input
  2. -
  3. Passes all header lines (starting with #) unchanged to the output
  4. -
  5. For each data line, evaluates it against the specified criteria
  6. -
  7. If the variant satisfies the criteria, writes it to standard output
  8. -
  9. If the variant fails the criteria, discards it
  10. -
-

Criteria can be specified for: -- Standard VCF fields: POS (numeric), QUAL (numeric), FILTER (string) -- Any key in the INFO column (automatically detected as numeric or string)

-

Each criterion must use one of the following operators: -- Numeric comparisons: >, >=, <, <=, ==, != -- String comparisons: ==, != (equality and inequality only)

-

Output Format

-

The output is a standard VCF file with the same format as the input, but containing only variants that meet the specified filtering criteria. All header lines are preserved.

-

Examples

-

Basic Filtering

-

Filter variants by position: -

VCFX_record_filter --filter "POS>10000" < input.vcf > filtered.vcf
-

-

Quality Filtering

-

Filter variants with QUAL score at least 30: -

VCFX_record_filter --filter "QUAL>=30" < input.vcf > filtered.vcf
-

-

Multiple Criteria with AND Logic

-

Keep only variants that pass all criteria (default AND logic): -

VCFX_record_filter --filter "POS>=1000;FILTER==PASS;DP>10" < input.vcf > filtered.vcf
-

-

Multiple Criteria with OR Logic

-

Keep variants that pass any of the criteria: -

VCFX_record_filter --filter "AF>0.1;DP>100" --logic or < input.vcf > filtered.vcf
-

-

Filtering on INFO Fields

-

Filter based on allele frequency and depth: -

VCFX_record_filter --filter "AF<0.01;DP>=50" < input.vcf > rare_variants.vcf
-

-

String Comparison

-

Filter variants by FILTER status: -

VCFX_record_filter --filter "FILTER==PASS" < input.vcf > passing_variants.vcf
-

-

Criterion Parsing

-

Field Types

-

The tool automatically determines field types: -- POS and QUAL are always treated as numeric fields -- FILTER is always treated as a string field -- INFO fields are parsed as numeric if possible, otherwise as strings

-

Numeric Values

-

For numeric fields, the value specified in the criterion is converted to a double and compared using the specified operator: -

QUAL>=30  # Passes if QUAL is at least 30
-DP>10     # Passes if DP INFO field is greater than 10
-

-

String Values

-

For string fields, only equality (==) and inequality (!=) operators are supported: -

FILTER==PASS     # Passes if FILTER is exactly "PASS"
-SVTYPE!=DEL      # Passes if SVTYPE INFO field is not "DEL"
-

-

Handling Special Cases

-

Missing Values

-
    -
  • Missing QUAL values (.) are treated as 0.0
  • -
  • Missing INFO fields cause the criterion to fail
  • -
  • Empty fields are handled appropriately
  • -
-

Multiple Criteria

-
    -
  • With AND logic, a variant must pass ALL criteria to be included
  • -
  • With OR logic, a variant passes if ANY criterion is satisfied
  • -
-

Malformed Lines

-
    -
  • Lines with fewer than 8 columns are skipped
  • -
  • Data lines before the #CHROM header are skipped with a warning
  • -
-

Performance Considerations

-
    -
  • The tool processes VCF files line by line, requiring minimal memory
  • -
  • Each line is evaluated independently, allowing for efficient processing
  • -
  • For large files with many criteria, using AND logic can be more efficient as it can short-circuit on the first failing criterion
  • -
-

Limitations

-
    -
  • String fields only support equality and inequality comparisons, not substring or pattern matching
  • -
  • No built-in support for sample genotype filtering (focuses on variant-level data)
  • -
  • Cannot filter based on the number of samples with a particular genotype
  • -
  • No support for parentheses or complex boolean expressions beyond simple AND/OR logic
  • -
  • INFO flags (without values) are treated as having a value of 1.0 for numeric comparisons
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_ref_comparator/index.html b/site/VCFX_ref_comparator/index.html deleted file mode 100644 index 98a7202b..00000000 --- a/site/VCFX_ref_comparator/index.html +++ /dev/null @@ -1,2829 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_ref_comparator - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_ref_comparator

-

Overview

-

VCFX_ref_comparator validates VCF variant records by comparing their REF and ALT alleles against a reference genome FASTA file, helping to identify discrepancies and annotate variants with their relation to the reference sequence.

-

Usage

-
VCFX_ref_comparator --reference <reference.fasta> < input.vcf > annotated.vcf
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-r, --reference <reference.fasta>Required. Path to reference genome in FASTA format
-h, --helpDisplay help message and exit
-

Description

-

VCFX_ref_comparator analyzes VCF variants by comparing them to a reference genome. The tool:

-
    -
  1. Loads a reference genome from a specified FASTA file
  2. -
  3. Processes each variant in the input VCF file
  4. -
  5. For each variant, compares the REF field with the corresponding sequence in the reference genome
  6. -
  7. Also determines if each ALT allele matches the reference sequence
  8. -
  9. Annotates each variant with a REF_COMPARISON tag in the INFO field, indicating the result of the comparison
  10. -
  11. Outputs an annotated VCF with all original fields preserved
  12. -
-

This tool is particularly useful for: -- Validating the accuracy of variant calls -- Identifying potential errors in variant representation -- Distinguishing true variants from reference matching records -- Quality control of variant datasets

-

Output Format

-

The output is a valid VCF file with the same format as the input, but with an additional REF_COMPARISON field added to the INFO column of each variant line. The output also includes a new header line defining the REF_COMPARISON INFO field.

-

The REF_COMPARISON field can have the following values: -- REF_MISMATCH: The REF allele does not match the reference genome -- REF_MATCH: The REF allele matches the reference genome -- NOVEL: The variant's ALT allele differs from the reference sequence -- ALT_IS_REF: The ALT allele matches the reference sequence (potential reference/alternate swap) -- UNKNOWN_CHROM: The chromosome is not found in the reference genome -- INVALID_POS: The position is out of bounds for the chromosome

-

Examples

-

Basic Usage

-
# Compare variants against a reference genome
-VCFX_ref_comparator --reference genome.fa < input.vcf > validated.vcf
-
-

Filtering for Reference Mismatches

-
# Find variants where the REF allele doesn't match the reference genome
-VCFX_ref_comparator --reference genome.fa < input.vcf | grep "REF_MISMATCH" > mismatches.vcf
-
-

Identifying ALT Alleles that Match Reference

-
# Find variants where the ALT allele actually matches the reference
-VCFX_ref_comparator --reference genome.fa < input.vcf | grep "ALT_IS_REF" > potential_swaps.vcf
-
-

Checking for Invalid Coordinates

-
# Identify variants with invalid chromosomes or positions
-VCFX_ref_comparator --reference genome.fa < input.vcf | grep -E "UNKNOWN_CHROM|INVALID_POS" > invalid_coords.vcf
-
-

Reference Comparison Process

-

The tool performs these steps for each variant:

-
    -
  1. Checks if the variant's chromosome exists in the reference genome
  2. -
  3. Verifies that the position is valid within the chromosome's sequence
  4. -
  5. Extracts the reference sequence at the specified position, matching the length of the REF allele
  6. -
  7. Compares the extracted sequence with the REF allele
  8. -
  9. For each ALT allele, determines if it matches the reference sequence
  10. -
-

All comparisons are case-insensitive, and the reference genome is converted to uppercase during loading.

-

Handling Special Cases

-
    -
  • Chromosome not found: If a chromosome in the VCF is not found in the reference genome, the variant is marked with UNKNOWN_CHROM
  • -
  • Position out of bounds: If a position exceeds the length of the chromosome, the variant is marked with INVALID_POS
  • -
  • Multiple ALT alleles: Each ALT allele is compared separately, and the result is included in the annotation
  • -
  • Symbolic alleles: Not specially handled; will likely result in REF_MATCH,NOVEL annotations
  • -
  • Empty lines: Preserved with a single newline
  • -
  • Header lines: Preserved with a new INFO definition line added before the #CHROM header
  • -
  • Malformed VCF lines: Lines with fewer than 8 columns are skipped with a warning
  • -
  • Data before header: Skipped with a warning
  • -
-

Performance

-

The tool is designed with the following considerations:

-
    -
  1. The entire reference genome is loaded into memory for fast random access
  2. -
  3. Chromosome names are converted to uppercase for case-insensitive matching
  4. -
  5. Whitespace is removed from FASTA sequences during loading
  6. -
  7. The VCF file is processed line by line, avoiding loading the entire file into memory
  8. -
  9. Only the required fields from each variant line are extracted and processed
  10. -
-

For extremely large reference genomes, memory usage may be significant.

-

Limitations

-
    -
  1. Requires loading the entire reference genome into memory
  2. -
  3. Limited to exact string comparison; no alignment is performed for complex variants
  4. -
  5. No special handling for symbolic alleles (like <DEL>, <INS>, etc.)
  6. -
  7. Does not normalize variants before comparison
  8. -
  9. Cannot handle reference genomes with duplicate chromosome names
  10. -
  11. No support for compressed reference files; FASTA must be uncompressed
  12. -
  13. No support for validating only a subset of variants or chromosomes
  14. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_reformatter/index.html b/site/VCFX_reformatter/index.html deleted file mode 100644 index 39355696..00000000 --- a/site/VCFX_reformatter/index.html +++ /dev/null @@ -1,2983 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_reformatter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_reformatter

-

Overview

-

VCFX_reformatter is a tool for reformatting INFO and FORMAT fields in VCF files. It provides functionality to compress (remove) specific fields and reorder fields in both INFO and FORMAT columns, making VCF files more organized and efficient.

-

Usage

-
VCFX_reformatter [options] < input.vcf > output.vcf
-
-

Options

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-c, --compress-info <keys>Remove specified INFO keys (comma-separated)
-f, --compress-format <keys>Remove specified FORMAT keys (comma-separated)
-i, --reorder-info <keys>Reorder INFO keys (comma-separated)
-o, --reorder-format <keys>Reorder FORMAT keys (comma-separated)
-

Description

-

VCFX_reformatter modifies VCF files in several ways:

-
    -
  1. INFO Field Compression:
  2. -
  3. Removes specified keys from the semicolon-separated INFO field
  4. -
  5. Preserves remaining fields in their original order
  6. -
  7. -

    Handles both key-value pairs and flag fields

    -
  8. -
  9. -

    FORMAT Field Compression:

    -
  10. -
  11. Removes specified keys from the colon-separated FORMAT field
  12. -
  13. Updates all sample columns to match the new FORMAT structure
  14. -
  15. -

    Maintains data consistency across all samples

    -
  16. -
  17. -

    INFO Field Reordering:

    -
  18. -
  19. Places specified keys at the beginning of the INFO field
  20. -
  21. Appends remaining keys in their original order
  22. -
  23. -

    Preserves all key-value pairs and flags

    -
  24. -
  25. -

    FORMAT Field Reordering:

    -
  26. -
  27. Reorders the FORMAT column keys
  28. -
  29. Updates all sample columns to match the new order
  30. -
  31. Maintains data alignment across all samples
  32. -
-

Input Requirements

-
    -
  • Input must be a valid VCF file
  • -
  • File can be piped through stdin
  • -
  • Supports both VCFv4.0 and VCFv4.2 formats
  • -
  • Handles both single-sample and multi-sample VCFs
  • -
-

Output Format

-

The output is a VCF file with: -- All header lines preserved -- Modified INFO and FORMAT fields according to specifications -- Updated sample columns to match new FORMAT structure -- Original VCF format maintained

-

Examples

-

Basic Usage

-

Remove specific INFO fields and reorder others:

-
VCFX_reformatter --compress-info AF,DP --reorder-info AF,DP < input.vcf > output.vcf
-
-

Format Field Manipulation

-

Remove and reorder FORMAT fields:

-
VCFX_reformatter --compress-format PL,AD --reorder-format GT,DP < input.vcf > output.vcf
-
-

Combined Operations

-

Perform multiple operations in one command:

-
VCFX_reformatter \
-  --compress-info AF,DP \
-  --compress-format PL,AD \
-  --reorder-info AF,DP \
-  --reorder-format GT,DP \
-  < input.vcf > output.vcf
-
-

Integration with Other Tools

-

Combine with other VCFX tools:

-
cat input.vcf | \
-  VCFX_validator | \
-  VCFX_reformatter --compress-info AF,DP | \
-  VCFX_metadata_summarizer
-
-

Field Handling

-

INFO Field Processing

-
    -
  • Handles key-value pairs (e.g., "DP=10")
  • -
  • Handles flag fields (e.g., "PASS")
  • -
  • Preserves field separators
  • -
  • Maintains field order when specified
  • -
-

FORMAT Field Processing

-
    -
  • Updates FORMAT column structure
  • -
  • Modifies all sample columns accordingly
  • -
  • Preserves data alignment
  • -
  • Handles missing values (".")
  • -
-

Error Handling

-

The tool handles various error conditions: -- Malformed VCF lines -- Missing fields -- Invalid field formats -- Inconsistent sample data -- Lines with fewer than 8 columns

-

Performance Considerations

-
    -
  • Processes input streamingly
  • -
  • Efficient memory usage
  • -
  • Handles large files
  • -
  • Preserves original data integrity
  • -
-

Limitations

-
    -
  • Only modifies INFO and FORMAT fields
  • -
  • Does not validate VCF format (use VCFX_validator for validation)
  • -
  • Does not modify other VCF columns
  • -
  • Requires at least 8 columns in data lines
  • -
-

Common Use Cases

-
    -
  1. Removing unnecessary fields to reduce file size
  2. -
  3. Reordering fields for better readability
  4. -
  5. Standardizing VCF format across different sources
  6. -
  7. Preparing VCF files for specific analysis tools
  8. -
  9. Cleaning up VCF files before processing
  10. -
-

Best Practices

-
    -
  1. Validate input VCF before reformatting
  2. -
  3. Back up original files before modification
  4. -
  5. Verify output format meets requirements
  6. -
  7. Use appropriate field combinations
  8. -
  9. Document field modifications
  10. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_region_subsampler/index.html b/site/VCFX_region_subsampler/index.html deleted file mode 100644 index ff574819..00000000 --- a/site/VCFX_region_subsampler/index.html +++ /dev/null @@ -1,3007 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_region_subsampler - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_region_subsampler

-

Overview

-

VCFX_region_subsampler is a tool for filtering VCF variants based on genomic regions specified in a BED file. It keeps only variants whose positions fall within the specified regions, efficiently handling multiple regions and overlapping intervals.

-

Usage

-
VCFX_region_subsampler --region-bed FILE < input.vcf > output.vcf
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-b, --region-bed FILEBED file listing regions to keep
-

Description

-

VCFX_region_subsampler processes a VCF file and a BED file to:

-
    -
  1. Read and parse the BED file containing genomic regions
  2. -
  3. Convert 0-based BED coordinates to 1-based VCF coordinates
  4. -
  5. Merge overlapping or contiguous intervals for efficiency
  6. -
  7. Filter VCF variants to keep only those falling within specified regions
  8. -
  9. Preserve all VCF header information and variant details
  10. -
-

The tool uses binary search for efficient region lookup and handles multiple regions per chromosome.

-

Input Requirements

-

VCF Input

-
    -
  • Must be a valid VCF file
  • -
  • Can be piped through stdin
  • -
  • Supports both VCFv4.0 and VCFv4.2 formats
  • -
  • Must have at least 8 columns (CHROM through INFO)
  • -
-

BED Input

-
    -
  • Standard BED format (chromosome, start, end)
  • -
  • 0-based coordinates (automatically converted to 1-based)
  • -
  • One region per line
  • -
  • Supports multiple regions per chromosome
  • -
  • Invalid lines are skipped with warnings
  • -
-

Output Format

-

The output is a VCF file containing: -- All original VCF header lines -- Only variants falling within specified regions -- Original variant information preserved -- Same format as input VCF

-

Examples

-

Basic Usage

-

Filter variants using a single region:

-
VCFX_region_subsampler --region-bed regions.bed < input.vcf > filtered.vcf
-
-

Multiple Regions

-

Filter using multiple regions across chromosomes:

-
# regions.bed:
-chr1    0    100
-chr2    100  200
-VCFX_region_subsampler --region-bed regions.bed < input.vcf > filtered.vcf
-
-

Integration with Other Tools

-

Combine with other VCFX tools:

-
cat input.vcf | \
-  VCFX_validator | \
-  VCFX_region_subsampler --region-bed regions.bed | \
-  VCFX_metadata_summarizer
-
-

Region Handling

-

Coordinate System

-
    -
  • Input BED: 0-based coordinates
  • -
  • Internal processing: 1-based coordinates
  • -
  • Automatic conversion between systems
  • -
-

Interval Merging

-
    -
  • Overlapping intervals are merged
  • -
  • Contiguous intervals are combined
  • -
  • Maintains efficiency for large region sets
  • -
-

Region Validation

-
    -
  • Skips invalid BED lines
  • -
  • Handles negative intervals
  • -
  • Ignores zero-length intervals
  • -
  • Reports warnings for invalid entries
  • -
-

Error Handling

-

The tool handles various error conditions: -- Missing --region-bed argument -- Invalid BED file format -- Invalid VCF lines -- Missing or malformed coordinates -- Data lines before header

-

Performance Considerations

-
    -
  • Uses binary search for region lookup
  • -
  • Merges overlapping intervals for efficiency
  • -
  • Processes input streamingly
  • -
  • Memory efficient for large region sets
  • -
  • Handles large VCF files
  • -
-

Limitations

-
    -
  • Only filters by position (CHROM, POS)
  • -
  • Does not validate VCF format (use VCFX_validator for validation)
  • -
  • Requires at least 8 columns in VCF
  • -
  • Skips data lines before #CHROM header
  • -
  • Treats invalid BED lines as warnings, not errors
  • -
-

Common Use Cases

-
    -
  1. Extracting variants from specific genomic regions
  2. -
  3. Focusing analysis on particular chromosomal segments
  4. -
  5. Creating region-specific VCF subsets
  6. -
  7. Preparing data for region-based analysis
  8. -
  9. Filtering variants for specific genomic features
  10. -
-

Best Practices

-
    -
  1. Validate input VCF before filtering
  2. -
  3. Verify BED file format and coordinates
  4. -
  5. Check region coverage before processing
  6. -
  7. Monitor warning messages for invalid regions
  8. -
  9. Document region selection criteria
  10. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_sample_extractor/index.html b/site/VCFX_sample_extractor/index.html deleted file mode 100644 index af3899d3..00000000 --- a/site/VCFX_sample_extractor/index.html +++ /dev/null @@ -1,2780 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_sample_extractor - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_sample_extractor

-

Overview

-

VCFX_sample_extractor is a tool that extracts a subset of samples from a VCF file, allowing you to create a smaller, focused VCF containing only the samples of interest.

-

Usage

-
VCFX_sample_extractor [OPTIONS] < input.vcf > subset.vcf
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-s, --samples LISTComma or space separated list of sample names to extract
-h, --helpDisplay help message and exit
-

Description

-

VCFX_sample_extractor reads a VCF file from standard input, identifies the samples specified in the command line, and produces a new VCF file containing only those samples. This is useful for:

-
    -
  • Reducing file size by extracting only relevant samples
  • -
  • Creating sample-specific VCF files for specialized analyses
  • -
  • Focusing on specific cohorts or subgroups
  • -
  • Compliance with data sharing permissions that allow sharing only specific samples
  • -
-

The tool: -1. Reads the VCF header to identify sample columns -2. Maintains all meta-information and header lines -3. Extracts only the specified samples, preserving order and data integrity -4. Warns about any requested samples that aren't found in the input VCF

-

Output Format

-

The output is a standard VCF file containing: -- All header lines from the input file -- A modified #CHROM header line that includes only the selected samples -- All variant lines from the input with only the selected sample columns

-

Examples

-

Extract a Single Sample

-
./VCFX_sample_extractor --samples "SAMPLE1" < input.vcf > single_sample.vcf
-
-

Extract Multiple Samples with Comma Delimiter

-
./VCFX_sample_extractor --samples "SAMPLE1,SAMPLE2,SAMPLE3" < input.vcf > subset.vcf
-
-

Extract Multiple Samples with Space Delimiter

-
./VCFX_sample_extractor --samples "SAMPLE1 SAMPLE2 SAMPLE3" < input.vcf > subset.vcf
-
-

Process Large Files

-
# Extract a few samples from a large compressed VCF
-zcat large_file.vcf.gz | ./VCFX_sample_extractor --samples "SAMPLE1,SAMPLE2" | gzip > subset.vcf.gz
-
-

Handling Special Cases

-
    -
  • Missing samples: If a requested sample isn't found in the input VCF, a warning is issued but processing continues with the samples that were found
  • -
  • No samples found: If none of the requested samples are found in the input VCF, the output will contain only the header and variant lines with no sample columns
  • -
  • Malformed VCF: Lines with fewer than 8 columns are skipped with a warning
  • -
  • No sample columns: Input variant lines without sample columns (fewer than 10 columns) are skipped
  • -
  • Empty sample names: Empty sample names in the input list are ignored
  • -
-

Performance

-

The tool processes VCF files line by line, with minimal memory requirements even for very large VCF files. Performance scales with: -- Number of samples in the input VCF (parsing time) -- Number of samples being extracted (output size)

-

Limitations

-
    -
  • No wildcards or regular expressions for sample name matching
  • -
  • Cannot extract samples based on properties or metadata
  • -
  • Cannot reorder samples in the output file (order follows the original VCF)
  • -
  • No option to rename samples in the output file
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_sorter/index.html b/site/VCFX_sorter/index.html deleted file mode 100644 index c0b9897f..00000000 --- a/site/VCFX_sorter/index.html +++ /dev/null @@ -1,2994 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_sorter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_sorter

-

Overview

-

VCFX_sorter is a utility tool for sorting VCF files by chromosome and position. It provides two sorting methods: standard lexicographic sorting and natural chromosome sorting, which handles chromosome numbering in a more intuitive way.

-

Usage

-
VCFX_sorter [OPTIONS] < input.vcf > output.vcf
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-n, --natural-chrUse natural chromosome sorting (chr1 < chr2 < chr10) instead of lexicographic sorting
-

Description

-

VCFX_sorter processes a VCF file to organize variants in a consistent order by:

-
    -
  1. Reading the VCF file from standard input
  2. -
  3. Preserving all header lines without modification
  4. -
  5. Loading all data lines into memory
  6. -
  7. Sorting the data lines by chromosome and position
  8. -
  9. Writing the header lines followed by the sorted data lines to standard output
  10. -
-

The tool supports two distinct sorting methods: -- Lexicographic sorting (default): Sorts chromosomes alphabetically (chr1, chr10, chr2, ...) -- Natural sorting: Sorts chromosomes in numeric order when possible (chr1, chr2, ..., chr10, ...)

-

This tool is particularly useful for: -- Preparing VCF files for downstream analysis tools that expect sorted input -- Merging multiple VCF files that need consistent ordering -- Improving readability and navigation of VCF files -- Making binary searches possible on VCF data

-

Sorting Details

-

Lexicographic Sorting

-

In the default lexicographic mode: -- Chromosomes are compared as strings (e.g., 'chr2' comes after 'chr10') -- Positions are compared numerically within the same chromosome

-

Natural Chromosome Sorting

-

When the --natural-chr option is used: -1. The "chr" prefix (case-insensitive) is identified and removed -2. Any leading digits are parsed as a number -3. Remaining characters are treated as a suffix -4. Sorting precedence: - - First by chromosome prefix (if different) - - Then by numeric part (if both have numbers) - - Then by suffix (if both have the same number) - - Finally by position

-

This results in more intuitive ordering where chr1 < chr2 < chr10, instead of chr1 < chr10 < chr2.

-

Examples

-

Basic Lexicographic Sorting

-

Sort a VCF file using standard lexicographic chromosome ordering: -

VCFX_sorter < unsorted.vcf > sorted.vcf
-

-

Natural Chromosome Sorting

-

Sort a VCF file using natural chromosome ordering: -

VCFX_sorter --natural-chr < unsorted.vcf > sorted.vcf
-

-

Example Transformations

-

Lexicographic Sorting

-
Before:
-chr2  1000  .  A  T  .  PASS  .
-chr1  2000  .  G  C  .  PASS  .
-chr10 1500  .  T  A  .  PASS  .
-
-After:
-chr1  2000  .  G  C  .  PASS  .
-chr10 1500  .  T  A  .  PASS  .
-chr2  1000  .  A  T  .  PASS  .
-
-

Natural Chromosome Sorting

-
Before:
-chr2  1000  .  A  T  .  PASS  .
-chr1  2000  .  G  C  .  PASS  .
-chr10 1500  .  T  A  .  PASS  .
-
-After:
-chr1  2000  .  G  C  .  PASS  .
-chr2  1000  .  A  T  .  PASS  .
-chr10 1500  .  T  A  .  PASS  .
-
-

Handling Special Cases

-

Malformed Lines

-
    -
  • Lines with fewer than 8 columns are skipped with a warning
  • -
  • Lines with an invalid position value are skipped with a warning
  • -
-

Empty Input

-
    -
  • If no input is provided, the help message is displayed
  • -
-

Missing Header

-
    -
  • If no #CHROM header line is found in the input, a warning is issued but processing continues
  • -
-

Complex Chromosome Names

-
    -
  • Chromosomes with non-standard naming follow sorting rules based on the selected mode
  • -
  • Examples of parsing in natural mode:
  • -
  • "chr1" → prefix="chr", number=1, suffix=""
  • -
  • "chrX" → prefix="chr", number=none, suffix="X"
  • -
  • "chr10_alt" → prefix="chr", number=10, suffix="_alt"
  • -
  • "scaffold_123" → prefix="", number=none, suffix="scaffold_123"
  • -
-

Performance Considerations

-
    -
  • The tool reads the entire VCF file into memory before sorting
  • -
  • Memory usage scales with the number of variants in the input file
  • -
  • Very large VCF files may require significant memory
  • -
  • Processing time is dominated by the sorting operation, which is O(n log n)
  • -
-

Limitations

-
    -
  • No support for on-disk sorting of files too large to fit in memory
  • -
  • Cannot sort by other fields besides chromosome and position
  • -
  • Does not validate VCF format beyond basic column counting
  • -
  • No handling of compressed (gzipped) VCF files directly
  • -
  • Cannot maintain the original order of variants at the same chromosome and position
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_subsampler/index.html b/site/VCFX_subsampler/index.html deleted file mode 100644 index 435109ec..00000000 --- a/site/VCFX_subsampler/index.html +++ /dev/null @@ -1,2869 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_subsampler - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_subsampler

-

Overview

-

VCFX_subsampler is a tool for randomly selecting a specified number of variants from a VCF file. It uses reservoir sampling to efficiently select a random subset of variants while preserving the VCF header information. The tool is particularly useful for creating smaller test datasets or reducing the size of large VCF files for preliminary analysis.

-

Usage

-
VCFX_subsampler [options] < input.vcf > output.vcf
-
-

Options

- - - - - - - - - - - - - - - - - - - - - -
OptionDescription
-s, --subsample <N>Required: Number of variants to keep in the output
--seed <INT>Optional: Use a specific random seed for reproducible results
-h, --helpDisplay help message and exit
-

Description

-

VCFX_subsampler processes a VCF file to:

-
    -
  1. Preserve all header lines (starting with #)
  2. -
  3. Randomly select N variants from the data section
  4. -
  5. Skip invalid lines (those with fewer than 8 columns)
  6. -
  7. Output the selected variants while maintaining VCF format
  8. -
-

The tool uses reservoir sampling to ensure unbiased random selection, even when the total number of variants is unknown in advance.

-

Input Requirements

-
    -
  • Must be a valid VCF file
  • -
  • Can be piped through stdin
  • -
  • Must have at least 8 columns (CHROM through INFO)
  • -
  • Header lines must start with #
  • -
-

Output Format

-

The output is a VCF file containing: -- All original VCF header lines -- N randomly selected variants (or all variants if input has fewer than N) -- Same format as input VCF -- Invalid lines (with <8 columns) are skipped with warnings

-

Examples

-

Basic Usage

-

Select 1000 random variants:

-
VCFX_subsampler --subsample 1000 < input.vcf > subset.vcf
-
-

Reproducible Sampling

-

Use a fixed seed for reproducible results:

-
VCFX_subsampler --subsample 1000 --seed 1234 < input.vcf > subset.vcf
-
-

Integration with Other Tools

-

Combine with other VCFX tools:

-
cat input.vcf | \
-  VCFX_validator | \
-  VCFX_subsampler --subsample 1000 | \
-  VCFX_metadata_summarizer
-
-

Sampling Algorithm

-

The tool uses reservoir sampling to: -- Process the input streamingly -- Maintain a reservoir of N variants -- Replace variants in the reservoir with probability N/count -- Ensure unbiased random selection

-

Error Handling

-

The tool handles various error conditions: -- Missing --subsample argument -- Invalid subsample size (must be >0) -- Invalid seed value -- Invalid VCF lines (with <8 columns) -- Data lines before header

-

Performance Considerations

-
    -
  • Memory efficient: only stores N variants in memory
  • -
  • Processes input streamingly
  • -
  • Preserves header information
  • -
  • Skips invalid lines efficiently
  • -
-

Limitations

-
    -
  • Only samples variants (data lines)
  • -
  • Skips lines with <8 columns
  • -
  • Requires at least 8 columns in VCF
  • -
  • Skips data lines before #CHROM header
  • -
  • No support for weighted sampling
  • -
-

Common Use Cases

-
    -
  1. Creating test datasets
  2. -
  3. Reducing large VCF files for quick analysis
  4. -
  5. Generating random subsets for validation
  6. -
  7. Preparing data for development and testing
  8. -
  9. Creating smaller datasets for preliminary analysis
  10. -
-

Best Practices

-
    -
  1. Validate input VCF before sampling
  2. -
  3. Use --seed for reproducible results
  4. -
  5. Monitor warning messages for skipped lines
  6. -
  7. Consider using VCFX_validator before sampling
  8. -
  9. Document sampling parameters for reproducibility
  10. -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_sv_handler/index.html b/site/VCFX_sv_handler/index.html deleted file mode 100644 index 88c86227..00000000 --- a/site/VCFX_sv_handler/index.html +++ /dev/null @@ -1,2970 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_sv_handler - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_sv_handler

-

Overview

-

VCFX_sv_handler is a utility tool for filtering and modifying structural variant (SV) records in VCF files. It can identify variants with the SVTYPE annotation, either keeping only structural variants or enhancing their annotations with additional information.

-

Usage

-
VCFX_sv_handler [OPTIONS] < input.vcf > output.vcf
-
-

Options

- - - - - - - - - - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-f, --sv-filter-onlyKeep only lines that have 'SVTYPE=' in their INFO field
-m, --sv-modifyModify the INFO field of structural variants to add additional annotations
-

Description

-

VCFX_sv_handler processes a VCF file to manage structural variant records by:

-
    -
  1. Reading the VCF file from standard input
  2. -
  3. Preserving all header lines without modification
  4. -
  5. For each data line:
  6. -
  7. Checking if it contains 'SVTYPE=' in the INFO field to identify structural variants
  8. -
  9. If filtering is enabled, keeping only structural variant records
  10. -
  11. If modification is enabled, adding additional annotations to structural variant records
  12. -
  13. Writing the processed VCF to standard output
  14. -
-

Structural variants (SVs) are genomic alterations that involve segments of DNA and include deletions (DEL), duplications (DUP), inversions (INV), and breakends (BND). This tool helps to specifically process these variants, which often require special handling in downstream analyses.

-

Modification Details

-

When the --sv-modify option is used, the tool adds several annotations to structural variant records:

-
    -
  1. For all structural variants:
  2. -
  3. -

    Adds SV_VALIDATED=1 to indicate the variant has been processed

    -
  4. -
  5. -

    For deletions (DEL) and duplications (DUP):

    -
  6. -
  7. -

    Calculates and adds SV_SIZE=<size> based on the difference between END and POS positions

    -
  8. -
  9. -

    For inversions (INV):

    -
  10. -
  11. -

    Adds INV_TYPE=PARALLEL

    -
  12. -
  13. -

    For breakends (BND):

    -
  14. -
  15. Adds BND_ORIENTATION=PAIR
  16. -
-

These modifications can be useful for downstream analyses that rely on standardized annotations or require specific information about structural variants.

-

Examples

-

Filter Structural Variants

-

Keep only structural variant records in a VCF file: -

VCFX_sv_handler --sv-filter-only < input.vcf > sv_only.vcf
-

-

Modify Structural Variants

-

Enhance structural variant records with additional annotations: -

VCFX_sv_handler --sv-modify < input.vcf > annotated.vcf
-

-

Combined Operation

-

Filter and modify structural variants in one operation: -

VCFX_sv_handler --sv-filter-only --sv-modify < input.vcf > processed_sv.vcf
-

-

Example Transformations

-

Filtering Structural Variants

-
Before:
-chr1 100 . A T . PASS DP=30
-chr1 200 . T G . PASS SVTYPE=DEL;END=300
-
-After (with --sv-filter-only):
-chr1 200 . T G . PASS SVTYPE=DEL;END=300
-
-

Modifying Structural Variants

-
Before:
-chr1 200 . T G . PASS SVTYPE=DEL;END=300
-
-After (with --sv-modify):
-chr1 200 . T G . PASS SVTYPE=DEL;END=300;SV_VALIDATED=1;SV_SIZE=100
-
-
Before:
-chr1 400 . G C . PASS SVTYPE=INV;END=500
-
-After (with --sv-modify):
-chr1 400 . G C . PASS SVTYPE=INV;END=500;SV_VALIDATED=1;INV_TYPE=PARALLEL
-
-

Special Case Handling

-

Multiple Options

-
    -
  • When both --sv-filter-only and --sv-modify are used, records are first filtered and then modified
  • -
-

Missing Fields

-
    -
  • If a variant has SVTYPE but no END position, it will still be processed
  • -
  • For size calculations, if either POS or END is missing/invalid, SV_SIZE will not be added
  • -
-

Malformed Lines

-
    -
  • Lines with fewer than 8 columns are skipped with a warning
  • -
  • Lines with invalid POS values are skipped when attempting to modify
  • -
-

Non-Structural Variants

-
    -
  • Non-SV records are preserved unless filtering is enabled
  • -
-

Performance Considerations

-
    -
  • The tool processes the VCF file line by line, with minimal memory requirements
  • -
  • Performance scales linearly with the size of the input file
  • -
  • No external dependencies or reference files are required
  • -
-

Limitations

-
    -
  • The tool identifies structural variants solely by the presence of "SVTYPE=" in the INFO field
  • -
  • It adds fixed annotations without validating if they're appropriate for the specific variant
  • -
  • No support for custom annotations or handling of specific structural variant subtypes
  • -
  • Does not validate the correctness of existing structural variant annotations
  • -
  • Cannot handle compressed (gzipped) VCF files directly
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_validator/index.html b/site/VCFX_validator/index.html deleted file mode 100644 index f68f8549..00000000 --- a/site/VCFX_validator/index.html +++ /dev/null @@ -1,3032 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_validator - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_validator

-

Overview

-

VCFX_validator is a utility tool for checking the validity of VCF files according to the basic VCF format specifications. It performs various checks on the file structure, header format, and data lines to ensure the file is properly formatted and contains valid data.

-

Usage

-
VCFX_validator [OPTIONS] < input.vcf
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-s, --strictEnable stricter validation checks (reserved for future implementation)
-

Description

-

VCFX_validator processes a VCF file to verify its structural validity by:

-
    -
  1. Reading the VCF file from standard input
  2. -
  3. Checking that all meta-information lines (starting with '##') are properly formatted
  4. -
  5. Validating that the #CHROM header line is present and has at least 8 required columns
  6. -
  7. For each data line:
  8. -
  9. Ensuring it has at least 8 columns
  10. -
  11. Verifying that CHROM is not empty
  12. -
  13. Confirming POS is a positive integer
  14. -
  15. Checking that REF and ALT are not empty
  16. -
  17. Validating that QUAL is either '.' or a non-negative float
  18. -
  19. Ensuring FILTER is not empty
  20. -
  21. Performing basic validation on the INFO field
  22. -
  23. Reporting errors for any validation failures
  24. -
  25. Returning exit code 0 if the file is valid, or 1 if it contains errors
  26. -
-

This tool is useful for validating VCF files before processing them with other tools, ensuring they meet the basic requirements of the VCF format specification.

-

Validation Details

-

Meta-Information Lines

-
    -
  • Must start with '##'
  • -
  • No specific content validation beyond the prefix
  • -
-

#CHROM Header Line

-
    -
  • Must be present in the file
  • -
  • Must start with '#CHROM'
  • -
  • Must have at least 8 columns (CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO)
  • -
  • Must appear before any data lines
  • -
-

Data Lines

-
    -
  • Must have at least 8 columns
  • -
  • CHROM: Must not be empty
  • -
  • POS: Must be a positive integer
  • -
  • ID: Can be empty or '.' (not validated)
  • -
  • REF: Must not be empty
  • -
  • ALT: Must not be empty
  • -
  • QUAL: Must be '.' or a non-negative float
  • -
  • FILTER: Must not be empty
  • -
  • INFO: Must be '.' or contain valid key-value pairs or flags:
  • -
  • If not '.', must contain at least one valid entry
  • -
  • Key-value pairs must have a non-empty key
  • -
  • Flags (without '=') are allowed
  • -
-

Examples

-

Basic Validation

-

Check if a VCF file is valid: -

VCFX_validator < input.vcf > validated.vcf
-

-

Using Strict Mode

-

Enable stricter validation (note: additional strict checks are reserved for future implementation): -

VCFX_validator --strict < input.vcf > validated.vcf
-

-

When the input is valid, the VCF contents are echoed to standard output so the tool can be used in pipelines. Informational messages such as VCF file is valid. are written to standard error.

-

Redirecting Error Messages

-

Save validation errors to a file: -

VCFX_validator < input.vcf 2> validation_errors.txt
-

-

Example Output

-

For Valid Files

-
VCF file is valid.
-
-

For Invalid Files

-

Error: line 15 has <8 columns.
-
-
Error: line 42 POS must be >0.
-
-
Error: no #CHROM line found in file.
-

-

Special Case Handling

-

Empty Lines

-
    -
  • Empty lines are ignored during validation
  • -
-

Malformed Header Lines

-
    -
  • Lines starting with '#' that are neither '##' meta-information lines nor the '#CHROM' header line are considered errors
  • -
-

Missing #CHROM Line

-
    -
  • If no #CHROM line is found in the file, an error is reported
  • -
  • Data lines encountered before a #CHROM line will cause validation to fail
  • -
-

Whitespace

-
    -
  • Leading and trailing whitespace is trimmed from field values before validation
  • -
-

Performance Considerations

-
    -
  • The tool processes the VCF file line by line, with minimal memory requirements
  • -
  • Performance scales linearly with the size of the input file
  • -
  • No external dependencies or reference files are required
  • -
-

Limitations

-
    -
  • Does not validate VCF version compatibility
  • -
  • No validation of the content of meta-information lines beyond the '##' prefix
  • -
  • Limited validation of INFO and FORMAT fields (no checking against header definitions)
  • -
  • No validation of sample genotype data
  • -
  • No checking of REF/ALT sequence validity (e.g., allowed bases)
  • -
  • No detection of duplicate variant records
  • -
  • Cannot handle compressed (gzipped) VCF files directly
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_variant_classifier/index.html b/site/VCFX_variant_classifier/index.html deleted file mode 100644 index 86709b1a..00000000 --- a/site/VCFX_variant_classifier/index.html +++ /dev/null @@ -1,2808 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_variant_classifier - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_variant_classifier

-

Overview

-

The VCFX_variant_classifier tool analyzes VCF files and classifies variants into various types: SNP, INDEL, MNV, or STRUCTURAL. It can either produce a TSV summary or append classifications to the original VCF file.

-

Usage

-
VCFX_variant_classifier [OPTIONS] < input.vcf > output.vcf_or_tsv
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-a, --append-infoInstead of producing a TSV, output a valid VCF with a new 'VCF_CLASS' subfield in the INFO column
-

Description

-

VCFX_variant_classifier reads each variant line from a VCF file and determines its type based on the following criteria:

-
    -
  • SNP: Single nucleotide polymorphism, where both reference and alternate alleles are single bases
  • -
  • INDEL: Insertions or deletions with length difference less than 50 bp
  • -
  • MNV: Multi-nucleotide variants with the same length but multiple bases changed
  • -
  • STRUCTURAL: Complex variants including:
  • -
  • Symbolic ALT fields (<DEL>, <INS>, etc.)
  • -
  • Breakend notation (containing [ or ])
  • -
  • Variants with length difference ≥50 bp
  • -
  • Very large reference or alternate alleles (≥40 bp)
  • -
  • UNKNOWN: Reserved for special cases like missing or identical REF/ALT
  • -
-

Output Formats

-

TSV Mode (Default)

-

By default, the tool outputs a TSV file with the following columns: -

CHROM  POS  ID  REF  ALT  Classification
-

-

VCF Mode (with --append-info)

-

When using the --append-info option, the tool: -- Preserves the original VCF format including all headers -- Adds a VCF_CLASS=TYPE entry to the INFO field of each variant -- Maintains all other VCF fields

-

Examples

-

Basic Classification to TSV

-
./VCFX_variant_classifier < input.vcf > classified.tsv
-
-

Append Classification to VCF

-
./VCFX_variant_classifier --append-info < input.vcf > annotated.vcf
-
-

Filtering Based on Classification

-
# First classify, then filter for structural variants only
-./VCFX_variant_classifier < input.vcf | grep "STRUCTURAL" > structural_variants.tsv
-
-

Handling Special Cases

-
    -
  • Multi-allelic sites: The most complex type among all alternates is assigned (STRUCTURAL > MNV > INDEL > SNP)
  • -
  • Malformed lines: Lines with fewer than 8 columns are skipped with a warning
  • -
  • Missing data: Missing ALT fields or identical REF/ALT entries are classified as UNKNOWN
  • -
  • Symbolic alleles: Any variant with symbolic notation (e.g., <DEL>) is classified as STRUCTURAL
  • -
-

Performance

-

The tool efficiently processes VCF files line by line, allowing it to handle very large files with minimal memory requirements.

-

Limitations

-
    -
  • Classification is based on standard VCF conventions and may need adjustment for non-standard VCFs
  • -
  • Cannot detect complex structural variants that aren't properly annotated in the VCF
  • -
  • Edge cases like very long identical stretches may be classified in unexpected ways
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/VCFX_variant_counter/index.html b/site/VCFX_variant_counter/index.html deleted file mode 100644 index fc4e3b5d..00000000 --- a/site/VCFX_variant_counter/index.html +++ /dev/null @@ -1,2829 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - VCFX_variant_counter - VCFX Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - -
- - -
- -
- - - - - - - - - -
-
- - - -
-
-
- - - - - - - -
-
-
- - - - - - - -
-
- - - - - - - - -

VCFX_variant_counter

-

Overview

-

VCFX_variant_counter is a simple utility tool that counts the total number of valid variants (data lines) in a VCF file. It reads input from standard input, processes each line, and outputs the total count of valid variant records.

-

Usage

-
VCFX_variant_counter [OPTIONS] < input.vcf
-
-

Options

- - - - - - - - - - - - - - - - - -
OptionDescription
-h, --helpDisplay help message and exit
-s, --strictFail on any data line with fewer than 8 columns
-

Description

-

VCFX_variant_counter processes a VCF file by:

-
    -
  1. Reading the VCF file from standard input
  2. -
  3. Ignoring all header lines (those starting with #)
  4. -
  5. For each data line:
  6. -
  7. Checking if it has at least 8 columns (CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO)
  8. -
  9. If it has 8 or more columns, counting it as a valid variant
  10. -
  11. If it has fewer than 8 columns:
      -
    • In strict mode: exiting with an error
    • -
    • In normal mode: skipping the line with a warning
    • -
    -
  12. -
  13. Finally, printing the total count of valid variants
  14. -
-

This tool is useful for quickly determining the number of variants in a VCF file, which can be helpful for quality control, workflow validation, or simply getting an overview of a dataset's size.

-

VCF Format Requirements

-

The tool assumes a standard VCF format where: -- Header lines start with # -- Data lines have at least 8 tab-separated columns (CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO) -- Optional FORMAT and sample columns may follow

-

Examples

-

Basic Usage

-

Count the variants in a VCF file: -

VCFX_variant_counter < input.vcf
-
-Output: -
Total Variants: 1234
-

-

Using Strict Mode

-

Count variants with strict validation (will fail on malformed lines): -

VCFX_variant_counter --strict < input.vcf
-

-

Using in a Pipeline

-

Count variants after filtering: -

cat input.vcf | grep -v "FILTER=FAIL" | VCFX_variant_counter
-

-

Error Handling

-

Invalid Lines

-
    -
  • In normal mode (default):
  • -
  • Lines with fewer than 8 columns are skipped
  • -
  • A warning is printed to standard error for each skipped line
  • -
  • -

    The count continues with valid lines

    -
  • -
  • -

    In strict mode (--strict):

    -
  • -
  • If any line has fewer than 8 columns, the program exits with an error
  • -
  • The error message includes the line number
  • -
  • The exit code is 1 to indicate failure
  • -
-

Empty Lines

-

Empty lines are ignored and not counted.

-

Performance Considerations

-
    -
  • The tool processes the VCF file line by line, with minimal memory requirements
  • -
  • It performs only basic parsing and doesn't validate the content of each field
  • -
  • Performance scales linearly with the number of lines in the input file
  • -
  • No external dependencies or reference files are required
  • -
-

Limitations

-
    -
  • The tool only checks the number of columns, not their content
  • -
  • It doesn't validate if the VCF follows the specification for field formats
  • -
  • No specific handling for compressed files (use external tools like zcat)
  • -
  • No detailed reporting (e.g., breakdown by chromosome or variant type)
  • -
  • Cannot handle VCF files with non-standard line endings
  • -
- - - - - - - - - - - - - -
-
- - - -
- - - -
- - - -
-
-
-
- - - - - - - - - - \ No newline at end of file diff --git a/site/assets/images/favicon.png b/site/assets/images/favicon.png deleted file mode 100644 index 1cf13b9f9d978896599290a74f77d5dbe7d1655c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1870 zcmV-U2eJ5xP)Gc)JR9QMau)O=X#!i9;T z37kk-upj^(fsR36MHs_+1RCI)NNu9}lD0S{B^g8PN?Ww(5|~L#Ng*g{WsqleV}|#l zz8@ri&cTzw_h33bHI+12+kK6WN$h#n5cD8OQt`5kw6p~9H3()bUQ8OS4Q4HTQ=1Ol z_JAocz`fLbT2^{`8n~UAo=#AUOf=SOq4pYkt;XbC&f#7lb$*7=$na!mWCQ`dBQsO0 zLFBSPj*N?#u5&pf2t4XjEGH|=pPQ8xh7tpx;US5Cx_Ju;!O`ya-yF`)b%TEt5>eP1ZX~}sjjA%FJF?h7cX8=b!DZl<6%Cv z*G0uvvU+vmnpLZ2paivG-(cd*y3$hCIcsZcYOGh{$&)A6*XX&kXZd3G8m)G$Zz-LV z^GF3VAW^Mdv!)4OM8EgqRiz~*Cji;uzl2uC9^=8I84vNp;ltJ|q-*uQwGp2ma6cY7 z;`%`!9UXO@fr&Ebapfs34OmS9^u6$)bJxrucutf>`dKPKT%%*d3XlFVKunp9 zasduxjrjs>f8V=D|J=XNZp;_Zy^WgQ$9WDjgY=z@stwiEBm9u5*|34&1Na8BMjjgf3+SHcr`5~>oz1Y?SW^=K z^bTyO6>Gar#P_W2gEMwq)ot3; zREHn~U&Dp0l6YT0&k-wLwYjb?5zGK`W6S2v+K>AM(95m2C20L|3m~rN8dprPr@t)5lsk9Hu*W z?pS990s;Ez=+Rj{x7p``4>+c0G5^pYnB1^!TL=(?HLHZ+HicG{~4F1d^5Awl_2!1jICM-!9eoLhbbT^;yHcefyTAaqRcY zmuctDopPT!%k+}x%lZRKnzykr2}}XfG_ne?nRQO~?%hkzo;@RN{P6o`&mMUWBYMTe z6i8ChtjX&gXl`nvrU>jah)2iNM%JdjqoaeaU%yVn!^70x-flljp6Q5tK}5}&X8&&G zX3fpb3E(!rH=zVI_9Gjl45w@{(ITqngWFe7@9{mX;tO25Z_8 zQHEpI+FkTU#4xu>RkN>b3Tnc3UpWzPXWm#o55GKF09j^Mh~)K7{QqbO_~(@CVq! 
zS<8954|P8mXN2MRs86xZ&Q4EfM@JB94b=(YGuk)s&^jiSF=t3*oNK3`rD{H`yQ?d; ztE=laAUoZx5?RC8*WKOj`%LXEkgDd>&^Q4M^z`%u0rg-It=hLCVsq!Z%^6eB-OvOT zFZ28TN&cRmgU}Elrnk43)!>Z1FCPL2K$7}gwzIc48NX}#!A1BpJP?#v5wkNprhV** z?Cpalt1oH&{r!o3eSKc&ap)iz2BTn_VV`4>9M^b3;(YY}4>#ML6{~(4mH+?%07*qo IM6N<$f(jP3KmY&$ diff --git a/site/assets/javascripts/bundle.c8b220af.min.js b/site/assets/javascripts/bundle.c8b220af.min.js deleted file mode 100644 index 34b23f95..00000000 --- a/site/assets/javascripts/bundle.c8b220af.min.js +++ /dev/null @@ -1,16 +0,0 @@ -"use strict";(()=>{var Wi=Object.create;var gr=Object.defineProperty;var Vi=Object.getOwnPropertyDescriptor;var Di=Object.getOwnPropertyNames,Vt=Object.getOwnPropertySymbols,Ni=Object.getPrototypeOf,yr=Object.prototype.hasOwnProperty,ao=Object.prototype.propertyIsEnumerable;var io=(e,t,r)=>t in e?gr(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r,$=(e,t)=>{for(var r in t||(t={}))yr.call(t,r)&&io(e,r,t[r]);if(Vt)for(var r of Vt(t))ao.call(t,r)&&io(e,r,t[r]);return e};var so=(e,t)=>{var r={};for(var o in e)yr.call(e,o)&&t.indexOf(o)<0&&(r[o]=e[o]);if(e!=null&&Vt)for(var o of Vt(e))t.indexOf(o)<0&&ao.call(e,o)&&(r[o]=e[o]);return r};var xr=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports);var zi=(e,t,r,o)=>{if(t&&typeof t=="object"||typeof t=="function")for(let n of Di(t))!yr.call(e,n)&&n!==r&&gr(e,n,{get:()=>t[n],enumerable:!(o=Vi(t,n))||o.enumerable});return e};var Lt=(e,t,r)=>(r=e!=null?Wi(Ni(e)):{},zi(t||!e||!e.__esModule?gr(r,"default",{value:e,enumerable:!0}):r,e));var co=(e,t,r)=>new Promise((o,n)=>{var i=p=>{try{s(r.next(p))}catch(c){n(c)}},a=p=>{try{s(r.throw(p))}catch(c){n(c)}},s=p=>p.done?o(p.value):Promise.resolve(p.value).then(i,a);s((r=r.apply(e,t)).next())});var lo=xr((Er,po)=>{(function(e,t){typeof Er=="object"&&typeof po!="undefined"?t():typeof define=="function"&&define.amd?define(t):t()})(Er,function(){"use strict";function e(r){var 
o=!0,n=!1,i=null,a={text:!0,search:!0,url:!0,tel:!0,email:!0,password:!0,number:!0,date:!0,month:!0,week:!0,time:!0,datetime:!0,"datetime-local":!0};function s(k){return!!(k&&k!==document&&k.nodeName!=="HTML"&&k.nodeName!=="BODY"&&"classList"in k&&"contains"in k.classList)}function p(k){var ft=k.type,qe=k.tagName;return!!(qe==="INPUT"&&a[ft]&&!k.readOnly||qe==="TEXTAREA"&&!k.readOnly||k.isContentEditable)}function c(k){k.classList.contains("focus-visible")||(k.classList.add("focus-visible"),k.setAttribute("data-focus-visible-added",""))}function l(k){k.hasAttribute("data-focus-visible-added")&&(k.classList.remove("focus-visible"),k.removeAttribute("data-focus-visible-added"))}function f(k){k.metaKey||k.altKey||k.ctrlKey||(s(r.activeElement)&&c(r.activeElement),o=!0)}function u(k){o=!1}function d(k){s(k.target)&&(o||p(k.target))&&c(k.target)}function y(k){s(k.target)&&(k.target.classList.contains("focus-visible")||k.target.hasAttribute("data-focus-visible-added"))&&(n=!0,window.clearTimeout(i),i=window.setTimeout(function(){n=!1},100),l(k.target))}function L(k){document.visibilityState==="hidden"&&(n&&(o=!0),X())}function X(){document.addEventListener("mousemove",J),document.addEventListener("mousedown",J),document.addEventListener("mouseup",J),document.addEventListener("pointermove",J),document.addEventListener("pointerdown",J),document.addEventListener("pointerup",J),document.addEventListener("touchmove",J),document.addEventListener("touchstart",J),document.addEventListener("touchend",J)}function ee(){document.removeEventListener("mousemove",J),document.removeEventListener("mousedown",J),document.removeEventListener("mouseup",J),document.removeEventListener("pointermove",J),document.removeEventListener("pointerdown",J),document.removeEventListener("pointerup",J),document.removeEventListener("touchmove",J),document.removeEventListener("touchstart",J),document.removeEventListener("touchend",J)}function 
J(k){k.target.nodeName&&k.target.nodeName.toLowerCase()==="html"||(o=!1,ee())}document.addEventListener("keydown",f,!0),document.addEventListener("mousedown",u,!0),document.addEventListener("pointerdown",u,!0),document.addEventListener("touchstart",u,!0),document.addEventListener("visibilitychange",L,!0),X(),r.addEventListener("focus",d,!0),r.addEventListener("blur",y,!0),r.nodeType===Node.DOCUMENT_FRAGMENT_NODE&&r.host?r.host.setAttribute("data-js-focus-visible",""):r.nodeType===Node.DOCUMENT_NODE&&(document.documentElement.classList.add("js-focus-visible"),document.documentElement.setAttribute("data-js-focus-visible",""))}if(typeof window!="undefined"&&typeof document!="undefined"){window.applyFocusVisiblePolyfill=e;var t;try{t=new CustomEvent("focus-visible-polyfill-ready")}catch(r){t=document.createEvent("CustomEvent"),t.initCustomEvent("focus-visible-polyfill-ready",!1,!1,{})}window.dispatchEvent(t)}typeof document!="undefined"&&e(document)})});var qr=xr((dy,On)=>{"use strict";/*! - * escape-html - * Copyright(c) 2012-2013 TJ Holowaychuk - * Copyright(c) 2015 Andreas Lubbe - * Copyright(c) 2015 Tiancheng "Timothy" Gu - * MIT Licensed - */var $a=/["'&<>]/;On.exports=Pa;function Pa(e){var t=""+e,r=$a.exec(t);if(!r)return t;var o,n="",i=0,a=0;for(i=r.index;i{/*! 
- * clipboard.js v2.0.11 - * https://clipboardjs.com/ - * - * Licensed MIT ยฉ Zeno Rocha - */(function(t,r){typeof Rt=="object"&&typeof Yr=="object"?Yr.exports=r():typeof define=="function"&&define.amd?define([],r):typeof Rt=="object"?Rt.ClipboardJS=r():t.ClipboardJS=r()})(Rt,function(){return function(){var e={686:function(o,n,i){"use strict";i.d(n,{default:function(){return Ui}});var a=i(279),s=i.n(a),p=i(370),c=i.n(p),l=i(817),f=i.n(l);function u(D){try{return document.execCommand(D)}catch(A){return!1}}var d=function(A){var M=f()(A);return u("cut"),M},y=d;function L(D){var A=document.documentElement.getAttribute("dir")==="rtl",M=document.createElement("textarea");M.style.fontSize="12pt",M.style.border="0",M.style.padding="0",M.style.margin="0",M.style.position="absolute",M.style[A?"right":"left"]="-9999px";var F=window.pageYOffset||document.documentElement.scrollTop;return M.style.top="".concat(F,"px"),M.setAttribute("readonly",""),M.value=D,M}var X=function(A,M){var F=L(A);M.container.appendChild(F);var V=f()(F);return u("copy"),F.remove(),V},ee=function(A){var M=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body},F="";return typeof A=="string"?F=X(A,M):A instanceof HTMLInputElement&&!["text","search","url","tel","password"].includes(A==null?void 0:A.type)?F=X(A.value,M):(F=f()(A),u("copy")),F},J=ee;function k(D){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?k=function(M){return typeof M}:k=function(M){return M&&typeof Symbol=="function"&&M.constructor===Symbol&&M!==Symbol.prototype?"symbol":typeof M},k(D)}var ft=function(){var A=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},M=A.action,F=M===void 0?"copy":M,V=A.container,Y=A.target,$e=A.text;if(F!=="copy"&&F!=="cut")throw new Error('Invalid "action" value, use either "copy" or "cut"');if(Y!==void 0)if(Y&&k(Y)==="object"&&Y.nodeType===1){if(F==="copy"&&Y.hasAttribute("disabled"))throw new Error('Invalid "target" attribute. 
Please use "readonly" instead of "disabled" attribute');if(F==="cut"&&(Y.hasAttribute("readonly")||Y.hasAttribute("disabled")))throw new Error(`Invalid "target" attribute. You can't cut text from elements with "readonly" or "disabled" attributes`)}else throw new Error('Invalid "target" value, use a valid Element');if($e)return J($e,{container:V});if(Y)return F==="cut"?y(Y):J(Y,{container:V})},qe=ft;function Fe(D){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?Fe=function(M){return typeof M}:Fe=function(M){return M&&typeof Symbol=="function"&&M.constructor===Symbol&&M!==Symbol.prototype?"symbol":typeof M},Fe(D)}function ki(D,A){if(!(D instanceof A))throw new TypeError("Cannot call a class as a function")}function no(D,A){for(var M=0;M0&&arguments[0]!==void 0?arguments[0]:{};this.action=typeof V.action=="function"?V.action:this.defaultAction,this.target=typeof V.target=="function"?V.target:this.defaultTarget,this.text=typeof V.text=="function"?V.text:this.defaultText,this.container=Fe(V.container)==="object"?V.container:document.body}},{key:"listenClick",value:function(V){var Y=this;this.listener=c()(V,"click",function($e){return Y.onClick($e)})}},{key:"onClick",value:function(V){var Y=V.delegateTarget||V.currentTarget,$e=this.action(Y)||"copy",Wt=qe({action:$e,container:this.container,target:this.target(Y),text:this.text(Y)});this.emit(Wt?"success":"error",{action:$e,text:Wt,trigger:Y,clearSelection:function(){Y&&Y.focus(),window.getSelection().removeAllRanges()}})}},{key:"defaultAction",value:function(V){return vr("action",V)}},{key:"defaultTarget",value:function(V){var Y=vr("target",V);if(Y)return document.querySelector(Y)}},{key:"defaultText",value:function(V){return vr("text",V)}},{key:"destroy",value:function(){this.listener.destroy()}}],[{key:"copy",value:function(V){var Y=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body};return J(V,Y)}},{key:"cut",value:function(V){return 
y(V)}},{key:"isSupported",value:function(){var V=arguments.length>0&&arguments[0]!==void 0?arguments[0]:["copy","cut"],Y=typeof V=="string"?[V]:V,$e=!!document.queryCommandSupported;return Y.forEach(function(Wt){$e=$e&&!!document.queryCommandSupported(Wt)}),$e}}]),M}(s()),Ui=Fi},828:function(o){var n=9;if(typeof Element!="undefined"&&!Element.prototype.matches){var i=Element.prototype;i.matches=i.matchesSelector||i.mozMatchesSelector||i.msMatchesSelector||i.oMatchesSelector||i.webkitMatchesSelector}function a(s,p){for(;s&&s.nodeType!==n;){if(typeof s.matches=="function"&&s.matches(p))return s;s=s.parentNode}}o.exports=a},438:function(o,n,i){var a=i(828);function s(l,f,u,d,y){var L=c.apply(this,arguments);return l.addEventListener(u,L,y),{destroy:function(){l.removeEventListener(u,L,y)}}}function p(l,f,u,d,y){return typeof l.addEventListener=="function"?s.apply(null,arguments):typeof u=="function"?s.bind(null,document).apply(null,arguments):(typeof l=="string"&&(l=document.querySelectorAll(l)),Array.prototype.map.call(l,function(L){return s(L,f,u,d,y)}))}function c(l,f,u,d){return function(y){y.delegateTarget=a(y.target,f),y.delegateTarget&&d.call(l,y)}}o.exports=p},879:function(o,n){n.node=function(i){return i!==void 0&&i instanceof HTMLElement&&i.nodeType===1},n.nodeList=function(i){var a=Object.prototype.toString.call(i);return i!==void 0&&(a==="[object NodeList]"||a==="[object HTMLCollection]")&&"length"in i&&(i.length===0||n.node(i[0]))},n.string=function(i){return typeof i=="string"||i instanceof String},n.fn=function(i){var a=Object.prototype.toString.call(i);return a==="[object Function]"}},370:function(o,n,i){var a=i(879),s=i(438);function p(u,d,y){if(!u&&!d&&!y)throw new Error("Missing required arguments");if(!a.string(d))throw new TypeError("Second argument must be a String");if(!a.fn(y))throw new TypeError("Third argument must be a Function");if(a.node(u))return c(u,d,y);if(a.nodeList(u))return l(u,d,y);if(a.string(u))return f(u,d,y);throw new 
TypeError("First argument must be a String, HTMLElement, HTMLCollection, or NodeList")}function c(u,d,y){return u.addEventListener(d,y),{destroy:function(){u.removeEventListener(d,y)}}}function l(u,d,y){return Array.prototype.forEach.call(u,function(L){L.addEventListener(d,y)}),{destroy:function(){Array.prototype.forEach.call(u,function(L){L.removeEventListener(d,y)})}}}function f(u,d,y){return s(document.body,u,d,y)}o.exports=p},817:function(o){function n(i){var a;if(i.nodeName==="SELECT")i.focus(),a=i.value;else if(i.nodeName==="INPUT"||i.nodeName==="TEXTAREA"){var s=i.hasAttribute("readonly");s||i.setAttribute("readonly",""),i.select(),i.setSelectionRange(0,i.value.length),s||i.removeAttribute("readonly"),a=i.value}else{i.hasAttribute("contenteditable")&&i.focus();var p=window.getSelection(),c=document.createRange();c.selectNodeContents(i),p.removeAllRanges(),p.addRange(c),a=p.toString()}return a}o.exports=n},279:function(o){function n(){}n.prototype={on:function(i,a,s){var p=this.e||(this.e={});return(p[i]||(p[i]=[])).push({fn:a,ctx:s}),this},once:function(i,a,s){var p=this;function c(){p.off(i,c),a.apply(s,arguments)}return c._=a,this.on(i,c,s)},emit:function(i){var a=[].slice.call(arguments,1),s=((this.e||(this.e={}))[i]||[]).slice(),p=0,c=s.length;for(p;p0&&i[i.length-1])&&(c[0]===6||c[0]===2)){r=0;continue}if(c[0]===3&&(!i||c[1]>i[0]&&c[1]=e.length&&(e=void 0),{value:e&&e[o++],done:!e}}};throw new TypeError(t?"Object is not iterable.":"Symbol.iterator is not defined.")}function N(e,t){var r=typeof Symbol=="function"&&e[Symbol.iterator];if(!r)return e;var o=r.call(e),n,i=[],a;try{for(;(t===void 0||t-- >0)&&!(n=o.next()).done;)i.push(n.value)}catch(s){a={error:s}}finally{try{n&&!n.done&&(r=o.return)&&r.call(o)}finally{if(a)throw a.error}}return i}function q(e,t,r){if(r||arguments.length===2)for(var o=0,n=t.length,i;o1||p(d,L)})},y&&(n[d]=y(n[d])))}function p(d,y){try{c(o[d](y))}catch(L){u(i[0][3],L)}}function c(d){d.value instanceof 
nt?Promise.resolve(d.value.v).then(l,f):u(i[0][2],d)}function l(d){p("next",d)}function f(d){p("throw",d)}function u(d,y){d(y),i.shift(),i.length&&p(i[0][0],i[0][1])}}function uo(e){if(!Symbol.asyncIterator)throw new TypeError("Symbol.asyncIterator is not defined.");var t=e[Symbol.asyncIterator],r;return t?t.call(e):(e=typeof he=="function"?he(e):e[Symbol.iterator](),r={},o("next"),o("throw"),o("return"),r[Symbol.asyncIterator]=function(){return this},r);function o(i){r[i]=e[i]&&function(a){return new Promise(function(s,p){a=e[i](a),n(s,p,a.done,a.value)})}}function n(i,a,s,p){Promise.resolve(p).then(function(c){i({value:c,done:s})},a)}}function H(e){return typeof e=="function"}function ut(e){var t=function(o){Error.call(o),o.stack=new Error().stack},r=e(t);return r.prototype=Object.create(Error.prototype),r.prototype.constructor=r,r}var Nt=ut(function(e){return function(r){e(this),this.message=r?r.length+` errors occurred during unsubscription: -`+r.map(function(o,n){return n+1+") "+o.toString()}).join(` - `):"",this.name="UnsubscriptionError",this.errors=r}});function Qe(e,t){if(e){var r=e.indexOf(t);0<=r&&e.splice(r,1)}}var Ue=function(){function e(t){this.initialTeardown=t,this.closed=!1,this._parentage=null,this._finalizers=null}return e.prototype.unsubscribe=function(){var t,r,o,n,i;if(!this.closed){this.closed=!0;var a=this._parentage;if(a)if(this._parentage=null,Array.isArray(a))try{for(var s=he(a),p=s.next();!p.done;p=s.next()){var c=p.value;c.remove(this)}}catch(L){t={error:L}}finally{try{p&&!p.done&&(r=s.return)&&r.call(s)}finally{if(t)throw t.error}}else a.remove(this);var l=this.initialTeardown;if(H(l))try{l()}catch(L){i=L instanceof Nt?L.errors:[L]}var f=this._finalizers;if(f){this._finalizers=null;try{for(var u=he(f),d=u.next();!d.done;d=u.next()){var y=d.value;try{ho(y)}catch(L){i=i!=null?i:[],L instanceof Nt?i=q(q([],N(i)),N(L.errors)):i.push(L)}}}catch(L){o={error:L}}finally{try{d&&!d.done&&(n=u.return)&&n.call(u)}finally{if(o)throw 
o.error}}}if(i)throw new Nt(i)}},e.prototype.add=function(t){var r;if(t&&t!==this)if(this.closed)ho(t);else{if(t instanceof e){if(t.closed||t._hasParent(this))return;t._addParent(this)}(this._finalizers=(r=this._finalizers)!==null&&r!==void 0?r:[]).push(t)}},e.prototype._hasParent=function(t){var r=this._parentage;return r===t||Array.isArray(r)&&r.includes(t)},e.prototype._addParent=function(t){var r=this._parentage;this._parentage=Array.isArray(r)?(r.push(t),r):r?[r,t]:t},e.prototype._removeParent=function(t){var r=this._parentage;r===t?this._parentage=null:Array.isArray(r)&&Qe(r,t)},e.prototype.remove=function(t){var r=this._finalizers;r&&Qe(r,t),t instanceof e&&t._removeParent(this)},e.EMPTY=function(){var t=new e;return t.closed=!0,t}(),e}();var Tr=Ue.EMPTY;function zt(e){return e instanceof Ue||e&&"closed"in e&&H(e.remove)&&H(e.add)&&H(e.unsubscribe)}function ho(e){H(e)?e():e.unsubscribe()}var Pe={onUnhandledError:null,onStoppedNotification:null,Promise:void 0,useDeprecatedSynchronousErrorHandling:!1,useDeprecatedNextContext:!1};var dt={setTimeout:function(e,t){for(var r=[],o=2;o0},enumerable:!1,configurable:!0}),t.prototype._trySubscribe=function(r){return this._throwIfClosed(),e.prototype._trySubscribe.call(this,r)},t.prototype._subscribe=function(r){return this._throwIfClosed(),this._checkFinalizedStatuses(r),this._innerSubscribe(r)},t.prototype._innerSubscribe=function(r){var o=this,n=this,i=n.hasError,a=n.isStopped,s=n.observers;return i||a?Tr:(this.currentObservers=null,s.push(r),new Ue(function(){o.currentObservers=null,Qe(s,r)}))},t.prototype._checkFinalizedStatuses=function(r){var o=this,n=o.hasError,i=o.thrownError,a=o.isStopped;n?r.error(i):a&&r.complete()},t.prototype.asObservable=function(){var r=new j;return r.source=this,r},t.create=function(r,o){return new To(r,o)},t}(j);var To=function(e){oe(t,e);function t(r,o){var n=e.call(this)||this;return n.destination=r,n.source=o,n}return t.prototype.next=function(r){var 
o,n;(n=(o=this.destination)===null||o===void 0?void 0:o.next)===null||n===void 0||n.call(o,r)},t.prototype.error=function(r){var o,n;(n=(o=this.destination)===null||o===void 0?void 0:o.error)===null||n===void 0||n.call(o,r)},t.prototype.complete=function(){var r,o;(o=(r=this.destination)===null||r===void 0?void 0:r.complete)===null||o===void 0||o.call(r)},t.prototype._subscribe=function(r){var o,n;return(n=(o=this.source)===null||o===void 0?void 0:o.subscribe(r))!==null&&n!==void 0?n:Tr},t}(g);var _r=function(e){oe(t,e);function t(r){var o=e.call(this)||this;return o._value=r,o}return Object.defineProperty(t.prototype,"value",{get:function(){return this.getValue()},enumerable:!1,configurable:!0}),t.prototype._subscribe=function(r){var o=e.prototype._subscribe.call(this,r);return!o.closed&&r.next(this._value),o},t.prototype.getValue=function(){var r=this,o=r.hasError,n=r.thrownError,i=r._value;if(o)throw n;return this._throwIfClosed(),i},t.prototype.next=function(r){e.prototype.next.call(this,this._value=r)},t}(g);var _t={now:function(){return(_t.delegate||Date).now()},delegate:void 0};var At=function(e){oe(t,e);function t(r,o,n){r===void 0&&(r=1/0),o===void 0&&(o=1/0),n===void 0&&(n=_t);var i=e.call(this)||this;return i._bufferSize=r,i._windowTime=o,i._timestampProvider=n,i._buffer=[],i._infiniteTimeWindow=!0,i._infiniteTimeWindow=o===1/0,i._bufferSize=Math.max(1,r),i._windowTime=Math.max(1,o),i}return t.prototype.next=function(r){var o=this,n=o.isStopped,i=o._buffer,a=o._infiniteTimeWindow,s=o._timestampProvider,p=o._windowTime;n||(i.push(r),!a&&i.push(s.now()+p)),this._trimBuffer(),e.prototype.next.call(this,r)},t.prototype._subscribe=function(r){this._throwIfClosed(),this._trimBuffer();for(var o=this._innerSubscribe(r),n=this,i=n._infiniteTimeWindow,a=n._buffer,s=a.slice(),p=0;p0?e.prototype.schedule.call(this,r,o):(this.delay=o,this.state=r,this.scheduler.flush(this),this)},t.prototype.execute=function(r,o){return 
o>0||this.closed?e.prototype.execute.call(this,r,o):this._execute(r,o)},t.prototype.requestAsyncId=function(r,o,n){return n===void 0&&(n=0),n!=null&&n>0||n==null&&this.delay>0?e.prototype.requestAsyncId.call(this,r,o,n):(r.flush(this),0)},t}(gt);var Lo=function(e){oe(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t}(yt);var kr=new Lo(Oo);var Mo=function(e){oe(t,e);function t(r,o){var n=e.call(this,r,o)||this;return n.scheduler=r,n.work=o,n}return t.prototype.requestAsyncId=function(r,o,n){return n===void 0&&(n=0),n!==null&&n>0?e.prototype.requestAsyncId.call(this,r,o,n):(r.actions.push(this),r._scheduled||(r._scheduled=vt.requestAnimationFrame(function(){return r.flush(void 0)})))},t.prototype.recycleAsyncId=function(r,o,n){var i;if(n===void 0&&(n=0),n!=null?n>0:this.delay>0)return e.prototype.recycleAsyncId.call(this,r,o,n);var a=r.actions;o!=null&&o===r._scheduled&&((i=a[a.length-1])===null||i===void 0?void 0:i.id)!==o&&(vt.cancelAnimationFrame(o),r._scheduled=void 0)},t}(gt);var _o=function(e){oe(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t.prototype.flush=function(r){this._active=!0;var o;r?o=r.id:(o=this._scheduled,this._scheduled=void 0);var n=this.actions,i;r=r||n.shift();do if(i=r.execute(r.state,r.delay))break;while((r=n[0])&&r.id===o&&n.shift());if(this._active=!1,i){for(;(r=n[0])&&r.id===o&&n.shift();)r.unsubscribe();throw i}},t}(yt);var me=new _o(Mo);var S=new j(function(e){return e.complete()});function Kt(e){return e&&H(e.schedule)}function Hr(e){return e[e.length-1]}function Xe(e){return H(Hr(e))?e.pop():void 0}function ke(e){return Kt(Hr(e))?e.pop():void 0}function Yt(e,t){return typeof Hr(e)=="number"?e.pop():t}var xt=function(e){return e&&typeof e.length=="number"&&typeof e!="function"};function Bt(e){return H(e==null?void 0:e.then)}function Gt(e){return H(e[bt])}function Jt(e){return Symbol.asyncIterator&&H(e==null?void 0:e[Symbol.asyncIterator])}function Xt(e){return new TypeError("You 
provided "+(e!==null&&typeof e=="object"?"an invalid object":"'"+e+"'")+" where a stream was expected. You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.")}function Zi(){return typeof Symbol!="function"||!Symbol.iterator?"@@iterator":Symbol.iterator}var Zt=Zi();function er(e){return H(e==null?void 0:e[Zt])}function tr(e){return fo(this,arguments,function(){var r,o,n,i;return Dt(this,function(a){switch(a.label){case 0:r=e.getReader(),a.label=1;case 1:a.trys.push([1,,9,10]),a.label=2;case 2:return[4,nt(r.read())];case 3:return o=a.sent(),n=o.value,i=o.done,i?[4,nt(void 0)]:[3,5];case 4:return[2,a.sent()];case 5:return[4,nt(n)];case 6:return[4,a.sent()];case 7:return a.sent(),[3,2];case 8:return[3,10];case 9:return r.releaseLock(),[7];case 10:return[2]}})})}function rr(e){return H(e==null?void 0:e.getReader)}function U(e){if(e instanceof j)return e;if(e!=null){if(Gt(e))return ea(e);if(xt(e))return ta(e);if(Bt(e))return ra(e);if(Jt(e))return Ao(e);if(er(e))return oa(e);if(rr(e))return na(e)}throw Xt(e)}function ea(e){return new j(function(t){var r=e[bt]();if(H(r.subscribe))return r.subscribe(t);throw new TypeError("Provided object does not correctly implement Symbol.observable")})}function ta(e){return new j(function(t){for(var r=0;r=2;return function(o){return o.pipe(e?b(function(n,i){return e(n,i,o)}):le,Te(1),r?Ve(t):Qo(function(){return new nr}))}}function jr(e){return e<=0?function(){return S}:E(function(t,r){var o=[];t.subscribe(T(r,function(n){o.push(n),e=2,!0))}function pe(e){e===void 0&&(e={});var t=e.connector,r=t===void 0?function(){return new g}:t,o=e.resetOnError,n=o===void 0?!0:o,i=e.resetOnComplete,a=i===void 0?!0:i,s=e.resetOnRefCountZero,p=s===void 0?!0:s;return function(c){var l,f,u,d=0,y=!1,L=!1,X=function(){f==null||f.unsubscribe(),f=void 0},ee=function(){X(),l=u=void 0,y=L=!1},J=function(){var k=l;ee(),k==null||k.unsubscribe()};return E(function(k,ft){d++,!L&&!y&&X();var 
qe=u=u!=null?u:r();ft.add(function(){d--,d===0&&!L&&!y&&(f=Ur(J,p))}),qe.subscribe(ft),!l&&d>0&&(l=new at({next:function(Fe){return qe.next(Fe)},error:function(Fe){L=!0,X(),f=Ur(ee,n,Fe),qe.error(Fe)},complete:function(){y=!0,X(),f=Ur(ee,a),qe.complete()}}),U(k).subscribe(l))})(c)}}function Ur(e,t){for(var r=[],o=2;oe.next(document)),e}function P(e,t=document){return Array.from(t.querySelectorAll(e))}function R(e,t=document){let r=fe(e,t);if(typeof r=="undefined")throw new ReferenceError(`Missing element: expected "${e}" to be present`);return r}function fe(e,t=document){return t.querySelector(e)||void 0}function Ie(){var e,t,r,o;return(o=(r=(t=(e=document.activeElement)==null?void 0:e.shadowRoot)==null?void 0:t.activeElement)!=null?r:document.activeElement)!=null?o:void 0}var wa=O(h(document.body,"focusin"),h(document.body,"focusout")).pipe(_e(1),Q(void 0),m(()=>Ie()||document.body),G(1));function et(e){return wa.pipe(m(t=>e.contains(t)),K())}function Ht(e,t){return C(()=>O(h(e,"mouseenter").pipe(m(()=>!0)),h(e,"mouseleave").pipe(m(()=>!1))).pipe(t?kt(r=>Le(+!r*t)):le,Q(e.matches(":hover"))))}function Jo(e,t){if(typeof t=="string"||typeof t=="number")e.innerHTML+=t.toString();else if(t instanceof Node)e.appendChild(t);else if(Array.isArray(t))for(let r of t)Jo(e,r)}function x(e,t,...r){let o=document.createElement(e);if(t)for(let n of Object.keys(t))typeof t[n]!="undefined"&&(typeof t[n]!="boolean"?o.setAttribute(n,t[n]):o.setAttribute(n,""));for(let n of r)Jo(o,n);return o}function sr(e){if(e>999){let t=+((e-950)%1e3>99);return`${((e+1e-6)/1e3).toFixed(t)}k`}else return e.toString()}function wt(e){let t=x("script",{src:e});return C(()=>(document.head.appendChild(t),O(h(t,"load"),h(t,"error").pipe(v(()=>$r(()=>new ReferenceError(`Invalid script: ${e}`))))).pipe(m(()=>{}),_(()=>document.head.removeChild(t)),Te(1))))}var Xo=new g,Ta=C(()=>typeof ResizeObserver=="undefined"?wt("https://unpkg.com/resize-observer-polyfill"):I(void 0)).pipe(m(()=>new 
ResizeObserver(e=>e.forEach(t=>Xo.next(t)))),v(e=>O(Ye,I(e)).pipe(_(()=>e.disconnect()))),G(1));function ce(e){return{width:e.offsetWidth,height:e.offsetHeight}}function ge(e){let t=e;for(;t.clientWidth===0&&t.parentElement;)t=t.parentElement;return Ta.pipe(w(r=>r.observe(t)),v(r=>Xo.pipe(b(o=>o.target===t),_(()=>r.unobserve(t)))),m(()=>ce(e)),Q(ce(e)))}function Tt(e){return{width:e.scrollWidth,height:e.scrollHeight}}function cr(e){let t=e.parentElement;for(;t&&(e.scrollWidth<=t.scrollWidth&&e.scrollHeight<=t.scrollHeight);)t=(e=t).parentElement;return t?e:void 0}function Zo(e){let t=[],r=e.parentElement;for(;r;)(e.clientWidth>r.clientWidth||e.clientHeight>r.clientHeight)&&t.push(r),r=(e=r).parentElement;return t.length===0&&t.push(document.documentElement),t}function De(e){return{x:e.offsetLeft,y:e.offsetTop}}function en(e){let t=e.getBoundingClientRect();return{x:t.x+window.scrollX,y:t.y+window.scrollY}}function tn(e){return O(h(window,"load"),h(window,"resize")).pipe(Me(0,me),m(()=>De(e)),Q(De(e)))}function pr(e){return{x:e.scrollLeft,y:e.scrollTop}}function Ne(e){return O(h(e,"scroll"),h(window,"scroll"),h(window,"resize")).pipe(Me(0,me),m(()=>pr(e)),Q(pr(e)))}var rn=new g,Sa=C(()=>I(new IntersectionObserver(e=>{for(let t of e)rn.next(t)},{threshold:0}))).pipe(v(e=>O(Ye,I(e)).pipe(_(()=>e.disconnect()))),G(1));function tt(e){return Sa.pipe(w(t=>t.observe(e)),v(t=>rn.pipe(b(({target:r})=>r===e),_(()=>t.unobserve(e)),m(({isIntersecting:r})=>r))))}function on(e,t=16){return Ne(e).pipe(m(({y:r})=>{let o=ce(e),n=Tt(e);return r>=n.height-o.height-t}),K())}var lr={drawer:R("[data-md-toggle=drawer]"),search:R("[data-md-toggle=search]")};function nn(e){return lr[e].checked}function Je(e,t){lr[e].checked!==t&&lr[e].click()}function ze(e){let t=lr[e];return h(t,"change").pipe(m(()=>t.checked),Q(t.checked))}function Oa(e,t){switch(e.constructor){case HTMLInputElement:return e.type==="radio"?/^Arrow/.test(t):!0;case HTMLSelectElement:case 
HTMLTextAreaElement:return!0;default:return e.isContentEditable}}function La(){return O(h(window,"compositionstart").pipe(m(()=>!0)),h(window,"compositionend").pipe(m(()=>!1))).pipe(Q(!1))}function an(){let e=h(window,"keydown").pipe(b(t=>!(t.metaKey||t.ctrlKey)),m(t=>({mode:nn("search")?"search":"global",type:t.key,claim(){t.preventDefault(),t.stopPropagation()}})),b(({mode:t,type:r})=>{if(t==="global"){let o=Ie();if(typeof o!="undefined")return!Oa(o,r)}return!0}),pe());return La().pipe(v(t=>t?S:e))}function ye(){return new URL(location.href)}function lt(e,t=!1){if(B("navigation.instant")&&!t){let r=x("a",{href:e.href});document.body.appendChild(r),r.click(),r.remove()}else location.href=e.href}function sn(){return new g}function cn(){return location.hash.slice(1)}function pn(e){let t=x("a",{href:e});t.addEventListener("click",r=>r.stopPropagation()),t.click()}function Ma(e){return O(h(window,"hashchange"),e).pipe(m(cn),Q(cn()),b(t=>t.length>0),G(1))}function ln(e){return Ma(e).pipe(m(t=>fe(`[id="${t}"]`)),b(t=>typeof t!="undefined"))}function $t(e){let t=matchMedia(e);return ir(r=>t.addListener(()=>r(t.matches))).pipe(Q(t.matches))}function mn(){let e=matchMedia("print");return O(h(window,"beforeprint").pipe(m(()=>!0)),h(window,"afterprint").pipe(m(()=>!1))).pipe(Q(e.matches))}function Nr(e,t){return e.pipe(v(r=>r?t():S))}function zr(e,t){return new j(r=>{let o=new XMLHttpRequest;return o.open("GET",`${e}`),o.responseType="blob",o.addEventListener("load",()=>{o.status>=200&&o.status<300?(r.next(o.response),r.complete()):r.error(new Error(o.statusText))}),o.addEventListener("error",()=>{r.error(new Error("Network error"))}),o.addEventListener("abort",()=>{r.complete()}),typeof(t==null?void 0:t.progress$)!="undefined"&&(o.addEventListener("progress",n=>{var i;if(n.lengthComputable)t.progress$.next(n.loaded/n.total*100);else{let 
a=(i=o.getResponseHeader("Content-Length"))!=null?i:0;t.progress$.next(n.loaded/+a*100)}}),t.progress$.next(5)),o.send(),()=>o.abort()})}function je(e,t){return zr(e,t).pipe(v(r=>r.text()),m(r=>JSON.parse(r)),G(1))}function fn(e,t){let r=new DOMParser;return zr(e,t).pipe(v(o=>o.text()),m(o=>r.parseFromString(o,"text/html")),G(1))}function un(e,t){let r=new DOMParser;return zr(e,t).pipe(v(o=>o.text()),m(o=>r.parseFromString(o,"text/xml")),G(1))}function dn(){return{x:Math.max(0,scrollX),y:Math.max(0,scrollY)}}function hn(){return O(h(window,"scroll",{passive:!0}),h(window,"resize",{passive:!0})).pipe(m(dn),Q(dn()))}function bn(){return{width:innerWidth,height:innerHeight}}function vn(){return h(window,"resize",{passive:!0}).pipe(m(bn),Q(bn()))}function gn(){return z([hn(),vn()]).pipe(m(([e,t])=>({offset:e,size:t})),G(1))}function mr(e,{viewport$:t,header$:r}){let o=t.pipe(te("size")),n=z([o,r]).pipe(m(()=>De(e)));return z([r,t,n]).pipe(m(([{height:i},{offset:a,size:s},{x:p,y:c}])=>({offset:{x:a.x-p,y:a.y-c+i},size:s})))}function _a(e){return h(e,"message",t=>t.data)}function Aa(e){let t=new g;return t.subscribe(r=>e.postMessage(r)),t}function yn(e,t=new Worker(e)){let r=_a(t),o=Aa(t),n=new g;n.subscribe(o);let i=o.pipe(Z(),ie(!0));return n.pipe(Z(),Re(r.pipe(W(i))),pe())}var Ca=R("#__config"),St=JSON.parse(Ca.textContent);St.base=`${new URL(St.base,ye())}`;function xe(){return St}function B(e){return St.features.includes(e)}function Ee(e,t){return typeof t!="undefined"?St.translations[e].replace("#",t.toString()):St.translations[e]}function Se(e,t=document){return R(`[data-md-component=${e}]`,t)}function ae(e,t=document){return P(`[data-md-component=${e}]`,t)}function ka(e){let t=R(".md-typeset > :first-child",e);return h(t,"click",{once:!0}).pipe(m(()=>R(".md-typeset",e)),m(r=>({hash:__md_hash(r.innerHTML)})))}function xn(e){if(!B("announce.dismiss")||!e.childElementCount)return S;if(!e.hidden){let 
t=R(".md-typeset",e);__md_hash(t.innerHTML)===__md_get("__announce")&&(e.hidden=!0)}return C(()=>{let t=new g;return t.subscribe(({hash:r})=>{e.hidden=!0,__md_set("__announce",r)}),ka(e).pipe(w(r=>t.next(r)),_(()=>t.complete()),m(r=>$({ref:e},r)))})}function Ha(e,{target$:t}){return t.pipe(m(r=>({hidden:r!==e})))}function En(e,t){let r=new g;return r.subscribe(({hidden:o})=>{e.hidden=o}),Ha(e,t).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))}function Pt(e,t){return t==="inline"?x("div",{class:"md-tooltip md-tooltip--inline",id:e,role:"tooltip"},x("div",{class:"md-tooltip__inner md-typeset"})):x("div",{class:"md-tooltip",id:e,role:"tooltip"},x("div",{class:"md-tooltip__inner md-typeset"}))}function wn(...e){return x("div",{class:"md-tooltip2",role:"tooltip"},x("div",{class:"md-tooltip2__inner md-typeset"},e))}function Tn(e,t){if(t=t?`${t}_annotation_${e}`:void 0,t){let r=t?`#${t}`:void 0;return x("aside",{class:"md-annotation",tabIndex:0},Pt(t),x("a",{href:r,class:"md-annotation__index",tabIndex:-1},x("span",{"data-md-annotation-id":e})))}else return x("aside",{class:"md-annotation",tabIndex:0},Pt(t),x("span",{class:"md-annotation__index",tabIndex:-1},x("span",{"data-md-annotation-id":e})))}function Sn(e){return x("button",{class:"md-clipboard md-icon",title:Ee("clipboard.copy"),"data-clipboard-target":`#${e} > code`})}var Ln=Lt(qr());function Qr(e,t){let r=t&2,o=t&1,n=Object.keys(e.terms).filter(p=>!e.terms[p]).reduce((p,c)=>[...p,x("del",null,(0,Ln.default)(c))," "],[]).slice(0,-1),i=xe(),a=new URL(e.location,i.base);B("search.highlight")&&a.searchParams.set("h",Object.entries(e.terms).filter(([,p])=>p).reduce((p,[c])=>`${p} ${c}`.trim(),""));let{tags:s}=xe();return x("a",{href:`${a}`,class:"md-search-result__link",tabIndex:-1},x("article",{class:"md-search-result__article md-typeset","data-md-score":e.score.toFixed(2)},r>0&&x("div",{class:"md-search-result__icon 
md-icon"}),r>0&&x("h1",null,e.title),r<=0&&x("h2",null,e.title),o>0&&e.text.length>0&&e.text,e.tags&&x("nav",{class:"md-tags"},e.tags.map(p=>{let c=s?p in s?`md-tag-icon md-tag--${s[p]}`:"md-tag-icon":"";return x("span",{class:`md-tag ${c}`},p)})),o>0&&n.length>0&&x("p",{class:"md-search-result__terms"},Ee("search.result.term.missing"),": ",...n)))}function Mn(e){let t=e[0].score,r=[...e],o=xe(),n=r.findIndex(l=>!`${new URL(l.location,o.base)}`.includes("#")),[i]=r.splice(n,1),a=r.findIndex(l=>l.scoreQr(l,1)),...p.length?[x("details",{class:"md-search-result__more"},x("summary",{tabIndex:-1},x("div",null,p.length>0&&p.length===1?Ee("search.result.more.one"):Ee("search.result.more.other",p.length))),...p.map(l=>Qr(l,1)))]:[]];return x("li",{class:"md-search-result__item"},c)}function _n(e){return x("ul",{class:"md-source__facts"},Object.entries(e).map(([t,r])=>x("li",{class:`md-source__fact md-source__fact--${t}`},typeof r=="number"?sr(r):r)))}function Kr(e){let t=`tabbed-control tabbed-control--${e}`;return x("div",{class:t,hidden:!0},x("button",{class:"tabbed-button",tabIndex:-1,"aria-hidden":"true"}))}function An(e){return x("div",{class:"md-typeset__scrollwrap"},x("div",{class:"md-typeset__table"},e))}function Ra(e){var o;let t=xe(),r=new URL(`../${e.version}/`,t.base);return x("li",{class:"md-version__item"},x("a",{href:`${r}`,class:"md-version__link"},e.title,((o=t.version)==null?void 0:o.alias)&&e.aliases.length>0&&x("span",{class:"md-version__alias"},e.aliases[0])))}function Cn(e,t){var o;let r=xe();return e=e.filter(n=>{var i;return!((i=n.properties)!=null&&i.hidden)}),x("div",{class:"md-version"},x("button",{class:"md-version__current","aria-label":Ee("select.version")},t.title,((o=r.version)==null?void 0:o.alias)&&t.aliases.length>0&&x("span",{class:"md-version__alias"},t.aliases[0])),x("ul",{class:"md-version__list"},e.map(Ra)))}var Ia=0;function ja(e){let 
t=z([et(e),Ht(e)]).pipe(m(([o,n])=>o||n),K()),r=C(()=>Zo(e)).pipe(ne(Ne),pt(1),He(t),m(()=>en(e)));return t.pipe(Ae(o=>o),v(()=>z([t,r])),m(([o,n])=>({active:o,offset:n})),pe())}function Fa(e,t){let{content$:r,viewport$:o}=t,n=`__tooltip2_${Ia++}`;return C(()=>{let i=new g,a=new _r(!1);i.pipe(Z(),ie(!1)).subscribe(a);let s=a.pipe(kt(c=>Le(+!c*250,kr)),K(),v(c=>c?r:S),w(c=>c.id=n),pe());z([i.pipe(m(({active:c})=>c)),s.pipe(v(c=>Ht(c,250)),Q(!1))]).pipe(m(c=>c.some(l=>l))).subscribe(a);let p=a.pipe(b(c=>c),re(s,o),m(([c,l,{size:f}])=>{let u=e.getBoundingClientRect(),d=u.width/2;if(l.role==="tooltip")return{x:d,y:8+u.height};if(u.y>=f.height/2){let{height:y}=ce(l);return{x:d,y:-16-y}}else return{x:d,y:16+u.height}}));return z([s,i,p]).subscribe(([c,{offset:l},f])=>{c.style.setProperty("--md-tooltip-host-x",`${l.x}px`),c.style.setProperty("--md-tooltip-host-y",`${l.y}px`),c.style.setProperty("--md-tooltip-x",`${f.x}px`),c.style.setProperty("--md-tooltip-y",`${f.y}px`),c.classList.toggle("md-tooltip2--top",f.y<0),c.classList.toggle("md-tooltip2--bottom",f.y>=0)}),a.pipe(b(c=>c),re(s,(c,l)=>l),b(c=>c.role==="tooltip")).subscribe(c=>{let l=ce(R(":scope > *",c));c.style.setProperty("--md-tooltip-width",`${l.width}px`),c.style.setProperty("--md-tooltip-tail","0px")}),a.pipe(K(),ve(me),re(s)).subscribe(([c,l])=>{l.classList.toggle("md-tooltip2--active",c)}),z([a.pipe(b(c=>c)),s]).subscribe(([c,l])=>{l.role==="dialog"?(e.setAttribute("aria-controls",n),e.setAttribute("aria-haspopup","dialog")):e.setAttribute("aria-describedby",n)}),a.pipe(b(c=>!c)).subscribe(()=>{e.removeAttribute("aria-controls"),e.removeAttribute("aria-describedby"),e.removeAttribute("aria-haspopup")}),ja(e).pipe(w(c=>i.next(c)),_(()=>i.complete()),m(c=>$({ref:e},c)))})}function mt(e,{viewport$:t},r=document.body){return Fa(e,{content$:new j(o=>{let n=e.title,i=wn(n);return o.next(i),e.removeAttribute("title"),r.append(i),()=>{i.remove(),e.setAttribute("title",n)}}),viewport$:t})}function Ua(e,t){let 
r=C(()=>z([tn(e),Ne(t)])).pipe(m(([{x:o,y:n},i])=>{let{width:a,height:s}=ce(e);return{x:o-i.x+a/2,y:n-i.y+s/2}}));return et(e).pipe(v(o=>r.pipe(m(n=>({active:o,offset:n})),Te(+!o||1/0))))}function kn(e,t,{target$:r}){let[o,n]=Array.from(e.children);return C(()=>{let i=new g,a=i.pipe(Z(),ie(!0));return i.subscribe({next({offset:s}){e.style.setProperty("--md-tooltip-x",`${s.x}px`),e.style.setProperty("--md-tooltip-y",`${s.y}px`)},complete(){e.style.removeProperty("--md-tooltip-x"),e.style.removeProperty("--md-tooltip-y")}}),tt(e).pipe(W(a)).subscribe(s=>{e.toggleAttribute("data-md-visible",s)}),O(i.pipe(b(({active:s})=>s)),i.pipe(_e(250),b(({active:s})=>!s))).subscribe({next({active:s}){s?e.prepend(o):o.remove()},complete(){e.prepend(o)}}),i.pipe(Me(16,me)).subscribe(({active:s})=>{o.classList.toggle("md-tooltip--active",s)}),i.pipe(pt(125,me),b(()=>!!e.offsetParent),m(()=>e.offsetParent.getBoundingClientRect()),m(({x:s})=>s)).subscribe({next(s){s?e.style.setProperty("--md-tooltip-0",`${-s}px`):e.style.removeProperty("--md-tooltip-0")},complete(){e.style.removeProperty("--md-tooltip-0")}}),h(n,"click").pipe(W(a),b(s=>!(s.metaKey||s.ctrlKey))).subscribe(s=>{s.stopPropagation(),s.preventDefault()}),h(n,"mousedown").pipe(W(a),re(i)).subscribe(([s,{active:p}])=>{var c;if(s.button!==0||s.metaKey||s.ctrlKey)s.preventDefault();else if(p){s.preventDefault();let l=e.parentElement.closest(".md-annotation");l instanceof HTMLElement?l.focus():(c=Ie())==null||c.blur()}}),r.pipe(W(a),b(s=>s===o),Ge(125)).subscribe(()=>e.focus()),Ua(e,t).pipe(w(s=>i.next(s)),_(()=>i.complete()),m(s=>$({ref:e},s)))})}function Wa(e){return e.tagName==="CODE"?P(".c, .c1, .cm",e):[e]}function Va(e){let t=[];for(let r of Wa(e)){let o=[],n=document.createNodeIterator(r,NodeFilter.SHOW_TEXT);for(let i=n.nextNode();i;i=n.nextNode())o.push(i);for(let i of o){let a;for(;a=/(\(\d+\))(!)?/.exec(i.textContent);){let[,s,p]=a;if(typeof p=="undefined"){let 
c=i.splitText(a.index);i=c.splitText(s.length),t.push(c)}else{i.textContent=s,t.push(i);break}}}}return t}function Hn(e,t){t.append(...Array.from(e.childNodes))}function fr(e,t,{target$:r,print$:o}){let n=t.closest("[id]"),i=n==null?void 0:n.id,a=new Map;for(let s of Va(t)){let[,p]=s.textContent.match(/\((\d+)\)/);fe(`:scope > li:nth-child(${p})`,e)&&(a.set(p,Tn(p,i)),s.replaceWith(a.get(p)))}return a.size===0?S:C(()=>{let s=new g,p=s.pipe(Z(),ie(!0)),c=[];for(let[l,f]of a)c.push([R(".md-typeset",f),R(`:scope > li:nth-child(${l})`,e)]);return o.pipe(W(p)).subscribe(l=>{e.hidden=!l,e.classList.toggle("md-annotation-list",l);for(let[f,u]of c)l?Hn(f,u):Hn(u,f)}),O(...[...a].map(([,l])=>kn(l,t,{target$:r}))).pipe(_(()=>s.complete()),pe())})}function $n(e){if(e.nextElementSibling){let t=e.nextElementSibling;if(t.tagName==="OL")return t;if(t.tagName==="P"&&!t.children.length)return $n(t)}}function Pn(e,t){return C(()=>{let r=$n(e);return typeof r!="undefined"?fr(r,e,t):S})}var Rn=Lt(Br());var Da=0;function In(e){if(e.nextElementSibling){let t=e.nextElementSibling;if(t.tagName==="OL")return t;if(t.tagName==="P"&&!t.children.length)return In(t)}}function Na(e){return ge(e).pipe(m(({width:t})=>({scrollable:Tt(e).width>t})),te("scrollable"))}function jn(e,t){let{matches:r}=matchMedia("(hover)"),o=C(()=>{let n=new g,i=n.pipe(jr(1));n.subscribe(({scrollable:c})=>{c&&r?e.setAttribute("tabindex","0"):e.removeAttribute("tabindex")});let a=[];if(Rn.default.isSupported()&&(e.closest(".copy")||B("content.code.copy")&&!e.closest(".no-copy"))){let c=e.closest("pre");c.id=`__code_${Da++}`;let l=Sn(c.id);c.insertBefore(l,e),B("content.tooltips")&&a.push(mt(l,{viewport$}))}let s=e.closest(".highlight");if(s instanceof HTMLElement){let c=In(s);if(typeof c!="undefined"&&(s.classList.contains("annotate")||B("content.code.annotate"))){let l=fr(c,e,t);a.push(ge(s).pipe(W(i),m(({width:f,height:u})=>f&&u),K(),v(f=>f?l:S)))}}return P(":scope > 
span[id]",e).length&&e.classList.add("md-code__content"),Na(e).pipe(w(c=>n.next(c)),_(()=>n.complete()),m(c=>$({ref:e},c)),Re(...a))});return B("content.lazy")?tt(e).pipe(b(n=>n),Te(1),v(()=>o)):o}function za(e,{target$:t,print$:r}){let o=!0;return O(t.pipe(m(n=>n.closest("details:not([open])")),b(n=>e===n),m(()=>({action:"open",reveal:!0}))),r.pipe(b(n=>n||!o),w(()=>o=e.open),m(n=>({action:n?"open":"close"}))))}function Fn(e,t){return C(()=>{let r=new g;return r.subscribe(({action:o,reveal:n})=>{e.toggleAttribute("open",o==="open"),n&&e.scrollIntoView()}),za(e,t).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))})}var Un=".node circle,.node ellipse,.node path,.node polygon,.node rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}marker{fill:var(--md-mermaid-edge-color)!important}.edgeLabel .label rect{fill:#0000}.flowchartTitleText{fill:var(--md-mermaid-label-fg-color)}.label{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.label foreignObject{line-height:normal;overflow:visible}.label div .edgeLabel{color:var(--md-mermaid-label-fg-color)}.edgeLabel,.edgeLabel p,.label div .edgeLabel{background-color:var(--md-mermaid-label-bg-color)}.edgeLabel,.edgeLabel p{fill:var(--md-mermaid-label-bg-color);color:var(--md-mermaid-edge-color)}.edgePath .path,.flowchart-link{stroke:var(--md-mermaid-edge-color);stroke-width:.05rem}.edgePath .arrowheadPath{fill:var(--md-mermaid-edge-color);stroke:none}.cluster rect{fill:var(--md-default-fg-color--lightest);stroke:var(--md-default-fg-color--lighter)}.cluster span{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}g #flowchart-circleEnd,g #flowchart-circleStart,g #flowchart-crossEnd,g #flowchart-crossStart,g #flowchart-pointEnd,g #flowchart-pointStart{stroke:none}.classDiagramTitleText{fill:var(--md-mermaid-label-fg-color)}g.classGroup line,g.classGroup 
rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}g.classGroup text{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.classLabel .box{fill:var(--md-mermaid-label-bg-color);background-color:var(--md-mermaid-label-bg-color);opacity:1}.classLabel .label{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.node .divider{stroke:var(--md-mermaid-node-fg-color)}.relation{stroke:var(--md-mermaid-edge-color)}.cardinality{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.cardinality text{fill:inherit!important}defs marker.marker.composition.class path,defs marker.marker.dependency.class path,defs marker.marker.extension.class path{fill:var(--md-mermaid-edge-color)!important;stroke:var(--md-mermaid-edge-color)!important}defs marker.marker.aggregation.class path{fill:var(--md-mermaid-label-bg-color)!important;stroke:var(--md-mermaid-edge-color)!important}.statediagramTitleText{fill:var(--md-mermaid-label-fg-color)}g.stateGroup rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}g.stateGroup .state-title{fill:var(--md-mermaid-label-fg-color)!important;font-family:var(--md-mermaid-font-family)}g.stateGroup .composit{fill:var(--md-mermaid-label-bg-color)}.nodeLabel,.nodeLabel p{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}a .nodeLabel{text-decoration:underline}.node circle.state-end,.node circle.state-start,.start-state{fill:var(--md-mermaid-edge-color);stroke:none}.end-state-inner,.end-state-outer{fill:var(--md-mermaid-edge-color)}.end-state-inner,.node circle.state-end{stroke:var(--md-mermaid-label-bg-color)}.transition{stroke:var(--md-mermaid-edge-color)}[id^=state-fork] rect,[id^=state-join] rect{fill:var(--md-mermaid-edge-color)!important;stroke:none!important}.statediagram-cluster.statediagram-cluster .inner{fill:var(--md-default-bg-color)}.statediagram-cluster 
rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}.statediagram-state rect.divider{fill:var(--md-default-fg-color--lightest);stroke:var(--md-default-fg-color--lighter)}defs #statediagram-barbEnd{stroke:var(--md-mermaid-edge-color)}.entityTitleText{fill:var(--md-mermaid-label-fg-color)}.attributeBoxEven,.attributeBoxOdd{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}.entityBox{fill:var(--md-mermaid-label-bg-color);stroke:var(--md-mermaid-node-fg-color)}.entityLabel{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.relationshipLabelBox{fill:var(--md-mermaid-label-bg-color);fill-opacity:1;background-color:var(--md-mermaid-label-bg-color);opacity:1}.relationshipLabel{fill:var(--md-mermaid-label-fg-color)}.relationshipLine{stroke:var(--md-mermaid-edge-color)}defs #ONE_OR_MORE_END *,defs #ONE_OR_MORE_START *,defs #ONLY_ONE_END *,defs #ONLY_ONE_START *,defs #ZERO_OR_MORE_END *,defs #ZERO_OR_MORE_START *,defs #ZERO_OR_ONE_END *,defs #ZERO_OR_ONE_START *{stroke:var(--md-mermaid-edge-color)!important}defs #ZERO_OR_MORE_END circle,defs #ZERO_OR_MORE_START circle{fill:var(--md-mermaid-label-bg-color)}text:not([class]):last-child{fill:var(--md-mermaid-label-fg-color)}.actor{fill:var(--md-mermaid-sequence-actor-bg-color);stroke:var(--md-mermaid-sequence-actor-border-color)}text.actor>tspan{fill:var(--md-mermaid-sequence-actor-fg-color);font-family:var(--md-mermaid-font-family)}line{stroke:var(--md-mermaid-sequence-actor-line-color)}.actor-man circle,.actor-man 
line{fill:var(--md-mermaid-sequence-actorman-bg-color);stroke:var(--md-mermaid-sequence-actorman-line-color)}.messageLine0,.messageLine1{stroke:var(--md-mermaid-sequence-message-line-color)}.note{fill:var(--md-mermaid-sequence-note-bg-color);stroke:var(--md-mermaid-sequence-note-border-color)}.loopText,.loopText>tspan,.messageText,.noteText>tspan{stroke:none;font-family:var(--md-mermaid-font-family)!important}.messageText{fill:var(--md-mermaid-sequence-message-fg-color)}.loopText,.loopText>tspan{fill:var(--md-mermaid-sequence-loop-fg-color)}.noteText>tspan{fill:var(--md-mermaid-sequence-note-fg-color)}#arrowhead path{fill:var(--md-mermaid-sequence-message-line-color);stroke:none}.loopLine{fill:var(--md-mermaid-sequence-loop-bg-color);stroke:var(--md-mermaid-sequence-loop-border-color)}.labelBox{fill:var(--md-mermaid-sequence-label-bg-color);stroke:none}.labelText,.labelText>span{fill:var(--md-mermaid-sequence-label-fg-color);font-family:var(--md-mermaid-font-family)}.sequenceNumber{fill:var(--md-mermaid-sequence-number-fg-color)}rect.rect{fill:var(--md-mermaid-sequence-box-bg-color);stroke:none}rect.rect+text.text{fill:var(--md-mermaid-sequence-box-fg-color)}defs #sequencenumber{fill:var(--md-mermaid-sequence-number-bg-color)!important}";var Gr,Qa=0;function Ka(){return typeof mermaid=="undefined"||mermaid instanceof Element?wt("https://unpkg.com/mermaid@11/dist/mermaid.min.js"):I(void 0)}function Wn(e){return e.classList.remove("mermaid"),Gr||(Gr=Ka().pipe(w(()=>mermaid.initialize({startOnLoad:!1,themeCSS:Un,sequence:{actorFontSize:"16px",messageFontSize:"16px",noteFontSize:"16px"}})),m(()=>{}),G(1))),Gr.subscribe(()=>co(this,null,function*(){e.classList.add("mermaid");let t=`__mermaid_${Qa++}`,r=x("div",{class:"mermaid"}),o=e.textContent,{svg:n,fn:i}=yield mermaid.render(t,o),a=r.attachShadow({mode:"closed"});a.innerHTML=n,e.replaceWith(r),i==null||i(a)})),Gr.pipe(m(()=>({ref:e})))}var Vn=x("table");function Dn(e){return 
e.replaceWith(Vn),Vn.replaceWith(An(e)),I({ref:e})}function Ya(e){let t=e.find(r=>r.checked)||e[0];return O(...e.map(r=>h(r,"change").pipe(m(()=>R(`label[for="${r.id}"]`))))).pipe(Q(R(`label[for="${t.id}"]`)),m(r=>({active:r})))}function Nn(e,{viewport$:t,target$:r}){let o=R(".tabbed-labels",e),n=P(":scope > input",e),i=Kr("prev");e.append(i);let a=Kr("next");return e.append(a),C(()=>{let s=new g,p=s.pipe(Z(),ie(!0));z([s,ge(e),tt(e)]).pipe(W(p),Me(1,me)).subscribe({next([{active:c},l]){let f=De(c),{width:u}=ce(c);e.style.setProperty("--md-indicator-x",`${f.x}px`),e.style.setProperty("--md-indicator-width",`${u}px`);let d=pr(o);(f.xd.x+l.width)&&o.scrollTo({left:Math.max(0,f.x-16),behavior:"smooth"})},complete(){e.style.removeProperty("--md-indicator-x"),e.style.removeProperty("--md-indicator-width")}}),z([Ne(o),ge(o)]).pipe(W(p)).subscribe(([c,l])=>{let f=Tt(o);i.hidden=c.x<16,a.hidden=c.x>f.width-l.width-16}),O(h(i,"click").pipe(m(()=>-1)),h(a,"click").pipe(m(()=>1))).pipe(W(p)).subscribe(c=>{let{width:l}=ce(o);o.scrollBy({left:l*c,behavior:"smooth"})}),r.pipe(W(p),b(c=>n.includes(c))).subscribe(c=>c.click()),o.classList.add("tabbed-labels--linked");for(let c of n){let l=R(`label[for="${c.id}"]`);l.replaceChildren(x("a",{href:`#${l.htmlFor}`,tabIndex:-1},...Array.from(l.childNodes))),h(l.firstElementChild,"click").pipe(W(p),b(f=>!(f.metaKey||f.ctrlKey)),w(f=>{f.preventDefault(),f.stopPropagation()})).subscribe(()=>{history.replaceState({},"",`#${l.htmlFor}`),l.click()})}return B("content.tabs.link")&&s.pipe(Ce(1),re(t)).subscribe(([{active:c},{offset:l}])=>{let f=c.innerText.trim();if(c.hasAttribute("data-md-switching"))c.removeAttribute("data-md-switching");else{let u=e.offsetTop-l.y;for(let y of P("[data-tabs]"))for(let L of P(":scope > input",y)){let X=R(`label[for="${L.id}"]`);if(X!==c&&X.innerText.trim()===f){X.setAttribute("data-md-switching",""),L.click();break}}window.scrollTo({top:e.offsetTop-u});let d=__md_get("__tabs")||[];__md_set("__tabs",[...new 
Set([f,...d])])}}),s.pipe(W(p)).subscribe(()=>{for(let c of P("audio, video",e))c.pause()}),Ya(n).pipe(w(c=>s.next(c)),_(()=>s.complete()),m(c=>$({ref:e},c)))}).pipe(Ke(se))}function zn(e,{viewport$:t,target$:r,print$:o}){return O(...P(".annotate:not(.highlight)",e).map(n=>Pn(n,{target$:r,print$:o})),...P("pre:not(.mermaid) > code",e).map(n=>jn(n,{target$:r,print$:o})),...P("pre.mermaid",e).map(n=>Wn(n)),...P("table:not([class])",e).map(n=>Dn(n)),...P("details",e).map(n=>Fn(n,{target$:r,print$:o})),...P("[data-tabs]",e).map(n=>Nn(n,{viewport$:t,target$:r})),...P("[title]",e).filter(()=>B("content.tooltips")).map(n=>mt(n,{viewport$:t})))}function Ba(e,{alert$:t}){return t.pipe(v(r=>O(I(!0),I(!1).pipe(Ge(2e3))).pipe(m(o=>({message:r,active:o})))))}function qn(e,t){let r=R(".md-typeset",e);return C(()=>{let o=new g;return o.subscribe(({message:n,active:i})=>{e.classList.toggle("md-dialog--active",i),r.textContent=n}),Ba(e,t).pipe(w(n=>o.next(n)),_(()=>o.complete()),m(n=>$({ref:e},n)))})}var Ga=0;function Ja(e,t){document.body.append(e);let{width:r}=ce(e);e.style.setProperty("--md-tooltip-width",`${r}px`),e.remove();let o=cr(t),n=typeof o!="undefined"?Ne(o):I({x:0,y:0}),i=O(et(t),Ht(t)).pipe(K());return z([i,n]).pipe(m(([a,s])=>{let{x:p,y:c}=De(t),l=ce(t),f=t.closest("table");return f&&t.parentElement&&(p+=f.offsetLeft+t.parentElement.offsetLeft,c+=f.offsetTop+t.parentElement.offsetTop),{active:a,offset:{x:p-s.x+l.width/2-r/2,y:c-s.y+l.height+8}}}))}function Qn(e){let t=e.title;if(!t.length)return S;let r=`__tooltip_${Ga++}`,o=Pt(r,"inline"),n=R(".md-typeset",o);return n.innerHTML=t,C(()=>{let i=new g;return 
i.subscribe({next({offset:a}){o.style.setProperty("--md-tooltip-x",`${a.x}px`),o.style.setProperty("--md-tooltip-y",`${a.y}px`)},complete(){o.style.removeProperty("--md-tooltip-x"),o.style.removeProperty("--md-tooltip-y")}}),O(i.pipe(b(({active:a})=>a)),i.pipe(_e(250),b(({active:a})=>!a))).subscribe({next({active:a}){a?(e.insertAdjacentElement("afterend",o),e.setAttribute("aria-describedby",r),e.removeAttribute("title")):(o.remove(),e.removeAttribute("aria-describedby"),e.setAttribute("title",t))},complete(){o.remove(),e.removeAttribute("aria-describedby"),e.setAttribute("title",t)}}),i.pipe(Me(16,me)).subscribe(({active:a})=>{o.classList.toggle("md-tooltip--active",a)}),i.pipe(pt(125,me),b(()=>!!e.offsetParent),m(()=>e.offsetParent.getBoundingClientRect()),m(({x:a})=>a)).subscribe({next(a){a?o.style.setProperty("--md-tooltip-0",`${-a}px`):o.style.removeProperty("--md-tooltip-0")},complete(){o.style.removeProperty("--md-tooltip-0")}}),Ja(o,e).pipe(w(a=>i.next(a)),_(()=>i.complete()),m(a=>$({ref:e},a)))}).pipe(Ke(se))}function Xa({viewport$:e}){if(!B("header.autohide"))return I(!1);let t=e.pipe(m(({offset:{y:n}})=>n),Be(2,1),m(([n,i])=>[nMath.abs(i-n.y)>100),m(([,[n]])=>n),K()),o=ze("search");return z([e,o]).pipe(m(([{offset:n},i])=>n.y>400&&!i),K(),v(n=>n?r:I(!1)),Q(!1))}function Kn(e,t){return C(()=>z([ge(e),Xa(t)])).pipe(m(([{height:r},o])=>({height:r,hidden:o})),K((r,o)=>r.height===o.height&&r.hidden===o.hidden),G(1))}function Yn(e,{header$:t,main$:r}){return C(()=>{let o=new g,n=o.pipe(Z(),ie(!0));o.pipe(te("active"),He(t)).subscribe(([{active:a},{hidden:s}])=>{e.classList.toggle("md-header--shadow",a&&!s),e.hidden=s});let i=ue(P("[title]",e)).pipe(b(()=>B("content.tooltips")),ne(a=>Qn(a)));return r.subscribe(o),t.pipe(W(n),m(a=>$({ref:e},a)),Re(i.pipe(W(n))))})}function Za(e,{viewport$:t,header$:r}){return mr(e,{viewport$:t,header$:r}).pipe(m(({offset:{y:o}})=>{let{height:n}=ce(e);return{active:o>=n}}),te("active"))}function Bn(e,t){return C(()=>{let r=new 
g;r.subscribe({next({active:n}){e.classList.toggle("md-header__title--active",n)},complete(){e.classList.remove("md-header__title--active")}});let o=fe(".md-content h1");return typeof o=="undefined"?S:Za(o,t).pipe(w(n=>r.next(n)),_(()=>r.complete()),m(n=>$({ref:e},n)))})}function Gn(e,{viewport$:t,header$:r}){let o=r.pipe(m(({height:i})=>i),K()),n=o.pipe(v(()=>ge(e).pipe(m(({height:i})=>({top:e.offsetTop,bottom:e.offsetTop+i})),te("bottom"))));return z([o,n,t]).pipe(m(([i,{top:a,bottom:s},{offset:{y:p},size:{height:c}}])=>(c=Math.max(0,c-Math.max(0,a-p,i)-Math.max(0,c+p-s)),{offset:a-i,height:c,active:a-i<=p})),K((i,a)=>i.offset===a.offset&&i.height===a.height&&i.active===a.active))}function es(e){let t=__md_get("__palette")||{index:e.findIndex(o=>matchMedia(o.getAttribute("data-md-color-media")).matches)},r=Math.max(0,Math.min(t.index,e.length-1));return I(...e).pipe(ne(o=>h(o,"change").pipe(m(()=>o))),Q(e[r]),m(o=>({index:e.indexOf(o),color:{media:o.getAttribute("data-md-color-media"),scheme:o.getAttribute("data-md-color-scheme"),primary:o.getAttribute("data-md-color-primary"),accent:o.getAttribute("data-md-color-accent")}})),G(1))}function Jn(e){let t=P("input",e),r=x("meta",{name:"theme-color"});document.head.appendChild(r);let o=x("meta",{name:"color-scheme"});document.head.appendChild(o);let n=$t("(prefers-color-scheme: light)");return C(()=>{let i=new g;return i.subscribe(a=>{if(document.body.setAttribute("data-md-color-switching",""),a.color.media==="(prefers-color-scheme)"){let s=matchMedia("(prefers-color-scheme: light)"),p=document.querySelector(s.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");a.color.scheme=p.getAttribute("data-md-color-scheme"),a.color.primary=p.getAttribute("data-md-color-primary"),a.color.accent=p.getAttribute("data-md-color-accent")}for(let[s,p]of Object.entries(a.color))document.body.setAttribute(`data-md-color-${s}`,p);for(let 
s=0;sa.key==="Enter"),re(i,(a,s)=>s)).subscribe(({index:a})=>{a=(a+1)%t.length,t[a].click(),t[a].focus()}),i.pipe(m(()=>{let a=Se("header"),s=window.getComputedStyle(a);return o.content=s.colorScheme,s.backgroundColor.match(/\d+/g).map(p=>(+p).toString(16).padStart(2,"0")).join("")})).subscribe(a=>r.content=`#${a}`),i.pipe(ve(se)).subscribe(()=>{document.body.removeAttribute("data-md-color-switching")}),es(t).pipe(W(n.pipe(Ce(1))),ct(),w(a=>i.next(a)),_(()=>i.complete()),m(a=>$({ref:e},a)))})}function Xn(e,{progress$:t}){return C(()=>{let r=new g;return r.subscribe(({value:o})=>{e.style.setProperty("--md-progress-value",`${o}`)}),t.pipe(w(o=>r.next({value:o})),_(()=>r.complete()),m(o=>({ref:e,value:o})))})}var Jr=Lt(Br());function ts(e){e.setAttribute("data-md-copying","");let t=e.closest("[data-copy]"),r=t?t.getAttribute("data-copy"):e.innerText;return e.removeAttribute("data-md-copying"),r.trimEnd()}function Zn({alert$:e}){Jr.default.isSupported()&&new j(t=>{new Jr.default("[data-clipboard-target], [data-clipboard-text]",{text:r=>r.getAttribute("data-clipboard-text")||ts(R(r.getAttribute("data-clipboard-target")))}).on("success",r=>t.next(r))}).pipe(w(t=>{t.trigger.focus()}),m(()=>Ee("clipboard.copied"))).subscribe(e)}function ei(e,t){return e.protocol=t.protocol,e.hostname=t.hostname,e}function rs(e,t){let r=new Map;for(let o of P("url",e)){let n=R("loc",o),i=[ei(new URL(n.textContent),t)];r.set(`${i[0]}`,i);for(let a of P("[rel=alternate]",o)){let s=a.getAttribute("href");s!=null&&i.push(ei(new URL(s),t))}}return r}function ur(e){return un(new URL("sitemap.xml",e)).pipe(m(t=>rs(t,new URL(e))),de(()=>I(new Map)))}function os(e,t){if(!(e.target instanceof Element))return S;let r=e.target.closest("a");if(r===null)return S;if(r.target||e.metaKey||e.ctrlKey)return S;let o=new URL(r.href);return o.search=o.hash="",t.has(`${o}`)?(e.preventDefault(),I(new URL(r.href))):S}function ti(e){let t=new Map;for(let r of P(":scope > *",e.head))t.set(r.outerHTML,r);return 
t}function ri(e){for(let t of P("[href], [src]",e))for(let r of["href","src"]){let o=t.getAttribute(r);if(o&&!/^(?:[a-z]+:)?\/\//i.test(o)){t[r]=t[r];break}}return I(e)}function ns(e){for(let o of["[data-md-component=announce]","[data-md-component=container]","[data-md-component=header-topic]","[data-md-component=outdated]","[data-md-component=logo]","[data-md-component=skip]",...B("navigation.tabs.sticky")?["[data-md-component=tabs]"]:[]]){let n=fe(o),i=fe(o,e);typeof n!="undefined"&&typeof i!="undefined"&&n.replaceWith(i)}let t=ti(document);for(let[o,n]of ti(e))t.has(o)?t.delete(o):document.head.appendChild(n);for(let o of t.values()){let n=o.getAttribute("name");n!=="theme-color"&&n!=="color-scheme"&&o.remove()}let r=Se("container");return We(P("script",r)).pipe(v(o=>{let n=e.createElement("script");if(o.src){for(let i of o.getAttributeNames())n.setAttribute(i,o.getAttribute(i));return o.replaceWith(n),new j(i=>{n.onload=()=>i.complete()})}else return n.textContent=o.textContent,o.replaceWith(n),S}),Z(),ie(document))}function oi({location$:e,viewport$:t,progress$:r}){let o=xe();if(location.protocol==="file:")return S;let n=ur(o.base);I(document).subscribe(ri);let i=h(document.body,"click").pipe(He(n),v(([p,c])=>os(p,c)),pe()),a=h(window,"popstate").pipe(m(ye),pe());i.pipe(re(t)).subscribe(([p,{offset:c}])=>{history.replaceState(c,""),history.pushState(null,"",p)}),O(i,a).subscribe(e);let s=e.pipe(te("pathname"),v(p=>fn(p,{progress$:r}).pipe(de(()=>(lt(p,!0),S)))),v(ri),v(ns),pe());return O(s.pipe(re(e,(p,c)=>c)),s.pipe(v(()=>e),te("hash")),e.pipe(K((p,c)=>p.pathname===c.pathname&&p.hash===c.hash),v(()=>i),w(()=>history.back()))).subscribe(p=>{var c,l;history.state!==null||!p.hash?window.scrollTo(0,(l=(c=history.state)==null?void 
0:c.y)!=null?l:0):(history.scrollRestoration="auto",pn(p.hash),history.scrollRestoration="manual")}),e.subscribe(()=>{history.scrollRestoration="manual"}),h(window,"beforeunload").subscribe(()=>{history.scrollRestoration="auto"}),t.pipe(te("offset"),_e(100)).subscribe(({offset:p})=>{history.replaceState(p,"")}),s}var ni=Lt(qr());function ii(e){let t=e.separator.split("|").map(n=>n.replace(/(\(\?[!=<][^)]+\))/g,"").length===0?"\uFFFD":n).join("|"),r=new RegExp(t,"img"),o=(n,i,a)=>`${i}${a}`;return n=>{n=n.replace(/[\s*+\-:~^]+/g," ").trim();let i=new RegExp(`(^|${e.separator}|)(${n.replace(/[|\\{}()[\]^$+*?.-]/g,"\\$&").replace(r,"|")})`,"img");return a=>(0,ni.default)(a).replace(i,o).replace(/<\/mark>(\s+)]*>/img,"$1")}}function It(e){return e.type===1}function dr(e){return e.type===3}function ai(e,t){let r=yn(e);return O(I(location.protocol!=="file:"),ze("search")).pipe(Ae(o=>o),v(()=>t)).subscribe(({config:o,docs:n})=>r.next({type:0,data:{config:o,docs:n,options:{suggest:B("search.suggest")}}})),r}function si(e){var l;let{selectedVersionSitemap:t,selectedVersionBaseURL:r,currentLocation:o,currentBaseURL:n}=e,i=(l=Xr(n))==null?void 0:l.pathname;if(i===void 0)return;let a=ss(o.pathname,i);if(a===void 0)return;let s=ps(t.keys());if(!t.has(s))return;let p=Xr(a,s);if(!p||!t.has(p.href))return;let c=Xr(a,r);if(c)return c.hash=o.hash,c.search=o.search,c}function Xr(e,t){try{return new URL(e,t)}catch(r){return}}function ss(e,t){if(e.startsWith(t))return e.slice(t.length)}function cs(e,t){let r=Math.min(e.length,t.length),o;for(o=0;oS)),o=r.pipe(m(n=>{let[,i]=t.base.match(/([^/]+)\/?$/);return n.find(({version:a,aliases:s})=>a===i||s.includes(i))||n[0]}));r.pipe(m(n=>new Map(n.map(i=>[`${new URL(`../${i.version}/`,t.base)}`,i]))),v(n=>h(document.body,"click").pipe(b(i=>!i.metaKey&&!i.ctrlKey),re(o),v(([i,a])=>{if(i.target instanceof Element){let s=i.target.closest("a");if(s&&!s.target&&n.has(s.href)){let 
p=s.href;return!i.target.closest(".md-version")&&n.get(p)===a?S:(i.preventDefault(),I(new URL(p)))}}return S}),v(i=>ur(i).pipe(m(a=>{var s;return(s=si({selectedVersionSitemap:a,selectedVersionBaseURL:i,currentLocation:ye(),currentBaseURL:t.base}))!=null?s:i})))))).subscribe(n=>lt(n,!0)),z([r,o]).subscribe(([n,i])=>{R(".md-header__topic").appendChild(Cn(n,i))}),e.pipe(v(()=>o)).subscribe(n=>{var s;let i=new URL(t.base),a=__md_get("__outdated",sessionStorage,i);if(a===null){a=!0;let p=((s=t.version)==null?void 0:s.default)||"latest";Array.isArray(p)||(p=[p]);e:for(let c of p)for(let l of n.aliases.concat(n.version))if(new RegExp(c,"i").test(l)){a=!1;break e}__md_set("__outdated",a,sessionStorage,i)}if(a)for(let p of ae("outdated"))p.hidden=!1})}function ls(e,{worker$:t}){let{searchParams:r}=ye();r.has("q")&&(Je("search",!0),e.value=r.get("q"),e.focus(),ze("search").pipe(Ae(i=>!i)).subscribe(()=>{let i=ye();i.searchParams.delete("q"),history.replaceState({},"",`${i}`)}));let o=et(e),n=O(t.pipe(Ae(It)),h(e,"keyup"),o).pipe(m(()=>e.value),K());return z([n,o]).pipe(m(([i,a])=>({value:i,focus:a})),G(1))}function pi(e,{worker$:t}){let r=new g,o=r.pipe(Z(),ie(!0));z([t.pipe(Ae(It)),r],(i,a)=>a).pipe(te("value")).subscribe(({value:i})=>t.next({type:2,data:i})),r.pipe(te("focus")).subscribe(({focus:i})=>{i&&Je("search",i)}),h(e.form,"reset").pipe(W(o)).subscribe(()=>e.focus());let n=R("header [for=__search]");return h(n,"click").subscribe(()=>e.focus()),ls(e,{worker$:t}).pipe(w(i=>r.next(i)),_(()=>r.complete()),m(i=>$({ref:e},i)),G(1))}function li(e,{worker$:t,query$:r}){let o=new g,n=on(e.parentElement).pipe(b(Boolean)),i=e.parentElement,a=R(":scope > :first-child",e),s=R(":scope > :last-child",e);ze("search").subscribe(l=>s.setAttribute("role",l?"list":"presentation")),o.pipe(re(r),Wr(t.pipe(Ae(It)))).subscribe(([{items:l},{value:f}])=>{switch(l.length){case 0:a.textContent=f.length?Ee("search.result.none"):Ee("search.result.placeholder");break;case 
1:a.textContent=Ee("search.result.one");break;default:let u=sr(l.length);a.textContent=Ee("search.result.other",u)}});let p=o.pipe(w(()=>s.innerHTML=""),v(({items:l})=>O(I(...l.slice(0,10)),I(...l.slice(10)).pipe(Be(4),Dr(n),v(([f])=>f)))),m(Mn),pe());return p.subscribe(l=>s.appendChild(l)),p.pipe(ne(l=>{let f=fe("details",l);return typeof f=="undefined"?S:h(f,"toggle").pipe(W(o),m(()=>f))})).subscribe(l=>{l.open===!1&&l.offsetTop<=i.scrollTop&&i.scrollTo({top:l.offsetTop})}),t.pipe(b(dr),m(({data:l})=>l)).pipe(w(l=>o.next(l)),_(()=>o.complete()),m(l=>$({ref:e},l)))}function ms(e,{query$:t}){return t.pipe(m(({value:r})=>{let o=ye();return o.hash="",r=r.replace(/\s+/g,"+").replace(/&/g,"%26").replace(/=/g,"%3D"),o.search=`q=${r}`,{url:o}}))}function mi(e,t){let r=new g,o=r.pipe(Z(),ie(!0));return r.subscribe(({url:n})=>{e.setAttribute("data-clipboard-text",e.href),e.href=`${n}`}),h(e,"click").pipe(W(o)).subscribe(n=>n.preventDefault()),ms(e,t).pipe(w(n=>r.next(n)),_(()=>r.complete()),m(n=>$({ref:e},n)))}function fi(e,{worker$:t,keyboard$:r}){let o=new g,n=Se("search-query"),i=O(h(n,"keydown"),h(n,"focus")).pipe(ve(se),m(()=>n.value),K());return o.pipe(He(i),m(([{suggest:s},p])=>{let c=p.split(/([\s-]+)/);if(s!=null&&s.length&&c[c.length-1]){let l=s[s.length-1];l.startsWith(c[c.length-1])&&(c[c.length-1]=l)}else c.length=0;return c})).subscribe(s=>e.innerHTML=s.join("").replace(/\s/g," ")),r.pipe(b(({mode:s})=>s==="search")).subscribe(s=>{switch(s.type){case"ArrowRight":e.innerText.length&&n.selectionStart===n.value.length&&(n.value=e.innerText);break}}),t.pipe(b(dr),m(({data:s})=>s)).pipe(w(s=>o.next(s)),_(()=>o.complete()),m(()=>({ref:e})))}function ui(e,{index$:t,keyboard$:r}){let o=xe();try{let n=ai(o.search,t),i=Se("search-query",e),a=Se("search-result",e);h(e,"click").pipe(b(({target:p})=>p instanceof Element&&!!p.closest("a"))).subscribe(()=>Je("search",!1)),r.pipe(b(({mode:p})=>p==="search")).subscribe(p=>{let c=Ie();switch(p.type){case"Enter":if(c===i){let 
l=new Map;for(let f of P(":first-child [href]",a)){let u=f.firstElementChild;l.set(f,parseFloat(u.getAttribute("data-md-score")))}if(l.size){let[[f]]=[...l].sort(([,u],[,d])=>d-u);f.click()}p.claim()}break;case"Escape":case"Tab":Je("search",!1),i.blur();break;case"ArrowUp":case"ArrowDown":if(typeof c=="undefined")i.focus();else{let l=[i,...P(":not(details) > [href], summary, details[open] [href]",a)],f=Math.max(0,(Math.max(0,l.indexOf(c))+l.length+(p.type==="ArrowUp"?-1:1))%l.length);l[f].focus()}p.claim();break;default:i!==Ie()&&i.focus()}}),r.pipe(b(({mode:p})=>p==="global")).subscribe(p=>{switch(p.type){case"f":case"s":case"/":i.focus(),i.select(),p.claim();break}});let s=pi(i,{worker$:n});return O(s,li(a,{worker$:n,query$:s})).pipe(Re(...ae("search-share",e).map(p=>mi(p,{query$:s})),...ae("search-suggest",e).map(p=>fi(p,{worker$:n,keyboard$:r}))))}catch(n){return e.hidden=!0,Ye}}function di(e,{index$:t,location$:r}){return z([t,r.pipe(Q(ye()),b(o=>!!o.searchParams.get("h")))]).pipe(m(([o,n])=>ii(o.config)(n.searchParams.get("h"))),m(o=>{var a;let n=new Map,i=document.createNodeIterator(e,NodeFilter.SHOW_TEXT);for(let s=i.nextNode();s;s=i.nextNode())if((a=s.parentElement)!=null&&a.offsetHeight){let p=s.textContent,c=o(p);c.length>p.length&&n.set(s,c)}for(let[s,p]of n){let{childNodes:c}=x("span",null,p);s.replaceWith(...Array.from(c))}return{ref:e,nodes:n}}))}function fs(e,{viewport$:t,main$:r}){let o=e.closest(".md-grid"),n=o.offsetTop-o.parentElement.offsetTop;return z([r,t]).pipe(m(([{offset:i,height:a},{offset:{y:s}}])=>(a=a+Math.min(n,Math.max(0,s-i))-n,{height:a,locked:s>=i+n})),K((i,a)=>i.height===a.height&&i.locked===a.locked))}function Zr(e,o){var n=o,{header$:t}=n,r=so(n,["header$"]);let i=R(".md-sidebar__scrollwrap",e),{y:a}=De(i);return C(()=>{let s=new g,p=s.pipe(Z(),ie(!0)),c=s.pipe(Me(0,me));return 
c.pipe(re(t)).subscribe({next([{height:l},{height:f}]){i.style.height=`${l-2*a}px`,e.style.top=`${f}px`},complete(){i.style.height="",e.style.top=""}}),c.pipe(Ae()).subscribe(()=>{for(let l of P(".md-nav__link--active[href]",e)){if(!l.clientHeight)continue;let f=l.closest(".md-sidebar__scrollwrap");if(typeof f!="undefined"){let u=l.offsetTop-f.offsetTop,{height:d}=ce(f);f.scrollTo({top:u-d/2})}}}),ue(P("label[tabindex]",e)).pipe(ne(l=>h(l,"click").pipe(ve(se),m(()=>l),W(p)))).subscribe(l=>{let f=R(`[id="${l.htmlFor}"]`);R(`[aria-labelledby="${l.id}"]`).setAttribute("aria-expanded",`${f.checked}`)}),fs(e,r).pipe(w(l=>s.next(l)),_(()=>s.complete()),m(l=>$({ref:e},l)))})}function hi(e,t){if(typeof t!="undefined"){let r=`https://api.github.com/repos/${e}/${t}`;return st(je(`${r}/releases/latest`).pipe(de(()=>S),m(o=>({version:o.tag_name})),Ve({})),je(r).pipe(de(()=>S),m(o=>({stars:o.stargazers_count,forks:o.forks_count})),Ve({}))).pipe(m(([o,n])=>$($({},o),n)))}else{let r=`https://api.github.com/users/${e}`;return je(r).pipe(m(o=>({repositories:o.public_repos})),Ve({}))}}function bi(e,t){let r=`https://${e}/api/v4/projects/${encodeURIComponent(t)}`;return st(je(`${r}/releases/permalink/latest`).pipe(de(()=>S),m(({tag_name:o})=>({version:o})),Ve({})),je(r).pipe(de(()=>S),m(({star_count:o,forks_count:n})=>({stars:o,forks:n})),Ve({}))).pipe(m(([o,n])=>$($({},o),n)))}function vi(e){let t=e.match(/^.+github\.com\/([^/]+)\/?([^/]+)?/i);if(t){let[,r,o]=t;return hi(r,o)}if(t=e.match(/^.+?([^/]*gitlab[^/]+)\/(.+?)\/?$/i),t){let[,r,o]=t;return bi(r,o)}return S}var us;function ds(e){return us||(us=C(()=>{let t=__md_get("__source",sessionStorage);if(t)return I(t);if(ae("consent").length){let o=__md_get("__consent");if(!(o&&o.github))return S}return vi(e.href).pipe(w(o=>__md_set("__source",o,sessionStorage)))}).pipe(de(()=>S),b(t=>Object.keys(t).length>0),m(t=>({facts:t})),G(1)))}function gi(e){let t=R(":scope > :last-child",e);return C(()=>{let r=new g;return 
r.subscribe(({facts:o})=>{t.appendChild(_n(o)),t.classList.add("md-source__repository--active")}),ds(e).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))})}function hs(e,{viewport$:t,header$:r}){return ge(document.body).pipe(v(()=>mr(e,{header$:r,viewport$:t})),m(({offset:{y:o}})=>({hidden:o>=10})),te("hidden"))}function yi(e,t){return C(()=>{let r=new g;return r.subscribe({next({hidden:o}){e.hidden=o},complete(){e.hidden=!1}}),(B("navigation.tabs.sticky")?I({hidden:!1}):hs(e,t)).pipe(w(o=>r.next(o)),_(()=>r.complete()),m(o=>$({ref:e},o)))})}function bs(e,{viewport$:t,header$:r}){let o=new Map,n=P(".md-nav__link",e);for(let s of n){let p=decodeURIComponent(s.hash.substring(1)),c=fe(`[id="${p}"]`);typeof c!="undefined"&&o.set(s,c)}let i=r.pipe(te("height"),m(({height:s})=>{let p=Se("main"),c=R(":scope > :first-child",p);return s+.8*(c.offsetTop-p.offsetTop)}),pe());return ge(document.body).pipe(te("height"),v(s=>C(()=>{let p=[];return I([...o].reduce((c,[l,f])=>{for(;p.length&&o.get(p[p.length-1]).tagName>=f.tagName;)p.pop();let u=f.offsetTop;for(;!u&&f.parentElement;)f=f.parentElement,u=f.offsetTop;let d=f.offsetParent;for(;d;d=d.offsetParent)u+=d.offsetTop;return c.set([...p=[...p,l]].reverse(),u)},new Map))}).pipe(m(p=>new Map([...p].sort(([,c],[,l])=>c-l))),He(i),v(([p,c])=>t.pipe(Fr(([l,f],{offset:{y:u},size:d})=>{let y=u+d.height>=Math.floor(s.height);for(;f.length;){let[,L]=f[0];if(L-c=u&&!y)f=[l.pop(),...f];else break}return[l,f]},[[],[...p]]),K((l,f)=>l[0]===f[0]&&l[1]===f[1])))))).pipe(m(([s,p])=>({prev:s.map(([c])=>c),next:p.map(([c])=>c)})),Q({prev:[],next:[]}),Be(2,1),m(([s,p])=>s.prev.length{let i=new g,a=i.pipe(Z(),ie(!0));if(i.subscribe(({prev:s,next:p})=>{for(let[c]of p)c.classList.remove("md-nav__link--passed"),c.classList.remove("md-nav__link--active");for(let[c,[l]]of s.entries())l.classList.add("md-nav__link--passed"),l.classList.toggle("md-nav__link--active",c===s.length-1)}),B("toc.follow")){let 
s=O(t.pipe(_e(1),m(()=>{})),t.pipe(_e(250),m(()=>"smooth")));i.pipe(b(({prev:p})=>p.length>0),He(o.pipe(ve(se))),re(s)).subscribe(([[{prev:p}],c])=>{let[l]=p[p.length-1];if(l.offsetHeight){let f=cr(l);if(typeof f!="undefined"){let u=l.offsetTop-f.offsetTop,{height:d}=ce(f);f.scrollTo({top:u-d/2,behavior:c})}}})}return B("navigation.tracking")&&t.pipe(W(a),te("offset"),_e(250),Ce(1),W(n.pipe(Ce(1))),ct({delay:250}),re(i)).subscribe(([,{prev:s}])=>{let p=ye(),c=s[s.length-1];if(c&&c.length){let[l]=c,{hash:f}=new URL(l.href);p.hash!==f&&(p.hash=f,history.replaceState({},"",`${p}`))}else p.hash="",history.replaceState({},"",`${p}`)}),bs(e,{viewport$:t,header$:r}).pipe(w(s=>i.next(s)),_(()=>i.complete()),m(s=>$({ref:e},s)))})}function vs(e,{viewport$:t,main$:r,target$:o}){let n=t.pipe(m(({offset:{y:a}})=>a),Be(2,1),m(([a,s])=>a>s&&s>0),K()),i=r.pipe(m(({active:a})=>a));return z([i,n]).pipe(m(([a,s])=>!(a&&s)),K(),W(o.pipe(Ce(1))),ie(!0),ct({delay:250}),m(a=>({hidden:a})))}function Ei(e,{viewport$:t,header$:r,main$:o,target$:n}){let i=new g,a=i.pipe(Z(),ie(!0));return i.subscribe({next({hidden:s}){e.hidden=s,s?(e.setAttribute("tabindex","-1"),e.blur()):e.removeAttribute("tabindex")},complete(){e.style.top="",e.hidden=!0,e.removeAttribute("tabindex")}}),r.pipe(W(a),te("height")).subscribe(({height:s})=>{e.style.top=`${s+16}px`}),h(e,"click").subscribe(s=>{s.preventDefault(),window.scrollTo({top:0})}),vs(e,{viewport$:t,main$:o,target$:n}).pipe(w(s=>i.next(s)),_(()=>i.complete()),m(s=>$({ref:e},s)))}function wi({document$:e,viewport$:t}){e.pipe(v(()=>P(".md-ellipsis")),ne(r=>tt(r).pipe(W(e.pipe(Ce(1))),b(o=>o),m(()=>r),Te(1))),b(r=>r.offsetWidth{let o=r.innerText,n=r.closest("a")||r;return n.title=o,B("content.tooltips")?mt(n,{viewport$:t}).pipe(W(e.pipe(Ce(1))),_(()=>n.removeAttribute("title"))):S})).subscribe(),B("content.tooltips")&&e.pipe(v(()=>P(".md-status")),ne(r=>mt(r,{viewport$:t}))).subscribe()}function 
Ti({document$:e,tablet$:t}){e.pipe(v(()=>P(".md-toggle--indeterminate")),w(r=>{r.indeterminate=!0,r.checked=!1}),ne(r=>h(r,"change").pipe(Vr(()=>r.classList.contains("md-toggle--indeterminate")),m(()=>r))),re(t)).subscribe(([r,o])=>{r.classList.remove("md-toggle--indeterminate"),o&&(r.checked=!1)})}function gs(){return/(iPad|iPhone|iPod)/.test(navigator.userAgent)}function Si({document$:e}){e.pipe(v(()=>P("[data-md-scrollfix]")),w(t=>t.removeAttribute("data-md-scrollfix")),b(gs),ne(t=>h(t,"touchstart").pipe(m(()=>t)))).subscribe(t=>{let r=t.scrollTop;r===0?t.scrollTop=1:r+t.offsetHeight===t.scrollHeight&&(t.scrollTop=r-1)})}function Oi({viewport$:e,tablet$:t}){z([ze("search"),t]).pipe(m(([r,o])=>r&&!o),v(r=>I(r).pipe(Ge(r?400:100))),re(e)).subscribe(([r,{offset:{y:o}}])=>{if(r)document.body.setAttribute("data-md-scrolllock",""),document.body.style.top=`-${o}px`;else{let n=-1*parseInt(document.body.style.top,10);document.body.removeAttribute("data-md-scrolllock"),document.body.style.top="",n&&window.scrollTo(0,n)}})}Object.entries||(Object.entries=function(e){let t=[];for(let r of Object.keys(e))t.push([r,e[r]]);return t});Object.values||(Object.values=function(e){let t=[];for(let r of Object.keys(e))t.push(e[r]);return t});typeof Element!="undefined"&&(Element.prototype.scrollTo||(Element.prototype.scrollTo=function(e,t){typeof e=="object"?(this.scrollLeft=e.left,this.scrollTop=e.top):(this.scrollLeft=e,this.scrollTop=t)}),Element.prototype.replaceWith||(Element.prototype.replaceWith=function(...e){let t=this.parentNode;if(t){e.length===0&&t.removeChild(this);for(let r=e.length-1;r>=0;r--){let o=e[r];typeof o=="string"?o=document.createTextNode(o):o.parentNode&&o.parentNode.removeChild(o),r?t.insertBefore(this.previousSibling,o):t.replaceChild(o,this)}}}));function ys(){return location.protocol==="file:"?wt(`${new URL("search/search_index.js",eo.base)}`).pipe(m(()=>__index),G(1)):je(new 
URL("search/search_index.json",eo.base))}document.documentElement.classList.remove("no-js");document.documentElement.classList.add("js");var ot=Go(),Ft=sn(),Ot=ln(Ft),to=an(),Oe=gn(),hr=$t("(min-width: 960px)"),Mi=$t("(min-width: 1220px)"),_i=mn(),eo=xe(),Ai=document.forms.namedItem("search")?ys():Ye,ro=new g;Zn({alert$:ro});var oo=new g;B("navigation.instant")&&oi({location$:Ft,viewport$:Oe,progress$:oo}).subscribe(ot);var Li;((Li=eo.version)==null?void 0:Li.provider)==="mike"&&ci({document$:ot});O(Ft,Ot).pipe(Ge(125)).subscribe(()=>{Je("drawer",!1),Je("search",!1)});to.pipe(b(({mode:e})=>e==="global")).subscribe(e=>{switch(e.type){case"p":case",":let t=fe("link[rel=prev]");typeof t!="undefined"&<(t);break;case"n":case".":let r=fe("link[rel=next]");typeof r!="undefined"&<(r);break;case"Enter":let o=Ie();o instanceof HTMLLabelElement&&o.click()}});wi({viewport$:Oe,document$:ot});Ti({document$:ot,tablet$:hr});Si({document$:ot});Oi({viewport$:Oe,tablet$:hr});var rt=Kn(Se("header"),{viewport$:Oe}),jt=ot.pipe(m(()=>Se("main")),v(e=>Gn(e,{viewport$:Oe,header$:rt})),G(1)),xs=O(...ae("consent").map(e=>En(e,{target$:Ot})),...ae("dialog").map(e=>qn(e,{alert$:ro})),...ae("palette").map(e=>Jn(e)),...ae("progress").map(e=>Xn(e,{progress$:oo})),...ae("search").map(e=>ui(e,{index$:Ai,keyboard$:to})),...ae("source").map(e=>gi(e))),Es=C(()=>O(...ae("announce").map(e=>xn(e)),...ae("content").map(e=>zn(e,{viewport$:Oe,target$:Ot,print$:_i})),...ae("content").map(e=>B("search.highlight")?di(e,{index$:Ai,location$:Ft}):S),...ae("header").map(e=>Yn(e,{viewport$:Oe,header$:rt,main$:jt})),...ae("header-title").map(e=>Bn(e,{viewport$:Oe,header$:rt})),...ae("sidebar").map(e=>e.getAttribute("data-md-type")==="navigation"?Nr(Mi,()=>Zr(e,{viewport$:Oe,header$:rt,main$:jt})):Nr(hr,()=>Zr(e,{viewport$:Oe,header$:rt,main$:jt}))),...ae("tabs").map(e=>yi(e,{viewport$:Oe,header$:rt})),...ae("toc").map(e=>xi(e,{viewport$:Oe,header$:rt,main$:jt,target$:Ot})),...ae("top").map(e=>Ei(e,{viewport$:Oe,head
er$:rt,main$:jt,target$:Ot})))),Ci=ot.pipe(v(()=>Es),Re(xs),G(1));Ci.subscribe();window.document$=ot;window.location$=Ft;window.target$=Ot;window.keyboard$=to;window.viewport$=Oe;window.tablet$=hr;window.screen$=Mi;window.print$=_i;window.alert$=ro;window.progress$=oo;window.component$=Ci;})(); -//# sourceMappingURL=bundle.c8b220af.min.js.map - diff --git a/site/assets/javascripts/bundle.c8b220af.min.js.map b/site/assets/javascripts/bundle.c8b220af.min.js.map deleted file mode 100644 index d835be56..00000000 --- a/site/assets/javascripts/bundle.c8b220af.min.js.map +++ /dev/null @@ -1,7 +0,0 @@ -{ - "version": 3, - "sources": ["node_modules/focus-visible/dist/focus-visible.js", "node_modules/escape-html/index.js", "node_modules/clipboard/dist/clipboard.js", "src/templates/assets/javascripts/bundle.ts", "node_modules/tslib/tslib.es6.mjs", "node_modules/rxjs/src/internal/util/isFunction.ts", "node_modules/rxjs/src/internal/util/createErrorClass.ts", "node_modules/rxjs/src/internal/util/UnsubscriptionError.ts", "node_modules/rxjs/src/internal/util/arrRemove.ts", "node_modules/rxjs/src/internal/Subscription.ts", "node_modules/rxjs/src/internal/config.ts", "node_modules/rxjs/src/internal/scheduler/timeoutProvider.ts", "node_modules/rxjs/src/internal/util/reportUnhandledError.ts", "node_modules/rxjs/src/internal/util/noop.ts", "node_modules/rxjs/src/internal/NotificationFactories.ts", "node_modules/rxjs/src/internal/util/errorContext.ts", "node_modules/rxjs/src/internal/Subscriber.ts", "node_modules/rxjs/src/internal/symbol/observable.ts", "node_modules/rxjs/src/internal/util/identity.ts", "node_modules/rxjs/src/internal/util/pipe.ts", "node_modules/rxjs/src/internal/Observable.ts", "node_modules/rxjs/src/internal/util/lift.ts", "node_modules/rxjs/src/internal/operators/OperatorSubscriber.ts", "node_modules/rxjs/src/internal/scheduler/animationFrameProvider.ts", "node_modules/rxjs/src/internal/util/ObjectUnsubscribedError.ts", "node_modules/rxjs/src/internal/Subject.ts", 
"node_modules/rxjs/src/internal/BehaviorSubject.ts", "node_modules/rxjs/src/internal/scheduler/dateTimestampProvider.ts", "node_modules/rxjs/src/internal/ReplaySubject.ts", "node_modules/rxjs/src/internal/scheduler/Action.ts", "node_modules/rxjs/src/internal/scheduler/intervalProvider.ts", "node_modules/rxjs/src/internal/scheduler/AsyncAction.ts", "node_modules/rxjs/src/internal/Scheduler.ts", "node_modules/rxjs/src/internal/scheduler/AsyncScheduler.ts", "node_modules/rxjs/src/internal/scheduler/async.ts", "node_modules/rxjs/src/internal/scheduler/QueueAction.ts", "node_modules/rxjs/src/internal/scheduler/QueueScheduler.ts", "node_modules/rxjs/src/internal/scheduler/queue.ts", "node_modules/rxjs/src/internal/scheduler/AnimationFrameAction.ts", "node_modules/rxjs/src/internal/scheduler/AnimationFrameScheduler.ts", "node_modules/rxjs/src/internal/scheduler/animationFrame.ts", "node_modules/rxjs/src/internal/observable/empty.ts", "node_modules/rxjs/src/internal/util/isScheduler.ts", "node_modules/rxjs/src/internal/util/args.ts", "node_modules/rxjs/src/internal/util/isArrayLike.ts", "node_modules/rxjs/src/internal/util/isPromise.ts", "node_modules/rxjs/src/internal/util/isInteropObservable.ts", "node_modules/rxjs/src/internal/util/isAsyncIterable.ts", "node_modules/rxjs/src/internal/util/throwUnobservableError.ts", "node_modules/rxjs/src/internal/symbol/iterator.ts", "node_modules/rxjs/src/internal/util/isIterable.ts", "node_modules/rxjs/src/internal/util/isReadableStreamLike.ts", "node_modules/rxjs/src/internal/observable/innerFrom.ts", "node_modules/rxjs/src/internal/util/executeSchedule.ts", "node_modules/rxjs/src/internal/operators/observeOn.ts", "node_modules/rxjs/src/internal/operators/subscribeOn.ts", "node_modules/rxjs/src/internal/scheduled/scheduleObservable.ts", "node_modules/rxjs/src/internal/scheduled/schedulePromise.ts", "node_modules/rxjs/src/internal/scheduled/scheduleArray.ts", "node_modules/rxjs/src/internal/scheduled/scheduleIterable.ts", 
"node_modules/rxjs/src/internal/scheduled/scheduleAsyncIterable.ts", "node_modules/rxjs/src/internal/scheduled/scheduleReadableStreamLike.ts", "node_modules/rxjs/src/internal/scheduled/scheduled.ts", "node_modules/rxjs/src/internal/observable/from.ts", "node_modules/rxjs/src/internal/observable/of.ts", "node_modules/rxjs/src/internal/observable/throwError.ts", "node_modules/rxjs/src/internal/util/EmptyError.ts", "node_modules/rxjs/src/internal/util/isDate.ts", "node_modules/rxjs/src/internal/operators/map.ts", "node_modules/rxjs/src/internal/util/mapOneOrManyArgs.ts", "node_modules/rxjs/src/internal/util/argsArgArrayOrObject.ts", "node_modules/rxjs/src/internal/util/createObject.ts", "node_modules/rxjs/src/internal/observable/combineLatest.ts", "node_modules/rxjs/src/internal/operators/mergeInternals.ts", "node_modules/rxjs/src/internal/operators/mergeMap.ts", "node_modules/rxjs/src/internal/operators/mergeAll.ts", "node_modules/rxjs/src/internal/operators/concatAll.ts", "node_modules/rxjs/src/internal/observable/concat.ts", "node_modules/rxjs/src/internal/observable/defer.ts", "node_modules/rxjs/src/internal/observable/fromEvent.ts", "node_modules/rxjs/src/internal/observable/fromEventPattern.ts", "node_modules/rxjs/src/internal/observable/timer.ts", "node_modules/rxjs/src/internal/observable/merge.ts", "node_modules/rxjs/src/internal/observable/never.ts", "node_modules/rxjs/src/internal/util/argsOrArgArray.ts", "node_modules/rxjs/src/internal/operators/filter.ts", "node_modules/rxjs/src/internal/observable/zip.ts", "node_modules/rxjs/src/internal/operators/audit.ts", "node_modules/rxjs/src/internal/operators/auditTime.ts", "node_modules/rxjs/src/internal/operators/bufferCount.ts", "node_modules/rxjs/src/internal/operators/catchError.ts", "node_modules/rxjs/src/internal/operators/scanInternals.ts", "node_modules/rxjs/src/internal/operators/combineLatest.ts", "node_modules/rxjs/src/internal/operators/combineLatestWith.ts", 
"node_modules/rxjs/src/internal/operators/debounce.ts", "node_modules/rxjs/src/internal/operators/debounceTime.ts", "node_modules/rxjs/src/internal/operators/defaultIfEmpty.ts", "node_modules/rxjs/src/internal/operators/take.ts", "node_modules/rxjs/src/internal/operators/ignoreElements.ts", "node_modules/rxjs/src/internal/operators/mapTo.ts", "node_modules/rxjs/src/internal/operators/delayWhen.ts", "node_modules/rxjs/src/internal/operators/delay.ts", "node_modules/rxjs/src/internal/operators/distinctUntilChanged.ts", "node_modules/rxjs/src/internal/operators/distinctUntilKeyChanged.ts", "node_modules/rxjs/src/internal/operators/throwIfEmpty.ts", "node_modules/rxjs/src/internal/operators/endWith.ts", "node_modules/rxjs/src/internal/operators/finalize.ts", "node_modules/rxjs/src/internal/operators/first.ts", "node_modules/rxjs/src/internal/operators/takeLast.ts", "node_modules/rxjs/src/internal/operators/merge.ts", "node_modules/rxjs/src/internal/operators/mergeWith.ts", "node_modules/rxjs/src/internal/operators/repeat.ts", "node_modules/rxjs/src/internal/operators/scan.ts", "node_modules/rxjs/src/internal/operators/share.ts", "node_modules/rxjs/src/internal/operators/shareReplay.ts", "node_modules/rxjs/src/internal/operators/skip.ts", "node_modules/rxjs/src/internal/operators/skipUntil.ts", "node_modules/rxjs/src/internal/operators/startWith.ts", "node_modules/rxjs/src/internal/operators/switchMap.ts", "node_modules/rxjs/src/internal/operators/takeUntil.ts", "node_modules/rxjs/src/internal/operators/takeWhile.ts", "node_modules/rxjs/src/internal/operators/tap.ts", "node_modules/rxjs/src/internal/operators/throttle.ts", "node_modules/rxjs/src/internal/operators/throttleTime.ts", "node_modules/rxjs/src/internal/operators/withLatestFrom.ts", "node_modules/rxjs/src/internal/operators/zip.ts", "node_modules/rxjs/src/internal/operators/zipWith.ts", "src/templates/assets/javascripts/browser/document/index.ts", "src/templates/assets/javascripts/browser/element/_/index.ts", 
"src/templates/assets/javascripts/browser/element/focus/index.ts", "src/templates/assets/javascripts/browser/element/hover/index.ts", "src/templates/assets/javascripts/utilities/h/index.ts", "src/templates/assets/javascripts/utilities/round/index.ts", "src/templates/assets/javascripts/browser/script/index.ts", "src/templates/assets/javascripts/browser/element/size/_/index.ts", "src/templates/assets/javascripts/browser/element/size/content/index.ts", "src/templates/assets/javascripts/browser/element/offset/_/index.ts", "src/templates/assets/javascripts/browser/element/offset/content/index.ts", "src/templates/assets/javascripts/browser/element/visibility/index.ts", "src/templates/assets/javascripts/browser/toggle/index.ts", "src/templates/assets/javascripts/browser/keyboard/index.ts", "src/templates/assets/javascripts/browser/location/_/index.ts", "src/templates/assets/javascripts/browser/location/hash/index.ts", "src/templates/assets/javascripts/browser/media/index.ts", "src/templates/assets/javascripts/browser/request/index.ts", "src/templates/assets/javascripts/browser/viewport/offset/index.ts", "src/templates/assets/javascripts/browser/viewport/size/index.ts", "src/templates/assets/javascripts/browser/viewport/_/index.ts", "src/templates/assets/javascripts/browser/viewport/at/index.ts", "src/templates/assets/javascripts/browser/worker/index.ts", "src/templates/assets/javascripts/_/index.ts", "src/templates/assets/javascripts/components/_/index.ts", "src/templates/assets/javascripts/components/announce/index.ts", "src/templates/assets/javascripts/components/consent/index.ts", "src/templates/assets/javascripts/templates/tooltip/index.tsx", "src/templates/assets/javascripts/templates/annotation/index.tsx", "src/templates/assets/javascripts/templates/clipboard/index.tsx", "src/templates/assets/javascripts/templates/search/index.tsx", "src/templates/assets/javascripts/templates/source/index.tsx", "src/templates/assets/javascripts/templates/tabbed/index.tsx", 
"src/templates/assets/javascripts/templates/table/index.tsx", "src/templates/assets/javascripts/templates/version/index.tsx", "src/templates/assets/javascripts/components/tooltip2/index.ts", "src/templates/assets/javascripts/components/content/annotation/_/index.ts", "src/templates/assets/javascripts/components/content/annotation/list/index.ts", "src/templates/assets/javascripts/components/content/annotation/block/index.ts", "src/templates/assets/javascripts/components/content/code/_/index.ts", "src/templates/assets/javascripts/components/content/details/index.ts", "src/templates/assets/javascripts/components/content/mermaid/index.css", "src/templates/assets/javascripts/components/content/mermaid/index.ts", "src/templates/assets/javascripts/components/content/table/index.ts", "src/templates/assets/javascripts/components/content/tabs/index.ts", "src/templates/assets/javascripts/components/content/_/index.ts", "src/templates/assets/javascripts/components/dialog/index.ts", "src/templates/assets/javascripts/components/tooltip/index.ts", "src/templates/assets/javascripts/components/header/_/index.ts", "src/templates/assets/javascripts/components/header/title/index.ts", "src/templates/assets/javascripts/components/main/index.ts", "src/templates/assets/javascripts/components/palette/index.ts", "src/templates/assets/javascripts/components/progress/index.ts", "src/templates/assets/javascripts/integrations/clipboard/index.ts", "src/templates/assets/javascripts/integrations/sitemap/index.ts", "src/templates/assets/javascripts/integrations/instant/index.ts", "src/templates/assets/javascripts/integrations/search/highlighter/index.ts", "src/templates/assets/javascripts/integrations/search/worker/message/index.ts", "src/templates/assets/javascripts/integrations/search/worker/_/index.ts", "src/templates/assets/javascripts/integrations/version/findurl/index.ts", "src/templates/assets/javascripts/integrations/version/index.ts", 
"src/templates/assets/javascripts/components/search/query/index.ts", "src/templates/assets/javascripts/components/search/result/index.ts", "src/templates/assets/javascripts/components/search/share/index.ts", "src/templates/assets/javascripts/components/search/suggest/index.ts", "src/templates/assets/javascripts/components/search/_/index.ts", "src/templates/assets/javascripts/components/search/highlight/index.ts", "src/templates/assets/javascripts/components/sidebar/index.ts", "src/templates/assets/javascripts/components/source/facts/github/index.ts", "src/templates/assets/javascripts/components/source/facts/gitlab/index.ts", "src/templates/assets/javascripts/components/source/facts/_/index.ts", "src/templates/assets/javascripts/components/source/_/index.ts", "src/templates/assets/javascripts/components/tabs/index.ts", "src/templates/assets/javascripts/components/toc/index.ts", "src/templates/assets/javascripts/components/top/index.ts", "src/templates/assets/javascripts/patches/ellipsis/index.ts", "src/templates/assets/javascripts/patches/indeterminate/index.ts", "src/templates/assets/javascripts/patches/scrollfix/index.ts", "src/templates/assets/javascripts/patches/scrolllock/index.ts", "src/templates/assets/javascripts/polyfills/index.ts"], - "sourcesContent": ["(function (global, factory) {\n typeof exports === 'object' && typeof module !== 'undefined' ? factory() :\n typeof define === 'function' && define.amd ? 
define(factory) :\n (factory());\n}(this, (function () { 'use strict';\n\n /**\n * Applies the :focus-visible polyfill at the given scope.\n * A scope in this case is either the top-level Document or a Shadow Root.\n *\n * @param {(Document|ShadowRoot)} scope\n * @see https://github.com/WICG/focus-visible\n */\n function applyFocusVisiblePolyfill(scope) {\n var hadKeyboardEvent = true;\n var hadFocusVisibleRecently = false;\n var hadFocusVisibleRecentlyTimeout = null;\n\n var inputTypesAllowlist = {\n text: true,\n search: true,\n url: true,\n tel: true,\n email: true,\n password: true,\n number: true,\n date: true,\n month: true,\n week: true,\n time: true,\n datetime: true,\n 'datetime-local': true\n };\n\n /**\n * Helper function for legacy browsers and iframes which sometimes focus\n * elements like document, body, and non-interactive SVG.\n * @param {Element} el\n */\n function isValidFocusTarget(el) {\n if (\n el &&\n el !== document &&\n el.nodeName !== 'HTML' &&\n el.nodeName !== 'BODY' &&\n 'classList' in el &&\n 'contains' in el.classList\n ) {\n return true;\n }\n return false;\n }\n\n /**\n * Computes whether the given element should automatically trigger the\n * `focus-visible` class being added, i.e. 
whether it should always match\n * `:focus-visible` when focused.\n * @param {Element} el\n * @return {boolean}\n */\n function focusTriggersKeyboardModality(el) {\n var type = el.type;\n var tagName = el.tagName;\n\n if (tagName === 'INPUT' && inputTypesAllowlist[type] && !el.readOnly) {\n return true;\n }\n\n if (tagName === 'TEXTAREA' && !el.readOnly) {\n return true;\n }\n\n if (el.isContentEditable) {\n return true;\n }\n\n return false;\n }\n\n /**\n * Add the `focus-visible` class to the given element if it was not added by\n * the author.\n * @param {Element} el\n */\n function addFocusVisibleClass(el) {\n if (el.classList.contains('focus-visible')) {\n return;\n }\n el.classList.add('focus-visible');\n el.setAttribute('data-focus-visible-added', '');\n }\n\n /**\n * Remove the `focus-visible` class from the given element if it was not\n * originally added by the author.\n * @param {Element} el\n */\n function removeFocusVisibleClass(el) {\n if (!el.hasAttribute('data-focus-visible-added')) {\n return;\n }\n el.classList.remove('focus-visible');\n el.removeAttribute('data-focus-visible-added');\n }\n\n /**\n * If the most recent user interaction was via the keyboard;\n * and the key press did not include a meta, alt/option, or control key;\n * then the modality is keyboard. 
Otherwise, the modality is not keyboard.\n * Apply `focus-visible` to any current active element and keep track\n * of our keyboard modality state with `hadKeyboardEvent`.\n * @param {KeyboardEvent} e\n */\n function onKeyDown(e) {\n if (e.metaKey || e.altKey || e.ctrlKey) {\n return;\n }\n\n if (isValidFocusTarget(scope.activeElement)) {\n addFocusVisibleClass(scope.activeElement);\n }\n\n hadKeyboardEvent = true;\n }\n\n /**\n * If at any point a user clicks with a pointing device, ensure that we change\n * the modality away from keyboard.\n * This avoids the situation where a user presses a key on an already focused\n * element, and then clicks on a different element, focusing it with a\n * pointing device, while we still think we're in keyboard modality.\n * @param {Event} e\n */\n function onPointerDown(e) {\n hadKeyboardEvent = false;\n }\n\n /**\n * On `focus`, add the `focus-visible` class to the target if:\n * - the target received focus as a result of keyboard navigation, or\n * - the event target is an element that will likely require interaction\n * via the keyboard (e.g. 
a text box)\n * @param {Event} e\n */\n function onFocus(e) {\n // Prevent IE from focusing the document or HTML element.\n if (!isValidFocusTarget(e.target)) {\n return;\n }\n\n if (hadKeyboardEvent || focusTriggersKeyboardModality(e.target)) {\n addFocusVisibleClass(e.target);\n }\n }\n\n /**\n * On `blur`, remove the `focus-visible` class from the target.\n * @param {Event} e\n */\n function onBlur(e) {\n if (!isValidFocusTarget(e.target)) {\n return;\n }\n\n if (\n e.target.classList.contains('focus-visible') ||\n e.target.hasAttribute('data-focus-visible-added')\n ) {\n // To detect a tab/window switch, we look for a blur event followed\n // rapidly by a visibility change.\n // If we don't see a visibility change within 100ms, it's probably a\n // regular focus change.\n hadFocusVisibleRecently = true;\n window.clearTimeout(hadFocusVisibleRecentlyTimeout);\n hadFocusVisibleRecentlyTimeout = window.setTimeout(function() {\n hadFocusVisibleRecently = false;\n }, 100);\n removeFocusVisibleClass(e.target);\n }\n }\n\n /**\n * If the user changes tabs, keep track of whether or not the previously\n * focused element had .focus-visible.\n * @param {Event} e\n */\n function onVisibilityChange(e) {\n if (document.visibilityState === 'hidden') {\n // If the tab becomes active again, the browser will handle calling focus\n // on the element (Safari actually calls it twice).\n // If this tab change caused a blur on an element with focus-visible,\n // re-apply the class when the user switches back to the tab.\n if (hadFocusVisibleRecently) {\n hadKeyboardEvent = true;\n }\n addInitialPointerMoveListeners();\n }\n }\n\n /**\n * Add a group of listeners to detect usage of any pointing devices.\n * These listeners will be added when the polyfill first loads, and anytime\n * the window is blurred, so that they are active when the window regains\n * focus.\n */\n function addInitialPointerMoveListeners() {\n document.addEventListener('mousemove', onInitialPointerMove);\n 
document.addEventListener('mousedown', onInitialPointerMove);\n document.addEventListener('mouseup', onInitialPointerMove);\n document.addEventListener('pointermove', onInitialPointerMove);\n document.addEventListener('pointerdown', onInitialPointerMove);\n document.addEventListener('pointerup', onInitialPointerMove);\n document.addEventListener('touchmove', onInitialPointerMove);\n document.addEventListener('touchstart', onInitialPointerMove);\n document.addEventListener('touchend', onInitialPointerMove);\n }\n\n function removeInitialPointerMoveListeners() {\n document.removeEventListener('mousemove', onInitialPointerMove);\n document.removeEventListener('mousedown', onInitialPointerMove);\n document.removeEventListener('mouseup', onInitialPointerMove);\n document.removeEventListener('pointermove', onInitialPointerMove);\n document.removeEventListener('pointerdown', onInitialPointerMove);\n document.removeEventListener('pointerup', onInitialPointerMove);\n document.removeEventListener('touchmove', onInitialPointerMove);\n document.removeEventListener('touchstart', onInitialPointerMove);\n document.removeEventListener('touchend', onInitialPointerMove);\n }\n\n /**\n * When the polfyill first loads, assume the user is in keyboard modality.\n * If any event is received from a pointing device (e.g. mouse, pointer,\n * touch), turn off keyboard modality.\n * This accounts for situations where focus enters the page from the URL bar.\n * @param {Event} e\n */\n function onInitialPointerMove(e) {\n // Work around a Safari quirk that fires a mousemove on whenever the\n // window blurs, even if you're tabbing out of the page. \u00AF\\_(\u30C4)_/\u00AF\n if (e.target.nodeName && e.target.nodeName.toLowerCase() === 'html') {\n return;\n }\n\n hadKeyboardEvent = false;\n removeInitialPointerMoveListeners();\n }\n\n // For some kinds of state, we are interested in changes at the global scope\n // only. 
For example, global pointer input, global key presses and global\n // visibility change should affect the state at every scope:\n document.addEventListener('keydown', onKeyDown, true);\n document.addEventListener('mousedown', onPointerDown, true);\n document.addEventListener('pointerdown', onPointerDown, true);\n document.addEventListener('touchstart', onPointerDown, true);\n document.addEventListener('visibilitychange', onVisibilityChange, true);\n\n addInitialPointerMoveListeners();\n\n // For focus and blur, we specifically care about state changes in the local\n // scope. This is because focus / blur events that originate from within a\n // shadow root are not re-dispatched from the host element if it was already\n // the active element in its own scope:\n scope.addEventListener('focus', onFocus, true);\n scope.addEventListener('blur', onBlur, true);\n\n // We detect that a node is a ShadowRoot by ensuring that it is a\n // DocumentFragment and also has a host property. This check covers native\n // implementation and polyfill implementation transparently. If we only cared\n // about the native implementation, we could just check if the scope was\n // an instance of a ShadowRoot.\n if (scope.nodeType === Node.DOCUMENT_FRAGMENT_NODE && scope.host) {\n // Since a ShadowRoot is a special kind of DocumentFragment, it does not\n // have a root element to add a class to. 
So, we add this attribute to the\n // host element instead:\n scope.host.setAttribute('data-js-focus-visible', '');\n } else if (scope.nodeType === Node.DOCUMENT_NODE) {\n document.documentElement.classList.add('js-focus-visible');\n document.documentElement.setAttribute('data-js-focus-visible', '');\n }\n }\n\n // It is important to wrap all references to global window and document in\n // these checks to support server-side rendering use cases\n // @see https://github.com/WICG/focus-visible/issues/199\n if (typeof window !== 'undefined' && typeof document !== 'undefined') {\n // Make the polyfill helper globally available. This can be used as a signal\n // to interested libraries that wish to coordinate with the polyfill for e.g.,\n // applying the polyfill to a shadow root:\n window.applyFocusVisiblePolyfill = applyFocusVisiblePolyfill;\n\n // Notify interested libraries of the polyfill's presence, in case the\n // polyfill was loaded lazily:\n var event;\n\n try {\n event = new CustomEvent('focus-visible-polyfill-ready');\n } catch (error) {\n // IE11 does not support using CustomEvent as a constructor directly:\n event = document.createEvent('CustomEvent');\n event.initCustomEvent('focus-visible-polyfill-ready', false, false, {});\n }\n\n window.dispatchEvent(event);\n }\n\n if (typeof document !== 'undefined') {\n // Apply the polyfill to the global document, so that no JavaScript\n // coordination is required to use the polyfill in the top-level document:\n applyFocusVisiblePolyfill(document);\n }\n\n})));\n", "/*!\n * escape-html\n * Copyright(c) 2012-2013 TJ Holowaychuk\n * Copyright(c) 2015 Andreas Lubbe\n * Copyright(c) 2015 Tiancheng \"Timothy\" Gu\n * MIT Licensed\n */\n\n'use strict';\n\n/**\n * Module variables.\n * @private\n */\n\nvar matchHtmlRegExp = /[\"'&<>]/;\n\n/**\n * Module exports.\n * @public\n */\n\nmodule.exports = escapeHtml;\n\n/**\n * Escape special characters in the given string of html.\n *\n * @param {string} string The string to 
escape for inserting into HTML\n * @return {string}\n * @public\n */\n\nfunction escapeHtml(string) {\n var str = '' + string;\n var match = matchHtmlRegExp.exec(str);\n\n if (!match) {\n return str;\n }\n\n var escape;\n var html = '';\n var index = 0;\n var lastIndex = 0;\n\n for (index = match.index; index < str.length; index++) {\n switch (str.charCodeAt(index)) {\n case 34: // \"\n escape = '"';\n break;\n case 38: // &\n escape = '&';\n break;\n case 39: // '\n escape = ''';\n break;\n case 60: // <\n escape = '<';\n break;\n case 62: // >\n escape = '>';\n break;\n default:\n continue;\n }\n\n if (lastIndex !== index) {\n html += str.substring(lastIndex, index);\n }\n\n lastIndex = index + 1;\n html += escape;\n }\n\n return lastIndex !== index\n ? html + str.substring(lastIndex, index)\n : html;\n}\n", "/*!\n * clipboard.js v2.0.11\n * https://clipboardjs.com/\n *\n * Licensed MIT \u00A9 Zeno Rocha\n */\n(function webpackUniversalModuleDefinition(root, factory) {\n\tif(typeof exports === 'object' && typeof module === 'object')\n\t\tmodule.exports = factory();\n\telse if(typeof define === 'function' && define.amd)\n\t\tdefine([], factory);\n\telse if(typeof exports === 'object')\n\t\texports[\"ClipboardJS\"] = factory();\n\telse\n\t\troot[\"ClipboardJS\"] = factory();\n})(this, function() {\nreturn /******/ (function() { // webpackBootstrap\n/******/ \tvar __webpack_modules__ = ({\n\n/***/ 686:\n/***/ (function(__unused_webpack_module, __webpack_exports__, __webpack_require__) {\n\n\"use strict\";\n\n// EXPORTS\n__webpack_require__.d(__webpack_exports__, {\n \"default\": function() { return /* binding */ clipboard; }\n});\n\n// EXTERNAL MODULE: ./node_modules/tiny-emitter/index.js\nvar tiny_emitter = __webpack_require__(279);\nvar tiny_emitter_default = /*#__PURE__*/__webpack_require__.n(tiny_emitter);\n// EXTERNAL MODULE: ./node_modules/good-listener/src/listen.js\nvar listen = __webpack_require__(370);\nvar listen_default = 
/*#__PURE__*/__webpack_require__.n(listen);\n// EXTERNAL MODULE: ./node_modules/select/src/select.js\nvar src_select = __webpack_require__(817);\nvar select_default = /*#__PURE__*/__webpack_require__.n(src_select);\n;// CONCATENATED MODULE: ./src/common/command.js\n/**\n * Executes a given operation type.\n * @param {String} type\n * @return {Boolean}\n */\nfunction command(type) {\n try {\n return document.execCommand(type);\n } catch (err) {\n return false;\n }\n}\n;// CONCATENATED MODULE: ./src/actions/cut.js\n\n\n/**\n * Cut action wrapper.\n * @param {String|HTMLElement} target\n * @return {String}\n */\n\nvar ClipboardActionCut = function ClipboardActionCut(target) {\n var selectedText = select_default()(target);\n command('cut');\n return selectedText;\n};\n\n/* harmony default export */ var actions_cut = (ClipboardActionCut);\n;// CONCATENATED MODULE: ./src/common/create-fake-element.js\n/**\n * Creates a fake textarea element with a value.\n * @param {String} value\n * @return {HTMLElement}\n */\nfunction createFakeElement(value) {\n var isRTL = document.documentElement.getAttribute('dir') === 'rtl';\n var fakeElement = document.createElement('textarea'); // Prevent zooming on iOS\n\n fakeElement.style.fontSize = '12pt'; // Reset box model\n\n fakeElement.style.border = '0';\n fakeElement.style.padding = '0';\n fakeElement.style.margin = '0'; // Move element out of screen horizontally\n\n fakeElement.style.position = 'absolute';\n fakeElement.style[isRTL ? 
'right' : 'left'] = '-9999px'; // Move element to the same position vertically\n\n var yPosition = window.pageYOffset || document.documentElement.scrollTop;\n fakeElement.style.top = \"\".concat(yPosition, \"px\");\n fakeElement.setAttribute('readonly', '');\n fakeElement.value = value;\n return fakeElement;\n}\n;// CONCATENATED MODULE: ./src/actions/copy.js\n\n\n\n/**\n * Create fake copy action wrapper using a fake element.\n * @param {String} target\n * @param {Object} options\n * @return {String}\n */\n\nvar fakeCopyAction = function fakeCopyAction(value, options) {\n var fakeElement = createFakeElement(value);\n options.container.appendChild(fakeElement);\n var selectedText = select_default()(fakeElement);\n command('copy');\n fakeElement.remove();\n return selectedText;\n};\n/**\n * Copy action wrapper.\n * @param {String|HTMLElement} target\n * @param {Object} options\n * @return {String}\n */\n\n\nvar ClipboardActionCopy = function ClipboardActionCopy(target) {\n var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {\n container: document.body\n };\n var selectedText = '';\n\n if (typeof target === 'string') {\n selectedText = fakeCopyAction(target, options);\n } else if (target instanceof HTMLInputElement && !['text', 'search', 'url', 'tel', 'password'].includes(target === null || target === void 0 ? void 0 : target.type)) {\n // If input type doesn't support `setSelectionRange`. Simulate it. 
https://developer.mozilla.org/en-US/docs/Web/API/HTMLInputElement/setSelectionRange\n selectedText = fakeCopyAction(target.value, options);\n } else {\n selectedText = select_default()(target);\n command('copy');\n }\n\n return selectedText;\n};\n\n/* harmony default export */ var actions_copy = (ClipboardActionCopy);\n;// CONCATENATED MODULE: ./src/actions/default.js\nfunction _typeof(obj) { \"@babel/helpers - typeof\"; if (typeof Symbol === \"function\" && typeof Symbol.iterator === \"symbol\") { _typeof = function _typeof(obj) { return typeof obj; }; } else { _typeof = function _typeof(obj) { return obj && typeof Symbol === \"function\" && obj.constructor === Symbol && obj !== Symbol.prototype ? \"symbol\" : typeof obj; }; } return _typeof(obj); }\n\n\n\n/**\n * Inner function which performs selection from either `text` or `target`\n * properties and then executes copy or cut operations.\n * @param {Object} options\n */\n\nvar ClipboardActionDefault = function ClipboardActionDefault() {\n var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};\n // Defines base properties passed from constructor.\n var _options$action = options.action,\n action = _options$action === void 0 ? 'copy' : _options$action,\n container = options.container,\n target = options.target,\n text = options.text; // Sets the `action` to be performed which can be either 'copy' or 'cut'.\n\n if (action !== 'copy' && action !== 'cut') {\n throw new Error('Invalid \"action\" value, use either \"copy\" or \"cut\"');\n } // Sets the `target` property using an element that will be have its content copied.\n\n\n if (target !== undefined) {\n if (target && _typeof(target) === 'object' && target.nodeType === 1) {\n if (action === 'copy' && target.hasAttribute('disabled')) {\n throw new Error('Invalid \"target\" attribute. 
Please use \"readonly\" instead of \"disabled\" attribute');\n }\n\n if (action === 'cut' && (target.hasAttribute('readonly') || target.hasAttribute('disabled'))) {\n throw new Error('Invalid \"target\" attribute. You can\\'t cut text from elements with \"readonly\" or \"disabled\" attributes');\n }\n } else {\n throw new Error('Invalid \"target\" value, use a valid Element');\n }\n } // Define selection strategy based on `text` property.\n\n\n if (text) {\n return actions_copy(text, {\n container: container\n });\n } // Defines which selection strategy based on `target` property.\n\n\n if (target) {\n return action === 'cut' ? actions_cut(target) : actions_copy(target, {\n container: container\n });\n }\n};\n\n/* harmony default export */ var actions_default = (ClipboardActionDefault);\n;// CONCATENATED MODULE: ./src/clipboard.js\nfunction clipboard_typeof(obj) { \"@babel/helpers - typeof\"; if (typeof Symbol === \"function\" && typeof Symbol.iterator === \"symbol\") { clipboard_typeof = function _typeof(obj) { return typeof obj; }; } else { clipboard_typeof = function _typeof(obj) { return obj && typeof Symbol === \"function\" && obj.constructor === Symbol && obj !== Symbol.prototype ? 
\"symbol\" : typeof obj; }; } return clipboard_typeof(obj); }\n\nfunction _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError(\"Cannot call a class as a function\"); } }\n\nfunction _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if (\"value\" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } }\n\nfunction _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; }\n\nfunction _inherits(subClass, superClass) { if (typeof superClass !== \"function\" && superClass !== null) { throw new TypeError(\"Super expression must either be null or a function\"); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, writable: true, configurable: true } }); if (superClass) _setPrototypeOf(subClass, superClass); }\n\nfunction _setPrototypeOf(o, p) { _setPrototypeOf = Object.setPrototypeOf || function _setPrototypeOf(o, p) { o.__proto__ = p; return o; }; return _setPrototypeOf(o, p); }\n\nfunction _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = _getPrototypeOf(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = _getPrototypeOf(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return _possibleConstructorReturn(this, result); }; }\n\nfunction _possibleConstructorReturn(self, call) { if (call && (clipboard_typeof(call) === \"object\" || typeof call === \"function\")) { return call; } return _assertThisInitialized(self); }\n\nfunction _assertThisInitialized(self) { if 
(self === void 0) { throw new ReferenceError(\"this hasn't been initialised - super() hasn't been called\"); } return self; }\n\nfunction _isNativeReflectConstruct() { if (typeof Reflect === \"undefined\" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === \"function\") return true; try { Date.prototype.toString.call(Reflect.construct(Date, [], function () {})); return true; } catch (e) { return false; } }\n\nfunction _getPrototypeOf(o) { _getPrototypeOf = Object.setPrototypeOf ? Object.getPrototypeOf : function _getPrototypeOf(o) { return o.__proto__ || Object.getPrototypeOf(o); }; return _getPrototypeOf(o); }\n\n\n\n\n\n\n/**\n * Helper function to retrieve attribute value.\n * @param {String} suffix\n * @param {Element} element\n */\n\nfunction getAttributeValue(suffix, element) {\n var attribute = \"data-clipboard-\".concat(suffix);\n\n if (!element.hasAttribute(attribute)) {\n return;\n }\n\n return element.getAttribute(attribute);\n}\n/**\n * Base class which takes one or more elements, adds event listeners to them,\n * and instantiates a new `ClipboardAction` on each click.\n */\n\n\nvar Clipboard = /*#__PURE__*/function (_Emitter) {\n _inherits(Clipboard, _Emitter);\n\n var _super = _createSuper(Clipboard);\n\n /**\n * @param {String|HTMLElement|HTMLCollection|NodeList} trigger\n * @param {Object} options\n */\n function Clipboard(trigger, options) {\n var _this;\n\n _classCallCheck(this, Clipboard);\n\n _this = _super.call(this);\n\n _this.resolveOptions(options);\n\n _this.listenClick(trigger);\n\n return _this;\n }\n /**\n * Defines if attributes would be resolved using internal setter functions\n * or custom functions that were passed in the constructor.\n * @param {Object} options\n */\n\n\n _createClass(Clipboard, [{\n key: \"resolveOptions\",\n value: function resolveOptions() {\n var options = arguments.length > 0 && arguments[0] !== undefined ? 
arguments[0] : {};\n this.action = typeof options.action === 'function' ? options.action : this.defaultAction;\n this.target = typeof options.target === 'function' ? options.target : this.defaultTarget;\n this.text = typeof options.text === 'function' ? options.text : this.defaultText;\n this.container = clipboard_typeof(options.container) === 'object' ? options.container : document.body;\n }\n /**\n * Adds a click event listener to the passed trigger.\n * @param {String|HTMLElement|HTMLCollection|NodeList} trigger\n */\n\n }, {\n key: \"listenClick\",\n value: function listenClick(trigger) {\n var _this2 = this;\n\n this.listener = listen_default()(trigger, 'click', function (e) {\n return _this2.onClick(e);\n });\n }\n /**\n * Defines a new `ClipboardAction` on each click event.\n * @param {Event} e\n */\n\n }, {\n key: \"onClick\",\n value: function onClick(e) {\n var trigger = e.delegateTarget || e.currentTarget;\n var action = this.action(trigger) || 'copy';\n var text = actions_default({\n action: action,\n container: this.container,\n target: this.target(trigger),\n text: this.text(trigger)\n }); // Fires an event based on the copy operation result.\n\n this.emit(text ? 
'success' : 'error', {\n action: action,\n text: text,\n trigger: trigger,\n clearSelection: function clearSelection() {\n if (trigger) {\n trigger.focus();\n }\n\n window.getSelection().removeAllRanges();\n }\n });\n }\n /**\n * Default `action` lookup function.\n * @param {Element} trigger\n */\n\n }, {\n key: \"defaultAction\",\n value: function defaultAction(trigger) {\n return getAttributeValue('action', trigger);\n }\n /**\n * Default `target` lookup function.\n * @param {Element} trigger\n */\n\n }, {\n key: \"defaultTarget\",\n value: function defaultTarget(trigger) {\n var selector = getAttributeValue('target', trigger);\n\n if (selector) {\n return document.querySelector(selector);\n }\n }\n /**\n * Allow fire programmatically a copy action\n * @param {String|HTMLElement} target\n * @param {Object} options\n * @returns Text copied.\n */\n\n }, {\n key: \"defaultText\",\n\n /**\n * Default `text` lookup function.\n * @param {Element} trigger\n */\n value: function defaultText(trigger) {\n return getAttributeValue('text', trigger);\n }\n /**\n * Destroy lifecycle.\n */\n\n }, {\n key: \"destroy\",\n value: function destroy() {\n this.listener.destroy();\n }\n }], [{\n key: \"copy\",\n value: function copy(target) {\n var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {\n container: document.body\n };\n return actions_copy(target, options);\n }\n /**\n * Allow fire programmatically a cut action\n * @param {String|HTMLElement} target\n * @returns Text cutted.\n */\n\n }, {\n key: \"cut\",\n value: function cut(target) {\n return actions_cut(target);\n }\n /**\n * Returns the support of the given action, or all actions if no action is\n * given.\n * @param {String} [action]\n */\n\n }, {\n key: \"isSupported\",\n value: function isSupported() {\n var action = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : ['copy', 'cut'];\n var actions = typeof action === 'string' ? 
[action] : action;\n var support = !!document.queryCommandSupported;\n actions.forEach(function (action) {\n support = support && !!document.queryCommandSupported(action);\n });\n return support;\n }\n }]);\n\n return Clipboard;\n}((tiny_emitter_default()));\n\n/* harmony default export */ var clipboard = (Clipboard);\n\n/***/ }),\n\n/***/ 828:\n/***/ (function(module) {\n\nvar DOCUMENT_NODE_TYPE = 9;\n\n/**\n * A polyfill for Element.matches()\n */\nif (typeof Element !== 'undefined' && !Element.prototype.matches) {\n var proto = Element.prototype;\n\n proto.matches = proto.matchesSelector ||\n proto.mozMatchesSelector ||\n proto.msMatchesSelector ||\n proto.oMatchesSelector ||\n proto.webkitMatchesSelector;\n}\n\n/**\n * Finds the closest parent that matches a selector.\n *\n * @param {Element} element\n * @param {String} selector\n * @return {Function}\n */\nfunction closest (element, selector) {\n while (element && element.nodeType !== DOCUMENT_NODE_TYPE) {\n if (typeof element.matches === 'function' &&\n element.matches(selector)) {\n return element;\n }\n element = element.parentNode;\n }\n}\n\nmodule.exports = closest;\n\n\n/***/ }),\n\n/***/ 438:\n/***/ (function(module, __unused_webpack_exports, __webpack_require__) {\n\nvar closest = __webpack_require__(828);\n\n/**\n * Delegates event to a selector.\n *\n * @param {Element} element\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @param {Boolean} useCapture\n * @return {Object}\n */\nfunction _delegate(element, selector, type, callback, useCapture) {\n var listenerFn = listener.apply(this, arguments);\n\n element.addEventListener(type, listenerFn, useCapture);\n\n return {\n destroy: function() {\n element.removeEventListener(type, listenerFn, useCapture);\n }\n }\n}\n\n/**\n * Delegates event to a selector.\n *\n * @param {Element|String|Array} [elements]\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @param {Boolean} 
useCapture\n * @return {Object}\n */\nfunction delegate(elements, selector, type, callback, useCapture) {\n // Handle the regular Element usage\n if (typeof elements.addEventListener === 'function') {\n return _delegate.apply(null, arguments);\n }\n\n // Handle Element-less usage, it defaults to global delegation\n if (typeof type === 'function') {\n // Use `document` as the first parameter, then apply arguments\n // This is a short way to .unshift `arguments` without running into deoptimizations\n return _delegate.bind(null, document).apply(null, arguments);\n }\n\n // Handle Selector-based usage\n if (typeof elements === 'string') {\n elements = document.querySelectorAll(elements);\n }\n\n // Handle Array-like based usage\n return Array.prototype.map.call(elements, function (element) {\n return _delegate(element, selector, type, callback, useCapture);\n });\n}\n\n/**\n * Finds closest match and invokes callback.\n *\n * @param {Element} element\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @return {Function}\n */\nfunction listener(element, selector, type, callback) {\n return function(e) {\n e.delegateTarget = closest(e.target, selector);\n\n if (e.delegateTarget) {\n callback.call(element, e);\n }\n }\n}\n\nmodule.exports = delegate;\n\n\n/***/ }),\n\n/***/ 879:\n/***/ (function(__unused_webpack_module, exports) {\n\n/**\n * Check if argument is a HTML element.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.node = function(value) {\n return value !== undefined\n && value instanceof HTMLElement\n && value.nodeType === 1;\n};\n\n/**\n * Check if argument is a list of HTML elements.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.nodeList = function(value) {\n var type = Object.prototype.toString.call(value);\n\n return value !== undefined\n && (type === '[object NodeList]' || type === '[object HTMLCollection]')\n && ('length' in value)\n && (value.length === 0 || 
exports.node(value[0]));\n};\n\n/**\n * Check if argument is a string.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.string = function(value) {\n return typeof value === 'string'\n || value instanceof String;\n};\n\n/**\n * Check if argument is a function.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.fn = function(value) {\n var type = Object.prototype.toString.call(value);\n\n return type === '[object Function]';\n};\n\n\n/***/ }),\n\n/***/ 370:\n/***/ (function(module, __unused_webpack_exports, __webpack_require__) {\n\nvar is = __webpack_require__(879);\nvar delegate = __webpack_require__(438);\n\n/**\n * Validates all params and calls the right\n * listener function based on its target type.\n *\n * @param {String|HTMLElement|HTMLCollection|NodeList} target\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listen(target, type, callback) {\n if (!target && !type && !callback) {\n throw new Error('Missing required arguments');\n }\n\n if (!is.string(type)) {\n throw new TypeError('Second argument must be a String');\n }\n\n if (!is.fn(callback)) {\n throw new TypeError('Third argument must be a Function');\n }\n\n if (is.node(target)) {\n return listenNode(target, type, callback);\n }\n else if (is.nodeList(target)) {\n return listenNodeList(target, type, callback);\n }\n else if (is.string(target)) {\n return listenSelector(target, type, callback);\n }\n else {\n throw new TypeError('First argument must be a String, HTMLElement, HTMLCollection, or NodeList');\n }\n}\n\n/**\n * Adds an event listener to a HTML element\n * and returns a remove listener function.\n *\n * @param {HTMLElement} node\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenNode(node, type, callback) {\n node.addEventListener(type, callback);\n\n return {\n destroy: function() {\n node.removeEventListener(type, callback);\n }\n }\n}\n\n/**\n * Add an event listener 
to a list of HTML elements\n * and returns a remove listener function.\n *\n * @param {NodeList|HTMLCollection} nodeList\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenNodeList(nodeList, type, callback) {\n Array.prototype.forEach.call(nodeList, function(node) {\n node.addEventListener(type, callback);\n });\n\n return {\n destroy: function() {\n Array.prototype.forEach.call(nodeList, function(node) {\n node.removeEventListener(type, callback);\n });\n }\n }\n}\n\n/**\n * Add an event listener to a selector\n * and returns a remove listener function.\n *\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenSelector(selector, type, callback) {\n return delegate(document.body, selector, type, callback);\n}\n\nmodule.exports = listen;\n\n\n/***/ }),\n\n/***/ 817:\n/***/ (function(module) {\n\nfunction select(element) {\n var selectedText;\n\n if (element.nodeName === 'SELECT') {\n element.focus();\n\n selectedText = element.value;\n }\n else if (element.nodeName === 'INPUT' || element.nodeName === 'TEXTAREA') {\n var isReadOnly = element.hasAttribute('readonly');\n\n if (!isReadOnly) {\n element.setAttribute('readonly', '');\n }\n\n element.select();\n element.setSelectionRange(0, element.value.length);\n\n if (!isReadOnly) {\n element.removeAttribute('readonly');\n }\n\n selectedText = element.value;\n }\n else {\n if (element.hasAttribute('contenteditable')) {\n element.focus();\n }\n\n var selection = window.getSelection();\n var range = document.createRange();\n\n range.selectNodeContents(element);\n selection.removeAllRanges();\n selection.addRange(range);\n\n selectedText = selection.toString();\n }\n\n return selectedText;\n}\n\nmodule.exports = select;\n\n\n/***/ }),\n\n/***/ 279:\n/***/ (function(module) {\n\nfunction E () {\n // Keep this empty so it's easier to inherit from\n // (via https://github.com/lipsmack from 
https://github.com/scottcorgan/tiny-emitter/issues/3)\n}\n\nE.prototype = {\n on: function (name, callback, ctx) {\n var e = this.e || (this.e = {});\n\n (e[name] || (e[name] = [])).push({\n fn: callback,\n ctx: ctx\n });\n\n return this;\n },\n\n once: function (name, callback, ctx) {\n var self = this;\n function listener () {\n self.off(name, listener);\n callback.apply(ctx, arguments);\n };\n\n listener._ = callback\n return this.on(name, listener, ctx);\n },\n\n emit: function (name) {\n var data = [].slice.call(arguments, 1);\n var evtArr = ((this.e || (this.e = {}))[name] || []).slice();\n var i = 0;\n var len = evtArr.length;\n\n for (i; i < len; i++) {\n evtArr[i].fn.apply(evtArr[i].ctx, data);\n }\n\n return this;\n },\n\n off: function (name, callback) {\n var e = this.e || (this.e = {});\n var evts = e[name];\n var liveEvents = [];\n\n if (evts && callback) {\n for (var i = 0, len = evts.length; i < len; i++) {\n if (evts[i].fn !== callback && evts[i].fn._ !== callback)\n liveEvents.push(evts[i]);\n }\n }\n\n // Remove event from queue to prevent memory leak\n // Suggested by https://github.com/lazd\n // Ref: https://github.com/scottcorgan/tiny-emitter/commit/c6ebfaa9bc973b33d110a84a307742b7cf94c953#commitcomment-5024910\n\n (liveEvents.length)\n ? 
e[name] = liveEvents\n : delete e[name];\n\n return this;\n }\n};\n\nmodule.exports = E;\nmodule.exports.TinyEmitter = E;\n\n\n/***/ })\n\n/******/ \t});\n/************************************************************************/\n/******/ \t// The module cache\n/******/ \tvar __webpack_module_cache__ = {};\n/******/ \t\n/******/ \t// The require function\n/******/ \tfunction __webpack_require__(moduleId) {\n/******/ \t\t// Check if module is in cache\n/******/ \t\tif(__webpack_module_cache__[moduleId]) {\n/******/ \t\t\treturn __webpack_module_cache__[moduleId].exports;\n/******/ \t\t}\n/******/ \t\t// Create a new module (and put it into the cache)\n/******/ \t\tvar module = __webpack_module_cache__[moduleId] = {\n/******/ \t\t\t// no module.id needed\n/******/ \t\t\t// no module.loaded needed\n/******/ \t\t\texports: {}\n/******/ \t\t};\n/******/ \t\n/******/ \t\t// Execute the module function\n/******/ \t\t__webpack_modules__[moduleId](module, module.exports, __webpack_require__);\n/******/ \t\n/******/ \t\t// Return the exports of the module\n/******/ \t\treturn module.exports;\n/******/ \t}\n/******/ \t\n/************************************************************************/\n/******/ \t/* webpack/runtime/compat get default export */\n/******/ \t!function() {\n/******/ \t\t// getDefaultExport function for compatibility with non-harmony modules\n/******/ \t\t__webpack_require__.n = function(module) {\n/******/ \t\t\tvar getter = module && module.__esModule ?\n/******/ \t\t\t\tfunction() { return module['default']; } :\n/******/ \t\t\t\tfunction() { return module; };\n/******/ \t\t\t__webpack_require__.d(getter, { a: getter });\n/******/ \t\t\treturn getter;\n/******/ \t\t};\n/******/ \t}();\n/******/ \t\n/******/ \t/* webpack/runtime/define property getters */\n/******/ \t!function() {\n/******/ \t\t// define getter functions for harmony exports\n/******/ \t\t__webpack_require__.d = function(exports, definition) {\n/******/ \t\t\tfor(var key in definition) 
{\n/******/ \t\t\t\tif(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n/******/ \t\t\t\t\tObject.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n/******/ \t\t\t\t}\n/******/ \t\t\t}\n/******/ \t\t};\n/******/ \t}();\n/******/ \t\n/******/ \t/* webpack/runtime/hasOwnProperty shorthand */\n/******/ \t!function() {\n/******/ \t\t__webpack_require__.o = function(obj, prop) { return Object.prototype.hasOwnProperty.call(obj, prop); }\n/******/ \t}();\n/******/ \t\n/************************************************************************/\n/******/ \t// module exports must be returned from runtime so entry inlining is disabled\n/******/ \t// startup\n/******/ \t// Load entry module and return exports\n/******/ \treturn __webpack_require__(686);\n/******/ })()\n.default;\n});", "/*\n * Copyright (c) 2016-2025 Martin Donath \n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to\n * deal in the Software without restriction, including without limitation the\n * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or\n * sell copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS\n * IN THE SOFTWARE.\n */\n\nimport \"focus-visible\"\n\nimport {\n EMPTY,\n NEVER,\n Observable,\n Subject,\n defer,\n delay,\n filter,\n map,\n merge,\n mergeWith,\n shareReplay,\n switchMap\n} from \"rxjs\"\n\nimport { configuration, feature } from \"./_\"\nimport {\n at,\n getActiveElement,\n getOptionalElement,\n requestJSON,\n setLocation,\n setToggle,\n watchDocument,\n watchKeyboard,\n watchLocation,\n watchLocationTarget,\n watchMedia,\n watchPrint,\n watchScript,\n watchViewport\n} from \"./browser\"\nimport {\n getComponentElement,\n getComponentElements,\n mountAnnounce,\n mountBackToTop,\n mountConsent,\n mountContent,\n mountDialog,\n mountHeader,\n mountHeaderTitle,\n mountPalette,\n mountProgress,\n mountSearch,\n mountSearchHiglight,\n mountSidebar,\n mountSource,\n mountTableOfContents,\n mountTabs,\n watchHeader,\n watchMain\n} from \"./components\"\nimport {\n SearchIndex,\n setupClipboardJS,\n setupInstantNavigation,\n setupVersionSelector\n} from \"./integrations\"\nimport {\n patchEllipsis,\n patchIndeterminate,\n patchScrollfix,\n patchScrolllock\n} from \"./patches\"\nimport \"./polyfills\"\n\n/* ----------------------------------------------------------------------------\n * Functions - @todo refactor\n * ------------------------------------------------------------------------- */\n\n/**\n * Fetch search index\n *\n * @returns Search index observable\n */\nfunction fetchSearchIndex(): Observable {\n if (location.protocol === \"file:\") {\n return watchScript(\n `${new URL(\"search/search_index.js\", config.base)}`\n )\n .pipe(\n // @ts-ignore - @todo fix typings\n map(() => __index),\n shareReplay(1)\n )\n } else {\n return requestJSON(\n new URL(\"search/search_index.json\", config.base)\n 
)\n }\n}\n\n/* ----------------------------------------------------------------------------\n * Application\n * ------------------------------------------------------------------------- */\n\n/* Yay, JavaScript is available */\ndocument.documentElement.classList.remove(\"no-js\")\ndocument.documentElement.classList.add(\"js\")\n\n/* Set up navigation observables and subjects */\nconst document$ = watchDocument()\nconst location$ = watchLocation()\nconst target$ = watchLocationTarget(location$)\nconst keyboard$ = watchKeyboard()\n\n/* Set up media observables */\nconst viewport$ = watchViewport()\nconst tablet$ = watchMedia(\"(min-width: 960px)\")\nconst screen$ = watchMedia(\"(min-width: 1220px)\")\nconst print$ = watchPrint()\n\n/* Retrieve search index, if search is enabled */\nconst config = configuration()\nconst index$ = document.forms.namedItem(\"search\")\n ? fetchSearchIndex()\n : NEVER\n\n/* Set up Clipboard.js integration */\nconst alert$ = new Subject()\nsetupClipboardJS({ alert$ })\n\n/* Set up progress indicator */\nconst progress$ = new Subject()\n\n/* Set up instant navigation, if enabled */\nif (feature(\"navigation.instant\"))\n setupInstantNavigation({ location$, viewport$, progress$ })\n .subscribe(document$)\n\n/* Set up version selector */\nif (config.version?.provider === \"mike\")\n setupVersionSelector({ document$ })\n\n/* Always close drawer and search on navigation */\nmerge(location$, target$)\n .pipe(\n delay(125)\n )\n .subscribe(() => {\n setToggle(\"drawer\", false)\n setToggle(\"search\", false)\n })\n\n/* Set up global keyboard handlers */\nkeyboard$\n .pipe(\n filter(({ mode }) => mode === \"global\")\n )\n .subscribe(key => {\n switch (key.type) {\n\n /* Go to previous page */\n case \"p\":\n case \",\":\n const prev = getOptionalElement(\"link[rel=prev]\")\n if (typeof prev !== \"undefined\")\n setLocation(prev)\n break\n\n /* Go to next page */\n case \"n\":\n case \".\":\n const next = getOptionalElement(\"link[rel=next]\")\n 
if (typeof next !== \"undefined\")\n setLocation(next)\n break\n\n /* Expand navigation, see https://bit.ly/3ZjG5io */\n case \"Enter\":\n const active = getActiveElement()\n if (active instanceof HTMLLabelElement)\n active.click()\n }\n })\n\n/* Set up patches */\npatchEllipsis({ viewport$, document$ })\npatchIndeterminate({ document$, tablet$ })\npatchScrollfix({ document$ })\npatchScrolllock({ viewport$, tablet$ })\n\n/* Set up header and main area observable */\nconst header$ = watchHeader(getComponentElement(\"header\"), { viewport$ })\nconst main$ = document$\n .pipe(\n map(() => getComponentElement(\"main\")),\n switchMap(el => watchMain(el, { viewport$, header$ })),\n shareReplay(1)\n )\n\n/* Set up control component observables */\nconst control$ = merge(\n\n /* Consent */\n ...getComponentElements(\"consent\")\n .map(el => mountConsent(el, { target$ })),\n\n /* Dialog */\n ...getComponentElements(\"dialog\")\n .map(el => mountDialog(el, { alert$ })),\n\n /* Color palette */\n ...getComponentElements(\"palette\")\n .map(el => mountPalette(el)),\n\n /* Progress bar */\n ...getComponentElements(\"progress\")\n .map(el => mountProgress(el, { progress$ })),\n\n /* Search */\n ...getComponentElements(\"search\")\n .map(el => mountSearch(el, { index$, keyboard$ })),\n\n /* Repository information */\n ...getComponentElements(\"source\")\n .map(el => mountSource(el))\n)\n\n/* Set up content component observables */\nconst content$ = defer(() => merge(\n\n /* Announcement bar */\n ...getComponentElements(\"announce\")\n .map(el => mountAnnounce(el)),\n\n /* Content */\n ...getComponentElements(\"content\")\n .map(el => mountContent(el, { viewport$, target$, print$ })),\n\n /* Search highlighting */\n ...getComponentElements(\"content\")\n .map(el => feature(\"search.highlight\")\n ? 
mountSearchHiglight(el, { index$, location$ })\n : EMPTY\n ),\n\n /* Header */\n ...getComponentElements(\"header\")\n .map(el => mountHeader(el, { viewport$, header$, main$ })),\n\n /* Header title */\n ...getComponentElements(\"header-title\")\n .map(el => mountHeaderTitle(el, { viewport$, header$ })),\n\n /* Sidebar */\n ...getComponentElements(\"sidebar\")\n .map(el => el.getAttribute(\"data-md-type\") === \"navigation\"\n ? at(screen$, () => mountSidebar(el, { viewport$, header$, main$ }))\n : at(tablet$, () => mountSidebar(el, { viewport$, header$, main$ }))\n ),\n\n /* Navigation tabs */\n ...getComponentElements(\"tabs\")\n .map(el => mountTabs(el, { viewport$, header$ })),\n\n /* Table of contents */\n ...getComponentElements(\"toc\")\n .map(el => mountTableOfContents(el, {\n viewport$, header$, main$, target$\n })),\n\n /* Back-to-top button */\n ...getComponentElements(\"top\")\n .map(el => mountBackToTop(el, { viewport$, header$, main$, target$ }))\n))\n\n/* Set up component observables */\nconst component$ = document$\n .pipe(\n switchMap(() => content$),\n mergeWith(control$),\n shareReplay(1)\n )\n\n/* Subscribe to all components */\ncomponent$.subscribe()\n\n/* ----------------------------------------------------------------------------\n * Exports\n * ------------------------------------------------------------------------- */\n\nwindow.document$ = document$ /* Document observable */\nwindow.location$ = location$ /* Location subject */\nwindow.target$ = target$ /* Location target observable */\nwindow.keyboard$ = keyboard$ /* Keyboard observable */\nwindow.viewport$ = viewport$ /* Viewport observable */\nwindow.tablet$ = tablet$ /* Media tablet observable */\nwindow.screen$ = screen$ /* Media screen observable */\nwindow.print$ = print$ /* Media print observable */\nwindow.alert$ = alert$ /* Alert subject */\nwindow.progress$ = progress$ /* Progress indicator subject */\nwindow.component$ = component$ /* Component observable */\n", 
"/******************************************************************************\nCopyright (c) Microsoft Corporation.\n\nPermission to use, copy, modify, and/or distribute this software for any\npurpose with or without fee is hereby granted.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH\nREGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY\nAND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,\nINDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM\nLOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR\nOTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR\nPERFORMANCE OF THIS SOFTWARE.\n***************************************************************************** */\n/* global Reflect, Promise, SuppressedError, Symbol, Iterator */\n\nvar extendStatics = function(d, b) {\n extendStatics = Object.setPrototypeOf ||\n ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||\n function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };\n return extendStatics(d, b);\n};\n\nexport function __extends(d, b) {\n if (typeof b !== \"function\" && b !== null)\n throw new TypeError(\"Class extends value \" + String(b) + \" is not a constructor or null\");\n extendStatics(d, b);\n function __() { this.constructor = d; }\n d.prototype = b === null ? 
Object.create(b) : (__.prototype = b.prototype, new __());\n}\n\nexport var __assign = function() {\n __assign = Object.assign || function __assign(t) {\n for (var s, i = 1, n = arguments.length; i < n; i++) {\n s = arguments[i];\n for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p)) t[p] = s[p];\n }\n return t;\n }\n return __assign.apply(this, arguments);\n}\n\nexport function __rest(s, e) {\n var t = {};\n for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p) && e.indexOf(p) < 0)\n t[p] = s[p];\n if (s != null && typeof Object.getOwnPropertySymbols === \"function\")\n for (var i = 0, p = Object.getOwnPropertySymbols(s); i < p.length; i++) {\n if (e.indexOf(p[i]) < 0 && Object.prototype.propertyIsEnumerable.call(s, p[i]))\n t[p[i]] = s[p[i]];\n }\n return t;\n}\n\nexport function __decorate(decorators, target, key, desc) {\n var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;\n if (typeof Reflect === \"object\" && typeof Reflect.decorate === \"function\") r = Reflect.decorate(decorators, target, key, desc);\n else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;\n return c > 3 && r && Object.defineProperty(target, key, r), r;\n}\n\nexport function __param(paramIndex, decorator) {\n return function (target, key) { decorator(target, key, paramIndex); }\n}\n\nexport function __esDecorate(ctor, descriptorIn, decorators, contextIn, initializers, extraInitializers) {\n function accept(f) { if (f !== void 0 && typeof f !== \"function\") throw new TypeError(\"Function expected\"); return f; }\n var kind = contextIn.kind, key = kind === \"getter\" ? \"get\" : kind === \"setter\" ? \"set\" : \"value\";\n var target = !descriptorIn && ctor ? contextIn[\"static\"] ? ctor : ctor.prototype : null;\n var descriptor = descriptorIn || (target ? 
Object.getOwnPropertyDescriptor(target, contextIn.name) : {});\n var _, done = false;\n for (var i = decorators.length - 1; i >= 0; i--) {\n var context = {};\n for (var p in contextIn) context[p] = p === \"access\" ? {} : contextIn[p];\n for (var p in contextIn.access) context.access[p] = contextIn.access[p];\n context.addInitializer = function (f) { if (done) throw new TypeError(\"Cannot add initializers after decoration has completed\"); extraInitializers.push(accept(f || null)); };\n var result = (0, decorators[i])(kind === \"accessor\" ? { get: descriptor.get, set: descriptor.set } : descriptor[key], context);\n if (kind === \"accessor\") {\n if (result === void 0) continue;\n if (result === null || typeof result !== \"object\") throw new TypeError(\"Object expected\");\n if (_ = accept(result.get)) descriptor.get = _;\n if (_ = accept(result.set)) descriptor.set = _;\n if (_ = accept(result.init)) initializers.unshift(_);\n }\n else if (_ = accept(result)) {\n if (kind === \"field\") initializers.unshift(_);\n else descriptor[key] = _;\n }\n }\n if (target) Object.defineProperty(target, contextIn.name, descriptor);\n done = true;\n};\n\nexport function __runInitializers(thisArg, initializers, value) {\n var useValue = arguments.length > 2;\n for (var i = 0; i < initializers.length; i++) {\n value = useValue ? initializers[i].call(thisArg, value) : initializers[i].call(thisArg);\n }\n return useValue ? value : void 0;\n};\n\nexport function __propKey(x) {\n return typeof x === \"symbol\" ? x : \"\".concat(x);\n};\n\nexport function __setFunctionName(f, name, prefix) {\n if (typeof name === \"symbol\") name = name.description ? \"[\".concat(name.description, \"]\") : \"\";\n return Object.defineProperty(f, \"name\", { configurable: true, value: prefix ? 
\"\".concat(prefix, \" \", name) : name });\n};\n\nexport function __metadata(metadataKey, metadataValue) {\n if (typeof Reflect === \"object\" && typeof Reflect.metadata === \"function\") return Reflect.metadata(metadataKey, metadataValue);\n}\n\nexport function __awaiter(thisArg, _arguments, P, generator) {\n function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }\n return new (P || (P = Promise))(function (resolve, reject) {\n function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }\n function rejected(value) { try { step(generator[\"throw\"](value)); } catch (e) { reject(e); } }\n function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }\n step((generator = generator.apply(thisArg, _arguments || [])).next());\n });\n}\n\nexport function __generator(thisArg, body) {\n var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g = Object.create((typeof Iterator === \"function\" ? Iterator : Object).prototype);\n return g.next = verb(0), g[\"throw\"] = verb(1), g[\"return\"] = verb(2), typeof Symbol === \"function\" && (g[Symbol.iterator] = function() { return this; }), g;\n function verb(n) { return function (v) { return step([n, v]); }; }\n function step(op) {\n if (f) throw new TypeError(\"Generator is already executing.\");\n while (g && (g = 0, op[0] && (_ = 0)), _) try {\n if (f = 1, y && (t = op[0] & 2 ? y[\"return\"] : op[0] ? 
y[\"throw\"] || ((t = y[\"return\"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;\n if (y = 0, t) op = [op[0] & 2, t.value];\n switch (op[0]) {\n case 0: case 1: t = op; break;\n case 4: _.label++; return { value: op[1], done: false };\n case 5: _.label++; y = op[1]; op = [0]; continue;\n case 7: op = _.ops.pop(); _.trys.pop(); continue;\n default:\n if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }\n if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }\n if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }\n if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }\n if (t[2]) _.ops.pop();\n _.trys.pop(); continue;\n }\n op = body.call(thisArg, _);\n } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }\n if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };\n }\n}\n\nexport var __createBinding = Object.create ? (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n var desc = Object.getOwnPropertyDescriptor(m, k);\n if (!desc || (\"get\" in desc ? !m.__esModule : desc.writable || desc.configurable)) {\n desc = { enumerable: true, get: function() { return m[k]; } };\n }\n Object.defineProperty(o, k2, desc);\n}) : (function(o, m, k, k2) {\n if (k2 === undefined) k2 = k;\n o[k2] = m[k];\n});\n\nexport function __exportStar(m, o) {\n for (var p in m) if (p !== \"default\" && !Object.prototype.hasOwnProperty.call(o, p)) __createBinding(o, m, p);\n}\n\nexport function __values(o) {\n var s = typeof Symbol === \"function\" && Symbol.iterator, m = s && o[s], i = 0;\n if (m) return m.call(o);\n if (o && typeof o.length === \"number\") return {\n next: function () {\n if (o && i >= o.length) o = void 0;\n return { value: o && o[i++], done: !o };\n }\n };\n throw new TypeError(s ? 
\"Object is not iterable.\" : \"Symbol.iterator is not defined.\");\n}\n\nexport function __read(o, n) {\n var m = typeof Symbol === \"function\" && o[Symbol.iterator];\n if (!m) return o;\n var i = m.call(o), r, ar = [], e;\n try {\n while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);\n }\n catch (error) { e = { error: error }; }\n finally {\n try {\n if (r && !r.done && (m = i[\"return\"])) m.call(i);\n }\n finally { if (e) throw e.error; }\n }\n return ar;\n}\n\n/** @deprecated */\nexport function __spread() {\n for (var ar = [], i = 0; i < arguments.length; i++)\n ar = ar.concat(__read(arguments[i]));\n return ar;\n}\n\n/** @deprecated */\nexport function __spreadArrays() {\n for (var s = 0, i = 0, il = arguments.length; i < il; i++) s += arguments[i].length;\n for (var r = Array(s), k = 0, i = 0; i < il; i++)\n for (var a = arguments[i], j = 0, jl = a.length; j < jl; j++, k++)\n r[k] = a[j];\n return r;\n}\n\nexport function __spreadArray(to, from, pack) {\n if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {\n if (ar || !(i in from)) {\n if (!ar) ar = Array.prototype.slice.call(from, 0, i);\n ar[i] = from[i];\n }\n }\n return to.concat(ar || Array.prototype.slice.call(from));\n}\n\nexport function __await(v) {\n return this instanceof __await ? (this.v = v, this) : new __await(v);\n}\n\nexport function __asyncGenerator(thisArg, _arguments, generator) {\n if (!Symbol.asyncIterator) throw new TypeError(\"Symbol.asyncIterator is not defined.\");\n var g = generator.apply(thisArg, _arguments || []), i, q = [];\n return i = Object.create((typeof AsyncIterator === \"function\" ? 
AsyncIterator : Object).prototype), verb(\"next\"), verb(\"throw\"), verb(\"return\", awaitReturn), i[Symbol.asyncIterator] = function () { return this; }, i;\n function awaitReturn(f) { return function (v) { return Promise.resolve(v).then(f, reject); }; }\n function verb(n, f) { if (g[n]) { i[n] = function (v) { return new Promise(function (a, b) { q.push([n, v, a, b]) > 1 || resume(n, v); }); }; if (f) i[n] = f(i[n]); } }\n function resume(n, v) { try { step(g[n](v)); } catch (e) { settle(q[0][3], e); } }\n function step(r) { r.value instanceof __await ? Promise.resolve(r.value.v).then(fulfill, reject) : settle(q[0][2], r); }\n function fulfill(value) { resume(\"next\", value); }\n function reject(value) { resume(\"throw\", value); }\n function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }\n}\n\nexport function __asyncDelegator(o) {\n var i, p;\n return i = {}, verb(\"next\"), verb(\"throw\", function (e) { throw e; }), verb(\"return\"), i[Symbol.iterator] = function () { return this; }, i;\n function verb(n, f) { i[n] = o[n] ? function (v) { return (p = !p) ? { value: __await(o[n](v)), done: false } : f ? f(v) : v; } : f; }\n}\n\nexport function __asyncValues(o) {\n if (!Symbol.asyncIterator) throw new TypeError(\"Symbol.asyncIterator is not defined.\");\n var m = o[Symbol.asyncIterator], i;\n return m ? m.call(o) : (o = typeof __values === \"function\" ? 
__values(o) : o[Symbol.iterator](), i = {}, verb(\"next\"), verb(\"throw\"), verb(\"return\"), i[Symbol.asyncIterator] = function () { return this; }, i);\n function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }\n function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }\n}\n\nexport function __makeTemplateObject(cooked, raw) {\n if (Object.defineProperty) { Object.defineProperty(cooked, \"raw\", { value: raw }); } else { cooked.raw = raw; }\n return cooked;\n};\n\nvar __setModuleDefault = Object.create ? (function(o, v) {\n Object.defineProperty(o, \"default\", { enumerable: true, value: v });\n}) : function(o, v) {\n o[\"default\"] = v;\n};\n\nexport function __importStar(mod) {\n if (mod && mod.__esModule) return mod;\n var result = {};\n if (mod != null) for (var k in mod) if (k !== \"default\" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);\n __setModuleDefault(result, mod);\n return result;\n}\n\nexport function __importDefault(mod) {\n return (mod && mod.__esModule) ? mod : { default: mod };\n}\n\nexport function __classPrivateFieldGet(receiver, state, kind, f) {\n if (kind === \"a\" && !f) throw new TypeError(\"Private accessor was defined without a getter\");\n if (typeof state === \"function\" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError(\"Cannot read private member from an object whose class did not declare it\");\n return kind === \"m\" ? f : kind === \"a\" ? f.call(receiver) : f ? f.value : state.get(receiver);\n}\n\nexport function __classPrivateFieldSet(receiver, state, value, kind, f) {\n if (kind === \"m\") throw new TypeError(\"Private method is not writable\");\n if (kind === \"a\" && !f) throw new TypeError(\"Private accessor was defined without a setter\");\n if (typeof state === \"function\" ? 
receiver !== state || !f : !state.has(receiver)) throw new TypeError(\"Cannot write private member to an object whose class did not declare it\");\n return (kind === \"a\" ? f.call(receiver, value) : f ? f.value = value : state.set(receiver, value)), value;\n}\n\nexport function __classPrivateFieldIn(state, receiver) {\n if (receiver === null || (typeof receiver !== \"object\" && typeof receiver !== \"function\")) throw new TypeError(\"Cannot use 'in' operator on non-object\");\n return typeof state === \"function\" ? receiver === state : state.has(receiver);\n}\n\nexport function __addDisposableResource(env, value, async) {\n if (value !== null && value !== void 0) {\n if (typeof value !== \"object\" && typeof value !== \"function\") throw new TypeError(\"Object expected.\");\n var dispose, inner;\n if (async) {\n if (!Symbol.asyncDispose) throw new TypeError(\"Symbol.asyncDispose is not defined.\");\n dispose = value[Symbol.asyncDispose];\n }\n if (dispose === void 0) {\n if (!Symbol.dispose) throw new TypeError(\"Symbol.dispose is not defined.\");\n dispose = value[Symbol.dispose];\n if (async) inner = dispose;\n }\n if (typeof dispose !== \"function\") throw new TypeError(\"Object not disposable.\");\n if (inner) dispose = function() { try { inner.call(this); } catch (e) { return Promise.reject(e); } };\n env.stack.push({ value: value, dispose: dispose, async: async });\n }\n else if (async) {\n env.stack.push({ async: true });\n }\n return value;\n}\n\nvar _SuppressedError = typeof SuppressedError === \"function\" ? SuppressedError : function (error, suppressed, message) {\n var e = new Error(message);\n return e.name = \"SuppressedError\", e.error = error, e.suppressed = suppressed, e;\n};\n\nexport function __disposeResources(env) {\n function fail(e) {\n env.error = env.hasError ? 
new _SuppressedError(e, env.error, \"An error was suppressed during disposal.\") : e;\n env.hasError = true;\n }\n var r, s = 0;\n function next() {\n while (r = env.stack.pop()) {\n try {\n if (!r.async && s === 1) return s = 0, env.stack.push(r), Promise.resolve().then(next);\n if (r.dispose) {\n var result = r.dispose.call(r.value);\n if (r.async) return s |= 2, Promise.resolve(result).then(next, function(e) { fail(e); return next(); });\n }\n else s |= 1;\n }\n catch (e) {\n fail(e);\n }\n }\n if (s === 1) return env.hasError ? Promise.reject(env.error) : Promise.resolve();\n if (env.hasError) throw env.error;\n }\n return next();\n}\n\nexport default {\n __extends,\n __assign,\n __rest,\n __decorate,\n __param,\n __metadata,\n __awaiter,\n __generator,\n __createBinding,\n __exportStar,\n __values,\n __read,\n __spread,\n __spreadArrays,\n __spreadArray,\n __await,\n __asyncGenerator,\n __asyncDelegator,\n __asyncValues,\n __makeTemplateObject,\n __importStar,\n __importDefault,\n __classPrivateFieldGet,\n __classPrivateFieldSet,\n __classPrivateFieldIn,\n __addDisposableResource,\n __disposeResources,\n};\n", "/**\n * Returns true if the object is a function.\n * @param value The value to check\n */\nexport function isFunction(value: any): value is (...args: any[]) => any {\n return typeof value === 'function';\n}\n", "/**\n * Used to create Error subclasses until the community moves away from ES5.\n *\n * This is because compiling from TypeScript down to ES5 has issues with subclassing Errors\n * as well as other built-in types: https://github.com/Microsoft/TypeScript/issues/12123\n *\n * @param createImpl A factory function to create the actual constructor implementation. 
The returned\n * function should be a named function that calls `_super` internally.\n */\nexport function createErrorClass(createImpl: (_super: any) => any): T {\n const _super = (instance: any) => {\n Error.call(instance);\n instance.stack = new Error().stack;\n };\n\n const ctorFunc = createImpl(_super);\n ctorFunc.prototype = Object.create(Error.prototype);\n ctorFunc.prototype.constructor = ctorFunc;\n return ctorFunc;\n}\n", "import { createErrorClass } from './createErrorClass';\n\nexport interface UnsubscriptionError extends Error {\n readonly errors: any[];\n}\n\nexport interface UnsubscriptionErrorCtor {\n /**\n * @deprecated Internal implementation detail. Do not construct error instances.\n * Cannot be tagged as internal: https://github.com/ReactiveX/rxjs/issues/6269\n */\n new (errors: any[]): UnsubscriptionError;\n}\n\n/**\n * An error thrown when one or more errors have occurred during the\n * `unsubscribe` of a {@link Subscription}.\n */\nexport const UnsubscriptionError: UnsubscriptionErrorCtor = createErrorClass(\n (_super) =>\n function UnsubscriptionErrorImpl(this: any, errors: (Error | string)[]) {\n _super(this);\n this.message = errors\n ? 
`${errors.length} errors occurred during unsubscription:\n${errors.map((err, i) => `${i + 1}) ${err.toString()}`).join('\\n ')}`\n : '';\n this.name = 'UnsubscriptionError';\n this.errors = errors;\n }\n);\n", "/**\n * Removes an item from an array, mutating it.\n * @param arr The array to remove the item from\n * @param item The item to remove\n */\nexport function arrRemove(arr: T[] | undefined | null, item: T) {\n if (arr) {\n const index = arr.indexOf(item);\n 0 <= index && arr.splice(index, 1);\n }\n}\n", "import { isFunction } from './util/isFunction';\nimport { UnsubscriptionError } from './util/UnsubscriptionError';\nimport { SubscriptionLike, TeardownLogic, Unsubscribable } from './types';\nimport { arrRemove } from './util/arrRemove';\n\n/**\n * Represents a disposable resource, such as the execution of an Observable. A\n * Subscription has one important method, `unsubscribe`, that takes no argument\n * and just disposes the resource held by the subscription.\n *\n * Additionally, subscriptions may be grouped together through the `add()`\n * method, which will attach a child Subscription to the current Subscription.\n * When a Subscription is unsubscribed, all its children (and its grandchildren)\n * will be unsubscribed as well.\n */\nexport class Subscription implements SubscriptionLike {\n public static EMPTY = (() => {\n const empty = new Subscription();\n empty.closed = true;\n return empty;\n })();\n\n /**\n * A flag to indicate whether this Subscription has already been unsubscribed.\n */\n public closed = false;\n\n private _parentage: Subscription[] | Subscription | null = null;\n\n /**\n * The list of registered finalizers to execute upon unsubscription. 
Adding and removing from this\n * list occurs in the {@link #add} and {@link #remove} methods.\n */\n private _finalizers: Exclude[] | null = null;\n\n /**\n * @param initialTeardown A function executed first as part of the finalization\n * process that is kicked off when {@link #unsubscribe} is called.\n */\n constructor(private initialTeardown?: () => void) {}\n\n /**\n * Disposes the resources held by the subscription. May, for instance, cancel\n * an ongoing Observable execution or cancel any other type of work that\n * started when the Subscription was created.\n */\n unsubscribe(): void {\n let errors: any[] | undefined;\n\n if (!this.closed) {\n this.closed = true;\n\n // Remove this from it's parents.\n const { _parentage } = this;\n if (_parentage) {\n this._parentage = null;\n if (Array.isArray(_parentage)) {\n for (const parent of _parentage) {\n parent.remove(this);\n }\n } else {\n _parentage.remove(this);\n }\n }\n\n const { initialTeardown: initialFinalizer } = this;\n if (isFunction(initialFinalizer)) {\n try {\n initialFinalizer();\n } catch (e) {\n errors = e instanceof UnsubscriptionError ? e.errors : [e];\n }\n }\n\n const { _finalizers } = this;\n if (_finalizers) {\n this._finalizers = null;\n for (const finalizer of _finalizers) {\n try {\n execFinalizer(finalizer);\n } catch (err) {\n errors = errors ?? [];\n if (err instanceof UnsubscriptionError) {\n errors = [...errors, ...err.errors];\n } else {\n errors.push(err);\n }\n }\n }\n }\n\n if (errors) {\n throw new UnsubscriptionError(errors);\n }\n }\n }\n\n /**\n * Adds a finalizer to this subscription, so that finalization will be unsubscribed/called\n * when this subscription is unsubscribed. 
If this subscription is already {@link #closed},\n * because it has already been unsubscribed, then whatever finalizer is passed to it\n * will automatically be executed (unless the finalizer itself is also a closed subscription).\n *\n * Closed Subscriptions cannot be added as finalizers to any subscription. Adding a closed\n * subscription to a any subscription will result in no operation. (A noop).\n *\n * Adding a subscription to itself, or adding `null` or `undefined` will not perform any\n * operation at all. (A noop).\n *\n * `Subscription` instances that are added to this instance will automatically remove themselves\n * if they are unsubscribed. Functions and {@link Unsubscribable} objects that you wish to remove\n * will need to be removed manually with {@link #remove}\n *\n * @param teardown The finalization logic to add to this subscription.\n */\n add(teardown: TeardownLogic): void {\n // Only add the finalizer if it's not undefined\n // and don't add a subscription to itself.\n if (teardown && teardown !== this) {\n if (this.closed) {\n // If this subscription is already closed,\n // execute whatever finalizer is handed to it automatically.\n execFinalizer(teardown);\n } else {\n if (teardown instanceof Subscription) {\n // We don't add closed subscriptions, and we don't add the same subscription\n // twice. Subscription unsubscribe is idempotent.\n if (teardown.closed || teardown._hasParent(this)) {\n return;\n }\n teardown._addParent(this);\n }\n (this._finalizers = this._finalizers ?? 
[]).push(teardown);\n }\n }\n }\n\n /**\n * Checks to see if a this subscription already has a particular parent.\n * This will signal that this subscription has already been added to the parent in question.\n * @param parent the parent to check for\n */\n private _hasParent(parent: Subscription) {\n const { _parentage } = this;\n return _parentage === parent || (Array.isArray(_parentage) && _parentage.includes(parent));\n }\n\n /**\n * Adds a parent to this subscription so it can be removed from the parent if it\n * unsubscribes on it's own.\n *\n * NOTE: THIS ASSUMES THAT {@link _hasParent} HAS ALREADY BEEN CHECKED.\n * @param parent The parent subscription to add\n */\n private _addParent(parent: Subscription) {\n const { _parentage } = this;\n this._parentage = Array.isArray(_parentage) ? (_parentage.push(parent), _parentage) : _parentage ? [_parentage, parent] : parent;\n }\n\n /**\n * Called on a child when it is removed via {@link #remove}.\n * @param parent The parent to remove\n */\n private _removeParent(parent: Subscription) {\n const { _parentage } = this;\n if (_parentage === parent) {\n this._parentage = null;\n } else if (Array.isArray(_parentage)) {\n arrRemove(_parentage, parent);\n }\n }\n\n /**\n * Removes a finalizer from this subscription that was previously added with the {@link #add} method.\n *\n * Note that `Subscription` instances, when unsubscribed, will automatically remove themselves\n * from every other `Subscription` they have been added to. 
This means that using the `remove` method\n * is not a common thing and should be used thoughtfully.\n *\n * If you add the same finalizer instance of a function or an unsubscribable object to a `Subscription` instance\n * more than once, you will need to call `remove` the same number of times to remove all instances.\n *\n * All finalizer instances are removed to free up memory upon unsubscription.\n *\n * @param teardown The finalizer to remove from this subscription\n */\n remove(teardown: Exclude): void {\n const { _finalizers } = this;\n _finalizers && arrRemove(_finalizers, teardown);\n\n if (teardown instanceof Subscription) {\n teardown._removeParent(this);\n }\n }\n}\n\nexport const EMPTY_SUBSCRIPTION = Subscription.EMPTY;\n\nexport function isSubscription(value: any): value is Subscription {\n return (\n value instanceof Subscription ||\n (value && 'closed' in value && isFunction(value.remove) && isFunction(value.add) && isFunction(value.unsubscribe))\n );\n}\n\nfunction execFinalizer(finalizer: Unsubscribable | (() => void)) {\n if (isFunction(finalizer)) {\n finalizer();\n } else {\n finalizer.unsubscribe();\n }\n}\n", "import { Subscriber } from './Subscriber';\nimport { ObservableNotification } from './types';\n\n/**\n * The {@link GlobalConfig} object for RxJS. It is used to configure things\n * like how to react on unhandled errors.\n */\nexport const config: GlobalConfig = {\n onUnhandledError: null,\n onStoppedNotification: null,\n Promise: undefined,\n useDeprecatedSynchronousErrorHandling: false,\n useDeprecatedNextContext: false,\n};\n\n/**\n * The global configuration object for RxJS, used to configure things\n * like how to react on unhandled errors. Accessible via {@link config}\n * object.\n */\nexport interface GlobalConfig {\n /**\n * A registration point for unhandled errors from RxJS. These are errors that\n * cannot were not handled by consuming code in the usual subscription path. 
For\n * example, if you have this configured, and you subscribe to an observable without\n * providing an error handler, errors from that subscription will end up here. This\n * will _always_ be called asynchronously on another job in the runtime. This is because\n * we do not want errors thrown in this user-configured handler to interfere with the\n * behavior of the library.\n */\n onUnhandledError: ((err: any) => void) | null;\n\n /**\n * A registration point for notifications that cannot be sent to subscribers because they\n * have completed, errored or have been explicitly unsubscribed. By default, next, complete\n * and error notifications sent to stopped subscribers are noops. However, sometimes callers\n * might want a different behavior. For example, with sources that attempt to report errors\n * to stopped subscribers, a caller can configure RxJS to throw an unhandled error instead.\n * This will _always_ be called asynchronously on another job in the runtime. This is because\n * we do not want errors thrown in this user-configured handler to interfere with the\n * behavior of the library.\n */\n onStoppedNotification: ((notification: ObservableNotification, subscriber: Subscriber) => void) | null;\n\n /**\n * The promise constructor used by default for {@link Observable#toPromise toPromise} and {@link Observable#forEach forEach}\n * methods.\n *\n * @deprecated As of version 8, RxJS will no longer support this sort of injection of a\n * Promise constructor. If you need a Promise implementation other than native promises,\n * please polyfill/patch Promise as you see appropriate. Will be removed in v8.\n */\n Promise?: PromiseConstructorLike;\n\n /**\n * If true, turns on synchronous error rethrowing, which is a deprecated behavior\n * in v6 and higher. This behavior enables bad patterns like wrapping a subscribe\n * call in a try/catch block. 
It also enables producer interference, a nasty bug\n * where a multicast can be broken for all observers by a downstream consumer with\n * an unhandled error. DO NOT USE THIS FLAG UNLESS IT'S NEEDED TO BUY TIME\n * FOR MIGRATION REASONS.\n *\n * @deprecated As of version 8, RxJS will no longer support synchronous throwing\n * of unhandled errors. All errors will be thrown on a separate call stack to prevent bad\n * behaviors described above. Will be removed in v8.\n */\n useDeprecatedSynchronousErrorHandling: boolean;\n\n /**\n * If true, enables an as-of-yet undocumented feature from v5: The ability to access\n * `unsubscribe()` via `this` context in `next` functions created in observers passed\n * to `subscribe`.\n *\n * This is being removed because the performance was severely problematic, and it could also cause\n * issues when types other than POJOs are passed to subscribe as subscribers, as they will likely have\n * their `this` context overwritten.\n *\n * @deprecated As of version 8, RxJS will no longer support altering the\n * context of next functions provided as part of an observer to Subscribe. Instead,\n * you will have access to a subscription or a signal or token that will allow you to do things like\n * unsubscribe and test closed status. 
Will be removed in v8.\n */\n useDeprecatedNextContext: boolean;\n}\n", "import type { TimerHandle } from './timerHandle';\ntype SetTimeoutFunction = (handler: () => void, timeout?: number, ...args: any[]) => TimerHandle;\ntype ClearTimeoutFunction = (handle: TimerHandle) => void;\n\ninterface TimeoutProvider {\n setTimeout: SetTimeoutFunction;\n clearTimeout: ClearTimeoutFunction;\n delegate:\n | {\n setTimeout: SetTimeoutFunction;\n clearTimeout: ClearTimeoutFunction;\n }\n | undefined;\n}\n\nexport const timeoutProvider: TimeoutProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n setTimeout(handler: () => void, timeout?: number, ...args) {\n const { delegate } = timeoutProvider;\n if (delegate?.setTimeout) {\n return delegate.setTimeout(handler, timeout, ...args);\n }\n return setTimeout(handler, timeout, ...args);\n },\n clearTimeout(handle) {\n const { delegate } = timeoutProvider;\n return (delegate?.clearTimeout || clearTimeout)(handle as any);\n },\n delegate: undefined,\n};\n", "import { config } from '../config';\nimport { timeoutProvider } from '../scheduler/timeoutProvider';\n\n/**\n * Handles an error on another job either with the user-configured {@link onUnhandledError},\n * or by throwing it on that new job so it can be picked up by `window.onerror`, `process.on('error')`, etc.\n *\n * This should be called whenever there is an error that is out-of-band with the subscription\n * or when an error hits a terminal boundary of the subscription and no error handler was provided.\n *\n * @param err the error to report\n */\nexport function reportUnhandledError(err: any) {\n timeoutProvider.setTimeout(() => {\n const { onUnhandledError } = config;\n if (onUnhandledError) {\n // Execute the user-configured error handler.\n onUnhandledError(err);\n } else {\n // Throw so it is picked up by the runtime's uncaught error mechanism.\n throw err;\n }\n 
});\n}\n", "/* tslint:disable:no-empty */\nexport function noop() { }\n", "import { CompleteNotification, NextNotification, ErrorNotification } from './types';\n\n/**\n * A completion object optimized for memory use and created to be the\n * same \"shape\" as other notifications in v8.\n * @internal\n */\nexport const COMPLETE_NOTIFICATION = (() => createNotification('C', undefined, undefined) as CompleteNotification)();\n\n/**\n * Internal use only. Creates an optimized error notification that is the same \"shape\"\n * as other notifications.\n * @internal\n */\nexport function errorNotification(error: any): ErrorNotification {\n return createNotification('E', undefined, error) as any;\n}\n\n/**\n * Internal use only. Creates an optimized next notification that is the same \"shape\"\n * as other notifications.\n * @internal\n */\nexport function nextNotification(value: T) {\n return createNotification('N', value, undefined) as NextNotification;\n}\n\n/**\n * Ensures that all notifications created internally have the same \"shape\" in v8.\n *\n * TODO: This is only exported to support a crazy legacy test in `groupBy`.\n * @internal\n */\nexport function createNotification(kind: 'N' | 'E' | 'C', value: any, error: any) {\n return {\n kind,\n value,\n error,\n };\n}\n", "import { config } from '../config';\n\nlet context: { errorThrown: boolean; error: any } | null = null;\n\n/**\n * Handles dealing with errors for super-gross mode. Creates a context, in which\n * any synchronously thrown errors will be passed to {@link captureError}. 
Which\n * will record the error such that it will be rethrown after the call back is complete.\n * TODO: Remove in v8\n * @param cb An immediately executed function.\n */\nexport function errorContext(cb: () => void) {\n if (config.useDeprecatedSynchronousErrorHandling) {\n const isRoot = !context;\n if (isRoot) {\n context = { errorThrown: false, error: null };\n }\n cb();\n if (isRoot) {\n const { errorThrown, error } = context!;\n context = null;\n if (errorThrown) {\n throw error;\n }\n }\n } else {\n // This is the general non-deprecated path for everyone that\n // isn't crazy enough to use super-gross mode (useDeprecatedSynchronousErrorHandling)\n cb();\n }\n}\n\n/**\n * Captures errors only in super-gross mode.\n * @param err the error to capture\n */\nexport function captureError(err: any) {\n if (config.useDeprecatedSynchronousErrorHandling && context) {\n context.errorThrown = true;\n context.error = err;\n }\n}\n", "import { isFunction } from './util/isFunction';\nimport { Observer, ObservableNotification } from './types';\nimport { isSubscription, Subscription } from './Subscription';\nimport { config } from './config';\nimport { reportUnhandledError } from './util/reportUnhandledError';\nimport { noop } from './util/noop';\nimport { nextNotification, errorNotification, COMPLETE_NOTIFICATION } from './NotificationFactories';\nimport { timeoutProvider } from './scheduler/timeoutProvider';\nimport { captureError } from './util/errorContext';\n\n/**\n * Implements the {@link Observer} interface and extends the\n * {@link Subscription} class. While the {@link Observer} is the public API for\n * consuming the values of an {@link Observable}, all Observers get converted to\n * a Subscriber, in order to provide Subscription-like capabilities such as\n * `unsubscribe`. 
Subscriber is a common type in RxJS, and crucial for\n * implementing operators, but it is rarely used as a public API.\n */\nexport class Subscriber extends Subscription implements Observer {\n /**\n * A static factory for a Subscriber, given a (potentially partial) definition\n * of an Observer.\n * @param next The `next` callback of an Observer.\n * @param error The `error` callback of an\n * Observer.\n * @param complete The `complete` callback of an\n * Observer.\n * @return A Subscriber wrapping the (partially defined)\n * Observer represented by the given arguments.\n * @deprecated Do not use. Will be removed in v8. There is no replacement for this\n * method, and there is no reason to be creating instances of `Subscriber` directly.\n * If you have a specific use case, please file an issue.\n */\n static create(next?: (x?: T) => void, error?: (e?: any) => void, complete?: () => void): Subscriber {\n return new SafeSubscriber(next, error, complete);\n }\n\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n protected isStopped: boolean = false;\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n protected destination: Subscriber | Observer; // this `any` is the escape hatch to erase extra type param (e.g. R)\n\n /**\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n * There is no reason to directly create an instance of Subscriber. 
This type is exported for typings reasons.\n */\n constructor(destination?: Subscriber | Observer) {\n super();\n if (destination) {\n this.destination = destination;\n // Automatically chain subscriptions together here.\n // if destination is a Subscription, then it is a Subscriber.\n if (isSubscription(destination)) {\n destination.add(this);\n }\n } else {\n this.destination = EMPTY_OBSERVER;\n }\n }\n\n /**\n * The {@link Observer} callback to receive notifications of type `next` from\n * the Observable, with a value. The Observable may call this method 0 or more\n * times.\n * @param value The `next` value.\n */\n next(value: T): void {\n if (this.isStopped) {\n handleStoppedNotification(nextNotification(value), this);\n } else {\n this._next(value!);\n }\n }\n\n /**\n * The {@link Observer} callback to receive notifications of type `error` from\n * the Observable, with an attached `Error`. Notifies the Observer that\n * the Observable has experienced an error condition.\n * @param err The `error` exception.\n */\n error(err?: any): void {\n if (this.isStopped) {\n handleStoppedNotification(errorNotification(err), this);\n } else {\n this.isStopped = true;\n this._error(err);\n }\n }\n\n /**\n * The {@link Observer} callback to receive a valueless notification of type\n * `complete` from the Observable. 
Notifies the Observer that the Observable\n * has finished sending push-based notifications.\n */\n complete(): void {\n if (this.isStopped) {\n handleStoppedNotification(COMPLETE_NOTIFICATION, this);\n } else {\n this.isStopped = true;\n this._complete();\n }\n }\n\n unsubscribe(): void {\n if (!this.closed) {\n this.isStopped = true;\n super.unsubscribe();\n this.destination = null!;\n }\n }\n\n protected _next(value: T): void {\n this.destination.next(value);\n }\n\n protected _error(err: any): void {\n try {\n this.destination.error(err);\n } finally {\n this.unsubscribe();\n }\n }\n\n protected _complete(): void {\n try {\n this.destination.complete();\n } finally {\n this.unsubscribe();\n }\n }\n}\n\n/**\n * This bind is captured here because we want to be able to have\n * compatibility with monoid libraries that tend to use a method named\n * `bind`. In particular, a library called Monio requires this.\n */\nconst _bind = Function.prototype.bind;\n\nfunction bind any>(fn: Fn, thisArg: any): Fn {\n return _bind.call(fn, thisArg);\n}\n\n/**\n * Internal optimization only, DO NOT EXPOSE.\n * @internal\n */\nclass ConsumerObserver implements Observer {\n constructor(private partialObserver: Partial>) {}\n\n next(value: T): void {\n const { partialObserver } = this;\n if (partialObserver.next) {\n try {\n partialObserver.next(value);\n } catch (error) {\n handleUnhandledError(error);\n }\n }\n }\n\n error(err: any): void {\n const { partialObserver } = this;\n if (partialObserver.error) {\n try {\n partialObserver.error(err);\n } catch (error) {\n handleUnhandledError(error);\n }\n } else {\n handleUnhandledError(err);\n }\n }\n\n complete(): void {\n const { partialObserver } = this;\n if (partialObserver.complete) {\n try {\n partialObserver.complete();\n } catch (error) {\n handleUnhandledError(error);\n }\n }\n }\n}\n\nexport class SafeSubscriber extends Subscriber {\n constructor(\n observerOrNext?: Partial> | ((value: T) => void) | null,\n error?: ((e?: 
any) => void) | null,\n complete?: (() => void) | null\n ) {\n super();\n\n let partialObserver: Partial>;\n if (isFunction(observerOrNext) || !observerOrNext) {\n // The first argument is a function, not an observer. The next\n // two arguments *could* be observers, or they could be empty.\n partialObserver = {\n next: (observerOrNext ?? undefined) as ((value: T) => void) | undefined,\n error: error ?? undefined,\n complete: complete ?? undefined,\n };\n } else {\n // The first argument is a partial observer.\n let context: any;\n if (this && config.useDeprecatedNextContext) {\n // This is a deprecated path that made `this.unsubscribe()` available in\n // next handler functions passed to subscribe. This only exists behind a flag\n // now, as it is *very* slow.\n context = Object.create(observerOrNext);\n context.unsubscribe = () => this.unsubscribe();\n partialObserver = {\n next: observerOrNext.next && bind(observerOrNext.next, context),\n error: observerOrNext.error && bind(observerOrNext.error, context),\n complete: observerOrNext.complete && bind(observerOrNext.complete, context),\n };\n } else {\n // The \"normal\" path. 
Just use the partial observer directly.\n partialObserver = observerOrNext;\n }\n }\n\n // Wrap the partial observer to ensure it's a full observer, and\n // make sure proper error handling is accounted for.\n this.destination = new ConsumerObserver(partialObserver);\n }\n}\n\nfunction handleUnhandledError(error: any) {\n if (config.useDeprecatedSynchronousErrorHandling) {\n captureError(error);\n } else {\n // Ideal path, we report this as an unhandled error,\n // which is thrown on a new call stack.\n reportUnhandledError(error);\n }\n}\n\n/**\n * An error handler used when no error handler was supplied\n * to the SafeSubscriber -- meaning no error handler was supplied\n * do the `subscribe` call on our observable.\n * @param err The error to handle\n */\nfunction defaultErrorHandler(err: any) {\n throw err;\n}\n\n/**\n * A handler for notifications that cannot be sent to a stopped subscriber.\n * @param notification The notification being sent.\n * @param subscriber The stopped subscriber.\n */\nfunction handleStoppedNotification(notification: ObservableNotification, subscriber: Subscriber) {\n const { onStoppedNotification } = config;\n onStoppedNotification && timeoutProvider.setTimeout(() => onStoppedNotification(notification, subscriber));\n}\n\n/**\n * The observer used as a stub for subscriptions where the user did not\n * pass any arguments to `subscribe`. Comes with the default error handling\n * behavior.\n */\nexport const EMPTY_OBSERVER: Readonly> & { closed: true } = {\n closed: true,\n next: noop,\n error: defaultErrorHandler,\n complete: noop,\n};\n", "/**\n * Symbol.observable or a string \"@@observable\". 
Used for interop\n *\n * @deprecated We will no longer be exporting this symbol in upcoming versions of RxJS.\n * Instead polyfill and use Symbol.observable directly *or* use https://www.npmjs.com/package/symbol-observable\n */\nexport const observable: string | symbol = (() => (typeof Symbol === 'function' && Symbol.observable) || '@@observable')();\n", "/**\n * This function takes one parameter and just returns it. Simply put,\n * this is like `(x: T): T => x`.\n *\n * ## Examples\n *\n * This is useful in some cases when using things like `mergeMap`\n *\n * ```ts\n * import { interval, take, map, range, mergeMap, identity } from 'rxjs';\n *\n * const source$ = interval(1000).pipe(take(5));\n *\n * const result$ = source$.pipe(\n * map(i => range(i)),\n * mergeMap(identity) // same as mergeMap(x => x)\n * );\n *\n * result$.subscribe({\n * next: console.log\n * });\n * ```\n *\n * Or when you want to selectively apply an operator\n *\n * ```ts\n * import { interval, take, identity } from 'rxjs';\n *\n * const shouldLimit = () => Math.random() < 0.5;\n *\n * const source$ = interval(1000);\n *\n * const result$ = source$.pipe(shouldLimit() ? 
take(5) : identity);\n *\n * result$.subscribe({\n * next: console.log\n * });\n * ```\n *\n * @param x Any value that is returned by this function\n * @returns The value passed as the first parameter to this function\n */\nexport function identity(x: T): T {\n return x;\n}\n", "import { identity } from './identity';\nimport { UnaryFunction } from '../types';\n\nexport function pipe(): typeof identity;\nexport function pipe(fn1: UnaryFunction): UnaryFunction;\nexport function pipe(fn1: UnaryFunction, fn2: UnaryFunction): UnaryFunction;\nexport function pipe(fn1: UnaryFunction, fn2: UnaryFunction, fn3: UnaryFunction): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction,\n fn8: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction,\n fn8: UnaryFunction,\n fn9: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction,\n fn8: UnaryFunction,\n fn9: UnaryFunction,\n ...fns: UnaryFunction[]\n): 
UnaryFunction;\n\n/**\n * pipe() can be called on one or more functions, each of which can take one argument (\"UnaryFunction\")\n * and uses it to return a value.\n * It returns a function that takes one argument, passes it to the first UnaryFunction, and then\n * passes the result to the next one, passes that result to the next one, and so on. \n */\nexport function pipe(...fns: Array>): UnaryFunction {\n return pipeFromArray(fns);\n}\n\n/** @internal */\nexport function pipeFromArray(fns: Array>): UnaryFunction {\n if (fns.length === 0) {\n return identity as UnaryFunction;\n }\n\n if (fns.length === 1) {\n return fns[0];\n }\n\n return function piped(input: T): R {\n return fns.reduce((prev: any, fn: UnaryFunction) => fn(prev), input as any);\n };\n}\n", "import { Operator } from './Operator';\nimport { SafeSubscriber, Subscriber } from './Subscriber';\nimport { isSubscription, Subscription } from './Subscription';\nimport { TeardownLogic, OperatorFunction, Subscribable, Observer } from './types';\nimport { observable as Symbol_observable } from './symbol/observable';\nimport { pipeFromArray } from './util/pipe';\nimport { config } from './config';\nimport { isFunction } from './util/isFunction';\nimport { errorContext } from './util/errorContext';\n\n/**\n * A representation of any set of values over any amount of time. This is the most basic building block\n * of RxJS.\n */\nexport class Observable implements Subscribable {\n /**\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n */\n source: Observable | undefined;\n\n /**\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n */\n operator: Operator | undefined;\n\n /**\n * @param subscribe The function that is called when the Observable is\n * initially subscribed to. 
This function is given a Subscriber, to which new values\n * can be `next`ed, or an `error` method can be called to raise an error, or\n * `complete` can be called to notify of a successful completion.\n */\n constructor(subscribe?: (this: Observable, subscriber: Subscriber) => TeardownLogic) {\n if (subscribe) {\n this._subscribe = subscribe;\n }\n }\n\n // HACK: Since TypeScript inherits static properties too, we have to\n // fight against TypeScript here so Subject can have a different static create signature\n /**\n * Creates a new Observable by calling the Observable constructor\n * @param subscribe the subscriber function to be passed to the Observable constructor\n * @return A new observable.\n * @deprecated Use `new Observable()` instead. Will be removed in v8.\n */\n static create: (...args: any[]) => any = (subscribe?: (subscriber: Subscriber) => TeardownLogic) => {\n return new Observable(subscribe);\n };\n\n /**\n * Creates a new Observable, with this Observable instance as the source, and the passed\n * operator defined as the new observable's operator.\n * @param operator the operator defining the operation to take on the observable\n * @return A new observable with the Operator applied.\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n * If you have implemented an operator using `lift`, it is recommended that you create an\n * operator by simply returning `new Observable()` directly. See \"Creating new operators from\n * scratch\" section here: https://rxjs.dev/guide/operators\n */\n lift(operator?: Operator): Observable {\n const observable = new Observable();\n observable.source = this;\n observable.operator = operator;\n return observable;\n }\n\n subscribe(observerOrNext?: Partial> | ((value: T) => void)): Subscription;\n /** @deprecated Instead of passing separate callback arguments, use an observer argument. Signatures taking separate callback arguments will be removed in v8. 
Details: https://rxjs.dev/deprecations/subscribe-arguments */\n subscribe(next?: ((value: T) => void) | null, error?: ((error: any) => void) | null, complete?: (() => void) | null): Subscription;\n /**\n * Invokes an execution of an Observable and registers Observer handlers for notifications it will emit.\n *\n * Use it when you have all these Observables, but still nothing is happening.\n *\n * `subscribe` is not a regular operator, but a method that calls Observable's internal `subscribe` function. It\n * might be for example a function that you passed to Observable's constructor, but most of the time it is\n * a library implementation, which defines what will be emitted by an Observable, and when it be will emitted. This means\n * that calling `subscribe` is actually the moment when Observable starts its work, not when it is created, as it is often\n * the thought.\n *\n * Apart from starting the execution of an Observable, this method allows you to listen for values\n * that an Observable emits, as well as for when it completes or errors. You can achieve this in two\n * of the following ways.\n *\n * The first way is creating an object that implements {@link Observer} interface. It should have methods\n * defined by that interface, but note that it should be just a regular JavaScript object, which you can create\n * yourself in any way you want (ES6 class, classic function constructor, object literal etc.). In particular, do\n * not attempt to use any RxJS implementation details to create Observers - you don't need them. Remember also\n * that your object does not have to implement all methods. If you find yourself creating a method that doesn't\n * do anything, you can simply omit it. Note however, if the `error` method is not provided and an error happens,\n * it will be thrown asynchronously. Errors thrown asynchronously cannot be caught using `try`/`catch`. 
Instead,\n * use the {@link onUnhandledError} configuration option or use a runtime handler (like `window.onerror` or\n * `process.on('error)`) to be notified of unhandled errors. Because of this, it's recommended that you provide\n * an `error` method to avoid missing thrown errors.\n *\n * The second way is to give up on Observer object altogether and simply provide callback functions in place of its methods.\n * This means you can provide three functions as arguments to `subscribe`, where the first function is equivalent\n * of a `next` method, the second of an `error` method and the third of a `complete` method. Just as in case of an Observer,\n * if you do not need to listen for something, you can omit a function by passing `undefined` or `null`,\n * since `subscribe` recognizes these functions by where they were placed in function call. When it comes\n * to the `error` function, as with an Observer, if not provided, errors emitted by an Observable will be thrown asynchronously.\n *\n * You can, however, subscribe with no parameters at all. This may be the case where you're not interested in terminal events\n * and you also handled emissions internally by using operators (e.g. using `tap`).\n *\n * Whichever style of calling `subscribe` you use, in both cases it returns a Subscription object.\n * This object allows you to call `unsubscribe` on it, which in turn will stop the work that an Observable does and will clean\n * up all resources that an Observable used. Note that cancelling a subscription will not call `complete` callback\n * provided to `subscribe` function, which is reserved for a regular completion signal that comes from an Observable.\n *\n * Remember that callbacks provided to `subscribe` are not guaranteed to be called asynchronously.\n * It is an Observable itself that decides when these functions will be called. For example {@link of}\n * by default emits all its values synchronously. 
Always check documentation for how given Observable\n * will behave when subscribed and if its default behavior can be modified with a `scheduler`.\n *\n * #### Examples\n *\n * Subscribe with an {@link guide/observer Observer}\n *\n * ```ts\n * import { of } from 'rxjs';\n *\n * const sumObserver = {\n * sum: 0,\n * next(value) {\n * console.log('Adding: ' + value);\n * this.sum = this.sum + value;\n * },\n * error() {\n * // We actually could just remove this method,\n * // since we do not really care about errors right now.\n * },\n * complete() {\n * console.log('Sum equals: ' + this.sum);\n * }\n * };\n *\n * of(1, 2, 3) // Synchronously emits 1, 2, 3 and then completes.\n * .subscribe(sumObserver);\n *\n * // Logs:\n * // 'Adding: 1'\n * // 'Adding: 2'\n * // 'Adding: 3'\n * // 'Sum equals: 6'\n * ```\n *\n * Subscribe with functions ({@link deprecations/subscribe-arguments deprecated})\n *\n * ```ts\n * import { of } from 'rxjs'\n *\n * let sum = 0;\n *\n * of(1, 2, 3).subscribe(\n * value => {\n * console.log('Adding: ' + value);\n * sum = sum + value;\n * },\n * undefined,\n * () => console.log('Sum equals: ' + sum)\n * );\n *\n * // Logs:\n * // 'Adding: 1'\n * // 'Adding: 2'\n * // 'Adding: 3'\n * // 'Sum equals: 6'\n * ```\n *\n * Cancel a subscription\n *\n * ```ts\n * import { interval } from 'rxjs';\n *\n * const subscription = interval(1000).subscribe({\n * next(num) {\n * console.log(num)\n * },\n * complete() {\n * // Will not be called, even when cancelling subscription.\n * console.log('completed!');\n * }\n * });\n *\n * setTimeout(() => {\n * subscription.unsubscribe();\n * console.log('unsubscribed!');\n * }, 2500);\n *\n * // Logs:\n * // 0 after 1s\n * // 1 after 2s\n * // 'unsubscribed!' 
after 2.5s\n * ```\n *\n * @param observerOrNext Either an {@link Observer} with some or all callback methods,\n * or the `next` handler that is called for each value emitted from the subscribed Observable.\n * @param error A handler for a terminal event resulting from an error. If no error handler is provided,\n * the error will be thrown asynchronously as unhandled.\n * @param complete A handler for a terminal event resulting from successful completion.\n * @return A subscription reference to the registered handlers.\n */\n subscribe(\n observerOrNext?: Partial> | ((value: T) => void) | null,\n error?: ((error: any) => void) | null,\n complete?: (() => void) | null\n ): Subscription {\n const subscriber = isSubscriber(observerOrNext) ? observerOrNext : new SafeSubscriber(observerOrNext, error, complete);\n\n errorContext(() => {\n const { operator, source } = this;\n subscriber.add(\n operator\n ? // We're dealing with a subscription in the\n // operator chain to one of our lifted operators.\n operator.call(subscriber, source)\n : source\n ? // If `source` has a value, but `operator` does not, something that\n // had intimate knowledge of our API, like our `Subject`, must have\n // set it. We're going to just call `_subscribe` directly.\n this._subscribe(subscriber)\n : // In all other cases, we're likely wrapping a user-provided initializer\n // function, so we need to catch errors and handle them appropriately.\n this._trySubscribe(subscriber)\n );\n });\n\n return subscriber;\n }\n\n /** @internal */\n protected _trySubscribe(sink: Subscriber): TeardownLogic {\n try {\n return this._subscribe(sink);\n } catch (err) {\n // We don't need to return anything in this case,\n // because it's just going to try to `add()` to a subscription\n // above.\n sink.error(err);\n }\n }\n\n /**\n * Used as a NON-CANCELLABLE means of subscribing to an observable, for use with\n * APIs that expect promises, like `async/await`. 
You cannot unsubscribe from this.\n *\n * **WARNING**: Only use this with observables you *know* will complete. If the source\n * observable does not complete, you will end up with a promise that is hung up, and\n * potentially all of the state of an async function hanging out in memory. To avoid\n * this situation, look into adding something like {@link timeout}, {@link take},\n * {@link takeWhile}, or {@link takeUntil} amongst others.\n *\n * #### Example\n *\n * ```ts\n * import { interval, take } from 'rxjs';\n *\n * const source$ = interval(1000).pipe(take(4));\n *\n * async function getTotal() {\n * let total = 0;\n *\n * await source$.forEach(value => {\n * total += value;\n * console.log('observable -> ' + value);\n * });\n *\n * return total;\n * }\n *\n * getTotal().then(\n * total => console.log('Total: ' + total)\n * );\n *\n * // Expected:\n * // 'observable -> 0'\n * // 'observable -> 1'\n * // 'observable -> 2'\n * // 'observable -> 3'\n * // 'Total: 6'\n * ```\n *\n * @param next A handler for each value emitted by the observable.\n * @return A promise that either resolves on observable completion or\n * rejects with the handled error.\n */\n forEach(next: (value: T) => void): Promise;\n\n /**\n * @param next a handler for each value emitted by the observable\n * @param promiseCtor a constructor function used to instantiate the Promise\n * @return a promise that either resolves on observable completion or\n * rejects with the handled error\n * @deprecated Passing a Promise constructor will no longer be available\n * in upcoming versions of RxJS. This is because it adds weight to the library, for very\n * little benefit. If you need this functionality, it is recommended that you either\n * polyfill Promise, or you create an adapter to convert the returned native promise\n * to whatever promise implementation you wanted. 
Will be removed in v8.\n */\n forEach(next: (value: T) => void, promiseCtor: PromiseConstructorLike): Promise;\n\n forEach(next: (value: T) => void, promiseCtor?: PromiseConstructorLike): Promise {\n promiseCtor = getPromiseCtor(promiseCtor);\n\n return new promiseCtor((resolve, reject) => {\n const subscriber = new SafeSubscriber({\n next: (value) => {\n try {\n next(value);\n } catch (err) {\n reject(err);\n subscriber.unsubscribe();\n }\n },\n error: reject,\n complete: resolve,\n });\n this.subscribe(subscriber);\n }) as Promise;\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): TeardownLogic {\n return this.source?.subscribe(subscriber);\n }\n\n /**\n * An interop point defined by the es7-observable spec https://github.com/zenparsing/es-observable\n * @return This instance of the observable.\n */\n [Symbol_observable]() {\n return this;\n }\n\n /* tslint:disable:max-line-length */\n pipe(): Observable;\n pipe(op1: OperatorFunction): Observable;\n pipe(op1: OperatorFunction, op2: OperatorFunction): Observable;\n pipe(op1: OperatorFunction, op2: OperatorFunction, op3: OperatorFunction): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction,\n op8: OperatorFunction\n ): 
Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction,\n op8: OperatorFunction,\n op9: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction,\n op8: OperatorFunction,\n op9: OperatorFunction,\n ...operations: OperatorFunction[]\n ): Observable;\n /* tslint:enable:max-line-length */\n\n /**\n * Used to stitch together functional operators into a chain.\n *\n * ## Example\n *\n * ```ts\n * import { interval, filter, map, scan } from 'rxjs';\n *\n * interval(1000)\n * .pipe(\n * filter(x => x % 2 === 0),\n * map(x => x + x),\n * scan((acc, x) => acc + x)\n * )\n * .subscribe(x => console.log(x));\n * ```\n *\n * @return The Observable result of all the operators having been called\n * in the order they were passed in.\n */\n pipe(...operations: OperatorFunction[]): Observable {\n return pipeFromArray(operations)(this);\n }\n\n /* tslint:disable:max-line-length */\n /** @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. Details: https://rxjs.dev/deprecations/to-promise */\n toPromise(): Promise;\n /** @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. Details: https://rxjs.dev/deprecations/to-promise */\n toPromise(PromiseCtor: typeof Promise): Promise;\n /** @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. 
Details: https://rxjs.dev/deprecations/to-promise */\n toPromise(PromiseCtor: PromiseConstructorLike): Promise;\n /* tslint:enable:max-line-length */\n\n /**\n * Subscribe to this Observable and get a Promise resolving on\n * `complete` with the last emission (if any).\n *\n * **WARNING**: Only use this with observables you *know* will complete. If the source\n * observable does not complete, you will end up with a promise that is hung up, and\n * potentially all of the state of an async function hanging out in memory. To avoid\n * this situation, look into adding something like {@link timeout}, {@link take},\n * {@link takeWhile}, or {@link takeUntil} amongst others.\n *\n * @param [promiseCtor] a constructor function used to instantiate\n * the Promise\n * @return A Promise that resolves with the last value emit, or\n * rejects on an error. If there were no emissions, Promise\n * resolves with undefined.\n * @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. Details: https://rxjs.dev/deprecations/to-promise\n */\n toPromise(promiseCtor?: PromiseConstructorLike): Promise {\n promiseCtor = getPromiseCtor(promiseCtor);\n\n return new promiseCtor((resolve, reject) => {\n let value: T | undefined;\n this.subscribe(\n (x: T) => (value = x),\n (err: any) => reject(err),\n () => resolve(value)\n );\n }) as Promise;\n }\n}\n\n/**\n * Decides between a passed promise constructor from consuming code,\n * A default configured promise constructor, and the native promise\n * constructor and returns it. If nothing can be found, it will throw\n * an error.\n * @param promiseCtor The optional promise constructor to passed by consuming code\n */\nfunction getPromiseCtor(promiseCtor: PromiseConstructorLike | undefined) {\n return promiseCtor ?? config.Promise ?? 
Promise;\n}\n\nfunction isObserver(value: any): value is Observer {\n return value && isFunction(value.next) && isFunction(value.error) && isFunction(value.complete);\n}\n\nfunction isSubscriber(value: any): value is Subscriber {\n return (value && value instanceof Subscriber) || (isObserver(value) && isSubscription(value));\n}\n", "import { Observable } from '../Observable';\nimport { Subscriber } from '../Subscriber';\nimport { OperatorFunction } from '../types';\nimport { isFunction } from './isFunction';\n\n/**\n * Used to determine if an object is an Observable with a lift function.\n */\nexport function hasLift(source: any): source is { lift: InstanceType['lift'] } {\n return isFunction(source?.lift);\n}\n\n/**\n * Creates an `OperatorFunction`. Used to define operators throughout the library in a concise way.\n * @param init The logic to connect the liftedSource to the subscriber at the moment of subscription.\n */\nexport function operate(\n init: (liftedSource: Observable, subscriber: Subscriber) => (() => void) | void\n): OperatorFunction {\n return (source: Observable) => {\n if (hasLift(source)) {\n return source.lift(function (this: Subscriber, liftedSource: Observable) {\n try {\n return init(liftedSource, this);\n } catch (err) {\n this.error(err);\n }\n });\n }\n throw new TypeError('Unable to lift unknown Observable type');\n };\n}\n", "import { Subscriber } from '../Subscriber';\n\n/**\n * Creates an instance of an `OperatorSubscriber`.\n * @param destination The downstream subscriber.\n * @param onNext Handles next values, only called if this subscriber is not stopped or closed. Any\n * error that occurs in this function is caught and sent to the `error` method of this subscriber.\n * @param onError Handles errors from the subscription, any errors that occur in this handler are caught\n * and send to the `destination` error handler.\n * @param onComplete Handles completion notification from the subscription. 
Any errors that occur in\n * this handler are sent to the `destination` error handler.\n * @param onFinalize Additional teardown logic here. This will only be called on teardown if the\n * subscriber itself is not already closed. This is called after all other teardown logic is executed.\n */\nexport function createOperatorSubscriber(\n destination: Subscriber,\n onNext?: (value: T) => void,\n onComplete?: () => void,\n onError?: (err: any) => void,\n onFinalize?: () => void\n): Subscriber {\n return new OperatorSubscriber(destination, onNext, onComplete, onError, onFinalize);\n}\n\n/**\n * A generic helper for allowing operators to be created with a Subscriber and\n * use closures to capture necessary state from the operator function itself.\n */\nexport class OperatorSubscriber extends Subscriber {\n /**\n * Creates an instance of an `OperatorSubscriber`.\n * @param destination The downstream subscriber.\n * @param onNext Handles next values, only called if this subscriber is not stopped or closed. Any\n * error that occurs in this function is caught and sent to the `error` method of this subscriber.\n * @param onError Handles errors from the subscription, any errors that occur in this handler are caught\n * and send to the `destination` error handler.\n * @param onComplete Handles completion notification from the subscription. Any errors that occur in\n * this handler are sent to the `destination` error handler.\n * @param onFinalize Additional finalization logic here. This will only be called on finalization if the\n * subscriber itself is not already closed. 
This is called after all other finalization logic is executed.\n * @param shouldUnsubscribe An optional check to see if an unsubscribe call should truly unsubscribe.\n * NOTE: This currently **ONLY** exists to support the strange behavior of {@link groupBy}, where unsubscription\n * to the resulting observable does not actually disconnect from the source if there are active subscriptions\n * to any grouped observable. (DO NOT EXPOSE OR USE EXTERNALLY!!!)\n */\n constructor(\n destination: Subscriber,\n onNext?: (value: T) => void,\n onComplete?: () => void,\n onError?: (err: any) => void,\n private onFinalize?: () => void,\n private shouldUnsubscribe?: () => boolean\n ) {\n // It's important - for performance reasons - that all of this class's\n // members are initialized and that they are always initialized in the same\n // order. This will ensure that all OperatorSubscriber instances have the\n // same hidden class in V8. This, in turn, will help keep the number of\n // hidden classes involved in property accesses within the base class as\n // low as possible. If the number of hidden classes involved exceeds four,\n // the property accesses will become megamorphic and performance penalties\n // will be incurred - i.e. inline caches won't be used.\n //\n // The reasons for ensuring all instances have the same hidden class are\n // further discussed in this blog post from Benedikt Meurer:\n // https://benediktmeurer.de/2018/03/23/impact-of-polymorphism-on-component-based-frameworks-like-react/\n super(destination);\n this._next = onNext\n ? function (this: OperatorSubscriber, value: T) {\n try {\n onNext(value);\n } catch (err) {\n destination.error(err);\n }\n }\n : super._next;\n this._error = onError\n ? 
function (this: OperatorSubscriber, err: any) {\n try {\n onError(err);\n } catch (err) {\n // Send any errors that occur down stream.\n destination.error(err);\n } finally {\n // Ensure finalization.\n this.unsubscribe();\n }\n }\n : super._error;\n this._complete = onComplete\n ? function (this: OperatorSubscriber) {\n try {\n onComplete();\n } catch (err) {\n // Send any errors that occur down stream.\n destination.error(err);\n } finally {\n // Ensure finalization.\n this.unsubscribe();\n }\n }\n : super._complete;\n }\n\n unsubscribe() {\n if (!this.shouldUnsubscribe || this.shouldUnsubscribe()) {\n const { closed } = this;\n super.unsubscribe();\n // Execute additional teardown if we have any and we didn't already do so.\n !closed && this.onFinalize?.();\n }\n }\n}\n", "import { Subscription } from '../Subscription';\n\ninterface AnimationFrameProvider {\n schedule(callback: FrameRequestCallback): Subscription;\n requestAnimationFrame: typeof requestAnimationFrame;\n cancelAnimationFrame: typeof cancelAnimationFrame;\n delegate:\n | {\n requestAnimationFrame: typeof requestAnimationFrame;\n cancelAnimationFrame: typeof cancelAnimationFrame;\n }\n | undefined;\n}\n\nexport const animationFrameProvider: AnimationFrameProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n schedule(callback) {\n let request = requestAnimationFrame;\n let cancel: typeof cancelAnimationFrame | undefined = cancelAnimationFrame;\n const { delegate } = animationFrameProvider;\n if (delegate) {\n request = delegate.requestAnimationFrame;\n cancel = delegate.cancelAnimationFrame;\n }\n const handle = request((timestamp) => {\n // Clear the cancel function. 
The request has been fulfilled, so\n // attempting to cancel the request upon unsubscription would be\n // pointless.\n cancel = undefined;\n callback(timestamp);\n });\n return new Subscription(() => cancel?.(handle));\n },\n requestAnimationFrame(...args) {\n const { delegate } = animationFrameProvider;\n return (delegate?.requestAnimationFrame || requestAnimationFrame)(...args);\n },\n cancelAnimationFrame(...args) {\n const { delegate } = animationFrameProvider;\n return (delegate?.cancelAnimationFrame || cancelAnimationFrame)(...args);\n },\n delegate: undefined,\n};\n", "import { createErrorClass } from './createErrorClass';\n\nexport interface ObjectUnsubscribedError extends Error {}\n\nexport interface ObjectUnsubscribedErrorCtor {\n /**\n * @deprecated Internal implementation detail. Do not construct error instances.\n * Cannot be tagged as internal: https://github.com/ReactiveX/rxjs/issues/6269\n */\n new (): ObjectUnsubscribedError;\n}\n\n/**\n * An error thrown when an action is invalid because the object has been\n * unsubscribed.\n *\n * @see {@link Subject}\n * @see {@link BehaviorSubject}\n *\n * @class ObjectUnsubscribedError\n */\nexport const ObjectUnsubscribedError: ObjectUnsubscribedErrorCtor = createErrorClass(\n (_super) =>\n function ObjectUnsubscribedErrorImpl(this: any) {\n _super(this);\n this.name = 'ObjectUnsubscribedError';\n this.message = 'object unsubscribed';\n }\n);\n", "import { Operator } from './Operator';\nimport { Observable } from './Observable';\nimport { Subscriber } from './Subscriber';\nimport { Subscription, EMPTY_SUBSCRIPTION } from './Subscription';\nimport { Observer, SubscriptionLike, TeardownLogic } from './types';\nimport { ObjectUnsubscribedError } from './util/ObjectUnsubscribedError';\nimport { arrRemove } from './util/arrRemove';\nimport { errorContext } from './util/errorContext';\n\n/**\n * A Subject is a special type of Observable that allows values to be\n * multicasted to many Observers. 
Subjects are like EventEmitters.\n *\n * Every Subject is an Observable and an Observer. You can subscribe to a\n * Subject, and you can call next to feed values as well as error and complete.\n */\nexport class Subject extends Observable implements SubscriptionLike {\n closed = false;\n\n private currentObservers: Observer[] | null = null;\n\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n observers: Observer[] = [];\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n isStopped = false;\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n hasError = false;\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n thrownError: any = null;\n\n /**\n * Creates a \"subject\" by basically gluing an observer to an observable.\n *\n * @deprecated Recommended you do not use. Will be removed at some point in the future. Plans for replacement still under discussion.\n */\n static create: (...args: any[]) => any = (destination: Observer, source: Observable): AnonymousSubject => {\n return new AnonymousSubject(destination, source);\n };\n\n constructor() {\n // NOTE: This must be here to obscure Observable's constructor.\n super();\n }\n\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. 
*/\n lift(operator: Operator): Observable {\n const subject = new AnonymousSubject(this, this);\n subject.operator = operator as any;\n return subject as any;\n }\n\n /** @internal */\n protected _throwIfClosed() {\n if (this.closed) {\n throw new ObjectUnsubscribedError();\n }\n }\n\n next(value: T) {\n errorContext(() => {\n this._throwIfClosed();\n if (!this.isStopped) {\n if (!this.currentObservers) {\n this.currentObservers = Array.from(this.observers);\n }\n for (const observer of this.currentObservers) {\n observer.next(value);\n }\n }\n });\n }\n\n error(err: any) {\n errorContext(() => {\n this._throwIfClosed();\n if (!this.isStopped) {\n this.hasError = this.isStopped = true;\n this.thrownError = err;\n const { observers } = this;\n while (observers.length) {\n observers.shift()!.error(err);\n }\n }\n });\n }\n\n complete() {\n errorContext(() => {\n this._throwIfClosed();\n if (!this.isStopped) {\n this.isStopped = true;\n const { observers } = this;\n while (observers.length) {\n observers.shift()!.complete();\n }\n }\n });\n }\n\n unsubscribe() {\n this.isStopped = this.closed = true;\n this.observers = this.currentObservers = null!;\n }\n\n get observed() {\n return this.observers?.length > 0;\n }\n\n /** @internal */\n protected _trySubscribe(subscriber: Subscriber): TeardownLogic {\n this._throwIfClosed();\n return super._trySubscribe(subscriber);\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n this._throwIfClosed();\n this._checkFinalizedStatuses(subscriber);\n return this._innerSubscribe(subscriber);\n }\n\n /** @internal */\n protected _innerSubscribe(subscriber: Subscriber) {\n const { hasError, isStopped, observers } = this;\n if (hasError || isStopped) {\n return EMPTY_SUBSCRIPTION;\n }\n this.currentObservers = null;\n observers.push(subscriber);\n return new Subscription(() => {\n this.currentObservers = null;\n arrRemove(observers, subscriber);\n });\n }\n\n /** @internal */\n protected 
_checkFinalizedStatuses(subscriber: Subscriber) {\n const { hasError, thrownError, isStopped } = this;\n if (hasError) {\n subscriber.error(thrownError);\n } else if (isStopped) {\n subscriber.complete();\n }\n }\n\n /**\n * Creates a new Observable with this Subject as the source. You can do this\n * to create custom Observer-side logic of the Subject and conceal it from\n * code that uses the Observable.\n * @return Observable that this Subject casts to.\n */\n asObservable(): Observable {\n const observable: any = new Observable();\n observable.source = this;\n return observable;\n }\n}\n\nexport class AnonymousSubject extends Subject {\n constructor(\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n public destination?: Observer,\n source?: Observable\n ) {\n super();\n this.source = source;\n }\n\n next(value: T) {\n this.destination?.next?.(value);\n }\n\n error(err: any) {\n this.destination?.error?.(err);\n }\n\n complete() {\n this.destination?.complete?.();\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n return this.source?.subscribe(subscriber) ?? 
EMPTY_SUBSCRIPTION;\n }\n}\n", "import { Subject } from './Subject';\nimport { Subscriber } from './Subscriber';\nimport { Subscription } from './Subscription';\n\n/**\n * A variant of Subject that requires an initial value and emits its current\n * value whenever it is subscribed to.\n */\nexport class BehaviorSubject extends Subject {\n constructor(private _value: T) {\n super();\n }\n\n get value(): T {\n return this.getValue();\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n const subscription = super._subscribe(subscriber);\n !subscription.closed && subscriber.next(this._value);\n return subscription;\n }\n\n getValue(): T {\n const { hasError, thrownError, _value } = this;\n if (hasError) {\n throw thrownError;\n }\n this._throwIfClosed();\n return _value;\n }\n\n next(value: T): void {\n super.next((this._value = value));\n }\n}\n", "import { TimestampProvider } from '../types';\n\ninterface DateTimestampProvider extends TimestampProvider {\n delegate: TimestampProvider | undefined;\n}\n\nexport const dateTimestampProvider: DateTimestampProvider = {\n now() {\n // Use the variable rather than `this` so that the function can be called\n // without being bound to the provider.\n return (dateTimestampProvider.delegate || Date).now();\n },\n delegate: undefined,\n};\n", "import { Subject } from './Subject';\nimport { TimestampProvider } from './types';\nimport { Subscriber } from './Subscriber';\nimport { Subscription } from './Subscription';\nimport { dateTimestampProvider } from './scheduler/dateTimestampProvider';\n\n/**\n * A variant of {@link Subject} that \"replays\" old values to new subscribers by emitting them when they first subscribe.\n *\n * `ReplaySubject` has an internal buffer that will store a specified number of values that it has observed. Like `Subject`,\n * `ReplaySubject` \"observes\" values by having them passed to its `next` method. 
When it observes a value, it will store that\n * value for a time determined by the configuration of the `ReplaySubject`, as passed to its constructor.\n *\n * When a new subscriber subscribes to the `ReplaySubject` instance, it will synchronously emit all values in its buffer in\n * a First-In-First-Out (FIFO) manner. The `ReplaySubject` will also complete, if it has observed completion; and it will\n * error if it has observed an error.\n *\n * There are two main configuration items to be concerned with:\n *\n * 1. `bufferSize` - This will determine how many items are stored in the buffer, defaults to infinite.\n * 2. `windowTime` - The amount of time to hold a value in the buffer before removing it from the buffer.\n *\n * Both configurations may exist simultaneously. So if you would like to buffer a maximum of 3 values, as long as the values\n * are less than 2 seconds old, you could do so with a `new ReplaySubject(3, 2000)`.\n *\n * ### Differences with BehaviorSubject\n *\n * `BehaviorSubject` is similar to `new ReplaySubject(1)`, with a couple of exceptions:\n *\n * 1. `BehaviorSubject` comes \"primed\" with a single value upon construction.\n * 2. `ReplaySubject` will replay values, even after observing an error, where `BehaviorSubject` will not.\n *\n * @see {@link Subject}\n * @see {@link BehaviorSubject}\n * @see {@link shareReplay}\n */\nexport class ReplaySubject extends Subject {\n private _buffer: (T | number)[] = [];\n private _infiniteTimeWindow = true;\n\n /**\n * @param _bufferSize The size of the buffer to replay on subscription\n * @param _windowTime The amount of time the buffered items will stay buffered\n * @param _timestampProvider An object with a `now()` method that provides the current timestamp. 
This is used to\n * calculate the amount of time something has been buffered.\n */\n constructor(\n private _bufferSize = Infinity,\n private _windowTime = Infinity,\n private _timestampProvider: TimestampProvider = dateTimestampProvider\n ) {\n super();\n this._infiniteTimeWindow = _windowTime === Infinity;\n this._bufferSize = Math.max(1, _bufferSize);\n this._windowTime = Math.max(1, _windowTime);\n }\n\n next(value: T): void {\n const { isStopped, _buffer, _infiniteTimeWindow, _timestampProvider, _windowTime } = this;\n if (!isStopped) {\n _buffer.push(value);\n !_infiniteTimeWindow && _buffer.push(_timestampProvider.now() + _windowTime);\n }\n this._trimBuffer();\n super.next(value);\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n this._throwIfClosed();\n this._trimBuffer();\n\n const subscription = this._innerSubscribe(subscriber);\n\n const { _infiniteTimeWindow, _buffer } = this;\n // We use a copy here, so reentrant code does not mutate our array while we're\n // emitting it to a new subscriber.\n const copy = _buffer.slice();\n for (let i = 0; i < copy.length && !subscriber.closed; i += _infiniteTimeWindow ? 1 : 2) {\n subscriber.next(copy[i] as T);\n }\n\n this._checkFinalizedStatuses(subscriber);\n\n return subscription;\n }\n\n private _trimBuffer() {\n const { _bufferSize, _timestampProvider, _buffer, _infiniteTimeWindow } = this;\n // If we don't have an infinite buffer size, and we're over the length,\n // use splice to truncate the old buffer values off. Note that we have to\n // double the size for instances where we're not using an infinite time window\n // because we're storing the values and the timestamps in the same array.\n const adjustedBufferSize = (_infiniteTimeWindow ? 
1 : 2) * _bufferSize;\n _bufferSize < Infinity && adjustedBufferSize < _buffer.length && _buffer.splice(0, _buffer.length - adjustedBufferSize);\n\n // Now, if we're not in an infinite time window, remove all values where the time is\n // older than what is allowed.\n if (!_infiniteTimeWindow) {\n const now = _timestampProvider.now();\n let last = 0;\n // Search the array for the first timestamp that isn't expired and\n // truncate the buffer up to that point.\n for (let i = 1; i < _buffer.length && (_buffer[i] as number) <= now; i += 2) {\n last = i;\n }\n last && _buffer.splice(0, last + 1);\n }\n }\n}\n", "import { Scheduler } from '../Scheduler';\nimport { Subscription } from '../Subscription';\nimport { SchedulerAction } from '../types';\n\n/**\n * A unit of work to be executed in a `scheduler`. An action is typically\n * created from within a {@link SchedulerLike} and an RxJS user does not need to concern\n * themselves about creating and manipulating an Action.\n *\n * ```ts\n * class Action extends Subscription {\n * new (scheduler: Scheduler, work: (state?: T) => void);\n * schedule(state?: T, delay: number = 0): Subscription;\n * }\n * ```\n */\nexport class Action extends Subscription {\n constructor(scheduler: Scheduler, work: (this: SchedulerAction, state?: T) => void) {\n super();\n }\n /**\n * Schedules this action on its parent {@link SchedulerLike} for execution. May be passed\n * some context object, `state`. 
May happen at some point in the future,\n * according to the `delay` parameter, if specified.\n * @param state Some contextual data that the `work` function uses when called by the\n * Scheduler.\n * @param delay Time to wait before executing the work, where the time unit is implicit\n * and defined by the Scheduler.\n * @return A subscription in order to be able to unsubscribe the scheduled work.\n */\n public schedule(state?: T, delay: number = 0): Subscription {\n return this;\n }\n}\n", "import type { TimerHandle } from './timerHandle';\ntype SetIntervalFunction = (handler: () => void, timeout?: number, ...args: any[]) => TimerHandle;\ntype ClearIntervalFunction = (handle: TimerHandle) => void;\n\ninterface IntervalProvider {\n setInterval: SetIntervalFunction;\n clearInterval: ClearIntervalFunction;\n delegate:\n | {\n setInterval: SetIntervalFunction;\n clearInterval: ClearIntervalFunction;\n }\n | undefined;\n}\n\nexport const intervalProvider: IntervalProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n setInterval(handler: () => void, timeout?: number, ...args) {\n const { delegate } = intervalProvider;\n if (delegate?.setInterval) {\n return delegate.setInterval(handler, timeout, ...args);\n }\n return setInterval(handler, timeout, ...args);\n },\n clearInterval(handle) {\n const { delegate } = intervalProvider;\n return (delegate?.clearInterval || clearInterval)(handle as any);\n },\n delegate: undefined,\n};\n", "import { Action } from './Action';\nimport { SchedulerAction } from '../types';\nimport { Subscription } from '../Subscription';\nimport { AsyncScheduler } from './AsyncScheduler';\nimport { intervalProvider } from './intervalProvider';\nimport { arrRemove } from '../util/arrRemove';\nimport { TimerHandle } from './timerHandle';\n\nexport class AsyncAction extends Action {\n public id: TimerHandle | undefined;\n public state?: T;\n // 
@ts-ignore: Property has no initializer and is not definitely assigned\n public delay: number;\n protected pending: boolean = false;\n\n constructor(protected scheduler: AsyncScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n public schedule(state?: T, delay: number = 0): Subscription {\n if (this.closed) {\n return this;\n }\n\n // Always replace the current state with the new state.\n this.state = state;\n\n const id = this.id;\n const scheduler = this.scheduler;\n\n //\n // Important implementation note:\n //\n // Actions only execute once by default, unless rescheduled from within the\n // scheduled callback. This allows us to implement single and repeat\n // actions via the same code path, without adding API surface area, as well\n // as mimic traditional recursion but across asynchronous boundaries.\n //\n // However, JS runtimes and timers distinguish between intervals achieved by\n // serial `setTimeout` calls vs. a single `setInterval` call. An interval of\n // serial `setTimeout` calls can be individually delayed, which delays\n // scheduling the next `setTimeout`, and so on. `setInterval` attempts to\n // guarantee the interval callback will be invoked more precisely to the\n // interval period, regardless of load.\n //\n // Therefore, we use `setInterval` to schedule single and repeat actions.\n // If the action reschedules itself with the same delay, the interval is not\n // canceled. If the action doesn't reschedule, or reschedules with a\n // different delay, the interval will be canceled after scheduled callback\n // execution.\n //\n if (id != null) {\n this.id = this.recycleAsyncId(scheduler, id, delay);\n }\n\n // Set the pending flag indicating that this action has been scheduled, or\n // has recursively rescheduled itself.\n this.pending = true;\n\n this.delay = delay;\n // If this action has already an async Id, don't request a new one.\n this.id = this.id ?? 
this.requestAsyncId(scheduler, this.id, delay);\n\n return this;\n }\n\n protected requestAsyncId(scheduler: AsyncScheduler, _id?: TimerHandle, delay: number = 0): TimerHandle {\n return intervalProvider.setInterval(scheduler.flush.bind(scheduler, this), delay);\n }\n\n protected recycleAsyncId(_scheduler: AsyncScheduler, id?: TimerHandle, delay: number | null = 0): TimerHandle | undefined {\n // If this action is rescheduled with the same delay time, don't clear the interval id.\n if (delay != null && this.delay === delay && this.pending === false) {\n return id;\n }\n // Otherwise, if the action's delay time is different from the current delay,\n // or the action has been rescheduled before it's executed, clear the interval id\n if (id != null) {\n intervalProvider.clearInterval(id);\n }\n\n return undefined;\n }\n\n /**\n * Immediately executes this action and the `work` it contains.\n */\n public execute(state: T, delay: number): any {\n if (this.closed) {\n return new Error('executing a cancelled action');\n }\n\n this.pending = false;\n const error = this._execute(state, delay);\n if (error) {\n return error;\n } else if (this.pending === false && this.id != null) {\n // Dequeue if the action didn't reschedule itself. Don't call\n // unsubscribe(), because the action could reschedule later.\n // For example:\n // ```\n // scheduler.schedule(function doWork(counter) {\n // /* ... I'm a busy worker bee ... 
*/\n // var originalAction = this;\n // /* wait 100ms before rescheduling the action */\n // setTimeout(function () {\n // originalAction.schedule(counter + 1);\n // }, 100);\n // }, 1000);\n // ```\n this.id = this.recycleAsyncId(this.scheduler, this.id, null);\n }\n }\n\n protected _execute(state: T, _delay: number): any {\n let errored: boolean = false;\n let errorValue: any;\n try {\n this.work(state);\n } catch (e) {\n errored = true;\n // HACK: Since code elsewhere is relying on the \"truthiness\" of the\n // return here, we can't have it return \"\" or 0 or false.\n // TODO: Clean this up when we refactor schedulers mid-version-8 or so.\n errorValue = e ? e : new Error('Scheduled action threw falsy error');\n }\n if (errored) {\n this.unsubscribe();\n return errorValue;\n }\n }\n\n unsubscribe() {\n if (!this.closed) {\n const { id, scheduler } = this;\n const { actions } = scheduler;\n\n this.work = this.state = this.scheduler = null!;\n this.pending = false;\n\n arrRemove(actions, this);\n if (id != null) {\n this.id = this.recycleAsyncId(scheduler, id, null);\n }\n\n this.delay = null!;\n super.unsubscribe();\n }\n }\n}\n", "import { Action } from './scheduler/Action';\nimport { Subscription } from './Subscription';\nimport { SchedulerLike, SchedulerAction } from './types';\nimport { dateTimestampProvider } from './scheduler/dateTimestampProvider';\n\n/**\n * An execution context and a data structure to order tasks and schedule their\n * execution. Provides a notion of (potentially virtual) time, through the\n * `now()` getter method.\n *\n * Each unit of work in a Scheduler is called an `Action`.\n *\n * ```ts\n * class Scheduler {\n * now(): number;\n * schedule(work, delay?, state?): Subscription;\n * }\n * ```\n *\n * @deprecated Scheduler is an internal implementation detail of RxJS, and\n * should not be used directly. Rather, create your own class and implement\n * {@link SchedulerLike}. 
Will be made internal in v8.\n */\nexport class Scheduler implements SchedulerLike {\n public static now: () => number = dateTimestampProvider.now;\n\n constructor(private schedulerActionCtor: typeof Action, now: () => number = Scheduler.now) {\n this.now = now;\n }\n\n /**\n * A getter method that returns a number representing the current time\n * (at the time this function was called) according to the scheduler's own\n * internal clock.\n * @return A number that represents the current time. May or may not\n * have a relation to wall-clock time. May or may not refer to a time unit\n * (e.g. milliseconds).\n */\n public now: () => number;\n\n /**\n * Schedules a function, `work`, for execution. May happen at some point in\n * the future, according to the `delay` parameter, if specified. May be passed\n * some context object, `state`, which will be passed to the `work` function.\n *\n * The given arguments will be processed an stored as an Action object in a\n * queue of actions.\n *\n * @param work A function representing a task, or some unit of work to be\n * executed by the Scheduler.\n * @param delay Time to wait before executing the work, where the time unit is\n * implicit and defined by the Scheduler itself.\n * @param state Some contextual data that the `work` function uses when called\n * by the Scheduler.\n * @return A subscription in order to be able to unsubscribe the scheduled work.\n */\n public schedule(work: (this: SchedulerAction, state?: T) => void, delay: number = 0, state?: T): Subscription {\n return new this.schedulerActionCtor(this, work).schedule(state, delay);\n }\n}\n", "import { Scheduler } from '../Scheduler';\nimport { Action } from './Action';\nimport { AsyncAction } from './AsyncAction';\nimport { TimerHandle } from './timerHandle';\n\nexport class AsyncScheduler extends Scheduler {\n public actions: Array> = [];\n /**\n * A flag to indicate whether the Scheduler is currently executing a batch of\n * queued actions.\n * @internal\n 
*/\n public _active: boolean = false;\n /**\n * An internal ID used to track the latest asynchronous task such as those\n * coming from `setTimeout`, `setInterval`, `requestAnimationFrame`, and\n * others.\n * @internal\n */\n public _scheduled: TimerHandle | undefined;\n\n constructor(SchedulerAction: typeof Action, now: () => number = Scheduler.now) {\n super(SchedulerAction, now);\n }\n\n public flush(action: AsyncAction): void {\n const { actions } = this;\n\n if (this._active) {\n actions.push(action);\n return;\n }\n\n let error: any;\n this._active = true;\n\n do {\n if ((error = action.execute(action.state, action.delay))) {\n break;\n }\n } while ((action = actions.shift()!)); // exhaust the scheduler queue\n\n this._active = false;\n\n if (error) {\n while ((action = actions.shift()!)) {\n action.unsubscribe();\n }\n throw error;\n }\n }\n}\n", "import { AsyncAction } from './AsyncAction';\nimport { AsyncScheduler } from './AsyncScheduler';\n\n/**\n *\n * Async Scheduler\n *\n * Schedule task as if you used setTimeout(task, duration)\n *\n * `async` scheduler schedules tasks asynchronously, by putting them on the JavaScript\n * event loop queue. 
It is best used to delay tasks in time or to schedule tasks repeating\n * in intervals.\n *\n * If you just want to \"defer\" task, that is to perform it right after currently\n * executing synchronous code ends (commonly achieved by `setTimeout(deferredTask, 0)`),\n * better choice will be the {@link asapScheduler} scheduler.\n *\n * ## Examples\n * Use async scheduler to delay task\n * ```ts\n * import { asyncScheduler } from 'rxjs';\n *\n * const task = () => console.log('it works!');\n *\n * asyncScheduler.schedule(task, 2000);\n *\n * // After 2 seconds logs:\n * // \"it works!\"\n * ```\n *\n * Use async scheduler to repeat task in intervals\n * ```ts\n * import { asyncScheduler } from 'rxjs';\n *\n * function task(state) {\n * console.log(state);\n * this.schedule(state + 1, 1000); // `this` references currently executing Action,\n * // which we reschedule with new state and delay\n * }\n *\n * asyncScheduler.schedule(task, 3000, 0);\n *\n * // Logs:\n * // 0 after 3s\n * // 1 after 4s\n * // 2 after 5s\n * // 3 after 6s\n * ```\n */\n\nexport const asyncScheduler = new AsyncScheduler(AsyncAction);\n\n/**\n * @deprecated Renamed to {@link asyncScheduler}. Will be removed in v8.\n */\nexport const async = asyncScheduler;\n", "import { AsyncAction } from './AsyncAction';\nimport { Subscription } from '../Subscription';\nimport { QueueScheduler } from './QueueScheduler';\nimport { SchedulerAction } from '../types';\nimport { TimerHandle } from './timerHandle';\n\nexport class QueueAction extends AsyncAction {\n constructor(protected scheduler: QueueScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n public schedule(state?: T, delay: number = 0): Subscription {\n if (delay > 0) {\n return super.schedule(state, delay);\n }\n this.delay = delay;\n this.state = state;\n this.scheduler.flush(this);\n return this;\n }\n\n public execute(state: T, delay: number): any {\n return delay > 0 || this.closed ? 
super.execute(state, delay) : this._execute(state, delay);\n }\n\n protected requestAsyncId(scheduler: QueueScheduler, id?: TimerHandle, delay: number = 0): TimerHandle {\n // If delay exists and is greater than 0, or if the delay is null (the\n // action wasn't rescheduled) but was originally scheduled as an async\n // action, then recycle as an async action.\n\n if ((delay != null && delay > 0) || (delay == null && this.delay > 0)) {\n return super.requestAsyncId(scheduler, id, delay);\n }\n\n // Otherwise flush the scheduler starting with this action.\n scheduler.flush(this);\n\n // HACK: In the past, this was returning `void`. However, `void` isn't a valid\n // `TimerHandle`, and generally the return value here isn't really used. So the\n // compromise is to return `0` which is both \"falsy\" and a valid `TimerHandle`,\n // as opposed to refactoring every other instanceo of `requestAsyncId`.\n return 0;\n }\n}\n", "import { AsyncScheduler } from './AsyncScheduler';\n\nexport class QueueScheduler extends AsyncScheduler {\n}\n", "import { QueueAction } from './QueueAction';\nimport { QueueScheduler } from './QueueScheduler';\n\n/**\n *\n * Queue Scheduler\n *\n * Put every next task on a queue, instead of executing it immediately\n *\n * `queue` scheduler, when used with delay, behaves the same as {@link asyncScheduler} scheduler.\n *\n * When used without delay, it schedules given task synchronously - executes it right when\n * it is scheduled. 
However when called recursively, that is when inside the scheduled task,\n * another task is scheduled with queue scheduler, instead of executing immediately as well,\n * that task will be put on a queue and wait for current one to finish.\n *\n * This means that when you execute task with `queue` scheduler, you are sure it will end\n * before any other task scheduled with that scheduler will start.\n *\n * ## Examples\n * Schedule recursively first, then do something\n * ```ts\n * import { queueScheduler } from 'rxjs';\n *\n * queueScheduler.schedule(() => {\n * queueScheduler.schedule(() => console.log('second')); // will not happen now, but will be put on a queue\n *\n * console.log('first');\n * });\n *\n * // Logs:\n * // \"first\"\n * // \"second\"\n * ```\n *\n * Reschedule itself recursively\n * ```ts\n * import { queueScheduler } from 'rxjs';\n *\n * queueScheduler.schedule(function(state) {\n * if (state !== 0) {\n * console.log('before', state);\n * this.schedule(state - 1); // `this` references currently executing Action,\n * // which we reschedule with new state\n * console.log('after', state);\n * }\n * }, 0, 3);\n *\n * // In scheduler that runs recursively, you would expect:\n * // \"before\", 3\n * // \"before\", 2\n * // \"before\", 1\n * // \"after\", 1\n * // \"after\", 2\n * // \"after\", 3\n *\n * // But with queue it logs:\n * // \"before\", 3\n * // \"after\", 3\n * // \"before\", 2\n * // \"after\", 2\n * // \"before\", 1\n * // \"after\", 1\n * ```\n */\n\nexport const queueScheduler = new QueueScheduler(QueueAction);\n\n/**\n * @deprecated Renamed to {@link queueScheduler}. 
Will be removed in v8.\n */\nexport const queue = queueScheduler;\n", "import { AsyncAction } from './AsyncAction';\nimport { AnimationFrameScheduler } from './AnimationFrameScheduler';\nimport { SchedulerAction } from '../types';\nimport { animationFrameProvider } from './animationFrameProvider';\nimport { TimerHandle } from './timerHandle';\n\nexport class AnimationFrameAction extends AsyncAction {\n constructor(protected scheduler: AnimationFrameScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n protected requestAsyncId(scheduler: AnimationFrameScheduler, id?: TimerHandle, delay: number = 0): TimerHandle {\n // If delay is greater than 0, request as an async action.\n if (delay !== null && delay > 0) {\n return super.requestAsyncId(scheduler, id, delay);\n }\n // Push the action to the end of the scheduler queue.\n scheduler.actions.push(this);\n // If an animation frame has already been requested, don't request another\n // one. If an animation frame hasn't been requested yet, request one. Return\n // the current animation frame request id.\n return scheduler._scheduled || (scheduler._scheduled = animationFrameProvider.requestAnimationFrame(() => scheduler.flush(undefined)));\n }\n\n protected recycleAsyncId(scheduler: AnimationFrameScheduler, id?: TimerHandle, delay: number = 0): TimerHandle | undefined {\n // If delay exists and is greater than 0, or if the delay is null (the\n // action wasn't rescheduled) but was originally scheduled as an async\n // action, then recycle as an async action.\n if (delay != null ? 
delay > 0 : this.delay > 0) {\n return super.recycleAsyncId(scheduler, id, delay);\n }\n // If the scheduler queue has no remaining actions with the same async id,\n // cancel the requested animation frame and set the scheduled flag to\n // undefined so the next AnimationFrameAction will request its own.\n const { actions } = scheduler;\n if (id != null && id === scheduler._scheduled && actions[actions.length - 1]?.id !== id) {\n animationFrameProvider.cancelAnimationFrame(id as number);\n scheduler._scheduled = undefined;\n }\n // Return undefined so the action knows to request a new async id if it's rescheduled.\n return undefined;\n }\n}\n", "import { AsyncAction } from './AsyncAction';\nimport { AsyncScheduler } from './AsyncScheduler';\n\nexport class AnimationFrameScheduler extends AsyncScheduler {\n public flush(action?: AsyncAction): void {\n this._active = true;\n // The async id that effects a call to flush is stored in _scheduled.\n // Before executing an action, it's necessary to check the action's async\n // id to determine whether it's supposed to be executed in the current\n // flush.\n // Previous implementations of this method used a count to determine this,\n // but that was unsound, as actions that are unsubscribed - i.e. 
cancelled -\n // are removed from the actions array and that can shift actions that are\n // scheduled to be executed in a subsequent flush into positions at which\n // they are executed within the current flush.\n let flushId;\n if (action) {\n flushId = action.id;\n } else {\n flushId = this._scheduled;\n this._scheduled = undefined;\n }\n\n const { actions } = this;\n let error: any;\n action = action || actions.shift()!;\n\n do {\n if ((error = action.execute(action.state, action.delay))) {\n break;\n }\n } while ((action = actions[0]) && action.id === flushId && actions.shift());\n\n this._active = false;\n\n if (error) {\n while ((action = actions[0]) && action.id === flushId && actions.shift()) {\n action.unsubscribe();\n }\n throw error;\n }\n }\n}\n", "import { AnimationFrameAction } from './AnimationFrameAction';\nimport { AnimationFrameScheduler } from './AnimationFrameScheduler';\n\n/**\n *\n * Animation Frame Scheduler\n *\n * Perform task when `window.requestAnimationFrame` would fire\n *\n * When `animationFrame` scheduler is used with delay, it will fall back to {@link asyncScheduler} scheduler\n * behaviour.\n *\n * Without delay, `animationFrame` scheduler can be used to create smooth browser animations.\n * It makes sure scheduled task will happen just before next browser content repaint,\n * thus performing animations as efficiently as possible.\n *\n * ## Example\n * Schedule div height animation\n * ```ts\n * // html:
\n * import { animationFrameScheduler } from 'rxjs';\n *\n * const div = document.querySelector('div');\n *\n * animationFrameScheduler.schedule(function(height) {\n * div.style.height = height + \"px\";\n *\n * this.schedule(height + 1); // `this` references currently executing Action,\n * // which we reschedule with new state\n * }, 0, 0);\n *\n * // You will see a div element growing in height\n * ```\n */\n\nexport const animationFrameScheduler = new AnimationFrameScheduler(AnimationFrameAction);\n\n/**\n * @deprecated Renamed to {@link animationFrameScheduler}. Will be removed in v8.\n */\nexport const animationFrame = animationFrameScheduler;\n", "import { Observable } from '../Observable';\nimport { SchedulerLike } from '../types';\n\n/**\n * A simple Observable that emits no items to the Observer and immediately\n * emits a complete notification.\n *\n * Just emits 'complete', and nothing else.\n *\n * ![](empty.png)\n *\n * A simple Observable that only emits the complete notification. It can be used\n * for composing with other Observables, such as in a {@link mergeMap}.\n *\n * ## Examples\n *\n * Log complete notification\n *\n * ```ts\n * import { EMPTY } from 'rxjs';\n *\n * EMPTY.subscribe({\n * next: () => console.log('Next'),\n * complete: () => console.log('Complete!')\n * });\n *\n * // Outputs\n * // Complete!\n * ```\n *\n * Emit the number 7, then complete\n *\n * ```ts\n * import { EMPTY, startWith } from 'rxjs';\n *\n * const result = EMPTY.pipe(startWith(7));\n * result.subscribe(x => console.log(x));\n *\n * // Outputs\n * // 7\n * ```\n *\n * Map and flatten only odd numbers to the sequence `'a'`, `'b'`, `'c'`\n *\n * ```ts\n * import { interval, mergeMap, of, EMPTY } from 'rxjs';\n *\n * const interval$ = interval(1000);\n * const result = interval$.pipe(\n * mergeMap(x => x % 2 === 1 ? 
of('a', 'b', 'c') : EMPTY),\n * );\n * result.subscribe(x => console.log(x));\n *\n * // Results in the following to the console:\n * // x is equal to the count on the interval, e.g. (0, 1, 2, 3, ...)\n * // x will occur every 1000ms\n * // if x % 2 is equal to 1, print a, b, c (each on its own)\n * // if x % 2 is not equal to 1, nothing will be output\n * ```\n *\n * @see {@link Observable}\n * @see {@link NEVER}\n * @see {@link of}\n * @see {@link throwError}\n */\nexport const EMPTY = new Observable((subscriber) => subscriber.complete());\n\n/**\n * @param scheduler A {@link SchedulerLike} to use for scheduling\n * the emission of the complete notification.\n * @deprecated Replaced with the {@link EMPTY} constant or {@link scheduled} (e.g. `scheduled([], scheduler)`). Will be removed in v8.\n */\nexport function empty(scheduler?: SchedulerLike) {\n return scheduler ? emptyScheduled(scheduler) : EMPTY;\n}\n\nfunction emptyScheduled(scheduler: SchedulerLike) {\n return new Observable((subscriber) => scheduler.schedule(() => subscriber.complete()));\n}\n", "import { SchedulerLike } from '../types';\nimport { isFunction } from './isFunction';\n\nexport function isScheduler(value: any): value is SchedulerLike {\n return value && isFunction(value.schedule);\n}\n", "import { SchedulerLike } from '../types';\nimport { isFunction } from './isFunction';\nimport { isScheduler } from './isScheduler';\n\nfunction last(arr: T[]): T | undefined {\n return arr[arr.length - 1];\n}\n\nexport function popResultSelector(args: any[]): ((...args: unknown[]) => unknown) | undefined {\n return isFunction(last(args)) ? args.pop() : undefined;\n}\n\nexport function popScheduler(args: any[]): SchedulerLike | undefined {\n return isScheduler(last(args)) ? args.pop() : undefined;\n}\n\nexport function popNumber(args: any[], defaultValue: number): number {\n return typeof last(args) === 'number' ? args.pop()! 
: defaultValue;\n}\n", "export const isArrayLike = ((x: any): x is ArrayLike => x && typeof x.length === 'number' && typeof x !== 'function');", "import { isFunction } from \"./isFunction\";\n\n/**\n * Tests to see if the object is \"thennable\".\n * @param value the object to test\n */\nexport function isPromise(value: any): value is PromiseLike {\n return isFunction(value?.then);\n}\n", "import { InteropObservable } from '../types';\nimport { observable as Symbol_observable } from '../symbol/observable';\nimport { isFunction } from './isFunction';\n\n/** Identifies an input as being Observable (but not necessary an Rx Observable) */\nexport function isInteropObservable(input: any): input is InteropObservable {\n return isFunction(input[Symbol_observable]);\n}\n", "import { isFunction } from './isFunction';\n\nexport function isAsyncIterable(obj: any): obj is AsyncIterable {\n return Symbol.asyncIterator && isFunction(obj?.[Symbol.asyncIterator]);\n}\n", "/**\n * Creates the TypeError to throw if an invalid object is passed to `from` or `scheduled`.\n * @param input The object that was passed.\n */\nexport function createInvalidObservableTypeError(input: any) {\n // TODO: We should create error codes that can be looked up, so this can be less verbose.\n return new TypeError(\n `You provided ${\n input !== null && typeof input === 'object' ? 'an invalid object' : `'${input}'`\n } where a stream was expected. 
You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.`\n );\n}\n", "export function getSymbolIterator(): symbol {\n if (typeof Symbol !== 'function' || !Symbol.iterator) {\n return '@@iterator' as any;\n }\n\n return Symbol.iterator;\n}\n\nexport const iterator = getSymbolIterator();\n", "import { iterator as Symbol_iterator } from '../symbol/iterator';\nimport { isFunction } from './isFunction';\n\n/** Identifies an input as being an Iterable */\nexport function isIterable(input: any): input is Iterable {\n return isFunction(input?.[Symbol_iterator]);\n}\n", "import { ReadableStreamLike } from '../types';\nimport { isFunction } from './isFunction';\n\nexport async function* readableStreamLikeToAsyncGenerator(readableStream: ReadableStreamLike): AsyncGenerator {\n const reader = readableStream.getReader();\n try {\n while (true) {\n const { value, done } = await reader.read();\n if (done) {\n return;\n }\n yield value!;\n }\n } finally {\n reader.releaseLock();\n }\n}\n\nexport function isReadableStreamLike(obj: any): obj is ReadableStreamLike {\n // We don't want to use instanceof checks because they would return\n // false for instances from another Realm, like an