-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathChimera.cpp
More file actions
158 lines (145 loc) · 6.42 KB
/
Chimera.cpp
File metadata and controls
158 lines (145 loc) · 6.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
/*
* -----------------------------------------------------------------------------
* Filename: Chimera.cpp
*
* Author: Qinzhong Tian
*
* Email: tianqinzhong@qq.com
*
* Created Date: 2024-07-09
*
* Last Modified: 2024-11-18
*
* Description:
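* Main entry point of the Chimera command-line tool. Defines the "build" and
* "classify" subcommands and dispatches to ChimeraBuild / ChimeraClassify.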
*
* Version:
* 1.3
* -----------------------------------------------------------------------------
*/
#include <CLI11.hpp>
#include <buildConfig.hpp>
#include <iostream>
#include <ChimeraBuild.hpp>
#include <classifyConfig.hpp>
#include <ChimeraClassify.hpp>
#include <vector>
#include <string>
#include <stdexcept>
// VERSION_INFO is the version string printed by the --version banner.
// It expands to CHIMERA_VERSION when that macro is defined, and to the literal
// "unknown" otherwise. CHIMERA_VERSION is not defined in this file; presumably
// it is injected by the build system -- TODO confirm.
#ifdef CHIMERA_VERSION
#define VERSION_INFO CHIMERA_VERSION
#else
#define VERSION_INFO "unknown"
#endif
/**
 * @brief Entry point of the Chimera command-line tool.
 *
 * Declares the top-level `--version` flag and the `build` / `classify`
 * subcommands with CLI11, parses the command line, and then either prints the
 * version banner or forwards the populated configuration to
 * ChimeraBuild::run / ChimeraClassify::run.
 *
 * @param argc Number of command-line arguments.
 * @param argv Command-line argument vector.
 * @return 0 on success; on a parse error, the exit code produced by
 *         CLI11_PARSE.
 */
int main(int argc, char** argv)
{
    // Create the main application object
    CLI::App app{ "Chimera - A versatile tool for metagenomic classification" };
    ChimeraBuild::BuildConfig buildConfig;
    ChimeraClassify::ClassifyConfig classifyConfig;

    bool show_version = false;
    app.add_flag("-v,--version", show_version, "Show version information");

    // Create subcommands
    auto build = app.add_subcommand("build", "Build a sequence database");
    auto classify = app.add_subcommand("classify", "Classify sequences");

    // ------------------------------------------------------------------ Build
    build->add_option("-i,--input", buildConfig.input_file, "Input file for building")
        ->required()
        ->check(CLI::ExistingFile);
    build->add_option("-o,--output", buildConfig.output_file, "Output file for building")
        ->default_val("ChimeraDB");
    build->add_option("-m,--mode", buildConfig.mode, "Mode for building")
        ->check(CLI::IsMember({ "normal", "fast" }))
        ->default_val("normal");
    build->add_option("-k,--kmer", buildConfig.kmer_size, "Kmer size for building")
        ->default_val(19)
        ->check(CLI::Range(1, 50));
    build->add_option("-w,--window", buildConfig.window_size, "Window size for building")
        ->default_val(31);
    build->add_option("-l,--min-length", buildConfig.min_length, "Minimum length sequence for building")
        ->default_val(0);
    build->add_option("-t,--threads", buildConfig.threads, "Number of threads for building")
        ->default_val(32);
    build->add_option("--load-factor", buildConfig.load_factor, "Loading ratio of ICF")
        ->default_val(0.58);
    build->add_option("-a,--alpha", buildConfig.alpha, "Alpha value for building")
        ->default_val(1.2);
    build->add_option("--relaxed-load-factor", buildConfig.relaxedLoadFactor, "Relaxed loading ratio of ICF")
        ->default_val(0.95);
    build->add_option("-M,--max-hashes", buildConfig.max_hashes_per_taxid, "Maximum number of hashes per taxid")
        ->default_val(2000000);
    // Enforce the range that the help text advertises.
    build->add_option("-c,--fixed-cutoff", buildConfig.fixed_cutoff, "Fixed cutoff for building (0 - 255)")
        ->check(CLI::Range(0, 255));
    build->add_option("-f,--filter", buildConfig.filter, "Choose the filter for building (hicf, icf, imcf)")
        ->check(CLI::IsMember({ "hicf", "icf", "imcf" }))
        ->default_val("imcf");
    // A bare CLI11 flag evaluates to "true", so binding --quiet straight to
    // `verbose` could never switch verbosity off. Parse into a dedicated
    // boolean and invert it once parsing has succeeded.
    bool buildQuiet = false;
    build->add_flag("-q,--quiet", buildQuiet, "Quiet output")->disable_flag_override();

    // --------------------------------------------------------------- Classify
    // Add --single option
    auto singleOpt = classify->add_option("-i,--single", classifyConfig.singleFiles, "Input file for classifying")
        ->check(CLI::ExistingFile);
    // Add --paired option
    auto pairedOpt = classify->add_option("-p,--paired", classifyConfig.pairedFiles, "Paired input files for classifying")
        ->check(CLI::ExistingFile)
        ->excludes(singleOpt) // Ensure that single and paired are mutually exclusive
        ->each([](const std::string&) {}); // No-op per-value hook, kept from the original
    // Custom validation: --paired must be given an even number of files
    classify->callback([pairedOpt]() {
        if (pairedOpt->count() > 0 && pairedOpt->count() % 2 != 0) {
            throw CLI::ValidationError("--paired option must have an even number of input files");
        }
    });
    // NOTE(review): neither --single nor --paired is required, so `classify`
    // can be started without any input file -- confirm that
    // ChimeraClassify::run handles that case.
    classify->add_option("-o,--output", classifyConfig.outputFile, "Output file for classifying")
        ->default_val("ChimeraClassify");
    classify->add_option("-d,--database", classifyConfig.dbFile, "Database file for classifying")
        ->required()
        ->check(CLI::ExistingFile);
    classify->add_option("-s,--shot-threshold", classifyConfig.shotThreshold, "Shot threshold for classifying")
        ->default_val(0.7);
    classify->add_option("-t,--threads", classifyConfig.threads, "Number of threads for classifying")
        ->default_val(32);
    classify->add_option("-m,--mode", classifyConfig.mode, "Mode for classifying")
        ->check(CLI::IsMember({ "normal", "fast" }))
        ->default_val("normal");
    classify->add_option("-f,--filter", classifyConfig.filter, "Filter for classifying")
        ->default_val("imcf")
        ->check(CLI::IsMember({ "hicf", "icf", "imcf" }));
    classify->add_option("-b,--batch-size", classifyConfig.batchSize, "Batch size for classifying")
        ->default_val(400);
    classify->add_flag("--lca", classifyConfig.lca, "Enable LCA mode");
    classify->add_option("--tax-file", classifyConfig.taxFile, "Taxonomy file for LCA mode")
        ->check(CLI::ExistingFile);
    // EM and VEM are mutually exclusive.
    auto emFlag = classify->add_flag("-e,--EM", classifyConfig.em, "Enable EM mode");
    classify->add_flag("-V,--VEM", classifyConfig.vem, "Enable VEM mode")->excludes(emFlag);
    classify->add_option("--em-threshold", classifyConfig.emThreshold, "EM threshold")
        ->default_val(0.001);
    classify->add_option("--em-iter", classifyConfig.emIter, "EM iteration")
        ->default_val(100);
    // Same handling as the build subcommand's --quiet flag.
    bool classifyQuiet = false;
    classify->add_flag("-q,--quiet", classifyQuiet, "Quiet output")->disable_flag_override();

    // With no arguments at all, show the help text instead of doing nothing.
    if (argc == 1) {
        std::cout << app.help() << '\n';
        return 0;
    }

    CLI11_PARSE(app, argc, argv);

    // Verbose output is the default; --quiet turns it off.
    buildConfig.verbose = !buildQuiet;
    classifyConfig.verbose = !classifyQuiet;

    if (show_version) {
        // NOTE(review): the "GCC" label and the "Ubuntu 20.04" line are
        // hard-coded and do not reflect the actual compiler or host OS.
        std::cout << "======================================" << '\n';
        std::cout << "       Chimera - Metagenomic Tool" << '\n';
        std::cout << "======================================" << '\n';
        std::cout << "Version      : " << VERSION_INFO << '\n';
        std::cout << "Build Date   : " << __DATE__ << " " << __TIME__ << '\n';
        std::cout << "Compiled with: " << "GCC " << __VERSION__ << '\n';
        std::cout << "OS           : Ubuntu 20.04" << '\n';
        std::cout << "======================================" << '\n';
        std::cout << "Developed by : Qinzhong Tian" << '\n';
        std::cout << "Team         : MalabZ" << '\n';
        std::cout << "Homepage     : https://github.com/LoadStar822/Chimera" << '\n';
        std::cout << "======================================" << '\n';
        return 0;
    }

    if (*build) {
        ChimeraBuild::run(buildConfig);
    }
    else if (*classify) {
        ChimeraClassify::run(classifyConfig);
    }
    else {
        // Parsing succeeded but no subcommand was selected (e.g. `chimera --`):
        // show usage rather than exiting silently.
        std::cout << app.help() << '\n';
    }
    return 0;
}