30 #include "FormatHandling/FormatManager.h" 31 #include "Alignment/Alignment.h" 41 std::vector<std::string>* inFiles,
42 std::vector<std::string>* outFormats,
43 std::string* outPattern)
// Body of the command-line parser. According to the signature index at the end
// of this listing this is parseArguments(argc, argv, machine, inFiles,
// outFormats, outPattern). It walks argv, appends file names to *inFiles,
// format names to *outFormats, and stores the output pattern in *outPattern.
// NOTE(review): this listing is an excerpt. Braces and several statements
// between the numbered lines are not shown, so the comments below describe
// only what is visible.

// No arguments besides the program name: return 1 right away.
45 if (argc == 1)
return 1;
// Visit every argument after the program name. Some branches below advance
// `i` themselves to consume the values that follow an option.
46 for(
int i = 1; i < argc; i++ )
// Help request; the action taken is not visible in this excerpt.
48 if (!strcmp(argv[i],
"-h") || !strcmp(argv[i],
"--help"))
// "-in": one or more input file names are expected to follow.
51 if (!strcmp(argv[i],
"-in"))
56 std::cerr <<
"ERROR: At least one file should be passed after the '-in' argument\n";
// NOTE(review): argv[i + 1] is read here. argv[argc] is a null pointer, so
// this is only safe if the (not shown) condition preceding this `else`
// catches the case where "-in" is the last argument. TODO confirm.
61 else if (argv[i + 1][0] ==
'-')
63 std::cerr <<
"ERROR: At least one file should be passed after the '-in' argument and you passed argument " << argv[i + 1] <<
"\n";
// Consume the following arguments as input files. An argument starting with
// '-' is treated specially (handling not shown, presumably it ends the list).
68 else while(++i != argc)
70 if (argv[i][0] ==
'-')
77 inFiles->emplace_back(argv[i]);
// "-out": a single output file name pattern is expected to follow.
83 else if (!strcmp(argv[i],
"-out"))
// NOTE(review): unlike the '-in' messages, the '-out' and '-formats'
// messages carry no "ERROR: " prefix.
88 std::cerr <<
"A file pattern should be passed after the '-out' argument\n";
// NOTE(review): this is a plain `if`, whereas the matching '-in' check is an
// `else if`. Unless the (not shown) branch above returns, argv[i + 1] may be
// the terminating null pointer here. TODO confirm.
93 if (argv[i + 1][0] ==
'-')
95 std::cerr <<
"A file pattern should be passed after the '-out' argument and you passed argument " << argv[i + 1] <<
"\n";
// Store the next argument as the output pattern and step past it.
98 *outPattern = argv[++i];
// "-formats": one or more output format names are expected to follow.
104 else if (!strcmp(argv[i],
"-formats"))
108 std::cerr <<
"A format should be passed after the '-formats' argument\n";
// Same consumption scheme as for "-in", collecting into *outFormats.
111 else while(++i != argc)
113 if (argv[i][0] ==
'-')
120 outFormats->emplace_back(argv[i]);
// Flags whose effect is not visible in this excerpt.
125 else if (!strcmp(argv[i],
"-reverse"))
129 else if (!strcmp(argv[i],
"-keepHeaders"))
// Legacy single-format flags: each one appends a fixed format name to
// *outFormats.
136 else if (!strcmp(argv[i],
"-html"))
137 outFormats->emplace_back(
"html");
139 else if (!strcmp(argv[i],
"-nbrf"))
140 outFormats->emplace_back(
"nbrf");
142 else if (!strcmp(argv[i],
"-mega"))
143 outFormats->emplace_back(
"mega");
145 else if (!strcmp(argv[i],
"-nexus"))
146 outFormats->emplace_back(
"nexus");
148 else if (!strcmp(argv[i],
"-clustal"))
149 outFormats->emplace_back(
"clustal");
// "-onlyseqs" is handled exactly like "-fasta".
151 else if (!strcmp(argv[i],
"-fasta") || !strcmp(argv[i],
"-onlyseqs"))
152 outFormats->emplace_back(
"fasta");
// The "_m10" variants span more original lines than their plain counterparts;
// the extra statement is not shown (presumably it enables short sequence
// names, as the help text suggests. TODO confirm).
154 else if (!strcmp(argv[i],
"-fasta_m10"))
156 outFormats->emplace_back(
"fasta");
159 else if (!strcmp(argv[i],
"-phylip"))
160 outFormats->emplace_back(
"phylip40");
162 else if (!strcmp(argv[i],
"-phylip_m10"))
164 outFormats->emplace_back(
"phylip40_m10");
167 else if (!strcmp(argv[i],
"-phylip_paml"))
168 outFormats->emplace_back(
"phylippaml");
170 else if (!strcmp(argv[i],
"-phylip_paml_m10"))
172 outFormats->emplace_back(
"phylippaml_m10");
175 else if (!strcmp(argv[i],
"-phylip3.2"))
176 outFormats->emplace_back(
"phylip32");
178 else if (!strcmp(argv[i],
"-phylip3.2_m10"))
180 outFormats->emplace_back(
"phylip32_m10");
// Information-printing flags; their effect is not visible here (presumably
// they set the machine->format / ->type / ->info members that the argument
// check reads. TODO confirm).
182 else if (!strcmp(argv[i],
"-format"))
186 else if (!strcmp(argv[i],
"-type"))
190 else if (!strcmp(argv[i],
"-info"))
// Reported for an argument that is not accepted (the enclosing condition is
// not shown).
198 std::cerr << argv[i] <<
" not recognized or repeated.\n";
// Echo the parsed input files, output formats and pattern on std::cout.
// Whether this is unconditional or debug-only is not visible in this excerpt.
204 std::cout <<
"Input Files:\n";
// NOTE(review): iterating by value copies every std::string;
// `const std::string &` would avoid the copy.
205 for (std::string ifile : *inFiles)
207 std::cout <<
"-> Input file: " << ifile <<
"\n";
210 std::cout <<
"Out Formats:\n";
// NOTE(review): same per-element copy as in the loop above.
211 for (std::string oformat : *outFormats)
213 std::cout <<
"-> Output format: " << oformat <<
"\n";
216 std::cout <<
"Under the pattern\n-> " << *outPattern <<
"\n";
// Body of the argument consistency check. According to the signature index at
// the end of this listing this is checkArguments(machine, inFiles, outFormats,
// outPattern). Problems are reported on std::cerr.
// NOTE(review): the function header, the braces and the statements that follow
// each message (presumably error returns) are not shown in this excerpt.

// At least one input file is mandatory.
225 if (inFiles->size() == 0)
227 std::cerr <<
"ERROR: At least one input file must be provided\n";
// No output pattern was given (the help text calls this the STDOUT default).
230 if (*outPattern ==
"")
// Case: exactly one input file, exactly one output format, and none of the
// format/info/type flags set on `machine`. The action taken is not shown.
232 if (inFiles->size() == 1 && outFormats->size() == 1 && !(machine
->format || machine
->info || machine
->type))
// Otherwise any requested output format is rejected.
// NOTE(review): this branch is also reached when several input files or
// several formats are given without any information flag, yet the message
// only mentions information printing. Confirm the wording is intended.
234 else if (outFormats->size() != 0)
236 std::cerr <<
"ERROR: Terminal output option not compatible with information printing (-info | -format | -type)\n" 237 <<
"Provide an output format or disable information printing.\n";
// An output pattern was given but no output format was requested.
241 else if (outFormats->size() == 0)
243 std::cerr <<
"ERROR: At least one output format must be provided\n";
// Usage/help text: a chain of string literals inserted into an output stream.
// NOTE(review): the enclosing function header, the stream being written to and
// several literals of the chain are not shown in this excerpt, so missing
// line breaks cannot be judged from here.
256 <<
"readAl webpage: http://trimal.cgenomics.org\n\n" 258 <<
"This program is free software: you can redistribute it and/or modify " 260 <<
"it under the terms of the GNU General Public License as published by " 262 <<
"the Free Software Foundation, the last available version.\n" 266 <<
// NOTE(review): the synopsis below says "-format [formats]", but the option
// that takes a list is documented further down, and used in the examples, as
// "-formats"; "-format" is the information flag. Looks like a typo. Confirm.
"\treadalMS -in <inputfiles> -out <pattern> -format [formats] [options].\n\n" 268 <<
"\t-h Show this information.\n" 271 <<
"\t-in <inputfiles> Input files in several formats. Separated by spaces.\n" 273 <<
"\t-out <pattern> Output file name pattern (default STDOUT).\n" 274 <<
// Tags that may appear in the output pattern.
"\t It will replace optional the tags [in] -> Original filename without extension.\n" 275 <<
"\t [format] -> Output's format name\n" 276 <<
"\t [extension] -> Output's extension\n" 279 <<
"\t-formats Formats you want the output to be converted to.\n" 281 <<
"\t Being the HTML format not a format itself, but a colored report of the alignment files.\n\n" 282 <<
"\t-format Print information about input file format " 283 <<
"and if sequences are aligned or not.\n" 285 <<
"\t-type Print information about biological " 286 <<
"sequences datatype (e.g. nucleotides:dna, nucleotides:rna, aminoacids, etc)" 289 <<
"\t-info Print information about sequences number, " 290 <<
"average sequence length, max & min sequence length" 293 <<
"\t-reverse Output the reverse of sequences in " 296 <<
"\t-keepHeaders Keeps the headers of the original format if it had any\n\n" 299 <<
// Legacy single-format flags, kept alongside "-formats".
"LEGACY OPTIONS\nTake in mind that this arguments may be discontinued any time."<<
"\n\n" 301 <<
"\t-onlyseqs Generate output with only residues from " 304 <<
"\t-html Output residues colored according their " 305 <<
"physicochemical properties. HTML file.\n\n" 308 <<
"\t-nbrf Output file in NBRF/PIR format\n" 309 <<
"\t-mega Output file in MEGA format\n" 311 <<
"\t-nexus Output file in NEXUS format\n" 312 <<
"\t-clustal Output file in CLUSTAL format\n" 315 <<
"\t-fasta Output file in FASTA format\n" 316 <<
"\t-fasta_m10 Output file in FASTA format. Sequences " 317 <<
"name up to 10 characters.\n\n" 319 <<
"\t-phylip Output file in PHYLIP/PHYLIP4 format" 321 <<
"\t-phylip_m10 Output file in PHYLIP/PHYLIP4 format. " 322 <<
"Sequences name up to 10 characters.\n" 323 <<
"\t-phylip_paml Output file in PHYLIP format compatible " 325 <<
"\t-phylip_paml_m10 Output file in PHYLIP format compatible " 326 <<
"with PAML. Sequences name up to 10 characters.\n" 327 <<
"\t-phylip3.2 Output file in PHYLIP3.2 format\n" 328 <<
"\t-phylip3.2_m10 Output file in PHYLIP3.2 format. Sequences" 329 <<
" name up to 10 characters.\n\n" 330 <<
"If you specify any m10 format, this will result in all formats having the sequences names shortened as this has the same effect as '-shortNames' argument\n\n" 333 <<
// Worked examples showing how the pattern tags expand.
"EXAMPLES OF USE\n\n" 335 <<
"\treadalMS -in ./dataset/AA1.fas -out ./dataset/[in].output.[extension] -formats clustal\n" 336 <<
"\t -> Will produce ./dataset/AA1.output.clw\n\n" 338 <<
"\treadalMS -in ./dataset/example1.clw -out ./dataset/[in].[format].[extension] -formats fasta phylip32 phylip40\n" 339 <<
"\t -> Will produce ./dataset/example1.FASTA.fasta ./dataset/example1.PHYLIP32.phy ./dataset/example1.PHYLIP40.phy\n\n" 341 <<
"\treadalMS -in ./dataset/example1.clw -out ./dataset/[in]/[format].[extension] -formats fasta phylip32 phylip40\n" 342 <<
"\t -> Will produce ./dataset/example1/FASTA.fasta ./dataset/example1/PHYLIP32.phy ./dataset/example1/PHYLIP40.phy\n" 343 <<
"\t ONLY if ./dataset/example1/ already exists.\n\n" 345 <<
"\treadalMS -in ./dataset/AA1.fas ./dataset/AA2.fas -out ./dataset/[in].output.[extension] -formats clustal pir\n" 346 <<
"\t -> Will produce ./dataset/AA1.output.clw ./dataset/AA2.output.clw ./dataset/AA1.output.pir ./dataset/AA2.output.pir\n\n" 348 <<
"\treadalMS -in ./dataset/AA1.fas -format -type -info\n" 349 <<
"\t -> Will produce terminal output giving information about AA1.fas alignment file\n\n" 351 <<
"\treadalMS -in ./dataset/AA1.fas ./dataset/AA2.fas -out ./dataset/[in].output.[extension] -formats html\n" 352 <<
"\t -> Will produce ./dataset/AA1.output.html ./dataset/AA2.output.html\n" 353 <<
"\t Those files are not indeed reformats of the original alignments, but an HTML colored report of the alignment file.\n" 359 int main(
int argc,
char *argv[])
// Body of main(): holds the parsed command-line state, bails out on a non-zero
// `result`, then handles every input file in turn.
// NOTE(review): this listing is an excerpt. The declarations of `result`,
// `alignment` and `MachineState`, the calls that fill them, and all braces are
// not shown.

// Containers for the parsed arguments.
// NOTE(review): the explicit `= std::vector<std::string>()` is redundant;
// default-initialisation yields the same empty vector.
363 std::vector<std::string> outFormats = std::vector<std::string>();
364 std::vector<std::string> inFiles = std::vector<std::string>();
365 std::string outPattern;
// A non-zero `result` becomes the process exit code. The two checks
// presumably follow argument parsing and argument checking respectively.
// TODO confirm against the lines not shown.
373 else if (result != 0)
return result;
376 if (result != 0)
return result;
// Handle each input file in turn.
379 for (
const std::string &str : inFiles)
// Only proceed when an alignment object is available (how `alignment` is
// obtained from `str` is not shown).
382 if (alignment !=
nullptr)
385 std::cout <<
"## Alignment File:\t" << str <<
"\n";
// Report whether the input is aligned, based on alignment->isAligned.
390 <<
"## Input file aligned\t" << (alignment
->isAligned ?
"YES":
"NO")
// Datatype report, enabled by MachineState.type. The selector that picks one
// of the messages below is not shown (presumably a switch on
// alignment->getAlignmentType(). TODO confirm).
393 if(MachineState
.type) {
396 std::cout <<
"## Input file datatype\tnucleotides:dna\n";
398 std::cout <<
"## Input file datatype\tnucleotides:dna_degenerate_codes\n";
400 std::cout <<
"## Input file datatype\tnucleotides:rna\n";
402 std::cout <<
"## Input file datatype\tnucleotides:rna_degenerate_codes\n";
404 std::cout <<
"## Input file datatype\tamino-acids\n";
406 std::cout <<
"## Input file datatype\tamino-acids_degenerate_codes\n";
408 std::cout <<
"## Input file datatype\tunknown\n";
// Extra information, enabled by MachineState.info (action not shown).
411 if(MachineState
.info)
// Conversion path when at least one output format was requested (action not
// shown).
416 if (!outFormats.empty())
// NOTE(review): if this `else` pairs with the `if` directly above, the
// `!outFormats.empty()` half of the condition can never be true here. The
// pairing cannot be confirmed from this excerpt.
423 else if (!outFormats.empty() || MachineState
.reverse)
426 std::cerr <<
"ERROR: An option has to be chosen\n";
void printAlignmentInfo(std::ostream &output)
Print information about sequences number, average sequence length, maximum and minimum sequences leng...
int checkArguments(FormatHandling::FormatManager *machine, std::vector< std::string > *inFiles, std::vector< std::string > *outFormats, std::string *outPattern)
Class containing an alignment. This class stores the alignment sequences with its names...
int main(int argc, char *argv[])
int parseArguments(int argc, char *argv[], FormatHandling::FormatManager *machine, std::vector< std::string > *inFiles, std::vector< std::string > *outFormats, std::string *outPattern)
int getAlignmentType() const
Alignment type getter. See SequenceTypes.
bool isAligned
Flag that indicates if all sequences on the alignment have the same length (Including gaps)...