34 #include "compareFiles.h" 35 #include "compareFiles.h" 43 int main(
int argc,
char *argv[]){
46 bool appearErrors =
false, complementary =
false, colnumbering =
false, nogaps =
false, noallgaps =
false, gappyout =
false,
47 strict =
false, strictplus =
false, automated1 =
false, sgc =
false, sgt =
false, scc =
false, sct =
false, sfc =
false,
48 sft =
false, sident =
false, soverlap =
false, selectSeqs =
false, selectCols =
false, shortNames =
false, splitbystop =
false,
49 terminal =
false, keepSeqs =
false, keepHeader =
false, ignorestop =
false;
51 float conserve = -1, gapThreshold = -1, simThreshold = -1, comThreshold = -1, resOverlap = -1, seqOverlap = -1, maxIdentity = -1;
53 int outformat = -1, compareset = -1, stats = 0, windowSize = -1, gapWindow = -1, simWindow = -1, conWindow = -1,
54 blockSize = -1, clusters = -1, alternative_matrix = -1, alignDataType = -1;
58 float *compareVect = NULL;
59 alignment **compAlig = NULL;
60 string nline, *seqNames = NULL;
61 sequencesMatrix *seqMatrix = NULL;
62 similarityMatrix *similMatrix = NULL;
63 alignment *origAlig = NULL, *intermediateAlig = NULL, *singleAlig = NULL, *backtranslation = NULL;
65 int i = 1, lng, num = 0, maxAminos = 0, numfiles = 0, referFile = 0, *delColumns = NULL, *delSequences = NULL, *seqLengths = NULL, *boundaries = NULL;
66 char c, *forceFile = NULL, *infile = NULL, *backtransFile = NULL, *outfile = NULL, *outhtml = NULL, *matrix = NULL,
67 **filesToCompare = NULL, line[256];
84 if(!strcmp(argv[i],
"-h") && (i+1 == argc)) {
89 if(!strcmp(argv[i],
"--version") && (i+1 == argc)) {
91 <<
" build[" <<
BUILD <<
"]" << endl << endl;
96 origAlig =
new alignment;
105 if(!strcmp(argv[i],
"-in") && (i+1 != argc) && (infile == NULL)) {
107 if((sfc) || (sft) || (comThreshold != -1)) {
108 cerr << endl <<
"ERROR: Not allowed in combination of file comparision." << endl << endl;
113 else if((compareset == -1) || (forceFile != NULL)) {
114 lng = strlen(argv[++i]);
115 infile =
new char[lng + 1];
116 strcpy(infile, argv[i]);
118 if(!origAlig -> loadAlignment(infile)) {
119 cerr << endl <<
"ERROR: Alignment not loaded: \"" << infile <<
"\" Check the file's content." << endl << endl;
126 cerr << endl <<
"ERROR: Option \"" << argv[i] <<
"\" not valid. A reference file exists with alignments to compare." << endl << endl;
127 if(forceFile != NULL)
128 cerr << endl <<
"ERROR: Option \"" << argv[i] <<
"\" not valid. A alignment file has been setting up to be compare with a set of alignmets." << endl << endl;
136 else if((!strcmp(argv[i],
"-out")) && (i+1 != argc) && (outfile == NULL)) {
137 lng = strlen(argv[++i]);
138 outfile =
new char[lng + 1];
139 strcpy(outfile, argv[i]);
143 else if((!strcmp(argv[i],
"-htmlout")) && (i+1 != argc) && (outhtml == NULL)) {
144 lng = strlen(argv[++i]);
145 outhtml =
new char[lng + 1];
146 strcpy(outhtml, argv[i]);
154 else if(!strcmp(argv[i],
"-clustal") && (outformat == -1))
158 else if(!strcmp(argv[i],
"-fasta") && (outformat == -1))
162 else if(!strcmp(argv[i],
"-fasta_m10") && (outformat == -1)) {
163 outformat = 8; shortNames =
true;
167 else if(!strcmp(argv[i],
"-nbrf") && (outformat == -1))
171 else if(!strcmp(argv[i],
"-nexus") && (outformat == -1))
175 else if(!strcmp(argv[i],
"-mega") && (outformat == -1))
179 else if(!strcmp(argv[i],
"-phylip3.2") && (outformat == -1))
183 else if(!strcmp(argv[i],
"-phylip3.2_m10") && (outformat == -1)) {
184 outformat = 11; shortNames =
true;
188 else if(!strcmp(argv[i],
"-phylip") && (outformat == -1))
192 else if(!strcmp(argv[i],
"-phylip_m10") && (outformat == -1)) {
193 outformat = 12; shortNames =
true;
197 else if(!strcmp(argv[i],
"-phylip_paml") && (outformat == -1))
201 else if(!strcmp(argv[i],
"-phylip_paml_m10") && (outformat == -1)) {
202 outformat = 13; shortNames =
true;
210 else if(!strcmp(argv[i],
"-matrix") && (i+1 != argc) && (matrix == NULL)) {
211 lng = strlen(argv[++i]);
212 matrix =
new char[lng + 1];
213 strcpy(matrix, argv[i]);
216 else if(!strcmp(argv[i],
"--alternative_matrix") && (i+1 != argc) && (alternative_matrix == -1)) {
218 if (!strcmp(argv[i],
"degenerated_nt_identity"))
219 alternative_matrix = 1;
221 cerr << endl <<
"ERROR: Alternative not recognized \"" << argv[i] <<
"\"" << endl << endl;
230 else if(!strcmp(argv[i],
"-compareset") && (i+1 != argc) && (compareset == -1)) {
233 compare.open(argv[++i], ifstream::in);
235 cerr << endl <<
"ERROR: Check the reference file with the alignments to compare." << endl << endl;
239 while(compare.getline(line, 256)) numfiles++;
246 cerr << endl <<
"ERROR: Option \"" << argv[i] <<
"\" not valid. A single alignment file has been set by the user." << endl << endl;
253 else if(!strcmp(argv[i],
"-forceselect") && (i+1 != argc) && (forceFile == NULL)) {
256 lng = strlen(argv[++i]);
257 forceFile =
new char[lng + 1];
258 strcpy(forceFile, argv[i]);
259 if(!origAlig -> loadAlignment(forceFile)) {
260 cerr << endl <<
"ERROR: Alignment not loaded: \"" << forceFile <<
"\" Check the file's content." << endl << endl;
266 cerr << endl <<
"ERROR: Option \"" << argv[i] <<
"\" not valid. A single alignment file has been setting it up" << endl << endl;
273 else if(!strcmp(argv[i],
"-backtrans") && (i+1 != argc) && (backtransFile == NULL)) {
275 lng = strlen(argv[++i]);
276 backtransFile =
new char[lng + 1];
277 strcpy(backtransFile, argv[i]);
279 backtranslation =
new alignment;
280 if(!backtranslation -> loadAlignment(backtransFile)) {
281 cerr << endl <<
"ERROR: Alignment not loaded: \"" << backtransFile <<
"\" Check the file's content." << endl << endl;
291 else if((!strcmp(argv[i],
"-gapthreshold") || !strcmp(argv[i],
"-gt")) && (i+1 != argc) && (gapThreshold == -1)) {
293 if((selectCols) || (selectSeqs)) {
294 cerr << endl <<
"ERROR: Not allowed in combination of other manual methods such as manual selection of sequences/columns." << endl << endl;
298 else if((nogaps) || (noallgaps) || (gappyout) || (strict) || (strictplus) || (automated1)) {
299 cerr << endl <<
"ERROR: Combinations between automatic and manual methods are not allowed" << endl << endl;
305 gapThreshold = 1 - atof(argv[i]);
306 if((gapThreshold < 0) || (gapThreshold > 1)) {
307 cerr << endl <<
"ERROR: The gap threshold value should be between 0 and 1." << endl << endl;
312 cerr << endl <<
"ERROR: The gap threshold value should be a positive real number." << endl << endl;
320 else if((!strcmp(argv[i],
"-simthreshold") || !strcmp(argv[i],
"-st")) && (i+1 != argc) && (simThreshold == -1)) {
322 if((selectCols) || (selectSeqs)) {
323 cerr << endl <<
"ERROR: Not allowed in combination of other manual methods such as manual selection of sequences/columns." << endl << endl;
327 else if((nogaps) || (noallgaps) || (gappyout) || (strict) || (strictplus) || (automated1)) {
328 cerr << endl <<
"ERROR: Combinations between automatic and manual methods are not allowed" << endl << endl;
334 simThreshold = atof(argv[i]);
335 if((simThreshold < 0) || (simThreshold > 1)) {
336 cerr << endl <<
"ERROR: The similarity threshold value should be between 0 and 1." << endl << endl;
341 cerr << endl <<
"ERROR: The similarity threshold value should be a positive real number." << endl << endl;
350 else if((!strcmp(argv[i],
"-conthreshold") || !strcmp(argv[i],
"-ct")) && (i+1 != argc) && (comThreshold == -1)) {
352 if((selectCols) || (selectSeqs)) {
353 cerr << endl <<
"ERROR: Not allowed in combination of other manual methods such as manual selection of sequences/columns." << endl << endl;
362 else if(infile != NULL) {
363 cerr << endl <<
"ERROR: Not allowed in combination with -in option." << endl << endl;
370 comThreshold = atof(argv[i]);
371 if((comThreshold < 0) || (comThreshold > 1)) {
372 cerr << endl <<
"ERROR: The consistency threshold value should be between 0 and 1." << endl << endl;
377 cerr << endl <<
"ERROR: The consistency threshold value should be a positive real number." << endl << endl;
386 else if((!strcmp(argv[i],
"-cons")) && (i+1 != argc) && (conserve == -1)) {
388 if((selectCols) || (selectSeqs)) {
389 cerr << endl <<
"ERROR: Not allowed in combination of other manual methods such as manual selection of sequences/columns." << endl << endl;
393 else if(blockSize != -1) {
394 cerr << endl <<
"ERROR: Not allowed in combination of column block size value." << endl << endl;
398 else if((nogaps) || (noallgaps) || (gappyout) || (strict) || (strictplus) || (automated1)) {
399 cerr << endl <<
"ERROR: Combinations between automatic and manual methods are not allowed" << endl << endl;
405 conserve = atof(argv[i]);
406 if((conserve < 0) || (conserve > 100)) {
407 cerr << endl <<
"ERROR: The minimal positions value should be between 0 and 100." << endl << endl;
412 cerr << endl <<
"ERROR: The minimal positions value should be a positive real number." << endl << endl;
420 else if((!strcmp(argv[i],
"-selectcols")) && (selectCols ==
false) && ((i+3) < argc) && (!strcmp(argv[++i],
"{")) && (!strcmp(argv[i+2],
"}"))) {
422 if((nogaps) || (noallgaps) || (gappyout) || (strict) || (strictplus) || (automated1)) {
423 cerr << endl <<
"ERROR: Combinations between automatic and manual methods are not allowed." << endl << endl;
427 else if(blockSize != -1) {
428 cerr << endl <<
"ERROR: Not allowed in combination of column block size value." << endl << endl;
432 else if((gapThreshold != -1) || (conserve != -1) || (simThreshold != -1) || (comThreshold != -1)) {
433 cerr << endl <<
"ERROR: Not allowed in combination of other manual methods." << endl << endl;
437 else if((windowSize != -1) || (gapWindow != -1)|| (simWindow != -1)) {
438 cerr << endl <<
"ERROR: It's imposible to use this windows size in combination of selection method." << endl << endl;
442 else if((delColumns = utils::readNumbers(argv[++i])) == NULL) {
443 cerr << endl <<
"ERROR: Impossible to parser the sequences number" << endl << endl;
447 else selectCols =
true;
456 else if(!strcmp(argv[i],
"-nogaps") && (!nogaps)) {
458 if((windowSize != -1) || (gapWindow != -1) || (simWindow != -1)) {
459 cerr << endl <<
"ERROR: Not allowed in combination of window values." << endl << endl;
463 else if(blockSize != -1) {
464 cerr << endl <<
"ERROR: Not allowed in combination of column block size value." << endl << endl;
468 else if((noallgaps) || (gappyout) || (strict) || (strictplus) || (automated1)) {
469 cerr << endl <<
"ERROR: Combinations between automatic methods are not allowed." << endl << endl;
478 else if((gapThreshold != -1) || (conserve != -1) || (simThreshold != -1) ||
479 (selectCols) || (selectSeqs)) {
480 cerr << endl <<
"ERROR: Combinations between automatic and manual methods are not allowed" << endl << endl;
490 else if(!strcmp(argv[i],
"-noallgaps") && (!noallgaps)) {
492 if((windowSize != -1) || (gapWindow != -1) || (simWindow != -1)) {
493 cerr << endl <<
"ERROR: Not allowed in combination of window values." << endl << endl;
497 else if(blockSize != -1) {
498 cerr << endl <<
"ERROR: Not allowed in combination of column block size value." << endl << endl;
502 else if((nogaps) || (gappyout) || (strict) || (strictplus) || (automated1)) {
503 cerr << endl <<
"ERROR: Combinations between automatic methods are not allowed." << endl << endl;
512 else if((gapThreshold != -1) || (conserve != -1) || (simThreshold != -1) ||
513 (selectCols) || (selectSeqs)) {
514 cerr << endl <<
"ERROR: Combinations between automatic and manual methods are not allowed" << endl << endl;
523 else if(!strcmp(argv[i],
"-keepseqs") && (!keepSeqs)) {
528 else if(!strcmp(argv[i],
"-keepheader") && (!keepHeader)) {
535 else if(!strcmp(argv[i],
"-gappyout") && (!strict)) {
537 if((windowSize != -1) || (gapWindow != -1) || (simWindow != -1)) {
538 cerr << endl <<
"ERROR: Not allowed in combination of window values." << endl << endl;
542 else if((nogaps) || (noallgaps) || (strict) || (strictplus) || (automated1)) {
543 cerr << endl <<
"ERROR: Combinations between automatic methods are not allowed." << endl << endl;
552 else if((gapThreshold != -1) || (conserve != -1) || (simThreshold != -1) ||
553 (selectCols) || (selectSeqs)) {
554 cerr << endl <<
"ERROR: Combinations between automatic and manual methods are not allowed" << endl << endl;
564 else if(!strcmp(argv[i],
"-strict") && (!strict)) {
566 if((windowSize != -1) || (gapWindow != -1) || (simWindow != -1)) {
567 cerr << endl <<
"ERROR: Not allowed in combination of window values." << endl << endl;
576 else if((nogaps) || (noallgaps) || (gappyout) || (strictplus) || (automated1)) {
577 cerr << endl <<
"ERROR: Combinations between automatic methods are not allowed." << endl << endl;
586 else if((gapThreshold != -1) || (conserve != -1) || (simThreshold != -1) ||
587 (selectCols) || (selectSeqs)) {
588 cerr << endl <<
"ERROR: Combinations between automatic and manual methods are not allowed" << endl << endl;
598 else if((!strcmp(argv[i],
"-strictplus")) && (!strictplus)) {
600 if((windowSize != -1) || (gapWindow != -1) || (simWindow != -1)) {
601 cerr << endl <<
"ERROR: Not allowed in combination with this window value." << endl << endl;
610 else if((nogaps) || (noallgaps) || (gappyout) || (strict) || (automated1)) {
611 cerr << endl <<
"ERROR: Combinations between automatic methods are not allowed." << endl << endl;
620 else if((gapThreshold != -1) || (conserve != -1) || (simThreshold != -1) ||
621 (selectCols) || (selectSeqs)) {
622 cerr << endl <<
"ERROR: Combinations between automatic and manual methods are not allowed" << endl << endl;
632 else if((!strcmp(argv[i],
"-automated1")) && (!automated1)) {
634 if((windowSize != -1) || (gapWindow != -1) || (simWindow != -1)) {
635 cerr << endl <<
"ERROR: Not allowed in combination with this window value." << endl << endl;
639 else if(blockSize != -1) {
640 cerr << endl <<
"ERROR: Not allowed in combination of column block size value." << endl << endl;
644 else if((gapThreshold != -1) || (conserve != -1) || (simThreshold != -1) ||
645 (comThreshold != -1) || (selectCols) || (selectSeqs)) {
646 cerr << endl <<
"ERROR: Combinations between automatic methods are not allowed." << endl << endl;
655 else if((gapThreshold != -1) || (conserve != -1) || (simThreshold != -1) ||
656 (selectCols) || (selectSeqs)) {
657 cerr << endl <<
"ERROR: Combinations between automatic and manual methods are not allowed" << endl << endl;
676 else if((!strcmp(argv[i],
"-resoverlap")) && (i+1 != argc) && (resOverlap == -1)) {
678 if((selectCols) || (selectSeqs)) {
679 cerr << endl <<
"ERROR: Not allowed in combination of methods such as manual selection of sequences/columns." << endl << endl;
685 resOverlap = atof(argv[i]);
686 if((resOverlap < 0) || (resOverlap > 1)) {
687 cerr << endl <<
"ERROR: The residue overlap value should be between 0 and 1." << endl << endl;
692 cerr << endl <<
"ERROR: The residue overlap value should be a positive real number." << endl << endl;
700 else if((!strcmp(argv[i],
"-seqoverlap")) && (i+1 != argc) && (seqOverlap == -1)) {
702 if((selectCols) || (selectSeqs)) {
703 cerr << endl <<
"ERROR: Not allowed in combination of methods such as manual selection of sequences/columns." << endl << endl;
709 seqOverlap = atof(argv[i]);
710 if((seqOverlap < 0) || (seqOverlap > 100)) {
711 cerr << endl <<
"ERROR: The sequences overlap value should be between 0 and 100." << endl << endl;
716 cerr << endl <<
"ERROR: The minimal positions value should be a positive real number." << endl << endl;
723 else if((!strcmp(argv[i],
"-selectseqs")) && (selectSeqs ==
false) && ((i+3) < argc) && (!strcmp(argv[++i],
"{")) && (!strcmp(argv[i+2],
"}"))) {
725 if((nogaps) || (noallgaps) || (gappyout) || (strict) || (strictplus) || (automated1)) {
726 cerr << endl <<
"ERROR: Combinations between automatic and manual methods are not allowed." << endl << endl;
730 else if((gapThreshold != -1) || (conserve != -1) || (simThreshold != -1) || (comThreshold != -1)) {
731 cerr << endl <<
"ERROR: Not allowed in combination of other manual methods." << endl << endl;
735 else if((windowSize != -1) || (gapWindow != -1)|| (simWindow != -1)) {
736 cerr << endl <<
"ERROR: It's imposible to use this windows size in combination of selection method." << endl << endl;
740 else if((clusters != -1) || (maxIdentity != -1)) {
741 cerr << endl <<
"ERROR: Only one method to chose sequences can be applied." << endl << endl;
745 else if((delSequences = utils::readNumbers(argv[++i])) == NULL) {
746 cerr << endl <<
"ERROR: Impossible to parser the sequences number" << endl << endl;
750 else selectSeqs =
true;
755 else if((!strcmp(argv[i],
"-maxidentity")) && (i+1 != argc) && (maxIdentity == -1)) {
757 if((gapThreshold != -1) || (conserve != -1) || (simThreshold != -1) ||
758 (comThreshold != -1) || (selectCols) || (selectSeqs)) {
759 cerr << endl <<
"ERROR: Not allowed in combination of other manual methods such as manual " 760 <<
"selection of sequences/columns." << endl << endl;
764 else if((nogaps) || (noallgaps) || (gappyout) || (strict) || (strictplus) || (automated1)) {
765 cerr << endl <<
"ERROR: Combinations between automatic and manual methods are not allowed" << endl << endl;
769 else if((windowSize != -1) || (gapWindow != -1) || (simWindow != -1)) {
770 cerr << endl <<
"ERROR: Not allowed in combination with window values." << endl << endl;
774 else if(clusters != -1) {
775 cerr << endl <<
"ERROR: Only one method to chose representative sequences can be applied." << endl << endl;
781 maxIdentity = atof(argv[i]);
782 if((maxIdentity < 0) || (maxIdentity > 1)) {
783 cerr << endl <<
"ERROR: The maximum identity threshold should be between 0 and 1." << endl << endl;
788 cerr << endl <<
"ERROR: The minimal positions value should be a positive real number." << endl << endl;
796 else if((!strcmp(argv[i],
"-clusters")) && (i+1 != argc) && (clusters == -1)) {
798 if((gapThreshold != -1) || (conserve != -1) || (simThreshold != -1) ||
799 (comThreshold != -1) || (selectCols) || (selectSeqs)) {
800 cerr << endl <<
"ERROR: Not allowed in combination of other manual methods such as manual " 801 <<
"selection of sequences/columns." << endl << endl;
805 else if((nogaps) || (noallgaps) || (gappyout) || (strict) || (strictplus) || (automated1)) {
806 cerr << endl <<
"ERROR: Combinations between automatic and manual methods are not allowed" << endl << endl;
810 else if((windowSize != -1) || (gapWindow != -1) || (simWindow != -1)) {
811 cerr << endl <<
"ERROR: Not allowed in combination with window values." << endl << endl;
815 else if(maxIdentity != -1) {
816 cerr << endl <<
"ERROR: Only one method to chose representative sequences can be applied." << endl << endl;
822 clusters = atoi(argv[i]);
824 cerr << endl <<
"ERROR: There is a problem with the given clusters number." << endl << endl;
829 cerr << endl <<
"ERROR: The clusters number should be a positive integer number." << endl << endl;
843 else if((!strcmp(argv[i],
"-terminalonly")) && (!terminal)) {
848 else if((!strcmp(argv[i],
"--set_boundaries")) && (!terminal) && ((i+3) < argc) && (!strcmp(argv[++i],
"{")) && (!strcmp(argv[i+2],
"}"))) {
850 if((boundaries = utils::readNumbers_StartEnd(argv[++i])) == NULL) {
852 cerr << endl <<
"ERROR: Impossible to parser the sequences number" << endl << endl;
866 else if(!strcmp(argv[i],
"-w") && (i+1 != argc) && (windowSize == -1)){
868 if((gapWindow != -1) || (simWindow != -1) || (conWindow != -1)) {
869 cerr << endl <<
"ERROR: Not allowed in combination with this specific window value." << endl << endl;
873 if((selectCols) || (selectSeqs)) {
874 cerr << endl <<
"ERROR: It's imposible to use this windows size in combination of manual selection method." << endl << endl;
878 else if((nogaps) || (noallgaps) || (gappyout) || (strict) || (strictplus) || (automated1)) {
879 cerr << endl <<
"ERROR: Not allowed in combination of automatic methods." << endl << endl;
885 windowSize = atoi(argv[++i]);
887 cerr << endl <<
"ERROR: The window value should be a positive integer number." << endl << endl;
892 cerr << endl <<
"ERROR: The window value should be a number." << endl << endl;
901 else if(!strcmp(argv[i],
"-gw") && (i+1 != argc) && (gapWindow == -1)){
903 if(windowSize != -1) {
904 cerr << endl <<
"ERROR: Not allowed in combination of general window value." << endl << endl;
908 if((selectCols) || (selectSeqs)) {
909 cerr << endl <<
"ERROR: It's imposible to use this windows size in combination of manual selection method." << endl << endl;
913 else if((nogaps) || (noallgaps) || (gappyout) || (strict) || (strictplus) || (automated1)) {
914 cerr << endl <<
"ERROR: Not allowed in combination of automatic methods." << endl << endl;
920 gapWindow = atoi(argv[++i]);
922 cerr << endl <<
"ERROR: The window value should be a positive integer number." << endl << endl;
927 cerr << endl <<
"ERROR: The window value should be a number." << endl << endl;
936 else if(!strcmp(argv[i],
"-sw") && (i+1 != argc) && (simWindow == -1)){
938 if(windowSize != -1) {
939 cerr << endl <<
"ERROR: Not allowed in combination of general window value." << endl << endl;
943 if((selectCols) || (selectSeqs)) {
944 cerr << endl <<
"ERROR: It's imposible to use this windows size in combination of manual selection method." << endl << endl;
948 else if((nogaps) || (noallgaps) || (gappyout) || (strict) || (strictplus) || (automated1)) {
949 cerr << endl <<
"ERROR: Not allowed in combination of automatic methods." << endl << endl;
955 simWindow = atoi(argv[++i]);
957 cerr << endl <<
"ERROR: The window value should be a positive integer number." << endl << endl;
962 cerr << endl <<
"ERROR: The window value should be a number." << endl << endl;
971 else if(!strcmp(argv[i],
"-cw") && (i+1 != argc) && (conWindow == -1)){
973 if(windowSize != -1) {
974 cerr << endl <<
"ERROR: Not allowed in combination of general window value." << endl << endl;
978 if((selectCols) || (selectSeqs)) {
979 cerr << endl <<
"ERROR: It's imposible to use this windows size in combination of manual selection method." << endl << endl;
985 conWindow = atoi(argv[++i]);
987 cerr << endl <<
"ERROR: The window value should be a positive integer number." << endl << endl;
992 cerr << endl <<
"ERROR: The window value should be a number." << endl << endl;
1005 else if(!strcmp(argv[i],
"-block") && (i+1 != argc) && (blockSize == -1)){
1008 cerr << endl <<
"ERROR: It's imposible to set a block size value in combination with a column manual selection" << endl << endl;
1009 appearErrors =
true;
1012 else if(conserve != -1) {
1013 cerr << endl <<
"ERROR: It's imposible to ask for a minimum percentage of the input alignment in combination with column block size" << endl << endl;
1014 appearErrors =
true;
1018 else if((nogaps) || (noallgaps)) {
1019 cerr << endl <<
"ERROR: Not allowed in combination of automatic methods." << endl << endl;
1020 appearErrors =
true;
1025 blockSize = atoi(argv[++i]);
1027 cerr << endl <<
"ERROR: The block size value should be a positive integer number." << endl << endl;
1028 appearErrors =
true;
1032 cerr << endl <<
"ERROR: The block size value should be a number." << endl << endl;
1033 appearErrors =
true;
1042 else if((!strcmp(argv[i],
"-sgc")) && (!sgc)) {
1049 else if((!strcmp(argv[i],
"-sgt")) && (!sgt)) {
1056 else if((!strcmp(argv[i],
"-ssc")) && (!scc)) {
1063 else if((!strcmp(argv[i],
"-sst")) && (!sct)) {
1070 else if((!strcmp(argv[i],
"-sident")) && (!sident)) {
1076 else if((!strcmp(argv[i],
"-soverlap")) && (!soverlap)) {
1083 else if((!strcmp(argv[i],
"-sfc")) && (!sfc)) {
1085 if(infile != NULL) {
1086 cerr << endl <<
"ERROR: Not allowed in combination with -in option." << endl << endl;
1087 appearErrors =
true;
1099 else if((!strcmp(argv[i],
"-sft")) && (!sft)) {
1101 if(infile != NULL) {
1102 cerr << endl <<
"ERROR: Not allowed in combination with -in option." << endl << endl;
1103 appearErrors =
true;
1119 else if((!strcmp(argv[i],
"-complementary")) && (complementary ==
false)) {
1120 complementary =
true;
1124 else if((!strcmp(argv[i],
"-colnumbering")) && (colnumbering ==
false)) {
1125 colnumbering =
true;
1129 else if((!strcmp(argv[i],
"-splitbystopcodon")) && (splitbystop ==
false)) {
1134 else if((!strcmp(argv[i],
"-ignorestopcodon")) && (ignorestop ==
false)) {
1144 cerr << endl <<
"ERROR: Parameter \"" << argv[i] <<
"\" not valid." << endl << endl;
1145 appearErrors =
true;
1160 if((!appearErrors) && (infile != NULL) && (forceFile != NULL)) {
1161 cerr << endl <<
"ERROR: You can not use a single alignmet at the same " 1162 <<
"time that you force the alignment selection." << endl << endl;
1163 appearErrors =
true;
1166 if((!appearErrors) && (compareset == -1) && (forceFile != NULL)) {
1167 cerr << endl <<
"ERROR: You can not force the alignment selection without set" 1168 <<
" an alignment dataset against to compare it." << endl << endl;
1169 appearErrors =
true;
1172 if((!appearErrors) && (infile == NULL) && (compareset == -1) && (forceFile == NULL) && (backtransFile != NULL)) {
1173 cerr << endl <<
"ERROR: It is impossible to use a Coding Sequences file to apply the back translation method" 1174 <<
" without define an input alignment." << endl << endl;
1175 appearErrors =
true;
1178 if((!appearErrors) && (infile != NULL)) {
1180 if(((nogaps) || (noallgaps) || (gappyout) || (strict) || (strictplus) || (automated1) ||
1181 (gapThreshold != -1) || (conserve != -1) || (simThreshold != -1) || (selectCols) || (selectSeqs) ||
1182 (resOverlap != -1) || (seqOverlap != -1) || (stats < 0)) &&
1183 (!origAlig -> isFileAligned())) {
1184 cerr << endl <<
"ERROR: The sequences in the input alignment should be aligned in order to use trimming method." << endl << endl;
1185 appearErrors =
true;
1189 if((!appearErrors) && (windowSize != -1) && (compareset != -1))
1190 cerr <<
"INFO: Try with specific comparison file window value. parameter -cw." << endl << endl;
1194 if((matrix != NULL) && (!appearErrors)) {
1195 if((!strict) && (!strictplus) && (!automated1) && (simThreshold == -1.0) && (!scc) && (!sct)) {
1196 cerr << endl <<
"ERROR: The Similarity Matrix can only be used with methods that use this matrix." << endl << endl;
1197 appearErrors =
true;
1200 if((gapWindow != -1) ||((compareset == -1) && (conWindow != -1))) {
1201 cerr << endl <<
"ERROR: The Similarity Matrix can only be used with general/similarity windows size." << endl << endl;
1202 appearErrors =
true;
1208 if((complementary) && (!appearErrors))
1209 if((!nogaps) && (!noallgaps) && (!gappyout) && (!strict) && (!strictplus) && (!automated1)
1210 && (gapThreshold == -1) && (conserve == -1) && (simThreshold == -1) && (!selectCols) && (!selectSeqs)
1211 && (resOverlap == -1) && (seqOverlap == -1) && (maxIdentity == -1) && (clusters == -1)) {
1212 cerr << endl <<
"ERROR: This parameter can only be used with either an automatic or a manual method." << endl << endl;
1213 appearErrors =
true;
1218 if((terminal) && (boundaries != NULL) && (!appearErrors)) {
1219 num = origAlig -> getNumAminos();
1221 if((!nogaps) && (!noallgaps) && (!gappyout) && (!strict) && (!strictplus) && (!automated1)
1222 && (gapThreshold == -1) && (conserve == -1) && (simThreshold == -1) && (!selectCols) && (!selectSeqs)
1223 && (resOverlap == -1) && (seqOverlap == -1) && (maxIdentity == -1) && (clusters == -1)) {
1224 cerr << endl <<
"ERROR: This parameter '--set_boundaries' can only be used with either an automatic or a manual method." << endl << endl;
1225 appearErrors =
true;
1228 else if(boundaries[1] >= num) {
1229 cerr << endl <<
"ERROR: \"--set_boundaries\" parameter only accepts " 1230 <<
"integer numbers between 0 and the number of positions (" << num
1231 <<
") - 1." << endl << endl;
1232 appearErrors =
true;
1236 if((terminal) && (boundaries == NULL) && (!appearErrors))
1237 if((!nogaps) && (!noallgaps) && (!gappyout) && (!strict) && (!strictplus) && (!automated1)
1238 && (gapThreshold == -1) && (conserve == -1) && (simThreshold == -1) && (!selectCols) && (!selectSeqs)
1239 && (resOverlap == -1) && (seqOverlap == -1) && (maxIdentity == -1) && (clusters == -1)) {
1240 cerr << endl <<
"ERROR: This parameter '-terminalonly' can only be used with either an automatic or a manual method." << endl << endl;
1241 appearErrors =
true;
1246 if((colnumbering) && (!appearErrors)) {
1247 if((!nogaps) && (!noallgaps) && (!gappyout) && (!strict) && (!strictplus) && (!automated1)
1248 && (gapThreshold == -1) && (conserve == -1) && (simThreshold == -1) && (comThreshold == -1) && (!selectCols) && (!selectSeqs)) {
1249 cerr << endl <<
"ERROR: This parameter can only be used with any trimming method." << endl << endl;
1250 appearErrors =
true;
1252 else if(stats < 0) {
1253 cerr << endl <<
"ERROR: This parameter is not valid when statistics' parameters are defined." << endl << endl;
1254 appearErrors =
true;
1260 if((outhtml != NULL) && (outfile != NULL) && (!appearErrors)) {
1261 if(!strcmp(outhtml, outfile)) {
1262 cerr << endl <<
"ERROR: The output and html files should not be the same." << endl << endl;
1263 appearErrors =
true;
1269 if((outhtml != NULL) && (!appearErrors)) {
1270 if((!nogaps) && (!noallgaps) && (!gappyout) && (!strict) && (!strictplus) && (!automated1) &&
1271 (gapThreshold == -1) && (conserve == -1) && (simThreshold == -1) && (comThreshold == -1) &&
1272 (!selectCols) && (!selectSeqs) && (resOverlap == -1) && (seqOverlap == -1) && (maxIdentity == -1) &&
1274 cerr << endl <<
"ERROR: This parameter can only be used with any trimming method." << endl << endl;
1275 appearErrors =
true;
1292 if(((resOverlap != -1) || (seqOverlap != -1)) && (!appearErrors)) {
1294 if((resOverlap != -1) && (seqOverlap == -1)) {
1295 cerr << endl <<
"ERROR: The sequence overlap value should be defined." << endl << endl;
1296 appearErrors =
true;
1299 else if((resOverlap == -1) && (seqOverlap != -1)) {
1300 cerr << endl <<
"ERROR: The residue overlap value should be defined." << endl << endl;
1301 appearErrors =
true;
1307 if((stats < 0) && (!appearErrors)) {
1310 if(((nogaps) || (noallgaps) || (gappyout) || (strict) || (strictplus) || (automated1)
1311 || (gapThreshold != -1) || (conserve != -1) || (simThreshold != -1)) && (outfile == NULL)) {
1312 cerr << endl <<
"ERROR: An output file should be defined in order to get the alignment's statistics." << endl << endl;
1313 appearErrors =
true;
1319 if((comThreshold != -1) && (conserve != -1) && (!appearErrors)) {
1321 if((gapThreshold != -1) || (simThreshold != -1)) {
1322 cerr << endl <<
"ERROR: Combinations among thresholds are not allowed." << endl << endl;
1323 appearErrors =
true;
// Compare-set mode. compareset holds the argv index of the list file; it is
// -1 when the option was not given. One alignment slot and one path buffer
// are allocated per listed file (numfiles entries).
1329 if((compareset != -1) && (!appearErrors)) {
1331 compAlig =
new alignment*[numfiles];
1332 filesToCompare =
new char*[numfiles];
1335 compare.open(argv[compareset], ifstream::in);
// One iteration per listed file; the loop stops as soon as an error is flagged.
1337 for(i = 0; (i < numfiles) && (!appearErrors); i++) {
// Read one path character by character until a newline or end of file.
// NOTE(review): the loop body that accumulates c into nline is not visible in
// this excerpt. c is tested before eof(); if the very first read fails, c may
// not have been written yet - TODO confirm c is initialised before this point.
1340 for(nline.clear(), compare.read(&c, 1); (c !=
'\n') && ((!compare.eof())); compare.read(&c, 1))
// Keep a heap copy of the path (size + 1 for the terminating NUL).
1343 filesToCompare[i] =
new char [nline.size() + 1];
1344 strcpy(filesToCompare[i], nline.c_str());
// Load the alignment stored at that path; a failed load is reported and flagged.
1348 compAlig[i] =
new alignment;
1349 if(!compAlig[i] -> loadAlignment(filesToCompare[i])) {
1350 cerr << endl <<
"Alignment not loaded: \"" << filesToCompare[i] <<
"\" Check the file's content." << endl << endl;
1351 appearErrors =
true;
// Every file in the set has to contain aligned sequences.
1355 if(!compAlig[i] -> isFileAligned()) {
1356 cerr << endl <<
"ERROR: The sequences in the input alignment should be aligned in order to use this method." << endl << endl;
1357 appearErrors =
true;
1359 compAlig[i] -> sequenMatrix();
// maxAminos tracks the largest getNumAminos() value seen across the set.
1361 if(compAlig[i] -> getNumAminos() > maxAminos)
1362 maxAminos = compAlig[i] -> getNumAminos();
// All files must report the same alignment type. alignDataType is still -1
// for the first file, which therefore only records its type.
1364 if((compAlig[i] -> getTypeAlignment() != alignDataType) && (alignDataType != -1)) {
1365 cerr << endl <<
"ERROR: The alignments' datatypes are different. Check your dataset." << endl << endl;
1366 appearErrors =
true;
1368 alignDataType = compAlig[i] -> getTypeAlignment();
// No forced file: compareFiles::algorithm fills compareVect and returns the
// index stored in referFile. The last argument is true when stats >= 0 and an
// output file is set; the second call (false) is presumably the else branch,
// whose keyword is not visible in this excerpt.
1375 if((!appearErrors) && (forceFile == NULL)) {
1377 compareVect =
new float[maxAminos];
1378 if((stats >= 0) && (outfile != NULL))
1379 referFile = compareFiles::algorithm(compAlig, filesToCompare, compareVect, numfiles,
true);
1381 referFile = compareFiles::algorithm(compAlig, filesToCompare, compareVect, numfiles,
false);
// Apply a window to compareVect: windowSize takes precedence over conWindow.
1383 if(windowSize != -1)
1384 compareFiles::applyWindow(compAlig[referFile] -> getNumAminos(), windowSize, compareVect);
1385 else if(conWindow != -1)
1386 compareFiles::applyWindow(compAlig[referFile] -> getNumAminos(), conWindow, compareVect);
// The file at index referFile becomes the working alignment.
1388 origAlig -> loadAlignment(filesToCompare[referFile]);
1390 }
// Forced file: compareVect is sized from origAlig and filled by
// forceComparison; a false result from that call is turned into an error.
else if((!appearErrors) && (forceFile != NULL)) {
1392 compareVect =
new float[origAlig -> getNumAminos()];
1393 appearErrors = !(compareFiles::forceComparison(compAlig, numfiles, origAlig, compareVect));
1395 if((windowSize != -1) && (!appearErrors))
1396 compareFiles::applyWindow(origAlig -> getNumAminos(), windowSize, compareVect);
1397 else if((conWindow != -1) && (!appearErrors))
1398 compareFiles::applyWindow(origAlig -> getNumAminos(), conWindow, compareVect);
// Per-file cleanup.
// NOTE(review): filesToCompare[i] was allocated above with new char[...], but
// is released here with scalar delete; delete[] would be the matching form.
1403 for(i = 0; i < numfiles; i++) {
1405 delete filesToCompare[i];
// Validation performed before trimming. Every check is skipped once an
// earlier one has set appearErrors.
//
// Block size check. blockSize is an int, so blockSize/4 is an integer division.
// NOTE(review): the condition compares the column count with blockSize/4,
// whereas the message asks for a value below (number of residues / 4) -
// confirm which of the two is intended.
1413 if((!appearErrors) && (origAlig -> getNumAminos() < (blockSize/4))) {
1414 cerr << endl <<
"ERROR: The block size value is too big. Please, choose another one smaller than residues number / 4." << endl << endl;
1415 appearErrors =
true;
// The coding-sequences file must be reported as DNAType or DNADeg.
1418 if((!appearErrors) && (backtransFile != NULL) && (backtranslation -> getTypeAlignment() != DNAType && backtranslation -> getTypeAlignment() != DNADeg)) {
1419 cerr << endl <<
"ERROR: Check your Coding sequences file. It has been detected other kind of biological sequences." << endl << endl;
1420 appearErrors =
true;
// Back-translation requires the input alignment to be aligned.
1423 if((!appearErrors) && (origAlig -> isFileAligned() !=
true) && (backtransFile != NULL)) {
1424 cerr << endl <<
"ERROR: The input protein file has to be aligned to carry out the backtranslation process" << endl << endl;
1425 appearErrors =
true;
// splitbystop and ignorestop are only accepted together with a
// back-translation file, and not both at the same time.
1428 if((!appearErrors) && (backtransFile == NULL) && (splitbystop)) {
1429 cerr << endl <<
"ERROR: The -splitbystopcodon parameter can be only set up with backtranslation functionality." << endl << endl;
1430 appearErrors =
true;
1433 if((!appearErrors) && (backtransFile == NULL) && (ignorestop)) {
1434 cerr << endl <<
"ERROR: The -ignorestopcodon parameter can be only set up with backtranslation functionality." << endl << endl;
1435 appearErrors =
true;
1438 if((!appearErrors) && (ignorestop) && (splitbystop)) {
1439 cerr << endl <<
"ERROR: Incompatibility of -ignorestopcodon & -splitbystopcodon parameters. Choose one." << endl << endl;
1440 appearErrors =
true;
// prepareCodingSequence returning anything other than true is an error; no
// message is printed here.
1443 if((!appearErrors) && (backtransFile != NULL) && (backtranslation -> prepareCodingSequence(splitbystop, ignorestop, origAlig) !=
true))
1444 appearErrors =
true;
// Collect the names and lengths of the coding sequences and let origAlig
// check them against its own sequences.
// NOTE(review): the literal 3 is presumably the codon length - TODO confirm
// against checkCorrespondence.
1447 if((!appearErrors) && (backtransFile != NULL)) {
1449 seqNames =
new string[backtranslation -> getNumSpecies()];
1450 seqLengths =
new int[backtranslation -> getNumSpecies()];
1451 backtranslation -> getSequences(seqNames, seqLengths);
1453 if(origAlig -> checkCorrespondence(seqNames, seqLengths, backtranslation -> getNumSpecies(), 3) !=
true)
1454 appearErrors =
true;
// Release of heap buffers.
// NOTE(review): the condition guarding this cleanup is not visible in this
// excerpt.
1469 delete []delColumns;
1471 delete[] filesToCompare;
1472 delete[] compareVect;
// NOTE(review): forceFile and backtransFile are char* buffers released with
// scalar delete. If they are allocated with new char[...] in the same way as
// infile - TODO confirm - delete[] would be the matching form.
1480 if(forceFile != NULL)
delete forceFile;
1481 if(backtransFile != NULL)
delete backtransFile;
1482 if(backtranslation != NULL)
delete backtranslation;
// Hand the parsed options over to the working alignment: the terminal flag
// with the boundaries array, then the keep-sequences and keep-header flags.
1493 origAlig -> trimTerminalGaps(terminal, boundaries);
1494 origAlig -> setKeepSequencesFlag(keepSeqs);
1495 origAlig -> setKeepSeqsHeaderFlag(keepHeader);
// A general window size, when set, overwrites both the gap and the
// similarity window.
1498 if(windowSize != -1) {
1499 gapWindow = windowSize;
1500 simWindow = windowSize;
1508 origAlig -> setWindowsSize(gapWindow, simWindow);
1514 origAlig -> setBlockSize(blockSize);
1520 origAlig -> setOutputFormat(outformat, shortNames);
// A similarity matrix is created only when one of these options is active:
// strict, strictplus, automated1, a similarity threshold, scc or sct.
// (scc and sct are bool flags compared against 1 here.)
1524 if((strict) || (strictplus) || (automated1) || (simThreshold != -1.0) || (scc == 1) || (sct == 1)) {
1525 similMatrix =
new similarityMatrix();
// Matrix source 1: the file named by matrix.
// NOTE(review): the condition guarding this call is not visible in this excerpt.
1528 similMatrix -> loadSimMatrix(matrix);
// Matrix source 2: one of the alternative matrices, chosen by
// alternative_matrix and the alignment type.
1531 else if(alternative_matrix != -1) {
1532 alignDataType = origAlig -> getTypeAlignment();
1533 similMatrix -> alternativeSimilarityMatrices(alternative_matrix, alignDataType);
// Matrix source 3: a default matrix selected by the alignment type (AA,
// DNA/RNA, or degenerated DNA/RNA).
1537 alignDataType = origAlig -> getTypeAlignment();
1538 if(alignDataType == AAType)
1539 similMatrix -> defaultAASimMatrix();
1540 else if((alignDataType == DNAType) || (alignDataType == RNAType))
1541 similMatrix -> defaultNTSimMatrix();
1542 else if((alignDataType == DNADeg) || (alignDataType == RNADeg))
1543 similMatrix -> defaultNTDegeneratedSimMatrix();
// Attach the matrix to the alignment; a false result is reported.
1546 if(!origAlig -> setSimilarityMatrix(similMatrix)) {
1547 cerr << endl <<
"ERROR: It's imposible to proccess the Similarity Matrix." << endl << endl;
// Statistics output. Each call prints one kind of report for the working
// alignment.
// NOTE(review): the conditions selecting each report are not visible in this
// excerpt.
1555 origAlig -> printStatisticsGapsColumns();
1562 origAlig -> printStatisticsGapsTotal();
1569 origAlig -> printStatisticsConservationColumns();
1576 origAlig -> printStatisticsConservationTotal();
1583 origAlig -> printSeqIdentity();
1590 origAlig -> printSeqOverlap();
// File-comparison statistics are only reached when a compare set was given;
// both use compareVect and the column count of the working alignment.
1598 if(compareset != -1) {
1600 compareFiles::printStatisticsFileColumns(origAlig -> getNumAminos(), compareVect);
1602 compareFiles::printStatisticsFileAcl(origAlig -> getNumAminos(), compareVect);
// Keep a handle to the sequence matrix of the working alignment when a
// back-translation file is in use.
1607 if(backtransFile != NULL)
1608 seqMatrix = origAlig -> getSeqMatrix();
// Trimming method dispatch. Each call below returns the alignment that is
// stored in singleAlig.
// NOTE(review): the conditions selecting the first five calls are not visible
// in this excerpt.
1613 singleAlig = origAlig -> cleanGaps(0, 0, complementary);
1616 singleAlig = origAlig -> cleanNoAllGaps(complementary);
1619 singleAlig = origAlig -> clean2ndSlope(complementary);
1622 singleAlig = origAlig -> cleanCombMethods(complementary,
false);
1625 singleAlig = origAlig -> cleanCombMethods(complementary,
true);
// automated1: selectMethod() chooses between clean2ndSlope (when it returns
// GAPPYOUT) and cleanCombMethods(complementary, false).
1627 else if(automated1) {
1628 if(origAlig -> selectMethod() ==
GAPPYOUT)
1629 singleAlig = origAlig -> clean2ndSlope(complementary);
1631 singleAlig = origAlig -> cleanCombMethods(complementary,
false);
// Consistency-based trimming, using the vector computed from the compare set.
1636 if(comThreshold != -1)
1637 singleAlig = origAlig -> cleanCompareFile(comThreshold, conserve, compareVect, complementary);
// Overlap-based sequence removal: needs both thresholds. seqOverlap is
// divided by 100 before being passed on. The intermediate result is then
// passed through cleanNoAllGaps(false) and deleted.
1641 if((resOverlap != -1) && (seqOverlap != -1)) {
1642 intermediateAlig = origAlig -> cleanSpuriousSeq(resOverlap, (seqOverlap/100), complementary);
1643 singleAlig = intermediateAlig -> cleanNoAllGaps(
false);
1645 delete intermediateAlig;
// Manual thresholds: similarity together with gaps, similarity alone, or
// gaps alone.
1650 if(simThreshold != -1.0) {
1651 if(gapThreshold != -1.0)
1652 singleAlig = origAlig -> clean(conserve, gapThreshold, simThreshold, complementary);
1654 singleAlig = origAlig -> cleanConservation(conserve, simThreshold, complementary);
1659 else if(gapThreshold != -1.0)
1660 singleAlig = origAlig -> cleanGaps(conserve, gapThreshold, complementary);
// Manual selection of columns or sequences to remove.
1664 if((selectCols) || (selectSeqs)) {
// Columns: delColumns[0] is used as the number of entries and the entries
// themselves sit at indices 1..delColumns[0]. Each entry must be below the
// number of columns.
// NOTE(review): only the upper bound is checked in the visible code.
1672 if(delColumns != NULL) {
1673 num = origAlig -> getNumAminos();
1675 for(i = 1; i < delColumns[0] + 1; i++)
1676 if(delColumns[i] >= num) {
1677 cerr << endl <<
"ERROR: This option only accepts integer numbers " 1678 "between 0 and the number of columns - 1." << endl << endl;
1679 appearErrors =
true;
// NOTE(review): the remaining arguments of this call are not visible in this
// excerpt.
1684 singleAlig = origAlig -> removeColumns(delColumns, 1, delColumns[0],
// Sequences: same layout and same upper-bound check, against the number of
// sequences.
1689 if(delSequences != NULL) {
1690 num = origAlig -> getNumSpecies();
1692 for(i = 1; i < delSequences[0] + 1; i++)
1693 if(delSequences[i] >= num) {
1694 cerr << endl <<
"ERROR: This option only accepts integer numbers " 1695 "between 0 and the number of sequences - 1." << endl << endl;
1696 appearErrors =
true;
// Remove the sequences only when the indices were valid, then pass the
// intermediate result through cleanNoAllGaps(false) and delete it.
1700 if (!appearErrors) {
1701 intermediateAlig = origAlig -> removeSequences(delSequences, 1,
1702 delSequences[0], complementary);
1703 singleAlig = intermediateAlig -> cleanNoAllGaps(
false);
1705 delete intermediateAlig;
// Clustering by identity threshold: the clustered alignment is passed through
// cleanNoAllGaps(false) and the intermediate object is deleted.
1712 if(maxIdentity != -1) {
1713 intermediateAlig = origAlig -> getClustering(maxIdentity);
1714 singleAlig = intermediateAlig -> cleanNoAllGaps(
false);
1716 delete intermediateAlig;
// Clustering by number of clusters: the requested count may not exceed the
// number of sequences. The cut point comes from getCutPointClusters(clusters).
// NOTE(review): no else is visible between the error branch and the
// clustering calls in this excerpt - confirm they are skipped after the error.
1718 else if(clusters != -1) {
1719 if(clusters > origAlig -> getNumSpecies()) {
1720 cerr << endl <<
"ERROR:The number of clusters from the alignment can not be larger than the number of sequences from that alignment." << endl << endl;
1721 appearErrors =
true;
1723 intermediateAlig = origAlig -> getClustering(origAlig -> getCutPointClusters(clusters));
1724 singleAlig = intermediateAlig -> cleanNoAllGaps(
false);
1726 delete intermediateAlig;
// When no method produced a result, the working alignment itself is used as
// the result.
1732 if(singleAlig == NULL) {
1733 singleAlig = origAlig;
// Optional HTML summary, built from the original alignment plus the size and
// correspondence data of the result. A false return is reported.
1739 if((outhtml != NULL) && (!appearErrors))
1740 if(!origAlig -> alignmentSummaryHTML(outhtml, singleAlig -> getNumAminos(), singleAlig -> getNumSpecies(),
1741 singleAlig -> getCorrespResidues(), singleAlig -> getCorrespSequences(), compareVect)) {
1742 cerr << endl <<
"ERROR: It's imposible to generate the HTML output file." << endl << endl;
1743 appearErrors =
true;
// Back-translation: seqNames is re-allocated for the result's sequence count,
// filled by getSequences, and the result is replaced by what
// getTranslationCDS returns.
// NOTE(review): singleAlig is overwritten here; whether the previous object is
// released is not visible in this excerpt.
1748 if(backtransFile != NULL) {
1750 if(seqNames != NULL)
delete [] seqNames;
1751 seqNames =
new string[singleAlig -> getNumSpecies()];
1753 singleAlig -> getSequences(seqNames);
1755 singleAlig = backtranslation -> getTranslationCDS(singleAlig -> getNumAminos(), singleAlig -> getNumSpecies(),
1756 singleAlig -> getCorrespResidues(), seqNames, seqMatrix, singleAlig);
// Write the result to the output file, or print it when no output file was
// given and stats >= 0.
1761 if((outfile != NULL) && (!appearErrors)) {
1762 if(!singleAlig -> saveAlignment(outfile)) {
1763 cerr << endl <<
"ERROR: It's imposible to generate the output file." << endl << endl;
1764 appearErrors =
true;
1767 else if((stats >= 0) && (!appearErrors))
1768 singleAlig -> printAlignment();
// Optional column correspondence report.
1772 if((colnumbering) && (!appearErrors))
1773 singleAlig -> printCorrespondence();
// Final release of heap buffers.
1783 delete []delColumns;
1785 delete[] filesToCompare;
1786 delete[] compareVect;
// Help text written to cout: program banner, licence, citation, basic usage,
// the option list and, further down, a set of numbered usage examples.
// NOTE(review): the enclosing function header(s) are not visible in this
// excerpt, and a few statements below have no visible terminating
// "<< endl;" (for example after the -automated1 and -w descriptions).
1802 <<
"]. " <<
AUTHORS << endl << endl;
1804 cout <<
"trimAl webpage: http://trimal.cgenomics.org" << endl << endl;
1806 cout <<
"This program is free software: you can redistribute it and/or modify " << endl
1807 <<
"it under the terms of the GNU General Public License as published by " << endl
1808 <<
"the Free Software Foundation, the last available version." << endl << endl;
1810 cout <<
"Please cite:" << endl
1811 <<
"\t\ttrimAl: a tool for automated alignment trimming in large-scale phylogenetic analyses." 1812 <<
"\n\t\tSalvador Capella-Gutierrez; Jose M. Silla-Martinez; Toni Gabaldon." 1813 <<
"\n\t\tBioinformatics 2009, 25:1972-1973." << endl << endl;
1815 cout <<
"Basic usage" << endl
1816 <<
"\ttrimal -in <inputfile> -out <outputfile> -(other options)." << endl << endl;
// General options and input files.
1818 cout <<
"Common options (for a complete list please see the User Guide or visit http://trimal.cgenomics.org):" << endl << endl;
1819 cout <<
" -h " <<
"Print this information and show some examples." << endl;
1820 cout <<
" --version " <<
"Print the trimAl version." << endl << endl;
1822 cout <<
" -in <inputfile> " <<
"Input file in several formats (clustal, fasta, NBRF/PIR, nexus, phylip3.2, phylip)." << endl << endl;
1824 cout <<
" -compareset <inputfile> " <<
"Input list of paths for the files containing the alignments to compare." << endl;
1825 cout <<
" -forceselect <inputfile> " <<
"Force selection of the given input file in the files comparison method." << endl << endl;
1827 cout <<
" -backtrans <inputfile> " <<
"Use a Coding Sequences file to get a backtranslation for a given AA alignment" << endl;
1828 cout <<
" -ignorestopcodon " <<
"Ignore stop codons in the input coding sequences" << endl;
1829 cout <<
" -splitbystopcodon " <<
"Split input coding sequences up to first stop codon appearance" << endl << endl;
// Similarity matrix options.
// NOTE(review): "<inpufile>" below looks like a typo for "<inputfile>".
1832 cout <<
" -matrix <inpufile> " <<
"Input file for user-defined similarity matrix (default is Blosum62)." << endl;
1833 cout <<
" --alternative_matrix <name> " <<
"Select an alternative similarity matrix already loaded. " << endl
1834 <<
" Only available 'degenerated_nt_identity'" << endl << endl;
// Output files and output formats.
1836 cout <<
" -out <outputfile> " <<
"Output alignment in the same input format (default stdout). (default input format)" << endl;
1837 cout <<
" -htmlout <outputfile> " <<
"Get a summary of trimal's work in an HTML file." << endl << endl;
1839 cout <<
" -keepheader " <<
"Keep original sequence header including non-alphanumeric characters." << endl;
1840 cout <<
" " <<
"Only available for input FASTA format files. (future versions will extend this feature)" << endl << endl;
1842 cout <<
" -nbrf " <<
"Output file in NBRF/PIR format" << endl;
1843 cout <<
" -mega " <<
"Output file in MEGA format" << endl;
1844 cout <<
" -nexus " <<
"Output file in NEXUS format" << endl;
1845 cout <<
" -clustal " <<
"Output file in CLUSTAL format" << endl << endl;
1847 cout <<
" -fasta " <<
"Output file in FASTA format" << endl;
1848 cout <<
" -fasta_m10 " <<
"Output file in FASTA format. Sequences name length up to 10 characters." << endl << endl;
1850 cout <<
" -phylip " <<
"Output file in PHYLIP/PHYLIP4 format" << endl;
1851 cout <<
" -phylip_m10 " <<
"Output file in PHYLIP/PHYLIP4 format. Sequences name length up to 10 characters." << endl;
1852 cout <<
" -phylip_paml " <<
"Output file in PHYLIP format compatible with PAML" << endl;
1853 cout <<
" -phylip_paml_m10 " <<
"Output file in PHYLIP format compatible with PAML. Sequences name length up to 10 characters." << endl;
1854 cout <<
" -phylip3.2 " <<
"Output file in PHYLIP3.2 format" << endl;
1855 cout <<
" -phylip3.2_m10 " <<
"Output file in PHYLIP3.2 format. Sequences name length up to 10 characters." << endl << endl;
// Complementary output and manual selection.
1857 cout <<
" -complementary " <<
"Get the complementary alignment." << endl;
1858 cout <<
" -colnumbering " <<
"Get the relationship between the columns in the old and new alignment." << endl << endl;
1860 cout <<
" -selectcols { n,l,m-k } " <<
"Selection of columns to be removed from the alignment. Range: [0 - (Number of Columns - 1)]. (see User Guide)." << endl;
1861 cout <<
" -selectseqs { n,l,m-k } " <<
"Selection of sequences to be removed from the alignment. Range: [0 - (Number of Sequences - 1)]. (see User Guide)." << endl << endl;
// Manual thresholds.
// NOTE(review): the -ct description reads "allowed.Range" with no space after
// the period.
1863 cout <<
" -gt -gapthreshold <n> " <<
"1 - (fraction of sequences with a gap allowed). Range: [0 - 1]" << endl;
1864 cout <<
" -st -simthreshold <n> " <<
"Minimum average similarity allowed. Range: [0 - 1]" << endl;
1865 cout <<
" -ct -conthreshold <n> " <<
"Minimum consistency value allowed.Range: [0 - 1]" << endl;
1866 cout <<
" -cons <n> " <<
"Minimum percentage of the positions in the original alignment to conserve. Range: [0 - 100]" << endl << endl;
// Gap handling and automated methods.
1868 cout <<
" -nogaps " <<
"Remove all positions with gaps in the alignment." << endl;
1869 cout <<
" -noallgaps " <<
"Remove columns composed only by gaps." << endl;
1870 cout <<
" -keepseqs " <<
"Keep sequences even if they are composed only by gaps." << endl << endl;
1872 cout <<
" -gappyout " <<
"Use automated selection on \"gappyout\" mode. This method only uses " 1873 <<
"information based on gaps' distribution. (see User Guide)." << endl;
1874 cout <<
" -strict " <<
"Use automated selection on \"strict\" mode. (see User Guide)." << endl;
1875 cout <<
" -strictplus " <<
"Use automated selection on \"strictplus\" mode. (see User Guide)." << endl;
1876 cout <<
" " <<
"(Optimized for Neighbour Joining phylogenetic tree reconstruction)."<< endl << endl;
1878 cout <<
" -automated1 " <<
"Use a heuristic selection of the automatic method based on similarity statistics. " 1879 <<
"(see User Guide). (Optimized for Maximum Likelihood phylogenetic tree reconstruction)." 1882 cout <<
" -terminalonly " <<
"Only columns out of internal boundaries (first and last column without gaps) are " << endl;
1883 cout <<
" " <<
"candidates to be trimmed depending on the selected method" << endl;
1885 cout <<
" --set_boundaries { l,r } " <<
"Set manually left (l) and right (r) boundaries - only columns out of these boundaries are " << endl;
1886 cout <<
" " <<
"candidates to be trimmed depending on the selected method. Range: [0 - (Number of Columns - 1)]" << endl;
1889 cout <<
" -block <n> " <<
"Minimum column block size to be kept in the trimmed alignment. Available with manual" 1890 <<
" and automatic (gappyout) methods" << endl << endl;
// Overlap and clustering options.
1893 cout <<
" -resoverlap " <<
"Minimum overlap of a positions with other positions in the column to be considered a " 1894 <<
"\"good position\". Range: [0 - 1]. (see User Guide)." << endl;
1895 cout <<
" -seqoverlap " <<
"Minimum percentage of \"good positions\" that a sequence must have in order to be conserved. Range: [0 - 100]" 1896 <<
"(see User Guide)." << endl << endl;
1898 cout <<
" -clusters <n> " <<
"Get the most Nth representatives sequences from a given alignment. Range: [1 - (Number of sequences)]" << endl;
1899 cout <<
" -maxidentity <n> " <<
"Get the representatives sequences for a given identity threshold. Range: [0 - 1]." << endl << endl;
// Window sizes.
1901 cout <<
" -w <n> " <<
"(half) Window size, score of position i is the average of the window (i - n) to (i + n)." 1903 cout <<
" -gw <n> " <<
"(half) Window size only applies to statistics/methods based on Gaps." << endl;
1904 cout <<
" -sw <n> " <<
"(half) Window size only applies to statistics/methods based on Similarity." << endl;
1905 cout <<
" -cw <n> " <<
"(half) Window size only applies to statistics/methods based on Consistency." << endl << endl;
// Statistics options.
1907 cout <<
" -sgc " <<
"Print gap scores for each column in the input alignment." << endl;
1908 cout <<
" -sgt " <<
"Print accumulated gap scores for the input alignment." << endl;
1909 cout <<
" -ssc " <<
"Print similarity scores for each column in the input alignment." << endl;
1910 cout <<
" -sst " <<
"Print accumulated similarity scores for the input alignment." << endl;
1911 cout <<
" -sfc " <<
"Print sum-of-pairs scores for each column from the selected alignment" 1913 cout <<
" -sft " <<
"Print accumulated sum-of-pairs scores for the selected alignment" 1915 cout <<
" -sident " <<
"Print identity scores matrix for all sequences in the input alignment. (see User Guide)." 1917 cout <<
" -soverlap " <<
"Print overlap scores matrix for all sequences in the input alignment. (see User Guide)." 1923 cout <<
"Some Examples:" << endl << endl;
// Numbered usage examples: a short description followed by a sample command
// line for each.
1925 cout <<
"1) Removes all positions in the alignment with gaps in 10% or more of" << endl
1926 <<
" the sequences, unless this leaves less than 60% of original alignment. " << endl
1927 <<
" In such case, print the 60% best (with less gaps) positions." << endl << endl;
1929 cout <<
" trimal -in <inputfile> -out <outputfile> -gt 0.9 -cons 60" << endl << endl;
1931 cout <<
"2) As above but, the gap score is averaged over a window starting" << endl
1932 <<
" 3 positions before and ending 3 positions after each column." << endl << endl;
1934 cout <<
" trimal -in <inputfile> -out <outputfile> -gt 0.9 -cons 60 -w 3" << endl << endl;
1936 cout <<
"3) Use an automatic method to decide optimal thresholds, based in the gap scores" << endl
1937 <<
" from input alignment. (see User Guide for details)." << endl << endl;
1939 cout <<
" trimal -in <inputfile> -out <outputfile> -gappyout" << endl << endl;
1941 cout <<
"4) Use automatic methods to decide optimal thresholds, based on the combination " << endl
1942 <<
" of gap and similarity scores. (see User Guide for details)." << endl << endl;
1944 cout <<
" trimal -in <inputfile> -out <outputfile> -strictplus" << endl << endl;
1946 cout <<
"5) Use an heuristic to decide the optimal method for trimming the alignment. " << endl
1947 <<
" (see User Guide for details)." << endl << endl;
1949 cout <<
" trimal -in <inputfile> -out <outputfile> -automated1" << endl << endl;
// NOTE(review): "alignemnt" in example 6 looks like a typo for "alignment".
1951 cout <<
"6) Use residues and sequences overlap thresholds to delete some sequences from the " << endl
1952 <<
" alignemnt. (see User Guide for details)." << endl << endl;
1954 cout <<
" trimal -in <inputfile> -out <outputfile> -resoverlap 0.8 -seqoverlap 75" << endl << endl;
1956 cout <<
"7) Selection of columns to be deleted from the alignment. The selection can " << endl
1957 <<
" be a column number or a column number interval. Start from 0" << endl << endl;
1959 cout <<
" trimal -in <inputfile> -out <outputfile> -selectcols { 0,2,3,10,45-60,68,70-78 }" << endl << endl;
1961 cout <<
"8) Get the complementary alignment from the alignment previously trimmed." << endl << endl;
1963 cout <<
" trimal -in <inputfile> -out <outputfile> -selectcols { 0,2,3,10,45-60,68,70-78 } -complementary" << endl << endl;
1965 cout <<
"9) Selection of sequences to be deleted from the alignment. Start in 0" << endl << endl;
1967 cout <<
" trimal -in <inputfile> -out <outputfile> -selectseqs { 2,4,8-12 } " << endl << endl;
1969 cout <<
"10) Select the 5 most representative sequences from the alignment" << endl << endl;
1971 cout <<
" trimal -in <inputfile> -out <outputfile> -clusters 5 " << endl << endl;
int main(int argc, char *argv[])
Utilities class. This class contains shared methods to be used in multiple parts of the code...
bool isNumber(char *num)
String-is-number checking.