31 #include <trimalManager.h> 33 #include "FormatHandling/BaseFormatHandler.h" 34 #include "Statistics/similarityMatrix.h" 35 #include "Statistics/Consistency.h" 36 #include "Statistics/Similarity.h" 38 #include "Alignment/Alignment.h" 39 #include "Statistics/Manager.h" 67 #define checkArgument(argument) { 68 auto x = argument(&argc, argv, &i); 69 if (x == Recognized) continue; 70 if (x == Errored) return x; 71 if (x == Final) return x;\ 77 StartTiming(
"void trimAlManager::parseArguments(int argc, char *argv[]) ");
93 for (
int i = 1; i < argc; i++) {
201 if (!strcmp(argv[i],
"--verbosity") || !strcmp(argv[i],
"-v")) {
227 StartTiming(
"bool trimAlManager::check_arguments_incompatibilities()");
252 for (
int i = 1; i < *argc; i++) {
253 if (!strcmp(argv[i],
"--verbosity") || !strcmp(argv[i],
"-v")) {
254 if ((i + 1) != *argc) {
255 if (!strcmp(argv[i + 1],
"error") || !strcmp(argv[i + 1],
"3")) {
259 if (!strcmp(argv[i + 1],
"warning") || !strcmp(argv[i + 1],
"2")) {
263 if (!strcmp(argv[i + 1],
"info") || !strcmp(argv[i + 1],
"1")) {
267 if (!strcmp(argv[i + 1],
"none") || !strcmp(argv[i + 1],
"0")) {
280 if (!strcmp(argv[*currentArg],
"-h") || !strcmp(argv[*currentArg],
"-help")) {
286 if (!strcmp(argv[*currentArg],
"--version")) {
287 std::cout <<
VERSION << std::endl;
291 if (!strcmp(argv[*currentArg],
"-lf") || !strcmp(argv[*currentArg],
"--listformats")) {
301 if (!strcmp(argv[*i],
"-in") && ((*i) + 1 != *argc) &&
infile ==
nullptr)
323 if (!strcmp(argv[*i],
"-vcf") && ((*i) + 1 != *argc)) {
324 vcfs =
new std::vector<std::string>();
325 while (((*i) + 1 != *argc)) {
327 if (argv[*i][0] ==
'-') {
331 vcfs->emplace_back(argv[*i]);
340 if ((!strcmp(argv[*i],
"-out")) && ((*i) + 1 != *argc) && (
outfile ==
nullptr)) {
350 if ((!strcmp(argv[*i],
"-htmlout")) && ((*i) + 1 != *argc) && (
htmlOutFile ==
nullptr)) {
360 if ((!strcmp(argv[*i],
"-timetrackerout")) && ((*i) + 1 != *argc)) {
368 if ((!strcmp(argv[*i],
"-svgout")) && ((*i) + 1 != *argc) && (
svgOutFile ==
nullptr)) {
378 if ((!strcmp(argv[*i],
"-svgstats")) && ((*i) + 1 != *argc) && (
svgStatsOutFile ==
nullptr)) {
389 #define LegacyFormatArgumentWrapper(arg, format) 390 if (!strcmp(argv[*i], arg)) { 391 oformats.emplace_back(format); 399 if (!strcmp(argv[*i],
"-formats")) {
400 if ((*i + 1) == *argc) {
405 while (++(*i) != *argc && argv[*i][0] !=
'-')
412 }
else oformats.emplace_back(argv[*i]);
439 #undef LegacyFormatArgumentWrapper 442 if (!strcmp(argv[*i],
"-matrix") && ((*i) + 1 != *argc) && (
matrixFile ==
nullptr)) {
447 }
else if (!strcmp(argv[*i],
"--alternative_matrix") && ((*i) + 1 != *argc) && (
alternative_matrix == -1)) {
449 if (!strcmp(argv[*i],
"degenerated_nt_identity"))
463 if (!strcmp(argv[*i],
"-compareset") && ((*i) + 1 != *argc) && (
compareset ==
nullptr)) {
465 compare.open(argv[++*i], std::ifstream::in);
478 if (!strcmp(argv[*i],
"-forceselect") && ((*i) + 1 != *argc) &&
forceFile ==
nullptr)
502 if (!strcmp(argv[*i],
"-backtrans") && ((*i) + 1 != *argc) && (
backtransFile ==
nullptr)) {
518 if ((!strcmp(argv[*i],
"-gapthreshold") || !strcmp(argv[*i],
"-gt"))
539 if ((!strcmp(argv[*i],
"-gapabsolutethreshold") || !strcmp(argv[*i],
"-gat"))
564 if ((!strcmp(argv[*i],
"-simthreshold") || !strcmp(argv[*i],
"-st")) && ((*i) + 1 != *argc) && (
similarityThreshold == -1)) {
581 if ((!strcmp(argv[*i],
"-conthreshold") || !strcmp(argv[*i],
"-ct")) && ((*i) + 1 != *argc) && (
consistencyThreshold == -1)) {
616 StartTiming(
"trimAlManager::select_cols_argument");
617 if ((!strcmp(argv[*i],
"-selectcols")) &&
620 !strcmp(argv[++(*i)],
"{") &&
621 !strcmp(argv[(*i) + 2],
"}")) {
634 if (!strcmp(argv[*i],
"-nogaps") && (!
nogaps)) {
650 if (!strcmp(argv[*i],
"-noallgaps") && (!
noallgaps)) {
658 if (!strcmp(argv[*i],
"-keepseqs") && (!
keepSeqs)) {
674 if (!strcmp(argv[*i],
"-gappyout") && (!
gappyout)) {
682 if (!strcmp(argv[*i],
"-strict") && (!
strict)) {
690 if ((!strcmp(argv[*i],
"-strictplus")) && (!
strictplus)) {
698 if ((!strcmp(argv[*i],
"-automated1")) && (!
automated1)) {
706 if ((!strcmp(argv[*i],
"-resoverlap")) && ((*i) + 1 != *argc) && (
residuesOverlap == -1)) {
723 if ((!strcmp(argv[*i],
"-seqoverlap")) && ((*i) + 1 != *argc) && (
sequenceOverlap == -1)) {
740 StartTiming(
"trimAlManager::select_seqs_argument");
741 if ((!strcmp(argv[*i],
"-selectseqs")) &&
743 ((*i + 3) < *argc) &&
744 (!strcmp(argv[++*i],
"{")) &&
745 (!strcmp(argv[*i + 2],
"}"))) {
757 if ((!strcmp(argv[*i],
"-maxidentity")) && ((*i) + 1 != *argc) && (
maxIdentity == -1)) {
776 if ((!strcmp(argv[*i],
"-clusters")) && ((*i) + 1 != *argc) && (
clusters == -1)) {
794 if ((!strcmp(argv[*i],
"-terminalonly")) && (!
terminalOnly)) {
802 if (!strcmp(argv[*i],
"-w") && ((*i) + 1 != *argc) && (
windowSize == -1)) {
819 if (!strcmp(argv[*i],
"-gw") && ((*i) + 1 != *argc) && (
gapWindow == -1)) {
836 if (!strcmp(argv[*i],
"-sw") && ((*i) + 1 != *argc) && (
similarityWindow == -1)) {
854 if (!strcmp(argv[*i],
"-cw") && ((*i) + 1 != *argc) && (
consistencyWindow == -1)) {
871 if (!strcmp(argv[*i],
"-block") && ((*i) + 1 != *argc) && (
blockSize == -1)) {
896 #define stat_check(stat) 897 if (!strcmp(argv[*i], "-" #stat )) { 902 } else return NotRecognized; 963 if ((!strcmp(argv[*i],
"-ignorefilter")) && !
ignoreFilter) {
971 if (!strcmp(argv[*i],
"-minquality") && ((*i) + 1 != *argc) && (
minQuality == -1)) {
988 if (!strcmp(argv[*i],
"-mincoverage") && ((*i) + 1 != *argc) && (
minCoverage == -1)) {
1007 StartTiming(
"bool trimAlManager::processArguments(char *argv[]) ");
1055 new std::string[2]{
"-selectcols",
"-block"}
);
1063 new std::string[2]{
"-selectcols",
"columns"}
);
1076 new std::string[2]{
"-selectseqs",
"sequences"}
);
1097 if (
strict) autom =
"-strict";
1100 if (
nogaps) autom =
"-nogaps";
1120 new std::string[2]{
"-conthreshold",
"-block"}
);
1132 if (
strict) autom =
"-strict";
1135 if (
nogaps) autom =
"-nogaps";
1206 StartTiming(
"bool trimAlManager::check_arguments_needs(char *argv[])");
1258 bool returnValue =
false;
1259 if (
vcfs ==
nullptr)
1317 new std::string[2] {
1402 std::array<
char *, 4> outFiles
1410 std::array<std::string, 4> outFilesNames
1412 "html report (-htmlout)",
1413 "output alignment (-out)",
1414 "svg report (-svgout)",
1415 "svg stats (-svgstats)" 1419 for (
int i = 0, x = 0; i < outFiles.size(); i++) {
1420 if (outFiles.at(i) !=
nullptr)
1421 for (x = i + 1; x < outFiles.size(); x++) {
1422 if (outFiles.at(x) !=
nullptr)
1423 if (!strcmp(outFiles.at(i), outFiles.at(x))) {
1637 bool generalMakesSense =
false;
1638 for(
auto& thresholdDependency : std::vector<std::tuple<
const char *,
float&,
int&>>{
1639 {
"gaps", gapThreshold, gapWindow},
1640 {
"similarity", similarityThreshold, similarityWindow},
1641 {
"consistency", consistencyThreshold, consistencyWindow},
1644 if (std::get<2>(thresholdDependency) != -1)
1646 if (std::get<1>(thresholdDependency) == -1.0F)
1649 std::cout << std::get<0>(thresholdDependency) <<
" window provided, but stat not requested.";
1652 if (std::get<1>(thresholdDependency) != -1.0F)
1654 generalMakesSense =
true;
1660 if (!generalMakesSense)
1663 std::cout <<
"General window provided, but no stat requested without specific window";
1685 if (
vcfs ==
nullptr)
1692 int returnValue = 0;
1694 char replacement =
'-';
1708 if (
outfile ==
nullptr || std::string(
outfile).find(
"[contig]") == std::string::npos)
1726 formatManager.openmode = std::ofstream::out | std::ofstream::app;
1732 for (
const std::string & token :
oformats) {
1736 if (handler !=
nullptr)
1739 std::string newName =
1744 std::ofstream tmp(newName);
1767 std::string newOutFile =
1771 if (!originalOutFile.empty())
1843 std::string outFileString = std::string(
outfile);
1851 "Trying to save a nullptr alignment on save_alignment");
1855 std::string emptyString;
1860 "Trying to save a nullptr alignment on save_alignment");
1889 if (start == std::string::npos) start = 0;
1903 if (start == std::string::npos) start = 0;
1905 if (end == std::string::npos) end = 0;
1918 StartTiming(
"/**inline**/ void trimAlManager::print_statistics() ");
1966 StartTiming(
"/**inline**/ bool trimAlManager::create_or_use_similarity_matrix() ");
2003 StartTiming(
"inline void trimAlManager::clean_alignment() ");
2026 StartTiming(
"/**inline**/ void trimAlManager::postprocess_alignment()");
2054 StartTiming(
"/**inline**/ void trimAlManager::CleanSequences() ");
2111 StartTiming(
"/**inline**/ void trimAlManager::CleanResiduesAuto() ");
2152 StartTiming(
"/**inline**/ void trimAlManager::CleanResiduesNonAuto() ");
2160 new std::string[2]{
"-selectcols",
"residues"}
2229 StartTiming(
"inline void trimAlManager::set_window_size() ");
2245 StartTiming(
"inline void trimAlManager::delete_variables() ");
2312 std::string menu = {
2313 #include "RawText/menu.txt" 2336 StartTiming(
"void trimAlManager::examples(void) ");
2340 std::string examples = {
2341 #include "RawText/examples.txt" 2351 std::cout << examples;
float conservationThreshold
Similarity Threshold to use while trimming.
Alignment * cleanCombMethods(bool complementary, bool variable)
Method to clean an alignment. It carries out strict and strictplus. The method:
bool check_stats_incompatibilities()
Method to check statistics incompatibilities.
std::string filename
Filename where this alignment was loaded from.
argumentReport html_out_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the html output file.
argumentReport split_by_stop_codon_argument(const int *argc, char *argv[], int *currentArg)
Method to parse split by stop codon argument.
argumentReport max_identity_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the max identity argument.
bool check_residue_and_sequence_overlap()
Method to check if both of them (residue overlap and sequence overlap) have been defined by the user ...
argumentReport select_cols_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the select columns argument.
bool terminalOnly
Flag to use the Terminal Only option.
Similarity * similarity
Similarity submodule.
bool selectSeqs
Flag to select sequences manually from the alignment.
void defaultNTDegeneratedSimMatrix()
Method to load the default DEG NT similarity matrix.
bool check_and_prepare_coding_sequence()
Method to check and prepare the coding sequence.
Alignment * clean(float baseLine, float GapsPct, float conservationPct, bool complementary)
Method to trim an alignment based on the similarity and gaps distribution values. ...
float gapThreshold
Gap Threshold to use while trimming.
argumentReport ignore_filter_argument(const int *argc, char *argv[], int *currentArg)
char * compareset
Pointer to the argument that contains the compareset file.
bool check_arguments_incompatibilities()
Method to check argument incompatibilities. Keep in mind that an incompatibility is between two argum...
bool check_file_aligned()
Method to check if an alignment needs to be aligned depending on the methods that have been asked to ...
argumentReport strict_plus_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the strictplus argument.
void postprocess_alignment()
1 = Info, warning and error messages
int automatedMethodCount
Integer counter of the automatic methods. If the counter is bigger than 0, we are using an automati...
void verbosity_argument(const int *argc, char *argv[])
Method to parse the verbosity argument. This check is done before any other check. This is due to trimAl not performing any action if any help argument has been provided.
float similarityThreshold
Similarity Threshold to use while trimming.
argumentReport keep_seqs_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the keepSeqs argument.
Cleaner * Cleaning
Trimming submodule. It contains methods and variables related to trimming.
void delete_variables()
Method to delete all variables that need to be deallocated.
argumentReport out_format_arguments(const int *argc, char *argv[], int *currentArg)
Method to parse the output formats as -fasta -clustal or -formats [...].
void set_window_size()
Method to set correct window sizes depending on values for them given by the user.
argumentReport similarity_threshold_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the similarity threshold argument.
void defaultAASimMatrix()
Method to load the default AA similarity matrix.
argumentReport window_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the Window argument.
bool processArguments(char *argv[])
Method to process the information obtained by trimAlManager::parseArguments. It checks for argument in...
bool isFileAligned()
isAligned getter.
Alignment * getClustering(float identityThreshold)
Method to select the most representative sequence (the longest one) for each cluster from the input a...
bool splitByStopCodon
Flag to use the SplitByCodonStop option.
bool check_inFile_incompatibilities()
Method to check the inFile argument incompatibilities.
int selectMethod()
Method that selects the best cleaning method based on statistics of the alignment.
bool strict
Flag to use the strict trimming method.
argumentReport automated1_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the automated1 argument.
std::string * sequencesNames
Temporary variable to use in backtranslation option to compare with infile.
Class to calculate the consistency between several MSAs containing the same sequences, differently aligned. Using these statistics, the class is able to select the most consistent alignment among all alignments provided. It is possible to forcefully select an alignment while still calculating the statistics for later use. After selecting an alignment (most consistent or manually selected), it is possible to use these statistics to trim the alignment, removing columns that are not consistent enough with the other alignments.
char * backtransFile
File to load the alignment containing the backtranslation information.
argumentReport remove_duplicates_argument(const int *argc, char *argv[], int *currentArg)
bool check_output_relevance()
Method to check if, being asked to give an HTML report, it makes sense, depending on the functionalit...
bool check_outputs_coincidence()
Method to check if both output patterns (alignment output and html report output) are the same...
Consistency()
Default Constructor.
argumentReport back_trans_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the backtranslation argument.
std::ifstream compare
Temporary variable to open files in trimAlManager::check_multiple_files_comparison function...
Alignment * removeSequences(int *seqs, int init, int size, bool complementary)
Method to remove sequences, expressed as ranges.
bool sgt
Flag to show the Accumulated Gaps Scores.
Alignment * removeColumns(int *columns, int init, int size, bool complementary)
Method to remove columns, expressed as ranges.
static void printStatisticsFileAcl(Alignment &alig, float *compareVect)
Print the accumulated consistency value from the selected alignment.
bool sft
Flag to show the Accumulated Sum of Pairs.
float residuesOverlap
Residues Overlap to use while trimming.
char * outfile
File to output the trimmed alignment. This can contain tags which will be translated to some variables:...
int perform()
Method to perform the analysis if all checks have passed.
argumentReport col_numbering_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the column numbering argument.
int blockSize
Block Size to use while trimming.
argumentReport help_arguments(const int *argc, char **argv, int *currentArg)
Method to parse information arguments as help and menu.
argumentReport seqs_select_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the select sequences argument.
Alignment * cleanGaps(float baseLine, float gapsPct, bool complementary)
Method to trim an alignment based on the gap distribution values. Column blocks that don't have a min...
void printSeqIdentity()
Method to print different identity values computed from the alignment. In this method we assess the i...
void alternativeSimilarityMatrices(int matrix_code, int datatype)
Method to load alternative similarity matrices also included on the suite. Currently, only one type of alternative matrix is available: matrix_code: 1 datatype SequenceTypes::AA.
bool statSVG(const char *const destFile)
statistics::Manager * Statistics
Statistics submodule. It contains methods and variables related to statistics calculation and reporti...
bool automated1
Flag to use the automated1 trimming method.
std::string ReplaceString(std::string subject, const std::string &search, const std::string &replace)
Function that replaces a substring with another substring in a string. It makes a copy of the origina...
#define StartTiming(name)
bool setSimilarityMatrix(similarityMatrix *sm)
Method to set a similarity matrix.
Alignment * cleanNoAllGaps(bool complementary)
Method to remove columns composed only of gaps. This method is especially useful when we remove misali...
bool getComplementary
Flag to return the complementary alignment.
static void printStatisticsFileColumns(Alignment &alig, float *compareVect)
Print the consistency value for each column from the selected alignment.
bool check_backtranslations()
Method to check backtranslation needs.
bool check_automated_manual_incompatibilities()
Check if only one method, either automatic or manual has been asked.
VerboseLevel Level
Level of Verbosity. The report system won't output messages that are lower than the current level...
argumentReport no_all_gaps_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the noallgaps argument.
float * getValues()
Stat Getter.
argumentReport consistency_window_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the Consistency Window argument.
argumentReport compareset_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the compareset argument.
int similarityWindow
Similarity Window to use while trimming.
int * delSequences
Vector that contains the sequence indexes the user specified to be removed.
void printStatisticsGapsTotal()
Wrapper to Statistics::Gaps::printGapsAcl(). It calls calculateGapStats() to make sure the informat...
Class containing an alignment. This class stores the alignment sequences with their names...
argumentReport matrix_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the custom matrix argument.
argumentReport residue_overlap_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the residue overlap argument.
bool loadSimMatrix(char *filename)
Method to load a custom matrix.
bool noallgaps
Flag to use the noallgaps option.
argumentReport ignore_stop_codon_argument(const int *argc, char *argv[], int *currentArg)
Method to parse ignore by stop codon argument.
VerboseLevel
VerboseLevel used to report messages.
argumentReport gap_window_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the Gap Window argument.
Class that contains information of similarity matrices. These are used to calculate the similarity be...
void printStatisticsConservationTotal()
Wrapper to Statistics::Similarity::printGapsTotal(). It calls calculateConservationStats() to make ...
bool check_vcf_incompatibility()
argumentReport svg_out_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the svg output file.
argumentReport similarity_window_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the Similarity Window argument.
argumentReport sequence_overlap_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the sequence overlap argument.
void print_statistics()
Method to print statistics requested.
argumentReport in_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the input file argument.
void setBlockSize(int blockSize)
BlockSize Setter.
bool appearErrors
Flag to trace errors.
bool check_force_selection()
Method to check dependency of force select argument.
void printCorrespondence()
Method to print the vector containing the keep/reject (Alignment::saveResidues) values of the associa...
2 = Error and warning messages
argumentReport complementary_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the complementary argument.
void CleanResiduesNonAuto()
Method to trim the alignment using non automatic methods:
bool check_codon_behaviour_incompatibility()
Method to check incompatibilities related to how the algorithm should treat stop codons.
bool checkCorrespondence(std::string *names, int *lenghts, int totalInputSequences, int multiple)
Function to check CDS file.
int alternative_matrix
Int that represents which alternative matrix to use, where '-1' means no alternative matrix...
void printStatisticsConservationColumns()
Wrapper to Statistics::Similarity::printConservationAcl(). It calls calculateConservationStats() to...
void report(ErrorCode message, const char *vars)
Method to report an Error. It will be displayed if Level is equal to or higher than VerboseLevel::ERROR.
int numberOfResidues
Number of residues present on the alignment if it is aligned.
bool soverlap
Flag to show overlap scores in the alignment.
void defaultNTSimMatrix()
Method to load the default NT similarity matrix.
std::vector< std::string > oformats
Formats names we are going to output our trimmed alignment.
Alignment * singleAlig
Alignment obtained by making modifications to singleAlig or backtranslationAlig.
Namespace containing all classes related to statistics handling.
argumentReport terminal_only_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the terminal only argument.
argumentReport vcf_argument(const int *argc, char *argv[], int *currentArg)
Method to parse vcf argument.
argumentReport min_quality_argument(const int *argc, char *argv[], int *currentArg)
bool removeOnlyTerminal()
Method to detect right and left borders. Borders are the first column found with no gaps...
argumentReport out_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the output file pattern.
void clean_alignment()
Core Method of the program. This is where almost all cleaning methods are called from.
int windowSize
Window Size to use while trimming.
reporting::reportManager debug
FormatHandling::FormatManager & getFormatManager()
bool check_arguments_needs(char *argv[])
Method that checks dependencies between arguments. Keep in mind that dependencies may be between mult...
bool keepSeqs
Flag to keep sequences in the alignment.
std::string * seqsName
String vector containing the sequences names.
void readVCF(const AlignmentVector &sources, const StringVector &filenames, const float minQuality, const float minCoverage, const bool ignoreFilter, const char *const replacementChar)
bool check_thresholds_incompatibilities()
Method to check incompatibilities with manual thresholds.
statistics::Consistency * CS
void setWindowsSize(int ghWindow, int shWindow)
Windows setter.
void setTrimTerminalGapsFlag(bool terminalOnly_)
Setter method to Terminal Only Flag.
float sequenceOverlap
Sequence Overlap to use while trimming.
void check_output_format()
Method to check if an output format has been given. If no output format has been given, it will use the inFile format.
bool check_windows_incompatibilities()
Method to check windows incompatibilities.
int originalNumberOfSequences
Number of sequences the alignment had when it was loaded.
void report(InfoCode message, std::string *vars=nullptr)
Method to report an Info message. It will be displayed if Level is equal to or higher than VerboseLevel:...
argumentReport timetracker_out_argument(const int *argc, char *argv[], int *currentArg)
Method to parse InternalBenchmarker output argument. Needs to be checked before any other argument...
int consistencyWindow
Consistency Window to use while trimming.
char * htmlOutFile
File to output the html report.
Alignment * getTranslationCDS(Alignment *proteinAlignment)
Method to back translate a protein alignment using the sequences present on the current alignment...
Alignment * tempAlig
Temporary alignment that allows us to make some changes to trimAlManager::singleAlig, destroy it and set trimAlManager::singleAlig to this.
void report(ErrorCode message, std::string *vars=nullptr)
Method to report an Error. It will be displayed if Level is equal to or higher than VerboseLevel::ERROR...
void menu()
Method to show the menu of the program.
bool sgc
Flag to show the Gaps Scores per column.
std::vector< std::string > * vcfs
char * svgStatsOutFile
File to output the svg graph showing the stats along the alignment.
void examples()
Method to show the examples information about the program.
FormatHandling::FormatManager formatManager
Read Write Machine that allows to manage formats easily.
int clusters
Number of most representative sequences we are going to use.
statistics::similarityMatrix * similMatrix
Temporary Similarity Matrix.
argumentReport gap_threshold_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the gap threshold argument.
bool check_coding_sequences_type()
Method to check the type of sequences present in the backtranslationAlig.
Gaps * gaps
Gaps submodule.
int numberOfSequences
Number of sequences present on the alignment.
int * sequencesLengths
Vector that contains the sequences lengths of each sequence in the backtranslation option...
bool check_backtranslation_infile_names_correspondence()
Method to check if the origAlig and backtranslationAlig are compatible in names to make the backtrans...
argumentReport clusters_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the clusters argument.
bool gappyout
Flag to use the gappyout trimming method.
char * forceFile
Forcefile to use in combination with multiple files comparison, to compare them against this...
argumentReport consistency_threshold_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the consistency threshold argument.
int gapWindow
Gap Window Size to use while trimming.
bool check_absolute_gap_theshold()
bool sfc
Flag to show the Sum of Pairs scores per column.
bool nogaps
Flag to use the nogaps option.
bool check_clusters_incompatibilities()
Method to check clusters incompatibilities.
std::map< terminalColor, const std::string > colors
argumentReport gappy_out_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the gappyout argument.
int getAlignmentType() const
Alignment type getter. See SequenceTypes.
#define LegacyFormatArgumentWrapper(arg, format)
size_t argumentLength
Temp variable to use when there is a need to copy an argument to a char array.
int stats
Stats Flag to use in reporting. For each stat requested, this flag is decreased by one...
Alignment * cleanSpuriousSeq(float overlapColumn, float minimumOverlap, bool complementary)
Method to remove sequences misaligned with the rest of the sequences in the alignment. For each residue in the sequence, it tests its similarity. If the similarity of that residue is higher than the overlapColumn value, it counts as a hit for the sequence. After calculating the number of hits for the sequence, it removes the sequence if it has a hits/residues proportion lower than minimumOverlap.
void check_compareset_window_argument()
Method to check and warn if windowsize and compareset has been given.
void setKeepSequencesFlag(bool newFlagValue)
Keep Sequences setter.
bool columnNumbering
Flag to return column numbering in report.
int * readNumbers(const std::string &line)
Reads a line and converts it to an array of numbers.
void getSequences(std::string *names, int *lenghts)
Getter for the sequence names and their lengths.
void CleanResiduesAuto()
Method to trim the alignment using automatic or semi-automatic methods:
argumentReport stats_arguments(const int *argc, char *argv[], int *currentArg)
Method to parse stats arguments.
Utilities class. This class contains shared methods to be used in multiple parts of the code...
int getNumAminos()
Residues number getter. It counts gaps as residue.
argumentReport svg_stats_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the svg stats output file.
void CleanSequences()
Method to trim or reduce the number of sequences in an alignment. It performs one or none of the foll...
bool check_input_file_with_coding_sequences_argument()
Method to check dependency between custom coding sequence file and infile arguments.
char * matrixFile
File that contains a user defined similarity matrix.
bool performCompareset()
Method to check if multiple files comparison has been set correctly. It also performs some calculation...
void computeComplementaryAlig(bool residues, bool sequences)
Method for computing the complementary alignment. Complementary alignment is an alignment containing...
bool check_similarity_matrix()
Method to check if given a similarity matrix, there is a need to use it.
argumentReport keep_header_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the keepHeader argument.
bool sident
Flag to show identity scores in the alignment.
Alignment * cleanConservation(float baseLine, float conservationPct, bool complementary)
Method to trim an alignment based on the similarity distribution values.
Alignment * backtranslationAlig
Alignment used on the backtranslation functionality.
bool check_col_numbering()
Method to check if column numbering makes sense based on the functionality asked. ...
bool check_combinations_among_thresholds_incompatibility()
As some thresholds are incompatible between them, we check their incompatibilities.
Alignment * origAlig
Main alignment used/loaded on trimAl in combination with singleAlig.
argumentReport force_select_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the force select argument.
int i
Argument Iterator Variable.
bool check_output_file_with_statistics()
Method to check if there is an output file to allow statistics to be given on terminal.
argumentReport strict_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the strict argument.
bool check_select_cols_and_seqs_incompatibilities()
Method to check incompatibilities with select cols and select seqs arguments.
#define checkArgument(argument)
bool selectCols
Flag to select columns manually from the alignment.
argumentReport block_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the Block Size argument.
float consistencyThreshold
Consistency Threshold to use while trimming.
bool check_max_identity_incompatibilities()
Method to check max identity incompatibilities.
argumentReport no_gaps_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the nogaps argument.
void printSeqOverlap()
Prints the overlap between sequences.
void ReplaceStringInPlace(std::string &subject, const std::string &search, const std::string &replace)
Function that replaces a substring with another substring in a string. It does not make a copy of the...
float getCutPointClusters(int clusterNumber)
Method that calculates the optimal cut point for a given clusters number. The idea is to obtain a cu...
bool alignmentSummarySVG(Alignment &trimmedAlig, const char *destFile, int blocks)
Method to report the trimming results in SVG. Outputs a SVG file that shows visually what has been do...
bool perform(char *comparesetFilePath, FormatHandling::FormatManager &formatManager, trimAlManager &manager, char *forceFile)
Method to compare a set of MSA, all containing the same sequences and residues. The number of residue...
Alignment * cleanCompareFile(float cutpoint, float baseLine, float *vectValues, bool complementary)
Method to trim an alignment based on consistency values obtained from a dataset of alignments...
bool alignmentSummaryHTML(const Alignment &trimmedAlig, const char *const destFile)
Method to report the trimming results in HTML. Outputs an HTML file that shows visually what has been...
char ** filesToCompare
Vector containing the files to compare in trimAlManager::check_multiple_files_comparison function...
bool ignoreStopCodon
Flag to use the Ignore Stop Codon option.
bool ssc
Flag to show the Similarity Scores per column.
argumentReport conservation_threshold_argument(const int *argc, char *argv[], int *currentArg)
Method to parse the similarity threshold argument.
argumentReport min_coverage_argument(const int *argc, char *argv[], int *currentArg)
char * svgOutFile
File to output the svg report.
bool isNumber(char *num)
String-is-number checking.
Consistency * consistency
Consistency submodule.
void printStatisticsGapsColumns()
Wrapper to Statistics::Gaps::printGapsColumns(). It calls calculateGapStats() to make sure the info...
float maxIdentity
Max Identity to use while trimming.
Alignment * clean2ndSlope(bool complementary)
Method that carries out the gappyout approach. This method calculates the slope in gaps distribution on ...
bool check_automated_methods_incompatibilities()
Method to check incompatibilities related to automated trimming methods.
int getNumSpecies()
Number of sequences getter.
void check_thresholds_dependencies()
Main class of trimAl. It is structured to work with 3 calls:
int * delColumns
Vector that contains the column indexes the user specified to be removed.
char * infile
File to load the alignment when only one alignment has trimming interest.
int parseArguments(int argc, char **argv)
Method to parse arguments into usable structure in trimal.
Alignment ** compareAlignmentsArray
Alignment Vector used on trimAlManager::check_multiple_files_comparison to compare them...
bool prepareCodingSequence(bool splitByStopCodon, bool ignoreStopCodon, Alignment *proteinAlignment)
Method to check if the CDS file is correct. Based on nature of residues: DNA/RNA (Most of the residue...
bool sst
Flag to show the Accumulated Similarity Scores.
bool strictplus
Flag to use the strictplus trimming method.
bool create_or_use_similarity_matrix()
Method to create, load and use a similarity matrix, depending on alignment residue nature...
bool check_block_size()
Method to check if the block size is bigger than allowed depending on size of alignment.
similarityMatrix()
Constructor.