// alignment::fillMatrices -- bookkeeping run after the sequences have been
// loaded into `sequences` / `seqsName`.
//
// From the lines visible in this extract it:
//   * allocates `residuesNumber` and stores each sequence's length in it,
//   * reports (on cerr) any character that is neither alphabetic nor
//     punctuation,
//   * sets `isAligned` from a comparison of consecutive sequence lengths,
//   * when `aligned` is requested, reports length mismatches on cerr,
//   * allocates the `saveResidues` / `saveSequences` index arrays.
//
// Parameter `aligned`: true when the caller requires all sequences to have
// the same length.
// Returns bool; the return statements themselves are not visible in this
// extract.
41 bool alignment::fillMatrices(
bool aligned) {
// One length slot per sequence.
47 residuesNumber =
new int[sequenNumber];
48 for(i = 0; i < sequenNumber; i++) {
49 residuesNumber[i] = sequences[i].size();
// Character validation pass over every residue of every sequence.
// NOTE(review): isalpha/ispunct are given a plain `char` taken from a
// std::string; <cctype> requires the value to be representable as
// unsigned char (or EOF) -- confirm inputs are 7-bit or add a cast.
53 for(i = 0; i < sequenNumber; i++)
54 for(j = 0; j < residuesNumber[i]; j++)
55 if((!isalpha(sequences[i][j])) && (!ispunct(sequences[i][j]))) {
56 cerr << endl <<
"ERROR: The sequence \"" << seqsName[i] <<
"\" has an " 57 <<
"unknown (" << sequences[i][j] <<
") character." << endl;
// Compare each length with its predecessor. The statement executed on a
// mismatch is not visible here; presumably it leaves the loop early, so
// that `i == sequenNumber` below means "no mismatch found" -- confirm.
62 for(i = 1; i < sequenNumber; i++)
63 if(residuesNumber[i] != residuesNumber[i-1])
66 isAligned = (i != sequenNumber) ?
false :
true;
// Caller asked for an alignment but the lengths differ.
70 if (aligned
and !isAligned) {
71 cerr << endl <<
"ERROR: Sequences should be aligned (all with same length) " 72 <<
"and there are not. Check your input alignment" << endl;
// The first sequence's length is taken as the reference column count.
78 residNumber = residuesNumber[0];
// Only executed when `aligned` is true: report every sequence whose length
// differs from the reference.
81 for(i = 0; (i < sequenNumber)
and (aligned); i++) {
82 if(residuesNumber[i] != residNumber) {
83 cerr << endl <<
"ERROR: The sequence \"" << seqsName[i] <<
"\" (" 84 << residuesNumber[i] <<
") does not have the same number of residues " 85 <<
"fixed by the alignment (" << residNumber <<
")." << endl;
// Per-column index array is only allocated for aligned input.
92 if((aligned) || (isAligned)) {
97 saveResidues =
new int[residNumber];
98 for(i = 0; i < residNumber; i++)
// Per-sequence index array, initialised to the identity mapping.
104 saveSequences =
new int[sequenNumber];
105 for(i = 0; i < sequenNumber; i++)
106 saveSequences[i] = i;
// alignment::getSequences -- writes every sequence to `file` with the '-'
// character removed: a ">" + name header line followed by the residues in
// chunks of 60 characters per line.
//
// When the member `reverse` is set, each sequence is passed through
// utils::getReverse before utils::removeCharacter is applied.
// NOTE(review): `tmpMatrix` is allocated with new[]; the matching delete[]
// is not visible in this extract -- confirm it exists.
113 void alignment::getSequences(ostream &file) {
120 tmpMatrix =
new string[sequenNumber];
// Build the gap-free (and optionally reversed) working copies.
126 for(i = 0; i < sequenNumber; i++)
127 tmpMatrix[i] = (!reverse) ? utils::removeCharacter(
'-', sequences[i]) :
128 utils::removeCharacter(
'-', utils::getReverse(sequences[i]));
// Emit one record per sequence, 60 residues per output line.
131 for(i = 0; i < sequenNumber; i++) {
132 file <<
">" << seqsName[i] << endl;
133 for(j = 0; j < (
int) tmpMatrix[i].size(); j += 60)
134 file << tmpMatrix[i].substr(j, 60) << endl;
// alignment::formatInputAlignment -- opens `alignmentFile` and inspects its
// leading token(s) to decide which input format it is in.
//
// Checks visible in this extract, in order:
//   * first word "CLUSTAL" / "clustal"
//   * first word starting with '>' and having ';' at index 3
//     (tested before the plain '>' case)
//   * first word starting with '>'
//   * first word "#NEXUS" / "#nexus"
//   * first word "#MEGA" / "#mega" (then scans characters up to '#' and
//     end of line, and picks 22 or 21 depending on `blocks`)
//   * otherwise the leading tokens are parsed with atoi as the sequence
//     and residue counts, and 12 or 11 is picked depending on `blocks`.
//
// Returns an int format code; only the values 22/21 and 12/11 are visible
// here, the codes for the other branches are on lines not shown.
142 int alignment::formatInputAlignment(
char *alignmentFile) {
145 char c, *firstWord = NULL, *line = NULL;
146 int format = 0, blocks = 0;
151 file.open(alignmentFile, ifstream::in);
152 if(!utils::checkFile(file))
// Keep reading until utils::readLine returns a line or EOF is reached.
157 line = utils::readLine(file);
158 }
while ((line == NULL) && (!file.eof()));
168 if((!strcmp(firstWord,
"CLUSTAL")) || (!strcmp(firstWord,
"clustal")))
// NOTE(review): firstWord[3] is read without a visible length check on
// firstWord -- confirm the token is always at least 4 characters here.
172 else if(firstWord[0] ==
'>' && firstWord[3] ==
';')
176 else if(firstWord[0] ==
'>')
180 else if((!strcmp(firstWord,
"#NEXUS")) || (!strcmp(firstWord,
"#nexus")))
184 else if((!strcmp(firstWord,
"#MEGA")) || (!strcmp(firstWord,
"#mega"))) {
// Character-level scans: up to the next '#', then to the end of a line.
192 }
while((c !=
'#') && (!file.eof()));
195 while((c !=
'\n') && (!file.eof()))
200 }
while((c !=
'\n') && (!file.eof()));
// `blocks` == 0 selects 22, anything else selects 21.
203 format = (!blocks) ? 22 : 21;
// Numeric header: first token is the sequence count, next the residue count.
211 sequenNumber = atoi(firstWord);
213 if(firstWord != NULL)
214 residNumber = atoi(firstWord);
218 if((sequenNumber == 1) && (residNumber != 0))
223 else if((sequenNumber != 0) && (residNumber != 0)) {
230 line = utils::readLine(file);
231 }
while ((line == NULL) && (!file.eof()));
238 while(firstWord != NULL) {
247 line = utils::readLine(file);
248 }
while ((line == NULL) && (!file.eof()));
251 while(firstWord != NULL) {
// `blocks` == 0 selects 12, anything else selects 11.
261 format = (!blocks) ? 12 : 11;
// alignment::loadPhylipAlignment -- reads a PHYLIP file into `sequences` and
// `seqsName`.
//
// Visible steps: open the file, build `filename` as "!Title <path>;", skip
// to the first line that utils::readLine returns, parse the sequence and
// residue counts with atoi, allocate the arrays, read a first pass that
// stores a name and sequence data per entry, then a second pass that only
// appends sequence data. Finishes by returning fillMatrices(true).
274 bool alignment::loadPhylipAlignment(
char *alignmentFile) {
277 char *str, *line = NULL;
282 file.open(alignmentFile, ifstream::in);
283 if(!utils::checkFile(file))
287 filename.append(
"!Title ");
288 filename.append(alignmentFile);
289 filename.append(
";");
// Keep reading until a line is returned or EOF is reached.
293 line = utils::readLine(file);
294 }
while ((line == NULL) && (!file.eof()));
// Header tokens: number of sequences, then number of residues.
304 sequenNumber = atoi(str);
309 residNumber = atoi(str);
// atoi yields 0 for non-numeric text, so a zero count marks a bad header.
313 if((sequenNumber == 0) || (residNumber == 0))
317 sequences =
new string[sequenNumber];
318 seqsName =
new string[sequenNumber];
// First pass: each entry contributes a name and its first sequence chunk.
322 while((i < sequenNumber) && (!file.eof())){
327 line = utils::readLine(file);
335 seqsName[i].append(str, strlen(str));
340 sequences[i].append(str, strlen(str));
// Second pass: further chunks are appended to the existing sequences.
351 while((i < sequenNumber) && (!file.eof())) {
356 line = utils::readLine(file);
365 sequences[i].append(str, strlen(str));
378 return fillMatrices(
true);
// alignment::loadPhylip3_2Alignment -- reads a PHYLIP 3.2 file into
// `sequences` and `seqsName`.
//
// Visible steps: open the file, build `filename` as "!Title <path>;", parse
// the two header counts with atoi, allocate the arrays, then loop until EOF
// storing a name and sequence data per line. A line whose `firstLine` value
// differs from `blocksFirstLine` has its parsed name and sequence appended
// to the previous entry instead. Finishes by returning fillMatrices(true).
381 bool alignment::loadPhylip3_2Alignment(
char *alignmentFile) {
// NOTE(review): `firstLine` is an int initialised from `true` and later
// compared with `false` and with `blocksFirstLine`; the lines that assign
// it a count are not visible in this extract.
384 int i, blocksFirstLine, firstLine =
true;
385 char *str, *line = NULL;
389 file.open(alignmentFile, ifstream::in);
390 if(!utils::checkFile(file))
394 filename.append(
"!Title ");
395 filename.append(alignmentFile);
396 filename.append(
";");
// Keep reading until a line is returned or EOF is reached.
400 line = utils::readLine(file);
401 }
while ((line == NULL) && (!file.eof()));
// Header tokens: number of sequences, then number of residues.
412 sequenNumber = atoi(str);
417 residNumber = atoi(str);
419 if((sequenNumber == 0) || (residNumber == 0))
423 sequences =
new string[sequenNumber];
424 seqsName =
new string[sequenNumber];
437 line = utils::readLine(file);
447 seqsName[i].append(str, strlen(str));
454 sequences[i].append(str, strlen(str));
// Remember the value seen for the first line so later lines can be
// compared against it.
462 if ((blocksFirstLine == 0)
and firstLine)
463 blocksFirstLine = firstLine;
// Mismatch with the remembered value: what was stored as a name and a
// sequence for entry i is moved onto the end of entry i-1.
469 if ((firstLine !=
false)
and (firstLine != blocksFirstLine)) {
470 sequences[i-1].append(seqsName[i]);
472 sequences[i-1].append(sequences[i]);
473 sequences[i].clear();
// Entry i has reached the declared residue count.
481 if ((
int) sequences[i].size() == residNumber) {
485 }
while(!file.eof());
493 return fillMatrices(
true);
// alignment::loadClustalAlignment -- reads a CLUSTAL file into `sequences`
// and `seqsName`.
//
// Visible steps: open the file, build `filename` as "!Title <path>;", scan
// lines looking for alphabetic characters or '-', allocate the arrays, keep
// one line in `aligInfo`, then store names and sequence data while cycling
// the sequence index modulo `sequenNumber`. Finishes by returning
// fillMatrices(true). How `sequenNumber` is counted is on lines not shown.
496 bool alignment::loadClustalAlignment(
char *alignmentFile) {
499 int i, seqLength, pos, firstBlock;
500 char *str, *line = NULL;
504 file.open(alignmentFile, ifstream::in);
505 if(!utils::checkFile(file))
510 filename.append(
"!Title ");
511 filename.append(alignmentFile);
512 filename.append(
";");
// Keep reading until a line is returned or EOF is reached.
516 line = utils::readLine(file);
517 }
while ((line == NULL) && (!file.eof()));
531 line = utils::readLine(file);
// Look through the line for a letter or a gap symbol.
549 seqLength = (
int) strlen(line);
550 for(pos = 0; pos < seqLength; pos++)
551 if((isalpha(line[pos])) || (line[pos] ==
'-'))
566 line = utils::readLine(file);
574 seqsName =
new string[sequenNumber];
575 sequences =
new string[sequenNumber];
578 line = utils::readLine(file);
// The line read above is kept verbatim in aligInfo.
581 aligInfo.append(line, strlen(line));
591 line = utils::readLine(file);
612 line = utils::readLine(file);
// Same letter-or-gap scan as above; pos == seqLength afterwards means the
// scan ran to the end of the line (presumably because nothing matched and
// the loop was not left early -- the early exit is not visible here).
618 seqLength = (
int) strlen(line);
619 for(pos = 0; pos < seqLength; pos++)
620 if((isalpha(line[pos])) || (line[pos] ==
'-'))
624 if (pos == seqLength) {
632 line = utils::readLine(file);
643 seqsName[i].append(str, strlen(str));
646 sequences[i].append(str, strlen(str));
// Wrap around so following lines extend the same sequences in order.
649 i = (i + 1) % sequenNumber;
657 line = utils::readLine(file);
668 return fillMatrices(
true);
// alignment::loadFastaAlignment -- reads a FASTA file into `sequences`,
// `seqsName` and `seqsInfo`.
//
// Visible steps: open the file, build `filename` as "!Title <path>;",
// allocate the three arrays, then loop over lines storing the header text
// (without its first character) in seqsInfo, the name token in seqsName and
// the sequence data in sequences. Finishes by returning fillMatrices(false),
// i.e. equal-length sequences are not required.
671 bool alignment::loadFastaAlignment(
char *alignmentFile) {
674 char *str, *line = NULL;
679 file.open(alignmentFile, ifstream::in);
680 if(!utils::checkFile(file))
684 filename.append(
"!Title ");
685 filename.append(alignmentFile);
686 filename.append(
";");
697 line = utils::readLine(file);
716 seqsName =
new string[sequenNumber];
717 sequences =
new string[sequenNumber];
718 seqsInfo =
new string[sequenNumber];
// The index starts at -1 and is pre-incremented when a name is stored
// below, so the header of the upcoming record is written to slot i+1.
// NOTE(review): the loop condition allows i == sequenNumber - 1, for which
// seqsInfo[i+1] would be out of range -- confirm the guard on lines not
// visible here.
720 for(i = -1; (i < sequenNumber) && (!file.eof()); ) {
727 line = utils::readLine(file);
// Header text without its first character.
734 seqsInfo[i+1].append(&line[1], strlen(line) - 1);
747 }
while(strlen(str) == 0);
748 seqsName[++i].append(str, strlen(str));
754 sequences[i].append(str, strlen(str));
767 return fillMatrices(
false);
// alignment::loadNexusAlignment -- reads a NEXUS file into `sequences` and
// `seqsName`.
//
// Visible steps: open the file, build `filename` as "!Title <path>;", read
// lines and upper-case a token that is compared against BEGIN, MATRIX,
// FORMAT and DIMENSIONS. FORMAT text is appended to `aligInfo`; DIMENSIONS
// yields `sequenNumber` and `residNumber` via strtok on "=;" plus atoi.
// After the header, lines are scanned for a ']' character under a `state`
// flag, "end;"/"END;" is recognised, and names/sequence data are stored
// while cycling `pos` modulo `sequenNumber`. Finishes by returning
// fillMatrices(true).
770 bool alignment::loadNexusAlignment(
char *alignmentFile) {
773 char *frag = NULL, *str = NULL, *line = NULL;
774 int i, pos, state, firstBlock;
778 file.open(alignmentFile, ifstream::in);
779 if(!utils::checkFile(file))
784 filename.append(
"!Title ");
785 filename.append(alignmentFile);
786 filename.append(
";");
796 line = utils::readLine(file);
// Upper-case the token in place so the keyword tests are case-insensitive.
807 for(i = 0; i < (
int) strlen(str); i++)
808 str[i] = toupper(str[i]);
811 if(!strcmp(str,
"BEGIN"))
814 else if(!strcmp(str,
"MATRIX"))
818 else if(!strcmp(str,
"FORMAT")) {
821 aligInfo.append(str, strlen(str));
822 aligInfo.append(
" ", strlen(
" "));
828 else if((!strcmp(str,
"DIMENSIONS")) && state) {
// Each of `str` and `frag` is split on '=' / ';' and the second piece is
// converted to a count.
// NOTE(review): the strtok result is passed straight to atoi; strtok
// returns NULL when no further token exists, and atoi(NULL) is undefined.
831 str = strtok(str,
"=;");
832 sequenNumber = atoi(strtok(NULL,
"=;"));
833 frag = strtok(frag,
"=;");
834 residNumber = atoi(strtok(NULL,
"=;"));
836 }
while(!file.eof());
// Header is unusable unless the last token was MATRIX and both counts are
// non-zero.
839 if(strcmp(str,
"MATRIX") || (sequenNumber == 0) || (residNumber == 0))
843 seqsName =
new string[sequenNumber];
844 sequences =
new string[sequenNumber];
856 line = utils::readLine(file);
// Character scan of the line; `state` is consulted when ']' is found.
// Presumably this tracks "[...]" comments -- the branch that sets the
// flag is not visible here.
861 for(i = 0; i < (
int) strlen(line); i++) {
864 else if (line[i] ==
']' && state) {
871 if ((state) || (
not state && i != (
int) strlen(line)))
875 if((!strncmp(line,
"end;", 4)) || (!strncmp(line,
"END;", 4)))
885 seqsName[pos].append(str, strlen(str));
890 sequences[pos].append(str, strlen(str));
// Wrap around so following lines extend the same sequences in order.
897 pos = (pos + 1) % sequenNumber;
910 return fillMatrices(
true);
// alignment::loadMegaNonInterleavedAlignment -- reads a non-interleaved MEGA
// file into `sequences` and `seqsName`.
//
// Visible steps: open the file, build `filename` as "!Title <path>;", read
// header lines (a token split on "!: " is upper-cased and compared with
// TITLE and FORMAT; FORMAT lines are appended to `aligInfo`), read further
// lines testing for a leading '#', allocate the arrays, then read the body:
// lines starting with '!' are handled separately, other lines are trimmed
// and tokenised, with the first token of a '#' line stored as the name and
// the remaining tokens appended as sequence data. Finishes by returning
// fillMatrices(true). How `sequenNumber` is counted is on lines not shown.
913 bool alignment::loadMegaNonInterleavedAlignment(
char *alignmentFile) {
916 char *frag = NULL, *str = NULL, *line = NULL;
921 file.open(alignmentFile, ifstream::in);
922 if(!utils::checkFile(file))
928 filename.append(
"!Title ");
929 filename.append(alignmentFile);
930 filename.append(
";");
// Keep reading until a line is returned or EOF is reached.
934 line = utils::readLine(file);
935 }
while ((line == NULL) && (!file.eof()));
949 line = utils::readLine(file);
954 if(!strncmp(line,
"#", 1))
// Scratch buffer sized for a copy of the line.
// NOTE(review): `frag` is reassigned from utils::trimLine further down;
// the delete[] for this allocation is not visible in this extract.
962 frag =
new char[strlen(line) + 1];
967 str = strtok(frag,
"!: ");
// Upper-case the keyword in place for the comparisons below.
968 for(i = 0; i < (
int) strlen(str); i++)
969 str[i] = toupper(str[i]);
973 if(!strcmp(str,
"TITLE")) {
// strncmp returns non-zero when the line does NOT start with '!'.
975 if(strncmp(line,
"!", 1))
981 else if(!strcmp(str,
"FORMAT"))
982 aligInfo.append(line, strlen(line));
994 line = utils::readLine(file);
999 if (!strncmp(line,
"#", 1))
1007 line = utils::readLine(file);
1009 }
while(!file.eof());
1016 seqsName =
new string[sequenNumber];
1017 sequences =
new string[sequenNumber];
1020 line = utils::readLine(file);
1023 while(!file.eof()) {
1030 line = utils::readLine(file);
1035 if (!strncmp(line,
"#", 1))
1045 while(!file.eof()) {
1049 line = utils::readLine(file);
// Lines starting with '!' take this branch.
1054 if (!strncmp(line,
"!", 1)) {
1057 line = utils::readLine(file);
1062 frag = utils::trimLine(line);
1070 line = utils::readLine(file);
1075 str = strtok(frag,
" #\n");
// A '#' line starts a record: first token is the name.
1078 if (!strncmp(line,
"#", 1)) {
1080 seqsName[i].append(str, strlen(str));
1081 str = strtok(NULL,
" #\n");
// Every remaining token on the line is sequence data.
1085 while(str != NULL) {
1086 sequences[i].append(str, strlen(str));
1087 str = strtok(NULL,
" \n");
1098 line = utils::readLine(file);
1109 return fillMatrices(
true);
// alignment::loadMegaInterleavedAlignment -- reads an interleaved MEGA file
// into `sequences` and `seqsName`.
//
// Visible steps: open the file, build `filename` as "!Title <path>;", read
// header lines (a token split on "!: " is upper-cased and compared with
// TITLE and FORMAT; FORMAT lines are appended to `aligInfo`), read further
// lines testing for a leading '#', allocate the arrays, then read the body:
// each line is trimmed and tokenised, the first token is stored as the name
// and the remaining tokens appended as sequence data, cycling the sequence
// index modulo `sequenNumber`. Finishes by returning fillMatrices(true).
1112 bool alignment::loadMegaInterleavedAlignment(
char *alignmentFile) {
1115 char *frag = NULL, *str = NULL, *line = NULL;
1116 int i, firstBlock =
true;
1120 file.open(alignmentFile, ifstream::in);
1121 if(!utils::checkFile(file))
1127 filename.append(
"!Title ");
1128 filename.append(alignmentFile);
1129 filename.append(
";");
// Keep reading until a line is returned or EOF is reached.
1133 line = utils::readLine(file);
1134 }
while ((line == NULL) && (!file.eof()));
1141 while(!file.eof()) {
1148 line = utils::readLine(file);
1153 if(!strncmp(line,
"#", 1))
// Scratch buffer sized for a copy of the line.
// NOTE(review): `frag` is reassigned from utils::trimLine further down;
// the delete[] for this allocation is not visible in this extract.
1157 frag =
new char[strlen(line) + 1];
1162 str = strtok(frag,
"!: ");
// Upper-case the keyword in place for the comparisons below.
1163 for(i = 0; i < (
int) strlen(str); i++)
1164 str[i] = toupper(str[i]);
1168 if(!strcmp(str,
"TITLE")) {
// strncmp returns non-zero when the line does NOT start with '!'.
1170 if(strncmp(line,
"!", 1))
1176 else if(!strcmp(str,
"FORMAT"))
1177 aligInfo.append(line, strlen(line));
1185 while(!file.eof()) {
1188 if(!strncmp(line,
"#", 1))
1196 line = utils::readLine(file);
1209 seqsName =
new string[sequenNumber];
1210 sequences =
new string[sequenNumber];
1213 line = utils::readLine(file);
1216 while(!file.eof()) {
1223 line = utils::readLine(file);
1227 if(!strncmp(line,
"#", 1))
1235 while(!file.eof()) {
1239 line = utils::readLine(file);
// Lines starting with '!' take this branch.
1243 if (!strncmp(line,
"!", 1)) {
1246 line = utils::readLine(file);
1251 frag = utils::trimLine(line);
1252 str = strtok(frag,
" #\n");
1260 seqsName[i].append(str, strlen(str));
// Every remaining token on the line is sequence data.
1263 str = strtok(NULL,
" \n");
1264 while(str != NULL) {
1265 sequences[i].append(str, strlen(str));
1266 str = strtok(NULL,
" \n");
1277 line = utils::readLine(file);
// Wrap around so following lines extend the same sequences in order.
1279 i = (i + 1) % sequenNumber;
1292 return fillMatrices(
true);
// alignment::loadNBRF_PirAlignment -- reads an NBRF/PIR file into
// `sequences`, `seqsName` and `seqsInfo`.
//
// Visible steps: open the file, build `filename` as "!Title <path>;", make
// a first pass over the lines, allocate the three arrays, then a second
// pass driven by the `seqIdLine` / `seqLines` flags:
//   * an id line ('>' at index 0, ';' at index 3) is split on ">;" -- the
//     first piece goes to seqsInfo, the second to seqsName;
//   * the line after it is appended to seqsInfo;
//   * sequence lines are appended to sequences, and a trailing '*' is
//     erased.
// Finishes by returning fillMatrices(true).
1295 bool alignment::loadNBRF_PirAlignment(
char *alignmentFile) {
1298 bool seqIdLine, seqLines;
1299 char *str, *line = NULL;
1304 file.open(alignmentFile, ifstream::in);
1305 if(!utils::checkFile(file))
1309 filename.append(
"!Title ");
1310 filename.append(alignmentFile);
1311 filename.append(
";");
1315 while(!file.eof()) {
1322 line = utils::readLine(file);
1341 sequences =
new string[sequenNumber];
1342 seqsName =
new string[sequenNumber];
1343 seqsInfo =
new string[sequenNumber];
1351 while(!file.eof()) {
1358 line = utils::readLine(file);
// NOTE(review): line[3] is read without a visible length check on `line`.
1365 if((line[0] ==
'>') && (line[3] ==
';') && (seqIdLine)) {
1370 str = strtok(line,
">;");
1371 seqsInfo[i].append(str, strlen(str));
1374 str = strtok(NULL,
">;");
1375 seqsName[i].append(str, strlen(str));
// Neither an id line nor sequence data: kept as extra info text.
1380 else if((!seqIdLine) && (!seqLines)) {
1382 seqsInfo[i].append(line, strlen(line));
1386 else if (seqLines) {
// NOTE(review): strlen(line) - 1 wraps around for an empty line; confirm
// empty lines cannot reach this point.
1390 if (line[strlen(line) - 1] ==
'*') {
1397 while (str != NULL) {
1398 sequences[i].append(str, strlen(str));
// Drop the '*' terminator if it ended up in the stored sequence.
// NOTE(review): size() - 1 wraps around if sequences[i] is empty.
1403 if(sequences[i][sequences[i].size() - 1] ==
'*')
1404 sequences[i].erase(sequences[i].size()-1);
1416 return fillMatrices(
true);
// alignment::alignmentPhylipToFile -- writes the alignment to `file` in
// PHYLIP layout: a header with `sequenNumber` and `residNumber`, a first
// block with each name followed by the first 60 columns, then further
// blocks of 60 columns without names.
//
// An error is printed on cerr for unaligned input (the guarding condition
// is on a line not visible here). When `reverse` is set the sequences are
// written reversed via utils::getReverse.
1419 void alignment::alignmentPhylipToFile(ostream &file) {
// NOTE(review): maxLongName has no initialiser here and is read by
// utils::max below; confirm it is assigned on a line not visible in this
// extract.
1422 int i, j, maxLongName;
1428 cerr << endl <<
"ERROR: Sequences are not aligned. Format (PHYLIP) " 1429 <<
"not compatible with unaligned sequences." << endl << endl;
1434 tmpMatrix =
new string[sequenNumber];
// Working copies, reversed when requested.
1439 for(i = 0; i < sequenNumber; i++)
1440 tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
// Name column width is only widened when `shortNames` is not set.
1445 for(i = 0; (i < sequenNumber) && (!shortNames); i++)
1446 maxLongName = utils::max(maxLongName, seqsName[i].size());
1450 file <<
" " << sequenNumber <<
" " << residNumber << endl;
// First block: left-aligned name (cut to maxLongName) plus 60 columns.
1453 for(i = 0; i < sequenNumber; i++)
1454 file << setw(maxLongName + 3) << left << seqsName[i].substr(0, maxLongName)
1455 << tmpMatrix[i].substr(0, 60) << endl;
// Remaining blocks: sequence data only.
1459 for(i = 60; i < residNumber; i += 60) {
1460 for(j = 0; j < sequenNumber; j++)
1461 file << tmpMatrix[j].substr(i, 60) << endl;
1467 delete [] tmpMatrix;
// alignment::alignmentPhylip3_2ToFile -- writes the alignment to `file` in
// PHYLIP 3.2 layout: a header with `sequenNumber` and `residNumber`, then
// for each sequence its name followed by the residues in groups of 10,
// five groups (50 columns) per row, with continuation rows indented by the
// name column width.
//
// An error is printed on cerr for unaligned input (the guarding condition
// is on a line not visible here).
1470 void alignment::alignmentPhylip3_2ToFile(ostream &file) {
// NOTE(review): maxLongName has no initialiser here and is read by
// utils::max below; confirm it is assigned on a line not visible in this
// extract.
1473 int i, j, k, maxLongName;
1479 cerr << endl <<
"ERROR: Sequences are not aligned. Format (PHYLIP) " 1480 <<
"not compatible with unaligned sequences." << endl << endl;
1485 tmpMatrix =
new string[sequenNumber];
// Working copies, reversed when requested.
1490 for(i = 0; i < sequenNumber; i++)
1491 tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
// Name column width is only widened when `shortNames` is not set.
1496 for(i = 0; (i < sequenNumber) && (!shortNames); i++)
1497 maxLongName = utils::max(maxLongName, seqsName[i].size());
1501 file <<
" " << sequenNumber <<
" " << residNumber << endl;
1506 for(i = 0; i < sequenNumber; i++) {
1508 file << setw(maxLongName + 3) << left << seqsName[i].substr(0, maxLongName);
1510 for(j = 0; j < residNumber; j += 50) {
// NOTE(review): the residues are taken from sequences[i], not from
// tmpMatrix[i], so the reversed copy built above is not what gets written
// -- confirm whether `reverse` is meant to apply to this format.
1511 for(k = j; (k < residNumber) && (k < (j + 50)); k += 10)
1512 file << sequences[i].substr(k, 10) <<
" ";
// More rows follow for this sequence: pad to the name column width.
1516 if((j + 50) < residNumber)
1517 file << setw(maxLongName + 3) <<
" ";
1524 delete [] tmpMatrix;
// alignment::alignmentPhylip_PamlToFile -- writes the alignment to `file` as
// a header with `sequenNumber` and `residNumber` followed by one line per
// sequence: the left-aligned name (cut to maxLongName) and the whole
// sequence.
//
// An error is printed on cerr for unaligned input (the guarding condition
// is on a line not visible here).
1527 void alignment::alignmentPhylip_PamlToFile(ostream &file) {
1536 cerr << endl <<
"ERROR: Sequences are not aligned. Format (PHYLIP) " 1537 <<
"not compatible with unaligned sequences." << endl << endl;
1542 tmpMatrix =
new string[sequenNumber];
// Working copies, reversed when requested.
1547 for(i = 0; i < sequenNumber; i++)
1548 tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
// Name column width is only widened when `shortNames` is not set.
1553 for(i = 0; (i < sequenNumber) && (!shortNames); i++)
1554 maxLongName = utils::max(maxLongName, seqsName[i].size());
1558 file <<
" " << sequenNumber <<
" " << residNumber << endl;
// NOTE(review): the line written is sequences[i], not tmpMatrix[i], so the
// reversed copy built above is not what gets written -- confirm whether
// `reverse` is meant to apply to this format.
1562 for(i = 0; i < sequenNumber; i++)
1563 file << setw(maxLongName + 3) << left << seqsName[i].substr(0, maxLongName)
1564 << sequences[i] << endl;
1568 delete [] tmpMatrix;
// alignment::alignmentClustalToFile -- writes the alignment to `file` in
// CLUSTAL layout: a header line, then blocks of 60 columns in which every
// row is the left-aligned sequence name followed by that block's residues.
//
// An error is printed on cerr for unaligned input (the guarding condition
// is on a line not visible here). When `reverse` is set the sequences are
// written reversed via utils::getReverse.
1571 void alignment::alignmentClustalToFile(ostream &file) {
1574 int i, j, maxLongName = 0;
1580 cerr << endl <<
"ERROR: Sequences are not aligned. Format (CLUSTAL) " 1581 <<
"not compatible with unaligned sequences." << endl << endl;
1586 tmpMatrix =
new string[sequenNumber];
// Working copies, reversed when requested.
1591 for(i = 0; i < sequenNumber; i++)
1592 tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
// Name column width is only widened when `shortNames` is not set.
1595 for(i = 0; (i < sequenNumber) && (!shortNames); i++)
1596 maxLongName = utils::max(maxLongName, seqsName[i].size());
// Reuse the stored header only when input and output formats match.
1599 if((aligInfo.size() != 0) && (iformat == oformat))
1600 file << aligInfo << endl << endl;
// Generic header; presumably the alternative branch of the test above
// (the `else` keyword itself is not visible in this extract).
1602 file <<
"CLUSTAL multiple sequence alignment" << endl << endl;
// One block per 60 columns, blocks separated by two empty lines.
1607 for(j = 0; j < residNumber; j += 60) {
1608 for(i = 0; i < sequenNumber; i++)
1609 file << setw(maxLongName + 5) << left << seqsName[i]
1610 << tmpMatrix[i].substr(j, 60) << endl;
1611 file << endl << endl;
1615 delete [] tmpMatrix;
// alignment::alignmentFastaToFile -- writes every sequence to `file` in
// FASTA layout: a ">" header (taken from seqsName or, in the alternative
// branch, from seqsInfo when it is not NULL) cut to maxLongName characters,
// followed by the residues in lines of 60 characters.
//
// When `reverse` is set the sequences are written reversed via
// utils::getReverse. A warning about header truncation can be printed on
// cerr; its condition is on a line not visible here.
1618 void alignment::alignmentFastaToFile(ostream &file) {
// NOTE(review): maxLongName has no initialiser here and is read by
// utils::max below; confirm it is assigned on a line not visible in this
// extract.
1621 int i, j, maxLongName;
1625 tmpMatrix =
new string[sequenNumber];
// Working copies, reversed when requested.
1630 for(i = 0; i < sequenNumber; i++)
1631 tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
// Header width: longest name, or longest info string in the other branch
// (the leading `if` of this chain is not visible in this extract).
1637 for(i = 0; i < sequenNumber; i++)
1639 maxLongName = utils::max(maxLongName, seqsName[i].size());
1640 else if (seqsInfo != NULL)
1641 maxLongName = utils::max(maxLongName, seqsInfo[i].size());
1646 cerr << endl <<
"WARNING: Original sequence header will be cut by charac" 1647 <<
"ter 10" << endl;
1650 for(i = 0; i < sequenNumber; i++) {
1652 file <<
">" << seqsName[i].substr(0, maxLongName) << endl;
1653 else if (seqsInfo != NULL)
1654 file <<
">" << seqsInfo[i].substr(0, maxLongName) << endl;
// Each sequence uses its own length, so unaligned input is supported.
1655 for(j = 0; j < residuesNumber[i]; j+= 60)
1656 file << tmpMatrix[i].substr(j, 60) << endl;
1660 delete [] tmpMatrix;
// alignment::alignmentNexusToFile -- writes the alignment to `file` as a
// NEXUS DATA block: "#NEXUS" / "BEGIN DATA;" / DIMENSIONS, a FORMAT line
// chosen from `dataType`, any MISSING / MATCHCHAR tokens found in
// `aligInfo`, one "[Name: ... Len: ...]" comment per sequence, then the
// MATRIX in rows of 50 columns printed as groups of 10, closed by ";" and
// "END;".
//
// An error is printed on cerr for unaligned input (the guarding condition
// is on a line not visible here).
1663 void alignment::alignmentNexusToFile(ostream &file) {
1666 int i, j, k, maxLongName = 0;
1672 cerr << endl <<
"ERROR: Sequences are not aligned. Format (NEXUS) " 1673 <<
"not compatible with unaligned sequences." << endl << endl;
1678 tmpMatrix =
new string[sequenNumber];
// Working copies, reversed when requested.
1683 for(i = 0; i < sequenNumber; i++)
1684 tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
// Name column width is only widened when `shortNames` is not set.
1687 for(i = 0; (i < sequenNumber) && (!shortNames); i++)
1688 maxLongName = utils::max(maxLongName, seqsName[i].size());
// Remove every ';' from the stored FORMAT text (this modifies the member).
1694 while((
int) aligInfo.find(
";") != (
int) string::npos)
1695 aligInfo.erase(aligInfo.find(
";"), 1);
1698 file <<
"#NEXUS" << endl <<
"BEGIN DATA;" << endl <<
" DIMENSIONS NTAX=" 1699 << sequenNumber <<
" NCHAR=" << residNumber <<
";" << endl;
1702 if ((dataType == DNAType) || (dataType == DNADeg))
1703 file <<
"FORMAT DATATYPE=DNA INTERLEAVE=yes GAP=-";
1704 else if ((dataType == RNAType) || (dataType == RNADeg))
1705 file <<
"FORMAT DATATYPE=RNA INTERLEAVE=yes GAP=-";
1706 else if (dataType == AAType)
1707 file <<
"FORMAT DATATYPE=PROTEIN INTERLEAVE=yes GAP=-";
// Walk the space-separated tokens of aligInfo; [i, j) is the current one.
1711 while((j = aligInfo.find(
" ", i)) != (
int) string::npos) {
// NOTE(review): some of the substr calls below use (i, j) and others
// (i, j - i). The second argument of substr is a length; because compare
// only looks at the first 7 or 9 characters the tests still work, but the
// calls are inconsistent.
1713 if((aligInfo.substr(i, j - i)).compare(0, 7,
"MISSING") == 0 ||
1714 (aligInfo.substr(i, j)).compare(0, 7,
"missing") == 0)
1715 file <<
" " << (aligInfo.substr(i, j - i));
1717 else if((aligInfo.substr(i, j)).compare(0, 9,
"MATCHCHAR") == 0 ||
1718 (aligInfo.substr(i, j)).compare(0, 9,
"matchchar") == 0)
1719 file <<
" " << (aligInfo.substr(i, j - i));
1723 file <<
";" << endl;
// One bracketed comment per sequence with its name and the column count.
1726 for(i = 0; i < sequenNumber; i++)
1727 file <<
"[Name: " << setw(maxLongName + 4) << left << seqsName[i] <<
"Len: " 1728 << residNumber <<
"]" << endl;
1729 file << endl <<
"MATRIX" << endl;
1732 for(j = 0; j < residNumber; j += 50) {
1733 for(i = 0; i < sequenNumber; i++) {
1734 file << setw(maxLongName + 4) << left << seqsName[i];
// NOTE(review): the residues are taken from sequences[i], not from
// tmpMatrix[i], so the reversed copy built above is not what gets written
// -- confirm whether `reverse` is meant to apply to this format.
1735 for(k = j; k < (j + 50) && k < residNumber; k += 10)
1736 file <<
" " << sequences[i].substr(k, 10);
1741 file <<
";" << endl <<
"END;" << endl;
1744 delete [] tmpMatrix;
// alignment::alignmentMegaToFile -- writes the alignment to `file` in MEGA
// layout: "#MEGA", the stored `filename` title line, a "!Format" line whose
// DataType depends on `dataType` and which carries NSeqs / Nsites, then for
// each sequence a "#name" line and its residues in groups of 10, 50 columns
// per row.
//
// An error is printed on cerr for unaligned input (the guarding condition
// is on a line not visible here). When `reverse` is set the sequences are
// written reversed via utils::getReverse.
1747 void alignment::alignmentMegaToFile(ostream &file) {
1756 cerr << endl <<
"ERROR: Sequences are not aligned. Format (MEGA) " 1757 <<
"not compatible with unaligned sequences." << endl << endl;
1762 tmpMatrix =
new string[sequenNumber];
// Working copies, reversed when requested.
1767 for(i = 0; i < sequenNumber; i++)
1768 tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
1774 file <<
"#MEGA" << endl << filename << endl;
1777 if ((dataType == DNAType) || (dataType == DNADeg))
1778 file <<
"!Format DataType=DNA ";
1779 else if ((dataType == RNAType) || (dataType == RNADeg))
1780 file <<
"!Format DataType=RNA ";
1781 else if (dataType == AAType)
1782 file <<
"!Format DataType=protein ";
1785 file <<
"NSeqs=" << sequenNumber <<
" Nsites=" << residNumber
1786 <<
" indel=- CodeTable=Standard;" << endl << endl;
// One record per sequence: name line, then rows of five 10-column groups.
1789 for(i = 0; i < sequenNumber; i++) {
1790 file <<
"#" << seqsName[i] << endl;
1791 for(j = 0; j < residNumber; j += 50) {
1792 for(k = j; ((k < residNumber) && (k < j + 50)); k += 10)
1793 file << tmpMatrix[i].substr(k, 10) <<
" ";
1800 delete [] tmpMatrix;
// alignment::alignmentNBRF_PirToFile -- writes every sequence to `file` in
// NBRF/PIR layout: a ">XX;name" id line, a description line, then the
// residues in groups of 10, 50 columns per row.
//
// When seqsInfo is available and input and output formats match, the id
// code and description come from seqsInfo; the other visible header uses a
// two-letter code derived from `dataType` ("DL", "RL" or "P1") and a
// generated "<name> <length> bases" description. Each sequence uses its own
// length from residuesNumber. When `reverse` is set the sequences are
// written reversed via utils::getReverse.
1803 void alignment::alignmentNBRF_PirToFile(ostream &file) {
1807 string alg_datatype, *tmpMatrix;
1810 tmpMatrix =
new string[sequenNumber];
// Working copies, reversed when requested.
1815 for(i = 0; i < sequenNumber; i++)
1816 tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
// Two-letter code used in the generated id line.
1820 if ((dataType == DNAType) || (dataType == DNADeg))
1821 alg_datatype =
"DL";
1822 else if ((dataType == RNAType) || (dataType == RNADeg))
1823 alg_datatype =
"RL";
1824 else if (dataType == AAType)
1825 alg_datatype =
"P1";
1828 for(i = 0; i < sequenNumber; i++) {
// Original header: first two characters of seqsInfo are the code, the
// rest is the description line.
1831 if((seqsInfo != NULL) && (iformat == oformat))
1832 file <<
">" << seqsInfo[i].substr(0, 2) <<
";" << seqsName[i]
1833 << endl << seqsInfo[i].substr(2) << endl;
// Generated header; presumably the alternative branch of the test above
// (the `else` keyword itself is not visible in this extract).
1835 file <<
">" << alg_datatype <<
";" << seqsName[i] << endl
1836 << seqsName[i] <<
" " << residuesNumber[i] <<
" bases" << endl;
1839 for(j = 0; j < residuesNumber[i]; j += 50) {
1840 for(k = j; (k < residuesNumber[i]) && (k < (j + 50)); k += 10)
1841 file <<
" " << tmpMatrix[i].substr(k, 10);
// End of this sequence reached: the branches below depend on whether the
// length is a multiple of 50 or of 10. What each branch writes beyond the
// visible text is on lines not shown here.
1843 if(k >= residuesNumber[i]) {
1844 if((residuesNumber[i] % 50) == 0)
1845 file << endl <<
" ";
1846 else if((residuesNumber[i] % 10) == 0)
1856 delete [] tmpMatrix;
// alignment::alignmentSummaryHTML -- writes an HTML report of a trimming
// result to `destFile`.
//
// Parameters (as used in the visible lines):
//   destFile    - path handed to file.open().
//   residues    - number of entries read from selectedRes.
//   seqs        - number of entries read from selectedSeq.
//   selectedRes - column indices marked as selected.
//   selectedSeq - sequence indices marked as selected.
//   consValues  - per-column values for the "Consistency Scores" track;
//                 the track is skipped when this is NULL.
//
// The report contains an embedded style sheet, selected/deleted counters,
// score legends, and then, per block of columns: a ruler, every sequence
// with residues wrapped in a colour span, a "Selected Cols" track and the
// optional gaps / similarity / consistency tracks.
// An error is printed on cerr for unaligned input (the guarding condition
// is on a line not visible here). Returns bool; the return statements are
// not visible in this extract.
1859 bool alignment::alignmentSummaryHTML(
char *destFile,
int residues,
int seqs,
1860 int *selectedRes,
int *selectedSeq,
float *consValues) {
1865 int i, j, k, kj, upper, minHTML, maxLongName, *gapsValues;
1873 tmpColumn.reserve(sequenNumber);
1878 cerr << endl <<
"ERROR: Sequences are not aligned." << endl << endl;
1883 file.open(destFile);
// NOTE(review): maxLongName has no initialiser in its declaration; confirm
// it is assigned on a line not visible in this extract.
1889 for(i = 0; i < sequenNumber; i++)
1890 maxLongName = utils::max(maxLongName, seqsName[i].size());
// Width of the label column: at least 25 characters.
1893 minHTML = utils::max(25, maxLongName + 10);
// Boolean masks for columns and sequences (their initial fill is on lines
// not visible here).
1897 res =
new bool[residNumber];
1898 for(i = 0; i < residNumber; i++)
1901 seq =
new bool[sequenNumber];
1902 for(i = 0; i < sequenNumber; i++)
// Mark what was selected.
// NOTE(review): the indices taken from selectedRes / selectedSeq are used
// without a visible range check.
1907 for(i = 0; i < residues; i++)
1908 res[selectedRes[i]] =
true;
1909 for(i = 0; i < seqs; i++)
1910 seq[selectedSeq[i]] =
true;
// Score vectors obtained from the sgaps / scons members; the guards on
// those pointers are on lines not visible here.
1915 gapsValues = sgaps -> getGapsWindow();
1918 simValues = scons -> getMdkwVector();
// Document head and style sheet. The #x ids are the residue colours; the
// .c1 ... .c12 classes are the score scale.
1921 file <<
"<!DOCTYPE html>" << endl <<
"<html><head>" << endl <<
" <meta " 1922 <<
"http-equiv=\"Content-Type\" content=\"text/html;charset=ISO-8859-1\" />" 1923 << endl <<
" <title>trimAl v1.4 Summary</title>" << endl
1924 <<
" <style type=\"text/css\" media=\"all\">" << endl
1926 <<
" #b { background-color: #3366ff; }\n" 1927 <<
" #r { background-color: #cc0000; }\n" 1928 <<
" #g { background-color: #33cc00; }\n" 1929 <<
" #p { background-color: #ff6666; }\n" 1930 <<
" #m { background-color: #cc33cc; }\n" 1931 <<
" #o { background-color: #ff9900; }\n" 1932 <<
" #c { background-color: #46C7C7; }\n" 1933 <<
" #y { background-color: #FFFF00; }\n" 1935 <<
" .sel { background-color: #B9B9B9; }\n" 1936 <<
" .nsel { background-color: #E9E9E9; }\n" 1939 <<
" .c1 { background-color: #FFFBF2; }\n" 1940 <<
" .c2 { background-color: #FFF8CC; }\n" 1941 <<
" .c3 { background-color: #FAF0BE; }\n" 1942 <<
" .c4 { background-color: #F0EAD6; }\n" 1943 <<
" .c5 { background-color: #F3E5AB; }\n" 1944 <<
" .c6 { background-color: #F4C430; }\n" 1945 <<
" .c7 { background-color: #C2B280; color: white; }\n" 1946 <<
" .c8 { background-color: #DAA520; color: white; }\n" 1947 <<
" .c9 { background-color: #B8860B; color: white; }\n" 1948 <<
" .c10 { background-color: #918151; color: white; }\n" 1949 <<
" .c11 { background-color: #967117; color: white; }\n" 1950 <<
" .c12 { background-color: #6E5411; color: white; }\n" 1953 <<
" </style>\n </head>\n\n" <<
" <body>\n" <<
" <pre>" << endl;
// Selected / deleted counters.
1956 file <<
" <span class=sel>Selected Sequences: " << setw(5) << right << seqs
1957 <<
" /Selected Residues: " << setw(7) << right << residues <<
"</span>" 1958 << endl <<
" <span class=nsel>Deleted Sequences: " << setw(5) << right
1959 << sequenNumber - seqs <<
" /Deleted Residues: " << setw(7) << right
1960 << residNumber - residues <<
"</span>" << endl;
// Legends, one per score track that is available.
1963 if (gapsValues != NULL)
1964 file << endl << setw(minHTML) << left <<
" Gaps Scores: " 1965 <<
"<span class=c1> =0= </span><span class=c2> <.001 </span>" 1966 <<
"<span class=c3> <.050 </span><span class=c4> <.100 </span>" 1967 <<
"<span class=c5> <.150 </span><span class=c6> <.200 </span>" 1968 <<
"<span class=c7> <.250 </span><span class=c8> <.350 </span>" 1969 <<
"<span class=c9> <.500 </span><span class=c10> <.750 </span>" 1970 <<
"<span class=c11> <1.00 </span><span class=c12> =1= </span>";
1972 if (simValues != NULL)
1973 file << endl << setw(minHTML) << left <<
" Similarity Scores: " 1974 <<
"<span class=c1> =0= </span><span class=c2> <1e-6 </span>" 1975 <<
"<span class=c3> <1e-5 </span><span class=c4> <1e-4 </span>" 1976 <<
"<span class=c5> <.001 </span><span class=c6> <.010 </span>" 1977 <<
"<span class=c7> <.100 </span><span class=c8> <.250 </span>" 1978 <<
"<span class=c9> <.500 </span><span class=c10> <.750 </span>" 1979 <<
"<span class=c11> <1.00 </span><span class=c12> =1= </span>";
1981 if (consValues != NULL)
1982 file << endl << setw(minHTML) << left <<
" Consistency Scores: " 1983 <<
"<span class=c1> =0= </span><span class=c2> <.001 </span>" 1984 <<
"<span class=c3> <.050 </span><span class=c4> <.100 </span>" 1985 <<
"<span class=c5> <.150 </span><span class=c6> <.200 </span>" 1986 <<
"<span class=c7> <.250 </span><span class=c8> <.350 </span>" 1987 <<
"<span class=c9> <.500 </span><span class=c10> <.750 </span>" 1988 <<
"<span class=c11> <1.00 </span><span class=c12> =1= </span>";
// NOTE(review): this test mixes `!= NULL` with `== NULL`, unlike the three
// legend guards above (all `!= NULL`) and the all-`== NULL` test further
// down. It looks like it was meant to be "any track present" -- confirm.
1990 if ((gapsValues != NULL)
or (simValues == NULL)
or (consValues == NULL))
// Column ruler: numbers every 10 columns, then a line with '+' at each
// multiple of 10 and '=' elsewhere.
1998 file << endl << setw(minHTML + 10) << right << (j + 10);
1999 for(i = j + 20; ((i <= residNumber) && (i <= upper)); i += 10)
2000 file << setw(10) << right << (i);
2003 file << endl << setw(minHTML + 1) << right;
2004 for(i = j + 1; ((i <= residNumber) && (i <= upper)); i++)
2005 file << (!(i % 10) ?
"+" :
"=");
// One row per sequence; the name is tagged sel / nsel from the seq mask.
2009 for(i = 0; i < sequenNumber; i++) {
2010 file <<
" <span class=" << ((seq[i]) ?
"sel>" :
"nsel>") << seqsName[i]
2011 <<
"</span>" << setw(minHTML - 4 - seqsName[i].size()) << right <<
"";
2014 for(k = j; ((k < residNumber) && (k < upper)); k++) {
// tmpColumn is rebuilt as column k across all sequences and handed to
// utils::determineColor together with this sequence's residue. This is
// done for every (sequence, column) pair.
2015 for(kj = 0, tmpColumn.clear(); kj < sequenNumber; kj++)
2016 tmpColumn += sequences[kj][k];
2018 type = utils::determineColor(sequences[i][k], tmpColumn);
// Plain residue, or residue wrapped in a span whose id is the colour
// code; the test choosing between the two is on a line not visible here.
2020 file << sequences[i][k];
2022 file <<
"<span id=" << type <<
">" << sequences[i][k] <<
"</span>";
// Track showing which columns were kept (from the res mask).
2027 file << endl << setw(minHTML) << left <<
" Selected Cols: ";
2028 for(k = j; ((k < residNumber) && (k < (j +
HTMLBLOCKS))); k++)
2029 file <<
"<span class=" << (res[k] ?
"sel" :
"nsel") <<
"> </span>";
// No score track available at all.
2033 if ((gapsValues == NULL)
and (simValues == NULL)
and (consValues == NULL))
// Gaps track: 0 gaps maps to c12 and a column made only of gaps to c1;
// in between, the value 1 - gaps/sequenNumber is bucketed with the
// thresholds below.
2037 if (gapsValues != NULL) {
2038 file << endl << setw(minHTML) << left <<
" Gaps Scores: ";
2039 for(k = j; ((k < residNumber) && (k < (j +
HTMLBLOCKS))); k++)
2040 if(gapsValues[k] == 0)
2041 file <<
"<span class=c12> </span>";
2042 else if(gapsValues[k] == sequenNumber)
2043 file <<
"<span class=c1> </span>";
2044 else if(1 - (
float(gapsValues[k])/sequenNumber) >= .750)
2045 file <<
"<span class=c11> </span>";
2046 else if(1 - (
float(gapsValues[k])/sequenNumber) >= .500)
2047 file <<
"<span class=c10> </span>";
2048 else if(1 - (
float(gapsValues[k])/sequenNumber) >= .350)
2049 file <<
"<span class=c9> </span>";
2050 else if(1 - (
float(gapsValues[k])/sequenNumber) >= .250)
2051 file <<
"<span class=c8> </span>";
2052 else if(1 - (
float(gapsValues[k])/sequenNumber) >= .200)
2053 file <<
"<span class=c7> </span>";
2054 else if(1 - (
float(gapsValues[k])/sequenNumber) >= .150)
2055 file <<
"<span class=c6> </span>";
2056 else if(1 - (
float(gapsValues[k])/sequenNumber) >= .100)
2057 file <<
"<span class=c5> </span>";
2058 else if(1 - (
float(gapsValues[k])/sequenNumber) >= .050)
2059 file <<
"<span class=c4> </span>";
2060 else if(1 - (
float(gapsValues[k])/sequenNumber) >= .001)
2061 file <<
"<span class=c3> </span>";
2063 file <<
"<span class=c2> </span>";
// Similarity track, bucketed on a logarithmic-looking scale.
// NOTE(review): the == 1 and == 0 tests are exact comparisons; if
// simValues holds floating-point data (its declaration is not visible
// here) values merely close to 0 or 1 fall into the neighbouring bucket.
2065 if (simValues != NULL) {
2066 file << endl << setw(minHTML) << left <<
" Similarity Scores: ";
2067 for(k = j; ((k < residNumber) && (k < (j +
HTMLBLOCKS))); k++)
2068 if(simValues[k] == 1)
2069 file <<
"<span class=c12> </span>";
2070 else if(simValues[k] == 0)
2071 file <<
"<span class=c1> </span>";
2072 else if(simValues[k] >= .750)
2073 file <<
"<span class=c11> </span>";
2074 else if(simValues[k] >= .500)
2075 file <<
"<span class=c10> </span>";
2076 else if(simValues[k] >= .250)
2077 file <<
"<span class=c9> </span>";
2078 else if(simValues[k] >= .100)
2079 file <<
"<span class=c8> </span>";
2080 else if(simValues[k] >= .010)
2081 file <<
"<span class=c7> </span>";
2082 else if(simValues[k] >= .001)
2083 file <<
"<span class=c6> </span>";
2084 else if(simValues[k] >= 1e-4)
2085 file <<
"<span class=c5> </span>";
2086 else if(simValues[k] >= 1e-5)
2087 file <<
"<span class=c4> </span>";
2088 else if(simValues[k] >= 1e-6)
2089 file <<
"<span class=c3> </span>";
2091 file <<
"<span class=c2> </span>";
// Consistency track, same thresholds as the gaps track. consValues is a
// float array, so the == 1 / == 0 tests are exact float comparisons.
2093 if (consValues != NULL) {
2094 file << endl << setw(minHTML) << left <<
" Consistency Scores: ";
2095 for(k = j; ((k < residNumber) && (k < (j +
HTMLBLOCKS))); k++)
2096 if(consValues[k] == 1)
2097 file <<
"<span class=c12> </span>";
2098 else if(consValues[k] == 0)
2099 file <<
"<span class=c1> </span>";
2100 else if(consValues[k] >= .750)
2101 file <<
"<span class=c11> </span>";
2102 else if(consValues[k] >= .500)
2103 file <<
"<span class=c10> </span>";
2104 else if(consValues[k] >= .350)
2105 file <<
"<span class=c9> </span>";
2106 else if(consValues[k] >= .250)
2107 file <<
"<span class=c8> </span>";
2108 else if(consValues[k] >= .200)
2109 file <<
"<span class=c7> </span>";
2110 else if(consValues[k] >= .150)
2111 file <<
"<span class=c6> </span>";
2112 else if(consValues[k] >= .100)
2113 file <<
"<span class=c5> </span>";
2114 else if(consValues[k] >= .050)
2115 file <<
"<span class=c4> </span>";
2116 else if(consValues[k] >= .001)
2117 file <<
"<span class=c3> </span>";
2119 file <<
"<span class=c2> </span>";
// Close the document.
2125 file <<
" </pre>" << endl <<
" </body>" << endl <<
"</html>" << endl;
// alignment::alignmentColourHTML -- writes the alignment to `file` as an
// HTML page in which residues can be wrapped in a colour span.
//
// Visible steps: print an error on cerr for unaligned input (the guarding
// condition is on a line not visible here), compute the longest sequence
// name, emit the document head with the colour style sheet, then per block
// of columns a ruler followed by one row per sequence. Returns bool; the
// return statements are not visible in this extract.
2135 bool alignment::alignmentColourHTML(ostream &file) {
2137 int i, j, kj, upper, k = 0, maxLongName = 0;
2142 tmpColumn.reserve(sequenNumber);
2147 cerr << endl <<
"ERROR: Sequences are not aligned." << endl << endl;
2153 for(i = 0; i < sequenNumber; i++)
2154 maxLongName = utils::max(maxLongName, seqsName[i].size());
// Document head; the #x ids are the residue colours.
2158 file <<
"<!DOCTYPE html>" << endl <<
"<html><head>" << endl <<
" <meta " 2159 <<
"http-equiv=\"Content-Type\" content=\"text/html;charset=ISO-8859-1\" />" 2160 << endl <<
" <title>readAl v1.4</title>" << endl
2161 <<
" <style type=\"text/css\">" << endl
2162 <<
" #b { background-color: #3366ff; }\n" 2163 <<
" #r { background-color: #cc0000; }\n" 2164 <<
" #g { background-color: #33cc00; }\n" 2165 <<
" #p { background-color: #ff6666; }\n" 2166 <<
" #m { background-color: #cc33cc; }\n" 2167 <<
" #o { background-color: #ff9900; }\n" 2168 <<
" #c { background-color: #46C7C7; }\n" 2169 <<
" #y { background-color: #FFFF00; }\n" 2170 <<
" </style>\n </head>\n\n" <<
" <body>\n <pre>" << endl;
// Column ruler: numbers every 10 columns, then a line with '+' at each
// multiple of 10 and '=' elsewhere.
2179 file << setw(maxLongName + 19) << right << (j + 10);
2180 for(i = j + 20; ((i <= residNumber) && (i <= upper)); i += 10)
2181 file << setw(10) << right << i;
2184 file << endl << setw(maxLongName + 10);
2185 for(i = j + 1; ((i <= residNumber) && (i <= upper)); i++)
2186 file << (!(i % 10) ?
"+" :
"=");
// One row per sequence: padded name, then the residues of this block.
2189 for(i = 0; i < sequenNumber; i++) {
2192 file << endl << setw(maxLongName + 9) << left << seqsName[i];
2195 for(k = j; ((k < residNumber) && (k < upper)); k++) {
// tmpColumn is rebuilt as column k across all sequences and handed to
// utils::determineColor together with this sequence's residue. This is
// done for every (sequence, column) pair.
2196 for(kj = 0, tmpColumn.clear(); kj < sequenNumber; kj++)
2197 tmpColumn += sequences[kj][k];
2199 type = utils::determineColor(sequences[i][k], tmpColumn);
// Plain residue, or residue wrapped in a span whose id is the colour
// code; the test choosing between the two is on a line not visible here.
2201 file << sequences[i][k];
2203 file <<
"<span id=" << type <<
">" << sequences[i][k] <<
"</span>";
// Close the document.
2210 file <<
" </pre>" << endl <<
" </body>" << endl <<
"</html>" << endl;
// alignment::printAlignmentInfo -- writes a short tab-separated summary to
// `file`: total sequences, alignment length (only when isFileAligned()
// returns true), average sequence length, and the name and length of the
// longest and shortest sequences.
//
// Lengths are counted without the '-' character, so "longest" and
// "shortest" refer to the number of non-gap residues.
2215 void alignment::printAlignmentInfo(ostream &file) {
2219 int i, j, valid_res, max, min, max_pos, min_pos, total_res;
// NOTE(review): only the initialisation of `min` is visible; confirm that
// max, max_pos and min_pos are initialised on lines not shown here before
// the comparisons below read them.
2225 min = residuesNumber[0];
2227 for(i = 0, total_res = 0; i < sequenNumber; i++) {
// Count the residues of sequence i that are not gaps.
2230 for(j = 0, valid_res = 0; j < residuesNumber[i]; j++)
2231 valid_res += (sequences[i][j] !=
'-' ? 1 : 0);
2235 total_res += valid_res;
// Track the extremes; on a tie the later sequence replaces the earlier.
2238 max_pos = (max > valid_res) ? max_pos : i;
2239 max = (max > valid_res) ? max : valid_res;
2241 min_pos = (min < valid_res) ? min_pos : i;
2242 min = (min < valid_res) ? min : valid_res;
2245 file <<
"## Total sequences\t" << sequenNumber << endl;
2246 if (isFileAligned())
2247 file <<
"## Alignment length\t" << residNumber << endl;
// NOTE(review): the average divides by sequenNumber without a visible
// guard against zero sequences.
2248 file <<
"## Avg. sequence length\t" << (
float) total_res / sequenNumber << endl
2249 <<
"## Longest seq. name\t'" << seqsName[max_pos] <<
"'" << endl
2250 <<
"## Longest seq. length\t" << max << endl
2251 <<
"## Shortest seq. name\t'" << seqsName[min_pos] <<
"'" << endl
2252 <<
"## Shortest seq. length\t" << min << endl;