rwAlignment.cpp
Go to the documentation of this file.
1 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** *****
2  ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** *****
3 
4  trimAl v1.4: a tool for automated alignment trimming in large-scale
5  phylogenetics analyses.
6 
7  readAl v1.4: a tool for automated alignment conversion among different
8  formats.
9 
10  2009-2015 Capella-Gutierrez S. and Gabaldon, T.
11  [scapella, tgabaldon]@crg.es
12 
13  This file is part of trimAl/readAl.
14 
15  trimAl/readAl are free software: you can redistribute it and/or modify
16  it under the terms of the GNU General Public License as published by
17  the Free Software Foundation, the last available version.
18 
19  trimAl/readAl are distributed in the hope that it will be useful,
20  but WITHOUT ANY WARRANTY; without even the implied warranty of
21  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22  GNU General Public License for more details.
23 
24  You should have received a copy of the GNU General Public License
25  along with trimAl/readAl. If not, see <http://www.gnu.org/licenses/>.
26 
27 ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** *****
28 ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
29 
30 #include "alignment.h"
31 #include "defines.h"
32 #include "utils.h"
33 
34 extern int errno;
35 #include <errno.h>
36 #include <ctype.h>
37 #include <string>
38 
39 using namespace std;
40 
41 bool alignment::fillMatrices(bool aligned) {
42  /* Function to determine if a set of sequences, that can be aligned or not,
43  * have been correctly load and are free of errors. */
44  int i, j;
45 
46  /* Initialize some variables */
47  residuesNumber = new int[sequenNumber];
48  for(i = 0; i < sequenNumber; i++) {
49  residuesNumber[i] = sequences[i].size();
50  }
51 
52  /* Check whether there are any unknow/no allowed character in the sequences */
53  for(i = 0; i < sequenNumber; i++)
54  for(j = 0; j < residuesNumber[i]; j++)
55  if((!isalpha(sequences[i][j])) && (!ispunct(sequences[i][j]))) {
56  cerr << endl << "ERROR: The sequence \"" << seqsName[i] << "\" has an "
57  << "unknown (" << sequences[i][j] << ") character." << endl;
58  return false;
59  }
60 
61  /* Check whether all sequences have same size or not */
62  for(i = 1; i < sequenNumber; i++)
63  if(residuesNumber[i] != residuesNumber[i-1])
64  break;
65  /* Set an appropriate flag for indicating if sequences are aligned or not */
66  isAligned = (i != sequenNumber) ? false : true;
67 
68  /* Warm about those cases where sequences should be aligned
69  * and there are not */
70  if (aligned and !isAligned) {
71  cerr << endl << "ERROR: Sequences should be aligned (all with same length) "
72  << "and there are not. Check your input alignment" << endl;
73  return false;
74  }
75 
76  /* Full-fill some information about input alignment */
77  if(residNumber == 0)
78  residNumber = residuesNumber[0];
79 
80  /* Check whether aligned sequences have the length fixed for the input alig */
81  for(i = 0; (i < sequenNumber) and (aligned); i++) {
82  if(residuesNumber[i] != residNumber) {
83  cerr << endl << "ERROR: The sequence \"" << seqsName[i] << "\" ("
84  << residuesNumber[i] << ") does not have the same number of residues "
85  << "fixed by the alignment (" << residNumber << ")." << endl;
86  return false;
87  }
88  }
89 
90  /* If the sequences are aligned, initialize some additional variables.
91  * These variables will be useful for posterior analysis */
92  if((aligned) || (isAligned)) {
93 
94  /* Asign its position to each column. That will be used to determine which
95  * columns should be kept in output alignment after applying any method
96  * and which columns should not */
97  saveResidues = new int[residNumber];
98  for(i = 0; i < residNumber; i++)
99  saveResidues[i] = i;
100 
101  /* Asign its position to each sequence. Similar to the columns numbering
102  * process, assign to each sequence its position is useful to know which
103  * sequences will be in the output alignment */
104  saveSequences = new int[sequenNumber];
105  for(i = 0; i < sequenNumber; i++)
106  saveSequences[i] = i;
107  }
108 
109  /* Return an flag indicating that everything is fine */
110  return true;
111 }
112 
113 void alignment::getSequences(ostream &file) {
114  /* Get only residues sequences from input alignment */
115 
116  int i, j;
117  string *tmpMatrix;
118 
119  /* Allocate local memory for generating output alignment */
120  tmpMatrix = new string[sequenNumber];
121 
122  /* Depending on alignment orientation: forward or reverse. Copy directly
123  * sequence information or get firstly the reversed sequences and then copy
124  * it into local memory. Once orientation has been determined, remove gaps
125  * from resuting sequences */
126  for(i = 0; i < sequenNumber; i++)
127  tmpMatrix[i] = (!reverse) ? utils::removeCharacter('-', sequences[i]) :
128  utils::removeCharacter('-', utils::getReverse(sequences[i]));
129 
130  /* Print output set of sequences in FASTA format*/
131  for(i = 0; i < sequenNumber; i++) {
132  file << ">" << seqsName[i] << endl;
133  for(j = 0; j < (int) tmpMatrix[i].size(); j += 60)
134  file << tmpMatrix[i].substr(j, 60) << endl;
135  file << endl;
136  }
137 
138  /* Deallocate local memory */
139  delete [] tmpMatrix;
140 }
141 
142 int alignment::formatInputAlignment(char *alignmentFile) {
143  /* Guess input alignment format */
144 
145  char c, *firstWord = NULL, *line = NULL;
146  int format = 0, blocks = 0;
147  ifstream file;
148  string nline;
149 
150  /* Check the file and its content */
151  file.open(alignmentFile, ifstream::in);
152  if(!utils::checkFile(file))
153  return false;
154 
155  /* Read first valid line in a safer way */
156  do {
157  line = utils::readLine(file);
158  } while ((line == NULL) && (!file.eof()));
159 
160  /* If the file end is reached without a valid line, warn about it */
161  if (file.eof())
162  return false;
163 
164  /* Otherwise, split line */
165  firstWord = strtok(line, OTHDELIMITERS);
166 
167  /* Clustal Format */
168  if((!strcmp(firstWord, "CLUSTAL")) || (!strcmp(firstWord, "clustal")))
169  format = 1;
170 
171  /* NBRF/PIR Format */
172  else if(firstWord[0] == '>' && firstWord[3] == ';')
173  format = 3;
174 
175  /* Fasta Format */
176  else if(firstWord[0] == '>')
177  format = 8;
178 
179  /* Nexus Format */
180  else if((!strcmp(firstWord, "#NEXUS")) || (!strcmp(firstWord, "#nexus")))
181  format = 17;
182 
183  /* Mega Format */
184  else if((!strcmp(firstWord, "#MEGA")) || (!strcmp(firstWord, "#mega"))) {
185 
186  /* Determine specific mega format: sequential or interleaved.
187  * Counting the number of blocks (set of lines starting by "#") in
188  * the input file. */
189  blocks = 0;
190  do {
191  file.read(&c, 1);
192  } while((c != '#') && (!file.eof()));
193 
194  do {
195  while((c != '\n') && (!file.eof()))
196  file.read(&c, 1);
197  file.read(&c, 1);
198  if(c == '#')
199  blocks++;
200  } while((c != '\n') && (!file.eof()));
201 
202  /* MEGA Sequential (22) or Interleaved (21) */
203  format = (!blocks) ? 22 : 21;
204  }
205 
206  /* Phylip Format */
207  else {
208  /* Determine specific phylip format: sequential or interleaved. */
209 
210  /* Get number of sequences and residues */
211  sequenNumber = atoi(firstWord);
212  firstWord = strtok(NULL, DELIMITERS);
213  if(firstWord != NULL)
214  residNumber = atoi(firstWord);
215 
216  /* If there is only one sequence, use by default sequential format since
217  * it is impossible to determine exactly which phylip format is */
218  if((sequenNumber == 1) && (residNumber != 0))
219  format = 12;
220 
221  /* If there are more than one sequence, analyze sequences distribution to
222  * determine its format. */
223  else if((sequenNumber != 0) && (residNumber != 0)) {
224  blocks = 0;
225 
226  /* Read line in a safer way */
227  do {
228  if (line != NULL)
229  delete [] line;
230  line = utils::readLine(file);
231  } while ((line == NULL) && (!file.eof()));
232 
233  /* If the file end is reached without a valid line, warn about it */
234  if (file.eof())
235  return false;
236 
237  firstWord = strtok(line, DELIMITERS);
238  while(firstWord != NULL) {
239  blocks++;
240  firstWord = strtok(NULL, DELIMITERS);
241  }
242 
243  /* Read line in a safer way */
244  do {
245  if (line != NULL)
246  delete [] line;
247  line = utils::readLine(file);
248  } while ((line == NULL) && (!file.eof()));
249 
250  firstWord = strtok(line, DELIMITERS);
251  while(firstWord != NULL) {
252  blocks--;
253  firstWord = strtok(NULL, DELIMITERS);
254  }
255 
256  /* If the file end is reached without a valid line, warn about it */
257  if (file.eof())
258  return false;
259 
260  /* Phylip Interleaved (12) or Sequential (11) */
261  format = (!blocks) ? 12 : 11;
262  }
263  }
264 
265  /* Close the input file and delete dinamic memory */
266  file.close();
267  if (line != NULL)
268  delete [] line;
269 
270  /* Return the input alignment format */
271  return format;
272 }
273 
274 bool alignment::loadPhylipAlignment(char *alignmentFile) {
275  /* PHYLIP/PHYLIP 4 (Sequential) file format parser */
276 
277  char *str, *line = NULL;
278  ifstream file;
279  int i;
280 
281  /* Check the file and its content */
282  file.open(alignmentFile, ifstream::in);
283  if(!utils::checkFile(file))
284  return false;
285 
286  /* Store some data about filename for possible uses in other formats */
287  filename.append("!Title ");
288  filename.append(alignmentFile);
289  filename.append(";");
290 
291  /* Read first valid line in a safer way */
292  do {
293  line = utils::readLine(file);
294  } while ((line == NULL) && (!file.eof()));
295 
296  /* If the file end is reached without a valid line, warn about it */
297  if (file.eof())
298  return false;
299 
300  /* Read the input sequences and residues for each sequence numbers */
301  str = strtok(line, DELIMITERS);
302  sequenNumber = 0;
303  if(str != NULL)
304  sequenNumber = atoi(str);
305 
306  str = strtok(NULL, DELIMITERS);
307  residNumber = 0;
308  if(str != NULL)
309  residNumber = atoi(str);
310 
311  /* If something is wrong about the sequences or/and residues number,
312  * return an error to warn about that */
313  if((sequenNumber == 0) || (residNumber == 0))
314  return false;
315 
316  /* Allocate memory for the input data */
317  sequences = new string[sequenNumber];
318  seqsName = new string[sequenNumber];
319 
320  /* Read the lines block containing the sequences name + first fragment */
321  i = 0;
322  while((i < sequenNumber) && (!file.eof())){
323 
324  /* Read lines in a safer way. Destroy previous stored information */
325  if (line != NULL)
326  delete [] line;
327  line = utils::readLine(file);
328 
329  /* It the input line/s are blank lines, skip the loop iteration */
330  if(line == NULL)
331  continue;
332 
333  /* First token: Sequence name */
334  str = strtok(line, DELIMITERS);
335  seqsName[i].append(str, strlen(str));
336 
337  /* Trim the rest of the line from blank spaces, tabs, etc and store it */
338  str = strtok(NULL, DELIMITERS);
339  while(str != NULL) {
340  sequences[i].append(str, strlen(str));
341  str = strtok(NULL, DELIMITERS);
342  }
343  i++;
344  }
345 
346  /* Read the rest of the input file */
347  while(!file.eof()) {
348 
349  /* Try to get for each sequences its corresponding residues */
350  i = 0;
351  while((i < sequenNumber) && (!file.eof())) {
352  /* Read lines in a safer way. Destroy previous stored information */
353  if (line != NULL)
354  delete [] line;
355 
356  line = utils::readLine(file);
357  /* It the input line/s are blank lines, skip the loop iteration */
358  if(line == NULL)
359  continue;
360 
361  /* Remove from the current line non-printable characters and add fragments
362  * to previous stored sequence */
363  str = strtok(line, DELIMITERS);
364  while(str != NULL) {
365  sequences[i].append(str, strlen(str));
366  str = strtok(NULL, DELIMITERS);
367  }
368  i++;
369  }
370  }
371 
372  /* Close the input file and delete dinamic memory */
373  file.close();
374  if (line != NULL)
375  delete [] line;
376 
377  /* Check the matrix's content */
378  return fillMatrices(true);
379 }
380 
381 bool alignment::loadPhylip3_2Alignment(char *alignmentFile) {
382  /* PHYLIP 3.2 (Interleaved) file format parser */
383 
384  int i, blocksFirstLine, firstLine = true;
385  char *str, *line = NULL;
386  ifstream file;
387 
388  /* Check the file and its content */
389  file.open(alignmentFile, ifstream::in);
390  if(!utils::checkFile(file))
391  return false;
392 
393  /* Store the file name for futher format conversion*/
394  filename.append("!Title ");
395  filename.append(alignmentFile);
396  filename.append(";");
397 
398  /* Read first valid line in a safer way */
399  do {
400  line = utils::readLine(file);
401  } while ((line == NULL) && (!file.eof()));
402 
403  /* If the file end is reached without a valid line, warn about it */
404  if (file.eof())
405  return false;
406 
407  /* Get the sequences and residues numbers. If there is any mistake,
408  * return a FALSE value to warn about the possible error */
409  str = strtok(line, DELIMITERS);
410  sequenNumber = 0;
411  if(str != NULL)
412  sequenNumber = atoi(str);
413 
414  str = strtok(NULL, DELIMITERS);
415  residNumber = 0;
416  if(str != NULL)
417  residNumber = atoi(str);
418 
419  if((sequenNumber == 0) || (residNumber == 0))
420  return false;
421 
422  /* Reserve memory according to the input parameters */
423  sequences = new string[sequenNumber];
424  seqsName = new string[sequenNumber];
425 
426  /* Point to the first sequence in the alignment. Since the alignment could not
427  * have blank lines to separate the different sequences. Store the blocks size
428  * for the first line including a sequence identifier */
429  i = 0;
430  blocksFirstLine = 0;
431 
432  do {
433  /* Read lines in a safer way. Destroy previous stored information */
434  if (line != NULL)
435  delete [] line;
436 
437  line = utils::readLine(file);
438  /* If there is nothing in the input line, skip the loop instructions */
439  if(line == NULL)
440  continue;
441 
442  str = strtok(line, DELIMITERS);
443  /* First block: Sequence Name + Sequence fragment. Count how many blocks
444  * the first sequence line is divided. It could help to identify the
445  * different sequences from the input file */
446  if(firstLine) {
447  seqsName[i].append(str, strlen(str));
448  str = strtok(NULL, OTHDELIMITERS);
449  firstLine = 1;
450  }
451 
452  /* Sequence fragment */
453  while(str != NULL) {
454  sequences[i].append(str, strlen(str));
455  str = strtok(NULL, OTHDELIMITERS);
456  /* Count the blocks number for the sequences first line */
457  if (firstLine)
458  firstLine += 1;
459  }
460 
461  /* Store the blocks number for the first sequence including the name */
462  if ((blocksFirstLine == 0) and firstLine)
463  blocksFirstLine = firstLine;
464 
465  /* If a false positive new sequence was detected, add stored information for
466  * the current sequence to the previous one and clear the data structure
467  * for the current sequence. Finally, move the sequence pointer to the
468  * previous one. */
469  if ((firstLine != false) and (firstLine != blocksFirstLine)) {
470  sequences[i-1].append(seqsName[i]);
471  seqsName[i].clear();
472  sequences[i-1].append(sequences[i]);
473  sequences[i].clear();
474  i --;
475  }
476 
477  firstLine = false;
478  /* There are many ways to detect a new sequence. */
479  /* One of them -experimental- is just to detect if the residues number for
480  * the current entry is equal to the residues number for the whole align */
481  if ((int) sequences[i].size() == residNumber) {
482  firstLine = true;
483  i++;
484  }
485  } while(!file.eof());
486 
487  /* Close the input file and delete dinamic memory */
488  file.close();
489  if (line != NULL)
490  delete [] line;
491 
492  /* Check the matrix's content */
493  return fillMatrices(true);
494 }
495 
496 bool alignment::loadClustalAlignment(char *alignmentFile) {
497  /* CLUSTAL file format parser */
498 
499  int i, seqLength, pos, firstBlock;
500  char *str, *line = NULL;
501  ifstream file;
502 
503  /* Check input file and its content */
504  file.open(alignmentFile, ifstream::in);
505  if(!utils::checkFile(file))
506  return false;
507 
508  /* Store some details about input file to be used in posterior format
509  * conversions */
510  filename.append("!Title ");
511  filename.append(alignmentFile);
512  filename.append(";");
513 
514  /* The first valid line corresponding to CLUSTAL label is ignored */
515  do {
516  line = utils::readLine(file);
517  } while ((line == NULL) && (!file.eof()));
518 
519  /* If the file end is reached without a valid line, warn about it */
520  if (file.eof())
521  return false;
522 
523  /* Ignore blank lines before first sequence block starts */
524  while(!file.eof()) {
525 
526  /* Deallocate previously used dynamic memory */
527  if (line != NULL)
528  delete [] line;
529 
530  /* Read lines in safe way */
531  line = utils::readLine(file);
532 
533  if (line != NULL)
534  break;
535  }
536 
537  /* The program in only interested in the first blocks of sequences since
538  * it wants to know how many sequences are in the input file */
539  sequenNumber = 0;
540  while(!file.eof()) {
541 
542  /* If a new line without any valid character is detected
543  * means the first block is over */
544  if (line == NULL)
545  break;
546 
547  /* Count how many times standard characters as well as
548  * gap symbol "-" is detected in current line. */
549  seqLength = (int) strlen(line);
550  for(pos = 0; pos < seqLength; pos++)
551  if((isalpha(line[pos])) || (line[pos] == '-'))
552  break;
553 
554  /* If not standard characters are detected in current line means that
555  * the program has found typical line in clustal alignment files to mark
556  * some scores for some columns. In that case, the first block is over */
557  if(pos == seqLength)
558  break;
559  sequenNumber++;
560 
561  /* Deallocate previously used dynamic memory */
562  if (line != NULL)
563  delete [] line;
564 
565  /* Read lines in safe way */
566  line = utils::readLine(file);
567  }
568 
569  /* Finish to preprocess the input file. */
570  file.clear();
571  file.seekg(0);
572 
573  /* Allocate memory for the input alignmet */
574  seqsName = new string[sequenNumber];
575  sequences = new string[sequenNumber];
576 
577  /* Read the title line and store it */
578  line = utils::readLine(file);
579  if (line == NULL)
580  return false;
581  aligInfo.append(line, strlen(line));
582 
583  /* Ignore blank lines before first sequence block starts */
584  while(!file.eof()) {
585 
586  /* Deallocate previously used dynamic memory */
587  if (line != NULL)
588  delete [] line;
589 
590  /* Read lines in safe way */
591  line = utils::readLine(file);
592 
593  if (line != NULL)
594  break;
595  }
596 
597  /* Set-up sequences pointer to the first one and the flag to indicate
598  * the first blocks. That flag implies that sequences names have to be
599  * stored */
600  i = 0;
601  firstBlock = true;
602 
603  while(!file.eof()) {
604 
605  if (line == NULL) {
606  /* Sometimes, clustalw files does not have any marker after first block
607  * to indicate conservation between its columns residues. In that cases,
608  * mark the end of first block */
609  if (i == 0)
610  firstBlock = false;
611  /* Read current line and analyze it*/
612  line = utils::readLine(file);
613  continue;
614  }
615 
616  /* Check whteher current line is a standard line or it is a line to mark
617  * quality scores for that alignment column */
618  seqLength = (int) strlen(line);
619  for(pos = 0; pos < seqLength; pos++)
620  if((isalpha(line[pos])) || (line[pos] == '-'))
621  break;
622 
623  /* Start a new block in the input alignment */
624  if (pos == seqLength) {
625  firstBlock = false;
626 
627  /* Deallocate dinamic memory if it has been used before */
628  if (line != NULL)
629  delete [] line;
630 
631  /* Read current line and analyze it*/
632  line = utils::readLine(file);
633 
634  continue;
635  }
636 
637  /* If it is a standard line, split it into two parts. The first one contains
638  * sequence name and the second one the residues. If the "firstBlock" flag
639  * is active then store the sequence name */
640  str = strtok(line, OTHDELIMITERS);
641  if(str != NULL) {
642  if(firstBlock)
643  seqsName[i].append(str, strlen(str));
644  str = strtok(NULL, OTHDELIMITERS);
645  if(str != NULL)
646  sequences[i].append(str, strlen(str));
647 
648  /* Move sequences pointer in a circular way */
649  i = (i + 1) % sequenNumber;
650  }
651 
652  /* Deallocate dinamic memory if it has been used before */
653  if (line != NULL)
654  delete [] line;
655 
656  /* Read current line and analyze it*/
657  line = utils::readLine(file);
658  }
659 
660  /* Close the input file */
661  file.close();
662 
663  /* Deallocate dinamic memory */
664  if (line != NULL)
665  delete [] line;
666 
667  /* Check the matrix's content */
668  return fillMatrices(true);
669 }
670 
671 bool alignment::loadFastaAlignment(char *alignmentFile) {
672  /* FASTA file format parser */
673 
674  char *str, *line = NULL;
675  ifstream file;
676  int i;
677 
678  /* Check the file and its content */
679  file.open(alignmentFile, ifstream::in);
680  if(!utils::checkFile(file))
681  return false;
682 
683  /* Store input file name for posterior uses in other formats */
684  filename.append("!Title ");
685  filename.append(alignmentFile);
686  filename.append(";");
687 
688  /* Compute how many sequences are in the input alignment */
689  sequenNumber = 0;
690  while(!file.eof()) {
691 
692  /* Deallocate previously used dinamic memory */
693  if (line != NULL)
694  delete [] line;
695 
696  /* Read lines in a safe way */
697  line = utils::readLine(file);
698  if (line == NULL)
699  continue;
700 
701  /* It the line starts by ">" means that a new sequence has been found */
702  str = strtok(line, DELIMITERS);
703  if (str == NULL)
704  continue;
705 
706  /* If a sequence name flag is detected, increase sequences counter */
707  if(str[0] == '>')
708  sequenNumber++;
709  }
710 
711  /* Finish to preprocess the input file. */
712  file.clear();
713  file.seekg(0);
714 
715  /* Allocate memory for the input alignmet */
716  seqsName = new string[sequenNumber];
717  sequences = new string[sequenNumber];
718  seqsInfo = new string[sequenNumber];
719 
720  for(i = -1; (i < sequenNumber) && (!file.eof()); ) {
721 
722  /* Deallocate previously used dinamic memory */
723  if (line != NULL)
724  delete [] line;
725 
726  /* Read lines in a safe way */
727  line = utils::readLine(file);
728  if (line == NULL)
729  continue;
730 
731  /* Store original header fom input sequences including non-standard
732  * characters */
733  if (line[0] == '>')
734  seqsInfo[i+1].append(&line[1], strlen(line) - 1);
735 
736  /* Cut the current line and check whether there are valid characters */
737  str = strtok(line, OTHDELIMITERS);
738  if (str == NULL)
739  continue;
740 
741  /* Check whether current line belongs to the current sequence
742  * or it is a new one. In that case, store the sequence name */
743  if(str[0] == '>') {
744  /* Move sequence name pointer until a valid string name is obtained */
745  do {
746  str = str + 1;
747  } while(strlen(str) == 0);
748  seqsName[++i].append(str, strlen(str));
749  continue;
750  }
751 
752  /* Sequence */
753  while(str != NULL) {
754  sequences[i].append(str, strlen(str));
755  str = strtok(NULL, DELIMITERS);
756  }
757  }
758 
759  /* Close the input file */
760  file.close();
761 
762  /* Deallocate previously used dinamic memory */
763  if (line != NULL)
764  delete [] line;
765 
766  /* Check the matrix's content */
767  return fillMatrices(false);
768 }
769 
770 bool alignment::loadNexusAlignment(char *alignmentFile) {
771 
772  /* NEXUS file format parser */
773  char *frag = NULL, *str = NULL, *line = NULL;
774  int i, pos, state, firstBlock;
775  ifstream file;
776 
777  /* Check the file and its content */
778  file.open(alignmentFile, ifstream::in);
779  if(!utils::checkFile(file))
780  return false;
781 
782  /* Store input file name for posterior uses in other formats */
783  /* We store the file name */
784  filename.append("!Title ");
785  filename.append(alignmentFile);
786  filename.append(";");
787 
788  state = false;
789  do {
790 
791  /* Destroy previous assigned memory */
792  if (line != NULL)
793  delete [] line;
794 
795  /* Read line in a safer way */
796  line = utils::readLine(file);
797  if (line == NULL)
798  continue;
799 
800  /* Discard line where there is not information */
801  str = strtok(line, DELIMITERS);
802  if(str == NULL)
803  continue;
804 
805  /* If the line has any kind of information, try to catch it */
806  /* Firstly, convert to capital letters the input line */
807  for(i = 0; i < (int) strlen(str); i++)
808  str[i] = toupper(str[i]);
809 
810  /* ... and then compare it again specific tags */
811  if(!strcmp(str, "BEGIN"))
812  state = true;
813 
814  else if(!strcmp(str, "MATRIX"))
815  break;
816 
817  /* Store information about input format file */
818  else if(!strcmp(str, "FORMAT")) {
819  str = strtok(NULL, DELIMITERS);
820  while(str != NULL) {
821  aligInfo.append(str, strlen(str));
822  aligInfo.append(" ", strlen(" "));
823  str = strtok(NULL, DELIMITERS);
824  }
825  }
826 
827  /* In this case, try to get matrix dimensions */
828  else if((!strcmp(str, "DIMENSIONS")) && state) {
829  str = strtok(NULL, DELIMITERS);
830  frag = strtok(NULL, DELIMITERS);
831  str = strtok(str, "=;");
832  sequenNumber = atoi(strtok(NULL, "=;"));
833  frag = strtok(frag, "=;");
834  residNumber = atoi(strtok(NULL, "=;"));
835  }
836  } while(!file.eof());
837 
838  /* Check all parameters */
839  if(strcmp(str, "MATRIX") || (sequenNumber == 0) || (residNumber == 0))
840  return false;
841 
842  /* Allocate memory for the input alignmet */
843  seqsName = new string[sequenNumber];
844  sequences = new string[sequenNumber];
845 
846  pos = 0;
847  state = false;
848  firstBlock = true;
849 
850  while(!file.eof()) {
851  /* Destroy previous assigned memory */
852  if (line != NULL)
853  delete [] line;
854 
855  /* Read line in a safer way */
856  line = utils::readLine(file);
857  if (line == NULL)
858  continue;
859 
860  /* Discard any comments from input file */
861  for(i = 0; i < (int) strlen(line); i++) {
862  if (line[i] == '[')
863  state = true;
864  else if (line[i] == ']' && state) {
865  state = false;
866  break;
867  }
868  }
869 
870  /* If there is a multi-line comments, skip it as well */
871  if ((state) || (not state && i != (int) strlen(line)))
872  continue;
873 
874  /* Check for a specific tag indicating matrix end */
875  if((!strncmp(line, "end;", 4)) || (!strncmp(line, "END;", 4)))
876  break;
877 
878  /* Split input line and check it if it is valid */
879  str = strtok(line, OTH2DELIMITERS);
880  if (str == NULL)
881  continue;
882 
883  /* Store the sequence name, only from the first block */
884  if(firstBlock)
885  seqsName[pos].append(str, strlen(str));
886 
887  /* Store rest of line as part of sequence */
888  str = strtok(NULL, OTH2DELIMITERS);
889  while(str != NULL) {
890  sequences[pos].append(str, strlen(str));
891  str = strtok(NULL, OTH2DELIMITERS);
892  }
893 
894  /* Move sequences pointer to next one. It if it is last one, move it to
895  * the beginning and set the first block to false for avoiding to rewrite
896  * sequences name */
897  pos = (pos + 1) % sequenNumber;
898  if (not pos)
899  firstBlock = false;
900  }
901 
902  /* Deallocate memory */
903  if (line != NULL)
904  delete [] line;
905 
906  /* Close the input file */
907  file.close();
908 
909  /* Check the matrix's content */
910  return fillMatrices(true);
911 }
912 
913 bool alignment::loadMegaNonInterleavedAlignment(char *alignmentFile) {
914  /* MEGA sequential file format parser */
915 
916  char *frag = NULL, *str = NULL, *line = NULL;
917  ifstream file;
918  int i;
919 
920  /* Check the file and its content */
921  file.open(alignmentFile, ifstream::in);
922  if(!utils::checkFile(file))
923  return false;
924 
925  /* Filename is stored as a title for MEGA input alignment.
926  * If it is detected later a label "TITLE" in input file, this information
927  * will be replaced for that one */
928  filename.append("!Title ");
929  filename.append(alignmentFile);
930  filename.append(";");
931 
932  /* Skip first valid line */
933  do {
934  line = utils::readLine(file);
935  } while ((line == NULL) && (!file.eof()));
936 
937  /* If the file end is reached without a valid line, warn about it */
938  if (file.eof())
939  return false;
940 
941  /* Try to get input alignment information */
942  while(!file.eof()) {
943 
944  /* Destroy previously allocated memory */
945  if (line != NULL)
946  delete [] line;
947 
948  /* Read a new line in a safe way */
949  line = utils::readLine(file);
950  if (line == NULL)
951  continue;
952 
953  /* If a sequence name flag is found, go out from getting information loop */
954  if(!strncmp(line, "#", 1))
955  break;
956 
957  /* Destroy previously allocated memory */
958  if (frag != NULL)
959  delete [] frag;
960 
961  /* Create a local copy from input line */
962  frag = new char[strlen(line) + 1];
963  strcpy(frag, line);
964 
965  /* Split input line copy into pieces and analize it
966  * looking for specific labels */
967  str = strtok(frag, "!: ");
968  for(i = 0; i < (int) strlen(str); i++)
969  str[i] = toupper(str[i]);
970 
971  /* If TITLE label is found, replace previously stored information with
972  * this info */
973  if(!strcmp(str, "TITLE")) {
974  filename.clear();
975  if(strncmp(line, "!", 1))
976  filename += "!";
977  filename += line;
978  }
979 
980  /* If FORMAT label is found, try to get some details from input file */
981  else if(!strcmp(str, "FORMAT"))
982  aligInfo.append(line, strlen(line));
983  }
984 
985  /* Deallocate local memory */
986  if (frag != NULL)
987  delete [] frag;
988 
989  /* Count how many sequences are in input alignment file */
990  do {
991 
992  /* Check whether input line is valid or not */
993  if (line == NULL) {
994  line = utils::readLine(file);
995  continue;
996  }
997 
998  /* If current line starts by a # means that it is a sequence name */
999  if (!strncmp(line, "#", 1))
1000  sequenNumber++;
1001 
1002  /* Destroy previously allocated memory */
1003  if (line != NULL)
1004  delete [] line;
1005 
1006  /* Read a new line in a safe way */
1007  line = utils::readLine(file);
1008 
1009  } while(!file.eof());
1010 
1011  /* Move file pointer to the beginner */
1012  file.clear();
1013  file.seekg(0);
1014 
1015  /* Allocate memory */
1016  seqsName = new string[sequenNumber];
1017  sequences = new string[sequenNumber];
1018 
1019  /* Skip first line */
1020  line = utils::readLine(file);
1021 
1022  /* Skip lines until first sequence name is found */
1023  while(!file.eof()) {
1024 
1025  /* Destroy previously allocated memory */
1026  if (line != NULL)
1027  delete [] line;
1028 
1029  /* Read a new line in a safe way */
1030  line = utils::readLine(file);
1031  if (line == NULL)
1032  continue;
1033 
1034  /* If sequence name label is found, go out from loop */
1035  if (!strncmp(line, "#", 1))
1036  break;
1037  }
1038 
1039  /* First sequence is already detected so its name should be stored */
1040  i = -1;
1041 
1042  /* This loop is a bit tricky because first sequence name has been already
1043  * detected, so it is necessary to process it before moving to next line.
1044  * That implies that lines are read at loop ends */
1045  while(!file.eof()) {
1046 
1047  /* Skip blank lines */
1048  if (line == NULL) {
1049  line = utils::readLine(file);
1050  continue;
1051  }
1052 
1053  /* Skip lines with comments */
1054  if (!strncmp(line, "!", 1)) {
1055  /* Deallocate memory and read a new line */
1056  delete [] line;
1057  line = utils::readLine(file);
1058  continue;
1059  }
1060 
1061  /* Remove comments inside of sequences and split input line */
1062  frag = utils::trimLine(line);
1063 
1064  /* Skip lines with only comments */
1065  if (frag == NULL) {
1066 
1067  /* Deallocate memory and read a new line */
1068  if (line != NULL)
1069  delete [] line;
1070  line = utils::readLine(file);
1071  continue;
1072  }
1073 
1074  /* Otherwise, split it into fragments */
1075  str = strtok(frag, " #\n");
1076 
1077  /* Sequence Name */
1078  if (!strncmp(line, "#", 1)) {
1079  i += 1;
1080  seqsName[i].append(str, strlen(str));
1081  str = strtok(NULL, " #\n");
1082  }
1083 
1084  /* Sequence itself */
1085  while(str != NULL) {
1086  sequences[i].append(str, strlen(str));
1087  str = strtok(NULL, " \n");
1088  }
1089 
1090  /* Deallocate dynamic memory */
1091  if (frag != NULL)
1092  delete [] frag;
1093 
1094  if (line != NULL)
1095  delete [] line;
1096 
1097  /* Read a new line in a safe way */
1098  line = utils::readLine(file);
1099  }
1100 
1101  /* Close input file */
1102  file.close();
1103 
1104  /* Deallocate dynamic memory */
1105  if (line != NULL)
1106  delete [] line;
1107 
1108  /* Check the matrix's content */
1109  return fillMatrices(true);
1110 }
1111 
1112 bool alignment::loadMegaInterleavedAlignment(char *alignmentFile) {
1113  /* MEGA interleaved file format parser */
1114 
1115  char *frag = NULL, *str = NULL, *line = NULL;
1116  int i, firstBlock = true;
1117  ifstream file;
1118 
1119  /* Check the file and its content */
1120  file.open(alignmentFile, ifstream::in);
1121  if(!utils::checkFile(file))
1122  return false;
1123 
1124  /* Filename is stored as a title for MEGA input alignment.
1125  * If it is detected later a label "TITLE" in input file, this information
1126  * will be replaced for that one */
1127  filename.append("!Title ");
1128  filename.append(alignmentFile);
1129  filename.append(";");
1130 
1131  /* Skip first valid line */
1132  do {
1133  line = utils::readLine(file);
1134  } while ((line == NULL) && (!file.eof()));
1135 
1136  /* If the file end is reached without a valid line, warn about it */
1137  if (file.eof())
1138  return false;
1139 
1140  /* Try to get input alignment information */
1141  while(!file.eof()) {
1142 
1143  /* Destroy previously allocated memory */
1144  if (line != NULL)
1145  delete [] line;
1146 
1147  /* Read a new line in a safe way */
1148  line = utils::readLine(file);
1149  if (line == NULL)
1150  continue;
1151 
1152  /* If a sequence name flag is found, go out from getting information loop */
1153  if(!strncmp(line, "#", 1))
1154  break;
1155 
1156  /* Create a local copy from input line */
1157  frag = new char[strlen(line) + 1];
1158  strcpy(frag, line);
1159 
1160  /* Split input line copy into pieces and analize it
1161  * looking for specific labels */
1162  str = strtok(frag, "!: ");
1163  for(i = 0; i < (int) strlen(str); i++)
1164  str[i] = toupper(str[i]);
1165 
1166  /* If TITLE label is found, replace previously stored information with
1167  * this info */
1168  if(!strcmp(str, "TITLE")) {
1169  filename.clear();
1170  if(strncmp(line, "!", 1))
1171  filename += "!";
1172  filename += line;
1173  }
1174 
1175  /* If FORMAT label is found, try to get some details from input file */
1176  else if(!strcmp(str, "FORMAT"))
1177  aligInfo.append(line, strlen(line));
1178 
1179  /* Destroy previously allocated memory */
1180  if (frag != NULL)
1181  delete [] frag;
1182  }
1183 
1184  /* Count how many sequences are in input file */
1185  while(!file.eof()) {
1186 
1187  /* If a sequence name flag has been detected, increase counter */
1188  if(!strncmp(line, "#", 1))
1189  sequenNumber++;
1190 
1191  /* Deallocate dynamic memory */
1192  if(line != NULL)
1193  delete [] line;
1194 
1195  /* Read lines in a safe way */
1196  line = utils::readLine(file);
1197 
1198  /* If a blank line is detected means first block of sequences is over */
1199  /* Then, break counting sequences loop */
1200  if (line == NULL)
1201  break;
1202  }
1203 
1204  /* Finish to preprocess the input file. */
1205  file.clear();
1206  file.seekg(0);
1207 
1208  /* Allocate memory */
1209  seqsName = new string[sequenNumber];
1210  sequences = new string[sequenNumber];
1211 
1212  /* Skip first line */
1213  line = utils::readLine(file);
1214 
1215  /* Skip lines until first # flag is reached */
1216  while(!file.eof()) {
1217 
1218  /* Deallocate previously used dynamic memory */
1219  if (line != NULL)
1220  delete [] line;
1221 
1222  /* Read line in a safer way */
1223  line = utils::readLine(file);
1224 
1225  /* Determine whether a # flag has been found in current string */
1226  if (line != NULL)
1227  if(!strncmp(line, "#", 1))
1228  break;
1229  }
1230 
1231  /* Read sequences and get from first block, the sequences names */
1232  i = 0;
1233  firstBlock = true;
1234 
1235  while(!file.eof()) {
1236 
1237  if (line == NULL) {
1238  /* Read line in a safer way */
1239  line = utils::readLine(file);
1240  continue;
1241  }
1242 
1243  if (!strncmp(line, "!", 1)) {
1244  /* Deallocate memory and read a new line */
1245  delete [] line;
1246  line = utils::readLine(file);
1247  continue;
1248  }
1249 
1250  /* Trim lines from any kind of comments and split it */
1251  frag = utils::trimLine(line);
1252  str = strtok(frag, " #\n");
1253 
1254  /* Check whether a line fragment is valid or not */
1255  if (str == NULL)
1256  continue;
1257 
1258  /* Store sequences names if firstBlock flag is TRUE */
1259  if(firstBlock)
1260  seqsName[i].append(str, strlen(str));
1261 
1262  /* Store sequence */
1263  str = strtok(NULL, " \n");
1264  while(str != NULL) {
1265  sequences[i].append(str, strlen(str));
1266  str = strtok(NULL, " \n");
1267  }
1268 
1269  /* Deallocate previously used dynamic memory */
1270  if (frag != NULL)
1271  delete [] frag;
1272 
1273  if (line != NULL)
1274  delete [] line;
1275 
1276  /* Read line in a safer way */
1277  line = utils::readLine(file);
1278 
1279  i = (i + 1) % sequenNumber;
1280  if (i == 0)
1281  firstBlock = false;
1282  }
1283 
1284  /* Close input file */
1285  file.close();
1286 
1287  /* Deallocate local memory */
1288  if (line != NULL)
1289  delete [] line;
1290 
1291  /* Check the matrix's content */
1292  return fillMatrices(true);
1293 }
1294 
1295 bool alignment::loadNBRF_PirAlignment(char *alignmentFile) {
1296  /* NBRF/PIR file format parser */
1297 
1298  bool seqIdLine, seqLines;
1299  char *str, *line = NULL;
1300  ifstream file;
1301  int i;
1302 
1303  /* Check the file and its content */
1304  file.open(alignmentFile, ifstream::in);
1305  if(!utils::checkFile(file))
1306  return false;
1307 
1308  /* Store input file name for posterior uses in other formats */
1309  filename.append("!Title ");
1310  filename.append(alignmentFile);
1311  filename.append(";");
1312 
1313  /* Compute how many sequences are in the input alignment */
1314  sequenNumber = 0;
1315  while(!file.eof()) {
1316 
1317  /* Deallocate previously used dinamic memory */
1318  if (line != NULL)
1319  delete [] line;
1320 
1321  /* Read lines in a safe way */
1322  line = utils::readLine(file);
1323  if (line == NULL)
1324  continue;
1325 
1326  /* It the line starts by ">" means that a new sequence has been found */
1327  str = strtok(line, DELIMITERS);
1328  if (str == NULL)
1329  continue;
1330 
1331  /* If a sequence name flag is detected, increase sequences counter */
1332  if(str[0] == '>')
1333  sequenNumber++;
1334  }
1335 
1336  /* Finish to preprocess the input file. */
1337  file.clear();
1338  file.seekg(0);
1339 
1340  /* Allocate memory for the input alignmet */
1341  sequences = new string[sequenNumber];
1342  seqsName = new string[sequenNumber];
1343  seqsInfo = new string[sequenNumber];
1344 
1345  /* Initialize some local variables */
1346  seqIdLine = true;
1347  seqLines = false;
1348  i = -1;
1349 
1350  /* Read the entire input file */
1351  while(!file.eof()) {
1352 
1353  /* Deallocate local memory */
1354  if (line != NULL)
1355  delete [] line;
1356 
1357  /* Read lines in a safe way */
1358  line = utils::readLine(file);
1359  if (line == NULL)
1360  continue;
1361 
1362  /* Sequence ID line.
1363  * Identification of these kind of lines is based on presence of ">" and ";"
1364  * symbols at positions 0 and 3 respectively */
1365  if((line[0] == '>') && (line[3] == ';') && (seqIdLine)) {
1366  seqIdLine = false;
1367  i += 1;
1368 
1369  /* Store information about sequence datatype */
1370  str = strtok(line, ">;");
1371  seqsInfo[i].append(str, strlen(str));
1372 
1373  /* and the sequence identifier itself */
1374  str = strtok(NULL, ">;");
1375  seqsName[i].append(str, strlen(str));
1376  }
1377 
1378  /* Line just after sequence Id line contains a textual description of
1379  * the sequence. */
1380  else if((!seqIdLine) && (!seqLines)) {
1381  seqLines = true;
1382  seqsInfo[i].append(line, strlen(line));
1383  }
1384 
1385  /* Sequence lines itself */
1386  else if (seqLines) {
1387 
1388  /* Check whether a sequence end symbol '*' exists in current line.
1389  * In that case, set appropriate flags to read a new sequence */
1390  if (line[strlen(line) - 1] == '*') {
1391  seqLines = false;
1392  seqIdLine = true;
1393  }
1394 
1395  /* Process line */
1396  str = strtok(line, OTHDELIMITERS);
1397  while (str != NULL) {
1398  sequences[i].append(str, strlen(str));
1399  str = strtok(NULL, OTHDELIMITERS);
1400  }
1401 
1402  /* In case the end symbol '*' has been detected, remove it */
1403  if(sequences[i][sequences[i].size() - 1] == '*')
1404  sequences[i].erase(sequences[i].size()-1);
1405 
1406  }
1407  }
1408  /* Close the input file */
1409  file.close();
1410 
1411  /* Deallocate dinamic memory */
1412  if (line != NULL)
1413  delete [] line;
1414 
1415  /* Check the matrix's content */
1416  return fillMatrices(true);
1417 }
1418 
1419 void alignment::alignmentPhylipToFile(ostream &file) {
1420  /* Generate output alignment in PHYLIP/PHYLIP 4 format (sequential) */
1421 
1422  int i, j, maxLongName;
1423  string *tmpMatrix;
1424 
1425  /* Check whether sequences in the alignment are aligned or not.
1426  * Warn about it if there are not aligned. */
1427  if (!isAligned) {
1428  cerr << endl << "ERROR: Sequences are not aligned. Format (PHYLIP) "
1429  << "not compatible with unaligned sequences." << endl << endl;
1430  return ;
1431  }
1432 
1433  /* Allocate local memory for generating output alignment */
1434  tmpMatrix = new string[sequenNumber];
1435 
1436  /* Depending on alignment orientation: forward or reverse. Copy directly
1437  * sequence information or get firstly the reversed sequences and then
1438  * copy it into local memory */
1439  for(i = 0; i < sequenNumber; i++)
1440  tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
1441 
1442  /* Depending on if short name flag is activated (limits sequence name up to
1443  * 10 characters) or not, get maximum sequence name length */
1444  maxLongName = PHYLIPDISTANCE;
1445  for(i = 0; (i < sequenNumber) && (!shortNames); i++)
1446  maxLongName = utils::max(maxLongName, seqsName[i].size());
1447 
1448  /* Generating output alignment */
1449  /* First Line: Sequences Number & Residued Number */
1450  file << " " << sequenNumber << " " << residNumber << endl;
1451 
1452  /* First Block: Sequences Names & First 60 residues */
1453  for(i = 0; i < sequenNumber; i++)
1454  file << setw(maxLongName + 3) << left << seqsName[i].substr(0, maxLongName)
1455  << tmpMatrix[i].substr(0, 60) << endl;
1456  file << endl;
1457 
1458  /* Rest of blocks: Print 60 residues per each blocks of sequences */
1459  for(i = 60; i < residNumber; i += 60) {
1460  for(j = 0; j < sequenNumber; j++)
1461  file << tmpMatrix[j].substr(i, 60) << endl;
1462  file << endl;
1463  }
1464  file << endl;
1465 
1466  /* Deallocate local memory */
1467  delete [] tmpMatrix;
1468 }
1469 
1470 void alignment::alignmentPhylip3_2ToFile(ostream &file) {
1471  /* Generate output alignment in PHYLIP 3.2 format (interleaved) */
1472 
1473  int i, j, k, maxLongName;
1474  string *tmpMatrix;
1475 
1476  /* Check whether sequences in the alignment are aligned or not.
1477  * Warn about it if there are not aligned. */
1478  if (!isAligned) {
1479  cerr << endl << "ERROR: Sequences are not aligned. Format (PHYLIP) "
1480  << "not compatible with unaligned sequences." << endl << endl;
1481  return ;
1482  }
1483 
1484  /* Allocate local memory for generating output alignment */
1485  tmpMatrix = new string[sequenNumber];
1486 
1487  /* Depending on alignment orientation: forward or reverse. Copy directly
1488  * sequence information or get firstly the reversed sequences and then
1489  * copy it into local memory */
1490  for(i = 0; i < sequenNumber; i++)
1491  tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
1492 
1493  /* Depending on if short name flag is activated (limits sequence name up to
1494  * 10 characters) or not, get maximum sequence name length */
1495  maxLongName = PHYLIPDISTANCE;
1496  for(i = 0; (i < sequenNumber) && (!shortNames); i++)
1497  maxLongName = utils::max(maxLongName, seqsName[i].size());
1498 
1499  /* Generating output alignment */
1500  /* First Line: Sequences Number & Residued Number */
1501  file << " " << sequenNumber << " " << residNumber << endl;
1502 
1503  /* Alignment */
1504  /* For each sequence, print its identifier and then the sequence itself in
1505  * blocks of 50 residues */
1506  for(i = 0; i < sequenNumber; i++) {
1507  /* Sequence Name */
1508  file << setw(maxLongName + 3) << left << seqsName[i].substr(0, maxLongName);
1509  /* Sequence. Each line contains a block of 5 times 10 residues. */
1510  for(j = 0; j < residNumber; j += 50) {
1511  for(k = j; (k < residNumber) && (k < (j + 50)); k += 10)
1512  file << sequences[i].substr(k, 10) << " ";
1513  file << endl;
1514  /* If the sequences end has not been reached, print black spaces
1515  * to follow format specifications */
1516  if((j + 50) < residNumber)
1517  file << setw(maxLongName + 3) << " ";
1518  }
1519  /* Print a blank line to mark sequences separation */
1520  file << endl;
1521  }
1522 
1523  /* Deallocate local memory */
1524  delete [] tmpMatrix;
1525 }
1526 
1527 void alignment::alignmentPhylip_PamlToFile(ostream &file) {
1528  /* Generate output alignment in PHYLIP format compatible with PAML program */
1529 
1530  int i, maxLongName;
1531  string *tmpMatrix;
1532 
1533  /* Check whether sequences in the alignment are aligned or not.
1534  * Warn about it if there are not aligned. */
1535  if (!isAligned) {
1536  cerr << endl << "ERROR: Sequences are not aligned. Format (PHYLIP) "
1537  << "not compatible with unaligned sequences." << endl << endl;
1538  return ;
1539  }
1540 
1541  /* Allocate local memory for generating output alignment */
1542  tmpMatrix = new string[sequenNumber];
1543 
1544  /* Depending on alignment orientation: forward or reverse. Copy directly
1545  * sequence information or get firstly the reversed sequences and then
1546  * copy it into local memory */
1547  for(i = 0; i < sequenNumber; i++)
1548  tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
1549 
1550  /* Depending on if short name flag is activated (limits sequence name up to
1551  * 10 characters) or not, get maximum sequence name length */
1552  maxLongName = PHYLIPDISTANCE;
1553  for(i = 0; (i < sequenNumber) && (!shortNames); i++)
1554  maxLongName = utils::max(maxLongName, seqsName[i].size());
1555 
1556  /* Generating output alignment */
1557  /* First Line: Sequences Number & Residued Number */
1558  file << " " << sequenNumber << " " << residNumber << endl;
1559 
1560  /* Print alignment */
1561  /* Print sequences name follow by the sequence itself in the same line */
1562  for(i = 0; i < sequenNumber; i++)
1563  file << setw(maxLongName + 3) << left << seqsName[i].substr(0, maxLongName)
1564  << sequences[i] << endl;
1565  file << endl;
1566 
1567  /* Deallocate local memory */
1568  delete [] tmpMatrix;
1569 }
1570 
1571 void alignment::alignmentClustalToFile(ostream &file) {
1572  /* Generate output alignment in CLUSTAL format */
1573 
1574  int i, j, maxLongName = 0;
1575  string *tmpMatrix;
1576 
1577  /* Check whether sequences in the alignment are aligned or not.
1578  * Warn about it if there are not aligned. */
1579  if (!isAligned) {
1580  cerr << endl << "ERROR: Sequences are not aligned. Format (CLUSTAL) "
1581  << "not compatible with unaligned sequences." << endl << endl;
1582  return ;
1583  }
1584 
1585  /* Allocate local memory for generating output alignment */
1586  tmpMatrix = new string[sequenNumber];
1587 
1588  /* Depending on alignment orientation: forward or reverse. Copy directly
1589  * sequence information or get firstly the reversed sequences and then
1590  * copy it into local memory */
1591  for(i = 0; i < sequenNumber; i++)
1592  tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
1593 
1594  /* Compute maximum sequences name length */
1595  for(i = 0; (i < sequenNumber) && (!shortNames); i++)
1596  maxLongName = utils::max(maxLongName, seqsName[i].size());
1597 
1598  /* Print alignment header */
1599  if((aligInfo.size() != 0) && (iformat == oformat))
1600  file << aligInfo << endl << endl;
1601  else
1602  file << "CLUSTAL multiple sequence alignment" << endl << endl;
1603 
1604  /* Print alignment itself */
1605  /* Print as many blocks as it is needed of lines composed
1606  * by sequences name and 60 residues */
1607  for(j = 0; j < residNumber; j += 60) {
1608  for(i = 0; i < sequenNumber; i++)
1609  file << setw(maxLongName + 5) << left << seqsName[i]
1610  << tmpMatrix[i].substr(j, 60) << endl;
1611  file << endl << endl;
1612  }
1613 
1614  /* Deallocate local memory */
1615  delete [] tmpMatrix;
1616 }
1617 
1618 void alignment::alignmentFastaToFile(ostream &file) {
1619  /* Generate output alignment in FASTA format. Sequences can be unaligned. */
1620 
1621  int i, j, maxLongName;
1622  string *tmpMatrix;
1623 
1624  /* Allocate local memory for generating output alignment */
1625  tmpMatrix = new string[sequenNumber];
1626 
1627  /* Depending on alignment orientation: forward or reverse. Copy directly
1628  * sequence information or get firstly the reversed sequences and then
1629  * copy it into local memory */
1630  for(i = 0; i < sequenNumber; i++)
1631  tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
1632 
1633  /* Depending on if short name flag is activated (limits sequence name up to
1634  * 10 characters) or not, get maximum sequence name length. Consider those
1635  * cases when the user has asked to keep original sequence header */
1636  maxLongName = 0;
1637  for(i = 0; i < sequenNumber; i++)
1638  if (!keepHeader)
1639  maxLongName = utils::max(maxLongName, seqsName[i].size());
1640  else if (seqsInfo != NULL)
1641  maxLongName = utils::max(maxLongName, seqsInfo[i].size());
1642 
1643  if (shortNames && maxLongName > PHYLIPDISTANCE) {
1644  maxLongName = PHYLIPDISTANCE;
1645  if (keepHeader)
1646  cerr << endl << "WARNING: Original sequence header will be cut by charac"
1647  << "ter 10" << endl;
1648  }
1649  /* Print alignment. First, sequences name id and then the sequences itself */
1650  for(i = 0; i < sequenNumber; i++) {
1651  if (!keepHeader)
1652  file << ">" << seqsName[i].substr(0, maxLongName) << endl;
1653  else if (seqsInfo != NULL)
1654  file << ">" << seqsInfo[i].substr(0, maxLongName) << endl;
1655  for(j = 0; j < residuesNumber[i]; j+= 60)
1656  file << tmpMatrix[i].substr(j, 60) << endl;
1657  }
1658 
1659  /* Deallocate local memory */
1660  delete [] tmpMatrix;
1661 }
1662 
1663 void alignment::alignmentNexusToFile(ostream &file) {
1664  /* Generate output alignment in NEXUS format setting only alignment block */
1665 
1666  int i, j, k, maxLongName = 0;
1667  string *tmpMatrix;
1668 
1669  /* Check whether sequences in the alignment are aligned or not.
1670  * Warn about it if there are not aligned. */
1671  if (!isAligned) {
1672  cerr << endl << "ERROR: Sequences are not aligned. Format (NEXUS) "
1673  << "not compatible with unaligned sequences." << endl << endl;
1674  return ;
1675  }
1676 
1677  /* Allocate local memory for generating output alignment */
1678  tmpMatrix = new string[sequenNumber];
1679 
1680  /* Depending on alignment orientation: forward or reverse. Copy directly
1681  * sequence information or get firstly the reversed sequences and then
1682  * copy it into local memory */
1683  for(i = 0; i < sequenNumber; i++)
1684  tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
1685 
1686  /* Compute maximum sequences name length */
1687  for(i = 0; (i < sequenNumber) && (!shortNames); i++)
1688  maxLongName = utils::max(maxLongName, seqsName[i].size());
1689 
1690  /* Compute output file datatype */
1691  getTypeAlignment();
1692 
1693  /* Remove characters like ";" from input alignment information line */
1694  while((int) aligInfo.find(";") != (int) string::npos)
1695  aligInfo.erase(aligInfo.find(";"), 1);
1696 
1697  /* Print Alignment header */
1698  file << "#NEXUS" << endl << "BEGIN DATA;" << endl << " DIMENSIONS NTAX="
1699  << sequenNumber << " NCHAR=" << residNumber <<";" << endl;
1700 
1701  /* Print alignment datatype */
1702  if ((dataType == DNAType) || (dataType == DNADeg))
1703  file << "FORMAT DATATYPE=DNA INTERLEAVE=yes GAP=-";
1704  else if ((dataType == RNAType) || (dataType == RNADeg))
1705  file << "FORMAT DATATYPE=RNA INTERLEAVE=yes GAP=-";
1706  else if (dataType == AAType)
1707  file << "FORMAT DATATYPE=PROTEIN INTERLEAVE=yes GAP=-";
1708 
1709  i = 0;
1710  /* Using information from input alignment. Use only some tags. */
1711  while((j = aligInfo.find(" ", i)) != (int) string::npos) {
1712 
1713  if((aligInfo.substr(i, j - i)).compare(0, 7, "MISSING") == 0 ||
1714  (aligInfo.substr(i, j)).compare(0, 7, "missing") == 0)
1715  file << " " << (aligInfo.substr(i, j - i));
1716 
1717  else if((aligInfo.substr(i, j)).compare(0, 9, "MATCHCHAR") == 0 ||
1718  (aligInfo.substr(i, j)).compare(0, 9, "matchchar") == 0)
1719  file << " " << (aligInfo.substr(i, j - i));
1720 
1721  i = j + 1;
1722  }
1723  file << ";" << endl;
1724 
1725  /* Print sequence name and sequence length */
1726  for(i = 0; i < sequenNumber; i++)
1727  file << "[Name: " << setw(maxLongName + 4) << left << seqsName[i] << "Len: "
1728  << residNumber << "]" << endl;
1729  file << endl << "MATRIX" << endl;
1730 
1731  /* Print alignment itself. Sequence name and 50 residues blocks */
1732  for(j = 0; j < residNumber; j += 50) {
1733  for(i = 0; i < sequenNumber; i++) {
1734  file << setw(maxLongName + 4) << left << seqsName[i];
1735  for(k = j; k < (j + 50) && k < residNumber; k += 10)
1736  file << " " << sequences[i].substr(k, 10);
1737  file << endl;
1738  }
1739  file << endl;
1740  }
1741  file << ";" << endl << "END;" << endl;
1742 
1743  /* Deallocate local memory */
1744  delete [] tmpMatrix;
1745 }
1746 
1747 void alignment::alignmentMegaToFile(ostream &file) {
1748  /* Generate output alignment in MEGA format */
1749 
1750  int i, j, k;
1751  string *tmpMatrix;
1752 
1753  /* Check whether sequences in the alignment are aligned or not.
1754  * Warn about it if there are not aligned. */
1755  if (!isAligned) {
1756  cerr << endl << "ERROR: Sequences are not aligned. Format (MEGA) "
1757  << "not compatible with unaligned sequences." << endl << endl;
1758  return ;
1759  }
1760 
1761  /* Allocate local memory for generating output alignment */
1762  tmpMatrix = new string[sequenNumber];
1763 
1764  /* Depending on alignment orientation: forward or reverse. Copy directly
1765  * sequence information or get firstly the reversed sequences and then
1766  * copy it into local memory */
1767  for(i = 0; i < sequenNumber; i++)
1768  tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
1769 
1770  /* Compute output file datatype */
1771  getTypeAlignment();
1772 
1773  /* Print output alignment header */
1774  file << "#MEGA" << endl << filename << endl;
1775 
1776  /* Print alignment datatype */
1777  if ((dataType == DNAType) || (dataType == DNADeg))
1778  file << "!Format DataType=DNA ";
1779  else if ((dataType == RNAType) || (dataType == RNADeg))
1780  file << "!Format DataType=RNA ";
1781  else if (dataType == AAType)
1782  file << "!Format DataType=protein ";
1783 
1784  /* Print number of sequences and alignment length */
1785  file << "NSeqs=" << sequenNumber << " Nsites=" << residNumber
1786  << " indel=- CodeTable=Standard;" << endl << endl;
1787 
1788  /* Print sequences name and sequences divided into blocks of 50 residues */
1789  for(i = 0; i < sequenNumber; i++) {
1790  file << "#" << seqsName[i] << endl;
1791  for(j = 0; j < residNumber; j += 50) {
1792  for(k = j; ((k < residNumber) && (k < j + 50)); k += 10)
1793  file << tmpMatrix[i].substr(k, 10) << " ";
1794  file << endl;
1795  }
1796  file << endl;
1797  }
1798 
1799  /* Deallocate local memory */
1800  delete [] tmpMatrix;
1801 }
1802 
1803 void alignment::alignmentNBRF_PirToFile(ostream &file) {
1804  /* Generate output alignment in NBRF/PIR format. Sequences can be unaligned */
1805 
1806  int i, j, k;
1807  string alg_datatype, *tmpMatrix;
1808 
1809  /* Allocate local memory for generating output alignment */
1810  tmpMatrix = new string[sequenNumber];
1811 
1812  /* Depending on alignment orientation: forward or reverse. Copy directly
1813  * sequence information or get firstly the reversed sequences and then
1814  * copy it into local memory */
1815  for(i = 0; i < sequenNumber; i++)
1816  tmpMatrix[i] = (!reverse) ? sequences[i] : utils::getReverse(sequences[i]);
1817 
1818  /* Compute output file datatype */
1819  getTypeAlignment();
1820  if ((dataType == DNAType) || (dataType == DNADeg))
1821  alg_datatype = "DL";
1822  else if ((dataType == RNAType) || (dataType == RNADeg))
1823  alg_datatype = "RL";
1824  else if (dataType == AAType)
1825  alg_datatype = "P1";
1826 
1827  /* Print alignment */
1828  for(i = 0; i < sequenNumber; i++) {
1829 
1830  /* Print sequence datatype and its name */
1831  if((seqsInfo != NULL) && (iformat == oformat))
1832  file << ">" << seqsInfo[i].substr(0, 2) << ";" << seqsName[i]
1833  << endl << seqsInfo[i].substr(2) << endl;
1834  else
1835  file << ">" << alg_datatype << ";" << seqsName[i] << endl
1836  << seqsName[i] << " " << residuesNumber[i] << " bases" << endl;
1837 
1838  /* Write the sequence */
1839  for(j = 0; j < residuesNumber[i]; j += 50) {
1840  for(k = j; (k < residuesNumber[i]) && (k < (j + 50)); k += 10)
1841  file << " " << tmpMatrix[i].substr(k, 10);
1842 
1843  if(k >= residuesNumber[i]) {
1844  if((residuesNumber[i] % 50) == 0)
1845  file << endl << " ";
1846  else if((residuesNumber[i] % 10) == 0)
1847  file << " ";
1848  file << "*";
1849  }
1850  file << endl;
1851  }
1852  file << endl;
1853  }
1854 
1855  /* Deallocate local memory */
1856  delete [] tmpMatrix;
1857 }
1858 
1859 bool alignment::alignmentSummaryHTML(char *destFile, int residues, int seqs,
1860  int *selectedRes, int *selectedSeq, float *consValues) {
1861 
1862  /* Generate an HTML file with a visual summary about which sequences/columns
1863  * have been selected and which have not */
1864 
1865  int i, j, k, kj, upper, minHTML, maxLongName, *gapsValues;
1866  string tmpColumn;
1867  float *simValues;
1868  bool *res, *seq;
1869  ofstream file;
1870  char type;
1871 
1872  /* Allocate some local memory */
1873  tmpColumn.reserve(sequenNumber);
1874 
1875  /* Check whether sequences in the alignment are aligned or not.
1876  * Warn about it if there are not aligned. */
1877  if (!isAligned) {
1878  cerr << endl << "ERROR: Sequences are not aligned." << endl << endl;
1879  return false;
1880  }
1881 
1882  /* Open output file and check that file pointer is valid */
1883  file.open(destFile);
1884  if(!file)
1885  return false;
1886 
1887  /* Compute maximum sequences name length. */
1888  maxLongName = 0;
1889  for(i = 0; i < sequenNumber; i++)
1890  maxLongName = utils::max(maxLongName, seqsName[i].size());
1891 
1892  /* Compute HTML blank spaces */
1893  minHTML = utils::max(25, maxLongName + 10);
1894 
1895  /* Initialize local variables to control which columns/sequences
1896  * will be kept in the output alignment */
1897  res = new bool[residNumber];
1898  for(i = 0; i < residNumber; i++)
1899  res[i] = false;
1900 
1901  seq = new bool[sequenNumber];
1902  for(i = 0; i < sequenNumber; i++)
1903  seq[i] = false;
1904 
1905  /* Record which columns/sequences from original alignment
1906  * have been kept in the final one */
1907  for(i = 0; i < residues; i++)
1908  res[selectedRes[i]] = true;
1909  for(i = 0; i < seqs; i++)
1910  seq[selectedSeq[i]] = true;
1911 
1912  /* Recover some stats about different scores from current alignment */
1913  gapsValues = NULL;
1914  if (sgaps != NULL)
1915  gapsValues = sgaps -> getGapsWindow();
1916  simValues = NULL;
1917  if (scons != NULL)
1918  simValues = scons -> getMdkwVector();
1919 
1920  /* Print HTML header into output file */
1921  file << "<!DOCTYPE html>" << endl << "<html><head>" << endl << " <meta "
1922  << "http-equiv=\"Content-Type\" content=\"text/html;charset=ISO-8859-1\" />"
1923  << endl << " <title>trimAl v1.4 Summary</title>" << endl
1924  << " <style type=\"text/css\" media=\"all\">" << endl
1925 
1926  << " #b { background-color: #3366ff; }\n"
1927  << " #r { background-color: #cc0000; }\n"
1928  << " #g { background-color: #33cc00; }\n"
1929  << " #p { background-color: #ff6666; }\n"
1930  << " #m { background-color: #cc33cc; }\n"
1931  << " #o { background-color: #ff9900; }\n"
1932  << " #c { background-color: #46C7C7; }\n"
1933  << " #y { background-color: #FFFF00; }\n"
1934 
1935  << " .sel { background-color: #B9B9B9; }\n"
1936  << " .nsel { background-color: #E9E9E9; }\n"
1937 
1938  /* Sets of colors for high-lighting scores intervals */
1939  << " .c1 { background-color: #FFFBF2; }\n"
1940  << " .c2 { background-color: #FFF8CC; }\n"
1941  << " .c3 { background-color: #FAF0BE; }\n"
1942  << " .c4 { background-color: #F0EAD6; }\n"
1943  << " .c5 { background-color: #F3E5AB; }\n"
1944  << " .c6 { background-color: #F4C430; }\n"
1945  << " .c7 { background-color: #C2B280; color: white; }\n"
1946  << " .c8 { background-color: #DAA520; color: white; }\n"
1947  << " .c9 { background-color: #B8860B; color: white; }\n"
1948  << " .c10 { background-color: #918151; color: white; }\n"
1949  << " .c11 { background-color: #967117; color: white; }\n"
1950  << " .c12 { background-color: #6E5411; color: white; }\n"
1951 
1952  /* Other HTML elements */
1953  << " </style>\n </head>\n\n" << " <body>\n" << " <pre>" << endl;
1954 
1955  /* Show information about how many sequences/residues have been selected */
1956  file << " <span class=sel>Selected Sequences: " << setw(5) << right << seqs
1957  <<" /Selected Residues: " << setw(7) << right << residues << "</span>"
1958  << endl << " <span class=nsel>Deleted Sequences: " << setw(5) << right
1959  << sequenNumber - seqs << " /Deleted Residues: " << setw(7) << right
1960  << residNumber - residues << "</span>" << endl;
1961 
1962  /* Print headers for different scores derived from input alignment/s */
1963  if (gapsValues != NULL)
1964  file << endl << setw(minHTML) << left << " Gaps Scores: "
1965  << "<span class=c1> =0= </span><span class=c2> <.001 </span>"
1966  << "<span class=c3> <.050 </span><span class=c4> <.100 </span>"
1967  << "<span class=c5> <.150 </span><span class=c6> <.200 </span>"
1968  << "<span class=c7> <.250 </span><span class=c8> <.350 </span>"
1969  << "<span class=c9> <.500 </span><span class=c10> <.750 </span>"
1970  << "<span class=c11> <1.00 </span><span class=c12> =1= </span>";
1971 
1972  if (simValues != NULL)
1973  file << endl << setw(minHTML) << left << " Similarity Scores: "
1974  << "<span class=c1> =0= </span><span class=c2> <1e-6 </span>"
1975  << "<span class=c3> <1e-5 </span><span class=c4> <1e-4 </span>"
1976  << "<span class=c5> <.001 </span><span class=c6> <.010 </span>"
1977  << "<span class=c7> <.100 </span><span class=c8> <.250 </span>"
1978  << "<span class=c9> <.500 </span><span class=c10> <.750 </span>"
1979  << "<span class=c11> <1.00 </span><span class=c12> =1= </span>";
1980 
1981  if (consValues != NULL)
1982  file << endl << setw(minHTML) << left << " Consistency Scores: "
1983  << "<span class=c1> =0= </span><span class=c2> <.001 </span>"
1984  << "<span class=c3> <.050 </span><span class=c4> <.100 </span>"
1985  << "<span class=c5> <.150 </span><span class=c6> <.200 </span>"
1986  << "<span class=c7> <.250 </span><span class=c8> <.350 </span>"
1987  << "<span class=c9> <.500 </span><span class=c10> <.750 </span>"
1988  << "<span class=c11> <1.00 </span><span class=c12> =1= </span>";
1989 
1990  if ((gapsValues != NULL) or (simValues == NULL) or (consValues == NULL))
1991  file << endl;
1992 
1993  /* Print Sequences in block of BLOCK_SIZE */
1994  for(j = 0, upper = HTMLBLOCKS; j < residNumber; j += HTMLBLOCKS, upper +=
1995  HTMLBLOCKS) {
1996 
1997  /* Print main columns number */
1998  file << endl << setw(minHTML + 10) << right << (j + 10);
1999  for(i = j + 20; ((i <= residNumber) && (i <= upper)); i += 10)
2000  file << setw(10) << right << (i);
2001 
2002  /* Print special characters to delimit sequences blocks */
2003  file << endl << setw(minHTML + 1) << right;
2004  for(i = j + 1; ((i <= residNumber) && (i <= upper)); i++)
2005  file << (!(i % 10) ? "+" : "=");
2006  file << endl;
2007 
2008  /* Print sequences name */
2009  for(i = 0; i < sequenNumber; i++) {
2010  file << " <span class=" << ((seq[i]) ? "sel>" : "nsel>") << seqsName[i]
2011  << "</span>" << setw(minHTML - 4 - seqsName[i].size()) << right << "";
2012 
2013  /* Print residues corresponding to current sequences block */
2014  for(k = j; ((k < residNumber) && (k < upper)); k++) {
2015  for(kj = 0, tmpColumn.clear(); kj < sequenNumber; kj++)
2016  tmpColumn += sequences[kj][k];
2017  /* Determine residue color based on residues across the alig column */
2018  type = utils::determineColor(sequences[i][k], tmpColumn);
2019  if (type == 'w')
2020  file << sequences[i][k];
2021  else
2022  file << "<span id=" << type << ">" << sequences[i][k] << "</span>";
2023  }
2024  file << endl;
2025  }
2026 
2027  file << endl << setw(minHTML) << left << " Selected Cols: ";
2028  for(k = j; ((k < residNumber) && (k < (j + HTMLBLOCKS))); k++)
2029  file << "<span class=" << (res[k] ? "sel" : "nsel") << "> </span>";
2030  file << endl;
2031 
2032  /* If there is not any score to print, skip this part of the function */
2033  if ((gapsValues == NULL) and (simValues == NULL) and (consValues == NULL))
2034  continue;
2035 
2036  /* Print score colors according to certain predefined thresholds */
2037  if (gapsValues != NULL) {
2038  file << endl << setw(minHTML) << left << " Gaps Scores: ";
2039  for(k = j; ((k < residNumber) && (k < (j + HTMLBLOCKS))); k++)
2040  if(gapsValues[k] == 0)
2041  file << "<span class=c12> </span>";
2042  else if(gapsValues[k] == sequenNumber)
2043  file << "<span class=c1> </span>";
2044  else if(1 - (float(gapsValues[k])/sequenNumber) >= .750)
2045  file << "<span class=c11> </span>";
2046  else if(1 - (float(gapsValues[k])/sequenNumber) >= .500)
2047  file << "<span class=c10> </span>";
2048  else if(1 - (float(gapsValues[k])/sequenNumber) >= .350)
2049  file << "<span class=c9> </span>";
2050  else if(1 - (float(gapsValues[k])/sequenNumber) >= .250)
2051  file << "<span class=c8> </span>";
2052  else if(1 - (float(gapsValues[k])/sequenNumber) >= .200)
2053  file << "<span class=c7> </span>";
2054  else if(1 - (float(gapsValues[k])/sequenNumber) >= .150)
2055  file << "<span class=c6> </span>";
2056  else if(1 - (float(gapsValues[k])/sequenNumber) >= .100)
2057  file << "<span class=c5> </span>";
2058  else if(1 - (float(gapsValues[k])/sequenNumber) >= .050)
2059  file << "<span class=c4> </span>";
2060  else if(1 - (float(gapsValues[k])/sequenNumber) >= .001)
2061  file << "<span class=c3> </span>";
2062  else
2063  file << "<span class=c2> </span>";
2064  }
2065  if (simValues != NULL) {
2066  file << endl << setw(minHTML) << left << " Similarity Scores: ";
2067  for(k = j; ((k < residNumber) && (k < (j + HTMLBLOCKS))); k++)
2068  if(simValues[k] == 1)
2069  file << "<span class=c12> </span>";
2070  else if(simValues[k] == 0)
2071  file << "<span class=c1> </span>";
2072  else if(simValues[k] >= .750)
2073  file << "<span class=c11> </span>";
2074  else if(simValues[k] >= .500)
2075  file << "<span class=c10> </span>";
2076  else if(simValues[k] >= .250)
2077  file << "<span class=c9> </span>";
2078  else if(simValues[k] >= .100)
2079  file << "<span class=c8> </span>";
2080  else if(simValues[k] >= .010)
2081  file << "<span class=c7> </span>";
2082  else if(simValues[k] >= .001)
2083  file << "<span class=c6> </span>";
2084  else if(simValues[k] >= 1e-4)
2085  file << "<span class=c5> </span>";
2086  else if(simValues[k] >= 1e-5)
2087  file << "<span class=c4> </span>";
2088  else if(simValues[k] >= 1e-6)
2089  file << "<span class=c3> </span>";
2090  else
2091  file << "<span class=c2> </span>";
2092  }
2093  if (consValues != NULL) {
2094  file << endl << setw(minHTML) << left << " Consistency Scores: ";
2095  for(k = j; ((k < residNumber) && (k < (j + HTMLBLOCKS))); k++)
2096  if(consValues[k] == 1)
2097  file << "<span class=c12> </span>";
2098  else if(consValues[k] == 0)
2099  file << "<span class=c1> </span>";
2100  else if(consValues[k] >= .750)
2101  file << "<span class=c11> </span>";
2102  else if(consValues[k] >= .500)
2103  file << "<span class=c10> </span>";
2104  else if(consValues[k] >= .350)
2105  file << "<span class=c9> </span>";
2106  else if(consValues[k] >= .250)
2107  file << "<span class=c8> </span>";
2108  else if(consValues[k] >= .200)
2109  file << "<span class=c7> </span>";
2110  else if(consValues[k] >= .150)
2111  file << "<span class=c6> </span>";
2112  else if(consValues[k] >= .100)
2113  file << "<span class=c5> </span>";
2114  else if(consValues[k] >= .050)
2115  file << "<span class=c4> </span>";
2116  else if(consValues[k] >= .001)
2117  file << "<span class=c3> </span>";
2118  else
2119  file << "<span class=c2> </span>";
2120  }
2121  file << endl;
2122  }
2123 
2124  /* Print HTML footer into output file */
2125  file << " </pre>" << endl << " </body>" << endl << "</html>" << endl;
2126 
2127  /* Close output file and deallocate local memory */
2128  file.close();
2129  delete [] seq;
2130  delete [] res;
2131 
2132  return true;
2133 }
2134 
2135 bool alignment::alignmentColourHTML(ostream &file) {
2136 
2137  int i, j, kj, upper, k = 0, maxLongName = 0;
2138  string tmpColumn;
2139  char type;
2140 
2141  /* Allocate some local memory */
2142  tmpColumn.reserve(sequenNumber);
2143 
2144  /* Check whether sequences in the alignment are aligned or not.
2145  * Warn about it if there are not aligned. */
2146  if (!isAligned) {
2147  cerr << endl << "ERROR: Sequences are not aligned." << endl << endl;
2148  return false;
2149  }
2150 
2151  /* Compute maximum sequences name length */
2152  maxLongName = 0;
2153  for(i = 0; i < sequenNumber; i++)
2154  maxLongName = utils::max(maxLongName, seqsName[i].size());
2155 
2156 
2157  /* Print HTML header into output file */
2158  file << "<!DOCTYPE html>" << endl << "<html><head>" << endl << " <meta "
2159  << "http-equiv=\"Content-Type\" content=\"text/html;charset=ISO-8859-1\" />"
2160  << endl << " <title>readAl v1.4</title>" << endl
2161  << " <style type=\"text/css\">" << endl
2162  << " #b { background-color: #3366ff; }\n"
2163  << " #r { background-color: #cc0000; }\n"
2164  << " #g { background-color: #33cc00; }\n"
2165  << " #p { background-color: #ff6666; }\n"
2166  << " #m { background-color: #cc33cc; }\n"
2167  << " #o { background-color: #ff9900; }\n"
2168  << " #c { background-color: #46C7C7; }\n"
2169  << " #y { background-color: #FFFF00; }\n"
2170  << " </style>\n </head>\n\n" << " <body>\n <pre>" << endl;
2171 
2172  /* Print sequences colored according to CLUSTAL scheme based on
2173  * physical-chemical properties */
2174  for(j = 0, upper = HTMLBLOCKS; j < residNumber; j += HTMLBLOCKS, upper +=
2175  HTMLBLOCKS) {
2176 
2177  file << endl;
2178  /* Print main columns number */
2179  file << setw(maxLongName + 19) << right << (j + 10);
2180  for(i = j + 20; ((i <= residNumber) && (i <= upper)); i += 10)
2181  file << setw(10) << right << i;
2182 
2183  /* Print special characters to delimit sequences blocks */
2184  file << endl << setw(maxLongName + 10);
2185  for(i = j + 1; ((i <= residNumber) && (i <= upper)); i++)
2186  file << (!(i % 10) ? "+" : "=");
2187 
2188  /* Print sequences themselves */
2189  for(i = 0; i < sequenNumber; i++) {
2190 
2191  /* Print sequences name */
2192  file << endl << setw(maxLongName + 9) << left << seqsName[i];
2193 
2194  /* Print residues corresponding to current sequences block */
2195  for(k = j; ((k < residNumber) && (k < upper)); k++) {
2196  for(kj = 0, tmpColumn.clear(); kj < sequenNumber; kj++)
2197  tmpColumn += sequences[kj][k];
2198  /* Determine residue color based on residues across the alig column */
2199  type = utils::determineColor(sequences[i][k], tmpColumn);
2200  if (type == 'w')
2201  file << sequences[i][k];
2202  else
2203  file << "<span id=" << type << ">" << sequences[i][k] << "</span>";
2204  }
2205  }
2206  file << endl;
2207  }
2208 
2209  /* Print HTML footer into output file */
2210  file << " </pre>" << endl << " </body>" << endl << "</html>" << endl;
2211 
2212  return true;
2213 }
2214 
2215 void alignment::printAlignmentInfo(ostream &file) {
2216  /* Print information about sequences number, average sequence length, maximum
2217  * and minimum sequences length, etc */
2218 
2219  int i, j, valid_res, max, min, max_pos, min_pos, total_res;
2220 
2221  /* Storage which sequences are the longest and shortest ones */
2222  max = 0;
2223  max_pos = 0;
2224  min_pos = 0;
2225  min = residuesNumber[0];
2226 
2227  for(i = 0, total_res = 0; i < sequenNumber; i++) {
2228 
2229  /* Discard gaps from current sequence and then compute real length */
2230  for(j = 0, valid_res = 0; j < residuesNumber[i]; j++)
2231  valid_res += (sequences[i][j] != '-' ? 1 : 0);
2232 
2233  /* Compute the total residues in the alignment to calculate avg. sequence
2234  * length */
2235  total_res += valid_res;
2236 
2237  /* Get values for the longest sequence */
2238  max_pos = (max > valid_res) ? max_pos : i;
2239  max = (max > valid_res) ? max : valid_res;
2240  /* Similarily, get values for the shortest sequence */
2241  min_pos = (min < valid_res) ? min_pos : i;
2242  min = (min < valid_res) ? min : valid_res;
2243  }
2244 
2245  file << "## Total sequences\t" << sequenNumber << endl;
2246  if (isFileAligned())
2247  file << "## Alignment length\t" << residNumber << endl;
2248  file << "## Avg. sequence length\t" << (float) total_res / sequenNumber << endl
2249  << "## Longest seq. name\t'" << seqsName[max_pos] << "'" << endl
2250  << "## Longest seq. length\t" << max << endl
2251  << "## Shortest seq. name\t'" << seqsName[min_pos] << "'" << endl
2252  << "## Shortest seq. length\t" << min << endl;
2253 }
#define PHYLIPDISTANCE
Definition: defines.h:46
#define OTHDELIMITERS
Definition: defines.h:42
#define OTH2DELIMITERS
Definition: defines.h:43
#define HTMLBLOCKS
Definition: defines.h:45
#define DELIMITERS
Definition: defines.h:41