1 """FastaFormat HTML decoration using DecoratorPattern by [yong27], 2004-06-14
2 """
3 import os, re, unittest
4
class Record:  ## It is in Bio.Fasta of biopython ( http://biopython.org )
    """Holds information from a FASTA record.

    Members:
    title       Title line ('>' character not included).
    sequence    The sequence.

    """

    def __init__(self, colwidth=60):
        """__init__(self, colwidth=60)

        Create a new Record.  colwidth specifies the number of residues
        to put on each line when generating FASTA format.

        """
        self.title = ''
        self.sequence = ''
        self._colwidth = colwidth

    def __str__(self):
        """Return the record in FASTA format.

        The first line is '>' followed by the title; the sequence follows in
        chunks of at most ``colwidth`` residues.  Lines are joined with
        ``os.linesep`` and there is no trailing line separator.

        Raises ValueError when there is a sequence to wrap but the column
        width is not positive (such a width can never consume the sequence).
        """
        lines = ['>%s' % self.title]
        if self.sequence:
            width = self._colwidth
            if width < 1:
                raise ValueError(
                    'colwidth must be a positive integer, got %r' % (width,))
            lines.extend([self.sequence[start:start + width]
                          for start in range(0, len(self.sequence), width)])
        return os.linesep.join(lines)
32
33
class FastaDecorator:
    """
    HTML decoration of FASTA format string.

    Wraps a record-like object (anything with ``title``, ``sequence`` and
    ``_colwidth``) and surrounds every case-insensitive match of a regular
    expression in its sequence with a pair of markup strings.

    It can have self instance. (Decorator pattern): the result exposes the
    same three attributes, so decorators can be stacked.  Text between '<'
    and '>' is treated as markup: it is not searched by later decorators and
    is not counted as residues when wrapping lines.
    """

    def __init__(self, aFasta, aSeqPattern, aDecoration):
        """
        @param aFasta: instance of Fasta.Record or FastaDecorator (self)
        @param aSeqPattern: re pattern string
        @param aDecoration: pair like ('<b>','</b>')
        """
        self.title = aFasta.title
        self._colwidth = aFasta._colwidth
        openTag, closeTag = aDecoration

        # Group 1 is the caller's pattern (same group numbering as before).
        # The second alternative consumes a whole '<...>' run so that a match
        # can never start inside markup inserted by an earlier decorator
        # (e.g. pattern 'B' must not hit the 'b' of '<b>').
        matcher = re.compile('(%s)|<[^>]*>' % aSeqPattern, re.IGNORECASE)

        def decorate(match):
            found = match.group(1)
            if found is None:
                # Existing markup: copy through untouched.
                return match.group(0)
            # Plain concatenation instead of an re replacement template, so
            # backslashes or digits in the decoration are taken literally.
            return openTag + found + closeTag

        self.sequence = matcher.sub(decorate, aFasta.sequence)

    def __str__(self):
        """Return the title line plus the wrapped, decorated sequence lines,
        joined with ``os.linesep``."""
        lines = ['>%s' % self.title]
        lines.extend(self.getStrSequence())
        return os.linesep.join(lines)

    def getStrSequence(self):
        """Split the decorated sequence into a list of lines.

        A line is closed once it holds ``colwidth`` residues; characters from
        '<' through the matching '>' are copied but not counted.  No empty
        trailing line is produced when the residue count is an exact multiple
        of the column width or the sequence is empty.  A non-positive column
        width never triggers a wrap.
        """
        lines = []
        current = []          # characters of the line being built
        residueCount = 0      # residues (non-markup characters) on that line
        inTag = False
        for char in self.sequence:
            current.append(char)
            if char == '<':
                inTag = True
            elif char == '>':
                inTag = False
            elif not inTag:
                residueCount += 1
                if residueCount == self._colwidth:
                    lines.append(''.join(current))
                    current = []
                    residueCount = 0
        if current:
            lines.append(''.join(current))
        return lines
78
79
class FastaDecoratorTest(unittest.TestCase):
    """Tests for FastaDecorator: single, repeated and chained decoration."""

    def setUp(self):
        """Create a 139-residue record (two full 60-column lines plus 19)."""
        self.fasta = Record()
        self.fasta.title = 'Test sequence'
        self.fasta.sequence = (
            'TCTTCTCCTCACCTCGCTCTCGCCGCCTGCTCGCCCCGNCCGCTTTGCTCGGCGCCCCAA'
            'AACACNCTTCCACCATGNGCCACCTCGGCGAGCCCTCCCACTTGAACAAAGGGGTGCTCG'
            'GCGCGTGTACNNATGGCCC'
        )

    def _assertLines(self, expectedLines, decorated):
        """Compare the full rendered output line by line.

        splitlines() is used because the output is joined with os.linesep,
        and whole lists are compared so missing or extra lines are detected.
        """
        self.assertEqual(expectedLines, str(decorated).splitlines())

    def testDecoratedStr(self):
        """A case-insensitive match spanning a line break is wrapped intact."""
        fd = FastaDecorator(self.fasta, 'CCCCaaaaCACN', ('<b>', '</b>'))
        self._assertLines([
            '>Test sequence',
            'TCTTCTCCTCACCTCGCTCTCGCCGCCTGCTCGCCCCGNCCGCTTTGCTCGGCG<b>CCCCAA',
            'AACACN</b>CTTCCACCATGNGCCACCTCGGCGAGCCCTCCCACTTGAACAAAGGGGTGCTCG',
            'GCGCGTGTACNNATGGCCC',
        ], fd)

    def testMultipleDecoratedStr(self):
        """Every occurrence of the pattern is decorated."""
        fd = FastaDecorator(self.fasta, 'CGCT', ('<b>', '</b>'))
        self._assertLines([
            '>Test sequence',
            'TCTTCTCCTCACCT<b>CGCT</b>CTCGCCGCCTGCTCGCCCCGNC<b>CGCT</b>TTGCTCGGCGCCCCAA',
            'AACACNCTTCCACCATGNGCCACCTCGGCGAGCCCTCCCACTTGAACAAAGGGGTGCTCG',
            'GCGCGTGTACNNATGGCCC',
        ], fd)

    def testDecoratorPattern(self):
        """Decorators can be stacked on top of one another."""
        fd = FastaDecorator(self.fasta, 'CCCCAAAACACN', ('<b>', '</b>'))
        fd2 = FastaDecorator(fd, 'ACCTCGGCGAGCC', ('<font color="red">', '</font>'))
        fd3 = FastaDecorator(fd2, 'TCGGCGCGTG', ('<i>', '</i>'))
        self._assertLines([
            '>Test sequence',
            'TCTTCTCCTCACCTCGCTCTCGCCGCCTGCTCGCCCCGNCCGCTTTGCTCGGCG<b>CCCCAA',
            'AACACN</b>CTTCCACCATGNGCC<font color="red">ACCTCGGCGAGCC</font>CTCCCACTTGAACAAAGGGGTGC<i>TCG',
            'GCGCGTG</i>TACNNATGGCCC',
        ], fd3)
124
125
if __name__ == '__main__':
    # Run the tests verbosely when executed as a script; the explicit argv
    # (program name, '-v') is used in place of the real command line.
    verboseArgv = ('', '-v')
    unittest.main(argv=verboseArgv)