1 """Various PairwiseAlignment Algorithm implementations when linear gap penalty --[yong27], 2005-02-28
2
3 reference : BiologicalSequenceAnalysis
4 """
5 import unittest
6
7 class SubMatrix:
8 def __init__(self, smatrixFileName):
9 self.matrix = dict()
10 for line in file(smatrixFileName):
11 if line[0] == ' ':
12 indices = line.split()
13 if line[0] not in ('#',' '):
14 scores = [score.strip() for score in line.split()]
15 aa = scores[0]
16 self.matrix[aa] = dict()
17 for i,score in enumerate(scores[1:]):
18 self.matrix[aa][indices[i]] = int(score)
19
20 def getScore(self, a, b):
21 return self.matrix[a][b]
22
23
24 class PairwiseAlignment:
25 def __init__(self, seq1, seq2):
26 self.subMatrix = SubMatrix('blosum50')
27 self.d = -8
28 self.seq1 = seq1
29 self.seq2 = seq2
30 self.n = len(self.seq1) + 1
31 self.m = len(self.seq2) + 1
32 self.matrix = [[None]*self.n for i in range(self.m)]
33 self.traceMatrix = [[None]*self.n for i in range(self.m)]
34 self.fillInitialMatrix()
35 self.fillMatrix()
36
37 def fillInitialMatrix(self):
38 raise NotImplementedError
39
40 def fillMatrix(self):
41 for i in range(self.n):
42 for j in range(self.m):
43 if self.get(i,j) is None:
44 self.set(i,j, self.getMaxValue(i,j))
45
46 def get(self, i, j):
47 return self.matrix[j][i]
48
49 def set(self, i, j, value):
50 self.matrix[j][i] = value
51
52 def setTrace(self, i, j, value):
53 self.traceMatrix[j][i] = value
54
55 def getTrace(self, i, j):
56 return self.traceMatrix[j][i]
57
58 def __repr__(self):
59 return '\n'+\
60 '\n'.join(['\t'.join(map(str,row)) for row in self.matrix]) +\
61 '\n'+\
62 '\n'.join([' '.join(map(str,row)) for row in self.traceMatrix])
63
64 def getBasicValues(self, i, j):
65 values = list()
66 values.append(self.get(i-1,j-1) + self.subMatrix.getScore(
67 self.seq1[i-1], self.seq2[j-1]))
68 values.append(self.get(i-1,j) + self.d)
69 values.append(self.get(i,j-1) + self.d)
70 return values
71
72 def getMaxValue(self, i, j):
73 values = self.getValues(i, j)
74 maxValue = max(values)
75 self.setTrace(i,j, -values.index(maxValue))
76 return maxValue
77
78 def getTraces(self):
79 traces = list()
80 i,j = self.getStartPosition()
81 while not self.getEndCondition(i,j):
82 trace = self.getTrace(i,j)
83 traces.append(trace)
84 if trace == 0:
85 i-=1;j-=1
86 elif trace == -1:
87 i-=1
88 elif trace == -2:
89 j-=1
90 return traces
91
92 def getResult(self):
93 r1=list(); r2=list()
94 i,j = self.getStartPosition()
95 for trace in self.getTraces():
96 if trace == 0:
97 r1.append(self.seq1[i-1])
98 r2.append(self.seq2[j-1])
99 i-=1; j-=1
100 elif trace == -1:
101 r1.append(self.seq1[i-1])
102 r2.append('-')
103 i-=1
104 elif trace == -2:
105 r1.append('-')
106 r2.append(self.seq2[j-1])
107 j-=1
108 r1.reverse()
109 r2.reverse()
110 return ''.join(r1), ''.join(r2)
111
112
113 class GlobalAlignment(PairwiseAlignment):
114 def fillInitialMatrix(self):
115 for i in range(self.n):
116 self.set(i,0, i*self.d)
117 self.setTrace(i,0,-1)
118 for j in range(self.m):
119 self.set(0,j, j*self.d)
120 self.setTrace(0,j,-2)
121
122 def getValues(self, i, j):
123 return self.getBasicValues(i, j)
124
125 def getStartPosition(self):
126 return self.n-1, self.m-1
127
128 def getEndCondition(self, i, j):
129 return i==j==0
130
131
132 class LocalAlignment(PairwiseAlignment):
133 def fillInitialMatrix(self):
134 for i in range(self.n):
135 self.set(i,0,0)
136 self.setTrace(i,0,-3)
137 for j in range(self.m):
138 self.set(0,j,0)
139 self.setTrace(0,j,-3)
140
141 def getValues(self, i, j):
142 values = self.getBasicValues(i, j)
143 values.append(0)
144 return values
145
146 def getStartPosition(self):
147 maxValue = 0
148 for y,row in enumerate(self.matrix):
149 for x,value in enumerate(row):
150 if value > maxValue:
151 maxValue = value
152 i=x; j=y
153 return i, j
154
155 def getEndCondition(self, i, j):
156 return self.get(i,j) == 0
157
158
159 class RepeatedMatches(PairwiseAlignment):
160 threshold = 20
161 def fillInitialMatrix(self):
162 for j in range(self.m):
163 self.set(0,j,0)
164 self.setTrace(0,j,-2)
165
166 def fillMatrix(self):
167 for i in range(1, self.n):
168 self.set(i,0, self.getMaxInitialValue(i))
169 for j in range(1, self.m):
170 self.set(i,j,self.getMaxValue(i,j))
171
172 def getMaxInitialValue(self, i):
173 values = list()
174 values.append(self.get(i-1,0))
175 for j in range(1,self.m):
176 values.append(self.get(i-1,j)-self.threshold)
177 maxValue = max(values)
178 flag = values.index(maxValue)
179 if flag == 0:
180 flag = -1
181 self.setTrace(i,0,flag)
182 return maxValue
183
184 def getValues(self, i, j):
185 values = self.getBasicValues(i, j)
186 values.append(self.get(i,0))
187 return values
188
189 def getStartPosition(self):
190 return self.n-1, 0
191
192 def getEndCondition(self, i, j):
193 return i==j==0
194
195 def getTraces(self):
196 traces = list()
197 i,j = self.getStartPosition()
198 while not self.getEndCondition(i,j):
199 trace = self.getTrace(i,j)
200 traces.append(trace)
201 if trace == 0:
202 i-=1;j-=1
203 elif trace == -1:
204 i-=1
205 elif trace in (-2, -3):
206 j-=1
207 else:
208 i-=1; j=trace
209 return traces
210
211 def getResult(self):
212 r1=list(); r2=list()
213 i,j = self.getStartPosition()
214 for trace in self.getTraces():
215 if trace == 0:
216 r1.append(self.seq1[i-1])
217 r2.append(self.seq2[j-1])
218 i-=1; j-=1
219 elif trace == -1:
220 r1.append(self.seq1[i-1])
221 r2.append('-')
222 i-=1
223 elif trace == -2:
224 if i != 0:
225 r1.append('-')
226 r2.append(self.seq2[j-1])
227 j-=1
228 elif trace == -3:
229 pass
230 else:
231 r1.append(self.seq1[i-1])
232 r2.append('.')
233 j=trace
234 i-=1
235 r1.reverse()
236 r2.reverse()
237 return ''.join(r1), ''.join(r2)
238
239
240 class OverlapMatches(GlobalAlignment):
241 def fillInitialMatrix(self):
242 self.set(0,0,0)
243 for i in range(1, self.n):
244 self.set(i,0, 0)
245 self.setTrace(i,0,-1)
246 for j in range(1, self.m):
247 self.set(0,j, 0)
248 self.setTrace(0,j,-2)
249
250 def getStartPosition(self):
251 maxValue = 0
252 for y,row in enumerate(self.matrix):
253 for x,value in enumerate(row):
254 if value > maxValue:
255 maxValue = value
256 i=x; j=y
257 return i, j
258
259 def getEndCondition(self, i, j):
260 return i==0 or j==0
261
262
263 class TestPairwiseAlignment(unittest.TestCase):
264 def testSubMatrix(self):
265 sm = SubMatrix('blosum50')
266 self.assertEquals(5, sm.getScore('A','A'))
267 self.assertEquals(-2, sm.getScore('A','R'))
268 self.assertEquals(-2, sm.getScore('Z','G'))
269
270 def testGlobalAlignment(self):
271 ga = GlobalAlignment('HEAGAWGHEE','PAWHEAE')
272 self.assertEquals(0, ga.get(0,0))
273 self.assertEquals(-8, ga.get(1,0))
274 self.assertEquals(-2, ga.get(1,1))
275 self.assertEquals([0,-2,0,0,-1,0,0,-1,0,-1,-1], ga.getTraces())
276 self.assertEquals(('HEAGAWGHE-E','--P-AW-HEAE'), ga.getResult())
277
278 def testLocalAlignment(self):
279 la = LocalAlignment('HEAGAWGHEE','PAWHEAE')
280 self.assertEquals(0, la.get(0,0))
281 self.assertEquals(5, la.get(3,2))
282 self.assertEquals(2, la.get(4,3))
283 self.assertEquals([0,0,-1,0,0], la.getTraces())
284 self.assertEquals(('AWGHE','AW-HE'), la.getResult())
285
286 def testRepeatedMatches(self):
287 rm = RepeatedMatches('HEAGAWGHEE','PAWHEAE')
288 self.assertEquals(0, rm.get(0,0))
289 self.assertEquals(10, rm.get(1,4))
290 self.assertEquals(16, rm.get(2,5))
291 self.assertEquals([5,0,0,-1,0,0,-3,6,0,0,0,-2,-2,-2], rm.getTraces())
292 self.assertEquals(('HEAGAWGHEE','HEA.AW-HE.'), rm.getResult())
293
294 def testOverlapMatches(self):
295 om = OverlapMatches('HEAGAWGHEE','PAWHEAE')
296 self.assertEquals(0, om.get(0,0))
297 self.assertEquals(-4, om.get(3,3))
298 self.assertEquals(-6, om.get(4,4))
299 self.assertEquals([0,0,-1,0,0,0], om.getTraces())
300 self.assertEquals(('GAWGHE','PAW-HE'), om.getResult())
301
302
303 if __name__=='__main__':
304 unittest.main(argv=('','-v'))