1 """Various PairwiseAlignment Algorithm implementations when linear gap penalty --[yong27], 2005-02-28
   2 
   3 reference : BiologicalSequenceAnalysis
   4 """
   5 import unittest
   6 
   7 class SubMatrix:
   8     def __init__(self, smatrixFileName):
   9         self.matrix = dict()
  10         for line in file(smatrixFileName):
  11             if line[0] == ' ':
  12                 indices = line.split()
  13             if line[0] not in ('#',' '):
  14                 scores = [score.strip() for score in line.split()]
  15                 aa = scores[0]
  16                 self.matrix[aa] = dict()
  17                 for i,score in enumerate(scores[1:]):
  18                     self.matrix[aa][indices[i]] = int(score)
  19 
  20     def getScore(self, a, b):
  21         return self.matrix[a][b]
  22 
  23 
  24 class PairwiseAlignment:
  25     def __init__(self, seq1, seq2):
  26         self.subMatrix = SubMatrix('blosum50')
  27         self.d = -8
  28         self.seq1 = seq1
  29         self.seq2 = seq2
  30         self.n = len(self.seq1) + 1
  31         self.m = len(self.seq2) + 1
  32         self.matrix = [[None]*self.n for i in range(self.m)]
  33         self.traceMatrix = [[None]*self.n for i in range(self.m)]
  34         self.fillInitialMatrix()
  35         self.fillMatrix()
  36 
  37     def fillInitialMatrix(self):
  38         raise NotImplementedError
  39 
  40     def fillMatrix(self):
  41         for i in range(self.n):
  42             for j in range(self.m):
  43                 if self.get(i,j) is None:
  44                     self.set(i,j, self.getMaxValue(i,j))
  45 
  46     def get(self, i, j):
  47         return self.matrix[j][i]
  48 
  49     def set(self, i, j, value):
  50         self.matrix[j][i] = value
  51 
  52     def setTrace(self, i, j, value):
  53         self.traceMatrix[j][i] = value
  54 
  55     def getTrace(self, i, j):
  56         return self.traceMatrix[j][i]
  57 
  58     def __repr__(self):
  59         return '\n'+\
  60             '\n'.join(['\t'.join(map(str,row)) for row in self.matrix]) +\
  61             '\n'+\
  62             '\n'.join([' '.join(map(str,row)) for row in self.traceMatrix]) 
  63 
  64     def getBasicValues(self, i, j):
  65         values = list()
  66         values.append(self.get(i-1,j-1) + self.subMatrix.getScore(
  67                 self.seq1[i-1], self.seq2[j-1]))
  68         values.append(self.get(i-1,j) + self.d)
  69         values.append(self.get(i,j-1) + self.d)
  70         return values
  71 
  72     def getMaxValue(self, i, j):
  73         values = self.getValues(i, j)
  74         maxValue = max(values)
  75         self.setTrace(i,j, -values.index(maxValue))
  76         return maxValue
  77 
  78     def getTraces(self):
  79         traces = list()
  80         i,j = self.getStartPosition()
  81         while not self.getEndCondition(i,j):
  82             trace = self.getTrace(i,j)
  83             traces.append(trace)
  84             if trace == 0:
  85                 i-=1;j-=1
  86             elif trace == -1:
  87                 i-=1
  88             elif trace == -2:
  89                 j-=1
  90         return traces
  91 
  92     def getResult(self):
  93         r1=list(); r2=list()
  94         i,j = self.getStartPosition()
  95         for trace in self.getTraces():
  96             if trace == 0:
  97                 r1.append(self.seq1[i-1])
  98                 r2.append(self.seq2[j-1])
  99                 i-=1; j-=1
 100             elif trace == -1:
 101                 r1.append(self.seq1[i-1])
 102                 r2.append('-')
 103                 i-=1
 104             elif trace == -2:
 105                 r1.append('-')
 106                 r2.append(self.seq2[j-1])
 107                 j-=1
 108         r1.reverse()
 109         r2.reverse()
 110         return ''.join(r1), ''.join(r2)
 111 
 112 
 113 class GlobalAlignment(PairwiseAlignment):
 114     def fillInitialMatrix(self):
 115         for i in range(self.n):
 116             self.set(i,0, i*self.d)
 117             self.setTrace(i,0,-1)
 118         for j in range(self.m):
 119             self.set(0,j, j*self.d)
 120             self.setTrace(0,j,-2)
 121 
 122     def getValues(self, i, j):
 123         return self.getBasicValues(i, j)
 124 
 125     def getStartPosition(self):
 126         return self.n-1, self.m-1
 127 
 128     def getEndCondition(self, i, j):
 129         return i==j==0
 130 
 131 
 132 class LocalAlignment(PairwiseAlignment):
 133     def fillInitialMatrix(self):
 134         for i in range(self.n):
 135             self.set(i,0,0)
 136             self.setTrace(i,0,-3)
 137         for j in range(self.m):
 138             self.set(0,j,0)
 139             self.setTrace(0,j,-3)
 140 
 141     def getValues(self, i, j):
 142         values = self.getBasicValues(i, j)
 143         values.append(0)
 144         return values
 145 
 146     def getStartPosition(self):
 147         maxValue = 0
 148         for y,row in enumerate(self.matrix):
 149             for x,value in enumerate(row):
 150                 if value > maxValue:
 151                     maxValue = value
 152                     i=x; j=y
 153         return i, j
 154 
 155     def getEndCondition(self, i, j):
 156         return self.get(i,j) == 0
 157 
 158 
 159 class RepeatedMatches(PairwiseAlignment):
 160     threshold = 20
 161     def fillInitialMatrix(self):
 162         for j in range(self.m):
 163             self.set(0,j,0)
 164             self.setTrace(0,j,-2)
 165 
 166     def fillMatrix(self):
 167         for i in range(1, self.n):
 168             self.set(i,0, self.getMaxInitialValue(i))
 169             for j in range(1, self.m):
 170                 self.set(i,j,self.getMaxValue(i,j))
 171 
 172     def getMaxInitialValue(self, i):
 173         values = list()
 174         values.append(self.get(i-1,0))
 175         for j in range(1,self.m):
 176             values.append(self.get(i-1,j)-self.threshold)
 177         maxValue = max(values)
 178         flag = values.index(maxValue)
 179         if flag == 0: 
 180             flag = -1
 181         self.setTrace(i,0,flag)
 182         return maxValue
 183 
 184     def getValues(self, i, j):
 185         values = self.getBasicValues(i, j)
 186         values.append(self.get(i,0))
 187         return values
 188 
 189     def getStartPosition(self):
 190         return self.n-1, 0
 191 
 192     def getEndCondition(self, i, j):
 193         return i==j==0
 194 
 195     def getTraces(self):
 196         traces = list()
 197         i,j = self.getStartPosition()
 198         while not self.getEndCondition(i,j):
 199             trace = self.getTrace(i,j)
 200             traces.append(trace)
 201             if trace == 0:
 202                 i-=1;j-=1
 203             elif trace == -1:
 204                 i-=1
 205             elif trace in (-2, -3):
 206                 j-=1
 207             else:
 208                 i-=1; j=trace
 209         return traces
 210 
 211     def getResult(self):
 212         r1=list(); r2=list()
 213         i,j = self.getStartPosition()
 214         for trace in self.getTraces():
 215             if trace == 0:
 216                 r1.append(self.seq1[i-1])
 217                 r2.append(self.seq2[j-1])
 218                 i-=1; j-=1
 219             elif trace == -1:
 220                 r1.append(self.seq1[i-1])
 221                 r2.append('-')
 222                 i-=1
 223             elif trace == -2:
 224                 if i != 0:
 225                     r1.append('-')
 226                     r2.append(self.seq2[j-1])
 227                     j-=1
 228             elif trace == -3:
 229                 pass
 230             else:
 231                 r1.append(self.seq1[i-1])
 232                 r2.append('.')
 233                 j=trace
 234                 i-=1
 235         r1.reverse()
 236         r2.reverse()
 237         return ''.join(r1), ''.join(r2)
 238 
 239 
 240 class OverlapMatches(GlobalAlignment):
 241     def fillInitialMatrix(self):
 242         self.set(0,0,0)
 243         for i in range(1, self.n):
 244             self.set(i,0, 0)
 245             self.setTrace(i,0,-1)
 246         for j in range(1, self.m):
 247             self.set(0,j, 0)
 248             self.setTrace(0,j,-2)
 249 
 250     def getStartPosition(self):
 251         maxValue = 0
 252         for y,row in enumerate(self.matrix):
 253             for x,value in enumerate(row):
 254                 if value > maxValue:
 255                     maxValue = value
 256                     i=x; j=y
 257         return i, j
 258 
 259     def getEndCondition(self, i, j):
 260         return i==0 or j==0
 261 
 262 
 263 class TestPairwiseAlignment(unittest.TestCase):
 264     def testSubMatrix(self):
 265         sm = SubMatrix('blosum50')
 266         self.assertEquals(5, sm.getScore('A','A'))
 267         self.assertEquals(-2, sm.getScore('A','R'))
 268         self.assertEquals(-2, sm.getScore('Z','G'))
 269 
 270     def testGlobalAlignment(self):
 271         ga = GlobalAlignment('HEAGAWGHEE','PAWHEAE')
 272         self.assertEquals(0, ga.get(0,0))
 273         self.assertEquals(-8, ga.get(1,0))
 274         self.assertEquals(-2, ga.get(1,1))
 275         self.assertEquals([0,-2,0,0,-1,0,0,-1,0,-1,-1], ga.getTraces())
 276         self.assertEquals(('HEAGAWGHE-E','--P-AW-HEAE'), ga.getResult())
 277 
 278     def testLocalAlignment(self):
 279         la = LocalAlignment('HEAGAWGHEE','PAWHEAE')
 280         self.assertEquals(0, la.get(0,0))
 281         self.assertEquals(5, la.get(3,2))
 282         self.assertEquals(2, la.get(4,3))
 283         self.assertEquals([0,0,-1,0,0], la.getTraces())
 284         self.assertEquals(('AWGHE','AW-HE'), la.getResult())
 285 
 286     def testRepeatedMatches(self):
 287         rm = RepeatedMatches('HEAGAWGHEE','PAWHEAE')
 288         self.assertEquals(0, rm.get(0,0))
 289         self.assertEquals(10, rm.get(1,4))
 290         self.assertEquals(16, rm.get(2,5))
 291         self.assertEquals([5,0,0,-1,0,0,-3,6,0,0,0,-2,-2,-2], rm.getTraces())
 292         self.assertEquals(('HEAGAWGHEE','HEA.AW-HE.'), rm.getResult())
 293 
 294     def testOverlapMatches(self):
 295         om = OverlapMatches('HEAGAWGHEE','PAWHEAE')
 296         self.assertEquals(0, om.get(0,0))
 297         self.assertEquals(-4, om.get(3,3))
 298         self.assertEquals(-6, om.get(4,4))
 299         self.assertEquals([0,0,-1,0,0,0], om.getTraces())
 300         self.assertEquals(('GAWGHE','PAW-HE'), om.getResult())
 301 
 302 
 303 if __name__=='__main__':
 304     unittest.main(argv=('','-v'))
web biohackers.net