#format python """Various PairwiseAlignment Algorithm implementations when linear gap penalty --[yong27], 2005-02-28 reference : BiologicalSequenceAnalysis """ import unittest class SubMatrix: def __init__(self, smatrixFileName): self.matrix = dict() for line in file(smatrixFileName): if line[0] == ' ': indices = line.split() if line[0] not in ('#',' '): scores = [score.strip() for score in line.split()] aa = scores[0] self.matrix[aa] = dict() for i,score in enumerate(scores[1:]): self.matrix[aa][indices[i]] = int(score) def getScore(self, a, b): return self.matrix[a][b] class PairwiseAlignment: def __init__(self, seq1, seq2): self.subMatrix = SubMatrix('blosum50') self.d = -8 self.seq1 = seq1 self.seq2 = seq2 self.n = len(self.seq1) + 1 self.m = len(self.seq2) + 1 self.matrix = [[None]*self.n for i in range(self.m)] self.traceMatrix = [[None]*self.n for i in range(self.m)] self.fillInitialMatrix() self.fillMatrix() def fillInitialMatrix(self): raise NotImplementedError def fillMatrix(self): for i in range(self.n): for j in range(self.m): if self.get(i,j) is None: self.set(i,j, self.getMaxValue(i,j)) def get(self, i, j): return self.matrix[j][i] def set(self, i, j, value): self.matrix[j][i] = value def setTrace(self, i, j, value): self.traceMatrix[j][i] = value def getTrace(self, i, j): return self.traceMatrix[j][i] def __repr__(self): return '\n'+\ '\n'.join(['\t'.join(map(str,row)) for row in self.matrix]) +\ '\n'+\ '\n'.join([' '.join(map(str,row)) for row in self.traceMatrix]) def getBasicValues(self, i, j): values = list() values.append(self.get(i-1,j-1) + self.subMatrix.getScore( self.seq1[i-1], self.seq2[j-1])) values.append(self.get(i-1,j) + self.d) values.append(self.get(i,j-1) + self.d) return values def getMaxValue(self, i, j): values = self.getValues(i, j) maxValue = max(values) self.setTrace(i,j, -values.index(maxValue)) return maxValue def getTraces(self): traces = list() i,j = self.getStartPosition() while not self.getEndCondition(i,j): trace = self.getTrace(i,j) traces.append(trace) if trace == 0: i-=1;j-=1 elif trace == -1: i-=1 elif trace == -2: j-=1 return traces def getResult(self): r1=list(); r2=list() i,j = self.getStartPosition() for trace in self.getTraces(): if trace == 0: r1.append(self.seq1[i-1]) r2.append(self.seq2[j-1]) i-=1; j-=1 elif trace == -1: r1.append(self.seq1[i-1]) r2.append('-') i-=1 elif trace == -2: r1.append('-') r2.append(self.seq2[j-1]) j-=1 r1.reverse() r2.reverse() return ''.join(r1), ''.join(r2) class GlobalAlignment(PairwiseAlignment): def fillInitialMatrix(self): for i in range(self.n): self.set(i,0, i*self.d) self.setTrace(i,0,-1) for j in range(self.m): self.set(0,j, j*self.d) self.setTrace(0,j,-2) def getValues(self, i, j): return self.getBasicValues(i, j) def getStartPosition(self): return self.n-1, self.m-1 def getEndCondition(self, i, j): return i==j==0 class LocalAlignment(PairwiseAlignment): def fillInitialMatrix(self): for i in range(self.n): self.set(i,0,0) self.setTrace(i,0,-3) for j in range(self.m): self.set(0,j,0) self.setTrace(0,j,-3) def getValues(self, i, j): values = self.getBasicValues(i, j) values.append(0) return values def getStartPosition(self): maxValue = 0 for y,row in enumerate(self.matrix): for x,value in enumerate(row): if value > maxValue: maxValue = value i=x; j=y return i, j def getEndCondition(self, i, j): return self.get(i,j) == 0 class RepeatedMatches(PairwiseAlignment): threshold = 20 def fillInitialMatrix(self): for j in range(self.m): self.set(0,j,0) self.setTrace(0,j,-2) def fillMatrix(self): for i in range(1, self.n): self.set(i,0, self.getMaxInitialValue(i)) for j in range(1, self.m): self.set(i,j,self.getMaxValue(i,j)) def getMaxInitialValue(self, i): values = list() values.append(self.get(i-1,0)) for j in range(1,self.m): values.append(self.get(i-1,j)-self.threshold) maxValue = max(values) flag = values.index(maxValue) if flag == 0: flag = -1 self.setTrace(i,0,flag) return maxValue def getValues(self, i, j): values = self.getBasicValues(i, j) values.append(self.get(i,0)) return values def getStartPosition(self): return self.n-1, 0 def getEndCondition(self, i, j): return i==j==0 def getTraces(self): traces = list() i,j = self.getStartPosition() while not self.getEndCondition(i,j): trace = self.getTrace(i,j) traces.append(trace) if trace == 0: i-=1;j-=1 elif trace == -1: i-=1 elif trace in (-2, -3): j-=1 else: i-=1; j=trace return traces def getResult(self): r1=list(); r2=list() i,j = self.getStartPosition() for trace in self.getTraces(): if trace == 0: r1.append(self.seq1[i-1]) r2.append(self.seq2[j-1]) i-=1; j-=1 elif trace == -1: r1.append(self.seq1[i-1]) r2.append('-') i-=1 elif trace == -2: if i != 0: r1.append('-') r2.append(self.seq2[j-1]) j-=1 elif trace == -3: pass else: r1.append(self.seq1[i-1]) r2.append('.') j=trace i-=1 r1.reverse() r2.reverse() return ''.join(r1), ''.join(r2) class OverlapMatches(GlobalAlignment): def fillInitialMatrix(self): self.set(0,0,0) for i in range(1, self.n): self.set(i,0, 0) self.setTrace(i,0,-1) for j in range(1, self.m): self.set(0,j, 0) self.setTrace(0,j,-2) def getStartPosition(self): maxValue = 0 for y,row in enumerate(self.matrix): for x,value in enumerate(row): if value > maxValue: maxValue = value i=x; j=y return i, j def getEndCondition(self, i, j): return i==0 or j==0 class TestPairwiseAlignment(unittest.TestCase): def testSubMatrix(self): sm = SubMatrix('blosum50') self.assertEquals(5, sm.getScore('A','A')) self.assertEquals(-2, sm.getScore('A','R')) self.assertEquals(-2, sm.getScore('Z','G')) def testGlobalAlignment(self): ga = GlobalAlignment('HEAGAWGHEE','PAWHEAE') self.assertEquals(0, ga.get(0,0)) self.assertEquals(-8, ga.get(1,0)) self.assertEquals(-2, ga.get(1,1)) self.assertEquals([0,-2,0,0,-1,0,0,-1,0,-1,-1], ga.getTraces()) self.assertEquals(('HEAGAWGHE-E','--P-AW-HEAE'), ga.getResult()) def testLocalAlignment(self): la = LocalAlignment('HEAGAWGHEE','PAWHEAE') self.assertEquals(0, la.get(0,0)) self.assertEquals(5, la.get(3,2)) self.assertEquals(2, la.get(4,3)) self.assertEquals([0,0,-1,0,0], la.getTraces()) self.assertEquals(('AWGHE','AW-HE'), la.getResult()) def testRepeatedMatches(self): rm = RepeatedMatches('HEAGAWGHEE','PAWHEAE') self.assertEquals(0, rm.get(0,0)) self.assertEquals(10, rm.get(1,4)) self.assertEquals(16, rm.get(2,5)) self.assertEquals([5,0,0,-1,0,0,-3,6,0,0,0,-2,-2,-2], rm.getTraces()) self.assertEquals(('HEAGAWGHEE','HEA.AW-HE.'), rm.getResult()) def testOverlapMatches(self): om = OverlapMatches('HEAGAWGHEE','PAWHEAE') self.assertEquals(0, om.get(0,0)) self.assertEquals(-4, om.get(3,3)) self.assertEquals(-6, om.get(4,4)) self.assertEquals([0,0,-1,0,0,0], om.getTraces()) self.assertEquals(('GAWGHE','PAW-HE'), om.getResult()) if __name__=='__main__': unittest.main(argv=('','-v'))