#format python
"""FastaFormat HTML decoration using DecoratorPattern

by [yong27], 2004-06-14
"""
import os
import re
import unittest


class Record:  ## It is in Bio.Fasta of biopython ( http://biopython.org )
    """Holds information from a FASTA record.

    Members:
    title       Title line ('>' character not included).
    sequence    The sequence.
    """

    def __init__(self, colwidth=60):
        """__init__(self, colwidth=60)

        Create a new Record.  colwidth specifies the number of residues
        to put on each line when generating FASTA format.
        """
        self.title = ''
        self.sequence = ''
        self._colwidth = colwidth

    def __str__(self):
        """Return the record as FASTA text, lines joined with os.linesep.

        @raise ValueError: if there is a sequence to wrap and the column
            width is not positive (the wrap loop could never terminate).
        """
        lines = ['>%s' % self.title]
        if self.sequence:
            width = self._colwidth
            if width <= 0:
                raise ValueError(
                    'colwidth must be a positive integer, got %r' % (width,))
            lines.extend(self.sequence[start:start + width]
                         for start in range(0, len(self.sequence), width))
        return os.linesep.join(lines)


class FastaDecorator:
    """
    HTML decoration of FASTA format string.
    It can wrap another FastaDecorator instance, so decorations can be
    stacked. (Decorator pattern)
    """

    # Anything between '<' and '>' is treated as markup, the same rule
    # getStrSequence uses when counting residues.  The capturing group makes
    # re.split keep the markup pieces (at odd indices of the result).
    _TAG_RE = re.compile(r'(<[^>]*>)')

    def __init__(self, aFasta, aSeqPattern, aDecoration):
        """
        @param aFasta: instance of Fasta.Record or FastaDecorator (self)
        @param aSeqPattern: re pattern string (matched case-insensitively)
        @param aDecoration: pair of strings placed before and after every
            match, e.g. ('<b>', '</b>')
        """
        prefix, suffix = aDecoration
        self.title = aFasta.title
        self._colwidth = aFasta._colwidth
        pattern = re.compile('(%s)' % aSeqPattern, re.IGNORECASE)

        def decorate(match):
            # A callable replacement inserts the decoration literally;
            # a template string would interpret backslashes inside it.
            return prefix + match.group(1) + suffix

        # Only substitute in the residue text, never inside markup that an
        # earlier decorator already inserted.
        pieces = self._TAG_RE.split(aFasta.sequence)
        self.sequence = ''.join(
            piece if index % 2 else pattern.sub(decorate, piece)
            for index, piece in enumerate(pieces))

    def __str__(self):
        """Return the decorated record as FASTA text joined with os.linesep."""
        lines = ['>%s' % self.title]
        lines.extend(self.getStrSequence())
        return os.linesep.join(lines)

    def getStrSequence(self):
        """Wrap the decorated sequence into lines of _colwidth residues.

        Characters from '<' through the matching '>' are copied to the
        output but do not count towards the column width.

        @return: list of line strings; empty when the sequence is empty.
            No trailing empty line is produced when the residue count is
            an exact multiple of the column width.
        """
        lines = []
        current = []
        visibleCount = 0
        inTag = False
        for char in self.sequence:
            current.append(char)
            if char == '<':
                inTag = True
            elif char == '>':
                inTag = False
                continue  # the closing '>' itself is still markup
            if not inTag:
                visibleCount += 1
                if visibleCount == self._colwidth:
                    lines.append(''.join(current))
                    current = []
                    visibleCount = 0
        if current:
            lines.append(''.join(current))
        return lines


class FastaDecoratorTest(unittest.TestCase):
    # NOTE(review): the decoration tuples ('', '') and the expected strings
    # below contain no markup in this copy of the file; presumably HTML tags
    # were stripped when the page was extracted -- confirm against the
    # original and restore them if so.

    def setUp(self):
        self.fasta = Record()
        self.fasta.title = 'Test sequence'
        self.fasta.sequence = """\
TCTTCTCCTCACCTCGCTCTCGCCGCCTGCTCGCCCCGNCCGCTTTGCTCGGCGCCCCAA\
AACACNCTTCCACCATGNGCCACCTCGGCGAGCCCTCCCACTTGAACAAAGGGGTGCTCG\
GCGCGTGTACNNATGGCCC\
"""

    def testDecoratedStr(self):
        fd = FastaDecorator(self.fasta, 'CCCCaaaaCACN', ('', ''))
        expected = """\
>Test sequence
TCTTCTCCTCACCTCGCTCTCGCCGCCTGCTCGCCCCGNCCGCTTTGCTCGGCGCCCCAA
AACACNCTTCCACCATGNGCCACCTCGGCGAGCCCTCCCACTTGAACAAAGGGGTGCTCG
GCGCGTGTACNNATGGCCC\
"""
        # splitlines() copes with os.linesep being '\n' or '\r\n'.
        self.assertEqual(expected.splitlines(), str(fd).splitlines())

    def testMultipleDecoratedStr(self):
        fd = FastaDecorator(self.fasta, 'CGCT', ('', ''))
        expected = """\
>Test sequence
TCTTCTCCTCACCTCGCTCTCGCCGCCTGCTCGCCCCGNCCGCTTTGCTCGGCGCCCCAA
AACACNCTTCCACCATGNGCCACCTCGGCGAGCCCTCCCACTTGAACAAAGGGGTGCTCG
GCGCGTGTACNNATGGCCC\
"""
        self.assertEqual(expected.splitlines(), str(fd).splitlines())

    def testDecoratorPattern(self):
        fd = FastaDecorator(self.fasta, 'CCCCAAAACACN', ('', ''))
        fd2 = FastaDecorator(fd, 'ACCTCGGCGAGCC', ('', ''))
        fd3 = FastaDecorator(fd2, 'TCGGCGCGTG', ('', ''))
        expected = """\
>Test sequence
TCTTCTCCTCACCTCGCTCTCGCCGCCTGCTCGCCCCGNCCGCTTTGCTCGGCGCCCCAA
AACACNCTTCCACCATGNGCCACCTCGGCGAGCCCTCCCACTTGAACAAAGGGGTGCTCG
GCGCGTGTACNNATGGCCC\
"""
        self.assertEqual(expected.splitlines(), str(fd3).splitlines())

    def testTagsDoNotCountTowardsColumnWidth(self):
        record = Record(colwidth=4)
        record.title = 'short'
        record.sequence = 'ACGTACGT'
        fd = FastaDecorator(record, 'gtac', ('<b>', '</b>'))
        self.assertEqual('AC<b>GTAC</b>GT', fd.sequence)
        # Eight residues at width four: exactly two lines, no empty third.
        self.assertEqual(['AC<b>GT', 'AC</b>GT'], fd.getStrSequence())

    def testNestedDecorationLeavesMarkupIntact(self):
        record = Record()
        record.title = 'nested'
        record.sequence = 'ABBA'
        inner = FastaDecorator(record, 'BB', ('<b>', '</b>'))
        outer = FastaDecorator(inner, 'b', ('<i>', '</i>'))
        self.assertEqual('A<b><i>B</i><i>B</i></b>A', outer.sequence)

    def testEmptySequence(self):
        record = Record()
        record.title = 'empty'
        fd = FastaDecorator(record, 'A', ('<b>', '</b>'))
        self.assertEqual([], fd.getStrSequence())
        self.assertEqual(str(record), str(fd))


if __name__ == '__main__':
    unittest.main(argv=('', '-v'))