#format python
"""Parsing of GO annotation of TigrGeneIndices's TC sequence

--yong27, 2005-05-03
"""

import sys
import unittest

try:
    from cStringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3


class Go:
    """One GO annotation parsed from a single whitespace-separated line.

    Example line::

        GO:0003774 motor activity F GOA_SPTR|P42025~98.57~74

    Attributes:
        id: text after the ':' in the first field ('0003774').
        description: the fields between the first field and the last two,
            joined by single spaces ('motor activity').
        category: the second-to-last field ('F').
        proof: the last field ('GOA_SPTR|P42025~98.57~74').

    Raises:
        IndexError: if the line is empty or its first field has no ':'.
    """

    def __init__(self, aRecordLine):
        words = aRecordLine.split()
        self.id = words[0].split(':')[1]
        self.proof = words[-1]
        self.category = words[-2]
        self.description = ' '.join(words[1:-2])


class TcGo:
    """A TC identifier together with its Go annotations, keyed by Go id."""

    def __init__(self, aTcId):
        self.id = aTcId
        self.gos = dict()

    def addGo(self, aGo):
        """Store aGo under its id, replacing any annotation with that id."""
        self.gos[aGo.id] = aGo

    def getGo(self, aGoId):
        """Return the Go stored under aGoId, or None if there is none."""
        return self.gos.get(aGoId)


class TcGoGenerator:
    """Iterator producing one TcGo per '>'-headed record of aFile.

    aFile may be any iterable of text lines (an open file, a StringIO,
    a list of strings).  A record starts at a line beginning with '>'
    and runs until the next such line or the end of input.
    """

    def __init__(self, aFile):
        self.file = aFile
        self.g = self.parse()

    def parse(self):
        """Yield each record as one string of stripped lines joined by '\\n'.

        Blank lines are skipped, and nothing is yielded for empty input,
        so every yielded record contains at least one non-empty line.
        """
        record_lines = []
        for line in self.file:
            stripped = line.strip()
            if not stripped:
                # A blank line carries no annotation; keeping it would
                # make Go() fail on an empty string later on.
                continue
            if line.startswith('>') and record_lines:
                yield '\n'.join(record_lines)
                record_lines = []
            record_lines.append(stripped)
        # Flush the last record, but never emit an empty one.
        if record_lines:
            yield '\n'.join(record_lines)

    def nextRecord(self):
        """Return the next raw record string; raise StopIteration at the end."""
        # The built-in next() works on both Python 2.6+ and Python 3,
        # unlike the generator's Python-2-only .next() method.
        return next(self.g)

    def __next__(self):
        """Return the next record parsed into a TcGo.

        The first line of the record, minus its leading character, becomes
        the TcGo id; every following line is parsed as a Go.
        """
        lines = self.nextRecord().splitlines()
        tc = TcGo(lines[0][1:])
        for line in lines[1:]:
            tc.addGo(Go(line))
        return tc

    # Python 2 iterator protocol and existing callers of .next().
    next = __next__

    def __iter__(self):
        return self


class GoTest(unittest.TestCase):
    def setUp(self):
        self.go = Go('GO:0003774 motor activity F GOA_SPTR|P42025~98.57~74')

    def testGo(self):
        self.assertEqual('0003774', self.go.id)
        self.assertEqual('motor activity', self.go.description)
        self.assertEqual('F', self.go.category)

    def testTcGo(self):
        tg = TcGo('TC271527')
        tg.addGo(self.go)
        self.assertEqual('0003774', tg.getGo('0003774').id)


class GeneratorTest(unittest.TestCase):
    def setUp(self):
        text = """\
>TC271527
GO:0003774 motor activity F GOA_SPTR|P42025~98.57~74
GO:0003779 actin binding F FB|FBgn0011745~89.57~74
>TC123456
GO:0015629 actin cytoskeleton C FB|FBgn0011745~89.57~74
GO:0005200 structural constituent of cytoskeleton F FB|FBgn0011745~89.57~74
>TC111111
GO:0015629 actin cytoskeleton C FB|FBgn0011745~89.57~74"""
        self.it = TcGoGenerator(StringIO(text))

    def testNextRecord(self):
        expected = """\
>TC271527
GO:0003774 motor activity F GOA_SPTR|P42025~98.57~74
GO:0003779 actin binding F FB|FBgn0011745~89.57~74"""
        self.assertEqual(expected, self.it.nextRecord())
        expected2 = """\
>TC123456
GO:0015629 actin cytoskeleton C FB|FBgn0011745~89.57~74
GO:0005200 structural constituent of cytoskeleton F FB|FBgn0011745~89.57~74"""
        self.assertEqual(expected2, self.it.nextRecord())
        expected3 = """\
>TC111111
GO:0015629 actin cytoskeleton C FB|FBgn0011745~89.57~74"""
        self.assertEqual(expected3, self.it.nextRecord())
        self.assertRaises(StopIteration, self.it.nextRecord)

    def testNextTc(self):
        self.assertEqual('TC271527', next(self.it).id)
        self.assertEqual('0015629', next(self.it).getGo('0015629').id)


if __name__ == '__main__':
    unittest.main()