#format python
"""PhredScoreParsing by [yong27], 2004-03-29

Parse FASTA-like quality files made of records of the form::

    >NAME LENGTH [other fields ...]
    q1 q2 q3 ...
    q4 q5 ...

and report per-record statistics (length, average, standard deviation and
the number of quality values that are at least 20).

Run as a script, the module reads such a file from standard input and
writes one tab-separated line per record to standard output.
"""
import sys
import unittest
import math

try:  # Python 2
    from cStringIO import StringIO
except ImportError:  # Python 3
    from io import StringIO


class QualRecord(object):
    """One quality record: a title line plus its list of integer scores."""

    def __init__(self):
        # ori_title: the full title line as given to setTitle().
        # title: first whitespace-separated field of the title, without '>'.
        # quals: quality values accumulated by setQuals().
        self.ori_title = ''
        self.title = ''
        self.quals = []

    def setTitle(self, aTitle):
        """Store the title line and derive the short record name from it.

        The short name is the first whitespace-separated field with its
        first character (the '>' marker) removed.  Raises IndexError when
        the title contains no fields at all.
        """
        self.ori_title = aTitle
        self.title = self.ori_title.split()[0][1:]

    def setQuals(self, aStrLines):
        """Append the integers found on each line of ``aStrLines``.

        Values are added to any already stored.  Raises ValueError when a
        field is not an integer.
        """
        for line in aStrLines:
            self.quals.extend(map(int, line.split()))

    def getLength(self):
        """Return the number of quality values.

        The count is checked against the second field of the title line.

        Raises:
            AssertionError: the count differs from the declared length.
                Raised explicitly (not through ``assert``) so that the
                check is not stripped under ``python -O``.
            IndexError / ValueError: the title has no second field, or that
                field is not an integer.
        """
        length = len(self.quals)
        declared = int(self.ori_title.split()[1])
        if length != declared:
            raise AssertionError(
                'record %r declares %d quality values but has %d'
                % (self.title, declared, length))
        return length

    def getAverage(self):
        """Return the arithmetic mean of the values (0.0 if there are none)."""
        length = self.getLength()
        if not length:
            return 0.0
        return sum(self.quals) / float(length)

    def getStd(self):
        """Return the population standard deviation (0.0 if no values)."""
        length = self.getLength()
        if not length:
            return 0.0
        avg = self.getAverage()
        # Accumulate sequentially so the result does not depend on how the
        # running interpreter implements sum() for floats.
        squares = 0.0
        for each in self.quals:
            diff = avg - each
            squares += diff * diff
        return math.sqrt(squares / float(length))

    def getNumberMoreThan20(self):
        """Return how many values are >= 20 (note: 20 itself is counted)."""
        return len([n for n in self.quals if n >= 20])

    def getStatisticalResult(self):
        """Return the tuple ``(length, average, standard deviation)``."""
        return self.getLength(), self.getAverage(), self.getStd()


class QualIterator(object):
    """Iterate over the QualRecord objects read from a file-like object.

    Only ``readline()`` is required of the file object.  Blank lines are
    ignored.  A title line directly followed by another title line (or by
    the end of the file) yields a record without quality values.
    """

    def __init__(self, aFileHandler):
        self.file = aFileHandler
        # Title line of the next record, read while finishing the previous
        # one.  Holds at most one entry.
        self._saved = []

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next record; raise StopIteration at end of input.

        Raises ValueError when quality values appear before any title line.
        """
        header = None
        if self._saved:
            header = self._saved.pop(0)
        body = []
        while True:
            line = self.file.readline()
            if not line:
                break
            text = line.strip()
            if not text:
                continue
            if line.startswith('>'):
                if header is None and not body:
                    # Very first title line of the input.
                    header = text
                    continue
                # Title of the following record: keep it for the next call.
                self._saved.append(text)
                break
            body.append(text)
        if header is None:
            if body:
                raise ValueError(
                    "quality values found before any '>' title line")
            raise StopIteration
        return self.getRecord([header] + body)

    next = __next__  # Python 2 iterator protocol

    def getRecord(self, lines):
        """Build a QualRecord from stripped text lines.

        When the first line starts with '>' it is used as the title;
        otherwise the title is taken from the saved title line.  Raises
        ValueError when neither is available.
        """
        record = QualRecord()
        if lines and lines[0].startswith('>'):
            record.setTitle(lines[0])
            lines = lines[1:]
        elif self._saved:
            record.setTitle(self._saved.pop(0))
        else:
            raise ValueError('no title line available for the record')
        record.setQuals(lines)
        return record


class QualIteratorTest(unittest.TestCase):
    def setUp(self):
        instr = """\
>1013C2SP6 36 0 226 ABI
10 24 29 40 40 40 40 40 40 46 51 56 51 51 40 40 45
45 45 45 51 46 46 46 32 35 23 23 11 11 11 45 31 35
33 32
>1013C2SP7 38 0 226 ABI
10 24 29 40 40 40 40 40 40 46 51 56 51 51 40 40 45
45 45 45 51 46 46 46 32 35 23 23 11 11 11 45 31 35
33 32 11 19
"""
        self.input = StringIO(instr)

    def testIterationAndTitle(self):
        qi = QualIterator(self.input)
        self.assertEqual('1013C2SP6', qi.next().title)
        self.assertEqual('1013C2SP7', qi.next().title)
        self.assertRaises(StopIteration, qi.next)

    def testQuals(self):
        qi = QualIterator(self.input)
        expected = [10, 24, 29, 40, 40, 40, 40, 40, 40, 46, 51, 56, 51, 51,
                    40, 40, 45, 45, 45, 45, 51, 46, 46, 46, 32, 35, 23, 23,
                    11, 11, 11, 45, 31, 35, 33, 32]
        self.assertEqual(expected, qi.next().quals)
        self.assertEqual(expected + [11, 19], qi.next().quals)
        self.assertRaises(StopIteration, qi.next)

    def testNumberMoreThan20(self):
        qi = QualIterator(self.input)
        qr = qi.next()
        self.assertEqual(32, qr.getNumberMoreThan20())

    def testStatisticalResult(self):
        qi = QualIterator(self.input)
        length, avg, std = qi.next().getStatisticalResult()
        self.assertEqual(36, length)
        self.assertAlmostEqual(36.916666666666664, avg)
        self.assertAlmostEqual(12.193748398257197, std)

    def testTitleWithoutQuals(self):
        qi = QualIterator(StringIO('>a 0\n>b 0\n'))
        self.assertEqual(['a', 'b'], [record.title for record in qi])

    def testBlankLinesAreIgnored(self):
        qi = QualIterator(StringIO('\n>a 2\n1 2\n\n>b 1\n\n3\n'))
        self.assertEqual([[1, 2], [3]], [record.quals for record in qi])


def main():
    """Write a tab-separated statistics table for the records on stdin."""
    sys.stdout.write('\t'.join([
        'filename', 'length', 'average', 'std', 'more than 20',
    ]) + '\n')
    for record in QualIterator(sys.stdin):
        result = [record.title]
        result.extend(map(str, record.getStatisticalResult()))
        result.append(str(record.getNumberMoreThan20()))
        sys.stdout.write('\t'.join(result) + '\n')


if __name__ == '__main__':
    # To run the tests instead: unittest.main(argv=('', '-v'))
    main()