1 """PhredScoreParsing by [yong27], 2004-03-29
2 """
3
4 import sys,unittest,math
5 from cStringIO import StringIO
6
7 class QualRecord:
8 def __init__(self):
9 self.ori_title = ''
10 self.title = ''
11 self.quals = []
12
13 def setTitle(self, aTitle):
14 self.ori_title = aTitle
15 self.title = self.ori_title.split()[0][1:]
16
17 def setQuals(self, aStrLines):
18 for line in aStrLines:
19 self.quals.extend(map(int,line.split()))
20
21 def getLength(self):
22 length = len(self.quals)
23 assert length == int(self.ori_title.split()[1])
24 return length
25
26 def getAverage(self):
27 sum=0
28 for each in self.quals:
29 sum+=each
30 return sum / float(self.getLength())
31
32 def getStd(self):
33 avg = self.getAverage()
34 sum=0
35 for each in self.quals:
36 sum+=(avg-each)*(avg-each)
37 return math.sqrt(sum/float(self.getLength()))
38
39 def getNumberMoreThan20(self):
40 return len([n for n in self.quals if n>=20])
41
42 def getStatisticalResult(self):
43 return self.getLength(), self.getAverage(), self.getStd()
44
45
46 class QualIterator:
47 def __init__(self, aFileHandler):
48 self.file = aFileHandler
49 self._saved = []
50 def __iter__(self):
51 return self
52
53 def next(self):
54 lines=[]
55 while True:
56 line = self.file.readline()
57 if not line:
58 break
59 if lines and line[0]=='>':
60 self._saved.append(line)
61 break
62 lines.append(line.strip())
63 if not lines:
64 raise StopIteration
65 return self.getRecord(lines)
66
67 def getRecord(self, lines):
68 record = QualRecord()
69 if lines[0][0]=='>':
70 record.setTitle(lines[0])
71 lines = lines[1:]
72 else:
73 record.setTitle(self._saved[0])
74 self._saved = self._saved[1:]
75 record.setQuals(lines)
76 return record
77
78
79 class QualIteratorTest(unittest.TestCase):
80 def setUp(self):
81 instr = """\
82 >1013C2SP6 36 0 226 ABI
83 10 24 29 40 40 40 40 40 40 46 51 56 51 51 40 40 45
84 45 45 45 51 46 46 46 32 35 23 23 11 11 11 45 31 35
85 33 32
86 >1013C2SP7 38 0 226 ABI
87 10 24 29 40 40 40 40 40 40 46 51 56 51 51 40 40 45
88 45 45 45 51 46 46 46 32 35 23 23 11 11 11 45 31 35
89 33 32 11 19
90 """
91 self.input = StringIO(instr)
92 def testIterationAndTitle(self):
93 qi=QualIterator(self.input)
94 self.assertEquals('1013C2SP6',qi.next().title)
95 self.assertEquals('1013C2SP7',qi.next().title)
96 self.failUnlessRaises(StopIteration,qi.next)
97
98 def testQuals(self):
99 qi=QualIterator(self.input)
100 expected=[10, 24, 29, 40, 40, 40, 40, 40, 40, 46,
101 51, 56, 51, 51, 40, 40, 45, 45, 45, 45,
102 51, 46, 46, 46, 32, 35, 23, 23, 11, 11,
103 11, 45, 31, 35, 33, 32]
104 self.assertEquals(expected, qi.next().quals)
105 self.assertEquals(expected+[11,19], qi.next().quals)
106 self.failUnlessRaises(StopIteration,qi.next)
107
108 def testNumberMoreThan20(self):
109 qi=QualIterator(self.input)
110 qr = qi.next()
111 self.assertEquals(32, qr.getNumberMoreThan20())
112
113
114 def testStatisticalResult(self):
115 qi=QualIterator(self.input)
116 len,avg,std=qi.next().getStatisticalResult()
117 self.assertEquals((36, 36.916666666666664, 12.193748398257197),(len,avg,std))
118
119 if __name__=='__main__':
120
121 sys.stdout.write('\t'.join([
122 'filename','length','average','std','more than 20',
123 ])+'\n')
124 for record in QualIterator(sys.stdin):
125 result = [record.title]
126 result.extend(map(str, record.getStatisticalResult()))
127 result.append(str(record.getNumberMoreThan20()))
128 sys.stdout.write('\t'.join(result)+'\n')