1 """PhredScoreParsing by [yong27], 2004-03-29
   2 """
   3 
   4 import sys,unittest,math
   5 from cStringIO import StringIO
   6 
   7 class QualRecord:
   8     def __init__(self):
   9         self.ori_title = ''
  10         self.title = ''
  11         self.quals = []
  12 
  13     def setTitle(self, aTitle):
  14         self.ori_title = aTitle
  15         self.title = self.ori_title.split()[0][1:]
  16 
  17     def setQuals(self, aStrLines):
  18         for line in aStrLines:
  19             self.quals.extend(map(int,line.split()))
  20 
  21     def getLength(self):
  22         length = len(self.quals)
  23         assert length == int(self.ori_title.split()[1])
  24         return length
  25 
  26     def getAverage(self):
  27         sum=0
  28         for each in self.quals:
  29             sum+=each
  30         return sum / float(self.getLength())
  31 
  32     def getStd(self):
  33         avg = self.getAverage()
  34         sum=0
  35         for each in self.quals:
  36             sum+=(avg-each)*(avg-each)
  37         return math.sqrt(sum/float(self.getLength()))
  38 
  39     def getNumberMoreThan20(self):
  40         return len([n for n in self.quals if n>=20])
  41 
  42     def getStatisticalResult(self):
  43         return self.getLength(), self.getAverage(), self.getStd()
  44 
  45 
  46 class QualIterator:
  47     def __init__(self, aFileHandler):
  48         self.file = aFileHandler
  49         self._saved = []
  50     def __iter__(self):
  51         return self
  52 
  53     def next(self):
  54         lines=[]
  55         while True:
  56             line = self.file.readline()
  57             if not line:
  58                 break
  59             if lines and line[0]=='>':
  60                 self._saved.append(line)
  61                 break
  62             lines.append(line.strip())
  63         if not lines:
  64             raise StopIteration
  65         return self.getRecord(lines)
  66 
  67     def getRecord(self, lines):
  68         record = QualRecord()
  69         if lines[0][0]=='>': ## when first iteration
  70             record.setTitle(lines[0])
  71             lines = lines[1:]
  72         else:
  73             record.setTitle(self._saved[0])
  74             self._saved = self._saved[1:]
  75         record.setQuals(lines)
  76         return record
  77 
  78 
  79 class QualIteratorTest(unittest.TestCase):
  80     def setUp(self):
  81         instr = """\
  82 >1013C2SP6    36      0    226  ABI
  83 10 24 29 40 40 40 40 40 40 46 51 56 51 51 40 40 45
  84 45 45 45 51 46 46 46 32 35 23 23 11 11 11 45 31 35
  85 33 32
  86 >1013C2SP7    38      0    226  ABI
  87 10 24 29 40 40 40 40 40 40 46 51 56 51 51 40 40 45
  88 45 45 45 51 46 46 46 32 35 23 23 11 11 11 45 31 35
  89 33 32 11 19
  90 """
  91         self.input = StringIO(instr)
  92     def testIterationAndTitle(self):
  93         qi=QualIterator(self.input)
  94         self.assertEquals('1013C2SP6',qi.next().title)
  95         self.assertEquals('1013C2SP7',qi.next().title)
  96         self.failUnlessRaises(StopIteration,qi.next)
  97 
  98     def testQuals(self):
  99         qi=QualIterator(self.input)
 100         expected=[10, 24, 29, 40, 40, 40, 40, 40, 40, 46,
 101                   51, 56, 51, 51, 40, 40, 45, 45, 45, 45,
 102                   51, 46, 46, 46, 32, 35, 23, 23, 11, 11,
 103                   11, 45, 31, 35, 33, 32]
 104         self.assertEquals(expected, qi.next().quals)
 105         self.assertEquals(expected+[11,19], qi.next().quals)
 106         self.failUnlessRaises(StopIteration,qi.next)
 107 
 108     def testNumberMoreThan20(self):
 109         qi=QualIterator(self.input)
 110         qr = qi.next()
 111         self.assertEquals(32, qr.getNumberMoreThan20())
 112 
 113 
 114     def testStatisticalResult(self):
 115         qi=QualIterator(self.input)
 116         len,avg,std=qi.next().getStatisticalResult()
 117         self.assertEquals((36, 36.916666666666664, 12.193748398257197),(len,avg,std))
 118 
 119 if __name__=='__main__':
 120     #unittest.main(argv=('','-v'))
 121     sys.stdout.write('\t'.join([
 122         'filename','length','average','std','more than 20',
 123         ])+'\n')
 124     for record in QualIterator(sys.stdin):
 125         result = [record.title]
 126         result.extend(map(str, record.getStatisticalResult()))
 127         result.append(str(record.getNumberMoreThan20()))
 128         sys.stdout.write('\t'.join(result)+'\n')

PhredScoreParsing.py (last edited 2011-08-03 11:00:38 by localhost)

web biohackers.net