1 """ExpressionProfileByEstClustering by -- ["yong27"] [[DateTime(2006-08-23T16:57:50Z)]]"""
   2 
   3 import sys, math, unittest
   4 
   5 class SummableDict(dict):
   6     def sum(self):
   7         return sum(self.values())
   8 
   9     def __iadd__(self, sdict):
  10         for key in sdict:
  11             if key in self:
  12                 self[key]+=sdict[key]
  13             else:
  14                 self[key] = sdict[key]
  15         return self
  16 
  17 
  18 class Contig:
  19     def __init__(self, lines):
  20         self.name = lines[0]
  21         self.ests = lines[1:]
  22         self.libraries = SummableDict(PLM=0, BF=0, PAF=0, PFC1=0)
  23         for est in self.ests:
  24             swords = est.split('_')
  25             if swords[2] !="N":
  26                 self.libraries[swords[1]] += 1
  27 
  28         self.nAllEsts = self.libraries.sum() 
  29         self.r = 0
  30 
  31     def nEsts(self, library):
  32         return self.libraries.get(library)
  33 
  34     def withTotalEstsDict(self, total_ests):
  35         f = self.nAllEsts / float(total_ests.sum())
  36         for library, nEsts in self.libraries.iteritems():
  37             if nEsts != 0:
  38                 self.r += nEsts * math.log(nEsts / (total_ests.get(library) * f))
  39 
  40     def __str__(self):
  41         return '\t'.join(map(str, [self.name, 
  42                 self.nEsts('PLM'), 
  43                 self.nEsts('BF'), 
  44                 self.nEsts('PAF'), 
  45                 self.nEsts('PFC1'), 
  46                 self.nAllEsts, self.r,
  47                 ]))
  48 
  49 
  50 class FastaIterator:
  51     def __init__(self, ifile):
  52         self.ifile = ifile
  53         self.g = self.getGenerator()
  54     def getGenerator(self):
  55         lines = [self.ifile.next().strip()]
  56         for line in self.ifile:
  57             if line.startswith('>'):
  58                 yield Contig(lines)
  59                 lines = [line.strip()]
  60             else:
  61                 lines.append(line.strip())
  62         else:
  63             yield Contig(lines)
  64     def __iter__(self):
  65         return self.g
  66 
  67     def next(self):
  68         return self.g.next()
  69 
  70 
  71 from cStringIO import StringIO
  72 class ContigStatTest(unittest.TestCase):
  73     def setUp(self):
  74         input = """\
  75 >TOTAL_contig00001
  76 susfleck_BF_8184_P21.ab1
  77 susfleck_PFC1_6568_A14.ab1
  78 susfleck_PLM_N_7376_F15.ab1
  79 susfleck_PLM_7780_K07.ab1
  80 susfleck_PLM_7781_K07.ab1
  81 >TOTAL_contig00002
  82 susfleck_BF_0021_0024_D23.ab1
  83 susfleck_PAF_2932_E15.ab1
  84 susfleck_BF_0073_0076_C12.ab
  85 """
  86         self.fi = FastaIterator(StringIO(input))
  87 
  88     def testCount(self):
  89         c1 = self.fi.next()
  90         self.assertEquals('>TOTAL_contig00001', c1.name)
  91         self.assertEquals(1, c1.nEsts('BF'))
  92         self.assertEquals(2, c1.nEsts('PLM'))
  93         self.assertEquals(4, c1.nAllEsts)
  94 
  95         c2 = self.fi.next()
  96         self.assertEquals('>TOTAL_contig00002', c2.name)
  97         self.assertEquals(2, c2.nEsts('BF'))
  98         self.assertEquals(0, c2.nEsts('PLM'))
  99         self.assertEquals(1, c2.nEsts('PAF'))
 100         self.assertEquals(3, c2.nAllEsts)
 101 
 102     def testSummableDict(self):
 103         sd1 = SummableDict(a=0, b=2, d=2)
 104         sd2 = SummableDict(a=1, c=4, d=3)
 105         sd1 += sd2
 106         self.assertEquals(SummableDict(a=1, b=2, c=4, d=5), sd1)
 107 
 108 
 109 def main(aFile):
 110     contigs = []
 111     total_ests = SummableDict(PLM=0, BF=0, PAF=0, PFC1=0)
 112     for contig in FastaIterator(aFile):
 113         total_ests += contig.libraries
 114         contigs.append(contig)
 115 
 116     for contig in contigs:
 117         contig.withTotalEstsDict(total_ests)
 118 
 119     contigs.sort(key=lambda x: x.r, reverse=True)
 120     
 121     print '\t'.join(['Contig name', 'PLM', 'BF','PAF','PFC1', 'Sum', 'R'])
 122     for contig in contigs:
 123         print contig
 124 
 125 if __name__=='__main__':
 126     #unittest.main()
 127     #main(file('total_contig_info.txt'))
 128     main(sys.stdin)

ExpressionProfileByEstClustering.py (last edited 2011-08-03 11:01:01 by localhost)

web biohackers.net