1 """CowMrnaAggregator by yong27
   2 """
   3 import os, sys, unittest
   4 from Bio import Fasta
   5 from Bio.EUtils import DBIdsClient
   6 
   7 class CowMrnaAggregator:
   8     client = DBIdsClient.DBIdsClient()
   9     query_cow = "(cow[orgn] OR txid9913[orgn]) AND %s"
  10     query_human = "(human[orgn] OR txid9606[orgn]) AND %s"
  11 
  12     def getDbIds(self, aHumanGeneId, species="cow"):
  13         query = self.query_cow
  14         if species=="human":
  15             query = self.query_human
  16         return self.client.search(query%aHumanGeneId, db="nucleotide", retmax=100)
  17 
  18     def getFasta(self, aHumanGeneId, species="cow"):
  19         for cowId in self.getDbIds(aHumanGeneId, species):
  20             fasta = Fasta.RecordParser().parse(
  21                     cowId.efetch(retmode='text',rettype='fasta'))
  22             if fasta.title.find('|ref|')>=0 and fasta.title.find(aHumanGeneId)>=0:
  23                 return fasta
  24 
  25     def getCowFasta(self, aHumanGeneId):
  26         return self.getFasta(aHumanGeneId)
  27     def getHumanFasta(self, aHumanGeneId):
  28         return self.getFasta(aHumanGeneId, 'human')
  29 
  30     def generateMrnas(self, aHumanGeneIdList):
  31         for humanId in aHumanGeneIdList:  
  32             species = ''
  33             fasta = self.getCowFasta(humanId)
  34             if fasta: 
  35                 species = 'cow'  
  36             else:
  37                 fasta = self.getHumanFasta(humanId)
  38                 species = 'human'
  39             yield humanId, species, fasta
  40     
  41     
  42 class TestMakeFasta(unittest.TestCase):
  43     def testGetCowFasta(self):
  44         cma = CowMrnaAggregator()
  45         fasta = cma.getCowFasta('SCD') 
  46         expected="gi|51870929|ref|NM_173959.3| Bos taurus stearoyl-coenzyme A desaturase (SCD), mRNA"
  47         self.assertEquals(expected, fasta.title)
  48 
  49 
  50 def main(aFileName):
  51     humanIds = filter(bool,[x.strip() for x in file(aFileName).readlines()])
  52     woext = os.path.splitext(aFileName)[0]
  53     wfile_cow = file(woext + '.cow_mrna.fasta', 'w')
  54     wfile_human = file(woext + '.human.fasta', 'w')
  55     wfile_stat = file(woext + '.stat.xls', 'w')
  56     cma = CowMrnaAggregator()
  57     for humanId, species, fasta in cma.generateMrnas(humanIds):
  58         print '%s, %s'%(humanId, species)
  59         ftitle = ''
  60         if species == 'cow':
  61             wfile = wfile_cow
  62         elif species == 'human':
  63             wfile = wfile_human
  64         if fasta:
  65             print ' --'+fasta.title
  66             wfile.write(str(fasta)+'\n')
  67             ftitle = fasta.title
  68         wfile_stat.write(humanId+'\t'+species+'\t'+ftitle+'\n')
  69 
  70 if __name__=='__main__':
  71     #unittest.main(argv=('','-v'))
  72     main('ad.txt')
web biohackers.net