#format python
"""CowMrnaAggregator by yong27

For each gene identifier listed in a text file, search the Entrez
"nucleotide" database for a cow record whose FASTA title contains both
'|ref|' and the identifier.  When no cow record matches, the same search
is repeated for human.  Matching sequences are written to per-species
FASTA files and a tab-separated summary is written alongside them.
"""
import os
import sys
import unittest

from Bio import Fasta
from Bio.EUtils import DBIdsClient


class CowMrnaAggregator:
    """Fetch cow (or, as a fallback, human) FASTA records for gene ids."""

    # One Entrez client shared by all instances; it is created when the
    # class body is executed, i.e. at import time.
    client = DBIdsClient.DBIdsClient()

    # Entrez query templates; '%s' receives the gene identifier.
    query_cow = "(cow[orgn] OR txid9913[orgn]) AND %s"
    query_human = "(human[orgn] OR txid9606[orgn]) AND %s"

    def getDbIds(self, aHumanGeneId, species="cow"):
        """Search the nucleotide database for ``aHumanGeneId``.

        ``species`` selects the query template: "human" uses the human
        template, any other value uses the cow template.  At most 100
        hits are requested (``retmax=100``).  Returns whatever
        ``DBIdsClient.search`` returns.
        """
        query = self.query_cow
        if species == "human":
            query = self.query_human
        return self.client.search(
            query % aHumanGeneId, db="nucleotide", retmax=100)

    def getFasta(self, aHumanGeneId, species="cow"):
        """Return the first matching FASTA record, or None.

        Every hit from ``getDbIds`` is fetched as FASTA text and parsed;
        the first record whose title contains both '|ref|' and
        ``aHumanGeneId`` is returned.  None is returned when no hit
        qualifies.
        """
        parser = Fasta.RecordParser()
        for dbId in self.getDbIds(aHumanGeneId, species):
            fasta = parser.parse(
                dbId.efetch(retmode='text', rettype='fasta'))
            title = fasta.title
            if '|ref|' in title and aHumanGeneId in title:
                return fasta
        return None

    def getCowFasta(self, aHumanGeneId):
        """Return the matching cow FASTA record, or None."""
        return self.getFasta(aHumanGeneId)

    def getHumanFasta(self, aHumanGeneId):
        """Return the matching human FASTA record, or None."""
        return self.getFasta(aHumanGeneId, 'human')

    def generateMrnas(self, aHumanGeneIdList):
        """Yield ``(gene_id, species, fasta)`` for each id in the list.

        The cow record is preferred.  When there is none the human
        lookup is used instead and ``species`` is 'human' -- also when
        the human lookup finds nothing, in which case ``fasta`` is None.
        """
        for humanId in aHumanGeneIdList:
            fasta = self.getCowFasta(humanId)
            if fasta:
                species = 'cow'
            else:
                fasta = self.getHumanFasta(humanId)
                species = 'human'
            yield humanId, species, fasta


class TestMakeFasta(unittest.TestCase):
    """Live test: queries Entrez, so it needs network access."""

    def testGetCowFasta(self):
        cma = CowMrnaAggregator()
        fasta = cma.getCowFasta('SCD')
        expected = "gi|51870929|ref|NM_173959.3| Bos taurus stearoyl-coenzyme A desaturase (SCD), mRNA"
        self.assertEqual(expected, fasta.title)


def _read_gene_ids(aFileName):
    """Return the non-blank, whitespace-stripped lines of ``aFileName``."""
    handle = open(aFileName)
    try:
        stripped = [line.strip() for line in handle]
    finally:
        handle.close()
    return [geneId for geneId in stripped if geneId]


def main(aFileName):
    """Aggregate sequences for every gene id listed in ``aFileName``.

    With ``<base>`` being ``aFileName`` minus its extension, writes:

    * ``<base>.cow_mrna.fasta`` -- records found for cow
    * ``<base>.human.fasta``    -- records found for human
    * ``<base>.stat.xls``       -- one ``id<TAB>species<TAB>title`` line
      per gene id (the title is empty when nothing was found)

    Progress is printed to standard output.
    """
    humanIds = _read_gene_ids(aFileName)
    woext = os.path.splitext(aFileName)[0]

    suffixes = (
        ('cow', '.cow_mrna.fasta'),
        ('human', '.human.fasta'),
        ('stat', '.stat.xls'),
    )
    outputs = {}
    try:
        # Open inside the try so that a failure on a later file still
        # closes the ones that were already opened.
        for key, suffix in suffixes:
            outputs[key] = open(woext + suffix, 'w')

        cma = CowMrnaAggregator()
        for humanId, species, fasta in cma.generateMrnas(humanIds):
            print('%s, %s' % (humanId, species))
            ftitle = ''
            if fasta:
                print(' --' + fasta.title)
                outputs[species].write(str(fasta) + '\n')
                ftitle = fasta.title
            outputs['stat'].write(
                humanId + '\t' + species + '\t' + ftitle + '\n')
    finally:
        for handle in outputs.values():
            handle.close()


if __name__ == '__main__':
    # To run the (network-dependent) test instead, enable the next line.
    #unittest.main(argv=('','-v'))
    # An optional first command-line argument overrides the default input.
    if len(sys.argv) > 1:
        main(sys.argv[1])
    else:
        main('ad.txt')