Human gene id list 가 담겨있는 ad.txt 파일을 읽어서, 해당 gene(RefSeq mRNA) 의 cow 것이 있으면 cow의 FastaFormat. 없으면, human의 FastaFormat을 저장하는 스크립트. BioPython의 EUtils모듈을 이용하였다. search메쏘드에 검색쿼리를 넣는다.
1 """CowMrnaAggregator by yong27
2 """
3 import os, sys, unittest
4 from Bio import Fasta
5 from Bio.EUtils import DBIdsClient
6
7 class CowMrnaAggregator:
8 client = DBIdsClient.DBIdsClient()
9 query_cow = "(cow[orgn] OR txid9913[orgn]) AND %s"
10 query_human = "(human[orgn] OR txid9606[orgn]) AND %s"
11
12 def getDbIds(self, aHumanGeneId, species="cow"):
13 query = self.query_cow
14 if species=="human":
15 query = self.query_human
16 return self.client.search(query%aHumanGeneId, db="nucleotide", retmax=100)
17
18 def getFasta(self, aHumanGeneId, species="cow"):
19 for cowId in self.getDbIds(aHumanGeneId, species):
20 fasta = Fasta.RecordParser().parse(
21 cowId.efetch(retmode='text',rettype='fasta'))
22 if fasta.title.find('|ref|')>=0 and fasta.title.find(aHumanGeneId)>=0:
23 return fasta
24
25 def getCowFasta(self, aHumanGeneId):
26 return self.getFasta(aHumanGeneId)
27 def getHumanFasta(self, aHumanGeneId):
28 return self.getFasta(aHumanGeneId, 'human')
29
30 def generateMrnas(self, aHumanGeneIdList):
31 for humanId in aHumanGeneIdList:
32 species = ''
33 fasta = self.getCowFasta(humanId)
34 if fasta:
35 species = 'cow'
36 else:
37 fasta = self.getHumanFasta(humanId)
38 species = 'human'
39 yield humanId, species, fasta
40
41
42 class TestMakeFasta(unittest.TestCase):
43 def testGetCowFasta(self):
44 cma = CowMrnaAggregator()
45 fasta = cma.getCowFasta('SCD')
46 expected="gi|51870929|ref|NM_173959.3| Bos taurus stearoyl-coenzyme A desaturase (SCD), mRNA"
47 self.assertEquals(expected, fasta.title)
48
49
50 def main(aFileName):
51 humanIds = filter(bool,[x.strip() for x in file(aFileName).readlines()])
52 woext = os.path.splitext(aFileName)[0]
53 wfile_cow = file(woext + '.cow_mrna.fasta', 'w')
54 wfile_human = file(woext + '.human.fasta', 'w')
55 wfile_stat = file(woext + '.stat.xls', 'w')
56 cma = CowMrnaAggregator()
57 for humanId, species, fasta in cma.generateMrnas(humanIds):
58 print '%s, %s'%(humanId, species)
59 ftitle = ''
60 if species == 'cow':
61 wfile = wfile_cow
62 elif species == 'human':
63 wfile = wfile_human
64 if fasta:
65 print ' --'+fasta.title
66 wfile.write(str(fasta)+'\n')
67 ftitle = fasta.title
68 wfile_stat.write(humanId+'\t'+species+'\t'+ftitle+'\n')
69
70 if __name__=='__main__':
71 #unittest.main(argv=('','-v'))
72 main('ad.txt')