Eukaryote용 gene prediction 프로그램으로, gene의 exon-intron structure와 location을 예측해 준다.
http://genes.mit.edu/GENSCAN.html
Protein 코딩 영역만을 예측해 준다. 따라서 rRNA, tRNA 등은 다른 프로그램을 이용해야 한다.
본 프로그램만을 이용하여 만들어진 서열을 Protein Translation한 서열데이터베이스가 TrGen이다.
GenScan 결과의 Parsing
"""This parser works on GENSCAN output produced with the "-v -cds" options."""
class GenscanReport(object):
    """Container for the genes collected while parsing one report."""

    def __init__(self):
        # Gene objects, kept in the order they were added.
        self.genes = []

    def __repr__(self):
        return '%s(genes=%r)' % (type(self).__name__, self.genes)

    def addGene(self, gene):
        """Append *gene* to the end of ``self.genes``."""
        self.genes.append(gene)
7
class Gene(object):
    """One predicted gene: its exons plus its CDS and peptide sequences.

    ``no`` is stored exactly as given by the caller.  ``cds`` and
    ``peptide`` start out as empty strings and are filled in through
    ``setCds`` / ``setPeptide``.
    """

    def __init__(self, no):
        self.no = no
        # Exon objects, kept in the order they were added.
        self.exons = []
        self.cds = ''
        self.peptide = ''

    def __repr__(self):
        return '%s(no=%r, exons=%d)' % (
            type(self).__name__, self.no, len(self.exons))

    def addExon(self, exon):
        """Append *exon* to the end of ``self.exons``."""
        self.exons.append(exon)

    def setCds(self, fasta):
        """Store *fasta* as this gene's coding sequence."""
        self.cds = fasta

    def setPeptide(self, fasta):
        """Store *fasta* as this gene's peptide sequence."""
        self.peptide = fasta
21
class Exon(object):
    """One row of the predicted exon table.

    ``no``, ``type`` and ``strand`` are stored unchanged; ``begin`` and
    ``end`` are converted with ``int()``, so numeric strings are accepted.

    Raises:
        ValueError: if ``begin`` or ``end`` cannot be converted to an int.
    """

    # NOTE: the parameter name ``type`` shadows the builtin, but it is part
    # of the constructor's public signature and is therefore kept.
    def __init__(self, no, type, strand, begin, end):
        self.no = no
        self.type = type
        self.strand = strand
        self.begin = int(begin)
        self.end = int(end)

    def __repr__(self):
        return '%s(no=%r, type=%r, strand=%r, begin=%r, end=%r)' % (
            self.__class__.__name__, self.no, self.type, self.strand,
            self.begin, self.end)
29
class FastaIterator(object):
    """Iterate over FASTA records read from an iterable of text lines.

    Each record is returned as a single string: the ``>`` header line
    followed by every line up to (not including) the next header.  Reading
    stops at the first line that starts with ``Explanation`` or at the end
    of the input, whichever comes first.

    ``ifile`` may be any iterable of lines (an open file, a list, another
    iterator).  When it is an iterator that was already partly consumed,
    reading continues from its current position.
    """

    def __init__(self, ifile):
        self.ifile = ifile
        self.g = self.getGenerator()

    def getGenerator(self):
        """Return a generator that yields one record string at a time."""
        source = iter(self.ifile)

        # Skip leading blank lines.  The first non-blank line opens the
        # first record; it is stored stripped with a single trailing newline.
        lines = None
        for line in source:
            stripped = line.strip()
            if stripped:
                lines = [stripped + '\n']
                break
        if lines is None:
            # Nothing but blank lines (or no input at all): no records.
            # Returning here, rather than letting StopIteration escape from
            # inside the generator, keeps this valid under PEP 479.
            return

        for line in source:
            if line.startswith('Explanation'):
                break
            if line.startswith('>'):
                yield ''.join(lines)
                lines = [line]
            else:
                lines.append(line)

        # Emit the record still being collected, whether the loop ended on
        # an 'Explanation' line or because the input ran out.
        yield ''.join(lines)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next record; raise StopIteration when none are left."""
        return next(self.g)

    # Python 2 spelling of the iterator protocol, kept for existing callers.
    next = __next__
55
def parseGenscan(handler):
    """Parse a GENSCAN text report into a ``GenscanReport``.

    Parsing happens in four passes over the same line stream:

    1. Lines are skipped up to and including the first one that starts
       with ``-``.
    2. The following lines are read as the exon table.  A blank line closes
       the gene being collected; any other line is split on whitespace and
       its first five fields are passed to ``Exon``.  The table ends at the
       first line whose stripped text starts with ``P``, ``S`` or ``NO``
       (presumably the "Predicted ...", "Suboptimal ..." and "NO EXONS ..."
       lines of the report; confirm against real output).
    3. Lines are skipped up to and including the first one that starts
       with ``Predicted coding``.
    4. The remaining lines are read as FASTA records, two per gene in
       report order: first the peptide, then the CDS.

    Genes are numbered 1, 2, 3, ... in the order they appear.  If the input
    ends early, parsing stops quietly and the report built so far is
    returned; genes without sequence records keep empty ``peptide``/``cds``.

    Args:
        handler: an iterable of text lines, e.g. an open file.

    Returns:
        The populated ``GenscanReport``.

    Raises:
        IndexError: if a table row has fewer than five fields.
        ValueError: if the begin/end fields of a row are not integers.
    """
    # Use one shared iterator so every pass resumes where the last stopped.
    handler = iter(handler)
    gr = GenscanReport()

    # Pass 1: skip ahead to the first line starting with '-'.
    for line in handler:
        if line.startswith('-'):
            break

    # Pass 2: the exon table.
    count = 0
    gene = None
    for line in handler:
        line = line.strip()
        if line.startswith(('P', 'S', 'NO')):
            break
        if not line:
            # A blank line ends the current gene.  Repeated blank lines do
            # nothing, so gene numbers stay consecutive.
            if gene is not None:
                gr.addGene(gene)
                gene = None
            continue
        if gene is None:
            # First row of a new gene, including a row that follows the
            # dashed line directly with no blank line in between.
            count += 1
            gene = Gene(count)
        words = line.split()
        gene.addExon(Exon(words[0], words[1], words[2], words[3], words[4]))
    if gene is not None:
        # The table ended without a closing blank line; keep the last gene.
        gr.addGene(gene)

    # Pass 3: skip ahead to the sequence section.  Running out of input
    # simply leaves nothing for the next pass to read.
    for line in handler:
        if line.startswith('Predicted coding'):
            break

    # Pass 4: attach peptide and CDS records.  iter() is used so this works
    # whether FastaIterator exposes next() or __next__().
    records = iter(FastaIterator(handler))
    for gene in gr.genes:
        peptide = next(records, None)
        if peptide is None:
            break
        gene.setPeptide(peptide)
        cds = next(records, None)
        if cds is None:
            break
        gene.setCds(cds)
    return gr