Eukaryote용 GenePrediction 프로그램으로, [Gene]의 exon-intron structure와 location을 예측해 준다.
http://genes.mit.edu/GENSCAN.html
[Protein] 코딩 영역만을 예측해 준다. 따라서 r[RNA], t[RNA] 등은 다른 프로그램을 이용해야 한다.
본 프로그램만을 이용하여 만들어진 서열을 [Protein] [Translation]한 서열데이터베이스가 TrGen이다.
GenScan 결과의 [Parsing]
class GenscanReport:
    """Container for the genes collected from one report."""

    def __init__(self):
        # Gene objects, kept in the order they were added.
        self.genes = []

    def addGene(self, gene):
        """Append ``gene`` to the end of ``self.genes``."""
        self.genes.append(gene)
6
class Gene:
    """A single gene record: identifier, exon list, and two FASTA texts."""

    def __init__(self, no):
        """Create an empty gene labelled ``no`` (stored unchanged)."""
        self.no = no
        # Exon objects, in the order they were added.
        self.exons = []
        # FASTA text of the coding sequence; empty until setCds() is called.
        self.cds = ''
        # FASTA text of the peptide; empty until setPeptide() is called.
        self.peptide = ''

    def addExon(self, exon):
        """Append ``exon`` to the end of ``self.exons``."""
        self.exons.append(exon)

    def setCds(self, fasta):
        """Store ``fasta`` as the coding-sequence text."""
        self.cds = fasta

    def setPeptide(self, fasta):
        """Store ``fasta`` as the peptide text."""
        self.peptide = fasta
20
class Exon:
    """One exon row: label, type, strand, and integer begin/end positions."""

    def __init__(self, no, type, strand, begin, end):
        """Store the exon fields.

        ``no``, ``type`` and ``strand`` are kept exactly as given.
        ``begin`` and ``end`` are converted with ``int()``, so numeric
        strings are accepted; a non-numeric value raises ``ValueError``.

        The parameter name ``type`` shadows the builtin but is part of the
        public signature, so it is left unchanged.
        """
        self.no = no
        self.type = type
        self.strand = strand
        self.begin = int(begin)
        self.end = int(end)
28
class FastaIterator:
    """Iterate over FASTA records read from an iterable of text lines.

    Each item produced is the text of one record: its first line followed
    by every line up to (not including) the next line that starts with
    ``'>'``, joined with their original line endings.  Reading stops at
    the first line that starts with ``'Explanation'`` or at end of input.
    """

    def __init__(self, ifile):
        """Wrap ``ifile``, a file object or any iterable of lines."""
        self.ifile = ifile
        self.g = self.getGenerator()

    def getGenerator(self):
        """Return a generator yielding one record string at a time."""
        # iter() lets a plain list be consumed progressively, the same way
        # a file object is, across the two loops below.
        source = iter(self.ifile)

        # Skip leading blank lines.  The first non-blank line is kept
        # verbatim (with its newline) so the first record is formatted
        # like every later one instead of fusing header and sequence.
        lines = None
        for line in source:
            if line.strip():
                lines = [line]
                break
        if lines is None:
            # Nothing but blank lines (or no input at all): no records.
            return

        for line in source:
            if line.startswith('Explanation'):
                break
            if line.startswith('>'):
                yield ''.join(lines)
                lines = [line]
            else:
                lines.append(line)

        # Emit the record still being collected, whether the loop ended on
        # the 'Explanation' marker or because the input ran out.
        yield ''.join(lines)

    def __iter__(self):
        return self.g

    def next(self):
        """Return the next record; raises ``StopIteration`` when done."""
        return next(self.g)

    # Python 3 spelling of the iterator protocol; ``next`` stays for
    # existing callers.
    __next__ = next
54
def parseGenscan(handler):
    """Parse report text from ``handler`` into a ``GenscanReport``.

    ``handler`` is a file object or any iterable of text lines.  The
    parser works in four steps:

    1. Skip lines until one that starts with ``'-'``.
    2. Read the exon table.  Blank lines separate genes; each non-blank
       row is split on whitespace and its first five fields become an
       ``Exon(no, type, strand, begin, end)``.  The table ends at the
       first row starting with ``'P'`` or ``'S'``, or at end of input.
    3. Skip lines until one that starts with ``'Predicted coding'``.
    4. Read FASTA records with ``FastaIterator`` and assign them to the
       genes in order, two per gene: peptide first, then CDS.

    Running out of input at any step ends that step quietly; genes with
    no matching FASTA record keep empty ``peptide``/``cds`` strings.

    Raises ``IndexError`` for an exon row with fewer than five fields and
    ``ValueError`` when its begin/end fields are not integers.
    """
    lines = iter(handler)
    gr = GenscanReport()

    # Step 1: find the line starting with '-'.
    for line in lines:
        if line.startswith('-'):
            break

    # Step 2: exon table.  A gene is opened lazily on its first exon row
    # and closed on the next blank line, so runs of blank lines never
    # register empty genes (which would shift the FASTA pairing below).
    gene = None
    for line in lines:
        line = line.strip()
        if line.startswith(('P', 'S')):
            break
        if not line:
            if gene is not None:
                gr.addGene(gene)
                gene = None
            continue
        if gene is None:
            # Genes are numbered 1, 2, ... in order of appearance.
            gene = Gene(len(gr.genes) + 1)
        words = line.split()
        gene.addExon(Exon(words[0], words[1], words[2], words[3], words[4]))
    if gene is not None:
        # The table ended without a trailing blank line.
        gr.addGene(gene)

    # Step 3: find the 'Predicted coding' line.
    for line in lines:
        if line.startswith('Predicted coding'):
            break

    # Step 4: pair FASTA records with genes.  iter() is used so this works
    # whether or not FastaIterator itself provides __next__.
    records = iter(FastaIterator(lines))
    for gene in gr.genes:
        gene.setPeptide(next(records, ''))
        gene.setCds(next(records, ''))
    return gr