Size: 13715
Comment:
|
← Revision 3 as of 2011-08-03 11:00:51 ⇥
Size: 13715
Comment: converted to 1.6 markup
|
No differences found! |
1 #
2
3 require 'win32ole'
# Blocks until the browser automation object has finished loading.
#
# First polls +ie.busy+ until it is falsy, then polls +ie.readyState+ until
# it equals 4 (READYSTATE_COMPLETE). Between polls the thread sleeps for
# +poll_interval+ seconds so that waiting does not spin a CPU core at 100%.
#
# ie            - object responding to +busy+ and +readyState+
#                 (here: an InternetExplorer.Application OLE object).
# poll_interval - seconds to sleep between polls (default 0.1); pass 0 to
#                 poll without pausing.
#
# Returns nil. There is no timeout: if the page never completes, this
# never returns.
def wait(ie, poll_interval = 0.1)
  sleep(poll_interval) while ie.busy
  sleep(poll_interval) until ie.readyState == 4 # READYSTATE_COMPLETE
  nil
end
10
# One hit from a BLAST alignment listing: the hit's identifier together with
# the aligned sequence text collected for it so far.
class PsiBlastResult
  attr_reader :id, :sequence

  # id       - identifier of the hit.
  # sequence - first fragment of aligned sequence text.
  def initialize(id, sequence)
    @id, @sequence = id, sequence
  end

  # Appends a further fragment to the stored sequence and returns the
  # combined text. A new String is built each time, so the string object
  # originally passed to the constructor is never modified.
  def addsequence(sequence)
    @sequence = @sequence + sequence
  end
end
22
# A single query sequence plus the results obtained for it.
#
# comment      - header text associated with the query.
# sequence     - the query sequence that is typed into the search form.
# rawResult    - plain text of the result page (set by #process, or
#                directly through the writer).
# blastResults - PsiBlastResult objects extracted by #purification, with
#                duplicate sequences removed.
class PsiBlastQuery
  # One row of the alignment listing, e.g.
  #   1_29407 1 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLY 60
  # Groups: 1 = id, 2 = start position, 3 = sequence fragment (uppercase
  # letters and '-'), 4 = end position. The separator class admits a single
  # quote as well as a space, exactly as the pattern always has.
  ALIGNMENT_ROW = /([_0-9A-Z]+)[' ]+([0-9]+)[' ]+([A-Z\-]+)[' ]+([0-9]+)/

  # The result page is requested as RESULT_URL_PREFIX + RID + RESULT_URL_SUFFIX.
  RESULT_URL_PREFIX = "http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi?RID=".freeze
  RESULT_URL_SUFFIX = (
    "&FORMAT_OBJECT=Alignment&FORMAT_TYPE=HTML&MASK_CHAR=0&MASK_COLOR=0&DESCRIPTIONS=100&ALIGNMENTS=50&ALIGNMENT_VIEW=QueryAnchoredNoIdentities&I_THRESH=0.001&FORMAT_ENTREZ_QUERY=&FORMAT_ENTREZ_QUERY=All+organisms&EXPECT_LOW=&EXPECT_HIGH=" +
    "&RTOE=13&CLIENT=web&FORMAT_OBJECT=Alignment&CMD=Get&PAGE=Proteins&_PGR=0&PID=28790&LAYOUT=OneWindow&FORMAT_BLOCK_ON_RESPAGE=None&STEP_NUMBER=1&EXPECT=10&HITLIST_SIZE=100&DESCRIPTIONS=100&ALIGNMENTS=50&AUTO_FORMAT=Off"
  ).freeze

  attr_reader :comment, :sequence, :rawResult, :blastResults
  attr_writer :rawResult

  def initialize(comment, sequence)
    @comment = comment
    @sequence = sequence
    @rawResult = ""
    @blastResults = []
  end

  # Prints every collected result as two lines: ">id" and the sequence.
  #
  # mode - accepted for interface compatibility; currently not used.
  def printout(mode)
    blastResults.each do |result|
      print ">#{result.id}\n"
      print "#{result.sequence}\n"
    end
  end

  # Runs this query through +manager+ and parses the answer.
  #
  # manager - object providing connect, input, submit, getRID, browse,
  #           waiting and gettext (a PsiBlastQueryManager).
  # visible - forwarded to manager.connect.
  #
  # Returns blastResults.
  def process(manager, visible)
    manager.connect(visible)
    manager.input("QUERY", self)
    manager.submit()

    # There are two ways to obtain the formatted result:
    #   1. wait until the page updates by itself
    #      (manager.submit() followed by manager.waiting(false)), or
    #   2. request the result page for the RID explicitly and refresh it
    #      manually until it is ready.
    # The second way is the one used here.
    rid = manager.getRID()
    manager.browse(RESULT_URL_PREFIX + rid + RESULT_URL_SUFFIX)
    manager.waiting(true)

    # Fetch the result text and extract the aligned sequences from it.
    @rawResult = manager.gettext()
    purification()
  end

  # Extracts the aligned sequences from rawResult into blastResults.
  #
  # Parsing starts at the line that reads exactly "Alignments". Every line
  # matching ALIGNMENT_ROW contributes its fragment to the result with the
  # same id (fragments of one id are concatenated in order of appearance).
  # Blank lines are counted since the last matching row; once two have been
  # seen the alignment section is considered finished.
  #
  # A result is appended to blastResults only if no result already kept has
  # the same full sequence. A nil rawResult is treated as empty text.
  #
  # Returns blastResults.
  def purification
    in_alignments = false
    blank_lines = 0
    by_id = {}

    # each_line (not String#each, which no longer exists on Ruby 1.9+).
    rawResult.to_s.each_line do |line|
      stripped = line.strip
      in_alignments = true if stripped == "Alignments"

      if in_alignments
        row = ALIGNMENT_ROW.match(line)
        if row
          id = row[1]
          fragment = row[3]
          if by_id.key?(id)
            by_id[id].addsequence(fragment)
          else
            by_id[id] = PsiBlastResult.new(id, fragment)
          end
          blank_lines = 0
        end
        blank_lines += 1 if stripped.empty?
      end

      in_alignments = false if blank_lines >= 2
    end

    # Compare each candidate's own full sequence; comparing against the
    # last fragment parsed would never detect a duplicate.
    by_id.each_value do |candidate|
      duplicate = @blastResults.any? { |kept| kept.sequence == candidate.sequence }
      @blastResults << candidate unless duplicate
    end
    @blastResults
  end
end
127
# Holds a list of queries and drives an Internet Explorer instance (through
# WIN32OLE) to submit them to the NCBI BLAST web form and read the answers.
#
# Relies on the top-level helper wait(ie) defined in this file to block
# until the browser has finished loading.
class PsiBlastQueryManager
  # Search form, pre-configured through its query string.
  BLAST_FORM_URL = 'http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi?CMD=Web&LAYOUT=OneWindow&AUTO_FORMAT=Semiauto&ALIGNMENTS=250&ALIGNMENT_VIEW=QueryAnchoredNoIdentities&CLIENT=web&COMPOSITION_BASED_STATISTICS=on&DATABASE=nr&CDD_SEARCH=on&DESCRIPTIONS=500&ENTREZ_QUERY=%28none%29&EXPECT=10&FORMAT_OBJECT=Alignment&FORMAT_TYPE=HTML&I_THRESH=0.005&MATRIX_NAME=BLOSUM62&NCBI_GI=on&PAGE=Proteins&PROGRAM=blastp&RUN_PSIBLAST=on&SERVICE=plain&SET_DEFAULTS.x=36&SET_DEFAULTS.y=5&SHOW_OVERVIEW=on&END_OF_HTTPGET=Yes&SHOW_LINKOUT=yes&GET_SEQUENCE=yes'.freeze

  # list - queries added so far; web - the browser OLE object.
  attr_reader :list, :web

  # visible - value later handed to each query's process call by #getResult.
  #           Defaults to false so that PsiBlastQueryManager.new() with no
  #           argument (as used by the test helpers in this file) works.
  def initialize(visible = false)
    @visible = visible
    @list = []
    @web = WIN32OLE.new('InternetExplorer.Application')
  end

  # Appends +query+ to the list.
  def add(query)
    @list << query
  end

  # Reads queries from a text file and adds one PsiBlastQuery per record.
  #
  # A line containing '>' starts a new record and becomes its comment (line
  # terminator removed). Every other non-blank line is stripped and appended
  # to the current sequence. Sequence text that appears before any '>' line
  # is stored under the comment ">no_comment". Records without sequence
  # text are not added.
  #
  # The file is opened in block form so the handle is always closed.
  def openfile(filename)
    comment = ">no_comment"
    sequence = ""

    File.open(filename) do |file|
      file.each do |line|
        if line.include?(">")
          unless sequence.empty?
            add(PsiBlastQuery.new(comment, sequence))
            sequence = ""
          end
          comment = line.chomp
        elsif !line.strip.empty?
          sequence += line.strip
        end
      end
    end

    # Flush the record that was still being collected at end of file.
    add(PsiBlastQuery.new(comment, sequence)) unless sequence.empty?
  end

  # Processes every query in the list, in order.
  def getResult
    list.each do |query|
      query.process(self, @visible)
    end
  end

  # Prints a banner with each query's comment followed by that query's
  # own printout.
  def printout(mode)
    list.each do |query|
      print "\n=================#{query.comment}====================\n"
      query.printout(mode)
    end
  end

  # Quits the browser.
  def finalize
    web.quit
  end

  # Opens the search form, waits for it to load, then applies +visible+
  # to the browser window.
  def connect(visible)
    web.navigate(BLAST_FORM_URL)
    wait(web)
    web.visible = visible
  end

  # Writes query.sequence into the form element called +name+.
  #
  # This stays best-effort: a failure does not abort the run, but it is
  # reported on stderr instead of being swallowed silently.
  def input(name, query)
    doc = web.document
    return if doc.nil?

    begin
      doc.all(name).value = query.sequence
      wait(web)
    rescue StandardError => e
      warn "PsiBlastQueryManager#input: could not fill field #{name.inspect}: #{e.message}"
    end
  end

  # Submits the first form of the current document and waits for the
  # browser to finish. Does nothing when there is no document.
  def submit
    doc = web.document
    return if doc.nil?

    doc.forms(0).submit()
    wait(web)
  end

  # Returns the inner text of the current document's body, or nil when
  # there is no document.
  def gettext
    doc = web.document
    return nil if doc.nil?

    wait(web)
    doc.body.innerText
  end

  # Returns the value of the first element named 'RID' in the current
  # document, or nil when there is no document.
  def getRID
    doc = web.document
    return nil if doc.nil?

    wait(web)
    doc.all('RID').item(0).value
  end

  # Navigates to +url+ and waits for the browser to finish.
  def browse(url)
    web.navigate(url)
    wait(web)
  end

  # Polls the page every 3 seconds for as long as its text contains
  # "WAITING".
  #
  # refresh - when truthy, the page is refreshed before each new poll.
  #
  # The pause happens before the text is read again so that a refreshed
  # page has time to start loading. There is no upper bound on the number
  # of polls.
  def waiting(refresh)
    text = gettext
    while text =~ /WAITING/
      web.refresh() if refresh
      sleep(3)
      text = gettext
    end
  end
end
240
# Manual smoke test: starts the browser and opens the search form in a
# visible window.
#
# The constructor argument is given explicitly; PsiBlastQueryManager#initialize
# takes a +visible+ parameter, so calling new() without it raised
# ArgumentError before anything was tested.
def testPsiWeb
  manager = PsiBlastQueryManager.new(true)
  manager.connect(true)
end
245
# Manual smoke test: opens the search form in a visible window and types a
# dummy sequence into the "QUERY" field.
#
# The constructor argument is given explicitly; PsiBlastQueryManager#initialize
# takes a +visible+ parameter, so calling new() without it raised
# ArgumentError before anything was tested.
def testPsiQueryInput
  manager = PsiBlastQueryManager.new(true)
  manager.connect(true)
  query = PsiBlastQuery.new("comment", "input")
  manager.input("QUERY", query)
end
252
# Manual smoke test: opens the search form in a visible window, fills in a
# sample protein sequence and submits the form.
#
# The constructor argument is given explicitly; PsiBlastQueryManager#initialize
# takes a +visible+ parameter, so calling new() without it raised
# ArgumentError before anything was tested.
def testPsiSubmit
  manager = PsiBlastQueryManager.new(true)
  manager.connect(true)
  query = PsiBlastQuery.new("comment",
    "MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA")
  manager.input("QUERY", query)
  manager.submit()
end
261
# Manual smoke test: opens the search form in a visible window, fills in a
# sample protein sequence, submits it, and then submits the first form of
# the page that follows.
#
# The constructor argument is given explicitly; PsiBlastQueryManager#initialize
# takes a +visible+ parameter, so calling new() without it raised
# ArgumentError before anything was tested.
def testPsiGetFormat
  manager = PsiBlastQueryManager.new(true)
  manager.connect(true)
  query = PsiBlastQuery.new("comment",
    "MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA")
  manager.input("QUERY", query)
  manager.submit()
  # An earlier experiment (kept for reference) switched the form to GET
  # before submitting:
  #   f = doc.forms(0)
  #   f.METHOD = "get"
  #   f.submit()
  # which produced a Blast.cgi URL carrying QUERY=<sequence> ... &CMD=Put.
  # NOTE(review): the second submit below presumably triggers the
  # "format" step on the intermediate page - confirm against the live site.
  manager.submit()
end
275
# Manual end-to-end test: submits a sample protein sequence with a hidden
# browser window, requests the result page for the returned RID, refreshes
# it until it no longer reads "WAITING", and prints the page text.
#
# The constructor argument is given explicitly (PsiBlastQueryManager#initialize
# takes a +visible+ parameter, so new() without it raised ArgumentError),
# and the browser is quit in an ensure block because an invisible Internet
# Explorer left running cannot be closed by hand.
def testPsiGetResult
  manager = PsiBlastQueryManager.new(false)
  begin
    manager.connect(false)
    query = PsiBlastQuery.new("comment",
      "MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA")
    manager.input("QUERY", query)
    manager.submit()

    # There are two ways to obtain the formatted result:
    #   1. wait until the page updates by itself
    #      (manager.submit() followed by manager.waiting(false)), or
    #   2. request the result page for the RID explicitly and refresh it
    #      manually until it is ready.
    # The second way is the one used here.
    # Example RID: "1113539432-3598-127383550938.BLASTQ4"
    rid = manager.getRID()
    url = "http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi?RID=" +
          rid +
          "&FORMAT_OBJECT=Alignment&FORMAT_TYPE=HTML&MASK_CHAR=0&MASK_COLOR=0&DESCRIPTIONS=100&ALIGNMENTS=50&ALIGNMENT_VIEW=QueryAnchoredNoIdentities&I_THRESH=0.001&FORMAT_ENTREZ_QUERY=&FORMAT_ENTREZ_QUERY=All+organisms&EXPECT_LOW=&EXPECT_HIGH=" +
          "&RTOE=13&CLIENT=web&FORMAT_OBJECT=Alignment&CMD=Get&PAGE=Proteins&_PGR=0&PID=28790&LAYOUT=OneWindow&FORMAT_BLOCK_ON_RESPAGE=None&STEP_NUMBER=1&EXPECT=10&HITLIST_SIZE=100&DESCRIPTIONS=100&ALIGNMENTS=50&AUTO_FORMAT=Off"
    manager.browse(url)
    manager.waiting(true)

    # Fetch the result text and show it.
    text = manager.gettext()
    print text
  ensure
    manager.finalize()
  end
end
305
# Tells whether +sequence+ is still absent from +set+.
#
# set      - collection whose elements respond to +sequence+.
# sequence - the sequence text to look for.
#
# Returns false as soon as an element with an equal sequence is met,
# true when none is (note the sense: true means "not found").
def finder(set, sequence)
  set.each do |entry|
    return false if entry.sequence == sequence
  end
  true
end
314
# Offline test of the alignment parser: reads saved result text from
# +filename+, extracts the aligned sequences and prints each distinct one
# as ">id" followed by the sequence.
#
# Parsing starts at the line that reads exactly "Alignments". Rows look like
#   1_29407 1 MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLY 60
# (groups: 1 = id, 2 = start, 3 = fragment, 4 = end); fragments sharing an
# id are concatenated. Blank lines are counted since the last matching row
# and the second one ends the section.
#
# Differences from the previous version of this helper:
#   * the fragment group accepts '-' like PsiBlastQuery#purification does,
#     so gapped rows are no longer dropped;
#   * duplicates are detected by each result's own full sequence rather
#     than by the last fragment parsed;
#   * the file handle is closed (File.foreach).
#
# Returns the array of distinct results.
def testPurification(filename)
  row_pattern = /([_0-9A-Z]+)[' ]+([0-9]+)[' ]+([A-Z\-]+)[' ]+([0-9]+)/
  in_alignments = false
  blank_lines = 0
  by_id = {}

  File.foreach(filename) do |line|
    stripped = line.strip
    in_alignments = true if stripped == "Alignments"

    if in_alignments
      row = row_pattern.match(line)
      if row
        id = row[1]
        fragment = row[3]
        if by_id.key?(id)
          by_id[id].addsequence(fragment)
        else
          by_id[id] = PsiBlastResult.new(id, fragment)
        end
        blank_lines = 0
      end
      blank_lines += 1 if stripped.empty?
    end

    in_alignments = false if blank_lines >= 2
  end

  distinct = []
  by_id.each_value do |candidate|
    duplicate = distinct.any? { |kept| kept.sequence == candidate.sequence }
    distinct << candidate unless duplicate
  end

  distinct.each do |result|
    print ">#{result.id}\n"
    print "#{result.sequence}\n"
  end
end
378
# Command-line entry point.
#
#   ARGV[0] - name of the file holding the queries (required)
#   ARGV[1] - the literal word 'visible' shows the browser window; anything
#             else (or nothing) keeps it hidden
#   ARGV[2] - mode, passed through to printout
#
# Without arguments nothing runs; uncomment one of the manual tests in the
# else branch to exercise a single step.
if ARGV.size >= 1
  filename = ARGV[0]
  visible = (ARGV[1] == 'visible')
  mode = ARGV[2]

  manager = PsiBlastQueryManager.new(visible)
  begin
    manager.openfile(filename)
    manager.getResult()
    manager.printout(mode)
  ensure
    # Quit the browser even when a step above raises, so that no
    # (possibly hidden) Internet Explorer process is left behind.
    manager.finalize()
  end
else
  #testPsiWeb()
  #testPsiQueryInput()
  #testPsiSubmit()
  #testPsiGetFormat()
  #testPsiGetResult() # psi-raw.txt
  #testPurification("psi-raw.txt") # clw-data.txt
  #testPurification("test.txt") # clw-data.txt
end