1
2
3 require 'win32ole'
# Blocks until the browser automation object reports that it has finished
# loading.
#
# First polls +ie.busy+ until it is falsy, then polls +ie.readyState+ until it
# equals 4 (READYSTATE_COMPLETE in the Internet Explorer automation API).
# A short sleep between polls keeps the loop from spinning a CPU core while
# the page loads.
#
# @param ie [#busy, #readyState] browser automation object to poll
# @param poll_interval [Numeric] seconds to sleep between two polls
# @return [nil]
def wait(ie, poll_interval = 0.05)
  sleep(poll_interval) while ie.busy
  sleep(poll_interval) until ie.readyState == 4
  nil
end
10
# One alignment hit: an identifier together with the sequence text that has
# been collected for it so far.
class PsiBlastResult
  attr_reader :id
  attr_reader :sequence

  # @param id [String] identifier of the hit
  # @param sequence [String] initial sequence text
  def initialize(id, sequence)
    @id, @sequence = id, sequence
  end

  # Extends the stored sequence with +sequence+.
  #
  # The stored value is rebound to a freshly concatenated string, so the
  # string object originally handed to the constructor is never mutated.
  #
  # @param sequence [String] text to append
  # @return [String] the combined sequence
  def addsequence(sequence)
    @sequence = @sequence + sequence
  end
end
22
# One PSI-BLAST query: the comment line and sequence read from the input file,
# the raw text of the result page, and the parsed alignment rows with
# duplicate sequences removed.
class PsiBlastQuery
  # Pattern for one alignment row: a token, a number, a residue block and
  # another number, separated by runs of spaces (an apostrophe is accepted as
  # a separator too, exactly as in the original pattern). Capture 1 is used as
  # the hit id and capture 3 as the sequence fragment; '-' is allowed inside
  # the fragment. The two numeric captures are not used
  # (presumably start/end positions - TODO confirm).
  ALIGNMENT_ROW = /([_0-9A-Z]+)[' ]+([0-9]+)[' ]+([A-Z\-]+)[' ]+([0-9]+)/

  attr_reader :comment, :sequence, :blastResults
  attr_accessor :rawResult

  # @param comment [String] comment/header text belonging to the query
  # @param sequence [String] the query sequence
  def initialize(comment, sequence)
    @comment = comment
    @sequence = sequence
    @rawResult = ""
    @blastResults = []
  end

  # Prints every parsed result as two lines: ">id" followed by the sequence.
  #
  # @param mode accepted for interface compatibility; currently unused
  def printout(mode)
    blastResults.each do |result|
      print ">#{result.id}\n"
      print "#{result.sequence}\n"
    end
  end

  # Runs this query through +manager+: opens the form, fills in the sequence,
  # submits it, asks for the formatted report of the returned request id,
  # waits for the report, stores its text in +rawResult+ and parses it.
  #
  # @param manager [PsiBlastQueryManager] browser driver used for every step
  # @param visible [Boolean] forwarded to +manager.connect+
  def process(manager, visible)
    manager.connect(visible)

    manager.input("QUERY", self)
    manager.submit

    rid = manager.getRID

    url = "http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi?RID=" +
          rid +
          "&FORMAT_OBJECT=Alignment&FORMAT_TYPE=HTML&MASK_CHAR=0&MASK_COLOR=0&DESCRIPTIONS=100&ALIGNMENTS=50&ALIGNMENT_VIEW=QueryAnchoredNoIdentities&I_THRESH=0.001&FORMAT_ENTREZ_QUERY=&FORMAT_ENTREZ_QUERY=All+organisms&EXPECT_LOW=&EXPECT_HIGH=" +
          "&RTOE=13&CLIENT=web&FORMAT_OBJECT=Alignment&CMD=Get&PAGE=Proteins&_PGR=0&PID=28790&LAYOUT=OneWindow&FORMAT_BLOCK_ON_RESPAGE=None&STEP_NUMBER=1&EXPECT=10&HITLIST_SIZE=100&DESCRIPTIONS=100&ALIGNMENTS=50&AUTO_FORMAT=Off"
    manager.browse(url)
    manager.waiting(true)

    @rawResult = manager.gettext

    purification
  end

  # Parses +rawResult+ and appends the hits to +blastResults+.
  #
  # Parsing starts at a line that reads exactly "Alignments". From there on,
  # every line matching ALIGNMENT_ROW contributes its fragment to the hit with
  # the same id (fragments of one id are concatenated in order of
  # appearance). Blank lines are counted and the counter is reset by each
  # matching row; once it reaches two, parsing stops for the rest of the text.
  #
  # A hit is appended to +blastResults+ only when +finder+ reports that no
  # result with the same full sequence is stored yet.
  def purification
    in_alignments = false
    blank_count = 0
    hits = {}

    # rawResult is nil when the browser had no document; treat that as empty.
    # each_line is used because String#each no longer exists since Ruby 1.9.
    rawResult.to_s.each_line do |line|
      stripped = line.strip
      in_alignments = true if stripped == "Alignments"

      if in_alignments
        row = ALIGNMENT_ROW.match(line)
        if row
          id = row[1]
          fragment = row[3]
          if hits.key?(id)
            hits[id].addsequence(fragment)
          else
            hits[id] = PsiBlastResult.new(id, fragment)
          end
          blank_count = 0
        end
        blank_count += 1 if stripped.empty?
      end

      # The counter is never reset once it reaches two, so a later
      # "Alignments" line is switched off again in the same iteration.
      in_alignments = false if blank_count >= 2
    end

    hits.each_value do |hit|
      # Compare the hit's own assembled sequence, not the last fragment parsed.
      @blastResults << hit if finder(@blastResults, hit.sequence)
    end
  end
end
127
# Holds a list of queries and drives an Internet Explorer instance (through
# WIN32OLE) to submit them to the NCBI BLAST web form and read the result
# pages back.
class PsiBlastQueryManager
  attr_reader :list, :web

  # Starts the browser automation object.
  #
  # @param visible [Boolean] handed to every query's +process+ by #getResult.
  #   Defaults to false so the manager can also be built without arguments.
  def initialize(visible = false)
    @visible = visible
    @list = []
    @web = WIN32OLE.new('InternetExplorer.Application')
  end

  # Appends +query+ to the list of queries to run.
  def add(query)
    @list << query
  end

  # Reads queries from +filename+ and adds one PsiBlastQuery per record.
  #
  # A line containing ">" starts a new record and becomes its comment; all
  # other non-blank lines are stripped and concatenated into the sequence.
  # Sequence lines that appear before any ">" line get the comment
  # ">no_comment". Records without sequence text are dropped.
  #
  # @param filename [String] path of the input file
  def openfile(filename)
    comment = ">no_comment"
    sequence = ""

    # File.foreach closes the file when the block finishes or raises.
    File.foreach(filename) do |line|
      if line.include?(">")
        unless sequence.empty?
          add(PsiBlastQuery.new(comment, sequence))
          sequence = ""
        end
        comment = line.chomp
      elsif !line.strip.empty?
        sequence += line.strip
      end
    end

    add(PsiBlastQuery.new(comment, sequence)) unless sequence.empty?
  end

  # Runs every stored query through this manager.
  def getResult
    list.each do |query|
      query.process(self, @visible)
    end
  end

  # Prints a banner with the comment of each query followed by its results.
  #
  # @param mode forwarded unchanged to each query's +printout+
  def printout(mode)
    list.each do |query|
      print "\n=================#{query.comment}====================\n"
      query.printout(mode)
    end
  end

  # Closes the browser.
  def finalize
    web.quit
  end

  # Navigates to the BLAST query form, waits for it to load and then sets the
  # browser window's visibility.
  #
  # @param visible [Boolean] value assigned to the browser's +visible+ property
  def connect(visible)
    web.navigate('http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi?CMD=Web&LAYOUT=OneWindow&AUTO_FORMAT=Semiauto&ALIGNMENTS=250&ALIGNMENT_VIEW=QueryAnchoredNoIdentities&CLIENT=web&COMPOSITION_BASED_STATISTICS=on&DATABASE=nr&CDD_SEARCH=on&DESCRIPTIONS=500&ENTREZ_QUERY=%28none%29&EXPECT=10&FORMAT_OBJECT=Alignment&FORMAT_TYPE=HTML&I_THRESH=0.005&MATRIX_NAME=BLOSUM62&NCBI_GI=on&PAGE=Proteins&PROGRAM=blastp&RUN_PSIBLAST=on&SERVICE=plain&SET_DEFAULTS.x=36&SET_DEFAULTS.y=5&SHOW_OVERVIEW=on&END_OF_HTTPGET=Yes&SHOW_LINKOUT=yes&GET_SEQUENCE=yes')
    wait(web)
    web.visible = visible
  end

  # Writes the query's sequence into the page element called +name+.
  #
  # This stays best-effort: a failure does not abort the run, but it is now
  # reported on stderr instead of being discarded silently.
  #
  # @param name [String] name of the form element to fill
  # @param query [#sequence] query whose sequence is written
  def input(name, query)
    doc = web.document
    return if doc.nil?

    begin
      doc.all(name).value = query.sequence
      wait(web)
    rescue StandardError => e
      warn "PsiBlastQueryManager#input: could not fill '#{name}': #{e.message}"
    end
  end

  # Submits the first form of the current page and waits for the browser.
  def submit
    doc = web.document
    return if doc.nil?

    doc.forms(0).submit
    wait(web)
  end

  # @return [String, nil] inner text of the current page body, or nil when the
  #   browser has no document
  def gettext
    doc = web.document
    return if doc.nil?

    wait(web)
    doc.body.innerText
  end

  # @return the value of the first page element named 'RID', or nil when the
  #   browser has no document
  def getRID
    doc = web.document
    return if doc.nil?

    wait(web)
    doc.all('RID').item(0).value
  end

  # Navigates to +url+ and waits until the browser has finished loading.
  def browse(url)
    web.navigate(url)
    wait(web)
  end

  # Polls the current page for as long as its text contains "WAITING".
  #
  # The pause now comes before each re-check, so no time is spent sleeping
  # once the finished page has been seen.
  #
  # @param refresh [Boolean] when truthy, the page is reloaded before each
  #   re-check
  # @param poll_seconds [Numeric] pause between two checks
  def waiting(refresh, poll_seconds = 3)
    text = gettext
    while text =~ /WAITING/
      sleep(poll_seconds)
      web.refresh if refresh
      text = gettext
    end
  end
end
240
# Manual smoke test: starts the browser and opens the BLAST query form in a
# visible window.
#
# The visibility flag is passed to the constructor explicitly; the original
# call without arguments does not match a constructor that requires it.
def testPsiWeb
  manager = PsiBlastQueryManager.new(true)
  manager.connect(true)
end
245
# Manual smoke test: opens the BLAST query form in a visible window and
# writes a dummy sequence ("input") into the "QUERY" field.
#
# The visibility flag is passed to the constructor explicitly; the original
# call without arguments does not match a constructor that requires it.
def testPsiQueryInput
  manager = PsiBlastQueryManager.new(true)
  manager.connect(true)
  query = PsiBlastQuery.new("comment", "input")
  manager.input("QUERY", query)
end
252
# Manual smoke test: opens the BLAST query form in a visible window, fills
# the "QUERY" field with a sample protein sequence and submits the form.
#
# The visibility flag is passed to the constructor explicitly; the original
# call without arguments does not match a constructor that requires it.
def testPsiSubmit
  manager = PsiBlastQueryManager.new(true)
  manager.connect(true)
  query = PsiBlastQuery.new("comment",
                            "MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA")
  manager.input("QUERY", query)
  manager.submit
end
261
# Manual smoke test: opens the BLAST query form in a visible window, fills
# the "QUERY" field with a sample protein sequence, submits the form and then
# submits the first form of the page that follows.
#
# The visibility flag is passed to the constructor explicitly; the original
# call without arguments does not match a constructor that requires it.
def testPsiGetFormat
  manager = PsiBlastQueryManager.new(true)
  manager.connect(true)
  query = PsiBlastQuery.new("comment",
                            "MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA")
  manager.input("QUERY", query)
  manager.submit

  # Second submit acts on whatever page the first submit produced.
  manager.submit
end
275
# Manual end-to-end test: submits a sample protein sequence with a hidden
# browser window, requests the formatted report for the returned request id,
# waits until the report is ready and prints its text.
#
# The visibility flag is passed to the constructor explicitly; the original
# call without arguments does not match a constructor that requires it.
def testPsiGetResult
  manager = PsiBlastQueryManager.new(false)
  manager.connect(false)
  query = PsiBlastQuery.new("comment",
                            "MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA")
  manager.input("QUERY", query)
  manager.submit

  rid = manager.getRID

  url = "http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi?RID=" +
        rid +
        "&FORMAT_OBJECT=Alignment&FORMAT_TYPE=HTML&MASK_CHAR=0&MASK_COLOR=0&DESCRIPTIONS=100&ALIGNMENTS=50&ALIGNMENT_VIEW=QueryAnchoredNoIdentities&I_THRESH=0.001&FORMAT_ENTREZ_QUERY=&FORMAT_ENTREZ_QUERY=All+organisms&EXPECT_LOW=&EXPECT_HIGH=" +
        "&RTOE=13&CLIENT=web&FORMAT_OBJECT=Alignment&CMD=Get&PAGE=Proteins&_PGR=0&PID=28790&LAYOUT=OneWindow&FORMAT_BLOCK_ON_RESPAGE=None&STEP_NUMBER=1&EXPECT=10&HITLIST_SIZE=100&DESCRIPTIONS=100&ALIGNMENTS=50&AUTO_FORMAT=Off"
  manager.browse(url)
  manager.waiting(true)

  text = manager.gettext
  print text
end
305
# Tells whether +sequence+ is still absent from +set+.
#
# Despite its name this returns true when nothing was found: callers use it
# as "may this sequence be added?".
#
# @param set [Enumerable<#sequence>] results collected so far
# @param sequence [String] sequence to look for
# @return [Boolean] true when no element of +set+ has this sequence,
#   false as soon as one does
def finder(set, sequence)
  set.none? { |result| result.sequence == sequence }
end
314
# Manual test of the alignment parser: reads a saved result page from
# +filename+, assembles the sequence of every hit and prints each distinct
# sequence once as ">id" followed by the sequence.
#
# Parsing starts at a line that reads exactly "Alignments". Rows that match
# the pattern add their fragment to the hit with the same id. Blank lines are
# counted and the counter is reset by each matching row; once it reaches two,
# parsing stops for the rest of the file.
#
# Changes against the earlier version:
# * the file is closed again (File.foreach),
# * duplicates are detected on each hit's own assembled sequence rather than
#   on the last fragment that happened to be parsed,
# * the row pattern accepts '-' inside the fragment, in line with
#   PsiBlastQuery#purification, so gapped rows are no longer dropped.
#
# @param filename [String] path of the saved result text
def testPurification(filename)
  row_pattern = /([_0-9A-Z]+)[' ]+([0-9]+)[' ]+([A-Z\-]+)[' ]+([0-9]+)/
  in_alignments = false
  blank_count = 0
  hits = {}

  File.foreach(filename) do |line|
    stripped = line.strip
    in_alignments = true if stripped == "Alignments"

    if in_alignments
      row = row_pattern.match(line)
      if row
        id = row[1]
        fragment = row[3]
        if hits.key?(id)
          hits[id].addsequence(fragment)
        else
          hits[id] = PsiBlastResult.new(id, fragment)
        end
        blank_count = 0
      end
      blank_count += 1 if stripped.empty?
    end

    in_alignments = false if blank_count >= 2
  end

  unique = []
  hits.each_value do |hit|
    unique << hit if finder(unique, hit.sequence)
  end

  unique.each do |hit|
    print ">#{hit.id}\n"
    print "#{hit.sequence}\n"
  end
end
378
# Command-line entry point.
#
#   ARGV[0]  input file with the queries (required)
#   ARGV[1]  the literal word "visible" shows the browser window; anything
#            else, or nothing, keeps it hidden
#   ARGV[2]  mode, handed unchanged to PsiBlastQueryManager#printout
if ARGV.size >= 1
  filename = ARGV[0]
  visible = (ARGV[1] == 'visible')
  mode = ARGV[2]

  manager = PsiBlastQueryManager.new(visible)
  begin
    manager.openfile(filename)
    manager.getResult
    manager.printout(mode)
  ensure
    # Always close the browser, even when a query fails; a hidden browser
    # process would otherwise be left running.
    manager.finalize
  end
else
  warn "usage: ruby #{$0} FILE [visible] [mode]"
end