Differences between revisions 2 and 3
Revision 2 as of 2006-09-07 21:28:01
Size: 13715
Editor: 127
Comment:
Revision 3 as of 2011-08-03 11:00:51
Size: 13715
Editor: localhost
Comment: converted to 1.6 markup
No differences found!
   1 #
   2 
   3 require 'win32ole'
   4 def wait(ie) 
   5    while ie.busy 
   6    end 
   7    until ie.readyState == 4 # READYSTATE_COMPLETE 
   8 	end 
   9 end 
  10 
  11 class PsiBlastResult
  12 	attr_reader :id, :sequence
  13 	def initialize(id, sequence)
  14 		@id = id
  15 		@sequence = sequence
  16 	end
  17 	
  18 	def addsequence(sequence)
  19 		@sequence += sequence
  20 	end
  21 end
  22 
  23 class PsiBlastQuery
  24 	attr_reader :comment, :sequence, :rawResult, :blastResults
  25 	attr_writer :rawResult
  26 	def initialize(comment, sequence)
  27 		@comment = comment
  28 		@sequence = sequence
  29 		@rawResult = ""
  30 		@blastResults = []
  31 	end
  32 	
  33 	def printout(mode)
  34 		blastResults.each do |aResult|
  35 			print ">#{aResult.id}\n"
  36 			print "#{aResult.sequence}\n"
  37 		end
  38 	end
  39 	
  40 	def process( manager, visible )
  41 		manager.connect(visible)
  42 		#query = PsiBlastQuery.new("comment", 
  43 		#"MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA")
  44 		manager.input("QUERY",self)
  45 		manager.submit()
  46 		
  47 		# Format 얻는 방법
  48 		# 1.  자동을 업데이트 될때까지 기다린다.
  49 		#manager.submit()
  50 		#manager.waiting(false)
  51 		
  52 		# Format  얻는 방법
  53 		# 2. 수동으로  refresh  시킨다.
  54 		#rid = "1113539432-3598-127383550938.BLASTQ4"
  55 		   rid = manager.getRID()
  56 		#http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi?RID=1113540862-28790-25293121580.BLASTQ2&FORMAT_OBJECT=Alignment&FORMAT_TYPE=HTML&MASK_CHAR=0&MASK_COLOR=0&DESCRIPTIONS=100&ALIGNMENTS=50&ALIGNMENT_VIEW=Pairwise&I_THRESH=0.001&FORMAT_ENTREZ_QUERY=&FORMAT_ENTREZ_QUERY=All+organisms&EXPECT_LOW=&EXPECT_HIGH=&RID=1113540862-28790-25293121580.BLASTQ2&RTOE=13&CLIENT=web&FORMAT_OBJECT=Alignment&CMD=Get&PAGE=Proteins&_PGR=0&PID=28790&LAYOUT=OneWindow&FORMAT_BLOCK_ON_RESPAGE=None&STEP_NUMBER=1&EXPECT=10&HITLIST_SIZE=100&DESCRIPTIONS=100&ALIGNMENTS=50&AUTO_FORMAT=Off	
  57 		   url = "http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi?RID=" +
  58 		   	rid +
  59 		   	"&FORMAT_OBJECT=Alignment&FORMAT_TYPE=HTML&MASK_CHAR=0&MASK_COLOR=0&DESCRIPTIONS=100&ALIGNMENTS=50&ALIGNMENT_VIEW=QueryAnchoredNoIdentities&I_THRESH=0.001&FORMAT_ENTREZ_QUERY=&FORMAT_ENTREZ_QUERY=All+organisms&EXPECT_LOW=&EXPECT_HIGH="+
  60 		   	"&RTOE=13&CLIENT=web&FORMAT_OBJECT=Alignment&CMD=Get&PAGE=Proteins&_PGR=0&PID=28790&LAYOUT=OneWindow&FORMAT_BLOCK_ON_RESPAGE=None&STEP_NUMBER=1&EXPECT=10&HITLIST_SIZE=100&DESCRIPTIONS=100&ALIGNMENTS=50&AUTO_FORMAT=Off"
  61 		   manager.browse( url )
  62 		   manager.waiting(true)
  63 		
  64 		# 결과를 얻어온다.
  65 		@rawResult = manager.gettext()
  66 		#print @rawResult	
  67 		purification()
  68 	end	
  69 	
  70 	def purification()
  71 		bAlignments = false
  72 		nEnterCount = 0
  73 		hash = {}
  74 		r = Regexp.new("([_0-9A-Z]+)[' ']+([0-9]+)[' ']+([A-Z\-]+)[' ']+([0-9]+)")
  75 		id = ""
  76 		sequence = ""
  77 		
  78 		results = rawResult
  79 		results.each do |line|
  80 			if ( line.strip == "Alignments" )
  81 				bAlignments = true
  82 			end
  83 			if ( bAlignments == true )
  84 				if ( r =~ line )
  85 					id = r.match(line)[1]
  86 					sequence = r.match(line)[3]
  87 					if ( hash[ id ] == nil )
  88 						result = PsiBlastResult.new( id, sequence )
  89 						hash.store( id, result )
  90 					else
  91 						hash[id].addsequence( sequence )
  92 					end
  93 					nEnterCount = 0
  94 					# print  r.match(line)[0]
  95 					# 1_29407      1   MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLY 60
  96 					# print r.match(line)[1]
  97 					# 1_29407
  98 					# print r.match(line)[2]
  99 					# 1
 100 					# print r.match(line)[3]
 101 					# MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLY
 102 					# print r.match(line)[4]
 103 					# 60
 104 					
 105 				end
 106 				#print r.match(line)
 107 				#print r.match(line)
 108 				#print r.match(line)
 109 			end
 110 			if ( bAlignments == true && line.strip.empty? )
 111 				nEnterCount = nEnterCount + 1
 112 			end
 113 			if ( nEnterCount >= 2 )
 114 				bAlignments = false
 115 			end
 116 		end
 117 		
 118 		hash.each do |key,value|
 119 			#print ">#{value.id}\n"
 120 			#print "#{value.sequence}\n"
 121 			if ( finder( @blastResults, sequence ) )
 122 				@blastResults << value
 123 			end
 124 		end
 125 	end
 126 end
 127 
 128 class PsiBlastQueryManager
 129 	attr_reader :list, :web
 130 	def initialize(visible)
 131 		@visible = visible
 132 		@list = []
 133 		@web = WIN32OLE.new('InternetExplorer.Application')
 134 	end
 135 	
 136 	def add( query )
 137 		@list << query
 138 	end
 139 	
 140 	def openfile( filename )
 141 		comment = ">no_comment"
 142 		sequence = ""
 143 	
 144 		f = File.open(filename)
 145 		f.each do |line|
 146 			if (line =~ />/) != nil
 147 				if ( sequence.empty? == false )
 148 					add( PsiBlastQuery.new( comment, sequence ) )
 149 					comment = ">no_comment"
 150 					sequence = ""
 151 				end
 152 				comment = line.chop #delete carriage return			
 153 			elsif line.strip.size > 0
 154 				sequence += line.strip
 155 			end
 156 		end
 157 		if ( sequence.empty? == false )
 158 			add( PsiBlastQuery.new( comment, sequence ) )
 159 			comment = ">no_comment"
 160 			sequence = ""
 161 		end	
 162 	end	
 163 	
 164 	def getResult()
 165 		list.each do |data|
 166 			data.process(self, @visible)
 167 		end	
 168 	end
 169 	
 170 	def printout(mode)
 171 		list.each do |data|
 172 			print "\n=================#{data.comment}====================\n"
 173 			data.printout(mode)
 174 		end
 175 	end
 176 	
 177 	def finalize()
 178 		web.quit
 179 	end
 180 	
 181 	def connect(visible)
 182 		web.navigate('http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi?CMD=Web&LAYOUT=OneWindow&AUTO_FORMAT=Semiauto&ALIGNMENTS=250&ALIGNMENT_VIEW=QueryAnchoredNoIdentities&CLIENT=web&COMPOSITION_BASED_STATISTICS=on&DATABASE=nr&CDD_SEARCH=on&DESCRIPTIONS=500&ENTREZ_QUERY=%28none%29&EXPECT=10&FORMAT_OBJECT=Alignment&FORMAT_TYPE=HTML&I_THRESH=0.005&MATRIX_NAME=BLOSUM62&NCBI_GI=on&PAGE=Proteins&PROGRAM=blastp&RUN_PSIBLAST=on&SERVICE=plain&SET_DEFAULTS.x=36&SET_DEFAULTS.y=5&SHOW_OVERVIEW=on&END_OF_HTTPGET=Yes&SHOW_LINKOUT=yes&GET_SEQUENCE=yes') 
 183 		wait(web) 		
 184 		web.visible =visible
 185 	end
 186 	
 187 	def input(name, query)
 188 		doc = web.document 
 189 		if doc != nil 
 190 			begin 
 191 			   doc.all(name).value = query.sequence
 192 			   wait(web) 
 193 			rescue 
 194 			end
 195 		end		
 196 	end
 197 	
 198 	def submit()
 199 		doc = web.document 
 200 		if doc != nil
 201 			doc.forms(0).submit() 
 202 			wait(web) 	
 203 		end
 204 	end
 205 	
 206 	def gettext()
 207 		doc = web.document 
 208 		if doc != nil
 209 			wait(web) 	
 210 			return doc.body.innerText
 211 		end
 212 	end
 213 	
 214 	def getRID()
 215 		doc = web.document 
 216 		if doc != nil
 217 			wait(web) 	
 218 			r = doc.all('RID')
 219 			r0 = r.item(0)
 220 			return r0.value
 221 		end		
 222 	end
 223 	
 224 	def browse(url)
 225 		web.navigate(url)
 226 		wait(web)
 227 	end
 228 	
 229 	def waiting(refresh)
 230 		text = gettext()
 231 		while ( text =~ /WAITING/ )
 232 			if ( refresh == true )
 233 				web.refresh()
 234 			end
 235 			text = gettext()
 236 			sleep(3)
 237 		end
 238 	end
 239 end
 240 
 241 def testPsiWeb
 242 	manager = PsiBlastQueryManager.new()
 243 	manager.connect(true)
 244 end
 245 
 246 def testPsiQueryInput
 247 	manager = PsiBlastQueryManager.new()
 248 	manager.connect(true)
 249 	query = PsiBlastQuery.new("comment", "input")
 250 	manager.input("QUERY",query)
 251 end
 252 
 253 def testPsiSubmit
 254 	manager = PsiBlastQueryManager.new()
 255 	manager.connect(true)
 256 	query = PsiBlastQuery.new("comment", 
 257 	"MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA")
 258 	manager.input("QUERY",query)
 259 	manager.submit()
 260 end
 261 
 262 def testPsiGetFormat
 263 	manager = PsiBlastQueryManager.new()
 264 	manager.connect(true)
 265 	query = PsiBlastQuery.new("comment", 
 266 	"MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA")
 267 	manager.input("QUERY",query)
 268 	manager.submit()
 269 	# f = doc.forms(0)
 270 	# f.METHOD = "get"
 271 	# f.submit()
 272 	#http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi?QUERY=MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA&QUERY_FROM=&QUERY_TO=&DATABASE=nr&CDD_SEARCH=on&ENTREZ_QUERY=&ENTREZ_QUERY=All+organisms&COMPOSITION_BASED_STATISTICS=on&EXPECT=10&WORD_SIZE=3&MATRIX_NAME=BLOSUM62&GAPCOSTS=11+1&PSSM=&OTHER_ADVANCED=&PHI_PATTERN=&SHOW_OVERVIEW=on&SHOW_LINKOUT=on&GET_SEQUENCE=on&NCBI_GI=on&FORMAT_OBJECT=Alignment&FORMAT_TYPE=HTML&MASK_CHAR=0&MASK_COLOR=0&DESCRIPTIONS=500&ALIGNMENTS=250&ALIGNMENT_VIEW=Pairwise&RUN_PSIBLAST=on&I_THRESH=0.005&FORMAT_ENTREZ_QUERY=&FORMAT_ENTREZ_QUERY=All+organisms&EXPECT_LOW=&EXPECT_HIGH=&LAYOUT=TwoWindows&FORMAT_BLOCK_ON_RESPAGE=None&AUTO_FORMAT=Semiauto&PROGRAM=blastp&CLIENT=web&SERVICE=plain&PAGE=Proteins&CMD=Put
 273 	manager.submit()
 274 end
 275 
 276 def testPsiGetResult
 277 	manager = PsiBlastQueryManager.new()
 278 	manager.connect(false)
 279 	query = PsiBlastQuery.new("comment", 
 280 	"MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA")
 281 	manager.input("QUERY",query)
 282 	manager.submit()
 283 	
 284 	# Format 얻는 방법
 285 	# 1.  자동을 업데이트 될때까지 기다린다.
 286 	#manager.submit()
 287 	#manager.waiting(false)
 288 	
 289 	# Format  얻는 방법
 290 	# 2. 수동으로  refresh  시킨다.
 291 	#rid = "1113539432-3598-127383550938.BLASTQ4"
 292 	rid = manager.getRID()
 293 	#http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi?RID=1113540862-28790-25293121580.BLASTQ2&FORMAT_OBJECT=Alignment&FORMAT_TYPE=HTML&MASK_CHAR=0&MASK_COLOR=0&DESCRIPTIONS=100&ALIGNMENTS=50&ALIGNMENT_VIEW=Pairwise&I_THRESH=0.001&FORMAT_ENTREZ_QUERY=&FORMAT_ENTREZ_QUERY=All+organisms&EXPECT_LOW=&EXPECT_HIGH=&RID=1113540862-28790-25293121580.BLASTQ2&RTOE=13&CLIENT=web&FORMAT_OBJECT=Alignment&CMD=Get&PAGE=Proteins&_PGR=0&PID=28790&LAYOUT=OneWindow&FORMAT_BLOCK_ON_RESPAGE=None&STEP_NUMBER=1&EXPECT=10&HITLIST_SIZE=100&DESCRIPTIONS=100&ALIGNMENTS=50&AUTO_FORMAT=Off	
 294 	url = "http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi?RID=" +
 295 		rid +
 296 		"&FORMAT_OBJECT=Alignment&FORMAT_TYPE=HTML&MASK_CHAR=0&MASK_COLOR=0&DESCRIPTIONS=100&ALIGNMENTS=50&ALIGNMENT_VIEW=QueryAnchoredNoIdentities&I_THRESH=0.001&FORMAT_ENTREZ_QUERY=&FORMAT_ENTREZ_QUERY=All+organisms&EXPECT_LOW=&EXPECT_HIGH="+
 297 		"&RTOE=13&CLIENT=web&FORMAT_OBJECT=Alignment&CMD=Get&PAGE=Proteins&_PGR=0&PID=28790&LAYOUT=OneWindow&FORMAT_BLOCK_ON_RESPAGE=None&STEP_NUMBER=1&EXPECT=10&HITLIST_SIZE=100&DESCRIPTIONS=100&ALIGNMENTS=50&AUTO_FORMAT=Off"
 298 	manager.browse( url )
 299 	manager.waiting(true)
 300 	
 301 	# 결과를 얻어온다.
 302 	text = manager.gettext()
 303 	print text
 304 end
 305 
 306 def finder(set, sequence)
 307 	for aResult in set
 308 		if ( aResult.sequence == sequence )
 309 			return false
 310 		end
 311 	end
 312 	return true
 313 end
 314 
 315 def testPurification(filename)
 316 	bAlignments = false
 317 	nEnterCount = 0
 318 	hash = {}
 319 	r = Regexp.new("([_0-9A-Z]+)[' ']+([0-9]+)[' ']+([A-Z]+)[' ']+([0-9]+)")
 320 	f = File.open(filename)
 321 	id = ""
 322 	sequence = ""
 323 	f.each do |line|
 324 		#print line
 325 		if ( line.strip == "Alignments" )
 326 			bAlignments = true
 327 		end
 328 		if ( bAlignments == true )
 329 			if ( r =~ line )
 330 				id = r.match(line)[1]
 331 				sequence = r.match(line)[3]
 332 				if ( hash[ id ] == nil )
 333 					result = PsiBlastResult.new( id, sequence )
 334 					hash.store( id, result )
 335 				else
 336 					hash[id].addsequence( sequence )
 337 				end
 338 				nEnterCount = 0
 339 				# print  r.match(line)[0]
 340 				# 1_29407      1   MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLY 60
 341 				# print r.match(line)[1]
 342 				# 1_29407
 343 				# print r.match(line)[2]
 344 				# 1
 345 				# print r.match(line)[3]
 346 				# MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLY
 347 				# print r.match(line)[4]
 348 				# 60
 349 				
 350 			end
 351 			#print r.match(line)
 352 			#print r.match(line)
 353 			#print r.match(line)
 354 		end
 355 		if ( bAlignments == true && line.strip.empty? )
 356 			nEnterCount = nEnterCount + 1
 357 		end
 358 		if ( nEnterCount >= 2 )
 359 			bAlignments = false
 360 		end
 361 	end
 362 	
 363 	uniqueArray = []
 364 	
 365 	hash.each do |key,value|
 366 		#print ">#{value.id}\n"
 367 		#print "#{value.sequence}\n"
 368 		if ( finder( uniqueArray, sequence ) )
 369 			uniqueArray << value
 370 		end
 371 	end
 372 	
 373 	uniqueArray.each do |aResult|
 374 		print ">#{aResult.id}\n"
 375 		print "#{aResult.sequence}\n"
 376 	end
 377 end
 378 
 379 if ( $*.size >= 1 )
 380 	filename = $*[0]
 381 	visible = $*[1]
 382 	if visible == 'visible'
 383 		visible = true
 384 	else 
 385 		visible = false
 386 	end
 387 
 388 	mode = $*[2]
 389 	
 390 	manager = PsiBlastQueryManager.new(visible)
 391 	manager.openfile( filename )
 392 	manager.getResult()
 393 	manager.printout(mode)
 394 	manager.finalize()
 395 else
 396 	#testPsiWeb()
 397 	#testPsiQueryInput()
 398 	#testPsiSubmit()
 399 	#testPsiGetFormat()
 400 	#testPsiGetResult()     # psi-raw.txt
 401 	#testPurification("psi-raw.txt")    # clw-data.txt
 402 	#testPurification("test.txt")    # clw-data.txt
 403 end

PsiBlastQuery.rb (last edited 2011-08-03 11:00:51 by localhost)

web biohackers.net