#format ruby
# psi.rb
#
# Psi Blast Query
# by destine 2005-04-14
#
# Usage
#   ruby psi.rb rhodopsin.txt visible     => precise result
#   ruby psi.rb rhodopsin.txt visible /r  => redundant result
#   ruby psi.rb rhodopsin.txt visible /i  => informative result
#
# NOTE(review): the third argument (mode) is accepted and passed down to
# printout, but no code in this file reads it, so all three invocations
# currently produce the same output.
#
# Input file
#   rhodopsin.txt : must be in FASTA format.
#

begin
  require 'win32ole'
rescue LoadError
  # win32ole is only available on Windows builds of Ruby.  The parsing helpers
  # below do not need it, so loading continues; PsiBlastQueryManager.new raises
  # a LoadError with an explanatory message when the library is missing.
end

# Value of the browser's readyState property once a page has finished loading.
READYSTATE_COMPLETE = 4

# Pause between two polls of the browser state in wait().
WAIT_POLL_SECONDS = 0.1

# Pause between two polls of the result page in PsiBlastQueryManager#waiting.
RESULT_POLL_SECONDS = 3

# Endpoint used for every request in this script.
PSI_BLAST_CGI = 'http://www.ncbi.nlm.nih.gov/BLAST/Blast.cgi'

# Query string that opens the PSI-BLAST input form.
PSI_CONNECT_PARAMS = '?CMD=Web&LAYOUT=OneWindow&AUTO_FORMAT=Semiauto&ALIGNMENTS=250&ALIGNMENT_VIEW=QueryAnchoredNoIdentities&CLIENT=web&COMPOSITION_BASED_STATISTICS=on&DATABASE=nr&CDD_SEARCH=on&DESCRIPTIONS=500&ENTREZ_QUERY=%28none%29&EXPECT=10&FORMAT_OBJECT=Alignment&FORMAT_TYPE=HTML&I_THRESH=0.005&MATRIX_NAME=BLOSUM62&NCBI_GI=on&PAGE=Proteins&PROGRAM=blastp&RUN_PSIBLAST=on&SERVICE=plain&SET_DEFAULTS.x=36&SET_DEFAULTS.y=5&SHOW_OVERVIEW=on&END_OF_HTTPGET=Yes&SHOW_LINKOUT=yes&GET_SEQUENCE=yes'

# Query string appended after "?RID=<rid>" to request the formatted result
# (query-anchored alignment view) of a submitted search.
PSI_FORMAT_PARAMS = '&FORMAT_OBJECT=Alignment&FORMAT_TYPE=HTML&MASK_CHAR=0&MASK_COLOR=0&DESCRIPTIONS=100&ALIGNMENTS=50&ALIGNMENT_VIEW=QueryAnchoredNoIdentities&I_THRESH=0.001&FORMAT_ENTREZ_QUERY=&FORMAT_ENTREZ_QUERY=All+organisms&EXPECT_LOW=&EXPECT_HIGH=' \
                    '&RTOE=13&CLIENT=web&FORMAT_OBJECT=Alignment&CMD=Get&PAGE=Proteins&_PGR=0&PID=28790&LAYOUT=OneWindow&FORMAT_BLOCK_ON_RESPAGE=None&STEP_NUMBER=1&EXPECT=10&HITLIST_SIZE=100&DESCRIPTIONS=100&ALIGNMENTS=50&AUTO_FORMAT=Off'

# Protein sequence used by the manual test routines below
# (presumably the rhodopsin sequence from the usage example -- TODO confirm).
SAMPLE_QUERY_SEQUENCE = 'MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA'

# Blocks until the browser reports that it is idle and the page is complete.
#
# ie - browser object responding to #busy and #readyState.
#
# A short sleep between polls keeps the loops from spinning a CPU core.
def wait(ie)
  sleep(WAIT_POLL_SECONDS) while ie.busy
  sleep(WAIT_POLL_SECONDS) until ie.readyState == READYSTATE_COMPLETE
end

# Builds the URL that fetches the formatted result page for a request id.
#
# rid - request id string as returned by PsiBlastQueryManager#getRID.
#
# Raises ArgumentError when rid is nil or empty.
def psi_format_url(rid)
  raise ArgumentError, 'missing BLAST request id (RID)' if rid.nil? || rid.to_s.empty?
  "#{PSI_BLAST_CGI}?RID=#{rid}#{PSI_FORMAT_PARAMS}"
end

# Returns true when no element of +set+ has exactly this +sequence+,
# false otherwise.  Used to keep only one hit per distinct sequence.
def finder(set, sequence)
  set.none? { |a_result| a_result.sequence == sequence }
end

# One hit of the result page: an identifier and its aligned sequence.
class PsiBlastResult
  attr_reader :id, :sequence

  def initialize(id, sequence)
    @id = id
    @sequence = sequence
  end

  # Appends the next fragment of the aligned sequence.  A new string is built
  # so the fragment passed to the constructor is never mutated.
  def addsequence(sequence)
    @sequence += sequence
  end
end

# Parser for the plain text of a query-anchored result page.
module PsiBlastAlignments
  # One alignment row: id, start position, aligned residues, end position.
  # Example row:
  #   1_29407  1  MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLY  60
  # captures "1_29407", "1", the residue string and "60".  The residue group
  # accepts '-' so that gapped rows are kept.
  # NOTE(review): the separator class also accepts an apostrophe; that is kept
  # from the original pattern and is probably unintentional.
  ROW = /([_0-9A-Z]+)[' ]+([0-9]+)[' ]+([A-Z\-]+)[' ]+([0-9]+)/

  # Line that opens the alignment section.
  SECTION_HEADER = 'Alignments'

  module_function

  # Collects the alignment rows of +lines+ into one PsiBlastResult per id.
  #
  # lines - any object whose #each yields text lines (String#each_line, File).
  #
  # Parsing starts at a line equal to "Alignments" and stops once two blank
  # lines have been seen since the last matching row.  Fragments belonging to
  # the same id are concatenated in the order they appear.
  #
  # Returns an Array of PsiBlastResult in order of first appearance.
  def parse(lines)
    in_section = false
    blank_count = 0
    by_id = {}

    lines.each do |line|
      stripped = line.strip
      in_section = true if stripped == SECTION_HEADER

      if in_section
        row = ROW.match(line)
        if row
          id = row[1]
          fragment = row[3]
          if by_id.key?(id)
            by_id[id].addsequence(fragment)
          else
            by_id[id] = PsiBlastResult.new(id, fragment)
          end
          blank_count = 0
        end
        blank_count += 1 if stripped.empty?
      end

      in_section = false if blank_count >= 2
    end

    by_id.values
  end
end

# One query (FASTA comment plus sequence) together with its results.
class PsiBlastQuery
  attr_reader :comment, :sequence, :rawResult, :blastResults
  attr_writer :rawResult

  def initialize(comment, sequence)
    @comment = comment
    @sequence = sequence
    @rawResult = ""
    @blastResults = []
  end

  # Prints every stored result in FASTA form (">id" line, then the sequence).
  #
  # mode - currently unused; kept so existing callers keep working.
  def printout(mode)
    blastResults.each do |a_result|
      print ">#{a_result.id}\n"
      print "#{a_result.sequence}\n"
    end
  end

  # Runs this query through the browser driven by +manager+ and stores the
  # parsed hits in #blastResults.
  #
  # manager - PsiBlastQueryManager (or an object with the same methods).
  # visible - passed through to manager.connect.
  def process(manager, visible)
    manager.connect(visible)
    manager.input("QUERY", self)
    manager.submit

    # Two ways of obtaining the formatted result:
    #   1. Submit the format page and wait until it updates automatically
    #      (manager.submit; manager.waiting(false)) -- disabled.
    #   2. Request the formatted page by RID and refresh it manually until it
    #      is ready -- used here.
    rid = manager.getRID
    manager.browse(psi_format_url(rid))
    manager.waiting(true)

    # Fetch the result text.
    @rawResult = manager.gettext
    purification
  end

  # Parses #rawResult and appends every hit whose full sequence is not already
  # present to #blastResults.  A nil rawResult is treated as empty.
  #
  # Returns the blastResults array.
  def purification
    # each_line: String#each no longer exists in Ruby 1.9 and later.
    candidates = PsiBlastAlignments.parse(rawResult.to_s.each_line)
    candidates.each do |candidate|
      # Compare the candidate's complete sequence, not the last parsed fragment.
      @blastResults << candidate if finder(@blastResults, candidate.sequence)
    end
    @blastResults
  end
end

# Holds the list of queries and drives the Internet Explorer instance.
class PsiBlastQueryManager
  # Comment used for sequences that are not preceded by a FASTA header line.
  DEFAULT_COMMENT = ">no_comment"

  attr_reader :list, :web

  # visible - whether the browser window is shown while processing
  #           (defaults to false).
  #
  # Raises LoadError when win32ole could not be loaded.
  def initialize(visible = false)
    unless defined?(WIN32OLE)
      raise LoadError, 'win32ole is not available; PsiBlastQueryManager needs it to drive Internet Explorer'
    end
    @visible = visible
    @list = []
    @web = WIN32OLE.new('InternetExplorer.Application')
  end

  # Appends a PsiBlastQuery to the list.
  def add(query)
    @list << query
  end

  # Reads a FASTA file and adds one PsiBlastQuery per record.
  #
  # A line containing '>' starts a new record and becomes its comment; all
  # other non-blank lines are stripped and concatenated into the sequence.
  # Records without any sequence line are skipped.
  def openfile(filename)
    comment = DEFAULT_COMMENT
    sequence = ""

    # File.foreach closes the file when iteration ends.
    File.foreach(filename) do |line|
      if line.include?('>')
        unless sequence.empty?
          add(PsiBlastQuery.new(comment, sequence))
          sequence = ""
        end
        comment = line.chomp # drop the line terminator
      elsif !line.strip.empty?
        sequence += line.strip
      end
    end

    add(PsiBlastQuery.new(comment, sequence)) unless sequence.empty?
  end

  # Processes every query in the list, in order.
  def getResult
    list.each do |data|
      data.process(self, @visible)
    end
  end

  # Prints a banner with each query's comment followed by its results.
  def printout(mode)
    list.each do |data|
      print "\n=================#{data.comment}====================\n"
      data.printout(mode)
    end
  end

  # Closes the browser.
  def finalize
    web.quit
  end

  # Opens the PSI-BLAST input form and sets the window visibility.
  def connect(visible)
    web.navigate(PSI_BLAST_CGI + PSI_CONNECT_PARAMS)
    wait(web)
    web.visible = visible
  end

  # Writes query.sequence into the form element called +name+.
  #
  # Failures are reported on stderr instead of being dropped silently; the
  # method still does not raise, as before.
  def input(name, query)
    doc = web.document
    return if doc.nil?

    begin
      doc.all(name).value = query.sequence
      wait(web)
    rescue StandardError => e
      warn "psi.rb: could not set form field #{name}: #{e.message}"
    end
  end

  # Submits the first form of the current page.
  def submit
    doc = web.document
    return if doc.nil?

    doc.forms(0).submit()
    wait(web)
  end

  # Returns the text of the current page, or nil when there is no document.
  def gettext
    doc = web.document
    return nil if doc.nil?

    wait(web)
    doc.body.innerText
  end

  # Returns the value of the first 'RID' element of the current page, or nil
  # when there is no document.
  def getRID
    doc = web.document
    return nil if doc.nil?

    wait(web)
    doc.all('RID').item(0).value
  end

  # Navigates to +url+ and waits for the page to load.
  def browse(url)
    web.navigate(url)
    wait(web)
  end

  # Polls the current page until its text no longer contains "WAITING".
  #
  # refresh - when true, the page is reloaded before each poll.
  #
  # The pause comes before the page is read again so the loop condition is
  # evaluated on fresh text.
  # NOTE(review): there is no upper bound on the number of polls.
  def waiting(refresh)
    text = gettext
    while text =~ /WAITING/
      web.refresh if refresh
      sleep(RESULT_POLL_SECONDS)
      text = gettext
    end
  end
end

# Manual check: open the input form in a visible browser window.
# The window is left open for inspection.
def testPsiWeb
  manager = PsiBlastQueryManager.new(true)
  manager.connect(true)
end

# Manual check: fill the QUERY field with a dummy value.
def testPsiQueryInput
  manager = PsiBlastQueryManager.new(true)
  manager.connect(true)
  query = PsiBlastQuery.new("comment", "input")
  manager.input("QUERY", query)
end

# Manual check: fill the QUERY field with the sample sequence and submit.
def testPsiSubmit
  manager = PsiBlastQueryManager.new(true)
  manager.connect(true)
  query = PsiBlastQuery.new("comment", SAMPLE_QUERY_SEQUENCE)
  manager.input("QUERY", query)
  manager.submit
end

# Manual check: submit the query, then submit the form of the page that
# follows (presumably the format page -- TODO confirm).
def testPsiGetFormat
  manager = PsiBlastQueryManager.new(true)
  manager.connect(true)
  query = PsiBlastQuery.new("comment", SAMPLE_QUERY_SEQUENCE)
  manager.input("QUERY", query)
  manager.submit
  manager.submit
end

# Manual check: run a complete query in a hidden browser and print the raw
# text of the result page.  The hidden browser is closed afterwards so that no
# invisible process is left behind.
def testPsiGetResult
  manager = PsiBlastQueryManager.new(false)
  begin
    manager.connect(false)
    query = PsiBlastQuery.new("comment", SAMPLE_QUERY_SEQUENCE)
    manager.input("QUERY", query)
    manager.submit

    # Request the formatted page by RID and refresh it until it is ready.
    rid = manager.getRID
    manager.browse(psi_format_url(rid))
    manager.waiting(true)

    # Fetch the result text.
    text = manager.gettext
    print text
  ensure
    manager.finalize
  end
end

# Parses a saved result page and prints one FASTA record per distinct
# sequence.  Uses the same parser and the same de-duplication rule as
# PsiBlastQuery#purification.
def testPurification(filename)
  # File.foreach closes the file when iteration ends.
  parsed = PsiBlastAlignments.parse(File.foreach(filename))

  unique_results = []
  parsed.each do |candidate|
    unique_results << candidate if finder(unique_results, candidate.sequence)
  end

  unique_results.each do |a_result|
    print ">#{a_result.id}\n"
    print "#{a_result.sequence}\n"
  end
end

if ARGV.size >= 1
  filename = ARGV[0]
  visible = (ARGV[1] == 'visible')
  mode = ARGV[2]

  manager = PsiBlastQueryManager.new(visible)
  begin
    manager.openfile(filename)
    manager.getResult
    manager.printout(mode)
  ensure
    # Close the browser even when processing fails.
    manager.finalize
  end
else
  # Manual test entry points; uncomment one to run it.
  #testPsiWeb()
  #testPsiQueryInput()
  #testPsiSubmit()
  #testPsiGetFormat()
  #testPsiGetResult()      # psi-raw.txt
  #testPurification("psi-raw.txt")  # clw-data.txt
  #testPurification("test.txt")     # clw-data.txt
end