#format python """DrPepper's SPSS output parsing script by yong27 """ import sys,unittest mapOfRecords={} # record name : record object class Record: def __init__(self): self.name='' self.dv='' self.dvs={} def setName(self, aName): self.name=aName def getName(self): return self.name def __str__(self): return self.name +'\n'+ self.reprDict(self.dvs) def reprDict(self, aDict): dvnames = [] ee=[['']*len(aDict),['']*len(aDict),['']*len(aDict)]; i=0 for dvname, edict in aDict.items(): dvnames.append(dvname) for eitem, prf in edict.items(): ee[int(eitem[1])-1][i] = prf i+=1 r= ' \t' + '\t'.join(dvnames) + '\n' i=1 for e in ee: r+='E'+str(i)+'\t'+ '\t'.join(e) + '\n' i+=1 return r def setDvs(self, aDvName): self.dv=aDvName if aDvName not in self.dvs.keys(): self.dvs[aDvName]={} def setE(self, aDvName, aE, aPrF): self.dvs[aDvName][aE]=aPrF def add(self, aRecord): if aRecord.dv in self.dvs.keys(): e,prf=aRecord.dvs[aRecord.dv].items() self.dvs[aRecord.dv][e]=prf else: self.dvs[aRecord.dv]=aRecord.dvs[aRecord.dv] class Parser: def __init__(self, aFileName): self.file = open(aFileName, 'r') self._saved = [] def next(self): lines = [] while 1: line = self.file.readline() if not line: # at the end of the file break line=line.strip() if not line: line=' ' elif lines and line[0]=='@': # at next record self.saveLine(line) break lines.append(line) if not lines: return None record = Record() record = self.setName(lines, record) for line in lines: if line[:3]=='Dep': dv=line.split()[2] record.setDvs(dv) if line[:2] in ('E1','E2','E3') and len(line.split()) == 6: record.setE(dv, line.split()[0], line.split()[-1]) return record def setName(self, aLines, aRecord): if aLines[0][0] == '@': aRecord.setName(self.reprName(aLines[0])) else: aRecord.setName(self.reprName(self._saved[0])) self._saved = self._saved[1:] return aRecord def reprName(self, aLine): return aLine[:-6].strip() def saveLine(self, aLine): self._saved += [aLine] def main(inFileName, outFileName): par=Parser(inFileName) while 1: curRecord = par.next() if curRecord is None: break recName = curRecord.getName() if recName in mapOfRecords.keys(): mapOfRecords[recName].add(curRecord) pass else: mapOfRecords[recName]=curRecord outFile = open(outFileName,'w') for record in mapOfRecords.values(): outFile.write(str(record)+'\n') class ParserTest(unittest.TestCase): def testNameParsing(self): p=Parser('input.txt') self.assertEquals( '@ BA+NC, E1*Bl eosinophil%, Comparison of Means @', p.next().getName()) self.assertEquals( '@ BA+NC, E1*Bl eosinophil%, Comparison of Means @', p.next().getName()) self.assertEquals( '@ BA, E1*Bl eosinophil%, Comparison of Means @', p.next().getName()) def testOverallParsing(self): p=Parser('input.txt') i = 0 while 1: curRecord = p.next() if curRecord is None: break i+=1 self.assertEquals(18,i) # 18 records in input.txt if __name__=='__main__': #unittest.main(argv=('','-v')) main('input.txt','output.txt')