1 """DrPepper's SPSS output parsing script by yong27
   2 """
   3 import sys,unittest
   4 
   5 mapOfRecords={} # record name : record object
   6 
   7 class Record:
   8     def __init__(self):
   9         self.name=''
  10         self.dv=''
  11         self.dvs={}
  12     def setName(self, aName):
  13         self.name=aName
  14     def getName(self):
  15         return self.name
  16     def __str__(self):
  17         return self.name +'\n'+ self.reprDict(self.dvs)
  18     def reprDict(self, aDict):
  19         dvnames = []
  20         ee=[['']*len(aDict),['']*len(aDict),['']*len(aDict)]; i=0
  21         for dvname, edict in aDict.items():
  22             dvnames.append(dvname)
  23             for eitem, prf in edict.items():
  24                 ee[int(eitem[1])-1][i] = prf
  25             i+=1
  26         r= ' \t' + '\t'.join(dvnames) + '\n'
  27         i=1
  28         for e in ee:
  29             r+='E'+str(i)+'\t'+ '\t'.join(e) + '\n'
  30             i+=1
  31         return r
  32 
  33     def setDvs(self, aDvName):
  34         self.dv=aDvName
  35         if aDvName not in self.dvs.keys():
  36             self.dvs[aDvName]={}
  37     def setE(self, aDvName, aE, aPrF):
  38         self.dvs[aDvName][aE]=aPrF
  39     def add(self, aRecord):
  40         if aRecord.dv in self.dvs.keys():
  41             e,prf=aRecord.dvs[aRecord.dv].items()
  42             self.dvs[aRecord.dv][e]=prf
  43         else:
  44             self.dvs[aRecord.dv]=aRecord.dvs[aRecord.dv]
  45 
  46 class Parser:
  47     def __init__(self, aFileName):
  48         self.file = open(aFileName, 'r')
  49         self._saved = []
  50     def next(self):
  51         lines = []
  52         while 1:
  53             line = self.file.readline()
  54             if not line:  # at the end of the file
  55                 break
  56             line=line.strip()
  57             if not line:
  58                 line=' '
  59             elif lines and line[0]=='@': # at next record
  60                 self.saveLine(line)
  61                 break
  62             lines.append(line)
  63         
  64         if not lines:
  65             return None
  66 
  67         record = Record()
  68         record = self.setName(lines, record)
  69         for line in lines:
  70             if line[:3]=='Dep':
  71                 dv=line.split()[2]
  72                 record.setDvs(dv)
  73             if line[:2] in ('E1','E2','E3') and len(line.split()) == 6:
  74                 record.setE(dv, line.split()[0], line.split()[-1])
  75         
  76         return record
  77 
  78     def setName(self, aLines, aRecord):
  79         if aLines[0][0] == '@':
  80             aRecord.setName(self.reprName(aLines[0]))
  81         else:
  82             aRecord.setName(self.reprName(self._saved[0]))
  83             self._saved = self._saved[1:]
  84         return aRecord
  85 
  86     def reprName(self, aLine):
  87         return aLine[:-6].strip()
  88     def saveLine(self, aLine):
  89         self._saved += [aLine]
  90         
  91 
  92 def main(inFileName, outFileName):
  93     par=Parser(inFileName)
  94     while 1:
  95         curRecord = par.next()
  96         if curRecord is None:
  97             break
  98         recName = curRecord.getName()
  99         if recName in mapOfRecords.keys():
 100             mapOfRecords[recName].add(curRecord)
 101             pass
 102         else:
 103             mapOfRecords[recName]=curRecord
 104 
 105     outFile = open(outFileName,'w')
 106     for record in mapOfRecords.values():
 107         outFile.write(str(record)+'\n')
 108 
 109 class ParserTest(unittest.TestCase):
 110     def testNameParsing(self):
 111         p=Parser('input.txt')
 112         self.assertEquals(
 113             '@ BA+NC, E1*Bl eosinophil%, Comparison of Means @',
 114             p.next().getName())
 115         self.assertEquals(
 116             '@ BA+NC, E1*Bl eosinophil%, Comparison of Means @',
 117             p.next().getName())
 118         self.assertEquals(
 119             '@ BA, E1*Bl eosinophil%, Comparison of Means @',
 120             p.next().getName())
 121     def testOverallParsing(self):
 122         p=Parser('input.txt')
 123         i = 0
 124         while 1:
 125             curRecord = p.next()
 126             if curRecord is None:
 127                 break
 128             i+=1
 129         self.assertEquals(18,i) # 18 records in input.txt
 130 
 131 
 132 if __name__=='__main__':
 133     #unittest.main(argv=('','-v'))
 134     main('input.txt','output.txt')
# web: biohackers.net