1 """DrPepper's SPSS output parsing script by yong27
2 """
3 import sys,unittest
4
# Module-level registry of records keyed by record name; main() looks names
# up here and either merges into an existing entry or inserts a new one.
mapOfRecords = dict()
6
class Record:
    """One named result table, grouped by dependent variable and effect.

    Attributes:
        name: title of the record ('' until setName() is called).
        dv:   the dependent-variable name most recently passed to setDvs().
        dvs:  maps dependent-variable name -> {effect label -> value string},
              e.g. {'X': {'E1': '0.01', 'E2': '0.20'}}.  reprDict() expects
              effect labels of the form 'E<digit>' with digit 1..3.
    """

    def __init__(self):
        self.name = ''
        self.dv = ''
        self.dvs = {}

    def setName(self, aName):
        """Set the record's title."""
        self.name = aName

    def getName(self):
        """Return the record's title."""
        return self.name

    def __str__(self):
        """Return the title on one line followed by the table from reprDict()."""
        return self.name + '\n' + self.reprDict(self.dvs)

    def reprDict(self, aDict):
        """Render aDict as a tab-separated table.

        The header row lists the dependent-variable names (one column each);
        three rows labelled E1..E3 follow.  A cell is '' when that dependent
        variable has no value for that effect.

        aDict has the same shape as self.dvs.  The row for a value is chosen
        from the second character of its effect label (int(label[1]) - 1), so
        labels outside 'E1'..'E3' are not supported.  Values must be strings.
        """
        dvnames = []
        # One independent list per row; columns follow aDict's iteration order.
        rows = [[''] * len(aDict) for _ in range(3)]
        for column, (dvname, edict) in enumerate(aDict.items()):
            dvnames.append(dvname)
            for eitem, prf in edict.items():
                rows[int(eitem[1]) - 1][column] = prf

        pieces = [' \t' + '\t'.join(dvnames) + '\n']
        for number, row in enumerate(rows, 1):
            pieces.append('E' + str(number) + '\t' + '\t'.join(row) + '\n')
        return ''.join(pieces)

    def setDvs(self, aDvName):
        """Make aDvName the current dependent variable, creating its entry if new."""
        self.dv = aDvName
        if aDvName not in self.dvs:
            self.dvs[aDvName] = {}

    def setE(self, aDvName, aE, aPrF):
        """Store value aPrF under effect label aE for dependent variable aDvName.

        aDvName must already have been registered with setDvs(); otherwise a
        KeyError is raised.
        """
        self.dvs[aDvName][aE] = aPrF

    def add(self, aRecord):
        """Merge aRecord's current dependent variable (aRecord.dv) into this record.

        Only the entry for aRecord.dv is merged.  If this record already has
        that dependent variable, aRecord's effects are added to it (overwriting
        equal labels); otherwise a copy of aRecord's effects is stored.  A
        record without any entry for its current dependent variable contributes
        nothing.
        """
        incoming = aRecord.dvs.get(aRecord.dv)
        if incoming is None:
            # Nothing was parsed for aRecord (e.g. no dependent variable seen).
            return
        if aRecord.dv in self.dvs:
            # Merge every effect; the previous tuple-unpacking of .items()
            # only "worked" for exactly two entries and then stored garbage.
            self.dvs[aRecord.dv].update(incoming)
        else:
            # Copy so later changes to aRecord do not leak into this record.
            self.dvs[aRecord.dv] = dict(incoming)
45
class Parser:
    """Read a text listing and hand it out as Record objects, chunk by chunk.

    The file is consumed line by line.  A chunk ends at end of file or when a
    line starting with '@' is met after at least one line has been collected;
    that '@' line is not part of the chunk but is queued in self._saved.

    The file is closed automatically once end of file is reached; close() or a
    ``with`` block can be used to release it earlier.
    """

    def __init__(self, aFileName):
        self.file = open(aFileName, 'r')
        # Queue of '@' lines that terminated earlier chunks (oldest first).
        self._saved = []

    def close(self):
        """Close the underlying file; calling it more than once is harmless."""
        self.file.close()

    def __enter__(self):
        return self

    def __exit__(self, *excInfo):
        self.close()
        return False

    def next(self):
        """Return the next Record, or None when no lines are left.

        Within a chunk, a line starting with 'Dep' selects the current
        dependent variable (its third whitespace-separated token), and a line
        starting with 'E1', 'E2' or 'E3' that has exactly six tokens stores
        its last token under its first token for that dependent variable.
        Effect rows seen before any 'Dep' line in the chunk are skipped,
        because there is no dependent variable to attach them to.

        Raises:
            IndexError: from setName() when the chunk does not start with an
                '@' line and no '@' line is queued; or when a 'Dep' line has
                fewer than three tokens.
        """
        lines = []
        # After EOF the file is closed; further calls fall through to None.
        while not self.file.closed:
            line = self.file.readline()
            if not line:
                self.close()
                break
            line = line.strip()
            if not line:
                # Keep blank lines as a single space so line[0] is always valid.
                line = ' '
            elif lines and line[0] == '@':
                self.saveLine(line)
                break
            lines.append(line)

        if not lines:
            return None

        record = self.setName(lines, Record())
        dv = None
        for line in lines:
            fields = line.split()
            if line[:3] == 'Dep':
                dv = fields[2]
                record.setDvs(dv)
            if (dv is not None and line[:2] in ('E1', 'E2', 'E3')
                    and len(fields) == 6):
                record.setE(dv, fields[0], fields[-1])

        return record

    def setName(self, aLines, aRecord):
        """Name aRecord and return it.

        If the first line of aLines starts with '@', that line supplies the
        name.  Otherwise the oldest queued '@' line is used and removed from
        the queue (IndexError if the queue is empty).
        """
        if aLines[0][0] == '@':
            aRecord.setName(self.reprName(aLines[0]))
        else:
            aRecord.setName(self.reprName(self._saved[0]))
            self._saved = self._saved[1:]
        return aRecord

    def reprName(self, aLine):
        """Return aLine without its last six characters, whitespace-trimmed.

        NOTE(review): the six dropped characters are presumably a trailing
        page/number field of the listing -- confirm against a sample file.
        """
        return aLine[:-6].strip()

    def saveLine(self, aLine):
        """Append aLine to the queue of pending '@' lines."""
        self._saved += [aLine]
90
91
def main(inFileName, outFileName):
    """Parse inFileName and write the merged records to outFileName.

    Records are collected in the module-level mapOfRecords, keyed by record
    name: the first record with a given name is stored, later ones with the
    same name are merged into it via Record.add().  Because mapOfRecords is
    module-level, entries from earlier calls are kept and written again.

    Each record is written as str(record) followed by a newline.
    """
    par = Parser(inFileName)
    try:
        while True:
            curRecord = par.next()
            if curRecord is None:
                break
            recName = curRecord.getName()
            if recName in mapOfRecords:
                mapOfRecords[recName].add(curRecord)
            else:
                mapOfRecords[recName] = curRecord
    finally:
        # Release the input file even if parsing fails part-way.
        par.file.close()

    # The with-block guarantees the output is flushed and closed.
    with open(outFileName, 'w') as outFile:
        for record in mapOfRecords.values():
            outFile.write(str(record) + '\n')
108
class ParserTest(unittest.TestCase):
    """Checks Parser against the sample listing 'input.txt' in the working directory."""

    def testNameParsing(self):
        """The first three records carry the expected names."""
        p = Parser('input.txt')
        # Close the input file whether or not the assertions pass.
        self.addCleanup(p.file.close)
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(
            '@ BA+NC, E1*Bl eosinophil%, Comparison of Means @',
            p.next().getName())
        self.assertEqual(
            '@ BA+NC, E1*Bl eosinophil%, Comparison of Means @',
            p.next().getName())
        self.assertEqual(
            '@ BA, E1*Bl eosinophil%, Comparison of Means @',
            p.next().getName())

    def testOverallParsing(self):
        """The sample listing yields 18 records in total."""
        p = Parser('input.txt')
        self.addCleanup(p.file.close)
        i = 0
        while True:
            curRecord = p.next()
            if curRecord is None:
                break
            i += 1
        self.assertEqual(18, i)
130
131
if __name__ == '__main__':
    # Usage: script [input-file [output-file]]
    # Without arguments the original fixed file names are used.
    cliArgs = sys.argv[1:3]
    inName = cliArgs[0] if len(cliArgs) > 0 else 'input.txt'
    outName = cliArgs[1] if len(cliArgs) > 1 else 'output.txt'
    main(inName, outName)