Toggle line numbers
   1 """Encoding converter by yong27, 2005-06-09
   2 
   3 The unit tests must be run in a UTF-8 environment.
   4 """
   5 
   6 import unittest, os, sys
   7 
   8 class EcConverter:
   9     def __init__(self, from_, to_):
  10         self.fromEnc = from_
  11         self.toEnc = to_
  12 
  13     def convert(self, aStr):
  14         return unicode(aStr, self.fromEnc).encode(self.toEnc)
  15 
  16     def convertPathName(self, aPath):
  17         for root, dirs, files in os.walk(aPath):
  18             cvt_root = self.convert(root)
  19             root != cvt_root and os.renames(root, cvt_root)
  20             for fname in files:
  21                 cvt_fname = self.convert(fname)
  22                 fname = os.path.join(cvt_root, fname)
  23                 cvt_fname = os.path.join(cvt_root, cvt_fname)
  24                 fname != cvt_fname and os.rename(fname, cvt_fname)
  25 
  26     def convertPathNames(self, aPaths):
  27         for path in aPaths:
  28             self.convertPathName(path)
  29 
  30     def convertFile(self, fname):
  31         ifile = file(fname)
  32         try:
  33             converted = self.convert(ifile.read())
  34         except UnicodeDecodeError, e:
  35             sys.stderr.write("%s failed : %s\n"%(fname,e))
  36             return
  37 
  38         temp_fname = fname+'_'
  39         ofile = file(temp_fname,'w')
  40         ofile.write(converted)
  41         ifile.close()
  42         ofile.close()
  43         os.rename(temp_fname, fname)
  44 
  45     def convertFiles(self, fnames):
  46         for fname in fnames:
  47             self.convertFile(fname)
  48 
  49 
  50 class EcConverterTest(unittest.TestCase):
  51     def setUp(self):
  52         self.ec1 = EcConverter('utf-8','euc-kr')
  53         self.ec2 = EcConverter('euc-kr','utf-8')
  54         os.mkdir('ectest')
  55         tdir = os.path.join('ectest','디렉토리')
  56         os.mkdir(tdir)
  57         file(os.path.join(tdir, '파일'), 'w').write('파일내용')
  58     def tearDown(self):
  59         os.system('rm -rf ectest')
  60 
  61     def testConvertString(self):
  62         self.assertEquals('\xc5\xd7\xbd\xba\xc6\xae',
  63                 self.ec1.convert('테스트'))
  64         self.assertEquals('테스트',
  65                 self.ec2.convert('\xc5\xd7\xbd\xba\xc6\xae'))
  66 
  67     def testConvertPathName(self):
  68         self.ec1.convertPathName('ectest')
  69         htest = os.listdir('ectest')[0]
  70         self.assertEquals(self.ec1.convert('디렉토리'), htest)
  71         hfile = os.listdir(os.path.join('ectest',htest))[0]
  72         self.assertEquals(self.ec1.convert('파일'), hfile)
  73 
  74         self.ec2.convertPathName('ectest')
  75         htest = os.listdir('ectest')[0]
  76         self.assertEquals('디렉토리', htest)
  77         hfile = os.listdir(os.path.join('ectest',htest))[0]
  78         self.assertEquals('파일', hfile)
  79 
  80     def testConvertFile(self):
  81         ifileName = os.path.join('ectest', '디렉토리','파일')
  82         self.ec1.convertFile(ifileName)
  83         self.assertEquals(self.ec1.convert('파일내용'), file(ifileName).read())
  84 
  85 def main():
  86     import optparse
  87     usage = "%prog [options] arg1 arg2..."
  88     op = optparse.OptionParser(usage, version="%prog 0.1")
  89     op.add_option("-u", "--unittest", action="store_true", dest="test",
  90             default=False, help="doing unittest")
  91     op.add_option("-p", "--path-names", action="store_true", dest="isPath",
  92             default=False, help="is path names converting recursively")
  93     op.add_option("-f", "--from", dest="encfrom", default="euc-kr",
  94             help="encoding of original directory (default:euc-kr)")
  95     op.add_option("-t", "--to", dest="encto", default="utf-8",
  96             help="expected encoding to convert (default:uft-8)")
  97     options, args = op.parse_args()
  98     if options.test:
  99         unittest.TextTestRunner().run(unittest.main(argv=('','-v')))
 100     if not args:
 101         op.error("insert argument files or directories")
 102     ec = EcConverter(options.encfrom, options.encto)
 103     if options.isPath:
 104         ec.convertPathNames(args)
 105     else:
 106         ec.convertFiles(args)
 107 
 108 if __name__=='__main__':
 109     main()
web biohackers.net