# udump.py -- display the characters in a Unicode-encoded text file
# Jere Kapyaho (cone at iki dot fi) 2006-03-24
"""Dump every character of a UTF-8 encoded text file.

For each character the script prints one line containing the byte offset
of the character in the UTF-8 encoded file, its Unicode code point, and
its Unicode character name.

Usage: udump.py FILE
"""

import codecs
import sys
import unicodedata

# Shown for code points that have no name in the Unicode character
# database (for example control characters such as U+000A).
UNNAMED_CHARACTER = "(unnamed character)"


def dump_lines(text):
    """Yield one formatted dump line per character of *text*.

    Each line has the form ``"%08X: U+%06X %s"``: the byte offset the
    character has when *text* is encoded as UTF-8, the code point, and
    the character name (or ``"(unnamed character)"``).

    *text* must be a Unicode string; an empty string yields nothing.
    """
    offset = 0
    for ch in text:
        name = unicodedata.name(ch, UNNAMED_CHARACTER)
        yield "%08X: U+%06X %s" % (offset, ord(ch), name)
        # Advance by the encoded width of this character so the offset
        # tracks the position in the UTF-8 byte stream, not the
        # character index.
        offset += len(ch.encode("utf-8"))


def main(argv=None):
    """Run the dump for the file named by ``argv[1]``.

    *argv* defaults to ``sys.argv``.  Returns a process exit status:
    0 on success, 2 when no file name was supplied.  Errors raised while
    opening or decoding the file propagate to the caller.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        program = argv[0] if argv else "udump.py"
        sys.stderr.write("usage: %s FILE\n" % program)
        return 2

    # codecs.open reads the underlying file in binary mode, so line
    # endings reach the dump untranslated.  The with-block closes the
    # file even if decoding fails part-way through.
    with codecs.open(argv[1], "r", "utf-8") as input_file:
        file_data = input_file.read()

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("Using Unicode %s data" % unicodedata.unidata_version)
    print("Read %d characters" % len(file_data))
    for line in dump_lines(file_data):
        print(line)
    return 0


if __name__ == "__main__":
    sys.exit(main())