diff --git a/parse.py b/parse.py index 2ed8f1c2..d5087fb8 100755 --- a/parse.py +++ b/parse.py @@ -9,7 +9,6 @@ from optparse import OptionParser from html5lib import html5parser -from html5lib.tokenizer import HTMLTokenizer from html5lib import treebuilders, serializer, treewalkers from html5lib import constants from html5lib import utils @@ -53,9 +52,7 @@ def parse(): treebuilder = treebuilders.getTreeBuilder(opts.treebuilder) - tokenizer = HTMLTokenizer - - p = html5parser.HTMLParser(tree=treebuilder, tokenizer=tokenizer, debug=opts.log) + p = html5parser.HTMLParser(tree=treebuilder, debug=opts.log) if opts.fragment: parseMethod = p.parseFragment @@ -96,7 +93,7 @@ def parse(): def run(parseMethod, f, encoding, scripting): try: - document = parseMethod(f, encoding=encoding, scripting=scripting) + document = parseMethod(f, override_encoding=encoding, scripting=scripting) except: document = None traceback.print_exc() @@ -117,16 +114,14 @@ def printOutput(parser, document, opts): document.writexml(sys.stdout, encoding="utf-8") elif tb == "lxml": import lxml.etree - sys.stdout.write(lxml.etree.tostring(document)) + sys.stdout.write(lxml.etree.tostring(document, encoding="unicode")) elif tb == "etree": - sys.stdout.write(utils.default_etree.tostring(document)) + sys.stdout.write(utils.default_etree.tostring(document, encoding="unicode")) elif opts.tree: if not hasattr(document, '__getitem__'): document = [document] for fragment in document: print(parser.tree.testSerializer(fragment)) - elif opts.hilite: - sys.stdout.write(document.hilite("utf-8")) elif opts.html: kwargs = {} for opt in serializer.HTMLSerializer.options: @@ -188,9 +183,6 @@ def getOptParser(): parser.add_option("", "--no-html", action="store_false", default=True, dest="html", help="Don't output html") - parser.add_option("", "--hilite", action="store_true", default=False, - dest="hilite", help="Output as formatted highlighted code.") - parser.add_option("-c", "--encoding", action="store_true", default=False, dest="encoding", help="Print character encoding used")