Skip to content

Commit

Permalink
Merge pull request #270 from gsnedders/rename_stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
gsnedders committed Jul 14, 2016
2 parents 945911b + 8cb144b commit a8ba43e
Show file tree
Hide file tree
Showing 40 changed files with 219 additions and 275 deletions.
14 changes: 14 additions & 0 deletions CHANGES.rst
Expand Up @@ -50,6 +50,20 @@ Released on XXX
with a set of keyword arguments: override_encoding, transport_encoding,
same_origin_parent_encoding, likely_encoding, and default_encoding.**

* **Move filters._base, treebuilder._base, and treewalkers._base to .base
to clarify their status as public.**

* **Get rid of the sanitizer package. Merge sanitizer.sanitize into the
sanitizer.htmlsanitizer module and move that to sanitizer. This means
anyone who used sanitizer.sanitize or sanitizer.HTMLSanitizer needs no
code changes.**

* **Rename treewalkers.lxmletree to .etree_lxml and
treewalkers.genshistream to .genshi to have a consistent API.**

* Move a whole load of stuff (inputstream, ihatexml, trie, tokenizer,
utils) to be underscore prefixed to clarify their status as private.


0.9999999/1.0b8
~~~~~~~~~~~~~~~
Expand Down
4 changes: 2 additions & 2 deletions doc/html5lib.filters.rst
@@ -1,10 +1,10 @@
filters Package
===============

:mod:`_base` Module
:mod:`base` Module
-------------------

.. automodule:: html5lib.filters._base
.. automodule:: html5lib.filters.base
:members:
:undoc-members:
:show-inheritance:
Expand Down
37 changes: 2 additions & 35 deletions doc/html5lib.rst
Expand Up @@ -25,42 +25,10 @@ html5lib Package
:undoc-members:
:show-inheritance:

:mod:`ihatexml` Module
:mod:`serializer` Module
------------------------

.. automodule:: html5lib.ihatexml
:members:
:undoc-members:
:show-inheritance:

:mod:`inputstream` Module
-------------------------

.. automodule:: html5lib.inputstream
:members:
:undoc-members:
:show-inheritance:

:mod:`sanitizer` Module
-----------------------

.. automodule:: html5lib.sanitizer
:members:
:undoc-members:
:show-inheritance:

:mod:`tokenizer` Module
-----------------------

.. automodule:: html5lib.tokenizer
:members:
:undoc-members:
:show-inheritance:

:mod:`utils` Module
-------------------

.. automodule:: html5lib.utils
.. automodule:: html5lib.serializer
:members:
:undoc-members:
:show-inheritance:
Expand All @@ -71,7 +39,6 @@ Subpackages
.. toctree::

html5lib.filters
html5lib.serializer
html5lib.treebuilders
html5lib.treewalkers

19 changes: 0 additions & 19 deletions doc/html5lib.serializer.rst

This file was deleted.

4 changes: 2 additions & 2 deletions doc/html5lib.treebuilders.rst
Expand Up @@ -9,10 +9,10 @@ treebuilders Package
:undoc-members:
:show-inheritance:

:mod:`_base` Module
:mod:`base` Module
-------------------

.. automodule:: html5lib.treebuilders._base
.. automodule:: html5lib.treebuilders.base
:members:
:undoc-members:
:show-inheritance:
Expand Down
19 changes: 10 additions & 9 deletions doc/html5lib.treewalkers.rst
Expand Up @@ -9,10 +9,10 @@ treewalkers Package
:undoc-members:
:show-inheritance:

:mod:`_base` Module
:mod:`base` Module
-------------------

.. automodule:: html5lib.treewalkers._base
.. automodule:: html5lib.treewalkers.base
:members:
:undoc-members:
:show-inheritance:
Expand All @@ -33,18 +33,19 @@ treewalkers Package
:undoc-members:
:show-inheritance:

:mod:`genshistream` Module
--------------------------
:mod:`etree_lxml` Module
------------------------

.. automodule:: html5lib.treewalkers.genshistream
.. automodule:: html5lib.treewalkers.etree_lxml
:members:
:undoc-members:
:show-inheritance:

:mod:`lxmletree` Module
-----------------------

.. automodule:: html5lib.treewalkers.lxmletree
:mod:`genshi` Module
--------------------

.. automodule:: html5lib.treewalkers.genshi
:members:
:undoc-members:
:show-inheritance:
:show-inheritance:
File renamed without changes.
10 changes: 5 additions & 5 deletions html5lib/inputstream.py → html5lib/_inputstream.py
Expand Up @@ -10,7 +10,7 @@

from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
from .constants import ReparseException
from . import utils
from . import _utils

from io import StringIO

Expand All @@ -28,7 +28,7 @@

invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa

if utils.supports_lone_surrogates:
if _utils.supports_lone_surrogates:
# Use one extra step of indirection and create surrogates with
# eval. Not using this indirection would introduce an illegal
# unicode literal on platforms not supporting such lone
Expand Down Expand Up @@ -176,7 +176,7 @@ def __init__(self, source):
"""

if not utils.supports_lone_surrogates:
if not _utils.supports_lone_surrogates:
# Such platforms will have already checked for such
# surrogate errors, so no need to do this checking.
self.reportCharacterErrors = None
Expand Down Expand Up @@ -304,9 +304,9 @@ def characterErrorsUCS2(self, data):
codepoint = ord(match.group())
pos = match.start()
# Pretty sure there should be endianness issues here
if utils.isSurrogatePair(data[pos:pos + 2]):
if _utils.isSurrogatePair(data[pos:pos + 2]):
# We have a surrogate pair!
char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
if char_val in non_bmp_invalid_codepoints:
self.errors.append("invalid-codepoint")
skip = True
Expand Down
4 changes: 2 additions & 2 deletions html5lib/tokenizer.py → html5lib/_tokenizer.py
Expand Up @@ -11,9 +11,9 @@
from .constants import tokenTypes, tagTokenTypes
from .constants import replacementCharacters

from .inputstream import HTMLInputStream
from ._inputstream import HTMLInputStream

from .trie import Trie
from ._trie import Trie

entitiesTrie = Trie(entities)

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
6 changes: 3 additions & 3 deletions html5lib/filters/alphabeticalattributes.py
@@ -1,16 +1,16 @@
from __future__ import absolute_import, division, unicode_literals

from . import _base
from . import base

try:
from collections import OrderedDict
except ImportError:
from ordereddict import OrderedDict


class Filter(_base.Filter):
class Filter(base.Filter):
def __iter__(self):
for token in _base.Filter.__iter__(self):
for token in base.Filter.__iter__(self):
if token["type"] in ("StartTag", "EmptyTag"):
attrs = OrderedDict()
for name, value in sorted(token["data"].items(),
Expand Down
File renamed without changes.
8 changes: 4 additions & 4 deletions html5lib/filters/inject_meta_charset.py
@@ -1,19 +1,19 @@
from __future__ import absolute_import, division, unicode_literals

from . import _base
from . import base


class Filter(_base.Filter):
class Filter(base.Filter):
def __init__(self, source, encoding):
_base.Filter.__init__(self, source)
base.Filter.__init__(self, source)
self.encoding = encoding

def __iter__(self):
state = "pre_head"
meta_found = (self.encoding is None)
pending = []

for token in _base.Filter.__iter__(self):
for token in base.Filter.__iter__(self):
type = token["type"]
if type == "StartTag":
if token["name"].lower() == "head":
Expand Down
6 changes: 3 additions & 3 deletions html5lib/filters/lint.py
Expand Up @@ -2,21 +2,21 @@

from six import text_type

from . import _base
from . import base
from ..constants import namespaces, voidElements

from ..constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)


class Filter(_base.Filter):
class Filter(base.Filter):
def __init__(self, source, require_matching_tags=True):
super(Filter, self).__init__(source)
self.require_matching_tags = require_matching_tags

def __iter__(self):
open_elements = []
for token in _base.Filter.__iter__(self):
for token in base.Filter.__iter__(self):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
namespace = token["namespace"]
Expand Down
4 changes: 2 additions & 2 deletions html5lib/filters/optionaltags.py
@@ -1,9 +1,9 @@
from __future__ import absolute_import, division, unicode_literals

from . import _base
from . import base


class Filter(_base.Filter):
class Filter(base.Filter):
def slider(self):
previous1 = previous2 = None
for token in self.source:
Expand Down
28 changes: 10 additions & 18 deletions html5lib/filters/sanitizer.py
Expand Up @@ -5,13 +5,13 @@

from six.moves import urllib_parse as urlparse

from . import _base
from . import base
from ..constants import namespaces, prefixes

__all__ = ["Filter"]


acceptable_elements = frozenset((
allowed_elements = frozenset((
(namespaces['html'], 'a'),
(namespaces['html'], 'abbr'),
(namespaces['html'], 'acronym'),
Expand Down Expand Up @@ -175,7 +175,7 @@
(namespaces['svg'], 'use'),
))

acceptable_attributes = frozenset((
allowed_attributes = frozenset((
# HTML attributes
(None, 'abbr'),
(None, 'accept'),
Expand Down Expand Up @@ -552,7 +552,7 @@
(None, 'use')
))

acceptable_css_properties = frozenset((
allowed_css_properties = frozenset((
'azimuth',
'background-color',
'border-bottom-color',
Expand Down Expand Up @@ -601,7 +601,7 @@
'width',
))

acceptable_css_keywords = frozenset((
allowed_css_keywords = frozenset((
'auto',
'aqua',
'black',
Expand Down Expand Up @@ -643,7 +643,7 @@
'yellow',
))

acceptable_svg_properties = frozenset((
allowed_svg_properties = frozenset((
'fill',
'fill-opacity',
'fill-rule',
Expand All @@ -654,7 +654,7 @@
'stroke-opacity',
))

acceptable_protocols = frozenset((
allowed_protocols = frozenset((
'ed2k',
'ftp',
'http',
Expand All @@ -680,7 +680,7 @@
'data',
))

acceptable_content_types = frozenset((
allowed_content_types = frozenset((
'image/png',
'image/jpeg',
'image/gif',
Expand All @@ -689,14 +689,6 @@
'text/plain',
))

allowed_elements = acceptable_elements
allowed_attributes = acceptable_attributes
allowed_css_properties = acceptable_css_properties
allowed_css_keywords = acceptable_css_keywords
allowed_svg_properties = acceptable_svg_properties
allowed_protocols = acceptable_protocols
allowed_content_types = acceptable_content_types


data_content_type = re.compile(r'''
^
Expand All @@ -712,7 +704,7 @@
re.VERBOSE)


class Filter(_base.Filter):
class Filter(base.Filter):
""" sanitization of XHTML+MathML+SVG and of inline style attributes."""
def __init__(self,
source,
Expand All @@ -739,7 +731,7 @@ def __init__(self,
self.svg_allow_local_href = svg_allow_local_href

def __iter__(self):
for token in _base.Filter.__iter__(self):
for token in base.Filter.__iter__(self):
token = self.sanitize_token(token)
if token:
yield token
Expand Down

0 comments on commit a8ba43e

Please sign in to comment.