pyquery/pyquery.py

# Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com>
#
# Distributed under the BSD license, see LICENSE.txt
from .cssselectpatch import JQueryTranslator
from collections import OrderedDict
from urllib.parse import urlencode
from urllib.parse import urljoin
from .openers import url_opener
from .text import extract_text
from copy import deepcopy
from html import escape
from lxml import etree
import lxml.html
import inspect
import itertools
import types

basestring = (str, bytes)


def getargspec(func):
    args = inspect.signature(func).parameters.values()
    return [p.name for p in args
            if p.kind == p.POSITIONAL_OR_KEYWORD]


def with_camel_case_alias(func):
    """decorator for methods who required a camelcase alias"""
    _camel_case_aliases.add(func.__name__)
    return func


_camel_case_aliases = set()


def build_camel_case_aliases(PyQuery):
    """add camelcase aliases to PyQuery"""
    for alias in _camel_case_aliases:
        parts = list(alias.split('_'))
        name = parts[0] + ''.join([p.title() for p in parts[1:]])
        func = getattr(PyQuery, alias)
        f = types.FunctionType(func.__code__, func.__globals__,
                               name, func.__defaults__)
        f.__doc__ = (
            'Alias for :func:`~pyquery.pyquery.PyQuery.%s`') % func.__name__
        setattr(PyQuery, name, f.__get__(None, PyQuery))


def fromstring(context, parser=None, custom_parser=None):
    """use html parser if we don't have clean xml
    """
    if hasattr(context, 'read') and hasattr(context.read, '__call__'):
        meth = 'parse'
    else:
        meth = 'fromstring'
    if custom_parser is None:
        if parser is None:
            try:
                result = getattr(etree, meth)(context)
            except etree.XMLSyntaxError:
                if hasattr(context, 'seek'):
                    context.seek(0)
                result = getattr(lxml.html, meth)(context)
            if isinstance(result, etree._ElementTree):
                return [result.getroot()]
            else:
                return [result]
        elif parser == 'xml':
            custom_parser = getattr(etree, meth)
        elif parser == 'html':
            custom_parser = getattr(lxml.html, meth)
        elif parser == 'html5':
            from lxml.html import html5parser
            custom_parser = getattr(html5parser, meth)
        elif parser == 'soup':
            from lxml.html import soupparser
            custom_parser = getattr(soupparser, meth)
        elif parser == 'html_fragments':
            custom_parser = lxml.html.fragments_fromstring
        else:
            raise ValueError('No such parser: "%s"' % parser)

    result = custom_parser(context)
    if type(result) is list:
        return result
    elif isinstance(result, etree._ElementTree):
        return [result.getroot()]
    elif result is not None:
        return [result]
    else:
        return []


def callback(func, *args):
    return func(*args[:func.__code__.co_argcount])


class NoDefault(object):
    def __repr__(self):
        """clean representation in Sphinx"""
        return '<NoDefault>'


no_default = NoDefault()
del NoDefault


class FlexibleElement(object):
    """property to allow a flexible api"""
    def __init__(self, pget, pset=no_default, pdel=no_default):
        self.pget = pget
        self.pset = pset
        self.pdel = pdel

    def __get__(self, instance, klass):
        class _element(object):
            """real element to support set/get/del attr and item and js call
            style"""
            def __call__(prop, *args, **kwargs):
                return self.pget(instance, *args, **kwargs)
            __getattr__ = __getitem__ = __setattr__ = __setitem__ = __call__

            def __delitem__(prop, name):
                if self.pdel is not no_default:
                    return self.pdel(instance, name)
                else:
                    raise NotImplementedError()
            __delattr__ = __delitem__

            def __repr__(prop):
                return '<flexible_element %s>' % self.pget.__name__
        return _element()

    def __set__(self, instance, value):
        if self.pset is not no_default:
            self.pset(instance, value)
        else:
            raise NotImplementedError()


class PyQuery(list):
    """The main class
    """

    _translator_class = JQueryTranslator

    def __init__(self, *args, **kwargs):
        html = None
        elements = []
        self._base_url = None
        self.parser = kwargs.pop('parser', None)

        if 'parent' in kwargs:
            self._parent = kwargs.pop('parent')
        else:
            self._parent = no_default

        if 'css_translator' in kwargs:
            self._translator = kwargs.pop('css_translator')
        elif self.parser in ('xml',):
            self._translator = self._translator_class(xhtml=True)
        elif self._parent is not no_default:
            self._translator = self._parent._translator
        else:
            self._translator = self._translator_class(xhtml=False)

        self.namespaces = kwargs.pop('namespaces', None)

        if kwargs:
            # specific case to get the dom
            if 'filename' in kwargs:
                html = open(kwargs['filename'],
                            encoding=kwargs.get('encoding'))
            elif 'url' in kwargs:
                url = kwargs.pop('url')
                if 'opener' in kwargs:
                    opener = kwargs.pop('opener')
                    html = opener(url, **kwargs)
                else:
                    html = url_opener(url, kwargs)
                if not self.parser:
                    self.parser = 'html'
                self._base_url = url
            else:
                raise ValueError('Invalid keyword arguments %s' % kwargs)

            elements = fromstring(html, self.parser)
            # close open descriptor if possible
            if hasattr(html, 'close'):
                try:
                    html.close()
                except Exception:
                    pass

        else:
            # get nodes

            # determine context and selector if any
            selector = context = no_default
            length = len(args)
            if length == 1:
                context = args[0]
            elif length == 2:
                selector, context = args
            else:
                raise ValueError(
                    "You can't do that. Please, provide arguments")

            # get context
            if isinstance(context, basestring):
                try:
                    elements = fromstring(context, self.parser)
                except Exception:
                    raise
            elif isinstance(context, self.__class__):
                # copy
                elements = context[:]
            elif isinstance(context, list):
                elements = context
            elif isinstance(context, etree._Element):
                elements = [context]
            else:
                raise TypeError(context)

            # select nodes
            if elements and selector is not no_default:
                xpath = self._css_to_xpath(selector)
                results = []
                for tag in elements:
                    results.extend(
                        tag.xpath(xpath, namespaces=self.namespaces))
                elements = results

        list.__init__(self, elements)

    def _css_to_xpath(self, selector, prefix='descendant-or-self::'):
        selector = selector.replace('[@', '[')
        return self._translator.css_to_xpath(selector, prefix)

    def _copy(self, *args, **kwargs):
        kwargs.setdefault('namespaces', self.namespaces)
        return self.__class__(*args, **kwargs)

    def __call__(self, *args, **kwargs):
        """return a new PyQuery instance
        """
        length = len(args)
        if length == 0:
            raise ValueError('You must provide at least a selector')
        if args[0] == '':
            return self._copy([])
        if (len(args) == 1 and
                isinstance(args[0], str) and
                not args[0].startswith('<')):
            args += (self,)
        result = self._copy(*args, parent=self, **kwargs)
        return result

    # keep original list api prefixed with _
    _append = list.append
    _extend = list.extend

    # improve pythonic api
    def __add__(self, other):
        assert isinstance(other, self.__class__)
        return self._copy(self[:] + other[:])

    def extend(self, other):
        """Extend with anoter PyQuery object"""
        assert isinstance(other, self.__class__)
        self._extend(other[:])
        return self

    def items(self, selector=None):
        """Iter over elements. Return PyQuery objects:

            >>> d = PyQuery('<div><span>foo</span><span>bar</span></div>')
            >>> [i.text() for i in d.items('span')]
            ['foo', 'bar']
            >>> [i.text() for i in d('span').items()]
            ['foo', 'bar']
            >>> list(d.items('a')) == list(d('a').items())
            True
        """
        if selector:
            elems = self(selector) or []
        else:
            elems = self
        for elem in elems:
            yield self._copy(elem, parent=self)

    def xhtml_to_html(self):
        """Remove xhtml namespace:

            >>> doc = PyQuery(
            ...         '<html xmlns="http://www.w3.org/1999/xhtml"></html>')
            >>> doc
            [<{http://www.w3.org/1999/xhtml}html>]
            >>> doc.xhtml_to_html()
            [<html>]
        """
        try:
            root = self[0].getroottree()
        except IndexError:
            pass
        else:
            lxml.html.xhtml_to_html(root)
        return self

    def remove_namespaces(self):
        """Remove all namespaces:

            >>> doc = PyQuery('<foo xmlns="http://example.com/foo"></foo>')
            >>> doc
            [<{http://example.com/foo}foo>]
            >>> doc.remove_namespaces()
            [<foo>]
        """
        try:
            root = self[0].getroottree()
        except IndexError:
            pass
        else:
            for el in root.iter('{*}*'):
                if el.tag.startswith('{'):
                    el.tag = el.tag.split('}', 1)[1]
        return self

    def __str__(self):
        """xml representation of current nodes::

            >>> xml = PyQuery(
            ...   '<script><![[CDATA[ ]></script>', parser='html_fragments')
            >>> print(str(xml))
            <script>&lt;![[CDATA[ ]&gt;</script>

        """
        return ''.join([etree.tostring(e, encoding=str) for e in self])

    def __unicode__(self):
        """xml representation of current nodes"""
        return u''.join([etree.tostring(e, encoding=str)
                         for e in self])

    def __html__(self):
        """html representation of current nodes::

            >>> html = PyQuery(
            ...   '<script><![[CDATA[ ]></script>', parser='html_fragments')
            >>> print(html.__html__())
            <script><![[CDATA[ ]></script>

        """
        return u''.join([lxml.html.tostring(e, encoding=str)
                         for e in self])

    def __repr__(self):
        r = []
        try:
            for el in self:
                c = el.get('class')
                c = c and '.' + '.'.join(c.split(' ')) or ''
                id = el.get('id')
                id = id and '#' + id or ''
                r.append('<%s%s%s>' % (el.tag, id, c))
            return '[' + (', '.join(r)) + ']'
        except AttributeError:
            return list.__repr__(self)

    @property
    def root(self):
        """return the xml root element
        """
        if self._parent is not no_default:
            return self._parent[0].getroottree()
        return self[0].getroottree()

    @property
    def encoding(self):
        """return the xml encoding of the root element
        """
        root = self.root
        if root is not None:
            return self.root.docinfo.encoding

    ##############
    # Traversing #
    ##############

    def _filter_only(self, selector, elements, reverse=False, unique=False):
        """Filters the selection set only, as opposed to also including
           descendants.
        """
        if selector is None:
            results = elements
        else:
            xpath = self._css_to_xpath(selector, 'self::')
            results = []
            for tag in elements:
                results.extend(tag.xpath(xpath, namespaces=self.namespaces))
        if reverse:
            results.reverse()
        if unique:
            result_list = results
            results = []
            for item in result_list:
                if item not in results:
                    results.append(item)
        return self._copy(results, parent=self)

    def parent(self, selector=None):
        return self._filter_only(
            selector,
            [e.getparent() for e in self if e.getparent() is not None],
            unique=True)

    def prev(self, selector=None):
        return self._filter_only(
            selector,
            [e.getprevious() for e in self if e.getprevious() is not None])

    def next(self, selector=None):
        return self._filter_only(
            selector,
            [e.getnext() for e in self if e.getnext() is not None])

    def _traverse(self, method):
        for e in self:
            current = getattr(e, method)()
            while current is not None:
                yield current
                current = getattr(current, method)()

    def _traverse_parent_topdown(self):
        for e in self:
            this_list = []
            current = e.getparent()
            while current is not None:
                this_list.append(current)
                current = current.getparent()
            this_list.reverse()
            for j in this_list:
                yield j

    def _next_all(self):
        return [e for e in self._traverse('getnext')]

    @with_camel_case_alias
    def next_all(self, selector=None):
        """
        >>> h = '<span><p class="hello">Hi</p><p>Bye</p><img scr=""/></span>'
        >>> d = PyQuery(h)
        >>> d('p:last').next_all()
        [<img>]
        >>> d('p:last').nextAll()
        [<img>]
        """
        return self._filter_only(selector, self._next_all())

    @with_camel_case_alias
    def next_until(self, selector, filter_=None):
        """
        >>> h = '''
        ... <h2>Greeting 1</h2>
        ... <p>Hello!</p><p>World!</p>
        ... <h2>Greeting 2</h2><p>Bye!</p>
        ... '''
        >>> d = PyQuery(h)
        >>> d('h2:first').nextUntil('h2')
        [<p>, <p>]
        """
        return self._filter_only(
            filter_, [
                e
                for q in itertools.takewhile(
                    lambda q: not q.is_(selector), self.next_all().items())
                for e in q
            ]
        )

    def _prev_all(self):
        return [e for e in self._traverse('getprevious')]

    @with_camel_case_alias
    def prev_all(self, selector=None):
        """
        >>> h = '<span><p class="hello">Hi</p><p>Bye</p><img scr=""/></span>'
        >>> d = PyQuery(h)
        >>> d('p:last').prev_all()
        [<p.hello>]
        >>> d('p:last').prevAll()
        [<p.hello>]
        """
        return self._filter_only(selector, self._prev_all(), reverse=True)

    def siblings(self, selector=None):
        """
         >>> h = '<span><p class="hello">Hi</p><p>Bye</p><img scr=""/></span>'
         >>> d = PyQuery(h)
         >>> d('.hello').siblings()
         [<p>, <img>]
         >>> d('.hello').siblings('img')
         [<img>]

        """
        return self._filter_only(selector, self._prev_all() + self._next_all())

    def parents(self, selector=None):
        """
        >>> d = PyQuery('<span><p class="hello">Hi</p><p>Bye</p></span>')
        >>> d('p').parents()
        [<span>]
        >>> d('.hello').parents('span')
        [<span>]
        >>> d('.hello').parents('p')
        []
        """
        return self._filter_only(
            selector,
            [e for e in self._traverse_parent_topdown()],
            unique=True
        )

    def children(self, selector=None):
        """Filter elements that are direct children of self using optional
        selector:

            >>> d = PyQuery('<span><p class="hello">Hi</p><p>Bye</p></span>')
            >>> d
            [<span>]
            >>> d.children()
            [<p.hello>, <p>]
            >>> d.children('.hello')
            [<p.hello>]
        """
        elements = [child for tag in self for child in tag.getchildren()]
        return self._filter_only(selector, elements)

    def closest(self, selector=None):
        """
        >>> d = PyQuery(
        ...  '<div class="hello"><p>This is a '
        ...  '<strong class="hello">test</strong></p></div>')
        >>> d('strong').closest('div')
        [<div.hello>]
        >>> d('strong').closest('.hello')
        [<strong.hello>]
        >>> d('strong').closest('form')
        []
        """
        result = []
        for current in self:
            while (current is not None and
                    not self._copy(current).is_(selector)):
                current = current.getparent()
            if current is not None:
                result.append(current)
        return self._copy(result, parent=self)

    def contents(self):
        """
        Return contents (with text nodes):

            >>> d = PyQuery('hello <b>bold</b>')
            >>> d.contents()  # doctest: +ELLIPSIS
            ['hello ', <Element b at ...>]
        """
        results = []
        for elem in self:
            results.extend(elem.xpath('child::text()|child::*',
                           namespaces=self.namespaces))
        return self._copy(results, parent=self)

    def filter(self, selector):
        """Filter elements in self using selector (string or function):

            >>> d = PyQuery('<p class="hello">Hi</p><p>Bye</p>')
            >>> d('p')
            [<p.hello>, <p>]
            >>> d('p').filter('.hello')
            [<p.hello>]
            >>> d('p').filter(lambda i: i == 1)
            [<p>]
            >>> d('p').filter(lambda i: PyQuery(this).text() == 'Hi')
            [<p.hello>]
            >>> d('p').filter(lambda i, this: PyQuery(this).text() == 'Hi')
            [<p.hello>]
        """
        if not hasattr(selector, '__call__'):
            return self._filter_only(selector, self)
        else:
            elements = []
            args = getargspec(callback)
            try:
                for i, this in enumerate(self):
                    if len(args) == 1:
                        selector.__globals__['this'] = this
                    if callback(selector, i, this):
                        elements.append(this)
            finally:
                f_globals = selector.__globals__
                if 'this' in f_globals:
                    del f_globals['this']
            return self._copy(elements, parent=self)

    def not_(self, selector):
        """Return elements that don't match the given selector:

            >>> d = PyQuery('<p class="hello">Hi</p><p>Bye</p><div></div>')
            >>> d('p').not_('.hello')
            [<p>]
        """
        exclude = set(self._copy(selector, self))
        return self._copy([e for e in self if e not in exclude],
                          parent=self)

    def is_(self, selector):
        """Returns True if selector matches at least one current element, else
        False:

            >>> d = PyQuery('<p class="hello"><span>Hi</span></p><p>Bye</p>')
            >>> d('p').eq(0).is_('.hello')
            True

            >>> d('p').eq(0).is_('span')
            False

            >>> d('p').eq(1).is_('.hello')
            False

        ..
        """
        return bool(self._filter_only(selector, self))

    def find(self, selector):
        """Find elements using selector traversing down from self:

            >>> m = '<p><span><em>Whoah!</em></span></p><p><em> there</em></p>'
            >>> d = PyQuery(m)
            >>> d('p').find('em')
            [<em>, <em>]
            >>> d('p').eq(1).find('em')
            [<em>]
        """
        xpath = self._css_to_xpath(selector)
        results = [child.xpath(xpath, namespaces=self.namespaces)
                   for tag in self
                   for child in tag.getchildren()]
        # Flatten the results
        elements = []
        for r in results:
            elements.extend(r)
        return self._copy(elements, parent=self)

    def eq(self, index):
        """Return PyQuery of only the element with the provided index::

            >>> d = PyQuery('<p class="hello">Hi</p><p>Bye</p><div></div>')
            >>> d('p').eq(0)
            [<p.hello>]
            >>> d('p').eq(1)
            [<p>]
            >>> d('p').eq(2)
            []

        ..
        """
        # Slicing will return empty list when index=-1
        # we should handle out of bound by ourselves
        try:
            items = self[index]
        except IndexError:
            items = []
        return self._copy(items, parent=self)

    def each(self, func):
        """apply func on each nodes
        """
        try:
            for i, element in enumerate(self):
                func.__globals__['this'] = element
                if callback(func, i, element) is False:
                    break
        finally:
            f_globals = func.__globals__
            if 'this' in f_globals:
                del f_globals['this']
        return self

    def map(self, func):
        """Returns a new PyQuery after transforming current items with func.

        func should take two arguments - 'index' and 'element'.  Elements can
        also be referred to as 'this' inside of func::

            >>> d = PyQuery('<p class="hello">Hi there</p><p>Bye</p><br />')
            >>> d('p').map(lambda i, e: PyQuery(e).text())
            ['Hi there', 'Bye']

            >>> d('p').map(lambda i, e: len(PyQuery(this).text()))
            [8, 3]

            >>> d('p').map(lambda i, e: PyQuery(this).text().split())
            ['Hi', 'there', 'Bye']

        """
        items = []
        try:
            for i, element in enumerate(self):
                func.__globals__['this'] = element
                result = callback(func, i, element)
                if result is not None:
                    if not isinstance(result, list):
                        items.append(result)
                    else:
                        items.extend(result)
        finally:
            f_globals = func.__globals__
            if 'this' in f_globals:
                del f_globals['this']
        return self._copy(items, parent=self)

    @property
    def length(self):
        return len(self)

    def size(self):
        return len(self)

    def end(self):
        """Break out of a level of traversal and return to the parent level.

            >>> m = '<p><span><em>Whoah!</em></span></p><p><em> there</em></p>'
            >>> d = PyQuery(m)
            >>> d('p').eq(1).find('em').end().end()
            [<p>, <p>]
        """
        return self._parent

    ##############
    # Attributes #
    ##############
    def attr(self, *args, **kwargs):
        """Attributes manipulation
        """

        mapping = {'class_': 'class', 'for_': 'for'}

        attr = value = no_default
        length = len(args)
        if length == 1:
            attr = args[0]
            attr = mapping.get(attr, attr)
        elif length == 2:
            attr, value = args
            attr = mapping.get(attr, attr)
        elif kwargs:
            attr = {}
            for k, v in kwargs.items():
                attr[mapping.get(k, k)] = v
        else:
            raise ValueError('Invalid arguments %s %s' % (args, kwargs))

        if not self:
            return None
        elif isinstance(attr, dict):
            for tag in self:
                for key, value in attr.items():
                    tag.set(key, value)
        elif value is no_default:
            return self[0].get(attr)
        elif value is None:
            return self.remove_attr(attr)
        else:
            for tag in self:
                tag.set(attr, value)
        return self

    @with_camel_case_alias
    def remove_attr(self, name):
        """Remove an attribute::

            >>> d = PyQuery('<div id="myid"></div>')
            >>> d.remove_attr('id')
            [<div>]
            >>> d.removeAttr('id')
            [<div>]

        ..
        """
        for tag in self:
            try:
                del tag.attrib[name]
            except KeyError:
                pass
        return self

    attr = FlexibleElement(pget=attr, pdel=remove_attr)

    #######
    # CSS #
    #######
    def height(self, value=no_default):
        """set/get height of element
        """
        return self.attr('height', value)

    def width(self, value=no_default):
        """set/get width of element
        """
        return self.attr('width', value)

    @with_camel_case_alias
    def has_class(self, name):
        """Return True if element has class::

            >>> d = PyQuery('<div class="myclass"></div>')
            >>> d.has_class('myclass')
            True
            >>> d.hasClass('myclass')
            True

        ..
        """
        return self.is_('.%s' % name)

    @with_camel_case_alias
    def add_class(self, value):
        """Add a css class to elements::

            >>> d = PyQuery('<div></div>')
            >>> d.add_class('myclass')
            [<div.myclass>]
            >>> d.addClass('myclass')
            [<div.myclass>]

        ..
        """
        for tag in self:
            values = value.split(' ')
            classes = (tag.get('class') or '').split()
            classes += [v for v in values if v not in classes]
            tag.set('class', ' '.join(classes))
        return self

    @with_camel_case_alias
    def remove_class(self, value):
        """Remove a css class to elements::

            >>> d = PyQuery('<div class="myclass"></div>')
            >>> d.remove_class('myclass')
            [<div>]
            >>> d.removeClass('myclass')
            [<div>]

        ..
        """
        for tag in self:
            values = value.split(' ')
            classes = set((tag.get('class') or '').split())
            classes.difference_update(values)
            classes.difference_update([''])
            classes = ' '.join(classes)
            if classes.strip():
                tag.set('class', classes)
            elif tag.get('class'):
                tag.set('class', classes)
        return self

    @with_camel_case_alias
    def toggle_class(self, value):
        """Toggle a css class to elements

            >>> d = PyQuery('<div></div>')
            >>> d.toggle_class('myclass')
            [<div.myclass>]
            >>> d.toggleClass('myclass')
            [<div>]

        """
        for tag in self:
            values = value.split(' ')
            classes = (tag.get('class') or '').split()
            values_to_add = [v for v in values if v not in classes]
            values_to_del = [v for v in values if v in classes]
            classes = [v for v in classes if v not in values_to_del]
            classes += values_to_add
            tag.set('class', ' '.join(classes))
        return self

    def css(self, *args, **kwargs):
        """css attributes manipulation
        """

        attr = value = no_default
        length = len(args)
        if length == 1:
            attr = args[0]
        elif length == 2:
            attr, value = args
        elif kwargs:
            attr = kwargs
        else:
            raise ValueError('Invalid arguments %s %s' % (args, kwargs))

        if isinstance(attr, dict):
            for tag in self:
                stripped_keys = [key.strip().replace('_', '-')
                                 for key in attr.keys()]
                current = [el.strip()
                           for el in (tag.get('style') or '').split(';')
                           if el.strip()
                           and not el.split(':')[0].strip() in stripped_keys]
                for key, value in attr.items():
                    key = key.replace('_', '-')
                    current.append('%s: %s' % (key, value))
                tag.set('style', '; '.join(current))
        elif isinstance(value, basestring):
            attr = attr.replace('_', '-')
            for tag in self:
                current = [
                    el.strip()
                    for el in (tag.get('style') or '').split(';')
                    if (el.strip() and
                        not el.split(':')[0].strip() == attr.strip())]
                current.append('%s: %s' % (attr, value))
                tag.set('style', '; '.join(current))
        return self

    css = FlexibleElement(pget=css, pset=css)

    ###################
    # CORE UI EFFECTS #
    ###################
    def hide(self):
        """Remove display:none to elements style:

            >>> print(PyQuery('<div style="display:none;"/>').hide())
            <div style="display: none"/>

        """
        return self.css('display', 'none')

    def show(self):
        """Add display:block to elements style:

            >>> print(PyQuery('<div />').show())
            <div style="display: block"/>

        """
        return self.css('display', 'block')

    ########
    # HTML #
    ########
    def val(self, value=no_default):
        """Set the attribute value::

            >>> d = PyQuery('<input />')
            >>> d.val('Youhou')
            [<input>]

        Get the attribute value::

            >>> d.val()
            'Youhou'

        Set the selected values for a `select` element with the `multiple`
        attribute::

            >>> d = PyQuery('''
            ...             <select multiple>
            ...                 <option value="you"><option value="hou">
            ...             </select>
            ...             ''')
            >>> d.val(['you', 'hou'])
            [<select>]

        Get the selected values for a `select` element with the `multiple`
        attribute::

            >>> d.val()
            ['you', 'hou']

        """
        def _get_value(tag):
            # <textarea>
            if tag.tag == 'textarea':
                return self._copy(tag).html()
            # <select>
            elif tag.tag == 'select':
                if 'multiple' in tag.attrib:
                    # Only extract value if selected
                    selected = self._copy(tag)('option[selected]')
                    # Rebuild list to avoid serialization error
                    return list(selected.map(
                        lambda _, o: self._copy(o).attr('value')
                    ))
                selected_option = self._copy(tag)('option[selected]:last')
                if selected_option:
                    return selected_option.attr('value')
                else:
                    return self._copy(tag)('option').attr('value')
            # <input type="checkbox"> or <input type="radio">
            elif self.is_(':checkbox,:radio'):
                val = self._copy(tag).attr('value')
                if val is None:
                    return 'on'
                else:
                    return val
            # <input>
            elif tag.tag == 'input':
                val = self._copy(tag).attr('value')
                return val.replace('\n', '') if val else ''
            # everything else.
            return self._copy(tag).attr('value') or ''

        def _set_value(pq, value):
            for tag in pq:
                # <select>
                if tag.tag == 'select':
                    if not isinstance(value, list):
                        value = [value]

                    def _make_option_selected(_, elem):
                        pq = self._copy(elem)
                        if pq.attr('value') in value:
                            pq.attr('selected', 'selected')
                            if 'multiple' not in tag.attrib:
                                del value[:]  # Ensure it toggles first match
                        else:
                            pq.removeAttr('selected')

                    self._copy(tag)('option').each(_make_option_selected)
                    continue
                # Stringify array
                if isinstance(value, list):
                    value = ','.join(value)
                # <textarea>
                if tag.tag == 'textarea':
                    self._copy(tag).text(value)
                    continue
                # <input> and everything else.
                self._copy(tag).attr('value', value)

        if value is no_default:
            if len(self):
                return _get_value(self[0])
        else:
            _set_value(self, value)
            return self

    def html(self, value=no_default, **kwargs):
        """Get or set the html representation of sub nodes.

        Get the text value::

            >>> d = PyQuery('<div><span>toto</span></div>')
            >>> print(d.html())
            <span>toto</span>

        Extra args are passed to ``lxml.etree.tostring``::

            >>> d = PyQuery('<div><span></span></div>')
            >>> print(d.html())
            <span/>
            >>> print(d.html(method='html'))
            <span></span>

        Set the text value::

            >>> d.html('<span>Youhou !</span>')
            [<div>]
            >>> print(d)
            <div><span>Youhou !</span></div>
        """
        if value is no_default:
            if not self:
                return None
            tag = self[0]
            children = tag.getchildren()
            html = escape(tag.text or '', quote=False)
            if not children:
                return html
            if 'encoding' not in kwargs:
                kwargs['encoding'] = str
            html += u''.join([etree.tostring(e, **kwargs)
                              for e in children])
            return html
        else:
            if isinstance(value, self.__class__):
                new_html = str(value)
            elif isinstance(value, basestring):
                new_html = value
            elif not value:
                new_html = ''
            else:
                raise ValueError(type(value))

            for tag in self:
                for child in tag.getchildren():
                    tag.remove(child)
                root = fromstring(
                    u'<root>' + new_html + u'</root>',
                    self.parser)[0]
                children = root.getchildren()
                if children:
                    tag.extend(children)
                tag.text = root.text
        return self

    @with_camel_case_alias
    def outer_html(self, method="html"):
        """Get the html representation of the first selected element::

            >>> d = PyQuery('<div><span class="red">toto</span> rocks</div>')
            >>> print(d('span'))
            <span class="red">toto</span> rocks
            >>> print(d('span').outer_html())
            <span class="red">toto</span>
            >>> print(d('span').outerHtml())
            <span class="red">toto</span>

            >>> S = PyQuery('<p>Only <b>me</b> & myself</p>')
            >>> print(S('b').outer_html())
            <b>me</b>

        ..
        """

        if not self:
            return None
        e0 = self[0]
        if e0.tail:
            e0 = deepcopy(e0)
            e0.tail = ''
        return etree.tostring(e0, encoding=str, method=method)

    def text(self, value=no_default, **kwargs):
        """Get or set the text representation of sub nodes.

        Get the text value::

            >>> doc = PyQuery('<div><span>toto</span><span>tata</span></div>')
            >>> print(doc.text())
            tototata
            >>> doc = PyQuery('''<div><span>toto</span>
            ...               <span>tata</span></div>''')
            >>> print(doc.text())
            toto tata

        Get the text value, without squashing newlines::

            >>> doc = PyQuery('''<div><span>toto</span>
            ...               <span>tata</span></div>''')
            >>> print(doc.text(squash_space=False))
            toto
            tata

        Set the text value::

            >>> doc.text('Youhou !')
            [<div>]
            >>> print(doc)
            <div>Youhou !</div>

        """

        if value is no_default:
            if not self:
                return ''
            return ' '.join(
                self._copy(tag).html() if tag.tag == 'textarea' else
                extract_text(tag, **kwargs) for tag in self
            )

        for tag in self:
            for child in tag.getchildren():
                tag.remove(child)
            tag.text = value
        return self

    ################
    # Manipulating #
    ################

    def _get_root(self, value):
        if isinstance(value, basestring):
            root = fromstring(u'<root>' + value + u'</root>',
                              self.parser)[0]
        elif isinstance(value, etree._Element):
            root = self._copy(value)
        elif isinstance(value, PyQuery):
            root = value
        else:
            raise TypeError(
                'Value must be string, PyQuery or Element. Got %r' % value)
        if hasattr(root, 'text') and isinstance(root.text, basestring):
            root_text = root.text
        else:
            root_text = ''
        return root, root_text

    def append(self, value):
        """append value to each nodes
        """
        root, root_text = self._get_root(value)
        for i, tag in enumerate(self):
            if len(tag) > 0:  # if the tag has children
                last_child = tag[-1]
                if not last_child.tail:
                    last_child.tail = ''
                last_child.tail += root_text
            else:
                if not tag.text:
                    tag.text = ''
                tag.text += root_text
            if i > 0:
                root = deepcopy(list(root))
            tag.extend(root)
        return self

    @with_camel_case_alias
    def append_to(self, value):
        """append nodes to value
        """
        value.append(self)
        return self

    def prepend(self, value):
        """prepend value to nodes
        """
        root, root_text = self._get_root(value)
        for i, tag in enumerate(self):
            if not tag.text:
                tag.text = ''
            if len(root) > 0:
                root[-1].tail = tag.text
                tag.text = root_text
            else:
                tag.text = root_text + tag.text
            if i > 0:
                root = deepcopy(list(root))
            tag[:0] = root
            root = tag[:len(root)]
        return self

    @with_camel_case_alias
    def prepend_to(self, value):
        """prepend nodes to value
        """
        value.prepend(self)
        return self

    def after(self, value):
        """add value after nodes
        """
        root, root_text = self._get_root(value)
        for i, tag in enumerate(self):
            if not tag.tail:
                tag.tail = ''
            tag.tail += root_text
            if i > 0:
                root = deepcopy(list(root))
            parent = tag.getparent()
            index = parent.index(tag) + 1
            parent[index:index] = root
            root = parent[index:len(root)]
        return self

    @with_camel_case_alias
    def insert_after(self, value):
        """insert nodes after value
        """
        value.after(self)
        return self

    def before(self, value):
        """insert value before nodes
        """
        root, root_text = self._get_root(value)
        for i, tag in enumerate(self):
            previous = tag.getprevious()
            if previous is not None:
                if not previous.tail:
                    previous.tail = ''
                previous.tail += root_text
            else:
                parent = tag.getparent()
                if not parent.text:
                    parent.text = ''
                parent.text += root_text
            if i > 0:
                root = deepcopy(list(root))
            parent = tag.getparent()
            index = parent.index(tag)
            parent[index:index] = root
            root = parent[index:len(root)]
        return self

    @with_camel_case_alias
    def insert_before(self, value):
        """insert nodes before value
        """
        value.before(self)
        return self

    def wrap(self, value):
        """A string of HTML that will be created on the fly and wrapped around
        each target:

            >>> d = PyQuery('<span>youhou</span>')
            >>> d.wrap('<div></div>')
            [<div>]
            >>> print(d)
            <div><span>youhou</span></div>

        """
        assert isinstance(value, basestring)
        value = fromstring(value)[0]
        nodes = []
        for tag in self:
            wrapper = deepcopy(value)
            # FIXME: using iterchildren is probably not optimal
            if not wrapper.getchildren():
                wrapper.append(deepcopy(tag))
            else:
                childs = [c for c in wrapper.iterchildren()]
                child = childs[-1]
                child.append(deepcopy(tag))
            nodes.append(wrapper)

            parent = tag.getparent()
            if parent is not None:
                for t in parent.iterchildren():
                    if t is tag:
                        t.addnext(wrapper)
                        parent.remove(t)
                        break
        self[:] = nodes
        return self

    @with_camel_case_alias
    def wrap_all(self, value):
        """Wrap all the elements in the matched set into a single wrapper
        element::

            >>> d = PyQuery('<div><span>Hey</span><span>you !</span></div>')
            >>> print(d('span').wrap_all('<div id="wrapper"></div>'))
            <div id="wrapper"><span>Hey</span><span>you !</span></div>

            >>> d = PyQuery('<div><span>Hey</span><span>you !</span></div>')
            >>> print(d('span').wrapAll('<div id="wrapper"></div>'))
            <div id="wrapper"><span>Hey</span><span>you !</span></div>

        ..
        """
        if not self:
            return self

        assert isinstance(value, basestring)
        value = fromstring(value)[0]
        wrapper = deepcopy(value)
        if not wrapper.getchildren():
            child = wrapper
        else:
            childs = [c for c in wrapper.iterchildren()]
            child = childs[-1]

        replace_childs = True
        parent = self[0].getparent()
        if parent is None:
            parent = no_default

        # add nodes to wrapper and check parent
        for tag in self:
            child.append(deepcopy(tag))
            if tag.getparent() is not parent:
                replace_childs = False

        # replace nodes i parent if possible
        if parent is not no_default and replace_childs:
            childs = [c for c in parent.iterchildren()]
            if len(childs) == len(self):
                for tag in self:
                    parent.remove(tag)
                parent.append(wrapper)

        self[:] = [wrapper]
        return self

    @with_camel_case_alias
    def replace_with(self, value):
        """replace nodes by value:

            >>> doc = PyQuery("<html><div /></html>")
            >>> node = PyQuery("<span />")
            >>> child = doc.find('div')
            >>> child.replace_with(node)
            [<div>]
            >>> print(doc)
            <html><span/></html>

        """
        if isinstance(value, PyQuery):
            value = str(value)
        if hasattr(value, '__call__'):
            for i, element in enumerate(self):
                self._copy(element).before(
                    value(i, element) + (element.tail or ''))
                parent = element.getparent()
                parent.remove(element)
        else:
            for tag in self:
                self._copy(tag).before(value + (tag.tail or ''))
                parent = tag.getparent()
                parent.remove(tag)
        return self

    @with_camel_case_alias
    def replace_all(self, expr):
        """replace nodes by expr
        """
        if self._parent is no_default:
            raise ValueError(
                'replaceAll can only be used with an object with parent')
        self._parent(expr).replace_with(self)
        return self

    def clone(self):
        """return a copy of nodes
        """
        return PyQuery([deepcopy(tag) for tag in self])

    def empty(self):
        """remove nodes content
        """
        for tag in self:
            tag.text = None
            tag[:] = []
        return self

    def remove(self, expr=no_default):
        """Remove nodes:

             >>> h = (
             ... '<div>Maybe <em>she</em> does <strong>NOT</strong> know</div>'
             ... )
             >>> d = PyQuery(h)
             >>> d('strong').remove()
             [<strong>]
             >>> print(d)
             <div>Maybe <em>she</em> does  know</div>
        """
        if expr is no_default:
            for tag in self:
                parent = tag.getparent()
                if parent is not None:
                    if tag.tail:
                        prev = tag.getprevious()
                        if prev is None:
                            if not parent.text:
                                parent.text = ''
                            parent.text += tag.tail
                        else:
                            if not prev.tail:
                                prev.tail = ''
                            prev.tail += tag.tail
                    parent.remove(tag)
        else:
            results = self._copy(expr, self)
            results.remove()
        return self

    class Fn(object):
        """Hook for defining custom function (like the jQuery.fn):

        .. sourcecode:: python

         >>> fn = lambda: this.map(lambda i, el: PyQuery(this).outerHtml())
         >>> PyQuery.fn.listOuterHtml = fn
         >>> S = PyQuery(
         ...   '<ol>   <li>Coffee</li>   <li>Tea</li>   <li>Milk</li>   </ol>')
         >>> S('li').listOuterHtml()
         ['<li>Coffee</li>', '<li>Tea</li>', '<li>Milk</li>']

        """
        def __setattr__(self, name, func):
            def fn(self, *args, **kwargs):
                func.__globals__['this'] = self
                return func(*args, **kwargs)
            fn.__name__ = name
            setattr(PyQuery, name, fn)
    fn = Fn()

    ########
    # AJAX #
    ########

    @with_camel_case_alias
    def serialize_array(self):
        """Serialize form elements as an array of dictionaries, whose structure
        mirrors that produced by the jQuery API. Notably, it does not handle
        the deprecated `keygen` form element.

            >>> d = PyQuery('<form><input name="order" value="spam"></form>')
            >>> d.serialize_array() == [{'name': 'order', 'value': 'spam'}]
            True
            >>> d.serializeArray() == [{'name': 'order', 'value': 'spam'}]
            True
        """
        return list(map(
            lambda p: {'name': p[0], 'value': p[1]},
            self.serialize_pairs()
        ))

    def serialize(self):
        """Serialize form elements as a URL-encoded string.

            >>> h = (
            ... '<form><input name="order" value="spam">'
            ... '<input name="order2" value="baked beans"></form>'
            ... )
            >>> d = PyQuery(h)
            >>> d.serialize()
            'order=spam&order2=baked%20beans'
        """
        return urlencode(self.serialize_pairs()).replace('+', '%20')

    #####################################################
    # Additional methods that are not in the jQuery API #
    #####################################################

    @with_camel_case_alias
    def serialize_pairs(self):
        """Serialize form elements as an array of 2-tuples conventional for
        typical URL-parsing operations in Python.

            >>> d = PyQuery('<form><input name="order" value="spam"></form>')
            >>> d.serialize_pairs()
            [('order', 'spam')]
            >>> d.serializePairs()
            [('order', 'spam')]
        """
        # https://github.com/jquery/jquery/blob
        # /2d4f53416e5f74fa98e0c1d66b6f3c285a12f0ce/src/serialize.js#L14
        _submitter_types = ['submit', 'button', 'image', 'reset', 'file']

        controls = self._copy([])
        # Expand list of form controls
        for el in self.items():
            if el[0].tag == 'form':
                form_id = el.attr('id')
                if form_id:
                    # Include inputs outside of their form owner
                    root = self._copy(el.root.getroot())
                    controls.extend(root(
                        '#%s :not([form]):input, [form="%s"]:input'
                        % (form_id, form_id)))
                else:
                    controls.extend(el(':not([form]):input'))
            elif el[0].tag == 'fieldset':
                controls.extend(el(':input'))
            else:
                controls.extend(el)
        # Filter controls
        selector = '[name]:enabled:not(button)'  # Not serializing image button
        selector += ''.join(map(
            lambda s: ':not([type="%s"])' % s,
            _submitter_types))
        controls = controls.filter(selector)

        def _filter_out_unchecked(_, el):
            el = controls._copy(el)
            return not el.is_(':checkbox:not(:checked)') and \
                not el.is_(':radio:not(:checked)')
        controls = controls.filter(_filter_out_unchecked)

        # jQuery serializes inputs with the datalist element as an ancestor
        # contrary to WHATWG spec as of August 2018
        #
        # xpath = 'self::*[not(ancestor::datalist)]'
        # results = []
        # for tag in controls:
        #     results.extend(tag.xpath(xpath, namespaces=controls.namespaces))
        # controls = controls._copy(results)

        # Serialize values
        ret = []
        for field in controls:
            val = self._copy(field).val() or ''
            if isinstance(val, list):
                ret.extend(map(
                    lambda v: (field.attrib['name'], v.replace('\n', '\r\n')),
                    val
                ))
            else:
                ret.append((field.attrib['name'], val.replace('\n', '\r\n')))
        return ret

    @with_camel_case_alias
    def serialize_dict(self):
        """Serialize form elements as an ordered dictionary. Multiple values
        corresponding to the same input name are concatenated into one list.

            >>> d = PyQuery('''<form>
            ...             <input name="order" value="spam">
            ...             <input name="order" value="eggs">
            ...             <input name="order2" value="ham">
            ...             </form>''')
            >>> d.serialize_dict()
            OrderedDict([('order', ['spam', 'eggs']), ('order2', 'ham')])
            >>> d.serializeDict()
            OrderedDict([('order', ['spam', 'eggs']), ('order2', 'ham')])
        """
        ret = OrderedDict()
        for name, val in self.serialize_pairs():
            if name not in ret:
                ret[name] = val
            elif not isinstance(ret[name], list):
                ret[name] = [ret[name], val]
            else:
                ret[name].append(val)
        return ret

    @property
    def base_url(self):
        """Return the url of current html document or None if not available.
        """
        if self._base_url is not None:
            return self._base_url
        if self._parent is not no_default:
            return self._parent.base_url

    def make_links_absolute(self, base_url=None):
        """Make all links absolute.
        """
        if base_url is None:
            base_url = self.base_url
            if base_url is None:
                raise ValueError((
                    'You need a base URL to make your links'
                    'absolute. It can be provided by the base_url parameter.'))

        def repl(attr):
            def rep(i, e):
                attr_value = self(e).attr(attr)
                # when label hasn't such attr, pass
                if attr_value is None:
                    return None

                # skip specific "protocol" schemas
                if any(attr_value.startswith(schema)
                       for schema in ('tel:', 'callto:', 'sms:')):
                    return None

                return self(e).attr(attr,
                                    urljoin(base_url, attr_value.strip()))
            return rep

        self('a').each(repl('href'))
        self('link').each(repl('href'))
        self('script').each(repl('src'))
        self('img').each(repl('src'))
        self('iframe').each(repl('src'))
        self('form').each(repl('action'))

        return self


build_camel_case_aliases(PyQuery)