From 917214b00bea709b86a907c5fb969937b616bb66 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 14 Jan 2021 12:27:15 -0600 Subject: [PATCH 01/10] Add async compatible classes --- .gitignore | 1 + elasticsearch_dsl/__init__.py | 23 + elasticsearch_dsl/_async/__init__.py | 36 + elasticsearch_dsl/_async/document.py | 430 +++++++++++ elasticsearch_dsl/_async/faceted_search.py | 208 +++++ elasticsearch_dsl/_async/index.py | 682 ++++++++++++++++ elasticsearch_dsl/_async/search.py | 549 +++++++++++++ elasticsearch_dsl/_async/update_by_query.py | 163 ++++ elasticsearch_dsl/_async/utils.py | 19 + elasticsearch_dsl/_base/__init__.py | 28 + elasticsearch_dsl/_base/faceted_search.py | 38 + elasticsearch_dsl/_base/search.py | 298 +++++++ elasticsearch_dsl/_sync/__init__.py | 36 + elasticsearch_dsl/_sync/document.py | 428 ++++++++++ elasticsearch_dsl/_sync/faceted_search.py | 208 +++++ elasticsearch_dsl/_sync/index.py | 654 ++++++++++++++++ elasticsearch_dsl/_sync/search.py | 545 +++++++++++++ elasticsearch_dsl/_sync/update_by_query.py | 161 ++++ elasticsearch_dsl/_sync/utils.py | 19 + elasticsearch_dsl/connections.py | 72 +- elasticsearch_dsl/document.py | 414 +--------- elasticsearch_dsl/faceted_search.py | 229 +----- elasticsearch_dsl/index.py | 642 +-------------- elasticsearch_dsl/search.py | 815 +------------------- elasticsearch_dsl/update_by_query.py | 149 +--- noxfile.py | 12 +- tests/conftest.py | 3 +- utils/unasync-files.py | 100 +++ 28 files changed, 4767 insertions(+), 2195 deletions(-) create mode 100644 elasticsearch_dsl/_async/__init__.py create mode 100644 elasticsearch_dsl/_async/document.py create mode 100644 elasticsearch_dsl/_async/faceted_search.py create mode 100644 elasticsearch_dsl/_async/index.py create mode 100644 elasticsearch_dsl/_async/search.py create mode 100644 elasticsearch_dsl/_async/update_by_query.py create mode 100644 elasticsearch_dsl/_async/utils.py create mode 100644 elasticsearch_dsl/_base/__init__.py create mode 100644 elasticsearch_dsl/_base/faceted_search.py create mode 100644 elasticsearch_dsl/_base/search.py create mode 100644 elasticsearch_dsl/_sync/__init__.py create mode 100644 elasticsearch_dsl/_sync/document.py create mode 100644 elasticsearch_dsl/_sync/faceted_search.py create mode 100644 elasticsearch_dsl/_sync/index.py create mode 100644 elasticsearch_dsl/_sync/search.py create mode 100644 elasticsearch_dsl/_sync/update_by_query.py create mode 100644 elasticsearch_dsl/_sync/utils.py create mode 100644 utils/unasync-files.py diff --git a/.gitignore b/.gitignore index 30f47511b..27e6082a7 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ build *.egg coverage.xml junit.xml +elasticsearch_dsl/_unasync/ test_elasticsearch_dsl/htmlcov docs/_build .cache diff --git a/elasticsearch_dsl/__init__.py b/elasticsearch_dsl/__init__.py index a1d2ab8b2..e6f8d3b18 100644 --- a/elasticsearch_dsl/__init__.py +++ b/elasticsearch_dsl/__init__.py @@ -161,3 +161,26 @@ "token_filter", "tokenizer", ] + +try: + from .document import AsyncDocument # noqa: F401 + from .faceted_search import AsyncFacetedSearch # noqa: F401 + from .index import AsyncIndex, AsyncIndexTemplate # noqa: F401 + from .mapping import AsyncMapping # noqa: F401 + from .search import AsyncMultiSearch, AsyncSearch # noqa: F401 + from .update_by_query import AsyncUpdateByQuery # noqa: F401 + + __all__.extend( + [ + "AsyncDocument", + "AsyncFacetedSearch", + "AsyncIndex", + "AsyncIndexTemplate", + "AsyncMapping", + "AsyncSearch", + "AsyncMultiSearch", + "AsyncUpdateByQuery", + ] + ) 
+except ImportError: + pass diff --git a/elasticsearch_dsl/_async/__init__.py b/elasticsearch_dsl/_async/__init__.py new file mode 100644 index 000000000..3a69026fc --- /dev/null +++ b/elasticsearch_dsl/_async/__init__.py @@ -0,0 +1,36 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +try: + from .document import AsyncDocument, AsyncIndexMeta + from .faceted_search import AsyncFacetedSearch + from .index import AsyncIndex, AsyncIndexTemplate + from .search import AsyncMultiSearch, AsyncSearch + from .update_by_query import AsyncUpdateByQuery + + __all__ = [ + "AsyncDocument", + "AsyncIndexMeta", + "AsyncFacetedSearch", + "AsyncIndex", + "AsyncIndexTemplate", + "AsyncSearch", + "AsyncMultiSearch", + "AsyncUpdateByQuery", + ] +except (ImportError, SyntaxError): + pass diff --git a/elasticsearch_dsl/_async/document.py b/elasticsearch_dsl/_async/document.py new file mode 100644 index 000000000..3798ccbb9 --- /dev/null +++ b/elasticsearch_dsl/_async/document.py @@ -0,0 +1,430 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +try: + import collections.abc as collections_abc # only works on python 3.3+ +except ImportError: + import collections as collections_abc + +from fnmatch import fnmatch + +from elasticsearch.exceptions import NotFoundError, RequestError +from six import add_metaclass + +from ..connections import get_connection +from ..document import DocumentMeta +from ..exceptions import IllegalOperation, ValidationException +from ..utils import DOC_META_FIELDS, META_FIELDS, ObjectBase, merge +from .search import AsyncSearch +from .utils import ASYNC_IS_ASYNC + + +class AsyncIndexMeta(DocumentMeta): + # global flag to guard us from associating an Index with the base Document + # class, only user defined subclasses should have an _index attr + _document_initialized = False + + def __new__(cls, name, bases, attrs): + new_cls = super(AsyncIndexMeta, cls).__new__(cls, name, bases, attrs) + if cls._document_initialized: + # This 'Index' is different than an 'AsyncIndex' instance + index_opts = attrs.pop("Index", None) + + index = cls.construct_index(index_opts, bases) + new_cls._index = index + index.document(new_cls) + cls._document_initialized = True + return new_cls + + @classmethod + def construct_index(cls, opts, bases): + from .index import AsyncIndex + + if opts is None: + for b in bases: + if hasattr(b, "_index"): + return b._index + + # Set None as Index name so it will set _all while making the query + return AsyncIndex(name=None) + + i = AsyncIndex( + getattr(opts, "name", "*"), using=getattr(opts, "using", "default") + ) + i.settings(**getattr(opts, "settings", {})) + i.aliases(**getattr(opts, "aliases", {})) + for a in getattr(opts, "analyzers", ()): + i.analyzer(a) + return i + + +@add_metaclass(AsyncIndexMeta) +class AsyncDocument(ObjectBase): + """ + Model-like class for persisting documents in elasticsearch. + """ + + @classmethod + def _matches(cls, hit): + if cls._index._name is None: + return True + return fnmatch(hit.get("_index", ""), cls._index._name) + + @classmethod + def _get_using(cls, using=None): + return using or cls._index._using + + @classmethod + def _get_connection(cls, using=None): + return get_connection(cls._get_using(using), is_async=ASYNC_IS_ASYNC) + + @classmethod + def _default_index(cls, index=None): + return index or cls._index._name + + @classmethod + async def init(cls, index=None, using=None): + """ + Create the index and populate the mappings in elasticsearch. + """ + i = cls._index + if index: + i = i.clone(name=index) + await i.save(using=using) + + def _get_index(self, index=None, required=True): + if index is None: + index = getattr(self.meta, "index", None) + if index is None: + index = getattr(self._index, "_name", None) + if index is None and required: + raise ValidationException("No index") + if index and "*" in index: + raise ValidationException("You cannot write to a wildcard index.") + return index + + def __repr__(self): + return "{}({})".format( + self.__class__.__name__, + ", ".join( + "{}={!r}".format(key, getattr(self.meta, key)) + for key in ("index", "id") + if key in self.meta + ), + ) + + @classmethod + def search(cls, using=None, index=None): + """ + Create an :class:`~elasticsearch_dsl.Search` instance that will search + over this ``Document``. + """ + return AsyncSearch( + using=cls._get_using(using), index=cls._default_index(index), doc_type=[cls] + ) + + @classmethod + async def get(cls, id, using=None, index=None, **kwargs): + """ + Retrieve a single document from elasticsearch using its ``id``. 
+ + :arg id: ``id`` of the document to be retrieved + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``Elasticsearch.get`` unchanged. + """ + es = cls._get_connection(using) + doc = await es.get(index=cls._default_index(index), id=id, **kwargs) + if not doc.get("found", False): + return None + return cls.from_es(doc) + + @classmethod + async def mget( + cls, docs, using=None, index=None, raise_on_error=True, missing="none", **kwargs + ): + r""" + Retrieve multiple document by their ``id``\s. Returns a list of instances + in the same order as requested. + + :arg docs: list of ``id``\s of the documents to be retrieved or a list + of document specifications as per + https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-multi-get.html + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg missing: what to do when one of the documents requested is not + found. Valid options are ``'none'`` (use ``None``), ``'raise'`` (raise + ``NotFoundError``) or ``'skip'`` (ignore the missing document). + + Any additional keyword arguments will be passed to + ``Elasticsearch.mget`` unchanged. + """ + if missing not in ("raise", "skip", "none"): + raise ValueError("'missing' must be 'raise', 'skip', or 'none'.") + es = cls._get_connection(using) + body = { + "docs": [ + doc if isinstance(doc, collections_abc.Mapping) else {"_id": doc} + for doc in docs + ] + } + results = await es.mget(body, index=cls._default_index(index), **kwargs) + + objs, error_docs, missing_docs = [], [], [] + for doc in results["docs"]: + if doc.get("found"): + if error_docs or missing_docs: + # We're going to raise an exception anyway, so avoid an + # expensive call to cls.from_es(). + continue + + objs.append(cls.from_es(doc)) + + elif doc.get("error"): + if raise_on_error: + error_docs.append(doc) + if missing == "none": + objs.append(None) + + # The doc didn't cause an error, but the doc also wasn't found. + elif missing == "raise": + missing_docs.append(doc) + elif missing == "none": + objs.append(None) + + if error_docs: + error_ids = [doc["_id"] for doc in error_docs] + message = "Required routing not provided for documents %s." + message %= ", ".join(error_ids) + raise RequestError(400, message, error_docs) + if missing_docs: + missing_ids = [doc["_id"] for doc in missing_docs] + message = "Documents %s not found." % ", ".join(missing_ids) + raise NotFoundError(404, message, {"docs": missing_docs}) + return objs + + async def delete(self, using=None, index=None, **kwargs): + """ + Delete the instance in elasticsearch. + + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``Elasticsearch.delete`` unchanged. 
+ """ + es = self._get_connection(using) + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # Optimistic concurrency control + if "seq_no" in self.meta and "primary_term" in self.meta: + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + doc_meta.update(kwargs) + es.delete(index=self._get_index(index), **doc_meta) + + def to_dict(self, include_meta=False, skip_empty=True): + """ + Serialize the instance into a dictionary so that it can be saved in elasticsearch. + + :arg include_meta: if set to ``True`` will include all the metadata + (``_index``, ``_id`` etc). Otherwise just the document's + data is serialized. This is useful when passing multiple instances into + ``elasticsearch.helpers.bulk``. + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in elasticsearch. + """ + d = super(AsyncDocument, self).to_dict(skip_empty=skip_empty) + if not include_meta: + return d + + meta = {"_" + k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # in case of to_dict include the index unlike save/update/delete + index = self._get_index(required=False) + if index is not None: + meta["_index"] = index + + meta["_source"] = d + return meta + + async def update( + self, + using=None, + index=None, + detect_noop=True, + doc_as_upsert=False, + refresh=False, + retry_on_conflict=None, + script=None, + script_id=None, + scripted_upsert=False, + upsert=None, + return_doc_meta=False, + **fields + ): + """ + Partial update of the document, specify fields you wish to update and + both the instance and the document in elasticsearch will be updated:: + + doc = MyDocument(title='Document Title!') + doc.save() + doc.update(title='New Document Title!') + + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg detect_noop: Set to ``False`` to disable noop detection. + :arg refresh: Control when the changes made by this request are visible + to search. Set to ``True`` for immediate effect. + :arg retry_on_conflict: In between the get and indexing phases of the + update, it is possible that another process might have already + updated the same document. By default, the update will fail with a + version conflict exception. The retry_on_conflict parameter + controls how many times to retry the update before finally throwing + an exception. + :arg doc_as_upsert: Instead of sending a partial doc plus an upsert + doc, setting doc_as_upsert to true will use the contents of doc as + the upsert value + :arg return_doc_meta: set to ``True`` to return all metadata from the + index API call instead of only the operation result + + :return operation result noop/updated + """ + body = { + "doc_as_upsert": doc_as_upsert, + "detect_noop": detect_noop, + } + + # scripted update + if script or script_id: + if upsert is not None: + body["upsert"] = upsert + + if script: + script = {"source": script} + else: + script = {"id": script_id} + + script["params"] = fields + + body["script"] = script + body["scripted_upsert"] = scripted_upsert + + # partial document update + else: + if not fields: + raise IllegalOperation( + "You cannot call update() without updating individual fields or a script. " + "If you wish to update the entire object use save()." 
+ ) + + # update given fields locally + merge(self, fields) + + # prepare data for ES + values = self.to_dict() + + # if fields were given: partial update + body["doc"] = {k: values.get(k) for k in fields.keys()} + + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + if retry_on_conflict is not None: + doc_meta["retry_on_conflict"] = retry_on_conflict + + # Optimistic concurrency control + if ( + retry_on_conflict in (None, 0) + and "seq_no" in self.meta + and "primary_term" in self.meta + ): + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + meta = await self._get_connection(using).update( + index=self._get_index(index), body=body, refresh=refresh, **doc_meta + ) + # update meta information from ES + for k in META_FIELDS: + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + + return meta if return_doc_meta else meta["result"] + + async def save( + self, + using=None, + index=None, + validate=True, + skip_empty=True, + return_doc_meta=False, + **kwargs + ): + """ + Save the document into elasticsearch. If the document doesn't exist it + is created, it is overwritten otherwise. Returns ``True`` if this + operations resulted in new document being created. + + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg validate: set to ``False`` to skip validating the document + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in elasticsearch. + :arg return_doc_meta: set to ``True`` to return all metadata from the + update API call instead of only the operation result + + Any additional keyword arguments will be passed to + ``Elasticsearch.index`` unchanged. + + :return operation result created/updated + """ + if validate: + self.full_clean() + + es = self._get_connection(using) + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # Optimistic concurrency control + if "seq_no" in self.meta and "primary_term" in self.meta: + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + doc_meta.update(kwargs) + meta = await es.index( + index=self._get_index(index), + body=self.to_dict(skip_empty=skip_empty), + **doc_meta + ) + # update meta information from ES + for k in META_FIELDS: + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + + return meta if return_doc_meta else meta["result"] diff --git a/elasticsearch_dsl/_async/faceted_search.py b/elasticsearch_dsl/_async/faceted_search.py new file mode 100644 index 000000000..225a53d93 --- /dev/null +++ b/elasticsearch_dsl/_async/faceted_search.py @@ -0,0 +1,208 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from six import iteritems, itervalues + +from .._base import FacetedResponse +from ..query import MatchAll +from .search import AsyncSearch + + +class AsyncFacetedSearch(object): + """ + Abstraction for creating faceted navigation searches that takes care of + composing the queries, aggregations and filters as needed as well as + presenting the results in an easy-to-consume fashion:: + + class BlogSearch(FacetedSearch): + index = 'blogs' + doc_types = [Blog, Post] + fields = ['title^5', 'category', 'description', 'body'] + + facets = { + 'type': TermsFacet(field='_type'), + 'category': TermsFacet(field='category'), + 'weekly_posts': DateHistogramFacet(field='published_from', interval='week') + } + + def search(self): + ' Override search to add your own filters ' + s = super(BlogSearch, self).search() + return s.filter('term', published=True) + + # when using: + blog_search = BlogSearch("web framework", filters={"category": "python"}) + + # supports pagination + blog_search[10:20] + + response = blog_search.execute() + + # easy access to aggregation results: + for category, hit_count, is_selected in response.facets.category: + print( + "Category %s has %d hits%s." % ( + category, + hit_count, + ' and is chosen' if is_selected else '' + ) + ) + + """ + + index = None + doc_types = None + fields = None + facets = {} + using = "default" + + def __init__(self, query=None, filters={}, sort=()): + """ + :arg query: the text to search for + :arg filters: facet values to filter + :arg sort: sort information to be passed to :class:`~elasticsearch_dsl.Search` + """ + self._query = query + self._filters = {} + self._sort = sort + self.filter_values = {} + for name, value in iteritems(filters): + self.add_filter(name, value) + + self._s = self.build_search() + + async def count(self): + return await self._s.count() + + def __getitem__(self, k): + self._s = self._s[k] + return self + + async def __aiter__(self): + async for hit in self._s: + yield hit + + def add_filter(self, name, filter_values): + """ + Add a filter for a facet. + """ + # normalize the value into a list + if not isinstance(filter_values, (tuple, list)): + if filter_values is None: + return + filter_values = [ + filter_values, + ] + + # remember the filter values for use in FacetedResponse + self.filter_values[name] = filter_values + + # get the filter from the facet + f = self.facets[name].add_filter(filter_values) + if f is None: + return + + self._filters[name] = f + + def search(self): + """ + Returns the base Search object to which the facets are added. + + You can customize the query by overriding this method and returning a + modified search object. + """ + s = AsyncSearch(doc_type=self.doc_types, index=self.index, using=self.using) + return s.response_class(FacetedResponse) + + def query(self, search, query): + """ + Add query part to ``search``. + + Override this if you wish to customize the query used. 
+ """ + if query: + if self.fields: + return search.query("multi_match", fields=self.fields, query=query) + else: + return search.query("multi_match", query=query) + return search + + def aggregate(self, search): + """ + Add aggregations representing the facets selected, including potential + filters. + """ + for f, facet in iteritems(self.facets): + agg = facet.get_aggregation() + agg_filter = MatchAll() + for field, filter in iteritems(self._filters): + if f == field: + continue + agg_filter &= filter + search.aggs.bucket("_filter_" + f, "filter", filter=agg_filter).bucket( + f, agg + ) + + def filter(self, search): + """ + Add a ``post_filter`` to the search request narrowing the results based + on the facet filters. + """ + if not self._filters: + return search + + post_filter = MatchAll() + for f in itervalues(self._filters): + post_filter &= f + return search.post_filter(post_filter) + + def highlight(self, search): + """ + Add highlighting for all the fields + """ + return search.highlight( + *(f if "^" not in f else f.split("^", 1)[0] for f in self.fields) + ) + + def sort(self, search): + """ + Add sorting information to the request. + """ + if self._sort: + search = search.sort(*self._sort) + return search + + def build_search(self): + """ + Construct the ``Search`` object. + """ + s = self.search() + s = self.query(s, self._query) + s = self.filter(s) + if self.fields: + s = self.highlight(s) + s = self.sort(s) + self.aggregate(s) + return s + + async def execute(self): + """ + Execute the search and return the response. + """ + r = await self._s.execute() + r._faceted_search = self + return r diff --git a/elasticsearch_dsl/_async/index.py b/elasticsearch_dsl/_async/index.py new file mode 100644 index 000000000..d104c146e --- /dev/null +++ b/elasticsearch_dsl/_async/index.py @@ -0,0 +1,682 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .. import analysis +from ..connections import get_connection +from ..exceptions import IllegalOperation +from ..mapping import Mapping +from ..utils import merge +from .search import AsyncSearch +from .update_by_query import AsyncUpdateByQuery +from .utils import ASYNC_IS_ASYNC + + +class AsyncIndexTemplate(object): + def __init__(self, name, template, index=None, order=None, **kwargs): + if index is None: + self._index = AsyncIndex(template, **kwargs) + else: + if kwargs: + raise ValueError( + "You cannot specify options for Index when" + " passing an Index instance." 
+ ) + self._index = index.clone() + self._index._name = template + self._template_name = name + self.order = order + + def __getattr__(self, attr_name): + return getattr(self._index, attr_name) + + def to_dict(self): + d = self._index.to_dict() + d["index_patterns"] = [self._index._name] + if self.order is not None: + d["order"] = self.order + return d + + async def save(self, using=None): + es = get_connection(using or self._index._using, is_async=ASYNC_IS_ASYNC) + return await es.indices.put_template( + name=self._template_name, body=self.to_dict() + ) + + +class AsyncIndex(object): + def __init__(self, name, using="default"): + """ + :arg name: name of the index + :arg using: connection alias to use, defaults to ``'default'`` + """ + self._name = name + self._doc_types = [] + self._using = using + self._settings = {} + self._aliases = {} + self._analysis = {} + self._mapping = None + + def get_or_create_mapping(self): + if self._mapping is None: + self._mapping = Mapping() + return self._mapping + + def as_template(self, template_name, pattern=None, order=None): + # TODO: should we allow pattern to be a top-level arg? + # or maybe have an IndexPattern that allows for it and have + # Document._index be that? + return AsyncIndexTemplate( + template_name, pattern or self._name, index=self, order=order + ) + + def resolve_nested(self, field_path): + for doc in self._doc_types: + nested, field = doc._doc_type.mapping.resolve_nested(field_path) + if field is not None: + return nested, field + if self._mapping: + return self._mapping.resolve_nested(field_path) + return (), None + + def resolve_field(self, field_path): + for doc in self._doc_types: + field = doc._doc_type.mapping.resolve_field(field_path) + if field is not None: + return field + if self._mapping: + return self._mapping.resolve_field(field_path) + return None + + async def load_mappings(self, using=None): + await self.get_or_create_mapping().update_from_es( + self._name, using=using or self._using + ) + + def clone(self, name=None, using=None): + """ + Create a copy of the instance with another name or connection alias. + Useful for creating multiple indices with shared configuration:: + + i = Index('base-index') + i.settings(number_of_shards=1) + i.create() + + i2 = i.clone('other-index') + i2.create() + + :arg name: name of the index + :arg using: connection alias to use, defaults to ``'default'`` + """ + i = AsyncIndex(name or self._name, using=using or self._using) + i._settings = self._settings.copy() + i._aliases = self._aliases.copy() + i._analysis = self._analysis.copy() + i._doc_types = self._doc_types[:] + if self._mapping is not None: + i._mapping = self._mapping._clone() + return i + + def _get_connection(self, using=None): + if self._name is None: + raise ValueError("You cannot perform API calls on the default index.") + return get_connection(using or self._using, is_async=ASYNC_IS_ASYNC) + + connection = property(_get_connection) + + def mapping(self, mapping): + """ + Associate a mapping (an instance of + :class:`~elasticsearch_dsl.Mapping`) with this index. + This means that, when this index is created, it will contain the + mappings for the document type defined by those mappings. + """ + self.get_or_create_mapping().update(mapping) + + def document(self, document): + """ + Associate a :class:`~elasticsearch_dsl.Document` subclass with an index. + This means that, when this index is created, it will contain the + mappings for the ``Document``. 
If the ``Document`` class doesn't have a + default index yet (by defining ``class Index``), this instance will be + used. Can be used as a decorator:: + + i = Index('blog') + + @i.document + class Post(Document): + title = Text() + + # create the index, including Post mappings + i.create() + + # .search() will now return a Search object that will return + # properly deserialized Post instances + s = i.search() + """ + self._doc_types.append(document) + + # If the document index does not have any name, that means the user + # did not set any index already to the document. + # So set this index as document index + if document._index._name is None: + document._index = self + + return document + + def settings(self, **kwargs): + """ + Add settings to the index:: + + i = Index('i') + i.settings(number_of_shards=1, number_of_replicas=0) + + Multiple calls to ``settings`` will merge the keys, later overriding + the earlier. + """ + self._settings.update(kwargs) + return self + + def aliases(self, **kwargs): + """ + Add aliases to the index definition:: + + i = Index('blog-v2') + i.aliases(blog={}, published={'filter': Q('term', published=True)}) + """ + self._aliases.update(kwargs) + return self + + def analyzer(self, *args, **kwargs): + """ + Explicitly add an analyzer to an index. Note that all custom analyzers + defined in mappings will also be created. This is useful for search analyzers. + + Example:: + + from elasticsearch_dsl import analyzer, tokenizer + + my_analyzer = analyzer('my_analyzer', + tokenizer=tokenizer('trigram', 'nGram', min_gram=3, max_gram=3), + filter=['lowercase'] + ) + + i = Index('blog') + i.analyzer(my_analyzer) + + """ + analyzer = analysis.analyzer(*args, **kwargs) + d = analyzer.get_analysis_definition() + # empty custom analyzer, probably already defined out of our control + if not d: + return + + # merge the definition + merge(self._analysis, d, True) + + def to_dict(self): + out = {} + if self._settings: + out["settings"] = self._settings + if self._aliases: + out["aliases"] = self._aliases + mappings = self._mapping.to_dict() if self._mapping else {} + analysis = self._mapping._collect_analysis() if self._mapping else {} + for d in self._doc_types: + mapping = d._doc_type.mapping + merge(mappings, mapping.to_dict(), True) + merge(analysis, mapping._collect_analysis(), True) + if mappings: + out["mappings"] = mappings + if analysis or self._analysis: + merge(analysis, self._analysis) + out.setdefault("settings", {})["analysis"] = analysis + return out + + def search(self, using=None): + """ + Return a :class:`~elasticsearch_dsl.Search` object searching over the + index (or all the indices belonging to this template) and its + ``Document``\\s. + """ + return AsyncSearch( + using=using or self._using, index=self._name, doc_type=self._doc_types + ) + + def update_by_query(self, using=None): + """ + Return a :class:`~elasticsearch_dsl.UpdateByQuery` object searching over the index + (or all the indices belonging to this template) and updating Documents that match + the search criteria. + + For more information, see here: + https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update-by-query.html + """ + return AsyncUpdateByQuery( + using=using or self._using, + index=self._name, + ) + + updateByQuery = update_by_query + + async def create(self, using=None, **kwargs): + """ + Creates the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.create`` unchanged. 
+ """ + return await self._get_connection(using=using).indices.create( + index=self._name, body=self.to_dict(), **kwargs + ) + + async def is_closed(self, using=None): + state = await self._get_connection(using=using).cluster.state( + index=self._name, metric="metadata" + ) + return state["metadata"]["indices"][self._name]["state"] == "close" + + async def save(self, using=None): + """ + Sync the index definition with elasticsearch, creating the index if it + doesn't exist and updating its settings and mappings if it does. + + Note some settings and mapping changes cannot be done on an open + index (or at all on an existing index) and for those this method will + fail with the underlying exception. + """ + if not await self.exists(using=using): + return await self.create(using=using) + + body = self.to_dict() + settings = body.pop("settings", {}) + analysis = settings.pop("analysis", None) + current_settings = (await self.get_settings(using=using))[self._name][ + "settings" + ]["index"] + if analysis: + if await self.is_closed(using=using): + # closed index, update away + settings["analysis"] = analysis + else: + # compare analysis definition, if all analysis objects are + # already defined as requested, skip analysis update and + # proceed, otherwise raise IllegalOperation + existing_analysis = current_settings.get("analysis", {}) + if any( + existing_analysis.get(section, {}).get(k, None) + != analysis[section][k] + for section in analysis + for k in analysis[section] + ): + raise IllegalOperation( + "You cannot update analysis configuration on an open index, " + "you need to close index %s first." % self._name + ) + + # try and update the settings + if settings: + settings = settings.copy() + for k, v in list(settings.items()): + if k in current_settings and current_settings[k] == str(v): + del settings[k] + + if settings: + await self.put_settings(using=using, body=settings) + + # update the mappings, any conflict in the mappings will result in an + # exception + mappings = body.pop("mappings", {}) + if mappings: + await self.put_mapping(using=using, body=mappings) + + async def analyze(self, using=None, **kwargs): + """ + Perform the analysis process on a text and return the tokens breakdown + of the text. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.analyze`` unchanged. + """ + return await self._get_connection(using).indices.analyze( + index=self._name, **kwargs + ) + + async def refresh(self, using=None, **kwargs): + """ + Performs a refresh operation on the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.refresh`` unchanged. + """ + return await self._get_connection(using).indices.refresh( + index=self._name, **kwargs + ) + + async def flush(self, using=None, **kwargs): + """ + Performs a flush operation on the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.flush`` unchanged. + """ + return self._get_connection(using).indices.flush(index=self._name, **kwargs) + + async def get(self, using=None, **kwargs): + """ + The get index API allows to retrieve information about the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get`` unchanged. + """ + return await self._get_connection(using).indices.get(index=self._name, **kwargs) + + async def open(self, using=None, **kwargs): + """ + Opens the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.open`` unchanged. 
+ """ + return await self._get_connection(using).indices.open( + index=self._name, **kwargs + ) + + async def close(self, using=None, **kwargs): + """ + Closes the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.close`` unchanged. + """ + return await self._get_connection(using).indices.close( + index=self._name, **kwargs + ) + + async def delete(self, using=None, **kwargs): + """ + Deletes the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.delete`` unchanged. + """ + return await self._get_connection(using).indices.delete( + index=self._name, **kwargs + ) + + async def exists(self, using=None, **kwargs): + """ + Returns ``True`` if the index already exists in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.exists`` unchanged. + """ + return await self._get_connection(using).indices.exists( + index=self._name, **kwargs + ) + + async def exists_type(self, using=None, **kwargs): + """ + Check if a type/types exists in the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.exists_type`` unchanged. + """ + return await self._get_connection(using).indices.exists_type( + index=self._name, **kwargs + ) + + async def put_mapping(self, using=None, **kwargs): + """ + Register specific mapping definition for a specific type. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.put_mapping`` unchanged. + """ + return await self._get_connection(using).indices.put_mapping( + index=self._name, **kwargs + ) + + async def get_mapping(self, using=None, **kwargs): + """ + Retrieve specific mapping definition for a specific type. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_mapping`` unchanged. + """ + return await self._get_connection(using).indices.get_mapping( + index=self._name, **kwargs + ) + + async def get_field_mapping(self, using=None, **kwargs): + """ + Retrieve mapping definition of a specific field. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_field_mapping`` unchanged. + """ + return await self._get_connection(using).indices.get_field_mapping( + index=self._name, **kwargs + ) + + async def put_alias(self, using=None, **kwargs): + """ + Create an alias for the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.put_alias`` unchanged. + """ + return await self._get_connection(using).indices.put_alias( + index=self._name, **kwargs + ) + + async def exists_alias(self, using=None, **kwargs): + """ + Return a boolean indicating whether given alias exists for this index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.exists_alias`` unchanged. + """ + return await self._get_connection(using).indices.exists_alias( + index=self._name, **kwargs + ) + + async def get_alias(self, using=None, **kwargs): + """ + Retrieve a specified alias. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_alias`` unchanged. + """ + return await self._get_connection(using).indices.get_alias( + index=self._name, **kwargs + ) + + async def delete_alias(self, using=None, **kwargs): + """ + Delete specific alias. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.delete_alias`` unchanged. 
+ """ + return await self._get_connection(using).indices.delete_alias( + index=self._name, **kwargs + ) + + async def get_settings(self, using=None, **kwargs): + """ + Retrieve settings for the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_settings`` unchanged. + """ + return await self._get_connection(using).indices.get_settings( + index=self._name, **kwargs + ) + + async def put_settings(self, using=None, **kwargs): + """ + Change specific index level settings in real time. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.put_settings`` unchanged. + """ + return await self._get_connection(using).indices.put_settings( + index=self._name, **kwargs + ) + + async def stats(self, using=None, **kwargs): + """ + Retrieve statistics on different operations happening on the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.stats`` unchanged. + """ + return await self._get_connection(using).indices.stats( + index=self._name, **kwargs + ) + + async def segments(self, using=None, **kwargs): + """ + Provide low level segments information that a Lucene index (shard + level) is built with. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.segments`` unchanged. + """ + return await self._get_connection(using).indices.segments( + index=self._name, **kwargs + ) + + async def validate_query(self, using=None, **kwargs): + """ + Validate a potentially expensive query without executing it. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.validate_query`` unchanged. + """ + return await self._get_connection(using).indices.validate_query( + index=self._name, **kwargs + ) + + async def clear_cache(self, using=None, **kwargs): + """ + Clear all caches or specific cached associated with the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.clear_cache`` unchanged. + """ + return await self._get_connection(using).indices.clear_cache( + index=self._name, **kwargs + ) + + async def recovery(self, using=None, **kwargs): + """ + The indices recovery API provides insight into on-going shard + recoveries for the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.recovery`` unchanged. + """ + return await self._get_connection(using).indices.recovery( + index=self._name, **kwargs + ) + + async def upgrade(self, using=None, **kwargs): + """ + Upgrade the index to the latest format. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.upgrade`` unchanged. + """ + return await self._get_connection(using).indices.upgrade( + index=self._name, **kwargs + ) + + async def get_upgrade(self, using=None, **kwargs): + """ + Monitor how much of the index is upgraded. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_upgrade`` unchanged. + """ + return await self._get_connection(using).indices.get_upgrade( + index=self._name, **kwargs + ) + + async def flush_synced(self, using=None, **kwargs): + """ + Perform a normal flush, then add a generated unique marker (sync_id) to + all shards. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.flush_synced`` unchanged. + """ + return await self._get_connection(using).indices.flush_synced( + index=self._name, **kwargs + ) + + async def shard_stores(self, using=None, **kwargs): + """ + Provides store information for shard copies of the index. 
Store + information reports on which nodes shard copies exist, the shard copy + version, indicating how recent they are, and any exceptions encountered + while opening the shard index or from earlier engine failure. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.shard_stores`` unchanged. + """ + return await self._get_connection(using).indices.shard_stores( + index=self._name, **kwargs + ) + + async def forcemerge(self, using=None, **kwargs): + """ + The force merge API allows to force merging of the index through an + API. The merge relates to the number of segments a Lucene index holds + within each shard. The force merge operation allows to reduce the + number of segments by merging them. + + This call will block until the merge is complete. If the http + connection is lost, the request will continue in the background, and + any new requests will block until the previous force merge is complete. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.forcemerge`` unchanged. + """ + return await self._get_connection(using).indices.forcemerge( + index=self._name, **kwargs + ) + + async def shrink(self, using=None, **kwargs): + """ + The shrink index API allows you to shrink an existing index into a new + index with fewer primary shards. The number of primary shards in the + target index must be a factor of the shards in the source index. For + example an index with 8 primary shards can be shrunk into 4, 2 or 1 + primary shards or an index with 15 primary shards can be shrunk into 5, + 3 or 1. If the number of shards in the index is a prime number it can + only be shrunk into a single primary shard. Before shrinking, a + (primary or replica) copy of every shard in the index must be present + on the same node. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.shrink`` unchanged. + """ + return await self._get_connection(using).indices.shrink( + index=self._name, **kwargs + ) diff --git a/elasticsearch_dsl/_async/search.py b/elasticsearch_dsl/_async/search.py new file mode 100644 index 000000000..477457d42 --- /dev/null +++ b/elasticsearch_dsl/_async/search.py @@ -0,0 +1,549 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import copy + +from elasticsearch.exceptions import TransportError +from elasticsearch.helpers import async_scan +from six import iteritems, string_types + +from .._base import AggsProxy, ProxyDescriptor, QueryProxy, Request +from ..aggs import A +from ..connections import get_connection +from ..exceptions import IllegalOperation +from ..query import Bool, Q +from ..response import Response +from ..utils import AttrDict, recursive_to_dict +from .utils import ASYNC_IS_ASYNC + + +class AsyncSearch(Request): + query = ProxyDescriptor("query") + post_filter = ProxyDescriptor("post_filter") + + def __init__(self, **kwargs): + """ + Search request to elasticsearch. + + :arg using: `Elasticsearch` instance to use + :arg index: limit the search to index + :arg doc_type: only query this type. + + All the parameters supplied (or omitted) at creation type can be later + overridden by methods (`using`, `index` and `doc_type` respectively). + """ + super(AsyncSearch, self).__init__(**kwargs) + + self.aggs = AggsProxy(self) + self._sort = [] + self._source = None + self._highlight = {} + self._highlight_opts = {} + self._suggest = {} + self._script_fields = {} + self._response_class = Response + + self._query_proxy = QueryProxy(self, "query") + self._post_filter_proxy = QueryProxy(self, "post_filter") + + def filter(self, *args, **kwargs): + return self.query(Bool(filter=[Q(*args, **kwargs)])) + + def exclude(self, *args, **kwargs): + return self.query(Bool(filter=[~Q(*args, **kwargs)])) + + async def __aiter__(self): + """ + Iterate over the hits. + """ + hits = await self.execute() + for hit in hits: + yield hit + + def __getitem__(self, n): + """ + Support slicing the `Search` instance for pagination. + + Slicing equates to the from/size parameters. E.g.:: + + s = Search().query(...)[0:25] + + is equivalent to:: + + s = Search().query(...).extra(from_=0, size=25) + + """ + s = self._clone() + + if isinstance(n, slice): + # If negative slicing, abort. + if n.start and n.start < 0 or n.stop and n.stop < 0: + raise ValueError("Search does not support negative slicing.") + # Elasticsearch won't get all results so we default to size: 10 if + # stop not given. + s._extra["from"] = n.start or 0 + s._extra["size"] = max( + 0, n.stop - (n.start or 0) if n.stop is not None else 10 + ) + return s + else: # This is an index lookup, equivalent to slicing by [n:n+1]. + # If negative index, abort. + if n < 0: + raise ValueError("Search does not support negative indexing.") + s._extra["from"] = n + s._extra["size"] = 1 + return s + + @classmethod + def from_dict(cls, d): + """ + Construct a new `Search` instance from a raw dict containing the search + body. Useful when migrating from raw dictionaries. + + Example:: + + s = Search.from_dict({ + "query": { + "bool": { + "must": [...] + } + }, + "aggs": {...} + }) + s = s.filter('term', published=True) + """ + s = cls() + s.update_from_dict(d) + return s + + def _clone(self): + """ + Return a clone of the current search request. Performs a shallow copy + of all the underlying objects. Used internally by most state modifying + APIs. 
+ """ + s = super(AsyncSearch, self)._clone() + + s._response_class = self._response_class + s._sort = self._sort[:] + s._source = copy.copy(self._source) if self._source is not None else None + s._highlight = self._highlight.copy() + s._highlight_opts = self._highlight_opts.copy() + s._suggest = self._suggest.copy() + s._script_fields = self._script_fields.copy() + for x in ("query", "post_filter"): + getattr(s, x)._proxied = getattr(self, x)._proxied + + # copy top-level bucket definitions + if self.aggs._params.get("aggs"): + s.aggs._params = {"aggs": self.aggs._params["aggs"].copy()} + return s + + def response_class(self, cls): + """ + Override the default wrapper used for the response. + """ + s = self._clone() + s._response_class = cls + return s + + def update_from_dict(self, d): + """ + Apply options from a serialized body to the current instance. Modifies + the object in-place. Used mostly by ``from_dict``. + """ + d = d.copy() + if "query" in d: + self.query._proxied = Q(d.pop("query")) + if "post_filter" in d: + self.post_filter._proxied = Q(d.pop("post_filter")) + + aggs = d.pop("aggs", d.pop("aggregations", {})) + if aggs: + self.aggs._params = { + "aggs": {name: A(value) for (name, value) in iteritems(aggs)} + } + if "sort" in d: + self._sort = d.pop("sort") + if "_source" in d: + self._source = d.pop("_source") + if "highlight" in d: + high = d.pop("highlight").copy() + self._highlight = high.pop("fields") + self._highlight_opts = high + if "suggest" in d: + self._suggest = d.pop("suggest") + if "text" in self._suggest: + text = self._suggest.pop("text") + for s in self._suggest.values(): + s.setdefault("text", text) + if "script_fields" in d: + self._script_fields = d.pop("script_fields") + self._extra.update(d) + return self + + def script_fields(self, **kwargs): + """ + Define script fields to be calculated on hits. See + https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-script-fields.html + for more details. + + Example:: + + s = Search() + s = s.script_fields(times_two="doc['field'].value * 2") + s = s.script_fields( + times_three={ + 'script': { + 'lang': 'painless', + 'source': "doc['field'].value * params.n", + 'params': {'n': 3} + } + } + ) + + """ + s = self._clone() + for name in kwargs: + if isinstance(kwargs[name], string_types): + kwargs[name] = {"script": kwargs[name]} + s._script_fields.update(kwargs) + return s + + def source(self, fields=None, **kwargs): + """ + Selectively control how the _source field is returned. + + :arg fields: wildcard string, array of wildcards, or dictionary of includes and excludes + + If ``fields`` is None, the entire document will be returned for + each hit. If fields is a dictionary with keys of 'includes' and/or + 'excludes' the fields will be either included or excluded appropriately. + + Calling this multiple times with the same named parameter will override the + previous values with the new ones. 
+ + Example:: + + s = Search() + s = s.source(includes=['obj1.*'], excludes=["*.description"]) + + s = Search() + s = s.source(includes=['obj1.*']).source(excludes=["*.description"]) + + """ + s = self._clone() + + if fields and kwargs: + raise ValueError("You cannot specify fields and kwargs at the same time.") + + if fields is not None: + s._source = fields + return s + + if kwargs and not isinstance(s._source, dict): + s._source = {} + + for key, value in kwargs.items(): + if value is None: + try: + del s._source[key] + except KeyError: + pass + else: + s._source[key] = value + + return s + + def sort(self, *keys): + """ + Add sorting information to the search request. If called without + arguments it will remove all sort requirements. Otherwise it will + replace them. Acceptable arguments are:: + + 'some.field' + '-some.other.field' + {'different.field': {'any': 'dict'}} + + so for example:: + + s = Search().sort( + 'category', + '-title', + {"price" : {"order" : "asc", "mode" : "avg"}} + ) + + will sort by ``category``, ``title`` (in descending order) and + ``price`` in ascending order using the ``avg`` mode. + + The API returns a copy of the Search object and can thus be chained. + """ + s = self._clone() + s._sort = [] + for k in keys: + if isinstance(k, string_types) and k.startswith("-"): + if k[1:] == "_score": + raise IllegalOperation("Sorting by `-_score` is not allowed.") + k = {k[1:]: {"order": "desc"}} + s._sort.append(k) + return s + + def highlight_options(self, **kwargs): + """ + Update the global highlighting options used for this request. For + example:: + + s = Search() + s = s.highlight_options(order='score') + """ + s = self._clone() + s._highlight_opts.update(kwargs) + return s + + def highlight(self, *fields, **kwargs): + """ + Request highlighting of some fields. All keyword arguments passed in will be + used as parameters for all the fields in the ``fields`` parameter. Example:: + + Search().highlight('title', 'body', fragment_size=50) + + will produce the equivalent of:: + + { + "highlight": { + "fields": { + "body": {"fragment_size": 50}, + "title": {"fragment_size": 50} + } + } + } + + If you want to have different options for different fields + you can call ``highlight`` twice:: + + Search().highlight('title', fragment_size=50).highlight('body', fragment_size=100) + + which will produce:: + + { + "highlight": { + "fields": { + "body": {"fragment_size": 100}, + "title": {"fragment_size": 50} + } + } + } + + """ + s = self._clone() + for f in fields: + s._highlight[f] = kwargs + return s + + def suggest(self, name, text, **kwargs): + """ + Add a suggestions request to the search. + + :arg name: name of the suggestion + :arg text: text to suggest on + + All keyword arguments will be added to the suggestions body. For example:: + + s = Search() + s = s.suggest('suggestion-1', 'Elasticsearch', term={'field': 'body'}) + """ + s = self._clone() + s._suggest[name] = {"text": text} + s._suggest[name].update(kwargs) + return s + + def to_dict(self, count=False, **kwargs): + """ + Serialize the search into the dictionary that will be sent over as the + request's body. + + :arg count: a flag to specify if we are interested in a body for count - + no aggregations, no pagination bounds etc. + + All additional keyword arguments will be included into the dictionary. 
+ """ + d = {} + + if self.query: + d["query"] = self.query.to_dict() + + # count request doesn't care for sorting and other things + if not count: + if self.post_filter: + d["post_filter"] = self.post_filter.to_dict() + + if self.aggs.aggs: + d.update(self.aggs.to_dict()) + + if self._sort: + d["sort"] = self._sort + + d.update(recursive_to_dict(self._extra)) + + if self._source not in (None, {}): + d["_source"] = self._source + + if self._highlight: + d["highlight"] = {"fields": self._highlight} + d["highlight"].update(self._highlight_opts) + + if self._suggest: + d["suggest"] = self._suggest + + if self._script_fields: + d["script_fields"] = self._script_fields + + d.update(recursive_to_dict(kwargs)) + return d + + async def count(self): + """ + Return the number of hits matching the query and filters. Note that + only the actual number is returned. + """ + if hasattr(self, "_response") and self._response.hits.total.relation == "eq": + return self._response.hits.total.value + + es = get_connection(self._using, is_async=ASYNC_IS_ASYNC) + + d = self.to_dict(count=True) + # TODO: failed shards detection + return await es.count(index=self._index, body=d, **self._params)["count"] + + async def execute(self, ignore_cache=False): + """ + Execute the search and return an instance of ``Response`` wrapping all + the data. + + :arg ignore_cache: if set to ``True``, consecutive calls will hit + ES, while cached result will be ignored. Defaults to `False` + """ + if ignore_cache or not hasattr(self, "_response"): + es = get_connection(self._using, is_async=ASYNC_IS_ASYNC) + + self._response = self._response_class( + self, + await es.search(index=self._index, body=self.to_dict(), **self._params), + ) + return self._response + + async def scan(self): + """ + Turn the search into a scan search and return a generator that will + iterate over all the documents matching the query. + + Use ``params`` method to specify any additional arguments you with to + pass to the underlying ``scan`` helper from ``elasticsearch-py`` - + https://elasticsearch-py.readthedocs.io/en/master/helpers.html#elasticsearch.helpers.scan + + """ + es = get_connection(self._using, is_async=ASYNC_IS_ASYNC) + + async for hit in async_scan( + es, query=self.to_dict(), index=self._index, **self._params + ): + yield self._get_result(hit) + + async def delete(self): + """ + delete() executes the query by delegating to delete_by_query() + """ + + es = get_connection(self._using, is_async=ASYNC_IS_ASYNC) + + return AttrDict( + await es.delete_by_query( + index=self._index, body=self.to_dict(), **self._params + ) + ) + + +class AsyncMultiSearch(Request): + """ + Combine multiple :class:`~elasticsearch_dsl.Search` objects into a single + request. 
+    """
+
+    def __init__(self, **kwargs):
+        super(AsyncMultiSearch, self).__init__(**kwargs)
+        self._searches = []
+
+    def __getitem__(self, key):
+        return self._searches[key]
+
+    def __iter__(self):
+        return iter(self._searches)
+
+    def _clone(self):
+        ms = super(AsyncMultiSearch, self)._clone()
+        ms._searches = self._searches[:]
+        return ms
+
+    def add(self, search):
+        """
+        Adds a new :class:`~elasticsearch_dsl.Search` object to the request::
+
+            ms = MultiSearch(index='my-index')
+            ms = ms.add(Search(doc_type=Category).filter('term', category='python'))
+            ms = ms.add(Search(doc_type=Blog))
+        """
+        ms = self._clone()
+        ms._searches.append(search)
+        return ms
+
+    def to_dict(self):
+        out = []
+        for s in self._searches:
+            meta = {}
+            if s._index:
+                meta["index"] = s._index
+            meta.update(s._params)
+
+            out.append(meta)
+            out.append(s.to_dict())
+
+        return out
+
+    async def execute(self, ignore_cache=False, raise_on_error=True):
+        """
+        Execute the multi search request and return a list of search results.
+        """
+        if ignore_cache or not hasattr(self, "_response"):
+            es = get_connection(self._using, is_async=ASYNC_IS_ASYNC)
+
+            responses = await es.msearch(
+                index=self._index, body=self.to_dict(), **self._params
+            )
+
+            out = []
+            for s, r in zip(self._searches, responses["responses"]):
+                if r.get("error", False):
+                    if raise_on_error:
+                        raise TransportError("N/A", r["error"]["type"], r["error"])
+                    r = None
+                else:
+                    r = Response(s, r)
+                out.append(r)
+
+            self._response = out
+
+        return self._response
diff --git a/elasticsearch_dsl/_async/update_by_query.py b/elasticsearch_dsl/_async/update_by_query.py
new file mode 100644
index 000000000..31447af9a
--- /dev/null
+++ b/elasticsearch_dsl/_async/update_by_query.py
@@ -0,0 +1,163 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from .._base import ProxyDescriptor, QueryProxy, Request
+from ..connections import get_connection
+from ..query import Bool, Q
+from ..response import UpdateByQueryResponse
+from ..utils import recursive_to_dict
+from .utils import ASYNC_IS_ASYNC
+
+
+class AsyncUpdateByQuery(Request):
+
+    query = ProxyDescriptor("query")
+
+    def __init__(self, **kwargs):
+        """
+        Update by query request to elasticsearch.
+
+        :arg using: `Elasticsearch` instance to use
+        :arg index: limit the search to index
+        :arg doc_type: only query this type.
+
+        All the parameters supplied (or omitted) at creation time can be later
+        overridden by methods (`using`, `index` and `doc_type` respectively).
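+
+        A minimal usage sketch (the index and field names are illustrative)::
+
+            ubq = AsyncUpdateByQuery(index='blogs')
+            ubq = ubq.filter('term', published=True)
+            response = await ubq.script(source='ctx._source.views = 0').execute()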
+
+        """
+        super(AsyncUpdateByQuery, self).__init__(**kwargs)
+        self._response_class = UpdateByQueryResponse
+        self._script = {}
+        self._query_proxy = QueryProxy(self, "query")
+
+    def filter(self, *args, **kwargs):
+        return self.query(Bool(filter=[Q(*args, **kwargs)]))
+
+    def exclude(self, *args, **kwargs):
+        return self.query(Bool(filter=[~Q(*args, **kwargs)]))
+
+    @classmethod
+    def from_dict(cls, d):
+        """
+        Construct a new `UpdateByQuery` instance from a raw dict containing the search
+        body. Useful when migrating from raw dictionaries.
+
+        Example::
+
+            ubq = UpdateByQuery.from_dict({
+                "query": {
+                    "bool": {
+                        "must": [...]
+                    }
+                },
+                "script": {...}
+            })
+            ubq = ubq.filter('term', published=True)
+        """
+        u = cls()
+        u.update_from_dict(d)
+        return u
+
+    def _clone(self):
+        """
+        Return a clone of the current search request. Performs a shallow copy
+        of all the underlying objects. Used internally by most state modifying
+        APIs.
+        """
+        ubq = super(AsyncUpdateByQuery, self)._clone()
+
+        ubq._response_class = self._response_class
+        ubq._script = self._script.copy()
+        ubq.query._proxied = self.query._proxied
+        return ubq
+
+    def response_class(self, cls):
+        """
+        Override the default wrapper used for the response.
+        """
+        ubq = self._clone()
+        ubq._response_class = cls
+        return ubq
+
+    def update_from_dict(self, d):
+        """
+        Apply options from a serialized body to the current instance. Modifies
+        the object in-place. Used mostly by ``from_dict``.
+        """
+        d = d.copy()
+        if "query" in d:
+            self.query._proxied = Q(d.pop("query"))
+        if "script" in d:
+            self._script = d.pop("script")
+        self._extra.update(d)
+        return self
+
+    def script(self, **kwargs):
+        """
+        Define the update action to take. See
+        https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-scripting-using.html
+        for more details.
+
+        Note: the API only accepts a single script, so calling ``script``
+        multiple times will overwrite the previously set script.
+
+        Example::
+
+            ubq = AsyncUpdateByQuery()
+            ubq = ubq.script(source="ctx._source.likes++")
+            ubq = ubq.script(source="ctx._source.likes += params.f",
+                             lang="painless",
+                             params={'f': 3})
+        """
+        ubq = self._clone()
+        if ubq._script:
+            ubq._script = {}
+        ubq._script.update(kwargs)
+        return ubq
+
+    def to_dict(self, **kwargs):
+        """
+        Serialize the search into the dictionary that will be sent over as the
+        request's body.
+
+        All additional keyword arguments will be included into the dictionary.
+        """
+        d = {}
+        if self.query:
+            d["query"] = self.query.to_dict()
+
+        if self._script:
+            d["script"] = self._script
+
+        d.update(recursive_to_dict(self._extra))
+        d.update(recursive_to_dict(kwargs))
+        return d
+
+    async def execute(self):
+        """
+        Execute the search and return an instance of ``Response`` wrapping all
+        the data.
+        """
+        es = get_connection(self._using, is_async=ASYNC_IS_ASYNC)
+
+        self._response = self._response_class(
+            self,
+            await es.update_by_query(
+                index=self._index, body=self.to_dict(), **self._params
+            ),
+        )
+        return self._response
diff --git a/elasticsearch_dsl/_async/utils.py b/elasticsearch_dsl/_async/utils.py
new file mode 100644
index 000000000..0776939fe
--- /dev/null
+++ b/elasticsearch_dsl/_async/utils.py
@@ -0,0 +1,19 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V.
licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ASYNC_IS_ASYNC = True +SYNC_IS_ASYNC = False diff --git a/elasticsearch_dsl/_base/__init__.py b/elasticsearch_dsl/_base/__init__.py new file mode 100644 index 000000000..180fb3298 --- /dev/null +++ b/elasticsearch_dsl/_base/__init__.py @@ -0,0 +1,28 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .faceted_search import FacetedResponse +from .search import AggsProxy, ProxyDescriptor, QueryProxy, Request, Response + +__all__ = [ + "FacetedResponse", + "AggsProxy", + "ProxyDescriptor", + "QueryProxy", + "Request", + "Response", +] diff --git a/elasticsearch_dsl/_base/faceted_search.py b/elasticsearch_dsl/_base/faceted_search.py new file mode 100644 index 000000000..4c789a8c7 --- /dev/null +++ b/elasticsearch_dsl/_base/faceted_search.py @@ -0,0 +1,38 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from six import iteritems + +from ..response import Response +from ..utils import AttrDict + + +class FacetedResponse(Response): + @property + def query_string(self): + return self._faceted_search._query + + @property + def facets(self): + if not hasattr(self, "_facets"): + super(AttrDict, self).__setattr__("_facets", AttrDict({})) + for name, facet in iteritems(self._faceted_search.facets): + self._facets[name] = facet.get_values( + getattr(getattr(self.aggregations, "_filter_" + name), name), + self._faceted_search.filter_values.get(name, ()), + ) + return self._facets diff --git a/elasticsearch_dsl/_base/search.py b/elasticsearch_dsl/_base/search.py new file mode 100644 index 000000000..0c924484f --- /dev/null +++ b/elasticsearch_dsl/_base/search.py @@ -0,0 +1,298 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +try: + import collections.abc as collections_abc # only works on python 3.3+ +except ImportError: + import collections as collections_abc + +from six import string_types + +from ..aggs import AggBase +from ..query import Q +from ..response import Hit, Response +from ..utils import DslBase + + +class QueryProxy(object): + """ + Simple proxy around DSL objects (queries) that can be called + (to add query/post_filter) and also allows attribute access which is proxied to + the wrapped query. + """ + + def __init__(self, search, attr_name): + self._search = search + self._proxied = None + self._attr_name = attr_name + + def __nonzero__(self): + return self._proxied is not None + + __bool__ = __nonzero__ + + def __call__(self, *args, **kwargs): + s = self._search._clone() + + # we cannot use self._proxied since we just cloned self._search and + # need to access the new self on the clone + proxied = getattr(s, self._attr_name) + if proxied._proxied is None: + proxied._proxied = Q(*args, **kwargs) + else: + proxied._proxied &= Q(*args, **kwargs) + + # always return search to be chainable + return s + + def __getattr__(self, attr_name): + return getattr(self._proxied, attr_name) + + def __setattr__(self, attr_name, value): + if not attr_name.startswith("_"): + self._proxied = Q(self._proxied.to_dict()) + setattr(self._proxied, attr_name, value) + super(QueryProxy, self).__setattr__(attr_name, value) + + def __getstate__(self): + return self._search, self._proxied, self._attr_name + + def __setstate__(self, state): + self._search, self._proxied, self._attr_name = state + + +class ProxyDescriptor(object): + """ + Simple descriptor to enable setting of queries and filters as: + + s = Search() + s.query = Q(...) 
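+
+    Assigning a plain ``dict`` works as well; ``__set__`` wraps the value
+    with ``Q()``, so for example (a sketch)::
+
+        s.query = {"match": {"title": "python"}}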
+ + """ + + def __init__(self, name): + self._attr_name = "_%s_proxy" % name + + def __get__(self, instance, owner): + return getattr(instance, self._attr_name) + + def __set__(self, instance, value): + proxy = getattr(instance, self._attr_name) + proxy._proxied = Q(value) + + +class AggsProxy(AggBase, DslBase): + name = "aggs" + + def __init__(self, search): + self._base = self + self._search = search + self._params = {"aggs": {}} + + def to_dict(self): + return super(AggsProxy, self).to_dict().get("aggs", {}) + + +class Request(object): + def __init__(self, using="default", index=None, doc_type=None, extra=None): + self._using = using + + self._index = None + if isinstance(index, (tuple, list)): + self._index = list(index) + elif index: + self._index = [index] + + self._doc_type = [] + self._doc_type_map = {} + if isinstance(doc_type, (tuple, list)): + self._doc_type.extend(doc_type) + elif isinstance(doc_type, collections_abc.Mapping): + self._doc_type.extend(doc_type.keys()) + self._doc_type_map.update(doc_type) + elif doc_type: + self._doc_type.append(doc_type) + + self._params = {} + self._extra = extra or {} + + def __eq__(self, other): + return ( + isinstance(other, Request) + and other._params == self._params + and other._index == self._index + and other._doc_type == self._doc_type + and other.to_dict() == self.to_dict() + ) + + def __copy__(self): + return self._clone() + + def params(self, **kwargs): + """ + Specify query params to be used when executing the search. All the + keyword arguments will override the current values. See + https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.Elasticsearch.search + for all available parameters. + + Example:: + + s = Search() + s = s.params(routing='user-1', preference='local') + """ + s = self._clone() + s._params.update(kwargs) + return s + + def index(self, *index): + """ + Set the index for the search. If called empty it will remove all information. 
+ + Example: + + s = Search() + s = s.index('twitter-2015.01.01', 'twitter-2015.01.02') + s = s.index(['twitter-2015.01.01', 'twitter-2015.01.02']) + """ + # .index() resets + s = self._clone() + if not index: + s._index = None + else: + indexes = [] + for i in index: + if isinstance(i, string_types): + indexes.append(i) + elif isinstance(i, list): + indexes += i + elif isinstance(i, tuple): + indexes += list(i) + + s._index = (self._index or []) + indexes + + return s + + def _resolve_field(self, path): + for dt in self._doc_type: + if not hasattr(dt, "_index"): + continue + field = dt._index.resolve_field(path) + if field is not None: + return field + + def _resolve_nested(self, hit, parent_class=None): + doc_class = Hit + + nested_path = [] + nesting = hit["_nested"] + while nesting and "field" in nesting: + nested_path.append(nesting["field"]) + nesting = nesting.get("_nested") + nested_path = ".".join(nested_path) + + if hasattr(parent_class, "_index"): + nested_field = parent_class._index.resolve_field(nested_path) + else: + nested_field = self._resolve_field(nested_path) + + if nested_field is not None: + return nested_field._doc_class + + return doc_class + + def _get_result(self, hit, parent_class=None): + doc_class = Hit + dt = hit.get("_type") + + if "_nested" in hit: + doc_class = self._resolve_nested(hit, parent_class) + + elif dt in self._doc_type_map: + doc_class = self._doc_type_map[dt] + + else: + for doc_type in self._doc_type: + if hasattr(doc_type, "_matches") and doc_type._matches(hit): + doc_class = doc_type + break + + for t in hit.get("inner_hits", ()): + hit["inner_hits"][t] = Response( + self, hit["inner_hits"][t], doc_class=doc_class + ) + + callback = getattr(doc_class, "from_es", doc_class) + return callback(hit) + + def doc_type(self, *doc_type, **kwargs): + """ + Set the type to search through. You can supply a single value or + multiple. Values can be strings or subclasses of ``Document``. + + You can also pass in any keyword arguments, mapping a doc_type to a + callback that should be used instead of the Hit class. + + If no doc_type is supplied any information stored on the instance will + be erased. + + Example: + + s = Search().doc_type('product', 'store', User, custom=my_callback) + """ + # .doc_type() resets + s = self._clone() + if not doc_type and not kwargs: + s._doc_type = [] + s._doc_type_map = {} + else: + s._doc_type.extend(doc_type) + s._doc_type.extend(kwargs.keys()) + s._doc_type_map.update(kwargs) + return s + + def using(self, client): + """ + Associate the search request with an elasticsearch client. A fresh copy + will be returned with current instance remaining unchanged. + + :arg client: an instance of ``elasticsearch.Elasticsearch`` to use or + an alias to look up in ``elasticsearch_dsl.connections`` + + """ + s = self._clone() + s._using = client + return s + + def extra(self, **kwargs): + """ + Add extra keys to the request body. Mostly here for backwards + compatibility. 
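+
+        Example (a sketch; ``track_total_hits`` is just an illustrative body key)::
+
+            s = Search()
+            s = s.extra(track_total_hits=True, from_=10)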
+ """ + s = self._clone() + if "from_" in kwargs: + kwargs["from"] = kwargs.pop("from_") + s._extra.update(kwargs) + return s + + def _clone(self): + s = self.__class__( + using=self._using, index=self._index, doc_type=self._doc_type + ) + s._doc_type_map = self._doc_type_map.copy() + s._extra = self._extra.copy() + s._params = self._params.copy() + return s diff --git a/elasticsearch_dsl/_sync/__init__.py b/elasticsearch_dsl/_sync/__init__.py new file mode 100644 index 000000000..810250ce8 --- /dev/null +++ b/elasticsearch_dsl/_sync/__init__.py @@ -0,0 +1,36 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +try: + from .document import Document, IndexMeta + from .faceted_search import FacetedSearch + from .index import Index, IndexTemplate + from .search import MultiSearch, Search + from .update_by_query import UpdateByQuery + + __all__ = [ + "Document", + "IndexMeta", + "FacetedSearch", + "Index", + "IndexTemplate", + "Search", + "MultiSearch", + "UpdateByQuery", + ] +except (ImportError, SyntaxError): + pass diff --git a/elasticsearch_dsl/_sync/document.py b/elasticsearch_dsl/_sync/document.py new file mode 100644 index 000000000..18fb01bbe --- /dev/null +++ b/elasticsearch_dsl/_sync/document.py @@ -0,0 +1,428 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +try: + import collections.abc as collections_abc # only works on python 3.3+ +except ImportError: + import collections as collections_abc + +from fnmatch import fnmatch + +from elasticsearch.exceptions import NotFoundError, RequestError +from six import add_metaclass + +from ..connections import get_connection +from ..document import DocumentMeta +from ..exceptions import IllegalOperation, ValidationException +from ..utils import DOC_META_FIELDS, META_FIELDS, ObjectBase, merge +from .search import Search +from .utils import SYNC_IS_ASYNC + + +class IndexMeta(DocumentMeta): + # global flag to guard us from associating an Index with the base Document + # class, only user defined subclasses should have an _index attr + _document_initialized = False + + def __new__(cls, name, bases, attrs): + new_cls = super(IndexMeta, cls).__new__(cls, name, bases, attrs) + if cls._document_initialized: + # This 'Index' is different than an 'AsyncIndex' instance + index_opts = attrs.pop("Index", None) + + index = cls.construct_index(index_opts, bases) + new_cls._index = index + index.document(new_cls) + cls._document_initialized = True + return new_cls + + @classmethod + def construct_index(cls, opts, bases): + from .index import Index + + if opts is None: + for b in bases: + if hasattr(b, "_index"): + return b._index + + # Set None as Index name so it will set _all while making the query + return Index(name=None) + + i = Index(getattr(opts, "name", "*"), using=getattr(opts, "using", "default")) + i.settings(**getattr(opts, "settings", {})) + i.aliases(**getattr(opts, "aliases", {})) + for a in getattr(opts, "analyzers", ()): + i.analyzer(a) + return i + + +@add_metaclass(IndexMeta) +class Document(ObjectBase): + """ + Model-like class for persisting documents in elasticsearch. + """ + + @classmethod + def _matches(cls, hit): + if cls._index._name is None: + return True + return fnmatch(hit.get("_index", ""), cls._index._name) + + @classmethod + def _get_using(cls, using=None): + return using or cls._index._using + + @classmethod + def _get_connection(cls, using=None): + return get_connection(cls._get_using(using), is_async=SYNC_IS_ASYNC) + + @classmethod + def _default_index(cls, index=None): + return index or cls._index._name + + @classmethod + def init(cls, index=None, using=None): + """ + Create the index and populate the mappings in elasticsearch. + """ + i = cls._index + if index: + i = i.clone(name=index) + i.save(using=using) + + def _get_index(self, index=None, required=True): + if index is None: + index = getattr(self.meta, "index", None) + if index is None: + index = getattr(self._index, "_name", None) + if index is None and required: + raise ValidationException("No index") + if index and "*" in index: + raise ValidationException("You cannot write to a wildcard index.") + return index + + def __repr__(self): + return "{}({})".format( + self.__class__.__name__, + ", ".join( + "{}={!r}".format(key, getattr(self.meta, key)) + for key in ("index", "id") + if key in self.meta + ), + ) + + @classmethod + def search(cls, using=None, index=None): + """ + Create an :class:`~elasticsearch_dsl.Search` instance that will search + over this ``Document``. + """ + return Search( + using=cls._get_using(using), index=cls._default_index(index), doc_type=[cls] + ) + + @classmethod + def get(cls, id, using=None, index=None, **kwargs): + """ + Retrieve a single document from elasticsearch using its ``id``. 
+ + :arg id: ``id`` of the document to be retrieved + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``Elasticsearch.get`` unchanged. + """ + es = cls._get_connection(using) + doc = es.get(index=cls._default_index(index), id=id, **kwargs) + if not doc.get("found", False): + return None + return cls.from_es(doc) + + @classmethod + def mget( + cls, docs, using=None, index=None, raise_on_error=True, missing="none", **kwargs + ): + r""" + Retrieve multiple document by their ``id``\s. Returns a list of instances + in the same order as requested. + + :arg docs: list of ``id``\s of the documents to be retrieved or a list + of document specifications as per + https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-multi-get.html + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg missing: what to do when one of the documents requested is not + found. Valid options are ``'none'`` (use ``None``), ``'raise'`` (raise + ``NotFoundError``) or ``'skip'`` (ignore the missing document). + + Any additional keyword arguments will be passed to + ``Elasticsearch.mget`` unchanged. + """ + if missing not in ("raise", "skip", "none"): + raise ValueError("'missing' must be 'raise', 'skip', or 'none'.") + es = cls._get_connection(using) + body = { + "docs": [ + doc if isinstance(doc, collections_abc.Mapping) else {"_id": doc} + for doc in docs + ] + } + results = es.mget(body, index=cls._default_index(index), **kwargs) + + objs, error_docs, missing_docs = [], [], [] + for doc in results["docs"]: + if doc.get("found"): + if error_docs or missing_docs: + # We're going to raise an exception anyway, so avoid an + # expensive call to cls.from_es(). + continue + + objs.append(cls.from_es(doc)) + + elif doc.get("error"): + if raise_on_error: + error_docs.append(doc) + if missing == "none": + objs.append(None) + + # The doc didn't cause an error, but the doc also wasn't found. + elif missing == "raise": + missing_docs.append(doc) + elif missing == "none": + objs.append(None) + + if error_docs: + error_ids = [doc["_id"] for doc in error_docs] + message = "Required routing not provided for documents %s." + message %= ", ".join(error_ids) + raise RequestError(400, message, error_docs) + if missing_docs: + missing_ids = [doc["_id"] for doc in missing_docs] + message = "Documents %s not found." % ", ".join(missing_ids) + raise NotFoundError(404, message, {"docs": missing_docs}) + return objs + + def delete(self, using=None, index=None, **kwargs): + """ + Delete the instance in elasticsearch. + + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + + Any additional keyword arguments will be passed to + ``Elasticsearch.delete`` unchanged. 
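+
+        Example (a sketch; ``Post`` is an illustrative ``Document`` subclass)::
+
+            post = Post.get(id=42)
+            if post is not None:
+                post.delete(refresh=True)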
+ """ + es = self._get_connection(using) + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # Optimistic concurrency control + if "seq_no" in self.meta and "primary_term" in self.meta: + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + doc_meta.update(kwargs) + es.delete(index=self._get_index(index), **doc_meta) + + def to_dict(self, include_meta=False, skip_empty=True): + """ + Serialize the instance into a dictionary so that it can be saved in elasticsearch. + + :arg include_meta: if set to ``True`` will include all the metadata + (``_index``, ``_id`` etc). Otherwise just the document's + data is serialized. This is useful when passing multiple instances into + ``elasticsearch.helpers.bulk``. + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in elasticsearch. + """ + d = super(Document, self).to_dict(skip_empty=skip_empty) + if not include_meta: + return d + + meta = {"_" + k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # in case of to_dict include the index unlike save/update/delete + index = self._get_index(required=False) + if index is not None: + meta["_index"] = index + + meta["_source"] = d + return meta + + def update( + self, + using=None, + index=None, + detect_noop=True, + doc_as_upsert=False, + refresh=False, + retry_on_conflict=None, + script=None, + script_id=None, + scripted_upsert=False, + upsert=None, + return_doc_meta=False, + **fields + ): + """ + Partial update of the document, specify fields you wish to update and + both the instance and the document in elasticsearch will be updated:: + + doc = MyDocument(title='Document Title!') + doc.save() + doc.update(title='New Document Title!') + + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg detect_noop: Set to ``False`` to disable noop detection. + :arg refresh: Control when the changes made by this request are visible + to search. Set to ``True`` for immediate effect. + :arg retry_on_conflict: In between the get and indexing phases of the + update, it is possible that another process might have already + updated the same document. By default, the update will fail with a + version conflict exception. The retry_on_conflict parameter + controls how many times to retry the update before finally throwing + an exception. + :arg doc_as_upsert: Instead of sending a partial doc plus an upsert + doc, setting doc_as_upsert to true will use the contents of doc as + the upsert value + :arg return_doc_meta: set to ``True`` to return all metadata from the + index API call instead of only the operation result + + :return operation result noop/updated + """ + body = { + "doc_as_upsert": doc_as_upsert, + "detect_noop": detect_noop, + } + + # scripted update + if script or script_id: + if upsert is not None: + body["upsert"] = upsert + + if script: + script = {"source": script} + else: + script = {"id": script_id} + + script["params"] = fields + + body["script"] = script + body["scripted_upsert"] = scripted_upsert + + # partial document update + else: + if not fields: + raise IllegalOperation( + "You cannot call update() without updating individual fields or a script. " + "If you wish to update the entire object use save()." 
+ ) + + # update given fields locally + merge(self, fields) + + # prepare data for ES + values = self.to_dict() + + # if fields were given: partial update + body["doc"] = {k: values.get(k) for k in fields.keys()} + + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + if retry_on_conflict is not None: + doc_meta["retry_on_conflict"] = retry_on_conflict + + # Optimistic concurrency control + if ( + retry_on_conflict in (None, 0) + and "seq_no" in self.meta + and "primary_term" in self.meta + ): + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + meta = self._get_connection(using).update( + index=self._get_index(index), body=body, refresh=refresh, **doc_meta + ) + # update meta information from ES + for k in META_FIELDS: + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + + return meta if return_doc_meta else meta["result"] + + def save( + self, + using=None, + index=None, + validate=True, + skip_empty=True, + return_doc_meta=False, + **kwargs + ): + """ + Save the document into elasticsearch. If the document doesn't exist it + is created, it is overwritten otherwise. Returns ``True`` if this + operations resulted in new document being created. + + :arg index: elasticsearch index to use, if the ``Document`` is + associated with an index this can be omitted. + :arg using: connection alias to use, defaults to ``'default'`` + :arg validate: set to ``False`` to skip validating the document + :arg skip_empty: if set to ``False`` will cause empty values (``None``, + ``[]``, ``{}``) to be left on the document. Those values will be + stripped out otherwise as they make no difference in elasticsearch. + :arg return_doc_meta: set to ``True`` to return all metadata from the + update API call instead of only the operation result + + Any additional keyword arguments will be passed to + ``Elasticsearch.index`` unchanged. + + :return operation result created/updated + """ + if validate: + self.full_clean() + + es = self._get_connection(using) + # extract routing etc from meta + doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} + + # Optimistic concurrency control + if "seq_no" in self.meta and "primary_term" in self.meta: + doc_meta["if_seq_no"] = self.meta["seq_no"] + doc_meta["if_primary_term"] = self.meta["primary_term"] + + doc_meta.update(kwargs) + meta = es.index( + index=self._get_index(index), + body=self.to_dict(skip_empty=skip_empty), + **doc_meta + ) + # update meta information from ES + for k in META_FIELDS: + if "_" + k in meta: + setattr(self.meta, k, meta["_" + k]) + + return meta if return_doc_meta else meta["result"] diff --git a/elasticsearch_dsl/_sync/faceted_search.py b/elasticsearch_dsl/_sync/faceted_search.py new file mode 100644 index 000000000..af4ff9fdd --- /dev/null +++ b/elasticsearch_dsl/_sync/faceted_search.py @@ -0,0 +1,208 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from six import iteritems, itervalues + +from .._base import FacetedResponse +from ..query import MatchAll +from .search import Search + + +class FacetedSearch(object): + """ + Abstraction for creating faceted navigation searches that takes care of + composing the queries, aggregations and filters as needed as well as + presenting the results in an easy-to-consume fashion:: + + class BlogSearch(FacetedSearch): + index = 'blogs' + doc_types = [Blog, Post] + fields = ['title^5', 'category', 'description', 'body'] + + facets = { + 'type': TermsFacet(field='_type'), + 'category': TermsFacet(field='category'), + 'weekly_posts': DateHistogramFacet(field='published_from', interval='week') + } + + def search(self): + ' Override search to add your own filters ' + s = super(BlogSearch, self).search() + return s.filter('term', published=True) + + # when using: + blog_search = BlogSearch("web framework", filters={"category": "python"}) + + # supports pagination + blog_search[10:20] + + response = blog_search.execute() + + # easy access to aggregation results: + for category, hit_count, is_selected in response.facets.category: + print( + "Category %s has %d hits%s." % ( + category, + hit_count, + ' and is chosen' if is_selected else '' + ) + ) + + """ + + index = None + doc_types = None + fields = None + facets = {} + using = "default" + + def __init__(self, query=None, filters={}, sort=()): + """ + :arg query: the text to search for + :arg filters: facet values to filter + :arg sort: sort information to be passed to :class:`~elasticsearch_dsl.Search` + """ + self._query = query + self._filters = {} + self._sort = sort + self.filter_values = {} + for name, value in iteritems(filters): + self.add_filter(name, value) + + self._s = self.build_search() + + def count(self): + return self._s.count() + + def __getitem__(self, k): + self._s = self._s[k] + return self + + def __iter__(self): + for hit in self._s: + yield hit + + def add_filter(self, name, filter_values): + """ + Add a filter for a facet. + """ + # normalize the value into a list + if not isinstance(filter_values, (tuple, list)): + if filter_values is None: + return + filter_values = [ + filter_values, + ] + + # remember the filter values for use in FacetedResponse + self.filter_values[name] = filter_values + + # get the filter from the facet + f = self.facets[name].add_filter(filter_values) + if f is None: + return + + self._filters[name] = f + + def search(self): + """ + Returns the base Search object to which the facets are added. + + You can customize the query by overriding this method and returning a + modified search object. + """ + s = Search(doc_type=self.doc_types, index=self.index, using=self.using) + return s.response_class(FacetedResponse) + + def query(self, search, query): + """ + Add query part to ``search``. + + Override this if you wish to customize the query used. 
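+
+        A sketch of such an override (the query type is illustrative)::
+
+            def query(self, search, query):
+                if query:
+                    return search.query('simple_query_string', query=query)
+                return search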
+ """ + if query: + if self.fields: + return search.query("multi_match", fields=self.fields, query=query) + else: + return search.query("multi_match", query=query) + return search + + def aggregate(self, search): + """ + Add aggregations representing the facets selected, including potential + filters. + """ + for f, facet in iteritems(self.facets): + agg = facet.get_aggregation() + agg_filter = MatchAll() + for field, filter in iteritems(self._filters): + if f == field: + continue + agg_filter &= filter + search.aggs.bucket("_filter_" + f, "filter", filter=agg_filter).bucket( + f, agg + ) + + def filter(self, search): + """ + Add a ``post_filter`` to the search request narrowing the results based + on the facet filters. + """ + if not self._filters: + return search + + post_filter = MatchAll() + for f in itervalues(self._filters): + post_filter &= f + return search.post_filter(post_filter) + + def highlight(self, search): + """ + Add highlighting for all the fields + """ + return search.highlight( + *(f if "^" not in f else f.split("^", 1)[0] for f in self.fields) + ) + + def sort(self, search): + """ + Add sorting information to the request. + """ + if self._sort: + search = search.sort(*self._sort) + return search + + def build_search(self): + """ + Construct the ``Search`` object. + """ + s = self.search() + s = self.query(s, self._query) + s = self.filter(s) + if self.fields: + s = self.highlight(s) + s = self.sort(s) + self.aggregate(s) + return s + + def execute(self): + """ + Execute the search and return the response. + """ + r = self._s.execute() + r._faceted_search = self + return r diff --git a/elasticsearch_dsl/_sync/index.py b/elasticsearch_dsl/_sync/index.py new file mode 100644 index 000000000..f9b812918 --- /dev/null +++ b/elasticsearch_dsl/_sync/index.py @@ -0,0 +1,654 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .. import analysis +from ..connections import get_connection +from ..exceptions import IllegalOperation +from ..mapping import Mapping +from ..utils import merge +from .search import Search +from .update_by_query import UpdateByQuery +from .utils import SYNC_IS_ASYNC + + +class IndexTemplate(object): + def __init__(self, name, template, index=None, order=None, **kwargs): + if index is None: + self._index = Index(template, **kwargs) + else: + if kwargs: + raise ValueError( + "You cannot specify options for Index when" + " passing an Index instance." 
+ ) + self._index = index.clone() + self._index._name = template + self._template_name = name + self.order = order + + def __getattr__(self, attr_name): + return getattr(self._index, attr_name) + + def to_dict(self): + d = self._index.to_dict() + d["index_patterns"] = [self._index._name] + if self.order is not None: + d["order"] = self.order + return d + + def save(self, using=None): + es = get_connection(using or self._index._using, is_async=SYNC_IS_ASYNC) + return es.indices.put_template(name=self._template_name, body=self.to_dict()) + + +class Index(object): + def __init__(self, name, using="default"): + """ + :arg name: name of the index + :arg using: connection alias to use, defaults to ``'default'`` + """ + self._name = name + self._doc_types = [] + self._using = using + self._settings = {} + self._aliases = {} + self._analysis = {} + self._mapping = None + + def get_or_create_mapping(self): + if self._mapping is None: + self._mapping = Mapping() + return self._mapping + + def as_template(self, template_name, pattern=None, order=None): + # TODO: should we allow pattern to be a top-level arg? + # or maybe have an IndexPattern that allows for it and have + # Document._index be that? + return IndexTemplate( + template_name, pattern or self._name, index=self, order=order + ) + + def resolve_nested(self, field_path): + for doc in self._doc_types: + nested, field = doc._doc_type.mapping.resolve_nested(field_path) + if field is not None: + return nested, field + if self._mapping: + return self._mapping.resolve_nested(field_path) + return (), None + + def resolve_field(self, field_path): + for doc in self._doc_types: + field = doc._doc_type.mapping.resolve_field(field_path) + if field is not None: + return field + if self._mapping: + return self._mapping.resolve_field(field_path) + return None + + def load_mappings(self, using=None): + self.get_or_create_mapping().update_from_es( + self._name, using=using or self._using + ) + + def clone(self, name=None, using=None): + """ + Create a copy of the instance with another name or connection alias. + Useful for creating multiple indices with shared configuration:: + + i = Index('base-index') + i.settings(number_of_shards=1) + i.create() + + i2 = i.clone('other-index') + i2.create() + + :arg name: name of the index + :arg using: connection alias to use, defaults to ``'default'`` + """ + i = Index(name or self._name, using=using or self._using) + i._settings = self._settings.copy() + i._aliases = self._aliases.copy() + i._analysis = self._analysis.copy() + i._doc_types = self._doc_types[:] + if self._mapping is not None: + i._mapping = self._mapping._clone() + return i + + def _get_connection(self, using=None): + if self._name is None: + raise ValueError("You cannot perform API calls on the default index.") + return get_connection(using or self._using, is_async=SYNC_IS_ASYNC) + + connection = property(_get_connection) + + def mapping(self, mapping): + """ + Associate a mapping (an instance of + :class:`~elasticsearch_dsl.Mapping`) with this index. + This means that, when this index is created, it will contain the + mappings for the document type defined by those mappings. + """ + self.get_or_create_mapping().update(mapping) + + def document(self, document): + """ + Associate a :class:`~elasticsearch_dsl.Document` subclass with an index. + This means that, when this index is created, it will contain the + mappings for the ``Document``. 
If the ``Document`` class doesn't have a + default index yet (by defining ``class Index``), this instance will be + used. Can be used as a decorator:: + + i = Index('blog') + + @i.document + class Post(Document): + title = Text() + + # create the index, including Post mappings + i.create() + + # .search() will now return a Search object that will return + # properly deserialized Post instances + s = i.search() + """ + self._doc_types.append(document) + + # If the document index does not have any name, that means the user + # did not set any index already to the document. + # So set this index as document index + if document._index._name is None: + document._index = self + + return document + + def settings(self, **kwargs): + """ + Add settings to the index:: + + i = Index('i') + i.settings(number_of_shards=1, number_of_replicas=0) + + Multiple calls to ``settings`` will merge the keys, later overriding + the earlier. + """ + self._settings.update(kwargs) + return self + + def aliases(self, **kwargs): + """ + Add aliases to the index definition:: + + i = Index('blog-v2') + i.aliases(blog={}, published={'filter': Q('term', published=True)}) + """ + self._aliases.update(kwargs) + return self + + def analyzer(self, *args, **kwargs): + """ + Explicitly add an analyzer to an index. Note that all custom analyzers + defined in mappings will also be created. This is useful for search analyzers. + + Example:: + + from elasticsearch_dsl import analyzer, tokenizer + + my_analyzer = analyzer('my_analyzer', + tokenizer=tokenizer('trigram', 'nGram', min_gram=3, max_gram=3), + filter=['lowercase'] + ) + + i = Index('blog') + i.analyzer(my_analyzer) + + """ + analyzer = analysis.analyzer(*args, **kwargs) + d = analyzer.get_analysis_definition() + # empty custom analyzer, probably already defined out of our control + if not d: + return + + # merge the definition + merge(self._analysis, d, True) + + def to_dict(self): + out = {} + if self._settings: + out["settings"] = self._settings + if self._aliases: + out["aliases"] = self._aliases + mappings = self._mapping.to_dict() if self._mapping else {} + analysis = self._mapping._collect_analysis() if self._mapping else {} + for d in self._doc_types: + mapping = d._doc_type.mapping + merge(mappings, mapping.to_dict(), True) + merge(analysis, mapping._collect_analysis(), True) + if mappings: + out["mappings"] = mappings + if analysis or self._analysis: + merge(analysis, self._analysis) + out.setdefault("settings", {})["analysis"] = analysis + return out + + def search(self, using=None): + """ + Return a :class:`~elasticsearch_dsl.Search` object searching over the + index (or all the indices belonging to this template) and its + ``Document``\\s. + """ + return Search( + using=using or self._using, index=self._name, doc_type=self._doc_types + ) + + def update_by_query(self, using=None): + """ + Return a :class:`~elasticsearch_dsl.UpdateByQuery` object searching over the index + (or all the indices belonging to this template) and updating Documents that match + the search criteria. + + For more information, see here: + https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update-by-query.html + """ + return UpdateByQuery( + using=using or self._using, + index=self._name, + ) + + updateByQuery = update_by_query + + def create(self, using=None, **kwargs): + """ + Creates the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.create`` unchanged. 
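+
+        Example (a sketch)::
+
+            i = Index('blog')
+            i.settings(number_of_shards=1, number_of_replicas=0)
+            i.create()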
+ """ + return self._get_connection(using=using).indices.create( + index=self._name, body=self.to_dict(), **kwargs + ) + + def is_closed(self, using=None): + state = self._get_connection(using=using).cluster.state( + index=self._name, metric="metadata" + ) + return state["metadata"]["indices"][self._name]["state"] == "close" + + def save(self, using=None): + """ + Sync the index definition with elasticsearch, creating the index if it + doesn't exist and updating its settings and mappings if it does. + + Note some settings and mapping changes cannot be done on an open + index (or at all on an existing index) and for those this method will + fail with the underlying exception. + """ + if not self.exists(using=using): + return self.create(using=using) + + body = self.to_dict() + settings = body.pop("settings", {}) + analysis = settings.pop("analysis", None) + current_settings = (self.get_settings(using=using))[self._name]["settings"][ + "index" + ] + if analysis: + if self.is_closed(using=using): + # closed index, update away + settings["analysis"] = analysis + else: + # compare analysis definition, if all analysis objects are + # already defined as requested, skip analysis update and + # proceed, otherwise raise IllegalOperation + existing_analysis = current_settings.get("analysis", {}) + if any( + existing_analysis.get(section, {}).get(k, None) + != analysis[section][k] + for section in analysis + for k in analysis[section] + ): + raise IllegalOperation( + "You cannot update analysis configuration on an open index, " + "you need to close index %s first." % self._name + ) + + # try and update the settings + if settings: + settings = settings.copy() + for k, v in list(settings.items()): + if k in current_settings and current_settings[k] == str(v): + del settings[k] + + if settings: + self.put_settings(using=using, body=settings) + + # update the mappings, any conflict in the mappings will result in an + # exception + mappings = body.pop("mappings", {}) + if mappings: + self.put_mapping(using=using, body=mappings) + + def analyze(self, using=None, **kwargs): + """ + Perform the analysis process on a text and return the tokens breakdown + of the text. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.analyze`` unchanged. + """ + return self._get_connection(using).indices.analyze(index=self._name, **kwargs) + + def refresh(self, using=None, **kwargs): + """ + Performs a refresh operation on the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.refresh`` unchanged. + """ + return self._get_connection(using).indices.refresh(index=self._name, **kwargs) + + def flush(self, using=None, **kwargs): + """ + Performs a flush operation on the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.flush`` unchanged. + """ + return self._get_connection(using).indices.flush(index=self._name, **kwargs) + + def get(self, using=None, **kwargs): + """ + The get index API allows to retrieve information about the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get`` unchanged. + """ + return self._get_connection(using).indices.get(index=self._name, **kwargs) + + def open(self, using=None, **kwargs): + """ + Opens the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.open`` unchanged. 
+ """ + return self._get_connection(using).indices.open(index=self._name, **kwargs) + + def close(self, using=None, **kwargs): + """ + Closes the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.close`` unchanged. + """ + return self._get_connection(using).indices.close(index=self._name, **kwargs) + + def delete(self, using=None, **kwargs): + """ + Deletes the index in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.delete`` unchanged. + """ + return self._get_connection(using).indices.delete(index=self._name, **kwargs) + + def exists(self, using=None, **kwargs): + """ + Returns ``True`` if the index already exists in elasticsearch. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.exists`` unchanged. + """ + return self._get_connection(using).indices.exists(index=self._name, **kwargs) + + def exists_type(self, using=None, **kwargs): + """ + Check if a type/types exists in the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.exists_type`` unchanged. + """ + return self._get_connection(using).indices.exists_type( + index=self._name, **kwargs + ) + + def put_mapping(self, using=None, **kwargs): + """ + Register specific mapping definition for a specific type. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.put_mapping`` unchanged. + """ + return self._get_connection(using).indices.put_mapping( + index=self._name, **kwargs + ) + + def get_mapping(self, using=None, **kwargs): + """ + Retrieve specific mapping definition for a specific type. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_mapping`` unchanged. + """ + return self._get_connection(using).indices.get_mapping( + index=self._name, **kwargs + ) + + def get_field_mapping(self, using=None, **kwargs): + """ + Retrieve mapping definition of a specific field. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_field_mapping`` unchanged. + """ + return self._get_connection(using).indices.get_field_mapping( + index=self._name, **kwargs + ) + + def put_alias(self, using=None, **kwargs): + """ + Create an alias for the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.put_alias`` unchanged. + """ + return self._get_connection(using).indices.put_alias(index=self._name, **kwargs) + + def exists_alias(self, using=None, **kwargs): + """ + Return a boolean indicating whether given alias exists for this index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.exists_alias`` unchanged. + """ + return self._get_connection(using).indices.exists_alias( + index=self._name, **kwargs + ) + + def get_alias(self, using=None, **kwargs): + """ + Retrieve a specified alias. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_alias`` unchanged. + """ + return self._get_connection(using).indices.get_alias(index=self._name, **kwargs) + + def delete_alias(self, using=None, **kwargs): + """ + Delete specific alias. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.delete_alias`` unchanged. + """ + return self._get_connection(using).indices.delete_alias( + index=self._name, **kwargs + ) + + def get_settings(self, using=None, **kwargs): + """ + Retrieve settings for the index. 
+ + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_settings`` unchanged. + """ + return self._get_connection(using).indices.get_settings( + index=self._name, **kwargs + ) + + def put_settings(self, using=None, **kwargs): + """ + Change specific index level settings in real time. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.put_settings`` unchanged. + """ + return self._get_connection(using).indices.put_settings( + index=self._name, **kwargs + ) + + def stats(self, using=None, **kwargs): + """ + Retrieve statistics on different operations happening on the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.stats`` unchanged. + """ + return self._get_connection(using).indices.stats(index=self._name, **kwargs) + + def segments(self, using=None, **kwargs): + """ + Provide low level segments information that a Lucene index (shard + level) is built with. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.segments`` unchanged. + """ + return self._get_connection(using).indices.segments(index=self._name, **kwargs) + + def validate_query(self, using=None, **kwargs): + """ + Validate a potentially expensive query without executing it. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.validate_query`` unchanged. + """ + return self._get_connection(using).indices.validate_query( + index=self._name, **kwargs + ) + + def clear_cache(self, using=None, **kwargs): + """ + Clear all caches or specific cached associated with the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.clear_cache`` unchanged. + """ + return self._get_connection(using).indices.clear_cache( + index=self._name, **kwargs + ) + + def recovery(self, using=None, **kwargs): + """ + The indices recovery API provides insight into on-going shard + recoveries for the index. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.recovery`` unchanged. + """ + return self._get_connection(using).indices.recovery(index=self._name, **kwargs) + + def upgrade(self, using=None, **kwargs): + """ + Upgrade the index to the latest format. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.upgrade`` unchanged. + """ + return self._get_connection(using).indices.upgrade(index=self._name, **kwargs) + + def get_upgrade(self, using=None, **kwargs): + """ + Monitor how much of the index is upgraded. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.get_upgrade`` unchanged. + """ + return self._get_connection(using).indices.get_upgrade( + index=self._name, **kwargs + ) + + def flush_synced(self, using=None, **kwargs): + """ + Perform a normal flush, then add a generated unique marker (sync_id) to + all shards. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.flush_synced`` unchanged. + """ + return self._get_connection(using).indices.flush_synced( + index=self._name, **kwargs + ) + + def shard_stores(self, using=None, **kwargs): + """ + Provides store information for shard copies of the index. Store + information reports on which nodes shard copies exist, the shard copy + version, indicating how recent they are, and any exceptions encountered + while opening the shard index or from earlier engine failure. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.shard_stores`` unchanged. 
+ """ + return self._get_connection(using).indices.shard_stores( + index=self._name, **kwargs + ) + + def forcemerge(self, using=None, **kwargs): + """ + The force merge API allows to force merging of the index through an + API. The merge relates to the number of segments a Lucene index holds + within each shard. The force merge operation allows to reduce the + number of segments by merging them. + + This call will block until the merge is complete. If the http + connection is lost, the request will continue in the background, and + any new requests will block until the previous force merge is complete. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.forcemerge`` unchanged. + """ + return self._get_connection(using).indices.forcemerge( + index=self._name, **kwargs + ) + + def shrink(self, using=None, **kwargs): + """ + The shrink index API allows you to shrink an existing index into a new + index with fewer primary shards. The number of primary shards in the + target index must be a factor of the shards in the source index. For + example an index with 8 primary shards can be shrunk into 4, 2 or 1 + primary shards or an index with 15 primary shards can be shrunk into 5, + 3 or 1. If the number of shards in the index is a prime number it can + only be shrunk into a single primary shard. Before shrinking, a + (primary or replica) copy of every shard in the index must be present + on the same node. + + Any additional keyword arguments will be passed to + ``Elasticsearch.indices.shrink`` unchanged. + """ + return self._get_connection(using).indices.shrink(index=self._name, **kwargs) diff --git a/elasticsearch_dsl/_sync/search.py b/elasticsearch_dsl/_sync/search.py new file mode 100644 index 000000000..cb8abb854 --- /dev/null +++ b/elasticsearch_dsl/_sync/search.py @@ -0,0 +1,545 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import copy + +from elasticsearch.exceptions import TransportError +from elasticsearch.helpers import scan +from six import iteritems, string_types + +from .._base import AggsProxy, ProxyDescriptor, QueryProxy, Request +from ..aggs import A +from ..connections import get_connection +from ..exceptions import IllegalOperation +from ..query import Bool, Q +from ..response import Response +from ..utils import AttrDict, recursive_to_dict +from .utils import SYNC_IS_ASYNC + + +class Search(Request): + query = ProxyDescriptor("query") + post_filter = ProxyDescriptor("post_filter") + + def __init__(self, **kwargs): + """ + Search request to elasticsearch. + + :arg using: `Elasticsearch` instance to use + :arg index: limit the search to index + :arg doc_type: only query this type. 
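+
+        For example (an illustrative sketch; ``client`` and the ``'blog'``
+        index are placeholders)::
+
+            s = Search(using=client, index='blog')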
+
+        All the parameters supplied (or omitted) at creation time can be later
+        overridden by methods (`using`, `index` and `doc_type` respectively).
+        """
+        super(Search, self).__init__(**kwargs)
+
+        self.aggs = AggsProxy(self)
+        self._sort = []
+        self._source = None
+        self._highlight = {}
+        self._highlight_opts = {}
+        self._suggest = {}
+        self._script_fields = {}
+        self._response_class = Response
+
+        self._query_proxy = QueryProxy(self, "query")
+        self._post_filter_proxy = QueryProxy(self, "post_filter")
+
+    def filter(self, *args, **kwargs):
+        return self.query(Bool(filter=[Q(*args, **kwargs)]))
+
+    def exclude(self, *args, **kwargs):
+        return self.query(Bool(filter=[~Q(*args, **kwargs)]))
+
+    def __iter__(self):
+        """
+        Iterate over the hits.
+        """
+        hits = self.execute()
+        for hit in hits:
+            yield hit
+
+    def __getitem__(self, n):
+        """
+        Support slicing the `Search` instance for pagination.
+
+        Slicing equates to the from/size parameters. E.g.::
+
+            s = Search().query(...)[0:25]
+
+        is equivalent to::
+
+            s = Search().query(...).extra(from_=0, size=25)
+
+        """
+        s = self._clone()
+
+        if isinstance(n, slice):
+            # If negative slicing, abort.
+            if n.start and n.start < 0 or n.stop and n.stop < 0:
+                raise ValueError("Search does not support negative slicing.")
+            # Elasticsearch won't get all results so we default to size: 10 if
+            # stop not given.
+            s._extra["from"] = n.start or 0
+            s._extra["size"] = max(
+                0, n.stop - (n.start or 0) if n.stop is not None else 10
+            )
+            return s
+        else:  # This is an index lookup, equivalent to slicing by [n:n+1].
+            # If negative index, abort.
+            if n < 0:
+                raise ValueError("Search does not support negative indexing.")
+            s._extra["from"] = n
+            s._extra["size"] = 1
+            return s
+
+    @classmethod
+    def from_dict(cls, d):
+        """
+        Construct a new `Search` instance from a raw dict containing the search
+        body. Useful when migrating from raw dictionaries.
+
+        Example::
+
+            s = Search.from_dict({
+                "query": {
+                    "bool": {
+                        "must": [...]
+                    }
+                },
+                "aggs": {...}
+            })
+            s = s.filter('term', published=True)
+        """
+        s = cls()
+        s.update_from_dict(d)
+        return s
+
+    def _clone(self):
+        """
+        Return a clone of the current search request. Performs a shallow copy
+        of all the underlying objects. Used internally by most state modifying
+        APIs.
+        """
+        s = super(Search, self)._clone()
+
+        s._response_class = self._response_class
+        s._sort = self._sort[:]
+        s._source = copy.copy(self._source) if self._source is not None else None
+        s._highlight = self._highlight.copy()
+        s._highlight_opts = self._highlight_opts.copy()
+        s._suggest = self._suggest.copy()
+        s._script_fields = self._script_fields.copy()
+        for x in ("query", "post_filter"):
+            getattr(s, x)._proxied = getattr(self, x)._proxied
+
+        # copy top-level bucket definitions
+        if self.aggs._params.get("aggs"):
+            s.aggs._params = {"aggs": self.aggs._params["aggs"].copy()}
+        return s
+
+    def response_class(self, cls):
+        """
+        Override the default wrapper used for the response.
+        """
+        s = self._clone()
+        s._response_class = cls
+        return s
+
+    def update_from_dict(self, d):
+        """
+        Apply options from a serialized body to the current instance. Modifies
+        the object in-place. Used mostly by ``from_dict``.
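+
+        For example (an illustrative sketch)::
+
+            s = Search()
+            s.update_from_dict({"query": {"match": {"title": "python"}}, "size": 5})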
+ """ + d = d.copy() + if "query" in d: + self.query._proxied = Q(d.pop("query")) + if "post_filter" in d: + self.post_filter._proxied = Q(d.pop("post_filter")) + + aggs = d.pop("aggs", d.pop("aggregations", {})) + if aggs: + self.aggs._params = { + "aggs": {name: A(value) for (name, value) in iteritems(aggs)} + } + if "sort" in d: + self._sort = d.pop("sort") + if "_source" in d: + self._source = d.pop("_source") + if "highlight" in d: + high = d.pop("highlight").copy() + self._highlight = high.pop("fields") + self._highlight_opts = high + if "suggest" in d: + self._suggest = d.pop("suggest") + if "text" in self._suggest: + text = self._suggest.pop("text") + for s in self._suggest.values(): + s.setdefault("text", text) + if "script_fields" in d: + self._script_fields = d.pop("script_fields") + self._extra.update(d) + return self + + def script_fields(self, **kwargs): + """ + Define script fields to be calculated on hits. See + https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-script-fields.html + for more details. + + Example:: + + s = Search() + s = s.script_fields(times_two="doc['field'].value * 2") + s = s.script_fields( + times_three={ + 'script': { + 'lang': 'painless', + 'source': "doc['field'].value * params.n", + 'params': {'n': 3} + } + } + ) + + """ + s = self._clone() + for name in kwargs: + if isinstance(kwargs[name], string_types): + kwargs[name] = {"script": kwargs[name]} + s._script_fields.update(kwargs) + return s + + def source(self, fields=None, **kwargs): + """ + Selectively control how the _source field is returned. + + :arg fields: wildcard string, array of wildcards, or dictionary of includes and excludes + + If ``fields`` is None, the entire document will be returned for + each hit. If fields is a dictionary with keys of 'includes' and/or + 'excludes' the fields will be either included or excluded appropriately. + + Calling this multiple times with the same named parameter will override the + previous values with the new ones. + + Example:: + + s = Search() + s = s.source(includes=['obj1.*'], excludes=["*.description"]) + + s = Search() + s = s.source(includes=['obj1.*']).source(excludes=["*.description"]) + + """ + s = self._clone() + + if fields and kwargs: + raise ValueError("You cannot specify fields and kwargs at the same time.") + + if fields is not None: + s._source = fields + return s + + if kwargs and not isinstance(s._source, dict): + s._source = {} + + for key, value in kwargs.items(): + if value is None: + try: + del s._source[key] + except KeyError: + pass + else: + s._source[key] = value + + return s + + def sort(self, *keys): + """ + Add sorting information to the search request. If called without + arguments it will remove all sort requirements. Otherwise it will + replace them. Acceptable arguments are:: + + 'some.field' + '-some.other.field' + {'different.field': {'any': 'dict'}} + + so for example:: + + s = Search().sort( + 'category', + '-title', + {"price" : {"order" : "asc", "mode" : "avg"}} + ) + + will sort by ``category``, ``title`` (in descending order) and + ``price`` in ascending order using the ``avg`` mode. + + The API returns a copy of the Search object and can thus be chained. 
+ """ + s = self._clone() + s._sort = [] + for k in keys: + if isinstance(k, string_types) and k.startswith("-"): + if k[1:] == "_score": + raise IllegalOperation("Sorting by `-_score` is not allowed.") + k = {k[1:]: {"order": "desc"}} + s._sort.append(k) + return s + + def highlight_options(self, **kwargs): + """ + Update the global highlighting options used for this request. For + example:: + + s = Search() + s = s.highlight_options(order='score') + """ + s = self._clone() + s._highlight_opts.update(kwargs) + return s + + def highlight(self, *fields, **kwargs): + """ + Request highlighting of some fields. All keyword arguments passed in will be + used as parameters for all the fields in the ``fields`` parameter. Example:: + + Search().highlight('title', 'body', fragment_size=50) + + will produce the equivalent of:: + + { + "highlight": { + "fields": { + "body": {"fragment_size": 50}, + "title": {"fragment_size": 50} + } + } + } + + If you want to have different options for different fields + you can call ``highlight`` twice:: + + Search().highlight('title', fragment_size=50).highlight('body', fragment_size=100) + + which will produce:: + + { + "highlight": { + "fields": { + "body": {"fragment_size": 100}, + "title": {"fragment_size": 50} + } + } + } + + """ + s = self._clone() + for f in fields: + s._highlight[f] = kwargs + return s + + def suggest(self, name, text, **kwargs): + """ + Add a suggestions request to the search. + + :arg name: name of the suggestion + :arg text: text to suggest on + + All keyword arguments will be added to the suggestions body. For example:: + + s = Search() + s = s.suggest('suggestion-1', 'Elasticsearch', term={'field': 'body'}) + """ + s = self._clone() + s._suggest[name] = {"text": text} + s._suggest[name].update(kwargs) + return s + + def to_dict(self, count=False, **kwargs): + """ + Serialize the search into the dictionary that will be sent over as the + request's body. + + :arg count: a flag to specify if we are interested in a body for count - + no aggregations, no pagination bounds etc. + + All additional keyword arguments will be included into the dictionary. + """ + d = {} + + if self.query: + d["query"] = self.query.to_dict() + + # count request doesn't care for sorting and other things + if not count: + if self.post_filter: + d["post_filter"] = self.post_filter.to_dict() + + if self.aggs.aggs: + d.update(self.aggs.to_dict()) + + if self._sort: + d["sort"] = self._sort + + d.update(recursive_to_dict(self._extra)) + + if self._source not in (None, {}): + d["_source"] = self._source + + if self._highlight: + d["highlight"] = {"fields": self._highlight} + d["highlight"].update(self._highlight_opts) + + if self._suggest: + d["suggest"] = self._suggest + + if self._script_fields: + d["script_fields"] = self._script_fields + + d.update(recursive_to_dict(kwargs)) + return d + + def count(self): + """ + Return the number of hits matching the query and filters. Note that + only the actual number is returned. + """ + if hasattr(self, "_response") and self._response.hits.total.relation == "eq": + return self._response.hits.total.value + + es = get_connection(self._using, is_async=SYNC_IS_ASYNC) + + d = self.to_dict(count=True) + # TODO: failed shards detection + return es.count(index=self._index, body=d, **self._params)["count"] + + def execute(self, ignore_cache=False): + """ + Execute the search and return an instance of ``Response`` wrapping all + the data. 
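+
+        For example (an illustrative sketch; assumes a populated ``'blog'``
+        index)::
+
+            s = Search(index='blog').query('match', title='python')
+            response = s.execute()
+            print(response.hits.total.value)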
+
+        :arg ignore_cache: if set to ``True``, consecutive calls will hit
+            ES, ignoring any previously cached result. Defaults to ``False``
+        """
+        if ignore_cache or not hasattr(self, "_response"):
+            es = get_connection(self._using, is_async=SYNC_IS_ASYNC)
+
+            self._response = self._response_class(
+                self,
+                es.search(index=self._index, body=self.to_dict(), **self._params),
+            )
+        return self._response
+
+    def scan(self):
+        """
+        Turn the search into a scan search and return a generator that will
+        iterate over all the documents matching the query.
+
+        Use the ``params`` method to specify any additional arguments you wish
+        to pass to the underlying ``scan`` helper from ``elasticsearch-py`` -
+        https://elasticsearch-py.readthedocs.io/en/master/helpers.html#elasticsearch.helpers.scan
+
+        """
+        es = get_connection(self._using, is_async=SYNC_IS_ASYNC)
+
+        for hit in scan(es, query=self.to_dict(), index=self._index, **self._params):
+            yield self._get_result(hit)
+
+    def delete(self):
+        """
+        delete() executes the query by delegating to delete_by_query()
+        """
+
+        es = get_connection(self._using, is_async=SYNC_IS_ASYNC)
+
+        return AttrDict(
+            es.delete_by_query(index=self._index, body=self.to_dict(), **self._params)
+        )
+
+
+class MultiSearch(Request):
+    """
+    Combine multiple :class:`~elasticsearch_dsl.Search` objects into a single
+    request.
+    """
+
+    def __init__(self, **kwargs):
+        super(MultiSearch, self).__init__(**kwargs)
+        self._searches = []
+
+    def __getitem__(self, key):
+        return self._searches[key]
+
+    def __iter__(self):
+        return iter(self._searches)
+
+    def _clone(self):
+        ms = super(MultiSearch, self)._clone()
+        ms._searches = self._searches[:]
+        return ms
+
+    def add(self, search):
+        """
+        Adds a new :class:`~elasticsearch_dsl.Search` object to the request::
+
+            ms = MultiSearch(index='my-index')
+            ms = ms.add(Search(doc_type=Category).filter('term', category='python'))
+            ms = ms.add(Search(doc_type=Blog))
+        """
+        ms = self._clone()
+        ms._searches.append(search)
+        return ms
+
+    def to_dict(self):
+        out = []
+        for s in self._searches:
+            meta = {}
+            if s._index:
+                meta["index"] = s._index
+            meta.update(s._params)
+
+            out.append(meta)
+            out.append(s.to_dict())
+
+        return out
+
+    def execute(self, ignore_cache=False, raise_on_error=True):
+        """
+        Execute the multi search request and return a list of search results.
+        """
+        if ignore_cache or not hasattr(self, "_response"):
+            es = get_connection(self._using, is_async=SYNC_IS_ASYNC)
+
+            responses = es.msearch(
+                index=self._index, body=self.to_dict(), **self._params
+            )
+
+            out = []
+            for s, r in zip(self._searches, responses["responses"]):
+                if r.get("error", False):
+                    if raise_on_error:
+                        raise TransportError("N/A", r["error"]["type"], r["error"])
+                    r = None
+                else:
+                    r = Response(s, r)
+                out.append(r)
+
+            self._response = out
+
+        return self._response
diff --git a/elasticsearch_dsl/_sync/update_by_query.py b/elasticsearch_dsl/_sync/update_by_query.py
new file mode 100644
index 000000000..2f299cb96
--- /dev/null
+++ b/elasticsearch_dsl/_sync/update_by_query.py
@@ -0,0 +1,161 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from .._base import ProxyDescriptor, QueryProxy, Request
+from ..connections import get_connection
+from ..query import Bool, Q
+from ..response import UpdateByQueryResponse
+from ..utils import recursive_to_dict
+from .utils import SYNC_IS_ASYNC
+
+
+class UpdateByQuery(Request):
+
+    query = ProxyDescriptor("query")
+
+    def __init__(self, **kwargs):
+        """
+        Update by query request to elasticsearch.
+
+        :arg using: `Elasticsearch` instance to use
+        :arg index: limit the search to index
+        :arg doc_type: only query this type.
+
+        All the parameters supplied (or omitted) at creation time can be later
+        overridden by methods (`using`, `index` and `doc_type` respectively).
+
+        """
+        super(UpdateByQuery, self).__init__(**kwargs)
+        self._response_class = UpdateByQueryResponse
+        self._script = {}
+        self._query_proxy = QueryProxy(self, "query")
+
+    def filter(self, *args, **kwargs):
+        return self.query(Bool(filter=[Q(*args, **kwargs)]))
+
+    def exclude(self, *args, **kwargs):
+        return self.query(Bool(filter=[~Q(*args, **kwargs)]))
+
+    @classmethod
+    def from_dict(cls, d):
+        """
+        Construct a new `UpdateByQuery` instance from a raw dict containing the search
+        body. Useful when migrating from raw dictionaries.
+
+        Example::
+
+            ubq = UpdateByQuery.from_dict({
+                "query": {
+                    "bool": {
+                        "must": [...]
+                    }
+                },
+                "script": {...}
+            })
+            ubq = ubq.filter('term', published=True)
+        """
+        u = cls()
+        u.update_from_dict(d)
+        return u
+
+    def _clone(self):
+        """
+        Return a clone of the current search request. Performs a shallow copy
+        of all the underlying objects. Used internally by most state modifying
+        APIs.
+        """
+        ubq = super(UpdateByQuery, self)._clone()
+
+        ubq._response_class = self._response_class
+        ubq._script = self._script.copy()
+        ubq.query._proxied = self.query._proxied
+        return ubq
+
+    def response_class(self, cls):
+        """
+        Override the default wrapper used for the response.
+        """
+        ubq = self._clone()
+        ubq._response_class = cls
+        return ubq
+
+    def update_from_dict(self, d):
+        """
+        Apply options from a serialized body to the current instance. Modifies
+        the object in-place. Used mostly by ``from_dict``.
+        """
+        d = d.copy()
+        if "query" in d:
+            self.query._proxied = Q(d.pop("query"))
+        if "script" in d:
+            self._script = d.pop("script")
+        self._extra.update(d)
+        return self
+
+    def script(self, **kwargs):
+        """
+        Define the update action to take. See
+        https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-scripting-using.html
+        for more details.
+
+        Note: the API only accepts a single script, so calling ``script``
+        multiple times will overwrite the previous value.
+
+        Example::
+
+            ubq = UpdateByQuery()
+            ubq = ubq.script(source="ctx._source.likes++")
+            ubq = ubq.script(source="ctx._source.likes += params.f",
+                lang="expression",
+                params={'f': 3})
+        """
+        ubq = self._clone()
+        if ubq._script:
+            ubq._script = {}
+        ubq._script.update(kwargs)
+        return ubq
+
+    def to_dict(self, **kwargs):
+        """
+        Serialize the search into the dictionary that will be sent over as the
+        request's body.
+
+        All additional keyword arguments will be included into the dictionary.
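+
+        For example (an illustrative sketch)::
+
+            ubq = UpdateByQuery().filter('term', published=True)
+            ubq = ubq.script(source='ctx._source.views = 0')
+            body = ubq.to_dict()  # {'query': {...}, 'script': {...}}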
+ """ + d = {} + if self.query: + d["query"] = self.query.to_dict() + + if self._script: + d["script"] = self._script + + d.update(recursive_to_dict(self._extra)) + d.update(recursive_to_dict(kwargs)) + return d + + def execute(self): + """ + Execute the search and return an instance of ``Response`` wrapping all + the data. + """ + es = get_connection(self._using, is_async=SYNC_IS_ASYNC) + + self._response = self._response_class( + self, + es.update_by_query(index=self._index, body=self.to_dict(), **self._params), + ) + return self._response diff --git a/elasticsearch_dsl/_sync/utils.py b/elasticsearch_dsl/_sync/utils.py new file mode 100644 index 000000000..8e0171d3c --- /dev/null +++ b/elasticsearch_dsl/_sync/utils.py @@ -0,0 +1,19 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +SYNC_IS_ASYNC = True +SYNC_IS_ASYNC = False diff --git a/elasticsearch_dsl/connections.py b/elasticsearch_dsl/connections.py index 57ba46f1d..18da8601b 100644 --- a/elasticsearch_dsl/connections.py +++ b/elasticsearch_dsl/connections.py @@ -20,6 +20,19 @@ from .serializer import serializer +try: + from inspect import iscoroutinefunction +except ImportError: + + def iscoroutinefunction(_): + return False + + +try: + from elasticsearch import AsyncElasticsearch +except ImportError: + AsyncElasticsearch = False + class Connections(object): """ @@ -75,16 +88,29 @@ def remove_connection(self, alias): if errors == 2: raise KeyError("There is no connection with alias %r." % alias) - def create_connection(self, alias="default", **kwargs): + def create_connection(self, alias="default", is_async=False, **kwargs): """ Construct an instance of ``elasticsearch.Elasticsearch`` and register it under given alias. """ kwargs.setdefault("serializer", serializer) - conn = self._conns[alias] = Elasticsearch(**kwargs) + if is_async: + try: + from elasticsearch import AsyncElasticsearch + except ImportError: + # Raise a better error message + raise ValueError( + "Could not import 'AsyncElasticsearch', " + "is 'elasticsearch[async]' installed?" + ) + + es_cls = AsyncElasticsearch + else: + es_cls = Elasticsearch + conn = self._conns[alias] = es_cls(**kwargs) return conn - def get_connection(self, alias="default"): + def get_connection(self, alias="default", is_async=False): """ Retrieve a connection, construct it if necessary (only configuration was passed to us). 
If a non-string alias has been passed through we @@ -99,17 +125,41 @@ def get_connection(self, alias="default"): return alias # connection already established + conn = None try: - return self._conns[alias] + conn = self._conns[alias] except KeyError: - pass + # if not, try to create it + try: + conn = self.create_connection( + alias, is_async=is_async, **self._kwargs[alias] + ) + except KeyError: + # no connection and no kwargs to set one up + raise KeyError("There is no connection with alias %r." % alias) + + # Verify if the client we got/created is async or sync like we want. + if _is_async_client(conn) != is_async: + raise ValueError( + "Connection with alias %r %s" + % ( + alias, + # Change the error message depending on what + # connection type was requested. + "isn't async as requested" + if is_async + else "isn't sync as requested", + ) + ) - # if not, try to create it - try: - return self.create_connection(alias, **self._kwargs[alias]) - except KeyError: - # no connection and no kwargs to set one up - raise KeyError("There is no connection with alias %r." % alias) + return conn + + +def _is_async_client(client): + """Detects an AsyncElasticsearch instance""" + return ( + AsyncElasticsearch and isinstance(client, AsyncElasticsearch) + ) or iscoroutinefunction(getattr(client, "search", None)) connections = Connections() diff --git a/elasticsearch_dsl/document.py b/elasticsearch_dsl/document.py index 1ba146812..4bffeaea8 100644 --- a/elasticsearch_dsl/document.py +++ b/elasticsearch_dsl/document.py @@ -15,23 +15,11 @@ # specific language governing permissions and limitations # under the License. -try: - import collections.abc as collections_abc # only works on python 3.3+ -except ImportError: - import collections as collections_abc - -from fnmatch import fnmatch - -from elasticsearch.exceptions import NotFoundError, RequestError from six import add_metaclass, iteritems -from .connections import get_connection -from .exceptions import IllegalOperation, ValidationException from .field import Field -from .index import Index from .mapping import Mapping -from .search import Search -from .utils import DOC_META_FIELDS, META_FIELDS, ObjectBase, merge +from .utils import ObjectBase class MetaField(object): @@ -46,39 +34,6 @@ def __new__(cls, name, bases, attrs): return super(DocumentMeta, cls).__new__(cls, name, bases, attrs) -class IndexMeta(DocumentMeta): - # global flag to guard us from associating an Index with the base Document - # class, only user defined subclasses should have an _index attr - _document_initialized = False - - def __new__(cls, name, bases, attrs): - new_cls = super(IndexMeta, cls).__new__(cls, name, bases, attrs) - if cls._document_initialized: - index_opts = attrs.pop("Index", None) - index = cls.construct_index(index_opts, bases) - new_cls._index = index - index.document(new_cls) - cls._document_initialized = True - return new_cls - - @classmethod - def construct_index(cls, opts, bases): - if opts is None: - for b in bases: - if hasattr(b, "_index"): - return b._index - - # Set None as Index name so it will set _all while making the query - return Index(name=None) - - i = Index(getattr(opts, "name", "*"), using=getattr(opts, "using", "default")) - i.settings(**getattr(opts, "settings", {})) - i.aliases(**getattr(opts, "aliases", {})) - for a in getattr(opts, "analyzers", ()): - i.analyzer(a) - return i - - class DocumentOptions(object): def __init__(self, name, bases, attrs): meta = attrs.pop("Meta", None) @@ -121,359 +76,20 @@ def from_es(cls, data, 
data_only=False): return super(InnerDoc, cls).from_es(data) -@add_metaclass(IndexMeta) -class Document(ObjectBase): - """ - Model-like class for persisting documents in elasticsearch. - """ - - @classmethod - def _matches(cls, hit): - if cls._index._name is None: - return True - return fnmatch(hit.get("_index", ""), cls._index._name) - - @classmethod - def _get_using(cls, using=None): - return using or cls._index._using - - @classmethod - def _get_connection(cls, using=None): - return get_connection(cls._get_using(using)) - - @classmethod - def _default_index(cls, index=None): - return index or cls._index._name - - @classmethod - def init(cls, index=None, using=None): - """ - Create the index and populate the mappings in elasticsearch. - """ - i = cls._index - if index: - i = i.clone(name=index) - i.save(using=using) - - def _get_index(self, index=None, required=True): - if index is None: - index = getattr(self.meta, "index", None) - if index is None: - index = getattr(self._index, "_name", None) - if index is None and required: - raise ValidationException("No index") - if index and "*" in index: - raise ValidationException("You cannot write to a wildcard index.") - return index - - def __repr__(self): - return "{}({})".format( - self.__class__.__name__, - ", ".join( - "{}={!r}".format(key, getattr(self.meta, key)) - for key in ("index", "id") - if key in self.meta - ), - ) - - @classmethod - def search(cls, using=None, index=None): - """ - Create an :class:`~elasticsearch_dsl.Search` instance that will search - over this ``Document``. - """ - return Search( - using=cls._get_using(using), index=cls._default_index(index), doc_type=[cls] - ) - - @classmethod - def get(cls, id, using=None, index=None, **kwargs): - """ - Retrieve a single document from elasticsearch using its ``id``. - - :arg id: ``id`` of the document to be retrieved - :arg index: elasticsearch index to use, if the ``Document`` is - associated with an index this can be omitted. - :arg using: connection alias to use, defaults to ``'default'`` - - Any additional keyword arguments will be passed to - ``Elasticsearch.get`` unchanged. - """ - es = cls._get_connection(using) - doc = es.get(index=cls._default_index(index), id=id, **kwargs) - if not doc.get("found", False): - return None - return cls.from_es(doc) - - @classmethod - def mget( - cls, docs, using=None, index=None, raise_on_error=True, missing="none", **kwargs - ): - r""" - Retrieve multiple document by their ``id``\s. Returns a list of instances - in the same order as requested. - - :arg docs: list of ``id``\s of the documents to be retrieved or a list - of document specifications as per - https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-multi-get.html - :arg index: elasticsearch index to use, if the ``Document`` is - associated with an index this can be omitted. - :arg using: connection alias to use, defaults to ``'default'`` - :arg missing: what to do when one of the documents requested is not - found. Valid options are ``'none'`` (use ``None``), ``'raise'`` (raise - ``NotFoundError``) or ``'skip'`` (ignore the missing document). - - Any additional keyword arguments will be passed to - ``Elasticsearch.mget`` unchanged. 
- """ - if missing not in ("raise", "skip", "none"): - raise ValueError("'missing' must be 'raise', 'skip', or 'none'.") - es = cls._get_connection(using) - body = { - "docs": [ - doc if isinstance(doc, collections_abc.Mapping) else {"_id": doc} - for doc in docs - ] - } - results = es.mget(body, index=cls._default_index(index), **kwargs) - - objs, error_docs, missing_docs = [], [], [] - for doc in results["docs"]: - if doc.get("found"): - if error_docs or missing_docs: - # We're going to raise an exception anyway, so avoid an - # expensive call to cls.from_es(). - continue - - objs.append(cls.from_es(doc)) +from ._sync import Document, IndexMeta - elif doc.get("error"): - if raise_on_error: - error_docs.append(doc) - if missing == "none": - objs.append(None) +__all__ = [ + "Document", + "DocumentMeta", + "DocumentOptions", + "InnerDoc", + "IndexMeta", + "MetaField", +] - # The doc didn't cause an error, but the doc also wasn't found. - elif missing == "raise": - missing_docs.append(doc) - elif missing == "none": - objs.append(None) - - if error_docs: - error_ids = [doc["_id"] for doc in error_docs] - message = "Required routing not provided for documents %s." - message %= ", ".join(error_ids) - raise RequestError(400, message, error_docs) - if missing_docs: - missing_ids = [doc["_id"] for doc in missing_docs] - message = "Documents %s not found." % ", ".join(missing_ids) - raise NotFoundError(404, message, {"docs": missing_docs}) - return objs - - def delete(self, using=None, index=None, **kwargs): - """ - Delete the instance in elasticsearch. - - :arg index: elasticsearch index to use, if the ``Document`` is - associated with an index this can be omitted. - :arg using: connection alias to use, defaults to ``'default'`` - - Any additional keyword arguments will be passed to - ``Elasticsearch.delete`` unchanged. - """ - es = self._get_connection(using) - # extract routing etc from meta - doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} - - # Optimistic concurrency control - if "seq_no" in self.meta and "primary_term" in self.meta: - doc_meta["if_seq_no"] = self.meta["seq_no"] - doc_meta["if_primary_term"] = self.meta["primary_term"] - - doc_meta.update(kwargs) - es.delete(index=self._get_index(index), **doc_meta) - - def to_dict(self, include_meta=False, skip_empty=True): - """ - Serialize the instance into a dictionary so that it can be saved in elasticsearch. - - :arg include_meta: if set to ``True`` will include all the metadata - (``_index``, ``_id`` etc). Otherwise just the document's - data is serialized. This is useful when passing multiple instances into - ``elasticsearch.helpers.bulk``. - :arg skip_empty: if set to ``False`` will cause empty values (``None``, - ``[]``, ``{}``) to be left on the document. Those values will be - stripped out otherwise as they make no difference in elasticsearch. 
- """ - d = super(Document, self).to_dict(skip_empty=skip_empty) - if not include_meta: - return d - - meta = {"_" + k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} - - # in case of to_dict include the index unlike save/update/delete - index = self._get_index(required=False) - if index is not None: - meta["_index"] = index - - meta["_source"] = d - return meta - - def update( - self, - using=None, - index=None, - detect_noop=True, - doc_as_upsert=False, - refresh=False, - retry_on_conflict=None, - script=None, - script_id=None, - scripted_upsert=False, - upsert=None, - return_doc_meta=False, - **fields - ): - """ - Partial update of the document, specify fields you wish to update and - both the instance and the document in elasticsearch will be updated:: - - doc = MyDocument(title='Document Title!') - doc.save() - doc.update(title='New Document Title!') - - :arg index: elasticsearch index to use, if the ``Document`` is - associated with an index this can be omitted. - :arg using: connection alias to use, defaults to ``'default'`` - :arg detect_noop: Set to ``False`` to disable noop detection. - :arg refresh: Control when the changes made by this request are visible - to search. Set to ``True`` for immediate effect. - :arg retry_on_conflict: In between the get and indexing phases of the - update, it is possible that another process might have already - updated the same document. By default, the update will fail with a - version conflict exception. The retry_on_conflict parameter - controls how many times to retry the update before finally throwing - an exception. - :arg doc_as_upsert: Instead of sending a partial doc plus an upsert - doc, setting doc_as_upsert to true will use the contents of doc as - the upsert value - :arg return_doc_meta: set to ``True`` to return all metadata from the - index API call instead of only the operation result - - :return operation result noop/updated - """ - body = { - "doc_as_upsert": doc_as_upsert, - "detect_noop": detect_noop, - } - - # scripted update - if script or script_id: - if upsert is not None: - body["upsert"] = upsert - - if script: - script = {"source": script} - else: - script = {"id": script_id} - - script["params"] = fields - - body["script"] = script - body["scripted_upsert"] = scripted_upsert - - # partial document update - else: - if not fields: - raise IllegalOperation( - "You cannot call update() without updating individual fields or a script. " - "If you wish to update the entire object use save()." 
- ) - - # update given fields locally - merge(self, fields) - - # prepare data for ES - values = self.to_dict() - - # if fields were given: partial update - body["doc"] = {k: values.get(k) for k in fields.keys()} - - # extract routing etc from meta - doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} - - if retry_on_conflict is not None: - doc_meta["retry_on_conflict"] = retry_on_conflict - - # Optimistic concurrency control - if ( - retry_on_conflict in (None, 0) - and "seq_no" in self.meta - and "primary_term" in self.meta - ): - doc_meta["if_seq_no"] = self.meta["seq_no"] - doc_meta["if_primary_term"] = self.meta["primary_term"] - - meta = self._get_connection(using).update( - index=self._get_index(index), body=body, refresh=refresh, **doc_meta - ) - # update meta information from ES - for k in META_FIELDS: - if "_" + k in meta: - setattr(self.meta, k, meta["_" + k]) - - return meta if return_doc_meta else meta["result"] - - def save( - self, - using=None, - index=None, - validate=True, - skip_empty=True, - return_doc_meta=False, - **kwargs - ): - """ - Save the document into elasticsearch. If the document doesn't exist it - is created, it is overwritten otherwise. Returns ``True`` if this - operations resulted in new document being created. - - :arg index: elasticsearch index to use, if the ``Document`` is - associated with an index this can be omitted. - :arg using: connection alias to use, defaults to ``'default'`` - :arg validate: set to ``False`` to skip validating the document - :arg skip_empty: if set to ``False`` will cause empty values (``None``, - ``[]``, ``{}``) to be left on the document. Those values will be - stripped out otherwise as they make no difference in elasticsearch. - :arg return_doc_meta: set to ``True`` to return all metadata from the - update API call instead of only the operation result - - Any additional keyword arguments will be passed to - ``Elasticsearch.index`` unchanged. 
- - :return operation result created/updated - """ - if validate: - self.full_clean() - - es = self._get_connection(using) - # extract routing etc from meta - doc_meta = {k: self.meta[k] for k in DOC_META_FIELDS if k in self.meta} - - # Optimistic concurrency control - if "seq_no" in self.meta and "primary_term" in self.meta: - doc_meta["if_seq_no"] = self.meta["seq_no"] - doc_meta["if_primary_term"] = self.meta["primary_term"] - - doc_meta.update(kwargs) - meta = es.index( - index=self._get_index(index), - body=self.to_dict(skip_empty=skip_empty), - **doc_meta - ) - # update meta information from ES - for k in META_FIELDS: - if "_" + k in meta: - setattr(self.meta, k, meta["_" + k]) +try: + from ._async import AsyncDocument, AsyncIndexMeta # noqa: F401 - return meta if return_doc_meta else meta["result"] + __all__.extend(["AsyncDocument", "AsyncIndexMeta"]) +except ImportError: + pass diff --git a/elasticsearch_dsl/faceted_search.py b/elasticsearch_dsl/faceted_search.py index e102ed5cb..5dd9e2e17 100644 --- a/elasticsearch_dsl/faceted_search.py +++ b/elasticsearch_dsl/faceted_search.py @@ -17,22 +17,8 @@ from datetime import datetime, timedelta -from six import iteritems, itervalues - from .aggs import A -from .query import MatchAll, Nested, Range, Terms -from .response import Response -from .search import Search -from .utils import AttrDict - -__all__ = [ - "FacetedSearch", - "HistogramFacet", - "TermsFacet", - "DateHistogramFacet", - "RangeFacet", - "NestedFacet", -] +from .query import Nested, Range, Terms class Facet(object): @@ -252,203 +238,22 @@ def add_filter(self, filter_values): return Nested(path=self._path, query=inner_q) -class FacetedResponse(Response): - @property - def query_string(self): - return self._faceted_search._query - - @property - def facets(self): - if not hasattr(self, "_facets"): - super(AttrDict, self).__setattr__("_facets", AttrDict({})) - for name, facet in iteritems(self._faceted_search.facets): - self._facets[name] = facet.get_values( - getattr(getattr(self.aggregations, "_filter_" + name), name), - self._faceted_search.filter_values.get(name, ()), - ) - return self._facets - - -class FacetedSearch(object): - """ - Abstraction for creating faceted navigation searches that takes care of - composing the queries, aggregations and filters as needed as well as - presenting the results in an easy-to-consume fashion:: - - class BlogSearch(FacetedSearch): - index = 'blogs' - doc_types = [Blog, Post] - fields = ['title^5', 'category', 'description', 'body'] - - facets = { - 'type': TermsFacet(field='_type'), - 'category': TermsFacet(field='category'), - 'weekly_posts': DateHistogramFacet(field='published_from', interval='week') - } - - def search(self): - ' Override search to add your own filters ' - s = super(BlogSearch, self).search() - return s.filter('term', published=True) - - # when using: - blog_search = BlogSearch("web framework", filters={"category": "python"}) - - # supports pagination - blog_search[10:20] - - response = blog_search.execute() - - # easy access to aggregation results: - for category, hit_count, is_selected in response.facets.category: - print( - "Category %s has %d hits%s." 
% ( - category, - hit_count, - ' and is chosen' if is_selected else '' - ) - ) - - """ - - index = None - doc_types = None - fields = None - facets = {} - using = "default" - - def __init__(self, query=None, filters={}, sort=()): - """ - :arg query: the text to search for - :arg filters: facet values to filter - :arg sort: sort information to be passed to :class:`~elasticsearch_dsl.Search` - """ - self._query = query - self._filters = {} - self._sort = sort - self.filter_values = {} - for name, value in iteritems(filters): - self.add_filter(name, value) - - self._s = self.build_search() - - def count(self): - return self._s.count() - - def __getitem__(self, k): - self._s = self._s[k] - return self - - def __iter__(self): - return iter(self._s) - - def add_filter(self, name, filter_values): - """ - Add a filter for a facet. - """ - # normalize the value into a list - if not isinstance(filter_values, (tuple, list)): - if filter_values is None: - return - filter_values = [ - filter_values, - ] - - # remember the filter values for use in FacetedResponse - self.filter_values[name] = filter_values - - # get the filter from the facet - f = self.facets[name].add_filter(filter_values) - if f is None: - return - - self._filters[name] = f - - def search(self): - """ - Returns the base Search object to which the facets are added. - - You can customize the query by overriding this method and returning a - modified search object. - """ - s = Search(doc_type=self.doc_types, index=self.index, using=self.using) - return s.response_class(FacetedResponse) - - def query(self, search, query): - """ - Add query part to ``search``. - - Override this if you wish to customize the query used. - """ - if query: - if self.fields: - return search.query("multi_match", fields=self.fields, query=query) - else: - return search.query("multi_match", query=query) - return search - - def aggregate(self, search): - """ - Add aggregations representing the facets selected, including potential - filters. - """ - for f, facet in iteritems(self.facets): - agg = facet.get_aggregation() - agg_filter = MatchAll() - for field, filter in iteritems(self._filters): - if f == field: - continue - agg_filter &= filter - search.aggs.bucket("_filter_" + f, "filter", filter=agg_filter).bucket( - f, agg - ) - - def filter(self, search): - """ - Add a ``post_filter`` to the search request narrowing the results based - on the facet filters. - """ - if not self._filters: - return search - - post_filter = MatchAll() - for f in itervalues(self._filters): - post_filter &= f - return search.post_filter(post_filter) +from ._base import FacetedResponse +from ._sync import FacetedSearch - def highlight(self, search): - """ - Add highlighting for all the fields - """ - return search.highlight( - *(f if "^" not in f else f.split("^", 1)[0] for f in self.fields) - ) +__all__ = [ + "FacetedSearch", + "FacetedResponse", + "HistogramFacet", + "TermsFacet", + "DateHistogramFacet", + "RangeFacet", + "NestedFacet", +] - def sort(self, search): - """ - Add sorting information to the request. - """ - if self._sort: - search = search.sort(*self._sort) - return search +try: + from ._async import AsyncFacetedSearch # noqa: F401 - def build_search(self): - """ - Construct the ``Search`` object. - """ - s = self.search() - s = self.query(s, self._query) - s = self.filter(s) - if self.fields: - s = self.highlight(s) - s = self.sort(s) - self.aggregate(s) - return s - - def execute(self): - """ - Execute the search and return the response. 
- """ - r = self._s.execute() - r._faceted_search = self - return r + __all__.append("AsyncFacetedSearch") +except ImportError: + pass diff --git a/elasticsearch_dsl/index.py b/elasticsearch_dsl/index.py index 17dd93f45..5047cfcc4 100644 --- a/elasticsearch_dsl/index.py +++ b/elasticsearch_dsl/index.py @@ -15,638 +15,18 @@ # specific language governing permissions and limitations # under the License. -from . import analysis -from .connections import get_connection -from .exceptions import IllegalOperation -from .mapping import Mapping -from .search import Search -from .update_by_query import UpdateByQuery -from .utils import merge +from ._sync import Index, IndexTemplate +__all__ = ["Index", "IndexTemplate"] -class IndexTemplate(object): - def __init__(self, name, template, index=None, order=None, **kwargs): - if index is None: - self._index = Index(template, **kwargs) - else: - if kwargs: - raise ValueError( - "You cannot specify options for Index when" - " passing an Index instance." - ) - self._index = index.clone() - self._index._name = template - self._template_name = name - self.order = order +try: + from ._async import AsyncIndex, AsyncIndexTemplate # noqa: F401 - def __getattr__(self, attr_name): - return getattr(self._index, attr_name) - - def to_dict(self): - d = self._index.to_dict() - d["index_patterns"] = [self._index._name] - if self.order is not None: - d["order"] = self.order - return d - - def save(self, using=None): - - es = get_connection(using or self._index._using) - return es.indices.put_template(name=self._template_name, body=self.to_dict()) - - -class Index(object): - def __init__(self, name, using="default"): - """ - :arg name: name of the index - :arg using: connection alias to use, defaults to ``'default'`` - """ - self._name = name - self._doc_types = [] - self._using = using - self._settings = {} - self._aliases = {} - self._analysis = {} - self._mapping = None - - def get_or_create_mapping(self): - if self._mapping is None: - self._mapping = Mapping() - return self._mapping - - def as_template(self, template_name, pattern=None, order=None): - # TODO: should we allow pattern to be a top-level arg? - # or maybe have an IndexPattern that allows for it and have - # Document._index be that? - return IndexTemplate( - template_name, pattern or self._name, index=self, order=order - ) - - def resolve_nested(self, field_path): - for doc in self._doc_types: - nested, field = doc._doc_type.mapping.resolve_nested(field_path) - if field is not None: - return nested, field - if self._mapping: - return self._mapping.resolve_nested(field_path) - return (), None - - def resolve_field(self, field_path): - for doc in self._doc_types: - field = doc._doc_type.mapping.resolve_field(field_path) - if field is not None: - return field - if self._mapping: - return self._mapping.resolve_field(field_path) - return None - - def load_mappings(self, using=None): - self.get_or_create_mapping().update_from_es( - self._name, using=using or self._using - ) - - def clone(self, name=None, using=None): - """ - Create a copy of the instance with another name or connection alias. 
- Useful for creating multiple indices with shared configuration:: - - i = Index('base-index') - i.settings(number_of_shards=1) - i.create() - - i2 = i.clone('other-index') - i2.create() - - :arg name: name of the index - :arg using: connection alias to use, defaults to ``'default'`` - """ - i = Index(name or self._name, using=using or self._using) - i._settings = self._settings.copy() - i._aliases = self._aliases.copy() - i._analysis = self._analysis.copy() - i._doc_types = self._doc_types[:] - if self._mapping is not None: - i._mapping = self._mapping._clone() - return i - - def _get_connection(self, using=None): - if self._name is None: - raise ValueError("You cannot perform API calls on the default index.") - return get_connection(using or self._using) - - connection = property(_get_connection) - - def mapping(self, mapping): - """ - Associate a mapping (an instance of - :class:`~elasticsearch_dsl.Mapping`) with this index. - This means that, when this index is created, it will contain the - mappings for the document type defined by those mappings. - """ - self.get_or_create_mapping().update(mapping) - - def document(self, document): - """ - Associate a :class:`~elasticsearch_dsl.Document` subclass with an index. - This means that, when this index is created, it will contain the - mappings for the ``Document``. If the ``Document`` class doesn't have a - default index yet (by defining ``class Index``), this instance will be - used. Can be used as a decorator:: - - i = Index('blog') - - @i.document - class Post(Document): - title = Text() - - # create the index, including Post mappings - i.create() - - # .search() will now return a Search object that will return - # properly deserialized Post instances - s = i.search() - """ - self._doc_types.append(document) - - # If the document index does not have any name, that means the user - # did not set any index already to the document. - # So set this index as document index - if document._index._name is None: - document._index = self - - return document - - def settings(self, **kwargs): - """ - Add settings to the index:: - - i = Index('i') - i.settings(number_of_shards=1, number_of_replicas=0) - - Multiple calls to ``settings`` will merge the keys, later overriding - the earlier. - """ - self._settings.update(kwargs) - return self - - def aliases(self, **kwargs): - """ - Add aliases to the index definition:: - - i = Index('blog-v2') - i.aliases(blog={}, published={'filter': Q('term', published=True)}) - """ - self._aliases.update(kwargs) - return self - - def analyzer(self, *args, **kwargs): - """ - Explicitly add an analyzer to an index. Note that all custom analyzers - defined in mappings will also be created. This is useful for search analyzers. 
- - Example:: - - from elasticsearch_dsl import analyzer, tokenizer - - my_analyzer = analyzer('my_analyzer', - tokenizer=tokenizer('trigram', 'nGram', min_gram=3, max_gram=3), - filter=['lowercase'] - ) - - i = Index('blog') - i.analyzer(my_analyzer) - - """ - analyzer = analysis.analyzer(*args, **kwargs) - d = analyzer.get_analysis_definition() - # empty custom analyzer, probably already defined out of our control - if not d: - return - - # merge the definition - merge(self._analysis, d, True) - - def to_dict(self): - out = {} - if self._settings: - out["settings"] = self._settings - if self._aliases: - out["aliases"] = self._aliases - mappings = self._mapping.to_dict() if self._mapping else {} - analysis = self._mapping._collect_analysis() if self._mapping else {} - for d in self._doc_types: - mapping = d._doc_type.mapping - merge(mappings, mapping.to_dict(), True) - merge(analysis, mapping._collect_analysis(), True) - if mappings: - out["mappings"] = mappings - if analysis or self._analysis: - merge(analysis, self._analysis) - out.setdefault("settings", {})["analysis"] = analysis - return out - - def search(self, using=None): - """ - Return a :class:`~elasticsearch_dsl.Search` object searching over the - index (or all the indices belonging to this template) and its - ``Document``\\s. - """ - return Search( - using=using or self._using, index=self._name, doc_type=self._doc_types - ) - - def updateByQuery(self, using=None): - """ - Return a :class:`~elasticsearch_dsl.UpdateByQuery` object searching over the index - (or all the indices belonging to this template) and updating Documents that match - the search criteria. - - For more information, see here: - https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update-by-query.html - """ - return UpdateByQuery( - using=using or self._using, - index=self._name, - ) - - def create(self, using=None, **kwargs): - """ - Creates the index in elasticsearch. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.create`` unchanged. - """ - return self._get_connection(using).indices.create( - index=self._name, body=self.to_dict(), **kwargs - ) - - def is_closed(self, using=None): - state = self._get_connection(using).cluster.state( - index=self._name, metric="metadata" - ) - return state["metadata"]["indices"][self._name]["state"] == "close" - - def save(self, using=None): - """ - Sync the index definition with elasticsearch, creating the index if it - doesn't exist and updating its settings and mappings if it does. - - Note some settings and mapping changes cannot be done on an open - index (or at all on an existing index) and for those this method will - fail with the underlying exception. 
- """ - if not self.exists(using=using): - return self.create(using=using) - - body = self.to_dict() - settings = body.pop("settings", {}) - analysis = settings.pop("analysis", None) - current_settings = self.get_settings(using=using)[self._name]["settings"][ - "index" + __all__.extend( + [ + "AsyncIndex", + "AsyncIndexTemplate", ] - if analysis: - if self.is_closed(using=using): - # closed index, update away - settings["analysis"] = analysis - else: - # compare analysis definition, if all analysis objects are - # already defined as requested, skip analysis update and - # proceed, otherwise raise IllegalOperation - existing_analysis = current_settings.get("analysis", {}) - if any( - existing_analysis.get(section, {}).get(k, None) - != analysis[section][k] - for section in analysis - for k in analysis[section] - ): - raise IllegalOperation( - "You cannot update analysis configuration on an open index, " - "you need to close index %s first." % self._name - ) - - # try and update the settings - if settings: - settings = settings.copy() - for k, v in list(settings.items()): - if k in current_settings and current_settings[k] == str(v): - del settings[k] - - if settings: - self.put_settings(using=using, body=settings) - - # update the mappings, any conflict in the mappings will result in an - # exception - mappings = body.pop("mappings", {}) - if mappings: - self.put_mapping(using=using, body=mappings) - - def analyze(self, using=None, **kwargs): - """ - Perform the analysis process on a text and return the tokens breakdown - of the text. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.analyze`` unchanged. - """ - return self._get_connection(using).indices.analyze(index=self._name, **kwargs) - - def refresh(self, using=None, **kwargs): - """ - Performs a refresh operation on the index. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.refresh`` unchanged. - """ - return self._get_connection(using).indices.refresh(index=self._name, **kwargs) - - def flush(self, using=None, **kwargs): - """ - Performs a flush operation on the index. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.flush`` unchanged. - """ - return self._get_connection(using).indices.flush(index=self._name, **kwargs) - - def get(self, using=None, **kwargs): - """ - The get index API allows to retrieve information about the index. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.get`` unchanged. - """ - return self._get_connection(using).indices.get(index=self._name, **kwargs) - - def open(self, using=None, **kwargs): - """ - Opens the index in elasticsearch. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.open`` unchanged. - """ - return self._get_connection(using).indices.open(index=self._name, **kwargs) - - def close(self, using=None, **kwargs): - """ - Closes the index in elasticsearch. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.close`` unchanged. - """ - return self._get_connection(using).indices.close(index=self._name, **kwargs) - - def delete(self, using=None, **kwargs): - """ - Deletes the index in elasticsearch. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.delete`` unchanged. - """ - return self._get_connection(using).indices.delete(index=self._name, **kwargs) - - def exists(self, using=None, **kwargs): - """ - Returns ``True`` if the index already exists in elasticsearch. 
- - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.exists`` unchanged. - """ - return self._get_connection(using).indices.exists(index=self._name, **kwargs) - - def exists_type(self, using=None, **kwargs): - """ - Check if a type/types exists in the index. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.exists_type`` unchanged. - """ - return self._get_connection(using).indices.exists_type( - index=self._name, **kwargs - ) - - def put_mapping(self, using=None, **kwargs): - """ - Register specific mapping definition for a specific type. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.put_mapping`` unchanged. - """ - return self._get_connection(using).indices.put_mapping( - index=self._name, **kwargs - ) - - def get_mapping(self, using=None, **kwargs): - """ - Retrieve specific mapping definition for a specific type. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.get_mapping`` unchanged. - """ - return self._get_connection(using).indices.get_mapping( - index=self._name, **kwargs - ) - - def get_field_mapping(self, using=None, **kwargs): - """ - Retrieve mapping definition of a specific field. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.get_field_mapping`` unchanged. - """ - return self._get_connection(using).indices.get_field_mapping( - index=self._name, **kwargs - ) - - def put_alias(self, using=None, **kwargs): - """ - Create an alias for the index. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.put_alias`` unchanged. - """ - return self._get_connection(using).indices.put_alias(index=self._name, **kwargs) - - def exists_alias(self, using=None, **kwargs): - """ - Return a boolean indicating whether given alias exists for this index. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.exists_alias`` unchanged. - """ - return self._get_connection(using).indices.exists_alias( - index=self._name, **kwargs - ) - - def get_alias(self, using=None, **kwargs): - """ - Retrieve a specified alias. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.get_alias`` unchanged. - """ - return self._get_connection(using).indices.get_alias(index=self._name, **kwargs) - - def delete_alias(self, using=None, **kwargs): - """ - Delete specific alias. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.delete_alias`` unchanged. - """ - return self._get_connection(using).indices.delete_alias( - index=self._name, **kwargs - ) - - def get_settings(self, using=None, **kwargs): - """ - Retrieve settings for the index. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.get_settings`` unchanged. - """ - return self._get_connection(using).indices.get_settings( - index=self._name, **kwargs - ) - - def put_settings(self, using=None, **kwargs): - """ - Change specific index level settings in real time. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.put_settings`` unchanged. - """ - return self._get_connection(using).indices.put_settings( - index=self._name, **kwargs - ) - - def stats(self, using=None, **kwargs): - """ - Retrieve statistics on different operations happening on the index. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.stats`` unchanged. 
- """ - return self._get_connection(using).indices.stats(index=self._name, **kwargs) - - def segments(self, using=None, **kwargs): - """ - Provide low level segments information that a Lucene index (shard - level) is built with. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.segments`` unchanged. - """ - return self._get_connection(using).indices.segments(index=self._name, **kwargs) - - def validate_query(self, using=None, **kwargs): - """ - Validate a potentially expensive query without executing it. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.validate_query`` unchanged. - """ - return self._get_connection(using).indices.validate_query( - index=self._name, **kwargs - ) - - def clear_cache(self, using=None, **kwargs): - """ - Clear all caches or specific cached associated with the index. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.clear_cache`` unchanged. - """ - return self._get_connection(using).indices.clear_cache( - index=self._name, **kwargs - ) - - def recovery(self, using=None, **kwargs): - """ - The indices recovery API provides insight into on-going shard - recoveries for the index. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.recovery`` unchanged. - """ - return self._get_connection(using).indices.recovery(index=self._name, **kwargs) - - def upgrade(self, using=None, **kwargs): - """ - Upgrade the index to the latest format. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.upgrade`` unchanged. - """ - return self._get_connection(using).indices.upgrade(index=self._name, **kwargs) - - def get_upgrade(self, using=None, **kwargs): - """ - Monitor how much of the index is upgraded. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.get_upgrade`` unchanged. - """ - return self._get_connection(using).indices.get_upgrade( - index=self._name, **kwargs - ) - - def flush_synced(self, using=None, **kwargs): - """ - Perform a normal flush, then add a generated unique marker (sync_id) to - all shards. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.flush_synced`` unchanged. - """ - return self._get_connection(using).indices.flush_synced( - index=self._name, **kwargs - ) - - def shard_stores(self, using=None, **kwargs): - """ - Provides store information for shard copies of the index. Store - information reports on which nodes shard copies exist, the shard copy - version, indicating how recent they are, and any exceptions encountered - while opening the shard index or from earlier engine failure. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.shard_stores`` unchanged. - """ - return self._get_connection(using).indices.shard_stores( - index=self._name, **kwargs - ) - - def forcemerge(self, using=None, **kwargs): - """ - The force merge API allows to force merging of the index through an - API. The merge relates to the number of segments a Lucene index holds - within each shard. The force merge operation allows to reduce the - number of segments by merging them. - - This call will block until the merge is complete. If the http - connection is lost, the request will continue in the background, and - any new requests will block until the previous force merge is complete. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.forcemerge`` unchanged. 
- """ - return self._get_connection(using).indices.forcemerge( - index=self._name, **kwargs - ) - - def shrink(self, using=None, **kwargs): - """ - The shrink index API allows you to shrink an existing index into a new - index with fewer primary shards. The number of primary shards in the - target index must be a factor of the shards in the source index. For - example an index with 8 primary shards can be shrunk into 4, 2 or 1 - primary shards or an index with 15 primary shards can be shrunk into 5, - 3 or 1. If the number of shards in the index is a prime number it can - only be shrunk into a single primary shard. Before shrinking, a - (primary or replica) copy of every shard in the index must be present - on the same node. - - Any additional keyword arguments will be passed to - ``Elasticsearch.indices.shrink`` unchanged. - """ - return self._get_connection(using).indices.shrink(index=self._name, **kwargs) + ) +except ImportError: + pass diff --git a/elasticsearch_dsl/search.py b/elasticsearch_dsl/search.py index 761f6611e..d066d5905 100644 --- a/elasticsearch_dsl/search.py +++ b/elasticsearch_dsl/search.py @@ -15,801 +15,26 @@ # specific language governing permissions and limitations # under the License. -import copy +from ._base import AggsProxy, ProxyDescriptor, QueryProxy, Request +from ._sync import MultiSearch, Search + +# 'Q' is here because test suite uses it, don't +# want to break users relying on it. +from .query import Q + +__all__ = [ + "Q", + "QueryProxy", + "ProxyDescriptor", + "AggsProxy", + "Request", + "Search", + "MultiSearch", +] try: - import collections.abc as collections_abc # only works on python 3.3+ -except ImportError: - import collections as collections_abc - -from elasticsearch.exceptions import TransportError -from elasticsearch.helpers import scan -from six import iteritems, string_types - -from .aggs import A, AggBase -from .connections import get_connection -from .exceptions import IllegalOperation -from .query import Bool, Q -from .response import Hit, Response -from .utils import AttrDict, DslBase, recursive_to_dict - - -class QueryProxy(object): - """ - Simple proxy around DSL objects (queries) that can be called - (to add query/post_filter) and also allows attribute access which is proxied to - the wrapped query. - """ - - def __init__(self, search, attr_name): - self._search = search - self._proxied = None - self._attr_name = attr_name - - def __nonzero__(self): - return self._proxied is not None - - __bool__ = __nonzero__ - - def __call__(self, *args, **kwargs): - s = self._search._clone() - - # we cannot use self._proxied since we just cloned self._search and - # need to access the new self on the clone - proxied = getattr(s, self._attr_name) - if proxied._proxied is None: - proxied._proxied = Q(*args, **kwargs) - else: - proxied._proxied &= Q(*args, **kwargs) - - # always return search to be chainable - return s - - def __getattr__(self, attr_name): - return getattr(self._proxied, attr_name) - - def __setattr__(self, attr_name, value): - if not attr_name.startswith("_"): - self._proxied = Q(self._proxied.to_dict()) - setattr(self._proxied, attr_name, value) - super(QueryProxy, self).__setattr__(attr_name, value) - - def __getstate__(self): - return self._search, self._proxied, self._attr_name - - def __setstate__(self, state): - self._search, self._proxied, self._attr_name = state - - -class ProxyDescriptor(object): - """ - Simple descriptor to enable setting of queries and filters as: - - s = Search() - s.query = Q(...) 
- - """ - - def __init__(self, name): - self._attr_name = "_%s_proxy" % name - - def __get__(self, instance, owner): - return getattr(instance, self._attr_name) - - def __set__(self, instance, value): - proxy = getattr(instance, self._attr_name) - proxy._proxied = Q(value) - - -class AggsProxy(AggBase, DslBase): - name = "aggs" - - def __init__(self, search): - self._base = self - self._search = search - self._params = {"aggs": {}} - - def to_dict(self): - return super(AggsProxy, self).to_dict().get("aggs", {}) - - -class Request(object): - def __init__(self, using="default", index=None, doc_type=None, extra=None): - self._using = using - - self._index = None - if isinstance(index, (tuple, list)): - self._index = list(index) - elif index: - self._index = [index] - - self._doc_type = [] - self._doc_type_map = {} - if isinstance(doc_type, (tuple, list)): - self._doc_type.extend(doc_type) - elif isinstance(doc_type, collections_abc.Mapping): - self._doc_type.extend(doc_type.keys()) - self._doc_type_map.update(doc_type) - elif doc_type: - self._doc_type.append(doc_type) - - self._params = {} - self._extra = extra or {} - - def __eq__(self, other): - return ( - isinstance(other, Request) - and other._params == self._params - and other._index == self._index - and other._doc_type == self._doc_type - and other.to_dict() == self.to_dict() - ) - - def __copy__(self): - return self._clone() - - def params(self, **kwargs): - """ - Specify query params to be used when executing the search. All the - keyword arguments will override the current values. See - https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.Elasticsearch.search - for all available parameters. - - Example:: - - s = Search() - s = s.params(routing='user-1', preference='local') - """ - s = self._clone() - s._params.update(kwargs) - return s - - def index(self, *index): - """ - Set the index for the search. If called empty it will remove all information. 
- - Example: - - s = Search() - s = s.index('twitter-2015.01.01', 'twitter-2015.01.02') - s = s.index(['twitter-2015.01.01', 'twitter-2015.01.02']) - """ - # .index() resets - s = self._clone() - if not index: - s._index = None - else: - indexes = [] - for i in index: - if isinstance(i, string_types): - indexes.append(i) - elif isinstance(i, list): - indexes += i - elif isinstance(i, tuple): - indexes += list(i) - - s._index = (self._index or []) + indexes - - return s - - def _resolve_field(self, path): - for dt in self._doc_type: - if not hasattr(dt, "_index"): - continue - field = dt._index.resolve_field(path) - if field is not None: - return field - - def _resolve_nested(self, hit, parent_class=None): - doc_class = Hit - - nested_path = [] - nesting = hit["_nested"] - while nesting and "field" in nesting: - nested_path.append(nesting["field"]) - nesting = nesting.get("_nested") - nested_path = ".".join(nested_path) - - if hasattr(parent_class, "_index"): - nested_field = parent_class._index.resolve_field(nested_path) - else: - nested_field = self._resolve_field(nested_path) - - if nested_field is not None: - return nested_field._doc_class - - return doc_class - - def _get_result(self, hit, parent_class=None): - doc_class = Hit - dt = hit.get("_type") - - if "_nested" in hit: - doc_class = self._resolve_nested(hit, parent_class) - - elif dt in self._doc_type_map: - doc_class = self._doc_type_map[dt] - - else: - for doc_type in self._doc_type: - if hasattr(doc_type, "_matches") and doc_type._matches(hit): - doc_class = doc_type - break - - for t in hit.get("inner_hits", ()): - hit["inner_hits"][t] = Response( - self, hit["inner_hits"][t], doc_class=doc_class - ) - - callback = getattr(doc_class, "from_es", doc_class) - return callback(hit) - - def doc_type(self, *doc_type, **kwargs): - """ - Set the type to search through. You can supply a single value or - multiple. Values can be strings or subclasses of ``Document``. - - You can also pass in any keyword arguments, mapping a doc_type to a - callback that should be used instead of the Hit class. - - If no doc_type is supplied any information stored on the instance will - be erased. - - Example: - - s = Search().doc_type('product', 'store', User, custom=my_callback) - """ - # .doc_type() resets - s = self._clone() - if not doc_type and not kwargs: - s._doc_type = [] - s._doc_type_map = {} - else: - s._doc_type.extend(doc_type) - s._doc_type.extend(kwargs.keys()) - s._doc_type_map.update(kwargs) - return s - - def using(self, client): - """ - Associate the search request with an elasticsearch client. A fresh copy - will be returned with current instance remaining unchanged. - - :arg client: an instance of ``elasticsearch.Elasticsearch`` to use or - an alias to look up in ``elasticsearch_dsl.connections`` - - """ - s = self._clone() - s._using = client - return s - - def extra(self, **kwargs): - """ - Add extra keys to the request body. Mostly here for backwards - compatibility. - """ - s = self._clone() - if "from_" in kwargs: - kwargs["from"] = kwargs.pop("from_") - s._extra.update(kwargs) - return s - - def _clone(self): - s = self.__class__( - using=self._using, index=self._index, doc_type=self._doc_type - ) - s._doc_type_map = self._doc_type_map.copy() - s._extra = self._extra.copy() - s._params = self._params.copy() - return s - - -class Search(Request): - query = ProxyDescriptor("query") - post_filter = ProxyDescriptor("post_filter") - - def __init__(self, **kwargs): - """ - Search request to elasticsearch. 
- - :arg using: `Elasticsearch` instance to use - :arg index: limit the search to index - :arg doc_type: only query this type. - - All the parameters supplied (or omitted) at creation type can be later - overridden by methods (`using`, `index` and `doc_type` respectively). - """ - super(Search, self).__init__(**kwargs) - - self.aggs = AggsProxy(self) - self._sort = [] - self._source = None - self._highlight = {} - self._highlight_opts = {} - self._suggest = {} - self._script_fields = {} - self._response_class = Response - - self._query_proxy = QueryProxy(self, "query") - self._post_filter_proxy = QueryProxy(self, "post_filter") - - def filter(self, *args, **kwargs): - return self.query(Bool(filter=[Q(*args, **kwargs)])) - - def exclude(self, *args, **kwargs): - return self.query(Bool(filter=[~Q(*args, **kwargs)])) - - def __iter__(self): - """ - Iterate over the hits. - """ - return iter(self.execute()) - - def __getitem__(self, n): - """ - Support slicing the `Search` instance for pagination. - - Slicing equates to the from/size parameters. E.g.:: - - s = Search().query(...)[0:25] - - is equivalent to:: - - s = Search().query(...).extra(from_=0, size=25) - - """ - s = self._clone() - - if isinstance(n, slice): - # If negative slicing, abort. - if n.start and n.start < 0 or n.stop and n.stop < 0: - raise ValueError("Search does not support negative slicing.") - # Elasticsearch won't get all results so we default to size: 10 if - # stop not given. - s._extra["from"] = n.start or 0 - s._extra["size"] = max( - 0, n.stop - (n.start or 0) if n.stop is not None else 10 - ) - return s - else: # This is an index lookup, equivalent to slicing by [n:n+1]. - # If negative index, abort. - if n < 0: - raise ValueError("Search does not support negative indexing.") - s._extra["from"] = n - s._extra["size"] = 1 - return s - - @classmethod - def from_dict(cls, d): - """ - Construct a new `Search` instance from a raw dict containing the search - body. Useful when migrating from raw dictionaries. - - Example:: - - s = Search.from_dict({ - "query": { - "bool": { - "must": [...] - } - }, - "aggs": {...} - }) - s = s.filter('term', published=True) - """ - s = cls() - s.update_from_dict(d) - return s - - def _clone(self): - """ - Return a clone of the current search request. Performs a shallow copy - of all the underlying objects. Used internally by most state modifying - APIs. - """ - s = super(Search, self)._clone() - - s._response_class = self._response_class - s._sort = self._sort[:] - s._source = copy.copy(self._source) if self._source is not None else None - s._highlight = self._highlight.copy() - s._highlight_opts = self._highlight_opts.copy() - s._suggest = self._suggest.copy() - s._script_fields = self._script_fields.copy() - for x in ("query", "post_filter"): - getattr(s, x)._proxied = getattr(self, x)._proxied + from ._sync import AsyncMultiSearch, AsyncSearch # noqa: F401 - # copy top-level bucket definitions - if self.aggs._params.get("aggs"): - s.aggs._params = {"aggs": self.aggs._params["aggs"].copy()} - return s - - def response_class(self, cls): - """ - Override the default wrapper used for the response. - """ - s = self._clone() - s._response_class = cls - return s - - def update_from_dict(self, d): - """ - Apply options from a serialized body to the current instance. Modifies - the object in-place. Used mostly by ``from_dict``. 
- """ - d = d.copy() - if "query" in d: - self.query._proxied = Q(d.pop("query")) - if "post_filter" in d: - self.post_filter._proxied = Q(d.pop("post_filter")) - - aggs = d.pop("aggs", d.pop("aggregations", {})) - if aggs: - self.aggs._params = { - "aggs": {name: A(value) for (name, value) in iteritems(aggs)} - } - if "sort" in d: - self._sort = d.pop("sort") - if "_source" in d: - self._source = d.pop("_source") - if "highlight" in d: - high = d.pop("highlight").copy() - self._highlight = high.pop("fields") - self._highlight_opts = high - if "suggest" in d: - self._suggest = d.pop("suggest") - if "text" in self._suggest: - text = self._suggest.pop("text") - for s in self._suggest.values(): - s.setdefault("text", text) - if "script_fields" in d: - self._script_fields = d.pop("script_fields") - self._extra.update(d) - return self - - def script_fields(self, **kwargs): - """ - Define script fields to be calculated on hits. See - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-script-fields.html - for more details. - - Example:: - - s = Search() - s = s.script_fields(times_two="doc['field'].value * 2") - s = s.script_fields( - times_three={ - 'script': { - 'lang': 'painless', - 'source': "doc['field'].value * params.n", - 'params': {'n': 3} - } - } - ) - - """ - s = self._clone() - for name in kwargs: - if isinstance(kwargs[name], string_types): - kwargs[name] = {"script": kwargs[name]} - s._script_fields.update(kwargs) - return s - - def source(self, fields=None, **kwargs): - """ - Selectively control how the _source field is returned. - - :arg fields: wildcard string, array of wildcards, or dictionary of includes and excludes - - If ``fields`` is None, the entire document will be returned for - each hit. If fields is a dictionary with keys of 'includes' and/or - 'excludes' the fields will be either included or excluded appropriately. - - Calling this multiple times with the same named parameter will override the - previous values with the new ones. - - Example:: - - s = Search() - s = s.source(includes=['obj1.*'], excludes=["*.description"]) - - s = Search() - s = s.source(includes=['obj1.*']).source(excludes=["*.description"]) - - """ - s = self._clone() - - if fields and kwargs: - raise ValueError("You cannot specify fields and kwargs at the same time.") - - if fields is not None: - s._source = fields - return s - - if kwargs and not isinstance(s._source, dict): - s._source = {} - - for key, value in kwargs.items(): - if value is None: - try: - del s._source[key] - except KeyError: - pass - else: - s._source[key] = value - - return s - - def sort(self, *keys): - """ - Add sorting information to the search request. If called without - arguments it will remove all sort requirements. Otherwise it will - replace them. Acceptable arguments are:: - - 'some.field' - '-some.other.field' - {'different.field': {'any': 'dict'}} - - so for example:: - - s = Search().sort( - 'category', - '-title', - {"price" : {"order" : "asc", "mode" : "avg"}} - ) - - will sort by ``category``, ``title`` (in descending order) and - ``price`` in ascending order using the ``avg`` mode. - - The API returns a copy of the Search object and can thus be chained. 
- """ - s = self._clone() - s._sort = [] - for k in keys: - if isinstance(k, string_types) and k.startswith("-"): - if k[1:] == "_score": - raise IllegalOperation("Sorting by `-_score` is not allowed.") - k = {k[1:]: {"order": "desc"}} - s._sort.append(k) - return s - - def highlight_options(self, **kwargs): - """ - Update the global highlighting options used for this request. For - example:: - - s = Search() - s = s.highlight_options(order='score') - """ - s = self._clone() - s._highlight_opts.update(kwargs) - return s - - def highlight(self, *fields, **kwargs): - """ - Request highlighting of some fields. All keyword arguments passed in will be - used as parameters for all the fields in the ``fields`` parameter. Example:: - - Search().highlight('title', 'body', fragment_size=50) - - will produce the equivalent of:: - - { - "highlight": { - "fields": { - "body": {"fragment_size": 50}, - "title": {"fragment_size": 50} - } - } - } - - If you want to have different options for different fields - you can call ``highlight`` twice:: - - Search().highlight('title', fragment_size=50).highlight('body', fragment_size=100) - - which will produce:: - - { - "highlight": { - "fields": { - "body": {"fragment_size": 100}, - "title": {"fragment_size": 50} - } - } - } - - """ - s = self._clone() - for f in fields: - s._highlight[f] = kwargs - return s - - def suggest(self, name, text, **kwargs): - """ - Add a suggestions request to the search. - - :arg name: name of the suggestion - :arg text: text to suggest on - - All keyword arguments will be added to the suggestions body. For example:: - - s = Search() - s = s.suggest('suggestion-1', 'Elasticsearch', term={'field': 'body'}) - """ - s = self._clone() - s._suggest[name] = {"text": text} - s._suggest[name].update(kwargs) - return s - - def to_dict(self, count=False, **kwargs): - """ - Serialize the search into the dictionary that will be sent over as the - request's body. - - :arg count: a flag to specify if we are interested in a body for count - - no aggregations, no pagination bounds etc. - - All additional keyword arguments will be included into the dictionary. - """ - d = {} - - if self.query: - d["query"] = self.query.to_dict() - - # count request doesn't care for sorting and other things - if not count: - if self.post_filter: - d["post_filter"] = self.post_filter.to_dict() - - if self.aggs.aggs: - d.update(self.aggs.to_dict()) - - if self._sort: - d["sort"] = self._sort - - d.update(recursive_to_dict(self._extra)) - - if self._source not in (None, {}): - d["_source"] = self._source - - if self._highlight: - d["highlight"] = {"fields": self._highlight} - d["highlight"].update(self._highlight_opts) - - if self._suggest: - d["suggest"] = self._suggest - - if self._script_fields: - d["script_fields"] = self._script_fields - - d.update(recursive_to_dict(kwargs)) - return d - - def count(self): - """ - Return the number of hits matching the query and filters. Note that - only the actual number is returned. - """ - if hasattr(self, "_response") and self._response.hits.total.relation == "eq": - return self._response.hits.total.value - - es = get_connection(self._using) - - d = self.to_dict(count=True) - # TODO: failed shards detection - return es.count(index=self._index, body=d, **self._params)["count"] - - def execute(self, ignore_cache=False): - """ - Execute the search and return an instance of ``Response`` wrapping all - the data. - - :arg ignore_cache: if set to ``True``, consecutive calls will hit - ES, while cached result will be ignored. 
Defaults to `False` - """ - if ignore_cache or not hasattr(self, "_response"): - es = get_connection(self._using) - - self._response = self._response_class( - self, es.search(index=self._index, body=self.to_dict(), **self._params) - ) - return self._response - - def scan(self): - """ - Turn the search into a scan search and return a generator that will - iterate over all the documents matching the query. - - Use ``params`` method to specify any additional arguments you with to - pass to the underlying ``scan`` helper from ``elasticsearch-py`` - - https://elasticsearch-py.readthedocs.io/en/master/helpers.html#elasticsearch.helpers.scan - - """ - es = get_connection(self._using) - - for hit in scan(es, query=self.to_dict(), index=self._index, **self._params): - yield self._get_result(hit) - - def delete(self): - """ - delete() executes the query by delegating to delete_by_query() - """ - - es = get_connection(self._using) - - return AttrDict( - es.delete_by_query(index=self._index, body=self.to_dict(), **self._params) - ) - - -class MultiSearch(Request): - """ - Combine multiple :class:`~elasticsearch_dsl.Search` objects into a single - request. - """ - - def __init__(self, **kwargs): - super(MultiSearch, self).__init__(**kwargs) - self._searches = [] - - def __getitem__(self, key): - return self._searches[key] - - def __iter__(self): - return iter(self._searches) - - def _clone(self): - ms = super(MultiSearch, self)._clone() - ms._searches = self._searches[:] - return ms - - def add(self, search): - """ - Adds a new :class:`~elasticsearch_dsl.Search` object to the request:: - - ms = MultiSearch(index='my-index') - ms = ms.add(Search(doc_type=Category).filter('term', category='python')) - ms = ms.add(Search(doc_type=Blog)) - """ - ms = self._clone() - ms._searches.append(search) - return ms - - def to_dict(self): - out = [] - for s in self._searches: - meta = {} - if s._index: - meta["index"] = s._index - meta.update(s._params) - - out.append(meta) - out.append(s.to_dict()) - - return out - - def execute(self, ignore_cache=False, raise_on_error=True): - """ - Execute the multi search request and return a list of search results. - """ - if ignore_cache or not hasattr(self, "_response"): - es = get_connection(self._using) - - responses = es.msearch( - index=self._index, body=self.to_dict(), **self._params - ) - - out = [] - for s, r in zip(self._searches, responses["responses"]): - if r.get("error", False): - if raise_on_error: - raise TransportError("N/A", r["error"]["type"], r["error"]) - r = None - else: - r = Response(s, r) - out.append(r) - - self._response = out - - return self._response + __all__.extend(["AsyncSearch", "AsyncMultiSearch"]) +except ImportError: + pass diff --git a/elasticsearch_dsl/update_by_query.py b/elasticsearch_dsl/update_by_query.py index b46b482b1..a87869c06 100644 --- a/elasticsearch_dsl/update_by_query.py +++ b/elasticsearch_dsl/update_by_query.py @@ -15,146 +15,15 @@ # specific language governing permissions and limitations # under the License. -from .connections import get_connection -from .query import Bool, Q -from .response import UpdateByQueryResponse -from .search import ProxyDescriptor, QueryProxy, Request -from .utils import recursive_to_dict +from ._sync import UpdateByQuery +__all__ = [ + "UpdateByQuery", +] -class UpdateByQuery(Request): +try: + from ._async import AsyncUpdateByQuery # noqa: F401 - query = ProxyDescriptor("query") - - def __init__(self, **kwargs): - """ - Update by query request to elasticsearch. 
- - :arg using: `Elasticsearch` instance to use - :arg index: limit the search to index - :arg doc_type: only query this type. - - All the parameters supplied (or omitted) at creation type can be later - overriden by methods (`using`, `index` and `doc_type` respectively). - - """ - super(UpdateByQuery, self).__init__(**kwargs) - self._response_class = UpdateByQueryResponse - self._script = {} - self._query_proxy = QueryProxy(self, "query") - - def filter(self, *args, **kwargs): - return self.query(Bool(filter=[Q(*args, **kwargs)])) - - def exclude(self, *args, **kwargs): - return self.query(Bool(filter=[~Q(*args, **kwargs)])) - - @classmethod - def from_dict(cls, d): - """ - Construct a new `UpdateByQuery` instance from a raw dict containing the search - body. Useful when migrating from raw dictionaries. - - Example:: - - ubq = UpdateByQuery.from_dict({ - "query": { - "bool": { - "must": [...] - } - }, - "script": {...} - }) - ubq = ubq.filter('term', published=True) - """ - u = cls() - u.update_from_dict(d) - return u - - def _clone(self): - """ - Return a clone of the current search request. Performs a shallow copy - of all the underlying objects. Used internally by most state modifying - APIs. - """ - ubq = super(UpdateByQuery, self)._clone() - - ubq._response_class = self._response_class - ubq._script = self._script.copy() - ubq.query._proxied = self.query._proxied - return ubq - - def response_class(self, cls): - """ - Override the default wrapper used for the response. - """ - ubq = self._clone() - ubq._response_class = cls - return ubq - - def update_from_dict(self, d): - """ - Apply options from a serialized body to the current instance. Modifies - the object in-place. Used mostly by ``from_dict``. - """ - d = d.copy() - if "query" in d: - self.query._proxied = Q(d.pop("query")) - if "script" in d: - self._script = d.pop("script") - self._extra.update(d) - return self - - def script(self, **kwargs): - """ - Define update action to take: - https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-scripting-using.html - for more details. - - Note: the API only accepts a single script, so - calling the script multiple times will overwrite. - - Example:: - - ubq = Search() - ubq = ubq.script(source="ctx._source.likes++"") - ubq = ubq.script(source="ctx._source.likes += params.f"", - lang="expression", - params={'f': 3}) - """ - ubq = self._clone() - if ubq._script: - ubq._script = {} - ubq._script.update(kwargs) - return ubq - - def to_dict(self, **kwargs): - """ - Serialize the search into the dictionary that will be sent over as the - request'ubq body. - - All additional keyword arguments will be included into the dictionary. - """ - d = {} - if self.query: - d["query"] = self.query.to_dict() - - if self._script: - d["script"] = self._script - - d.update(recursive_to_dict(self._extra)) - d.update(recursive_to_dict(kwargs)) - return d - - def execute(self): - """ - Execute the search and return an instance of ``Response`` wrapping all - the data. 
- """ - es = get_connection(self._using) - - self._response = self._response_class( - self, - es.update_by_query(index=self._index, body=self.to_dict(), **self._params), - ) - return self._response + __all__.append("AsyncUpdateByQuery") +except ImportError: + pass diff --git a/noxfile.py b/noxfile.py index 4eed694d3..196935678 100644 --- a/noxfile.py +++ b/noxfile.py @@ -44,9 +44,10 @@ def test(session): session.run("pytest", *argv) -@nox.session() +@nox.session(python="3") def format(session): - session.install("black", "isort") + session.install("black", "isort", "unasync") + session.run("python", "utils/unasync-files.py", "fix") session.run( "black", "--target-version=py27", "--target-version=py37", *SOURCE_FILES ) @@ -56,9 +57,10 @@ def format(session): lint(session) -@nox.session +@nox.session(python="3") def lint(session): - session.install("flake8", "black", "isort") + session.install("flake8", "black", "isort", "unasync") + session.run("python", "utils/unasync-files.py", "check") session.run( "black", "--check", @@ -67,7 +69,7 @@ def lint(session): *SOURCE_FILES ) session.run("isort", "--check", *SOURCE_FILES) - session.run("flake8", "--ignore=E501,E741,W503", *SOURCE_FILES) + session.run("flake8", "--ignore=E501,E741,W503,E402", *SOURCE_FILES) session.run("python", "utils/license-headers.py", "check", *SOURCE_FILES) diff --git a/tests/conftest.py b/tests/conftest.py index b7326c3fb..308442108 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,7 +38,7 @@ from .test_integration.test_document import Comment, History, PullRequest, User -@fixture(scope="session") +@fixture(scope="session", autouse=True) def client(): try: connection = get_test_client(nowait="WAIT_FOR_ES" not in os.environ) @@ -51,7 +51,6 @@ def client(): @fixture(scope="session") def es_version(client): info = client.info() - print(info) yield tuple( int(x) for x in re.match(r"^([0-9.]+)", info["version"]["number"]).group(1).split(".") diff --git a/utils/unasync-files.py b/utils/unasync-files.py new file mode 100644 index 000000000..63e98e03f --- /dev/null +++ b/utils/unasync-files.py @@ -0,0 +1,100 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import ast +import glob +import hashlib +import os +import re +import shutil +import sys + +import unasync + + +def ast_hash(x): + """Calculate a hash based on all the contents of a directory + by parsing all the .py files as 'ast' and then hashing the + representation of the tree to remove all changes made by Black + between async/sync versions. 
+ """ + md5 = hashlib.md5() + for root, dirs, filenames in os.walk(x): + dirs[:] = sorted(dirs) + for filename in sorted(filenames): + if not re.search(r"\.pyi?$", filename): + continue + with open(os.path.join(root, filename), "r") as f: + md5.update(ast.dump(ast.parse(f.read())).encode("utf-8")) + return md5.hexdigest() + + +def main(): + mode = sys.argv[1] + assert mode in ("fix", "check") + + if mode == "fix": + todir = "/_sync/" + else: + todir = "/_unasync/" + shutil.rmtree("elasticsearch_dsl/_unasync/", ignore_errors=True) + + try: + unasync.unasync_files( + glob.glob("elasticsearch_dsl/_async/*.py"), + rules=[ + unasync.Rule( + fromdir="/_async/", + todir=todir, + additional_replacements={ + "ASYNC_IS_ASYNC": "SYNC_IS_ASYNC", + "async_scan": "scan", + "AsyncDocument": "Document", + "AsyncIndexMeta": "IndexMeta", + "AsyncFacetedSearch": "FacetedSearch", + "AsyncIndex": "Index", + "AsyncIndexTemplate": "IndexTemplate", + "AsyncMapping": "Mapping", + "AsyncSearch": "Search", + "AsyncMultiSearch": "MultiSearch", + "AsyncUpdateByQuery": "UpdateByQuery", + }, + ) + ], + ) + + if mode == "check" and ( + ast_hash("elasticsearch_dsl/_sync") + != ast_hash("elasticsearch_dsl/_unasync") + ): + print( + """======================================== + +Detected differences between +committed 'elasticsearch_dsl/_async' +and 'elasticsearch_dsl/_sync' code. +To fix this problem run 'nox -rs format' +and commit the resulting changes. + +========================================""" + ) + exit(1) + finally: + shutil.rmtree("elasticsearch_dsl/_unasync/", ignore_errors=True) + + +main() From 9eff645ddaafe0c41d2a7def6dd8d453e3b9540f Mon Sep 17 00:00:00 2001 From: Clament John <17064666+clmno@users.noreply.github.com> Date: Mon, 25 Jan 2021 19:02:28 +0530 Subject: [PATCH 02/10] Fix importing async classes --- elasticsearch_dsl/__init__.py | 2 -- elasticsearch_dsl/search.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/elasticsearch_dsl/__init__.py b/elasticsearch_dsl/__init__.py index e6f8d3b18..7ba153d1f 100644 --- a/elasticsearch_dsl/__init__.py +++ b/elasticsearch_dsl/__init__.py @@ -166,7 +166,6 @@ from .document import AsyncDocument # noqa: F401 from .faceted_search import AsyncFacetedSearch # noqa: F401 from .index import AsyncIndex, AsyncIndexTemplate # noqa: F401 - from .mapping import AsyncMapping # noqa: F401 from .search import AsyncMultiSearch, AsyncSearch # noqa: F401 from .update_by_query import AsyncUpdateByQuery # noqa: F401 @@ -176,7 +175,6 @@ "AsyncFacetedSearch", "AsyncIndex", "AsyncIndexTemplate", - "AsyncMapping", "AsyncSearch", "AsyncMultiSearch", "AsyncUpdateByQuery", diff --git a/elasticsearch_dsl/search.py b/elasticsearch_dsl/search.py index d066d5905..5c475bb7b 100644 --- a/elasticsearch_dsl/search.py +++ b/elasticsearch_dsl/search.py @@ -33,7 +33,7 @@ ] try: - from ._sync import AsyncMultiSearch, AsyncSearch # noqa: F401 + from ._async import AsyncMultiSearch, AsyncSearch # noqa: F401 __all__.extend(["AsyncSearch", "AsyncMultiSearch"]) except ImportError: From 0189724b92704de1d0ef6565e320f705b40e1f45 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 25 Jan 2021 15:41:44 -0600 Subject: [PATCH 03/10] Remove _(async|sync) top-level imports, use absolute instead --- elasticsearch_dsl/__init__.py | 2 + elasticsearch_dsl/_async/__init__.py | 20 -- elasticsearch_dsl/_async/document.py | 2 +- elasticsearch_dsl/_async/faceted_search.py | 2 +- elasticsearch_dsl/_async/index.py | 4 +- elasticsearch_dsl/_async/mapping.py | 180 ++++++++++++++++ 
elasticsearch_dsl/_async/search.py | 2 +- elasticsearch_dsl/_async/update_by_query.py | 2 +- elasticsearch_dsl/_base/__init__.py | 12 -- elasticsearch_dsl/_base/document.py | 99 +++++++++ elasticsearch_dsl/_base/mapping.py | 71 +++++++ elasticsearch_dsl/_sync/__init__.py | 20 -- elasticsearch_dsl/_sync/document.py | 2 +- elasticsearch_dsl/_sync/faceted_search.py | 2 +- elasticsearch_dsl/_sync/index.py | 2 +- elasticsearch_dsl/_sync/mapping.py | 180 ++++++++++++++++ elasticsearch_dsl/_sync/search.py | 2 +- elasticsearch_dsl/_sync/update_by_query.py | 2 +- elasticsearch_dsl/document.py | 68 +----- elasticsearch_dsl/faceted_search.py | 8 +- elasticsearch_dsl/index.py | 6 +- elasticsearch_dsl/mapping.py | 219 ++------------------ elasticsearch_dsl/search.py | 8 +- elasticsearch_dsl/update_by_query.py | 6 +- elasticsearch_dsl/utils.py | 11 + 25 files changed, 584 insertions(+), 348 deletions(-) create mode 100644 elasticsearch_dsl/_async/mapping.py create mode 100644 elasticsearch_dsl/_base/document.py create mode 100644 elasticsearch_dsl/_base/mapping.py create mode 100644 elasticsearch_dsl/_sync/mapping.py diff --git a/elasticsearch_dsl/__init__.py b/elasticsearch_dsl/__init__.py index 7ba153d1f..e6f8d3b18 100644 --- a/elasticsearch_dsl/__init__.py +++ b/elasticsearch_dsl/__init__.py @@ -166,6 +166,7 @@ from .document import AsyncDocument # noqa: F401 from .faceted_search import AsyncFacetedSearch # noqa: F401 from .index import AsyncIndex, AsyncIndexTemplate # noqa: F401 + from .mapping import AsyncMapping # noqa: F401 from .search import AsyncMultiSearch, AsyncSearch # noqa: F401 from .update_by_query import AsyncUpdateByQuery # noqa: F401 @@ -175,6 +176,7 @@ "AsyncFacetedSearch", "AsyncIndex", "AsyncIndexTemplate", + "AsyncMapping", "AsyncSearch", "AsyncMultiSearch", "AsyncUpdateByQuery", diff --git a/elasticsearch_dsl/_async/__init__.py b/elasticsearch_dsl/_async/__init__.py index 3a69026fc..2a87d183f 100644 --- a/elasticsearch_dsl/_async/__init__.py +++ b/elasticsearch_dsl/_async/__init__.py @@ -14,23 +14,3 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
- -try: - from .document import AsyncDocument, AsyncIndexMeta - from .faceted_search import AsyncFacetedSearch - from .index import AsyncIndex, AsyncIndexTemplate - from .search import AsyncMultiSearch, AsyncSearch - from .update_by_query import AsyncUpdateByQuery - - __all__ = [ - "AsyncDocument", - "AsyncIndexMeta", - "AsyncFacetedSearch", - "AsyncIndex", - "AsyncIndexTemplate", - "AsyncSearch", - "AsyncMultiSearch", - "AsyncUpdateByQuery", - ] -except (ImportError, SyntaxError): - pass diff --git a/elasticsearch_dsl/_async/document.py b/elasticsearch_dsl/_async/document.py index 3798ccbb9..bbcac7a28 100644 --- a/elasticsearch_dsl/_async/document.py +++ b/elasticsearch_dsl/_async/document.py @@ -25,8 +25,8 @@ from elasticsearch.exceptions import NotFoundError, RequestError from six import add_metaclass +from .._base.document import DocumentMeta from ..connections import get_connection -from ..document import DocumentMeta from ..exceptions import IllegalOperation, ValidationException from ..utils import DOC_META_FIELDS, META_FIELDS, ObjectBase, merge from .search import AsyncSearch diff --git a/elasticsearch_dsl/_async/faceted_search.py b/elasticsearch_dsl/_async/faceted_search.py index 225a53d93..df31b888e 100644 --- a/elasticsearch_dsl/_async/faceted_search.py +++ b/elasticsearch_dsl/_async/faceted_search.py @@ -17,7 +17,7 @@ from six import iteritems, itervalues -from .._base import FacetedResponse +from .._base.faceted_search import FacetedResponse from ..query import MatchAll from .search import AsyncSearch diff --git a/elasticsearch_dsl/_async/index.py b/elasticsearch_dsl/_async/index.py index d104c146e..4bbedfbd3 100644 --- a/elasticsearch_dsl/_async/index.py +++ b/elasticsearch_dsl/_async/index.py @@ -18,8 +18,8 @@ from .. import analysis from ..connections import get_connection from ..exceptions import IllegalOperation -from ..mapping import Mapping from ..utils import merge +from .mapping import AsyncMapping from .search import AsyncSearch from .update_by_query import AsyncUpdateByQuery from .utils import ASYNC_IS_ASYNC @@ -73,7 +73,7 @@ def __init__(self, name, using="default"): def get_or_create_mapping(self): if self._mapping is None: - self._mapping = Mapping() + self._mapping = AsyncMapping() return self._mapping def as_template(self, template_name, pattern=None, order=None): diff --git a/elasticsearch_dsl/_async/mapping.py b/elasticsearch_dsl/_async/mapping.py new file mode 100644 index 000000000..e45b0e794 --- /dev/null +++ b/elasticsearch_dsl/_async/mapping.py @@ -0,0 +1,180 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +try: + import collections.abc as collections_abc # only works on python 3.3+ +except ImportError: + import collections as collections_abc + +from itertools import chain + +from six import iteritems + +from .._base.mapping import Properties +from ..connections import get_connection +from ..field import Nested, Text +from .utils import ASYNC_IS_ASYNC + + +class AsyncMapping(object): + def __init__(self): + self.properties = Properties() + self._meta = {} + + def __repr__(self): + return str(type(self.__name__)) + "()" + + def _clone(self): + m = AsyncMapping() + m.properties._params = self.properties._params.copy() + return m + + @classmethod + async def from_es(cls, index, using="default"): + m = cls() + await m.update_from_es(index, using) + return m + + def resolve_nested(self, field_path): + field = self + nested = [] + parts = field_path.split(".") + for i, step in enumerate(parts): + try: + field = field[step] + except KeyError: + return (), None + if isinstance(field, Nested): + nested.append(".".join(parts[: i + 1])) + return nested, field + + def resolve_field(self, field_path): + field = self + for step in field_path.split("."): + try: + field = field[step] + except KeyError: + return + return field + + def _collect_analysis(self): + analysis = {} + fields = [] + if "_all" in self._meta: + fields.append(Text(**self._meta["_all"])) + + for f in chain(fields, self.properties._collect_fields()): + for analyzer_name in ( + "analyzer", + "normalizer", + "search_analyzer", + "search_quote_analyzer", + ): + if not hasattr(f, analyzer_name): + continue + analyzer = getattr(f, analyzer_name) + d = analyzer.get_analysis_definition() + # empty custom analyzer, probably already defined out of our control + if not d: + continue + + # merge the definition + # TODO: conflict detection/resolution + for key in d: + analysis.setdefault(key, {}).update(d[key]) + + return analysis + + async def save(self, index, using="default"): + from .index import AsyncIndex + + index = AsyncIndex(index, using=using) + index.mapping(self) + return await index.save() + + async def update_from_es(self, index, using="default"): + es = get_connection(using, is_async=ASYNC_IS_ASYNC) + raw = await es.indices.get_mapping(index=index) + _, raw = raw.popitem() + self._update_from_dict(raw["mappings"]) + + def _update_from_dict(self, raw): + for name, definition in iteritems(raw.get("properties", {})): + self.field(name, definition) + + # metadata like _all etc + for name, value in iteritems(raw): + if name != "properties": + if isinstance(value, collections_abc.Mapping): + self.meta(name, **value) + else: + self.meta(name, value) + + def update(self, mapping, update_only=False): + for name in mapping: + if update_only and name in self: + # nested and inner objects, merge recursively + if hasattr(self[name], "update"): + # FIXME only merge subfields, not the settings + self[name].update(mapping[name], update_only) + continue + self.field(name, mapping[name]) + + if update_only: + for name in mapping._meta: + if name not in self._meta: + self._meta[name] = mapping._meta[name] + else: + self._meta.update(mapping._meta) + + def __contains__(self, name): + return name in self.properties.properties + + def __getitem__(self, name): + return self.properties.properties[name] + + def __iter__(self): + return iter(self.properties.properties) + + def field(self, *args, **kwargs): + self.properties.field(*args, **kwargs) + return self + + def meta(self, name, params=None, **kwargs): + from ..mapping import META_FIELDS + + if not 
name.startswith("_") and name not in META_FIELDS: + name = "_" + name + + if params and kwargs: + raise ValueError("Meta configs cannot have both value and a dictionary.") + + self._meta[name] = kwargs if params is None else params + return self + + def to_dict(self): + meta = self._meta + + # hard coded serialization of analyzers in _all + if "_all" in meta: + meta = meta.copy() + _all = meta["_all"] = meta["_all"].copy() + for f in ("analyzer", "search_analyzer", "search_quote_analyzer"): + if hasattr(_all.get(f, None), "to_dict"): + _all[f] = _all[f].to_dict() + meta.update(self.properties.to_dict()) + return meta diff --git a/elasticsearch_dsl/_async/search.py b/elasticsearch_dsl/_async/search.py index 477457d42..7a31583ff 100644 --- a/elasticsearch_dsl/_async/search.py +++ b/elasticsearch_dsl/_async/search.py @@ -21,7 +21,7 @@ from elasticsearch.helpers import async_scan from six import iteritems, string_types -from .._base import AggsProxy, ProxyDescriptor, QueryProxy, Request +from .._base.search import AggsProxy, ProxyDescriptor, QueryProxy, Request from ..aggs import A from ..connections import get_connection from ..exceptions import IllegalOperation diff --git a/elasticsearch_dsl/_async/update_by_query.py b/elasticsearch_dsl/_async/update_by_query.py index 31447af9a..0fc4cc3cf 100644 --- a/elasticsearch_dsl/_async/update_by_query.py +++ b/elasticsearch_dsl/_async/update_by_query.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -from .._base import ProxyDescriptor, QueryProxy, Request +from .._base.search import ProxyDescriptor, QueryProxy, Request from ..connections import get_connection from ..query import Bool, Q from ..response import UpdateByQueryResponse diff --git a/elasticsearch_dsl/_base/__init__.py b/elasticsearch_dsl/_base/__init__.py index 180fb3298..2a87d183f 100644 --- a/elasticsearch_dsl/_base/__init__.py +++ b/elasticsearch_dsl/_base/__init__.py @@ -14,15 +14,3 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - -from .faceted_search import FacetedResponse -from .search import AggsProxy, ProxyDescriptor, QueryProxy, Request, Response - -__all__ = [ - "FacetedResponse", - "AggsProxy", - "ProxyDescriptor", - "QueryProxy", - "Request", - "Response", -] diff --git a/elasticsearch_dsl/_base/document.py b/elasticsearch_dsl/_base/document.py new file mode 100644 index 000000000..f2460e862 --- /dev/null +++ b/elasticsearch_dsl/_base/document.py @@ -0,0 +1,99 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+from six import add_metaclass, iteritems
+
+from ..field import Field
+from ..utils import ObjectBase, iscoroutinefunction
+
+
+class MetaField(object):
+    def __init__(self, *args, **kwargs):
+        self.args, self.kwargs = args, kwargs
+
+
+class DocumentMeta(type):
+    def __new__(cls, name, bases, attrs):
+        # DocumentMeta filters attrs in place
+        attrs["_doc_type"] = DocumentOptions(name, bases, attrs)
+        return super(DocumentMeta, cls).__new__(cls, name, bases, attrs)
+
+
+class DocumentOptions(object):
+    def __init__(self, name, bases, attrs):
+        meta = attrs.pop("Meta", None)
+
+        # Decide whether we should use an 'AsyncMapping' or sync 'Mapping'
+        # class based on whether the document's 'init()' method is async or not.
+        if "init" in attrs and iscoroutinefunction(attrs["init"]):
+            from ..mapping import AsyncMapping
+
+            default_mapping_cls = AsyncMapping
+        else:
+            from ..mapping import Mapping
+
+            default_mapping_cls = Mapping
+
+        # create the mapping instance
+        try:
+            meta_mapping = meta.mapping
+
+            # If a synchronous 'Mapping' is defined on an
+            # 'AsyncDocument' or the reverse, we correct the
+            # definition by copying it onto a new instance
+            # with the proper I/O flavoring.
+            if not isinstance(meta_mapping, default_mapping_cls):
+                meta_mapping = default_mapping_cls()
+                meta_mapping.update(meta.mapping)
+
+            self.mapping = meta_mapping
+        except AttributeError:
+            self.mapping = default_mapping_cls()
+
+        # register all declared fields into the mapping
+        for name, value in list(iteritems(attrs)):
+            if isinstance(value, Field):
+                self.mapping.field(name, value)
+                del attrs[name]
+
+        # add all the mappings for meta fields
+        for name in dir(meta):
+            if isinstance(getattr(meta, name, None), MetaField):
+                params = getattr(meta, name)
+                self.mapping.meta(name, *params.args, **params.kwargs)
+
+        # document inheritance - include the fields from parents' mappings
+        for b in bases:
+            if hasattr(b, "_doc_type") and hasattr(b._doc_type, "mapping"):
+                self.mapping.update(b._doc_type.mapping, update_only=True)
+
+    @property
+    def name(self):
+        return self.mapping.properties.name
+
+
+@add_metaclass(DocumentMeta)
+class InnerDoc(ObjectBase):
+    """
+    Common class for inner documents like Object or Nested
+    """
+
+    @classmethod
+    def from_es(cls, data, data_only=False):
+        if data_only:
+            data = {"_source": data}
+        return super(InnerDoc, cls).from_es(data)
diff --git a/elasticsearch_dsl/_base/mapping.py b/elasticsearch_dsl/_base/mapping.py
new file mode 100644
index 000000000..05052c682
--- /dev/null
+++ b/elasticsearch_dsl/_base/mapping.py
@@ -0,0 +1,71 @@
+# Licensed to Elasticsearch B.V. under one or more contributor
+# license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright
+# ownership. Elasticsearch B.V. licenses this file to you under
+# the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +from six import itervalues + +from ..field import construct_field +from ..utils import DslBase + + +class Properties(DslBase): + name = "properties" + _param_defs = {"properties": {"type": "field", "hash": True}} + + def __init__(self): + super(Properties, self).__init__() + + def __repr__(self): + return "Properties()" + + def __getitem__(self, name): + return self.properties[name] + + def __contains__(self, name): + return name in self.properties + + def to_dict(self): + return super(Properties, self).to_dict()["properties"] + + def field(self, name, *args, **kwargs): + self.properties[name] = construct_field(*args, **kwargs) + return self + + def _collect_fields(self): + """ Iterate over all Field objects within, including multi fields. """ + for f in itervalues(self.properties.to_dict()): + yield f + # multi fields + if hasattr(f, "fields"): + for inner_f in itervalues(f.fields.to_dict()): + yield inner_f + # nested and inner objects + if hasattr(f, "_collect_fields"): + for inner_f in f._collect_fields(): + yield inner_f + + def update(self, other_object): + if not hasattr(other_object, "properties"): + # not an inner/nested object, no merge possible + return + + our, other = self.properties, other_object.properties + for name in other: + if name in our: + if hasattr(our[name], "update"): + our[name].update(other[name]) + continue + our[name] = other[name] diff --git a/elasticsearch_dsl/_sync/__init__.py b/elasticsearch_dsl/_sync/__init__.py index 810250ce8..2a87d183f 100644 --- a/elasticsearch_dsl/_sync/__init__.py +++ b/elasticsearch_dsl/_sync/__init__.py @@ -14,23 +14,3 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - -try: - from .document import Document, IndexMeta - from .faceted_search import FacetedSearch - from .index import Index, IndexTemplate - from .search import MultiSearch, Search - from .update_by_query import UpdateByQuery - - __all__ = [ - "Document", - "IndexMeta", - "FacetedSearch", - "Index", - "IndexTemplate", - "Search", - "MultiSearch", - "UpdateByQuery", - ] -except (ImportError, SyntaxError): - pass diff --git a/elasticsearch_dsl/_sync/document.py b/elasticsearch_dsl/_sync/document.py index 18fb01bbe..8211da0e4 100644 --- a/elasticsearch_dsl/_sync/document.py +++ b/elasticsearch_dsl/_sync/document.py @@ -25,8 +25,8 @@ from elasticsearch.exceptions import NotFoundError, RequestError from six import add_metaclass +from .._base.document import DocumentMeta from ..connections import get_connection -from ..document import DocumentMeta from ..exceptions import IllegalOperation, ValidationException from ..utils import DOC_META_FIELDS, META_FIELDS, ObjectBase, merge from .search import Search diff --git a/elasticsearch_dsl/_sync/faceted_search.py b/elasticsearch_dsl/_sync/faceted_search.py index af4ff9fdd..498f8993d 100644 --- a/elasticsearch_dsl/_sync/faceted_search.py +++ b/elasticsearch_dsl/_sync/faceted_search.py @@ -17,7 +17,7 @@ from six import iteritems, itervalues -from .._base import FacetedResponse +from .._base.faceted_search import FacetedResponse from ..query import MatchAll from .search import Search diff --git a/elasticsearch_dsl/_sync/index.py b/elasticsearch_dsl/_sync/index.py index f9b812918..519bb4b07 100644 --- a/elasticsearch_dsl/_sync/index.py +++ b/elasticsearch_dsl/_sync/index.py @@ -18,8 +18,8 @@ from .. 
import analysis from ..connections import get_connection from ..exceptions import IllegalOperation -from ..mapping import Mapping from ..utils import merge +from .mapping import Mapping from .search import Search from .update_by_query import UpdateByQuery from .utils import SYNC_IS_ASYNC diff --git a/elasticsearch_dsl/_sync/mapping.py b/elasticsearch_dsl/_sync/mapping.py new file mode 100644 index 000000000..c462f00a2 --- /dev/null +++ b/elasticsearch_dsl/_sync/mapping.py @@ -0,0 +1,180 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +try: + import collections.abc as collections_abc # only works on python 3.3+ +except ImportError: + import collections as collections_abc + +from itertools import chain + +from six import iteritems + +from .._base.mapping import Properties +from ..connections import get_connection +from ..field import Nested, Text +from .utils import SYNC_IS_ASYNC + + +class Mapping(object): + def __init__(self): + self.properties = Properties() + self._meta = {} + + def __repr__(self): + return str(type(self.__name__)) + "()" + + def _clone(self): + m = Mapping() + m.properties._params = self.properties._params.copy() + return m + + @classmethod + def from_es(cls, index, using="default"): + m = cls() + m.update_from_es(index, using) + return m + + def resolve_nested(self, field_path): + field = self + nested = [] + parts = field_path.split(".") + for i, step in enumerate(parts): + try: + field = field[step] + except KeyError: + return (), None + if isinstance(field, Nested): + nested.append(".".join(parts[: i + 1])) + return nested, field + + def resolve_field(self, field_path): + field = self + for step in field_path.split("."): + try: + field = field[step] + except KeyError: + return + return field + + def _collect_analysis(self): + analysis = {} + fields = [] + if "_all" in self._meta: + fields.append(Text(**self._meta["_all"])) + + for f in chain(fields, self.properties._collect_fields()): + for analyzer_name in ( + "analyzer", + "normalizer", + "search_analyzer", + "search_quote_analyzer", + ): + if not hasattr(f, analyzer_name): + continue + analyzer = getattr(f, analyzer_name) + d = analyzer.get_analysis_definition() + # empty custom analyzer, probably already defined out of our control + if not d: + continue + + # merge the definition + # TODO: conflict detection/resolution + for key in d: + analysis.setdefault(key, {}).update(d[key]) + + return analysis + + def save(self, index, using="default"): + from .index import Index + + index = Index(index, using=using) + index.mapping(self) + return index.save() + + def update_from_es(self, index, using="default"): + es = get_connection(using, is_async=SYNC_IS_ASYNC) + raw = es.indices.get_mapping(index=index) + _, raw = raw.popitem() + self._update_from_dict(raw["mappings"]) + + def 
_update_from_dict(self, raw): + for name, definition in iteritems(raw.get("properties", {})): + self.field(name, definition) + + # metadata like _all etc + for name, value in iteritems(raw): + if name != "properties": + if isinstance(value, collections_abc.Mapping): + self.meta(name, **value) + else: + self.meta(name, value) + + def update(self, mapping, update_only=False): + for name in mapping: + if update_only and name in self: + # nested and inner objects, merge recursively + if hasattr(self[name], "update"): + # FIXME only merge subfields, not the settings + self[name].update(mapping[name], update_only) + continue + self.field(name, mapping[name]) + + if update_only: + for name in mapping._meta: + if name not in self._meta: + self._meta[name] = mapping._meta[name] + else: + self._meta.update(mapping._meta) + + def __contains__(self, name): + return name in self.properties.properties + + def __getitem__(self, name): + return self.properties.properties[name] + + def __iter__(self): + return iter(self.properties.properties) + + def field(self, *args, **kwargs): + self.properties.field(*args, **kwargs) + return self + + def meta(self, name, params=None, **kwargs): + from ..mapping import META_FIELDS + + if not name.startswith("_") and name not in META_FIELDS: + name = "_" + name + + if params and kwargs: + raise ValueError("Meta configs cannot have both value and a dictionary.") + + self._meta[name] = kwargs if params is None else params + return self + + def to_dict(self): + meta = self._meta + + # hard coded serialization of analyzers in _all + if "_all" in meta: + meta = meta.copy() + _all = meta["_all"] = meta["_all"].copy() + for f in ("analyzer", "search_analyzer", "search_quote_analyzer"): + if hasattr(_all.get(f, None), "to_dict"): + _all[f] = _all[f].to_dict() + meta.update(self.properties.to_dict()) + return meta diff --git a/elasticsearch_dsl/_sync/search.py b/elasticsearch_dsl/_sync/search.py index cb8abb854..640197da4 100644 --- a/elasticsearch_dsl/_sync/search.py +++ b/elasticsearch_dsl/_sync/search.py @@ -21,7 +21,7 @@ from elasticsearch.helpers import scan from six import iteritems, string_types -from .._base import AggsProxy, ProxyDescriptor, QueryProxy, Request +from .._base.search import AggsProxy, ProxyDescriptor, QueryProxy, Request from ..aggs import A from ..connections import get_connection from ..exceptions import IllegalOperation diff --git a/elasticsearch_dsl/_sync/update_by_query.py b/elasticsearch_dsl/_sync/update_by_query.py index 2f299cb96..fd7fe2e7a 100644 --- a/elasticsearch_dsl/_sync/update_by_query.py +++ b/elasticsearch_dsl/_sync/update_by_query.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -from .._base import ProxyDescriptor, QueryProxy, Request +from .._base.search import ProxyDescriptor, QueryProxy, Request from ..connections import get_connection from ..query import Bool, Q from ..response import UpdateByQueryResponse diff --git a/elasticsearch_dsl/document.py b/elasticsearch_dsl/document.py index 4bffeaea8..38286454b 100644 --- a/elasticsearch_dsl/document.py +++ b/elasticsearch_dsl/document.py @@ -15,68 +15,8 @@ # specific language governing permissions and limitations # under the License. 
-from six import add_metaclass, iteritems - -from .field import Field -from .mapping import Mapping -from .utils import ObjectBase - - -class MetaField(object): - def __init__(self, *args, **kwargs): - self.args, self.kwargs = args, kwargs - - -class DocumentMeta(type): - def __new__(cls, name, bases, attrs): - # DocumentMeta filters attrs in place - attrs["_doc_type"] = DocumentOptions(name, bases, attrs) - return super(DocumentMeta, cls).__new__(cls, name, bases, attrs) - - -class DocumentOptions(object): - def __init__(self, name, bases, attrs): - meta = attrs.pop("Meta", None) - - # create the mapping instance - self.mapping = getattr(meta, "mapping", Mapping()) - - # register all declared fields into the mapping - for name, value in list(iteritems(attrs)): - if isinstance(value, Field): - self.mapping.field(name, value) - del attrs[name] - - # add all the mappings for meta fields - for name in dir(meta): - if isinstance(getattr(meta, name, None), MetaField): - params = getattr(meta, name) - self.mapping.meta(name, *params.args, **params.kwargs) - - # document inheritance - include the fields from parents' mappings - for b in bases: - if hasattr(b, "_doc_type") and hasattr(b._doc_type, "mapping"): - self.mapping.update(b._doc_type.mapping, update_only=True) - - @property - def name(self): - return self.mapping.properties.name - - -@add_metaclass(DocumentMeta) -class InnerDoc(ObjectBase): - """ - Common class for inner documents like Object or Nested - """ - - @classmethod - def from_es(cls, data, data_only=False): - if data_only: - data = {"_source": data} - return super(InnerDoc, cls).from_es(data) - - -from ._sync import Document, IndexMeta +from ._base.document import DocumentMeta, DocumentOptions, InnerDoc, MetaField +from ._sync.document import Document, IndexMeta __all__ = [ "Document", @@ -88,8 +28,8 @@ def from_es(cls, data, data_only=False): ] try: - from ._async import AsyncDocument, AsyncIndexMeta # noqa: F401 + from ._async.document import AsyncDocument, AsyncIndexMeta # noqa: F401 __all__.extend(["AsyncDocument", "AsyncIndexMeta"]) -except ImportError: +except (ImportError, SyntaxError): pass diff --git a/elasticsearch_dsl/faceted_search.py b/elasticsearch_dsl/faceted_search.py index 5dd9e2e17..1a33c5d0d 100644 --- a/elasticsearch_dsl/faceted_search.py +++ b/elasticsearch_dsl/faceted_search.py @@ -238,8 +238,8 @@ def add_filter(self, filter_values): return Nested(path=self._path, query=inner_q) -from ._base import FacetedResponse -from ._sync import FacetedSearch +from ._base.faceted_search import FacetedResponse +from ._sync.faceted_search import FacetedSearch __all__ = [ "FacetedSearch", @@ -252,8 +252,8 @@ def add_filter(self, filter_values): ] try: - from ._async import AsyncFacetedSearch # noqa: F401 + from ._async.faceted_search import AsyncFacetedSearch # noqa: F401 __all__.append("AsyncFacetedSearch") -except ImportError: +except (ImportError, SyntaxError): pass diff --git a/elasticsearch_dsl/index.py b/elasticsearch_dsl/index.py index 5047cfcc4..0a3dd5189 100644 --- a/elasticsearch_dsl/index.py +++ b/elasticsearch_dsl/index.py @@ -15,12 +15,12 @@ # specific language governing permissions and limitations # under the License. 
-from ._sync import Index, IndexTemplate +from ._sync.index import Index, IndexTemplate __all__ = ["Index", "IndexTemplate"] try: - from ._async import AsyncIndex, AsyncIndexTemplate # noqa: F401 + from ._async.index import AsyncIndex, AsyncIndexTemplate # noqa: F401 __all__.extend( [ @@ -28,5 +28,5 @@ "AsyncIndexTemplate", ] ) -except ImportError: +except (ImportError, SyntaxError): pass diff --git a/elasticsearch_dsl/mapping.py b/elasticsearch_dsl/mapping.py index 6d1bc8bfd..106ebc375 100644 --- a/elasticsearch_dsl/mapping.py +++ b/elasticsearch_dsl/mapping.py @@ -15,19 +15,6 @@ # specific language governing permissions and limitations # under the License. -try: - import collections.abc as collections_abc # only works on python 3.3+ -except ImportError: - import collections as collections_abc - -from itertools import chain - -from six import iteritems, itervalues - -from .connections import get_connection -from .field import Nested, Text, construct_field -from .utils import DslBase - META_FIELDS = frozenset( ( "dynamic", @@ -40,200 +27,18 @@ ) ) +__all__ = [ + "Properties", + "Mapping", + "META_FIELDS", +] -class Properties(DslBase): - name = "properties" - _param_defs = {"properties": {"type": "field", "hash": True}} - - def __init__(self): - super(Properties, self).__init__() - - def __repr__(self): - return "Properties()" - - def __getitem__(self, name): - return self.properties[name] - - def __contains__(self, name): - return name in self.properties - - def to_dict(self): - return super(Properties, self).to_dict()["properties"] - - def field(self, name, *args, **kwargs): - self.properties[name] = construct_field(*args, **kwargs) - return self - - def _collect_fields(self): - """ Iterate over all Field objects within, including multi fields. """ - for f in itervalues(self.properties.to_dict()): - yield f - # multi fields - if hasattr(f, "fields"): - for inner_f in itervalues(f.fields.to_dict()): - yield inner_f - # nested and inner objects - if hasattr(f, "_collect_fields"): - for inner_f in f._collect_fields(): - yield inner_f - - def update(self, other_object): - if not hasattr(other_object, "properties"): - # not an inner/nested object, no merge possible - return - - our, other = self.properties, other_object.properties - for name in other: - if name in our: - if hasattr(our[name], "update"): - our[name].update(other[name]) - continue - our[name] = other[name] - - -class Mapping(object): - def __init__(self): - self.properties = Properties() - self._meta = {} - - def __repr__(self): - return "Mapping()" - - def _clone(self): - m = Mapping() - m.properties._params = self.properties._params.copy() - return m - - @classmethod - def from_es(cls, index, using="default"): - m = cls() - m.update_from_es(index, using) - return m - - def resolve_nested(self, field_path): - field = self - nested = [] - parts = field_path.split(".") - for i, step in enumerate(parts): - try: - field = field[step] - except KeyError: - return (), None - if isinstance(field, Nested): - nested.append(".".join(parts[: i + 1])) - return nested, field +from ._base.mapping import Properties +from ._sync.mapping import Mapping - def resolve_field(self, field_path): - field = self - for step in field_path.split("."): - try: - field = field[step] - except KeyError: - return - return field - - def _collect_analysis(self): - analysis = {} - fields = [] - if "_all" in self._meta: - fields.append(Text(**self._meta["_all"])) - - for f in chain(fields, self.properties._collect_fields()): - for analyzer_name in ( - "analyzer", 
- "normalizer", - "search_analyzer", - "search_quote_analyzer", - ): - if not hasattr(f, analyzer_name): - continue - analyzer = getattr(f, analyzer_name) - d = analyzer.get_analysis_definition() - # empty custom analyzer, probably already defined out of our control - if not d: - continue - - # merge the definition - # TODO: conflict detection/resolution - for key in d: - analysis.setdefault(key, {}).update(d[key]) - - return analysis - - def save(self, index, using="default"): - from .index import Index - - index = Index(index, using=using) - index.mapping(self) - return index.save() - - def update_from_es(self, index, using="default"): - es = get_connection(using) - raw = es.indices.get_mapping(index=index) - _, raw = raw.popitem() - self._update_from_dict(raw["mappings"]) - - def _update_from_dict(self, raw): - for name, definition in iteritems(raw.get("properties", {})): - self.field(name, definition) - - # metadata like _all etc - for name, value in iteritems(raw): - if name != "properties": - if isinstance(value, collections_abc.Mapping): - self.meta(name, **value) - else: - self.meta(name, value) - - def update(self, mapping, update_only=False): - for name in mapping: - if update_only and name in self: - # nested and inner objects, merge recursively - if hasattr(self[name], "update"): - # FIXME only merge subfields, not the settings - self[name].update(mapping[name], update_only) - continue - self.field(name, mapping[name]) - - if update_only: - for name in mapping._meta: - if name not in self._meta: - self._meta[name] = mapping._meta[name] - else: - self._meta.update(mapping._meta) - - def __contains__(self, name): - return name in self.properties.properties - - def __getitem__(self, name): - return self.properties.properties[name] - - def __iter__(self): - return iter(self.properties.properties) - - def field(self, *args, **kwargs): - self.properties.field(*args, **kwargs) - return self - - def meta(self, name, params=None, **kwargs): - if not name.startswith("_") and name not in META_FIELDS: - name = "_" + name - - if params and kwargs: - raise ValueError("Meta configs cannot have both value and a dictionary.") - - self._meta[name] = kwargs if params is None else params - return self - - def to_dict(self): - meta = self._meta +try: + from ._async.mapping import AsyncMapping # noqa: F401 - # hard coded serialization of analyzers in _all - if "_all" in meta: - meta = meta.copy() - _all = meta["_all"] = meta["_all"].copy() - for f in ("analyzer", "search_analyzer", "search_quote_analyzer"): - if hasattr(_all.get(f, None), "to_dict"): - _all[f] = _all[f].to_dict() - meta.update(self.properties.to_dict()) - return meta + __all__.append("AsyncMapping") +except (ImportError, SyntaxError): + pass diff --git a/elasticsearch_dsl/search.py b/elasticsearch_dsl/search.py index 5c475bb7b..daece721e 100644 --- a/elasticsearch_dsl/search.py +++ b/elasticsearch_dsl/search.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. -from ._base import AggsProxy, ProxyDescriptor, QueryProxy, Request -from ._sync import MultiSearch, Search +from ._base.search import AggsProxy, ProxyDescriptor, QueryProxy, Request +from ._sync.search import MultiSearch, Search # 'Q' is here because test suite uses it, don't # want to break users relying on it. 
@@ -33,8 +33,8 @@ ] try: - from ._async import AsyncMultiSearch, AsyncSearch # noqa: F401 + from ._async.search import AsyncMultiSearch, AsyncSearch # noqa: F401 __all__.extend(["AsyncSearch", "AsyncMultiSearch"]) -except ImportError: +except (ImportError, SyntaxError): pass diff --git a/elasticsearch_dsl/update_by_query.py b/elasticsearch_dsl/update_by_query.py index a87869c06..476be8ac2 100644 --- a/elasticsearch_dsl/update_by_query.py +++ b/elasticsearch_dsl/update_by_query.py @@ -15,15 +15,15 @@ # specific language governing permissions and limitations # under the License. -from ._sync import UpdateByQuery +from ._sync.update_by_query import UpdateByQuery __all__ = [ "UpdateByQuery", ] try: - from ._async import AsyncUpdateByQuery # noqa: F401 + from ._async.update_by_query import AsyncUpdateByQuery # noqa: F401 __all__.append("AsyncUpdateByQuery") -except ImportError: +except (ImportError, SyntaxError): pass diff --git a/elasticsearch_dsl/utils.py b/elasticsearch_dsl/utils.py index a081670e0..ea06746e9 100644 --- a/elasticsearch_dsl/utils.py +++ b/elasticsearch_dsl/utils.py @@ -22,6 +22,7 @@ except ImportError: import collections as collections_abc +import inspect from copy import copy from six import add_metaclass, iteritems @@ -582,3 +583,13 @@ def recursive_to_dict(data): elif isinstance(data, collections_abc.Mapping): return {key: recursive_to_dict(val) for key, val in data.items()} return data + + +def iscoroutinefunction(x): + """Detects whether a function returns a coroutine. Specifically + returns 'True' if the function is defined via 'async def' + """ + try: + return inspect.iscoroutinefunction(x) + except AttributeError: + return False From 547eef0560cffaa018c20b064263de33271e5eea Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 25 Jan 2021 17:15:43 -0600 Subject: [PATCH 04/10] Update test suite for async classes --- tests/test_async/__init__.py | 16 ++++++++++ tests/test_async/conftest.py | 23 ++++++++++++++ tests/test_async/test_document.py | 37 +++++++++++++++++++++ tests/test_mapping.py | 48 +++++++++++++++++++--------- tests/test_update_by_query.py | 53 +++++++++++++++++++++---------- 5 files changed, 146 insertions(+), 31 deletions(-) create mode 100644 tests/test_async/__init__.py create mode 100644 tests/test_async/conftest.py create mode 100644 tests/test_async/test_document.py diff --git a/tests/test_async/__init__.py b/tests/test_async/__init__.py new file mode 100644 index 000000000..2a87d183f --- /dev/null +++ b/tests/test_async/__init__.py @@ -0,0 +1,16 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/test_async/conftest.py b/tests/test_async/conftest.py new file mode 100644 index 000000000..66c5e540f --- /dev/null +++ b/tests/test_async/conftest.py @@ -0,0 +1,23 @@ +# Licensed to Elasticsearch B.V. 
under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest + +try: + from elasticsearch import AsyncElasticsearch +except ImportError: + pytest.skip("asyncio support must be available") diff --git a/tests/test_async/test_document.py b/tests/test_async/test_document.py new file mode 100644 index 000000000..a211a71a0 --- /dev/null +++ b/tests/test_async/test_document.py @@ -0,0 +1,37 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from elasticsearch_dsl import ( + AsyncDocument, + AsyncIndex, + AsyncMapping, + field, +) + + +def test_async_document_index_mapping(): + class ExampleDoc1(AsyncDocument): + title = field.Text() + + class Index: + name = "example-doc-1" + + index = ExampleDoc1._index + assert isinstance(index, AsyncIndex) + + mapping = index.get_or_create_mapping() + assert isinstance(mapping, AsyncMapping) diff --git a/tests/test_mapping.py b/tests/test_mapping.py index aa4939fbc..2141b1e53 100644 --- a/tests/test_mapping.py +++ b/tests/test_mapping.py @@ -17,11 +17,22 @@ import json +import pytest + from elasticsearch_dsl import Keyword, Nested, Text, analysis, mapping +mapping_classes = [mapping.Mapping] + +# If 'AsyncMapping' is available test that as well +if hasattr(mapping, "AsyncMapping"): + mapping_classes.append(mapping.AsyncMapping) + +mapping_cls = pytest.mark.parametrize("mapping_cls", mapping_classes) + -def test_mapping_can_has_fields(): - m = mapping.Mapping() +@mapping_cls +def test_mapping_can_has_fields(mapping_cls): + m = mapping_cls() m.field("name", "text").field("tags", "keyword") assert { @@ -29,7 +40,8 @@ def test_mapping_can_has_fields(): } == m.to_dict() -def test_mapping_update_is_recursive(): +@mapping_cls +def test_mapping_update_is_recursive(mapping_cls): m1 = mapping.Mapping() m1.field("title", "text") m1.field("author", "object") @@ -62,8 +74,9 @@ def test_mapping_update_is_recursive(): } == m1.to_dict() -def test_properties_can_iterate_over_all_the_fields(): - m = mapping.Mapping() +@mapping_cls +def test_properties_can_iterate_over_all_the_fields(mapping_cls): + m = mapping_cls() m.field("f1", "text", test_attr="f1", fields={"f2": Keyword(test_attr="f2")}) m.field("f3", Nested(test_attr="f3", properties={"f4": Text(test_attr="f4")})) @@ -72,7 +85,8 @@ def test_properties_can_iterate_over_all_the_fields(): } -def test_mapping_can_collect_all_analyzers_and_normalizers(): +@mapping_cls +def test_mapping_can_collect_all_analyzers_and_normalizers(mapping_cls): a1 = analysis.analyzer( "my_analyzer1", tokenizer="keyword", @@ -100,7 +114,7 @@ def test_mapping_can_collect_all_analyzers_and_normalizers(): ) n3 = analysis.normalizer("unknown_custom") - m = mapping.Mapping() + m = mapping_cls() m.field( "title", "text", @@ -145,7 +159,8 @@ def test_mapping_can_collect_all_analyzers_and_normalizers(): assert json.loads(json.dumps(m.to_dict())) == m.to_dict() -def test_mapping_can_collect_multiple_analyzers(): +@mapping_cls +def test_mapping_can_collect_multiple_analyzers(mapping_cls): a1 = analysis.analyzer( "my_analyzer1", tokenizer="keyword", @@ -159,7 +174,7 @@ def test_mapping_can_collect_multiple_analyzers(): tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3), filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])], ) - m = mapping.Mapping() + m = mapping_cls() m.field("title", "text", analyzer=a1, search_analyzer=a2) m.field( "text", @@ -191,9 +206,10 @@ def test_mapping_can_collect_multiple_analyzers(): } == m._collect_analysis() -def test_even_non_custom_analyzers_can_have_params(): +@mapping_cls +def test_even_non_custom_analyzers_can_have_params(mapping_cls): a1 = analysis.analyzer("whitespace", type="pattern", pattern=r"\\s+") - m = mapping.Mapping() + m = mapping_cls() m.field("title", "text", analyzer=a1) assert { @@ -201,15 +217,17 @@ def test_even_non_custom_analyzers_can_have_params(): } == m._collect_analysis() -def test_resolve_field_can_resolve_multifields(): - m = mapping.Mapping() +@mapping_cls +def 
test_resolve_field_can_resolve_multifields(mapping_cls): + m = mapping_cls() m.field("title", "text", fields={"keyword": Keyword()}) assert isinstance(m.resolve_field("title.keyword"), Keyword) -def test_resolve_nested(): - m = mapping.Mapping() +@mapping_cls +def test_resolve_nested(mapping_cls): + m = mapping_cls() m.field("n1", "nested", properties={"n2": Nested(properties={"k1": Keyword()})}) m.field("k2", "keyword") diff --git a/tests/test_update_by_query.py b/tests/test_update_by_query.py index c10fde878..e1c2fc31c 100644 --- a/tests/test_update_by_query.py +++ b/tests/test_update_by_query.py @@ -17,18 +17,34 @@ from copy import deepcopy +import pytest + from elasticsearch_dsl import Q, UpdateByQuery from elasticsearch_dsl.response import UpdateByQueryResponse +ubq_classes = [UpdateByQuery] + +# If 'AsyncUpdateByQuery' is available test that as well +try: + from elasticsearch_dsl import AsyncUpdateByQuery + + ubq_classes.append(AsyncUpdateByQuery) +except ImportError: + pass + +ubq_cls = pytest.mark.parametrize("ubq_cls", ubq_classes) + -def test_ubq_starts_with_no_query(): - ubq = UpdateByQuery() +@ubq_cls +def test_ubq_starts_with_no_query(ubq_cls): + ubq = ubq_cls() assert ubq.query._proxied is None -def test_ubq_to_dict(): - ubq = UpdateByQuery() +@ubq_cls +def test_ubq_to_dict(ubq_cls): + ubq = ubq_cls() assert {} == ubq.to_dict() ubq = ubq.query("match", f=42) @@ -36,15 +52,16 @@ def test_ubq_to_dict(): assert {"query": {"match": {"f": 42}}, "size": 10} == ubq.to_dict(size=10) - ubq = UpdateByQuery(extra={"size": 5}) + ubq = ubq_cls(extra={"size": 5}) assert {"size": 5} == ubq.to_dict() - ubq = UpdateByQuery(extra={"extra_q": Q("term", category="conference")}) + ubq = ubq_cls(extra={"extra_q": Q("term", category="conference")}) assert {"extra_q": {"term": {"category": "conference"}}} == ubq.to_dict() -def test_complex_example(): - ubq = UpdateByQuery() +@ubq_cls +def test_complex_example(ubq_cls): + ubq = ubq_cls() ubq = ( ubq.query("match", title="python") .query(~Q("match", title="ruby")) @@ -81,8 +98,9 @@ def test_complex_example(): } == ubq.to_dict() -def test_exclude(): - ubq = UpdateByQuery() +@ubq_cls +def test_exclude(ubq_cls): + ubq = ubq_cls() ubq = ubq.exclude("match", title="python") assert { @@ -94,7 +112,8 @@ def test_exclude(): } == ubq.to_dict() -def test_reverse(): +@ubq_cls +def test_reverse(ubq_cls): d = { "query": { "filtered": { @@ -124,14 +143,15 @@ def test_reverse(): d2 = deepcopy(d) - ubq = UpdateByQuery.from_dict(d) + ubq = ubq_cls.from_dict(d) assert d == d2 assert d == ubq.to_dict() -def test_from_dict_doesnt_need_query(): - ubq = UpdateByQuery.from_dict({"script": {"source": "test"}}) +@ubq_cls +def test_from_dict_doesnt_need_query(ubq_cls): + ubq = ubq_cls.from_dict({"script": {"source": "test"}}) assert {"script": {"source": "test"}} == ubq.to_dict() @@ -146,8 +166,9 @@ def test_params_being_passed_to_search(mock_client): ) -def test_overwrite_script(): - ubq = UpdateByQuery() +@ubq_cls +def test_overwrite_script(ubq_cls): + ubq = ubq_cls() ubq = ubq.script( source="ctx._source.likes += params.f", lang="painless", params={"f": 3} ) From 4083bc54874b435858786bda801651cf57768277 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 8 Feb 2021 11:11:16 -0600 Subject: [PATCH 05/10] Add async integration tests --- elasticsearch_dsl/_async/index.py | 4 +- setup.py | 1 + tests/conftest.py | 15 ++--- tests/test_async/test_document.py | 7 +-- .../__init__.py} | 7 --- tests/test_async/test_integration/conftest.py | 45 ++++++++++++++ 
.../test_integration/test_document.py | 61 +++++++++++++++++++ 7 files changed, 119 insertions(+), 21 deletions(-) rename tests/test_async/{conftest.py => test_integration/__init__.py} (84%) create mode 100644 tests/test_async/test_integration/conftest.py create mode 100644 tests/test_async/test_integration/test_document.py diff --git a/elasticsearch_dsl/_async/index.py b/elasticsearch_dsl/_async/index.py index 4bbedfbd3..d807a4aaa 100644 --- a/elasticsearch_dsl/_async/index.py +++ b/elasticsearch_dsl/_async/index.py @@ -374,7 +374,9 @@ async def flush(self, using=None, **kwargs): Any additional keyword arguments will be passed to ``Elasticsearch.indices.flush`` unchanged. """ - return self._get_connection(using).indices.flush(index=self._name, **kwargs) + return await self._get_connection(using).indices.flush( + index=self._name, **kwargs + ) async def get(self, using=None, **kwargs): """ diff --git a/setup.py b/setup.py index 09c633053..459d72e46 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ "pytest>=3.0.0", "pytest-cov", "pytest-mock<3.0.0", + "pytest-asyncio; python_version>='3.6'", "pytz", "coverage<5.0.0", "sphinx", diff --git a/tests/conftest.py b/tests/conftest.py index 308442108..2de8b4d58 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,12 +17,11 @@ # under the License. -import os import re from datetime import datetime +from elasticsearch import Elasticsearch from elasticsearch.helpers import bulk -from elasticsearch.helpers.test import SkipTest, get_test_client from mock import Mock from pytest import fixture, skip @@ -41,11 +40,13 @@ @fixture(scope="session", autouse=True) def client(): try: - connection = get_test_client(nowait="WAIT_FOR_ES" not in os.environ) - add_connection("default", connection) - return connection - except SkipTest: - skip() + connection = Elasticsearch("http://localhost:9200") + connection.info() + except Exception: + return skip("Couldn't connect to Elasticsearch") + + add_connection("default", connection) + return connection @fixture(scope="session") diff --git a/tests/test_async/test_document.py b/tests/test_async/test_document.py index a211a71a0..dfb983c2d 100644 --- a/tests/test_async/test_document.py +++ b/tests/test_async/test_document.py @@ -15,12 +15,7 @@ # specific language governing permissions and limitations # under the License. -from elasticsearch_dsl import ( - AsyncDocument, - AsyncIndex, - AsyncMapping, - field, -) +from elasticsearch_dsl import AsyncDocument, AsyncIndex, AsyncMapping, field def test_async_document_index_mapping(): diff --git a/tests/test_async/conftest.py b/tests/test_async/test_integration/__init__.py similarity index 84% rename from tests/test_async/conftest.py rename to tests/test_async/test_integration/__init__.py index 66c5e540f..2a87d183f 100644 --- a/tests/test_async/conftest.py +++ b/tests/test_async/test_integration/__init__.py @@ -14,10 +14,3 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - -import pytest - -try: - from elasticsearch import AsyncElasticsearch -except ImportError: - pytest.skip("asyncio support must be available") diff --git a/tests/test_async/test_integration/conftest.py b/tests/test_async/test_integration/conftest.py new file mode 100644 index 000000000..d11af6d9c --- /dev/null +++ b/tests/test_async/test_integration/conftest.py @@ -0,0 +1,45 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pytest import fixture, mark, skip + +from elasticsearch_dsl.connections import add_connection + +pytestmark = mark.asyncio + + +@fixture(scope="function", autouse=True) +async def async_client(): + try: + from elasticsearch import AsyncElasticsearch + except ImportError: + return skip("asyncio support must be available") + try: + connection = AsyncElasticsearch("http://localhost:9200") + await connection.info() + except Exception: + return skip("Couldn't connect to Elasticsearch") + + add_connection("async", connection) + return connection + + +@fixture +async def write_client(async_client): + yield async_client + await async_client.indices.delete("test-*", ignore=404) + await async_client.indices.delete_template("test-template", ignore=404) diff --git a/tests/test_async/test_integration/test_document.py b/tests/test_async/test_integration/test_document.py new file mode 100644 index 000000000..61af66639 --- /dev/null +++ b/tests/test_async/test_integration/test_document.py @@ -0,0 +1,61 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from ipaddress import ip_address + +from elasticsearch_dsl import AsyncDocument, field + + +class SerializationDoc(AsyncDocument): + i = field.Long() + b = field.Boolean() + d = field.Double() + bin = field.Binary() + ip = field.Ip() + + class Index: + name = "test-serialization" + + +async def test_serialization(write_client): + await SerializationDoc.init() + await write_client.index( + index="test-serialization", + id=42, + body={ + "i": [1, 2, "3", None], + "b": [True, False, "true", "false", None], + "d": [0.1, "-0.1", None], + "bin": ["SGVsbG8gV29ybGQ=", None], + "ip": ["::1", "127.0.0.1", None], + }, + ) + sd = await SerializationDoc.get(id=42) + + assert sd.i == [1, 2, 3, None] + assert sd.b == [True, False, True, False, None] + assert sd.d == [0.1, -0.1, None] + assert sd.bin == [b"Hello World", None] + assert sd.ip == [ip_address(u"::1"), ip_address(u"127.0.0.1"), None] + + assert sd.to_dict() == { + "b": [True, False, True, False, None], + "bin": ["SGVsbG8gV29ybGQ=", None], + "d": [0.1, -0.1, None], + "i": [1, 2, 3, None], + "ip": ["::1", "127.0.0.1", None], + } From 784c9c5f8886895829095d7fdbd38d9c1cc340a4 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 8 Feb 2021 11:22:18 -0600 Subject: [PATCH 06/10] Add dev dep on aiohttp --- setup.py | 4 +++- tests/test_async/test_integration/test_document.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 459d72e46..58c4c1541 100644 --- a/setup.py +++ b/setup.py @@ -41,11 +41,13 @@ "pytest>=3.0.0", "pytest-cov", "pytest-mock<3.0.0", - "pytest-asyncio; python_version>='3.6'", "pytz", "coverage<5.0.0", "sphinx", "sphinx_rtd_theme", + # async dependencies + "aiohttp; python_version>='3.6'", + "pytest-asyncio; python_version>='3.6'", ] setup( diff --git a/tests/test_async/test_integration/test_document.py b/tests/test_async/test_integration/test_document.py index 61af66639..415fb68c5 100644 --- a/tests/test_async/test_integration/test_document.py +++ b/tests/test_async/test_integration/test_document.py @@ -32,7 +32,7 @@ class Index: async def test_serialization(write_client): - await SerializationDoc.init() + await SerializationDoc.init(using="async") await write_client.index( index="test-serialization", id=42, @@ -44,7 +44,7 @@ async def test_serialization(write_client): "ip": ["::1", "127.0.0.1", None], }, ) - sd = await SerializationDoc.get(id=42) + sd = await SerializationDoc.get(id=42, using="async") assert sd.i == [1, 2, 3, None] assert sd.b == [True, False, True, False, None] From 2d03acdb30bdada7426cb39aeaf9a3df2a7715f5 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 8 Feb 2021 11:44:53 -0600 Subject: [PATCH 07/10] sort __all__ --- elasticsearch_dsl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/elasticsearch_dsl/__init__.py b/elasticsearch_dsl/__init__.py index e6f8d3b18..8194f0874 100644 --- a/elasticsearch_dsl/__init__.py +++ b/elasticsearch_dsl/__init__.py @@ -182,5 +182,6 @@ "AsyncUpdateByQuery", ] ) + __all__.sort() except ImportError: pass From 00bc726a67858540935640c53b7499002958ae46 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 8 Feb 2021 12:28:44 -0600 Subject: [PATCH 08/10] Skip async tests on Python<3.6 --- noxfile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/noxfile.py b/noxfile.py index 196935678..312aefd2a 100644 --- a/noxfile.py +++ b/noxfile.py @@ -41,6 +41,8 @@ def test(session): "--cov=tests.test_integration.test_examples", "tests/", ) + if session.python in ("2.7", "3.4", "3.5"): + argv += 
("--ignore=tests/test_async/",) session.run("pytest", *argv) From 5a9997161a2badb61620c62fcf83b490a08c7eec Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Mon, 8 Feb 2021 15:34:40 -0600 Subject: [PATCH 09/10] More async integration tests --- setup.py | 8 +- .../test_integration/test_document.py | 516 +++++++++++++++++- 2 files changed, 514 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index 58c4c1541..114130cde 100644 --- a/setup.py +++ b/setup.py @@ -50,6 +50,12 @@ "pytest-asyncio; python_version>='3.6'", ] +packages = [package for package in find_packages(where=".", exclude=("tests*",))] +if any("_unasync" in package for package in packages): + raise RuntimeError( + "'elasticsearch_dsl/_unasync' directory shouldn't " "exist when building dists" + ) + setup( name="elasticsearch-dsl", description="Python client for Elasticsearch", @@ -62,7 +68,7 @@ author_email="honza.kral@gmail.com", maintainer="Seth Michael Larson", maintainer_email="seth.larson@elastic.co", - packages=find_packages(where=".", exclude=("tests*",)), + packages=packages, python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*", classifiers=[ "Development Status :: 4 - Beta", diff --git a/tests/test_async/test_integration/test_document.py b/tests/test_async/test_integration/test_document.py index 415fb68c5..29cf8edcd 100644 --- a/tests/test_async/test_integration/test_document.py +++ b/tests/test_async/test_integration/test_document.py @@ -15,25 +15,114 @@ # specific language governing permissions and limitations # under the License. +from datetime import datetime from ipaddress import ip_address -from elasticsearch_dsl import AsyncDocument, field +import pytest +from elasticsearch import ConflictError, NotFoundError +from pytest import raises +from pytz import timezone + +from elasticsearch_dsl import ( + AsyncDocument, + Binary, + Boolean, + Date, + Double, + InnerDoc, + Ip, + Keyword, + Long, + Mapping, + MetaField, + Nested, + Object, + Q, + RankFeatures, + Text, + analyzer, +) +from elasticsearch_dsl.utils import AttrList + +snowball = analyzer("my_snow", tokenizer="standard", filter=["lowercase", "snowball"]) + + +class User(InnerDoc): + name = Text(fields={"raw": Keyword()}) + + +class Wiki(AsyncDocument): + owner = Object(User) + views = Long() + ranked = RankFeatures() + + class Index: + name = "test-wiki" + + +class Repository(AsyncDocument): + owner = Object(User) + created_at = Date() + description = Text(analyzer=snowball) + tags = Keyword() + + @classmethod + def search(cls): + return super(Repository, cls).search().filter("term", commit_repo="repo") + + class Index: + name = "git" + + +class Commit(AsyncDocument): + committed_date = Date() + authored_date = Date() + description = Text(analyzer=snowball) + + class Index: + name = "flat-git" + + class Meta: + mapping = Mapping() + + +class History(InnerDoc): + timestamp = Date() + diff = Text() + + +class Comment(InnerDoc): + content = Text() + created_at = Date() + author = Object(User) + history = Nested(History) + + class Meta: + dynamic = MetaField(False) + + +class PullRequest(AsyncDocument): + comments = Nested(Comment) + created_at = Date() + + class Index: + name = "test-prs" class SerializationDoc(AsyncDocument): - i = field.Long() - b = field.Boolean() - d = field.Double() - bin = field.Binary() - ip = field.Ip() + i = Long() + b = Boolean() + d = Double() + bin = Binary() + ip = Ip() class Index: name = "test-serialization" async def test_serialization(write_client): - await SerializationDoc.init(using="async") - await 
write_client.index( + await SerializationDoc.init() + write_client.index( index="test-serialization", id=42, body={ @@ -44,7 +133,7 @@ async def test_serialization(write_client): "ip": ["::1", "127.0.0.1", None], }, ) - sd = await SerializationDoc.get(id=42, using="async") + sd = await SerializationDoc.get(id=42) assert sd.i == [1, 2, 3, None] assert sd.b == [True, False, True, False, None] @@ -59,3 +148,412 @@ async def test_serialization(write_client): "i": [1, 2, 3, None], "ip": ["::1", "127.0.0.1", None], } + + +async def test_nested_inner_hits_are_wrapped_properly(pull_request): + history_query = Q( + "nested", + path="comments.history", + inner_hits={}, + query=Q("match", comments__history__diff="ahoj"), + ) + s = PullRequest.search().query( + "nested", inner_hits={}, path="comments", query=history_query + ) + + response = await s.execute() + pr = response.hits[0] + assert isinstance(pr, PullRequest) + assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].history[0], History) + + comment = pr.meta.inner_hits.comments.hits[0] + assert isinstance(comment, Comment) + assert comment.author.name == "honzakral" + assert isinstance(comment.history[0], History) + + history = comment.meta.inner_hits["comments.history"].hits[0] + assert isinstance(history, History) + assert history.timestamp == datetime(2012, 1, 1) + assert "score" in history.meta + + +async def test_nested_inner_hits_are_deserialized_properly(pull_request): + s = PullRequest.search().query( + "nested", + inner_hits={}, + path="comments", + query=Q("match", comments__content="hello"), + ) + + response = await s.execute() + pr = response.hits[0] + assert isinstance(pr.created_at, datetime) + assert isinstance(pr.comments[0], Comment) + assert isinstance(pr.comments[0].created_at, datetime) + + +async def test_nested_top_hits_are_wrapped_properly(pull_request): + s = PullRequest.search() + s.aggs.bucket("comments", "nested", path="comments").metric( + "hits", "top_hits", size=1 + ) + + r = await s.execute() + + assert isinstance(r.aggregations.comments.hits.hits[0], Comment) + + +async def test_update_object_field(write_client): + await Wiki.init() + w = Wiki( + owner=User(name="Honza Kral"), + _id="elasticsearch-py", + ranked={"test1": 0.1, "topic2": 0.2}, + ) + await w.save() + + assert "updated" == w.update(owner=[{"name": "Honza"}, {"name": "Nick"}]) + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + w = await Wiki.get(id="elasticsearch-py") + assert w.owner[0].name == "Honza" + assert w.owner[1].name == "Nick" + + assert w.ranked == {"test1": 0.1, "topic2": 0.2} + + +async def test_update_script(write_client): + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + await w.save() + + await w.update(script="ctx._source.views += params.inc", inc=5) + w = await Wiki.get(id="elasticsearch-py") + assert w.views == 47 + + +async def test_update_retry_on_conflict(write_client): + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + await w.save() + + w1 = await Wiki.get(id="elasticsearch-py") + w2 = await Wiki.get(id="elasticsearch-py") + await w1.update( + script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1 + ) + await w2.update( + script="ctx._source.views += params.inc", inc=5, retry_on_conflict=1 + ) + + w = await Wiki.get(id="elasticsearch-py") + assert w.views == 52 + + +@pytest.mark.parametrize("retry_on_conflict", [None, 0]) +async def 
test_update_conflicting_version(write_client, retry_on_conflict): + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + await w.save() + + w1 = Wiki.get(id="elasticsearch-py") + w2 = Wiki.get(id="elasticsearch-py") + await w1.update(script="ctx._source.views += params.inc", inc=5) + + with raises(ConflictError): + await w2.update( + script="ctx._source.views += params.inc", + inc=5, + retry_on_conflict=retry_on_conflict, + ) + + +async def test_save_and_update_return_doc_meta(write_client): + await Wiki.init() + w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) + resp = await w.save(return_doc_meta=True) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "created" + assert set(resp.keys()) == { + "_id", + "_index", + "_primary_term", + "_seq_no", + "_shards", + "_type", + "_version", + "result", + } + + resp = await w.update( + script="ctx._source.views += params.inc", inc=5, return_doc_meta=True + ) + assert resp["_index"] == "test-wiki" + assert resp["result"] == "updated" + assert set(resp.keys()) == { + "_id", + "_index", + "_primary_term", + "_seq_no", + "_shards", + "_type", + "_version", + "result", + } + + +async def test_init(write_client): + await Repository.init(index="test-git") + + assert write_client.indices.exists(index="test-git") + + +async def test_get_raises_404_on_index_missing(data_client): + with raises(NotFoundError): + await Repository.get("elasticsearch-dsl-php", index="not-there") + + +async def test_get_raises_404_on_non_existent_id(data_client): + with raises(NotFoundError): + await Repository.get("elasticsearch-dsl-php") + + +async def test_get_returns_none_if_404_ignored(data_client): + assert None is await Repository.get("elasticsearch-dsl-php", ignore=404) + + +async def test_get_returns_none_if_404_ignored_and_index_doesnt_exist(data_client): + assert None is await Repository.get("42", index="not-there", ignore=404) + + +async def test_get(data_client): + elasticsearch_repo = await Repository.get("elasticsearch-dsl-py") + + assert isinstance(elasticsearch_repo, Repository) + assert elasticsearch_repo.owner.name == "elasticsearch" + assert datetime(2014, 3, 3) == elasticsearch_repo.created_at + + +async def test_get_with_tz_date(data_client): + first_commit = await Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + + tzinfo = timezone("Europe/Prague") + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123000)) + == first_commit.authored_date + ) + + +async def test_save_with_tz_date(data_client): + tzinfo = timezone("Europe/Prague") + first_commit = await Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + first_commit.committed_date = tzinfo.localize( + datetime(2014, 5, 2, 13, 47, 19, 123456) + ) + await first_commit.save() + + first_commit = Commit.get( + id="3ca6e1e73a071a705b4babd2f581c91a2a3e5037", routing="elasticsearch-dsl-py" + ) + assert ( + tzinfo.localize(datetime(2014, 5, 2, 13, 47, 19, 123456)) + == first_commit.committed_date + ) + + +COMMIT_DOCS_WITH_MISSING = [ + {"_id": "0"}, # Missing + {"_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037"}, # Existing + {"_id": "f"}, # Missing + {"_id": "eb3e543323f189fd7b698e66295427204fff5755"}, # Existing +] + + +async def test_mget(data_client): + commits = await Commit.mget(COMMIT_DOCS_WITH_MISSING) + assert commits[0] is None + assert commits[1].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[2] 
is None + assert commits[3].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +async def test_mget_raises_exception_when_missing_param_is_invalid(data_client): + with raises(ValueError): + await Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raj") + + +async def test_mget_raises_404_when_missing_param_is_raise(data_client): + with raises(NotFoundError): + await Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="raise") + + +async def test_mget_ignores_missing_docs_when_missing_param_is_skip(data_client): + commits = await Commit.mget(COMMIT_DOCS_WITH_MISSING, missing="skip") + assert commits[0].meta.id == "3ca6e1e73a071a705b4babd2f581c91a2a3e5037" + assert commits[1].meta.id == "eb3e543323f189fd7b698e66295427204fff5755" + + +async def test_update_works_from_search_response(data_client): + elasticsearch_repo = await Repository.search().execute()[0] + + await elasticsearch_repo.update(owner={"other_name": "elastic"}) + assert "elastic" == elasticsearch_repo.owner.other_name + + new_version = await Repository.get("elasticsearch-dsl-py") + assert "elastic" == new_version.owner.other_name + assert "elasticsearch" == new_version.owner.name + + +async def test_update(data_client): + elasticsearch_repo = await Repository.get("elasticsearch-dsl-py") + v = elasticsearch_repo.meta.version + + old_seq_no = elasticsearch_repo.meta.seq_no + elasticsearch_repo.update(owner={"new_name": "elastic"}, new_field="testing-update") + + assert "elastic" == elasticsearch_repo.owner.new_name + assert "testing-update" == elasticsearch_repo.new_field + + # assert version has been updated + assert elasticsearch_repo.meta.version == v + 1 + + new_version = await Repository.get("elasticsearch-dsl-py") + assert "testing-update" == new_version.new_field + assert "elastic" == new_version.owner.new_name + assert "elasticsearch" == new_version.owner.name + assert "seq_no" in new_version.meta + assert new_version.meta.seq_no != old_seq_no + assert "primary_term" in new_version.meta + + +async def test_save_updates_existing_doc(data_client): + elasticsearch_repo = await Repository.get("elasticsearch-dsl-py") + + elasticsearch_repo.new_field = "testing-save" + old_seq_no = elasticsearch_repo.meta.seq_no + assert "updated" == (await elasticsearch_repo.save()) + + new_repo = data_client.get(index="git", id="elasticsearch-dsl-py") + assert "testing-save" == new_repo["_source"]["new_field"] + assert new_repo["_seq_no"] != old_seq_no + assert new_repo["_seq_no"] == elasticsearch_repo.meta.seq_no + + +async def test_save_automatically_uses_seq_no_and_primary_term(data_client): + elasticsearch_repo = await Repository.get("elasticsearch-dsl-py") + elasticsearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + await elasticsearch_repo.save() + + +async def test_delete_automatically_uses_seq_no_and_primary_term(data_client): + elasticsearch_repo = await Repository.get("elasticsearch-dsl-py") + elasticsearch_repo.meta.seq_no += 1 + + with raises(ConflictError): + await elasticsearch_repo.delete() + + +def assert_doc_equals(expected, actual): + for f in expected: + assert f in actual + assert actual[f] == expected[f] + + +async def test_can_save_to_different_index(write_client): + test_repo = Repository(description="testing", meta={"id": 42}) + assert await test_repo.save(index="test-document") + + assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": {"description": "testing"}, + }, + write_client.get(index="test-document", id=42), + ) + + +async def 
test_save_without_skip_empty_will_include_empty_fields(write_client): + test_repo = Repository(field_1=[], field_2=None, field_3={}, meta={"id": 42}) + assert test_repo.save(index="test-document", skip_empty=False) + + assert_doc_equals( + { + "found": True, + "_index": "test-document", + "_id": "42", + "_source": {"field_1": [], "field_2": None, "field_3": {}}, + }, + write_client.get(index="test-document", id=42), + ) + + +async def test_delete(write_client): + write_client.create( + index="test-document", + id="elasticsearch-dsl-py", + body={ + "organization": "elasticsearch", + "created_at": "2014-03-03", + "owner": {"name": "elasticsearch"}, + }, + ) + + test_repo = Repository(meta={"id": "elasticsearch-dsl-py"}) + test_repo.meta.index = "test-document" + await test_repo.delete() + + assert not write_client.exists( + index="test-document", + id="elasticsearch-dsl-py", + ) + + +async def test_search(data_client): + assert (await Repository.search().count()) == 1 + + +async def test_search_returns_proper_doc_classes(data_client): + result = await Repository.search().execute() + + elasticsearch_repo = result.hits[0] + + assert isinstance(elasticsearch_repo, Repository) + assert elasticsearch_repo.owner.name == "elasticsearch" + + +async def test_refresh_mapping(data_client): + class Commit(AsyncDocument): + class Index: + name = "git" + + Commit._index.load_mappings() + + assert "stats" in Commit._index._mapping + assert "committer" in Commit._index._mapping + assert "description" in Commit._index._mapping + assert "committed_date" in Commit._index._mapping + assert isinstance(Commit._index._mapping["committed_date"], Date) + + +async def test_highlight_in_meta(data_client): + commit = await ( + Commit.search() + .query("match", description="inverting") + .highlight("description") + .execute() + )[0] + + assert isinstance(commit, Commit) + assert "description" in commit.meta.highlight + assert isinstance(commit.meta.highlight["description"], AttrList) + assert len(commit.meta.highlight["description"]) > 0 From 3c84c474d3c461329dd3a0a2990eb66ce019fe8f Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Wed, 7 Jul 2021 08:44:54 -0500 Subject: [PATCH 10/10] test wip --- .../test_async/test_integration/test_count.py | 42 +++ .../test_integration/test_document.py | 4 +- .../test_integration/test_faceted_search.py | 280 ++++++++++++++++++ 3 files changed, 324 insertions(+), 2 deletions(-) create mode 100644 tests/test_async/test_integration/test_count.py create mode 100644 tests/test_async/test_integration/test_faceted_search.py diff --git a/tests/test_async/test_integration/test_count.py b/tests/test_async/test_integration/test_count.py new file mode 100644 index 000000000..94c79761f --- /dev/null +++ b/tests/test_async/test_integration/test_count.py @@ -0,0 +1,42 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from elasticsearch_dsl.search import Q, AsyncSearch + + +async def test_count_all(data_client): + s = AsyncSearch(using=data_client).index("git") + assert 53 == await s.count() + + +async def test_count_prefetch(data_client, mocker): + mocker.spy(data_client, "count") + + search = AsyncSearch(using=data_client).index("git") + await search.execute() + assert await search.count() == 53 + assert data_client.count.call_count == 0 + + search._response.hits.total.relation = "gte" + assert await search.count() == 53 + assert data_client.count.call_count == 1 + + +async def test_count_filter(data_client): + s = AsyncSearch(using=data_client).index("git").filter(~Q("exists", field="parent_shas")) + # initial commit + repo document + assert 2 == await s.count() diff --git a/tests/test_async/test_integration/test_document.py b/tests/test_async/test_integration/test_document.py index 29cf8edcd..0277c0f6b 100644 --- a/tests/test_async/test_integration/test_document.py +++ b/tests/test_async/test_integration/test_document.py @@ -258,8 +258,8 @@ async def test_update_conflicting_version(write_client, retry_on_conflict): w = Wiki(owner=User(name="Honza Kral"), _id="elasticsearch-py", views=42) await w.save() - w1 = Wiki.get(id="elasticsearch-py") - w2 = Wiki.get(id="elasticsearch-py") + w1 = await Wiki.get(id="elasticsearch-py") + w2 = await Wiki.get(id="elasticsearch-py") await w1.update(script="ctx._source.views += params.inc", inc=5) with raises(ConflictError): diff --git a/tests/test_async/test_integration/test_faceted_search.py b/tests/test_async/test_integration/test_faceted_search.py new file mode 100644 index 000000000..1a2729d62 --- /dev/null +++ b/tests/test_async/test_integration/test_faceted_search.py @@ -0,0 +1,280 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from datetime import datetime + +import pytest + +from elasticsearch_dsl import A, Boolean, Date, Document, Keyword +from elasticsearch_dsl import ( + DateHistogramFacet, + AsyncFacetedSearch, + NestedFacet, + RangeFacet, + TermsFacet, +) + +from .test_document import PullRequest + + +class Repos(Document): + is_public = Boolean() + created_at = Date() + + class Index: + name = "git" + + +class Commit(Document): + files = Keyword() + committed_date = Date() + + class Index: + name = "git" + + +class MetricSearch(AsyncFacetedSearch): + index = "git" + doc_types = [Commit] + + facets = { + "files": TermsFacet(field="files", metric=A("max", field="committed_date")), + } + + +@pytest.fixture(scope="session") +def commit_search_cls(es_version): + if es_version >= (7, 2): + interval_kwargs = {"fixed_interval": "1d"} + else: + interval_kwargs = {"interval": "day"} + + class CommitSearch(AsyncFacetedSearch): + index = "flat-git" + fields = ( + "description", + "files", + ) + + facets = { + "files": TermsFacet(field="files"), + "frequency": DateHistogramFacet( + field="authored_date", min_doc_count=1, **interval_kwargs + ), + "deletions": RangeFacet( + field="stats.deletions", + ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))], + ), + } + + return CommitSearch + + +@pytest.fixture(scope="session") +def repo_search_cls(es_version): + interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" + + class RepoSearch(AsyncFacetedSearch): + index = "git" + doc_types = [Repos] + facets = { + "public": TermsFacet(field="is_public"), + "created": DateHistogramFacet( + field="created_at", **{interval_type: "month"} + ), + } + + def search(self): + s = super(RepoSearch, self).search() + return s.filter("term", commit_repo="repo") + + return RepoSearch + + +@pytest.fixture(scope="session") +def pr_search_cls(es_version): + interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" + + class PRSearch(AsyncFacetedSearch): + index = "test-prs" + doc_types = [PullRequest] + facets = { + "comments": NestedFacet( + "comments", + DateHistogramFacet( + field="comments.created_at", **{interval_type: "month"} + ), + ) + } + + return PRSearch + + +async def test_facet_with_custom_metric(data_client): + ms = MetricSearch() + r = await ms.execute() + + dates = [f[1] for f in r.facets.files] + assert dates == list(sorted(dates, reverse=True)) + assert dates[0] == 1399038439000 + + +async def test_nested_facet(pull_request, pr_search_cls): + prs = pr_search_cls() + r = prs.execute() + + assert r.hits.total.value == 1 + assert [(datetime(2018, 1, 1, 0, 0), 1, False)] == r.facets.comments + + +async def test_nested_facet_with_filter(pull_request, pr_search_cls): + prs = pr_search_cls(filters={"comments": datetime(2018, 1, 1, 0, 0)}) + r = prs.execute() + + assert r.hits.total.value == 1 + assert [(datetime(2018, 1, 1, 0, 0), 1, True)] == r.facets.comments + + prs = pr_search_cls(filters={"comments": datetime(2018, 2, 1, 0, 0)}) + r = prs.execute() + assert not r.hits + + +async def test_datehistogram_facet(data_client, repo_search_cls): + rs = repo_search_cls() + r = rs.execute() + + assert r.hits.total.value == 1 + assert [(datetime(2014, 3, 1, 0, 0), 1, False)] == r.facets.created + + +async def test_boolean_facet(data_client, repo_search_cls): + rs = repo_search_cls() + r = rs.execute() + + assert r.hits.total.value == 1 + assert [(True, 1, False)] == r.facets.public + value, count, selected = r.facets.public[0] + assert value is True + + +async def 
test_empty_search_finds_everything(data_client, es_version, commit_search_cls): + cs = commit_search_cls() + r = cs.execute() + + assert r.hits.total.value == 52 + assert [ + ("elasticsearch_dsl", 40, False), + ("test_elasticsearch_dsl", 35, False), + ("elasticsearch_dsl/query.py", 19, False), + ("test_elasticsearch_dsl/test_search.py", 15, False), + ("elasticsearch_dsl/utils.py", 14, False), + ("test_elasticsearch_dsl/test_query.py", 13, False), + ("elasticsearch_dsl/search.py", 12, False), + ("elasticsearch_dsl/aggs.py", 11, False), + ("test_elasticsearch_dsl/test_result.py", 5, False), + ("elasticsearch_dsl/result.py", 3, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 2, False), + (datetime(2014, 3, 4, 0, 0), 1, False), + (datetime(2014, 3, 5, 0, 0), 3, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 9, False), + (datetime(2014, 3, 10, 0, 0), 2, False), + (datetime(2014, 3, 15, 0, 0), 4, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 2, False), + (datetime(2014, 3, 24, 0, 0), 10, False), + (datetime(2014, 4, 20, 0, 0), 2, False), + (datetime(2014, 4, 22, 0, 0), 2, False), + (datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 0), 2, False), + (datetime(2014, 4, 27, 0, 0), 2, False), + (datetime(2014, 5, 1, 0, 0), 2, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 19, False), + ("good", 14, False), + ("better", 19, False), + ] == r.facets.deletions + + +async def test_term_filters_are_shown_as_selected_and_data_is_filtered( + data_client, commit_search_cls +): + cs = commit_search_cls(filters={"files": "test_elasticsearch_dsl"}) + + r = cs.execute() + + assert 35 == r.hits.total.value + assert [ + ("elasticsearch_dsl", 40, False), + ("test_elasticsearch_dsl", 35, True), # selected + ("elasticsearch_dsl/query.py", 19, False), + ("test_elasticsearch_dsl/test_search.py", 15, False), + ("elasticsearch_dsl/utils.py", 14, False), + ("test_elasticsearch_dsl/test_query.py", 13, False), + ("elasticsearch_dsl/search.py", 12, False), + ("elasticsearch_dsl/aggs.py", 11, False), + ("test_elasticsearch_dsl/test_result.py", 5, False), + ("elasticsearch_dsl/result.py", 3, False), + ] == r.facets.files + + assert [ + (datetime(2014, 3, 3, 0, 0), 1, False), + (datetime(2014, 3, 5, 0, 0), 2, False), + (datetime(2014, 3, 6, 0, 0), 3, False), + (datetime(2014, 3, 7, 0, 0), 6, False), + (datetime(2014, 3, 10, 0, 0), 1, False), + (datetime(2014, 3, 15, 0, 0), 3, False), + (datetime(2014, 3, 21, 0, 0), 2, False), + (datetime(2014, 3, 23, 0, 0), 1, False), + (datetime(2014, 3, 24, 0, 0), 7, False), + (datetime(2014, 4, 20, 0, 0), 1, False), + (datetime(2014, 4, 25, 0, 0), 3, False), + (datetime(2014, 4, 26, 0, 0), 2, False), + (datetime(2014, 4, 27, 0, 0), 1, False), + (datetime(2014, 5, 1, 0, 0), 1, False), + (datetime(2014, 5, 2, 0, 0), 1, False), + ] == r.facets.frequency + + assert [ + ("ok", 12, False), + ("good", 10, False), + ("better", 13, False), + ] == r.facets.deletions + + +async def test_range_filters_are_shown_as_selected_and_data_is_filtered( + data_client, commit_search_cls +): + cs = commit_search_cls(filters={"deletions": "better"}) + + r = cs.execute() + + assert 19 == r.hits.total.value + + +async def test_pagination(data_client, commit_search_cls): + cs = commit_search_cls() + cs = cs[0:20] + + assert 52 == cs.count() + assert 20 == len(cs.execute())
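
A rough usage sketch of the async API that this patch series introduces and exercises, following the patterns used in the integration tests above. The `BlogPost` document, the "blog" index name, the "async" connection alias, and the localhost URL are illustrative only and not part of the change itself.

    from elasticsearch import AsyncElasticsearch

    from elasticsearch_dsl import AsyncDocument, Date, Text
    from elasticsearch_dsl.connections import add_connection


    class BlogPost(AsyncDocument):
        title = Text()
        published = Date()

        class Index:
            name = "blog"


    async def main():
        # register an async client under a named alias, as the test fixtures do
        add_connection("async", AsyncElasticsearch("http://localhost:9200"))

        # create the index and mapping, then index a document
        await BlogPost.init(using="async")
        post = BlogPost(title="unasync all the things", published="2021-01-14")
        await post.save(using="async")

        # search is built lazily as usual; only execute() is awaited
        s = BlogPost.search(using="async").query("match", title="unasync")
        response = await s.execute()
        for hit in response:
            print(hit.title)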