-
Notifications
You must be signed in to change notification settings - Fork 44
/
query.py
738 lines (579 loc) · 24.3 KB
/
query.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
# Copyright 2014 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Create / interact with Google Cloud Datastore queries."""
import base64
from google.api_core import page_iterator
from google.cloud._helpers import _ensure_tuple_or_list
from google.cloud.datastore_v1.types import entity as entity_pb2
from google.cloud.datastore_v1.types import query as query_pb2
from google.cloud.datastore import helpers
from google.cloud.datastore.key import Key
# ``more_results`` value for which the iterator keeps requesting pages; it is
# also the value ``Iterator._next_page`` checks before re-running a query whose
# offset has not been fully skipped yet.
_NOT_FINISHED = query_pb2.QueryResultBatch.MoreResultsType.NOT_FINISHED
# ``more_results`` value for which the iterator clears its next-page token.
_NO_MORE_RESULTS = query_pb2.QueryResultBatch.MoreResultsType.NO_MORE_RESULTS
# ``more_results`` values after which the iterator stops requesting pages.
_FINISHED = (
    _NO_MORE_RESULTS,
    query_pb2.QueryResultBatch.MoreResultsType.MORE_RESULTS_AFTER_LIMIT,
    query_pb2.QueryResultBatch.MoreResultsType.MORE_RESULTS_AFTER_CURSOR,
)
class Query:
    """Describe a query to run against the Cloud Datastore.

    A ``Query`` holds the settings of a query (kind, ancestor, filters,
    projection, ordering, grouping); calling :meth:`fetch` returns an
    :class:`Iterator` that executes it.

    :type client: :class:`google.cloud.datastore.client.Client`
    :param client: The client used to connect to Datastore.

    :type kind: str
    :param kind: The kind to query.

    :type project: str
    :param project:
        (Optional) The project associated with the query. When falsy, the
        client's ``project`` is used.

    :type namespace: str
    :param namespace:
        (Optional) The namespace to which to restrict results. When falsy,
        the client's ``namespace`` is used.

    :type ancestor: :class:`~google.cloud.datastore.key.Key`
    :param ancestor:
        (Optional) Key of the ancestor to which this query's results are
        restricted.

    :type filters: sequence of tuple[str, str, str]
    :param filters: Property filters applied by this query. Each item is a
                    ``(property_name, operator, value)`` triple and is
                    validated through :meth:`add_filter`.

    :type projection: sequence of string
    :param projection: Fields returned as part of query results.

    :type order: sequence of string
    :param order: Field names used to order query results. Prepend ``-``
                  to a field name to sort it in descending order.

    :type distinct_on: sequence of string
    :param distinct_on: Field names used to group query results.

    :raises: :class:`ValueError` if one of ``filters`` is rejected by
             :meth:`add_filter`.
    """

    OPERATORS = {
        "<=": query_pb2.PropertyFilter.Operator.LESS_THAN_OR_EQUAL,
        ">=": query_pb2.PropertyFilter.Operator.GREATER_THAN_OR_EQUAL,
        "<": query_pb2.PropertyFilter.Operator.LESS_THAN,
        ">": query_pb2.PropertyFilter.Operator.GREATER_THAN,
        "=": query_pb2.PropertyFilter.Operator.EQUAL,
        "!=": query_pb2.PropertyFilter.Operator.NOT_EQUAL,
        "IN": query_pb2.PropertyFilter.Operator.IN,
        "NOT_IN": query_pb2.PropertyFilter.Operator.NOT_IN,
    }
    """Mapping of operator strings and their protobuf equivalents."""

    def __init__(
        self,
        client,
        kind=None,
        project=None,
        namespace=None,
        ancestor=None,
        filters=(),
        projection=(),
        order=(),
        distinct_on=(),
    ):
        self._client = client
        self._kind = kind
        self._project = project or client.project
        self._namespace = namespace or client.namespace
        self._ancestor = ancestor

        # Route every initial filter through ``add_filter`` so that it is
        # validated exactly like a filter added later.
        self._filters = []
        for name, op, operand in filters:
            self.add_filter(name, op, operand)

        self._projection = _ensure_tuple_or_list("projection", projection)
        self._order = _ensure_tuple_or_list("order", order)
        self._distinct_on = _ensure_tuple_or_list("distinct_on", distinct_on)

    @property
    def project(self):
        """Project this query runs against.

        :rtype: str
        :returns: The query's own project, or the client's project when the
                  query's value is falsy.
        """
        return self._project or self._client.project

    @property
    def namespace(self):
        """Namespace this query is restricted to.

        :rtype: str or None
        :returns: The query's own namespace, or the client's namespace when
                  the query's value is falsy.
        """
        return self._namespace or self._client.namespace

    @namespace.setter
    def namespace(self, value):
        """Replace the query's namespace.

        :type value: str
        :param value: The new namespace.

        :raises: :class:`ValueError` if ``value`` is not a string.
        """
        if not isinstance(value, str):
            raise ValueError("Namespace must be a string")
        self._namespace = value

    @property
    def kind(self):
        """Kind this query selects.

        :rtype: str
        :returns: The kind for the query.
        """
        return self._kind

    @kind.setter
    def kind(self, value):
        """Replace the kind of the query.

        :type value: str
        :param value: Updated kind for the query.

        :raises: :class:`TypeError` if ``value`` is not a string.

        .. note::

           Only a single kind string is accepted here, even though the
           query protobuf stores kinds in a repeated field.
        """
        if not isinstance(value, str):
            raise TypeError("Kind must be a string")
        self._kind = value

    @property
    def ancestor(self):
        """Ancestor key restricting the query.

        :rtype: :class:`~google.cloud.datastore.key.Key` or None
        :returns: The ancestor for the query.
        """
        return self._ancestor

    @ancestor.setter
    def ancestor(self, value):
        """Replace the ancestor of the query.

        :type value: :class:`~google.cloud.datastore.key.Key`
        :param value: The new ancestor key.

        :raises: :class:`TypeError` if ``value`` is not a ``Key``.
        """
        if not isinstance(value, Key):
            raise TypeError("Ancestor must be a Key")
        self._ancestor = value

    @ancestor.deleter
    def ancestor(self):
        """Drop the ancestor restriction from the query."""
        self._ancestor = None

    @property
    def filters(self):
        """Filters set on the query.

        :rtype: list of tuple[str, str, str]
        :returns: A copy of the filters set on the query. Each item is a
                  ``(property_name, operator, value)`` triple.
        """
        return list(self._filters)

    def add_filter(self, property_name, operator, value):
        """Append a ``(property_name, operator, value)`` filter to the query.

        Expressions take the form of::

          .add_filter('<property>', '<operator>', <value>)

        where property is a property stored on the entity in the datastore
        and operator is one of ``OPERATORS``
        (ie, ``=``, ``<``, ``<=``, ``>``, ``>=``, ``!=``, ``IN``, ``NOT_IN``):

        .. testsetup:: query-filter

            import uuid

            from google.cloud import datastore

            client = datastore.Client()

        .. doctest:: query-filter

            >>> query = client.query(kind='Person')
            >>> query = query.add_filter('name', '=', 'James')
            >>> query = query.add_filter('age', '>', 50)

        :type property_name: str
        :param property_name: A property name.

        :type operator: str
        :param operator: One of ``=``, ``<``, ``<=``, ``>``, ``>=``, ``!=``,
                         ``IN``, ``NOT_IN``.

        :type value: :class:`int`, :class:`str`, :class:`bool`,
                     :class:`float`, :class:`NoneType`,
                     :class:`datetime.datetime`,
                     :class:`google.cloud.datastore.key.Key`
        :param value: The value to filter on.

        :rtype: :class:`~google.cloud.datastore.query.Query`
        :returns: This query object, so calls can be chained.

        :raises: :class:`ValueError` if ``operator`` is not one of the
                 specified values, or if a filter names ``'__key__'`` but
                 passes an invalid value (a key is required).
        """
        pb_operator = self.OPERATORS.get(operator)
        if pb_operator is None:
            raise ValueError(
                'Invalid expression: "%s"' % (operator,),
                "Please use one of: =, <, <=, >, >=, !=, IN, NOT_IN.",
            )

        targets_key = property_name == "__key__"
        if targets_key and not isinstance(value, Key):
            raise ValueError('Invalid key: "%s"' % value)

        self._filters.append((property_name, operator, value))
        return self

    @property
    def projection(self):
        """Field names returned by the query.

        :rtype: sequence of string
        :returns: A copy of the names of fields in query results.
        """
        return self._projection[:]

    @projection.setter
    def projection(self, projection):
        """Replace the fields returned by the query.

        :type projection: str or sequence of strings
        :param projection: Each value is a string giving the name of a
                           property to be included in the projection query.
                           A single string is treated as a one-item sequence.
        """
        names = [projection] if isinstance(projection, str) else projection
        self._projection[:] = names

    def keys_only(self):
        """Set the projection to include only keys."""
        self._projection[:] = ["__key__"]

    def key_filter(self, key, operator="="):
        """Add a filter on the entity key (the ``__key__`` property).

        :type key: :class:`google.cloud.datastore.key.Key`
        :param key: The key to filter on.

        :type operator: str
        :param operator: (Optional) One of ``=``, ``<``, ``<=``, ``>``,
                         ``>=``, ``!=``, ``IN``, ``NOT_IN``.
                         Defaults to ``=``.
        """
        self.add_filter("__key__", operator, key)

    @property
    def order(self):
        """Names of fields used to sort query results.

        :rtype: sequence of string
        :returns: A copy of the order(s) set on the query.
        """
        return self._order[:]

    @order.setter
    def order(self, value):
        """Replace the fields used to sort query results.

        Sort fields will be applied in the order specified.

        :type value: str or sequence of strings
        :param value: Each value is a string giving the name of the
                      property on which to sort, optionally preceded by a
                      hyphen (-) to specify descending order.
                      Omitting the hyphen implies ascending order.
                      A single string is treated as a one-item sequence.
        """
        fields = [value] if isinstance(value, str) else value
        self._order[:] = fields

    @property
    def distinct_on(self):
        """Names of fields used to group query results.

        :rtype: sequence of string
        :returns: A copy of the "distinct on" fields set on the query.
        """
        return self._distinct_on[:]

    @distinct_on.setter
    def distinct_on(self, value):
        """Replace the fields used to group query results.

        :type value: str or sequence of strings
        :param value: Each value is a string giving the name of a
                      property to use to group results together.
                      A single string is treated as a one-item sequence.
        """
        fields = [value] if isinstance(value, str) else value
        self._distinct_on[:] = fields

    def fetch(
        self,
        limit=None,
        offset=0,
        start_cursor=None,
        end_cursor=None,
        client=None,
        eventual=False,
        retry=None,
        timeout=None,
    ):
        """Build an iterator that executes the query and yields entities.

        For example:

        .. testsetup:: query-fetch

            import uuid

            from google.cloud import datastore

            unique = str(uuid.uuid4())[0:8]
            client = datastore.Client(namespace='ns{}'.format(unique))

        .. doctest:: query-fetch

            >>> andy = datastore.Entity(client.key('Person', 1234))
            >>> andy['name'] = 'Andy'
            >>> sally = datastore.Entity(client.key('Person', 2345))
            >>> sally['name'] = 'Sally'
            >>> bobby = datastore.Entity(client.key('Person', 3456))
            >>> bobby['name'] = 'Bobby'
            >>> client.put_multi([andy, sally, bobby])
            >>> query = client.query(kind='Person')
            >>> result = list(query.add_filter('name', '=', 'Sally').fetch())
            >>> result
            [<Entity('Person', 2345) {'name': 'Sally'}>]

        .. testcleanup:: query-fetch

            client.delete(andy.key)
            client.delete(sally.key)
            client.delete(bobby.key)

        :type limit: int
        :param limit: (Optional) limit passed through to the iterator.

        :type offset: int
        :param offset: (Optional) offset passed through to the iterator.

        :type start_cursor: bytes
        :param start_cursor: (Optional) cursor passed through to the iterator.

        :type end_cursor: bytes
        :param end_cursor: (Optional) cursor passed through to the iterator.

        :type client: :class:`google.cloud.datastore.client.Client`
        :param client: (Optional) client used to connect to datastore.
                       If not supplied, uses the query's value.

        :type eventual: bool
        :param eventual: (Optional) consistency flag passed through to the
                         iterator. Defaults to ``False``.

        :type retry: :class:`google.api_core.retry.Retry`
        :param retry: (Optional) retry object passed through to the iterator.

        :type timeout: float
        :param timeout: (Optional) per-request timeout, in seconds, passed
                        through to the iterator.

        :rtype: :class:`Iterator`
        :returns: The iterator for the query.
        """
        active_client = self._client if client is None else client

        return Iterator(
            self,
            active_client,
            limit=limit,
            offset=offset,
            start_cursor=start_cursor,
            end_cursor=end_cursor,
            eventual=eventual,
            retry=retry,
            timeout=timeout,
        )
class Iterator(page_iterator.Iterator):
    """Track the state of one execution of a :class:`Query`.

    :type query: :class:`~google.cloud.datastore.query.Query`
    :param query: Query object holding the settings that stay the same for
                  every page of a result set.

    :type client: :class:`~google.cloud.datastore.client.Client`
    :param client: The client used to make a request.

    :type limit: int
    :param limit: (Optional) Limit the number of results returned.

    :type offset: int
    :param offset: (Optional) Offset used to begin a query.

    :type start_cursor: bytes
    :param start_cursor: (Optional) Cursor to begin paging through
                         query results. It is urlsafe-base64-decoded before
                         being placed in the request.

    :type end_cursor: bytes
    :param end_cursor: (Optional) Cursor to end paging through
                       query results. It is urlsafe-base64-decoded before
                       being placed in the request.

    :type eventual: bool
    :param eventual: (Optional) Defaults to ``False``. Forwarded, together
                     with the id of the client's current transaction (if
                     any), to ``helpers.get_read_options``.

    :type retry: :class:`google.api_core.retry.Retry`
    :param retry:
        A retry object used to retry requests. When ``None``, no ``retry``
        argument is passed to the ``run_query`` call.

    :type timeout: float
    :param timeout:
        Time, in seconds, to wait for the request to complete. When
        ``None``, no ``timeout`` argument is passed to the ``run_query``
        call.
    """

    next_page_token = None

    def __init__(
        self,
        query,
        client,
        limit=None,
        offset=None,
        start_cursor=None,
        end_cursor=None,
        eventual=False,
        retry=None,
        timeout=None,
    ):
        super().__init__(
            client=client,
            item_to_value=_item_to_entity,
            page_token=start_cursor,
            max_results=limit,
        )

        # Settings fixed at construction time.
        self._query = query
        self._offset = offset
        self._end_cursor = end_cursor
        self._eventual = eventual
        self._retry = retry
        self._timeout = timeout

        # State updated as pages are fetched.
        self._more_results = True
        self._skipped_results = 0

    def _build_protobuf(self):
        """Build the query protobuf for the next request.

        Relies on the current state of the iterator.

        :rtype:
            :class:`.query_pb2.Query`
        :returns: The query protobuf object for the current
                  state of the iterator.
        """
        query_pb = _pb_from_query(self._query)

        resume_token = self.next_page_token
        resuming = resume_token is not None
        if resuming:
            query_pb.start_cursor = base64.urlsafe_b64decode(resume_token)

        stop_token = self._end_cursor
        if stop_token is not None:
            query_pb.end_cursor = base64.urlsafe_b64decode(stop_token)

        if self.max_results is not None:
            # Only ask for what is still missing from the overall limit.
            query_pb.limit = self.max_results - self.num_results

        # The offset is relative to the start of the result set, not to
        # each page, so it is only sent when no cursor is being resumed
        # (this method is called once per page).
        if not resuming and self._offset is not None:
            query_pb.offset = self._offset

        return query_pb

    def _process_query_results(self, response_pb):
        """Update iterator state from a response and extract its entities.

        :type response_pb: :class:`.datastore_pb2.RunQueryResponse`
        :param response_pb: The protobuf response from a ``runQuery`` request.

        :rtype: iterable
        :returns: The next page of entity results.
        :raises ValueError: If ``more_results`` is an unexpected value.
        """
        batch = response_pb.batch
        status = batch.more_results

        self._skipped_results = batch.skipped_results

        if status == _NO_MORE_RESULTS:
            self.next_page_token = None
        else:
            self.next_page_token = base64.urlsafe_b64encode(batch.end_cursor)
        self._end_cursor = None

        if status == _NOT_FINISHED:
            self._more_results = True
        elif status in _FINISHED:
            self._more_results = False
        else:
            raise ValueError("Unexpected value returned for `more_results`.")

        return [found.entity for found in batch.entity_results]

    def _next_page(self):
        """Fetch the next page of results.

        :rtype: :class:`~google.api_core.page_iterator.Page`
        :returns: The next page in the iterator (or :data:`None` if
                  there are no pages left).
        """
        if not self._more_results:
            return None

        query_pb = self._build_protobuf()

        transaction = self.client.current_transaction
        transaction_id = None if transaction is None else transaction.id
        read_options = helpers.get_read_options(self._eventual, transaction_id)

        partition_id = entity_pb2.PartitionId(
            project_id=self._query.project, namespace_id=self._query.namespace
        )

        # Forward ``retry`` / ``timeout`` only when the caller supplied them.
        call_options = {
            name: setting
            for name, setting in (
                ("retry", self._retry),
                ("timeout", self._timeout),
            )
            if setting is not None
        }

        def execute(pb):
            # A fresh request dict is built for every call.
            return self.client._datastore_api.run_query(
                request={
                    "project_id": self._query.project,
                    "partition_id": partition_id,
                    "read_options": read_options,
                    "query": pb,
                },
                **call_options,
            )

        response_pb = execute(query_pb)

        while (
            response_pb.batch.more_results == _NOT_FINISHED
            and response_pb.batch.skipped_results < query_pb.offset
        ):
            # We haven't finished processing. A likely reason is we haven't
            # skipped all of the results yet. Don't return any results.
            # Instead, rerun query, adjusting offsets. Datastore doesn't
            # process more than 1000 skipped results in a query.
            followup_pb = query_pb2.Query()
            followup_pb._pb.CopyFrom(query_pb._pb)  # copy for testability
            followup_pb.start_cursor = response_pb.batch.skipped_cursor
            followup_pb.offset -= response_pb.batch.skipped_results

            query_pb = followup_pb
            response_pb = execute(query_pb)

        entity_pbs = self._process_query_results(response_pb)
        return page_iterator.Page(self, entity_pbs, self.item_to_value)
def _pb_from_query(query):
    """Translate a :class:`Query` into its protobuf representation.

    :type query: :class:`Query`
    :param query: The source query.

    :rtype: :class:`.query_pb2.Query`
    :returns: A protobuf that can be sent to the protobuf API.  N.b. that
              it does not contain "in-flight" fields for ongoing query
              executions (cursors, offset, limit).
    """
    query_pb = query_pb2.Query()

    for field_name in query.projection:
        projected = query_pb2.Projection()
        projected.property.name = field_name
        query_pb.projection.append(projected)

    if query.kind:
        kind_expr = query_pb2.KindExpression()
        kind_expr.name = query.kind
        query_pb.kind.append(kind_expr)

    # All filters (ancestor and property) are AND-ed in one composite filter.
    conjunction = query_pb.filter.composite_filter
    conjunction.op = query_pb2.CompositeFilter.Operator.AND

    if query.ancestor:
        ancestor_key_pb = query.ancestor.to_protobuf()

        # Filter on __key__ HAS_ANCESTOR == ancestor.
        has_ancestor = conjunction.filters._pb.add().property_filter
        has_ancestor.property.name = "__key__"
        has_ancestor.op = query_pb2.PropertyFilter.Operator.HAS_ANCESTOR
        has_ancestor.value.key_value.CopyFrom(ancestor_key_pb._pb)

    for name, op_symbol, operand in query.filters:
        op_enum = query.OPERATORS.get(op_symbol)

        clause = conjunction.filters._pb.add().property_filter
        clause.property.name = name
        clause.op = op_enum

        # Key filters copy the key protobuf; every other value goes
        # through the generic value helper.
        if name == "__key__":
            operand_key_pb = operand.to_protobuf()
            clause.value.key_value.CopyFrom(operand_key_pb._pb)
        else:
            helpers._set_protobuf_value(clause.value, operand)

    # Drop the (empty) composite filter when nothing was added to it.
    if not conjunction.filters:
        query_pb._pb.ClearField("filter")

    for spec in query.order:
        ordering = query_pb2.PropertyOrder()
        # A leading "-" requests descending order and is not part of the name.
        descending = spec.startswith("-")
        ordering.property.name = spec[1:] if descending else spec
        ordering.direction = (
            ordering.Direction.DESCENDING
            if descending
            else ordering.Direction.ASCENDING
        )
        query_pb.order.append(ordering)

    for group_name in query.distinct_on:
        reference = query_pb2.PropertyReference()
        reference.name = group_name
        query_pb.distinct_on.append(reference)

    return query_pb
def _item_to_entity(iterator, entity_pb):  # pylint: disable=unused-argument
    """Turn a raw entity protobuf into the native entity object.

    :type iterator: :class:`~google.api_core.page_iterator.Iterator`
    :param iterator: The iterator that is currently in use. It is not
                     used by this function.

    :type entity_pb:
        :class:`.entity_pb2.Entity`
    :param entity_pb: An entity protobuf to convert to a native entity.

    :rtype: :class:`~google.cloud.datastore.entity.Entity`
    :returns: The next entity in the page.
    """
    entity = helpers.entity_from_protobuf(entity_pb)
    return entity