From e2ab7c34a4e5c739b6cd26d01127c0b7c23f4af5 Mon Sep 17 00:00:00 2001 From: noahbenson Date: Wed, 9 Feb 2022 19:52:32 -0800 Subject: [PATCH 1/3] Makes the PMap keys, values, and items methods return in O(1) time Adds a new class `PMapView` to the `pyrsistent._pmap` namespace. This class is intended to be private and essentially mimics the built-in `dict_values` and `dict_items` pseudo-types. Under the hood they just keep a reference to the `PMap` object and use the `iteritems` and `itervalues` methods; this means that creating a view can be run in constant time insteead of the current `O(n)` time. The `keys` method of `PMap` does not use the `PMapView` class because `PSet` is aleady a `PMapView`-type class for `PMap` keys. I couldn't think of a reason not to just use `PSet`: it's the logical type for a set of keys of a persistent map, and it can just be passed a `PMap` to create such a set in constant time. The tests were also updated to reflect the reality that `keys` now returns a `PSet` instead of a `PVector` and that `values` and `items` return `PMapView`s. As a benchmark, the following code: ```python from timeit import timeit def make_setup(mapsize): return ('from pyrsistent import pmap, pvector; ' 'm = pmap({k:k for k in range(%d)})' % (mapsize,)) tests = [('len(m.keys())', 'len(pvector(m.iterkeys()))'), ('len(m.values())', 'len(pvector(m.itervalues()))'), ('len(m.items())', 'len(pvector(m.iteritems()))')] for mapsize in [10, 100, 1000]: print("Map size:", str(mapsize)) setupstr = make_setup(mapsize) for test_pair in tests: print('=' * 60) print('%-40s' % test_pair[0], '%6f us' % timeit(test_pair[0], setup=setupstr)) print('%-40s' % test_pair[1], '%6f us' % timeit(test_pair[1], setup=setupstr)) print('-' * 60) print('') ``` produces the following output on my desktop: ``` Map size: 10 ============================================================ len(m.keys()) 1.330579 us len(pvector(m.iterkeys())) 1.562125 us ------------------------------------------------------------ ============================================================ len(m.values()) 0.653008 us len(pvector(m.itervalues())) 1.584405 us ------------------------------------------------------------ ============================================================ len(m.items()) 0.653144 us len(pvector(m.iteritems())) 1.159099 us ------------------------------------------------------------ Map size: 100 ============================================================ len(m.keys()) 1.350190 us len(pvector(m.iterkeys())) 11.702080 us ------------------------------------------------------------ ============================================================ len(m.values()) 0.650723 us len(pvector(m.itervalues())) 11.739465 us ------------------------------------------------------------ ============================================================ len(m.items()) 0.653189 us len(pvector(m.iteritems())) 8.951215 us ------------------------------------------------------------ Map size: 1000 ============================================================ len(m.keys()) 1.379429 us len(pvector(m.iterkeys())) 114.202673 us ------------------------------------------------------------ ============================================================ len(m.values()) 0.679722 us len(pvector(m.itervalues())) 113.874931 us ------------------------------------------------------------ ============================================================ len(m.items()) 0.680877 us len(pvector(m.iteritems())) 87.645472 us ------------------------------------------------------------ ``` --- pyrsistent/_pmap.py | 75 +++++++++++++++++++++++++++++++++++++++++++-- tests/map_test.py | 8 +++-- 2 files changed, 77 insertions(+), 6 deletions(-) diff --git a/pyrsistent/_pmap.py b/pyrsistent/_pmap.py index 056d478..5b27a4e 100644 --- a/pyrsistent/_pmap.py +++ b/pyrsistent/_pmap.py @@ -3,6 +3,68 @@ from pyrsistent._pvector import pvector from pyrsistent._transformations import transform +class PMapView(object): + """View type for the persistent map/dict type `PMap`. + + Provides an equivalent of Python's built-in `dict_values` and `dict_items` + types that result from expreessions such as `{}.values()` and + `{}.items()`. The equivalent for `{}.keys()` is absent because the keys are + instead represented by a `PSet` object, which can be created in `O(1)` time. + + Parameters + ---------- + m : mapping + The mapping/dict-like object of which a view is to be created. This + should generally be a `PMap` object. + values : boolean, optional + If `True`, then a view of the values is created; if `False`, then a view + of the items is created. + """ + def __init__(self, m, values=False): + # Make sure this is a persistnt map + if not isinstance(m, PMap): + # We can convert mapping objects into pmap objects, I guess (but why?) + if isinstance(m, Mapping): + m = pmap(m) + else: + raise TypeError("PViewMap requires a Mapping object") + object.__setattr__(self, '_map', m) + object.__setattr__(self, '_values', values) + def __len__(self): + return len(self._map) + def __setattr__(self, k, v): + raise TypeError("%s is immutable" % (type(self),)) + def __iter__(self): + if self._values: + return self._map.itervalues() + else: + return self._map.iteritems() + def __contains__(self, arg): + if self._values: + for v in self._map.itervalues(): + if v == arg: return True + return False + else: + try: (k,v) = arg + except Exception: return False + m = self._map + return k in m and m[k] == v + def __reversed__(self): + raise TypeError("Persistent maps are not reversible") + # The str and repr methods mitate the dict_view style currently. + def __str__(self): + tag = 'values' if self._values else 'items' + return "pmap_%s(%s)" % (tag, list(iter(self))) + def __repr__(self): + tag = 'values' if self._values else 'items' + return "pmap_%s(%s)" % (tag, list(iter(self))) + def __eq__(self, x): + if x is self: return True + if not isinstance(x, PMapView): return False + # For whatever reason, dict_values always seem to return False for == + # (probably it's not implemented), so we mimic that. + if self._values != False or x._values != False: return False + return self._map == x._map class PMap(object): """ @@ -89,6 +151,12 @@ def __contains__(self, key): def __iter__(self): return self.iterkeys() + # If this method is not defined, then reversed(pmap) will attempt to reverse + # the map using len() and getitem, usually resulting in a mysterious + # KeyError. + def __reversed__(self): + raise TypeError("Persistent maps are not reversible") + def __getattr__(self, key): try: return self[key] @@ -115,13 +183,14 @@ def iteritems(self): yield k, v def values(self): - return pvector(self.itervalues()) + return PMapView(self, values=True) def keys(self): - return pvector(self.iterkeys()) + from ._pset import PSet + return PSet(self) def items(self): - return pvector(self.iteritems()) + return PMapView(self, values=False) def __len__(self): return self._size diff --git a/tests/map_test.py b/tests/map_test.py index 982bd11..261338b 100644 --- a/tests/map_test.py +++ b/tests/map_test.py @@ -54,17 +54,19 @@ def test_remove_non_existing_element_raises_key_error(): def test_various_iterations(): + import pyrsistent as pyr + assert set(['a', 'b']) == set(m(a=1, b=2)) assert ['a', 'b'] == sorted(m(a=1, b=2).keys()) - assert isinstance(m().keys(), PVector) + assert isinstance(m().keys(), pyr.PSet) assert set([1, 2]) == set(m(a=1, b=2).itervalues()) assert [1, 2] == sorted(m(a=1, b=2).values()) - assert isinstance(m().values(), PVector) + assert isinstance(m().values(), pyr._pmap.PMapView) assert set([('a', 1), ('b', 2)]) == set(m(a=1, b=2).iteritems()) assert set([('a', 1), ('b', 2)]) == set(m(a=1, b=2).items()) - assert isinstance(m().items(), PVector) + assert isinstance(m().items(), pyr._pmap.PMapView) def test_initialization_with_two_elements(): From 4ec6e5a9b99ff4c574c1e7c2d9c7fd538971f692 Mon Sep 17 00:00:00 2001 From: noahbenson Date: Wed, 9 Feb 2022 20:46:14 -0800 Subject: [PATCH 2/3] Added additional tests of the PMapView class --- tests/map_test.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/map_test.py b/tests/map_test.py index 261338b..76df834 100644 --- a/tests/map_test.py +++ b/tests/map_test.py @@ -68,6 +68,17 @@ def test_various_iterations(): assert set([('a', 1), ('b', 2)]) == set(m(a=1, b=2).items()) assert isinstance(m().items(), pyr._pmap.PMapView) + pm = pmap({k:k for k in range(100)}) + assert len(pm) == len(pm.keys()) + assert len(pm) == len(pm.values()) + assert len(pm) == len(pm.items()) + ks = pm.keys() + vs = pm.values() + us = pm.items() + assert all(k in pm for k in ks) + assert all(pm[k] == v for (k,v) in us) + assert all(k in ks for k in ks) + assert all(v in vs for v in vs) def test_initialization_with_two_elements(): map = pmap({'a': 2, 'b': 3}) From 5cebec73b07e92aa00866db43de8c3bda120c2ea Mon Sep 17 00:00:00 2001 From: noahbenson Date: Fri, 11 Feb 2022 08:13:40 -0800 Subject: [PATCH 3/3] responses to code review for pull request #243 --- pyrsistent/_pmap.py | 101 ++++++++++++++++++++++++++++++-------------- tests/map_test.py | 9 ++-- 2 files changed, 72 insertions(+), 38 deletions(-) diff --git a/pyrsistent/_pmap.py b/pyrsistent/_pmap.py index 5b27a4e..7da1de4 100644 --- a/pyrsistent/_pmap.py +++ b/pyrsistent/_pmap.py @@ -3,7 +3,7 @@ from pyrsistent._pvector import pvector from pyrsistent._transformations import transform -class PMapView(object): +class PMapView: """View type for the persistent map/dict type `PMap`. Provides an equivalent of Python's built-in `dict_values` and `dict_items` @@ -11,16 +11,17 @@ class PMapView(object): `{}.items()`. The equivalent for `{}.keys()` is absent because the keys are instead represented by a `PSet` object, which can be created in `O(1)` time. + The `PMapView` class is overloaded by the `PMapValues` and `PMapItems` + classes which handle the specific case of values and items, respectively + Parameters ---------- m : mapping The mapping/dict-like object of which a view is to be created. This should generally be a `PMap` object. - values : boolean, optional - If `True`, then a view of the values is created; if `False`, then a view - of the items is created. """ - def __init__(self, m, values=False): + # The public methods that use the above. + def __init__(self, m): # Make sure this is a persistnt map if not isinstance(m, PMap): # We can convert mapping objects into pmap objects, I guess (but why?) @@ -29,42 +30,78 @@ def __init__(self, m, values=False): else: raise TypeError("PViewMap requires a Mapping object") object.__setattr__(self, '_map', m) - object.__setattr__(self, '_values', values) + def __len__(self): return len(self._map) + def __setattr__(self, k, v): raise TypeError("%s is immutable" % (type(self),)) - def __iter__(self): - if self._values: - return self._map.itervalues() - else: - return self._map.iteritems() - def __contains__(self, arg): - if self._values: - for v in self._map.itervalues(): - if v == arg: return True - return False - else: - try: (k,v) = arg - except Exception: return False - m = self._map - return k in m and m[k] == v + def __reversed__(self): raise TypeError("Persistent maps are not reversible") - # The str and repr methods mitate the dict_view style currently. + +class PMapValues(PMapView): + """View type for the values of the persistent map/dict type `PMap`. + + Provides an equivalent of Python's built-in `dict_values` type that result + from expreessions such as `{}.values()`. See also `PMapView`. + + Parameters + ---------- + m : mapping + The mapping/dict-like object of which a view is to be created. This + should generally be a `PMap` object. + """ + def __iter__(self): + return self._map.itervalues() + + def __contains__(self, arg): + return arg in self._map.itervalues() + + # The str and repr methods imitate the dict_view style currently. def __str__(self): - tag = 'values' if self._values else 'items' - return "pmap_%s(%s)" % (tag, list(iter(self))) + return f"pmap_values({list(iter(self))})" + def __repr__(self): - tag = 'values' if self._values else 'items' - return "pmap_%s(%s)" % (tag, list(iter(self))) + return f"pmap_values({list(iter(self))})" + def __eq__(self, x): - if x is self: return True - if not isinstance(x, PMapView): return False # For whatever reason, dict_values always seem to return False for == # (probably it's not implemented), so we mimic that. - if self._values != False or x._values != False: return False - return self._map == x._map + if x is self: return True + else: return False + +class PMapItems(PMapView): + """View type for the items of the persistent map/dict type `PMap`. + + Provides an equivalent of Python's built-in `dict_items` type that result + from expreessions such as `{}.items()`. See also `PMapView`. + + Parameters + ---------- + m : mapping + The mapping/dict-like object of which a view is to be created. This + should generally be a `PMap` object. + """ + def __iter__(self): + return self._map.iteritems() + + def __contains__(self, arg): + try: (k,v) = arg + except Exception: return False + return k in self._map and self._map[k] == v + + # The str and repr methods mitate the dict_view style currently. + def __str__(self): + return f"pmap_items({list(iter(self))})" + + def __repr__(self): + return f"pmap_items({list(iter(self))})" + + def __eq__(self, x): + if x is self: return True + elif not isinstance(x, type(self)): return False + else: return self._map == x._map class PMap(object): """ @@ -183,14 +220,14 @@ def iteritems(self): yield k, v def values(self): - return PMapView(self, values=True) + return PMapValues(self) def keys(self): from ._pset import PSet return PSet(self) def items(self): - return PMapView(self, values=False) + return PMapItems(self) def __len__(self): return self._size diff --git a/tests/map_test.py b/tests/map_test.py index 76df834..4f38f40 100644 --- a/tests/map_test.py +++ b/tests/map_test.py @@ -58,26 +58,23 @@ def test_various_iterations(): assert set(['a', 'b']) == set(m(a=1, b=2)) assert ['a', 'b'] == sorted(m(a=1, b=2).keys()) - assert isinstance(m().keys(), pyr.PSet) assert set([1, 2]) == set(m(a=1, b=2).itervalues()) assert [1, 2] == sorted(m(a=1, b=2).values()) - assert isinstance(m().values(), pyr._pmap.PMapView) assert set([('a', 1), ('b', 2)]) == set(m(a=1, b=2).iteritems()) assert set([('a', 1), ('b', 2)]) == set(m(a=1, b=2).items()) - assert isinstance(m().items(), pyr._pmap.PMapView) pm = pmap({k:k for k in range(100)}) assert len(pm) == len(pm.keys()) assert len(pm) == len(pm.values()) assert len(pm) == len(pm.items()) ks = pm.keys() - vs = pm.values() - us = pm.items() assert all(k in pm for k in ks) - assert all(pm[k] == v for (k,v) in us) assert all(k in ks for k in ks) + us = pm.items() + assert all(pm[k] == v for (k,v) in us) + vs = pm.values() assert all(v in vs for v in vs) def test_initialization_with_two_elements():