Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP Added support for tuples, namedtuples (both from collections and typing), sets, frozensets and OrderedDict's in MSONable, MontyEncoder and MontyDecoder #100

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
0755e76
Added is_namedtuple method to test whether an object is a namedtuple …
davidwaroquiers Jan 14, 2020
f259b94
Merge branch 'master' of https://github.com/materialsvirtuallab/monty
davidwaroquiers Jan 14, 2020
92246a7
Added (partial) support for tuple and namedtuples in json.py.
davidwaroquiers Jan 14, 2020
680461d
Added support for tuple, namedtuples and OrderedDict in json.py.
davidwaroquiers Jan 14, 2020
9a57714
Fixed namedtuple support for python versions < 3.7, for which default
davidwaroquiers Jan 14, 2020
41f4463
Disable pylint's check on unexpected keyword argument 'defaults'.
davidwaroquiers Jan 14, 2020
d5527b9
Disable pylint's check on unexpected keyword argument 'defaults'.
davidwaroquiers Jan 14, 2020
3f3c029
Added is_NamedTuple to check whether object is a class generated from…
davidwaroquiers Jan 15, 2020
6866658
Added validate_NamedTuple to check whether the items in a NamedTuple …
davidwaroquiers Jan 16, 2020
b67e6e6
Changed validate_NamedTuple to accept subclass of the types as valid …
davidwaroquiers Jan 16, 2020
e233d4c
Changed validate_NamedTuple to accept subclass of the types as valid …
davidwaroquiers Jan 16, 2020
465467d
Ignoring pytest import for mypy.
davidwaroquiers Jan 16, 2020
3ed73cc
Changed "builtins" to "@builtins" in the serialization of tuples, nam…
davidwaroquiers Jan 16, 2020
7e823a9
Added serialization of typing.NamedTuple's.
davidwaroquiers Jan 17, 2020
d982c3b
Added serialization of sets.
davidwaroquiers Jan 17, 2020
04e0513
Fixed pycodestyle, pylint, mypy, ...
davidwaroquiers Jan 17, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions monty/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,3 +223,8 @@ def dict2namedtuple(*args, **kwargs):
d.update(**kwargs)
return collections.namedtuple(typename="dict2namedtuple",
field_names=list(d.keys()))(**d)


def is_namedtuple(obj):
"""Test if an object is a class generated from collections.namedtuple."""
return isinstance(obj, tuple) and hasattr(obj, "_fields") and hasattr(obj, "_asdict")
79 changes: 69 additions & 10 deletions monty/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
import os
import json
import datetime
import sys

from hashlib import sha1
from collections import OrderedDict, defaultdict
from collections import namedtuple
from enum import Enum

from importlib import import_module
Expand All @@ -33,6 +35,9 @@
except ImportError:
yaml = None # type: ignore

from monty.collections import is_namedtuple


__version__ = "3.0.0"


Expand Down Expand Up @@ -62,6 +67,41 @@ def _load_redirect(redirect_file):
return dict(redirect_dict)


def _recursive_as_dict(obj):
"""Recursive function to prepare serialization of objects.

Takes care of tuples, namedtuples, OrderedDict, objects with an as_dict method.
"""
if is_namedtuple(obj):
if sys.version_info < (3, 7): # default values for namedtuples were introduced in python 3.7.
return {"namedtuple_as_list": [_recursive_as_dict(it) for it in obj],
"fields": obj._fields,
"typename": obj.__class__.__name__,
"@module": "builtins",
"@class": "namedtuple"}
return {"namedtuple_as_list": [_recursive_as_dict(it) for it in obj],
"fields": obj._fields,
"fields_defaults": obj._fields_defaults,
"typename": obj.__class__.__name__,
"@module": "builtins",
"@class": "namedtuple"}
if isinstance(obj, tuple):
return {"tuple_as_list": [_recursive_as_dict(it) for it in obj],
"@module": "builtins",
"@class": "tuple"}
if isinstance(obj, OrderedDict):
return {"ordereddict_as_list": [[key, _recursive_as_dict(val)] for key, val in obj.items()],
"@module": "builtins",
"@class": "OrderedDict"}
if isinstance(obj, list):
return [_recursive_as_dict(it) for it in obj]
if isinstance(obj, dict):
return {kk: _recursive_as_dict(vv) for kk, vv in obj.items()}
if hasattr(obj, "as_dict"):
return obj.as_dict()
return obj


class MSONable:
"""
This is a mix-in base class specifying an API for msonable objects. MSON
Expand Down Expand Up @@ -122,15 +162,6 @@ def as_dict(self) -> dict:

args = getfullargspec(self.__class__.__init__).args

def recursive_as_dict(obj):
if isinstance(obj, (list, tuple)):
return [recursive_as_dict(it) for it in obj]
if isinstance(obj, dict):
return {kk: recursive_as_dict(vv) for kk, vv in obj.items()}
if hasattr(obj, "as_dict"):
return obj.as_dict()
return obj

for c in args:
if c != "self":
try:
Expand All @@ -147,7 +178,7 @@ def recursive_as_dict(obj):
"a self.kwargs variable to automatically "
"determine the dict format. Alternatively, "
"you can implement both as_dict and from_dict.")
d[c] = recursive_as_dict(a)
d[c] = _recursive_as_dict(a)
if hasattr(self, "kwargs"):
# type: ignore
d.update(**getattr(self, "kwargs")) # pylint: disable=E1101
Expand Down Expand Up @@ -251,6 +282,8 @@ def default(self, o) -> dict: # pylint: disable=E0202
"@class": "ObjectId",
"oid": str(o)}

# Is this still useful as we are now calling the _recursive_as_dict
# method (which takes care of as_dict's) before the encoding ?
try:
d = o.as_dict()
if "@module" not in d:
Expand All @@ -268,6 +301,17 @@ def default(self, o) -> dict: # pylint: disable=E0202
except AttributeError:
return json.JSONEncoder.default(self, o)

def encode(self, o):
"""MontyEncoder's encode method.

First, prepares the object by recursively transforming tuples, namedtuples,
object having an as_dict method and others to encodable python objects.
"""
# This cannot go in the default method because default is called as a last resort,
# such that tuples and namedtuples have already been transformed to lists by json's encode method.
o = _recursive_as_dict(o)
return super().encode(o)


class MontyDecoder(json.JSONDecoder):
"""
Expand Down Expand Up @@ -308,6 +352,21 @@ def process_decoded(self, d):
dt = datetime.datetime.strptime(d["string"],
"%Y-%m-%d %H:%M:%S")
return dt
if modname == "builtins":
if classname == "tuple":
return tuple([self.process_decoded(item) for item in d['tuple_as_list']])
if classname == "namedtuple":
# default values for collections.namedtuple have been introduced in python 3.7
# it is probably not essential to deserialize the defaults if the object was serialized with
# python >= 3.7 and deserialized with python < 3.7.
if sys.version_info < (3, 7):
nt = namedtuple(d['typename'], d['fields'])
else:
nt = namedtuple(d['typename'], d['fields'], # pylint: disable=E1123
defaults=d['fields_defaults']) # pylint: disable=E1123
return nt(*[self.process_decoded(item) for item in d['namedtuple_as_list']])
if classname == "OrderedDict":
return OrderedDict([(key, self.process_decoded(val)) for key, val in d['ordereddict_as_list']])

mod = __import__(modname, globals(), locals(), [classname], 0)
if hasattr(mod, classname):
Expand Down
25 changes: 25 additions & 0 deletions tests/test_collections.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import unittest
import os

from collections import namedtuple

from monty.collections import frozendict, Namespace, AttrDict, \
FrozenAttrDict, tree
from monty.collections import is_namedtuple

test_dir = os.path.join(os.path.dirname(__file__), 'test_files')

Expand Down Expand Up @@ -49,5 +52,27 @@ def test_tree(self):
self.assertEqual(x['a']['b']['c']['d'], 1)


def test_is_namedtuple():
a_nt = namedtuple('a', ['x', 'y', 'z'])
a1 = a_nt(1, 2, 3)
assert a1 == (1, 2, 3)
assert is_namedtuple(a1) is True
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you need "is True". The assert tests whether it is True.
Similarly, 3 lines later you just need "assert not is_named_tuple..."

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is actually not exactly equivalent. The "is True" also checks that the returned value is a bool and not for example an empty list vs a non empty list. If is_namedtuple(a1) were to return, e.g. ["hello"] (or to return [] in the "False" case), "assert is_namedtuple(a1)" would also pass while the API would have changed. I would keep the "is True" if that's ok for you. Same for 3 lines after (and also in quite a lot of other places).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I know it is not exactly equivalent, but we need to think of typical use cases. When people use that method, they are not going to check that it is a boolean type. They will merely check that it is a "True-like" value. E.g., if is_namedtuple(a1). That's the Pythonic way of doing this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course, users will use it this way. My point is that this method is supposed (from its docstring/name) to return a bool indicating whether it is a namedtuple or not. Why should we allow any new development/change to return something else (e.g. the string "Yes it is a namedtuple" for the True case and "" for the False case) ? This unit test is just preventing that. Anyway if you prefer to remove the "is True", I can do it. For me the pythonic way applies for the code itself, but the test code can deviate from that for the reason above.

a_t = tuple([1, 2])
assert a_t == (1, 2)
assert is_namedtuple(a_t) is False

class SubList(list):
def _fields(self):
return []
def _fields_defaults(self):
return []
def _asdict(self):
return {}

sublist = SubList([3, 2, 1])
assert sublist == [3, 2, 1]
assert is_namedtuple(sublist) is False


if __name__ == "__main__":
unittest.main()
73 changes: 73 additions & 0 deletions tests/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@
import datetime
from bson.objectid import ObjectId
from enum import Enum
from collections import namedtuple
from collections import OrderedDict

from . import __version__ as tests_version
from monty.json import MSONable, MontyEncoder, MontyDecoder, jsanitize
from monty.json import _load_redirect
from monty.collections import is_namedtuple

test_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_files")

Expand Down Expand Up @@ -188,6 +191,44 @@ def test_enum_serialization(self):
self.assertEqual(e_new.name, e.name)
self.assertEqual(e_new.value, e.value)

def test_tuple_serialization(self):
a = GoodMSONClass(a=1, b=tuple(), c=1)
afromdict = GoodMSONClass.from_dict(a.as_dict())
assert type(a.b) is tuple
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think isinstance is a better approach. This applies to all instances where you use type(X) is or == something.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isinstance checks that the instance is a tuple or a subclass of tuple. I think it is more safe to use type(a.b) is tuple as it would prevent someone (a weirdo btw ;-) to change the MSONable as_dict and/or from_dict to return a subclass of tuple when deserializing. What do you think ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. But this is a test, not the actual code. So here, it suffices to make sure it is an instance of tuple.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the user serializes a tuple, he expects to get back exactly a tuple when deserializing. Using isinstance in the test allows to change that. I could modify the MSONable from_dict to return a subclass of a tuple instead of a real tuple.

assert type(afromdict.b) is tuple
assert a.b == afromdict.b
a = GoodMSONClass(a=1, b=[1, tuple([2, tuple([1, 2, 3, 4])])], c=1)
afromdict = GoodMSONClass.from_dict(a.as_dict())
assert type(a.b) is list
assert type(afromdict.b) is list
assert afromdict.b[0] == 1
assert type(afromdict.b[1]) is tuple
assert afromdict.b[1][0] == 2
assert type(afromdict.b[1][1]) is tuple
assert afromdict.b[1][1] == (1, 2, 3, 4)

def test_namedtuple_serialization(self):
a = namedtuple('A', ['x', 'y', 'zzz'])
b = GoodMSONClass(a=1, b=a(1, 2, 3), c=1)
assert is_namedtuple(b.b) is True
assert b.b._fields == ('x', 'y', 'zzz')
bfromdict = GoodMSONClass.from_dict(b.as_dict())
assert is_namedtuple(bfromdict.b) is True
assert bfromdict.b._fields == ('x', 'y', 'zzz')
assert bfromdict.b == b.b
assert bfromdict.b.x == b.b.x
assert bfromdict.b.y == b.b.y
assert bfromdict.b.zzz == b.b.zzz

def test_OrderedDict_serialization(self):
od = OrderedDict([('val1', 1), ('val2', 2)])
od['val3'] = '3'
a = GoodMSONClass(a=1, b=od, c=1)
assert list(a.b.keys()) == ['val1', 'val2', 'val3']
afromdict = GoodMSONClass.from_dict(a.as_dict())
assert type(afromdict.b) is OrderedDict
assert list(afromdict.b.keys()) == ['val1', 'val2', 'val3']


class JsonTest(unittest.TestCase):

Expand Down Expand Up @@ -303,6 +344,38 @@ def test_redirect_settings_file(self):
data = _load_redirect(os.path.join(test_dir, "test_settings.yaml"))
self.assertEqual(data, {'old_module': {'old_class': {'@class': 'new_class', '@module': 'new_module'}}})

def test_complex_enc_dec(self):
a = tuple([0, 2, 4])
a_jsonstr = json.dumps(a, cls=MontyEncoder)
a_from_jsonstr = json.loads(a_jsonstr, cls=MontyDecoder)
assert type(a_from_jsonstr) is tuple

nt = namedtuple('A', ['x', 'y', 'zzz'])
nt2 = namedtuple('ABC', ['ab', 'cd', 'ef'])
od = OrderedDict([('val1', 1), ('val2', GoodMSONClass(a=a, b=nt2(1, 2, 3), c=1))])
od['val3'] = '3'

obj = nt(x=a, y=od, zzz=[1, 2, 3])
obj_jsonstr = json.dumps(obj, cls=MontyEncoder)
obj_from_jsonstr = json.loads(obj_jsonstr, cls=MontyDecoder)

assert is_namedtuple(obj_from_jsonstr) is True
assert obj_from_jsonstr.__class__.__name__ == 'A'
assert type(obj_from_jsonstr.x) is tuple
assert obj_from_jsonstr.x == (0, 2, 4)
assert type(obj_from_jsonstr.y) is OrderedDict
assert list(obj_from_jsonstr.y.keys()) == ['val1', 'val2', 'val3']
assert obj_from_jsonstr.y['val1'] == 1
assert type(obj_from_jsonstr.y['val2']) is GoodMSONClass
assert type(obj_from_jsonstr.y['val2'].a) is tuple
assert obj_from_jsonstr.y['val2'].a == (0, 2, 4)
assert is_namedtuple(obj_from_jsonstr.y['val2'].b) is True
assert obj_from_jsonstr.y['val2'].b.__class__.__name__ == 'ABC'
assert obj_from_jsonstr.y['val2'].b.ab == 1
assert obj_from_jsonstr.y['val2'].b.cd == 2
assert obj_from_jsonstr.y['val2'].b.ef == 3
assert obj_from_jsonstr.y['val3'] == '3'


if __name__ == "__main__":
unittest.main()