Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FW and JSON Improvements #1350

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
79 changes: 48 additions & 31 deletions faker/providers/misc/__init__.py
Expand Up @@ -2,6 +2,7 @@
import hashlib
import io
import json
import re
import string
import tarfile
import uuid
Expand Down Expand Up @@ -399,12 +400,12 @@ def json(self,
data structures it is recommended to use the dictionary format.

Data Column Dictionary format:
{'key name': 'definition'}}
{'key name': 'definition'}

The definition can simply be the 'name:argument_group' of a provider
method, or can also be string {{ tokens }} that are passed to python
provider pystr_format() method for complex string generation.
Argument Groups are used to pass arguments to the provider methods.
The definition can be 'provider', 'provider:argument_group', tokenized
'string {{ provider:argument_group }}' that is passed to the python
provider method pystr_format() for generation, or a fixed '@word'.
Using Lists, Tuples, and Dicts as a definition for structure.

Example:
fake.set_arguments('top_half', {'min_value': 50, 'max_value': 100})
Expand All @@ -426,8 +427,8 @@ def json(self,
:return: Serialized JSON data
:rtype: str

:sample: data_columns={'ID': 'pyint', 'Details': {'Name': 'name',
'Address': 'address'}}, num_rows=1
:sample: data_columns={'Spec': '@1.0.1', 'ID': 'pyint',
'Details': {'Name': 'name', 'Address': 'address'}}, num_rows=2
:sample: data_columns={'Candidates': ['name', 'name', 'name']},
num_rows=1
:sample: data_columns=[('Name', 'name'), ('Points', 'pyint',
Expand All @@ -449,36 +450,35 @@ def process_list_structure(data: list) -> dict:
raise TypeError('Invalid arguments type. Must be a dictionary')

if name is None:
return self._format_selection(definition, **kwargs)
return self._value_format_selection(definition, **kwargs)

if isinstance(definition, tuple):
entry[name] = process_list_structure(definition)
elif isinstance(definition, (list, set)):
entry[name] = [process_list_structure([item])
for item in definition]
else:
entry[name] = self._format_selection(definition, **kwargs)
entry[name] = self._value_format_selection(definition, **kwargs)
return entry

def process_dict_structure(data: dict) -> dict:
entry = {}

if isinstance(data, str):
return self._format_selection(data)
return self._value_format_selection(data)

if isinstance(data, (float, int)):
return data
if isinstance(data, dict):
for name, definition in data.items():
if isinstance(definition, (tuple, list, set)):
entry[name] = [process_dict_structure(item)
for item in definition]
elif isinstance(definition, (dict, int, float, bool)):
entry[name] = process_dict_structure(definition)
else:
entry[name] = self._value_format_selection(definition)
return entry

for name, definition in data.items():
if isinstance(definition, (tuple, list)):
entry[name] = [process_dict_structure(item)
for item in definition]
elif isinstance(definition, (dict, int, float)):
entry[name] = process_dict_structure(definition)
else:
entry[name] = self._format_selection(definition)

return entry
return data

def create_json_structure(data_columns) -> dict:
if isinstance(data_columns, dict):
Expand Down Expand Up @@ -510,9 +510,11 @@ def fixed_width(self,
Data Column List format
[('field width', 'definition', {'arguments'})]

The definition can simply be the 'name:argument_group' of a provider
method, or can also be string tokens that are passed to python
provider method pystr_format() for data generation.
The definition can be 'provider', 'provider:argument_group', tokenized
'string {{ provider:argument_group }}' that is passed to the python
provider method pystr_format() for generation, or a fixed '@word'.
Using Lists, Tuples, and Dicts as a definition for structure.

Argument Groups can be used to pass arguments to the provider methods,
but will override the arguments supplied in the tuple record.

Expand All @@ -530,7 +532,7 @@ def fixed_width(self,
:rtype: str

:sample: data_columns=[(20, 'name'), (3, 'pyint', {'min_value': 50,
'max_value': 100})], align='right', num_rows=1
'max_value': 100})], align='right', num_rows=2
"""
default_data_columns = [
(20, 'name'),
Expand All @@ -553,24 +555,39 @@ def fixed_width(self,
if not isinstance(kwargs, dict):
raise TypeError('Invalid arguments type. Must be a dictionary')

result = self._format_selection(definition, **kwargs)
result = self._value_format_selection(definition, **kwargs)
field = "{0:%s%s}" % (align_map.get(align, '<'), width)
row.append(field.format(result)[:width])

data.append(''.join(row))
return '\n'.join(data)

def _format_selection(self, definition, **kwargs):
def _value_format_selection(self, definition, **kwargs):
"""
Formats the string with PyStr Format if special characters are found.
Formats the string in different ways depending on its contents.

The return can be the '@word' itself, a '{{ token }}' passed to PyStr,
or a 'provider:argument_group' format field that returns potentially
a non-string type.

This ensures that Number and Boolean types generated in the
JSON structures keep their proper type, and are not just strings.
"""
if '{{' in definition and '}}' in definition:

# Check for PyStr first as complex strings may start with @
if re.match(r'.*\{\{.*\}\}.*', definition):
return self.generator.pystr_format(definition)

if definition.count(':') == 1:
# Check for fixed @words that won't be generated
if re.match(r'^@.*', definition):
return definition.lstrip('@')

# Check if an argument group has been supplied
if re.match(r'^[a-zA-Z0-9_-]*:\w', definition):
definition, argument_group = definition.split(':')
arguments = self.generator.get_arguments(argument_group.strip())

return self.generator.format(definition.strip(), **arguments)

# Assume the string is referring to a provider
return self.generator.format(definition, **kwargs)
6 changes: 5 additions & 1 deletion tests/providers/test_misc.py
Expand Up @@ -477,18 +477,22 @@ def test_json_multiple_rows(self, faker_with_foobar):

assert isinstance(json_data, list) and len(json_data) == 2

def test_json_passthrough_int_float(self, faker_with_foobar):
def test_json_passthrough_values(self, faker_with_foobar):
kwargs = {
'data_columns': {
'item1': 1,
'item2': 1.0,
'item3': True,
'item4': '@fixed',
},
'num_rows': 1,
}
json_data = json.loads(faker_with_foobar.json(**kwargs))

assert json_data['item1'] == 1
assert json_data['item2'] == 1.0
assert json_data['item3'] is True
assert json_data['item4'] == 'fixed'

def test_json_type_integrity_int(self, faker_with_foobar):
kwargs = {
Expand Down