From a2333a3b6fc54c0efaff45fb23a0b1a0a47a71a6 Mon Sep 17 00:00:00 2001 From: johnbrandborg Date: Thu, 31 Dec 2020 03:02:15 +1100 Subject: [PATCH] FW and JSON Improvements (#1350) * FW and JSON Improvements * Update assertion to be more clear --- faker/providers/misc/__init__.py | 79 +++++++++++++++++++------------- tests/providers/test_misc.py | 6 ++- 2 files changed, 53 insertions(+), 32 deletions(-) diff --git a/faker/providers/misc/__init__.py b/faker/providers/misc/__init__.py index d5c587eb73..f2ba2660b9 100644 --- a/faker/providers/misc/__init__.py +++ b/faker/providers/misc/__init__.py @@ -2,6 +2,7 @@ import hashlib import io import json +import re import string import tarfile import uuid @@ -435,12 +436,12 @@ def json(self, data structures it is recommended to use the dictionary format. Data Column Dictionary format: - {'key name': 'definition'}} + {'key name': 'definition'} - The definition can simply be the 'name:argument_group' of a provider - method, or can also be string {{ tokens }} that are passed to python - provider pystr_format() method for complex string generation. - Argument Groups are used to pass arguments to the provider methods. + The definition can be 'provider', 'provider:argument_group', tokenized + 'string {{ provider:argument_group }}' that is passed to the python + provider method pystr_format() for generation, or a fixed '@word'. + Using Lists, Tuples, and Dicts as a definition for structure. 
Example: fake.set_arguments('top_half', {'min_value': 50, 'max_value': 100}) @@ -462,8 +463,8 @@ def json(self, :return: Serialized JSON data :rtype: str - :sample: data_columns={'ID': 'pyint', 'Details': {'Name': 'name', - 'Address': 'address'}}, num_rows=1 + :sample: data_columns={'Spec': '@1.0.1', 'ID': 'pyint', + 'Details': {'Name': 'name', 'Address': 'address'}}, num_rows=2 :sample: data_columns={'Candidates': ['name', 'name', 'name']}, num_rows=1 :sample: data_columns=[('Name', 'name'), ('Points', 'pyint', @@ -485,7 +486,7 @@ def process_list_structure(data: list) -> dict: raise TypeError('Invalid arguments type. Must be a dictionary') if name is None: - return self._format_selection(definition, **kwargs) + return self._value_format_selection(definition, **kwargs) if isinstance(definition, tuple): entry[name] = process_list_structure(definition) @@ -493,28 +494,27 @@ def process_list_structure(data: list) -> dict: entry[name] = [process_list_structure([item]) for item in definition] else: - entry[name] = self._format_selection(definition, **kwargs) + entry[name] = self._value_format_selection(definition, **kwargs) return entry def process_dict_structure(data: dict) -> dict: entry = {} if isinstance(data, str): - return self._format_selection(data) + return self._value_format_selection(data) - if isinstance(data, (float, int)): - return data + if isinstance(data, dict): + for name, definition in data.items(): + if isinstance(definition, (tuple, list, set)): + entry[name] = [process_dict_structure(item) + for item in definition] + elif isinstance(definition, (dict, int, float, bool)): + entry[name] = process_dict_structure(definition) + else: + entry[name] = self._value_format_selection(definition) + return entry - for name, definition in data.items(): - if isinstance(definition, (tuple, list)): - entry[name] = [process_dict_structure(item) - for item in definition] - elif isinstance(definition, (dict, int, float)): - entry[name] = 
process_dict_structure(definition) - else: - entry[name] = self._format_selection(definition) - - return entry + return data def create_json_structure(data_columns) -> dict: if isinstance(data_columns, dict): @@ -546,9 +546,11 @@ def fixed_width(self, Data Column List format [('field width', 'definition', {'arguments'})] - The definition can simply be the 'name:argument_group' of a provider - method, or can also be string tokens that are passed to python - provider method pystr_format() for data generation. + The definition can be 'provider', 'provider:argument_group', tokenized + 'string {{ provider:argument_group }}' that is passed to the python + provider method pystr_format() for generation, or a fixed '@word'. + Using Lists, Tuples, and Dicts as a definition for structure. + Argument Groups can be used to pass arguments to the provider methods, but will override the arguments supplied in the tuple record. @@ -566,7 +568,7 @@ def fixed_width(self, :rtype: str :sample: data_columns=[(20, 'name'), (3, 'pyint', {'min_value': 50, - 'max_value': 100})], align='right', num_rows=1 + 'max_value': 100})], align='right', num_rows=2 """ default_data_columns = [ (20, 'name'), @@ -589,24 +591,39 @@ def fixed_width(self, if not isinstance(kwargs, dict): raise TypeError('Invalid arguments type. Must be a dictionary') - result = self._format_selection(definition, **kwargs) + result = self._value_format_selection(definition, **kwargs) field = "{0:%s%s}" % (align_map.get(align, '<'), width) row.append(field.format(result)[:width]) data.append(''.join(row)) return '\n'.join(data) - def _format_selection(self, definition, **kwargs): + def _value_format_selection(self, definition, **kwargs): """ - Formats the string with PyStr Format if special characters are found. + Formats the string in different ways depending on its contents. 
+ + The return can be the '@word' itself, a '{{ token }}' passed to PyStr, + or a 'provider:argument_group' format field that returns potentially + a non-string type. + + This ensures that Numbers and Boolean types are generated in the + JSON structures in their proper type, and not just strings. """ - if '{{' in definition and '}}' in definition: + + # Check for PyStr first as complex strings may start with @ + if re.match(r'.*\{\{.*\}\}.*', definition): return self.generator.pystr_format(definition) - if definition.count(':') == 1: + # Check for fixed @words that won't be generated + if re.match(r'^@.*', definition): + return definition.lstrip('@') + + # Check if an argument group has been supplied + if re.match(r'^[a-zA-Z0-9_-]*:\w', definition): definition, argument_group = definition.split(':') arguments = self.generator.get_arguments(argument_group.strip()) return self.generator.format(definition.strip(), **arguments) + # Assume the string is referring to a provider return self.generator.format(definition, **kwargs) diff --git a/tests/providers/test_misc.py b/tests/providers/test_misc.py index f4ee13bf1b..7081f5c4b9 100644 --- a/tests/providers/test_misc.py +++ b/tests/providers/test_misc.py @@ -492,11 +492,13 @@ def test_json_multiple_rows(self, faker_with_foobar): assert isinstance(json_data, list) and len(json_data) == 2 - def test_json_passthrough_int_float(self, faker_with_foobar): + def test_json_passthrough_values(self, faker_with_foobar): kwargs = { 'data_columns': { 'item1': 1, 'item2': 1.0, + 'item3': True, + 'item4': '@fixed', }, 'num_rows': 1, } @@ -504,6 +506,8 @@ def test_json_passthrough_int_float(self, faker_with_foobar): assert json_data['item1'] == 1 assert json_data['item2'] == 1.0 + assert json_data['item3'] is True + assert json_data['item4'] == 'fixed' def test_json_type_integrity_int(self, faker_with_foobar): kwargs = {