Skip to content

Commit

Permalink
Add support for YAML 1.2 schemas
Browse files Browse the repository at this point in the history
More and more YAML libraries are implementing YAML 1.2, either new ones
simply starting with 1.2 or older ones adding support for it.

Although YAML 1.2 also changed the syntax, this pull request is only about the
schema changes.

As an example, in YAML 1.1, Y, yes, NO, on etc. are resolved as booleans.

This sounds convenient, but also means that all these 22 different strings must
be quoted if they are not meant as booleans. A very common obstacle is the
country code for Norway, NO ("Norway Problem").

In YAML 1.2 this was improved by reducing the list of boolean representations.

Other types have been improved as well. The 1.1 regular expression for float
accepts . and ._ as floats, although neither string contains a single digit.

While the 1.2 Core Schema, the recommended default for 1.2, still allows a few
variations (true, True and TRUE, etc.), the 1.2 JSON Schema is there to match
JSON behaviour regarding types, so it allows only true and false.

Note that this implementation of the YAML JSON Schema might not behave exactly
as the spec defines it (according to the spec, any plain scalar that does not
resolve to a number, null or a boolean would be an error).

Short usage example:

    class MyCoreLoader(yaml.BaseLoader): pass
    class MyCoreDumper(yaml.CommonDumper): pass
    MyCoreLoader.init_tags('core')
    MyCoreDumper.init_tags('core')
    data = yaml.load(input, Loader=MyCoreLoader)
    output = yaml.dump(data, Dumper=MyCoreDumper)

Detailed example code to play with:

    import yaml

    class MyCoreLoader(yaml.BaseLoader): pass
    MyCoreLoader.init_tags('core')

    class MyJSONLoader(yaml.BaseLoader): pass
    MyJSONLoader.init_tags('json')

    class MyCoreDumper(yaml.CommonDumper): pass
    MyCoreDumper.init_tags('core')

    class MyJSONDumper(yaml.CommonDumper): pass
    MyJSONDumper.init_tags('json')

    input = """
    - TRUE
    - yes
    - ~
    - true
    #- .inf
    #- 23
    #- #empty
    #- !!str #empty
    #- 010
    #- 0o10
    #- 0b100
    #- 0x20
    #- -0x20
    #- 1_000
    #- 3:14
    #- 0011
    #- +0
    #- 0001.23
    #- !!str +0.3e3
    #- +0.3e3
    #- &x foo
    #- *x
    #- 1e27
    #- 1x+27
    """

    print('--------------------------------------------- BaseLoader')
    data = yaml.load(input, Loader=yaml.BaseLoader)
    print(data)
    print('--------------------------------------------- SafeLoader')
    data = yaml.load(input, Loader=yaml.SafeLoader)
    print(data)
    print('--------------------------------------------- CoreLoader')
    data = yaml.load(input, Loader=MyCoreLoader)
    print(data)
    print('--------------------------------------------- JSONLoader')
    data = yaml.load(input, Loader=MyJSONLoader)
    print(data)

    print('--------------------------------------------- SafeDumper')
    out = yaml.dump(data, Dumper=yaml.SafeDumper)
    print(out)
    print('--------------------------------------------- MyCoreDumper')
    out = yaml.dump(data, Dumper=MyCoreDumper)
    print(out)
    print('--------------------------------------------- MyJSONDumper')
    out = yaml.dump(data, Dumper=MyJSONDumper)
    print(out)
  • Loading branch information
perlpunk committed Dec 2, 2023
1 parent fc4be2e commit b80472f
Show file tree
Hide file tree
Showing 10 changed files with 844 additions and 220 deletions.
333 changes: 196 additions & 137 deletions lib/yaml/constructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
'FullConstructor',
'UnsafeConstructor',
'Constructor',
'ConstructorError'
'ConstructorError',
]

from .error import *
Expand Down Expand Up @@ -198,98 +198,26 @@ def construct_undefined(self, node):
"could not determine a constructor for the tag %r" % node.tag,
node.start_mark)

@classmethod
def add_constructor(cls, tag, constructor):
    """Register `constructor` as the handler for `tag` on this class.

    If `cls` has no `yaml_constructors` entry of its own, the mapping it
    currently sees is copied onto `cls` first, so the mapping that was
    visible before the call is not modified.
    """
    registry = cls.yaml_constructors
    if 'yaml_constructors' not in vars(cls):
        # Only an inherited mapping is visible: give this class its own copy.
        registry = cls.yaml_constructors = registry.copy()
    registry[tag] = constructor

@classmethod
def add_multi_constructor(cls, tag_prefix, multi_constructor):
    """Register `multi_constructor` as the handler for `tag_prefix`.

    If `cls` has no `yaml_multi_constructors` entry of its own, the mapping
    it currently sees is copied onto `cls` first, so the mapping that was
    visible before the call is not modified.
    """
    registry = cls.yaml_multi_constructors
    if 'yaml_multi_constructors' not in vars(cls):
        # Only an inherited mapping is visible: give this class its own copy.
        registry = cls.yaml_multi_constructors = registry.copy()
    registry[tag_prefix] = multi_constructor

class SafeConstructor(BaseConstructor):

def construct_scalar(self, node):
    """Return the scalar value of `node`.

    For a mapping node, the value paired with the first key tagged
    'tag:yaml.org,2002:value' is constructed and returned. In every other
    case the call is delegated to the parent class.
    """
    if not isinstance(node, MappingNode):
        return super().construct_scalar(node)
    for pair in node.value:
        key_node, value_node = pair
        if key_node.tag == 'tag:yaml.org,2002:value':
            return self.construct_scalar(value_node)
    # No value-tagged key found: let the parent class deal with the node.
    return super().construct_scalar(node)

def flatten_mapping(self, node):
    """Resolve merge-tagged entries of the mapping `node` in place.

    Every entry whose key is tagged 'tag:yaml.org,2002:merge' is removed.
    Its value must be a mapping node or a sequence of mapping nodes; those
    are flattened recursively and their pairs collected. For a sequence,
    the pair lists are collected in reverse sequence order. The collected
    pairs are finally prepended to the node's remaining pairs.

    Keys tagged 'tag:yaml.org,2002:value' are retagged as
    'tag:yaml.org,2002:str'.

    Raises ConstructorError if a merge value is neither a mapping nor a
    sequence consisting only of mappings.
    """
    merged_pairs = []
    position = 0
    while position < len(node.value):
        key_node, value_node = node.value[position]
        tag = key_node.tag
        if tag != 'tag:yaml.org,2002:merge':
            if tag == 'tag:yaml.org,2002:value':
                key_node.tag = 'tag:yaml.org,2002:str'
            position += 1
            continue

        # The merge entry is removed, so `position` already points at the
        # next entry and must not be advanced.
        del node.value[position]
        if isinstance(value_node, MappingNode):
            self.flatten_mapping(value_node)
            merged_pairs.extend(value_node.value)
        elif isinstance(value_node, SequenceNode):
            pair_lists = []
            for subnode in value_node.value:
                if not isinstance(subnode, MappingNode):
                    raise ConstructorError("while constructing a mapping",
                            node.start_mark,
                            "expected a mapping for merging, but found %s"
                            % subnode.id, subnode.start_mark)
                self.flatten_mapping(subnode)
                pair_lists.append(subnode.value)
            for pairs in reversed(pair_lists):
                merged_pairs.extend(pairs)
        else:
            raise ConstructorError("while constructing a mapping", node.start_mark,
                    "expected a mapping or list of mappings for merging, but found %s"
                    % value_node.id, value_node.start_mark)
    if merged_pairs:
        node.value = merged_pairs + node.value

def construct_mapping(self, node, deep=False):
    """Flatten merge entries of a mapping node, then build the mapping.

    Nodes that are not MappingNode instances are passed to the parent
    class untouched.
    """
    is_mapping = isinstance(node, MappingNode)
    if is_mapping:
        # Rewrites node.value in place before the parent class reads it.
        self.flatten_mapping(node)
    return super().construct_mapping(node, deep=deep)

def construct_yaml_int(self, node):
def construct_yaml_int_core(self, node):
value = self.construct_scalar(node)
value = value.replace('_', '')
sign = +1
if value[0] == '-':
sign = -1
if value[0] in '+-':
value = value[1:]

if value == '0':
return 0
elif value.startswith('0b'):
return sign*int(value[2:], 2)
elif value.startswith('0o'):
return sign*int(value[2:], 8)
elif value.startswith('0x'):
return sign*int(value[2:], 16)
elif value[0] == '0':
return sign*int(value, 8)
elif ':' in value:
digits = [int(part) for part in value.split(':')]
digits.reverse()
base = 1
value = 0
for digit in digits:
value += digit*base
base *= 60
return sign*value
else:
return sign*int(value)

def construct_yaml_float(self, node):
def construct_yaml_float_core(self, node):
value = self.construct_scalar(node)
value = value.replace('_', '').lower()
value = value.lower()
sign = +1
if value[0] == '-':
sign = -1
Expand All @@ -299,18 +227,60 @@ def construct_yaml_float(self, node):
return sign*self.inf_value
elif value == '.nan':
return self.nan_value
elif ':' in value:
digits = [float(part) for part in value.split(':')]
digits.reverse()
base = 1
value = 0.0
for digit in digits:
value += digit*base
base *= 60
return sign*value
else:
return sign*float(value)

def construct_yaml_int_json(self, node):
    """Convert the scalar of `node` to an int (decimal notation only).

    One leading '+' or '-' is stripped and applied as the sign; the rest is
    handed to int().
    """
    text = self.construct_scalar(node)
    lead = text[0]
    sign = -1 if lead == '-' else +1
    if lead in '+-':
        text = text[1:]
    # A bare '0' needs no special case: sign * int('0') is 0 either way.
    return sign * int(text)

def construct_yaml_float_json(self, node):
    """Convert the scalar of `node` to a float.

    The text is lower-cased, one leading '+' or '-' is stripped and applied
    as the sign, and the rest is handed to float().
    """
    text = self.construct_scalar(node).lower()
    lead = text[0]
    sign = -1 if lead == '-' else +1
    if lead in '+-':
        text = text[1:]
    return sign * float(text)

@classmethod
def add_constructor(cls, tag, constructor):
    """Register `constructor` as the handler for `tag` on this class.

    If `cls` has no `yaml_constructors` entry of its own, the mapping it
    currently sees is copied onto `cls` first, so the mapping that was
    visible before the call is not modified.
    """
    registry = cls.yaml_constructors
    if 'yaml_constructors' not in vars(cls):
        # Only an inherited mapping is visible: give this class its own copy.
        registry = cls.yaml_constructors = registry.copy()
    registry[tag] = constructor

@classmethod
def add_multi_constructor(cls, tag_prefix, multi_constructor):
    """Register `multi_constructor` as the handler for `tag_prefix`.

    If `cls` has no `yaml_multi_constructors` entry of its own, the mapping
    it currently sees is copied onto `cls` first, so the mapping that was
    visible before the call is not modified.
    """
    registry = cls.yaml_multi_constructors
    if 'yaml_multi_constructors' not in vars(cls):
        # Only an inherited mapping is visible: give this class its own copy.
        registry = cls.yaml_multi_constructors = registry.copy()
    registry[tag_prefix] = multi_constructor


@classmethod
def init_constructors(cls, tagset):
    """Register every constructor of the named tagset on this class.

    `tagset` is looked up in the module-level `_constructors` table; an
    unknown name is ignored silently. String keys of the table are expanded
    to full tags with the 'tag:yaml.org,2002:' prefix, while the None key is
    registered unchanged.
    """
    if tagset not in _constructors:
        return
    for short_name, callback in _constructors[tagset].items():
        tag = short_name if short_name is None else 'tag:yaml.org,2002:' + short_name
        cls.add_constructor(tag, callback)


# SafeConstructor implements YAML 1.1
class SafeConstructor(BaseConstructor):

def construct_yaml_binary(self, node):
try:
value = self.construct_scalar(node).encode('ascii')
Expand Down Expand Up @@ -419,6 +389,105 @@ def construct_yaml_set(self, node):
value = self.construct_mapping(node)
data.update(value)

def construct_scalar(self, node):
    """Return the scalar value of `node`.

    For a mapping node, the value paired with the first key tagged
    'tag:yaml.org,2002:value' is constructed and returned. In every other
    case the call is delegated to the parent class.
    """
    if not isinstance(node, MappingNode):
        return super().construct_scalar(node)
    for pair in node.value:
        key_node, value_node = pair
        if key_node.tag == 'tag:yaml.org,2002:value':
            return self.construct_scalar(value_node)
    # No value-tagged key found: let the parent class deal with the node.
    return super().construct_scalar(node)

def flatten_mapping(self, node):
    """Resolve merge-tagged entries of the mapping `node` in place.

    Every entry whose key is tagged 'tag:yaml.org,2002:merge' is removed.
    Its value must be a mapping node or a sequence of mapping nodes; those
    are flattened recursively and their pairs collected. For a sequence,
    the pair lists are collected in reverse sequence order. The collected
    pairs are finally prepended to the node's remaining pairs.

    Keys tagged 'tag:yaml.org,2002:value' are retagged as
    'tag:yaml.org,2002:str'.

    Raises ConstructorError if a merge value is neither a mapping nor a
    sequence consisting only of mappings.
    """
    merged_pairs = []
    position = 0
    while position < len(node.value):
        key_node, value_node = node.value[position]
        tag = key_node.tag
        if tag != 'tag:yaml.org,2002:merge':
            if tag == 'tag:yaml.org,2002:value':
                key_node.tag = 'tag:yaml.org,2002:str'
            position += 1
            continue

        # The merge entry is removed, so `position` already points at the
        # next entry and must not be advanced.
        del node.value[position]
        if isinstance(value_node, MappingNode):
            self.flatten_mapping(value_node)
            merged_pairs.extend(value_node.value)
        elif isinstance(value_node, SequenceNode):
            pair_lists = []
            for subnode in value_node.value:
                if not isinstance(subnode, MappingNode):
                    raise ConstructorError("while constructing a mapping",
                            node.start_mark,
                            "expected a mapping for merging, but found %s"
                            % subnode.id, subnode.start_mark)
                self.flatten_mapping(subnode)
                pair_lists.append(subnode.value)
            for pairs in reversed(pair_lists):
                merged_pairs.extend(pairs)
        else:
            raise ConstructorError("while constructing a mapping", node.start_mark,
                    "expected a mapping or list of mappings for merging, but found %s"
                    % value_node.id, value_node.start_mark)
    if merged_pairs:
        node.value = merged_pairs + node.value

def construct_yaml_int(self, node):
    """Convert the scalar of `node` to an int.

    Underscores are removed and one leading '+' or '-' is applied as the
    sign. The remaining text is then read, in this order of precedence, as:
    a bare '0'; binary after '0b'; hexadecimal after '0x'; octal when it
    starts with '0'; base-60 when it contains ':'; otherwise decimal.
    """
    text = self.construct_scalar(node).replace('_', '')
    lead = text[0]
    sign = -1 if lead == '-' else +1
    if lead in '+-':
        text = text[1:]

    if text == '0':
        return 0
    if text.startswith('0b'):
        return sign * int(text[2:], 2)
    if text.startswith('0x'):
        return sign * int(text[2:], 16)
    if text[0] == '0':
        return sign * int(text, 8)
    if ':' not in text:
        return sign * int(text)

    # Base-60 value: fold the colon-separated parts from the most
    # significant one down (exact for ints).
    total = 0
    for part in text.split(':'):
        total = total * 60 + int(part)
    return sign * total

def construct_yaml_float(self, node):
    """Convert the scalar of `node` to a float.

    Underscores are removed, the text is lower-cased and one leading '+' or
    '-' is applied as the sign. '.inf' yields the signed `self.inf_value`,
    '.nan' yields `self.nan_value` (the sign is not applied), text
    containing ':' is read as base-60, and anything else goes to float().
    """
    text = self.construct_scalar(node).replace('_', '').lower()
    lead = text[0]
    sign = -1 if lead == '-' else +1
    if lead in '+-':
        text = text[1:]

    if text == '.inf':
        return sign * self.inf_value
    if text == '.nan':
        return self.nan_value
    if ':' not in text:
        return sign * float(text)

    # Base-60 value: add up the parts starting with the least significant
    # one, so the floating-point summation order stays fixed.
    fields = [float(part) for part in text.split(':')]
    total = 0.0
    for power, field in enumerate(reversed(fields)):
        total += field * 60 ** power
    return sign * total

def construct_mapping(self, node, deep=False):
    """Flatten merge entries of a mapping node, then build the mapping.

    Nodes that are not MappingNode instances are passed to the parent
    class untouched.
    """
    is_mapping = isinstance(node, MappingNode)
    if is_mapping:
        # Rewrites node.value in place before the parent class reads it.
        self.flatten_mapping(node)
    return super().construct_mapping(node, deep=deep)

def construct_yaml_object(self, node, cls):
data = cls.__new__(cls)
yield data
Expand All @@ -429,56 +498,46 @@ def construct_yaml_object(self, node, cls):
state = self.construct_mapping(node)
data.__dict__.update(state)

# Register the SafeConstructor handlers one tag at a time. Each call stores
# the given method under the full tag name in SafeConstructor's constructor
# registry.
SafeConstructor.add_constructor(
'tag:yaml.org,2002:null',
SafeConstructor.construct_yaml_null)

SafeConstructor.add_constructor(
'tag:yaml.org,2002:bool',
SafeConstructor.construct_yaml_bool)

SafeConstructor.add_constructor(
'tag:yaml.org,2002:int',
SafeConstructor.construct_yaml_int)

SafeConstructor.add_constructor(
'tag:yaml.org,2002:float',
SafeConstructor.construct_yaml_float)

SafeConstructor.add_constructor(
'tag:yaml.org,2002:binary',
SafeConstructor.construct_yaml_binary)

SafeConstructor.add_constructor(
'tag:yaml.org,2002:timestamp',
SafeConstructor.construct_yaml_timestamp)

SafeConstructor.add_constructor(
'tag:yaml.org,2002:omap',
SafeConstructor.construct_yaml_omap)

SafeConstructor.add_constructor(
'tag:yaml.org,2002:pairs',
SafeConstructor.construct_yaml_pairs)

SafeConstructor.add_constructor(
'tag:yaml.org,2002:set',
SafeConstructor.construct_yaml_set)

SafeConstructor.add_constructor(
'tag:yaml.org,2002:str',
SafeConstructor.construct_yaml_str)

SafeConstructor.add_constructor(
'tag:yaml.org,2002:seq',
SafeConstructor.construct_yaml_seq)

SafeConstructor.add_constructor(
'tag:yaml.org,2002:map',
SafeConstructor.construct_yaml_map)

# NOTE(review): the None key presumably acts as the fallback for tags that have
# no registered constructor -- confirm against the lookup code, which is not
# shown here.
SafeConstructor.add_constructor(None,
SafeConstructor.construct_undefined)
# Constructor tables, keyed by tagset name. init_constructors(tagset) walks one
# of these tables and registers each callback: string keys are expanded to
# 'tag:yaml.org,2002:<key>', and the None key is registered as None.
_constructors = {
# YAML 1.1 tagset; this is the one applied to SafeConstructor below.
'yaml11': {
'str': BaseConstructor.construct_yaml_str,
'seq': BaseConstructor.construct_yaml_seq,
'map': BaseConstructor.construct_yaml_map,
None: BaseConstructor.construct_undefined,
'int': SafeConstructor.construct_yaml_int,
'float': SafeConstructor.construct_yaml_float,
'null': BaseConstructor.construct_yaml_null,
'bool': BaseConstructor.construct_yaml_bool,
'binary': SafeConstructor.construct_yaml_binary,
'timestamp': SafeConstructor.construct_yaml_timestamp,
'omap': SafeConstructor.construct_yaml_omap,
'pairs': SafeConstructor.construct_yaml_pairs,
'set': SafeConstructor.construct_yaml_set,
},
# 'core' tagset: same entries as 'json' except for the *_core int/float
# converters. NOTE(review): presumably the YAML 1.2 Core Schema -- confirm.
'core': {
'str': BaseConstructor.construct_yaml_str,
'seq': BaseConstructor.construct_yaml_seq,
'map': BaseConstructor.construct_yaml_map,
None: BaseConstructor.construct_undefined,
'int': BaseConstructor.construct_yaml_int_core,
'float': BaseConstructor.construct_yaml_float_core,
'null': BaseConstructor.construct_yaml_null,
'bool': BaseConstructor.construct_yaml_bool,
},
# 'json' tagset: uses the *_json int/float converters.
# NOTE(review): presumably the YAML 1.2 JSON Schema -- confirm.
'json': {
'str': BaseConstructor.construct_yaml_str,
'seq': BaseConstructor.construct_yaml_seq,
'map': BaseConstructor.construct_yaml_map,
None: BaseConstructor.construct_undefined,
'int': BaseConstructor.construct_yaml_int_json,
'float': BaseConstructor.construct_yaml_float_json,
'null': BaseConstructor.construct_yaml_null,
'bool': BaseConstructor.construct_yaml_bool,
},
}

# Register the YAML 1.1 constructors on SafeConstructor.
SafeConstructor.init_constructors('yaml11')

class FullConstructor(SafeConstructor):
# 'extend' is blacklisted because it is used by
Expand Down

0 comments on commit b80472f

Please sign in to comment.