From a7da039c081aaeb7c6eb18bba0ab0d76661a2b39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Nasturas?= Date: Fri, 11 Sep 2020 17:49:04 +0200 Subject: [PATCH 1/2] Support for structured and nested values as dictionnaries and list in metadata (#120) --- lib/markdown2.py | 68 +++++++++++++++++++++++++++++---- test/tm-cases/metadata.metadata | 4 +- test/tm-cases/metadata.text | 15 ++++++++ 3 files changed, 79 insertions(+), 8 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index e73d8db6..8b9a4794 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -440,13 +440,21 @@ def preprocess(self, text): # another-var: blah blah # # # header - _meta_data_pattern = re.compile(r'^(?:---[\ \t]*\n)?(.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)|([\S\w]+\s*:(?! >)[ \t]*.*\n?)(?:---[\ \t]*\n)?', re.MULTILINE) + _meta_data_pattern = re.compile(r'^(?:---[\ \t]*\n)?((?:[\S\w]+\s*:(?:\n+[ \t]+.*)+)|(?:.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)|(?:\s*[\S\w]+\s*:(?! >)[ \t]*.*\n?))(?:---[\ \t]*\n)?', re.MULTILINE) _key_val_pat = re.compile(r"[\S\w]+\s*:(?! >)[ \t]*.*\n?", re.MULTILINE) # this allows key: > # value # conutiues over multiple lines _key_val_block_pat = re.compile( - "(.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)", re.MULTILINE) + r"(.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)", re.MULTILINE + ) + _key_val_list_pat = re.compile( + r"^-(?:[ \t]*([^:\s]*)(?:[ \t]*[:-][ \t]*(\S+))?)(?:\n((?:[ \t]+[^\n]+\n?)+))?", + re.MULTILINE, + ) + _key_val_dict_pat = re.compile( + r"^([^:\n]+)[ \t]*:[ \t]*([^\n]*)(?:((?:\n[ \t]+[^\n]+)+))?", re.MULTILINE + ) # grp0: key, grp1: value, grp2: multiline value _meta_data_fence_pattern = re.compile(r'^---[\ \t]*\n', re.MULTILINE) _meta_data_newline = re.compile("^\n", re.MULTILINE) @@ -466,13 +474,59 @@ def _extract_metadata(self, text): return text tail = metadata_split[1] - kv = re.findall(self._key_val_pat, metadata_content) - kvm = re.findall(self._key_val_block_pat, metadata_content) - kvm = [item.replace(": >\n", ":", 1) for item in kvm] + def parse_structured_value(value): + print(repr(value)) + vs = value.lstrip() + vs = value.replace(v[: len(value) - len(vs)], "\n")[1:] + + # List + if vs.startswith("-"): + r = [] + for match in re.findall(self._key_val_list_pat, vs): + if match[0] and not match[1] and not match[2]: + r.append(match[0].strip()) + elif match[0] == ">" and not match[1] and match[2]: + r.append(match[2].strip()) + elif match[0] and match[1]: + r.append({match[0].strip(): match[1].strip()}) + elif not match[0] and not match[1] and match[2]: + r.append(parse_structured_value(match[2])) + else: + # Broken case + pass + + return r + + # Dict + else: + return { + match[0].strip(): ( + match[1].strip() + if match[1] + else parse_structured_value(match[2]) + ) + for match in re.findall(self._key_val_dict_pat, vs) + } + + for item in match: - for item in kv + kvm: k, v = item.split(":", 1) - self.metadata[k.strip()] = v.strip() + + # Multiline value + if v[:3] == " >\n": + self.metadata[k.strip()] = v[3:].strip() + + # Empty value + elif v == "\n": + self.metadata[k.strip()] = "" + + # Structured value + elif v[0] == "\n": + self.metadata[k.strip()] = parse_structured_value(v) + + # Simple value + else: + self.metadata[k.strip()] = v.strip() return tail diff --git a/test/tm-cases/metadata.metadata b/test/tm-cases/metadata.metadata index b1eb9146..ebb79271 100644 --- a/test/tm-cases/metadata.metadata +++ b/test/tm-cases/metadata.metadata @@ -5,5 +5,7 @@ "this-is": "a hyphen test", "empty": "", "and some": "long value\n that goes multiline", - "another": "example" + "another": "example", + "alist": ["a", "b", "c"], + "adict": {"key": "foo", "a nested list": ["one", "two", "Even multiline strings are allowed\n in nested structured data\n if linebreaks and indent are respected !", {"subkey": "and another dict in a list"}]} } diff --git a/test/tm-cases/metadata.text b/test/tm-cases/metadata.text index be68e292..b2e14ae8 100644 --- a/test/tm-cases/metadata.text +++ b/test/tm-cases/metadata.text @@ -8,6 +8,21 @@ and some: > long value that goes multiline another: example +alist: + - a + - b + - c +adict: + key: foo + a nested list: + - one + - two + - > + Even multiline strings are allowed + in nested structured data + if linebreaks and indent are respected ! + - + subkey: and another dict in a list --- # The real text From 1ea190ac1c0aad6daf078d17a9b424e452cae71c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Nasturas?= Date: Wed, 16 Sep 2020 11:24:46 +0200 Subject: [PATCH 2/2] Removed useless print --- lib/markdown2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 8b9a4794..58c1b707 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -475,7 +475,6 @@ def _extract_metadata(self, text): tail = metadata_split[1] def parse_structured_value(value): - print(repr(value)) vs = value.lstrip() vs = value.replace(v[: len(value) - len(vs)], "\n")[1:]