Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for structured and nested values in metadata (#120) #368

Merged
merged 2 commits into from Sep 18, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
68 changes: 61 additions & 7 deletions lib/markdown2.py
Expand Up @@ -440,13 +440,21 @@ def preprocess(self, text):
# another-var: blah blah
#
# # header
_meta_data_pattern = re.compile(r'^(?:---[\ \t]*\n)?(.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)|([\S\w]+\s*:(?! >)[ \t]*.*\n?)(?:---[\ \t]*\n)?', re.MULTILINE)
_meta_data_pattern = re.compile(r'^(?:---[\ \t]*\n)?((?:[\S\w]+\s*:(?:\n+[ \t]+.*)+)|(?:.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)|(?:\s*[\S\w]+\s*:(?! >)[ \t]*.*\n?))(?:---[\ \t]*\n)?', re.MULTILINE)
_key_val_pat = re.compile(r"[\S\w]+\s*:(?! >)[ \t]*.*\n?", re.MULTILINE)
# this allows key: >
# value
# continues over multiple lines
_key_val_block_pat = re.compile(
"(.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)", re.MULTILINE)
r"(.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)", re.MULTILINE
)
_key_val_list_pat = re.compile(
r"^-(?:[ \t]*([^:\s]*)(?:[ \t]*[:-][ \t]*(\S+))?)(?:\n((?:[ \t]+[^\n]+\n?)+))?",
re.MULTILINE,
)
_key_val_dict_pat = re.compile(
r"^([^:\n]+)[ \t]*:[ \t]*([^\n]*)(?:((?:\n[ \t]+[^\n]+)+))?", re.MULTILINE
) # grp0: key, grp1: value, grp2: multiline value
_meta_data_fence_pattern = re.compile(r'^---[\ \t]*\n', re.MULTILINE)
_meta_data_newline = re.compile("^\n", re.MULTILINE)

Expand All @@ -466,13 +474,59 @@ def _extract_metadata(self, text):
return text
tail = metadata_split[1]

kv = re.findall(self._key_val_pat, metadata_content)
kvm = re.findall(self._key_val_block_pat, metadata_content)
kvm = [item.replace(": >\n", ":", 1) for item in kvm]
def parse_structured_value(value):
print(repr(value))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we nuke this print?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes! Sorry about that.

vs = value.lstrip()
vs = value.replace(v[: len(value) - len(vs)], "\n")[1:]

# List
if vs.startswith("-"):
r = []
for match in re.findall(self._key_val_list_pat, vs):
if match[0] and not match[1] and not match[2]:
r.append(match[0].strip())
elif match[0] == ">" and not match[1] and match[2]:
r.append(match[2].strip())
elif match[0] and match[1]:
r.append({match[0].strip(): match[1].strip()})
elif not match[0] and not match[1] and match[2]:
r.append(parse_structured_value(match[2]))
else:
# Broken case
pass

return r

# Dict
else:
return {
match[0].strip(): (
match[1].strip()
if match[1]
else parse_structured_value(match[2])
)
for match in re.findall(self._key_val_dict_pat, vs)
}

for item in match:

for item in kv + kvm:
k, v = item.split(":", 1)
self.metadata[k.strip()] = v.strip()

# Multiline value
if v[:3] == " >\n":
self.metadata[k.strip()] = v[3:].strip()

# Empty value
elif v == "\n":
self.metadata[k.strip()] = ""

# Structured value
elif v[0] == "\n":
self.metadata[k.strip()] = parse_structured_value(v)

# Simple value
else:
self.metadata[k.strip()] = v.strip()

return tail

Expand Down
4 changes: 3 additions & 1 deletion test/tm-cases/metadata.metadata
Expand Up @@ -5,5 +5,7 @@
"this-is": "a hyphen test",
"empty": "",
"and some": "long value\n that goes multiline",
"another": "example"
"another": "example",
"alist": ["a", "b", "c"],
"adict": {"key": "foo", "a nested list": ["one", "two", "Even multiline strings are allowed\n in nested structured data\n if linebreaks and indent are respected !", {"subkey": "and another dict in a list"}]}
}
15 changes: 15 additions & 0 deletions test/tm-cases/metadata.text
Expand Up @@ -8,6 +8,21 @@ and some: >
long value
that goes multiline
another: example
alist:
- a
- b
- c
adict:
key: foo
a nested list:
- one
- two
- >
Even multiline strings are allowed
in nested structured data
if linebreaks and indent are respected !
-
subkey: and another dict in a list
---
# The real text

Expand Down