Skip to content

Commit

Permalink
Merge pull request #368 from andrenasturas/master
Browse files Browse the repository at this point in the history
Support for structured and nested values in metadata (#120)
  • Loading branch information
nicholasserra committed Sep 18, 2020
2 parents b9c3b69 + 1ea190a commit e3da4df
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 8 deletions.
67 changes: 60 additions & 7 deletions lib/markdown2.py
Expand Up @@ -440,13 +440,21 @@ def preprocess(self, text):
# another-var: blah blah
#
# # header
_meta_data_pattern = re.compile(r'^(?:---[\ \t]*\n)?(.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)|([\S\w]+\s*:(?! >)[ \t]*.*\n?)(?:---[\ \t]*\n)?', re.MULTILINE)
_meta_data_pattern = re.compile(r'^(?:---[\ \t]*\n)?((?:[\S\w]+\s*:(?:\n+[ \t]+.*)+)|(?:.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)|(?:\s*[\S\w]+\s*:(?! >)[ \t]*.*\n?))(?:---[\ \t]*\n)?', re.MULTILINE)
_key_val_pat = re.compile(r"[\S\w]+\s*:(?! >)[ \t]*.*\n?", re.MULTILINE)
# this allows key: >
# value
# continues over multiple lines
_key_val_block_pat = re.compile(
"(.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)", re.MULTILINE)
r"(.*:\s+>\n\s+[\S\s]+?)(?=\n\w+\s*:\s*\w+\n|\Z)", re.MULTILINE
)
_key_val_list_pat = re.compile(
r"^-(?:[ \t]*([^:\s]*)(?:[ \t]*[:-][ \t]*(\S+))?)(?:\n((?:[ \t]+[^\n]+\n?)+))?",
re.MULTILINE,
)
_key_val_dict_pat = re.compile(
r"^([^:\n]+)[ \t]*:[ \t]*([^\n]*)(?:((?:\n[ \t]+[^\n]+)+))?", re.MULTILINE
) # grp0: key, grp1: value, grp2: multiline value
_meta_data_fence_pattern = re.compile(r'^---[\ \t]*\n', re.MULTILINE)
_meta_data_newline = re.compile("^\n", re.MULTILINE)

Expand All @@ -466,13 +474,58 @@ def _extract_metadata(self, text):
return text
tail = metadata_split[1]

kv = re.findall(self._key_val_pat, metadata_content)
kvm = re.findall(self._key_val_block_pat, metadata_content)
kvm = [item.replace(": >\n", ":", 1) for item in kvm]
def parse_structured_value(value):
    """Parse an indented metadata value into a list or a dict.

    The text is de-indented by one level first.  If the result starts
    with "-" it is read as a list using ``self._key_val_list_pat``;
    otherwise it is read as a dict using ``self._key_val_dict_pat``.
    Nested blocks are handled by calling this function recursively.
    """
    # Number of leading whitespace characters in ``value``.
    indent_len = len(value) - len(value.lstrip())
    # NOTE: ``v`` is not a parameter; it is the raw value string bound in
    # the enclosing scope.  Its first ``indent_len`` characters are used
    # as the prefix that gets collapsed to a bare newline, after which
    # the first character of the result is dropped.
    vs = value.replace(v[:indent_len], "\n")[1:]

    # Dict: anything that does not open with a list dash.
    if not vs.startswith("-"):
        parsed = {}
        for key, scalar, nested in self._key_val_dict_pat.findall(vs):
            # An inline value wins; otherwise recurse into the indented
            # block captured after the key.
            if scalar:
                parsed[key.strip()] = scalar.strip()
            else:
                parsed[key.strip()] = parse_structured_value(nested)
        return parsed

    # List: each match is (item text, inline value, indented continuation).
    items = []
    for name, inline, body in self._key_val_list_pat.findall(vs):
        if name and not inline and not body:
            # Plain scalar item: "- a"
            items.append(name.strip())
        elif name == ">" and not inline and body:
            # Multiline string item: "- >" followed by indented text
            items.append(body.strip())
        elif name and inline:
            # Single key/value pair item
            items.append({name.strip(): inline.strip()})
        elif not (name or inline) and body:
            # Bare dash followed by an indented nested structure
            items.append(parse_structured_value(body))
        # Any other combination is a broken entry and is skipped.
    return items

for item in match:

for item in kv + kvm:
k, v = item.split(":", 1)
self.metadata[k.strip()] = v.strip()

# Multiline value
if v[:3] == " >\n":
self.metadata[k.strip()] = v[3:].strip()

# Empty value
elif v == "\n":
self.metadata[k.strip()] = ""

# Structured value
elif v[0] == "\n":
self.metadata[k.strip()] = parse_structured_value(v)

# Simple value
else:
self.metadata[k.strip()] = v.strip()

return tail

Expand Down
4 changes: 3 additions & 1 deletion test/tm-cases/metadata.metadata
Expand Up @@ -5,5 +5,7 @@
"this-is": "a hyphen test",
"empty": "",
"and some": "long value\n that goes multiline",
"another": "example"
"another": "example",
"alist": ["a", "b", "c"],
"adict": {"key": "foo", "a nested list": ["one", "two", "Even multiline strings are allowed\n in nested structured data\n if linebreaks and indent are respected !", {"subkey": "and another dict in a list"}]}
}
15 changes: 15 additions & 0 deletions test/tm-cases/metadata.text
Expand Up @@ -8,6 +8,21 @@ and some: >
long value
that goes multiline
another: example
alist:
- a
- b
- c
adict:
key: foo
a nested list:
- one
- two
- >
Even multiline strings are allowed
in nested structured data
if linebreaks and indent are respected !
-
subkey: and another dict in a list
---
# The real text

Expand Down

0 comments on commit e3da4df

Please sign in to comment.