Skip to content

Commit

Permalink
Added support for urls in include markdown tag (#159)
Browse files Browse the repository at this point in the history
* added support for urls in include markdown tag

* ✨ updating print to logger and replacing requests with urllib

* updates made to assign values to plural_suffix, lineno, and readable_files_to_include. Also repaired the read_http function to fetch the target_url and decode its response
  • Loading branch information
clubanderson committed Jul 31, 2023
1 parent e5f6c24 commit 328a87c
Showing 1 changed file with 66 additions and 7 deletions.
73 changes: 66 additions & 7 deletions src/mkdocs_include_markdown_plugin/event.py
Expand Up @@ -10,6 +10,8 @@
import textwrap
from collections.abc import MutableMapping
from typing import TYPE_CHECKING, Any
from urllib.parse import urlparse
import urllib.request

from mkdocs_include_markdown_plugin import process
from mkdocs_include_markdown_plugin.config import (
Expand Down Expand Up @@ -53,6 +55,12 @@ class DirectiveBoolArgument(TypedDict): # noqa: D101
False: 'false',
}

def is_url(string: str) -> bool:
    """Return True if *string* parses as an absolute URL.

    A string qualifies only when it has both a scheme (e.g. ``http``)
    and a network location (host), so plain relative or absolute file
    paths return False.
    """
    try:
        result = urlparse(string)
    except ValueError:
        # Malformed input (e.g. invalid IPv6 netloc) is not a URL.
        return False
    # Require both parts so bare paths like 'docs/file.md' are rejected.
    return bool(result.scheme and result.netloc)

def bool_arg(arg: str) -> re.Pattern[str]:
"""Return a compiled regexp to match a boolean argument."""
Expand Down Expand Up @@ -123,7 +131,12 @@ def read_file(file_path: str, encoding: str) -> str:
with open(file_path, encoding=encoding) as f:
return f.read()


def read_http(target_url: str, encoding: str) -> str:
    """Fetch ``target_url`` over HTTP(S) and return its decoded content.

    Args:
        target_url: The URL to retrieve.
        encoding: Character encoding used to decode the response body.

    Returns:
        The response body decoded with ``encoding``.
    """
    req = urllib.request.Request(target_url)
    with urllib.request.urlopen(req) as response:
        # Honour the caller-supplied encoding instead of hard-coding
        # UTF-8, so the directive's ``encoding`` argument works for
        # URL includes as well as local files.
        return response.read().decode(encoding)

def get_file_content(
markdown: str,
page_src_path: str,
Expand Down Expand Up @@ -161,6 +174,8 @@ def found_include_tag(match: re.Match[str]) -> str:

if os.path.isabs(filename):
file_path_glob = filename
elif is_url(filename):
file_path_glob = filename
else:
file_path_glob = os.path.join(
os.path.abspath(os.path.dirname(page_src_path)),
Expand Down Expand Up @@ -199,6 +214,10 @@ def found_include_tag(match: re.Match[str]) -> str:
glob.iglob(file_path_glob, recursive=True),
ignore_paths=ignore_paths,
)
if is_url(filename):
file_paths_to_include=[file_path_glob]
logger.info('url found: ' + file_path_glob)


if not file_paths_to_include:
lineno = lineno_from_content_start(
Expand All @@ -212,7 +231,21 @@ def found_include_tag(match: re.Match[str]) -> str:
)
return ''
elif files_watcher is not None:
files_watcher.included_files.extend(file_paths_to_include)
if not is_url(file_path_glob):
files_watcher.included_files.extend(file_paths_to_include)
else:
readable_files_to_include = ', '.join(file_paths_to_include)
plural_suffix = 's' if len(file_paths_to_include) > 1 else ''
lineno = lineno_from_content_start(
markdown,
directive_match_start,
)
logger.warning(
f"Not adding a watcher for {file_path_glob} of 'include-markdown'"
f' directive at {os.path.relpath(page_src_path, docs_dir)}'
f':{lineno} not detected in the file{plural_suffix}'
f' {readable_files_to_include}',
)

bool_options: dict[str, DirectiveBoolArgument] = {
'preserve-includer-indent': {
Expand Down Expand Up @@ -301,7 +334,10 @@ def found_include_tag(match: re.Match[str]) -> str:
text_to_include = ''
expected_but_any_found = [start is not None, end is not None]
for file_path in file_paths_to_include:
new_text_to_include = read_file(file_path, encoding)
if is_url(filename):
new_text_to_include = read_http(file_path, encoding)
else:
new_text_to_include = read_file(file_path, encoding)

if start is not None or end is not None:
new_text_to_include, *expected_not_found = (
Expand Down Expand Up @@ -352,7 +388,7 @@ def found_include_tag(match: re.Match[str]) -> str:

text_to_include += new_text_to_include

# warn if expected start or ends haven't been found in included content
# warn if expected start or ends haven't been found in included content
for i, argname in enumerate(['start', 'end']):
if expected_but_any_found[i]:
value = locals()[argname]
Expand Down Expand Up @@ -392,11 +428,13 @@ def found_include_markdown_tag(match: re.Match[str]) -> str:
f':{lineno}',
)
return ''

arguments_string = match.group('arguments')

if os.path.isabs(filename):
file_path_glob = filename
elif is_url(filename):
file_path_glob = filename
else:
file_path_glob = os.path.join(
os.path.abspath(os.path.dirname(page_src_path)),
Expand Down Expand Up @@ -436,6 +474,10 @@ def found_include_markdown_tag(match: re.Match[str]) -> str:
ignore_paths=ignore_paths,
)

if is_url(filename):
file_paths_to_include=[file_path_glob]
logger.info('url found: ' + file_path_glob)

if not file_paths_to_include:
lineno = lineno_from_content_start(
markdown,
Expand All @@ -448,7 +490,21 @@ def found_include_markdown_tag(match: re.Match[str]) -> str:
)
return ''
elif files_watcher is not None:
files_watcher.included_files.extend(file_paths_to_include)
if not is_url(file_path_glob):
files_watcher.included_files.extend(file_paths_to_include)
else:
readable_files_to_include = ', '.join(file_paths_to_include)
plural_suffix = 's' if len(file_paths_to_include) > 1 else ''
lineno = lineno_from_content_start(
markdown,
directive_match_start,
)
logger.warning(
f"Not adding a watcher for {file_path_glob} of 'include-markdown'"
f' directive at {os.path.relpath(page_src_path, docs_dir)}'
f':{lineno} not detected in the file{plural_suffix}'
f' {readable_files_to_include}',
)

bool_options: dict[str, DirectiveBoolArgument] = {
'rewrite-relative-urls': {
Expand Down Expand Up @@ -570,7 +626,10 @@ def found_include_markdown_tag(match: re.Match[str]) -> str:

text_to_include = ''
for file_path in file_paths_to_include:
new_text_to_include = read_file(file_path, encoding)
if is_url(filename):
new_text_to_include = read_http(file_path, encoding)
else:
new_text_to_include = read_file(file_path, encoding)

if start is not None or end is not None:
new_text_to_include, *expected_not_found = (
Expand Down

0 comments on commit 328a87c

Please sign in to comment.