Skip to content

Commit

Permalink
Added support for urls in include markdown tag (#159)
Browse files Browse the repository at this point in the history
* added support for urls in include markdown tag

* ✨ updating print to logger and replacing requests with urllib

* updates made to assign values to plural_suffix, lineno, and readable_files_to_include. Also repaired the read_http function to fetch the target_url and decode its response
  • Loading branch information
clubanderson committed Jul 31, 2023
1 parent e5f6c24 commit 328a87c
Showing 1 changed file with 66 additions and 7 deletions.
73 changes: 66 additions & 7 deletions src/mkdocs_include_markdown_plugin/event.py
Expand Up @@ -10,6 +10,8 @@
import textwrap
from collections.abc import MutableMapping
from typing import TYPE_CHECKING, Any
from urllib.parse import urlparse
import urllib.request

from mkdocs_include_markdown_plugin import process
from mkdocs_include_markdown_plugin.config import (
Expand Down Expand Up @@ -53,6 +55,12 @@ class DirectiveBoolArgument(TypedDict): # noqa: D101
False: 'false',
}

def is_url(string: str) -> bool:
    """Return True if *string* parses as an absolute URL.

    A string qualifies only when it has both a scheme (e.g. ``http``)
    and a network location (host), so plain relative or absolute file
    paths return False.
    """
    try:
        result = urlparse(string)
    except ValueError:
        # Malformed input (e.g. invalid IPv6 netloc) is not a URL.
        return False
    # Require both parts so bare paths like 'docs/file.md' are rejected.
    return bool(result.scheme and result.netloc)

def bool_arg(arg: str) -> re.Pattern[str]:
"""Return a compiled regexp to match a boolean argument."""
Expand Down Expand Up @@ -123,7 +131,12 @@ def read_file(file_path: str, encoding: str) -> str:
with open(file_path, encoding=encoding) as f:
return f.read()


def read_http(target_url: str, encoding: str) -> str:
    """Fetch ``target_url`` over HTTP(S) and return its decoded content.

    Args:
        target_url: The URL to retrieve.
        encoding: Character encoding used to decode the response body.

    Returns:
        The response body decoded with ``encoding``.
    """
    req = urllib.request.Request(target_url)
    with urllib.request.urlopen(req) as response:
        # Honour the caller-supplied encoding instead of hard-coding
        # UTF-8, so the directive's ``encoding`` argument works for
        # URL includes as well as local files.
        return response.read().decode(encoding)

def get_file_content(
markdown: str,
page_src_path: str,
Expand Down Expand Up @@ -161,6 +174,8 @@ def found_include_tag(match: re.Match[str]) -> str:

if os.path.isabs(filename):
file_path_glob = filename
elif is_url(filename):
file_path_glob = filename
else:
file_path_glob = os.path.join(
os.path.abspath(os.path.dirname(page_src_path)),
Expand Down Expand Up @@ -199,6 +214,10 @@ def found_include_tag(match: re.Match[str]) -> str:
glob.iglob(file_path_glob, recursive=True),
ignore_paths=ignore_paths,
)
if is_url(filename):
file_paths_to_include=[file_path_glob]
logger.info('url found: ' + file_path_glob)


if not file_paths_to_include:
lineno = lineno_from_content_start(
Expand All @@ -212,7 +231,21 @@ def found_include_tag(match: re.Match[str]) -> str:
)
return ''
elif files_watcher is not None:
files_watcher.included_files.extend(file_paths_to_include)
if not is_url(file_path_glob):
files_watcher.included_files.extend(file_paths_to_include)
else:
readable_files_to_include = ', '.join(file_paths_to_include)
plural_suffix = 's' if len(file_paths_to_include) > 1 else ''
lineno = lineno_from_content_start(
markdown,
directive_match_start,
)
logger.warning(
f"Not adding a watcher for {file_path_glob} of 'include-markdown'"
f' directive at {os.path.relpath(page_src_path, docs_dir)}'
f':{lineno} not detected in the file{plural_suffix}'
f' {readable_files_to_include}',
)

bool_options: dict[str, DirectiveBoolArgument] = {
'preserve-includer-indent': {
Expand Down Expand Up @@ -301,7 +334,10 @@ def found_include_tag(match: re.Match[str]) -> str:
text_to_include = ''
expected_but_any_found = [start is not None, end is not None]
for file_path in file_paths_to_include:
new_text_to_include = read_file(file_path, encoding)
if is_url(filename):
new_text_to_include = read_http(file_path, encoding)
else:
new_text_to_include = read_file(file_path, encoding)

if start is not None or end is not None:
new_text_to_include, *expected_not_found = (
Expand Down Expand Up @@ -352,7 +388,7 @@ def found_include_tag(match: re.Match[str]) -> str:

text_to_include += new_text_to_include

# warn if expected start or ends haven't been found in included content
# warn if expected start or ends haven't been found in included content
for i, argname in enumerate(['start', 'end']):
if expected_but_any_found[i]:
value = locals()[argname]
Expand Down Expand Up @@ -392,11 +428,13 @@ def found_include_markdown_tag(match: re.Match[str]) -> str:
f':{lineno}',
)
return ''

arguments_string = match.group('arguments')

if os.path.isabs(filename):
file_path_glob = filename
elif is_url(filename):
file_path_glob = filename
else:
file_path_glob = os.path.join(
os.path.abspath(os.path.dirname(page_src_path)),
Expand Down Expand Up @@ -436,6 +474,10 @@ def found_include_markdown_tag(match: re.Match[str]) -> str:
ignore_paths=ignore_paths,
)

if is_url(filename):
file_paths_to_include=[file_path_glob]
logger.info('url found: ' + file_path_glob)

if not file_paths_to_include:
lineno = lineno_from_content_start(
markdown,
Expand All @@ -448,7 +490,21 @@ def found_include_markdown_tag(match: re.Match[str]) -> str:
)
return ''
elif files_watcher is not None:
files_watcher.included_files.extend(file_paths_to_include)
if not is_url(file_path_glob):
files_watcher.included_files.extend(file_paths_to_include)
else:
readable_files_to_include = ', '.join(file_paths_to_include)
plural_suffix = 's' if len(file_paths_to_include) > 1 else ''
lineno = lineno_from_content_start(
markdown,
directive_match_start,
)
logger.warning(
f"Not adding a watcher for {file_path_glob} of 'include-markdown'"
f' directive at {os.path.relpath(page_src_path, docs_dir)}'
f':{lineno} not detected in the file{plural_suffix}'
f' {readable_files_to_include}',
)

bool_options: dict[str, DirectiveBoolArgument] = {
'rewrite-relative-urls': {
Expand Down Expand Up @@ -570,7 +626,10 @@ def found_include_markdown_tag(match: re.Match[str]) -> str:

text_to_include = ''
for file_path in file_paths_to_include:
new_text_to_include = read_file(file_path, encoding)
if is_url(filename):
new_text_to_include = read_http(file_path, encoding)
else:
new_text_to_include = read_file(file_path, encoding)

if start is not None or end is not None:
new_text_to_include, *expected_not_found = (
Expand Down

0 comments on commit 328a87c

Please sign in to comment.