forked from wpilibsuite/sphinxext-opengraph
/
__init__.py
217 lines (181 loc) · 6.89 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
from typing import Any, Dict
from urllib.parse import urljoin, urlparse, urlunparse
from pathlib import Path
import docutils.nodes as nodes
from sphinx.application import Sphinx
from .descriptionparser import get_description
from .metaparser import get_meta_description
from .titleparser import get_title
import os
# Default for ``ogp_description_length``; also the fallback when a supplied
# length cannot be parsed as an integer.
DEFAULT_DESCRIPTION_LENGTH = 200

# Maps an image file extension (lowercase, without the leading dot) to its
# media type.  Only a subset of the IANA registry is listed, see
# https://www.iana.org/assignments/media-types/media-types.xhtml#image
IMAGE_MIME_TYPES = {
    "gif": "image/gif",
    "apng": "image/apng",
    "webp": "image/webp",
    "jpeg": "image/jpeg",
    "jpg": "image/jpeg",
    "png": "image/png",
    "bmp": "image/bmp",
    "heic": "image/heic",
    "heif": "image/heif",
    "tiff": "image/tiff",
}
def make_tag(property: str, content: str, type_: str = "property") -> str:
    """Render a single HTML ``<meta>`` tag.

    Args:
        property: Value of the identifying attribute, e.g. ``og:title``.
        content: Text placed in the ``content`` attribute.
        type_: Name of the identifying attribute; ``"property"`` by default.

    Returns:
        The tag as ``<meta {type_}="{property}" content="{content}" />``.
    """
    # A literal double quote would end the content="..." attribute early
    # (this happens when smart quotes are disabled), so emit the HTML entity.
    content = content.replace('"', "&quot;")
    return f'<meta {type_}="{property}" content="{content}" />'
def get_tags(
    app: Sphinx,
    context: Dict[str, Any],
    doctree: nodes.document,
    config: Dict[str, Any],
) -> str:
    """Build the OpenGraph (and plain) ``<meta>`` tags for a single page.

    Args:
        app: The Sphinx application (not used inside this function).
        context: HTML page context. The keys ``meta``, ``title``, ``builder``,
            ``pagename``, ``metatags`` and (for non-``dirhtml`` builders)
            ``file_suffix`` are read.
        doctree: Document tree of the page; used to extract the description
            and, when ``ogp_use_first_image`` is enabled, the first image.
        config: Configuration mapping holding the ``ogp_*`` options as well
            as ``project`` and ``html_baseurl``.

    Returns:
        The generated tags, one per line, followed by a trailing newline.

    Raises:
        OSError: If ``READTHEDOCS`` is set in the environment, ``ogp_site_url``
            is unset and ``html_baseurl`` is ``None``.

    Side effects:
        On ReadTheDocs with no ``ogp_site_url`` configured, the URL derived
        from ``html_baseurl`` is stored back into ``config["ogp_site_url"]``.
    """
    # Get field lists for per-page overrides.  Work on a shallow copy because
    # entries are popped further down and the page's own metadata dict must
    # not be modified as a side effect.
    fields = dict(context["meta"] or {})

    tags = {}
    meta_tags = {}  # For non-og meta tags

    # Set length of description
    try:
        desc_len = int(
            fields.get("ogp_description_length", config["ogp_description_length"])
        )
    except (TypeError, ValueError):
        # ValueError: non-numeric text; TypeError: None or a non-scalar value.
        desc_len = DEFAULT_DESCRIPTION_LENGTH

    # Get the title and parse any html in it
    title = get_title(context["title"], skip_html_tags=False)
    title_excluding_html = get_title(context["title"], skip_html_tags=True)

    # Parse/walk doctree for metadata (tag/description)
    description = get_description(doctree, desc_len, [title, title_excluding_html])

    # title tag
    tags["og:title"] = title

    # type tag
    tags["og:type"] = config["ogp_type"]

    if os.getenv("READTHEDOCS") and config["ogp_site_url"] is None:
        # readthedocs uses html_baseurl for sphinx > 1.8
        # Validate before parsing so a missing value is reported directly.
        if config["html_baseurl"] is None:
            raise OSError("ReadTheDocs did not provide a valid canonical URL!")
        parse_result = urlparse(config["html_baseurl"])

        # Grab root url from canonical url (params, query and fragment dropped)
        config["ogp_site_url"] = urlunparse(
            (
                parse_result.scheme,
                parse_result.netloc,
                parse_result.path,
                "",
                "",
                "",
            )
        )

    # url tag
    # Get the URL of the specific page
    if context["builder"] == "dirhtml":
        # dirhtml serves every page as a directory, so URLs end with "/".
        if context["pagename"] == "index":
            page_url = config["ogp_site_url"]
        elif context["pagename"].endswith("/index"):
            relative = context["pagename"].rsplit("/", 1)[0]
            page_url = urljoin(config["ogp_site_url"], relative + "/")
        else:
            page_url = urljoin(config["ogp_site_url"], context["pagename"] + "/")
    else:
        page_url = urljoin(
            config["ogp_site_url"], context["pagename"] + context["file_suffix"]
        )
    tags["og:url"] = page_url

    # site name tag, False disables, default to project if ogp_site_name not
    # set.
    if config["ogp_site_name"] is False:
        site_name = None
    elif config["ogp_site_name"] is None:
        site_name = config["project"]
    else:
        site_name = config["ogp_site_name"]

    if site_name:
        tags["og:site_name"] = site_name

    # description tag
    if description:
        tags["og:description"] = description

        # Only add a plain description tag if the page does not have one yet.
        if config["ogp_enable_meta_description"] and not get_meta_description(
            context["metatags"]
        ):
            meta_tags["description"] = description

    # image tag
    # A per-page "og:image" field wins over the config and disables the
    # first-image lookup.
    if "og:image" in fields:
        image_url = fields["og:image"]
        ogp_use_first_image = False
        ogp_image_alt = fields.get("og:image:alt")
        fields.pop("og:image", None)
    else:
        image_url = config["ogp_image"]
        ogp_use_first_image = config["ogp_use_first_image"]
        ogp_image_alt = fields.get("og:image:alt", config["ogp_image_alt"])

    # Handled explicitly below, so keep it out of the generic "og:" pass.
    fields.pop("og:image:alt", None)

    first_image = None
    if ogp_use_first_image:
        first_image = doctree.next_node(nodes.image)
        if (
            first_image
            and Path(first_image.get("uri", "")).suffix[1:].lower() in IMAGE_MIME_TYPES
        ):
            image_url = first_image["uri"]
            ogp_image_alt = first_image.get("alt", None)
        else:
            # No image, or an extension that is not a known image type.
            first_image = None

    if image_url:
        # temporarily disable relative image paths with field lists
        # NOTE(review): "og:image" has already been popped from ``fields``
        # above, so this condition looks always true here -- confirm intent.
        if "og:image" not in fields:
            image_url_parsed = urlparse(image_url)
            if not image_url_parsed.scheme:
                # Relative image path detected, relative to the source. Make absolute.
                if first_image:
                    root = page_url
                else:  # ogp_image is set
                    # ogp_image is defined as being relative to the site root.
                    # This workaround is to keep that functionality from breaking.
                    root = config["ogp_site_url"]

                image_url = urljoin(root, image_url_parsed.path)
            tags["og:image"] = image_url

        # Add image alt text (either provided by config or from site_name)
        if isinstance(ogp_image_alt, str):
            tags["og:image:alt"] = ogp_image_alt
        elif ogp_image_alt is None and site_name:
            tags["og:image:alt"] = site_name
        elif ogp_image_alt is None and title:
            tags["og:image:alt"] = title

    # arbitrary tags and overrides
    tags.update({k: v for k, v in fields.items() if k.startswith("og:")})

    return (
        "\n".join(
            [make_tag(p, c) for p, c in tags.items()]
            + [make_tag(p, c, "name") for p, c in meta_tags.items()]
            + config["ogp_custom_meta_tags"]
        )
        + "\n"
    )
def html_page_context(
    app: Sphinx,
    pagename: str,
    templatename: str,
    context: Dict[str, Any],
    doctree: nodes.document,
) -> None:
    """Handler for the ``html-page-context`` event.

    Appends the tags produced by ``get_tags`` to ``context["metatags"]``.
    Nothing happens when ``doctree`` is falsy.
    """
    if not doctree:
        # No document tree for this page, so there is nothing to generate from.
        return

    generated = get_tags(app, context, doctree, app.config)
    context["metatags"] += generated
def setup(app: Sphinx) -> Dict[str, Any]:
    """Register the extension's config values and event handler.

    Returns:
        Extension metadata marking the extension as safe for parallel
        reading and writing.
    """
    # (option name, default value); every option is registered with "html"
    # as the third argument to ``add_config_value``.
    options = (
        ("ogp_site_url", None),
        ("ogp_description_length", DEFAULT_DESCRIPTION_LENGTH),
        ("ogp_image", None),
        ("ogp_image_alt", None),
        ("ogp_use_first_image", False),
        ("ogp_type", "website"),
        ("ogp_site_name", None),
        ("ogp_custom_meta_tags", []),
        ("ogp_enable_meta_description", True),
    )
    for option_name, default in options:
        app.add_config_value(option_name, default, "html")

    # Generate the tags whenever an HTML page context is being prepared.
    app.connect("html-page-context", html_page_context)

    metadata = {
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }
    return metadata