-
-
Notifications
You must be signed in to change notification settings - Fork 192
/
links.rb
150 lines (124 loc) · 4.48 KB
/
links.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# frozen_string_literal: true
module HTMLProofer
  class Check
    # Check that inspects the reference of every `<a>` and `<link>` element in
    # the parsed document (`@html`).
    #
    # For each element it reports failures for bare `#` links, missing
    # references, invalid or protocol-relative URLs, problematic `mailto:`,
    # `tel:` and (optionally) plain `http:` links, missing SRI/CORS attributes
    # on remote stylesheets, and internal links to directories without a
    # trailing slash. URLs that pass are handed to `add_to_external_urls` /
    # `add_to_internal_urls`.
    #
    # NOTE(review): `create_element`, `add_failure`, `blank?`,
    # `add_to_external_urls` and `add_to_internal_urls` are not defined in this
    # file; they are presumably provided by HTMLProofer::Check or a mixin.
    class Links < HTMLProofer::Check
      # Walks every `a` and `link` node and runs the checks in order; the first
      # failing check for a node records a failure and moves on to the next node.
      #
      # The element under inspection is kept in `@link` so the helper methods
      # below can read it.
      def run
        @html.css("a, link").each do |node|
          @link = create_element(node)

          next if @link.ignore?

          if !allow_hash_href? && @link.node["href"] == "#"
            add_failure("linking to internal hash #, which points to nowhere", element: @link)
            next
          end

          # is there even an href?
          if blank?(@link.url.raw_attribute)
            next if allow_missing_href?

            add_failure("'#{@link.node.name}' tag is missing a reference", element: @link)
            next
          end

          # is it even a valid URL?
          unless @link.url.valid?
            add_failure("#{@link.href} is an invalid URL", element: @link)
            next
          end

          if @link.url.protocol_relative?
            add_failure(
              "#{@link.url} is a protocol-relative URL, use explicit https:// instead",
              element: @link,
            )
            next
          end

          check_schemes

          # intentionally down here because we still want valid? & missing_href? to execute
          next if @link.url.non_http_remote?

          if !@link.url.internal? && @link.url.remote?
            check_sri if @runner.check_sri? && @link.link_tag?

            # we need to skip these for now; although the domain may be valid,
            # curl/Typhoeus inaccurately return 404s for some links. cc https://git.io/vyCFx
            next if @link.node["rel"] == "dns-prefetch"

            unless @link.url.path?
              add_failure("#{@link.url.raw_attribute} is an invalid URL", element: @link)
              next
            end

            add_to_external_urls(@link.url, @link.line)
          elsif @link.url.internal?
            # does the local directory have a trailing slash?
            if @link.url.unslashed_directory?(@link.url.absolute_path)
              add_failure(
                "internally linking to a directory #{@link.url.raw_attribute} without trailing slash",
                element: @link,
              )
              next
            end

            add_to_internal_urls(@link.url, @link.line)
          end
        end
      end

      # @return [Object] the value of the `:allow_missing_href` runner option;
      #   when truthy, elements without a reference are skipped silently.
      def allow_missing_href?
        @runner.options[:allow_missing_href]
      end

      # @return [Object] the value of the `:allow_hash_href` runner option;
      #   when truthy, `href="#"` is not reported.
      def allow_hash_href?
        @runner.options[:allow_hash_href]
      end

      # Runs the scheme-specific checks for the current link: `mailto:` and
      # `tel:` are validated, and `http:` is reported only when the
      # `:enforce_https` runner option is truthy. Other schemes are ignored.
      def check_schemes
        case @link.url.scheme
        when "mailto"
          handle_mailto
        when "tel"
          handle_tel
        when "http"
          return unless @runner.options[:enforce_https]

          add_failure("#{@link.url.raw_attribute} is not an HTTPS link", element: @link)
        end
      end

      # Reports a `mailto:` link that has no address (unless the
      # `:ignore_empty_mailto` option is set) or whose comma-separated
      # addresses do not all match `URI::MailTo::EMAIL_REGEXP`.
      def handle_mailto
        if @link.url.path.empty?
          unless ignore_empty_mailto?
            add_failure(
              "#{@link.url.raw_attribute} contains no email address",
              element: @link,
            )
          end
        # e.g., if any of the addresses is not a valid email address
        elsif @link.url.path.split(",").any? { |email| !URI::MailTo::EMAIL_REGEXP.match?(email) }
          add_failure(
            "#{@link.url.raw_attribute} contains an invalid email address",
            element: @link,
          )
        end
      end

      # Reports a `tel:` link whose path (the phone number) is empty.
      def handle_tel
        return unless @link.url.path.empty?

        add_failure(
          "#{@link.url.raw_attribute} contains no phone number",
          element: @link,
        )
      end

      # @return [Object] the value of the `:ignore_empty_mailto` runner option.
      def ignore_empty_mailto?
        @runner.options[:ignore_empty_mailto]
      end

      # Allowed elements from Subresource Integrity specification
      # https://w3c.github.io/webappsec-subresource-integrity/#link-element-for-stylesheets
      #
      # This must be an Array: `%(stylesheet)` would be a plain String, turning
      # `include?` into a substring test (matching "style", "" ...) and raising
      # TypeError when the element has no `rel` attribute at all.
      SRI_REL_TYPES = %w[stylesheet].freeze

      # Reports a link whose `rel` is listed in SRI_REL_TYPES but which lacks
      # the `integrity` and/or `crossorigin` attribute. Links with any other
      # `rel` (including none) are left alone.
      def check_sri
        return unless SRI_REL_TYPES.include?(@link.node["rel"])

        missing_integrity = blank?(@link.node["integrity"])
        missing_cors = blank?(@link.node["crossorigin"])

        if missing_integrity && missing_cors
          add_failure(
            "SRI and CORS not provided in: #{@link.url.raw_attribute}",
            element: @link,
          )
        elsif missing_integrity
          add_failure("Integrity is missing in: #{@link.url.raw_attribute}", element: @link)
        elsif missing_cors
          add_failure(
            "CORS not provided for external resource in: #{@link.url.raw_attribute}",
            element: @link,
          )
        end
      end

      # @return [Boolean] whether the current element is a `<source>` tag.
      private def source_tag?
        @link.node.name == "source"
      end

      # @return [Boolean] whether the current element is an `<a>` tag.
      private def anchor_tag?
        @link.node.name == "a"
      end
    end
  end
end