-
-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added new custom email validator (#90)
refs https://github.com/TryGhost/Team/issues/2235 The `validator` package used in our codebase is stuck on a legacy version due to some constraints. The `isEmail` check on this legacy version is unable to detect some invalid email addresses causing them to sneak through and causing problems. This change adds a custom `isEmail` validator picked from the latest version of `validator` package, and allows us to gradually update the email checks to use the new version so we can prevent invalid email getting stored in Ghost.
- Loading branch information
1 parent
2985dbe
commit 8af2f47
Showing
8 changed files
with
408 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
const assertString = require('./util/assertString'); | ||
|
||
/* eslint-disable prefer-rest-params */ | ||
module.exports = function isByteLength(str, options) { | ||
assertString(str); | ||
let min; | ||
let max; | ||
if (typeof (options) === 'object') { | ||
min = options.min || 0; | ||
max = options.max; | ||
} else { // backwards compatibility: isByteLength(str, min [, max]) | ||
min = arguments[1]; | ||
max = arguments[2]; | ||
} | ||
const len = encodeURI(str).split(/%..|./).length - 1; | ||
return len >= min && (typeof max === 'undefined' || len <= max); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
/** | ||
* This file is a copy of validator.js isEmail method - v13.7.0: | ||
* https://github.com/validatorjs/validator.js/blob/531dc7f1f75613bec75c6d888b46480455e78dc7/src/lib/isEmail.js | ||
*/ | ||
/* eslint-disable camelcase */ | ||
const assertString = require('./util/assertString'); | ||
const merge = require('./util/merge'); | ||
const isByteLength = require('./isByteLength'); | ||
const isFQDN = require('./isFQDN'); | ||
const isIP = require('./isIP'); | ||
|
||
const default_email_options = { | ||
allow_display_name: false, | ||
require_display_name: false, | ||
allow_utf8_local_part: true, | ||
require_tld: true, | ||
blacklisted_chars: '', | ||
ignore_max_length: false, | ||
host_blacklist: [], | ||
host_whitelist: [] | ||
}; | ||
|
||
/* eslint-disable max-len */ | ||
/* eslint-disable no-control-regex */ | ||
const splitNameAddress = /^([^\x00-\x1F\x7F-\x9F\cX]+)</i; | ||
const emailUserPart = /^[a-z\d!#$%&'*+\-/=?^_`{|}~]+$/i; | ||
const gmailUserPart = /^[a-z\d]+$/; | ||
const quotedEmailUser = /^([\s\x01-\x08\x0b\x0c\x0e-\x1f\x7f\x21\x23-\x5b\x5d-\x7e]|(\\[\x01-\x09\x0b\x0c\x0d-\x7f]))*$/i; | ||
const emailUserUtf8Part = /^[a-z\d!#$%&'*+\-/=?^_`{|}~\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+$/i; | ||
const quotedEmailUserUtf8 = /^([\s\x01-\x08\x0b\x0c\x0e-\x1f\x7f\x21\x23-\x5b\x5d-\x7e\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]|(\\[\x01-\x09\x0b\x0c\x0d-\x7f\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]))*$/i; | ||
const defaultMaxEmailLength = 254; | ||
/* eslint-enable max-len */ | ||
/* eslint-enable no-control-regex */ | ||
|
||
/** | ||
* Validate display name according to the RFC2822: https://tools.ietf.org/html/rfc2822#appendix-A.1.2 | ||
* @param {String} display_name | ||
*/ | ||
function validateDisplayName(display_name) { | ||
const display_name_without_quotes = display_name.replace(/^"(.+)"$/, '$1'); | ||
// display name with only spaces is not valid | ||
if (!display_name_without_quotes.trim()) { | ||
return false; | ||
} | ||
|
||
// check whether display name contains illegal character | ||
const contains_illegal = /[.";<>]/.test(display_name_without_quotes); | ||
if (contains_illegal) { | ||
// if contains illegal characters, | ||
// must to be enclosed in double-quotes, otherwise it's not a valid display name | ||
if (display_name_without_quotes === display_name) { | ||
return false; | ||
} | ||
|
||
// the quotes in display name must start with character symbol \ | ||
const all_start_with_back_slash = | ||
display_name_without_quotes.split('"').length === display_name_without_quotes.split('\\"').length; | ||
if (!all_start_with_back_slash) { | ||
return false; | ||
} | ||
} | ||
|
||
return true; | ||
} | ||
|
||
module.exports = function isEmail(str, options) { | ||
assertString(str); | ||
options = merge(options, default_email_options); | ||
|
||
if (options.require_display_name || options.allow_display_name) { | ||
const display_email = str.match(splitNameAddress); | ||
if (display_email) { | ||
let display_name = display_email[1]; | ||
|
||
// Remove display name and angle brackets to get email address | ||
// Can be done in the regex but will introduce a ReDOS (See #1597 for more info) | ||
str = str.replace(display_name, '').replace(/(^<|>$)/g, ''); | ||
|
||
// sometimes need to trim the last space to get the display name | ||
// because there may be a space between display name and email address | ||
// eg. myname <address@gmail.com> | ||
// the display name is `myname` instead of `myname `, so need to trim the last space | ||
if (display_name.endsWith(' ')) { | ||
display_name = display_name.slice(0, -1); | ||
} | ||
|
||
if (!validateDisplayName(display_name)) { | ||
return false; | ||
} | ||
} else if (options.require_display_name) { | ||
return false; | ||
} | ||
} | ||
if (!options.ignore_max_length && str.length > defaultMaxEmailLength) { | ||
return false; | ||
} | ||
|
||
const parts = str.split('@'); | ||
const domain = parts.pop(); | ||
const lower_domain = domain.toLowerCase(); | ||
|
||
if (options.host_blacklist.includes(lower_domain)) { | ||
return false; | ||
} | ||
|
||
if (options.host_whitelist.length > 0 && !options.host_whitelist.includes(lower_domain)) { | ||
return false; | ||
} | ||
|
||
let user = parts.join('@'); | ||
|
||
if (options.domain_specific_validation && (lower_domain === 'gmail.com' || lower_domain === 'googlemail.com')) { | ||
/* | ||
Previously we removed dots for gmail addresses before validating. | ||
This was removed because it allows `multiple..dots@gmail.com` | ||
to be reported as valid, but it is not. | ||
Gmail only normalizes single dots, removing them from here is pointless, | ||
should be done in normalizeEmail | ||
*/ | ||
user = user.toLowerCase(); | ||
|
||
// Removing sub-address from username before gmail validation | ||
const username = user.split('+')[0]; | ||
|
||
// Dots are not included in gmail length restriction | ||
if (!isByteLength(username.replace(/\./g, ''), {min: 6, max: 30})) { | ||
return false; | ||
} | ||
|
||
const user_parts = username.split('.'); | ||
for (let i = 0; i < user_parts.length; i++) { | ||
if (!gmailUserPart.test(user_parts[i])) { | ||
return false; | ||
} | ||
} | ||
} | ||
|
||
if (options.ignore_max_length === false && ( | ||
!isByteLength(user, {max: 64}) || | ||
!isByteLength(domain, {max: 254})) | ||
) { | ||
return false; | ||
} | ||
|
||
if (!isFQDN(domain, {require_tld: options.require_tld})) { | ||
if (!options.allow_ip_domain) { | ||
return false; | ||
} | ||
|
||
if (!isIP(domain)) { | ||
if (!domain.startsWith('[') || !domain.endsWith(']')) { | ||
return false; | ||
} | ||
|
||
let noBracketdomain = domain.slice(1, -1); | ||
|
||
if (noBracketdomain.length === 0 || !isIP(noBracketdomain)) { | ||
return false; | ||
} | ||
} | ||
} | ||
|
||
if (user[0] === '"') { | ||
user = user.slice(1, user.length - 1); | ||
return options.allow_utf8_local_part ? | ||
quotedEmailUserUtf8.test(user) : | ||
quotedEmailUser.test(user); | ||
} | ||
|
||
const pattern = options.allow_utf8_local_part ? | ||
emailUserUtf8Part : emailUserPart; | ||
|
||
const user_parts = user.split('.'); | ||
for (let i = 0; i < user_parts.length; i++) { | ||
if (!pattern.test(user_parts[i])) { | ||
return false; | ||
} | ||
} | ||
if (options.blacklisted_chars) { | ||
if (user.search(new RegExp(`[${options.blacklisted_chars}]+`, 'g')) !== -1) { | ||
return false; | ||
} | ||
} | ||
|
||
return true; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
/** | ||
* This file is a copy of validator.js isFQDN method - v13.7.0: | ||
* https://github.com/validatorjs/validator.js/blob/531dc7f1f75613bec75c6d888b46480455e78dc7/src/lib/isFQDN.js | ||
*/ | ||
/* eslint-disable camelcase */ | ||
const assertString = require('./util/assertString'); | ||
const merge = require('./util/merge'); | ||
|
||
const default_fqdn_options = { | ||
require_tld: true, | ||
allow_underscores: false, | ||
allow_trailing_dot: false, | ||
allow_numeric_tld: false, | ||
allow_wildcard: false | ||
}; | ||
|
||
module.exports = function isFQDN(str, options) { | ||
assertString(str); | ||
options = merge(options, default_fqdn_options); | ||
|
||
/* Remove the optional trailing dot before checking validity */ | ||
if (options.allow_trailing_dot && str[str.length - 1] === '.') { | ||
str = str.substring(0, str.length - 1); | ||
} | ||
|
||
/* Remove the optional wildcard before checking validity */ | ||
if (options.allow_wildcard === true && str.indexOf('*.') === 0) { | ||
str = str.substring(2); | ||
} | ||
|
||
const parts = str.split('.'); | ||
const tld = parts[parts.length - 1]; | ||
|
||
if (options.require_tld) { | ||
// disallow fqdns without tld | ||
if (parts.length < 2) { | ||
return false; | ||
} | ||
|
||
if (!options.allow_numeric_tld && !/^([a-z\u00A1-\u00A8\u00AA-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]{2,}|xn[a-z0-9-]{2,})$/i.test(tld)) { | ||
return false; | ||
} | ||
|
||
// disallow spaces | ||
if (/\s/.test(tld)) { | ||
return false; | ||
} | ||
} | ||
|
||
// reject numeric TLDs | ||
if (!options.allow_numeric_tld && /^\d+$/.test(tld)) { | ||
return false; | ||
} | ||
|
||
return parts.every((part) => { | ||
if (part.length > 63) { | ||
return false; | ||
} | ||
|
||
if (!/^[a-z_\u00a1-\uffff0-9-]+$/i.test(part)) { | ||
return false; | ||
} | ||
|
||
// disallow full-width chars | ||
if (/[\uff01-\uff5e]/.test(part)) { | ||
return false; | ||
} | ||
|
||
// disallow parts starting or ending with hyphen | ||
if (/^-|-$/.test(part)) { | ||
return false; | ||
} | ||
|
||
if (!options.allow_underscores && /_/.test(part)) { | ||
return false; | ||
} | ||
|
||
return true; | ||
}); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
/** | ||
* This file is a copy of validator.js isIP method - v13.7.0: | ||
* https://github.com/validatorjs/validator.js/blob/531dc7f1f75613bec75c6d888b46480455e78dc7/src/lib/isIP.js | ||
*/ | ||
|
||
const assertString = require('./util/assertString'); | ||
/** | ||
11.3. Examples | ||
The following addresses | ||
fe80::1234 (on the 1st link of the node) | ||
ff02::5678 (on the 5th link of the node) | ||
ff08::9abc (on the 10th organization of the node) | ||
would be represented as follows: | ||
fe80::1234%1 | ||
ff02::5678%5 | ||
ff08::9abc%10 | ||
(Here we assume a natural translation from a zone index to the | ||
<zone_id> part, where the Nth zone of any scope is translated into | ||
"N".) | ||
If we use interface names as <zone_id>, those addresses could also be | ||
represented as follows: | ||
fe80::1234%ne0 | ||
ff02::5678%pvc1.3 | ||
ff08::9abc%interface10 | ||
where the interface "ne0" belongs to the 1st link, "pvc1.3" belongs | ||
to the 5th link, and "interface10" belongs to the 10th organization. | ||
* * */ | ||
const IPv4SegmentFormat = '(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])'; | ||
const IPv4AddressFormat = `(${IPv4SegmentFormat}[.]){3}${IPv4SegmentFormat}`; | ||
const IPv4AddressRegExp = new RegExp(`^${IPv4AddressFormat}$`); | ||
|
||
const IPv6SegmentFormat = '(?:[0-9a-fA-F]{1,4})'; | ||
const IPv6AddressRegExp = new RegExp('^(' + | ||
`(?:${IPv6SegmentFormat}:){7}(?:${IPv6SegmentFormat}|:)|` + | ||
`(?:${IPv6SegmentFormat}:){6}(?:${IPv4AddressFormat}|:${IPv6SegmentFormat}|:)|` + | ||
`(?:${IPv6SegmentFormat}:){5}(?::${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,2}|:)|` + | ||
`(?:${IPv6SegmentFormat}:){4}(?:(:${IPv6SegmentFormat}){0,1}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,3}|:)|` + | ||
`(?:${IPv6SegmentFormat}:){3}(?:(:${IPv6SegmentFormat}){0,2}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,4}|:)|` + | ||
`(?:${IPv6SegmentFormat}:){2}(?:(:${IPv6SegmentFormat}){0,3}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,5}|:)|` + | ||
`(?:${IPv6SegmentFormat}:){1}(?:(:${IPv6SegmentFormat}){0,4}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,6}|:)|` + | ||
`(?::((?::${IPv6SegmentFormat}){0,5}:${IPv4AddressFormat}|(?::${IPv6SegmentFormat}){1,7}|:))` + | ||
')(%[0-9a-zA-Z-.:]{1,})?$'); | ||
|
||
module.exports = function isIP(str, version = '') { | ||
assertString(str); | ||
version = String(version); | ||
if (!version) { | ||
return isIP(str, 4) || isIP(str, 6); | ||
} | ||
if (version === '4') { | ||
return IPv4AddressRegExp.test(str); | ||
} | ||
if (version === '6') { | ||
return IPv6AddressRegExp.test(str); | ||
} | ||
return false; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
const errors = require('@tryghost/errors'); | ||
|
||
/** | ||
* This file is a copy of validator.js assertString util - v13.7.0: | ||
* https://github.com/validatorjs/validator.js/blob/531dc7f1f75613bec75c6d888b46480455e78dc7/src/lib/util/assertString.js | ||
*/ | ||
module.exports = function assertString(input) { | ||
const isString = typeof input === 'string' || input instanceof String; | ||
|
||
if (!isString) { | ||
let invalidType = typeof input; | ||
if (input === null) { | ||
invalidType = 'null'; | ||
} else if (invalidType === 'object') { | ||
invalidType = input.constructor.name; | ||
} | ||
|
||
throw new errors.ValidationError({ | ||
message: `Expected a string but received a ${invalidType}` | ||
}); | ||
} | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
/** | ||
* This file is a copy of validator.js merge util - v13.7.0: | ||
* https://github.com/validatorjs/validator.js/blob/531dc7f1f75613bec75c6d888b46480455e78dc7/src/lib/util/merge.js | ||
*/ | ||
|
||
module.exports = function merge(obj = {}, defaults) { | ||
for (const key in defaults) { | ||
if (typeof obj[key] === 'undefined') { | ||
obj[key] = defaults[key]; | ||
} | ||
} | ||
return obj; | ||
}; |
Oops, something went wrong.