Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Unicode international characters #151

Merged
merged 15 commits into from
Feb 15, 2017
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
45 changes: 30 additions & 15 deletions lib/index.js
Expand Up @@ -2,7 +2,10 @@

// Load modules

const Dns = require('dns');
/* eslint-disable */
let Dns = require('dns');
/* eslint-enable */
const Punycode = require('punycode');


// Declare internals
Expand Down Expand Up @@ -132,7 +135,19 @@ internals.specials = function () {
}

for (let i = 0; i < specials.length; ++i) {
lookup[specials.charCodeAt(i)] = true;
lookup[specials.codePointAt(i)] = true;
}

// add C0 control characters

for (let i = 0; i < 33; ++i) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this package only supports node 4+, you can probably use lookup.fill to initialize it.

lookup[i] = true;
}

// add C1 control characters

for (let i = 127; i < 160; ++i) {
lookup[i] = true;
}

return function (code) {
Expand Down Expand Up @@ -173,7 +188,7 @@ internals.validDomain = function (tldAtom, options) {


/**
* Check that an email address conforms to RFCs 5321, 5322 and others
* Check that an email address conforms to RFCs 5321, 5322, 6530 and others
*
* We distinguish clearly between a Mailbox as defined by RFC 5321 and an
* addr-spec as defined by RFC 5322. Depending on the context, either can be
Expand Down Expand Up @@ -462,10 +477,10 @@ exports.validate = internals.validate = function (email, options, callback) {
}
else {
context.prev = context.now;
charCode = token.charCodeAt(0);
charCode = token.codePointAt(0);

// Especially if charCode == 10
if (charCode < 33 || charCode > 126 || internals.specials(charCode)) {
if (internals.specials(charCode)) {

// Fatal error
updateResult(internals.diagnoses.errExpectingATEXT);
Expand Down Expand Up @@ -660,11 +675,11 @@ exports.validate = internals.validate = function (email, options, callback) {
}
}

charCode = token.charCodeAt(0);
charCode = token.codePointAt(0);
// Assume this token isn't a hyphen unless we discover it is
hyphenFlag = false;

if (charCode < 33 || charCode > 126 || internals.specials(charCode)) {
if (internals.specials(charCode)) {
// Fatal error
updateResult(internals.diagnoses.errExpectingATEXT);
}
Expand All @@ -676,8 +691,8 @@ exports.validate = internals.validate = function (email, options, callback) {

hyphenFlag = true;
}
// Check if it's a neither a number nor a latin letter
else if (charCode < 48 || charCode > 122 || (charCode > 57 && charCode < 65) || (charCode > 90 && charCode < 97)) {
// Check if it's a neither a number nor a latin/unicode letter
else if (charCode < 48 || (charCode > 122 && charCode < 192) || (charCode > 57 && charCode < 65) || (charCode > 90 && charCode < 97)) {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@WesTyler I'm reading through some of this code as I refactor, and now I find myself asking where 192 came from. You wouldn't happen to recall, would you?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I believe 192 is the beginning of the Unicode "international" character set (À).

If I remember correctly, Unicode #s 123-191 are all non-character symbols.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh, ok. Thanks!

// This is not an RFC 5321 subdomain, but still OK by RFC 5322
updateResult(internals.diagnoses.rfc5322Domain);
}
Expand Down Expand Up @@ -864,7 +879,7 @@ exports.validate = internals.validate = function (email, options, callback) {
// %d12 / ; include the carriage
// %d14-31 / ; return, line feed, and
// %d127 ; white space characters
charCode = token.charCodeAt(0);
charCode = token.codePointAt(0);

// '\r', '\n', ' ', and '\t' have already been parsed above
if (charCode > 127 || charCode === 0 || token === '[') {
Expand Down Expand Up @@ -954,7 +969,7 @@ exports.validate = internals.validate = function (email, options, callback) {
// %d12 / ; include the carriage
// %d14-31 / ; return, line feed, and
// %d127 ; white space characters
charCode = token.charCodeAt(0);
charCode = token.codePointAt(0);

if (charCode > 127 || charCode === 0 || charCode === 10) {
updateResult(internals.diagnoses.errExpectingQTEXT);
Expand Down Expand Up @@ -992,7 +1007,7 @@ exports.validate = internals.validate = function (email, options, callback) {
// %d127 ; white space characters
//
// i.e. obs-qp = "\" (%d0-8, %d10-31 / %d127)
charCode = token.charCodeAt(0);
charCode = token.codePointAt(0);

if (charCode > 127) {
// Fatal error
Expand Down Expand Up @@ -1099,7 +1114,7 @@ exports.validate = internals.validate = function (email, options, callback) {
// %d12 / ; include the carriage
// %d14-31 / ; return, line feed, and
// %d127 ; white space characters
charCode = token.charCodeAt(0);
charCode = token.codePointAt(0);

if (charCode > 127 || charCode === 0 || charCode === 10) {
// Fatal error
Expand Down Expand Up @@ -1266,7 +1281,7 @@ exports.validate = internals.validate = function (email, options, callback) {

if (!dnsPositive && maxResult < internals.categories.dnsWarn) {
// Per RFC 5321, domain atoms are limited to letter-digit-hyphen, so we only need to check code <= 57 to check for a digit
const code = atomData.domains[elementCount].charCodeAt(0);
const code = atomData.domains[elementCount].codePointAt(0);
if (code <= 57) {
updateResult(internals.diagnoses.rfc5321TLDNumeric);
}
Expand Down Expand Up @@ -1311,7 +1326,7 @@ exports.validate = internals.validate = function (email, options, callback) {
parseData.domain += '.';
}

const dnsDomain = parseData.domain;
const dnsDomain = Punycode.toASCII(parseData.domain);
Dns.resolveMx(dnsDomain, (err, mxRecords) => {

// If we have a fatal error, then we must assume that there are no records
Expand Down
4 changes: 3 additions & 1 deletion package.json
Expand Up @@ -18,10 +18,12 @@
"node": ">=4.0.0"
},
"dependencies": {
"punycode": "^2.1.0"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keep hapi versioning scheme (2.1.x).

},
"devDependencies": {
"code": "3.x.x",
"lab": "10.x.x"
"lab": "10.x.x",
"rewire": "^2.5.2"
},
"scripts": {
"test": "lab -a code -t 100 -L -m 5000",
Expand Down
21 changes: 21 additions & 0 deletions test/index.js
Expand Up @@ -48,6 +48,10 @@ const tldExpectations = [
['shouldbe@example.com', diag.valid]
];

const noDNSExpectations = [
['伊昭傑@郵件.商務', diag.valid],
['ñoñó1234@ñomething.com', diag.valid]
];

describe('validate()', () => {

Expand Down Expand Up @@ -207,6 +211,23 @@ describe('validate()', () => {
});
});

noDNSExpectations.forEach((obj, i) => {

const email = obj[0];
const result = obj[1];
it('should handle noDNS test ' + (i + 1), (done) => {

Isemail.validate(email, {
errorLevel: 0,
checkDNS: false
}, (res) => {

expect(res).to.equal(result);
done();
});
});
});

it('should handle domain atom test 1', (done) => {

expect(Isemail.validate('shouldbe@invalid', {
Expand Down
3 changes: 3 additions & 0 deletions test/tests.json
Expand Up @@ -11,6 +11,9 @@
["test@nominet.org.uk", "valid"],
["test@about.museum", "valid"],
["a@iana.org", "valid"],
["êjness@iana.org", "valid"],
["ñoñó1234@iana.org", "valid"],
["ñoñó1234@something.com", "valid"],
["test@e.com", "dnsWarnNoRecord"],
["test@iana.a", "dnsWarnNoRecord"],
["test.test@iana.org", "valid"],
Expand Down
47 changes: 47 additions & 0 deletions test/tmp.js
@@ -0,0 +1,47 @@
'use strict';
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tmp.js ??

Copy link
Contributor Author

@WesTyler WesTyler Feb 7, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@skeggse's preference is to utilize a proper internationalized domain with an active MX record. I have struck out on finding such a domain, so this approach was his backup plan. I'm guessing tmp.js was so that it doesn't become the permanent testing solution by default. :P


// Load modules

const Lab = require('lab');
const Code = require('code');
const Rewire = require('rewire');
const Isemail = Rewire('..');

// Test shortcuts

const lab = exports.lab = Lab.script();
const describe = lab.describe;
const it = lab.it;
const expect = Code.expect;

// declare internals

const internals = {};

// Dns rewire stub

const dns_stub = {
resolveMx: (domainName, cb) => {

internals.punyDomain = domainName;

return cb(null, [domainName]);
}
};

Isemail.__set__('Dns', dns_stub);

describe('validate() international domains', () => {

it('should punycode domains', (done) => {

Isemail.validate('伊昭傑@郵件.商務', {
errorLevel: 0,
checkDNS: true
}, () => {

expect(internals.punyDomain).to.equal('xn--5nqv22n.xn--lhr59c');
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This works, but depends on the exact behavior of punycode. I'd rather not test whether punycode did the right thing, just whether validate called punycode correctly. Instead, could you put this?

expect(punycode.toUnicode(internals.punyDomain)).to.equal('伊昭傑@郵件.商務');

done();
});
});
});