Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(idna): fast-path simple/ascii domains #761

Merged
merged 2 commits into from Sep 6, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
65 changes: 38 additions & 27 deletions idna/src/uts46.rs
Expand Up @@ -318,50 +318,48 @@ fn check_validity(label: &str, config: Config, errors: &mut Errors) {
// V8: Bidi rules are checked inside `processing()`
}

/// http://www.unicode.org/reports/tr46/#Processing
fn processing(
domain: &str,
config: Config,
normalized: &mut String,
output: &mut String,
) -> Errors {
// Weed out the simple cases: only allow all lowercase ASCII characters and digits where none
// of the labels start with PUNYCODE_PREFIX and labels don't start or end with hyphen.
let (mut prev, mut simple, mut puny_prefix) = ('?', !domain.is_empty(), 0);
/// Reports whether `domain` can skip full UTS #46 processing.
///
/// A domain is "simple" when it is non-empty and consists solely of lowercase
/// ASCII letters, ASCII digits and `.` label separators, where no label starts
/// with the `xn--` Punycode prefix and no label starts or ends with a hyphen.
///
/// Note: the final character-class check rejects every `-`, so any domain that
/// contains a hyphen is currently reported as non-simple. The explicit hyphen
/// and prefix checks ahead of it are therefore purely defensive.
fn is_simple(domain: &str) -> bool {
    if domain.is_empty() {
        return false;
    }
    // `prev` is the last accepted non-dot character; `puny_prefix` counts how many
    // leading characters of the current label have matched "xn--" so far
    // (5 means "this label can no longer match the prefix").
    let (mut prev, mut puny_prefix) = ('?', 0);
    for c in domain.chars() {
        if c == '.' {
            // A label must not end with a hyphen.
            if prev == '-' {
                return false;
            }
            // A new label begins: restart the prefix match.
            puny_prefix = 0;
            continue;
        } else if puny_prefix == 0 && c == '-' {
            // A label must not start with a hyphen.
            return false;
        } else if puny_prefix < 5 {
            if c == ['x', 'n', '-', '-'][puny_prefix] {
                puny_prefix += 1;
                if puny_prefix == 4 {
                    // The label starts with "xn--": needs Punycode handling.
                    return false;
                }
            } else {
                puny_prefix = 5;
            }
        }
        if !c.is_ascii_lowercase() && !c.is_ascii_digit() {
            return false;
        }
        prev = c;
    }
    true
}

/// http://www.unicode.org/reports/tr46/#Processing
fn processing(
domain: &str,
config: Config,
normalized: &mut String,
output: &mut String,
) -> Errors {
normalized.clear();
let mut errors = Errors::default();
let offset = output.len();
Expand Down Expand Up @@ -447,11 +445,13 @@ impl Idna {
}
}

/// http://www.unicode.org/reports/tr46/#ToASCII
#[allow(clippy::wrong_self_convention)]
pub fn to_ascii<'a>(&'a mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
let mut errors = processing(domain, self.config, &mut self.normalized, &mut self.output);

pub fn to_ascii_inner(&mut self, domain: &str, out: &mut String) -> Errors {
if is_simple(domain) {
out.push_str(domain);
return Errors::default();
}
let mut errors = processing(domain, self.config, &mut self.normalized, out);
self.output = std::mem::replace(out, String::with_capacity(out.len()));
let mut first = true;
for label in self.output.split('.') {
if !first {
Expand All @@ -470,6 +470,13 @@ impl Idna {
}
}
}
errors
}

/// http://www.unicode.org/reports/tr46/#ToASCII
#[allow(clippy::wrong_self_convention)]
pub fn to_ascii<'a>(&'a mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
let mut errors = self.to_ascii_inner(domain, out);

if self.config.verify_dns_length {
let domain = if out.ends_with('.') {
Expand All @@ -491,6 +498,10 @@ impl Idna {
/// UTS #46 ToUnicode: http://www.unicode.org/reports/tr46/#ToUnicode
///
/// Writes the result for `domain` into `out`. Input accepted by `is_simple`
/// is appended to `out` unchanged with an empty error set; anything else is
/// handed to `processing`. The resulting `Errors` value is converted into the
/// returned `Result`.
#[allow(clippy::wrong_self_convention)]
pub fn to_unicode<'a>(&'a mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
    let errors = if is_simple(domain) {
        // Fast path: nothing to map, normalize or decode.
        out.push_str(domain);
        Errors::default()
    } else {
        processing(domain, self.config, &mut self.normalized, out)
    };
    errors.into()
}
}
Expand Down