Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bidi rules #497

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
49 changes: 41 additions & 8 deletions idna/src/uts46.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,16 @@ fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec
}

// http://tools.ietf.org/html/rfc5893#section-2
fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool {
fn passes_bidi(label: &str, is_bidi_domain: bool, after_rtl_label: &mut bool) -> bool {
// Rule 0: Bidi Rules apply to Bidi Domain Names: a name with at least one RTL label. A label
// is RTL if it contains at least one character of bidi class R, AL or AN.
if !is_bidi_domain {
return true;
}

let mut chars = label.chars();
let first_char_class = match chars.next() {
let first_char = chars.next();
let first_char_class = match first_char {
Some(c) => bidi_class(c),
None => return true, // empty string
};
Expand Down Expand Up @@ -174,6 +175,7 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool {
BidiClass::R | BidiClass::AL => {
let mut found_en = false;
let mut found_an = false;
*after_rtl_label = true;

// Rule 2
loop {
Expand Down Expand Up @@ -223,6 +225,36 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool {
}
}

BidiClass::EN => {
// https://github.com/servo/rust-url/issues/489
// LDH labels that start with a digit are allowed when they don't come after RTL label
if *after_rtl_label {
return false;
}
match first_char {
Some(c) if c.is_ascii() => {},
_ => { return false; }
};
// check that label is LDH
// https://tools.ietf.org/html/rfc5890#section-2.3.1
let mut last = chars.next();
loop {
match last {
Some(c) => {
if !c.is_ascii() {
return false;
}
last = chars.next();
}
_ => { break; }
}
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This loop doesn't really check if it is truly a LDH label. I think it would accept something like 3_4 as well (_ is ascii, but neither letter, digit or hyphen). Does something else check that in the existing code?

if last == Some('-') {
return false;
}
return true;
}

// Rule 1: Should start with L or R/AL
_ => {
return false;
Expand All @@ -233,16 +265,16 @@ fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool {
}

/// http://www.unicode.org/reports/tr46/#Validity_Criteria
fn validate_full(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec<Error>) {
fn validate_full(label: &str, is_bidi_domain: bool, after_rtl_label: &mut bool, flags: Flags, errors: &mut Vec<Error>) {
// V1: Must be in NFC form.
if label.nfc().ne(label.chars()) {
errors.push(Error::ValidityCriteria);
} else {
validate(label, is_bidi_domain, flags, errors);
validate(label, is_bidi_domain, after_rtl_label, flags, errors);
}
}

fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec<Error>) {
fn validate(label: &str, is_bidi_domain: bool, after_rtl_label: &mut bool, flags: Flags, errors: &mut Vec<Error>) {
let first_char = label.chars().next();
if first_char == None {
// Empty string, pass
Expand Down Expand Up @@ -287,7 +319,7 @@ fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec<Er
// V8: Bidi rules
//
// TODO: Add *CheckBidi* flag
else if !passes_bidi(label, is_bidi_domain)
else if !passes_bidi(label, is_bidi_domain, after_rtl_label)
{
errors.push(Error::ValidityCriteria);
}
Expand Down Expand Up @@ -330,6 +362,7 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {

let mut validated = String::new();
let mut first = true;
let mut after_rtl_label = false;
for label in normalized.split('.') {
if !first {
validated.push('.');
Expand All @@ -339,14 +372,14 @@ fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {
match punycode::decode_to_string(&label[PUNYCODE_PREFIX.len()..]) {
Some(decoded_label) => {
let flags = Flags { transitional_processing: false, ..flags };
validate_full(&decoded_label, is_bidi_domain, flags, errors);
validate_full(&decoded_label, is_bidi_domain, &mut after_rtl_label, flags, errors);
validated.push_str(&decoded_label)
}
None => errors.push(Error::PunycodeError)
}
} else {
// `normalized` is already `NFC` so we can skip that check
validate(label, is_bidi_domain, flags, errors);
validate(label, is_bidi_domain, &mut after_rtl_label, flags, errors);
validated.push_str(label)
}
}
Expand Down
3 changes: 2 additions & 1 deletion idna/tests/unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ fn test_v8_bidi_rules() {
assert_eq!(_to_ascii("אבּג.ابج").unwrap(), "xn--kdb3bdf.xn--mgbcm");

// Bidi domain names cannot start with digits
assert!(_to_ascii("0a.\u{05D0}").is_err());
assert!(_to_ascii("0a.\u{05D0}").is_ok());
assert!(_to_ascii("\u{05D0}.0a").is_err());
assert!(_to_ascii("0à.\u{05D0}").is_err());

// Bidi chars may be punycode-encoded
Expand Down
10 changes: 10 additions & 0 deletions idna/tests/uts46.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ use std::char;
use idna::uts46;
use test::TestFn;

const SKIP_TEST: &'static [&'static str] = &["0A.\\u05D0", "0a.\\u05D0", "0a.xn--4db"
,"1.걾6.𐱁\\u06D0", "1.걾6.𐱁\\u06D0","1.xn--6-945e.xn--glb1794k"];
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably explaining why these are skipped in a comment would be nice ‒ maybe linking to the issue?


pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) {
// http://www.unicode.org/Public/idna/latest/IdnaTest.txt
for (i, line) in include_str!("IdnaTest.txt").lines().enumerate() {
Expand Down Expand Up @@ -37,6 +40,13 @@ pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) {
let to_ascii = pieces.remove(0);
let nv8 = if pieces.len() > 0 { pieces.remove(0) } else { "" };

for skip in SKIP_TEST {
if original == *skip {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be done with SKIP_TEST.contains(original)?

expected_failure = true;
break;
}
}

if expected_failure {
continue;
}
Expand Down
3 changes: 3 additions & 0 deletions tests/unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,9 @@ fn test_idna() {
assert!("http://goșu.ro".parse::<Url>().is_ok());
assert_eq!(Url::parse("http://☃.net/").unwrap().host(), Some(Host::Domain("xn--n3h.net")));
assert!("https://r2---sn-huoa-cvhl.googlevideo.com/crossdomain.xml".parse::<Url>().is_ok());
// https://github.com/servo/rust-url/issues/489
assert!("http://mail.163.com.xn----9mcjf9b4dbm09f.com/iloystgnjfrgthteawvo/indexx.php".parse::<Url>().is_ok());
assert!("http://mail.com.xn----9mcjf9b4dbm09f.163.com/iloystgnjfrgthteawvo/indexx.php".parse::<Url>().is_err());
}

#[test]
Expand Down