Skip to content

Commit

Permalink
Auto merge of rust-lang#75349 - nnethercote:tweak-confusable-idents-c…
Browse files Browse the repository at this point in the history
…hecking, r=petrochenkov

Tweak confusable idents checking

The confusable idents checking does some sub-optimal things with symbols.

r? @petrochenkov
cc @crlf0710
  • Loading branch information
bors committed Aug 10, 2020
2 parents d495ef5 + 0a597bd commit 770bd3d
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 65 deletions.
90 changes: 29 additions & 61 deletions src/librustc_lint/non_ascii_idents.rs
@@ -1,7 +1,7 @@
use crate::{EarlyContext, EarlyLintPass, LintContext};
use rustc_ast::ast;
use rustc_data_structures::fx::FxHashMap;
use rustc_span::symbol::SymbolStr;
use rustc_span::symbol::Symbol;

declare_lint! {
pub NON_ASCII_IDENTS,
Expand Down Expand Up @@ -39,7 +39,6 @@ impl EarlyLintPass for NonAsciiIdents {
use rustc_span::Span;
use std::collections::BTreeMap;
use unicode_security::GeneralSecurityProfile;
use utils::CowBoxSymStr;

let check_non_ascii_idents = cx.builder.lint_level(NON_ASCII_IDENTS).0 != Level::Allow;
let check_uncommon_codepoints =
Expand All @@ -58,6 +57,12 @@ impl EarlyLintPass for NonAsciiIdents {

let mut has_non_ascii_idents = false;
let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();

// Sort by `Span` so that error messages make sense with respect to the
// order of identifier locations in the code.
let mut symbols: Vec<_> = symbols.iter().collect();
symbols.sort_by_key(|k| k.1);

for (symbol, &sp) in symbols.iter() {
let symbol_str = symbol.as_str();
if symbol_str.is_ascii() {
Expand All @@ -77,33 +82,34 @@ impl EarlyLintPass for NonAsciiIdents {
}

if has_non_ascii_idents && check_confusable_idents {
let mut skeleton_map: FxHashMap<CowBoxSymStr, (SymbolStr, Span, bool)> =
let mut skeleton_map: FxHashMap<Symbol, (Symbol, Span, bool)> =
FxHashMap::with_capacity_and_hasher(symbols.len(), Default::default());
let mut str_buf = String::new();
for (symbol, &sp) in symbols.iter() {
fn calc_skeleton(symbol_str: &SymbolStr, buffer: &mut String) -> CowBoxSymStr {
use std::mem::replace;
use unicode_security::confusable_detection::skeleton;
buffer.clear();
buffer.extend(skeleton(symbol_str));
if *symbol_str == *buffer {
CowBoxSymStr::Interned(symbol_str.clone())
} else {
let owned = replace(buffer, String::new());
CowBoxSymStr::Owned(owned.into_boxed_str())
}
}
let mut skeleton_buf = String::new();

for (&symbol, &sp) in symbols.iter() {
use unicode_security::confusable_detection::skeleton;

let symbol_str = symbol.as_str();
let is_ascii = symbol_str.is_ascii();
let skeleton = calc_skeleton(&symbol_str, &mut str_buf);

// Get the skeleton as a `Symbol`.
skeleton_buf.clear();
skeleton_buf.extend(skeleton(&symbol_str));
let skeleton_sym = if *symbol_str == *skeleton_buf {
symbol
} else {
Symbol::intern(&skeleton_buf)
};

skeleton_map
.entry(skeleton)
.and_modify(|(existing_symbolstr, existing_span, existing_is_ascii)| {
.entry(skeleton_sym)
.and_modify(|(existing_symbol, existing_span, existing_is_ascii)| {
if !*existing_is_ascii || !is_ascii {
cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
lint.build(&format!(
"identifier pair considered confusable between `{}` and `{}`",
existing_symbolstr, symbol_str
existing_symbol.as_str(),
symbol.as_str()
))
.span_label(
*existing_span,
Expand All @@ -113,12 +119,12 @@ impl EarlyLintPass for NonAsciiIdents {
});
}
if *existing_is_ascii && !is_ascii {
*existing_symbolstr = symbol_str.clone();
*existing_symbol = symbol;
*existing_span = sp;
*existing_is_ascii = is_ascii;
}
})
.or_insert((symbol_str, sp, is_ascii));
.or_insert((symbol, sp, is_ascii));
}
}

Expand Down Expand Up @@ -232,41 +238,3 @@ impl EarlyLintPass for NonAsciiIdents {
}
}
}

mod utils {
use rustc_span::symbol::SymbolStr;
use std::hash::{Hash, Hasher};
use std::ops::Deref;

pub(super) enum CowBoxSymStr {
Interned(SymbolStr),
Owned(Box<str>),
}

impl Deref for CowBoxSymStr {
type Target = str;

fn deref(&self) -> &str {
match self {
CowBoxSymStr::Interned(interned) => interned,
CowBoxSymStr::Owned(ref owned) => owned,
}
}
}

impl Hash for CowBoxSymStr {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
Hash::hash(&**self, state)
}
}

impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
#[inline]
fn eq(&self, other: &CowBoxSymStr) -> bool {
PartialEq::eq(&**self, &**other)
}
}

impl Eq for CowBoxSymStr {}
}
3 changes: 1 addition & 2 deletions src/librustc_session/parse.rs
Expand Up @@ -13,7 +13,6 @@ use rustc_span::hygiene::ExpnId;
use rustc_span::source_map::{FilePathMapping, SourceMap};
use rustc_span::{MultiSpan, Span, Symbol};

use std::collections::BTreeMap;
use std::path::PathBuf;
use std::str;

Expand Down Expand Up @@ -64,7 +63,7 @@ impl GatedSpans {
#[derive(Default)]
pub struct SymbolGallery {
/// All symbols occurred and their first occurrence span.
pub symbols: Lock<BTreeMap<Symbol, Span>>,
pub symbols: Lock<FxHashMap<Symbol, Span>>,
}

impl SymbolGallery {
Expand Down
Expand Up @@ -3,9 +3,11 @@
#![allow(uncommon_codepoints, non_upper_case_globals)]

const: usize = 42;
const s_s: usize = 42;

fn main() {
let s = "rust"; //~ ERROR identifier pair considered confusable
let s_s = "rust2"; //~ ERROR identifier pair considered confusable
not_affected();
}

Expand Down
@@ -1,5 +1,5 @@
error: identifier pair considered confusable between `s` and `s`
--> $DIR/lint-confusable-idents.rs:8:9
--> $DIR/lint-confusable-idents.rs:9:9
|
LL | const s: usize = 42;
| -- this is where the previous identifier occurred
Expand All @@ -13,5 +13,14 @@ note: the lint level is defined here
LL | #![deny(confusable_idents)]
| ^^^^^^^^^^^^^^^^^

error: aborting due to previous error
error: identifier pair considered confusable between `s_s` and `s_s`
--> $DIR/lint-confusable-idents.rs:10:9
|
LL | const s_s: usize = 42;
| --- this is where the previous identifier occurred
...
LL | let s_s = "rust2";
| ^^^^^

error: aborting due to 2 previous errors

0 comments on commit 770bd3d

Please sign in to comment.