From e58a8c3c50e585553eacd650a5f8752b5f3dcae0 Mon Sep 17 00:00:00 2001
From: snsmac <snsmac@users.noreply.github.com>
Date: Mon, 1 Aug 2022 20:38:40 +0200
Subject: [PATCH] syntax: \p{Sc} should map to \p{Currency_Symbol}

'sc' is the abbreviation for the 'Currency_Symbol' general category, but
it is also an alias for the 'Script' property. During canonicalization it
was therefore normalized to 'Script' before it could be checked as a
general category. We fix this by special-casing 'sc'.

See also #719

Fixes #835, fixes #899
---
 regex-syntax/src/unicode.rs | 7 ++++++-
 tests/unicode.rs            | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/regex-syntax/src/unicode.rs b/regex-syntax/src/unicode.rs
index 8194d7f55..84e781db4 100644
--- a/regex-syntax/src/unicode.rs
+++ b/regex-syntax/src/unicode.rs
@@ -243,7 +243,12 @@ impl<'a> ClassQuery<'a> {
         // a general category. (Currently, we don't even support the
         // 'Case_Folding' property. But if we do in the future, users will be
         // required to spell it out.)
-        if norm != "cf" {
+        //
+        // Similarly, 'sc' is the abbreviation for the 'Currency_Symbol'
+        // general category, but it is also an alias for the 'Script'
+        // property. We skip 'canonical_prop' for it as well, since that
+        // would erroneously normalize it to 'Script'.
+        if norm != "cf" && norm != "sc" {
             if let Some(canon) = canonical_prop(&norm)? {
                 return Ok(CanonicalClassQuery::Binary(canon));
             }
diff --git a/tests/unicode.rs b/tests/unicode.rs
index 9b3228624..748bbb79c 100644
--- a/tests/unicode.rs
+++ b/tests/unicode.rs
@@ -77,6 +77,7 @@ mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4)));
 // See: https://github.com/rust-lang/regex/issues/719
 mat!(uni_class_gencat_format_abbrev1, r"\p{cf}", "\u{E007F}", Some((0, 4)));
 mat!(uni_class_gencat_format_abbrev2, r"\p{gc=cf}", "\u{E007F}", Some((0, 4)));
+mat!(uni_class_gencat_currency_symbol_abbrev, r"\p{Sc}", "$", Some((0, 1)));
 mat!(
     uni_class_gencat_initial_punctuation,
     r"\p{Initial_Punctuation}",