From fd97b40579b8ec6ad297a7723d36557e4f45925f Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Tue, 26 Apr 2022 15:23:43 +0200 Subject: [PATCH] Error on source files with unicode directional formatting characters Don't allow characters with unicode property `Bidi_Control` in source files. --- .../scala/tools/nsc/ast/parser/Scanners.scala | 7 ++++- .../tools/nsc/util/CharArrayReader.scala | 7 +++++ .../scala/reflect/internal/Chars.scala | 8 +++++ test/files/neg/t12478.check | 31 +++++++++++++++++++ test/files/neg/t12478.scala | 21 +++++++++++++ test/files/run/t12478.check | 3 ++ test/files/run/t12478.scala | 12 +++++++ 7 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 test/files/neg/t12478.check create mode 100644 test/files/neg/t12478.scala create mode 100644 test/files/run/t12478.check create mode 100644 test/files/run/t12478.scala diff --git a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala index 2b78697298a7..b3e034dae33d 100644 --- a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala +++ b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala @@ -806,7 +806,12 @@ trait Scanners extends ScannersCommon { case ']' => nextChar(); token = RBRACKET case SU => - if (isAtEnd) token = EOF + if (isAtEnd) { + bidiChars.foreach { case (char, offset) => + syntaxError(offset, f"found unicode bidirectional character '\\u$char%04x'; in a string or character literal, use a unicode escape instead") + } + token = EOF + } else { syntaxError("illegal character") nextChar() diff --git a/src/compiler/scala/tools/nsc/util/CharArrayReader.scala b/src/compiler/scala/tools/nsc/util/CharArrayReader.scala index 34bd50f7cc9d..21ceaa3a5620 100644 --- a/src/compiler/scala/tools/nsc/util/CharArrayReader.scala +++ b/src/compiler/scala/tools/nsc/util/CharArrayReader.scala @@ -13,6 +13,7 @@ package scala.tools.nsc package util +import scala.collection.mutable.ListBuffer import scala.reflect.internal.Chars._ trait 
CharArrayReaderData { @@ -41,6 +42,8 @@ abstract class CharArrayReader extends CharArrayReaderData { self => val buf: Array[Char] + val bidiChars: ListBuffer[(Int, Int)] = ListBuffer.empty + /** Advance one character; reducing CR;LF pairs to just LF */ final def nextChar(): Unit = { if (charOffset >= buf.length) { @@ -49,6 +52,8 @@ abstract class CharArrayReader extends CharArrayReaderData { self => val c = buf(charOffset) ch = c charOffset += 1 + if (isBiDiCharacter(ch)) + bidiChars.addOne((ch, charOffset)) if (ch < ' ') { skipCR() potentialLineEnd() @@ -67,6 +72,8 @@ abstract class CharArrayReader extends CharArrayReaderData { self => val c = buf(charOffset) ch = c charOffset += 1 + if (isBiDiCharacter(ch)) + bidiChars.addOne((ch, charOffset)) } } diff --git a/src/reflect/scala/reflect/internal/Chars.scala b/src/reflect/scala/reflect/internal/Chars.scala index 19e7722a985b..5e476169fc89 100644 --- a/src/reflect/scala/reflect/internal/Chars.scala +++ b/src/reflect/scala/reflect/internal/Chars.scala @@ -118,6 +118,14 @@ trait Chars { '|' | '/' | '\\' => true case c => isSpecial(c) } + + def isBiDiCharacter(c: Char): Boolean = (c: @switch) match { + case '\u061c' | + '\u200e' | '\u200f' | + '\u202a' | '\u202b' | '\u202c' | '\u202d' | '\u202e' | + '\u2066' | '\u2067' | '\u2068' | '\u2069' => true + case _ => false + } } object Chars extends Chars { diff --git a/test/files/neg/t12478.check b/test/files/neg/t12478.check new file mode 100644 index 000000000000..69524f340444 --- /dev/null +++ b/test/files/neg/t12478.check @@ -0,0 +1,31 @@ +t12478.scala:3: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead + accessLevel != "user‮ ⁦// Check if admin⁩ ⁦" + ^ +t12478.scala:3: error: found unicode bidirectional character '\u2066'; in a string or character literal, use a unicode escape instead + accessLevel != "user‮ ⁦// Check if admin⁩ ⁦" + ^ +t12478.scala:3: error: found unicode bidirectional character 
'\u2069'; in a string or character literal, use a unicode escape instead + accessLevel != "user‮ ⁦// Check if admin⁩ ⁦" + ^ +t12478.scala:3: error: found unicode bidirectional character '\u2066'; in a string or character literal, use a unicode escape instead + accessLevel != "user‮ ⁦// Check if admin⁩ ⁦" + ^ +t12478.scala:7: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead + cl‮ass C + ^ +t12478.scala:9: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead + def a‮cb + ^ +t12478.scala:11: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead + // comm‮tne + ^ +t12478.scala:13: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead + """te‮tx""" + ^ +t12478.scala:14: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead + raw"""te‮tx""" + ^ +t12478.scala:16: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead + val u202e = '‮' + ^ +10 errors diff --git a/test/files/neg/t12478.scala b/test/files/neg/t12478.scala new file mode 100644 index 000000000000..1bc574d60256 --- /dev/null +++ b/test/files/neg/t12478.scala @@ -0,0 +1,21 @@ +object Test { + def isAdmin(accessLevel: String): Boolean = + accessLevel != "user‮ ⁦// Check if admin⁩ ⁦" + + def שרה = 0 // no bidi override char, these characters are rtl + + cl‮ass C + + def a‮cb + + // comm‮tne + + """te‮tx""" + raw"""te‮tx""" + + val u202e = '‮' + + def main(args: Array[String]): Unit = { + println(isAdmin("user")) + } +} diff --git a/test/files/run/t12478.check b/test/files/run/t12478.check new file mode 100644 index 000000000000..191a81f0b2eb --- /dev/null +++ b/test/files/run/t12478.check @@ -0,0 +1,3 @@ +ab‮dc‬ +ab‮dc‬ +Sarah diff 
--git a/test/files/run/t12478.scala b/test/files/run/t12478.scala new file mode 100644 index 000000000000..4fd52961c0ab --- /dev/null +++ b/test/files/run/t12478.scala @@ -0,0 +1,12 @@ +object Test { + val oks = "ab\u202edc\u202c" + val okc = '\u202e' + + def שרה = "Sarah" + + def main(args: Array[String]): Unit = { + println(oks) + println(s"ab${okc}dc\u202c") + println(שרה) + } +}