Skip to content

Commit

Permalink
Error on source files with unicode directional formatting characters
Browse files Browse the repository at this point in the history
Don't allow characters with unicode property `Bidi_Control` in source
files.
  • Loading branch information
lrytz committed Apr 27, 2022
1 parent 79a7324 commit fd97b40
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 1 deletion.
7 changes: 6 additions & 1 deletion src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
Expand Up @@ -806,7 +806,12 @@ trait Scanners extends ScannersCommon {
case ']' =>
nextChar(); token = RBRACKET
case SU =>
if (isAtEnd) token = EOF
if (isAtEnd) {
bidiChars.foreach { case (char, offset) =>
syntaxError(offset, f"found unicode bidirectional character '\\u$char%04x'; in a string or character literal, use a unicode escape instead")
}
token = EOF
}
else {
syntaxError("illegal character")
nextChar()
Expand Down
7 changes: 7 additions & 0 deletions src/compiler/scala/tools/nsc/util/CharArrayReader.scala
Expand Up @@ -13,6 +13,7 @@
package scala.tools.nsc
package util

import scala.collection.mutable.ListBuffer
import scala.reflect.internal.Chars._

trait CharArrayReaderData {
Expand Down Expand Up @@ -41,6 +42,8 @@ abstract class CharArrayReader extends CharArrayReaderData { self =>

val buf: Array[Char]

/** Bidirectional formatting characters encountered while reading `buf`, stored as
 *  `(character, offset)` pairs. The offset is the value of `charOffset` right after
 *  the character was consumed. The scanner reports one syntax error per entry once
 *  it reaches the end of the input.
 */
val bidiChars: ListBuffer[(Int, Int)] = ListBuffer.empty

/** Advance one character; reducing CR;LF pairs to just LF */
final def nextChar(): Unit = {
if (charOffset >= buf.length) {
Expand All @@ -49,6 +52,8 @@ abstract class CharArrayReader extends CharArrayReaderData { self =>
val c = buf(charOffset)
ch = c
charOffset += 1
if (isBiDiCharacter(ch))
bidiChars.addOne((ch, charOffset))
if (ch < ' ') {
skipCR()
potentialLineEnd()
Expand All @@ -67,6 +72,8 @@ abstract class CharArrayReader extends CharArrayReaderData { self =>
val c = buf(charOffset)
ch = c
charOffset += 1
if (isBiDiCharacter(ch))
bidiChars.addOne((ch, charOffset))
}
}

Expand Down
8 changes: 8 additions & 0 deletions src/reflect/scala/reflect/internal/Chars.scala
Expand Up @@ -118,6 +118,14 @@ trait Chars {
'|' | '/' | '\\' => true
case c => isSpecial(c)
}

/** Tells whether `c` is a unicode directional formatting character.
 *
 *  The accepted code points are U+061C, U+200E..U+200F, U+202A..U+202E and
 *  U+2066..U+2069; every other character yields `false`.
 *
 *  @param c the character to classify
 *  @return `true` if `c` is one of the code points listed above
 */
def isBiDiCharacter(c: Char): Boolean = {
  val code = c.toInt
  code == 0x061c ||
  (code >= 0x200e && code <= 0x200f) ||
  (code >= 0x202a && code <= 0x202e) ||
  (code >= 0x2066 && code <= 0x2069)
}
}

object Chars extends Chars {
Expand Down
31 changes: 31 additions & 0 deletions test/files/neg/t12478.check
@@ -0,0 +1,31 @@
t12478.scala:3: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
accessLevel != "user‮ ⁦// Check if admin⁩ ⁦"
^
t12478.scala:3: error: found unicode bidirectional character '\u2066'; in a string or character literal, use a unicode escape instead
accessLevel != "user‮ ⁦// Check if admin⁩ ⁦"
^
t12478.scala:3: error: found unicode bidirectional character '\u2069'; in a string or character literal, use a unicode escape instead
accessLevel != "user‮ ⁦// Check if admin⁩ ⁦"
^
t12478.scala:3: error: found unicode bidirectional character '\u2066'; in a string or character literal, use a unicode escape instead
accessLevel != "user‮ ⁦// Check if admin⁩ ⁦"
^
t12478.scala:7: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
cl‮ass C
^
t12478.scala:9: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
def a‮cb
^
t12478.scala:11: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
// comm‮tne
^
t12478.scala:13: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
"""te‮tx"""
^
t12478.scala:14: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
raw"""te‮tx"""
^
t12478.scala:16: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
val u202e = '‮'
^
10 errors
21 changes: 21 additions & 0 deletions test/files/neg/t12478.scala
@@ -0,0 +1,21 @@
object Test {
def isAdmin(accessLevel: String): Boolean =
accessLevel != "user‮ ⁦// Check if admin⁩ ⁦"

def שרה = 0 // no bidi override char, these characters are rtl

cl‮ass C

def a‮cb

// comm‮tne

"""te‮tx"""
raw"""te‮tx"""

val u202e = '‮'

def main(args: Array[String]): Unit = {
println(isAdmin("user"))
}
}
3 changes: 3 additions & 0 deletions test/files/run/t12478.check
@@ -0,0 +1,3 @@
ab‮dc‬
ab‮dc‬
Sarah
12 changes: 12 additions & 0 deletions test/files/run/t12478.scala
@@ -0,0 +1,12 @@
// Run test: bidirectional formatting characters written as unicode escapes inside
// string and character literals, plus an identifier made of right-to-left letters.
object Test {
// String literal holding U+202E and U+202C, both spelled as unicode escapes.
val oks = "ab\u202edc\u202c"
// Character literal for U+202E, spelled as a unicode escape.
val okc = '\u202e'

// Identifier written with Hebrew letters; it contains no directional formatting character.
def שרה = "Sarah"

def main(args: Array[String]): Unit = {
println(oks)
// Same text as `oks`: U+202E is spliced in from `okc`, U+202C is an escape in the interpolated string.
println(s"ab${okc}dc\u202c")
println(שרה)
}
}

0 comments on commit fd97b40

Please sign in to comment.