Skip to content

Commit

Permalink
Merge pull request #10017 from lrytz/t12478
Browse files Browse the repository at this point in the history
Error on source files with unicode directional formatting characters
  • Loading branch information
lrytz committed May 2, 2022
2 parents 78376ed + fd97b40 commit 21a5643
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 1 deletion.
7 changes: 6 additions & 1 deletion src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
Expand Up @@ -806,7 +806,12 @@ trait Scanners extends ScannersCommon {
case ']' =>
nextChar(); token = RBRACKET
case SU =>
if (isAtEnd) token = EOF
if (isAtEnd) {
bidiChars.foreach { case (char, offset) =>
syntaxError(offset, f"found unicode bidirectional character '\\u$char%04x'; in a string or character literal, use a unicode escape instead")
}
token = EOF
}
else {
syntaxError("illegal character")
nextChar()
Expand Down
7 changes: 7 additions & 0 deletions src/compiler/scala/tools/nsc/util/CharArrayReader.scala
Expand Up @@ -13,6 +13,7 @@
package scala.tools.nsc
package util

import scala.collection.mutable.ListBuffer
import scala.reflect.internal.Chars._

trait CharArrayReaderData {
Expand Down Expand Up @@ -41,6 +42,8 @@ abstract class CharArrayReader extends CharArrayReaderData { self =>

val buf: Array[Char]

/** Unicode directional formatting characters met while reading `buf`.
 *
 *  Each entry is a pair of (character code, value of `charOffset` immediately
 *  after that character was consumed), appended in reading order.
 */
val bidiChars: ListBuffer[(Int, Int)] = new ListBuffer[(Int, Int)]

/** Advance one character; reducing CR;LF pairs to just LF */
final def nextChar(): Unit = {
if (charOffset >= buf.length) {
Expand All @@ -49,6 +52,8 @@ abstract class CharArrayReader extends CharArrayReaderData { self =>
val c = buf(charOffset)
ch = c
charOffset += 1
if (isBiDiCharacter(ch))
bidiChars.addOne((ch, charOffset))
if (ch < ' ') {
skipCR()
potentialLineEnd()
Expand All @@ -67,6 +72,8 @@ abstract class CharArrayReader extends CharArrayReaderData { self =>
val c = buf(charOffset)
ch = c
charOffset += 1
if (isBiDiCharacter(ch))
bidiChars.addOne((ch, charOffset))
}
}

Expand Down
8 changes: 8 additions & 0 deletions src/reflect/scala/reflect/internal/Chars.scala
Expand Up @@ -118,6 +118,14 @@ trait Chars {
'|' | '/' | '\\' => true
case c => isSpecial(c)
}

/** Tells whether `c` is a Unicode directional formatting character.
 *
 *  The accepted code points are:
 *   - U+061C (ARABIC LETTER MARK)
 *   - U+200E..U+200F (LRM, RLM)
 *   - U+202A..U+202E (LRE, RLE, PDF, LRO, RLO)
 *   - U+2066..U+2069 (LRI, RLI, FSI, PDI)
 *
 *  @param c the character to classify
 *  @return `true` for exactly the twelve code points above, `false` otherwise
 */
def isBiDiCharacter(c: Char): Boolean = {
  val code = c.toInt
  // Each contiguous run of code points is checked as an inclusive range.
  val isLetterMark   = code == 0x061c
  val isImplicitMark = code >= 0x200e && code <= 0x200f
  val isEmbedding    = code >= 0x202a && code <= 0x202e
  val isIsolate      = code >= 0x2066 && code <= 0x2069
  isLetterMark || isImplicitMark || isEmbedding || isIsolate
}
}

object Chars extends Chars {
Expand Down
31 changes: 31 additions & 0 deletions test/files/neg/t12478.check
@@ -0,0 +1,31 @@
t12478.scala:3: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
accessLevel != "user‮ ⁦// Check if admin⁩ ⁦"
^
t12478.scala:3: error: found unicode bidirectional character '\u2066'; in a string or character literal, use a unicode escape instead
accessLevel != "user‮ ⁦// Check if admin⁩ ⁦"
^
t12478.scala:3: error: found unicode bidirectional character '\u2069'; in a string or character literal, use a unicode escape instead
accessLevel != "user‮ ⁦// Check if admin⁩ ⁦"
^
t12478.scala:3: error: found unicode bidirectional character '\u2066'; in a string or character literal, use a unicode escape instead
accessLevel != "user‮ ⁦// Check if admin⁩ ⁦"
^
t12478.scala:7: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
cl‮ass C
^
t12478.scala:9: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
def a‮cb
^
t12478.scala:11: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
// comm‮tne
^
t12478.scala:13: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
"""te‮tx"""
^
t12478.scala:14: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
raw"""te‮tx"""
^
t12478.scala:16: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
val u202e = '‮'
^
10 errors
21 changes: 21 additions & 0 deletions test/files/neg/t12478.scala
@@ -0,0 +1,21 @@
object Test { // neg fixture: t12478.check cites these line numbers and echoes the flagged lines, so never add, remove or reflow lines
def isAdmin(accessLevel: String): Boolean =
accessLevel != "user‮ ⁦// Check if admin⁩ ⁦"
// line 3 hides U+202E, U+2066, U+2069 and U+2066 inside the string literal; the check file expects one error for each of them
def שרה = 0 // no bidi override char, these characters are rtl

cl‮ass C

def a‮cb

// comm‮tne

"""te‮tx"""
raw"""te‮tx"""

val u202e = '‮'

def main(args: Array[String]): Unit = {
println(isAdmin("user"))
}
}
3 changes: 3 additions & 0 deletions test/files/run/t12478.check
@@ -0,0 +1,3 @@
ab‮dc‬
ab‮dc‬
Sarah
12 changes: 12 additions & 0 deletions test/files/run/t12478.scala
@@ -0,0 +1,12 @@
// Run fixture: directional formatting characters written as unicode escapes inside
// literals compile and print normally; the expected output lives in t12478.check.
object Test {
// String literal carrying U+202E and U+202C as unicode escapes.
val oks = "ab\u202edc\u202c"
// Character literal carrying U+202E as a unicode escape.
val okc = '\u202e'

// Identifier spelled with right-to-left letters; it contains no directional formatting character.
def שרה = "Sarah"

def main(args: Array[String]): Unit = {
println(oks)
// Builds the same text as `oks` through the s-interpolator, splicing in `okc`.
println(s"ab${okc}dc\u202c")
println(שרה)
}
}

0 comments on commit 21a5643

Please sign in to comment.