Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

For security, error on source files with Unicode directional formatting characters #10017

Merged
merged 1 commit into from May 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
Expand Up @@ -806,7 +806,12 @@ trait Scanners extends ScannersCommon {
case ']' =>
nextChar(); token = RBRACKET
case SU =>
if (isAtEnd) token = EOF
if (isAtEnd) {
bidiChars.foreach { case (char, offset) =>
syntaxError(offset, f"found unicode bidirectional character '\\u$char%04x'; in a string or character literal, use a unicode escape instead")
}
token = EOF
}
else {
syntaxError("illegal character")
nextChar()
Expand Down
7 changes: 7 additions & 0 deletions src/compiler/scala/tools/nsc/util/CharArrayReader.scala
Expand Up @@ -13,6 +13,7 @@
package scala.tools.nsc
package util

import scala.collection.mutable.ListBuffer
import scala.reflect.internal.Chars._

trait CharArrayReaderData {
Expand Down Expand Up @@ -41,6 +42,8 @@ abstract class CharArrayReader extends CharArrayReaderData { self =>

val buf: Array[Char]

/** Unicode directional formatting characters encountered while reading `buf`.
 *
 *  Each entry is `(character code, charOffset)`. The reader appends the entry
 *  after it has already advanced `charOffset`, so the stored offset is the index
 *  just past the character in `buf`, not the index of the character itself.
 *  NOTE(review): confirm that consumers reporting positions expect this
 *  "one past" offset.
 */
val bidiChars: ListBuffer[(Int, Int)] = ListBuffer.empty

/** Advance one character; reducing CR;LF pairs to just LF */
final def nextChar(): Unit = {
if (charOffset >= buf.length) {
Expand All @@ -49,6 +52,8 @@ abstract class CharArrayReader extends CharArrayReaderData { self =>
val c = buf(charOffset)
ch = c
charOffset += 1
if (isBiDiCharacter(ch))
bidiChars.addOne((ch, charOffset))
if (ch < ' ') {
skipCR()
potentialLineEnd()
Expand All @@ -67,6 +72,8 @@ abstract class CharArrayReader extends CharArrayReaderData { self =>
val c = buf(charOffset)
ch = c
charOffset += 1
if (isBiDiCharacter(ch))
bidiChars.addOne((ch, charOffset))
}
}

Expand Down
8 changes: 8 additions & 0 deletions src/reflect/scala/reflect/internal/Chars.scala
Expand Up @@ -118,6 +118,14 @@ trait Chars {
'|' | '/' | '\\' => true
case c => isSpecial(c)
}

/** Tests whether `c` is a Unicode directional formatting character.
 *
 *  The accepted code points are U+061C, U+200E..U+200F, U+202A..U+202E and
 *  U+2066..U+2069; every other character yields `false`.
 *
 *  @param c the character to classify
 *  @return `true` if `c` is one of the code points listed above
 */
def isBiDiCharacter(c: Char): Boolean = {
  // Inclusive range test against the character being classified.
  def within(lo: Char, hi: Char): Boolean = lo <= c && c <= hi

  c == '\u061c' ||               // ARABIC LETTER MARK
  within('\u200e', '\u200f') ||  // LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK
  within('\u202a', '\u202e') ||  // LRE, RLE, PDF, LRO, RLO (embeddings and overrides)
  within('\u2066', '\u2069')     // LRI, RLI, FSI, PDI (isolates)
}
}

object Chars extends Chars {
Expand Down
31 changes: 31 additions & 0 deletions test/files/neg/t12478.check
@@ -0,0 +1,31 @@
t12478.scala:3: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
accessLevel != "user‮ ⁦// Check if admin⁩ ⁦"
^
t12478.scala:3: error: found unicode bidirectional character '\u2066'; in a string or character literal, use a unicode escape instead
accessLevel != "user‮ ⁦// Check if admin⁩ ⁦"
^
t12478.scala:3: error: found unicode bidirectional character '\u2069'; in a string or character literal, use a unicode escape instead
accessLevel != "user‮ ⁦// Check if admin⁩ ⁦"
^
t12478.scala:3: error: found unicode bidirectional character '\u2066'; in a string or character literal, use a unicode escape instead
accessLevel != "user‮ ⁦// Check if admin⁩ ⁦"
^
t12478.scala:7: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
cl‮ass C
^
t12478.scala:9: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
def a‮cb
^
t12478.scala:11: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
// comm‮tne
^
t12478.scala:13: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
"""te‮tx"""
^
t12478.scala:14: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
raw"""te‮tx"""
^
t12478.scala:16: error: found unicode bidirectional character '\u202e'; in a string or character literal, use a unicode escape instead
val u202e = '‮'
^
10 errors
21 changes: 21 additions & 0 deletions test/files/neg/t12478.scala
@@ -0,0 +1,21 @@
object Test {
def isAdmin(accessLevel: String): Boolean =
accessLevel != "user‮ ⁦// Check if admin⁩ ⁦"

def שרה = 0 // no bidi override char, these characters are rtl

cl‮ass C

def a‮cb

// comm‮tne

"""te‮tx"""
raw"""te‮tx"""

val u202e = '‮'

def main(args: Array[String]): Unit = {
println(isAdmin("user"))
}
}
3 changes: 3 additions & 0 deletions test/files/run/t12478.check
@@ -0,0 +1,3 @@
ab‮dc‬
ab‮dc‬
Sarah
12 changes: 12 additions & 0 deletions test/files/run/t12478.scala
@@ -0,0 +1,12 @@
// Run fixture: directional formatting characters appear only as unicode escapes
// inside string and character literals, next to an identifier made of Hebrew letters.
object Test {
// String literal that spells U+202E and U+202C as escapes instead of raw characters.
val oks = "ab\u202edc\u202c"
// Character literal that spells U+202E as an escape.
val okc = '\u202e'

// Identifier written with Hebrew letters; it contains no directional formatting character.
def שרה = "Sarah"

// Prints the escaped string, an interpolated string built from `okc`, and the
// value of the Hebrew-named method, one per line.
def main(args: Array[String]): Unit = {
println(oks)
println(s"ab${okc}dc\u202c")
println(שרה)
}
}