diff --git a/spec/01-lexical-syntax.md b/spec/01-lexical-syntax.md index 3dbed39d6806..005756b9cd1f 100644 --- a/spec/01-lexical-syntax.md +++ b/spec/01-lexical-syntax.md @@ -506,7 +506,7 @@ interpolatedString ::= alphaid ‘"’ {[‘\’] interpolatedStringPart | interpolatedStringPart ::= printableChar \ (‘"’ | ‘$’ | ‘\’) | escape escape ::= ‘$$’ | ‘$"’ - | ‘$’ id + | ‘$’ alphaid | ‘$’ BlockExpr alphaid ::= upper idrest | varid @@ -533,9 +533,9 @@ in an interpolated string. A single ‘$’-sign can still be obtained by doubli character: ‘$$’. A single ‘"’-sign can be obtained by the sequence ‘\$"’. The simpler form consists of a ‘$’-sign followed by an identifier starting with -a letter and followed only by letters, digits, and underscore characters, -e.g `$id`. The simpler form is expanded by putting braces around the identifier, -e.g `$id` is equivalent to `${id}`. In the following, unless we explicitly state otherwise, +a letter and followed only by letters, digits, and underscore characters, e.g., `$id`. +The simpler form is expanded by putting braces around the identifier, +e.g., `$id` is equivalent to `${id}`. In the following, unless we explicitly state otherwise, we assume that this expansion has already been performed. The expanded expression is type checked normally. Usually, `StringContext` will resolve to diff --git a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala index 8010fd2756a0..a55e39f70608 100644 --- a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala +++ b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala @@ -182,22 +182,26 @@ trait Scanners extends ScannersCommon { private def isSupplementary(high: Char, test: Int => Boolean, strict: Boolean = true): Boolean = isHighSurrogate(high) && { var res = false - nextChar() - val low = ch + val low = lookaheadReader.getc() if (isLowSurrogate(low)) { - nextChar() - val codepoint = toCodePoint(high, low) - if (isValidCodePoint(codepoint) && test(codepoint)) { - putChar(high) - putChar(low) - res = true - } else - syntaxError(f"illegal character '\\u$high%04x\\u$low%04x'") - } else if (!strict) { + val codePoint = toCodePoint(high, low) + if (isValidCodePoint(codePoint)) { + if (test(codePoint)) { + putChar(high) + putChar(low) + nextChar() + nextChar() + res = true + } + } + else syntaxError(f"illegal character '\\u$high%04x\\u$low%04x'") + } + else if (!strict) { putChar(high) + nextChar() res = true - } else - syntaxError(f"illegal character '\\u$high%04x' missing low surrogate") + } + else syntaxError(f"illegal character '\\u$high%04x' missing low surrogate") res } private def atSupplementary(ch: Char, f: Int => Boolean): Boolean = @@ -621,8 +625,7 @@ trait Scanners extends ScannersCommon { putChar(ch) nextChar() getIdentRest() - if (ch == '"' && token == IDENTIFIER) - token = INTERPOLATIONID + if (ch == '"' && token == IDENTIFIER) token = INTERPOLATIONID case '<' => // is XMLSTART? def fetchLT() = { val last = if (charOffset >= 2) buf(charOffset - 2) else ' ' @@ -729,12 +732,31 @@ trait Scanners extends ScannersCommon { } syntaxError(msg) } + /** Either at closing quote of charlit + * or run the op and take it as a (deprecated) Symbol identifier. + */ + def charLitOrSymbolAfter(op: () => Unit): Unit = + if (ch == '\'') { + nextChar() + token = CHARLIT + setStrVal() + } else { + op() + token = SYMBOLLIT + strVal = name.toString + } def fetchSingleQuote() = { nextChar() - if (isIdentifierStart(ch)) - charLitOr(() => getIdentRest()) - else if (isOperatorPart(ch) && (ch != '\\')) - charLitOr(() => getOperatorRest()) + if (isIdentifierStart(ch)) { + putChar(ch) + nextChar() + charLitOrSymbolAfter(() => getIdentRest()) + } + else if (isOperatorPart(ch) && (ch != '\\')) { + putChar(ch) + nextChar() + charLitOrSymbolAfter(() => getOperatorRest()) + } else if (!isAtEnd && (ch != SU && ch != CR && ch != LF)) { val isEmptyCharLit = (ch == '\'') getLitChar() @@ -801,12 +823,16 @@ trait Scanners extends ScannersCommon { putChar(ch) nextChar() getIdentRest() + if (ch == '"' && token == IDENTIFIER) token = INTERPOLATIONID } else if (isSpecial(ch)) { putChar(ch) nextChar() getOperatorRest() } else if (isSupplementary(ch, isUnicodeIdentifierStart)) { getIdentRest() + if (ch == '"' && token == IDENTIFIER) token = INTERPOLATIONID + } else if (isSupplementary(ch, isSpecial)) { + getOperatorRest() } else { syntaxError(f"illegal character '\\u$ch%04x'") nextChar() @@ -872,7 +898,8 @@ trait Scanners extends ScannersCommon { putChar(ch) nextChar() getIdentOrOperatorRest() - case SU => // strangely enough, Character.isUnicodeIdentifierPart(SU) returns true! + case ' ' | LF | // optimize for common whitespace + SU => // strangely enough, Character.isUnicodeIdentifierPart(SU) returns true! finishNamed() case _ => if (isUnicodeIdentifierPart(ch)) { @@ -888,6 +915,7 @@ trait Scanners extends ScannersCommon { @tailrec private def getOperatorRest(): Unit = (ch: @switch) match { + case ' ' | LF => finishNamed() // optimize case '~' | '!' | '@' | '#' | '%' | '^' | '*' | '+' | '-' | '<' | '>' | '?' | ':' | '=' | '&' | @@ -899,24 +927,12 @@ trait Scanners extends ScannersCommon { else { putChar('/'); getOperatorRest() } case _ => if (isSpecial(ch)) { putChar(ch); nextChar(); getOperatorRest() } + else if (isSupplementary(ch, isSpecial)) getOperatorRest() else finishNamed() } - private def getIdentOrOperatorRest(): Unit = { - if (isIdentifierPart(ch)) - getIdentRest() - else ch match { - case '~' | '!' | '@' | '#' | '%' | - '^' | '*' | '+' | '-' | '<' | - '>' | '?' | ':' | '=' | '&' | - '|' | '\\' | '/' => - getOperatorRest() - case _ => - if (isSpecial(ch)) getOperatorRest() - else finishNamed() - } - } - + private def getIdentOrOperatorRest(): Unit = + if (isIdentifierPart(ch) || isSupplementary(ch, isIdentifierPart)) getIdentRest() else getOperatorRest() // Literals ----------------------------------------------------------------- @@ -1040,10 +1056,6 @@ trait Scanners extends ScannersCommon { getInterpolatedIdentRest() } else if (atSupplementary(ch, isUnicodeIdentifierStart)) { finishStringPart() - putChar(ch) - nextRawChar() - putChar(ch) - nextRawChar() getInterpolatedIdentRest() } else { val expectations = "$$, $\", $identifier or ${expression}" @@ -1370,23 +1382,6 @@ trait Scanners extends ScannersCommon { if (detectedFloat) restOfNonIntegralNumber() else restOfNumber() } - /** Parse character literal if current character is followed by \', - * or follow with given op and return a symbol literal token - */ - def charLitOr(op: () => Unit): Unit = { - putChar(ch) - nextChar() - if (ch == '\'') { - nextChar() - token = CHARLIT - setStrVal() - } else { - op() - token = SYMBOLLIT - strVal = name.toString - } - } - // Errors ----------------------------------------------------------------- /** generate an error at the given offset */ diff --git a/src/reflect/scala/reflect/internal/Chars.scala b/src/reflect/scala/reflect/internal/Chars.scala index d34651078f4b..0e6778cb92a3 100644 --- a/src/reflect/scala/reflect/internal/Chars.scala +++ b/src/reflect/scala/reflect/internal/Chars.scala @@ -15,10 +15,10 @@ package reflect package internal import scala.annotation.switch -import java.lang.{ Character => JCharacter } /** Contains constants and classifier methods for characters */ trait Chars { + import Chars.CodePoint // Be very careful touching these. // Apparently trivial changes to the way you write these constants // will cause Scanners.scala to go from a nice efficient switch to @@ -72,28 +72,47 @@ trait Chars { '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' /** Can character start an alphanumeric Scala identifier? */ - def isIdentifierStart(c: Char): Boolean = - (c == '_') || (c == '$') || Character.isUnicodeIdentifierStart(c) + def isIdentifierStart(c: Char): Boolean = (c == '_') || (c == '$') || Character.isUnicodeIdentifierStart(c) + def isIdentifierStart(c: CodePoint): Boolean = (c == '_') || (c == '$') || Character.isUnicodeIdentifierStart(c) /** Can character form part of an alphanumeric Scala identifier? */ - def isIdentifierPart(c: Char) = - (c == '$') || Character.isUnicodeIdentifierPart(c) + def isIdentifierPart(c: Char) = (c == '$') || Character.isUnicodeIdentifierPart(c) + + def isIdentifierPart(c: CodePoint) = (c == '$') || Character.isUnicodeIdentifierPart(c) /** Is character a math or other symbol in Unicode? */ def isSpecial(c: Char) = { val chtp = Character.getType(c) chtp == Character.MATH_SYMBOL.toInt || chtp == Character.OTHER_SYMBOL.toInt } + def isSpecial(codePoint: CodePoint) = { + val chtp = Character.getType(codePoint) + chtp == Character.MATH_SYMBOL.toInt || chtp == Character.OTHER_SYMBOL.toInt + } + // used for precedence + import Character.{LOWERCASE_LETTER, UPPERCASE_LETTER, OTHER_LETTER, TITLECASE_LETTER, LETTER_NUMBER} private final val otherLetters = Set[Char]('\u0024', '\u005F') // '$' and '_' - private final val letterGroups = { - import JCharacter._ - Set[Byte](LOWERCASE_LETTER, UPPERCASE_LETTER, OTHER_LETTER, TITLECASE_LETTER, LETTER_NUMBER) - } - def isScalaLetter(ch: Char) = letterGroups(JCharacter.getType(ch).toByte) || otherLetters(ch) + private final val letterGroups = Set[Byte](LOWERCASE_LETTER, UPPERCASE_LETTER, OTHER_LETTER, TITLECASE_LETTER, LETTER_NUMBER) + def isScalaLetter(ch: Char) = letterGroups(Character.getType(ch).toByte) || otherLetters(ch) + def isScalaLetter(c: CodePoint): Boolean = + (Character.getType(c) match { + case LOWERCASE_LETTER | UPPERCASE_LETTER | OTHER_LETTER | TITLECASE_LETTER | LETTER_NUMBER => true + case _ => false + }) || (c match { + case '$' | '_' => true + case _ => false + }) /** Can character form part of a Scala operator name? */ - def isOperatorPart(c : Char) : Boolean = (c: @switch) match { + def isOperatorPart(c: Char): Boolean = (c: @switch) match { + case '~' | '!' | '@' | '#' | '%' | + '^' | '*' | '+' | '-' | '<' | + '>' | '?' | ':' | '=' | '&' | + '|' | '/' | '\\' => true + case c => isSpecial(c) + } + def isOperatorPart(c: CodePoint): Boolean = (c: @switch) match { case '~' | '!' | '@' | '#' | '%' | '^' | '*' | '+' | '-' | '<' | '>' | '?' | ':' | '=' | '&' | @@ -102,4 +121,6 @@ trait Chars { } } -object Chars extends Chars { } +object Chars extends Chars { + type CodePoint = Int +} diff --git a/src/reflect/scala/reflect/internal/Precedence.scala b/src/reflect/scala/reflect/internal/Precedence.scala index f63abd3d2f8f..fb42eb377b9b 100644 --- a/src/reflect/scala/reflect/internal/Precedence.scala +++ b/src/reflect/scala/reflect/internal/Precedence.scala @@ -10,26 +10,24 @@ * additional information regarding copyright ownership. */ -package scala -package reflect -package internal +package scala.reflect.internal import scala.annotation.switch -import Chars._ +import Chars.{isOperatorPart, isScalaLetter} final class Precedence private (val level: Int) extends AnyVal with Ordered[Precedence] { - def compare(that: Precedence): Int = level compare that.level + def compare(that: Precedence): Int = level.compare(that.level) override def toString = s"Precedence($level)" } - object Precedence extends (Int => Precedence) { + type CodePoint = Int private[this] val ErrorName = "" private def isAssignmentOp(name: String) = name match { case "!=" | "<=" | ">=" | "" => false - case _ => name.last == '=' && name.head != '=' && isOperatorPart(name.head) + case _ => name.last == '=' && name.head != '=' && isOperatorPart(name.codePointAt(0)) } - private def firstChar(ch: Char): Precedence = apply((ch: @switch) match { + private def firstChar(c: CodePoint): Precedence = apply((c: @switch) match { case '|' => 2 case '^' => 3 case '&' => 4 @@ -38,13 +36,13 @@ object Precedence extends (Int => Precedence) { case ':' => 7 case '+' | '-' => 8 case '*' | '/' | '%' => 9 - case _ => if (isScalaLetter(ch)) 1 else 10 + case _ => if (isScalaLetter(c)) 1 else 10 }) def apply(level: Int): Precedence = new Precedence(level) def apply(name: String): Precedence = name match { case "" | ErrorName => this(-1) case _ if isAssignmentOp(name) => this(0) - case _ => firstChar(name charAt 0) + case _ => firstChar(name.codePointAt(0)) } } diff --git a/test/files/run/t1406.scala b/test/files/run/t1406.scala index c027771716a8..97e50ba4c7fc 100644 --- a/test/files/run/t1406.scala +++ b/test/files/run/t1406.scala @@ -9,8 +9,27 @@ object Test extends DirectTest { // \u10428 isLetter and isLowerCase def U2 = "\ud801" def U3 = "\udc28" + // symbol operator So with supplementary char + def U4 = "\ud834" + def U5 = "\udd97" + //\ud83c\udf00 // cyclone 1f300 + def U6 = "\ud83c" + def U7 = "\udf00" + // rocket 1f680 + def U8 = "\ud83d" + def U9 = "\ude80" + // quintessence 1f700 + def UA = "\ud83d" + def UB = "\udf00" + + //val a_𝓅 = 1 + // 1d4c5 Mathematical Script Small P + // My Pomeranian, Padraig, was nicknamed Little P because of his little p. + def UC = "\ud835" + def UD = "\udcc5" + def code = - s"""class C { + s"""class Identifiers { | def x = "$U0" | def y = "$U1" | def `$U0` = x @@ -23,6 +42,29 @@ object Test extends DirectTest { | def g(x: Any) = x match { | case $U2$U3 @ _ => $U2$U3 | } + |} + |class Ops { + | def $U4$U5 = 42 // was error: illegal character + | def op_$U4$U5 = 42 // was error: illegal character + | def $U6$U7 = 42 + | def op_$U6$U7 = 42 + | def $U8$U9 = 42 + | def op_$U8$U9 = 42 + | def $UA$UB = 42 + | def op_$UA$UB = 42 + | def $UC$UD = 42 + | def op_$UC$UD = 42 + |} + |class Strings { + | implicit class Interps(sc: StringContext) { + | def $UC$UD(parts: Any*) = "done" + | } + | def $U4$U5 = 42 + | def op_$U4$U5 = 42 + | def interpolated = s"$$$$$U4$U5" // a lot of dollars for little sense + | def interpolated_op = s"$$$$$U4$U5" // a lot of dollars for little sense + | def e = "a $UC$UD b" + | def f = $UC$UD"one" |}""".stripMargin def show(): Unit = { diff --git a/test/files/run/t1406b.check b/test/files/run/t1406b.check index 407e44adf89d..50a0e9217169 100644 --- a/test/files/run/t1406b.check +++ b/test/files/run/t1406b.check @@ -1,6 +1,9 @@ -newSource1.scala:4: error: illegal character '\ud801' missing low surrogate - def ? = x - ^ -newSource1.scala:5: error: illegal character '\udc00' - def ? = y - ^ +C(84) +C(1764) +C(1764) +C(1806) +C(1806) +C(3528) +C(3528) +C(1806) +C(3528) diff --git a/test/files/run/t1406b.scala b/test/files/run/t1406b.scala index bd1868a642fb..ff16cd296478 100644 --- a/test/files/run/t1406b.scala +++ b/test/files/run/t1406b.scala @@ -1,22 +1,23 @@ -import scala.tools.partest.DirectTest - -object Test extends DirectTest { - // for reference, UTF-8 of U0 - //val data = Array(0xed, 0xa0, 0x81).map(_.asInstanceOf[Byte]) - def U0 = "\ud801" - def U1 = "\udc00" - def code = - s"""class C { - | def x = "$U0" - | def y = "$U1" - | def $U0 = x - | def $U1 = y - |}""".stripMargin - - def show(): Unit = { - assert(U0.length == 1) - assert(!compile()) - } +case class C(n: Int) { + def 𐀀(c: C): C = C(n * c.n) // actually a letter but supplementary 0x10000 + def ☀(c: C): C = C(n * c.n) // just a symbol + def ☀=(c: C): C = C(n * c.n) // just a symbol + def 🌀(c: C): C = C(n * c.n) // cyclone operator is symbol, supplementary + def 🌀=(c: C): C = C(n * c.n) // cyclone operator is symbol, supplementary + def *(c: C): C = C(n * c.n) + def +(c: C): C = C(n + c.n) +} +object Test extends App { + val c, d = C(42) + println(c + d) + println(c * d) + println(c ☀ d) + println(c * d + d) + println(c ☀ d + d) + println(c ☀= d + d) // assignment op is low precedence + println(c 𐀀 d + d) // the first one, letter should be low precedence + println(c 🌀d + d) // the second one, cyclone should be high precedence + println(c 🌀= d + d) // the second one, cyclone should be high precedence }