Skip to content

Commit

Permalink
feat(intl-messageformat-parser): revamped quote rule (#134)
Browse files Browse the repository at this point in the history
BREAKING CHANGE: This changes how we escape chars in messages: instead of `\`, we now use an apostrophe, which is more closely aligned with ICU4J & ICU4C.
  • Loading branch information
pyrocat101 authored and longlho committed Apr 26, 2020
1 parent 66f4422 commit 88c6aa7
Show file tree
Hide file tree
Showing 22 changed files with 904 additions and 221 deletions.
2 changes: 2 additions & 0 deletions packages/formatjs/packages/intl-messageformat-parser/build.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ import {
pluralOption: 'PluralOrSelectOption',
numberSkeleton: 'NumberSkeleton',
dateOrTimeSkeleton: 'DateSkeleton',
numberArgStyle: 'string | NumberSkeleton',
dateOrTimeArgStyle: 'string | DateSkeleton',
simpleFormatElement: `
| NumberElement
| DateElement
Expand Down
116 changes: 43 additions & 73 deletions packages/formatjs/packages/intl-messageformat-parser/src/parser.pegjs
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,9 @@ messageElement
/ selectElement

messageText
= chunks:(_ chars _)+ {
return chunks.reduce(function (all, chunk) {
return all.concat(chunk)
}, []).join('')
= parts:(doubleApostrophes / quotedString / unquotedString)+ {
return parts.join('');
}
/ $(ws)

literalElement
= messageText:messageText {
Expand All @@ -45,19 +42,20 @@ literalElement
};
}

varName
= number
/ chars:quoteEscapedChar* { return chars.join(''); }
argName = $(number / keyword)

argumentElement 'argumentElement'
= '{' _ value:varName _ '}' {
= '{' _ value:argName _ '}' {
return {
type: TYPE.argument,
value,
...insertLocation()
}
}

numberSkeletonId 'numberSkeletonId'
= $(!(patternWhiteSpace / [\'\/{}]) .)+

numberSkeletonTokenOption 'numberSkeletonTokenOption'
= '/' option:numberSkeletonId { return option; }

Expand All @@ -68,20 +66,21 @@ numberSkeletonToken 'numberSkeletonToken'

// See also:
// https://github.com/unicode-org/icu/blob/master/docs/userguide/format_parse/numbers/skeletons.md
numberSkeleton = tokens:(numberSkeletonToken+) {
return {
type: SKELETON_TYPE.number,
tokens,
...insertLocation()
numberSkeleton
= tokens:(numberSkeletonToken+) {
return {
type: SKELETON_TYPE.number,
tokens,
...insertLocation()
}
}
}

numberArgStyle
= '::' skeleton:numberSkeleton { return skeleton; }
/ chars
/ keyword

numberFormatElement
= '{' _ value:varName _ ',' _ type:'number' _ style:(',' _ numberArgStyle)? _ '}' {
= '{' _ value:argName _ ',' _ type:'number' _ style:(',' _ numberArgStyle)? _ '}' {
return {
type : type === 'number' ? TYPE.number : type === 'date' ? TYPE.date : TYPE.time,
style : style && style[2],
Expand All @@ -90,35 +89,25 @@ numberFormatElement
};
}

// Starting with ICU 4.8, an ASCII apostrophe only starts quoted text if it immediately precedes
// a character that requires quoting (that is, "only where needed"), and works the same in
// nested messages as on the top level of the pattern. The new behavior is otherwise compatible.
// TODO: use this rule for message text literal.
// Matches an apostrophe-quoted run that opens with `{` or `}` and returns its
// unquoted text: the surrounding apostrophes are dropped and every doubled
// apostrophe inside the run collapses to a single literal apostrophe.
quotedString = "'" escapedChar:([\{\}]) quotedChars:$([^'] / "''")+ "'" {
    // A string pattern given to replace() rewrites only the first match, so a
    // global regex is needed to unescape every doubled apostrophe in the run.
    return escapedChar + quotedChars.replace(/''/g, `'`);
}
doubleApostrophes = "''" { return `'`; }
// Consumes literal text up to (but not including) the next doubled apostrophe,
// bare brace, or apostrophe-quoted brace. `'{` and `'}` are separate
// alternatives in the lookahead so that either one on its own ends the run;
// written as a sequence they would only stop the run before the text `'{'}`.
unquotedString = matches:$(!("''" / '{' / '}' / "'{" / "'}") .)+ { return matches; }

// See also:
// - http://cldr.unicode.org/translation/date-time-patterns
// - http://www.icu-project.org/apiref/icu4j/com/ibm/icu/text/SimpleDateFormat.html
// Here we implement the ICU >= 4.8 quoting behavior.
dateOrTimeSkeleton
= parts:(quotedString / doubleApostrophes / unquotedString)+ {
= pattern:messageText {
return {
type: SKELETON_TYPE.date,
pattern: parts.join(''),
pattern,
...insertLocation(),
}
}

dateOrTimeArgStyle
= '::' skeleton:dateOrTimeSkeleton { return skeleton; }
/ chars
/ keyword

dateOrTimeFormatElement
= '{' _ value:varName _ ',' _ type:('date' / 'time') _ style:(',' _ dateOrTimeArgStyle)? _ '}' {
= '{' _ value:argName _ ',' _ type:('date' / 'time') _ style:(',' _ dateOrTimeArgStyle)? _ '}' {
return {
type : type === 'number' ? TYPE.number : type === 'date' ? TYPE.date : TYPE.time,
style : style && style[2],
Expand All @@ -127,10 +116,11 @@ dateOrTimeFormatElement
};
}

simpleFormatElement = numberFormatElement / dateOrTimeFormatElement
simpleFormatElement
= numberFormatElement / dateOrTimeFormatElement

pluralElement
= '{' _ value:varName _ ',' _ pluralType:('plural' / 'selectordinal') _ ',' _ offset:('offset:' _ number)? _ options:pluralOption+ _ '}' {
= '{' _ value:argName _ ',' _ pluralType:('plural' / 'selectordinal') _ ',' _ offset:('offset:' _ number)? _ options:pluralOption+ _ '}' {
return {
type : TYPE.plural,
pluralType: pluralType === 'plural' ? 'cardinal' : 'ordinal',
Expand All @@ -148,7 +138,7 @@ pluralElement
}

selectElement
= '{' _ value:varName _ ',' _ 'select' _ ',' _ options:selectOption+ _ '}' {
= '{' _ value:argName _ ',' _ 'select' _ ',' _ options:selectOption+ _ '}' {
return {
type : TYPE.select,
value,
Expand All @@ -164,18 +154,10 @@ selectElement
}

pluralRuleSelectValue
= '=' n:number {
return `=${n}`
}
/ 'zero'
/ 'one'
/ 'two'
/ 'few'
/ 'many'
/ 'other'
= $('=' number) / keyword

selectOption
= _ id:chars _ '{' value:message '}' {
= _ id:keyword _ '{' value:message '}' {
return {
id,
value,
Expand All @@ -194,39 +176,27 @@ pluralOption

// -- Helpers ------------------------------------------------------------------

ws 'whitespace' = [ \t\n\r]+
_ 'optionalWhitespace' = $(ws*)
// Equivalence of \p{Pattern_White_Space}
// See: https://github.com/mathiasbynens/unicode-11.0.0/blob/master/Binary_Property/Pattern_White_Space/regex.js
patternWhiteSpace = [\t-\r \x85\u200E\u200F\u2028\u2029]
// Equivalence of \p{Pattern_Syntax}
// See: https://github.com/mathiasbynens/unicode-11.0.0/blob/master/Binary_Property/Pattern_Syntax/regex.js
patternSyntax = [!-\/:-@\[-\^`\{-~\xA1-\xA7\xA9\xAB\xAC\xAE\xB0\xB1\xB6\xBB\xBF\xD7\xF7\u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E\u2190-\u245F\u2500-\u2775\u2794-\u2BFF\u2E00-\u2E7F\u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46]

digit = [0-9]
hexDigit = [0-9a-f]i
_ 'optional whitespace' = $(patternWhiteSpace*)

number = digits:digit+ {
number = digits:[0-9]+ {
return parseInt(digits.join(''), 10);
}

quoteEscapedChar =
!("'" / [ \t\n\r,.+={}#]) char:. { return char; }
/ "'" sequence:escape { return sequence; }

apostrophe 'apostrophe' = "'"
escape = [ \t\n\r,.+={}#] / apostrophe

char
=
"'" sequence:apostrophe { return sequence; }
/ [^{}\\\0-\x1F\x7f \t\n\r]
/ '\\\\' { return '\\'; }
/ '\\#' { return '\\#'; }
/ '\\{' { return '\u007B'; }
/ '\\}' { return '\u007D'; }
/ '\\u' digits:$(hexDigit hexDigit hexDigit hexDigit) {
return String.fromCharCode(parseInt(digits, 16));
}

chars = chars:char+ { return chars.join(''); }

// Equivalence of \p{Pattern_White_Space}
// See: https://github.com/mathiasbynens/unicode-11.0.0/blob/master/Binary_Property/Pattern_White_Space/regex.js
patternWhiteSpace = [\t-\r \x85\u200E\u200F\u2028\u2029];
doubleApostrophes 'double apostrophes' = "''" { return `'`; }
// Starting with ICU 4.8, an ASCII apostrophe only starts quoted text if it immediately precedes
// a character that requires quoting (that is, "only where needed"), and works the same in
// nested messages as on the top level of the pattern. The new behavior is otherwise compatible.
//
// Matches an apostrophe-quoted run that opens with `{` or `}` and returns its unquoted text:
// the surrounding apostrophes are dropped and every doubled apostrophe inside the run
// collapses to a single literal apostrophe.
quotedString = "'" escapedChar:([{}]) quotedChars:$("''" / [^'])* "'" {
    // A string pattern given to replace() rewrites only the first match, so a
    // global regex is needed to unescape every doubled apostrophe in the run.
    return escapedChar + quotedChars.replace(/''/g, `'`);
}
unquotedString = $([^{}]);

numberSkeletonId 'numberSkeletonId' = $(!(patternWhiteSpace / [\'\/{}]) .)+;
keyword 'keyword' = $((!(patternWhiteSpace / patternSyntax) .)+)
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,14 @@ export interface BaseElement<T extends TYPE> {
export type LiteralElement = BaseElement<TYPE.literal>;
export type ArgumentElement = BaseElement<TYPE.argument>;

export interface SimpleFormatElement<T extends TYPE> extends BaseElement<T> {
style?: string;
export interface SimpleFormatElement<T extends TYPE, S extends Skeleton>
extends BaseElement<T> {
style?: string | S | null;
}

export type NumberElement = SimpleFormatElement<TYPE.number>;
export type DateElement = SimpleFormatElement<TYPE.date>;
export type TimeElement = SimpleFormatElement<TYPE.time>;
export type NumberElement = SimpleFormatElement<TYPE.number, NumberSkeleton>;
export type DateElement = SimpleFormatElement<TYPE.date, DateSkeleton>;
export type TimeElement = SimpleFormatElement<TYPE.time, DateSkeleton>;

export interface SelectOption {
id: string;
Expand Down Expand Up @@ -163,7 +164,7 @@ export function createLiteralElement(value: string): LiteralElement {

export function createNumberElement(
value: string,
style?: string
style?: string | null
): NumberElement {
return {
type: TYPE.number,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Array [
"type": 1,
},
"type": 3,
"value": 0,
"value": "0",
},
]
`;
Expand All @@ -21,7 +21,7 @@ Array [
"type": 1,
},
"type": 3,
"value": 0,
"value": "0",
},
]
`;
Expand All @@ -34,7 +34,7 @@ Array [
"type": 1,
},
"type": 3,
"value": 0,
"value": "0",
},
]
`;

0 comments on commit 88c6aa7

Please sign in to comment.