From 7ced0618184975f6c1767f046346bb88d1b646cd Mon Sep 17 00:00:00 2001 From: Anderson Bravalheri Date: Thu, 10 Feb 2022 19:02:48 +0000 Subject: [PATCH 1/7] Allow string type to be controlled from the public API --- tests/test_api.py | 20 ++++++++++++++++++++ tomlkit/api.py | 22 +++++++++++++++++++--- tomlkit/items.py | 15 +++++++++++++++ 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 88b9772..5495cbb 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -392,3 +392,23 @@ def test_create_super_table_with_table(): def test_create_super_table_with_aot(): data = {"foo": {"bar": [{"a": 1}]}} assert dumps(data) == "[[foo.bar]]\na = 1\n" + + +@pytest.mark.parametrize( + "kwargs, example, expected", + [ + ({}, "My\nString\u0001", '"My\\nString\\u0001"'), + ({"escape": False}, "My String\u0001", '"My String\u0001"'), + ({"single_quotes": True}, "My\nString", "'My\\nString'"), + ({"multiline": True}, "\nMy\nString\n", '"""\nMy\nString\n"""'), + ({"multiline": True, "single_quotes": True}, "My\nString", "'''My\nString'''"), + ( + {"multiline": True, "single_quotes": True, "escape": True}, + "My\nString", + "'''My\\nString'''" + ), + ] +) +def test_create_string_with_different_types(kwargs, example, expected): + value = tomlkit.string(example, **kwargs) + assert value.as_string() == expected diff --git a/tomlkit/api.py b/tomlkit/api.py index c4cc07f..3687f86 100644 --- a/tomlkit/api.py +++ b/tomlkit/api.py @@ -23,6 +23,7 @@ from .items import Key from .items import SingleKey from .items import String +from .items import StringType as _StringType from .items import Table from .items import Time from .items import Trivia @@ -104,9 +105,24 @@ def boolean(raw: str) -> Bool: return item(raw == "true") -def string(raw: str) -> String: - """Create a string item.""" - return item(raw) +def string( + raw: str, + *, + single_quotes: bool = False, + multiline: bool = False, + escape: Union[None, bool] = None +) -> String: + """Create a string item. + + Boolean flags (e.g. ``single_quotes=True`` and/or ``multiline=True``) + can be used for personalization. + + By default, single line strings are escaped, but multi line strings are not. + This can be controlled by explicitly setting ``escape``. + """ + escape = not(multiline) if escape is None else escape + type_ = _StringType.select(single_quotes, multiline) + return String.from_raw(raw, type_, escape) def date(raw: str) -> Date: diff --git a/tomlkit/items.py b/tomlkit/items.py index c1eac02..fab60fc 100644 --- a/tomlkit/items.py +++ b/tomlkit/items.py @@ -166,6 +166,15 @@ class StringType(Enum): # Multi Line Literal MLL = "'''" + @classmethod + def select(cls, single_quotes=False, multiline=False) -> "StringType": + return { + (False, False): cls.SLB, + (False, True): cls.MLB, + (True, False): cls.SLL, + (True, True): cls.MLL, + }[(single_quotes, multiline)] + @property @lru_cache(maxsize=None) def unit(self) -> str: @@ -1512,6 +1521,12 @@ def _new(self, result): def _getstate(self, protocol=3): return self._t, str(self), self._original, self._trivia + @classmethod + def from_raw(cls, value: str, type_=StringType.SLB, escape=False) -> "String": + string_value = escape_string(value) if escape else value + + return cls(type_, decode(value), string_value, Trivia()) + class AoT(Item, _CustomList): """ From 3981f508b0a7dd67c0b796046b56eb8dfa30da74 Mon Sep 17 00:00:00 2001 From: Anderson Bravalheri Date: Thu, 10 Feb 2022 19:09:02 +0000 Subject: [PATCH 2/7] Avoid double bookkeeping for String creation logic in 'items' --- tomlkit/items.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tomlkit/items.py b/tomlkit/items.py index fab60fc..5b29f10 100644 --- a/tomlkit/items.py +++ b/tomlkit/items.py @@ -124,9 +124,7 @@ def item( return a elif isinstance(value, str): - escaped = escape_string(value) - - return String(StringType.SLB, decode(value), escaped, Trivia()) + return String.from_raw(value, escape=True) elif isinstance(value, datetime): return DateTime( value.year, From 457aebdd998c1b5ff5ced72b864b2259fadce8b2 Mon Sep 17 00:00:00 2001 From: Anderson Bravalheri Date: Thu, 10 Feb 2022 19:11:03 +0000 Subject: [PATCH 3/7] Update CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 949185a..eb91c6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ - Fix the only child detection when creating tables. ([#175](https://github.com/sdispater/tomlkit/issues/175)) - Include the `docs/` directory and `CHANGELOG.md` in sdist tarball. ([#176](https://github.com/sdispater/tomlkit/issues/176)) +### Added + +- Add keyword arguments to `string` API to allow selecting the representation type. ([#177](https://github.com/sdispater/tomlkit/pull/177)) + ## [0.9.2] - 2022-02-08 ### Changed From e773d00129d232d2508e09bdb8856cf43295c19e Mon Sep 17 00:00:00 2001 From: Anderson Bravalheri Date: Thu, 10 Feb 2022 19:14:57 +0000 Subject: [PATCH 4/7] Fix linting errors --- tests/test_api.py | 4 ++-- tomlkit/api.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 5495cbb..ee02995 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -405,9 +405,9 @@ def test_create_super_table_with_aot(): ( {"multiline": True, "single_quotes": True, "escape": True}, "My\nString", - "'''My\\nString'''" + "'''My\\nString'''", ), - ] + ], ) def test_create_string_with_different_types(kwargs, example, expected): value = tomlkit.string(example, **kwargs) diff --git a/tomlkit/api.py b/tomlkit/api.py index 3687f86..aeb34ad 100644 --- a/tomlkit/api.py +++ b/tomlkit/api.py @@ -110,7 +110,7 @@ def string( *, single_quotes: bool = False, multiline: bool = False, - escape: Union[None, bool] = None + escape: Union[None, bool] = None, ) -> String: """Create a string item. @@ -120,7 +120,7 @@ def string( By default, single line strings are escaped, but multi line strings are not. This can be controlled by explicitly setting ``escape``. """ - escape = not(multiline) if escape is None else escape + escape = (not multiline) if escape is None else escape type_ = _StringType.select(single_quotes, multiline) return String.from_raw(raw, type_, escape) From cbf6b4e07d0c07297a446a4cf4872832c6b77833 Mon Sep 17 00:00:00 2001 From: Anderson Bravalheri Date: Sat, 12 Feb 2022 12:13:40 +0000 Subject: [PATCH 5/7] Improve escaping for constructing strings via API --- tests/test_api.py | 40 ++++++++++++++++++++++++++++------- tomlkit/_utils.py | 53 +++++++++++++++++++++++++++++++++++------------ tomlkit/api.py | 24 ++++++++++++++------- tomlkit/items.py | 36 +++++++++++++++++++++++++++----- 4 files changed, 121 insertions(+), 32 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index ee02995..75704bc 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -394,21 +394,47 @@ def test_create_super_table_with_aot(): assert dumps(data) == "[[foo.bar]]\na = 1\n" +@pytest.mark.parametrize( + "kwargs, example, expected", + [ + ({}, "My\nString", '"My\\nString"'), + ({"literal": True}, "My\nString", "'My\\nString'"), + ({"multiline": True}, "\nMy\nString\n", '"""\nMy\nString\n"""'), + ({"multiline": True, "literal": True}, "My\nString", "'''My\nString'''"), + ], +) +def test_create_string_with_different_types(kwargs, example, expected): + value = tomlkit.string(example, **kwargs) + assert value.as_string() == expected + + @pytest.mark.parametrize( "kwargs, example, expected", [ ({}, "My\nString\u0001", '"My\\nString\\u0001"'), ({"escape": False}, "My String\u0001", '"My String\u0001"'), - ({"single_quotes": True}, "My\nString", "'My\\nString'"), - ({"multiline": True}, "\nMy\nString\n", '"""\nMy\nString\n"""'), - ({"multiline": True, "single_quotes": True}, "My\nString", "'''My\nString'''"), + ({"escape": False, "literal": True}, "My'String", "'My'String'"), + ({"multiline": True}, 'My"""String', '"""My""\\"String"""'), + ({"multiline": True, "literal": True}, "My'''String", "'''My''\\'String'''"), + ( + {"multiline": True}, + '"""My"""Str"""ing"""', + '"""""\\"My""\\"Str""\\"ing""\\""""', + ), + # Examples from standard + ( + {"literal": True}, + r"C:\Users\nodejs\templates", + r"'C:\Users\nodejs\templates'", + ), + ({"literal": True}, r"<\i\c*\s*>", r"'<\i\c*\s*>'"), ( - {"multiline": True, "single_quotes": True, "escape": True}, - "My\nString", - "'''My\\nString'''", + {"multiline": True, "literal": True}, + r"I [dw]on't need \d{2} apples", + r"'''I [dw]on't need \d{2} apples'''", ), ], ) -def test_create_string_with_different_types(kwargs, example, expected): +def test_create_string_escaping(kwargs, example, expected): value = tomlkit.string(example, **kwargs) assert value.as_string() == expected diff --git a/tomlkit/_utils.py b/tomlkit/_utils.py index f3fa49f..0cec157 100644 --- a/tomlkit/_utils.py +++ b/tomlkit/_utils.py @@ -6,6 +6,7 @@ from datetime import time from datetime import timedelta from datetime import timezone +from typing import Collection from typing import Union from ._compat import decode @@ -97,31 +98,57 @@ def parse_rfc3339(string: str) -> Union[datetime, date, time]: raise ValueError("Invalid RFC 339 string") -_escaped = {"b": "\b", "t": "\t", "n": "\n", "f": "\f", "r": "\r", '"': '"', "\\": "\\"} -_escapes = {v: k for k, v in _escaped.items()} - - -def escape_string(s: str) -> str: +# https://toml.io/en/v1.0.0#string +CONTROL_CHARS = frozenset(chr(c) for c in range(0x20)) | {chr(0x7F)} +_escaped = { + "b": "\b", + "t": "\t", + "n": "\n", + "f": "\f", + "r": "\r", + '"': '"', + "\\": "\\", +} +_compact_escapes = {v: k for k, v in _escaped.items()} +_basic_escapes = frozenset(CONTROL_CHARS | {'"', "\\"}) +_escaped_sequences = { + '"""': '""\\"', + "'''": "''\\'", +} + + +def escape_string( + s: str, + escape_chars: Collection[str] = _basic_escapes, + escape_sequences: Collection[str] = (), +) -> str: s = decode(s) res = [] start = 0 + l = len(s) - def flush(): + def flush(inc=1): if start != i: res.append(s[start:i]) - return i + 1 + return i + inc i = 0 - while i < len(s): + while i < l: c = s[i] - if c in '"\\\n\r\t\b\f': + if c in escape_chars: start = flush() - res.append("\\" + _escapes[c]) - elif ord(c) < 0x20: - start = flush() - res.append("\\u%04x" % ord(c)) + if c in _compact_escapes: + res.append("\\" + _compact_escapes[c]) + else: + res.append("\\u%04x" % ord(c)) + for seq in escape_sequences: + seq_len = len(seq) + if s[i:].startswith(seq): + start = flush(seq_len) + res.append(_escaped_sequences[seq]) + i += seq_len - 1 # fast-forward escape sequence i += 1 flush() diff --git a/tomlkit/api.py b/tomlkit/api.py index aeb34ad..ecd1c41 100644 --- a/tomlkit/api.py +++ b/tomlkit/api.py @@ -108,20 +108,30 @@ def boolean(raw: str) -> Bool: def string( raw: str, *, - single_quotes: bool = False, + literal: bool = False, multiline: bool = False, - escape: Union[None, bool] = None, + escape: bool = True, ) -> String: """Create a string item. - Boolean flags (e.g. ``single_quotes=True`` and/or ``multiline=True``) + Boolean flags (e.g. ``literal=True`` and/or ``multiline=True``) can be used for personalization. - By default, single line strings are escaped, but multi line strings are not. - This can be controlled by explicitly setting ``escape``. + By default, common escaping rules will be applied so strings are valid + according to the TOML spec. + + This can be controlled by explicitly setting ``escape=False``. + Please note that, if you disable escaping, you will have to make sure that + the given strings don't contain any forbidden character or sequence. + + Also note that, although escaping is done even when ``literal=True``, to + prevent invalid TOML, TOML parsers will interpret literal and basic + strings in a different way. + + For more information, please check the spec: + `https://toml.io/en/v1.0.0#string`_. """ - escape = (not multiline) if escape is None else escape - type_ = _StringType.select(single_quotes, multiline) + type_ = _StringType.select(literal, multiline) return String.from_raw(raw, type_, escape) diff --git a/tomlkit/items.py b/tomlkit/items.py index 5b29f10..8d471b5 100644 --- a/tomlkit/items.py +++ b/tomlkit/items.py @@ -11,6 +11,7 @@ from functools import lru_cache from typing import TYPE_CHECKING from typing import Any +from typing import Collection from typing import Dict from typing import Iterable from typing import Iterator @@ -23,6 +24,7 @@ from ._compat import PY38 from ._compat import decode +from ._utils import CONTROL_CHARS from ._utils import escape_string from .toml_char import TOMLChar @@ -124,7 +126,7 @@ def item( return a elif isinstance(value, str): - return String.from_raw(value, escape=True) + return String.from_raw(value) elif isinstance(value, datetime): return DateTime( value.year, @@ -165,13 +167,36 @@ class StringType(Enum): MLL = "'''" @classmethod - def select(cls, single_quotes=False, multiline=False) -> "StringType": + def select(cls, literal=False, multiline=False) -> "StringType": return { (False, False): cls.SLB, (False, True): cls.MLB, (True, False): cls.SLL, (True, True): cls.MLL, - }[(single_quotes, multiline)] + }[(literal, multiline)] + + @property + def escaped_chars(self) -> Collection[str]: + # https://toml.io/en/v1.0.0#string + escaped_in_basic = CONTROL_CHARS | {"\\"} + forbidden_in_literal = CONTROL_CHARS - {"\t"} + allowed_in_multiline = {"\t", "\n", "\r"} + return { + StringType.SLB: escaped_in_basic | {'"'}, + StringType.MLB: escaped_in_basic - allowed_in_multiline, + StringType.SLL: forbidden_in_literal | {"'"}, + StringType.MLL: forbidden_in_literal - allowed_in_multiline, + }[self] + + @property + def escaped_sequences(self) -> Collection[str]: + # https://toml.io/en/v1.0.0#string + return { + StringType.SLB: (), + StringType.MLB: ('"""',), + StringType.SLL: (), + StringType.MLL: ("'''",), + }[self] @property @lru_cache(maxsize=None) @@ -1520,8 +1545,9 @@ def _getstate(self, protocol=3): return self._t, str(self), self._original, self._trivia @classmethod - def from_raw(cls, value: str, type_=StringType.SLB, escape=False) -> "String": - string_value = escape_string(value) if escape else value + def from_raw(cls, value: str, type_=StringType.SLB, escape=True) -> "String": + escape_args = (type_.escaped_chars, type_.escaped_sequences) + string_value = escape_string(value, *escape_args) if escape else value return cls(type_, decode(value), string_value, Trivia()) From f92a3bc86ea77718c50d5d899ee68c9bcffb3f90 Mon Sep 17 00:00:00 2001 From: Anderson Bravalheri Date: Sat, 12 Feb 2022 12:58:44 +0000 Subject: [PATCH 6/7] Ensure single line literal string doesn't contain "'" --- tests/test_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_api.py b/tests/test_api.py index 75704bc..7faf4f5 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -412,6 +412,7 @@ def test_create_string_with_different_types(kwargs, example, expected): "kwargs, example, expected", [ ({}, "My\nString\u0001", '"My\\nString\\u0001"'), + ({"literal": True}, "My'String", "'My\\u0027String'"), ({"escape": False}, "My String\u0001", '"My String\u0001"'), ({"escape": False, "literal": True}, "My'String", "'My'String'"), ({"multiline": True}, 'My"""String', '"""My""\\"String"""'), From 53219e93d75f3aa99a4f9e16eb7f70cfd40ec230 Mon Sep 17 00:00:00 2001 From: Anderson Bravalheri Date: Sun, 13 Feb 2022 11:56:03 +0000 Subject: [PATCH 7/7] Fail if literal strings have invalid chars Previously invalid chars were being escaped for the sake of not having invalid strings. It is preferable instead to explicitly fail. --- tests/test_api.py | 57 ++++++++++++++++++++++++++++--------------- tomlkit/_utils.py | 26 +++++++------------- tomlkit/api.py | 14 +++-------- tomlkit/exceptions.py | 10 ++++++++ tomlkit/items.py | 32 +++++++++++++++--------- 5 files changed, 81 insertions(+), 58 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 7faf4f5..3a27db4 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -21,6 +21,7 @@ from tomlkit.exceptions import InvalidDateError from tomlkit.exceptions import InvalidDateTimeError from tomlkit.exceptions import InvalidNumberError +from tomlkit.exceptions import InvalidStringError from tomlkit.exceptions import InvalidTimeError from tomlkit.exceptions import UnexpectedCharError from tomlkit.items import AoT @@ -398,31 +399,31 @@ def test_create_super_table_with_aot(): "kwargs, example, expected", [ ({}, "My\nString", '"My\\nString"'), - ({"literal": True}, "My\nString", "'My\\nString'"), - ({"multiline": True}, "\nMy\nString\n", '"""\nMy\nString\n"""'), - ({"multiline": True, "literal": True}, "My\nString", "'''My\nString'''"), - ], -) -def test_create_string_with_different_types(kwargs, example, expected): - value = tomlkit.string(example, **kwargs) - assert value.as_string() == expected - - -@pytest.mark.parametrize( - "kwargs, example, expected", - [ - ({}, "My\nString\u0001", '"My\\nString\\u0001"'), - ({"literal": True}, "My'String", "'My\\u0027String'"), + ({"escape": False}, "My String\t", '"My String\t"'), + ({"literal": True}, "My String\t", "'My String\t'"), + ({"escape": True, "literal": True}, "My String\t", "'My String\t'"), + ({}, "My String\u0001", '"My String\\u0001"'), + ({}, "My String\u000b", '"My String\\u000b"'), + ({}, "My String\x08", '"My String\\b"'), + ({}, "My String\x0c", '"My String\\f"'), + ({}, "My String\x01", '"My String\\u0001"'), + ({}, "My String\x06", '"My String\\u0006"'), + ({}, "My String\x12", '"My String\\u0012"'), + ({}, "My String\x7f", '"My String\\u007f"'), ({"escape": False}, "My String\u0001", '"My String\u0001"'), - ({"escape": False, "literal": True}, "My'String", "'My'String'"), + ({"multiline": True}, "\nMy\nString\n", '"""\nMy\nString\n"""'), + ({"multiline": True}, 'My"String', '"""My"String"""'), + ({"multiline": True}, 'My""String', '"""My""String"""'), ({"multiline": True}, 'My"""String', '"""My""\\"String"""'), - ({"multiline": True, "literal": True}, "My'''String", "'''My''\\'String'''"), + ({"multiline": True}, 'My""""String', '"""My""\\""String"""'), ( {"multiline": True}, '"""My"""Str"""ing"""', '"""""\\"My""\\"Str""\\"ing""\\""""', ), - # Examples from standard + ({"multiline": True, "literal": True}, "My\nString", "'''My\nString'''"), + ({"multiline": True, "literal": True}, "My'String", "'''My'String'''"), + ({"multiline": True, "literal": True}, "My\r\nString", "'''My\r\nString'''"), ( {"literal": True}, r"C:\Users\nodejs\templates", @@ -436,6 +437,24 @@ def test_create_string_with_different_types(kwargs, example, expected): ), ], ) -def test_create_string_escaping(kwargs, example, expected): +def test_create_string(kwargs, example, expected): value = tomlkit.string(example, **kwargs) assert value.as_string() == expected + + +@pytest.mark.parametrize( + "kwargs, example", + [ + ({"literal": True}, "My'String"), + ({"literal": True}, "My\nString"), + ({"literal": True}, "My\r\nString"), + ({"literal": True}, "My\bString"), + ({"literal": True}, "My\x08String"), + ({"literal": True}, "My\x0cString"), + ({"literal": True}, "My\x7fString"), + ({"multiline": True, "literal": True}, "My'''String"), + ], +) +def test_create_string_with_invalid_characters(kwargs, example): + with pytest.raises(InvalidStringError): + tomlkit.string(example, **kwargs) diff --git a/tomlkit/_utils.py b/tomlkit/_utils.py index 0cec157..5c8113f 100644 --- a/tomlkit/_utils.py +++ b/tomlkit/_utils.py @@ -109,19 +109,18 @@ def parse_rfc3339(string: str) -> Union[datetime, date, time]: '"': '"', "\\": "\\", } -_compact_escapes = {v: k for k, v in _escaped.items()} -_basic_escapes = frozenset(CONTROL_CHARS | {'"', "\\"}) -_escaped_sequences = { +_compact_escapes = { + **{v: f"\\{k}" for k, v in _escaped.items()}, '"""': '""\\"', - "'''": "''\\'", } +_basic_escapes = CONTROL_CHARS | {'"'} -def escape_string( - s: str, - escape_chars: Collection[str] = _basic_escapes, - escape_sequences: Collection[str] = (), -) -> str: +def _unicode_escape(seq: str) -> str: + return "".join(f"\\u{ord(c):04x}" for c in seq) + + +def escape_string(s: str, escape_sequences: Collection[str] = _basic_escapes) -> str: s = decode(s) res = [] @@ -136,18 +135,11 @@ def flush(inc=1): i = 0 while i < l: - c = s[i] - if c in escape_chars: - start = flush() - if c in _compact_escapes: - res.append("\\" + _compact_escapes[c]) - else: - res.append("\\u%04x" % ord(c)) for seq in escape_sequences: seq_len = len(seq) if s[i:].startswith(seq): start = flush(seq_len) - res.append(_escaped_sequences[seq]) + res.append(_compact_escapes.get(seq) or _unicode_escape(seq)) i += seq_len - 1 # fast-forward escape sequence i += 1 diff --git a/tomlkit/api.py b/tomlkit/api.py index ecd1c41..273efc5 100644 --- a/tomlkit/api.py +++ b/tomlkit/api.py @@ -114,22 +114,16 @@ def string( ) -> String: """Create a string item. - Boolean flags (e.g. ``literal=True`` and/or ``multiline=True``) + By default, this function will create *single line basic* strings, but + boolean flags (e.g. ``literal=True`` and/or ``multiline=True``) can be used for personalization. - By default, common escaping rules will be applied so strings are valid - according to the TOML spec. + For more information, please check the spec: `https://toml.io/en/v1.0.0#string`_. + Common escaping rules will be applied for basic strings. This can be controlled by explicitly setting ``escape=False``. Please note that, if you disable escaping, you will have to make sure that the given strings don't contain any forbidden character or sequence. - - Also note that, although escaping is done even when ``literal=True``, to - prevent invalid TOML, TOML parsers will interpret literal and basic - strings in a different way. - - For more information, please check the spec: - `https://toml.io/en/v1.0.0#string`_. """ type_ = _StringType.select(literal, multiline) return String.from_raw(raw, type_, escape) diff --git a/tomlkit/exceptions.py b/tomlkit/exceptions.py index 66370db..6c2c7a1 100644 --- a/tomlkit/exceptions.py +++ b/tomlkit/exceptions.py @@ -1,3 +1,4 @@ +from typing import Collection from typing import Optional @@ -213,3 +214,12 @@ def __init__(self, line: int, col: int, char: int, type: str) -> None: ) super().__init__(line, col, message=message) + + +class InvalidStringError(ValueError, TOMLKitError): + def __init__(self, value: str, invalid_sequences: Collection[str], delimiter: str): + repr_ = repr(value)[1:-1] + super().__init__( + f"Invalid string: {delimiter}{repr_}{delimiter}. " + f"The character sequences {invalid_sequences} are invalid." + ) diff --git a/tomlkit/items.py b/tomlkit/items.py index 8d471b5..ba7f848 100644 --- a/tomlkit/items.py +++ b/tomlkit/items.py @@ -26,6 +26,7 @@ from ._compat import decode from ._utils import CONTROL_CHARS from ._utils import escape_string +from .exceptions import InvalidStringError from .toml_char import TOMLChar @@ -176,26 +177,27 @@ def select(cls, literal=False, multiline=False) -> "StringType": }[(literal, multiline)] @property - def escaped_chars(self) -> Collection[str]: + def escaped_sequences(self) -> Collection[str]: # https://toml.io/en/v1.0.0#string escaped_in_basic = CONTROL_CHARS | {"\\"} - forbidden_in_literal = CONTROL_CHARS - {"\t"} - allowed_in_multiline = {"\t", "\n", "\r"} + allowed_in_multiline = {"\n", "\r"} return { StringType.SLB: escaped_in_basic | {'"'}, - StringType.MLB: escaped_in_basic - allowed_in_multiline, - StringType.SLL: forbidden_in_literal | {"'"}, - StringType.MLL: forbidden_in_literal - allowed_in_multiline, + StringType.MLB: (escaped_in_basic | {'"""'}) - allowed_in_multiline, + StringType.SLL: (), + StringType.MLL: (), }[self] @property - def escaped_sequences(self) -> Collection[str]: + def invalid_sequences(self) -> Collection[str]: # https://toml.io/en/v1.0.0#string + forbidden_in_literal = CONTROL_CHARS - {"\t"} + allowed_in_multiline = {"\n", "\r"} return { StringType.SLB: (), - StringType.MLB: ('"""',), - StringType.SLL: (), - StringType.MLL: ("'''",), + StringType.MLB: (), + StringType.SLL: forbidden_in_literal | {"'"}, + StringType.MLL: (forbidden_in_literal | {"'''"}) - allowed_in_multiline, }[self] @property @@ -1546,8 +1548,14 @@ def _getstate(self, protocol=3): @classmethod def from_raw(cls, value: str, type_=StringType.SLB, escape=True) -> "String": - escape_args = (type_.escaped_chars, type_.escaped_sequences) - string_value = escape_string(value, *escape_args) if escape else value + value = decode(value) + + invalid = type_.invalid_sequences + if any(c in value for c in invalid): + raise InvalidStringError(value, invalid, type_.value) + + escaped = type_.escaped_sequences + string_value = escape_string(value, escaped) if escape and escaped else value return cls(type_, decode(value), string_value, Trivia())