From c0fb56e6a5a7fa9268b5164db0ff0fc28524d648 Mon Sep 17 00:00:00 2001 From: Ronan Amicel Date: Mon, 22 Apr 2024 08:53:54 +0200 Subject: [PATCH] Allow alternative space characters as group separator when parsing numbers (#1007) The French group separator is `"\u202f"` (narrow non-breaking space), but when parsing numbers in the real world, you will most often encounter either a regular space character (`" "`) or a non-breaking space character (`"\xa0"`). The issue was partially addressed earlier in https://github.com/python-babel/babel/issues/637, but only to allow regular spaces instead of non-breaking spaces `"\xa0"` in `parse_decimal`. This commit goes further by changing both `parse_number` and `parse_decimal` to allow certain other space characters when the group character is itself a space character, but is not present in the string to parse. Unit tests are included. --- babel/numbers.py | 27 +++++++++++++++++++++++---- tests/test_numbers.py | 18 ++++++++++++++++++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/babel/numbers.py b/babel/numbers.py index 2d46e0271..6df1db8cb 100644 --- a/babel/numbers.py +++ b/babel/numbers.py @@ -998,6 +998,15 @@ def __init__(self, message: str, suggestions: list[str] | None = None) -> None: self.suggestions = suggestions +SPACE_CHARS = { + ' ', # space + '\xa0', # no-break space + '\u202f', # narrow no-break space +} + +SPACE_CHARS_RE = re.compile('|'.join(SPACE_CHARS)) + + def parse_number( string: str, locale: Locale | str | None = LC_NUMERIC, @@ -1026,8 +1035,18 @@ def parse_number( :raise `NumberFormatError`: if the string can not be converted to a number :raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale. 
""" + group_symbol = get_group_symbol(locale, numbering_system=numbering_system) + + if ( + group_symbol in SPACE_CHARS and # if the grouping symbol is a kind of space, + group_symbol not in string and # and the string to be parsed does not contain it, + SPACE_CHARS_RE.search(string) # but it does contain any other kind of space instead, + ): + # ... it's reasonable to assume it is taking the place of the grouping symbol. + string = SPACE_CHARS_RE.sub(group_symbol, string) + try: - return int(string.replace(get_group_symbol(locale, numbering_system=numbering_system), '')) + return int(string.replace(group_symbol, '')) except ValueError as ve: raise NumberFormatError(f"{string!r} is not a valid number") from ve @@ -1085,12 +1104,12 @@ def parse_decimal( decimal_symbol = get_decimal_symbol(locale, numbering_system=numbering_system) if not strict and ( - group_symbol == '\xa0' and # if the grouping symbol is U+00A0 NO-BREAK SPACE, + group_symbol in SPACE_CHARS and # if the grouping symbol is a kind of space, group_symbol not in string and # and the string to be parsed does not contain it, - ' ' in string # but it does contain a space instead, + SPACE_CHARS_RE.search(string) # but it does contain any other kind of space instead, ): # ... it's reasonable to assume it is taking the place of the grouping symbol. 
- string = string.replace(' ', group_symbol) + string = SPACE_CHARS_RE.sub(group_symbol, string) try: parsed = decimal.Decimal(string.replace(group_symbol, '') diff --git a/tests/test_numbers.py b/tests/test_numbers.py index e58f0735f..eeb71a2fc 100644 --- a/tests/test_numbers.py +++ b/tests/test_numbers.py @@ -751,6 +751,15 @@ def test_parse_number(): with pytest.raises(numbers.UnsupportedNumberingSystemError): numbers.parse_number('1.099,98', locale='en', numbering_system="unsupported") +@pytest.mark.parametrize('string', [ + '1 099', + '1\xa0099', + '1\u202f099', +]) +def test_parse_number_group_separator_can_be_any_space(string): + assert numbers.parse_number(string, locale='fr') == 1099 + + def test_parse_decimal(): assert (numbers.parse_decimal('1,099.98', locale='en_US') == decimal.Decimal('1099.98')) @@ -761,6 +770,15 @@ def test_parse_decimal(): assert excinfo.value.args[0] == "'2,109,998' is not a valid decimal number" +@pytest.mark.parametrize('string', [ + '1 099,98', + '1\xa0099,98', + '1\u202f099,98', +]) +def test_parse_decimal_group_separator_can_be_any_space(string): + assert decimal.Decimal('1099.98') == numbers.parse_decimal(string, locale='fr') + + def test_parse_grouping(): assert numbers.parse_grouping('##') == (1000, 1000) assert numbers.parse_grouping('#,###') == (3, 3)