Skip to content

Commit

Permalink
Allow alternative space characters as group separator when parsing nu…
Browse files Browse the repository at this point in the history
…mbers (#1007)

The French group separator is `"\u202f"` (narrow non-breaking space),
but when parsing numbers in the real world, you will most often encounter
either a regular space character (`" "`) or a non-breaking space character
(`"\xa0"`).

The issue was partially addressed earlier in #637,
but only to allow regular spaces instead of non-breaking spaces `"\xa0"` in
`parse_decimal`.

This commit goes further by changing both `parse_number` and `parse_decimal`
to allow certain other space characters when the group character is itself a space character,
but is not present in the string to parse.

Unit tests are included.
  • Loading branch information
ronnix committed Apr 22, 2024
1 parent fe82fbc commit c0fb56e
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 4 deletions.
27 changes: 23 additions & 4 deletions babel/numbers.py
Expand Up @@ -998,6 +998,15 @@ def __init__(self, message: str, suggestions: list[str] | None = None) -> None:
self.suggestions = suggestions


# Space-like characters that may stand in for a locale's group separator
# in numbers as they are written in the real world.
SPACE_CHARS = {
    ' ',  # space
    '\xa0',  # no-break space
    '\u202f',  # narrow no-break space
}

# Matches any single character from SPACE_CHARS.  Built as a sorted, escaped
# character class so the pattern text is deterministic (set iteration order
# is not) and stays valid if a regex metacharacter is ever added to the set.
SPACE_CHARS_RE = re.compile('[' + ''.join(map(re.escape, sorted(SPACE_CHARS))) + ']')


def parse_number(
string: str,
locale: Locale | str | None = LC_NUMERIC,
Expand Down Expand Up @@ -1026,8 +1035,18 @@ def parse_number(
:raise `NumberFormatError`: if the string can not be converted to a number
:raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale.
"""
group_symbol = get_group_symbol(locale, numbering_system=numbering_system)

if (
group_symbol in SPACE_CHARS and # if the grouping symbol is a kind of space,
group_symbol not in string and # and the string to be parsed does not contain it,
SPACE_CHARS_RE.search(string) # but it does contain any other kind of space instead,
):
# ... it's reasonable to assume it is taking the place of the grouping symbol.
string = SPACE_CHARS_RE.sub(group_symbol, string)

try:
return int(string.replace(get_group_symbol(locale, numbering_system=numbering_system), ''))
return int(string.replace(group_symbol, ''))
except ValueError as ve:
raise NumberFormatError(f"{string!r} is not a valid number") from ve

Expand Down Expand Up @@ -1085,12 +1104,12 @@ def parse_decimal(
decimal_symbol = get_decimal_symbol(locale, numbering_system=numbering_system)

if not strict and (
group_symbol == '\xa0' and # if the grouping symbol is U+00A0 NO-BREAK SPACE,
group_symbol in SPACE_CHARS and # if the grouping symbol is a kind of space,
group_symbol not in string and # and the string to be parsed does not contain it,
' ' in string # but it does contain a space instead,
SPACE_CHARS_RE.search(string) # but it does contain any other kind of space instead,
):
# ... it's reasonable to assume it is taking the place of the grouping symbol.
string = string.replace(' ', group_symbol)
string = SPACE_CHARS_RE.sub(group_symbol, string)

try:
parsed = decimal.Decimal(string.replace(group_symbol, '')
Expand Down
18 changes: 18 additions & 0 deletions tests/test_numbers.py
Expand Up @@ -751,6 +751,15 @@ def test_parse_number():
with pytest.raises(numbers.UnsupportedNumberingSystemError):
numbers.parse_number('1.099,98', locale='en', numbering_system="unsupported")

@pytest.mark.parametrize('string', [
    '1 099',  # regular space
    '1\xa0099',  # no-break space
    '1\u202f099',  # narrow no-break space
])
def test_parse_number_group_separator_can_be_any_space(string):
    """With locale 'fr', each space variant used as group separator parses to 1099."""
    parsed = numbers.parse_number(string, locale='fr')
    assert parsed == 1099


def test_parse_decimal():
assert (numbers.parse_decimal('1,099.98', locale='en_US')
== decimal.Decimal('1099.98'))
Expand All @@ -761,6 +770,15 @@ def test_parse_decimal():
assert excinfo.value.args[0] == "'2,109,998' is not a valid decimal number"


@pytest.mark.parametrize('string', [
    '1 099,98',  # regular space
    '1\xa0099,98',  # no-break space
    '1\u202f099,98',  # narrow no-break space
])
def test_parse_decimal_group_separator_can_be_any_space(string):
    """With locale 'fr', each space variant used as group separator parses to 1099.98."""
    expected = decimal.Decimal('1099.98')
    parsed = numbers.parse_decimal(string, locale='fr')
    assert expected == parsed


def test_parse_grouping():
assert numbers.parse_grouping('##') == (1000, 1000)
assert numbers.parse_grouping('#,###') == (3, 3)
Expand Down

0 comments on commit c0fb56e

Please sign in to comment.