Skip to content

Commit

Permalink
Be more specific about which alternative space characters should work…
Browse files Browse the repository at this point in the history
… as group symbols

The `\s` character class (like the `str.isspace()` method) also matches
characters such as newlines, which we probably don’t want to treat as potential
group symbols in numbers.
  • Loading branch information
ronnix committed Nov 9, 2023
1 parent 74d07a1 commit cab7135
Showing 1 changed file with 15 additions and 6 deletions.
21 changes: 15 additions & 6 deletions babel/numbers.py
Expand Up @@ -862,6 +862,15 @@ def __init__(self, message: str, suggestions: list[str] | None = None) -> None:
self.suggestions = suggestions


# Space-like characters that may stand in for a locale's group symbol when
# parsing numbers. This is deliberately an explicit allow-list rather than
# the regex ``\s`` class, which would also match newlines, tabs, etc.
SPACE_CHARS = {
    ' ',  # regular space
    '\xa0',  # non-breakable space
    '\u202f',  # narrow non-breakable space
}

# Matches any single character from SPACE_CHARS.
# The members are sorted so the compiled pattern is identical from run to run
# (set iteration order is not), and escaped so that adding a regex
# metacharacter to SPACE_CHARS later cannot change the pattern's meaning.
SPACE_CHARS_RE = re.compile(
    '[' + ''.join(re.escape(char) for char in sorted(SPACE_CHARS)) + ']',
)


def parse_number(string: str, locale: Locale | str | None = LC_NUMERIC) -> int:
"""Parse localized number string into an integer.
Expand All @@ -885,12 +894,12 @@ def parse_number(string: str, locale: Locale | str | None = LC_NUMERIC) -> int:
group_symbol = get_group_symbol(locale)

if (
re.match(r'\s', group_symbol) and # if the grouping symbol is a kind of space,
group_symbol in SPACE_CHARS and # if the grouping symbol is a kind of space,
group_symbol not in string and # and the string to be parsed does not contain it,
re.search(r'\s', string) # but it does contain any other kind of space instead,
SPACE_CHARS_RE.search(string) # but it does contain any other kind of space instead,
):
# ... it's reasonable to assume it is taking the place of the grouping symbol.
string = re.sub(r'\s', group_symbol, string)
string = SPACE_CHARS_RE.sub(group_symbol, string)

try:
return int(string.replace(group_symbol, ''))
Expand Down Expand Up @@ -940,12 +949,12 @@ def parse_decimal(string: str, locale: Locale | str | None = LC_NUMERIC, strict:
decimal_symbol = get_decimal_symbol(locale)

if not strict and (
re.match(r'\s', group_symbol) and # if the grouping symbol is a kind of space,
group_symbol in SPACE_CHARS and # if the grouping symbol is a kind of space,
group_symbol not in string and # and the string to be parsed does not contain it,
re.search(r'\s', string) # but it does contain any other kind of space instead,
SPACE_CHARS_RE.search(string) # but it does contain any other kind of space instead,
):
# ... it's reasonable to assume it is taking the place of the grouping symbol.
string = re.sub(r'\s', group_symbol, string)
string = SPACE_CHARS_RE.sub(group_symbol, string)

try:
parsed = decimal.Decimal(string.replace(group_symbol, '')
Expand Down

0 comments on commit cab7135

Please sign in to comment.