Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLDR 40 #826

Merged
merged 5 commits into from Apr 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions babel/dates.py
Expand Up @@ -322,7 +322,7 @@ def get_day_names(width='wide', context='format', locale=LC_TIME):
>>> get_day_names('short', locale='en_US')[1]
u'Tu'
>>> get_day_names('abbreviated', locale='es')[1]
u'mar.'
u'mar'
>>> get_day_names('narrow', context='stand-alone', locale='de_DE')[1]
u'D'

Expand All @@ -339,7 +339,7 @@ def get_month_names(width='wide', context='format', locale=LC_TIME):
>>> get_month_names('wide', locale='en_US')[1]
u'January'
>>> get_month_names('abbreviated', locale='es')[1]
u'ene.'
u'ene'
>>> get_month_names('narrow', context='stand-alone', locale='de_DE')[1]
u'J'

Expand Down
31 changes: 22 additions & 9 deletions babel/plural.py
Expand Up @@ -19,7 +19,7 @@
def extract_operands(source):
"""Extract operands from a decimal, a float or an int, according to `CLDR rules`_.

The result is a 6-tuple (n, i, v, w, f, t), where those symbols are as follows:
The result is an 8-tuple (n, i, v, w, f, t, c, e), where those symbols are as follows:

====== ===============================================================
Symbol Value
Expand All @@ -30,14 +30,16 @@ def extract_operands(source):
w number of visible fraction digits in n, without trailing zeros.
f visible fractional digits in n, with trailing zeros.
t visible fractional digits in n, without trailing zeros.
c compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
e currently a synonym for ‘c’; however, it may be redefined in the future.
====== ===============================================================

.. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Operands
.. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-61/tr35-numbers.html#Operands

:param source: A real number
:type source: int|float|decimal.Decimal
:return: A n-i-v-w-f-t tuple
:rtype: tuple[decimal.Decimal, int, int, int, int, int]
:return: An n-i-v-w-f-t-c-e tuple
:rtype: tuple[decimal.Decimal, int, int, int, int, int, int, int]
"""
n = abs(source)
i = int(n)
Expand Down Expand Up @@ -69,7 +71,8 @@ def extract_operands(source):
t = int(no_trailing or 0)
else:
v = w = f = t = 0
return n, i, v, w, f, t
c = e = 0 # TODO: c and e are not supported
return n, i, v, w, f, t, c, e


class PluralRule(object):
Expand Down Expand Up @@ -216,7 +219,7 @@ def to_python(rule):
to_python_func = _PythonCompiler().compile
result = [
'def evaluate(n):',
' n, i, v, w, f, t = extract_operands(n)',
' n, i, v, w, f, t, c, e = extract_operands(n)',
]
for tag, ast in PluralRule.parse(rule).abstract:
# the str() call is to coerce the tag to the native string. It's
Expand Down Expand Up @@ -317,12 +320,20 @@ def cldr_modulo(a, b):
class RuleError(Exception):
"""Raised if a rule is malformed."""

_VARS = 'nivwft'
_VARS = {
'n', # absolute value of the source number.
'i', # integer digits of n.
'v', # number of visible fraction digits in n, with trailing zeros.*
'w', # number of visible fraction digits in n, without trailing zeros.*
'f', # visible fraction digits in n, with trailing zeros.*
't', # visible fraction digits in n, without trailing zeros.*
'c', # compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
'e', # currently a synonym for ‘c’; however, it may be redefined in the future.
}

_RULES = [
(None, re.compile(r'\s+', re.UNICODE)),
('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|[{0}])\b'
.format(_VARS))),
('word', re.compile(fr'\b(and|or|is|(?:with)?in|not|mod|[{"".join(_VARS)}])\b')),
('value', re.compile(r'\d+')),
('symbol', re.compile(r'%|,|!=|=')),
('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE)) # U+2026: ELLIPSIS
Expand Down Expand Up @@ -525,6 +536,8 @@ def compile(self, arg):
compile_w = lambda x: 'w'
compile_f = lambda x: 'f'
compile_t = lambda x: 't'
compile_c = lambda x: 'c'
compile_e = lambda x: 'e'
compile_value = lambda x, v: str(v)
compile_and = _binary_compiler('(%s && %s)')
compile_or = _binary_compiler('(%s || %s)')
Expand Down
11 changes: 6 additions & 5 deletions scripts/download_import_cldr.py
Expand Up @@ -13,9 +13,10 @@
from urllib import urlretrieve


URL = 'http://unicode.org/Public/cldr/37/core.zip'
FILENAME = 'cldr-core-37.zip'
FILESUM = 'ba93f5ba256a61a6f8253397c6c4b1a9b9e77531f013cc7ffa7977b5f7e4da57'
URL = 'http://unicode.org/Public/cldr/40/cldr-common-40.0.zip'
FILENAME = 'cldr-common-40.0.zip'
# Via https://unicode.org/Public/cldr/40/hashes/SHASUM512.txt
FILESUM = 'b45ea381002210cf5963a2ba52fa45ee4e9b1e80ae1180bcecf61f431d64e4e0faba700b3d56a96a33355deab3abdb8bcbae9222b60a8ca85536476718175645'
BLKSIZE = 131072


Expand Down Expand Up @@ -53,7 +54,7 @@ def is_good_file(filename):
if not os.path.isfile(filename):
log('Local copy \'%s\' not found', filename)
return False
h = hashlib.sha256()
h = hashlib.sha512()
with open(filename, 'rb') as f:
while 1:
blk = f.read(BLKSIZE)
Expand All @@ -78,7 +79,7 @@ def main():
show_progress = (False if os.environ.get("BABEL_CLDR_NO_DOWNLOAD_PROGRESS") else sys.stdout.isatty())

while not is_good_file(zip_path):
log('Downloading \'%s\'', FILENAME)
log("Downloading '%s' from %s", FILENAME, URL)
if os.path.isfile(zip_path):
os.remove(zip_path)
urlretrieve(URL, zip_path, (reporthook if show_progress else None))
Expand Down
6 changes: 3 additions & 3 deletions tests/test_dates.py
Expand Up @@ -548,14 +548,14 @@ def test_get_period_names():
def test_get_day_names():
assert dates.get_day_names('wide', locale='en_US')[1] == u'Tuesday'
assert dates.get_day_names('short', locale='en_US')[1] == u'Tu'
assert dates.get_day_names('abbreviated', locale='es')[1] == u'mar.'
assert dates.get_day_names('abbreviated', locale='es')[1] == u'mar'
de = dates.get_day_names('narrow', context='stand-alone', locale='de_DE')
assert de[1] == u'D'


def test_get_month_names():
assert dates.get_month_names('wide', locale='en_US')[1] == u'January'
assert dates.get_month_names('abbreviated', locale='es')[1] == u'ene.'
assert dates.get_month_names('abbreviated', locale='es')[1] == u'ene'
de = dates.get_month_names('narrow', context='stand-alone', locale='de_DE')
assert de[1] == u'J'

Expand Down Expand Up @@ -834,7 +834,7 @@ def test_lithuanian_long_format():

def test_zh_TW_format():
# Refs GitHub issue #378
assert dates.format_time(datetime(2016, 4, 8, 12, 34, 56), locale='zh_TW') == u'\u4e0b\u534812:34:56'
assert dates.format_time(datetime(2016, 4, 8, 12, 34, 56), locale='zh_TW') == u'B12:34:56'


def test_format_current_moment():
Expand Down
4 changes: 2 additions & 2 deletions tests/test_numbers.py
Expand Up @@ -170,13 +170,13 @@ def test_group_separator(self):

self.assertEqual(u'29,567.12', numbers.format_decimal(29567.12,
locale='en_US', group_separator=True))
self.assertEqual(u'29\u202f567,12', numbers.format_decimal(29567.12,
self.assertEqual(u'29\xa0567,12', numbers.format_decimal(29567.12,
locale='fr_CA', group_separator=True))
self.assertEqual(u'29.567,12', numbers.format_decimal(29567.12,
locale='pt_BR', group_separator=True))
self.assertEqual(u'$1,099.98', numbers.format_currency(1099.98, 'USD',
locale='en_US', group_separator=True))
self.assertEqual(u'101\u202f299,98\xa0\u20ac', numbers.format_currency(101299.98, 'EUR',
self.assertEqual(u'101\xa0299,98\xa0\u20ac', numbers.format_currency(101299.98, 'EUR',
locale='fr_CA', group_separator=True))
self.assertEqual(u'101,299.98 euros', numbers.format_currency(101299.98, 'EUR',
locale='en_US', group_separator=True,
Expand Down
4 changes: 3 additions & 1 deletion tests/test_plural.py
Expand Up @@ -255,13 +255,15 @@ def test_or_and(self):

@pytest.mark.parametrize('source,n,i,v,w,f,t', EXTRACT_OPERANDS_TESTS)
def test_extract_operands(source, n, i, v, w, f, t):
e_n, e_i, e_v, e_w, e_f, e_t = plural.extract_operands(source)
e_n, e_i, e_v, e_w, e_f, e_t, e_c, e_e = plural.extract_operands(source)
assert abs(e_n - decimal.Decimal(n)) <= EPSILON # float-decimal conversion inaccuracy
assert e_i == i
assert e_v == v
assert e_w == w
assert e_f == f
assert e_t == t
assert not e_c # Not supported at present
assert not e_e # Not supported at present


@pytest.mark.parametrize('locale', ('ru', 'pl'))
Expand Down
45 changes: 32 additions & 13 deletions tests/test_smoke.py
Expand Up @@ -6,32 +6,51 @@
we ship.
"""
import decimal
from datetime import datetime

import datetime
import pytest
from babel import Locale
from babel import dates
from babel import numbers

from babel import Locale, units, dates, numbers

NUMBERS = (
decimal.Decimal("-33.76"), # Negative Decimal
decimal.Decimal("13.37"), # Positive Decimal
1.2 - 1.0, # Inaccurate float
10, # Plain old integer
0, # Zero
)


@pytest.mark.all_locales
def test_smoke_dates(locale):
locale = Locale.parse(locale)
instant = datetime.now()
instant = datetime.datetime.now()
for width in ("full", "long", "medium", "short"):
assert dates.format_date(instant, format=width, locale=locale)
assert dates.format_datetime(instant, format=width, locale=locale)
assert dates.format_time(instant, format=width, locale=locale)
# Interval test
past = instant - datetime.timedelta(hours=23)
assert dates.format_interval(past, instant, locale=locale)
# Duration test - at the time of writing, all locales seem to have `short` width,
# so let's test that.
duration = instant - instant.replace(hour=0, minute=0, second=0)
for granularity in ('second', 'minute', 'hour', 'day'):
assert dates.format_timedelta(duration, granularity=granularity, format="short", locale=locale)


@pytest.mark.all_locales
def test_smoke_numbers(locale):
locale = Locale.parse(locale)
for number in (
decimal.Decimal("-33.76"), # Negative Decimal
decimal.Decimal("13.37"), # Positive Decimal
1.2 - 1.0, # Inaccurate float
10, # Plain old integer
0, # Zero
):
for number in NUMBERS:
assert numbers.format_decimal(number, locale=locale)
assert numbers.format_currency(number, "EUR", locale=locale)
assert numbers.format_scientific(number, locale=locale)
assert numbers.format_percent(number / 100, locale=locale)


@pytest.mark.all_locales
def test_smoke_units(locale):
locale = Locale.parse(locale)
for unit in ('length-meter', 'mass-kilogram', 'energy-calorie', 'volume-liter'):
for number in NUMBERS:
assert units.format_unit(number, measurement_unit=unit, locale=locale)