From 05df10fd1474e929793183c3b0ffa28251df79eb Mon Sep 17 00:00:00 2001 From: Jonah Lawrence Date: Mon, 31 Oct 2022 09:29:23 -0600 Subject: [PATCH] Add support for compact decimal formats (#909) --- babel/core.py | 12 +++++++++ babel/numbers.py | 57 ++++++++++++++++++++++++++++++++++++++++++ docs/api/numbers.rst | 2 ++ docs/numbers.rst | 2 +- scripts/import_cldr.py | 2 -- tests/test_numbers.py | 33 ++++++++++++++++++++++++ 6 files changed, 105 insertions(+), 3 deletions(-) diff --git a/babel/core.py b/babel/core.py index 9393c2394..220cbaf0a 100644 --- a/babel/core.py +++ b/babel/core.py @@ -564,6 +564,18 @@ def decimal_formats(self): """ return self._data['decimal_formats'] + @property + def compact_decimal_formats(self): + """Locale patterns for compact decimal number formatting. + + .. note:: The format of the value returned may change between + Babel versions. + + >>> Locale('en', 'US').compact_decimal_formats["short"]["one"]["1000"] + <NumberPattern u'0K'> + """ + return self._data['compact_decimal_formats'] + @property def currency_formats(self): """Locale patterns for currency number formatting. diff --git a/babel/numbers.py b/babel/numbers.py index b8971bcbc..192e3ed6e 100644 --- a/babel/numbers.py +++ b/babel/numbers.py @@ -425,6 +425,63 @@ def format_decimal( number, locale, decimal_quantization=decimal_quantization, group_separator=group_separator) +def format_compact_decimal(number, *, format_type="short", locale=LC_NUMERIC, fraction_digits=0): + u"""Return the given decimal number formatted for a specific locale in compact form. 
+ + >>> format_compact_decimal(12345, format_type="short", locale='en_US') + u'12K' + >>> format_compact_decimal(12345, format_type="long", locale='en_US') + u'12 thousand' + >>> format_compact_decimal(12345, format_type="short", locale='en_US', fraction_digits=2) + u'12.35K' + >>> format_compact_decimal(1234567, format_type="short", locale="ja_JP") + u'123万' + >>> format_compact_decimal(2345678, format_type="long", locale="mk") + u'2 милиони' + >>> format_compact_decimal(21098765, format_type="long", locale="mk") + u'21 милион' + + :param number: the number to format + :param format_type: Compact format to use ("short" or "long") + :param locale: the `Locale` object or locale identifier + :param fraction_digits: Number of digits after the decimal point to use. Defaults to `0`. + """ + locale = Locale.parse(locale) + number, format = _get_compact_format(number, format_type, locale, fraction_digits) + pattern = parse_pattern(format) + return pattern.apply(number, locale, decimal_quantization=False) + + +def _get_compact_format(number, format_type, locale, fraction_digits=0): + """Returns the number after dividing by the unit and the format pattern to use. + The algorithm is described here: + https://www.unicode.org/reports/tr35/tr35-45/tr35-numbers.html#Compact_Number_Formats. 
+ """ + format = None + compact_format = locale.compact_decimal_formats[format_type] + for magnitude in sorted([int(m) for m in compact_format["other"]], reverse=True): + if abs(number) >= magnitude: + # check the pattern using "other" as the amount + format = compact_format["other"][str(magnitude)] + pattern = parse_pattern(format).pattern + # if the pattern is "0", we do not divide the number + if pattern == "0": + break + # otherwise, we need to divide the number by the magnitude but remove zeros + # equal to the number of 0's in the pattern minus 1 + number = number / (magnitude / (10 ** (pattern.count("0") - 1))) + # round to the number of fraction digits requested + number = round(number, fraction_digits) + # if the remaining number is singular, use the singular format + plural_form = locale.plural_form(abs(number)) + plural_form = plural_form if plural_form in compact_format else "other" + format = compact_format[plural_form][str(magnitude)] + break + if format is None: # Did not find a format, fall back. + format = locale.decimal_formats.get(None) + return number, format + + class UnknownCurrencyFormatError(KeyError): """Exception raised when an unknown currency format is requested.""" diff --git a/docs/api/numbers.rst b/docs/api/numbers.rst index f9b0833a2..eac569206 100644 --- a/docs/api/numbers.rst +++ b/docs/api/numbers.rst @@ -13,6 +13,8 @@ Number Formatting .. autofunction:: format_decimal +.. autofunction:: format_compact_decimal + .. autofunction:: format_currency .. autofunction:: format_percent diff --git a/docs/numbers.rst b/docs/numbers.rst index ed3b60f13..cbe05cdef 100644 --- a/docs/numbers.rst +++ b/docs/numbers.rst @@ -12,7 +12,7 @@ the ``babel.numbers`` module: .. 
code-block:: pycon - >>> from babel.numbers import format_number, format_decimal, format_percent + >>> from babel.numbers import format_number, format_decimal, format_compact_decimal, format_percent Examples: diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py index 7fc8538d7..92dd27234 100755 --- a/scripts/import_cldr.py +++ b/scripts/import_cldr.py @@ -770,8 +770,6 @@ def parse_decimal_formats(data, tree): # These are mapped into a `compact_decimal_formats` dictionary # with the format {length: {count: {multiplier: pattern}}}. - - # TODO: Add support for formatting them. compact_decimal_formats = data.setdefault('compact_decimal_formats', {}) length_map = compact_decimal_formats.setdefault(length_type, {}) length_count_map = length_map.setdefault(pattern_el.attrib['count'], {}) diff --git a/tests/test_numbers.py b/tests/test_numbers.py index 78f779758..1b955c95e 100644 --- a/tests/test_numbers.py +++ b/tests/test_numbers.py @@ -121,6 +121,39 @@ def test_group_separator(self): assert numbers.format_currency(101299.98, 'EUR', locale='en_US', group_separator=True, format_type='name') == u'101,299.98 euros' assert numbers.format_percent(251234.1234, locale='sv_SE', group_separator=True) == u'25\xa0123\xa0412\xa0%' + def test_compact(self): + assert numbers.format_compact_decimal(1, locale='en_US', format_type="short") == u'1' + assert numbers.format_compact_decimal(999, locale='en_US', format_type="short") == u'999' + assert numbers.format_compact_decimal(1000, locale='en_US', format_type="short") == u'1K' + assert numbers.format_compact_decimal(9000, locale='en_US', format_type="short") == u'9K' + assert numbers.format_compact_decimal(9123, locale='en_US', format_type="short", fraction_digits=2) == u'9.12K' + assert numbers.format_compact_decimal(10000, locale='en_US', format_type="short") == u'10K' + assert numbers.format_compact_decimal(10000, locale='en_US', format_type="short", fraction_digits=2) == u'10K' + assert 
numbers.format_compact_decimal(1000000, locale='en_US', format_type="short") == u'1M' + assert numbers.format_compact_decimal(9000999, locale='en_US', format_type="short") == u'9M' + assert numbers.format_compact_decimal(9000900099, locale='en_US', format_type="short", fraction_digits=5) == u'9.0009B' + assert numbers.format_compact_decimal(1, locale='en_US', format_type="long") == u'1' + assert numbers.format_compact_decimal(999, locale='en_US', format_type="long") == u'999' + assert numbers.format_compact_decimal(1000, locale='en_US', format_type="long") == u'1 thousand' + assert numbers.format_compact_decimal(9000, locale='en_US', format_type="long") == u'9 thousand' + assert numbers.format_compact_decimal(9000, locale='en_US', format_type="long", fraction_digits=2) == u'9 thousand' + assert numbers.format_compact_decimal(10000, locale='en_US', format_type="long") == u'10 thousand' + assert numbers.format_compact_decimal(10000, locale='en_US', format_type="long", fraction_digits=2) == u'10 thousand' + assert numbers.format_compact_decimal(1000000, locale='en_US', format_type="long") == u'1 million' + assert numbers.format_compact_decimal(9999999, locale='en_US', format_type="long") == u'10 million' + assert numbers.format_compact_decimal(9999999999, locale='en_US', format_type="long", fraction_digits=5) == u'10 billion' + assert numbers.format_compact_decimal(1, locale='ja_JP', format_type="short") == u'1' + assert numbers.format_compact_decimal(999, locale='ja_JP', format_type="short") == u'999' + assert numbers.format_compact_decimal(1000, locale='ja_JP', format_type="short") == u'1000' + assert numbers.format_compact_decimal(9123, locale='ja_JP', format_type="short") == u'9123' + assert numbers.format_compact_decimal(10000, locale='ja_JP', format_type="short") == u'1万' + assert numbers.format_compact_decimal(1234567, locale='ja_JP', format_type="long") == u'123万' + assert numbers.format_compact_decimal(-1, locale='en_US', format_type="short") == u'-1' + 
assert numbers.format_compact_decimal(-1234, locale='en_US', format_type="short", fraction_digits=2) == u'-1.23K' + assert numbers.format_compact_decimal(-123456789, format_type='short', locale='en_US') == u'-123M' + assert numbers.format_compact_decimal(-123456789, format_type='long', locale='en_US') == u'-123 million' + assert numbers.format_compact_decimal(2345678, locale='mk', format_type='long') == u'2 милиони' + assert numbers.format_compact_decimal(21098765, locale='mk', format_type='long') == u'21 милион' class NumberParsingTestCase(unittest.TestCase):