From dd6a07427eed25ca4d484470fb82cd1d39182aa2 Mon Sep 17 00:00:00 2001 From: Jean Abou Samra Date: Sun, 16 Jan 2022 03:15:46 +0100 Subject: [PATCH] Close #3985: Implement #noqa for i18n When cross-references in the original paragraph and the translated paragraph do not match, a warning is emitted. It is useful because it helps catch mistakes, but it can also be an annoyance since sometimes it is expected that the cross-references will not match. For example, a reference that is repeated in the original text may need to be factored out for good style in the target language. Another example: if the translator needs to translate a universally understood term in the source language into a term that not everyone knows is the translation of this original term, adding a reference to the glossary can be warranted. This allows the translated message to start with '#noqa' in order to disable the warning. --- doc/usage/advanced/intl.rst | 17 +++++++ sphinx/transforms/i18n.py | 34 ++++++++++--- tests/roots/test-intl/literalblock.txt | 8 ++++ tests/roots/test-intl/noqa.txt | 16 +++++++ .../test-intl/xx/LC_MESSAGES/literalblock.po | 5 ++ tests/roots/test-intl/xx/LC_MESSAGES/noqa.po | 48 +++++++++++++++++++ tests/test_intl.py | 28 +++++++++++ 7 files changed, 150 insertions(+), 6 deletions(-) create mode 100644 tests/roots/test-intl/noqa.txt create mode 100644 tests/roots/test-intl/xx/LC_MESSAGES/noqa.po diff --git a/doc/usage/advanced/intl.rst b/doc/usage/advanced/intl.rst index 3bf353e8d72..33378fff9a2 100644 --- a/doc/usage/advanced/intl.rst +++ b/doc/usage/advanced/intl.rst @@ -68,6 +68,23 @@ be translated you need to follow these instructions: * Run your desired build. +In order to protect against mistakes, a warning is emitted if +cross-references in the translated paragraph do not match those from the +original. This can be turned off globally using the +:confval:`suppress_warnings` configuration variable. 
Alternatively, to +turn it off for one message only, start the message with ``#noqa`` like +this:: + + #noqa Lorem ipsum dolor sit amet ... + +(Write ``\#noqa`` in case you want to have "#noqa" literally in the +text. This does not apply to code blocks, where ``#noqa`` is ignored +because code blocks do not contain references anyway.) + +.. versionadded:: 4.4 + The ``#noqa`` mechanism. + + Translating with sphinx-intl ---------------------------- diff --git a/sphinx/transforms/i18n.py b/sphinx/transforms/i18n.py index f40a66e111b..7612cbbb291 100644 --- a/sphinx/transforms/i18n.py +++ b/sphinx/transforms/i18n.py @@ -9,6 +9,7 @@ """ from os import path +from re import match from textwrap import indent from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, TypeVar @@ -82,6 +83,14 @@ def publish_msgstr(app: "Sphinx", source: str, source_path: str, source_line: in config.rst_prolog = rst_prolog # type: ignore +def parse_noqa(source: str) -> Tuple[str, bool]: + m = match(r"\s*#\s*noqa(\s+|$)", source) + if m: + return source[m.end():], True + else: + return source, False + + class PreserveTranslatableMessages(SphinxTransform): """ Preserve original translatable messages before translation @@ -119,6 +128,14 @@ def apply(self, **kwargs: Any) -> None: # phase1: replace reference ids with translated names for node, msg in extract_messages(self.document): msgstr = catalog.gettext(msg) + + # There is no point in having #noqa on literal blocks because + # they cannot contain references. Recognizing it would just + # completely prevent escaping the #noqa. Outside of literal + # blocks, one can always write \#noqa. 
+ if not isinstance(node, LITERAL_TYPE_NODES): + msgstr, _ = parse_noqa(msgstr) + # XXX add marker to untranslated parts if not msgstr or msgstr == msg or not msgstr.strip(): # as-of-yet untranslated @@ -139,6 +156,7 @@ def apply(self, **kwargs: Any) -> None: patch = publish_msgstr(self.app, msgstr, source, node.line, self.config, settings) + # FIXME: no warnings about inconsistent references in this part # XXX doctest and other block markup if not isinstance(patch, nodes.paragraph): continue # skip for now @@ -228,6 +246,11 @@ def apply(self, **kwargs: Any) -> None: continue # skip if the node is already translated by phase1 msgstr = catalog.gettext(msg) + + # See above. + if not isinstance(node, LITERAL_TYPE_NODES): + msgstr, noqa = parse_noqa(msgstr) + # XXX add marker to untranslated parts if not msgstr or msgstr == msg: # as-of-yet untranslated continue @@ -273,7 +296,6 @@ def apply(self, **kwargs: Any) -> None: patch = publish_msgstr(self.app, msgstr, source, node.line, self.config, settings) - # Structural Subelements phase2 if isinstance(node, nodes.title): # get node that placed as a first child @@ -303,7 +325,7 @@ def list_replace_or_append(lst: List[N], old: N, new: N) -> None: is_autofootnote_ref = NodeMatcher(nodes.footnote_reference, auto=Any) old_foot_refs: List[nodes.footnote_reference] = list(node.findall(is_autofootnote_ref)) # NOQA new_foot_refs: List[nodes.footnote_reference] = list(patch.findall(is_autofootnote_ref)) # NOQA - if len(old_foot_refs) != len(new_foot_refs): + if not noqa and len(old_foot_refs) != len(new_foot_refs): old_foot_ref_rawsources = [ref.rawsource for ref in old_foot_refs] new_foot_ref_rawsources = [ref.rawsource for ref in new_foot_refs] logger.warning(__('inconsistent footnote references in translated message.' 
+ @@ -346,7 +368,7 @@ def list_replace_or_append(lst: List[N], old: N, new: N) -> None: is_refnamed_ref = NodeMatcher(nodes.reference, refname=Any) old_refs: List[nodes.reference] = list(node.findall(is_refnamed_ref)) new_refs: List[nodes.reference] = list(patch.findall(is_refnamed_ref)) - if len(old_refs) != len(new_refs): + if not noqa and len(old_refs) != len(new_refs): old_ref_rawsources = [ref.rawsource for ref in old_refs] new_ref_rawsources = [ref.rawsource for ref in new_refs] logger.warning(__('inconsistent references in translated message.' + @@ -374,7 +396,7 @@ def list_replace_or_append(lst: List[N], old: N, new: N) -> None: old_foot_refs = list(node.findall(is_refnamed_footnote_ref)) new_foot_refs = list(patch.findall(is_refnamed_footnote_ref)) refname_ids_map: Dict[str, List[str]] = {} - if len(old_foot_refs) != len(new_foot_refs): + if not noqa and len(old_foot_refs) != len(new_foot_refs): old_foot_ref_rawsources = [ref.rawsource for ref in old_foot_refs] new_foot_ref_rawsources = [ref.rawsource for ref in new_foot_refs] logger.warning(__('inconsistent footnote references in translated message.' + @@ -393,7 +415,7 @@ def list_replace_or_append(lst: List[N], old: N, new: N) -> None: old_cite_refs: List[nodes.citation_reference] = list(node.findall(is_citation_ref)) new_cite_refs: List[nodes.citation_reference] = list(patch.findall(is_citation_ref)) # NOQA refname_ids_map = {} - if len(old_cite_refs) != len(new_cite_refs): + if not noqa and len(old_cite_refs) != len(new_cite_refs): old_cite_ref_rawsources = [ref.rawsource for ref in old_cite_refs] new_cite_ref_rawsources = [ref.rawsource for ref in new_cite_refs] logger.warning(__('inconsistent citation references in translated message.' 
+ @@ -413,7 +435,7 @@ def list_replace_or_append(lst: List[N], old: N, new: N) -> None: old_xrefs = list(node.findall(addnodes.pending_xref)) new_xrefs = list(patch.findall(addnodes.pending_xref)) xref_reftarget_map = {} - if len(old_xrefs) != len(new_xrefs): + if not noqa and len(old_xrefs) != len(new_xrefs): old_xref_rawsources = [xref.rawsource for xref in old_xrefs] new_xref_rawsources = [xref.rawsource for xref in new_xrefs] logger.warning(__('inconsistent term references in translated message.' + diff --git a/tests/roots/test-intl/literalblock.txt b/tests/roots/test-intl/literalblock.txt index 2b9eb8eb192..583b5b61072 100644 --- a/tests/roots/test-intl/literalblock.txt +++ b/tests/roots/test-intl/literalblock.txt @@ -49,6 +49,14 @@ code blocks literal-block in list +.. highlight:: none + +:: + + test_code_for_noqa() + continued() + + doctest blocks ============== diff --git a/tests/roots/test-intl/noqa.txt b/tests/roots/test-intl/noqa.txt new file mode 100644 index 00000000000..004b301461e --- /dev/null +++ b/tests/roots/test-intl/noqa.txt @@ -0,0 +1,16 @@ +First section +============= + +Some text with a reference, :ref:`next-section`. + +Another reference: :ref:`next-section`. + +This should allow to test escaping ``#noqa``. + +.. _next-section: + +Next section +============ + +Some text, again referring to the section: :ref:`next-section`. + diff --git a/tests/roots/test-intl/xx/LC_MESSAGES/literalblock.po b/tests/roots/test-intl/xx/LC_MESSAGES/literalblock.po index 04029535950..4aefe2f32db 100644 --- a/tests/roots/test-intl/xx/LC_MESSAGES/literalblock.po +++ b/tests/roots/test-intl/xx/LC_MESSAGES/literalblock.po @@ -77,6 +77,11 @@ msgid "literal-block\n" msgstr "LITERAL-BLOCK\n" "IN LIST" +msgid "test_code_for_noqa()\n" +"continued()" +msgstr "#noqa should not get stripped\n" +"# from this block." 
+ msgid "doctest blocks" msgstr "DOCTEST-BLOCKS" diff --git a/tests/roots/test-intl/xx/LC_MESSAGES/noqa.po b/tests/roots/test-intl/xx/LC_MESSAGES/noqa.po new file mode 100644 index 00000000000..e6eb1d94db1 --- /dev/null +++ b/tests/roots/test-intl/xx/LC_MESSAGES/noqa.po @@ -0,0 +1,48 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) +# This file is distributed under the same license as the Sphinx intl <Tests> package. +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2022-01-16 15:23+0100\n" +"PO-Revision-Date: 2022-01-16 15:23+0100\n" +"Last-Translator: Jean Abou Samra <jean@abou-samra.fr>\n" +"Language-Team: \n" +"Language: xx\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"X-Generator: Poedit 3.0\n" + +#: ../tests/roots/test-intl/noqa.txt:2 +msgid "First section" +msgstr "" + +#: ../tests/roots/test-intl/noqa.txt:4 +msgid "Some text with a reference, :ref:`next-section`." +msgstr "#noqa TRANSLATED TEXT WITHOUT REFERENCE." + +#: ../tests/roots/test-intl/noqa.txt:6 +msgid "Another reference: :ref:`next-section`." +msgstr "" +" \n" +"# noqa\n" +" \n" +"\n" +" TEST WHITESPACE INSENSITIVITY." + +#: ../tests/roots/test-intl/noqa.txt:8 +msgid "This should allow to test escaping ``#noqa``." +msgstr "\\#noqa ``#noqa`` is escaped at the beginning of this string." + +#: ../tests/roots/test-intl/noqa.txt:13 +msgid "Next section" +msgstr "" + +# This edge case should not fail. +#: ../tests/roots/test-intl/noqa.txt:15 +msgid "Some text, again referring to the section: :ref:`next-section`." 
+msgstr "#noqa" diff --git a/tests/test_intl.py b/tests/test_intl.py index 92d7badf46a..bd8d07dd749 100644 --- a/tests/test_intl.py +++ b/tests/test_intl.py @@ -192,6 +192,31 @@ def test_text_inconsistency_warnings(app, warning): assert_re_search(expected_citation_warning_expr, warnings) +@sphinx_intl +@pytest.mark.sphinx('text') +@pytest.mark.test_params(shared_result='test_intl_basic') +def test_noqa(app, warning): + app.build() + result = (app.outdir / 'noqa.txt').read_text() + expect = r"""First section +************* + +TRANSLATED TEXT WITHOUT REFERENCE. + +TEST WHITESPACE INSENSITIVITY. + +#noqa "#noqa" is escaped at the beginning of this string. + + +Next section +************ + +Some text, again referring to the section: Next section. +""" + assert result == expect + assert "next-section" not in getwarning(warning) + + @sphinx_intl @pytest.mark.sphinx('text') @pytest.mark.test_params(shared_result='test_intl_basic') @@ -1186,6 +1211,9 @@ def test_additional_targets_should_be_translated(app): """<span class="c1"># SYS IMPORTING</span>""") assert_count(expected_expr, result, 1) + # '#noqa' should remain in literal blocks. + assert_count("#noqa", result, 1) + # [raw.txt] result = (app.outdir / 'raw.html').read_text()