sphinx-doc · tk0miya · Dec 15, 2019 · Dec 5, 2019 · Dec 15, 2019 · jfbu
diff --git a/CHANGES b/CHANGES
@@ -21,6 +21,7 @@ Deprecated
 * ``sphinx.io.SphinxStandaloneReader.env``
 * ``sphinx.util.texescape.tex_escape_map``
 * ``sphinx.util.texescape.tex_hl_escape_map_new``
+* ``sphinx.writers.latex.LaTeXTranslator.no_contractions``
 
 Features added
 --------------
@@ -84,6 +85,8 @@ Bugs fixed
 * #6867: text: extra spaces are inserted to hyphenated words on folding lines
 * #6886: LaTeX: xelatex converts straight double quotes into right curly ones
   (shows when :confval:`smartquotes` is ``False``)
+* #6890: LaTeX: even with smartquotes off, PDF output transforms straight
+  quotes and consecutive hyphens into curly quotes and dashes
 * #6876: LaTeX: multi-line display of authors on title page has ragged edges
 * #6887: Sphinx crashes with docutils-0.16b0
 * #6920: sphinx-build: A console message is wrongly highlighted

diff --git a/doc/extdev/deprecated.rst b/doc/extdev/deprecated.rst
@@ -51,6 +51,11 @@ The following is a list of deprecated interfaces.
      - 4.0
      - ``sphinx.util.texescape.hlescape()``
 
+   * - ``sphinx.writers.latex.LaTeXTranslator.no_contractions``
+     - 2.3
+     - 4.0
+     - N/A
+
    * - ``sphinx.domains.math.MathDomain.add_equation()``
      - 2.2
      - 4.0

diff --git a/sphinx/highlighting.py b/sphinx/highlighting.py
@@ -193,6 +193,7 @@ def highlight_block(self, source, lang, opts=None, force=False, location=None, *
         if self.dest == 'html':
             return hlsource
         else:
+            # MEMO: this is done to escape Unicode chars with non-Unicode engines
             return texescape.hlescape(hlsource, self.latex_engine)
 
     def get_stylesheet(self):

diff --git a/sphinx/texinputs/sphinx.sty b/sphinx/texinputs/sphinx.sty
@@ -1078,7 +1078,7 @@
 }
 
 \def\sphinx@verbatim@nolig@list {\do \`}%
-% Some characters . , ; ? ! / are not pygmentized.
+% Some characters . , ; ? ! / are neither pygmentized nor "tex-escaped".
 % This macro makes them "active" and they will insert potential linebreaks.
 % Not compatible with math mode (cf \sphinxunactivateextras).
 \newcommand*\sphinxbreaksbeforeactivelist {}% none
@@ -1369,7 +1369,6 @@
 }
 \newcommand*\sphinxbreaksviaactiveinparsedliteral{%
   \sphinxbreaksviaactive % by default handles . , ; ? ! /
-  \do\-% we need also the hyphen character (ends up "as is" in parsed-literal)
   \lccode`\~`\~ %
   % update \dospecials as it is used by \url
   % but deactivation will already have been done hence this is unneeded:
@@ -1380,7 +1379,7 @@
   \lccode`~32 \lowercase{\let~}\spx@verbatim@space\lccode`\~`\~
 }
 \newcommand*{\sphinxunactivateextras}{\let\do\@makeother
-      \sphinxbreaksbeforeactivelist\sphinxbreaksafteractivelist\do\-}%
+      \sphinxbreaksbeforeactivelist\sphinxbreaksafteractivelist}%
 % the \catcode13=5\relax (deactivate end of input lines) is left to callers
 \newcommand*{\sphinxunactivateextrasandspace}{\catcode32=10\relax
       \sphinxunactivateextras}%
@@ -1805,12 +1804,20 @@
       % break also at \
       \let\sphinx@textbackslash\textbackslash
       \let\textbackslash\sphinxtextbackslash
-      % do not typeset a continuation symbol on next line
+      % by default, no continuation symbol on next line but may be added
       \let\sphinxafterbreak\sphinxafterbreakofinlineliteral
       % do not overwrite the comma set-up
       \let\verbatim@nolig@list\sphinx@literal@nolig@list
    \fi
    % fix a space-gobbling issue due to LaTeX's original \do@noligs
+% TODO: using \@noligs as patched by upquote.sty is now unneeded because
+% either ` and ' are escaped (non-unicode engines) or they don't build
+% ligatures (unicode engines). Thus remove this and unify handling of `, <, >, 
+% ' and - with the characters . , ; ? ! / as handled via
+% \sphinxbreaksviaactive.
+% Hence \sphinx@do@noligs will be removed, or rather replaced with code
+% inserting discretionaries, as they allow a continuation symbol on start of
+% next line to achieve common design with code-blocks.
    \let\do@noligs\sphinx@do@noligs
    \@noligs\endlinechar\m@ne\everyeof{}% (<- in case inside \sphinxhref)
    \expandafter\scantokens
@@ -1866,6 +1873,7 @@
 % reduce hyperref "Token not allowed in a PDF string" warnings on PDF builds
 \AtBeginDocument{\pdfstringdefDisableCommands{%
 % all "protected" macros possibly ending up in section titles should be here
+% TODO: examine if \sphinxhref, \sphinxurl, \sphinxnolinkurl should be handled
     \let\sphinxstyleemphasis        \@firstofone
     \let\sphinxstyleliteralemphasis \@firstofone
     \let\sphinxstylestrong          \@firstofone
@@ -1879,8 +1887,20 @@
     \let\sphinxemail    \@firstofone
     \let\sphinxcrossref \@firstofone
     \let\sphinxtermref  \@firstofone
+    \let\sphinxhyphen\sphinxhyphenforbookmarks
 }}
 
+% Special characters
+%
+% This definition prevents en-dash and em-dash TeX ligatures.
+% 
+% It inserts a potential breakpoint after the hyphen. This is to keep in sync
+% with behavior in code-blocks, parsed and inline literals. For a breakpoint
+% before the hyphen use \leavevmode\kern\z@- (within \makeatletter/\makeatother)
+\protected\def\sphinxhyphen#1{-\kern\z@}
+% The {} from texescape mark-up is kept, else -- gives en-dash in PDF bookmark
+\def\sphinxhyphenforbookmarks{-}
+
 % For curly braces inside \index macro
 \def\sphinxleftcurlybrace{\{}
 \def\sphinxrightcurlybrace{\}}

diff --git a/sphinx/texinputs/sphinx.xdy b/sphinx/texinputs/sphinx.xdy
@@ -112,12 +112,14 @@
 (merge-rule "\_" "_" :string)
 (merge-rule "{[}" "[" :string)
 (merge-rule "{]}" "]" :string)
-(merge-rule "{}`" "`" :string)
 (merge-rule "\textbackslash{}" "\" :string)  ; " for Emacs syntax highlighting
 (merge-rule "\textasciitilde{}" "~~" :string); the ~~ escape is needed here
+(merge-rule "\textasciicircum{}" "^" :string)
+(merge-rule "\sphinxhyphen{}" "-" :string)
+(merge-rule "\textquotesingle{}" "'" :string)
+(merge-rule "\textasciigrave{}" "`" :string)
 (merge-rule "\textless{}" "<" :string)
 (merge-rule "\textgreater{}" ">" :string)
-(merge-rule "\textasciicircum{}" "^" :string)
 (merge-rule "\P{}" "¶" :string)
 (merge-rule "\S{}" "§" :string)
 (merge-rule "\texteuro{}" "€" :string)

diff --git a/sphinx/util/texescape.py b/sphinx/util/texescape.py
@@ -29,12 +29,6 @@
     # map chars to avoid mis-interpretation in LaTeX
     ('[', r'{[}'),
     (']', r'{]}'),
-    # map chars to avoid TeX ligatures
-    # 1. ' - and , not here for some legacy reason
-    # 2. no effect with lualatex (done otherwise: #5790)
-    ('`', r'{}`'),
-    ('<', r'\textless{}'),
-    ('>', r'\textgreater{}'),
     # map special Unicode characters to TeX commands
     ('✓', r'\(\checkmark\)'),
     ('✔', r'\(\pmb{\checkmark}\)'),
@@ -49,6 +43,23 @@
     # OHM SIGN U+2126 is handled by LaTeX textcomp package
 ]
 
+# A map to avoid TeX ligatures or character replacements in PDF output
+# xelatex/lualatex/uplatex are handled differently (#5790, #6888)
+ascii_tex_replacements = [
+    # Note: the " renders curly in OT1 encoding but straight in T1, T2A, LY1...
+    #       escaping it to \textquotedbl would break documents using OT1
+    #       Sphinx does \shorthandoff{"} to avoid problems with some languages
+    # There is no \text... LaTeX escape for the hyphen character -
+    ('-', r'\sphinxhyphen{}'),  # -- and --- are TeX ligatures
+    # ,, is a TeX ligature in T1 encoding, but escaping the comma adds
+    # complications (whether by {}, or a macro) and is not done
+    # the next two require textcomp package
+    ("'", r'\textquotesingle{}'),  # else ' renders curly, and '' is a ligature
+    ('`', r'\textasciigrave{}'),   # else ` and `` render curly
+    ('<', r'\textless{}'),     # < is inv. exclam in OT1, << is a T1-ligature
+    ('>', r'\textgreater{}'),  # > is inv. quest. mark in OT1, >> a T1-ligature
+]
+
 # A map Unicode characters to LaTeX representation
 # (for LaTeX engines which don't support unicode)
 unicode_tex_replacements = [
@@ -85,6 +96,11 @@
     ('₉', r'\(\sb{\text{9}}\)'),
 ]
 
+# TODO: this should be called tex_idescape_map because its only use is in
+#       sphinx.writers.latex.LaTeXTranslator.idescape()
+# %, {, }, \, #, and ~ are the only ones which must be replaced by _ character
+# It would be simpler to define it entirely here rather than in init().
+# Unicode replacements are superfluous, as idescape() uses backslashreplace
 tex_replace_map = {}  # type: Dict[int, str]
 
 _tex_escape_map = {}  # type: Dict[int, str]
@@ -130,8 +146,17 @@ def init() -> None:
         _tex_escape_map_without_unicode[ord(a)] = b
         tex_replace_map[ord(a)] = '_'
 
+    # no reason to do this for _tex_escape_map_without_unicode
+    for a, b in ascii_tex_replacements:
+        _tex_escape_map[ord(a)] = b
+
+    # but the hyphen has a specific PDF bookmark problem
+    # https://github.com/latex3/hyperref/issues/112
+    _tex_escape_map_without_unicode[ord('-')] = r'\sphinxhyphen{}'
+
     for a, b in unicode_tex_replacements:
         _tex_escape_map[ord(a)] = b
+        # This is actually unneeded, as idescape() uses backslashreplace:
         tex_replace_map[ord(a)] = '_'
 
     for a, b in tex_replacements:

diff --git a/sphinx/writers/latex.py b/sphinx/writers/latex.py
@@ -488,7 +488,6 @@ def __init__(self, document: nodes.document, builder: "LaTeXBuilder") -> None:
         self.first_document = 1
         self.this_is_the_title = 1
         self.literal_whitespace = 0
-        self.no_contractions = 0
         self.in_parsed_literal = 0
         self.compact_list = 0
         self.first_param = 0
@@ -992,13 +991,11 @@ def depart_desc_returns(self, node: Element) -> None:
 
     def visit_desc_name(self, node: Element) -> None:
         self.body.append(r'\sphinxbfcode{\sphinxupquote{')
-        self.no_contractions += 1
         self.literal_whitespace += 1
 
     def depart_desc_name(self, node: Element) -> None:
         self.body.append('}}')
         self.literal_whitespace -= 1
-        self.no_contractions -= 1
 
     def visit_desc_parameterlist(self, node: Element) -> None:
         # close name, open parameterlist
@@ -1844,11 +1841,9 @@ def depart_emphasis(self, node: Element) -> None:
 
     def visit_literal_emphasis(self, node: Element) -> None:
         self.body.append(r'\sphinxstyleliteralemphasis{\sphinxupquote{')
-        self.no_contractions += 1
 
     def depart_literal_emphasis(self, node: Element) -> None:
         self.body.append('}}')
-        self.no_contractions -= 1
 
     def visit_strong(self, node: Element) -> None:
         self.body.append(r'\sphinxstylestrong{')
@@ -1858,11 +1853,9 @@ def depart_strong(self, node: Element) -> None:
 
     def visit_literal_strong(self, node: Element) -> None:
         self.body.append(r'\sphinxstyleliteralstrong{\sphinxupquote{')
-        self.no_contractions += 1
 
     def depart_literal_strong(self, node: Element) -> None:
         self.body.append('}}')
-        self.no_contractions -= 1
 
     def visit_abbreviation(self, node: Element) -> None:
         abbr = node.astext()
@@ -1922,14 +1915,12 @@ def depart_citation_reference(self, node: Element) -> None:
         pass
 
     def visit_literal(self, node: Element) -> None:
-        self.no_contractions += 1
         if self.in_title:
             self.body.append(r'\sphinxstyleliteralintitle{\sphinxupquote{')
         else:
             self.body.append(r'\sphinxcode{\sphinxupquote{')
 
     def depart_literal(self, node: Element) -> None:
-        self.no_contractions -= 1
         self.body.append('}}')
 
     def visit_footnote_reference(self, node: Element) -> None:
@@ -2103,9 +2094,7 @@ def depart_option_list_item(self, node: Element) -> None:
 
     def visit_option_string(self, node: Element) -> None:
         ostring = node.astext()
-        self.no_contractions += 1
         self.body.append(self.encode(ostring))
-        self.no_contractions -= 1
         raise nodes.SkipNode
 
     def visit_description(self, node: Element) -> None:
@@ -2189,14 +2178,15 @@ def encode(self, text: str) -> str:
             # Insert a blank before the newline, to avoid
             # ! LaTeX Error: There's no line here to end.
             text = text.replace('\n', '~\\\\\n').replace(' ', '~')
-        if self.no_contractions:
-            text = text.replace('--', '-{-}')
-            text = text.replace("''", "'{'}")
         return text
 
     def encode_uri(self, text: str) -> str:
+        # TODO: it is probably wrong that this uses texescape.escape()
+        #       this must be checked against hyperref package exact dealings
+        #       mainly, %, #, {, } and \ need escaping via a \ escape
         # in \href, the tilde is allowed and must be represented literally
-        return self.encode(text).replace('\\textasciitilde{}', '~')
+        return self.encode(text).replace('\\textasciitilde{}', '~').\
+            replace('\\sphinxhyphen{}', '-')
 
     def visit_Text(self, node: Text) -> None:
         text = self.encode(node.astext())
@@ -2285,6 +2275,12 @@ def footnotes_under(n: Element) -> Iterator[nodes.footnote]:
             fnotes[num] = [newnode, False]
         return fnotes
 
+    @property
+    def no_contractions(self) -> int:
+        warnings.warn('LaTeXTranslator.no_contractions is deprecated.',
+                      RemovedInSphinx40Warning, stacklevel=2)
+        return 0
+
     @property
     def footnotestack(self) -> List[Dict[str, List[Union[collected_footnote, bool]]]]:
         warnings.warn('LaTeXWriter.footnotestack is deprecated.',

diff --git a/tests/roots/test-latex-table/expects/complex_spanning_cell.tex b/tests/roots/test-latex-table/expects/complex_spanning_cell.tex
@@ -2,10 +2,10 @@
 table having …
 \begin{itemize}
 \item {} 
-consecutive multirow at top of row (1-1 and 1-2)
+consecutive multirow at top of row (1\sphinxhyphen{}1 and 1\sphinxhyphen{}2)
 
 \item {} 
-consecutive multirow at end of row (1-4 and 1-5)
+consecutive multirow at end of row (1\sphinxhyphen{}4 and 1\sphinxhyphen{}5)
 
 \end{itemize}
 
@@ -16,40 +16,40 @@
 \hline
 \sphinxmultirow{3}{1}{%
 \begin{varwidth}[t]{\sphinxcolwidth{1}{5}}
-cell1-1
+cell1\sphinxhyphen{}1
 \par
 \vskip-\baselineskip\vbox{\hbox{\strut}}\end{varwidth}%
 }%
 &\sphinxmultirow{3}{2}{%
 \begin{varwidth}[t]{\sphinxcolwidth{1}{5}}
-cell1-2
+cell1\sphinxhyphen{}2
 \par
 \vskip-\baselineskip\vbox{\hbox{\strut}}\end{varwidth}%
 }%
 &
-cell1-3
+cell1\sphinxhyphen{}3
 &\sphinxmultirow{3}{4}{%
 \begin{varwidth}[t]{\sphinxcolwidth{1}{5}}
-cell1-4
+cell1\sphinxhyphen{}4
 \par
 \vskip-\baselineskip\vbox{\hbox{\strut}}\end{varwidth}%
 }%
 &\sphinxmultirow{2}{5}{%
 \begin{varwidth}[t]{\sphinxcolwidth{1}{5}}
-cell1-5
+cell1\sphinxhyphen{}5
 \par
 \vskip-\baselineskip\vbox{\hbox{\strut}}\end{varwidth}%
 }%
 \\
 \cline{3-3}\sphinxtablestrut{1}&\sphinxtablestrut{2}&\sphinxmultirow{2}{6}{%
 \begin{varwidth}[t]{\sphinxcolwidth{1}{5}}
-cell2-3
+cell2\sphinxhyphen{}3
 \par
 \vskip-\baselineskip\vbox{\hbox{\strut}}\end{varwidth}%
 }%
 &\sphinxtablestrut{4}&\sphinxtablestrut{5}\\
 \cline{5-5}\sphinxtablestrut{1}&\sphinxtablestrut{2}&\sphinxtablestrut{6}&\sphinxtablestrut{4}&
-cell3-5
+cell3\sphinxhyphen{}5
 \\
 \hline
 \end{tabulary}