From 833afb68df35f8152865fec6cb3872c4cc46628c Mon Sep 17 00:00:00 2001 From: Guillaume Ayoub Date: Wed, 13 Oct 2021 21:06:36 +0200 Subject: [PATCH] Remove original_text attribute This "hack" was introduced to ignore the text-transform property for bookmarks. Now that the whitespace management logic is done after the box creation (just before building the bookmark label), we can do the same for the text transformation (just after building the bookmark label). Related to #607 and #137. --- tests/test_pdf.py | 5 ---- weasyprint/formatting_structure/boxes.py | 16 ------------ weasyprint/formatting_structure/build.py | 31 +++++++++++++++++++++--- weasyprint/layout/page.py | 1 + 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/tests/test_pdf.py b/tests/test_pdf.py index 02b355dd7c..6d16ee2f2c 100644 --- a/tests/test_pdf.py +++ b/tests/test_pdf.py @@ -288,11 +288,6 @@ def test_bookmarks_14(): f

g h i

''').write_pdf() - # a - # |_ b - # |_ c - # L_ d - # e assert re.findall(b'/Count ([0-9-]*)', pdf)[-1] == b'4' assert re.findall(b'/Title \\((.*)\\)', pdf) == [ b'a', b'b c d', b'e f', b'g h i'] diff --git a/weasyprint/formatting_structure/boxes.py b/weasyprint/formatting_structure/boxes.py index bc1fa845d2..e55c5e3a1b 100644 --- a/weasyprint/formatting_structure/boxes.py +++ b/weasyprint/formatting_structure/boxes.py @@ -468,25 +468,9 @@ class TextBox(InlineLevelBox): """ justification_spacing = 0 - # http://stackoverflow.com/questions/16317534/ - ascii_to_wide = {i: chr(i + 0xfee0) for i in range(0x21, 0x7f)} - ascii_to_wide.update({0x20: '\u3000', 0x2D: '\u2212'}) - def __init__(self, element_tag, style, element, text): assert text super().__init__(element_tag, style, element) - self.original_text = text - text_transform = style['text_transform'] - if text_transform != 'none': - text = { - 'uppercase': lambda t: t.upper(), - 'lowercase': lambda t: t.lower(), - # Python’s unicode.captitalize is not the same. 
- 'capitalize': lambda t: t.title(), - 'full-width': lambda t: t.translate(self.ascii_to_wide), - }[text_transform](text) - if style['hyphens'] == 'none': - text = text.replace('\u00AD', '') # U+00AD SOFT HYPHEN (SHY) self.text = text def copy_with_text(self, text): diff --git a/weasyprint/formatting_structure/build.py b/weasyprint/formatting_structure/build.py index ed424465c3..425af9d5dc 100644 --- a/weasyprint/formatting_structure/build.py +++ b/weasyprint/formatting_structure/build.py @@ -45,6 +45,10 @@ ('table-caption',): boxes.TableCaptionBox, } +# http://stackoverflow.com/questions/16317534/ +ASCII_TO_WIDE = {i: chr(i + 0xfee0) for i in range(0x21, 0x7f)} +ASCII_TO_WIDE.update({0x20: '\u3000', 0x2D: '\u2212'}) + def build_formatting_structure(element_tree, style_for, get_image_from_uri, base_url, target_collector, counter_style): @@ -190,9 +194,9 @@ def element_to_box(element, style_for, get_image_from_uri, base_url, box.children = children process_whitespace(box) - # calculate string-set and bookmark-label set_content_lists( element, box, style, counter_values, target_collector, counter_style) + process_text_transform(box) if marker_boxes and len(box.children) == 1: # See https://www.w3.org/TR/css-lists-3/#list-style-position-outside @@ -374,7 +378,6 @@ def add_text(text): has_text.add(True) if text: if content_boxes and isinstance(content_boxes[-1], boxes.TextBox): - content_boxes[-1].original_text += text content_boxes[-1].text += text else: content_boxes.append( @@ -1256,6 +1259,26 @@ def process_whitespace(box, following_collapsible_space=False): return following_collapsible_space +def process_text_transform(box): + if isinstance(box, boxes.TextBox): + text_transform = box.style['text_transform'] + if text_transform != 'none': + box.text = { + 'uppercase': lambda text: text.upper(), + 'lowercase': lambda text: text.lower(), + # Python’s unicode.capitalize is not the same. 
+ 'capitalize': lambda text: text.title(), + 'full-width': lambda text: text.translate(ASCII_TO_WIDE), + }[text_transform](box.text) + if box.style['hyphens'] == 'none': + box.text = box.text.replace('\u00AD', '') # U+00AD is soft hyphen + + if isinstance(box, boxes.ParentBox) and not box.is_running(): + for child in box.children: + if isinstance(child, (boxes.TextBox, boxes.InlineBox)): + process_text_transform(child) + + def inline_in_block(box): """Build the structure of lines inside blocks and return a new box tree. @@ -1547,10 +1570,10 @@ def set_viewport_overflow(root_box): def box_text(box): if isinstance(box, boxes.TextBox): - return box.original_text + return box.text elif isinstance(box, boxes.ParentBox): return ''.join( - child.original_text for child in box.descendants() + child.text for child in box.descendants() if not child.element_tag.endswith('::before') and not child.element_tag.endswith('::after') and not child.element_tag.endswith('::marker') and diff --git a/weasyprint/layout/page.py b/weasyprint/layout/page.py index 1efd81962d..f567c0b805 100644 --- a/weasyprint/layout/page.py +++ b/weasyprint/layout/page.py @@ -342,6 +342,7 @@ def make_box(at_keyword, containing_block): context.get_image_from_uri, context.target_collector, context.counter_style, context, page) build.process_whitespace(box) + build.process_text_transform(box) box = build.anonymous_table_boxes(box) box = build.flex_boxes(box) box = build.inline_in_block(box)