diff --git a/tests/test_pdf.py b/tests/test_pdf.py index 02b355dd7c..6d16ee2f2c 100644 --- a/tests/test_pdf.py +++ b/tests/test_pdf.py @@ -288,11 +288,6 @@ def test_bookmarks_14(): f

g h i

''').write_pdf() - # a - # |_ b - # |_ c - # L_ d - # e assert re.findall(b'/Count ([0-9-]*)', pdf)[-1] == b'4' assert re.findall(b'/Title \\((.*)\\)', pdf) == [ b'a', b'b c d', b'e f', b'g h i'] diff --git a/weasyprint/formatting_structure/boxes.py b/weasyprint/formatting_structure/boxes.py index bc1fa845d2..e55c5e3a1b 100644 --- a/weasyprint/formatting_structure/boxes.py +++ b/weasyprint/formatting_structure/boxes.py @@ -468,25 +468,9 @@ class TextBox(InlineLevelBox): """ justification_spacing = 0 - # http://stackoverflow.com/questions/16317534/ - ascii_to_wide = {i: chr(i + 0xfee0) for i in range(0x21, 0x7f)} - ascii_to_wide.update({0x20: '\u3000', 0x2D: '\u2212'}) - def __init__(self, element_tag, style, element, text): assert text super().__init__(element_tag, style, element) - self.original_text = text - text_transform = style['text_transform'] - if text_transform != 'none': - text = { - 'uppercase': lambda t: t.upper(), - 'lowercase': lambda t: t.lower(), - # Python’s unicode.captitalize is not the same. 
- 'capitalize': lambda t: t.title(), - 'full-width': lambda t: t.translate(self.ascii_to_wide), - }[text_transform](text) - if style['hyphens'] == 'none': - text = text.replace('\u00AD', '') # U+00AD SOFT HYPHEN (SHY) self.text = text def copy_with_text(self, text): diff --git a/weasyprint/formatting_structure/build.py b/weasyprint/formatting_structure/build.py index ed424465c3..425af9d5dc 100644 --- a/weasyprint/formatting_structure/build.py +++ b/weasyprint/formatting_structure/build.py @@ -45,6 +45,10 @@ ('table-caption',): boxes.TableCaptionBox, } +# http://stackoverflow.com/questions/16317534/ +ASCII_TO_WIDE = {i: chr(i + 0xfee0) for i in range(0x21, 0x7f)} +ASCII_TO_WIDE.update({0x20: '\u3000', 0x2D: '\u2212'}) + def build_formatting_structure(element_tree, style_for, get_image_from_uri, base_url, target_collector, counter_style): @@ -190,9 +194,9 @@ def element_to_box(element, style_for, get_image_from_uri, base_url, box.children = children process_whitespace(box) - # calculate string-set and bookmark-label set_content_lists( element, box, style, counter_values, target_collector, counter_style) + process_text_transform(box) if marker_boxes and len(box.children) == 1: # See https://www.w3.org/TR/css-lists-3/#list-style-position-outside @@ -374,7 +378,6 @@ def add_text(text): has_text.add(True) if text: if content_boxes and isinstance(content_boxes[-1], boxes.TextBox): - content_boxes[-1].original_text += text content_boxes[-1].text += text else: content_boxes.append( @@ -1256,6 +1259,26 @@ def process_whitespace(box, following_collapsible_space=False): return following_collapsible_space +def process_text_transform(box): + if isinstance(box, boxes.TextBox): + text_transform = box.style['text_transform'] + if text_transform != 'none': + box.text = { + 'uppercase': lambda text: text.upper(), + 'lowercase': lambda text: text.lower(), + # Python’s str.capitalize is not the same. 
+ 'capitalize': lambda text: text.title(), + 'full-width': lambda text: text.translate(ASCII_TO_WIDE), + }[text_transform](box.text) + if box.style['hyphens'] == 'none': + box.text = box.text.replace('\u00AD', '') # U+00AD is soft hyphen + + if isinstance(box, boxes.ParentBox) and not box.is_running(): + for child in box.children: + if isinstance(child, (boxes.TextBox, boxes.InlineBox)): + process_text_transform(child) + + def inline_in_block(box): """Build the structure of lines inside blocks and return a new box tree. @@ -1547,10 +1570,10 @@ def set_viewport_overflow(root_box): def box_text(box): if isinstance(box, boxes.TextBox): - return box.original_text + return box.text elif isinstance(box, boxes.ParentBox): return ''.join( - child.original_text for child in box.descendants() + child.text for child in box.descendants() if not child.element_tag.endswith('::before') and not child.element_tag.endswith('::after') and not child.element_tag.endswith('::marker') and diff --git a/weasyprint/layout/page.py b/weasyprint/layout/page.py index 1efd81962d..f567c0b805 100644 --- a/weasyprint/layout/page.py +++ b/weasyprint/layout/page.py @@ -342,6 +342,7 @@ def make_box(at_keyword, containing_block): context.get_image_from_uri, context.target_collector, context.counter_style, context, page) build.process_whitespace(box) + build.process_text_transform(box) box = build.anonymous_table_boxes(box) box = build.flex_boxes(box) box = build.inline_in_block(box)