Skip to content

Commit

Permalink
Don’t use "text-transform"ed text for content-based uses
Browse files Browse the repository at this point in the history
This solution is far from perfect. What we could do is have a getter/setter
for the "text" attribute of TextBox, backed by the "original_text" attribute,
so that changing one always changes the other.

But… That would be much more complicated and much slower, only to solve corner
use cases.

Of course, an issue will be opened soon, when someone finds one of these cases ;).

Fix #137.
  • Loading branch information
liZe committed Sep 6, 2021
1 parent 083cab0 commit 85e1ea1
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 8 deletions.
13 changes: 13 additions & 0 deletions tests/test_pdf.py
Expand Up @@ -266,6 +266,19 @@ def test_bookmarks_12():
assert re.findall(b'/Title \\((.*)\\)', pdf) == [b'a']


@assert_no_logs
def test_bookmarks_13():
    # A "bookmark-label: contents" bookmark on an element styled with
    # "text-transform: uppercase" must keep the text as written in the
    # document ("a"), not the transformed text.
    document = FakeHTML(string='''
<div style="bookmark-level:1; bookmark-label:contents;
text-transform:uppercase">a</div>
''')
    pdf = document.write_pdf()
    # Expected outline tree:
    # a
    count_values = re.findall(b'/Count ([0-9-]*)', pdf)
    # The last /Count entry found is the one checked as the outline count.
    outline_count = count_values.pop()
    assert outline_count == b'1'
    titles = re.findall(b'/Title \\((.*)\\)', pdf)
    assert titles == [b'a']


@assert_no_logs
def test_links_none():
pdf = FakeHTML(string='<body>').write_pdf()
Expand Down
1 change: 1 addition & 0 deletions weasyprint/formatting_structure/boxes.py
Expand Up @@ -475,6 +475,7 @@ class TextBox(InlineLevelBox):
def __init__(self, element_tag, style, element, text):
assert text
super().__init__(element_tag, style, element)
self.original_text = text
text_transform = style['text_transform']
if text_transform != 'none':
text = {
Expand Down
13 changes: 5 additions & 8 deletions weasyprint/formatting_structure/build.py
Expand Up @@ -374,6 +374,7 @@ def add_text(text):
has_text.add(True)
if text:
if content_boxes and isinstance(content_boxes[-1], boxes.TextBox):
content_boxes[-1].original_text += text
content_boxes[-1].text += text
else:
content_boxes.append(
Expand Down Expand Up @@ -649,12 +650,8 @@ def parse_again(mixin_pagebased_counters={}):
box_list = compute_content_list(
content_list, box, counter_values, css_token, parse_again,
target_collector, counter_style, element=element)

if box_list is None:
box.bookmark_label = ''
else:
box.bookmark_label = ''.join(
box.text for box in box_list if isinstance(box, boxes.TextBox))
if box_list:
box.bookmark_label = ''.join(box_text(box) for box in box_list)


def set_content_lists(element, box, style, counter_values, target_collector,
Expand Down Expand Up @@ -1550,10 +1547,10 @@ def set_viewport_overflow(root_box):

def box_text(box):
if isinstance(box, boxes.TextBox):
return box.text
return box.original_text
elif isinstance(box, boxes.ParentBox):
return ''.join(
child.text for child in box.descendants()
child.original_text for child in box.descendants()
if not child.element_tag.endswith('::before') and
not child.element_tag.endswith('::after') and
not child.element_tag.endswith('::marker') and
Expand Down

0 comments on commit 85e1ea1

Please sign in to comment.