Skip to content

Commit

Permalink
Remove original_text attribute
Browse files Browse the repository at this point in the history
This "hack" was introduced to ignore the text-transform property for bookmarks.
Now that the whitespace management logic is done after the box creation (just
before building the bookmark label), we can do the same for the text
transformation (just after building the bookmark label).

Related to #607 and #137.
  • Loading branch information
liZe committed Oct 13, 2021
1 parent e5beff6 commit 833afb6
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 25 deletions.
5 changes: 0 additions & 5 deletions tests/test_pdf.py
Expand Up @@ -288,11 +288,6 @@ def test_bookmarks_14():
f </h1>
<h1> g <span> h </span> i </h1>
''').write_pdf()
# a
# |_ b
# |_ c
# L_ d
# e
assert re.findall(b'/Count ([0-9-]*)', pdf)[-1] == b'4'
assert re.findall(b'/Title \\((.*)\\)', pdf) == [
b'a', b'b c d', b'e f', b'g h i']
Expand Down
16 changes: 0 additions & 16 deletions weasyprint/formatting_structure/boxes.py
Expand Up @@ -468,25 +468,9 @@ class TextBox(InlineLevelBox):
"""
justification_spacing = 0

# http://stackoverflow.com/questions/16317534/
ascii_to_wide = {i: chr(i + 0xfee0) for i in range(0x21, 0x7f)}
ascii_to_wide.update({0x20: '\u3000', 0x2D: '\u2212'})

def __init__(self, element_tag, style, element, text):
assert text
super().__init__(element_tag, style, element)
self.original_text = text
text_transform = style['text_transform']
if text_transform != 'none':
text = {
'uppercase': lambda t: t.upper(),
'lowercase': lambda t: t.lower(),
# Python’s unicode.capitalize is not the same.
'capitalize': lambda t: t.title(),
'full-width': lambda t: t.translate(self.ascii_to_wide),
}[text_transform](text)
if style['hyphens'] == 'none':
text = text.replace('\u00AD', '') # U+00AD SOFT HYPHEN (SHY)
self.text = text

def copy_with_text(self, text):
Expand Down
31 changes: 27 additions & 4 deletions weasyprint/formatting_structure/build.py
Expand Up @@ -45,6 +45,10 @@
('table-caption',): boxes.TableCaptionBox,
}

# Table for str.translate() mapping printable ASCII code points to their
# full-width counterparts, which sit 0xfee0 code points higher.
# See http://stackoverflow.com/questions/16317534/
ASCII_TO_WIDE = {
    code_point: chr(code_point + 0xfee0)
    for code_point in range(0x21, 0x7f)}
# Two code points do not follow the fixed offset.
ASCII_TO_WIDE[0x20] = '\u3000'  # space -> U+3000 ideographic space
ASCII_TO_WIDE[0x2D] = '\u2212'  # hyphen-minus -> U+2212 minus sign


def build_formatting_structure(element_tree, style_for, get_image_from_uri,
base_url, target_collector, counter_style):
Expand Down Expand Up @@ -190,9 +194,9 @@ def element_to_box(element, style_for, get_image_from_uri, base_url,

box.children = children
process_whitespace(box)
# calculate string-set and bookmark-label
set_content_lists(
element, box, style, counter_values, target_collector, counter_style)
process_text_transform(box)

if marker_boxes and len(box.children) == 1:
# See https://www.w3.org/TR/css-lists-3/#list-style-position-outside
Expand Down Expand Up @@ -374,7 +378,6 @@ def add_text(text):
has_text.add(True)
if text:
if content_boxes and isinstance(content_boxes[-1], boxes.TextBox):
content_boxes[-1].original_text += text
content_boxes[-1].text += text
else:
content_boxes.append(
Expand Down Expand Up @@ -1256,6 +1259,26 @@ def process_whitespace(box, following_collapsible_space=False):
return following_collapsible_space


def _capitalize(text):
    """Return ``text`` capitalized as CSS "text-transform: capitalize" does.

    The first letter or digit of each whitespace-separated word is put in
    titlecase and every other character is left untouched. ``str.title()``
    is not suitable here: it lowercases the rest of each word ("hELLO" gives
    "Hello") and starts a new word after any non-letter ("don't" gives
    "Don'T").

    """
    transformed = []
    word_start = True
    for char in text:
        if char.isspace():
            word_start = True
        elif word_start and char.isalnum():
            # Punctuation before the first letter is skipped, so that
            # '"quoted' gives '"Quoted'. Calling title() on a single
            # character yields its titlecase form.
            char = char.title()
            word_start = False
        transformed.append(char)
    return ''.join(transformed)


def process_text_transform(box):
    """Apply "text-transform" and "hyphens: none" to the text of ``box``.

    For a text box, ``box.text`` is replaced in place by its transformed
    value according to ``box.style['text_transform']``, then soft hyphens
    are removed when ``box.style['hyphens']`` is ``'none'``.

    For a parent box that is not running, the function recurses into its
    text and inline children only.
    NOTE(review): other children are presumably handled by their own call
    when their element is built -- confirm against the callers.

    """
    if isinstance(box, boxes.TextBox):
        text_transform = box.style['text_transform']
        if text_transform != 'none':
            box.text = {
                'uppercase': str.upper,
                'lowercase': str.lower,
                # Python’s str.title and str.capitalize are not the same
                # as the CSS transformation.
                'capitalize': _capitalize,
                'full-width': lambda text: text.translate(ASCII_TO_WIDE),
            }[text_transform](box.text)
        if box.style['hyphens'] == 'none':
            box.text = box.text.replace('\u00AD', '')  # U+00AD is soft hyphen

    if isinstance(box, boxes.ParentBox) and not box.is_running():
        for child in box.children:
            if isinstance(child, (boxes.TextBox, boxes.InlineBox)):
                process_text_transform(child)


def inline_in_block(box):
"""Build the structure of lines inside blocks and return a new box tree.
Expand Down Expand Up @@ -1547,10 +1570,10 @@ def set_viewport_overflow(root_box):

def box_text(box):
if isinstance(box, boxes.TextBox):
return box.original_text
return box.text
elif isinstance(box, boxes.ParentBox):
return ''.join(
child.original_text for child in box.descendants()
child.text for child in box.descendants()
if not child.element_tag.endswith('::before') and
not child.element_tag.endswith('::after') and
not child.element_tag.endswith('::marker') and
Expand Down
1 change: 1 addition & 0 deletions weasyprint/layout/page.py
Expand Up @@ -342,6 +342,7 @@ def make_box(at_keyword, containing_block):
context.get_image_from_uri, context.target_collector,
context.counter_style, context, page)
build.process_whitespace(box)
build.process_text_transform(box)
box = build.anonymous_table_boxes(box)
box = build.flex_boxes(box)
box = build.inline_in_block(box)
Expand Down

0 comments on commit 833afb6

Please sign in to comment.