Skip to content

Commit

Permalink
TST: Add xfail test for #2336
Browse files: browse the repository at this point in the history
  • Loading branch information
MartinThoma committed Dec 24, 2023
1 parent 3ab1581 commit 3d26c24
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 0 deletions.
2 changes: 2 additions & 0 deletions tests/example_files.yaml
Expand Up @@ -110,3 +110,5 @@
url: https://github.com/py-pdf/pypdf/assets/4083478/56c93021-33cd-4387-ae13-5cbe7e673f42
- local_filename: paid.pdf
  url: https://github.com/py-pdf/pypdf/files/12050253/tt.pdf
- local_filename: Pesquisa-de-Precos-Combustiveis-novembro-2023.pdf
  url: https://www.joinville.sc.gov.br/wp-content/uploads/2023/11/Pesquisa-de-Precos-Combustiveis-novembro-2023.pdf
13 changes: 13 additions & 0 deletions tests/test_text_extraction.py
Expand Up @@ -3,13 +3,16 @@
The tested code might be in _page.py.
"""
from io import BytesIO
from pathlib import Path

import pytest

from pypdf import PdfReader, mult
from pypdf._text_extraction import set_custom_rtl

from . import get_data_from_url

# Absolute, resolved path of the directory that contains this test module.
TESTS_ROOT = Path(__file__).parent.resolve()
# Parent directory of the tests directory.
PROJECT_ROOT = TESTS_ROOT.parent
# "resources" directory directly under PROJECT_ROOT.
RESOURCE_ROOT = PROJECT_ROOT / "resources"
Expand Down Expand Up @@ -99,3 +102,13 @@ def visitor_text(text, cm, tm, font_dict, font_size) -> None:
x = matches[0]["x"]
y = matches[0]["y"]
assert constraint(x, y), f'Line "{text}" is wrong at x:{x}, y:{y}'


@pytest.mark.xfail(reason="known whitespace issue #2336")
@pytest.mark.enable_socket()
def test_issue_2336():
    """Text extracted from the first page should contain "Beira Rio".

    Marked xfail: the xfail reason records this as the known whitespace
    issue #2336, so the assertion is currently expected to fail.
    """
    pdf_name = "Pesquisa-de-Precos-Combustiveis-novembro-2023.pdf"
    # NOTE(review): get_data_from_url presumably returns the PDF bytes for
    # the example file registered under this name — confirm against helper.
    pdf_bytes = get_data_from_url(name=pdf_name)
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    extracted = first_page.extract_text()
    assert "Beira Rio" in extracted

0 comments on commit 3d26c24

Please sign in to comment.