Skip to content

Commit

Permalink
Automatically close objects via __del__ (#142)
Browse files Browse the repository at this point in the history
  • Loading branch information
mara004 committed Sep 25, 2022
1 parent 69b3e9b commit b068b9e
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 27 deletions.
1 change: 1 addition & 0 deletions docs/devel/changelog_staging.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
<!-- List character: dash (-) -->

# Changelog for next release
- Finally implemented automatic closing of objects via `__del__` finaliser methods, taking into account that Python may call finalisers in an arbitrary order, while an object must not be closed if one of its parents has already been closed. It is still recommended that you call `close()` manually, to release memory independently of Python garbage collection, and to ensure that child objects are always closed as well. However, the risk of memory leaks due to missing `close()` calls is now greatly reduced.
3 changes: 1 addition & 2 deletions docs/source/python_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ Python API

.. Warning::
* PDFium is not thread-safe. If you need to parallelise time-consuming PDFium tasks, use processes instead of threads.
* Attempting to work with an object after it has been closed will result in a segmentation fault.
* Not calling the ``close()`` methods as required may lead to memory leaks.
* Calling ``close()`` makes objects inoperable, so you should not access them afterwards.

Version
*******
Expand Down
73 changes: 60 additions & 13 deletions src/pypdfium2/_helpers/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,20 @@
logger = logging.getLogger(__name__)


# Notes on automatic closing of objects (concerns PdfDocument, PdfPage, PdfTextPage, PdfTextSearcher):
#
# pypdfium2 implements __del__ finaliser methods that are run by Python once it has identified an object as garbage and is about to remove it.
# However, objects must be closed in correct order. It is illegal to close a subordinate object if any of its superordinate objects has been closed already.
# If Python garbage collects multiple pypdfium2 objects in one cycle, it may finalise them in arbitrary order.
# Therefore, we implement checks to only call PDFium close functions if all superordinate objects are still open.
# Even if some exception occurred, __del__ will still be called.
# However, it is not guaranteed that __del__ is called on objects that still exist when the interpreter exits. This is irrelevant for us because all associated memory is released when the Python process terminates.
# If an exception happens in __del__ itself, it is caught and converted to an "unraisable exception" warning by Python. This will happen if an attribute accessed in __del__ is not initialised yet due to a parameter error on construction.
# Correct functionality may be confirmed by adding context information and debug prints to close().
#
# See also https://docs.python.org/3/reference/datamodel.html#object.__del__


class PdfDocument (BitmapConvAliases):
"""
Document helper class.
Expand Down Expand Up @@ -71,6 +85,8 @@ def __init__(
autoclose = False,
):

self.raw = None

self._orig_input = input_data
self._actual_input = input_data
self._rendering_input = None
Expand Down Expand Up @@ -125,29 +141,44 @@ def __getitem__(self, i):
def __delitem__(self, i):
"""Implement ``del obj[i]`` by forwarding *i* to :meth:`.del_page`."""
self.del_page(i)

def __del__(self):
    # Finaliser hook: delegate to close(), which returns early when there is
    # nothing to release (already closed, or construction never completed).
    self.close()


@classmethod
def new(cls):
    """
    Returns:
        PdfDocument: A new, empty document.
    """
    new_pdf = pdfium.FPDF_CreateNewDocument()
    return cls(new_pdf)


def _skip_close(self):
    """
    Tell whether :meth:`.close` must not call into PDFium.

    Returns:
        bool: True if the ``raw`` handle is missing or :data:`None`, False otherwise.
    """
    # ``raw`` may not exist at all: if __init__ raised before its body ran
    # (e.g. a missing argument), Python still finalises the object, and a plain
    # attribute access here would surface as an "unraisable exception".
    return getattr(self, "raw", None) is None


def close(self):
    """
    Close the document to release allocated memory.
    If the document is already closed, or was never successfully constructed, nothing will be done.
    This method is called by the ``__del__`` finaliser.
    """

    if self._skip_close():
        return  # self is closed already, or exception on construction

    # The form environment is exited before the document handle is released.
    self.exit_formenv()
    pdfium.FPDF_CloseDocument(self.raw)
    # Mark as closed so that repeated close() calls become no-ops.
    self.raw = None

    # Release helper resources only after the document itself is closed.
    if self._ld_data is not None:
        self._ld_data.close()
    if self._autoclose and is_input_buffer(self._actual_input):
        self._actual_input.close()


@classmethod
def new(cls):
    """
    Create a blank document.

    Returns:
        PdfDocument: A new, empty document.
    """
    return cls(pdfium.FPDF_CreateNewDocument())


def init_formenv(self):
"""
Initialise a form environment handle for this document.
Expand All @@ -166,11 +197,10 @@ def init_formenv(self):

def exit_formenv(self):
"""
Release allocated memory by exiting the form environment.
Exit the form environment to release allocated memory.
If the form environment is not initialised, nothing will be done.
Note:
This method is called by :meth:`.close`.
This method is called by :meth:`.close`, which is called by the ``__del__`` finaliser.
"""
if self._form_env is None:
return
Expand Down Expand Up @@ -563,10 +593,27 @@ def __init__(self, raw, pdf, font_data):
self.pdf = pdf
self._font_data = font_data

def __del__(self):
    # Finaliser hook: delegate to close(), which returns early when there is
    # nothing to release.
    self.close()


def _skip_close(self):
    """
    Tell whether :meth:`.close` must not call into PDFium.

    Returns:
        bool: True if this object was never fully constructed, if its ``raw`` handle
        is :data:`None`, or if the parent document reports that closing must be skipped.
    """
    try:
        raw, parent = self.raw, self.pdf
    except AttributeError:
        # __init__ never ran (e.g. argument error), yet Python still finalises
        # the object. There is no handle to release.
        return True
    if raw is None:
        return True
    return parent._skip_close()


def close(self):
    """
    Close the font to release allocated memory.
    If the font (or its parent document) is already closed, nothing will be done.
    This method is called by the ``__del__`` finaliser.
    """

    if self._skip_close():
        return

    pdfium.FPDFFont_Close(self.raw)
    # Mark as closed so that repeated close() calls become no-ops.
    self.raw = None

    # NOTE(review): expression statement with no effect of its own; presumably a
    # deliberate "use" of the font data after the font is closed - confirm intent.
    id(self._font_data)
18 changes: 17 additions & 1 deletion src/pypdfium2/_helpers/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,28 @@ def __init__(self, raw, pdf):
self.raw = raw
self.pdf = pdf

def __del__(self):
    # Finaliser hook: delegate to close(), which returns early when there is
    # nothing to release.
    self.close()


def _skip_close(self):
    """
    Tell whether :meth:`.close` must not call into PDFium.

    Returns:
        bool: True if this object was never fully constructed, if its ``raw`` handle
        is :data:`None`, or if the parent document reports that closing must be skipped.
    """
    try:
        raw, parent = self.raw, self.pdf
    except AttributeError:
        # __init__ never ran (e.g. argument error), yet Python still finalises
        # the object. There is no handle to release.
        return True
    if raw is None:
        return True
    return parent._skip_close()


def close(self):
    """
    Close the page to release allocated memory.
    If the page (or its parent document) is already closed, nothing will be done.
    This method is called by the ``__del__`` finaliser.
    """

    if self._skip_close():
        return

    pdfium.FPDF_ClosePage(self.raw)
    # Mark as closed so that repeated close() calls become no-ops.
    self.raw = None


def get_width(self):
Expand Down
51 changes: 42 additions & 9 deletions src/pypdfium2/_helpers/textpage.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,28 @@ def __init__(self, raw, page):
self.raw = raw
self.page = page

def __del__(self):
    # Finaliser hook: delegate to close(), which returns early when there is
    # nothing to release.
    self.close()


def _skip_close(self):
    """
    Tell whether :meth:`.close` must not call into PDFium.

    Returns:
        bool: True if this object was never fully constructed, if its ``raw`` handle
        is :data:`None`, or if the parent page reports that closing must be skipped.
    """
    try:
        raw, parent = self.raw, self.page
    except AttributeError:
        # __init__ never ran (e.g. argument error), yet Python still finalises
        # the object. There is no handle to release.
        return True
    if raw is None:
        return True
    return parent._skip_close()


def close(self):
    """
    Close the textpage to release allocated memory.
    If the textpage (or its parent page or document) is already closed, nothing will be done.
    This method is called by the ``__del__`` finaliser.
    """

    if self._skip_close():
        return

    pdfium.FPDFText_ClosePage(self.raw)
    # Mark as closed so that repeated close() calls become no-ops.
    self.raw = None


def get_text(self, left=0, bottom=0, right=0, top=0):
Expand Down Expand Up @@ -221,6 +237,30 @@ def __init__(self, raw, textpage):
self.raw = raw
self.textpage = textpage

def __del__(self):
    # Finaliser hook: delegate to close(), which returns early when there is
    # nothing to release.
    self.close()


def _skip_close(self):
    """
    Tell whether :meth:`.close` must not call into PDFium.

    Returns:
        bool: True if this object was never fully constructed, if its ``raw`` handle
        is :data:`None`, or if the parent textpage reports that closing must be skipped.
    """
    try:
        raw, parent = self.raw, self.textpage
    except AttributeError:
        # __init__ never ran (e.g. argument error), yet Python still finalises
        # the object. There is no handle to release.
        return True
    if raw is None:
        return True
    return parent._skip_close()


def close(self):
    """
    Close the text searcher to release allocated memory.
    If the text searcher (or its parent textpage, page or document) is already closed, nothing will be done.
    This method is called by the ``__del__`` finaliser.
    """

    if self._skip_close():
        return

    pdfium.FPDFText_FindClose(self.raw)
    # Mark as closed so that repeated close() calls become no-ops.
    self.raw = None


def _get_occurrence(self, find_func):
found = find_func(self.raw)
if not found:
Expand All @@ -245,10 +285,3 @@ def get_prev(self):
or :data:`None` if the first occurrence was passed.
"""
return self._get_occurrence(pdfium.FPDFText_FindPrev)

def close(self):
    """
    Close the search structure to release allocated memory.
    If ``_skip_close()`` reports that closing must be skipped, nothing will be done.
    """
    # Guard the PDFium call so that this definition behaves the same as the
    # guarded close() logic used by the finaliser: never release a handle twice.
    if self._skip_close():
        return

    pdfium.FPDFText_FindClose(self.raw)
    # Mark as closed so that repeated close() calls become no-ops.
    self.raw = None
2 changes: 0 additions & 2 deletions tests/helpers/test_opener.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,6 @@ def test_open_new():


def test_open_invalid():
with pytest.raises(TypeError, match=re.escape("__init__() missing 1 required positional argument: 'input_data'")):
pdf = pdfium.PdfDocument()
with pytest.raises(TypeError, match=re.escape("Invalid input type 'int'")):
pdf = pdfium.PdfDocument(123)
with pytest.raises(FileNotFoundError, match=re.escape("File does not exist: '%s'" % abspath("invalid/path"))):
Expand Down

0 comments on commit b068b9e

Please sign in to comment.