py-pdf · MartinThoma · Apr 7, 2022 · Mar 19, 2017 · Apr 6, 2022 · Apr 7, 2022
diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py
@@ -2648,7 +2648,7 @@ def compressContentStreams(self):
                 content = ContentStream(content, self.pdf)
             self[NameObject("/Contents")] = content.flateEncode()
 
-    def extractText(self):
+    def extractText(self, Tj_sep="", TJ_sep=" "):
         """
         Locate all text drawing commands, in the order they are provided in the
         content stream, and extract the text.  This works well for some PDF
@@ -2670,6 +2670,7 @@ def extractText(self):
             if operator == b_("Tj"):
                 _text = operands[0]
                 if isinstance(_text, TextStringObject):
+                    text += Tj_sep
                     text += _text
                     text += "\n"
             elif operator == b_("T*"):
@@ -2687,7 +2688,7 @@ def extractText(self):
             elif operator == b_("TJ"):
                 for i in operands[0]:
                     if isinstance(i, TextStringObject):
-                        text += " "
+                        text += TJ_sep
                         text += i
                 text += "\n"
         return text