{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":489948938,"defaultBranch":"main","name":"benchmarks","ownerLogin":"py-pdf","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2022-05-08T13:18:00.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/102914013?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1652015881.049901","currentOid":""},"activityList":{"items":[{"before":"4f14b3c93c1ee71021cccb5cb256be99ccb83c57","after":"24c51dd4194b93fc55d4615f0b6ed9b6ef67b359","ref":"refs/heads/main","pushedAt":"2023-10-31T21:44:53.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"ENH: Improve PDFium text extraction (#11)\n\nSeveral additional changes:\r\n\r\n* ENH: Add PDFium image extraction\r\n* ROB: Make opening/parsing the cache file more robust\r\n* MAINT: Update deprecated pdantic API\r\n* MAINT: Add pdfrw to main.in","shortMessageHtmlLink":"ENH: Improve PDFium text extraction (#11)"}},{"before":"ce340e84af84a755be12f824fa49c0240e30425f","after":"4f14b3c93c1ee71021cccb5cb256be99ccb83c57","ref":"refs/heads/main","pushedAt":"2023-08-26T10:15:10.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"Update: pypdf got faster","shortMessageHtmlLink":"Update: pypdf got faster"}},{"before":"267a925f465bd4aeb44d8c5b9b1e9f592317ca97","after":"ce340e84af84a755be12f824fa49c0240e30425f","ref":"refs/heads/main","pushedAt":"2023-08-02T20:38:44.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"Add header/footer removal for pypdf","shortMessageHtmlLink":"Add header/footer removal for pypdf"}},{"before":"8e94c021ddaa5d76146e4bc98fcc8110c5eec204","after":"267a925f465bd4aeb44d8c5b9b1e9f592317ca97","ref":"refs/heads/main","pushedAt":"2023-08-01T15:16:53.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"Update after changing the ground truth","shortMessageHtmlLink":"Update after changing the ground truth"}},{"before":"e7fb117f168a82353cf936bb749d4ca666842353","after":"8e94c021ddaa5d76146e4bc98fcc8110c5eec204","ref":"refs/heads/main","pushedAt":"2023-08-01T12:42:06.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"MAINT: Fix hyphenation","shortMessageHtmlLink":"MAINT: Fix hyphenation"}},{"before":"38a4fa635c6594fa2e204e741edd4b58b713f908","after":"e7fb117f168a82353cf936bb749d4ca666842353","ref":"refs/heads/main","pushedAt":"2023-08-01T06:25:55.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"MAINT: pypdf now applies post-processing","shortMessageHtmlLink":"MAINT: pypdf now applies post-processing"}},{"before":"9633ada9455eac9d8c7801903cc25a1a45ca51fe","after":"38a4fa635c6594fa2e204e741edd4b58b713f908","ref":"refs/heads/main","pushedAt":"2023-08-01T06:04:29.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"BUG: Fix read ground truth","shortMessageHtmlLink":"BUG: Fix read ground truth"}},{"before":"a9b8e2721731d52f3908f2d33a3f220371876b92","after":"9633ada9455eac9d8c7801903cc25a1a45ca51fe","ref":"refs/heads/main","pushedAt":"2023-07-29T14:32:25.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"MAINT: pypdf==3.14.0 improved math extraction\n\nA big thank you to pubpub-zz for this improvement","shortMessageHtmlLink":"MAINT: pypdf==3.14.0 improved math extraction"}},{"before":"5d37d6bdd3947d6345349a17796dde9f9d915bac","after":"a9b8e2721731d52f3908f2d33a3f220371876b92","ref":"refs/heads/main","pushedAt":"2023-07-29T14:11:03.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"BUG: Fix ground truth","shortMessageHtmlLink":"BUG: Fix ground truth"}},{"before":"7345129d27281a7e5b11079ca1487667478abc2e","after":"5d37d6bdd3947d6345349a17796dde9f9d915bac","ref":"refs/heads/main","pushedAt":"2023-07-09T14:42:49.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"pypdf==3.12.1 update","shortMessageHtmlLink":"pypdf==3.12.1 update"}},{"before":"a78f609d3d4d0d6a298c72c00ddc05b4d35fce53","after":"7345129d27281a7e5b11079ca1487667478abc2e","ref":"refs/heads/main","pushedAt":"2023-07-02T09:45:59.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"ENH: Add pdfrw for watermarking","shortMessageHtmlLink":"ENH: Add pdfrw for watermarking"}},{"before":"38bdc80a2934ec44f8be69891b6cb6ef7f51002f","after":"a78f609d3d4d0d6a298c72c00ddc05b4d35fce53","ref":"refs/heads/main","pushedAt":"2023-07-02T09:12:34.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"Apply compression for pypdf when watermarking","shortMessageHtmlLink":"Apply compression for pypdf when watermarking"}},{"before":"3689773f180bf9dd7824dc374b508a9c042fb2c1","after":"38bdc80a2934ec44f8be69891b6cb6ef7f51002f","ref":"refs/heads/main","pushedAt":"2023-07-02T08:31:45.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"MAINT: Refactor benchmark script into modules","shortMessageHtmlLink":"MAINT: Refactor benchmark script into modules"}},{"before":"dec01bc032faf34bff906f378cc7e1c883974703","after":"3689773f180bf9dd7824dc374b508a9c042fb2c1","ref":"refs/heads/main","pushedAt":"2023-07-02T07:24:33.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"Reflect the fact that it contains several different benchmarks","shortMessageHtmlLink":"Reflect the fact that it contains several different benchmarks"}},{"before":"8000a538b5b6b3ccfb04432a590365028d3b943b","after":"dec01bc032faf34bff906f378cc7e1c883974703","ref":"refs/heads/main","pushedAt":"2023-07-02T07:22:29.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"DOC: pdfplumber uses pdfminer.six","shortMessageHtmlLink":"DOC: pdfplumber uses pdfminer.six"}},{"before":"6e2ba09072461928a784037a1ecabbad32c68c45","after":"8000a538b5b6b3ccfb04432a590365028d3b943b","ref":"refs/heads/main","pushedAt":"2023-07-02T07:19:30.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"Fix borb text extraction code","shortMessageHtmlLink":"Fix borb text extraction code"}},{"before":"a2611a6be86e8aea397cc01e662824b148d826c1","after":"6e2ba09072461928a784037a1ecabbad32c68c45","ref":"refs/heads/main","pushedAt":"2023-07-01T22:27:09.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"ENH: Re-run benchmark with latest libraries\n\npdfplumber:\n * Improvements in ligature support and thus text extraction improved\nborb 2.1.7 -> 2.1.15:\n * Massive speed improvement for text extraction; however, it's still very slow\npypdf 3.2.0 -> 3.11.1:\n * Slight speed improvement for text extraction\n * Image extraction became way slower, but more images are extracted","shortMessageHtmlLink":"ENH: Re-run benchmark with latest libraries"}},{"before":"6397bd0f1beaad6ebbaf887dc5df7def9aa4c9c3","after":"a2611a6be86e8aea397cc01e662824b148d826c1","ref":"refs/heads/main","pushedAt":"2023-07-01T21:17:00.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"ENH: Add watermarking resulting file size","shortMessageHtmlLink":"ENH: Add watermarking resulting file size"}},{"before":"aa40d4c64b2c4635d1c3c85a3bbfdfd95a29cf68","after":"6397bd0f1beaad6ebbaf887dc5df7def9aa4c9c3","ref":"refs/heads/main","pushedAt":"2023-04-21T22:48:19.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"Add table extraction benchmark","shortMessageHtmlLink":"Add table extraction benchmark"}},{"before":"6684872a05407cdbd68648bbf04161ea089e1072","after":"aa40d4c64b2c4635d1c3c85a3bbfdfd95a29cf68","ref":"refs/heads/main","pushedAt":"2023-04-21T21:49:40.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"MartinThoma","name":"Martin Thoma","path":"/MartinThoma","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1658117?s=80&v=4"},"commit":{"message":"Ensure tika server is running","shortMessageHtmlLink":"Ensure tika server is running"}}],"hasNextPage":false,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAADpG_s4QA","startCursor":null,"endCursor":null}},"title":"Activity ยท py-pdf/benchmarks"}