Skip to content

Commit

Permalink
fitz/: Fix pymupdf#2238 - use 'overlap' rather than 'contains' when extracting text.
Browse files Browse the repository at this point in the history

Also fixed Story.draw() to handle exceptions e.g. from fz_draw_story().
  • Loading branch information
julian-smith-artifex-com committed Mar 7, 2023
1 parent 0c4e3cb commit d605482
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 14 deletions.
23 changes: 16 additions & 7 deletions fitz/fitz.i
Original file line number Diff line number Diff line change
Expand Up @@ -11528,7 +11528,7 @@ struct TextPage {
fz_rect linerect = fz_empty_rect;
for (ch = line->first_char; ch; ch = ch->next) {
fz_rect cbbox = JM_char_bbox(gctx, line, ch);
if (!fz_contains_rect(tp_rect, cbbox) &&
if (!JM_rects_overlap(tp_rect, cbbox) &&
!fz_is_infinite_rect(tp_rect)) {
continue;
}
Expand All @@ -11542,7 +11542,7 @@ struct TextPage {
blockrect = fz_union_rect(blockrect, linerect);
}
text = JM_EscapeStrFromBuffer(gctx, res);
} else if (fz_contains_rect(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect)) {
} else if (JM_rects_overlap(tp_rect, block->bbox) || fz_is_infinite_rect(tp_rect)) {
fz_image *img = block->u.i.image;
fz_colorspace *cs = img->colorspace;
text = PyUnicode_FromFormat("<image: %s, width: %d, height: %d, bpc: %d>", fz_colorspace_name(gctx, cs), img->w, img->h, img->bpc);
Expand Down Expand Up @@ -11610,7 +11610,7 @@ struct TextPage {
buflen = 0; // reset char counter
for (ch = line->first_char; ch; ch = ch->next) {
fz_rect cbbox = JM_char_bbox(gctx, line, ch);
if (!fz_contains_rect(tp_rect, cbbox) &&
if (!JM_rects_overlap(tp_rect, cbbox) &&
!fz_is_infinite_rect(tp_rect)) {
continue;
}
Expand Down Expand Up @@ -13976,11 +13976,20 @@ struct Story
}


void draw( struct DeviceWrapper* device, PyObject* matrix=NULL)
FITZEXCEPTION(draw, !result)
PyObject* draw( struct DeviceWrapper* device, PyObject* matrix=NULL)
{
fz_matrix ctm2 = JM_matrix_from_py( matrix);
fz_device *dev = (device) ? device->device : NULL;
fz_draw_story( gctx, (fz_story*) $self, dev, ctm2);
fz_try(gctx)
{
fz_matrix ctm2 = JM_matrix_from_py( matrix);
fz_device *dev = (device) ? device->device : NULL;
fz_draw_story( gctx, (fz_story*) $self, dev, ctm2);
}
fz_catch(gctx)
{
return NULL;
}
Py_RETURN_NONE;
}

FITZEXCEPTION(document, !result)
Expand Down
6 changes: 5 additions & 1 deletion fitz/helper-other.i
Original file line number Diff line number Diff line change
Expand Up @@ -1295,7 +1295,11 @@ fz_archive *JM_archive_from_py(fz_context *ctx, fz_archive *arch, PyObject *path
}



// Report whether rectangles a and b overlap.
// The rectangles are intersected with fz_intersect_rect(); the result is 1
// when that intersection is not empty according to fz_is_empty_rect(), and
// 0 otherwise.
int JM_rects_overlap(const fz_rect a, const fz_rect b)
{
    if (fz_is_empty_rect(fz_intersect_rect(a, b))) {
        return 0;
    }
    return 1;
}

//-----------------------------------------------------------------------------
// dummy structure for various tools and utilities
Expand Down
12 changes: 6 additions & 6 deletions fitz/helper-stext.i
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ JM_new_buffer_from_stext_page(fz_context *ctx, fz_stext_page *page)
if (block->type == FZ_STEXT_BLOCK_TEXT) {
for (line = block->u.t.first_line; line; line = line->next) {
for (ch = line->first_char; ch; ch = ch->next) {
if (!fz_contains_rect(rect, JM_char_bbox(ctx, line, ch)) &&
if (!JM_rects_overlap(rect, JM_char_bbox(ctx, line, ch)) &&
!fz_is_infinite_rect(rect)) {
continue;
}
Expand Down Expand Up @@ -420,7 +420,7 @@ JM_search_stext_page(fz_context *ctx, fz_stext_page *page, const char *needle)
for (line = block->u.t.first_line; line; line = line->next) {
for (ch = line->first_char; ch; ch = ch->next) {
if (!fz_is_infinite_rect(rect) &&
!fz_contains_rect(rect, JM_char_bbox(ctx, line, ch))) {
!JM_rects_overlap(rect, JM_char_bbox(ctx, line, ch))) {
goto next_char;
}
try_new_match:
Expand Down Expand Up @@ -482,7 +482,7 @@ JM_print_stext_page_as_text(fz_context *ctx, fz_output *out, fz_stext_page *page
for (ch = line->first_char; ch; ch = ch->next) {
chbbox = JM_char_bbox(ctx, line, ch);
if (fz_is_infinite_rect(rect) ||
fz_contains_rect(rect, chbbox)) {
JM_rects_overlap(rect, chbbox)) {
last_char = ch->c;
n = fz_runetochar(utf, ch->c);
for (i = 0; i < n; i++) {
Expand Down Expand Up @@ -571,7 +571,7 @@ JM_make_spanlist(fz_context *ctx, PyObject *line_dict,

for (ch = line->first_char; ch; ch = ch->next) {
fz_rect r = JM_char_bbox(ctx, line, ch);
if (!fz_contains_rect(tp_rect, r) &&
if (!JM_rects_overlap(tp_rect, r) &&
!fz_is_infinite_rect(tp_rect)) {
continue;
}
Expand Down Expand Up @@ -769,7 +769,7 @@ void JM_make_textpage_dict(fz_context *ctx, fz_stext_page *tp, PyObject *page_di
int block_n = -1;
for (block = tp->first_block; block; block = block->next) {
block_n++;
if (!fz_contains_rect(tp_rect, block->bbox) &&
if (!JM_rects_overlap(tp_rect, block->bbox) &&
!fz_is_infinite_rect(tp_rect) &&
block->type == FZ_STEXT_BLOCK_IMAGE) {
continue;
Expand Down Expand Up @@ -817,7 +817,7 @@ JM_copy_rectangle(fz_context *ctx, fz_stext_page *page, fz_rect area)
int line_had_text = 0;
for (ch = line->first_char; ch; ch = ch->next) {
fz_rect r = JM_char_bbox(ctx, line, ch);
if (fz_contains_rect(area, r)) {
if (JM_rects_overlap(area, r)) {
line_had_text = 1;
if (need_new_line) {
fz_append_string(ctx, buffer, "\n");
Expand Down

0 comments on commit d605482

Please sign in to comment.