Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #2035 #2047

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
48 changes: 48 additions & 0 deletions .github/workflows/test_1.21.yml
@@ -0,0 +1,48 @@
# Scheduled / manually triggered workflow that builds and tests the 1.21 branch.
name: Test branch 1.21.

on:
schedule:
# Runs once a day at 05:13 (GitHub Actions evaluates cron schedules in UTC).
- cron: '13 5 * * *'
workflow_dispatch:

jobs:

test_branch_121:
# Simple build+test of branch 1.21 on a single platform. This is a cut-down
# version of `build_wheels`: it checks out ref 1.21, keeps the default
# hard-coded MuPDF URL, and builds for a single platform and Python version.
#
name: Test branch 1.21
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]

steps:

- uses: actions/checkout@v2
with:
ref: 1.21
- uses: actions/setup-python@v2

# Build and test the wheel with the cibuildwheel action.
#
- name: cibuildwheel
uses: pypa/cibuildwheel@v2.11.2

env:
# We test building with the default hard-coded MuPDF URL, so no MuPDF
# location override is set here.

# Build for a single CPU architecture.
CIBW_ARCHS_LINUX: x86_64

# Build for a single Python version.
CIBW_BUILD: "cp311*"

# Don't build for unsupported platforms.
CIBW_SKIP: "pp* *i686 *-musllinux_* cp36*"

# Get cibuildwheel to run pytest with each wheel.
CIBW_TEST_REQUIRES: "fontTools pytest"
CIBW_TEST_COMMAND: "pytest -s {project}/tests"
CIBW_BUILD_VERBOSITY: 3
2 changes: 1 addition & 1 deletion docs/document.rst
Expand Up @@ -519,7 +519,7 @@ For details on **embedded files** refer to Appendix 3.

- ``startpage``: (int) the first page number (0-based) to apply the label rule. This key **must be present**. The rule is applied to all subsequent pages until either end of document or superseded by the rule with the next larger page number.
- ``prefix``: (str) an arbitrary string to start the label with, e.g. "A-". Default is "".
- ``style``: (str) the numbering style. Available are "D" (decimal), "r"/"R" (Roman numbers, lower / upper case), and "a"/"A" (lower / upper case alphabetical numbering: "a" through "z", then "aa" through "az", etc.). Default is "". If "", no numbering will take place and the pages in that range will receive the same label consisting of the ``prefix`` value. If prefix is also omitted, then the label will be "".
- ``style``: (str) the numbering style. Available are "D" (decimal), "r"/"R" (Roman numbers, lower / upper case), and "a"/"A" (lower / upper case alphabetical numbering: "a" through "z", then "aa" through "zz", etc.). Default is "". If "", no numbering will take place and the pages in that range will receive the same label consisting of the ``prefix`` value. If prefix is also omitted, then the label will be "".
- ``firstpagenum``: (int) start numbering with this value. Default is 1, smaller values are ignored.

For example::
Expand Down
9 changes: 6 additions & 3 deletions docs/xml-class.rst
Expand Up @@ -103,17 +103,20 @@ There is no need to ever directly construct an :ref:`Xml` object: after creating
:arg width: if provided, either an absolute (int) value, or a percentage string like "30%". A percentage value refers to the width of the specified ``where`` rectangle in :meth:`Story.place`. If this value is provided and ``height`` is omitted, the image will be included keeping its aspect ratio.
:arg height: if provided, either an absolute (int) value, or a percentage string like "30%". A percentage value refers to the height of the specified ``where`` rectangle in :meth:`Story.place`. If this value is provided and ``width`` is omitted, the image's aspect ratio will be honored.

.. method:: add_link(link)
.. method:: add_link(href, text=None)

Add an :htmlTag:`a` tag.
Add an :htmlTag:`a` tag - inline element, treated like text.

:arg str href: the URL target.
:arg str text: the text to display. If omitted, the ``href`` text is shown instead.

.. method:: add_number_list

Add an :htmlTag:`ol` tag, context manager.

.. method:: add_paragraph

Add a :htmlTag:`p` tag.
Add a :htmlTag:`p` tag, context manager.

.. method:: add_span

Expand Down
20 changes: 13 additions & 7 deletions fitz/fitz.i
Expand Up @@ -2773,15 +2773,17 @@ if len(pyliste) == 0 or min(pyliste) not in range(len(self)) or max(pyliste) not

if (pdf_is_jpx_image(gctx, obj)) {
img_type = FZ_IMAGE_JPX;
res = pdf_load_stream(gctx, obj);
ext = "jpx";
}
if (JM_is_jbig2_image(gctx, obj)) {
img_type = FZ_IMAGE_JBIG2;
res = pdf_load_stream(gctx, obj);
ext = "jb2";
}
res = pdf_load_raw_stream(gctx, obj);
if (img_type == FZ_IMAGE_UNKNOWN) {
unsigned char *c = NULL;
res = pdf_load_raw_stream(gctx, obj);
unsigned char *c = NULL;
fz_buffer_storage(gctx, res, &c);
img_type = fz_recognize_image_format(gctx, c);
ext = JM_image_extension(img_type);
Expand All @@ -2793,9 +2795,10 @@ if len(pyliste) == 0 or min(pyliste) not in range(len(self)) or max(pyliste) not
res = fz_new_buffer_from_image_as_png(gctx, img,
fz_default_color_params);
ext = "png";
} else /*if (smask == 0)*/ {
} else {
img = fz_new_image_from_buffer(gctx, res);
}

fz_image_resolution(img, &xres, &yres);
width = img->w;
height = img->h;
Expand Down Expand Up @@ -2833,7 +2836,8 @@ if len(pyliste) == 0 or min(pyliste) not in range(len(self)) or max(pyliste) not

fz_catch(gctx) {
Py_CLEAR(rc);
Py_RETURN_NONE;
fz_warn(gctx, fz_caught_message(gctx));
Py_RETURN_FALSE;
}
if (!rc)
Py_RETURN_NONE;
Expand Down Expand Up @@ -13122,11 +13126,13 @@ struct Xml
self.append_child(child)
return child

def add_link(self, text=None):
def add_link(self, href, text=None):
"""Add a hyperlink ("a" tag)"""
child = self.create_element("a")
if type(text) is str:
child.append_child(self.create_text_node(text))
if not isinstance(text, str):
text = href
child.set_attribute("href", href)
child.append_child(self.create_text_node(text))
prev = self.span_bottom()
if prev == None:
prev = self
Expand Down
14 changes: 10 additions & 4 deletions fitz/helper-annot.i
Expand Up @@ -310,19 +310,25 @@ PyObject *JM_get_annot_id_list(fz_context *ctx, pdf_page *page)
PyObject *JM_get_annot_xref_list(fz_context *ctx, pdf_obj *page_obj)
{
PyObject *names = PyList_New(0);
pdf_obj *id, *annot_obj = NULL;
pdf_obj *id, *subtype, *annot_obj = NULL;
pdf_obj *annots = pdf_dict_get(ctx, page_obj, PDF_NAME(Annots));
const char *name=NULL;
int type;
if (!annots) return names;
fz_try(ctx) {
int i, n = pdf_array_len(ctx, annots);
for (i = 0; i < n; i++) {
annot_obj = pdf_array_get(ctx, annots, i);
int xref = pdf_to_num(ctx, annot_obj);
pdf_obj *subtype = pdf_dict_get(ctx, annot_obj, PDF_NAME(Subtype));
int type = PDF_ANNOT_UNKNOWN;
subtype = pdf_dict_get(ctx, annot_obj, PDF_NAME(Subtype));
if (subtype) {
const char *name = pdf_to_name(ctx, subtype);
name = pdf_to_name(ctx, subtype);
type = pdf_annot_type_from_string(ctx, name);
if (type == PDF_ANNOT_UNKNOWN) {
continue; // only accept valid annot types
}
} else {
continue;
}
id = pdf_dict_gets(ctx, annot_obj, "NM");
LIST_APPEND_DROP(names, Py_BuildValue("iis", xref, type, pdf_to_text_string(ctx, id)));
Expand Down
46 changes: 16 additions & 30 deletions fitz/helper-devices.i
Expand Up @@ -101,6 +101,7 @@ jm_checkrect()
dev_linecount = 0; // reset line count
long orientation = 0;
fz_point ll, lr, ur, ul;
fz_rect r;
PyObject *rect;
PyObject *line0, *line2;
PyObject *items = PyDict_GetItem(dev_pathdict, dictkey_items);
Expand All @@ -109,50 +110,35 @@ jm_checkrect()
line0 = PyList_GET_ITEM(items, len - 3);
ll = JM_point_from_py(PyTuple_GET_ITEM(line0, 1));
lr = JM_point_from_py(PyTuple_GET_ITEM(line0, 2));

// no need to extract "line1"!
line2 = PyList_GET_ITEM(items, len - 1);
ur = JM_point_from_py(PyTuple_GET_ITEM(line2, 1));
ul = JM_point_from_py(PyTuple_GET_ITEM(line2, 2));

/*
---------------------------------------------------------------------
Three connected lines: at least a quad! Check whether even a rect.
For this, the lines must be parallel to the axes.
Assumption:
For decomposing rects, MuPDF always starts with a horizontal line,
followed by a vertical line, followed by a horizontal line.
We will also check orientation of the enclosed area and add this info
as '+1' for anti-clockwise, '-1' for clockwise orientation.
---------------------------------------------------------------------
*/
if (ll.y != lr.y) { // not horizontal
goto drop_out;
}
if (lr.x != ur.x) { // not vertical
goto drop_out;
}
if (ur.y != ul.y) { // not horizontal
goto drop_out;
if (ll.y != lr.y ||
ll.x != ul.x ||
ur.y != ul.y ||
ur.x != lr.x) {
goto drop_out; // not a rectangle
}
// we have a rect, determine orientation
if (ll.x < lr.x) { // move left to right
if (lr.y > ur.y) { // move upwards
orientation = 1;
} else {
orientation = -1;
}
} else { // move right to left
if (lr.y < ur.y) { // move downwards
orientation = 1;
} else {
orientation = -1;
}

// we have a rect, replace last 3 "l" items by one "re" item.
if (ul.y < lr.y) {
r = fz_make_rect(ul.x, ul.y, lr.x, lr.y);
orientation = 1;
} else {
r = fz_make_rect(ll.x, ll.y, ur.x, ur.y);
orientation = -1;
}
// Replace last 3 "l" items by one "re" item.
fz_rect r = fz_make_rect(ul.x, ul.y, ul.x, ul.y);
r = fz_include_point_in_rect(r, ur);
r = fz_include_point_in_rect(r, ll);
r = fz_include_point_in_rect(r, lr);
rect = PyTuple_New(3);
PyTuple_SET_ITEM(rect, 0, PyUnicode_FromString("re"));
PyTuple_SET_ITEM(rect, 1, JM_py_from_rect(r));
Expand Down Expand Up @@ -493,7 +479,7 @@ jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type,
// make the span dictionary
PyObject *span_dict = PyDict_New();
DICT_SETITEMSTR_DROP(span_dict, "dir", JM_py_from_point(dir));
DICT_SETITEM_DROP(span_dict, dictkey_font, Py_BuildValue("s",fontname));
DICT_SETITEM_DROP(span_dict, dictkey_font, JM_EscapeStrFromStr(fontname));
DICT_SETITEM_DROP(span_dict, dictkey_wmode, PyLong_FromLong((long) span->wmode));
DICT_SETITEM_DROP(span_dict, dictkey_flags, PyLong_FromLong((long) fflags));
DICT_SETITEMSTR_DROP(span_dict, "bidi_lvl", PyLong_FromLong((long) span->bidi_level));
Expand Down
3 changes: 1 addition & 2 deletions fitz/helper-fields.i
Expand Up @@ -1030,8 +1030,7 @@ class Widget(object):
return "'%s' widget on %s" % (self.field_type_string, str(self.parent))

def __del__(self):
annot = getattr(self, "_annot")
if annot:
if hasattr(self, "_annot"):
del self._annot

@property
Expand Down
1 change: 1 addition & 0 deletions fitz/helper-python.i
Expand Up @@ -844,6 +844,7 @@ class linkDest(object):
self.page = -1
self.kind = LINK_NONE
if isInt and self.uri:
self.uri = self.uri.replace("&zoom=nan", "&zoom=0")
if self.uri.startswith("#"):
self.named = ""
self.kind = LINK_GOTO
Expand Down
53 changes: 35 additions & 18 deletions fitz/helper-stext.i
Expand Up @@ -16,7 +16,11 @@ JM_font_ascender(fz_context *ctx, fz_font *font)
if (skip_quad_corrections) {
return 0.8f;
}
return fz_font_ascender(ctx, font);
float asc = fz_font_ascender(ctx, font);
if (asc > 10) {
return 1.0f;
}
return asc;
}

static const float
Expand All @@ -25,7 +29,11 @@ JM_font_descender(fz_context *ctx, fz_font *font)
if (skip_quad_corrections) {
return -0.2f;
}
return fz_font_descender(ctx, font);
float dsc =fz_font_descender(ctx, font);
if (dsc < -10) {
return -0.2f;
}
return dsc;
}


Expand Down Expand Up @@ -97,16 +105,18 @@ JM_char_quad(fz_context *ctx, fz_stext_line *line, fz_stext_char *ch)
return ch->quad;
}
fz_font *font = ch->font;
fz_matrix trm1, trm2, xlate1, xlate2;
fz_quad quad;
float c, s, fsize = ch->size;
float asc = JM_font_ascender(ctx, font);
float dsc = JM_font_descender(ctx, font);
float c, s, fsize = ch->size;
float asc_dsc = asc - dsc + FLT_EPSILON;
if (asc_dsc >= 1 && small_glyph_heights == 0) { // no problem
return ch->quad;
}
if (asc < 1e-3) { // probably Tesseract glyphless font
dsc = -0.1f;
asc = 0.9f;
dsc = -0.2f;
asc = 0.8f;
asc_dsc = 1.0f;
}

Expand All @@ -123,8 +133,6 @@ JM_char_quad(fz_context *ctx, fz_stext_line *line, fz_stext_char *ch)
Move ch->origin to (0,0) and de-rotate quad, then adjust the corners,
re-rotate and move back to ch->origin location.
------------------------------ */
fz_matrix trm1, trm2, xlate1, xlate2;
fz_quad quad;
c = line->dir.x; // cosine
s = line->dir.y; // sine
trm1 = fz_make_matrix(c, -s, s, c, 0, 0); // derotate
Expand All @@ -139,35 +147,44 @@ JM_char_quad(fz_context *ctx, fz_stext_line *line, fz_stext_char *ch)
quad = fz_transform_quad(ch->quad, xlate1); // move origin to (0,0)
quad = fz_transform_quad(quad, trm1); // de-rotate corners

quad.ul.y = -asc;
quad.ur.y = -asc;
quad.ll.y = -dsc;
quad.lr.y = -dsc;

// adjust vertical coordinates
if (c == 1 && quad.ul.y > 0) { // up-down flip
quad.ul.y = asc;
quad.ur.y = asc;
quad.ll.y = dsc;
quad.lr.y = dsc;
} else {
} else if (abs(c) == 1) {
quad.ul.y = -asc;
quad.ur.y = -asc;
quad.ll.y = -dsc;
quad.lr.y = -dsc;
}

// adjust horizontal coordinates that are too crazy:
// (1) left x must be >= 0
// (1) left and right x must be >= 0
// (2) if bbox width is 0, lookup char advance in font.
if (quad.ll.x < 0) {
quad.ll.x = 0;
quad.ul.x = 0;
quad.ll.x = quad.ul.x = 0;
}
if (quad.lr.x <= 0) {
quad.lr.x = quad.ur.x = 0.5 * fsize;
}
float cwidth = quad.lr.x - quad.ll.x;
if (cwidth < FLT_EPSILON) {
int glyph = fz_encode_character(ctx, font, ch->c);
if (glyph) {
if (glyph>=0) {
float fwidth = fz_advance_glyph(ctx, font, glyph, line->wmode);
quad.lr.x = quad.ll.x + fwidth * fsize;
quad.ur.x = quad.lr.x;
}
}
if (fwidth > cwidth) {
quad.lr.x = quad.ll.x + fwidth;
quad.ur.x = quad.lr.x;
}
}
}

quad = fz_transform_quad(quad, trm2); // rotate back
quad = fz_transform_quad(quad, xlate2); // translate back
Expand Down Expand Up @@ -567,8 +584,8 @@ JM_make_spanlist(fz_context *ctx, PyObject *line_dict,
span = PyDict_New();
float asc = style.asc, desc = style.desc;
if (style.asc < 1e-3) {
asc = 0.9f;
desc = -0.1f;
asc = 0.8f;
desc = -0.2f;
}

DICT_SETITEM_DROP(span, dictkey_size, Py_BuildValue("f", style.size));
Expand Down