Skip to content

Commit

Permalink
Added line size metrics to output per #906 (#909)
Browse files Browse the repository at this point in the history
  • Loading branch information
Balearica committed Apr 1, 2024
1 parent bf358a2 commit 03f82ea
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 8 deletions.
14 changes: 7 additions & 7 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
"node-fetch": "^2.6.9",
"opencollective-postinstall": "^2.0.3",
"regenerator-runtime": "^0.13.3",
"tesseract.js-core": "^5.0.0",
"tesseract.js-core": "^5.1.0",
"wasm-feature-detect": "^1.2.11",
"zlibjs": "^0.3.1"
},
Expand Down
6 changes: 6 additions & 0 deletions src/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,11 @@ declare namespace Tesseract {
y1: number;
has_baseline: boolean;
}
/**
 * Size metrics for a single recognized text line, as obtained by the worker
 * from the result iterator's `getRowAttributes()` call.
 *
 * NOTE(review): units are not established in this file — presumably image
 * pixels, like the other line geometry; confirm against tesseract.js-core.
 *
 * NOTE(review): the worker only fills this in when the loaded core build
 * exposes `getRowAttributes`; with older builds the line's `rowAttributes`
 * value is left undefined.
 */
interface RowAttributes {
/** Ascender size for the row, passed through unchanged from the core. */
ascenders: number;
/**
 * Descender size for the row, reported as a positive number. Tesseract
 * reports it as a negative internally; the worker flips the sign so it
 * matches the value in the hOCR output.
 */
descenders: number;
/** Row height for the line, passed through unchanged from the core. */
row_height: number;
}
interface Bbox {
x0: number;
y0: number;
Expand All @@ -189,6 +194,7 @@ declare namespace Tesseract {
text: string;
confidence: number;
baseline: Baseline;
rowAttributes: RowAttributes
bbox: Bbox;
paragraph: Paragraph;
block: Block;
Expand Down
11 changes: 11 additions & 0 deletions src/worker-script/utils/dump.js
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,22 @@ module.exports = (TessModule, api, output, options) => {
block.paragraphs.push(para);
}
if (ri.IsAtBeginningOf(RIL_TEXTLINE)) {
// getRowAttributes was added in a recent minor version of Tesseract.js-core,
// so we need to check if it exists before calling it.
// This can be removed in the next major version (v6).
let rowAttributes;
if (ri.getRowAttributes) {
rowAttributes = ri.getRowAttributes();
// Descenders is reported as a negative within Tesseract internally so we need to flip it.
// The positive version is intuitive, and matches what is reported in the hOCR output.
rowAttributes.descenders *= -1;
}
textline = {
words: [],
text: !options.skipRecognition ? ri.GetUTF8Text(RIL_TEXTLINE) : null,
confidence: !options.skipRecognition ? ri.Confidence(RIL_TEXTLINE) : null,
baseline: ri.getBaseline(RIL_TEXTLINE),
rowAttributes,
bbox: ri.getBoundingBox(RIL_TEXTLINE),
};
para.lines.push(textline);
Expand Down

0 comments on commit 03f82ea

Please sign in to comment.