Skip to content

Commit

Permalink
Merge pull request #114 from nohn/tesseract-resiliency
Browse files Browse the repository at this point in the history
Catch Tesseract exceptions so that the watermeter can still be configured when OCR fails during configuration
  • Loading branch information
nohn committed Feb 7, 2024
2 parents 92582f6 + 0150e7a commit 44622c3
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 10 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# 2024-02-08 v1.2.21
- don't fail configuration if OCR fails (#113)

# 2023-02-19 v1.2.20
- update PHP to 8.2.3

Expand Down
25 changes: 15 additions & 10 deletions classes/Reader.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
use Imagick;
use nohn\AnalogMeterReader\AnalogMeter;
use thiagoalessio\TesseractOCR\TesseractOCR;
use thiagoalessio\TesseractOCR\TesseractOcrException;

class Reader extends Watermeter
{
Expand Down Expand Up @@ -85,7 +86,7 @@ private function readDigits($post_decimal = false)

foreach ($digits_to_read as $digit) {
$rawDigit = clone $digitalSourceImage;
if(isset($digit['width']) && $digit['width'] >0 && isset($digit['height']) && $digit['height'] >0) {
if (isset($digit['width']) && $digit['width'] > 0 && isset($digit['height']) && $digit['height'] > 0) {
$rawDigit->cropImage($digit['width'], $digit['height'], $digit['x'], $digit['y']);
$targetImage->addImage($rawDigit);
if ($this->debug) {
Expand All @@ -107,20 +108,24 @@ private function readDigits($post_decimal = false)
}
$numberDigitalImage->setImageFormat("png");
$numberDigitalImage->borderImage('white', 10, 10);

$ocr = new TesseractOCR();
$ocr->imageData($numberDigitalImage, sizeof($numberDigitalImage));
$ocr->allowlist(range('0', '9'));
$numberOCR = $ocr->run();
try {
$ocr = new TesseractOCR();
$ocr->imageData($numberDigitalImage, sizeof($numberDigitalImage));
$ocr->allowlist(range('0', '9'));
$numberOCR = $ocr->run();
} catch (TesseractOcrException $e) {
$numberOCR = 0;
$this->errors[] = $e->getMessage();
}
$numberDigital = preg_replace('/\s+/', '', $numberOCR);
// There is TesseractOCR::digits(), but sometimes it does not convert a letter to a similar-looking digit and ignores it completely instead. So we replace o with 0, I with 1 etc.
$numberDigital = strtr($numberDigital, 'oOiIlzZsSBg', '00111225589');
// $numberDigital = '00815';
if ($this->debug) {
$numberDigitalImage->writeImage('tmp/'.$cachePrefix.'_digital.jpg');
$numberDigitalImage->writeImage('tmp/' . $cachePrefix . '_digital.jpg');
echo "Raw OCR: $numberOCR<br>";
echo "Clean OCR: $numberDigital";
echo '<img alt="Digital Preview" src="tmp/'.$cachePrefix.'_digital.jpg" /><br>';
echo '<img alt="Digital Preview" src="tmp/' . $cachePrefix . '_digital.jpg" /><br>';
}

if (is_numeric($numberDigital)) {
Expand All @@ -139,7 +144,7 @@ private function readDigits($post_decimal = false)
echo '<table border="1"><tr>';
echo '<td>';
$digitalSourceImage->writeImage('tmp/input.jpg');
$numberDigitalImage->writeImage('tmp/'.$cachePrefix.'_digital.png');
$numberDigitalImage->writeImage('tmp/' . $cachePrefix . '_digital.png');
echo '</td>';
}
return $numberRead;
Expand Down Expand Up @@ -196,4 +201,4 @@ public function getErrors()
{
return $this->errors;
}
}
}

0 comments on commit 44622c3

Please sign in to comment.