Skip to content

Commit

Permalink
Merge pull request opencv#24350 from dkurt:py_return_non_utf8_string
Browse files Browse the repository at this point in the history
Encode QR code data to UTF-8 opencv#24350

### Pull Request Readiness Checklist

**Merge with extra**: opencv/opencv_extra#1105

resolves opencv#23728

This is the first PR in a series. Here we just return raw Unicode. Later I will try to expand the QR code decoding methods to use the ECI assignment number and return a string with the proper encoding, not only UTF-8 or raw Unicode.

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
  • Loading branch information
dkurt authored and thewoz committed Jan 4, 2024
1 parent a1ff43e commit e2b4f13
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 3 deletions.
105 changes: 102 additions & 3 deletions modules/objdetect/src/qrcode.cpp
Expand Up @@ -2727,6 +2727,58 @@ bool QRDecode::samplingForVersion()
return true;
}


// Tells whether the first `size` bytes at `str` are all 7-bit values
// (every byte below 0x80). An empty range yields true.
static bool checkASCIIcompatible(const uint8_t* str, const size_t size) {
    size_t pos = 0;
    // Advance over the leading run of bytes whose high bit is clear.
    while (pos < size && str[pos] < 0x80)
        ++pos;
    // The buffer is ASCII compatible only if that run covers all of it.
    return pos == size;
}

// Checks whether the `size` bytes at `str` form a well-formed UTF-8 string.
//
// Besides the lead-byte / continuation-byte bit patterns, the check rejects
// sequences that are structurally plausible but ill-formed:
//   - overlong encodings (lead bytes 0xC0/0xC1, E0 80..9F, F0 80..8F),
//   - UTF-16 surrogates U+D800..U+DFFF (ED A0..BF),
//   - code points above U+10FFFF (F4 90..BF and lead bytes 0xF5..0xFF),
//   - truncated sequences and stray continuation bytes.
//
// Returns true for an empty buffer and for pure 7-bit ASCII input.
static bool checkUTF8(const uint8_t* str, const size_t size) {
    size_t i = 0;
    while (i < size) {
        const uint8_t lead = str[i];
        if (lead < 0x80) {
            // Single-byte (ASCII) symbol.
            ++i;
            continue;
        }

        // Total number of bytes in the symbol and the permitted range of the
        // first continuation byte; the range is narrowed for the lead bytes
        // that would otherwise admit overlong or out-of-range code points.
        size_t numBytesPerSymbol;
        uint8_t secondMin = 0x80;
        uint8_t secondMax = 0xbf;
        if (lead >= 0xc2 && lead <= 0xdf) {
            // 0xC0 and 0xC1 could only encode overlong forms of U+0000..U+007F.
            numBytesPerSymbol = 2;
        } else if ((lead & 0xf0) == 0xe0) {
            numBytesPerSymbol = 3;
            if (lead == 0xe0)
                secondMin = 0xa0;  // below that the encoding is overlong
            else if (lead == 0xed)
                secondMax = 0x9f;  // above that it is a UTF-16 surrogate
        } else if (lead >= 0xf0 && lead <= 0xf4) {
            numBytesPerSymbol = 4;
            if (lead == 0xf0)
                secondMin = 0x90;  // below that the encoding is overlong
            else if (lead == 0xf4)
                secondMax = 0x8f;  // above that the code point exceeds U+10FFFF
        } else {
            // Stray continuation byte or a lead byte that is never valid.
            return false;
        }

        // The whole symbol must fit in the remaining input.
        if (size - i < numBytesPerSymbol)
            return false;

        const uint8_t second = str[i + 1];
        if (second < secondMin || second > secondMax)
            return false;

        // Remaining continuation bytes must look like 10xxxxxx.
        for (size_t j = 2; j < numBytesPerSymbol; ++j) {
            if ((str[i + j] & 0xc0) != 0x80)
                return false;
        }
        i += numBytesPerSymbol;
    }
    return true;
}

// Re-encodes `size` bytes at `str` as UTF-8, treating every input byte as one
// code point in the range U+0000..U+00FF. Bytes below 0x80 are copied
// unchanged; each byte at or above 0x80 becomes a two-byte UTF-8 sequence.
static std::string encodeUTF8_bytesarray(const uint8_t* str, const size_t size) {
    std::string utf8;
    for (const uint8_t* cur = str; cur != str + size; ++cur) {
        const uint8_t value = *cur;
        if (value < 0x80) {
            utf8.push_back(static_cast<char>(value));
            continue;
        }
        // 110xxxxx lead byte carrying the top two bits of the value ...
        utf8.push_back(static_cast<char>(0xc0 | (value >> 6)));
        // ... followed by a 10xxxxxx continuation byte with the low six bits.
        utf8.push_back(static_cast<char>(0x80 | (value & 0x3f)));
    }
    return utf8;
}

bool QRDecode::decodingProcess()
{
#ifdef HAVE_QUIRC
Expand Down Expand Up @@ -2756,11 +2808,58 @@ bool QRDecode::decodingProcess()

if (errorCode != 0) { return false; }

for (int i = 0; i < qr_code_data.payload_len; i++)
CV_LOG_INFO(NULL, "QR: decoded with .version=" << qr_code_data.version << " .data_type=" << qr_code_data.data_type << " .eci=" << qr_code_data.eci << " .payload_len=" << qr_code_data.payload_len)

switch (qr_code_data.data_type)
{
result_info += qr_code_data.payload[i];
case QUIRC_DATA_TYPE_NUMERIC:
if (!checkASCIIcompatible(qr_code_data.payload, qr_code_data.payload_len)) {
CV_LOG_INFO(NULL, "QR: DATA_TYPE_NUMERIC payload must be ACSII compatible string");
return false;
}
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
return true;
case QUIRC_DATA_TYPE_ALPHA:
if (!checkASCIIcompatible(qr_code_data.payload, qr_code_data.payload_len)) {
CV_LOG_INFO(NULL, "QR: DATA_TYPE_ALPHA payload must be ASCII compatible string");
return false;
}
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
return true;
case QUIRC_DATA_TYPE_BYTE:
// https://en.wikipedia.org/wiki/Extended_Channel_Interpretation
if (qr_code_data.eci == QUIRC_ECI_UTF_8) {
CV_LOG_INFO(NULL, "QR: payload ECI is UTF-8");
if (!checkUTF8(qr_code_data.payload, qr_code_data.payload_len)) {
CV_LOG_INFO(NULL, "QUIRC_DATA_TYPE_BYTE with UTF-8 ECI must be UTF-8 compatible string");
return false;
}
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
} else if (qr_code_data.eci == 25/*ECI_UTF_16BE*/) {
CV_LOG_INFO(NULL, "QR: UTF-16BE ECI is not supported");
return false;
} else if (checkASCIIcompatible(qr_code_data.payload, qr_code_data.payload_len)) {
CV_LOG_INFO(NULL, "QR: payload is ASCII compatible (special handling for symbols encoding is not needed)");
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
} else {
if (checkUTF8(qr_code_data.payload, qr_code_data.payload_len)) {
CV_LOG_INFO(NULL, "QR: payload QUIRC_DATA_TYPE_BYTE is UTF-8 compatible, return as-is");
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
} else {
CV_LOG_INFO(NULL, "QR: assume 1-byte per symbol encoding");
result_info = encodeUTF8_bytesarray(qr_code_data.payload, qr_code_data.payload_len);
}
}
return true;
case QUIRC_DATA_TYPE_KANJI:
// FIXIT BUG: we must return UTF-8 compatible string
CV_LOG_WARNING(NULL, "QR: Kanji is not supported properly");
result_info.assign((const char*)qr_code_data.payload, qr_code_data.payload_len);
return true;
}
return true;

CV_LOG_WARNING(NULL, "QR: unsupported QR data type");
return false;
#else
return false;
#endif
Expand Down
21 changes: 21 additions & 0 deletions modules/objdetect/test/test_qrcode.cpp
Expand Up @@ -637,4 +637,25 @@ TEST_P(Objdetect_QRCode_detectAndDecodeMulti, decode_9_qrcodes_version7)

#endif // UPDATE_QRCODE_TEST_DATA

// Decodes the QR code in qrcode/umlaut.png and expects the returned text to
// contain "M<u-umlaut>llheimstrasse" with the umlaut encoded as the UTF-8
// bytes C3 BC. Throws SkipTestException when HAVE_QUIRC is not defined.
TEST(Objdetect_QRCode_detectAndDecode, utf8_output)
{
#ifndef HAVE_QUIRC
    throw SkipTestException("Quirc is required for decoding");
#else
    const std::string data_dir = "qrcode/";
    const std::string file_name = "umlaut.png";

    std::string full_path = findDataFile(data_dir + file_name);
    Mat image = imread(full_path);
    ASSERT_FALSE(image.empty()) << "Can't read image: " << full_path;

    QRCodeDetector detector;
    std::vector<Point> points;
    Mat rectified;
    std::string text = detector.detectAndDecode(image, points, rectified);

    // The payload must be non-empty and carry the expected UTF-8 substring.
    EXPECT_FALSE(text.empty());
    EXPECT_NE(text.find("M\xc3\xbcllheimstrasse"), std::string::npos);
#endif // HAVE_QUIRC
}

}} // namespace

0 comments on commit e2b4f13

Please sign in to comment.