From 456e69df0ca72db1daebcd07d5edfa4df639a0f8 Mon Sep 17 00:00:00 2001 From: Alex Geller Date: Mon, 7 Mar 2022 22:14:39 +0100 Subject: [PATCH 1/3] - Added multi-eci decoding for PDF417 - Fixed issue that some multi-eci encoded PDF417 codes were missing ECIs --- .../google/zxing/common/MinimalECIInput.java | 20 +++ .../decoder/DecodedBitStreamParser.java | 167 ++++++++++++------ .../encoder/PDF417HighLevelEncoder.java | 2 +- .../pdf417/decoder/PDF417DecoderTestCase.java | 20 +-- 4 files changed, 145 insertions(+), 64 deletions(-) diff --git a/core/src/main/java/com/google/zxing/common/MinimalECIInput.java b/core/src/main/java/com/google/zxing/common/MinimalECIInput.java index ac204b832c..d840c57591 100755 --- a/core/src/main/java/com/google/zxing/common/MinimalECIInput.java +++ b/core/src/main/java/com/google/zxing/common/MinimalECIInput.java @@ -208,6 +208,26 @@ public int getECIValue(int index) { return bytes[index] - 256; } + public String toString() { + StringBuilder result = new StringBuilder(); + for (int i = 0; i < length(); i++) { + if (i > 0) { + result.append(", "); + } + if (isECI(i)) { + result.append("ECI("); + result.append(getECIValue(i)); + result.append(')'); + } else if (charAt(i) < 128) { + result.append('\''); + result.append(charAt(i)); + result.append('\''); + } else { + result.append((int) charAt(i)); + } + } + return result.toString(); + } static void addEdge(InputEdge[][] edges, int to, InputEdge edge) { if (edges[to][edge.encoderIndex] == null || edges[to][edge.encoderIndex].cachedTotalSize > edge.cachedTotalSize) { diff --git a/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java b/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java index ab52e665d4..8a647a4a4a 100644 --- a/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java +++ b/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java @@ -22,10 +22,7 @@ import com.google.zxing.pdf417.PDF417ResultMetadata; import java.io.ByteArrayOutputStream; -import java.io.UnsupportedEncodingException; import java.math.BigInteger; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; import java.util.Arrays; /** @@ -102,14 +99,8 @@ private DecodedBitStreamParser() { } static DecoderResult decode(int[] codewords, String ecLevel) throws FormatException { - StringBuilder result = new StringBuilder(codewords.length * 2); - Charset encoding = StandardCharsets.ISO_8859_1; - int codeIndex = 1; - if (codewords[0] > 1 && codewords[codeIndex] == ECI_CHARSET) { - encoding = getECICharset(codewords[++codeIndex]); - codeIndex++; - } - codeIndex = textCompaction(codewords, codeIndex, result); + ECIOutput result = new ECIOutput(codewords.length * 2); + int codeIndex = textCompaction(codewords, 1, result); PDF417ResultMetadata resultMetadata = new PDF417ResultMetadata(); while (codeIndex < codewords[0]) { int code = codewords[codeIndex++]; @@ -119,7 +110,7 @@ static DecoderResult decode(int[] codewords, String ecLevel) throws FormatExcept break; case BYTE_COMPACTION_MODE_LATCH: case BYTE_COMPACTION_MODE_LATCH_6: - codeIndex = byteCompaction(code, codewords, encoding, codeIndex, result); + codeIndex = byteCompaction(code, codewords, codeIndex, result); break; case MODE_SHIFT_TO_BYTE_COMPACTION_MODE: result.append((char) codewords[codeIndex++]); @@ -128,7 +119,7 @@ static DecoderResult decode(int[] codewords, String ecLevel) throws FormatExcept codeIndex = numericCompaction(codewords, codeIndex, result); break; case ECI_CHARSET: - encoding = getECICharset(codewords[codeIndex++]); + result.appendECI(codewords[codeIndex++]); break; case ECI_GENERAL_PURPOSE: // Can't do anything with generic ECI; skip its 2 characters @@ -154,7 +145,7 @@ static DecoderResult decode(int[] codewords, String ecLevel) throws FormatExcept break; } } - if (result.length() == 0 && resultMetadata.getFileId() == null) { + if (result.isEmpty() && resultMetadata.getFileId() == null) { throw FormatException.getFormatInstance(); } DecoderResult decoderResult = new DecoderResult(null, result.toString(), null, ecLevel); @@ -162,15 +153,6 @@ static DecoderResult decode(int[] codewords, String ecLevel) throws FormatExcept return decoderResult; } - private static Charset getECICharset(int eciValue) throws FormatException { - CharacterSetECI charsetECI = - CharacterSetECI.getCharacterSetECIByValue(eciValue); - if (charsetECI == null) { - throw FormatException.getFormatInstance(); - } - return charsetECI.getCharset(); - } - @SuppressWarnings("deprecation") static int decodeMacroBlock(int[] codewords, int codeIndex, PDF417ResultMetadata resultMetadata) throws FormatException { @@ -222,37 +204,37 @@ static int decodeMacroBlock(int[] codewords, int codeIndex, PDF417ResultMetadata codeIndex++; switch (codewords[codeIndex]) { case MACRO_PDF417_OPTIONAL_FIELD_FILE_NAME: - StringBuilder fileName = new StringBuilder(); + ECIOutput fileName = new ECIOutput(); codeIndex = textCompaction(codewords, codeIndex + 1, fileName); resultMetadata.setFileName(fileName.toString()); break; case MACRO_PDF417_OPTIONAL_FIELD_SENDER: - StringBuilder sender = new StringBuilder(); + ECIOutput sender = new ECIOutput(); codeIndex = textCompaction(codewords, codeIndex + 1, sender); resultMetadata.setSender(sender.toString()); break; case MACRO_PDF417_OPTIONAL_FIELD_ADDRESSEE: - StringBuilder addressee = new StringBuilder(); + ECIOutput addressee = new ECIOutput(); codeIndex = textCompaction(codewords, codeIndex + 1, addressee); resultMetadata.setAddressee(addressee.toString()); break; case MACRO_PDF417_OPTIONAL_FIELD_SEGMENT_COUNT: - StringBuilder segmentCount = new StringBuilder(); + ECIOutput segmentCount = new ECIOutput(); codeIndex = numericCompaction(codewords, codeIndex + 1, segmentCount); resultMetadata.setSegmentCount(Integer.parseInt(segmentCount.toString())); break; case MACRO_PDF417_OPTIONAL_FIELD_TIME_STAMP: - StringBuilder timestamp = new StringBuilder(); + ECIOutput timestamp = new ECIOutput(); codeIndex = numericCompaction(codewords, codeIndex + 1, timestamp); resultMetadata.setTimestamp(Long.parseLong(timestamp.toString())); break; case MACRO_PDF417_OPTIONAL_FIELD_CHECKSUM: - StringBuilder checksum = new StringBuilder(); + ECIOutput checksum = new ECIOutput(); codeIndex = numericCompaction(codewords, codeIndex + 1, checksum); resultMetadata.setChecksum(Integer.parseInt(checksum.toString())); break; case MACRO_PDF417_OPTIONAL_FIELD_FILE_SIZE: - StringBuilder fileSize = new StringBuilder(); + ECIOutput fileSize = new ECIOutput(); codeIndex = numericCompaction(codewords, codeIndex + 1, fileSize); resultMetadata.setFileSize(Long.parseLong(fileSize.toString())); break; @@ -293,7 +275,7 @@ static int decodeMacroBlock(int[] codewords, int codeIndex, PDF417ResultMetadata * @param result The decoded data is appended to the result. * @return The next index into the codeword array. */ - private static int textCompaction(int[] codewords, int codeIndex, StringBuilder result) { + private static int textCompaction(int[] codewords, int codeIndex, ECIOutput result) throws FormatException { // 2 character per codeword int[] textCompactionData = new int[(codewords[0] - codeIndex) * 2]; // Used to hold the byte compaction value if there is a mode shift @@ -301,6 +283,7 @@ private static int textCompaction(int[] codewords, int codeIndex, StringBuilder int index = 0; boolean end = false; + Mode subMode = Mode.ALPHA; while ((codeIndex < codewords[0]) && !end) { int code = codewords[codeIndex++]; if (code < TEXT_COMPACTION_MODE_LATCH) { @@ -334,10 +317,17 @@ private static int textCompaction(int[] codewords, int codeIndex, StringBuilder byteCompactionData[index] = code; index++; break; + case ECI_CHARSET: + subMode = decodeTextCompaction(textCompactionData, byteCompactionData, index, result, subMode); + result.appendECI(codewords[codeIndex++]); + textCompactionData = new int[(codewords[0] - codeIndex) * 2]; + byteCompactionData = new int[(codewords[0] - codeIndex) * 2]; + index = 0; + break; } } } - decodeTextCompaction(textCompactionData, byteCompactionData, index, result); + decodeTextCompaction(textCompactionData, byteCompactionData, index, result, subMode); return codeIndex; } @@ -356,17 +346,21 @@ private static int textCompaction(int[] codewords, int codeIndex, StringBuilder * was a mode shift. * @param length The size of the text compaction and byte compaction data. * @param result The decoded data is appended to the result. + * @param startMode The mode in which decoding starts + * @return The mode in which decoding ended */ - private static void decodeTextCompaction(int[] textCompactionData, + private static Mode decodeTextCompaction(int[] textCompactionData, int[] byteCompactionData, int length, - StringBuilder result) { - // Beginning from an initial state of the Alpha sub-mode + ECIOutput result, + Mode startMode) { + // Beginning from an initial state // The default compaction mode for PDF417 in effect at the start of each symbol shall always be Text // Compaction mode Alpha sub-mode (uppercase alphabetic). A latch codeword from another mode to the Text // Compaction mode shall always switch to the Text Compaction Alpha sub-mode. - Mode subMode = Mode.ALPHA; - Mode priorToShiftMode = Mode.ALPHA; + Mode subMode = startMode; + Mode priorToShiftMode = startMode; + Mode latchedMode = startMode; int i = 0; while (i < length) { int subModeCh = textCompactionData[i]; @@ -384,9 +378,11 @@ private static void decodeTextCompaction(int[] textCompactionData, break; case LL: subMode = Mode.LOWER; + latchedMode = subMode; break; case ML: subMode = Mode.MIXED; + latchedMode = subMode; break; case PS: // Shift to punctuation @@ -398,6 +394,7 @@ private static void decodeTextCompaction(int[] textCompactionData, break; case TEXT_COMPACTION_MODE_LATCH: subMode = Mode.ALPHA; + latchedMode = subMode; break; } } @@ -419,6 +416,7 @@ private static void decodeTextCompaction(int[] textCompactionData, break; case ML: subMode = Mode.MIXED; + latchedMode = subMode; break; case PS: // Shift to punctuation @@ -426,11 +424,11 @@ private static void decodeTextCompaction(int[] textCompactionData, subMode = Mode.PUNCT_SHIFT; break; case MODE_SHIFT_TO_BYTE_COMPACTION_MODE: - // TODO Does this need to use the current character encoding? See other occurrences below result.append((char) byteCompactionData[i]); break; case TEXT_COMPACTION_MODE_LATCH: subMode = Mode.ALPHA; + latchedMode = subMode; break; } } @@ -444,16 +442,19 @@ private static void decodeTextCompaction(int[] textCompactionData, switch (subModeCh) { case PL: subMode = Mode.PUNCT; + latchedMode = subMode; break; case 26: ch = ' '; break; case LL: subMode = Mode.LOWER; + latchedMode = subMode; break; case AL: case TEXT_COMPACTION_MODE_LATCH: subMode = Mode.ALPHA; + latchedMode = subMode; break; case PS: // Shift to punctuation @@ -476,6 +477,7 @@ private static void decodeTextCompaction(int[] textCompactionData, case PAL: case TEXT_COMPACTION_MODE_LATCH: subMode = Mode.ALPHA; + latchedMode = subMode; break; case MODE_SHIFT_TO_BYTE_COMPACTION_MODE: result.append((char) byteCompactionData[i]); @@ -527,6 +529,7 @@ private static void decodeTextCompaction(int[] textCompactionData, } i++; } + return latchedMode; } /** @@ -536,17 +539,14 @@ private static void decodeTextCompaction(int[] textCompactionData, * * @param mode The byte compaction mode i.e. 901 or 924 * @param codewords The array of codewords (data + error) - * @param encoding Currently active character encoding * @param codeIndex The current index into the codeword array. * @param result The decoded data is appended to the result. * @return The next index into the codeword array. */ private static int byteCompaction(int mode, int[] codewords, - Charset encoding, int codeIndex, - StringBuilder result) { - ByteArrayOutputStream decodedBytes = new ByteArrayOutputStream(); + ECIOutput result) { int count = 0; long value = 0; boolean end = false; @@ -572,6 +572,7 @@ private static int byteCompaction(int mode, case BEGIN_MACRO_PDF417_CONTROL_BLOCK: case BEGIN_MACRO_PDF417_OPTIONAL_FIELD: case MACRO_PDF417_TERMINATOR: + case ECI_CHARSET: codeIndex--; end = true; break; @@ -580,7 +581,7 @@ private static int byteCompaction(int mode, // Decode every 5 codewords // Convert to Base 256 for (int j = 0; j < 6; ++j) { - decodedBytes.write((byte) (value >> (8 * (5 - j)))); + result.append((byte) (value >> (8 * (5 - j)))); } value = 0; count = 0; @@ -598,7 +599,7 @@ private static int byteCompaction(int mode, // the last group of codewords is interpreted directly // as one byte per codeword, without compaction. for (int i = 0; i < count; i++) { - decodedBytes.write((byte) byteCompactedCodewords[i]); + result.append((byte) byteCompactedCodewords[i]); } break; @@ -621,6 +622,7 @@ private static int byteCompaction(int mode, case BEGIN_MACRO_PDF417_CONTROL_BLOCK: case BEGIN_MACRO_PDF417_OPTIONAL_FIELD: case MACRO_PDF417_TERMINATOR: + case ECI_CHARSET: codeIndex--; end = true; break; @@ -630,19 +632,17 @@ private static int byteCompaction(int mode, // Decode every 5 codewords // Convert to Base 256 for (int j = 0; j < 6; ++j) { - decodedBytes.write((byte) (value >> (8 * (5 - j)))); + result.append((byte) (value >> (8 * (5 - j)))); } value = 0; count = 0; } } break; - } - try { - result.append(decodedBytes.toString(encoding.name())); - } catch (UnsupportedEncodingException uee) { - // can't happen - throw new IllegalStateException(uee); + case ECI_CHARSET: + codeIndex--; + end = true; + break; } return codeIndex; } @@ -655,7 +655,7 @@ private static int byteCompaction(int mode, * @param result The decoded data is appended to the result. * @return The next index into the codeword array. */ - private static int numericCompaction(int[] codewords, int codeIndex, StringBuilder result) throws FormatException { + private static int numericCompaction(int[] codewords, int codeIndex, ECIOutput result) throws FormatException { int count = 0; boolean end = false; @@ -677,6 +677,7 @@ private static int numericCompaction(int[] codewords, int codeIndex, StringBuild case BEGIN_MACRO_PDF417_CONTROL_BLOCK: case BEGIN_MACRO_PDF417_OPTIONAL_FIELD: case MACRO_PDF417_TERMINATOR: + case ECI_CHARSET: codeIndex--; end = true; break; @@ -749,4 +750,66 @@ private static String decodeBase900toBase10(int[] codewords, int count) throws F return resultString.substring(1); } + private static final class ECIOutput { + boolean needFlush = false; + String encodingName = "ISO-8859-1"; + private ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + StringBuilder result; + + private ECIOutput() { + result = new StringBuilder(); + } + + private ECIOutput(int size) { + result = new StringBuilder(size); + } + + private void append(byte value) { + bytes.write(value); + needFlush = true; + } + + private void append(char value) { + bytes.write(value & 0xff); + needFlush = true; + } + + private void append(String s) { + for (int i = 0; i < s.length(); i++) { + append(s.charAt(i)); + } + } + + private void appendECI(int value) throws FormatException { + flush(); + bytes = new ByteArrayOutputStream(); + CharacterSetECI charsetECI = CharacterSetECI.getCharacterSetECIByValue(value); + if (charsetECI == null) { + throw FormatException.getFormatInstance(); + } + encodingName = charsetECI.name(); + } + + private void flush() { + if (needFlush) { + needFlush = false; + try { + result.append(bytes.toString(encodingName)); + } catch (java.io.UnsupportedEncodingException uee) { + // can't happen + throw new IllegalStateException(uee); + } + } + } + + private boolean isEmpty() { + return !needFlush && result.length() == 0; + } + + public String toString() { + flush(); + return result.toString(); + } + } + } diff --git a/core/src/main/java/com/google/zxing/pdf417/encoder/PDF417HighLevelEncoder.java b/core/src/main/java/com/google/zxing/pdf417/encoder/PDF417HighLevelEncoder.java index 607dea9d71..4b3871c10b 100644 --- a/core/src/main/java/com/google/zxing/pdf417/encoder/PDF417HighLevelEncoder.java +++ b/core/src/main/java/com/google/zxing/pdf417/encoder/PDF417HighLevelEncoder.java @@ -437,7 +437,7 @@ private static void encodeMultiECIBinary(ECIInput input, //encode the segment encodeBinary(subBytes(input, localStart, localEnd), 0, localCount, localStart == startpos ? startmode : BYTE_COMPACTION, sb); - localStart = localEnd + 1; + localStart = localEnd; } } } diff --git a/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java b/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java index d9c4b6bc0b..0a6e82c33c 100644 --- a/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java +++ b/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java @@ -287,49 +287,49 @@ public void testBinaryData() throws WriterException, FormatException { @Test public void testECIEnglishHiragana() throws Exception { //multi ECI UTF-8, UTF-16 and ISO-8859-1 - performECITest(new char[] {'a', '1', '\u3040'}, new float[] {20f, 1f, 10f}, 102583, 110914); + performECITest(new char[] {'a', '1', '\u3040'}, new float[] {20f, 1f, 10f}, 105825, 110914); } @Test public void testECIEnglishKatakana() throws Exception { //multi ECI UTF-8, UTF-16 and ISO-8859-1 - performECITest(new char[] {'a', '1', '\u30a0'}, new float[] {20f, 1f, 10f}, 104691, 110914); + performECITest(new char[] {'a', '1', '\u30a0'}, new float[] {20f, 1f, 10f}, 109177, 110914); } @Test public void testECIEnglishHalfWidthKatakana() throws Exception { //single ECI - performECITest(new char[] {'a', '1', '\uff80'}, new float[] {20f, 1f, 10f}, 80463, 110914); + performECITest(new char[] {'a', '1', '\uff80'}, new float[] {20f, 1f, 10f}, 80617, 110914); } @Test public void testECIEnglishChinese() throws Exception { //single ECI - performECITest(new char[] {'a', '1', '\u4e00'}, new float[] {20f, 1f, 10f}, 95643, 110914); + performECITest(new char[] {'a', '1', '\u4e00'}, new float[] {20f, 1f, 10f}, 95797, 110914); } @Test public void testECIGermanCyrillic() throws Exception { //single ECI since the German Umlaut is in ISO-8859-1 - performECITest(new char[] {'a', '1', '\u00c4', '\u042f'}, new float[] {20f, 1f, 1f, 10f}, 80529, 96007); + performECITest(new char[] {'a', '1', '\u00c4', '\u042f'}, new float[] {20f, 1f, 1f, 10f}, 80755, 96007); } @Test public void testECIEnglishCzechCyrillic1() throws Exception { //multi ECI between ISO-8859-2 and ISO-8859-5 - performECITest(new char[] {'a', '1', '\u010c', '\u042f'}, new float[] {10f, 1f, 10f, 10f}, 91482, 124525); + performECITest(new char[] {'a', '1', '\u010c', '\u042f'}, new float[] {10f, 1f, 10f, 10f}, 102824, 124525); } @Test public void testECIEnglishCzechCyrillic2() throws Exception { //multi ECI between ISO-8859-2 and ISO-8859-5 - performECITest(new char[] {'a', '1', '\u010c', '\u042f'}, new float[] {40f, 1f, 10f, 10f}, 79331, 88236); + performECITest(new char[] {'a', '1', '\u010c', '\u042f'}, new float[] {40f, 1f, 10f, 10f}, 81321, 88236); } @Test public void testECIEnglishArabicCyrillic() throws Exception { //multi ECI between UTF-8 (ISO-8859-6 is excluded in CharacterSetECI) and ISO-8859-5 - performECITest(new char[] {'a', '1', '\u0620', '\u042f'}, new float[] {10f, 1f, 10f, 10f}, 111508, 124525); + performECITest(new char[] {'a', '1', '\u0620', '\u042f'}, new float[] {10f, 1f, 10f, 10f}, 118510, 124525); } private static void encodeDecode(String input, int expectedLength) throws WriterException, FormatException { @@ -403,9 +403,7 @@ private static void performECITest(char[] chars, int utfLength = 0; for (int i = 0; i < 1000; i++) { String s = generateText(random, 100, chars, weights); - minLength += encodeDecode(s, null, true, false); - // TODO: Use this instead when the decoder supports multi ECI input - //minLength += encodeDecode(s, null, true, true); + minLength += encodeDecode(s, null, true, true); utfLength += encodeDecode(s, StandardCharsets.UTF_8, false, true); } assertEquals(expectedMinLength, minLength); From 2ec70497b5c9b270f26d82529b5abf6d00e3934c Mon Sep 17 00:00:00 2001 From: Alex Geller Date: Mon, 7 Mar 2022 22:59:34 +0100 Subject: [PATCH 2/3] Syntactic changes --- .../java/com/google/zxing/common/MinimalECIInput.java | 1 + .../zxing/pdf417/decoder/DecodedBitStreamParser.java | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/com/google/zxing/common/MinimalECIInput.java b/core/src/main/java/com/google/zxing/common/MinimalECIInput.java index d840c57591..5955c0bd4f 100755 --- a/core/src/main/java/com/google/zxing/common/MinimalECIInput.java +++ b/core/src/main/java/com/google/zxing/common/MinimalECIInput.java @@ -208,6 +208,7 @@ public int getECIValue(int index) { return bytes[index] - 256; } + @Override public String toString() { StringBuilder result = new StringBuilder(); for (int i = 0; i < length(); i++) { diff --git a/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java b/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java index 8a647a4a4a..3503449c7c 100644 --- a/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java +++ b/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java @@ -24,6 +24,7 @@ import java.io.ByteArrayOutputStream; import java.math.BigInteger; import java.util.Arrays; +import java.io.UnsupportedEncodingException; /** *

This class contains the methods for decoding the PDF417 codewords.

@@ -751,10 +752,10 @@ private static String decodeBase900toBase10(int[] codewords, int count) throws F } private static final class ECIOutput { - boolean needFlush = false; - String encodingName = "ISO-8859-1"; + private boolean needFlush = false; + private String encodingName = "ISO-8859-1"; private ByteArrayOutputStream bytes = new ByteArrayOutputStream(); - StringBuilder result; + private StringBuilder result; private ECIOutput() { result = new StringBuilder(); @@ -795,7 +796,7 @@ private void flush() { needFlush = false; try { result.append(bytes.toString(encodingName)); - } catch (java.io.UnsupportedEncodingException uee) { + } catch (UnsupportedEncodingException uee) { // can't happen throw new IllegalStateException(uee); } @@ -806,6 +807,7 @@ private boolean isEmpty() { return !needFlush && result.length() == 0; } + @Override public String toString() { flush(); return result.toString(); From 561f187ac03444ca09275b37b1cb193dfc9b033d Mon Sep 17 00:00:00 2001 From: Alex Geller Date: Tue, 8 Mar 2022 21:09:20 +0100 Subject: [PATCH 3/3] - Fixed issue that ECIs were not processed correctly in all allowed locations in binary encoded data as specified in section 5.5.3.2 of the spec - Added verifying unit test --- .../decoder/DecodedBitStreamParser.java | 128 +++++------------- .../pdf417/decoder/PDF417DecoderTestCase.java | 24 +++- 2 files changed, 57 insertions(+), 95 deletions(-) diff --git a/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java b/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java index 3503449c7c..67ecc0851b 100644 --- a/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java +++ b/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java @@ -547,103 +547,49 @@ private static Mode decodeTextCompaction(int[] textCompactionData, private static int byteCompaction(int mode, int[] codewords, int codeIndex, - ECIOutput result) { - int count = 0; - long value = 0; + ECIOutput result) throws FormatException { boolean end = false; - - switch (mode) { - case BYTE_COMPACTION_MODE_LATCH: - // Total number of Byte Compaction characters to be encoded - // is not a multiple of 6 - - int[] byteCompactedCodewords = new int[6]; - int nextCode = codewords[codeIndex++]; - while ((codeIndex < codewords[0]) && !end) { - byteCompactedCodewords[count++] = nextCode; - // Base 900 - value = 900 * value + nextCode; - nextCode = codewords[codeIndex++]; - // perhaps it should be ok to check only nextCode >= TEXT_COMPACTION_MODE_LATCH - switch (nextCode) { - case TEXT_COMPACTION_MODE_LATCH: - case BYTE_COMPACTION_MODE_LATCH: - case NUMERIC_COMPACTION_MODE_LATCH: - case BYTE_COMPACTION_MODE_LATCH_6: - case BEGIN_MACRO_PDF417_CONTROL_BLOCK: - case BEGIN_MACRO_PDF417_OPTIONAL_FIELD: - case MACRO_PDF417_TERMINATOR: - case ECI_CHARSET: + + while (codeIndex < codewords[0] && !end) { + //handle leading ECIs + while (codeIndex < codewords[0] && codewords[codeIndex] == ECI_CHARSET) { + result.appendECI(codewords[++codeIndex]); + codeIndex++; + } + + if (codeIndex >= codewords[0] || codewords[codeIndex] >= TEXT_COMPACTION_MODE_LATCH) { + end = true; + } else { + //decode one block of 5 codewords to 6 bytes + long value = 0; + int count = 0; + do { + value = 900 * value + codewords[codeIndex++]; + count++; + } while (count < 5 && + codeIndex < codewords[0] && + codewords[codeIndex] < TEXT_COMPACTION_MODE_LATCH); + if (count == 5 && (mode == BYTE_COMPACTION_MODE_LATCH_6 || + codeIndex < codewords[0] && + codewords[codeIndex] < TEXT_COMPACTION_MODE_LATCH)) { + for (int i = 0; i < 6; i++) { + result.append((byte) (value >> (8 * (5 - i)))); + } + } else { + codeIndex -= count; + while ((codeIndex < codewords[0]) && !end) { + int code = codewords[codeIndex++]; + if (code < TEXT_COMPACTION_MODE_LATCH) { + result.append((byte) code); + } else if (code == ECI_CHARSET) { + result.appendECI(codewords[codeIndex++]); + } else { codeIndex--; end = true; - break; - default: - if ((count % 5 == 0) && (count > 0)) { - // Decode every 5 codewords - // Convert to Base 256 - for (int j = 0; j < 6; ++j) { - result.append((byte) (value >> (8 * (5 - j)))); - } - value = 0; - count = 0; - } - break; - } - } - - // if the end of all codewords is reached the last codeword needs to be added - if (codeIndex == codewords[0] && nextCode < TEXT_COMPACTION_MODE_LATCH) { - byteCompactedCodewords[count++] = nextCode; - } - - // If Byte Compaction mode is invoked with codeword 901, - // the last group of codewords is interpreted directly - // as one byte per codeword, without compaction. - for (int i = 0; i < count; i++) { - result.append((byte) byteCompactedCodewords[i]); - } - - break; - - case BYTE_COMPACTION_MODE_LATCH_6: - // Total number of Byte Compaction characters to be encoded - // is an integer multiple of 6 - while (codeIndex < codewords[0] && !end) { - int code = codewords[codeIndex++]; - if (code < TEXT_COMPACTION_MODE_LATCH) { - count++; - // Base 900 - value = 900 * value + code; - } else { - switch (code) { - case TEXT_COMPACTION_MODE_LATCH: - case BYTE_COMPACTION_MODE_LATCH: - case NUMERIC_COMPACTION_MODE_LATCH: - case BYTE_COMPACTION_MODE_LATCH_6: - case BEGIN_MACRO_PDF417_CONTROL_BLOCK: - case BEGIN_MACRO_PDF417_OPTIONAL_FIELD: - case MACRO_PDF417_TERMINATOR: - case ECI_CHARSET: - codeIndex--; - end = true; - break; - } - } - if ((count % 5 == 0) && (count > 0)) { - // Decode every 5 codewords - // Convert to Base 256 - for (int j = 0; j < 6; ++j) { - result.append((byte) (value >> (8 * (5 - j)))); } - value = 0; - count = 0; } } - break; - case ECI_CHARSET: - codeIndex--; - end = true; - break; + } } return codeIndex; } diff --git a/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java b/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java index 0a6e82c33c..e384178d94 100644 --- a/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java +++ b/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java @@ -281,7 +281,7 @@ public void testBinaryData() throws WriterException, FormatException { random.nextBytes(bytes); total += encodeDecode(new String(bytes, StandardCharsets.ISO_8859_1)); } - assertEquals(4190044, total); + assertEquals(4190044, total); } @Test @@ -332,6 +332,19 @@ public void testECIEnglishArabicCyrillic() throws Exception { performECITest(new char[] {'a', '1', '\u0620', '\u042f'}, new float[] {10f, 1f, 10f, 10f}, 118510, 124525); } + @Test + public void testBinaryMultiECI() throws Exception { + //Test the cases described in 5.5.5.3 "ECI and Byte Compaction mode using latch 924 and 901" + performDecodeTest(new int[] {5, 927, 4, 913, 200}, "\u010c"); + performDecodeTest(new int[] {9, 927, 4, 913, 200, 927, 7, 913, 207}, "\u010c\u042f"); + performDecodeTest(new int[] {9, 927, 4, 901, 200, 927, 7, 901, 207}, "\u010c\u042f"); + performDecodeTest(new int[] {8, 927, 4, 901, 200, 927, 7, 207}, "\u010c\u042f"); + performDecodeTest(new int[] {14, 927, 4, 901, 200, 927, 7, 207, 927, 4, 200, 927, 7, 207}, + "\u010c\u042f\u010c\u042f"); + performDecodeTest(new int[] {16, 927, 4, 924, 336, 432, 197, 51, 300, 927, 7, 348, 231, 311, 858, 567}, + "\u010c\u010c\u010c\u010c\u010c\u010c\u042f\u042f\u042f\u042f\u042f\u042f"); + } + private static void encodeDecode(String input, int expectedLength) throws WriterException, FormatException { assertEquals(expectedLength, encodeDecode(input)); } @@ -349,9 +362,7 @@ private static int encodeDecode(String input, Charset charset, boolean autoECI, for (int i = 1; i < codewords.length; i++) { codewords[i] = s.charAt(i - 1); } - DecoderResult result = DecodedBitStreamParser.decode(codewords, "0"); - - assertEquals(input, result.getText()); + performDecodeTest(codewords, input); } return s.length() + 1; } @@ -394,6 +405,11 @@ private static void performEncodeTest(char c, int[] expectedLengths) throws Writ } } + private static void performDecodeTest(int[] codewords, String expectedResult) throws FormatException { + DecoderResult result = DecodedBitStreamParser.decode(codewords, "0"); + assertEquals(expectedResult, result.getText()); + } + private static void performECITest(char[] chars, float[] weights, int expectedMinLength,