From 456e69df0ca72db1daebcd07d5edfa4df639a0f8 Mon Sep 17 00:00:00 2001
From: Alex Geller <ag@4js.com>
Date: Mon, 7 Mar 2022 22:14:39 +0100
Subject: [PATCH 1/3] - Added multi-ECI decoding for PDF417 - Fixed issue that
 some multi-ECI encoded PDF417 codes were missing ECIs

---
 .../google/zxing/common/MinimalECIInput.java  |  20 +++
 .../decoder/DecodedBitStreamParser.java       | 167 ++++++++++++------
 .../encoder/PDF417HighLevelEncoder.java       |   2 +-
 .../pdf417/decoder/PDF417DecoderTestCase.java |  20 +--
 4 files changed, 145 insertions(+), 64 deletions(-)

diff --git a/core/src/main/java/com/google/zxing/common/MinimalECIInput.java b/core/src/main/java/com/google/zxing/common/MinimalECIInput.java
index ac204b832c..d840c57591 100755
--- a/core/src/main/java/com/google/zxing/common/MinimalECIInput.java
+++ b/core/src/main/java/com/google/zxing/common/MinimalECIInput.java
@@ -208,6 +208,26 @@ public int getECIValue(int index) {
     return bytes[index] - 256;
   }
 
+  public String toString() {
+    StringBuilder result = new StringBuilder();
+    for (int i = 0; i < length(); i++) {
+      if (i > 0) {
+        result.append(", ");
+      }
+      if (isECI(i)) { // ECI positions are shown as ECI(<value>)
+        result.append("ECI(");
+        result.append(getECIValue(i));
+        result.append(')');
+      } else if (charAt(i) < 128) { // values below 128 are shown as a quoted character
+        result.append('\'');
+        result.append(charAt(i));
+        result.append('\'');
+      } else { // everything else is shown as its decimal value
+        result.append((int) charAt(i));
+      }
+    }
+    return result.toString();
+  }
   static void addEdge(InputEdge[][] edges, int to, InputEdge edge) {
     if (edges[to][edge.encoderIndex] == null ||
         edges[to][edge.encoderIndex].cachedTotalSize > edge.cachedTotalSize) {
diff --git a/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java b/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java
index ab52e665d4..8a647a4a4a 100644
--- a/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java
+++ b/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java
@@ -22,10 +22,7 @@
 import com.google.zxing.pdf417.PDF417ResultMetadata;
 
 import java.io.ByteArrayOutputStream;
-import java.io.UnsupportedEncodingException;
 import java.math.BigInteger;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 
 /**
@@ -102,14 +99,8 @@ private DecodedBitStreamParser() {
   }
 
   static DecoderResult decode(int[] codewords, String ecLevel) throws FormatException {
-    StringBuilder result = new StringBuilder(codewords.length * 2);
-    Charset encoding = StandardCharsets.ISO_8859_1;
-    int codeIndex = 1;
-    if (codewords[0] > 1 && codewords[codeIndex] == ECI_CHARSET) {
-      encoding = getECICharset(codewords[++codeIndex]);
-      codeIndex++;
-    }
-    codeIndex = textCompaction(codewords, codeIndex, result);
+    ECIOutput result = new ECIOutput(codewords.length * 2);
+    int codeIndex = textCompaction(codewords, 1, result);
     PDF417ResultMetadata resultMetadata = new PDF417ResultMetadata();
     while (codeIndex < codewords[0]) {
       int code = codewords[codeIndex++];
@@ -119,7 +110,7 @@ static DecoderResult decode(int[] codewords, String ecLevel) throws FormatExcept
           break;
         case BYTE_COMPACTION_MODE_LATCH:
         case BYTE_COMPACTION_MODE_LATCH_6:
-          codeIndex = byteCompaction(code, codewords, encoding, codeIndex, result);
+          codeIndex = byteCompaction(code, codewords, codeIndex, result);
           break;
         case MODE_SHIFT_TO_BYTE_COMPACTION_MODE:
           result.append((char) codewords[codeIndex++]);
@@ -128,7 +119,7 @@ static DecoderResult decode(int[] codewords, String ecLevel) throws FormatExcept
           codeIndex = numericCompaction(codewords, codeIndex, result);
           break;
         case ECI_CHARSET:
-          encoding = getECICharset(codewords[codeIndex++]);
+          result.appendECI(codewords[codeIndex++]);
           break;
         case ECI_GENERAL_PURPOSE:
           // Can't do anything with generic ECI; skip its 2 characters
@@ -154,7 +145,7 @@ static DecoderResult decode(int[] codewords, String ecLevel) throws FormatExcept
           break;
       }
     }
-    if (result.length() == 0 && resultMetadata.getFileId() == null) {
+    if (result.isEmpty() && resultMetadata.getFileId() == null) {
       throw FormatException.getFormatInstance();
     }
     DecoderResult decoderResult = new DecoderResult(null, result.toString(), null, ecLevel);
@@ -162,15 +153,6 @@ static DecoderResult decode(int[] codewords, String ecLevel) throws FormatExcept
     return decoderResult;
   }
 
-  private static Charset getECICharset(int eciValue) throws FormatException {
-    CharacterSetECI charsetECI =
-        CharacterSetECI.getCharacterSetECIByValue(eciValue);
-    if (charsetECI == null) {
-      throw FormatException.getFormatInstance();
-    }
-    return charsetECI.getCharset();
-  }
-
   @SuppressWarnings("deprecation")
   static int decodeMacroBlock(int[] codewords, int codeIndex, PDF417ResultMetadata resultMetadata)
       throws FormatException {
@@ -222,37 +204,37 @@ static int decodeMacroBlock(int[] codewords, int codeIndex, PDF417ResultMetadata
           codeIndex++;
           switch (codewords[codeIndex]) {
             case MACRO_PDF417_OPTIONAL_FIELD_FILE_NAME:
-              StringBuilder fileName = new StringBuilder();
+              ECIOutput fileName = new ECIOutput();
               codeIndex = textCompaction(codewords, codeIndex + 1, fileName);
               resultMetadata.setFileName(fileName.toString());
               break;
             case MACRO_PDF417_OPTIONAL_FIELD_SENDER:
-              StringBuilder sender = new StringBuilder();
+              ECIOutput sender = new ECIOutput();
               codeIndex = textCompaction(codewords, codeIndex + 1, sender);
               resultMetadata.setSender(sender.toString());
               break;
             case MACRO_PDF417_OPTIONAL_FIELD_ADDRESSEE:
-              StringBuilder addressee = new StringBuilder();
+              ECIOutput addressee = new ECIOutput();
               codeIndex = textCompaction(codewords, codeIndex + 1, addressee);
               resultMetadata.setAddressee(addressee.toString());
               break;
             case MACRO_PDF417_OPTIONAL_FIELD_SEGMENT_COUNT:
-              StringBuilder segmentCount = new StringBuilder();
+              ECIOutput segmentCount = new ECIOutput();
               codeIndex = numericCompaction(codewords, codeIndex + 1, segmentCount);
               resultMetadata.setSegmentCount(Integer.parseInt(segmentCount.toString()));
               break;
             case MACRO_PDF417_OPTIONAL_FIELD_TIME_STAMP:
-              StringBuilder timestamp = new StringBuilder();
+              ECIOutput timestamp = new ECIOutput();
               codeIndex = numericCompaction(codewords, codeIndex + 1, timestamp);
               resultMetadata.setTimestamp(Long.parseLong(timestamp.toString()));
               break;
             case MACRO_PDF417_OPTIONAL_FIELD_CHECKSUM:
-              StringBuilder checksum = new StringBuilder();
+              ECIOutput checksum = new ECIOutput();
               codeIndex = numericCompaction(codewords, codeIndex + 1, checksum);
               resultMetadata.setChecksum(Integer.parseInt(checksum.toString()));
               break;
             case MACRO_PDF417_OPTIONAL_FIELD_FILE_SIZE:
-              StringBuilder fileSize = new StringBuilder();
+              ECIOutput fileSize = new ECIOutput();
               codeIndex = numericCompaction(codewords, codeIndex + 1, fileSize);
               resultMetadata.setFileSize(Long.parseLong(fileSize.toString()));
               break;
@@ -293,7 +275,7 @@ static int decodeMacroBlock(int[] codewords, int codeIndex, PDF417ResultMetadata
    * @param result    The decoded data is appended to the result.
    * @return The next index into the codeword array.
    */
-  private static int textCompaction(int[] codewords, int codeIndex, StringBuilder result) {
+  private static int textCompaction(int[] codewords, int codeIndex, ECIOutput result) throws FormatException {
     // 2 character per codeword
     int[] textCompactionData = new int[(codewords[0] - codeIndex) * 2];
     // Used to hold the byte compaction value if there is a mode shift
@@ -301,6 +283,7 @@ private static int textCompaction(int[] codewords, int codeIndex, StringBuilder
 
     int index = 0;
     boolean end = false;
+    Mode subMode = Mode.ALPHA;
     while ((codeIndex < codewords[0]) && !end) {
       int code = codewords[codeIndex++];
       if (code < TEXT_COMPACTION_MODE_LATCH) {
@@ -334,10 +317,17 @@ private static int textCompaction(int[] codewords, int codeIndex, StringBuilder
             byteCompactionData[index] = code;
             index++;
             break;
+          case ECI_CHARSET:
+            subMode = decodeTextCompaction(textCompactionData, byteCompactionData, index, result, subMode);
+            result.appendECI(codewords[codeIndex++]);
+            textCompactionData = new int[(codewords[0] - codeIndex) * 2];
+            byteCompactionData = new int[(codewords[0] - codeIndex) * 2];
+            index = 0;
+            break;
         }
       }
     }
-    decodeTextCompaction(textCompactionData, byteCompactionData, index, result);
+    decodeTextCompaction(textCompactionData, byteCompactionData, index, result, subMode);
     return codeIndex;
   }
 
@@ -356,17 +346,21 @@ private static int textCompaction(int[] codewords, int codeIndex, StringBuilder
    *                           was a mode shift.
    * @param length             The size of the text compaction and byte compaction data.
    * @param result             The decoded data is appended to the result.
+   * @param startMode          The mode in which decoding starts
+   * @return The mode in which decoding ended
    */
-  private static void decodeTextCompaction(int[] textCompactionData,
+  private static Mode decodeTextCompaction(int[] textCompactionData,
                                            int[] byteCompactionData,
                                            int length,
-                                           StringBuilder result) {
-    // Beginning from an initial state of the Alpha sub-mode
+                                           ECIOutput result,
+                                           Mode startMode) {
+    // Beginning from an initial state
     // The default compaction mode for PDF417 in effect at the start of each symbol shall always be Text
     // Compaction mode Alpha sub-mode (uppercase alphabetic). A latch codeword from another mode to the Text
     // Compaction mode shall always switch to the Text Compaction Alpha sub-mode.
-    Mode subMode = Mode.ALPHA;
-    Mode priorToShiftMode = Mode.ALPHA;
+    Mode subMode = startMode;
+    Mode priorToShiftMode = startMode;
+    Mode latchedMode = startMode;
     int i = 0;
     while (i < length) {
       int subModeCh = textCompactionData[i];
@@ -384,9 +378,11 @@ private static void decodeTextCompaction(int[] textCompactionData,
                 break;
               case LL:
                 subMode = Mode.LOWER;
+                latchedMode = subMode;
                 break;
               case ML:
                 subMode = Mode.MIXED;
+                latchedMode = subMode;
                 break;
               case PS:
                 // Shift to punctuation
@@ -398,6 +394,7 @@ private static void decodeTextCompaction(int[] textCompactionData,
                 break;
               case TEXT_COMPACTION_MODE_LATCH:
                 subMode = Mode.ALPHA;
+                latchedMode = subMode;
                 break;
             }
           }
@@ -419,6 +416,7 @@ private static void decodeTextCompaction(int[] textCompactionData,
                 break;
               case ML:
                 subMode = Mode.MIXED;
+                latchedMode = subMode;
                 break;
               case PS:
                 // Shift to punctuation
@@ -426,11 +424,11 @@ private static void decodeTextCompaction(int[] textCompactionData,
                 subMode = Mode.PUNCT_SHIFT;
                 break;
               case MODE_SHIFT_TO_BYTE_COMPACTION_MODE:
-                // TODO Does this need to use the current character encoding? See other occurrences below
                 result.append((char) byteCompactionData[i]);
                 break;
               case TEXT_COMPACTION_MODE_LATCH:
                 subMode = Mode.ALPHA;
+                latchedMode = subMode;
                 break;
             }
           }
@@ -444,16 +442,19 @@ private static void decodeTextCompaction(int[] textCompactionData,
             switch (subModeCh) {
               case PL:
                 subMode = Mode.PUNCT;
+                latchedMode = subMode;
                 break;
               case 26:
                 ch = ' ';
                 break;
               case LL:
                 subMode = Mode.LOWER;
+                latchedMode = subMode;
                 break;
               case AL:
               case TEXT_COMPACTION_MODE_LATCH:
                 subMode = Mode.ALPHA;
+                latchedMode = subMode;
                 break;
               case PS:
                 // Shift to punctuation
@@ -476,6 +477,7 @@ private static void decodeTextCompaction(int[] textCompactionData,
               case PAL:
               case TEXT_COMPACTION_MODE_LATCH:
                 subMode = Mode.ALPHA;
+                latchedMode = subMode;
                 break;
               case MODE_SHIFT_TO_BYTE_COMPACTION_MODE:
                 result.append((char) byteCompactionData[i]);
@@ -527,6 +529,7 @@ private static void decodeTextCompaction(int[] textCompactionData,
       }
       i++;
     }
+    return latchedMode;
   }
 
   /**
@@ -536,17 +539,14 @@ private static void decodeTextCompaction(int[] textCompactionData,
    *
    * @param mode      The byte compaction mode i.e. 901 or 924
    * @param codewords The array of codewords (data + error)
-   * @param encoding  Currently active character encoding
    * @param codeIndex The current index into the codeword array.
    * @param result    The decoded data is appended to the result.
    * @return The next index into the codeword array.
    */
   private static int byteCompaction(int mode,
                                     int[] codewords,
-                                    Charset encoding,
                                     int codeIndex,
-                                    StringBuilder result) {
-    ByteArrayOutputStream decodedBytes = new ByteArrayOutputStream();
+                                    ECIOutput result) {
     int count = 0;
     long value = 0;
     boolean end = false;
@@ -572,6 +572,7 @@ private static int byteCompaction(int mode,
             case BEGIN_MACRO_PDF417_CONTROL_BLOCK:
             case BEGIN_MACRO_PDF417_OPTIONAL_FIELD:
             case MACRO_PDF417_TERMINATOR:
+            case ECI_CHARSET:
               codeIndex--;
               end = true;
               break;
@@ -580,7 +581,7 @@ private static int byteCompaction(int mode,
                 // Decode every 5 codewords
                 // Convert to Base 256
                 for (int j = 0; j < 6; ++j) {
-                  decodedBytes.write((byte) (value >> (8 * (5 - j))));
+                  result.append((byte) (value >> (8 * (5 - j))));
                 }
                 value = 0;
                 count = 0;
@@ -598,7 +599,7 @@ private static int byteCompaction(int mode,
         // the last group of codewords is interpreted directly
         // as one byte per codeword, without compaction.
         for (int i = 0; i < count; i++) {
-          decodedBytes.write((byte) byteCompactedCodewords[i]);
+          result.append((byte) byteCompactedCodewords[i]);
         }
 
         break;
@@ -621,6 +622,7 @@ private static int byteCompaction(int mode,
               case BEGIN_MACRO_PDF417_CONTROL_BLOCK:
               case BEGIN_MACRO_PDF417_OPTIONAL_FIELD:
               case MACRO_PDF417_TERMINATOR:
+              case ECI_CHARSET:
                 codeIndex--;
                 end = true;
                 break;
@@ -630,19 +632,17 @@ private static int byteCompaction(int mode,
             // Decode every 5 codewords
             // Convert to Base 256
             for (int j = 0; j < 6; ++j) {
-              decodedBytes.write((byte) (value >> (8 * (5 - j))));
+              result.append((byte) (value >> (8 * (5 - j))));
             }
             value = 0;
             count = 0;
           }
         }
         break;
-    }
-    try {
-      result.append(decodedBytes.toString(encoding.name()));
-    } catch (UnsupportedEncodingException uee) {
-      // can't happen
-      throw new IllegalStateException(uee);
+      case ECI_CHARSET:
+        codeIndex--;
+        end = true;
+        break;
     }
     return codeIndex;
   }
@@ -655,7 +655,7 @@ private static int byteCompaction(int mode,
    * @param result    The decoded data is appended to the result.
    * @return The next index into the codeword array.
    */
-  private static int numericCompaction(int[] codewords, int codeIndex, StringBuilder result) throws FormatException {
+  private static int numericCompaction(int[] codewords, int codeIndex, ECIOutput result) throws FormatException {
     int count = 0;
     boolean end = false;
 
@@ -677,6 +677,7 @@ private static int numericCompaction(int[] codewords, int codeIndex, StringBuild
           case BEGIN_MACRO_PDF417_CONTROL_BLOCK:
           case BEGIN_MACRO_PDF417_OPTIONAL_FIELD:
           case MACRO_PDF417_TERMINATOR:
+          case ECI_CHARSET:
             codeIndex--;
             end = true;
             break;
@@ -749,4 +750,66 @@ private static String decodeBase900toBase10(int[] codewords, int count) throws F
     return resultString.substring(1);
   }
 
+  private static final class ECIOutput {
+    boolean needFlush = false;
+    String encodingName = "ISO-8859-1";
+    private ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+    StringBuilder result;
+
+    private ECIOutput() {
+      result = new StringBuilder();
+    }
+
+    private ECIOutput(int size) {
+      result = new StringBuilder(size);
+    }
+
+    private void append(byte value) {
+      bytes.write(value);
+      needFlush = true;
+    }
+
+    private void append(char value) {
+      bytes.write(value & 0xff); // only the low 8 bits of the char are buffered
+      needFlush = true;
+    }
+
+    private void append(String s) {
+      for (int i = 0; i < s.length(); i++) {
+        append(s.charAt(i));
+      }
+    }
+
+    private void appendECI(int value) throws FormatException {
+      flush(); // decode pending bytes with the old encoding; flush() also empties the buffer
+      CharacterSetECI charsetECI = CharacterSetECI.getCharacterSetECIByValue(value);
+      if (charsetECI == null) {
+        throw FormatException.getFormatInstance();
+      }
+      encodingName = charsetECI.name(); // flush() decodes the following bytes with this name
+    }
+
+    private void flush() {
+      if (needFlush) {
+        needFlush = false;
+        try {
+          result.append(bytes.toString(encodingName));
+        } catch (java.io.UnsupportedEncodingException uee) {
+          // can't happen
+          throw new IllegalStateException(uee);
+        }
+        bytes.reset(); // decoded bytes must not be decoded again by a later flush
+      }
+    }
+
+    private boolean isEmpty() {
+      return !needFlush && result.length() == 0;
+    }
+
+    public String toString() {
+      flush();
+      return result.toString();
+    }
+  }
+
 }
diff --git a/core/src/main/java/com/google/zxing/pdf417/encoder/PDF417HighLevelEncoder.java b/core/src/main/java/com/google/zxing/pdf417/encoder/PDF417HighLevelEncoder.java
index 607dea9d71..4b3871c10b 100644
--- a/core/src/main/java/com/google/zxing/pdf417/encoder/PDF417HighLevelEncoder.java
+++ b/core/src/main/java/com/google/zxing/pdf417/encoder/PDF417HighLevelEncoder.java
@@ -437,7 +437,7 @@ private static void encodeMultiECIBinary(ECIInput input,
         //encode the segment
         encodeBinary(subBytes(input, localStart, localEnd),
             0, localCount, localStart == startpos ? startmode : BYTE_COMPACTION, sb);
-        localStart = localEnd + 1;
+        localStart = localEnd;
       }
     }
   }
diff --git a/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java b/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java
index d9c4b6bc0b..0a6e82c33c 100644
--- a/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java
+++ b/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java
@@ -287,49 +287,49 @@ public void testBinaryData() throws WriterException, FormatException {
   @Test
   public void testECIEnglishHiragana() throws Exception {
     //multi ECI UTF-8, UTF-16 and ISO-8859-1
-    performECITest(new char[] {'a', '1', '\u3040'}, new float[] {20f, 1f, 10f}, 102583, 110914);
+    performECITest(new char[] {'a', '1', '\u3040'}, new float[] {20f, 1f, 10f}, 105825, 110914);
   }
 
   @Test
   public void testECIEnglishKatakana() throws Exception {
     //multi ECI UTF-8, UTF-16 and ISO-8859-1
-    performECITest(new char[] {'a', '1', '\u30a0'}, new float[] {20f, 1f, 10f}, 104691, 110914);
+    performECITest(new char[] {'a', '1', '\u30a0'}, new float[] {20f, 1f, 10f}, 109177, 110914);
   }
 
   @Test
   public void testECIEnglishHalfWidthKatakana() throws Exception {
     //single ECI
-    performECITest(new char[] {'a', '1', '\uff80'}, new float[] {20f, 1f, 10f}, 80463, 110914);
+    performECITest(new char[] {'a', '1', '\uff80'}, new float[] {20f, 1f, 10f}, 80617, 110914);
   }
 
   @Test
   public void testECIEnglishChinese() throws Exception {
     //single ECI
-    performECITest(new char[] {'a', '1', '\u4e00'}, new float[] {20f, 1f, 10f}, 95643, 110914);
+    performECITest(new char[] {'a', '1', '\u4e00'}, new float[] {20f, 1f, 10f}, 95797, 110914);
   }
 
   @Test
   public void testECIGermanCyrillic() throws Exception {
     //single ECI since the German Umlaut is in ISO-8859-1
-    performECITest(new char[] {'a', '1', '\u00c4', '\u042f'}, new float[] {20f, 1f, 1f, 10f}, 80529, 96007);
+    performECITest(new char[] {'a', '1', '\u00c4', '\u042f'}, new float[] {20f, 1f, 1f, 10f}, 80755, 96007);
   }
 
   @Test
   public void testECIEnglishCzechCyrillic1() throws Exception {
     //multi ECI between ISO-8859-2 and ISO-8859-5
-    performECITest(new char[] {'a', '1', '\u010c', '\u042f'}, new float[] {10f, 1f, 10f, 10f}, 91482, 124525);
+    performECITest(new char[] {'a', '1', '\u010c', '\u042f'}, new float[] {10f, 1f, 10f, 10f}, 102824, 124525);
   }
 
   @Test
   public void testECIEnglishCzechCyrillic2() throws Exception {
     //multi ECI between ISO-8859-2 and ISO-8859-5
-    performECITest(new char[] {'a', '1', '\u010c', '\u042f'}, new float[] {40f, 1f, 10f, 10f}, 79331, 88236);
+    performECITest(new char[] {'a', '1', '\u010c', '\u042f'}, new float[] {40f, 1f, 10f, 10f}, 81321, 88236);
   }
 
   @Test
   public void testECIEnglishArabicCyrillic() throws Exception {
     //multi ECI between UTF-8 (ISO-8859-6 is excluded in CharacterSetECI) and ISO-8859-5
-    performECITest(new char[] {'a', '1', '\u0620', '\u042f'}, new float[] {10f, 1f, 10f, 10f}, 111508, 124525);
+    performECITest(new char[] {'a', '1', '\u0620', '\u042f'}, new float[] {10f, 1f, 10f, 10f}, 118510, 124525);
   }
 
   private static void encodeDecode(String input, int expectedLength) throws WriterException, FormatException {
@@ -403,9 +403,7 @@ private static void performECITest(char[] chars,
     int utfLength = 0;
     for (int i = 0; i < 1000; i++) {
       String s = generateText(random, 100, chars, weights);
-      minLength += encodeDecode(s, null, true, false);
-      // TODO: Use this instead when the decoder supports multi ECI input
-      //minLength += encodeDecode(s, null, true, true);
+      minLength += encodeDecode(s, null, true, true);
       utfLength += encodeDecode(s, StandardCharsets.UTF_8, false, true);
     }
     assertEquals(expectedMinLength, minLength);

From 2ec70497b5c9b270f26d82529b5abf6d00e3934c Mon Sep 17 00:00:00 2001
From: Alex Geller <ag@4js.com>
Date: Mon, 7 Mar 2022 22:59:34 +0100
Subject: [PATCH 2/3] Syntactic changes: add @Override, make ECIOutput fields
 private, import UnsupportedEncodingException

---
 .../java/com/google/zxing/common/MinimalECIInput.java  |  1 +
 .../zxing/pdf417/decoder/DecodedBitStreamParser.java   | 10 ++++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/core/src/main/java/com/google/zxing/common/MinimalECIInput.java b/core/src/main/java/com/google/zxing/common/MinimalECIInput.java
index d840c57591..5955c0bd4f 100755
--- a/core/src/main/java/com/google/zxing/common/MinimalECIInput.java
+++ b/core/src/main/java/com/google/zxing/common/MinimalECIInput.java
@@ -208,6 +208,7 @@ public int getECIValue(int index) {
     return bytes[index] - 256;
   }
 
+  @Override
   public String toString() {
     StringBuilder result = new StringBuilder();
     for (int i = 0; i < length(); i++) {
diff --git a/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java b/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java
index 8a647a4a4a..3503449c7c 100644
--- a/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java
+++ b/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java
@@ -24,6 +24,7 @@
 import java.io.ByteArrayOutputStream;
 import java.math.BigInteger;
 import java.util.Arrays;
+import java.io.UnsupportedEncodingException;
 
 /**
  * <p>This class contains the methods for decoding the PDF417 codewords.</p>
@@ -751,10 +752,10 @@ private static String decodeBase900toBase10(int[] codewords, int count) throws F
   }
 
   private static final class ECIOutput {
-    boolean needFlush = false;
-    String encodingName = "ISO-8859-1";
+    private boolean needFlush = false;
+    private String encodingName = "ISO-8859-1";
     private ByteArrayOutputStream bytes = new ByteArrayOutputStream();
-    StringBuilder result;
+    private StringBuilder result;
 
     private ECIOutput() {
       result = new StringBuilder();
@@ -795,7 +796,7 @@ private void flush() {
         needFlush = false;
         try {
           result.append(bytes.toString(encodingName));
-        } catch (java.io.UnsupportedEncodingException uee) {
+        } catch (UnsupportedEncodingException uee) {
           // can't happen
           throw new IllegalStateException(uee);
         }
@@ -806,6 +807,7 @@ private boolean isEmpty() {
       return !needFlush && result.length() == 0;
     }
 
+    @Override
     public String toString() {
       flush();
       return result.toString();

From 561f187ac03444ca09275b37b1cb193dfc9b033d Mon Sep 17 00:00:00 2001
From: Alex Geller <ag@4js.com>
Date: Tue, 8 Mar 2022 21:09:20 +0100
Subject: [PATCH 3/3] - Fixed issue that ECIs were not processed correctly in
 all allowed locations in binary encoded data as specified in section 5.5.3.2
 of the spec - Added verifying unit test

---
 .../decoder/DecodedBitStreamParser.java       | 128 +++++-------------
 .../pdf417/decoder/PDF417DecoderTestCase.java |  24 +++-
 2 files changed, 57 insertions(+), 95 deletions(-)

diff --git a/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java b/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java
index 3503449c7c..67ecc0851b 100644
--- a/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java
+++ b/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java
@@ -547,103 +547,49 @@ private static Mode decodeTextCompaction(int[] textCompactionData,
   private static int byteCompaction(int mode,
                                     int[] codewords,
                                     int codeIndex,
-                                    ECIOutput result) {
-    int count = 0;
-    long value = 0;
+                                    ECIOutput result) throws FormatException {
     boolean end = false;
-
-    switch (mode) {
-      case BYTE_COMPACTION_MODE_LATCH:
-        // Total number of Byte Compaction characters to be encoded
-        // is not a multiple of 6
-
-        int[] byteCompactedCodewords = new int[6];
-        int nextCode = codewords[codeIndex++];
-        while ((codeIndex < codewords[0]) && !end) {
-          byteCompactedCodewords[count++] = nextCode;
-          // Base 900
-          value = 900 * value + nextCode;
-          nextCode = codewords[codeIndex++];
-          // perhaps it should be ok to check only nextCode >= TEXT_COMPACTION_MODE_LATCH
-          switch (nextCode) {
-            case TEXT_COMPACTION_MODE_LATCH:
-            case BYTE_COMPACTION_MODE_LATCH:
-            case NUMERIC_COMPACTION_MODE_LATCH:
-            case BYTE_COMPACTION_MODE_LATCH_6:
-            case BEGIN_MACRO_PDF417_CONTROL_BLOCK:
-            case BEGIN_MACRO_PDF417_OPTIONAL_FIELD:
-            case MACRO_PDF417_TERMINATOR:
-            case ECI_CHARSET:
+
+    while (codeIndex < codewords[0] && !end) {
+      // consume every ECI codeword, each followed by its value, that precedes the next data codeword
+      while (codeIndex < codewords[0] && codewords[codeIndex] == ECI_CHARSET) {
+        result.appendECI(codewords[++codeIndex]);
+        codeIndex++;
+      }
+
+      if (codeIndex >= codewords[0] || codewords[codeIndex] >= TEXT_COMPACTION_MODE_LATCH) {
+        end = true;
+      } else {
+        // read up to 5 data codewords as one base-900 number; a complete group unpacks to 6 bytes
+        long value = 0;
+        int count = 0;
+        do {
+          value = 900 * value + codewords[codeIndex++];
+          count++;
+        } while (count < 5 &&
+                 codeIndex < codewords[0] &&
+                 codewords[codeIndex] < TEXT_COMPACTION_MODE_LATCH);
+        if (count == 5 && (mode == BYTE_COMPACTION_MODE_LATCH_6 ||
+                           codeIndex < codewords[0] &&
+                           codewords[codeIndex] < TEXT_COMPACTION_MODE_LATCH)) {
+          for (int i = 0; i < 6; i++) {
+            result.append((byte) (value >> (8 * (5 - i))));
+          }
+        } else {
+          codeIndex -= count; // rewind: these codewords are re-read below as one byte each
+          while ((codeIndex < codewords[0]) && !end) {
+            int code = codewords[codeIndex++];
+            if (code < TEXT_COMPACTION_MODE_LATCH) {
+              result.append((byte) code);
+            } else if (code == ECI_CHARSET) {
+              result.appendECI(codewords[codeIndex++]);
+            } else {
               codeIndex--;
               end = true;
-              break;
-            default:
-              if ((count % 5 == 0) && (count > 0)) {
-                // Decode every 5 codewords
-                // Convert to Base 256
-                for (int j = 0; j < 6; ++j) {
-                  result.append((byte) (value >> (8 * (5 - j))));
-                }
-                value = 0;
-                count = 0;
-              }
-              break;
-          }
-        }
-
-        // if the end of all codewords is reached the last codeword needs to be added
-        if (codeIndex == codewords[0] && nextCode < TEXT_COMPACTION_MODE_LATCH) {
-          byteCompactedCodewords[count++] = nextCode;
-        }
-
-        // If Byte Compaction mode is invoked with codeword 901,
-        // the last group of codewords is interpreted directly
-        // as one byte per codeword, without compaction.
-        for (int i = 0; i < count; i++) {
-          result.append((byte) byteCompactedCodewords[i]);
-        }
-
-        break;
-
-      case BYTE_COMPACTION_MODE_LATCH_6:
-        // Total number of Byte Compaction characters to be encoded
-        // is an integer multiple of 6
-        while (codeIndex < codewords[0] && !end) {
-          int code = codewords[codeIndex++];
-          if (code < TEXT_COMPACTION_MODE_LATCH) {
-            count++;
-            // Base 900
-            value = 900 * value + code;
-          } else {
-            switch (code) {
-              case TEXT_COMPACTION_MODE_LATCH:
-              case BYTE_COMPACTION_MODE_LATCH:
-              case NUMERIC_COMPACTION_MODE_LATCH:
-              case BYTE_COMPACTION_MODE_LATCH_6:
-              case BEGIN_MACRO_PDF417_CONTROL_BLOCK:
-              case BEGIN_MACRO_PDF417_OPTIONAL_FIELD:
-              case MACRO_PDF417_TERMINATOR:
-              case ECI_CHARSET:
-                codeIndex--;
-                end = true;
-                break;
-            }
-          }
-          if ((count % 5 == 0) && (count > 0)) {
-            // Decode every 5 codewords
-            // Convert to Base 256
-            for (int j = 0; j < 6; ++j) {
-              result.append((byte) (value >> (8 * (5 - j))));
             }
-            value = 0;
-            count = 0;
           }
         }
-        break;
-      case ECI_CHARSET:
-        codeIndex--;
-        end = true;
-        break;
+      }
     }
     return codeIndex;
   }
diff --git a/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java b/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java
index 0a6e82c33c..e384178d94 100644
--- a/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java
+++ b/core/src/test/java/com/google/zxing/pdf417/decoder/PDF417DecoderTestCase.java
@@ -281,7 +281,7 @@ public void testBinaryData() throws WriterException, FormatException {
       random.nextBytes(bytes);
       total += encodeDecode(new String(bytes, StandardCharsets.ISO_8859_1));
     }
-    assertEquals(4190044, total); 
+    assertEquals(4190044, total);
   }
 
   @Test
@@ -332,6 +332,19 @@ public void testECIEnglishArabicCyrillic() throws Exception {
     performECITest(new char[] {'a', '1', '\u0620', '\u042f'}, new float[] {10f, 1f, 10f, 10f}, 118510, 124525);
   }
 
+  @Test
+  public void testBinaryMultiECI() throws Exception {
+    //Test the cases described in 5.5.5.3 "ECI and Byte Compaction mode using latch 924 and 901"
+    performDecodeTest(new int[] {5, 927, 4, 913, 200}, "\u010c"); // element 0 is the array length
+    performDecodeTest(new int[] {9, 927, 4, 913, 200, 927, 7, 913, 207}, "\u010c\u042f"); // 913 before each value
+    performDecodeTest(new int[] {9, 927, 4, 901, 200, 927, 7, 901, 207}, "\u010c\u042f"); // 901 before each value
+    performDecodeTest(new int[] {8, 927, 4, 901, 200, 927, 7, 207}, "\u010c\u042f"); // only one 901, before 200
+    performDecodeTest(new int[] {14, 927, 4, 901, 200, 927, 7, 207, 927, 4, 200, 927, 7, 207},
+         "\u010c\u042f\u010c\u042f");
+    performDecodeTest(new int[] {16, 927, 4, 924, 336, 432, 197, 51, 300, 927, 7, 348, 231, 311, 858, 567},
+        "\u010c\u010c\u010c\u010c\u010c\u010c\u042f\u042f\u042f\u042f\u042f\u042f");
+  }
+
   private static void encodeDecode(String input, int expectedLength) throws WriterException, FormatException {
     assertEquals(expectedLength, encodeDecode(input));
   }
@@ -349,9 +362,7 @@ private static int encodeDecode(String input, Charset charset, boolean autoECI,
       for (int i = 1; i < codewords.length; i++) {
         codewords[i] = s.charAt(i - 1);
       }
-      DecoderResult result = DecodedBitStreamParser.decode(codewords, "0");
-  
-      assertEquals(input, result.getText());
+      performDecodeTest(codewords, input);
     }
     return s.length() + 1;
   }
@@ -394,6 +405,11 @@ private static void performEncodeTest(char c, int[] expectedLengths) throws Writ
     }
   }
 
+  private static void performDecodeTest(int[] codewords, String expectedResult) throws FormatException {
+    String decodedText = DecodedBitStreamParser.decode(codewords, "0").getText(); // "0" is the ecLevel argument
+    assertEquals(expectedResult, decodedText);
+  }
+
   private static void performECITest(char[] chars,
                                float[] weights,
                                int expectedMinLength,