Skip to content

Commit

Permalink
CODEC-308: change NYSIIS encoding to not remove the first character i…
Browse files Browse the repository at this point in the history
…f it's an A or S
  • Loading branch information
Ben-Waters committed Jun 26, 2023
1 parent 8d7887a commit caa21a8
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
9 changes: 8 additions & 1 deletion src/main/java/org/apache/commons/codec/language/Nysiis.java
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@ public String nysiis(String str) {

// First character of key = first character of name.
final StringBuilder key = new StringBuilder(str.length());
key.append(str.charAt(0));
final char firstChar = str.charAt(0);
key.append(firstChar);

// Transcode remaining characters, incrementing by one character each time
final char[] chars = str.toCharArray();
Expand Down Expand Up @@ -314,6 +315,12 @@ public String nysiis(String str) {
if (lastChar == 'A') {
key.deleteCharAt(key.length() - 1);
}

if(key.length()==0){
// The key is now empty: its only remaining character was removed, likely because it was an S or A.
// Restore the first character so that the key is never empty.
key.append(firstChar);
}
}

final String string = key.toString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,8 @@ public void testDropBy() throws EncoderException {
new String[] { "JILES", "JAL" },
// violates 6: if the last two characters are AY, remove A
new String[] { "CARRAWAY", "CARY" }, // Original: CARAY
new String[] { "YAMADA", "YANAD" });
new String[] { "YAMADA", "YANAD" },
new String[] { "ASH", "A"});
}

@Test
Expand Down

0 comments on commit caa21a8

Please sign in to comment.