Skip to content

Commit

Permalink
Finish support for RFC4180 for CSV bulk insert operations (#2338)
Browse files Browse the repository at this point in the history
  • Loading branch information
funkyjive committed Apr 1, 2024
1 parent 539b117 commit 662a266
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,47 @@ private void initFileReader(InputStreamReader sr, String encoding, String demlim
}
}

/*
 * Reads one logical CSV record from fileReader when delimiter escaping is enabled.
 *
 * RFC4180 specifies the rules for quoted fields. It allows quoted string data to contain line breaks, provided the
 * contents otherwise conform to the rules for escaping quotes. For example, the following is valid:
 * "a","b","c"
 * "aaa","b <-- newline is retained in data field
 * bb","c"
 * "aa","bb","cc"
 * We cannot simply use fileReader.readLine() to read these records but instead must continue reading until we reach
 * a line terminator that is not contained within quotes (i.e. an even number of quote characters has been seen).
 *
 * Line terminators are recognized the same way readLine() does: "\n", "\r", or "\r" immediately followed by "\n".
 *
 * Returns the record without its terminator, or null only when the stream is exhausted and no characters were read
 * (same contract as readLine()). A final record that is not followed by a terminator is still returned.
 *
 * Throws SQLServerException if the stream ends inside a quoted field or if the underlying read fails.
 */
private String readLineEscapeDelimiters() throws SQLServerException {
    int quoteCount = 0;
    boolean terminatorSeen = false;
    StringBuilder sb = new StringBuilder();
    try {
        int c;
        while ((c = fileReader.read()) != -1) {
            if ((c == '\n' || c == '\r') && quoteCount % 2 == 0) { // newlines only end the record if we are not in quotes
                terminatorSeen = true;
                if (c == '\r') {
                    // Only a '\r' can be the first half of a two-character terminator. Peeking after '\n' would
                    // swallow a following blank line ("\n\n").
                    fileReader.mark(1);
                    if (fileReader.read() != '\n') {
                        fileReader.reset(); // lone '\r': unread the peeked char so it goes into the next record
                    }
                }
                break;
            }
            sb.append((char) c);
            if (c == '"') {
                quoteCount++;
            }
        }
        if (!terminatorSeen) { // the loop ended because the stream is exhausted
            if (quoteCount % 2 != 0) { // stream ended, but we are within quotes -- data problem
                throw new SQLServerException(SQLServerException.getErrString("R_InvalidCSVQuotes"), null, 0, null);
            }
            if (sb.length() == 0) { // keep semantics of readLine() by returning a null when there is no more data
                return null;
            }
            // otherwise fall through: the last record simply has no trailing line terminator
        }
    } catch (IOException e) {
        throw new SQLServerException(e.getMessage(), null, 0, e);
    }
    return sb.toString();
}

/*
 * Points the inherited loggerPackageName field at the logger name used for SQLServerBulkCSVFileRecord.
 */
private void initLoggerResources() {
    final String csvRecordLoggerName = "com.microsoft.sqlserver.jdbc.SQLServerBulkCSVFileRecord";
    super.loggerPackageName = csvRecordLoggerName;
}
Expand Down Expand Up @@ -526,7 +567,7 @@ else if ((null != columnNames) && (columnNames.length >= positionInSource))
@Override
public boolean next() throws SQLServerException {
try {
currentLine = fileReader.readLine();
currentLine = escapeDelimiters ? readLineEscapeDelimiters() : fileReader.readLine();
} catch (IOException e) {
throw new SQLServerException(e.getMessage(), null, 0, e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ public void testEscapeColumnDelimitersCSV() throws Exception {
/*
* The list below is a copy of inputFileDelimiterEscape with quotes removed.
*/
String[][] expectedEscaped = new String[11][4];
String[][] expectedEscaped = new String[12][4];
expectedEscaped[0] = new String[] {"test", " test\"", "no@split", " testNoQuote", ""};
expectedEscaped[1] = new String[] {null, null, null, null, ""};
expectedEscaped[2] = new String[] {"\"", "test\"test", "test@\" test", null, ""};
Expand All @@ -166,6 +166,7 @@ public void testEscapeColumnDelimitersCSV() throws Exception {
expectedEscaped[8] = new String[] {"1997", "Ford", "E350", "Super@ \"luxurious\" truck", ""};
expectedEscaped[9] = new String[] {"1997", "Ford", "E350", "E63", ""};
expectedEscaped[10] = new String[] {"1997", "Ford", "E350", " Super luxurious truck ", ""};
expectedEscaped[11] = new String[] {"1997", "F\r\no\r\nr\r\nd", "E350", "\"Super\" \"luxurious\" \"truck\"", ""};

try (Connection con = getConnection(); Statement stmt = con.createStatement();
SQLServerBulkCopy bulkCopy = new SQLServerBulkCopy(con);
Expand Down
4 changes: 4 additions & 0 deletions src/test/resources/BulkCopyCSVTestInputDelimiterEscape.csv
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@
9@1997@Ford@E350@"Super@ ""luxurious"" truck"@
10@1997@ "Ford" @E350@ "E63"@
11@1997@Ford@E350@" Super luxurious truck "@
12@1997@"F
o
r
d"@"E350"@"""Super"" ""luxurious"" ""truck"""@

0 comments on commit 662a266

Please sign in to comment.