From e3533ad687d32abb088b7fba96dbe8a7341bd068 Mon Sep 17 00:00:00 2001 From: Seth Hollyman Date: Fri, 29 Jul 2022 20:08:50 +0000 Subject: [PATCH 1/2] feat(bigquery): add PreserveAsciiControlCharacters support for CSV --- bigquery/external.go | 32 ++++++++++++++++----------- bigquery/file.go | 2 ++ bigquery/file_test.go | 51 +++++++++++++++++++++++-------------------- bigquery/go.mod | 2 +- bigquery/go.sum | 4 ++-- 5 files changed, 51 insertions(+), 40 deletions(-) diff --git a/bigquery/external.go b/bigquery/external.go index c5337a00216..4790e3f3223 100644 --- a/bigquery/external.go +++ b/bigquery/external.go @@ -230,17 +230,22 @@ type CSVOptions struct { // An optional custom string that will represent a NULL // value in CSV import data. NullMarker string + + // Preserves the embedded ASCII control characters (the first 32 characters in the ASCII-table, + // from '\\x00' to '\\x1F') when loading from CSV. Only applicable to CSV, ignored for other formats. + PreserveAsciiControlCharacters bool } func (o *CSVOptions) populateExternalDataConfig(c *bq.ExternalDataConfiguration) { c.CsvOptions = &bq.CsvOptions{ - AllowJaggedRows: o.AllowJaggedRows, - AllowQuotedNewlines: o.AllowQuotedNewlines, - Encoding: string(o.Encoding), - FieldDelimiter: o.FieldDelimiter, - Quote: o.quote(), - SkipLeadingRows: o.SkipLeadingRows, - NullMarker: o.NullMarker, + AllowJaggedRows: o.AllowJaggedRows, + AllowQuotedNewlines: o.AllowQuotedNewlines, + Encoding: string(o.Encoding), + FieldDelimiter: o.FieldDelimiter, + Quote: o.quote(), + SkipLeadingRows: o.SkipLeadingRows, + NullMarker: o.NullMarker, + PreserveAsciiControlCharacters: o.PreserveAsciiControlCharacters, } } @@ -267,12 +272,13 @@ func (o *CSVOptions) setQuote(ps *string) { func bqToCSVOptions(q *bq.CsvOptions) *CSVOptions { o := &CSVOptions{ - AllowJaggedRows: q.AllowJaggedRows, - AllowQuotedNewlines: q.AllowQuotedNewlines, - Encoding: Encoding(q.Encoding), - FieldDelimiter: q.FieldDelimiter, - SkipLeadingRows: q.SkipLeadingRows, - NullMarker: q.NullMarker, + AllowJaggedRows: q.AllowJaggedRows, + AllowQuotedNewlines: q.AllowQuotedNewlines, + Encoding: Encoding(q.Encoding), + FieldDelimiter: q.FieldDelimiter, + SkipLeadingRows: q.SkipLeadingRows, + NullMarker: q.NullMarker, + PreserveAsciiControlCharacters: q.PreserveAsciiControlCharacters, } o.setQuote(q.Quote) return o diff --git a/bigquery/file.go b/bigquery/file.go index 44eab5dc070..531544b03a9 100644 --- a/bigquery/file.go +++ b/bigquery/file.go @@ -93,6 +93,7 @@ func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) { conf.IgnoreUnknownValues = fc.IgnoreUnknownValues conf.MaxBadRecords = fc.MaxBadRecords conf.NullMarker = fc.NullMarker + conf.PreserveAsciiControlCharacters = fc.PreserveAsciiControlCharacters if fc.Schema != nil { conf.Schema = fc.Schema.toBQ() } @@ -120,6 +121,7 @@ func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) { fc.Encoding = Encoding(conf.Encoding) fc.FieldDelimiter = conf.FieldDelimiter fc.CSVOptions.NullMarker = conf.NullMarker + fc.CSVOptions.PreserveAsciiControlCharacters = conf.PreserveAsciiControlCharacters fc.CSVOptions.setQuote(conf.Quote) } diff --git a/bigquery/file_test.go b/bigquery/file_test.go index e57633f3635..045dfde7de1 100644 --- a/bigquery/file_test.go +++ b/bigquery/file_test.go @@ -33,13 +33,14 @@ var ( nestedFieldSchema(), }, CSVOptions: CSVOptions{ - Quote: hyphen, - FieldDelimiter: "\t", - SkipLeadingRows: 8, - AllowJaggedRows: true, - AllowQuotedNewlines: true, - Encoding: UTF_8, - NullMarker: "marker", + Quote: hyphen, + FieldDelimiter: "\t", + SkipLeadingRows: 8, + AllowJaggedRows: true, + AllowQuotedNewlines: true, + Encoding: UTF_8, + NullMarker: "marker", + PreserveAsciiControlCharacters: true, }, } ) @@ -63,16 +64,17 @@ func TestFileConfigPopulateLoadConfig(t *testing.T) { description: "csv", fileConfig: &fc, want: &bq.JobConfigurationLoad{ - SourceFormat: "CSV", - FieldDelimiter: "\t", - SkipLeadingRows: 8, - AllowJaggedRows: true, - AllowQuotedNewlines: true, - Autodetect: true, - Encoding: "UTF-8", - MaxBadRecords: 7, - IgnoreUnknownValues: true, - NullMarker: "marker", + SourceFormat: "CSV", + FieldDelimiter: "\t", + SkipLeadingRows: 8, + AllowJaggedRows: true, + AllowQuotedNewlines: true, + Autodetect: true, + Encoding: "UTF-8", + MaxBadRecords: 7, + IgnoreUnknownValues: true, + NullMarker: "marker", + PreserveAsciiControlCharacters: true, Schema: &bq.TableSchema{ Fields: []*bq.TableFieldSchema{ bqStringFieldSchema(), @@ -150,13 +152,14 @@ func TestFileConfigPopulateExternalDataConfig(t *testing.T) { bqNestedFieldSchema(), }}, CsvOptions: &bq.CsvOptions{ - AllowJaggedRows: true, - AllowQuotedNewlines: true, - Encoding: "UTF-8", - FieldDelimiter: "\t", - Quote: &hyphen, - SkipLeadingRows: 8, - NullMarker: "marker", + AllowJaggedRows: true, + AllowQuotedNewlines: true, + Encoding: "UTF-8", + FieldDelimiter: "\t", + Quote: &hyphen, + SkipLeadingRows: 8, + NullMarker: "marker", + PreserveAsciiControlCharacters: true, }, }, }, diff --git a/bigquery/go.mod b/bigquery/go.mod index 2127119efe4..12232edf95d 100644 --- a/bigquery/go.mod +++ b/bigquery/go.mod @@ -13,7 +13,7 @@ require ( go.opencensus.io v0.23.0 golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f - google.golang.org/api v0.87.0 + google.golang.org/api v0.90.0 google.golang.org/genproto v0.0.0-20220714211235-042d03aeabc9 google.golang.org/grpc v1.47.0 google.golang.org/protobuf v1.28.0 diff --git a/bigquery/go.sum b/bigquery/go.sum index 640755dc763..1285857bfa1 100644 --- a/bigquery/go.sum +++ b/bigquery/go.sum @@ -514,8 +514,8 @@ google.golang.org/api v0.78.0/go.mod h1:1Sg78yoMLOhlQTeF+ARBoytAcH1NNyyl390YMy6r google.golang.org/api v0.80.0/go.mod h1:xY3nI94gbvBrE0J6NHXhxOmW97HG7Khjkku6AFB3Hyg= google.golang.org/api v0.84.0/go.mod h1:NTsGnUFJMYROtiquksZHBWtHfeMC7iYthki7Eq3pa8o= google.golang.org/api v0.85.0/go.mod h1:AqZf8Ep9uZ2pyTvgL+x0D3Zt0eoT9b5E8fmzfu6FO2g= -google.golang.org/api v0.87.0 h1:pUQVF/F+X7Tl1lo4LJoJf5BOpjtmINU80p9XpYTU2p4= -google.golang.org/api v0.87.0/go.mod h1:+Sem1dnrKlrXMR/X0bPnMWyluQe4RsNoYfmNLhOIkzw= +google.golang.org/api v0.90.0 h1:WMnUWAvihIClUYFNeFA69VTuR3duKS3IalMGDQcLvq8= +google.golang.org/api v0.90.0/go.mod h1:+Sem1dnrKlrXMR/X0bPnMWyluQe4RsNoYfmNLhOIkzw= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= From b4c74a5f90ca83917ec0c16e479b3baaf2fa2fc6 Mon Sep 17 00:00:00 2001 From: Seth Hollyman Date: Mon, 22 Aug 2022 22:11:34 +0000 Subject: [PATCH 2/2] lint rename --- bigquery/external.go | 6 +++--- bigquery/file.go | 4 ++-- bigquery/file_test.go | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bigquery/external.go b/bigquery/external.go index 4790e3f3223..3d3442b7fa3 100644 --- a/bigquery/external.go +++ b/bigquery/external.go @@ -233,7 +233,7 @@ type CSVOptions struct { // Preserves the embedded ASCII control characters (the first 32 characters in the ASCII-table, // from '\\x00' to '\\x1F') when loading from CSV. Only applicable to CSV, ignored for other formats. - PreserveAsciiControlCharacters bool + PreserveASCIIControlCharacters bool } func (o *CSVOptions) populateExternalDataConfig(c *bq.ExternalDataConfiguration) { @@ -245,7 +245,7 @@ func (o *CSVOptions) populateExternalDataConfig(c *bq.ExternalDataConfiguration) Quote: o.quote(), SkipLeadingRows: o.SkipLeadingRows, NullMarker: o.NullMarker, - PreserveAsciiControlCharacters: o.PreserveAsciiControlCharacters, + PreserveAsciiControlCharacters: o.PreserveASCIIControlCharacters, } } @@ -278,7 +278,7 @@ func bqToCSVOptions(q *bq.CsvOptions) *CSVOptions { FieldDelimiter: q.FieldDelimiter, SkipLeadingRows: q.SkipLeadingRows, NullMarker: q.NullMarker, - PreserveAsciiControlCharacters: q.PreserveAsciiControlCharacters, + PreserveASCIIControlCharacters: q.PreserveAsciiControlCharacters, } o.setQuote(q.Quote) return o diff --git a/bigquery/file.go b/bigquery/file.go index 531544b03a9..9afdbb53599 100644 --- a/bigquery/file.go +++ b/bigquery/file.go @@ -93,7 +93,7 @@ func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) { conf.IgnoreUnknownValues = fc.IgnoreUnknownValues conf.MaxBadRecords = fc.MaxBadRecords conf.NullMarker = fc.NullMarker - conf.PreserveAsciiControlCharacters = fc.PreserveAsciiControlCharacters + conf.PreserveAsciiControlCharacters = fc.PreserveASCIIControlCharacters if fc.Schema != nil { conf.Schema = fc.Schema.toBQ() } @@ -121,7 +121,7 @@ func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) { fc.Encoding = Encoding(conf.Encoding) fc.FieldDelimiter = conf.FieldDelimiter fc.CSVOptions.NullMarker = conf.NullMarker - fc.CSVOptions.PreserveAsciiControlCharacters = conf.PreserveAsciiControlCharacters + fc.CSVOptions.PreserveASCIIControlCharacters = conf.PreserveAsciiControlCharacters fc.CSVOptions.setQuote(conf.Quote) } diff --git a/bigquery/file_test.go b/bigquery/file_test.go index 045dfde7de1..77ba8b2610b 100644 --- a/bigquery/file_test.go +++ b/bigquery/file_test.go @@ -40,7 +40,7 @@ var ( AllowQuotedNewlines: true, Encoding: UTF_8, NullMarker: "marker", - PreserveAsciiControlCharacters: true, + PreserveASCIIControlCharacters: true, }, } )