From 16289f086ae15ea18c70d387b542796e099d4a09 Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 12 Apr 2022 14:44:47 -0700 Subject: [PATCH] feat(bigquery): enhance SchemaFromJSON (#5877) * feat(bigquery): enhance SchemaFromJSON This PR does two things: It enhances SchemaFromJSON to work directly with the underlying TableFieldSchema messages from the discovery API definition, and adds a ToJSONFields method to Schema to export the same format consumed by SchemaFromJSON. With this, we're able to clear up the existing internal duplicate logic for this special case, and we manage to address two different feature requests at the same time. Fixes: https://github.com/googleapis/google-cloud-go/issues/5833 Fixes: https://github.com/googleapis/google-cloud-go/issues/5867 Co-authored-by: Steffany Brown <30247553+steffnay@users.noreply.github.com> --- bigquery/schema.go | 83 ++++++++++++++++------------------- bigquery/schema_test.go | 96 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 131 insertions(+), 48 deletions(-) diff --git a/bigquery/schema.go b/bigquery/schema.go index 9b117e40a72..78f2f762864 100644 --- a/bigquery/schema.go +++ b/bigquery/schema.go @@ -45,6 +45,22 @@ func (s Schema) Relax() Schema { return out } +// ToJSONFields exposes the schema as a JSON array of +// TableFieldSchema objects: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema +// +// Generally this isn't needed for direct usage of this library, but is +// provided for use cases where you're interacting with other tools +// that consume the underlying API representation directly such as the +// BQ CLI tool. +func (s Schema) ToJSONFields() ([]byte, error) { + var rawSchema []*bq.TableFieldSchema + for _, f := range s { + rawSchema = append(rawSchema, f.toBQ()) + } + // Use json.MarshalIndent to make the output more human-readable. + return json.MarshalIndent(rawSchema, "", " ") +} + // FieldSchema describes a single field. 
type FieldSchema struct { // The field name. @@ -539,16 +555,6 @@ func hasRecursiveType(t reflect.Type, seen *typeList) (bool, error) { return false, nil } -// bigQuerySchemaJSONField is an individual field in a JSON BigQuery table schema definition -// (as generated by https://github.com/GoogleCloudPlatform/protoc-gen-bq-schema). -type bigQueryJSONField struct { - Description string `json:"description"` - Fields []bigQueryJSONField `json:"fields"` - Mode string `json:"mode"` - Name string `json:"name"` - Type string `json:"type"` -} - // validateKnownType ensures a type is known (or alias of a known type). func validateKnownType(in FieldType) (FieldType, error) { if _, ok := fieldTypes[in]; !ok { @@ -561,52 +567,37 @@ func validateKnownType(in FieldType) (FieldType, error) { return in, nil } -// convertSchemaFromJSON generates a Schema: -func convertSchemaFromJSON(fs []bigQueryJSONField) (Schema, error) { - convertedSchema := Schema{} - for _, f := range fs { - convertedFieldSchema := &FieldSchema{ - Description: f.Description, - Name: f.Name, - Required: f.Mode == "REQUIRED", - Repeated: f.Mode == "REPEATED", - } - if len(f.Fields) > 0 { - convertedNestedFieldSchema, err := convertSchemaFromJSON(f.Fields) - if err != nil { - return nil, err - } - convertedFieldSchema.Schema = convertedNestedFieldSchema - } - - // Check that the field-type (string) maps to a known FieldType: - validType, err := validateKnownType(FieldType(f.Type)) - if err != nil { - return nil, err - } - convertedFieldSchema.Type = validType - convertedSchema = append(convertedSchema, convertedFieldSchema) - } - return convertedSchema, nil -} - -// SchemaFromJSON takes a JSON BigQuery table schema definition -// (as generated by https://github.com/GoogleCloudPlatform/protoc-gen-bq-schema) -// and returns a fully-populated Schema. +// SchemaFromJSON takes a native JSON BigQuery table schema definition and converts it to +// a populated Schema. 
The native API definition is used by tools such as the BQ CLI and +// https://github.com/GoogleCloudPlatform/protoc-gen-bq-schema. +// +// The expected format is a JSON array of TableFieldSchema objects from the underlying API: +// https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema func SchemaFromJSON(schemaJSON []byte) (Schema, error) { - var bigQuerySchema []bigQueryJSONField - // Make sure we actually have some content: if len(schemaJSON) == 0 { return nil, errEmptyJSONSchema } - if err := json.Unmarshal(schemaJSON, &bigQuerySchema); err != nil { + var rawSchema []*bq.TableFieldSchema + + if err := json.Unmarshal(schemaJSON, &rawSchema); err != nil { return nil, err } - return convertSchemaFromJSON(bigQuerySchema) + convertedSchema := Schema{} + for _, f := range rawSchema { + convField := bqToFieldSchema(f) + // Normalize the types. + validType, err := validateKnownType(convField.Type) + if err != nil { + return nil, err + } + convField.Type = validType + convertedSchema = append(convertedSchema, convField) + } + return convertedSchema, nil } type noStructError struct { diff --git a/bigquery/schema_test.go b/bigquery/schema_test.go index f877e36766a..17b7fc7157f 100644 --- a/bigquery/schema_test.go +++ b/bigquery/schema_test.go @@ -15,6 +15,7 @@ package bigquery import ( + "encoding/json" "fmt" "math/big" "reflect" @@ -24,6 +25,7 @@ import ( "cloud.google.com/go/civil" "cloud.google.com/go/internal/pretty" "cloud.google.com/go/internal/testutil" + "github.com/google/go-cmp/cmp" bq "google.golang.org/api/bigquery/v2" ) @@ -1167,6 +1169,33 @@ func TestSchemaFromJSON(t *testing.T) { }, }, }, + { + description: "Table with advanced parameters", + bqSchemaJSON: []byte(` +[ + {"name":"strfield","type":"STRING","mode":"NULLABLE","description":"foo","maxLength":"100"}, + {"name":"numfield","type":"BIGNUMERIC","description":"bar","mode":"REPEATED","precision":"10","scale":"5","policyTags":{"names":["baz"]}} +]`), + expectedSchema: Schema{ + 
&FieldSchema{ + Name: "strfield", + Description: "foo", + MaxLength: 100, + Type: "STRING", + }, + &FieldSchema{ + Name: "numfield", + Description: "bar", + Repeated: true, + Type: "BIGNUMERIC", + Precision: 10, + Scale: 5, + PolicyTags: &PolicyTagList{ + Names: []string{"baz"}, + }, + }, + }, + }, } for _, tc := range testCasesExpectingSuccess { convertedSchema, err := SchemaFromJSON(tc.bqSchemaJSON) @@ -1174,8 +1203,8 @@ func TestSchemaFromJSON(t *testing.T) { t.Errorf("encountered an error when converting JSON table schema (%s): %v", tc.description, err) continue } - if !testutil.Equal(convertedSchema, tc.expectedSchema) { - t.Errorf("generated JSON table schema (%s) differs from the expected schema", tc.description) + if diff := testutil.Diff(convertedSchema, tc.expectedSchema); diff != "" { + t.Errorf("%s: %s", tc.description, diff) } } @@ -1204,3 +1233,66 @@ func TestSchemaFromJSON(t *testing.T) { } } } + +func TestSchemaToJSONFields(t *testing.T) { + + // cmp option for comparing byte arrays without caring about whitespace. 
+ // courtesy of https://github.com/google/go-cmp/issues/224 + normalizeJSON := cmp.FilterValues(func(x, y []byte) bool { + return json.Valid(x) && json.Valid(y) + }, cmp.Transformer("ParseJSON", func(in []byte) (out interface{}) { + if err := json.Unmarshal(in, &out); err != nil { + panic(err) + } + return out + })) + + testCases := []struct { + description string + inSchema Schema + expectedJSON []byte + }{ + { + description: "basic schema", + inSchema: Schema{ + fieldSchema("foo", "strfield", "STRING", false, false, nil), + fieldSchema("bar", "intfield", "INTEGER", false, true, nil), + fieldSchema("baz", "bool_arr", "INTEGER", true, false, []string{"tag1"}), + }, + expectedJSON: []byte(`[ + { + "description": "foo", + "name": "strfield", + "type": "STRING" + }, + { + "description": "bar", + "mode": "REQUIRED", + "name": "intfield", + "type": "INTEGER" + }, + { + "description": "baz", + "mode": "REPEATED", + "name": "bool_arr", + "policyTags": { + "names": [ + "tag1" + ] + }, + "type": "INTEGER" + } +]`), + }, + } + for _, tc := range testCases { + got, err := tc.inSchema.ToJSONFields() + if err != nil { + t.Errorf("%s: %v", tc.description, err) + } + + if diff := cmp.Diff(got, tc.expectedJSON, normalizeJSON); diff != "" { + t.Errorf("%s: %s", tc.description, diff) + } + } +}