Skip to content

Commit

Permalink
feat(bigquery): enhance SchemaFromJSON (#5877)
Browse files Browse the repository at this point in the history
* feat(bigquery): enhance SchemaFromJSON

This PR does two things: It enhances SchemaFromJSON to
work directly with the underlying TableFieldSchema messages
from the discovery API definition, and adds a ToJSONFields
method to Schema to export the same format consumed by SchemaFromJSON.

With this, we're able to clear up the existing internal duplicate logic
for this special case, and we manage to address two different feature
requests at the same time.

Fixes: #5833
Fixes: #5867


Co-authored-by: Steffany Brown <30247553+steffnay@users.noreply.github.com>
  • Loading branch information
shollyman and steffnay committed Apr 12, 2022
1 parent 82d4f90 commit 16289f0
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 48 deletions.
83 changes: 37 additions & 46 deletions bigquery/schema.go
Expand Up @@ -45,6 +45,22 @@ func (s Schema) Relax() Schema {
return out
}

// ToJSONFields exposes the schema as a JSON array of
// TableFieldSchema objects: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema
//
// Generally this isn't needed for direct usage of this library, but is
// provided for use cases where you're interacting with other tools
// that consume the underlying API representation directly such as the
// BQ CLI tool.
func (s Schema) ToJSONFields() ([]byte, error) {
var rawSchema []*bq.TableFieldSchema
for _, f := range s {
rawSchema = append(rawSchema, f.toBQ())
}
// Use json.MarshalIndent to make the output more human-readable.
return json.MarshalIndent(rawSchema, "", " ")
}

// FieldSchema describes a single field.
type FieldSchema struct {
// The field name.
Expand Down Expand Up @@ -539,16 +555,6 @@ func hasRecursiveType(t reflect.Type, seen *typeList) (bool, error) {
return false, nil
}

// bigQueryJSONField is an individual field in a JSON BigQuery table schema definition
// (as generated by https://github.com/GoogleCloudPlatform/protoc-gen-bq-schema).
//
// Each member is decoded from the JSON key named in its struct tag.
type bigQueryJSONField struct {
Description string `json:"description"`
// Fields holds nested field definitions of the same shape.
Fields []bigQueryJSONField `json:"fields"`
// Mode is compared against "REQUIRED" and "REPEATED" by convertSchemaFromJSON.
Mode string `json:"mode"`
Name string `json:"name"`
// Type is the field type as a string; convertSchemaFromJSON converts it to a FieldType.
Type string `json:"type"`
}

// validateKnownType ensures a type is known (or alias of a known type).
func validateKnownType(in FieldType) (FieldType, error) {
if _, ok := fieldTypes[in]; !ok {
Expand All @@ -561,52 +567,37 @@ func validateKnownType(in FieldType) (FieldType, error) {
return in, nil
}

// convertSchemaFromJSON turns decoded JSON field definitions into a Schema,
// recursing into each field's nested Fields.
//
// For every field, nested fields are converted first, then the field's type
// string is passed through validateKnownType and the FieldType it returns is
// stored. The first error from either step aborts the conversion and is
// returned with a nil Schema. An empty input yields an empty, non-nil Schema.
func convertSchemaFromJSON(fs []bigQueryJSONField) (Schema, error) {
	schema := Schema{}
	for _, jf := range fs {
		// Stays nil when the field has no nested definitions.
		var nested Schema
		if len(jf.Fields) > 0 {
			sub, err := convertSchemaFromJSON(jf.Fields)
			if err != nil {
				return nil, err
			}
			nested = sub
		}

		// Check that the field-type (string) maps to a known FieldType.
		fieldType, err := validateKnownType(FieldType(jf.Type))
		if err != nil {
			return nil, err
		}

		schema = append(schema, &FieldSchema{
			Description: jf.Description,
			Name:        jf.Name,
			Required:    jf.Mode == "REQUIRED",
			Repeated:    jf.Mode == "REPEATED",
			Schema:      nested,
			Type:        fieldType,
		})
	}
	return schema, nil
}

// SchemaFromJSON takes a JSON BigQuery table schema definition
// (as generated by https://github.com/GoogleCloudPlatform/protoc-gen-bq-schema)
// and returns a fully-populated Schema.
// SchemaFromJSON takes a native JSON BigQuery table schema definition and converts it to
// a populated Schema. The native API definition is used by tools such as the BQ CLI and
// https://github.com/GoogleCloudPlatform/protoc-gen-bq-schema.
//
// The expected format is a JSON array of TableFieldSchema objects from the underlying API:
// https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema
//
// It returns errEmptyJSONSchema when schemaJSON is empty, and otherwise
// returns any error from json.Unmarshal or validateKnownType with a nil Schema.
func SchemaFromJSON(schemaJSON []byte) (Schema, error) {

var bigQuerySchema []bigQueryJSONField

// Make sure we actually have some content:
if len(schemaJSON) == 0 {
return nil, errEmptyJSONSchema
}

if err := json.Unmarshal(schemaJSON, &bigQuerySchema); err != nil {
var rawSchema []*bq.TableFieldSchema

if err := json.Unmarshal(schemaJSON, &rawSchema); err != nil {
return nil, err
}

return convertSchemaFromJSON(bigQuerySchema)
convertedSchema := Schema{}
for _, f := range rawSchema {
// NOTE(review): a JSON null array element decodes to a nil *bq.TableFieldSchema;
// confirm bqToFieldSchema tolerates a nil argument.
convField := bqToFieldSchema(f)
// Normalize the types.
// NOTE(review): only this top-level field's Type is passed through
// validateKnownType in this loop; confirm whether nested field types are
// validated elsewhere (convertSchemaFromJSON validated every level).
validType, err := validateKnownType(convField.Type)
if err != nil {
return nil, err
}
convField.Type = validType
convertedSchema = append(convertedSchema, convField)
}
return convertedSchema, nil
}

type noStructError struct {
Expand Down
96 changes: 94 additions & 2 deletions bigquery/schema_test.go
Expand Up @@ -15,6 +15,7 @@
package bigquery

import (
"encoding/json"
"fmt"
"math/big"
"reflect"
Expand All @@ -24,6 +25,7 @@ import (
"cloud.google.com/go/civil"
"cloud.google.com/go/internal/pretty"
"cloud.google.com/go/internal/testutil"
"github.com/google/go-cmp/cmp"
bq "google.golang.org/api/bigquery/v2"
)

Expand Down Expand Up @@ -1167,15 +1169,42 @@ func TestSchemaFromJSON(t *testing.T) {
},
},
},
{
description: "Table with advanced parameters",
bqSchemaJSON: []byte(`
[
{"name":"strfield","type":"STRING","mode":"NULLABLE","description":"foo","maxLength":"100"},
{"name":"numfield","type":"BIGNUMERIC","description":"bar","mode":"REPEATED","precision":"10","scale":"5","policyTags":{"names":["baz"]}}
]`),
expectedSchema: Schema{
&FieldSchema{
Name: "strfield",
Description: "foo",
MaxLength: 100,
Type: "STRING",
},
&FieldSchema{
Name: "numfield",
Description: "bar",
Repeated: true,
Type: "BIGNUMERIC",
Precision: 10,
Scale: 5,
PolicyTags: &PolicyTagList{
Names: []string{"baz"},
},
},
},
},
}
for _, tc := range testCasesExpectingSuccess {
convertedSchema, err := SchemaFromJSON(tc.bqSchemaJSON)
if err != nil {
t.Errorf("encountered an error when converting JSON table schema (%s): %v", tc.description, err)
continue
}
if !testutil.Equal(convertedSchema, tc.expectedSchema) {
t.Errorf("generated JSON table schema (%s) differs from the expected schema", tc.description)
if diff := testutil.Diff(convertedSchema, tc.expectedSchema); diff != "" {
t.Errorf("%s: %s", tc.description, diff)
}
}

Expand Down Expand Up @@ -1204,3 +1233,66 @@ func TestSchemaFromJSON(t *testing.T) {
}
}
}

// TestSchemaToJSONFields checks that Schema.ToJSONFields emits the expected
// JSON for a set of schemas. Output and expectation are compared as parsed
// JSON, so whitespace and indentation differences are ignored.
func TestSchemaToJSONFields(t *testing.T) {

	// cmp option for comparing byte arrays without caring about whitespace.
	// courtesy of https://github.com/google/go-cmp/issues/224
	// The filter applies the transformer only when both sides are valid JSON,
	// so the Unmarshal inside the transformer is not expected to fail.
	normalizeJSON := cmp.FilterValues(func(x, y []byte) bool {
		return json.Valid(x) && json.Valid(y)
	}, cmp.Transformer("ParseJSON", func(in []byte) (out interface{}) {
		if err := json.Unmarshal(in, &out); err != nil {
			panic(err)
		}
		return out
	}))

	testCases := []struct {
		description  string
		inSchema     Schema
		expectedJSON []byte
	}{
		{
			description: "basic schema",
			inSchema: Schema{
				fieldSchema("foo", "strfield", "STRING", false, false, nil),
				fieldSchema("bar", "intfield", "INTEGER", false, true, nil),
				fieldSchema("baz", "bool_arr", "INTEGER", true, false, []string{"tag1"}),
			},
			expectedJSON: []byte(`[
{
"description": "foo",
"name": "strfield",
"type": "STRING"
},
{
"description": "bar",
"mode": "REQUIRED",
"name": "intfield",
"type": "INTEGER"
},
{
"description": "baz",
"mode": "REPEATED",
"name": "bool_arr",
"policyTags": {
"names": [
"tag1"
]
},
"type": "INTEGER"
}
]`),
		},
	}
	for _, tc := range testCases {
		got, err := tc.inSchema.ToJSONFields()
		if err != nil {
			t.Errorf("%s: %v", tc.description, err)
			// Skip the comparison: diffing the nil output would only add a
			// second, misleading failure for the same case.
			continue
		}

		if diff := cmp.Diff(got, tc.expectedJSON, normalizeJSON); diff != "" {
			t.Errorf("%s: %s", tc.description, diff)
		}
	}
}

0 comments on commit 16289f0

Please sign in to comment.