// IntervalValue is a go type for representing BigQuery INTERVAL values.
// Intervals are represented using three distinct parts:
// * Years and Months
// * Days
// * Time (Hours/Mins/Seconds/Fractional Seconds).
//
// More information about BigQuery INTERVAL types can be found at:
// https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type
//
// IntervalValue is EXPERIMENTAL and subject to change or removal without notice.
type IntervalValue struct {
	// In canonical form, Years and Months share a consistent sign and are
	// reduced to avoid large month values.
	Years  int32
	Months int32

	// In canonical form, Days are independent of the other parts and can have
	// their own sign. There is no attempt to reduce larger Day values into the
	// Y-M part.
	Days int32

	// In canonical form, the time parts all share a consistent sign and are reduced.
	Hours   int32
	Minutes int32
	Seconds int32
	// This represents the fractional seconds as nanoseconds.
	SubSecondNanos int32
}

// String returns string representation of the interval value using the canonical format.
// The canonical format is as follows:
//
// [sign]Y-M [sign]D [sign]H:M:S[.F]
//
// The receiver is never modified. If it is not already canonical, a canonical
// copy is computed and rendered instead.
func (iv *IntervalValue) String() string {
	src := iv
	if !iv.IsCanonical() {
		src = iv.Canonicalize()
	}

	var b strings.Builder
	// The sign of the Y-M part is emitted once, up front. Deriving it from
	// both fields keeps it visible when Years is zero (e.g. "-0-1").
	if src.Years < 0 || src.Months < 0 {
		b.WriteByte('-')
	}
	fmt.Fprintf(&b, "%d-%d %d ", int32abs(src.Years), int32abs(src.Months), src.Days)

	// Same treatment for the time part, so "-0:5:6" does not lose its sign.
	if src.Hours < 0 || src.Minutes < 0 || src.Seconds < 0 || src.SubSecondNanos < 0 {
		b.WriteByte('-')
	}
	fmt.Fprintf(&b, "%d:%d:%d", int32abs(src.Hours), int32abs(src.Minutes), int32abs(src.Seconds))

	// The fraction is rendered from the magnitude, zero padded to nine digits
	// and then stripped of trailing zeros.
	if nanos := int32abs(src.SubSecondNanos); nanos != 0 {
		b.WriteByte('.')
		b.WriteString(strings.TrimRight(fmt.Sprintf("%09d", nanos), "0"))
	}
	return b.String()
}

// intervalPart is used for parsing string representations.
type intervalPart int

// The parts of an interval, in the order they appear in canonical format.
const (
	yearsPart intervalPart = iota
	monthsPart
	daysPart
	hoursPart
	minutesPart
	secondsPart
	subsecsPart
)

// String returns the upper-case name of the part, or UNKNOWN(n) for values
// outside the declared constants.
func (i intervalPart) String() string {
	knownParts := [...]string{"YEARS", "MONTHS", "DAYS", "HOURS", "MINUTES", "SECONDS", "SUBSECONDS"}
	if i < 0 || int(i) >= len(knownParts) {
		return fmt.Sprintf("UNKNOWN(%d)", int(i))
	}
	return knownParts[i]
}

// canonicalParts indicates the parse order for canonical format.
var canonicalParts = []intervalPart{yearsPart, monthsPart, daysPart, hoursPart, minutesPart, secondsPart, subsecsPart}

// ParseInterval parses an interval in canonical string format and returns the IntervalValue it represents.
//
// The sign written in front of the years applies to the months as well, and
// the sign written in front of the hours applies to minutes, seconds and
// fractional seconds. An error is returned when a required part is missing,
// when a part does not fit in an int32, or when unparsed input (other than
// whitespace) remains after the seconds part.
func ParseInterval(value string) (*IntervalValue, error) {
	iVal := &IntervalValue{}
	// Signs are tracked from the text rather than from the parsed number so
	// that "-0" still marks the rest of its group as negative.
	var ymNegative, timeNegative bool
	rest := value
	for _, part := range canonicalParts {
		leadsGroup := part == yearsPart || part == hoursPart
		negative := leadsGroup && strings.HasPrefix(trimPrefix(part, rest), "-")

		remaining, v, err := getPartValue(part, rest)
		if err != nil {
			return nil, err
		}
		switch part {
		case yearsPart:
			ymNegative = negative
			iVal.Years = v
		case monthsPart:
			if ymNegative {
				v = -v
			}
			iVal.Months = v
		case daysPart:
			iVal.Days = v
		case hoursPart:
			timeNegative = negative
			iVal.Hours = v
		case minutesPart:
			if timeNegative {
				v = -v
			}
			iVal.Minutes = v
		case secondsPart:
			if timeNegative {
				v = -v
			}
			iVal.Seconds = v
		case subsecsPart:
			if timeNegative {
				v = -v
			}
			iVal.SubSecondNanos = v
		default:
			return nil, fmt.Errorf("encountered invalid part %s during parse", part)
		}
		rest = remaining
	}
	if strings.TrimSpace(rest) != "" {
		return nil, fmt.Errorf("unexpected trailing input %q in interval %q", rest, value)
	}
	return iVal, nil
}

// getPartValue strips the separator that precedes the given part and parses
// the part's numeric value. It returns the unconsumed remainder of s, the
// parsed value, and any parse error.
func getPartValue(part intervalPart, s string) (string, int32, error) {
	return getNumVal(part, trimPrefix(part, s))
}

// trimPrefix removes formatting prefix relevant to the given type.
// Every leading occurrence of the separator is removed; unknown parts are
// returned unchanged.
func trimPrefix(part intervalPart, s string) string {
	switch part {
	case yearsPart, daysPart, hoursPart:
		return strings.TrimLeft(s, " ")
	case monthsPart:
		return strings.TrimLeft(s, "-")
	case minutesPart, secondsPart:
		return strings.TrimLeft(s, ":")
	case subsecsPart:
		return strings.TrimLeft(s, ".")
	}
	return s
}

// getNumVal parses the leading number of s for the given part.
//
// Years, days and hours may carry a leading '+' or '-'. The subseconds part is
// optional, is read as a decimal mantissa, and is converted to nanoseconds
// (digits beyond the ninth are dropped). It returns the unconsumed remainder
// of s, the parsed value, and an error if a required number is missing or does
// not fit in an int32.
func getNumVal(part intervalPart, s string) (string, int32, error) {
	rest := s
	negative := false
	switch part {
	case yearsPart, daysPart, hoursPart:
		// Only the leading element of each group may be signed.
		if rest != "" && (rest[0] == '-' || rest[0] == '+') {
			negative = rest[0] == '-'
			rest = rest[1:]
		}
	}

	end := 0
	for end < len(rest) && rest[end] >= '0' && rest[end] <= '9' {
		end++
	}
	digits := rest[:end]
	rest = rest[end:]

	if digits == "" {
		if part == subsecsPart {
			// Fractional seconds are optional.
			return rest, 0, nil
		}
		return "", 0, fmt.Errorf("no value parsed for part %s", part.String())
	}

	// special case: subsecs is a mantissa, convert it to nanos. Integer
	// arithmetic is used so no precision is lost to floating point.
	if part == subsecsPart {
		const nanoDigits = 9
		mantissa := digits
		if len(mantissa) > nanoDigits {
			mantissa = mantissa[:nanoDigits]
		}
		mantissa += strings.Repeat("0", nanoDigits-len(mantissa))
		parsed, err := strconv.ParseInt(mantissa, 10, 32)
		if err != nil {
			return "", 0, fmt.Errorf("couldn't parse %s as %s", digits, part.String())
		}
		return rest, int32(parsed), nil
	}

	if negative {
		digits = "-" + digits
	}
	parsed, err := strconv.ParseInt(digits, 10, 32)
	if err != nil {
		return "", 0, fmt.Errorf("error parsing value %s for %s: %w", digits, part.String(), err)
	}
	return rest, int32(parsed), nil
}

// IntervalValueFromDuration converts a time.Duration to an IntervalType representation.
//
// The converted duration only leverages the hours/minutes/seconds part of the interval,
// the other parts representing days, months, and years are not used.
func IntervalValueFromDuration(in time.Duration) *IntervalValue {
	const (
		second = int64(time.Second)
		minute = 60 * second
		hour   = 60 * minute
	)
	nanos := in.Nanoseconds()
	// Go's integer division and remainder both truncate toward zero, so every
	// field carries the sign of the input duration.
	return &IntervalValue{
		Hours:          int32(nanos / hour),
		Minutes:        int32(nanos % hour / minute),
		Seconds:        int32(nanos % minute / second),
		SubSecondNanos: int32(nanos % second),
	}
}

// ToDuration converts an interval to a time.Duration value.
//
// For the purposes of conversion:
// Years are normalized to 12 months.
// Months are normalized to 30 days.
// Days are normalized to 24 hours.
//
// The result is computed in int64 nanoseconds, so intervals longer than
// time.Duration can represent will wrap.
func (iv *IntervalValue) ToDuration() time.Duration {
	months := 12*int64(iv.Years) + int64(iv.Months)
	days := 30*months + int64(iv.Days)
	hours := 24*days + int64(iv.Hours)
	minutes := 60*hours + int64(iv.Minutes)
	seconds := 60*minutes + int64(iv.Seconds)
	// SubSecondNanos is already expressed in nanoseconds; it is added as-is.
	return time.Duration(seconds*int64(time.Second) + int64(iv.SubSecondNanos))
}

// Canonicalize returns an IntervalValue where signs for elements in the
// Y-M and H:M:S.F are consistent and values are normalized/reduced.
//
// Canonical form enables more consistent comparison of the encoded
// interval. For example, encoding an interval with 12 months is equivalent
// to an interval of 1 year.
//
// The receiver is left unchanged; a new value is returned.
func (iv *IntervalValue) Canonicalize() *IntervalValue {
	out := *iv

	// canonicalize Y-M part
	totalMonths := 12*int64(iv.Years) + int64(iv.Months)
	out.Years = int32(totalMonths / 12)
	out.Months = int32(totalMonths % 12)

	// No canonicalization for the Days part.

	// canonicalize time part. Whole seconds and the sub-second remainder are
	// tracked separately: collapsing everything into int64 nanoseconds would
	// overflow for hour counts in the millions.
	const nanosPerSecond = 1000000000
	rawNanos := int64(iv.SubSecondNanos)
	secs := int64(iv.Hours)*3600 + int64(iv.Minutes)*60 + int64(iv.Seconds) + rawNanos/nanosPerSecond
	nanos := rawNanos % nanosPerSecond
	// Borrow one second so both quantities end up with the same sign.
	switch {
	case secs > 0 && nanos < 0:
		secs--
		nanos += nanosPerSecond
	case secs < 0 && nanos > 0:
		secs++
		nanos -= nanosPerSecond
	}

	// Reduce to parts.
	out.Hours = int32(secs / 3600)
	out.Minutes = int32(secs % 3600 / 60)
	out.Seconds = int32(secs % 60)
	out.SubSecondNanos = int32(nanos)
	return &out
}

// IsCanonical evaluates whether the current representation is in canonical
// form: years and months agree in sign, all time parts agree in sign, and
// months, minutes, seconds and nanoseconds are each below the value at which
// they would carry into the next larger field.
func (iv *IntervalValue) IsCanonical() bool {
	if !sameSign(iv.Years, iv.Months) ||
		!sameSign(iv.Hours, iv.Minutes, iv.Seconds, iv.SubSecondNanos) {
		return false
	}
	// reduced reports whether v lies strictly between -limit and +limit.
	reduced := func(v, limit int32) bool { return v > -limit && v < limit }
	// We allow large days and hours values, because they are within different parts.
	// TODO: We don't currently validate that each part represents value smaller than 10k years.
	return reduced(iv.Months, 12) &&
		reduced(iv.Minutes, 60) &&
		reduced(iv.Seconds, 60) &&
		reduced(iv.SubSecondNanos, 1e9)
}

// int32abs returns the absolute value of x.
func int32abs(x int32) int32 {
	if x < 0 {
		return -x
	}
	return x
}

// sameSign reports whether nums contains no mix of strictly positive and
// strictly negative values. Zeros are compatible with either sign.
func sameSign(nums ...int32) bool {
	var sawPositive, sawNegative bool
	for _, n := range nums {
		switch {
		case n > 0:
			sawPositive = true
		case n < 0:
			sawNegative = true
		}
	}
	return !(sawPositive && sawNegative)
}
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bigquery + +import ( + "testing" + "time" + + "cloud.google.com/go/internal/testutil" +) + +func TestParseInterval(t *testing.T) { + testcases := []struct { + inputStr string + wantInterval *IntervalValue + wantErr bool + }{ + { + inputStr: "", + wantErr: true, + }, + { + inputStr: "1-2 3", + wantErr: true, + }, + { + inputStr: "1-2 3 4:5:6", + wantInterval: &IntervalValue{Years: 1, Months: 2, Days: 3, Hours: 4, Minutes: 5, Seconds: 6, SubSecondNanos: 0}, + }, + { + inputStr: "1-2 3 4:5:6.5", + wantInterval: &IntervalValue{Years: 1, Months: 2, Days: 3, Hours: 4, Minutes: 5, Seconds: 6, SubSecondNanos: 500000000}, + }, + { + inputStr: "-1-2 3 -4:5:6.123", + wantInterval: &IntervalValue{Years: -1, Months: -2, Days: 3, Hours: -4, Minutes: -5, Seconds: -6, SubSecondNanos: -123000000}, + }, + { + inputStr: "0-0 0 1:1:1.000000001", + wantInterval: &IntervalValue{Hours: 1, Minutes: 1, Seconds: 1, SubSecondNanos: 1}, + }, + } + + for _, tc := range testcases { + gotInterval, err := ParseInterval(tc.inputStr) + if tc.wantErr { + if err != nil { + continue + } + t.Errorf("input %s: wanted err, got success", tc.inputStr) + } + if err != nil { + t.Errorf("input %s got err: %v", tc.inputStr, err) + } + if diff := testutil.Diff(gotInterval, tc.wantInterval); diff != "" { + t.Errorf("input %s: got=-, want=+:\n%s", tc.inputStr, diff) + } + } +} + +func TestCanonicalInterval(t *testing.T) { + testcases := []struct { + description string + input *IntervalValue + wantCanonical *IntervalValue + wantString string + }{ + { + description: "already canonical", 
+ input: &IntervalValue{Years: 1, Months: 2, Days: 3, Hours: 4, Minutes: 5, Seconds: 6, SubSecondNanos: 0}, + wantCanonical: &IntervalValue{Years: 1, Months: 2, Days: 3, Hours: 4, Minutes: 5, Seconds: 6, SubSecondNanos: 0}, + wantString: "1-2 3 4:5:6", + }, + { + description: "mixed Y-M", + input: &IntervalValue{Years: -1, Months: 28}, + wantCanonical: &IntervalValue{Years: 1, Months: 4, Days: 0, Hours: 0, Minutes: 0, Seconds: 0, SubSecondNanos: 0}, + wantString: "1-4 0 0:0:0", + }, + { + description: "mixed Y-M", + input: &IntervalValue{Years: -1, Months: 28}, + wantCanonical: &IntervalValue{Years: 1, Months: 4, Days: 0, Hours: 0, Minutes: 0, Seconds: 0, SubSecondNanos: 0}, + wantString: "1-4 0 0:0:0", + }, + { + description: "big month Y-M", + input: &IntervalValue{Years: 0, Months: -13}, + wantCanonical: &IntervalValue{Years: -1, Months: -1, Days: 0, Hours: 0, Minutes: 0, Seconds: 0, SubSecondNanos: 0}, + wantString: "-1-1 0 0:0:0", + }, + { + description: "big days not normalized", + input: &IntervalValue{Days: 1000}, + wantCanonical: &IntervalValue{Years: 0, Months: 0, Days: 1000, Hours: 0, Minutes: 0, Seconds: 0, SubSecondNanos: 0}, + wantString: "0-0 1000 0:0:0", + }, + { + description: "time reduced", + input: &IntervalValue{Minutes: 181, Seconds: 61, SubSecondNanos: 5}, + wantCanonical: &IntervalValue{Hours: 3, Minutes: 2, Seconds: 1, SubSecondNanos: 5}, + wantString: "0-0 0 3:2:1.000000005", + }, + { + description: "subseconds oversized", + input: &IntervalValue{SubSecondNanos: 1900000000}, + wantCanonical: &IntervalValue{Years: 0, Months: 0, Days: 0, Hours: 0, Minutes: 0, Seconds: 1, SubSecondNanos: 900000000}, + wantString: "0-0 0 0:0:1.9", + }, + } + + for _, tc := range testcases { + gotCanonical := tc.input.Canonicalize() + + if diff := testutil.Diff(gotCanonical, tc.wantCanonical); diff != "" { + t.Errorf("%s: got=-, want=+:\n%s", tc.description, diff) + } + + gotStr := tc.input.String() + if gotStr != tc.wantString { + t.Errorf("%s mismatched 
strings. got %s want %s", tc.description, gotStr, tc.wantString) + } + } +} + +func TestIntervalDuration(t *testing.T) { + testcases := []struct { + description string + inputInterval *IntervalValue + wantDuration time.Duration + wantInterval *IntervalValue + }{ + { + description: "hour", + inputInterval: &IntervalValue{Hours: 1}, + wantDuration: time.Duration(time.Hour), + wantInterval: &IntervalValue{Hours: 1}, + }, + { + description: "minute oversized", + inputInterval: &IntervalValue{Minutes: 62}, + wantDuration: time.Duration(62 * time.Minute), + wantInterval: &IntervalValue{Hours: 1, Minutes: 2}, + }, + { + description: "other parts", + inputInterval: &IntervalValue{Months: 1, Days: 2}, + wantDuration: time.Duration(32 * 24 * time.Hour), + wantInterval: &IntervalValue{Hours: 32 * 24}, + }, + } + + for _, tc := range testcases { + gotDuration := tc.inputInterval.ToDuration() + + // interval -> duration + if gotDuration != tc.wantDuration { + t.Errorf("%s: mismatched duration, got %v want %v", tc.description, gotDuration, tc.wantDuration) + } + + // duration -> interval (canonical) + gotInterval := IntervalValueFromDuration(gotDuration) + if diff := testutil.Diff(gotInterval, tc.wantInterval); diff != "" { + t.Errorf("%s: got=-, want=+:\n%s", tc.description, diff) + } + } +} diff --git a/bigquery/params.go b/bigquery/params.go index 1171d50cbb9..3c8f0f55beb 100644 --- a/bigquery/params.go +++ b/bigquery/params.go @@ -77,14 +77,16 @@ var ( numericParamType = &bq.QueryParameterType{Type: "NUMERIC"} bigNumericParamType = &bq.QueryParameterType{Type: "BIGNUMERIC"} geographyParamType = &bq.QueryParameterType{Type: "GEOGRAPHY"} + intervalParamType = &bq.QueryParameterType{Type: "INTERVAL"} ) var ( - typeOfDate = reflect.TypeOf(civil.Date{}) - typeOfTime = reflect.TypeOf(civil.Time{}) - typeOfDateTime = reflect.TypeOf(civil.DateTime{}) - typeOfGoTime = reflect.TypeOf(time.Time{}) - typeOfRat = reflect.TypeOf(&big.Rat{}) + typeOfDate = reflect.TypeOf(civil.Date{}) + 
typeOfTime = reflect.TypeOf(civil.Time{}) + typeOfDateTime = reflect.TypeOf(civil.DateTime{}) + typeOfGoTime = reflect.TypeOf(time.Time{}) + typeOfRat = reflect.TypeOf(&big.Rat{}) + typeOfIntervalValue = reflect.TypeOf(&IntervalValue{}) ) // A QueryParameter is a parameter to a query. @@ -106,6 +108,7 @@ type QueryParameter struct { // []byte: BYTES // time.Time: TIMESTAMP // *big.Rat: NUMERIC + // *IntervalValue: INTERVAL // Arrays and slices of the above. // Structs of the above. Only the exported fields are used. // @@ -156,6 +159,8 @@ func paramType(t reflect.Type) (*bq.QueryParameterType, error) { return timestampParamType, nil case typeOfRat: return numericParamType, nil + case typeOfIntervalValue: + return intervalParamType, nil case typeOfNullBool: return boolParamType, nil case typeOfNullFloat64: @@ -300,6 +305,9 @@ func paramValue(v reflect.Value) (*bq.QueryParameterValue, error) { // to honor previous behavior and send as Numeric type. res.Value = NumericString(v.Interface().(*big.Rat)) return res, nil + case typeOfIntervalValue: + res.Value = IntervalString(v.Interface().(*IntervalValue)) + return res, nil } switch t.Kind() { case reflect.Slice: @@ -379,6 +387,7 @@ var paramTypeToFieldType = map[string]FieldType{ numericParamType.Type: NumericFieldType, bigNumericParamType.Type: BigNumericFieldType, geographyParamType.Type: GeographyFieldType, + intervalParamType.Type: IntervalFieldType, } // Convert a parameter value from the service to a Go value. 
This is similar to, but diff --git a/bigquery/params_test.go b/bigquery/params_test.go index d9233cad6e3..366184894a3 100644 --- a/bigquery/params_test.go +++ b/bigquery/params_test.go @@ -116,6 +116,7 @@ var scalarTests = []struct { dateTimeParamType, NullDateTime{Valid: false}}, {big.NewRat(12345, 1000), false, "12.345000000", numericParamType, big.NewRat(12345, 1000)}, + {&IntervalValue{Years: 1, Months: 2, Days: 3}, false, "1-2 3 0:0:0", intervalParamType, &IntervalValue{Years: 1, Months: 2, Days: 3}}, {NullGeography{GeographyVal: "POINT(-122.335503 47.625536)", Valid: true}, false, "POINT(-122.335503 47.625536)", geographyParamType, "POINT(-122.335503 47.625536)"}, {NullGeography{Valid: false}, true, "", geographyParamType, NullGeography{Valid: false}}, } diff --git a/bigquery/schema.go b/bigquery/schema.go index 78f2f762864..f5e2d4130d6 100644 --- a/bigquery/schema.go +++ b/bigquery/schema.go @@ -242,6 +242,8 @@ const ( // BigNumericFieldType is a numeric field type that supports values of larger precision // and scale than the NumericFieldType. BigNumericFieldType FieldType = "BIGNUMERIC" + // IntervalFieldType is a representation of a duration or an amount of time. + IntervalFieldType FieldType = "INTERVAL" ) var ( @@ -260,6 +262,7 @@ var ( NumericFieldType: true, GeographyFieldType: true, BigNumericFieldType: true, + IntervalFieldType: true, } // The API will accept alias names for the types based on the Standard SQL type names. 
fieldAliases = map[FieldType]FieldType{ diff --git a/bigquery/schema_test.go b/bigquery/schema_test.go index 17b7fc7157f..d69cbbd8222 100644 --- a/bigquery/schema_test.go +++ b/bigquery/schema_test.go @@ -1084,7 +1084,8 @@ func TestSchemaFromJSON(t *testing.T) { {"name":"aliased_boolean","type":"BOOL","mode":"NULLABLE","description":"Aliased nullable boolean"}, {"name":"aliased_float","type":"FLOAT64","mode":"REQUIRED","description":"Aliased required float"}, {"name":"aliased_record","type":"STRUCT","mode":"NULLABLE","description":"Aliased nullable record"}, - {"name":"aliased_bignumeric","type":"BIGDECIMAL","mode":"NULLABLE","description":"Aliased nullable bignumeric"} + {"name":"aliased_bignumeric","type":"BIGDECIMAL","mode":"NULLABLE","description":"Aliased nullable bignumeric"}, + {"name":"flat_interval","type":"INTERVAL","mode":"NULLABLE","description":"Flat nullable interval"} ]`), expectedSchema: Schema{ fieldSchema("Flat nullable string", "flat_string", "STRING", false, false, nil), @@ -1104,6 +1105,7 @@ func TestSchemaFromJSON(t *testing.T) { fieldSchema("Aliased required float", "aliased_float", "FLOAT", false, true, nil), fieldSchema("Aliased nullable record", "aliased_record", "RECORD", false, false, nil), fieldSchema("Aliased nullable bignumeric", "aliased_bignumeric", "BIGNUMERIC", false, false, nil), + fieldSchema("Flat nullable interval", "flat_interval", "INTERVAL", false, false, nil), }, }, { diff --git a/bigquery/value.go b/bigquery/value.go index cae28be0464..84e55acaae7 100644 --- a/bigquery/value.go +++ b/bigquery/value.go @@ -735,6 +735,13 @@ func toUploadValueReflect(v reflect.Value, fs *FieldSchema) interface{} { return formatUploadValue(v, fs, func(v reflect.Value) string { return BigNumericString(v.Interface().(*big.Rat)) }) + case IntervalFieldType: + if r, ok := v.Interface().(*IntervalValue); ok && r == nil { + return nil + } + return formatUploadValue(v, fs, func(v reflect.Value) string { + return 
IntervalString(v.Interface().(*IntervalValue)) + }) default: if !fs.Repeated || v.Len() > 0 { return v.Interface() @@ -821,6 +828,12 @@ func BigNumericString(r *big.Rat) string { return r.FloatString(BigNumericScaleDigits) } +// IntervalString returns a string representing an *IntervalValue in a format compatible with +// BigQuery SQL. It returns an interval literal in canonical format. +func IntervalString(iv *IntervalValue) string { + return iv.String() +} + // convertRows converts a series of TableRows into a series of Value slices. // schema is used to interpret the data from rows; its length must match the // length of each row. @@ -947,6 +960,12 @@ func convertBasicType(val string, typ FieldType) (Value, error) { return Value(r), nil case GeographyFieldType: return val, nil + case IntervalFieldType: + i, err := ParseInterval(val) + if err != nil { + return nil, fmt.Errorf("bigquery: invalid INTERVAL value %q", val) + } + return Value(i), nil default: return nil, fmt.Errorf("unrecognized type: %s", typ) }