diff --git a/google-cloud-bigquery/acceptance/bigquery/table_test.rb b/google-cloud-bigquery/acceptance/bigquery/table_test.rb index ce7b14e5af70..2834823824da 100644 --- a/google-cloud-bigquery/acceptance/bigquery/table_test.rb +++ b/google-cloud-bigquery/acceptance/bigquery/table_test.rb @@ -482,13 +482,24 @@ end end - it "allows tables to be created with time_partitioning and clustering" do - table = time_partitioned_table - _(table.time_partitioning?).must_equal true - _(table.time_partitioning_type).must_equal "DAY" - _(table.time_partitioning_field).must_equal "dob" - _(table.time_partitioning_expiration).must_equal seven_days - _(table.clustering_fields).must_equal clustering_fields + it "allows tables to be created and updated with time_partitioning and clustering" do + begin + table = time_partitioned_table + _(table.time_partitioning?).must_equal true + _(table.time_partitioning_type).must_equal "DAY" + _(table.time_partitioning_field).must_equal "dob" + _(table.time_partitioning_expiration).must_equal seven_days + _(table.clustering_fields).must_equal clustering_fields + + new_clustering_fields = ["last_name"] + table.clustering_fields = new_clustering_fields + _(table.clustering_fields).must_equal new_clustering_fields + + table.clustering_fields = nil + _(table.clustering_fields).must_be :nil? + ensure + time_partitioned_table.delete + end end it "allows tables to be created with range_partitioning" do diff --git a/google-cloud-bigquery/lib/google/cloud/bigquery/load_job.rb b/google-cloud-bigquery/lib/google/cloud/bigquery/load_job.rb index e6c0991601ec..fafc43ecb9f1 100644 --- a/google-cloud-bigquery/lib/google/cloud/bigquery/load_job.rb +++ b/google-cloud-bigquery/lib/google/cloud/bigquery/load_job.rb @@ -484,7 +484,7 @@ def range_partitioning_end # Checks if the destination table will be time partitioned. See # [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables). 
# - # @return [Boolean, nil] `true` when the table will be time-partitioned, + # @return [Boolean] `true` when the table will be time-partitioned, # or `false` otherwise. # # @!group Attributes @@ -560,10 +560,15 @@ def time_partitioning_require_filter? ### # Checks if the destination table will be clustered. # + # See {LoadJob::Updater#clustering_fields=}, {Table#clustering_fields} and + # {Table#clustering_fields=}. + # # @see https://cloud.google.com/bigquery/docs/clustered-tables - # Introduction to Clustered Tables + # Introduction to clustered tables + # @see https://cloud.google.com/bigquery/docs/creating-clustered-tables + # Creating and using clustered tables # - # @return [Boolean, nil] `true` when the table will be clustered, + # @return [Boolean] `true` when the table will be clustered, # or `false` otherwise. # # @!group Attributes @@ -578,14 +583,16 @@ def clustering? # be first partitioned and subsequently clustered. The order of the # returned fields determines the sort order of the data. # - # See {LoadJob::Updater#clustering_fields=}. + # BigQuery supports clustering for both partitioned and non-partitioned + # tables. + # + # See {LoadJob::Updater#clustering_fields=}, {Table#clustering_fields} and + # {Table#clustering_fields=}. # - # @see https://cloud.google.com/bigquery/docs/partitioned-tables - # Partitioned Tables # @see https://cloud.google.com/bigquery/docs/clustered-tables - # Introduction to Clustered Tables + # Introduction to clustered tables # @see https://cloud.google.com/bigquery/docs/creating-clustered-tables - # Creating and Using Clustered Tables + # Creating and using clustered tables # # @return [Array, nil] The clustering fields, or `nil` if the # destination table will not be clustered. @@ -1819,23 +1826,23 @@ def time_partitioning_require_filter= val end ## - # Sets one or more fields on which the destination table should be - # clustered. 
Must be specified with time-based partitioning, data in - # the table will be first partitioned and subsequently clustered. + # Sets the list of fields on which data should be clustered. # # Only top-level, non-repeated, simple-type fields are supported. When # you cluster a table using multiple columns, the order of columns you # specify is important. The order of the specified columns determines # the sort order of the data. # - # See {LoadJob#clustering_fields}. + # BigQuery supports clustering for both partitioned and non-partitioned + # tables. + # + # See {LoadJob#clustering_fields}, {Table#clustering_fields} and + # {Table#clustering_fields=}. # - # @see https://cloud.google.com/bigquery/docs/partitioned-tables - # Partitioned Tables # @see https://cloud.google.com/bigquery/docs/clustered-tables - # Introduction to Clustered Tables + # Introduction to clustered tables # @see https://cloud.google.com/bigquery/docs/creating-clustered-tables - # Creating and Using Clustered Tables + # Creating and using clustered tables # # @param [Array] fields The clustering fields. Only top-level, # non-repeated, simple-type fields are supported. diff --git a/google-cloud-bigquery/lib/google/cloud/bigquery/query_job.rb b/google-cloud-bigquery/lib/google/cloud/bigquery/query_job.rb index eac763a99103..62e5adf0c02c 100644 --- a/google-cloud-bigquery/lib/google/cloud/bigquery/query_job.rb +++ b/google-cloud-bigquery/lib/google/cloud/bigquery/query_job.rb @@ -514,7 +514,7 @@ def range_partitioning_end # Checks if the destination table will be time-partitioned. See # [Partitioned Tables](https://cloud.google.com/bigquery/docs/partitioned-tables). # - # @return [Boolean, nil] `true` when the table will be time-partitioned, + # @return [Boolean] `true` when the table will be time-partitioned, # or `false` otherwise. # # @!group Attributes @@ -589,10 +589,15 @@ def time_partitioning_require_filter? ### # Checks if the destination table will be clustered. 
# + # See {QueryJob::Updater#clustering_fields=}, {Table#clustering_fields} and + # {Table#clustering_fields=}. + # # @see https://cloud.google.com/bigquery/docs/clustered-tables - # Introduction to Clustered Tables + # Introduction to clustered tables + # @see https://cloud.google.com/bigquery/docs/creating-clustered-tables + # Creating and using clustered tables # - # @return [Boolean, nil] `true` when the table will be clustered, + # @return [Boolean] `true` when the table will be clustered, # or `false` otherwise. # # @!group Attributes @@ -607,14 +612,16 @@ def clustering? # be first partitioned and subsequently clustered. The order of the # returned fields determines the sort order of the data. # - # See {QueryJob::Updater#clustering_fields=}. + # BigQuery supports clustering for both partitioned and non-partitioned + # tables. + # + # See {QueryJob::Updater#clustering_fields=}, {Table#clustering_fields} and + # {Table#clustering_fields=}. # - # @see https://cloud.google.com/bigquery/docs/partitioned-tables - # Partitioned Tables # @see https://cloud.google.com/bigquery/docs/clustered-tables - # Introduction to Clustered Tables + # Introduction to clustered tables # @see https://cloud.google.com/bigquery/docs/creating-clustered-tables - # Creating and Using Clustered Tables + # Creating and using clustered tables # # @return [Array, nil] The clustering fields, or `nil` if the # destination table will not be clustered. @@ -1445,23 +1452,23 @@ def time_partitioning_require_filter= val end ## - # Sets one or more fields on which the destination table should be - # clustered. Must be specified with time-based partitioning, data in - # the table will be first partitioned and subsequently clustered. + # Sets the list of fields on which data should be clustered. # # Only top-level, non-repeated, simple-type fields are supported. When # you cluster a table using multiple columns, the order of columns you # specify is important. 
The order of the specified columns determines # the sort order of the data. # - # See {QueryJob#clustering_fields}. + # BigQuery supports clustering for both partitioned and non-partitioned + # tables. + # + # See {QueryJob#clustering_fields}, {Table#clustering_fields} and + # {Table#clustering_fields=}. # - # @see https://cloud.google.com/bigquery/docs/partitioned-tables - # Partitioned Tables # @see https://cloud.google.com/bigquery/docs/clustered-tables - # Introduction to Clustered Tables + # Introduction to clustered tables # @see https://cloud.google.com/bigquery/docs/creating-clustered-tables - # Creating and Using Clustered Tables + # Creating and using clustered tables # # @param [Array] fields The clustering fields. Only top-level, # non-repeated, simple-type fields are supported. diff --git a/google-cloud-bigquery/lib/google/cloud/bigquery/table.rb b/google-cloud-bigquery/lib/google/cloud/bigquery/table.rb index b7e9e3473836..b9a9e81c4513 100644 --- a/google-cloud-bigquery/lib/google/cloud/bigquery/table.rb +++ b/google-cloud-bigquery/lib/google/cloud/bigquery/table.rb @@ -471,8 +471,13 @@ def require_partition_filter= new_require ### # Checks if the table is clustered. # + # See {Table::Updater#clustering_fields=}, {Table#clustering_fields} and + # {Table#clustering_fields=}. + # # @see https://cloud.google.com/bigquery/docs/clustered-tables - # Introduction to Clustered Tables + # Introduction to clustered tables + # @see https://cloud.google.com/bigquery/docs/creating-clustered-tables + # Creating and using clustered tables # # @return [Boolean, nil] `true` when the table is clustered, or # `false` otherwise, if the object is a resource (see {#resource?}); @@ -491,14 +496,16 @@ def clustering? # first partitioned and subsequently clustered. The order of the # returned fields determines the sort order of the data. # - # See {Table::Updater#clustering_fields=}. + # BigQuery supports clustering for both partitioned and non-partitioned + # tables. 
+ # + # See {Table::Updater#clustering_fields=}, {Table#clustering_fields=} and + # {Table#clustering?}. # - # @see https://cloud.google.com/bigquery/docs/partitioned-tables - # Partitioned Tables # @see https://cloud.google.com/bigquery/docs/clustered-tables - # Introduction to Clustered Tables + # Introduction to clustered tables # @see https://cloud.google.com/bigquery/docs/creating-clustered-tables - # Creating and Using Clustered Tables + # Creating and using clustered tables # # @return [Array, nil] The clustering fields, or `nil` if the # table is not clustered or if the table is a reference (see @@ -512,6 +519,53 @@ def clustering_fields @gapi.clustering.fields if clustering? end + ## + # Updates the list of fields on which data should be clustered. + # + # Only top-level, non-repeated, simple-type fields are supported. When + # you cluster a table using multiple columns, the order of columns you + # specify is important. The order of the specified columns determines + # the sort order of the data. + # + # BigQuery supports clustering for both partitioned and non-partitioned + # tables. + # + # See {Table::Updater#clustering_fields=}, {Table#clustering_fields} and + # {Table#clustering?}. + # + # @see https://cloud.google.com/bigquery/docs/clustered-tables + # Introduction to clustered tables + # @see https://cloud.google.com/bigquery/docs/creating-clustered-tables + # Creating and using clustered tables + # @see https://cloud.google.com/bigquery/docs/creating-clustered-tables#modifying-cluster-spec + # Modifying clustering specification + # + # @param [Array, nil] fields The clustering fields, or `nil` to + # remove the clustering configuration. Only top-level, non-repeated, + # simple-type fields are supported. 
+ # + # @example + # require "google/cloud/bigquery" + # + # bigquery = Google::Cloud::Bigquery.new + # dataset = bigquery.dataset "my_dataset" + # table = dataset.table "my_table" + # + # table.clustering_fields = ["last_name", "first_name"] + # + # @!group Attributes + # + def clustering_fields= fields + reload! unless resource_full? + if fields + @gapi.clustering ||= Google::Apis::BigqueryV2::Clustering.new + @gapi.clustering.fields = fields + else + @gapi.clustering = nil + end + patch_gapi! :clustering + end + ## # The combined Project ID, Dataset ID, and Table ID for this table, in # the format specified by the [Legacy SQL Query @@ -3062,27 +3116,22 @@ def range_partitioning_end= range_end end ## - # Sets one or more fields on which data should be clustered. Must be - # specified with time-based partitioning, data in the table will be - # first partitioned and subsequently clustered. + # Sets the list of fields on which data should be clustered. # # Only top-level, non-repeated, simple-type fields are supported. When # you cluster a table using multiple columns, the order of columns you # specify is important. The order of the specified columns determines # the sort order of the data. # - # You can only set the clustering fields while creating a table as in - # the example below. BigQuery does not allow you to change clustering - # on an existing table. + # BigQuery supports clustering for both partitioned and non-partitioned + # tables. # - # See {Table#clustering_fields}. + # See {Table#clustering_fields} and {Table#clustering_fields=}. 
# - # @see https://cloud.google.com/bigquery/docs/partitioned-tables - # Partitioned Tables # @see https://cloud.google.com/bigquery/docs/clustered-tables - # Introduction to Clustered Tables + # Introduction to clustered tables # @see https://cloud.google.com/bigquery/docs/creating-clustered-tables - # Creating and Using Clustered Tables + # Creating and using clustered tables # # @param [Array] fields The clustering fields. Only top-level, # non-repeated, simple-type fields are supported. diff --git a/google-cloud-bigquery/support/doctest_helper.rb b/google-cloud-bigquery/support/doctest_helper.rb index 662c1208d145..0010b4352f4e 100644 --- a/google-cloud-bigquery/support/doctest_helper.rb +++ b/google-cloud-bigquery/support/doctest_helper.rb @@ -879,6 +879,15 @@ def mock_storage end end + doctest.before "Google::Cloud::Bigquery::Table#clustering_fields=" do + mock_bigquery do |mock| + mock.expect :get_dataset, dataset_full_gapi, ["my-project", "my_dataset"] + mock.expect :get_table, table_full_gapi, ["my-project", "my_dataset", "my_table"] + mock.expect :patch_table, table_full_gapi, ["my-project", "my_dataset", "my_table", Google::Apis::BigqueryV2::Table, Hash] + mock.expect :get_table, table_full_gapi, ["my-project", "my_dataset", "my_table"] + end + end + # Google::Cloud::Bigquery::Table#data@Paginate rows of data: (See {Data#next}) # Google::Cloud::Bigquery::Table#data@Retrieve all rows of data: (See {Data#all}) doctest.before "Google::Cloud::Bigquery::Table#data" do diff --git a/google-cloud-bigquery/test/google/cloud/bigquery/table_reference_update_test.rb b/google-cloud-bigquery/test/google/cloud/bigquery/table_reference_update_test.rb index cf691eda6e02..c5b59e7c6a66 100644 --- a/google-cloud-bigquery/test/google/cloud/bigquery/table_reference_update_test.rb +++ b/google-cloud-bigquery/test/google/cloud/bigquery/table_reference_update_test.rb @@ -70,7 +70,7 @@ mock = Minitest::Mock.new table_hash = random_table_hash dataset_id, table_id, table_name, 
description table_hash["timePartitioning"] = { - "type" => type, + "type" => type } partitioning = Google::Apis::BigqueryV2::TimePartitioning.new type: type request_table_gapi = Google::Apis::BigqueryV2::Table.new time_partitioning: partitioning, etag: etag @@ -92,7 +92,7 @@ mock = Minitest::Mock.new table_hash = random_table_hash dataset_id, table_id, table_name, description table_hash["timePartitioning"] = { - "field" => field, + "field" => field } partitioning = Google::Apis::BigqueryV2::TimePartitioning.new field: field request_table_gapi = Google::Apis::BigqueryV2::Table.new time_partitioning: partitioning, etag: etag @@ -115,7 +115,7 @@ mock = Minitest::Mock.new table_hash = random_table_hash dataset_id, table_id, table_name, description table_hash["timePartitioning"] = { - "expirationMs" => expiration_ms, + "expirationMs" => expiration_ms } partitioning = Google::Apis::BigqueryV2::TimePartitioning.new expiration_ms: expiration_ms request_table_gapi = Google::Apis::BigqueryV2::Table.new time_partitioning: partitioning, etag: etag @@ -128,6 +128,82 @@ table.time_partitioning_expiration = expiration _(table.time_partitioning_expiration).must_equal expiration + + mock.verify + end + + it "updates clustering fields" do + clustering_fields = ["a"] + + mock = Minitest::Mock.new + table_hash = random_table_hash dataset_id, table_id, table_name, description + table_hash_clustering = table_hash.dup + table_hash_clustering["clustering"] = { + "fields" => clustering_fields + } + clustering = Google::Apis::BigqueryV2::Clustering.new fields: clustering_fields + request_table_gapi = Google::Apis::BigqueryV2::Table.new clustering: clustering, etag: etag + mock.expect :get_table, return_table(table_hash), [project, dataset_id, table_id] + mock.expect :patch_table, return_table(table_hash_clustering), + [project, dataset_id, table_id, request_table_gapi, {options: {header: {"If-Match" => etag}}}] + mock.expect :get_table, return_table(table_hash_clustering), [project, 
dataset_id, table_id] + table.service.mocked_service = mock + + table.clustering_fields = clustering_fields + + _(table.clustering_fields).must_equal clustering_fields + + mock.verify + end + + it "updates existing clustering fields" do + clustering_fields = ["a"] + new_clustering_fields = ["b"] + + mock = Minitest::Mock.new + table_hash = random_table_hash dataset_id, table_id, table_name, description + table_hash["clustering"] = { + "fields" => clustering_fields + } + table_hash_clustering = table_hash.dup + table_hash_clustering["clustering"] = { + "fields" => new_clustering_fields + } + clustering = Google::Apis::BigqueryV2::Clustering.new fields: new_clustering_fields + request_table_gapi = Google::Apis::BigqueryV2::Table.new clustering: clustering, etag: etag + mock.expect :get_table, return_table(table_hash), [project, dataset_id, table_id] + mock.expect :patch_table, return_table(table_hash_clustering), + [project, dataset_id, table_id, request_table_gapi, {options: {header: {"If-Match" => etag}}}] + mock.expect :get_table, return_table(table_hash_clustering), [project, dataset_id, table_id] + table.service.mocked_service = mock + + table.clustering_fields = new_clustering_fields + + _(table.clustering_fields).must_equal new_clustering_fields + + mock.verify + end + + it "updates existing clustering to nil" do + clustering_fields = ["a"] + + mock = Minitest::Mock.new + table_hash = random_table_hash dataset_id, table_id, table_name, description + table_hash["clustering"] = { + "fields" => clustering_fields + } + table_hash_clustering = table_hash.dup + table_hash_clustering["clustering"] = nil + request_table_gapi = Google::Apis::BigqueryV2::Table.new clustering: nil, etag: etag + mock.expect :get_table, return_table(table_hash), [project, dataset_id, table_id] + mock.expect :patch_table, return_table(table_hash_clustering), + [project, dataset_id, table_id, request_table_gapi, {options: {header: {"If-Match" => etag}}}] + mock.expect :get_table, 
return_table(table_hash_clustering), [project, dataset_id, table_id] + table.service.mocked_service = mock + + table.clustering_fields = nil + + _(table.clustering_fields).must_be :nil? mock.verify end diff --git a/google-cloud-bigquery/test/google/cloud/bigquery/table_update_test.rb b/google-cloud-bigquery/test/google/cloud/bigquery/table_update_test.rb index c3a9bf1419ed..7e5422b77257 100644 --- a/google-cloud-bigquery/test/google/cloud/bigquery/table_update_test.rb +++ b/google-cloud-bigquery/test/google/cloud/bigquery/table_update_test.rb @@ -253,6 +253,88 @@ mock.verify end + it "updates clustering fields" do + clustering_fields = ["a"] + + mock = Minitest::Mock.new + table_hash = random_table_hash dataset_id, table_id, table_name, description + table_hash["clustering"] = { + "fields" => clustering_fields + } + clustering = Google::Apis::BigqueryV2::Clustering.new fields: clustering_fields + request_table_gapi = Google::Apis::BigqueryV2::Table.new clustering: clustering, etag: etag + mock.expect :patch_table, return_table(table_hash), + [project, dataset_id, table_id, request_table_gapi, {options: {header: {"If-Match" => etag}}}] + mock.expect :get_table, return_table(table_hash), [project, dataset_id, table_id] + table.service.mocked_service = mock + + _(table.clustering_fields).must_be :nil? 
+ + table.clustering_fields = clustering_fields + + _(table.clustering_fields).must_equal clustering_fields + + mock.verify + end + + it "updates existing clustering fields" do + clustering_fields = ["a"] + new_clustering_fields = ["b"] + + mock = Minitest::Mock.new + table_hash_clustering = random_table_hash dataset_id, table_id, table_name, description + table_hash_clustering["clustering"] = { + "fields" => clustering_fields + } + table_clustering = Google::Cloud::Bigquery::Table.from_gapi return_table(table_hash_clustering), bigquery.service + + table_hash_clustering_2 = table_hash_clustering.dup + table_hash_clustering_2["clustering"] = { + "fields" => new_clustering_fields + } + clustering = Google::Apis::BigqueryV2::Clustering.new fields: new_clustering_fields + request_table_gapi = Google::Apis::BigqueryV2::Table.new clustering: clustering, etag: etag + mock.expect :patch_table, return_table(table_hash_clustering_2), + [project, dataset_id, table_id, request_table_gapi, {options: {header: {"If-Match" => etag}}}] + mock.expect :get_table, return_table(table_hash_clustering_2), [project, dataset_id, table_id] + table_clustering.service.mocked_service = mock + + _(table_clustering.clustering_fields).must_equal clustering_fields + + table_clustering.clustering_fields = new_clustering_fields + + _(table_clustering.clustering_fields).must_equal new_clustering_fields + + mock.verify + end + + it "updates existing clustering to nil" do + clustering_fields = ["a"] + + + mock = Minitest::Mock.new + table_hash = random_table_hash dataset_id, table_id, table_name, description + table_hash_clustering = table_hash.dup + table_hash_clustering["clustering"] = { + "fields" => clustering_fields + } + table_clustering = Google::Cloud::Bigquery::Table.from_gapi return_table(table_hash_clustering), bigquery.service + + request_table_gapi = Google::Apis::BigqueryV2::Table.new clustering: nil, etag: etag + mock.expect :patch_table, 
return_table(table_hash), + [project, dataset_id, table_id, request_table_gapi, {options: {header: {"If-Match" => etag}}}] + mock.expect :get_table, return_table(table_hash), [project, dataset_id, table_id] + table_clustering.service.mocked_service = mock + + _(table_clustering.clustering_fields).must_equal clustering_fields + + table_clustering.clustering_fields = nil + + _(table_clustering.clustering_fields).must_be :nil? + + mock.verify + end + it "updates its labels" do new_labels = { "bar" => "baz" }