google-cloud-bigquery/lib/google/cloud/bigquery/project.rb

# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


require "google/cloud/errors"
require "google/cloud/bigquery/service"
require "google/cloud/bigquery/credentials"
require "google/cloud/bigquery/dataset"
require "google/cloud/bigquery/job"
require "google/cloud/bigquery/external"
require "google/cloud/bigquery/project/list"
require "google/cloud/bigquery/time"
require "google/cloud/bigquery/schema"

module Google
  module Cloud
    module Bigquery
      ##
      # # Project
      #
      # Projects are top-level containers in Google Cloud Platform. They store
      # information about billing and authorized users, and they contain
      # BigQuery data. Each project has a friendly name and a unique ID.
      #
      # Google::Cloud::Bigquery::Project is the main object for interacting with
      # Google BigQuery. {Google::Cloud::Bigquery::Dataset} objects are created,
      # accessed, and deleted by Google::Cloud::Bigquery::Project.
      #
      # See {Google::Cloud#bigquery}.
      #
      # @attr_reader [String, nil] name The descriptive name of the project.
      #   Can only be present if the project was retrieved with {#projects}.
      # @attr_reader [Integer, nil] numeric_id The numeric ID of the project.
      #   Can only be present if the project was retrieved with {#projects}.
      #
      # @example
      #   require "google/cloud/bigquery"
      #
      #   bigquery = Google::Cloud::Bigquery.new
      #   dataset = bigquery.dataset "my_dataset"
      #   table = dataset.table "my_table"
      #
      class Project
        ##
        # @private The Service object.
        attr_accessor :service

        attr_reader :name
        attr_reader :numeric_id

        ##
        # Creates a new Service instance.
        #
        # See {Google::Cloud.bigquery}
        def initialize service
          @service = service
        end

        ##
        # The BigQuery project connected to.
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new(
        #     project_id: "my-project",
        #     credentials: "/path/to/keyfile.json"
        #   )
        #
        #   bigquery.project_id #=> "my-project"
        #
        def project_id
          service.project
        end
        alias project project_id

        ##
        # The email address of the service account for the project used to
        # connect to BigQuery. (See also {#project_id}.)
        #
        # @return [String] The service account email address.
        #
        def service_account_email
          @service_account_email ||= service.project_service_account.email
        end

        ##
        # Copies the data from the source table to the destination table using
        # an asynchronous method. In this method, a {CopyJob} is immediately
        # returned. The caller may poll the service by repeatedly calling
        # {Job#reload!} and {Job#done?} to detect when the job is done, or
        # simply block until the job is done by calling #{Job#wait_until_done!}.
        # See {#copy} for the synchronous version. Use this method instead of
        # {Table#copy_job} to copy from source tables in other projects.
        #
        # The geographic location for the job ("US", "EU", etc.) can be set via
        # {CopyJob::Updater#location=} in a block passed to this method.
        #
        # @param [String, Table] source_table The source table for the
        #   copied data. This can be a table object; or a string ID as specified
        #   by the [Standard SQL Query
        #   Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
        #   (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
        #   Reference](https://cloud.google.com/bigquery/query-reference#from)
        #   (`project-name:dataset_id.table_id`).
        # @param [String, Table] destination_table The destination table for the
        #   copied data. This can be a table object; or a string ID as specified
        #   by the [Standard SQL Query
        #   Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
        #   (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
        #   Reference](https://cloud.google.com/bigquery/query-reference#from)
        #   (`project-name:dataset_id.table_id`).
        # @param [String] create Specifies whether the job is allowed to create
        #   new tables. The default value is `needed`.
        #
        #   The following values are supported:
        #
        #   * `needed` - Create the table if it does not exist.
        #   * `never` - The table must already exist. A 'notFound' error is
        #     raised if the table does not exist.
        # @param [String] write Specifies how to handle data already present in
        #   the destination table. The default value is `empty`.
        #
        #   The following values are supported:
        #
        #   * `truncate` - BigQuery overwrites the table data.
        #   * `append` - BigQuery appends the data to the table.
        #   * `empty` - An error will be returned if the destination table
        #     already contains data.
        # @param [String] job_id A user-defined ID for the copy job. The ID
        #   must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
        #   (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
        #   `job_id` is provided, then `prefix` will not be used.
        #
        #   See [Generating a job
        #   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
        # @param [String] prefix A string, usually human-readable, that will be
        #   prepended to a generated value to produce a unique job ID. For
        #   example, the prefix `daily_import_job_` can be given to generate a
        #   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
        #   prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
        #   underscores (`_`), or dashes (`-`). The maximum length of the entire ID
        #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
        #   be used.
        # @param [Hash] labels A hash of user-provided labels associated with
        #   the job. You can use these to organize and group your jobs.
        #
        #   The labels applied to a resource must meet the following requirements:
        #
        #   * Each resource can have multiple labels, up to a maximum of 64.
        #   * Each label must be a key-value pair.
        #   * Keys have a minimum length of 1 character and a maximum length of
        #     63 characters, and cannot be empty. Values can be empty, and have
        #     a maximum length of 63 characters.
        #   * Keys and values can contain only lowercase letters, numeric characters,
        #     underscores, and dashes. All characters must use UTF-8 encoding, and
        #     international characters are allowed.
        #   * The key portion of a label must be unique. However, you can use the
        #     same key with multiple resources.
        #   * Keys must start with a lowercase letter or international character.
        # @yield [job] a job configuration object
        # @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
        #   configuration object for setting additional options.
        #
        # @return [Google::Cloud::Bigquery::CopyJob]
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #   dataset = bigquery.dataset "my_dataset"
        #   source_table_id = "bigquery-public-data.samples.shakespeare"
        #   destination_table = dataset.table "my_destination_table"
        #
        #   copy_job = bigquery.copy_job source_table_id, destination_table
        #
        #   copy_job.wait_until_done!
        #   copy_job.done? #=> true
        #
        # @!group Data
        #
        def copy_job source_table, destination_table, create: nil, write: nil, job_id: nil, prefix: nil, labels: nil
          ensure_service!
          options = { create: create, write: write, labels: labels, job_id: job_id, prefix: prefix }

          updater = CopyJob::Updater.from_options(
            service,
            Service.get_table_ref(source_table, default_ref: project_ref),
            Service.get_table_ref(destination_table, default_ref: project_ref),
            options
          )

          yield updater if block_given?

          job_gapi = updater.to_gapi
          gapi = service.copy_table job_gapi
          Job.from_gapi gapi, service
        end

        ##
        # Copies the data from the source table to the destination table using a
        # synchronous method that blocks for a response. Timeouts and transient
        # errors are generally handled as needed to complete the job. See
        # {#copy_job} for the asynchronous version. Use this method instead of
        # {Table#copy} to copy from source tables in other projects.
        #
        # The geographic location for the job ("US", "EU", etc.) can be set via
        # {CopyJob::Updater#location=} in a block passed to this method.
        #
        # @param [String, Table] source_table The source table for the
        #   copied data. This can be a table object; or a string ID as specified
        #   by the [Standard SQL Query
        #   Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
        #   (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
        #   Reference](https://cloud.google.com/bigquery/query-reference#from)
        #   (`project-name:dataset_id.table_id`).
        # @param [String, Table] destination_table The destination table for the
        #   copied data. This can be a table object; or a string ID as specified
        #   by the [Standard SQL Query
        #   Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
        #   (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
        #   Reference](https://cloud.google.com/bigquery/query-reference#from)
        #   (`project-name:dataset_id.table_id`).
        # @param [String] create Specifies whether the job is allowed to create
        #   new tables. The default value is `needed`.
        #
        #   The following values are supported:
        #
        #   * `needed` - Create the table if it does not exist.
        #   * `never` - The table must already exist. A 'notFound' error is
        #     raised if the table does not exist.
        # @param [String] write Specifies how to handle data already present in
        #   the destination table. The default value is `empty`.
        #
        #   The following values are supported:
        #
        #   * `truncate` - BigQuery overwrites the table data.
        #   * `append` - BigQuery appends the data to the table.
        #   * `empty` - An error will be returned if the destination table
        #     already contains data.
        # @yield [job] a job configuration object
        # @yieldparam [Google::Cloud::Bigquery::CopyJob::Updater] job a job
        #   configuration object for setting additional options.
        #
        # @return [Boolean] Returns `true` if the copy operation succeeded.
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #   dataset = bigquery.dataset "my_dataset"
        #   destination_table = dataset.table "my_destination_table"
        #
        #   bigquery.copy "bigquery-public-data.samples.shakespeare",
        #                 destination_table
        #
        # @!group Data
        #
        def copy source_table, destination_table, create: nil, write: nil, &block
          job = copy_job source_table, destination_table, create: create, write: write, &block
          job.wait_until_done!
          ensure_job_succeeded! job
          true
        end

        ##
        # Queries data by creating a [query
        # job](https://cloud.google.com/bigquery/docs/query-overview#query_jobs).
        #
        # The geographic location for the job ("US", "EU", etc.) can be set via
        # {QueryJob::Updater#location=} in a block passed to this method.
        #
        # @param [String] query A query string, following the BigQuery [query
        #   syntax](https://cloud.google.com/bigquery/query-reference), of the
        #   query to execute. Example: "SELECT count(f1) FROM
        #   [myProjectId:myDatasetId.myTableId]".
        # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
        #   either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
        #   query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
        #   use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
        #   true.
        #
        #   BigQuery types are converted from Ruby types as follows:
        #
        #   | BigQuery     | Ruby                                 | Notes                                              |
        #   |--------------|--------------------------------------|----------------------------------------------------|
        #   | `BOOL`       | `true`/`false`                       |                                                    |
        #   | `INT64`      | `Integer`                            |                                                    |
        #   | `FLOAT64`    | `Float`                              |                                                    |
        #   | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
        #   | `BIGNUMERIC` | `BigDecimal`                         | NOT AUTOMATIC: Must be mapped using `types`, below.|
        #   | `STRING`     | `String`                             |                                                    |
        #   | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
        #   | `DATE`       | `Date`                               |                                                    |
        #   | `GEOGRAPHY`  | `String` (WKT or GeoJSON)            | NOT AUTOMATIC: Must be mapped using `types`, below.|
        #   | `TIMESTAMP`  | `Time`                               |                                                    |
        #   | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
        #   | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
        #   | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
        #   | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
        #
        #   See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
        #   of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
        #   GIS data](https://cloud.google.com/bigquery/docs/gis-data).
        # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
        #   possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
        #   specify the SQL type for these values.
        #
        #   Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
        #   positional query parameters. This must be an `Hash` when the query uses named query parameters. The values
        #   should be BigQuery type codes from the following list:
        #
        #   * `:BOOL`
        #   * `:INT64`
        #   * `:FLOAT64`
        #   * `:NUMERIC`
        #   * `:BIGNUMERIC`
        #   * `:STRING`
        #   * `:DATETIME`
        #   * `:DATE`
        #   * `:GEOGRAPHY`
        #   * `:TIMESTAMP`
        #   * `:TIME`
        #   * `:BYTES`
        #   * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
        #     are specified as `[:INT64]`.
        #   * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
        #     match the `params` hash, and the values are the types value that matches the data.
        #
        #   Types are optional.
        # @param [Hash<String|Symbol, External::DataSource>] external A Hash
        #   that represents the mapping of the external tables to the table
        #   names used in the SQL query. The hash keys are the table names, and
        #   the hash values are the external table objects. See {Project#query}.
        # @param [String] priority Specifies a priority for the query. Possible
        #   values include `INTERACTIVE` and `BATCH`. The default value is
        #   `INTERACTIVE`.
        # @param [Boolean] cache Whether to look for the result in the query
        #   cache. The query cache is a best-effort cache that will be flushed
        #   whenever tables in the query are modified. The default value is
        #   true. For more information, see [query
        #   caching](https://developers.google.com/bigquery/querying-data).
        # @param [Table] table The destination table where the query results
        #   should be stored. If not present, a new table will be created to
        #   store the results.
        # @param [String] create Specifies whether the job is allowed to create
        #   new tables. The default value is `needed`.
        #
        #   The following values are supported:
        #
        #   * `needed` - Create the table if it does not exist.
        #   * `never` - The table must already exist. A 'notFound' error is
        #     raised if the table does not exist.
        # @param [String] write Specifies the action that occurs if the
        #   destination table already exists. The default value is `empty`.
        #
        #   The following values are supported:
        #
        #   * `truncate` - BigQuery overwrites the table data.
        #   * `append` - BigQuery appends the data to the table.
        #   * `empty` - A 'duplicate' error is returned in the job result if the
        #     table exists and contains data.
        # @param [Boolean] dryrun If set to true, BigQuery doesn't run the job.
        #   Instead, if the query is valid, BigQuery returns statistics about
        #   the job such as how many bytes would be processed. If the query is
        #   invalid, an error returns. The default value is false.
        # @param [Dataset, String] dataset The default dataset to use for
        #   unqualified table names in the query. Optional.
        # @param [String] project Specifies the default projectId to assume for
        #   any unqualified table names in the query. Only used if `dataset`
        #   option is set.
        # @param [Boolean] standard_sql Specifies whether to use BigQuery's
        #   [standard
        #   SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
        #   dialect for this query. If set to true, the query will use standard
        #   SQL rather than the [legacy
        #   SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
        #   dialect. Optional. The default value is true.
        # @param [Boolean] legacy_sql Specifies whether to use BigQuery's
        #   [legacy
        #   SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
        #   dialect for this query. If set to false, the query will use
        #   BigQuery's [standard
        #   SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
        #   dialect. Optional. The default value is false.
        # @param [Boolean] large_results This option is specific to Legacy SQL.
        #   If `true`, allows the query to produce arbitrarily large result
        #   tables at a slight cost in performance. Requires `table` parameter
        #   to be set.
        # @param [Boolean] flatten This option is specific to Legacy SQL.
        #   Flattens all nested and repeated fields in the query results. The
        #   default value is `true`. `large_results` parameter must be `true` if
        #   this is set to `false`.
        # @param [Integer] maximum_billing_tier Deprecated: Change the billing
        #   tier to allow high-compute queries.
        # @param [Integer] maximum_bytes_billed Limits the bytes billed for this
        #   job. Queries that will have bytes billed beyond this limit will fail
        #   (without incurring a charge). Optional. If unspecified, this will be
        #   set to your project default.
        # @param [String] job_id A user-defined ID for the query job. The ID
        #   must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
        #   (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
        #   `job_id` is provided, then `prefix` will not be used.
        #
        #   See [Generating a job
        #   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
        # @param [String] prefix A string, usually human-readable, that will be
        #   prepended to a generated value to produce a unique job ID. For
        #   example, the prefix `daily_import_job_` can be given to generate a
        #   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
        #   prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
        #   underscores (`_`), or dashes (`-`). The maximum length of the entire ID
        #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
        #   be used.
        #
        #   See [Generating a job
        #   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
        # @param [Hash] labels A hash of user-provided labels associated with
        #   the job. You can use these to organize and group your jobs.
        #
        #   The labels applied to a resource must meet the following requirements:
        #
        #   * Each resource can have multiple labels, up to a maximum of 64.
        #   * Each label must be a key-value pair.
        #   * Keys have a minimum length of 1 character and a maximum length of
        #     63 characters, and cannot be empty. Values can be empty, and have
        #     a maximum length of 63 characters.
        #   * Keys and values can contain only lowercase letters, numeric characters,
        #     underscores, and dashes. All characters must use UTF-8 encoding, and
        #     international characters are allowed.
        #   * The key portion of a label must be unique. However, you can use the
        #     same key with multiple resources.
        #   * Keys must start with a lowercase letter or international character.
        # @param [Array<String>, String] udfs User-defined function resources
        #   used in a legacy SQL query. May be either a code resource to load from
        #   a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource
        #   that contains code for a user-defined function (UDF). Providing an
        #   inline code resource is equivalent to providing a URI for a file
        #   containing the same code.
        #
        #   This parameter is used for defining User Defined Function (UDF)
        #   resources only when using legacy SQL. Users of standard SQL should
        #   leverage either DDL (e.g. `CREATE [TEMPORARY] FUNCTION ...`) or the
        #   Routines API to define UDF resources.
        #
        #   For additional information on migrating, see: [Migrating to
        #   standard SQL - Differences in user-defined JavaScript
        #   functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/migrating-from-legacy-sql#differences_in_user-defined_javascript_functions)
        # @param [Boolean] create_session If true, creates a new session, where the
        #   session ID will be a server generated random id. If false, runs query
        #   with an existing session ID when one is provided in the `session_id`
        #   param, otherwise runs query in non-session mode. See {Job#session_id}.
        # @param [String] session_id The ID of an existing session. See also the
        #   `create_session` param and {Job#session_id}.
        # @yield [job] a job configuration object
        # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
        #   configuration object for setting query options.
        #
        # @return [Google::Cloud::Bigquery::QueryJob]
        #
        # @example Query using standard SQL:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   job = bigquery.query_job "SELECT name FROM " \
        #                            "`my_project.my_dataset.my_table`"
        #
        #   job.wait_until_done!
        #   if !job.failed?
        #     job.data.each do |row|
        #       puts row[:name]
        #     end
        #   end
        #
        # @example Query using legacy SQL:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   job = bigquery.query_job "SELECT name FROM " \
        #                            " [my_project:my_dataset.my_table]",
        #                            legacy_sql: true
        #
        #   job.wait_until_done!
        #   if !job.failed?
        #     job.data.each do |row|
        #       puts row[:name]
        #     end
        #   end
        #
        # @example Query using positional query parameters:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   job = bigquery.query_job "SELECT name FROM " \
        #                            "`my_dataset.my_table` " \
        #                            "WHERE id = ?",
        #                            params: [1]
        #
        #   job.wait_until_done!
        #   if !job.failed?
        #     job.data.each do |row|
        #       puts row[:name]
        #     end
        #   end
        #
        # @example Query using named query parameters:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   job = bigquery.query_job "SELECT name FROM " \
        #                            "`my_dataset.my_table` " \
        #                            "WHERE id = @id",
        #                            params: { id: 1 }
        #
        #   job.wait_until_done!
        #   if !job.failed?
        #     job.data.each do |row|
        #       puts row[:name]
        #     end
        #   end
        #
        # @example Query using named query parameters with types:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   job = bigquery.query_job "SELECT name FROM " \
        #                            "`my_dataset.my_table` " \
        #                            "WHERE id IN UNNEST(@ids)",
        #                            params: { ids: [] },
        #                            types: { ids: [:INT64] }
        #
        #   job.wait_until_done!
        #   if !job.failed?
        #     job.data.each do |row|
        #       puts row[:name]
        #     end
        #   end
        #
        # @example Execute a DDL statement:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   job = bigquery.query_job "CREATE TABLE " \
        #                            "`my_dataset.my_table` " \
        #                            "(x INT64)"
        #
        #   job.wait_until_done!
        #   if !job.failed?
        #     table_ref = job.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
        #   end
        #
        # @example Execute a DML statement:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   job = bigquery.query_job "UPDATE " \
        #                            "`my_dataset.my_table` " \
        #                            "SET x = x + 1 " \
        #                            "WHERE x IS NOT NULL"
        #
        #   job.wait_until_done!
        #   if !job.failed?
        #     puts job.num_dml_affected_rows
        #   end
        #
        # @example Query using external data source, set destination:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   csv_url = "gs://bucket/path/to/data.csv"
        #   csv_table = bigquery.external csv_url do |csv|
        #     csv.autodetect = true
        #     csv.skip_leading_rows = 1
        #   end
        #
        #   job = bigquery.query_job "SELECT * FROM my_ext_table" do |query|
        #     query.external = { my_ext_table: csv_table }
        #     dataset = bigquery.dataset "my_dataset", skip_lookup: true
        #     query.table = dataset.table "my_table", skip_lookup: true
        #   end
        #
        #   job.wait_until_done!
        #   if !job.failed?
        #     job.data.each do |row|
        #       puts row[:name]
        #     end
        #   end
        #
        def query_job query,
                      params: nil,
                      types: nil,
                      external: nil,
                      priority: "INTERACTIVE",
                      cache: true,
                      table: nil,
                      create: nil,
                      write: nil,
                      dryrun: nil,
                      dataset: nil,
                      project: nil,
                      standard_sql: nil,
                      legacy_sql: nil,
                      large_results: nil,
                      flatten: nil,
                      maximum_billing_tier: nil,
                      maximum_bytes_billed: nil,
                      job_id: nil,
                      prefix: nil,
                      labels: nil,
                      udfs: nil,
                      create_session: nil,
                      session_id: nil
          ensure_service!
          options = {
            params: params,
            types: types,
            external: external,
            priority: priority,
            cache: cache,
            table: table,
            create: create,
            write: write,
            dryrun: dryrun,
            dataset: dataset,
            project: (project || self.project),
            standard_sql: standard_sql,
            legacy_sql: legacy_sql,
            large_results: large_results,
            flatten: flatten,
            maximum_billing_tier: maximum_billing_tier,
            maximum_bytes_billed: maximum_bytes_billed,
            job_id: job_id,
            prefix: prefix,
            labels: labels,
            udfs: udfs,
            create_session: create_session,
            session_id: session_id
          }

          updater = QueryJob::Updater.from_options service, query, options

          yield updater if block_given?

          gapi = service.query_job updater.to_gapi
          Job.from_gapi gapi, service
        end

        ##
        # Queries data and waits for the results. In this method, a {QueryJob}
        # is created and its results are saved to a temporary table, then read
        # from the table. Timeouts and transient errors are generally handled
        # as needed to complete the query. When used for executing DDL/DML
        # statements, this method does not return row data.
        #
        # The geographic location for the job ("US", "EU", etc.) can be set via
        # {QueryJob::Updater#location=} in a block passed to this method.
        #
        # @see https://cloud.google.com/bigquery/querying-data Querying Data
        #
        # @param [String] query A query string, following the BigQuery [query
        #   syntax](https://cloud.google.com/bigquery/query-reference), of the
        #   query to execute. Example: "SELECT count(f1) FROM
        #   [myProjectId:myDatasetId.myTableId]".
        # @param [Array, Hash] params Standard SQL only. Used to pass query arguments when the `query` string contains
        #   either positional (`?`) or named (`@myparam`) query parameters. If value passed is an array `["foo"]`, the
        #   query must use positional query parameters. If value passed is a hash `{ myparam: "foo" }`, the query must
        #   use named query parameters. When set, `legacy_sql` will automatically be set to false and `standard_sql` to
        #   true.
        #
        #   BigQuery types are converted from Ruby types as follows:
        #
        #   | BigQuery     | Ruby                                 | Notes                                              |
        #   |--------------|--------------------------------------|----------------------------------------------------|
        #   | `BOOL`       | `true`/`false`                       |                                                    |
        #   | `INT64`      | `Integer`                            |                                                    |
        #   | `FLOAT64`    | `Float`                              |                                                    |
        #   | `NUMERIC`    | `BigDecimal`                         | `BigDecimal` values will be rounded to scale 9.    |
        #   | `BIGNUMERIC` | `BigDecimal`                         | NOT AUTOMATIC: Must be mapped using `types`, below.|
        #   | `STRING`     | `String`                             |                                                    |
        #   | `DATETIME`   | `DateTime`                           | `DATETIME` does not support time zone.             |
        #   | `DATE`       | `Date`                               |                                                    |
        #   | `GEOGRAPHY`  | `String` (WKT or GeoJSON)            | NOT AUTOMATIC: Must be mapped using `types`, below.|
        #   | `TIMESTAMP`  | `Time`                               |                                                    |
        #   | `TIME`       | `Google::Cloud::BigQuery::Time`      |                                                    |
        #   | `BYTES`      | `File`, `IO`, `StringIO`, or similar |                                                    |
        #   | `ARRAY`      | `Array`                              | Nested arrays, `nil` values are not supported.     |
        #   | `STRUCT`     | `Hash`                               | Hash keys may be strings or symbols.               |
        #
        #   See [Data Types](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types) for an overview
        #   of each BigQuery data type, including allowed values. For the `GEOGRAPHY` type, see [Working with BigQuery
        #   GIS data](https://cloud.google.com/bigquery/docs/gis-data).
        # @param [Array, Hash] types Standard SQL only. Types of the SQL parameters in `params`. It is not always
        #   possible to infer the right SQL type from a value in `params`. In these cases, `types` must be used to
        #   specify the SQL type for these values.
        #
        #   Arguments must match the value type passed to `params`. This must be an `Array` when the query uses
        #   positional query parameters. This must be an `Hash` when the query uses named query parameters. The values
        #   should be BigQuery type codes from the following list:
        #
        #   * `:BOOL`
        #   * `:INT64`
        #   * `:FLOAT64`
        #   * `:NUMERIC`
        #   * `:BIGNUMERIC`
        #   * `:STRING`
        #   * `:DATETIME`
        #   * `:DATE`
        #   * `:GEOGRAPHY`
        #   * `:TIMESTAMP`
        #   * `:TIME`
        #   * `:BYTES`
        #   * `Array` - Lists are specified by providing the type code in an array. For example, an array of integers
        #     are specified as `[:INT64]`.
        #   * `Hash` - Types for STRUCT values (`Hash` objects) are specified using a `Hash` object, where the keys
        #     match the `params` hash, and the values are the types value that matches the data.
        #
        #   Types are optional.
        # @param [Hash<String|Symbol, External::DataSource>] external A Hash
        #   that represents the mapping of the external tables to the table
        #   names used in the SQL query. The hash keys are the table names, and
        #   the hash values are the external table objects. See {Project#query}.
        # @param [Integer] max The maximum number of rows of data to return per
        #   page of results. Setting this flag to a small value such as 1000 and
        #   then paging through results might improve reliability when the query
        #   result set is large. In addition to this limit, responses are also
        #   limited to 10 MB. By default, there is no maximum row count, and
        #   only the byte limit applies.
        # @param [Boolean] cache Whether to look for the result in the query
        #   cache. The query cache is a best-effort cache that will be flushed
        #   whenever tables in the query are modified. The default value is
        #   true. For more information, see [query
        #   caching](https://developers.google.com/bigquery/querying-data).
        # @param [String] dataset Specifies the default datasetId and projectId
        #   to assume for any unqualified table names in the query. If not set,
        #   all table names in the query string must be qualified in the format
        #   'datasetId.tableId'.
        # @param [String] project Specifies the default projectId to assume for
        #   any unqualified table names in the query. Only used if `dataset`
        #   option is set.
        # @param [Boolean] standard_sql Specifies whether to use BigQuery's
        #   [standard
        #   SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
        #   dialect for this query. If set to true, the query will use standard
        #   SQL rather than the [legacy
        #   SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
        #   dialect. When set to true, the values of `large_results` and
        #   `flatten` are ignored; the query will be run as if `large_results`
        #   is true and `flatten` is false. Optional. The default value is
        #   true.
        # @param [Boolean] legacy_sql Specifies whether to use BigQuery's
        #   [legacy
        #   SQL](https://cloud.google.com/bigquery/docs/reference/legacy-sql)
        #   dialect for this query. If set to false, the query will use
        #   BigQuery's [standard
        #   SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/)
        #   When set to false, the values of `large_results` and `flatten` are
        #   ignored; the query will be run as if `large_results` is true and
        #   `flatten` is false. Optional. The default value is false.
        # @param [String] session_id The ID of an existing session. See the
        #   `create_session` param in {#query_job} and {Job#session_id}.
        # @yield [job] a job configuration object
        # @yieldparam [Google::Cloud::Bigquery::QueryJob::Updater] job a job
        #   configuration object for setting additional options for the query.
        #
        # @return [Google::Cloud::Bigquery::Data]
        #
        # @example Query using standard SQL:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   sql = "SELECT name FROM `my_project.my_dataset.my_table`"
        #   data = bigquery.query sql
        #
        #   # Iterate over the first page of results
        #   data.each do |row|
        #     puts row[:name]
        #   end
        #   # Retrieve the next page of results
        #   data = data.next if data.next?
        #
        # @example Query using legacy SQL:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   sql = "SELECT name FROM [my_project:my_dataset.my_table]"
        #   data = bigquery.query sql, legacy_sql: true
        #
        #   # Iterate over the first page of results
        #   data.each do |row|
        #     puts row[:name]
        #   end
        #   # Retrieve the next page of results
        #   data = data.next if data.next?
        #
        # @example Retrieve all rows: (See {Data#all})
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   data = bigquery.query "SELECT name FROM `my_dataset.my_table`"
        #
        #   data.all do |row|
        #     puts row[:name]
        #   end
        #
        # @example Query using positional query parameters:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   data = bigquery.query "SELECT name " \
        #                         "FROM `my_dataset.my_table` " \
        #                         "WHERE id = ?",
        #                         params: [1]
        #
        #   # Iterate over the first page of results
        #   data.each do |row|
        #     puts row[:name]
        #   end
        #   # Retrieve the next page of results
        #   data = data.next if data.next?
        #
        # @example Query using named query parameters:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   data = bigquery.query "SELECT name " \
        #                         "FROM `my_dataset.my_table` " \
        #                         "WHERE id = @id",
        #                         params: { id: 1 }
        #
        #   # Iterate over the first page of results
        #   data.each do |row|
        #     puts row[:name]
        #   end
        #   # Retrieve the next page of results
        #   data = data.next if data.next?
        #
        # @example Query using named query parameters with types:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   data = bigquery.query "SELECT name FROM " \
        #                         "`my_dataset.my_table` " \
        #                         "WHERE id IN UNNEST(@ids)",
        #                         params: { ids: [] },
        #                         types: { ids: [:INT64] }
        #
        #   # Iterate over the first page of results
        #   data.each do |row|
        #     puts row[:name]
        #   end
        #   # Retrieve the next page of results
        #   data = data.next if data.next?
        #
        # @example Execute a DDL statement:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   data = bigquery.query "CREATE TABLE `my_dataset.my_table` (x INT64)"
        #
        #   table_ref = data.ddl_target_table # Or ddl_target_routine for CREATE/DROP FUNCTION/PROCEDURE
        #
        # @example Execute a DML statement:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   data = bigquery.query "UPDATE `my_dataset.my_table` " \
        #                         "SET x = x + 1 " \
        #                         "WHERE x IS NOT NULL"
        #
        #   puts data.num_dml_affected_rows
        #
        # @example Query using external data source, set destination:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   csv_url = "gs://bucket/path/to/data.csv"
        #   csv_table = bigquery.external csv_url do |csv|
        #     csv.autodetect = true
        #     csv.skip_leading_rows = 1
        #   end
        #
        #   data = bigquery.query "SELECT * FROM my_ext_table" do |query|
        #     query.external = { my_ext_table: csv_table }
        #     dataset = bigquery.dataset "my_dataset", skip_lookup: true
        #     query.table = dataset.table "my_table", skip_lookup: true
        #   end
        #
        #   # Iterate over the first page of results
        #   data.each do |row|
        #     puts row[:name]
        #   end
        #   # Retrieve the next page of results
        #   data = data.next if data.next?
        #
        def query query,
                  params: nil,
                  types: nil,
                  external: nil,
                  max: nil,
                  cache: true,
                  dataset: nil,
                  project: nil,
                  standard_sql: nil,
                  legacy_sql: nil,
                  session_id: nil,
                  &block
          job = query_job query,
                          params: params,
                          types: types,
                          external: external,
                          cache: cache,
                          dataset: dataset,
                          project: project,
                          standard_sql: standard_sql,
                          legacy_sql: legacy_sql,
                          session_id: session_id,
                          &block
          job.wait_until_done!

          if job.failed?
            begin
              # raise to activate ruby exception cause handling
              raise job.gapi_error
            rescue StandardError => e
              # wrap Google::Apis::Error with Google::Cloud::Error
              raise Google::Cloud::Error.from_error(e)
            end
          end

          job.data max: max
        end

        ##
        # Creates a new External::DataSource (or subclass) object that
        # represents the external data source that can be queried from directly,
        # even though the data is not stored in BigQuery. Instead of loading or
        # streaming the data, this object references the external data source.
        #
        # @see https://cloud.google.com/bigquery/external-data-sources Querying
        #   External Data Sources
        #
        # @param [String, Array<String>] url The fully-qualified URL(s) that
        #   point to your data in Google Cloud. An attempt will be made to
        #   derive the format from the URLs provided.
        # @param [String|Symbol] format The data format. This value will be used
        #   even if the provided URLs are recognized as a different format.
        #   Optional.
        #
        #   The following values are supported:
        #
        #   * `csv` - CSV
        #   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
        #   * `avro` - [Avro](http://avro.apache.org/)
        #   * `sheets` - Google Sheets
        #   * `datastore_backup` - Cloud Datastore backup
        #   * `bigtable` - Bigtable
        #
        # @return [External::DataSource] External data source.
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   csv_url = "gs://bucket/path/to/data.csv"
        #   csv_table = bigquery.external csv_url do |csv|
        #     csv.autodetect = true
        #     csv.skip_leading_rows = 1
        #   end
        #
        #   data = bigquery.query "SELECT * FROM my_ext_table",
        #                         external: { my_ext_table: csv_table }
        #
        #   # Iterate over the first page of results
        #   data.each do |row|
        #     puts row[:name]
        #   end
        #   # Retrieve the next page of results
        #   data = data.next if data.next?
        #
        def external url, format: nil
          ext = External.from_urls url, format
          yield ext if block_given?
          ext
        end

        ##
        # Retrieves an existing dataset by ID.
        #
        # @param [String] dataset_id The ID of a dataset.
        # @param [Boolean] skip_lookup Optionally create just a local reference
        #   object without verifying that the resource exists on the BigQuery
        #   service. Calls made on this object will raise errors if the resource
        #   does not exist. Default is `false`. Optional.
        #
        # @return [Google::Cloud::Bigquery::Dataset, nil] Returns `nil` if the
        #   dataset does not exist.
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   dataset = bigquery.dataset "my_dataset"
        #   puts dataset.name
        #
        # @example Avoid retrieving the dataset resource with `skip_lookup`:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   dataset = bigquery.dataset "my_dataset", skip_lookup: true
        #
        def dataset dataset_id, skip_lookup: nil
          ensure_service!
          return Dataset.new_reference project, dataset_id, service if skip_lookup
          gapi = service.get_dataset dataset_id
          Dataset.from_gapi gapi, service
        rescue Google::Cloud::NotFoundError
          nil
        end

        ##
        # Creates a new dataset.
        #
        # @param [String] dataset_id A unique ID for this dataset, without the
        #   project name. The ID must contain only letters (`[A-Za-z]`), numbers
        #   (`[0-9]`), or underscores (`_`). The maximum length is 1,024 characters.
        # @param [String] name A descriptive name for the dataset.
        # @param [String] description A user-friendly description of the
        #   dataset.
        # @param [Integer] expiration The default lifetime of all tables in the
        #   dataset, in milliseconds. The minimum value is `3_600_000` (one hour).
        # @param [String] location The geographic location where the dataset
        #   should reside. Possible values include `EU` and `US`. The default
        #   value is `US`.
        # @yield [access] a block for setting rules
        # @yieldparam [Google::Cloud::Bigquery::Dataset] access the object
        #   accepting rules
        #
        # @return [Google::Cloud::Bigquery::Dataset]
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   dataset = bigquery.create_dataset "my_dataset"
        #
        # @example A name and description can be provided:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   dataset = bigquery.create_dataset "my_dataset",
        #                                     name: "My Dataset",
        #                                     description: "This is my Dataset"
        #
        # @example Or, configure access with a block: (See {Dataset::Access})
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   dataset = bigquery.create_dataset "my_dataset" do |dataset|
        #     dataset.access.add_writer_user "writers@example.com"
        #   end
        #
        def create_dataset dataset_id, name: nil, description: nil,
                           expiration: nil, location: nil
          ensure_service!

          new_ds = Google::Apis::BigqueryV2::Dataset.new(
            dataset_reference: Google::Apis::BigqueryV2::DatasetReference.new(
              project_id: project, dataset_id: dataset_id
            )
          )

          # Can set location only on creation, no Dataset#location method
          new_ds.update! location: location unless location.nil?

          updater = Dataset::Updater.new(new_ds).tap do |b|
            b.name = name unless name.nil?
            b.description = description unless description.nil?
            b.default_expiration = expiration unless expiration.nil?
          end

          if block_given?
            yield updater
            updater.check_for_mutated_access!
          end

          gapi = service.insert_dataset new_ds
          Dataset.from_gapi gapi, service
        end

        ##
        # Retrieves the list of datasets belonging to the project.
        #
        # @param [Boolean] all Whether to list all datasets, including hidden
        #   ones. The default is `false`.
        # @param [String] filter An expression for filtering the results of the
        #   request by label. The syntax is `labels.<name>[:<value>]`.
        #   Multiple filters can be `AND`ed together by connecting with a space.
        #   Example: `labels.department:receiving labels.active`. See [Filtering
        #   datasets using labels](https://cloud.google.com/bigquery/docs/labeling-datasets#filtering_datasets_using_labels).
        # @param [String] token A previously-returned page token representing
        #   part of the larger set of results to view.
        # @param [Integer] max Maximum number of datasets to return.
        #
        # @return [Array<Google::Cloud::Bigquery::Dataset>] (See
        #   {Google::Cloud::Bigquery::Dataset::List})
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   datasets = bigquery.datasets
        #   datasets.each do |dataset|
        #     puts dataset.name
        #   end
        #
        # @example Retrieve hidden datasets with the `all` optional arg:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   all_datasets = bigquery.datasets all: true
        #
        # @example Retrieve all datasets: (See {Dataset::List#all})
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   datasets = bigquery.datasets
        #   datasets.all do |dataset|
        #     puts dataset.name
        #   end
        #
        def datasets all: nil, filter: nil, token: nil, max: nil
          ensure_service!
          gapi = service.list_datasets all: all, filter: filter, token: token, max: max
          Dataset::List.from_gapi gapi, service, all, filter, max
        end

        ##
        # Retrieves an existing job by ID.
        #
        # @param [String] job_id The ID of a job.
        # @param [String] location The geographic location where the job was
        #   created. Required except for US and EU.
        #
        # @return [Google::Cloud::Bigquery::Job, nil] Returns `nil` if the job
        #   does not exist.
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   job = bigquery.job "my_job"
        #
        def job job_id, location: nil
          ensure_service!
          gapi = service.get_job job_id, location: location
          Job.from_gapi gapi, service
        rescue Google::Cloud::NotFoundError
          nil
        end

        ##
        # Retrieves the list of jobs belonging to the project.
        #
        # @param [Boolean] all Whether to display jobs owned by all users in the
        #   project. The default is `false`. Optional.
        # @param [String] token A previously-returned page token representing
        #   part of the larger set of results to view. Optional.
        # @param [Integer] max Maximum number of jobs to return. Optional.
        # @param [String] filter A filter for job state. Optional.
        #
        #   Acceptable values are:
        #
        #   * `done` - Finished jobs
        #   * `pending` - Pending jobs
        #   * `running` - Running jobs
        # @param [Time] min_created_at Min value for {Job#created_at}. When
        #   provided, only jobs created after or at this time are returned.
        #   Optional.
        # @param [Time] max_created_at Max value for {Job#created_at}. When
        #   provided, only jobs created before or at this time are returned.
        #   Optional.
        # @param [Google::Cloud::Bigquery::Job, String] parent_job A job
        #   object or a job ID. If set, retrieve only child jobs of the
        #   specified parent. Optional. See {Job#job_id}, {Job#num_child_jobs},
        #   and {Job#parent_job_id}.
        #
        # @return [Array<Google::Cloud::Bigquery::Job>] (See
        #   {Google::Cloud::Bigquery::Job::List})
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   jobs = bigquery.jobs
        #   jobs.each do |job|
        #     # process job
        #   end
        #
        # @example Retrieve only running jobs using the `filter` optional arg:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   running_jobs = bigquery.jobs filter: "running"
        #   running_jobs.each do |job|
        #     # process job
        #   end
        #
        # @example Retrieve only jobs created within provided times:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   two_days_ago = Time.now - 60*60*24*2
        #   three_days_ago = Time.now - 60*60*24*3
        #
        #   jobs = bigquery.jobs min_created_at: three_days_ago,
        #                        max_created_at: two_days_ago
        #   jobs.each do |job|
        #     # process job
        #   end
        #
        # @example Retrieve all jobs: (See {Job::List#all})
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   jobs = bigquery.jobs
        #   jobs.all do |job|
        #     # process job
        #   end
        #
        # @example Retrieve child jobs by setting `parent_job`:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   multi_statement_sql = <<~SQL
        #     -- Declare a variable to hold names as an array.
        #     DECLARE top_names ARRAY<STRING>;
        #     -- Build an array of the top 100 names from the year 2017.
        #     SET top_names = (
        #     SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
        #     FROM `bigquery-public-data.usa_names.usa_1910_current`
        #     WHERE year = 2017
        #     );
        #     -- Which names appear as words in Shakespeare's plays?
        #     SELECT
        #     name AS shakespeare_name
        #     FROM UNNEST(top_names) AS name
        #     WHERE name IN (
        #     SELECT word
        #     FROM `bigquery-public-data.samples.shakespeare`
        #     );
        #   SQL
        #
        #   job = bigquery.query_job multi_statement_sql
        #
        #   job.wait_until_done!
        #
        #   child_jobs = bigquery.jobs parent_job: job
        #
        #   child_jobs.each do |child_job|
        #     script_statistics = child_job.script_statistics
        #     puts script_statistics.evaluation_kind
        #     script_statistics.stack_frames.each do |stack_frame|
        #       puts stack_frame.text
        #     end
        #   end
        #
        def jobs all: nil,
                 token: nil,
                 max: nil,
                 filter: nil,
                 min_created_at: nil,
                 max_created_at: nil,
                 parent_job: nil
          ensure_service!
          parent_job = parent_job.job_id if parent_job.is_a? Job
          options = {
            parent_job_id: parent_job,
            all: all,
            token: token,
            max: max, filter: filter,
            min_created_at: min_created_at,
            max_created_at: max_created_at
          }
          gapi = service.list_jobs(**options)
          Job::List.from_gapi gapi, service, **options
        end

        ##
        # Retrieves the list of all projects for which the currently authorized
        # account has been granted any project role. The returned project
        # instances share the same credentials as the project used to retrieve
        # them, but lazily create a new API connection for interactions with the
        # BigQuery service.
        #
        # @param [String] token A previously-returned page token representing
        #   part of the larger set of results to view.
        # @param [Integer] max Maximum number of projects to return.
        #
        # @return [Array<Google::Cloud::Bigquery::Project>] (See
        #   {Google::Cloud::Bigquery::Project::List})
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   projects = bigquery.projects
        #   projects.each do |project|
        #     puts project.name
        #     project.datasets.all.each do |dataset|
        #       puts dataset.name
        #     end
        #   end
        #
        # @example Retrieve all projects: (See {Project::List#all})
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   projects = bigquery.projects
        #
        #   projects.all do |project|
        #     puts project.name
        #     project.datasets.all.each do |dataset|
        #       puts dataset.name
        #     end
        #   end
        #
        def projects token: nil, max: nil
          ensure_service!
          gapi = service.list_projects token: token, max: max
          Project::List.from_gapi gapi, service, max
        end

        ##
        # Creates a Bigquery::Time object to represent a time, independent of a
        # specific date.
        #
        # @param [Integer] hour Hour, valid values from 0 to 23.
        # @param [Integer] minute Minute, valid values from 0 to 59.
        # @param [Integer, Float] second Second, valid values from 0 to 59. Can
        #   contain microsecond precision.
        #
        # @return [Bigquery::Time]
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   fourpm = bigquery.time 16, 0, 0
        #   data = bigquery.query "SELECT name " \
        #                         "FROM `my_dataset.my_table`" \
        #                         "WHERE time_of_date = @time",
        #                         params: { time: fourpm }
        #
        #   # Iterate over the first page of results
        #   data.each do |row|
        #     puts row[:name]
        #   end
        #   # Retrieve the next page of results
        #   data = data.next if data.next?
        #
        # @example Create Time with fractional seconds:
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   precise_time = bigquery.time 16, 35, 15.376541
        #   data = bigquery.query "SELECT name " \
        #                         "FROM `my_dataset.my_table`" \
        #                         "WHERE time_of_date >= @time",
        #                         params: { time: precise_time }
        #
        #   # Iterate over the first page of results
        #   data.each do |row|
        #     puts row[:name]
        #   end
        #   # Retrieve the next page of results
        #   data = data.next if data.next?
        #
        def time hour, minute, second
          Bigquery::Time.new "#{hour}:#{minute}:#{second}"
        end

        ##
        # Creates a new schema instance. An optional block may be given to
        # configure the schema, otherwise the schema is returned empty and may
        # be configured directly.
        #
        # The returned schema can be passed to {Dataset#load} using the
        # `schema` option. However, for most use cases, the block yielded by
        # {Dataset#load} is a more convenient way to configure the schema
        # for the destination table.
        #
        # @yield [schema] a block for setting the schema
        # @yieldparam [Schema] schema the object accepting the schema
        #
        # @return [Google::Cloud::Bigquery::Schema]
        #
        # @example
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   schema = bigquery.schema do |s|
        #     s.string "first_name", mode: :required
        #     s.record "cities_lived", mode: :repeated do |nested_schema|
        #       nested_schema.string "place", mode: :required
        #       nested_schema.integer "number_of_years", mode: :required
        #     end
        #   end
        #
        #   dataset = bigquery.dataset "my_dataset"
        #
        #   gs_url = "gs://my-bucket/file-name.csv"
        #   load_job = dataset.load_job "my_new_table", gs_url, schema: schema
        #
        def schema
          s = Schema.from_gapi
          yield s if block_given?
          s
        end

        ##
        # Creates a new Bigquery::EncryptionConfiguration instance.
        #
        # This method does not execute an API call. Use the encryption
        # configuration to encrypt a table when creating one via
        # Bigquery::Dataset#create_table, Bigquery::Dataset#load,
        # Bigquery::Table#copy, or Bigquery::Project#query.
        #
        # @param [String] kms_key Name of the Cloud KMS encryption key that
        #   will be used to protect the destination BigQuery table. The BigQuery
        #   Service Account associated with your project requires access to this
        #   encryption key.
        #
        # @example Encrypt a new table
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #   dataset = bigquery.dataset "my_dataset"
        #
        #   key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
        #   encrypt_config = bigquery.encryption kms_key: key_name
        #
        #   table = dataset.create_table "my_table" do |updater|
        #     updater.encryption = encrypt_config
        #   end
        #
        # @example Encrypt a load destination table
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #   dataset = bigquery.dataset "my_dataset"
        #
        #   key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
        #   encrypt_config = bigquery.encryption kms_key: key_name
        #   job = dataset.load_job "my_table", "gs://abc/file" do |job|
        #     job.encryption = encrypt_config
        #   end
        #
        # @example Encrypt a copy destination table
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #   dataset = bigquery.dataset "my_dataset"
        #   table = dataset.table "my_table"
        #
        #   key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
        #   encrypt_config = bigquery.encryption kms_key: key_name
        #   job = table.copy_job "my_dataset.new_table" do |job|
        #     job.encryption = encrypt_config
        #   end
        #
        # @example Encrypt a query destination table
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #   dataset = bigquery.dataset "my_dataset"
        #
        #   key_name = "projects/a/locations/b/keyRings/c/cryptoKeys/d"
        #   encrypt_config = bigquery.encryption kms_key: key_name
        #   job = bigquery.query_job "SELECT 1;" do |query|
        #     query.table = dataset.table "my_table", skip_lookup: true
        #     query.encryption = encrypt_config
        #   end
        #
        # @return [Google::Cloud::Bigquery::EncryptionConfiguration]
        def encryption kms_key: nil
          encrypt_config = Bigquery::EncryptionConfiguration.new
          encrypt_config.kms_key = kms_key unless kms_key.nil?
          encrypt_config
        end

        ##
        # Extracts the data from a table or exports a model to Google Cloud Storage
        # asynchronously, immediately returning an {ExtractJob} that can be used to
        # track the progress of the export job.  The caller may poll the service by
        # repeatedly calling {Job#reload!} and {Job#done?} to detect when the job
        # is done, or simply block until the job is done by calling
        # #{Job#wait_until_done!}. See {#extract} for the synchronous version.
        #
        # Use this method instead of {Table#extract_job} or {Model#extract_job} to
        # extract data from source tables or models in other projects.
        #
        # The geographic location for the job ("US", "EU", etc.) can be set via
        # {ExtractJob::Updater#location=} in a block passed to this method.
        #
        # @see https://cloud.google.com/bigquery/docs/exporting-data
        #   Exporting table data
        # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
        #   Exporting models
        #
        # @param [Table, Model, String] source The source table or model for
        #   the extract operation. This can be a table or model object; or a
        #   table ID string as specified by the [Standard SQL Query
        #   Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
        #   (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
        #   Reference](https://cloud.google.com/bigquery/query-reference#from)
        #   (`project-name:dataset_id.table_id`).
        # @param [Google::Cloud::Storage::File, String, Array<String>]
        #   extract_url The Google Storage file or file URI pattern(s) to which
        #   BigQuery should extract. For a model export this value should be a
        #   string ending in an object name prefix, since multiple objects will
        #   be exported.
        # @param [String] format The exported file format. The default value for
        #   tables is `csv`. Tables with nested or repeated fields cannot be
        #   exported as CSV. The default value for models is `ml_tf_saved_model`.
        #
        #   Supported values for tables:
        #
        #   * `csv` - CSV
        #   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
        #   * `avro` - [Avro](http://avro.apache.org/)
        #
        #   Supported values for models:
        #
        #   * `ml_tf_saved_model` - TensorFlow SavedModel
        #   * `ml_xgboost_booster` - XGBoost Booster
        # @param [String] compression The compression type to use for exported
        #   files. Possible values include `GZIP` and `NONE`. The default value
        #   is `NONE`. Not applicable when extracting models.
        # @param [String] delimiter Delimiter to use between fields in the
        #   exported table data. Default is `,`. Not applicable when extracting
        #   models.
        # @param [Boolean] header Whether to print out a header row in table
        #   exports. Default is `true`. Not applicable when extracting models.
        # @param [String] job_id A user-defined ID for the extract job. The ID
        #   must contain only letters (`[A-Za-z]`), numbers (`[0-9]`), underscores
        #   (`_`), or dashes (`-`). The maximum length is 1,024 characters. If
        #   `job_id` is provided, then `prefix` will not be used.
        #
        #   See [Generating a job
        #   ID](https://cloud.google.com/bigquery/docs/managing-jobs#generate-jobid).
        # @param [String] prefix A string, usually human-readable, that will be
        #   prepended to a generated value to produce a unique job ID. For
        #   example, the prefix `daily_import_job_` can be given to generate a
        #   job ID such as `daily_import_job_12vEDtMQ0mbp1Mo5Z7mzAFQJZazh`. The
        #   prefix must contain only letters (`[A-Za-z]`), numbers (`[0-9]`),
        #   underscores (`_`), or dashes (`-`). The maximum length of the entire ID
        #   is 1,024 characters. If `job_id` is provided, then `prefix` will not
        #   be used.
        # @param [Hash] labels A hash of user-provided labels associated with
        #   the job. You can use these to organize and group your jobs.
        #
        #   The labels applied to a resource must meet the following requirements:
        #
        #   * Each resource can have multiple labels, up to a maximum of 64.
        #   * Each label must be a key-value pair.
        #   * Keys have a minimum length of 1 character and a maximum length of
        #     63 characters, and cannot be empty. Values can be empty, and have
        #     a maximum length of 63 characters.
        #   * Keys and values can contain only lowercase letters, numeric characters,
        #     underscores, and dashes. All characters must use UTF-8 encoding, and
        #     international characters are allowed.
        #   * The key portion of a label must be unique. However, you can use the
        #     same key with multiple resources.
        #   * Keys must start with a lowercase letter or international character.
        # @yield [job] a job configuration object
        # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
        #   configuration object for setting additional options.
        #
        # @return [Google::Cloud::Bigquery::ExtractJob]
        #
        # @example Export table data
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   table_id = "bigquery-public-data.samples.shakespeare"
        #   extract_job = bigquery.extract_job table_id, "gs://my-bucket/shakespeare.csv"
        #   extract_job.wait_until_done!
        #   extract_job.done? #=> true
        #
        # @example Export a model
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #   dataset = bigquery.dataset "my_dataset"
        #   model = dataset.model "my_model"
        #
        #   extract_job = bigquery.extract model, "gs://my-bucket/#{model.model_id}"
        #
        # @!group Data
        #
        def extract_job source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, job_id: nil,
                        prefix: nil, labels: nil
          ensure_service!
          options = { format: format, compression: compression, delimiter: delimiter, header: header, job_id: job_id,
                      prefix: prefix, labels: labels }
          source_ref = if source.respond_to? :model_ref
                         source.model_ref
                       else
                         Service.get_table_ref source, default_ref: project_ref
                       end

          updater = ExtractJob::Updater.from_options service, source_ref, extract_url, options

          yield updater if block_given?

          job_gapi = updater.to_gapi
          gapi = service.extract_table job_gapi
          Job.from_gapi gapi, service
        end

        ##
        # Extracts the data from a table or exports a model to Google Cloud Storage
        # using a synchronous method that blocks for a response. Timeouts
        # and transient errors are generally handled as needed to complete the
        # job. See {#extract_job} for the asynchronous version.
        #
        # Use this method instead of {Table#extract} or {Model#extract} to
        # extract data from source tables or models in other projects.
        #
        # The geographic location for the job ("US", "EU", etc.) can be set via
        # {ExtractJob::Updater#location=} in a block passed to this method.
        #
        # @see https://cloud.google.com/bigquery/docs/exporting-data
        #   Exporting table data
        # @see https://cloud.google.com/bigquery-ml/docs/exporting-models
        #   Exporting models
        #
        # @param [Table, Model, String] source The source table or model for
        #   the extract operation. This can be a table or model object; or a
        #   table ID string as specified by the [Standard SQL Query
        #   Reference](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#from-clause)
        #   (`project-name.dataset_id.table_id`) or the [Legacy SQL Query
        #   Reference](https://cloud.google.com/bigquery/query-reference#from)
        #   (`project-name:dataset_id.table_id`).
        # @param [Google::Cloud::Storage::File, String, Array<String>]
        #   extract_url The Google Storage file or file URI pattern(s) to which
        #   BigQuery should extract. For a model export this value should be a
        #   string ending in an object name prefix, since multiple objects will
        #   be exported.
        # @param [String] format The exported file format. The default value for
        #   tables is `csv`. Tables with nested or repeated fields cannot be
        #   exported as CSV. The default value for models is `ml_tf_saved_model`.
        #
        #   Supported values for tables:
        #
        #   * `csv` - CSV
        #   * `json` - [Newline-delimited JSON](http://jsonlines.org/)
        #   * `avro` - [Avro](http://avro.apache.org/)
        #
        #   Supported values for models:
        #
        #   * `ml_tf_saved_model` - TensorFlow SavedModel
        #   * `ml_xgboost_booster` - XGBoost Booster
        # @param [String] compression The compression type to use for exported
        #   files. Possible values include `GZIP` and `NONE`. The default value
        #   is `NONE`. Not applicable when extracting models.
        # @param [String] delimiter Delimiter to use between fields in the
        #   exported table data. Default is `,`. Not applicable when extracting
        #   models.
        # @param [Boolean] header Whether to print out a header row in table
        #   exports. Default is `true`. Not applicable when extracting models.
        # @yield [job] a job configuration object
        # @yieldparam [Google::Cloud::Bigquery::ExtractJob::Updater] job a job
        #   configuration object for setting additional options.
        #
        # @return [Boolean] Returns `true` if the extract operation succeeded.
        #
        # @example Export table data
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #
        #   bigquery.extract "bigquery-public-data.samples.shakespeare",
        #                    "gs://my-bucket/shakespeare.csv"
        #
        # @example Export a model
        #   require "google/cloud/bigquery"
        #
        #   bigquery = Google::Cloud::Bigquery.new
        #   dataset = bigquery.dataset "my_dataset"
        #   model = dataset.model "my_model"
        #
        #   bigquery.extract model, "gs://my-bucket/#{model.model_id}"
        #
        # @!group Data
        #
        def extract source, extract_url, format: nil, compression: nil, delimiter: nil, header: nil, &block
          job = extract_job source, extract_url,
                            format:      format,
                            compression: compression,
                            delimiter:   delimiter,
                            header:      header,
                            &block
          job.wait_until_done!
          ensure_job_succeeded! job
          true
        end

        ##
        # @private New Project from a Google API Client object, using the
        # same Credentials as this project.
        def self.from_gapi gapi, service
          project_service = Service.new gapi.project_reference.project_id,
                                        service.credentials,
                                        retries: service.retries,
                                        timeout: service.timeout
          new(project_service).tap do |p|
            p.instance_variable_set :@name, gapi.friendly_name

            # TODO: remove `Integer` and set normally after migrating to Gax or
            # to google-api-client 0.10 (See google/google-api-ruby-client#439)
            p.instance_variable_set :@numeric_id, Integer(gapi.numeric_id) if gapi.numeric_id
          end
        end

        protected

        ##
        # Raise an error unless an active service is available.
        def ensure_service!
          raise "Must have active connection" unless service
        end

        def ensure_job_succeeded! job
          return unless job.failed?
          begin
            # raise to activate ruby exception cause handling
            raise job.gapi_error
          rescue StandardError => e
            # wrap Google::Apis::Error with Google::Cloud::Error
            raise Google::Cloud::Error.from_error(e)
          end
        end

        def project_ref
          Google::Apis::BigqueryV2::ProjectReference.new project_id: project_id
        end
      end
    end
  end
end