26 changes: 24 additions & 2 deletions google/cloud/bigquery/client.py
@@ -90,8 +90,8 @@
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetListItem
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.enums import AutoRowIDs
from google.cloud.bigquery.enums import UpdateMode

from google.cloud.bigquery.enums import AutoRowIDs, DatasetView, UpdateMode
from google.cloud.bigquery.format_options import ParquetOptions
from google.cloud.bigquery.job import (
CopyJob,
@@ -865,6 +865,7 @@ def get_dataset(
dataset_ref: Union[DatasetReference, str],
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
dataset_view: Optional[DatasetView] = None,
) -> Dataset:
"""Fetch the dataset referenced by ``dataset_ref``

@@ -882,7 +883,21 @@
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
dataset_view (Optional[google.cloud.bigquery.enums.DatasetView]):
Specifies the view that determines which dataset information is
returned. By default, dataset metadata (e.g. friendlyName,
description, labels) and ACL information are returned. This
argument accepts the following enum values:

* :attr:`~google.cloud.bigquery.enums.DatasetView.ACL`:
Includes dataset metadata and the ACL.
* :attr:`~google.cloud.bigquery.enums.DatasetView.FULL`:
Includes all dataset metadata, including the ACL and table metadata.
This view is not supported by the ``datasets.list`` API method.
* :attr:`~google.cloud.bigquery.enums.DatasetView.METADATA`:
Includes basic dataset metadata, but not the ACL.
* :attr:`~google.cloud.bigquery.enums.DatasetView.DATASET_VIEW_UNSPECIFIED`:
The server decides which view to use. Currently defaults to FULL.

Returns:
google.cloud.bigquery.dataset.Dataset:
A ``Dataset`` instance.
@@ -892,6 +907,12 @@
dataset_ref, default_project=self.project
)
path = dataset_ref.path

if dataset_view:
query_params = {"datasetView": dataset_view.value}
else:
query_params = {}

span_attributes = {"path": path}
api_response = self._call_api(
retry,
@@ -900,6 +921,7 @@
method="GET",
path=path,
timeout=timeout,
query_params=query_params,
)
return Dataset.from_api_repr(api_response)

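A minimal usage sketch of the new parameter (`my-project.my_dataset` is a placeholder dataset ID, not from this PR):

from google.cloud import bigquery
from google.cloud.bigquery.enums import DatasetView

client = bigquery.Client()

# Request only basic dataset metadata (friendlyName, description,
# labels, ...) without the ACL; this sends datasetView=METADATA as a
# query parameter on the underlying GET request.
dataset = client.get_dataset(
    "my-project.my_dataset",  # placeholder project.dataset ID
    dataset_view=DatasetView.METADATA,
)

# Omitting dataset_view preserves the existing behavior: no datasetView
# query parameter is sent, and the server picks the view (currently FULL).
dataset = client.get_dataset("my-project.my_dataset")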
18 changes: 18 additions & 0 deletions google/cloud/bigquery/enums.py
@@ -80,6 +80,24 @@ class CreateDisposition(object):
returned in the job result."""


class DatasetView(enum.Enum):
"""DatasetView specifies which dataset information is returned."""

DATASET_VIEW_UNSPECIFIED = "DATASET_VIEW_UNSPECIFIED"
"""The default value. Currently maps to the FULL view."""

METADATA = "METADATA"
"""View metadata information for the dataset, such as friendlyName,
description, labels, etc."""

ACL = "ACL"
"""View ACL information for the dataset, which defines dataset access
for one or more entities."""

FULL = "FULL"
"""View both dataset metadata and ACL information."""


class DefaultPandasDTypes(enum.Enum):
"""Default Pandas DataFrem DTypes to convert BigQuery data. These
Sentinel values are used instead of None to maintain backward compatibility,
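The enum values are plain strings, so the mapping to the `datasetView` query parameter is direct. A quick sketch of the same branching that `Client.get_dataset` now applies (`_view_query_params` is an illustrative helper, not part of the library):

from typing import Optional

from google.cloud.bigquery.enums import DatasetView

def _view_query_params(dataset_view: Optional[DatasetView]) -> dict:
    # Mirrors the branch added to Client.get_dataset above: only send
    # datasetView when the caller supplied a view.
    return {"datasetView": dataset_view.value} if dataset_view else {}

assert _view_query_params(None) == {}
assert _view_query_params(DatasetView.ACL) == {"datasetView": "ACL"}
assert _view_query_params(DatasetView.FULL) == {"datasetView": "FULL"}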
70 changes: 68 additions & 2 deletions tests/unit/test_client.py
@@ -61,7 +61,7 @@
from google.cloud.bigquery import job as bqjob
import google.cloud.bigquery._job_helpers
from google.cloud.bigquery.dataset import DatasetReference, Dataset
from google.cloud.bigquery.enums import UpdateMode
from google.cloud.bigquery.enums import UpdateMode, DatasetView
from google.cloud.bigquery import exceptions
from google.cloud.bigquery import ParquetOptions
import google.cloud.bigquery.retry
@@ -753,7 +753,7 @@ def test_get_dataset(self):
final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None)

conn.api_request.assert_called_once_with(
method="GET", path="/%s" % path, timeout=7.5
method="GET", path="/%s" % path, timeout=7.5, query_params={}
)
self.assertEqual(dataset.dataset_id, self.DS_ID)

@@ -819,6 +819,72 @@ def test_get_dataset(self):

self.assertEqual(dataset.dataset_id, self.DS_ID)

def test_get_dataset_with_dataset_view(self):
path = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID)
creds = _make_credentials()
http = object()
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
resource = {
"id": "%s:%s" % (self.PROJECT, self.DS_ID),
"datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
}
dataset_ref = DatasetReference(self.PROJECT, self.DS_ID)

test_cases = [
(None, None),
(DatasetView.DATASET_VIEW_UNSPECIFIED, "DATASET_VIEW_UNSPECIFIED"),
(DatasetView.METADATA, "METADATA"),
(DatasetView.ACL, "ACL"),
(DatasetView.FULL, "FULL"),
]

for dataset_view_arg, expected_param_value in test_cases:
with self.subTest(
dataset_view_arg=dataset_view_arg,
expected_param_value=expected_param_value,
):
# Re-initialize the connection mock for each sub-test to reset side_effect
conn = client._connection = make_connection(resource)

dataset = client.get_dataset(dataset_ref, dataset_view=dataset_view_arg)

self.assertEqual(dataset.dataset_id, self.DS_ID)

if expected_param_value:
expected_query_params = {"datasetView": expected_param_value}
else:
expected_query_params = {}

conn.api_request.assert_called_once_with(
method="GET",
path="/%s" % path,
timeout=DEFAULT_TIMEOUT,
query_params=expected_query_params,
)

def test_get_dataset_with_invalid_dataset_view(self):
invalid_view_values = [
"INVALID_STRING",
123,
123.45,
object(),
]
creds = _make_credentials()
http = object()
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
resource = {
"id": "%s:%s" % (self.PROJECT, self.DS_ID),
"datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
}
conn = client._connection = make_connection(resource)
dataset_ref = DatasetReference(self.PROJECT, self.DS_ID)

for invalid_view_value in invalid_view_values:
with self.subTest(invalid_view_value=invalid_view_value):
conn.api_request.reset_mock() # Reset mock for each sub-test
with self.assertRaises(AttributeError):
client.get_dataset(dataset_ref, dataset_view=invalid_view_value)

def test_ensure_bqstorage_client_creating_new_instance(self):
bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage")

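Since the suite already uses pytest elsewhere, the table-driven subTest loop could also be expressed with pytest.mark.parametrize, which reports each case as a separate test and gives every case a fresh connection mock. A sketch, assuming the module's existing make_connection and _make_credentials helpers, its DEFAULT_TIMEOUT constant, and an in-scope Client class (the test name and literal IDs here are illustrative):

import pytest

from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.enums import DatasetView

@pytest.mark.parametrize(
    "dataset_view_arg, expected_query_params",
    [
        (None, {}),
        (DatasetView.DATASET_VIEW_UNSPECIFIED, {"datasetView": "DATASET_VIEW_UNSPECIFIED"}),
        (DatasetView.METADATA, {"datasetView": "METADATA"}),
        (DatasetView.ACL, {"datasetView": "ACL"}),
        (DatasetView.FULL, {"datasetView": "FULL"}),
    ],
)
def test_get_dataset_view_sends_expected_query_params(
    dataset_view_arg, expected_query_params
):
    creds = _make_credentials()
    client = Client(project="test-project", credentials=creds, _http=object())
    resource = {
        "id": "test-project:dataset_id",
        "datasetReference": {
            "projectId": "test-project",
            "datasetId": "dataset_id",
        },
    }
    # Each parametrized case builds its own mock connection, so
    # assert_called_once_with holds without manual resets.
    conn = client._connection = make_connection(resource)
    dataset_ref = DatasetReference("test-project", "dataset_id")

    dataset = client.get_dataset(dataset_ref, dataset_view=dataset_view_arg)

    assert dataset.dataset_id == "dataset_id"
    conn.api_request.assert_called_once_with(
        method="GET",
        path="/projects/test-project/datasets/dataset_id",
        timeout=DEFAULT_TIMEOUT,
        query_params=expected_query_params,
    )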
7 changes: 6 additions & 1 deletion tests/unit/test_create_dataset.py
@@ -372,7 +372,12 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION)
},
timeout=DEFAULT_TIMEOUT,
),
mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT),
mock.call(
method="GET",
path=get_path,
timeout=DEFAULT_TIMEOUT,
query_params={},
),
]
)
