这是indexloc提供的服务,不要输入任何密码
Skip to content

feat: add ability to set autodetect_schema query param in update_table #2171

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1389,6 +1389,7 @@ def update_table(
self,
table: Table,
fields: Sequence[str],
autodetect_schema: bool = False,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> Table:
Expand Down Expand Up @@ -1419,6 +1420,10 @@ def update_table(
fields (Sequence[str]):
The fields of ``table`` to change, spelled as the
:class:`~google.cloud.bigquery.table.Table` properties.
autodetect_schema (bool):
Specifies if the schema of the table should be autodetected when
updating the table from the underlying source. Only applicable
for external tables.
retry (Optional[google.api_core.retry.Retry]):
A description of how to retry the API call.
timeout (Optional[float]):
Expand All @@ -1438,12 +1443,18 @@ def update_table(
path = table.path
span_attributes = {"path": path, "fields": fields}

if autodetect_schema:
query_params = {"autodetect_schema": True}
else:
query_params = {}

api_response = self._call_api(
retry,
span_name="BigQuery.updateTable",
span_attributes=span_attributes,
method="PATCH",
path=path,
query_params=query_params,
data=partial,
headers=headers,
timeout=timeout,
Expand Down
47 changes: 47 additions & 0 deletions tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,6 +978,53 @@ def test_update_table_constraints(self):
)
self.assertIsNone(reference_table3.table_constraints, None)

def test_update_table_autodetect_schema(self):
    """Check that ``update_table(..., autodetect_schema=True)`` re-infers
    the schema of an external table.

    The table is first created with a single declared field; after the
    PATCH with autodetection enabled, the schema is expected to contain
    the four fields listed at the end of this test.
    """
    table_id = "test_table"
    dataset = self.temp_dataset(_make_dataset_id("bq_update_table_test"))

    # Step 1: create an external table whose schema is restricted to a
    # single declared field.
    declared_schema = [
        bigquery.SchemaField("username", "STRING", mode="NULLABLE"),
    ]
    initial_table = Table(dataset.table(table_id))

    avro_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.AVRO)
    avro_config.source_uris = SOURCE_URIS_AVRO
    avro_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO
    avro_config.schema = declared_schema
    initial_table.external_data_configuration = avro_config

    self.assertFalse(_table_exists(initial_table))

    create_with_retry = helpers.retry_403(Config.CLIENT.create_table)
    created = create_with_retry(initial_table)
    self.to_delete.insert(0, created)
    self.assertTrue(_table_exists(created))

    self.assertEqual(created.schema, declared_schema)

    # Step 2: build the update payload -- same external config, but with
    # autodetection switched on and the explicit schema dropped.
    patch_table = Table(dataset.table(table_id))

    autodetect_config = copy.deepcopy(avro_config)
    autodetect_config.autodetect = True
    autodetect_config.schema = None
    patch_table.external_data_configuration = autodetect_config

    # Step 3: PATCH with autodetect_schema=True to trigger schema inference.
    patched = Config.CLIENT.update_table(
        patch_table,
        ["external_data_configuration"],
        autodetect_schema=True,
    )

    # The updated table should have a schema inferred from the reference
    # file, which has all four fields.
    inferred_columns = (
        ("username", "STRING"),
        ("tweet", "STRING"),
        ("timestamp", "STRING"),
        ("likes", "INTEGER"),
    )
    expected_schema = [
        bigquery.SchemaField(column, column_type, mode="NULLABLE")
        for column, column_type in inferred_columns
    ]
    self.assertEqual(patched.schema, expected_schema)

@staticmethod
def _fetch_single_page(table, selected_fields=None):
iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields)
Expand Down
12 changes: 9 additions & 3 deletions tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2385,7 +2385,7 @@ def test_update_table(self):
"resourceTags": {"123456789012/key": "value"},
}
conn.api_request.assert_called_once_with(
method="PATCH", data=sent, path="/" + path, timeout=7.5
method="PATCH", data=sent, path="/" + path, timeout=7.5, query_params={}
)
self.assertEqual(updated_table.description, table.description)
self.assertEqual(updated_table.friendly_name, table.friendly_name)
Expand Down Expand Up @@ -2439,6 +2439,7 @@ def test_update_table_w_custom_property(self):
path="/%s" % path,
data={"newAlphaProperty": "unreleased property"},
timeout=DEFAULT_TIMEOUT,
query_params={},
)
self.assertEqual(
updated_table._properties["newAlphaProperty"], "unreleased property"
Expand Down Expand Up @@ -2475,6 +2476,7 @@ def test_update_table_only_use_legacy_sql(self):
path="/%s" % path,
data={"view": {"useLegacySql": True}},
timeout=DEFAULT_TIMEOUT,
query_params={},
)
self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql)

Expand Down Expand Up @@ -2567,9 +2569,10 @@ def test_update_table_w_query(self):
"schema": schema_resource,
},
timeout=DEFAULT_TIMEOUT,
query_params={},
)

def test_update_table_w_schema_None(self):
def test_update_table_w_schema_None_autodetect_schema(self):
# Simulate deleting schema: not sure if back-end will actually
# allow this operation, but the spec says it is optional.
path = "projects/%s/datasets/%s/tables/%s" % (
Expand Down Expand Up @@ -2611,7 +2614,9 @@ def test_update_table_w_schema_None(self):
with mock.patch(
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
) as final_attributes:
updated_table = client.update_table(table, ["schema"])
updated_table = client.update_table(
table, ["schema"], autodetect_schema=True
)

final_attributes.assert_called_once_with(
{"path": "/%s" % path, "fields": ["schema"]}, client, None
Expand All @@ -2623,6 +2628,7 @@ def test_update_table_w_schema_None(self):
sent = {"schema": {"fields": None}}
self.assertEqual(req[1]["data"], sent)
self.assertEqual(req[1]["path"], "/%s" % path)
self.assertEqual(req[1]["query_params"], {"autodetect_schema": True})
self.assertEqual(len(updated_table.schema), 0)

def test_update_table_delete_property(self):
Expand Down