这是indexloc提供的服务,不要输入任何密码
Skip to content

Backport #84007 to 25.5: Fix rare bug with MATERIALIZE COLUMN query #84124

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions src/Storages/MergeTree/MutateTask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,11 @@ static void splitAndModifyMutationCommands(
for_interpreter.push_back(command);
mutated_columns.emplace(command.column_name);
}

/// Materializing a column with a complex data type (like Tuple) can remove some nested columns.
/// Here we add it "for renames" because this set of commands also removes redundant files.
if (part_columns.has(command.column_name))
for_file_renames.push_back(command);
}
if (command.type == MutationCommand::Type::MATERIALIZE_INDEX
|| command.type == MutationCommand::Type::MATERIALIZE_STATISTICS
Expand Down Expand Up @@ -307,6 +312,11 @@ static void splitAndModifyMutationCommands(
auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name);
if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary))
for_interpreter.push_back(command);

/// Materializing a column with a complex data type (like Tuple) can remove some nested columns.
/// Here we add it "for renames" because this set of commands also removes redundant files.
if (part_columns.has(command.column_name))
for_file_renames.push_back(command);
}
else if (command.type == MutationCommand::Type::MATERIALIZE_INDEX
|| command.type == MutationCommand::Type::MATERIALIZE_STATISTICS
Expand Down Expand Up @@ -887,12 +897,11 @@ static NameToNameVector collectFilesForRenames(
if (source_part->checksums.has(STATS_FILE_PREFIX + command.column_name + STATS_FILE_SUFFIX))
add_rename(STATS_FILE_PREFIX + command.column_name + STATS_FILE_SUFFIX, STATS_FILE_PREFIX + command.rename_to + STATS_FILE_SUFFIX);
}
else if (command.type == MutationCommand::Type::READ_COLUMN)
else if (command.type == MutationCommand::Type::READ_COLUMN || command.type == MutationCommand::Type::MATERIALIZE_COLUMN)
{
/// Remove files for streams that exist in source_part,
/// but were removed in new_part by MODIFY COLUMN from
/// but were removed in new_part by MODIFY COLUMN or MATERIALIZE COLUMN from
/// type with higher number of streams (e.g. LowCardinality -> String).

auto old_streams = getStreamCounts(source_part, source_part->checksums, source_part->getColumns().getNames());
auto new_streams = getStreamCounts(new_part, source_part->checksums, source_part->getColumns().getNames());

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1
37 changes: 37 additions & 0 deletions tests/queries/0_stateless/03569_materialize_alias_issue.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env bash
# Tags: no-random-settings, no-random-merge-tree-settings

# Scenario: a MATERIALIZED Array(Tuple(...)) column is switched to ALIAS, then back to
# MATERIALIZED with a different tuple element name, and finally materialized.
# CHECK TABLE at the end verifies the resulting data part.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Send one statement to the server.
# $CLICKHOUSE_CLIENT is kept unquoted, exactly as in a direct invocation,
# so the shell's word splitting of the variable is unchanged.
function run_query()
{
    $CLICKHOUSE_CLIENT --query "$1"
}

run_query "DROP TABLE IF EXISTS customer_dbt_materialize"

# Wide parts and full part storage are forced via the table settings below.
$CLICKHOUSE_CLIENT -n --query "CREATE TABLE customer_dbt_materialize(
key UInt64,
value Array(Tuple(transaction_hash String, instruction_sig_hash String)) MATERIALIZED array((toString(key), toString(key)))
)
ENGINE = ReplicatedMergeTree('/zookeeper/{database}/test_replicated_merge_tree', 'customer_dbt_materialize')
ORDER BY tuple()
SETTINGS min_bytes_for_wide_part = 0, min_bytes_for_full_part_storage = 0;"

run_query "INSERT INTO customer_dbt_materialize SELECT number FROM numbers(1000)"

# NOTE The first ALTER below looks like a no-op (a metadata-only change that the next ALTER overrides),
# but it drives important logic in the codebase:
# MODIFY COLUMN only touches data parts when something changed in a PHYSICAL column.
#
# Once `value` becomes ALIAS it is no longer a physical column, although it still exists in the data part.
# For the same reason the second MODIFY COLUMN (back to MATERIALIZED) also leaves the data parts alone,
# because at that moment `value` is ALIAS, not PHYSICAL.
#
# The final MATERIALIZE COLUMN then triggers a real mutation that rewrites the data part;
# this is the step that used to leave an incorrect checksum on disk.
run_query "ALTER TABLE customer_dbt_materialize MODIFY COLUMN value Array(Tuple(transaction_hash String, instruction_sig_hash String)) ALIAS array((toString(key), toString(key))) SETTINGS mutations_sync = 2"

run_query "ALTER TABLE customer_dbt_materialize MODIFY COLUMN value Array(Tuple(transaction_hash String, transaction_index_data String)) MATERIALIZED array((toString(key), toString(key))) SETTINGS mutations_sync = 2"

run_query "ALTER TABLE customer_dbt_materialize MATERIALIZE COLUMN value"

run_query "SYSTEM SYNC REPLICA customer_dbt_materialize"

run_query "CHECK TABLE customer_dbt_materialize"

run_query "DROP TABLE IF EXISTS customer_dbt_materialize"
Loading