diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 8bb70182529d..7988f7a72b95 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -168,6 +168,11 @@ static void splitAndModifyMutationCommands( for_interpreter.push_back(command); mutated_columns.emplace(command.column_name); } + + /// For complex data types such as Tuple, MATERIALIZE COLUMN can remove some nested columns. + /// Add the command to `for_file_renames` as well, because this set of commands also removes redundant files. + if (part_columns.has(command.column_name)) + for_file_renames.push_back(command); } if (command.type == MutationCommand::Type::MATERIALIZE_INDEX || command.type == MutationCommand::Type::MATERIALIZE_STATISTICS @@ -307,6 +312,11 @@ static void splitAndModifyMutationCommands( auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name); if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary)) for_interpreter.push_back(command); + + /// For complex data types such as Tuple, MATERIALIZE COLUMN can remove some nested columns. + /// Add the command to `for_file_renames` as well, because this set of commands also removes redundant files. + if (part_columns.has(command.column_name)) + for_file_renames.push_back(command); } else if (command.type == MutationCommand::Type::MATERIALIZE_INDEX || command.type == MutationCommand::Type::MATERIALIZE_STATISTICS @@ -887,12 +897,11 @@ static NameToNameVector collectFilesForRenames( if (source_part->checksums.has(STATS_FILE_PREFIX + command.column_name + STATS_FILE_SUFFIX)) add_rename(STATS_FILE_PREFIX + command.column_name + STATS_FILE_SUFFIX, STATS_FILE_PREFIX + command.rename_to + STATS_FILE_SUFFIX); } - else if (command.type == MutationCommand::Type::READ_COLUMN) + else if (command.type == MutationCommand::Type::READ_COLUMN || command.type == MutationCommand::Type::MATERIALIZE_COLUMN) { /// Remove files for streams that 
exist in source_part, - /// but were removed in new_part by MODIFY COLUMN from + /// but were removed in new_part by MODIFY COLUMN or MATERIALIZE COLUMN from /// type with higher number of streams (e.g. LowCardinality -> String). - auto old_streams = getStreamCounts(source_part, source_part->checksums, source_part->getColumns().getNames()); auto new_streams = getStreamCounts(new_part, source_part->checksums, source_part->getColumns().getNames()); diff --git a/tests/queries/0_stateless/03569_materialize_alias_issue.reference b/tests/queries/0_stateless/03569_materialize_alias_issue.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03569_materialize_alias_issue.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03569_materialize_alias_issue.sh b/tests/queries/0_stateless/03569_materialize_alias_issue.sh new file mode 100755 index 000000000000..c04be637dc45 --- /dev/null +++ b/tests/queries/0_stateless/03569_materialize_alias_issue.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Tags: no-random-settings, no-random-merge-tree-settings + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS customer_dbt_materialize" + +$CLICKHOUSE_CLIENT -n --query "CREATE TABLE customer_dbt_materialize( + key UInt64, + value Array(Tuple(transaction_hash String, instruction_sig_hash String)) MATERIALIZED array((toString(key), toString(key))) +) +ENGINE = ReplicatedMergeTree('/zookeeper/{database}/test_replicated_merge_tree', 'customer_dbt_materialize') +ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0, min_bytes_for_full_part_storage = 0;" + +$CLICKHOUSE_CLIENT --query "INSERT INTO customer_dbt_materialize SELECT number FROM numbers(1000)" + +# NOTE This command looks like noop (pure metadata change which we will override with next ALTER), however it leads to important logic in the codebase: +# When we apply MODIFY COLUMN we validate that we changed something in PHYSICAL column. If we don't change anything in PHYSICAL column, we will not touch any data parts. +# +# After this MODIFY `value` column is not a physical column anymore, however it still exists in data part. So the next ALTER MODIFY COLUMN to MATERIALIZED state +# will also do nothing with data parts (because `value` is ALIAS, not PHYSICAL column). +# +# And the last MATERIALIZE COLUMN will trigger real mutation which will rewrite data part and leave incorrect checksum on disk. 
+$CLICKHOUSE_CLIENT --query "ALTER TABLE customer_dbt_materialize MODIFY COLUMN value Array(Tuple(transaction_hash String, instruction_sig_hash String)) ALIAS array((toString(key), toString(key))) SETTINGS mutations_sync = 2" + +$CLICKHOUSE_CLIENT --query "ALTER TABLE customer_dbt_materialize MODIFY COLUMN value Array(Tuple(transaction_hash String, transaction_index_data String)) MATERIALIZED array((toString(key), toString(key))) SETTINGS mutations_sync = 2" + +$CLICKHOUSE_CLIENT --query "ALTER TABLE customer_dbt_materialize MATERIALIZE COLUMN value" + +$CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA customer_dbt_materialize" + +$CLICKHOUSE_CLIENT --query "CHECK TABLE customer_dbt_materialize" + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS customer_dbt_materialize"