From 628ec9d29b47836ff1fad3a4f9054c12fe01621a Mon Sep 17 00:00:00 2001 From: Anton Yuzhaninov Date: Wed, 29 Jul 2020 14:30:23 +0100 Subject: [PATCH] [Minor] Fix query used to find old partitions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit In ClickHouse a partition includes one or more parts. Typically a partition has at least a few parts. For each part there is a row in system.parts table and max_date can be different for different parts. For example if we have following parts: ┌─partition──┬─table──┬───max_date─┐ │ 2020-06-01 │ rspamd │ 2020-06-07 │ │ 2020-06-08 │ rspamd │ 2020-06-10 │ │ 2020-06-08 │ rspamd │ 2020-06-14 │ │ 2020-06-15 │ rspamd │ 2020-06-21 │ └────────────┴────────┴────────────┘ and want to delete parts which don't contain data written before 2020-06-12. Old query will return partition which contain a part with max_date=2020-06-14: ┌─partition──┬─table──┐ │ 2020-06-01 │ rspamd │ │ 2020-06-08 │ rspamd │ └────────────┴────────┘ and new query will return ┌─partition──┬─table──┐ │ 2020-06-01 │ rspamd │ └────────────┴────────┘ While here change <= to < so we will store at least given number of full months. --- src/plugins/lua/clickhouse.lua | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua index bede3f50b..3a4b1f802 100644 --- a/src/plugins/lua/clickhouse.lua +++ b/src/plugins/lua/clickhouse.lua @@ -1010,8 +1010,10 @@ local function clickhouse_remove_old_partitions(cfg, ev_base) end local upstream = settings.upstream:get_upstream_round_robin() - local partition_to_remove_sql = "SELECT distinct partition, table FROM system.parts WHERE " .. - "table in ('${tables}') and max_date <= toDate(now() - interval ${month} month);" + local partition_to_remove_sql = "SELECT partition, table " .. + "FROM system.parts WHERE table IN ('${tables}') " .. + "GROUP BY partition, table " .. + "HAVING max(max_date) < toDate(now() - interval ${month} month)" local table_names = {'rspamd'} local tables = table.concat(table_names, "', '") -- 2.39.5