Browse Source

Merge pull request #2935 from citrin/clickhouse

[Minor] ClickHouse: Improve schema and tests
tags/2.0
Vsevolod Stakhov 5 years ago
parent
commit
57c21062f2
No account linked to committer's email address

+ 46
- 46
src/plugins/lua/clickhouse.lua View File

@@ -90,54 +90,54 @@ local settings = {
local clickhouse_schema = {[[
CREATE TABLE rspamd
(
Date Date,
TS DateTime,
From String,
MimeFrom String,
IP String,
Score Float32,
NRcpt UInt8,
Size UInt32,
IsWhitelist Enum8('blacklist' = 0, 'whitelist' = 1, 'unknown' = 2) DEFAULT 'unknown',
IsBayes Enum8('ham' = 0, 'spam' = 1, 'unknown' = 2) DEFAULT 'unknown',
IsFuzzy Enum8('whitelist' = 0, 'deny' = 1, 'unknown' = 2) DEFAULT 'unknown',
IsFann Enum8('ham' = 0, 'spam' = 1, 'unknown' = 2) DEFAULT 'unknown',
IsDkim Enum8('reject' = 0, 'allow' = 1, 'unknown' = 2, 'dnsfail' = 3, 'na' = 4) DEFAULT 'unknown',
IsDmarc Enum8('reject' = 0, 'allow' = 1, 'unknown' = 2, 'softfail' = 3, 'na' = 4, 'quarantine' = 5) DEFAULT 'unknown',
IsSpf Enum8('reject' = 0, 'allow' = 1, 'neutral' = 2, 'dnsfail' = 3, 'na' = 4, 'unknown' = 5) DEFAULT 'unknown',
NUrls Int32,
Action Enum8('reject' = 0, 'rewrite subject' = 1, 'add header' = 2, 'greylist' = 3, 'no action' = 4, 'soft reject' = 5, 'custom' = 6) DEFAULT 'no action',
CustomAction LowCardinality(String),
FromUser String,
MimeUser String,
RcptUser String,
RcptDomain String,
MimeRecipients Array(String),
MessageId String,
ListId String,
Subject String,
`Attachments.FileName` Array(String),
`Attachments.ContentType` Array(String),
`Attachments.Length` Array(UInt32),
`Attachments.Digest` Array(FixedString(16)),
`Urls.Tld` Array(String),
`Urls.Url` Array(String),
Emails Array(String),
ASN String,
Country LowCardinality(FixedString(2)),
Date Date COMMENT 'Date (used for partitioning)',
TS DateTime COMMENT 'Date and time of request start (UTC)',
From String COMMENT 'Domain part of the return address (RFC5321.MailFrom)',
MimeFrom String COMMENT 'Domain part of the address in From: header (RFC5322.From)',
IP String COMMENT 'SMTP client IP as provided by MTA or from Received: header',
Score Float32 COMMENT 'Message score',
NRcpt UInt8 COMMENT 'Number of envelope recipients (RFC5321.RcptTo)',
Size UInt32 COMMENT 'Message size in bytes',
IsWhitelist Enum8('blacklist' = 0, 'whitelist' = 1, 'unknown' = 2) DEFAULT 'unknown' COMMENT 'Based on symbols configured in `whitelist_symbols` module option',
IsBayes Enum8('ham' = 0, 'spam' = 1, 'unknown' = 2) DEFAULT 'unknown' COMMENT 'Based on symbols configured in `bayes_spam_symbols` and `bayes_ham_symbols` module options',
IsFuzzy Enum8('whitelist' = 0, 'deny' = 1, 'unknown' = 2) DEFAULT 'unknown' COMMENT 'Based on symbols configured in `fuzzy_symbols` module option',
IsFann Enum8('ham' = 0, 'spam' = 1, 'unknown' = 2) DEFAULT 'unknown' COMMENT 'Based on symbols configured in `ann_symbols_spam` and `ann_symbols_ham` module options',
IsDkim Enum8('reject' = 0, 'allow' = 1, 'unknown' = 2, 'dnsfail' = 3, 'na' = 4) DEFAULT 'unknown' COMMENT 'Based on symbols configured in dkim_* module options',
IsDmarc Enum8('reject' = 0, 'allow' = 1, 'unknown' = 2, 'softfail' = 3, 'na' = 4, 'quarantine' = 5) DEFAULT 'unknown' COMMENT 'Based on symbols configured in dmarc_* module options',
IsSpf Enum8('reject' = 0, 'allow' = 1, 'neutral' = 2, 'dnsfail' = 3, 'na' = 4, 'unknown' = 5) DEFAULT 'unknown' COMMENT 'Based on symbols configured in spf_* module options',
NUrls Int32 COMMENT 'Number of URLs and email extracted from the message',
Action Enum8('reject' = 0, 'rewrite subject' = 1, 'add header' = 2, 'greylist' = 3, 'no action' = 4, 'soft reject' = 5, 'custom' = 6) DEFAULT 'no action' COMMENT 'Action returned for the message; if action is not predefined actual action will be in `CustomAction` field',
CustomAction LowCardinality(String) COMMENT 'Action string for custom action',
FromUser String COMMENT 'Local part of the return address (RFC5321.MailFrom)',
MimeUser String COMMENT 'Local part of address in From: header (RFC5322.From)',
RcptUser String COMMENT 'Local part of the first envelope recipient (RFC5321.RcptTo)',
RcptDomain String COMMENT 'Domain part of the first envelope recipient (RFC5321.RcptTo)',
MimeRecipients Array(String) COMMENT 'List of recipients from headers (RFC5322.To/.CC/.BCC)',
MessageId String COMMENT 'Message-ID header',
ListId String COMMENT 'List-Id header',
Subject String COMMENT 'Subject header (or hash if `subject_privacy` module option enabled)',
`Attachments.FileName` Array(String) COMMENT 'Attachment name',
`Attachments.ContentType` Array(String) COMMENT 'Attachment Content-Type',
`Attachments.Length` Array(UInt32) COMMENT 'Attachment size in bytes',
`Attachments.Digest` Array(FixedString(16)) COMMENT 'First 16 characters of hash returned by mime_part:get_digest()',
`Urls.Tld` Array(String) COMMENT 'Effective second level domain part of the URL host',
`Urls.Url` Array(String) COMMENT 'Full URL if `full_urls` module option enabled, host part of URL otherwise',
Emails Array(String) COMMENT 'List of emails extracted from the message',
ASN String COMMENT 'BGP AS number for SMTP client IP (returned by asn.rspamd.com or asn6.rspamd.com)',
Country LowCardinality(FixedString(2)) COMMENT 'Country for SMTP client IP (returned by asn.rspamd.com or asn6.rspamd.com)',
IPNet String,
`Symbols.Names` Array(LowCardinality(String)),
`Symbols.Scores` Array(Float32),
`Symbols.Options` Array(String),
ScanTimeReal UInt32,
`Symbols.Names` Array(LowCardinality(String)) COMMENT 'Symbol name',
`Symbols.Scores` Array(Float32) COMMENT 'Symbol score',
`Symbols.Options` Array(String) COMMENT 'Symbol options (comma separated list)',
ScanTimeReal UInt32 COMMENT 'Request time in milliseconds',
ScanTimeVirtual UInt32,
AuthUser String,
SettingsId LowCardinality(String),
Digest FixedString(32),
SMTPFrom ALIAS if(From = '', '', concat(FromUser, '@', From)),
SMTPRcpt ALIAS if(RcptDomain = '', '', concat(RcptUser, '@', RcptDomain)),
MIMEFrom ALIAS if(MimeFrom = '', '', concat(MimeUser, '@', MimeFrom)),
MIMERcpt ALIAS MimeRecipients[1]
AuthUser String COMMENT 'Username for authenticated SMTP client',
SettingsId LowCardinality(String) COMMENT 'ID for settings profile',
Digest FixedString(32) COMMENT 'Deprecated, no longer stored',
SMTPFrom ALIAS if(From = '', '', concat(FromUser, '@', From)) COMMENT 'Return address (RFC5321.MailFrom)',
SMTPRcpt ALIAS if(RcptDomain = '', '', concat(RcptUser, '@', RcptDomain)) COMMENT 'First recipient (RFC5321.RcptTo)',
MIMEFrom ALIAS if(MimeFrom = '', '', concat(MimeUser, '@', MimeFrom)) COMMENT 'Address in From: header (RFC5322.From)',
MIMERcpt ALIAS MimeRecipients[1] COMMENT 'First recipients from headers (RFC5322.To/.CC/.BCC)'
) ENGINE = MergeTree()
PARTITION BY toMonday(Date)
ORDER BY TS

+ 63
- 75
test/functional/cases/210_clickhouse/001_migration.robot View File

@@ -1,90 +1,78 @@
*** Settings ***
Documentation Checks if rspamd is able to upgrade migration schema from v0 (very initial) to v2
Variables ${TESTDIR}/lib/vars.py
Library ${TESTDIR}/lib/rspamd.py
Library clickhouse.py
Resource ${TESTDIR}/lib/rspamd.robot

Test Setup Clickhouse Setup
Test Teardown Clickhosue Teardown
Documentation Checks if rspamd is able to upgrade migration schema from v0 (very initial) to v2
Test Setup Clickhouse Setup
Test Teardown Clickhosue Teardown
Variables ${TESTDIR}/lib/vars.py
Library ${TESTDIR}/lib/rspamd.py
Library clickhouse.py
Resource ${TESTDIR}/lib/rspamd.robot

*** Variables ***
${CONFIG} ${TESTDIR}/configs/clickhouse.conf
${RSPAMD_SCOPE} Suite
${CLICKHOUSE_PORT} ${18123}
${CONFIG} ${TESTDIR}/configs/clickhouse.conf
${RSPAMD_SCOPE} Suite
${CLICKHOUSE_PORT} ${18123}

*** Test Cases ***
#Initial schema
# Prepare rspamd
# Sleep 2 #TODO: replace this check with waiting until migration finishes
# Column should exist rspamd Symbols.Scores
# Column should exist rspamd Attachments.Digest
# Column should exist rspamd Symbols.Scores
# Schema version should be 3


Migration
Upload new schema ${TESTDIR}/data/initial_schema/schema.sql
Insert data rspamd ${TESTDIR}/data/initial_schema/data.rspamd.sql
Insert data rspamd_asn ${TESTDIR}/data/initial_schema/data.rspamd_asn.sql
Insert data rspamd_urls ${TESTDIR}/data/initial_schema/data.rspamd_urls.sql
Insert data rspamd_emails ${TESTDIR}/data/initial_schema/data.rspamd_emails.sql
Insert data rspamd_symbols ${TESTDIR}/data/initial_schema/data.rspamd_symbols.sql
Insert data rspamd_attachments ${TESTDIR}/data/initial_schema/data.rspamd_attachments.sql

Prepare rspamd

Sleep 2 #TODO: replace this check with waiting until migration finishes

Column should exist rspamd Symbols.Scores
Column should exist rspamd Attachments.Digest
Column should exist rspamd Symbols.Scores
Schema version should be 6

#Initial schema
# Prepare rspamd
# Sleep 2 #TODO: replace this check with waiting until migration finishes
# Column should exist rspamd Symbols.Scores
# Column should exist rspamd Attachments.Digest
# Column should exist rspamd Symbols.Scores
# Schema version should be 3
Upload new schema ${TESTDIR}/data/initial_schema/schema.sql
Insert data rspamd ${TESTDIR}/data/initial_schema/data.rspamd.sql
Insert data rspamd_asn ${TESTDIR}/data/initial_schema/data.rspamd_asn.sql
Insert data rspamd_urls ${TESTDIR}/data/initial_schema/data.rspamd_urls.sql
Insert data rspamd_emails ${TESTDIR}/data/initial_schema/data.rspamd_emails.sql
Insert data rspamd_symbols ${TESTDIR}/data/initial_schema/data.rspamd_symbols.sql
Insert data rspamd_attachments ${TESTDIR}/data/initial_schema/data.rspamd_attachments.sql
Prepare rspamd
Sleep 2 #TODO: replace this check with waiting until migration finishes
Column should exist rspamd Symbols.Scores
Column should exist rspamd Attachments.Digest
Column should exist rspamd Symbols.Scores
Schema version should be 6

Retention
Upload new schema ${TESTDIR}/data/schema_2/schema.sql
Insert data rspamd ${TESTDIR}/data/schema_2/data.rspamd.sql

Assert rows count rspamd 56
Prepare rspamd

Sleep 2 #TODO: replace this check with waiting until migration finishes

Assert rows count rspamd 30

Upload new schema ${TESTDIR}/data/schema_2/schema.sql
Insert data rspamd ${TESTDIR}/data/schema_2/data.rspamd.sql
Assert rows count rspamd 56
Prepare rspamd
Sleep 2 #TODO: replace this check with waiting until migration finishes
Assert rows count rspamd 30

*** Keywords ***
Clickhouse Setup
${TMPDIR} = Make Temporary Directory
Set Global Variable ${TMPDIR}
Set Directory Ownership ${TMPDIR} ${RSPAMD_USER} ${RSPAMD_GROUP}
${template} = Get File ${TESTDIR}/configs/clickhouse-config.xml
${config} = Replace Variables ${template}
Create File ${TMPDIR}/clickhouse-config.xml ${config}
Copy File ${TESTDIR}/configs/clickhouse-users.xml ${TMPDIR}/users.xml
Create Directory ${TMPDIR}/metadata
Create Directory ${TMPDIR}/metadata/default
Create Directory ${TMPDIR}/data/default
${result} = Run Process clickhouse-server --daemon --config-file\=${TMPDIR}/clickhouse-config.xml --pid-file\=${TMPDIR}/clickhouse.pid
Run Keyword If ${result.rc} != 0 Log ${result.stderr}
Should Be Equal As Integers ${result.rc} 0
Wait Until Keyword Succeeds 5 sec 50 ms TCP Connect localhost ${CLICKHOUSE_PORT}
Set Suite Variable ${TMPDIR} ${TMPDIR}


${TMPDIR} = Make Temporary Directory
Set Global Variable ${TMPDIR}
Set Directory Ownership ${TMPDIR} ${RSPAMD_USER} ${RSPAMD_GROUP}
${template} = Get File ${TESTDIR}/configs/clickhouse-config.xml
${config} = Replace Variables ${template}
Create File ${TMPDIR}/clickhouse-config.xml ${config}
Copy File ${TESTDIR}/configs/clickhouse-users.xml ${TMPDIR}/users.xml
Create Directory ${TMPDIR}/clickhouse
Set Directory Ownership ${TMPDIR}/clickhouse clickhouse clickhouse
${result} = Run Process su -s /bin/sh clickhouse -c
... clickhouse-server --daemon --config-file\=${TMPDIR}/clickhouse-config.xml --pid-file\=${TMPDIR}/clickhouse/clickhouse.pid
Run Keyword If ${result.rc} != 0 Log ${result.stderr}
Should Be Equal As Integers ${result.rc} 0
Wait Until Keyword Succeeds 5 sec 50 ms TCP Connect localhost ${CLICKHOUSE_PORT}
Set Suite Variable ${TMPDIR} ${TMPDIR}

Clickhosue Teardown
# Sleep 30
${clickhouse_pid} = Get File ${TMPDIR}/clickhouse.pid
Shutdown Process With Children ${clickhouse_pid}
Simple Teardown
# Sleep 30
${clickhouse_pid} = Get File ${TMPDIR}/clickhouse/clickhouse.pid
Shutdown Process With Children ${clickhouse_pid}
Log File ${TMPDIR}/clickhouse/clickhouse-server.err.log
Simple Teardown

Prepare rspamd
&{d} = Run Rspamd CONFIG=${TESTDIR}/configs/clickhouse.conf TMPDIR=${TMPDIR}
${keys} = Get Dictionary Keys ${d}
: FOR ${i} IN @{keys}
\ Run Keyword If '${RSPAMD_SCOPE}' == 'Suite' Set Suite Variable ${${i}} &{d}[${i}]
\ ... ELSE IF '${RSPAMD_SCOPE}' == 'Test' Set Test Variable ${${i}} &{d}[${i}]
\ ... ELSE Fail 'RSPAMD_SCOPE must be Test or Suite'
&{d} = Run Rspamd CONFIG=${TESTDIR}/configs/clickhouse.conf TMPDIR=${TMPDIR}
${keys} = Get Dictionary Keys ${d}
FOR ${i} IN @{keys}
Run Keyword If '${RSPAMD_SCOPE}' == 'Suite' Set Suite Variable ${${i}} &{d}[${i}]
... ELSE IF '${RSPAMD_SCOPE}' == 'Test' Set Test Variable ${${i}} &{d}[${i}]
... ELSE Fail 'RSPAMD_SCOPE must be Test or Suite'
END

+ 8
- 280
test/functional/configs/clickhouse-config.xml View File

@@ -2,53 +2,20 @@
<yandex>
<logger>
<!-- Possible levels: https://github.com/pocoproject/poco/blob/develop/Foundation/include/Poco/Logger.h#L105 -->
<level>trace</level>
<log>${TMPDIR}/clickhouse-server.log</log>
<errorlog>${TMPDIR}/clickhouse-server.err.log</errorlog>
<level>debug</level>
<log>${TMPDIR}/clickhouse/clickhouse-server.log</log>
<errorlog>${TMPDIR}/clickhouse/clickhouse-server.err.log</errorlog>
<size>1000M</size>
<count>10</count>
<!-- <console>1</console> --> <!-- Default behavior is autodetection (log to console if not daemon mode and is tty) -->
</logger>
<!--display_name>production</display_name--> <!-- It is the name that will be shown in the client -->
<http_port>${CLICKHOUSE_PORT}</http_port>
<tcp_port>19000</tcp_port>

<!-- For HTTPS and SSL over native protocol. -->
<!--
<https_port>8443</https_port>
<tcp_port_secure>9440</tcp_port_secure>
-->

<!-- Default root page on http[s] server. For example load UI from https://tabix.io/ when opening http://localhost:8123 -->
<!--
<http_server_default_response><![CDATA[<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>]]></http_server_default_response>
-->

<!-- Port for communication between replicas. Used for data exchange. -->
<interserver_http_port>19009</interserver_http_port>

<!-- Hostname that is used by other replicas to request this server.
If not specified, then it is determined analogously to the 'hostname -f' command.
This setting could be used to switch replication to another network interface.
-->
<!--
<interserver_http_host>example.yandex.ru</interserver_http_host>
-->

<!-- Listen on the specified host. Use :: (wildcard IPv6 address) if you want to accept connections both with IPv4 and IPv6 from everywhere. -->
<!-- <listen_host>::</listen_host> -->
<!-- Same for hosts with disabled ipv6: -->
<!-- <listen_host>0.0.0.0</listen_host> -->

<!-- Default values - try listen localhost on ipv4 and ipv6: -->
<!--
<listen_host>::1</listen_host>
<listen_host>127.0.0.1</listen_host>
-->
<!-- Don't exit if IPv6 or IPv4 is unavailable while a listen_host for that protocol is specified -->
<!-- <listen_try>0</listen_try> -->
<http_port>${CLICKHOUSE_PORT}</http_port>

<!-- Path to configuration file with users, access rights, profiles of settings, quotas. -->
<users_config>${TMPDIR}/users.xml</users_config>

<!-- Allow listen on same address:port -->
<!-- <listen_reuse_port>0</listen_reuse_port> -->

<!-- <listen_backlog>64</listen_backlog> -->
@@ -63,31 +30,14 @@
correct maximum value. -->
<max_open_files>256</max_open_files>

<!-- Size of cache of uncompressed blocks of data, used in tables of MergeTree family.
In bytes. Cache is single for server. Memory is allocated only on demand.
Cache is used when 'use_uncompressed_cache' user setting turned on (off by default).
Uncompressed cache is advantageous only for very short queries and in rare cases.
-->
<uncompressed_cache_size>8589934592</uncompressed_cache_size>

<!-- Approximate size of mark cache, used in tables of MergeTree family.
In bytes. Cache is single for server. Memory is allocated only on demand.
You should not lower this value.
-->
<mark_cache_size>5368709120</mark_cache_size>


<!-- Path to data directory, with trailing slash. -->
<path>${TMPDIR}/</path>

<!-- Path to temporary data for processing hard queries. -->
<tmp_path>${TMPDIR}</tmp_path>

<!-- Directory with user provided files that are accessible by 'file' table function. -->
<user_files_path>${TMPDIR}</user_files_path>

<!-- Path to configuration file with users, access rights, profiles of settings, quotas. -->
<users_config>${TMPDIR}/users.xml</users_config>
<path>${TMPDIR}/clickhouse/</path>

<!-- Default profile of settings. -->
<default_profile>default</default_profile>
@@ -117,226 +67,4 @@
-->
<!-- <umask>022</umask> -->

<!-- Configuration of clusters that could be used in Distributed tables.
https://clickhouse.yandex/docs/en/table_engines/distributed/
-->
<!--
<remote_servers incl="clickhouse_remote_servers" >
&lt;!&ndash; Test only shard config for testing distributed storage &ndash;&gt;
<test_shard_localhost>
<shard>
<replica>
<host>localhost</host>
<port>19000</port>
</replica>
</shard>
</test_shard_localhost>
<test_shard_localhost_secure>
<shard>
<replica>
<host>localhost</host>
<port>19440</port>
<secure>1</secure>
</replica>
</shard>
</test_shard_localhost_secure>
</remote_servers>
-->


<!-- If an element has the 'incl' attribute, the corresponding substitution from another file will be used as its value.
By default, path to file with substitutions is /etc/metrika.xml. It could be changed in config in 'include_from' element.
Values for substitutions are specified in /yandex/name_of_substitution elements in that file.
-->

<!-- ZooKeeper is used to store metadata about replicas, when using Replicated tables.
Optional. If you don't use replicated tables, you could omit that.

See https://clickhouse.yandex/docs/en/table_engines/replication/
-->
<zookeeper incl="zookeeper-servers" optional="true" />

<!-- Substitutions for parameters of replicated tables.
Optional. If you don't use replicated tables, you could omit that.

See https://clickhouse.yandex/docs/en/table_engines/replication/#creating-replicated-tables
-->
<macros incl="macros" optional="true" />


<!-- Reloading interval for embedded dictionaries, in seconds. Default: 3600. -->
<builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>


<!-- Maximum session timeout, in seconds. Default: 3600. -->
<max_session_timeout>3600</max_session_timeout>

<!-- Default session timeout, in seconds. Default: 60. -->
<default_session_timeout>60</default_session_timeout>

<!-- Sending data to Graphite for monitoring. Several sections can be defined. -->
<!--
interval - send every X second
root_path - prefix for keys
hostname_in_path - append hostname to root_path (default = true)
metrics - send data from table system.metrics
events - send data from table system.events
asynchronous_metrics - send data from table system.asynchronous_metrics
-->
<!--
<graphite>
<host>localhost</host>
<port>42000</port>
<timeout>0.1</timeout>
<interval>60</interval>
<root_path>one_min</root_path>
<hostname_in_path>true</hostname_in_path>

<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
</graphite>
<graphite>
<host>localhost</host>
<port>42000</port>
<timeout>0.1</timeout>
<interval>1</interval>
<root_path>one_sec</root_path>

<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>false</asynchronous_metrics>
</graphite>
-->


<!-- Query log. Used only for queries with setting log_queries = 1. -->
<query_log>
<!-- The table to insert data into. If the table does not exist, it will be created.
When query log structure is changed after system update,
then old table will be renamed and new table will be created automatically.
-->
<database>system</database>
<table>query_log</table>
<!--
PARTITION BY expr https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/
Example:
event_date
toMonday(event_date)
toYYYYMM(event_date)
toStartOfHour(event_time)
-->
<partition_by>toYYYYMM(event_date)</partition_by>
<!-- Interval of flushing data. -->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>


<!-- Uncomment if use part_log
<part_log>
<database>system</database>
<table>part_log</table>

<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</part_log>
-->


<!-- Parameters for embedded dictionaries, used in Yandex.Metrica.
See https://clickhouse.yandex/docs/en/dicts/internal_dicts/
-->

<!-- Path to file with region hierarchy. -->
<!-- <path_to_regions_hierarchy_file>/opt/geo/regions_hierarchy.txt</path_to_regions_hierarchy_file> -->

<!-- Path to directory with files containing names of regions -->
<!-- <path_to_regions_names_files>/opt/geo/</path_to_regions_names_files> -->


<!-- Configuration of external dictionaries. See:
https://clickhouse.yandex/docs/en/dicts/external_dicts/
-->
<dictionaries_config>*_dictionary.xml</dictionaries_config>

<!-- Uncomment if you want data to be compressed 30-100% better.
Don't do that if you just started using ClickHouse.
-->
<compression incl="clickhouse_compression">
<!--
<!- - Set of variants. Checked in order. Last matching case wins. If nothing matches, lz4 will be used. - ->
<case>

<!- - Conditions. All must be satisfied. Some conditions may be omitted. - ->
<min_part_size>10000000000</min_part_size> <!- - Min part size in bytes. - ->
<min_part_size_ratio>0.01</min_part_size_ratio> <!- - Min size of part relative to whole table size. - ->

<!- - What compression method to use. - ->
<method>zstd</method>
</case>
-->
</compression>

<!-- Allow to execute distributed DDL queries (CREATE, DROP, ALTER, RENAME) on cluster.
Works only if ZooKeeper is enabled. Comment it if such functionality isn't required. -->
<distributed_ddl>
<!-- Path in ZooKeeper to queue with DDL queries -->
<path>/clickhouse/task_queue/ddl</path>

<!-- Settings from this profile will be used to execute DDL queries -->
<!-- <profile>default</profile> -->
</distributed_ddl>

<!-- Settings to fine tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h -->
<!--
<merge_tree>
<max_suspicious_broken_parts>5</max_suspicious_broken_parts>
</merge_tree>
-->

<!-- Protection from accidental DROP.
If the size of a MergeTree table is greater than max_table_size_to_drop (in bytes), then the table cannot be dropped with any DROP query.
If you want to delete one table and don't want to restart clickhouse-server, you can create the special file <clickhouse-path>/flags/force_drop_table and run DROP once.
By default max_table_size_to_drop is 50GB, max_table_size_to_drop=0 allows to DROP any tables.
Uncomment to disable protection.
-->
<!-- <max_table_size_to_drop>0</max_table_size_to_drop> -->

<!-- Example of parameters for GraphiteMergeTree table engine -->
<graphite_rollup_example>
<pattern>
<regexp>click_cost</regexp>
<function>any</function>
<retention>
<age>0</age>
<precision>3600</precision>
</retention>
<retention>
<age>86400</age>
<precision>60</precision>
</retention>
</pattern>
<default>
<function>max</function>
<retention>
<age>0</age>
<precision>60</precision>
</retention>
<retention>
<age>3600</age>
<precision>300</precision>
</retention>
<retention>
<age>86400</age>
<precision>3600</precision>
</retention>
</default>
</graphite_rollup_example>

<!-- Directory in <clickhouse-path> containing schema files for various input formats.
The directory will be created if it doesn't exist.
-->
<format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>

<!-- Uncomment to disable ClickHouse internal DNS caching. -->
<!-- <disable_internal_dns_cache>1</disable_internal_dns_cache> -->
</yandex>

+ 8
- 1
test/functional/lib/rspamd.py View File

@@ -102,7 +102,14 @@ def HTTP(method, host, port, path, data=None, headers={}):
return [s, t]

def make_temporary_directory():
return tempfile.mkdtemp()
"""Creates and returns a unique temporary directory

Example:
| ${TMPDIR} = | Make Temporary Directory |
"""
dirname = tempfile.mkdtemp()
os.chmod(dirname, 0755)
return dirname

def make_temporary_file():
return tempfile.mktemp()

+ 1
- 1
test/functional/lib/rspamd.robot View File

@@ -73,7 +73,7 @@ Generic Teardown
Run Keyword If '${CONTROLLER_ERRORS}' == 'True' Check Controller Errors
Shutdown Process With Children ${RSPAMD_PID}
Log does not contain segfault record
Save Run Results ${TMPDIR} rspamd.log redis.log rspamd.conf clickhouse-server.log clickhouse-server.err.log clickhouse-config.xml
Save Run Results ${TMPDIR} rspamd.log redis.log rspamd.conf clickhouse-config.xml
Collect Lua Coverage
Cleanup Temporary Directory ${TMPDIR}


Loading…
Cancel
Save