aboutsummaryrefslogtreecommitdiffstats
path: root/src/libserver/cfg_rcl.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'src/libserver/cfg_rcl.cxx')
-rw-r--r--src/libserver/cfg_rcl.cxx223
1 files changed, 208 insertions, 15 deletions
diff --git a/src/libserver/cfg_rcl.cxx b/src/libserver/cfg_rcl.cxx
index 79509e12e..da5845917 100644
--- a/src/libserver/cfg_rcl.cxx
+++ b/src/libserver/cfg_rcl.cxx
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -34,7 +34,7 @@
#include <algorithm>// for std::transform
#include <memory>
#include "contrib/ankerl/unordered_dense.h"
-#include "fmt/base.h"
+#include "contrib/fmt/include/fmt/base.h"
#include "libutil/cxx/util.hxx"
#include "libutil/cxx/file_util.hxx"
#include "frozen/unordered_set.h"
@@ -299,6 +299,14 @@ rspamd_rcl_logging_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
cfg->log_flags |= RSPAMD_LOG_FLAG_USEC;
}
+ /* Set default values for new log tag options */
+ if (cfg->log_max_tag_len == 0) {
+ cfg->log_max_tag_len = RSPAMD_LOG_ID_LEN; /* Default to new max size */
+ }
+ if (cfg->log_tag_strip_policy_str == NULL) {
+ cfg->log_tag_strip_policy_str = rspamd_mempool_strdup(cfg->cfg_pool, "right");
+ }
+
return rspamd_rcl_section_parse_defaults(cfg, *section, cfg->cfg_pool, obj,
(void *) cfg, err);
}
@@ -1189,31 +1197,73 @@ rspamd_rcl_statfile_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
st->opts = (ucl_object_t *) obj;
st->clcf = ccf;
- const auto *val = ucl_object_lookup(obj, "spam");
- if (val == nullptr) {
+ /* Handle migration from old 'spam' field to new 'class' field */
+ const auto *class_val = ucl_object_lookup(obj, "class");
+ const auto *spam_val = ucl_object_lookup(obj, "spam");
+
+ if (class_val != nullptr && spam_val != nullptr) {
+ msg_warn_config("statfile %s has both 'class' and 'spam' fields, using 'class' field",
+ st->symbol);
+ }
+
+ if (class_val == nullptr && spam_val == nullptr) {
+ /* Neither field present, try to guess by symbol name */
msg_info_config(
- "statfile %s has no explicit 'spam' setting, trying to guess by symbol",
+ "statfile %s has no explicit 'class' or 'spam' setting, trying to guess by symbol",
st->symbol);
if (rspamd_substring_search_caseless(st->symbol,
strlen(st->symbol), "spam", 4) != -1) {
st->is_spam = TRUE;
+ st->class_name = rspamd_mempool_strdup(pool, "spam");
+ st->is_spam_converted = TRUE;
}
else if (rspamd_substring_search_caseless(st->symbol,
strlen(st->symbol), "ham", 3) != -1) {
st->is_spam = FALSE;
+ st->class_name = rspamd_mempool_strdup(pool, "ham");
+ st->is_spam_converted = TRUE;
}
else {
g_set_error(err,
CFG_RCL_ERROR,
EINVAL,
- "cannot guess spam setting from %s",
+ "cannot guess class setting from %s, please specify 'class' field",
st->symbol);
return FALSE;
}
- msg_info_config("guessed that statfile with symbol %s is %s",
- st->symbol,
- st->is_spam ? "spam" : "ham");
+ msg_info_config("guessed that statfile with symbol %s has class '%s'",
+ st->symbol, st->class_name);
}
+ else if (class_val == nullptr && spam_val != nullptr) {
+ /* Only spam field present - migrate to class */
+ msg_warn_config("statfile %s uses deprecated 'spam' field, please use 'class' instead",
+ st->symbol);
+ if (st->is_spam) {
+ st->class_name = rspamd_mempool_strdup(pool, "spam");
+ }
+ else {
+ st->class_name = rspamd_mempool_strdup(pool, "ham");
+ }
+ st->is_spam_converted = TRUE;
+ }
+ else if (class_val != nullptr && spam_val == nullptr) {
+ /* Only class field present - set is_spam for backward compatibility */
+ if (st->class_name != nullptr) {
+ if (strcmp(st->class_name, "spam") == 0) {
+ st->is_spam = TRUE;
+ }
+ else if (strcmp(st->class_name, "ham") == 0) {
+ st->is_spam = FALSE;
+ }
+ else {
+ /* For non-binary classes, default to not spam */
+ st->is_spam = FALSE;
+ }
+ msg_debug_config("statfile %s with class '%s' set is_spam=%s for compatibility",
+ st->symbol, st->class_name, st->is_spam ? "true" : "false");
+ }
+ }
+ /* If both fields are present, class takes precedence and was already parsed by the default parser */
return TRUE;
}
@@ -1221,6 +1271,31 @@ rspamd_rcl_statfile_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
}
static gboolean
+rspamd_rcl_class_labels_handler(rspamd_mempool_t *pool,
+ const ucl_object_t *obj,
+ const char *key,
+ gpointer ud,
+ struct rspamd_rcl_section *section,
+ GError **err)
+{
+ auto *ccf = static_cast<rspamd_classifier_config *>(ud);
+
+ if (obj->type != UCL_OBJECT) {
+ g_set_error(err, CFG_RCL_ERROR, EINVAL,
+ "class_labels must be an object");
+ return FALSE;
+ }
+
+ if (!rspamd_config_parse_class_labels(obj, &ccf->class_labels)) {
+ g_set_error(err, CFG_RCL_ERROR, EINVAL,
+ "invalid class_labels configuration");
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static gboolean
rspamd_rcl_classifier_handler(rspamd_mempool_t *pool,
const ucl_object_t *obj,
const char *key,
@@ -1293,6 +1368,22 @@ rspamd_rcl_classifier_handler(rspamd_mempool_t *pool,
}
}
}
+ else if (g_ascii_strcasecmp(st_key, "class_labels") == 0) {
+ /* Parse class_labels configuration directly */
+ if (ucl_object_type(val) != UCL_OBJECT) {
+ g_set_error(err, CFG_RCL_ERROR, EINVAL,
+ "class_labels must be an object");
+ ucl_object_iterate_free(it);
+ return FALSE;
+ }
+
+ if (!rspamd_config_parse_class_labels(val, &ccf->class_labels)) {
+ g_set_error(err, CFG_RCL_ERROR, EINVAL,
+ "invalid class_labels configuration");
+ ucl_object_iterate_free(it);
+ return FALSE;
+ }
+ }
}
}
@@ -1367,8 +1458,80 @@ rspamd_rcl_classifier_handler(rspamd_mempool_t *pool,
}
ccf->opts = (ucl_object_t *) obj;
+
+ /* Validate multi-class configuration */
+ GError *validation_err = nullptr;
+ if (!rspamd_config_validate_class_config(ccf, &validation_err)) {
+ if (validation_err) {
+ g_propagate_error(err, validation_err);
+ }
+ else {
+ g_set_error(err, CFG_RCL_ERROR, EINVAL,
+ "multi-class configuration validation failed for classifier '%s'",
+ ccf->name ? ccf->name : "unknown");
+ }
+ return FALSE;
+ }
+
cfg->classifiers = g_list_prepend(cfg->classifiers, ccf);
+ /* Populate class_names array from statfiles - only for explicit multiclass configs */
+ if (ccf->statfiles) {
+ GList *cur = ccf->statfiles;
+ gboolean has_explicit_classes = FALSE;
+
+ /* Check if any statfile uses explicit class declaration (not converted from is_spam) */
+ cur = ccf->statfiles;
+ while (cur) {
+ struct rspamd_statfile_config *stcf = (struct rspamd_statfile_config *) cur->data;
+ msg_debug("checking statfile %s: class_name=%s, is_spam_converted=%s",
+ stcf->symbol, stcf->class_name ? stcf->class_name : "NULL",
+ stcf->is_spam_converted ? "true" : "false");
+ if (stcf->class_name && !stcf->is_spam_converted) {
+ has_explicit_classes = TRUE;
+ break;
+ }
+ cur = g_list_next(cur);
+ }
+
+ msg_debug("has_explicit_classes = %s", has_explicit_classes ? "true" : "false");
+
+ /* Only populate class_names for explicit multiclass configurations */
+ if (has_explicit_classes) {
+ msg_debug("populating class_names for multiclass configuration");
+ }
+ else {
+ msg_debug("skipping class_names population for binary configuration");
+ }
+
+ if (has_explicit_classes) {
+ ccf->class_names = g_ptr_array_new();
+
+ cur = ccf->statfiles;
+ while (cur) {
+ struct rspamd_statfile_config *stcf = (struct rspamd_statfile_config *) cur->data;
+ if (stcf->class_name) {
+ /* Check if class already exists */
+ bool found = false;
+ for (unsigned int i = 0; i < ccf->class_names->len; i++) {
+ if (strcmp((char *) g_ptr_array_index(ccf->class_names, i), stcf->class_name) == 0) {
+ stcf->class_index = i; /* Store the index for O(1) lookup */
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ /* Add new class */
+ stcf->class_index = ccf->class_names->len;
+ g_ptr_array_add(ccf->class_names, g_strdup(stcf->class_name));
+ }
+ }
+ cur = g_list_next(cur);
+ }
+ }
+ }
+
return TRUE;
}
@@ -1700,6 +1863,18 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
G_STRUCT_OFFSET(struct rspamd_config, log_task_max_elts),
RSPAMD_CL_FLAG_UINT,
"Maximum number of elements in task log entry (7 by default)");
+ rspamd_rcl_add_default_handler(sub,
+ "max_tag_len",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, log_max_tag_len),
+ RSPAMD_CL_FLAG_UINT,
+ "Maximum length of log tag cannot exceed 32 (" G_STRINGIFY(RSPAMD_LOG_ID_LEN) ") by default)");
+ rspamd_rcl_add_default_handler(sub,
+ "tag_strip_policy",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, log_tag_strip_policy_str),
+ 0,
+ "Log tag strip policy when tag exceeds max length: 'right', 'left', 'middle' (right by default)");
/* Documentation only options, handled in log_handler to map flags */
rspamd_rcl_add_doc_by_path(cfg,
@@ -2210,7 +2385,7 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
rspamd_rcl_add_doc_by_path(cfg,
"options",
- "Swtich mode of gtube patterns: disable, reject, all",
+ "Switch mode of gtube patterns: disable, reject, all",
"gtube_patterns",
UCL_STRING,
nullptr,
@@ -2308,7 +2483,7 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
rspamd_rcl_parse_struct_time,
G_STRUCT_OFFSET(struct rspamd_config, upstream_resolve_min_interval),
RSPAMD_CL_FLAG_TIME_FLOAT,
- "Minumum interval to perform resolving (60 seconds by default)");
+ "Minimum interval to perform resolving (60 seconds by default)");
}
if (!(skip_sections && g_hash_table_lookup(skip_sections, "actions"))) {
@@ -2437,7 +2612,7 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
FALSE,
TRUE,
cfg->doc_strings,
- "CLassifier options");
+ "Classifier options");
/* Default classifier is 'bayes' for now */
sub->default_key = "bayes";
@@ -2456,7 +2631,7 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
rspamd_rcl_add_default_handler(sub,
"min_prob_strength",
rspamd_rcl_parse_struct_double,
- G_STRUCT_OFFSET(struct rspamd_classifier_config, min_token_hits),
+ G_STRUCT_OFFSET(struct rspamd_classifier_config, min_prob_strength),
0,
"Use only tokens with probability in [0.5 - MPS, 0.5 + MPS]");
rspamd_rcl_add_default_handler(sub,
@@ -2485,6 +2660,18 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
"Name of classifier");
/*
+ * Multi-class configuration
+ */
+ rspamd_rcl_add_section_doc(&top, sub,
+ "class_labels", nullptr,
+ rspamd_rcl_class_labels_handler,
+ UCL_OBJECT,
+ FALSE,
+ TRUE,
+ sub->doc_ref,
+ "Class to backend label mapping for multi-class classification");
+
+ /*
* Statfile defaults
*/
auto *ssub = rspamd_rcl_add_section_doc(&top, sub,
@@ -2502,11 +2689,17 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
0,
"Statfile unique label");
rspamd_rcl_add_default_handler(ssub,
+ "class",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_statfile_config, class_name),
+ 0,
+ "Class name for multi-class classification");
+ rspamd_rcl_add_default_handler(ssub,
"spam",
rspamd_rcl_parse_struct_boolean,
G_STRUCT_OFFSET(struct rspamd_statfile_config, is_spam),
0,
- "Sets if this statfile contains spam samples");
+ "DEPRECATED: Sets if this statfile contains spam samples (use 'class' instead)");
}
if (!(skip_sections && g_hash_table_lookup(skip_sections, "composite"))) {
@@ -3640,7 +3833,7 @@ rspamd_config_parse_ucl(struct rspamd_config *cfg,
/* Try to load keyfile if available */
auto keyfile_name = fmt::format("{}.key", filename);
rspamd::util::raii_file::open(keyfile_name, O_RDONLY).map([&](const auto &keyfile) {
- auto *kp_parser = ucl_parser_new(0);
+ auto *kp_parser = ucl_parser_new(UCL_PARSER_DEFAULT);
if (ucl_parser_add_fd(kp_parser, keyfile.get_fd())) {
auto *kp_obj = ucl_parser_get_object(kp_parser);