aboutsummaryrefslogtreecommitdiffstats
path: root/src/libserver
diff options
context:
space:
mode:
Diffstat (limited to 'src/libserver')
-rw-r--r--src/libserver/cfg_file.h40
-rw-r--r--src/libserver/cfg_rcl.cxx215
-rw-r--r--src/libserver/cfg_utils.cxx266
-rw-r--r--src/libserver/css/css.cxx19
-rw-r--r--src/libserver/dynamic_cfg.c4
-rw-r--r--src/libserver/html/html.cxx1017
-rw-r--r--src/libserver/html/html_tag.hxx1348
-rw-r--r--src/libserver/http/http_connection.c92
-rw-r--r--src/libserver/http/http_connection.h12
-rw-r--r--src/libserver/http/http_message.c7
-rw-r--r--src/libserver/http/http_router.c8
-rw-r--r--src/libserver/logger/logger.c141
-rw-r--r--src/libserver/logger/logger_private.h12
-rw-r--r--src/libserver/maps/map.c330
-rw-r--r--src/libserver/maps/map.h6
-rw-r--r--src/libserver/maps/map_private.h21
-rw-r--r--src/libserver/milter.c4
-rw-r--r--src/libserver/protocol.c17
-rw-r--r--src/libserver/re_cache.c1161
-rw-r--r--src/libserver/re_cache.h198
-rw-r--r--src/libserver/roll_history.c8
-rw-r--r--src/libserver/rspamd_control.c86
-rw-r--r--src/libserver/rspamd_control.h19
-rw-r--r--src/libserver/rspamd_symcache.h7
-rw-r--r--src/libserver/symcache/symcache_c.cxx11
-rw-r--r--src/libserver/symcache/symcache_impl.cxx2
-rw-r--r--src/libserver/symcache/symcache_internal.hxx8
-rw-r--r--src/libserver/task.c49
-rw-r--r--src/libserver/task.h9
-rw-r--r--src/libserver/word.h88
-rw-r--r--src/libserver/worker_util.c31
31 files changed, 4565 insertions, 671 deletions
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index f59c6ff89..355046cac 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -48,6 +48,7 @@ struct worker_s;
struct rspamd_external_libs_ctx;
struct rspamd_cryptobox_pubkey;
struct rspamd_dns_resolver;
+struct rspamd_tokenizer_manager;
/**
* Logging type
@@ -138,7 +139,10 @@ struct rspamd_statfile_config {
char *symbol; /**< symbol of statfile */
char *label; /**< label of this statfile */
ucl_object_t *opts; /**< other options */
- gboolean is_spam; /**< spam flag */
+ char *class_name; /**< class name for multi-class classification */
+ unsigned int class_index; /**< class index for O(1) lookup during classification */
+ gboolean is_spam; /**< DEPRECATED: spam flag - use class_name instead */
+ gboolean is_spam_converted; /**< TRUE if class_name was converted from is_spam flag */
struct rspamd_classifier_config *clcf; /**< parent pointer of classifier configuration */
gpointer data; /**< opaque data */
};
@@ -181,6 +185,8 @@ struct rspamd_classifier_config {
double min_prob_strength; /**< use only tokens with probability in [0.5 - MPS, 0.5 + MPS] */
unsigned int min_learns; /**< minimum number of learns for each statfile */
unsigned int flags;
+ GHashTable *class_labels; /**< class_name -> backend_symbol mapping for multi-class */
+ GPtrArray *class_names; /**< ordered list of class names */
};
struct rspamd_worker_bind_conf {
@@ -395,6 +401,8 @@ struct rspamd_config {
unsigned int log_error_elts; /**< number of elements in error logbuf */
unsigned int log_error_elt_maxlen; /**< maximum size of error log element */
unsigned int log_task_max_elts; /**< maximum number of elements in task logging */
+ unsigned int log_max_tag_len; /**< maximum length of log tag */
+ char *log_tag_strip_policy_str; /**< log tag strip policy string */
struct rspamd_worker_log_pipe *log_pipes;
gboolean compat_messages; /**< use old messages in the protocol (array) */
@@ -495,9 +503,10 @@ struct rspamd_config {
char *zstd_output_dictionary; /**< path to zstd output dictionary */
ucl_object_t *neighbours; /**< other servers in the cluster */
- struct rspamd_config_settings_elt *setting_ids; /**< preprocessed settings ids */
- struct rspamd_lang_detector *lang_det; /**< language detector */
- struct rspamd_worker *cur_worker; /**< set dynamically by each worker */
+ struct rspamd_config_settings_elt *setting_ids; /**< preprocessed settings ids */
+ struct rspamd_lang_detector *lang_det; /**< language detector */
+ struct rspamd_tokenizer_manager *tokenizer_manager; /**< custom tokenizer manager */
+ struct rspamd_worker *cur_worker; /**< set dynamically by each worker */
ref_entry_t ref; /**< reference counter */
};
@@ -617,12 +626,25 @@ void rspamd_config_insert_classify_symbols(struct rspamd_config *cfg);
*/
gboolean rspamd_config_check_statfiles(struct rspamd_classifier_config *cf);
-/*
- * Find classifier config by name
+/**
+ * Multi-class configuration helpers
+ */
+gboolean rspamd_config_parse_class_labels(const ucl_object_t *obj,
+ GHashTable **class_labels);
+
+gboolean rspamd_config_migrate_binary_config(struct rspamd_statfile_config *stcf);
+
+gboolean rspamd_config_validate_class_config(struct rspamd_classifier_config *ccf,
+ GError **err);
+
+const char *rspamd_config_get_class_label(struct rspamd_classifier_config *ccf,
+ const char *class_name);
+
+/**
+ * Find classifier by name
*/
struct rspamd_classifier_config *rspamd_config_find_classifier(
- struct rspamd_config *cfg,
- const char *name);
+ struct rspamd_config *cfg, const char *name);
void rspamd_ucl_add_conf_macros(struct ucl_parser *parser,
struct rspamd_config *cfg);
diff --git a/src/libserver/cfg_rcl.cxx b/src/libserver/cfg_rcl.cxx
index f38366908..da5845917 100644
--- a/src/libserver/cfg_rcl.cxx
+++ b/src/libserver/cfg_rcl.cxx
@@ -299,6 +299,14 @@ rspamd_rcl_logging_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
cfg->log_flags |= RSPAMD_LOG_FLAG_USEC;
}
+ /* Set default values for new log tag options */
+ if (cfg->log_max_tag_len == 0) {
+ cfg->log_max_tag_len = RSPAMD_LOG_ID_LEN; /* Default to new max size */
+ }
+ if (cfg->log_tag_strip_policy_str == NULL) {
+ cfg->log_tag_strip_policy_str = rspamd_mempool_strdup(cfg->cfg_pool, "right");
+ }
+
return rspamd_rcl_section_parse_defaults(cfg, *section, cfg->cfg_pool, obj,
(void *) cfg, err);
}
@@ -1189,31 +1197,73 @@ rspamd_rcl_statfile_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
st->opts = (ucl_object_t *) obj;
st->clcf = ccf;
- const auto *val = ucl_object_lookup(obj, "spam");
- if (val == nullptr) {
+ /* Handle migration from old 'spam' field to new 'class' field */
+ const auto *class_val = ucl_object_lookup(obj, "class");
+ const auto *spam_val = ucl_object_lookup(obj, "spam");
+
+ if (class_val != nullptr && spam_val != nullptr) {
+ msg_warn_config("statfile %s has both 'class' and 'spam' fields, using 'class' field",
+ st->symbol);
+ }
+
+ if (class_val == nullptr && spam_val == nullptr) {
+ /* Neither field present, try to guess by symbol name */
msg_info_config(
- "statfile %s has no explicit 'spam' setting, trying to guess by symbol",
+ "statfile %s has no explicit 'class' or 'spam' setting, trying to guess by symbol",
st->symbol);
if (rspamd_substring_search_caseless(st->symbol,
strlen(st->symbol), "spam", 4) != -1) {
st->is_spam = TRUE;
+ st->class_name = rspamd_mempool_strdup(pool, "spam");
+ st->is_spam_converted = TRUE;
}
else if (rspamd_substring_search_caseless(st->symbol,
strlen(st->symbol), "ham", 3) != -1) {
st->is_spam = FALSE;
+ st->class_name = rspamd_mempool_strdup(pool, "ham");
+ st->is_spam_converted = TRUE;
}
else {
g_set_error(err,
CFG_RCL_ERROR,
EINVAL,
- "cannot guess spam setting from %s",
+ "cannot guess class setting from %s, please specify 'class' field",
st->symbol);
return FALSE;
}
- msg_info_config("guessed that statfile with symbol %s is %s",
- st->symbol,
- st->is_spam ? "spam" : "ham");
+ msg_info_config("guessed that statfile with symbol %s has class '%s'",
+ st->symbol, st->class_name);
}
+ else if (class_val == nullptr && spam_val != nullptr) {
+ /* Only spam field present - migrate to class */
+ msg_warn_config("statfile %s uses deprecated 'spam' field, please use 'class' instead",
+ st->symbol);
+ if (st->is_spam) {
+ st->class_name = rspamd_mempool_strdup(pool, "spam");
+ }
+ else {
+ st->class_name = rspamd_mempool_strdup(pool, "ham");
+ }
+ st->is_spam_converted = TRUE;
+ }
+ else if (class_val != nullptr && spam_val == nullptr) {
+ /* Only class field present - set is_spam for backward compatibility */
+ if (st->class_name != nullptr) {
+ if (strcmp(st->class_name, "spam") == 0) {
+ st->is_spam = TRUE;
+ }
+ else if (strcmp(st->class_name, "ham") == 0) {
+ st->is_spam = FALSE;
+ }
+ else {
+ /* For non-binary classes, default to not spam */
+ st->is_spam = FALSE;
+ }
+ msg_debug_config("statfile %s with class '%s' set is_spam=%s for compatibility",
+ st->symbol, st->class_name, st->is_spam ? "true" : "false");
+ }
+ }
+ /* If both fields are present, class takes precedence and was already parsed by the default parser */
return TRUE;
}
@@ -1221,6 +1271,31 @@ rspamd_rcl_statfile_handler(rspamd_mempool_t *pool, const ucl_object_t *obj,
}
static gboolean
+rspamd_rcl_class_labels_handler(rspamd_mempool_t *pool,
+								const ucl_object_t *obj,
+								const char *key,
+								gpointer ud,
+								struct rspamd_rcl_section *section,
+								GError **err)
+{
+	/*
+	 * Section handler for `class_labels`: `ud` is the classifier config whose
+	 * class_labels table receives the parsed mapping. `pool`, `key` and
+	 * `section` are not used here. On failure `err` is set and FALSE is
+	 * returned.
+	 */
+	if (obj->type != UCL_OBJECT) {
+		g_set_error(err, CFG_RCL_ERROR, EINVAL,
+					"class_labels must be an object");
+		return FALSE;
+	}
+
+	auto *classifier = static_cast<rspamd_classifier_config *>(ud);
+
+	if (rspamd_config_parse_class_labels(obj, &classifier->class_labels)) {
+		return TRUE;
+	}
+
+	g_set_error(err, CFG_RCL_ERROR, EINVAL,
+				"invalid class_labels configuration");
+	return FALSE;
+}
+
+static gboolean
rspamd_rcl_classifier_handler(rspamd_mempool_t *pool,
const ucl_object_t *obj,
const char *key,
@@ -1293,6 +1368,22 @@ rspamd_rcl_classifier_handler(rspamd_mempool_t *pool,
}
}
}
+ else if (g_ascii_strcasecmp(st_key, "class_labels") == 0) {
+ /* Parse class_labels configuration directly */
+ if (ucl_object_type(val) != UCL_OBJECT) {
+ g_set_error(err, CFG_RCL_ERROR, EINVAL,
+ "class_labels must be an object");
+ ucl_object_iterate_free(it);
+ return FALSE;
+ }
+
+ if (!rspamd_config_parse_class_labels(val, &ccf->class_labels)) {
+ g_set_error(err, CFG_RCL_ERROR, EINVAL,
+ "invalid class_labels configuration");
+ ucl_object_iterate_free(it);
+ return FALSE;
+ }
+ }
}
}
@@ -1367,8 +1458,80 @@ rspamd_rcl_classifier_handler(rspamd_mempool_t *pool,
}
ccf->opts = (ucl_object_t *) obj;
+
+ /* Validate multi-class configuration */
+ GError *validation_err = nullptr;
+ if (!rspamd_config_validate_class_config(ccf, &validation_err)) {
+ if (validation_err) {
+ g_propagate_error(err, validation_err);
+ }
+ else {
+ g_set_error(err, CFG_RCL_ERROR, EINVAL,
+ "multi-class configuration validation failed for classifier '%s'",
+ ccf->name ? ccf->name : "unknown");
+ }
+ return FALSE;
+ }
+
cfg->classifiers = g_list_prepend(cfg->classifiers, ccf);
+ /* Populate class_names array from statfiles - only for explicit multiclass configs */
+ if (ccf->statfiles) {
+ GList *cur = ccf->statfiles;
+ gboolean has_explicit_classes = FALSE;
+
+ /* Check if any statfile uses explicit class declaration (not converted from is_spam) */
+ cur = ccf->statfiles;
+ while (cur) {
+ struct rspamd_statfile_config *stcf = (struct rspamd_statfile_config *) cur->data;
+ msg_debug("checking statfile %s: class_name=%s, is_spam_converted=%s",
+ stcf->symbol, stcf->class_name ? stcf->class_name : "NULL",
+ stcf->is_spam_converted ? "true" : "false");
+ if (stcf->class_name && !stcf->is_spam_converted) {
+ has_explicit_classes = TRUE;
+ break;
+ }
+ cur = g_list_next(cur);
+ }
+
+ msg_debug("has_explicit_classes = %s", has_explicit_classes ? "true" : "false");
+
+ /* Only populate class_names for explicit multiclass configurations */
+ if (has_explicit_classes) {
+ msg_debug("populating class_names for multiclass configuration");
+ }
+ else {
+ msg_debug("skipping class_names population for binary configuration");
+ }
+
+ if (has_explicit_classes) {
+ ccf->class_names = g_ptr_array_new();
+
+ cur = ccf->statfiles;
+ while (cur) {
+ struct rspamd_statfile_config *stcf = (struct rspamd_statfile_config *) cur->data;
+ if (stcf->class_name) {
+ /* Check if class already exists */
+ bool found = false;
+ for (unsigned int i = 0; i < ccf->class_names->len; i++) {
+ if (strcmp((char *) g_ptr_array_index(ccf->class_names, i), stcf->class_name) == 0) {
+ stcf->class_index = i; /* Store the index for O(1) lookup */
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ /* Add new class */
+ stcf->class_index = ccf->class_names->len;
+ g_ptr_array_add(ccf->class_names, g_strdup(stcf->class_name));
+ }
+ }
+ cur = g_list_next(cur);
+ }
+ }
+ }
+
return TRUE;
}
@@ -1700,6 +1863,18 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
G_STRUCT_OFFSET(struct rspamd_config, log_task_max_elts),
RSPAMD_CL_FLAG_UINT,
"Maximum number of elements in task log entry (7 by default)");
+ rspamd_rcl_add_default_handler(sub,
+ "max_tag_len",
+ rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET(struct rspamd_config, log_max_tag_len),
+ RSPAMD_CL_FLAG_UINT,
+ "Maximum length of log tag cannot exceed 32 (" G_STRINGIFY(RSPAMD_LOG_ID_LEN) ") by default)");
+ rspamd_rcl_add_default_handler(sub,
+ "tag_strip_policy",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_config, log_tag_strip_policy_str),
+ 0,
+ "Log tag strip policy when tag exceeds max length: 'right', 'left', 'middle' (right by default)");
/* Documentation only options, handled in log_handler to map flags */
rspamd_rcl_add_doc_by_path(cfg,
@@ -2437,7 +2612,7 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
FALSE,
TRUE,
cfg->doc_strings,
- "CLassifier options");
+ "Classifier options");
/* Default classifier is 'bayes' for now */
sub->default_key = "bayes";
@@ -2456,7 +2631,7 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
rspamd_rcl_add_default_handler(sub,
"min_prob_strength",
rspamd_rcl_parse_struct_double,
- G_STRUCT_OFFSET(struct rspamd_classifier_config, min_token_hits),
+ G_STRUCT_OFFSET(struct rspamd_classifier_config, min_prob_strength),
0,
"Use only tokens with probability in [0.5 - MPS, 0.5 + MPS]");
rspamd_rcl_add_default_handler(sub,
@@ -2485,6 +2660,18 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
"Name of classifier");
/*
+ * Multi-class configuration
+ */
+ rspamd_rcl_add_section_doc(&top, sub,
+ "class_labels", nullptr,
+ rspamd_rcl_class_labels_handler,
+ UCL_OBJECT,
+ FALSE,
+ TRUE,
+ sub->doc_ref,
+ "Class to backend label mapping for multi-class classification");
+
+ /*
* Statfile defaults
*/
auto *ssub = rspamd_rcl_add_section_doc(&top, sub,
@@ -2502,11 +2689,17 @@ rspamd_rcl_config_init(struct rspamd_config *cfg, GHashTable *skip_sections)
0,
"Statfile unique label");
rspamd_rcl_add_default_handler(ssub,
+ "class",
+ rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET(struct rspamd_statfile_config, class_name),
+ 0,
+ "Class name for multi-class classification");
+ rspamd_rcl_add_default_handler(ssub,
"spam",
rspamd_rcl_parse_struct_boolean,
G_STRUCT_OFFSET(struct rspamd_statfile_config, is_spam),
0,
- "Sets if this statfile contains spam samples");
+ "DEPRECATED: Sets if this statfile contains spam samples (use 'class' instead)");
}
if (!(skip_sections && g_hash_table_lookup(skip_sections, "composite"))) {
@@ -3640,7 +3833,7 @@ rspamd_config_parse_ucl(struct rspamd_config *cfg,
/* Try to load keyfile if available */
auto keyfile_name = fmt::format("{}.key", filename);
rspamd::util::raii_file::open(keyfile_name, O_RDONLY).map([&](const auto &keyfile) {
- auto *kp_parser = ucl_parser_new(0);
+ auto *kp_parser = ucl_parser_new(UCL_PARSER_DEFAULT);
if (ucl_parser_add_fd(kp_parser, keyfile.get_fd())) {
auto *kp_obj = ucl_parser_get_object(kp_parser);
diff --git a/src/libserver/cfg_utils.cxx b/src/libserver/cfg_utils.cxx
index dfbdc6bee..c22a9b877 100644
--- a/src/libserver/cfg_utils.cxx
+++ b/src/libserver/cfg_utils.cxx
@@ -72,6 +72,11 @@
#include "contrib/expected/expected.hpp"
#include "contrib/ankerl/unordered_dense.h"
+#include "libserver/task.h"
+#include "libserver/url.h"
+#define RSPAMD_TOKENIZER_INTERNAL// We need to use internal tokenizer API
+#include "libstat/tokenizers/custom_tokenizer.h"
+
#define DEFAULT_SCORE 10.0
#define DEFAULT_RLIMIT_NOFILE 2048
@@ -821,6 +826,65 @@ rspamd_adjust_clocks_resolution(struct rspamd_config *cfg)
#endif
}
+extern "C" {
+
+/**
+ * Load every tokenizer declared under `options.custom_tokenizers`.
+ *
+ * The tokenizer manager is created on `cfg->cfg_pool` the first time such a
+ * section is found. Loading stops at the first tokenizer that fails.
+ *
+ * @param cfg configuration whose UCL tree is inspected
+ * @param err optional error sink; receives the loader's error (if the loader
+ *            provided one) unless it already holds an error
+ * @return TRUE if the section is absent or every tokenizer was loaded,
+ *         FALSE as soon as one tokenizer fails to load
+ */
+gboolean
+rspamd_config_load_custom_tokenizers(struct rspamd_config *cfg, GError **err)
+{
+	const ucl_object_t *custom_tokenizers = ucl_object_lookup_path(cfg->cfg_ucl_obj,
+			"options.custom_tokenizers");
+
+	if (custom_tokenizers == NULL) {
+		/* Nothing configured, nothing to load */
+		return TRUE;
+	}
+
+	msg_info_config("loading custom tokenizers");
+
+	if (!cfg->tokenizer_manager) {
+		cfg->tokenizer_manager = rspamd_tokenizer_manager_new(cfg->cfg_pool);
+	}
+
+	ucl_object_iter_t it = ucl_object_iterate_new(custom_tokenizers);
+	const ucl_object_t *tok_obj;
+
+	while ((tok_obj = ucl_object_iterate_safe(it, true)) != NULL) {
+		const char *tok_name = ucl_object_key(tok_obj);
+		GError *local_err = NULL;
+
+		if (rspamd_tokenizer_manager_load_tokenizer(cfg->tokenizer_manager,
+				tok_name, tok_obj, &local_err)) {
+			continue;
+		}
+
+		msg_err_config("failed to load custom tokenizer '%s': %s",
+				tok_name, local_err ? local_err->message : "unknown error");
+
+		/*
+		 * The loader may fail without filling local_err; g_error_copy(NULL)
+		 * is invalid, so only hand an error over when one actually exists.
+		 */
+		if (local_err != NULL) {
+			if (err != NULL && *err == NULL) {
+				/* Transfers ownership of local_err to the caller */
+				g_propagate_error(err, local_err);
+			}
+			else {
+				g_error_free(local_err);
+			}
+		}
+
+		ucl_object_iterate_free(it);
+		return FALSE;
+	}
+
+	ucl_object_iterate_free(it);
+
+	msg_info_config("loaded custom tokenizers successfully");
+
+	return TRUE;
+}
+
+/**
+ * Destroy the custom tokenizer manager attached to `cfg`, if there is one,
+ * and reset the pointer so a repeated call is a no-op.
+ */
+void rspamd_config_unload_custom_tokenizers(struct rspamd_config *cfg)
+{
+	if (cfg->tokenizer_manager == NULL) {
+		return;
+	}
+
+	msg_info_config("unloading custom tokenizers");
+	rspamd_tokenizer_manager_destroy(cfg->tokenizer_manager);
+	cfg->tokenizer_manager = NULL;
+}
+
+}// extern "C"
+
/*
* Perform post load actions
*/
@@ -940,6 +1004,20 @@ rspamd_config_post_load(struct rspamd_config *cfg,
msg_err_config("cannot configure libraries, fatal error");
return FALSE;
}
+
+ /* Load custom tokenizers using the new function */
+ GError *tokenizer_err = NULL;
+ if (!rspamd_config_load_custom_tokenizers(cfg, &tokenizer_err)) {
+ msg_err_config("failed to load custom tokenizers: %s",
+ tokenizer_err ? tokenizer_err->message : "unknown error");
+ if (tokenizer_err) {
+ g_error_free(tokenizer_err);
+ }
+
+ if (opts & RSPAMD_CONFIG_INIT_VALIDATE) {
+ ret = tl::make_unexpected(std::string{"failed to load custom tokenizers"});
+ }
+ }
}
/* Validate cache */
@@ -1363,7 +1441,7 @@ rspamd_ucl_fin_cb(struct map_cb_data *data, void **target)
}
/* New data available */
- auto *parser = ucl_parser_new(0);
+ auto *parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS);
if (!ucl_parser_add_chunk(parser, (unsigned char *) cbdata->buf.data(),
cbdata->buf.size())) {
msg_err_config("cannot parse map %s: %s",
@@ -2964,3 +3042,189 @@ rspamd_ip_is_local_cfg(struct rspamd_config *cfg,
return FALSE;
}
+
+/**
+ * Parse a `class_labels` UCL object (class name -> backend label) into a
+ * hash table.
+ *
+ * If `*class_labels` is NULL a new table owning copies of its keys and values
+ * is created; otherwise entries are added to the existing table. Class names
+ * must be at most 32 characters of ASCII alphanumerics or '_', every value
+ * must be a string, and a backend label may not be shared by two different
+ * classes. On any validation failure the table is destroyed and
+ * `*class_labels` is reset to NULL.
+ *
+ * @param obj UCL object holding the mapping
+ * @param class_labels in/out pointer to the destination table
+ * @return TRUE if the resulting table is non-empty, FALSE on invalid input
+ */
+gboolean
+rspamd_config_parse_class_labels(const ucl_object_t *obj, GHashTable **class_labels)
+{
+	const ucl_object_t *cur;
+	ucl_object_iter_t it = nullptr;
+
+	if (!obj || ucl_object_type(obj) != UCL_OBJECT) {
+		return FALSE;
+	}
+
+	if (*class_labels == nullptr) {
+		*class_labels = g_hash_table_new_full(g_str_hash, g_str_equal,
+				g_free, g_free);
+	}
+
+	/* Validation failures discard the whole table */
+	auto fail = [class_labels]() -> gboolean {
+		g_hash_table_destroy(*class_labels);
+		*class_labels = nullptr;
+		return FALSE;
+	};
+
+	while ((cur = ucl_object_iterate(obj, &it, true)) != nullptr) {
+		const char *class_name = ucl_object_key(cur);
+		const char *label = ucl_object_tostring(cur);
+
+		/*
+		 * Reject entries without a key or a string value instead of storing
+		 * them: a NULL key cannot be hashed by g_str_hash and a NULL label
+		 * is useless to lookups.
+		 */
+		if (class_name == nullptr || label == nullptr) {
+			msg_err("class_labels entry '%s' must map a class name to a string label",
+					class_name ? class_name : "(null)");
+			return fail();
+		}
+
+		/* Validate class name: alphanumeric + underscore, max 32 chars */
+		if (strlen(class_name) > 32) {
+			msg_err("class name '%s' is too long (max 32 characters)", class_name);
+			return fail();
+		}
+
+		for (const char *p = class_name; *p; p++) {
+			if (!g_ascii_isalnum(*p) && *p != '_') {
+				msg_err("class name '%s' contains invalid character '%c'", class_name, *p);
+				return fail();
+			}
+		}
+
+		/*
+		 * Validate label uniqueness. The table is keyed by class name, so the
+		 * labels (values) have to be scanned; re-declaring the same class is
+		 * not a conflict.
+		 */
+		GHashTableIter label_iter;
+		gpointer known_class, known_label;
+
+		g_hash_table_iter_init(&label_iter, *class_labels);
+		while (g_hash_table_iter_next(&label_iter, &known_class, &known_label)) {
+			if (known_label != nullptr &&
+				strcmp((const char *) known_label, label) == 0 &&
+				strcmp((const char *) known_class, class_name) != 0) {
+				msg_err("backend label '%s' is used by multiple classes", label);
+				return fail();
+			}
+		}
+
+		g_hash_table_insert(*class_labels, g_strdup(class_name), g_strdup(label));
+	}
+
+	return g_hash_table_size(*class_labels) > 0;
+}
+
+/**
+ * Give a legacy binary statfile a class name derived from its `is_spam` flag.
+ *
+ * Statfiles that already have `class_name` set are left untouched. Otherwise
+ * `class_name` becomes "spam" or "ham" and `is_spam_converted` is set, so the
+ * statfile is not mistaken for an explicit multi-class declaration.
+ *
+ * @param stcf statfile configuration to migrate
+ * @return always TRUE
+ */
+gboolean
+rspamd_config_migrate_binary_config(struct rspamd_statfile_config *stcf)
+{
+	if (stcf->class_name != nullptr) {
+		/* Already migrated or using new format */
+		return TRUE;
+	}
+
+	/*
+	 * NOTE(review): the name is allocated with g_strdup here while the statfile
+	 * handler uses the memory pool; no matching g_free is visible - confirm
+	 * who owns this string.
+	 */
+	if (stcf->is_spam) {
+		stcf->class_name = g_strdup("spam");
+		msg_info("migrated statfile '%s' from is_spam=true to class='spam'",
+				stcf->symbol ? stcf->symbol : "unknown");
+	}
+	else {
+		stcf->class_name = g_strdup("ham");
+		msg_info("migrated statfile '%s' from is_spam=false to class='ham'",
+				stcf->symbol ? stcf->symbol : "unknown");
+	}
+
+	/* The class was derived from is_spam, not declared explicitly */
+	stcf->is_spam_converted = TRUE;
+
+	return TRUE;
+}
+
+/**
+ * Validate the class setup of a classifier and (re)build `ccf->class_names`.
+ *
+ * Every statfile is first passed through rspamd_config_migrate_binary_config()
+ * and must end up with a non-empty class name; the classifier must have at
+ * least two distinct classes. `class_names` is populated only when at least
+ * one statfile has a class that was not converted from `is_spam`; otherwise
+ * it is released and set to NULL.
+ *
+ * @param ccf classifier configuration to validate
+ * @param err error sink filled on failure
+ * @return TRUE if the configuration is valid, FALSE otherwise
+ */
+gboolean
+rspamd_config_validate_class_config(struct rspamd_classifier_config *ccf, GError **err)
+{
+	if (!ccf || !ccf->statfiles) {
+		g_set_error(err, g_quark_from_static_string("config"), 1,
+				"classifier has no statfiles defined");
+		return FALSE;
+	}
+
+	/* Set of distinct class names; keys are owned copies */
+	GHashTable *seen_classes = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, nullptr);
+	unsigned int class_count = 0;
+	gboolean has_explicit_classes = FALSE;
+
+	/* Iterate through statfiles and collect classes */
+	for (GList *cur = ccf->statfiles; cur != nullptr; cur = g_list_next(cur)) {
+		auto *stcf = (struct rspamd_statfile_config *) cur->data;
+
+		/* Migrate binary config if needed */
+		if (!rspamd_config_migrate_binary_config(stcf)) {
+			g_set_error(err, g_quark_from_static_string("config"), 1,
+					"failed to migrate binary config for statfile '%s'",
+					stcf->symbol ? stcf->symbol : "unknown");
+			g_hash_table_destroy(seen_classes);
+			return FALSE;
+		}
+
+		/* Check class name */
+		if (!stcf->class_name || strlen(stcf->class_name) == 0) {
+			g_set_error(err, g_quark_from_static_string("config"), 1,
+					"statfile '%s' has no class defined",
+					stcf->symbol ? stcf->symbol : "unknown");
+			g_hash_table_destroy(seen_classes);
+			return FALSE;
+		}
+
+		/* A class that was not converted from is_spam was declared explicitly */
+		if (!stcf->is_spam_converted) {
+			has_explicit_classes = TRUE;
+		}
+
+		/* Track unique classes */
+		if (!g_hash_table_contains(seen_classes, stcf->class_name)) {
+			g_hash_table_insert(seen_classes, g_strdup(stcf->class_name), GINT_TO_POINTER(1));
+			class_count++;
+		}
+	}
+
+	/* Validate class count */
+	if (class_count < 2) {
+		/* g_set_error formats like printf: unsigned int is %u, "%ud" would append a stray 'd' */
+		g_set_error(err, g_quark_from_static_string("config"), 1,
+				"classifier must have at least 2 classes, found %u", class_count);
+		g_hash_table_destroy(seen_classes);
+		return FALSE;
+	}
+
+	if (class_count > 20) {
+		/* NOTE(review): msg_* presumably use rspamd's own format syntax, so '%ud' is kept here - confirm */
+		msg_warn("classifier has %ud classes, performance may be degraded above 20 classes",
+				class_count);
+	}
+
+	/* Drop any previous list before deciding whether a new one is needed */
+	if (ccf->class_names) {
+		g_ptr_array_unref(ccf->class_names);
+		ccf->class_names = nullptr;
+	}
+
+	/* Only populate class_names for explicit multiclass configurations */
+	if (has_explicit_classes) {
+		ccf->class_names = g_ptr_array_new_with_free_func(g_free);
+
+		GHashTableIter iter;
+		gpointer key, value;
+
+		g_hash_table_iter_init(&iter, seen_classes);
+		while (g_hash_table_iter_next(&iter, &key, &value)) {
+			g_ptr_array_add(ccf->class_names, g_strdup((const char *) key));
+		}
+	}
+
+	g_hash_table_destroy(seen_classes);
+	return TRUE;
+}
+
+/**
+ * Look up the backend label registered for `class_name` in the classifier's
+ * `class_labels` table.
+ *
+ * @return the stored label, or NULL when the classifier, its table or the
+ *         class name is missing, or when no entry exists for the class
+ */
+const char *
+rspamd_config_get_class_label(struct rspamd_classifier_config *ccf, const char *class_name)
+{
+	const bool can_lookup = ccf != nullptr &&
+							ccf->class_labels != nullptr &&
+							class_name != nullptr;
+
+	if (!can_lookup) {
+		return nullptr;
+	}
+
+	gpointer found = g_hash_table_lookup(ccf->class_labels, class_name);
+
+	return (const char *) found;
+}
diff --git a/src/libserver/css/css.cxx b/src/libserver/css/css.cxx
index 1b369ed17..c53e3c05e 100644
--- a/src/libserver/css/css.cxx
+++ b/src/libserver/css/css.cxx
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2021 Vsevolod Stakhov
+/*
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -107,7 +107,6 @@ auto css_style_sheet::add_selector_rule(std::unique_ptr<css_selector> &&selector
auto css_style_sheet::check_tag_block(const rspamd::html::html_tag *tag) -> rspamd::html::html_block *
{
- std::optional<std::string_view> id_comp, class_comp;
rspamd::html::html_block *res = nullptr;
if (!tag) {
@@ -115,14 +114,8 @@ auto css_style_sheet::check_tag_block(const rspamd::html::html_tag *tag) -> rspa
}
/* First, find id in a tag and a class */
- for (const auto &param: tag->components) {
- if (param.type == html::html_component_type::RSPAMD_HTML_COMPONENT_ID) {
- id_comp = param.value;
- }
- else if (param.type == html::html_component_type::RSPAMD_HTML_COMPONENT_CLASS) {
- class_comp = param.value;
- }
- }
+ auto id_comp = tag->find_id();
+ auto class_comp = tag->find_class();
/* ID part */
if (id_comp && !pimpl->id_selectors.empty()) {
@@ -224,4 +217,4 @@ auto css_parse_style(rspamd_mempool_t *pool,
return std::make_pair(nullptr, parse_res.error());
}
-}// namespace rspamd::css \ No newline at end of file
+}// namespace rspamd::css
diff --git a/src/libserver/dynamic_cfg.c b/src/libserver/dynamic_cfg.c
index 984517074..6d648d745 100644
--- a/src/libserver/dynamic_cfg.c
+++ b/src/libserver/dynamic_cfg.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2023 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -195,7 +195,7 @@ json_config_fin_cb(struct map_cb_data *data, void **target)
return;
}
- parser = ucl_parser_new(0);
+ parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS);
if (!ucl_parser_add_chunk(parser, jb->buf->str, jb->buf->len)) {
msg_err("cannot load json data: parse error %s",
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 93d1fdf91..78a6a975c 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -39,6 +39,7 @@
#include "contrib/frozen/include/frozen/string.h"
#include "contrib/fmt/include/fmt/core.h"
+#include <functional>
#include <unicode/uversion.h>
namespace rspamd::html {
@@ -47,23 +48,88 @@ static const unsigned int max_tags = 8192; /* Ignore tags if this maximum is rea
static const html_tags_storage html_tags_defs;
-auto html_components_map = frozen::make_unordered_map<frozen::string, html_component_type>(
+auto html_components_map = frozen::make_unordered_map<frozen::string, html_component_enum_type>(
{
- {"name", html_component_type::RSPAMD_HTML_COMPONENT_NAME},
- {"href", html_component_type::RSPAMD_HTML_COMPONENT_HREF},
- {"src", html_component_type::RSPAMD_HTML_COMPONENT_HREF},
- {"action", html_component_type::RSPAMD_HTML_COMPONENT_HREF},
- {"color", html_component_type::RSPAMD_HTML_COMPONENT_COLOR},
- {"bgcolor", html_component_type::RSPAMD_HTML_COMPONENT_BGCOLOR},
- {"style", html_component_type::RSPAMD_HTML_COMPONENT_STYLE},
- {"class", html_component_type::RSPAMD_HTML_COMPONENT_CLASS},
- {"width", html_component_type::RSPAMD_HTML_COMPONENT_WIDTH},
- {"height", html_component_type::RSPAMD_HTML_COMPONENT_HEIGHT},
- {"size", html_component_type::RSPAMD_HTML_COMPONENT_SIZE},
- {"rel", html_component_type::RSPAMD_HTML_COMPONENT_REL},
- {"alt", html_component_type::RSPAMD_HTML_COMPONENT_ALT},
- {"id", html_component_type::RSPAMD_HTML_COMPONENT_ID},
- {"hidden", html_component_type::RSPAMD_HTML_COMPONENT_HIDDEN},
+ {"name", html_component_enum_type::RSPAMD_HTML_COMPONENT_NAME},
+ {"href", html_component_enum_type::RSPAMD_HTML_COMPONENT_HREF},
+ {"src", html_component_enum_type::RSPAMD_HTML_COMPONENT_SRC},
+ {"action", html_component_enum_type::RSPAMD_HTML_COMPONENT_HREF},
+ {"color", html_component_enum_type::RSPAMD_HTML_COMPONENT_COLOR},
+ {"bgcolor", html_component_enum_type::RSPAMD_HTML_COMPONENT_BGCOLOR},
+ {"style", html_component_enum_type::RSPAMD_HTML_COMPONENT_STYLE},
+ {"class", html_component_enum_type::RSPAMD_HTML_COMPONENT_CLASS},
+ {"width", html_component_enum_type::RSPAMD_HTML_COMPONENT_WIDTH},
+ {"height", html_component_enum_type::RSPAMD_HTML_COMPONENT_HEIGHT},
+ {"size", html_component_enum_type::RSPAMD_HTML_COMPONENT_SIZE},
+ {"rel", html_component_enum_type::RSPAMD_HTML_COMPONENT_REL},
+ {"alt", html_component_enum_type::RSPAMD_HTML_COMPONENT_ALT},
+ {"id", html_component_enum_type::RSPAMD_HTML_COMPONENT_ID},
+ {"hidden", html_component_enum_type::RSPAMD_HTML_COMPONENT_HIDDEN},
+ // Typography
+ {"font-family", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_FAMILY},
+ {"font-size", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_SIZE},
+ {"font-weight", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_WEIGHT},
+ {"font-style", html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_STYLE},
+ {"text-align", html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_ALIGN},
+ {"text-decoration", html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_DECORATION},
+ {"line-height", html_component_enum_type::RSPAMD_HTML_COMPONENT_LINE_HEIGHT},
+ // Layout & positioning
+ {"margin", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN},
+ {"margin-top", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_TOP},
+ {"margin-bottom", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_BOTTOM},
+ {"margin-left", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_LEFT},
+ {"margin-right", html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_RIGHT},
+ {"padding", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING},
+ {"padding-top", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_TOP},
+ {"padding-bottom", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_BOTTOM},
+ {"padding-left", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_LEFT},
+ {"padding-right", html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_RIGHT},
+ {"border", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER},
+ {"border-color", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_COLOR},
+ {"border-width", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_WIDTH},
+ {"border-style", html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_STYLE},
+ // Display & visibility
+ {"display", html_component_enum_type::RSPAMD_HTML_COMPONENT_DISPLAY},
+ {"visibility", html_component_enum_type::RSPAMD_HTML_COMPONENT_VISIBILITY},
+ {"opacity", html_component_enum_type::RSPAMD_HTML_COMPONENT_OPACITY},
+ // Dimensions
+ {"min-width", html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_WIDTH},
+ {"max-width", html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_WIDTH},
+ {"min-height", html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_HEIGHT},
+ {"max-height", html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_HEIGHT},
+ // Table attributes
+ {"cellpadding", html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLPADDING},
+ {"cellspacing", html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLSPACING},
+ {"valign", html_component_enum_type::RSPAMD_HTML_COMPONENT_VALIGN},
+ {"align", html_component_enum_type::RSPAMD_HTML_COMPONENT_ALIGN},
+ // Form attributes
+ {"type", html_component_enum_type::RSPAMD_HTML_COMPONENT_TYPE},
+ {"value", html_component_enum_type::RSPAMD_HTML_COMPONENT_VALUE},
+ {"placeholder", html_component_enum_type::RSPAMD_HTML_COMPONENT_PLACEHOLDER},
+ {"disabled", html_component_enum_type::RSPAMD_HTML_COMPONENT_DISABLED},
+ {"readonly", html_component_enum_type::RSPAMD_HTML_COMPONENT_READONLY},
+ {"checked", html_component_enum_type::RSPAMD_HTML_COMPONENT_CHECKED},
+ {"selected", html_component_enum_type::RSPAMD_HTML_COMPONENT_SELECTED},
+ // Link & media
+ {"target", html_component_enum_type::RSPAMD_HTML_COMPONENT_TARGET},
+ {"title", html_component_enum_type::RSPAMD_HTML_COMPONENT_TITLE},
+ // Meta & document
+ {"charset", html_component_enum_type::RSPAMD_HTML_COMPONENT_CHARSET},
+ {"content", html_component_enum_type::RSPAMD_HTML_COMPONENT_CONTENT},
+ {"http-equiv", html_component_enum_type::RSPAMD_HTML_COMPONENT_HTTP_EQUIV},
+ // Accessibility
+ {"role", html_component_enum_type::RSPAMD_HTML_COMPONENT_ROLE},
+ {"tabindex", html_component_enum_type::RSPAMD_HTML_COMPONENT_TABINDEX},
+ // Background
+ {"background", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND},
+ {"background-image", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_IMAGE},
+ {"background-color", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_COLOR},
+ {"background-repeat", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_REPEAT},
+ {"background-position", html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_POSITION},
+ // Email-specific tracking
+ {"data-track", html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_TRACK},
+ {"data-id", html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_ID},
+ {"data-url", html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_URL},
});
#define msg_debug_html(...) rspamd_conditional_debug_fast(NULL, NULL, \
@@ -199,18 +265,608 @@ html_check_balance(struct html_content *hc,
return nullptr;
}
-auto html_component_from_string(const std::string_view &st) -> std::optional<html_component_type>
+/**
+ * Builds a typed tag component from an attribute name/value pair.
+ *
+ * The name is looked up in html_components_map; on a hit the value is wrapped
+ * into the component struct that corresponds to the mapped enum constant.
+ * Boolean attributes (hidden, disabled, readonly, checked, selected) are
+ * constructed without the value. A name that is not in the map, or a mapped
+ * constant that has no case below, produces html_component_unknown carrying
+ * both the name and the value.
+ *
+ * @param name attribute name, used as is for the map lookup
+ * @param value attribute value handed to the component constructor
+ * @return the html_tag_component alternative selected for this attribute
+ */
+auto html_component_from_string(std::string_view name, std::string_view value) -> html_tag_component
{
- auto known_component_it = html_components_map.find(st);
+ auto known_component_it = html_components_map.find(name);
if (known_component_it != html_components_map.end()) {
- return known_component_it->second;
+ switch (known_component_it->second) {
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_NAME:
+ return html_component_name{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_HREF:
+ return html_component_href{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_COLOR:
+ return html_component_color{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BGCOLOR:
+ return html_component_bgcolor{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_STYLE:
+ return html_component_style{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_CLASS:
+ return html_component_class{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_WIDTH:
+ return html_component_width{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_HEIGHT:
+ return html_component_height{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_SIZE:
+ return html_component_size{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_REL:
+ return html_component_rel{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_ALT:
+ return html_component_alt{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_ID:
+ return html_component_id{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_HIDDEN:
+ // Boolean attribute: the value is not passed to the component
+ return html_component_hidden{};
+ // Typography
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_FAMILY:
+ return html_component_font_family{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_SIZE:
+ return html_component_font_size{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_WEIGHT:
+ return html_component_font_weight{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_FONT_STYLE:
+ return html_component_font_style{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_ALIGN:
+ return html_component_text_align{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_TEXT_DECORATION:
+ return html_component_text_decoration{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_LINE_HEIGHT:
+ return html_component_line_height{value};
+ // Layout
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN:
+ return html_component_margin{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_TOP:
+ return html_component_margin_top{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_BOTTOM:
+ return html_component_margin_bottom{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_LEFT:
+ return html_component_margin_left{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MARGIN_RIGHT:
+ return html_component_margin_right{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING:
+ return html_component_padding{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_TOP:
+ return html_component_padding_top{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_BOTTOM:
+ return html_component_padding_bottom{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_LEFT:
+ return html_component_padding_left{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_PADDING_RIGHT:
+ return html_component_padding_right{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER:
+ return html_component_border{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_COLOR:
+ return html_component_border_color{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_WIDTH:
+ return html_component_border_width{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BORDER_STYLE:
+ return html_component_border_style{value};
+ // Display
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_DISPLAY:
+ return html_component_display{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_VISIBILITY:
+ return html_component_visibility{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_OPACITY:
+ return html_component_opacity{value};
+ // Dimensions
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_WIDTH:
+ return html_component_min_width{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_WIDTH:
+ return html_component_max_width{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MIN_HEIGHT:
+ return html_component_min_height{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_MAX_HEIGHT:
+ return html_component_max_height{value};
+ // Table
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLPADDING:
+ return html_component_cellpadding{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_CELLSPACING:
+ return html_component_cellspacing{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_VALIGN:
+ return html_component_valign{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_ALIGN:
+ return html_component_align{value};
+ // Form
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_TYPE:
+ return html_component_type{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_VALUE:
+ return html_component_value{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_PLACEHOLDER:
+ return html_component_placeholder{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_DISABLED:
+ return html_component_disabled{};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_READONLY:
+ return html_component_readonly{};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_CHECKED:
+ return html_component_checked{};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_SELECTED:
+ return html_component_selected{};
+ // Link & media
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_TARGET:
+ return html_component_target{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_TITLE:
+ return html_component_title{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_SRC:
+ return html_component_src{value};
+ // Meta
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_CHARSET:
+ return html_component_charset{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_CONTENT:
+ return html_component_content{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_HTTP_EQUIV:
+ return html_component_http_equiv{value};
+ // Accessibility
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_ROLE:
+ return html_component_role{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_TABINDEX:
+ return html_component_tabindex{value};
+ // Background
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND:
+ return html_component_background{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_IMAGE:
+ return html_component_background_image{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_COLOR:
+ return html_component_background_color{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_REPEAT:
+ return html_component_background_repeat{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_BACKGROUND_POSITION:
+ return html_component_background_position{value};
+ // Email tracking
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_TRACK:
+ return html_component_data_track{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_ID:
+ return html_component_data_id{value};
+ case html_component_enum_type::RSPAMD_HTML_COMPONENT_DATA_URL:
+ return html_component_data_url{value};
+ default:
+ // Mapped constant without a dedicated case: keep it as an unknown component
+ return html_component_unknown{name, value};
+ }
}
else {
- return std::nullopt;
+ return html_component_unknown{name, value};
}
}
+/*
+ * Signature shared by all attribute extractors: given a tag, return the
+ * textual value of one attribute or std::nullopt when it cannot be provided.
+ */
+using component_extractor_func = std::function<std::optional<std::string_view>(const html_tag *)>;
+
+/*
+ * Extractor for components that expose their text through a `value` member.
+ * Returns std::nullopt when find_component<Component>() finds nothing.
+ */
+template<typename Component>
+static auto extract_component_value(const html_tag *tag) -> std::optional<std::string_view>
+{
+	if (auto comp = tag->find_component<Component>()) {
+		return comp.value()->value;
+	}
+	return std::nullopt;
+}
+
+/*
+ * Extractor for components whose textual form is obtained through
+ * get_string_value() (the numeric attributes in the table below).
+ */
+template<typename Component>
+static auto extract_component_string(const html_tag *tag) -> std::optional<std::string_view>
+{
+	if (auto comp = tag->find_component<Component>()) {
+		return comp.value()->get_string_value();
+	}
+	return std::nullopt;
+}
+
+/*
+ * Extractor for boolean components: yields "true" when the component is
+ * found and reports is_present(), std::nullopt otherwise.
+ */
+template<typename Component>
+static auto extract_component_flag(const html_tag *tag) -> std::optional<std::string_view>
+{
+	if (auto comp = tag->find_component<Component>()) {
+		return comp.value()->is_present() ? std::optional<std::string_view>{"true"} : std::nullopt;
+	}
+	return std::nullopt;
+}
+
+/*
+ * Attribute name -> extractor table used by html_tag::find_component_by_name()
+ * and html_tag::get_all_attributes(). Entries are kept in their original order.
+ */
+static const auto component_extractors = frozen::make_unordered_map<frozen::string, component_extractor_func>(
+	{
+		// Basic components
+		{"name", extract_component_value<html_component_name>},
+		{"href", [](const html_tag *tag) { return tag->find_href(); }},
+		{"src", extract_component_value<html_component_src>},
+		{"class", [](const html_tag *tag) { return tag->find_class(); }},
+		{"id", [](const html_tag *tag) { return tag->find_id(); }},
+		{"style", [](const html_tag *tag) { return tag->find_style(); }},
+		{"alt", [](const html_tag *tag) { return tag->find_alt(); }},
+		{"rel", [](const html_tag *tag) { return tag->find_rel(); }},
+		{"color", extract_component_value<html_component_color>},
+		{"bgcolor", extract_component_value<html_component_bgcolor>},
+
+		// Numeric components (return string representation)
+		{"width", extract_component_string<html_component_width>},
+		{"height", extract_component_string<html_component_height>},
+		{"size", extract_component_string<html_component_size>},
+
+		// Boolean components
+		{"hidden", [](const html_tag *tag) -> std::optional<std::string_view> {
+			 return tag->is_hidden() ? std::optional<std::string_view>{"true"} : std::nullopt;
+		 }},
+
+		// Typography components
+		{"font-family", extract_component_value<html_component_font_family>},
+		{"font-size", extract_component_string<html_component_font_size>},
+		{"font-weight", extract_component_value<html_component_font_weight>},
+		{"font-style", extract_component_value<html_component_font_style>},
+		{"text-align", extract_component_value<html_component_text_align>},
+		{"text-decoration", extract_component_value<html_component_text_decoration>},
+		{"line-height", extract_component_string<html_component_line_height>},
+
+		// Layout components
+		{"margin", extract_component_value<html_component_margin>},
+		{"margin-top", extract_component_value<html_component_margin_top>},
+		{"margin-bottom", extract_component_value<html_component_margin_bottom>},
+		{"margin-left", extract_component_value<html_component_margin_left>},
+		{"margin-right", extract_component_value<html_component_margin_right>},
+		{"padding", extract_component_value<html_component_padding>},
+		{"padding-top", extract_component_value<html_component_padding_top>},
+		{"padding-bottom", extract_component_value<html_component_padding_bottom>},
+		{"padding-left", extract_component_value<html_component_padding_left>},
+		{"padding-right", extract_component_value<html_component_padding_right>},
+		{"border", extract_component_value<html_component_border>},
+		{"border-color", extract_component_value<html_component_border_color>},
+		{"border-width", extract_component_string<html_component_border_width>},
+		{"border-style", extract_component_value<html_component_border_style>},
+
+		// Display components
+		{"display", extract_component_value<html_component_display>},
+		{"visibility", extract_component_value<html_component_visibility>},
+		{"opacity", extract_component_string<html_component_opacity>},
+
+		// Additional dimensions
+		{"min-width", extract_component_string<html_component_min_width>},
+		{"max-width", extract_component_string<html_component_max_width>},
+		{"min-height", extract_component_string<html_component_min_height>},
+		{"max-height", extract_component_string<html_component_max_height>},
+
+		// Table components
+		{"cellpadding", extract_component_string<html_component_cellpadding>},
+		{"cellspacing", extract_component_string<html_component_cellspacing>},
+		{"valign", extract_component_value<html_component_valign>},
+		{"align", extract_component_value<html_component_align>},
+
+		// Form components
+		{"type", extract_component_value<html_component_type>},
+		{"value", extract_component_value<html_component_value>},
+		{"placeholder", extract_component_value<html_component_placeholder>},
+		{"disabled", extract_component_flag<html_component_disabled>},
+		{"readonly", extract_component_flag<html_component_readonly>},
+		{"checked", extract_component_flag<html_component_checked>},
+		{"selected", extract_component_flag<html_component_selected>},
+
+		// Link & media components
+		{"target", extract_component_value<html_component_target>},
+		{"title", extract_component_value<html_component_title>},
+
+		// Meta components
+		{"charset", extract_component_value<html_component_charset>},
+		{"content", extract_component_value<html_component_content>},
+		{"http-equiv", extract_component_value<html_component_http_equiv>},
+
+		// Accessibility components
+		{"role", extract_component_value<html_component_role>},
+		{"tabindex", extract_component_string<html_component_tabindex>},
+
+		// Background components
+		{"background", extract_component_value<html_component_background>},
+		{"background-image", extract_component_value<html_component_background_image>},
+		{"background-color", extract_component_value<html_component_background_color>},
+		{"background-repeat", extract_component_value<html_component_background_repeat>},
+		{"background-position", extract_component_value<html_component_background_position>},
+
+		// Email tracking components
+		{"data-track", extract_component_value<html_component_data_track>},
+		{"data-id", extract_component_value<html_component_data_id>},
+		{"data-url", extract_component_value<html_component_data_url>},
+	});
+
+/*
+ * Resolves an attribute value by its textual name. Names registered in
+ * component_extractors are served by the matching extractor; every other
+ * name is looked up through find_unknown_component().
+ */
+auto html_tag::find_component_by_name(std::string_view attr_name) const -> std::optional<std::string_view>
+{
+	const auto extractor_it = component_extractors.find(attr_name);
+
+	if (extractor_it == component_extractors.end()) {
+		/* No extractor for this name: defer to the unknown components */
+		return find_unknown_component(attr_name);
+	}
+
+	return extractor_it->second(this);
+}
+
+/*
+ * Collects (name, value) pairs for this tag: first every entry of
+ * component_extractors whose extractor yields a value, then everything
+ * reported by get_unknown_components().
+ */
+auto html_tag::get_all_attributes() const -> std::vector<std::pair<std::string_view, std::string_view>>
+{
+	std::vector<std::pair<std::string_view, std::string_view>> attrs;
+
+	/* Known attributes: run each registered extractor against this tag */
+	for (const auto &entry: component_extractors) {
+		const auto maybe_value = entry.second(this);
+
+		if (!maybe_value) {
+			continue;
+		}
+
+		/* The key is a frozen::string, so expose it as a std::string_view */
+		attrs.emplace_back(std::string_view{entry.first.data(), entry.first.size()},
+						   *maybe_value);
+	}
+
+	/* Unknown attributes are appended after the known ones */
+	for (const auto &[unknown_name, unknown_value]: get_unknown_components()) {
+		attrs.emplace_back(unknown_name, unknown_value);
+	}
+
+	return attrs;
+}
+
enum tag_parser_state {
parse_start = 0,
parse_name,
@@ -234,13 +890,13 @@ enum tag_parser_state {
+/*
+ * Mutable state carried through html_parse_tag_content: the current parser
+ * state, the text buffer being accumulated and the name of the attribute
+ * whose value is being read.
+ */
struct tag_content_parser_state {
tag_parser_state cur_state = parse_start;
std::string buf;
- std::optional<html_component_type> cur_component;
+ std::string attr_name; /* Name of the attribute currently being parsed */
+ /* Returns the parser to parse_start and drops any buffered text and name */
void reset()
{
cur_state = parse_start;
buf.clear();
- cur_component = std::nullopt;
+ attr_name.clear();
}
};
@@ -254,56 +910,50 @@ html_parse_tag_content(rspamd_mempool_t *pool,
auto state = parser_env.cur_state;
/*
- * Stores tag component if it doesn't exist, performing copy of the
- * value + decoding of the entities
- * Parser env is set to clear the current html attribute fields (saved_p and
- * cur_component)
+ * Stores the tag component, creating the appropriate variant type for it.
+ * The parser env (buffer and attribute name) is cleared after storing.
*/
auto store_component_value = [&]() -> void {
- if (parser_env.cur_component) {
+ if (!parser_env.attr_name.empty()) {
+ std::string_view attr_name_view, value_view;
- if (parser_env.buf.empty()) {
- tag->components.emplace_back(parser_env.cur_component.value(),
- std::string_view{});
+ // Store attribute name in persistent memory
+ if (!parser_env.attr_name.empty()) {
+ auto *name_storage = rspamd_mempool_alloc_buffer(pool, parser_env.attr_name.size());
+ memcpy(name_storage, parser_env.attr_name.data(), parser_env.attr_name.size());
+ attr_name_view = {name_storage, parser_env.attr_name.size()};
}
- else {
- /* We need to copy buf to a persistent storage */
- auto *s = rspamd_mempool_alloc_buffer(pool, parser_env.buf.size());
- if (parser_env.cur_component.value() == html_component_type::RSPAMD_HTML_COMPONENT_ID ||
- parser_env.cur_component.value() == html_component_type::RSPAMD_HTML_COMPONENT_CLASS) {
- /* Lowercase */
- rspamd_str_copy_lc(parser_env.buf.data(), s, parser_env.buf.size());
+ // Store value in persistent memory if not empty
+ if (!parser_env.buf.empty()) {
+ auto *value_storage = rspamd_mempool_alloc_buffer(pool, parser_env.buf.size());
+
+ // Lowercase for id and class attributes
+ if (parser_env.attr_name == "id" || parser_env.attr_name == "class") {
+ rspamd_str_copy_lc(parser_env.buf.data(), value_storage, parser_env.buf.size());
}
else {
- memcpy(s, parser_env.buf.data(), parser_env.buf.size());
+ memcpy(value_storage, parser_env.buf.data(), parser_env.buf.size());
}
- auto sz = rspamd_html_decode_entitles_inplace(s, parser_env.buf.size());
- tag->components.emplace_back(parser_env.cur_component.value(),
- std::string_view{s, sz});
+ auto sz = rspamd_html_decode_entitles_inplace(value_storage, parser_env.buf.size());
+ value_view = {value_storage, sz};
}
+
+ // Create the appropriate component variant
+ auto component = html_component_from_string(attr_name_view, value_view);
+ tag->components.emplace_back(std::move(component));
}
parser_env.buf.clear();
- parser_env.cur_component = std::nullopt;
+ parser_env.attr_name.clear();
};
auto store_component_name = [&]() -> bool {
decode_html_entitles_inplace(parser_env.buf);
- auto known_component_it = html_components_map.find(std::string_view{parser_env.buf});
+ parser_env.attr_name = parser_env.buf;
parser_env.buf.clear();
-
- if (known_component_it != html_components_map.end()) {
- parser_env.cur_component = known_component_it->second;
-
- return true;
- }
- else {
- parser_env.cur_component = std::nullopt;
- }
-
- return false;
+ return true;
};
auto store_value_character = [&](bool lc) -> void {
@@ -471,6 +1121,7 @@ html_parse_tag_content(rspamd_mempool_t *pool,
case parse_start_dquote:
if (*in == '"') {
+ store_component_value();
state = spaces_after_param;
}
else {
@@ -481,6 +1132,7 @@ html_parse_tag_content(rspamd_mempool_t *pool,
case parse_start_squote:
if (*in == '\'') {
+ store_component_value();
state = spaces_after_param;
}
else {
@@ -620,7 +1272,7 @@ html_process_url_tag(rspamd_mempool_t *pool,
struct html_tag *tag,
struct html_content *hc) -> std::optional<struct rspamd_url *>
{
- auto found_href_maybe = tag->find_component(html_component_type::RSPAMD_HTML_COMPONENT_HREF);
+ auto found_href_maybe = tag->find_href();
if (found_href_maybe) {
/* Check base url */
@@ -816,130 +1468,126 @@ html_process_img_tag(rspamd_mempool_t *pool,
img = rspamd_mempool_alloc0_type(pool, struct html_image);
img->tag = tag;
- for (const auto &param: tag->components) {
+ // Process SRC component (preferred for img tags) or HREF component (fallback)
+ std::optional<std::string_view> href_value;
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_HREF) {
- /* Check base url */
- const auto &href_value = param.value;
+ // Try SRC first (standard for img tags)
+ if (auto src_comp = tag->find_component<html_component_src>()) {
+ href_value = src_comp.value()->value;
+ }
+ // Fallback to HREF (for backward compatibility or non-standard usage)
+ else if (auto href_comp = tag->find_href()) {
+ href_value = href_comp;
+ }
- if (href_value.size() > 0) {
- rspamd_ftok_t fstr;
- fstr.begin = href_value.data();
- fstr.len = href_value.size();
- img->src = rspamd_mempool_ftokdup(pool, &fstr);
+ if (href_value && href_value->size() > 0) {
+ rspamd_ftok_t fstr;
+ fstr.begin = href_value->data();
+ fstr.len = href_value->size();
+ img->src = rspamd_mempool_ftokdup(pool, &fstr);
- if (href_value.size() > sizeof("cid:") - 1 && memcmp(href_value.data(),
- "cid:", sizeof("cid:") - 1) == 0) {
- /* We have an embedded image */
- img->src += sizeof("cid:") - 1;
- img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
- }
- else {
- if (href_value.size() > sizeof("data:") - 1 && memcmp(href_value.data(),
- "data:", sizeof("data:") - 1) == 0) {
- /* We have an embedded image in HTML tag */
- img->flags |=
- (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA);
- html_process_data_image(pool, img, href_value);
- hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
- }
- else {
- img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
- if (img->src) {
-
- std::string_view cpy{href_value};
- auto maybe_url = html_process_url(pool, cpy);
-
- if (maybe_url) {
- img->url = maybe_url.value();
- struct rspamd_url *existing;
-
- img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
- existing = rspamd_url_set_add_or_return(url_set,
- img->url);
-
- if (existing && existing != img->url) {
- /*
- * We have some other URL that could be
- * found, e.g. from another part. However,
- * we still want to set an image flag on it
- */
- existing->flags |= img->url->flags;
- existing->count++;
- }
- else if (part_urls) {
- /* New url */
- g_ptr_array_add(part_urls, img->url);
- }
- }
- }
- }
- }
- }
+ if (href_value->size() > sizeof("cid:") - 1 && memcmp(href_value->data(),
+ "cid:", sizeof("cid:") - 1) == 0) {
+ /* We have an embedded image */
+ img->src += sizeof("cid:") - 1;
+ img->flags |= RSPAMD_HTML_FLAG_IMAGE_EMBEDDED;
}
+ else {
+ if (href_value->size() > sizeof("data:") - 1 && memcmp(href_value->data(),
+ "data:", sizeof("data:") - 1) == 0) {
+ /* We have an embedded image in HTML tag */
+ img->flags |=
+ (RSPAMD_HTML_FLAG_IMAGE_EMBEDDED | RSPAMD_HTML_FLAG_IMAGE_DATA);
+ html_process_data_image(pool, img, *href_value);
+ hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
+ }
+ else {
+ img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
+ if (img->src) {
+ std::string_view cpy{*href_value};
+ auto maybe_url = html_process_url(pool, cpy);
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_HEIGHT) {
- unsigned long val;
+ if (maybe_url) {
+ img->url = maybe_url.value();
+ struct rspamd_url *existing;
- rspamd_strtoul(param.value.data(), param.value.size(), &val);
- img->height = val;
- }
+ img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
+ existing = rspamd_url_set_add_or_return(url_set,
+ img->url);
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_WIDTH) {
- unsigned long val;
-
- rspamd_strtoul(param.value.data(), param.value.size(), &val);
- img->width = val;
+ if (existing && existing != img->url) {
+ /*
+ * We have some other URL that could be
+ * found, e.g. from another part. However,
+ * we still want to set an image flag on it
+ */
+ existing->flags |= img->url->flags;
+ existing->count++;
+ }
+ else if (part_urls) {
+ /* New url */
+ g_ptr_array_add(part_urls, img->url);
+ }
+ }
+ }
+ }
}
+ }
- /* TODO: rework to css at some time */
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_STYLE) {
- if (img->height == 0) {
- auto style_st = param.value;
- auto pos = rspamd_substring_search_caseless(style_st.data(),
- style_st.size(),
- "height", sizeof("height") - 1);
- if (pos != -1) {
- auto substr = style_st.substr(pos + sizeof("height") - 1);
+ // Process numeric dimensions using the new helper methods
+ if (auto height = tag->find_height()) {
+ img->height = height.value();
+ }
- for (auto i = 0; i < substr.size(); i++) {
- auto t = substr[i];
- if (g_ascii_isdigit(t)) {
- unsigned long val;
- rspamd_strtoul(substr.data(),
- substr.size(), &val);
- img->height = val;
- break;
- }
- else if (!g_ascii_isspace(t) && t != '=' && t != ':') {
- /* Fallback */
- break;
- }
+ if (auto width = tag->find_width()) {
+ img->width = width.value();
+ }
+
+ // Process style component for dimensions
+ if (auto style_value = tag->find_style()) {
+ if (img->height == 0) {
+ auto pos = rspamd_substring_search_caseless(style_value->data(),
+ style_value->size(),
+ "height", sizeof("height") - 1);
+ if (pos != -1) {
+ auto substr = style_value->substr(pos + sizeof("height") - 1);
+
+ for (auto i = 0; i < substr.size(); i++) {
+ auto t = substr[i];
+ if (g_ascii_isdigit(t)) {
+ unsigned long val;
+ rspamd_strtoul(substr.data(),
+ substr.size(), &val);
+ img->height = val;
+ break;
+ }
+ else if (!g_ascii_isspace(t) && t != '=' && t != ':') {
+ /* Fallback */
+ break;
}
}
}
- if (img->width == 0) {
- auto style_st = param.value;
- auto pos = rspamd_substring_search_caseless(style_st.data(),
- style_st.size(),
- "width", sizeof("width") - 1);
- if (pos != -1) {
- auto substr = style_st.substr(pos + sizeof("width") - 1);
-
- for (auto i = 0; i < substr.size(); i++) {
- auto t = substr[i];
- if (g_ascii_isdigit(t)) {
- unsigned long val;
- rspamd_strtoul(substr.data(),
- substr.size(), &val);
- img->width = val;
- break;
- }
- else if (!g_ascii_isspace(t) && t != '=' && t != ':') {
- /* Fallback */
- break;
- }
+ }
+ if (img->width == 0) {
+ auto pos = rspamd_substring_search_caseless(style_value->data(),
+ style_value->size(),
+ "width", sizeof("width") - 1);
+ if (pos != -1) {
+ auto substr = style_value->substr(pos + sizeof("width") - 1);
+
+ for (auto i = 0; i < substr.size(); i++) {
+ auto t = substr[i];
+ if (g_ascii_isdigit(t)) {
+ unsigned long val;
+ rspamd_strtoul(substr.data(),
+ substr.size(), &val);
+ img->width = val;
+ break;
+ }
+ else if (!g_ascii_isspace(t) && t != '=' && t != ':') {
+ /* Fallback */
+ break;
}
}
}
@@ -968,7 +1616,7 @@ html_process_link_tag(rspamd_mempool_t *pool, struct html_tag *tag,
khash_t(rspamd_url_hash) * url_set,
GPtrArray *part_urls) -> void
{
- auto found_rel_maybe = tag->find_component(html_component_type::RSPAMD_HTML_COMPONENT_REL);
+ auto found_rel_maybe = tag->find_rel();
if (found_rel_maybe) {
if (found_rel_maybe.value() == "icon") {
@@ -984,24 +1632,23 @@ html_process_block_tag(rspamd_mempool_t *pool, struct html_tag *tag,
std::optional<css::css_value> maybe_fgcolor, maybe_bgcolor;
bool hidden = false;
- for (const auto &param: tag->components) {
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_COLOR) {
- maybe_fgcolor = css::css_value::maybe_color_from_string(param.value);
- }
-
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_BGCOLOR) {
- maybe_bgcolor = css::css_value::maybe_color_from_string(param.value);
- }
+ // Process color components
+ if (auto color_comp = tag->find_component<html_component_color>()) {
+ maybe_fgcolor = css::css_value::maybe_color_from_string(color_comp.value()->value);
+ }
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_STYLE) {
- tag->block = rspamd::css::parse_css_declaration(pool, param.value);
- }
+ if (auto bgcolor_comp = tag->find_component<html_component_bgcolor>()) {
+ maybe_bgcolor = css::css_value::maybe_color_from_string(bgcolor_comp.value()->value);
+ }
- if (param.type == html_component_type::RSPAMD_HTML_COMPONENT_HIDDEN) {
- hidden = true;
- }
+ // Process style component
+ if (auto style_value = tag->find_style()) {
+ tag->block = rspamd::css::parse_css_declaration(pool, *style_value);
}
+ // Check if hidden
+ hidden = tag->is_hidden();
+
if (!tag->block) {
tag->block = html_block::undefined_html_block_pool(pool);
}
@@ -1284,7 +1931,7 @@ html_append_tag_content(rspamd_mempool_t *pool,
}
else if (tag->id == Tag_IMG) {
/* Process ALT if presented */
- auto maybe_alt = tag->find_component(html_component_type::RSPAMD_HTML_COMPONENT_ALT);
+ auto maybe_alt = tag->find_alt();
if (maybe_alt) {
if (!hc->parsed.empty() && !g_ascii_isspace(hc->parsed.back())) {
@@ -1384,9 +2031,7 @@ auto html_process_input(struct rspamd_task *task,
overflow_input = true;
}
- auto new_tag = [&](int flags = 0) -> struct html_tag *
- {
-
+ auto new_tag = [&](int flags = 0) -> struct html_tag * {
if (hc->all_tags.size() > rspamd::html::max_tags) {
hc->flags |= RSPAMD_HTML_FLAG_TOO_MANY_TAGS;
@@ -2151,7 +2796,7 @@ auto html_process_input(struct rspamd_task *task,
/* Leftover after content */
switch (state) {
case tags_limit_overflow:
- html_append_parsed(hc, {c, (std::size_t)(end - c)},
+ html_append_parsed(hc, {c, (std::size_t) (end - c)},
false, end - start, hc->parsed);
break;
default:
@@ -2390,4 +3035,4 @@ gsize rspamd_html_get_tags_count(void *html_content)
}
return hc->all_tags.size();
-} \ No newline at end of file
+}
diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx
index 309d76177..6d41f1337 100644
--- a/src/libserver/html/html_tag.hxx
+++ b/src/libserver/html/html_tag.hxx
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2021 Vsevolod Stakhov
+/*
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -26,6 +26,7 @@
#include <cstdint>
#include "html_tags.h"
+#include "libutil/str_util.h"
struct rspamd_url;
struct html_image;
@@ -34,7 +35,8 @@ namespace rspamd::html {
struct html_content; /* Forward declaration */
-enum class html_component_type : std::uint8_t {
+// Internal enum for mapping (not exposed in public API)
+enum class html_component_enum_type : std::uint8_t {
RSPAMD_HTML_COMPONENT_NAME = 0,
RSPAMD_HTML_COMPONENT_HREF,
RSPAMD_HTML_COMPONENT_COLOR,
@@ -48,8 +50,1214 @@ enum class html_component_type : std::uint8_t {
RSPAMD_HTML_COMPONENT_ALT,
RSPAMD_HTML_COMPONENT_ID,
RSPAMD_HTML_COMPONENT_HIDDEN,
+ // Typography
+ RSPAMD_HTML_COMPONENT_FONT_FAMILY,
+ RSPAMD_HTML_COMPONENT_FONT_SIZE,
+ RSPAMD_HTML_COMPONENT_FONT_WEIGHT,
+ RSPAMD_HTML_COMPONENT_FONT_STYLE,
+ RSPAMD_HTML_COMPONENT_TEXT_ALIGN,
+ RSPAMD_HTML_COMPONENT_TEXT_DECORATION,
+ RSPAMD_HTML_COMPONENT_LINE_HEIGHT,
+ // Layout & positioning
+ RSPAMD_HTML_COMPONENT_MARGIN,
+ RSPAMD_HTML_COMPONENT_MARGIN_TOP,
+ RSPAMD_HTML_COMPONENT_MARGIN_BOTTOM,
+ RSPAMD_HTML_COMPONENT_MARGIN_LEFT,
+ RSPAMD_HTML_COMPONENT_MARGIN_RIGHT,
+ RSPAMD_HTML_COMPONENT_PADDING,
+ RSPAMD_HTML_COMPONENT_PADDING_TOP,
+ RSPAMD_HTML_COMPONENT_PADDING_BOTTOM,
+ RSPAMD_HTML_COMPONENT_PADDING_LEFT,
+ RSPAMD_HTML_COMPONENT_PADDING_RIGHT,
+ RSPAMD_HTML_COMPONENT_BORDER,
+ RSPAMD_HTML_COMPONENT_BORDER_COLOR,
+ RSPAMD_HTML_COMPONENT_BORDER_WIDTH,
+ RSPAMD_HTML_COMPONENT_BORDER_STYLE,
+ // Display & visibility
+ RSPAMD_HTML_COMPONENT_DISPLAY,
+ RSPAMD_HTML_COMPONENT_VISIBILITY,
+ RSPAMD_HTML_COMPONENT_OPACITY,
+ // Dimensions
+ RSPAMD_HTML_COMPONENT_MIN_WIDTH,
+ RSPAMD_HTML_COMPONENT_MAX_WIDTH,
+ RSPAMD_HTML_COMPONENT_MIN_HEIGHT,
+ RSPAMD_HTML_COMPONENT_MAX_HEIGHT,
+ // Table attributes
+ RSPAMD_HTML_COMPONENT_CELLPADDING,
+ RSPAMD_HTML_COMPONENT_CELLSPACING,
+ RSPAMD_HTML_COMPONENT_VALIGN,
+ RSPAMD_HTML_COMPONENT_ALIGN,
+ // Form attributes
+ RSPAMD_HTML_COMPONENT_TYPE,
+ RSPAMD_HTML_COMPONENT_VALUE,
+ RSPAMD_HTML_COMPONENT_PLACEHOLDER,
+ RSPAMD_HTML_COMPONENT_DISABLED,
+ RSPAMD_HTML_COMPONENT_READONLY,
+ RSPAMD_HTML_COMPONENT_CHECKED,
+ RSPAMD_HTML_COMPONENT_SELECTED,
+ // Link & media
+ RSPAMD_HTML_COMPONENT_TARGET,
+ RSPAMD_HTML_COMPONENT_TITLE,
+ RSPAMD_HTML_COMPONENT_SRC,
+ // Meta & document
+ RSPAMD_HTML_COMPONENT_CHARSET,
+ RSPAMD_HTML_COMPONENT_CONTENT,
+ RSPAMD_HTML_COMPONENT_HTTP_EQUIV,
+ // Accessibility
+ RSPAMD_HTML_COMPONENT_ROLE,
+ RSPAMD_HTML_COMPONENT_TABINDEX,
+ // Background
+ RSPAMD_HTML_COMPONENT_BACKGROUND,
+ RSPAMD_HTML_COMPONENT_BACKGROUND_IMAGE,
+ RSPAMD_HTML_COMPONENT_BACKGROUND_COLOR,
+ RSPAMD_HTML_COMPONENT_BACKGROUND_REPEAT,
+ RSPAMD_HTML_COMPONENT_BACKGROUND_POSITION,
+ // Email-specific tracking
+ RSPAMD_HTML_COMPONENT_DATA_TRACK,
+ RSPAMD_HTML_COMPONENT_DATA_ID,
+ RSPAMD_HTML_COMPONENT_DATA_URL,
};
+// Forward declarations for the first group of component types (name .. unknown); each of them is defined further down
+struct html_component_name;
+struct html_component_href;
+struct html_component_color;
+struct html_component_bgcolor;
+struct html_component_style;
+struct html_component_class;
+struct html_component_width;
+struct html_component_height;
+struct html_component_size;
+struct html_component_rel;
+struct html_component_alt;
+struct html_component_id;
+struct html_component_hidden;
+struct html_component_unknown;
+
+// Base interface for all components
+struct html_component_base {
+ /* Virtual so that a component can be destroyed through a base pointer. */
+ virtual ~html_component_base() = default;
+ /* Each concrete component reports its content as a string view. */
+ virtual constexpr auto get_string_value() const -> std::string_view = 0;
+};
+
+// String-based components
+/* Component of kind `name`: a thin wrapper around the string view given at construction. */
+struct html_component_name : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_name(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `href`: a thin wrapper around the string view given at construction. */
+struct html_component_href : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_href(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `style`: a thin wrapper around the string view given at construction. */
+struct html_component_style : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_style(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `class`: a thin wrapper around the string view given at construction. */
+struct html_component_class : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_class(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `rel`: a thin wrapper around the string view given at construction. */
+struct html_component_rel : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_rel(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `alt`: a thin wrapper around the string view given at construction. */
+struct html_component_alt : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_alt(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `id`: a thin wrapper around the string view given at construction. */
+struct html_component_id : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_id(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+// Color components (could be extended to parse actual colors)
+/* Component of kind `color`: keeps the text as given, no colour parsing happens here. */
+struct html_component_color : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_color(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `bgcolor`: keeps the text as given, no colour parsing happens here. */
+struct html_component_bgcolor : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_bgcolor(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+// Numeric components
+/*
+ * Component of kind `width`: keeps the raw text plus, when rspamd_strtoul()
+ * accepts it and the result fits into 32 bits, the parsed number.
+ */
+struct html_component_width : html_component_base {
+ std::string_view raw_value;
+ std::optional<std::uint32_t> numeric_value; /* stays empty unless parsing succeeded */
+
+ explicit html_component_width(const std::string_view v)
+ : raw_value(v)
+ {
+ unsigned long val = 0;
+ /* unsigned long can be wider than 32 bits; refuse values the cast would silently wrap */
+ if (rspamd_strtoul(v.data(), v.size(), &val) && val <= UINT32_MAX) {
+ numeric_value = static_cast<std::uint32_t>(val);
+ }
+ }
+
+ /* Raw text exactly as passed to the constructor. */
+ constexpr std::string_view get_string_value() const override
+ {
+ return raw_value;
+ }
+ /* Parsed number, or std::nullopt if none was stored. */
+ std::optional<std::uint32_t> get_numeric_value() const
+ {
+ return numeric_value;
+ }
+};
+
+/*
+ * Component of kind `height`: keeps the raw text plus, when rspamd_strtoul()
+ * accepts it and the result fits into 32 bits, the parsed number.
+ */
+struct html_component_height : html_component_base {
+ std::string_view raw_value;
+ std::optional<std::uint32_t> numeric_value; /* stays empty unless parsing succeeded */
+
+ explicit html_component_height(const std::string_view v)
+ : raw_value(v)
+ {
+ unsigned long val = 0;
+ /* unsigned long can be wider than 32 bits; refuse values the cast would silently wrap */
+ if (rspamd_strtoul(v.data(), v.size(), &val) && val <= UINT32_MAX) {
+ numeric_value = static_cast<std::uint32_t>(val);
+ }
+ }
+
+ /* Raw text exactly as passed to the constructor. */
+ constexpr std::string_view get_string_value() const override
+ {
+ return raw_value;
+ }
+ /* Parsed number, or std::nullopt if none was stored. */
+ std::optional<std::uint32_t> get_numeric_value() const
+ {
+ return numeric_value;
+ }
+};
+
+/*
+ * Component of kind `size`: keeps the raw text plus, when rspamd_strtoul()
+ * accepts it and the result fits into 32 bits, the parsed number.
+ */
+struct html_component_size : html_component_base {
+ std::string_view raw_value;
+ std::optional<std::uint32_t> numeric_value; /* stays empty unless parsing succeeded */
+
+ explicit html_component_size(std::string_view v)
+ : raw_value(v)
+ {
+ unsigned long val = 0;
+ /* unsigned long can be wider than 32 bits; refuse values the cast would silently wrap */
+ if (rspamd_strtoul(v.data(), v.size(), &val) && val <= UINT32_MAX) {
+ numeric_value = static_cast<std::uint32_t>(val);
+ }
+ }
+
+ /* Raw text exactly as passed to the constructor. */
+ constexpr std::string_view get_string_value() const override
+ {
+ return raw_value;
+ }
+ /* Parsed number, or std::nullopt if none was stored. */
+ std::optional<std::uint32_t> get_numeric_value() const
+ {
+ return numeric_value;
+ }
+};
+
+// Boolean/flag component
+/* Flag component of kind `hidden`: the constructor always sets `present` to true. */
+struct html_component_hidden : html_component_base {
+ bool present;
+
+ explicit constexpr html_component_hidden()
+ : present{true}
+ {
+ }
+
+ /* Textual form of the flag: "true" or "false". */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ if (present) {
+ return "true";
+ }
+ return "false";
+ }
+
+ constexpr auto is_present() const -> bool
+ {
+ return present;
+ }
+};
+
+// Unknown component with both name and value
+/* Catch-all component: remembers both the name and the value it was built from. */
+struct html_component_unknown : html_component_base {
+ std::string_view name;
+ std::string_view value;
+
+ constexpr html_component_unknown(std::string_view attr_name, std::string_view attr_value)
+ : name{attr_name}, value{attr_value}
+ {
+ }
+
+ /* Only the value part is reported as the string value. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+
+ constexpr auto get_name() const -> std::string_view
+ {
+ return name;
+ }
+};
+
+// Typography components
+/* Component of kind `font_family`: a thin wrapper around the string view given at construction. */
+struct html_component_font_family : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_font_family(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/*
+ * Component of kind `font_size`: keeps the raw text plus, when rspamd_strtoul()
+ * accepts it and the result fits into 32 bits, the parsed number.
+ */
+struct html_component_font_size : html_component_base {
+ std::string_view raw_value;
+ std::optional<std::uint32_t> numeric_value; /* stays empty unless parsing succeeded */
+
+ explicit html_component_font_size(std::string_view v)
+ : raw_value(v)
+ {
+ unsigned long val = 0;
+ /* unsigned long can be wider than 32 bits; refuse values the cast would silently wrap */
+ if (rspamd_strtoul(v.data(), v.size(), &val) && val <= UINT32_MAX) {
+ numeric_value = static_cast<std::uint32_t>(val);
+ }
+ }
+
+ /* Raw text exactly as passed to the constructor. */
+ constexpr std::string_view get_string_value() const override
+ {
+ return raw_value;
+ }
+ /* Parsed number, or std::nullopt if none was stored. */
+ std::optional<std::uint32_t> get_numeric_value() const
+ {
+ return numeric_value;
+ }
+};
+
+/* Component of kind `font_weight`: a thin wrapper around the string view given at construction. */
+struct html_component_font_weight : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_font_weight(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `font_style`: a thin wrapper around the string view given at construction. */
+struct html_component_font_style : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_font_style(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `text_align`: a thin wrapper around the string view given at construction. */
+struct html_component_text_align : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_text_align(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `text_decoration`: a thin wrapper around the string view given at construction. */
+struct html_component_text_decoration : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_text_decoration(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/*
+ * Component of kind `line_height`: keeps the raw text plus, when rspamd_strtoul()
+ * accepts it and the result fits into 32 bits, the parsed number.
+ */
+struct html_component_line_height : html_component_base {
+ std::string_view raw_value;
+ std::optional<std::uint32_t> numeric_value; /* stays empty unless parsing succeeded */
+
+ explicit html_component_line_height(std::string_view v)
+ : raw_value(v)
+ {
+ unsigned long val = 0;
+ /* unsigned long can be wider than 32 bits; refuse values the cast would silently wrap */
+ if (rspamd_strtoul(v.data(), v.size(), &val) && val <= UINT32_MAX) {
+ numeric_value = static_cast<std::uint32_t>(val);
+ }
+ }
+
+ /* Raw text exactly as passed to the constructor. */
+ std::string_view get_string_value() const override
+ {
+ return raw_value;
+ }
+ /* Parsed number, or std::nullopt if none was stored. */
+ std::optional<std::uint32_t> get_numeric_value() const
+ {
+ return numeric_value;
+ }
+};
+
+// Layout components (most are string-based for flexibility)
+/* Component of kind `margin`: a thin wrapper around the string view given at construction. */
+struct html_component_margin : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_margin(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `margin_top`: a thin wrapper around the string view given at construction. */
+struct html_component_margin_top : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_margin_top(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `margin_bottom`: a thin wrapper around the string view given at construction. */
+struct html_component_margin_bottom : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_margin_bottom(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `margin_left`: a thin wrapper around the string view given at construction. */
+struct html_component_margin_left : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_margin_left(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `margin_right`: a thin wrapper around the string view given at construction. */
+struct html_component_margin_right : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_margin_right(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `padding`: a thin wrapper around the string view given at construction. */
+struct html_component_padding : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_padding(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `padding_top`: a thin wrapper around the string view given at construction. */
+struct html_component_padding_top : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_padding_top(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `padding_bottom`: a thin wrapper around the string view given at construction. */
+struct html_component_padding_bottom : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_padding_bottom(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `padding_left`: a thin wrapper around the string view given at construction. */
+struct html_component_padding_left : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_padding_left(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `padding_right`: a thin wrapper around the string view given at construction. */
+struct html_component_padding_right : html_component_base {
+ std::string_view value;
+
+ explicit constexpr html_component_padding_right(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `border`: a thin wrapper around the string view given at construction. */
+struct html_component_border : html_component_base {
+ std::string_view value;
+
+ explicit html_component_border(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `border_color`: a thin wrapper around the string view given at construction. */
+struct html_component_border_color : html_component_base {
+ std::string_view value;
+
+ explicit html_component_border_color(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/*
+ * Component of kind `border_width`: keeps the raw text plus, when rspamd_strtoul()
+ * accepts it and the result fits into 32 bits, the parsed number.
+ */
+struct html_component_border_width : html_component_base {
+ std::string_view raw_value;
+ std::optional<std::uint32_t> numeric_value; /* stays empty unless parsing succeeded */
+
+ explicit html_component_border_width(std::string_view v)
+ : raw_value(v)
+ {
+ unsigned long val = 0;
+ /* unsigned long can be wider than 32 bits; refuse values the cast would silently wrap */
+ if (rspamd_strtoul(v.data(), v.size(), &val) && val <= UINT32_MAX) {
+ numeric_value = static_cast<std::uint32_t>(val);
+ }
+ }
+
+ /* Raw text exactly as passed to the constructor. */
+ std::string_view get_string_value() const override
+ {
+ return raw_value;
+ }
+ /* Parsed number, or std::nullopt if none was stored. */
+ std::optional<std::uint32_t> get_numeric_value() const
+ {
+ return numeric_value;
+ }
+};
+
+/* Component of kind `border_style`: a thin wrapper around the string view given at construction. */
+struct html_component_border_style : html_component_base {
+ std::string_view value;
+
+ explicit html_component_border_style(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+// Display components
+/* Component of kind `display`: a thin wrapper around the string view given at construction. */
+struct html_component_display : html_component_base {
+ std::string_view value;
+
+ explicit html_component_display(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `visibility`: a thin wrapper around the string view given at construction. */
+struct html_component_visibility : html_component_base {
+ std::string_view value;
+
+ explicit html_component_visibility(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/*
+ * Component of kind `opacity`: keeps the raw text plus, when it starts with a
+ * number in the range [0, 1], that number as a float.
+ */
+struct html_component_opacity : html_component_base {
+ std::string_view raw_value;
+ std::optional<float> numeric_value; /* stays empty when nothing was parsed or the value is outside [0, 1] */
+
+ explicit html_component_opacity(std::string_view v)
+ : raw_value(v)
+ {
+ /*
+ * std::strtof() scans until it meets a character that cannot belong to
+ * a number, so it needs NUL-terminated input. A string_view carries no
+ * terminator: parse a bounded, terminated copy instead of v.data().
+ */
+ char buf[32];
+ const auto len = v.copy(buf, sizeof(buf) - 1);
+ buf[len] = '\0';
+
+ char *endptr = nullptr;
+ auto val = std::strtof(buf, &endptr);
+ if (endptr != buf && val >= 0.0f && val <= 1.0f) {
+ numeric_value = val;
+ }
+ }
+
+ /* Raw text exactly as passed to the constructor. */
+ std::string_view get_string_value() const override
+ {
+ return raw_value;
+ }
+ /* Parsed opacity, or std::nullopt if none was stored. */
+ std::optional<float> get_numeric_value() const
+ {
+ return numeric_value;
+ }
+};
+
+// Additional dimension components
+/*
+ * Component of kind `min_width`: keeps the raw text plus, when rspamd_strtoul()
+ * accepts it and the result fits into 32 bits, the parsed number.
+ */
+struct html_component_min_width : html_component_base {
+ std::string_view raw_value;
+ std::optional<std::uint32_t> numeric_value; /* stays empty unless parsing succeeded */
+
+ explicit html_component_min_width(std::string_view v)
+ : raw_value(v)
+ {
+ unsigned long val = 0;
+ /* unsigned long can be wider than 32 bits; refuse values the cast would silently wrap */
+ if (rspamd_strtoul(v.data(), v.size(), &val) && val <= UINT32_MAX) {
+ numeric_value = static_cast<std::uint32_t>(val);
+ }
+ }
+
+ /* Raw text exactly as passed to the constructor. */
+ std::string_view get_string_value() const override
+ {
+ return raw_value;
+ }
+ /* Parsed number, or std::nullopt if none was stored. */
+ std::optional<std::uint32_t> get_numeric_value() const
+ {
+ return numeric_value;
+ }
+};
+
+/*
+ * Component of kind `max_width`: keeps the raw text plus, when rspamd_strtoul()
+ * accepts it and the result fits into 32 bits, the parsed number.
+ */
+struct html_component_max_width : html_component_base {
+ std::string_view raw_value;
+ std::optional<std::uint32_t> numeric_value; /* stays empty unless parsing succeeded */
+
+ explicit html_component_max_width(std::string_view v)
+ : raw_value(v)
+ {
+ unsigned long val = 0;
+ /* unsigned long can be wider than 32 bits; refuse values the cast would silently wrap */
+ if (rspamd_strtoul(v.data(), v.size(), &val) && val <= UINT32_MAX) {
+ numeric_value = static_cast<std::uint32_t>(val);
+ }
+ }
+
+ /* Raw text exactly as passed to the constructor. */
+ std::string_view get_string_value() const override
+ {
+ return raw_value;
+ }
+ /* Parsed number, or std::nullopt if none was stored. */
+ std::optional<std::uint32_t> get_numeric_value() const
+ {
+ return numeric_value;
+ }
+};
+
+/*
+ * Component of kind `min_height`: keeps the raw text plus, when rspamd_strtoul()
+ * accepts it and the result fits into 32 bits, the parsed number.
+ */
+struct html_component_min_height : html_component_base {
+ std::string_view raw_value;
+ std::optional<std::uint32_t> numeric_value; /* stays empty unless parsing succeeded */
+
+ explicit html_component_min_height(std::string_view v)
+ : raw_value(v)
+ {
+ unsigned long val = 0;
+ /* unsigned long can be wider than 32 bits; refuse values the cast would silently wrap */
+ if (rspamd_strtoul(v.data(), v.size(), &val) && val <= UINT32_MAX) {
+ numeric_value = static_cast<std::uint32_t>(val);
+ }
+ }
+
+ /* Raw text exactly as passed to the constructor. */
+ std::string_view get_string_value() const override
+ {
+ return raw_value;
+ }
+ /* Parsed number, or std::nullopt if none was stored. */
+ std::optional<std::uint32_t> get_numeric_value() const
+ {
+ return numeric_value;
+ }
+};
+
+/*
+ * Component of kind `max_height`: keeps the raw text plus, when rspamd_strtoul()
+ * accepts it and the result fits into 32 bits, the parsed number.
+ */
+struct html_component_max_height : html_component_base {
+ std::string_view raw_value;
+ std::optional<std::uint32_t> numeric_value; /* stays empty unless parsing succeeded */
+
+ explicit html_component_max_height(std::string_view v)
+ : raw_value(v)
+ {
+ unsigned long val = 0;
+ /* unsigned long can be wider than 32 bits; refuse values the cast would silently wrap */
+ if (rspamd_strtoul(v.data(), v.size(), &val) && val <= UINT32_MAX) {
+ numeric_value = static_cast<std::uint32_t>(val);
+ }
+ }
+
+ /* Raw text exactly as passed to the constructor. */
+ std::string_view get_string_value() const override
+ {
+ return raw_value;
+ }
+ /* Parsed number, or std::nullopt if none was stored. */
+ std::optional<std::uint32_t> get_numeric_value() const
+ {
+ return numeric_value;
+ }
+};
+
+// Table components
+/*
+ * Component of kind `cellpadding`: keeps the raw text plus, when rspamd_strtoul()
+ * accepts it and the result fits into 32 bits, the parsed number.
+ */
+struct html_component_cellpadding : html_component_base {
+ std::string_view raw_value;
+ std::optional<std::uint32_t> numeric_value; /* stays empty unless parsing succeeded */
+
+ explicit html_component_cellpadding(std::string_view v)
+ : raw_value(v)
+ {
+ unsigned long val = 0;
+ /* unsigned long can be wider than 32 bits; refuse values the cast would silently wrap */
+ if (rspamd_strtoul(v.data(), v.size(), &val) && val <= UINT32_MAX) {
+ numeric_value = static_cast<std::uint32_t>(val);
+ }
+ }
+
+ /* Raw text exactly as passed to the constructor. */
+ std::string_view get_string_value() const override
+ {
+ return raw_value;
+ }
+ /* Parsed number, or std::nullopt if none was stored. */
+ std::optional<std::uint32_t> get_numeric_value() const
+ {
+ return numeric_value;
+ }
+};
+
+/*
+ * Component of kind `cellspacing`: keeps the raw text plus, when rspamd_strtoul()
+ * accepts it and the result fits into 32 bits, the parsed number.
+ */
+struct html_component_cellspacing : html_component_base {
+ std::string_view raw_value;
+ std::optional<std::uint32_t> numeric_value; /* stays empty unless parsing succeeded */
+
+ explicit html_component_cellspacing(std::string_view v)
+ : raw_value(v)
+ {
+ unsigned long val = 0;
+ /* unsigned long can be wider than 32 bits; refuse values the cast would silently wrap */
+ if (rspamd_strtoul(v.data(), v.size(), &val) && val <= UINT32_MAX) {
+ numeric_value = static_cast<std::uint32_t>(val);
+ }
+ }
+
+ /* Raw text exactly as passed to the constructor. */
+ std::string_view get_string_value() const override
+ {
+ return raw_value;
+ }
+ /* Parsed number, or std::nullopt if none was stored. */
+ std::optional<std::uint32_t> get_numeric_value() const
+ {
+ return numeric_value;
+ }
+};
+
+/* Component of kind `valign`: a thin wrapper around the string view given at construction. */
+struct html_component_valign : html_component_base {
+ std::string_view value;
+
+ explicit html_component_valign(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `align`: a thin wrapper around the string view given at construction. */
+struct html_component_align : html_component_base {
+ std::string_view value;
+
+ explicit html_component_align(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+// Form components
+/* Component of kind `type`: a thin wrapper around the string view given at construction. */
+struct html_component_type : html_component_base {
+ std::string_view value;
+
+ explicit html_component_type(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `value`: a thin wrapper around the string view given at construction. */
+struct html_component_value : html_component_base {
+ std::string_view value;
+
+ explicit html_component_value(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `placeholder`: a thin wrapper around the string view given at construction. */
+struct html_component_placeholder : html_component_base {
+ std::string_view value;
+
+ explicit html_component_placeholder(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+// Boolean form components
+/* Flag component of kind `disabled`: the constructor always sets `present` to true. */
+struct html_component_disabled : html_component_base {
+ bool present;
+
+ explicit constexpr html_component_disabled()
+ : present{true}
+ {
+ }
+
+ /* Textual form of the flag: "true" or "false". */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ if (present) {
+ return "true";
+ }
+ return "false";
+ }
+
+ constexpr auto is_present() const -> bool
+ {
+ return present;
+ }
+};
+
+/* Flag component of kind `readonly`: the constructor always sets `present` to true. */
+struct html_component_readonly : html_component_base {
+ bool present;
+
+ explicit constexpr html_component_readonly()
+ : present{true}
+ {
+ }
+
+ /* Textual form of the flag: "true" or "false". */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ if (present) {
+ return "true";
+ }
+ return "false";
+ }
+
+ constexpr auto is_present() const -> bool
+ {
+ return present;
+ }
+};
+
+/* Flag component of kind `checked`: the constructor always sets `present` to true. */
+struct html_component_checked : html_component_base {
+ bool present;
+
+ explicit constexpr html_component_checked()
+ : present{true}
+ {
+ }
+
+ /* Textual form of the flag: "true" or "false". */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ if (present) {
+ return "true";
+ }
+ return "false";
+ }
+
+ constexpr auto is_present() const -> bool
+ {
+ return present;
+ }
+};
+
+/* Flag component of kind `selected`: the constructor always sets `present` to true. */
+struct html_component_selected : html_component_base {
+ bool present;
+
+ explicit constexpr html_component_selected()
+ : present{true}
+ {
+ }
+
+ /* Textual form of the flag: "true" or "false". */
+ constexpr auto get_string_value() const -> std::string_view override
+ {
+ if (present) {
+ return "true";
+ }
+ return "false";
+ }
+
+ constexpr auto is_present() const -> bool
+ {
+ return present;
+ }
+};
+
+// Link & media components
+/* Component of kind `target`: a thin wrapper around the string view given at construction. */
+struct html_component_target : html_component_base {
+ std::string_view value;
+
+ explicit html_component_target(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `title`: a thin wrapper around the string view given at construction. */
+struct html_component_title : html_component_base {
+ std::string_view value;
+
+ explicit html_component_title(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `src`: a thin wrapper around the string view given at construction. */
+struct html_component_src : html_component_base {
+ std::string_view value;
+
+ explicit html_component_src(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+// Meta components
+/* Component of kind `charset`: a thin wrapper around the string view given at construction. */
+struct html_component_charset : html_component_base {
+ std::string_view value;
+
+ explicit html_component_charset(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `content`: a thin wrapper around the string view given at construction. */
+struct html_component_content : html_component_base {
+ std::string_view value;
+
+ explicit html_component_content(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `http_equiv`: a thin wrapper around the string view given at construction. */
+struct html_component_http_equiv : html_component_base {
+ std::string_view value;
+
+ explicit html_component_http_equiv(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+// Accessibility components
+/* Component of kind `role`: a thin wrapper around the string view given at construction. */
+struct html_component_role : html_component_base {
+ std::string_view value;
+
+ explicit html_component_role(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/*
+ * Component of kind `tabindex`: keeps the raw text plus, when rspamd_strtol()
+ * accepts it and the result fits into a signed 32-bit integer, the parsed number.
+ */
+struct html_component_tabindex : html_component_base {
+ std::string_view raw_value;
+ std::optional<std::int32_t> numeric_value; /* stays empty unless parsing succeeded */
+
+ explicit html_component_tabindex(std::string_view v)
+ : raw_value(v)
+ {
+ long val = 0;
+ /* long can be wider than 32 bits; refuse values the cast would silently wrap */
+ if (rspamd_strtol(v.data(), v.size(), &val) && val >= INT32_MIN && val <= INT32_MAX) {
+ numeric_value = static_cast<std::int32_t>(val);
+ }
+ }
+
+ /* Raw text exactly as passed to the constructor. */
+ std::string_view get_string_value() const override
+ {
+ return raw_value;
+ }
+ /* Parsed number, or std::nullopt if none was stored. */
+ std::optional<std::int32_t> get_numeric_value() const
+ {
+ return numeric_value;
+ }
+};
+
+// Background components
+/* Component of kind `background`: a thin wrapper around the string view given at construction. */
+struct html_component_background : html_component_base {
+ std::string_view value;
+
+ explicit html_component_background(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `background_image`: a thin wrapper around the string view given at construction. */
+struct html_component_background_image : html_component_base {
+ std::string_view value;
+
+ explicit html_component_background_image(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `background_color`: a thin wrapper around the string view given at construction. */
+struct html_component_background_color : html_component_base {
+ std::string_view value;
+
+ explicit html_component_background_color(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `background_repeat`: a thin wrapper around the string view given at construction. */
+struct html_component_background_repeat : html_component_base {
+ std::string_view value;
+
+ explicit html_component_background_repeat(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `background_position`: a thin wrapper around the string view given at construction. */
+struct html_component_background_position : html_component_base {
+ std::string_view value;
+
+ explicit html_component_background_position(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+// Email tracking components
+/* Component of kind `data_track`: a thin wrapper around the string view given at construction. */
+struct html_component_data_track : html_component_base {
+ std::string_view value;
+
+ explicit html_component_data_track(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `data_id`: a thin wrapper around the string view given at construction. */
+struct html_component_data_id : html_component_base {
+ std::string_view value;
+
+ explicit html_component_data_id(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* Component of kind `data_url`: a thin wrapper around the string view given at construction. */
+struct html_component_data_url : html_component_base {
+ std::string_view value;
+
+ explicit html_component_data_url(std::string_view text)
+ : value{text}
+ {
+ }
+
+ /* Hands back the stored view unchanged. */
+ auto get_string_value() const -> std::string_view override
+ {
+ return value;
+ }
+};
+
+/* The variant type that holds all possible components: exactly one alternative is active per value.
+ * The order of the alternatives below determines what std::variant::index() reports for each type;
+ * html_component_unknown (the only alternative that also stores a name) is listed last. */
+using html_tag_component = std::variant<
+ html_component_name,
+ html_component_href,
+ html_component_color,
+ html_component_bgcolor,
+ html_component_style,
+ html_component_class,
+ html_component_width,
+ html_component_height,
+ html_component_size,
+ html_component_rel,
+ html_component_alt,
+ html_component_id,
+ html_component_hidden,
+ // Typography
+ html_component_font_family,
+ html_component_font_size,
+ html_component_font_weight,
+ html_component_font_style,
+ html_component_text_align,
+ html_component_text_decoration,
+ html_component_line_height,
+ // Layout
+ html_component_margin,
+ html_component_margin_top,
+ html_component_margin_bottom,
+ html_component_margin_left,
+ html_component_margin_right,
+ html_component_padding,
+ html_component_padding_top,
+ html_component_padding_bottom,
+ html_component_padding_left,
+ html_component_padding_right,
+ html_component_border,
+ html_component_border_color,
+ html_component_border_width,
+ html_component_border_style,
+ // Display
+ html_component_display,
+ html_component_visibility,
+ html_component_opacity,
+ // Dimensions
+ html_component_min_width,
+ html_component_max_width,
+ html_component_min_height,
+ html_component_max_height,
+ // Table
+ html_component_cellpadding,
+ html_component_cellspacing,
+ html_component_valign,
+ html_component_align,
+ // Form
+ html_component_type,
+ html_component_value,
+ html_component_placeholder,
+ html_component_disabled,
+ html_component_readonly,
+ html_component_checked,
+ html_component_selected,
+ // Link & media
+ html_component_target,
+ html_component_title,
+ html_component_src,
+ // Meta
+ html_component_charset,
+ html_component_content,
+ html_component_http_equiv,
+ // Accessibility
+ html_component_role,
+ html_component_tabindex,
+ // Background
+ html_component_background,
+ html_component_background_image,
+ html_component_background_color,
+ html_component_background_repeat,
+ html_component_background_position,
+ // Email tracking
+ html_component_data_track,
+ html_component_data_id,
+ html_component_data_url,
+ // Unknown
+ html_component_unknown>;
+
+/**
+ * Builds a component variant from an attribute name/value pair.
+ * Only the declaration is visible here; presumably names that match no known
+ * component end up as html_component_unknown -- TODO confirm in the definition.
+ * @param name attribute name
+ * @param value attribute value
+ * @return variant component holding one of the html_tag_component alternatives
+ */
+auto html_component_from_string(std::string_view name, std::string_view value) -> html_tag_component;
+
/* Public tags flags */
/* XML tag */
#define FL_XML (1u << CM_USER_SHIFT)
@@ -62,23 +1270,7 @@ enum class html_component_type : std::uint8_t {
#define FL_COMMENT (1 << (CM_USER_SHIFT + 6))
#define FL_VIRTUAL (1 << (CM_USER_SHIFT + 7))
-/**
- * Returns component type from a string
- * @param st
- * @return
- */
-auto html_component_from_string(const std::string_view &st) -> std::optional<html_component_type>;
-
using html_tag_extra_t = std::variant<std::monostate, struct rspamd_url *, struct html_image *>;
-struct html_tag_component {
- html_component_type type;
- std::string_view value;
-
- html_tag_component(html_component_type type, std::string_view value)
- : type(type), value(value)
- {
- }
-};
/* Pairing closing tag representation */
struct html_closing_tag {
@@ -105,26 +1297,128 @@ struct html_tag {
std::vector<struct html_tag *> children;
struct html_tag *parent;
- auto find_component(html_component_type what) const -> std::optional<std::string_view>
+ // Template method to find component by type
+ template<typename T>
+ auto find_component() const -> std::optional<const T *>
{
for (const auto &comp: components) {
- if (comp.type == what) {
- return comp.value;
+ if (std::holds_alternative<T>(comp)) {
+ return &std::get<T>(comp);
}
}
+ return std::nullopt;
+ }
+ // Helper methods for common component access
+ auto find_href() const -> std::optional<std::string_view>
+ {
+ if (auto comp = find_component<html_component_href>()) {
+ return comp.value()->value;
+ }
return std::nullopt;
}
- auto find_component(std::optional<html_component_type> what) const -> std::optional<std::string_view>
+ auto find_class() const -> std::optional<std::string_view>
{
- if (what) {
- return find_component(what.value());
+ if (auto comp = find_component<html_component_class>()) {
+ return comp.value()->value;
}
+ return std::nullopt;
+ }
+ auto find_id() const -> std::optional<std::string_view>
+ {
+ if (auto comp = find_component<html_component_id>()) {
+ return comp.value()->value;
+ }
+ return std::nullopt;
+ }
+
+ auto find_width() const -> std::optional<std::uint32_t>
+ {
+ if (auto comp = find_component<html_component_width>()) {
+ return comp.value()->get_numeric_value();
+ }
+ return std::nullopt;
+ }
+
+ auto find_height() const -> std::optional<std::uint32_t>
+ {
+ if (auto comp = find_component<html_component_height>()) {
+ return comp.value()->get_numeric_value();
+ }
return std::nullopt;
}
+ auto find_style() const -> std::optional<std::string_view>
+ {
+ if (auto comp = find_component<html_component_style>()) {
+ return comp.value()->value;
+ }
+ return std::nullopt;
+ }
+
+ auto find_alt() const -> std::optional<std::string_view>
+ {
+ if (auto comp = find_component<html_component_alt>()) {
+ return comp.value()->value;
+ }
+ return std::nullopt;
+ }
+
+ auto find_rel() const -> std::optional<std::string_view>
+ {
+ if (auto comp = find_component<html_component_rel>()) {
+ return comp.value()->value;
+ }
+ return std::nullopt;
+ }
+
+ auto is_hidden() const -> bool
+ {
+ return find_component<html_component_hidden>().has_value();
+ }
+
+ auto find_unknown_component(std::string_view attr_name) const -> std::optional<std::string_view>
+ {
+ for (const auto &comp: components) {
+ if (std::holds_alternative<html_component_unknown>(comp)) {
+ const auto &unknown = std::get<html_component_unknown>(comp);
+ if (unknown.name == attr_name) {
+ return unknown.value;
+ }
+ }
+ }
+ return std::nullopt;
+ }
+
+ auto get_unknown_components() const -> std::vector<std::pair<std::string_view, std::string_view>>
+ {
+ std::vector<std::pair<std::string_view, std::string_view>> unknown_attrs;
+ for (const auto &comp: components) {
+ if (std::holds_alternative<html_component_unknown>(comp)) {
+ const auto &unknown = std::get<html_component_unknown>(comp);
+ unknown_attrs.emplace_back(unknown.name, unknown.value);
+ }
+ }
+ return unknown_attrs;
+ }
+
+ // Generic visitor method for processing all components
+ template<typename Visitor>
+ auto visit_components(Visitor &&visitor) const
+ {
+ for (const auto &comp: components) {
+ std::visit(std::forward<Visitor>(visitor), comp);
+ }
+ }
+
+ // Find any component by attribute name
+ auto find_component_by_name(std::string_view attr_name) const -> std::optional<std::string_view>;
+
+ // Get all attributes as name-value pairs
+ auto get_all_attributes() const -> std::vector<std::pair<std::string_view, std::string_view>>;
+
auto clear(void) -> void
{
id = Tag_UNKNOWN;
@@ -137,7 +1431,7 @@ struct html_tag {
closing.clear();
}
- constexpr auto get_content_length() const -> std::size_t
+ auto get_content_length() const -> std::size_t
{
if (flags & (FL_IGNORE | CM_HEAD)) {
return 0;
diff --git a/src/libserver/http/http_connection.c b/src/libserver/http/http_connection.c
index baf37a385..b5d70fc1c 100644
--- a/src/libserver/http/http_connection.c
+++ b/src/libserver/http/http_connection.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -1670,7 +1670,22 @@ int rspamd_http_message_write_header(const char *mime_type, gboolean encrypted,
{
char datebuf[64];
int meth_len = 0;
- const char *conn_type = "close";
+ const char *server_conn_header, *client_conn_header;
+
+ /* Set up connection header strings based on flags and connection type */
+ if (msg->flags & RSPAMD_HTTP_FLAG_HAS_CONNECTION_HEADER) {
+ server_conn_header = "";
+ client_conn_header = "";
+ }
+ else {
+ server_conn_header = "Connection: close\r\n";
+ if (conn->opts & RSPAMD_HTTP_CLIENT_KEEP_ALIVE) {
+ client_conn_header = "Connection: keep-alive\r\n";
+ }
+ else {
+ client_conn_header = "Connection: close\r\n";
+ }
+ }
if (conn->type == RSPAMD_HTTP_SERVER) {
/* Format reply */
@@ -1712,12 +1727,14 @@ int rspamd_http_message_write_header(const char *mime_type, gboolean encrypted,
meth_len =
rspamd_snprintf(repbuf, replen,
"HTTP/1.1 %d %T\r\n"
- "Connection: close\r\n"
+ "%s"
"Server: %s\r\n"
"Date: %s\r\n"
"Content-Length: %z\r\n"
"Content-Type: %s", /* NO \r\n at the end ! */
- msg->code, &status, priv->ctx->config.server_hdr,
+ msg->code, &status,
+ server_conn_header,
+ priv->ctx->config.server_hdr,
datebuf,
bodylen, mime_type);
}
@@ -1725,11 +1742,13 @@ int rspamd_http_message_write_header(const char *mime_type, gboolean encrypted,
meth_len =
rspamd_snprintf(repbuf, replen,
"HTTP/1.1 %d %T\r\n"
- "Connection: close\r\n"
+ "%s"
"Server: %s\r\n"
"Date: %s\r\n"
"Content-Length: %z", /* NO \r\n at the end ! */
- msg->code, &status, priv->ctx->config.server_hdr,
+ msg->code, &status,
+ server_conn_header,
+ priv->ctx->config.server_hdr,
datebuf,
bodylen);
}
@@ -1737,11 +1756,12 @@ int rspamd_http_message_write_header(const char *mime_type, gboolean encrypted,
/* External reply */
rspamd_printf_fstring(buf,
"HTTP/1.1 200 OK\r\n"
- "Connection: close\r\n"
+ "%s"
"Server: %s\r\n"
"Date: %s\r\n"
"Content-Length: %z\r\n"
"Content-Type: application/octet-stream\r\n",
+ server_conn_header,
priv->ctx->config.server_hdr,
datebuf, enclen);
}
@@ -1750,12 +1770,14 @@ int rspamd_http_message_write_header(const char *mime_type, gboolean encrypted,
meth_len =
rspamd_printf_fstring(buf,
"HTTP/1.1 %d %T\r\n"
- "Connection: close\r\n"
+ "%s"
"Server: %s\r\n"
"Date: %s\r\n"
"Content-Length: %z\r\n"
"Content-Type: %s\r\n",
- msg->code, &status, priv->ctx->config.server_hdr,
+ msg->code, &status,
+ server_conn_header,
+ priv->ctx->config.server_hdr,
datebuf,
bodylen, mime_type);
}
@@ -1763,11 +1785,13 @@ int rspamd_http_message_write_header(const char *mime_type, gboolean encrypted,
meth_len =
rspamd_printf_fstring(buf,
"HTTP/1.1 %d %T\r\n"
- "Connection: close\r\n"
+ "%s"
"Server: %s\r\n"
"Date: %s\r\n"
"Content-Length: %z\r\n",
- msg->code, &status, priv->ctx->config.server_hdr,
+ msg->code, &status,
+ server_conn_header,
+ priv->ctx->config.server_hdr,
datebuf,
bodylen);
}
@@ -1804,10 +1828,6 @@ int rspamd_http_message_write_header(const char *mime_type, gboolean encrypted,
else {
/* Client request */
- if (conn->opts & RSPAMD_HTTP_CLIENT_KEEP_ALIVE) {
- conn_type = "keep-alive";
- }
-
/* Format request */
enclen += RSPAMD_FSTRING_LEN(msg->url) +
strlen(http_method_str(msg->method)) + 1;
@@ -1819,21 +1839,21 @@ int rspamd_http_message_write_header(const char *mime_type, gboolean encrypted,
"%s %s HTTP/1.0\r\n"
"Content-Length: %z\r\n"
"Content-Type: application/octet-stream\r\n"
- "Connection: %s\r\n",
+ "%s",
"POST",
"/post",
enclen,
- conn_type);
+ client_conn_header);
}
else {
rspamd_printf_fstring(buf,
"%s %V HTTP/1.0\r\n"
"Content-Length: %z\r\n"
- "Connection: %s\r\n",
+ "%s",
http_method_str(msg->method),
msg->url,
bodylen,
- conn_type);
+ client_conn_header);
if (bodylen > 0) {
if (mime_type == NULL) {
@@ -1857,26 +1877,26 @@ int rspamd_http_message_write_header(const char *mime_type, gboolean encrypted,
if (rspamd_http_message_is_standard_port(msg)) {
rspamd_printf_fstring(buf,
"%s %s HTTP/1.1\r\n"
- "Connection: %s\r\n"
+ "%s"
"Host: %s\r\n"
"Content-Length: %z\r\n"
"Content-Type: application/octet-stream\r\n",
"POST",
"/post",
- conn_type,
+ client_conn_header,
host,
enclen);
}
else {
rspamd_printf_fstring(buf,
"%s %s HTTP/1.1\r\n"
- "Connection: %s\r\n"
+ "%s"
"Host: %s:%d\r\n"
"Content-Length: %z\r\n"
"Content-Type: application/octet-stream\r\n",
"POST",
"/post",
- conn_type,
+ client_conn_header,
host,
msg->port,
enclen);
@@ -1888,21 +1908,21 @@ int rspamd_http_message_write_header(const char *mime_type, gboolean encrypted,
if ((msg->flags & RSPAMD_HTTP_FLAG_HAS_HOST_HEADER)) {
rspamd_printf_fstring(buf,
"%s %s://%s:%d/%V HTTP/1.1\r\n"
- "Connection: %s\r\n"
+ "%s"
"Content-Length: %z\r\n",
http_method_str(msg->method),
(conn->opts & RSPAMD_HTTP_CLIENT_SSL) ? "https" : "http",
host,
msg->port,
msg->url,
- conn_type,
+ client_conn_header,
bodylen);
}
else {
if (rspamd_http_message_is_standard_port(msg)) {
rspamd_printf_fstring(buf,
"%s %s://%s:%d/%V HTTP/1.1\r\n"
- "Connection: %s\r\n"
+ "%s"
"Host: %s\r\n"
"Content-Length: %z\r\n",
http_method_str(msg->method),
@@ -1910,14 +1930,14 @@ int rspamd_http_message_write_header(const char *mime_type, gboolean encrypted,
host,
msg->port,
msg->url,
- conn_type,
+ client_conn_header,
host,
bodylen);
}
else {
rspamd_printf_fstring(buf,
"%s %s://%s:%d/%V HTTP/1.1\r\n"
- "Connection: %s\r\n"
+ "%s"
"Host: %s:%d\r\n"
"Content-Length: %z\r\n",
http_method_str(msg->method),
@@ -1925,7 +1945,7 @@ int rspamd_http_message_write_header(const char *mime_type, gboolean encrypted,
host,
msg->port,
msg->url,
- conn_type,
+ client_conn_header,
host,
msg->port,
bodylen);
@@ -1937,35 +1957,35 @@ int rspamd_http_message_write_header(const char *mime_type, gboolean encrypted,
if ((msg->flags & RSPAMD_HTTP_FLAG_HAS_HOST_HEADER)) {
rspamd_printf_fstring(buf,
"%s %V HTTP/1.1\r\n"
- "Connection: %s\r\n"
+ "%s"
"Content-Length: %z\r\n",
http_method_str(msg->method),
msg->url,
- conn_type,
+ client_conn_header,
bodylen);
}
else {
if (rspamd_http_message_is_standard_port(msg)) {
rspamd_printf_fstring(buf,
"%s %V HTTP/1.1\r\n"
- "Connection: %s\r\n"
+ "%s"
"Host: %s\r\n"
"Content-Length: %z\r\n",
http_method_str(msg->method),
msg->url,
- conn_type,
+ client_conn_header,
host,
bodylen);
}
else {
rspamd_printf_fstring(buf,
"%s %V HTTP/1.1\r\n"
- "Connection: %s\r\n"
+ "%s"
"Host: %s:%d\r\n"
"Content-Length: %z\r\n",
http_method_str(msg->method),
msg->url,
- conn_type,
+ client_conn_header,
host,
msg->port,
bodylen);
@@ -2633,4 +2653,4 @@ void rspamd_http_connection_disable_encryption(struct rspamd_http_connection *co
priv->peer_key = NULL;
priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_ENCRYPTED;
}
-} \ No newline at end of file
+}
diff --git a/src/libserver/http/http_connection.h b/src/libserver/http/http_connection.h
index f6ec03d95..466a3edd9 100644
--- a/src/libserver/http/http_connection.h
+++ b/src/libserver/http/http_connection.h
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
+/*
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -80,9 +80,13 @@ struct rspamd_storage_shmem {
*/
#define RSPAMD_HTTP_FLAG_HAS_HOST_HEADER (1 << 7)
/**
+ * Connection header has been set for a message
+ */
+#define RSPAMD_HTTP_FLAG_HAS_CONNECTION_HEADER (1 << 8)
+/**
* Message is intended for SSL connection
*/
-#define RSPAMD_HTTP_FLAG_WANT_SSL (1 << 8)
+#define RSPAMD_HTTP_FLAG_WANT_SSL (1 << 9)
/**
* Options for HTTP connection
*/
diff --git a/src/libserver/http/http_message.c b/src/libserver/http/http_message.c
index 0c9708450..e5e4a0469 100644
--- a/src/libserver/http/http_message.c
+++ b/src/libserver/http/http_message.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -539,6 +539,9 @@ void rspamd_http_message_add_header_len(struct rspamd_http_message *msg,
if (g_ascii_strcasecmp(name, "host") == 0) {
msg->flags |= RSPAMD_HTTP_FLAG_HAS_HOST_HEADER;
}
+ else if (g_ascii_strcasecmp(name, "connection") == 0) {
+ msg->flags |= RSPAMD_HTTP_FLAG_HAS_CONNECTION_HEADER;
+ }
hdr->combined = rspamd_fstring_sized_new(nlen + vlen + 4);
rspamd_printf_fstring(&hdr->combined, "%s: %*s\r\n", name, (int) vlen,
@@ -746,4 +749,4 @@ const char *rspamd_http_message_get_url(struct rspamd_http_message *msg, gsize *
}
return NULL;
-} \ No newline at end of file
+}
diff --git a/src/libserver/http/http_router.c b/src/libserver/http/http_router.c
index aca791a27..459401e9e 100644
--- a/src/libserver/http/http_router.c
+++ b/src/libserver/http/http_router.c
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2019 Vsevolod Stakhov
+/*
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -56,13 +56,13 @@ static void
rspamd_http_entry_free(struct rspamd_http_connection_entry *entry)
{
if (entry != NULL) {
- close(entry->conn->fd);
rspamd_http_connection_unref(entry->conn);
if (entry->rt->finish_handler) {
entry->rt->finish_handler(entry);
}
DL_DELETE(entry->rt->conns, entry);
+ close(entry->conn->fd);
g_free(entry);
}
}
diff --git a/src/libserver/logger/logger.c b/src/libserver/logger/logger.c
index dc0a85a05..600b7f1e1 100644
--- a/src/libserver/logger/logger.c
+++ b/src/libserver/logger/logger.c
@@ -22,7 +22,6 @@
#include "unix-std.h"
#include "logger_private.h"
-
static rspamd_logger_t *default_logger = NULL;
static rspamd_logger_t *emergency_logger = NULL;
static struct rspamd_log_modules *log_modules = NULL;
@@ -30,6 +29,61 @@ static struct rspamd_log_modules *log_modules = NULL;
static const char lf_chr = '\n';
unsigned int rspamd_task_log_id = (unsigned int) -1;
+
+/**
+ * Strip log tag according to the configured policy
+ * @param original_tag original log tag
+ * @param original_len length of original tag
+ * @param dest destination buffer (must hold at least max_len bytes; the result is not NUL-terminated)
+ * @param max_len maximum length allowed
+ * @param policy stripping policy
+ * @return actual length of stripped tag
+ */
+static gsize
+rspamd_log_strip_tag(const char *original_tag, gsize original_len,
+ char *dest, gsize max_len,
+ enum rspamd_log_tag_strip_policy policy)
+{
+ if (original_len <= max_len) {
+ /* No stripping needed */
+ memcpy(dest, original_tag, original_len);
+ return original_len;
+ }
+
+ switch (policy) {
+ case RSPAMD_LOG_TAG_STRIP_RIGHT:
+ /* Cut right part (current behavior) */
+ memcpy(dest, original_tag, max_len);
+ return max_len;
+
+ case RSPAMD_LOG_TAG_STRIP_LEFT:
+ /* Cut left part (take last elements) */
+ memcpy(dest, original_tag + (original_len - max_len), max_len);
+ return max_len;
+
+ case RSPAMD_LOG_TAG_STRIP_MIDDLE:
+ /* Half from start and half from end */
+ if (max_len >= 2) {
+ gsize first_half = max_len / 2;
+ gsize second_half = max_len - first_half;
+
+ memcpy(dest, original_tag, first_half);
+ memcpy(dest + first_half,
+ original_tag + (original_len - second_half),
+ second_half);
+ }
+ else if (max_len == 1) {
+ /* Just take first character */
+ dest[0] = original_tag[0];
+ }
+ return max_len;
+
+ default:
+ /* Fallback to right stripping */
+ memcpy(dest, original_tag, max_len);
+ return max_len;
+ }
+}
RSPAMD_CONSTRUCTOR(rspamd_task_log_init)
{
rspamd_task_log_id = rspamd_logger_add_debug_module("task");
@@ -160,6 +214,10 @@ rspamd_log_open_emergency(rspamd_mempool_t *pool, int flags)
logger->process_type = "main";
logger->pid = getpid();
+ /* Initialize log tag configuration with defaults */
+ logger->max_log_tag_len = RSPAMD_LOG_ID_LEN; /* Keep backward compatibility default */
+ logger->log_tag_strip_policy = RSPAMD_LOG_TAG_STRIP_RIGHT;
+
const struct rspamd_logger_funcs *funcs = &console_log_funcs;
memcpy(&logger->ops, funcs, sizeof(*funcs));
@@ -258,6 +316,28 @@ rspamd_log_open_specific(rspamd_mempool_t *pool,
logger->process_type = ptype;
logger->enabled = TRUE;
+ /* Initialize log tag configuration with defaults */
+ if (cfg && cfg->log_max_tag_len > 0) {
+ logger->max_log_tag_len = MIN(MEMPOOL_UID_LEN, cfg->log_max_tag_len);
+ }
+ else {
+ logger->max_log_tag_len = RSPAMD_LOG_ID_LEN; /* Keep backward compatibility default */
+ }
+
+ logger->log_tag_strip_policy = RSPAMD_LOG_TAG_STRIP_RIGHT;
+
+ if (cfg && cfg->log_tag_strip_policy_str) {
+ if (g_ascii_strcasecmp(cfg->log_tag_strip_policy_str, "left") == 0) {
+ logger->log_tag_strip_policy = RSPAMD_LOG_TAG_STRIP_LEFT;
+ }
+ else if (g_ascii_strcasecmp(cfg->log_tag_strip_policy_str, "middle") == 0) {
+ logger->log_tag_strip_policy = RSPAMD_LOG_TAG_STRIP_MIDDLE;
+ }
+ else {
+ logger->log_tag_strip_policy = RSPAMD_LOG_TAG_STRIP_RIGHT; /* Default */
+ }
+ }
+
/* Set up conditional logging */
if (cfg) {
if (cfg->debug_ip_map != NULL) {
@@ -1026,16 +1106,34 @@ log_time(double now, rspamd_logger_t *rspamd_log, char *timebuf,
}
}
+/**
+ * Process log ID with stripping policy and return the effective length
+ * @param logger logger instance with configuration
+ * @param id original log ID
+ * @param processed_id buffer to store processed ID (must hold MIN(MEMPOOL_UID_LEN, max_log_tag_len) bytes; the result is not NUL-terminated)
+ * @return effective length of processed ID
+ */
static inline int
-rspamd_log_id_strlen(const char *id)
+rspamd_log_process_id(rspamd_logger_t *logger, const char *id, char *processed_id)
{
- for (int i = 0; i < RSPAMD_LOG_ID_LEN; i++) {
- if (G_UNLIKELY(id[i] == '\0')) {
- return i;
- }
+ if (id == NULL) {
+ return 0;
+ }
+
+ gsize original_len = strlen(id);
+ gsize max_len = MIN(MEMPOOL_UID_LEN, logger->max_log_tag_len);
+
+ if (original_len <= max_len) {
+ /* No processing needed */
+ memcpy(processed_id, id, original_len);
+ return original_len;
}
- return RSPAMD_LOG_ID_LEN;
+ /* Apply stripping policy */
+ gsize processed_len = rspamd_log_strip_tag(id, original_len, processed_id, max_len,
+ logger->log_tag_strip_policy);
+
+ return processed_len;
}
void rspamd_log_fill_iov(struct rspamd_logger_iov_ctx *iov_ctx,
@@ -1071,8 +1169,17 @@ void rspamd_log_fill_iov(struct rspamd_logger_iov_ctx *iov_ctx,
if (G_UNLIKELY(log_json)) {
/* Perform JSON logging */
- unsigned int slen = id ? strlen(id) : strlen("(NULL)");
- slen = MIN(RSPAMD_LOG_ID_LEN, slen);
+ char processed_id[MEMPOOL_UID_LEN];
+ int processed_len = 0;
+
+ if (id) {
+ processed_len = rspamd_log_process_id(logger, id, processed_id);
+ }
+ else {
+ strcpy(processed_id, "(NULL)");
+ processed_len = strlen(processed_id);
+ }
+
r = rspamd_snprintf(tmpbuf, sizeof(tmpbuf), "{\"ts\": %f, "
"\"pid\": %P, "
"\"severity\": \"%s\", "
@@ -1085,7 +1192,7 @@ void rspamd_log_fill_iov(struct rspamd_logger_iov_ctx *iov_ctx,
logger->pid,
rspamd_get_log_severity_string(level_flags),
logger->process_type,
- slen, id,
+ processed_len, processed_id,
module,
function);
iov_ctx->iov[0].iov_base = tmpbuf;
@@ -1241,14 +1348,17 @@ void rspamd_log_fill_iov(struct rspamd_logger_iov_ctx *iov_ctx,
glong mremain, mr;
char *m;
+ char processed_id[MEMPOOL_UID_LEN];
+ int processed_len = 0;
modulebuf[0] = '\0';
mremain = sizeof(modulebuf);
m = modulebuf;
if (id != NULL) {
- mr = rspamd_snprintf(m, mremain, "<%*.s>; ", rspamd_log_id_strlen(id),
- id);
+ processed_len = rspamd_log_process_id(logger, id, processed_id);
+ mr = rspamd_snprintf(m, mremain, "<%*.s>; ", processed_len,
+ processed_id);
m += mr;
mremain -= mr;
}
@@ -1300,10 +1410,13 @@ void rspamd_log_fill_iov(struct rspamd_logger_iov_ctx *iov_ctx,
iov_ctx->iov[niov].iov_base = (void *) timebuf;
iov_ctx->iov[niov++].iov_len = strlen(timebuf);
if (id != NULL) {
+ char processed_id[MEMPOOL_UID_LEN];
+ int processed_len = rspamd_log_process_id(logger, id, processed_id);
+
iov_ctx->iov[niov].iov_base = (void *) "; ";
iov_ctx->iov[niov++].iov_len = 2;
- iov_ctx->iov[niov].iov_base = (void *) id;
- iov_ctx->iov[niov++].iov_len = rspamd_log_id_strlen(id);
+ iov_ctx->iov[niov].iov_base = (void *) processed_id;
+ iov_ctx->iov[niov++].iov_len = processed_len;
iov_ctx->iov[niov].iov_base = (void *) ";";
iov_ctx->iov[niov++].iov_len = 1;
}
diff --git a/src/libserver/logger/logger_private.h b/src/libserver/logger/logger_private.h
index 80178ad32..387d8639b 100644
--- a/src/libserver/logger/logger_private.h
+++ b/src/libserver/logger/logger_private.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2023 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -23,6 +23,12 @@
#define REPEATS_MAX 300
#define LOGBUF_LEN 8192
+enum rspamd_log_tag_strip_policy {
+ RSPAMD_LOG_TAG_STRIP_RIGHT = 0, /* Cut right part (current behavior) */
+ RSPAMD_LOG_TAG_STRIP_LEFT, /* Cut left part (take last elements) */
+ RSPAMD_LOG_TAG_STRIP_MIDDLE, /* Half from start and half from end */
+};
+
struct rspamd_log_module {
char *mname;
unsigned int id;
@@ -73,6 +79,10 @@ struct rspamd_logger_s {
gboolean is_debug;
gboolean no_lock;
+ /* Log tag configuration */
+ unsigned int max_log_tag_len;
+ enum rspamd_log_tag_strip_policy log_tag_strip_policy;
+
pid_t pid;
const char *process_type;
struct rspamd_radix_map_helper *debug_ip;
diff --git a/src/libserver/maps/map.c b/src/libserver/maps/map.c
index 51390f24b..6de694eb3 100644
--- a/src/libserver/maps/map.c
+++ b/src/libserver/maps/map.c
@@ -26,6 +26,8 @@
#include "contrib/libev/ev.h"
#include "contrib/uthash/utlist.h"
+#include <worker_util.h>
+
#ifdef SYS_ZSTD
#include "zstd.h"
#else
@@ -84,7 +86,8 @@ RSPAMD_CONSTRUCTOR(rspamd_map_log_init)
}
/**
- * Write HTTP request
+ * Write HTTP request with proper cache validation headers
+ * Uses ETags (If-None-Match) and Last-Modified (If-Modified-Since) for conditional requests
*/
static void
write_http_request(struct http_callback_data *cbd)
@@ -109,7 +112,8 @@ write_http_request(struct http_callback_data *cbd)
}
if (cbd->data->etag) {
rspamd_http_message_add_header_len(msg, "If-None-Match",
- cbd->data->etag->str, cbd->data->etag->len);
+ cbd->data->etag->str,
+ cbd->data->etag->len);
}
}
@@ -295,23 +299,101 @@ rspamd_map_cache_cb(struct ev_loop *loop, ev_timer *w, int revents)
}
}
+/**
+ * Calculate next check time with proper priority for different cache validation mechanisms
+ * Priority: ETags > Last-Modified > Cache expiration headers
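+ * @param map map whose next check is being scheduled
+ * @param bk map backend; its URI is reported in log messages
+ * @param now current time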
+ * @param expires time from cache expiration header
+ * @param map_check_interval base polling interval
+ * @param has_etag whether we have ETag for conditional requests
+ * @param has_last_modified whether we have Last-Modified for conditional requests
+ * @return next check time
+ */
static inline time_t
-rspamd_http_map_process_next_check(time_t now, time_t expires, time_t map_check_interval)
+rspamd_http_map_process_next_check(struct rspamd_map *map,
+ struct rspamd_map_backend *bk,
+ time_t now,
+ time_t expires,
+ time_t map_check_interval,
+ gboolean has_etag,
+ gboolean has_last_modified)
{
- static const time_t interval_mult = 16;
- /* By default use expires header */
- time_t next_check = expires;
+ static const time_t interval_mult = 4; /* Reduced from 16 to be more responsive */
+ static const time_t min_respectful_interval = 5;
+ time_t next_check;
+ time_t effective_interval = map_check_interval;
+
+ /*
+ * Priority order for cache validation:
+ * 1. ETags (most reliable)
+ * 2. Last-Modified dates
+ * 3. Cache expiration headers (least reliable)
+ */
+
+ if (has_etag || has_last_modified) {
+ /*
+ * If we have ETags or Last-Modified, we can use conditional requests
+ * to avoid unnecessary downloads. However, we still need to be respectful
+ * to servers and not DoS them with overly aggressive polling.
+ */
+ if (map_check_interval < min_respectful_interval) {
+ /*
+ * User configured very aggressive polling, but server provides cache validation.
+ * Enforce minimum respectful interval to avoid DoS'ing the server.
+ */
+ effective_interval = min_respectful_interval * interval_mult;
+ msg_info_map("map polling interval %d too aggressive with server cache support for %s, "
+ "using %d seconds minimum",
+ (int) map_check_interval, bk->uri, (int) effective_interval);
+ }
- if (expires < now) {
- return now;
+ if (expires > now && (expires - now) <= effective_interval * interval_mult) {
+ /* Use expires header if it's reasonable (within interval_mult x poll interval) */
+ next_check = expires;
+ }
+ else {
+ /* Use effective interval, don't extend too much */
+ next_check = now + effective_interval;
+ }
+ }
+ else if (expires > now) {
+ /*
+ * No ETags or Last-Modified available, rely on cache expiration.
+ * But still cap the interval to avoid too long delays.
+ * No need for respectful interval protection here since no conditional requests.
+ */
+ if (expires - now > map_check_interval * interval_mult) {
+ next_check = now + map_check_interval * interval_mult;
+ }
+ else {
+ next_check = expires;
+ }
}
- else if (expires - now > map_check_interval * interval_mult) {
- next_check = now + map_check_interval * interval_mult;
+ else {
+ /* No valid cache information, check immediately */
+ next_check = now;
}
return next_check;
}
+/**
+ * Calculate respectful polling interval to avoid DoS'ing servers with cache validation
+ * @param map_check_interval user configured interval
+ * @return map_check_interval unchanged when it is at least 5 seconds, otherwise 5 * 4 = 20 seconds
+ */
+static inline time_t
+rspamd_map_get_respectful_interval(time_t map_check_interval)
+{
+ static const time_t min_respectful_interval = 5; /* Minimum 5 seconds to be respectful */
+ static const time_t interval_mult = 4; /* Multiplier for respectful minimum */
+
+ if (map_check_interval < min_respectful_interval) {
+ return min_respectful_interval * interval_mult;
+ }
+ return map_check_interval;
+}
+
static int
http_map_finish(struct rspamd_http_connection *conn,
struct rspamd_http_message *msg)
@@ -333,13 +415,15 @@ http_map_finish(struct rspamd_http_connection *conn,
if (msg->code == 200) {
if (cbd->check) {
- msg_info_map("need to reread map from %s", cbd->bk->uri);
+ msg_info_map("need to reread map from %s (reply code 200); "
+ "date timestamp: %z, last modified: %z",
+ cbd->bk->uri, (size_t) msg->date, (size_t) msg->last_modified);
cbd->periodic->need_modify = TRUE;
/* Reset the whole chain */
cbd->periodic->cur_backend = 0;
/* Reset cache, old cached data will be cleaned on timeout */
g_atomic_int_set(&data->cache->available, 0);
- g_atomic_int_set(&bk->shared->loaded, 0);
+ g_atomic_int_set(&map->shared->loaded, 0);
data->cur_cache_cbd = NULL;
rspamd_map_process_periodic(cbd->periodic);
@@ -348,6 +432,7 @@ http_map_finish(struct rspamd_http_connection *conn,
return 0;
}
+ /* This code is executed when we are actually reading a map */
cbd->data->last_checked = msg->date;
if (msg->last_modified) {
@@ -378,10 +463,11 @@ http_map_finish(struct rspamd_http_connection *conn,
goto err;
}
- /* Check for expires */
+ /* Check for expires + etag */
double cached_timeout = map->poll_timeout * 2;
expires_hdr = rspamd_http_message_find_header(msg, "Expires");
+ etag_hdr = rspamd_http_message_find_header(msg, "ETag");
if (expires_hdr) {
time_t hdate;
@@ -389,8 +475,10 @@ http_map_finish(struct rspamd_http_connection *conn,
hdate = rspamd_http_parse_date(expires_hdr->begin, expires_hdr->len);
if (hdate != (time_t) -1 && hdate > msg->date) {
- map->next_check = rspamd_http_map_process_next_check(msg->date, hdate,
- (time_t) map->poll_timeout);
+ map->next_check = rspamd_http_map_process_next_check(map, bk, msg->date, hdate,
+ (time_t) map->poll_timeout,
+ etag_hdr != NULL,
+ msg->last_modified != 0);
cached_timeout = map->next_check - msg->date;
}
else {
@@ -398,9 +486,16 @@ http_map_finish(struct rspamd_http_connection *conn,
map->next_check = 0;
}
}
-
- /* Check for etag */
- etag_hdr = rspamd_http_message_find_header(msg, "ETag");
+ else if (etag_hdr != NULL || msg->last_modified != 0) {
+ /* No expires header, but we have ETag or Last-Modified - use respectful interval */
+ time_t effective_interval = rspamd_map_get_respectful_interval(map->poll_timeout);
+ if (effective_interval != map->poll_timeout) {
+ msg_info_map("map polling interval %d too aggressive with server cache support, "
+ "using %d seconds minimum",
+ (int) map->poll_timeout, (int) effective_interval);
+ }
+ map->next_check = msg->date + effective_interval;
+ }
if (etag_hdr) {
if (cbd->data->etag) {
@@ -421,12 +516,7 @@ http_map_finish(struct rspamd_http_connection *conn,
MAP_RETAIN(cbd->shmem_data, "shmem_data");
cbd->data->gen++;
- /*
- * We know that a map is in the locked state
- */
- g_atomic_int_set(&data->cache->available, 1);
- g_atomic_int_set(&bk->shared->loaded, 1);
- g_atomic_int_set(&bk->shared->cached, 0);
+
/* Store cached data */
rspamd_strlcpy(data->cache->shmem_name, cbd->shmem_data->shm_name,
sizeof(data->cache->shmem_name));
@@ -528,6 +618,12 @@ http_map_finish(struct rspamd_http_connection *conn,
cbd->periodic->cur_backend++;
munmap(in, dlen);
+
+ /* Announce for other processes */
+ g_atomic_int_set(&data->cache->available, 1);
+ g_atomic_int_set(&map->shared->loaded, 1);
+ g_atomic_int_set(&map->shared->cached, 1);
+
rspamd_map_process_periodic(cbd->periodic);
}
else if (msg->code == 304 && cbd->check) {
@@ -541,19 +637,33 @@ http_map_finish(struct rspamd_http_connection *conn,
}
expires_hdr = rspamd_http_message_find_header(msg, "Expires");
+ bool has_expires = (expires_hdr != NULL);
if (expires_hdr) {
time_t hdate;
hdate = rspamd_http_parse_date(expires_hdr->begin, expires_hdr->len);
if (hdate != (time_t) -1 && hdate > msg->date) {
- map->next_check = rspamd_http_map_process_next_check(msg->date, hdate,
- (time_t) map->poll_timeout);
+ map->next_check = rspamd_http_map_process_next_check(map, bk, msg->date, hdate,
+ (time_t) map->poll_timeout,
+ cbd->data->etag != NULL,
+ msg->last_modified != 0);
}
else {
msg_info_map("invalid expires header: %T, ignore it", expires_hdr);
map->next_check = 0;
+ has_expires = false;
+ }
+ }
+ else if (cbd->data->etag != NULL || msg->last_modified != 0) {
+ /* No expires header, but we have ETag or Last-Modified - use respectful interval */
+ time_t effective_interval = rspamd_map_get_respectful_interval(map->poll_timeout);
+ if (effective_interval != map->poll_timeout) {
+ msg_info_map("map polling interval %d too aggressive with server cache support, "
+ "using %d seconds minimum",
+ (int) map->poll_timeout, (int) effective_interval);
}
+ map->next_check = msg->date + effective_interval;
}
etag_hdr = rspamd_http_message_find_header(msg, "ETag");
@@ -567,19 +677,24 @@ http_map_finish(struct rspamd_http_connection *conn,
}
}
- if (map->next_check) {
+ if (has_expires) {
rspamd_http_date_format(next_check_date, sizeof(next_check_date),
map->next_check);
- msg_info_map("data is not modified for server %s, next check at %s "
+ msg_info_map("data is not modified for server %s (%s), next check at %s "
"(http cache based: %T)",
- cbd->data->host, next_check_date, expires_hdr);
+ cbd->data->host,
+ bk->uri,
+ next_check_date,
+ expires_hdr);
}
else {
rspamd_http_date_format(next_check_date, sizeof(next_check_date),
- rspamd_get_calendar_ticks() + map->poll_timeout);
- msg_info_map("data is not modified for server %s, next check at %s "
+ map->next_check);
+ msg_info_map("data is not modified for server %s (%s), next check at %s "
"(timer based)",
- cbd->data->host, next_check_date);
+ cbd->data->host,
+ bk->uri,
+ next_check_date);
}
rspamd_map_update_http_cached_file(map, bk, cbd->data);
@@ -922,7 +1037,7 @@ read_map_file(struct rspamd_map *map, struct file_map_data *data,
map->read_callback(NULL, 0, &periodic->cbdata, TRUE);
}
- g_atomic_int_set(&bk->shared->loaded, 1);
+ g_atomic_int_set(&map->shared->loaded, 1);
return TRUE;
}
@@ -1008,7 +1123,7 @@ read_map_static(struct rspamd_map *map, struct static_map_data *data,
}
data->processed = TRUE;
- g_atomic_int_set(&bk->shared->loaded, 1);
+ g_atomic_int_set(&map->shared->loaded, 1);
return TRUE;
}
@@ -1016,10 +1131,7 @@ read_map_static(struct rspamd_map *map, struct static_map_data *data,
static void
rspamd_map_periodic_dtor(struct map_periodic_cbdata *periodic)
{
- struct rspamd_map *map;
- struct rspamd_map_backend *bk;
-
- map = periodic->map;
+ struct rspamd_map *map = periodic->map;
msg_debug_map("periodic dtor %p; need_modify=%d", periodic, periodic->need_modify);
if (periodic->need_modify || periodic->cbdata.errored) {
@@ -1034,21 +1146,13 @@ rspamd_map_periodic_dtor(struct map_periodic_cbdata *periodic)
/* Not modified */
}
- if (periodic->locked) {
- if (periodic->cur_backend < map->backends->len) {
- bk = (struct rspamd_map_backend *) g_ptr_array_index(map->backends, periodic->cur_backend);
- g_atomic_int_set(&bk->shared->locked, 0);
- msg_debug_map("unlocked map %s", map->name);
- }
-
- if (periodic->map->wrk->state == rspamd_worker_state_running) {
- rspamd_map_schedule_periodic(periodic->map,
- RSPAMD_SYMBOL_RESULT_NORMAL);
- }
- else {
- msg_debug_map("stop scheduling periodics for %s; terminating state",
- periodic->map->name);
- }
+ if (periodic->map->wrk->state == rspamd_worker_state_running) {
+ rspamd_map_schedule_periodic(periodic->map,
+ RSPAMD_MAP_SCHEDULE_NORMAL);
+ }
+ else {
+ msg_debug_map("stop scheduling periodics for %s; terminating state",
+ periodic->map->name);
}
g_free(periodic);
@@ -1448,9 +1552,6 @@ rspamd_map_read_cached(struct rspamd_map *map, struct rspamd_map_backend *bk,
map->read_callback(in, len, &periodic->cbdata, TRUE);
}
- g_atomic_int_set(&bk->shared->loaded, 1);
- g_atomic_int_set(&bk->shared->cached, 1);
-
munmap(in, mmap_len);
return TRUE;
@@ -1488,7 +1589,7 @@ rspamd_map_save_http_cached_file(struct rspamd_map *map,
const unsigned char *data,
gsize len)
{
- char path[PATH_MAX];
+ char path[PATH_MAX], temp_path[PATH_MAX];
unsigned char digest[rspamd_cryptobox_HASHBYTES];
struct rspamd_config *cfg = map->cfg;
int fd;
@@ -1501,8 +1602,10 @@ rspamd_map_save_http_cached_file(struct rspamd_map *map,
rspamd_cryptobox_hash(digest, bk->uri, strlen(bk->uri), NULL, 0);
rspamd_snprintf(path, sizeof(path), "%s%c%*xs.map", cfg->maps_cache_dir,
G_DIR_SEPARATOR, 20, digest);
+ rspamd_snprintf(temp_path, sizeof(temp_path), "%s.tmp.%d.%d", path,
+ (int) getpid(), (int) rspamd_get_calendar_ticks());
- fd = rspamd_file_xopen(path, O_WRONLY | O_TRUNC | O_CREAT,
+ fd = rspamd_file_xopen(temp_path, O_WRONLY | O_TRUNC | O_CREAT,
00600, FALSE);
if (fd == -1) {
@@ -1510,8 +1613,9 @@ rspamd_map_save_http_cached_file(struct rspamd_map *map,
}
if (!rspamd_file_lock(fd, FALSE)) {
- msg_err_map("cannot lock file %s: %s", path, strerror(errno));
+ msg_err_map("cannot lock file %s: %s", temp_path, strerror(errno));
close(fd);
+ unlink(temp_path);
return FALSE;
}
@@ -1530,9 +1634,10 @@ rspamd_map_save_http_cached_file(struct rspamd_map *map,
}
if (write(fd, &header, sizeof(header)) != sizeof(header)) {
- msg_err_map("cannot write file %s (header stage): %s", path, strerror(errno));
+ msg_err_map("cannot write file %s (header stage): %s", temp_path, strerror(errno));
rspamd_file_unlock(fd, FALSE);
close(fd);
+ unlink(temp_path);
return FALSE;
}
@@ -1540,9 +1645,10 @@ rspamd_map_save_http_cached_file(struct rspamd_map *map,
if (header.etag_len > 0) {
if (write(fd, RSPAMD_FSTRING_DATA(htdata->etag), header.etag_len) !=
header.etag_len) {
- msg_err_map("cannot write file %s (etag stage): %s", path, strerror(errno));
+ msg_err_map("cannot write file %s (etag stage): %s", temp_path, strerror(errno));
rspamd_file_unlock(fd, FALSE);
close(fd);
+ unlink(temp_path);
return FALSE;
}
@@ -1550,9 +1656,10 @@ rspamd_map_save_http_cached_file(struct rspamd_map *map,
/* Now write the rest */
if (write(fd, data, len) != len) {
- msg_err_map("cannot write file %s (data stage): %s", path, strerror(errno));
+ msg_err_map("cannot write file %s (data stage): %s", temp_path, strerror(errno));
rspamd_file_unlock(fd, FALSE);
close(fd);
+ unlink(temp_path);
return FALSE;
}
@@ -1560,6 +1667,13 @@ rspamd_map_save_http_cached_file(struct rspamd_map *map,
rspamd_file_unlock(fd, FALSE);
close(fd);
+ /* Atomically move temp file to final location */
+ if (rename(temp_path, path) != 0) {
+ msg_err_map("cannot rename %s to %s: %s", temp_path, path, strerror(errno));
+ unlink(temp_path);
+ return FALSE;
+ }
+
msg_info_map("saved data from %s in %s, %uz bytes", bk->uri, path, len + sizeof(header) + header.etag_len);
return TRUE;
@@ -1693,7 +1807,11 @@ rspamd_map_read_http_cached_file(struct rspamd_map *map,
double now = rspamd_get_calendar_ticks();
if (header.next_check > now) {
- map->next_check = rspamd_http_map_process_next_check(now, header.next_check, map->poll_timeout);
+ /* We assume that we have this data inside the cached file */
+ map->next_check = rspamd_http_map_process_next_check(map, bk, now, header.next_check,
+ map->poll_timeout,
+ header.etag_len > 0,
+ true);
}
else {
map->next_check = now;
@@ -1740,8 +1858,9 @@ rspamd_map_read_http_cached_file(struct rspamd_map *map,
struct tm tm;
char ncheck_buf[32], lm_buf[32];
- g_atomic_int_set(&bk->shared->loaded, 1);
- g_atomic_int_set(&bk->shared->cached, 1);
+ g_atomic_int_set(&map->shared->loaded, 1);
+ g_atomic_int_set(&map->shared->cached, 1);
+
rspamd_localtime(map->next_check, &tm);
strftime(ncheck_buf, sizeof(ncheck_buf) - 1, "%Y-%m-%d %H:%M:%S", &tm);
rspamd_localtime(htdata->last_modified, &tm);
@@ -1784,7 +1903,6 @@ rspamd_map_common_http_callback(struct rspamd_map *map,
(int) data->last_modified,
(int) data->cache->last_modified);
periodic->need_modify = TRUE;
- /* Reset the whole chain */
periodic->cur_backend = 0;
rspamd_map_process_periodic(periodic);
}
@@ -2054,33 +2172,10 @@ rspamd_map_process_periodic(struct map_periodic_cbdata *cbd)
bk = g_ptr_array_index(map->backends, cbd->cur_backend);
- if (!map->file_only && !cbd->locked) {
- if (!g_atomic_int_compare_and_exchange(&bk->shared->locked,
- 0, 1)) {
- msg_debug_map(
- "don't try to reread map %s as it is locked by other process, "
- "will reread it later",
- cbd->map->name);
- rspamd_map_schedule_periodic(map, RSPAMD_MAP_SCHEDULE_LOCKED);
- MAP_RELEASE(cbd, "periodic");
-
- return;
- }
- else {
- msg_debug_map("locked map %s", map->name);
- cbd->locked = TRUE;
- }
- }
-
if (cbd->errored) {
/* We should not check other backends if some backend has failed*/
rspamd_map_schedule_periodic(cbd->map, RSPAMD_MAP_SCHEDULE_ERROR);
- if (cbd->locked) {
- g_atomic_int_set(&bk->shared->locked, 0);
- cbd->locked = FALSE;
- }
-
/* Also set error flag for the map consumer */
cbd->cbdata.errored = true;
@@ -2796,9 +2891,6 @@ rspamd_map_parse_backend(struct rspamd_config *cfg, const char *map_line)
bk->data.sd = sdata;
}
- bk->shared = rspamd_mempool_alloc0_shared(cfg->cfg_pool,
- sizeof(struct rspamd_map_shared_backend_data));
-
return bk;
err:
@@ -2929,6 +3021,8 @@ rspamd_map_add(struct rspamd_config *cfg,
map->user_data = user_data;
map->cfg = cfg;
map->id = rspamd_random_uint64_fast();
+ map->shared =
+ rspamd_mempool_alloc0_shared(cfg->cfg_pool, sizeof(struct rspamd_map_shared_data));
map->backends = g_ptr_array_sized_new(1);
map->wrk = worker;
rspamd_mempool_add_destructor(cfg->cfg_pool, rspamd_ptr_array_free_hard,
@@ -3027,6 +3121,8 @@ rspamd_map_add_from_ucl(struct rspamd_config *cfg,
map->user_data = user_data;
map->cfg = cfg;
map->id = rspamd_random_uint64_fast();
+ map->shared =
+ rspamd_mempool_alloc0_shared(cfg->cfg_pool, sizeof(struct rspamd_map_shared_data));
map->backends = g_ptr_array_new();
map->wrk = worker;
map->no_file_read = (flags & RSPAMD_MAP_FILE_NO_READ);
@@ -3208,7 +3304,7 @@ rspamd_map_add_from_ucl(struct rspamd_config *cfg,
if (all_loaded) {
/* Static map */
- g_atomic_int_set(&bk->shared->loaded, 1);
+ g_atomic_int_set(&map->shared->loaded, 1);
}
rspamd_map_calculate_hash(map);
@@ -3257,3 +3353,51 @@ void rspamd_map_set_on_load_function(struct rspamd_map *map, rspamd_map_on_load_
map->on_load_ud_dtor = dtor;
}
}
+
+/*
+ * Start hyperscan compilation for every loaded regexp scope of the map's
+ * configuration. Returns silently unless the map has a configuration with a
+ * current worker, the map's worker is the primary controller, and the regexp
+ * cache holds at least one scope.
+ */
+void rspamd_map_trigger_hyperscan_compilation(struct rspamd_map *map)
+{
+	/* Only trigger compilation in controller worker */
+	if (map == NULL || !map->cfg || !map->cfg->cur_worker) {
+		return;
+	}
+
+	struct rspamd_worker *worker = map->wrk;
+	/* worker->ctx is dereferenced below, so a map without a worker is rejected here */
+	if (worker == NULL || !rspamd_worker_is_primary_controller(worker)) {
+		return;
+	}
+
+	/* Check if we have any scopes that need compilation */
+	if (!map->cfg->re_cache) {
+		return;
+	}
+
+	if (rspamd_re_cache_count_scopes(map->cfg->re_cache) == 0) {
+		return;
+	}
+
+	/* Loop-invariant arguments of the compilation call */
+	const char *cache_dir = map->cfg->hs_cache_dir ? map->cfg->hs_cache_dir : RSPAMD_DBDIR "/";
+	struct ev_loop *event_loop =
+		worker->ctx ? ((struct rspamd_abstract_worker_ctx *) worker->ctx)->event_loop : NULL;
+
+	/* Iterate through scopes and compile those that are loaded */
+	struct rspamd_re_cache *scope;
+
+	for (scope = rspamd_re_cache_scope_first(map->cfg->re_cache);
+		 scope != NULL;
+		 scope = rspamd_re_cache_scope_next(scope)) {
+		const char *scope_name = rspamd_re_cache_scope_name(scope);
+		/* The default scope is addressed as NULL; a missing name is treated the same way */
+		const char *scope_for_check =
+			(scope_name == NULL || strcmp(scope_name, "default") == 0) ? NULL : scope_name;
+
+		/* Only compile loaded scopes */
+		if (rspamd_re_cache_is_loaded(map->cfg->re_cache, scope_for_check)) {
+			msg_info_map("triggering hyperscan compilation for scope: %s after map update",
+						 scope_name ? scope_name : "default");
+
+			/* Use default settings for compilation */
+			rspamd_re_cache_compile_hyperscan_scoped_single(scope, scope_for_check,
+															cache_dir,
+															1.0,   /* max_time */
+															FALSE, /* silent */
+															event_loop,
+															NULL,  /* callback */
+															NULL); /* cbdata */
+		}
+	}
+}
diff --git a/src/libserver/maps/map.h b/src/libserver/maps/map.h
index b2ba53118..27915e4c9 100644
--- a/src/libserver/maps/map.h
+++ b/src/libserver/maps/map.h
@@ -161,6 +161,12 @@ void rspamd_map_traverse(struct rspamd_map *map, rspamd_map_traverse_cb cb,
void rspamd_map_set_on_load_function(struct rspamd_map *map, rspamd_map_on_load_function cb,
gpointer cbdata, GDestroyNotify dtor);
+/**
+ * Trigger hyperscan compilation for regexp scopes that may have been updated
+ * @param map map that was updated
+ */
+void rspamd_map_trigger_hyperscan_compilation(struct rspamd_map *map);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/libserver/maps/map_private.h b/src/libserver/maps/map_private.h
index 66949f926..65df8d7f5 100644
--- a/src/libserver/maps/map_private.h
+++ b/src/libserver/maps/map_private.h
@@ -134,20 +134,12 @@ union rspamd_map_backend_data {
struct rspamd_map;
-/*
- * Shared between workers
- */
-struct rspamd_map_shared_backend_data {
- int locked;
- int loaded;
- int cached;
-};
+
struct rspamd_map_backend {
enum fetch_proto protocol;
gboolean is_signed;
gboolean is_compressed;
gboolean is_fallback;
- struct rspamd_map_shared_backend_data *shared;
struct rspamd_map *map;
struct ev_loop *event_loop;
uint64_t id;
@@ -159,6 +151,14 @@ struct rspamd_map_backend {
struct map_periodic_cbdata;
+/*
+ * Per-map state shared between workers; the fields are written with
+ * g_atomic_int_set() by the map reading code.
+ */
+struct rspamd_map_shared_data {
+	int loaded; /* set to 1 once map data has been read (file, static, HTTP or cached file) */
+	int cached; /* set to 1 when the data was stored in or read from the HTTP cache */
+};
+
struct rspamd_map {
struct rspamd_dns_resolver *r;
struct rspamd_config *cfg;
@@ -193,6 +193,8 @@ struct rspamd_map {
bool static_only; /* No need to check */
bool no_file_read; /* Do not read files */
bool seen; /* This map has already been watched or pre-loaded */
+ /* State shared between worker processes (loaded/cached flags of this map) */
+ struct rspamd_map_shared_data *shared;
char tag[MEMPOOL_UID_LEN];
};
@@ -209,7 +211,6 @@ struct map_periodic_cbdata {
ev_timer ev;
gboolean need_modify;
gboolean errored;
- gboolean locked;
unsigned int cur_backend;
ref_entry_t ref;
};
diff --git a/src/libserver/milter.c b/src/libserver/milter.c
index 94b0d6cc1..09ddddaba 100644
--- a/src/libserver/milter.c
+++ b/src/libserver/milter.c
@@ -1473,8 +1473,6 @@ rspamd_milter_macro_http(struct rspamd_milter_session *session,
{
rspamd_http_message_add_header_len(msg, QUEUE_ID_HEADER,
found->begin, found->len);
- rspamd_http_message_add_header_len(msg, LOG_TAG_HEADER,
- found->begin, found->len);
}
else
{
@@ -1482,8 +1480,6 @@ rspamd_milter_macro_http(struct rspamd_milter_session *session,
{
rspamd_http_message_add_header_len(msg, QUEUE_ID_HEADER,
found->begin, found->len);
- rspamd_http_message_add_header_len(msg, LOG_TAG_HEADER,
- found->begin, found->len);
}
}
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index b683547a1..b085c69d7 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -1668,8 +1668,21 @@ void rspamd_protocol_http_reply(struct rspamd_http_message *msg,
}
}
- if ((task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED) &&
- rspamd_libs_reset_compression(task->cfg->libs_ctx)) {
+ /* Check if we should compress the response */
+ gboolean should_compress = FALSE;
+
+ /* Rule 1: If request had compression, preserve it (existing behavior) */
+ if (task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_COMPRESSED) {
+ should_compress = TRUE;
+ }
+
+ /* Rule 2: If client supports zstd compression, honor it (takes precedence) */
+ const rspamd_ftok_t *accept_encoding = rspamd_task_get_request_header(task, "Accept-Encoding");
+ if (accept_encoding && rspamd_substring_search_caseless(accept_encoding->begin, accept_encoding->len, "zstd", 4) != -1) {
+ should_compress = TRUE;
+ }
+
+ if (should_compress && rspamd_libs_reset_compression(task->cfg->libs_ctx)) {
/* We can compress output */
ZSTD_inBuffer zin;
ZSTD_outBuffer zout;
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index 06e9f3328..06ba26528 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -91,6 +91,7 @@ struct rspamd_re_class {
gsize type_len;
GHashTable *re;
rspamd_cryptobox_hash_state_t *st;
+ struct rspamd_re_cache *cache; /* Back-reference to owning cache */
char hash[rspamd_cryptobox_HASHBYTES + 1];
@@ -126,6 +127,12 @@ struct rspamd_re_cache {
unsigned int max_re_data;
char hash[rspamd_cryptobox_HASHBYTES + 1];
lua_State *L;
+
+ /* Intrusive linked list for scoped caches */
+ struct rspamd_re_cache *next, *prev;
+ char *scope;
+ unsigned int flags; /* Cache flags (loaded state, etc.) */
+
#ifdef WITH_HYPERSCAN
enum rspamd_hyperscan_status hyperscan_loaded;
gboolean disable_hyperscan;
@@ -149,6 +156,9 @@ struct rspamd_re_runtime {
struct rspamd_re_cache *cache;
struct rspamd_re_cache_stat stat;
gboolean has_hs;
+
+ /* Linked list for multiple scoped runtimes */
+ struct rspamd_re_runtime *next, *prev;
};
static GQuark
@@ -174,6 +184,63 @@ rspamd_re_cache_class_id(enum rspamd_re_type type,
return rspamd_cryptobox_fast_hash_final(&st);
}
+/*
+ * Walk the scope list and return the cache whose scope name equals `scope`.
+ * A NULL `scope` matches the entry whose own scope is NULL (the default
+ * scope). Returns NULL for an empty list or when nothing matches.
+ */
+static struct rspamd_re_cache *
+rspamd_re_cache_find_by_scope(struct rspamd_re_cache *cache_head, const char *scope)
+{
+	struct rspamd_re_cache *elt;
+
+	if (cache_head == NULL) {
+		return NULL;
+	}
+
+	DL_FOREACH(cache_head, elt)
+	{
+		if (scope == NULL) {
+			/* Looking for default scope */
+			if (elt->scope == NULL) {
+				return elt;
+			}
+		}
+		else if (elt->scope != NULL && strcmp(elt->scope, scope) == 0) {
+			return elt;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Return the cache registered for `scope` in the list at `*cache_head`,
+ * creating and appending a new one when the scope is not present yet.
+ * A NULL `scope` denotes the default scope. Returns NULL only when
+ * `cache_head` itself is NULL.
+ */
+static struct rspamd_re_cache *
+rspamd_re_cache_add_to_scope_list(struct rspamd_re_cache **cache_head, const char *scope)
+{
+	struct rspamd_re_cache *new_cache, *existing;
+
+	if (!cache_head) {
+		return NULL;
+	}
+
+	/* Check if scope already exists */
+	existing = rspamd_re_cache_find_by_scope(*cache_head, scope);
+	if (existing) {
+		return existing;
+	}
+
+	/*
+	 * rspamd_re_cache_new() yields a default-scope cache (scope == NULL) that
+	 * is flagged as loaded. Keep that state for the default scope; only named
+	 * scopes start as unloaded.
+	 */
+	new_cache = rspamd_re_cache_new();
+	if (scope != NULL) {
+		new_cache->scope = g_strdup(scope);
+		new_cache->flags = 0; /* New named scopes start as unloaded */
+	}
+
+	/* DL_APPEND also handles an empty list (NULL head) */
+	DL_APPEND(*cache_head, new_cache);
+
+	return new_cache;
+}
+
static void
rspamd_re_cache_destroy(struct rspamd_re_cache *cache)
{
@@ -230,6 +297,11 @@ rspamd_re_cache_destroy(struct rspamd_re_cache *cache)
g_hash_table_unref(cache->re_classes);
g_ptr_array_free(cache->re, TRUE);
+
+ if (cache->scope) {
+ g_free(cache->scope);
+ }
+
g_free(cache);
}
@@ -252,6 +324,10 @@ rspamd_re_cache_new(void)
cache->nre = 0;
cache->re = g_ptr_array_new_full(256, rspamd_re_cache_elt_dtor);
cache->selectors = kh_init(lua_selectors_hash);
+ cache->next = NULL;
+ cache->prev = cache;
+ cache->scope = NULL; /* Default scope */
+ cache->flags = RSPAMD_RE_CACHE_FLAG_LOADED; /* Default scope is always loaded */
#ifdef WITH_HYPERSCAN
cache->hyperscan_loaded = RSPAMD_HYPERSCAN_UNKNOWN;
#endif
@@ -295,6 +371,7 @@ rspamd_re_cache_add(struct rspamd_re_cache *cache,
re_class->id = class_id;
re_class->type_len = datalen;
re_class->type = type;
+ re_class->cache = cache; /* Set back-reference */
re_class->re = g_hash_table_new_full(rspamd_regexp_hash,
rspamd_regexp_equal, NULL, (GDestroyNotify) rspamd_regexp_unref);
@@ -330,6 +407,26 @@ rspamd_re_cache_add(struct rspamd_re_cache *cache,
return nre;
}
+/*
+ * Add `re` to the cache that belongs to `scope`, creating that scope's cache
+ * in the list if needed. Returns whatever rspamd_re_cache_add() returns, or
+ * NULL when no cache could be obtained for the scope.
+ */
+rspamd_regexp_t *
+rspamd_re_cache_add_scoped(struct rspamd_re_cache **cache_head, const char *scope,
+						   rspamd_regexp_t *re, enum rspamd_re_type type,
+						   gconstpointer type_data, gsize datalen,
+						   int lua_cbref)
+{
+	g_assert(cache_head != NULL);
+	g_assert(re != NULL);
+
+	/* NULL scope is allowed for default scope */
+	struct rspamd_re_cache *target = rspamd_re_cache_add_to_scope_list(cache_head, scope);
+
+	return target != NULL
+			   ? rspamd_re_cache_add(target, re, type, type_data, datalen, lua_cbref)
+			   : NULL;
+}
+
void rspamd_re_cache_replace(struct rspamd_re_cache *cache,
rspamd_regexp_t *what,
rspamd_regexp_t *with)
@@ -371,6 +468,23 @@ void rspamd_re_cache_replace(struct rspamd_re_cache *cache,
}
}
+/*
+ * Replace regexp `what` with `with` inside the cache registered for `scope`.
+ * Does nothing when the scope is not present in the list.
+ */
+void rspamd_re_cache_replace_scoped(struct rspamd_re_cache **cache_head, const char *scope,
+									rspamd_regexp_t *what,
+									rspamd_regexp_t *with)
+{
+	g_assert(cache_head != NULL);
+	g_assert(what != NULL);
+	g_assert(with != NULL);
+
+	/* NULL scope is allowed for default scope */
+	struct rspamd_re_cache *target = rspamd_re_cache_find_by_scope(*cache_head, scope);
+
+	if (target == NULL) {
+		return;
+	}
+
+	rspamd_re_cache_replace(target, what, with);
+}
+
static int
rspamd_re_cache_sort_func(gconstpointer a, gconstpointer b)
{
@@ -515,8 +629,24 @@ void rspamd_re_cache_init(struct rspamd_re_cache *cache, struct rspamd_config *c
#endif
}
-struct rspamd_re_runtime *
-rspamd_re_cache_runtime_new(struct rspamd_re_cache *cache)
+/*
+ * Run rspamd_re_cache_init() on every scope in the list that carries the
+ * RSPAMD_RE_CACHE_FLAG_LOADED flag; other scopes are left untouched.
+ */
+void rspamd_re_cache_init_scoped(struct rspamd_re_cache *cache_head,
+								 struct rspamd_config *cfg)
+{
+	struct rspamd_re_cache *elt;
+
+	g_assert(cache_head != NULL);
+
+	DL_FOREACH(cache_head, elt)
+	{
+		if (!(elt->flags & RSPAMD_RE_CACHE_FLAG_LOADED)) {
+			/* Unloaded scopes are not initialized */
+			continue;
+		}
+
+		rspamd_re_cache_init(elt, cfg);
+	}
+}
+
+static struct rspamd_re_runtime *
+rspamd_re_cache_runtime_new_single(struct rspamd_re_cache *cache)
{
struct rspamd_re_runtime *rt;
g_assert(cache != NULL);
@@ -530,10 +660,73 @@ rspamd_re_cache_runtime_new(struct rspamd_re_cache *cache)
#ifdef WITH_HYPERSCAN
rt->has_hs = cache->hyperscan_loaded;
#endif
+ /* Initialize the doubly-linked list pointers */
+ rt->next = NULL;
+ rt->prev = NULL;
return rt;
}
+/*
+ * Build a linked list with one runtime per loaded scope of the cache chain
+ * and return its head. Returns NULL when no scope in the chain is loaded.
+ */
+struct rspamd_re_runtime *
+rspamd_re_cache_runtime_new(struct rspamd_re_cache *cache)
+{
+	struct rspamd_re_runtime *head = NULL;
+	struct rspamd_re_cache *scope_cache;
+
+	g_assert(cache != NULL);
+
+	DL_FOREACH(cache, scope_cache)
+	{
+		struct rspamd_re_runtime *scope_rt;
+
+		/* Skip unloaded scopes */
+		if ((scope_cache->flags & RSPAMD_RE_CACHE_FLAG_LOADED) == 0) {
+			continue;
+		}
+
+		scope_rt = rspamd_re_cache_runtime_new_single(scope_cache);
+		if (scope_rt == NULL) {
+			continue;
+		}
+
+		/*
+		 * DL_APPEND on an empty list makes the element the head with
+		 * prev pointing to itself and next set to NULL.
+		 */
+		DL_APPEND(head, scope_rt);
+	}
+
+	return head;
+}
+
+/*
+ * Alias of rspamd_re_cache_runtime_new(), which already creates runtimes
+ * for all loaded scopes of the chain.
+ */
+struct rspamd_re_runtime *
+rspamd_re_cache_runtime_new_all_scopes(struct rspamd_re_cache *cache_head)
+{
+	struct rspamd_re_runtime *all_scopes_rt = rspamd_re_cache_runtime_new(cache_head);
+
+	return all_scopes_rt;
+}
+
+/*
+ * Create a single runtime for the cache registered under `scope`.
+ * Returns NULL when the list is empty or the scope is not present.
+ */
+struct rspamd_re_runtime *
+rspamd_re_cache_runtime_new_scoped(struct rspamd_re_cache *cache_head, const char *scope)
+{
+	struct rspamd_re_cache *target = NULL;
+
+	if (cache_head != NULL) {
+		target = rspamd_re_cache_find_by_scope(cache_head, scope);
+	}
+
+	return target != NULL ? rspamd_re_cache_runtime_new_single(target) : NULL;
+}
+
const struct rspamd_re_cache_stat *
rspamd_re_cache_get_stat(struct rspamd_re_runtime *rt)
{
@@ -998,20 +1191,21 @@ rspamd_re_cache_process_selector(struct rspamd_task *task,
return result;
}
+
static inline unsigned int
-rspamd_process_words_vector(GArray *words,
- const unsigned char **scvec,
- unsigned int *lenvec,
- struct rspamd_re_class *re_class,
- unsigned int cnt,
- gboolean *raw)
+rspamd_process_words_vector_kvec(rspamd_words_t *words,
+ const unsigned char **scvec,
+ unsigned int *lenvec,
+ struct rspamd_re_class *re_class,
+ unsigned int cnt,
+ gboolean *raw)
{
unsigned int j;
- rspamd_stat_token_t *tok;
+ rspamd_word_t *tok;
- if (words) {
- for (j = 0; j < words->len; j++) {
- tok = &g_array_index(words, rspamd_stat_token_t, j);
+ if (words && words->a) {
+ for (j = 0; j < kv_size(*words); j++) {
+ tok = &kv_A(*words, j);
if (tok->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT) {
if (!(tok->flags & RSPAMD_STAT_TOKEN_FLAG_UTF)) {
@@ -1432,13 +1626,13 @@ rspamd_re_cache_exec_re(struct rspamd_task *task,
PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, text_part)
{
- if (text_part->utf_words) {
- cnt += text_part->utf_words->len;
+ if (text_part->utf_words.a) {
+ cnt += kv_size(text_part->utf_words);
}
}
- if (task->meta_words && task->meta_words->len > 0) {
- cnt += task->meta_words->len;
+ if (task->meta_words.a && kv_size(task->meta_words) > 0) {
+ cnt += kv_size(task->meta_words);
}
if (cnt > 0) {
@@ -1449,15 +1643,15 @@ rspamd_re_cache_exec_re(struct rspamd_task *task,
PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, text_part)
{
- if (text_part->utf_words) {
- cnt = rspamd_process_words_vector(text_part->utf_words,
- scvec, lenvec, re_class, cnt, &raw);
+ if (text_part->utf_words.a) {
+ cnt = rspamd_process_words_vector_kvec(&text_part->utf_words,
+ scvec, lenvec, re_class, cnt, &raw);
}
}
- if (task->meta_words) {
- cnt = rspamd_process_words_vector(task->meta_words,
- scvec, lenvec, re_class, cnt, &raw);
+ if (task->meta_words.a) {
+ cnt = rspamd_process_words_vector_kvec(&task->meta_words,
+ scvec, lenvec, re_class, cnt, &raw);
}
ret = rspamd_re_cache_process_regexp_data(rt, re,
@@ -1502,20 +1696,20 @@ rspamd_re_cache_exec_re(struct rspamd_task *task,
return rt->results[re_id];
}
-int rspamd_re_cache_process(struct rspamd_task *task,
- rspamd_regexp_t *re,
- enum rspamd_re_type type,
- gconstpointer type_data,
- gsize datalen,
- gboolean is_strong)
+static int
+rspamd_re_cache_process_single(struct rspamd_task *task,
+ struct rspamd_re_runtime *rt,
+ rspamd_regexp_t *re,
+ enum rspamd_re_type type,
+ gconstpointer type_data,
+ gsize datalen,
+ gboolean is_strong)
{
uint64_t re_id;
struct rspamd_re_class *re_class;
struct rspamd_re_cache *cache;
- struct rspamd_re_runtime *rt;
g_assert(task != NULL);
- rt = task->re_rt;
g_assert(rt != NULL);
g_assert(re != NULL);
@@ -1550,6 +1744,53 @@ int rspamd_re_cache_process(struct rspamd_task *task,
return 0;
}
+/*
+ * Process `re` for `task` using the runtime that belongs to the regexp's
+ * owning cache. Returns 0 when the task has no runtimes, the regexp has no
+ * class or owning cache, or no runtime in the task's list matches that cache;
+ * otherwise returns the result of rspamd_re_cache_process_single().
+ */
+int rspamd_re_cache_process(struct rspamd_task *task,
+							rspamd_regexp_t *re,
+							enum rspamd_re_type type,
+							gconstpointer type_data,
+							gsize datalen,
+							gboolean is_strong)
+{
+	struct rspamd_re_runtime *scope_rt;
+	struct rspamd_re_class *owner_class;
+
+	g_assert(task != NULL);
+	g_assert(re != NULL);
+
+	if (task->re_rt == NULL) {
+		return 0;
+	}
+
+	/* regexp -> class -> cache: the class carries a back-reference to its cache */
+	owner_class = rspamd_regexp_get_class(re);
+	if (owner_class == NULL || owner_class->cache == NULL) {
+		return 0;
+	}
+
+	/* Find the runtime that matches the cache */
+	DL_FOREACH(task->re_rt, scope_rt)
+	{
+		if (scope_rt->cache == owner_class->cache) {
+			return rspamd_re_cache_process_single(task, scope_rt, re, type,
+												  type_data, datalen, is_strong);
+		}
+	}
+
+	return 0;
+}
+
int rspamd_re_cache_process_ffi(void *ptask,
void *pre,
int type,
@@ -1570,24 +1811,30 @@ int rspamd_re_cache_process_ffi(void *ptask,
void rspamd_re_cache_runtime_destroy(struct rspamd_re_runtime *rt)
{
+ struct rspamd_re_runtime *cur, *tmp;
+
g_assert(rt != NULL);
- if (rt->sel_cache) {
- struct rspamd_re_selector_result sr;
+ /* Handle linked list of runtimes */
+ DL_FOREACH_SAFE(rt, cur, tmp)
+ {
+ if (cur->sel_cache) {
+ struct rspamd_re_selector_result sr;
- kh_foreach_value(rt->sel_cache, sr, {
- for (unsigned int i = 0; i < sr.cnt; i++) {
- g_free((gpointer) sr.scvec[i]);
- }
+ kh_foreach_value(cur->sel_cache, sr, {
+ for (unsigned int i = 0; i < sr.cnt; i++) {
+ g_free((gpointer) sr.scvec[i]);
+ }
- g_free(sr.scvec);
- g_free(sr.lenvec);
- });
- kh_destroy(selectors_results_hash, rt->sel_cache);
- }
+ g_free(sr.scvec);
+ g_free(sr.lenvec);
+ });
+ kh_destroy(selectors_results_hash, cur->sel_cache);
+ }
- REF_RELEASE(rt->cache);
- g_free(rt);
+ REF_RELEASE(cur->cache);
+ g_free(cur);
+ }
}
void rspamd_re_cache_unref(struct rspamd_re_cache *cache)
@@ -1597,6 +1844,21 @@ void rspamd_re_cache_unref(struct rspamd_re_cache *cache)
}
}
+/*
+ * Detach every cache from the scope list and drop one reference on each.
+ * A NULL head is accepted and ignored.
+ */
+void rspamd_re_cache_unref_scoped(struct rspamd_re_cache *cache_head)
+{
+	struct rspamd_re_cache *elt, *next_elt;
+
+	if (cache_head != NULL) {
+		/* The safe iterator saves the next pointer before the element is unlinked */
+		DL_FOREACH_SAFE(cache_head, elt, next_elt)
+		{
+			DL_DELETE(cache_head, elt);
+			rspamd_re_cache_unref(elt);
+		}
+	}
+}
+
struct rspamd_re_cache *
rspamd_re_cache_ref(struct rspamd_re_cache *cache)
{
@@ -1619,6 +1881,23 @@ unsigned int rspamd_re_cache_set_limit(struct rspamd_re_cache *cache, unsigned i
return old;
}
+/*
+ * Set the regexp data limit for the cache registered under `scope` and
+ * return the previous limit. A NULL `scope` addresses the default scope,
+ * as in the other scoped helpers. Returns 0 when the list is empty or the
+ * scope is not present.
+ */
+unsigned int rspamd_re_cache_set_limit_scoped(struct rspamd_re_cache *cache_head, const char *scope, unsigned int limit)
+{
+	struct rspamd_re_cache *cache;
+	unsigned int old = 0;
+
+	if (!cache_head) {
+		return old;
+	}
+
+	/* NULL scope is allowed for default scope */
+	cache = rspamd_re_cache_find_by_scope(cache_head, scope);
+	if (cache) {
+		old = rspamd_re_cache_set_limit(cache, limit);
+	}
+
+	return old;
+}
+
const char *
rspamd_re_cache_type_to_string(enum rspamd_re_type type)
{
@@ -1936,21 +2215,27 @@ rspamd_re_cache_compile_timer_cb(EV_P_ ev_timer *w, int revents)
if (re_class->type_len > 0) {
if (!cbdata->silent) {
msg_info_re_cache(
- "skip already valid class %s(%*s) to cache %6s, %d regexps",
+ "skip already valid class %s(%*s) to cache %6s, %d regexps%s%s%s",
rspamd_re_cache_type_to_string(re_class->type),
(int) re_class->type_len - 1,
re_class->type_data,
re_class->hash,
- n);
+ n,
+ cache->scope ? " for scope '" : "",
+ cache->scope ? cache->scope : "",
+ cache->scope ? "'" : "");
}
}
else {
if (!cbdata->silent) {
msg_info_re_cache(
- "skip already valid class %s to cache %6s, %d regexps",
+ "skip already valid class %s to cache %6s, %d regexps%s%s%s",
rspamd_re_cache_type_to_string(re_class->type),
re_class->hash,
- n);
+ n,
+ cache->scope ? " for scope '" : "",
+ cache->scope ? cache->scope : "",
+ cache->scope ? "'" : "");
}
}
@@ -2159,21 +2444,27 @@ rspamd_re_cache_compile_timer_cb(EV_P_ ev_timer *w, int revents)
if (re_class->type_len > 0) {
msg_info_re_cache(
- "compiled class %s(%*s) to cache %6s, %d/%d regexps",
+ "compiled class %s(%*s) to cache %6s, %d/%d regexps%s%s%s",
rspamd_re_cache_type_to_string(re_class->type),
(int) re_class->type_len - 1,
re_class->type_data,
re_class->hash,
n,
- (int) g_hash_table_size(re_class->re));
+ (int) g_hash_table_size(re_class->re),
+ cache->scope ? " for scope '" : "",
+ cache->scope ? cache->scope : "",
+ cache->scope ? "'" : "");
}
else {
msg_info_re_cache(
- "compiled class %s to cache %6s, %d/%d regexps",
+ "compiled class %s to cache %6s, %d/%d regexps%s%s%s",
rspamd_re_cache_type_to_string(re_class->type),
re_class->hash,
n,
- (int) g_hash_table_size(re_class->re));
+ (int) g_hash_table_size(re_class->re),
+ cache->scope ? " for scope '" : "",
+ cache->scope ? cache->scope : "",
+ cache->scope ? "'" : "");
}
cbdata->total += n;
@@ -2256,6 +2547,108 @@ int rspamd_re_cache_compile_hyperscan(struct rspamd_re_cache *cache,
#endif
}
+#ifdef WITH_HYPERSCAN
+/* Bookkeeping shared by the per-scope compilation callbacks of one scoped compile run */
+struct rspamd_re_cache_scoped_compile_data {
+	unsigned int total_scopes;     /* number of scopes expected to report completion */
+	unsigned int completed_scopes; /* number of scopes that have reported so far */
+	unsigned int total_compiled;   /* sum of ncompiled over all reports */
+	GError *first_error;           /* copy of the first error reported, if any */
+	void (*final_cb)(unsigned int ncompiled, GError *err, void *cbd);
+	void *final_cbd;
+};
+
+/*
+ * Per-scope completion callback: accumulates the result and, once every scope
+ * has reported, invokes the final callback and releases the coordination data.
+ */
+static void
+rspamd_re_cache_compile_scoped_coordination_cb(unsigned int ncompiled, GError *err, void *cbd)
+{
+	struct rspamd_re_cache_scoped_compile_data *state = cbd;
+
+	state->completed_scopes++;
+	state->total_compiled += ncompiled;
+
+	/* Store the first error we encounter */
+	if (err != NULL && state->first_error == NULL) {
+		state->first_error = g_error_copy(err);
+	}
+
+	if (state->completed_scopes < state->total_scopes) {
+		/* Some scopes are still pending */
+		return;
+	}
+
+	/* All scopes completed, call the final callback */
+	if (state->final_cb != NULL) {
+		state->final_cb(state->total_compiled, state->first_error, state->final_cbd);
+	}
+
+	/* Cleanup */
+	if (state->first_error != NULL) {
+		g_error_free(state->first_error);
+	}
+
+	g_free(state);
+}
+#endif
+
+/*
+ * Start hyperscan compilation for every scope in the list and report the
+ * aggregate outcome (total compiled count and first error) through `cb`
+ * once all scopes have reported.
+ *
+ * Returns -1 when built without hyperscan or when `cache_head` is NULL
+ * (in which case `cb` is not invoked), 0 otherwise.
+ */
+int rspamd_re_cache_compile_hyperscan_scoped(struct rspamd_re_cache *cache_head,
+											 const char *cache_dir,
+											 double max_time,
+											 gboolean silent,
+											 struct ev_loop *event_loop,
+											 void (*cb)(unsigned int ncompiled, GError *err, void *cbd),
+											 void *cbd)
+{
+#ifndef WITH_HYPERSCAN
+	return -1;
+#else
+	struct rspamd_re_cache *cur;
+	struct rspamd_re_cache_scoped_compile_data *coord_data;
+	unsigned int scope_count = 0;
+	int result;
+
+	if (!cache_head) {
+		return -1;
+	}
+
+	/* A non-NULL head means the list holds at least one scope */
+	DL_COUNT(cache_head, cur, scope_count);
+
+	/* Create coordination data to track completion of all scopes */
+	coord_data = g_malloc0(sizeof(*coord_data));
+	coord_data->total_scopes = scope_count;
+	coord_data->completed_scopes = 0;
+	coord_data->total_compiled = 0;
+	coord_data->first_error = NULL;
+	coord_data->final_cb = cb;
+	coord_data->final_cbd = cbd;
+
+	/*
+	 * Start async compilation for each scope. Each scope will use timers
+	 * and call our coordination callback when completed.
+	 */
+	DL_FOREACH(cache_head, cur)
+	{
+		result = rspamd_re_cache_compile_hyperscan(cur, cache_dir, max_time, silent,
+												   event_loop, rspamd_re_cache_compile_scoped_coordination_cb, coord_data);
+		if (result < 0) {
+			/*
+			 * If we failed to start compilation for this scope, treat it as
+			 * completed with error. A NULL scope name is the default scope.
+			 */
+			GError *start_error = g_error_new(rspamd_re_cache_quark(), result,
+											  "Failed to start hyperscan compilation for scope '%s'",
+											  cur->scope ? cur->scope : "default");
+			rspamd_re_cache_compile_scoped_coordination_cb(0, start_error, coord_data);
+			g_error_free(start_error);
+		}
+	}
+
+	return 0; /* Always return 0 for async operation */
+#endif
+}
+
gboolean
rspamd_re_cache_is_valid_hyperscan_file(struct rspamd_re_cache *cache,
const char *path, gboolean silent, gboolean try_load, GError **err)
@@ -2272,6 +2665,7 @@ rspamd_re_cache_is_valid_hyperscan_file(struct rspamd_re_cache *cache,
GHashTableIter it;
gpointer k, v;
struct rspamd_re_class *re_class;
+ struct rspamd_re_cache *cur;
gsize len;
const char *hash_pos;
hs_platform_info_t test_plt;
@@ -2282,7 +2676,7 @@ rspamd_re_cache_is_valid_hyperscan_file(struct rspamd_re_cache *cache,
len = strlen(path);
- if (len < sizeof(rspamd_cryptobox_HASHBYTES + 3)) {
+ if (len < (rspamd_cryptobox_HASHBYTES + 3)) {
if (!silent) {
msg_err_re_cache("cannot open hyperscan cache file %s: too short filename",
path);
@@ -2304,174 +2698,179 @@ rspamd_re_cache_is_valid_hyperscan_file(struct rspamd_re_cache *cache,
}
hash_pos = path + len - 3 - (sizeof(re_class->hash) - 1);
- g_hash_table_iter_init(&it, cache->re_classes);
- while (g_hash_table_iter_next(&it, &k, &v)) {
- re_class = v;
+ /* Iterate through all scopes in the cache chain */
+ DL_FOREACH(cache, cur)
+ {
+ g_hash_table_iter_init(&it, cur->re_classes);
- if (memcmp(hash_pos, re_class->hash, sizeof(re_class->hash) - 1) == 0) {
- /* Open file and check magic */
- gssize r;
+ while (g_hash_table_iter_next(&it, &k, &v)) {
+ re_class = v;
- fd = open(path, O_RDONLY);
+ if (memcmp(hash_pos, re_class->hash, sizeof(re_class->hash) - 1) == 0) {
+ /* Open file and check magic */
+ gssize r;
- if (fd == -1) {
- if (errno != ENOENT || !silent) {
- msg_err_re_cache("cannot open hyperscan cache file %s: %s",
- path, strerror(errno));
- }
- g_set_error(err, rspamd_re_cache_quark(), 0,
- "%s",
- strerror(errno));
- return FALSE;
- }
+ fd = open(path, O_RDONLY);
- if ((r = read(fd, magicbuf, sizeof(magicbuf))) != sizeof(magicbuf)) {
- if (r == -1) {
- msg_err_re_cache("cannot read magic from hyperscan "
- "cache file %s: %s",
- path, strerror(errno));
+ if (fd == -1) {
+ if (errno != ENOENT || !silent) {
+ msg_err_re_cache("cannot open hyperscan cache file %s: %s",
+ path, strerror(errno));
+ }
g_set_error(err, rspamd_re_cache_quark(), 0,
- "cannot read magic: %s",
+ "%s",
strerror(errno));
- }
- else {
- msg_err_re_cache("truncated read magic from hyperscan "
- "cache file %s: %z, %z wanted",
- path, r, (gsize) sizeof(magicbuf));
- g_set_error(err, rspamd_re_cache_quark(), 0,
- "truncated read magic %zd, %zd wanted",
- r, (gsize) sizeof(magicbuf));
+ return FALSE;
}
- close(fd);
- return FALSE;
- }
-
- mb = rspamd_hs_magic;
-
- if (memcmp(magicbuf, mb, sizeof(magicbuf)) != 0) {
- msg_err_re_cache("cannot open hyperscan cache file %s: "
- "bad magic ('%*xs', '%*xs' expected)",
- path, (int) RSPAMD_HS_MAGIC_LEN, magicbuf,
- (int) RSPAMD_HS_MAGIC_LEN, mb);
-
- close(fd);
- g_set_error(err, rspamd_re_cache_quark(), 0, "invalid magic");
- return FALSE;
- }
+ if ((r = read(fd, magicbuf, sizeof(magicbuf))) != sizeof(magicbuf)) {
+ if (r == -1) {
+ msg_err_re_cache("cannot read magic from hyperscan "
+ "cache file %s: %s",
+ path, strerror(errno));
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "cannot read magic: %s",
+ strerror(errno));
+ }
+ else {
+ msg_err_re_cache("truncated read magic from hyperscan "
+ "cache file %s: %z, %z wanted",
+ path, r, (gsize) sizeof(magicbuf));
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "truncated read magic %zd, %zd wanted",
+ r, (gsize) sizeof(magicbuf));
+ }
- if ((r = read(fd, &test_plt, sizeof(test_plt))) != sizeof(test_plt)) {
- if (r == -1) {
- msg_err_re_cache("cannot read platform data from hyperscan "
- "cache file %s: %s",
- path, strerror(errno));
- }
- else {
- msg_err_re_cache("truncated read platform data from hyperscan "
- "cache file %s: %z, %z wanted",
- path, r, (gsize) sizeof(magicbuf));
+ close(fd);
+ return FALSE;
}
- g_set_error(err, rspamd_re_cache_quark(), 0,
- "cannot read platform data: %s", strerror(errno));
-
- close(fd);
- return FALSE;
- }
-
- if (test_plt.cpu_features != cache->plt.cpu_features) {
- msg_err_re_cache("cannot open hyperscan cache file %s: "
- "compiled for a different platform",
- path);
- g_set_error(err, rspamd_re_cache_quark(), 0,
- "compiled for a different platform");
-
- close(fd);
- return FALSE;
- }
+ mb = rspamd_hs_magic;
- close(fd);
-
- if (try_load) {
- map = rspamd_file_xmap(path, PROT_READ, &len, TRUE);
+ if (memcmp(magicbuf, mb, sizeof(magicbuf)) != 0) {
+ msg_err_re_cache("cannot open hyperscan cache file %s: "
+ "bad magic ('%*xs', '%*xs' expected)",
+ path, (int) RSPAMD_HS_MAGIC_LEN, magicbuf,
+ (int) RSPAMD_HS_MAGIC_LEN, mb);
- if (map == NULL) {
- msg_err_re_cache("cannot mmap hyperscan cache file %s: "
- "%s",
- path, strerror(errno));
- g_set_error(err, rspamd_re_cache_quark(), 0,
- "mmap error: %s", strerror(errno));
+ close(fd);
+ g_set_error(err, rspamd_re_cache_quark(), 0, "invalid magic");
return FALSE;
}
- p = map + RSPAMD_HS_MAGIC_LEN + sizeof(test_plt);
- end = map + len;
- memcpy(&n, p, sizeof(n));
- p += sizeof(int);
-
- if (n <= 0 || 2 * n * sizeof(int) + /* IDs + flags */
- sizeof(uint64_t) + /* crc */
- RSPAMD_HS_MAGIC_LEN + /* header */
- sizeof(cache->plt) >
- len) {
- /* Some wrong amount of regexps */
- msg_err_re_cache("bad number of expressions in %s: %d",
- path, n);
+ if ((r = read(fd, &test_plt, sizeof(test_plt))) != sizeof(test_plt)) {
+ if (r == -1) {
+ msg_err_re_cache("cannot read platform data from hyperscan "
+ "cache file %s: %s",
+ path, strerror(errno));
+ }
+ else {
+ msg_err_re_cache("truncated read platform data from hyperscan "
+ "cache file %s: %z, %z wanted",
+ path, r, (gsize) sizeof(magicbuf));
+ }
+
g_set_error(err, rspamd_re_cache_quark(), 0,
- "bad number of expressions: %d", n);
- munmap(map, len);
+ "cannot read platform data: %s", strerror(errno));
+
+ close(fd);
return FALSE;
}
- /*
- * Magic - 8 bytes
- * Platform - sizeof (platform)
- * n - number of regexps
- * n * <regexp ids>
- * n * <regexp flags>
- * crc - 8 bytes checksum
- * <hyperscan blob>
- */
-
- memcpy(&crc, p + n * 2 * sizeof(int), sizeof(crc));
- rspamd_cryptobox_fast_hash_init(&crc_st, 0xdeadbabe);
- /* IDs */
- rspamd_cryptobox_fast_hash_update(&crc_st, p, n * sizeof(int));
- /* Flags */
- rspamd_cryptobox_fast_hash_update(&crc_st, p + n * sizeof(int),
- n * sizeof(int));
- /* HS database */
- p += n * sizeof(int) * 2 + sizeof(uint64_t);
- rspamd_cryptobox_fast_hash_update(&crc_st, p, end - p);
- valid_crc = rspamd_cryptobox_fast_hash_final(&crc_st);
-
- if (crc != valid_crc) {
- msg_warn_re_cache("outdated or invalid hs database in %s: "
- "crc read %xL, crc expected %xL",
- path, crc, valid_crc);
+ if (test_plt.cpu_features != cur->plt.cpu_features) {
+ msg_err_re_cache("cannot open hyperscan cache file %s: "
+ "compiled for a different platform",
+ path);
g_set_error(err, rspamd_re_cache_quark(), 0,
- "outdated or invalid hs database, crc check failure");
- munmap(map, len);
+ "compiled for a different platform");
+ close(fd);
return FALSE;
}
- if ((ret = hs_deserialize_database(p, end - p, &test_db)) != HS_SUCCESS) {
- msg_err_re_cache("bad hs database in %s: %d", path, ret);
- g_set_error(err, rspamd_re_cache_quark(), 0,
- "deserialize error: %d", ret);
- munmap(map, len);
+ close(fd);
- return FALSE;
+ if (try_load) {
+ map = rspamd_file_xmap(path, PROT_READ, &len, TRUE);
+
+ if (map == NULL) {
+ msg_err_re_cache("cannot mmap hyperscan cache file %s: "
+ "%s",
+ path, strerror(errno));
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "mmap error: %s", strerror(errno));
+ return FALSE;
+ }
+
+ p = map + RSPAMD_HS_MAGIC_LEN + sizeof(test_plt);
+ end = map + len;
+ memcpy(&n, p, sizeof(n));
+ p += sizeof(int);
+
+ if (n <= 0 || 2 * n * sizeof(int) + /* IDs + flags */
+ sizeof(uint64_t) + /* crc */
+ RSPAMD_HS_MAGIC_LEN + /* header */
+ sizeof(cur->plt) >
+ len) {
+ /* Some wrong amount of regexps */
+ msg_err_re_cache("bad number of expressions in %s: %d",
+ path, n);
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "bad number of expressions: %d", n);
+ munmap(map, len);
+ return FALSE;
+ }
+
+ /*
+ * Magic - 8 bytes
+ * Platform - sizeof (platform)
+ * n - number of regexps
+ * n * <regexp ids>
+ * n * <regexp flags>
+ * crc - 8 bytes checksum
+ * <hyperscan blob>
+ */
+
+ memcpy(&crc, p + n * 2 * sizeof(int), sizeof(crc));
+ rspamd_cryptobox_fast_hash_init(&crc_st, 0xdeadbabe);
+ /* IDs */
+ rspamd_cryptobox_fast_hash_update(&crc_st, p, n * sizeof(int));
+ /* Flags */
+ rspamd_cryptobox_fast_hash_update(&crc_st, p + n * sizeof(int),
+ n * sizeof(int));
+ /* HS database */
+ p += n * sizeof(int) * 2 + sizeof(uint64_t);
+ rspamd_cryptobox_fast_hash_update(&crc_st, p, end - p);
+ valid_crc = rspamd_cryptobox_fast_hash_final(&crc_st);
+
+ if (crc != valid_crc) {
+ msg_warn_re_cache("outdated or invalid hs database in %s: "
+ "crc read %xL, crc expected %xL",
+ path, crc, valid_crc);
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "outdated or invalid hs database, crc check failure");
+ munmap(map, len);
+
+ return FALSE;
+ }
+
+ if ((ret = hs_deserialize_database(p, end - p, &test_db)) != HS_SUCCESS) {
+ msg_err_re_cache("bad hs database in %s: %d", path, ret);
+ g_set_error(err, rspamd_re_cache_quark(), 0,
+ "deserialize error: %d", ret);
+ munmap(map, len);
+
+ return FALSE;
+ }
+
+ hs_free_database(test_db);
+ munmap(map, len);
}
+ /* XXX: add crc check */
- hs_free_database(test_db);
- munmap(map, len);
+ return TRUE;
}
- /* XXX: add crc check */
-
- return TRUE;
}
}
@@ -2672,16 +3071,27 @@ rspamd_re_cache_load_hyperscan(struct rspamd_re_cache *cache,
if (has_valid) {
if (all_valid) {
- msg_info_re_cache("full hyperscan database of %d regexps has been loaded", total);
+ msg_info_re_cache("full hyperscan database of %d regexps has been loaded%s%s%s",
+ total,
+ cache->scope ? " for scope '" : "",
+ cache->scope ? cache->scope : "",
+ cache->scope ? "'" : "");
cache->hyperscan_loaded = RSPAMD_HYPERSCAN_LOADED_FULL;
}
else {
- msg_info_re_cache("partial hyperscan database of %d regexps has been loaded", total);
+ msg_info_re_cache("partial hyperscan database of %d regexps has been loaded%s%s%s",
+ total,
+ cache->scope ? " for scope '" : "",
+ cache->scope ? cache->scope : "",
+ cache->scope ? "'" : "");
cache->hyperscan_loaded = RSPAMD_HYPERSCAN_LOADED_PARTIAL;
}
}
else {
- msg_info_re_cache("hyperscan database has NOT been loaded; no valid expressions");
+ msg_info_re_cache("hyperscan database has NOT been loaded; no valid expressions%s%s%s",
+ cache->scope ? " for scope '" : "",
+ cache->scope ? cache->scope : "",
+ cache->scope ? "'" : "");
cache->hyperscan_loaded = RSPAMD_HYPERSCAN_LOAD_ERROR;
}
@@ -2690,6 +3100,48 @@ rspamd_re_cache_load_hyperscan(struct rspamd_re_cache *cache,
#endif
}
+/**
+ * Load precompiled hyperscan data for every scope in the list and fold the
+ * per-scope results into a single status.
+ *
+ * @param cache_head head of the scope list
+ * @param cache_dir directory passed to the per-scope loader
+ * @param try_load passed to the per-scope loader unchanged
+ * @return RSPAMD_HYPERSCAN_UNSUPPORTED without hyperscan support;
+ *         RSPAMD_HYPERSCAN_LOAD_ERROR if `cache_head` is NULL or no scope loaded;
+ *         RSPAMD_HYPERSCAN_LOADED_FULL if every scope loaded fully;
+ *         RSPAMD_HYPERSCAN_LOADED_PARTIAL otherwise
+ */
+enum rspamd_hyperscan_status rspamd_re_cache_load_hyperscan_scoped(
+	struct rspamd_re_cache *cache_head,
+	const char *cache_dir, bool try_load)
+{
+#ifndef WITH_HYPERSCAN
+	return RSPAMD_HYPERSCAN_UNSUPPORTED;
+#else
+	struct rspamd_re_cache *scope_cache;
+	gboolean any_loaded = FALSE, any_incomplete = FALSE;
+
+	if (cache_head == NULL) {
+		return RSPAMD_HYPERSCAN_LOAD_ERROR;
+	}
+
+	DL_FOREACH(cache_head, scope_cache)
+	{
+		switch (rspamd_re_cache_load_hyperscan(scope_cache, cache_dir, try_load)) {
+		case RSPAMD_HYPERSCAN_LOADED_FULL:
+			any_loaded = TRUE;
+			break;
+		case RSPAMD_HYPERSCAN_LOADED_PARTIAL:
+			any_loaded = TRUE;
+			any_incomplete = TRUE;
+			break;
+		default:
+			/* Any other status means this scope did not load */
+			any_incomplete = TRUE;
+			break;
+		}
+	}
+
+	if (!any_loaded) {
+		return RSPAMD_HYPERSCAN_LOAD_ERROR;
+	}
+
+	return any_incomplete ? RSPAMD_HYPERSCAN_LOADED_PARTIAL : RSPAMD_HYPERSCAN_LOADED_FULL;
+#endif
+}
+
void rspamd_re_cache_add_selector(struct rspamd_re_cache *cache,
const char *sname,
int ref)
@@ -2716,3 +3168,324 @@ void rspamd_re_cache_add_selector(struct rspamd_re_cache *cache,
kh_value(cache->selectors, k) = ref;
}
}
+
+/**
+ * Register a lua selector in the cache belonging to `scope`.
+ * The scope's cache is obtained via rspamd_re_cache_add_to_scope_list();
+ * a NULL `scope` designates the default scope.
+ *
+ * @param cache_head pointer to the list head (must not be NULL)
+ * @param scope scope name or NULL
+ * @param sname selector name (must not be NULL)
+ * @param ref reference passed through to rspamd_re_cache_add_selector()
+ */
+void rspamd_re_cache_add_selector_scoped(struct rspamd_re_cache **cache_head, const char *scope,
+										 const char *sname, int ref)
+{
+	struct rspamd_re_cache *scope_cache;
+
+	g_assert(cache_head != NULL);
+	g_assert(sname != NULL);
+
+	scope_cache = rspamd_re_cache_add_to_scope_list(cache_head, scope);
+
+	if (scope_cache == NULL) {
+		/* No cache for this scope: nothing to register against */
+		return;
+	}
+
+	rspamd_re_cache_add_selector(scope_cache, sname, ref);
+}
+
+/* Public wrapper: forwards the lookup to rspamd_re_cache_find_by_scope() unchanged. */
+struct rspamd_re_cache *rspamd_re_cache_find_scope(struct rspamd_re_cache *cache_head, const char *scope)
+{
+ return rspamd_re_cache_find_by_scope(cache_head, scope);
+}
+
+/**
+ * Detach the cache for the named `scope` from the list and release the
+ * reference on it.
+ *
+ * @param cache_head pointer to the list head; DL_DELETE rewrites `*cache_head`
+ *                   itself when the removed element happens to be the head
+ * @param scope scope name; NULL (the default scope) is never removed
+ * @return TRUE if the scope was found and removed, FALSE otherwise
+ */
+gboolean rspamd_re_cache_remove_scope(struct rspamd_re_cache **cache_head, const char *scope)
+{
+	struct rspamd_re_cache *target;
+
+	if (!cache_head || !*cache_head) {
+		return FALSE;
+	}
+
+	/* Prevent removal of default scope (NULL) to keep head stable */
+	if (!scope) {
+		return FALSE;
+	}
+
+	target = rspamd_re_cache_find_by_scope(*cache_head, scope);
+	if (!target) {
+		return FALSE;
+	}
+
+	/*
+	 * Unlink from the list. No manual head fix-up is needed afterwards:
+	 * DL_DELETE already moves the head to the next element, or to NULL
+	 * when the list becomes empty.
+	 */
+	DL_DELETE(*cache_head, target);
+
+	/* Unref the cache */
+	rspamd_re_cache_unref(target);
+
+	return TRUE;
+}
+
+/**
+ * Count the elements of the scope list.
+ * @param cache_head head of the scope list (may be NULL)
+ * @return number of list elements, 0 for a NULL head
+ */
+unsigned int rspamd_re_cache_count_scopes(struct rspamd_re_cache *cache_head)
+{
+	unsigned int nscopes = 0;
+	struct rspamd_re_cache *it;
+
+	if (cache_head != NULL) {
+		DL_COUNT(cache_head, it, nscopes);
+	}
+
+	return nscopes;
+}
+
+/* Iteration start: the first scope is the list head itself (NULL for an empty list). */
+struct rspamd_re_cache *rspamd_re_cache_scope_first(struct rspamd_re_cache *cache_head)
+{
+ return cache_head;
+}
+
+/**
+ * Advance a scope iterator.
+ * @param current current scope (may be NULL)
+ * @return `current->next`, or NULL when `current` is NULL
+ */
+struct rspamd_re_cache *rspamd_re_cache_scope_next(struct rspamd_re_cache *current)
+{
+	if (current == NULL) {
+		return NULL;
+	}
+
+	return current->next;
+}
+
+/**
+ * Printable name of a scope.
+ * @param scope scope object (may be NULL)
+ * @return "unknown" for a NULL object, "default" when the scope has no name,
+ *         otherwise the stored scope name; never NULL
+ */
+const char *rspamd_re_cache_scope_name(struct rspamd_re_cache *scope)
+{
+	if (scope == NULL) {
+		return "unknown";
+	}
+
+	if (scope->scope == NULL) {
+		return "default";
+	}
+
+	return scope->scope;
+}
+
+/**
+ * OR `flags` into the scope's flag word; a NULL scope is ignored.
+ */
+void rspamd_re_cache_scope_set_flags(struct rspamd_re_cache *scope, unsigned int flags)
+{
+	if (scope == NULL) {
+		return;
+	}
+
+	scope->flags = scope->flags | flags;
+}
+
+/**
+ * Clear the bits given in `flags` from the scope's flag word; a NULL scope is ignored.
+ */
+void rspamd_re_cache_scope_clear_flags(struct rspamd_re_cache *scope, unsigned int flags)
+{
+	if (scope == NULL) {
+		return;
+	}
+
+	scope->flags = scope->flags & ~flags;
+}
+
+/**
+ * Read the scope's flag word.
+ * @return the flags, or 0 for a NULL scope
+ */
+unsigned int rspamd_re_cache_scope_get_flags(struct rspamd_re_cache *scope)
+{
+	if (scope == NULL) {
+		return 0;
+	}
+
+	return scope->flags;
+}
+
+/**
+ * Test the RSPAMD_RE_CACHE_FLAG_LOADED bit of a scope.
+ * @return TRUE when the bit is set, FALSE otherwise or for a NULL scope
+ */
+gboolean rspamd_re_cache_scope_is_loaded(struct rspamd_re_cache *scope)
+{
+	return (scope != NULL &&
+			(scope->flags & RSPAMD_RE_CACHE_FLAG_LOADED) != 0);
+}
+
+/**
+ * Look up `scope` by name and OR `flags` into its flag word.
+ * Does nothing when the list is empty or the scope is not found.
+ */
+void rspamd_re_cache_set_flags(struct rspamd_re_cache *cache_head, const char *scope, unsigned int flags)
+{
+	struct rspamd_re_cache *found = NULL;
+
+	if (cache_head != NULL) {
+		found = rspamd_re_cache_find_by_scope(cache_head, scope);
+	}
+
+	if (found != NULL) {
+		found->flags = found->flags | flags;
+	}
+}
+
+/**
+ * Look up `scope` by name and clear the bits given in `flags` from its flag word.
+ * Does nothing when the list is empty or the scope is not found.
+ */
+void rspamd_re_cache_clear_flags(struct rspamd_re_cache *cache_head, const char *scope, unsigned int flags)
+{
+	struct rspamd_re_cache *found = NULL;
+
+	if (cache_head != NULL) {
+		found = rspamd_re_cache_find_by_scope(cache_head, scope);
+	}
+
+	if (found != NULL) {
+		found->flags = found->flags & ~flags;
+	}
+}
+
+/**
+ * Look up `scope` by name and return its flag word.
+ * @return the flags, or 0 when the list is empty or the scope is not found
+ */
+unsigned int rspamd_re_cache_get_flags(struct rspamd_re_cache *cache_head, const char *scope)
+{
+	struct rspamd_re_cache *found;
+
+	found = cache_head ? rspamd_re_cache_find_by_scope(cache_head, scope) : NULL;
+
+	return found ? found->flags : 0;
+}
+
+/**
+ * Test the RSPAMD_RE_CACHE_FLAG_LOADED bit of the scope found by name.
+ * @return TRUE when the bit is set; FALSE otherwise, including when
+ *         rspamd_re_cache_get_flags() yields 0 for a missing scope
+ */
+gboolean rspamd_re_cache_is_loaded(struct rspamd_re_cache *cache_head, const char *scope)
+{
+	if (rspamd_re_cache_get_flags(cache_head, scope) & RSPAMD_RE_CACHE_FLAG_LOADED) {
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+
+/**
+ * Try to take the per-scope lock file `<cache_dir>/<scope>.scope.lock`.
+ *
+ * The file is created exclusively (O_CREAT | O_EXCL), our PID is written to
+ * it as raw bytes, and it is then locked with rspamd_file_lock(). If the
+ * file already exists, it is reclaimed (unlinked and re-created) only when
+ * it is unusable: the PID cannot be read in full, the PID is not a valid
+ * process id (<= 0), or the recorded process no longer exists.
+ *
+ * @param cache_dir directory holding the lock file
+ * @param scope scope name; NULL is mapped to "default"
+ * @param lock_fd out: descriptor of the held lock on success, -1 on failure
+ * @return TRUE if the lock is now held by this process, FALSE otherwise
+ */
+static gboolean
+rspamd_re_cache_create_scope_lock(const char *cache_dir, const char *scope, int *lock_fd)
+{
+	char lock_path[PATH_MAX];
+	pid_t myself = getpid();
+
+	if (!scope) {
+		scope = "default";
+	}
+
+	rspamd_snprintf(lock_path, sizeof(lock_path), "%s%c%s.scope.lock",
+					cache_dir, G_DIR_SEPARATOR, scope);
+
+	*lock_fd = open(lock_path, O_WRONLY | O_CREAT | O_EXCL, 00600);
+
+	if (*lock_fd == -1) {
+		pid_t lock_pid = 0;
+		gboolean reclaim;
+		gssize r;
+		int read_fd;
+
+		if (errno != EEXIST && errno != EBUSY) {
+			return FALSE;
+		}
+
+		/* The lock file exists: find out whether its owner is still around */
+		read_fd = open(lock_path, O_RDONLY);
+		if (read_fd == -1) {
+			return FALSE;
+		}
+
+		r = read(read_fd, &lock_pid, sizeof(lock_pid));
+		close(read_fd);
+
+		if (r != sizeof(lock_pid) || lock_pid <= 0) {
+			/*
+			 * Invalid lock file. A non-positive PID must not reach kill():
+			 * kill(0, 0) and kill(<0, 0) address process groups, so the
+			 * probe would typically succeed and the lock would never be
+			 * reclaimed.
+			 * NOTE(review): a short read can also mean the owner has
+			 * created the file but not yet written its PID - confirm this
+			 * window is acceptable.
+			 */
+			reclaim = TRUE;
+		}
+		else {
+			/* Stale only if it belongs to another process that is gone */
+			reclaim = (lock_pid != myself &&
+					   kill(lock_pid, 0) == -1 && errno == ESRCH);
+		}
+
+		if (!reclaim || unlink(lock_path) != 0) {
+			return FALSE;
+		}
+
+		/* Try to create lock again; O_EXCL arbitrates between competing reclaimers */
+		*lock_fd = open(lock_path, O_WRONLY | O_CREAT | O_EXCL, 00600);
+		if (*lock_fd == -1) {
+			return FALSE;
+		}
+	}
+
+	/* Write our PID to the lock file, then lock the file */
+	if (write(*lock_fd, &myself, sizeof(myself)) != sizeof(myself) ||
+		!rspamd_file_lock(*lock_fd, FALSE)) {
+		close(*lock_fd);
+		/* Do not hand a closed descriptor back to the caller */
+		*lock_fd = -1;
+		unlink(lock_path);
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/**
+ * Release the per-scope lock: unlock and close `lock_fd` (when it is not -1),
+ * then unlink `<cache_dir>/<scope>.scope.lock`.
+ *
+ * @param cache_dir directory holding the lock file
+ * @param scope scope name; NULL is mapped to "default"
+ * @param lock_fd descriptor of the lock, or -1 if there is none
+ */
+static void
+rspamd_re_cache_remove_scope_lock(const char *cache_dir, const char *scope, int lock_fd)
+{
+	char lock_path[PATH_MAX];
+	const char *scope_name = scope ? scope : "default";
+
+	rspamd_snprintf(lock_path, sizeof(lock_path), "%s%c%s.scope.lock",
+					cache_dir, G_DIR_SEPARATOR, scope_name);
+
+	if (lock_fd != -1) {
+		rspamd_file_unlock(lock_fd, FALSE);
+		close(lock_fd);
+	}
+
+	/* The file is removed even when no descriptor was passed in */
+	unlink(lock_path);
+}
+
+#ifdef WITH_HYPERSCAN
+/*
+ * Callback data for a single-scope hyperscan compilation that runs under a
+ * per-scope lock; filled in by rspamd_re_cache_compile_hyperscan_scoped_single().
+ */
+struct rspamd_re_cache_hs_compile_scoped_cbdata {
+ struct rspamd_re_cache *cache; /* cache being compiled */
+ const char *cache_dir; /* borrowed pointer, stored without copying */
+ const char *scope; /* borrowed pointer, stored without copying; may be NULL */
+ double max_time; /* time limit as passed by the caller */
+ gboolean silent; /* silent flag as passed by the caller */
+ int lock_fd; /* descriptor of the scope lock */
+ void (*cb)(const char *scope, unsigned int ncompiled, GError *err, void *cbd); /* caller's callback, may be NULL */
+ void *cbd; /* opaque pointer passed to cb */
+};
+
+/**
+ * Completion handler for a locked single-scope compilation: releases the
+ * scope lock, forwards the result to the caller's callback (if any) together
+ * with the scope name, then frees the callback data.
+ *
+ * @param ncompiled forwarded to the caller's callback
+ * @param err forwarded to the caller's callback
+ * @param cbd a `struct rspamd_re_cache_hs_compile_scoped_cbdata *`; freed here
+ */
+static void
+rspamd_re_cache_compile_scoped_cb(unsigned int ncompiled, GError *err, void *cbd)
+{
+	struct rspamd_re_cache_hs_compile_scoped_cbdata *ctx = cbd;
+
+	/* The lock goes first so it is no longer held while the user callback runs */
+	rspamd_re_cache_remove_scope_lock(ctx->cache_dir, ctx->scope, ctx->lock_fd);
+
+	if (ctx->cb != NULL) {
+		ctx->cb(ctx->scope, ncompiled, err, ctx->cbd);
+	}
+
+	g_free(ctx);
+}
+
+/**
+ * Compile hyperscan data for one scope while holding that scope's lock file.
+ *
+ * If the lock cannot be taken, `cb` is invoked at once with zero compiled
+ * expressions and no error, and 0 is returned. Otherwise compilation is
+ * started via rspamd_re_cache_compile_hyperscan(); the lock is released and
+ * `cb` invoked from its completion callback.
+ *
+ * @param cache cache of the scope to compile (must not be NULL)
+ * @param scope scope name; stored by pointer, not copied (NULL means default)
+ * @param cache_dir cache directory; stored by pointer, not copied (must not be NULL)
+ * @param max_time passed through to the compiler
+ * @param silent passed through to the compiler
+ * @param event_loop passed through to the compiler
+ * @param cb completion callback (may be NULL)
+ * @param cbd opaque pointer passed to `cb`
+ * @return 0 when the lock is held elsewhere, otherwise the result of
+ *         rspamd_re_cache_compile_hyperscan()
+ */
+int rspamd_re_cache_compile_hyperscan_scoped_single(struct rspamd_re_cache *cache,
+													const char *scope,
+													const char *cache_dir,
+													double max_time,
+													gboolean silent,
+													struct ev_loop *event_loop,
+													void (*cb)(const char *scope, unsigned int ncompiled, GError *err, void *cbd),
+													void *cbd)
+{
+	struct rspamd_re_cache_hs_compile_scoped_cbdata *scoped_cbd;
+	int lock_fd = -1;
+	int rc;
+
+	g_assert(cache != NULL);
+	g_assert(cache_dir != NULL);
+
+	/* Try to acquire lock for this scope */
+	if (!rspamd_re_cache_create_scope_lock(cache_dir, scope, &lock_fd)) {
+		/* Lock not obtained (e.g. another process is compiling this scope) */
+		if (cb) {
+			cb(scope, 0, NULL, cbd);
+		}
+		return 0;
+	}
+
+	/* Create callback data */
+	scoped_cbd = g_malloc0(sizeof(*scoped_cbd));
+	scoped_cbd->cache = cache;
+	scoped_cbd->cache_dir = cache_dir;
+	scoped_cbd->scope = scope;
+	scoped_cbd->max_time = max_time;
+	scoped_cbd->silent = silent;
+	scoped_cbd->lock_fd = lock_fd;
+	scoped_cbd->cb = cb;
+	scoped_cbd->cbd = cbd;
+
+	rc = rspamd_re_cache_compile_hyperscan(cache, cache_dir, max_time, silent,
+										   event_loop, rspamd_re_cache_compile_scoped_cb, scoped_cbd);
+
+	if (rc < 0) {
+		/*
+		 * Compilation did not start. As rspamd_re_cache_compile_hyperscan_scoped()
+		 * does, this assumes the completion callback is not invoked after a
+		 * negative return, so the lock and the callback data have to be
+		 * released here or they would leak.
+		 * NOTE(review): confirm against rspamd_re_cache_compile_hyperscan().
+		 */
+		rspamd_re_cache_remove_scope_lock(cache_dir, scope, lock_fd);
+		g_free(scoped_cbd);
+	}
+
+	return rc;
+}
+#endif
diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h
index 20b1108e0..c5c8627d8 100644
--- a/src/libserver/re_cache.h
+++ b/src/libserver/re_cache.h
@@ -28,6 +28,9 @@ struct rspamd_re_runtime;
struct rspamd_task;
struct rspamd_config;
+/* Re cache flags */
+#define RSPAMD_RE_CACHE_FLAG_LOADED (1U << 0) /* Scope is fully loaded and ready for use */
+
enum rspamd_re_type {
RSPAMD_RE_HEADER,
RSPAMD_RE_RAWHEADER,
@@ -77,6 +80,22 @@ rspamd_re_cache_add(struct rspamd_re_cache *cache, rspamd_regexp_t *re,
int lua_cbref);
/**
+ * Add the existing regexp to the cache with specified scope
+ * @param cache_head head of cache list
+ * @param scope scope name
+ * @param re regexp object
+ * @param type type of object
+ * @param type_data associated data with the type (e.g. header name)
+ * @param datalen associated data length
+ * @param lua_cbref optional lua callback reference for matching purposes
+ */
+rspamd_regexp_t *
+rspamd_re_cache_add_scoped(struct rspamd_re_cache **cache_head, const char *scope,
+ rspamd_regexp_t *re, enum rspamd_re_type type,
+ gconstpointer type_data, gsize datalen,
+ int lua_cbref);
+
+/**
* Replace regexp in the cache with another regexp
* @param cache cache object
* @param what re to replace
@@ -87,11 +106,28 @@ void rspamd_re_cache_replace(struct rspamd_re_cache *cache,
rspamd_regexp_t *with);
/**
+ * Replace regexp in the scoped cache with another regexp
+ * @param cache_head head of cache list
+ * @param scope scope name
+ * @param what re to replace
+ * @param with regexp object to replace the origin
+ */
+void rspamd_re_cache_replace_scoped(struct rspamd_re_cache **cache_head, const char *scope,
+ rspamd_regexp_t *what,
+ rspamd_regexp_t *with);
+
+/**
* Initialize and optimize re cache structure
*/
void rspamd_re_cache_init(struct rspamd_re_cache *cache,
struct rspamd_config *cfg);
+/**
+ * Initialize and optimize re cache structures for all scopes
+ */
+void rspamd_re_cache_init_scoped(struct rspamd_re_cache *cache_head,
+ struct rspamd_config *cfg);
+
enum rspamd_hyperscan_status {
RSPAMD_HYPERSCAN_UNKNOWN = 0,
RSPAMD_HYPERSCAN_UNSUPPORTED,
@@ -108,11 +144,22 @@ enum rspamd_hyperscan_status {
enum rspamd_hyperscan_status rspamd_re_cache_is_hs_loaded(struct rspamd_re_cache *cache);
/**
- * Get runtime data for a cache
+ * Get runtime data for a cache - automatically creates runtimes for all scopes in the chain
+ * This is the main function used for task runtime creation
*/
struct rspamd_re_runtime *rspamd_re_cache_runtime_new(struct rspamd_re_cache *cache);
/**
+ * Get runtime data for all scoped caches (same as rspamd_re_cache_runtime_new)
+ */
+struct rspamd_re_runtime *rspamd_re_cache_runtime_new_all_scopes(struct rspamd_re_cache *cache_head);
+
+/**
+ * Get runtime data for a specific scoped cache only
+ */
+struct rspamd_re_runtime *rspamd_re_cache_runtime_new_scoped(struct rspamd_re_cache *cache_head, const char *scope);
+
+/**
* Get runtime statistics
*/
const struct rspamd_re_cache_stat *
@@ -152,6 +199,11 @@ void rspamd_re_cache_runtime_destroy(struct rspamd_re_runtime *rt);
void rspamd_re_cache_unref(struct rspamd_re_cache *cache);
/**
+ * Unref re cache list (all scopes)
+ */
+void rspamd_re_cache_unref_scoped(struct rspamd_re_cache *cache_head);
+
+/**
* Retain reference to re cache
*/
struct rspamd_re_cache *rspamd_re_cache_ref(struct rspamd_re_cache *cache);
@@ -162,6 +214,11 @@ struct rspamd_re_cache *rspamd_re_cache_ref(struct rspamd_re_cache *cache);
unsigned int rspamd_re_cache_set_limit(struct rspamd_re_cache *cache, unsigned int limit);
/**
+ * Set limit for all regular expressions in the scoped cache, returns previous limit
+ */
+unsigned int rspamd_re_cache_set_limit_scoped(struct rspamd_re_cache *cache_head, const char *scope, unsigned int limit);
+
+/**
* Convert re type to a human readable string (constant one)
*/
const char *rspamd_re_cache_type_to_string(enum rspamd_re_type type);
@@ -184,6 +241,17 @@ int rspamd_re_cache_compile_hyperscan(struct rspamd_re_cache *cache,
void *cbd);
/**
+ * Compile expressions to the hyperscan tree and store in the `cache_dir` for all scopes
+ */
+int rspamd_re_cache_compile_hyperscan_scoped(struct rspamd_re_cache *cache_head,
+ const char *cache_dir,
+ double max_time,
+ gboolean silent,
+ struct ev_loop *event_loop,
+ void (*cb)(unsigned int ncompiled, GError *err, void *cbd),
+ void *cbd);
+
+/**
* Returns TRUE if the specified file is valid hyperscan cache
*/
gboolean rspamd_re_cache_is_valid_hyperscan_file(struct rspamd_re_cache *cache,
@@ -200,11 +268,139 @@ enum rspamd_hyperscan_status rspamd_re_cache_load_hyperscan(
const char *cache_dir, bool try_load);
/**
+ * Loads all hyperscan regexps precompiled for all scopes
+ */
+enum rspamd_hyperscan_status rspamd_re_cache_load_hyperscan_scoped(
+ struct rspamd_re_cache *cache_head,
+ const char *cache_dir, bool try_load);
+
+/**
+ * Compile expressions to the hyperscan tree for a single scope with locking
+ */
+int rspamd_re_cache_compile_hyperscan_scoped_single(struct rspamd_re_cache *cache,
+ const char *scope,
+ const char *cache_dir,
+ double max_time,
+ gboolean silent,
+ struct ev_loop *event_loop,
+ void (*cb)(const char *scope, unsigned int ncompiled, GError *err, void *cbd),
+ void *cbd);
+
+/**
* Registers lua selector in the cache
*/
void rspamd_re_cache_add_selector(struct rspamd_re_cache *cache,
const char *sname, int ref);
+/**
+ * Registers lua selector in the scoped cache
+ */
+void rspamd_re_cache_add_selector_scoped(struct rspamd_re_cache **cache_head, const char *scope,
+ const char *sname, int ref);
+
+/**
+ * Find a cache by scope name
+ */
+struct rspamd_re_cache *rspamd_re_cache_find_scope(struct rspamd_re_cache *cache_head, const char *scope);
+
+/**
+ * Remove a cache scope from the list
+ */
+gboolean rspamd_re_cache_remove_scope(struct rspamd_re_cache **cache_head, const char *scope);
+
+/**
+ * Get array of scope names from the cache list
+ * @param cache_head head of cache list
+ * @return NULL-terminated array of scope names (must be freed with g_strfreev), or NULL if no scopes
+ */
+char **rspamd_re_cache_get_scope_names(struct rspamd_re_cache *cache_head);
+
+/**
+ * Count the number of scopes in the cache list
+ */
+unsigned int rspamd_re_cache_count_scopes(struct rspamd_re_cache *cache_head);
+
+/**
+ * Get the first scope in the cache list for iteration
+ * @param cache_head head of cache list
+ * @return first scope, or NULL if no scopes
+ */
+struct rspamd_re_cache *rspamd_re_cache_scope_first(struct rspamd_re_cache *cache_head);
+
+/**
+ * Get the next scope in iteration
+ * @param current current scope
+ * @return next scope, or NULL if at end
+ */
+struct rspamd_re_cache *rspamd_re_cache_scope_next(struct rspamd_re_cache *current);
+
+/**
+ * Get the scope name (for display/logging purposes)
+ * @param scope the scope
+ * @return scope name ("default" for NULL scope name), never returns NULL
+ */
+const char *rspamd_re_cache_scope_name(struct rspamd_re_cache *scope);
+
+/**
+ * Set flags on a scope (efficient version that works directly on scope object)
+ * @param scope the scope object (from iterator)
+ * @param flags flags to set
+ */
+void rspamd_re_cache_scope_set_flags(struct rspamd_re_cache *scope, unsigned int flags);
+
+/**
+ * Clear flags on a scope (efficient version that works directly on scope object)
+ * @param scope the scope object (from iterator)
+ * @param flags flags to clear
+ */
+void rspamd_re_cache_scope_clear_flags(struct rspamd_re_cache *scope, unsigned int flags);
+
+/**
+ * Get flags from a scope (efficient version that works directly on scope object)
+ * @param scope the scope object (from iterator)
+ * @return flags value
+ */
+unsigned int rspamd_re_cache_scope_get_flags(struct rspamd_re_cache *scope);
+
+/**
+ * Check if a scope is loaded (efficient version that works directly on scope object)
+ * @param scope the scope object (from iterator)
+ * @return TRUE if scope is loaded
+ */
+gboolean rspamd_re_cache_scope_is_loaded(struct rspamd_re_cache *scope);
+
+/**
+ * Set flags for a specific scope (legacy function - less efficient, searches by name)
+ * @param cache_head head of cache list
+ * @param scope scope name (NULL for default scope)
+ * @param flags flags to set
+ */
+void rspamd_re_cache_set_flags(struct rspamd_re_cache *cache_head, const char *scope, unsigned int flags);
+
+/**
+ * Clear flags for a specific scope (legacy function - less efficient, searches by name)
+ * @param cache_head head of cache list
+ * @param scope scope name (NULL for default scope)
+ * @param flags flags to clear
+ */
+void rspamd_re_cache_clear_flags(struct rspamd_re_cache *cache_head, const char *scope, unsigned int flags);
+
+/**
+ * Get flags for a specific scope (legacy function - less efficient, searches by name)
+ * @param cache_head head of cache list
+ * @param scope scope name (NULL for default scope)
+ * @return flags value
+ */
+unsigned int rspamd_re_cache_get_flags(struct rspamd_re_cache *cache_head, const char *scope);
+
+/**
+ * Check if a scope is loaded (legacy function - less efficient, searches by name)
+ * @param cache_head head of cache list
+ * @param scope scope name (NULL for default scope)
+ * @return TRUE if scope is loaded and ready for use
+ */
+gboolean rspamd_re_cache_is_loaded(struct rspamd_re_cache *cache_head, const char *scope);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/libserver/roll_history.c b/src/libserver/roll_history.c
index 66a53a597..d0f145d8f 100644
--- a/src/libserver/roll_history.c
+++ b/src/libserver/roll_history.c
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
+/*
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -231,7 +231,7 @@ rspamd_roll_history_load(struct roll_history *history, const char *filename)
return FALSE;
}
- parser = ucl_parser_new(0);
+ parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS);
if (!ucl_parser_add_fd(parser, fd)) {
msg_warn("cannot parse history file %s: %s", filename,
diff --git a/src/libserver/rspamd_control.c b/src/libserver/rspamd_control.c
index 1bff2ff12..e212f7e91 100644
--- a/src/libserver/rspamd_control.c
+++ b/src/libserver/rspamd_control.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -214,7 +214,7 @@ rspamd_control_write_reply(struct rspamd_control_session *session)
case RSPAMD_CONTROL_FUZZY_STAT:
if (elt->attached_fd != -1) {
/* We have some data to parse */
- parser = ucl_parser_new(0);
+ parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS);
ucl_object_insert_key(cur,
ucl_object_fromint(
elt->reply.reply.fuzzy_stat.status),
@@ -724,6 +724,9 @@ rspamd_control_default_cmd_handler(int fd,
case RSPAMD_CONTROL_CHILD_CHANGE:
case RSPAMD_CONTROL_FUZZY_BLOCKED:
break;
+ case RSPAMD_CONTROL_WORKERS_SPAWNED:
+ rep.reply.workers_spawned.status = 0;
+ break;
case RSPAMD_CONTROL_RERESOLVE:
if (cd->worker->srv->cfg) {
REF_RETAIN(cd->worker->srv->cfg);
@@ -1065,30 +1068,58 @@ rspamd_srv_handler(EV_P_ ev_io *w, int revents)
case RSPAMD_SRV_HYPERSCAN_LOADED:
#ifdef WITH_HYPERSCAN
/* Load RE cache to provide it for new forks */
- if (rspamd_re_cache_is_hs_loaded(rspamd_main->cfg->re_cache) != RSPAMD_HYPERSCAN_LOADED_FULL ||
- cmd.cmd.hs_loaded.forced) {
- rspamd_re_cache_load_hyperscan(
+ if (cmd.cmd.hs_loaded.scope[0] != '\0') {
+ /* Scoped loading */
+ const char *scope = cmd.cmd.hs_loaded.scope;
+ msg_info_main("received scoped hyperscan cache loaded from %s for scope: %s",
+ cmd.cmd.hs_loaded.cache_dir, scope);
+
+ /* Load specific scope */
+ rspamd_re_cache_load_hyperscan_scoped(
rspamd_main->cfg->re_cache,
cmd.cmd.hs_loaded.cache_dir,
false);
- }
-
- /* After getting this notice, we can clean up old hyperscan files */
-
- rspamd_hyperscan_notice_loaded();
- msg_info_main("received hyperscan cache loaded from %s",
- cmd.cmd.hs_loaded.cache_dir);
+ /* Broadcast scoped command to all workers */
+ memset(&wcmd, 0, sizeof(wcmd));
+ wcmd.type = RSPAMD_CONTROL_HYPERSCAN_LOADED;
+ rspamd_strlcpy(wcmd.cmd.hs_loaded.cache_dir,
+ cmd.cmd.hs_loaded.cache_dir,
+ sizeof(wcmd.cmd.hs_loaded.cache_dir));
+ rspamd_strlcpy(wcmd.cmd.hs_loaded.scope,
+ cmd.cmd.hs_loaded.scope,
+ sizeof(wcmd.cmd.hs_loaded.scope));
+ wcmd.cmd.hs_loaded.forced = cmd.cmd.hs_loaded.forced;
+ rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
+ rspamd_control_ignore_io_handler, NULL, worker->pid);
+ }
+ else {
+ /* Legacy full cache loading */
+ if (rspamd_re_cache_is_hs_loaded(rspamd_main->cfg->re_cache) != RSPAMD_HYPERSCAN_LOADED_FULL ||
+ cmd.cmd.hs_loaded.forced) {
+ rspamd_re_cache_load_hyperscan(
+ rspamd_main->cfg->re_cache,
+ cmd.cmd.hs_loaded.cache_dir,
+ false);
+ }
- /* Broadcast command to all workers */
- memset(&wcmd, 0, sizeof(wcmd));
- wcmd.type = RSPAMD_CONTROL_HYPERSCAN_LOADED;
- rspamd_strlcpy(wcmd.cmd.hs_loaded.cache_dir,
- cmd.cmd.hs_loaded.cache_dir,
- sizeof(wcmd.cmd.hs_loaded.cache_dir));
- wcmd.cmd.hs_loaded.forced = cmd.cmd.hs_loaded.forced;
- rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
- rspamd_control_ignore_io_handler, NULL, worker->pid);
+ /* After getting this notice, we can clean up old hyperscan files */
+ rspamd_hyperscan_notice_loaded();
+
+ msg_info_main("received hyperscan cache loaded from %s",
+ cmd.cmd.hs_loaded.cache_dir);
+
+ /* Broadcast command to all workers */
+ memset(&wcmd, 0, sizeof(wcmd));
+ wcmd.type = RSPAMD_CONTROL_HYPERSCAN_LOADED;
+ rspamd_strlcpy(wcmd.cmd.hs_loaded.cache_dir,
+ cmd.cmd.hs_loaded.cache_dir,
+ sizeof(wcmd.cmd.hs_loaded.cache_dir));
+ wcmd.cmd.hs_loaded.forced = cmd.cmd.hs_loaded.forced;
+ wcmd.cmd.hs_loaded.scope[0] = '\0'; /* Empty scope for legacy */
+ rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
+ rspamd_control_ignore_io_handler, NULL, worker->pid);
+ }
#endif
break;
case RSPAMD_SRV_MONITORED_CHANGE:
@@ -1137,6 +1168,10 @@ rspamd_srv_handler(EV_P_ ev_io *w, int revents)
rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
rspamd_control_ignore_io_handler, NULL, worker->pid);
break;
+ case RSPAMD_SRV_WORKERS_SPAWNED:
+ /* No need to broadcast, this is just a notification from main to specific workers */
+ rdata->rep.reply.workers_spawned.status = 0;
+ break;
default:
msg_err_main("unknown command type: %d", cmd.type);
break;
@@ -1390,6 +1425,9 @@ rspamd_control_command_from_string(const char *str)
else if (g_ascii_strcasecmp(str, "child_change") == 0) {
ret = RSPAMD_CONTROL_CHILD_CHANGE;
}
+ else if (g_ascii_strcasecmp(str, "workers_spawned") == 0) {
+ ret = RSPAMD_CONTROL_WORKERS_SPAWNED;
+ }
return ret;
}
@@ -1430,6 +1468,9 @@ rspamd_control_command_to_string(enum rspamd_control_type cmd)
case RSPAMD_CONTROL_CHILD_CHANGE:
reply = "child_change";
break;
+ case RSPAMD_CONTROL_WORKERS_SPAWNED:
+ reply = "workers_spawned";
+ break;
default:
break;
}
@@ -1469,6 +1510,9 @@ const char *rspamd_srv_command_to_string(enum rspamd_srv_type cmd)
case RSPAMD_SRV_FUZZY_BLOCKED:
reply = "fuzzy_blocked";
break;
+ case RSPAMD_SRV_WORKERS_SPAWNED:
+ reply = "workers_spawned";
+ break;
}
return reply;
diff --git a/src/libserver/rspamd_control.h b/src/libserver/rspamd_control.h
index a08ba7948..81603cab2 100644
--- a/src/libserver/rspamd_control.h
+++ b/src/libserver/rspamd_control.h
@@ -37,6 +37,7 @@ enum rspamd_control_type {
RSPAMD_CONTROL_MONITORED_CHANGE,
RSPAMD_CONTROL_CHILD_CHANGE,
RSPAMD_CONTROL_FUZZY_BLOCKED,
+ RSPAMD_CONTROL_WORKERS_SPAWNED,
RSPAMD_CONTROL_MAX
};
@@ -49,7 +50,8 @@ enum rspamd_srv_type {
RSPAMD_SRV_HEARTBEAT,
RSPAMD_SRV_HEALTH,
RSPAMD_SRV_NOTICE_HYPERSCAN_CACHE,
- RSPAMD_SRV_FUZZY_BLOCKED, /* Used to notify main process about a blocked ip */
+ RSPAMD_SRV_FUZZY_BLOCKED, /* Used to notify main process about a blocked ip */
+ RSPAMD_SRV_WORKERS_SPAWNED, /* Used to notify workers that all workers have been spawned */
};
enum rspamd_log_pipe_type {
@@ -74,6 +76,7 @@ struct rspamd_control_command {
struct {
gboolean forced;
char cache_dir[CONTROL_PATHLEN];
+ char scope[64]; /* Scope name; an empty string (scope[0] == '\0') selects legacy full cache loading */
} hs_loaded;
struct {
char tag[32];
@@ -106,6 +109,9 @@ struct rspamd_control_command {
} addr;
sa_family_t af;
} fuzzy_blocked;
+ struct {
+ unsigned int workers_count;
+ } workers_spawned;
} cmd;
};
@@ -147,6 +153,9 @@ struct rspamd_control_reply {
struct {
unsigned int status;
} fuzzy_blocked;
+ struct {
+ unsigned int status;
+ } workers_spawned;
} reply;
};
@@ -164,6 +173,7 @@ struct rspamd_srv_command {
struct {
gboolean forced;
char cache_dir[CONTROL_PATHLEN];
+ char scope[64]; /* Scope name; an empty string (scope[0] == '\0') selects legacy full cache loading */
} hs_loaded;
struct {
char tag[32];
@@ -201,6 +211,10 @@ struct rspamd_srv_command {
} addr;
sa_family_t af;
} fuzzy_blocked;
+ /* Sent when all workers have been spawned */
+ struct {
+ unsigned int workers_count;
+ } workers_spawned;
} cmd;
};
@@ -238,6 +252,9 @@ struct rspamd_srv_reply {
struct {
int unused;
} fuzzy_blocked;
+ struct {
+ int status;
+ } workers_spawned;
} reply;
};
diff --git a/src/libserver/rspamd_symcache.h b/src/libserver/rspamd_symcache.h
index 5725a2885..f020b6055 100644
--- a/src/libserver/rspamd_symcache.h
+++ b/src/libserver/rspamd_symcache.h
@@ -571,6 +571,13 @@ void rspamd_symcache_timeout_result_free(struct rspamd_symcache_timeout_result *
* @param task
*/
void rspamd_symcache_runtime_destroy(struct rspamd_task *task);
+
+/**
+ * Request that the symbols cache be resorted (typically after dynamic symbol registration)
+ * @param cache symbols cache that should be resorted
+ */
+void rspamd_symcache_promote_resort(struct rspamd_symcache *cache);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/libserver/symcache/symcache_c.cxx b/src/libserver/symcache/symcache_c.cxx
index 047fc1181..6221aa238 100644
--- a/src/libserver/symcache/symcache_c.cxx
+++ b/src/libserver/symcache/symcache_c.cxx
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -719,4 +719,11 @@ void rspamd_symcache_runtime_destroy(struct rspamd_task *task)
{
auto *cache_runtime = C_API_SYMCACHE_RUNTIME(task->symcache_runtime);
cache_runtime->savepoint_dtor(task);
-} \ No newline at end of file
+}
+
+void rspamd_symcache_promote_resort(struct rspamd_symcache *cache)
+{
+ auto *real_cache = C_API_SYMCACHE(cache);
+
+ real_cache->promote_resort();
+}
diff --git a/src/libserver/symcache/symcache_impl.cxx b/src/libserver/symcache/symcache_impl.cxx
index c0278cfc1..c1ca2a6ed 100644
--- a/src/libserver/symcache/symcache_impl.cxx
+++ b/src/libserver/symcache/symcache_impl.cxx
@@ -274,7 +274,7 @@ auto symcache::load_items() -> bool
return false;
}
- auto *parser = ucl_parser_new(0);
+ auto *parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS);
const auto *p = (const std::uint8_t *) (hdr + 1);
if (!ucl_parser_add_chunk(parser, p, cached_map->get_size() - sizeof(*hdr))) {
diff --git a/src/libserver/symcache/symcache_internal.hxx b/src/libserver/symcache/symcache_internal.hxx
index c7dda51d1..f715b5bb0 100644
--- a/src/libserver/symcache/symcache_internal.hxx
+++ b/src/libserver/symcache/symcache_internal.hxx
@@ -644,6 +644,14 @@ public:
* @return
*/
auto get_max_timeout(std::vector<std::pair<double, const cache_item *>> &elts) const -> double;
+
+ /**
+ * Bump the order generation counter so that the cache is resorted on next use (after dynamic symbol registration)
+ */
+ auto promote_resort() -> void
+ {
+ cur_order_gen++;
+ }
};
diff --git a/src/libserver/task.c b/src/libserver/task.c
index bd1e07549..f655ab11b 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -196,8 +196,8 @@ void rspamd_task_free(struct rspamd_task *task)
rspamd_email_address_free(task->from_envelope_orig);
}
- if (task->meta_words) {
- g_array_free(task->meta_words, TRUE);
+ if (task->meta_words.a) {
+ kv_destroy(task->meta_words);
}
ucl_object_unref(task->messages);
@@ -730,7 +730,7 @@ rspamd_task_process(struct rspamd_task *task, unsigned int stages)
if (all_done && (task->flags & RSPAMD_TASK_FLAG_LEARN_AUTO) &&
!RSPAMD_TASK_IS_EMPTY(task) &&
- !(task->flags & (RSPAMD_TASK_FLAG_LEARN_SPAM | RSPAMD_TASK_FLAG_LEARN_HAM))) {
+ !(task->flags & (RSPAMD_TASK_FLAG_LEARN_SPAM | RSPAMD_TASK_FLAG_LEARN_HAM | RSPAMD_TASK_FLAG_LEARN_CLASS))) {
rspamd_stat_check_autolearn(task);
}
break;
@@ -738,12 +738,32 @@ rspamd_task_process(struct rspamd_task *task, unsigned int stages)
case RSPAMD_TASK_STAGE_LEARN:
case RSPAMD_TASK_STAGE_LEARN_PRE:
case RSPAMD_TASK_STAGE_LEARN_POST:
- if (task->flags & (RSPAMD_TASK_FLAG_LEARN_SPAM | RSPAMD_TASK_FLAG_LEARN_HAM)) {
+ if (task->flags & (RSPAMD_TASK_FLAG_LEARN_SPAM | RSPAMD_TASK_FLAG_LEARN_HAM | RSPAMD_TASK_FLAG_LEARN_CLASS)) {
if (task->err == NULL) {
- if (!rspamd_stat_learn(task,
- task->flags & RSPAMD_TASK_FLAG_LEARN_SPAM,
- task->cfg->lua_state, task->classifier,
- st, &stat_error)) {
+ gboolean learn_result = FALSE;
+
+ if (task->flags & RSPAMD_TASK_FLAG_LEARN_CLASS) {
+ /* Multi-class learning */
+ const char *autolearn_class = rspamd_task_get_autolearn_class(task);
+ if (autolearn_class) {
+ learn_result = rspamd_stat_learn_class(task, autolearn_class,
+ task->cfg->lua_state, task->classifier,
+ st, &stat_error);
+ }
+ else {
+ g_set_error(&stat_error, g_quark_from_static_string("stat"), 500,
+ "No autolearn class specified for multi-class learning");
+ }
+ }
+ else {
+ /* Legacy binary learning */
+ learn_result = rspamd_stat_learn(task,
+ task->flags & RSPAMD_TASK_FLAG_LEARN_SPAM,
+ task->cfg->lua_state, task->classifier,
+ st, &stat_error);
+ }
+
+ if (!learn_result) {
if (stat_error == NULL) {
g_set_error(&stat_error,
@@ -922,15 +942,14 @@ rspamd_learn_task_spam(struct rspamd_task *task,
const char *classifier,
GError **err)
{
+ /* Use unified class-based approach internally */
+ const char *class_name = is_spam ? "spam" : "ham";
+
/* Disable learn auto flag to avoid bad learn codes */
task->flags &= ~RSPAMD_TASK_FLAG_LEARN_AUTO;
- if (is_spam) {
- task->flags |= RSPAMD_TASK_FLAG_LEARN_SPAM;
- }
- else {
- task->flags |= RSPAMD_TASK_FLAG_LEARN_HAM;
- }
+ /* Use the unified class-based learning approach */
+ rspamd_task_set_autolearn_class(task, class_name);
task->classifier = classifier;
diff --git a/src/libserver/task.h b/src/libserver/task.h
index 6be350098..a1742e160 100644
--- a/src/libserver/task.h
+++ b/src/libserver/task.h
@@ -24,6 +24,7 @@
#include "dns.h"
#include "re_cache.h"
#include "khash.h"
+#include "libserver/word.h"
#ifdef __cplusplus
extern "C" {
@@ -103,9 +104,9 @@ enum rspamd_task_stage {
#define RSPAMD_TASK_FLAG_LEARN_SPAM (1u << 12u)
#define RSPAMD_TASK_FLAG_LEARN_HAM (1u << 13u)
#define RSPAMD_TASK_FLAG_LEARN_AUTO (1u << 14u)
+#define RSPAMD_TASK_FLAG_LEARN_CLASS (1u << 25u)
#define RSPAMD_TASK_FLAG_BROKEN_HEADERS (1u << 15u)
-#define RSPAMD_TASK_FLAG_HAS_SPAM_TOKENS (1u << 16u)
-#define RSPAMD_TASK_FLAG_HAS_HAM_TOKENS (1u << 17u)
+/* Removed RSPAMD_TASK_FLAG_HAS_SPAM_TOKENS and RSPAMD_TASK_FLAG_HAS_HAM_TOKENS - not needed in multi-class */
#define RSPAMD_TASK_FLAG_EMPTY (1u << 18u)
#define RSPAMD_TASK_FLAG_PROFILE (1u << 19u)
#define RSPAMD_TASK_FLAG_GREYLISTED (1u << 20u)
@@ -113,7 +114,7 @@ enum rspamd_task_stage {
#define RSPAMD_TASK_FLAG_SSL (1u << 22u)
#define RSPAMD_TASK_FLAG_BAD_UNICODE (1u << 23u)
#define RSPAMD_TASK_FLAG_MESSAGE_REWRITE (1u << 24u)
-#define RSPAMD_TASK_FLAG_MAX_SHIFT (24u)
+#define RSPAMD_TASK_FLAG_MAX_SHIFT (25u)
/* Request has been done by a local client */
#define RSPAMD_TASK_PROTOCOL_FLAG_LOCAL_CLIENT (1u << 1u)
@@ -187,7 +188,7 @@ struct rspamd_task {
struct rspamd_scan_result *result; /**< Metric result */
khash_t(rspamd_task_lua_cache) lua_cache; /**< cache of lua objects */
GPtrArray *tokens; /**< statistics tokens */
- GArray *meta_words; /**< rspamd_stat_token_t produced from meta headers
+ rspamd_words_t meta_words; /**< rspamd_word_t produced from meta headers
(e.g. Subject) */
GPtrArray *rcpt_envelope; /**< array of rspamd_email_address */
diff --git a/src/libserver/word.h b/src/libserver/word.h
new file mode 100644
index 000000000..7698bf327
--- /dev/null
+++ b/src/libserver/word.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2025 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_WORD_H
+#define RSPAMD_WORD_H
+
+#include "config.h"
+#include "fstring.h"
+#include "contrib/libucl/kvec.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file word.h
+ * Word processing structures and definitions
+ */
+
+/* Word flags */
+#define RSPAMD_WORD_FLAG_TEXT (1u << 0)
+#define RSPAMD_WORD_FLAG_META (1u << 1)
+#define RSPAMD_WORD_FLAG_LUA_META (1u << 2)
+#define RSPAMD_WORD_FLAG_EXCEPTION (1u << 3)
+#define RSPAMD_WORD_FLAG_HEADER (1u << 4)
+#define RSPAMD_WORD_FLAG_UNIGRAM (1u << 5)
+#define RSPAMD_WORD_FLAG_UTF (1u << 6)
+#define RSPAMD_WORD_FLAG_NORMALISED (1u << 7)
+#define RSPAMD_WORD_FLAG_STEMMED (1u << 8)
+#define RSPAMD_WORD_FLAG_BROKEN_UNICODE (1u << 9)
+#define RSPAMD_WORD_FLAG_STOP_WORD (1u << 10)
+#define RSPAMD_WORD_FLAG_SKIPPED (1u << 11)
+#define RSPAMD_WORD_FLAG_INVISIBLE_SPACES (1u << 12)
+#define RSPAMD_WORD_FLAG_EMOJI (1u << 13)
+
+/**
+ * Word structure representing tokenized text
+ */
+typedef struct rspamd_word_s {
+ rspamd_ftok_t original; /* utf8 raw */
+ rspamd_ftok_unicode_t unicode; /* array of unicode characters, normalized, lowercased */
+ rspamd_ftok_t normalized; /* normalized and lowercased utf8 */
+ rspamd_ftok_t stemmed; /* stemmed utf8 */
+ unsigned int flags;
+} rspamd_word_t;
+
+/**
+ * Vector of words using kvec
+ */
+typedef kvec_t(rspamd_word_t) rspamd_words_t;
+
+/* Legacy typedefs for backward compatibility */
+typedef rspamd_word_t rspamd_stat_token_t;
+
+/* Legacy flag aliases for backward compatibility */
+#define RSPAMD_STAT_TOKEN_FLAG_TEXT RSPAMD_WORD_FLAG_TEXT
+#define RSPAMD_STAT_TOKEN_FLAG_META RSPAMD_WORD_FLAG_META
+#define RSPAMD_STAT_TOKEN_FLAG_LUA_META RSPAMD_WORD_FLAG_LUA_META
+#define RSPAMD_STAT_TOKEN_FLAG_EXCEPTION RSPAMD_WORD_FLAG_EXCEPTION
+#define RSPAMD_STAT_TOKEN_FLAG_HEADER RSPAMD_WORD_FLAG_HEADER
+#define RSPAMD_STAT_TOKEN_FLAG_UNIGRAM RSPAMD_WORD_FLAG_UNIGRAM
+#define RSPAMD_STAT_TOKEN_FLAG_UTF RSPAMD_WORD_FLAG_UTF
+#define RSPAMD_STAT_TOKEN_FLAG_NORMALISED RSPAMD_WORD_FLAG_NORMALISED
+#define RSPAMD_STAT_TOKEN_FLAG_STEMMED RSPAMD_WORD_FLAG_STEMMED
+#define RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE RSPAMD_WORD_FLAG_BROKEN_UNICODE
+#define RSPAMD_STAT_TOKEN_FLAG_STOP_WORD RSPAMD_WORD_FLAG_STOP_WORD
+#define RSPAMD_STAT_TOKEN_FLAG_SKIPPED RSPAMD_WORD_FLAG_SKIPPED
+#define RSPAMD_STAT_TOKEN_FLAG_INVISIBLE_SPACES RSPAMD_WORD_FLAG_INVISIBLE_SPACES
+#define RSPAMD_STAT_TOKEN_FLAG_EMOJI RSPAMD_WORD_FLAG_EMOJI
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RSPAMD_WORD_H */
diff --git a/src/libserver/worker_util.c b/src/libserver/worker_util.c
index d0ac8d8d3..fdcc5a4b3 100644
--- a/src/libserver/worker_util.c
+++ b/src/libserver/worker_util.c
@@ -1908,14 +1908,27 @@ rspamd_worker_hyperscan_ready(struct rspamd_main *rspamd_main,
memset(&rep, 0, sizeof(rep));
rep.type = RSPAMD_CONTROL_HYPERSCAN_LOADED;
- if (rspamd_re_cache_is_hs_loaded(cache) != RSPAMD_HYPERSCAN_LOADED_FULL ||
- cmd->cmd.hs_loaded.forced) {
+ /* Check if this is a scoped notification */
+ if (cmd->cmd.hs_loaded.scope[0] != '\0') {
+ /* Scoped hyperscan loading */
+ const char *scope = cmd->cmd.hs_loaded.scope;
- msg_info("loading hyperscan expressions after receiving compilation "
- "notice: %s",
- (rspamd_re_cache_is_hs_loaded(cache) != RSPAMD_HYPERSCAN_LOADED_FULL) ? "new db" : "forced update");
- rep.reply.hs_loaded.status = rspamd_re_cache_load_hyperscan(
- worker->srv->cfg->re_cache, cmd->cmd.hs_loaded.cache_dir, false);
+ msg_info("loading hyperscan expressions for scope '%s' after receiving compilation notice", scope);
+
+ rep.reply.hs_loaded.status = rspamd_re_cache_load_hyperscan_scoped(
+ cache, cmd->cmd.hs_loaded.cache_dir, false);
+ }
+ else {
+ /* Legacy/full cache loading */
+ if (rspamd_re_cache_is_hs_loaded(cache) != RSPAMD_HYPERSCAN_LOADED_FULL ||
+ cmd->cmd.hs_loaded.forced) {
+
+ msg_info("loading hyperscan expressions after receiving compilation "
+ "notice: %s",
+ (rspamd_re_cache_is_hs_loaded(cache) != RSPAMD_HYPERSCAN_LOADED_FULL) ? "new db" : "forced update");
+ rep.reply.hs_loaded.status = rspamd_re_cache_load_hyperscan(
+ worker->srv->cfg->re_cache, cmd->cmd.hs_loaded.cache_dir, false);
+ }
}
if (write(fd, &rep, sizeof(rep)) != sizeof(rep)) {
@@ -2138,7 +2151,7 @@ rspamd_controller_load_saved_stats(struct rspamd_main *rspamd_main,
return;
}
- parser = ucl_parser_new(0);
+ parser = ucl_parser_new(UCL_PARSER_SAFE_FLAGS);
if (!ucl_parser_add_file(parser, cfg->stats_file)) {
msg_err_config("cannot parse controller stats from %s: %s",
@@ -2556,4 +2569,4 @@ rspamd_metrics_to_prometheus_string(const ucl_object_t *top)
/* Must be finalized and freed by caller */
return output;
-} \ No newline at end of file
+}