aboutsummaryrefslogtreecommitdiffstats
path: root/src/libutil
diff options
context:
space:
mode:
Diffstat (limited to 'src/libutil')
-rw-r--r--src/libutil/expression.c123
-rw-r--r--src/libutil/fstring.h14
-rw-r--r--src/libutil/mem_pool.c7
-rw-r--r--src/libutil/mem_pool.h2
-rw-r--r--src/libutil/radix.c37
-rw-r--r--src/libutil/radix.h18
-rw-r--r--src/libutil/shingles.c27
-rw-r--r--src/libutil/shingles.h5
8 files changed, 202 insertions, 31 deletions
diff --git a/src/libutil/expression.c b/src/libutil/expression.c
index e36964e72..cac7594d6 100644
--- a/src/libutil/expression.c
+++ b/src/libutil/expression.c
@@ -80,6 +80,10 @@ struct rspamd_expr_process_data {
/* != NULL if trace is collected */
GPtrArray *trace;
rspamd_expression_process_cb process_closure;
+ /* Optimization thresholds for arithmetic operations */
+ double threshold;
+ enum rspamd_expression_op threshold_op;
+ gboolean has_threshold;
};
#define msg_debug_expression(...) rspamd_conditional_debug_fast(NULL, NULL, \
@@ -1198,10 +1202,55 @@ error_label:
}
/*
+ * Analyze AST node to determine if arithmetic operations can be optimized
+ * based on comparison context (e.g., A + B + C > 2 can stop at 3)
+ */
+static void
+rspamd_ast_analyze_node(GNode *node, struct rspamd_expr_process_data *process_data)
+{
+ struct rspamd_expression_elt *elt;
+ GNode *child;
+
+ if (!node || !node->data) {
+ return;
+ }
+
+ elt = (struct rspamd_expression_elt *) node->data;
+
+ /* Check if this is a comparison operation with arithmetic child */
+ if (elt->type == ELT_OP && (elt->p.op.op_flags & RSPAMD_EXPRESSION_COMPARISON)) {
+ /* Look for arithmetic operations in children */
+ child = node->children;
+ if (child && child->next) {
+ GNode *left = child;
+ GNode *right = child->next;
+ struct rspamd_expression_elt *left_elt = left->data;
+ struct rspamd_expression_elt *right_elt = right->data;
+
+ /* Check if left child is arithmetic operation and right is limit */
+ if (left_elt->type == ELT_OP &&
+ (left_elt->p.op.op_flags & RSPAMD_EXPRESSION_ARITHMETIC) &&
+ (left_elt->p.op.op_flags & RSPAMD_EXPRESSION_NARY) &&
+ right_elt->type == ELT_LIMIT) {
+
+ /* Set threshold for arithmetic optimization */
+ process_data->has_threshold = TRUE;
+ process_data->threshold = right_elt->p.lim;
+ process_data->threshold_op = elt->p.op.op;
+
+ msg_debug_expression_verbose("detected arithmetic optimization: %s %.1f",
+ rspamd_expr_op_to_str(elt->p.op.op),
+ right_elt->p.lim);
+ }
+ }
+ }
+}
+
+/*
* Node optimizer function: skip nodes that are not relevant
*/
static gboolean
-rspamd_ast_node_done(struct rspamd_expression_elt *elt, double acc)
+rspamd_ast_node_done(struct rspamd_expression_elt *elt, double acc, struct rspamd_expr_process_data *process_data)
{
gboolean ret = FALSE;
@@ -1217,6 +1266,47 @@ rspamd_ast_node_done(struct rspamd_expression_elt *elt, double acc)
case OP_OR:
ret = acc != 0;
break;
+ case OP_PLUS:
+ case OP_MULT:
+ /* Handle arithmetic operations with thresholds */
+ if (process_data->has_threshold) {
+ switch (process_data->threshold_op) {
+ case OP_GT:
+ /* For A + B + C > 2, stop when acc > threshold */
+ ret = acc > process_data->threshold;
+ break;
+ case OP_GE:
+ /* For A + B + C >= 2, stop when acc >= threshold */
+ ret = acc >= process_data->threshold;
+ break;
+ case OP_LT:
+ /* For A + B + C < 2, stop when acc >= threshold (result will be false) */
+ ret = acc >= process_data->threshold;
+ break;
+ case OP_LE:
+ /* For A + B + C <= 2, stop when acc > threshold (result will be false) */
+ ret = acc > process_data->threshold;
+ break;
+ case OP_EQ:
+ /* For A + B + C == 2, stop when acc > threshold (result will be false) */
+ ret = acc > process_data->threshold;
+ break;
+ case OP_NE:
+ /* For A + B + C != 2, stop when acc > threshold (result will be true) */
+ ret = acc > process_data->threshold;
+ break;
+ default:
+ break;
+ }
+
+ if (ret) {
+ msg_debug_expression_verbose("arithmetic optimization triggered: %s %.1f %s %.1f",
+ rspamd_expr_op_to_str(elt->p.op.op), acc,
+ rspamd_expr_op_to_str(process_data->threshold_op),
+ process_data->threshold);
+ }
+ }
+ break;
default:
break;
}
@@ -1340,9 +1430,28 @@ rspamd_ast_process_node(struct rspamd_expression *e, GNode *node,
double val;
gboolean calc_ticks = FALSE;
__attribute__((unused)) const char *op_name = NULL;
+ gboolean saved_has_threshold = FALSE;
+ double saved_threshold = 0.0;
+ enum rspamd_expression_op saved_threshold_op = OP_INVALID;
elt = node->data;
+ /* Analyze node for optimization opportunities */
+ if (elt->type == ELT_OP && (elt->p.op.op_flags & RSPAMD_EXPRESSION_COMPARISON)) {
+ /* Save current threshold state */
+ saved_has_threshold = process_data->has_threshold;
+ saved_threshold = process_data->threshold;
+ saved_threshold_op = process_data->threshold_op;
+
+ /* Reset threshold state */
+ process_data->has_threshold = FALSE;
+ process_data->threshold = 0.0;
+ process_data->threshold_op = OP_INVALID;
+
+ /* Analyze for optimization opportunities */
+ rspamd_ast_analyze_node(node, process_data);
+ }
+
switch (elt->type) {
case ELT_ATOM:
if (!(elt->flags & RSPAMD_EXPR_FLAG_PROCESSED)) {
@@ -1400,7 +1509,7 @@ rspamd_ast_process_node(struct rspamd_expression *e, GNode *node,
/* Check if we need to process further */
if (!(process_data->flags & RSPAMD_EXPRESSION_FLAG_NOOPT)) {
- if (rspamd_ast_node_done(elt, acc)) {
+ if (rspamd_ast_node_done(elt, acc, process_data)) {
msg_debug_expression_verbose("optimizer: done");
return acc;
}
@@ -1443,6 +1552,13 @@ rspamd_ast_process_node(struct rspamd_expression *e, GNode *node,
break;
}
+ /* Restore threshold state if it was saved */
+ if (elt->type == ELT_OP && (elt->p.op.op_flags & RSPAMD_EXPRESSION_COMPARISON)) {
+ process_data->has_threshold = saved_has_threshold;
+ process_data->threshold = saved_threshold;
+ process_data->threshold_op = saved_threshold_op;
+ }
+
return acc;
}
@@ -1477,6 +1593,9 @@ rspamd_process_expression_closure(struct rspamd_expression *expr,
pd.process_closure = cb;
pd.flags = flags;
pd.ud = runtime_ud;
+ pd.has_threshold = FALSE;
+ pd.threshold = 0.0;
+ pd.threshold_op = OP_INVALID;
if (track) {
pd.trace = g_ptr_array_sized_new(32);
diff --git a/src/libutil/fstring.h b/src/libutil/fstring.h
index 0792ab9fa..ca9f689c8 100644
--- a/src/libutil/fstring.h
+++ b/src/libutil/fstring.h
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
+/*
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -30,8 +30,8 @@ extern "C" {
*/
typedef struct f_str_s {
- gsize len;
- gsize allocated;
+ size_t len;
+ size_t allocated;
char str[];
} rspamd_fstring_t;
@@ -40,12 +40,12 @@ typedef struct f_str_s {
#define RSPAMD_FSTRING_LIT(lit) rspamd_fstring_new_init((lit), sizeof(lit) - 1)
typedef struct f_str_tok {
- gsize len;
+ size_t len;
const char *begin;
} rspamd_ftok_t;
typedef struct f_str_unicode_tok {
- gsize len; /* in UChar32 */
+ size_t len; /* in UChar32 */
const UChar32 *begin;
} rspamd_ftok_unicode_t;
diff --git a/src/libutil/mem_pool.c b/src/libutil/mem_pool.c
index 3dc67bc5f..575b4e497 100644
--- a/src/libutil/mem_pool.c
+++ b/src/libutil/mem_pool.c
@@ -403,9 +403,10 @@ rspamd_mempool_new_(gsize size, const char *tag, int flags, const char *loc)
/* Generate new uid */
uint64_t uid = rspamd_random_uint64_fast();
- rspamd_encode_hex_buf((unsigned char *) &uid, sizeof(uid),
- new_pool->tag.uid, sizeof(new_pool->tag.uid) - 1);
- new_pool->tag.uid[sizeof(new_pool->tag.uid) - 1] = '\0';
+ G_STATIC_ASSERT(sizeof(new_pool->tag.uid) >= sizeof(uid) * 2 + 1);
+ int enc_len = rspamd_encode_hex_buf((unsigned char *) &uid, sizeof(uid),
+ new_pool->tag.uid, sizeof(new_pool->tag.uid) - 1);
+ new_pool->tag.uid[enc_len] = '\0';
mem_pool_stat->pools_allocated++;
diff --git a/src/libutil/mem_pool.h b/src/libutil/mem_pool.h
index 651b44661..00d1a2067 100644
--- a/src/libutil/mem_pool.h
+++ b/src/libutil/mem_pool.h
@@ -71,7 +71,7 @@ struct f_str_s;
#endif
#define MEMPOOL_TAG_LEN 16
-#define MEMPOOL_UID_LEN 16
+#define MEMPOOL_UID_LEN 32
/* All pointers are aligned as this variable */
#define MIN_MEM_ALIGNMENT G_MEM_ALIGN
diff --git a/src/libutil/radix.c b/src/libutil/radix.c
index 2cae8e34a..bdd722b49 100644
--- a/src/libutil/radix.c
+++ b/src/libutil/radix.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -66,7 +66,7 @@ radix_find_compressed(radix_compressed_t *tree, const uint8_t *key, gsize keylen
uintptr_t
radix_insert_compressed(radix_compressed_t *tree,
- uint8_t *key, gsize keylen,
+ const uint8_t *key, gsize keylen,
gsize masklen,
uintptr_t value)
{
@@ -128,6 +128,39 @@ radix_insert_compressed(radix_compressed_t *tree,
return old;
}
+uintptr_t
+radix_insert_compressed_addr(radix_compressed_t *tree,
+ const rspamd_inet_addr_t *addr,
+ uintptr_t value)
+{
+ const unsigned char *key;
+ unsigned int klen = 0;
+ unsigned char buf[16];
+
+ if (addr == NULL) {
+ return RADIX_NO_VALUE;
+ }
+
+ key = rspamd_inet_address_get_hash_key(addr, &klen);
+
+ if (key && klen) {
+ if (klen == 4) {
+ /* Map to ipv6 */
+ memset(buf, 0, 10);
+ buf[10] = 0xffu;
+ buf[11] = 0xffu;
+ memcpy(buf + 12, key, klen);
+
+ key = buf;
+ klen = sizeof(buf);
+ }
+
+ return radix_insert_compressed(tree, key, klen, 0, value);
+ }
+
+ return RADIX_NO_VALUE;
+}
+
radix_compressed_t *
radix_create_compressed(const char *tree_name)
diff --git a/src/libutil/radix.h b/src/libutil/radix.h
index c4fe96441..8c1224707 100644
--- a/src/libutil/radix.h
+++ b/src/libutil/radix.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2024 Vsevolod Stakhov
+ * Copyright 2025 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@
#include "mem_pool.h"
#include "util.h"
-#define RADIX_NO_VALUE (uintptr_t) - 1
+#define RADIX_NO_VALUE (uintptr_t) -1
#ifdef __cplusplus
extern "C" {
@@ -39,11 +39,23 @@ typedef struct radix_tree_compressed radix_compressed_t;
*/
uintptr_t
radix_insert_compressed(radix_compressed_t *tree,
- uint8_t *key, gsize keylen,
+ const uint8_t *key, gsize keylen,
gsize masklen,
uintptr_t value);
/**
+ * Insert new address to the radix trie (works for IPv4 or IPv6 addresses)
+ * @param tree radix trie
+ * @param addr address to insert
+ * @param value opaque value pointer
+ * @return previous value of the key or `RADIX_NO_VALUE`
+ */
+uintptr_t
+radix_insert_compressed_addr(radix_compressed_t *tree,
+ const rspamd_inet_addr_t *addr,
+ uintptr_t value);
+
+/**
* Find a key in a radix trie
* @param tree radix trie
* @param key key to find (bitstring)
diff --git a/src/libutil/shingles.c b/src/libutil/shingles.c
index 5fe110eb8..c69c42292 100644
--- a/src/libutil/shingles.c
+++ b/src/libutil/shingles.c
@@ -18,6 +18,7 @@
#include "cryptobox.h"
#include "images.h"
#include "libstat/stat_api.h"
+#include "libserver/word.h"
#define SHINGLES_WINDOW 3
#define SHINGLES_KEY_SIZE rspamd_cryptobox_SIPKEYBYTES
@@ -112,7 +113,7 @@ rspamd_shingles_get_keys_cached(const unsigned char key[SHINGLES_KEY_SIZE])
}
struct rspamd_shingle *RSPAMD_OPTIMIZE("unroll-loops")
- rspamd_shingles_from_text(GArray *input,
+ rspamd_shingles_from_text(rspamd_words_t *input,
const unsigned char key[16],
rspamd_mempool_t *pool,
rspamd_shingles_filter filter,
@@ -123,12 +124,16 @@ struct rspamd_shingle *RSPAMD_OPTIMIZE("unroll-loops")
uint64_t **hashes;
unsigned char **keys;
rspamd_fstring_t *row;
- rspamd_stat_token_t *word;
+ rspamd_word_t *word;
uint64_t val;
int i, j, k;
gsize hlen, ilen = 0, beg = 0, widx = 0;
enum rspamd_cryptobox_fast_hash_type ht;
+ if (!input || !input->a) {
+ return NULL;
+ }
+
if (pool != NULL) {
res = rspamd_mempool_alloc(pool, sizeof(*res));
}
@@ -138,10 +143,10 @@ struct rspamd_shingle *RSPAMD_OPTIMIZE("unroll-loops")
row = rspamd_fstring_sized_new(256);
- for (i = 0; i < input->len; i++) {
- word = &g_array_index(input, rspamd_stat_token_t, i);
+ for (i = 0; i < kv_size(*input); i++) {
+ word = &kv_A(*input, i);
- if (!((word->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED) || word->stemmed.len == 0)) {
+ if (!((word->flags & RSPAMD_WORD_FLAG_SKIPPED) || word->stemmed.len == 0)) {
ilen++;
}
}
@@ -162,10 +167,10 @@ struct rspamd_shingle *RSPAMD_OPTIMIZE("unroll-loops")
for (j = beg; j < i; j++) {
word = NULL;
- while (widx < input->len) {
- word = &g_array_index(input, rspamd_stat_token_t, widx);
+ while (widx < kv_size(*input)) {
+ word = &kv_A(*input, widx);
- if ((word->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED) || word->stemmed.len == 0) {
+ if ((word->flags & RSPAMD_WORD_FLAG_SKIPPED) || word->stemmed.len == 0) {
widx++;
}
else {
@@ -237,10 +242,10 @@ struct rspamd_shingle *RSPAMD_OPTIMIZE("unroll-loops")
word = NULL;
- while (widx < input->len) {
- word = &g_array_index(input, rspamd_stat_token_t, widx);
+ while (widx < kv_size(*input)) {
+ word = &kv_A(*input, widx);
- if ((word->flags & RSPAMD_STAT_TOKEN_FLAG_SKIPPED) || word->stemmed.len == 0) {
+ if ((word->flags & RSPAMD_WORD_FLAG_SKIPPED) || word->stemmed.len == 0) {
widx++;
}
else {
diff --git a/src/libutil/shingles.h b/src/libutil/shingles.h
index fe6f16cf8..1ab2c6842 100644
--- a/src/libutil/shingles.h
+++ b/src/libutil/shingles.h
@@ -18,6 +18,7 @@
#include "config.h"
#include "mem_pool.h"
+#include "libserver/word.h"
#define RSPAMD_SHINGLE_SIZE 32
@@ -48,14 +49,14 @@ typedef uint64_t (*rspamd_shingles_filter)(uint64_t *input, gsize count,
/**
* Generate shingles from the input of fixed size strings using lemmatizer
* if needed
- * @param input array of `rspamd_fstring_t`
+ * @param input kvec of `rspamd_word_t`
* @param key secret key used to generate shingles
* @param pool pool to allocate shingles array
* @param filter hashes filtering function
* @param filterd opaque data for filtering function
* @return shingles array
*/
-struct rspamd_shingle *rspamd_shingles_from_text(GArray *input,
+struct rspamd_shingle *rspamd_shingles_from_text(rspamd_words_t *input,
const unsigned char key[16],
rspamd_mempool_t *pool,
rspamd_shingles_filter filter,