aboutsummaryrefslogtreecommitdiffstats
path: root/src/libserver
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2018-07-23 21:10:12 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2018-07-23 21:10:12 +0100
commit9992ba79ad516a4a72df4ce7c75edebaec63e895 (patch)
tree718ed3f828180ea4357d89203ab109f2ad3b8f66 /src/libserver
parent4e6d48ea4fe9a3acb0912a6e408af7cb36355327 (diff)
downloadrspamd-9992ba79ad516a4a72df4ce7c75edebaec63e895.tar.gz
rspamd-9992ba79ad516a4a72df4ce7c75edebaec63e895.zip
[Project] Apply topological sorting for symbols in Rspamd
Diffstat (limited to 'src/libserver')
-rw-r--r--src/libserver/symbols_cache.c117
1 files changed, 89 insertions, 28 deletions
diff --git a/src/libserver/symbols_cache.c b/src/libserver/symbols_cache.c
index 5e169c52b..9bea3a87a 100644
--- a/src/libserver/symbols_cache.c
+++ b/src/libserver/symbols_cache.c
@@ -123,6 +123,8 @@ struct cache_item {
gint parent;
/* Priority */
gint priority;
+ /* Topological order */
+ guint order;
gint id;
gint frequency_peaks;
@@ -280,6 +282,12 @@ prefilters_cmp (const void *p1, const void *p2, gpointer ud)
return 0;
}
+#define TSORT_MARK_PERM(it) (it)->order |= (1u << 31)
+#define TSORT_MARK_TEMP(it) (it)->order |= (1u << 30)
+#define TSORT_IS_MARKED_PERM(it) ((it)->order & (1u << 31))
+#define TSORT_IS_MARKED_TEMP(it) ((it)->order & (1u << 30))
+#define TSORT_UNMASK(it) ((it)->order & ~((1u << 31) | (1u << 30)))
+
static gint
cache_logic_cmp (const void *p1, const void *p2, gpointer ud)
{
@@ -289,35 +297,35 @@ cache_logic_cmp (const void *p1, const void *p2, gpointer ud)
double w1, w2;
double weight1, weight2;
double f1 = 0, f2 = 0, t1, t2, avg_freq, avg_weight;
-
- if (i1->deps->len != 0 || i2->deps->len != 0) {
- /* TODO: handle complex dependencies */
- w1 = 1.0;
- w2 = 1.0;
-
- if (i1->deps->len != 0) {
- w1 = 1.0 / (i1->deps->len);
- }
- if (i2->deps->len != 0) {
- w2 = 1.0 / (i2->deps->len);
+ guint o1 = TSORT_UNMASK (i1), o2 = TSORT_UNMASK (i2);
+
+
+ if (o1 == o2) {
+ /* Heurstic */
+ if (i1->priority == i2->priority) {
+ avg_freq = ((gdouble) cache->total_hits / cache->used_items);
+ avg_weight = (cache->total_weight / cache->used_items);
+ f1 = (double) i1->st->total_hits / avg_freq;
+ f2 = (double) i2->st->total_hits / avg_freq;
+ weight1 = fabs (i1->st->weight) / avg_weight;
+ weight2 = fabs (i2->st->weight) / avg_weight;
+ t1 = i1->st->avg_time;
+ t2 = i2->st->avg_time;
+ w1 = SCORE_FUN (weight1, f1, t1);
+ w2 = SCORE_FUN (weight2, f2, t2);
+ } else {
+ /* Strict sorting */
+ w1 = abs (i1->priority);
+ w2 = abs (i2->priority);
}
}
- else if (i1->priority == i2->priority) {
- avg_freq = ((gdouble)cache->total_hits / cache->used_items);
- avg_weight = (cache->total_weight / cache->used_items);
- f1 = (double)i1->st->total_hits / avg_freq;
- f2 = (double)i2->st->total_hits / avg_freq;
- weight1 = fabs (i1->st->weight) / avg_weight;
- weight2 = fabs (i2->st->weight) / avg_weight;
- t1 = i1->st->avg_time;
- t2 = i2->st->avg_time;
- w1 = SCORE_FUN (weight1, f1, t1);
- w2 = SCORE_FUN (weight2, f2, t2);
- }
else {
- /* Strict sorting */
- w1 = abs (i1->priority);
- w2 = abs (i2->priority);
+ /*
+ * Topological ordering is reverse, deps has HIGHER order than top level
+ * elements. Hence, we swap w1 and w2 here.
+ */
+ w1 = o2;
+ w2 = o1;
}
if (w2 > w1) {
@@ -374,6 +382,40 @@ rspamd_set_counter_ema (struct counter_data *cd, gdouble value, gdouble alpha)
return cd->mean;
}
+static void
+rspamd_symbols_cache_tsort_visit (struct symbols_cache *cache,
+ struct cache_item *it,
+ guint cur_order)
+{
+ struct cache_dependency *dep;
+ guint i;
+
+ if (TSORT_IS_MARKED_PERM (it)) {
+ if (cur_order > TSORT_UNMASK (it)) {
+ /* Need to recalculate the whole chain */
+ it->order = cur_order; /* That also removes all masking */
+ }
+ else {
+ /* We are fine, stop DFS */
+ return;
+ }
+ }
+ else if (TSORT_IS_MARKED_TEMP (it)) {
+ msg_err_cache ("cyclic dependencies found when checking '%s'!",
+ it->symbol);
+ return;
+ }
+
+ TSORT_MARK_TEMP (it);
+
+ PTR_ARRAY_FOREACH (it->deps, i, dep) {
+ rspamd_symbols_cache_tsort_visit (cache, dep->item, cur_order + 1);
+ }
+
+ it->order |= cur_order;
+
+ TSORT_MARK_PERM (it);
+}
static void
rspamd_symbols_cache_resort (struct symbols_cache *cache)
@@ -392,12 +434,31 @@ rspamd_symbols_cache_resort (struct symbols_cache *cache)
if (!(it->type & (SYMBOL_TYPE_PREFILTER|
SYMBOL_TYPE_POSTFILTER|
SYMBOL_TYPE_COMPOSITE))) {
+ it->order = 0;
g_ptr_array_add (ord->d, it);
}
}
- cache->total_hits = total_hits;
+ /* Topological sort, intended to be O(N) but my implementation
+ * is not linear (semi-linear usually) as I want to make it as
+ * simple as possible.
+ * On each stage it does DFS for unseen nodes. In theory, that
+ * can be more complicated than linear - O(N^2) for specially
+ * crafted data. But I don't care.
+ */
+ PTR_ARRAY_FOREACH (ord->d, i, it) {
+ if (it->order == 0) {
+ rspamd_symbols_cache_tsort_visit (cache, it, 1);
+ }
+ }
+
+ /*
+ * Now we have all sorted and can do some heuristical sort, keeping
+ * topological order invariant
+ */
g_ptr_array_sort_with_data (ord->d, cache_logic_cmp, cache);
+ cache->total_hits = total_hits;
+
if (cache->items_by_order) {
REF_RELEASE (cache->items_by_order);
@@ -2182,7 +2243,7 @@ rspamd_symbols_cache_resort_cb (gint fd, short what, gpointer ud)
}
cbdata->last_resort = cur_ticks;
- rspamd_symbols_cache_resort (cache);
+ /* We don't do actual sorting due to topological guarantees */
}
void