aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2017-07-06 19:35:02 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2017-07-06 19:35:02 +0100
commitf025f6935711db580d0a193deef48e4705420966 (patch)
treeff1d470f57be9f053e37486c65d6562b937419e9
parent52b4b8db0b126f7004d6aff79cbea3b06f94c927 (diff)
downloadrspamd-f025f6935711db580d0a193deef48e4705420966.tar.gz
rspamd-f025f6935711db580d0a193deef48e4705420966.zip
[Feature] Improve monitored timeouts logic
We now reduce the timeout on the first errors while a monitored object is still in use, and increase it on subsequent errors once the monitored object is dead. In conjunction with the previous changes, this should make monitoring really clever and responsive.
-rw-r--r--src/libserver/monitored.c49
1 files changed, 42 insertions, 7 deletions
diff --git a/src/libserver/monitored.c b/src/libserver/monitored.c
index 548dcf362..3202b80af 100644
--- a/src/libserver/monitored.c
+++ b/src/libserver/monitored.c
@@ -27,7 +27,7 @@ struct rspamd_monitored_methods {
void * (*monitored_config) (struct rspamd_monitored *m,
struct rspamd_monitored_ctx *ctx,
const ucl_object_t *opts);
- void (*monitored_update) (struct rspamd_monitored *m,
+ gboolean (*monitored_update) (struct rspamd_monitored *m,
struct rspamd_monitored_ctx *ctx, gpointer ud);
void (*monitored_dtor) (struct rspamd_monitored *m,
struct rspamd_monitored_ctx *ctx, gpointer ud);
@@ -50,6 +50,7 @@ struct rspamd_monitored_ctx {
struct rspamd_monitored {
gchar *url;
gdouble monitoring_interval;
+ gdouble monitoring_mult;
gdouble offline_time;
gdouble total_offline_time;
gdouble latency;
@@ -91,18 +92,38 @@ rspamd_monitored_propagate_error (struct rspamd_monitored *m,
msg_debug_mon ("%s on resolving %s, %d retries left",
error, m->url, m->max_errors - m->cur_errors);
m->cur_errors ++;
+ /* Reduce timeout */
+ rspamd_monitored_stop (m);
+ m->monitoring_mult /= 2.0;
+ rspamd_monitored_start (m);
}
else {
msg_info_mon ("%s on resolving %s, disable object",
error, m->url);
m->alive = FALSE;
m->offline_time = rspamd_get_calendar_ticks ();
+ rspamd_monitored_stop (m);
+ m->monitoring_mult = 1.0;
+ rspamd_monitored_start (m);
if (m->ctx->change_cb) {
m->ctx->change_cb (m->ctx, m, FALSE, m->ctx->ud);
}
}
}
+ else {
+ if (m->monitoring_mult < 8.0) {
+ /* Increase timeout */
+ rspamd_monitored_stop (m);
+ m->monitoring_mult *= 2.0;
+ rspamd_monitored_start (m);
+ }
+ else {
+ rspamd_monitored_stop (m);
+ m->monitoring_mult = 8.0;
+ rspamd_monitored_start (m);
+ }
+ }
}
static inline void
@@ -122,6 +143,9 @@ rspamd_monitored_propagate_success (struct rspamd_monitored *m, gdouble lat)
m->offline_time = 0;
m->nchecks = 1;
m->latency = lat;
+ rspamd_monitored_stop (m);
+ m->monitoring_mult = 1.0;
+ rspamd_monitored_start (m);
if (m->ctx->change_cb) {
m->ctx->change_cb (m->ctx, m, TRUE, m->ctx->ud);
@@ -139,15 +163,19 @@ rspamd_monitored_periodic (gint fd, short what, gpointer ud)
struct rspamd_monitored *m = ud;
struct timeval tv;
gdouble jittered;
+ gboolean ret = FALSE;
- jittered = rspamd_time_jitter (m->monitoring_interval, 0.0);
+ jittered = rspamd_time_jitter (m->monitoring_interval * m->monitoring_mult,
+ 0.0);
double_to_tv (jittered, &tv);
if (m->proc.monitored_update) {
- m->proc.monitored_update (m, m->ctx, m->proc.ud);
+ ret = m->proc.monitored_update (m, m->ctx, m->proc.ud);
}
- event_add (&m->periodic, &tv);
+ if (ret) {
+ event_add (&m->periodic, &tv);
+ }
}
struct rspamd_dns_monitored_conf {
@@ -310,7 +338,7 @@ rspamd_monitored_dns_cb (struct rdns_reply *reply, void *arg)
}
}
-void
+static gboolean
rspamd_monitored_dns_mon (struct rspamd_monitored *m,
struct rspamd_monitored_ctx *ctx, gpointer ud)
{
@@ -323,10 +351,14 @@ rspamd_monitored_dns_mon (struct rspamd_monitored *m,
m->cur_errors ++;
rspamd_monitored_propagate_error (m, "failed to make DNS request");
+
+ return FALSE;
}
else {
conf->check_tm = rspamd_get_calendar_ticks ();
}
+
+ return TRUE;
}
void
@@ -381,7 +413,9 @@ rspamd_monitored_ctx_config (struct rspamd_monitored_ctx *ctx,
/* Start all events */
for (i = 0; i < ctx->elts->len; i ++) {
m = g_ptr_array_index (ctx->elts, i);
+ m->monitoring_mult = 0;
rspamd_monitored_start (m);
+ m->monitoring_mult = 1.0;
}
}
@@ -414,6 +448,7 @@ rspamd_monitored_create_ (struct rspamd_monitored_ctx *ctx,
m->url = g_strdup (line);
m->ctx = ctx;
m->monitoring_interval = ctx->monitoring_interval;
+ m->monitoring_mult = 1.0;
m->max_errors = ctx->max_errors;
m->alive = TRUE;
@@ -521,7 +556,6 @@ rspamd_monitored_stop (struct rspamd_monitored *m)
{
g_assert (m != NULL);
- m->alive = FALSE;
if (event_get_base (&m->periodic)) {
event_del (&m->periodic);
}
@@ -535,7 +569,8 @@ rspamd_monitored_start (struct rspamd_monitored *m)
g_assert (m != NULL);
msg_debug_mon ("started monitored object %s", m->url);
- jittered = rspamd_time_jitter (m->monitoring_interval, 0.0);
+ jittered = rspamd_time_jitter (m->monitoring_interval * m->monitoring_mult,
+ 0.0);
double_to_tv (jittered, &tv);
if (event_get_base (&m->periodic)) {