From 5fd8d7b24c42766f6ca86c50fe214283ec43fbb8 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Fri, 26 Aug 2016 16:34:29 +0100 Subject: [PATCH] [Feature] Add latency and offline time monitoring --- src/libserver/monitored.c | 77 +++++++++++++++++++++++++++++++++------ src/libserver/monitored.h | 21 +++++++++++ 2 files changed, 86 insertions(+), 12 deletions(-) diff --git a/src/libserver/monitored.c b/src/libserver/monitored.c index eabfad538..d41ce652e 100644 --- a/src/libserver/monitored.c +++ b/src/libserver/monitored.c @@ -49,7 +49,10 @@ struct rspamd_monitored_ctx { struct rspamd_monitored { gchar *url; gdouble monitoring_interval; - gdouble dead_time; + gdouble offline_time; + gdouble total_offline_time; + gdouble latency; + guint nchecks; guint max_errors; guint cur_errors; gboolean alive; @@ -92,19 +95,32 @@ rspamd_monitored_propagate_error (struct rspamd_monitored *m, msg_info_mon ("%s on resolving %s, disable object", error, m->url); m->alive = FALSE; - m->dead_time = rspamd_get_calendar_ticks (); + m->offline_time = rspamd_get_calendar_ticks (); } } } static inline void -rspamd_monitored_propagate_success (struct rspamd_monitored *m) +rspamd_monitored_propagate_success (struct rspamd_monitored *m, gdouble lat) { + gdouble t; + + m->cur_errors = 0; + if (!m->alive) { - m->cur_errors = 0; + t = rspamd_get_calendar_ticks (); + m->total_offline_time += t - m->offline_time; m->alive = TRUE; - msg_info_mon ("restoring %s after %.1f seconds of downtime", - m->url, rspamd_get_calendar_ticks () - m->dead_time); + msg_info_mon ("restoring %s after %.1f seconds of downtime, " + "total downtime: %.1f", + m->url, t - m->offline_time, m->total_offline_time); + m->offline_time = 0; + m->nchecks = 1; + m->latency = lat; + } + else { + m->latency = (lat + m->latency * m->nchecks) / (m->nchecks + 1); + m->nchecks ++; } } @@ -131,6 +147,7 @@ struct rspamd_dns_monitored_conf { radix_compressed_t *expected; struct rspamd_monitored *m; gint expected_code; + gdouble 
check_tm; }; static void * @@ -212,9 +229,12 @@ rspamd_monitored_dns_cb (struct rdns_reply *reply, void *arg) { struct rspamd_dns_monitored_conf *conf = arg; struct rspamd_monitored *m; + gdouble lat; m = conf->m; - msg_debug_mon ("dns callback for %s: %s", m->url, + lat = rspamd_get_calendar_ticks () - conf->check_tm; + conf->check_tm = 0; + msg_debug_mon ("dns callback for %s in %.2f: %s", m->url, lat, rdns_strerror (reply->code)); if (reply->code == RDNS_RC_TIMEOUT) { @@ -259,13 +279,13 @@ rspamd_monitored_dns_cb (struct rdns_reply *reply, void *arg) rspamd_inet_address_destroy (addr); } else { - rspamd_monitored_propagate_success (m); + rspamd_monitored_propagate_success (m, lat); rspamd_inet_address_destroy (addr); } } } else { - rspamd_monitored_propagate_success (m); + rspamd_monitored_propagate_success (m, lat); } } } @@ -282,10 +302,10 @@ rspamd_monitored_dns_mon (struct rspamd_monitored *m, msg_info_mon ("cannot make request to resolve %s", conf->request->str); m->cur_errors ++; + rspamd_monitored_propagate_error (m, "failed to make DNS request"); } - - if (m->cur_errors > m->max_errors) { - m->alive = FALSE; + else { + conf->check_tm = rspamd_get_calendar_ticks (); } } @@ -408,6 +428,39 @@ rspamd_monitored_alive (struct rspamd_monitored *m) return m->alive; } +gdouble +rspamd_monitored_offline_time (struct rspamd_monitored *m) +{ + g_assert (m != NULL); + + if (m->offline_time > 0) { + return rspamd_get_calendar_ticks () - m->offline_time; + } + + return 0; +} + +gdouble +rspamd_monitored_total_offline_time (struct rspamd_monitored *m) +{ + g_assert (m != NULL); + + if (m->offline_time > 0) { + return rspamd_get_calendar_ticks () - m->offline_time + m->total_offline_time; + } + + + return m->total_offline_time; +} + +gdouble +rspamd_monitored_latency (struct rspamd_monitored *m) +{ + g_assert (m != NULL); + + return m->latency; +} + void rspamd_monitored_stop (struct rspamd_monitored *m) { diff --git a/src/libserver/monitored.h 
b/src/libserver/monitored.h index 399fb2ea6..4c55a5646 100644 --- a/src/libserver/monitored.h +++ b/src/libserver/monitored.h @@ -72,6 +72,27 @@ struct rspamd_monitored *rspamd_monitored_create ( */ gboolean rspamd_monitored_alive (struct rspamd_monitored *m); +/** + * Returns the current offline time for a monitored object + * @param m monitored object (must not be NULL) + * @return seconds elapsed since the object went offline, or 0 if it is not offline + */ +gdouble rspamd_monitored_offline_time (struct rspamd_monitored *m); + +/** + * Returns the total offline time for a monitored object + * @param m monitored object (must not be NULL) + * @return accumulated offline time in seconds, including any outage still in progress + */ +gdouble rspamd_monitored_total_offline_time (struct rspamd_monitored *m); + +/** + * Returns the latency for a monitored object (in seconds) + * @param m monitored object (must not be NULL) + * @return running average of the measured check latency + */ +gdouble rspamd_monitored_latency (struct rspamd_monitored *m); + /** * Explicitly disable monitored object * @param m -- 2.39.5