From: Vsevolod Stakhov Date: Mon, 18 Jun 2018 16:51:25 +0000 (+0100) Subject: [Feature] Main process crash will now cleanup all children X-Git-Tag: 1.7.7~87 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=580240b1eaa7cefe3b75db2f3a9d3ced41c13ed5;p=rspamd.git [Feature] Main process crash will now cleanup all children --- diff --git a/src/libserver/worker_util.c b/src/libserver/worker_util.c index a1fa4048c..e706e95f4 100644 --- a/src/libserver/worker_util.c +++ b/src/libserver/worker_util.c @@ -983,20 +983,45 @@ rspamd_print_crash (ucontext_t *uap) } #endif +static struct rspamd_main *saved_main = NULL; +static gboolean +rspamd_crash_propagate (gpointer key, gpointer value, gpointer unused) +{ + struct rspamd_worker *w = value; + + /* Kill children softly */ + kill (w->pid, SIGTERM); + + return TRUE; +} + static void rspamd_crash_sig_handler (int sig, siginfo_t *info, void *ctx) { struct sigaction sa; ucontext_t *uap = ctx; + pid_t pid; + pid = getpid (); msg_err ("caught fatal signal %d(%s), " "pid: %P, trace: ", - sig, strsignal (sig), getpid ()); + sig, strsignal (sig), pid); (void)uap; #ifdef WITH_LIBUNWIND rspamd_print_crash (uap); #endif + if (saved_main) { + if (pid == saved_main->pid) { + /* + * Main process has crashed, propagate crash further to trigger + * monitoring alerts and mass panic + */ + g_hash_table_foreach_remove (saved_main->workers, + rspamd_crash_propagate, NULL); + } + } + /* * Invoke signal with the default handler */ @@ -1004,12 +1029,12 @@ rspamd_crash_sig_handler (int sig, siginfo_t *info, void *ctx) sa.sa_handler = SIG_DFL; sa.sa_flags = 0; sigaction (sig, &sa, NULL); - kill (getpid (), sig); + kill (pid, sig); } #endif void -rspamd_set_crash_handler (struct rspamd_main *main) +rspamd_set_crash_handler (struct rspamd_main *rspamd_main) { #ifdef HAVE_SA_SIGINFO struct sigaction sa; @@ -1022,14 +1047,14 @@ rspamd_set_crash_handler (struct rspamd_main *main) ss.ss_sp = g_malloc0 (ss.ss_size); sigaltstack (&ss, NULL); #endif - - sigemptyset(&sa.sa_mask); + saved_main = rspamd_main; + sigemptyset (&sa.sa_mask); sa.sa_sigaction = &rspamd_crash_sig_handler; sa.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK; - sigaction(SIGSEGV, &sa, NULL); - sigaction(SIGBUS, &sa, NULL); - sigaction(SIGABRT, &sa, NULL); - sigaction(SIGFPE, &sa, NULL); - sigaction(SIGSYS, &sa, NULL); + sigaction (SIGSEGV, &sa, NULL); + sigaction (SIGBUS, &sa, NULL); + sigaction (SIGABRT, &sa, NULL); + sigaction (SIGFPE, &sa, NULL); + sigaction (SIGSYS, &sa, NULL); #endif } \ No newline at end of file diff --git a/src/rspamd.c b/src/rspamd.c index 7c5450521..b5454f38d 100644 --- a/src/rspamd.c +++ b/src/rspamd.c @@ -1182,7 +1182,7 @@ main (gint argc, gchar **argv, gchar **env) struct event term_ev, int_ev, cld_ev, hup_ev, usr1_ev, control_ev; struct timeval term_tv; struct rspamd_main *rspamd_main; - gboolean skip_pid = FALSE; + gboolean skip_pid = FALSE, valgrind_mode = FALSE; #if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) g_thread_init (NULL); @@ -1198,6 +1198,10 @@ main (gint argc, gchar **argv, gchar **env) rspamd_spair_equal, g_free, rspamd_spair_close); rspamd_main->start_mtx = rspamd_mempool_get_mutex (rspamd_main->server_pool); + if (getenv ("VALGRIND") != NULL) { + valgrind_mode = TRUE; + } + #ifndef HAVE_SETPROCTITLE init_title (rspamd_main, argc, argv, env); #endif @@ -1242,7 +1246,6 @@ main (gint argc, gchar **argv, gchar **env) } type = g_quark_from_static_string ("main"); - rspamd_set_crash_handler (rspamd_main); /* First set logger to console logger */ rspamd_main->cfg->log_type = RSPAMD_LOG_CONSOLE; @@ -1362,6 +1365,10 @@ main (gint argc, gchar **argv, gchar **env) rspamd_main->pid = getpid (); rspamd_main->type = type; + if (!valgrind_mode) { + rspamd_set_crash_handler (rspamd_main); + } + /* Ignore SIGPIPE as we handle write errors manually */ sigemptyset (&sigpipe_act.sa_mask); sigaddset (&sigpipe_act.sa_mask, SIGPIPE); @@ -1476,7 +1483,7 @@ main (gint argc, gchar **argv, gchar **env) close (control_fd); } - if (getenv ("VALGRIND") != NULL) { + if (valgrind_mode) { /* Special case if we are likely running with valgrind */ term_attempts = TERMINATION_ATTEMPTS * 10; }