You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

worker.c 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666
  1. /*
  2. * Copyright (c) 2009, Rambler media
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. *
  13. * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
  14. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  15. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  16. * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
  17. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  18. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  19. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  20. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  21. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  22. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  23. */
  24. /*
  25. * Rspamd worker implementation
  26. */
  27. #include "config.h"
  28. #include "util.h"
  29. #include "main.h"
  30. #include "protocol.h"
  31. #include "upstream.h"
  32. #include "cfg_file.h"
  33. #include "url.h"
  34. #include "modules.h"
  35. #include "message.h"
  36. #include "map.h"
  37. #include <evdns.h>
  38. #ifndef WITHOUT_PERL
  39. # include <EXTERN.h> /* from the Perl distribution */
  40. # include <perl.h> /* from the Perl distribution */
  41. extern PerlInterpreter *perl_interpreter;
  42. #endif
  43. #ifdef WITH_GPERF_TOOLS
  44. # include <glib/gprintf.h>
  45. #endif
  46. #ifndef BUILD_STATIC
  47. #define MODULE_INIT_FUNC "module_init"
  48. #define MODULE_FINIT_FUNC "module_fin"
  49. #define MODULE_BEFORE_CONNECT_FUNC "before_connect"
  50. #define MODULE_AFTER_CONNECT_FUNC "after_connect"
  51. #define MODULE_PARSE_LINE_FUNC "parse_line"
  52. struct custom_filter {
  53. char *filename; /*< filename */
  54. GModule *handle; /*< returned by dlopen */
  55. void (*init_func)(struct config_file *cfg); /*< called at start of worker */
  56. void* (*before_connect)(void); /*< called when clients connects */
  57. gboolean (*process_line)(const char *line, size_t len, char **output, void *user_data); /*< called when client send data line */
  58. void (*after_connect)(char **output, char **log_line, void *user_data); /*< called when client disconnects */
  59. void (*fin_func)(void);
  60. };
  61. #endif
  62. static struct timeval io_tv;
  63. /* Detect whether this worker is mime worker */
  64. static gboolean is_mime;
  65. /* Detect whether this worker bypass normal filters and is using custom filters */
  66. static gboolean is_custom;
  67. static GList *custom_filters;
  68. static gboolean write_socket (void *arg);
  69. static sig_atomic_t wanna_die = 0;
  70. #ifndef HAVE_SA_SIGINFO
  71. static void
  72. sig_handler (int signo)
  73. #else
  74. static void
  75. sig_handler (int signo, siginfo_t *info, void *unused)
  76. #endif
  77. {
  78. struct timeval tv;
  79. switch (signo) {
  80. case SIGINT:
  81. case SIGTERM:
  82. if (!wanna_die) {
  83. wanna_die = 1;
  84. tv.tv_sec = 0;
  85. tv.tv_usec = 0;
  86. event_loopexit (&tv);
  87. #ifdef WITH_GPERF_TOOLS
  88. ProfilerStop ();
  89. #endif
  90. }
  91. break;
  92. }
  93. }
  94. /*
  95. * Config reload is designed by sending sigusr to active workers and pending shutdown of them
  96. */
  97. static void
  98. sigusr_handler (int fd, short what, void *arg)
  99. {
  100. struct rspamd_worker *worker = (struct rspamd_worker *)arg;
  101. /* Do not accept new connections, preparing to end worker's process */
  102. struct timeval tv;
  103. if (! wanna_die) {
  104. tv.tv_sec = SOFT_SHUTDOWN_TIME;
  105. tv.tv_usec = 0;
  106. event_del (&worker->sig_ev);
  107. event_del (&worker->bind_ev);
  108. do_reopen_log = 1;
  109. msg_info ("worker's shutdown is pending in %d sec", SOFT_SHUTDOWN_TIME);
  110. event_loopexit (&tv);
  111. }
  112. return;
  113. }
  114. /*
  115. * Destructor for recipients list
  116. */
  117. static void
  118. rcpt_destruct (void *pointer)
  119. {
  120. struct worker_task *task = (struct worker_task *)pointer;
  121. if (task->rcpt) {
  122. g_list_free (task->rcpt);
  123. }
  124. }
  125. #ifndef BUILD_STATIC
  126. static void
  127. fin_custom_filters (struct worker_task *task)
  128. {
  129. GList *cur, *curd;
  130. struct custom_filter *filt;
  131. char *output = NULL, *log = NULL;
  132. cur = custom_filters;
  133. curd = task->rcpt;
  134. while (cur) {
  135. filt = cur->data;
  136. if (filt->after_connect) {
  137. filt->after_connect (&output, &log, curd->data);
  138. if (output != NULL) {
  139. rspamd_dispatcher_write (task->dispatcher, output, strlen (output), FALSE, FALSE);
  140. g_free (output);
  141. }
  142. if (log != NULL) {
  143. msg_info ("%s", log);
  144. g_free (log);
  145. }
  146. if (curd->next) {
  147. curd = g_list_next (curd);
  148. }
  149. }
  150. cur = g_list_next (cur);
  151. }
  152. }
  153. static gboolean
  154. parse_line_custom (struct worker_task *task, f_str_t *in)
  155. {
  156. GList *cur, *curd;
  157. struct custom_filter *filt;
  158. char *output = NULL;
  159. gboolean res = TRUE;
  160. cur = custom_filters;
  161. curd = task->rcpt;
  162. while (cur) {
  163. filt = cur->data;
  164. if (filt->after_connect) {
  165. if (! filt->process_line (in->begin, in->len, &output, curd->data)) {
  166. res = FALSE;
  167. }
  168. if (output != NULL) {
  169. rspamd_dispatcher_write (task->dispatcher, output, strlen (output), FALSE, FALSE);
  170. g_free (output);
  171. }
  172. if (curd->next) {
  173. curd = g_list_next (curd);
  174. }
  175. }
  176. cur = g_list_next (cur);
  177. }
  178. return res;
  179. }
  180. #else
  181. /* Stubs */
  182. static void
  183. fin_custom_filters (struct worker_task *task)
  184. {
  185. }
  186. static gboolean
  187. parse_line_custom (struct worker_task *task, f_str_t *in)
  188. {
  189. return FALSE;
  190. }
  191. #endif
  192. /*
  193. * Free all structures of worker_task
  194. */
  195. void
  196. free_task (struct worker_task *task, gboolean is_soft)
  197. {
  198. GList *part;
  199. struct mime_part *p;
  200. if (task) {
  201. debug_task ("free pointer %p", task);
  202. while ((part = g_list_first (task->parts))) {
  203. task->parts = g_list_remove_link (task->parts, part);
  204. p = (struct mime_part *)part->data;
  205. g_byte_array_free (p->content, TRUE);
  206. g_list_free_1 (part);
  207. }
  208. if (task->text_parts) {
  209. g_list_free (task->text_parts);
  210. }
  211. if (task->urls) {
  212. g_list_free (task->urls);
  213. }
  214. if (task->messages) {
  215. g_list_free (task->messages);
  216. }
  217. memory_pool_delete (task->task_pool);
  218. if (task->dispatcher) {
  219. if (is_soft) {
  220. /* Plan dispatcher shutdown */
  221. task->dispatcher->wanna_die = 1;
  222. }
  223. else {
  224. rspamd_remove_dispatcher (task->dispatcher);
  225. }
  226. }
  227. if (task->sock != -1) {
  228. close (task->sock);
  229. }
  230. g_free (task);
  231. }
  232. }
  233. static void
  234. free_task_hard (void *ud)
  235. {
  236. struct worker_task *task = ud;
  237. free_task (task, FALSE);
  238. }
  239. /*
  240. * Callback that is called when there is data to read in buffer
  241. */
  242. static gboolean
  243. read_socket (f_str_t * in, void *arg)
  244. {
  245. struct worker_task *task = (struct worker_task *)arg;
  246. ssize_t r;
  247. switch (task->state) {
  248. case READ_COMMAND:
  249. case READ_HEADER:
  250. if (is_custom) {
  251. if (! parse_line_custom (task, in)) {
  252. task->last_error = "Read error";
  253. task->error_code = RSPAMD_NETWORK_ERROR;
  254. task->state = WRITE_ERROR;
  255. }
  256. }
  257. else {
  258. if (read_rspamd_input_line (task, in) != 0) {
  259. task->last_error = "Read error";
  260. task->error_code = RSPAMD_NETWORK_ERROR;
  261. task->state = WRITE_ERROR;
  262. }
  263. }
  264. if (task->state == WRITE_REPLY || task->state == WRITE_ERROR) {
  265. return write_socket (task);
  266. }
  267. break;
  268. case READ_MESSAGE:
  269. task->msg = memory_pool_alloc (task->task_pool, sizeof (f_str_t));
  270. task->msg->begin = in->begin;
  271. task->msg->len = in->len;
  272. debug_task ("got string of length %ld", (long int)task->msg->len);
  273. r = process_message (task);
  274. if (r == -1) {
  275. msg_warn ("processing of message failed");
  276. task->last_error = "MIME processing error";
  277. task->error_code = RSPAMD_FILTER_ERROR;
  278. task->state = WRITE_ERROR;
  279. return write_socket (task);
  280. }
  281. if (task->cmd == CMD_OTHER) {
  282. /* Skip filters */
  283. task->state = WRITE_REPLY;
  284. return write_socket (task);
  285. }
  286. r = process_filters (task);
  287. if (r == -1) {
  288. task->last_error = "Filter processing error";
  289. task->error_code = RSPAMD_FILTER_ERROR;
  290. task->state = WRITE_ERROR;
  291. return write_socket (task);
  292. }
  293. else if (r == 0) {
  294. task->state = WAIT_FILTER;
  295. rspamd_dispatcher_pause (task->dispatcher);
  296. }
  297. else {
  298. process_statfiles (task);
  299. return write_socket (task);
  300. }
  301. break;
  302. default:
  303. debug_task ("invalid state on reading stage");
  304. break;
  305. }
  306. return TRUE;
  307. }
  308. /*
  309. * Callback for socket writing
  310. */
  311. static gboolean
  312. write_socket (void *arg)
  313. {
  314. struct worker_task *task = (struct worker_task *)arg;
  315. switch (task->state) {
  316. case WRITE_REPLY:
  317. write_reply (task);
  318. if (is_custom) {
  319. fin_custom_filters (task);
  320. }
  321. destroy_session (task->s);
  322. return FALSE;
  323. break;
  324. case WRITE_ERROR:
  325. write_reply (task);
  326. if (is_custom) {
  327. fin_custom_filters (task);
  328. }
  329. destroy_session (task->s);
  330. return FALSE;
  331. break;
  332. case CLOSING_CONNECTION:
  333. debug_task ("normally closing connection");
  334. if (is_custom) {
  335. fin_custom_filters (task);
  336. }
  337. destroy_session (task->s);
  338. return FALSE;
  339. break;
  340. default:
  341. msg_info ("abnormally closing connection");
  342. if (is_custom) {
  343. fin_custom_filters (task);
  344. }
  345. destroy_session (task->s);
  346. return FALSE;
  347. break;
  348. }
  349. return TRUE;
  350. }
  351. /*
  352. * Called if something goes wrong
  353. */
  354. static void
  355. err_socket (GError * err, void *arg)
  356. {
  357. struct worker_task *task = (struct worker_task *)arg;
  358. msg_info ("abnormally closing connection, error: %s", err->message);
  359. /* Free buffers */
  360. if (is_custom) {
  361. fin_custom_filters (task);
  362. }
  363. destroy_session (task->s);
  364. }
  365. struct worker_task *
  366. construct_task (struct rspamd_worker *worker)
  367. {
  368. struct worker_task *new_task;
  369. new_task = g_malloc (sizeof (struct worker_task));
  370. bzero (new_task, sizeof (struct worker_task));
  371. new_task->worker = worker;
  372. new_task->state = READ_COMMAND;
  373. new_task->cfg = worker->srv->cfg;
  374. new_task->from_addr.s_addr = INADDR_NONE;
  375. new_task->view_checked = FALSE;
  376. #ifdef HAVE_CLOCK_GETTIME
  377. # ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID
  378. clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &new_task->ts);
  379. # elif defined(HAVE_CLOCK_VIRTUAL)
  380. clock_gettime (CLOCK_VIRTUAL, &new_task->ts);
  381. # else
  382. clock_gettime (CLOCK_REALTIME, &new_task->ts);
  383. # endif
  384. #else
  385. if (gettimeofday (&new_task->tv, NULL) == -1) {
  386. msg_warn ("gettimeofday failed: %s", strerror (errno));
  387. }
  388. #endif
  389. io_tv.tv_sec = WORKER_IO_TIMEOUT;
  390. io_tv.tv_usec = 0;
  391. new_task->task_pool = memory_pool_new (memory_pool_get_size ());
  392. /* Add destructor for recipients list (it would be better to use anonymous function here */
  393. memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func) rcpt_destruct, new_task);
  394. new_task->results = g_hash_table_new (g_str_hash, g_str_equal);
  395. memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func) g_hash_table_destroy, new_task->results);
  396. new_task->re_cache = g_hash_table_new (g_str_hash, g_str_equal);
  397. memory_pool_add_destructor (new_task->task_pool, (pool_destruct_func) g_hash_table_destroy, new_task->re_cache);
  398. new_task->s = new_async_session (new_task->task_pool, free_task_hard, new_task);
  399. new_task->sock = -1;
  400. new_task->is_mime = TRUE;
  401. return new_task;
  402. }
  403. /*
  404. * Accept new connection and construct task
  405. */
  406. static void
  407. accept_socket (int fd, short what, void *arg)
  408. {
  409. struct rspamd_worker *worker = (struct rspamd_worker *)arg;
  410. union sa_union su;
  411. struct worker_task *new_task;
  412. GList *cur;
  413. struct custom_filter *filt;
  414. socklen_t addrlen = sizeof (su.ss);
  415. int nfd;
  416. if ((nfd = accept_from_socket (fd, (struct sockaddr *)&su.ss, &addrlen)) == -1) {
  417. msg_warn ("accept failed: %s", strerror (errno));
  418. return;
  419. }
  420. /* Check for EAGAIN */
  421. if (nfd == 0) {
  422. return;
  423. }
  424. new_task = construct_task (worker);
  425. if (su.ss.ss_family == AF_UNIX) {
  426. msg_info ("accepted connection from unix socket");
  427. new_task->client_addr.s_addr = INADDR_NONE;
  428. }
  429. else if (su.ss.ss_family == AF_INET) {
  430. msg_info ("accepted connection from %s port %d", inet_ntoa (su.s4.sin_addr), ntohs (su.s4.sin_port));
  431. memcpy (&new_task->client_addr, &su.s4.sin_addr, sizeof (struct in_addr));
  432. }
  433. new_task->sock = nfd;
  434. new_task->is_mime = is_mime;
  435. worker->srv->stat->connections_count++;
  436. /* Set up dispatcher */
  437. new_task->dispatcher = rspamd_create_dispatcher (nfd, BUFFER_LINE, read_socket, write_socket, err_socket, &io_tv, (void *)new_task);
  438. new_task->dispatcher->peer_addr = new_task->client_addr.s_addr;
  439. /* Init custom filters */
  440. #ifndef BUILD_STATIC
  441. if (is_custom) {
  442. cur = custom_filters;
  443. while (cur) {
  444. filt = cur->data;
  445. if (filt->before_connect) {
  446. /* XXX: maybe not use rcpt list here for custom filters data, but this can save some bytes in task structure */
  447. new_task->rcpt = g_list_prepend (new_task->rcpt, filt->before_connect ());
  448. }
  449. cur = g_list_next (cur);
  450. }
  451. /* Keep user data in the same order as custom filters */
  452. new_task->rcpt = g_list_reverse (new_task->rcpt);
  453. }
  454. #endif
  455. }
  456. #ifndef BUILD_STATIC
  457. static gboolean
  458. load_custom_filter (struct config_file *cfg, const char *file)
  459. {
  460. struct custom_filter *filt;
  461. struct stat st;
  462. if (stat (file, &st) == -1 || !S_ISREG (st.st_mode)) {
  463. msg_info ("stat failed for %s", file);
  464. return FALSE;
  465. }
  466. filt = g_malloc (sizeof (struct custom_filter));
  467. filt->handle = g_module_open (file, G_MODULE_BIND_LAZY);
  468. if (!filt->handle) {
  469. msg_info ("module load failed: %s", g_module_error ());
  470. g_free (filt);
  471. return FALSE;
  472. }
  473. /* Now extract functions from custom module */
  474. if (!g_module_symbol (filt->handle, MODULE_INIT_FUNC, (gpointer *)&filt->init_func) ||
  475. !g_module_symbol (filt->handle, MODULE_FINIT_FUNC, (gpointer *)&filt->fin_func) ||
  476. !g_module_symbol (filt->handle, MODULE_BEFORE_CONNECT_FUNC, (gpointer *)&filt->before_connect) ||
  477. !g_module_symbol (filt->handle, MODULE_AFTER_CONNECT_FUNC, (gpointer *)&filt->after_connect) ||
  478. !g_module_symbol (filt->handle, MODULE_PARSE_LINE_FUNC, (gpointer *)&filt->process_line)) {
  479. msg_info ("cannot find handlers in module %s: %s", file, g_module_error ());
  480. g_free (filt);
  481. return FALSE;
  482. }
  483. filt->init_func (cfg);
  484. filt->filename = g_strdup (file);
  485. custom_filters = g_list_prepend (custom_filters, filt);
  486. return TRUE;
  487. }
  488. /*
  489. * Load custom filters from specified path
  490. */
  491. static gboolean
  492. load_custom_filters (struct rspamd_worker *worker, const char *path)
  493. {
  494. glob_t gp;
  495. int r, i;
  496. gp.gl_offs = 0;
  497. if ((r = glob (path, GLOB_NOSORT, NULL, &gp)) != 0) {
  498. msg_warn ("glob failed: %s, %d", strerror (errno), r);
  499. return FALSE;
  500. }
  501. for (i = 0; i < gp.gl_pathc; i ++) {
  502. if (! load_custom_filter (worker->srv->cfg, gp.gl_pathv[i])) {
  503. globfree (&gp);
  504. return FALSE;
  505. }
  506. }
  507. globfree (&gp);
  508. return TRUE;
  509. }
  510. static void
  511. unload_custom_filters (void)
  512. {
  513. GList *cur;
  514. struct custom_filter *filt;
  515. cur = custom_filters;
  516. while (cur) {
  517. filt = cur->data;
  518. if (filt->fin_func) {
  519. filt->fin_func ();
  520. }
  521. g_module_close (filt->handle);
  522. g_free (filt);
  523. cur = g_list_next (cur);
  524. }
  525. g_list_free (custom_filters);
  526. }
  527. #endif
  528. /*
  529. * Start worker process
  530. */
  531. void
  532. start_worker (struct rspamd_worker *worker)
  533. {
  534. struct sigaction signals;
  535. char *is_mime_str;
  536. char *is_custom_str;
  537. #ifdef WITH_PROFILER
  538. extern void _start (void), etext (void);
  539. monstartup ((u_long) & _start, (u_long) & etext);
  540. #endif
  541. gperf_profiler_init (worker->srv->cfg, "worker");
  542. worker->srv->pid = getpid ();
  543. event_init ();
  544. evdns_init ();
  545. init_signals (&signals, sig_handler);
  546. sigprocmask (SIG_UNBLOCK, &signals.sa_mask, NULL);
  547. /* SIGUSR2 handler */
  548. signal_set (&worker->sig_ev, SIGUSR2, sigusr_handler, (void *)worker);
  549. signal_add (&worker->sig_ev, NULL);
  550. /* Accept event */
  551. event_set (&worker->bind_ev, worker->cf->listen_sock, EV_READ | EV_PERSIST, accept_socket, (void *)worker);
  552. event_add (&worker->bind_ev, NULL);
  553. #ifndef BUILD_STATIC
  554. /* Check if this worker is not usual rspamd worker, but uses custom filters from specified path */
  555. is_custom_str = g_hash_table_lookup (worker->cf->params, "custom_filters");
  556. if (is_custom_str && g_module_supported () && load_custom_filters (worker, is_custom_str)) {
  557. msg_info ("starting custom process, loaded modules from %s", is_custom_str);
  558. is_custom = TRUE;
  559. }
  560. else {
  561. #endif
  562. /* Maps events */
  563. start_map_watch ();
  564. /* Check whether we are mime worker */
  565. is_mime_str = g_hash_table_lookup (worker->cf->params, "mime");
  566. if (is_mime_str != NULL && (g_ascii_strcasecmp (is_mime_str, "no") == 0 || g_ascii_strcasecmp (is_mime_str, "false") == 0)) {
  567. is_mime = FALSE;
  568. }
  569. else {
  570. is_mime = TRUE;
  571. }
  572. #ifndef BUILD_STATIC
  573. }
  574. #endif
  575. event_loop (0);
  576. #ifndef BUILD_STATIC
  577. if (is_custom) {
  578. unload_custom_filters ();
  579. }
  580. #endif
  581. close_log ();
  582. exit (EXIT_SUCCESS);
  583. }
  584. /*
  585. * vi:ts=4
  586. */