You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

hs_helper.c 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "libutil/util.h"
  18. #include "libserver/cfg_file.h"
  19. #include "libserver/cfg_rcl.h"
  20. #include "libserver/worker_util.h"
  21. #include "libserver/rspamd_control.h"
  22. #include "unix-std.h"
  23. #ifdef HAVE_GLOB_H
  24. #include <glob.h>
  25. #endif
  /* Forward declarations of the entry points referenced by the descriptor below */
  static gpointer init_hs_helper(struct rspamd_config *cfg);
  __attribute__((noreturn)) static void start_hs_helper(struct rspamd_worker *worker);
  /* Descriptor of the hs_helper worker (positional initializer of worker_t) */
  worker_t hs_helper_worker = {
      "hs_helper", /* Name */
      init_hs_helper, /* Init function */
      start_hs_helper, /* Start function */
      /* Worker flags */
      RSPAMD_WORKER_UNIQUE | RSPAMD_WORKER_KILLABLE | RSPAMD_WORKER_ALWAYS_START | RSPAMD_WORKER_NO_TERMINATE_DELAY,
      /* Presumably the listen socket type: this worker requests none */
      RSPAMD_WORKER_SOCKET_NONE,
      RSPAMD_WORKER_VER /* Version info */
  };
  /* Default for hs_helper_ctx.max_time ("max_time"/"timeout" options); presumably seconds */
  static const double default_max_time = 1.0;
  /* Default for hs_helper_ctx.recompile_time ("recompile" option); presumably seconds */
  static const double default_recompile_time = 60.0;
  /* Stored in hs_helper_ctx.magic and verified by start_hs_helper() */
  static const uint64_t rspamd_hs_helper_magic = 0x22d310157a2288a0ULL;
  /*
   * Worker's context: allocated by init_hs_helper() and used by all the
   * callbacks of the hs_helper worker.
   */
  struct hs_helper_ctx {
      /* Set to rspamd_hs_helper_magic; checked in start_hs_helper() */
      uint64_t magic;
      /* Events base */
      struct ev_loop *event_loop;
      /* DNS resolver (not touched by the code in this file) */
      struct rspamd_dns_resolver *resolver;
      /* Config */
      struct rspamd_config *cfg;
      /* END OF COMMON PART */
      /* Directory where compiled hyperscan expressions are stored ("cache_dir" option) */
      char *hs_dir;
      /* Set to TRUE by the first successful compile callback */
      gboolean loaded;
      /* Maximum time to wait for compilation of a single expression */
      double max_time;
      /* Time between recompilation checks */
      double recompile_time;
      /* Timer that runs rspamd_hs_helper_timer() */
      ev_timer recompile_timer;
  };
  57. static gpointer
  58. init_hs_helper(struct rspamd_config *cfg)
  59. {
  60. struct hs_helper_ctx *ctx;
  61. GQuark type;
  62. type = g_quark_try_string("hs_helper");
  63. ctx = rspamd_mempool_alloc0(cfg->cfg_pool, sizeof(*ctx));
  64. ctx->magic = rspamd_hs_helper_magic;
  65. ctx->cfg = cfg;
  66. ctx->hs_dir = NULL;
  67. ctx->max_time = default_max_time;
  68. ctx->recompile_time = default_recompile_time;
  69. rspamd_rcl_register_worker_option(cfg,
  70. type,
  71. "cache_dir",
  72. rspamd_rcl_parse_struct_string,
  73. ctx,
  74. G_STRUCT_OFFSET(struct hs_helper_ctx, hs_dir),
  75. 0,
  76. "Directory where to save hyperscan compiled expressions");
  77. rspamd_rcl_register_worker_option(cfg,
  78. type,
  79. "max_time",
  80. rspamd_rcl_parse_struct_time,
  81. ctx,
  82. G_STRUCT_OFFSET(struct hs_helper_ctx, max_time),
  83. RSPAMD_CL_FLAG_TIME_FLOAT,
  84. "Maximum time to wait for compilation of a single expression");
  85. rspamd_rcl_register_worker_option(cfg,
  86. type,
  87. "recompile",
  88. rspamd_rcl_parse_struct_time,
  89. ctx,
  90. G_STRUCT_OFFSET(struct hs_helper_ctx, recompile_time),
  91. RSPAMD_CL_FLAG_TIME_FLOAT,
  92. "Time between recompilation checks");
  93. rspamd_rcl_register_worker_option(cfg,
  94. type,
  95. "timeout",
  96. rspamd_rcl_parse_struct_time,
  97. ctx,
  98. G_STRUCT_OFFSET(struct hs_helper_ctx, max_time),
  99. RSPAMD_CL_FLAG_TIME_FLOAT,
  100. "Maximum time to wait for compilation of a single expression");
  101. return ctx;
  102. }
  103. /**
  104. * Clean
  105. */
  106. static gboolean
  107. rspamd_hs_helper_cleanup_dir(struct hs_helper_ctx *ctx, gboolean forced)
  108. {
  109. struct stat st;
  110. glob_t globbuf;
  111. unsigned int len, i;
  112. int rc;
  113. char *pattern;
  114. gboolean ret = TRUE;
  115. pid_t our_pid = getpid();
  116. if (getenv("RSPAMD_NO_CLEANUP")) {
  117. /* Skip all cleanup */
  118. return TRUE;
  119. }
  120. if (stat(ctx->hs_dir, &st) == -1) {
  121. msg_err("cannot stat path %s, %s",
  122. ctx->hs_dir,
  123. strerror(errno));
  124. return FALSE;
  125. }
  126. globbuf.gl_offs = 0;
  127. /*
  128. * We reuse this buffer for .new patterns as well, so allocate with some
  129. * margin
  130. */
  131. len = strlen(ctx->hs_dir) + 1 + sizeof("*.hs") + sizeof(G_DIR_SEPARATOR);
  132. pattern = g_malloc(len);
  133. rspamd_snprintf(pattern, len, "%s%c%s", ctx->hs_dir, G_DIR_SEPARATOR, "*.hs");
  134. if ((rc = glob(pattern, 0, NULL, &globbuf)) == 0) {
  135. for (i = 0; i < globbuf.gl_pathc; i++) {
  136. GError *err = NULL;
  137. if (forced) {
  138. g_set_error(&err, g_quark_from_static_string("re_cache"),
  139. 0, "forced removal");
  140. }
  141. if (forced ||
  142. !rspamd_re_cache_is_valid_hyperscan_file(ctx->cfg->re_cache,
  143. globbuf.gl_pathv[i], TRUE, TRUE, &err)) {
  144. if (unlink(globbuf.gl_pathv[i]) == -1) {
  145. msg_err("cannot unlink %s: %s; reason for expiration: %e", globbuf.gl_pathv[i],
  146. strerror(errno), err);
  147. ret = FALSE;
  148. }
  149. else {
  150. msg_notice("successfully removed outdated hyperscan file: %s; reason for expiration: %e",
  151. globbuf.gl_pathv[i], err);
  152. }
  153. }
  154. if (err) {
  155. g_error_free(err);
  156. }
  157. }
  158. }
  159. else if (rc != GLOB_NOMATCH) {
  160. msg_err("glob %s failed: %s", pattern, strerror(errno));
  161. ret = FALSE;
  162. }
  163. globfree(&globbuf);
  164. memset(&globbuf, 0, sizeof(globbuf));
  165. rspamd_snprintf(pattern, len, "%s%c%s", ctx->hs_dir, G_DIR_SEPARATOR, "*.hs.new");
  166. if ((rc = glob(pattern, 0, NULL, &globbuf)) == 0) {
  167. for (i = 0; i < globbuf.gl_pathc; i++) {
  168. /* Check if we have a pid in the filename */
  169. const char *end_num = globbuf.gl_pathv[i] +
  170. strlen(globbuf.gl_pathv[i]) - (sizeof(".hs.new") - 1);
  171. const char *p = end_num - 1;
  172. pid_t foreign_pid = -1;
  173. while (p > globbuf.gl_pathv[i]) {
  174. if (g_ascii_isdigit(*p)) {
  175. p--;
  176. }
  177. else {
  178. p++;
  179. break;
  180. }
  181. }
  182. gulong ul;
  183. if (p < end_num && rspamd_strtoul(p, end_num - p, &ul)) {
  184. foreign_pid = ul;
  185. }
  186. /*
  187. * Remove only files that was left by us or some non-existing process
  188. * There could be another race condition but it would just leave
  189. * extra files which is relatively innocent?
  190. */
  191. if (foreign_pid == -1 || foreign_pid == our_pid || kill(foreign_pid, 0) == -1) {
  192. if (unlink(globbuf.gl_pathv[i]) == -1) {
  193. msg_err("cannot unlink %s: %s", globbuf.gl_pathv[i],
  194. strerror(errno));
  195. ret = FALSE;
  196. }
  197. else {
  198. msg_notice("successfully removed outdated hyperscan temporary file: %s; "
  199. "pid of the file creator process: %P",
  200. globbuf.gl_pathv[i],
  201. foreign_pid);
  202. }
  203. }
  204. else {
  205. msg_notice("skip removal of the hyperscan temporary file: %s; "
  206. "pid of the file creator process: %P",
  207. globbuf.gl_pathv[i],
  208. foreign_pid);
  209. }
  210. }
  211. }
  212. else if (rc != GLOB_NOMATCH) {
  213. msg_err("glob %s failed: %s", pattern, strerror(errno));
  214. ret = FALSE;
  215. }
  216. globfree(&globbuf);
  217. g_free(pattern);
  218. return ret;
  219. }
  /*
   * Bad hack, but who cares.
   *
   * File-scope flag shared between the compile path and the notification:
   * rspamd_rs_compile() sets it to its `forced` argument,
   * rspamd_rs_compile_cb() raises it when at least one expression has been
   * compiled, and rspamd_rs_delayed_cb() copies it into the "hyperscan loaded"
   * command and resets it to FALSE.
   */
  static gboolean hack_global_forced;
  222. static void
  223. rspamd_rs_delayed_cb(EV_P_ ev_timer *w, int revents)
  224. {
  225. struct rspamd_worker *worker = (struct rspamd_worker *) w->data;
  226. static struct rspamd_srv_command srv_cmd;
  227. struct hs_helper_ctx *ctx;
  228. ctx = (struct hs_helper_ctx *) worker->ctx;
  229. memset(&srv_cmd, 0, sizeof(srv_cmd));
  230. srv_cmd.type = RSPAMD_SRV_HYPERSCAN_LOADED;
  231. rspamd_strlcpy(srv_cmd.cmd.hs_loaded.cache_dir, ctx->hs_dir,
  232. sizeof(srv_cmd.cmd.hs_loaded.cache_dir));
  233. srv_cmd.cmd.hs_loaded.forced = hack_global_forced;
  234. hack_global_forced = FALSE;
  235. rspamd_srv_send_command(worker,
  236. ctx->event_loop, &srv_cmd, -1, NULL, NULL);
  237. ev_timer_stop(EV_A_ w);
  238. g_free(w);
  239. ev_timer_again(EV_A_ & ctx->recompile_timer);
  240. }
  241. static void
  242. rspamd_rs_compile_cb(unsigned int ncompiled, GError *err, void *cbd)
  243. {
  244. struct rspamd_worker *worker = (struct rspamd_worker *) cbd;
  245. ev_timer *tm;
  246. ev_tstamp when = 0.0;
  247. struct hs_helper_ctx *ctx;
  248. ctx = (struct hs_helper_ctx *) worker->ctx;
  249. if (err != NULL) {
  250. /* Failed to compile: log and go out */
  251. msg_err("cannot compile Hyperscan database: %e", err);
  252. return;
  253. }
  254. if (ncompiled > 0) {
  255. /* Enforce update for other workers */
  256. hack_global_forced = TRUE;
  257. }
  258. /*
  259. * Do not send notification unless all other workers are started
  260. * XXX: now we just sleep for 1 seconds to ensure that
  261. */
  262. if (!ctx->loaded) {
  263. when = 1.0; /* Postpone */
  264. ctx->loaded = TRUE;
  265. msg_info("compiled %d regular expressions to the hyperscan tree, "
  266. "postpone loaded notification for %.0f seconds to avoid races",
  267. ncompiled,
  268. when);
  269. }
  270. else {
  271. msg_info("compiled %d regular expressions to the hyperscan tree, "
  272. "send loaded notification",
  273. ncompiled);
  274. }
  275. tm = g_malloc0(sizeof(*tm));
  276. tm->data = (void *) worker;
  277. ev_timer_init(tm, rspamd_rs_delayed_cb, when, 0);
  278. ev_timer_start(ctx->event_loop, tm);
  279. }
  280. static gboolean
  281. rspamd_rs_compile(struct hs_helper_ctx *ctx, struct rspamd_worker *worker,
  282. gboolean forced)
  283. {
  284. #if !defined(__aarch64__) && !defined(__powerpc64__)
  285. if (!(ctx->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) {
  286. msg_warn("CPU doesn't have SSSE3 instructions set "
  287. "required for hyperscan, disable hyperscan compilation");
  288. return FALSE;
  289. }
  290. #endif
  291. if (!rspamd_hs_helper_cleanup_dir(ctx, forced)) {
  292. msg_warn("cannot cleanup cache dir '%s'", ctx->hs_dir);
  293. }
  294. hack_global_forced = forced; /* killmeplease */
  295. rspamd_re_cache_compile_hyperscan(ctx->cfg->re_cache,
  296. ctx->hs_dir, ctx->max_time, !forced,
  297. ctx->event_loop,
  298. rspamd_rs_compile_cb,
  299. (void *) worker);
  300. return TRUE;
  301. }
  302. static gboolean
  303. rspamd_hs_helper_reload(struct rspamd_main *rspamd_main,
  304. struct rspamd_worker *worker, int fd,
  305. int attached_fd,
  306. struct rspamd_control_command *cmd,
  307. gpointer ud)
  308. {
  309. struct rspamd_control_reply rep;
  310. struct hs_helper_ctx *ctx = ud;
  311. msg_info("recompiling hyperscan expressions after receiving reload command");
  312. memset(&rep, 0, sizeof(rep));
  313. rep.type = RSPAMD_CONTROL_RECOMPILE;
  314. rep.reply.recompile.status = 0;
  315. /* We write reply before actual recompilation as it takes a lot of time */
  316. if (write(fd, &rep, sizeof(rep)) != sizeof(rep)) {
  317. msg_err("cannot write reply to the control socket: %s",
  318. strerror(errno));
  319. }
  320. /* Stop recompile */
  321. ev_timer_stop(ctx->event_loop, &ctx->recompile_timer);
  322. rspamd_rs_compile(ctx, worker, TRUE);
  323. return TRUE;
  324. }
  325. static void
  326. rspamd_hs_helper_timer(EV_P_ ev_timer *w, int revents)
  327. {
  328. struct rspamd_worker *worker = (struct rspamd_worker *) w->data;
  329. struct hs_helper_ctx *ctx;
  330. double tim;
  331. ctx = worker->ctx;
  332. tim = rspamd_time_jitter(ctx->recompile_time, 0);
  333. w->repeat = tim;
  334. rspamd_rs_compile(ctx, worker, FALSE);
  335. }
  336. static void
  337. start_hs_helper(struct rspamd_worker *worker)
  338. {
  339. struct hs_helper_ctx *ctx = worker->ctx;
  340. double tim;
  341. g_assert(rspamd_worker_check_context(worker->ctx, rspamd_hs_helper_magic));
  342. ctx->cfg = worker->srv->cfg;
  343. if (ctx->hs_dir == NULL) {
  344. ctx->hs_dir = ctx->cfg->hs_cache_dir;
  345. }
  346. if (ctx->hs_dir == NULL) {
  347. ctx->hs_dir = RSPAMD_DBDIR "/";
  348. }
  349. ctx->event_loop = rspamd_prepare_worker(worker,
  350. "hs_helper",
  351. NULL);
  352. if (!rspamd_rs_compile(ctx, worker, FALSE)) {
  353. /* Tell main not to respawn more workers */
  354. exit(EXIT_SUCCESS);
  355. }
  356. rspamd_control_worker_add_cmd_handler(worker, RSPAMD_CONTROL_RECOMPILE,
  357. rspamd_hs_helper_reload, ctx);
  358. ctx->recompile_timer.data = worker;
  359. tim = rspamd_time_jitter(ctx->recompile_time, 0);
  360. ev_timer_init(&ctx->recompile_timer, rspamd_hs_helper_timer, tim, 0.0);
  361. ev_timer_start(ctx->event_loop, &ctx->recompile_timer);
  362. ev_loop(ctx->event_loop, 0);
  363. rspamd_worker_block_signals();
  364. rspamd_log_close(worker->srv->logger);
  365. REF_RELEASE(ctx->cfg);
  366. rspamd_unset_crash_handler(worker->srv);
  367. exit(EXIT_SUCCESS);
  368. }