You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

roll_history.c 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "rspamd.h"
  18. #include "lua/lua_common.h"
  19. #include "unix-std.h"
  20. #include "cfg_file_private.h"
  21. static const gchar rspamd_history_magic_old[] = {'r', 's', 'h', '1'};
  22. /**
  23. * Returns new roll history
  24. * @param pool pool for shared memory
  25. * @return new structure
  26. */
  27. struct roll_history *
  28. rspamd_roll_history_new (rspamd_mempool_t *pool, guint max_rows,
  29. struct rspamd_config *cfg)
  30. {
  31. struct roll_history *history;
  32. lua_State *L = cfg->lua_state;
  33. if (pool == NULL || max_rows == 0) {
  34. return NULL;
  35. }
  36. history = rspamd_mempool_alloc0_shared (pool, sizeof (struct roll_history));
  37. /*
  38. * Here, we check if there is any plugin that handles history,
  39. * in this case, we disable this code completely
  40. */
  41. lua_getglobal (L, "rspamd_plugins");
  42. if (lua_istable (L, -1)) {
  43. lua_pushstring (L, "history");
  44. lua_gettable (L, -2);
  45. if (lua_istable (L, -1)) {
  46. history->disabled = TRUE;
  47. }
  48. lua_pop (L, 1);
  49. }
  50. lua_pop (L, 1);
  51. if (!history->disabled) {
  52. history->rows = rspamd_mempool_alloc0_shared (pool,
  53. sizeof (struct roll_history_row) * max_rows);
  54. history->nrows = max_rows;
  55. }
  56. return history;
  57. }
  58. struct history_metric_callback_data {
  59. gchar *pos;
  60. gint remain;
  61. };
  62. static void
  63. roll_history_symbols_callback (gpointer key, gpointer value, void *user_data)
  64. {
  65. struct history_metric_callback_data *cb = user_data;
  66. struct rspamd_symbol_result *s = value;
  67. guint wr;
  68. if (s->flags & RSPAMD_SYMBOL_RESULT_IGNORED) {
  69. return;
  70. }
  71. if (cb->remain > 0) {
  72. wr = rspamd_snprintf (cb->pos, cb->remain, "%s, ", s->name);
  73. cb->pos += wr;
  74. cb->remain -= wr;
  75. }
  76. }
  77. /**
  78. * Update roll history with data from task
  79. * @param history roll history object
  80. * @param task task object
  81. */
  82. void
  83. rspamd_roll_history_update (struct roll_history *history,
  84. struct rspamd_task *task)
  85. {
  86. guint row_num;
  87. struct roll_history_row *row;
  88. struct rspamd_metric_result *metric_res;
  89. struct history_metric_callback_data cbdata;
  90. struct rspamd_action *action;
  91. if (history->disabled) {
  92. return;
  93. }
  94. /* First of all obtain check and obtain row number */
  95. g_atomic_int_compare_and_exchange (&history->cur_row, history->nrows, 0);
  96. #if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
  97. row_num = g_atomic_int_add (&history->cur_row, 1);
  98. #else
  99. row_num = g_atomic_int_exchange_and_add (&history->cur_row, 1);
  100. #endif
  101. if (row_num < history->nrows) {
  102. row = &history->rows[row_num];
  103. g_atomic_int_set (&row->completed, FALSE);
  104. }
  105. else {
  106. /* Race condition */
  107. history->cur_row = 0;
  108. return;
  109. }
  110. /* Add information from task to roll history */
  111. if (task->from_addr) {
  112. rspamd_strlcpy (row->from_addr,
  113. rspamd_inet_address_to_string (task->from_addr),
  114. sizeof (row->from_addr));
  115. }
  116. else {
  117. rspamd_strlcpy (row->from_addr, "unknown", sizeof (row->from_addr));
  118. }
  119. memcpy (&row->tv, &task->tv, sizeof (row->tv));
  120. /* Strings */
  121. rspamd_strlcpy (row->message_id, task->message_id,
  122. sizeof (row->message_id));
  123. if (task->user) {
  124. rspamd_strlcpy (row->user, task->user, sizeof (row->user));
  125. }
  126. else {
  127. row->user[0] = '\0';
  128. }
  129. /* Get default metric */
  130. metric_res = task->result;
  131. if (metric_res == NULL) {
  132. row->symbols[0] = '\0';
  133. row->action = METRIC_ACTION_NOACTION;
  134. }
  135. else {
  136. row->score = metric_res->score;
  137. action = rspamd_check_action_metric (task);
  138. row->action = action->action_type;
  139. row->required_score = rspamd_task_get_required_score (task, metric_res);
  140. cbdata.pos = row->symbols;
  141. cbdata.remain = sizeof (row->symbols);
  142. rspamd_task_symbol_result_foreach (task,
  143. roll_history_symbols_callback,
  144. &cbdata);
  145. if (cbdata.remain > 0) {
  146. /* Remove last whitespace and comma */
  147. *cbdata.pos-- = '\0';
  148. *cbdata.pos-- = '\0';
  149. *cbdata.pos = '\0';
  150. }
  151. }
  152. row->scan_time = task->time_real_finish - task->time_real;
  153. row->len = task->msg.len;
  154. g_atomic_int_set (&row->completed, TRUE);
  155. }
  156. /**
  157. * Load previously saved history from file
  158. * @param history roll history object
  159. * @param filename filename to load from
  160. * @return TRUE if history has been loaded
  161. */
  162. gboolean
  163. rspamd_roll_history_load (struct roll_history *history, const gchar *filename)
  164. {
  165. gint fd;
  166. struct stat st;
  167. gchar magic[sizeof(rspamd_history_magic_old)];
  168. ucl_object_t *top;
  169. const ucl_object_t *cur, *elt;
  170. struct ucl_parser *parser;
  171. struct roll_history_row *row;
  172. guint n, i;
  173. g_assert (history != NULL);
  174. if (history->disabled) {
  175. return TRUE;
  176. }
  177. if (stat (filename, &st) == -1) {
  178. msg_info ("cannot load history from %s: %s", filename,
  179. strerror (errno));
  180. return FALSE;
  181. }
  182. if ((fd = open (filename, O_RDONLY)) == -1) {
  183. msg_info ("cannot load history from %s: %s", filename,
  184. strerror (errno));
  185. return FALSE;
  186. }
  187. /* Check for old format */
  188. if (read (fd, magic, sizeof (magic)) == -1) {
  189. close (fd);
  190. msg_info ("cannot read history from %s: %s", filename,
  191. strerror (errno));
  192. return FALSE;
  193. }
  194. if (memcmp (magic, rspamd_history_magic_old, sizeof (magic)) == 0) {
  195. close (fd);
  196. msg_warn ("cannot read history from old format %s, "
  197. "it will be replaced after restart", filename);
  198. return FALSE;
  199. }
  200. parser = ucl_parser_new (0);
  201. if (!ucl_parser_add_fd (parser, fd)) {
  202. msg_warn ("cannot parse history file %s: %s", filename,
  203. ucl_parser_get_error (parser));
  204. ucl_parser_free (parser);
  205. close (fd);
  206. return FALSE;
  207. }
  208. top = ucl_parser_get_object (parser);
  209. ucl_parser_free (parser);
  210. close (fd);
  211. if (top == NULL) {
  212. msg_warn ("cannot parse history file %s: no object", filename);
  213. return FALSE;
  214. }
  215. if (ucl_object_type (top) != UCL_ARRAY) {
  216. msg_warn ("invalid object type read from: %s", filename);
  217. ucl_object_unref (top);
  218. return FALSE;
  219. }
  220. if (top->len > history->nrows) {
  221. msg_warn ("stored history is larger than the current one: %ud (file) vs "
  222. "%ud (history)", top->len, history->nrows);
  223. n = history->nrows;
  224. }
  225. else if (top->len < history->nrows) {
  226. msg_warn (
  227. "stored history is smaller than the current one: %ud (file) vs "
  228. "%ud (history)",
  229. top->len, history->nrows);
  230. n = top->len;
  231. }
  232. else {
  233. n = top->len;
  234. }
  235. for (i = 0; i < n; i ++) {
  236. cur = ucl_array_find_index (top, i);
  237. if (cur != NULL && ucl_object_type (cur) == UCL_OBJECT) {
  238. row = &history->rows[i];
  239. memset (row, 0, sizeof (*row));
  240. elt = ucl_object_lookup (cur, "time");
  241. if (elt && ucl_object_type (elt) == UCL_FLOAT) {
  242. double_to_tv (ucl_object_todouble (elt), &row->tv);
  243. }
  244. elt = ucl_object_lookup (cur, "id");
  245. if (elt && ucl_object_type (elt) == UCL_STRING) {
  246. rspamd_strlcpy (row->message_id, ucl_object_tostring (elt),
  247. sizeof (row->message_id));
  248. }
  249. elt = ucl_object_lookup (cur, "symbols");
  250. if (elt && ucl_object_type (elt) == UCL_STRING) {
  251. rspamd_strlcpy (row->symbols, ucl_object_tostring (elt),
  252. sizeof (row->symbols));
  253. }
  254. elt = ucl_object_lookup (cur, "user");
  255. if (elt && ucl_object_type (elt) == UCL_STRING) {
  256. rspamd_strlcpy (row->user, ucl_object_tostring (elt),
  257. sizeof (row->user));
  258. }
  259. elt = ucl_object_lookup (cur, "from");
  260. if (elt && ucl_object_type (elt) == UCL_STRING) {
  261. rspamd_strlcpy (row->from_addr, ucl_object_tostring (elt),
  262. sizeof (row->from_addr));
  263. }
  264. elt = ucl_object_lookup (cur, "len");
  265. if (elt && ucl_object_type (elt) == UCL_INT) {
  266. row->len = ucl_object_toint (elt);
  267. }
  268. elt = ucl_object_lookup (cur, "scan_time");
  269. if (elt && ucl_object_type (elt) == UCL_FLOAT) {
  270. row->scan_time = ucl_object_todouble (elt);
  271. }
  272. elt = ucl_object_lookup (cur, "score");
  273. if (elt && ucl_object_type (elt) == UCL_FLOAT) {
  274. row->score = ucl_object_todouble (elt);
  275. }
  276. elt = ucl_object_lookup (cur, "required_score");
  277. if (elt && ucl_object_type (elt) == UCL_FLOAT) {
  278. row->required_score = ucl_object_todouble (elt);
  279. }
  280. elt = ucl_object_lookup (cur, "action");
  281. if (elt && ucl_object_type (elt) == UCL_INT) {
  282. row->action = ucl_object_toint (elt);
  283. }
  284. row->completed = TRUE;
  285. }
  286. }
  287. ucl_object_unref (top);
  288. history->cur_row = n;
  289. return TRUE;
  290. }
  291. /**
  292. * Save history to file
  293. * @param history roll history object
  294. * @param filename filename to load from
  295. * @return TRUE if history has been saved
  296. */
  297. gboolean
  298. rspamd_roll_history_save (struct roll_history *history, const gchar *filename)
  299. {
  300. gint fd;
  301. ucl_object_t *obj, *elt;
  302. guint i;
  303. struct roll_history_row *row;
  304. struct ucl_emitter_functions *emitter_func;
  305. g_assert (history != NULL);
  306. if (history->disabled) {
  307. return TRUE;
  308. }
  309. if ((fd = open (filename, O_WRONLY | O_CREAT | O_TRUNC, 00600)) == -1) {
  310. msg_info ("cannot save history to %s: %s", filename, strerror (errno));
  311. return FALSE;
  312. }
  313. obj = ucl_object_typed_new (UCL_ARRAY);
  314. for (i = 0; i < history->nrows; i ++) {
  315. row = &history->rows[i];
  316. if (!row->completed) {
  317. continue;
  318. }
  319. elt = ucl_object_typed_new (UCL_OBJECT);
  320. ucl_object_insert_key (elt, ucl_object_fromdouble (
  321. tv_to_double (&row->tv)), "time", 0, false);
  322. ucl_object_insert_key (elt, ucl_object_fromstring (row->message_id),
  323. "id", 0, false);
  324. ucl_object_insert_key (elt, ucl_object_fromstring (row->symbols),
  325. "symbols", 0, false);
  326. ucl_object_insert_key (elt, ucl_object_fromstring (row->user),
  327. "user", 0, false);
  328. ucl_object_insert_key (elt, ucl_object_fromstring (row->from_addr),
  329. "from", 0, false);
  330. ucl_object_insert_key (elt, ucl_object_fromint (row->len),
  331. "len", 0, false);
  332. ucl_object_insert_key (elt, ucl_object_fromdouble (row->scan_time),
  333. "scan_time", 0, false);
  334. ucl_object_insert_key (elt, ucl_object_fromdouble (row->score),
  335. "score", 0, false);
  336. ucl_object_insert_key (elt, ucl_object_fromdouble (row->required_score),
  337. "required_score", 0, false);
  338. ucl_object_insert_key (elt, ucl_object_fromint (row->action),
  339. "action", 0, false);
  340. ucl_array_append (obj, elt);
  341. }
  342. emitter_func = ucl_object_emit_fd_funcs (fd);
  343. ucl_object_emit_full (obj, UCL_EMIT_JSON_COMPACT, emitter_func, NULL);
  344. ucl_object_emit_funcs_free (emitter_func);
  345. ucl_object_unref (obj);
  346. close (fd);
  347. return TRUE;
  348. }