You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

roll_history.c 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "rspamd.h"
  18. #include "libmime/message.h"
  19. #include "lua/lua_common.h"
  20. #include "unix-std.h"
  21. #include "cfg_file_private.h"
  22. static const gchar rspamd_history_magic_old[] = {'r', 's', 'h', '1'};
  23. /**
  24. * Returns new roll history
  25. * @param pool pool for shared memory
  26. * @return new structure
  27. */
  28. struct roll_history *
  29. rspamd_roll_history_new (rspamd_mempool_t *pool, guint max_rows,
  30. struct rspamd_config *cfg)
  31. {
  32. struct roll_history *history;
  33. lua_State *L = cfg->lua_state;
  34. if (pool == NULL || max_rows == 0) {
  35. return NULL;
  36. }
  37. history = rspamd_mempool_alloc0_shared (pool, sizeof (struct roll_history));
  38. /*
  39. * Here, we check if there is any plugin that handles history,
  40. * in this case, we disable this code completely
  41. */
  42. lua_getglobal (L, "rspamd_plugins");
  43. if (lua_istable (L, -1)) {
  44. lua_pushstring (L, "history");
  45. lua_gettable (L, -2);
  46. if (lua_istable (L, -1)) {
  47. history->disabled = TRUE;
  48. }
  49. lua_pop (L, 1);
  50. }
  51. lua_pop (L, 1);
  52. if (!history->disabled) {
  53. history->rows = rspamd_mempool_alloc0_shared (pool,
  54. sizeof (struct roll_history_row) * max_rows);
  55. history->nrows = max_rows;
  56. }
  57. return history;
  58. }
  59. struct history_metric_callback_data {
  60. gchar *pos;
  61. gint remain;
  62. };
  63. static void
  64. roll_history_symbols_callback (gpointer key, gpointer value, void *user_data)
  65. {
  66. struct history_metric_callback_data *cb = user_data;
  67. struct rspamd_symbol_result *s = value;
  68. guint wr;
  69. if (s->flags & RSPAMD_SYMBOL_RESULT_IGNORED) {
  70. return;
  71. }
  72. if (cb->remain > 0) {
  73. wr = rspamd_snprintf (cb->pos, cb->remain, "%s, ", s->name);
  74. cb->pos += wr;
  75. cb->remain -= wr;
  76. }
  77. }
  78. /**
  79. * Update roll history with data from task
  80. * @param history roll history object
  81. * @param task task object
  82. */
  83. void
  84. rspamd_roll_history_update (struct roll_history *history,
  85. struct rspamd_task *task)
  86. {
  87. guint row_num;
  88. struct roll_history_row *row;
  89. struct rspamd_scan_result *metric_res;
  90. struct history_metric_callback_data cbdata;
  91. struct rspamd_action *action;
  92. if (history->disabled) {
  93. return;
  94. }
  95. /* First of all obtain check and obtain row number */
  96. g_atomic_int_compare_and_exchange (&history->cur_row, history->nrows, 0);
  97. #if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
  98. row_num = g_atomic_int_add (&history->cur_row, 1);
  99. #else
  100. row_num = g_atomic_int_exchange_and_add (&history->cur_row, 1);
  101. #endif
  102. if (row_num < history->nrows) {
  103. row = &history->rows[row_num];
  104. g_atomic_int_set (&row->completed, FALSE);
  105. }
  106. else {
  107. /* Race condition */
  108. history->cur_row = 0;
  109. return;
  110. }
  111. /* Add information from task to roll history */
  112. if (task->from_addr) {
  113. rspamd_strlcpy (row->from_addr,
  114. rspamd_inet_address_to_string (task->from_addr),
  115. sizeof (row->from_addr));
  116. }
  117. else {
  118. rspamd_strlcpy (row->from_addr, "unknown", sizeof (row->from_addr));
  119. }
  120. row->timestamp = task->task_timestamp;
  121. /* Strings */
  122. if (task->message) {
  123. rspamd_strlcpy (row->message_id, MESSAGE_FIELD (task, message_id),
  124. sizeof (row->message_id));
  125. }
  126. if (task->user) {
  127. rspamd_strlcpy (row->user, task->user, sizeof (row->user));
  128. }
  129. else {
  130. row->user[0] = '\0';
  131. }
  132. /* Get default metric */
  133. metric_res = task->result;
  134. if (metric_res == NULL) {
  135. row->symbols[0] = '\0';
  136. row->action = METRIC_ACTION_NOACTION;
  137. }
  138. else {
  139. row->score = metric_res->score;
  140. action = rspamd_check_action_metric (task, NULL, NULL);
  141. row->action = action->action_type;
  142. row->required_score = rspamd_task_get_required_score (task, metric_res);
  143. cbdata.pos = row->symbols;
  144. cbdata.remain = sizeof (row->symbols);
  145. rspamd_task_symbol_result_foreach (task, NULL,
  146. roll_history_symbols_callback,
  147. &cbdata);
  148. if (cbdata.remain > 0) {
  149. /* Remove last whitespace and comma */
  150. *cbdata.pos-- = '\0';
  151. *cbdata.pos-- = '\0';
  152. *cbdata.pos = '\0';
  153. }
  154. }
  155. row->scan_time = task->time_real_finish - task->task_timestamp;
  156. row->len = task->msg.len;
  157. g_atomic_int_set (&row->completed, TRUE);
  158. }
  159. /**
  160. * Load previously saved history from file
  161. * @param history roll history object
  162. * @param filename filename to load from
  163. * @return TRUE if history has been loaded
  164. */
  165. gboolean
  166. rspamd_roll_history_load (struct roll_history *history, const gchar *filename)
  167. {
  168. gint fd;
  169. struct stat st;
  170. gchar magic[sizeof(rspamd_history_magic_old)];
  171. ucl_object_t *top;
  172. const ucl_object_t *cur, *elt;
  173. struct ucl_parser *parser;
  174. struct roll_history_row *row;
  175. guint n, i;
  176. g_assert (history != NULL);
  177. if (history->disabled) {
  178. return TRUE;
  179. }
  180. if (stat (filename, &st) == -1) {
  181. msg_info ("cannot load history from %s: %s", filename,
  182. strerror (errno));
  183. return FALSE;
  184. }
  185. if ((fd = open (filename, O_RDONLY)) == -1) {
  186. msg_info ("cannot load history from %s: %s", filename,
  187. strerror (errno));
  188. return FALSE;
  189. }
  190. /* Check for old format */
  191. if (read (fd, magic, sizeof (magic)) == -1) {
  192. close (fd);
  193. msg_info ("cannot read history from %s: %s", filename,
  194. strerror (errno));
  195. return FALSE;
  196. }
  197. if (memcmp (magic, rspamd_history_magic_old, sizeof (magic)) == 0) {
  198. close (fd);
  199. msg_warn ("cannot read history from old format %s, "
  200. "it will be replaced after restart", filename);
  201. return FALSE;
  202. }
  203. parser = ucl_parser_new (0);
  204. if (!ucl_parser_add_fd (parser, fd)) {
  205. msg_warn ("cannot parse history file %s: %s", filename,
  206. ucl_parser_get_error (parser));
  207. ucl_parser_free (parser);
  208. close (fd);
  209. return FALSE;
  210. }
  211. top = ucl_parser_get_object (parser);
  212. ucl_parser_free (parser);
  213. close (fd);
  214. if (top == NULL) {
  215. msg_warn ("cannot parse history file %s: no object", filename);
  216. return FALSE;
  217. }
  218. if (ucl_object_type (top) != UCL_ARRAY) {
  219. msg_warn ("invalid object type read from: %s", filename);
  220. ucl_object_unref (top);
  221. return FALSE;
  222. }
  223. if (top->len > history->nrows) {
  224. msg_warn ("stored history is larger than the current one: %ud (file) vs "
  225. "%ud (history)", top->len, history->nrows);
  226. n = history->nrows;
  227. }
  228. else if (top->len < history->nrows) {
  229. msg_warn (
  230. "stored history is smaller than the current one: %ud (file) vs "
  231. "%ud (history)",
  232. top->len, history->nrows);
  233. n = top->len;
  234. }
  235. else {
  236. n = top->len;
  237. }
  238. for (i = 0; i < n; i ++) {
  239. cur = ucl_array_find_index (top, i);
  240. if (cur != NULL && ucl_object_type (cur) == UCL_OBJECT) {
  241. row = &history->rows[i];
  242. memset (row, 0, sizeof (*row));
  243. elt = ucl_object_lookup (cur, "time");
  244. if (elt && ucl_object_type (elt) == UCL_FLOAT) {
  245. row->timestamp = ucl_object_todouble (elt);
  246. }
  247. elt = ucl_object_lookup (cur, "id");
  248. if (elt && ucl_object_type (elt) == UCL_STRING) {
  249. rspamd_strlcpy (row->message_id, ucl_object_tostring (elt),
  250. sizeof (row->message_id));
  251. }
  252. elt = ucl_object_lookup (cur, "symbols");
  253. if (elt && ucl_object_type (elt) == UCL_STRING) {
  254. rspamd_strlcpy (row->symbols, ucl_object_tostring (elt),
  255. sizeof (row->symbols));
  256. }
  257. elt = ucl_object_lookup (cur, "user");
  258. if (elt && ucl_object_type (elt) == UCL_STRING) {
  259. rspamd_strlcpy (row->user, ucl_object_tostring (elt),
  260. sizeof (row->user));
  261. }
  262. elt = ucl_object_lookup (cur, "from");
  263. if (elt && ucl_object_type (elt) == UCL_STRING) {
  264. rspamd_strlcpy (row->from_addr, ucl_object_tostring (elt),
  265. sizeof (row->from_addr));
  266. }
  267. elt = ucl_object_lookup (cur, "len");
  268. if (elt && ucl_object_type (elt) == UCL_INT) {
  269. row->len = ucl_object_toint (elt);
  270. }
  271. elt = ucl_object_lookup (cur, "scan_time");
  272. if (elt && ucl_object_type (elt) == UCL_FLOAT) {
  273. row->scan_time = ucl_object_todouble (elt);
  274. }
  275. elt = ucl_object_lookup (cur, "score");
  276. if (elt && ucl_object_type (elt) == UCL_FLOAT) {
  277. row->score = ucl_object_todouble (elt);
  278. }
  279. elt = ucl_object_lookup (cur, "required_score");
  280. if (elt && ucl_object_type (elt) == UCL_FLOAT) {
  281. row->required_score = ucl_object_todouble (elt);
  282. }
  283. elt = ucl_object_lookup (cur, "action");
  284. if (elt && ucl_object_type (elt) == UCL_INT) {
  285. row->action = ucl_object_toint (elt);
  286. }
  287. row->completed = TRUE;
  288. }
  289. }
  290. ucl_object_unref (top);
  291. history->cur_row = n;
  292. return TRUE;
  293. }
  294. /**
  295. * Save history to file
  296. * @param history roll history object
  297. * @param filename filename to load from
  298. * @return TRUE if history has been saved
  299. */
  300. gboolean
  301. rspamd_roll_history_save (struct roll_history *history, const gchar *filename)
  302. {
  303. gint fd;
  304. FILE *fp;
  305. ucl_object_t *obj, *elt;
  306. guint i;
  307. struct roll_history_row *row;
  308. struct ucl_emitter_functions *emitter_func;
  309. g_assert (history != NULL);
  310. if (history->disabled) {
  311. return TRUE;
  312. }
  313. if ((fd = open (filename, O_WRONLY | O_CREAT | O_TRUNC, 00600)) == -1) {
  314. msg_info ("cannot save history to %s: %s", filename, strerror (errno));
  315. return FALSE;
  316. }
  317. fp = fdopen (fd, "w");
  318. obj = ucl_object_typed_new (UCL_ARRAY);
  319. for (i = 0; i < history->nrows; i ++) {
  320. row = &history->rows[i];
  321. if (!row->completed) {
  322. continue;
  323. }
  324. elt = ucl_object_typed_new (UCL_OBJECT);
  325. ucl_object_insert_key (elt, ucl_object_fromdouble (row->timestamp),
  326. "time", 0, false);
  327. ucl_object_insert_key (elt, ucl_object_fromstring (row->message_id),
  328. "id", 0, false);
  329. ucl_object_insert_key (elt, ucl_object_fromstring (row->symbols),
  330. "symbols", 0, false);
  331. ucl_object_insert_key (elt, ucl_object_fromstring (row->user),
  332. "user", 0, false);
  333. ucl_object_insert_key (elt, ucl_object_fromstring (row->from_addr),
  334. "from", 0, false);
  335. ucl_object_insert_key (elt, ucl_object_fromint (row->len),
  336. "len", 0, false);
  337. ucl_object_insert_key (elt, ucl_object_fromdouble (row->scan_time),
  338. "scan_time", 0, false);
  339. ucl_object_insert_key (elt, ucl_object_fromdouble (row->score),
  340. "score", 0, false);
  341. ucl_object_insert_key (elt, ucl_object_fromdouble (row->required_score),
  342. "required_score", 0, false);
  343. ucl_object_insert_key (elt, ucl_object_fromint (row->action),
  344. "action", 0, false);
  345. ucl_array_append (obj, elt);
  346. }
  347. emitter_func = ucl_object_emit_file_funcs (fp);
  348. ucl_object_emit_full (obj, UCL_EMIT_JSON_COMPACT, emitter_func, NULL);
  349. ucl_object_emit_funcs_free (emitter_func);
  350. ucl_object_unref (obj);
  351. fclose (fp);
  352. return TRUE;
  353. }