You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

composites.c 8.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "logger.h"
  18. #include "expression.h"
  19. #include "task.h"
  20. #include "utlist.h"
  21. #include "filter.h"
  22. #include "composites.h"
  23. struct composites_data {
  24. struct rspamd_task *task;
  25. struct rspamd_composite *composite;
  26. struct metric_result *metric_res;
  27. GTree *symbols_to_remove;
  28. guint8 *checked;
  29. };
  30. struct symbol_remove_data {
  31. struct symbol *ms;
  32. gboolean remove_weight;
  33. gboolean remove_symbol;
  34. GList *comp;
  35. };
  36. static rspamd_expression_atom_t * rspamd_composite_expr_parse (const gchar *line, gsize len,
  37. rspamd_mempool_t *pool, gpointer ud, GError **err);
  38. static gint rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom);
  39. static gint rspamd_composite_expr_priority (rspamd_expression_atom_t *atom);
  40. static void rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom);
  41. const struct rspamd_atom_subr composite_expr_subr = {
  42. .parse = rspamd_composite_expr_parse,
  43. .process = rspamd_composite_expr_process,
  44. .priority = rspamd_composite_expr_priority,
  45. .destroy = rspamd_composite_expr_destroy
  46. };
  47. static GQuark
  48. rspamd_composites_quark (void)
  49. {
  50. return g_quark_from_static_string ("composites");
  51. }
  52. static rspamd_expression_atom_t *
  53. rspamd_composite_expr_parse (const gchar *line, gsize len,
  54. rspamd_mempool_t *pool, gpointer ud, GError **err)
  55. {
  56. gsize clen;
  57. rspamd_expression_atom_t *res;
  58. /*
  59. * Composites are just sequences of symbols
  60. */
  61. clen = strcspn (line, ", \t()><+!|&\n");
  62. if (clen == 0) {
  63. /* Invalid composite atom */
  64. g_set_error (err, rspamd_composites_quark (), 100, "Invalid composite: %s",
  65. line);
  66. return NULL;
  67. }
  68. res = rspamd_mempool_alloc0 (pool, sizeof (*res));
  69. res->len = clen;
  70. res->str = line;
  71. res->data = rspamd_mempool_alloc (pool, clen + 1);
  72. rspamd_strlcpy (res->data, line, clen + 1);
  73. return res;
  74. }
  75. static gint
  76. rspamd_composite_process_single_symbol (struct composites_data *cd,
  77. const gchar *sym, struct symbol **pms)
  78. {
  79. struct symbol *ms = NULL;
  80. gint rc = 0;
  81. struct rspamd_composite *ncomp;
  82. if ((ms = g_hash_table_lookup (cd->metric_res->symbols, sym)) == NULL) {
  83. if ((ncomp =
  84. g_hash_table_lookup (cd->task->cfg->composite_symbols,
  85. sym)) != NULL) {
  86. /* Set checked for this symbol to avoid cyclic references */
  87. if (isclr (cd->checked, ncomp->id * 2)) {
  88. setbit (cd->checked, cd->composite->id * 2);
  89. rc = rspamd_process_expression (ncomp->expr,
  90. RSPAMD_EXPRESSION_FLAG_NOOPT, cd);
  91. clrbit (cd->checked, cd->composite->id * 2);
  92. if (rc) {
  93. setbit (cd->checked, ncomp->id * 2 + 1);
  94. }
  95. setbit (cd->checked, ncomp->id * 2);
  96. ms = g_hash_table_lookup (cd->metric_res->symbols, sym);
  97. }
  98. else {
  99. /*
  100. * XXX: in case of cyclic references this would return 0
  101. */
  102. rc = isset (cd->checked, ncomp->id * 2 + 1);
  103. }
  104. }
  105. }
  106. else {
  107. rc = 1;
  108. }
  109. *pms = ms;
  110. return rc;
  111. }
  112. static gint
  113. rspamd_composite_expr_process (gpointer input, rspamd_expression_atom_t *atom)
  114. {
  115. struct composites_data *cd = (struct composites_data *)input;
  116. const gchar *sym = atom->data;
  117. struct symbol_remove_data *rd;
  118. struct symbol *ms;
  119. struct rspamd_symbols_group *gr;
  120. struct rspamd_symbol_def *sdef;
  121. struct metric *metric;
  122. GHashTableIter it;
  123. gpointer k, v;
  124. gint rc = 0;
  125. gchar t = '\0';
  126. if (isset (cd->checked, cd->composite->id * 2)) {
  127. /* We have already checked this composite, so just return its value */
  128. rc = isset (cd->checked, cd->composite->id * 2 + 1);
  129. return rc;
  130. }
  131. if (*sym == '~' || *sym == '-') {
  132. t = *sym ++;
  133. }
  134. if (strncmp (sym, "g:", 2) == 0) {
  135. metric = g_hash_table_lookup (cd->task->cfg->metrics, DEFAULT_METRIC);
  136. g_assert (metric != NULL);
  137. gr = g_hash_table_lookup (metric->groups, sym + 2);
  138. if (gr != NULL) {
  139. g_hash_table_iter_init (&it, gr->symbols);
  140. while (g_hash_table_iter_next (&it, &k, &v)) {
  141. sdef = v;
  142. rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms);
  143. if (rc) {
  144. break;
  145. }
  146. }
  147. }
  148. }
  149. else {
  150. rc = rspamd_composite_process_single_symbol (cd, sym, &ms);
  151. }
  152. if (rc && ms) {
  153. /*
  154. * At this point we know that we need to do something about this symbol,
  155. * however, we don't know whether we need to delete it unfortunately,
  156. * that depends on the later decisions when the complete expression is
  157. * evaluated.
  158. */
  159. if ((rd = g_tree_lookup (cd->symbols_to_remove, ms->name)) == NULL) {
  160. rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*rd));
  161. rd->ms = ms;
  162. if (G_UNLIKELY (t == '~')) {
  163. rd->remove_weight = FALSE;
  164. rd->remove_symbol = TRUE;
  165. }
  166. else if (G_UNLIKELY (t == '-')) {
  167. rd->remove_symbol = FALSE;
  168. rd->remove_weight = FALSE;
  169. }
  170. else {
  171. rd->remove_symbol = TRUE;
  172. rd->remove_weight = TRUE;
  173. }
  174. rd->comp = g_list_prepend (NULL, cd->composite);
  175. g_tree_insert (cd->symbols_to_remove,
  176. (gpointer)ms->name,
  177. rd);
  178. }
  179. else {
  180. /*
  181. * XXX: what if we have different preferences regarding
  182. * weight and symbol removal in different composites?
  183. */
  184. rd->comp = g_list_prepend (rd->comp, cd->composite);
  185. }
  186. }
  187. return rc;
  188. }
  189. /*
  190. * We don't have preferences for composites
  191. */
  192. static gint
  193. rspamd_composite_expr_priority (rspamd_expression_atom_t *atom)
  194. {
  195. return 0;
  196. }
  197. static void
  198. rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom)
  199. {
  200. /* Composite atoms are destroyed just with the pool */
  201. }
  202. static gint
  203. remove_compare_data (gconstpointer a, gconstpointer b)
  204. {
  205. const gchar *ca = a, *cb = b;
  206. return strcmp (ca, cb);
  207. }
  208. static void
  209. composites_foreach_callback (gpointer key, gpointer value, void *data)
  210. {
  211. struct composites_data *cd = data;
  212. struct rspamd_composite *comp = value;
  213. gint rc;
  214. cd->composite = comp;
  215. rc = rspamd_process_expression (comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT, cd);
  216. /* Checked bit */
  217. setbit (cd->checked, comp->id * 2);
  218. /* Result bit */
  219. if (rc) {
  220. setbit (cd->checked, comp->id * 2 + 1);
  221. rspamd_task_insert_result_single (cd->task, key, 1.0, NULL);
  222. }
  223. else {
  224. clrbit (cd->checked, comp->id * 2 + 1);
  225. }
  226. }
  227. static gboolean
  228. composites_remove_symbols (gpointer key, gpointer value, gpointer data)
  229. {
  230. struct composites_data *cd = data;
  231. struct symbol_remove_data *rd = value;
  232. GList *cur;
  233. struct rspamd_composite *comp;
  234. gboolean matched = FALSE;
  235. cur = rd->comp;
  236. /*
  237. * XXX: actually, this is a weak assumption as we are unaware here about
  238. * negate operation and so on. We need to parse AST directly and remove
  239. * only those symbols that could be removed.
  240. */
  241. while (cur) {
  242. comp = cur->data;
  243. if (isset (cd->checked, comp->id * 2 + 1)) {
  244. matched = TRUE;
  245. break;
  246. }
  247. cur = g_list_next (cur);
  248. }
  249. g_list_free (rd->comp);
  250. if (matched) {
  251. if (rd->remove_symbol) {
  252. g_hash_table_remove (cd->metric_res->symbols, key);
  253. }
  254. if (rd->remove_weight) {
  255. cd->metric_res->score -= rd->ms->score;
  256. }
  257. }
  258. return FALSE;
  259. }
  260. static void
  261. composites_metric_callback (gpointer key, gpointer value, gpointer data)
  262. {
  263. struct rspamd_task *task = (struct rspamd_task *)data;
  264. struct composites_data *cd =
  265. rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data));
  266. struct metric_result *metric_res = (struct metric_result *)value;
  267. cd->task = task;
  268. cd->metric_res = (struct metric_result *)metric_res;
  269. cd->symbols_to_remove = g_tree_new (remove_compare_data);
  270. cd->checked =
  271. rspamd_mempool_alloc0 (task->task_pool,
  272. NBYTES (g_hash_table_size (task->cfg->composite_symbols) * 2));
  273. /* Process hash table */
  274. g_hash_table_foreach (task->cfg->composite_symbols,
  275. composites_foreach_callback,
  276. cd);
  277. /* Remove symbols that are in composites */
  278. g_tree_foreach (cd->symbols_to_remove, composites_remove_symbols, cd);
  279. /* Free list */
  280. g_tree_destroy (cd->symbols_to_remove);
  281. }
  282. void
  283. rspamd_make_composites (struct rspamd_task *task)
  284. {
  285. g_hash_table_foreach (task->results, composites_metric_callback, task);
  286. }