You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

composites.cxx 25KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989
  1. /*-
  2. * Copyright 2021 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "logger.h"
  18. #include "expression.h"
  19. #include "task.h"
  20. #include "utlist.h"
  21. #include "scan_result.h"
  22. #include "composites.h"
  23. #include <cmath>
  24. #include <vector>
  25. #include <variant>
  26. #include "libutil/cxx/util.hxx"
  27. #include "contrib/ankerl/unordered_dense.h"
  28. #include "composites_internal.hxx"
  29. #define msg_err_composites(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, \
  30. "composites", task->task_pool->tag.uid, \
  31. RSPAMD_LOG_FUNC, \
  32. __VA_ARGS__)
  33. #define msg_warn_composites(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, \
  34. "composites", task->task_pool->tag.uid, \
  35. RSPAMD_LOG_FUNC, \
  36. __VA_ARGS__)
  37. #define msg_info_composites(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, \
  38. "composites", task->task_pool->tag.uid, \
  39. RSPAMD_LOG_FUNC, \
  40. __VA_ARGS__)
  41. #define msg_debug_composites(...) rspamd_conditional_debug_fast(NULL, task->from_addr, \
  42. rspamd_composites_log_id, "composites", task->task_pool->tag.uid, \
  43. RSPAMD_LOG_FUNC, \
  44. __VA_ARGS__)
  45. INIT_LOG_MODULE(composites)
  46. namespace rspamd::composites {
  47. static rspamd_expression_atom_t *rspamd_composite_expr_parse(const char *line, gsize len,
  48. rspamd_mempool_t *pool,
  49. gpointer ud, GError **err);
  50. static double rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom);
  51. static int rspamd_composite_expr_priority(rspamd_expression_atom_t *atom);
  52. static void rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom);
  53. static void composites_foreach_callback(gpointer key, gpointer value, void *data);
  54. const struct rspamd_atom_subr composite_expr_subr = {
  55. .parse = rspamd::composites::rspamd_composite_expr_parse,
  56. .process = rspamd::composites::rspamd_composite_expr_process,
  57. .priority = rspamd::composites::rspamd_composite_expr_priority,
  58. .destroy = rspamd::composites::rspamd_composite_expr_destroy};
  59. }// namespace rspamd::composites
  60. namespace rspamd::composites {
  /* Results whose absolute value does not exceed this threshold are treated as zero */
  static constexpr double epsilon = 1e-5;
  62. struct symbol_remove_data {
  63. const char *sym;
  64. struct rspamd_composite *comp;
  65. GNode *parent;
  66. std::uint8_t action;
  67. };
  68. struct composites_data {
  69. struct rspamd_task *task;
  70. struct rspamd_composite *composite;
  71. struct rspamd_scan_result *metric_res;
  72. ankerl::unordered_dense::map<std::string_view,
  73. std::vector<symbol_remove_data>>
  74. symbols_to_remove;
  75. std::vector<bool> checked;
  76. explicit composites_data(struct rspamd_task *task, struct rspamd_scan_result *mres)
  77. : task(task), composite(nullptr), metric_res(mres)
  78. {
  79. checked.resize(rspamd_composites_manager_nelts(task->cfg->composites_manager) * 2,
  80. false);
  81. }
  82. };
  83. struct rspamd_composite_option_match {
  84. rspamd_regexp_t *re;
  85. std::string match;
  86. explicit rspamd_composite_option_match(const char *start, std::size_t len) noexcept
  87. : re(nullptr), match(start, len)
  88. {
  89. }
  90. explicit rspamd_composite_option_match(rspamd_regexp_t *re) noexcept
  91. : re(rspamd_regexp_ref(re))
  92. {
  93. }
  94. rspamd_composite_option_match(const rspamd_composite_option_match &other) noexcept
  95. {
  96. if (other.re) {
  97. re = rspamd_regexp_ref(other.re);
  98. }
  99. else {
  100. match = other.match;
  101. re = nullptr;
  102. }
  103. }
  104. rspamd_composite_option_match &operator=(const rspamd_composite_option_match &other) noexcept
  105. {
  106. if (other.re) {
  107. if (re) {
  108. rspamd_regexp_unref(re);
  109. }
  110. re = rspamd_regexp_ref(other.re);
  111. }
  112. else {
  113. if (re) {
  114. rspamd_regexp_unref(re);
  115. }
  116. re = nullptr;
  117. match = other.match;
  118. }
  119. return *this;
  120. }
  121. rspamd_composite_option_match(rspamd_composite_option_match &&other) noexcept
  122. {
  123. if (other.re) {
  124. re = other.re;
  125. other.re = nullptr;
  126. }
  127. else {
  128. re = nullptr;
  129. match = std::move(other.match);
  130. }
  131. }
  132. rspamd_composite_option_match &operator=(rspamd_composite_option_match &&other) noexcept
  133. {
  134. if (other.re) {
  135. if (re) {
  136. rspamd_regexp_unref(re);
  137. }
  138. re = other.re;
  139. other.re = nullptr;
  140. }
  141. else {
  142. if (re) {
  143. rspamd_regexp_unref(re);
  144. }
  145. re = nullptr;
  146. match = std::move(other.match);
  147. }
  148. return *this;
  149. }
  150. ~rspamd_composite_option_match()
  151. {
  152. if (re) {
  153. rspamd_regexp_unref(re);
  154. }
  155. }
  156. auto match_opt(const std::string_view &data) const -> bool
  157. {
  158. if (re) {
  159. return rspamd_regexp_search(re,
  160. data.data(), data.size(),
  161. nullptr, nullptr, false, nullptr);
  162. }
  163. else {
  164. return data == match;
  165. }
  166. }
  167. auto get_pat() const -> std::string_view
  168. {
  169. if (re) {
  170. return std::string_view(rspamd_regexp_get_pattern(re));
  171. }
  172. else {
  173. return match;
  174. }
  175. }
  176. };
  /* Lazily resolved kind of the symbol an atom refers to */
  enum class rspamd_composite_atom_type {
      ATOM_UNKNOWN = 0, /* not resolved yet */
      ATOM_COMPOSITE,   /* the symbol is another composite */
      ATOM_PLAIN        /* the symbol is not a composite */
  };
  182. struct rspamd_composite_atom {
  183. std::string symbol;
  184. std::string_view norm_symbol;
  185. rspamd_composite_atom_type comp_type = rspamd_composite_atom_type::ATOM_UNKNOWN;
  186. const struct rspamd_composite *ncomp; /* underlying composite */
  187. std::vector<rspamd_composite_option_match> opts;
  188. };
  /* Bit flags describing what should happen to a symbol matched by a composite */
  enum rspamd_composite_action : std::uint8_t {
      RSPAMD_COMPOSITE_UNTOUCH = 0x00,
      RSPAMD_COMPOSITE_REMOVE_SYMBOL = 0x01,
      RSPAMD_COMPOSITE_REMOVE_WEIGHT = 0x02,
      RSPAMD_COMPOSITE_REMOVE_FORCED = 0x04
  };
  195. static GQuark
  196. rspamd_composites_quark(void)
  197. {
  198. return g_quark_from_static_string("composites");
  199. }
  200. static auto
  201. rspamd_composite_atom_dtor(void *ptr)
  202. {
  203. auto *atom = reinterpret_cast<rspamd_composite_atom *>(ptr);
  204. delete atom;
  205. }
  206. static rspamd_expression_atom_t *
  207. rspamd_composite_expr_parse(const char *line, gsize len,
  208. rspamd_mempool_t *pool,
  209. gpointer ud, GError **err)
  210. {
  211. gsize clen = 0;
  212. const char *p, *end;
  213. enum composite_expr_state {
  214. comp_state_read_symbol = 0,
  215. comp_state_read_obrace,
  216. comp_state_read_option,
  217. comp_state_read_regexp,
  218. comp_state_read_regexp_end,
  219. comp_state_read_comma,
  220. comp_state_read_ebrace,
  221. comp_state_read_end
  222. } state = comp_state_read_symbol;
  223. end = line + len;
  224. p = line;
  225. /* Find length of the atom using a reduced state machine */
  226. while (p < end) {
  227. if (state == comp_state_read_end) {
  228. break;
  229. }
  230. switch (state) {
  231. case comp_state_read_symbol:
  232. clen = rspamd_memcspn(p, "[; \t()><!|&\n", len);
  233. p += clen;
  234. if (*p == '[') {
  235. state = comp_state_read_obrace;
  236. }
  237. else {
  238. state = comp_state_read_end;
  239. }
  240. break;
  241. case comp_state_read_obrace:
  242. p++;
  243. if (*p == '/') {
  244. p++;
  245. state = comp_state_read_regexp;
  246. }
  247. else {
  248. state = comp_state_read_option;
  249. }
  250. break;
  251. case comp_state_read_regexp:
  252. if (*p == '\\' && p + 1 < end) {
  253. /* Escaping */
  254. p++;
  255. }
  256. else if (*p == '/') {
  257. /* End of regexp, possible flags */
  258. state = comp_state_read_regexp_end;
  259. }
  260. p++;
  261. break;
  262. case comp_state_read_option:
  263. case comp_state_read_regexp_end:
  264. if (*p == ',') {
  265. p++;
  266. state = comp_state_read_comma;
  267. }
  268. else if (*p == ']') {
  269. state = comp_state_read_ebrace;
  270. }
  271. else {
  272. p++;
  273. }
  274. break;
  275. case comp_state_read_comma:
  276. if (!g_ascii_isspace(*p)) {
  277. if (*p == '/') {
  278. state = comp_state_read_regexp;
  279. }
  280. else if (*p == ']') {
  281. state = comp_state_read_ebrace;
  282. }
  283. else {
  284. state = comp_state_read_option;
  285. }
  286. }
  287. else {
  288. /* Skip spaces after comma */
  289. p++;
  290. }
  291. break;
  292. case comp_state_read_ebrace:
  293. p++;
  294. state = comp_state_read_end;
  295. break;
  296. case comp_state_read_end:
  297. g_assert_not_reached();
  298. }
  299. }
  300. if (state != comp_state_read_end) {
  301. g_set_error(err, rspamd_composites_quark(), 100, "invalid composite: %s;"
  302. "parser stopped in state %d",
  303. line, state);
  304. return NULL;
  305. }
  306. clen = p - line;
  307. p = line;
  308. state = comp_state_read_symbol;
  309. auto *atom = new rspamd_composite_atom;
  310. auto *res = rspamd_mempool_alloc0_type(pool, rspamd_expression_atom_t);
  311. res->len = clen;
  312. res->str = line;
  313. /* Full state machine to fill a composite atom */
  314. const char *opt_start = nullptr;
  315. while (p < end) {
  316. if (state == comp_state_read_end) {
  317. break;
  318. }
  319. switch (state) {
  320. case comp_state_read_symbol: {
  321. clen = rspamd_memcspn(p, "[; \t()><!|&\n", len);
  322. p += clen;
  323. if (*p == '[') {
  324. state = comp_state_read_obrace;
  325. }
  326. else {
  327. state = comp_state_read_end;
  328. }
  329. atom->symbol = std::string{line, clen};
  330. auto norm_start = std::find_if(atom->symbol.begin(), atom->symbol.end(),
  331. [](char c) { return g_ascii_isalnum(c); });
  332. if (norm_start == atom->symbol.end()) {
  333. msg_err_pool("invalid composite atom: %s", atom->symbol.c_str());
  334. }
  335. atom->norm_symbol = make_string_view_from_it(norm_start, atom->symbol.end());
  336. break;
  337. }
  338. case comp_state_read_obrace:
  339. p++;
  340. if (*p == '/') {
  341. opt_start = p;
  342. p++; /* Starting slash */
  343. state = comp_state_read_regexp;
  344. }
  345. else {
  346. state = comp_state_read_option;
  347. opt_start = p;
  348. }
  349. break;
  350. case comp_state_read_regexp:
  351. if (*p == '\\' && p + 1 < end) {
  352. /* Escaping */
  353. p++;
  354. }
  355. else if (*p == '/') {
  356. /* End of regexp, possible flags */
  357. state = comp_state_read_regexp_end;
  358. }
  359. p++;
  360. break;
  361. case comp_state_read_option:
  362. if (*p == ',' || *p == ']') {
  363. /* Plain match, copy option to ensure string_view validity */
  364. int opt_len = p - opt_start;
  365. auto *opt_buf = rspamd_mempool_alloc_buffer(pool, opt_len + 1);
  366. rspamd_strlcpy(opt_buf, opt_start, opt_len + 1);
  367. opt_buf = g_strstrip(opt_buf);
  368. atom->opts.emplace_back(opt_buf, strlen(opt_buf));
  369. if (*p == ',') {
  370. p++;
  371. state = comp_state_read_comma;
  372. }
  373. else {
  374. state = comp_state_read_ebrace;
  375. }
  376. }
  377. else {
  378. p++;
  379. }
  380. break;
  381. case comp_state_read_regexp_end:
  382. if (*p == ',' || *p == ']') {
  383. auto opt_len = p - opt_start;
  384. rspamd_regexp_t *re;
  385. GError *re_err = nullptr;
  386. re = rspamd_regexp_new_len(opt_start, opt_len, nullptr, &re_err);
  387. if (re == nullptr) {
  388. msg_err_pool("cannot create regexp from string %*s: %e",
  389. opt_len, opt_start, re_err);
  390. g_error_free(re_err);
  391. }
  392. else {
  393. atom->opts.emplace_back(re);
  394. rspamd_regexp_unref(re);
  395. }
  396. if (*p == ',') {
  397. p++;
  398. state = comp_state_read_comma;
  399. }
  400. else {
  401. state = comp_state_read_ebrace;
  402. }
  403. }
  404. else {
  405. p++;
  406. }
  407. break;
  408. case comp_state_read_comma:
  409. if (!g_ascii_isspace(*p)) {
  410. if (*p == '/') {
  411. state = comp_state_read_regexp;
  412. opt_start = p;
  413. }
  414. else if (*p == ']') {
  415. state = comp_state_read_ebrace;
  416. }
  417. else {
  418. opt_start = p;
  419. state = comp_state_read_option;
  420. }
  421. }
  422. else {
  423. /* Skip spaces after comma */
  424. p++;
  425. }
  426. break;
  427. case comp_state_read_ebrace:
  428. p++;
  429. state = comp_state_read_end;
  430. break;
  431. case comp_state_read_end:
  432. g_assert_not_reached();
  433. }
  434. }
  435. res->data = atom;
  436. return res;
  437. }
  438. static auto
  439. process_symbol_removal(rspamd_expression_atom_t *atom,
  440. struct composites_data *cd,
  441. struct rspamd_symbol_result *ms,
  442. const std::string &beg) -> void
  443. {
  444. struct rspamd_task *task = cd->task;
  445. if (ms == nullptr) {
  446. return;
  447. }
  448. /*
  449. * At this point we know that we need to do something about this symbol,
  450. * however, we don't know whether we need to delete it unfortunately,
  451. * that depends on the later decisions when the complete expression is
  452. * evaluated.
  453. */
  454. auto rd_it = cd->symbols_to_remove.find(ms->name);
  455. auto fill_removal_structure = [&](symbol_remove_data &nrd) {
  456. nrd.sym = ms->name;
  457. /* By default remove symbols */
  458. switch (cd->composite->policy) {
  459. case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_ALL:
  460. default:
  461. nrd.action = (RSPAMD_COMPOSITE_REMOVE_SYMBOL | RSPAMD_COMPOSITE_REMOVE_WEIGHT);
  462. break;
  463. case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL:
  464. nrd.action = RSPAMD_COMPOSITE_REMOVE_SYMBOL;
  465. break;
  466. case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT:
  467. nrd.action = RSPAMD_COMPOSITE_REMOVE_WEIGHT;
  468. break;
  469. case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_LEAVE:
  470. nrd.action = 0;
  471. break;
  472. }
  473. for (auto t: beg) {
  474. if (t == '~') {
  475. nrd.action &= ~RSPAMD_COMPOSITE_REMOVE_SYMBOL;
  476. }
  477. else if (t == '-') {
  478. nrd.action &= ~(RSPAMD_COMPOSITE_REMOVE_WEIGHT |
  479. RSPAMD_COMPOSITE_REMOVE_SYMBOL);
  480. }
  481. else if (t == '^') {
  482. nrd.action |= RSPAMD_COMPOSITE_REMOVE_FORCED;
  483. }
  484. else {
  485. break;
  486. }
  487. }
  488. nrd.comp = cd->composite;
  489. nrd.parent = atom->parent;
  490. };
  491. if (rd_it != cd->symbols_to_remove.end()) {
  492. fill_removal_structure(rd_it->second.emplace_back());
  493. msg_debug_composites("%s: added symbol %s to removal: %d policy, from composite %s",
  494. cd->metric_res->name,
  495. ms->name, rd_it->second.back().action,
  496. cd->composite->sym.c_str());
  497. }
  498. else {
  499. std::vector<symbol_remove_data> nrd;
  500. fill_removal_structure(nrd.emplace_back());
  501. msg_debug_composites("%s: added symbol %s to removal: %d policy, from composite %s",
  502. cd->metric_res->name,
  503. ms->name, nrd.front().action,
  504. cd->composite->sym.c_str());
  505. cd->symbols_to_remove[ms->name] = std::move(nrd);
  506. }
  507. }
  508. static auto
  509. process_single_symbol(struct composites_data *cd,
  510. std::string_view sym,
  511. struct rspamd_symbol_result **pms,
  512. struct rspamd_composite_atom *atom) -> double
  513. {
  514. struct rspamd_symbol_result *ms = nullptr;
  515. double rc = 0;
  516. struct rspamd_task *task = cd->task;
  517. if ((ms = rspamd_task_find_symbol_result(cd->task, sym.data(), cd->metric_res)) == nullptr) {
  518. msg_debug_composites("not found symbol %s in composite %s", sym.data(),
  519. cd->composite->sym.c_str());
  520. if (G_UNLIKELY(atom->comp_type == rspamd_composite_atom_type::ATOM_UNKNOWN)) {
  521. const struct rspamd_composite *ncomp;
  522. if ((ncomp = COMPOSITE_MANAGER_FROM_PTR(task->cfg->composites_manager)->find(sym)) != NULL) {
  523. atom->comp_type = rspamd_composite_atom_type::ATOM_COMPOSITE;
  524. atom->ncomp = ncomp;
  525. }
  526. else {
  527. atom->comp_type = rspamd_composite_atom_type::ATOM_PLAIN;
  528. }
  529. }
  530. if (atom->comp_type == rspamd_composite_atom_type::ATOM_COMPOSITE) {
  531. msg_debug_composites("symbol %s for composite %s is another composite",
  532. sym.data(), cd->composite->sym.c_str());
  533. if (!cd->checked[atom->ncomp->id * 2]) {
  534. msg_debug_composites("composite dependency %s for %s is not checked",
  535. sym.data(), cd->composite->sym.c_str());
  536. /* Set checked for this symbol to avoid cyclic references */
  537. cd->checked[cd->composite->id * 2] = true;
  538. auto *saved = cd->composite; /* Save the current composite */
  539. composites_foreach_callback((gpointer) atom->ncomp->sym.c_str(),
  540. (gpointer) atom->ncomp, (gpointer) cd);
  541. /* Restore state */
  542. cd->composite = saved;
  543. cd->checked[cd->composite->id * 2] = false;
  544. ms = rspamd_task_find_symbol_result(cd->task, sym.data(),
  545. cd->metric_res);
  546. }
  547. else {
  548. /*
  549. * XXX: in case of cyclic references this would return 0
  550. */
  551. if (cd->checked[atom->ncomp->id * 2 + 1]) {
  552. ms = rspamd_task_find_symbol_result(cd->task, sym.data(),
  553. cd->metric_res);
  554. }
  555. }
  556. }
  557. }
  558. if (ms) {
  559. msg_debug_composites("found symbol %s in composite %s, weight: %.3f",
  560. sym.data(), cd->composite->sym.c_str(), ms->score);
  561. /* Now check options */
  562. for (const auto &cur_opt: atom->opts) {
  563. struct rspamd_symbol_option *opt;
  564. auto found = false;
  565. DL_FOREACH(ms->opts_head, opt)
  566. {
  567. if (cur_opt.match_opt({opt->option, opt->optlen})) {
  568. found = true;
  569. break;
  570. }
  571. }
  572. if (!found) {
  573. auto pat = cur_opt.get_pat();
  574. msg_debug_composites("symbol %s in composite %s misses required option %*s",
  575. sym.data(),
  576. cd->composite->sym.c_str(),
  577. (int) pat.size(), pat.data());
  578. ms = nullptr;
  579. break;
  580. }
  581. }
  582. if (ms) {
  583. if (ms->score == 0) {
  584. rc = epsilon * 16.0; /* Distinguish from 0 */
  585. }
  586. else {
  587. rc = ms->score;
  588. }
  589. }
  590. }
  591. *pms = ms;
  592. return rc;
  593. }
  594. static auto
  595. rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom) -> double
  596. {
  597. struct composites_data *cd = (struct composites_data *) ud;
  598. struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *) atom->data;
  599. struct rspamd_symbol_result *ms = NULL;
  600. struct rspamd_task *task = cd->task;
  601. double rc = 0;
  602. if (cd->checked[cd->composite->id * 2]) {
  603. /* We have already checked this composite, so just return its value */
  604. if (cd->checked[cd->composite->id * 2 + 1]) {
  605. ms = rspamd_task_find_symbol_result(cd->task,
  606. comp_atom->norm_symbol.data(),
  607. cd->metric_res);
  608. }
  609. if (ms) {
  610. if (ms->score == 0) {
  611. rc = epsilon; /* Distinguish from 0 */
  612. }
  613. else {
  614. /* Treat negative and positive scores equally... */
  615. rc = fabs(ms->score);
  616. }
  617. }
  618. msg_debug_composites("composite %s is already checked, result: %.2f",
  619. cd->composite->sym.c_str(), rc);
  620. return rc;
  621. }
  622. /* Note: sym is zero terminated as it is a view on std::string */
  623. auto sym = comp_atom->norm_symbol;
  624. auto group_process_functor = [&](auto cond, int sub_start) -> double {
  625. auto max = 0.;
  626. GHashTableIter it;
  627. gpointer k, v;
  628. struct rspamd_symbols_group *gr;
  629. gr = (struct rspamd_symbols_group *) g_hash_table_lookup(cd->task->cfg->groups,
  630. sym.substr(sub_start).data());
  631. if (gr != nullptr) {
  632. g_hash_table_iter_init(&it, gr->symbols);
  633. while (g_hash_table_iter_next(&it, &k, &v)) {
  634. auto *sdef = (rspamd_symbol *) v;
  635. if (cond(sdef->score)) {
  636. rc = process_single_symbol(cd,
  637. std::string_view(sdef->name),
  638. &ms,
  639. comp_atom);
  640. if (fabs(rc) > epsilon) {
  641. process_symbol_removal(atom,
  642. cd,
  643. ms,
  644. comp_atom->symbol);
  645. if (fabs(rc) > max) {
  646. max = fabs(rc);
  647. }
  648. }
  649. }
  650. }
  651. }
  652. return max;
  653. };
  654. if (sym.size() > 2) {
  655. if (sym.substr(0, 2) == "g:") {
  656. rc = group_process_functor([](auto _) { return true; }, 2);
  657. }
  658. else if (sym.substr(0, 3) == "g+:") {
  659. /* Group, positive symbols only */
  660. rc = group_process_functor([](auto sc) { return sc > 0.; }, 3);
  661. }
  662. else if (sym.substr(0, 3) == "g-:") {
  663. rc = group_process_functor([](auto sc) { return sc < 0.; }, 3);
  664. }
  665. else {
  666. rc = process_single_symbol(cd, sym, &ms, comp_atom);
  667. if (fabs(rc) > epsilon) {
  668. process_symbol_removal(atom,
  669. cd,
  670. ms,
  671. comp_atom->symbol);
  672. }
  673. }
  674. }
  675. else {
  676. rc = process_single_symbol(cd, sym, &ms, comp_atom);
  677. if (fabs(rc) > epsilon) {
  678. process_symbol_removal(atom,
  679. cd,
  680. ms,
  681. comp_atom->symbol);
  682. }
  683. }
  684. msg_debug_composites("%s: result for atom %s in composite %s is %.4f",
  685. cd->metric_res->name,
  686. comp_atom->norm_symbol.data(),
  687. cd->composite->sym.c_str(), rc);
  688. return rc;
  689. }
  690. /*
  691. * We don't have preferences for composites
  692. */
  693. static int
  694. rspamd_composite_expr_priority(rspamd_expression_atom_t *atom)
  695. {
  696. return 0;
  697. }
  698. static void
  699. rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom)
  700. {
  701. rspamd_composite_atom_dtor(atom->data);
  702. }
  703. static void
  704. composites_foreach_callback(gpointer key, gpointer value, void *data)
  705. {
  706. auto *cd = (struct composites_data *) data;
  707. auto *comp = (struct rspamd_composite *) value;
  708. auto *str_key = (const char *) key;
  709. struct rspamd_task *task;
  710. double rc;
  711. cd->composite = comp;
  712. task = cd->task;
  713. msg_debug_composites("process composite %s", str_key);
  714. if (!cd->checked[cd->composite->id * 2]) {
  715. if (rspamd_symcache_is_checked(cd->task, cd->task->cfg->cache,
  716. str_key)) {
  717. msg_debug_composites("composite %s is checked in symcache but not "
  718. "in composites bitfield",
  719. cd->composite->sym.c_str());
  720. cd->checked[comp->id * 2] = true;
  721. cd->checked[comp->id * 2 + 1] = false;
  722. }
  723. else {
  724. if (rspamd_task_find_symbol_result(cd->task, str_key,
  725. cd->metric_res) != nullptr) {
  726. /* Already set, no need to check */
  727. msg_debug_composites("composite %s is already in metric "
  728. "in composites bitfield",
  729. cd->composite->sym.c_str());
  730. cd->checked[comp->id * 2] = true;
  731. cd->checked[comp->id * 2 + 1] = true;
  732. return;
  733. }
  734. msg_debug_composites("%s: start processing composite %s",
  735. cd->metric_res->name,
  736. cd->composite->sym.c_str());
  737. rc = rspamd_process_expression(comp->expr, RSPAMD_EXPRESSION_FLAG_NOOPT,
  738. cd);
  739. /* Checked bit */
  740. cd->checked[comp->id * 2] = true;
  741. msg_debug_composites("%s: final result for composite %s is %.4f",
  742. cd->metric_res->name,
  743. cd->composite->sym.c_str(), rc);
  744. /* Result bit */
  745. if (fabs(rc) > epsilon) {
  746. cd->checked[comp->id * 2 + 1] = true;
  747. rspamd_task_insert_result_full(cd->task, str_key, 1.0, NULL,
  748. RSPAMD_SYMBOL_INSERT_SINGLE, cd->metric_res);
  749. }
  750. else {
  751. cd->checked[comp->id * 2 + 1] = false;
  752. }
  753. }
  754. }
  755. }
  756. static auto
  757. remove_symbols(const composites_data &cd, const std::vector<symbol_remove_data> &rd) -> void
  758. {
  759. struct rspamd_task *task = cd.task;
  760. gboolean skip = FALSE,
  761. has_valid_op = FALSE,
  762. want_remove_score = TRUE,
  763. want_remove_symbol = TRUE,
  764. want_forced = FALSE;
  765. const char *disable_score_reason = "no policy",
  766. *disable_symbol_reason = "no policy";
  767. task = cd.task;
  768. for (const auto &cur: rd) {
  769. if (!cd.checked[cur.comp->id * 2 + 1]) {
  770. continue;
  771. }
  772. /*
  773. * First of all exclude all elements with any parent that is negation:
  774. * !A || B -> here we can have both !A and B matched, but we do *NOT*
  775. * want to remove symbol in that case
  776. */
  777. auto *par = cur.parent;
  778. skip = FALSE;
  779. while (par) {
  780. if (rspamd_expression_node_is_op(par, OP_NOT)) {
  781. skip = TRUE;
  782. break;
  783. }
  784. par = par->parent;
  785. }
  786. if (skip) {
  787. continue;
  788. }
  789. has_valid_op = TRUE;
  790. /*
  791. * Now we can try to remove symbols/scores
  792. *
  793. * We apply the following logic here:
  794. * - if no composites would like to save score then we remove score
  795. * - if no composites would like to save symbol then we remove symbol
  796. */
  797. if (!want_forced) {
  798. if (!(cur.action & RSPAMD_COMPOSITE_REMOVE_SYMBOL)) {
  799. want_remove_symbol = FALSE;
  800. disable_symbol_reason = cur.comp->sym.c_str();
  801. }
  802. if (!(cur.action & RSPAMD_COMPOSITE_REMOVE_WEIGHT)) {
  803. want_remove_score = FALSE;
  804. disable_score_reason = cur.comp->sym.c_str();
  805. }
  806. if (cur.action & RSPAMD_COMPOSITE_REMOVE_FORCED) {
  807. want_forced = TRUE;
  808. disable_symbol_reason = cur.comp->sym.c_str();
  809. disable_score_reason = cur.comp->sym.c_str();
  810. }
  811. }
  812. }
  813. auto *ms = rspamd_task_find_symbol_result(task, rd.front().sym, cd.metric_res);
  814. if (has_valid_op && ms && !(ms->flags & RSPAMD_SYMBOL_RESULT_IGNORED)) {
  815. if (want_remove_score || want_forced) {
  816. msg_debug_composites("%s: %s remove symbol weight for %s (was %.2f), "
  817. "score removal affected by %s, symbol removal affected by %s",
  818. cd.metric_res->name,
  819. (want_forced ? "forced" : "normal"), rd.front().sym, ms->score,
  820. disable_score_reason, disable_symbol_reason);
  821. cd.metric_res->score -= ms->score;
  822. ms->score = 0.0;
  823. }
  824. if (want_remove_symbol || want_forced) {
  825. ms->flags |= RSPAMD_SYMBOL_RESULT_IGNORED;
  826. msg_debug_composites("%s: %s remove symbol %s (score %.2f), "
  827. "score removal affected by %s, symbol removal affected by %s",
  828. cd.metric_res->name,
  829. (want_forced ? "forced" : "normal"), rd.front().sym, ms->score,
  830. disable_score_reason, disable_symbol_reason);
  831. }
  832. }
  833. }
  834. static void
  835. composites_metric_callback(struct rspamd_task *task)
  836. {
  837. std::vector<composites_data> comp_data_vec;
  838. struct rspamd_scan_result *mres;
  839. comp_data_vec.reserve(1);
  840. DL_FOREACH(task->result, mres)
  841. {
  842. auto &cd = comp_data_vec.emplace_back(task, mres);
  843. /* Process metric result */
  844. rspamd_symcache_composites_foreach(task,
  845. task->cfg->cache,
  846. composites_foreach_callback,
  847. &cd);
  848. }
  849. for (const auto &cd: comp_data_vec) {
  850. /* Remove symbols that are in composites */
  851. for (const auto &srd_it: cd.symbols_to_remove) {
  852. remove_symbols(cd, srd_it.second);
  853. }
  854. }
  855. }
  856. }// namespace rspamd::composites
  857. void rspamd_composites_process_task(struct rspamd_task *task)
  858. {
  859. if (task->result && !RSPAMD_TASK_IS_SKIPPED(task)) {
  860. rspamd::composites::composites_metric_callback(task);
  861. }
  862. }