You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

mime_expressions.c 56KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392
  1. /*
  2. * Copyright 2023 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <contrib/libucl/ucl.h>
  17. #include "config.h"
  18. #include "util.h"
  19. #include "cfg_file.h"
  20. #include "rspamd.h"
  21. #include "message.h"
  22. #include "mime_expressions.h"
  23. #include "libserver/html/html.h"
  24. #include "lua/lua_common.h"
  25. #include "utlist.h"
  26. gboolean rspamd_compare_encoding(struct rspamd_task *task,
  27. GArray *args,
  28. void *unused);
  29. gboolean rspamd_header_exists(struct rspamd_task *task,
  30. GArray *args,
  31. void *unused);
  32. gboolean rspamd_parts_distance(struct rspamd_task *task,
  33. GArray *args,
  34. void *unused);
  35. gboolean rspamd_recipients_distance(struct rspamd_task *task,
  36. GArray *args,
  37. void *unused);
  38. gboolean rspamd_has_only_html_part(struct rspamd_task *task,
  39. GArray *args,
  40. void *unused);
  41. gboolean rspamd_is_recipients_sorted(struct rspamd_task *task,
  42. GArray *args,
  43. void *unused);
  44. gboolean rspamd_compare_transfer_encoding(struct rspamd_task *task,
  45. GArray *args,
  46. void *unused);
  47. gboolean rspamd_is_html_balanced(struct rspamd_task *task,
  48. GArray *args,
  49. void *unused);
  50. gboolean rspamd_has_html_tag(struct rspamd_task *task,
  51. GArray *args,
  52. void *unused);
  53. gboolean rspamd_has_fake_html(struct rspamd_task *task,
  54. GArray *args,
  55. void *unused);
  56. static gboolean rspamd_raw_header_exists(struct rspamd_task *task,
  57. GArray *args,
  58. void *unused);
  59. static gboolean rspamd_check_smtp_data(struct rspamd_task *task,
  60. GArray *args,
  61. void *unused);
  62. static gboolean rspamd_content_type_is_type(struct rspamd_task *task,
  63. GArray *args,
  64. void *unused);
  65. static gboolean rspamd_content_type_is_subtype(struct rspamd_task *task,
  66. GArray *args,
  67. void *unused);
  68. static gboolean rspamd_content_type_has_param(struct rspamd_task *task,
  69. GArray *args,
  70. void *unused);
  71. static gboolean rspamd_content_type_compare_param(struct rspamd_task *task,
  72. GArray *args,
  73. void *unused);
  74. static gboolean rspamd_has_content_part(struct rspamd_task *task,
  75. GArray *args,
  76. void *unused);
  77. static gboolean rspamd_has_content_part_len(struct rspamd_task *task,
  78. GArray *args,
  79. void *unused);
  80. static gboolean rspamd_is_empty_body(struct rspamd_task *task,
  81. GArray *args,
  82. void *unused);
  83. static gboolean rspamd_has_flag_expr(struct rspamd_task *task,
  84. GArray *args,
  85. void *unused);
  86. static gboolean rspamd_has_symbol_expr(struct rspamd_task *task,
  87. GArray *args,
  88. void *unused);
  89. static rspamd_expression_atom_t *rspamd_mime_expr_parse(const char *line, gsize len,
  90. rspamd_mempool_t *pool, gpointer ud, GError **err);
  91. static double rspamd_mime_expr_process(void *ud, rspamd_expression_atom_t *atom);
  92. static int rspamd_mime_expr_priority(rspamd_expression_atom_t *atom);
  93. static void rspamd_mime_expr_destroy(rspamd_expression_atom_t *atom);
  94. /**
  95. * Regexp structure
  96. */
  97. struct rspamd_regexp_atom {
  98. enum rspamd_re_type type; /**< regexp type */
  99. char *regexp_text; /**< regexp text representation */
  100. rspamd_regexp_t *regexp; /**< regexp structure */
  101. union {
  102. const char *header; /**< header name for header regexps */
  103. const char *selector; /**< selector name for lua selector regexp */
  104. } extra;
  105. gboolean is_test; /**< true if this expression must be tested */
  106. gboolean is_strong; /**< true if headers search must be case sensitive */
  107. gboolean is_multiple; /**< true if we need to match all inclusions of atom */
  108. };
  109. /**
  110. * Rspamd expression function
  111. */
  112. struct rspamd_function_atom {
  113. char *name; /**< name of function */
  114. GArray *args; /**< its args */
  115. };
  /**
   * Kind of a parsed mime atom
   */
  enum rspamd_mime_atom_type {
      /* Regular expression atom (the default when parsing starts) */
      MIME_ATOM_REGEXP = 0,
      /* Atom written as a call with parentheses */
      MIME_ATOM_INTERNAL_FUNCTION,
      /* Lua function identified by just a string */
      MIME_ATOM_LUA_FUNCTION,
      /* New style: atom written with the "lua:" prefix */
      MIME_ATOM_LOCAL_LUA_FUNCTION,
  };
  122. struct rspamd_mime_atom {
  123. char *str;
  124. union {
  125. struct rspamd_regexp_atom *re;
  126. struct rspamd_function_atom *func;
  127. const char *lua_function;
  128. int lua_cbref;
  129. } d;
  130. enum rspamd_mime_atom_type type;
  131. };
  132. /*
  133. * List of internal functions of rspamd
  134. * Sorted by name to use bsearch
  135. */
  136. static struct _fl {
  137. const char *name;
  138. rspamd_internal_func_t func;
  139. void *user_data;
  140. } rspamd_functions_list[] = {
  141. {"check_smtp_data", rspamd_check_smtp_data, NULL},
  142. {"compare_encoding", rspamd_compare_encoding, NULL},
  143. {"compare_parts_distance", rspamd_parts_distance, NULL},
  144. {"compare_recipients_distance", rspamd_recipients_distance, NULL},
  145. {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
  146. {"content_type_compare_param", rspamd_content_type_compare_param, NULL},
  147. {"content_type_has_param", rspamd_content_type_has_param, NULL},
  148. {"content_type_is_subtype", rspamd_content_type_is_subtype, NULL},
  149. {"content_type_is_type", rspamd_content_type_is_type, NULL},
  150. {"has_content_part", rspamd_has_content_part, NULL},
  151. {"has_content_part_len", rspamd_has_content_part_len, NULL},
  152. {"has_fake_html", rspamd_has_fake_html, NULL},
  153. {"has_flag", rspamd_has_flag_expr, NULL},
  154. {"has_html_tag", rspamd_has_html_tag, NULL},
  155. {"has_only_html_part", rspamd_has_only_html_part, NULL},
  156. {"has_symbol", rspamd_has_symbol_expr, NULL},
  157. {"header_exists", rspamd_header_exists, NULL},
  158. {"is_empty_body", rspamd_is_empty_body, NULL},
  159. {"is_html_balanced", rspamd_is_html_balanced, NULL},
  160. {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL},
  161. {"raw_header_exists", rspamd_raw_header_exists, NULL},
  162. };
  163. const struct rspamd_atom_subr mime_expr_subr = {
  164. .parse = rspamd_mime_expr_parse,
  165. .process = rspamd_mime_expr_process,
  166. .priority = rspamd_mime_expr_priority,
  167. .destroy = rspamd_mime_expr_destroy};
  168. static struct _fl *list_ptr = &rspamd_functions_list[0];
  169. static uint32_t functions_number = sizeof(rspamd_functions_list) /
  170. sizeof(struct _fl);
  171. static gboolean list_allocated = FALSE;
  172. /* Bsearch routine */
  173. static int
  174. fl_cmp(const void *s1, const void *s2)
  175. {
  176. struct _fl *fl1 = (struct _fl *) s1;
  177. struct _fl *fl2 = (struct _fl *) s2;
  178. return strcmp(fl1->name, fl2->name);
  179. }
  180. static GQuark
  181. rspamd_mime_expr_quark(void)
  182. {
  183. return g_quark_from_static_string("mime-expressions");
  184. }
  185. #define TYPE_CHECK(str, type, len) (sizeof(type) - 1 == (len) && rspamd_lc_cmp((str), (type), (len)) == 0)
  186. static gboolean
  187. rspamd_parse_long_option(const char *start, gsize len,
  188. struct rspamd_regexp_atom *a)
  189. {
  190. gboolean ret = FALSE;
  191. if (TYPE_CHECK(start, "body", len)) {
  192. ret = TRUE;
  193. a->type = RSPAMD_RE_BODY;
  194. }
  195. else if (TYPE_CHECK(start, "part", len) ||
  196. TYPE_CHECK(start, "mime", len)) {
  197. ret = TRUE;
  198. a->type = RSPAMD_RE_MIME;
  199. }
  200. else if (TYPE_CHECK(start, "raw_part", len) ||
  201. TYPE_CHECK(start, "raw_mime", len) ||
  202. TYPE_CHECK(start, "mime_raw", len)) {
  203. ret = TRUE;
  204. a->type = RSPAMD_RE_RAWMIME;
  205. }
  206. else if (TYPE_CHECK(start, "header", len)) {
  207. ret = TRUE;
  208. a->type = RSPAMD_RE_HEADER;
  209. }
  210. else if (TYPE_CHECK(start, "mime_header", len) ||
  211. TYPE_CHECK(start, "header_mime", len)) {
  212. ret = TRUE;
  213. a->type = RSPAMD_RE_MIMEHEADER;
  214. }
  215. else if (TYPE_CHECK(start, "raw_header", len) ||
  216. TYPE_CHECK(start, "header_raw", len)) {
  217. ret = TRUE;
  218. a->type = RSPAMD_RE_RAWHEADER;
  219. }
  220. else if (TYPE_CHECK(start, "all_header", len) ||
  221. TYPE_CHECK(start, "header_all", len) ||
  222. TYPE_CHECK(start, "all_headers", len)) {
  223. ret = TRUE;
  224. a->type = RSPAMD_RE_ALLHEADER;
  225. }
  226. else if (TYPE_CHECK(start, "url", len)) {
  227. ret = TRUE;
  228. a->type = RSPAMD_RE_URL;
  229. }
  230. else if (TYPE_CHECK(start, "email", len)) {
  231. ret = TRUE;
  232. a->type = RSPAMD_RE_EMAIL;
  233. }
  234. else if (TYPE_CHECK(start, "sa_body", len)) {
  235. ret = TRUE;
  236. a->type = RSPAMD_RE_SABODY;
  237. }
  238. else if (TYPE_CHECK(start, "sa_raw_body", len) ||
  239. TYPE_CHECK(start, "sa_body_raw", len)) {
  240. ret = TRUE;
  241. a->type = RSPAMD_RE_SARAWBODY;
  242. }
  243. else if (TYPE_CHECK(start, "words", len)) {
  244. ret = TRUE;
  245. a->type = RSPAMD_RE_WORDS;
  246. }
  247. else if (TYPE_CHECK(start, "raw_words", len)) {
  248. ret = TRUE;
  249. a->type = RSPAMD_RE_RAWWORDS;
  250. }
  251. else if (TYPE_CHECK(start, "stem_words", len)) {
  252. ret = TRUE;
  253. a->type = RSPAMD_RE_STEMWORDS;
  254. }
  255. else if (TYPE_CHECK(start, "selector", len)) {
  256. ret = TRUE;
  257. a->type = RSPAMD_RE_SELECTOR;
  258. }
  259. return ret;
  260. }
  261. /*
  262. * Rspamd regexp utility functions
  263. */
  264. static struct rspamd_regexp_atom *
  265. rspamd_mime_expr_parse_regexp_atom(rspamd_mempool_t *pool, const char *line,
  266. struct rspamd_config *cfg)
  267. {
  268. const char *begin, *end, *p, *src, *start, *brace;
  269. char *dbegin, *dend, *extra = NULL;
  270. struct rspamd_regexp_atom *result;
  271. GError *err = NULL;
  272. GString *re_flags;
  273. if (line == NULL) {
  274. msg_err_pool("cannot parse NULL line");
  275. return NULL;
  276. }
  277. src = line;
  278. result = rspamd_mempool_alloc0(pool, sizeof(struct rspamd_regexp_atom));
  279. /* Skip whitespaces */
  280. while (g_ascii_isspace(*line)) {
  281. line++;
  282. }
  283. if (*line == '\0') {
  284. msg_warn_pool("got empty regexp");
  285. return NULL;
  286. }
  287. result->type = RSPAMD_RE_MAX;
  288. start = line;
  289. /* First try to find header name */
  290. begin = strchr(line, '/');
  291. if (begin != NULL) {
  292. p = begin;
  293. end = NULL;
  294. while (p != line) {
  295. if (*p == '=') {
  296. end = p;
  297. break;
  298. }
  299. p--;
  300. }
  301. if (end) {
  302. extra = rspamd_mempool_alloc(pool, end - line + 1);
  303. rspamd_strlcpy(extra, line, end - line + 1);
  304. line = end;
  305. }
  306. }
  307. else {
  308. extra = rspamd_mempool_strdup(pool, line);
  309. result->type = RSPAMD_RE_MAX;
  310. line = start;
  311. }
  312. /* Find begin of regexp */
  313. while (*line && *line != '/') {
  314. line++;
  315. }
  316. if (*line != '\0') {
  317. begin = line + 1;
  318. }
  319. else if (extra == NULL) {
  320. /* Assume that line without // is just a header name */
  321. extra = rspamd_mempool_strdup(pool, line);
  322. result->type = RSPAMD_RE_HEADER;
  323. return result;
  324. }
  325. else {
  326. /* We got header name earlier but have not found // expression, so it is invalid regexp */
  327. msg_warn_pool(
  328. "got no header name (eg. header=) but without corresponding regexp, %s",
  329. src);
  330. return NULL;
  331. }
  332. /* Find end */
  333. end = begin;
  334. while (*end && (*end != '/' || *(end - 1) == '\\')) {
  335. end++;
  336. }
  337. if (end == begin || *end != '/') {
  338. msg_warn_pool("no trailing / in regexp %s", src);
  339. return NULL;
  340. }
  341. /* Parse flags */
  342. p = end + 1;
  343. re_flags = g_string_sized_new(32);
  344. while (p != NULL) {
  345. switch (*p) {
  346. case 'i':
  347. case 'm':
  348. case 's':
  349. case 'x':
  350. case 'u':
  351. case 'O':
  352. case 'r':
  353. case 'L':
  354. /* Handled by rspamd_regexp_t */
  355. g_string_append_c(re_flags, *p);
  356. p++;
  357. break;
  358. case 'o':
  359. p++;
  360. break;
  361. /* Type flags */
  362. case 'H':
  363. result->type = RSPAMD_RE_HEADER;
  364. p++;
  365. break;
  366. case 'R':
  367. result->type = RSPAMD_RE_ALLHEADER;
  368. p++;
  369. break;
  370. case 'B':
  371. result->type = RSPAMD_RE_MIMEHEADER;
  372. p++;
  373. break;
  374. case 'C':
  375. result->type = RSPAMD_RE_SABODY;
  376. p++;
  377. break;
  378. case 'D':
  379. result->type = RSPAMD_RE_SARAWBODY;
  380. p++;
  381. break;
  382. case 'M':
  383. result->type = RSPAMD_RE_BODY;
  384. p++;
  385. break;
  386. case 'P':
  387. result->type = RSPAMD_RE_MIME;
  388. p++;
  389. break;
  390. case 'Q':
  391. result->type = RSPAMD_RE_RAWMIME;
  392. p++;
  393. break;
  394. case 'U':
  395. result->type = RSPAMD_RE_URL;
  396. p++;
  397. break;
  398. case 'X':
  399. result->type = RSPAMD_RE_RAWHEADER;
  400. p++;
  401. break;
  402. case '$':
  403. result->type = RSPAMD_RE_SELECTOR;
  404. p++;
  405. break;
  406. case '{':
  407. /* Long definition */
  408. if ((brace = strchr(p + 1, '}')) != NULL) {
  409. if (!rspamd_parse_long_option(p + 1, brace - (p + 1), result)) {
  410. msg_warn_pool("invalid long regexp type: %*s in '%s'",
  411. (int) (brace - (p + 1)), p + 1, src);
  412. p = NULL;
  413. }
  414. else {
  415. p = brace + 1;
  416. }
  417. }
  418. else {
  419. p = NULL;
  420. }
  421. break;
  422. /* Other flags */
  423. case 'T':
  424. result->is_test = TRUE;
  425. p++;
  426. break;
  427. case 'S':
  428. result->is_strong = TRUE;
  429. p++;
  430. break;
  431. case 'A':
  432. result->is_multiple = TRUE;
  433. p++;
  434. break;
  435. /* Stop flags parsing */
  436. default:
  437. p = NULL;
  438. break;
  439. }
  440. }
  441. if (result->type >= RSPAMD_RE_MAX) {
  442. if (extra) {
  443. /* Assume header regexp */
  444. result->extra.header = extra;
  445. result->type = RSPAMD_RE_HEADER;
  446. }
  447. else {
  448. msg_err_pool("could not read regexp: %s, unknown type", src);
  449. return NULL;
  450. }
  451. }
  452. if ((result->type == RSPAMD_RE_HEADER ||
  453. result->type == RSPAMD_RE_RAWHEADER ||
  454. result->type == RSPAMD_RE_MIMEHEADER)) {
  455. if (extra == NULL) {
  456. msg_err_pool("header regexp: '%s' has no header part", src);
  457. return NULL;
  458. }
  459. else {
  460. result->extra.header = extra;
  461. }
  462. }
  463. if (result->type == RSPAMD_RE_SELECTOR) {
  464. if (extra == NULL) {
  465. msg_err_pool("selector regexp: '%s' has no selector part", src);
  466. return NULL;
  467. }
  468. else {
  469. result->extra.selector = extra;
  470. }
  471. }
  472. result->regexp_text = rspamd_mempool_strdup(pool, start);
  473. dbegin = result->regexp_text + (begin - start);
  474. dend = result->regexp_text + (end - start);
  475. *dend = '\0';
  476. result->regexp = rspamd_regexp_new(dbegin, re_flags->str,
  477. &err);
  478. g_string_free(re_flags, TRUE);
  479. if (result->regexp == NULL || err != NULL) {
  480. msg_warn_pool("could not read regexp: %s while reading regexp %e",
  481. src, err);
  482. if (err) {
  483. g_error_free(err);
  484. }
  485. return NULL;
  486. }
  487. if (result->is_multiple) {
  488. rspamd_regexp_set_maxhits(result->regexp, 0);
  489. }
  490. else {
  491. rspamd_regexp_set_maxhits(result->regexp, 1);
  492. }
  493. rspamd_regexp_set_ud(result->regexp, result);
  494. *dend = '/';
  495. return result;
  496. }
  497. struct rspamd_function_atom *
  498. rspamd_mime_expr_parse_function_atom(rspamd_mempool_t *pool, const char *input)
  499. {
  500. const char *obrace, *ebrace, *p, *c;
  501. char t, *databuf;
  502. unsigned int len;
  503. struct rspamd_function_atom *res;
  504. struct expression_argument arg;
  505. GError *err = NULL;
  506. enum {
  507. start_read_argument = 0,
  508. in_string,
  509. in_regexp,
  510. got_backslash,
  511. got_comma
  512. } state,
  513. prev_state = 0;
  514. obrace = strchr(input, '(');
  515. ebrace = strrchr(input, ')');
  516. g_assert(obrace != NULL && ebrace != NULL);
  517. res = rspamd_mempool_alloc0(pool, sizeof(*res));
  518. res->name = rspamd_mempool_alloc(pool, obrace - input + 1);
  519. rspamd_strlcpy(res->name, input, obrace - input + 1);
  520. res->args = g_array_new(FALSE, FALSE, sizeof(struct expression_argument));
  521. p = obrace + 1;
  522. c = p;
  523. state = start_read_argument;
  524. /* Read arguments */
  525. while (p <= ebrace) {
  526. t = *p;
  527. switch (state) {
  528. case start_read_argument:
  529. if (t == '/') {
  530. state = in_regexp;
  531. c = p;
  532. }
  533. else if (!g_ascii_isspace(t)) {
  534. state = in_string;
  535. if (t == '\'' || t == '\"') {
  536. c = p + 1;
  537. }
  538. else {
  539. c = p;
  540. }
  541. }
  542. p++;
  543. break;
  544. case in_regexp:
  545. if (t == '\\') {
  546. state = got_backslash;
  547. prev_state = in_regexp;
  548. }
  549. else if (t == ',' || p == ebrace) {
  550. len = p - c + 1;
  551. databuf = rspamd_mempool_alloc(pool, len);
  552. rspamd_strlcpy(databuf, c, len);
  553. arg.type = EXPRESSION_ARGUMENT_REGEXP;
  554. arg.data = rspamd_regexp_cache_create(NULL, databuf, NULL, &err);
  555. if (arg.data == NULL) {
  556. /* Fallback to string */
  557. msg_warn("cannot parse slashed argument %s as regexp: %s",
  558. databuf, err->message);
  559. g_error_free(err);
  560. arg.type = EXPRESSION_ARGUMENT_NORMAL;
  561. arg.data = databuf;
  562. }
  563. g_array_append_val(res->args, arg);
  564. state = got_comma;
  565. }
  566. p++;
  567. break;
  568. case in_string:
  569. if (t == '\\') {
  570. state = got_backslash;
  571. prev_state = in_string;
  572. }
  573. else if (t == ',' || p == ebrace) {
  574. if (*(p - 1) == '\'' || *(p - 1) == '\"') {
  575. len = p - c;
  576. }
  577. else {
  578. len = p - c + 1;
  579. }
  580. databuf = rspamd_mempool_alloc(pool, len);
  581. rspamd_strlcpy(databuf, c, len);
  582. arg.type = EXPRESSION_ARGUMENT_NORMAL;
  583. arg.data = databuf;
  584. g_array_append_val(res->args, arg);
  585. state = got_comma;
  586. }
  587. p++;
  588. break;
  589. case got_backslash:
  590. state = prev_state;
  591. p++;
  592. break;
  593. case got_comma:
  594. state = start_read_argument;
  595. break;
  596. }
  597. }
  598. return res;
  599. }
  600. static rspamd_expression_atom_t *
  601. rspamd_mime_expr_parse(const char *line, gsize len,
  602. rspamd_mempool_t *pool, gpointer ud, GError **err)
  603. {
  604. rspamd_expression_atom_t *a = NULL;
  605. struct rspamd_mime_atom *mime_atom = NULL;
  606. const char *p, *end, *c = NULL;
  607. struct rspamd_mime_expr_ud *real_ud = (struct rspamd_mime_expr_ud *) ud;
  608. struct rspamd_config *cfg;
  609. rspamd_regexp_t *own_re;
  610. char t;
  611. int type = MIME_ATOM_REGEXP, obraces = 0, ebraces = 0;
  612. enum {
  613. in_header = 0,
  614. got_slash,
  615. in_regexp,
  616. got_backslash,
  617. got_second_slash,
  618. in_flags,
  619. in_flags_brace,
  620. got_obrace,
  621. in_function,
  622. in_local_function,
  623. got_ebrace,
  624. end_atom,
  625. bad_atom
  626. } state = 0,
  627. prev_state = 0;
  628. p = line;
  629. end = p + len;
  630. cfg = real_ud->cfg;
  631. while (p < end) {
  632. t = *p;
  633. switch (state) {
  634. case in_header:
  635. if (t == '/') {
  636. /* Regexp */
  637. state = got_slash;
  638. }
  639. else if (t == '(') {
  640. /* Function */
  641. state = got_obrace;
  642. }
  643. else if (!g_ascii_isalnum(t) && t != '_' && t != '-' && t != '=') {
  644. if (t == ':') {
  645. if (p - line == 3 && memcmp(line, "lua", 3) == 0) {
  646. type = MIME_ATOM_LOCAL_LUA_FUNCTION;
  647. state = in_local_function;
  648. c = p + 1;
  649. }
  650. }
  651. else {
  652. /* Likely lua function, identified by just a string */
  653. type = MIME_ATOM_LUA_FUNCTION;
  654. state = end_atom;
  655. /* Do not increase p */
  656. continue;
  657. }
  658. }
  659. else if (g_ascii_isspace(t)) {
  660. state = bad_atom;
  661. }
  662. p++;
  663. break;
  664. case got_slash:
  665. state = in_regexp;
  666. break;
  667. case in_regexp:
  668. if (t == '\\') {
  669. state = got_backslash;
  670. prev_state = in_regexp;
  671. }
  672. else if (t == '/') {
  673. state = got_second_slash;
  674. }
  675. p++;
  676. break;
  677. case got_second_slash:
  678. state = in_flags;
  679. break;
  680. case in_flags:
  681. if (t == '{') {
  682. state = in_flags_brace;
  683. p++;
  684. }
  685. else if (!g_ascii_isalpha(t) && t != '$') {
  686. state = end_atom;
  687. }
  688. else {
  689. p++;
  690. }
  691. break;
  692. case in_flags_brace:
  693. if (t == '}') {
  694. state = in_flags;
  695. }
  696. p++;
  697. break;
  698. case got_backslash:
  699. state = prev_state;
  700. p++;
  701. break;
  702. case got_obrace:
  703. state = in_function;
  704. type = MIME_ATOM_INTERNAL_FUNCTION;
  705. obraces++;
  706. break;
  707. case in_function:
  708. if (t == '\\') {
  709. state = got_backslash;
  710. prev_state = in_function;
  711. }
  712. else if (t == '(') {
  713. obraces++;
  714. }
  715. else if (t == ')') {
  716. ebraces++;
  717. if (ebraces == obraces) {
  718. state = got_ebrace;
  719. }
  720. }
  721. p++;
  722. break;
  723. case in_local_function:
  724. if (!(g_ascii_isalnum(t) || t == '-' || t == '_')) {
  725. g_assert(c != NULL);
  726. state = end_atom;
  727. }
  728. else {
  729. p++;
  730. }
  731. break;
  732. case got_ebrace:
  733. state = end_atom;
  734. break;
  735. case bad_atom:
  736. g_set_error(err, rspamd_mime_expr_quark(), 100, "cannot parse"
  737. " mime atom '%s' when reading symbol '%c' at offset %d, "
  738. "near %.*s",
  739. line, t, (int) (p - line),
  740. (int) MIN(end - p, 10), p);
  741. return NULL;
  742. case end_atom:
  743. goto set;
  744. }
  745. }
  746. set:
  747. if (p - line == 0 || (state != got_ebrace && state != got_second_slash &&
  748. state != in_flags && state != end_atom)) {
  749. g_set_error(err, rspamd_mime_expr_quark(), 200, "incomplete or empty"
  750. " mime atom");
  751. return NULL;
  752. }
  753. mime_atom = rspamd_mempool_alloc(pool, sizeof(*mime_atom));
  754. mime_atom->type = type;
  755. mime_atom->str = rspamd_mempool_alloc(pool, p - line + 1);
  756. rspamd_strlcpy(mime_atom->str, line, p - line + 1);
  757. if (type == MIME_ATOM_REGEXP) {
  758. mime_atom->d.re = rspamd_mime_expr_parse_regexp_atom(pool,
  759. mime_atom->str, cfg);
  760. if (mime_atom->d.re == NULL) {
  761. g_set_error(err, rspamd_mime_expr_quark(), 200,
  762. "cannot parse regexp '%s'",
  763. mime_atom->str);
  764. goto err;
  765. }
  766. else {
  767. int lua_cbref = -1;
  768. /* Check regexp condition */
  769. if (real_ud->conf_obj != NULL) {
  770. const ucl_object_t *re_conditions = ucl_object_lookup(real_ud->conf_obj,
  771. "re_conditions");
  772. if (re_conditions != NULL) {
  773. if (ucl_object_type(re_conditions) != UCL_OBJECT) {
  774. g_set_error(err, rspamd_mime_expr_quark(), 320,
  775. "re_conditions is not a table for '%s'",
  776. mime_atom->str);
  777. rspamd_regexp_unref(mime_atom->d.re->regexp);
  778. goto err;
  779. }
  780. const ucl_object_t *function_obj = ucl_object_lookup(re_conditions,
  781. mime_atom->str);
  782. if (function_obj != NULL) {
  783. if (ucl_object_type(function_obj) != UCL_USERDATA) {
  784. g_set_error(err, rspamd_mime_expr_quark(), 320,
  785. "condition for '%s' is invalid, must be function",
  786. mime_atom->str);
  787. rspamd_regexp_unref(mime_atom->d.re->regexp);
  788. goto err;
  789. }
  790. struct ucl_lua_funcdata *fd = function_obj->value.ud;
  791. lua_cbref = fd->idx;
  792. }
  793. }
  794. }
  795. if (lua_cbref != -1) {
  796. msg_info_config("added condition for regexp %s", mime_atom->str);
  797. /* Add SOM_LEFTMOST_FLAG implicitly */
  798. rspamd_regexp_set_flags(mime_atom->d.re->regexp, rspamd_regexp_get_flags(mime_atom->d.re->regexp) |
  799. RSPAMD_REGEXP_FLAG_LEFTMOST);
  800. }
  801. /* Register new item in the cache */
  802. if (mime_atom->d.re->type == RSPAMD_RE_HEADER ||
  803. mime_atom->d.re->type == RSPAMD_RE_RAWHEADER ||
  804. mime_atom->d.re->type == RSPAMD_RE_MIMEHEADER) {
  805. if (mime_atom->d.re->extra.header != NULL) {
  806. own_re = mime_atom->d.re->regexp;
  807. mime_atom->d.re->regexp = rspamd_re_cache_add(cfg->re_cache,
  808. mime_atom->d.re->regexp,
  809. mime_atom->d.re->type,
  810. mime_atom->d.re->extra.header,
  811. strlen(mime_atom->d.re->extra.header) + 1,
  812. lua_cbref);
  813. /* Pass ownership to the cache */
  814. rspamd_regexp_unref(own_re);
  815. }
  816. else {
  817. /* We have header regexp, but no header name is detected */
  818. g_set_error(err,
  819. rspamd_mime_expr_quark(),
  820. 200,
  821. "no header name in header regexp: '%s'",
  822. mime_atom->str);
  823. rspamd_regexp_unref(mime_atom->d.re->regexp);
  824. goto err;
  825. }
  826. }
  827. else if (mime_atom->d.re->type == RSPAMD_RE_SELECTOR) {
  828. if (mime_atom->d.re->extra.selector != NULL) {
  829. own_re = mime_atom->d.re->regexp;
  830. mime_atom->d.re->regexp = rspamd_re_cache_add(cfg->re_cache,
  831. mime_atom->d.re->regexp,
  832. mime_atom->d.re->type,
  833. mime_atom->d.re->extra.selector,
  834. strlen(mime_atom->d.re->extra.selector) + 1,
  835. lua_cbref);
  836. /* Pass ownership to the cache */
  837. rspamd_regexp_unref(own_re);
  838. }
  839. else {
  840. /* We have selector regexp, but no selector name is detected */
  841. g_set_error(err,
  842. rspamd_mime_expr_quark(),
  843. 200,
  844. "no selector name in selector regexp: '%s'",
  845. mime_atom->str);
  846. rspamd_regexp_unref(mime_atom->d.re->regexp);
  847. goto err;
  848. }
  849. }
  850. else {
  851. own_re = mime_atom->d.re->regexp;
  852. mime_atom->d.re->regexp = rspamd_re_cache_add(cfg->re_cache,
  853. mime_atom->d.re->regexp,
  854. mime_atom->d.re->type,
  855. NULL,
  856. 0,
  857. lua_cbref);
  858. /* Pass ownership to the cache */
  859. rspamd_regexp_unref(own_re);
  860. }
  861. }
  862. }
  863. else if (type == MIME_ATOM_LUA_FUNCTION) {
  864. mime_atom->d.lua_function = mime_atom->str;
  865. lua_getglobal(cfg->lua_state, mime_atom->str);
  866. if (lua_type(cfg->lua_state, -1) != LUA_TFUNCTION) {
  867. g_set_error(err, rspamd_mime_expr_quark(), 200,
  868. "no such lua function '%s'",
  869. mime_atom->str);
  870. lua_pop(cfg->lua_state, 1);
  871. goto err;
  872. }
  873. lua_pop(cfg->lua_state, 1);
  874. }
  875. else if (type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
  876. /* p pointer is set to the start of Lua function name */
  877. if (real_ud->conf_obj == NULL) {
  878. g_set_error(err, rspamd_mime_expr_quark(), 300,
  879. "no config object for '%s'",
  880. mime_atom->str);
  881. goto err;
  882. }
  883. const ucl_object_t *functions = ucl_object_lookup(real_ud->conf_obj,
  884. "functions");
  885. if (functions == NULL) {
  886. g_set_error(err, rspamd_mime_expr_quark(), 310,
  887. "no functions defined for '%s'",
  888. mime_atom->str);
  889. goto err;
  890. }
  891. if (ucl_object_type(functions) != UCL_OBJECT) {
  892. g_set_error(err, rspamd_mime_expr_quark(), 320,
  893. "functions is not a table for '%s'",
  894. mime_atom->str);
  895. goto err;
  896. }
  897. const ucl_object_t *function_obj;
  898. function_obj = ucl_object_lookup_len(functions, c,
  899. p - c);
  900. if (function_obj == NULL) {
  901. g_set_error(err, rspamd_mime_expr_quark(), 320,
  902. "function %.*s is not found for '%s'",
  903. (int) (p - c), c, mime_atom->str);
  904. goto err;
  905. }
  906. if (ucl_object_type(function_obj) != UCL_USERDATA) {
  907. g_set_error(err, rspamd_mime_expr_quark(), 320,
  908. "function %.*s has invalid type for '%s'",
  909. (int) (p - c), c, mime_atom->str);
  910. goto err;
  911. }
  912. struct ucl_lua_funcdata *fd = function_obj->value.ud;
  913. mime_atom->d.lua_cbref = fd->idx;
  914. }
  915. else {
  916. mime_atom->d.func = rspamd_mime_expr_parse_function_atom(pool,
  917. mime_atom->str);
  918. if (mime_atom->d.func == NULL) {
  919. g_set_error(err, rspamd_mime_expr_quark(), 200,
  920. "cannot parse function '%s'",
  921. mime_atom->str);
  922. goto err;
  923. }
  924. }
  925. a = rspamd_mempool_alloc0(pool, sizeof(*a));
  926. a->len = p - line;
  927. a->priority = 0;
  928. a->data = mime_atom;
  929. return a;
  930. err:
  931. return NULL;
  932. }
  933. static int
  934. rspamd_mime_expr_process_regexp(struct rspamd_regexp_atom *re,
  935. struct rspamd_task *task)
  936. {
  937. int ret;
  938. if (re == NULL) {
  939. msg_info_task("invalid regexp passed");
  940. return 0;
  941. }
  942. if (re->type == RSPAMD_RE_HEADER || re->type == RSPAMD_RE_RAWHEADER) {
  943. ret = rspamd_re_cache_process(task,
  944. re->regexp,
  945. re->type,
  946. re->extra.header,
  947. strlen(re->extra.header),
  948. re->is_strong);
  949. }
  950. else if (re->type == RSPAMD_RE_SELECTOR) {
  951. ret = rspamd_re_cache_process(task,
  952. re->regexp,
  953. re->type,
  954. re->extra.selector,
  955. strlen(re->extra.selector),
  956. re->is_strong);
  957. }
  958. else {
  959. ret = rspamd_re_cache_process(task,
  960. re->regexp,
  961. re->type,
  962. NULL,
  963. 0,
  964. re->is_strong);
  965. }
  966. if (re->is_test) {
  967. msg_info_task("test %s regexp '%s' returned %d",
  968. rspamd_re_cache_type_to_string(re->type),
  969. re->regexp_text, ret);
  970. }
  971. return ret;
  972. }
  973. static int
  974. rspamd_mime_expr_priority(rspamd_expression_atom_t *atom)
  975. {
  976. struct rspamd_mime_atom *mime_atom = atom->data;
  977. int ret = 0;
  978. switch (mime_atom->type) {
  979. case MIME_ATOM_INTERNAL_FUNCTION:
  980. /* Prioritize internal functions slightly */
  981. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
  982. break;
  983. case MIME_ATOM_LUA_FUNCTION:
  984. case MIME_ATOM_LOCAL_LUA_FUNCTION:
  985. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 4;
  986. break;
  987. case MIME_ATOM_REGEXP:
  988. switch (mime_atom->d.re->type) {
  989. case RSPAMD_RE_HEADER:
  990. case RSPAMD_RE_RAWHEADER:
  991. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 16;
  992. break;
  993. case RSPAMD_RE_URL:
  994. case RSPAMD_RE_EMAIL:
  995. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
  996. break;
  997. case RSPAMD_RE_SELECTOR:
  998. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
  999. break;
  1000. case RSPAMD_RE_MIME:
  1001. case RSPAMD_RE_RAWMIME:
  1002. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 2;
  1003. break;
  1004. case RSPAMD_RE_WORDS:
  1005. case RSPAMD_RE_RAWWORDS:
  1006. case RSPAMD_RE_STEMWORDS:
  1007. default:
  1008. /* For expensive regexps */
  1009. ret = 0;
  1010. break;
  1011. }
  1012. }
  1013. return ret;
  1014. }
  1015. static void
  1016. rspamd_mime_expr_destroy(rspamd_expression_atom_t *atom)
  1017. {
  1018. struct rspamd_mime_atom *mime_atom = atom->data;
  1019. if (mime_atom) {
  1020. if (mime_atom->type == MIME_ATOM_INTERNAL_FUNCTION) {
  1021. /* Need to cleanup arguments */
  1022. g_array_free(mime_atom->d.func->args, TRUE);
  1023. }
  1024. }
  1025. }
  1026. static gboolean
  1027. rspamd_mime_expr_process_function(struct rspamd_function_atom *func,
  1028. struct rspamd_task *task,
  1029. lua_State *L)
  1030. {
  1031. struct _fl *selected, key;
  1032. key.name = func->name;
  1033. selected = bsearch(&key,
  1034. list_ptr,
  1035. functions_number,
  1036. sizeof(struct _fl),
  1037. fl_cmp);
  1038. if (selected == NULL) {
  1039. /* Try to check lua function */
  1040. return FALSE;
  1041. }
  1042. return selected->func(task, func->args, selected->user_data);
  1043. }
  1044. static double
  1045. rspamd_mime_expr_process(void *ud, rspamd_expression_atom_t *atom)
  1046. {
  1047. struct rspamd_task *task = (struct rspamd_task *) ud;
  1048. struct rspamd_mime_atom *mime_atom;
  1049. lua_State *L;
  1050. double ret = 0;
  1051. g_assert(task != NULL);
  1052. g_assert(atom != NULL);
  1053. mime_atom = atom->data;
  1054. if (mime_atom->type == MIME_ATOM_REGEXP) {
  1055. ret = rspamd_mime_expr_process_regexp(mime_atom->d.re, task);
  1056. }
  1057. else if (mime_atom->type == MIME_ATOM_LUA_FUNCTION) {
  1058. L = task->cfg->lua_state;
  1059. lua_getglobal(L, mime_atom->d.lua_function);
  1060. rspamd_lua_task_push(L, task);
  1061. if (lua_pcall(L, 1, 1, 0) != 0) {
  1062. msg_info_task("lua call to global function '%s' for atom '%s' failed: %s",
  1063. mime_atom->d.lua_function,
  1064. mime_atom->str,
  1065. lua_tostring(L, -1));
  1066. lua_pop(L, 1);
  1067. }
  1068. else {
  1069. if (lua_type(L, -1) == LUA_TBOOLEAN) {
  1070. ret = lua_toboolean(L, -1);
  1071. }
  1072. else if (lua_type(L, -1) == LUA_TNUMBER) {
  1073. ret = lua_tonumber(L, 1);
  1074. }
  1075. else {
  1076. msg_err_task("%s returned wrong return type: %s",
  1077. mime_atom->str, lua_typename(L, lua_type(L, -1)));
  1078. }
  1079. /* Remove result */
  1080. lua_pop(L, 1);
  1081. }
  1082. }
  1083. else if (mime_atom->type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
  1084. int err_idx;
  1085. L = task->cfg->lua_state;
  1086. lua_pushcfunction(L, &rspamd_lua_traceback);
  1087. err_idx = lua_gettop(L);
  1088. lua_rawgeti(L, LUA_REGISTRYINDEX, mime_atom->d.lua_cbref);
  1089. rspamd_lua_task_push(L, task);
  1090. if (lua_pcall(L, 1, 1, err_idx) != 0) {
  1091. msg_info_task("lua call to local function for atom '%s' failed: %s",
  1092. mime_atom->str,
  1093. lua_tostring(L, -1));
  1094. }
  1095. else {
  1096. if (lua_type(L, -1) == LUA_TBOOLEAN) {
  1097. ret = lua_toboolean(L, -1);
  1098. }
  1099. else if (lua_type(L, -1) == LUA_TNUMBER) {
  1100. ret = lua_tonumber(L, 1);
  1101. }
  1102. else {
  1103. msg_err_task("%s returned wrong return type: %s",
  1104. mime_atom->str, lua_typename(L, lua_type(L, -1)));
  1105. }
  1106. }
  1107. lua_settop(L, 0);
  1108. }
  1109. else {
  1110. ret = rspamd_mime_expr_process_function(mime_atom->d.func, task,
  1111. task->cfg->lua_state);
  1112. }
  1113. return ret;
  1114. }
  1115. void register_expression_function(const char *name,
  1116. rspamd_internal_func_t func,
  1117. void *user_data)
  1118. {
  1119. static struct _fl *new;
  1120. functions_number++;
  1121. new = g_new(struct _fl, functions_number);
  1122. memcpy(new, list_ptr, (functions_number - 1) * sizeof(struct _fl));
  1123. if (list_allocated) {
  1124. g_free(list_ptr);
  1125. }
  1126. list_allocated = TRUE;
  1127. new[functions_number - 1].name = name;
  1128. new[functions_number - 1].func = func;
  1129. new[functions_number - 1].user_data = user_data;
  1130. qsort(new, functions_number, sizeof(struct _fl), fl_cmp);
  1131. list_ptr = new;
  1132. }
  1133. gboolean
  1134. rspamd_compare_encoding(struct rspamd_task *task, GArray *args, void *unused)
  1135. {
  1136. struct expression_argument *arg;
  1137. if (args == NULL || task == NULL) {
  1138. return FALSE;
  1139. }
  1140. arg = &g_array_index(args, struct expression_argument, 0);
  1141. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1142. msg_warn_task("invalid argument to function is passed");
  1143. return FALSE;
  1144. }
  1145. /* XXX: really write this function */
  1146. return TRUE;
  1147. }
  1148. gboolean
  1149. rspamd_header_exists(struct rspamd_task *task, GArray *args, void *unused)
  1150. {
  1151. struct expression_argument *arg;
  1152. struct rspamd_mime_header *rh;
  1153. if (args == NULL || task == NULL) {
  1154. return FALSE;
  1155. }
  1156. arg = &g_array_index(args, struct expression_argument, 0);
  1157. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1158. msg_warn_task("invalid argument to function is passed");
  1159. return FALSE;
  1160. }
  1161. rh = rspamd_message_get_header_array(task,
  1162. (char *) arg->data, FALSE);
  1163. debug_task("try to get header %s: %d", (char *) arg->data,
  1164. (rh != NULL));
  1165. if (rh) {
  1166. return TRUE;
  1167. }
  1168. return FALSE;
  1169. }
  1170. /*
  1171. * This function is designed to find difference between text/html and text/plain parts
  1172. * It takes one argument: difference threshold, if we have two text parts, compare
  1173. * its hashes and check for threshold, if value is greater than threshold, return TRUE
  1174. * and return FALSE otherwise.
  1175. */
  1176. gboolean
  1177. rspamd_parts_distance(struct rspamd_task *task, GArray *args, void *unused)
  1178. {
  1179. int threshold, threshold2 = -1;
  1180. struct expression_argument *arg;
  1181. double *pdiff, diff;
  1182. if (args == NULL || args->len == 0) {
  1183. debug_task("no threshold is specified, assume it 100");
  1184. threshold = 100;
  1185. }
  1186. else {
  1187. errno = 0;
  1188. arg = &g_array_index(args, struct expression_argument, 0);
  1189. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1190. msg_warn_task("invalid argument to function is passed");
  1191. return FALSE;
  1192. }
  1193. threshold = strtoul((char *) arg->data, NULL, 10);
  1194. if (errno != 0) {
  1195. msg_info_task("bad numeric value for threshold \"%s\", assume it 100",
  1196. (char *) arg->data);
  1197. threshold = 100;
  1198. }
  1199. if (args->len >= 2) {
  1200. arg = &g_array_index(args, struct expression_argument, 1);
  1201. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1202. msg_warn_task("invalid argument to function is passed");
  1203. return FALSE;
  1204. }
  1205. errno = 0;
  1206. threshold2 = strtoul((char *) arg->data, NULL, 10);
  1207. if (errno != 0) {
  1208. msg_info_task("bad numeric value for threshold \"%s\", ignore it",
  1209. (char *) arg->data);
  1210. threshold2 = -1;
  1211. }
  1212. }
  1213. }
  1214. if ((pdiff =
  1215. rspamd_mempool_get_variable(task->task_pool,
  1216. "parts_distance")) != NULL) {
  1217. diff = (1.0 - (*pdiff)) * 100.0;
  1218. if (diff != -1) {
  1219. if (threshold2 > 0) {
  1220. if (diff >= MIN(threshold, threshold2) &&
  1221. diff < MAX(threshold, threshold2)) {
  1222. return TRUE;
  1223. }
  1224. }
  1225. else {
  1226. if (diff <= threshold) {
  1227. return TRUE;
  1228. }
  1229. }
  1230. return FALSE;
  1231. }
  1232. else {
  1233. return FALSE;
  1234. }
  1235. }
  1236. return FALSE;
  1237. }
  1238. struct addr_list {
  1239. const char *name;
  1240. unsigned int namelen;
  1241. const char *addr;
  1242. unsigned int addrlen;
  1243. };
  1244. static int
  1245. addr_list_cmp_func(const void *a, const void *b)
  1246. {
  1247. const struct addr_list *addra = (struct addr_list *) a,
  1248. *addrb = (struct addr_list *) b;
  1249. if (addra->addrlen != addrb->addrlen) {
  1250. return addra->addrlen - addrb->addrlen;
  1251. }
  1252. return memcmp(addra->addr, addrb->addr, addra->addrlen);
  1253. }
  1254. #define COMPARE_RCPT_LEN 3
  1255. #define MIN_RCPT_TO_COMPARE 7
  1256. gboolean
  1257. rspamd_recipients_distance(struct rspamd_task *task, GArray *args,
  1258. void *unused)
  1259. {
  1260. struct expression_argument *arg;
  1261. struct rspamd_email_address *cur;
  1262. double threshold;
  1263. struct addr_list *ar;
  1264. int num, i, hits = 0;
  1265. if (args == NULL) {
  1266. msg_warn_task("no parameters to function");
  1267. return FALSE;
  1268. }
  1269. arg = &g_array_index(args, struct expression_argument, 0);
  1270. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1271. msg_warn_task("invalid argument to function is passed");
  1272. return FALSE;
  1273. }
  1274. errno = 0;
  1275. threshold = strtod((char *) arg->data, NULL);
  1276. if (errno != 0) {
  1277. msg_warn_task("invalid numeric value '%s': %s",
  1278. (char *) arg->data,
  1279. strerror(errno));
  1280. return FALSE;
  1281. }
  1282. if (!MESSAGE_FIELD(task, rcpt_mime)) {
  1283. return FALSE;
  1284. }
  1285. num = MESSAGE_FIELD(task, rcpt_mime)->len;
  1286. if (num < MIN_RCPT_TO_COMPARE) {
  1287. return FALSE;
  1288. }
  1289. ar = rspamd_mempool_alloc0(task->task_pool, num * sizeof(struct addr_list));
  1290. /* Fill array */
  1291. num = 0;
  1292. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, rcpt_mime), i, cur)
  1293. {
  1294. if (cur->addr_len > COMPARE_RCPT_LEN) {
  1295. ar[num].name = cur->addr;
  1296. ar[num].namelen = cur->addr_len;
  1297. ar[num].addr = cur->domain;
  1298. ar[num].addrlen = cur->domain_len;
  1299. num++;
  1300. }
  1301. }
  1302. qsort(ar, num, sizeof(*ar), addr_list_cmp_func);
  1303. /* Cycle all elements in array */
  1304. for (i = 0; i < num; i++) {
  1305. if (i < num - 1) {
  1306. if (ar[i].namelen == ar[i + 1].namelen) {
  1307. if (rspamd_lc_cmp(ar[i].name, ar[i + 1].name, COMPARE_RCPT_LEN) == 0) {
  1308. hits++;
  1309. }
  1310. }
  1311. }
  1312. }
  1313. if ((hits * num / 2.) / (double) num >= threshold) {
  1314. return TRUE;
  1315. }
  1316. return FALSE;
  1317. }
  1318. gboolean
  1319. rspamd_has_only_html_part(struct rspamd_task *task, GArray *args,
  1320. void *unused)
  1321. {
  1322. struct rspamd_mime_text_part *p;
  1323. unsigned int i, cnt_html = 0, cnt_txt = 0;
  1324. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, p)
  1325. {
  1326. if (!IS_TEXT_PART_ATTACHMENT(p)) {
  1327. if (IS_TEXT_PART_HTML(p)) {
  1328. cnt_html++;
  1329. }
  1330. else {
  1331. cnt_txt++;
  1332. }
  1333. }
  1334. }
  1335. return (cnt_html > 0 && cnt_txt == 0);
  1336. }
  1337. static gboolean
  1338. is_recipient_list_sorted(GPtrArray *ar)
  1339. {
  1340. struct rspamd_email_address *addr;
  1341. gboolean res = TRUE;
  1342. rspamd_ftok_t cur, prev;
  1343. int i;
  1344. /* Do not check to short address lists */
  1345. if (ar == NULL || ar->len < MIN_RCPT_TO_COMPARE) {
  1346. return FALSE;
  1347. }
  1348. prev.len = 0;
  1349. prev.begin = NULL;
  1350. PTR_ARRAY_FOREACH(ar, i, addr)
  1351. {
  1352. cur.begin = addr->addr;
  1353. cur.len = addr->addr_len;
  1354. if (prev.len != 0) {
  1355. if (rspamd_ftok_casecmp(&cur, &prev) <= 0) {
  1356. res = FALSE;
  1357. break;
  1358. }
  1359. }
  1360. prev = cur;
  1361. }
  1362. return res;
  1363. }
  1364. gboolean
  1365. rspamd_is_recipients_sorted(struct rspamd_task *task,
  1366. GArray *args,
  1367. void *unused)
  1368. {
  1369. /* Check all types of addresses */
  1370. if (MESSAGE_FIELD(task, rcpt_mime)) {
  1371. return is_recipient_list_sorted(MESSAGE_FIELD(task, rcpt_mime));
  1372. }
  1373. return FALSE;
  1374. }
  1375. gboolean
  1376. rspamd_compare_transfer_encoding(struct rspamd_task *task,
  1377. GArray *args,
  1378. void *unused)
  1379. {
  1380. struct expression_argument *arg;
  1381. unsigned int i;
  1382. struct rspamd_mime_part *part;
  1383. enum rspamd_cte cte;
  1384. if (args == NULL) {
  1385. msg_warn_task("no parameters to function");
  1386. return FALSE;
  1387. }
  1388. arg = &g_array_index(args, struct expression_argument, 0);
  1389. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1390. msg_warn_task("invalid argument to function is passed");
  1391. return FALSE;
  1392. }
  1393. cte = rspamd_cte_from_string(arg->data);
  1394. if (cte == RSPAMD_CTE_UNKNOWN) {
  1395. msg_warn_task("unknown cte: %s", arg->data);
  1396. return FALSE;
  1397. }
  1398. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
  1399. {
  1400. if (IS_PART_TEXT(part)) {
  1401. if (part->cte == cte) {
  1402. return TRUE;
  1403. }
  1404. }
  1405. }
  1406. return FALSE;
  1407. }
  1408. gboolean
  1409. rspamd_is_html_balanced(struct rspamd_task *task, GArray *args, void *unused)
  1410. {
  1411. /* Totally broken but seems to be never used */
  1412. return TRUE;
  1413. }
  1414. gboolean
  1415. rspamd_has_html_tag(struct rspamd_task *task, GArray *args, void *unused)
  1416. {
  1417. struct rspamd_mime_text_part *p;
  1418. struct expression_argument *arg;
  1419. unsigned int i;
  1420. gboolean res = FALSE;
  1421. if (args == NULL) {
  1422. msg_warn_task("no parameters to function");
  1423. return FALSE;
  1424. }
  1425. arg = &g_array_index(args, struct expression_argument, 0);
  1426. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1427. msg_warn_task("invalid argument to function is passed");
  1428. return FALSE;
  1429. }
  1430. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, p)
  1431. {
  1432. if (IS_TEXT_PART_HTML(p) && p->html) {
  1433. res = rspamd_html_tag_seen(p->html, arg->data);
  1434. }
  1435. if (res) {
  1436. break;
  1437. }
  1438. }
  1439. return res;
  1440. }
  1441. gboolean
  1442. rspamd_has_fake_html(struct rspamd_task *task, GArray *args, void *unused)
  1443. {
  1444. struct rspamd_mime_text_part *p;
  1445. unsigned int i;
  1446. gboolean res = FALSE;
  1447. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, p)
  1448. {
  1449. if (IS_TEXT_PART_HTML(p) && (rspamd_html_get_tags_count(p->html) < 2)) {
  1450. res = TRUE;
  1451. }
  1452. if (res) {
  1453. break;
  1454. }
  1455. }
  1456. return res;
  1457. }
  1458. static gboolean
  1459. rspamd_raw_header_exists(struct rspamd_task *task, GArray *args, void *unused)
  1460. {
  1461. struct expression_argument *arg;
  1462. if (args == NULL || task == NULL) {
  1463. return FALSE;
  1464. }
  1465. arg = &g_array_index(args, struct expression_argument, 0);
  1466. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1467. msg_warn_task("invalid argument to function is passed");
  1468. return FALSE;
  1469. }
  1470. return rspamd_message_get_header_array(task, arg->data, FALSE) != NULL;
  1471. }
  1472. static gboolean
  1473. match_smtp_data(struct rspamd_task *task,
  1474. struct expression_argument *arg,
  1475. const char *what, gsize len)
  1476. {
  1477. rspamd_regexp_t *re;
  1478. int r = 0;
  1479. if (arg->type == EXPRESSION_ARGUMENT_REGEXP) {
  1480. /* This is a regexp */
  1481. re = arg->data;
  1482. if (re == NULL) {
  1483. msg_warn_task("cannot compile regexp for function");
  1484. return FALSE;
  1485. }
  1486. if (len > 0) {
  1487. r = rspamd_regexp_search(re, what, len, NULL, NULL, FALSE, NULL);
  1488. }
  1489. return r;
  1490. }
  1491. else if (arg->type == EXPRESSION_ARGUMENT_NORMAL &&
  1492. g_ascii_strncasecmp(arg->data, what, len) == 0) {
  1493. return TRUE;
  1494. }
  1495. return FALSE;
  1496. }
  1497. static gboolean
  1498. rspamd_check_smtp_data(struct rspamd_task *task, GArray *args, void *unused)
  1499. {
  1500. struct expression_argument *arg;
  1501. struct rspamd_email_address *addr = NULL;
  1502. GPtrArray *rcpts = NULL;
  1503. const char *type, *str = NULL;
  1504. unsigned int i;
  1505. if (args == NULL) {
  1506. msg_warn_task("no parameters to function");
  1507. return FALSE;
  1508. }
  1509. arg = &g_array_index(args, struct expression_argument, 0);
  1510. if (!arg || !arg->data || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1511. msg_warn_task("no parameters to function");
  1512. return FALSE;
  1513. }
  1514. else {
  1515. type = arg->data;
  1516. switch (*type) {
  1517. case 'f':
  1518. case 'F':
  1519. if (g_ascii_strcasecmp(type, "from") == 0) {
  1520. addr = rspamd_task_get_sender(task);
  1521. }
  1522. else {
  1523. msg_warn_task("bad argument to function: %s", type);
  1524. return FALSE;
  1525. }
  1526. break;
  1527. case 'h':
  1528. case 'H':
  1529. if (g_ascii_strcasecmp(type, "helo") == 0) {
  1530. str = task->helo;
  1531. }
  1532. else {
  1533. msg_warn_task("bad argument to function: %s", type);
  1534. return FALSE;
  1535. }
  1536. break;
  1537. case 'u':
  1538. case 'U':
  1539. if (g_ascii_strcasecmp(type, "user") == 0) {
  1540. str = task->auth_user;
  1541. }
  1542. else {
  1543. msg_warn_task("bad argument to function: %s", type);
  1544. return FALSE;
  1545. }
  1546. break;
  1547. case 's':
  1548. case 'S':
  1549. if (g_ascii_strcasecmp(type, "subject") == 0) {
  1550. str = MESSAGE_FIELD(task, subject);
  1551. }
  1552. else {
  1553. msg_warn_task("bad argument to function: %s", type);
  1554. return FALSE;
  1555. }
  1556. break;
  1557. case 'r':
  1558. case 'R':
  1559. if (g_ascii_strcasecmp(type, "rcpt") == 0) {
  1560. rcpts = task->rcpt_envelope;
  1561. }
  1562. else {
  1563. msg_warn_task("bad argument to function: %s", type);
  1564. return FALSE;
  1565. }
  1566. break;
  1567. default:
  1568. msg_warn_task("bad argument to function: %s", type);
  1569. return FALSE;
  1570. }
  1571. }
  1572. if (str == NULL && addr == NULL && rcpts == NULL) {
  1573. /* Not enough data so regexp would NOT be found anyway */
  1574. return FALSE;
  1575. }
  1576. /* We would process only one more argument, others are ignored */
  1577. if (args->len >= 2) {
  1578. arg = &g_array_index(args, struct expression_argument, 1);
  1579. if (arg) {
  1580. if (str != NULL) {
  1581. return match_smtp_data(task, arg, str, strlen(str));
  1582. }
  1583. else if (addr != NULL && addr->addr) {
  1584. return match_smtp_data(task, arg, addr->addr, addr->addr_len);
  1585. }
  1586. else {
  1587. if (rcpts != NULL) {
  1588. for (i = 0; i < rcpts->len; i++) {
  1589. addr = g_ptr_array_index(rcpts, i);
  1590. if (addr && addr->addr &&
  1591. match_smtp_data(task, arg,
  1592. addr->addr, addr->addr_len)) {
  1593. return TRUE;
  1594. }
  1595. }
  1596. }
  1597. }
  1598. }
  1599. }
  1600. return FALSE;
  1601. }
  1602. static inline gboolean
  1603. rspamd_check_ct_attr(const char *begin, gsize len,
  1604. struct expression_argument *arg_pattern)
  1605. {
  1606. rspamd_regexp_t *re;
  1607. gboolean r = FALSE;
  1608. if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
  1609. re = arg_pattern->data;
  1610. if (len > 0) {
  1611. r = rspamd_regexp_search(re,
  1612. begin, len,
  1613. NULL, NULL, FALSE, NULL);
  1614. }
  1615. if (r) {
  1616. return TRUE;
  1617. }
  1618. }
  1619. else {
  1620. /* Just do strcasecmp */
  1621. gsize plen = strlen(arg_pattern->data);
  1622. if (plen == len &&
  1623. g_ascii_strncasecmp(arg_pattern->data, begin, len) == 0) {
  1624. return TRUE;
  1625. }
  1626. }
  1627. return FALSE;
  1628. }
  1629. static gboolean
  1630. rspamd_content_type_compare_param(struct rspamd_task *task,
  1631. GArray *args,
  1632. void *unused)
  1633. {
  1634. struct expression_argument *arg, *arg1, *arg_pattern;
  1635. gboolean recursive = FALSE;
  1636. struct rspamd_mime_part *cur_part;
  1637. unsigned int i;
  1638. rspamd_ftok_t srch;
  1639. struct rspamd_content_type_param *found = NULL, *cur;
  1640. const char *param_name;
  1641. if (args == NULL || args->len < 2) {
  1642. msg_warn_task("no parameters to function");
  1643. return FALSE;
  1644. }
  1645. arg = &g_array_index(args, struct expression_argument, 0);
  1646. g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1647. param_name = arg->data;
  1648. arg_pattern = &g_array_index(args, struct expression_argument, 1);
  1649. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, cur_part)
  1650. {
  1651. if (args->len >= 3) {
  1652. arg1 = &g_array_index(args, struct expression_argument, 2);
  1653. if (g_ascii_strncasecmp(arg1->data, "true",
  1654. sizeof("true") - 1) == 0) {
  1655. recursive = TRUE;
  1656. }
  1657. }
  1658. else {
  1659. /*
  1660. * If user did not specify argument, let's assume that he wants
  1661. * recursive search if mime part is multipart/mixed
  1662. */
  1663. if (IS_PART_MULTIPART(cur_part)) {
  1664. recursive = TRUE;
  1665. }
  1666. }
  1667. rspamd_ftok_t lit;
  1668. RSPAMD_FTOK_FROM_STR(&srch, param_name);
  1669. RSPAMD_FTOK_FROM_STR(&lit, "charset");
  1670. if (rspamd_ftok_equal(&srch, &lit)) {
  1671. if (rspamd_check_ct_attr(cur_part->ct->charset.begin,
  1672. cur_part->ct->charset.len, arg_pattern)) {
  1673. return TRUE;
  1674. }
  1675. }
  1676. RSPAMD_FTOK_FROM_STR(&lit, "boundary");
  1677. if (rspamd_ftok_equal(&srch, &lit)) {
  1678. if (rspamd_check_ct_attr(cur_part->ct->orig_boundary.begin,
  1679. cur_part->ct->orig_boundary.len, arg_pattern)) {
  1680. return TRUE;
  1681. }
  1682. }
  1683. if (cur_part->ct->attrs) {
  1684. found = g_hash_table_lookup(cur_part->ct->attrs, &srch);
  1685. if (found) {
  1686. DL_FOREACH(found, cur)
  1687. {
  1688. if (rspamd_check_ct_attr(cur->value.begin,
  1689. cur->value.len, arg_pattern)) {
  1690. return TRUE;
  1691. }
  1692. }
  1693. }
  1694. }
  1695. if (!recursive) {
  1696. break;
  1697. }
  1698. }
  1699. return FALSE;
  1700. }
  1701. static gboolean
  1702. rspamd_content_type_has_param(struct rspamd_task *task,
  1703. GArray *args,
  1704. void *unused)
  1705. {
  1706. struct expression_argument *arg, *arg1;
  1707. gboolean recursive = FALSE;
  1708. struct rspamd_mime_part *cur_part;
  1709. unsigned int i;
  1710. rspamd_ftok_t srch;
  1711. struct rspamd_content_type_param *found = NULL;
  1712. const char *param_name;
  1713. if (args == NULL || args->len < 1) {
  1714. msg_warn_task("no parameters to function");
  1715. return FALSE;
  1716. }
  1717. arg = &g_array_index(args, struct expression_argument, 0);
  1718. g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1719. param_name = arg->data;
  1720. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, cur_part)
  1721. {
  1722. if (args->len >= 2) {
  1723. arg1 = &g_array_index(args, struct expression_argument, 1);
  1724. if (g_ascii_strncasecmp(arg1->data, "true",
  1725. sizeof("true") - 1) == 0) {
  1726. recursive = TRUE;
  1727. }
  1728. }
  1729. else {
  1730. /*
  1731. * If user did not specify argument, let's assume that he wants
  1732. * recursive search if mime part is multipart/mixed
  1733. */
  1734. if (IS_PART_MULTIPART(cur_part)) {
  1735. recursive = TRUE;
  1736. }
  1737. }
  1738. rspamd_ftok_t lit;
  1739. RSPAMD_FTOK_FROM_STR(&srch, param_name);
  1740. RSPAMD_FTOK_FROM_STR(&lit, "charset");
  1741. if (rspamd_ftok_equal(&srch, &lit)) {
  1742. if (cur_part->ct->charset.len > 0) {
  1743. return TRUE;
  1744. }
  1745. }
  1746. RSPAMD_FTOK_FROM_STR(&lit, "boundary");
  1747. if (rspamd_ftok_equal(&srch, &lit)) {
  1748. if (cur_part->ct->boundary.len > 0) {
  1749. return TRUE;
  1750. }
  1751. }
  1752. if (cur_part->ct->attrs) {
  1753. found = g_hash_table_lookup(cur_part->ct->attrs, &srch);
  1754. if (found) {
  1755. return TRUE;
  1756. }
  1757. }
  1758. if (!recursive) {
  1759. break;
  1760. }
  1761. }
  1762. return FALSE;
  1763. }
  1764. static gboolean
  1765. rspamd_content_type_check(struct rspamd_task *task,
  1766. GArray *args,
  1767. gboolean check_subtype)
  1768. {
  1769. rspamd_ftok_t *param_data, srch;
  1770. rspamd_regexp_t *re;
  1771. struct expression_argument *arg1, *arg_pattern;
  1772. struct rspamd_content_type *ct;
  1773. int r = 0;
  1774. unsigned int i;
  1775. gboolean recursive = FALSE;
  1776. struct rspamd_mime_part *cur_part;
  1777. if (args == NULL || args->len < 1) {
  1778. msg_warn_task("no parameters to function");
  1779. return FALSE;
  1780. }
  1781. arg_pattern = &g_array_index(args, struct expression_argument, 0);
  1782. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, cur_part)
  1783. {
  1784. ct = cur_part->ct;
  1785. if (args->len >= 2) {
  1786. arg1 = &g_array_index(args, struct expression_argument, 1);
  1787. if (g_ascii_strncasecmp(arg1->data, "true",
  1788. sizeof("true") - 1) == 0) {
  1789. recursive = TRUE;
  1790. }
  1791. }
  1792. else {
  1793. /*
  1794. * If user did not specify argument, let's assume that he wants
  1795. * recursive search if mime part is multipart/mixed
  1796. */
  1797. if (IS_PART_MULTIPART(cur_part)) {
  1798. recursive = TRUE;
  1799. }
  1800. }
  1801. if (check_subtype) {
  1802. param_data = &ct->subtype;
  1803. }
  1804. else {
  1805. param_data = &ct->type;
  1806. }
  1807. if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
  1808. re = arg_pattern->data;
  1809. if (param_data->len > 0) {
  1810. r = rspamd_regexp_search(re, param_data->begin, param_data->len,
  1811. NULL, NULL, FALSE, NULL);
  1812. }
  1813. if (r) {
  1814. return TRUE;
  1815. }
  1816. }
  1817. else {
  1818. /* Just do strcasecmp */
  1819. srch.begin = arg_pattern->data;
  1820. srch.len = strlen(arg_pattern->data);
  1821. if (rspamd_ftok_casecmp(param_data, &srch) == 0) {
  1822. return TRUE;
  1823. }
  1824. }
  1825. /* Get next part */
  1826. if (!recursive) {
  1827. break;
  1828. }
  1829. }
  1830. return FALSE;
  1831. }
  1832. static gboolean
  1833. rspamd_content_type_is_type(struct rspamd_task *task,
  1834. GArray *args,
  1835. void *unused)
  1836. {
  1837. return rspamd_content_type_check(task, args, FALSE);
  1838. }
  1839. static gboolean
  1840. rspamd_content_type_is_subtype(struct rspamd_task *task,
  1841. GArray *args,
  1842. void *unused)
  1843. {
  1844. return rspamd_content_type_check(task, args, TRUE);
  1845. }
  1846. static gboolean
  1847. compare_subtype(struct rspamd_task *task, struct rspamd_content_type *ct,
  1848. struct expression_argument *subtype)
  1849. {
  1850. rspamd_regexp_t *re;
  1851. rspamd_ftok_t srch;
  1852. int r = 0;
  1853. if (subtype == NULL || ct == NULL) {
  1854. msg_warn_task("invalid parameters passed");
  1855. return FALSE;
  1856. }
  1857. if (subtype->type == EXPRESSION_ARGUMENT_REGEXP) {
  1858. re = subtype->data;
  1859. if (ct->subtype.len > 0) {
  1860. r = rspamd_regexp_search(re, ct->subtype.begin, ct->subtype.len,
  1861. NULL, NULL, FALSE, NULL);
  1862. }
  1863. }
  1864. else {
  1865. srch.begin = subtype->data;
  1866. srch.len = strlen(subtype->data);
  1867. /* Just do strcasecmp */
  1868. if (rspamd_ftok_casecmp(&ct->subtype, &srch) == 0) {
  1869. return TRUE;
  1870. }
  1871. }
  1872. return r;
  1873. }
  1874. static gboolean
  1875. compare_len(struct rspamd_mime_part *part, unsigned int min, unsigned int max)
  1876. {
  1877. if (min == 0 && max == 0) {
  1878. return TRUE;
  1879. }
  1880. if (min == 0) {
  1881. return part->parsed_data.len <= max;
  1882. }
  1883. else if (max == 0) {
  1884. return part->parsed_data.len >= min;
  1885. }
  1886. else {
  1887. return part->parsed_data.len >= min && part->parsed_data.len <= max;
  1888. }
  1889. }
  1890. static gboolean
  1891. common_has_content_part(struct rspamd_task *task,
  1892. struct expression_argument *param_type,
  1893. struct expression_argument *param_subtype,
  1894. int min_len,
  1895. int max_len)
  1896. {
  1897. rspamd_regexp_t *re;
  1898. struct rspamd_mime_part *part;
  1899. struct rspamd_content_type *ct;
  1900. rspamd_ftok_t srch;
  1901. int r = 0;
  1902. unsigned int i;
  1903. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
  1904. {
  1905. ct = part->ct;
  1906. if (ct == NULL) {
  1907. continue;
  1908. }
  1909. if (param_type->type == EXPRESSION_ARGUMENT_REGEXP) {
  1910. re = param_type->data;
  1911. if (ct->type.len > 0) {
  1912. r = rspamd_regexp_search(re, ct->type.begin, ct->type.len,
  1913. NULL, NULL, FALSE, NULL);
  1914. }
  1915. /* Also check subtype and length of the part */
  1916. if (r && param_subtype) {
  1917. r = compare_len(part, min_len, max_len) &&
  1918. compare_subtype(task, ct, param_subtype);
  1919. return r;
  1920. }
  1921. }
  1922. else {
  1923. /* Just do strcasecmp */
  1924. srch.begin = param_type->data;
  1925. srch.len = strlen(param_type->data);
  1926. if (rspamd_ftok_casecmp(&ct->type, &srch) == 0) {
  1927. if (param_subtype) {
  1928. if (compare_subtype(task, ct, param_subtype)) {
  1929. if (compare_len(part, min_len, max_len)) {
  1930. return TRUE;
  1931. }
  1932. }
  1933. }
  1934. else {
  1935. if (compare_len(part, min_len, max_len)) {
  1936. return TRUE;
  1937. }
  1938. }
  1939. }
  1940. }
  1941. }
  1942. return FALSE;
  1943. }
  1944. static gboolean
  1945. rspamd_has_content_part(struct rspamd_task *task, GArray *args, void *unused)
  1946. {
  1947. struct expression_argument *param_type = NULL, *param_subtype = NULL;
  1948. if (args == NULL) {
  1949. msg_warn_task("no parameters to function");
  1950. return FALSE;
  1951. }
  1952. param_type = &g_array_index(args, struct expression_argument, 0);
  1953. if (args->len >= 2) {
  1954. param_subtype = &g_array_index(args, struct expression_argument, 1);
  1955. }
  1956. return common_has_content_part(task, param_type, param_subtype, 0, 0);
  1957. }
  1958. static gboolean
  1959. rspamd_has_content_part_len(struct rspamd_task *task,
  1960. GArray *args,
  1961. void *unused)
  1962. {
  1963. struct expression_argument *param_type = NULL, *param_subtype = NULL;
  1964. int min = 0, max = 0;
  1965. struct expression_argument *arg;
  1966. if (args == NULL) {
  1967. msg_warn_task("no parameters to function");
  1968. return FALSE;
  1969. }
  1970. param_type = &g_array_index(args, struct expression_argument, 0);
  1971. if (args->len >= 2) {
  1972. param_subtype = &g_array_index(args, struct expression_argument, 1);
  1973. if (args->len >= 3) {
  1974. arg = &g_array_index(args, struct expression_argument, 2);
  1975. errno = 0;
  1976. min = strtoul(arg->data, NULL, 10);
  1977. g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1978. if (errno != 0) {
  1979. msg_warn_task("invalid numeric value '%s': %s",
  1980. (char *) arg->data,
  1981. strerror(errno));
  1982. return FALSE;
  1983. }
  1984. if (args->len >= 4) {
  1985. arg = &g_array_index(args, struct expression_argument, 3);
  1986. g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1987. max = strtoul(arg->data, NULL, 10);
  1988. if (errno != 0) {
  1989. msg_warn_task("invalid numeric value '%s': %s",
  1990. (char *) arg->data,
  1991. strerror(errno));
  1992. return FALSE;
  1993. }
  1994. }
  1995. }
  1996. }
  1997. return common_has_content_part(task, param_type, param_subtype, min, max);
  1998. }
  1999. static gboolean
  2000. rspamd_is_empty_body(struct rspamd_task *task,
  2001. GArray *args,
  2002. void *unused)
  2003. {
  2004. struct rspamd_mime_part *part;
  2005. unsigned int i;
  2006. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
  2007. {
  2008. if (part->parsed_data.len > 0) {
  2009. return FALSE;
  2010. }
  2011. }
  2012. return TRUE;
  2013. }
  2014. #define TASK_FLAG_READ(flag) \
  2015. do { \
  2016. result = !!(task->flags & (flag)); \
  2017. } while (0)
  2018. #define TASK_GET_FLAG(flag, strname, macro) \
  2019. do { \
  2020. if (!found && strcmp((flag), strname) == 0) { \
  2021. TASK_FLAG_READ((macro)); \
  2022. found = TRUE; \
  2023. } \
  2024. } while (0)
  2025. #define TASK_PROTOCOL_FLAG_READ(flag) \
  2026. do { \
  2027. result = !!(task->protocol_flags & (flag)); \
  2028. } while (0)
  2029. #define TASK_GET_PROTOCOL_FLAG(flag, strname, macro) \
  2030. do { \
  2031. if (!found && strcmp((flag), strname) == 0) { \
  2032. TASK_PROTOCOL_FLAG_READ((macro)); \
  2033. found = TRUE; \
  2034. } \
  2035. } while (0)
  2036. static gboolean
  2037. rspamd_has_flag_expr(struct rspamd_task *task,
  2038. GArray *args,
  2039. void *unused)
  2040. {
  2041. gboolean found = FALSE, result = FALSE;
  2042. struct expression_argument *flag_arg;
  2043. const char *flag_str;
  2044. if (args == NULL) {
  2045. msg_warn_task("no parameters to function");
  2046. return FALSE;
  2047. }
  2048. flag_arg = &g_array_index(args, struct expression_argument, 0);
  2049. if (flag_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  2050. msg_warn_task("invalid parameter to function");
  2051. return FALSE;
  2052. }
  2053. flag_str = (const char *) flag_arg->data;
  2054. TASK_GET_FLAG(flag_str, "pass_all", RSPAMD_TASK_FLAG_PASS_ALL);
  2055. TASK_GET_FLAG(flag_str, "no_log", RSPAMD_TASK_FLAG_NO_LOG);
  2056. TASK_GET_FLAG(flag_str, "no_stat", RSPAMD_TASK_FLAG_NO_STAT);
  2057. TASK_GET_FLAG(flag_str, "skip", RSPAMD_TASK_FLAG_SKIP);
  2058. TASK_GET_PROTOCOL_FLAG(flag_str, "extended_urls",
  2059. RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS);
  2060. TASK_GET_FLAG(flag_str, "learn_spam", RSPAMD_TASK_FLAG_LEARN_SPAM);
  2061. TASK_GET_FLAG(flag_str, "learn_ham", RSPAMD_TASK_FLAG_LEARN_HAM);
  2062. TASK_GET_FLAG(flag_str, "greylisted", RSPAMD_TASK_FLAG_GREYLISTED);
  2063. TASK_GET_FLAG(flag_str, "broken_headers",
  2064. RSPAMD_TASK_FLAG_BROKEN_HEADERS);
  2065. TASK_GET_FLAG(flag_str, "skip_process",
  2066. RSPAMD_TASK_FLAG_SKIP_PROCESS);
  2067. TASK_GET_PROTOCOL_FLAG(flag_str, "milter",
  2068. RSPAMD_TASK_PROTOCOL_FLAG_MILTER);
  2069. TASK_GET_FLAG(flag_str, "bad_unicode",
  2070. RSPAMD_TASK_FLAG_BAD_UNICODE);
  2071. if (!found) {
  2072. msg_warn_task("invalid flag name %s", flag_str);
  2073. return FALSE;
  2074. }
  2075. return result;
  2076. }
  2077. static gboolean
  2078. rspamd_has_symbol_expr(struct rspamd_task *task,
  2079. GArray *args,
  2080. void *unused)
  2081. {
  2082. struct expression_argument *sym_arg;
  2083. const char *symbol_str;
  2084. if (args == NULL) {
  2085. msg_warn_task("no parameters to function");
  2086. return FALSE;
  2087. }
  2088. sym_arg = &g_array_index(args, struct expression_argument, 0);
  2089. if (sym_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  2090. msg_warn_task("invalid parameter to function");
  2091. return FALSE;
  2092. }
  2093. symbol_str = (const char *) sym_arg->data;
  2094. if (rspamd_task_find_symbol_result(task, symbol_str, NULL)) {
  2095. return TRUE;
  2096. }
  2097. return FALSE;
  2098. }