You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

mime_expressions.c 55KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <contrib/libucl/ucl.h>
  17. #include "config.h"
  18. #include "util.h"
  19. #include "cfg_file.h"
  20. #include "rspamd.h"
  21. #include "message.h"
  22. #include "mime_expressions.h"
  23. #include "libserver/html/html.h"
  24. #include "lua/lua_common.h"
  25. #include "utlist.h"
  /*
   * Forward declarations of the built-in expression functions that are
   * registered by name in rspamd_functions_list later in this file.  They all
   * share one signature: the task, a GArray of arguments and a pointer that is
   * named `unused`; each returns a gboolean.
   */
  26. gboolean rspamd_compare_encoding(struct rspamd_task *task,
  27. GArray *args,
  28. void *unused);
  29. gboolean rspamd_header_exists(struct rspamd_task *task,
  30. GArray *args,
  31. void *unused);
  32. gboolean rspamd_parts_distance(struct rspamd_task *task,
  33. GArray *args,
  34. void *unused);
  35. gboolean rspamd_recipients_distance(struct rspamd_task *task,
  36. GArray *args,
  37. void *unused);
  38. gboolean rspamd_has_only_html_part(struct rspamd_task *task,
  39. GArray *args,
  40. void *unused);
  41. gboolean rspamd_is_recipients_sorted(struct rspamd_task *task,
  42. GArray *args,
  43. void *unused);
  44. gboolean rspamd_compare_transfer_encoding(struct rspamd_task *task,
  45. GArray *args,
  46. void *unused);
  47. gboolean rspamd_is_html_balanced(struct rspamd_task *task,
  48. GArray *args,
  49. void *unused);
  50. gboolean rspamd_has_html_tag(struct rspamd_task *task,
  51. GArray *args,
  52. void *unused);
  53. gboolean rspamd_has_fake_html(struct rspamd_task *task,
  54. GArray *args,
  55. void *unused);
  /* The remaining built-ins have internal linkage (file-local). */
  56. static gboolean rspamd_raw_header_exists(struct rspamd_task *task,
  57. GArray *args,
  58. void *unused);
  59. static gboolean rspamd_check_smtp_data(struct rspamd_task *task,
  60. GArray *args,
  61. void *unused);
  62. static gboolean rspamd_content_type_is_type(struct rspamd_task *task,
  63. GArray *args,
  64. void *unused);
  65. static gboolean rspamd_content_type_is_subtype(struct rspamd_task *task,
  66. GArray *args,
  67. void *unused);
  68. static gboolean rspamd_content_type_has_param(struct rspamd_task *task,
  69. GArray *args,
  70. void *unused);
  71. static gboolean rspamd_content_type_compare_param(struct rspamd_task *task,
  72. GArray *args,
  73. void *unused);
  74. static gboolean rspamd_has_content_part(struct rspamd_task *task,
  75. GArray *args,
  76. void *unused);
  77. static gboolean rspamd_has_content_part_len(struct rspamd_task *task,
  78. GArray *args,
  79. void *unused);
  80. static gboolean rspamd_is_empty_body(struct rspamd_task *task,
  81. GArray *args,
  82. void *unused);
  83. static gboolean rspamd_has_flag_expr(struct rspamd_task *task,
  84. GArray *args,
  85. void *unused);
  86. static gboolean rspamd_has_symbol_expr(struct rspamd_task *task,
  87. GArray *args,
  88. void *unused);
  /*
   * Atom callbacks; these four are wired into mime_expr_subr as its
   * parse / process / priority / destroy members.
   */
  89. static rspamd_expression_atom_t *rspamd_mime_expr_parse(const gchar *line, gsize len,
  90. rspamd_mempool_t *pool, gpointer ud, GError **err);
  91. static gdouble rspamd_mime_expr_process(void *ud, rspamd_expression_atom_t *atom);
  92. static gint rspamd_mime_expr_priority(rspamd_expression_atom_t *atom);
  93. static void rspamd_mime_expr_destroy(rspamd_expression_atom_t *atom);
  94. /**
  95. * Regexp structure
   *
   * Describes one regexp atom as produced by
   * rspamd_mime_expr_parse_regexp_atom().  `extra` receives the header name for
   * the header, raw-header and mime-header types and the selector name for
   * selector regexps.  `is_multiple` drives the max-hits setting of the
   * compiled regexp: 0 when set, 1 otherwise.
  96. */
  97. struct rspamd_regexp_atom {
  98. enum rspamd_re_type type; /**< regexp type */
  99. gchar *regexp_text; /**< regexp text representation */
  100. rspamd_regexp_t *regexp; /**< regexp structure */
  101. union {
  102. const gchar *header; /**< header name for header regexps */
  103. const gchar *selector; /**< selector name for lua selector regexp */
  104. } extra;
  105. gboolean is_test; /**< true if this expression must be tested */
  106. gboolean is_strong; /**< true if headers search must be case sensitive */
  107. gboolean is_multiple; /**< true if we need to match all inclusions of atom */
  108. };
  109. /**
  110. * Rspamd expression function
   *
   * Built by rspamd_mime_expr_parse_function_atom(): `name` is the text that
   * precedes the opening '(' and `args` is a GArray whose elements are
   * struct expression_argument values, one per parsed argument.
  111. */
  112. struct rspamd_function_atom {
  113. gchar *name; /**< name of function */
  114. GArray *args; /**< its args */
  115. };
  /*
   * Kind of a parsed mime atom, as decided by rspamd_mime_expr_parse() while
   * it scans the atom text.
   */
  116. enum rspamd_mime_atom_type {
  117. MIME_ATOM_REGEXP = 0, /* initial type assumed by the parser */
  118. MIME_ATOM_INTERNAL_FUNCTION, /* chosen once a '(' follows the atom name */
  119. MIME_ATOM_LUA_FUNCTION, /* bare name ended by a non-identifier character */
  120. MIME_ATOM_LOCAL_LUA_FUNCTION, /* New style; written with a "lua:" prefix */
  121. };
  /*
   * A parsed mime expression atom: a copy of the atom text, a type tag and a
   * type-dependent payload in `d`.
   */
  122. struct rspamd_mime_atom {
  123. gchar *str; /* pool-allocated copy of the atom text */
  124. union {
  125. struct rspamd_regexp_atom *re; /* set when type is MIME_ATOM_REGEXP */
  126. struct rspamd_function_atom *func; /* presumably for MIME_ATOM_INTERNAL_FUNCTION -- confirm */
  127. const gchar *lua_function; /* presumably for MIME_ATOM_LUA_FUNCTION -- confirm */
  128. gint lua_cbref; /* presumably for MIME_ATOM_LOCAL_LUA_FUNCTION -- confirm */
  129. } d;
  130. enum rspamd_mime_atom_type type; /* tells which member of `d` is in use */
  131. };
  132. /*
  133. * List of internal functions of rspamd
  134. * Sorted by name to use bsearch
   *
   * Each entry maps the name used inside expressions to its handler.  New
   * entries must be inserted in strcmp() order (see fl_cmp) to keep the table
   * searchable.  Every built-in below carries NULL user data.
  135. */
  136. static struct _fl {
  137. const gchar *name; /* function name as written in an expression */
  138. rspamd_internal_func_t func; /* handler invoked for that name */
  139. void *user_data; /* opaque pointer stored with the entry */
  140. } rspamd_functions_list[] = {
  141. {"check_smtp_data", rspamd_check_smtp_data, NULL},
  142. {"compare_encoding", rspamd_compare_encoding, NULL},
  143. {"compare_parts_distance", rspamd_parts_distance, NULL},
  144. {"compare_recipients_distance", rspamd_recipients_distance, NULL},
  145. {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
  146. {"content_type_compare_param", rspamd_content_type_compare_param, NULL},
  147. {"content_type_has_param", rspamd_content_type_has_param, NULL},
  148. {"content_type_is_subtype", rspamd_content_type_is_subtype, NULL},
  149. {"content_type_is_type", rspamd_content_type_is_type, NULL},
  150. {"has_content_part", rspamd_has_content_part, NULL},
  151. {"has_content_part_len", rspamd_has_content_part_len, NULL},
  152. {"has_fake_html", rspamd_has_fake_html, NULL},
  153. {"has_flag", rspamd_has_flag_expr, NULL},
  154. {"has_html_tag", rspamd_has_html_tag, NULL},
  155. {"has_only_html_part", rspamd_has_only_html_part, NULL},
  156. {"has_symbol", rspamd_has_symbol_expr, NULL},
  157. {"header_exists", rspamd_header_exists, NULL},
  158. {"is_empty_body", rspamd_is_empty_body, NULL},
  159. {"is_html_balanced", rspamd_is_html_balanced, NULL},
  160. {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL},
  161. {"raw_header_exists", rspamd_raw_header_exists, NULL},
  162. };
  /*
   * Callback table for mime expression atoms: parsing, evaluation, priority
   * and destruction are all routed to the static functions of this file.
   */
  163. const struct rspamd_atom_subr mime_expr_subr = {
  164. .parse = rspamd_mime_expr_parse,
  165. .process = rspamd_mime_expr_process,
  166. .priority = rspamd_mime_expr_priority,
  167. .destroy = rspamd_mime_expr_destroy};
  /*
   * Current function table and its element count; both start out describing
   * the static rspamd_functions_list.
   */
  168. static struct _fl *list_ptr = &rspamd_functions_list[0];
  169. static guint32 functions_number = sizeof(rspamd_functions_list) /
  170. sizeof(struct _fl);
  /*
   * NOTE(review): presumably set once list_ptr is switched to a heap copy of
   * the table -- the code doing that is not visible in this chunk, confirm.
   */
  171. static gboolean list_allocated = FALSE;
  172. /* Bsearch routine */
  173. static gint
  174. fl_cmp(const void *s1, const void *s2)
  175. {
  176. struct _fl *fl1 = (struct _fl *) s1;
  177. struct _fl *fl2 = (struct _fl *) s2;
  178. return strcmp(fl1->name, fl2->name);
  179. }
  180. static GQuark
  181. rspamd_mime_expr_quark(void)
  182. {
  183. return g_quark_from_static_string("mime-expressions");
  184. }
  /*
   * True when the `len` bytes at `str` match `type`: the lengths must be equal
   * and rspamd_lc_cmp() must report equality.  `type` has to be a string
   * literal (or array) because its length is taken with sizeof.
   */
  185. #define TYPE_CHECK(str, type, len) (sizeof(type) - 1 == (len) && rspamd_lc_cmp((str), (type), (len)) == 0)
  186. static gboolean
  187. rspamd_parse_long_option(const gchar *start, gsize len,
  188. struct rspamd_regexp_atom *a)
  189. {
  190. gboolean ret = FALSE;
  191. if (TYPE_CHECK(start, "body", len)) {
  192. ret = TRUE;
  193. a->type = RSPAMD_RE_BODY;
  194. }
  195. else if (TYPE_CHECK(start, "part", len) ||
  196. TYPE_CHECK(start, "mime", len)) {
  197. ret = TRUE;
  198. a->type = RSPAMD_RE_MIME;
  199. }
  200. else if (TYPE_CHECK(start, "raw_part", len) ||
  201. TYPE_CHECK(start, "raw_mime", len) ||
  202. TYPE_CHECK(start, "mime_raw", len)) {
  203. ret = TRUE;
  204. a->type = RSPAMD_RE_RAWMIME;
  205. }
  206. else if (TYPE_CHECK(start, "header", len)) {
  207. ret = TRUE;
  208. a->type = RSPAMD_RE_HEADER;
  209. }
  210. else if (TYPE_CHECK(start, "mime_header", len) ||
  211. TYPE_CHECK(start, "header_mime", len)) {
  212. ret = TRUE;
  213. a->type = RSPAMD_RE_MIMEHEADER;
  214. }
  215. else if (TYPE_CHECK(start, "raw_header", len) ||
  216. TYPE_CHECK(start, "header_raw", len)) {
  217. ret = TRUE;
  218. a->type = RSPAMD_RE_RAWHEADER;
  219. }
  220. else if (TYPE_CHECK(start, "all_header", len) ||
  221. TYPE_CHECK(start, "header_all", len) ||
  222. TYPE_CHECK(start, "all_headers", len)) {
  223. ret = TRUE;
  224. a->type = RSPAMD_RE_ALLHEADER;
  225. }
  226. else if (TYPE_CHECK(start, "url", len)) {
  227. ret = TRUE;
  228. a->type = RSPAMD_RE_URL;
  229. }
  230. else if (TYPE_CHECK(start, "email", len)) {
  231. ret = TRUE;
  232. a->type = RSPAMD_RE_EMAIL;
  233. }
  234. else if (TYPE_CHECK(start, "sa_body", len)) {
  235. ret = TRUE;
  236. a->type = RSPAMD_RE_SABODY;
  237. }
  238. else if (TYPE_CHECK(start, "sa_raw_body", len) ||
  239. TYPE_CHECK(start, "sa_body_raw", len)) {
  240. ret = TRUE;
  241. a->type = RSPAMD_RE_SARAWBODY;
  242. }
  243. else if (TYPE_CHECK(start, "words", len)) {
  244. ret = TRUE;
  245. a->type = RSPAMD_RE_WORDS;
  246. }
  247. else if (TYPE_CHECK(start, "raw_words", len)) {
  248. ret = TRUE;
  249. a->type = RSPAMD_RE_RAWWORDS;
  250. }
  251. else if (TYPE_CHECK(start, "stem_words", len)) {
  252. ret = TRUE;
  253. a->type = RSPAMD_RE_STEMWORDS;
  254. }
  255. else if (TYPE_CHECK(start, "selector", len)) {
  256. ret = TRUE;
  257. a->type = RSPAMD_RE_SELECTOR;
  258. }
  259. return ret;
  260. }
  261. /*
  262. * Rspamd regexp utility functions
  263. */
  264. static struct rspamd_regexp_atom *
  265. rspamd_mime_expr_parse_regexp_atom(rspamd_mempool_t *pool, const gchar *line,
  266. struct rspamd_config *cfg)
  267. {
  268. const gchar *begin, *end, *p, *src, *start, *brace;
  269. gchar *dbegin, *dend, *extra = NULL;
  270. struct rspamd_regexp_atom *result;
  271. GError *err = NULL;
  272. GString *re_flags;
  273. if (line == NULL) {
  274. msg_err_pool("cannot parse NULL line");
  275. return NULL;
  276. }
  277. src = line;
  278. result = rspamd_mempool_alloc0(pool, sizeof(struct rspamd_regexp_atom));
  279. /* Skip whitespaces */
  280. while (g_ascii_isspace(*line)) {
  281. line++;
  282. }
  283. if (*line == '\0') {
  284. msg_warn_pool("got empty regexp");
  285. return NULL;
  286. }
  287. result->type = RSPAMD_RE_MAX;
  288. start = line;
  289. /* First try to find header name */
  290. begin = strchr(line, '/');
  291. if (begin != NULL) {
  292. p = begin;
  293. end = NULL;
  294. while (p != line) {
  295. if (*p == '=') {
  296. end = p;
  297. break;
  298. }
  299. p--;
  300. }
  301. if (end) {
  302. extra = rspamd_mempool_alloc(pool, end - line + 1);
  303. rspamd_strlcpy(extra, line, end - line + 1);
  304. line = end;
  305. }
  306. }
  307. else {
  308. extra = rspamd_mempool_strdup(pool, line);
  309. result->type = RSPAMD_RE_MAX;
  310. line = start;
  311. }
  312. /* Find begin of regexp */
  313. while (*line && *line != '/') {
  314. line++;
  315. }
  316. if (*line != '\0') {
  317. begin = line + 1;
  318. }
  319. else if (extra == NULL) {
  320. /* Assume that line without // is just a header name */
  321. extra = rspamd_mempool_strdup(pool, line);
  322. result->type = RSPAMD_RE_HEADER;
  323. return result;
  324. }
  325. else {
  326. /* We got header name earlier but have not found // expression, so it is invalid regexp */
  327. msg_warn_pool(
  328. "got no header name (eg. header=) but without corresponding regexp, %s",
  329. src);
  330. return NULL;
  331. }
  332. /* Find end */
  333. end = begin;
  334. while (*end && (*end != '/' || *(end - 1) == '\\')) {
  335. end++;
  336. }
  337. if (end == begin || *end != '/') {
  338. msg_warn_pool("no trailing / in regexp %s", src);
  339. return NULL;
  340. }
  341. /* Parse flags */
  342. p = end + 1;
  343. re_flags = g_string_sized_new(32);
  344. while (p != NULL) {
  345. switch (*p) {
  346. case 'i':
  347. case 'm':
  348. case 's':
  349. case 'x':
  350. case 'u':
  351. case 'O':
  352. case 'r':
  353. case 'L':
  354. /* Handled by rspamd_regexp_t */
  355. g_string_append_c(re_flags, *p);
  356. p++;
  357. break;
  358. case 'o':
  359. p++;
  360. break;
  361. /* Type flags */
  362. case 'H':
  363. result->type = RSPAMD_RE_HEADER;
  364. p++;
  365. break;
  366. case 'R':
  367. result->type = RSPAMD_RE_ALLHEADER;
  368. p++;
  369. break;
  370. case 'B':
  371. result->type = RSPAMD_RE_MIMEHEADER;
  372. p++;
  373. break;
  374. case 'C':
  375. result->type = RSPAMD_RE_SABODY;
  376. p++;
  377. break;
  378. case 'D':
  379. result->type = RSPAMD_RE_SARAWBODY;
  380. p++;
  381. break;
  382. case 'M':
  383. result->type = RSPAMD_RE_BODY;
  384. p++;
  385. break;
  386. case 'P':
  387. result->type = RSPAMD_RE_MIME;
  388. p++;
  389. break;
  390. case 'Q':
  391. result->type = RSPAMD_RE_RAWMIME;
  392. p++;
  393. break;
  394. case 'U':
  395. result->type = RSPAMD_RE_URL;
  396. p++;
  397. break;
  398. case 'X':
  399. result->type = RSPAMD_RE_RAWHEADER;
  400. p++;
  401. break;
  402. case '$':
  403. result->type = RSPAMD_RE_SELECTOR;
  404. p++;
  405. break;
  406. case '{':
  407. /* Long definition */
  408. if ((brace = strchr(p + 1, '}')) != NULL) {
  409. if (!rspamd_parse_long_option(p + 1, brace - (p + 1), result)) {
  410. msg_warn_pool("invalid long regexp type: %*s in '%s'",
  411. (int) (brace - (p + 1)), p + 1, src);
  412. p = NULL;
  413. }
  414. else {
  415. p = brace + 1;
  416. }
  417. }
  418. else {
  419. p = NULL;
  420. }
  421. break;
  422. /* Other flags */
  423. case 'T':
  424. result->is_test = TRUE;
  425. p++;
  426. break;
  427. case 'S':
  428. result->is_strong = TRUE;
  429. p++;
  430. break;
  431. case 'A':
  432. result->is_multiple = TRUE;
  433. p++;
  434. break;
  435. /* Stop flags parsing */
  436. default:
  437. p = NULL;
  438. break;
  439. }
  440. }
  441. if (result->type >= RSPAMD_RE_MAX) {
  442. if (extra) {
  443. /* Assume header regexp */
  444. result->extra.header = extra;
  445. result->type = RSPAMD_RE_HEADER;
  446. }
  447. else {
  448. msg_err_pool("could not read regexp: %s, unknown type", src);
  449. return NULL;
  450. }
  451. }
  452. if ((result->type == RSPAMD_RE_HEADER ||
  453. result->type == RSPAMD_RE_RAWHEADER ||
  454. result->type == RSPAMD_RE_MIMEHEADER)) {
  455. if (extra == NULL) {
  456. msg_err_pool("header regexp: '%s' has no header part", src);
  457. return NULL;
  458. }
  459. else {
  460. result->extra.header = extra;
  461. }
  462. }
  463. if (result->type == RSPAMD_RE_SELECTOR) {
  464. if (extra == NULL) {
  465. msg_err_pool("selector regexp: '%s' has no selector part", src);
  466. return NULL;
  467. }
  468. else {
  469. result->extra.selector = extra;
  470. }
  471. }
  472. result->regexp_text = rspamd_mempool_strdup(pool, start);
  473. dbegin = result->regexp_text + (begin - start);
  474. dend = result->regexp_text + (end - start);
  475. *dend = '\0';
  476. result->regexp = rspamd_regexp_new(dbegin, re_flags->str,
  477. &err);
  478. g_string_free(re_flags, TRUE);
  479. if (result->regexp == NULL || err != NULL) {
  480. msg_warn_pool("could not read regexp: %s while reading regexp %e",
  481. src, err);
  482. if (err) {
  483. g_error_free(err);
  484. }
  485. return NULL;
  486. }
  487. if (result->is_multiple) {
  488. rspamd_regexp_set_maxhits(result->regexp, 0);
  489. }
  490. else {
  491. rspamd_regexp_set_maxhits(result->regexp, 1);
  492. }
  493. rspamd_regexp_set_ud(result->regexp, result);
  494. *dend = '/';
  495. return result;
  496. }
  497. struct rspamd_function_atom *
  498. rspamd_mime_expr_parse_function_atom(rspamd_mempool_t *pool, const gchar *input)
  499. {
  500. const gchar *obrace, *ebrace, *p, *c;
  501. gchar t, *databuf;
  502. guint len;
  503. struct rspamd_function_atom *res;
  504. struct expression_argument arg;
  505. GError *err = NULL;
  506. enum {
  507. start_read_argument = 0,
  508. in_string,
  509. in_regexp,
  510. got_backslash,
  511. got_comma
  512. } state,
  513. prev_state = 0;
  514. obrace = strchr(input, '(');
  515. ebrace = strrchr(input, ')');
  516. g_assert(obrace != NULL && ebrace != NULL);
  517. res = rspamd_mempool_alloc0(pool, sizeof(*res));
  518. res->name = rspamd_mempool_alloc(pool, obrace - input + 1);
  519. rspamd_strlcpy(res->name, input, obrace - input + 1);
  520. res->args = g_array_new(FALSE, FALSE, sizeof(struct expression_argument));
  521. p = obrace + 1;
  522. c = p;
  523. state = start_read_argument;
  524. /* Read arguments */
  525. while (p <= ebrace) {
  526. t = *p;
  527. switch (state) {
  528. case start_read_argument:
  529. if (t == '/') {
  530. state = in_regexp;
  531. c = p;
  532. }
  533. else if (!g_ascii_isspace(t)) {
  534. state = in_string;
  535. if (t == '\'' || t == '\"') {
  536. c = p + 1;
  537. }
  538. else {
  539. c = p;
  540. }
  541. }
  542. p++;
  543. break;
  544. case in_regexp:
  545. if (t == '\\') {
  546. state = got_backslash;
  547. prev_state = in_regexp;
  548. }
  549. else if (t == ',' || p == ebrace) {
  550. len = p - c + 1;
  551. databuf = rspamd_mempool_alloc(pool, len);
  552. rspamd_strlcpy(databuf, c, len);
  553. arg.type = EXPRESSION_ARGUMENT_REGEXP;
  554. arg.data = rspamd_regexp_cache_create(NULL, databuf, NULL, &err);
  555. if (arg.data == NULL) {
  556. /* Fallback to string */
  557. msg_warn("cannot parse slashed argument %s as regexp: %s",
  558. databuf, err->message);
  559. g_error_free(err);
  560. arg.type = EXPRESSION_ARGUMENT_NORMAL;
  561. arg.data = databuf;
  562. }
  563. g_array_append_val(res->args, arg);
  564. state = got_comma;
  565. }
  566. p++;
  567. break;
  568. case in_string:
  569. if (t == '\\') {
  570. state = got_backslash;
  571. prev_state = in_string;
  572. }
  573. else if (t == ',' || p == ebrace) {
  574. if (*(p - 1) == '\'' || *(p - 1) == '\"') {
  575. len = p - c;
  576. }
  577. else {
  578. len = p - c + 1;
  579. }
  580. databuf = rspamd_mempool_alloc(pool, len);
  581. rspamd_strlcpy(databuf, c, len);
  582. arg.type = EXPRESSION_ARGUMENT_NORMAL;
  583. arg.data = databuf;
  584. g_array_append_val(res->args, arg);
  585. state = got_comma;
  586. }
  587. p++;
  588. break;
  589. case got_backslash:
  590. state = prev_state;
  591. p++;
  592. break;
  593. case got_comma:
  594. state = start_read_argument;
  595. break;
  596. }
  597. }
  598. return res;
  599. }
  600. static rspamd_expression_atom_t *
  601. rspamd_mime_expr_parse(const gchar *line, gsize len,
  602. rspamd_mempool_t *pool, gpointer ud, GError **err)
  603. {
  604. rspamd_expression_atom_t *a = NULL;
  605. struct rspamd_mime_atom *mime_atom = NULL;
  606. const gchar *p, *end, *c = NULL;
  607. struct rspamd_mime_expr_ud *real_ud = (struct rspamd_mime_expr_ud *) ud;
  608. struct rspamd_config *cfg;
  609. rspamd_regexp_t *own_re;
  610. gchar t;
  611. gint type = MIME_ATOM_REGEXP, obraces = 0, ebraces = 0;
  612. enum {
  613. in_header = 0,
  614. got_slash,
  615. in_regexp,
  616. got_backslash,
  617. got_second_slash,
  618. in_flags,
  619. in_flags_brace,
  620. got_obrace,
  621. in_function,
  622. in_local_function,
  623. got_ebrace,
  624. end_atom,
  625. bad_atom
  626. } state = 0,
  627. prev_state = 0;
  628. p = line;
  629. end = p + len;
  630. cfg = real_ud->cfg;
  631. while (p < end) {
  632. t = *p;
  633. switch (state) {
  634. case in_header:
  635. if (t == '/') {
  636. /* Regexp */
  637. state = got_slash;
  638. }
  639. else if (t == '(') {
  640. /* Function */
  641. state = got_obrace;
  642. }
  643. else if (!g_ascii_isalnum(t) && t != '_' && t != '-' && t != '=') {
  644. if (t == ':') {
  645. if (p - line == 3 && memcmp(line, "lua", 3) == 0) {
  646. type = MIME_ATOM_LOCAL_LUA_FUNCTION;
  647. state = in_local_function;
  648. c = p + 1;
  649. }
  650. }
  651. else {
  652. /* Likely lua function, identified by just a string */
  653. type = MIME_ATOM_LUA_FUNCTION;
  654. state = end_atom;
  655. /* Do not increase p */
  656. continue;
  657. }
  658. }
  659. else if (g_ascii_isspace(t)) {
  660. state = bad_atom;
  661. }
  662. p++;
  663. break;
  664. case got_slash:
  665. state = in_regexp;
  666. break;
  667. case in_regexp:
  668. if (t == '\\') {
  669. state = got_backslash;
  670. prev_state = in_regexp;
  671. }
  672. else if (t == '/') {
  673. state = got_second_slash;
  674. }
  675. p++;
  676. break;
  677. case got_second_slash:
  678. state = in_flags;
  679. break;
  680. case in_flags:
  681. if (t == '{') {
  682. state = in_flags_brace;
  683. p++;
  684. }
  685. else if (!g_ascii_isalpha(t) && t != '$') {
  686. state = end_atom;
  687. }
  688. else {
  689. p++;
  690. }
  691. break;
  692. case in_flags_brace:
  693. if (t == '}') {
  694. state = in_flags;
  695. }
  696. p++;
  697. break;
  698. case got_backslash:
  699. state = prev_state;
  700. p++;
  701. break;
  702. case got_obrace:
  703. state = in_function;
  704. type = MIME_ATOM_INTERNAL_FUNCTION;
  705. obraces++;
  706. break;
  707. case in_function:
  708. if (t == '\\') {
  709. state = got_backslash;
  710. prev_state = in_function;
  711. }
  712. else if (t == '(') {
  713. obraces++;
  714. }
  715. else if (t == ')') {
  716. ebraces++;
  717. if (ebraces == obraces) {
  718. state = got_ebrace;
  719. }
  720. }
  721. p++;
  722. break;
  723. case in_local_function:
  724. if (!(g_ascii_isalnum(t) || t == '-' || t == '_')) {
  725. g_assert(c != NULL);
  726. state = end_atom;
  727. }
  728. else {
  729. p++;
  730. }
  731. break;
  732. case got_ebrace:
  733. state = end_atom;
  734. break;
  735. case bad_atom:
  736. g_set_error(err, rspamd_mime_expr_quark(), 100, "cannot parse"
  737. " mime atom '%s' when reading symbol '%c' at offset %d, "
  738. "near %.*s",
  739. line, t, (gint) (p - line),
  740. (gint) MIN(end - p, 10), p);
  741. return NULL;
  742. case end_atom:
  743. goto set;
  744. }
  745. }
  746. set:
  747. if (p - line == 0 || (state != got_ebrace && state != got_second_slash &&
  748. state != in_flags && state != end_atom)) {
  749. g_set_error(err, rspamd_mime_expr_quark(), 200, "incomplete or empty"
  750. " mime atom");
  751. return NULL;
  752. }
  753. mime_atom = rspamd_mempool_alloc(pool, sizeof(*mime_atom));
  754. mime_atom->type = type;
  755. mime_atom->str = rspamd_mempool_alloc(pool, p - line + 1);
  756. rspamd_strlcpy(mime_atom->str, line, p - line + 1);
  757. if (type == MIME_ATOM_REGEXP) {
  758. mime_atom->d.re = rspamd_mime_expr_parse_regexp_atom(pool,
  759. mime_atom->str, cfg);
  760. if (mime_atom->d.re == NULL) {
  761. g_set_error(err, rspamd_mime_expr_quark(), 200,
  762. "cannot parse regexp '%s'",
  763. mime_atom->str);
  764. goto err;
  765. }
  766. else {
  767. gint lua_cbref = -1;
  768. /* Check regexp condition */
  769. if (real_ud->conf_obj != NULL) {
  770. const ucl_object_t *re_conditions = ucl_object_lookup(real_ud->conf_obj,
  771. "re_conditions");
  772. if (re_conditions != NULL) {
  773. if (ucl_object_type(re_conditions) != UCL_OBJECT) {
  774. g_set_error(err, rspamd_mime_expr_quark(), 320,
  775. "re_conditions is not a table for '%s'",
  776. mime_atom->str);
  777. rspamd_regexp_unref(mime_atom->d.re->regexp);
  778. goto err;
  779. }
  780. const ucl_object_t *function_obj = ucl_object_lookup(re_conditions,
  781. mime_atom->str);
  782. if (function_obj != NULL) {
  783. if (ucl_object_type(function_obj) != UCL_USERDATA) {
  784. g_set_error(err, rspamd_mime_expr_quark(), 320,
  785. "condition for '%s' is invalid, must be function",
  786. mime_atom->str);
  787. rspamd_regexp_unref(mime_atom->d.re->regexp);
  788. goto err;
  789. }
  790. struct ucl_lua_funcdata *fd = function_obj->value.ud;
  791. lua_cbref = fd->idx;
  792. }
  793. }
  794. }
  795. if (lua_cbref != -1) {
  796. msg_info_config("added condition for regexp %s", mime_atom->str);
  797. }
  798. /* Register new item in the cache */
  799. if (mime_atom->d.re->type == RSPAMD_RE_HEADER ||
  800. mime_atom->d.re->type == RSPAMD_RE_RAWHEADER ||
  801. mime_atom->d.re->type == RSPAMD_RE_MIMEHEADER) {
  802. if (mime_atom->d.re->extra.header != NULL) {
  803. own_re = mime_atom->d.re->regexp;
  804. mime_atom->d.re->regexp = rspamd_re_cache_add(cfg->re_cache,
  805. mime_atom->d.re->regexp,
  806. mime_atom->d.re->type,
  807. mime_atom->d.re->extra.header,
  808. strlen(mime_atom->d.re->extra.header) + 1,
  809. lua_cbref);
  810. /* Pass ownership to the cache */
  811. rspamd_regexp_unref(own_re);
  812. }
  813. else {
  814. /* We have header regexp, but no header name is detected */
  815. g_set_error(err,
  816. rspamd_mime_expr_quark(),
  817. 200,
  818. "no header name in header regexp: '%s'",
  819. mime_atom->str);
  820. rspamd_regexp_unref(mime_atom->d.re->regexp);
  821. goto err;
  822. }
  823. }
  824. else if (mime_atom->d.re->type == RSPAMD_RE_SELECTOR) {
  825. if (mime_atom->d.re->extra.selector != NULL) {
  826. own_re = mime_atom->d.re->regexp;
  827. mime_atom->d.re->regexp = rspamd_re_cache_add(cfg->re_cache,
  828. mime_atom->d.re->regexp,
  829. mime_atom->d.re->type,
  830. mime_atom->d.re->extra.selector,
  831. strlen(mime_atom->d.re->extra.selector) + 1,
  832. lua_cbref);
  833. /* Pass ownership to the cache */
  834. rspamd_regexp_unref(own_re);
  835. }
  836. else {
  837. /* We have selector regexp, but no selector name is detected */
  838. g_set_error(err,
  839. rspamd_mime_expr_quark(),
  840. 200,
  841. "no selector name in selector regexp: '%s'",
  842. mime_atom->str);
  843. rspamd_regexp_unref(mime_atom->d.re->regexp);
  844. goto err;
  845. }
  846. }
  847. else {
  848. own_re = mime_atom->d.re->regexp;
  849. mime_atom->d.re->regexp = rspamd_re_cache_add(cfg->re_cache,
  850. mime_atom->d.re->regexp,
  851. mime_atom->d.re->type,
  852. NULL,
  853. 0,
  854. lua_cbref);
  855. /* Pass ownership to the cache */
  856. rspamd_regexp_unref(own_re);
  857. }
  858. }
  859. }
  860. else if (type == MIME_ATOM_LUA_FUNCTION) {
  861. mime_atom->d.lua_function = mime_atom->str;
  862. lua_getglobal(cfg->lua_state, mime_atom->str);
  863. if (lua_type(cfg->lua_state, -1) != LUA_TFUNCTION) {
  864. g_set_error(err, rspamd_mime_expr_quark(), 200,
  865. "no such lua function '%s'",
  866. mime_atom->str);
  867. lua_pop(cfg->lua_state, 1);
  868. goto err;
  869. }
  870. lua_pop(cfg->lua_state, 1);
  871. }
  872. else if (type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
  873. /* p pointer is set to the start of Lua function name */
  874. if (real_ud->conf_obj == NULL) {
  875. g_set_error(err, rspamd_mime_expr_quark(), 300,
  876. "no config object for '%s'",
  877. mime_atom->str);
  878. goto err;
  879. }
  880. const ucl_object_t *functions = ucl_object_lookup(real_ud->conf_obj,
  881. "functions");
  882. if (functions == NULL) {
  883. g_set_error(err, rspamd_mime_expr_quark(), 310,
  884. "no functions defined for '%s'",
  885. mime_atom->str);
  886. goto err;
  887. }
  888. if (ucl_object_type(functions) != UCL_OBJECT) {
  889. g_set_error(err, rspamd_mime_expr_quark(), 320,
  890. "functions is not a table for '%s'",
  891. mime_atom->str);
  892. goto err;
  893. }
  894. const ucl_object_t *function_obj;
  895. function_obj = ucl_object_lookup_len(functions, c,
  896. p - c);
  897. if (function_obj == NULL) {
  898. g_set_error(err, rspamd_mime_expr_quark(), 320,
  899. "function %.*s is not found for '%s'",
  900. (int) (p - c), c, mime_atom->str);
  901. goto err;
  902. }
  903. if (ucl_object_type(function_obj) != UCL_USERDATA) {
  904. g_set_error(err, rspamd_mime_expr_quark(), 320,
  905. "function %.*s has invalid type for '%s'",
  906. (int) (p - c), c, mime_atom->str);
  907. goto err;
  908. }
  909. struct ucl_lua_funcdata *fd = function_obj->value.ud;
  910. mime_atom->d.lua_cbref = fd->idx;
  911. }
  912. else {
  913. mime_atom->d.func = rspamd_mime_expr_parse_function_atom(pool,
  914. mime_atom->str);
  915. if (mime_atom->d.func == NULL) {
  916. g_set_error(err, rspamd_mime_expr_quark(), 200,
  917. "cannot parse function '%s'",
  918. mime_atom->str);
  919. goto err;
  920. }
  921. }
  922. a = rspamd_mempool_alloc0(pool, sizeof(*a));
  923. a->len = p - line;
  924. a->priority = 0;
  925. a->data = mime_atom;
  926. return a;
  927. err:
  928. return NULL;
  929. }
  930. static gint
  931. rspamd_mime_expr_process_regexp(struct rspamd_regexp_atom *re,
  932. struct rspamd_task *task)
  933. {
  934. gint ret;
  935. if (re == NULL) {
  936. msg_info_task("invalid regexp passed");
  937. return 0;
  938. }
  939. if (re->type == RSPAMD_RE_HEADER || re->type == RSPAMD_RE_RAWHEADER) {
  940. ret = rspamd_re_cache_process(task,
  941. re->regexp,
  942. re->type,
  943. re->extra.header,
  944. strlen(re->extra.header),
  945. re->is_strong);
  946. }
  947. else if (re->type == RSPAMD_RE_SELECTOR) {
  948. ret = rspamd_re_cache_process(task,
  949. re->regexp,
  950. re->type,
  951. re->extra.selector,
  952. strlen(re->extra.selector),
  953. re->is_strong);
  954. }
  955. else {
  956. ret = rspamd_re_cache_process(task,
  957. re->regexp,
  958. re->type,
  959. NULL,
  960. 0,
  961. re->is_strong);
  962. }
  963. if (re->is_test) {
  964. msg_info_task("test %s regexp '%s' returned %d",
  965. rspamd_re_cache_type_to_string(re->type),
  966. re->regexp_text, ret);
  967. }
  968. return ret;
  969. }
  970. static gint
  971. rspamd_mime_expr_priority(rspamd_expression_atom_t *atom)
  972. {
  973. struct rspamd_mime_atom *mime_atom = atom->data;
  974. gint ret = 0;
  975. switch (mime_atom->type) {
  976. case MIME_ATOM_INTERNAL_FUNCTION:
  977. /* Prioritize internal functions slightly */
  978. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
  979. break;
  980. case MIME_ATOM_LUA_FUNCTION:
  981. case MIME_ATOM_LOCAL_LUA_FUNCTION:
  982. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 4;
  983. break;
  984. case MIME_ATOM_REGEXP:
  985. switch (mime_atom->d.re->type) {
  986. case RSPAMD_RE_HEADER:
  987. case RSPAMD_RE_RAWHEADER:
  988. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 16;
  989. break;
  990. case RSPAMD_RE_URL:
  991. case RSPAMD_RE_EMAIL:
  992. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
  993. break;
  994. case RSPAMD_RE_SELECTOR:
  995. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
  996. break;
  997. case RSPAMD_RE_MIME:
  998. case RSPAMD_RE_RAWMIME:
  999. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 2;
  1000. break;
  1001. case RSPAMD_RE_WORDS:
  1002. case RSPAMD_RE_RAWWORDS:
  1003. case RSPAMD_RE_STEMWORDS:
  1004. default:
  1005. /* For expensive regexps */
  1006. ret = 0;
  1007. break;
  1008. }
  1009. }
  1010. return ret;
  1011. }
  1012. static void
  1013. rspamd_mime_expr_destroy(rspamd_expression_atom_t *atom)
  1014. {
  1015. struct rspamd_mime_atom *mime_atom = atom->data;
  1016. if (mime_atom) {
  1017. if (mime_atom->type == MIME_ATOM_INTERNAL_FUNCTION) {
  1018. /* Need to cleanup arguments */
  1019. g_array_free(mime_atom->d.func->args, TRUE);
  1020. }
  1021. }
  1022. }
  1023. static gboolean
  1024. rspamd_mime_expr_process_function(struct rspamd_function_atom *func,
  1025. struct rspamd_task *task,
  1026. lua_State *L)
  1027. {
  1028. struct _fl *selected, key;
  1029. key.name = func->name;
  1030. selected = bsearch(&key,
  1031. list_ptr,
  1032. functions_number,
  1033. sizeof(struct _fl),
  1034. fl_cmp);
  1035. if (selected == NULL) {
  1036. /* Try to check lua function */
  1037. return FALSE;
  1038. }
  1039. return selected->func(task, func->args, selected->user_data);
  1040. }
  1041. static gdouble
  1042. rspamd_mime_expr_process(void *ud, rspamd_expression_atom_t *atom)
  1043. {
  1044. struct rspamd_task *task = (struct rspamd_task *) ud;
  1045. struct rspamd_mime_atom *mime_atom;
  1046. lua_State *L;
  1047. gdouble ret = 0;
  1048. g_assert(task != NULL);
  1049. g_assert(atom != NULL);
  1050. mime_atom = atom->data;
  1051. if (mime_atom->type == MIME_ATOM_REGEXP) {
  1052. ret = rspamd_mime_expr_process_regexp(mime_atom->d.re, task);
  1053. }
  1054. else if (mime_atom->type == MIME_ATOM_LUA_FUNCTION) {
  1055. L = task->cfg->lua_state;
  1056. lua_getglobal(L, mime_atom->d.lua_function);
  1057. rspamd_lua_task_push(L, task);
  1058. if (lua_pcall(L, 1, 1, 0) != 0) {
  1059. msg_info_task("lua call to global function '%s' for atom '%s' failed: %s",
  1060. mime_atom->d.lua_function,
  1061. mime_atom->str,
  1062. lua_tostring(L, -1));
  1063. lua_pop(L, 1);
  1064. }
  1065. else {
  1066. if (lua_type(L, -1) == LUA_TBOOLEAN) {
  1067. ret = lua_toboolean(L, -1);
  1068. }
  1069. else if (lua_type(L, -1) == LUA_TNUMBER) {
  1070. ret = lua_tonumber(L, 1);
  1071. }
  1072. else {
  1073. msg_err_task("%s returned wrong return type: %s",
  1074. mime_atom->str, lua_typename(L, lua_type(L, -1)));
  1075. }
  1076. /* Remove result */
  1077. lua_pop(L, 1);
  1078. }
  1079. }
  1080. else if (mime_atom->type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
  1081. gint err_idx;
  1082. L = task->cfg->lua_state;
  1083. lua_pushcfunction(L, &rspamd_lua_traceback);
  1084. err_idx = lua_gettop(L);
  1085. lua_rawgeti(L, LUA_REGISTRYINDEX, mime_atom->d.lua_cbref);
  1086. rspamd_lua_task_push(L, task);
  1087. if (lua_pcall(L, 1, 1, err_idx) != 0) {
  1088. msg_info_task("lua call to local function for atom '%s' failed: %s",
  1089. mime_atom->str,
  1090. lua_tostring(L, -1));
  1091. }
  1092. else {
  1093. if (lua_type(L, -1) == LUA_TBOOLEAN) {
  1094. ret = lua_toboolean(L, -1);
  1095. }
  1096. else if (lua_type(L, -1) == LUA_TNUMBER) {
  1097. ret = lua_tonumber(L, 1);
  1098. }
  1099. else {
  1100. msg_err_task("%s returned wrong return type: %s",
  1101. mime_atom->str, lua_typename(L, lua_type(L, -1)));
  1102. }
  1103. }
  1104. lua_settop(L, 0);
  1105. }
  1106. else {
  1107. ret = rspamd_mime_expr_process_function(mime_atom->d.func, task,
  1108. task->cfg->lua_state);
  1109. }
  1110. return ret;
  1111. }
  1112. void register_expression_function(const gchar *name,
  1113. rspamd_internal_func_t func,
  1114. void *user_data)
  1115. {
  1116. static struct _fl *new;
  1117. functions_number++;
  1118. new = g_new(struct _fl, functions_number);
  1119. memcpy(new, list_ptr, (functions_number - 1) * sizeof(struct _fl));
  1120. if (list_allocated) {
  1121. g_free(list_ptr);
  1122. }
  1123. list_allocated = TRUE;
  1124. new[functions_number - 1].name = name;
  1125. new[functions_number - 1].func = func;
  1126. new[functions_number - 1].user_data = user_data;
  1127. qsort(new, functions_number, sizeof(struct _fl), fl_cmp);
  1128. list_ptr = new;
  1129. }
  1130. gboolean
  1131. rspamd_compare_encoding(struct rspamd_task *task, GArray *args, void *unused)
  1132. {
  1133. struct expression_argument *arg;
  1134. if (args == NULL || task == NULL) {
  1135. return FALSE;
  1136. }
  1137. arg = &g_array_index(args, struct expression_argument, 0);
  1138. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1139. msg_warn_task("invalid argument to function is passed");
  1140. return FALSE;
  1141. }
  1142. /* XXX: really write this function */
  1143. return TRUE;
  1144. }
  1145. gboolean
  1146. rspamd_header_exists(struct rspamd_task *task, GArray *args, void *unused)
  1147. {
  1148. struct expression_argument *arg;
  1149. struct rspamd_mime_header *rh;
  1150. if (args == NULL || task == NULL) {
  1151. return FALSE;
  1152. }
  1153. arg = &g_array_index(args, struct expression_argument, 0);
  1154. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1155. msg_warn_task("invalid argument to function is passed");
  1156. return FALSE;
  1157. }
  1158. rh = rspamd_message_get_header_array(task,
  1159. (gchar *) arg->data, FALSE);
  1160. debug_task("try to get header %s: %d", (gchar *) arg->data,
  1161. (rh != NULL));
  1162. if (rh) {
  1163. return TRUE;
  1164. }
  1165. return FALSE;
  1166. }
  1167. /*
  1168. * This function is designed to find difference between text/html and text/plain parts
  1169. * It takes one argument: difference threshold, if we have two text parts, compare
  1170. * its hashes and check for threshold, if value is greater than threshold, return TRUE
  1171. * and return FALSE otherwise.
  1172. */
  1173. gboolean
  1174. rspamd_parts_distance(struct rspamd_task *task, GArray *args, void *unused)
  1175. {
  1176. gint threshold, threshold2 = -1;
  1177. struct expression_argument *arg;
  1178. gdouble *pdiff, diff;
  1179. if (args == NULL || args->len == 0) {
  1180. debug_task("no threshold is specified, assume it 100");
  1181. threshold = 100;
  1182. }
  1183. else {
  1184. errno = 0;
  1185. arg = &g_array_index(args, struct expression_argument, 0);
  1186. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1187. msg_warn_task("invalid argument to function is passed");
  1188. return FALSE;
  1189. }
  1190. threshold = strtoul((gchar *) arg->data, NULL, 10);
  1191. if (errno != 0) {
  1192. msg_info_task("bad numeric value for threshold \"%s\", assume it 100",
  1193. (gchar *) arg->data);
  1194. threshold = 100;
  1195. }
  1196. if (args->len >= 2) {
  1197. arg = &g_array_index(args, struct expression_argument, 1);
  1198. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1199. msg_warn_task("invalid argument to function is passed");
  1200. return FALSE;
  1201. }
  1202. errno = 0;
  1203. threshold2 = strtoul((gchar *) arg->data, NULL, 10);
  1204. if (errno != 0) {
  1205. msg_info_task("bad numeric value for threshold \"%s\", ignore it",
  1206. (gchar *) arg->data);
  1207. threshold2 = -1;
  1208. }
  1209. }
  1210. }
  1211. if ((pdiff =
  1212. rspamd_mempool_get_variable(task->task_pool,
  1213. "parts_distance")) != NULL) {
  1214. diff = (1.0 - (*pdiff)) * 100.0;
  1215. if (diff != -1) {
  1216. if (threshold2 > 0) {
  1217. if (diff >= MIN(threshold, threshold2) &&
  1218. diff < MAX(threshold, threshold2)) {
  1219. return TRUE;
  1220. }
  1221. }
  1222. else {
  1223. if (diff <= threshold) {
  1224. return TRUE;
  1225. }
  1226. }
  1227. return FALSE;
  1228. }
  1229. else {
  1230. return FALSE;
  1231. }
  1232. }
  1233. return FALSE;
  1234. }
  1235. struct addr_list {
  1236. const gchar *name;
  1237. guint namelen;
  1238. const gchar *addr;
  1239. guint addrlen;
  1240. };
  1241. static gint
  1242. addr_list_cmp_func(const void *a, const void *b)
  1243. {
  1244. const struct addr_list *addra = (struct addr_list *) a,
  1245. *addrb = (struct addr_list *) b;
  1246. if (addra->addrlen != addrb->addrlen) {
  1247. return addra->addrlen - addrb->addrlen;
  1248. }
  1249. return memcmp(addra->addr, addrb->addr, addra->addrlen);
  1250. }
  1251. #define COMPARE_RCPT_LEN 3
  1252. #define MIN_RCPT_TO_COMPARE 7
  1253. gboolean
  1254. rspamd_recipients_distance(struct rspamd_task *task, GArray *args,
  1255. void *unused)
  1256. {
  1257. struct expression_argument *arg;
  1258. struct rspamd_email_address *cur;
  1259. double threshold;
  1260. struct addr_list *ar;
  1261. gint num, i, hits = 0;
  1262. if (args == NULL) {
  1263. msg_warn_task("no parameters to function");
  1264. return FALSE;
  1265. }
  1266. arg = &g_array_index(args, struct expression_argument, 0);
  1267. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1268. msg_warn_task("invalid argument to function is passed");
  1269. return FALSE;
  1270. }
  1271. errno = 0;
  1272. threshold = strtod((gchar *) arg->data, NULL);
  1273. if (errno != 0) {
  1274. msg_warn_task("invalid numeric value '%s': %s",
  1275. (gchar *) arg->data,
  1276. strerror(errno));
  1277. return FALSE;
  1278. }
  1279. if (!MESSAGE_FIELD(task, rcpt_mime)) {
  1280. return FALSE;
  1281. }
  1282. num = MESSAGE_FIELD(task, rcpt_mime)->len;
  1283. if (num < MIN_RCPT_TO_COMPARE) {
  1284. return FALSE;
  1285. }
  1286. ar = rspamd_mempool_alloc0(task->task_pool, num * sizeof(struct addr_list));
  1287. /* Fill array */
  1288. num = 0;
  1289. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, rcpt_mime), i, cur)
  1290. {
  1291. if (cur->addr_len > COMPARE_RCPT_LEN) {
  1292. ar[num].name = cur->addr;
  1293. ar[num].namelen = cur->addr_len;
  1294. ar[num].addr = cur->domain;
  1295. ar[num].addrlen = cur->domain_len;
  1296. num++;
  1297. }
  1298. }
  1299. qsort(ar, num, sizeof(*ar), addr_list_cmp_func);
  1300. /* Cycle all elements in array */
  1301. for (i = 0; i < num; i++) {
  1302. if (i < num - 1) {
  1303. if (ar[i].namelen == ar[i + 1].namelen) {
  1304. if (rspamd_lc_cmp(ar[i].name, ar[i + 1].name, COMPARE_RCPT_LEN) == 0) {
  1305. hits++;
  1306. }
  1307. }
  1308. }
  1309. }
  1310. if ((hits * num / 2.) / (double) num >= threshold) {
  1311. return TRUE;
  1312. }
  1313. return FALSE;
  1314. }
  1315. gboolean
  1316. rspamd_has_only_html_part(struct rspamd_task *task, GArray *args,
  1317. void *unused)
  1318. {
  1319. struct rspamd_mime_text_part *p;
  1320. guint i, cnt_html = 0, cnt_txt = 0;
  1321. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, p)
  1322. {
  1323. if (!IS_TEXT_PART_ATTACHMENT(p)) {
  1324. if (IS_TEXT_PART_HTML(p)) {
  1325. cnt_html++;
  1326. }
  1327. else {
  1328. cnt_txt++;
  1329. }
  1330. }
  1331. }
  1332. return (cnt_html > 0 && cnt_txt == 0);
  1333. }
  1334. static gboolean
  1335. is_recipient_list_sorted(GPtrArray *ar)
  1336. {
  1337. struct rspamd_email_address *addr;
  1338. gboolean res = TRUE;
  1339. rspamd_ftok_t cur, prev;
  1340. gint i;
  1341. /* Do not check to short address lists */
  1342. if (ar == NULL || ar->len < MIN_RCPT_TO_COMPARE) {
  1343. return FALSE;
  1344. }
  1345. prev.len = 0;
  1346. prev.begin = NULL;
  1347. PTR_ARRAY_FOREACH(ar, i, addr)
  1348. {
  1349. cur.begin = addr->addr;
  1350. cur.len = addr->addr_len;
  1351. if (prev.len != 0) {
  1352. if (rspamd_ftok_casecmp(&cur, &prev) <= 0) {
  1353. res = FALSE;
  1354. break;
  1355. }
  1356. }
  1357. prev = cur;
  1358. }
  1359. return res;
  1360. }
  1361. gboolean
  1362. rspamd_is_recipients_sorted(struct rspamd_task *task,
  1363. GArray *args,
  1364. void *unused)
  1365. {
  1366. /* Check all types of addresses */
  1367. if (MESSAGE_FIELD(task, rcpt_mime)) {
  1368. return is_recipient_list_sorted(MESSAGE_FIELD(task, rcpt_mime));
  1369. }
  1370. return FALSE;
  1371. }
  1372. gboolean
  1373. rspamd_compare_transfer_encoding(struct rspamd_task *task,
  1374. GArray *args,
  1375. void *unused)
  1376. {
  1377. struct expression_argument *arg;
  1378. guint i;
  1379. struct rspamd_mime_part *part;
  1380. enum rspamd_cte cte;
  1381. if (args == NULL) {
  1382. msg_warn_task("no parameters to function");
  1383. return FALSE;
  1384. }
  1385. arg = &g_array_index(args, struct expression_argument, 0);
  1386. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1387. msg_warn_task("invalid argument to function is passed");
  1388. return FALSE;
  1389. }
  1390. cte = rspamd_cte_from_string(arg->data);
  1391. if (cte == RSPAMD_CTE_UNKNOWN) {
  1392. msg_warn_task("unknown cte: %s", arg->data);
  1393. return FALSE;
  1394. }
  1395. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
  1396. {
  1397. if (IS_PART_TEXT(part)) {
  1398. if (part->cte == cte) {
  1399. return TRUE;
  1400. }
  1401. }
  1402. }
  1403. return FALSE;
  1404. }
  1405. gboolean
  1406. rspamd_is_html_balanced(struct rspamd_task *task, GArray *args, void *unused)
  1407. {
  1408. /* Totally broken but seems to be never used */
  1409. return TRUE;
  1410. }
  1411. gboolean
  1412. rspamd_has_html_tag(struct rspamd_task *task, GArray *args, void *unused)
  1413. {
  1414. struct rspamd_mime_text_part *p;
  1415. struct expression_argument *arg;
  1416. guint i;
  1417. gboolean res = FALSE;
  1418. if (args == NULL) {
  1419. msg_warn_task("no parameters to function");
  1420. return FALSE;
  1421. }
  1422. arg = &g_array_index(args, struct expression_argument, 0);
  1423. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1424. msg_warn_task("invalid argument to function is passed");
  1425. return FALSE;
  1426. }
  1427. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, p)
  1428. {
  1429. if (IS_TEXT_PART_HTML(p) && p->html) {
  1430. res = rspamd_html_tag_seen(p->html, arg->data);
  1431. }
  1432. if (res) {
  1433. break;
  1434. }
  1435. }
  1436. return res;
  1437. }
  1438. gboolean
  1439. rspamd_has_fake_html(struct rspamd_task *task, GArray *args, void *unused)
  1440. {
  1441. struct rspamd_mime_text_part *p;
  1442. guint i;
  1443. gboolean res = FALSE;
  1444. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, p)
  1445. {
  1446. if (IS_TEXT_PART_HTML(p) && (rspamd_html_get_tags_count(p->html) < 2)) {
  1447. res = TRUE;
  1448. }
  1449. if (res) {
  1450. break;
  1451. }
  1452. }
  1453. return res;
  1454. }
  1455. static gboolean
  1456. rspamd_raw_header_exists(struct rspamd_task *task, GArray *args, void *unused)
  1457. {
  1458. struct expression_argument *arg;
  1459. if (args == NULL || task == NULL) {
  1460. return FALSE;
  1461. }
  1462. arg = &g_array_index(args, struct expression_argument, 0);
  1463. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1464. msg_warn_task("invalid argument to function is passed");
  1465. return FALSE;
  1466. }
  1467. return rspamd_message_get_header_array(task, arg->data, FALSE) != NULL;
  1468. }
  1469. static gboolean
  1470. match_smtp_data(struct rspamd_task *task,
  1471. struct expression_argument *arg,
  1472. const gchar *what, gsize len)
  1473. {
  1474. rspamd_regexp_t *re;
  1475. gint r = 0;
  1476. if (arg->type == EXPRESSION_ARGUMENT_REGEXP) {
  1477. /* This is a regexp */
  1478. re = arg->data;
  1479. if (re == NULL) {
  1480. msg_warn_task("cannot compile regexp for function");
  1481. return FALSE;
  1482. }
  1483. if (len > 0) {
  1484. r = rspamd_regexp_search(re, what, len, NULL, NULL, FALSE, NULL);
  1485. }
  1486. return r;
  1487. }
  1488. else if (arg->type == EXPRESSION_ARGUMENT_NORMAL &&
  1489. g_ascii_strncasecmp(arg->data, what, len) == 0) {
  1490. return TRUE;
  1491. }
  1492. return FALSE;
  1493. }
  1494. static gboolean
  1495. rspamd_check_smtp_data(struct rspamd_task *task, GArray *args, void *unused)
  1496. {
  1497. struct expression_argument *arg;
  1498. struct rspamd_email_address *addr = NULL;
  1499. GPtrArray *rcpts = NULL;
  1500. const gchar *type, *str = NULL;
  1501. guint i;
  1502. if (args == NULL) {
  1503. msg_warn_task("no parameters to function");
  1504. return FALSE;
  1505. }
  1506. arg = &g_array_index(args, struct expression_argument, 0);
  1507. if (!arg || !arg->data || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1508. msg_warn_task("no parameters to function");
  1509. return FALSE;
  1510. }
  1511. else {
  1512. type = arg->data;
  1513. switch (*type) {
  1514. case 'f':
  1515. case 'F':
  1516. if (g_ascii_strcasecmp(type, "from") == 0) {
  1517. addr = rspamd_task_get_sender(task);
  1518. }
  1519. else {
  1520. msg_warn_task("bad argument to function: %s", type);
  1521. return FALSE;
  1522. }
  1523. break;
  1524. case 'h':
  1525. case 'H':
  1526. if (g_ascii_strcasecmp(type, "helo") == 0) {
  1527. str = task->helo;
  1528. }
  1529. else {
  1530. msg_warn_task("bad argument to function: %s", type);
  1531. return FALSE;
  1532. }
  1533. break;
  1534. case 'u':
  1535. case 'U':
  1536. if (g_ascii_strcasecmp(type, "user") == 0) {
  1537. str = task->auth_user;
  1538. }
  1539. else {
  1540. msg_warn_task("bad argument to function: %s", type);
  1541. return FALSE;
  1542. }
  1543. break;
  1544. case 's':
  1545. case 'S':
  1546. if (g_ascii_strcasecmp(type, "subject") == 0) {
  1547. str = MESSAGE_FIELD(task, subject);
  1548. }
  1549. else {
  1550. msg_warn_task("bad argument to function: %s", type);
  1551. return FALSE;
  1552. }
  1553. break;
  1554. case 'r':
  1555. case 'R':
  1556. if (g_ascii_strcasecmp(type, "rcpt") == 0) {
  1557. rcpts = task->rcpt_envelope;
  1558. }
  1559. else {
  1560. msg_warn_task("bad argument to function: %s", type);
  1561. return FALSE;
  1562. }
  1563. break;
  1564. default:
  1565. msg_warn_task("bad argument to function: %s", type);
  1566. return FALSE;
  1567. }
  1568. }
  1569. if (str == NULL && addr == NULL && rcpts == NULL) {
  1570. /* Not enough data so regexp would NOT be found anyway */
  1571. return FALSE;
  1572. }
  1573. /* We would process only one more argument, others are ignored */
  1574. if (args->len >= 2) {
  1575. arg = &g_array_index(args, struct expression_argument, 1);
  1576. if (arg) {
  1577. if (str != NULL) {
  1578. return match_smtp_data(task, arg, str, strlen(str));
  1579. }
  1580. else if (addr != NULL && addr->addr) {
  1581. return match_smtp_data(task, arg, addr->addr, addr->addr_len);
  1582. }
  1583. else {
  1584. if (rcpts != NULL) {
  1585. for (i = 0; i < rcpts->len; i++) {
  1586. addr = g_ptr_array_index(rcpts, i);
  1587. if (addr && addr->addr &&
  1588. match_smtp_data(task, arg,
  1589. addr->addr, addr->addr_len)) {
  1590. return TRUE;
  1591. }
  1592. }
  1593. }
  1594. }
  1595. }
  1596. }
  1597. return FALSE;
  1598. }
  1599. static inline gboolean
  1600. rspamd_check_ct_attr(const gchar *begin, gsize len,
  1601. struct expression_argument *arg_pattern)
  1602. {
  1603. rspamd_regexp_t *re;
  1604. gboolean r = FALSE;
  1605. if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
  1606. re = arg_pattern->data;
  1607. if (len > 0) {
  1608. r = rspamd_regexp_search(re,
  1609. begin, len,
  1610. NULL, NULL, FALSE, NULL);
  1611. }
  1612. if (r) {
  1613. return TRUE;
  1614. }
  1615. }
  1616. else {
  1617. /* Just do strcasecmp */
  1618. gsize plen = strlen(arg_pattern->data);
  1619. if (plen == len &&
  1620. g_ascii_strncasecmp(arg_pattern->data, begin, len) == 0) {
  1621. return TRUE;
  1622. }
  1623. }
  1624. return FALSE;
  1625. }
  1626. static gboolean
  1627. rspamd_content_type_compare_param(struct rspamd_task *task,
  1628. GArray *args,
  1629. void *unused)
  1630. {
  1631. struct expression_argument *arg, *arg1, *arg_pattern;
  1632. gboolean recursive = FALSE;
  1633. struct rspamd_mime_part *cur_part;
  1634. guint i;
  1635. rspamd_ftok_t srch;
  1636. struct rspamd_content_type_param *found = NULL, *cur;
  1637. const gchar *param_name;
  1638. if (args == NULL || args->len < 2) {
  1639. msg_warn_task("no parameters to function");
  1640. return FALSE;
  1641. }
  1642. arg = &g_array_index(args, struct expression_argument, 0);
  1643. g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1644. param_name = arg->data;
  1645. arg_pattern = &g_array_index(args, struct expression_argument, 1);
  1646. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, cur_part)
  1647. {
  1648. if (args->len >= 3) {
  1649. arg1 = &g_array_index(args, struct expression_argument, 2);
  1650. if (g_ascii_strncasecmp(arg1->data, "true",
  1651. sizeof("true") - 1) == 0) {
  1652. recursive = TRUE;
  1653. }
  1654. }
  1655. else {
  1656. /*
  1657. * If user did not specify argument, let's assume that he wants
  1658. * recursive search if mime part is multipart/mixed
  1659. */
  1660. if (IS_PART_MULTIPART(cur_part)) {
  1661. recursive = TRUE;
  1662. }
  1663. }
  1664. rspamd_ftok_t lit;
  1665. RSPAMD_FTOK_FROM_STR(&srch, param_name);
  1666. RSPAMD_FTOK_FROM_STR(&lit, "charset");
  1667. if (rspamd_ftok_equal(&srch, &lit)) {
  1668. if (rspamd_check_ct_attr(cur_part->ct->charset.begin,
  1669. cur_part->ct->charset.len, arg_pattern)) {
  1670. return TRUE;
  1671. }
  1672. }
  1673. RSPAMD_FTOK_FROM_STR(&lit, "boundary");
  1674. if (rspamd_ftok_equal(&srch, &lit)) {
  1675. if (rspamd_check_ct_attr(cur_part->ct->orig_boundary.begin,
  1676. cur_part->ct->orig_boundary.len, arg_pattern)) {
  1677. return TRUE;
  1678. }
  1679. }
  1680. if (cur_part->ct->attrs) {
  1681. found = g_hash_table_lookup(cur_part->ct->attrs, &srch);
  1682. if (found) {
  1683. DL_FOREACH(found, cur)
  1684. {
  1685. if (rspamd_check_ct_attr(cur->value.begin,
  1686. cur->value.len, arg_pattern)) {
  1687. return TRUE;
  1688. }
  1689. }
  1690. }
  1691. }
  1692. if (!recursive) {
  1693. break;
  1694. }
  1695. }
  1696. return FALSE;
  1697. }
  1698. static gboolean
  1699. rspamd_content_type_has_param(struct rspamd_task *task,
  1700. GArray *args,
  1701. void *unused)
  1702. {
  1703. struct expression_argument *arg, *arg1;
  1704. gboolean recursive = FALSE;
  1705. struct rspamd_mime_part *cur_part;
  1706. guint i;
  1707. rspamd_ftok_t srch;
  1708. struct rspamd_content_type_param *found = NULL;
  1709. const gchar *param_name;
  1710. if (args == NULL || args->len < 1) {
  1711. msg_warn_task("no parameters to function");
  1712. return FALSE;
  1713. }
  1714. arg = &g_array_index(args, struct expression_argument, 0);
  1715. g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1716. param_name = arg->data;
  1717. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, cur_part)
  1718. {
  1719. if (args->len >= 2) {
  1720. arg1 = &g_array_index(args, struct expression_argument, 1);
  1721. if (g_ascii_strncasecmp(arg1->data, "true",
  1722. sizeof("true") - 1) == 0) {
  1723. recursive = TRUE;
  1724. }
  1725. }
  1726. else {
  1727. /*
  1728. * If user did not specify argument, let's assume that he wants
  1729. * recursive search if mime part is multipart/mixed
  1730. */
  1731. if (IS_PART_MULTIPART(cur_part)) {
  1732. recursive = TRUE;
  1733. }
  1734. }
  1735. rspamd_ftok_t lit;
  1736. RSPAMD_FTOK_FROM_STR(&srch, param_name);
  1737. RSPAMD_FTOK_FROM_STR(&lit, "charset");
  1738. if (rspamd_ftok_equal(&srch, &lit)) {
  1739. if (cur_part->ct->charset.len > 0) {
  1740. return TRUE;
  1741. }
  1742. }
  1743. RSPAMD_FTOK_FROM_STR(&lit, "boundary");
  1744. if (rspamd_ftok_equal(&srch, &lit)) {
  1745. if (cur_part->ct->boundary.len > 0) {
  1746. return TRUE;
  1747. }
  1748. }
  1749. if (cur_part->ct->attrs) {
  1750. found = g_hash_table_lookup(cur_part->ct->attrs, &srch);
  1751. if (found) {
  1752. return TRUE;
  1753. }
  1754. }
  1755. if (!recursive) {
  1756. break;
  1757. }
  1758. }
  1759. return FALSE;
  1760. }
  1761. static gboolean
  1762. rspamd_content_type_check(struct rspamd_task *task,
  1763. GArray *args,
  1764. gboolean check_subtype)
  1765. {
  1766. rspamd_ftok_t *param_data, srch;
  1767. rspamd_regexp_t *re;
  1768. struct expression_argument *arg1, *arg_pattern;
  1769. struct rspamd_content_type *ct;
  1770. gint r = 0;
  1771. guint i;
  1772. gboolean recursive = FALSE;
  1773. struct rspamd_mime_part *cur_part;
  1774. if (args == NULL || args->len < 1) {
  1775. msg_warn_task("no parameters to function");
  1776. return FALSE;
  1777. }
  1778. arg_pattern = &g_array_index(args, struct expression_argument, 0);
  1779. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, cur_part)
  1780. {
  1781. ct = cur_part->ct;
  1782. if (args->len >= 2) {
  1783. arg1 = &g_array_index(args, struct expression_argument, 1);
  1784. if (g_ascii_strncasecmp(arg1->data, "true",
  1785. sizeof("true") - 1) == 0) {
  1786. recursive = TRUE;
  1787. }
  1788. }
  1789. else {
  1790. /*
  1791. * If user did not specify argument, let's assume that he wants
  1792. * recursive search if mime part is multipart/mixed
  1793. */
  1794. if (IS_PART_MULTIPART(cur_part)) {
  1795. recursive = TRUE;
  1796. }
  1797. }
  1798. if (check_subtype) {
  1799. param_data = &ct->subtype;
  1800. }
  1801. else {
  1802. param_data = &ct->type;
  1803. }
  1804. if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
  1805. re = arg_pattern->data;
  1806. if (param_data->len > 0) {
  1807. r = rspamd_regexp_search(re, param_data->begin, param_data->len,
  1808. NULL, NULL, FALSE, NULL);
  1809. }
  1810. if (r) {
  1811. return TRUE;
  1812. }
  1813. }
  1814. else {
  1815. /* Just do strcasecmp */
  1816. srch.begin = arg_pattern->data;
  1817. srch.len = strlen(arg_pattern->data);
  1818. if (rspamd_ftok_casecmp(param_data, &srch) == 0) {
  1819. return TRUE;
  1820. }
  1821. }
  1822. /* Get next part */
  1823. if (!recursive) {
  1824. break;
  1825. }
  1826. }
  1827. return FALSE;
  1828. }
  1829. static gboolean
  1830. rspamd_content_type_is_type(struct rspamd_task *task,
  1831. GArray *args,
  1832. void *unused)
  1833. {
  1834. return rspamd_content_type_check(task, args, FALSE);
  1835. }
  1836. static gboolean
  1837. rspamd_content_type_is_subtype(struct rspamd_task *task,
  1838. GArray *args,
  1839. void *unused)
  1840. {
  1841. return rspamd_content_type_check(task, args, TRUE);
  1842. }
  1843. static gboolean
  1844. compare_subtype(struct rspamd_task *task, struct rspamd_content_type *ct,
  1845. struct expression_argument *subtype)
  1846. {
  1847. rspamd_regexp_t *re;
  1848. rspamd_ftok_t srch;
  1849. gint r = 0;
  1850. if (subtype == NULL || ct == NULL) {
  1851. msg_warn_task("invalid parameters passed");
  1852. return FALSE;
  1853. }
  1854. if (subtype->type == EXPRESSION_ARGUMENT_REGEXP) {
  1855. re = subtype->data;
  1856. if (ct->subtype.len > 0) {
  1857. r = rspamd_regexp_search(re, ct->subtype.begin, ct->subtype.len,
  1858. NULL, NULL, FALSE, NULL);
  1859. }
  1860. }
  1861. else {
  1862. srch.begin = subtype->data;
  1863. srch.len = strlen(subtype->data);
  1864. /* Just do strcasecmp */
  1865. if (rspamd_ftok_casecmp(&ct->subtype, &srch) == 0) {
  1866. return TRUE;
  1867. }
  1868. }
  1869. return r;
  1870. }
  1871. static gboolean
  1872. compare_len(struct rspamd_mime_part *part, guint min, guint max)
  1873. {
  1874. if (min == 0 && max == 0) {
  1875. return TRUE;
  1876. }
  1877. if (min == 0) {
  1878. return part->parsed_data.len <= max;
  1879. }
  1880. else if (max == 0) {
  1881. return part->parsed_data.len >= min;
  1882. }
  1883. else {
  1884. return part->parsed_data.len >= min && part->parsed_data.len <= max;
  1885. }
  1886. }
  1887. static gboolean
  1888. common_has_content_part(struct rspamd_task *task,
  1889. struct expression_argument *param_type,
  1890. struct expression_argument *param_subtype,
  1891. gint min_len,
  1892. gint max_len)
  1893. {
  1894. rspamd_regexp_t *re;
  1895. struct rspamd_mime_part *part;
  1896. struct rspamd_content_type *ct;
  1897. rspamd_ftok_t srch;
  1898. gint r = 0;
  1899. guint i;
  1900. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
  1901. {
  1902. ct = part->ct;
  1903. if (ct == NULL) {
  1904. continue;
  1905. }
  1906. if (param_type->type == EXPRESSION_ARGUMENT_REGEXP) {
  1907. re = param_type->data;
  1908. if (ct->type.len > 0) {
  1909. r = rspamd_regexp_search(re, ct->type.begin, ct->type.len,
  1910. NULL, NULL, FALSE, NULL);
  1911. }
  1912. /* Also check subtype and length of the part */
  1913. if (r && param_subtype) {
  1914. r = compare_len(part, min_len, max_len) &&
  1915. compare_subtype(task, ct, param_subtype);
  1916. return r;
  1917. }
  1918. }
  1919. else {
  1920. /* Just do strcasecmp */
  1921. srch.begin = param_type->data;
  1922. srch.len = strlen(param_type->data);
  1923. if (rspamd_ftok_casecmp(&ct->type, &srch) == 0) {
  1924. if (param_subtype) {
  1925. if (compare_subtype(task, ct, param_subtype)) {
  1926. if (compare_len(part, min_len, max_len)) {
  1927. return TRUE;
  1928. }
  1929. }
  1930. }
  1931. else {
  1932. if (compare_len(part, min_len, max_len)) {
  1933. return TRUE;
  1934. }
  1935. }
  1936. }
  1937. }
  1938. }
  1939. return FALSE;
  1940. }
  1941. static gboolean
  1942. rspamd_has_content_part(struct rspamd_task *task, GArray *args, void *unused)
  1943. {
  1944. struct expression_argument *param_type = NULL, *param_subtype = NULL;
  1945. if (args == NULL) {
  1946. msg_warn_task("no parameters to function");
  1947. return FALSE;
  1948. }
  1949. param_type = &g_array_index(args, struct expression_argument, 0);
  1950. if (args->len >= 2) {
  1951. param_subtype = &g_array_index(args, struct expression_argument, 1);
  1952. }
  1953. return common_has_content_part(task, param_type, param_subtype, 0, 0);
  1954. }
  1955. static gboolean
  1956. rspamd_has_content_part_len(struct rspamd_task *task,
  1957. GArray *args,
  1958. void *unused)
  1959. {
  1960. struct expression_argument *param_type = NULL, *param_subtype = NULL;
  1961. gint min = 0, max = 0;
  1962. struct expression_argument *arg;
  1963. if (args == NULL) {
  1964. msg_warn_task("no parameters to function");
  1965. return FALSE;
  1966. }
  1967. param_type = &g_array_index(args, struct expression_argument, 0);
  1968. if (args->len >= 2) {
  1969. param_subtype = &g_array_index(args, struct expression_argument, 1);
  1970. if (args->len >= 3) {
  1971. arg = &g_array_index(args, struct expression_argument, 2);
  1972. errno = 0;
  1973. min = strtoul(arg->data, NULL, 10);
  1974. g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1975. if (errno != 0) {
  1976. msg_warn_task("invalid numeric value '%s': %s",
  1977. (gchar *) arg->data,
  1978. strerror(errno));
  1979. return FALSE;
  1980. }
  1981. if (args->len >= 4) {
  1982. arg = &g_array_index(args, struct expression_argument, 3);
  1983. g_assert(arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1984. max = strtoul(arg->data, NULL, 10);
  1985. if (errno != 0) {
  1986. msg_warn_task("invalid numeric value '%s': %s",
  1987. (gchar *) arg->data,
  1988. strerror(errno));
  1989. return FALSE;
  1990. }
  1991. }
  1992. }
  1993. }
  1994. return common_has_content_part(task, param_type, param_subtype, min, max);
  1995. }
  1996. static gboolean
  1997. rspamd_is_empty_body(struct rspamd_task *task,
  1998. GArray *args,
  1999. void *unused)
  2000. {
  2001. struct rspamd_mime_part *part;
  2002. guint i;
  2003. PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), i, part)
  2004. {
  2005. if (part->parsed_data.len > 0) {
  2006. return FALSE;
  2007. }
  2008. }
  2009. return TRUE;
  2010. }
  /*
   * Helpers for looking up a task flag by its textual name.
   *
   * All of them expect the variables `task`, `found` and `result` to exist in
   * the scope where they are expanded.
   */

  /* Store in `result` whether any bit of `flag` is set in task->flags */
  #define TASK_FLAG_READ(flag)                          \
      do {                                              \
          result = ((task->flags & (flag)) != 0);       \
      } while (0)

  /*
   * If nothing has been found yet and `flag` equals `strname`, read the task
   * flag `macro` into `result` and mark the lookup as found
   */
  #define TASK_GET_FLAG(flag, strname, macro)           \
      do {                                              \
          if (found || strcmp((flag), strname) != 0) {  \
              break;                                    \
          }                                             \
          TASK_FLAG_READ((macro));                      \
          found = TRUE;                                 \
      } while (0)

  /* Store in `result` whether any bit of `flag` is set in task->protocol_flags */
  #define TASK_PROTOCOL_FLAG_READ(flag)                     \
      do {                                                  \
          result = ((task->protocol_flags & (flag)) != 0);  \
      } while (0)

  /* Same as TASK_GET_FLAG, but the bit is read from task->protocol_flags */
  #define TASK_GET_PROTOCOL_FLAG(flag, strname, macro)  \
      do {                                              \
          if (found || strcmp((flag), strname) != 0) {  \
              break;                                    \
          }                                             \
          TASK_PROTOCOL_FLAG_READ((macro));             \
          found = TRUE;                                 \
      } while (0)
  2033. static gboolean
  2034. rspamd_has_flag_expr(struct rspamd_task *task,
  2035. GArray *args,
  2036. void *unused)
  2037. {
  2038. gboolean found = FALSE, result = FALSE;
  2039. struct expression_argument *flag_arg;
  2040. const gchar *flag_str;
  2041. if (args == NULL) {
  2042. msg_warn_task("no parameters to function");
  2043. return FALSE;
  2044. }
  2045. flag_arg = &g_array_index(args, struct expression_argument, 0);
  2046. if (flag_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  2047. msg_warn_task("invalid parameter to function");
  2048. return FALSE;
  2049. }
  2050. flag_str = (const gchar *) flag_arg->data;
  2051. TASK_GET_FLAG(flag_str, "pass_all", RSPAMD_TASK_FLAG_PASS_ALL);
  2052. TASK_GET_FLAG(flag_str, "no_log", RSPAMD_TASK_FLAG_NO_LOG);
  2053. TASK_GET_FLAG(flag_str, "no_stat", RSPAMD_TASK_FLAG_NO_STAT);
  2054. TASK_GET_FLAG(flag_str, "skip", RSPAMD_TASK_FLAG_SKIP);
  2055. TASK_GET_PROTOCOL_FLAG(flag_str, "extended_urls",
  2056. RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS);
  2057. TASK_GET_FLAG(flag_str, "learn_spam", RSPAMD_TASK_FLAG_LEARN_SPAM);
  2058. TASK_GET_FLAG(flag_str, "learn_ham", RSPAMD_TASK_FLAG_LEARN_HAM);
  2059. TASK_GET_FLAG(flag_str, "greylisted", RSPAMD_TASK_FLAG_GREYLISTED);
  2060. TASK_GET_FLAG(flag_str, "broken_headers",
  2061. RSPAMD_TASK_FLAG_BROKEN_HEADERS);
  2062. TASK_GET_FLAG(flag_str, "skip_process",
  2063. RSPAMD_TASK_FLAG_SKIP_PROCESS);
  2064. TASK_GET_PROTOCOL_FLAG(flag_str, "milter",
  2065. RSPAMD_TASK_PROTOCOL_FLAG_MILTER);
  2066. TASK_GET_FLAG(flag_str, "bad_unicode",
  2067. RSPAMD_TASK_FLAG_BAD_UNICODE);
  2068. if (!found) {
  2069. msg_warn_task("invalid flag name %s", flag_str);
  2070. return FALSE;
  2071. }
  2072. return result;
  2073. }
  2074. static gboolean
  2075. rspamd_has_symbol_expr(struct rspamd_task *task,
  2076. GArray *args,
  2077. void *unused)
  2078. {
  2079. struct expression_argument *sym_arg;
  2080. const gchar *symbol_str;
  2081. if (args == NULL) {
  2082. msg_warn_task("no parameters to function");
  2083. return FALSE;
  2084. }
  2085. sym_arg = &g_array_index(args, struct expression_argument, 0);
  2086. if (sym_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  2087. msg_warn_task("invalid parameter to function");
  2088. return FALSE;
  2089. }
  2090. symbol_str = (const gchar *) sym_arg->data;
  2091. if (rspamd_task_find_symbol_result(task, symbol_str, NULL)) {
  2092. return TRUE;
  2093. }
  2094. return FALSE;
  2095. }