You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

mime_expressions.c 55KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <contrib/libucl/ucl.h>
  17. #include "config.h"
  18. #include "util.h"
  19. #include "cfg_file.h"
  20. #include "rspamd.h"
  21. #include "message.h"
  22. #include "mime_expressions.h"
  23. #include "libserver/html/html.h"
  24. #include "lua/lua_common.h"
  25. #include "utlist.h"
  26. gboolean rspamd_compare_encoding (struct rspamd_task *task,
  27. GArray * args,
  28. void *unused);
  29. gboolean rspamd_header_exists (struct rspamd_task *task,
  30. GArray * args,
  31. void *unused);
  32. gboolean rspamd_parts_distance (struct rspamd_task *task,
  33. GArray * args,
  34. void *unused);
  35. gboolean rspamd_recipients_distance (struct rspamd_task *task,
  36. GArray * args,
  37. void *unused);
  38. gboolean rspamd_has_only_html_part (struct rspamd_task *task,
  39. GArray * args,
  40. void *unused);
  41. gboolean rspamd_is_recipients_sorted (struct rspamd_task *task,
  42. GArray * args,
  43. void *unused);
  44. gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task,
  45. GArray * args,
  46. void *unused);
  47. gboolean rspamd_is_html_balanced (struct rspamd_task *task,
  48. GArray * args,
  49. void *unused);
  50. gboolean rspamd_has_html_tag (struct rspamd_task *task,
  51. GArray * args,
  52. void *unused);
  53. gboolean rspamd_has_fake_html (struct rspamd_task *task,
  54. GArray * args,
  55. void *unused);
  56. static gboolean rspamd_raw_header_exists (struct rspamd_task *task,
  57. GArray * args,
  58. void *unused);
  59. static gboolean rspamd_check_smtp_data (struct rspamd_task *task,
  60. GArray * args,
  61. void *unused);
  62. static gboolean rspamd_content_type_is_type (struct rspamd_task * task,
  63. GArray * args,
  64. void *unused);
  65. static gboolean rspamd_content_type_is_subtype (struct rspamd_task *task,
  66. GArray * args,
  67. void *unused);
  68. static gboolean rspamd_content_type_has_param (struct rspamd_task * task,
  69. GArray * args,
  70. void *unused);
  71. static gboolean rspamd_content_type_compare_param (struct rspamd_task * task,
  72. GArray * args,
  73. void *unused);
  74. static gboolean rspamd_has_content_part (struct rspamd_task *task,
  75. GArray * args,
  76. void *unused);
  77. static gboolean rspamd_has_content_part_len (struct rspamd_task *task,
  78. GArray * args,
  79. void *unused);
  80. static gboolean rspamd_is_empty_body (struct rspamd_task *task,
  81. GArray * args,
  82. void *unused);
  83. static gboolean rspamd_has_flag_expr (struct rspamd_task *task,
  84. GArray * args,
  85. void *unused);
  86. static gboolean rspamd_has_symbol_expr (struct rspamd_task *task,
  87. GArray * args,
  88. void *unused);
  89. static rspamd_expression_atom_t * rspamd_mime_expr_parse (const gchar *line, gsize len,
  90. rspamd_mempool_t *pool, gpointer ud, GError **err);
  91. static gdouble rspamd_mime_expr_process (void *ud, rspamd_expression_atom_t *atom);
  92. static gint rspamd_mime_expr_priority (rspamd_expression_atom_t *atom);
  93. static void rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom);
  94. /**
  95. * Regexp structure
  96. */
  97. struct rspamd_regexp_atom {
  98. enum rspamd_re_type type; /**< regexp type */
  99. gchar *regexp_text; /**< regexp text representation */
  100. rspamd_regexp_t *regexp; /**< regexp structure */
  101. union {
  102. const gchar *header; /**< header name for header regexps */
  103. const gchar *selector; /**< selector name for lua selector regexp */
  104. } extra;
  105. gboolean is_test; /**< true if this expression must be tested */
  106. gboolean is_strong; /**< true if headers search must be case sensitive */
  107. gboolean is_multiple; /**< true if we need to match all inclusions of atom */
  108. };
  109. /**
  110. * Rspamd expression function
  111. */
  112. struct rspamd_function_atom {
  113. gchar *name; /**< name of function */
  114. GArray *args; /**< its args */
  115. };
  /**
   * Kind of a mime expression atom, as decided by the atom parser.
   */
  enum rspamd_mime_atom_type {
      /* Regular expression atom; also the parser's default */
      MIME_ATOM_REGEXP = 0,
      /* `name(...)` call form */
      MIME_ATOM_INTERNAL_FUNCTION,
      /* Bare string, treated as a likely lua function name */
      MIME_ATOM_LUA_FUNCTION,
      /* New style: atom written with the `lua:` prefix */
      MIME_ATOM_LOCAL_LUA_FUNCTION,
  };
  122. struct rspamd_mime_atom {
  123. gchar *str;
  124. union {
  125. struct rspamd_regexp_atom *re;
  126. struct rspamd_function_atom *func;
  127. const gchar *lua_function;
  128. gint lua_cbref;
  129. } d;
  130. enum rspamd_mime_atom_type type;
  131. };
  132. /*
  133. * List of internal functions of rspamd
  134. * Sorted by name to use bsearch
  135. */
  136. static struct _fl {
  137. const gchar *name;
  138. rspamd_internal_func_t func;
  139. void *user_data;
  140. } rspamd_functions_list[] = {
  141. {"check_smtp_data", rspamd_check_smtp_data, NULL},
  142. {"compare_encoding", rspamd_compare_encoding, NULL},
  143. {"compare_parts_distance", rspamd_parts_distance, NULL},
  144. {"compare_recipients_distance", rspamd_recipients_distance, NULL},
  145. {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
  146. {"content_type_compare_param", rspamd_content_type_compare_param, NULL},
  147. {"content_type_has_param", rspamd_content_type_has_param, NULL},
  148. {"content_type_is_subtype", rspamd_content_type_is_subtype, NULL},
  149. {"content_type_is_type", rspamd_content_type_is_type, NULL},
  150. {"has_content_part", rspamd_has_content_part, NULL},
  151. {"has_content_part_len", rspamd_has_content_part_len, NULL},
  152. {"has_fake_html", rspamd_has_fake_html, NULL},
  153. {"has_flag", rspamd_has_flag_expr, NULL},
  154. {"has_html_tag", rspamd_has_html_tag, NULL},
  155. {"has_only_html_part", rspamd_has_only_html_part, NULL},
  156. {"has_symbol", rspamd_has_symbol_expr, NULL},
  157. {"header_exists", rspamd_header_exists, NULL},
  158. {"is_empty_body", rspamd_is_empty_body, NULL},
  159. {"is_html_balanced", rspamd_is_html_balanced, NULL},
  160. {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL},
  161. {"raw_header_exists", rspamd_raw_header_exists, NULL},
  162. };
  163. const struct rspamd_atom_subr mime_expr_subr = {
  164. .parse = rspamd_mime_expr_parse,
  165. .process = rspamd_mime_expr_process,
  166. .priority = rspamd_mime_expr_priority,
  167. .destroy = rspamd_mime_expr_destroy
  168. };
  169. static struct _fl *list_ptr = &rspamd_functions_list[0];
  170. static guint32 functions_number = sizeof (rspamd_functions_list) /
  171. sizeof (struct _fl);
  172. static gboolean list_allocated = FALSE;
  173. /* Bsearch routine */
  174. static gint
  175. fl_cmp (const void *s1, const void *s2)
  176. {
  177. struct _fl *fl1 = (struct _fl *)s1;
  178. struct _fl *fl2 = (struct _fl *)s2;
  179. return strcmp (fl1->name, fl2->name);
  180. }
  181. static GQuark
  182. rspamd_mime_expr_quark (void)
  183. {
  184. return g_quark_from_static_string ("mime-expressions");
  185. }
  /*
   * True when the `len` bytes at `str` match the string literal `type`:
   * the lengths must be equal and rspamd_lc_cmp() must report equality
   * (presumably a case-insensitive comparison - confirm against its definition).
   * `type` must be a string literal, since its length is taken with sizeof.
   */
  #define TYPE_CHECK(str, type, len) \
      (sizeof (type) - 1 == (len) && \
       rspamd_lc_cmp ((str), (type), (len)) == 0)
  187. static gboolean
  188. rspamd_parse_long_option (const gchar *start, gsize len,
  189. struct rspamd_regexp_atom *a)
  190. {
  191. gboolean ret = FALSE;
  192. if (TYPE_CHECK (start, "body", len)) {
  193. ret = TRUE;
  194. a->type = RSPAMD_RE_BODY;
  195. }
  196. else if (TYPE_CHECK (start, "part", len) ||
  197. TYPE_CHECK (start, "mime", len)) {
  198. ret = TRUE;
  199. a->type = RSPAMD_RE_MIME;
  200. }
  201. else if (TYPE_CHECK (start, "raw_part", len) ||
  202. TYPE_CHECK (start, "raw_mime", len) ||
  203. TYPE_CHECK (start, "mime_raw", len)) {
  204. ret = TRUE;
  205. a->type = RSPAMD_RE_RAWMIME;
  206. }
  207. else if (TYPE_CHECK (start, "header", len)) {
  208. ret = TRUE;
  209. a->type = RSPAMD_RE_HEADER;
  210. }
  211. else if (TYPE_CHECK (start, "mime_header", len) ||
  212. TYPE_CHECK (start, "header_mime", len)) {
  213. ret = TRUE;
  214. a->type = RSPAMD_RE_MIMEHEADER;
  215. }
  216. else if (TYPE_CHECK (start, "raw_header", len) ||
  217. TYPE_CHECK (start, "header_raw", len)) {
  218. ret = TRUE;
  219. a->type = RSPAMD_RE_RAWHEADER;
  220. }
  221. else if (TYPE_CHECK (start, "all_header", len) ||
  222. TYPE_CHECK (start, "header_all", len) ||
  223. TYPE_CHECK (start, "all_headers", len)) {
  224. ret = TRUE;
  225. a->type = RSPAMD_RE_ALLHEADER;
  226. }
  227. else if (TYPE_CHECK (start, "url", len)) {
  228. ret = TRUE;
  229. a->type = RSPAMD_RE_URL;
  230. }
  231. else if (TYPE_CHECK (start, "email", len)) {
  232. ret = TRUE;
  233. a->type = RSPAMD_RE_EMAIL;
  234. }
  235. else if (TYPE_CHECK (start, "sa_body", len)) {
  236. ret = TRUE;
  237. a->type = RSPAMD_RE_SABODY;
  238. }
  239. else if (TYPE_CHECK (start, "sa_raw_body", len) ||
  240. TYPE_CHECK (start, "sa_body_raw", len)) {
  241. ret = TRUE;
  242. a->type = RSPAMD_RE_SARAWBODY;
  243. }
  244. else if (TYPE_CHECK (start, "words", len)) {
  245. ret = TRUE;
  246. a->type = RSPAMD_RE_WORDS;
  247. }
  248. else if (TYPE_CHECK (start, "raw_words", len)) {
  249. ret = TRUE;
  250. a->type = RSPAMD_RE_RAWWORDS;
  251. }
  252. else if (TYPE_CHECK (start, "stem_words", len)) {
  253. ret = TRUE;
  254. a->type = RSPAMD_RE_STEMWORDS;
  255. }
  256. else if (TYPE_CHECK (start, "selector", len)) {
  257. ret = TRUE;
  258. a->type = RSPAMD_RE_SELECTOR;
  259. }
  260. return ret;
  261. }
  262. /*
  263. * Rspamd regexp utility functions
  264. */
  265. static struct rspamd_regexp_atom *
  266. rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line,
  267. struct rspamd_config *cfg)
  268. {
  269. const gchar *begin, *end, *p, *src, *start, *brace;
  270. gchar *dbegin, *dend, *extra = NULL;
  271. struct rspamd_regexp_atom *result;
  272. GError *err = NULL;
  273. GString *re_flags;
  274. if (line == NULL) {
  275. msg_err_pool ("cannot parse NULL line");
  276. return NULL;
  277. }
  278. src = line;
  279. result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_atom));
  280. /* Skip whitespaces */
  281. while (g_ascii_isspace (*line)) {
  282. line++;
  283. }
  284. if (*line == '\0') {
  285. msg_warn_pool ("got empty regexp");
  286. return NULL;
  287. }
  288. result->type = RSPAMD_RE_MAX;
  289. start = line;
  290. /* First try to find header name */
  291. begin = strchr (line, '/');
  292. if (begin != NULL) {
  293. p = begin;
  294. end = NULL;
  295. while (p != line) {
  296. if (*p == '=') {
  297. end = p;
  298. break;
  299. }
  300. p--;
  301. }
  302. if (end) {
  303. extra = rspamd_mempool_alloc (pool, end - line + 1);
  304. rspamd_strlcpy (extra, line, end - line + 1);
  305. line = end;
  306. }
  307. }
  308. else {
  309. extra = rspamd_mempool_strdup (pool, line);
  310. result->type = RSPAMD_RE_MAX;
  311. line = start;
  312. }
  313. /* Find begin of regexp */
  314. while (*line && *line != '/') {
  315. line++;
  316. }
  317. if (*line != '\0') {
  318. begin = line + 1;
  319. }
  320. else if (extra == NULL) {
  321. /* Assume that line without // is just a header name */
  322. extra = rspamd_mempool_strdup (pool, line);
  323. result->type = RSPAMD_RE_HEADER;
  324. return result;
  325. }
  326. else {
  327. /* We got header name earlier but have not found // expression, so it is invalid regexp */
  328. msg_warn_pool (
  329. "got no header name (eg. header=) but without corresponding regexp, %s",
  330. src);
  331. return NULL;
  332. }
  333. /* Find end */
  334. end = begin;
  335. while (*end && (*end != '/' || *(end - 1) == '\\')) {
  336. end++;
  337. }
  338. if (end == begin || *end != '/') {
  339. msg_warn_pool ("no trailing / in regexp %s", src);
  340. return NULL;
  341. }
  342. /* Parse flags */
  343. p = end + 1;
  344. re_flags = g_string_sized_new (32);
  345. while (p != NULL) {
  346. switch (*p) {
  347. case 'i':
  348. case 'm':
  349. case 's':
  350. case 'x':
  351. case 'u':
  352. case 'O':
  353. case 'r':
  354. case 'L':
  355. /* Handled by rspamd_regexp_t */
  356. g_string_append_c (re_flags, *p);
  357. p++;
  358. break;
  359. case 'o':
  360. p++;
  361. break;
  362. /* Type flags */
  363. case 'H':
  364. result->type = RSPAMD_RE_HEADER;
  365. p++;
  366. break;
  367. case 'R':
  368. result->type = RSPAMD_RE_ALLHEADER;
  369. p++;
  370. break;
  371. case 'B':
  372. result->type = RSPAMD_RE_MIMEHEADER;
  373. p++;
  374. break;
  375. case 'C':
  376. result->type = RSPAMD_RE_SABODY;
  377. p++;
  378. break;
  379. case 'D':
  380. result->type = RSPAMD_RE_SARAWBODY;
  381. p++;
  382. break;
  383. case 'M':
  384. result->type = RSPAMD_RE_BODY;
  385. p++;
  386. break;
  387. case 'P':
  388. result->type = RSPAMD_RE_MIME;
  389. p++;
  390. break;
  391. case 'Q':
  392. result->type = RSPAMD_RE_RAWMIME;
  393. p++;
  394. break;
  395. case 'U':
  396. result->type = RSPAMD_RE_URL;
  397. p++;
  398. break;
  399. case 'X':
  400. result->type = RSPAMD_RE_RAWHEADER;
  401. p++;
  402. break;
  403. case '$':
  404. result->type = RSPAMD_RE_SELECTOR;
  405. p++;
  406. break;
  407. case '{':
  408. /* Long definition */
  409. if ((brace = strchr (p + 1, '}')) != NULL) {
  410. if (!rspamd_parse_long_option (p + 1, brace - (p + 1), result)) {
  411. msg_warn_pool ("invalid long regexp type: %*s in '%s'",
  412. (int)(brace - (p + 1)), p + 1, src);
  413. p = NULL;
  414. }
  415. else {
  416. p = brace + 1;
  417. }
  418. }
  419. else {
  420. p = NULL;
  421. }
  422. break;
  423. /* Other flags */
  424. case 'T':
  425. result->is_test = TRUE;
  426. p++;
  427. break;
  428. case 'S':
  429. result->is_strong = TRUE;
  430. p++;
  431. break;
  432. case 'A':
  433. result->is_multiple = TRUE;
  434. p++;
  435. break;
  436. /* Stop flags parsing */
  437. default:
  438. p = NULL;
  439. break;
  440. }
  441. }
  442. if (result->type >= RSPAMD_RE_MAX) {
  443. if (extra) {
  444. /* Assume header regexp */
  445. result->extra.header = extra;
  446. result->type = RSPAMD_RE_HEADER;
  447. }
  448. else {
  449. msg_err_pool ("could not read regexp: %s, unknown type", src);
  450. return NULL;
  451. }
  452. }
  453. if ((result->type == RSPAMD_RE_HEADER ||
  454. result->type == RSPAMD_RE_RAWHEADER ||
  455. result->type == RSPAMD_RE_MIMEHEADER)) {
  456. if (extra == NULL) {
  457. msg_err_pool ("header regexp: '%s' has no header part", src);
  458. return NULL;
  459. }
  460. else {
  461. result->extra.header = extra;
  462. }
  463. }
  464. if (result->type == RSPAMD_RE_SELECTOR) {
  465. if (extra == NULL) {
  466. msg_err_pool ("selector regexp: '%s' has no selector part", src);
  467. return NULL;
  468. }
  469. else {
  470. result->extra.selector = extra;
  471. }
  472. }
  473. result->regexp_text = rspamd_mempool_strdup (pool, start);
  474. dbegin = result->regexp_text + (begin - start);
  475. dend = result->regexp_text + (end - start);
  476. *dend = '\0';
  477. result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
  478. &err);
  479. g_string_free (re_flags, TRUE);
  480. if (result->regexp == NULL || err != NULL) {
  481. msg_warn_pool ("could not read regexp: %s while reading regexp %e",
  482. src, err);
  483. if (err) {
  484. g_error_free (err);
  485. }
  486. return NULL;
  487. }
  488. if (result->is_multiple) {
  489. rspamd_regexp_set_maxhits (result->regexp, 0);
  490. }
  491. else {
  492. rspamd_regexp_set_maxhits (result->regexp, 1);
  493. }
  494. rspamd_regexp_set_ud (result->regexp, result);
  495. *dend = '/';
  496. return result;
  497. }
  498. struct rspamd_function_atom *
  499. rspamd_mime_expr_parse_function_atom (rspamd_mempool_t *pool, const gchar *input)
  500. {
  501. const gchar *obrace, *ebrace, *p, *c;
  502. gchar t, *databuf;
  503. guint len;
  504. struct rspamd_function_atom *res;
  505. struct expression_argument arg;
  506. GError *err = NULL;
  507. enum {
  508. start_read_argument = 0,
  509. in_string,
  510. in_regexp,
  511. got_backslash,
  512. got_comma
  513. } state, prev_state = 0;
  514. obrace = strchr (input, '(');
  515. ebrace = strrchr (input, ')');
  516. g_assert (obrace != NULL && ebrace != NULL);
  517. res = rspamd_mempool_alloc0 (pool, sizeof (*res));
  518. res->name = rspamd_mempool_alloc (pool, obrace - input + 1);
  519. rspamd_strlcpy (res->name, input, obrace - input + 1);
  520. res->args = g_array_new (FALSE, FALSE, sizeof (struct expression_argument));
  521. p = obrace + 1;
  522. c = p;
  523. state = start_read_argument;
  524. /* Read arguments */
  525. while (p <= ebrace) {
  526. t = *p;
  527. switch (state) {
  528. case start_read_argument:
  529. if (t == '/') {
  530. state = in_regexp;
  531. c = p;
  532. }
  533. else if (!g_ascii_isspace (t)) {
  534. state = in_string;
  535. if (t == '\'' || t == '\"') {
  536. c = p + 1;
  537. }
  538. else {
  539. c = p;
  540. }
  541. }
  542. p ++;
  543. break;
  544. case in_regexp:
  545. if (t == '\\') {
  546. state = got_backslash;
  547. prev_state = in_regexp;
  548. }
  549. else if (t == ',' || p == ebrace) {
  550. len = p - c + 1;
  551. databuf = rspamd_mempool_alloc (pool, len);
  552. rspamd_strlcpy (databuf, c, len);
  553. arg.type = EXPRESSION_ARGUMENT_REGEXP;
  554. arg.data = rspamd_regexp_cache_create (NULL, databuf, NULL, &err);
  555. if (arg.data == NULL) {
  556. /* Fallback to string */
  557. msg_warn ("cannot parse slashed argument %s as regexp: %s",
  558. databuf, err->message);
  559. g_error_free (err);
  560. arg.type = EXPRESSION_ARGUMENT_NORMAL;
  561. arg.data = databuf;
  562. }
  563. g_array_append_val (res->args, arg);
  564. state = got_comma;
  565. }
  566. p ++;
  567. break;
  568. case in_string:
  569. if (t == '\\') {
  570. state = got_backslash;
  571. prev_state = in_string;
  572. }
  573. else if (t == ',' || p == ebrace) {
  574. if (*(p - 1) == '\'' || *(p - 1) == '\"') {
  575. len = p - c;
  576. }
  577. else {
  578. len = p - c + 1;
  579. }
  580. databuf = rspamd_mempool_alloc (pool, len);
  581. rspamd_strlcpy (databuf, c, len);
  582. arg.type = EXPRESSION_ARGUMENT_NORMAL;
  583. arg.data = databuf;
  584. g_array_append_val (res->args, arg);
  585. state = got_comma;
  586. }
  587. p ++;
  588. break;
  589. case got_backslash:
  590. state = prev_state;
  591. p ++;
  592. break;
  593. case got_comma:
  594. state = start_read_argument;
  595. break;
  596. }
  597. }
  598. return res;
  599. }
  600. static rspamd_expression_atom_t *
  601. rspamd_mime_expr_parse (const gchar *line, gsize len,
  602. rspamd_mempool_t *pool, gpointer ud, GError **err)
  603. {
  604. rspamd_expression_atom_t *a = NULL;
  605. struct rspamd_mime_atom *mime_atom = NULL;
  606. const gchar *p, *end, *c = NULL;
  607. struct rspamd_mime_expr_ud *real_ud = (struct rspamd_mime_expr_ud *)ud;
  608. struct rspamd_config *cfg;
  609. rspamd_regexp_t *own_re;
  610. gchar t;
  611. gint type = MIME_ATOM_REGEXP, obraces = 0, ebraces = 0;
  612. enum {
  613. in_header = 0,
  614. got_slash,
  615. in_regexp,
  616. got_backslash,
  617. got_second_slash,
  618. in_flags,
  619. in_flags_brace,
  620. got_obrace,
  621. in_function,
  622. in_local_function,
  623. got_ebrace,
  624. end_atom,
  625. bad_atom
  626. } state = 0, prev_state = 0;
  627. p = line;
  628. end = p + len;
  629. cfg = real_ud->cfg;
  630. while (p < end) {
  631. t = *p;
  632. switch (state) {
  633. case in_header:
  634. if (t == '/') {
  635. /* Regexp */
  636. state = got_slash;
  637. }
  638. else if (t == '(') {
  639. /* Function */
  640. state = got_obrace;
  641. }
  642. else if (!g_ascii_isalnum (t) && t != '_' && t != '-' && t != '=') {
  643. if (t == ':') {
  644. if (p - line == 3 && memcmp (line, "lua", 3) == 0) {
  645. type = MIME_ATOM_LOCAL_LUA_FUNCTION;
  646. state = in_local_function;
  647. c = p + 1;
  648. }
  649. }
  650. else {
  651. /* Likely lua function, identified by just a string */
  652. type = MIME_ATOM_LUA_FUNCTION;
  653. state = end_atom;
  654. /* Do not increase p */
  655. continue;
  656. }
  657. }
  658. else if (g_ascii_isspace (t)) {
  659. state = bad_atom;
  660. }
  661. p ++;
  662. break;
  663. case got_slash:
  664. state = in_regexp;
  665. break;
  666. case in_regexp:
  667. if (t == '\\') {
  668. state = got_backslash;
  669. prev_state = in_regexp;
  670. }
  671. else if (t == '/') {
  672. state = got_second_slash;
  673. }
  674. p ++;
  675. break;
  676. case got_second_slash:
  677. state = in_flags;
  678. break;
  679. case in_flags:
  680. if (t == '{') {
  681. state = in_flags_brace;
  682. p ++;
  683. }
  684. else if (!g_ascii_isalpha (t) && t != '$') {
  685. state = end_atom;
  686. }
  687. else {
  688. p ++;
  689. }
  690. break;
  691. case in_flags_brace:
  692. if (t == '}') {
  693. state = in_flags;
  694. }
  695. p ++;
  696. break;
  697. case got_backslash:
  698. state = prev_state;
  699. p ++;
  700. break;
  701. case got_obrace:
  702. state = in_function;
  703. type = MIME_ATOM_INTERNAL_FUNCTION;
  704. obraces ++;
  705. break;
  706. case in_function:
  707. if (t == '\\') {
  708. state = got_backslash;
  709. prev_state = in_function;
  710. }
  711. else if (t == '(') {
  712. obraces ++;
  713. }
  714. else if (t == ')') {
  715. ebraces ++;
  716. if (ebraces == obraces) {
  717. state = got_ebrace;
  718. }
  719. }
  720. p ++;
  721. break;
  722. case in_local_function:
  723. if (!(g_ascii_isalnum (t) || t == '-' || t == '_')) {
  724. g_assert (c != NULL);
  725. state = end_atom;
  726. }
  727. else {
  728. p++;
  729. }
  730. break;
  731. case got_ebrace:
  732. state = end_atom;
  733. break;
  734. case bad_atom:
  735. g_set_error (err, rspamd_mime_expr_quark(), 100, "cannot parse"
  736. " mime atom '%s' when reading symbol '%c' at offset %d, "
  737. "near %.*s", line, t, (gint)(p - line),
  738. (gint)MIN (end - p, 10), p);
  739. return NULL;
  740. case end_atom:
  741. goto set;
  742. }
  743. }
  744. set:
  745. if (p - line == 0 || (state != got_ebrace && state != got_second_slash &&
  746. state != in_flags && state != end_atom)) {
  747. g_set_error (err, rspamd_mime_expr_quark(), 200, "incomplete or empty"
  748. " mime atom");
  749. return NULL;
  750. }
  751. mime_atom = rspamd_mempool_alloc (pool, sizeof (*mime_atom));
  752. mime_atom->type = type;
  753. mime_atom->str = rspamd_mempool_alloc (pool, p - line + 1);
  754. rspamd_strlcpy (mime_atom->str, line, p - line + 1);
  755. if (type == MIME_ATOM_REGEXP) {
  756. mime_atom->d.re = rspamd_mime_expr_parse_regexp_atom (pool,
  757. mime_atom->str, cfg);
  758. if (mime_atom->d.re == NULL) {
  759. g_set_error (err, rspamd_mime_expr_quark(), 200,
  760. "cannot parse regexp '%s'",
  761. mime_atom->str);
  762. goto err;
  763. }
  764. else {
  765. gint lua_cbref = -1;
  766. /* Check regexp condition */
  767. if (real_ud->conf_obj != NULL) {
  768. const ucl_object_t *re_conditions = ucl_object_lookup (real_ud->conf_obj,
  769. "re_conditions");
  770. if (re_conditions != NULL) {
  771. if (ucl_object_type (re_conditions) != UCL_OBJECT) {
  772. g_set_error (err, rspamd_mime_expr_quark (), 320,
  773. "re_conditions is not a table for '%s'",
  774. mime_atom->str);
  775. rspamd_regexp_unref (mime_atom->d.re->regexp);
  776. goto err;
  777. }
  778. const ucl_object_t *function_obj = ucl_object_lookup (re_conditions,
  779. mime_atom->str);
  780. if (function_obj != NULL) {
  781. if (ucl_object_type (function_obj) != UCL_USERDATA) {
  782. g_set_error (err, rspamd_mime_expr_quark (), 320,
  783. "condition for '%s' is invalid, must be function",
  784. mime_atom->str);
  785. rspamd_regexp_unref (mime_atom->d.re->regexp);
  786. goto err;
  787. }
  788. struct ucl_lua_funcdata *fd = function_obj->value.ud;
  789. lua_cbref = fd->idx;
  790. }
  791. }
  792. }
  793. if (lua_cbref != -1) {
  794. msg_info_config ("added condition for regexp %s", mime_atom->str);
  795. }
  796. /* Register new item in the cache */
  797. if (mime_atom->d.re->type == RSPAMD_RE_HEADER ||
  798. mime_atom->d.re->type == RSPAMD_RE_RAWHEADER ||
  799. mime_atom->d.re->type == RSPAMD_RE_MIMEHEADER) {
  800. if (mime_atom->d.re->extra.header != NULL) {
  801. own_re = mime_atom->d.re->regexp;
  802. mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
  803. mime_atom->d.re->regexp,
  804. mime_atom->d.re->type,
  805. mime_atom->d.re->extra.header,
  806. strlen (mime_atom->d.re->extra.header) + 1,
  807. lua_cbref);
  808. /* Pass ownership to the cache */
  809. rspamd_regexp_unref (own_re);
  810. }
  811. else {
  812. /* We have header regexp, but no header name is detected */
  813. g_set_error (err,
  814. rspamd_mime_expr_quark (),
  815. 200,
  816. "no header name in header regexp: '%s'",
  817. mime_atom->str);
  818. rspamd_regexp_unref (mime_atom->d.re->regexp);
  819. goto err;
  820. }
  821. }
  822. else if (mime_atom->d.re->type == RSPAMD_RE_SELECTOR) {
  823. if (mime_atom->d.re->extra.selector != NULL) {
  824. own_re = mime_atom->d.re->regexp;
  825. mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
  826. mime_atom->d.re->regexp,
  827. mime_atom->d.re->type,
  828. mime_atom->d.re->extra.selector,
  829. strlen (mime_atom->d.re->extra.selector) + 1,
  830. lua_cbref);
  831. /* Pass ownership to the cache */
  832. rspamd_regexp_unref (own_re);
  833. }
  834. else {
  835. /* We have selector regexp, but no selector name is detected */
  836. g_set_error (err,
  837. rspamd_mime_expr_quark (),
  838. 200,
  839. "no selector name in selector regexp: '%s'",
  840. mime_atom->str);
  841. rspamd_regexp_unref (mime_atom->d.re->regexp);
  842. goto err;
  843. }
  844. }
  845. else {
  846. own_re = mime_atom->d.re->regexp;
  847. mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
  848. mime_atom->d.re->regexp,
  849. mime_atom->d.re->type,
  850. NULL,
  851. 0,
  852. lua_cbref);
  853. /* Pass ownership to the cache */
  854. rspamd_regexp_unref (own_re);
  855. }
  856. }
  857. }
  858. else if (type == MIME_ATOM_LUA_FUNCTION) {
  859. mime_atom->d.lua_function = mime_atom->str;
  860. lua_getglobal (cfg->lua_state, mime_atom->str);
  861. if (lua_type (cfg->lua_state, -1) != LUA_TFUNCTION) {
  862. g_set_error (err, rspamd_mime_expr_quark(), 200,
  863. "no such lua function '%s'",
  864. mime_atom->str);
  865. lua_pop (cfg->lua_state, 1);
  866. goto err;
  867. }
  868. lua_pop (cfg->lua_state, 1);
  869. }
  870. else if (type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
  871. /* p pointer is set to the start of Lua function name */
  872. if (real_ud->conf_obj == NULL) {
  873. g_set_error (err, rspamd_mime_expr_quark(), 300,
  874. "no config object for '%s'",
  875. mime_atom->str);
  876. goto err;
  877. }
  878. const ucl_object_t *functions = ucl_object_lookup (real_ud->conf_obj,
  879. "functions");
  880. if (functions == NULL) {
  881. g_set_error (err, rspamd_mime_expr_quark(), 310,
  882. "no functions defined for '%s'",
  883. mime_atom->str);
  884. goto err;
  885. }
  886. if (ucl_object_type (functions) != UCL_OBJECT) {
  887. g_set_error (err, rspamd_mime_expr_quark(), 320,
  888. "functions is not a table for '%s'",
  889. mime_atom->str);
  890. goto err;
  891. }
  892. const ucl_object_t *function_obj;
  893. function_obj = ucl_object_lookup_len (functions, c,
  894. p - c);
  895. if (function_obj == NULL) {
  896. g_set_error (err, rspamd_mime_expr_quark(), 320,
  897. "function %.*s is not found for '%s'",
  898. (int)(p - c), c, mime_atom->str);
  899. goto err;
  900. }
  901. if (ucl_object_type (function_obj) != UCL_USERDATA) {
  902. g_set_error (err, rspamd_mime_expr_quark(), 320,
  903. "function %.*s has invalid type for '%s'",
  904. (int)(p - c), c, mime_atom->str);
  905. goto err;
  906. }
  907. struct ucl_lua_funcdata *fd = function_obj->value.ud;
  908. mime_atom->d.lua_cbref = fd->idx;
  909. }
  910. else {
  911. mime_atom->d.func = rspamd_mime_expr_parse_function_atom (pool,
  912. mime_atom->str);
  913. if (mime_atom->d.func == NULL) {
  914. g_set_error (err, rspamd_mime_expr_quark(), 200,
  915. "cannot parse function '%s'",
  916. mime_atom->str);
  917. goto err;
  918. }
  919. }
  920. a = rspamd_mempool_alloc0 (pool, sizeof (*a));
  921. a->len = p - line;
  922. a->priority = 0;
  923. a->data = mime_atom;
  924. return a;
  925. err:
  926. return NULL;
  927. }
  928. static gint
  929. rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
  930. struct rspamd_task *task)
  931. {
  932. gint ret;
  933. if (re == NULL) {
  934. msg_info_task ("invalid regexp passed");
  935. return 0;
  936. }
  937. if (re->type == RSPAMD_RE_HEADER || re->type == RSPAMD_RE_RAWHEADER) {
  938. ret = rspamd_re_cache_process (task,
  939. re->regexp,
  940. re->type,
  941. re->extra.header,
  942. strlen (re->extra.header),
  943. re->is_strong);
  944. }
  945. else if (re->type == RSPAMD_RE_SELECTOR) {
  946. ret = rspamd_re_cache_process (task,
  947. re->regexp,
  948. re->type,
  949. re->extra.selector,
  950. strlen (re->extra.selector),
  951. re->is_strong);
  952. }
  953. else {
  954. ret = rspamd_re_cache_process (task,
  955. re->regexp,
  956. re->type,
  957. NULL,
  958. 0,
  959. re->is_strong);
  960. }
  961. if (re->is_test) {
  962. msg_info_task ("test %s regexp '%s' returned %d",
  963. rspamd_re_cache_type_to_string (re->type),
  964. re->regexp_text, ret);
  965. }
  966. return ret;
  967. }
  968. static gint
  969. rspamd_mime_expr_priority (rspamd_expression_atom_t *atom)
  970. {
  971. struct rspamd_mime_atom *mime_atom = atom->data;
  972. gint ret = 0;
  973. switch (mime_atom->type) {
  974. case MIME_ATOM_INTERNAL_FUNCTION:
  975. /* Prioritize internal functions slightly */
  976. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
  977. break;
  978. case MIME_ATOM_LUA_FUNCTION:
  979. case MIME_ATOM_LOCAL_LUA_FUNCTION:
  980. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 4;
  981. break;
  982. case MIME_ATOM_REGEXP:
  983. switch (mime_atom->d.re->type) {
  984. case RSPAMD_RE_HEADER:
  985. case RSPAMD_RE_RAWHEADER:
  986. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 16;
  987. break;
  988. case RSPAMD_RE_URL:
  989. case RSPAMD_RE_EMAIL:
  990. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
  991. break;
  992. case RSPAMD_RE_SELECTOR:
  993. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
  994. break;
  995. case RSPAMD_RE_MIME:
  996. case RSPAMD_RE_RAWMIME:
  997. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 2;
  998. break;
  999. case RSPAMD_RE_WORDS:
  1000. case RSPAMD_RE_RAWWORDS:
  1001. case RSPAMD_RE_STEMWORDS:
  1002. default:
  1003. /* For expensive regexps */
  1004. ret = 0;
  1005. break;
  1006. }
  1007. }
  1008. return ret;
  1009. }
  1010. static void
  1011. rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom)
  1012. {
  1013. struct rspamd_mime_atom *mime_atom = atom->data;
  1014. if (mime_atom) {
  1015. if (mime_atom->type == MIME_ATOM_INTERNAL_FUNCTION) {
  1016. /* Need to cleanup arguments */
  1017. g_array_free (mime_atom->d.func->args, TRUE);
  1018. }
  1019. }
  1020. }
  1021. static gboolean
  1022. rspamd_mime_expr_process_function (struct rspamd_function_atom * func,
  1023. struct rspamd_task * task,
  1024. lua_State *L)
  1025. {
  1026. struct _fl *selected, key;
  1027. key.name = func->name;
  1028. selected = bsearch (&key,
  1029. list_ptr,
  1030. functions_number,
  1031. sizeof (struct _fl),
  1032. fl_cmp);
  1033. if (selected == NULL) {
  1034. /* Try to check lua function */
  1035. return FALSE;
  1036. }
  1037. return selected->func (task, func->args, selected->user_data);
  1038. }
  1039. static gdouble
  1040. rspamd_mime_expr_process (void *ud, rspamd_expression_atom_t *atom)
  1041. {
  1042. struct rspamd_task *task = (struct rspamd_task *)ud;
  1043. struct rspamd_mime_atom *mime_atom;
  1044. lua_State *L;
  1045. gdouble ret = 0;
  1046. g_assert (task != NULL);
  1047. g_assert (atom != NULL);
  1048. mime_atom = atom->data;
  1049. if (mime_atom->type == MIME_ATOM_REGEXP) {
  1050. ret = rspamd_mime_expr_process_regexp (mime_atom->d.re, task);
  1051. }
  1052. else if (mime_atom->type == MIME_ATOM_LUA_FUNCTION) {
  1053. L = task->cfg->lua_state;
  1054. lua_getglobal (L, mime_atom->d.lua_function);
  1055. rspamd_lua_task_push (L, task);
  1056. if (lua_pcall (L, 1, 1, 0) != 0) {
  1057. msg_info_task ("lua call to global function '%s' for atom '%s' failed: %s",
  1058. mime_atom->d.lua_function,
  1059. mime_atom->str,
  1060. lua_tostring (L, -1));
  1061. lua_pop (L, 1);
  1062. }
  1063. else {
  1064. if (lua_type (L, -1) == LUA_TBOOLEAN) {
  1065. ret = lua_toboolean (L, -1);
  1066. }
  1067. else if (lua_type (L, -1) == LUA_TNUMBER) {
  1068. ret = lua_tonumber (L, 1);
  1069. }
  1070. else {
  1071. msg_err_task ("%s returned wrong return type: %s",
  1072. mime_atom->str, lua_typename (L, lua_type (L, -1)));
  1073. }
  1074. /* Remove result */
  1075. lua_pop (L, 1);
  1076. }
  1077. }
  1078. else if (mime_atom->type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
  1079. gint err_idx;
  1080. L = task->cfg->lua_state;
  1081. lua_pushcfunction (L, &rspamd_lua_traceback);
  1082. err_idx = lua_gettop (L);
  1083. lua_rawgeti (L, LUA_REGISTRYINDEX, mime_atom->d.lua_cbref);
  1084. rspamd_lua_task_push (L, task);
  1085. if (lua_pcall (L, 1, 1, err_idx) != 0) {
  1086. msg_info_task ("lua call to local function for atom '%s' failed: %s",
  1087. mime_atom->str,
  1088. lua_tostring (L, -1));
  1089. }
  1090. else {
  1091. if (lua_type (L, -1) == LUA_TBOOLEAN) {
  1092. ret = lua_toboolean (L, -1);
  1093. }
  1094. else if (lua_type (L, -1) == LUA_TNUMBER) {
  1095. ret = lua_tonumber (L, 1);
  1096. }
  1097. else {
  1098. msg_err_task ("%s returned wrong return type: %s",
  1099. mime_atom->str, lua_typename (L, lua_type (L, -1)));
  1100. }
  1101. }
  1102. lua_settop (L, 0);
  1103. }
  1104. else {
  1105. ret = rspamd_mime_expr_process_function (mime_atom->d.func, task,
  1106. task->cfg->lua_state);
  1107. }
  1108. return ret;
  1109. }
  1110. void
  1111. register_expression_function (const gchar *name,
  1112. rspamd_internal_func_t func,
  1113. void *user_data)
  1114. {
  1115. static struct _fl *new;
  1116. functions_number++;
  1117. new = g_new (struct _fl, functions_number);
  1118. memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl));
  1119. if (list_allocated) {
  1120. g_free (list_ptr);
  1121. }
  1122. list_allocated = TRUE;
  1123. new[functions_number - 1].name = name;
  1124. new[functions_number - 1].func = func;
  1125. new[functions_number - 1].user_data = user_data;
  1126. qsort (new, functions_number, sizeof (struct _fl), fl_cmp);
  1127. list_ptr = new;
  1128. }
  1129. gboolean
  1130. rspamd_compare_encoding (struct rspamd_task *task, GArray * args, void *unused)
  1131. {
  1132. struct expression_argument *arg;
  1133. if (args == NULL || task == NULL) {
  1134. return FALSE;
  1135. }
  1136. arg = &g_array_index (args, struct expression_argument, 0);
  1137. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1138. msg_warn_task ("invalid argument to function is passed");
  1139. return FALSE;
  1140. }
  1141. /* XXX: really write this function */
  1142. return TRUE;
  1143. }
  1144. gboolean
  1145. rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused)
  1146. {
  1147. struct expression_argument *arg;
  1148. struct rspamd_mime_header *rh;
  1149. if (args == NULL || task == NULL) {
  1150. return FALSE;
  1151. }
  1152. arg = &g_array_index (args, struct expression_argument, 0);
  1153. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1154. msg_warn_task ("invalid argument to function is passed");
  1155. return FALSE;
  1156. }
  1157. rh = rspamd_message_get_header_array(task,
  1158. (gchar *) arg->data, FALSE);
  1159. debug_task ("try to get header %s: %d", (gchar *)arg->data,
  1160. (rh != NULL));
  1161. if (rh) {
  1162. return TRUE;
  1163. }
  1164. return FALSE;
  1165. }
  1166. /*
  1167. * This function is designed to find difference between text/html and text/plain parts
  1168. * It takes one argument: difference threshold, if we have two text parts, compare
  1169. * its hashes and check for threshold, if value is greater than threshold, return TRUE
  1170. * and return FALSE otherwise.
  1171. */
  1172. gboolean
  1173. rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
  1174. {
  1175. gint threshold, threshold2 = -1;
  1176. struct expression_argument *arg;
  1177. gdouble *pdiff, diff;
  1178. if (args == NULL || args->len == 0) {
  1179. debug_task ("no threshold is specified, assume it 100");
  1180. threshold = 100;
  1181. }
  1182. else {
  1183. errno = 0;
  1184. arg = &g_array_index (args, struct expression_argument, 0);
  1185. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1186. msg_warn_task ("invalid argument to function is passed");
  1187. return FALSE;
  1188. }
  1189. threshold = strtoul ((gchar *)arg->data, NULL, 10);
  1190. if (errno != 0) {
  1191. msg_info_task ("bad numeric value for threshold \"%s\", assume it 100",
  1192. (gchar *)arg->data);
  1193. threshold = 100;
  1194. }
  1195. if (args->len >= 2) {
  1196. arg = &g_array_index (args, struct expression_argument, 1);
  1197. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1198. msg_warn_task ("invalid argument to function is passed");
  1199. return FALSE;
  1200. }
  1201. errno = 0;
  1202. threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
  1203. if (errno != 0) {
  1204. msg_info_task ("bad numeric value for threshold \"%s\", ignore it",
  1205. (gchar *)arg->data);
  1206. threshold2 = -1;
  1207. }
  1208. }
  1209. }
  1210. if ((pdiff =
  1211. rspamd_mempool_get_variable (task->task_pool,
  1212. "parts_distance")) != NULL) {
  1213. diff = (1.0 - (*pdiff)) * 100.0;
  1214. if (diff != -1) {
  1215. if (threshold2 > 0) {
  1216. if (diff >= MIN (threshold, threshold2) &&
  1217. diff < MAX (threshold, threshold2)) {
  1218. return TRUE;
  1219. }
  1220. }
  1221. else {
  1222. if (diff <= threshold) {
  1223. return TRUE;
  1224. }
  1225. }
  1226. return FALSE;
  1227. }
  1228. else {
  1229. return FALSE;
  1230. }
  1231. }
  1232. return FALSE;
  1233. }
  1234. struct addr_list {
  1235. const gchar *name;
  1236. guint namelen;
  1237. const gchar *addr;
  1238. guint addrlen;
  1239. };
  1240. static gint
  1241. addr_list_cmp_func (const void *a, const void *b)
  1242. {
  1243. const struct addr_list *addra = (struct addr_list *)a,
  1244. *addrb = (struct addr_list *)b;
  1245. if (addra->addrlen != addrb->addrlen) {
  1246. return addra->addrlen - addrb->addrlen;
  1247. }
  1248. return memcmp (addra->addr, addrb->addr, addra->addrlen);
  1249. }
  1250. #define COMPARE_RCPT_LEN 3
  1251. #define MIN_RCPT_TO_COMPARE 7
  1252. gboolean
  1253. rspamd_recipients_distance (struct rspamd_task *task, GArray * args,
  1254. void *unused)
  1255. {
  1256. struct expression_argument *arg;
  1257. struct rspamd_email_address *cur;
  1258. double threshold;
  1259. struct addr_list *ar;
  1260. gint num, i, hits = 0;
  1261. if (args == NULL) {
  1262. msg_warn_task ("no parameters to function");
  1263. return FALSE;
  1264. }
  1265. arg = &g_array_index (args, struct expression_argument, 0);
  1266. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1267. msg_warn_task ("invalid argument to function is passed");
  1268. return FALSE;
  1269. }
  1270. errno = 0;
  1271. threshold = strtod ((gchar *)arg->data, NULL);
  1272. if (errno != 0) {
  1273. msg_warn_task ("invalid numeric value '%s': %s",
  1274. (gchar *)arg->data,
  1275. strerror (errno));
  1276. return FALSE;
  1277. }
  1278. if (!MESSAGE_FIELD (task, rcpt_mime)) {
  1279. return FALSE;
  1280. }
  1281. num = MESSAGE_FIELD (task, rcpt_mime)->len;
  1282. if (num < MIN_RCPT_TO_COMPARE) {
  1283. return FALSE;
  1284. }
  1285. ar = rspamd_mempool_alloc0 (task->task_pool, num * sizeof (struct addr_list));
  1286. /* Fill array */
  1287. num = 0;
  1288. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, rcpt_mime), i, cur) {
  1289. if (cur->addr_len > COMPARE_RCPT_LEN) {
  1290. ar[num].name = cur->addr;
  1291. ar[num].namelen = cur->addr_len;
  1292. ar[num].addr = cur->domain;
  1293. ar[num].addrlen = cur->domain_len;
  1294. num ++;
  1295. }
  1296. }
  1297. qsort (ar, num, sizeof (*ar), addr_list_cmp_func);
  1298. /* Cycle all elements in array */
  1299. for (i = 0; i < num; i++) {
  1300. if (i < num - 1) {
  1301. if (ar[i].namelen == ar[i + 1].namelen) {
  1302. if (rspamd_lc_cmp (ar[i].name, ar[i + 1].name, COMPARE_RCPT_LEN) == 0) {
  1303. hits++;
  1304. }
  1305. }
  1306. }
  1307. }
  1308. if ((hits * num / 2.) / (double)num >= threshold) {
  1309. return TRUE;
  1310. }
  1311. return FALSE;
  1312. }
  1313. gboolean
  1314. rspamd_has_only_html_part (struct rspamd_task * task, GArray * args,
  1315. void *unused)
  1316. {
  1317. struct rspamd_mime_text_part *p;
  1318. guint i, cnt_html = 0, cnt_txt = 0;
  1319. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
  1320. p = g_ptr_array_index (MESSAGE_FIELD (task, text_parts), 0);
  1321. if (!IS_TEXT_PART_ATTACHMENT (p)) {
  1322. if (IS_TEXT_PART_HTML (p)) {
  1323. cnt_html++;
  1324. }
  1325. else {
  1326. cnt_txt++;
  1327. }
  1328. }
  1329. }
  1330. return (cnt_html > 0 && cnt_txt == 0);
  1331. }
  1332. static gboolean
  1333. is_recipient_list_sorted (GPtrArray *ar)
  1334. {
  1335. struct rspamd_email_address *addr;
  1336. gboolean res = TRUE;
  1337. rspamd_ftok_t cur, prev;
  1338. gint i;
  1339. /* Do not check to short address lists */
  1340. if (ar == NULL || ar->len < MIN_RCPT_TO_COMPARE) {
  1341. return FALSE;
  1342. }
  1343. prev.len = 0;
  1344. prev.begin = NULL;
  1345. PTR_ARRAY_FOREACH (ar, i, addr) {
  1346. cur.begin = addr->addr;
  1347. cur.len = addr->addr_len;
  1348. if (prev.len != 0) {
  1349. if (rspamd_ftok_casecmp (&cur, &prev) <= 0) {
  1350. res = FALSE;
  1351. break;
  1352. }
  1353. }
  1354. prev = cur;
  1355. }
  1356. return res;
  1357. }
  1358. gboolean
  1359. rspamd_is_recipients_sorted (struct rspamd_task * task,
  1360. GArray * args,
  1361. void *unused)
  1362. {
  1363. /* Check all types of addresses */
  1364. if (MESSAGE_FIELD (task, rcpt_mime)) {
  1365. return is_recipient_list_sorted (MESSAGE_FIELD (task, rcpt_mime));
  1366. }
  1367. return FALSE;
  1368. }
  1369. gboolean
  1370. rspamd_compare_transfer_encoding (struct rspamd_task * task,
  1371. GArray * args,
  1372. void *unused)
  1373. {
  1374. struct expression_argument *arg;
  1375. guint i;
  1376. struct rspamd_mime_part *part;
  1377. enum rspamd_cte cte;
  1378. if (args == NULL) {
  1379. msg_warn_task ("no parameters to function");
  1380. return FALSE;
  1381. }
  1382. arg = &g_array_index (args, struct expression_argument, 0);
  1383. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1384. msg_warn_task ("invalid argument to function is passed");
  1385. return FALSE;
  1386. }
  1387. cte = rspamd_cte_from_string (arg->data);
  1388. if (cte == RSPAMD_CTE_UNKNOWN) {
  1389. msg_warn_task ("unknown cte: %s", arg->data);
  1390. return FALSE;
  1391. }
  1392. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
  1393. if (IS_PART_TEXT (part)) {
  1394. if (part->cte == cte) {
  1395. return TRUE;
  1396. }
  1397. }
  1398. }
  1399. return FALSE;
  1400. }
  1401. gboolean
  1402. rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused)
  1403. {
  1404. /* Totally broken but seems to be never used */
  1405. return TRUE;
  1406. }
  1407. gboolean
  1408. rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
  1409. {
  1410. struct rspamd_mime_text_part *p;
  1411. struct expression_argument *arg;
  1412. guint i;
  1413. gboolean res = FALSE;
  1414. if (args == NULL) {
  1415. msg_warn_task ("no parameters to function");
  1416. return FALSE;
  1417. }
  1418. arg = &g_array_index (args, struct expression_argument, 0);
  1419. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1420. msg_warn_task ("invalid argument to function is passed");
  1421. return FALSE;
  1422. }
  1423. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
  1424. if (IS_TEXT_PART_HTML (p) && p->html) {
  1425. res = rspamd_html_tag_seen (p->html, arg->data);
  1426. }
  1427. if (res) {
  1428. break;
  1429. }
  1430. }
  1431. return res;
  1432. }
  1433. gboolean
  1434. rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused)
  1435. {
  1436. struct rspamd_mime_text_part *p;
  1437. guint i;
  1438. gboolean res = FALSE;
  1439. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
  1440. if (IS_TEXT_PART_HTML (p) && (rspamd_html_get_tags_count(p->html) < 2)) {
  1441. res = TRUE;
  1442. }
  1443. if (res) {
  1444. break;
  1445. }
  1446. }
  1447. return res;
  1448. }
  1449. static gboolean
  1450. rspamd_raw_header_exists (struct rspamd_task *task, GArray * args, void *unused)
  1451. {
  1452. struct expression_argument *arg;
  1453. if (args == NULL || task == NULL) {
  1454. return FALSE;
  1455. }
  1456. arg = &g_array_index (args, struct expression_argument, 0);
  1457. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1458. msg_warn_task ("invalid argument to function is passed");
  1459. return FALSE;
  1460. }
  1461. return rspamd_message_get_header_array(task, arg->data, FALSE) != NULL;
  1462. }
  1463. static gboolean
  1464. match_smtp_data (struct rspamd_task *task,
  1465. struct expression_argument *arg,
  1466. const gchar *what, gsize len)
  1467. {
  1468. rspamd_regexp_t *re;
  1469. gint r = 0;
  1470. if (arg->type == EXPRESSION_ARGUMENT_REGEXP) {
  1471. /* This is a regexp */
  1472. re = arg->data;
  1473. if (re == NULL) {
  1474. msg_warn_task ("cannot compile regexp for function");
  1475. return FALSE;
  1476. }
  1477. if (len > 0) {
  1478. r = rspamd_regexp_search (re, what, len, NULL, NULL, FALSE, NULL);
  1479. }
  1480. return r;
  1481. }
  1482. else if (arg->type == EXPRESSION_ARGUMENT_NORMAL &&
  1483. g_ascii_strncasecmp (arg->data, what, len) == 0) {
  1484. return TRUE;
  1485. }
  1486. return FALSE;
  1487. }
  1488. static gboolean
  1489. rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused)
  1490. {
  1491. struct expression_argument *arg;
  1492. struct rspamd_email_address *addr = NULL;
  1493. GPtrArray *rcpts = NULL;
  1494. const gchar *type, *str = NULL;
  1495. guint i;
  1496. if (args == NULL) {
  1497. msg_warn_task ("no parameters to function");
  1498. return FALSE;
  1499. }
  1500. arg = &g_array_index (args, struct expression_argument, 0);
  1501. if (!arg || !arg->data || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1502. msg_warn_task ("no parameters to function");
  1503. return FALSE;
  1504. }
  1505. else {
  1506. type = arg->data;
  1507. switch (*type) {
  1508. case 'f':
  1509. case 'F':
  1510. if (g_ascii_strcasecmp (type, "from") == 0) {
  1511. addr = rspamd_task_get_sender (task);
  1512. }
  1513. else {
  1514. msg_warn_task ("bad argument to function: %s", type);
  1515. return FALSE;
  1516. }
  1517. break;
  1518. case 'h':
  1519. case 'H':
  1520. if (g_ascii_strcasecmp (type, "helo") == 0) {
  1521. str = task->helo;
  1522. }
  1523. else {
  1524. msg_warn_task ("bad argument to function: %s", type);
  1525. return FALSE;
  1526. }
  1527. break;
  1528. case 'u':
  1529. case 'U':
  1530. if (g_ascii_strcasecmp (type, "user") == 0) {
  1531. str = task->auth_user;
  1532. }
  1533. else {
  1534. msg_warn_task ("bad argument to function: %s", type);
  1535. return FALSE;
  1536. }
  1537. break;
  1538. case 's':
  1539. case 'S':
  1540. if (g_ascii_strcasecmp (type, "subject") == 0) {
  1541. str = MESSAGE_FIELD (task, subject);
  1542. }
  1543. else {
  1544. msg_warn_task ("bad argument to function: %s", type);
  1545. return FALSE;
  1546. }
  1547. break;
  1548. case 'r':
  1549. case 'R':
  1550. if (g_ascii_strcasecmp (type, "rcpt") == 0) {
  1551. rcpts = task->rcpt_envelope;
  1552. }
  1553. else {
  1554. msg_warn_task ("bad argument to function: %s", type);
  1555. return FALSE;
  1556. }
  1557. break;
  1558. default:
  1559. msg_warn_task ("bad argument to function: %s", type);
  1560. return FALSE;
  1561. }
  1562. }
  1563. if (str == NULL && addr == NULL && rcpts == NULL) {
  1564. /* Not enough data so regexp would NOT be found anyway */
  1565. return FALSE;
  1566. }
  1567. /* We would process only one more argument, others are ignored */
  1568. if (args->len >= 2) {
  1569. arg = &g_array_index (args, struct expression_argument, 1);
  1570. if (arg) {
  1571. if (str != NULL) {
  1572. return match_smtp_data (task, arg, str, strlen (str));
  1573. }
  1574. else if (addr != NULL && addr->addr) {
  1575. return match_smtp_data (task, arg, addr->addr, addr->addr_len);
  1576. }
  1577. else {
  1578. if (rcpts != NULL) {
  1579. for (i = 0; i < rcpts->len; i ++) {
  1580. addr = g_ptr_array_index (rcpts, i);
  1581. if (addr && addr->addr &&
  1582. match_smtp_data (task, arg,
  1583. addr->addr, addr->addr_len)) {
  1584. return TRUE;
  1585. }
  1586. }
  1587. }
  1588. }
  1589. }
  1590. }
  1591. return FALSE;
  1592. }
  1593. static inline gboolean
  1594. rspamd_check_ct_attr (const gchar *begin, gsize len,
  1595. struct expression_argument *arg_pattern)
  1596. {
  1597. rspamd_regexp_t *re;
  1598. gboolean r = FALSE;
  1599. if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
  1600. re = arg_pattern->data;
  1601. if (len > 0) {
  1602. r = rspamd_regexp_search (re,
  1603. begin, len,
  1604. NULL, NULL, FALSE, NULL);
  1605. }
  1606. if (r) {
  1607. return TRUE;
  1608. }
  1609. }
  1610. else {
  1611. /* Just do strcasecmp */
  1612. gsize plen = strlen (arg_pattern->data);
  1613. if (plen == len &&
  1614. g_ascii_strncasecmp (arg_pattern->data, begin, len) == 0) {
  1615. return TRUE;
  1616. }
  1617. }
  1618. return FALSE;
  1619. }
  1620. static gboolean
  1621. rspamd_content_type_compare_param (struct rspamd_task * task,
  1622. GArray * args,
  1623. void *unused)
  1624. {
  1625. struct expression_argument *arg, *arg1, *arg_pattern;
  1626. gboolean recursive = FALSE;
  1627. struct rspamd_mime_part *cur_part;
  1628. guint i;
  1629. rspamd_ftok_t srch;
  1630. struct rspamd_content_type_param *found = NULL, *cur;
  1631. const gchar *param_name;
  1632. if (args == NULL || args->len < 2) {
  1633. msg_warn_task ("no parameters to function");
  1634. return FALSE;
  1635. }
  1636. arg = &g_array_index (args, struct expression_argument, 0);
  1637. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1638. param_name = arg->data;
  1639. arg_pattern = &g_array_index (args, struct expression_argument, 1);
  1640. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) {
  1641. if (args->len >= 3) {
  1642. arg1 = &g_array_index (args, struct expression_argument, 2);
  1643. if (g_ascii_strncasecmp (arg1->data, "true",
  1644. sizeof ("true") - 1) == 0) {
  1645. recursive = TRUE;
  1646. }
  1647. }
  1648. else {
  1649. /*
  1650. * If user did not specify argument, let's assume that he wants
  1651. * recursive search if mime part is multipart/mixed
  1652. */
  1653. if (IS_PART_MULTIPART (cur_part)) {
  1654. recursive = TRUE;
  1655. }
  1656. }
  1657. rspamd_ftok_t lit;
  1658. RSPAMD_FTOK_FROM_STR (&srch, param_name);
  1659. RSPAMD_FTOK_FROM_STR (&lit, "charset");
  1660. if (rspamd_ftok_equal (&srch, &lit)) {
  1661. if (rspamd_check_ct_attr (cur_part->ct->charset.begin,
  1662. cur_part->ct->charset.len, arg_pattern)) {
  1663. return TRUE;
  1664. }
  1665. }
  1666. RSPAMD_FTOK_FROM_STR (&lit, "boundary");
  1667. if (rspamd_ftok_equal (&srch, &lit)) {
  1668. if (rspamd_check_ct_attr (cur_part->ct->orig_boundary.begin,
  1669. cur_part->ct->orig_boundary.len, arg_pattern)) {
  1670. return TRUE;
  1671. }
  1672. }
  1673. if (cur_part->ct->attrs) {
  1674. found = g_hash_table_lookup (cur_part->ct->attrs, &srch);
  1675. if (found) {
  1676. DL_FOREACH (found, cur) {
  1677. if (rspamd_check_ct_attr (cur->value.begin,
  1678. cur->value.len, arg_pattern)) {
  1679. return TRUE;
  1680. }
  1681. }
  1682. }
  1683. }
  1684. if (!recursive) {
  1685. break;
  1686. }
  1687. }
  1688. return FALSE;
  1689. }
  1690. static gboolean
  1691. rspamd_content_type_has_param (struct rspamd_task * task,
  1692. GArray * args,
  1693. void *unused)
  1694. {
  1695. struct expression_argument *arg, *arg1;
  1696. gboolean recursive = FALSE;
  1697. struct rspamd_mime_part *cur_part;
  1698. guint i;
  1699. rspamd_ftok_t srch;
  1700. struct rspamd_content_type_param *found = NULL;
  1701. const gchar *param_name;
  1702. if (args == NULL || args->len < 1) {
  1703. msg_warn_task ("no parameters to function");
  1704. return FALSE;
  1705. }
  1706. arg = &g_array_index (args, struct expression_argument, 0);
  1707. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1708. param_name = arg->data;
  1709. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) {
  1710. if (args->len >= 2) {
  1711. arg1 = &g_array_index (args, struct expression_argument, 1);
  1712. if (g_ascii_strncasecmp (arg1->data, "true",
  1713. sizeof ("true") - 1) == 0) {
  1714. recursive = TRUE;
  1715. }
  1716. }
  1717. else {
  1718. /*
  1719. * If user did not specify argument, let's assume that he wants
  1720. * recursive search if mime part is multipart/mixed
  1721. */
  1722. if (IS_PART_MULTIPART (cur_part)) {
  1723. recursive = TRUE;
  1724. }
  1725. }
  1726. rspamd_ftok_t lit;
  1727. RSPAMD_FTOK_FROM_STR (&srch, param_name);
  1728. RSPAMD_FTOK_FROM_STR (&lit, "charset");
  1729. if (rspamd_ftok_equal (&srch, &lit)) {
  1730. if (cur_part->ct->charset.len > 0) {
  1731. return TRUE;
  1732. }
  1733. }
  1734. RSPAMD_FTOK_FROM_STR (&lit, "boundary");
  1735. if (rspamd_ftok_equal (&srch, &lit)) {
  1736. if (cur_part->ct->boundary.len > 0) {
  1737. return TRUE;
  1738. }
  1739. }
  1740. if (cur_part->ct->attrs) {
  1741. found = g_hash_table_lookup (cur_part->ct->attrs, &srch);
  1742. if (found) {
  1743. return TRUE;
  1744. }
  1745. }
  1746. if (!recursive) {
  1747. break;
  1748. }
  1749. }
  1750. return FALSE;
  1751. }
  1752. static gboolean
  1753. rspamd_content_type_check (struct rspamd_task *task,
  1754. GArray * args,
  1755. gboolean check_subtype)
  1756. {
  1757. rspamd_ftok_t *param_data, srch;
  1758. rspamd_regexp_t *re;
  1759. struct expression_argument *arg1, *arg_pattern;
  1760. struct rspamd_content_type *ct;
  1761. gint r = 0;
  1762. guint i;
  1763. gboolean recursive = FALSE;
  1764. struct rspamd_mime_part *cur_part;
  1765. if (args == NULL || args->len < 1) {
  1766. msg_warn_task ("no parameters to function");
  1767. return FALSE;
  1768. }
  1769. arg_pattern = &g_array_index (args, struct expression_argument, 0);
  1770. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) {
  1771. ct = cur_part->ct;
  1772. if (args->len >= 2) {
  1773. arg1 = &g_array_index (args, struct expression_argument, 1);
  1774. if (g_ascii_strncasecmp (arg1->data, "true",
  1775. sizeof ("true") - 1) == 0) {
  1776. recursive = TRUE;
  1777. }
  1778. }
  1779. else {
  1780. /*
  1781. * If user did not specify argument, let's assume that he wants
  1782. * recursive search if mime part is multipart/mixed
  1783. */
  1784. if (IS_PART_MULTIPART (cur_part)) {
  1785. recursive = TRUE;
  1786. }
  1787. }
  1788. if (check_subtype) {
  1789. param_data = &ct->subtype;
  1790. }
  1791. else {
  1792. param_data = &ct->type;
  1793. }
  1794. if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
  1795. re = arg_pattern->data;
  1796. if (param_data->len > 0) {
  1797. r = rspamd_regexp_search (re, param_data->begin, param_data->len,
  1798. NULL, NULL, FALSE, NULL);
  1799. }
  1800. if (r) {
  1801. return TRUE;
  1802. }
  1803. }
  1804. else {
  1805. /* Just do strcasecmp */
  1806. srch.begin = arg_pattern->data;
  1807. srch.len = strlen (arg_pattern->data);
  1808. if (rspamd_ftok_casecmp (param_data, &srch) == 0) {
  1809. return TRUE;
  1810. }
  1811. }
  1812. /* Get next part */
  1813. if (!recursive) {
  1814. break;
  1815. }
  1816. }
  1817. return FALSE;
  1818. }
  1819. static gboolean
  1820. rspamd_content_type_is_type (struct rspamd_task * task,
  1821. GArray * args,
  1822. void *unused)
  1823. {
  1824. return rspamd_content_type_check (task, args, FALSE);
  1825. }
  1826. static gboolean
  1827. rspamd_content_type_is_subtype (struct rspamd_task * task,
  1828. GArray * args,
  1829. void *unused)
  1830. {
  1831. return rspamd_content_type_check (task, args, TRUE);
  1832. }
  1833. static gboolean
  1834. compare_subtype (struct rspamd_task *task, struct rspamd_content_type *ct,
  1835. struct expression_argument *subtype)
  1836. {
  1837. rspamd_regexp_t *re;
  1838. rspamd_ftok_t srch;
  1839. gint r = 0;
  1840. if (subtype == NULL || ct == NULL) {
  1841. msg_warn_task ("invalid parameters passed");
  1842. return FALSE;
  1843. }
  1844. if (subtype->type == EXPRESSION_ARGUMENT_REGEXP) {
  1845. re = subtype->data;
  1846. if (ct->subtype.len > 0) {
  1847. r = rspamd_regexp_search (re, ct->subtype.begin, ct->subtype.len,
  1848. NULL, NULL, FALSE, NULL);
  1849. }
  1850. }
  1851. else {
  1852. srch.begin = subtype->data;
  1853. srch.len = strlen (subtype->data);
  1854. /* Just do strcasecmp */
  1855. if (rspamd_ftok_casecmp (&ct->subtype, &srch) == 0) {
  1856. return TRUE;
  1857. }
  1858. }
  1859. return r;
  1860. }
  1861. static gboolean
  1862. compare_len (struct rspamd_mime_part *part, guint min, guint max)
  1863. {
  1864. if (min == 0 && max == 0) {
  1865. return TRUE;
  1866. }
  1867. if (min == 0) {
  1868. return part->parsed_data.len <= max;
  1869. }
  1870. else if (max == 0) {
  1871. return part->parsed_data.len >= min;
  1872. }
  1873. else {
  1874. return part->parsed_data.len >= min && part->parsed_data.len <= max;
  1875. }
  1876. }
  1877. static gboolean
  1878. common_has_content_part (struct rspamd_task * task,
  1879. struct expression_argument *param_type,
  1880. struct expression_argument *param_subtype,
  1881. gint min_len,
  1882. gint max_len)
  1883. {
  1884. rspamd_regexp_t *re;
  1885. struct rspamd_mime_part *part;
  1886. struct rspamd_content_type *ct;
  1887. rspamd_ftok_t srch;
  1888. gint r = 0;
  1889. guint i;
  1890. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
  1891. ct = part->ct;
  1892. if (ct == NULL) {
  1893. continue;
  1894. }
  1895. if (param_type->type == EXPRESSION_ARGUMENT_REGEXP) {
  1896. re = param_type->data;
  1897. if (ct->type.len > 0) {
  1898. r = rspamd_regexp_search (re, ct->type.begin, ct->type.len,
  1899. NULL, NULL, FALSE, NULL);
  1900. }
  1901. /* Also check subtype and length of the part */
  1902. if (r && param_subtype) {
  1903. r = compare_len (part, min_len, max_len) &&
  1904. compare_subtype (task, ct, param_subtype);
  1905. return r;
  1906. }
  1907. }
  1908. else {
  1909. /* Just do strcasecmp */
  1910. srch.begin = param_type->data;
  1911. srch.len = strlen (param_type->data);
  1912. if (rspamd_ftok_casecmp (&ct->type, &srch) == 0) {
  1913. if (param_subtype) {
  1914. if (compare_subtype (task, ct, param_subtype)) {
  1915. if (compare_len (part, min_len, max_len)) {
  1916. return TRUE;
  1917. }
  1918. }
  1919. }
  1920. else {
  1921. if (compare_len (part, min_len, max_len)) {
  1922. return TRUE;
  1923. }
  1924. }
  1925. }
  1926. }
  1927. }
  1928. return FALSE;
  1929. }
  1930. static gboolean
  1931. rspamd_has_content_part (struct rspamd_task * task, GArray * args, void *unused)
  1932. {
  1933. struct expression_argument *param_type = NULL, *param_subtype = NULL;
  1934. if (args == NULL) {
  1935. msg_warn_task ("no parameters to function");
  1936. return FALSE;
  1937. }
  1938. param_type = &g_array_index (args, struct expression_argument, 0);
  1939. if (args->len >= 2) {
  1940. param_subtype = &g_array_index (args, struct expression_argument, 1);
  1941. }
  1942. return common_has_content_part (task, param_type, param_subtype, 0, 0);
  1943. }
  1944. static gboolean
  1945. rspamd_has_content_part_len (struct rspamd_task * task,
  1946. GArray * args,
  1947. void *unused)
  1948. {
  1949. struct expression_argument *param_type = NULL, *param_subtype = NULL;
  1950. gint min = 0, max = 0;
  1951. struct expression_argument *arg;
  1952. if (args == NULL) {
  1953. msg_warn_task ("no parameters to function");
  1954. return FALSE;
  1955. }
  1956. param_type = &g_array_index (args, struct expression_argument, 0);
  1957. if (args->len >= 2) {
  1958. param_subtype = &g_array_index (args, struct expression_argument, 1);
  1959. if (args->len >= 3) {
  1960. arg = &g_array_index (args, struct expression_argument, 2);
  1961. errno = 0;
  1962. min = strtoul (arg->data, NULL, 10);
  1963. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1964. if (errno != 0) {
  1965. msg_warn_task ("invalid numeric value '%s': %s",
  1966. (gchar *)arg->data,
  1967. strerror (errno));
  1968. return FALSE;
  1969. }
  1970. if (args->len >= 4) {
  1971. arg = &g_array_index (args, struct expression_argument, 3);
  1972. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1973. max = strtoul (arg->data, NULL, 10);
  1974. if (errno != 0) {
  1975. msg_warn_task ("invalid numeric value '%s': %s",
  1976. (gchar *)arg->data,
  1977. strerror (errno));
  1978. return FALSE;
  1979. }
  1980. }
  1981. }
  1982. }
  1983. return common_has_content_part (task, param_type, param_subtype, min, max);
  1984. }
  1985. static gboolean
  1986. rspamd_is_empty_body (struct rspamd_task *task,
  1987. GArray * args,
  1988. void *unused)
  1989. {
  1990. struct rspamd_mime_part *part;
  1991. guint i;
  1992. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
  1993. if (part->parsed_data.len > 0) {
  1994. return FALSE;
  1995. }
  1996. }
  1997. return TRUE;
  1998. }
  /*
   * Helpers for flag lookup by name. They expect `task`, `found` and `result`
   * to be in scope at the point of use.
   */

  /* Store in `result` whether any bit of `flag` is set in task->flags */
  #define TASK_FLAG_READ(flag) do { \
      result = ((task->flags & (flag)) != 0); \
  } while(0)

  /*
   * If nothing has been found yet and `flag` equals `strname`, read the
   * task flag `macro` into `result` and mark the lookup as done.
   */
  #define TASK_GET_FLAG(flag, strname, macro) do { \
      if (!found && strcmp ((flag), strname) == 0) { \
          TASK_FLAG_READ((macro)); \
          found = TRUE; \
      } \
  } while(0)

  /* Store in `result` whether any bit of `flag` is set in task->protocol_flags */
  #define TASK_PROTOCOL_FLAG_READ(flag) do { \
      result = ((task->protocol_flags & (flag)) != 0); \
  } while(0)

  /*
   * If nothing has been found yet and `flag` equals `strname`, read the
   * protocol flag `macro` into `result` and mark the lookup as done.
   */
  #define TASK_GET_PROTOCOL_FLAG(flag, strname, macro) do { \
      if (!found && strcmp ((flag), strname) == 0) { \
          TASK_PROTOCOL_FLAG_READ((macro)); \
          found = TRUE; \
      } \
  } while(0)
  2017. static gboolean
  2018. rspamd_has_flag_expr (struct rspamd_task *task,
  2019. GArray * args,
  2020. void *unused)
  2021. {
  2022. gboolean found = FALSE, result = FALSE;
  2023. struct expression_argument *flag_arg;
  2024. const gchar *flag_str;
  2025. if (args == NULL) {
  2026. msg_warn_task ("no parameters to function");
  2027. return FALSE;
  2028. }
  2029. flag_arg = &g_array_index (args, struct expression_argument, 0);
  2030. if (flag_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  2031. msg_warn_task ("invalid parameter to function");
  2032. return FALSE;
  2033. }
  2034. flag_str = (const gchar *)flag_arg->data;
  2035. TASK_GET_FLAG (flag_str, "pass_all", RSPAMD_TASK_FLAG_PASS_ALL);
  2036. TASK_GET_FLAG (flag_str, "no_log", RSPAMD_TASK_FLAG_NO_LOG);
  2037. TASK_GET_FLAG (flag_str, "no_stat", RSPAMD_TASK_FLAG_NO_STAT);
  2038. TASK_GET_FLAG (flag_str, "skip", RSPAMD_TASK_FLAG_SKIP);
  2039. TASK_GET_PROTOCOL_FLAG (flag_str, "extended_urls",
  2040. RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS);
  2041. TASK_GET_FLAG (flag_str, "learn_spam", RSPAMD_TASK_FLAG_LEARN_SPAM);
  2042. TASK_GET_FLAG (flag_str, "learn_ham", RSPAMD_TASK_FLAG_LEARN_HAM);
  2043. TASK_GET_FLAG (flag_str, "greylisted", RSPAMD_TASK_FLAG_GREYLISTED);
  2044. TASK_GET_FLAG (flag_str, "broken_headers",
  2045. RSPAMD_TASK_FLAG_BROKEN_HEADERS);
  2046. TASK_GET_FLAG (flag_str, "skip_process",
  2047. RSPAMD_TASK_FLAG_SKIP_PROCESS);
  2048. TASK_GET_PROTOCOL_FLAG (flag_str, "milter",
  2049. RSPAMD_TASK_PROTOCOL_FLAG_MILTER);
  2050. TASK_GET_FLAG (flag_str, "bad_unicode",
  2051. RSPAMD_TASK_FLAG_BAD_UNICODE);
  2052. if (!found) {
  2053. msg_warn_task ("invalid flag name %s", flag_str);
  2054. return FALSE;
  2055. }
  2056. return result;
  2057. }
  2058. static gboolean
  2059. rspamd_has_symbol_expr (struct rspamd_task *task,
  2060. GArray * args,
  2061. void *unused)
  2062. {
  2063. struct expression_argument *sym_arg;
  2064. const gchar *symbol_str;
  2065. if (args == NULL) {
  2066. msg_warn_task ("no parameters to function");
  2067. return FALSE;
  2068. }
  2069. sym_arg = &g_array_index (args, struct expression_argument, 0);
  2070. if (sym_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  2071. msg_warn_task ("invalid parameter to function");
  2072. return FALSE;
  2073. }
  2074. symbol_str = (const gchar *)sym_arg->data;
  2075. if (rspamd_task_find_symbol_result (task, symbol_str, NULL)) {
  2076. return TRUE;
  2077. }
  2078. return FALSE;
  2079. }