You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

mime_expressions.c 44KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "util.h"
  18. #include "cfg_file.h"
  19. #include "rspamd.h"
  20. #include "message.h"
  21. #include "mime_expressions.h"
  22. #include "html.h"
  23. #include "lua/lua_common.h"
  24. gboolean rspamd_compare_encoding (struct rspamd_task *task,
  25. GArray * args,
  26. void *unused);
  27. gboolean rspamd_header_exists (struct rspamd_task *task,
  28. GArray * args,
  29. void *unused);
  30. gboolean rspamd_parts_distance (struct rspamd_task *task,
  31. GArray * args,
  32. void *unused);
  33. gboolean rspamd_recipients_distance (struct rspamd_task *task,
  34. GArray * args,
  35. void *unused);
  36. gboolean rspamd_has_only_html_part (struct rspamd_task *task,
  37. GArray * args,
  38. void *unused);
  39. gboolean rspamd_is_recipients_sorted (struct rspamd_task *task,
  40. GArray * args,
  41. void *unused);
  42. gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task,
  43. GArray * args,
  44. void *unused);
  45. gboolean rspamd_is_html_balanced (struct rspamd_task *task,
  46. GArray * args,
  47. void *unused);
  48. gboolean rspamd_has_html_tag (struct rspamd_task *task,
  49. GArray * args,
  50. void *unused);
  51. gboolean rspamd_has_fake_html (struct rspamd_task *task,
  52. GArray * args,
  53. void *unused);
  54. static gboolean rspamd_raw_header_exists (struct rspamd_task *task,
  55. GArray * args,
  56. void *unused);
  57. static gboolean rspamd_check_smtp_data (struct rspamd_task *task,
  58. GArray * args,
  59. void *unused);
  60. static gboolean rspamd_content_type_is_type (struct rspamd_task * task,
  61. GArray * args,
  62. void *unused);
  63. static gboolean rspamd_content_type_is_subtype (struct rspamd_task *task,
  64. GArray * args,
  65. void *unused);
  66. static gboolean rspamd_content_type_has_param (struct rspamd_task * task,
  67. GArray * args,
  68. void *unused);
  69. static gboolean rspamd_content_type_compare_param (struct rspamd_task * task,
  70. GArray * args,
  71. void *unused);
  72. static gboolean rspamd_has_content_part (struct rspamd_task *task,
  73. GArray * args,
  74. void *unused);
  75. static gboolean rspamd_has_content_part_len (struct rspamd_task *task,
  76. GArray * args,
  77. void *unused);
  78. static rspamd_expression_atom_t * rspamd_mime_expr_parse (const gchar *line, gsize len,
  79. rspamd_mempool_t *pool, gpointer ud, GError **err);
  80. static gint rspamd_mime_expr_process (gpointer input, rspamd_expression_atom_t *atom);
  81. static gint rspamd_mime_expr_priority (rspamd_expression_atom_t *atom);
  82. static void rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom);
  83. /**
  84. * Regexp structure
  85. */
  86. struct rspamd_regexp_atom {
  87. enum rspamd_re_type type; /**< regexp type */
  88. gchar *regexp_text; /**< regexp text representation */
  89. rspamd_regexp_t *regexp; /**< regexp structure */
  90. gchar *header; /**< header name for header regexps */
  91. gboolean is_test; /**< true if this expression must be tested */
  92. gboolean is_strong; /**< true if headers search must be case sensitive */
  93. gboolean is_multiple; /**< true if we need to match all inclusions of atom */
  94. };
  95. /**
  96. * Rspamd expression function
  97. */
  98. struct rspamd_function_atom {
  99. gchar *name; /**< name of function */
  100. GArray *args; /**< its args */
  101. };
  /* Selects which member of the union in struct rspamd_mime_atom is valid */
  enum rspamd_mime_atom_type {
      MIME_ATOM_REGEXP = 0,            /* regular expression atom */
      MIME_ATOM_INTERNAL_FUNCTION = 1, /* function written as name(args) */
      MIME_ATOM_LUA_FUNCTION = 2       /* bare name of a global Lua function */
  };
  107. struct rspamd_mime_atom {
  108. gchar *str;
  109. union {
  110. struct rspamd_regexp_atom *re;
  111. struct rspamd_function_atom *func;
  112. const gchar *lua_function;
  113. } d;
  114. enum rspamd_mime_atom_type type;
  115. };
  116. /*
  117. * List of internal functions of rspamd
  118. * Sorted by name to use bsearch
  119. */
  120. static struct _fl {
  121. const gchar *name;
  122. rspamd_internal_func_t func;
  123. void *user_data;
  124. } rspamd_functions_list[] = {
  125. {"check_smtp_data", rspamd_check_smtp_data, NULL},
  126. {"compare_encoding", rspamd_compare_encoding, NULL},
  127. {"compare_parts_distance", rspamd_parts_distance, NULL},
  128. {"compare_recipients_distance", rspamd_recipients_distance, NULL},
  129. {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
  130. {"content_type_compare_param", rspamd_content_type_compare_param, NULL},
  131. {"content_type_has_param", rspamd_content_type_has_param, NULL},
  132. {"content_type_is_subtype", rspamd_content_type_is_subtype, NULL},
  133. {"content_type_is_type", rspamd_content_type_is_type, NULL},
  134. {"has_content_part", rspamd_has_content_part, NULL},
  135. {"has_content_part_len", rspamd_has_content_part_len, NULL},
  136. {"has_fake_html", rspamd_has_fake_html, NULL},
  137. {"has_html_tag", rspamd_has_html_tag, NULL},
  138. {"has_only_html_part", rspamd_has_only_html_part, NULL},
  139. {"header_exists", rspamd_header_exists, NULL},
  140. {"is_html_balanced", rspamd_is_html_balanced, NULL},
  141. {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL},
  142. {"raw_header_exists", rspamd_raw_header_exists, NULL}
  143. };
  144. const struct rspamd_atom_subr mime_expr_subr = {
  145. .parse = rspamd_mime_expr_parse,
  146. .process = rspamd_mime_expr_process,
  147. .priority = rspamd_mime_expr_priority,
  148. .destroy = rspamd_mime_expr_destroy
  149. };
  150. static struct _fl *list_ptr = &rspamd_functions_list[0];
  151. static guint32 functions_number = sizeof (rspamd_functions_list) /
  152. sizeof (struct _fl);
  153. static gboolean list_allocated = FALSE;
  154. /* Bsearch routine */
  155. static gint
  156. fl_cmp (const void *s1, const void *s2)
  157. {
  158. struct _fl *fl1 = (struct _fl *)s1;
  159. struct _fl *fl2 = (struct _fl *)s2;
  160. return strcmp (fl1->name, fl2->name);
  161. }
  162. static GQuark
  163. rspamd_mime_expr_quark (void)
  164. {
  165. return g_quark_from_static_string ("mime-expressions");
  166. }
  167. /*
  168. * Rspamd regexp utility functions
  169. */
  170. static struct rspamd_regexp_atom *
  171. rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line,
  172. struct rspamd_config *cfg)
  173. {
  174. const gchar *begin, *end, *p, *src, *start;
  175. gchar *dbegin, *dend;
  176. struct rspamd_regexp_atom *result;
  177. GError *err = NULL;
  178. GString *re_flags;
  179. if (line == NULL) {
  180. msg_err_pool ("cannot parse NULL line");
  181. return NULL;
  182. }
  183. src = line;
  184. result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_atom));
  185. /* Skip whitespaces */
  186. while (g_ascii_isspace (*line)) {
  187. line++;
  188. }
  189. if (*line == '\0') {
  190. msg_warn_pool ("got empty regexp");
  191. return NULL;
  192. }
  193. start = line;
  194. /* First try to find header name */
  195. begin = strchr (line, '/');
  196. if (begin != NULL) {
  197. p = begin;
  198. end = NULL;
  199. while (p != line) {
  200. if (*p == '=') {
  201. end = p;
  202. break;
  203. }
  204. p--;
  205. }
  206. if (end) {
  207. result->header = rspamd_mempool_alloc (pool, end - line + 1);
  208. rspamd_strlcpy (result->header, line, end - line + 1);
  209. result->type = RSPAMD_RE_HEADER;
  210. line = end;
  211. }
  212. }
  213. else {
  214. result->header = rspamd_mempool_strdup (pool, line);
  215. result->type = RSPAMD_RE_MAX;
  216. line = start;
  217. }
  218. /* Find begin of regexp */
  219. while (*line && *line != '/') {
  220. line++;
  221. }
  222. if (*line != '\0') {
  223. begin = line + 1;
  224. }
  225. else if (result->header == NULL) {
  226. /* Assume that line without // is just a header name */
  227. result->header = rspamd_mempool_strdup (pool, line);
  228. result->type = RSPAMD_RE_HEADER;
  229. return result;
  230. }
  231. else {
  232. /* We got header name earlier but have not found // expression, so it is invalid regexp */
  233. msg_warn_pool (
  234. "got no header name (eg. header=) but without corresponding regexp, %s",
  235. src);
  236. return NULL;
  237. }
  238. /* Find end */
  239. end = begin;
  240. while (*end && (*end != '/' || *(end - 1) == '\\')) {
  241. end++;
  242. }
  243. if (end == begin || *end != '/') {
  244. msg_warn_pool ("no trailing / in regexp %s", src);
  245. return NULL;
  246. }
  247. /* Parse flags */
  248. p = end + 1;
  249. re_flags = g_string_sized_new (32);
  250. while (p != NULL) {
  251. switch (*p) {
  252. case 'i':
  253. case 'm':
  254. case 's':
  255. case 'x':
  256. case 'u':
  257. case 'O':
  258. case 'r':
  259. g_string_append_c (re_flags, *p);
  260. p++;
  261. break;
  262. case 'o':
  263. p++;
  264. break;
  265. /* Type flags */
  266. case 'H':
  267. result->type = RSPAMD_RE_HEADER;
  268. p++;
  269. break;
  270. case 'M':
  271. result->type = RSPAMD_RE_BODY;
  272. p++;
  273. break;
  274. case 'P':
  275. result->type = RSPAMD_RE_MIME;
  276. p++;
  277. break;
  278. case 'Q':
  279. result->type = RSPAMD_RE_RAWMIME;
  280. p++;
  281. break;
  282. case 'U':
  283. result->type = RSPAMD_RE_URL;
  284. p++;
  285. break;
  286. case 'X':
  287. result->type = RSPAMD_RE_RAWHEADER;
  288. p++;
  289. break;
  290. case 'T':
  291. result->is_test = TRUE;
  292. p++;
  293. break;
  294. case 'S':
  295. result->is_strong = TRUE;
  296. p++;
  297. break;
  298. case 'A':
  299. result->is_multiple = TRUE;
  300. p++;
  301. break;
  302. /* Stop flags parsing */
  303. default:
  304. p = NULL;
  305. break;
  306. }
  307. }
  308. if (result->type >= RSPAMD_RE_MAX) {
  309. msg_err_pool ("could not read regexp: %s, unknown type", src);
  310. return NULL;
  311. }
  312. result->regexp_text = rspamd_mempool_strdup (pool, start);
  313. dbegin = result->regexp_text + (begin - start);
  314. dend = result->regexp_text + (end - start);
  315. *dend = '\0';
  316. result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
  317. &err);
  318. g_string_free (re_flags, TRUE);
  319. if (result->regexp == NULL || err != NULL) {
  320. msg_warn_pool ("could not read regexp: %s while reading regexp %s",
  321. err ? err->message : "unknown error",
  322. src);
  323. return NULL;
  324. }
  325. if (result->is_multiple) {
  326. rspamd_regexp_set_maxhits (result->regexp, 0);
  327. }
  328. else {
  329. rspamd_regexp_set_maxhits (result->regexp, 1);
  330. }
  331. rspamd_regexp_set_ud (result->regexp, result);
  332. *dend = '/';
  333. return result;
  334. }
  335. struct rspamd_function_atom *
  336. rspamd_mime_expr_parse_function_atom (const gchar *input)
  337. {
  338. const gchar *obrace, *ebrace, *p, *c;
  339. gchar t, *databuf;
  340. guint len;
  341. struct rspamd_function_atom *res;
  342. struct expression_argument arg;
  343. GError *err = NULL;
  344. enum {
  345. start_read_argument = 0,
  346. in_string,
  347. in_regexp,
  348. got_backslash,
  349. got_comma
  350. } state, prev_state = 0;
  351. obrace = strchr (input, '(');
  352. ebrace = strrchr (input, ')');
  353. g_assert (obrace != NULL && ebrace != NULL);
  354. res = g_slice_alloc0 (sizeof (*res));
  355. res->name = g_malloc (obrace - input + 1);
  356. rspamd_strlcpy (res->name, input, obrace - input + 1);
  357. res->args = g_array_new (FALSE, FALSE, sizeof (struct expression_argument));
  358. p = obrace + 1;
  359. c = p;
  360. state = start_read_argument;
  361. /* Read arguments */
  362. while (p <= ebrace) {
  363. t = *p;
  364. switch (state) {
  365. case start_read_argument:
  366. if (t == '/') {
  367. state = in_regexp;
  368. c = p;
  369. }
  370. else if (!g_ascii_isspace (t)) {
  371. state = in_string;
  372. if (t == '\'' || t == '\"') {
  373. c = p + 1;
  374. }
  375. else {
  376. c = p;
  377. }
  378. }
  379. p ++;
  380. break;
  381. case in_regexp:
  382. if (t == '\\') {
  383. state = got_backslash;
  384. prev_state = in_regexp;
  385. }
  386. else if (t == ',' || p == ebrace) {
  387. len = p - c + 1;
  388. databuf = g_malloc (len);
  389. rspamd_strlcpy (databuf, c, len);
  390. arg.type = EXPRESSION_ARGUMENT_REGEXP;
  391. arg.data = rspamd_regexp_cache_create (NULL, databuf, NULL, &err);
  392. if (arg.data == NULL) {
  393. /* Fallback to string */
  394. msg_warn ("cannot parse slashed argument %s as regexp: %s",
  395. databuf, err->message);
  396. g_error_free (err);
  397. arg.type = EXPRESSION_ARGUMENT_NORMAL;
  398. arg.data = databuf;
  399. }
  400. else {
  401. g_free (databuf);
  402. }
  403. g_array_append_val (res->args, arg);
  404. state = got_comma;
  405. }
  406. p ++;
  407. break;
  408. case in_string:
  409. if (t == '\\') {
  410. state = got_backslash;
  411. prev_state = in_string;
  412. }
  413. else if (t == ',' || p == ebrace) {
  414. if (*(p - 1) == '\'' || *(p - 1) == '\"') {
  415. len = p - c;
  416. }
  417. else {
  418. len = p - c + 1;
  419. }
  420. databuf = g_malloc (len);
  421. rspamd_strlcpy (databuf, c, len);
  422. arg.type = EXPRESSION_ARGUMENT_NORMAL;
  423. arg.data = databuf;
  424. g_array_append_val (res->args, arg);
  425. state = got_comma;
  426. }
  427. p ++;
  428. break;
  429. case got_backslash:
  430. state = prev_state;
  431. p ++;
  432. break;
  433. case got_comma:
  434. state = start_read_argument;
  435. break;
  436. }
  437. }
  438. return res;
  439. }
  440. static rspamd_expression_atom_t *
  441. rspamd_mime_expr_parse (const gchar *line, gsize len,
  442. rspamd_mempool_t *pool, gpointer ud, GError **err)
  443. {
  444. rspamd_expression_atom_t *a = NULL;
  445. struct rspamd_mime_atom *mime_atom = NULL;
  446. const gchar *p, *end;
  447. struct rspamd_config *cfg = ud;
  448. rspamd_regexp_t *own_re;
  449. gchar t;
  450. gint type = MIME_ATOM_REGEXP, obraces = 0, ebraces = 0;
  451. enum {
  452. in_header = 0,
  453. got_slash,
  454. in_regexp,
  455. got_backslash,
  456. got_second_slash,
  457. in_flags,
  458. got_obrace,
  459. in_function,
  460. got_ebrace,
  461. end_atom,
  462. bad_atom
  463. } state = 0, prev_state = 0;
  464. p = line;
  465. end = p + len;
  466. while (p < end) {
  467. t = *p;
  468. switch (state) {
  469. case in_header:
  470. if (t == '/') {
  471. /* Regexp */
  472. state = got_slash;
  473. }
  474. else if (t == '(') {
  475. /* Function */
  476. state = got_obrace;
  477. }
  478. else if (!g_ascii_isalnum (t) && t != '_' && t != '-' && t != '=') {
  479. /* Likely lua function, identified by just a string */
  480. type = MIME_ATOM_LUA_FUNCTION;
  481. state = end_atom;
  482. /* Do not increase p */
  483. continue;
  484. }
  485. else if (g_ascii_isspace (t)) {
  486. state = bad_atom;
  487. }
  488. p ++;
  489. break;
  490. case got_slash:
  491. state = in_regexp;
  492. break;
  493. case in_regexp:
  494. if (t == '\\') {
  495. state = got_backslash;
  496. prev_state = in_regexp;
  497. }
  498. else if (t == '/') {
  499. state = got_second_slash;
  500. }
  501. p ++;
  502. break;
  503. case got_second_slash:
  504. state = in_flags;
  505. break;
  506. case in_flags:
  507. if (!g_ascii_isalpha (t)) {
  508. state = end_atom;
  509. }
  510. else {
  511. p ++;
  512. }
  513. break;
  514. case got_backslash:
  515. state = prev_state;
  516. p ++;
  517. break;
  518. case got_obrace:
  519. state = in_function;
  520. type = MIME_ATOM_INTERNAL_FUNCTION;
  521. obraces ++;
  522. break;
  523. case in_function:
  524. if (t == '\\') {
  525. state = got_backslash;
  526. prev_state = in_function;
  527. }
  528. else if (t == '(') {
  529. obraces ++;
  530. }
  531. else if (t == ')') {
  532. ebraces ++;
  533. if (ebraces == obraces) {
  534. state = got_ebrace;
  535. }
  536. }
  537. p ++;
  538. break;
  539. case got_ebrace:
  540. state = end_atom;
  541. break;
  542. case bad_atom:
  543. g_set_error (err, rspamd_mime_expr_quark(), 100, "cannot parse"
  544. " mime atom '%s' when reading symbol '%c' at offset %d, "
  545. "near %*.s", line, t, (gint)(p - line),
  546. (gint)MIN (end - p, 10), p);
  547. return NULL;
  548. case end_atom:
  549. goto set;
  550. }
  551. }
  552. set:
  553. if (p - line == 0 || (state != got_ebrace && state != got_second_slash &&
  554. state != in_flags && state != end_atom)) {
  555. g_set_error (err, rspamd_mime_expr_quark(), 200, "incomplete or empty"
  556. " mime atom");
  557. return NULL;
  558. }
  559. mime_atom = g_slice_alloc (sizeof (*mime_atom));
  560. mime_atom->type = type;
  561. mime_atom->str = g_malloc (p - line + 1);
  562. rspamd_strlcpy (mime_atom->str, line, p - line + 1);
  563. if (type == MIME_ATOM_REGEXP) {
  564. mime_atom->d.re = rspamd_mime_expr_parse_regexp_atom (pool,
  565. mime_atom->str, cfg);
  566. if (mime_atom->d.re == NULL) {
  567. g_set_error (err, rspamd_mime_expr_quark(), 200, "cannot parse regexp '%s'",
  568. mime_atom->str);
  569. goto err;
  570. }
  571. else {
  572. /* Register new item in the cache */
  573. if (mime_atom->d.re->type == RSPAMD_RE_HEADER ||
  574. mime_atom->d.re->type == RSPAMD_RE_RAWHEADER) {
  575. if (mime_atom->d.re->header != NULL) {
  576. own_re = mime_atom->d.re->regexp;
  577. mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
  578. mime_atom->d.re->regexp,
  579. mime_atom->d.re->type,
  580. mime_atom->d.re->header,
  581. strlen (mime_atom->d.re->header) + 1);
  582. /* Pass ownership to the cache */
  583. rspamd_regexp_unref (own_re);
  584. }
  585. else {
  586. /* We have header regexp, but no header name is detected */
  587. g_set_error (err,
  588. rspamd_mime_expr_quark (),
  589. 200,
  590. "no header name in /H regexp: '%s'",
  591. mime_atom->str);
  592. goto err;
  593. }
  594. }
  595. else {
  596. own_re = mime_atom->d.re->regexp;
  597. mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
  598. mime_atom->d.re->regexp,
  599. mime_atom->d.re->type,
  600. NULL,
  601. 0);
  602. /* Pass ownership to the cache */
  603. rspamd_regexp_unref (own_re);
  604. }
  605. }
  606. }
  607. else if (type == MIME_ATOM_LUA_FUNCTION) {
  608. mime_atom->d.lua_function = mime_atom->str;
  609. lua_getglobal (cfg->lua_state, mime_atom->str);
  610. if (lua_type (cfg->lua_state, -1) != LUA_TFUNCTION) {
  611. g_set_error (err, rspamd_mime_expr_quark(), 200, "no such lua function '%s'",
  612. mime_atom->str);
  613. lua_pop (cfg->lua_state, 1);
  614. goto err;
  615. }
  616. lua_pop (cfg->lua_state, 1);
  617. }
  618. else {
  619. mime_atom->d.func = rspamd_mime_expr_parse_function_atom (mime_atom->str);
  620. if (mime_atom->d.func == NULL) {
  621. g_set_error (err, rspamd_mime_expr_quark(), 200, "cannot parse function '%s'",
  622. mime_atom->str);
  623. goto err;
  624. }
  625. }
  626. a = rspamd_mempool_alloc (pool, sizeof (*a));
  627. a->len = p - line;
  628. a->priority = 0;
  629. a->data = mime_atom;
  630. return a;
  631. err:
  632. if (mime_atom != NULL) {
  633. g_free (mime_atom->str);
  634. g_slice_free1 (sizeof (*mime_atom), mime_atom);
  635. }
  636. return NULL;
  637. }
  638. static gint
  639. rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
  640. struct rspamd_task *task)
  641. {
  642. gint ret;
  643. if (re == NULL) {
  644. msg_info_task ("invalid regexp passed");
  645. return 0;
  646. }
  647. if (re->type == RSPAMD_RE_HEADER || re->type == RSPAMD_RE_RAWHEADER) {
  648. ret = rspamd_re_cache_process (task,
  649. task->re_rt,
  650. re->regexp,
  651. re->type,
  652. re->header,
  653. strlen (re->header),
  654. re->is_strong);
  655. }
  656. else {
  657. ret = rspamd_re_cache_process (task,
  658. task->re_rt,
  659. re->regexp,
  660. re->type,
  661. NULL,
  662. 0,
  663. re->is_strong);
  664. }
  665. if (re->is_test) {
  666. msg_info_task ("test %s regexp '%s' returned %d",
  667. rspamd_re_cache_type_to_string (re->type),
  668. re->regexp_text, ret);
  669. }
  670. return ret;
  671. }
  672. static gint
  673. rspamd_mime_expr_priority (rspamd_expression_atom_t *atom)
  674. {
  675. struct rspamd_mime_atom *mime_atom = atom->data;
  676. gint ret = 0;
  677. switch (mime_atom->type) {
  678. case MIME_ATOM_INTERNAL_FUNCTION:
  679. /* Prioritize internal functions slightly */
  680. ret = 50;
  681. break;
  682. case MIME_ATOM_LUA_FUNCTION:
  683. ret = 50;
  684. break;
  685. case MIME_ATOM_REGEXP:
  686. switch (mime_atom->d.re->type) {
  687. case RSPAMD_RE_HEADER:
  688. case RSPAMD_RE_RAWHEADER:
  689. ret = 100;
  690. break;
  691. case RSPAMD_RE_URL:
  692. ret = 90;
  693. break;
  694. case RSPAMD_RE_MIME:
  695. case RSPAMD_RE_RAWMIME:
  696. ret = 10;
  697. break;
  698. default:
  699. /* For message regexp */
  700. ret = 0;
  701. break;
  702. }
  703. }
  704. return ret;
  705. }
  706. static void
  707. rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom)
  708. {
  709. struct rspamd_mime_atom *mime_atom = atom->data;
  710. guint i;
  711. struct expression_argument *arg;
  712. if (mime_atom) {
  713. if (mime_atom->type == MIME_ATOM_INTERNAL_FUNCTION) {
  714. /* Need to cleanup arguments */
  715. for (i = 0; i < mime_atom->d.func->args->len; i ++) {
  716. arg = &g_array_index (mime_atom->d.func->args,
  717. struct expression_argument, i);
  718. if (arg->type == EXPRESSION_ARGUMENT_NORMAL) {
  719. g_free (arg->data);
  720. }
  721. }
  722. g_array_free (mime_atom->d.func->args, TRUE);
  723. }
  724. /* XXX: regexp shouldn't be special */
  725. g_slice_free1 (sizeof (*mime_atom), mime_atom);
  726. }
  727. }
  728. static gboolean
  729. rspamd_mime_expr_process_function (struct rspamd_function_atom * func,
  730. struct rspamd_task * task,
  731. lua_State *L)
  732. {
  733. struct _fl *selected, key;
  734. key.name = func->name;
  735. selected = bsearch (&key,
  736. list_ptr,
  737. functions_number,
  738. sizeof (struct _fl),
  739. fl_cmp);
  740. if (selected == NULL) {
  741. /* Try to check lua function */
  742. return FALSE;
  743. }
  744. return selected->func (task, func->args, selected->user_data);
  745. }
  746. static gint
  747. rspamd_mime_expr_process (gpointer input, rspamd_expression_atom_t *atom)
  748. {
  749. struct rspamd_task *task = input;
  750. struct rspamd_mime_atom *mime_atom;
  751. lua_State *L;
  752. gint ret = 0;
  753. g_assert (task != NULL);
  754. g_assert (atom != NULL);
  755. mime_atom = atom->data;
  756. if (mime_atom->type == MIME_ATOM_REGEXP) {
  757. ret = rspamd_mime_expr_process_regexp (mime_atom->d.re, task);
  758. }
  759. else if (mime_atom->type == MIME_ATOM_LUA_FUNCTION) {
  760. L = task->cfg->lua_state;
  761. lua_getglobal (L, mime_atom->d.lua_function);
  762. rspamd_lua_task_push (L, task);
  763. if (lua_pcall (L, 1, 1, 0) != 0) {
  764. msg_info_task ("lua call to global function '%s' for atom '%s' failed: %s",
  765. mime_atom->d.lua_function,
  766. mime_atom->str,
  767. lua_tostring (L, -1));
  768. }
  769. else {
  770. if (lua_type (L, -1) == LUA_TBOOLEAN) {
  771. ret = lua_toboolean (L, -1);
  772. }
  773. else if (lua_type (L, -1) == LUA_TNUMBER) {
  774. ret = lua_tonumber (L, 1);
  775. }
  776. else {
  777. msg_err_task ("%s returned wrong return type: %s",
  778. mime_atom->str, lua_typename (L, lua_type (L, -1)));
  779. }
  780. /* Remove result */
  781. lua_pop (L, 1);
  782. }
  783. }
  784. else {
  785. ret = rspamd_mime_expr_process_function (mime_atom->d.func, task,
  786. task->cfg->lua_state);
  787. }
  788. return ret;
  789. }
  790. void
  791. register_expression_function (const gchar *name,
  792. rspamd_internal_func_t func,
  793. void *user_data)
  794. {
  795. static struct _fl *new;
  796. functions_number++;
  797. new = g_new (struct _fl, functions_number);
  798. memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl));
  799. if (list_allocated) {
  800. g_free (list_ptr);
  801. }
  802. list_allocated = TRUE;
  803. new[functions_number - 1].name = name;
  804. new[functions_number - 1].func = func;
  805. new[functions_number - 1].user_data = user_data;
  806. qsort (new, functions_number, sizeof (struct _fl), fl_cmp);
  807. list_ptr = new;
  808. }
  809. gboolean
  810. rspamd_compare_encoding (struct rspamd_task *task, GArray * args, void *unused)
  811. {
  812. struct expression_argument *arg;
  813. if (args == NULL || task == NULL) {
  814. return FALSE;
  815. }
  816. arg = &g_array_index (args, struct expression_argument, 0);
  817. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  818. msg_warn_task ("invalid argument to function is passed");
  819. return FALSE;
  820. }
  821. /* XXX: really write this function */
  822. return TRUE;
  823. }
  824. gboolean
  825. rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused)
  826. {
  827. struct expression_argument *arg;
  828. GList *headerlist;
  829. if (args == NULL || task == NULL) {
  830. return FALSE;
  831. }
  832. arg = &g_array_index (args, struct expression_argument, 0);
  833. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  834. msg_warn_task ("invalid argument to function is passed");
  835. return FALSE;
  836. }
  837. debug_task ("try to get header %s", (gchar *)arg->data);
  838. headerlist = rspamd_message_get_header (task,
  839. (gchar *)arg->data,
  840. FALSE);
  841. if (headerlist) {
  842. return TRUE;
  843. }
  844. return FALSE;
  845. }
  846. /*
  847. * This function is designed to find difference between text/html and text/plain parts
  848. * It takes one argument: difference threshold, if we have two text parts, compare
  849. * its hashes and check for threshold, if value is greater than threshold, return TRUE
  850. * and return FALSE otherwise.
  851. */
  852. gboolean
  853. rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
  854. {
  855. gint threshold, threshold2 = -1, diff;
  856. struct expression_argument *arg;
  857. gint *pdiff;
  858. if (args == NULL || args->len == 0) {
  859. debug_task ("no threshold is specified, assume it 100");
  860. threshold = 100;
  861. }
  862. else {
  863. errno = 0;
  864. arg = &g_array_index (args, struct expression_argument, 0);
  865. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  866. msg_warn_task ("invalid argument to function is passed");
  867. return FALSE;
  868. }
  869. threshold = strtoul ((gchar *)arg->data, NULL, 10);
  870. if (errno != 0) {
  871. msg_info_task ("bad numeric value for threshold \"%s\", assume it 100",
  872. (gchar *)arg->data);
  873. threshold = 100;
  874. }
  875. if (args->len >= 2) {
  876. arg = &g_array_index (args, struct expression_argument, 1);
  877. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  878. msg_warn_task ("invalid argument to function is passed");
  879. return FALSE;
  880. }
  881. errno = 0;
  882. threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
  883. if (errno != 0) {
  884. msg_info_task ("bad numeric value for threshold \"%s\", ignore it",
  885. (gchar *)arg->data);
  886. threshold2 = -1;
  887. }
  888. }
  889. }
  890. if ((pdiff =
  891. rspamd_mempool_get_variable (task->task_pool,
  892. "parts_distance")) != NULL) {
  893. diff = *pdiff;
  894. if (diff != -1) {
  895. if (threshold2 > 0) {
  896. if (diff >=
  897. MIN (threshold,
  898. threshold2) && diff < MAX (threshold, threshold2)) {
  899. return TRUE;
  900. }
  901. }
  902. else {
  903. if (diff <= threshold) {
  904. return TRUE;
  905. }
  906. }
  907. return FALSE;
  908. }
  909. else {
  910. return FALSE;
  911. }
  912. }
  913. return FALSE;
  914. }
  915. struct addr_list {
  916. const gchar *name;
  917. const gchar *addr;
  918. };
  919. #define COMPARE_RCPT_LEN 3
  920. #define MIN_RCPT_TO_COMPARE 7
  921. gboolean
  922. rspamd_recipients_distance (struct rspamd_task *task, GArray * args,
  923. void *unused)
  924. {
  925. struct expression_argument *arg;
  926. InternetAddressList *cur;
  927. double threshold;
  928. struct addr_list *ar;
  929. gchar *c;
  930. gint num, i, j, hits = 0, total = 0;
  931. if (args == NULL) {
  932. msg_warn_task ("no parameters to function");
  933. return FALSE;
  934. }
  935. arg = &g_array_index (args, struct expression_argument, 0);
  936. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  937. msg_warn_task ("invalid argument to function is passed");
  938. return FALSE;
  939. }
  940. errno = 0;
  941. threshold = strtod ((gchar *)arg->data, NULL);
  942. if (errno != 0) {
  943. msg_warn_task ("invalid numeric value '%s': %s",
  944. (gchar *)arg->data,
  945. strerror (errno));
  946. return FALSE;
  947. }
  948. if (!task->rcpt_mime) {
  949. return FALSE;
  950. }
  951. num = internet_address_list_length (task->rcpt_mime);
  952. if (num < MIN_RCPT_TO_COMPARE) {
  953. return FALSE;
  954. }
  955. ar =
  956. rspamd_mempool_alloc0 (task->task_pool, num *
  957. sizeof (struct addr_list));
  958. /* Fill array */
  959. cur = task->rcpt_mime;
  960. #ifdef GMIME24
  961. for (i = 0; i < num; i++) {
  962. InternetAddress *iaelt =
  963. internet_address_list_get_address(cur, i);
  964. InternetAddressMailbox *iamb =
  965. INTERNET_ADDRESS_IS_MAILBOX(iaelt) ?
  966. INTERNET_ADDRESS_MAILBOX (iaelt) : NULL;
  967. if (iamb) {
  968. ar[i].name = internet_address_mailbox_get_addr (iamb);
  969. if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
  970. ar[i].addr = c + 1;
  971. }
  972. }
  973. }
  974. #else
  975. InternetAddress *addr;
  976. i = 0;
  977. while (cur) {
  978. addr = internet_address_list_get_address (cur);
  979. if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
  980. ar[i].name = rspamd_mempool_strdup (task->task_pool,
  981. internet_address_get_addr (addr));
  982. if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) {
  983. *c = '\0';
  984. ar[i].addr = c + 1;
  985. }
  986. cur = internet_address_list_next (cur);
  987. i++;
  988. }
  989. else {
  990. cur = internet_address_list_next (cur);
  991. }
  992. }
  993. #endif
  994. /* Cycle all elements in array */
  995. for (i = 0; i < num; i++) {
  996. for (j = i + 1; j < num; j++) {
  997. if (ar[i].name && ar[j].name &&
  998. g_ascii_strncasecmp (ar[i].name, ar[j].name,
  999. COMPARE_RCPT_LEN) == 0) {
  1000. /* Common name part */
  1001. hits++;
  1002. }
  1003. #if 0
  1004. /* XXX: when we have a typical mail that is headed towards
  1005. * several users within the same domain, then this rule
  1006. * leads to a false-positive.
  1007. * We actually need to match host against tld, but this is currently
  1008. * too expensive.
  1009. *
  1010. * TODO: think about normal representation of InternetAddress shit
  1011. */
  1012. else if (ar[i].addr && ar[j].addr &&
  1013. g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) {
  1014. /* Common address part, but different name */
  1015. hits++;
  1016. }
  1017. #endif
  1018. total++;
  1019. }
  1020. }
  1021. if ((double)(hits * num / 2.) / (double)total >= threshold) {
  1022. return TRUE;
  1023. }
  1024. return FALSE;
  1025. }
  1026. gboolean
  1027. rspamd_has_only_html_part (struct rspamd_task * task, GArray * args,
  1028. void *unused)
  1029. {
  1030. struct mime_text_part *p;
  1031. gboolean res = FALSE;
  1032. if (task->text_parts->len == 1) {
  1033. p = g_ptr_array_index (task->text_parts, 0);
  1034. if (IS_PART_HTML (p)) {
  1035. res = TRUE;
  1036. }
  1037. else {
  1038. res = FALSE;
  1039. }
  1040. }
  1041. return res;
  1042. }
  1043. static gboolean
  1044. is_recipient_list_sorted (const InternetAddressList * ia)
  1045. {
  1046. const InternetAddressList *cur;
  1047. InternetAddress *addr;
  1048. InternetAddressMailbox *addr_mb;
  1049. gboolean res = TRUE;
  1050. struct addr_list current = { NULL, NULL }, previous = {
  1051. NULL, NULL
  1052. };
  1053. #ifdef GMIME24
  1054. gint num, i;
  1055. #endif
  1056. /* Do not check to short address lists */
  1057. if (internet_address_list_length ((InternetAddressList *)ia) <
  1058. MIN_RCPT_TO_COMPARE) {
  1059. return FALSE;
  1060. }
  1061. #ifdef GMIME24
  1062. num = internet_address_list_length ((InternetAddressList *)ia);
  1063. cur = ia;
  1064. for (i = 0; i < num; i++) {
  1065. addr =
  1066. internet_address_list_get_address ((InternetAddressList *)cur, i);
  1067. if (INTERNET_ADDRESS_IS_MAILBOX (addr)) {
  1068. addr_mb = INTERNET_ADDRESS_MAILBOX (addr);
  1069. current.addr = (gchar *) internet_address_mailbox_get_addr (addr_mb);
  1070. }
  1071. if (previous.addr != NULL) {
  1072. if (current.addr &&
  1073. g_ascii_strcasecmp (current.addr, previous.addr) <= 0) {
  1074. res = FALSE;
  1075. break;
  1076. }
  1077. }
  1078. previous.addr = current.addr;
  1079. }
  1080. #else
  1081. cur = ia;
  1082. while (cur) {
  1083. addr = internet_address_list_get_address (cur);
  1084. if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) {
  1085. current.addr = internet_address_get_addr (addr);
  1086. if (previous.addr != NULL) {
  1087. if (current.addr &&
  1088. g_ascii_strcasecmp (current.addr, previous.addr) < 0) {
  1089. res = FALSE;
  1090. break;
  1091. }
  1092. }
  1093. previous.addr = current.addr;
  1094. }
  1095. cur = internet_address_list_next (cur);
  1096. }
  1097. #endif
  1098. return res;
  1099. }
  1100. gboolean
  1101. rspamd_is_recipients_sorted (struct rspamd_task * task,
  1102. GArray * args,
  1103. void *unused)
  1104. {
  1105. /* Check all types of addresses */
  1106. if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
  1107. GMIME_RECIPIENT_TYPE_TO)) == TRUE) {
  1108. return TRUE;
  1109. }
  1110. if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
  1111. GMIME_RECIPIENT_TYPE_BCC)) == TRUE) {
  1112. return TRUE;
  1113. }
  1114. if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message,
  1115. GMIME_RECIPIENT_TYPE_CC)) == TRUE) {
  1116. return TRUE;
  1117. }
  1118. return FALSE;
  1119. }
  1120. gboolean
  1121. rspamd_compare_transfer_encoding (struct rspamd_task * task,
  1122. GArray * args,
  1123. void *unused)
  1124. {
  1125. GMimeObject *part;
  1126. #ifndef GMIME24
  1127. GMimePartEncodingType enc_req, part_enc;
  1128. #else
  1129. GMimeContentEncoding enc_req, part_enc;
  1130. #endif
  1131. struct expression_argument *arg;
  1132. if (args == NULL) {
  1133. msg_warn_task ("no parameters to function");
  1134. return FALSE;
  1135. }
  1136. arg = &g_array_index (args, struct expression_argument, 0);
  1137. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1138. msg_warn_task ("invalid argument to function is passed");
  1139. return FALSE;
  1140. }
  1141. #ifndef GMIME24
  1142. enc_req = g_mime_part_encoding_from_string (arg->data);
  1143. if (enc_req == GMIME_PART_ENCODING_DEFAULT) {
  1144. #else
  1145. enc_req = g_mime_content_encoding_from_string (arg->data);
  1146. if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) {
  1147. #endif
  1148. msg_warn_task ("bad encoding type: %s", (gchar *)arg->data);
  1149. return FALSE;
  1150. }
  1151. part = g_mime_message_get_mime_part (task->message);
  1152. if (part) {
  1153. if (GMIME_IS_PART (part)) {
  1154. #ifndef GMIME24
  1155. part_enc = g_mime_part_get_encoding (GMIME_PART (part));
  1156. if (part_enc == GMIME_PART_ENCODING_DEFAULT) {
  1157. /* Assume 7bit as default transfer encoding */
  1158. part_enc = GMIME_PART_ENCODING_7BIT;
  1159. }
  1160. #else
  1161. part_enc = g_mime_part_get_content_encoding (GMIME_PART (part));
  1162. if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) {
  1163. /* Assume 7bit as default transfer encoding */
  1164. part_enc = GMIME_CONTENT_ENCODING_7BIT;
  1165. }
  1166. #endif
  1167. debug_task ("got encoding in part: %d and compare with %d",
  1168. (gint)part_enc,
  1169. (gint)enc_req);
  1170. #ifndef GMIME24
  1171. g_object_unref (part);
  1172. #endif
  1173. return part_enc == enc_req;
  1174. }
  1175. #ifndef GMIME24
  1176. g_object_unref (part);
  1177. #endif
  1178. }
  1179. return FALSE;
  1180. }
  1181. gboolean
  1182. rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused)
  1183. {
  1184. struct mime_text_part *p;
  1185. guint i;
  1186. gboolean res = TRUE;
  1187. for (i = 0; i < task->text_parts->len; i ++) {
  1188. p = g_ptr_array_index (task->text_parts, i);
  1189. if (!IS_PART_EMPTY (p) && IS_PART_HTML (p)) {
  1190. if (p->flags & RSPAMD_MIME_PART_FLAG_BALANCED) {
  1191. res = TRUE;
  1192. }
  1193. else {
  1194. res = FALSE;
  1195. break;
  1196. }
  1197. }
  1198. }
  1199. return res;
  1200. }
  1201. gboolean
  1202. rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
  1203. {
  1204. struct mime_text_part *p;
  1205. struct expression_argument *arg;
  1206. guint i;
  1207. gboolean res = FALSE;
  1208. if (args == NULL) {
  1209. msg_warn_task ("no parameters to function");
  1210. return FALSE;
  1211. }
  1212. arg = &g_array_index (args, struct expression_argument, 0);
  1213. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1214. msg_warn_task ("invalid argument to function is passed");
  1215. return FALSE;
  1216. }
  1217. for (i = 0; i < task->text_parts->len && res; i ++) {
  1218. p = g_ptr_array_index (task->text_parts, i);
  1219. if (!IS_PART_EMPTY (p) && IS_PART_HTML (p) && p->html) {
  1220. res = rspamd_html_tag_seen (p->html, arg->data);
  1221. }
  1222. }
  1223. return res;
  1224. }
  1225. gboolean
  1226. rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused)
  1227. {
  1228. struct mime_text_part *p;
  1229. guint i;
  1230. gboolean res = FALSE;
  1231. for (i = 0; i < task->text_parts->len && res; i ++) {
  1232. p = g_ptr_array_index (task->text_parts, i);
  1233. if (!IS_PART_EMPTY (p) && IS_PART_HTML (p) && p->html->html_tags == NULL) {
  1234. res = TRUE;
  1235. }
  1236. }
  1237. return res;
  1238. }
  1239. static gboolean
  1240. rspamd_raw_header_exists (struct rspamd_task *task, GArray * args, void *unused)
  1241. {
  1242. struct expression_argument *arg;
  1243. if (args == NULL || task == NULL) {
  1244. return FALSE;
  1245. }
  1246. arg = &g_array_index (args, struct expression_argument, 0);
  1247. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1248. msg_warn_task ("invalid argument to function is passed");
  1249. return FALSE;
  1250. }
  1251. return g_hash_table_lookup (task->raw_headers, arg->data) != NULL;
  1252. }
  1253. static gboolean
  1254. match_smtp_data (struct rspamd_task *task,
  1255. struct expression_argument *arg,
  1256. const gchar *what)
  1257. {
  1258. rspamd_regexp_t *re;
  1259. gint r;
  1260. if (arg->type == EXPRESSION_ARGUMENT_REGEXP) {
  1261. /* This is a regexp */
  1262. re = arg->data;
  1263. if (re == NULL) {
  1264. msg_warn_task ("cannot compile regexp for function");
  1265. return FALSE;
  1266. }
  1267. r = rspamd_regexp_search (re, what, 0, NULL, NULL, FALSE, NULL);
  1268. return r;
  1269. }
  1270. else if (arg->type == EXPRESSION_ARGUMENT_NORMAL &&
  1271. g_ascii_strcasecmp (arg->data, what) == 0) {
  1272. return TRUE;
  1273. }
  1274. return FALSE;
  1275. }
  1276. static gboolean
  1277. rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused)
  1278. {
  1279. struct expression_argument *arg;
  1280. InternetAddressList *ia = NULL;
  1281. const gchar *type, *what = NULL;
  1282. gint i, ialen;
  1283. if (args == NULL) {
  1284. msg_warn_task ("no parameters to function");
  1285. return FALSE;
  1286. }
  1287. arg = &g_array_index (args, struct expression_argument, 0);
  1288. if (!arg || !arg->data || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1289. msg_warn_task ("no parameters to function");
  1290. return FALSE;
  1291. }
  1292. else {
  1293. type = arg->data;
  1294. switch (*type) {
  1295. case 'f':
  1296. case 'F':
  1297. if (g_ascii_strcasecmp (type, "from") == 0) {
  1298. what = rspamd_task_get_sender (task);
  1299. }
  1300. else {
  1301. msg_warn_task ("bad argument to function: %s", type);
  1302. return FALSE;
  1303. }
  1304. break;
  1305. case 'h':
  1306. case 'H':
  1307. if (g_ascii_strcasecmp (type, "helo") == 0) {
  1308. what = task->helo;
  1309. }
  1310. else {
  1311. msg_warn_task ("bad argument to function: %s", type);
  1312. return FALSE;
  1313. }
  1314. break;
  1315. case 'u':
  1316. case 'U':
  1317. if (g_ascii_strcasecmp (type, "user") == 0) {
  1318. what = task->user;
  1319. }
  1320. else {
  1321. msg_warn_task ("bad argument to function: %s", type);
  1322. return FALSE;
  1323. }
  1324. break;
  1325. case 's':
  1326. case 'S':
  1327. if (g_ascii_strcasecmp (type, "subject") == 0) {
  1328. what = task->subject;
  1329. }
  1330. else {
  1331. msg_warn_task ("bad argument to function: %s", type);
  1332. return FALSE;
  1333. }
  1334. break;
  1335. case 'r':
  1336. case 'R':
  1337. if (g_ascii_strcasecmp (type, "rcpt") == 0) {
  1338. ia = task->rcpt_mime;
  1339. }
  1340. else {
  1341. msg_warn_task ("bad argument to function: %s", type);
  1342. return FALSE;
  1343. }
  1344. break;
  1345. default:
  1346. msg_warn_task ("bad argument to function: %s", type);
  1347. return FALSE;
  1348. }
  1349. }
  1350. if (what == NULL && ia == NULL) {
  1351. /* Not enough data so regexp would NOT be found anyway */
  1352. return FALSE;
  1353. }
  1354. /* We would process only one more argument, others are ignored */
  1355. if (args->len >= 2) {
  1356. arg = &g_array_index (args, struct expression_argument, 1);
  1357. if (arg) {
  1358. if (what != NULL) {
  1359. return match_smtp_data (task, arg, what);
  1360. }
  1361. else {
  1362. if (ia != NULL) {
  1363. ialen = internet_address_list_length(ia);
  1364. for (i = 0; i < ialen; i ++) {
  1365. InternetAddress *iaelt =
  1366. internet_address_list_get_address(ia, i);
  1367. InternetAddressMailbox *iamb =
  1368. INTERNET_ADDRESS_IS_MAILBOX(iaelt) ?
  1369. INTERNET_ADDRESS_MAILBOX (iaelt) : NULL;
  1370. if (iamb &&
  1371. match_smtp_data (task, arg,
  1372. internet_address_mailbox_get_addr(iamb))) {
  1373. return TRUE;
  1374. }
  1375. }
  1376. }
  1377. }
  1378. }
  1379. }
  1380. return FALSE;
  1381. }
  1382. static gboolean
  1383. rspamd_content_type_compare_param (struct rspamd_task * task,
  1384. GArray * args,
  1385. void *unused)
  1386. {
  1387. const gchar *param_name;
  1388. const gchar *param_data;
  1389. rspamd_regexp_t *re;
  1390. struct expression_argument *arg, *arg1, *arg_pattern;
  1391. GMimeObject *part;
  1392. GMimeContentType *ct;
  1393. gint r;
  1394. guint i;
  1395. gboolean recursive = FALSE;
  1396. struct mime_part *cur_part;
  1397. if (args == NULL || args->len < 2) {
  1398. msg_warn_task ("no parameters to function");
  1399. return FALSE;
  1400. }
  1401. arg = &g_array_index (args, struct expression_argument, 0);
  1402. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1403. param_name = arg->data;
  1404. arg_pattern = &g_array_index (args, struct expression_argument, 1);
  1405. for (i = 0; i < task->parts->len; i ++) {
  1406. cur_part = g_ptr_array_index (task->parts, i);
  1407. part = cur_part->mime;
  1408. ct = (GMimeContentType *)g_mime_object_get_content_type (part);
  1409. if (args->len >= 3) {
  1410. arg1 = &g_array_index (args, struct expression_argument, 2);
  1411. if (g_ascii_strncasecmp (arg1->data, "true",
  1412. sizeof ("true") - 1) == 0) {
  1413. recursive = TRUE;
  1414. }
  1415. }
  1416. else {
  1417. /*
  1418. * If user did not specify argument, let's assume that he wants
  1419. * recursive search if mime part is multipart/mixed
  1420. */
  1421. if (g_mime_content_type_is_type (ct, "multipart", "*")) {
  1422. recursive = TRUE;
  1423. }
  1424. }
  1425. #ifndef GMIME24
  1426. g_object_unref (part);
  1427. #endif
  1428. if ((param_data =
  1429. g_mime_content_type_get_parameter ((GMimeContentType *)ct,
  1430. param_name)) != NULL) {
  1431. if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
  1432. re = arg_pattern->data;
  1433. r = rspamd_regexp_search (re, param_data, 0,
  1434. NULL, NULL, FALSE, NULL);
  1435. if (r) {
  1436. return TRUE;
  1437. }
  1438. }
  1439. else {
  1440. /* Just do strcasecmp */
  1441. if (g_ascii_strcasecmp (param_data, arg_pattern->data) == 0) {
  1442. return TRUE;
  1443. }
  1444. }
  1445. }
  1446. /* Get next part */
  1447. if (!recursive) {
  1448. break;
  1449. }
  1450. }
  1451. return FALSE;
  1452. }
  1453. static gboolean
  1454. rspamd_content_type_has_param (struct rspamd_task * task,
  1455. GArray * args,
  1456. void *unused)
  1457. {
  1458. gchar *param_name;
  1459. const gchar *param_data;
  1460. struct expression_argument *arg, *arg1;
  1461. GMimeObject *part;
  1462. GMimeContentType *ct;
  1463. gboolean recursive = FALSE, result = FALSE;
  1464. guint i;
  1465. struct mime_part *cur_part;
  1466. if (args == NULL || args->len < 1) {
  1467. msg_warn_task ("no parameters to function");
  1468. return FALSE;
  1469. }
  1470. arg = &g_array_index (args, struct expression_argument, 0);
  1471. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1472. param_name = arg->data;
  1473. for (i = 0; i < task->parts->len; i ++) {
  1474. cur_part = g_ptr_array_index (task->parts, i);
  1475. part = cur_part->mime;
  1476. ct = (GMimeContentType *)g_mime_object_get_content_type (part);
  1477. if (args->len >= 2) {
  1478. arg1 = &g_array_index (args, struct expression_argument, 2);
  1479. if (g_ascii_strncasecmp (arg1->data, "true",
  1480. sizeof ("true") - 1) == 0) {
  1481. recursive = TRUE;
  1482. }
  1483. }
  1484. else {
  1485. /*
  1486. * If user did not specify argument, let's assume that he wants
  1487. * recursive search if mime part is multipart/mixed
  1488. */
  1489. if (g_mime_content_type_is_type (ct, "multipart", "*")) {
  1490. recursive = TRUE;
  1491. }
  1492. }
  1493. #ifndef GMIME24
  1494. g_object_unref (part);
  1495. #endif
  1496. if ((param_data =
  1497. g_mime_content_type_get_parameter ((GMimeContentType *)ct,
  1498. param_name)) != NULL) {
  1499. return TRUE;
  1500. }
  1501. /* Get next part */
  1502. if (!recursive) {
  1503. break;
  1504. }
  1505. }
  1506. return result;
  1507. }
  1508. static gboolean
  1509. rspamd_content_type_check (struct rspamd_task *task,
  1510. GArray * args,
  1511. gboolean check_subtype)
  1512. {
  1513. const gchar *param_data;
  1514. rspamd_regexp_t *re;
  1515. struct expression_argument *arg1, *arg_pattern;
  1516. GMimeObject *part;
  1517. GMimeContentType *ct;
  1518. gint r;
  1519. guint i;
  1520. gboolean recursive = FALSE;
  1521. struct mime_part *cur_part;
  1522. if (args == NULL || args->len < 1) {
  1523. msg_warn_task ("no parameters to function");
  1524. return FALSE;
  1525. }
  1526. arg_pattern = &g_array_index (args, struct expression_argument, 0);
  1527. for (i = 0; i < task->parts->len; i ++) {
  1528. cur_part = g_ptr_array_index (task->parts, i);
  1529. part = cur_part->mime;
  1530. ct = (GMimeContentType *)g_mime_object_get_content_type (part);
  1531. if (args->len >= 2) {
  1532. arg1 = &g_array_index (args, struct expression_argument, 1);
  1533. if (g_ascii_strncasecmp (arg1->data, "true",
  1534. sizeof ("true") - 1) == 0) {
  1535. recursive = TRUE;
  1536. }
  1537. }
  1538. else {
  1539. /*
  1540. * If user did not specify argument, let's assume that he wants
  1541. * recursive search if mime part is multipart/mixed
  1542. */
  1543. if (g_mime_content_type_is_type (ct, "multipart", "*")) {
  1544. recursive = TRUE;
  1545. }
  1546. }
  1547. #ifndef GMIME24
  1548. g_object_unref (part);
  1549. #endif
  1550. if (check_subtype) {
  1551. param_data = ct->subtype;
  1552. }
  1553. else {
  1554. param_data = ct->type;
  1555. }
  1556. if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
  1557. re = arg_pattern->data;
  1558. r = rspamd_regexp_search (re, param_data, 0,
  1559. NULL, NULL, FALSE, NULL);
  1560. if (r) {
  1561. return TRUE;
  1562. }
  1563. }
  1564. else {
  1565. /* Just do strcasecmp */
  1566. if (g_ascii_strcasecmp (param_data, arg_pattern->data) == 0) {
  1567. return TRUE;
  1568. }
  1569. }
  1570. /* Get next part */
  1571. if (!recursive) {
  1572. break;
  1573. }
  1574. }
  1575. return FALSE;
  1576. }
  1577. static gboolean
  1578. rspamd_content_type_is_type (struct rspamd_task * task,
  1579. GArray * args,
  1580. void *unused)
  1581. {
  1582. return rspamd_content_type_check (task, args, FALSE);
  1583. }
  1584. static gboolean
  1585. rspamd_content_type_is_subtype (struct rspamd_task * task,
  1586. GArray * args,
  1587. void *unused)
  1588. {
  1589. return rspamd_content_type_check (task, args, TRUE);
  1590. }
  1591. static gboolean
  1592. compare_subtype (struct rspamd_task *task, GMimeContentType * ct,
  1593. struct expression_argument *subtype)
  1594. {
  1595. rspamd_regexp_t *re;
  1596. gint r = 0;
  1597. if (subtype == NULL || ct == NULL) {
  1598. msg_warn_task ("invalid parameters passed");
  1599. return FALSE;
  1600. }
  1601. if (subtype->type == EXPRESSION_ARGUMENT_REGEXP) {
  1602. re = subtype->data;
  1603. r = rspamd_regexp_search (re, ct->subtype, 0,
  1604. NULL, NULL, FALSE, NULL);
  1605. }
  1606. else {
  1607. /* Just do strcasecmp */
  1608. if (ct->subtype && g_ascii_strcasecmp (ct->subtype, subtype->data) == 0) {
  1609. return TRUE;
  1610. }
  1611. }
  1612. return r;
  1613. }
  1614. static gboolean
  1615. compare_len (struct mime_part *part, guint min, guint max)
  1616. {
  1617. if (min == 0 && max == 0) {
  1618. return TRUE;
  1619. }
  1620. if (min == 0) {
  1621. return part->content->len <= max;
  1622. }
  1623. else if (max == 0) {
  1624. return part->content->len >= min;
  1625. }
  1626. else {
  1627. return part->content->len >= min && part->content->len <= max;
  1628. }
  1629. }
  1630. static gboolean
  1631. common_has_content_part (struct rspamd_task * task,
  1632. struct expression_argument *param_type,
  1633. struct expression_argument *param_subtype,
  1634. gint min_len,
  1635. gint max_len)
  1636. {
  1637. rspamd_regexp_t *re;
  1638. struct mime_part *part;
  1639. GMimeContentType *ct;
  1640. gint r;
  1641. guint i;
  1642. for (i = 0; i < task->parts->len; i ++) {
  1643. part = g_ptr_array_index (task->parts, i);
  1644. ct = part->type;
  1645. if (ct == NULL) {
  1646. continue;
  1647. }
  1648. if (param_type->type == EXPRESSION_ARGUMENT_REGEXP) {
  1649. re = param_type->data;
  1650. r = rspamd_regexp_search (re, ct->type, 0,
  1651. NULL, NULL, FALSE, NULL);
  1652. /* Also check subtype and length of the part */
  1653. if (r && param_subtype) {
  1654. r = compare_len (part, min_len, max_len) &&
  1655. compare_subtype (task, ct, param_subtype);
  1656. }
  1657. }
  1658. else {
  1659. /* Just do strcasecmp */
  1660. if (ct->type && g_ascii_strcasecmp (ct->type, param_type->data) == 0) {
  1661. if (param_subtype) {
  1662. if (compare_subtype (task, ct, param_subtype)) {
  1663. if (compare_len (part, min_len, max_len)) {
  1664. return TRUE;
  1665. }
  1666. }
  1667. }
  1668. else {
  1669. if (compare_len (part, min_len, max_len)) {
  1670. return TRUE;
  1671. }
  1672. }
  1673. }
  1674. }
  1675. }
  1676. return FALSE;
  1677. }
  1678. static gboolean
  1679. rspamd_has_content_part (struct rspamd_task * task, GArray * args, void *unused)
  1680. {
  1681. struct expression_argument *param_type = NULL, *param_subtype = NULL;
  1682. if (args == NULL) {
  1683. msg_warn_task ("no parameters to function");
  1684. return FALSE;
  1685. }
  1686. param_type = &g_array_index (args, struct expression_argument, 0);
  1687. if (args->len >= 2) {
  1688. param_subtype = &g_array_index (args, struct expression_argument, 1);
  1689. }
  1690. return common_has_content_part (task, param_type, param_subtype, 0, 0);
  1691. }
  1692. static gboolean
  1693. rspamd_has_content_part_len (struct rspamd_task * task,
  1694. GArray * args,
  1695. void *unused)
  1696. {
  1697. struct expression_argument *param_type = NULL, *param_subtype = NULL;
  1698. gint min = 0, max = 0;
  1699. struct expression_argument *arg;
  1700. if (args == NULL) {
  1701. msg_warn_task ("no parameters to function");
  1702. return FALSE;
  1703. }
  1704. param_type = &g_array_index (args, struct expression_argument, 0);
  1705. if (args->len >= 2) {
  1706. param_subtype = &g_array_index (args, struct expression_argument, 1);
  1707. if (args->len >= 3) {
  1708. arg = &g_array_index (args, struct expression_argument, 2);
  1709. errno = 0;
  1710. min = strtoul (arg->data, NULL, 10);
  1711. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1712. if (errno != 0) {
  1713. msg_warn_task ("invalid numeric value '%s': %s",
  1714. (gchar *)arg->data,
  1715. strerror (errno));
  1716. return FALSE;
  1717. }
  1718. if (args->len >= 4) {
  1719. arg = &g_array_index (args, struct expression_argument, 3);
  1720. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1721. max = strtoul (arg->data, NULL, 10);
  1722. if (errno != 0) {
  1723. msg_warn_task ("invalid numeric value '%s': %s",
  1724. (gchar *)arg->data,
  1725. strerror (errno));
  1726. return FALSE;
  1727. }
  1728. }
  1729. }
  1730. }
  1731. return common_has_content_part (task, param_type, param_subtype, min, max);
  1732. }