You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

mime_expressions.c 52KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <contrib/libucl/ucl.h>
  17. #include "config.h"
  18. #include "util.h"
  19. #include "cfg_file.h"
  20. #include "rspamd.h"
  21. #include "message.h"
  22. #include "mime_expressions.h"
  23. #include "html.h"
  24. #include "lua/lua_common.h"
  25. #include "utlist.h"
  /*
   * Forward declarations of the expression functions referenced by
   * rspamd_functions_list. They all share one signature: a task, a GArray of
   * arguments and an opaque pointer (named `unused` here).
   *
   * This first group has external linkage.
   */
  gboolean rspamd_compare_encoding (struct rspamd_task *task,
      GArray * args,
      void *unused);
  gboolean rspamd_header_exists (struct rspamd_task *task,
      GArray * args,
      void *unused);
  gboolean rspamd_parts_distance (struct rspamd_task *task,
      GArray * args,
      void *unused);
  gboolean rspamd_recipients_distance (struct rspamd_task *task,
      GArray * args,
      void *unused);
  gboolean rspamd_has_only_html_part (struct rspamd_task *task,
      GArray * args,
      void *unused);
  gboolean rspamd_is_recipients_sorted (struct rspamd_task *task,
      GArray * args,
      void *unused);
  gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task,
      GArray * args,
      void *unused);
  gboolean rspamd_is_html_balanced (struct rspamd_task *task,
      GArray * args,
      void *unused);
  gboolean rspamd_has_html_tag (struct rspamd_task *task,
      GArray * args,
      void *unused);
  gboolean rspamd_has_fake_html (struct rspamd_task *task,
      GArray * args,
      void *unused);

  /* The remaining expression functions are private to this file */
  static gboolean rspamd_raw_header_exists (struct rspamd_task *task,
      GArray * args,
      void *unused);
  static gboolean rspamd_check_smtp_data (struct rspamd_task *task,
      GArray * args,
      void *unused);
  static gboolean rspamd_content_type_is_type (struct rspamd_task * task,
      GArray * args,
      void *unused);
  static gboolean rspamd_content_type_is_subtype (struct rspamd_task *task,
      GArray * args,
      void *unused);
  static gboolean rspamd_content_type_has_param (struct rspamd_task * task,
      GArray * args,
      void *unused);
  static gboolean rspamd_content_type_compare_param (struct rspamd_task * task,
      GArray * args,
      void *unused);
  static gboolean rspamd_has_content_part (struct rspamd_task *task,
      GArray * args,
      void *unused);
  static gboolean rspamd_has_content_part_len (struct rspamd_task *task,
      GArray * args,
      void *unused);
  static gboolean rspamd_is_empty_body (struct rspamd_task *task,
      GArray * args,
      void *unused);
  static gboolean rspamd_has_flag_expr (struct rspamd_task *task,
      GArray * args,
      void *unused);

  /*
   * Atom callbacks collected in mime_expr_subr
   * (parse, process, priority and destroy respectively)
   */
  static rspamd_expression_atom_t * rspamd_mime_expr_parse (const gchar *line, gsize len,
      rspamd_mempool_t *pool, gpointer ud, GError **err);
  static gdouble rspamd_mime_expr_process (struct rspamd_expr_process_data *process_data, rspamd_expression_atom_t *atom);
  static gint rspamd_mime_expr_priority (rspamd_expression_atom_t *atom);
  static void rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom);
  /**
   * Regexp structure: the parsed form of a regexp atom as produced by
   * rspamd_mime_expr_parse_regexp_atom
   */
  struct rspamd_regexp_atom {
      enum rspamd_re_type type; /**< regexp type, selected by a type flag or a long `{name}` option */
      gchar *regexp_text; /**< regexp text representation (copy of the atom text without leading spaces) */
      rspamd_regexp_t *regexp; /**< compiled regexp structure */
      union {
          const gchar *header; /**< header name for header regexps (text before '=') */
          const gchar *selector; /**< selector name for lua selector regexp (text before '=') */
      } extra;
      gboolean is_test; /**< true if this expression must be tested ('T' flag) */
      gboolean is_strong; /**< true if headers search must be case sensitive ('S' flag) */
      gboolean is_multiple; /**< true if we need to match all inclusions of atom ('A' flag) */
  };
  /**
   * Rspamd expression function: the parsed form of a `name(arg, ...)` atom
   * as produced by rspamd_mime_expr_parse_function_atom
   */
  struct rspamd_function_atom {
      gchar *name; /**< name of function (the text before the first '(') */
      GArray *args; /**< its args, an array of struct expression_argument */
  };
  /**
   * Kind of a parsed mime atom, as decided by rspamd_mime_expr_parse
   */
  enum rspamd_mime_atom_type {
      MIME_ATOM_REGEXP = 0, /* regexp atom; the parser's initial assumption */
      MIME_ATOM_INTERNAL_FUNCTION, /* chosen once '(' is met while reading the atom */
      MIME_ATOM_LUA_FUNCTION, /* lua function identified by just a string (global name) */
      MIME_ATOM_LOCAL_LUA_FUNCTION, /* New style: atom written with the "lua:" prefix */
  };
  /**
   * A single parsed mime expression atom
   */
  struct rspamd_mime_atom {
      gchar *str; /**< copy of the atom text */
      union {
          struct rspamd_regexp_atom *re; /**< used for MIME_ATOM_REGEXP */
          struct rspamd_function_atom *func; /**< presumably used for MIME_ATOM_INTERNAL_FUNCTION - confirm */
          const gchar *lua_function; /**< used for MIME_ATOM_LUA_FUNCTION (points to str) */
          gint lua_cbref; /**< presumably used for MIME_ATOM_LOCAL_LUA_FUNCTION - confirm */
      } d;
      enum rspamd_mime_atom_type type; /**< atom kind */
  };
  /*
   * List of internal functions of rspamd
   * Sorted by name to use bsearch: entries must stay in strcmp() order of
   * their names (the order used by fl_cmp), so keep new entries sorted as well.
   * Each entry holds the expression name, its handler and a user data pointer
   * (NULL for every built-in entry).
   */
  static struct _fl {
      const gchar *name;
      rspamd_internal_func_t func;
      void *user_data;
  } rspamd_functions_list[] = {
      {"check_smtp_data", rspamd_check_smtp_data, NULL},
      {"compare_encoding", rspamd_compare_encoding, NULL},
      {"compare_parts_distance", rspamd_parts_distance, NULL},
      {"compare_recipients_distance", rspamd_recipients_distance, NULL},
      {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
      {"content_type_compare_param", rspamd_content_type_compare_param, NULL},
      {"content_type_has_param", rspamd_content_type_has_param, NULL},
      {"content_type_is_subtype", rspamd_content_type_is_subtype, NULL},
      {"content_type_is_type", rspamd_content_type_is_type, NULL},
      {"has_content_part", rspamd_has_content_part, NULL},
      {"has_content_part_len", rspamd_has_content_part_len, NULL},
      {"has_fake_html", rspamd_has_fake_html, NULL},
      {"has_flag", rspamd_has_flag_expr, NULL},
      {"has_html_tag", rspamd_has_html_tag, NULL},
      {"has_only_html_part", rspamd_has_only_html_part, NULL},
      {"header_exists", rspamd_header_exists, NULL},
      {"is_empty_body", rspamd_is_empty_body, NULL},
      {"is_html_balanced", rspamd_is_html_balanced, NULL},
      {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL},
      {"raw_header_exists", rspamd_raw_header_exists, NULL},
  };
  /*
   * Atom subroutines for mime expressions: the parse, process, priority and
   * destroy callbacks implemented in this file
   */
  const struct rspamd_atom_subr mime_expr_subr = {
      .parse = rspamd_mime_expr_parse,
      .process = rspamd_mime_expr_process,
      .priority = rspamd_mime_expr_priority,
      .destroy = rspamd_mime_expr_destroy
  };
  /* Current functions table; initially the static rspamd_functions_list */
  static struct _fl *list_ptr = &rspamd_functions_list[0];
  /* Number of entries in the table pointed to by list_ptr */
  static guint32 functions_number = sizeof (rspamd_functions_list) /
      sizeof (struct _fl);
  /*
   * NOTE(review): presumably becomes TRUE once list_ptr is replaced by a
   * dynamically allocated copy - the code doing that is not visible here
   */
  static gboolean list_allocated = FALSE;
  169. /* Bsearch routine */
  170. static gint
  171. fl_cmp (const void *s1, const void *s2)
  172. {
  173. struct _fl *fl1 = (struct _fl *)s1;
  174. struct _fl *fl2 = (struct _fl *)s2;
  175. return strcmp (fl1->name, fl2->name);
  176. }
  177. static GQuark
  178. rspamd_mime_expr_quark (void)
  179. {
  180. return g_quark_from_static_string ("mime-expressions");
  181. }
  /*
   * Non-zero when the `len` bytes at `str` match `type`: the lengths must be
   * equal and rspamd_lc_cmp() must return 0 (presumably a case-insensitive
   * comparison - confirm against rspamd_lc_cmp). `type` must be a string
   * literal (or a char array) because its length is taken with sizeof.
   */
  #define TYPE_CHECK(str, type, len) (sizeof(type) - 1 == (len) && rspamd_lc_cmp((str), (type), (len)) == 0)
  183. static gboolean
  184. rspamd_parse_long_option (const gchar *start, gsize len,
  185. struct rspamd_regexp_atom *a)
  186. {
  187. gboolean ret = FALSE;
  188. if (TYPE_CHECK (start, "body", len)) {
  189. ret = TRUE;
  190. a->type = RSPAMD_RE_BODY;
  191. }
  192. else if (TYPE_CHECK (start, "part", len) ||
  193. TYPE_CHECK (start, "mime", len)) {
  194. ret = TRUE;
  195. a->type = RSPAMD_RE_MIME;
  196. }
  197. else if (TYPE_CHECK (start, "raw_part", len) ||
  198. TYPE_CHECK (start, "raw_mime", len) ||
  199. TYPE_CHECK (start, "mime_raw", len)) {
  200. ret = TRUE;
  201. a->type = RSPAMD_RE_RAWMIME;
  202. }
  203. else if (TYPE_CHECK (start, "header", len)) {
  204. ret = TRUE;
  205. a->type = RSPAMD_RE_HEADER;
  206. }
  207. else if (TYPE_CHECK (start, "mime_header", len) ||
  208. TYPE_CHECK (start, "header_mime", len)) {
  209. ret = TRUE;
  210. a->type = RSPAMD_RE_MIMEHEADER;
  211. }
  212. else if (TYPE_CHECK (start, "raw_header", len) ||
  213. TYPE_CHECK (start, "header_raw", len)) {
  214. ret = TRUE;
  215. a->type = RSPAMD_RE_RAWHEADER;
  216. }
  217. else if (TYPE_CHECK (start, "all_header", len) ||
  218. TYPE_CHECK (start, "header_all", len) ||
  219. TYPE_CHECK (start, "all_headers", len)) {
  220. ret = TRUE;
  221. a->type = RSPAMD_RE_ALLHEADER;
  222. }
  223. else if (TYPE_CHECK (start, "url", len)) {
  224. ret = TRUE;
  225. a->type = RSPAMD_RE_URL;
  226. }
  227. else if (TYPE_CHECK (start, "sa_body", len)) {
  228. ret = TRUE;
  229. a->type = RSPAMD_RE_SABODY;
  230. }
  231. else if (TYPE_CHECK (start, "sa_raw_body", len) ||
  232. TYPE_CHECK (start, "sa_body_raw", len)) {
  233. ret = TRUE;
  234. a->type = RSPAMD_RE_SARAWBODY;
  235. }
  236. else if (TYPE_CHECK (start, "words", len)) {
  237. ret = TRUE;
  238. a->type = RSPAMD_RE_WORDS;
  239. }
  240. else if (TYPE_CHECK (start, "raw_words", len)) {
  241. ret = TRUE;
  242. a->type = RSPAMD_RE_RAWWORDS;
  243. }
  244. else if (TYPE_CHECK (start, "stem_words", len)) {
  245. ret = TRUE;
  246. a->type = RSPAMD_RE_STEMWORDS;
  247. }
  248. else if (TYPE_CHECK (start, "selector", len)) {
  249. ret = TRUE;
  250. a->type = RSPAMD_RE_SELECTOR;
  251. }
  252. return ret;
  253. }
  254. /*
  255. * Rspamd regexp utility functions
  256. */
  257. static struct rspamd_regexp_atom *
  258. rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line,
  259. struct rspamd_config *cfg)
  260. {
  261. const gchar *begin, *end, *p, *src, *start, *brace;
  262. gchar *dbegin, *dend, *extra = NULL;
  263. struct rspamd_regexp_atom *result;
  264. GError *err = NULL;
  265. GString *re_flags;
  266. if (line == NULL) {
  267. msg_err_pool ("cannot parse NULL line");
  268. return NULL;
  269. }
  270. src = line;
  271. result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_atom));
  272. /* Skip whitespaces */
  273. while (g_ascii_isspace (*line)) {
  274. line++;
  275. }
  276. if (*line == '\0') {
  277. msg_warn_pool ("got empty regexp");
  278. return NULL;
  279. }
  280. result->type = RSPAMD_RE_MAX;
  281. start = line;
  282. /* First try to find header name */
  283. begin = strchr (line, '/');
  284. if (begin != NULL) {
  285. p = begin;
  286. end = NULL;
  287. while (p != line) {
  288. if (*p == '=') {
  289. end = p;
  290. break;
  291. }
  292. p--;
  293. }
  294. if (end) {
  295. extra = rspamd_mempool_alloc (pool, end - line + 1);
  296. rspamd_strlcpy (extra, line, end - line + 1);
  297. line = end;
  298. }
  299. }
  300. else {
  301. extra = rspamd_mempool_strdup (pool, line);
  302. result->type = RSPAMD_RE_MAX;
  303. line = start;
  304. }
  305. /* Find begin of regexp */
  306. while (*line && *line != '/') {
  307. line++;
  308. }
  309. if (*line != '\0') {
  310. begin = line + 1;
  311. }
  312. else if (extra == NULL) {
  313. /* Assume that line without // is just a header name */
  314. extra = rspamd_mempool_strdup (pool, line);
  315. result->type = RSPAMD_RE_HEADER;
  316. return result;
  317. }
  318. else {
  319. /* We got header name earlier but have not found // expression, so it is invalid regexp */
  320. msg_warn_pool (
  321. "got no header name (eg. header=) but without corresponding regexp, %s",
  322. src);
  323. return NULL;
  324. }
  325. /* Find end */
  326. end = begin;
  327. while (*end && (*end != '/' || *(end - 1) == '\\')) {
  328. end++;
  329. }
  330. if (end == begin || *end != '/') {
  331. msg_warn_pool ("no trailing / in regexp %s", src);
  332. return NULL;
  333. }
  334. /* Parse flags */
  335. p = end + 1;
  336. re_flags = g_string_sized_new (32);
  337. while (p != NULL) {
  338. switch (*p) {
  339. case 'i':
  340. case 'm':
  341. case 's':
  342. case 'x':
  343. case 'u':
  344. case 'O':
  345. case 'r':
  346. g_string_append_c (re_flags, *p);
  347. p++;
  348. break;
  349. case 'o':
  350. p++;
  351. break;
  352. /* Type flags */
  353. case 'H':
  354. result->type = RSPAMD_RE_HEADER;
  355. p++;
  356. break;
  357. case 'R':
  358. result->type = RSPAMD_RE_RAWHEADER;
  359. p++;
  360. break;
  361. case 'B':
  362. result->type = RSPAMD_RE_MIMEHEADER;
  363. p++;
  364. break;
  365. case 'C':
  366. result->type = RSPAMD_RE_SABODY;
  367. p++;
  368. break;
  369. case 'D':
  370. result->type = RSPAMD_RE_SARAWBODY;
  371. p++;
  372. break;
  373. case 'M':
  374. result->type = RSPAMD_RE_BODY;
  375. p++;
  376. break;
  377. case 'P':
  378. result->type = RSPAMD_RE_MIME;
  379. p++;
  380. break;
  381. case 'Q':
  382. result->type = RSPAMD_RE_RAWMIME;
  383. p++;
  384. break;
  385. case 'U':
  386. result->type = RSPAMD_RE_URL;
  387. p++;
  388. break;
  389. case 'X':
  390. result->type = RSPAMD_RE_RAWHEADER;
  391. p++;
  392. break;
  393. case '$':
  394. result->type = RSPAMD_RE_SELECTOR;
  395. p++;
  396. break;
  397. case '{':
  398. /* Long definition */
  399. if ((brace = strchr (p + 1, '}')) != NULL) {
  400. if (!rspamd_parse_long_option (p + 1, brace - (p + 1), result)) {
  401. msg_warn_pool ("invalid long regexp type: %*s in '%s'",
  402. (int)(brace - (p + 1)), p + 1, src);
  403. p = NULL;
  404. }
  405. else {
  406. p = brace + 1;
  407. }
  408. }
  409. else {
  410. p = NULL;
  411. }
  412. break;
  413. /* Other flags */
  414. case 'T':
  415. result->is_test = TRUE;
  416. p++;
  417. break;
  418. case 'S':
  419. result->is_strong = TRUE;
  420. p++;
  421. break;
  422. case 'A':
  423. result->is_multiple = TRUE;
  424. p++;
  425. break;
  426. /* Stop flags parsing */
  427. default:
  428. p = NULL;
  429. break;
  430. }
  431. }
  432. if (result->type >= RSPAMD_RE_MAX) {
  433. if (extra) {
  434. /* Assume header regexp */
  435. result->extra.header = extra;
  436. result->type = RSPAMD_RE_HEADER;
  437. }
  438. else {
  439. msg_err_pool ("could not read regexp: %s, unknown type", src);
  440. return NULL;
  441. }
  442. }
  443. if ((result->type == RSPAMD_RE_HEADER ||
  444. result->type == RSPAMD_RE_RAWHEADER ||
  445. result->type == RSPAMD_RE_MIMEHEADER)) {
  446. if (extra == NULL) {
  447. msg_err_pool ("header regexp: '%s' has no header part", src);
  448. return NULL;
  449. }
  450. else {
  451. result->extra.header = extra;
  452. }
  453. }
  454. if (result->type == RSPAMD_RE_SELECTOR) {
  455. if (extra == NULL) {
  456. msg_err_pool ("selector regexp: '%s' has no selector part", src);
  457. return NULL;
  458. }
  459. else {
  460. result->extra.selector = extra;
  461. }
  462. }
  463. result->regexp_text = rspamd_mempool_strdup (pool, start);
  464. dbegin = result->regexp_text + (begin - start);
  465. dend = result->regexp_text + (end - start);
  466. *dend = '\0';
  467. result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
  468. &err);
  469. g_string_free (re_flags, TRUE);
  470. if (result->regexp == NULL || err != NULL) {
  471. msg_warn_pool ("could not read regexp: %s while reading regexp %e",
  472. src, err);
  473. if (err) {
  474. g_error_free (err);
  475. }
  476. return NULL;
  477. }
  478. if (result->is_multiple) {
  479. rspamd_regexp_set_maxhits (result->regexp, 0);
  480. }
  481. else {
  482. rspamd_regexp_set_maxhits (result->regexp, 1);
  483. }
  484. rspamd_regexp_set_ud (result->regexp, result);
  485. *dend = '/';
  486. return result;
  487. }
  488. struct rspamd_function_atom *
  489. rspamd_mime_expr_parse_function_atom (rspamd_mempool_t *pool, const gchar *input)
  490. {
  491. const gchar *obrace, *ebrace, *p, *c;
  492. gchar t, *databuf;
  493. guint len;
  494. struct rspamd_function_atom *res;
  495. struct expression_argument arg;
  496. GError *err = NULL;
  497. enum {
  498. start_read_argument = 0,
  499. in_string,
  500. in_regexp,
  501. got_backslash,
  502. got_comma
  503. } state, prev_state = 0;
  504. obrace = strchr (input, '(');
  505. ebrace = strrchr (input, ')');
  506. g_assert (obrace != NULL && ebrace != NULL);
  507. res = rspamd_mempool_alloc0 (pool, sizeof (*res));
  508. res->name = rspamd_mempool_alloc (pool, obrace - input + 1);
  509. rspamd_strlcpy (res->name, input, obrace - input + 1);
  510. res->args = g_array_new (FALSE, FALSE, sizeof (struct expression_argument));
  511. p = obrace + 1;
  512. c = p;
  513. state = start_read_argument;
  514. /* Read arguments */
  515. while (p <= ebrace) {
  516. t = *p;
  517. switch (state) {
  518. case start_read_argument:
  519. if (t == '/') {
  520. state = in_regexp;
  521. c = p;
  522. }
  523. else if (!g_ascii_isspace (t)) {
  524. state = in_string;
  525. if (t == '\'' || t == '\"') {
  526. c = p + 1;
  527. }
  528. else {
  529. c = p;
  530. }
  531. }
  532. p ++;
  533. break;
  534. case in_regexp:
  535. if (t == '\\') {
  536. state = got_backslash;
  537. prev_state = in_regexp;
  538. }
  539. else if (t == ',' || p == ebrace) {
  540. len = p - c + 1;
  541. databuf = rspamd_mempool_alloc (pool, len);
  542. rspamd_strlcpy (databuf, c, len);
  543. arg.type = EXPRESSION_ARGUMENT_REGEXP;
  544. arg.data = rspamd_regexp_cache_create (NULL, databuf, NULL, &err);
  545. if (arg.data == NULL) {
  546. /* Fallback to string */
  547. msg_warn ("cannot parse slashed argument %s as regexp: %s",
  548. databuf, err->message);
  549. g_error_free (err);
  550. arg.type = EXPRESSION_ARGUMENT_NORMAL;
  551. arg.data = databuf;
  552. }
  553. g_array_append_val (res->args, arg);
  554. state = got_comma;
  555. }
  556. p ++;
  557. break;
  558. case in_string:
  559. if (t == '\\') {
  560. state = got_backslash;
  561. prev_state = in_string;
  562. }
  563. else if (t == ',' || p == ebrace) {
  564. if (*(p - 1) == '\'' || *(p - 1) == '\"') {
  565. len = p - c;
  566. }
  567. else {
  568. len = p - c + 1;
  569. }
  570. databuf = rspamd_mempool_alloc (pool, len);
  571. rspamd_strlcpy (databuf, c, len);
  572. arg.type = EXPRESSION_ARGUMENT_NORMAL;
  573. arg.data = databuf;
  574. g_array_append_val (res->args, arg);
  575. state = got_comma;
  576. }
  577. p ++;
  578. break;
  579. case got_backslash:
  580. state = prev_state;
  581. p ++;
  582. break;
  583. case got_comma:
  584. state = start_read_argument;
  585. break;
  586. }
  587. }
  588. return res;
  589. }
  590. static rspamd_expression_atom_t *
  591. rspamd_mime_expr_parse (const gchar *line, gsize len,
  592. rspamd_mempool_t *pool, gpointer ud, GError **err)
  593. {
  594. rspamd_expression_atom_t *a = NULL;
  595. struct rspamd_mime_atom *mime_atom = NULL;
  596. const gchar *p, *end, *c = NULL;
  597. struct rspamd_mime_expr_ud *real_ud = (struct rspamd_mime_expr_ud *)ud;
  598. struct rspamd_config *cfg;
  599. rspamd_regexp_t *own_re;
  600. gchar t;
  601. gint type = MIME_ATOM_REGEXP, obraces = 0, ebraces = 0;
  602. enum {
  603. in_header = 0,
  604. got_slash,
  605. in_regexp,
  606. got_backslash,
  607. got_second_slash,
  608. in_flags,
  609. in_flags_brace,
  610. got_obrace,
  611. in_function,
  612. in_local_function,
  613. got_ebrace,
  614. end_atom,
  615. bad_atom
  616. } state = 0, prev_state = 0;
  617. p = line;
  618. end = p + len;
  619. cfg = real_ud->cfg;
  620. while (p < end) {
  621. t = *p;
  622. switch (state) {
  623. case in_header:
  624. if (t == '/') {
  625. /* Regexp */
  626. state = got_slash;
  627. }
  628. else if (t == '(') {
  629. /* Function */
  630. state = got_obrace;
  631. }
  632. else if (!g_ascii_isalnum (t) && t != '_' && t != '-' && t != '=') {
  633. if (t == ':') {
  634. if (p - line == 3 && memcmp (line, "lua", 3) == 0) {
  635. type = MIME_ATOM_LOCAL_LUA_FUNCTION;
  636. state = in_local_function;
  637. c = p + 1;
  638. }
  639. }
  640. else {
  641. /* Likely lua function, identified by just a string */
  642. type = MIME_ATOM_LUA_FUNCTION;
  643. state = end_atom;
  644. /* Do not increase p */
  645. continue;
  646. }
  647. }
  648. else if (g_ascii_isspace (t)) {
  649. state = bad_atom;
  650. }
  651. p ++;
  652. break;
  653. case got_slash:
  654. state = in_regexp;
  655. break;
  656. case in_regexp:
  657. if (t == '\\') {
  658. state = got_backslash;
  659. prev_state = in_regexp;
  660. }
  661. else if (t == '/') {
  662. state = got_second_slash;
  663. }
  664. p ++;
  665. break;
  666. case got_second_slash:
  667. state = in_flags;
  668. break;
  669. case in_flags:
  670. if (t == '{') {
  671. state = in_flags_brace;
  672. p ++;
  673. }
  674. else if (!g_ascii_isalpha (t) && t != '$') {
  675. state = end_atom;
  676. }
  677. else {
  678. p ++;
  679. }
  680. break;
  681. case in_flags_brace:
  682. if (t == '}') {
  683. state = in_flags;
  684. }
  685. p ++;
  686. break;
  687. case got_backslash:
  688. state = prev_state;
  689. p ++;
  690. break;
  691. case got_obrace:
  692. state = in_function;
  693. type = MIME_ATOM_INTERNAL_FUNCTION;
  694. obraces ++;
  695. break;
  696. case in_function:
  697. if (t == '\\') {
  698. state = got_backslash;
  699. prev_state = in_function;
  700. }
  701. else if (t == '(') {
  702. obraces ++;
  703. }
  704. else if (t == ')') {
  705. ebraces ++;
  706. if (ebraces == obraces) {
  707. state = got_ebrace;
  708. }
  709. }
  710. p ++;
  711. break;
  712. case in_local_function:
  713. if (!(g_ascii_isalnum (t) || t == '-' || t == '_')) {
  714. g_assert (c != NULL);
  715. state = end_atom;
  716. }
  717. else {
  718. p++;
  719. }
  720. break;
  721. case got_ebrace:
  722. state = end_atom;
  723. break;
  724. case bad_atom:
  725. g_set_error (err, rspamd_mime_expr_quark(), 100, "cannot parse"
  726. " mime atom '%s' when reading symbol '%c' at offset %d, "
  727. "near %*.s", line, t, (gint)(p - line),
  728. (gint)MIN (end - p, 10), p);
  729. return NULL;
  730. case end_atom:
  731. goto set;
  732. }
  733. }
  734. set:
  735. if (p - line == 0 || (state != got_ebrace && state != got_second_slash &&
  736. state != in_flags && state != end_atom)) {
  737. g_set_error (err, rspamd_mime_expr_quark(), 200, "incomplete or empty"
  738. " mime atom");
  739. return NULL;
  740. }
  741. mime_atom = rspamd_mempool_alloc (pool, sizeof (*mime_atom));
  742. mime_atom->type = type;
  743. mime_atom->str = rspamd_mempool_alloc (pool, p - line + 1);
  744. rspamd_strlcpy (mime_atom->str, line, p - line + 1);
  745. if (type == MIME_ATOM_REGEXP) {
  746. mime_atom->d.re = rspamd_mime_expr_parse_regexp_atom (pool,
  747. mime_atom->str, cfg);
  748. if (mime_atom->d.re == NULL) {
  749. g_set_error (err, rspamd_mime_expr_quark(), 200, "cannot parse regexp '%s'",
  750. mime_atom->str);
  751. goto err;
  752. }
  753. else {
  754. /* Register new item in the cache */
  755. if (mime_atom->d.re->type == RSPAMD_RE_HEADER ||
  756. mime_atom->d.re->type == RSPAMD_RE_RAWHEADER ||
  757. mime_atom->d.re->type == RSPAMD_RE_MIMEHEADER) {
  758. if (mime_atom->d.re->extra.header != NULL) {
  759. own_re = mime_atom->d.re->regexp;
  760. mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
  761. mime_atom->d.re->regexp,
  762. mime_atom->d.re->type,
  763. mime_atom->d.re->extra.header,
  764. strlen (mime_atom->d.re->extra.header) + 1);
  765. /* Pass ownership to the cache */
  766. rspamd_regexp_unref (own_re);
  767. }
  768. else {
  769. /* We have header regexp, but no header name is detected */
  770. g_set_error (err,
  771. rspamd_mime_expr_quark (),
  772. 200,
  773. "no header name in header regexp: '%s'",
  774. mime_atom->str);
  775. goto err;
  776. }
  777. }
  778. else if (mime_atom->d.re->type == RSPAMD_RE_SELECTOR) {
  779. if (mime_atom->d.re->extra.selector != NULL) {
  780. own_re = mime_atom->d.re->regexp;
  781. mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
  782. mime_atom->d.re->regexp,
  783. mime_atom->d.re->type,
  784. mime_atom->d.re->extra.selector,
  785. strlen (mime_atom->d.re->extra.selector) + 1);
  786. /* Pass ownership to the cache */
  787. rspamd_regexp_unref (own_re);
  788. }
  789. else {
  790. /* We have header regexp, but no header name is detected */
  791. g_set_error (err,
  792. rspamd_mime_expr_quark (),
  793. 200,
  794. "no selector name in selector regexp: '%s'",
  795. mime_atom->str);
  796. goto err;
  797. }
  798. }
  799. else {
  800. own_re = mime_atom->d.re->regexp;
  801. mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
  802. mime_atom->d.re->regexp,
  803. mime_atom->d.re->type,
  804. NULL,
  805. 0);
  806. /* Pass ownership to the cache */
  807. rspamd_regexp_unref (own_re);
  808. }
  809. }
  810. }
  811. else if (type == MIME_ATOM_LUA_FUNCTION) {
  812. mime_atom->d.lua_function = mime_atom->str;
  813. lua_getglobal (cfg->lua_state, mime_atom->str);
  814. if (lua_type (cfg->lua_state, -1) != LUA_TFUNCTION) {
  815. g_set_error (err, rspamd_mime_expr_quark(), 200, "no such lua function '%s'",
  816. mime_atom->str);
  817. lua_pop (cfg->lua_state, 1);
  818. goto err;
  819. }
  820. lua_pop (cfg->lua_state, 1);
  821. }
  822. else if (type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
  823. /* p pointer is set to the start of Lua function name */
  824. if (real_ud->conf_obj == NULL) {
  825. g_set_error (err, rspamd_mime_expr_quark(), 300,
  826. "no config object for '%s'",
  827. mime_atom->str);
  828. goto err;
  829. }
  830. const ucl_object_t *functions = ucl_object_lookup (real_ud->conf_obj,
  831. "functions");
  832. if (functions == NULL) {
  833. g_set_error (err, rspamd_mime_expr_quark(), 310,
  834. "no functions defined for '%s'",
  835. mime_atom->str);
  836. goto err;
  837. }
  838. if (ucl_object_type (functions) != UCL_OBJECT) {
  839. g_set_error (err, rspamd_mime_expr_quark(), 320,
  840. "functions is not a table for '%s'",
  841. mime_atom->str);
  842. goto err;
  843. }
  844. const ucl_object_t *function_obj;
  845. function_obj = ucl_object_lookup_len (functions, c,
  846. p - c);
  847. if (function_obj == NULL) {
  848. g_set_error (err, rspamd_mime_expr_quark(), 320,
  849. "function %*.s is not found for '%s'",
  850. (int)(p - c), c, mime_atom->str);
  851. goto err;
  852. }
  853. if (ucl_object_type (function_obj) != UCL_USERDATA) {
  854. g_set_error (err, rspamd_mime_expr_quark(), 320,
  855. "function %*.s has invalid type for '%s'",
  856. (int)(p - c), c, mime_atom->str);
  857. goto err;
  858. }
  859. struct ucl_lua_funcdata *fd = function_obj->value.ud;
  860. mime_atom->d.lua_cbref = fd->idx;
  861. }
  862. else {
  863. mime_atom->d.func = rspamd_mime_expr_parse_function_atom (pool,
  864. mime_atom->str);
  865. if (mime_atom->d.func == NULL) {
  866. g_set_error (err, rspamd_mime_expr_quark(), 200, "cannot parse function '%s'",
  867. mime_atom->str);
  868. goto err;
  869. }
  870. }
  871. a = rspamd_mempool_alloc0 (pool, sizeof (*a));
  872. a->len = p - line;
  873. a->priority = 0;
  874. a->data = mime_atom;
  875. return a;
  876. err:
  877. return NULL;
  878. }
  879. static gint
  880. rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
  881. struct rspamd_task *task)
  882. {
  883. gint ret;
  884. if (re == NULL) {
  885. msg_info_task ("invalid regexp passed");
  886. return 0;
  887. }
  888. if (re->type == RSPAMD_RE_HEADER || re->type == RSPAMD_RE_RAWHEADER) {
  889. ret = rspamd_re_cache_process (task,
  890. re->regexp,
  891. re->type,
  892. re->extra.header,
  893. strlen (re->extra.header),
  894. re->is_strong);
  895. }
  896. else if (re->type == RSPAMD_RE_SELECTOR) {
  897. ret = rspamd_re_cache_process (task,
  898. re->regexp,
  899. re->type,
  900. re->extra.selector,
  901. strlen (re->extra.selector),
  902. re->is_strong);
  903. }
  904. else {
  905. ret = rspamd_re_cache_process (task,
  906. re->regexp,
  907. re->type,
  908. NULL,
  909. 0,
  910. re->is_strong);
  911. }
  912. if (re->is_test) {
  913. msg_info_task ("test %s regexp '%s' returned %d",
  914. rspamd_re_cache_type_to_string (re->type),
  915. re->regexp_text, ret);
  916. }
  917. return ret;
  918. }
  919. static gint
  920. rspamd_mime_expr_priority (rspamd_expression_atom_t *atom)
  921. {
  922. struct rspamd_mime_atom *mime_atom = atom->data;
  923. gint ret = 0;
  924. switch (mime_atom->type) {
  925. case MIME_ATOM_INTERNAL_FUNCTION:
  926. /* Prioritize internal functions slightly */
  927. ret = 50;
  928. break;
  929. case MIME_ATOM_LUA_FUNCTION:
  930. case MIME_ATOM_LOCAL_LUA_FUNCTION:
  931. ret = 50;
  932. break;
  933. case MIME_ATOM_REGEXP:
  934. switch (mime_atom->d.re->type) {
  935. case RSPAMD_RE_HEADER:
  936. case RSPAMD_RE_RAWHEADER:
  937. ret = 100;
  938. break;
  939. case RSPAMD_RE_URL:
  940. ret = 90;
  941. break;
  942. case RSPAMD_RE_MIME:
  943. case RSPAMD_RE_RAWMIME:
  944. ret = 10;
  945. break;
  946. default:
  947. /* For message regexp */
  948. ret = 0;
  949. break;
  950. }
  951. }
  952. return ret;
  953. }
  954. static void
  955. rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom)
  956. {
  957. struct rspamd_mime_atom *mime_atom = atom->data;
  958. if (mime_atom) {
  959. if (mime_atom->type == MIME_ATOM_INTERNAL_FUNCTION) {
  960. /* Need to cleanup arguments */
  961. g_array_free (mime_atom->d.func->args, TRUE);
  962. }
  963. }
  964. }
  965. static gboolean
  966. rspamd_mime_expr_process_function (struct rspamd_function_atom * func,
  967. struct rspamd_task * task,
  968. lua_State *L)
  969. {
  970. struct _fl *selected, key;
  971. key.name = func->name;
  972. selected = bsearch (&key,
  973. list_ptr,
  974. functions_number,
  975. sizeof (struct _fl),
  976. fl_cmp);
  977. if (selected == NULL) {
  978. /* Try to check lua function */
  979. return FALSE;
  980. }
  981. return selected->func (task, func->args, selected->user_data);
  982. }
  983. static gdouble
  984. rspamd_mime_expr_process (struct rspamd_expr_process_data *process_data, rspamd_expression_atom_t *atom)
  985. {
  986. struct rspamd_task *task = process_data->task;
  987. struct rspamd_mime_atom *mime_atom;
  988. lua_State *L;
  989. gdouble ret = 0;
  990. g_assert (task != NULL);
  991. g_assert (atom != NULL);
  992. mime_atom = atom->data;
  993. if (mime_atom->type == MIME_ATOM_REGEXP) {
  994. ret = rspamd_mime_expr_process_regexp (mime_atom->d.re, task);
  995. }
  996. else if (mime_atom->type == MIME_ATOM_LUA_FUNCTION) {
  997. L = task->cfg->lua_state;
  998. lua_getglobal (L, mime_atom->d.lua_function);
  999. rspamd_lua_task_push (L, task);
  1000. if (lua_pcall (L, 1, 1, 0) != 0) {
  1001. msg_info_task ("lua call to global function '%s' for atom '%s' failed: %s",
  1002. mime_atom->d.lua_function,
  1003. mime_atom->str,
  1004. lua_tostring (L, -1));
  1005. lua_pop (L, 1);
  1006. }
  1007. else {
  1008. if (lua_type (L, -1) == LUA_TBOOLEAN) {
  1009. ret = lua_toboolean (L, -1);
  1010. }
  1011. else if (lua_type (L, -1) == LUA_TNUMBER) {
  1012. ret = lua_tonumber (L, 1);
  1013. }
  1014. else {
  1015. msg_err_task ("%s returned wrong return type: %s",
  1016. mime_atom->str, lua_typename (L, lua_type (L, -1)));
  1017. }
  1018. /* Remove result */
  1019. lua_pop (L, 1);
  1020. }
  1021. }
  1022. else if (mime_atom->type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
  1023. gint err_idx;
  1024. GString *tb;
  1025. L = task->cfg->lua_state;
  1026. lua_pushcfunction (L, &rspamd_lua_traceback);
  1027. err_idx = lua_gettop (L);
  1028. lua_rawgeti (L, LUA_REGISTRYINDEX, mime_atom->d.lua_cbref);
  1029. rspamd_lua_task_push (L, task);
  1030. if (lua_pcall (L, 1, 1, err_idx) != 0) {
  1031. tb = lua_touserdata (L, -1);
  1032. msg_info_task ("lua call to local function for atom '%s' failed: %v",
  1033. mime_atom->str,
  1034. tb);
  1035. if (tb) {
  1036. g_string_free (tb, TRUE);
  1037. }
  1038. }
  1039. else {
  1040. if (lua_type (L, -1) == LUA_TBOOLEAN) {
  1041. ret = lua_toboolean (L, -1);
  1042. }
  1043. else if (lua_type (L, -1) == LUA_TNUMBER) {
  1044. ret = lua_tonumber (L, 1);
  1045. }
  1046. else {
  1047. msg_err_task ("%s returned wrong return type: %s",
  1048. mime_atom->str, lua_typename (L, lua_type (L, -1)));
  1049. }
  1050. }
  1051. lua_settop (L, 0);
  1052. }
  1053. else {
  1054. ret = rspamd_mime_expr_process_function (mime_atom->d.func, task,
  1055. task->cfg->lua_state);
  1056. }
  1057. return ret;
  1058. }
  1059. void
  1060. register_expression_function (const gchar *name,
  1061. rspamd_internal_func_t func,
  1062. void *user_data)
  1063. {
  1064. static struct _fl *new;
  1065. functions_number++;
  1066. new = g_new (struct _fl, functions_number);
  1067. memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl));
  1068. if (list_allocated) {
  1069. g_free (list_ptr);
  1070. }
  1071. list_allocated = TRUE;
  1072. new[functions_number - 1].name = name;
  1073. new[functions_number - 1].func = func;
  1074. new[functions_number - 1].user_data = user_data;
  1075. qsort (new, functions_number, sizeof (struct _fl), fl_cmp);
  1076. list_ptr = new;
  1077. }
  1078. gboolean
  1079. rspamd_compare_encoding (struct rspamd_task *task, GArray * args, void *unused)
  1080. {
  1081. struct expression_argument *arg;
  1082. if (args == NULL || task == NULL) {
  1083. return FALSE;
  1084. }
  1085. arg = &g_array_index (args, struct expression_argument, 0);
  1086. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1087. msg_warn_task ("invalid argument to function is passed");
  1088. return FALSE;
  1089. }
  1090. /* XXX: really write this function */
  1091. return TRUE;
  1092. }
  1093. gboolean
  1094. rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused)
  1095. {
  1096. struct expression_argument *arg;
  1097. GPtrArray *headerlist;
  1098. if (args == NULL || task == NULL) {
  1099. return FALSE;
  1100. }
  1101. arg = &g_array_index (args, struct expression_argument, 0);
  1102. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1103. msg_warn_task ("invalid argument to function is passed");
  1104. return FALSE;
  1105. }
  1106. headerlist = rspamd_message_get_header_array (task,
  1107. (gchar *)arg->data,
  1108. FALSE);
  1109. debug_task ("try to get header %s: %d", (gchar *)arg->data,
  1110. (headerlist != NULL));
  1111. if (headerlist) {
  1112. return TRUE;
  1113. }
  1114. return FALSE;
  1115. }
  1116. /*
  1117. * This function is designed to find difference between text/html and text/plain parts
  1118. * It takes one argument: difference threshold, if we have two text parts, compare
  1119. * its hashes and check for threshold, if value is greater than threshold, return TRUE
  1120. * and return FALSE otherwise.
  1121. */
  1122. gboolean
  1123. rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
  1124. {
  1125. gint threshold, threshold2 = -1;
  1126. struct expression_argument *arg;
  1127. gdouble *pdiff, diff;
  1128. if (args == NULL || args->len == 0) {
  1129. debug_task ("no threshold is specified, assume it 100");
  1130. threshold = 100;
  1131. }
  1132. else {
  1133. errno = 0;
  1134. arg = &g_array_index (args, struct expression_argument, 0);
  1135. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1136. msg_warn_task ("invalid argument to function is passed");
  1137. return FALSE;
  1138. }
  1139. threshold = strtoul ((gchar *)arg->data, NULL, 10);
  1140. if (errno != 0) {
  1141. msg_info_task ("bad numeric value for threshold \"%s\", assume it 100",
  1142. (gchar *)arg->data);
  1143. threshold = 100;
  1144. }
  1145. if (args->len >= 2) {
  1146. arg = &g_array_index (args, struct expression_argument, 1);
  1147. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1148. msg_warn_task ("invalid argument to function is passed");
  1149. return FALSE;
  1150. }
  1151. errno = 0;
  1152. threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
  1153. if (errno != 0) {
  1154. msg_info_task ("bad numeric value for threshold \"%s\", ignore it",
  1155. (gchar *)arg->data);
  1156. threshold2 = -1;
  1157. }
  1158. }
  1159. }
  1160. if ((pdiff =
  1161. rspamd_mempool_get_variable (task->task_pool,
  1162. "parts_distance")) != NULL) {
  1163. diff = (1.0 - (*pdiff)) * 100.0;
  1164. if (diff != -1) {
  1165. if (threshold2 > 0) {
  1166. if (diff >= MIN (threshold, threshold2) &&
  1167. diff < MAX (threshold, threshold2)) {
  1168. return TRUE;
  1169. }
  1170. }
  1171. else {
  1172. if (diff <= threshold) {
  1173. return TRUE;
  1174. }
  1175. }
  1176. return FALSE;
  1177. }
  1178. else {
  1179. return FALSE;
  1180. }
  1181. }
  1182. return FALSE;
  1183. }
  1184. struct addr_list {
  1185. const gchar *name;
  1186. guint namelen;
  1187. const gchar *addr;
  1188. guint addrlen;
  1189. };
  1190. #define COMPARE_RCPT_LEN 3
  1191. #define MIN_RCPT_TO_COMPARE 7
  1192. gboolean
  1193. rspamd_recipients_distance (struct rspamd_task *task, GArray * args,
  1194. void *unused)
  1195. {
  1196. struct expression_argument *arg;
  1197. struct rspamd_email_address *cur;
  1198. double threshold;
  1199. struct addr_list *ar;
  1200. gint num, i, j, hits = 0, total = 0;
  1201. if (args == NULL) {
  1202. msg_warn_task ("no parameters to function");
  1203. return FALSE;
  1204. }
  1205. arg = &g_array_index (args, struct expression_argument, 0);
  1206. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1207. msg_warn_task ("invalid argument to function is passed");
  1208. return FALSE;
  1209. }
  1210. errno = 0;
  1211. threshold = strtod ((gchar *)arg->data, NULL);
  1212. if (errno != 0) {
  1213. msg_warn_task ("invalid numeric value '%s': %s",
  1214. (gchar *)arg->data,
  1215. strerror (errno));
  1216. return FALSE;
  1217. }
  1218. if (!task->rcpt_mime) {
  1219. return FALSE;
  1220. }
  1221. num = task->rcpt_mime->len;
  1222. if (num < MIN_RCPT_TO_COMPARE) {
  1223. return FALSE;
  1224. }
  1225. ar = rspamd_mempool_alloc0 (task->task_pool, num * sizeof (struct addr_list));
  1226. /* Fill array */
  1227. PTR_ARRAY_FOREACH (task->rcpt_mime, i, cur) {
  1228. ar[i].name = cur->addr;
  1229. ar[i].namelen = cur->addr_len;
  1230. ar[i].addr = cur->domain;
  1231. ar[i].addrlen = cur->domain_len;
  1232. }
  1233. /* Cycle all elements in array */
  1234. for (i = 0; i < num; i++) {
  1235. for (j = i + 1; j < num; j++) {
  1236. if (ar[i].namelen >= COMPARE_RCPT_LEN && ar[j].namelen >= COMPARE_RCPT_LEN &&
  1237. rspamd_lc_cmp (ar[i].name, ar[j].name, COMPARE_RCPT_LEN) == 0) {
  1238. /* Common name part */
  1239. hits++;
  1240. }
  1241. total++;
  1242. }
  1243. }
  1244. if ((hits * num / 2.) / (double)total >= threshold) {
  1245. return TRUE;
  1246. }
  1247. return FALSE;
  1248. }
  1249. gboolean
  1250. rspamd_has_only_html_part (struct rspamd_task * task, GArray * args,
  1251. void *unused)
  1252. {
  1253. struct rspamd_mime_text_part *p;
  1254. gboolean res = FALSE;
  1255. if (task->text_parts->len == 1) {
  1256. p = g_ptr_array_index (task->text_parts, 0);
  1257. if (IS_PART_HTML (p)) {
  1258. res = TRUE;
  1259. }
  1260. else {
  1261. res = FALSE;
  1262. }
  1263. }
  1264. return res;
  1265. }
  1266. static gboolean
  1267. is_recipient_list_sorted (GPtrArray *ar)
  1268. {
  1269. struct rspamd_email_address *addr;
  1270. gboolean res = TRUE;
  1271. rspamd_ftok_t cur, prev;
  1272. gint i;
  1273. /* Do not check to short address lists */
  1274. if (ar == NULL || ar->len < MIN_RCPT_TO_COMPARE) {
  1275. return FALSE;
  1276. }
  1277. prev.len = 0;
  1278. prev.begin = NULL;
  1279. PTR_ARRAY_FOREACH (ar, i, addr) {
  1280. cur.begin = addr->addr;
  1281. cur.len = addr->addr_len;
  1282. if (prev.len != 0) {
  1283. if (rspamd_ftok_casecmp (&cur, &prev) <= 0) {
  1284. res = FALSE;
  1285. break;
  1286. }
  1287. }
  1288. prev = cur;
  1289. }
  1290. return res;
  1291. }
  1292. gboolean
  1293. rspamd_is_recipients_sorted (struct rspamd_task * task,
  1294. GArray * args,
  1295. void *unused)
  1296. {
  1297. /* Check all types of addresses */
  1298. if (task->rcpt_mime) {
  1299. return is_recipient_list_sorted (task->rcpt_mime);
  1300. }
  1301. return FALSE;
  1302. }
  1303. gboolean
  1304. rspamd_compare_transfer_encoding (struct rspamd_task * task,
  1305. GArray * args,
  1306. void *unused)
  1307. {
  1308. struct expression_argument *arg;
  1309. guint i;
  1310. struct rspamd_mime_part *part;
  1311. enum rspamd_cte cte;
  1312. if (args == NULL) {
  1313. msg_warn_task ("no parameters to function");
  1314. return FALSE;
  1315. }
  1316. arg = &g_array_index (args, struct expression_argument, 0);
  1317. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1318. msg_warn_task ("invalid argument to function is passed");
  1319. return FALSE;
  1320. }
  1321. cte = rspamd_cte_from_string (arg->data);
  1322. if (cte == RSPAMD_CTE_UNKNOWN) {
  1323. msg_warn_task ("unknown cte: %s", arg->data);
  1324. return FALSE;
  1325. }
  1326. PTR_ARRAY_FOREACH (task->parts, i, part) {
  1327. if (IS_CT_TEXT (part->ct)) {
  1328. if (part->cte == cte) {
  1329. return TRUE;
  1330. }
  1331. }
  1332. }
  1333. return FALSE;
  1334. }
  1335. gboolean
  1336. rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused)
  1337. {
  1338. struct rspamd_mime_text_part *p;
  1339. guint i;
  1340. gboolean res = TRUE;
  1341. for (i = 0; i < task->text_parts->len; i ++) {
  1342. p = g_ptr_array_index (task->text_parts, i);
  1343. if (IS_PART_HTML (p)) {
  1344. if (p->flags & RSPAMD_MIME_TEXT_PART_FLAG_BALANCED) {
  1345. res = TRUE;
  1346. }
  1347. else {
  1348. res = FALSE;
  1349. break;
  1350. }
  1351. }
  1352. }
  1353. return res;
  1354. }
  1355. gboolean
  1356. rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
  1357. {
  1358. struct rspamd_mime_text_part *p;
  1359. struct expression_argument *arg;
  1360. guint i;
  1361. gboolean res = FALSE;
  1362. if (args == NULL) {
  1363. msg_warn_task ("no parameters to function");
  1364. return FALSE;
  1365. }
  1366. arg = &g_array_index (args, struct expression_argument, 0);
  1367. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1368. msg_warn_task ("invalid argument to function is passed");
  1369. return FALSE;
  1370. }
  1371. for (i = 0; i < task->text_parts->len; i ++) {
  1372. p = g_ptr_array_index (task->text_parts, i);
  1373. if (IS_PART_HTML (p) && p->html) {
  1374. res = rspamd_html_tag_seen (p->html, arg->data);
  1375. }
  1376. if (res) {
  1377. break;
  1378. }
  1379. }
  1380. return res;
  1381. }
  1382. gboolean
  1383. rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused)
  1384. {
  1385. struct rspamd_mime_text_part *p;
  1386. guint i;
  1387. gboolean res = FALSE;
  1388. for (i = 0; i < task->text_parts->len; i ++) {
  1389. p = g_ptr_array_index (task->text_parts, i);
  1390. if (IS_PART_HTML (p) && (p->html == NULL || p->html->html_tags == NULL)) {
  1391. res = TRUE;
  1392. }
  1393. if (res) {
  1394. break;
  1395. }
  1396. }
  1397. return res;
  1398. }
  1399. static gboolean
  1400. rspamd_raw_header_exists (struct rspamd_task *task, GArray * args, void *unused)
  1401. {
  1402. struct expression_argument *arg;
  1403. if (args == NULL || task == NULL) {
  1404. return FALSE;
  1405. }
  1406. arg = &g_array_index (args, struct expression_argument, 0);
  1407. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1408. msg_warn_task ("invalid argument to function is passed");
  1409. return FALSE;
  1410. }
  1411. return g_hash_table_lookup (task->raw_headers, arg->data) != NULL;
  1412. }
  1413. static gboolean
  1414. match_smtp_data (struct rspamd_task *task,
  1415. struct expression_argument *arg,
  1416. const gchar *what, gsize len)
  1417. {
  1418. rspamd_regexp_t *re;
  1419. gint r = 0;
  1420. if (arg->type == EXPRESSION_ARGUMENT_REGEXP) {
  1421. /* This is a regexp */
  1422. re = arg->data;
  1423. if (re == NULL) {
  1424. msg_warn_task ("cannot compile regexp for function");
  1425. return FALSE;
  1426. }
  1427. if (len > 0) {
  1428. r = rspamd_regexp_search (re, what, len, NULL, NULL, FALSE, NULL);
  1429. }
  1430. return r;
  1431. }
  1432. else if (arg->type == EXPRESSION_ARGUMENT_NORMAL &&
  1433. g_ascii_strncasecmp (arg->data, what, len) == 0) {
  1434. return TRUE;
  1435. }
  1436. return FALSE;
  1437. }
  1438. static gboolean
  1439. rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused)
  1440. {
  1441. struct expression_argument *arg;
  1442. struct rspamd_email_address *addr = NULL;
  1443. GPtrArray *rcpts = NULL;
  1444. const gchar *type, *str = NULL;
  1445. guint i;
  1446. if (args == NULL) {
  1447. msg_warn_task ("no parameters to function");
  1448. return FALSE;
  1449. }
  1450. arg = &g_array_index (args, struct expression_argument, 0);
  1451. if (!arg || !arg->data || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1452. msg_warn_task ("no parameters to function");
  1453. return FALSE;
  1454. }
  1455. else {
  1456. type = arg->data;
  1457. switch (*type) {
  1458. case 'f':
  1459. case 'F':
  1460. if (g_ascii_strcasecmp (type, "from") == 0) {
  1461. addr = rspamd_task_get_sender (task);
  1462. }
  1463. else {
  1464. msg_warn_task ("bad argument to function: %s", type);
  1465. return FALSE;
  1466. }
  1467. break;
  1468. case 'h':
  1469. case 'H':
  1470. if (g_ascii_strcasecmp (type, "helo") == 0) {
  1471. str = task->helo;
  1472. }
  1473. else {
  1474. msg_warn_task ("bad argument to function: %s", type);
  1475. return FALSE;
  1476. }
  1477. break;
  1478. case 'u':
  1479. case 'U':
  1480. if (g_ascii_strcasecmp (type, "user") == 0) {
  1481. str = task->user;
  1482. }
  1483. else {
  1484. msg_warn_task ("bad argument to function: %s", type);
  1485. return FALSE;
  1486. }
  1487. break;
  1488. case 's':
  1489. case 'S':
  1490. if (g_ascii_strcasecmp (type, "subject") == 0) {
  1491. str = task->subject;
  1492. }
  1493. else {
  1494. msg_warn_task ("bad argument to function: %s", type);
  1495. return FALSE;
  1496. }
  1497. break;
  1498. case 'r':
  1499. case 'R':
  1500. if (g_ascii_strcasecmp (type, "rcpt") == 0) {
  1501. rcpts = task->rcpt_envelope;
  1502. }
  1503. else {
  1504. msg_warn_task ("bad argument to function: %s", type);
  1505. return FALSE;
  1506. }
  1507. break;
  1508. default:
  1509. msg_warn_task ("bad argument to function: %s", type);
  1510. return FALSE;
  1511. }
  1512. }
  1513. if (str == NULL && addr == NULL && rcpts == NULL) {
  1514. /* Not enough data so regexp would NOT be found anyway */
  1515. return FALSE;
  1516. }
  1517. /* We would process only one more argument, others are ignored */
  1518. if (args->len >= 2) {
  1519. arg = &g_array_index (args, struct expression_argument, 1);
  1520. if (arg) {
  1521. if (str != NULL) {
  1522. return match_smtp_data (task, arg, str, strlen (str));
  1523. }
  1524. else if (addr != NULL && addr->addr) {
  1525. return match_smtp_data (task, arg, addr->addr, addr->addr_len);
  1526. }
  1527. else {
  1528. if (rcpts != NULL) {
  1529. for (i = 0; i < rcpts->len; i ++) {
  1530. addr = g_ptr_array_index (rcpts, i);
  1531. if (addr && addr->addr &&
  1532. match_smtp_data (task, arg,
  1533. addr->addr, addr->addr_len)) {
  1534. return TRUE;
  1535. }
  1536. }
  1537. }
  1538. }
  1539. }
  1540. }
  1541. return FALSE;
  1542. }
  1543. static inline gboolean
  1544. rspamd_check_ct_attr (const gchar *begin, gsize len,
  1545. struct expression_argument *arg_pattern)
  1546. {
  1547. rspamd_regexp_t *re;
  1548. gboolean r = FALSE;
  1549. if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
  1550. re = arg_pattern->data;
  1551. if (len > 0) {
  1552. r = rspamd_regexp_search (re,
  1553. begin, len,
  1554. NULL, NULL, FALSE, NULL);
  1555. }
  1556. if (r) {
  1557. return TRUE;
  1558. }
  1559. }
  1560. else {
  1561. /* Just do strcasecmp */
  1562. gsize plen = strlen (arg_pattern->data);
  1563. if (plen == len &&
  1564. g_ascii_strncasecmp (arg_pattern->data, begin, len) == 0) {
  1565. return TRUE;
  1566. }
  1567. }
  1568. return FALSE;
  1569. }
  1570. static gboolean
  1571. rspamd_content_type_compare_param (struct rspamd_task * task,
  1572. GArray * args,
  1573. void *unused)
  1574. {
  1575. struct expression_argument *arg, *arg1, *arg_pattern;
  1576. gboolean recursive = FALSE;
  1577. struct rspamd_mime_part *cur_part;
  1578. guint i;
  1579. rspamd_ftok_t srch;
  1580. struct rspamd_content_type_param *found = NULL, *cur;
  1581. const gchar *param_name;
  1582. if (args == NULL || args->len < 2) {
  1583. msg_warn_task ("no parameters to function");
  1584. return FALSE;
  1585. }
  1586. arg = &g_array_index (args, struct expression_argument, 0);
  1587. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1588. param_name = arg->data;
  1589. arg_pattern = &g_array_index (args, struct expression_argument, 1);
  1590. for (i = 0; i < task->parts->len; i ++) {
  1591. cur_part = g_ptr_array_index (task->parts, i);
  1592. if (args->len >= 3) {
  1593. arg1 = &g_array_index (args, struct expression_argument, 2);
  1594. if (g_ascii_strncasecmp (arg1->data, "true",
  1595. sizeof ("true") - 1) == 0) {
  1596. recursive = TRUE;
  1597. }
  1598. }
  1599. else {
  1600. /*
  1601. * If user did not specify argument, let's assume that he wants
  1602. * recursive search if mime part is multipart/mixed
  1603. */
  1604. if (IS_CT_MULTIPART (cur_part->ct)) {
  1605. recursive = TRUE;
  1606. }
  1607. }
  1608. rspamd_ftok_t lit;
  1609. RSPAMD_FTOK_FROM_STR (&srch, param_name);
  1610. RSPAMD_FTOK_FROM_STR (&lit, "charset");
  1611. if (rspamd_ftok_equal (&srch, &lit)) {
  1612. if (rspamd_check_ct_attr (cur_part->ct->charset.begin,
  1613. cur_part->ct->charset.len, arg_pattern)) {
  1614. return TRUE;
  1615. }
  1616. }
  1617. RSPAMD_FTOK_FROM_STR (&lit, "boundary");
  1618. if (rspamd_ftok_equal (&srch, &lit)) {
  1619. if (rspamd_check_ct_attr (cur_part->ct->orig_boundary.begin,
  1620. cur_part->ct->orig_boundary.len, arg_pattern)) {
  1621. return TRUE;
  1622. }
  1623. }
  1624. if (cur_part->ct->attrs) {
  1625. found = g_hash_table_lookup (cur_part->ct->attrs, &srch);
  1626. if (found) {
  1627. DL_FOREACH (found, cur) {
  1628. if (rspamd_check_ct_attr (cur->value.begin,
  1629. cur->value.len, arg_pattern)) {
  1630. return TRUE;
  1631. }
  1632. }
  1633. }
  1634. }
  1635. if (!recursive) {
  1636. break;
  1637. }
  1638. }
  1639. return FALSE;
  1640. }
  1641. static gboolean
  1642. rspamd_content_type_has_param (struct rspamd_task * task,
  1643. GArray * args,
  1644. void *unused)
  1645. {
  1646. struct expression_argument *arg, *arg1;
  1647. gboolean recursive = FALSE;
  1648. struct rspamd_mime_part *cur_part;
  1649. guint i;
  1650. rspamd_ftok_t srch;
  1651. struct rspamd_content_type_param *found = NULL;
  1652. const gchar *param_name;
  1653. if (args == NULL || args->len < 1) {
  1654. msg_warn_task ("no parameters to function");
  1655. return FALSE;
  1656. }
  1657. arg = &g_array_index (args, struct expression_argument, 0);
  1658. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1659. param_name = arg->data;
  1660. for (i = 0; i < task->parts->len; i ++) {
  1661. cur_part = g_ptr_array_index (task->parts, i);
  1662. if (args->len >= 2) {
  1663. arg1 = &g_array_index (args, struct expression_argument, 1);
  1664. if (g_ascii_strncasecmp (arg1->data, "true",
  1665. sizeof ("true") - 1) == 0) {
  1666. recursive = TRUE;
  1667. }
  1668. }
  1669. else {
  1670. /*
  1671. * If user did not specify argument, let's assume that he wants
  1672. * recursive search if mime part is multipart/mixed
  1673. */
  1674. if (IS_CT_MULTIPART (cur_part->ct)) {
  1675. recursive = TRUE;
  1676. }
  1677. }
  1678. rspamd_ftok_t lit;
  1679. RSPAMD_FTOK_FROM_STR (&srch, param_name);
  1680. RSPAMD_FTOK_FROM_STR (&lit, "charset");
  1681. if (rspamd_ftok_equal (&srch, &lit)) {
  1682. if (cur_part->ct->charset.len > 0) {
  1683. return TRUE;
  1684. }
  1685. }
  1686. RSPAMD_FTOK_FROM_STR (&lit, "boundary");
  1687. if (rspamd_ftok_equal (&srch, &lit)) {
  1688. if (cur_part->ct->boundary.len > 0) {
  1689. return TRUE;
  1690. }
  1691. }
  1692. if (cur_part->ct->attrs) {
  1693. found = g_hash_table_lookup (cur_part->ct->attrs, &srch);
  1694. if (found) {
  1695. return TRUE;
  1696. }
  1697. }
  1698. if (!recursive) {
  1699. break;
  1700. }
  1701. }
  1702. return FALSE;
  1703. }
  1704. static gboolean
  1705. rspamd_content_type_check (struct rspamd_task *task,
  1706. GArray * args,
  1707. gboolean check_subtype)
  1708. {
  1709. rspamd_ftok_t *param_data, srch;
  1710. rspamd_regexp_t *re;
  1711. struct expression_argument *arg1, *arg_pattern;
  1712. struct rspamd_content_type *ct;
  1713. gint r = 0;
  1714. guint i;
  1715. gboolean recursive = FALSE;
  1716. struct rspamd_mime_part *cur_part;
  1717. if (args == NULL || args->len < 1) {
  1718. msg_warn_task ("no parameters to function");
  1719. return FALSE;
  1720. }
  1721. arg_pattern = &g_array_index (args, struct expression_argument, 0);
  1722. for (i = 0; i < task->parts->len; i ++) {
  1723. cur_part = g_ptr_array_index (task->parts, i);
  1724. ct = cur_part->ct;
  1725. if (args->len >= 2) {
  1726. arg1 = &g_array_index (args, struct expression_argument, 1);
  1727. if (g_ascii_strncasecmp (arg1->data, "true",
  1728. sizeof ("true") - 1) == 0) {
  1729. recursive = TRUE;
  1730. }
  1731. }
  1732. else {
  1733. /*
  1734. * If user did not specify argument, let's assume that he wants
  1735. * recursive search if mime part is multipart/mixed
  1736. */
  1737. if (IS_CT_MULTIPART (ct)) {
  1738. recursive = TRUE;
  1739. }
  1740. }
  1741. if (check_subtype) {
  1742. param_data = &ct->subtype;
  1743. }
  1744. else {
  1745. param_data = &ct->type;
  1746. }
  1747. if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
  1748. re = arg_pattern->data;
  1749. if (param_data->len > 0) {
  1750. r = rspamd_regexp_search (re, param_data->begin, param_data->len,
  1751. NULL, NULL, FALSE, NULL);
  1752. }
  1753. if (r) {
  1754. return TRUE;
  1755. }
  1756. }
  1757. else {
  1758. /* Just do strcasecmp */
  1759. srch.begin = arg_pattern->data;
  1760. srch.len = strlen (arg_pattern->data);
  1761. if (rspamd_ftok_casecmp (param_data, &srch) == 0) {
  1762. return TRUE;
  1763. }
  1764. }
  1765. /* Get next part */
  1766. if (!recursive) {
  1767. break;
  1768. }
  1769. }
  1770. return FALSE;
  1771. }
  1772. static gboolean
  1773. rspamd_content_type_is_type (struct rspamd_task * task,
  1774. GArray * args,
  1775. void *unused)
  1776. {
  1777. return rspamd_content_type_check (task, args, FALSE);
  1778. }
  1779. static gboolean
  1780. rspamd_content_type_is_subtype (struct rspamd_task * task,
  1781. GArray * args,
  1782. void *unused)
  1783. {
  1784. return rspamd_content_type_check (task, args, TRUE);
  1785. }
  1786. static gboolean
  1787. compare_subtype (struct rspamd_task *task, struct rspamd_content_type *ct,
  1788. struct expression_argument *subtype)
  1789. {
  1790. rspamd_regexp_t *re;
  1791. rspamd_ftok_t srch;
  1792. gint r = 0;
  1793. if (subtype == NULL || ct == NULL) {
  1794. msg_warn_task ("invalid parameters passed");
  1795. return FALSE;
  1796. }
  1797. if (subtype->type == EXPRESSION_ARGUMENT_REGEXP) {
  1798. re = subtype->data;
  1799. if (ct->subtype.len > 0) {
  1800. r = rspamd_regexp_search (re, ct->subtype.begin, ct->subtype.len,
  1801. NULL, NULL, FALSE, NULL);
  1802. }
  1803. }
  1804. else {
  1805. srch.begin = subtype->data;
  1806. srch.len = strlen (subtype->data);
  1807. /* Just do strcasecmp */
  1808. if (rspamd_ftok_casecmp (&ct->subtype, &srch) == 0) {
  1809. return TRUE;
  1810. }
  1811. }
  1812. return r;
  1813. }
  1814. static gboolean
  1815. compare_len (struct rspamd_mime_part *part, guint min, guint max)
  1816. {
  1817. if (min == 0 && max == 0) {
  1818. return TRUE;
  1819. }
  1820. if (min == 0) {
  1821. return part->parsed_data.len <= max;
  1822. }
  1823. else if (max == 0) {
  1824. return part->parsed_data.len >= min;
  1825. }
  1826. else {
  1827. return part->parsed_data.len >= min && part->parsed_data.len <= max;
  1828. }
  1829. }
  1830. static gboolean
  1831. common_has_content_part (struct rspamd_task * task,
  1832. struct expression_argument *param_type,
  1833. struct expression_argument *param_subtype,
  1834. gint min_len,
  1835. gint max_len)
  1836. {
  1837. rspamd_regexp_t *re;
  1838. struct rspamd_mime_part *part;
  1839. struct rspamd_content_type *ct;
  1840. rspamd_ftok_t srch;
  1841. gint r = 0;
  1842. guint i;
  1843. for (i = 0; i < task->parts->len; i ++) {
  1844. part = g_ptr_array_index (task->parts, i);
  1845. ct = part->ct;
  1846. if (ct == NULL) {
  1847. continue;
  1848. }
  1849. if (param_type->type == EXPRESSION_ARGUMENT_REGEXP) {
  1850. re = param_type->data;
  1851. if (ct->type.len > 0) {
  1852. r = rspamd_regexp_search (re, ct->type.begin, ct->type.len,
  1853. NULL, NULL, FALSE, NULL);
  1854. }
  1855. /* Also check subtype and length of the part */
  1856. if (r && param_subtype) {
  1857. r = compare_len (part, min_len, max_len) &&
  1858. compare_subtype (task, ct, param_subtype);
  1859. return r;
  1860. }
  1861. }
  1862. else {
  1863. /* Just do strcasecmp */
  1864. srch.begin = param_type->data;
  1865. srch.len = strlen (param_type->data);
  1866. if (rspamd_ftok_casecmp (&ct->type, &srch) == 0) {
  1867. if (param_subtype) {
  1868. if (compare_subtype (task, ct, param_subtype)) {
  1869. if (compare_len (part, min_len, max_len)) {
  1870. return TRUE;
  1871. }
  1872. }
  1873. }
  1874. else {
  1875. if (compare_len (part, min_len, max_len)) {
  1876. return TRUE;
  1877. }
  1878. }
  1879. }
  1880. }
  1881. }
  1882. return FALSE;
  1883. }
  1884. static gboolean
  1885. rspamd_has_content_part (struct rspamd_task * task, GArray * args, void *unused)
  1886. {
  1887. struct expression_argument *param_type = NULL, *param_subtype = NULL;
  1888. if (args == NULL) {
  1889. msg_warn_task ("no parameters to function");
  1890. return FALSE;
  1891. }
  1892. param_type = &g_array_index (args, struct expression_argument, 0);
  1893. if (args->len >= 2) {
  1894. param_subtype = &g_array_index (args, struct expression_argument, 1);
  1895. }
  1896. return common_has_content_part (task, param_type, param_subtype, 0, 0);
  1897. }
  1898. static gboolean
  1899. rspamd_has_content_part_len (struct rspamd_task * task,
  1900. GArray * args,
  1901. void *unused)
  1902. {
  1903. struct expression_argument *param_type = NULL, *param_subtype = NULL;
  1904. gint min = 0, max = 0;
  1905. struct expression_argument *arg;
  1906. if (args == NULL) {
  1907. msg_warn_task ("no parameters to function");
  1908. return FALSE;
  1909. }
  1910. param_type = &g_array_index (args, struct expression_argument, 0);
  1911. if (args->len >= 2) {
  1912. param_subtype = &g_array_index (args, struct expression_argument, 1);
  1913. if (args->len >= 3) {
  1914. arg = &g_array_index (args, struct expression_argument, 2);
  1915. errno = 0;
  1916. min = strtoul (arg->data, NULL, 10);
  1917. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1918. if (errno != 0) {
  1919. msg_warn_task ("invalid numeric value '%s': %s",
  1920. (gchar *)arg->data,
  1921. strerror (errno));
  1922. return FALSE;
  1923. }
  1924. if (args->len >= 4) {
  1925. arg = &g_array_index (args, struct expression_argument, 3);
  1926. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1927. max = strtoul (arg->data, NULL, 10);
  1928. if (errno != 0) {
  1929. msg_warn_task ("invalid numeric value '%s': %s",
  1930. (gchar *)arg->data,
  1931. strerror (errno));
  1932. return FALSE;
  1933. }
  1934. }
  1935. }
  1936. }
  1937. return common_has_content_part (task, param_type, param_subtype, min, max);
  1938. }
  1939. static gboolean
  1940. rspamd_is_empty_body (struct rspamd_task *task,
  1941. GArray * args,
  1942. void *unused)
  1943. {
  1944. struct rspamd_mime_part *part;
  1945. guint i;
  1946. PTR_ARRAY_FOREACH (task->parts, i, part) {
  1947. if (part->parsed_data.len > 0) {
  1948. return FALSE;
  1949. }
  1950. }
  1951. return TRUE;
  1952. }
  /*
   * Set `result` (a variable in the expanding scope) to 1 when any bit of
   * `flag` is set in task->flags, and to 0 otherwise. Relies on `task` and
   * `result` being visible where the macro is expanded.
   */
  #define TASK_FLAG_READ(flag) do { \
      result = ((task->flags & (flag)) != 0); \
  } while(0)
  /*
   * If no earlier lookup has succeeded (`found` is still false) and the
   * string `flag` equals `strname`, read the task flag `macro` through
   * TASK_FLAG_READ and mark the lookup as done by setting `found`.
   * Relies on `found` being visible where the macro is expanded.
   */
  #define TASK_GET_FLAG(flag, strname, macro) do { \
      if (!found) { \
          if (strcmp ((flag), strname) == 0) { \
              found = TRUE; \
              TASK_FLAG_READ ((macro)); \
          } \
      } \
  } while(0)
  1962. static gboolean
  1963. rspamd_has_flag_expr (struct rspamd_task *task,
  1964. GArray * args,
  1965. void *unused)
  1966. {
  1967. gboolean found = FALSE, result = FALSE;
  1968. struct expression_argument *flag_arg;
  1969. const gchar *flag_str;
  1970. if (args == NULL) {
  1971. msg_warn_task ("no parameters to function");
  1972. return FALSE;
  1973. }
  1974. flag_arg = &g_array_index (args, struct expression_argument, 0);
  1975. if (flag_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1976. msg_warn_task ("invalid parameter to function");
  1977. return FALSE;
  1978. }
  1979. flag_str = (const gchar *)flag_arg->data;
  1980. TASK_GET_FLAG (flag_str, "pass_all", RSPAMD_TASK_FLAG_PASS_ALL);
  1981. TASK_GET_FLAG (flag_str, "no_log", RSPAMD_TASK_FLAG_NO_LOG);
  1982. TASK_GET_FLAG (flag_str, "no_stat", RSPAMD_TASK_FLAG_NO_STAT);
  1983. TASK_GET_FLAG (flag_str, "skip", RSPAMD_TASK_FLAG_SKIP);
  1984. TASK_GET_FLAG (flag_str, "extended_urls", RSPAMD_TASK_FLAG_EXT_URLS);
  1985. TASK_GET_FLAG (flag_str, "learn_spam", RSPAMD_TASK_FLAG_LEARN_SPAM);
  1986. TASK_GET_FLAG (flag_str, "learn_ham", RSPAMD_TASK_FLAG_LEARN_HAM);
  1987. TASK_GET_FLAG (flag_str, "greylisted", RSPAMD_TASK_FLAG_GREYLISTED);
  1988. TASK_GET_FLAG (flag_str, "broken_headers",
  1989. RSPAMD_TASK_FLAG_BROKEN_HEADERS);
  1990. TASK_GET_FLAG (flag_str, "skip_process",
  1991. RSPAMD_TASK_FLAG_SKIP_PROCESS);
  1992. TASK_GET_FLAG (flag_str, "milter",
  1993. RSPAMD_TASK_FLAG_MILTER);
  1994. TASK_GET_FLAG (flag_str, "bad_unicode",
  1995. RSPAMD_TASK_FLAG_BAD_UNICODE);
  1996. if (!found) {
  1997. msg_warn_task ("invalid flag name %s", flag_str);
  1998. return FALSE;
  1999. }
  2000. return result;
  2001. }