You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

mime_expressions.c 55KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <contrib/libucl/ucl.h>
  17. #include "config.h"
  18. #include "util.h"
  19. #include "cfg_file.h"
  20. #include "rspamd.h"
  21. #include "message.h"
  22. #include "mime_expressions.h"
  23. #include "libserver/html/html.h"
  24. #include "lua/lua_common.h"
  25. #include "utlist.h"
  26. gboolean rspamd_compare_encoding (struct rspamd_task *task,
  27. GArray * args,
  28. void *unused);
  29. gboolean rspamd_header_exists (struct rspamd_task *task,
  30. GArray * args,
  31. void *unused);
  32. gboolean rspamd_parts_distance (struct rspamd_task *task,
  33. GArray * args,
  34. void *unused);
  35. gboolean rspamd_recipients_distance (struct rspamd_task *task,
  36. GArray * args,
  37. void *unused);
  38. gboolean rspamd_has_only_html_part (struct rspamd_task *task,
  39. GArray * args,
  40. void *unused);
  41. gboolean rspamd_is_recipients_sorted (struct rspamd_task *task,
  42. GArray * args,
  43. void *unused);
  44. gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task,
  45. GArray * args,
  46. void *unused);
  47. gboolean rspamd_is_html_balanced (struct rspamd_task *task,
  48. GArray * args,
  49. void *unused);
  50. gboolean rspamd_has_html_tag (struct rspamd_task *task,
  51. GArray * args,
  52. void *unused);
  53. gboolean rspamd_has_fake_html (struct rspamd_task *task,
  54. GArray * args,
  55. void *unused);
  56. static gboolean rspamd_raw_header_exists (struct rspamd_task *task,
  57. GArray * args,
  58. void *unused);
  59. static gboolean rspamd_check_smtp_data (struct rspamd_task *task,
  60. GArray * args,
  61. void *unused);
  62. static gboolean rspamd_content_type_is_type (struct rspamd_task * task,
  63. GArray * args,
  64. void *unused);
  65. static gboolean rspamd_content_type_is_subtype (struct rspamd_task *task,
  66. GArray * args,
  67. void *unused);
  68. static gboolean rspamd_content_type_has_param (struct rspamd_task * task,
  69. GArray * args,
  70. void *unused);
  71. static gboolean rspamd_content_type_compare_param (struct rspamd_task * task,
  72. GArray * args,
  73. void *unused);
  74. static gboolean rspamd_has_content_part (struct rspamd_task *task,
  75. GArray * args,
  76. void *unused);
  77. static gboolean rspamd_has_content_part_len (struct rspamd_task *task,
  78. GArray * args,
  79. void *unused);
  80. static gboolean rspamd_is_empty_body (struct rspamd_task *task,
  81. GArray * args,
  82. void *unused);
  83. static gboolean rspamd_has_flag_expr (struct rspamd_task *task,
  84. GArray * args,
  85. void *unused);
  86. static gboolean rspamd_has_symbol_expr (struct rspamd_task *task,
  87. GArray * args,
  88. void *unused);
  89. static rspamd_expression_atom_t * rspamd_mime_expr_parse (const gchar *line, gsize len,
  90. rspamd_mempool_t *pool, gpointer ud, GError **err);
  91. static gdouble rspamd_mime_expr_process (void *ud, rspamd_expression_atom_t *atom);
  92. static gint rspamd_mime_expr_priority (rspamd_expression_atom_t *atom);
  93. static void rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom);
  94. /**
  95. * Regexp structure
  96. */
  97. struct rspamd_regexp_atom {
  98. enum rspamd_re_type type; /**< regexp type */
  99. gchar *regexp_text; /**< regexp text representation */
  100. rspamd_regexp_t *regexp; /**< regexp structure */
  101. union {
  102. const gchar *header; /**< header name for header regexps */
  103. const gchar *selector; /**< selector name for lua selector regexp */
  104. } extra;
  105. gboolean is_test; /**< true if this expression must be tested */
  106. gboolean is_strong; /**< true if headers search must be case sensitive */
  107. gboolean is_multiple; /**< true if we need to match all inclusions of atom */
  108. };
  109. /**
  110. * Rspamd expression function
  111. */
  112. struct rspamd_function_atom {
  113. gchar *name; /**< name of function */
  114. GArray *args; /**< its args */
  115. };
  /**
   * Kind of a parsed mime atom
   */
  enum rspamd_mime_atom_type {
      /* `/re/flags` style atom (the parser's default) */
      MIME_ATOM_REGEXP = 0,
      /* `name(args)` style atom */
      MIME_ATOM_INTERNAL_FUNCTION,
      /* lua function identified by just a string */
      MIME_ATOM_LUA_FUNCTION,
      /* New style: atom written with the `lua:` prefix */
      MIME_ATOM_LOCAL_LUA_FUNCTION,
  };
  122. struct rspamd_mime_atom {
  123. gchar *str;
  124. union {
  125. struct rspamd_regexp_atom *re;
  126. struct rspamd_function_atom *func;
  127. const gchar *lua_function;
  128. gint lua_cbref;
  129. } d;
  130. enum rspamd_mime_atom_type type;
  131. };
  132. /*
  133. * List of internal functions of rspamd
  134. * Sorted by name to use bsearch
  135. */
  136. static struct _fl {
  137. const gchar *name;
  138. rspamd_internal_func_t func;
  139. void *user_data;
  140. } rspamd_functions_list[] = {
  141. {"check_smtp_data", rspamd_check_smtp_data, NULL},
  142. {"compare_encoding", rspamd_compare_encoding, NULL},
  143. {"compare_parts_distance", rspamd_parts_distance, NULL},
  144. {"compare_recipients_distance", rspamd_recipients_distance, NULL},
  145. {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
  146. {"content_type_compare_param", rspamd_content_type_compare_param, NULL},
  147. {"content_type_has_param", rspamd_content_type_has_param, NULL},
  148. {"content_type_is_subtype", rspamd_content_type_is_subtype, NULL},
  149. {"content_type_is_type", rspamd_content_type_is_type, NULL},
  150. {"has_content_part", rspamd_has_content_part, NULL},
  151. {"has_content_part_len", rspamd_has_content_part_len, NULL},
  152. {"has_fake_html", rspamd_has_fake_html, NULL},
  153. {"has_flag", rspamd_has_flag_expr, NULL},
  154. {"has_html_tag", rspamd_has_html_tag, NULL},
  155. {"has_only_html_part", rspamd_has_only_html_part, NULL},
  156. {"has_symbol", rspamd_has_symbol_expr, NULL},
  157. {"header_exists", rspamd_header_exists, NULL},
  158. {"is_empty_body", rspamd_is_empty_body, NULL},
  159. {"is_html_balanced", rspamd_is_html_balanced, NULL},
  160. {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL},
  161. {"raw_header_exists", rspamd_raw_header_exists, NULL},
  162. };
  163. const struct rspamd_atom_subr mime_expr_subr = {
  164. .parse = rspamd_mime_expr_parse,
  165. .process = rspamd_mime_expr_process,
  166. .priority = rspamd_mime_expr_priority,
  167. .destroy = rspamd_mime_expr_destroy
  168. };
  169. static struct _fl *list_ptr = &rspamd_functions_list[0];
  170. static guint32 functions_number = sizeof (rspamd_functions_list) /
  171. sizeof (struct _fl);
  172. static gboolean list_allocated = FALSE;
  173. /* Bsearch routine */
  174. static gint
  175. fl_cmp (const void *s1, const void *s2)
  176. {
  177. struct _fl *fl1 = (struct _fl *)s1;
  178. struct _fl *fl2 = (struct _fl *)s2;
  179. return strcmp (fl1->name, fl2->name);
  180. }
  181. static GQuark
  182. rspamd_mime_expr_quark (void)
  183. {
  184. return g_quark_from_static_string ("mime-expressions");
  185. }
  186. #define TYPE_CHECK(str, type, len) (sizeof(type) - 1 == (len) && rspamd_lc_cmp((str), (type), (len)) == 0)
  187. static gboolean
  188. rspamd_parse_long_option (const gchar *start, gsize len,
  189. struct rspamd_regexp_atom *a)
  190. {
  191. gboolean ret = FALSE;
  192. if (TYPE_CHECK (start, "body", len)) {
  193. ret = TRUE;
  194. a->type = RSPAMD_RE_BODY;
  195. }
  196. else if (TYPE_CHECK (start, "part", len) ||
  197. TYPE_CHECK (start, "mime", len)) {
  198. ret = TRUE;
  199. a->type = RSPAMD_RE_MIME;
  200. }
  201. else if (TYPE_CHECK (start, "raw_part", len) ||
  202. TYPE_CHECK (start, "raw_mime", len) ||
  203. TYPE_CHECK (start, "mime_raw", len)) {
  204. ret = TRUE;
  205. a->type = RSPAMD_RE_RAWMIME;
  206. }
  207. else if (TYPE_CHECK (start, "header", len)) {
  208. ret = TRUE;
  209. a->type = RSPAMD_RE_HEADER;
  210. }
  211. else if (TYPE_CHECK (start, "mime_header", len) ||
  212. TYPE_CHECK (start, "header_mime", len)) {
  213. ret = TRUE;
  214. a->type = RSPAMD_RE_MIMEHEADER;
  215. }
  216. else if (TYPE_CHECK (start, "raw_header", len) ||
  217. TYPE_CHECK (start, "header_raw", len)) {
  218. ret = TRUE;
  219. a->type = RSPAMD_RE_RAWHEADER;
  220. }
  221. else if (TYPE_CHECK (start, "all_header", len) ||
  222. TYPE_CHECK (start, "header_all", len) ||
  223. TYPE_CHECK (start, "all_headers", len)) {
  224. ret = TRUE;
  225. a->type = RSPAMD_RE_ALLHEADER;
  226. }
  227. else if (TYPE_CHECK (start, "url", len)) {
  228. ret = TRUE;
  229. a->type = RSPAMD_RE_URL;
  230. }
  231. else if (TYPE_CHECK (start, "email", len)) {
  232. ret = TRUE;
  233. a->type = RSPAMD_RE_EMAIL;
  234. }
  235. else if (TYPE_CHECK (start, "sa_body", len)) {
  236. ret = TRUE;
  237. a->type = RSPAMD_RE_SABODY;
  238. }
  239. else if (TYPE_CHECK (start, "sa_raw_body", len) ||
  240. TYPE_CHECK (start, "sa_body_raw", len)) {
  241. ret = TRUE;
  242. a->type = RSPAMD_RE_SARAWBODY;
  243. }
  244. else if (TYPE_CHECK (start, "words", len)) {
  245. ret = TRUE;
  246. a->type = RSPAMD_RE_WORDS;
  247. }
  248. else if (TYPE_CHECK (start, "raw_words", len)) {
  249. ret = TRUE;
  250. a->type = RSPAMD_RE_RAWWORDS;
  251. }
  252. else if (TYPE_CHECK (start, "stem_words", len)) {
  253. ret = TRUE;
  254. a->type = RSPAMD_RE_STEMWORDS;
  255. }
  256. else if (TYPE_CHECK (start, "selector", len)) {
  257. ret = TRUE;
  258. a->type = RSPAMD_RE_SELECTOR;
  259. }
  260. return ret;
  261. }
  262. /*
  263. * Rspamd regexp utility functions
  264. */
  265. static struct rspamd_regexp_atom *
  266. rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line,
  267. struct rspamd_config *cfg)
  268. {
  269. const gchar *begin, *end, *p, *src, *start, *brace;
  270. gchar *dbegin, *dend, *extra = NULL;
  271. struct rspamd_regexp_atom *result;
  272. GError *err = NULL;
  273. GString *re_flags;
  274. if (line == NULL) {
  275. msg_err_pool ("cannot parse NULL line");
  276. return NULL;
  277. }
  278. src = line;
  279. result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_atom));
  280. /* Skip whitespaces */
  281. while (g_ascii_isspace (*line)) {
  282. line++;
  283. }
  284. if (*line == '\0') {
  285. msg_warn_pool ("got empty regexp");
  286. return NULL;
  287. }
  288. result->type = RSPAMD_RE_MAX;
  289. start = line;
  290. /* First try to find header name */
  291. begin = strchr (line, '/');
  292. if (begin != NULL) {
  293. p = begin;
  294. end = NULL;
  295. while (p != line) {
  296. if (*p == '=') {
  297. end = p;
  298. break;
  299. }
  300. p--;
  301. }
  302. if (end) {
  303. extra = rspamd_mempool_alloc (pool, end - line + 1);
  304. rspamd_strlcpy (extra, line, end - line + 1);
  305. line = end;
  306. }
  307. }
  308. else {
  309. extra = rspamd_mempool_strdup (pool, line);
  310. result->type = RSPAMD_RE_MAX;
  311. line = start;
  312. }
  313. /* Find begin of regexp */
  314. while (*line && *line != '/') {
  315. line++;
  316. }
  317. if (*line != '\0') {
  318. begin = line + 1;
  319. }
  320. else if (extra == NULL) {
  321. /* Assume that line without // is just a header name */
  322. extra = rspamd_mempool_strdup (pool, line);
  323. result->type = RSPAMD_RE_HEADER;
  324. return result;
  325. }
  326. else {
  327. /* We got header name earlier but have not found // expression, so it is invalid regexp */
  328. msg_warn_pool (
  329. "got no header name (eg. header=) but without corresponding regexp, %s",
  330. src);
  331. return NULL;
  332. }
  333. /* Find end */
  334. end = begin;
  335. while (*end && (*end != '/' || *(end - 1) == '\\')) {
  336. end++;
  337. }
  338. if (end == begin || *end != '/') {
  339. msg_warn_pool ("no trailing / in regexp %s", src);
  340. return NULL;
  341. }
  342. /* Parse flags */
  343. p = end + 1;
  344. re_flags = g_string_sized_new (32);
  345. while (p != NULL) {
  346. switch (*p) {
  347. case 'i':
  348. case 'm':
  349. case 's':
  350. case 'x':
  351. case 'u':
  352. case 'O':
  353. case 'r':
  354. case 'L':
  355. /* Handled by rspamd_regexp_t */
  356. g_string_append_c (re_flags, *p);
  357. p++;
  358. break;
  359. case 'o':
  360. p++;
  361. break;
  362. /* Type flags */
  363. case 'H':
  364. result->type = RSPAMD_RE_HEADER;
  365. p++;
  366. break;
  367. case 'R':
  368. result->type = RSPAMD_RE_ALLHEADER;
  369. p++;
  370. break;
  371. case 'B':
  372. result->type = RSPAMD_RE_MIMEHEADER;
  373. p++;
  374. break;
  375. case 'C':
  376. result->type = RSPAMD_RE_SABODY;
  377. p++;
  378. break;
  379. case 'D':
  380. result->type = RSPAMD_RE_SARAWBODY;
  381. p++;
  382. break;
  383. case 'M':
  384. result->type = RSPAMD_RE_BODY;
  385. p++;
  386. break;
  387. case 'P':
  388. result->type = RSPAMD_RE_MIME;
  389. p++;
  390. break;
  391. case 'Q':
  392. result->type = RSPAMD_RE_RAWMIME;
  393. p++;
  394. break;
  395. case 'U':
  396. result->type = RSPAMD_RE_URL;
  397. p++;
  398. break;
  399. case 'X':
  400. result->type = RSPAMD_RE_RAWHEADER;
  401. p++;
  402. break;
  403. case '$':
  404. result->type = RSPAMD_RE_SELECTOR;
  405. p++;
  406. break;
  407. case '{':
  408. /* Long definition */
  409. if ((brace = strchr (p + 1, '}')) != NULL) {
  410. if (!rspamd_parse_long_option (p + 1, brace - (p + 1), result)) {
  411. msg_warn_pool ("invalid long regexp type: %*s in '%s'",
  412. (int)(brace - (p + 1)), p + 1, src);
  413. p = NULL;
  414. }
  415. else {
  416. p = brace + 1;
  417. }
  418. }
  419. else {
  420. p = NULL;
  421. }
  422. break;
  423. /* Other flags */
  424. case 'T':
  425. result->is_test = TRUE;
  426. p++;
  427. break;
  428. case 'S':
  429. result->is_strong = TRUE;
  430. p++;
  431. break;
  432. case 'A':
  433. result->is_multiple = TRUE;
  434. p++;
  435. break;
  436. /* Stop flags parsing */
  437. default:
  438. p = NULL;
  439. break;
  440. }
  441. }
  442. if (result->type >= RSPAMD_RE_MAX) {
  443. if (extra) {
  444. /* Assume header regexp */
  445. result->extra.header = extra;
  446. result->type = RSPAMD_RE_HEADER;
  447. }
  448. else {
  449. msg_err_pool ("could not read regexp: %s, unknown type", src);
  450. return NULL;
  451. }
  452. }
  453. if ((result->type == RSPAMD_RE_HEADER ||
  454. result->type == RSPAMD_RE_RAWHEADER ||
  455. result->type == RSPAMD_RE_MIMEHEADER)) {
  456. if (extra == NULL) {
  457. msg_err_pool ("header regexp: '%s' has no header part", src);
  458. return NULL;
  459. }
  460. else {
  461. result->extra.header = extra;
  462. }
  463. }
  464. if (result->type == RSPAMD_RE_SELECTOR) {
  465. if (extra == NULL) {
  466. msg_err_pool ("selector regexp: '%s' has no selector part", src);
  467. return NULL;
  468. }
  469. else {
  470. result->extra.selector = extra;
  471. }
  472. }
  473. result->regexp_text = rspamd_mempool_strdup (pool, start);
  474. dbegin = result->regexp_text + (begin - start);
  475. dend = result->regexp_text + (end - start);
  476. *dend = '\0';
  477. result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
  478. &err);
  479. g_string_free (re_flags, TRUE);
  480. if (result->regexp == NULL || err != NULL) {
  481. msg_warn_pool ("could not read regexp: %s while reading regexp %e",
  482. src, err);
  483. if (err) {
  484. g_error_free (err);
  485. }
  486. return NULL;
  487. }
  488. if (result->is_multiple) {
  489. rspamd_regexp_set_maxhits (result->regexp, 0);
  490. }
  491. else {
  492. rspamd_regexp_set_maxhits (result->regexp, 1);
  493. }
  494. rspamd_regexp_set_ud (result->regexp, result);
  495. *dend = '/';
  496. return result;
  497. }
  498. struct rspamd_function_atom *
  499. rspamd_mime_expr_parse_function_atom (rspamd_mempool_t *pool, const gchar *input)
  500. {
  501. const gchar *obrace, *ebrace, *p, *c;
  502. gchar t, *databuf;
  503. guint len;
  504. struct rspamd_function_atom *res;
  505. struct expression_argument arg;
  506. GError *err = NULL;
  507. enum {
  508. start_read_argument = 0,
  509. in_string,
  510. in_regexp,
  511. got_backslash,
  512. got_comma
  513. } state, prev_state = 0;
  514. obrace = strchr (input, '(');
  515. ebrace = strrchr (input, ')');
  516. g_assert (obrace != NULL && ebrace != NULL);
  517. res = rspamd_mempool_alloc0 (pool, sizeof (*res));
  518. res->name = rspamd_mempool_alloc (pool, obrace - input + 1);
  519. rspamd_strlcpy (res->name, input, obrace - input + 1);
  520. res->args = g_array_new (FALSE, FALSE, sizeof (struct expression_argument));
  521. p = obrace + 1;
  522. c = p;
  523. state = start_read_argument;
  524. /* Read arguments */
  525. while (p <= ebrace) {
  526. t = *p;
  527. switch (state) {
  528. case start_read_argument:
  529. if (t == '/') {
  530. state = in_regexp;
  531. c = p;
  532. }
  533. else if (!g_ascii_isspace (t)) {
  534. state = in_string;
  535. if (t == '\'' || t == '\"') {
  536. c = p + 1;
  537. }
  538. else {
  539. c = p;
  540. }
  541. }
  542. p ++;
  543. break;
  544. case in_regexp:
  545. if (t == '\\') {
  546. state = got_backslash;
  547. prev_state = in_regexp;
  548. }
  549. else if (t == ',' || p == ebrace) {
  550. len = p - c + 1;
  551. databuf = rspamd_mempool_alloc (pool, len);
  552. rspamd_strlcpy (databuf, c, len);
  553. arg.type = EXPRESSION_ARGUMENT_REGEXP;
  554. arg.data = rspamd_regexp_cache_create (NULL, databuf, NULL, &err);
  555. if (arg.data == NULL) {
  556. /* Fallback to string */
  557. msg_warn ("cannot parse slashed argument %s as regexp: %s",
  558. databuf, err->message);
  559. g_error_free (err);
  560. arg.type = EXPRESSION_ARGUMENT_NORMAL;
  561. arg.data = databuf;
  562. }
  563. g_array_append_val (res->args, arg);
  564. state = got_comma;
  565. }
  566. p ++;
  567. break;
  568. case in_string:
  569. if (t == '\\') {
  570. state = got_backslash;
  571. prev_state = in_string;
  572. }
  573. else if (t == ',' || p == ebrace) {
  574. if (*(p - 1) == '\'' || *(p - 1) == '\"') {
  575. len = p - c;
  576. }
  577. else {
  578. len = p - c + 1;
  579. }
  580. databuf = rspamd_mempool_alloc (pool, len);
  581. rspamd_strlcpy (databuf, c, len);
  582. arg.type = EXPRESSION_ARGUMENT_NORMAL;
  583. arg.data = databuf;
  584. g_array_append_val (res->args, arg);
  585. state = got_comma;
  586. }
  587. p ++;
  588. break;
  589. case got_backslash:
  590. state = prev_state;
  591. p ++;
  592. break;
  593. case got_comma:
  594. state = start_read_argument;
  595. break;
  596. }
  597. }
  598. return res;
  599. }
  600. static rspamd_expression_atom_t *
  601. rspamd_mime_expr_parse (const gchar *line, gsize len,
  602. rspamd_mempool_t *pool, gpointer ud, GError **err)
  603. {
  604. rspamd_expression_atom_t *a = NULL;
  605. struct rspamd_mime_atom *mime_atom = NULL;
  606. const gchar *p, *end, *c = NULL;
  607. struct rspamd_mime_expr_ud *real_ud = (struct rspamd_mime_expr_ud *)ud;
  608. struct rspamd_config *cfg;
  609. rspamd_regexp_t *own_re;
  610. gchar t;
  611. gint type = MIME_ATOM_REGEXP, obraces = 0, ebraces = 0;
  612. enum {
  613. in_header = 0,
  614. got_slash,
  615. in_regexp,
  616. got_backslash,
  617. got_second_slash,
  618. in_flags,
  619. in_flags_brace,
  620. got_obrace,
  621. in_function,
  622. in_local_function,
  623. got_ebrace,
  624. end_atom,
  625. bad_atom
  626. } state = 0, prev_state = 0;
  627. p = line;
  628. end = p + len;
  629. cfg = real_ud->cfg;
  630. while (p < end) {
  631. t = *p;
  632. switch (state) {
  633. case in_header:
  634. if (t == '/') {
  635. /* Regexp */
  636. state = got_slash;
  637. }
  638. else if (t == '(') {
  639. /* Function */
  640. state = got_obrace;
  641. }
  642. else if (!g_ascii_isalnum (t) && t != '_' && t != '-' && t != '=') {
  643. if (t == ':') {
  644. if (p - line == 3 && memcmp (line, "lua", 3) == 0) {
  645. type = MIME_ATOM_LOCAL_LUA_FUNCTION;
  646. state = in_local_function;
  647. c = p + 1;
  648. }
  649. }
  650. else {
  651. /* Likely lua function, identified by just a string */
  652. type = MIME_ATOM_LUA_FUNCTION;
  653. state = end_atom;
  654. /* Do not increase p */
  655. continue;
  656. }
  657. }
  658. else if (g_ascii_isspace (t)) {
  659. state = bad_atom;
  660. }
  661. p ++;
  662. break;
  663. case got_slash:
  664. state = in_regexp;
  665. break;
  666. case in_regexp:
  667. if (t == '\\') {
  668. state = got_backslash;
  669. prev_state = in_regexp;
  670. }
  671. else if (t == '/') {
  672. state = got_second_slash;
  673. }
  674. p ++;
  675. break;
  676. case got_second_slash:
  677. state = in_flags;
  678. break;
  679. case in_flags:
  680. if (t == '{') {
  681. state = in_flags_brace;
  682. p ++;
  683. }
  684. else if (!g_ascii_isalpha (t) && t != '$') {
  685. state = end_atom;
  686. }
  687. else {
  688. p ++;
  689. }
  690. break;
  691. case in_flags_brace:
  692. if (t == '}') {
  693. state = in_flags;
  694. }
  695. p ++;
  696. break;
  697. case got_backslash:
  698. state = prev_state;
  699. p ++;
  700. break;
  701. case got_obrace:
  702. state = in_function;
  703. type = MIME_ATOM_INTERNAL_FUNCTION;
  704. obraces ++;
  705. break;
  706. case in_function:
  707. if (t == '\\') {
  708. state = got_backslash;
  709. prev_state = in_function;
  710. }
  711. else if (t == '(') {
  712. obraces ++;
  713. }
  714. else if (t == ')') {
  715. ebraces ++;
  716. if (ebraces == obraces) {
  717. state = got_ebrace;
  718. }
  719. }
  720. p ++;
  721. break;
  722. case in_local_function:
  723. if (!(g_ascii_isalnum (t) || t == '-' || t == '_')) {
  724. g_assert (c != NULL);
  725. state = end_atom;
  726. }
  727. else {
  728. p++;
  729. }
  730. break;
  731. case got_ebrace:
  732. state = end_atom;
  733. break;
  734. case bad_atom:
  735. g_set_error (err, rspamd_mime_expr_quark(), 100, "cannot parse"
  736. " mime atom '%s' when reading symbol '%c' at offset %d, "
  737. "near %*.s", line, t, (gint)(p - line),
  738. (gint)MIN (end - p, 10), p);
  739. return NULL;
  740. case end_atom:
  741. goto set;
  742. }
  743. }
  744. set:
  745. if (p - line == 0 || (state != got_ebrace && state != got_second_slash &&
  746. state != in_flags && state != end_atom)) {
  747. g_set_error (err, rspamd_mime_expr_quark(), 200, "incomplete or empty"
  748. " mime atom");
  749. return NULL;
  750. }
  751. mime_atom = rspamd_mempool_alloc (pool, sizeof (*mime_atom));
  752. mime_atom->type = type;
  753. mime_atom->str = rspamd_mempool_alloc (pool, p - line + 1);
  754. rspamd_strlcpy (mime_atom->str, line, p - line + 1);
  755. if (type == MIME_ATOM_REGEXP) {
  756. mime_atom->d.re = rspamd_mime_expr_parse_regexp_atom (pool,
  757. mime_atom->str, cfg);
  758. if (mime_atom->d.re == NULL) {
  759. g_set_error (err, rspamd_mime_expr_quark(), 200,
  760. "cannot parse regexp '%s'",
  761. mime_atom->str);
  762. goto err;
  763. }
  764. else {
  765. gint lua_cbref = -1;
  766. /* Check regexp condition */
  767. if (real_ud->conf_obj != NULL) {
  768. const ucl_object_t *re_conditions = ucl_object_lookup (real_ud->conf_obj,
  769. "re_conditions");
  770. if (re_conditions != NULL) {
  771. if (ucl_object_type (re_conditions) != UCL_OBJECT) {
  772. g_set_error (err, rspamd_mime_expr_quark (), 320,
  773. "re_conditions is not a table for '%s'",
  774. mime_atom->str);
  775. goto err;
  776. }
  777. const ucl_object_t *function_obj = ucl_object_lookup (re_conditions,
  778. mime_atom->str);
  779. if (function_obj != NULL) {
  780. if (ucl_object_type (function_obj) != UCL_USERDATA) {
  781. g_set_error (err, rspamd_mime_expr_quark (), 320,
  782. "condition for '%s' is invalid, must be function",
  783. mime_atom->str);
  784. goto err;
  785. }
  786. struct ucl_lua_funcdata *fd = function_obj->value.ud;
  787. lua_cbref = fd->idx;
  788. }
  789. }
  790. }
  791. if (lua_cbref != -1) {
  792. msg_info_config ("added condition for regexp %s", mime_atom->str);
  793. }
  794. /* Register new item in the cache */
  795. if (mime_atom->d.re->type == RSPAMD_RE_HEADER ||
  796. mime_atom->d.re->type == RSPAMD_RE_RAWHEADER ||
  797. mime_atom->d.re->type == RSPAMD_RE_MIMEHEADER) {
  798. if (mime_atom->d.re->extra.header != NULL) {
  799. own_re = mime_atom->d.re->regexp;
  800. mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
  801. mime_atom->d.re->regexp,
  802. mime_atom->d.re->type,
  803. mime_atom->d.re->extra.header,
  804. strlen (mime_atom->d.re->extra.header) + 1,
  805. lua_cbref);
  806. /* Pass ownership to the cache */
  807. rspamd_regexp_unref (own_re);
  808. }
  809. else {
  810. /* We have header regexp, but no header name is detected */
  811. g_set_error (err,
  812. rspamd_mime_expr_quark (),
  813. 200,
  814. "no header name in header regexp: '%s'",
  815. mime_atom->str);
  816. rspamd_regexp_unref (mime_atom->d.re->regexp);
  817. goto err;
  818. }
  819. }
  820. else if (mime_atom->d.re->type == RSPAMD_RE_SELECTOR) {
  821. if (mime_atom->d.re->extra.selector != NULL) {
  822. own_re = mime_atom->d.re->regexp;
  823. mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
  824. mime_atom->d.re->regexp,
  825. mime_atom->d.re->type,
  826. mime_atom->d.re->extra.selector,
  827. strlen (mime_atom->d.re->extra.selector) + 1,
  828. lua_cbref);
  829. /* Pass ownership to the cache */
  830. rspamd_regexp_unref (own_re);
  831. }
  832. else {
  833. /* We have selector regexp, but no selector name is detected */
  834. g_set_error (err,
  835. rspamd_mime_expr_quark (),
  836. 200,
  837. "no selector name in selector regexp: '%s'",
  838. mime_atom->str);
  839. rspamd_regexp_unref (mime_atom->d.re->regexp);
  840. goto err;
  841. }
  842. }
  843. else {
  844. own_re = mime_atom->d.re->regexp;
  845. mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
  846. mime_atom->d.re->regexp,
  847. mime_atom->d.re->type,
  848. NULL,
  849. 0,
  850. lua_cbref);
  851. /* Pass ownership to the cache */
  852. rspamd_regexp_unref (own_re);
  853. }
  854. }
  855. }
  856. else if (type == MIME_ATOM_LUA_FUNCTION) {
  857. mime_atom->d.lua_function = mime_atom->str;
  858. lua_getglobal (cfg->lua_state, mime_atom->str);
  859. if (lua_type (cfg->lua_state, -1) != LUA_TFUNCTION) {
  860. g_set_error (err, rspamd_mime_expr_quark(), 200,
  861. "no such lua function '%s'",
  862. mime_atom->str);
  863. lua_pop (cfg->lua_state, 1);
  864. goto err;
  865. }
  866. lua_pop (cfg->lua_state, 1);
  867. }
  868. else if (type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
  869. /* p pointer is set to the start of Lua function name */
  870. if (real_ud->conf_obj == NULL) {
  871. g_set_error (err, rspamd_mime_expr_quark(), 300,
  872. "no config object for '%s'",
  873. mime_atom->str);
  874. goto err;
  875. }
  876. const ucl_object_t *functions = ucl_object_lookup (real_ud->conf_obj,
  877. "functions");
  878. if (functions == NULL) {
  879. g_set_error (err, rspamd_mime_expr_quark(), 310,
  880. "no functions defined for '%s'",
  881. mime_atom->str);
  882. goto err;
  883. }
  884. if (ucl_object_type (functions) != UCL_OBJECT) {
  885. g_set_error (err, rspamd_mime_expr_quark(), 320,
  886. "functions is not a table for '%s'",
  887. mime_atom->str);
  888. goto err;
  889. }
  890. const ucl_object_t *function_obj;
  891. function_obj = ucl_object_lookup_len (functions, c,
  892. p - c);
  893. if (function_obj == NULL) {
  894. g_set_error (err, rspamd_mime_expr_quark(), 320,
  895. "function %*.s is not found for '%s'",
  896. (int)(p - c), c, mime_atom->str);
  897. goto err;
  898. }
  899. if (ucl_object_type (function_obj) != UCL_USERDATA) {
  900. g_set_error (err, rspamd_mime_expr_quark(), 320,
  901. "function %*.s has invalid type for '%s'",
  902. (int)(p - c), c, mime_atom->str);
  903. goto err;
  904. }
  905. struct ucl_lua_funcdata *fd = function_obj->value.ud;
  906. mime_atom->d.lua_cbref = fd->idx;
  907. }
  908. else {
  909. mime_atom->d.func = rspamd_mime_expr_parse_function_atom (pool,
  910. mime_atom->str);
  911. if (mime_atom->d.func == NULL) {
  912. g_set_error (err, rspamd_mime_expr_quark(), 200,
  913. "cannot parse function '%s'",
  914. mime_atom->str);
  915. goto err;
  916. }
  917. }
  918. a = rspamd_mempool_alloc0 (pool, sizeof (*a));
  919. a->len = p - line;
  920. a->priority = 0;
  921. a->data = mime_atom;
  922. return a;
  923. err:
  924. return NULL;
  925. }
  926. static gint
  927. rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
  928. struct rspamd_task *task)
  929. {
  930. gint ret;
  931. if (re == NULL) {
  932. msg_info_task ("invalid regexp passed");
  933. return 0;
  934. }
  935. if (re->type == RSPAMD_RE_HEADER || re->type == RSPAMD_RE_RAWHEADER) {
  936. ret = rspamd_re_cache_process (task,
  937. re->regexp,
  938. re->type,
  939. re->extra.header,
  940. strlen (re->extra.header),
  941. re->is_strong);
  942. }
  943. else if (re->type == RSPAMD_RE_SELECTOR) {
  944. ret = rspamd_re_cache_process (task,
  945. re->regexp,
  946. re->type,
  947. re->extra.selector,
  948. strlen (re->extra.selector),
  949. re->is_strong);
  950. }
  951. else {
  952. ret = rspamd_re_cache_process (task,
  953. re->regexp,
  954. re->type,
  955. NULL,
  956. 0,
  957. re->is_strong);
  958. }
  959. if (re->is_test) {
  960. msg_info_task ("test %s regexp '%s' returned %d",
  961. rspamd_re_cache_type_to_string (re->type),
  962. re->regexp_text, ret);
  963. }
  964. return ret;
  965. }
  966. static gint
  967. rspamd_mime_expr_priority (rspamd_expression_atom_t *atom)
  968. {
  969. struct rspamd_mime_atom *mime_atom = atom->data;
  970. gint ret = 0;
  971. switch (mime_atom->type) {
  972. case MIME_ATOM_INTERNAL_FUNCTION:
  973. /* Prioritize internal functions slightly */
  974. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
  975. break;
  976. case MIME_ATOM_LUA_FUNCTION:
  977. case MIME_ATOM_LOCAL_LUA_FUNCTION:
  978. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 4;
  979. break;
  980. case MIME_ATOM_REGEXP:
  981. switch (mime_atom->d.re->type) {
  982. case RSPAMD_RE_HEADER:
  983. case RSPAMD_RE_RAWHEADER:
  984. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 16;
  985. break;
  986. case RSPAMD_RE_URL:
  987. case RSPAMD_RE_EMAIL:
  988. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
  989. break;
  990. case RSPAMD_RE_SELECTOR:
  991. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 8;
  992. break;
  993. case RSPAMD_RE_MIME:
  994. case RSPAMD_RE_RAWMIME:
  995. ret = RSPAMD_EXPRESSION_MAX_PRIORITY - RSPAMD_EXPRESSION_MAX_PRIORITY / 2;
  996. break;
  997. case RSPAMD_RE_WORDS:
  998. case RSPAMD_RE_RAWWORDS:
  999. case RSPAMD_RE_STEMWORDS:
  1000. default:
  1001. /* For expensive regexps */
  1002. ret = 0;
  1003. break;
  1004. }
  1005. }
  1006. return ret;
  1007. }
  1008. static void
  1009. rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom)
  1010. {
  1011. struct rspamd_mime_atom *mime_atom = atom->data;
  1012. if (mime_atom) {
  1013. if (mime_atom->type == MIME_ATOM_INTERNAL_FUNCTION) {
  1014. /* Need to cleanup arguments */
  1015. g_array_free (mime_atom->d.func->args, TRUE);
  1016. }
  1017. }
  1018. }
  1019. static gboolean
  1020. rspamd_mime_expr_process_function (struct rspamd_function_atom * func,
  1021. struct rspamd_task * task,
  1022. lua_State *L)
  1023. {
  1024. struct _fl *selected, key;
  1025. key.name = func->name;
  1026. selected = bsearch (&key,
  1027. list_ptr,
  1028. functions_number,
  1029. sizeof (struct _fl),
  1030. fl_cmp);
  1031. if (selected == NULL) {
  1032. /* Try to check lua function */
  1033. return FALSE;
  1034. }
  1035. return selected->func (task, func->args, selected->user_data);
  1036. }
  1037. static gdouble
  1038. rspamd_mime_expr_process (void *ud, rspamd_expression_atom_t *atom)
  1039. {
  1040. struct rspamd_task *task = (struct rspamd_task *)ud;
  1041. struct rspamd_mime_atom *mime_atom;
  1042. lua_State *L;
  1043. gdouble ret = 0;
  1044. g_assert (task != NULL);
  1045. g_assert (atom != NULL);
  1046. mime_atom = atom->data;
  1047. if (mime_atom->type == MIME_ATOM_REGEXP) {
  1048. ret = rspamd_mime_expr_process_regexp (mime_atom->d.re, task);
  1049. }
  1050. else if (mime_atom->type == MIME_ATOM_LUA_FUNCTION) {
  1051. L = task->cfg->lua_state;
  1052. lua_getglobal (L, mime_atom->d.lua_function);
  1053. rspamd_lua_task_push (L, task);
  1054. if (lua_pcall (L, 1, 1, 0) != 0) {
  1055. msg_info_task ("lua call to global function '%s' for atom '%s' failed: %s",
  1056. mime_atom->d.lua_function,
  1057. mime_atom->str,
  1058. lua_tostring (L, -1));
  1059. lua_pop (L, 1);
  1060. }
  1061. else {
  1062. if (lua_type (L, -1) == LUA_TBOOLEAN) {
  1063. ret = lua_toboolean (L, -1);
  1064. }
  1065. else if (lua_type (L, -1) == LUA_TNUMBER) {
  1066. ret = lua_tonumber (L, 1);
  1067. }
  1068. else {
  1069. msg_err_task ("%s returned wrong return type: %s",
  1070. mime_atom->str, lua_typename (L, lua_type (L, -1)));
  1071. }
  1072. /* Remove result */
  1073. lua_pop (L, 1);
  1074. }
  1075. }
  1076. else if (mime_atom->type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
  1077. gint err_idx;
  1078. L = task->cfg->lua_state;
  1079. lua_pushcfunction (L, &rspamd_lua_traceback);
  1080. err_idx = lua_gettop (L);
  1081. lua_rawgeti (L, LUA_REGISTRYINDEX, mime_atom->d.lua_cbref);
  1082. rspamd_lua_task_push (L, task);
  1083. if (lua_pcall (L, 1, 1, err_idx) != 0) {
  1084. msg_info_task ("lua call to local function for atom '%s' failed: %s",
  1085. mime_atom->str,
  1086. lua_tostring (L, -1));
  1087. }
  1088. else {
  1089. if (lua_type (L, -1) == LUA_TBOOLEAN) {
  1090. ret = lua_toboolean (L, -1);
  1091. }
  1092. else if (lua_type (L, -1) == LUA_TNUMBER) {
  1093. ret = lua_tonumber (L, 1);
  1094. }
  1095. else {
  1096. msg_err_task ("%s returned wrong return type: %s",
  1097. mime_atom->str, lua_typename (L, lua_type (L, -1)));
  1098. }
  1099. }
  1100. lua_settop (L, 0);
  1101. }
  1102. else {
  1103. ret = rspamd_mime_expr_process_function (mime_atom->d.func, task,
  1104. task->cfg->lua_state);
  1105. }
  1106. return ret;
  1107. }
  1108. void
  1109. register_expression_function (const gchar *name,
  1110. rspamd_internal_func_t func,
  1111. void *user_data)
  1112. {
  1113. static struct _fl *new;
  1114. functions_number++;
  1115. new = g_new (struct _fl, functions_number);
  1116. memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl));
  1117. if (list_allocated) {
  1118. g_free (list_ptr);
  1119. }
  1120. list_allocated = TRUE;
  1121. new[functions_number - 1].name = name;
  1122. new[functions_number - 1].func = func;
  1123. new[functions_number - 1].user_data = user_data;
  1124. qsort (new, functions_number, sizeof (struct _fl), fl_cmp);
  1125. list_ptr = new;
  1126. }
  1127. gboolean
  1128. rspamd_compare_encoding (struct rspamd_task *task, GArray * args, void *unused)
  1129. {
  1130. struct expression_argument *arg;
  1131. if (args == NULL || task == NULL) {
  1132. return FALSE;
  1133. }
  1134. arg = &g_array_index (args, struct expression_argument, 0);
  1135. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1136. msg_warn_task ("invalid argument to function is passed");
  1137. return FALSE;
  1138. }
  1139. /* XXX: really write this function */
  1140. return TRUE;
  1141. }
  1142. gboolean
  1143. rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused)
  1144. {
  1145. struct expression_argument *arg;
  1146. struct rspamd_mime_header *rh;
  1147. if (args == NULL || task == NULL) {
  1148. return FALSE;
  1149. }
  1150. arg = &g_array_index (args, struct expression_argument, 0);
  1151. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1152. msg_warn_task ("invalid argument to function is passed");
  1153. return FALSE;
  1154. }
  1155. rh = rspamd_message_get_header_array(task,
  1156. (gchar *) arg->data, FALSE);
  1157. debug_task ("try to get header %s: %d", (gchar *)arg->data,
  1158. (rh != NULL));
  1159. if (rh) {
  1160. return TRUE;
  1161. }
  1162. return FALSE;
  1163. }
  1164. /*
  1165. * This function is designed to find difference between text/html and text/plain parts
  1166. * It takes one argument: difference threshold, if we have two text parts, compare
  1167. * its hashes and check for threshold, if value is greater than threshold, return TRUE
  1168. * and return FALSE otherwise.
  1169. */
  1170. gboolean
  1171. rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
  1172. {
  1173. gint threshold, threshold2 = -1;
  1174. struct expression_argument *arg;
  1175. gdouble *pdiff, diff;
  1176. if (args == NULL || args->len == 0) {
  1177. debug_task ("no threshold is specified, assume it 100");
  1178. threshold = 100;
  1179. }
  1180. else {
  1181. errno = 0;
  1182. arg = &g_array_index (args, struct expression_argument, 0);
  1183. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1184. msg_warn_task ("invalid argument to function is passed");
  1185. return FALSE;
  1186. }
  1187. threshold = strtoul ((gchar *)arg->data, NULL, 10);
  1188. if (errno != 0) {
  1189. msg_info_task ("bad numeric value for threshold \"%s\", assume it 100",
  1190. (gchar *)arg->data);
  1191. threshold = 100;
  1192. }
  1193. if (args->len >= 2) {
  1194. arg = &g_array_index (args, struct expression_argument, 1);
  1195. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1196. msg_warn_task ("invalid argument to function is passed");
  1197. return FALSE;
  1198. }
  1199. errno = 0;
  1200. threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
  1201. if (errno != 0) {
  1202. msg_info_task ("bad numeric value for threshold \"%s\", ignore it",
  1203. (gchar *)arg->data);
  1204. threshold2 = -1;
  1205. }
  1206. }
  1207. }
  1208. if ((pdiff =
  1209. rspamd_mempool_get_variable (task->task_pool,
  1210. "parts_distance")) != NULL) {
  1211. diff = (1.0 - (*pdiff)) * 100.0;
  1212. if (diff != -1) {
  1213. if (threshold2 > 0) {
  1214. if (diff >= MIN (threshold, threshold2) &&
  1215. diff < MAX (threshold, threshold2)) {
  1216. return TRUE;
  1217. }
  1218. }
  1219. else {
  1220. if (diff <= threshold) {
  1221. return TRUE;
  1222. }
  1223. }
  1224. return FALSE;
  1225. }
  1226. else {
  1227. return FALSE;
  1228. }
  1229. }
  1230. return FALSE;
  1231. }
  1232. struct addr_list {
  1233. const gchar *name;
  1234. guint namelen;
  1235. const gchar *addr;
  1236. guint addrlen;
  1237. };
  1238. static gint
  1239. addr_list_cmp_func (const void *a, const void *b)
  1240. {
  1241. const struct addr_list *addra = (struct addr_list *)a,
  1242. *addrb = (struct addr_list *)b;
  1243. if (addra->addrlen != addrb->addrlen) {
  1244. return addra->addrlen - addrb->addrlen;
  1245. }
  1246. return memcmp (addra->addr, addrb->addr, addra->addrlen);
  1247. }
  /*
   * COMPARE_RCPT_LEN: number of leading address characters compared between
   * neighbouring recipients; addresses not longer than this are skipped.
   */
  #define COMPARE_RCPT_LEN 3
  /* MIN_RCPT_TO_COMPARE: recipient lists shorter than this are not analysed. */
  #define MIN_RCPT_TO_COMPARE 7
  1250. gboolean
  1251. rspamd_recipients_distance (struct rspamd_task *task, GArray * args,
  1252. void *unused)
  1253. {
  1254. struct expression_argument *arg;
  1255. struct rspamd_email_address *cur;
  1256. double threshold;
  1257. struct addr_list *ar;
  1258. gint num, i, hits = 0;
  1259. if (args == NULL) {
  1260. msg_warn_task ("no parameters to function");
  1261. return FALSE;
  1262. }
  1263. arg = &g_array_index (args, struct expression_argument, 0);
  1264. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1265. msg_warn_task ("invalid argument to function is passed");
  1266. return FALSE;
  1267. }
  1268. errno = 0;
  1269. threshold = strtod ((gchar *)arg->data, NULL);
  1270. if (errno != 0) {
  1271. msg_warn_task ("invalid numeric value '%s': %s",
  1272. (gchar *)arg->data,
  1273. strerror (errno));
  1274. return FALSE;
  1275. }
  1276. if (!MESSAGE_FIELD (task, rcpt_mime)) {
  1277. return FALSE;
  1278. }
  1279. num = MESSAGE_FIELD (task, rcpt_mime)->len;
  1280. if (num < MIN_RCPT_TO_COMPARE) {
  1281. return FALSE;
  1282. }
  1283. ar = rspamd_mempool_alloc0 (task->task_pool, num * sizeof (struct addr_list));
  1284. /* Fill array */
  1285. num = 0;
  1286. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, rcpt_mime), i, cur) {
  1287. if (cur->addr_len > COMPARE_RCPT_LEN) {
  1288. ar[num].name = cur->addr;
  1289. ar[num].namelen = cur->addr_len;
  1290. ar[num].addr = cur->domain;
  1291. ar[num].addrlen = cur->domain_len;
  1292. num ++;
  1293. }
  1294. }
  1295. qsort (ar, num, sizeof (*ar), addr_list_cmp_func);
  1296. /* Cycle all elements in array */
  1297. for (i = 0; i < num; i++) {
  1298. if (i < num - 1) {
  1299. if (ar[i].namelen == ar[i + 1].namelen) {
  1300. if (rspamd_lc_cmp (ar[i].name, ar[i + 1].name, COMPARE_RCPT_LEN) == 0) {
  1301. hits++;
  1302. }
  1303. }
  1304. }
  1305. }
  1306. if ((hits * num / 2.) / (double)num >= threshold) {
  1307. return TRUE;
  1308. }
  1309. return FALSE;
  1310. }
  1311. gboolean
  1312. rspamd_has_only_html_part (struct rspamd_task * task, GArray * args,
  1313. void *unused)
  1314. {
  1315. struct rspamd_mime_text_part *p;
  1316. guint i, cnt_html = 0, cnt_txt = 0;
  1317. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
  1318. p = g_ptr_array_index (MESSAGE_FIELD (task, text_parts), 0);
  1319. if (!IS_TEXT_PART_ATTACHMENT (p)) {
  1320. if (IS_TEXT_PART_HTML (p)) {
  1321. cnt_html++;
  1322. }
  1323. else {
  1324. cnt_txt++;
  1325. }
  1326. }
  1327. }
  1328. return (cnt_html > 0 && cnt_txt == 0);
  1329. }
  1330. static gboolean
  1331. is_recipient_list_sorted (GPtrArray *ar)
  1332. {
  1333. struct rspamd_email_address *addr;
  1334. gboolean res = TRUE;
  1335. rspamd_ftok_t cur, prev;
  1336. gint i;
  1337. /* Do not check to short address lists */
  1338. if (ar == NULL || ar->len < MIN_RCPT_TO_COMPARE) {
  1339. return FALSE;
  1340. }
  1341. prev.len = 0;
  1342. prev.begin = NULL;
  1343. PTR_ARRAY_FOREACH (ar, i, addr) {
  1344. cur.begin = addr->addr;
  1345. cur.len = addr->addr_len;
  1346. if (prev.len != 0) {
  1347. if (rspamd_ftok_casecmp (&cur, &prev) <= 0) {
  1348. res = FALSE;
  1349. break;
  1350. }
  1351. }
  1352. prev = cur;
  1353. }
  1354. return res;
  1355. }
  1356. gboolean
  1357. rspamd_is_recipients_sorted (struct rspamd_task * task,
  1358. GArray * args,
  1359. void *unused)
  1360. {
  1361. /* Check all types of addresses */
  1362. if (MESSAGE_FIELD (task, rcpt_mime)) {
  1363. return is_recipient_list_sorted (MESSAGE_FIELD (task, rcpt_mime));
  1364. }
  1365. return FALSE;
  1366. }
  1367. gboolean
  1368. rspamd_compare_transfer_encoding (struct rspamd_task * task,
  1369. GArray * args,
  1370. void *unused)
  1371. {
  1372. struct expression_argument *arg;
  1373. guint i;
  1374. struct rspamd_mime_part *part;
  1375. enum rspamd_cte cte;
  1376. if (args == NULL) {
  1377. msg_warn_task ("no parameters to function");
  1378. return FALSE;
  1379. }
  1380. arg = &g_array_index (args, struct expression_argument, 0);
  1381. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1382. msg_warn_task ("invalid argument to function is passed");
  1383. return FALSE;
  1384. }
  1385. cte = rspamd_cte_from_string (arg->data);
  1386. if (cte == RSPAMD_CTE_UNKNOWN) {
  1387. msg_warn_task ("unknown cte: %s", arg->data);
  1388. return FALSE;
  1389. }
  1390. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
  1391. if (IS_PART_TEXT (part)) {
  1392. if (part->cte == cte) {
  1393. return TRUE;
  1394. }
  1395. }
  1396. }
  1397. return FALSE;
  1398. }
  1399. gboolean
  1400. rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused)
  1401. {
  1402. /* Totally broken but seems to be never used */
  1403. return TRUE;
  1404. }
  1405. gboolean
  1406. rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
  1407. {
  1408. struct rspamd_mime_text_part *p;
  1409. struct expression_argument *arg;
  1410. guint i;
  1411. gboolean res = FALSE;
  1412. if (args == NULL) {
  1413. msg_warn_task ("no parameters to function");
  1414. return FALSE;
  1415. }
  1416. arg = &g_array_index (args, struct expression_argument, 0);
  1417. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1418. msg_warn_task ("invalid argument to function is passed");
  1419. return FALSE;
  1420. }
  1421. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
  1422. if (IS_TEXT_PART_HTML (p) && p->html) {
  1423. res = rspamd_html_tag_seen (p->html, arg->data);
  1424. }
  1425. if (res) {
  1426. break;
  1427. }
  1428. }
  1429. return res;
  1430. }
  1431. gboolean
  1432. rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused)
  1433. {
  1434. struct rspamd_mime_text_part *p;
  1435. guint i;
  1436. gboolean res = FALSE;
  1437. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, p) {
  1438. if (IS_TEXT_PART_HTML (p) && (rspamd_html_get_tags_count(p->html) < 2)) {
  1439. res = TRUE;
  1440. }
  1441. if (res) {
  1442. break;
  1443. }
  1444. }
  1445. return res;
  1446. }
  1447. static gboolean
  1448. rspamd_raw_header_exists (struct rspamd_task *task, GArray * args, void *unused)
  1449. {
  1450. struct expression_argument *arg;
  1451. if (args == NULL || task == NULL) {
  1452. return FALSE;
  1453. }
  1454. arg = &g_array_index (args, struct expression_argument, 0);
  1455. if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1456. msg_warn_task ("invalid argument to function is passed");
  1457. return FALSE;
  1458. }
  1459. return rspamd_message_get_header_array(task, arg->data, FALSE) != NULL;
  1460. }
  1461. static gboolean
  1462. match_smtp_data (struct rspamd_task *task,
  1463. struct expression_argument *arg,
  1464. const gchar *what, gsize len)
  1465. {
  1466. rspamd_regexp_t *re;
  1467. gint r = 0;
  1468. if (arg->type == EXPRESSION_ARGUMENT_REGEXP) {
  1469. /* This is a regexp */
  1470. re = arg->data;
  1471. if (re == NULL) {
  1472. msg_warn_task ("cannot compile regexp for function");
  1473. return FALSE;
  1474. }
  1475. if (len > 0) {
  1476. r = rspamd_regexp_search (re, what, len, NULL, NULL, FALSE, NULL);
  1477. }
  1478. return r;
  1479. }
  1480. else if (arg->type == EXPRESSION_ARGUMENT_NORMAL &&
  1481. g_ascii_strncasecmp (arg->data, what, len) == 0) {
  1482. return TRUE;
  1483. }
  1484. return FALSE;
  1485. }
  1486. static gboolean
  1487. rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused)
  1488. {
  1489. struct expression_argument *arg;
  1490. struct rspamd_email_address *addr = NULL;
  1491. GPtrArray *rcpts = NULL;
  1492. const gchar *type, *str = NULL;
  1493. guint i;
  1494. if (args == NULL) {
  1495. msg_warn_task ("no parameters to function");
  1496. return FALSE;
  1497. }
  1498. arg = &g_array_index (args, struct expression_argument, 0);
  1499. if (!arg || !arg->data || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  1500. msg_warn_task ("no parameters to function");
  1501. return FALSE;
  1502. }
  1503. else {
  1504. type = arg->data;
  1505. switch (*type) {
  1506. case 'f':
  1507. case 'F':
  1508. if (g_ascii_strcasecmp (type, "from") == 0) {
  1509. addr = rspamd_task_get_sender (task);
  1510. }
  1511. else {
  1512. msg_warn_task ("bad argument to function: %s", type);
  1513. return FALSE;
  1514. }
  1515. break;
  1516. case 'h':
  1517. case 'H':
  1518. if (g_ascii_strcasecmp (type, "helo") == 0) {
  1519. str = task->helo;
  1520. }
  1521. else {
  1522. msg_warn_task ("bad argument to function: %s", type);
  1523. return FALSE;
  1524. }
  1525. break;
  1526. case 'u':
  1527. case 'U':
  1528. if (g_ascii_strcasecmp (type, "user") == 0) {
  1529. str = task->user;
  1530. }
  1531. else {
  1532. msg_warn_task ("bad argument to function: %s", type);
  1533. return FALSE;
  1534. }
  1535. break;
  1536. case 's':
  1537. case 'S':
  1538. if (g_ascii_strcasecmp (type, "subject") == 0) {
  1539. str = MESSAGE_FIELD (task, subject);
  1540. }
  1541. else {
  1542. msg_warn_task ("bad argument to function: %s", type);
  1543. return FALSE;
  1544. }
  1545. break;
  1546. case 'r':
  1547. case 'R':
  1548. if (g_ascii_strcasecmp (type, "rcpt") == 0) {
  1549. rcpts = task->rcpt_envelope;
  1550. }
  1551. else {
  1552. msg_warn_task ("bad argument to function: %s", type);
  1553. return FALSE;
  1554. }
  1555. break;
  1556. default:
  1557. msg_warn_task ("bad argument to function: %s", type);
  1558. return FALSE;
  1559. }
  1560. }
  1561. if (str == NULL && addr == NULL && rcpts == NULL) {
  1562. /* Not enough data so regexp would NOT be found anyway */
  1563. return FALSE;
  1564. }
  1565. /* We would process only one more argument, others are ignored */
  1566. if (args->len >= 2) {
  1567. arg = &g_array_index (args, struct expression_argument, 1);
  1568. if (arg) {
  1569. if (str != NULL) {
  1570. return match_smtp_data (task, arg, str, strlen (str));
  1571. }
  1572. else if (addr != NULL && addr->addr) {
  1573. return match_smtp_data (task, arg, addr->addr, addr->addr_len);
  1574. }
  1575. else {
  1576. if (rcpts != NULL) {
  1577. for (i = 0; i < rcpts->len; i ++) {
  1578. addr = g_ptr_array_index (rcpts, i);
  1579. if (addr && addr->addr &&
  1580. match_smtp_data (task, arg,
  1581. addr->addr, addr->addr_len)) {
  1582. return TRUE;
  1583. }
  1584. }
  1585. }
  1586. }
  1587. }
  1588. }
  1589. return FALSE;
  1590. }
  1591. static inline gboolean
  1592. rspamd_check_ct_attr (const gchar *begin, gsize len,
  1593. struct expression_argument *arg_pattern)
  1594. {
  1595. rspamd_regexp_t *re;
  1596. gboolean r = FALSE;
  1597. if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
  1598. re = arg_pattern->data;
  1599. if (len > 0) {
  1600. r = rspamd_regexp_search (re,
  1601. begin, len,
  1602. NULL, NULL, FALSE, NULL);
  1603. }
  1604. if (r) {
  1605. return TRUE;
  1606. }
  1607. }
  1608. else {
  1609. /* Just do strcasecmp */
  1610. gsize plen = strlen (arg_pattern->data);
  1611. if (plen == len &&
  1612. g_ascii_strncasecmp (arg_pattern->data, begin, len) == 0) {
  1613. return TRUE;
  1614. }
  1615. }
  1616. return FALSE;
  1617. }
  1618. static gboolean
  1619. rspamd_content_type_compare_param (struct rspamd_task * task,
  1620. GArray * args,
  1621. void *unused)
  1622. {
  1623. struct expression_argument *arg, *arg1, *arg_pattern;
  1624. gboolean recursive = FALSE;
  1625. struct rspamd_mime_part *cur_part;
  1626. guint i;
  1627. rspamd_ftok_t srch;
  1628. struct rspamd_content_type_param *found = NULL, *cur;
  1629. const gchar *param_name;
  1630. if (args == NULL || args->len < 2) {
  1631. msg_warn_task ("no parameters to function");
  1632. return FALSE;
  1633. }
  1634. arg = &g_array_index (args, struct expression_argument, 0);
  1635. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1636. param_name = arg->data;
  1637. arg_pattern = &g_array_index (args, struct expression_argument, 1);
  1638. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) {
  1639. if (args->len >= 3) {
  1640. arg1 = &g_array_index (args, struct expression_argument, 2);
  1641. if (g_ascii_strncasecmp (arg1->data, "true",
  1642. sizeof ("true") - 1) == 0) {
  1643. recursive = TRUE;
  1644. }
  1645. }
  1646. else {
  1647. /*
  1648. * If user did not specify argument, let's assume that he wants
  1649. * recursive search if mime part is multipart/mixed
  1650. */
  1651. if (IS_PART_MULTIPART (cur_part)) {
  1652. recursive = TRUE;
  1653. }
  1654. }
  1655. rspamd_ftok_t lit;
  1656. RSPAMD_FTOK_FROM_STR (&srch, param_name);
  1657. RSPAMD_FTOK_FROM_STR (&lit, "charset");
  1658. if (rspamd_ftok_equal (&srch, &lit)) {
  1659. if (rspamd_check_ct_attr (cur_part->ct->charset.begin,
  1660. cur_part->ct->charset.len, arg_pattern)) {
  1661. return TRUE;
  1662. }
  1663. }
  1664. RSPAMD_FTOK_FROM_STR (&lit, "boundary");
  1665. if (rspamd_ftok_equal (&srch, &lit)) {
  1666. if (rspamd_check_ct_attr (cur_part->ct->orig_boundary.begin,
  1667. cur_part->ct->orig_boundary.len, arg_pattern)) {
  1668. return TRUE;
  1669. }
  1670. }
  1671. if (cur_part->ct->attrs) {
  1672. found = g_hash_table_lookup (cur_part->ct->attrs, &srch);
  1673. if (found) {
  1674. DL_FOREACH (found, cur) {
  1675. if (rspamd_check_ct_attr (cur->value.begin,
  1676. cur->value.len, arg_pattern)) {
  1677. return TRUE;
  1678. }
  1679. }
  1680. }
  1681. }
  1682. if (!recursive) {
  1683. break;
  1684. }
  1685. }
  1686. return FALSE;
  1687. }
  1688. static gboolean
  1689. rspamd_content_type_has_param (struct rspamd_task * task,
  1690. GArray * args,
  1691. void *unused)
  1692. {
  1693. struct expression_argument *arg, *arg1;
  1694. gboolean recursive = FALSE;
  1695. struct rspamd_mime_part *cur_part;
  1696. guint i;
  1697. rspamd_ftok_t srch;
  1698. struct rspamd_content_type_param *found = NULL;
  1699. const gchar *param_name;
  1700. if (args == NULL || args->len < 1) {
  1701. msg_warn_task ("no parameters to function");
  1702. return FALSE;
  1703. }
  1704. arg = &g_array_index (args, struct expression_argument, 0);
  1705. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1706. param_name = arg->data;
  1707. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) {
  1708. if (args->len >= 2) {
  1709. arg1 = &g_array_index (args, struct expression_argument, 1);
  1710. if (g_ascii_strncasecmp (arg1->data, "true",
  1711. sizeof ("true") - 1) == 0) {
  1712. recursive = TRUE;
  1713. }
  1714. }
  1715. else {
  1716. /*
  1717. * If user did not specify argument, let's assume that he wants
  1718. * recursive search if mime part is multipart/mixed
  1719. */
  1720. if (IS_PART_MULTIPART (cur_part)) {
  1721. recursive = TRUE;
  1722. }
  1723. }
  1724. rspamd_ftok_t lit;
  1725. RSPAMD_FTOK_FROM_STR (&srch, param_name);
  1726. RSPAMD_FTOK_FROM_STR (&lit, "charset");
  1727. if (rspamd_ftok_equal (&srch, &lit)) {
  1728. if (cur_part->ct->charset.len > 0) {
  1729. return TRUE;
  1730. }
  1731. }
  1732. RSPAMD_FTOK_FROM_STR (&lit, "boundary");
  1733. if (rspamd_ftok_equal (&srch, &lit)) {
  1734. if (cur_part->ct->boundary.len > 0) {
  1735. return TRUE;
  1736. }
  1737. }
  1738. if (cur_part->ct->attrs) {
  1739. found = g_hash_table_lookup (cur_part->ct->attrs, &srch);
  1740. if (found) {
  1741. return TRUE;
  1742. }
  1743. }
  1744. if (!recursive) {
  1745. break;
  1746. }
  1747. }
  1748. return FALSE;
  1749. }
  1750. static gboolean
  1751. rspamd_content_type_check (struct rspamd_task *task,
  1752. GArray * args,
  1753. gboolean check_subtype)
  1754. {
  1755. rspamd_ftok_t *param_data, srch;
  1756. rspamd_regexp_t *re;
  1757. struct expression_argument *arg1, *arg_pattern;
  1758. struct rspamd_content_type *ct;
  1759. gint r = 0;
  1760. guint i;
  1761. gboolean recursive = FALSE;
  1762. struct rspamd_mime_part *cur_part;
  1763. if (args == NULL || args->len < 1) {
  1764. msg_warn_task ("no parameters to function");
  1765. return FALSE;
  1766. }
  1767. arg_pattern = &g_array_index (args, struct expression_argument, 0);
  1768. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, cur_part) {
  1769. ct = cur_part->ct;
  1770. if (args->len >= 2) {
  1771. arg1 = &g_array_index (args, struct expression_argument, 1);
  1772. if (g_ascii_strncasecmp (arg1->data, "true",
  1773. sizeof ("true") - 1) == 0) {
  1774. recursive = TRUE;
  1775. }
  1776. }
  1777. else {
  1778. /*
  1779. * If user did not specify argument, let's assume that he wants
  1780. * recursive search if mime part is multipart/mixed
  1781. */
  1782. if (IS_PART_MULTIPART (cur_part)) {
  1783. recursive = TRUE;
  1784. }
  1785. }
  1786. if (check_subtype) {
  1787. param_data = &ct->subtype;
  1788. }
  1789. else {
  1790. param_data = &ct->type;
  1791. }
  1792. if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
  1793. re = arg_pattern->data;
  1794. if (param_data->len > 0) {
  1795. r = rspamd_regexp_search (re, param_data->begin, param_data->len,
  1796. NULL, NULL, FALSE, NULL);
  1797. }
  1798. if (r) {
  1799. return TRUE;
  1800. }
  1801. }
  1802. else {
  1803. /* Just do strcasecmp */
  1804. srch.begin = arg_pattern->data;
  1805. srch.len = strlen (arg_pattern->data);
  1806. if (rspamd_ftok_casecmp (param_data, &srch) == 0) {
  1807. return TRUE;
  1808. }
  1809. }
  1810. /* Get next part */
  1811. if (!recursive) {
  1812. break;
  1813. }
  1814. }
  1815. return FALSE;
  1816. }
  1817. static gboolean
  1818. rspamd_content_type_is_type (struct rspamd_task * task,
  1819. GArray * args,
  1820. void *unused)
  1821. {
  1822. return rspamd_content_type_check (task, args, FALSE);
  1823. }
  1824. static gboolean
  1825. rspamd_content_type_is_subtype (struct rspamd_task * task,
  1826. GArray * args,
  1827. void *unused)
  1828. {
  1829. return rspamd_content_type_check (task, args, TRUE);
  1830. }
  1831. static gboolean
  1832. compare_subtype (struct rspamd_task *task, struct rspamd_content_type *ct,
  1833. struct expression_argument *subtype)
  1834. {
  1835. rspamd_regexp_t *re;
  1836. rspamd_ftok_t srch;
  1837. gint r = 0;
  1838. if (subtype == NULL || ct == NULL) {
  1839. msg_warn_task ("invalid parameters passed");
  1840. return FALSE;
  1841. }
  1842. if (subtype->type == EXPRESSION_ARGUMENT_REGEXP) {
  1843. re = subtype->data;
  1844. if (ct->subtype.len > 0) {
  1845. r = rspamd_regexp_search (re, ct->subtype.begin, ct->subtype.len,
  1846. NULL, NULL, FALSE, NULL);
  1847. }
  1848. }
  1849. else {
  1850. srch.begin = subtype->data;
  1851. srch.len = strlen (subtype->data);
  1852. /* Just do strcasecmp */
  1853. if (rspamd_ftok_casecmp (&ct->subtype, &srch) == 0) {
  1854. return TRUE;
  1855. }
  1856. }
  1857. return r;
  1858. }
  1859. static gboolean
  1860. compare_len (struct rspamd_mime_part *part, guint min, guint max)
  1861. {
  1862. if (min == 0 && max == 0) {
  1863. return TRUE;
  1864. }
  1865. if (min == 0) {
  1866. return part->parsed_data.len <= max;
  1867. }
  1868. else if (max == 0) {
  1869. return part->parsed_data.len >= min;
  1870. }
  1871. else {
  1872. return part->parsed_data.len >= min && part->parsed_data.len <= max;
  1873. }
  1874. }
  1875. static gboolean
  1876. common_has_content_part (struct rspamd_task * task,
  1877. struct expression_argument *param_type,
  1878. struct expression_argument *param_subtype,
  1879. gint min_len,
  1880. gint max_len)
  1881. {
  1882. rspamd_regexp_t *re;
  1883. struct rspamd_mime_part *part;
  1884. struct rspamd_content_type *ct;
  1885. rspamd_ftok_t srch;
  1886. gint r = 0;
  1887. guint i;
  1888. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
  1889. ct = part->ct;
  1890. if (ct == NULL) {
  1891. continue;
  1892. }
  1893. if (param_type->type == EXPRESSION_ARGUMENT_REGEXP) {
  1894. re = param_type->data;
  1895. if (ct->type.len > 0) {
  1896. r = rspamd_regexp_search (re, ct->type.begin, ct->type.len,
  1897. NULL, NULL, FALSE, NULL);
  1898. }
  1899. /* Also check subtype and length of the part */
  1900. if (r && param_subtype) {
  1901. r = compare_len (part, min_len, max_len) &&
  1902. compare_subtype (task, ct, param_subtype);
  1903. return r;
  1904. }
  1905. }
  1906. else {
  1907. /* Just do strcasecmp */
  1908. srch.begin = param_type->data;
  1909. srch.len = strlen (param_type->data);
  1910. if (rspamd_ftok_casecmp (&ct->type, &srch) == 0) {
  1911. if (param_subtype) {
  1912. if (compare_subtype (task, ct, param_subtype)) {
  1913. if (compare_len (part, min_len, max_len)) {
  1914. return TRUE;
  1915. }
  1916. }
  1917. }
  1918. else {
  1919. if (compare_len (part, min_len, max_len)) {
  1920. return TRUE;
  1921. }
  1922. }
  1923. }
  1924. }
  1925. }
  1926. return FALSE;
  1927. }
  1928. static gboolean
  1929. rspamd_has_content_part (struct rspamd_task * task, GArray * args, void *unused)
  1930. {
  1931. struct expression_argument *param_type = NULL, *param_subtype = NULL;
  1932. if (args == NULL) {
  1933. msg_warn_task ("no parameters to function");
  1934. return FALSE;
  1935. }
  1936. param_type = &g_array_index (args, struct expression_argument, 0);
  1937. if (args->len >= 2) {
  1938. param_subtype = &g_array_index (args, struct expression_argument, 1);
  1939. }
  1940. return common_has_content_part (task, param_type, param_subtype, 0, 0);
  1941. }
  1942. static gboolean
  1943. rspamd_has_content_part_len (struct rspamd_task * task,
  1944. GArray * args,
  1945. void *unused)
  1946. {
  1947. struct expression_argument *param_type = NULL, *param_subtype = NULL;
  1948. gint min = 0, max = 0;
  1949. struct expression_argument *arg;
  1950. if (args == NULL) {
  1951. msg_warn_task ("no parameters to function");
  1952. return FALSE;
  1953. }
  1954. param_type = &g_array_index (args, struct expression_argument, 0);
  1955. if (args->len >= 2) {
  1956. param_subtype = &g_array_index (args, struct expression_argument, 1);
  1957. if (args->len >= 3) {
  1958. arg = &g_array_index (args, struct expression_argument, 2);
  1959. errno = 0;
  1960. min = strtoul (arg->data, NULL, 10);
  1961. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1962. if (errno != 0) {
  1963. msg_warn_task ("invalid numeric value '%s': %s",
  1964. (gchar *)arg->data,
  1965. strerror (errno));
  1966. return FALSE;
  1967. }
  1968. if (args->len >= 4) {
  1969. arg = &g_array_index (args, struct expression_argument, 3);
  1970. g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
  1971. max = strtoul (arg->data, NULL, 10);
  1972. if (errno != 0) {
  1973. msg_warn_task ("invalid numeric value '%s': %s",
  1974. (gchar *)arg->data,
  1975. strerror (errno));
  1976. return FALSE;
  1977. }
  1978. }
  1979. }
  1980. }
  1981. return common_has_content_part (task, param_type, param_subtype, min, max);
  1982. }
  1983. static gboolean
  1984. rspamd_is_empty_body (struct rspamd_task *task,
  1985. GArray * args,
  1986. void *unused)
  1987. {
  1988. struct rspamd_mime_part *part;
  1989. guint i;
  1990. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
  1991. if (part->parsed_data.len > 0) {
  1992. return FALSE;
  1993. }
  1994. }
  1995. return TRUE;
  1996. }
  /*
   * Flag lookup helpers. They expand to statements that use the variables
   * `task`, `found` and `result` of the enclosing scope.
   */

  /* Set `result` to 1 if any bit of flag_mask is set in task->flags, else 0 */
  #define TASK_FLAG_READ(flag_mask) do { \
      result = ((task->flags & (flag_mask)) != 0); \
  } while (0)

  /*
   * If nothing has been found yet and flag_name equals strname, read
   * flag_mask from task->flags into `result` and mark the lookup as found.
   */
  #define TASK_GET_FLAG(flag_name, strname, flag_mask) do { \
      if (!found && strcmp ((flag_name), strname) == 0) { \
          TASK_FLAG_READ ((flag_mask)); \
          found = TRUE; \
      } \
  } while (0)

  /* Same as TASK_FLAG_READ, but reads task->protocol_flags */
  #define TASK_PROTOCOL_FLAG_READ(flag_mask) do { \
      result = ((task->protocol_flags & (flag_mask)) != 0); \
  } while (0)

  /* Same as TASK_GET_FLAG, but reads task->protocol_flags */
  #define TASK_GET_PROTOCOL_FLAG(flag_name, strname, flag_mask) do { \
      if (!found && strcmp ((flag_name), strname) == 0) { \
          TASK_PROTOCOL_FLAG_READ ((flag_mask)); \
          found = TRUE; \
      } \
  } while (0)
  2015. static gboolean
  2016. rspamd_has_flag_expr (struct rspamd_task *task,
  2017. GArray * args,
  2018. void *unused)
  2019. {
  2020. gboolean found = FALSE, result = FALSE;
  2021. struct expression_argument *flag_arg;
  2022. const gchar *flag_str;
  2023. if (args == NULL) {
  2024. msg_warn_task ("no parameters to function");
  2025. return FALSE;
  2026. }
  2027. flag_arg = &g_array_index (args, struct expression_argument, 0);
  2028. if (flag_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  2029. msg_warn_task ("invalid parameter to function");
  2030. return FALSE;
  2031. }
  2032. flag_str = (const gchar *)flag_arg->data;
  2033. TASK_GET_FLAG (flag_str, "pass_all", RSPAMD_TASK_FLAG_PASS_ALL);
  2034. TASK_GET_FLAG (flag_str, "no_log", RSPAMD_TASK_FLAG_NO_LOG);
  2035. TASK_GET_FLAG (flag_str, "no_stat", RSPAMD_TASK_FLAG_NO_STAT);
  2036. TASK_GET_FLAG (flag_str, "skip", RSPAMD_TASK_FLAG_SKIP);
  2037. TASK_GET_PROTOCOL_FLAG (flag_str, "extended_urls",
  2038. RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS);
  2039. TASK_GET_FLAG (flag_str, "learn_spam", RSPAMD_TASK_FLAG_LEARN_SPAM);
  2040. TASK_GET_FLAG (flag_str, "learn_ham", RSPAMD_TASK_FLAG_LEARN_HAM);
  2041. TASK_GET_FLAG (flag_str, "greylisted", RSPAMD_TASK_FLAG_GREYLISTED);
  2042. TASK_GET_FLAG (flag_str, "broken_headers",
  2043. RSPAMD_TASK_FLAG_BROKEN_HEADERS);
  2044. TASK_GET_FLAG (flag_str, "skip_process",
  2045. RSPAMD_TASK_FLAG_SKIP_PROCESS);
  2046. TASK_GET_PROTOCOL_FLAG (flag_str, "milter",
  2047. RSPAMD_TASK_PROTOCOL_FLAG_MILTER);
  2048. TASK_GET_FLAG (flag_str, "bad_unicode",
  2049. RSPAMD_TASK_FLAG_BAD_UNICODE);
  2050. if (!found) {
  2051. msg_warn_task ("invalid flag name %s", flag_str);
  2052. return FALSE;
  2053. }
  2054. return result;
  2055. }
  2056. static gboolean
  2057. rspamd_has_symbol_expr (struct rspamd_task *task,
  2058. GArray * args,
  2059. void *unused)
  2060. {
  2061. struct expression_argument *sym_arg;
  2062. const gchar *symbol_str;
  2063. if (args == NULL) {
  2064. msg_warn_task ("no parameters to function");
  2065. return FALSE;
  2066. }
  2067. sym_arg = &g_array_index (args, struct expression_argument, 0);
  2068. if (sym_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
  2069. msg_warn_task ("invalid parameter to function");
  2070. return FALSE;
  2071. }
  2072. symbol_str = (const gchar *)sym_arg->data;
  2073. if (rspamd_task_find_symbol_result (task, symbol_str, NULL)) {
  2074. return TRUE;
  2075. }
  2076. return FALSE;
  2077. }