12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286 |
- /*-
- * Copyright 2016 Vsevolod Stakhov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #include <contrib/libucl/ucl.h>
- #include "config.h"
- #include "util.h"
- #include "cfg_file.h"
- #include "rspamd.h"
- #include "message.h"
- #include "mime_expressions.h"
- #include "html.h"
- #include "lua/lua_common.h"
- #include "utlist.h"
-
- gboolean rspamd_compare_encoding (struct rspamd_task *task,
- GArray * args,
- void *unused);
- gboolean rspamd_header_exists (struct rspamd_task *task,
- GArray * args,
- void *unused);
- gboolean rspamd_parts_distance (struct rspamd_task *task,
- GArray * args,
- void *unused);
- gboolean rspamd_recipients_distance (struct rspamd_task *task,
- GArray * args,
- void *unused);
- gboolean rspamd_has_only_html_part (struct rspamd_task *task,
- GArray * args,
- void *unused);
- gboolean rspamd_is_recipients_sorted (struct rspamd_task *task,
- GArray * args,
- void *unused);
- gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task,
- GArray * args,
- void *unused);
- gboolean rspamd_is_html_balanced (struct rspamd_task *task,
- GArray * args,
- void *unused);
- gboolean rspamd_has_html_tag (struct rspamd_task *task,
- GArray * args,
- void *unused);
- gboolean rspamd_has_fake_html (struct rspamd_task *task,
- GArray * args,
- void *unused);
- static gboolean rspamd_raw_header_exists (struct rspamd_task *task,
- GArray * args,
- void *unused);
- static gboolean rspamd_check_smtp_data (struct rspamd_task *task,
- GArray * args,
- void *unused);
- static gboolean rspamd_content_type_is_type (struct rspamd_task * task,
- GArray * args,
- void *unused);
- static gboolean rspamd_content_type_is_subtype (struct rspamd_task *task,
- GArray * args,
- void *unused);
- static gboolean rspamd_content_type_has_param (struct rspamd_task * task,
- GArray * args,
- void *unused);
- static gboolean rspamd_content_type_compare_param (struct rspamd_task * task,
- GArray * args,
- void *unused);
- static gboolean rspamd_has_content_part (struct rspamd_task *task,
- GArray * args,
- void *unused);
- static gboolean rspamd_has_content_part_len (struct rspamd_task *task,
- GArray * args,
- void *unused);
- static gboolean rspamd_is_empty_body (struct rspamd_task *task,
- GArray * args,
- void *unused);
- static gboolean rspamd_has_flag_expr (struct rspamd_task *task,
- GArray * args,
- void *unused);
-
- static rspamd_expression_atom_t * rspamd_mime_expr_parse (const gchar *line, gsize len,
- rspamd_mempool_t *pool, gpointer ud, GError **err);
- static gdouble rspamd_mime_expr_process (struct rspamd_expr_process_data *process_data, rspamd_expression_atom_t *atom);
- static gint rspamd_mime_expr_priority (rspamd_expression_atom_t *atom);
- static void rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom);
-
- /**
- * Regexp structure
- */
- struct rspamd_regexp_atom {
- enum rspamd_re_type type; /**< regexp type */
- gchar *regexp_text; /**< regexp text representation */
- rspamd_regexp_t *regexp; /**< regexp structure */
- union {
- const gchar *header; /**< header name for header regexps */
- const gchar *selector; /**< selector name for lua selector regexp */
- } extra;
- gboolean is_test; /**< true if this expression must be tested */
- gboolean is_strong; /**< true if headers search must be case sensitive */
- gboolean is_multiple; /**< true if we need to match all inclusions of atom */
- };
-
- /**
- * Rspamd expression function
- */
- struct rspamd_function_atom {
- gchar *name; /**< name of function */
- GArray *args; /**< its args */
- };
-
/* Discriminator for the payload stored in struct rspamd_mime_atom */
enum rspamd_mime_atom_type {
	/* Regular expression atom */
	MIME_ATOM_REGEXP = 0,
	/* Function atom written as name(args) */
	MIME_ATOM_INTERNAL_FUNCTION,
	/* Global Lua function referenced by its name */
	MIME_ATOM_LUA_FUNCTION,
	/* New style: `lua:name`, resolved through the config object */
	MIME_ATOM_LOCAL_LUA_FUNCTION,
};
-
- struct rspamd_mime_atom {
- gchar *str;
- union {
- struct rspamd_regexp_atom *re;
- struct rspamd_function_atom *func;
- const gchar *lua_function;
- gint lua_cbref;
- } d;
- enum rspamd_mime_atom_type type;
- };
-
- /*
- * List of internal functions of rspamd
- * Sorted by name to use bsearch
- */
- static struct _fl {
- const gchar *name;
- rspamd_internal_func_t func;
- void *user_data;
- } rspamd_functions_list[] = {
- {"check_smtp_data", rspamd_check_smtp_data, NULL},
- {"compare_encoding", rspamd_compare_encoding, NULL},
- {"compare_parts_distance", rspamd_parts_distance, NULL},
- {"compare_recipients_distance", rspamd_recipients_distance, NULL},
- {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL},
- {"content_type_compare_param", rspamd_content_type_compare_param, NULL},
- {"content_type_has_param", rspamd_content_type_has_param, NULL},
- {"content_type_is_subtype", rspamd_content_type_is_subtype, NULL},
- {"content_type_is_type", rspamd_content_type_is_type, NULL},
- {"has_content_part", rspamd_has_content_part, NULL},
- {"has_content_part_len", rspamd_has_content_part_len, NULL},
- {"has_fake_html", rspamd_has_fake_html, NULL},
- {"has_flag", rspamd_has_flag_expr, NULL},
- {"has_html_tag", rspamd_has_html_tag, NULL},
- {"has_only_html_part", rspamd_has_only_html_part, NULL},
- {"header_exists", rspamd_header_exists, NULL},
- {"is_empty_body", rspamd_is_empty_body, NULL},
- {"is_html_balanced", rspamd_is_html_balanced, NULL},
- {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL},
- {"raw_header_exists", rspamd_raw_header_exists, NULL},
- };
-
- const struct rspamd_atom_subr mime_expr_subr = {
- .parse = rspamd_mime_expr_parse,
- .process = rspamd_mime_expr_process,
- .priority = rspamd_mime_expr_priority,
- .destroy = rspamd_mime_expr_destroy
- };
-
- static struct _fl *list_ptr = &rspamd_functions_list[0];
- static guint32 functions_number = sizeof (rspamd_functions_list) /
- sizeof (struct _fl);
- static gboolean list_allocated = FALSE;
-
- /* Bsearch routine */
- static gint
- fl_cmp (const void *s1, const void *s2)
- {
- struct _fl *fl1 = (struct _fl *)s1;
- struct _fl *fl2 = (struct _fl *)s2;
- return strcmp (fl1->name, fl2->name);
- }
-
- static GQuark
- rspamd_mime_expr_quark (void)
- {
- return g_quark_from_static_string ("mime-expressions");
- }
-
- #define TYPE_CHECK(str, type, len) (sizeof(type) - 1 == (len) && rspamd_lc_cmp((str), (type), (len)) == 0)
- static gboolean
- rspamd_parse_long_option (const gchar *start, gsize len,
- struct rspamd_regexp_atom *a)
- {
- gboolean ret = FALSE;
-
- if (TYPE_CHECK (start, "body", len)) {
- ret = TRUE;
- a->type = RSPAMD_RE_BODY;
- }
- else if (TYPE_CHECK (start, "part", len) ||
- TYPE_CHECK (start, "mime", len)) {
- ret = TRUE;
- a->type = RSPAMD_RE_MIME;
- }
- else if (TYPE_CHECK (start, "raw_part", len) ||
- TYPE_CHECK (start, "raw_mime", len) ||
- TYPE_CHECK (start, "mime_raw", len)) {
- ret = TRUE;
- a->type = RSPAMD_RE_RAWMIME;
- }
- else if (TYPE_CHECK (start, "header", len)) {
- ret = TRUE;
- a->type = RSPAMD_RE_HEADER;
- }
- else if (TYPE_CHECK (start, "mime_header", len) ||
- TYPE_CHECK (start, "header_mime", len)) {
- ret = TRUE;
- a->type = RSPAMD_RE_MIMEHEADER;
- }
- else if (TYPE_CHECK (start, "raw_header", len) ||
- TYPE_CHECK (start, "header_raw", len)) {
- ret = TRUE;
- a->type = RSPAMD_RE_RAWHEADER;
- }
- else if (TYPE_CHECK (start, "all_header", len) ||
- TYPE_CHECK (start, "header_all", len) ||
- TYPE_CHECK (start, "all_headers", len)) {
- ret = TRUE;
- a->type = RSPAMD_RE_ALLHEADER;
- }
- else if (TYPE_CHECK (start, "url", len)) {
- ret = TRUE;
- a->type = RSPAMD_RE_URL;
- }
- else if (TYPE_CHECK (start, "sa_body", len)) {
- ret = TRUE;
- a->type = RSPAMD_RE_SABODY;
- }
- else if (TYPE_CHECK (start, "sa_raw_body", len) ||
- TYPE_CHECK (start, "sa_body_raw", len)) {
- ret = TRUE;
- a->type = RSPAMD_RE_SARAWBODY;
- }
- else if (TYPE_CHECK (start, "words", len)) {
- ret = TRUE;
- a->type = RSPAMD_RE_WORDS;
- }
- else if (TYPE_CHECK (start, "raw_words", len)) {
- ret = TRUE;
- a->type = RSPAMD_RE_RAWWORDS;
- }
- else if (TYPE_CHECK (start, "stem_words", len)) {
- ret = TRUE;
- a->type = RSPAMD_RE_STEMWORDS;
- }
- else if (TYPE_CHECK (start, "selector", len)) {
- ret = TRUE;
- a->type = RSPAMD_RE_SELECTOR;
- }
-
- return ret;
- }
-
- /*
- * Rspamd regexp utility functions
- */
- static struct rspamd_regexp_atom *
- rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line,
- struct rspamd_config *cfg)
- {
- const gchar *begin, *end, *p, *src, *start, *brace;
- gchar *dbegin, *dend, *extra = NULL;
- struct rspamd_regexp_atom *result;
- GError *err = NULL;
- GString *re_flags;
-
- if (line == NULL) {
- msg_err_pool ("cannot parse NULL line");
- return NULL;
- }
-
- src = line;
- result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp_atom));
- /* Skip whitespaces */
- while (g_ascii_isspace (*line)) {
- line++;
- }
- if (*line == '\0') {
- msg_warn_pool ("got empty regexp");
- return NULL;
- }
-
- result->type = RSPAMD_RE_MAX;
-
- start = line;
- /* First try to find header name */
- begin = strchr (line, '/');
- if (begin != NULL) {
- p = begin;
- end = NULL;
- while (p != line) {
- if (*p == '=') {
- end = p;
- break;
- }
- p--;
- }
-
- if (end) {
- extra = rspamd_mempool_alloc (pool, end - line + 1);
- rspamd_strlcpy (extra, line, end - line + 1);
- line = end;
- }
- }
- else {
- extra = rspamd_mempool_strdup (pool, line);
- result->type = RSPAMD_RE_MAX;
- line = start;
- }
- /* Find begin of regexp */
- while (*line && *line != '/') {
- line++;
- }
- if (*line != '\0') {
- begin = line + 1;
- }
- else if (extra == NULL) {
- /* Assume that line without // is just a header name */
- extra = rspamd_mempool_strdup (pool, line);
- result->type = RSPAMD_RE_HEADER;
- return result;
- }
- else {
- /* We got header name earlier but have not found // expression, so it is invalid regexp */
- msg_warn_pool (
- "got no header name (eg. header=) but without corresponding regexp, %s",
- src);
- return NULL;
- }
- /* Find end */
- end = begin;
- while (*end && (*end != '/' || *(end - 1) == '\\')) {
- end++;
- }
- if (end == begin || *end != '/') {
- msg_warn_pool ("no trailing / in regexp %s", src);
- return NULL;
- }
- /* Parse flags */
- p = end + 1;
- re_flags = g_string_sized_new (32);
-
- while (p != NULL) {
- switch (*p) {
- case 'i':
- case 'm':
- case 's':
- case 'x':
- case 'u':
- case 'O':
- case 'r':
- g_string_append_c (re_flags, *p);
- p++;
- break;
- case 'o':
- p++;
- break;
- /* Type flags */
- case 'H':
- result->type = RSPAMD_RE_HEADER;
- p++;
- break;
- case 'R':
- result->type = RSPAMD_RE_RAWHEADER;
- p++;
- break;
- case 'B':
- result->type = RSPAMD_RE_MIMEHEADER;
- p++;
- break;
- case 'C':
- result->type = RSPAMD_RE_SABODY;
- p++;
- break;
- case 'D':
- result->type = RSPAMD_RE_SARAWBODY;
- p++;
- break;
- case 'M':
- result->type = RSPAMD_RE_BODY;
- p++;
- break;
- case 'P':
- result->type = RSPAMD_RE_MIME;
- p++;
- break;
- case 'Q':
- result->type = RSPAMD_RE_RAWMIME;
- p++;
- break;
- case 'U':
- result->type = RSPAMD_RE_URL;
- p++;
- break;
- case 'X':
- result->type = RSPAMD_RE_RAWHEADER;
- p++;
- break;
- case '$':
- result->type = RSPAMD_RE_SELECTOR;
- p++;
- break;
- case '{':
- /* Long definition */
- if ((brace = strchr (p + 1, '}')) != NULL) {
- if (!rspamd_parse_long_option (p + 1, brace - (p + 1), result)) {
- msg_warn_pool ("invalid long regexp type: %*s in '%s'",
- (int)(brace - (p + 1)), p + 1, src);
- p = NULL;
- }
- else {
- p = brace + 1;
- }
- }
- else {
- p = NULL;
- }
- break;
- /* Other flags */
- case 'T':
- result->is_test = TRUE;
- p++;
- break;
- case 'S':
- result->is_strong = TRUE;
- p++;
- break;
- case 'A':
- result->is_multiple = TRUE;
- p++;
- break;
- /* Stop flags parsing */
- default:
- p = NULL;
- break;
- }
- }
-
- if (result->type >= RSPAMD_RE_MAX) {
- if (extra) {
- /* Assume header regexp */
- result->extra.header = extra;
- result->type = RSPAMD_RE_HEADER;
- }
- else {
- msg_err_pool ("could not read regexp: %s, unknown type", src);
- return NULL;
- }
- }
-
- if ((result->type == RSPAMD_RE_HEADER ||
- result->type == RSPAMD_RE_RAWHEADER ||
- result->type == RSPAMD_RE_MIMEHEADER)) {
- if (extra == NULL) {
- msg_err_pool ("header regexp: '%s' has no header part", src);
- return NULL;
- }
- else {
- result->extra.header = extra;
- }
- }
-
- if (result->type == RSPAMD_RE_SELECTOR) {
- if (extra == NULL) {
- msg_err_pool ("selector regexp: '%s' has no selector part", src);
- return NULL;
- }
- else {
- result->extra.selector = extra;
- }
- }
-
-
- result->regexp_text = rspamd_mempool_strdup (pool, start);
- dbegin = result->regexp_text + (begin - start);
- dend = result->regexp_text + (end - start);
- *dend = '\0';
-
- result->regexp = rspamd_regexp_new (dbegin, re_flags->str,
- &err);
-
- g_string_free (re_flags, TRUE);
-
- if (result->regexp == NULL || err != NULL) {
- msg_warn_pool ("could not read regexp: %s while reading regexp %e",
- src, err);
-
- if (err) {
- g_error_free (err);
- }
-
- return NULL;
- }
-
- if (result->is_multiple) {
- rspamd_regexp_set_maxhits (result->regexp, 0);
- }
- else {
- rspamd_regexp_set_maxhits (result->regexp, 1);
- }
-
- rspamd_regexp_set_ud (result->regexp, result);
-
- *dend = '/';
-
- return result;
- }
-
- struct rspamd_function_atom *
- rspamd_mime_expr_parse_function_atom (rspamd_mempool_t *pool, const gchar *input)
- {
- const gchar *obrace, *ebrace, *p, *c;
- gchar t, *databuf;
- guint len;
- struct rspamd_function_atom *res;
- struct expression_argument arg;
- GError *err = NULL;
- enum {
- start_read_argument = 0,
- in_string,
- in_regexp,
- got_backslash,
- got_comma
- } state, prev_state = 0;
-
- obrace = strchr (input, '(');
- ebrace = strrchr (input, ')');
-
- g_assert (obrace != NULL && ebrace != NULL);
-
- res = rspamd_mempool_alloc0 (pool, sizeof (*res));
- res->name = rspamd_mempool_alloc (pool, obrace - input + 1);
- rspamd_strlcpy (res->name, input, obrace - input + 1);
- res->args = g_array_new (FALSE, FALSE, sizeof (struct expression_argument));
-
- p = obrace + 1;
- c = p;
- state = start_read_argument;
-
- /* Read arguments */
- while (p <= ebrace) {
- t = *p;
- switch (state) {
- case start_read_argument:
- if (t == '/') {
- state = in_regexp;
- c = p;
- }
- else if (!g_ascii_isspace (t)) {
- state = in_string;
-
- if (t == '\'' || t == '\"') {
- c = p + 1;
- }
- else {
- c = p;
- }
- }
- p ++;
- break;
- case in_regexp:
- if (t == '\\') {
- state = got_backslash;
- prev_state = in_regexp;
- }
- else if (t == ',' || p == ebrace) {
- len = p - c + 1;
- databuf = rspamd_mempool_alloc (pool, len);
- rspamd_strlcpy (databuf, c, len);
- arg.type = EXPRESSION_ARGUMENT_REGEXP;
- arg.data = rspamd_regexp_cache_create (NULL, databuf, NULL, &err);
-
- if (arg.data == NULL) {
- /* Fallback to string */
- msg_warn ("cannot parse slashed argument %s as regexp: %s",
- databuf, err->message);
- g_error_free (err);
- arg.type = EXPRESSION_ARGUMENT_NORMAL;
- arg.data = databuf;
- }
-
- g_array_append_val (res->args, arg);
- state = got_comma;
- }
- p ++;
- break;
- case in_string:
- if (t == '\\') {
- state = got_backslash;
- prev_state = in_string;
- }
- else if (t == ',' || p == ebrace) {
- if (*(p - 1) == '\'' || *(p - 1) == '\"') {
- len = p - c;
- }
- else {
- len = p - c + 1;
- }
-
- databuf = rspamd_mempool_alloc (pool, len);
- rspamd_strlcpy (databuf, c, len);
- arg.type = EXPRESSION_ARGUMENT_NORMAL;
- arg.data = databuf;
- g_array_append_val (res->args, arg);
- state = got_comma;
- }
- p ++;
- break;
- case got_backslash:
- state = prev_state;
- p ++;
- break;
- case got_comma:
- state = start_read_argument;
- break;
- }
- }
-
- return res;
- }
-
- static rspamd_expression_atom_t *
- rspamd_mime_expr_parse (const gchar *line, gsize len,
- rspamd_mempool_t *pool, gpointer ud, GError **err)
- {
- rspamd_expression_atom_t *a = NULL;
- struct rspamd_mime_atom *mime_atom = NULL;
- const gchar *p, *end, *c = NULL;
- struct rspamd_mime_expr_ud *real_ud = (struct rspamd_mime_expr_ud *)ud;
- struct rspamd_config *cfg;
- rspamd_regexp_t *own_re;
- gchar t;
- gint type = MIME_ATOM_REGEXP, obraces = 0, ebraces = 0;
- enum {
- in_header = 0,
- got_slash,
- in_regexp,
- got_backslash,
- got_second_slash,
- in_flags,
- in_flags_brace,
- got_obrace,
- in_function,
- in_local_function,
- got_ebrace,
- end_atom,
- bad_atom
- } state = 0, prev_state = 0;
-
- p = line;
- end = p + len;
- cfg = real_ud->cfg;
-
- while (p < end) {
- t = *p;
-
- switch (state) {
- case in_header:
- if (t == '/') {
- /* Regexp */
- state = got_slash;
- }
- else if (t == '(') {
- /* Function */
- state = got_obrace;
- }
- else if (!g_ascii_isalnum (t) && t != '_' && t != '-' && t != '=') {
- if (t == ':') {
- if (p - line == 3 && memcmp (line, "lua", 3) == 0) {
- type = MIME_ATOM_LOCAL_LUA_FUNCTION;
- state = in_local_function;
- c = p + 1;
- }
- }
- else {
- /* Likely lua function, identified by just a string */
- type = MIME_ATOM_LUA_FUNCTION;
- state = end_atom;
- /* Do not increase p */
- continue;
- }
- }
- else if (g_ascii_isspace (t)) {
- state = bad_atom;
- }
- p ++;
- break;
- case got_slash:
- state = in_regexp;
- break;
- case in_regexp:
- if (t == '\\') {
- state = got_backslash;
- prev_state = in_regexp;
- }
- else if (t == '/') {
- state = got_second_slash;
- }
- p ++;
- break;
- case got_second_slash:
- state = in_flags;
- break;
- case in_flags:
- if (t == '{') {
- state = in_flags_brace;
- p ++;
- }
- else if (!g_ascii_isalpha (t) && t != '$') {
- state = end_atom;
- }
- else {
- p ++;
- }
- break;
- case in_flags_brace:
- if (t == '}') {
- state = in_flags;
- }
- p ++;
- break;
- case got_backslash:
- state = prev_state;
- p ++;
- break;
- case got_obrace:
- state = in_function;
- type = MIME_ATOM_INTERNAL_FUNCTION;
- obraces ++;
- break;
- case in_function:
- if (t == '\\') {
- state = got_backslash;
- prev_state = in_function;
- }
- else if (t == '(') {
- obraces ++;
- }
- else if (t == ')') {
- ebraces ++;
- if (ebraces == obraces) {
- state = got_ebrace;
- }
- }
- p ++;
- break;
- case in_local_function:
- if (!(g_ascii_isalnum (t) || t == '-' || t == '_')) {
- g_assert (c != NULL);
- state = end_atom;
- }
- else {
- p++;
- }
- break;
- case got_ebrace:
- state = end_atom;
- break;
- case bad_atom:
- g_set_error (err, rspamd_mime_expr_quark(), 100, "cannot parse"
- " mime atom '%s' when reading symbol '%c' at offset %d, "
- "near %*.s", line, t, (gint)(p - line),
- (gint)MIN (end - p, 10), p);
- return NULL;
- case end_atom:
- goto set;
- }
- }
- set:
-
- if (p - line == 0 || (state != got_ebrace && state != got_second_slash &&
- state != in_flags && state != end_atom)) {
- g_set_error (err, rspamd_mime_expr_quark(), 200, "incomplete or empty"
- " mime atom");
- return NULL;
- }
-
- mime_atom = rspamd_mempool_alloc (pool, sizeof (*mime_atom));
- mime_atom->type = type;
- mime_atom->str = rspamd_mempool_alloc (pool, p - line + 1);
- rspamd_strlcpy (mime_atom->str, line, p - line + 1);
-
- if (type == MIME_ATOM_REGEXP) {
- mime_atom->d.re = rspamd_mime_expr_parse_regexp_atom (pool,
- mime_atom->str, cfg);
- if (mime_atom->d.re == NULL) {
- g_set_error (err, rspamd_mime_expr_quark(), 200, "cannot parse regexp '%s'",
- mime_atom->str);
- goto err;
- }
- else {
- /* Register new item in the cache */
- if (mime_atom->d.re->type == RSPAMD_RE_HEADER ||
- mime_atom->d.re->type == RSPAMD_RE_RAWHEADER ||
- mime_atom->d.re->type == RSPAMD_RE_MIMEHEADER) {
-
- if (mime_atom->d.re->extra.header != NULL) {
- own_re = mime_atom->d.re->regexp;
- mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
- mime_atom->d.re->regexp,
- mime_atom->d.re->type,
- mime_atom->d.re->extra.header,
- strlen (mime_atom->d.re->extra.header) + 1);
- /* Pass ownership to the cache */
- rspamd_regexp_unref (own_re);
- }
- else {
- /* We have header regexp, but no header name is detected */
- g_set_error (err,
- rspamd_mime_expr_quark (),
- 200,
- "no header name in header regexp: '%s'",
- mime_atom->str);
- goto err;
- }
-
- }
- else if (mime_atom->d.re->type == RSPAMD_RE_SELECTOR) {
- if (mime_atom->d.re->extra.selector != NULL) {
- own_re = mime_atom->d.re->regexp;
- mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
- mime_atom->d.re->regexp,
- mime_atom->d.re->type,
- mime_atom->d.re->extra.selector,
- strlen (mime_atom->d.re->extra.selector) + 1);
- /* Pass ownership to the cache */
- rspamd_regexp_unref (own_re);
- }
- else {
- /* We have header regexp, but no header name is detected */
- g_set_error (err,
- rspamd_mime_expr_quark (),
- 200,
- "no selector name in selector regexp: '%s'",
- mime_atom->str);
- goto err;
- }
- }
- else {
- own_re = mime_atom->d.re->regexp;
- mime_atom->d.re->regexp = rspamd_re_cache_add (cfg->re_cache,
- mime_atom->d.re->regexp,
- mime_atom->d.re->type,
- NULL,
- 0);
- /* Pass ownership to the cache */
- rspamd_regexp_unref (own_re);
- }
- }
- }
- else if (type == MIME_ATOM_LUA_FUNCTION) {
- mime_atom->d.lua_function = mime_atom->str;
-
- lua_getglobal (cfg->lua_state, mime_atom->str);
-
- if (lua_type (cfg->lua_state, -1) != LUA_TFUNCTION) {
- g_set_error (err, rspamd_mime_expr_quark(), 200, "no such lua function '%s'",
- mime_atom->str);
- lua_pop (cfg->lua_state, 1);
-
- goto err;
- }
-
- lua_pop (cfg->lua_state, 1);
- }
- else if (type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
- /* p pointer is set to the start of Lua function name */
-
- if (real_ud->conf_obj == NULL) {
- g_set_error (err, rspamd_mime_expr_quark(), 300,
- "no config object for '%s'",
- mime_atom->str);
- goto err;
- }
-
- const ucl_object_t *functions = ucl_object_lookup (real_ud->conf_obj,
- "functions");
-
- if (functions == NULL) {
- g_set_error (err, rspamd_mime_expr_quark(), 310,
- "no functions defined for '%s'",
- mime_atom->str);
- goto err;
- }
-
- if (ucl_object_type (functions) != UCL_OBJECT) {
- g_set_error (err, rspamd_mime_expr_quark(), 320,
- "functions is not a table for '%s'",
- mime_atom->str);
- goto err;
- }
-
- const ucl_object_t *function_obj;
-
- function_obj = ucl_object_lookup_len (functions, c,
- p - c);
-
- if (function_obj == NULL) {
- g_set_error (err, rspamd_mime_expr_quark(), 320,
- "function %*.s is not found for '%s'",
- (int)(p - c), c, mime_atom->str);
- goto err;
- }
-
- if (ucl_object_type (function_obj) != UCL_USERDATA) {
- g_set_error (err, rspamd_mime_expr_quark(), 320,
- "function %*.s has invalid type for '%s'",
- (int)(p - c), c, mime_atom->str);
- goto err;
- }
-
- struct ucl_lua_funcdata *fd = function_obj->value.ud;
-
- mime_atom->d.lua_cbref = fd->idx;
- }
- else {
- mime_atom->d.func = rspamd_mime_expr_parse_function_atom (pool,
- mime_atom->str);
- if (mime_atom->d.func == NULL) {
- g_set_error (err, rspamd_mime_expr_quark(), 200, "cannot parse function '%s'",
- mime_atom->str);
- goto err;
- }
- }
-
- a = rspamd_mempool_alloc0 (pool, sizeof (*a));
- a->len = p - line;
- a->priority = 0;
- a->data = mime_atom;
-
- return a;
-
- err:
-
- return NULL;
- }
-
- static gint
- rspamd_mime_expr_process_regexp (struct rspamd_regexp_atom *re,
- struct rspamd_task *task)
- {
- gint ret;
-
- if (re == NULL) {
- msg_info_task ("invalid regexp passed");
- return 0;
- }
-
- if (re->type == RSPAMD_RE_HEADER || re->type == RSPAMD_RE_RAWHEADER) {
- ret = rspamd_re_cache_process (task,
- re->regexp,
- re->type,
- re->extra.header,
- strlen (re->extra.header),
- re->is_strong);
- }
- else if (re->type == RSPAMD_RE_SELECTOR) {
- ret = rspamd_re_cache_process (task,
- re->regexp,
- re->type,
- re->extra.selector,
- strlen (re->extra.selector),
- re->is_strong);
- }
- else {
- ret = rspamd_re_cache_process (task,
- re->regexp,
- re->type,
- NULL,
- 0,
- re->is_strong);
- }
-
- if (re->is_test) {
- msg_info_task ("test %s regexp '%s' returned %d",
- rspamd_re_cache_type_to_string (re->type),
- re->regexp_text, ret);
- }
-
- return ret;
- }
-
-
- static gint
- rspamd_mime_expr_priority (rspamd_expression_atom_t *atom)
- {
- struct rspamd_mime_atom *mime_atom = atom->data;
- gint ret = 0;
-
- switch (mime_atom->type) {
- case MIME_ATOM_INTERNAL_FUNCTION:
- /* Prioritize internal functions slightly */
- ret = 50;
- break;
- case MIME_ATOM_LUA_FUNCTION:
- case MIME_ATOM_LOCAL_LUA_FUNCTION:
- ret = 50;
- break;
- case MIME_ATOM_REGEXP:
- switch (mime_atom->d.re->type) {
- case RSPAMD_RE_HEADER:
- case RSPAMD_RE_RAWHEADER:
- ret = 100;
- break;
- case RSPAMD_RE_URL:
- ret = 90;
- break;
- case RSPAMD_RE_MIME:
- case RSPAMD_RE_RAWMIME:
- ret = 10;
- break;
- default:
- /* For message regexp */
- ret = 0;
- break;
- }
- }
-
- return ret;
- }
-
- static void
- rspamd_mime_expr_destroy (rspamd_expression_atom_t *atom)
- {
- struct rspamd_mime_atom *mime_atom = atom->data;
-
- if (mime_atom) {
- if (mime_atom->type == MIME_ATOM_INTERNAL_FUNCTION) {
- /* Need to cleanup arguments */
- g_array_free (mime_atom->d.func->args, TRUE);
- }
- }
- }
-
- static gboolean
- rspamd_mime_expr_process_function (struct rspamd_function_atom * func,
- struct rspamd_task * task,
- lua_State *L)
- {
- struct _fl *selected, key;
-
- key.name = func->name;
-
- selected = bsearch (&key,
- list_ptr,
- functions_number,
- sizeof (struct _fl),
- fl_cmp);
- if (selected == NULL) {
- /* Try to check lua function */
- return FALSE;
- }
-
- return selected->func (task, func->args, selected->user_data);
- }
-
- static gdouble
- rspamd_mime_expr_process (struct rspamd_expr_process_data *process_data, rspamd_expression_atom_t *atom)
- {
- struct rspamd_task *task = process_data->task;
- struct rspamd_mime_atom *mime_atom;
- lua_State *L;
- gdouble ret = 0;
-
- g_assert (task != NULL);
- g_assert (atom != NULL);
-
- mime_atom = atom->data;
-
- if (mime_atom->type == MIME_ATOM_REGEXP) {
- ret = rspamd_mime_expr_process_regexp (mime_atom->d.re, task);
- }
- else if (mime_atom->type == MIME_ATOM_LUA_FUNCTION) {
- L = task->cfg->lua_state;
- lua_getglobal (L, mime_atom->d.lua_function);
- rspamd_lua_task_push (L, task);
-
- if (lua_pcall (L, 1, 1, 0) != 0) {
- msg_info_task ("lua call to global function '%s' for atom '%s' failed: %s",
- mime_atom->d.lua_function,
- mime_atom->str,
- lua_tostring (L, -1));
- lua_pop (L, 1);
- }
- else {
- if (lua_type (L, -1) == LUA_TBOOLEAN) {
- ret = lua_toboolean (L, -1);
- }
- else if (lua_type (L, -1) == LUA_TNUMBER) {
- ret = lua_tonumber (L, 1);
- }
- else {
- msg_err_task ("%s returned wrong return type: %s",
- mime_atom->str, lua_typename (L, lua_type (L, -1)));
- }
- /* Remove result */
- lua_pop (L, 1);
- }
- }
- else if (mime_atom->type == MIME_ATOM_LOCAL_LUA_FUNCTION) {
- gint err_idx;
- GString *tb;
-
- L = task->cfg->lua_state;
- lua_pushcfunction (L, &rspamd_lua_traceback);
- err_idx = lua_gettop (L);
-
- lua_rawgeti (L, LUA_REGISTRYINDEX, mime_atom->d.lua_cbref);
- rspamd_lua_task_push (L, task);
-
- if (lua_pcall (L, 1, 1, err_idx) != 0) {
- tb = lua_touserdata (L, -1);
- msg_info_task ("lua call to local function for atom '%s' failed: %v",
- mime_atom->str,
- tb);
- if (tb) {
- g_string_free (tb, TRUE);
- }
- }
- else {
- if (lua_type (L, -1) == LUA_TBOOLEAN) {
- ret = lua_toboolean (L, -1);
- }
- else if (lua_type (L, -1) == LUA_TNUMBER) {
- ret = lua_tonumber (L, 1);
- }
- else {
- msg_err_task ("%s returned wrong return type: %s",
- mime_atom->str, lua_typename (L, lua_type (L, -1)));
- }
- }
-
- lua_settop (L, 0);
- }
- else {
- ret = rspamd_mime_expr_process_function (mime_atom->d.func, task,
- task->cfg->lua_state);
- }
-
- return ret;
- }
-
- void
- register_expression_function (const gchar *name,
- rspamd_internal_func_t func,
- void *user_data)
- {
- static struct _fl *new;
-
- functions_number++;
-
- new = g_new (struct _fl, functions_number);
- memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl));
- if (list_allocated) {
- g_free (list_ptr);
- }
-
- list_allocated = TRUE;
- new[functions_number - 1].name = name;
- new[functions_number - 1].func = func;
- new[functions_number - 1].user_data = user_data;
- qsort (new, functions_number, sizeof (struct _fl), fl_cmp);
- list_ptr = new;
- }
-
- gboolean
- rspamd_compare_encoding (struct rspamd_task *task, GArray * args, void *unused)
- {
- struct expression_argument *arg;
-
- if (args == NULL || task == NULL) {
- return FALSE;
- }
-
- arg = &g_array_index (args, struct expression_argument, 0);
- if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
- msg_warn_task ("invalid argument to function is passed");
- return FALSE;
- }
-
- /* XXX: really write this function */
- return TRUE;
- }
-
- gboolean
- rspamd_header_exists (struct rspamd_task * task, GArray * args, void *unused)
- {
- struct expression_argument *arg;
- GPtrArray *headerlist;
-
- if (args == NULL || task == NULL) {
- return FALSE;
- }
-
- arg = &g_array_index (args, struct expression_argument, 0);
- if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
- msg_warn_task ("invalid argument to function is passed");
- return FALSE;
- }
-
- headerlist = rspamd_message_get_header_array (task,
- (gchar *)arg->data,
- FALSE);
-
- debug_task ("try to get header %s: %d", (gchar *)arg->data,
- (headerlist != NULL));
-
- if (headerlist) {
- return TRUE;
- }
-
- return FALSE;
- }
-
-
- /*
- * This function is designed to find difference between text/html and text/plain parts
- * It takes one argument: difference threshold, if we have two text parts, compare
- * its hashes and check for threshold, if value is greater than threshold, return TRUE
- * and return FALSE otherwise.
- */
- gboolean
- rspamd_parts_distance (struct rspamd_task * task, GArray * args, void *unused)
- {
- gint threshold, threshold2 = -1;
- struct expression_argument *arg;
- gdouble *pdiff, diff;
-
- if (args == NULL || args->len == 0) {
- debug_task ("no threshold is specified, assume it 100");
- threshold = 100;
- }
- else {
- errno = 0;
- arg = &g_array_index (args, struct expression_argument, 0);
- if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
- msg_warn_task ("invalid argument to function is passed");
- return FALSE;
- }
-
- threshold = strtoul ((gchar *)arg->data, NULL, 10);
- if (errno != 0) {
- msg_info_task ("bad numeric value for threshold \"%s\", assume it 100",
- (gchar *)arg->data);
- threshold = 100;
- }
- if (args->len >= 2) {
- arg = &g_array_index (args, struct expression_argument, 1);
- if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
- msg_warn_task ("invalid argument to function is passed");
- return FALSE;
- }
-
- errno = 0;
- threshold2 = strtoul ((gchar *)arg->data, NULL, 10);
- if (errno != 0) {
- msg_info_task ("bad numeric value for threshold \"%s\", ignore it",
- (gchar *)arg->data);
- threshold2 = -1;
- }
- }
- }
-
- if ((pdiff =
- rspamd_mempool_get_variable (task->task_pool,
- "parts_distance")) != NULL) {
- diff = (1.0 - (*pdiff)) * 100.0;
-
- if (diff != -1) {
- if (threshold2 > 0) {
- if (diff >= MIN (threshold, threshold2) &&
- diff < MAX (threshold, threshold2)) {
-
- return TRUE;
- }
- }
- else {
- if (diff <= threshold) {
- return TRUE;
- }
- }
- return FALSE;
- }
- else {
- return FALSE;
- }
- }
-
- return FALSE;
- }
-
/*
 * Plain view of one recipient used when comparing recipients pairwise.
 * The strings are referenced, not copied, and come with explicit lengths.
 */
struct addr_list {
	const gchar *name;	/* first string of the entry */
	guint namelen;		/* length of name in bytes */
	const gchar *addr;	/* second string of the entry */
	guint addrlen;		/* length of addr in bytes */
};

/* Number of leading characters of `name` that are compared between entries */
#define COMPARE_RCPT_LEN 3
/* Recipient lists shorter than this are not analysed at all */
#define MIN_RCPT_TO_COMPARE 7
-
- gboolean
- rspamd_recipients_distance (struct rspamd_task *task, GArray * args,
- void *unused)
- {
- struct expression_argument *arg;
- struct rspamd_email_address *cur;
- double threshold;
- struct addr_list *ar;
- gint num, i, j, hits = 0, total = 0;
-
- if (args == NULL) {
- msg_warn_task ("no parameters to function");
- return FALSE;
- }
-
- arg = &g_array_index (args, struct expression_argument, 0);
- if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
- msg_warn_task ("invalid argument to function is passed");
- return FALSE;
- }
-
- errno = 0;
- threshold = strtod ((gchar *)arg->data, NULL);
-
- if (errno != 0) {
- msg_warn_task ("invalid numeric value '%s': %s",
- (gchar *)arg->data,
- strerror (errno));
- return FALSE;
- }
-
- if (!task->rcpt_mime) {
- return FALSE;
- }
-
- num = task->rcpt_mime->len;
-
- if (num < MIN_RCPT_TO_COMPARE) {
- return FALSE;
- }
-
- ar = rspamd_mempool_alloc0 (task->task_pool, num * sizeof (struct addr_list));
-
- /* Fill array */
- PTR_ARRAY_FOREACH (task->rcpt_mime, i, cur) {
- ar[i].name = cur->addr;
- ar[i].namelen = cur->addr_len;
- ar[i].addr = cur->domain;
- ar[i].addrlen = cur->domain_len;
- }
-
- /* Cycle all elements in array */
- for (i = 0; i < num; i++) {
- for (j = i + 1; j < num; j++) {
- if (ar[i].namelen >= COMPARE_RCPT_LEN && ar[j].namelen >= COMPARE_RCPT_LEN &&
- rspamd_lc_cmp (ar[i].name, ar[j].name, COMPARE_RCPT_LEN) == 0) {
- /* Common name part */
- hits++;
- }
-
- total++;
- }
- }
-
- if ((hits * num / 2.) / (double)total >= threshold) {
- return TRUE;
- }
-
- return FALSE;
- }
-
- gboolean
- rspamd_has_only_html_part (struct rspamd_task * task, GArray * args,
- void *unused)
- {
- struct rspamd_mime_text_part *p;
- gboolean res = FALSE;
-
- if (task->text_parts->len == 1) {
- p = g_ptr_array_index (task->text_parts, 0);
-
- if (IS_PART_HTML (p)) {
- res = TRUE;
- }
- else {
- res = FALSE;
- }
- }
-
- return res;
- }
-
- static gboolean
- is_recipient_list_sorted (GPtrArray *ar)
- {
- struct rspamd_email_address *addr;
- gboolean res = TRUE;
- rspamd_ftok_t cur, prev;
- gint i;
-
- /* Do not check to short address lists */
- if (ar == NULL || ar->len < MIN_RCPT_TO_COMPARE) {
- return FALSE;
- }
-
- prev.len = 0;
- prev.begin = NULL;
-
- PTR_ARRAY_FOREACH (ar, i, addr) {
- cur.begin = addr->addr;
- cur.len = addr->addr_len;
-
- if (prev.len != 0) {
- if (rspamd_ftok_casecmp (&cur, &prev) <= 0) {
- res = FALSE;
- break;
- }
- }
-
- prev = cur;
- }
-
- return res;
- }
-
- gboolean
- rspamd_is_recipients_sorted (struct rspamd_task * task,
- GArray * args,
- void *unused)
- {
- /* Check all types of addresses */
-
- if (task->rcpt_mime) {
- return is_recipient_list_sorted (task->rcpt_mime);
- }
-
- return FALSE;
- }
-
- gboolean
- rspamd_compare_transfer_encoding (struct rspamd_task * task,
- GArray * args,
- void *unused)
- {
- struct expression_argument *arg;
- guint i;
- struct rspamd_mime_part *part;
- enum rspamd_cte cte;
-
- if (args == NULL) {
- msg_warn_task ("no parameters to function");
- return FALSE;
- }
-
- arg = &g_array_index (args, struct expression_argument, 0);
- if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
- msg_warn_task ("invalid argument to function is passed");
- return FALSE;
- }
-
- cte = rspamd_cte_from_string (arg->data);
-
- if (cte == RSPAMD_CTE_UNKNOWN) {
- msg_warn_task ("unknown cte: %s", arg->data);
- return FALSE;
- }
-
- PTR_ARRAY_FOREACH (task->parts, i, part) {
- if (IS_CT_TEXT (part->ct)) {
- if (part->cte == cte) {
- return TRUE;
- }
- }
- }
-
- return FALSE;
- }
-
- gboolean
- rspamd_is_html_balanced (struct rspamd_task * task, GArray * args, void *unused)
- {
- struct rspamd_mime_text_part *p;
- guint i;
- gboolean res = TRUE;
-
- for (i = 0; i < task->text_parts->len; i ++) {
-
- p = g_ptr_array_index (task->text_parts, i);
- if (IS_PART_HTML (p)) {
- if (p->flags & RSPAMD_MIME_TEXT_PART_FLAG_BALANCED) {
- res = TRUE;
- }
- else {
- res = FALSE;
- break;
- }
- }
- }
-
- return res;
-
- }
-
- gboolean
- rspamd_has_html_tag (struct rspamd_task * task, GArray * args, void *unused)
- {
- struct rspamd_mime_text_part *p;
- struct expression_argument *arg;
- guint i;
- gboolean res = FALSE;
-
- if (args == NULL) {
- msg_warn_task ("no parameters to function");
- return FALSE;
- }
-
- arg = &g_array_index (args, struct expression_argument, 0);
- if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
- msg_warn_task ("invalid argument to function is passed");
- return FALSE;
- }
-
- for (i = 0; i < task->text_parts->len; i ++) {
- p = g_ptr_array_index (task->text_parts, i);
-
- if (IS_PART_HTML (p) && p->html) {
- res = rspamd_html_tag_seen (p->html, arg->data);
- }
-
- if (res) {
- break;
- }
- }
-
- return res;
-
- }
-
- gboolean
- rspamd_has_fake_html (struct rspamd_task * task, GArray * args, void *unused)
- {
- struct rspamd_mime_text_part *p;
- guint i;
- gboolean res = FALSE;
-
- for (i = 0; i < task->text_parts->len; i ++) {
- p = g_ptr_array_index (task->text_parts, i);
-
- if (IS_PART_HTML (p) && (p->html == NULL || p->html->html_tags == NULL)) {
- res = TRUE;
- }
-
- if (res) {
- break;
- }
- }
-
- return res;
-
- }
-
- static gboolean
- rspamd_raw_header_exists (struct rspamd_task *task, GArray * args, void *unused)
- {
- struct expression_argument *arg;
-
- if (args == NULL || task == NULL) {
- return FALSE;
- }
-
- arg = &g_array_index (args, struct expression_argument, 0);
- if (!arg || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
- msg_warn_task ("invalid argument to function is passed");
- return FALSE;
- }
-
- return g_hash_table_lookup (task->raw_headers, arg->data) != NULL;
- }
-
- static gboolean
- match_smtp_data (struct rspamd_task *task,
- struct expression_argument *arg,
- const gchar *what, gsize len)
- {
- rspamd_regexp_t *re;
- gint r = 0;
-
- if (arg->type == EXPRESSION_ARGUMENT_REGEXP) {
- /* This is a regexp */
- re = arg->data;
- if (re == NULL) {
- msg_warn_task ("cannot compile regexp for function");
- return FALSE;
- }
-
-
- if (len > 0) {
- r = rspamd_regexp_search (re, what, len, NULL, NULL, FALSE, NULL);
- }
-
- return r;
- }
- else if (arg->type == EXPRESSION_ARGUMENT_NORMAL &&
- g_ascii_strncasecmp (arg->data, what, len) == 0) {
- return TRUE;
- }
-
- return FALSE;
- }
-
- static gboolean
- rspamd_check_smtp_data (struct rspamd_task *task, GArray * args, void *unused)
- {
- struct expression_argument *arg;
- struct rspamd_email_address *addr = NULL;
- GPtrArray *rcpts = NULL;
- const gchar *type, *str = NULL;
- guint i;
-
- if (args == NULL) {
- msg_warn_task ("no parameters to function");
- return FALSE;
- }
-
- arg = &g_array_index (args, struct expression_argument, 0);
-
- if (!arg || !arg->data || arg->type != EXPRESSION_ARGUMENT_NORMAL) {
- msg_warn_task ("no parameters to function");
- return FALSE;
- }
- else {
- type = arg->data;
- switch (*type) {
- case 'f':
- case 'F':
- if (g_ascii_strcasecmp (type, "from") == 0) {
- addr = rspamd_task_get_sender (task);
- }
- else {
- msg_warn_task ("bad argument to function: %s", type);
- return FALSE;
- }
- break;
- case 'h':
- case 'H':
- if (g_ascii_strcasecmp (type, "helo") == 0) {
- str = task->helo;
- }
- else {
- msg_warn_task ("bad argument to function: %s", type);
- return FALSE;
- }
- break;
- case 'u':
- case 'U':
- if (g_ascii_strcasecmp (type, "user") == 0) {
- str = task->user;
- }
- else {
- msg_warn_task ("bad argument to function: %s", type);
- return FALSE;
- }
- break;
- case 's':
- case 'S':
- if (g_ascii_strcasecmp (type, "subject") == 0) {
- str = task->subject;
- }
- else {
- msg_warn_task ("bad argument to function: %s", type);
- return FALSE;
- }
- break;
- case 'r':
- case 'R':
- if (g_ascii_strcasecmp (type, "rcpt") == 0) {
- rcpts = task->rcpt_envelope;
- }
- else {
- msg_warn_task ("bad argument to function: %s", type);
- return FALSE;
- }
- break;
- default:
- msg_warn_task ("bad argument to function: %s", type);
- return FALSE;
- }
- }
-
- if (str == NULL && addr == NULL && rcpts == NULL) {
- /* Not enough data so regexp would NOT be found anyway */
- return FALSE;
- }
-
- /* We would process only one more argument, others are ignored */
- if (args->len >= 2) {
- arg = &g_array_index (args, struct expression_argument, 1);
-
- if (arg) {
- if (str != NULL) {
- return match_smtp_data (task, arg, str, strlen (str));
- }
- else if (addr != NULL && addr->addr) {
- return match_smtp_data (task, arg, addr->addr, addr->addr_len);
- }
- else {
- if (rcpts != NULL) {
- for (i = 0; i < rcpts->len; i ++) {
- addr = g_ptr_array_index (rcpts, i);
-
- if (addr && addr->addr &&
- match_smtp_data (task, arg,
- addr->addr, addr->addr_len)) {
- return TRUE;
- }
- }
- }
- }
- }
- }
-
- return FALSE;
- }
-
- static inline gboolean
- rspamd_check_ct_attr (const gchar *begin, gsize len,
- struct expression_argument *arg_pattern)
- {
- rspamd_regexp_t *re;
- gboolean r = FALSE;
-
- if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
- re = arg_pattern->data;
-
- if (len > 0) {
- r = rspamd_regexp_search (re,
- begin, len,
- NULL, NULL, FALSE, NULL);
- }
-
- if (r) {
- return TRUE;
- }
- }
- else {
- /* Just do strcasecmp */
- gsize plen = strlen (arg_pattern->data);
-
- if (plen == len &&
- g_ascii_strncasecmp (arg_pattern->data, begin, len) == 0) {
- return TRUE;
- }
- }
-
- return FALSE;
- }
-
- static gboolean
- rspamd_content_type_compare_param (struct rspamd_task * task,
- GArray * args,
- void *unused)
- {
-
- struct expression_argument *arg, *arg1, *arg_pattern;
- gboolean recursive = FALSE;
- struct rspamd_mime_part *cur_part;
- guint i;
- rspamd_ftok_t srch;
- struct rspamd_content_type_param *found = NULL, *cur;
- const gchar *param_name;
-
- if (args == NULL || args->len < 2) {
- msg_warn_task ("no parameters to function");
- return FALSE;
- }
-
- arg = &g_array_index (args, struct expression_argument, 0);
- g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
- param_name = arg->data;
- arg_pattern = &g_array_index (args, struct expression_argument, 1);
-
- for (i = 0; i < task->parts->len; i ++) {
- cur_part = g_ptr_array_index (task->parts, i);
-
- if (args->len >= 3) {
- arg1 = &g_array_index (args, struct expression_argument, 2);
- if (g_ascii_strncasecmp (arg1->data, "true",
- sizeof ("true") - 1) == 0) {
- recursive = TRUE;
- }
- }
- else {
- /*
- * If user did not specify argument, let's assume that he wants
- * recursive search if mime part is multipart/mixed
- */
- if (IS_CT_MULTIPART (cur_part->ct)) {
- recursive = TRUE;
- }
- }
-
- rspamd_ftok_t lit;
- RSPAMD_FTOK_FROM_STR (&srch, param_name);
- RSPAMD_FTOK_FROM_STR (&lit, "charset");
-
- if (rspamd_ftok_equal (&srch, &lit)) {
- if (rspamd_check_ct_attr (cur_part->ct->charset.begin,
- cur_part->ct->charset.len, arg_pattern)) {
- return TRUE;
- }
- }
-
- RSPAMD_FTOK_FROM_STR (&lit, "boundary");
- if (rspamd_ftok_equal (&srch, &lit)) {
- if (rspamd_check_ct_attr (cur_part->ct->orig_boundary.begin,
- cur_part->ct->orig_boundary.len, arg_pattern)) {
- return TRUE;
- }
- }
-
- if (cur_part->ct->attrs) {
- found = g_hash_table_lookup (cur_part->ct->attrs, &srch);
-
- if (found) {
- DL_FOREACH (found, cur) {
- if (rspamd_check_ct_attr (cur->value.begin,
- cur->value.len, arg_pattern)) {
- return TRUE;
- }
- }
- }
- }
-
- if (!recursive) {
- break;
- }
- }
-
- return FALSE;
- }
-
- static gboolean
- rspamd_content_type_has_param (struct rspamd_task * task,
- GArray * args,
- void *unused)
- {
- struct expression_argument *arg, *arg1;
- gboolean recursive = FALSE;
- struct rspamd_mime_part *cur_part;
- guint i;
- rspamd_ftok_t srch;
- struct rspamd_content_type_param *found = NULL;
- const gchar *param_name;
-
- if (args == NULL || args->len < 1) {
- msg_warn_task ("no parameters to function");
- return FALSE;
- }
-
- arg = &g_array_index (args, struct expression_argument, 0);
- g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
- param_name = arg->data;
-
- for (i = 0; i < task->parts->len; i ++) {
- cur_part = g_ptr_array_index (task->parts, i);
-
- if (args->len >= 2) {
- arg1 = &g_array_index (args, struct expression_argument, 1);
- if (g_ascii_strncasecmp (arg1->data, "true",
- sizeof ("true") - 1) == 0) {
- recursive = TRUE;
- }
- }
- else {
- /*
- * If user did not specify argument, let's assume that he wants
- * recursive search if mime part is multipart/mixed
- */
- if (IS_CT_MULTIPART (cur_part->ct)) {
- recursive = TRUE;
- }
- }
-
-
- rspamd_ftok_t lit;
- RSPAMD_FTOK_FROM_STR (&srch, param_name);
- RSPAMD_FTOK_FROM_STR (&lit, "charset");
-
- if (rspamd_ftok_equal (&srch, &lit)) {
- if (cur_part->ct->charset.len > 0) {
- return TRUE;
- }
- }
-
- RSPAMD_FTOK_FROM_STR (&lit, "boundary");
- if (rspamd_ftok_equal (&srch, &lit)) {
- if (cur_part->ct->boundary.len > 0) {
- return TRUE;
- }
- }
-
- if (cur_part->ct->attrs) {
- found = g_hash_table_lookup (cur_part->ct->attrs, &srch);
-
- if (found) {
- return TRUE;
- }
- }
-
- if (!recursive) {
- break;
- }
- }
-
- return FALSE;
- }
-
- static gboolean
- rspamd_content_type_check (struct rspamd_task *task,
- GArray * args,
- gboolean check_subtype)
- {
- rspamd_ftok_t *param_data, srch;
- rspamd_regexp_t *re;
- struct expression_argument *arg1, *arg_pattern;
- struct rspamd_content_type *ct;
- gint r = 0;
- guint i;
- gboolean recursive = FALSE;
- struct rspamd_mime_part *cur_part;
-
- if (args == NULL || args->len < 1) {
- msg_warn_task ("no parameters to function");
- return FALSE;
- }
-
- arg_pattern = &g_array_index (args, struct expression_argument, 0);
-
- for (i = 0; i < task->parts->len; i ++) {
- cur_part = g_ptr_array_index (task->parts, i);
- ct = cur_part->ct;
-
- if (args->len >= 2) {
- arg1 = &g_array_index (args, struct expression_argument, 1);
- if (g_ascii_strncasecmp (arg1->data, "true",
- sizeof ("true") - 1) == 0) {
- recursive = TRUE;
- }
- }
- else {
- /*
- * If user did not specify argument, let's assume that he wants
- * recursive search if mime part is multipart/mixed
- */
- if (IS_CT_MULTIPART (ct)) {
- recursive = TRUE;
- }
- }
-
- if (check_subtype) {
- param_data = &ct->subtype;
- }
- else {
- param_data = &ct->type;
- }
-
- if (arg_pattern->type == EXPRESSION_ARGUMENT_REGEXP) {
- re = arg_pattern->data;
-
- if (param_data->len > 0) {
- r = rspamd_regexp_search (re, param_data->begin, param_data->len,
- NULL, NULL, FALSE, NULL);
- }
-
- if (r) {
- return TRUE;
- }
- }
- else {
- /* Just do strcasecmp */
- srch.begin = arg_pattern->data;
- srch.len = strlen (arg_pattern->data);
-
- if (rspamd_ftok_casecmp (param_data, &srch) == 0) {
- return TRUE;
- }
- }
-
- /* Get next part */
- if (!recursive) {
- break;
- }
- }
-
- return FALSE;
- }
-
- static gboolean
- rspamd_content_type_is_type (struct rspamd_task * task,
- GArray * args,
- void *unused)
- {
- return rspamd_content_type_check (task, args, FALSE);
- }
-
- static gboolean
- rspamd_content_type_is_subtype (struct rspamd_task * task,
- GArray * args,
- void *unused)
- {
- return rspamd_content_type_check (task, args, TRUE);
- }
-
- static gboolean
- compare_subtype (struct rspamd_task *task, struct rspamd_content_type *ct,
- struct expression_argument *subtype)
- {
- rspamd_regexp_t *re;
- rspamd_ftok_t srch;
- gint r = 0;
-
- if (subtype == NULL || ct == NULL) {
- msg_warn_task ("invalid parameters passed");
- return FALSE;
- }
- if (subtype->type == EXPRESSION_ARGUMENT_REGEXP) {
- re = subtype->data;
-
- if (ct->subtype.len > 0) {
- r = rspamd_regexp_search (re, ct->subtype.begin, ct->subtype.len,
- NULL, NULL, FALSE, NULL);
- }
- }
- else {
- srch.begin = subtype->data;
- srch.len = strlen (subtype->data);
-
- /* Just do strcasecmp */
- if (rspamd_ftok_casecmp (&ct->subtype, &srch) == 0) {
- return TRUE;
- }
- }
-
- return r;
- }
-
- static gboolean
- compare_len (struct rspamd_mime_part *part, guint min, guint max)
- {
- if (min == 0 && max == 0) {
- return TRUE;
- }
-
- if (min == 0) {
- return part->parsed_data.len <= max;
- }
- else if (max == 0) {
- return part->parsed_data.len >= min;
- }
- else {
- return part->parsed_data.len >= min && part->parsed_data.len <= max;
- }
- }
-
- static gboolean
- common_has_content_part (struct rspamd_task * task,
- struct expression_argument *param_type,
- struct expression_argument *param_subtype,
- gint min_len,
- gint max_len)
- {
- rspamd_regexp_t *re;
- struct rspamd_mime_part *part;
- struct rspamd_content_type *ct;
- rspamd_ftok_t srch;
- gint r = 0;
- guint i;
-
- for (i = 0; i < task->parts->len; i ++) {
- part = g_ptr_array_index (task->parts, i);
- ct = part->ct;
-
- if (ct == NULL) {
- continue;
- }
-
- if (param_type->type == EXPRESSION_ARGUMENT_REGEXP) {
- re = param_type->data;
-
- if (ct->type.len > 0) {
- r = rspamd_regexp_search (re, ct->type.begin, ct->type.len,
- NULL, NULL, FALSE, NULL);
- }
-
- /* Also check subtype and length of the part */
- if (r && param_subtype) {
- r = compare_len (part, min_len, max_len) &&
- compare_subtype (task, ct, param_subtype);
-
- return r;
- }
- }
- else {
- /* Just do strcasecmp */
- srch.begin = param_type->data;
- srch.len = strlen (param_type->data);
-
- if (rspamd_ftok_casecmp (&ct->type, &srch) == 0) {
- if (param_subtype) {
- if (compare_subtype (task, ct, param_subtype)) {
- if (compare_len (part, min_len, max_len)) {
- return TRUE;
- }
- }
- }
- else {
- if (compare_len (part, min_len, max_len)) {
- return TRUE;
- }
- }
- }
- }
- }
-
- return FALSE;
- }
-
- static gboolean
- rspamd_has_content_part (struct rspamd_task * task, GArray * args, void *unused)
- {
- struct expression_argument *param_type = NULL, *param_subtype = NULL;
-
- if (args == NULL) {
- msg_warn_task ("no parameters to function");
- return FALSE;
- }
-
- param_type = &g_array_index (args, struct expression_argument, 0);
- if (args->len >= 2) {
- param_subtype = &g_array_index (args, struct expression_argument, 1);
- }
-
- return common_has_content_part (task, param_type, param_subtype, 0, 0);
- }
-
- static gboolean
- rspamd_has_content_part_len (struct rspamd_task * task,
- GArray * args,
- void *unused)
- {
- struct expression_argument *param_type = NULL, *param_subtype = NULL;
- gint min = 0, max = 0;
- struct expression_argument *arg;
-
- if (args == NULL) {
- msg_warn_task ("no parameters to function");
- return FALSE;
- }
-
- param_type = &g_array_index (args, struct expression_argument, 0);
-
- if (args->len >= 2) {
- param_subtype = &g_array_index (args, struct expression_argument, 1);
-
- if (args->len >= 3) {
- arg = &g_array_index (args, struct expression_argument, 2);
- errno = 0;
- min = strtoul (arg->data, NULL, 10);
- g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
-
- if (errno != 0) {
- msg_warn_task ("invalid numeric value '%s': %s",
- (gchar *)arg->data,
- strerror (errno));
- return FALSE;
- }
-
- if (args->len >= 4) {
- arg = &g_array_index (args, struct expression_argument, 3);
- g_assert (arg->type == EXPRESSION_ARGUMENT_NORMAL);
- max = strtoul (arg->data, NULL, 10);
-
- if (errno != 0) {
- msg_warn_task ("invalid numeric value '%s': %s",
- (gchar *)arg->data,
- strerror (errno));
- return FALSE;
- }
- }
- }
- }
-
- return common_has_content_part (task, param_type, param_subtype, min, max);
- }
-
- static gboolean
- rspamd_is_empty_body (struct rspamd_task *task,
- GArray * args,
- void *unused)
- {
- struct rspamd_mime_part *part;
- guint i;
-
- PTR_ARRAY_FOREACH (task->parts, i, part) {
- if (part->parsed_data.len > 0) {
- return FALSE;
- }
- }
-
- return TRUE;
- }
-
- #define TASK_FLAG_READ(flag) do { \
- result = !!(task->flags & (flag)); \
- } while(0)
-
- #define TASK_GET_FLAG(flag, strname, macro) do { \
- if (!found && strcmp ((flag), strname) == 0) { \
- TASK_FLAG_READ((macro)); \
- found = TRUE; \
- } \
- } while(0)
-
- static gboolean
- rspamd_has_flag_expr (struct rspamd_task *task,
- GArray * args,
- void *unused)
- {
- gboolean found = FALSE, result = FALSE;
- struct expression_argument *flag_arg;
- const gchar *flag_str;
-
- if (args == NULL) {
- msg_warn_task ("no parameters to function");
- return FALSE;
- }
-
- flag_arg = &g_array_index (args, struct expression_argument, 0);
-
- if (flag_arg->type != EXPRESSION_ARGUMENT_NORMAL) {
- msg_warn_task ("invalid parameter to function");
- return FALSE;
- }
-
- flag_str = (const gchar *)flag_arg->data;
-
- TASK_GET_FLAG (flag_str, "pass_all", RSPAMD_TASK_FLAG_PASS_ALL);
- TASK_GET_FLAG (flag_str, "no_log", RSPAMD_TASK_FLAG_NO_LOG);
- TASK_GET_FLAG (flag_str, "no_stat", RSPAMD_TASK_FLAG_NO_STAT);
- TASK_GET_FLAG (flag_str, "skip", RSPAMD_TASK_FLAG_SKIP);
- TASK_GET_FLAG (flag_str, "extended_urls", RSPAMD_TASK_FLAG_EXT_URLS);
- TASK_GET_FLAG (flag_str, "learn_spam", RSPAMD_TASK_FLAG_LEARN_SPAM);
- TASK_GET_FLAG (flag_str, "learn_ham", RSPAMD_TASK_FLAG_LEARN_HAM);
- TASK_GET_FLAG (flag_str, "greylisted", RSPAMD_TASK_FLAG_GREYLISTED);
- TASK_GET_FLAG (flag_str, "broken_headers",
- RSPAMD_TASK_FLAG_BROKEN_HEADERS);
- TASK_GET_FLAG (flag_str, "skip_process",
- RSPAMD_TASK_FLAG_SKIP_PROCESS);
- TASK_GET_FLAG (flag_str, "milter",
- RSPAMD_TASK_FLAG_MILTER);
- TASK_GET_FLAG (flag_str, "bad_unicode",
- RSPAMD_TASK_FLAG_BAD_UNICODE);
-
- if (!found) {
- msg_warn_task ("invalid flag name %s", flag_str);
- return FALSE;
- }
-
- return result;
- }
|