source.dussan.org Git - rspamd.git/commitdiff
* Add ability to match raw headers
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Tue, 17 Mar 2009 09:25:23 +0000 (12:25 +0300)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Tue, 17 Mar 2009 09:25:23 +0000 (12:25 +0300)
* Update documentation

README.utf8.txt
src/cfg_file.h
src/cfg_utils.c
src/main.h
src/message.c
src/plugins/regexp.c

index c49da2476c276d7094c3513c635cf071684fcd0a..c5bc293c0fac6903016e2125ba8a734413c5c6d8 100644 (file)
@@ -89,6 +89,10 @@ Queue-ID - идентификатор очереди
 .module 'regexp' {
        SYMBOL = "regexp_expression";
 };
+header_filters = "regexp";
+
+Обратите внимание, что модуль regexp надо регистрировать как header filter, так как иначе он не будет работать.
+Эту проблему надо исправлять, но это не первоочередная задача.
 
 Формат регэкспов такой:
 /pattern/flags
@@ -98,14 +102,19 @@ headername=/pattern/flags
 Флаги регэкспов:
 i, m, s, x, u, o - такие же, как у perl/pcre
 H - ищет по заголовкам
-M - ищет по всему сообщению
+M - ищет по всему сообщению (в "сыром" виде)
 P - ищет по всем mime частям
 U - ищет по url
+X - ищет по "сырым" хедерам (тут нужно учитывать фолдинг и ставить, где надо, /m для multiline матчинга)
+
 Выражение регэкспов может содержать сложные выражения из нескольких регэкспов, операторов логики и скобок:
 SOME_SYMBOL = "To=/blah@blah/H & !(From=/blah@blah/H | Subject=/blah/H)"
+
 Также можно использовать переменные:
 $to_blah = "To=/blah@blah/H";
 $from_blah = "From=/blah@blah/H";
 $subject_blah = "Subject=/blah/H";
+
 Тогда предыдущее выражение будет таким:
+
 SOME_SYMBOL = "${to_blah} & !(${from_blah} | ${subject_blah})"
index b484d5b0d43dd444b279a95d07638e3614213fb9..cc31f7a0cfe8b8afd56f2e773c3da837001bbe8d 100644 (file)
@@ -50,7 +50,7 @@ enum rspamd_cred_type {
 };
 
 /**
- * Regexp type: /H - header, /M - mime, /U - url
+ * Regexp type: /H - header, /M - message, /P - mime part, /U - url, /X - raw header
  */
 enum rspamd_regexp_type {
        REGEXP_NONE = 0,
@@ -58,6 +58,7 @@ enum rspamd_regexp_type {
        REGEXP_MIME,
        REGEXP_MESSAGE,
        REGEXP_URL,
+       REGEXP_RAW_HEADER,
 };
 
 /**
index 3d2ce611c3ccad63dc925b46878dfe97eaa42fd0..204ed65f66dfda0ddb81a001d739e09bfdf63a69 100644 (file)
@@ -656,6 +656,12 @@ parse_regexp (memory_pool_t *pool, char *line)
                                }
                                p ++;
                                break;
+                       case 'X':
+                               if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) {
+                                       result->type = REGEXP_RAW_HEADER;
+                               }
+                               p ++;
+                               break;
                        /* Stop flags parsing */
                        default:
                                p = NULL;
index 7b7c221ca7743e9e75c3872c248180f1b511cd44..a138666570c18034caeba731fdfa321926b28add 100644 (file)
@@ -185,6 +185,7 @@ struct worker_task {
        int parts_count;                                                                                        /**< mime parts count                                                           */
        GMimeMessage *message;                                                                          /**< message, parsed with GMime                                         */
        GList *parts;                                                                                           /**< list of parsed parts                                                       */
+       char *raw_headers;                                                                                      /**< list of raw headers                                                        */
        TAILQ_HEAD (uriq, uri) urls;                                                            /**< list of parsed urls                                                        */
        GHashTable *results;                                                                            /**< hash table of metric_result indexed by 
                                                                                                                                 *    metric's name                                                                     */
index 4780a11ffb9f0753446de5b5bde2da965b0968c2..76743f7de8140987c6a7cd179df3e20a994b67b0 100644 (file)
@@ -371,6 +371,16 @@ process_message (struct worker_task *task)
                task->message_id = "undef";
        }
 
+#ifdef GMIME24
+       task->raw_headers = g_mime_object_get_headers (GMIME_OBJECT (task->message));
+#else
+       task->raw_headers = g_mime_message_get_headers (task->message);
+#endif
+
+       if (task->raw_headers) {
+               memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_free, task->raw_headers);
+       }
+
        task->worker->srv->stat->messages_scanned ++;
 
        /* free the parser (and the stream) */
index 06da6932fe1d00f66b07c2d0a8c21acb8b7fc402..9ef4e1ce38ce4f43df69eca905e7f74be8c3294f 100644 (file)
@@ -138,7 +138,7 @@ regexp_module_reconfig (struct config_file *cfg)
 static gsize
 process_regexp (struct rspamd_regexp *re, struct worker_task *task)
 {
-       char *headerv;
+       char *headerv, *c, t;
        struct mime_part *part;
        GList *cur;
        struct uri *url;
@@ -190,19 +190,60 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task)
                        }
                        return 0;
                case REGEXP_MESSAGE:
-                       msg_debug ("process_message: checking message regexp: /%s/", re->regexp_text);
+                       msg_debug ("process_regexp: checking message regexp: /%s/", re->regexp_text);
                        if (g_regex_match_full (re->regexp, task->msg->begin, task->msg->len, 0, 0, NULL, NULL) == TRUE) {
                                return 1;
                        }
                        return 0;
                case REGEXP_URL:
-                       msg_debug ("process_url: checking url regexp: /%s/", re->regexp_text);
+                       msg_debug ("process_regexp: checking url regexp: /%s/", re->regexp_text);
                        TAILQ_FOREACH (url, &task->urls, next) {
                                if (g_regex_match (re->regexp, struri (url), 0, NULL) == TRUE) {
                                        return 1;
                                }
                        }
                        return 0;
+               case REGEXP_RAW_HEADER:
+                       msg_debug ("process_regexp: checking for raw header: %s with regexp: /%s/", re->header, re->regexp_text);
+                       if (task->raw_headers == NULL) {
+                               msg_debug ("process_regexp: cannot check for raw header in message, no headers found");
+                               return 0;
+                       }
+                       if ((headerv = strstr (task->raw_headers, re->header)) == NULL) {
+                               /* No header was found */
+                               return 0;
+                       }
+                       /* Skip header name and start matching after regexp */
+                       headerv += strlen (re->header) + 1;
+                       /* Now the main problem is to find position of end of raw header */
+                       c = headerv;
+                       while (*c) {
+                               /* We need to handle all types of line end */
+                               if ((*c == '\r' && *(c + 1) == '\n')) {
+                                       c ++;
+                                       /* Check for folding */
+                                       if (!g_ascii_isspace (*(c + 1))) {
+                                               c ++;
+                                               break;
+                                       }
+                               } 
+                               else if (*c == '\r' || *c == '\n') {
+                                       if (!g_ascii_isspace (*(c + 1))) {
+                                               c ++;
+                                               break;
+                                       }
+                               }
+                               c ++;
+                       }
+                       /* Temporary null terminate this part of string */
+                       t = *c;
+                       *c = '\0';
+                       if (g_regex_match (re->regexp, headerv, 0, NULL) == TRUE) {
+                               *c = t;
+                               return 1;
+                       }
+                       *c = t;
+                       return 0;
        }
 
        /* Not reached */