summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2009-07-22 20:55:28 +0400
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2009-07-22 20:55:28 +0400
commitcba8f14abab46234da3089aff075887757ca2afa (patch)
tree3c0a4b76e4e18bd1646d59510b2c7fb84ba35a4a /src
parentecc3b51cfdd7aecdb7a02791424d8e8cfcd22453 (diff)
downloadrspamd-cba8f14abab46234da3089aff075887757ca2afa.tar.gz
rspamd-cba8f14abab46234da3089aff075887757ca2afa.zip
* Add received headers parser
Diffstat (limited to 'src')
-rw-r--r--src/filter.c4
-rw-r--r--src/main.h1
-rw-r--r--src/map.c1
-rw-r--r--src/message.c229
-rw-r--r--src/message.h9
5 files changed, 242 insertions, 2 deletions
diff --git a/src/filter.c b/src/filter.c
index a1894603d..55c8b2a00 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -388,12 +388,12 @@ composites_foreach_callback (gpointer key, gpointer value, void *data)
static gboolean
check_autolearn (struct statfile_autolearn_params *params, struct worker_task *task)
{
- const char *metric_name = DEFAULT_METRIC;
+ char *metric_name = DEFAULT_METRIC;
struct metric_result *metric_res;
GList *cur;
if (params->metric != NULL) {
- metric_name = params->metric;
+ metric_name = (char *)params->metric;
}
/* First check threshold */
diff --git a/src/main.h b/src/main.h
index 0e8f9966c..79568c258 100644
--- a/src/main.h
+++ b/src/main.h
@@ -179,6 +179,7 @@ struct worker_task {
GList *parts; /**< list of parsed parts */
GList *text_parts; /**< list of text parts */
char *raw_headers; /**< list of raw headers */
+ GList *received; /**< list of received headers */
GList *urls; /**< list of parsed urls */
GHashTable *results; /**< hash table of metric_result indexed by
* metric's name */
diff --git a/src/map.c b/src/map.c
index df6ff8960..5ed1cfe40 100644
--- a/src/map.c
+++ b/src/map.c
@@ -102,6 +102,7 @@ parse_http_reply (u_char *chunk, size_t len, struct http_reply *reply)
{
u_char *s, *p, *err_str, *tmp;
p = chunk;
+ s = chunk;
while (p - chunk < len) {
switch (reply->parser_state) {
diff --git a/src/message.c b/src/message.c
index c11bcf00d..f3d8360c2 100644
--- a/src/message.c
+++ b/src/message.c
@@ -239,6 +239,220 @@ reg_char:
}
static void
+parse_qmail_recv (memory_pool_t *pool, char *line, struct received_header *r) /* Fills r->real_ip and r->real_hostname from the "ip/host" token that follows "from network" in line */
+{
+ char *s, *p, t; /* s: start of current token, p: scan cursor, t: character saved while the token is temporarily NUL-terminated */
+
+ /* We are interested only in "received from network" headers */
+ if ((p = strstr (line, "from network")) == NULL) {
+ r->is_error = 2; /* 2: marker not found at all (1 is used below for a malformed token) */
+ return;
+ }
+
+ p += sizeof ("from network") - 1; /* step over the marker itself */
+ while (g_ascii_isspace (*p) || *p == '[') { /* skip blanks and any opening brackets before the token */
+ p ++;
+ }
+ /* format is ip/host */
+ s = p;
+ if (*p) {
+ while (g_ascii_isdigit (*++p) || *p == '.'); /* pre-increment: the first character at s is accepted without being checked */
+ if (*p != '/') {
+ r->is_error = 1; /* digits and dots were not followed by '/' */
+ return;
+ }
+ else {
+ *p = '\0'; /* terminate in place so that only the ip part [s, p) is duplicated; presumably memory_pool_strdup copies up to the NUL */
+ r->real_ip = memory_pool_strdup (pool, s);
+ *p = '/'; /* restore the caller's buffer */
+ /* Now try to parse hostname */
+ s = ++p;
+ while (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') {
+ p ++;
+ }
+ t = *p; /* remember the delimiter; it may be the terminator itself */
+ *p = '\0';
+ r->real_hostname = memory_pool_strdup (pool, s); /* may be an empty string when nothing follows the '/' */
+ *p = t;
+ }
+ } /* nothing after the marker: no field is stored and is_error is left untouched */
+}
+
+static void
+parse_recv_header (memory_pool_t *pool, char *line, struct received_header *r)
+{ /* State machine over line: stores from/real/by hostnames and ips in r; lines not starting with 'f'/'F' are handed to parse_qmail_recv */
+ char *p, *s, t, **res = NULL; /* p: scan cursor, s: start of current token, t: saved delimiter, res: field that state 98 will fill */
+ int state = 0, next_state = 0; /* helper states 98 and 99 switch to next_state when they finish */
+
+ p = line;
+ s = line;
+ while (g_ascii_isspace (*++p)); /* NOTE(review): pre-increment means line[0] is never examined, and an empty line is read past its terminator */
+
+ while (*p) {
+ switch (state) {
+ /* Initial state, search for from */
+ case 0:
+ if (*p == 'f' || *p == 'F') {
+ if (g_ascii_tolower (*++p) == 'r' && /* each ++p consumes a character even when the comparison then fails */
+ g_ascii_tolower (*++p) == 'o' &&
+ g_ascii_tolower (*++p) == 'm') {
+ p ++;
+ state = 99; /* skip blanks after "from", then read the hostname */
+ next_state = 1;
+ }
+ }
+ else {
+ /* This can be qmail header, parse it separately */
+ parse_qmail_recv (pool, line, r); /* receives the whole original line, not the current position */
+ return;
+ }
+ break;
+ /* Read hostname */
+ case 1:
+ if (*p == '[') {
+ /* This should be IP address */
+ res = &r->from_ip;
+ state = 98;
+ next_state = 3;
+ s = ++p; /* token starts right after the bracket */
+ }
+ else if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') {
+ p ++;
+ }
+ else {
+ t = *p; /* terminate the token in place for the copy, then restore the delimiter */
+ *p = '\0';
+ r->from_hostname = memory_pool_strdup (pool, s);
+ *p = t;
+ state = 99;
+ next_state = 3;
+ }
+ break;
+ /* Try to extract additional info */
+ case 3:
+ /* Try to extract ip or () info or by */
+ if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == 'y') { /* matches the letters "by" at any position, no word-boundary check */
+ p += 2;
+ /* Skip spaces after by */
+ state = 99;
+ next_state = 5;
+ }
+ else if (*p == '(') {
+ state = 99;
+ next_state = 4;
+ p ++;
+ }
+ else if (*p == '[') {
+ /* Got ip before '(' so extract it */
+ s = ++p;
+ res = &r->from_ip;
+ state = 98;
+ next_state = 3;
+ }
+ else {
+ p ++; /* any other character is ignored in this state */
+ }
+ break;
+ /* We are in () block. Here can be found real hostname and real ip, this is written by some MTA */
+ case 4:
+ /* End of block */
+ if (*p == ')') {
+ p ++;
+ state = 3;
+ }
+ else if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') {
+ p ++;
+ }
+ else if (*p == '[') {
+ s = ++p;
+ state = 98;
+ res = &r->real_ip;
+ next_state = 3;
+ }
+ else {
+ if (s != p) { /* at least one hostname character was consumed since s was set */
+ /* Got some real hostname */
+ /* Keep it only when followed by a space and '['; anything else (e.g. a helo) is skipped */
+ if (!g_ascii_isspace (*p) || *(p + 1) != '[') {
+ /* skip all */
+ while (*p++ != ')' && *p != '\0'); /* ends just past ')' or on the terminator */
+ state = 3;
+ }
+ else {
+ t = *p;
+ *p = '\0';
+ r->real_hostname = memory_pool_strdup (pool, s);
+ *p = t;
+ /* Now parse ip */
+ p += 2; /* step over the space and the '[' */
+ s = p;
+ res = &r->real_ip;
+ state = 98;
+ next_state = 4; /* return to the () block after the ip */
+ }
+ }
+ else {
+ r->is_error = 1; /* block begins with a character that cannot start a hostname */
+ return;
+ }
+ }
+ break;
+ /* Got by word */
+ case 5:
+ /* Here can be only hostname */
+ if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') {
+ p ++;
+ }
+ else {
+ /* We got something like hostname */
+ t = *p;
+ *p = '\0';
+ r->by_hostname = memory_pool_strdup (pool, s);
+ *p = t;
+ /* Now end of parsing */
+ return; /* only exit that leaves is_error unmodified */
+ }
+ break;
+
+ /* Extract ip */
+ case 98:
+ while (g_ascii_isdigit (*++p) || *p == '.'); /* pre-increment: the first character at s is not validated */
+ if (*p != ']') {
+ /* Not an ip in fact */
+ state = next_state;
+ p ++; /* NOTE(review): if the scan stopped on the terminator this steps past it */
+ }
+ else {
+ *p = '\0'; /* cut at the closing bracket for the copy, then restore it */
+ *res = memory_pool_strdup (pool, s);
+ *p = ']';
+ p ++;
+ state = next_state;
+ }
+ break;
+
+ /* Skip spaces */
+ case 99:
+ if (!g_ascii_isspace (*p)) {
+ state = next_state; /* the non-space character is not consumed here */
+ s = p; /* it becomes the start of the next token */
+ }
+ else {
+ p ++;
+ }
+ break;
+ case 100: /* NOTE(review): no visible code assigns state 100 */
+ r->is_error = 1;
+ return;
+ break;
+ }
+ }
+
+ r->is_error = 1; /* line ended without state 5 seeing a delimiter; a "by" hostname that runs to the end of line is not stored */
+ return;
+}
+
+static void
free_byte_array_callback (void *pointer)
{
GByteArray *arr = (GByteArray *)pointer;
@@ -445,6 +659,8 @@ process_message (struct worker_task *task)
GMimeParser *parser;
GMimeStream *stream;
GByteArray *tmp;
+ GList *first, *cur;
+ struct received_header *recv;
tmp = memory_pool_alloc (task->task_pool, sizeof (GByteArray));
tmp->data = task->msg->begin;
@@ -493,6 +709,19 @@ process_message (struct worker_task *task)
task->raw_headers = g_mime_message_get_headers (task->message);
#endif
+ /* Parse received headers */
+ first = message_get_header (task->task_pool, message, "Received");
+ cur = first;
+ while (cur) {
+ recv = memory_pool_alloc0 (task->task_pool, sizeof (struct received_header));
+ parse_recv_header (task->task_pool, cur->data, recv);
+ task->received = g_list_prepend (task->received, recv);
+ cur = g_list_next (cur);
+ }
+ if (first) {
+ g_list_free (first);
+ }
+
if (task->raw_headers) {
memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_free, task->raw_headers);
}
diff --git a/src/message.h b/src/message.h
index d32c78d7f..a14a92aa4 100644
--- a/src/message.h
+++ b/src/message.h
@@ -29,6 +29,15 @@ struct mime_text_part {
fuzzy_hash_t *fuzzy;
};
+struct received_header { /* Result of parsing one "Received" header line */
+ char *from_hostname; /* hostname token that follows "from" */
+ char *from_ip; /* bracketed [ip] found next to the "from" hostname, outside any () block */
+ char *real_hostname; /* hostname found inside a () block, or after "ip/" in a qmail "from network" line */
+ char *real_ip; /* bracketed [ip] found inside a () block, or the ip part of a qmail "from network" line */
+ char *by_hostname; /* hostname token that follows "by" */
+ int is_error; /* set by the parser: 1 when parsing fails, 2 when a qmail-style line lacks "from network"; not modified on success */
+};
+
/**
* Process message with all filters/statfiles, extract mime parts, urls and
* call metrics consolidation functions