summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-05-05 17:59:19 +0400
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2010-05-05 17:59:19 +0400
commit 10d43143584a98ea4aece11c4fa753f71808afec (patch)
tree d2445da60b8d591bbeddbf6711b5a6f9d26ae1f2
parent 02a6d987bbdf55e06375544a14c046d1724f946e (diff)
download rspamd-10d43143584a98ea4aece11c4fa753f71808afec.tar.gz
rspamd-10d43143584a98ea4aece11c4fa753f71808afec.zip
* Add ability to add flags to fuzzy hashes
-rw-r--r-- src/fuzzy_storage.c 3
-rw-r--r-- src/fuzzy_storage.h 1
-rw-r--r-- src/message.c 22
-rw-r--r-- src/message.h 2
-rw-r--r-- src/plugins/fuzzy_check.c 40
5 files changed, 59 insertions, 9 deletions
diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c
index 3a7338b04..5f5158620 100644
--- a/src/fuzzy_storage.c
+++ b/src/fuzzy_storage.c
@@ -77,8 +77,9 @@ static struct rspamd_stat *server_stat;
struct rspamd_fuzzy_node {
int32_t value;
- fuzzy_hash_t h;
+ int32_t flag;
uint64_t time;
+ fuzzy_hash_t h;
};
#ifndef HAVE_SA_SIGINFO
diff --git a/src/fuzzy_storage.h b/src/fuzzy_storage.h
index aa3d50133..c317bd99f 100644
--- a/src/fuzzy_storage.h
+++ b/src/fuzzy_storage.h
@@ -14,6 +14,7 @@ struct fuzzy_cmd {
u_char cmd;
uint32_t blocksize;
int32_t value;
+ int32_t flag;
u_char hash[FUZZY_HASHLEN];
};
diff --git a/src/message.c b/src/message.c
index 08f1bf5f1..dff1c6594 100644
--- a/src/message.c
+++ b/src/message.c
@@ -31,6 +31,7 @@
#include "modules.h"
#define RECURSION_LIMIT 30
+#define UTF8_CHARSET "UTF-8"
GByteArray *
strip_html_tags (struct worker_task *task, memory_pool_t * pool, struct mime_text_part *part, GByteArray * src, int *stateptr)
@@ -464,6 +465,25 @@ free_byte_array_callback (void *pointer)
g_byte_array_free (arr, TRUE);
}
+static void
+detect_real_charset (struct worker_task *task, GByteArray * part_content, struct mime_text_part *text_part)
+{
+ /* First of all try to detect UTF symbols */
+ text_part->is_utf = FALSE;
+ /* At first decision try to validate a single character */
+ if (g_utf8_get_char_validated (part_content->data, part_content->len) != -1) {
+ /* Now validate the whole part */
+ if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
+ text_part->is_utf = TRUE;
+ text_part->real_charset = UTF8_CHARSET;
+ return;
+ }
+ }
+
+ /* Now try to detect specific symbols from some charsets */
+
+}
+
static GByteArray *
convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeContentType * type, struct mime_text_part *text_part)
{
@@ -488,7 +508,7 @@ convert_text_to_utf (struct worker_task *task, GByteArray * part_content, GMimeC
return part_content;
}
- res_str = g_convert_with_fallback (part_content->data, part_content->len, "UTF-8", charset, NULL, &read_bytes, &write_bytes, &err);
+ res_str = g_convert_with_fallback (part_content->data, part_content->len, UTF8_CHARSET, charset, NULL, &read_bytes, &write_bytes, &err);
if (res_str == NULL) {
msg_warn ("cannot convert from %s to utf8: %s", charset, err ? err->message : "unknown problem");
text_part->is_raw = TRUE;
diff --git a/src/message.h b/src/message.h
index 13b93b881..abebe4862 100644
--- a/src/message.h
+++ b/src/message.h
@@ -22,6 +22,8 @@ struct mime_text_part {
gboolean is_raw;
gboolean is_balanced;
gboolean is_empty;
+ gboolean is_utf;
+ const gchar *real_charset;
GByteArray *orig;
GByteArray *content;
GNode *html_nodes;
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index aa2b788ff..1b3f38617 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -88,6 +88,7 @@ struct fuzzy_learn_session {
fuzzy_hash_t *h;
int cmd;
int value;
+ int flag;
int *saved;
struct timeval tv;
struct controller_session *session;
@@ -313,7 +314,7 @@ fuzzy_io_callback (int fd, short what, void *arg)
struct fuzzy_client_session *session = arg;
struct fuzzy_cmd cmd;
char buf[62], *err_str;
- int value;
+ int value = 0, flag = 0, r;
double nval;
if (what == EV_WRITE) {
@@ -332,15 +333,20 @@ fuzzy_io_callback (int fd, short what, void *arg)
}
else if (what == EV_READ) {
/* Got reply */
- if (read (fd, buf, sizeof (buf)) == -1) {
+ if ((r = read (fd, buf, sizeof (buf) - 1)) == -1) {
goto err;
}
else if (buf[0] == 'O' && buf[1] == 'K') {
+ buf[r] = 0;
/* Now try to get value */
value = strtol (buf + 3, &err_str, 10);
+ if (*err_str == ' ') {
+ /* Now read flag */
+ flag = strtol (err_str + 1, &err_str, 10);
+ }
*err_str = '\0';
nval = fuzzy_normalize (value);
- snprintf (buf, sizeof (buf), "%d / %.2f", value, nval);
+ snprintf (buf, sizeof (buf), "%d: %d / %.2f", flag, value, nval);
insert_result (session->task, fuzzy_module_ctx->metric, fuzzy_module_ctx->symbol, nval, g_list_prepend (NULL,
memory_pool_strdup (session->task->task_pool, buf)));
}
@@ -388,6 +394,7 @@ fuzzy_learn_callback (int fd, short what, void *arg)
memcpy (cmd.hash, session->h->hash_pipe, sizeof (cmd.hash));
cmd.cmd = session->cmd;
cmd.value = session->value;
+ cmd.flag = session->flag;
if (write (fd, &cmd, sizeof (struct fuzzy_cmd)) == -1) {
goto err;
}
@@ -497,7 +504,7 @@ fuzzy_process_handler (struct controller_session *session, f_str_t * in)
struct mime_text_part *part;
struct storage_server *selected;
GList *cur;
- int sock, r, cmd = 0, value = 0, *saved, *sargs;
+ int sock, r, cmd = 0, value = 0, flag = 0, *saved, *sargs;
char out_buf[BUFSIZ];
/* Extract arguments */
@@ -505,6 +512,7 @@ fuzzy_process_handler (struct controller_session *session, f_str_t * in)
sargs = session->other_data;
cmd = sargs[0];
value = sargs[1];
+ flag = sargs[2];
}
/* Prepare task */
@@ -565,6 +573,7 @@ fuzzy_process_handler (struct controller_session *session, f_str_t * in)
s->server = selected;
s->cmd = cmd;
s->value = value;
+ s->flag = flag;
s->saved = saved;
s->fd = sock;
event_add (&s->ev, &s->tv);
@@ -597,7 +606,7 @@ fuzzy_controller_handler (char **args, struct controller_session *session, int c
{
char *arg, out_buf[BUFSIZ], *err_str;
uint32_t size;
- int r, value = 1, *sargs;
+ int r, value = 1, flag = 0, *sargs;
/* Process size */
arg = args[0];
@@ -608,8 +617,9 @@ fuzzy_controller_handler (char **args, struct controller_session *session, int c
session->state = STATE_REPLY;
return;
}
+ errno = 0;
size = strtoul (arg, &err_str, 10);
- if (err_str && *err_str != '\0') {
+ if (errno != 0 || (err_str && *err_str != '\0')) {
r = snprintf (out_buf, sizeof (out_buf), "learn size is invalid" CRLF);
rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE);
session->state = STATE_REPLY;
@@ -618,16 +628,32 @@ fuzzy_controller_handler (char **args, struct controller_session *session, int c
/* Process value */
arg = args[1];
if (arg && *arg != '\0') {
+ errno = 0;
value = strtol (arg, &err_str, 10);
+ if (errno != 0 || *err_str != '\0') {
+ msg_info ("error converting numeric argument %s", arg);
+ value = 0;
+ }
+ }
+ /* Process flag */
+ arg = args[2];
+ if (arg && *arg != '\0') {
+ errno = 0;
+ flag = strtol (arg, &err_str, 10);
+ if (errno != 0 || *err_str != '\0') {
+ msg_info ("error converting numeric argument %s", arg);
+ flag = 0;
+ }
}
session->state = STATE_OTHER;
rspamd_set_dispatcher_policy (session->dispatcher, BUFFER_CHARACTER, size);
session->other_handler = fuzzy_process_handler;
/* Prepare args */
- sargs = memory_pool_alloc (session->session_pool, sizeof (int) * 2);
+ sargs = memory_pool_alloc (session->session_pool, sizeof (int) * 3);
sargs[0] = cmd;
sargs[1] = value;
+ sargs[2] = flag;
session->other_data = sargs;
}