aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/chartable.c
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@rambler-co.ru>2009-04-21 16:09:30 +0400
committerVsevolod Stakhov <vsevolod@rambler-co.ru>2009-04-21 16:09:30 +0400
commitf074850e41e357aa7a2c5a86d2eef8121710cc47 (patch)
treed97856de0565360303af50d069bf2d0858219f41 /src/plugins/chartable.c
parentda9546194391e7d14ffe9b0c78f84892b012ff28 (diff)
downloadrspamd-f074850e41e357aa7a2c5a86d2eef8121710cc47.tar.gz
rspamd-f074850e41e357aa7a2c5a86d2eef8121710cc47.zip
* Add initial version of chartable plugin: now it can only detects mixed unicode characters
Diffstat (limited to 'src/plugins/chartable.c')
-rw-r--r--src/plugins/chartable.c178
1 files changed, 178 insertions, 0 deletions
diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c
new file mode 100644
index 000000000..95987e48a
--- /dev/null
+++ b/src/plugins/chartable.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2009, Rambler media
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/***MODULE:chartable
+ * rspamd module that make marks based on symbol chains
+ */
+
+#include "../config.h"
+#include "../main.h"
+#include "../message.h"
+#include "../modules.h"
+#include "../cfg_file.h"
+#include "../expressions.h"
+
+#define DEFAULT_SYMBOL "R_CHARSET_MIXED"
+#define DEFAULT_THRESHOLD 0.1
+
+struct chartable_ctx {
+ int (*header_filter)(struct worker_task *task);
+ int (*mime_filter)(struct worker_task *task);
+ int (*message_filter)(struct worker_task *task);
+ int (*url_filter)(struct worker_task *task);
+ char *metric;
+ char *symbol;
+ double threshold;
+
+ memory_pool_t *chartable_pool;
+};
+
+static struct chartable_ctx *chartable_module_ctx = NULL;
+
+static int chartable_mime_filter (struct worker_task *task);
+
+int
+chartable_module_init (struct config_file *cfg, struct module_ctx **ctx)
+{
+ chartable_module_ctx = g_malloc (sizeof (struct chartable_ctx));
+
+ chartable_module_ctx->header_filter = NULL;
+ chartable_module_ctx->mime_filter = chartable_mime_filter;
+ chartable_module_ctx->message_filter = NULL;
+ chartable_module_ctx->url_filter = NULL;
+ chartable_module_ctx->chartable_pool = memory_pool_new (memory_pool_get_size ());
+
+ *ctx = (struct module_ctx *)chartable_module_ctx;
+
+ return 0;
+}
+
+
+int
+chartable_module_config (struct config_file *cfg)
+{
+ char *value;
+ int res = TRUE;
+
+ if ((value = get_module_opt (cfg, "chartable", "metric")) != NULL) {
+ chartable_module_ctx->metric = memory_pool_strdup (chartable_module_ctx->chartable_pool, value);
+ g_free (value);
+ }
+ else {
+ chartable_module_ctx->metric = DEFAULT_METRIC;
+ }
+ if ((value = get_module_opt (cfg, "chartable", "symbol")) != NULL) {
+ chartable_module_ctx->symbol = memory_pool_strdup (chartable_module_ctx->chartable_pool, value);
+ g_free (value);
+ }
+ else {
+ chartable_module_ctx->symbol = DEFAULT_SYMBOL;
+ }
+ if ((value = get_module_opt (cfg, "chartable", "threshold")) != NULL) {
+ errno = 0;
+ chartable_module_ctx->threshold = strtod (value, NULL);
+ if (errno != 0) {
+ msg_warn ("chartable_module_config: invalid numeric value '%s': %s", value, strerror (errno));
+ chartable_module_ctx->threshold = DEFAULT_THRESHOLD;
+ }
+ }
+ else {
+ chartable_module_ctx->threshold = DEFAULT_THRESHOLD;
+ }
+
+ return res;
+}
+
+int
+chartable_module_reconfig (struct config_file *cfg)
+{
+ memory_pool_delete (chartable_module_ctx->chartable_pool);
+ chartable_module_ctx->chartable_pool = memory_pool_new (1024);
+
+ return chartable_module_config (cfg);
+}
+
+static gboolean
+check_part (struct mime_text_part *part)
+{
+ char *p, *p1;
+ gunichar c, t;
+ GUnicodeScript scc, sct;
+ uint32_t mark = 0, total = 0;
+ uint32_t remain = part->content->len;
+
+ if (part->is_raw) {
+ return FALSE;
+ }
+
+ p = part->content->data;
+
+ while (remain > 0) {
+ c = g_utf8_get_char (p);
+ scc = g_unichar_get_script (c);
+ p1 = g_utf8_next_char (p);
+ remain -= p1 - p;
+ p = p1;
+
+ if (remain > 0) {
+ t = g_utf8_get_char (p);
+ sct = g_unichar_get_script (t);
+ if (g_unichar_isalnum (c) && g_unichar_isalnum (t)) {
+ /* We have two unicode alphanumeric characters, so we can check its script */
+ if (sct != scc) {
+ mark ++;
+ }
+ total ++;
+ }
+ p1 = g_utf8_next_char (p);
+ remain -= p1 - p;
+ p = p1;
+ }
+ }
+
+ return ((double)mark / (double)total) > chartable_module_ctx->threshold;
+}
+
+static int
+chartable_mime_filter (struct worker_task *task)
+{
+ GList *cur;
+
+ /* XXX: write translation tables for this */
+ if (task->cfg->raw_mode) {
+ msg_warn ("chartable_mime_filter: cannot work in non-unicode mode");
+ return 0;
+ }
+
+ cur = g_list_first (task->text_parts);
+ while (cur) {
+ if (check_part ((struct mime_text_part *)cur->data)) {
+ insert_result (task, chartable_module_ctx->metric, chartable_module_ctx->symbol, 1, NULL);
+ }
+ cur = g_list_next (cur);
+ }
+
+ return 0;
+}
+