#include "rspamd.h"
#define DEFAULT_SYMBOL "R_CHARSET_MIXED"
+#define DEFAULT_URL_SYMBOL "R_CHARSET_MIXED_URL"
#define DEFAULT_THRESHOLD 0.1
#define msg_err_chartable(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
struct chartable_ctx {
struct module_ctx ctx;
const gchar *symbol;
+ const gchar *url_symbol;
double threshold;
guint max_word_len;
static struct chartable_ctx *chartable_module_ctx = NULL;
static void chartable_symbol_callback (struct rspamd_task *task, void *unused);
+static void chartable_url_symbol_callback (struct rspamd_task *task, void *unused);
gint
chartable_module_init (struct rspamd_config *cfg, struct module_ctx **ctx)
else {
chartable_module_ctx->symbol = DEFAULT_SYMBOL;
}
+ if ((value =
+ rspamd_config_get_module_opt (cfg, "chartable", "url_symbol")) != NULL) {
+ chartable_module_ctx->url_symbol = ucl_obj_tostring (value);
+ }
+ else {
+ chartable_module_ctx->url_symbol = DEFAULT_URL_SYMBOL;
+ }
if ((value =
rspamd_config_get_module_opt (cfg, "chartable", "threshold")) != NULL) {
if (!ucl_obj_todouble_safe (value, &chartable_module_ctx->threshold)) {
}
rspamd_symbols_cache_add_symbol (cfg->cache,
- chartable_module_ctx->symbol,
- 0,
- chartable_symbol_callback,
- NULL,
- SYMBOL_TYPE_NORMAL,
- -1);
+ chartable_module_ctx->symbol,
+ 0,
+ chartable_symbol_callback,
+ NULL,
+ SYMBOL_TYPE_NORMAL,
+ -1);
+ rspamd_symbols_cache_add_symbol (cfg->cache,
+ chartable_module_ctx->url_symbol,
+ 0,
+ chartable_url_symbol_callback,
+ NULL,
+ SYMBOL_TYPE_NORMAL,
+ -1);
msg_info_config ("init internal chartable module");
}
static gdouble
-rspamd_chartable_process_word_utf (struct rspamd_task *task, rspamd_ftok_t *w)
+rspamd_chartable_process_word_utf (struct rspamd_task *task, rspamd_ftok_t *w,
+ gboolean is_url)
{
const gchar *p, *end, *c;
gdouble badness = 0.0;
if (state == got_digit) {
/* Penalize digit -> alpha translations */
- badness += 1.0;
+ if (!is_url) {
+ badness += 1.0;
+ }
}
else if (state == got_alpha) {
/* Check script */
}
static gdouble
-rspamd_chartable_process_word_ascii (struct rspamd_task *task, rspamd_ftok_t *w)
+rspamd_chartable_process_word_ascii (struct rspamd_task *task, rspamd_ftok_t *w,
+ gboolean is_url)
{
const gchar *p, *end, *c;
gdouble badness = 0.0;
if (state == got_digit) {
/* Penalize digit -> alpha translations */
- badness += 2.0;
+ if (!is_url) {
+ badness += 1.0;
+ }
}
else if (state == got_alpha) {
/* Check script */
if (w->len > 0) {
if (IS_PART_UTF (part)) {
- cur_score += rspamd_chartable_process_word_utf (task, w);
+ cur_score += rspamd_chartable_process_word_utf (task, w, FALSE);
}
else {
- cur_score += rspamd_chartable_process_word_ascii (task, w);
+ cur_score += rspamd_chartable_process_word_ascii (task, w, FALSE);
}
}
}
part = g_ptr_array_index (task->text_parts, i);
rspamd_chartable_process_part (task, part);
}
+}
+
+/*
+ * Walks every entry of the hash table `hosts` (values are struct rspamd_url)
+ * and adds the chartable badness of each non-empty host to `cur_score`.
+ *
+ * The 2.0 cap is tested at the top of each iteration: once the running
+ * score is above it, the score is clamped to 2.0 and the walk stops. The
+ * contribution of the last visited entry is therefore not clamped here.
+ *
+ * Hosts that are valid UTF-8 go to the UTF word scorer, everything else to
+ * the ASCII one; both are called with is_url = TRUE, which disables their
+ * digit -> alpha penalty.
+ *
+ * Returns the updated running score.
+ */
+static gdouble
+chartable_url_hosts_score (struct rspamd_task *task, GHashTable *hosts,
+		gdouble cur_score)
+{
+	GHashTableIter it;
+	gpointer k, v;
+	struct rspamd_url *u;
+	rspamd_ftok_t w;
+
+	g_hash_table_iter_init (&it, hosts);
+
+	while (g_hash_table_iter_next (&it, &k, &v)) {
+		u = v;
+
+		if (cur_score > 2.0) {
+			cur_score = 2.0;
+			break;
+		}
+
+		if (u->hostlen > 0) {
+			w.begin = u->host;
+			w.len = u->hostlen;
+
+			if (g_utf8_validate (w.begin, w.len, NULL)) {
+				cur_score += rspamd_chartable_process_word_utf (task, &w, TRUE);
+			}
+			else {
+				cur_score += rspamd_chartable_process_word_ascii (task, &w, TRUE);
+			}
+		}
+	}
+
+	return cur_score;
+}
+
+/*
+ * Symbol callback for the URL chartable check.
+ *
+ * Scores the host part of every entry in task->urls and then task->emails,
+ * carrying one running score across both tables, and inserts the URL symbol
+ * into the task result when that score exceeds the configured threshold.
+ *
+ * task   - task whose urls/emails tables are inspected
+ * unused - callback user data; not read
+ */
+static void
+chartable_url_symbol_callback (struct rspamd_task *task, void *unused)
+{
+	gdouble cur_score = 0.0;
+
+	cur_score = chartable_url_hosts_score (task, task->urls, cur_score);
+	cur_score = chartable_url_hosts_score (task, task->emails, cur_score);
+
+	if (cur_score > chartable_module_ctx->threshold) {
+		/*
+		 * Report under url_symbol: that is the symbol this callback is
+		 * registered for; `symbol` belongs to the text-part callback.
+		 */
+		rspamd_task_insert_result (task, chartable_module_ctx->url_symbol,
+				cur_score, NULL);
+	}
}