Browse Source

Rework fuzzy check module.

- All checks are now organized into rules.
- Allow specifying read_only rules to avoid problems during learning.
- Use a better normalizer for the fuzzy module; it now returns values
  from 0 to 1.0 (as bayes does).
- Update configuration accordingly.
- Drop legacy configuration support.
- Detect tanh as well and provide some reasonable (linear) fallback.
tags/0.6.2
Vsevolod Stakhov 10 years ago
parent
commit
703fb40d6e
6 changed files with 405 additions and 377 deletions
  1. 3
    2
      CMakeLists.txt
  2. 4
    4
      conf/metrics.conf
  3. 20
    18
      conf/modules.conf
  4. 1
    0
      config.h.in
  5. 3
    1
      src/cfg_utils.c
  6. 374
    352
      src/plugins/fuzzy_check.c

+ 3
- 2
CMakeLists.txt View File

@@ -10,10 +10,10 @@ PROJECT(rspamd C)

SET(RSPAMD_VERSION_MAJOR 0)
SET(RSPAMD_VERSION_MINOR 6)
SET(RSPAMD_VERSION_PATCH 1)
SET(RSPAMD_VERSION_PATCH 2)

SET(RSPAMD_VERSION "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}")
SET(RSPAMD_MASTER_SITE_URL "http://bitbucket.org/vstakhov/rspamd")
SET(RSPAMD_MASTER_SITE_URL "https://rspamd.com")

IF(NOT RSPAMD_USER)
SET(RSPAMD_USER "nobody")
@@ -862,6 +862,7 @@ CHECK_FUNCTION_EXISTS(wait4 HAVE_WAIT4)
CHECK_FUNCTION_EXISTS(waitpid HAVE_WAITPID)
CHECK_FUNCTION_EXISTS(flock HAVE_FLOCK)
CHECK_FUNCTION_EXISTS(tanhl HAVE_TANHL)
CHECK_FUNCTION_EXISTS(tanh HAVE_TANH)
CHECK_FUNCTION_EXISTS(expl HAVE_EXPL)
CHECK_FUNCTION_EXISTS(exp2l HAVE_EXP2L)
CHECK_FUNCTION_EXISTS(sendfile HAVE_SENDFILE)

+ 4
- 4
conf/metrics.conf View File

@@ -408,22 +408,22 @@ metric {
name = "BAYES_HAM";
}
symbol {
weight = 1.0;
weight = 10.0;
description = "Generic fuzzy hash match";
name = "R_FUZZY";
}
symbol {
weight = 1.0;
weight = 10.0;
description = "Denied fuzzy hash";
name = "FUZZY_DENIED";
}
symbol {
weight = 1.0;
weight = 5.0;
description = "Probable fuzzy hash";
name = "FUZZY_PROB";
}
symbol {
weight = 1.0;
weight = -2.1;
description = "Whitelisted fuzzy hash";
name = "FUZZY_WHITE";
}

+ 20
- 18
conf/modules.conf View File

@@ -1,24 +1,26 @@
# Rspamd modules configuration
fuzzy_check {
servers = "highsecure.ru:11335";
symbol = "R_FUZZY";
min_bytes = 300;
max_score = 10;
mime_types = "application/pdf";
fuzzy_map = {
FUZZY_DENIED {
weight = 10.0;
flag = 1
}
FUZZY_PROB {
weight = 5.0;
flag = 2
}
FUZZY_WHITE {
weight = -2.1;
flag = 3
}
}
rule {
servers = "highsecure.ru:11335";
symbol = "R_FUZZY";
mime_types = "application/pdf";
max_score = 10;
fuzzy_map = {
FUZZY_DENIED {
max_score = 10.0;
flag = 1
}
FUZZY_PROB {
max_score = 5.0;
flag = 2
}
FUZZY_WHITE {
max_score = 5.0;
flag = 3
}
}
}
}
forged_recipients {
symbol_sender = "FORGED_SENDER";

+ 1
- 0
config.h.in View File

@@ -145,6 +145,7 @@
#cmakedefine HAVE_FLOCK 1

#cmakedefine HAVE_TANHL 1
#cmakedefine HAVE_TANH 1

#cmakedefine HAVE_EXPL 1
#cmakedefine HAVE_EXP2L 1

+ 3
- 1
src/cfg_utils.c View File

@@ -635,12 +635,14 @@ internal_normalizer_func (struct config_file *cfg, long double score, void *data
}
#ifdef HAVE_TANHL
return max * tanhl (score / max);
#else
#elif defined(HAVE_TANH)
/*
* As some implementations of libm do not support tanhl, try to use
* tanh
*/
return max * tanh ((double) (score / max));
#else
return score < max ? score / max : max;
#endif
}


+ 374
- 352
src/plugins/fuzzy_check.c
File diff suppressed because it is too large
View File


Loading…
Cancel
Save