aboutsummaryrefslogtreecommitdiffstats
path: root/utils/rspamd_stats.pl
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2017-05-21 12:36:37 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2017-05-21 12:36:37 +0100
commit094cad0a00d3fc18983093473be14a241d8b0e49 (patch)
treef181b25ac29a7ec974728ec082a068e9bc3f8432 /utils/rspamd_stats.pl
parent4f02ece36c8484554d9856b2d28163870375b528 (diff)
downloadrspamd-094cad0a00d3fc18983093473be14a241d8b0e49.tar.gz
rspamd-094cad0a00d3fc18983093473be14a241d8b0e49.zip
[Feature] Add support for bidirectional symbols in rspamd_stats
Diffstat (limited to 'utils/rspamd_stats.pl')
-rw-r--r--utils/rspamd_stats.pl59
1 files changed, 43 insertions, 16 deletions
diff --git a/utils/rspamd_stats.pl b/utils/rspamd_stats.pl
index 2a985ab8a..374b3bc97 100644
--- a/utils/rspamd_stats.pl
+++ b/utils/rspamd_stats.pl
@@ -9,6 +9,7 @@ use strict;
my @symbols_search;
my @symbols_exclude;
+my @symbols_bidirectional;
my $reject_score = 15.0;
my $junk_score = 6.0;
my $diff_alpha = 0.1;
@@ -27,13 +28,15 @@ my %decompressor = (
'bz2' => 'bzip2 -cd',
'gz' => 'gzip -cd',
'xz' => 'xz -cd',
+ 'zst' => 'zstd -cd',
);
GetOptions(
"reject-score|r=f" => \$reject_score,
"junk-score|j=f" => \$junk_score,
"symbol|s=s@" => \@symbols_search,
- "exclude|s=s@" => \@symbols_exclude,
+ "symbol-bidir|S=s@" => \@symbols_bidirectional,
+ "exclude|X=s@" => \@symbols_exclude,
"log|l=s" => \$log_file,
"alpha-score|alpha|a=f" => \$diff_alpha,
"correlations|c" => \$correlations,
@@ -49,8 +52,6 @@ GetOptions(
pod2usage(1) if $help;
pod2usage(-exitval => 0, -verbose => 2) if $man;
-@symbols_search = '.*'
- unless @symbols_search;
# Global vars
my $total = 0;
@@ -71,29 +72,42 @@ my %scanTime = (
max => 0,
total => 0,
);
+my %bidir_match;
+
+# Convert bidirectional symbols
+foreach my $s (@symbols_bidirectional) {
+ $bidir_match{$s} = {
+ spam => "${s}_SPAM",
+ ham => "${s}_HAM",
+ };
+ push @symbols_search, $s unless grep /^$s$/, @symbols_search;
+}
+
+@symbols_search = '.*'
+ unless @symbols_search;
if ($log_file eq '-' || $log_file eq '') {
$rspamd_log = \*STDIN;
&ProcessLog();
}
elsif ( -d "$log_file" ) {
- my $log_dir = "$log_file";
+ my $log_dir = "$log_file";
- my @logs = &GetLogfilesList($log_dir);
+ my @logs = &GetLogfilesList($log_dir);
- # Process logs
- foreach (@logs) {
- my $ext = (/[^.]+\.?([^.]*?)$/)[0];
- my $dc = $decompressor{$ext} || 'cat';
+ # Process logs
+ foreach (@logs) {
+ my $ext = (/[^.]+\.?([^.]*?)$/)[0];
+ my $dc = $decompressor{$ext} || 'cat';
- open( $rspamd_log, "-|", "$dc $log_dir/$_" )
- or die "cannot execute $dc $log_dir/$_ : $!";
+ open( $rspamd_log, "-|", "$dc $log_dir/$_" )
+ or die "cannot execute $dc $log_dir/$_ : $!";
- &ProcessLog;
+ &ProcessLog;
- close($rspamd_log)
- or warn "cannot close $dc $log_dir/$_: $!";
- }
+ close($rspamd_log)
+ or warn "cannot close $dc $log_dir/$_: $!";
+ }
}
else {
open($rspamd_log, '<', $log_file) or die "cannot open $log_file";
@@ -256,14 +270,27 @@ sub ProcessLog {
$sym =~ /^([^\(]+)(\(([^\)]+)\))?/;
my $sym_name = $1;
my $sym_score = 0;
+ my $orig_name = $sym_name;
+
if ($2) {
$sym_score = $3 * 1.0;
if (abs($sym_score) < $diff_alpha) {
next;
}
+
+ my $bm = $bidir_match{$sym_name};
+ if ($bm) {
+ if ($sym_score >= 0) {
+ $sym_name = $bm->{'spam'};
+ }
+ else {
+ $sym_name = $bm->{'ham'};
+ }
+ }
}
- next if $sym_name !~ /^$s/;
+
+ next if $orig_name !~ /^$s/;
push @sym_names, $sym_name;