diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-08-04 15:39:16 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-08-04 15:39:16 +0100 |
commit | feef96256730fd94afe3ea4e5d51db9763f2e535 (patch) | |
tree | 09de88184262f0528adfe0b2e62600ad013b9172 /utils | |
parent | 15d0fdb942c7d7f7a9791c083c72352c99640710 (diff) | |
download | rspamd-feef96256730fd94afe3ea4e5d51db9763f2e535.tar.gz rspamd-feef96256730fd94afe3ea4e5d51db9763f2e535.zip |
[Feature] Add correlations report in fuzzy stats
Diffstat (limited to 'utils')
-rw-r--r-- | utils/rspamd_stats.pl | 49 |
1 file changed, 42 insertions, 7 deletions
diff --git a/utils/rspamd_stats.pl b/utils/rspamd_stats.pl index 423f4cba6..7b00c7640 100644 --- a/utils/rspamd_stats.pl +++ b/utils/rspamd_stats.pl @@ -11,16 +11,18 @@ my @symbols_search; my $reject_score = 15.0; my $junk_score = 6.0; my $diff_alpha = 0.1; +my $correlations = 0; my $log_file = ""; my $man = 0; my $help = 0; GetOptions( - "reject-score=f" => \$reject_score, - "junk-score=f" => \$junk_score, - "symbol=s@" => \@symbols_search, - "log=s" => \$log_file, - "alpha=f" => \$diff_alpha, + "reject-score|r=f" => \$reject_score, + "junk-score|j=f" => \$junk_score, + "symbol|s=s@" => \@symbols_search, + "log|l=s" => \$log_file, + "alpha|a=f" => \$diff_alpha, + "correlations|c" => \$correlations, "help|?" => \$help, "man" => \$man ) or pod2usage(2); @@ -56,7 +58,7 @@ while(<$rspamd_log>) { my $ts = $elts[0] . ' ' . $elts[1]; if ($_ !~ /\[(-?\d+(?:\.\d+)?)\/(-?\d+(?:\.\d+)?)\]\s+\[([^\]]+)\]/) { - #print "BAD\n"; + #print "BAD: $_\n"; next; } @@ -72,11 +74,13 @@ while(<$rspamd_log>) { # Symbols my @symbols = split /,/, $3; + my @sym_names; foreach my $s (@symbols_search) { my @selected = grep /$s/, @symbols; if (scalar(@selected) > 0) { + foreach my $sym (@selected) { $sym =~ /^([^\(]+)(\(([^\)]+)\))?/; my $sym_name = $1; @@ -90,6 +94,8 @@ while(<$rspamd_log>) { } next if $sym_name !~ /^$s/; + push @sym_names, $sym_name; + if (!$sym_res{$sym_name}) { $sym_res{$sym_name} = { hits => 0, @@ -98,6 +104,7 @@ while(<$rspamd_log>) { spam_change => 0, junk_change => 0, weight => 0, + corr => {}, }; } @@ -137,9 +144,26 @@ while(<$rspamd_log>) { } } } - } + } # End foreach symbols selected } } + + if ($correlations) { + foreach my $sym (@sym_names) { + my $r = $sym_res{$sym}; + + foreach my $corr_sym (@sym_names) { + if ($corr_sym ne $sym) { + if ($r->{'corr'}->{$corr_sym}) { + $r->{'corr'}->{$corr_sym} ++; + } + else { + $r->{'corr'}->{$corr_sym} = 1; + } + } + } + } # End of correlations check + } } } @@ -195,6 +219,17 @@ Junk changes / total junk hits : %6d/%-6d 
(%7.3f%%) $r->{junk_change}, $total_junk, ( $jchp or 0 ); } } + + if ($correlations) { + print "Correlations report:\n"; + + while (my ($cs,$hits) = each %{$r->{corr}}) { + my $corr_prob = $hits / $total; + my $sym_prob = $r->{hits} / $total; + printf "Probability of %s when %s fires: %.3f\n", $s, $cs, ($corr_prob / $sym_prob); + } + } + } else { print "Symbol $s has not been met\n"; |