瀏覽代碼

[Feature] Add related symbols analysis to rspamd_stats

tags/1.7.1
Vsevolod Stakhov 6 年之前
父節點
當前提交
1565388e50
共有 1 個文件被更改,包括 114 次插入26 次删除
  1. 114
    26
      utils/rspamd_stats.pl

+ 114
- 26
utils/rspamd_stats.pl 查看文件

@@ -15,6 +15,7 @@ my $reject_score = 15.0;
my $junk_score = 6.0;
my $diff_alpha = 0.1;
my $correlations = 0;
my $nrelated = 10;
my $log_file = "";
my $search_pattern = "";
my $startTime="";
@@ -34,22 +35,23 @@ my %decompressor = (
);

# Parse the command line into the file-level option variables; on a parse
# error print the usage message and exit with status 2.
#
# NOTE(review): "j" is declared as a short alias for both "junk-score" and
# "json". Left untouched here to keep the CLI as it is; confirm which option
# is meant to own -j.
GetOptions(
    "reject-score|r=f"      => \$reject_score,
    "junk-score|j=f"        => \$junk_score,
    "symbol|s=s@"           => \@symbols_search,
    "symbol-bidir|S=s@"     => \@symbols_bidirectional,
    "exclude|X=s@"          => \@symbols_exclude,
    "log|l=s"               => \$log_file,
    "alpha-score|alpha|a=f" => \$diff_alpha,
    "correlations|c"        => \$correlations,
    # Takes an integer count; without "=i" the option would be a plain flag
    # and --nrelated would always store 1.
    "nrelated=i"            => \$nrelated,
    "search-pattern=s"      => \$search_pattern,
    "start=s"               => \$startTime,
    "end=s"                 => \$endTime,
    "num-logs|n=i"          => \$num_logs,
    "exclude-logs|x=i"      => \$exclude_logs,
    "json|j"                => \$json,
    "help|?"                => \$help,
    "man"                   => \$man
) or pod2usage(2);

pod2usage(1) if $help;
@@ -141,7 +143,31 @@ else {

exit;

sub SymbolsStat() {
# Build the list of symbols that most often accompany $target_sym.
#
# Arguments:
#   $htb        - hash ref mapping symbol name => number of co-occurrences
#   $target_sym - symbol the report is about; it is left out of the result
#
# Returns an array ref of [ $symbol, $count ] pairs ordered by descending
# count, holding at most $nrelated entries ($nrelated is the file-level
# limit set from the command line).
sub GenRelated {
    my ($htb, $target_sym) = @_;

    my @result;

    # Equal counts are ordered by name so repeated runs print the same report.
    my @by_count =
      sort { $htb->{$b} <=> $htb->{$a} or $a cmp $b } keys %{$htb};

    foreach my $sym (@by_count) {
        # Test the limit before pushing: the earlier post-increment check
        # ($i > $nrelated) let one entry too many through.
        last if @result >= $nrelated;
        next if $sym eq $target_sym;

        push @result, [ $sym, $htb->{$sym} ];
    }

    return \@result;
}

# Format a related-symbols list for the text report.
#
# Arguments:
#   $ar    - array ref of [ $symbol, $count ] pairs (as built by GenRelated)
#   $total - number of messages the counts are measured against
#
# Returns one string with a line per pair, each line being a tab followed by
# "SYMBOL(count: NN.N%)"; lines are joined with "\n" and there is no trailing
# newline. An empty list yields an empty string.
sub StringifyRelated {
    my ($ar, $total) = @_;

    my @lines;
    foreach my $elt (@{$ar}) {
        my ($sym, $count) = @{$elt};

        # A zero (or missing) total would make the division die; report 0%
        # for that case instead of aborting the whole run.
        my $percent = $total ? $count / $total * 100.0 : 0.0;

        push @lines, sprintf("\t%s(%s: %.1f%%)", $sym, $count, $percent);
    }

    return join("\n", @lines);
}

sub SymbolsStat {
if ($total > 0) {
my $has_comma = 0;
while (my ($s, $r) = each(%sym_res)) {
@@ -237,6 +263,11 @@ Junk changes / total junk hits : %6d/%-6d (%7.3f%%)
}

if ($correlations) {

my $spam_related = GenRelated($r->{symbols_met_spam}, $s);
my $junk_related = GenRelated($r->{symbols_met_junk}, $s);
my $ham_related = GenRelated($r->{symbols_met_ham}, $s);

if (!$json) {
print "Correlations report:\n";

@@ -246,18 +277,26 @@ Junk changes / total junk hits : %6d/%-6d (%7.3f%%)
printf "Probability of %s when %s fires: %.3f\n", $s, $cs,
($corr_prob / $sym_prob);
}

print "Related symbols report:\n";
printf "Top related in spam:\n %s\n", StringifyRelated($spam_related,
$r->{spam_hits});
printf "Top related in junk:\n %s\n", StringifyRelated($junk_related,
$r->{junk_hits});
printf "Top related in ham:\n %s\n", StringifyRelated($ham_related,
$r->{hits} - $r->{spam_hits} - $r->{junk_hits});
}
else {
print ",";
print "\"correllations\":{";

my $has_comma = 0;
my $has_comma_ = 0;
while (my ($cs, $hits) = each %{$r->{corr}}) {
if ($has_comma) {
if ($has_comma_) {
print ",";
}
else {
$has_comma = 1;
$has_comma_ = 1;
}
my $corr_prob = $hits / $total;
my $sym_prob = $r->{hits} / $total;
@@ -328,6 +367,41 @@ Messages scanned: $total";
}
}

# Count the symbols of one message into a co-occurrence table.
#
# Arguments:
#   $symbols - array ref of symbol strings, each "NAME" or "NAME(score)"
#   $target  - hash ref (symbol name => count) that is updated in place
#
# For entries that carry a score:
#   * the entry is skipped when abs(score) is below $diff_alpha;
#   * when %bidir_match has an entry for the name, the name is replaced by
#     that entry's 'spam' value for a non-negative score, or its 'ham' value
#     for a negative one.
# Entries without a score are always counted under their own name.
sub ProcessRelated {
    my ($symbols, $target) = @_;

    foreach my $s (@{$symbols}) {
        # Skip anything that does not look like a symbol. Without this check
        # a failed match would leave the capture variables undefined or
        # holding values from an earlier match, and a bogus key would be
        # counted.
        next unless defined $s && $s =~ /^([^\(]+)(\(([^\)]+)\))?/;

        # Copy the captures straight away so later matches cannot clobber them.
        my ($sym_name, $score_part, $raw_score) = ($1, $2, $3);

        if ($score_part) {
            my $sym_score = $raw_score * 1.0;

            next if abs($sym_score) < $diff_alpha;

            my $bm = $bidir_match{$sym_name};
            if ($bm) {
                $sym_name = $sym_score >= 0 ? $bm->{'spam'} : $bm->{'ham'};
            }
        }

        # ++ on a missing key starts from 1, so no exists() check is needed.
        $target->{$sym_name}++;
    }
}

sub ProcessLog {
my ( $ts_format, @line ) = &log_time_format($rspamd_log);
my $is_syslog = defined $ts_format && $ts_format eq 'syslog';
@@ -429,13 +503,16 @@ sub ProcessLog {

if (!$sym_res{$sym_name}) {
$sym_res{$sym_name} = {
hits => 0,
spam_hits => 0,
junk_hits => 0,
spam_change => 0,
junk_change => 0,
weight => 0,
corr => {},
hits => 0,
spam_hits => 0,
junk_hits => 0,
spam_change => 0,
junk_change => 0,
weight => 0,
corr => {},
symbols_met_spam => {},
symbols_met_ham => {},
symbols_met_junk => {},
};
}

@@ -449,10 +526,21 @@ sub ProcessLog {
if ($score >= $reject_score) {
$is_spam = 1;
$r->{spam_hits} ++;
if ($correlations) {
ProcessRelated(\@symbols, $r->{symbols_met_spam});
}
}
elsif ($score >= $junk_score) {
$is_junk = 1;
$r->{junk_hits} ++;
if ($correlations) {
ProcessRelated(\@symbols, $r->{symbols_met_junk});
}
}
else {
if ($correlations) {
ProcessRelated(\@symbols, $r->{symbols_met_ham});
}
}

if ($sym_score != 0) {

Loading…
取消
儲存