diff options
Diffstat (limited to 'utils/rspamd_stats.pl')
-rwxr-xr-x | utils/rspamd_stats.pl | 1168 |
1 files changed, 583 insertions, 585 deletions
diff --git a/utils/rspamd_stats.pl b/utils/rspamd_stats.pl index ac7b1349f..f97e35188 100755 --- a/utils/rspamd_stats.pl +++ b/utils/rspamd_stats.pl @@ -15,20 +15,20 @@ my @symbols_bidirectional; my @symbols_groups; my @symbols_ignored; my %groups; -my $reject_score = 15.0; -my $junk_score = 6.0; -my $diff_alpha = 0.1; -my $correlations = 0; -my $nrelated = 10; -my $log_file = ""; +my $reject_score = 15.0; +my $junk_score = 6.0; +my $diff_alpha = 0.1; +my $correlations = 0; +my $nrelated = 10; +my $log_file = ""; my $search_pattern = ""; -my $startTime=""; +my $startTime = ""; my $endTime; my $num_logs; my $exclude_logs = 0; -my $man = 0; -my $json = 0; -my $help = 0; +my $man = 0; +my $json = 0; +my $help = 0; # Associate file extensions with decompressors my %decompressor = ( @@ -39,44 +39,43 @@ my %decompressor = ( ); GetOptions( - "reject-score|r=f" => \$reject_score, - "junk-score|j=f" => \$junk_score, - "symbol|s=s@" => \@symbols_search, - "symbol-bidir|S=s@" => \@symbols_bidirectional, - "exclude|X=s@" => \@symbols_exclude, - "ignore=s@" => \@symbols_ignored, - "group|g=s@" => \@symbols_groups, - "log|l=s" => \$log_file, - "alpha-score|alpha|a=f" => \$diff_alpha, - "correlations|c" => \$correlations, - "nrelated=i" => \$nrelated, - "search-pattern=s" => \$search_pattern, - "start=s" => \$startTime, - "end=s" => \$endTime, - "num-logs|n=i" => \$num_logs, - "exclude-logs|x=i" => \$exclude_logs, - "json|j" => \$json, - "help|?" => \$help, - "man" => \$man + "reject-score|r=f" => \$reject_score, + "junk-score|j=f" => \$junk_score, + "symbol|s=s@" => \@symbols_search, + "symbol-bidir|S=s@" => \@symbols_bidirectional, + "exclude|X=s@" => \@symbols_exclude, + "ignore=s@" => \@symbols_ignored, + "group|g=s@" => \@symbols_groups, + "log|l=s" => \$log_file, + "alpha-score|alpha|a=f" => \$diff_alpha, + "correlations|c" => \$correlations, + "nrelated=i" => \$nrelated, + "search-pattern=s" => \$search_pattern, + "start=s" => \$startTime, + "end=s" => \$endTime, + "num-logs|n=i" => \$num_logs, + "exclude-logs|x=i" => \$exclude_logs, + "json|j" => \$json, + "help|?" => \$help, + "man" => \$man ) or pod2usage(2); pod2usage(1) if $help; -pod2usage(-exitval => 0, -verbose => 2) if $man; - +pod2usage( -exitval => 0, -verbose => 2 ) if $man; # Global vars -my $total = 0; -my $total_spam = 0; -my $total_junk = 0; -my $junk_symbols = 0; -my $spam_symbols = 0; -my $ham_symbols = 0; +my $total = 0; +my $total_spam = 0; +my $total_junk = 0; +my $junk_symbols = 0; +my $spam_symbols = 0; +my $ham_symbols = 0; my $ham_spam_change = 0; my $ham_junk_change = 0; my %sym_res; my $rspamd_log; -my $enabled = 0; -my $log_file_num = 1; +my $enabled = 0; +my $log_file_num = 1; my $spinner_update_time = 0; my %action; @@ -91,688 +90,689 @@ foreach ( $startTime, $endTime ) { $_ = &normalized_time($_) } # Convert bidirectional symbols foreach my $s (@symbols_bidirectional) { - $bidir_match{$s} = { - spam => "${s}_SPAM", - ham => "${s}_HAM", - }; - push @symbols_search, $s unless grep /^$s$/, @symbols_search; + $bidir_match{$s} = { + spam => "${s}_SPAM", + ham => "${s}_HAM", + }; + push @symbols_search, $s unless grep /^$s$/, @symbols_search; } # Deal with groups my $group_id = 0; foreach my $g (@symbols_groups) { - my @symbols = split /,/,$g; - my $group_name = "group$group_id"; + my @symbols = split /,/, $g; + my $group_name = "group$group_id"; - foreach my $s (@symbols) { - $groups{$s} = $group_name; - push @symbols_search, $s unless grep /^$s$/, @symbols_search; - } + foreach my $s (@symbols) { + $groups{$s} = $group_name; + push @symbols_search, $s unless grep /^$s$/, @symbols_search; + } } @symbols_search = '.*' unless @symbols_search; -if ($log_file eq '-' || $log_file eq '') { - $rspamd_log = \*STDIN; - &ProcessLog(); +if ( $log_file eq '-' || $log_file eq '' ) { + $rspamd_log = \*STDIN; + &ProcessLog(); } elsif ( -d "$log_file" ) { - my $log_dir = "$log_file"; + my $log_dir = "$log_file"; - my @logs = &GetLogfilesList($log_dir); + my @logs = &GetLogfilesList($log_dir); - # Process logs - foreach (@logs) { - my $ext = (/[^.]+\.?([^.]*?)$/)[0]; - my $dc = $decompressor{$ext} || 'cat'; + # Process logs + foreach (@logs) { + my $ext = (/[^.]+\.?([^.]*?)$/)[0]; + my $dc = $decompressor{$ext} || 'cat'; - open( $rspamd_log, "-|", "$dc $log_dir/$_" ) - or die "cannot execute $dc $log_dir/$_ : $!"; + open( $rspamd_log, "-|", "$dc $log_dir/$_" ) + or die "cannot execute $dc $log_dir/$_ : $!"; - printf {interactive(*STDERR)} "\033[J Parsing log files: [%d/%d] %s\033[G", $log_file_num++, scalar @logs, $_; - $spinner_update_time = 0; # Force spinner update - &spinner; + printf { interactive(*STDERR) } "\033[J Parsing log files: [%d/%d] %s\033[G", $log_file_num++, scalar @logs, + $_; + $spinner_update_time = 0; # Force spinner update + &spinner; - &ProcessLog; + &ProcessLog; - close($rspamd_log) - or warn "cannot close $dc $log_dir/$_: $!"; - } - print {interactive(*STDERR)} "\033[J\033[G"; # Progress indicator clean-up + close($rspamd_log) + or warn "cannot close $dc $log_dir/$_: $!"; + } + print { interactive(*STDERR) } "\033[J\033[G"; # Progress indicator clean-up } else { - my $ext = ($log_file =~ /[^.]+\.?([^.]*?)$/)[0]; - my $dc = $decompressor{$ext} || 'cat'; - open( $rspamd_log, "-|", "$dc $log_file" ) - or die "cannot execute $dc $log_file : $!"; - $spinner_update_time = 0; # Force spinner update - &spinner; - &ProcessLog(); + my $ext = ( $log_file =~ /[^.]+\.?([^.]*?)$/ )[0]; + my $dc = $decompressor{$ext} || 'cat'; + open( $rspamd_log, "-|", "$dc $log_file" ) + or die "cannot execute $dc $log_file : $!"; + $spinner_update_time = 0; # Force spinner update + &spinner; + &ProcessLog(); } -my $total_ham = $total - ($total_spam + $total_junk); +my $total_ham = $total - ( $total_spam + $total_junk ); if ($json) { - print "{"; - &Summary(); - print '"symbols":{'; - &SymbolsStat(); - print "}}\n"; + print "{"; + &Summary(); + print '"symbols":{'; + &SymbolsStat(); + print "}}\n"; } else { - &SymbolsStat(); - &Summary(); + &SymbolsStat(); + &Summary(); } exit; sub IsIgnored { - my ($sym) = @_; + my ($sym) = @_; - foreach my $ex (@symbols_ignored) { - if ($sym =~ /^$ex$/) { - return 1; + foreach my $ex (@symbols_ignored) { + if ( $sym =~ /^$ex$/ ) { + return 1; + } } - } - return 0; + return 0; } sub GenRelated { - my ($htb, $target_sym) = @_; - - my @result; - my $i = 0; - foreach my $sym (sort { $htb->{$b} <=> $htb->{$a} } keys %{$htb}) { - if ($sym ne $target_sym) { - my @elt = ($sym, $htb->{$sym}); - push @result, \@elt; - $i ++; - } + my ( $htb, $target_sym ) = @_; + + my @result; + my $i = 0; + foreach my $sym ( sort { $htb->{$b} <=> $htb->{$a} } keys %{$htb} ) { + if ( $sym ne $target_sym ) { + my @elt = ( $sym, $htb->{$sym} ); + push @result, \@elt; + $i++; + } - last if $i > $nrelated; - } + last if $i > $nrelated; + } - return \@result; + return \@result; } sub StringifyRelated { - my ($ar, $total) = @_; - return join("\n", (map { sprintf "\t%s(%s: %.1f%%)", - $_->[0], $_->[1], $_->[1] / ($total * 1.0) * 100.0 } @{$ar})); + my ( $ar, $total ) = @_; + return + join( "\n", ( map { sprintf "\t%s(%s: %.1f%%)", $_->[0], $_->[1], $_->[1] / ( $total * 1.0 ) * 100.0 } @{$ar} ) ); } sub SymbolsStat { - if ($total > 0) { - my $has_comma = 0; - while (my ($s, $r) = each(%sym_res)) { - if ($r->{hits} > 0) { - my $th = $r->{hits}; - my $sh = $r->{spam_hits}; - my $jh = $r->{junk_hits}; - my $hh = $r->{hits} - $sh - $jh; - my $htp = $hh * 100.0 / $total_ham if $total_ham != 0; - my $stp = $sh * 100.0 / $total_spam if $total_spam != 0; - my $jtp = $jh * 100.0 / $total_junk if $total_junk != 0; - - if ($json) { - if ($has_comma) { - print ","; - } - else { - $has_comma = 1; - } - print "\"$s\":{"; - JsonObjectElt("avg_weight", $r->{'weight'},"%.4f"); - print ","; - JsonObjectElt("hits", $th, "%d"); - print ","; - JsonObjectElt("hits_percentage", $th/$total, "%.4f"); - print ","; - JsonObjectElt("spam_hits", $sh, "%d"); - print ","; - JsonObjectElt("spam_to_total", $sh/$th, "%.4f"); - print ","; - JsonObjectElt("spam_percentage", $stp/100.0 || 0, "%.4f"); - print ","; - JsonObjectElt("ham_hits", $hh, "%d"); - print ","; - JsonObjectElt("ham_to_total", $hh/$th, "%.4f"); - print ","; - JsonObjectElt("ham_percentage", $htp/100.0 || 0, "%.4f"); - print ","; - JsonObjectElt("junk_hits", $jh, "%d"); - print ","; - JsonObjectElt("junk_to_total", $jh/$th, "%.4f"); - print ","; - JsonObjectElt("junk_percentage", $jtp/100.0 || 0, "%.4f"); - } - else { - printf "%s avg. weight %.3f, hits %d(%.3f%%): + if ( $total > 0 ) { + my $has_comma = 0; + while ( my ( $s, $r ) = each(%sym_res) ) { + if ( $r->{hits} > 0 ) { + my $th = $r->{hits}; + my $sh = $r->{spam_hits}; + my $jh = $r->{junk_hits}; + my $hh = $r->{hits} - $sh - $jh; + my $htp = $hh * 100.0 / $total_ham if $total_ham != 0; + my $stp = $sh * 100.0 / $total_spam if $total_spam != 0; + my $jtp = $jh * 100.0 / $total_junk if $total_junk != 0; + + if ($json) { + if ($has_comma) { + print ","; + } + else { + $has_comma = 1; + } + print "\"$s\":{"; + JsonObjectElt( "avg_weight", $r->{'weight'}, "%.4f" ); + print ","; + JsonObjectElt( "hits", $th, "%d" ); + print ","; + JsonObjectElt( "hits_percentage", $th / $total, "%.4f" ); + print ","; + JsonObjectElt( "spam_hits", $sh, "%d" ); + print ","; + JsonObjectElt( "spam_to_total", $sh / $th, "%.4f" ); + print ","; + JsonObjectElt( "spam_percentage", $stp / 100.0 || 0, "%.4f" ); + print ","; + JsonObjectElt( "ham_hits", $hh, "%d" ); + print ","; + JsonObjectElt( "ham_to_total", $hh / $th, "%.4f" ); + print ","; + JsonObjectElt( "ham_percentage", $htp / 100.0 || 0, "%.4f" ); + print ","; + JsonObjectElt( "junk_hits", $jh, "%d" ); + print ","; + JsonObjectElt( "junk_to_total", $jh / $th, "%.4f" ); + print ","; + JsonObjectElt( "junk_percentage", $jtp / 100.0 || 0, "%.4f" ); + } + else { + printf "%s avg. weight %.3f, hits %d(%.3f%%): Ham %7.3f%%, %6d/%-6d (%7.3f%%) Spam %7.3f%%, %6d/%-6d (%7.3f%%) Junk %7.3f%%, %6d/%-6d (%7.3f%%) -", - $s, $r->{weight} / $r->{hits}, $th, ($th / $total * 100), - ($hh / $th * 100), $hh, $total_ham, ($htp or 0), - ($sh / $th * 100), $sh, $total_spam, ($stp or 0), - ($jh / $th * 100), $jh, $total_junk, ($jtp or 0); - } +", $s, $r->{weight} / $r->{hits}, $th, ( $th / $total * 100 ), + ( $hh / $th * 100 ), $hh, $total_ham, ( $htp or 0 ), + ( $sh / $th * 100 ), $sh, $total_spam, ( $stp or 0 ), + ( $jh / $th * 100 ), $jh, $total_junk, ( $jtp or 0 ); + } - my $schp = $r->{spam_change} / $total_spam * 100.0 if $total_spam; - my $jchp = $r->{junk_change} / $total_junk * 100.0 if $total_junk; + my $schp = $r->{spam_change} / $total_spam * 100.0 if $total_spam; + my $jchp = $r->{junk_change} / $total_junk * 100.0 if $total_junk; - if ($r->{weight} != 0) { - if (!$json) { - if ($r->{weight} > 0) { - printf " + if ( $r->{weight} != 0 ) { + if ( !$json ) { + if ( $r->{weight} > 0 ) { + printf " Spam changes (ham/junk -> spam): %6d/%-6d (%7.3f%%) Spam changes / total spam hits: %6d/%-6d (%7.3f%%) Junk changes (ham -> junk): %6d/%-6d (%7.3f%%) Junk changes / total junk hits: %6d/%-6d (%7.3f%%) ", - $r->{spam_change}, $th, ($r->{spam_change} / $th * 100), - $r->{spam_change}, $total_spam, ($schp or 0), - $r->{junk_change}, $th, ($r->{junk_change} / $th * 100), - $r->{junk_change}, $total_junk, ($jchp or 0); - } - else { - printf " + $r->{spam_change}, $th, ( $r->{spam_change} / $th * 100 ), + $r->{spam_change}, $total_spam, ( $schp or 0 ), + $r->{junk_change}, $th, ( $r->{junk_change} / $th * 100 ), + $r->{junk_change}, $total_junk, ( $jchp or 0 ); + } + else { + printf " Spam changes (spam -> junk/ham): %6d/%-6d (%7.3f%%) Spam changes / total spam hits : %6d/%-6d (%7.3f%%) Junk changes (junk -> ham) : %6d/%-6d (%7.3f%%) Junk changes / total junk hits : %6d/%-6d (%7.3f%%) ", - $r->{spam_change}, $th, ($r->{spam_change} / $th * 100), - $r->{spam_change}, $total_spam, ($schp or 0), - $r->{junk_change}, $th, ($r->{junk_change} / $th * 100), - $r->{junk_change}, $total_junk, ($jchp or 0); - } - } - else { - print ","; - JsonObjectElt("spam_change", $r->{spam_change}, "%.4f"); - print ","; - JsonObjectElt("junk_change", $r->{junk_change}, "%.4f"); - } - } - - if ($correlations) { - - my $spam_related = GenRelated($r->{symbols_met_spam}, $s); - my $junk_related = GenRelated($r->{symbols_met_junk}, $s); - my $ham_related = GenRelated($r->{symbols_met_ham}, $s); - - if (!$json) { - print "Correlations report:\n"; - - while (my ($cs, $hits) = each %{$r->{corr}}) { - my $corr_prob = $r->{'hits'} / $total; - my $merged_hits = 0; - if($r->{symbols_met_spam}->{$cs}) { - $merged_hits += $r->{symbols_met_spam}->{$cs}; - } - if($r->{symbols_met_junk}->{$cs}) { - $merged_hits += $r->{symbols_met_junk}->{$cs}; - } - if($r->{symbols_met_ham}->{$cs}) { - $merged_hits += $r->{symbols_met_ham}->{$cs}; - } - - if ($merged_hits > 0) { - printf "Probability of %s when %s fires: %.3f\n", $cs, $s, - (($merged_hits / $total) / $corr_prob); - } - } + $r->{spam_change}, $th, ( $r->{spam_change} / $th * 100 ), + $r->{spam_change}, $total_spam, ( $schp or 0 ), + $r->{junk_change}, $th, ( $r->{junk_change} / $th * 100 ), + $r->{junk_change}, $total_junk, ( $jchp or 0 ); + } + } + else { + print ","; + JsonObjectElt( "spam_change", $r->{spam_change}, "%.4f" ); + print ","; + JsonObjectElt( "junk_change", $r->{junk_change}, "%.4f" ); + } + } - print "Related symbols report:\n"; - printf "Top related in spam:\n %s\n", StringifyRelated($spam_related, - $r->{spam_hits}); - printf "Top related in junk:\n %s\n", StringifyRelated($junk_related, - $r->{junk_hits}); - printf "Top related in ham:\n %s\n", StringifyRelated($ham_related, - $r->{hits} - $r->{spam_hits} - $r->{junk_hits}); - } - else { - print ","; - print "\"correllations\":{"; + if ($correlations) { + + my $spam_related = GenRelated( $r->{symbols_met_spam}, $s ); + my $junk_related = GenRelated( $r->{symbols_met_junk}, $s ); + my $ham_related = GenRelated( $r->{symbols_met_ham}, $s ); + + if ( !$json ) { + print "Correlations report:\n"; + + while ( my ( $cs, $hits ) = each %{ $r->{corr} } ) { + my $corr_prob = $r->{'hits'} / $total; + my $merged_hits = 0; + if ( $r->{symbols_met_spam}->{$cs} ) { + $merged_hits += $r->{symbols_met_spam}->{$cs}; + } + if ( $r->{symbols_met_junk}->{$cs} ) { + $merged_hits += $r->{symbols_met_junk}->{$cs}; + } + if ( $r->{symbols_met_ham}->{$cs} ) { + $merged_hits += $r->{symbols_met_ham}->{$cs}; + } + + if ( $merged_hits > 0 ) { + printf "Probability of %s when %s fires: %.3f\n", $cs, $s, + ( ( $merged_hits / $total ) / $corr_prob ); + } + } + + print "Related symbols report:\n"; + printf "Top related in spam:\n %s\n", StringifyRelated( $spam_related, $r->{spam_hits} ); + printf "Top related in junk:\n %s\n", StringifyRelated( $junk_related, $r->{junk_hits} ); + printf "Top related in ham:\n %s\n", + StringifyRelated( $ham_related, $r->{hits} - $r->{spam_hits} - $r->{junk_hits} ); + } + else { + print ","; + print "\"correllations\":{"; + + my $has_comma_ = 0; + while ( my ( $cs, $hits ) = each %{ $r->{corr} } ) { + if ($has_comma_) { + print ","; + } + else { + $has_comma_ = 1; + } + my $corr_prob = $hits / $total; + my $sym_prob = $r->{hits} / $total; + JsonObjectElt( $cs, ( $corr_prob / $sym_prob ), "%.4f" ); + } + + print "}"; + } + } - my $has_comma_ = 0; - while (my ($cs, $hits) = each %{$r->{corr}}) { - if ($has_comma_) { - print ","; - } - else { - $has_comma_ = 1; - } - my $corr_prob = $hits / $total; - my $sym_prob = $r->{hits} / $total; - JsonObjectElt($cs, ($corr_prob / $sym_prob) ,"%.4f"); + print "}" if $json; + } + else { + print "Symbol $s has not been met\n" if !$json; } - print "}"; - } + print '-' x 80 . "\n" if !$json; } - - print "}" if $json; - } - else { - print "Symbol $s has not been met\n" if !$json; - } - - print '-' x 80 . "\n" if !$json; } - } } sub Summary() { - if (!$json) { - print " + if ( !$json ) { + print " === Summary ", '=' x 68, " Messages scanned: $total"; - printf " [ %s / %s ] + printf " [ %s / %s ] ", $timeStamp{'start'}, $timeStamp{'end'} - if defined $timeStamp{'start'}; - say ''; - printf "%11s: %6.2f%%, %d\n", $_, 100 * $action{$_} / $total, $action{$_} - for sort keys %action; - say ''; - printf "scan time min/avg/max = %.2f/%.2f/%.2f s -", $scanTime{'min'} / 1000, - ($total) ? $scanTime{'total'} / $total / 1000 : undef, - $scanTime{'max'} / 1000 - if exists $scanTime{'min'}; - say '=' x 80; - } - else { - JsonObjectElt("total", $total, "%d"); - print ","; - - if (defined $timeStamp{'start'}) { - JsonObjectElt("start", $timeStamp{'start'}); - print ","; + if defined $timeStamp{'start'}; + say ''; + printf "%11s: %6.2f%%, %d\n", $_, 100 * $action{$_} / $total, $action{$_} for sort keys %action; + say ''; + printf "scan time min/avg/max = %.2f/%.2f/%.2f s +", $scanTime{'min'} / 1000, ($total) ? $scanTime{'total'} / $total / 1000 : undef, $scanTime{'max'} / 1000 + if exists $scanTime{'min'}; + say '=' x 80; } + else { + JsonObjectElt( "total", $total, "%d" ); + print ","; - if (defined $timeStamp{'end'}) { - JsonObjectElt("end", $timeStamp{'end'}); - print ","; - } + if ( defined $timeStamp{'start'} ) { + JsonObjectElt( "start", $timeStamp{'start'} ); + print ","; + } - print "\"actions\":{"; + if ( defined $timeStamp{'end'} ) { + JsonObjectElt( "end", $timeStamp{'end'} ); + print ","; + } - my $has_comma = 0; - foreach my $a (sort keys %action) { - if ($has_comma) { - print ","; - } - else { - $has_comma = 1; - } - JsonObjectElt($a, $action{$a}, "%d"); + print "\"actions\":{"; + + my $has_comma = 0; + foreach my $a ( sort keys %action ) { + if ($has_comma) { + print ","; + } + else { + $has_comma = 1; + } + JsonObjectElt( $a, $action{$a}, "%d" ); + } + print "},"; } - print "},"; - } } sub ProcessRelated { - my ($symbols, $target, $source) = @_; + my ( $symbols, $target, $source ) = @_; - foreach my $s (@{$symbols}) { - $s =~ /^([^\(]+)(\(([^\)]+)\))?/; - my $sym_name = $1; - my $sym_score = 0; + foreach my $s ( @{$symbols} ) { + $s =~ /^([^\(]+)(\(([^\)]+)\))?/; + my $sym_name = $1; + my $sym_score = 0; - if ($groups{$sym_name}) { - $sym_name = $groups{$sym_name}; - } + if ( $groups{$sym_name} ) { + $sym_name = $groups{$sym_name}; + } - next if ($source eq $sym_name); + next if ( $source eq $sym_name ); - next if IsIgnored($sym_name); + next if IsIgnored($sym_name); - if ($2) { - $sym_score = $3 * 1.0; + if ($2) { + $sym_score = $3 * 1.0; - if (abs($sym_score) < $diff_alpha) { - next; - } + if ( abs($sym_score) < $diff_alpha ) { + next; + } + + my $bm = $bidir_match{$sym_name}; + if ($bm) { + if ( $sym_score >= 0 ) { + $sym_name = $bm->{'spam'}; + } + else { + $sym_name = $bm->{'ham'}; + } + } + } - my $bm = $bidir_match{$sym_name}; - if ($bm) { - if ($sym_score >= 0) { - $sym_name = $bm->{'spam'}; + if ( exists( $target->{$sym_name} ) ) { + $target->{$sym_name}++; } else { - $sym_name = $bm->{'ham'}; + $target->{$sym_name} = 1; } - } - } - - if (exists($target->{$sym_name})) { - $target->{$sym_name} ++; - } - else { - $target->{$sym_name} = 1; } - } } sub ProcessLog { - my ( $ts_format, @line ) = &log_time_format($rspamd_log); + my ( $ts_format, @line ) = &log_time_format($rspamd_log); - while() { - last if eof $rspamd_log; - $_ = (@line) ? shift @line : <$rspamd_log>; + while () { + last if eof $rspamd_log; + $_ = (@line) ? shift @line : <$rspamd_log>; - if (!$enabled && ($search_pattern eq "" || /$search_pattern/)) { - $enabled = 1; - } - - next if !$enabled; - - if (/^.*rspamd_task_write_log.*$/) { - &spinner; - my $ts; - if ( $ts_format eq 'syslog' ) { - $ts = syslog2iso( join ' ', ( split /\s+/ )[ 0 .. 2 ] ); - } elsif ( $ts_format eq 'syslog5424' ) { - /^([0-9-]+)T([0-9:]+)/; - $ts = "$1 $2"; - } else { - $ts = join ' ', ( split /\s+/ )[ 0 .. 1 ]; - } - - next if ( $ts lt $startTime ); - next if ( defined $endTime && $ts gt $endTime ); - - if ($_ !~ /\(([^()]+)\): \[(NaN|-?\d+(?:\.\d+)?)\/(-?\d+(?:\.\d+)?)\]\s+\[([^\]]+)\].+? time: (\d+\.\d+)ms real/) { - #print "BAD: $_\n"; - next; - } - - my @symbols = split /(?:\{[^}]*\})?(?:$|,)/, $4; - my $scan_time = $5; - my $act = $1; - my $score = $2 * 1.0; - my $skip = 0; - - foreach my $ex (@symbols_exclude) { - my @found = grep {/^$ex/} @symbols; - - if (scalar(@found) > 0) { - $skip = 1; - last; + if ( !$enabled && ( $search_pattern eq "" || /$search_pattern/ ) ) { + $enabled = 1; } - } - - next if ( $skip != 0 ); - - if (defined($timeStamp{'end'})) { - $timeStamp{'end'} = $ts if ( $ts gt $timeStamp{'end'} ); - } - else { - $timeStamp{'end'} = $ts; - } - - if (defined($timeStamp{'start'})) { - $timeStamp{'start'} = $ts if ( $ts lt $timeStamp{'start'} ); - } - else { - $timeStamp{'start'} = $ts; - } - - $scanTime{'min'} = $scan_time if ( !exists $scanTime{'min'} || $scanTime{'min'} > $scan_time ); - $scanTime{'max'} = $scan_time if ( $scanTime{'max'} < $scan_time ); - $scanTime{'total'} += $scan_time; - - $action{$act}++; - $total ++; - if ($score >= $reject_score) { - $total_spam ++; - } - elsif ($score >= $junk_score) { - $total_junk ++; - } + next if !$enabled; - my @sym_names; - - foreach my $s (@symbols_search) { - my @selected = grep /$s/, @symbols; + if (/^.*rspamd_task_write_log.*$/) { + &spinner; + my $ts; + if ( $ts_format eq 'syslog' ) { + $ts = syslog2iso( join ' ', ( split /\s+/ )[ 0 .. 2 ] ); + } + elsif ( $ts_format eq 'syslog5424' ) { + /^([0-9-]+)T([0-9:]+)/; + $ts = "$1 $2"; + } + else { + $ts = join ' ', ( split /\s+/ )[ 0 .. 1 ]; + } - if (scalar(@selected) > 0) { + next if ( $ts lt $startTime ); + next if ( defined $endTime && $ts gt $endTime ); - foreach my $sym (@selected) { - $sym =~ /^([^\(]+)(\(([^\)]+)\))?/; - my $sym_name = $1; - my $sym_score = 0; - my $orig_name = $sym_name; + if ( $_ !~ + /\(([^()]+)\): \[(NaN|-?\d+(?:\.\d+)?)\/(-?\d+(?:\.\d+)?)\]\s+\[([^\]]+)\].+? time: (\d+\.\d+)ms real/ ) + { + #print "BAD: $_\n"; + next; + } - if ($2) { - $sym_score = $3 * 1.0; + my @symbols = split /(?:\{[^}]*\})?(?:$|,)/, $4; + my $scan_time = $5; + my $act = $1; + my $score = $2 * 1.0; + my $skip = 0; - if (abs($sym_score) < $diff_alpha) { - next; - } + foreach my $ex (@symbols_exclude) { + my @found = grep { /^$ex/ } @symbols; - my $bm = $bidir_match{$sym_name}; - if ($bm) { - if ($sym_score >= 0) { - $sym_name = $bm->{'spam'}; - } - else { - $sym_name = $bm->{'ham'}; + if ( scalar(@found) > 0 ) { + $skip = 1; + last; } - } } - next if $orig_name !~ /^$s/; + next if ( $skip != 0 ); - if ($groups{$s}) { - # Replace with group - $sym_name = $groups{$s}; + if ( defined( $timeStamp{'end'} ) ) { + $timeStamp{'end'} = $ts if ( $ts gt $timeStamp{'end'} ); + } + else { + $timeStamp{'end'} = $ts; } - push @sym_names, $sym_name; - - if (!$sym_res{$sym_name}) { - $sym_res{$sym_name} = { - hits => 0, - spam_hits => 0, - junk_hits => 0, - spam_change => 0, - junk_change => 0, - weight => 0, - corr => {}, - symbols_met_spam => {}, - symbols_met_ham => {}, - symbols_met_junk => {}, - }; + if ( defined( $timeStamp{'start'} ) ) { + $timeStamp{'start'} = $ts if ( $ts lt $timeStamp{'start'} ); + } + else { + $timeStamp{'start'} = $ts; } - my $r = $sym_res{$sym_name}; + $scanTime{'min'} = $scan_time if ( !exists $scanTime{'min'} || $scanTime{'min'} > $scan_time ); + $scanTime{'max'} = $scan_time if ( $scanTime{'max'} < $scan_time ); + $scanTime{'total'} += $scan_time; - $r->{hits} ++; - $r->{weight} += $sym_score; - my $is_spam = 0; - my $is_junk = 0; + $action{$act}++; + $total++; - if ($score >= $reject_score) { - $is_spam = 1; - $r->{spam_hits} ++; - if ($correlations) { - ProcessRelated(\@symbols, $r->{symbols_met_spam}, $sym_name); - } + if ( $score >= $reject_score ) { + $total_spam++; } - elsif ($score >= $junk_score) { - $is_junk = 1; - $r->{junk_hits} ++; - if ($correlations) { - ProcessRelated(\@symbols, $r->{symbols_met_junk}, $sym_name); - } - } - else { - if ($correlations) { - ProcessRelated(\@symbols, $r->{symbols_met_ham}, $sym_name); - } + elsif ( $score >= $junk_score ) { + $total_junk++; } - if ($sym_score != 0) { - my $score_without = $score - $sym_score; - - if ($sym_score > 0) { - if ($is_spam && $score_without < $reject_score) { - $r->{spam_change} ++; - } - if ($is_junk && $score_without < $junk_score) { - $r->{junk_change} ++; + my @sym_names; + + foreach my $s (@symbols_search) { + my @selected = grep /$s/, @symbols; + + if ( scalar(@selected) > 0 ) { + + foreach my $sym (@selected) { + $sym =~ /^([^\(]+)(\(([^\)]+)\))?/; + my $sym_name = $1; + my $sym_score = 0; + my $orig_name = $sym_name; + + if ($2) { + $sym_score = $3 * 1.0; + + if ( abs($sym_score) < $diff_alpha ) { + next; + } + + my $bm = $bidir_match{$sym_name}; + if ($bm) { + if ( $sym_score >= 0 ) { + $sym_name = $bm->{'spam'}; + } + else { + $sym_name = $bm->{'ham'}; + } + } + } + + next if $orig_name !~ /^$s/; + + if ( $groups{$s} ) { + + # Replace with group + $sym_name = $groups{$s}; + } + + push @sym_names, $sym_name; + + if ( !$sym_res{$sym_name} ) { + $sym_res{$sym_name} = { + hits => 0, + spam_hits => 0, + junk_hits => 0, + spam_change => 0, + junk_change => 0, + weight => 0, + corr => {}, + symbols_met_spam => {}, + symbols_met_ham => {}, + symbols_met_junk => {}, + }; + } + + my $r = $sym_res{$sym_name}; + + $r->{hits}++; + $r->{weight} += $sym_score; + my $is_spam = 0; + my $is_junk = 0; + + if ( $score >= $reject_score ) { + $is_spam = 1; + $r->{spam_hits}++; + if ($correlations) { + ProcessRelated( \@symbols, $r->{symbols_met_spam}, $sym_name ); + } + } + elsif ( $score >= $junk_score ) { + $is_junk = 1; + $r->{junk_hits}++; + if ($correlations) { + ProcessRelated( \@symbols, $r->{symbols_met_junk}, $sym_name ); + } + } + else { + if ($correlations) { + ProcessRelated( \@symbols, $r->{symbols_met_ham}, $sym_name ); + } + } + + if ( $sym_score != 0 ) { + my $score_without = $score - $sym_score; + + if ( $sym_score > 0 ) { + if ( $is_spam && $score_without < $reject_score ) { + $r->{spam_change}++; + } + if ( $is_junk && $score_without < $junk_score ) { + $r->{junk_change}++; + } + } + else { + if ( !$is_spam && $score_without >= $reject_score ) { + $r->{spam_change}++; + } + if ( !$is_junk && $score_without >= $junk_score ) { + $r->{junk_change}++; + } + } + } + } # End foreach symbols selected } - } - else { - if (!$is_spam && $score_without >= $reject_score) { - $r->{spam_change} ++; - } - if (!$is_junk && $score_without >= $junk_score) { - $r->{junk_change} ++; - } - } } - } # End foreach symbols selected - } - } - - if ($correlations) { - foreach my $sym (@sym_names) { - next if IsIgnored($sym); - my $r = $sym_res{$sym}; - - foreach my $corr_sym (@sym_names) { - if ($corr_sym ne $sym) { - if ($r->{'corr'}->{$corr_sym}) { - $r->{'corr'}->{$corr_sym} ++; - } - else { - $r->{'corr'}->{$corr_sym} = 1; - } + + if ($correlations) { + foreach my $sym (@sym_names) { + next if IsIgnored($sym); + my $r = $sym_res{$sym}; + + foreach my $corr_sym (@sym_names) { + if ( $corr_sym ne $sym ) { + if ( $r->{'corr'}->{$corr_sym} ) { + $r->{'corr'}->{$corr_sym}++; + } + else { + $r->{'corr'}->{$corr_sym} = 1; + } + } + } + } # End of correlations check } - } - } # End of correlations check - } + } } - } } sub JsonObjectElt() { - my ($k, $v) = @_; - my $f = defined $_[2] ? $_[2] : '%s'; + my ( $k, $v ) = @_; + my $f = defined $_[2] ? $_[2] : '%s'; - if ($f eq "%s") { - $f = "\"%s\""; - } + if ( $f eq "%s" ) { + $f = "\"%s\""; + } - printf "\"%s\":$f", $k, $v; + printf "\"%s\":$f", $k, $v; } sub GetLogfilesList { - my ($dir) = @_; - opendir( DIR, $dir ) or die $!; + my ($dir) = @_; + opendir( DIR, $dir ) or die $!; - my $pattern = join( '|', keys %decompressor ); - my $re = qr/\.[0-9]+(?:\.(?:$pattern))?/; + my $pattern = join( '|', keys %decompressor ); + my $re = qr/\.[0-9]+(?:\.(?:$pattern))?/; - # Add unnumbered logs first - my @logs = - grep { -f "$dir/$_" && !/$re/ } readdir(DIR); + # Add unnumbered logs first + my @logs = + grep { -f "$dir/$_" && !/$re/ } readdir(DIR); - # Add numbered logs - rewinddir(DIR); - push( @logs, - ( sort numeric ( grep { -f "$dir/$_" && /$re/ } readdir(DIR) ) ) ); + # Add numbered logs + rewinddir(DIR); + push( @logs, ( sort numeric ( grep { -f "$dir/$_" && /$re/ } readdir(DIR) ) ) ); - closedir(DIR); + closedir(DIR); - # Select required logs and revers their order - @logs = - reverse - splice( @logs, $exclude_logs, $num_logs ||= @logs - $exclude_logs ); + # Select required logs and revers their order + @logs = + reverse splice( @logs, $exclude_logs, $num_logs ||= @logs - $exclude_logs ); - # Loop through array printing out filenames - print {interactive(*STDERR)} "\nLog files to process:\n"; - foreach my $file (@logs) { - print {interactive(*STDERR)} " $file\n"; - } - print {interactive(*STDERR)} "\n"; + # Loop through array printing out filenames + print { interactive(*STDERR) } "\nLog files to process:\n"; + foreach my $file (@logs) { + print { interactive(*STDERR) } " $file\n"; + } + print { interactive(*STDERR) } "\n"; - return @logs; + return @logs; } sub log_time_format { - my $fh = shift; - my ( $format, $line ); - while (<$fh>) { - $line = $_; - - # 2017-08-08 00:00:01 #66984( - # 2017-08-08 00:00:01.001 #66984( - if (/^\d{4}-\d\d-\d\d \d\d:\d\d:\d\d(\.\d{3})? #\d+\(/) { - $format = 'rspamd'; - last; - } + my $fh = shift; + my ( $format, $line ); + while (<$fh>) { + $line = $_; + + # 2017-08-08 00:00:01 #66984( + # 2017-08-08 00:00:01.001 #66984( + if (/^\d{4}-\d\d-\d\d \d\d:\d\d:\d\d(\.\d{3})? #\d+\(/) { + $format = 'rspamd'; + last; + } - # Aug 8 00:02:50 #66986( - elsif (/^\w{3} (?:\s?\d|\d\d) \d\d:\d\d:\d\d #\d+\(/) { - $format = 'syslog'; - last; - } + # Aug 8 00:02:50 #66986( + elsif (/^\w{3} (?:\s?\d|\d\d) \d\d:\d\d:\d\d #\d+\(/) { + $format = 'syslog'; + last; + } - # Aug 8 00:02:50 hostname rspamd[66986] - elsif (/^\w{3} (?:\s?\d|\d\d) \d\d:\d\d:\d\d \S+ rspamd\[\d+\]/) { - $format = 'syslog'; - last; - } + # Aug 8 00:02:50 hostname rspamd[66986] + elsif (/^\w{3} (?:\s?\d|\d\d) \d\d:\d\d:\d\d \S+ rspamd\[\d+\]/) { + $format = 'syslog'; + last; + } - # 2018-04-16T06:25:46.012590+02:00 rspamd rspamd[12968] - elsif(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{1,6})?(Z|[-+]\d{2}:\d{2}) \S+ rspamd\[\d+\]/) { - $format = 'syslog5424'; - last; - } + # 2018-04-16T06:25:46.012590+02:00 rspamd rspamd[12968] + elsif (/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{1,6})?(Z|[-+]\d{2}:\d{2}) \S+ rspamd\[\d+\]/) { + $format = 'syslog5424'; + last; + } - # Skip newsyslog messages - # Aug 8 00:00:00 hostname newsyslog[63284]: logfile turned over - elsif ( /^\w{3} (?:\s?\d|\d\d) \d\d:\d\d:\d\d\ \S+ newsyslog\[\d+\]: logfile turned over$/ ) { - next; - } - # Skip journalctl messages - # -- Logs begin at Mon 2018-01-15 11:16:24 MSK, end at Fri 2018-04-27 09:10:30 MSK. -- - elsif ( /^-- Logs begin at \w{3} \d{4}-\d\d-\d\d \d\d:\d\d:\d\d [A-Z]{3}, end at \w{3} \d{4}-\d\d-\d\d \d\d:\d\d:\d\d [A-Z]{3}\. --$/ ) { - next; - } - else { - print "Unknown log format\n"; - exit 1; + # Skip newsyslog messages + # Aug 8 00:00:00 hostname newsyslog[63284]: logfile turned over + elsif (/^\w{3} (?:\s?\d|\d\d) \d\d:\d\d:\d\d\ \S+ newsyslog\[\d+\]: logfile turned over$/) { + next; + } + + # Skip journalctl messages + # -- Logs begin at Mon 2018-01-15 11:16:24 MSK, end at Fri 2018-04-27 09:10:30 MSK. -- + elsif ( +/^-- Logs begin at \w{3} \d{4}-\d\d-\d\d \d\d:\d\d:\d\d [A-Z]{3}, end at \w{3} \d{4}-\d\d-\d\d \d\d:\d\d:\d\d [A-Z]{3}\. --$/ + ) + { + next; + } + else { + print "Unknown log format\n"; + exit 1; + } } - } - return ( $format, $line ); + return ( $format, $line ); } sub normalized_time { - return undef - if !defined( $_ = shift ); + return undef + if !defined( $_ = shift ); - /^\d\d(?::\d\d){0,2}$/ - ? sprintf '%04d-%02d-%02d %s', 1900 + (localtime)[5], 1 + (localtime)[4], - (localtime)[3], $_ - : $_; + /^\d\d(?::\d\d){0,2}$/ + ? sprintf '%04d-%02d-%02d %s', 1900 + (localtime)[5], 1 + (localtime)[4], (localtime)[3], $_ + : $_; } sub numeric { - $a =~ /\.(\d+)\./; - my $a_num = $1; - $b =~ /\.(\d+)\./; - my $b_num = $1; + $a =~ /\.(\d+)\./; + my $a_num = $1; + $b =~ /\.(\d+)\./; + my $b_num = $1; - $a_num <=> $b_num; + $a_num <=> $b_num; } sub spinner { @@ -780,7 +780,7 @@ sub spinner { return if ( ( time - $spinner_update_time ) < 1 ); $spinner_update_time = time; - printf {interactive(*STDERR)} "%s\r", $spinner[ $spinner_update_time % @spinner ]; + printf { interactive(*STDERR) } "%s\r", $spinner[ $spinner_update_time % @spinner ]; select()->flush(); } @@ -788,33 +788,30 @@ sub spinner { # using current year as syslog does not record the year (nor the timezone) # or the last year if the guessed time is in the future. sub syslog2iso { - my %month_map; - @month_map{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)} = 0 .. 11; - - my ( $month, @t ) = - $_[0] =~ m/^(\w{3}) \s\s? (\d\d?) \s (\d\d):(\d\d):(\d\d)/x; - my $epoch = - timelocal( ( reverse @t ), $month_map{$month}, 1900 + (localtime)[5] ); - sprintf '%04d-%02d-%02d %02d:%02d:%02d', - 1900 + (localtime)[5] - ( $epoch > time ), - $month_map{$month} + 1, @t; + my %month_map; + @month_map{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)} = 0 .. 11; + + my ( $month, @t ) = $_[0] =~ m/^(\w{3}) \s\s? (\d\d?) \s (\d\d):(\d\d):(\d\d)/x; + my $epoch = + timelocal( ( reverse @t ), $month_map{$month}, 1900 + (localtime)[5] ); + sprintf '%04d-%02d-%02d %02d:%02d:%02d', 1900 + (localtime)[5] - ( $epoch > time ), $month_map{$month} + 1, @t; } ### Imported from IO::Interactive 1.022 Perl module sub is_interactive { - my ($out_handle) = (@_, select); # Default to default output handle + my ($out_handle) = ( @_, select ); # Default to default output handle # Not interactive if output is not to terminal... return 0 if not -t $out_handle; # If *ARGV is opened, we're interactive if... - if ( tied(*ARGV) or defined(fileno(ARGV)) ) { # this is what 'Scalar::Util::openhandle *ARGV' boils down to + if ( tied(*ARGV) or defined( fileno(ARGV) ) ) { # this is what 'Scalar::Util::openhandle *ARGV' boils down to # ...it's currently opened to the magic '-' file return -t *STDIN if defined $ARGV && $ARGV eq '-'; # ...it's at end-of-file and the next file is the magic '-' file - return @ARGV>0 && $ARGV[0] eq '-' && -t *STDIN if eof *ARGV; + return @ARGV > 0 && $ARGV[0] eq '-' && -t *STDIN if eof *ARGV; # ...it's directly attached to the terminal return -t *ARGV; @@ -828,17 +825,18 @@ sub is_interactive { } ### Imported from IO::Interactive 1.022 Perl module -local (*DEV_NULL, *DEV_NULL2); +local ( *DEV_NULL, *DEV_NULL2 ); my $dev_null; + BEGIN { pipe *DEV_NULL, *DEV_NULL2 - or die "Internal error: can't create null filehandle"; + or die "Internal error: can't create null filehandle"; $dev_null = \*DEV_NULL; } ### Imported from IO::Interactive 1.022 Perl module sub interactive { - my ($out_handle) = (@_, \*STDOUT); # Default to STDOUT + my ($out_handle) = ( @_, \*STDOUT ); # Default to STDOUT return &is_interactive ? $out_handle : $dev_null; } |