From 7d3b279aa3e78c72e8c1a8f022ac0cd84293df5b Mon Sep 17 00:00:00 2001 From: Alexander Moisseev Date: Thu, 11 Aug 2016 10:11:10 +0300 Subject: [PATCH] [Feature] rspamd_stats: support log directory reading rspamd_stats will read (and decompress) multiple log files in the specified directory. --- utils/rspamd_stats.pl | 94 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 2 deletions(-) diff --git a/utils/rspamd_stats.pl b/utils/rspamd_stats.pl index a12c8a3c3..0d9571b42 100644 --- a/utils/rspamd_stats.pl +++ b/utils/rspamd_stats.pl @@ -13,9 +13,18 @@ my $diff_alpha = 0.1; my $correlations = 0; my $log_file = ""; my $search_pattern = ""; +my $num_logs; +my $exclude_logs = 0; my $man = 0; my $help = 0; +# Associate file extensions with decompressors +my %decompressor = ( + 'bz2' => 'bzcat', + 'gz' => 'zcat', + 'xz' => 'xzcat', +); + GetOptions( "reject-score|r=f" => \$reject_score, "junk-score|j=f" => \$junk_score, @@ -24,6 +33,8 @@ GetOptions( "alpha|a=f" => \$diff_alpha, "correlations|c" => \$correlations, "search-pattern=s" => \$search_pattern, + "num-logs|n=i" => \$num_logs, + "exclude-logs|x=i" => \$exclude_logs, "help|?" => \$help, "man" => \$man ) or pod2usage(2); @@ -51,6 +62,25 @@ if ($log_file eq '-' || $log_file eq '') { $rspamd_log = \*STDIN; &ProcessLog(); } +elsif ( -d "$log_file" ) { + my $log_dir = "$log_file"; + + my @logs = &GetLogfilesList($log_dir); + + # Process logs + foreach (@logs) { + my $ext = (/[^.]+\.?([^.]*?)$/)[0]; + my $dc = $decompressor{$ext} || 'cat'; + + open( $rspamd_log, "-|", "$dc $log_dir/$_" ) + or die "cannot execute $dc $log_dir/$_ : $!"; + + &ProcessLog; + + close($rspamd_log) + or warn "cannot close $dc $log_dir/$_: $!"; + } +} else { open($rspamd_log, '<', $log_file) or die "cannot open $log_file"; &ProcessLog(); @@ -128,6 +158,8 @@ Junk changes / total junk hits : %6d/%-6d (%7.3f%%) } } +exit; + sub ProcessLog { while(<$rspamd_log>) { if (!$enabled && ($search_pattern eq "" || /$search_pattern/)) { @@ -251,6 +283,48 @@ sub ProcessLog { } } +sub GetLogfilesList { + my ($dir) = @_; + opendir( DIR, $dir ) or die $!; + + my $pattern = join( '|', keys %decompressor ); + my $re = qr/\.[0-9]+(?:\.(?:$pattern))?/; + + # Add unnumbered logs first + my @logs = + grep { -f "$dir/$_" && !/$re/ } readdir(DIR); + + # Add numbered logs + rewinddir(DIR); + push( @logs, + ( sort numeric ( grep { -f "$dir/$_" && /$re/ } readdir(DIR) ) ) ); + + closedir(DIR); + + # Select required logs and revers their order + @logs = + reverse + splice( @logs, $exclude_logs, $num_logs ||= @logs - $exclude_logs ); + + # Loop through array printing out filenames + print "\nParsing log files:\n"; + foreach my $file (@logs) { + print " $file\n"; + } + print "\n"; + + return @logs; +} + +sub numeric { + $a =~ /\.(\d+)\./; + my $a_num = $1; + $b =~ /\.(\d+)\./; + my $b_num = $1; + + $a_num <=> $b_num; +} + __END__ =head1 NAME @@ -262,13 +336,15 @@ rspamd_stats - analyze Rspamd rules by parsing log files rspamd_stats [options] [--symbol=SYM1 [--symbol=SYM2...]] [--log file] Options: - --log=file log file to read (stdin by default) + --log=file log file or directory to read (stdin by default) --reject-score=score set reject threshold (15 by default) --junk-score=score set junk score (6.0 by default) --symbol=sym check specified symbol (perl regexps, '.*' by default) --alpha=value set ignore score for symbols (0.1 by default) --correlations enable correlations report --search-pattern do not process input unless the desired pattern is found + --num-logs=integer number of recent logfiles to analyze (all files in the directory by default) + --exclude-logs=integer number of latest logs to exclude (0 by default) --help brief help message --man full documentation @@ -278,7 +354,13 @@ rspamd_stats [options] [--symbol=SYM1 [--symbol=SYM2...]] [--log file] =item B<--log> -Specifies log file to read data from. +Specifies log file or directory to read data from. +If a directory is specified B analyses files in the directory +including known compressed file types. Number of log files can be limited using +B<--num-logs> and B<--exclude-logs> options. This assumes that files in the log +directory have B- or B-like name format with numeric +indexes. Files without indexes (generally it is merely one file) are considered +the most recent and files with lower indexes are considered newer. =item B<--reject-score> @@ -296,6 +378,14 @@ Specifies the minimum score for a symbol to be considered by this script. Add symbol or pattern (pcre format) to analyze. +=item B<--num-logs> + +If set, limits number of analyzed logfiles in the directory to the specified value. + +=item B<--exclude-logs> + +Number of latest logs to exclude (0 by default). + =item B<--correlations> Additionaly print correlation rate for each symbol displayed. This routine calculates merely paired correlations between symbols. -- 2.39.5