aboutsummaryrefslogtreecommitdiffstats
path: root/utils
diff options
context:
space:
mode:
authorAlexander Moisseev <moiseev@mezonplus.ru>2016-08-11 10:11:10 +0300
committerAlexander Moisseev <moiseev@mezonplus.ru>2016-08-11 10:11:10 +0300
commit7d3b279aa3e78c72e8c1a8f022ac0cd84293df5b (patch)
treea2bf456281e73612a7d9a5e67f70db50ec045e59 /utils
parent68fc43a8f0f681ea36cf7e7fe89126c4f349ebca (diff)
downloadrspamd-7d3b279aa3e78c72e8c1a8f022ac0cd84293df5b.tar.gz
rspamd-7d3b279aa3e78c72e8c1a8f022ac0cd84293df5b.zip
[Feature] rspamd_stats: support log directory reading
rspamd_stats will read (and decompress) multiple log files in the specified directory.
Diffstat (limited to 'utils')
-rw-r--r--utils/rspamd_stats.pl94
1 files changed, 92 insertions, 2 deletions
diff --git a/utils/rspamd_stats.pl b/utils/rspamd_stats.pl
index a12c8a3c3..0d9571b42 100644
--- a/utils/rspamd_stats.pl
+++ b/utils/rspamd_stats.pl
@@ -13,9 +13,18 @@ my $diff_alpha = 0.1;
my $correlations = 0;
my $log_file = "";
my $search_pattern = "";
+my $num_logs;
+my $exclude_logs = 0;
my $man = 0;
my $help = 0;
+# Associate file extensions with decompressors
+my %decompressor = (
+ 'bz2' => 'bzcat',
+ 'gz' => 'zcat',
+ 'xz' => 'xzcat',
+);
+
GetOptions(
"reject-score|r=f" => \$reject_score,
"junk-score|j=f" => \$junk_score,
@@ -24,6 +33,8 @@ GetOptions(
"alpha|a=f" => \$diff_alpha,
"correlations|c" => \$correlations,
"search-pattern=s" => \$search_pattern,
+ "num-logs|n=i" => \$num_logs,
+ "exclude-logs|x=i" => \$exclude_logs,
"help|?" => \$help,
"man" => \$man
) or pod2usage(2);
@@ -51,6 +62,25 @@ if ($log_file eq '-' || $log_file eq '') {
$rspamd_log = \*STDIN;
&ProcessLog();
}
+elsif ( -d "$log_file" ) {
+    # --log points at a directory: feed each selected log file in it to
+    # ProcessLog(), in the order GetLogfilesList returns them.
+    my $log_dir = "$log_file";
+
+    my @logs = GetLogfilesList($log_dir);
+
+    # Process logs.
+    # A named loop variable is used on purpose: ProcessLog() reads lines
+    # into the global $_, which would otherwise overwrite the aliased list
+    # element and leave the messages below without a file name.
+    foreach my $log_name (@logs) {
+        # Pick a decompressor by the last file name extension; anything
+        # unknown (or no extension at all) is read verbatim with cat.
+        my $ext = ( $log_name =~ /[^.]+\.?([^.]*?)$/ )[0];
+        my $dc = ( defined $ext && $decompressor{$ext} ) || 'cat';
+        my $path = "$log_dir/$log_name";
+
+        # List-form pipe open runs the command without a shell, so file
+        # names containing spaces or shell metacharacters are passed as-is.
+        open( $rspamd_log, "-|", $dc, $path )
+            or die "cannot execute $dc $path : $!";
+
+        ProcessLog();
+
+        close($rspamd_log)
+            or warn "cannot close $dc $path: $!";
+    }
+}
else {
open($rspamd_log, '<', $log_file) or die "cannot open $log_file";
&ProcessLog();
@@ -128,6 +158,8 @@ Junk changes / total junk hits : %6d/%-6d (%7.3f%%)
}
}
+exit;
+
sub ProcessLog {
while(<$rspamd_log>) {
if (!$enabled && ($search_pattern eq "" || /$search_pattern/)) {
@@ -251,6 +283,48 @@ sub ProcessLog {
}
}
+# Build the list of log files to analyze in directory $dir.
+#
+# Files whose names contain a numeric index (".N", optionally followed by a
+# known compression extension) are sorted by that index; files without an
+# index are placed before them. The first $exclude_logs entries are then
+# skipped, at most $num_logs of the remaining ones are kept (all of them when
+# $num_logs is unset or 0), and the selection is returned in reverse order.
+#
+# Prints the selected file names to STDOUT and returns them as a list of
+# names relative to $dir. Dies if the directory cannot be opened.
+sub GetLogfilesList {
+    my ($dir) = @_;
+
+    # Lexical handle instead of the bareword DIR, which is a package global.
+    # The directory is read once so both groups come from the same snapshot.
+    opendir( my $dh, $dir ) or die "cannot open directory $dir: $!";
+    my @files = grep { -f "$dir/$_" } readdir($dh);
+    closedir($dh);
+
+    my $pattern = join( '|', keys %decompressor );
+    my $re = qr/\.[0-9]+(?:\.(?:$pattern))?/;
+
+    # Add unnumbered logs first
+    my @logs = grep { !/$re/ } @files;
+
+    # Add numbered logs
+    push( @logs, ( sort numeric ( grep { /$re/ } @files ) ) );
+
+    # Number of logs left after skipping $exclude_logs entries; clamped so
+    # that excluding more logs than exist yields an empty selection instead
+    # of a splice() past the end of the array.
+    my $available = @logs - $exclude_logs;
+    $available = 0 if $available < 0;
+    $num_logs ||= $available;
+
+    # Select required logs and reverse their order
+    @logs =
+        $available
+        ? reverse( splice( @logs, $exclude_logs, $num_logs ) )
+        : ();
+
+    # Loop through array printing out filenames
+    print "\nParsing log files:\n";
+    foreach my $file (@logs) {
+        print " $file\n";
+    }
+    print "\n";
+
+    return @logs;
+}
+
+# Sort comparator (used as "sort numeric LIST"): orders log file names by the
+# numeric index embedded in the name, e.g. "rspamd.log.2.gz" before
+# "rspamd.log.10.gz".
+#
+# The index is the first ".N" that is followed by another dot or by the end
+# of the name, so uncompressed rotated logs such as "rspamd.log.1" are
+# handled as well. The capture is taken in list context rather than from $1,
+# because $1 keeps its previous value after a failed match; names without an
+# index compare as 0.
+sub numeric {
+    my ($a_num) = $a =~ /\.(\d+)(?:\.|$)/;
+    my ($b_num) = $b =~ /\.(\d+)(?:\.|$)/;
+
+    $a_num = 0 unless defined $a_num;
+    $b_num = 0 unless defined $b_num;
+
+    return $a_num <=> $b_num;
+}
+
__END__
=head1 NAME
@@ -262,13 +336,15 @@ rspamd_stats - analyze Rspamd rules by parsing log files
rspamd_stats [options] [--symbol=SYM1 [--symbol=SYM2...]] [--log file]
Options:
- --log=file log file to read (stdin by default)
+ --log=file log file or directory to read (stdin by default)
--reject-score=score set reject threshold (15 by default)
--junk-score=score set junk score (6.0 by default)
--symbol=sym check specified symbol (perl regexps, '.*' by default)
--alpha=value set ignore score for symbols (0.1 by default)
--correlations enable correlations report
--search-pattern do not process input unless the desired pattern is found
+ --num-logs=integer number of recent logfiles to analyze (all files in the directory by default)
+ --exclude-logs=integer number of latest logs to exclude (0 by default)
--help brief help message
--man full documentation
@@ -278,7 +354,13 @@ rspamd_stats [options] [--symbol=SYM1 [--symbol=SYM2...]] [--log file]
=item B<--log>
-Specifies log file to read data from.
+Specifies log file or directory to read data from.
+If a directory is specified, B<rspamd_stats> analyses the files in that directory,
+including known compressed file types. The number of log files can be limited using
+the B<--num-logs> and B<--exclude-logs> options. This assumes that files in the log
+directory follow a B<newsyslog(8)>- or B<logrotate(8)>-like naming scheme with numeric
+indexes. Files without an index (usually there is just one) are considered
+the most recent, and files with lower indexes are considered newer.
=item B<--reject-score>
@@ -296,6 +378,14 @@ Specifies the minimum score for a symbol to be considered by this script.
Add symbol or pattern (pcre format) to analyze.
+=item B<--num-logs>
+
+If set, limits number of analyzed logfiles in the directory to the specified value.
+
+=item B<--exclude-logs>
+
+Number of latest logs to exclude (0 by default).
+
=item B<--correlations>
Additionally print the correlation rate for each symbol displayed. This routine calculates only pairwise correlations between symbols.