#!/usr/bin/env perl

use warnings;
use strict;
use JSON::XS;
use AnyEvent;
use AnyEvent::HTTP;
use AnyEvent::IO;
use EV;
use Pod::Usage;
use Getopt::Long;
use File::stat;

my $rspamd_host     = "localhost:11333";
my $man             = 0;
my $help            = 0;
my $local           = 0;
my $header          = "X-Spam: yes";
my $max_size        = 10 * 1024 * 1024;          # 10 MB
my $request_timeout = 15;                        # 15 seconds by default
my $reject_message  = "Spam message rejected";

GetOptions(
    "host=s"           => \$rspamd_host,
    "header=s"         => \$header,
    "reject-message=s" => \$reject_message,
    "max-size=i"       => \$max_size,
    "timeout=f"        => \$request_timeout,
    "help|?"           => \$help,
    "man"              => \$man
) or pod2usage(2);

pod2usage(1) if $help;
pod2usage( -exitval => 0, -verbose => 2 ) if $man;

my $main_domain = cgp_main_domain();
my $scanned     = 0;

# Turn off bufferization as required by CGP
$| = 1;

sub cgp_main_domain {
    if ( open( my $fh, 'Settings/Main.settings' ) ) {
        while (<$fh>) {
            if (/^\s+DomainName\s+=\s+([^;]+);/) {
                return $1;
            }
        }
    }
}

sub cgp_string {
    my ($in) = @_;

    $in =~ s/\"/\\"/g;
    $in =~ s/\n/\\n/gms;
    $in =~ s/\r/\\r/mgs;
    $in =~ s/\t/  /g;

    return "\"$in\"";
}

sub rspamd_scan {
    my ( $tag, $file ) = @_;

    my $http_callback = sub {
        my ( $body, $hdr ) = @_;

        if ( $hdr && $hdr->{Status} =~ /^2/ ) {
            my $js = eval('decode_json($body)');
            $scanned++;

            if ( !$js ) {
                print "* Rspamd: Bad response for $file: invalid JSON: parse error\n";
                print "$tag FAILURE\n";
            }
            else {
                my $def     = $js;
                my $headers = "";

                if ( !$def ) {
                    print "* Rspamd: Bad response for $file: invalid JSON: default is missing\n";
                    print "$tag FAILURE\n";
                }
                else {
                    my $action = $def->{'action'};
                    my $id     = $js->{'message-id'};

                    my $symbols = "";
                    while ( my ( $k, $s ) = each( %{ $def->{'symbols'} } ) ) {
                        $symbols .= sprintf "%s(%.2f);", $k, $s->{'score'};
                    }

                    printf
                      "* Rspamd: Scanned %s; id: <%s>; Score: %.2f / %.2f; Symbols: [%s]\n",
                      $file, $id, $def->{'score'}, $def->{'required_score'}, $symbols;

                    if ( $js->{'dkim-signature'} ) {
                        $headers .= "DKIM-Signature: " . $js->{'dkim-signature'};
                    }

                    if ( $js->{'milter'} ) {
                        my $block = $js->{'milter'};

                        if ( $block->{'add_headers'} ) {
                            while ( my ( $h, $v ) = each( %{ $block->{'add_headers'} } ) ) {
                                if ( ref($v) eq 'HASH' ) {
                                    if ( $headers eq "" ) {
                                        $headers .= "$h: $v->{value}";
                                    }
                                    else {
                                        $headers .= "\\e$h: $v->{value}";
                                    }
                                }
                                else {
                                    if ( $headers eq "" ) {
                                        $headers .= "$h: $v";
                                    }
                                    else {
                                        $headers .= "\\e$h: $v";
                                    }
                                }
                            }
                        }
                    }

                    if ( $action eq 'reject' ) {
                        print "$tag DISCARD\n";
                        return;
                    }
                    elsif ( $action eq 'add header' || $action eq 'rewrite subject' ) {
                        if ( $headers eq "" ) {
                            $headers .= "$header";
                        }
                        else {
                            $headers .= "\\e$header";
                        }
                    }
                    elsif ( $action eq 'soft reject' ) {
                        print "$tag REJECT Try again later\n";
                        return;
                    }

                    if ( $headers eq "" ) {
                        print "$tag OK\n";
                    }
                    else {
                        print "$tag ADDHEADER " . cgp_string($headers) . " OK\n";
                    }
                }
            }
        }
        else {
            if ($hdr) {
                print "* Rspamd: Bad response for $file: HTTP error: $hdr->{Status} $hdr->{Reason}\n";
            }
            else {
                print "* Rspamd: Bad response for $file: IO error: $!\n";
            }
            print "$tag FAILURE\n";
        }
    };

    if ($local) {

        # Use file scan
        # XXX: not implemented now due to CGP queue format
        http_get(
            "http://$rspamd_host/symbols?file=$file",
            timeout => $request_timeout,
            $http_callback
        );
    }
    else {
        my $sb = stat($file);

        if ( !$sb || $sb->size > $max_size ) {
            if ($sb) {
                print "* File $file is too large: " . $sb->size . "\n$tag FAILURE\n";

            }
            else {
                print "* Cannot stat $file: $!\n$tag FAILURE\n";
            }
            return;
        }
        aio_load(
            $file,
            sub {
                my ($data) = @_;

                if ( !$data ) {
                    print "* Cannot open $file: $!\n$tag FAILURE\n";
                    return;
                }

                # Parse CGP format
                $data =~ s/^((?:[^\n]*\n)*?)\n(.*)$/$2/ms;
                my @envelope = split /\n/, $1;
                chomp(@envelope);
                my $from;
                my @rcpts;
                my $ip;
                my $user;

                foreach my $elt (@envelope) {
                    if ( $elt =~ /^P\s[^<]*(<[^>]*>).*$/ ) {
                        $from = $1;
                    }
                    elsif ( $elt =~ /^R\s[^<]*(<[^>]*>).*$/ ) {
                        push @rcpts, $1;
                    }
                    elsif ( $elt =~ /^S (?:<([^>]+)> )?(?:SMTP|HTTPU?|AIRSYNC|XIMSS) \[([0-9a-f.:]+)\]/ ) {
                        if ($1) {
                            $user = $1;
                        }
                        if ($2) {
                            $ip = $2;
                        }
                    }
                    elsif ( $elt =~ /^S (?:<([^>]+)> )?(?:DSN|GROUP|LIST|PBX|PIPE|RULE) \[0\.0\.0\.0\]/ ) {
                        if ($1) {
                            $user = $1;
                        }
                        $ip = '127.2.4.7';
                    }
                }

                my $headers = {};
                if ( $file =~ /\/([^\/.]+)\.msg$/ ) {
                    $headers->{'Queue-ID'} = $1;
                }
                if ($from) {
                    $headers->{From} = $from;
                }
                if ( scalar(@rcpts) > 0 ) {

                    # XXX: Anyevent cannot parse headers with multiple values
                    $headers->{Rcpt} = join( ',', @rcpts );
                }
                if ($ip) {
                    $headers->{IP} = $ip;
                }
                if ($user) {
                    $headers->{User} = $user;
                }
                if ($main_domain) {
                    $headers->{'MTA-Tag'} = $main_domain;
                }

                http_post(
                    "http://$rspamd_host/checkv2", $data,
                    timeout => $request_timeout,
                    headers => $headers,
                    $http_callback
                );
            }
        );
    }
}

# Show informational message
print "* Rspamd CGP filter has been started\n";

my $w = AnyEvent->io(
    fh   => \*STDIN,
    poll => 'r',
    cb   => sub {
        chomp( my $input = <STDIN> );

        if ( $input =~ /^(\d+)\s+(\S+)(\s+(\S+)\s*)?$/ ) {
            my $tag = $1;
            my $cmd = $2;

            if ( $cmd eq "INTF" ) {
                print "$input\n";
            }
            elsif ( $cmd eq "FILE" && $4 ) {
                my $file = $4;
                print "* Scanning file $file\n";
                rspamd_scan $tag, $file;
            }
            elsif ( $cmd eq "QUIT" ) {
                print "* Terminating after scanning of $scanned files\n";
                print "$tag OK\n";
                exit 0;
            }
            else {
                print "* Unknown command $cmd\n";
                print "$tag FAILURE\n";
            }
        }
    }
);

EV::run;

__END__

=head1 NAME

cgp_rspamd - implements Rspamd filter for CommunigatePro MTA

=head1 SYNOPSIS

cgp_rspamd [options]

 Options:
   --host=hostport        Rspamd host to connect (localhost:11333 by default)
   --header               Add specific header for a spam message ("X-Spam: yes" by default)
   --reject-message       Rejection message for spam mail ("Spam message rejected" by default)
   --timeout              Timeout to read response from Rspamd (15 seconds by default)
   --max-size             Maximum size of message to scan (10 megabytes by default)
   --help                 brief help message
   --man                  full documentation

=head1 OPTIONS

=over 8

=item B<--host>

Specifies Rspamd host to use for scanning

=item B<--header>

Specifies the header that should be added when Rspamd action is B<add header> or B<rewrite subject>.

=item B<--reject-message>

Specifies the rejection message for spam.

=item B<--timeout>

Sets timeout in seconds for waiting Rspamd reply for a message.

=item B<--max-size>

Define the maximum messages size to be processed by Rspamd in bytes.

=item B<--help>

Print a brief help message and exits.

=item B<--man>

Prints the manual page and exits.

=back

=head1 DESCRIPTION

B<cgp_rspamd> is intended to scan messages processed with B<CommunigatePro> MTA on some Rspamd scanner. It reads
standard input and parses CGP helpers protocol.  On scan requests, this filter can query Rspamd to process a message.
B<cgp_rspamd> can tell CGP to add header or reject SPAM messages depending on Rspamd scan result.

=cut