#!/usr/bin/env perl

use warnings;
use strict;

# Required ports:
# www/p5-POE-Component-Client-HTTP
# www/p5-POE-Component-Server-HTTP
# dns/p5-POE-Component-Client-DNS
# databases/p5-Cache-Memcached-Fast
# devel/p5-Proc-Daemon
# sysutils/p5-Proc-PidUtil

# POE::Component::Client::HTTP uses HTTP::Request and response
# objects.

use POSIX qw(strftime);
use HTTP::Request::Common qw(GET POST);
use HTTP::Response;
use URI::Escape qw(uri_unescape);
use Sys::Syslog qw/:standard :macros setlogsock/;
use HTML::HeadParser;
use Digest;
use Proc::Daemon;
use Proc::PidUtil;
use POE qw(@POE_LOOP@ Component::Server::TCP Filter::HTTPD Component::Client::HTTP);
use Redis::Fast;

my $with_swf = 1;
my $swf_parser;
my $saved_swf_url = "";
eval "require SWF::Element" or $with_swf = 0;    # p5-SWF-File

my $DEBUG = grep { $_ eq '-debug' } @ARGV;

our %cfg = (
	port              => 8080,
	max_size          => 102400,
	http_timeout      => 5,
	max_rec           => 5,
	pidfile           => '/tmp/redirector.pid',
	do_log            => 0,
	debug             => 0,
	redis_server      =>  'localhost:6379',

	facility     => LOG_LOCAL3,         # syslog facility
	log_level    => LOG_INFO,
	digest_bits  => 256,
	cache_expire => 3600,
	user         => '@RSPAMD_USER@',
	group        => '@RSPAMD_GROUP@',
	cfg_file => '@CMAKE_INSTALL_PREFIX@/etc/rspamd-redirector.conf',
);

our $do_reopen_log = 0;
our $redis_conn;

############################################ Subs ########################################

# Read file into string
sub read_file {
	my ($file) = @_;

	open( IN, $file ) or _log( LOG_ALERT, "Can't open $file: $!" );
	local $/;
	my $content = <IN>;
	close IN;

	return $content;
}

# Write log line:
sub _log {
	my ( $l, $w, @s ) = @_;

	if ($DEBUG) {
		printf STDERR $w . "\n", @s;
	}
	else {
		syslog( $l, $w . "\n", @s ) if ( $l <= $cfg{'log_level'} );
	}

	if ( $l == LOG_ALERT ) {
		die $w;
	}
}

# Init swf parser
sub swf_init_parser {
	if ($with_swf) {
		$swf_parser = SWF::Parser->new( 'tag-callback' => \&swf_tag_callback );
	}
}

# Checking for SWF url
sub swf_search_get_url {
	my $actions        = shift;
	my $saved_pool_str = "";

	for my $action (@$actions) {
		if ( $action->tag_name eq 'ActionConstantPool' ) {
			my $pool = $action->ConstantPool;
			for my $string (@$pool) {
				if ( $string =~ /^https?:\/\// ) {
					$saved_pool_str = $string->value;
				}
			}
		}
		elsif ( $action->tag_name eq 'ActionGetURL2' ) {
			if ( $saved_pool_str ne "" ) {
				$saved_swf_url = $saved_pool_str;
			}
		}
		elsif ( $action->tag_name =~ 'ActionGetURL' ) {
			$saved_swf_url = $action->UrlString->value;
		}
	}
}

# SWF check tag utility
sub swf_check_tag {
	my ( $t, $stream ) = @_;
	my ($tagname) = $t->tag_name;

	for ($tagname) {
		( /^Do(Init)?Action$/ or /^DefineButton$/ ) and do {
			swf_search_get_url( $t->Actions );
			last;
		};
		/^PlaceObject2$/ and do {
			for my $ca ( @{ $t->ClipActions } ) {
				swf_search_get_url( $ca->Actions );
			}
			last;
		};
		/^DefineButton2$/ and do {
			for my $ba ( @{ $t->Actions } ) {
				swf_search_get_url( $ba->Actions );
			}
			last;
		};
		/^DefineSprite$/ and do {
			for my $tag ( @{ $t->ControlTags } ) {
				swf_search_get_url( $tag, $stream );
			}
			last;
		};
	}
}

# Callback for swf parser
sub swf_tag_callback {
	my ( $self, $tag, $length, $stream ) = @_;
	my $t = SWF::Element::Tag->new( Tag => $tag, Length => $length );
	my ($tagname) = $t->tag_name;

	return
	  unless $tagname eq 'DoAction'
		  or $tagname eq 'DoInitAction'
		  or $tagname eq 'PlaceObject2'
		  or $tagname eq 'DefineButton'
		  or $tagname eq 'DefineButton2'
		  or $tagname eq 'DefineSprite';

	if ( $tagname eq 'DefineSprite' ) {

		# Tags in the sprite are not unpacked here.

		$t->shallow_unpack($stream);
		$t->TagStream->parse( callback => \&swf_tag_callback );
		return;

	}
	elsif ( $tagname eq 'PlaceObject2' ) {

		# Most of PlaceObject2 tags don't have ClipActions.

		$t->lookahead_Flags($stream);
		return unless $t->PlaceFlagHasClipActions;
	}

	# unpack the tag and search actions.

	$t->unpack($stream);
	swf_check_tag($t);
}

# Check url from redis cache first
sub redis_check_url {
	my ($url) = @_;

	my $context = Digest->new("SHA-512");
	$context->add($url);
	return $redis_conn->get( $context->digest() );
}

# Write url to redis key
sub redis_cache_url {
	my ( $url, $url_real ) = @_;

	if ( $url ne $url_real ) {
		my $context = Digest->new("SHA-512");
		$context->add($url);
		if (!$redis_conn->setex( $context->digest(), $cfg{cache_expire}, $url_real)) {
			_log(LOG_INFO, "cannot save redirect from $url to $url_real in redis");
		}
	}
}

sub create_response {
	my ( $code, $uri ) = @_;

	my $new_response;

	if ($uri) {
		$new_response = HTTP::Response->new( $code, 'OK' );
		$new_response->header( "Uri", $uri );
		$new_response->content($uri);
		$new_response->content_length( length($uri) );
	}
	else {
		$new_response = HTTP::Response->new($code);
		$new_response->content_length(0);
	}

	$new_response->header( "Connection",       "Close" );
	$new_response->header( "Proxy-Connection", "Close" );

	return $new_response;
}

# POE http client callback
sub process_client {
	my ( $kernel, $heap ) = @_[ KERNEL, HEAP ];

	my $http_request  = $_[ARG0]->[0];
	my $rec           = $_[ARG0]->[1][0];
	my $http_response = $_[ARG1]->[0];
	my $base_url      = $_[ARG0]->[1][1];
	$saved_swf_url = "";

	if ( $rec == 0 ) {
		$base_url = $http_request->uri;
	}
	else {

		# Check cache for each url
		my $redirect = redis_check_url( $http_request->uri );
		if ($redirect) {
			_log( LOG_INFO,
				"Memcached redirect from %s to %s for request from: %s",
				$http_response->base, $redirect, $heap->{remote_ip} );
			my $new_response = create_response( 200, $redirect );

			# Avoid sending the response if the client has gone away.
			$heap->{client}->put($new_response) if defined $heap->{client};

			# Shut down the client's connection when the response is sent.
			return;
		}
	}

	if ($do_reopen_log) {
		$do_reopen_log = 0;
		reopen_log();
	}

	if ( $rec > $cfg{max_rec} ) {
		_log( LOG_INFO,
			"Max recursion exceeded: %d from %s to %s for request from: %s",
			$rec, $base_url, $http_request->uri, $heap->{remote_ip} );

		# Write to cache
		redis_cache_url( $base_url, $http_request->uri );
		my $new_response = create_response( 200, $http_request->uri );

		# Avoid sending the response if the client has gone away.
		$heap->{client}->put($new_response) if defined $heap->{client};

		# Shut down the client's connection when the response is sent.
		$kernel->yield("shutdown");
		return;
	}

	# Detect HTTP redirects
	if ( $http_response->is_redirect ) {
		my $redirect = $http_response->header('Location');
		if ($redirect) {
			if ( $redirect =~ /^https?:\/\// ) {
				_log( LOG_INFO,
					"HTML redirect from %s to %s for request from: %s",
					$http_response->base, $redirect, $heap->{remote_ip} );
				my $request = HTTP::Request->new( 'GET', $redirect );
				$request->header( "Connection",       "close" );
				$request->header( "Proxy-Connection", "close" );
				$kernel->post( "cl", "request", "got_response", $request,
					[ $rec + 1, $base_url ] );
				return;
			}
			else {
				_log( LOG_INFO,
"ignoring internal redirect from %s to %s for request from: %s",
					$http_request->uri,
					$redirect,
					$heap->{remote_ip}
				);
				my $new_response = create_response( 200, $http_request->uri );

				# Avoid sending the response if the client has gone away.
				$heap->{client}->put($new_response) if defined $heap->{client};

				# Shut down the client's connection when the response is sent.
				$kernel->yield("shutdown");
				return;
			}
		}
	}
	elsif ( $http_response->code != 200 ) {
		_log( LOG_INFO, "HTTP response was %d, for request to %s",
			$http_response->code, $http_request->uri );
		my $new_response;
		if ($rec == 0) {
			$new_response = create_response( $http_response->code );
		}
		else {
		  redis_cache_url( $base_url, $http_request->uri );
			$new_response = create_response( 200, $http_request->uri );
		}

		# Avoid sending the response if the client has gone away.
		$heap->{client}->put($new_response) if defined $heap->{client};

		# Shut down the client's connection when the response is sent.
		$kernel->yield("shutdown");
		return;
	}
	my $response_type = $http_response->content_type();
	if ( $response_type =~ /^text/i ) {
		my $content = $http_response->decoded_content();
		my $p       = HTML::HeadParser->new($http_response);
		$p->parse($content);
		my $expire = $http_response->header('Refresh');
		if ( $http_response->is_redirect || $expire ) {
			my $redirect;
			if ($expire) {
				$expire =~ /URL=(\S+)/;
				$redirect = $1;
			}
			else {
				$redirect = $http_response->header('Location');
			}
			if ($redirect) {
				if ( $redirect =~ /^https?:\/\// ) {
					_log( LOG_INFO,
						"HTML redirect from %s to %s for request from: %s",
						$http_response->base,
						$redirect,
						$heap->{remote_ip}
					);
					my $request = HTTP::Request->new( 'GET', $redirect );
					$request->header( "Connection",       "close" );
					$request->header( "Proxy-Connection", "close" );
					$kernel->post( "cl", "request", "got_response", $request,
						[ $rec + 1, $base_url ] );
					return;
				}
				else {
					_log( LOG_INFO,
"ignoring internal redirect from %s to %s for request from: %s",
						$http_response->base,
						$redirect,
						$heap->{remote_ip}
					);
				}
			}
		}
		if ( $content =~ /location\s*=\s*["']*(https?:\/\/[^"'\s]+)["']*/im ) {
			my $redir = uri_unescape($1);
			_log( LOG_INFO, "js redirect from %s to %s for request from: %s",
				$http_response->base, $1, $heap->{remote_ip} );
			my $request = HTTP::Request->new( 'GET', $redir );
			$request->header( "Connection",       "close" );
			$request->header( "Proxy-Connection", "close" );
			$kernel->post( "cl", "request", "got_response", $request,
				[ $rec + 1, $base_url ] );
			return;
		}
	}
	elsif (
		$with_swf
		&& (
			$response_type eq 'application/x-shockwave-flash'
			|| (   $http_request->uri =~ /\.swf(\?.*)?$/i
				&& $http_response->code == 200 )
		)
	  )
	{
		my $content = $http_response->decoded_content();
		$swf_parser->parse($content);

		if ( $saved_swf_url ne "" ) {
			_log( LOG_INFO, "flash redirect from %s to %s for request from: %s",
				$http_response->base, $saved_swf_url, $heap->{remote_ip} );
			my $request = HTTP::Request->new( 'GET', $saved_swf_url );

			# Reset swf redirect global variable
			$saved_swf_url = "";
			$request->header( "Connection",       "close" );
			$request->header( "Proxy-Connection", "close" );
			$kernel->post( "cl", "request", "got_response", $request,
				[ $rec + 1, $base_url ] );
			return;
		}
	}
	else {
		_log( LOG_INFO,
			"response wasn't text request from: %s, response is: %s",
			$heap->{remote_ip}, $response_type );
	}

	_log( LOG_INFO, "redirect from %s to %s for request from: %s",
		$base_url, $http_request->uri, $heap->{remote_ip} );

	# Write to cache
	redis_cache_url( $base_url, $http_request->uri );
	my $new_response =
	  create_response( $http_response->code, $http_request->uri );

	# Avoid sending the response if the client has gone away.
	$heap->{client}->put($new_response) if defined $heap->{client};

	# Shut down the client's connection when the response is sent.
	$kernel->yield("shutdown");

}

sub process_input {
	my ( $kernel, $heap, $request ) = @_[ KERNEL, HEAP, ARG0 ];

	if ( $request->isa("HTTP::Response") ) {
		$heap->{client}->put($request);
		$kernel->yield("shutdown");
		return;
	}

	my $domain;
	if ( $request->uri =~ /^http:\/\/([^\/]+)\// ) {
		my @parts = split( /\./, $1 );
		my $c1    = pop @parts;
		my $c2    = pop @parts;
		$domain = "$c2.$c1";
	}

	if (
		(
			defined( $cfg{check_regexp} ) && $request->uri !~ $cfg{check_regexp}
		)
		|| ( defined( $cfg{check_domains} )
			&& scalar( grep { $_ eq $domain } @{ $cfg{check_domains} } ) == 0 )
	  )
	{
		my $new_response = create_response( 200, $request->uri );

		# Avoid sending the response if the client has gone away.
		$heap->{client}->put($new_response) if defined $heap->{client};
		$kernel->yield("shutdown");

		# Shut down the client's connection when the response is sent.
		return;
	}

	# Check cache first
	my $redirect = redis_check_url( $request->uri );
	if ($redirect) {
		_log( LOG_INFO, "Memcached redirect from %s to %s for request from: %s",
			$request->uri, $redirect, $heap->{remote_ip} );
		my $new_response = create_response( 200, $redirect );

		# Avoid sending the response if the client has gone away.
		$heap->{client}->put($new_response) if defined $heap->{client};
		$kernel->yield("shutdown");

		# Shut down the client's connection when the response is sent.
		return;
	}

	# Start http request
	my $new_request = HTTP::Request->new( 'GET', $request->uri );
	$new_request->header( "Connection",       "close" );
	$new_request->header( "Proxy-Connection", "close" );
	$kernel->post( "cl", "request", "got_response", $new_request, [ 0, "" ] );
}

sub sig_DIE {
	my ( $sig, $ex ) = @_[ ARG0, ARG1 ];
	_log( LOG_ERR, "$$: error in $ex->{event}: $ex->{error_str}" );
	$poe_kernel->sig_handled();

	# Send the signal to session that sent the original event.
	if ( $ex->{source_session} ne $_[SESSION] ) {
		$poe_kernel->signal( $ex->{source_session}, 'DIE', $sig, $ex );
	}
}

sub sig_CLD {
	my ( $heap, $child_pid ) = @_[ HEAP, ARG1 ];
	return 0;
}

############################### Main code fragment ##################################

# Do daemonization
if ( !$DEBUG ) {
	Proc::Daemon::Init;
	POE::Kernel->has_forked;
	setlogsock('unix');
	openlog( 'redirector', 'ndelay,pid', $cfg{'facility'} );
}

# Try to eval config file
if ( -f $cfg{cfg_file} ) {
	my $config = read_file( $cfg{cfg_file} );
	eval $config;
}

_log( LOG_ALERT, "Process is already started, check $cfg{pidfile}" )
  if Proc::PidUtil::is_running( $cfg{pidfile} );

# Drop privilleges
if ( $> == 0 ) {
	my $uid = getpwnam( $cfg{user} )
	  or _log( LOG_ALERT, "user $cfg{user} unknown" );
	my $gid = getgrnam( $cfg{group} )
	  or _log( LOG_ALERT, "group $cfg{group} unknown" );
	$< = $> = $uid;
	$) = $( = $gid;
}

if ( !$DEBUG ) {
	_log( LOG_ALERT, "Cannot write to pidfile $cfg{pidfile}" )
	  if !open( PID, "> $cfg{pidfile}" );
	close(PID);
}

# Reopen log on SIGUSR1
$poe_kernel->sig( DIE => \&sig_DIE );
$poe_kernel->sig( CLD => \&sig_CLD );
$SIG{USR1} = sub { $do_reopen_log = 1; $poe_kernel->sig_handled(); };
$SIG{INT}  = sub { $poe_kernel->stop(); };
$SIG{QUIT} = sub { $poe_kernel->stop(); };
$SIG{PIPE} = 'IGNORE';

if ( !$DEBUG ) {
	Proc::PidUtil::make_pidfile( $cfg{pidfile}, $$ )
	  or _log( LOG_ALERT, "Cannot write pidfile $cfg{pidfile}" );
}

# Init redis connection
_log( LOG_INFO, "Starting redis connection" );
$redis_conn = Redis::Fast->new(
		server    => $cfg{redis_server},
		reconnect => 60,
		every     => 500_000,
		encoding  => undef,
);

# POE part
POE::Component::Client::HTTP->spawn(
	Alias   => 'cl',
	MaxSize => $cfg{max_size},       # Remove for unlimited page sizes
	Timeout => $cfg{http_timeout},
	ConnectionManager => POE::Component::Client::Keepalive->new(
		max_per_host => 256,
		max_open     => 1024,
		keep_alive   => 1,
		timeout      => $cfg{http_timeout},
	),
);

_log( LOG_INFO, "Starting HTTP server" );
POE::Component::Server::TCP->new(
	Alias        => "",
	Port         => $cfg{port},
	ClientFilter => 'POE::Filter::HTTPD',

	ClientInput  => \&process_input,
	InlineStates => { got_response => \&process_client, },
);

swf_init_parser();
_log( LOG_NOTICE, "Starting URL resolver" );

# Start POE.  This will run the server until it exits.
POE::Kernel->run();
exit 0;

############################## Final block ####################################

END {
	_log( LOG_NOTICE, 'redirector stopped' );
	closelog();
}