#!/usr/bin/env perl use warnings; use strict; # Required ports: # www/p5-POE-Component-Client-HTTP # www/p5-POE-Component-Server-HTTP # dns/p5-POE-Component-Client-DNS # databases/p5-Cache-Memcached-Fast # devel/p5-Proc-Daemon # sysutils/p5-Proc-PidUtil # POE::Component::Client::HTTP uses HTTP::Request and response # objects. use POSIX qw(strftime); use HTTP::Request::Common qw(GET POST); use HTTP::Response; use URI::Escape qw(uri_unescape); use Sys::Syslog qw/:standard :macros setlogsock/; use HTML::HeadParser; use Digest; use Proc::Daemon; use Proc::PidUtil; use POE qw(@POE_LOOP@ Component::Server::TCP Filter::HTTPD Component::Client::HTTP); use Redis::Fast; my $with_swf = 1; my $swf_parser; my $saved_swf_url = ""; eval "require SWF::Element" or $with_swf = 0; # p5-SWF-File my $DEBUG = grep { $_ eq '-debug' } @ARGV; our %cfg = ( port => 8080, max_size => 102400, http_timeout => 5, max_rec => 5, pidfile => '/tmp/redirector.pid', do_log => 0, debug => 0, redis_server => 'localhost:6379', facility => LOG_LOCAL3, # syslog facility log_level => LOG_INFO, digest_bits => 256, cache_expire => 3600, user => '@RSPAMD_USER@', group => '@RSPAMD_GROUP@', cfg_file => '@CMAKE_INSTALL_PREFIX@/etc/rspamd-redirector.conf', ); our $do_reopen_log = 0; our $redis_conn; ############################################ Subs ######################################## # Read file into string sub read_file { my ($file) = @_; open( IN, $file ) or _log( LOG_ALERT, "Can't open $file: $!" ); local $/; my $content = ; close IN; return $content; } # Write log line: sub _log { my ( $l, $w, @s ) = @_; if ($DEBUG) { printf STDERR $w . "\n", @s; } else { syslog( $l, $w . "\n", @s ) if ( $l <= $cfg{'log_level'} ); } if ( $l == LOG_ALERT ) { die $w; } } # Init swf parser sub swf_init_parser { if ($with_swf) { $swf_parser = SWF::Parser->new( 'tag-callback' => \&swf_tag_callback ); } } # Checking for SWF url sub swf_search_get_url { my $actions = shift; my $saved_pool_str = ""; for my $action (@$actions) { if ( $action->tag_name eq 'ActionConstantPool' ) { my $pool = $action->ConstantPool; for my $string (@$pool) { if ( $string =~ /^https?:\/\// ) { $saved_pool_str = $string->value; } } } elsif ( $action->tag_name eq 'ActionGetURL2' ) { if ( $saved_pool_str ne "" ) { $saved_swf_url = $saved_pool_str; } } elsif ( $action->tag_name =~ 'ActionGetURL' ) { $saved_swf_url = $action->UrlString->value; } } } # SWF check tag utility sub swf_check_tag { my ( $t, $stream ) = @_; my ($tagname) = $t->tag_name; for ($tagname) { ( /^Do(Init)?Action$/ or /^DefineButton$/ ) and do { swf_search_get_url( $t->Actions ); last; }; /^PlaceObject2$/ and do { for my $ca ( @{ $t->ClipActions } ) { swf_search_get_url( $ca->Actions ); } last; }; /^DefineButton2$/ and do { for my $ba ( @{ $t->Actions } ) { swf_search_get_url( $ba->Actions ); } last; }; /^DefineSprite$/ and do { for my $tag ( @{ $t->ControlTags } ) { swf_search_get_url( $tag, $stream ); } last; }; } } # Callback for swf parser sub swf_tag_callback { my ( $self, $tag, $length, $stream ) = @_; my $t = SWF::Element::Tag->new( Tag => $tag, Length => $length ); my ($tagname) = $t->tag_name; return unless $tagname eq 'DoAction' or $tagname eq 'DoInitAction' or $tagname eq 'PlaceObject2' or $tagname eq 'DefineButton' or $tagname eq 'DefineButton2' or $tagname eq 'DefineSprite'; if ( $tagname eq 'DefineSprite' ) { # Tags in the sprite are not unpacked here. $t->shallow_unpack($stream); $t->TagStream->parse( callback => \&swf_tag_callback ); return; } elsif ( $tagname eq 'PlaceObject2' ) { # Most of PlaceObject2 tags don't have ClipActions. $t->lookahead_Flags($stream); return unless $t->PlaceFlagHasClipActions; } # unpack the tag and search actions. $t->unpack($stream); swf_check_tag($t); } # Check url from redis cache first sub redis_check_url { my ($url) = @_; my $context = Digest->new("SHA-512"); $context->add($url); return $redis_conn->get( $context->digest() ); } # Write url to redis key sub redis_cache_url { my ( $url, $url_real ) = @_; if ( $url ne $url_real ) { my $context = Digest->new("SHA-512"); $context->add($url); if ( !$redis_conn->setex( $context->digest(), $cfg{cache_expire}, $url_real ) ) { _log( LOG_INFO, "cannot save redirect from $url to $url_real in redis" ); } } } sub create_response { my ( $code, $uri ) = @_; my $new_response; if ($uri) { $new_response = HTTP::Response->new( $code, 'OK' ); $new_response->header( "Uri", $uri ); $new_response->content($uri); $new_response->content_length( length($uri) ); } else { $new_response = HTTP::Response->new($code); $new_response->content_length(0); } $new_response->header( "Connection", "Close" ); $new_response->header( "Proxy-Connection", "Close" ); return $new_response; } # POE http client callback sub process_client { my ( $kernel, $heap ) = @_[ KERNEL, HEAP ]; my $http_request = $_[ARG0]->[0]; my $rec = $_[ARG0]->[1][0]; my $http_response = $_[ARG1]->[0]; my $base_url = $_[ARG0]->[1][1]; $saved_swf_url = ""; if ( $rec == 0 ) { $base_url = $http_request->uri; } else { # Check cache for each url my $redirect = redis_check_url( $http_request->uri ); if ($redirect) { _log( LOG_INFO, "Memcached redirect from %s to %s for request from: %s", $http_response->base, $redirect, $heap->{remote_ip} ); my $new_response = create_response( 200, $redirect ); # Avoid sending the response if the client has gone away. $heap->{client}->put($new_response) if defined $heap->{client}; # Shut down the client's connection when the response is sent. return; } } if ($do_reopen_log) { $do_reopen_log = 0; reopen_log(); } if ( $rec > $cfg{max_rec} ) { _log( LOG_INFO, "Max recursion exceeded: %d from %s to %s for request from: %s", $rec, $base_url, $http_request->uri, $heap->{remote_ip} ); # Write to cache redis_cache_url( $base_url, $http_request->uri ); my $new_response = create_response( 200, $http_request->uri ); # Avoid sending the response if the client has gone away. $heap->{client}->put($new_response) if defined $heap->{client}; # Shut down the client's connection when the response is sent. $kernel->yield("shutdown"); return; } # Detect HTTP redirects if ( $http_response->is_redirect ) { my $redirect = $http_response->header('Location'); if ($redirect) { if ( $redirect =~ /^https?:\/\// ) { _log( LOG_INFO, "HTML redirect from %s to %s for request from: %s", $http_response->base, $redirect, $heap->{remote_ip} ); my $request = HTTP::Request->new( 'GET', $redirect ); $request->header( "Connection", "close" ); $request->header( "Proxy-Connection", "close" ); $kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] ); return; } else { _log( LOG_INFO, "ignoring internal redirect from %s to %s for request from: %s", $http_request->uri, $redirect, $heap->{remote_ip} ); my $new_response = create_response( 200, $http_request->uri ); # Avoid sending the response if the client has gone away. $heap->{client}->put($new_response) if defined $heap->{client}; # Shut down the client's connection when the response is sent. $kernel->yield("shutdown"); return; } } } elsif ( $http_response->code != 200 ) { _log( LOG_INFO, "HTTP response was %d, for request to %s", $http_response->code, $http_request->uri ); my $new_response; if ( $rec == 0 ) { $new_response = create_response( $http_response->code ); } else { redis_cache_url( $base_url, $http_request->uri ); $new_response = create_response( 200, $http_request->uri ); } # Avoid sending the response if the client has gone away. $heap->{client}->put($new_response) if defined $heap->{client}; # Shut down the client's connection when the response is sent. $kernel->yield("shutdown"); return; } my $response_type = $http_response->content_type(); if ( $response_type =~ /^text/i ) { my $content = $http_response->decoded_content(); my $p = HTML::HeadParser->new($http_response); $p->parse($content); my $expire = $http_response->header('Refresh'); if ( $http_response->is_redirect || $expire ) { my $redirect; if ($expire) { $expire =~ /URL=(\S+)/; $redirect = $1; } else { $redirect = $http_response->header('Location'); } if ($redirect) { if ( $redirect =~ /^https?:\/\// ) { _log( LOG_INFO, "HTML redirect from %s to %s for request from: %s", $http_response->base, $redirect, $heap->{remote_ip} ); my $request = HTTP::Request->new( 'GET', $redirect ); $request->header( "Connection", "close" ); $request->header( "Proxy-Connection", "close" ); $kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] ); return; } else { _log( LOG_INFO, "ignoring internal redirect from %s to %s for request from: %s", $http_response->base, $redirect, $heap->{remote_ip} ); } } } if ( $content =~ /location\s*=\s*["']*(https?:\/\/[^"'\s]+)["']*/im ) { my $redir = uri_unescape($1); _log( LOG_INFO, "js redirect from %s to %s for request from: %s", $http_response->base, $1, $heap->{remote_ip} ); my $request = HTTP::Request->new( 'GET', $redir ); $request->header( "Connection", "close" ); $request->header( "Proxy-Connection", "close" ); $kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] ); return; } } elsif ( $with_swf && ( $response_type eq 'application/x-shockwave-flash' || ( $http_request->uri =~ /\.swf(\?.*)?$/i && $http_response->code == 200 ) ) ) { my $content = $http_response->decoded_content(); $swf_parser->parse($content); if ( $saved_swf_url ne "" ) { _log( LOG_INFO, "flash redirect from %s to %s for request from: %s", $http_response->base, $saved_swf_url, $heap->{remote_ip} ); my $request = HTTP::Request->new( 'GET', $saved_swf_url ); # Reset swf redirect global variable $saved_swf_url = ""; $request->header( "Connection", "close" ); $request->header( "Proxy-Connection", "close" ); $kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] ); return; } } else { _log( LOG_INFO, "response wasn't text request from: %s, response is: %s", $heap->{remote_ip}, $response_type ); } _log( LOG_INFO, "redirect from %s to %s for request from: %s", $base_url, $http_request->uri, $heap->{remote_ip} ); # Write to cache redis_cache_url( $base_url, $http_request->uri ); my $new_response = create_response( $http_response->code, $http_request->uri ); # Avoid sending the response if the client has gone away. $heap->{client}->put($new_response) if defined $heap->{client}; # Shut down the client's connection when the response is sent. $kernel->yield("shutdown"); } sub process_input { my ( $kernel, $heap, $request ) = @_[ KERNEL, HEAP, ARG0 ]; if ( $request->isa("HTTP::Response") ) { $heap->{client}->put($request); $kernel->yield("shutdown"); return; } my $domain; if ( $request->uri =~ /^http:\/\/([^\/]+)\// ) { my @parts = split( /\./, $1 ); my $c1 = pop @parts; my $c2 = pop @parts; $domain = "$c2.$c1"; } if ( ( defined( $cfg{check_regexp} ) && $request->uri !~ $cfg{check_regexp} ) || ( defined( $cfg{check_domains} ) && scalar( grep { $_ eq $domain } @{ $cfg{check_domains} } ) == 0 ) ) { my $new_response = create_response( 200, $request->uri ); # Avoid sending the response if the client has gone away. $heap->{client}->put($new_response) if defined $heap->{client}; $kernel->yield("shutdown"); # Shut down the client's connection when the response is sent. return; } # Check cache first my $redirect = redis_check_url( $request->uri ); if ($redirect) { _log( LOG_INFO, "Memcached redirect from %s to %s for request from: %s", $request->uri, $redirect, $heap->{remote_ip} ); my $new_response = create_response( 200, $redirect ); # Avoid sending the response if the client has gone away. $heap->{client}->put($new_response) if defined $heap->{client}; $kernel->yield("shutdown"); # Shut down the client's connection when the response is sent. return; } # Start http request my $new_request = HTTP::Request->new( 'GET', $request->uri ); $new_request->header( "Connection", "close" ); $new_request->header( "Proxy-Connection", "close" ); $kernel->post( "cl", "request", "got_response", $new_request, [ 0, "" ] ); } sub sig_DIE { my ( $sig, $ex ) = @_[ ARG0, ARG1 ]; _log( LOG_ERR, "$$: error in $ex->{event}: $ex->{error_str}" ); $poe_kernel->sig_handled(); # Send the signal to session that sent the original event. if ( $ex->{source_session} ne $_[SESSION] ) { $poe_kernel->signal( $ex->{source_session}, 'DIE', $sig, $ex ); } } sub sig_CLD { my ( $heap, $child_pid ) = @_[ HEAP, ARG1 ]; return 0; } ############################### Main code fragment ################################## # Do daemonization if ( !$DEBUG ) { Proc::Daemon::Init; POE::Kernel->has_forked; setlogsock('unix'); openlog( 'redirector', 'ndelay,pid', $cfg{'facility'} ); } # Try to eval config file if ( -f $cfg{cfg_file} ) { my $config = read_file( $cfg{cfg_file} ); eval $config; } _log( LOG_ALERT, "Process is already started, check $cfg{pidfile}" ) if Proc::PidUtil::is_running( $cfg{pidfile} ); # Drop privileges if ( $> == 0 ) { my $uid = getpwnam( $cfg{user} ) or _log( LOG_ALERT, "user $cfg{user} unknown" ); my $gid = getgrnam( $cfg{group} ) or _log( LOG_ALERT, "group $cfg{group} unknown" ); $< = $> = $uid; $) = $( = $gid; } if ( !$DEBUG ) { _log( LOG_ALERT, "Cannot write to pidfile $cfg{pidfile}" ) if !open( PID, "> $cfg{pidfile}" ); close(PID); } # Reopen log on SIGUSR1 $poe_kernel->sig( DIE => \&sig_DIE ); $poe_kernel->sig( CLD => \&sig_CLD ); $SIG{USR1} = sub { $do_reopen_log = 1; $poe_kernel->sig_handled(); }; $SIG{INT} = sub { $poe_kernel->stop(); }; $SIG{QUIT} = sub { $poe_kernel->stop(); }; $SIG{PIPE} = 'IGNORE'; if ( !$DEBUG ) { Proc::PidUtil::make_pidfile( $cfg{pidfile}, $$ ) or _log( LOG_ALERT, "Cannot write pidfile $cfg{pidfile}" ); } # Init redis connection _log( LOG_INFO, "Starting redis connection" ); $redis_conn = Redis::Fast->new( server => $cfg{redis_server}, reconnect => 60, every => 500_000, encoding => undef, ); # POE part POE::Component::Client::HTTP->spawn( Alias => 'cl', MaxSize => $cfg{max_size}, # Remove for unlimited page sizes Timeout => $cfg{http_timeout}, ConnectionManager => POE::Component::Client::Keepalive->new( max_per_host => 256, max_open => 1024, keep_alive => 1, timeout => $cfg{http_timeout}, ), ); _log( LOG_INFO, "Starting HTTP server" ); POE::Component::Server::TCP->new( Alias => "", Port => $cfg{port}, ClientFilter => 'POE::Filter::HTTPD', ClientInput => \&process_input, InlineStates => { got_response => \&process_client, }, ); swf_init_parser(); _log( LOG_NOTICE, "Starting URL resolver" ); # Start POE. This will run the server until it exits. POE::Kernel->run(); exit 0; ############################## Final block #################################### END { _log( LOG_NOTICE, 'redirector stopped' ); closelog(); }