|
|
@@ -34,22 +34,22 @@ eval "require SWF::Element" or $with_swf = 0; # p5-SWF-File |
|
|
|
my $DEBUG = grep { $_ eq '-debug' } @ARGV; |
|
|
|
|
|
|
|
our %cfg = ( |
|
|
|
port => 8080, |
|
|
|
max_size => 102400, |
|
|
|
http_timeout => 5, |
|
|
|
max_rec => 5, |
|
|
|
pidfile => '/tmp/redirector.pid', |
|
|
|
do_log => 0, |
|
|
|
debug => 0, |
|
|
|
redis_server => 'localhost:6379', |
|
|
|
|
|
|
|
facility => LOG_LOCAL3, # syslog facility |
|
|
|
log_level => LOG_INFO, |
|
|
|
digest_bits => 256, |
|
|
|
cache_expire => 3600, |
|
|
|
user => '@RSPAMD_USER@', |
|
|
|
group => '@RSPAMD_GROUP@', |
|
|
|
cfg_file => '@CMAKE_INSTALL_PREFIX@/etc/rspamd-redirector.conf', |
|
|
|
port => 8080, |
|
|
|
max_size => 102400, |
|
|
|
http_timeout => 5, |
|
|
|
max_rec => 5, |
|
|
|
pidfile => '/tmp/redirector.pid', |
|
|
|
do_log => 0, |
|
|
|
debug => 0, |
|
|
|
redis_server => 'localhost:6379', |
|
|
|
|
|
|
|
facility => LOG_LOCAL3, # syslog facility |
|
|
|
log_level => LOG_INFO, |
|
|
|
digest_bits => 256, |
|
|
|
cache_expire => 3600, |
|
|
|
user => '@RSPAMD_USER@', |
|
|
|
group => '@RSPAMD_GROUP@', |
|
|
|
cfg_file => '@CMAKE_INSTALL_PREFIX@/etc/rspamd-redirector.conf', |
|
|
|
); |
|
|
|
|
|
|
|
our $do_reopen_log = 0; |
|
|
@@ -59,468 +59,442 @@ our $redis_conn; |
|
|
|
|
|
|
|
# Read file into string |
|
|
|
sub read_file { |
|
|
|
my ($file) = @_; |
|
|
|
my ($file) = @_; |
|
|
|
|
|
|
|
open( IN, $file ) or _log( LOG_ALERT, "Can't open $file: $!" ); |
|
|
|
local $/; |
|
|
|
my $content = <IN>; |
|
|
|
close IN; |
|
|
|
open( IN, $file ) or _log( LOG_ALERT, "Can't open $file: $!" ); |
|
|
|
local $/; |
|
|
|
my $content = <IN>; |
|
|
|
close IN; |
|
|
|
|
|
|
|
return $content; |
|
|
|
return $content; |
|
|
|
} |
|
|
|
|
|
|
|
# Write log line: |
|
|
|
sub _log { |
|
|
|
my ( $l, $w, @s ) = @_; |
|
|
|
|
|
|
|
if ($DEBUG) { |
|
|
|
printf STDERR $w . "\n", @s; |
|
|
|
} |
|
|
|
else { |
|
|
|
syslog( $l, $w . "\n", @s ) if ( $l <= $cfg{'log_level'} ); |
|
|
|
} |
|
|
|
|
|
|
|
if ( $l == LOG_ALERT ) { |
|
|
|
die $w; |
|
|
|
} |
|
|
|
my ( $l, $w, @s ) = @_; |
|
|
|
|
|
|
|
if ($DEBUG) { |
|
|
|
printf STDERR $w . "\n", @s; |
|
|
|
} |
|
|
|
else { |
|
|
|
syslog( $l, $w . "\n", @s ) if ( $l <= $cfg{'log_level'} ); |
|
|
|
} |
|
|
|
|
|
|
|
if ( $l == LOG_ALERT ) { |
|
|
|
die $w; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# Init swf parser |
|
|
|
sub swf_init_parser { |
|
|
|
if ($with_swf) { |
|
|
|
$swf_parser = SWF::Parser->new( 'tag-callback' => \&swf_tag_callback ); |
|
|
|
} |
|
|
|
if ($with_swf) { |
|
|
|
$swf_parser = SWF::Parser->new( 'tag-callback' => \&swf_tag_callback ); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# Checking for SWF url |
|
|
|
sub swf_search_get_url { |
|
|
|
my $actions = shift; |
|
|
|
my $saved_pool_str = ""; |
|
|
|
|
|
|
|
for my $action (@$actions) { |
|
|
|
if ( $action->tag_name eq 'ActionConstantPool' ) { |
|
|
|
my $pool = $action->ConstantPool; |
|
|
|
for my $string (@$pool) { |
|
|
|
if ( $string =~ /^https?:\/\// ) { |
|
|
|
$saved_pool_str = $string->value; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
elsif ( $action->tag_name eq 'ActionGetURL2' ) { |
|
|
|
if ( $saved_pool_str ne "" ) { |
|
|
|
$saved_swf_url = $saved_pool_str; |
|
|
|
} |
|
|
|
} |
|
|
|
elsif ( $action->tag_name =~ 'ActionGetURL' ) { |
|
|
|
$saved_swf_url = $action->UrlString->value; |
|
|
|
} |
|
|
|
} |
|
|
|
my $actions = shift; |
|
|
|
my $saved_pool_str = ""; |
|
|
|
|
|
|
|
for my $action (@$actions) { |
|
|
|
if ( $action->tag_name eq 'ActionConstantPool' ) { |
|
|
|
my $pool = $action->ConstantPool; |
|
|
|
for my $string (@$pool) { |
|
|
|
if ( $string =~ /^https?:\/\// ) { |
|
|
|
$saved_pool_str = $string->value; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
elsif ( $action->tag_name eq 'ActionGetURL2' ) { |
|
|
|
if ( $saved_pool_str ne "" ) { |
|
|
|
$saved_swf_url = $saved_pool_str; |
|
|
|
} |
|
|
|
} |
|
|
|
elsif ( $action->tag_name =~ 'ActionGetURL' ) { |
|
|
|
$saved_swf_url = $action->UrlString->value; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# SWF check tag utility |
|
|
|
sub swf_check_tag { |
|
|
|
my ( $t, $stream ) = @_; |
|
|
|
my ($tagname) = $t->tag_name; |
|
|
|
|
|
|
|
for ($tagname) { |
|
|
|
( /^Do(Init)?Action$/ or /^DefineButton$/ ) and do { |
|
|
|
swf_search_get_url( $t->Actions ); |
|
|
|
last; |
|
|
|
}; |
|
|
|
/^PlaceObject2$/ and do { |
|
|
|
for my $ca ( @{ $t->ClipActions } ) { |
|
|
|
swf_search_get_url( $ca->Actions ); |
|
|
|
} |
|
|
|
last; |
|
|
|
}; |
|
|
|
/^DefineButton2$/ and do { |
|
|
|
for my $ba ( @{ $t->Actions } ) { |
|
|
|
swf_search_get_url( $ba->Actions ); |
|
|
|
} |
|
|
|
last; |
|
|
|
}; |
|
|
|
/^DefineSprite$/ and do { |
|
|
|
for my $tag ( @{ $t->ControlTags } ) { |
|
|
|
swf_search_get_url( $tag, $stream ); |
|
|
|
} |
|
|
|
last; |
|
|
|
}; |
|
|
|
} |
|
|
|
my ( $t, $stream ) = @_; |
|
|
|
my ($tagname) = $t->tag_name; |
|
|
|
|
|
|
|
for ($tagname) { |
|
|
|
( /^Do(Init)?Action$/ or /^DefineButton$/ ) and do { |
|
|
|
swf_search_get_url( $t->Actions ); |
|
|
|
last; |
|
|
|
}; |
|
|
|
/^PlaceObject2$/ and do { |
|
|
|
for my $ca ( @{ $t->ClipActions } ) { |
|
|
|
swf_search_get_url( $ca->Actions ); |
|
|
|
} |
|
|
|
last; |
|
|
|
}; |
|
|
|
/^DefineButton2$/ and do { |
|
|
|
for my $ba ( @{ $t->Actions } ) { |
|
|
|
swf_search_get_url( $ba->Actions ); |
|
|
|
} |
|
|
|
last; |
|
|
|
}; |
|
|
|
/^DefineSprite$/ and do { |
|
|
|
for my $tag ( @{ $t->ControlTags } ) { |
|
|
|
swf_search_get_url( $tag, $stream ); |
|
|
|
} |
|
|
|
last; |
|
|
|
}; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
# Callback for swf parser |
|
|
|
sub swf_tag_callback { |
|
|
|
my ( $self, $tag, $length, $stream ) = @_; |
|
|
|
my $t = SWF::Element::Tag->new( Tag => $tag, Length => $length ); |
|
|
|
my ($tagname) = $t->tag_name; |
|
|
|
my ( $self, $tag, $length, $stream ) = @_; |
|
|
|
my $t = SWF::Element::Tag->new( Tag => $tag, Length => $length ); |
|
|
|
my ($tagname) = $t->tag_name; |
|
|
|
|
|
|
|
return |
|
|
|
unless $tagname eq 'DoAction' |
|
|
|
or $tagname eq 'DoInitAction' |
|
|
|
or $tagname eq 'PlaceObject2' |
|
|
|
or $tagname eq 'DefineButton' |
|
|
|
or $tagname eq 'DefineButton2' |
|
|
|
or $tagname eq 'DefineSprite'; |
|
|
|
return |
|
|
|
unless $tagname eq 'DoAction' |
|
|
|
or $tagname eq 'DoInitAction' |
|
|
|
or $tagname eq 'PlaceObject2' |
|
|
|
or $tagname eq 'DefineButton' |
|
|
|
or $tagname eq 'DefineButton2' |
|
|
|
or $tagname eq 'DefineSprite'; |
|
|
|
|
|
|
|
if ( $tagname eq 'DefineSprite' ) { |
|
|
|
if ( $tagname eq 'DefineSprite' ) { |
|
|
|
|
|
|
|
# Tags in the sprite are not unpacked here. |
|
|
|
# Tags in the sprite are not unpacked here. |
|
|
|
|
|
|
|
$t->shallow_unpack($stream); |
|
|
|
$t->TagStream->parse( callback => \&swf_tag_callback ); |
|
|
|
return; |
|
|
|
$t->shallow_unpack($stream); |
|
|
|
$t->TagStream->parse( callback => \&swf_tag_callback ); |
|
|
|
return; |
|
|
|
|
|
|
|
} |
|
|
|
elsif ( $tagname eq 'PlaceObject2' ) { |
|
|
|
} |
|
|
|
elsif ( $tagname eq 'PlaceObject2' ) { |
|
|
|
|
|
|
|
# Most of PlaceObject2 tags don't have ClipActions. |
|
|
|
# Most of PlaceObject2 tags don't have ClipActions. |
|
|
|
|
|
|
|
$t->lookahead_Flags($stream); |
|
|
|
return unless $t->PlaceFlagHasClipActions; |
|
|
|
} |
|
|
|
$t->lookahead_Flags($stream); |
|
|
|
return unless $t->PlaceFlagHasClipActions; |
|
|
|
} |
|
|
|
|
|
|
|
# unpack the tag and search actions. |
|
|
|
# unpack the tag and search actions. |
|
|
|
|
|
|
|
$t->unpack($stream); |
|
|
|
swf_check_tag($t); |
|
|
|
$t->unpack($stream); |
|
|
|
swf_check_tag($t); |
|
|
|
} |
|
|
|
|
|
|
|
# Check url from redis cache first |
|
|
|
sub redis_check_url { |
|
|
|
my ($url) = @_; |
|
|
|
my ($url) = @_; |
|
|
|
|
|
|
|
my $context = Digest->new("SHA-512"); |
|
|
|
$context->add($url); |
|
|
|
return $redis_conn->get( $context->digest() ); |
|
|
|
my $context = Digest->new("SHA-512"); |
|
|
|
$context->add($url); |
|
|
|
return $redis_conn->get( $context->digest() ); |
|
|
|
} |
|
|
|
|
|
|
|
# Write url to redis key |
|
|
|
sub redis_cache_url { |
|
|
|
my ( $url, $url_real ) = @_; |
|
|
|
|
|
|
|
if ( $url ne $url_real ) { |
|
|
|
my $context = Digest->new("SHA-512"); |
|
|
|
$context->add($url); |
|
|
|
if (!$redis_conn->setex( $context->digest(), $cfg{cache_expire}, $url_real)) { |
|
|
|
_log(LOG_INFO, "cannot save redirect from $url to $url_real in redis"); |
|
|
|
} |
|
|
|
} |
|
|
|
my ( $url, $url_real ) = @_; |
|
|
|
|
|
|
|
if ( $url ne $url_real ) { |
|
|
|
my $context = Digest->new("SHA-512"); |
|
|
|
$context->add($url); |
|
|
|
if ( !$redis_conn->setex( $context->digest(), $cfg{cache_expire}, $url_real ) ) { |
|
|
|
_log( LOG_INFO, "cannot save redirect from $url to $url_real in redis" ); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
sub create_response { |
|
|
|
my ( $code, $uri ) = @_; |
|
|
|
my ( $code, $uri ) = @_; |
|
|
|
|
|
|
|
my $new_response; |
|
|
|
my $new_response; |
|
|
|
|
|
|
|
if ($uri) { |
|
|
|
$new_response = HTTP::Response->new( $code, 'OK' ); |
|
|
|
$new_response->header( "Uri", $uri ); |
|
|
|
$new_response->content($uri); |
|
|
|
$new_response->content_length( length($uri) ); |
|
|
|
} |
|
|
|
else { |
|
|
|
$new_response = HTTP::Response->new($code); |
|
|
|
$new_response->content_length(0); |
|
|
|
} |
|
|
|
if ($uri) { |
|
|
|
$new_response = HTTP::Response->new( $code, 'OK' ); |
|
|
|
$new_response->header( "Uri", $uri ); |
|
|
|
$new_response->content($uri); |
|
|
|
$new_response->content_length( length($uri) ); |
|
|
|
} |
|
|
|
else { |
|
|
|
$new_response = HTTP::Response->new($code); |
|
|
|
$new_response->content_length(0); |
|
|
|
} |
|
|
|
|
|
|
|
$new_response->header( "Connection", "Close" ); |
|
|
|
$new_response->header( "Proxy-Connection", "Close" ); |
|
|
|
$new_response->header( "Connection", "Close" ); |
|
|
|
$new_response->header( "Proxy-Connection", "Close" ); |
|
|
|
|
|
|
|
return $new_response; |
|
|
|
return $new_response; |
|
|
|
} |
|
|
|
|
|
|
|
# POE http client callback |
|
|
|
sub process_client { |
|
|
|
my ( $kernel, $heap ) = @_[ KERNEL, HEAP ]; |
|
|
|
|
|
|
|
my $http_request = $_[ARG0]->[0]; |
|
|
|
my $rec = $_[ARG0]->[1][0]; |
|
|
|
my $http_response = $_[ARG1]->[0]; |
|
|
|
my $base_url = $_[ARG0]->[1][1]; |
|
|
|
$saved_swf_url = ""; |
|
|
|
|
|
|
|
if ( $rec == 0 ) { |
|
|
|
$base_url = $http_request->uri; |
|
|
|
} |
|
|
|
else { |
|
|
|
|
|
|
|
# Check cache for each url |
|
|
|
my $redirect = redis_check_url( $http_request->uri ); |
|
|
|
if ($redirect) { |
|
|
|
_log( LOG_INFO, |
|
|
|
"Memcached redirect from %s to %s for request from: %s", |
|
|
|
$http_response->base, $redirect, $heap->{remote_ip} ); |
|
|
|
my $new_response = create_response( 200, $redirect ); |
|
|
|
|
|
|
|
# Avoid sending the response if the client has gone away. |
|
|
|
$heap->{client}->put($new_response) if defined $heap->{client}; |
|
|
|
|
|
|
|
# Shut down the client's connection when the response is sent. |
|
|
|
return; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if ($do_reopen_log) { |
|
|
|
$do_reopen_log = 0; |
|
|
|
reopen_log(); |
|
|
|
} |
|
|
|
|
|
|
|
if ( $rec > $cfg{max_rec} ) { |
|
|
|
_log( LOG_INFO, |
|
|
|
"Max recursion exceeded: %d from %s to %s for request from: %s", |
|
|
|
$rec, $base_url, $http_request->uri, $heap->{remote_ip} ); |
|
|
|
|
|
|
|
# Write to cache |
|
|
|
redis_cache_url( $base_url, $http_request->uri ); |
|
|
|
my $new_response = create_response( 200, $http_request->uri ); |
|
|
|
|
|
|
|
# Avoid sending the response if the client has gone away. |
|
|
|
$heap->{client}->put($new_response) if defined $heap->{client}; |
|
|
|
|
|
|
|
# Shut down the client's connection when the response is sent. |
|
|
|
$kernel->yield("shutdown"); |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
# Detect HTTP redirects |
|
|
|
if ( $http_response->is_redirect ) { |
|
|
|
my $redirect = $http_response->header('Location'); |
|
|
|
if ($redirect) { |
|
|
|
if ( $redirect =~ /^https?:\/\// ) { |
|
|
|
_log( LOG_INFO, |
|
|
|
"HTML redirect from %s to %s for request from: %s", |
|
|
|
$http_response->base, $redirect, $heap->{remote_ip} ); |
|
|
|
my $request = HTTP::Request->new( 'GET', $redirect ); |
|
|
|
$request->header( "Connection", "close" ); |
|
|
|
$request->header( "Proxy-Connection", "close" ); |
|
|
|
$kernel->post( "cl", "request", "got_response", $request, |
|
|
|
[ $rec + 1, $base_url ] ); |
|
|
|
return; |
|
|
|
} |
|
|
|
else { |
|
|
|
_log( LOG_INFO, |
|
|
|
"ignoring internal redirect from %s to %s for request from: %s", |
|
|
|
$http_request->uri, |
|
|
|
$redirect, |
|
|
|
$heap->{remote_ip} |
|
|
|
); |
|
|
|
my $new_response = create_response( 200, $http_request->uri ); |
|
|
|
|
|
|
|
# Avoid sending the response if the client has gone away. |
|
|
|
$heap->{client}->put($new_response) if defined $heap->{client}; |
|
|
|
|
|
|
|
# Shut down the client's connection when the response is sent. |
|
|
|
$kernel->yield("shutdown"); |
|
|
|
return; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
elsif ( $http_response->code != 200 ) { |
|
|
|
_log( LOG_INFO, "HTTP response was %d, for request to %s", |
|
|
|
$http_response->code, $http_request->uri ); |
|
|
|
my $new_response; |
|
|
|
if ($rec == 0) { |
|
|
|
$new_response = create_response( $http_response->code ); |
|
|
|
} |
|
|
|
else { |
|
|
|
redis_cache_url( $base_url, $http_request->uri ); |
|
|
|
$new_response = create_response( 200, $http_request->uri ); |
|
|
|
} |
|
|
|
|
|
|
|
# Avoid sending the response if the client has gone away. |
|
|
|
$heap->{client}->put($new_response) if defined $heap->{client}; |
|
|
|
|
|
|
|
# Shut down the client's connection when the response is sent. |
|
|
|
$kernel->yield("shutdown"); |
|
|
|
return; |
|
|
|
} |
|
|
|
my $response_type = $http_response->content_type(); |
|
|
|
if ( $response_type =~ /^text/i ) { |
|
|
|
my $content = $http_response->decoded_content(); |
|
|
|
my $p = HTML::HeadParser->new($http_response); |
|
|
|
$p->parse($content); |
|
|
|
my $expire = $http_response->header('Refresh'); |
|
|
|
if ( $http_response->is_redirect || $expire ) { |
|
|
|
my $redirect; |
|
|
|
if ($expire) { |
|
|
|
$expire =~ /URL=(\S+)/; |
|
|
|
$redirect = $1; |
|
|
|
} |
|
|
|
else { |
|
|
|
$redirect = $http_response->header('Location'); |
|
|
|
} |
|
|
|
if ($redirect) { |
|
|
|
if ( $redirect =~ /^https?:\/\// ) { |
|
|
|
_log( LOG_INFO, |
|
|
|
"HTML redirect from %s to %s for request from: %s", |
|
|
|
$http_response->base, |
|
|
|
$redirect, |
|
|
|
$heap->{remote_ip} |
|
|
|
); |
|
|
|
my $request = HTTP::Request->new( 'GET', $redirect ); |
|
|
|
$request->header( "Connection", "close" ); |
|
|
|
$request->header( "Proxy-Connection", "close" ); |
|
|
|
$kernel->post( "cl", "request", "got_response", $request, |
|
|
|
[ $rec + 1, $base_url ] ); |
|
|
|
return; |
|
|
|
} |
|
|
|
else { |
|
|
|
_log( LOG_INFO, |
|
|
|
"ignoring internal redirect from %s to %s for request from: %s", |
|
|
|
$http_response->base, |
|
|
|
$redirect, |
|
|
|
$heap->{remote_ip} |
|
|
|
); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
if ( $content =~ /location\s*=\s*["']*(https?:\/\/[^"'\s]+)["']*/im ) { |
|
|
|
my $redir = uri_unescape($1); |
|
|
|
_log( LOG_INFO, "js redirect from %s to %s for request from: %s", |
|
|
|
$http_response->base, $1, $heap->{remote_ip} ); |
|
|
|
my $request = HTTP::Request->new( 'GET', $redir ); |
|
|
|
$request->header( "Connection", "close" ); |
|
|
|
$request->header( "Proxy-Connection", "close" ); |
|
|
|
$kernel->post( "cl", "request", "got_response", $request, |
|
|
|
[ $rec + 1, $base_url ] ); |
|
|
|
return; |
|
|
|
} |
|
|
|
} |
|
|
|
elsif ( |
|
|
|
$with_swf |
|
|
|
&& ( |
|
|
|
$response_type eq 'application/x-shockwave-flash' |
|
|
|
|| ( $http_request->uri =~ /\.swf(\?.*)?$/i |
|
|
|
&& $http_response->code == 200 ) |
|
|
|
) |
|
|
|
) |
|
|
|
{ |
|
|
|
my $content = $http_response->decoded_content(); |
|
|
|
$swf_parser->parse($content); |
|
|
|
|
|
|
|
if ( $saved_swf_url ne "" ) { |
|
|
|
_log( LOG_INFO, "flash redirect from %s to %s for request from: %s", |
|
|
|
$http_response->base, $saved_swf_url, $heap->{remote_ip} ); |
|
|
|
my $request = HTTP::Request->new( 'GET', $saved_swf_url ); |
|
|
|
|
|
|
|
# Reset swf redirect global variable |
|
|
|
$saved_swf_url = ""; |
|
|
|
$request->header( "Connection", "close" ); |
|
|
|
$request->header( "Proxy-Connection", "close" ); |
|
|
|
$kernel->post( "cl", "request", "got_response", $request, |
|
|
|
[ $rec + 1, $base_url ] ); |
|
|
|
return; |
|
|
|
} |
|
|
|
} |
|
|
|
else { |
|
|
|
_log( LOG_INFO, |
|
|
|
"response wasn't text request from: %s, response is: %s", |
|
|
|
$heap->{remote_ip}, $response_type ); |
|
|
|
} |
|
|
|
|
|
|
|
_log( LOG_INFO, "redirect from %s to %s for request from: %s", |
|
|
|
$base_url, $http_request->uri, $heap->{remote_ip} ); |
|
|
|
|
|
|
|
# Write to cache |
|
|
|
redis_cache_url( $base_url, $http_request->uri ); |
|
|
|
my $new_response = |
|
|
|
create_response( $http_response->code, $http_request->uri ); |
|
|
|
|
|
|
|
# Avoid sending the response if the client has gone away. |
|
|
|
$heap->{client}->put($new_response) if defined $heap->{client}; |
|
|
|
|
|
|
|
# Shut down the client's connection when the response is sent. |
|
|
|
$kernel->yield("shutdown"); |
|
|
|
my ( $kernel, $heap ) = @_[ KERNEL, HEAP ]; |
|
|
|
|
|
|
|
my $http_request = $_[ARG0]->[0]; |
|
|
|
my $rec = $_[ARG0]->[1][0]; |
|
|
|
my $http_response = $_[ARG1]->[0]; |
|
|
|
my $base_url = $_[ARG0]->[1][1]; |
|
|
|
$saved_swf_url = ""; |
|
|
|
|
|
|
|
if ( $rec == 0 ) { |
|
|
|
$base_url = $http_request->uri; |
|
|
|
} |
|
|
|
else { |
|
|
|
|
|
|
|
# Check cache for each url |
|
|
|
my $redirect = redis_check_url( $http_request->uri ); |
|
|
|
if ($redirect) { |
|
|
|
_log( LOG_INFO, "Memcached redirect from %s to %s for request from: %s", |
|
|
|
$http_response->base, $redirect, $heap->{remote_ip} ); |
|
|
|
my $new_response = create_response( 200, $redirect ); |
|
|
|
|
|
|
|
# Avoid sending the response if the client has gone away. |
|
|
|
$heap->{client}->put($new_response) if defined $heap->{client}; |
|
|
|
|
|
|
|
# Shut down the client's connection when the response is sent. |
|
|
|
return; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if ($do_reopen_log) { |
|
|
|
$do_reopen_log = 0; |
|
|
|
reopen_log(); |
|
|
|
} |
|
|
|
|
|
|
|
if ( $rec > $cfg{max_rec} ) { |
|
|
|
_log( LOG_INFO, "Max recursion exceeded: %d from %s to %s for request from: %s", |
|
|
|
$rec, $base_url, $http_request->uri, $heap->{remote_ip} ); |
|
|
|
|
|
|
|
# Write to cache |
|
|
|
redis_cache_url( $base_url, $http_request->uri ); |
|
|
|
my $new_response = create_response( 200, $http_request->uri ); |
|
|
|
|
|
|
|
# Avoid sending the response if the client has gone away. |
|
|
|
$heap->{client}->put($new_response) if defined $heap->{client}; |
|
|
|
|
|
|
|
# Shut down the client's connection when the response is sent. |
|
|
|
$kernel->yield("shutdown"); |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
# Detect HTTP redirects |
|
|
|
if ( $http_response->is_redirect ) { |
|
|
|
my $redirect = $http_response->header('Location'); |
|
|
|
if ($redirect) { |
|
|
|
if ( $redirect =~ /^https?:\/\// ) { |
|
|
|
_log( LOG_INFO, "HTML redirect from %s to %s for request from: %s", |
|
|
|
$http_response->base, $redirect, $heap->{remote_ip} ); |
|
|
|
my $request = HTTP::Request->new( 'GET', $redirect ); |
|
|
|
$request->header( "Connection", "close" ); |
|
|
|
$request->header( "Proxy-Connection", "close" ); |
|
|
|
$kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] ); |
|
|
|
return; |
|
|
|
} |
|
|
|
else { |
|
|
|
_log( LOG_INFO, "ignoring internal redirect from %s to %s for request from: %s", |
|
|
|
$http_request->uri, $redirect, $heap->{remote_ip} ); |
|
|
|
my $new_response = create_response( 200, $http_request->uri ); |
|
|
|
|
|
|
|
# Avoid sending the response if the client has gone away. |
|
|
|
$heap->{client}->put($new_response) if defined $heap->{client}; |
|
|
|
|
|
|
|
# Shut down the client's connection when the response is sent. |
|
|
|
$kernel->yield("shutdown"); |
|
|
|
return; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
elsif ( $http_response->code != 200 ) { |
|
|
|
_log( LOG_INFO, "HTTP response was %d, for request to %s", $http_response->code, $http_request->uri ); |
|
|
|
my $new_response; |
|
|
|
if ( $rec == 0 ) { |
|
|
|
$new_response = create_response( $http_response->code ); |
|
|
|
} |
|
|
|
else { |
|
|
|
redis_cache_url( $base_url, $http_request->uri ); |
|
|
|
$new_response = create_response( 200, $http_request->uri ); |
|
|
|
} |
|
|
|
|
|
|
|
# Avoid sending the response if the client has gone away. |
|
|
|
$heap->{client}->put($new_response) if defined $heap->{client}; |
|
|
|
|
|
|
|
# Shut down the client's connection when the response is sent. |
|
|
|
$kernel->yield("shutdown"); |
|
|
|
return; |
|
|
|
} |
|
|
|
my $response_type = $http_response->content_type(); |
|
|
|
if ( $response_type =~ /^text/i ) { |
|
|
|
my $content = $http_response->decoded_content(); |
|
|
|
my $p = HTML::HeadParser->new($http_response); |
|
|
|
$p->parse($content); |
|
|
|
my $expire = $http_response->header('Refresh'); |
|
|
|
if ( $http_response->is_redirect || $expire ) { |
|
|
|
my $redirect; |
|
|
|
if ($expire) { |
|
|
|
$expire =~ /URL=(\S+)/; |
|
|
|
$redirect = $1; |
|
|
|
} |
|
|
|
else { |
|
|
|
$redirect = $http_response->header('Location'); |
|
|
|
} |
|
|
|
if ($redirect) { |
|
|
|
if ( $redirect =~ /^https?:\/\// ) { |
|
|
|
_log( LOG_INFO, "HTML redirect from %s to %s for request from: %s", |
|
|
|
$http_response->base, $redirect, $heap->{remote_ip} ); |
|
|
|
my $request = HTTP::Request->new( 'GET', $redirect ); |
|
|
|
$request->header( "Connection", "close" ); |
|
|
|
$request->header( "Proxy-Connection", "close" ); |
|
|
|
$kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] ); |
|
|
|
return; |
|
|
|
} |
|
|
|
else { |
|
|
|
_log( LOG_INFO, "ignoring internal redirect from %s to %s for request from: %s", |
|
|
|
$http_response->base, $redirect, $heap->{remote_ip} ); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
if ( $content =~ /location\s*=\s*["']*(https?:\/\/[^"'\s]+)["']*/im ) { |
|
|
|
my $redir = uri_unescape($1); |
|
|
|
_log( LOG_INFO, "js redirect from %s to %s for request from: %s", |
|
|
|
$http_response->base, $1, $heap->{remote_ip} ); |
|
|
|
my $request = HTTP::Request->new( 'GET', $redir ); |
|
|
|
$request->header( "Connection", "close" ); |
|
|
|
$request->header( "Proxy-Connection", "close" ); |
|
|
|
$kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] ); |
|
|
|
return; |
|
|
|
} |
|
|
|
} |
|
|
|
elsif ( |
|
|
|
$with_swf |
|
|
|
&& ( |
|
|
|
$response_type eq 'application/x-shockwave-flash' |
|
|
|
|| ( $http_request->uri =~ /\.swf(\?.*)?$/i |
|
|
|
&& $http_response->code == 200 ) |
|
|
|
) |
|
|
|
) |
|
|
|
{ |
|
|
|
my $content = $http_response->decoded_content(); |
|
|
|
$swf_parser->parse($content); |
|
|
|
|
|
|
|
if ( $saved_swf_url ne "" ) { |
|
|
|
_log( LOG_INFO, "flash redirect from %s to %s for request from: %s", |
|
|
|
$http_response->base, $saved_swf_url, $heap->{remote_ip} ); |
|
|
|
my $request = HTTP::Request->new( 'GET', $saved_swf_url ); |
|
|
|
|
|
|
|
# Reset swf redirect global variable |
|
|
|
$saved_swf_url = ""; |
|
|
|
$request->header( "Connection", "close" ); |
|
|
|
$request->header( "Proxy-Connection", "close" ); |
|
|
|
$kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] ); |
|
|
|
return; |
|
|
|
} |
|
|
|
} |
|
|
|
else { |
|
|
|
_log( LOG_INFO, "response wasn't text request from: %s, response is: %s", $heap->{remote_ip}, $response_type ); |
|
|
|
} |
|
|
|
|
|
|
|
_log( LOG_INFO, "redirect from %s to %s for request from: %s", $base_url, $http_request->uri, $heap->{remote_ip} ); |
|
|
|
|
|
|
|
# Write to cache |
|
|
|
redis_cache_url( $base_url, $http_request->uri ); |
|
|
|
my $new_response = create_response( $http_response->code, $http_request->uri ); |
|
|
|
|
|
|
|
# Avoid sending the response if the client has gone away. |
|
|
|
$heap->{client}->put($new_response) if defined $heap->{client}; |
|
|
|
|
|
|
|
# Shut down the client's connection when the response is sent. |
|
|
|
$kernel->yield("shutdown"); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
sub process_input { |
|
|
|
my ( $kernel, $heap, $request ) = @_[ KERNEL, HEAP, ARG0 ]; |
|
|
|
|
|
|
|
if ( $request->isa("HTTP::Response") ) { |
|
|
|
$heap->{client}->put($request); |
|
|
|
$kernel->yield("shutdown"); |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
my $domain; |
|
|
|
if ( $request->uri =~ /^http:\/\/([^\/]+)\// ) { |
|
|
|
my @parts = split( /\./, $1 ); |
|
|
|
my $c1 = pop @parts; |
|
|
|
my $c2 = pop @parts; |
|
|
|
$domain = "$c2.$c1"; |
|
|
|
} |
|
|
|
|
|
|
|
if ( |
|
|
|
( |
|
|
|
defined( $cfg{check_regexp} ) && $request->uri !~ $cfg{check_regexp} |
|
|
|
) |
|
|
|
|| ( defined( $cfg{check_domains} ) |
|
|
|
&& scalar( grep { $_ eq $domain } @{ $cfg{check_domains} } ) == 0 ) |
|
|
|
) |
|
|
|
{ |
|
|
|
my $new_response = create_response( 200, $request->uri ); |
|
|
|
|
|
|
|
# Avoid sending the response if the client has gone away. |
|
|
|
$heap->{client}->put($new_response) if defined $heap->{client}; |
|
|
|
$kernel->yield("shutdown"); |
|
|
|
|
|
|
|
# Shut down the client's connection when the response is sent. |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
# Check cache first |
|
|
|
my $redirect = redis_check_url( $request->uri ); |
|
|
|
if ($redirect) { |
|
|
|
_log( LOG_INFO, "Memcached redirect from %s to %s for request from: %s", |
|
|
|
$request->uri, $redirect, $heap->{remote_ip} ); |
|
|
|
my $new_response = create_response( 200, $redirect ); |
|
|
|
|
|
|
|
# Avoid sending the response if the client has gone away. |
|
|
|
$heap->{client}->put($new_response) if defined $heap->{client}; |
|
|
|
$kernel->yield("shutdown"); |
|
|
|
|
|
|
|
# Shut down the client's connection when the response is sent. |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
# Start http request |
|
|
|
my $new_request = HTTP::Request->new( 'GET', $request->uri ); |
|
|
|
$new_request->header( "Connection", "close" ); |
|
|
|
$new_request->header( "Proxy-Connection", "close" ); |
|
|
|
$kernel->post( "cl", "request", "got_response", $new_request, [ 0, "" ] ); |
|
|
|
my ( $kernel, $heap, $request ) = @_[ KERNEL, HEAP, ARG0 ]; |
|
|
|
|
|
|
|
if ( $request->isa("HTTP::Response") ) { |
|
|
|
$heap->{client}->put($request); |
|
|
|
$kernel->yield("shutdown"); |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
my $domain; |
|
|
|
if ( $request->uri =~ /^http:\/\/([^\/]+)\// ) { |
|
|
|
my @parts = split( /\./, $1 ); |
|
|
|
my $c1 = pop @parts; |
|
|
|
my $c2 = pop @parts; |
|
|
|
$domain = "$c2.$c1"; |
|
|
|
} |
|
|
|
|
|
|
|
if ( |
|
|
|
( defined( $cfg{check_regexp} ) && $request->uri !~ $cfg{check_regexp} ) |
|
|
|
|| ( defined( $cfg{check_domains} ) |
|
|
|
&& scalar( grep { $_ eq $domain } @{ $cfg{check_domains} } ) == 0 ) |
|
|
|
) |
|
|
|
{ |
|
|
|
my $new_response = create_response( 200, $request->uri ); |
|
|
|
|
|
|
|
# Avoid sending the response if the client has gone away. |
|
|
|
$heap->{client}->put($new_response) if defined $heap->{client}; |
|
|
|
$kernel->yield("shutdown"); |
|
|
|
|
|
|
|
# Shut down the client's connection when the response is sent. |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
# Check cache first |
|
|
|
my $redirect = redis_check_url( $request->uri ); |
|
|
|
if ($redirect) { |
|
|
|
_log( LOG_INFO, "Memcached redirect from %s to %s for request from: %s", |
|
|
|
$request->uri, $redirect, $heap->{remote_ip} ); |
|
|
|
my $new_response = create_response( 200, $redirect ); |
|
|
|
|
|
|
|
# Avoid sending the response if the client has gone away. |
|
|
|
$heap->{client}->put($new_response) if defined $heap->{client}; |
|
|
|
$kernel->yield("shutdown"); |
|
|
|
|
|
|
|
# Shut down the client's connection when the response is sent. |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
# Start http request |
|
|
|
my $new_request = HTTP::Request->new( 'GET', $request->uri ); |
|
|
|
$new_request->header( "Connection", "close" ); |
|
|
|
$new_request->header( "Proxy-Connection", "close" ); |
|
|
|
$kernel->post( "cl", "request", "got_response", $new_request, [ 0, "" ] ); |
|
|
|
} |
|
|
|
|
|
|
|
sub sig_DIE { |
|
|
|
my ( $sig, $ex ) = @_[ ARG0, ARG1 ]; |
|
|
|
_log( LOG_ERR, "$$: error in $ex->{event}: $ex->{error_str}" ); |
|
|
|
$poe_kernel->sig_handled(); |
|
|
|
|
|
|
|
# Send the signal to session that sent the original event. |
|
|
|
if ( $ex->{source_session} ne $_[SESSION] ) { |
|
|
|
$poe_kernel->signal( $ex->{source_session}, 'DIE', $sig, $ex ); |
|
|
|
} |
|
|
|
my ( $sig, $ex ) = @_[ ARG0, ARG1 ]; |
|
|
|
_log( LOG_ERR, "$$: error in $ex->{event}: $ex->{error_str}" ); |
|
|
|
$poe_kernel->sig_handled(); |
|
|
|
|
|
|
|
# Send the signal to session that sent the original event. |
|
|
|
if ( $ex->{source_session} ne $_[SESSION] ) { |
|
|
|
$poe_kernel->signal( $ex->{source_session}, 'DIE', $sig, $ex ); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
sub sig_CLD { |
|
|
|
my ( $heap, $child_pid ) = @_[ HEAP, ARG1 ]; |
|
|
|
return 0; |
|
|
|
my ( $heap, $child_pid ) = @_[ HEAP, ARG1 ]; |
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
############################### Main code fragment ################################## |
|
|
|
|
|
|
|
# Do daemonization |
|
|
|
if ( !$DEBUG ) { |
|
|
|
Proc::Daemon::Init; |
|
|
|
POE::Kernel->has_forked; |
|
|
|
setlogsock('unix'); |
|
|
|
openlog( 'redirector', 'ndelay,pid', $cfg{'facility'} ); |
|
|
|
Proc::Daemon::Init; |
|
|
|
POE::Kernel->has_forked; |
|
|
|
setlogsock('unix'); |
|
|
|
openlog( 'redirector', 'ndelay,pid', $cfg{'facility'} ); |
|
|
|
} |
|
|
|
|
|
|
|
# Try to eval config file |
|
|
|
if ( -f $cfg{cfg_file} ) { |
|
|
|
my $config = read_file( $cfg{cfg_file} ); |
|
|
|
eval $config; |
|
|
|
my $config = read_file( $cfg{cfg_file} ); |
|
|
|
eval $config; |
|
|
|
} |
|
|
|
|
|
|
|
_log( LOG_ALERT, "Process is already started, check $cfg{pidfile}" ) |
|
|
@@ -528,18 +502,18 @@ _log( LOG_ALERT, "Process is already started, check $cfg{pidfile}" ) |
|
|
|
|
|
|
|
# Drop privilleges |
|
|
|
if ( $> == 0 ) { |
|
|
|
my $uid = getpwnam( $cfg{user} ) |
|
|
|
or _log( LOG_ALERT, "user $cfg{user} unknown" ); |
|
|
|
my $gid = getgrnam( $cfg{group} ) |
|
|
|
or _log( LOG_ALERT, "group $cfg{group} unknown" ); |
|
|
|
$< = $> = $uid; |
|
|
|
$) = $( = $gid; |
|
|
|
my $uid = getpwnam( $cfg{user} ) |
|
|
|
or _log( LOG_ALERT, "user $cfg{user} unknown" ); |
|
|
|
my $gid = getgrnam( $cfg{group} ) |
|
|
|
or _log( LOG_ALERT, "group $cfg{group} unknown" ); |
|
|
|
$< = $> = $uid; |
|
|
|
$) = $( = $gid; |
|
|
|
} |
|
|
|
|
|
|
|
if ( !$DEBUG ) { |
|
|
|
_log( LOG_ALERT, "Cannot write to pidfile $cfg{pidfile}" ) |
|
|
|
if !open( PID, "> $cfg{pidfile}" ); |
|
|
|
close(PID); |
|
|
|
_log( LOG_ALERT, "Cannot write to pidfile $cfg{pidfile}" ) |
|
|
|
if !open( PID, "> $cfg{pidfile}" ); |
|
|
|
close(PID); |
|
|
|
} |
|
|
|
|
|
|
|
# Reopen log on SIGUSR1 |
|
|
@@ -551,40 +525,40 @@ $SIG{QUIT} = sub { $poe_kernel->stop(); }; |
|
|
|
$SIG{PIPE} = 'IGNORE'; |
|
|
|
|
|
|
|
if ( !$DEBUG ) { |
|
|
|
Proc::PidUtil::make_pidfile( $cfg{pidfile}, $$ ) |
|
|
|
or _log( LOG_ALERT, "Cannot write pidfile $cfg{pidfile}" ); |
|
|
|
Proc::PidUtil::make_pidfile( $cfg{pidfile}, $$ ) |
|
|
|
or _log( LOG_ALERT, "Cannot write pidfile $cfg{pidfile}" ); |
|
|
|
} |
|
|
|
|
|
|
|
# Init redis connection |
|
|
|
_log( LOG_INFO, "Starting redis connection" ); |
|
|
|
$redis_conn = Redis::Fast->new( |
|
|
|
server => $cfg{redis_server}, |
|
|
|
reconnect => 60, |
|
|
|
every => 500_000, |
|
|
|
encoding => undef, |
|
|
|
server => $cfg{redis_server}, |
|
|
|
reconnect => 60, |
|
|
|
every => 500_000, |
|
|
|
encoding => undef, |
|
|
|
); |
|
|
|
|
|
|
|
# POE part |
|
|
|
POE::Component::Client::HTTP->spawn( |
|
|
|
Alias => 'cl', |
|
|
|
MaxSize => $cfg{max_size}, # Remove for unlimited page sizes |
|
|
|
Timeout => $cfg{http_timeout}, |
|
|
|
ConnectionManager => POE::Component::Client::Keepalive->new( |
|
|
|
max_per_host => 256, |
|
|
|
max_open => 1024, |
|
|
|
keep_alive => 1, |
|
|
|
timeout => $cfg{http_timeout}, |
|
|
|
), |
|
|
|
Alias => 'cl', |
|
|
|
MaxSize => $cfg{max_size}, # Remove for unlimited page sizes |
|
|
|
Timeout => $cfg{http_timeout}, |
|
|
|
ConnectionManager => POE::Component::Client::Keepalive->new( |
|
|
|
max_per_host => 256, |
|
|
|
max_open => 1024, |
|
|
|
keep_alive => 1, |
|
|
|
timeout => $cfg{http_timeout}, |
|
|
|
), |
|
|
|
); |
|
|
|
|
|
|
|
_log( LOG_INFO, "Starting HTTP server" ); |
|
|
|
POE::Component::Server::TCP->new( |
|
|
|
Alias => "", |
|
|
|
Port => $cfg{port}, |
|
|
|
ClientFilter => 'POE::Filter::HTTPD', |
|
|
|
Alias => "", |
|
|
|
Port => $cfg{port}, |
|
|
|
ClientFilter => 'POE::Filter::HTTPD', |
|
|
|
|
|
|
|
ClientInput => \&process_input, |
|
|
|
InlineStates => { got_response => \&process_client, }, |
|
|
|
ClientInput => \&process_input, |
|
|
|
InlineStates => { got_response => \&process_client, }, |
|
|
|
); |
|
|
|
|
|
|
|
swf_init_parser(); |
|
|
@@ -597,6 +571,6 @@ exit 0; |
|
|
|
############################## Final block #################################### |
|
|
|
|
|
|
|
END { |
|
|
|
_log( LOG_NOTICE, 'redirector stopped' ); |
|
|
|
closelog(); |
|
|
|
_log( LOG_NOTICE, 'redirector stopped' ); |
|
|
|
closelog(); |
|
|
|
} |