diff options
Diffstat (limited to 'utils')
-rw-r--r-- | utils/cgp_rspamd.pl | 452 | ||||
-rwxr-xr-x | utils/redirector.pl.in | 850 |
2 files changed, 637 insertions, 665 deletions
diff --git a/utils/cgp_rspamd.pl b/utils/cgp_rspamd.pl index e55ac5791..1d4d4406f 100644 --- a/utils/cgp_rspamd.pl +++ b/utils/cgp_rspamd.pl @@ -21,278 +21,276 @@ my $request_timeout = 15; # 15 seconds by default my $reject_message = "Spam message rejected"; GetOptions( - "host=s" => \$rspamd_host, - "header=s" => \$header, - "reject-message=s" => \$reject_message, - "max-size=i" => \$max_size, - "timeout=f" => \$request_timeout, - "help|?" => \$help, - "man" => \$man + "host=s" => \$rspamd_host, + "header=s" => \$header, + "reject-message=s" => \$reject_message, + "max-size=i" => \$max_size, + "timeout=f" => \$request_timeout, + "help|?" => \$help, + "man" => \$man ) or pod2usage(2); pod2usage(1) if $help; pod2usage( -exitval => 0, -verbose => 2 ) if $man; my $main_domain = cgp_main_domain(); -my $scanned = 0; +my $scanned = 0; # Turn off bufferization as required by CGP $| = 1; sub cgp_main_domain { - if ( open(my $fh, 'Settings/Main.settings') ) { - while (<$fh>) { - if ( /^\s+DomainName\s+=\s+([^;]+);/ ) { - return $1; - } + if ( open( my $fh, 'Settings/Main.settings' ) ) { + while (<$fh>) { + if (/^\s+DomainName\s+=\s+([^;]+);/) { + return $1; + } + } } - } } sub cgp_string { - my ($in) = @_; + my ($in) = @_; - $in =~ s/\"/\\"/g; - $in =~ s/\n/\\n/gms; - $in =~ s/\r/\\r/mgs; - $in =~ s/\t/ /g; + $in =~ s/\"/\\"/g; + $in =~ s/\n/\\n/gms; + $in =~ s/\r/\\r/mgs; + $in =~ s/\t/ /g; - return "\"$in\""; + return "\"$in\""; } sub rspamd_scan { - my ( $tag, $file ) = @_; - - my $http_callback = sub { - my ( $body, $hdr ) = @_; - - if ( $hdr && $hdr->{Status} =~ /^2/ ) { - my $js = eval('decode_json($body)'); - $scanned++; - - if ( !$js ) { - print "* Rspamd: Bad response for $file: invalid JSON: parse error\n"; - print "$tag FAILURE\n"; - } - else { - my $def = $js; - my $headers = ""; - - if ( !$def ) { - print -"* Rspamd: Bad response for $file: invalid JSON: default is missing\n"; - print "$tag FAILURE\n"; - } - else { - my $action = $def->{'action'}; - my $id = $js->{'message-id'}; - - my $symbols = ""; - while ( my ( $k, $s ) = each( %{$def->{'symbols'}}) ) { - $symbols .= sprintf "%s(%.2f);", $k, $s->{'score'}; - } - - printf -"* Rspamd: Scanned %s; id: <%s>; Score: %.2f / %.2f; Symbols: [%s]\n", - $file, $id, $def->{'score'}, $def->{'required_score'}, $symbols; - - if ( $js->{'dkim-signature'} ) { - $headers .= "DKIM-Signature: " . $js->{'dkim-signature'}; - } - - if ( $js->{'milter'} ) { - my $block = $js->{'milter'}; - - if ( $block->{'add_headers'} ) { - while ( my ( $h, $v ) = each( %{ $block->{'add_headers'} } ) ) { - if (ref($v) eq 'HASH') { - if ($headers eq "") { - $headers .= "$h: $v->{value}"; - } - else { - $headers .= "\\e$h: $v->{value}"; - } + my ( $tag, $file ) = @_; + + my $http_callback = sub { + my ( $body, $hdr ) = @_; + + if ( $hdr && $hdr->{Status} =~ /^2/ ) { + my $js = eval('decode_json($body)'); + $scanned++; + + if ( !$js ) { + print "* Rspamd: Bad response for $file: invalid JSON: parse error\n"; + print "$tag FAILURE\n"; + } + else { + my $def = $js; + my $headers = ""; + + if ( !$def ) { + print "* Rspamd: Bad response for $file: invalid JSON: default is missing\n"; + print "$tag FAILURE\n"; } else { - if ($headers eq "") { - $headers .= "$h: $v"; - } - else { - $headers .= "\\e$h: $v"; - } + my $action = $def->{'action'}; + my $id = $js->{'message-id'}; + + my $symbols = ""; + while ( my ( $k, $s ) = each( %{ $def->{'symbols'} } ) ) { + $symbols .= sprintf "%s(%.2f);", $k, $s->{'score'}; + } + + printf + "* Rspamd: Scanned %s; id: <%s>; Score: %.2f / %.2f; Symbols: [%s]\n", + $file, $id, $def->{'score'}, $def->{'required_score'}, $symbols; + + if ( $js->{'dkim-signature'} ) { + $headers .= "DKIM-Signature: " . $js->{'dkim-signature'}; + } + + if ( $js->{'milter'} ) { + my $block = $js->{'milter'}; + + if ( $block->{'add_headers'} ) { + while ( my ( $h, $v ) = each( %{ $block->{'add_headers'} } ) ) { + if ( ref($v) eq 'HASH' ) { + if ( $headers eq "" ) { + $headers .= "$h: $v->{value}"; + } + else { + $headers .= "\\e$h: $v->{value}"; + } + } + else { + if ( $headers eq "" ) { + $headers .= "$h: $v"; + } + else { + $headers .= "\\e$h: $v"; + } + } + } + } + } + + if ( $action eq 'reject' ) { + print "$tag DISCARD\n"; + return; + } + elsif ( $action eq 'add header' || $action eq 'rewrite subject' ) { + if ( $headers eq "" ) { + $headers .= "$header"; + } + else { + $headers .= "\\e$header"; + } + } + elsif ( $action eq 'soft reject' ) { + print "$tag REJECT Try again later\n"; + return; + } + + if ( $headers eq "" ) { + print "$tag OK\n"; + } + else { + print "$tag ADDHEADER " . cgp_string($headers) . " OK\n"; + } } - } } - } - - if ( $action eq 'reject' ) { - print "$tag DISCARD\n"; - return; - } - elsif ( $action eq 'add header' || $action eq 'rewrite subject' ) { - if ( $headers eq "" ) { - $headers .= "$header"; + } + else { + if ($hdr) { + print "* Rspamd: Bad response for $file: HTTP error: $hdr->{Status} $hdr->{Reason}\n"; } else { - $headers .= "\\e$header"; + print "* Rspamd: Bad response for $file: IO error: $!\n"; } - } - elsif ( $action eq 'soft reject' ) { - print "$tag REJECT Try again later\n"; - return; - } - - if ( $headers eq "" ) { - print "$tag OK\n"; - } - else { - print "$tag ADDHEADER " . cgp_string($headers) . " OK\n"; - } + print "$tag FAILURE\n"; } - } + }; + + if ($local) { + + # Use file scan + # XXX: not implemented now due to CGP queue format + http_get( + "http://$rspamd_host/symbols?file=$file", + timeout => $request_timeout, + $http_callback + ); } else { - if ($hdr) { - print -"* Rspamd: Bad response for $file: HTTP error: $hdr->{Status} $hdr->{Reason}\n"; - } - else { - print "* Rspamd: Bad response for $file: IO error: $!\n"; - } - print "$tag FAILURE\n"; - } - }; - - if ($local) { - - # Use file scan - # XXX: not implemented now due to CGP queue format - http_get( - "http://$rspamd_host/symbols?file=$file", - timeout => $request_timeout, - $http_callback - ); - } - else { - my $sb = stat($file); - - if ( !$sb || $sb->size > $max_size ) { - if ($sb) { - print "* File $file is too large: " . $sb->size . "\n$tag FAILURE\n"; - - } - else { - print "* Cannot stat $file: $!\n$tag FAILURE\n"; - } - return; - } - aio_load( - $file, - sub { - my ($data) = @_; - - if ( !$data ) { - print "* Cannot open $file: $!\n$tag FAILURE\n"; - return; - } + my $sb = stat($file); + + if ( !$sb || $sb->size > $max_size ) { + if ($sb) { + print "* File $file is too large: " . $sb->size . "\n$tag FAILURE\n"; - # Parse CGP format - $data =~ s/^((?:[^\n]*\n)*?)\n(.*)$/$2/ms; - my @envelope = split /\n/, $1; - chomp(@envelope); - my $from; - my @rcpts; - my $ip; - my $user; - - foreach my $elt (@envelope) { - if ( $elt =~ /^P\s[^<]*(<[^>]*>).*$/ ) { - $from = $1; - } - elsif ( $elt =~ /^R\s[^<]*(<[^>]*>).*$/ ) { - push @rcpts, $1; - } - elsif ( $elt =~ /^S (?:<([^>]+)> )?(?:SMTP|HTTPU?|AIRSYNC|XIMSS) \[([0-9a-f.:]+)\]/ ) { - if ($1) { - $user = $1; - } - if ($2) { - $ip = $2; } - } - elsif ( $elt =~ /^S (?:<([^>]+)> )?(?:DSN|GROUP|LIST|PBX|PIPE|RULE) \[0\.0\.0\.0\]/ ) { - if ($1) { - $user = $1; + else { + print "* Cannot stat $file: $!\n$tag FAILURE\n"; } - $ip = '127.2.4.7'; - } + return; } + aio_load( + $file, + sub { + my ($data) = @_; + + if ( !$data ) { + print "* Cannot open $file: $!\n$tag FAILURE\n"; + return; + } - my $headers = {}; - if ( $file =~ /\/([^\/.]+)\.msg$/ ) { - $headers->{'Queue-ID'} = $1; - } - if ($from) { - $headers->{From} = $from; - } - if ( scalar(@rcpts) > 0 ) { + # Parse CGP format + $data =~ s/^((?:[^\n]*\n)*?)\n(.*)$/$2/ms; + my @envelope = split /\n/, $1; + chomp(@envelope); + my $from; + my @rcpts; + my $ip; + my $user; + + foreach my $elt (@envelope) { + if ( $elt =~ /^P\s[^<]*(<[^>]*>).*$/ ) { + $from = $1; + } + elsif ( $elt =~ /^R\s[^<]*(<[^>]*>).*$/ ) { + push @rcpts, $1; + } + elsif ( $elt =~ /^S (?:<([^>]+)> )?(?:SMTP|HTTPU?|AIRSYNC|XIMSS) \[([0-9a-f.:]+)\]/ ) { + if ($1) { + $user = $1; + } + if ($2) { + $ip = $2; + } + } + elsif ( $elt =~ /^S (?:<([^>]+)> )?(?:DSN|GROUP|LIST|PBX|PIPE|RULE) \[0\.0\.0\.0\]/ ) { + if ($1) { + $user = $1; + } + $ip = '127.2.4.7'; + } + } - # XXX: Anyevent cannot parse headers with multiple values - $headers->{Rcpt} = join(',', @rcpts); - } - if ($ip) { - $headers->{IP} = $ip; - } - if ($user) { - $headers->{User} = $user; - } - if ($main_domain) { - $headers->{'MTA-Tag'} = $main_domain; - } + my $headers = {}; + if ( $file =~ /\/([^\/.]+)\.msg$/ ) { + $headers->{'Queue-ID'} = $1; + } + if ($from) { + $headers->{From} = $from; + } + if ( scalar(@rcpts) > 0 ) { - http_post( - "http://$rspamd_host/checkv2", $data, - timeout => $request_timeout, - headers => $headers, - $http_callback + # XXX: Anyevent cannot parse headers with multiple values + $headers->{Rcpt} = join( ',', @rcpts ); + } + if ($ip) { + $headers->{IP} = $ip; + } + if ($user) { + $headers->{User} = $user; + } + if ($main_domain) { + $headers->{'MTA-Tag'} = $main_domain; + } + + http_post( + "http://$rspamd_host/checkv2", $data, + timeout => $request_timeout, + headers => $headers, + $http_callback + ); + } ); - } - ); - } + } } # Show informational message print "* Rspamd CGP filter has been started\n"; my $w = AnyEvent->io( - fh => \*STDIN, - poll => 'r', - cb => sub { - chomp( my $input = <STDIN> ); - - if ( $input =~ /^(\d+)\s+(\S+)(\s+(\S+)\s*)?$/ ) { - my $tag = $1; - my $cmd = $2; - - if ( $cmd eq "INTF" ) { - print "$input\n"; - } - elsif ( $cmd eq "FILE" && $4 ) { - my $file = $4; - print "* Scanning file $file\n"; - rspamd_scan $tag, $file; - } - elsif ( $cmd eq "QUIT" ) { - print "* Terminating after scanning of $scanned files\n"; - print "$tag OK\n"; - exit 0; - } - else { - print "* Unknown command $cmd\n"; - print "$tag FAILURE\n"; - } + fh => \*STDIN, + poll => 'r', + cb => sub { + chomp( my $input = <STDIN> ); + + if ( $input =~ /^(\d+)\s+(\S+)(\s+(\S+)\s*)?$/ ) { + my $tag = $1; + my $cmd = $2; + + if ( $cmd eq "INTF" ) { + print "$input\n"; + } + elsif ( $cmd eq "FILE" && $4 ) { + my $file = $4; + print "* Scanning file $file\n"; + rspamd_scan $tag, $file; + } + elsif ( $cmd eq "QUIT" ) { + print "* Terminating after scanning of $scanned files\n"; + print "$tag OK\n"; + exit 0; + } + else { + print "* Unknown command $cmd\n"; + print "$tag FAILURE\n"; + } + } } - } ); EV::run; diff --git a/utils/redirector.pl.in b/utils/redirector.pl.in index 3bd6ab112..4fbe3844a 100755 --- a/utils/redirector.pl.in +++ b/utils/redirector.pl.in @@ -34,22 +34,22 @@ eval "require SWF::Element" or $with_swf = 0; # p5-SWF-File my $DEBUG = grep { $_ eq '-debug' } @ARGV; our %cfg = ( - port => 8080, - max_size => 102400, - http_timeout => 5, - max_rec => 5, - pidfile => '/tmp/redirector.pid', - do_log => 0, - debug => 0, - redis_server => 'localhost:6379', - - facility => LOG_LOCAL3, # syslog facility - log_level => LOG_INFO, - digest_bits => 256, - cache_expire => 3600, - user => '@RSPAMD_USER@', - group => '@RSPAMD_GROUP@', - cfg_file => '@CMAKE_INSTALL_PREFIX@/etc/rspamd-redirector.conf', + port => 8080, + max_size => 102400, + http_timeout => 5, + max_rec => 5, + pidfile => '/tmp/redirector.pid', + do_log => 0, + debug => 0, + redis_server => 'localhost:6379', + + facility => LOG_LOCAL3, # syslog facility + log_level => LOG_INFO, + digest_bits => 256, + cache_expire => 3600, + user => '@RSPAMD_USER@', + group => '@RSPAMD_GROUP@', + cfg_file => '@CMAKE_INSTALL_PREFIX@/etc/rspamd-redirector.conf', ); our $do_reopen_log = 0; @@ -59,468 +59,442 @@ our $redis_conn; # Read file into string sub read_file { - my ($file) = @_; + my ($file) = @_; - open( IN, $file ) or _log( LOG_ALERT, "Can't open $file: $!" ); - local $/; - my $content = <IN>; - close IN; + open( IN, $file ) or _log( LOG_ALERT, "Can't open $file: $!" ); + local $/; + my $content = <IN>; + close IN; - return $content; + return $content; } # Write log line: sub _log { - my ( $l, $w, @s ) = @_; - - if ($DEBUG) { - printf STDERR $w . "\n", @s; - } - else { - syslog( $l, $w . "\n", @s ) if ( $l <= $cfg{'log_level'} ); - } - - if ( $l == LOG_ALERT ) { - die $w; - } + my ( $l, $w, @s ) = @_; + + if ($DEBUG) { + printf STDERR $w . "\n", @s; + } + else { + syslog( $l, $w . "\n", @s ) if ( $l <= $cfg{'log_level'} ); + } + + if ( $l == LOG_ALERT ) { + die $w; + } } # Init swf parser sub swf_init_parser { - if ($with_swf) { - $swf_parser = SWF::Parser->new( 'tag-callback' => \&swf_tag_callback ); - } + if ($with_swf) { + $swf_parser = SWF::Parser->new( 'tag-callback' => \&swf_tag_callback ); + } } # Checking for SWF url sub swf_search_get_url { - my $actions = shift; - my $saved_pool_str = ""; - - for my $action (@$actions) { - if ( $action->tag_name eq 'ActionConstantPool' ) { - my $pool = $action->ConstantPool; - for my $string (@$pool) { - if ( $string =~ /^https?:\/\// ) { - $saved_pool_str = $string->value; - } - } - } - elsif ( $action->tag_name eq 'ActionGetURL2' ) { - if ( $saved_pool_str ne "" ) { - $saved_swf_url = $saved_pool_str; - } - } - elsif ( $action->tag_name =~ 'ActionGetURL' ) { - $saved_swf_url = $action->UrlString->value; - } - } + my $actions = shift; + my $saved_pool_str = ""; + + for my $action (@$actions) { + if ( $action->tag_name eq 'ActionConstantPool' ) { + my $pool = $action->ConstantPool; + for my $string (@$pool) { + if ( $string =~ /^https?:\/\// ) { + $saved_pool_str = $string->value; + } + } + } + elsif ( $action->tag_name eq 'ActionGetURL2' ) { + if ( $saved_pool_str ne "" ) { + $saved_swf_url = $saved_pool_str; + } + } + elsif ( $action->tag_name =~ 'ActionGetURL' ) { + $saved_swf_url = $action->UrlString->value; + } + } } # SWF check tag utility sub swf_check_tag { - my ( $t, $stream ) = @_; - my ($tagname) = $t->tag_name; - - for ($tagname) { - ( /^Do(Init)?Action$/ or /^DefineButton$/ ) and do { - swf_search_get_url( $t->Actions ); - last; - }; - /^PlaceObject2$/ and do { - for my $ca ( @{ $t->ClipActions } ) { - swf_search_get_url( $ca->Actions ); - } - last; - }; - /^DefineButton2$/ and do { - for my $ba ( @{ $t->Actions } ) { - swf_search_get_url( $ba->Actions ); - } - last; - }; - /^DefineSprite$/ and do { - for my $tag ( @{ $t->ControlTags } ) { - swf_search_get_url( $tag, $stream ); - } - last; - }; - } + my ( $t, $stream ) = @_; + my ($tagname) = $t->tag_name; + + for ($tagname) { + ( /^Do(Init)?Action$/ or /^DefineButton$/ ) and do { + swf_search_get_url( $t->Actions ); + last; + }; + /^PlaceObject2$/ and do { + for my $ca ( @{ $t->ClipActions } ) { + swf_search_get_url( $ca->Actions ); + } + last; + }; + /^DefineButton2$/ and do { + for my $ba ( @{ $t->Actions } ) { + swf_search_get_url( $ba->Actions ); + } + last; + }; + /^DefineSprite$/ and do { + for my $tag ( @{ $t->ControlTags } ) { + swf_search_get_url( $tag, $stream ); + } + last; + }; + } } # Callback for swf parser sub swf_tag_callback { - my ( $self, $tag, $length, $stream ) = @_; - my $t = SWF::Element::Tag->new( Tag => $tag, Length => $length ); - my ($tagname) = $t->tag_name; + my ( $self, $tag, $length, $stream ) = @_; + my $t = SWF::Element::Tag->new( Tag => $tag, Length => $length ); + my ($tagname) = $t->tag_name; - return - unless $tagname eq 'DoAction' - or $tagname eq 'DoInitAction' - or $tagname eq 'PlaceObject2' - or $tagname eq 'DefineButton' - or $tagname eq 'DefineButton2' - or $tagname eq 'DefineSprite'; + return + unless $tagname eq 'DoAction' + or $tagname eq 'DoInitAction' + or $tagname eq 'PlaceObject2' + or $tagname eq 'DefineButton' + or $tagname eq 'DefineButton2' + or $tagname eq 'DefineSprite'; - if ( $tagname eq 'DefineSprite' ) { + if ( $tagname eq 'DefineSprite' ) { - # Tags in the sprite are not unpacked here. + # Tags in the sprite are not unpacked here. - $t->shallow_unpack($stream); - $t->TagStream->parse( callback => \&swf_tag_callback ); - return; + $t->shallow_unpack($stream); + $t->TagStream->parse( callback => \&swf_tag_callback ); + return; - } - elsif ( $tagname eq 'PlaceObject2' ) { + } + elsif ( $tagname eq 'PlaceObject2' ) { - # Most of PlaceObject2 tags don't have ClipActions. + # Most of PlaceObject2 tags don't have ClipActions. - $t->lookahead_Flags($stream); - return unless $t->PlaceFlagHasClipActions; - } + $t->lookahead_Flags($stream); + return unless $t->PlaceFlagHasClipActions; + } - # unpack the tag and search actions. + # unpack the tag and search actions. - $t->unpack($stream); - swf_check_tag($t); + $t->unpack($stream); + swf_check_tag($t); } # Check url from redis cache first sub redis_check_url { - my ($url) = @_; + my ($url) = @_; - my $context = Digest->new("SHA-512"); - $context->add($url); - return $redis_conn->get( $context->digest() ); + my $context = Digest->new("SHA-512"); + $context->add($url); + return $redis_conn->get( $context->digest() ); } # Write url to redis key sub redis_cache_url { - my ( $url, $url_real ) = @_; - - if ( $url ne $url_real ) { - my $context = Digest->new("SHA-512"); - $context->add($url); - if (!$redis_conn->setex( $context->digest(), $cfg{cache_expire}, $url_real)) { - _log(LOG_INFO, "cannot save redirect from $url to $url_real in redis"); - } - } + my ( $url, $url_real ) = @_; + + if ( $url ne $url_real ) { + my $context = Digest->new("SHA-512"); + $context->add($url); + if ( !$redis_conn->setex( $context->digest(), $cfg{cache_expire}, $url_real ) ) { + _log( LOG_INFO, "cannot save redirect from $url to $url_real in redis" ); + } + } } sub create_response { - my ( $code, $uri ) = @_; + my ( $code, $uri ) = @_; - my $new_response; + my $new_response; - if ($uri) { - $new_response = HTTP::Response->new( $code, 'OK' ); - $new_response->header( "Uri", $uri ); - $new_response->content($uri); - $new_response->content_length( length($uri) ); - } - else { - $new_response = HTTP::Response->new($code); - $new_response->content_length(0); - } + if ($uri) { + $new_response = HTTP::Response->new( $code, 'OK' ); + $new_response->header( "Uri", $uri ); + $new_response->content($uri); + $new_response->content_length( length($uri) ); + } + else { + $new_response = HTTP::Response->new($code); + $new_response->content_length(0); + } - $new_response->header( "Connection", "Close" ); - $new_response->header( "Proxy-Connection", "Close" ); + $new_response->header( "Connection", "Close" ); + $new_response->header( "Proxy-Connection", "Close" ); - return $new_response; + return $new_response; } # POE http client callback sub process_client { - my ( $kernel, $heap ) = @_[ KERNEL, HEAP ]; - - my $http_request = $_[ARG0]->[0]; - my $rec = $_[ARG0]->[1][0]; - my $http_response = $_[ARG1]->[0]; - my $base_url = $_[ARG0]->[1][1]; - $saved_swf_url = ""; - - if ( $rec == 0 ) { - $base_url = $http_request->uri; - } - else { - - # Check cache for each url - my $redirect = redis_check_url( $http_request->uri ); - if ($redirect) { - _log( LOG_INFO, - "Memcached redirect from %s to %s for request from: %s", - $http_response->base, $redirect, $heap->{remote_ip} ); - my $new_response = create_response( 200, $redirect ); - - # Avoid sending the response if the client has gone away. - $heap->{client}->put($new_response) if defined $heap->{client}; - - # Shut down the client's connection when the response is sent. - return; - } - } - - if ($do_reopen_log) { - $do_reopen_log = 0; - reopen_log(); - } - - if ( $rec > $cfg{max_rec} ) { - _log( LOG_INFO, - "Max recursion exceeded: %d from %s to %s for request from: %s", - $rec, $base_url, $http_request->uri, $heap->{remote_ip} ); - - # Write to cache - redis_cache_url( $base_url, $http_request->uri ); - my $new_response = create_response( 200, $http_request->uri ); - - # Avoid sending the response if the client has gone away. - $heap->{client}->put($new_response) if defined $heap->{client}; - - # Shut down the client's connection when the response is sent. - $kernel->yield("shutdown"); - return; - } - - # Detect HTTP redirects - if ( $http_response->is_redirect ) { - my $redirect = $http_response->header('Location'); - if ($redirect) { - if ( $redirect =~ /^https?:\/\// ) { - _log( LOG_INFO, - "HTML redirect from %s to %s for request from: %s", - $http_response->base, $redirect, $heap->{remote_ip} ); - my $request = HTTP::Request->new( 'GET', $redirect ); - $request->header( "Connection", "close" ); - $request->header( "Proxy-Connection", "close" ); - $kernel->post( "cl", "request", "got_response", $request, - [ $rec + 1, $base_url ] ); - return; - } - else { - _log( LOG_INFO, -"ignoring internal redirect from %s to %s for request from: %s", - $http_request->uri, - $redirect, - $heap->{remote_ip} - ); - my $new_response = create_response( 200, $http_request->uri ); - - # Avoid sending the response if the client has gone away. - $heap->{client}->put($new_response) if defined $heap->{client}; - - # Shut down the client's connection when the response is sent. - $kernel->yield("shutdown"); - return; - } - } - } - elsif ( $http_response->code != 200 ) { - _log( LOG_INFO, "HTTP response was %d, for request to %s", - $http_response->code, $http_request->uri ); - my $new_response; - if ($rec == 0) { - $new_response = create_response( $http_response->code ); - } - else { - redis_cache_url( $base_url, $http_request->uri ); - $new_response = create_response( 200, $http_request->uri ); - } - - # Avoid sending the response if the client has gone away. - $heap->{client}->put($new_response) if defined $heap->{client}; - - # Shut down the client's connection when the response is sent. - $kernel->yield("shutdown"); - return; - } - my $response_type = $http_response->content_type(); - if ( $response_type =~ /^text/i ) { - my $content = $http_response->decoded_content(); - my $p = HTML::HeadParser->new($http_response); - $p->parse($content); - my $expire = $http_response->header('Refresh'); - if ( $http_response->is_redirect || $expire ) { - my $redirect; - if ($expire) { - $expire =~ /URL=(\S+)/; - $redirect = $1; - } - else { - $redirect = $http_response->header('Location'); - } - if ($redirect) { - if ( $redirect =~ /^https?:\/\// ) { - _log( LOG_INFO, - "HTML redirect from %s to %s for request from: %s", - $http_response->base, - $redirect, - $heap->{remote_ip} - ); - my $request = HTTP::Request->new( 'GET', $redirect ); - $request->header( "Connection", "close" ); - $request->header( "Proxy-Connection", "close" ); - $kernel->post( "cl", "request", "got_response", $request, - [ $rec + 1, $base_url ] ); - return; - } - else { - _log( LOG_INFO, -"ignoring internal redirect from %s to %s for request from: %s", - $http_response->base, - $redirect, - $heap->{remote_ip} - ); - } - } - } - if ( $content =~ /location\s*=\s*["']*(https?:\/\/[^"'\s]+)["']*/im ) { - my $redir = uri_unescape($1); - _log( LOG_INFO, "js redirect from %s to %s for request from: %s", - $http_response->base, $1, $heap->{remote_ip} ); - my $request = HTTP::Request->new( 'GET', $redir ); - $request->header( "Connection", "close" ); - $request->header( "Proxy-Connection", "close" ); - $kernel->post( "cl", "request", "got_response", $request, - [ $rec + 1, $base_url ] ); - return; - } - } - elsif ( - $with_swf - && ( - $response_type eq 'application/x-shockwave-flash' - || ( $http_request->uri =~ /\.swf(\?.*)?$/i - && $http_response->code == 200 ) - ) - ) - { - my $content = $http_response->decoded_content(); - $swf_parser->parse($content); - - if ( $saved_swf_url ne "" ) { - _log( LOG_INFO, "flash redirect from %s to %s for request from: %s", - $http_response->base, $saved_swf_url, $heap->{remote_ip} ); - my $request = HTTP::Request->new( 'GET', $saved_swf_url ); - - # Reset swf redirect global variable - $saved_swf_url = ""; - $request->header( "Connection", "close" ); - $request->header( "Proxy-Connection", "close" ); - $kernel->post( "cl", "request", "got_response", $request, - [ $rec + 1, $base_url ] ); - return; - } - } - else { - _log( LOG_INFO, - "response wasn't text request from: %s, response is: %s", - $heap->{remote_ip}, $response_type ); - } - - _log( LOG_INFO, "redirect from %s to %s for request from: %s", - $base_url, $http_request->uri, $heap->{remote_ip} ); - - # Write to cache - redis_cache_url( $base_url, $http_request->uri ); - my $new_response = - create_response( $http_response->code, $http_request->uri ); - - # Avoid sending the response if the client has gone away. - $heap->{client}->put($new_response) if defined $heap->{client}; - - # Shut down the client's connection when the response is sent. - $kernel->yield("shutdown"); + my ( $kernel, $heap ) = @_[ KERNEL, HEAP ]; + + my $http_request = $_[ARG0]->[0]; + my $rec = $_[ARG0]->[1][0]; + my $http_response = $_[ARG1]->[0]; + my $base_url = $_[ARG0]->[1][1]; + $saved_swf_url = ""; + + if ( $rec == 0 ) { + $base_url = $http_request->uri; + } + else { + + # Check cache for each url + my $redirect = redis_check_url( $http_request->uri ); + if ($redirect) { + _log( LOG_INFO, "Memcached redirect from %s to %s for request from: %s", + $http_response->base, $redirect, $heap->{remote_ip} ); + my $new_response = create_response( 200, $redirect ); + + # Avoid sending the response if the client has gone away. + $heap->{client}->put($new_response) if defined $heap->{client}; + + # Shut down the client's connection when the response is sent. + return; + } + } + + if ($do_reopen_log) { + $do_reopen_log = 0; + reopen_log(); + } + + if ( $rec > $cfg{max_rec} ) { + _log( LOG_INFO, "Max recursion exceeded: %d from %s to %s for request from: %s", + $rec, $base_url, $http_request->uri, $heap->{remote_ip} ); + + # Write to cache + redis_cache_url( $base_url, $http_request->uri ); + my $new_response = create_response( 200, $http_request->uri ); + + # Avoid sending the response if the client has gone away. + $heap->{client}->put($new_response) if defined $heap->{client}; + + # Shut down the client's connection when the response is sent. + $kernel->yield("shutdown"); + return; + } + + # Detect HTTP redirects + if ( $http_response->is_redirect ) { + my $redirect = $http_response->header('Location'); + if ($redirect) { + if ( $redirect =~ /^https?:\/\// ) { + _log( LOG_INFO, "HTML redirect from %s to %s for request from: %s", + $http_response->base, $redirect, $heap->{remote_ip} ); + my $request = HTTP::Request->new( 'GET', $redirect ); + $request->header( "Connection", "close" ); + $request->header( "Proxy-Connection", "close" ); + $kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] ); + return; + } + else { + _log( LOG_INFO, "ignoring internal redirect from %s to %s for request from: %s", + $http_request->uri, $redirect, $heap->{remote_ip} ); + my $new_response = create_response( 200, $http_request->uri ); + + # Avoid sending the response if the client has gone away. + $heap->{client}->put($new_response) if defined $heap->{client}; + + # Shut down the client's connection when the response is sent. + $kernel->yield("shutdown"); + return; + } + } + } + elsif ( $http_response->code != 200 ) { + _log( LOG_INFO, "HTTP response was %d, for request to %s", $http_response->code, $http_request->uri ); + my $new_response; + if ( $rec == 0 ) { + $new_response = create_response( $http_response->code ); + } + else { + redis_cache_url( $base_url, $http_request->uri ); + $new_response = create_response( 200, $http_request->uri ); + } + + # Avoid sending the response if the client has gone away. + $heap->{client}->put($new_response) if defined $heap->{client}; + + # Shut down the client's connection when the response is sent. + $kernel->yield("shutdown"); + return; + } + my $response_type = $http_response->content_type(); + if ( $response_type =~ /^text/i ) { + my $content = $http_response->decoded_content(); + my $p = HTML::HeadParser->new($http_response); + $p->parse($content); + my $expire = $http_response->header('Refresh'); + if ( $http_response->is_redirect || $expire ) { + my $redirect; + if ($expire) { + $expire =~ /URL=(\S+)/; + $redirect = $1; + } + else { + $redirect = $http_response->header('Location'); + } + if ($redirect) { + if ( $redirect =~ /^https?:\/\// ) { + _log( LOG_INFO, "HTML redirect from %s to %s for request from: %s", + $http_response->base, $redirect, $heap->{remote_ip} ); + my $request = HTTP::Request->new( 'GET', $redirect ); + $request->header( "Connection", "close" ); + $request->header( "Proxy-Connection", "close" ); + $kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] ); + return; + } + else { + _log( LOG_INFO, "ignoring internal redirect from %s to %s for request from: %s", + $http_response->base, $redirect, $heap->{remote_ip} ); + } + } + } + if ( $content =~ /location\s*=\s*["']*(https?:\/\/[^"'\s]+)["']*/im ) { + my $redir = uri_unescape($1); + _log( LOG_INFO, "js redirect from %s to %s for request from: %s", + $http_response->base, $1, $heap->{remote_ip} ); + my $request = HTTP::Request->new( 'GET', $redir ); + $request->header( "Connection", "close" ); + $request->header( "Proxy-Connection", "close" ); + $kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] ); + return; + } + } + elsif ( + $with_swf + && ( + $response_type eq 'application/x-shockwave-flash' + || ( $http_request->uri =~ /\.swf(\?.*)?$/i + && $http_response->code == 200 ) + ) + ) + { + my $content = $http_response->decoded_content(); + $swf_parser->parse($content); + + if ( $saved_swf_url ne "" ) { + _log( LOG_INFO, "flash redirect from %s to %s for request from: %s", + $http_response->base, $saved_swf_url, $heap->{remote_ip} ); + my $request = HTTP::Request->new( 'GET', $saved_swf_url ); + + # Reset swf redirect global variable + $saved_swf_url = ""; + $request->header( "Connection", "close" ); + $request->header( "Proxy-Connection", "close" ); + $kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] ); + return; + } + } + else { + _log( LOG_INFO, "response wasn't text request from: %s, response is: %s", $heap->{remote_ip}, $response_type ); + } + + _log( LOG_INFO, "redirect from %s to %s for request from: %s", $base_url, $http_request->uri, $heap->{remote_ip} ); + + # Write to cache + redis_cache_url( $base_url, $http_request->uri ); + my $new_response = create_response( $http_response->code, $http_request->uri ); + + # Avoid sending the response if the client has gone away. + $heap->{client}->put($new_response) if defined $heap->{client}; + + # Shut down the client's connection when the response is sent. + $kernel->yield("shutdown"); } sub process_input { - my ( $kernel, $heap, $request ) = @_[ KERNEL, HEAP, ARG0 ]; - - if ( $request->isa("HTTP::Response") ) { - $heap->{client}->put($request); - $kernel->yield("shutdown"); - return; - } - - my $domain; - if ( $request->uri =~ /^http:\/\/([^\/]+)\// ) { - my @parts = split( /\./, $1 ); - my $c1 = pop @parts; - my $c2 = pop @parts; - $domain = "$c2.$c1"; - } - - if ( - ( - defined( $cfg{check_regexp} ) && $request->uri !~ $cfg{check_regexp} - ) - || ( defined( $cfg{check_domains} ) - && scalar( grep { $_ eq $domain } @{ $cfg{check_domains} } ) == 0 ) - ) - { - my $new_response = create_response( 200, $request->uri ); - - # Avoid sending the response if the client has gone away. - $heap->{client}->put($new_response) if defined $heap->{client}; - $kernel->yield("shutdown"); - - # Shut down the client's connection when the response is sent. - return; - } - - # Check cache first - my $redirect = redis_check_url( $request->uri ); - if ($redirect) { - _log( LOG_INFO, "Memcached redirect from %s to %s for request from: %s", - $request->uri, $redirect, $heap->{remote_ip} ); - my $new_response = create_response( 200, $redirect ); - - # Avoid sending the response if the client has gone away. - $heap->{client}->put($new_response) if defined $heap->{client}; - $kernel->yield("shutdown"); - - # Shut down the client's connection when the response is sent. - return; - } - - # Start http request - my $new_request = HTTP::Request->new( 'GET', $request->uri ); - $new_request->header( "Connection", "close" ); - $new_request->header( "Proxy-Connection", "close" ); - $kernel->post( "cl", "request", "got_response", $new_request, [ 0, "" ] ); + my ( $kernel, $heap, $request ) = @_[ KERNEL, HEAP, ARG0 ]; + + if ( $request->isa("HTTP::Response") ) { + $heap->{client}->put($request); + $kernel->yield("shutdown"); + return; + } + + my $domain; + if ( $request->uri =~ /^http:\/\/([^\/]+)\// ) { + my @parts = split( /\./, $1 ); + my $c1 = pop @parts; + my $c2 = pop @parts; + $domain = "$c2.$c1"; + } + + if ( + ( defined( $cfg{check_regexp} ) && $request->uri !~ $cfg{check_regexp} ) + || ( defined( $cfg{check_domains} ) + && scalar( grep { $_ eq $domain } @{ $cfg{check_domains} } ) == 0 ) + ) + { + my $new_response = create_response( 200, $request->uri ); + + # Avoid sending the response if the client has gone away. + $heap->{client}->put($new_response) if defined $heap->{client}; + $kernel->yield("shutdown"); + + # Shut down the client's connection when the response is sent. + return; + } + + # Check cache first + my $redirect = redis_check_url( $request->uri ); + if ($redirect) { + _log( LOG_INFO, "Memcached redirect from %s to %s for request from: %s", + $request->uri, $redirect, $heap->{remote_ip} ); + my $new_response = create_response( 200, $redirect ); + + # Avoid sending the response if the client has gone away. + $heap->{client}->put($new_response) if defined $heap->{client}; + $kernel->yield("shutdown"); + + # Shut down the client's connection when the response is sent. + return; + } + + # Start http request + my $new_request = HTTP::Request->new( 'GET', $request->uri ); + $new_request->header( "Connection", "close" ); + $new_request->header( "Proxy-Connection", "close" ); + $kernel->post( "cl", "request", "got_response", $new_request, [ 0, "" ] ); } sub sig_DIE { - my ( $sig, $ex ) = @_[ ARG0, ARG1 ]; - _log( LOG_ERR, "$$: error in $ex->{event}: $ex->{error_str}" ); - $poe_kernel->sig_handled(); - - # Send the signal to session that sent the original event. - if ( $ex->{source_session} ne $_[SESSION] ) { - $poe_kernel->signal( $ex->{source_session}, 'DIE', $sig, $ex ); - } + my ( $sig, $ex ) = @_[ ARG0, ARG1 ]; + _log( LOG_ERR, "$$: error in $ex->{event}: $ex->{error_str}" ); + $poe_kernel->sig_handled(); + + # Send the signal to session that sent the original event. + if ( $ex->{source_session} ne $_[SESSION] ) { + $poe_kernel->signal( $ex->{source_session}, 'DIE', $sig, $ex ); + } } sub sig_CLD { - my ( $heap, $child_pid ) = @_[ HEAP, ARG1 ]; - return 0; + my ( $heap, $child_pid ) = @_[ HEAP, ARG1 ]; + return 0; } ############################### Main code fragment ################################## # Do daemonization if ( !$DEBUG ) { - Proc::Daemon::Init; - POE::Kernel->has_forked; - setlogsock('unix'); - openlog( 'redirector', 'ndelay,pid', $cfg{'facility'} ); + Proc::Daemon::Init; + POE::Kernel->has_forked; + setlogsock('unix'); + openlog( 'redirector', 'ndelay,pid', $cfg{'facility'} ); } # Try to eval config file if ( -f $cfg{cfg_file} ) { - my $config = read_file( $cfg{cfg_file} ); - eval $config; + my $config = read_file( $cfg{cfg_file} ); + eval $config; } _log( LOG_ALERT, "Process is already started, check $cfg{pidfile}" ) @@ -528,18 +502,18 @@ _log( LOG_ALERT, "Process is already started, check $cfg{pidfile}" ) # Drop privilleges if ( $> == 0 ) { - my $uid = getpwnam( $cfg{user} ) - or _log( LOG_ALERT, "user $cfg{user} unknown" ); - my $gid = getgrnam( $cfg{group} ) - or _log( LOG_ALERT, "group $cfg{group} unknown" ); - $< = $> = $uid; - $) = $( = $gid; + my $uid = getpwnam( $cfg{user} ) + or _log( LOG_ALERT, "user $cfg{user} unknown" ); + my $gid = getgrnam( $cfg{group} ) + or _log( LOG_ALERT, "group $cfg{group} unknown" ); + $< = $> = $uid; + $) = $( = $gid; } if ( !$DEBUG ) { - _log( LOG_ALERT, "Cannot write to pidfile $cfg{pidfile}" ) - if !open( PID, "> $cfg{pidfile}" ); - close(PID); + _log( LOG_ALERT, "Cannot write to pidfile $cfg{pidfile}" ) + if !open( PID, "> $cfg{pidfile}" ); + close(PID); } # Reopen log on SIGUSR1 @@ -551,40 +525,40 @@ $SIG{QUIT} = sub { $poe_kernel->stop(); }; $SIG{PIPE} = 'IGNORE'; if ( !$DEBUG ) { - Proc::PidUtil::make_pidfile( $cfg{pidfile}, $$ ) - or _log( LOG_ALERT, "Cannot write pidfile $cfg{pidfile}" ); + Proc::PidUtil::make_pidfile( $cfg{pidfile}, $$ ) + or _log( LOG_ALERT, "Cannot write pidfile $cfg{pidfile}" ); } # Init redis connection _log( LOG_INFO, "Starting redis connection" ); $redis_conn = Redis::Fast->new( - server => $cfg{redis_server}, - reconnect => 60, - every => 500_000, - encoding => undef, + server => $cfg{redis_server}, + reconnect => 60, + every => 500_000, + encoding => undef, ); # POE part POE::Component::Client::HTTP->spawn( - Alias => 'cl', - MaxSize => $cfg{max_size}, # Remove for unlimited page sizes - Timeout => $cfg{http_timeout}, - ConnectionManager => POE::Component::Client::Keepalive->new( - max_per_host => 256, - max_open => 1024, - keep_alive => 1, - timeout => $cfg{http_timeout}, - ), + Alias => 'cl', + MaxSize => $cfg{max_size}, # Remove for unlimited page sizes + Timeout => $cfg{http_timeout}, + ConnectionManager => POE::Component::Client::Keepalive->new( + max_per_host => 256, + max_open => 1024, + keep_alive => 1, + timeout => $cfg{http_timeout}, + ), ); _log( LOG_INFO, "Starting HTTP server" ); POE::Component::Server::TCP->new( - Alias => "", - Port => $cfg{port}, - ClientFilter => 'POE::Filter::HTTPD', + Alias => "", + Port => $cfg{port}, + ClientFilter => 'POE::Filter::HTTPD', - ClientInput => \&process_input, - InlineStates => { got_response => \&process_client, }, + ClientInput => \&process_input, + InlineStates => { got_response => \&process_client, }, ); swf_init_parser(); @@ -597,6 +571,6 @@ exit 0; ############################## Final block #################################### END { - _log( LOG_NOTICE, 'redirector stopped' ); - closelog(); + _log( LOG_NOTICE, 'redirector stopped' ); + closelog(); } |