123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521 |
- #!/usr/bin/perl
-
- use warnings;
- use strict;
-
- # Required ports:
- # www/p5-POE-Component-Client-HTTP
- # www/p5-POE-Component-Server-HTTP
- # dns/p5-POE-Component-Client-DNS
- # databases/p5-Cache-Memcached-Fast
- # devel/p5-Proc-Daemon
- # sysutils/p5-Proc-PidUtil
- # security/p5-Digest-SHA256
-
- # POE::Component::Client::HTTP uses HTTP::Request and response
- # objects.
-
- use POSIX qw(strftime);
- use HTTP::Request::Common qw(GET POST);
-
- use POE qw(Component::Server::TCP Filter::HTTPD Component::Client::HTTP); # p5-POE-Component-Client-HTTP
- use HTTP::Response;
- use HTML::HeadParser;
- use SWF::Element; # p5-SWF-File
- use Cache::Memcached::Fast;
- use Digest::SHA256;
-
- use HTML::HeadParser;
- use Proc::Daemon;
- use Proc::PidUtil;
- use URI::Escape qw(uri_unescape);
- use Sys::Syslog qw/:standard :macros setlogsock/;
-
- my $swf_parser;
- my $saved_swf_url = "";
-
- my $DEBUG = grep { $_ eq '-debug' } @ARGV;
-
- our %cfg = (
- port => 8080,
- max_size => 102400,
- http_timeout => 5,
- max_rec => 5,
- pidfile => '/var/run/rspamd/redirector.pid',
- logfile => '/var/log/rspamd-redirector.log',
- do_log => 0,
- debug => 0,
- memcached_servers => [ { address => 'localhost:11211', weight => 2.5 },
- ],
-
- facility => LOG_LOCAL3, # syslog facility
- log_level => LOG_INFO,
- digest_bits => 256,
- cache_expire => 3600,
- user => '@RSPAMD_USER@',
- group => '@RSPAMD_GROUP@',
- cfg_file => '@CMAKE_INSTALL_PREFIX@/etc/rspamd-redirector.conf',
- );
-
- our $do_reopen_log = 0;
- our $memd;
-
- ############################################ Subs ########################################
-
- # Read file into string
- sub read_file {
- my ($file) = @_;
-
- open(IN, $file) or die "Can't open $file: $!";
- local $/;
- my $content = <IN>;
- close IN;
-
- return $content;
- }
-
- # Write log line:
- sub _log {
- my ($l,$w,@s)=@_;
-
- if ($DEBUG) {
- printf STDERR $w."\n", @s
- } else {
- syslog ($l, $w."\n", @s) if ($l <= $cfg{'log_level'})
- }
- }
-
- # Init swf parser
- sub swf_init_parser {
- $swf_parser = SWF::Parser->new('tag-callback' => \&swf_tag_callback);
- }
-
- # Checking for SWF url
- sub swf_search_get_url {
- my $actions = shift;
- my $saved_pool_str = "";
-
- for my $action (@$actions) {
- if ($action->tag_name eq 'ActionConstantPool') {
- my $pool = $action->ConstantPool;
- for my $string (@$pool) {
- if ($string =~ /^https?:\/\//) {
- $saved_pool_str = $string->value;
- }
- }
- }
- elsif ($action->tag_name eq 'ActionGetURL2') {
- if ($saved_pool_str ne "") {
- $saved_swf_url = $saved_pool_str;
- }
- }
- elsif ($action->tag_name =~ 'ActionGetURL') {
- $saved_swf_url = $action->UrlString->value;
- }
- }
- }
-
- # SWF check tag utility
- sub swf_check_tag {
- my ($t, $stream) = @_;
- my ($tagname) = $t->tag_name;
-
- for ($tagname) {
- (/^Do(Init)?Action$/ or /^DefineButton$/) and do {
- swf_search_get_url ($t->Actions);
- last;
- };
- /^PlaceObject2$/ and do {
- for my $ca (@{$t->ClipActions}) {
- swf_search_get_url ($ca->Actions);
- }
- last;
- };
- /^DefineButton2$/ and do {
- for my $ba (@{$t->Actions}) {
- swf_search_get_url ($ba->Actions);
- }
- last;
- };
- /^DefineSprite$/ and do {
- for my $tag (@{$t->ControlTags}) {
- swf_search_get_url ($tag, $stream);
- }
- last;
- };
- }
- }
-
- # Callback for swf parser
- sub swf_tag_callback {
- my ($self, $tag, $length, $stream)=@_;
- my $t = SWF::Element::Tag->new (Tag=>$tag, Length=>$length);
- my ($tagname) = $t->tag_name;
-
- return unless
- $tagname eq 'DoAction' or
- $tagname eq 'DoInitAction' or
- $tagname eq 'PlaceObject2' or
- $tagname eq 'DefineButton' or
- $tagname eq 'DefineButton2' or
- $tagname eq 'DefineSprite';
-
- if ($tagname eq 'DefineSprite') {
-
- # Tags in the sprite are not unpacked here.
-
- $t->shallow_unpack ($stream);
- $t->TagStream->parse (callback => \&swf_tag_callback);
- return;
-
- } elsif ($tagname eq 'PlaceObject2') {
-
- # Most of PlaceObject2 tags don't have ClipActions.
-
- $t->lookahead_Flags ($stream);
- return unless $t->PlaceFlagHasClipActions;
- }
-
- # unpack the tag and search actions.
-
- $t->unpack ($stream);
- swf_check_tag ($t);
- }
-
- # Check url from memcached cache first
- sub memcached_check_url {
- my ( $url ) = @_;
-
- my $context = Digest::SHA256::new($cfg{digest_bits});
-
- return $memd->get(unpack("H*", ($context->hash($url))));
- }
-
- # Write url to memcached key
- sub memcached_cache_url {
- my ( $url, $url_real ) = @_;
-
- my $context = Digest::SHA256::new($cfg{digest_bits});
-
- $memd->set(unpack("H*", ($context->hash($url))), $url_real, $cfg{cache_expire});
- }
-
- # POE http client callback
- sub process_client {
- my ( $kernel, $heap ) = @_[ KERNEL, HEAP ];
-
- my $http_request = $_[ARG0]->[0];
- my $rec = $_[ARG0]->[1][0];
- my $http_response = $_[ARG1]->[0];
- my $base_url = $_[ARG0]->[1][1];
- $saved_swf_url = "";
-
- if ($rec == 0) {
- $base_url = $http_request->uri;
- }
- else {
- # Check cache for each url
- my $redirect = memcached_check_url($http_request->uri);
- if ($redirect) {
- _log (LOG_INFO, "Memcached redirect from %s to %s for request from: %s", $http_response->base, $redirect, $heap->{remote_ip});
- my $new_response = HTTP::Response->new(200);
- $new_response->header("Uri", $redirect);
-
- # Avoid sending the response if the client has gone away.
- $heap->{client}->put($new_response) if defined $heap->{client};
-
- # Shut down the client's connection when the response is sent.
- return;
- }
- }
-
- if ($do_reopen_log) {
- $do_reopen_log = 0;
- reopen_log();
- }
-
- if ($rec > $cfg{max_rec}) {
- _log (LOG_INFO, "Max recursion exceeded: %d from %s to %s for request from: %s", $rec, $base_url, $http_request->uri, $heap->{remote_ip});
- # Write to cache
- memcached_cache_url ($base_url, $http_request->uri);
- my $new_response = HTTP::Response->new(200);
- $new_response->header("Uri", $http_request->uri);
-
- # Avoid sending the response if the client has gone away.
- $heap->{client}->put($new_response) if defined $heap->{client};
-
- # Shut down the client's connection when the response is sent.
- $kernel->yield("shutdown");
- return;
- }
-
- # Detect HTTP redirects
- if ($http_response->is_redirect) {
- my $redirect = $http_response->header('Location');
- if ($redirect) {
- if ($redirect =~ /^https?:\/\//) {
- _log (LOG_INFO, "HTML redirect from %s to %s for request from: %s", $http_response->base, $redirect, $heap->{remote_ip});
- my $request = HTTP::Request->new('GET', $redirect);
- $request->header( "Connection", "close" );
- $request->header( "Proxy-Connection", "close" );
- $kernel->post( "cl", "request", "got_response", $request, [$rec + 1, $base_url]);
- return;
- }
- else {
- _log (LOG_INFO, "ignoring internal redirect from %s to %s for request from: %s", $http_request->uri, $redirect, $heap->{remote_ip});
- my $new_response = HTTP::Response->new(200);
- $new_response->header("Uri", $http_request->uri);
-
- # Avoid sending the response if the client has gone away.
- $heap->{client}->put($new_response) if defined $heap->{client};
-
- # Shut down the client's connection when the response is sent.
- $kernel->yield("shutdown");
- return;
- }
- }
- }
- elsif ($http_response->code != 200) {
- _log (LOG_INFO, "HTTP response was %d, for request to %s", $http_response->code, $http_request->uri);
- my $new_response = HTTP::Response->new($http_response->code);
-
- # Avoid sending the response if the client has gone away.
- $heap->{client}->put($new_response) if defined $heap->{client};
-
- # Shut down the client's connection when the response is sent.
- $kernel->yield("shutdown");
- return;
- }
- my $response_type = $http_response->content_type();
- if ( $response_type =~ /^text/i ) {
- my $content = $http_response->decoded_content();
- my $p = HTML::HeadParser->new($http_response);
- $p->parse($content);
- my $expire = $http_response->header('Refresh');
- if ($http_response->is_redirect || $expire) {
- my $redirect;
- if ($expire) {
- $expire =~ /URL=(\S+)/;
- $redirect = $1;
- }
- else {
- $redirect = $http_response->header('Location');
- }
- if ($redirect) {
- if ($redirect =~ /^https?:\/\//) {
- _log (LOG_INFO, "HTML redirect from %s to %s for request from: %s", $http_response->base, $redirect, $heap->{remote_ip});
- my $request = HTTP::Request->new('GET', $redirect);
- $request->header( "Connection", "close" );
- $request->header( "Proxy-Connection", "close" );
- $kernel->post( "cl", "request", "got_response", $request, [$rec + 1, $base_url]);
- return;
- }
- else {
- _log (LOG_INFO, "ignoring internal redirect from %s to %s for request from: %s", $http_response->base, $redirect, $heap->{remote_ip});
- }
- }
- }
- if ($content =~ /location\s*=\s*["']*(https?:\/\/[^"'\s]+)["']*/im) {
- my $redir = uri_unescape ($1);
- _log (LOG_INFO, "js redirect from %s to %s for request from: %s", $http_response->base, $1, $heap->{remote_ip});
- my $request = HTTP::Request->new('GET', $redir);
- $request->header("Connection", "close" );
- $request->header("Proxy-Connection", "close" );
- $kernel->post("cl", "request", "got_response", $request, [$rec + 1, $base_url]);
- return;
- }
- }
- elsif ( $response_type eq 'application/x-shockwave-flash' ||
- ($http_request->uri =~ /\.swf(\?.*)?$/i && $http_response->code == 200)) {
- my $content = $http_response->decoded_content();
- $swf_parser->parse($content);
-
- if ($saved_swf_url ne "") {
- _log (LOG_INFO, "flash redirect from %s to %s for request from: %s", $http_response->base, $saved_swf_url, $heap->{remote_ip});
- my $request = HTTP::Request->new('GET', $saved_swf_url);
- # Reset swf redirect global variable
- $saved_swf_url = "";
- $request->header( "Connection", "close" );
- $request->header( "Proxy-Connection", "close" );
- $kernel->post( "cl", "request", "got_response", $request, [$rec + 1, $base_url]);
- return;
- }
- }
- else {
- _log (LOG_INFO, "response wasn't text request from: %s, response is: %s", $heap->{remote_ip}, $response_type);
- }
-
- _log (LOG_INFO, "redirect from %s to %s for request from: %s", $base_url, $http_request->uri, $heap->{remote_ip});
- # Write to cache
- memcached_cache_url ($base_url, $http_request->uri);
- my $new_response = HTTP::Response->new($http_response->code);
- $new_response->header("Uri", $http_request->uri);
-
- # Avoid sending the response if the client has gone away.
- $heap->{client}->put($new_response) if defined $heap->{client};
-
- # Shut down the client's connection when the response is sent.
- $kernel->yield("shutdown");
-
- }
-
- sub process_input {
- my ( $kernel, $heap, $request ) = @_[ KERNEL, HEAP, ARG0 ];
-
- if ($request->isa ("HTTP::Response")) {
- $heap->{client}->put($request);
- $kernel->yield("shutdown");
- return;
- }
-
- my $domain;
- if ($request->uri =~ /^http:\/\/([^\/]+)\//) {
- my @parts = split(/\./, $1);
- my $c1 = pop @parts;
- my $c2 = pop @parts;
- $domain = "$c2.$c1";
- }
-
- if ((defined($cfg{check_regexp}) && $request->uri !~ $cfg{check_regexp}) ||
- (defined($cfg{check_domains}) && scalar(grep {$_ eq $domain} @{$cfg{check_domains}}) == 0)) {
- my $new_response = HTTP::Response->new(200);
- $new_response->header("Uri", $request->uri);
- $new_response->header("Connection", "close");
- $new_response->header("Proxy-Connection", "close");
-
- # Avoid sending the response if the client has gone away.
- $heap->{client}->put($new_response) if defined $heap->{client};
- $kernel->yield("shutdown");
-
- # Shut down the client's connection when the response is sent.
- return;
- }
-
- # Check cache first
- my $redirect = memcached_check_url($request->uri);
- if ($redirect) {
- _log (LOG_INFO, "Memcached redirect from %s to %s for request from: %s", $request->uri, $redirect, $heap->{remote_ip});
- my $new_response = HTTP::Response->new(200);
- $new_response->header("Uri", $redirect);
- $new_response->header("Connection", "close");
- $new_response->header("Proxy-Connection", "close");
-
- # Avoid sending the response if the client has gone away.
- $heap->{client}->put($new_response) if defined $heap->{client};
- $kernel->yield("shutdown");
-
- # Shut down the client's connection when the response is sent.
- return;
- }
- # Start http request
- my $new_request = HTTP::Request->new('GET', $request->uri);
- $new_request->header( "Connection", "close" );
- $new_request->header( "Proxy-Connection", "close" );
- $kernel->post( "cl", "request", "got_response", $new_request, [0, ""]);
- }
-
- sub sig_DIE {
- my( $sig, $ex ) = @_[ ARG0, ARG1 ];
- _log(LOG_ERR, "$$: error in $ex->{event}: $ex->{error_str}");
- $poe_kernel->sig_handled();
-
- # Send the signal to session that sent the original event.
- if( $ex->{source_session} ne $_[SESSION] ) {
- $poe_kernel->signal( $ex->{source_session}, 'DIE', $sig, $ex );
- }
- }
-
-
- ############################### Main code fragment ##################################
-
- # Do daemonization
- if (!$DEBUG) {
- Proc::Daemon::Init;
- POE::Kernel->has_forked;
- setlogsock('unix');
- openlog('redirector', 'ndelay,pid', $cfg{'facility'})
- }
-
- # Try to eval config file
- if (-f $cfg{cfg_file}) {
- my $config = read_file ($cfg{cfg_file});
- eval $config;
- }
-
- die "Process is already started, check $cfg{pidfile}" if Proc::PidUtil::is_running($cfg{pidfile});
-
-
- # Drop privilleges
- if ($> == 0) {
- my $uid = getpwnam($cfg{user}) or die "user $cfg{user} unknown";
- my $gid = getgrnam($cfg{group}) or die "group $cfg{group} unknown";
- $< = $> = $uid;
- $) = $( = $gid;
- }
-
- if (!$DEBUG) {
- die "Cannot write to pidfile $cfg{pidfile}" if ! open(PID, "> $cfg{pidfile}");
- close(PID);
- }
-
- # Reopen log on SIGUSR1
- $poe_kernel->sig(DIE => 'sig_DIE');
- $SIG{USR1} = sub { $do_reopen_log = 1; $poe_kernel->sig_handled(); };
- $SIG{INT} = sub { $poe_kernel->stop(); };
- $SIG{QUIT} = sub { $poe_kernel->stop(); };
- $SIG{PIPE} = 'IGNORE';
-
- if (!$DEBUG) {
- Proc::PidUtil::make_pidfile($cfg{pidfile}, $$) or die "Cannot write pidfile $cfg{pidfile}";
- }
-
- # Init memcached connection
- _log(LOG_INFO, "Starting memcached connection");
- $memd = new Cache::Memcached::Fast({
- servers => $cfg{memcached_servers},
- connect_timeout => 0.2,
- io_timeout => 0.5,
- max_failures => 3,
- failure_timeout => 2,
- ketama_points => 150,
- hash_namespace => 1,
- serialize_methods => [ \&Storable::freeze, \&Storable::thaw ],
- utf8 => ($^V ge v5.8.1 ? 1 : 0),
- });
-
- # POE part
- POE::Component::Client::HTTP->spawn(
- Alias => 'cl',
- MaxSize => $cfg{max_size}, # Remove for unlimited page sizes
- Timeout => $cfg{http_timeout},
- ConnectionManager => POE::Component::Client::Keepalive->new(
- max_per_host => 256,
- max_open => 1024,
- keep_alive => 1,
- timeout => $cfg{http_timeout},
- ),
- );
-
- _log(LOG_INFO, "Starting HTTP server");
- POE::Component::Server::TCP->new
- ( Alias => "",
- Port => $cfg{port},
- ClientFilter => 'POE::Filter::HTTPD',
-
- ClientInput => \&process_input,
- InlineStates => { got_response => \&process_client, },
- );
-
- swf_init_parser ();
- _log(LOG_NOTICE, "Starting URL resolver");
-
- # Start POE. This will run the server until it exits.
- POE::Kernel->run();
- exit 0;
-
- ############################## Final block ####################################
-
- END {
- _log(LOG_NOTICE, 'redirector stopped');
- closelog();
- }
|