+++ /dev/null
-#!/usr/bin/env perl
-
-use warnings;
-use strict;
-
-# Required ports:
-# www/p5-POE-Component-Client-HTTP
-# www/p5-POE-Component-Server-HTTP
-# dns/p5-POE-Component-Client-DNS
-# databases/p5-Cache-Memcached-Fast
-# devel/p5-Proc-Daemon
-# sysutils/p5-Proc-PidUtil
-
-# POE::Component::Client::HTTP uses HTTP::Request and response
-# objects.
-
-use POSIX qw(strftime);
-use HTTP::Request::Common qw(GET POST);
-use HTTP::Response;
-use URI::Escape qw(uri_unescape);
-use Sys::Syslog qw/:standard :macros setlogsock/;
-use HTML::HeadParser;
-use Digest;
-use Proc::Daemon;
-use Proc::PidUtil;
-use POE qw(@POE_LOOP@ Component::Server::TCP Filter::HTTPD Component::Client::HTTP);
-use Redis::Fast;
-
-my $with_swf = 1;
-my $swf_parser;
-my $saved_swf_url = "";
-eval "require SWF::Element" or $with_swf = 0; # p5-SWF-File
-
-my $DEBUG = grep { $_ eq '-debug' } @ARGV;
-
-our %cfg = (
- port => 8080,
- max_size => 102400,
- http_timeout => 5,
- max_rec => 5,
- pidfile => '/tmp/redirector.pid',
- do_log => 0,
- debug => 0,
- redis_server => 'localhost:6379',
-
- facility => LOG_LOCAL3, # syslog facility
- log_level => LOG_INFO,
- digest_bits => 256,
- cache_expire => 3600,
- user => '@RSPAMD_USER@',
- group => '@RSPAMD_GROUP@',
- cfg_file => '@CMAKE_INSTALL_PREFIX@/etc/rspamd-redirector.conf',
-);
-
-our $do_reopen_log = 0;
-our $redis_conn;
-
-############################################ Subs ########################################
-
-# Read file into string
-sub read_file {
- my ($file) = @_;
-
- open( IN, $file ) or _log( LOG_ALERT, "Can't open $file: $!" );
- local $/;
- my $content = <IN>;
- close IN;
-
- return $content;
-}
-
-# Write log line:
-sub _log {
- my ( $l, $w, @s ) = @_;
-
- if ($DEBUG) {
- printf STDERR $w . "\n", @s;
- }
- else {
- syslog( $l, $w . "\n", @s ) if ( $l <= $cfg{'log_level'} );
- }
-
- if ( $l == LOG_ALERT ) {
- die $w;
- }
-}
-
-# Init swf parser
-sub swf_init_parser {
- if ($with_swf) {
- $swf_parser = SWF::Parser->new( 'tag-callback' => \&swf_tag_callback );
- }
-}
-
-# Checking for SWF url
-sub swf_search_get_url {
- my $actions = shift;
- my $saved_pool_str = "";
-
- for my $action (@$actions) {
- if ( $action->tag_name eq 'ActionConstantPool' ) {
- my $pool = $action->ConstantPool;
- for my $string (@$pool) {
- if ( $string =~ /^https?:\/\// ) {
- $saved_pool_str = $string->value;
- }
- }
- }
- elsif ( $action->tag_name eq 'ActionGetURL2' ) {
- if ( $saved_pool_str ne "" ) {
- $saved_swf_url = $saved_pool_str;
- }
- }
- elsif ( $action->tag_name =~ 'ActionGetURL' ) {
- $saved_swf_url = $action->UrlString->value;
- }
- }
-}
-
-# SWF check tag utility
-sub swf_check_tag {
- my ( $t, $stream ) = @_;
- my ($tagname) = $t->tag_name;
-
- for ($tagname) {
- ( /^Do(Init)?Action$/ or /^DefineButton$/ ) and do {
- swf_search_get_url( $t->Actions );
- last;
- };
- /^PlaceObject2$/ and do {
- for my $ca ( @{ $t->ClipActions } ) {
- swf_search_get_url( $ca->Actions );
- }
- last;
- };
- /^DefineButton2$/ and do {
- for my $ba ( @{ $t->Actions } ) {
- swf_search_get_url( $ba->Actions );
- }
- last;
- };
- /^DefineSprite$/ and do {
- for my $tag ( @{ $t->ControlTags } ) {
- swf_search_get_url( $tag, $stream );
- }
- last;
- };
- }
-}
-
-# Callback for swf parser
-sub swf_tag_callback {
- my ( $self, $tag, $length, $stream ) = @_;
- my $t = SWF::Element::Tag->new( Tag => $tag, Length => $length );
- my ($tagname) = $t->tag_name;
-
- return
- unless $tagname eq 'DoAction'
- or $tagname eq 'DoInitAction'
- or $tagname eq 'PlaceObject2'
- or $tagname eq 'DefineButton'
- or $tagname eq 'DefineButton2'
- or $tagname eq 'DefineSprite';
-
- if ( $tagname eq 'DefineSprite' ) {
-
- # Tags in the sprite are not unpacked here.
-
- $t->shallow_unpack($stream);
- $t->TagStream->parse( callback => \&swf_tag_callback );
- return;
-
- }
- elsif ( $tagname eq 'PlaceObject2' ) {
-
- # Most of PlaceObject2 tags don't have ClipActions.
-
- $t->lookahead_Flags($stream);
- return unless $t->PlaceFlagHasClipActions;
- }
-
- # unpack the tag and search actions.
-
- $t->unpack($stream);
- swf_check_tag($t);
-}
-
-# Check url from redis cache first
-sub redis_check_url {
- my ($url) = @_;
-
- my $context = Digest->new("SHA-512");
- $context->add($url);
- return $redis_conn->get( $context->digest() );
-}
-
-# Write url to redis key
-sub redis_cache_url {
- my ( $url, $url_real ) = @_;
-
- if ( $url ne $url_real ) {
- my $context = Digest->new("SHA-512");
- $context->add($url);
- if ( !$redis_conn->setex( $context->digest(), $cfg{cache_expire}, $url_real ) ) {
- _log( LOG_INFO, "cannot save redirect from $url to $url_real in redis" );
- }
- }
-}
-
-sub create_response {
- my ( $code, $uri ) = @_;
-
- my $new_response;
-
- if ($uri) {
- $new_response = HTTP::Response->new( $code, 'OK' );
- $new_response->header( "Uri", $uri );
- $new_response->content($uri);
- $new_response->content_length( length($uri) );
- }
- else {
- $new_response = HTTP::Response->new($code);
- $new_response->content_length(0);
- }
-
- $new_response->header( "Connection", "Close" );
- $new_response->header( "Proxy-Connection", "Close" );
-
- return $new_response;
-}
-
-# POE http client callback
-sub process_client {
- my ( $kernel, $heap ) = @_[ KERNEL, HEAP ];
-
- my $http_request = $_[ARG0]->[0];
- my $rec = $_[ARG0]->[1][0];
- my $http_response = $_[ARG1]->[0];
- my $base_url = $_[ARG0]->[1][1];
- $saved_swf_url = "";
-
- if ( $rec == 0 ) {
- $base_url = $http_request->uri;
- }
- else {
-
- # Check cache for each url
- my $redirect = redis_check_url( $http_request->uri );
- if ($redirect) {
- _log( LOG_INFO, "Memcached redirect from %s to %s for request from: %s",
- $http_response->base, $redirect, $heap->{remote_ip} );
- my $new_response = create_response( 200, $redirect );
-
- # Avoid sending the response if the client has gone away.
- $heap->{client}->put($new_response) if defined $heap->{client};
-
- # Shut down the client's connection when the response is sent.
- return;
- }
- }
-
- if ($do_reopen_log) {
- $do_reopen_log = 0;
- reopen_log();
- }
-
- if ( $rec > $cfg{max_rec} ) {
- _log( LOG_INFO, "Max recursion exceeded: %d from %s to %s for request from: %s",
- $rec, $base_url, $http_request->uri, $heap->{remote_ip} );
-
- # Write to cache
- redis_cache_url( $base_url, $http_request->uri );
- my $new_response = create_response( 200, $http_request->uri );
-
- # Avoid sending the response if the client has gone away.
- $heap->{client}->put($new_response) if defined $heap->{client};
-
- # Shut down the client's connection when the response is sent.
- $kernel->yield("shutdown");
- return;
- }
-
- # Detect HTTP redirects
- if ( $http_response->is_redirect ) {
- my $redirect = $http_response->header('Location');
- if ($redirect) {
- if ( $redirect =~ /^https?:\/\// ) {
- _log( LOG_INFO, "HTML redirect from %s to %s for request from: %s",
- $http_response->base, $redirect, $heap->{remote_ip} );
- my $request = HTTP::Request->new( 'GET', $redirect );
- $request->header( "Connection", "close" );
- $request->header( "Proxy-Connection", "close" );
- $kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] );
- return;
- }
- else {
- _log( LOG_INFO, "ignoring internal redirect from %s to %s for request from: %s",
- $http_request->uri, $redirect, $heap->{remote_ip} );
- my $new_response = create_response( 200, $http_request->uri );
-
- # Avoid sending the response if the client has gone away.
- $heap->{client}->put($new_response) if defined $heap->{client};
-
- # Shut down the client's connection when the response is sent.
- $kernel->yield("shutdown");
- return;
- }
- }
- }
- elsif ( $http_response->code != 200 ) {
- _log( LOG_INFO, "HTTP response was %d, for request to %s", $http_response->code, $http_request->uri );
- my $new_response;
- if ( $rec == 0 ) {
- $new_response = create_response( $http_response->code );
- }
- else {
- redis_cache_url( $base_url, $http_request->uri );
- $new_response = create_response( 200, $http_request->uri );
- }
-
- # Avoid sending the response if the client has gone away.
- $heap->{client}->put($new_response) if defined $heap->{client};
-
- # Shut down the client's connection when the response is sent.
- $kernel->yield("shutdown");
- return;
- }
- my $response_type = $http_response->content_type();
- if ( $response_type =~ /^text/i ) {
- my $content = $http_response->decoded_content();
- my $p = HTML::HeadParser->new($http_response);
- $p->parse($content);
- my $expire = $http_response->header('Refresh');
- if ( $http_response->is_redirect || $expire ) {
- my $redirect;
- if ($expire) {
- $expire =~ /URL=(\S+)/;
- $redirect = $1;
- }
- else {
- $redirect = $http_response->header('Location');
- }
- if ($redirect) {
- if ( $redirect =~ /^https?:\/\// ) {
- _log( LOG_INFO, "HTML redirect from %s to %s for request from: %s",
- $http_response->base, $redirect, $heap->{remote_ip} );
- my $request = HTTP::Request->new( 'GET', $redirect );
- $request->header( "Connection", "close" );
- $request->header( "Proxy-Connection", "close" );
- $kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] );
- return;
- }
- else {
- _log( LOG_INFO, "ignoring internal redirect from %s to %s for request from: %s",
- $http_response->base, $redirect, $heap->{remote_ip} );
- }
- }
- }
- if ( $content =~ /location\s*=\s*["']*(https?:\/\/[^"'\s]+)["']*/im ) {
- my $redir = uri_unescape($1);
- _log( LOG_INFO, "js redirect from %s to %s for request from: %s",
- $http_response->base, $1, $heap->{remote_ip} );
- my $request = HTTP::Request->new( 'GET', $redir );
- $request->header( "Connection", "close" );
- $request->header( "Proxy-Connection", "close" );
- $kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] );
- return;
- }
- }
- elsif (
- $with_swf
- && (
- $response_type eq 'application/x-shockwave-flash'
- || ( $http_request->uri =~ /\.swf(\?.*)?$/i
- && $http_response->code == 200 )
- )
- )
- {
- my $content = $http_response->decoded_content();
- $swf_parser->parse($content);
-
- if ( $saved_swf_url ne "" ) {
- _log( LOG_INFO, "flash redirect from %s to %s for request from: %s",
- $http_response->base, $saved_swf_url, $heap->{remote_ip} );
- my $request = HTTP::Request->new( 'GET', $saved_swf_url );
-
- # Reset swf redirect global variable
- $saved_swf_url = "";
- $request->header( "Connection", "close" );
- $request->header( "Proxy-Connection", "close" );
- $kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] );
- return;
- }
- }
- else {
- _log( LOG_INFO, "response wasn't text request from: %s, response is: %s", $heap->{remote_ip}, $response_type );
- }
-
- _log( LOG_INFO, "redirect from %s to %s for request from: %s", $base_url, $http_request->uri, $heap->{remote_ip} );
-
- # Write to cache
- redis_cache_url( $base_url, $http_request->uri );
- my $new_response = create_response( $http_response->code, $http_request->uri );
-
- # Avoid sending the response if the client has gone away.
- $heap->{client}->put($new_response) if defined $heap->{client};
-
- # Shut down the client's connection when the response is sent.
- $kernel->yield("shutdown");
-
-}
-
-sub process_input {
- my ( $kernel, $heap, $request ) = @_[ KERNEL, HEAP, ARG0 ];
-
- if ( $request->isa("HTTP::Response") ) {
- $heap->{client}->put($request);
- $kernel->yield("shutdown");
- return;
- }
-
- my $domain;
- if ( $request->uri =~ /^http:\/\/([^\/]+)\// ) {
- my @parts = split( /\./, $1 );
- my $c1 = pop @parts;
- my $c2 = pop @parts;
- $domain = "$c2.$c1";
- }
-
- if (
- ( defined( $cfg{check_regexp} ) && $request->uri !~ $cfg{check_regexp} )
- || ( defined( $cfg{check_domains} )
- && scalar( grep { $_ eq $domain } @{ $cfg{check_domains} } ) == 0 )
- )
- {
- my $new_response = create_response( 200, $request->uri );
-
- # Avoid sending the response if the client has gone away.
- $heap->{client}->put($new_response) if defined $heap->{client};
- $kernel->yield("shutdown");
-
- # Shut down the client's connection when the response is sent.
- return;
- }
-
- # Check cache first
- my $redirect = redis_check_url( $request->uri );
- if ($redirect) {
- _log( LOG_INFO, "Memcached redirect from %s to %s for request from: %s",
- $request->uri, $redirect, $heap->{remote_ip} );
- my $new_response = create_response( 200, $redirect );
-
- # Avoid sending the response if the client has gone away.
- $heap->{client}->put($new_response) if defined $heap->{client};
- $kernel->yield("shutdown");
-
- # Shut down the client's connection when the response is sent.
- return;
- }
-
- # Start http request
- my $new_request = HTTP::Request->new( 'GET', $request->uri );
- $new_request->header( "Connection", "close" );
- $new_request->header( "Proxy-Connection", "close" );
- $kernel->post( "cl", "request", "got_response", $new_request, [ 0, "" ] );
-}
-
-sub sig_DIE {
- my ( $sig, $ex ) = @_[ ARG0, ARG1 ];
- _log( LOG_ERR, "$$: error in $ex->{event}: $ex->{error_str}" );
- $poe_kernel->sig_handled();
-
- # Send the signal to session that sent the original event.
- if ( $ex->{source_session} ne $_[SESSION] ) {
- $poe_kernel->signal( $ex->{source_session}, 'DIE', $sig, $ex );
- }
-}
-
-sub sig_CLD {
- my ( $heap, $child_pid ) = @_[ HEAP, ARG1 ];
- return 0;
-}
-
-############################### Main code fragment ##################################
-
-# Do daemonization
-if ( !$DEBUG ) {
- Proc::Daemon::Init;
- POE::Kernel->has_forked;
- setlogsock('unix');
- openlog( 'redirector', 'ndelay,pid', $cfg{'facility'} );
-}
-
-# Try to eval config file
-if ( -f $cfg{cfg_file} ) {
- my $config = read_file( $cfg{cfg_file} );
- eval $config;
-}
-
-_log( LOG_ALERT, "Process is already started, check $cfg{pidfile}" )
- if Proc::PidUtil::is_running( $cfg{pidfile} );
-
-# Drop privileges
-if ( $> == 0 ) {
- my $uid = getpwnam( $cfg{user} )
- or _log( LOG_ALERT, "user $cfg{user} unknown" );
- my $gid = getgrnam( $cfg{group} )
- or _log( LOG_ALERT, "group $cfg{group} unknown" );
- $< = $> = $uid;
- $) = $( = $gid;
-}
-
-if ( !$DEBUG ) {
- _log( LOG_ALERT, "Cannot write to pidfile $cfg{pidfile}" )
- if !open( PID, "> $cfg{pidfile}" );
- close(PID);
-}
-
-# Reopen log on SIGUSR1
-$poe_kernel->sig( DIE => \&sig_DIE );
-$poe_kernel->sig( CLD => \&sig_CLD );
-$SIG{USR1} = sub { $do_reopen_log = 1; $poe_kernel->sig_handled(); };
-$SIG{INT} = sub { $poe_kernel->stop(); };
-$SIG{QUIT} = sub { $poe_kernel->stop(); };
-$SIG{PIPE} = 'IGNORE';
-
-if ( !$DEBUG ) {
- Proc::PidUtil::make_pidfile( $cfg{pidfile}, $$ )
- or _log( LOG_ALERT, "Cannot write pidfile $cfg{pidfile}" );
-}
-
-# Init redis connection
-_log( LOG_INFO, "Starting redis connection" );
-$redis_conn = Redis::Fast->new(
- server => $cfg{redis_server},
- reconnect => 60,
- every => 500_000,
- encoding => undef,
-);
-
-# POE part
-POE::Component::Client::HTTP->spawn(
- Alias => 'cl',
- MaxSize => $cfg{max_size}, # Remove for unlimited page sizes
- Timeout => $cfg{http_timeout},
- ConnectionManager => POE::Component::Client::Keepalive->new(
- max_per_host => 256,
- max_open => 1024,
- keep_alive => 1,
- timeout => $cfg{http_timeout},
- ),
-);
-
-_log( LOG_INFO, "Starting HTTP server" );
-POE::Component::Server::TCP->new(
- Alias => "",
- Port => $cfg{port},
- ClientFilter => 'POE::Filter::HTTPD',
-
- ClientInput => \&process_input,
- InlineStates => { got_response => \&process_client, },
-);
-
-swf_init_parser();
-_log( LOG_NOTICE, "Starting URL resolver" );
-
-# Start POE. This will run the server until it exits.
-POE::Kernel->run();
-exit 0;
-
-############################## Final block ####################################
-
-END {
- _log( LOG_NOTICE, 'redirector stopped' );
- closelog();
-}