Browse Source

[Minor] Reformat more Perl scripts

tags/1.8.2
Alexander Moisseev 5 years ago
parent
commit
c959a46cda
3 changed files with 643 additions and 671 deletions
  1. 6
    6
      .tidyallrc
  2. 225
    227
      utils/cgp_rspamd.pl
  3. 412
    438
      utils/redirector.pl.in

+ 6
- 6
.tidyallrc View File

@@ -5,21 +5,21 @@
ignore = contrib/**/* doc/doxydown/doxydown.pl

;[PerlCritic]
;select = **/*.{pl,pm,t}
;select = **/*.{pl,pl.in,pm,t}

[PerlTidy]
select = **/*.{pl,pm,t}
select = **/*.{pl,pl.in,pm,t}
argv = -l=120

[PodChecker]
select = **/*.{pl,pm,pod}
select = **/*.{pl,pl.in,pm,pod}

;[PodSpell]
;select = **/*.{pl,pm,pod}
;select = **/*.{pl,pl.in,pm,pod}

;[PodTidy]
;select = **/*.{pl,pm,pod}
;select = **/*.{pl,pl.in,pm,pod}
;argv = --columns=120

[Test::Vars]
select = **/*.{pl,pm,t}
select = **/*.{pl,pl.in,pm,t}

+ 225
- 227
utils/cgp_rspamd.pl View File

@@ -21,278 +21,276 @@ my $request_timeout = 15; # 15 seconds by default
my $reject_message = "Spam message rejected";

GetOptions(
"host=s" => \$rspamd_host,
"header=s" => \$header,
"reject-message=s" => \$reject_message,
"max-size=i" => \$max_size,
"timeout=f" => \$request_timeout,
"help|?" => \$help,
"man" => \$man
"host=s" => \$rspamd_host,
"header=s" => \$header,
"reject-message=s" => \$reject_message,
"max-size=i" => \$max_size,
"timeout=f" => \$request_timeout,
"help|?" => \$help,
"man" => \$man
) or pod2usage(2);

pod2usage(1) if $help;
pod2usage( -exitval => 0, -verbose => 2 ) if $man;

my $main_domain = cgp_main_domain();
my $scanned = 0;
my $scanned = 0;

# Turn off bufferization as required by CGP
$| = 1;

sub cgp_main_domain {
if ( open(my $fh, 'Settings/Main.settings') ) {
while (<$fh>) {
if ( /^\s+DomainName\s+=\s+([^;]+);/ ) {
return $1;
}
if ( open( my $fh, 'Settings/Main.settings' ) ) {
while (<$fh>) {
if (/^\s+DomainName\s+=\s+([^;]+);/) {
return $1;
}
}
}
}
}

sub cgp_string {
my ($in) = @_;
my ($in) = @_;

$in =~ s/\"/\\"/g;
$in =~ s/\n/\\n/gms;
$in =~ s/\r/\\r/mgs;
$in =~ s/\t/ /g;
$in =~ s/\"/\\"/g;
$in =~ s/\n/\\n/gms;
$in =~ s/\r/\\r/mgs;
$in =~ s/\t/ /g;

return "\"$in\"";
return "\"$in\"";
}

sub rspamd_scan {
my ( $tag, $file ) = @_;

my $http_callback = sub {
my ( $body, $hdr ) = @_;

if ( $hdr && $hdr->{Status} =~ /^2/ ) {
my $js = eval('decode_json($body)');
$scanned++;

if ( !$js ) {
print "* Rspamd: Bad response for $file: invalid JSON: parse error\n";
print "$tag FAILURE\n";
}
else {
my $def = $js;
my $headers = "";

if ( !$def ) {
print
"* Rspamd: Bad response for $file: invalid JSON: default is missing\n";
print "$tag FAILURE\n";
}
else {
my $action = $def->{'action'};
my $id = $js->{'message-id'};

my $symbols = "";
while ( my ( $k, $s ) = each( %{$def->{'symbols'}}) ) {
$symbols .= sprintf "%s(%.2f);", $k, $s->{'score'};
}

printf
"* Rspamd: Scanned %s; id: <%s>; Score: %.2f / %.2f; Symbols: [%s]\n",
$file, $id, $def->{'score'}, $def->{'required_score'}, $symbols;

if ( $js->{'dkim-signature'} ) {
$headers .= "DKIM-Signature: " . $js->{'dkim-signature'};
}

if ( $js->{'milter'} ) {
my $block = $js->{'milter'};

if ( $block->{'add_headers'} ) {
while ( my ( $h, $v ) = each( %{ $block->{'add_headers'} } ) ) {
if (ref($v) eq 'HASH') {
if ($headers eq "") {
$headers .= "$h: $v->{value}";
}
else {
$headers .= "\\e$h: $v->{value}";
}
my ( $tag, $file ) = @_;

my $http_callback = sub {
my ( $body, $hdr ) = @_;

if ( $hdr && $hdr->{Status} =~ /^2/ ) {
my $js = eval('decode_json($body)');
$scanned++;

if ( !$js ) {
print "* Rspamd: Bad response for $file: invalid JSON: parse error\n";
print "$tag FAILURE\n";
}
else {
my $def = $js;
my $headers = "";

if ( !$def ) {
print "* Rspamd: Bad response for $file: invalid JSON: default is missing\n";
print "$tag FAILURE\n";
}
else {
if ($headers eq "") {
$headers .= "$h: $v";
}
else {
$headers .= "\\e$h: $v";
}
my $action = $def->{'action'};
my $id = $js->{'message-id'};

my $symbols = "";
while ( my ( $k, $s ) = each( %{ $def->{'symbols'} } ) ) {
$symbols .= sprintf "%s(%.2f);", $k, $s->{'score'};
}

printf
"* Rspamd: Scanned %s; id: <%s>; Score: %.2f / %.2f; Symbols: [%s]\n",
$file, $id, $def->{'score'}, $def->{'required_score'}, $symbols;

if ( $js->{'dkim-signature'} ) {
$headers .= "DKIM-Signature: " . $js->{'dkim-signature'};
}

if ( $js->{'milter'} ) {
my $block = $js->{'milter'};

if ( $block->{'add_headers'} ) {
while ( my ( $h, $v ) = each( %{ $block->{'add_headers'} } ) ) {
if ( ref($v) eq 'HASH' ) {
if ( $headers eq "" ) {
$headers .= "$h: $v->{value}";
}
else {
$headers .= "\\e$h: $v->{value}";
}
}
else {
if ( $headers eq "" ) {
$headers .= "$h: $v";
}
else {
$headers .= "\\e$h: $v";
}
}
}
}
}

if ( $action eq 'reject' ) {
print "$tag DISCARD\n";
return;
}
elsif ( $action eq 'add header' || $action eq 'rewrite subject' ) {
if ( $headers eq "" ) {
$headers .= "$header";
}
else {
$headers .= "\\e$header";
}
}
elsif ( $action eq 'soft reject' ) {
print "$tag REJECT Try again later\n";
return;
}

if ( $headers eq "" ) {
print "$tag OK\n";
}
else {
print "$tag ADDHEADER " . cgp_string($headers) . " OK\n";
}
}
}
}
}

if ( $action eq 'reject' ) {
print "$tag DISCARD\n";
return;
}
elsif ( $action eq 'add header' || $action eq 'rewrite subject' ) {
if ( $headers eq "" ) {
$headers .= "$header";
}
else {
if ($hdr) {
print "* Rspamd: Bad response for $file: HTTP error: $hdr->{Status} $hdr->{Reason}\n";
}
else {
$headers .= "\\e$header";
print "* Rspamd: Bad response for $file: IO error: $!\n";
}
}
elsif ( $action eq 'soft reject' ) {
print "$tag REJECT Try again later\n";
return;
}

if ( $headers eq "" ) {
print "$tag OK\n";
}
else {
print "$tag ADDHEADER " . cgp_string($headers) . " OK\n";
}
print "$tag FAILURE\n";
}
}
};

if ($local) {

# Use file scan
# XXX: not implemented now due to CGP queue format
http_get(
"http://$rspamd_host/symbols?file=$file",
timeout => $request_timeout,
$http_callback
);
}
else {
if ($hdr) {
print
"* Rspamd: Bad response for $file: HTTP error: $hdr->{Status} $hdr->{Reason}\n";
}
else {
print "* Rspamd: Bad response for $file: IO error: $!\n";
}
print "$tag FAILURE\n";
}
};

if ($local) {

# Use file scan
# XXX: not implemented now due to CGP queue format
http_get(
"http://$rspamd_host/symbols?file=$file",
timeout => $request_timeout,
$http_callback
);
}
else {
my $sb = stat($file);

if ( !$sb || $sb->size > $max_size ) {
if ($sb) {
print "* File $file is too large: " . $sb->size . "\n$tag FAILURE\n";

}
else {
print "* Cannot stat $file: $!\n$tag FAILURE\n";
}
return;
}
aio_load(
$file,
sub {
my ($data) = @_;

if ( !$data ) {
print "* Cannot open $file: $!\n$tag FAILURE\n";
return;
}
my $sb = stat($file);

if ( !$sb || $sb->size > $max_size ) {
if ($sb) {
print "* File $file is too large: " . $sb->size . "\n$tag FAILURE\n";

# Parse CGP format
$data =~ s/^((?:[^\n]*\n)*?)\n(.*)$/$2/ms;
my @envelope = split /\n/, $1;
chomp(@envelope);
my $from;
my @rcpts;
my $ip;
my $user;

foreach my $elt (@envelope) {
if ( $elt =~ /^P\s[^<]*(<[^>]*>).*$/ ) {
$from = $1;
}
elsif ( $elt =~ /^R\s[^<]*(<[^>]*>).*$/ ) {
push @rcpts, $1;
}
elsif ( $elt =~ /^S (?:<([^>]+)> )?(?:SMTP|HTTPU?|AIRSYNC|XIMSS) \[([0-9a-f.:]+)\]/ ) {
if ($1) {
$user = $1;
}
if ($2) {
$ip = $2;
}
}
elsif ( $elt =~ /^S (?:<([^>]+)> )?(?:DSN|GROUP|LIST|PBX|PIPE|RULE) \[0\.0\.0\.0\]/ ) {
if ($1) {
$user = $1;
else {
print "* Cannot stat $file: $!\n$tag FAILURE\n";
}
$ip = '127.2.4.7';
}
return;
}
aio_load(
$file,
sub {
my ($data) = @_;

if ( !$data ) {
print "* Cannot open $file: $!\n$tag FAILURE\n";
return;
}

my $headers = {};
if ( $file =~ /\/([^\/.]+)\.msg$/ ) {
$headers->{'Queue-ID'} = $1;
}
if ($from) {
$headers->{From} = $from;
}
if ( scalar(@rcpts) > 0 ) {
# Parse CGP format
$data =~ s/^((?:[^\n]*\n)*?)\n(.*)$/$2/ms;
my @envelope = split /\n/, $1;
chomp(@envelope);
my $from;
my @rcpts;
my $ip;
my $user;

foreach my $elt (@envelope) {
if ( $elt =~ /^P\s[^<]*(<[^>]*>).*$/ ) {
$from = $1;
}
elsif ( $elt =~ /^R\s[^<]*(<[^>]*>).*$/ ) {
push @rcpts, $1;
}
elsif ( $elt =~ /^S (?:<([^>]+)> )?(?:SMTP|HTTPU?|AIRSYNC|XIMSS) \[([0-9a-f.:]+)\]/ ) {
if ($1) {
$user = $1;
}
if ($2) {
$ip = $2;
}
}
elsif ( $elt =~ /^S (?:<([^>]+)> )?(?:DSN|GROUP|LIST|PBX|PIPE|RULE) \[0\.0\.0\.0\]/ ) {
if ($1) {
$user = $1;
}
$ip = '127.2.4.7';
}
}

# XXX: Anyevent cannot parse headers with multiple values
$headers->{Rcpt} = join(',', @rcpts);
}
if ($ip) {
$headers->{IP} = $ip;
}
if ($user) {
$headers->{User} = $user;
}
if ($main_domain) {
$headers->{'MTA-Tag'} = $main_domain;
}
my $headers = {};
if ( $file =~ /\/([^\/.]+)\.msg$/ ) {
$headers->{'Queue-ID'} = $1;
}
if ($from) {
$headers->{From} = $from;
}
if ( scalar(@rcpts) > 0 ) {

http_post(
"http://$rspamd_host/checkv2", $data,
timeout => $request_timeout,
headers => $headers,
$http_callback
# XXX: Anyevent cannot parse headers with multiple values
$headers->{Rcpt} = join( ',', @rcpts );
}
if ($ip) {
$headers->{IP} = $ip;
}
if ($user) {
$headers->{User} = $user;
}
if ($main_domain) {
$headers->{'MTA-Tag'} = $main_domain;
}

http_post(
"http://$rspamd_host/checkv2", $data,
timeout => $request_timeout,
headers => $headers,
$http_callback
);
}
);
}
);
}
}
}

# Show informational message
print "* Rspamd CGP filter has been started\n";

my $w = AnyEvent->io(
fh => \*STDIN,
poll => 'r',
cb => sub {
chomp( my $input = <STDIN> );

if ( $input =~ /^(\d+)\s+(\S+)(\s+(\S+)\s*)?$/ ) {
my $tag = $1;
my $cmd = $2;

if ( $cmd eq "INTF" ) {
print "$input\n";
}
elsif ( $cmd eq "FILE" && $4 ) {
my $file = $4;
print "* Scanning file $file\n";
rspamd_scan $tag, $file;
}
elsif ( $cmd eq "QUIT" ) {
print "* Terminating after scanning of $scanned files\n";
print "$tag OK\n";
exit 0;
}
else {
print "* Unknown command $cmd\n";
print "$tag FAILURE\n";
}
fh => \*STDIN,
poll => 'r',
cb => sub {
chomp( my $input = <STDIN> );

if ( $input =~ /^(\d+)\s+(\S+)(\s+(\S+)\s*)?$/ ) {
my $tag = $1;
my $cmd = $2;

if ( $cmd eq "INTF" ) {
print "$input\n";
}
elsif ( $cmd eq "FILE" && $4 ) {
my $file = $4;
print "* Scanning file $file\n";
rspamd_scan $tag, $file;
}
elsif ( $cmd eq "QUIT" ) {
print "* Terminating after scanning of $scanned files\n";
print "$tag OK\n";
exit 0;
}
else {
print "* Unknown command $cmd\n";
print "$tag FAILURE\n";
}
}
}
}
);

EV::run;

+ 412
- 438
utils/redirector.pl.in View File

@@ -34,22 +34,22 @@ eval "require SWF::Element" or $with_swf = 0; # p5-SWF-File
my $DEBUG = grep { $_ eq '-debug' } @ARGV;

our %cfg = (
port => 8080,
max_size => 102400,
http_timeout => 5,
max_rec => 5,
pidfile => '/tmp/redirector.pid',
do_log => 0,
debug => 0,
redis_server => 'localhost:6379',
facility => LOG_LOCAL3, # syslog facility
log_level => LOG_INFO,
digest_bits => 256,
cache_expire => 3600,
user => '@RSPAMD_USER@',
group => '@RSPAMD_GROUP@',
cfg_file => '@CMAKE_INSTALL_PREFIX@/etc/rspamd-redirector.conf',
port => 8080,
max_size => 102400,
http_timeout => 5,
max_rec => 5,
pidfile => '/tmp/redirector.pid',
do_log => 0,
debug => 0,
redis_server => 'localhost:6379',
facility => LOG_LOCAL3, # syslog facility
log_level => LOG_INFO,
digest_bits => 256,
cache_expire => 3600,
user => '@RSPAMD_USER@',
group => '@RSPAMD_GROUP@',
cfg_file => '@CMAKE_INSTALL_PREFIX@/etc/rspamd-redirector.conf',
);

our $do_reopen_log = 0;
@@ -59,468 +59,442 @@ our $redis_conn;

# Read file into string
sub read_file {
my ($file) = @_;
my ($file) = @_;

open( IN, $file ) or _log( LOG_ALERT, "Can't open $file: $!" );
local $/;
my $content = <IN>;
close IN;
open( IN, $file ) or _log( LOG_ALERT, "Can't open $file: $!" );
local $/;
my $content = <IN>;
close IN;

return $content;
return $content;
}

# Write log line:
sub _log {
my ( $l, $w, @s ) = @_;
if ($DEBUG) {
printf STDERR $w . "\n", @s;
}
else {
syslog( $l, $w . "\n", @s ) if ( $l <= $cfg{'log_level'} );
}
if ( $l == LOG_ALERT ) {
die $w;
}
my ( $l, $w, @s ) = @_;
if ($DEBUG) {
printf STDERR $w . "\n", @s;
}
else {
syslog( $l, $w . "\n", @s ) if ( $l <= $cfg{'log_level'} );
}
if ( $l == LOG_ALERT ) {
die $w;
}
}

# Init swf parser
sub swf_init_parser {
if ($with_swf) {
$swf_parser = SWF::Parser->new( 'tag-callback' => \&swf_tag_callback );
}
if ($with_swf) {
$swf_parser = SWF::Parser->new( 'tag-callback' => \&swf_tag_callback );
}
}

# Checking for SWF url
sub swf_search_get_url {
my $actions = shift;
my $saved_pool_str = "";
for my $action (@$actions) {
if ( $action->tag_name eq 'ActionConstantPool' ) {
my $pool = $action->ConstantPool;
for my $string (@$pool) {
if ( $string =~ /^https?:\/\// ) {
$saved_pool_str = $string->value;
}
}
}
elsif ( $action->tag_name eq 'ActionGetURL2' ) {
if ( $saved_pool_str ne "" ) {
$saved_swf_url = $saved_pool_str;
}
}
elsif ( $action->tag_name =~ 'ActionGetURL' ) {
$saved_swf_url = $action->UrlString->value;
}
}
my $actions = shift;
my $saved_pool_str = "";
for my $action (@$actions) {
if ( $action->tag_name eq 'ActionConstantPool' ) {
my $pool = $action->ConstantPool;
for my $string (@$pool) {
if ( $string =~ /^https?:\/\// ) {
$saved_pool_str = $string->value;
}
}
}
elsif ( $action->tag_name eq 'ActionGetURL2' ) {
if ( $saved_pool_str ne "" ) {
$saved_swf_url = $saved_pool_str;
}
}
elsif ( $action->tag_name =~ 'ActionGetURL' ) {
$saved_swf_url = $action->UrlString->value;
}
}
}

# SWF check tag utility
sub swf_check_tag {
my ( $t, $stream ) = @_;
my ($tagname) = $t->tag_name;
for ($tagname) {
( /^Do(Init)?Action$/ or /^DefineButton$/ ) and do {
swf_search_get_url( $t->Actions );
last;
};
/^PlaceObject2$/ and do {
for my $ca ( @{ $t->ClipActions } ) {
swf_search_get_url( $ca->Actions );
}
last;
};
/^DefineButton2$/ and do {
for my $ba ( @{ $t->Actions } ) {
swf_search_get_url( $ba->Actions );
}
last;
};
/^DefineSprite$/ and do {
for my $tag ( @{ $t->ControlTags } ) {
swf_search_get_url( $tag, $stream );
}
last;
};
}
my ( $t, $stream ) = @_;
my ($tagname) = $t->tag_name;
for ($tagname) {
( /^Do(Init)?Action$/ or /^DefineButton$/ ) and do {
swf_search_get_url( $t->Actions );
last;
};
/^PlaceObject2$/ and do {
for my $ca ( @{ $t->ClipActions } ) {
swf_search_get_url( $ca->Actions );
}
last;
};
/^DefineButton2$/ and do {
for my $ba ( @{ $t->Actions } ) {
swf_search_get_url( $ba->Actions );
}
last;
};
/^DefineSprite$/ and do {
for my $tag ( @{ $t->ControlTags } ) {
swf_search_get_url( $tag, $stream );
}
last;
};
}
}

# Callback for swf parser
sub swf_tag_callback {
my ( $self, $tag, $length, $stream ) = @_;
my $t = SWF::Element::Tag->new( Tag => $tag, Length => $length );
my ($tagname) = $t->tag_name;
my ( $self, $tag, $length, $stream ) = @_;
my $t = SWF::Element::Tag->new( Tag => $tag, Length => $length );
my ($tagname) = $t->tag_name;

return
unless $tagname eq 'DoAction'
or $tagname eq 'DoInitAction'
or $tagname eq 'PlaceObject2'
or $tagname eq 'DefineButton'
or $tagname eq 'DefineButton2'
or $tagname eq 'DefineSprite';
return
unless $tagname eq 'DoAction'
or $tagname eq 'DoInitAction'
or $tagname eq 'PlaceObject2'
or $tagname eq 'DefineButton'
or $tagname eq 'DefineButton2'
or $tagname eq 'DefineSprite';

if ( $tagname eq 'DefineSprite' ) {
if ( $tagname eq 'DefineSprite' ) {

# Tags in the sprite are not unpacked here.
# Tags in the sprite are not unpacked here.

$t->shallow_unpack($stream);
$t->TagStream->parse( callback => \&swf_tag_callback );
return;
$t->shallow_unpack($stream);
$t->TagStream->parse( callback => \&swf_tag_callback );
return;

}
elsif ( $tagname eq 'PlaceObject2' ) {
}
elsif ( $tagname eq 'PlaceObject2' ) {

# Most of PlaceObject2 tags don't have ClipActions.
# Most of PlaceObject2 tags don't have ClipActions.

$t->lookahead_Flags($stream);
return unless $t->PlaceFlagHasClipActions;
}
$t->lookahead_Flags($stream);
return unless $t->PlaceFlagHasClipActions;
}

# unpack the tag and search actions.
# unpack the tag and search actions.

$t->unpack($stream);
swf_check_tag($t);
$t->unpack($stream);
swf_check_tag($t);
}

# Check url from redis cache first
sub redis_check_url {
my ($url) = @_;
my ($url) = @_;

my $context = Digest->new("SHA-512");
$context->add($url);
return $redis_conn->get( $context->digest() );
my $context = Digest->new("SHA-512");
$context->add($url);
return $redis_conn->get( $context->digest() );
}

# Write url to redis key
sub redis_cache_url {
my ( $url, $url_real ) = @_;
if ( $url ne $url_real ) {
my $context = Digest->new("SHA-512");
$context->add($url);
if (!$redis_conn->setex( $context->digest(), $cfg{cache_expire}, $url_real)) {
_log(LOG_INFO, "cannot save redirect from $url to $url_real in redis");
}
}
my ( $url, $url_real ) = @_;
if ( $url ne $url_real ) {
my $context = Digest->new("SHA-512");
$context->add($url);
if ( !$redis_conn->setex( $context->digest(), $cfg{cache_expire}, $url_real ) ) {
_log( LOG_INFO, "cannot save redirect from $url to $url_real in redis" );
}
}
}

sub create_response {
my ( $code, $uri ) = @_;
my ( $code, $uri ) = @_;

my $new_response;
my $new_response;

if ($uri) {
$new_response = HTTP::Response->new( $code, 'OK' );
$new_response->header( "Uri", $uri );
$new_response->content($uri);
$new_response->content_length( length($uri) );
}
else {
$new_response = HTTP::Response->new($code);
$new_response->content_length(0);
}
if ($uri) {
$new_response = HTTP::Response->new( $code, 'OK' );
$new_response->header( "Uri", $uri );
$new_response->content($uri);
$new_response->content_length( length($uri) );
}
else {
$new_response = HTTP::Response->new($code);
$new_response->content_length(0);
}

$new_response->header( "Connection", "Close" );
$new_response->header( "Proxy-Connection", "Close" );
$new_response->header( "Connection", "Close" );
$new_response->header( "Proxy-Connection", "Close" );

return $new_response;
return $new_response;
}

# POE http client callback
sub process_client {
my ( $kernel, $heap ) = @_[ KERNEL, HEAP ];

my $http_request = $_[ARG0]->[0];
my $rec = $_[ARG0]->[1][0];
my $http_response = $_[ARG1]->[0];
my $base_url = $_[ARG0]->[1][1];
$saved_swf_url = "";

if ( $rec == 0 ) {
$base_url = $http_request->uri;
}
else {

# Check cache for each url
my $redirect = redis_check_url( $http_request->uri );
if ($redirect) {
_log( LOG_INFO,
"Memcached redirect from %s to %s for request from: %s",
$http_response->base, $redirect, $heap->{remote_ip} );
my $new_response = create_response( 200, $redirect );

# Avoid sending the response if the client has gone away.
$heap->{client}->put($new_response) if defined $heap->{client};

# Shut down the client's connection when the response is sent.
return;
}
}

if ($do_reopen_log) {
$do_reopen_log = 0;
reopen_log();
}

if ( $rec > $cfg{max_rec} ) {
_log( LOG_INFO,
"Max recursion exceeded: %d from %s to %s for request from: %s",
$rec, $base_url, $http_request->uri, $heap->{remote_ip} );

# Write to cache
redis_cache_url( $base_url, $http_request->uri );
my $new_response = create_response( 200, $http_request->uri );

# Avoid sending the response if the client has gone away.
$heap->{client}->put($new_response) if defined $heap->{client};

# Shut down the client's connection when the response is sent.
$kernel->yield("shutdown");
return;
}

# Detect HTTP redirects
if ( $http_response->is_redirect ) {
my $redirect = $http_response->header('Location');
if ($redirect) {
if ( $redirect =~ /^https?:\/\// ) {
_log( LOG_INFO,
"HTML redirect from %s to %s for request from: %s",
$http_response->base, $redirect, $heap->{remote_ip} );
my $request = HTTP::Request->new( 'GET', $redirect );
$request->header( "Connection", "close" );
$request->header( "Proxy-Connection", "close" );
$kernel->post( "cl", "request", "got_response", $request,
[ $rec + 1, $base_url ] );
return;
}
else {
_log( LOG_INFO,
"ignoring internal redirect from %s to %s for request from: %s",
$http_request->uri,
$redirect,
$heap->{remote_ip}
);
my $new_response = create_response( 200, $http_request->uri );

# Avoid sending the response if the client has gone away.
$heap->{client}->put($new_response) if defined $heap->{client};

# Shut down the client's connection when the response is sent.
$kernel->yield("shutdown");
return;
}
}
}
elsif ( $http_response->code != 200 ) {
_log( LOG_INFO, "HTTP response was %d, for request to %s",
$http_response->code, $http_request->uri );
my $new_response;
if ($rec == 0) {
$new_response = create_response( $http_response->code );
}
else {
redis_cache_url( $base_url, $http_request->uri );
$new_response = create_response( 200, $http_request->uri );
}

# Avoid sending the response if the client has gone away.
$heap->{client}->put($new_response) if defined $heap->{client};

# Shut down the client's connection when the response is sent.
$kernel->yield("shutdown");
return;
}
my $response_type = $http_response->content_type();
if ( $response_type =~ /^text/i ) {
my $content = $http_response->decoded_content();
my $p = HTML::HeadParser->new($http_response);
$p->parse($content);
my $expire = $http_response->header('Refresh');
if ( $http_response->is_redirect || $expire ) {
my $redirect;
if ($expire) {
$expire =~ /URL=(\S+)/;
$redirect = $1;
}
else {
$redirect = $http_response->header('Location');
}
if ($redirect) {
if ( $redirect =~ /^https?:\/\// ) {
_log( LOG_INFO,
"HTML redirect from %s to %s for request from: %s",
$http_response->base,
$redirect,
$heap->{remote_ip}
);
my $request = HTTP::Request->new( 'GET', $redirect );
$request->header( "Connection", "close" );
$request->header( "Proxy-Connection", "close" );
$kernel->post( "cl", "request", "got_response", $request,
[ $rec + 1, $base_url ] );
return;
}
else {
_log( LOG_INFO,
"ignoring internal redirect from %s to %s for request from: %s",
$http_response->base,
$redirect,
$heap->{remote_ip}
);
}
}
}
if ( $content =~ /location\s*=\s*["']*(https?:\/\/[^"'\s]+)["']*/im ) {
my $redir = uri_unescape($1);
_log( LOG_INFO, "js redirect from %s to %s for request from: %s",
$http_response->base, $1, $heap->{remote_ip} );
my $request = HTTP::Request->new( 'GET', $redir );
$request->header( "Connection", "close" );
$request->header( "Proxy-Connection", "close" );
$kernel->post( "cl", "request", "got_response", $request,
[ $rec + 1, $base_url ] );
return;
}
}
elsif (
$with_swf
&& (
$response_type eq 'application/x-shockwave-flash'
|| ( $http_request->uri =~ /\.swf(\?.*)?$/i
&& $http_response->code == 200 )
)
)
{
my $content = $http_response->decoded_content();
$swf_parser->parse($content);

if ( $saved_swf_url ne "" ) {
_log( LOG_INFO, "flash redirect from %s to %s for request from: %s",
$http_response->base, $saved_swf_url, $heap->{remote_ip} );
my $request = HTTP::Request->new( 'GET', $saved_swf_url );

# Reset swf redirect global variable
$saved_swf_url = "";
$request->header( "Connection", "close" );
$request->header( "Proxy-Connection", "close" );
$kernel->post( "cl", "request", "got_response", $request,
[ $rec + 1, $base_url ] );
return;
}
}
else {
_log( LOG_INFO,
"response wasn't text request from: %s, response is: %s",
$heap->{remote_ip}, $response_type );
}

_log( LOG_INFO, "redirect from %s to %s for request from: %s",
$base_url, $http_request->uri, $heap->{remote_ip} );

# Write to cache
redis_cache_url( $base_url, $http_request->uri );
my $new_response =
create_response( $http_response->code, $http_request->uri );

# Avoid sending the response if the client has gone away.
$heap->{client}->put($new_response) if defined $heap->{client};

# Shut down the client's connection when the response is sent.
$kernel->yield("shutdown");
my ( $kernel, $heap ) = @_[ KERNEL, HEAP ];

my $http_request = $_[ARG0]->[0];
my $rec = $_[ARG0]->[1][0];
my $http_response = $_[ARG1]->[0];
my $base_url = $_[ARG0]->[1][1];
$saved_swf_url = "";

if ( $rec == 0 ) {
$base_url = $http_request->uri;
}
else {

# Check cache for each url
my $redirect = redis_check_url( $http_request->uri );
if ($redirect) {
_log( LOG_INFO, "Memcached redirect from %s to %s for request from: %s",
$http_response->base, $redirect, $heap->{remote_ip} );
my $new_response = create_response( 200, $redirect );

# Avoid sending the response if the client has gone away.
$heap->{client}->put($new_response) if defined $heap->{client};

# Shut down the client's connection when the response is sent.
return;
}
}

if ($do_reopen_log) {
$do_reopen_log = 0;
reopen_log();
}

if ( $rec > $cfg{max_rec} ) {
_log( LOG_INFO, "Max recursion exceeded: %d from %s to %s for request from: %s",
$rec, $base_url, $http_request->uri, $heap->{remote_ip} );

# Write to cache
redis_cache_url( $base_url, $http_request->uri );
my $new_response = create_response( 200, $http_request->uri );

# Avoid sending the response if the client has gone away.
$heap->{client}->put($new_response) if defined $heap->{client};

# Shut down the client's connection when the response is sent.
$kernel->yield("shutdown");
return;
}

# Detect HTTP redirects
if ( $http_response->is_redirect ) {
my $redirect = $http_response->header('Location');
if ($redirect) {
if ( $redirect =~ /^https?:\/\// ) {
_log( LOG_INFO, "HTML redirect from %s to %s for request from: %s",
$http_response->base, $redirect, $heap->{remote_ip} );
my $request = HTTP::Request->new( 'GET', $redirect );
$request->header( "Connection", "close" );
$request->header( "Proxy-Connection", "close" );
$kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] );
return;
}
else {
_log( LOG_INFO, "ignoring internal redirect from %s to %s for request from: %s",
$http_request->uri, $redirect, $heap->{remote_ip} );
my $new_response = create_response( 200, $http_request->uri );

# Avoid sending the response if the client has gone away.
$heap->{client}->put($new_response) if defined $heap->{client};

# Shut down the client's connection when the response is sent.
$kernel->yield("shutdown");
return;
}
}
}
elsif ( $http_response->code != 200 ) {
_log( LOG_INFO, "HTTP response was %d, for request to %s", $http_response->code, $http_request->uri );
my $new_response;
if ( $rec == 0 ) {
$new_response = create_response( $http_response->code );
}
else {
redis_cache_url( $base_url, $http_request->uri );
$new_response = create_response( 200, $http_request->uri );
}

# Avoid sending the response if the client has gone away.
$heap->{client}->put($new_response) if defined $heap->{client};

# Shut down the client's connection when the response is sent.
$kernel->yield("shutdown");
return;
}
my $response_type = $http_response->content_type();
if ( $response_type =~ /^text/i ) {
my $content = $http_response->decoded_content();
my $p = HTML::HeadParser->new($http_response);
$p->parse($content);
my $expire = $http_response->header('Refresh');
if ( $http_response->is_redirect || $expire ) {
my $redirect;
if ($expire) {
$expire =~ /URL=(\S+)/;
$redirect = $1;
}
else {
$redirect = $http_response->header('Location');
}
if ($redirect) {
if ( $redirect =~ /^https?:\/\// ) {
_log( LOG_INFO, "HTML redirect from %s to %s for request from: %s",
$http_response->base, $redirect, $heap->{remote_ip} );
my $request = HTTP::Request->new( 'GET', $redirect );
$request->header( "Connection", "close" );
$request->header( "Proxy-Connection", "close" );
$kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] );
return;
}
else {
_log( LOG_INFO, "ignoring internal redirect from %s to %s for request from: %s",
$http_response->base, $redirect, $heap->{remote_ip} );
}
}
}
if ( $content =~ /location\s*=\s*["']*(https?:\/\/[^"'\s]+)["']*/im ) {
my $redir = uri_unescape($1);
_log( LOG_INFO, "js redirect from %s to %s for request from: %s",
$http_response->base, $1, $heap->{remote_ip} );
my $request = HTTP::Request->new( 'GET', $redir );
$request->header( "Connection", "close" );
$request->header( "Proxy-Connection", "close" );
$kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] );
return;
}
}
elsif (
$with_swf
&& (
$response_type eq 'application/x-shockwave-flash'
|| ( $http_request->uri =~ /\.swf(\?.*)?$/i
&& $http_response->code == 200 )
)
)
{
my $content = $http_response->decoded_content();
$swf_parser->parse($content);

if ( $saved_swf_url ne "" ) {
_log( LOG_INFO, "flash redirect from %s to %s for request from: %s",
$http_response->base, $saved_swf_url, $heap->{remote_ip} );
my $request = HTTP::Request->new( 'GET', $saved_swf_url );

# Reset swf redirect global variable
$saved_swf_url = "";
$request->header( "Connection", "close" );
$request->header( "Proxy-Connection", "close" );
$kernel->post( "cl", "request", "got_response", $request, [ $rec + 1, $base_url ] );
return;
}
}
else {
_log( LOG_INFO, "response wasn't text request from: %s, response is: %s", $heap->{remote_ip}, $response_type );
}

_log( LOG_INFO, "redirect from %s to %s for request from: %s", $base_url, $http_request->uri, $heap->{remote_ip} );

# Write to cache
redis_cache_url( $base_url, $http_request->uri );
my $new_response = create_response( $http_response->code, $http_request->uri );

# Avoid sending the response if the client has gone away.
$heap->{client}->put($new_response) if defined $heap->{client};

# Shut down the client's connection when the response is sent.
$kernel->yield("shutdown");

}

sub process_input {
my ( $kernel, $heap, $request ) = @_[ KERNEL, HEAP, ARG0 ];

if ( $request->isa("HTTP::Response") ) {
$heap->{client}->put($request);
$kernel->yield("shutdown");
return;
}

my $domain;
if ( $request->uri =~ /^http:\/\/([^\/]+)\// ) {
my @parts = split( /\./, $1 );
my $c1 = pop @parts;
my $c2 = pop @parts;
$domain = "$c2.$c1";
}

if (
(
defined( $cfg{check_regexp} ) && $request->uri !~ $cfg{check_regexp}
)
|| ( defined( $cfg{check_domains} )
&& scalar( grep { $_ eq $domain } @{ $cfg{check_domains} } ) == 0 )
)
{
my $new_response = create_response( 200, $request->uri );

# Avoid sending the response if the client has gone away.
$heap->{client}->put($new_response) if defined $heap->{client};
$kernel->yield("shutdown");

# Shut down the client's connection when the response is sent.
return;
}

# Check cache first
my $redirect = redis_check_url( $request->uri );
if ($redirect) {
_log( LOG_INFO, "Memcached redirect from %s to %s for request from: %s",
$request->uri, $redirect, $heap->{remote_ip} );
my $new_response = create_response( 200, $redirect );

# Avoid sending the response if the client has gone away.
$heap->{client}->put($new_response) if defined $heap->{client};
$kernel->yield("shutdown");

# Shut down the client's connection when the response is sent.
return;
}

# Start http request
my $new_request = HTTP::Request->new( 'GET', $request->uri );
$new_request->header( "Connection", "close" );
$new_request->header( "Proxy-Connection", "close" );
$kernel->post( "cl", "request", "got_response", $new_request, [ 0, "" ] );
my ( $kernel, $heap, $request ) = @_[ KERNEL, HEAP, ARG0 ];

if ( $request->isa("HTTP::Response") ) {
$heap->{client}->put($request);
$kernel->yield("shutdown");
return;
}

my $domain;
if ( $request->uri =~ /^http:\/\/([^\/]+)\// ) {
my @parts = split( /\./, $1 );
my $c1 = pop @parts;
my $c2 = pop @parts;
$domain = "$c2.$c1";
}

if (
( defined( $cfg{check_regexp} ) && $request->uri !~ $cfg{check_regexp} )
|| ( defined( $cfg{check_domains} )
&& scalar( grep { $_ eq $domain } @{ $cfg{check_domains} } ) == 0 )
)
{
my $new_response = create_response( 200, $request->uri );

# Avoid sending the response if the client has gone away.
$heap->{client}->put($new_response) if defined $heap->{client};
$kernel->yield("shutdown");

# Shut down the client's connection when the response is sent.
return;
}

# Check cache first
my $redirect = redis_check_url( $request->uri );
if ($redirect) {
_log( LOG_INFO, "Memcached redirect from %s to %s for request from: %s",
$request->uri, $redirect, $heap->{remote_ip} );
my $new_response = create_response( 200, $redirect );

# Avoid sending the response if the client has gone away.
$heap->{client}->put($new_response) if defined $heap->{client};
$kernel->yield("shutdown");

# Shut down the client's connection when the response is sent.
return;
}

# Start http request
my $new_request = HTTP::Request->new( 'GET', $request->uri );
$new_request->header( "Connection", "close" );
$new_request->header( "Proxy-Connection", "close" );
$kernel->post( "cl", "request", "got_response", $new_request, [ 0, "" ] );
}

sub sig_DIE {
my ( $sig, $ex ) = @_[ ARG0, ARG1 ];
_log( LOG_ERR, "$$: error in $ex->{event}: $ex->{error_str}" );
$poe_kernel->sig_handled();
# Send the signal to session that sent the original event.
if ( $ex->{source_session} ne $_[SESSION] ) {
$poe_kernel->signal( $ex->{source_session}, 'DIE', $sig, $ex );
}
my ( $sig, $ex ) = @_[ ARG0, ARG1 ];
_log( LOG_ERR, "$$: error in $ex->{event}: $ex->{error_str}" );
$poe_kernel->sig_handled();
# Send the signal to session that sent the original event.
if ( $ex->{source_session} ne $_[SESSION] ) {
$poe_kernel->signal( $ex->{source_session}, 'DIE', $sig, $ex );
}
}

sub sig_CLD {
my ( $heap, $child_pid ) = @_[ HEAP, ARG1 ];
return 0;
my ( $heap, $child_pid ) = @_[ HEAP, ARG1 ];
return 0;
}

############################### Main code fragment ##################################

# Do daemonization
if ( !$DEBUG ) {
Proc::Daemon::Init;
POE::Kernel->has_forked;
setlogsock('unix');
openlog( 'redirector', 'ndelay,pid', $cfg{'facility'} );
Proc::Daemon::Init;
POE::Kernel->has_forked;
setlogsock('unix');
openlog( 'redirector', 'ndelay,pid', $cfg{'facility'} );
}

# Try to eval config file
if ( -f $cfg{cfg_file} ) {
my $config = read_file( $cfg{cfg_file} );
eval $config;
my $config = read_file( $cfg{cfg_file} );
eval $config;
}

_log( LOG_ALERT, "Process is already started, check $cfg{pidfile}" )
@@ -528,18 +502,18 @@ _log( LOG_ALERT, "Process is already started, check $cfg{pidfile}" )

# Drop privilleges
if ( $> == 0 ) {
my $uid = getpwnam( $cfg{user} )
or _log( LOG_ALERT, "user $cfg{user} unknown" );
my $gid = getgrnam( $cfg{group} )
or _log( LOG_ALERT, "group $cfg{group} unknown" );
$< = $> = $uid;
$) = $( = $gid;
my $uid = getpwnam( $cfg{user} )
or _log( LOG_ALERT, "user $cfg{user} unknown" );
my $gid = getgrnam( $cfg{group} )
or _log( LOG_ALERT, "group $cfg{group} unknown" );
$< = $> = $uid;
$) = $( = $gid;
}

if ( !$DEBUG ) {
_log( LOG_ALERT, "Cannot write to pidfile $cfg{pidfile}" )
if !open( PID, "> $cfg{pidfile}" );
close(PID);
_log( LOG_ALERT, "Cannot write to pidfile $cfg{pidfile}" )
if !open( PID, "> $cfg{pidfile}" );
close(PID);
}

# Reopen log on SIGUSR1
@@ -551,40 +525,40 @@ $SIG{QUIT} = sub { $poe_kernel->stop(); };
$SIG{PIPE} = 'IGNORE';

if ( !$DEBUG ) {
Proc::PidUtil::make_pidfile( $cfg{pidfile}, $$ )
or _log( LOG_ALERT, "Cannot write pidfile $cfg{pidfile}" );
Proc::PidUtil::make_pidfile( $cfg{pidfile}, $$ )
or _log( LOG_ALERT, "Cannot write pidfile $cfg{pidfile}" );
}

# Init redis connection
_log( LOG_INFO, "Starting redis connection" );
$redis_conn = Redis::Fast->new(
server => $cfg{redis_server},
reconnect => 60,
every => 500_000,
encoding => undef,
server => $cfg{redis_server},
reconnect => 60,
every => 500_000,
encoding => undef,
);

# POE part
POE::Component::Client::HTTP->spawn(
Alias => 'cl',
MaxSize => $cfg{max_size}, # Remove for unlimited page sizes
Timeout => $cfg{http_timeout},
ConnectionManager => POE::Component::Client::Keepalive->new(
max_per_host => 256,
max_open => 1024,
keep_alive => 1,
timeout => $cfg{http_timeout},
),
Alias => 'cl',
MaxSize => $cfg{max_size}, # Remove for unlimited page sizes
Timeout => $cfg{http_timeout},
ConnectionManager => POE::Component::Client::Keepalive->new(
max_per_host => 256,
max_open => 1024,
keep_alive => 1,
timeout => $cfg{http_timeout},
),
);

_log( LOG_INFO, "Starting HTTP server" );
POE::Component::Server::TCP->new(
Alias => "",
Port => $cfg{port},
ClientFilter => 'POE::Filter::HTTPD',
Alias => "",
Port => $cfg{port},
ClientFilter => 'POE::Filter::HTTPD',

ClientInput => \&process_input,
InlineStates => { got_response => \&process_client, },
ClientInput => \&process_input,
InlineStates => { got_response => \&process_client, },
);

swf_init_parser();
@@ -597,6 +571,6 @@ exit 0;
############################## Final block ####################################

END {
_log( LOG_NOTICE, 'redirector stopped' );
closelog();
_log( LOG_NOTICE, 'redirector stopped' );
closelog();
}

Loading…
Cancel
Save