diff options
author | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-03-03 18:56:10 +0300 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@rambler-co.ru> | 2009-03-03 18:56:10 +0300 |
commit | 89569341997df14df1f3fdfe2bbbb5bb6fa0e7a9 (patch) | |
tree | 9914bd27e698e1b0884070500013a0dc9b88c449 | |
parent | 5424cc32d47c5e98b9edfb2b342d9d98d97a1175 (diff) | |
download | rspamd-89569341997df14df1f3fdfe2bbbb5bb6fa0e7a9.tar.gz rspamd-89569341997df14df1f3fdfe2bbbb5bb6fa0e7a9.zip |
* Add ability to fork multiple lmtp workers
* Add ability to drop privileges of rspamd processes
* Add ability to install redirector with rspamd
* Add FreeBSD start scripts for rspamd and redirector
* Remove command line parsing from util.c, as it is used only by the main process
-rw-r--r-- | CMakeLists.txt | 34 | ||||
-rw-r--r-- | config.h.in | 12 | ||||
-rwxr-xr-x | freebsd/redirector.sh.in | 32 | ||||
-rwxr-xr-x | freebsd/rspamd.sh.in | 35 | ||||
-rw-r--r-- | rspamd.conf.sample | 2 | ||||
-rw-r--r-- | src/cfg_file.h | 3 | ||||
-rw-r--r-- | src/cfg_file.y | 6 | ||||
-rw-r--r-- | src/main.c | 128 | ||||
-rw-r--r-- | src/util.c | 33 | ||||
-rw-r--r-- | src/util.h | 2 | ||||
-rwxr-xr-x | utils/redirector.pl.in | 447 |
11 files changed, 677 insertions, 57 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 060541513..19172f15d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,12 +11,15 @@ SET(RSPAMD_VERSION_PATCH 0) SET(RSPAMD_VERSION "${RSPAMD_VERSION_MAJOR}.${RSPAMD_VERSION_MINOR}.${RSPAMD_VERSION_PATCH}") SET(RSPAMD_MASTER_SITE_URL "http://cebka.pp.ru/hg/rspamd") +SET(RSPAMD_USER "nobody") +SET(RSPAMD_GROUP "nobody") CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR) OPTION(DEBUG_MODE "Enable debug output [default: ON]" ON) OPTION(ENABLE_OPTIMIZATION "Enable optimization [default: OFF]" OFF) OPTION(SKIP_RELINK_RPATH "Skip relinking and full RPATH for the install tree" OFF) +OPTION(ENABLE_REDIRECTOR "Enable redirector install [default: OFF]" OFF) # Build optimized code for following CPU (default i386) #SET(CPU_TUNE "i686") @@ -152,6 +155,8 @@ CHECK_INCLUDE_FILES(arpa/inet.h HAVE_ARPA_INET_H) CHECK_INCLUDE_FILES(netdb.h HAVE_NETDB_H) CHECK_INCLUDE_FILES(syslog.h HAVE_SYSLOG_H) CHECK_INCLUDE_FILES(libgen.h HAVE_LIBGEN_H) +CHECK_INCLUDE_FILES(pwd.h HAVE_PWD_H) +CHECK_INCLUDE_FILES(grp.h HAVE_GRP_H) IF(HAVE_SYS_WAIT_H) LIST(APPEND CMAKE_REQUIRED_INCLUDES sys/wait.h) @@ -202,6 +207,20 @@ ENDIF (MD5_INCLUDE) SET(CMAKE_C_WARN_FLAGS " -Wall -W -Wpointer-arith -Wno-unused-parameter -Wno-unused-function -Wunused-variable -Wno-sign-compare -Wunused-value") +# Platform specific routines +IF(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") + CONFIGURE_FILE(freebsd/rspamd.sh.in freebsd/rspamd.sh @ONLY) + + IF(ENABLE_REDIRECTOR MATCHES "ON") + CONFIGURE_FILE(freebsd/redirector.sh.in freebsd/redirector.sh @ONLY) + ENDIF(ENABLE_REDIRECTOR MATCHES "ON") + +ENDIF(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") + +IF(ENABLE_REDIRECTOR MATCHES "ON") + CONFIGURE_FILE(utils/redirector.pl.in utils/redirector.pl @ONLY) +ENDIF(ENABLE_REDIRECTOR MATCHES "ON") + IF(DEBUG_MODE MATCHES "ON") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ggdb ") ENDIF(DEBUG_MODE MATCHES "ON") @@ -299,7 +318,7 @@ IF(PERL_DYNALOADER) TARGET_LINK_LIBRARIES(rspamd dynaloader) 
ENDIF(PERL_DYNALOADER) TARGET_LINK_LIBRARIES(rspamd ${GMIME2_LIBRARIES}) -ADD_DEPENDENCIES(rspamd perlxs) +ADD_DEPENDENCIES(rspamd perlxs ${LEX_OUTPUT} ${YACC_OUTPUT}) ADD_EXECUTABLE(test/rspamd-test ${TESTDEPENDS} ${CONTRIBSRC} ${TESTSRC}) SET_TARGET_PROPERTIES(test/rspamd-test PROPERTIES LINKER_LANGUAGE C) @@ -319,3 +338,16 @@ TARGET_LINK_LIBRARIES(utils/url-extracter ${GMIME2_LIBRARIES}) INSTALL(PROGRAMS rspamd-${RSPAMD_VERSION} DESTINATION bin RENAME rspamd) INSTALL(PROGRAMS rspamc.pl DESTINATION bin RENAME rspamc) INSTALL(CODE "EXECUTE_PROCESS(COMMAND make install WORKING_DIRECTORY perl)") + +IF(ENABLE_REDIRECTOR MATCHES "ON") + INSTALL(PROGRAMS utils/redirector.pl DESTINATION bin RENAME rspamd-redirector) +ENDIF(ENABLE_REDIRECTOR MATCHES "ON") + +IF(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") + INSTALL(PROGRAMS freebsd/rspamd.sh DESTINATION etc/rc.d) + IF(ENABLE_REDIRECTOR MATCHES "ON") + INSTALL(PROGRAMS freebsd/redirector.sh DESTINATION etc/rc.d) + ENDIF(ENABLE_REDIRECTOR MATCHES "ON") + INSTALL(CODE "EXECUTE_PROCESS(COMMAND ${CMAKE_COMMAND} -E make_directory /var/run/rspamd/)") + INSTALL(CODE "EXECUTE_PROCESS(COMMAND chown ${RSPAMD_USER}:${RSPAMD_GROUP} /var/run/rspamd/)") +ENDIF(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") diff --git a/config.h.in b/config.h.in index a1b3d932a..97da9c381 100644 --- a/config.h.in +++ b/config.h.in @@ -39,6 +39,9 @@ #cmakedefine HAVE_LIBGEN_H 1 +#cmakedefine HAVE_GRP_H 1 +#cmakedefine HAVE_PWD_H 1 + #cmakedefine HAVE_ENDIAN_H 1 #cmakedefine HAVE_SYS_ENDIAN_H 1 #cmakedefine HAVE_MACHINE_ENDIAN_H 1 @@ -207,6 +210,15 @@ #include <libutil.h> #endif +/* pwd and grp */ +#ifdef HAVE_PWD_H +#include <pwd.h> +#endif + +#ifdef HAVE_GRP_H +#include <grp.h> +#endif + /* syslog */ #ifdef HAVE_SYSLOG_H #include <syslog.h> diff --git a/freebsd/redirector.sh.in b/freebsd/redirector.sh.in new file mode 100755 index 000000000..71f6df5ed --- /dev/null +++ b/freebsd/redirector.sh.in @@ -0,0 +1,32 @@ +#!/bin/sh +# +# $Id$ +# +# PROVIDE: rspamd_redirector +# 
REQUIRE: LOGIN +# KEYWORD: shutdown + +# +# Add the following line to /etc/rc.conf to enable countd: +# rspamd-redirector (bool): Set to "NO" by default. +# Set it to "YES" to enable redirector. + +. /etc/rc.subr + +name="rspamd_redirector" +rcvar=`set_rcvar` +procname="@CMAKE_INSTALL_PREFIX@/bin/rspamd-redirector" + +load_rc_config $name + +: ${rspamd_redirector_enable="NO"} +: ${rspamd_redirector_pidfile="/var/run/rspamd/redirector.pid"} + +stop_postcmd="rm -f $rspamd_redirector_pidfile" + +extra_commands="reload" +sig_reload="USR1" + +command="$procname" + +run_rc_command "$1" diff --git a/freebsd/rspamd.sh.in b/freebsd/rspamd.sh.in new file mode 100755 index 000000000..c19fd3e94 --- /dev/null +++ b/freebsd/rspamd.sh.in @@ -0,0 +1,35 @@ +#!/bin/sh +# +# $Id$ +# +# PROVIDE: rspamd +# REQUIRE: LOGIN +# KEYWORD: shutdown + +# +# Add the following line to /etc/rc.conf to enable countd: +# rspamd (bool): Set to "NO" by default. +# Set it to "YES" to enable rspamd. + +. /etc/rc.subr + +name="rspamd" +rcvar=`set_rcvar` +procname="@CMAKE_INSTALL_PREFIX@/bin/rspamd" + +load_rc_config $name + +: ${rspamd_enable="NO"} +: ${rspamd_pidfile="/var/run/rspamd/rspamd.pid"} +: ${rspamd_user="@RSPAMD_USER@"} +: ${rspamd_group="@RSPAMD_GROUP@"} + +stop_postcmd="rm -f $rspamd_pidfile" + +extra_commands="reload" +sig_reload="USR1" + +command="$procname" +command_args="-u ${rspamd_user} -g ${rspamd_group} -c @CMAKE_INSTALL_PREFIX@/etc/rspamd.conf" + +run_rc_command "$1" diff --git a/rspamd.conf.sample b/rspamd.conf.sample index bac7dc922..14744c771 100644 --- a/rspamd.conf.sample +++ b/rspamd.conf.sample @@ -84,6 +84,8 @@ lmtp { # Metric that is considered as main. 
If we have spam result on # this metric, lmtp delivery would be failed metric = "default"; + # Number of lmtp workers + workers = 1; }; delivery { diff --git a/src/cfg_file.h b/src/cfg_file.h index 78ad6e93a..75c3a8c2f 100644 --- a/src/cfg_file.h +++ b/src/cfg_file.h @@ -136,6 +136,8 @@ struct config_scalar { * Structure that stores all config data */ struct config_file { + char *rspamd_user; /**< user to run as */ + char *rspamd_group; /**< group to run as */ memory_pool_t *cfg_pool; /**< memory pool for config */ char *cfg_name; /**< name of config file */ char *pid_file; /**< name of pid file */ @@ -156,6 +158,7 @@ struct config_file { gboolean no_fork; /**< if 1 do not call daemon() */ gboolean config_test; /**< if TRUE do only config file test */ unsigned int workers_number; /**< number of workers */ + unsigned int lmtp_workers_number; /**< number of lmtp workers */ enum rspamd_log_type log_type; /**< log type */ int log_facility; /**< log facility in case of syslog */ diff --git a/src/cfg_file.y b/src/cfg_file.y index 7eee8516c..dd48520f2 100644 --- a/src/cfg_file.y +++ b/src/cfg_file.y @@ -676,6 +676,7 @@ lmtpcmd: lmtpenabled | lmtpsock | lmtpmetric + | lmtpworkers ; lmtpenabled: @@ -698,6 +699,11 @@ lmtpmetric: cfg->lmtp_metric = memory_pool_strdup (cfg->cfg_pool, $3); } ; +lmtpworkers: + WORKERS EQSIGN NUMBER { + cfg->lmtp_workers_number = $3; + } + ; delivery: DELIVERY OBRACE deliverybody EBRACE diff --git a/src/main.c b/src/main.c index 1e9ba6788..f0eca39d3 100644 --- a/src/main.c +++ b/src/main.c @@ -77,6 +77,84 @@ void sig_handler (int signo) } } +static void +read_cmd_line (int argc, char **argv, struct config_file *cfg) +{ + int ch; + while ((ch = getopt(argc, argv, "thfc:u:g:")) != -1) { + switch (ch) { + case 'f': + cfg->no_fork = 1; + break; + case 'c': + if (optarg && cfg->cfg_name) { + cfg->cfg_name = memory_pool_strdup (cfg->cfg_pool, optarg); + } + break; + case 't': + cfg->config_test = 1; + break; + case 'u': + if (optarg) { + 
cfg->rspamd_user = memory_pool_strdup (cfg->cfg_pool, optarg); + } + break; + case 'g': + if (optarg) { + cfg->rspamd_group = memory_pool_strdup (cfg->cfg_pool, optarg); + } + break; + case 'h': + case '?': + default: + /* Show help message and exit */ + printf ("Rspamd version " RVERSION "\n" + "Usage: rspamd [-t] [-h] [-n] [-f] [-c config_file]\n" + "-h: This help message\n" + "-t: Do config test and exit\n" + "-f: Do not daemonize main process\n" + "-c: Specify config file (./rspamd.conf is used by default)\n" + "-u: User to run rspamd as\n" + "-g: Group to run rspamd as\n"); + exit (0); + break; + } + } +} + +static void +drop_priv (struct config_file *cfg) +{ + struct passwd *pwd; + struct group *grp; + + if (geteuid () == 0 && cfg->rspamd_user) { + pwd = getpwnam (cfg->rspamd_user); + if (pwd == NULL) { + msg_err ("drop_priv: user specified does not exists (%s), aborting", strerror (errno)); + exit (-errno); + } + if (cfg->rspamd_group) { + grp = getgrnam (cfg->rspamd_group); + if (grp == NULL) { + msg_err ("drop_priv: group specified does not exists (%s), aborting", strerror (errno)); + exit (-errno); + } + if (setgid (grp->gr_gid) == -1) { + msg_err ("drop_priv: cannot setgid to %d (%s), aborting", (int)grp->gr_gid, strerror (errno)); + exit (-errno); + } + if (initgroups(cfg->rspamd_user, grp->gr_gid) == -1) { + msg_err ("drop_priv: initgroups failed (%s), aborting", strerror (errno)); + exit (-errno); + } + } + if (setuid (pwd->pw_uid) == -1) { + msg_err ("drop_priv: cannot setuid to %d (%s), aborting", (int)pwd->pw_uid, strerror (errno)); + exit (-errno); + } + } +} static void config_logger (struct rspamd_main *rspamd, gboolean is_fatal) @@ -321,11 +399,6 @@ main (int argc, char **argv, char **env) exit (-errno); } - if (write_pid (rspamd) == -1) { - msg_err ("main: cannot write pid file %s", rspamd->cfg->pid_file); - exit (-errno); - } - /* Init C modules */ for (i = 0; i < MODULES_NUM; i ++) { cur_module = memory_pool_alloc (rspamd->cfg->cfg_pool, 
sizeof (struct module_ctx)); @@ -338,21 +411,8 @@ main (int argc, char **argv, char **env) rspamd->type = TYPE_MAIN; init_signals (&signals, sig_handler); - /* Init perl interpreter */ - dTHXa (perl_interpreter); - PERL_SYS_INIT3 (&argc, &argv, &env); - perl_interpreter = perl_alloc (); - if (perl_interpreter == NULL) { - msg_err ("main: cannot allocate perl interpreter, %s", strerror (errno)); - exit (-errno); - } - - PERL_SET_CONTEXT (perl_interpreter); - perl_construct (perl_interpreter); - perl_parse (perl_interpreter, xs_init, 3, args, NULL); - /* Block signals to use sigsuspend in future */ - sigprocmask(SIG_BLOCK, &signals.sa_mask, NULL); + /* Create listen socket */ if (rspamd->cfg->bind_family == AF_INET) { if ((listen_sock = make_tcp_socket (&rspamd->cfg->bind_addr, rspamd->cfg->bind_port, TRUE)) == -1) { msg_err ("main: cannot create tcp listen socket. %s", strerror (errno)); @@ -371,6 +431,30 @@ main (int argc, char **argv, char **env) msg_err ("main: cannot listen on socket. 
%s", strerror (errno)); exit(-errno); } + + /* Drop privilleges */ + drop_priv (cfg); + + if (write_pid (rspamd) == -1) { + msg_err ("main: cannot write pid file %s", rspamd->cfg->pid_file); + exit (-errno); + } + + /* Init perl interpreter */ + dTHXa (perl_interpreter); + PERL_SYS_INIT3 (&argc, &argv, &env); + perl_interpreter = perl_alloc (); + if (perl_interpreter == NULL) { + msg_err ("main: cannot allocate perl interpreter, %s", strerror (errno)); + exit (-errno); + } + + PERL_SET_CONTEXT (perl_interpreter); + perl_construct (perl_interpreter); + perl_parse (perl_interpreter, xs_init, 3, args, NULL); + /* Block signals to use sigsuspend in future */ + sigprocmask(SIG_BLOCK, &signals.sa_mask, NULL); + TAILQ_INIT (&rspamd->workers); @@ -389,7 +473,9 @@ main (int argc, char **argv, char **env) /* Start lmtp if enabled */ if (cfg->lmtp_enable) { - fork_worker (rspamd, listen_sock, 0, TYPE_LMTP); + for (i = 0; i < cfg->lmtp_workers_number; i++) { + fork_worker (rspamd, listen_sock, 0, TYPE_LMTP); + } } /* Signal processing cycle */ @@ -443,7 +529,7 @@ main (int argc, char **argv, char **env) /* Start new worker that would reread configuration*/ active_worker = fork_worker (rspamd, listen_sock, 1, TYPE_WORKER); } - /* Do not start new workers untill active worker is not ready for accept */ + /* Do not start new workers until active worker is not ready for accept */ } if (child_ready) { child_ready = 0; diff --git a/src/util.c b/src/util.c index 6a107436a..589d5b5d9 100644 --- a/src/util.c +++ b/src/util.c @@ -203,39 +203,6 @@ make_unix_socket (const char *path, struct sockaddr_un *addr, gboolean is_server return (-1); } -void -read_cmd_line (int argc, char **argv, struct config_file *cfg) -{ - int ch; - while ((ch = getopt(argc, argv, "thfc:")) != -1) { - switch (ch) { - case 'f': - cfg->no_fork = 1; - break; - case 'c': - if (optarg && cfg->cfg_name) { - cfg->cfg_name = memory_pool_strdup (cfg->cfg_pool, optarg); - } - break; - case 't': - cfg->config_test = 1; - 
break; - case 'h': - case '?': - default: - /* Show help message and exit */ - printf ("Rspamd version " RVERSION "\n" - "Usage: rspamd [-t] [-h] [-n] [-f] [-c config_file]\n" - "-h: This help message\n" - "-t: Do config test and exit\n" - "-f: Do not daemonize main process\n" - "-c: Specify config file (./rspamd.conf is used by default)\n"); - exit (0); - break; - } - } -} - int write_pid (struct rspamd_main *main) { diff --git a/src/util.h b/src/util.h index f0764b960..7267a719b 100644 --- a/src/util.h +++ b/src/util.h @@ -12,8 +12,6 @@ int make_tcp_socket (struct in_addr *, u_short, gboolean is_server); int accept_from_socket (int listen_sock, struct sockaddr *addr, socklen_t *len); /* Create and bind or connect unix socket */ int make_unix_socket (const char *, struct sockaddr_un *, gboolean is_server); -/* Parse command line arguments using getopt (3) */ -void read_cmd_line (int , char **, struct config_file *); /* Write pid to file */ int write_pid (struct rspamd_main *); /* Make specified socket non-blocking */ diff --git a/utils/redirector.pl.in b/utils/redirector.pl.in new file mode 100755 index 000000000..cb74b0222 --- /dev/null +++ b/utils/redirector.pl.in @@ -0,0 +1,447 @@ +#!/usr/bin/perl + +use warnings; +use strict; + +# Required ports: +# www/p5-POE-Component-Client-HTTP +# www/p5-POE-Component-Server-HTTP +# dns/p5-POE-Component-Client-DNS +# databases/p5-Cache-Memcached-Fast +# devel/p5-Proc-Daemon +# sysutils/p5-Proc-PidUtil +# security/p5-Digest-SHA256 + +# POE::Component::Client::HTTP uses HTTP::Request and response +# objects. 
+ +use POSIX qw(strftime); +use HTTP::Request::Common qw(GET POST); + +use POE qw(Component::Server::TCP Filter::HTTPD Component::Client::HTTP); # p5-POE-Component-Client-HTTP +use HTTP::Response; +use HTML::HeadParser; +use SWF::Element; # p5-SWF-File +use Cache::Memcached::Fast; +use Digest::SHA256; + +use HTML::HeadParser; +use Proc::Daemon; +use Proc::PidUtil; +use URI::Escape qw(uri_unescape); + +my $swf_parser; +my $saved_swf_url = ""; + +my %cfg = ( + port => 8080, + max_size => 102400, + http_timeout => 5, + max_rec => 5, + pidfile => '/var/run/rspamd/redirector.pid', + logfile => '/var/log/rspamd-redirector.log', + do_log => 1, + debug => 0, + digest_bits => 256, + cache_expire => 3600, + user => '@RSPAMD_USER@', + group => '@RSPAMD_GROUP@', +); + +our $do_reopen_log = 0; + +die "Process is already started, check $cfg{pidfile}" if Proc::PidUtil::is_running($cfg{pidfile}); + +die "Cannot write to pidfile $cfg{pidfile}" if ! open(PID, "> $cfg{pidfile}"); +close(PID); + +$cfg{do_log} = 0 if ! open(LOG, ">> $cfg{logfile}"); + +# Do daemonization +Proc::Daemon::Init if !$cfg{debug}; + +my $uid = getpwnam($cfg{user}) or die "user $cfg{user} unknown"; +my $gid = getgrnam($cfg{group}) or die "group $cfg{group} unknown"; +setpgrp ($uid, $gid) or die "cannot drop privilleges"; + +Proc::PidUtil::make_pidfile($cfg{pidfile}, $$) or die "Cannot write pidfile $cfg{pidfile}"; + +# Init memcached connection +my $memd = new Cache::Memcached::Fast({ + servers => [ { address => 'localhost:11211', weight => 2.5 }, + ], + connect_timeout => 0.2, + io_timeout => 0.5, + max_failures => 3, + failure_timeout => 2, + ketama_points => 150, + hash_namespace => 1, + serialize_methods => [ \&Storable::freeze, \&Storable::thaw ], + utf8 => ($^V ge v5.8.1 ? 
1 : 0), +}); + +# Reopen log on SIGUSR1 +$SIG{USR1} = sub { $do_reopen_log = 1; }; +$SIG{INT} = sub { $poe_kernel->stop(); }; +$SIG{QUIT} = sub { $poe_kernel->stop(); }; + +write_log ("", "Starting URL resolver"); + +# POE part +POE::Component::Client::HTTP->spawn( + Alias => 'cl', + MaxSize => $cfg{max_size}, # Remove for unlimited page sizes + Timeout => $cfg{http_timeout}, + ConnectionManager => POE::Component::Client::Keepalive->new( + max_per_host => 256, + max_open => 1024, + keep_alive => 1, + timeout => $cfg{http_timeout}, + ), +); + +sub reopen_log { + if ($cfg{do_log}) { + close (LOG); + $cfg{do_log} = 0 if ! open (LOG, ">> $cfg{logfile}"); + write_log ("", "Log reopened"); + } +} + +# Write log line: +# $remote_ip - remote ip string +# $str - string to write +sub write_log { + my ( $remote_ip, $str ) = @_; + + if ($cfg{do_log}) { + my $now_string = strftime "%F %T", localtime; + LOG->autoflush(1); + print LOG "[$now_string]: $remote_ip: $str\n"; + } +} + +sub swf_init_parser { + $swf_parser = SWF::Parser->new('tag-callback' => \&swf_tag_callback); +} + +# Checking for SWF url +sub swf_search_get_url { + my $actions = shift; + my $saved_pool_str = ""; + + for my $action (@$actions) { + if ($action->tag_name eq 'ActionConstantPool') { + my $pool = $action->ConstantPool; + for my $string (@$pool) { + if ($string =~ /^https?:\/\//) { + $saved_pool_str = $string->value; + } + } + } + elsif ($action->tag_name eq 'ActionGetURL2') { + if ($saved_pool_str ne "") { + $saved_swf_url = $saved_pool_str; + } + } + elsif ($action->tag_name =~ 'ActionGetURL') { + $saved_swf_url = $action->UrlString->value; + } + } +} + +# SWF check tag utility +sub swf_check_tag { + my ($t, $stream) = @_; + my ($tagname) = $t->tag_name; + + for ($tagname) { + (/^Do(Init)?Action$/ or /^DefineButton$/) and do { + swf_search_get_url ($t->Actions); + last; + }; + /^PlaceObject2$/ and do { + for my $ca (@{$t->ClipActions}) { + swf_search_get_url ($ca->Actions); + } + last; + }; + 
/^DefineButton2$/ and do { + for my $ba (@{$t->Actions}) { + swf_search_get_url ($ba->Actions); + } + last; + }; + /^DefineSprite$/ and do { + for my $tag (@{$t->ControlTags}) { + swf_search_get_url ($tag, $stream); + } + last; + }; + } +} + +# Callback for swf parser +sub swf_tag_callback { + my ($self, $tag, $length, $stream)=@_; + my $t = SWF::Element::Tag->new (Tag=>$tag, Length=>$length); + my ($tagname) = $t->tag_name; + + return unless + $tagname eq 'DoAction' or + $tagname eq 'DoInitAction' or + $tagname eq 'PlaceObject2' or + $tagname eq 'DefineButton' or + $tagname eq 'DefineButton2' or + $tagname eq 'DefineSprite'; + + if ($tagname eq 'DefineSprite') { + + # Tags in the sprite are not unpacked here. + + $t->shallow_unpack ($stream); + $t->TagStream->parse (callback => \&swf_tag_callback); + return; + + } elsif ($tagname eq 'PlaceObject2') { + + # Most of PlaceObject2 tags don't have ClipActions. + + $t->lookahead_Flags ($stream); + return unless $t->PlaceFlagHasClipActions; + } + + # unpack the tag and search actions. + + $t->unpack ($stream); + swf_check_tag ($t); +} + +# Check url from memcached cache first +sub memcached_check_url { + my ( $url ) = @_; + + my $context = Digest::SHA256::new($cfg{digest_bits}); + + if ($cfg{debug}) { + write_log ("127.0.0.1", "Check key '". unpack("H*", ($context->hash($url))) . "'"); + } + + return $memd->get(unpack("H*", ($context->hash($url)))); +} + +# Write url to memcached key +sub memcached_cache_url { + my ( $url, $url_real ) = @_; + + my $context = Digest::SHA256::new($cfg{digest_bits}); + + if ($cfg{debug}) { + write_log ("127.0.0.1", "Cache key '". unpack("H*", ($context->hash($url))) . 
"' with value '$url_real'"); + } + + $memd->set(unpack("H*", ($context->hash($url))), $url_real, $cfg{cache_expire}); +} + +# POE http client callback +sub process_client { + my ( $kernel, $heap ) = @_[ KERNEL, HEAP ]; + + my $http_request = $_[ARG0]->[0]; + my $rec = $_[ARG0]->[1][0]; + my $http_response = $_[ARG1]->[0]; + my $base_url = $_[ARG0]->[1][1]; + $saved_swf_url = ""; + + if ($rec == 0) { + $base_url = $http_request->uri; + } + else { + # Check cache for each url + my $redirect = memcached_check_url($http_request->uri); + if ($redirect) { + write_log ($heap->{remote_ip}, "Memcached redirect from: " . $http_response->base . " to: " . $redirect); + my $new_response = HTTP::Response->new(200); + $new_response->header("Uri", $redirect); + + # Avoid sending the response if the client has gone away. + $heap->{client}->put($new_response) if defined $heap->{client}; + + # Shut down the client's connection when the response is sent. + return; + } + } + + if ($do_reopen_log) { + $do_reopen_log = 0; + reopen_log(); + } + + if ($rec > $cfg{max_rec}) { + write_log ($heap->{remote_ip}, "Max recursion exceeded: $rec, returning '$base_url' -> '" . $http_request->uri . "'"); + # Write to cache + memcached_cache_url ($base_url, $http_request->uri); + my $new_response = HTTP::Response->new(200); + $new_response->header("Uri", $http_request->uri); + + # Avoid sending the response if the client has gone away. + $heap->{client}->put($new_response) if defined $heap->{client}; + + # Shut down the client's connection when the response is sent. + $kernel->yield("shutdown"); + return; + } + + # Detect HTTP redirects + if ($http_response->is_redirect) { + my $redirect = $http_response->header('Location'); + if ($redirect) { + if ($redirect =~ /^https?:\/\//) { + write_log ($heap->{remote_ip}, "HTTP redirect from: ". $http_response->base . " to: " . 
$redirect); + my $request = HTTP::Request->new('GET', $redirect); + $request->header( "Connection", "close" ); + $request->header( "Proxy-Connection", "close" ); + $kernel->post( "cl", "request", "got_response", $request, [$rec + 1, $base_url]); + return; + } + else { + write_log ($heap->{remote_ip}, "Internal redirect, ignoring '$redirect', returning '$base_url' -> '" . $http_request->uri . "'"); + my $new_response = HTTP::Response->new(200); + $new_response->header("Uri", $http_request->uri); + + # Avoid sending the response if the client has gone away. + $heap->{client}->put($new_response) if defined $heap->{client}; + + # Shut down the client's connection when the response is sent. + $kernel->yield("shutdown"); + return; + } + } + } + my $response_type = $http_response->content_type(); + if ( $response_type =~ /^text/i ) { + my $content = $http_response->decoded_content(); + my $p = HTML::HeadParser->new($http_response); + $p->parse($content); + my $expire = $http_response->header('Refresh'); + if ($http_response->is_redirect || $expire) { + my $redirect; + if ($expire) { + $expire =~ /URL=(\S+)/; + $redirect = $1; + } + else { + $redirect = $http_response->header('Location'); + } + if ($redirect) { + if ($redirect =~ /^https?:\/\//) { + write_log ($heap->{remote_ip}, "HTML redirect from:". $http_response->base . " to: " . $redirect); + my $request = HTTP::Request->new('GET', $redirect); + $request->header( "Connection", "close" ); + $request->header( "Proxy-Connection", "close" ); + $kernel->post( "cl", "request", "got_response", $request, [$rec + 1, $base_url]); + return; + } + else { + write_log ($heap->{remote_ip}, "Internal redirect, ignoring '$redirect'"); + } + } + } + if ($content =~ /location\s*=\s*["']*(https?:\/\/[^"'\s]+)["']*/im) { + my $redir = uri_unescape ($1); + write_log ($heap->{remote_ip}, "JavaScript redirect from:". $http_response->base . " to: " . 
$1); + my $request = HTTP::Request->new('GET', $redir); + $request->header( "Connection", "close" ); + $request->header( "Proxy-Connection", "close" ); + $kernel->post( "cl", "request", "got_response", $request, [$rec + 1, $base_url]); + return; + } + } + elsif ( $response_type eq 'application/x-shockwave-flash' || + ($http_request->uri =~ /\.swf(\?.*)?$/i && $http_response->code == 200)) { + my $content = $http_response->decoded_content(); + $swf_parser->parse( $content ); + if ($saved_swf_url ne "") { + write_log ($heap->{remote_ip}, "Flash redirect from:". $http_response->base . " to: " . $saved_swf_url); + my $request = HTTP::Request->new('GET', $saved_swf_url); + # Reset swf redirect global variable + $saved_swf_url = ""; + $request->header( "Connection", "close" ); + $request->header( "Proxy-Connection", "close" ); + $kernel->post( "cl", "request", "got_response", $request, [$rec + 1, $base_url]); + return; + } + } + else { + write_log ($heap->{remote_ip}, "Response wasn't text"); + } + + write_log ($heap->{remote_ip}, "Returning '$base_url' -> '" . $http_request->uri . "'"); + # Write to cache + memcached_cache_url ($base_url, $http_request->uri); + my $new_response = HTTP::Response->new($http_response->code); + $new_response->header("Uri", $http_request->uri); + + # Avoid sending the response if the client has gone away. + $heap->{client}->put($new_response) if defined $heap->{client}; + + # Shut down the client's connection when the response is sent. + $kernel->yield("shutdown"); + +} + +sub process_input { + my ( $kernel, $heap, $request ) = @_[ KERNEL, HEAP, ARG0 ]; + + if ($request->isa ("HTTP::Response")) { + $heap->{client}->put($request); + $kernel->yield("shutdown"); + return; + } + + # Check cache first + my $redirect = memcached_check_url($request->uri); + if ($redirect) { + write_log ($heap->{remote_ip}, "Memcached redirect from: " . $request->uri . " to: " . 
$redirect); + my $new_response = HTTP::Response->new(200); + $new_response->header("Uri", $redirect); + $new_response->header("Connection", "close"); + $new_response->header("Proxy-Connection", "close"); + + # Avoid sending the response if the client has gone away. + $heap->{client}->put($new_response) if defined $heap->{client}; + $kernel->yield("shutdown"); + + # Shut down the client's connection when the response is sent. + return; + } + # Start http request + my $new_request = HTTP::Request->new('GET', $request->uri); + $new_request->header( "Connection", "close" ); + $new_request->header( "Proxy-Connection", "close" ); + $kernel->post( "cl", "request", "got_response", $new_request, [0, ""]); +} + +POE::Component::Server::TCP->new + ( Alias => "", + Port => $cfg{port}, + ClientFilter => 'POE::Filter::HTTPD', + + ClientInput => \&process_input, + InlineStates => { got_response => \&process_client, }, +); + +swf_init_parser (); + + +# Start POE. This will run the server until it exits. +POE::Kernel->run(); +exit 0; + +END { + unlink($cfg{pidfile}); + if ($cfg{do_log}) { + write_log ("", "Stopping URL resolver"); + close (LOG); + } +} |