You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. #!/usr/bin/perl
  2. use warnings;
  3. use strict;
  4. # Required ports:
  5. # www/p5-POE-Component-Client-HTTP
  6. # www/p5-POE-Component-Server-HTTP
  7. # dns/p5-POE-Component-Client-DNS
  8. # databases/p5-Cache-Memcached-Fast
  9. # devel/p5-Proc-Daemon
  10. # sysutils/p5-Proc-PidUtil
  11. # security/p5-Digest-SHA256
  12. # POE::Component::Client::HTTP uses HTTP::Request and response
  13. # objects.
  14. use POSIX qw(strftime);
  15. use HTTP::Request::Common qw(GET POST);
  16. use POE qw(Component::Server::TCP Filter::HTTPD Component::Client::HTTP); # p5-POE-Component-Client-HTTP
  17. use HTTP::Response;
  18. use HTML::HeadParser;
  19. use SWF::Element; # p5-SWF-File
  20. use Cache::Memcached::Fast;
  21. use Digest::SHA256;
  22. use HTML::HeadParser;
  23. use Proc::Daemon;
  24. use Proc::PidUtil;
  25. use URI::Escape qw(uri_unescape);
  26. use Sys::Syslog qw/:standard :macros setlogsock/;
  27. my $swf_parser;
  28. my $saved_swf_url = "";
  29. my $DEBUG = grep { $_ eq '-debug' } @ARGV;
  30. our %cfg = (
  31. port => 8080,
  32. max_size => 102400,
  33. http_timeout => 5,
  34. max_rec => 5,
  35. pidfile => '/var/run/rspamd/redirector.pid',
  36. logfile => '/var/log/rspamd-redirector.log',
  37. do_log => 0,
  38. debug => 0,
  39. memcached_servers => [ { address => 'localhost:11211', weight => 2.5 },
  40. ],
  41. facility => LOG_LOCAL3, # syslog facility
  42. log_level => LOG_INFO,
  43. digest_bits => 256,
  44. cache_expire => 3600,
  45. user => '@RSPAMD_USER@',
  46. group => '@RSPAMD_GROUP@',
  47. cfg_file => '@CMAKE_INSTALL_PREFIX@/etc/rspamd-redirector.conf',
  48. );
  49. our $do_reopen_log = 0;
  50. our $memd;
  51. ############################################ Subs ########################################
  52. # Read file into string
  53. sub read_file {
  54. my ($file) = @_;
  55. open(IN, $file) or die "Can't open $file: $!";
  56. local $/;
  57. my $content = <IN>;
  58. close IN;
  59. return $content;
  60. }
  61. # Write log line:
  62. sub _log {
  63. my ($l,$w,@s)=@_;
  64. if ($DEBUG) {
  65. printf STDERR $w."\n", @s
  66. } else {
  67. syslog ($l, $w."\n", @s) if ($l <= $cfg{'log_level'})
  68. }
  69. }
  70. # Init swf parser
  71. sub swf_init_parser {
  72. $swf_parser = SWF::Parser->new('tag-callback' => \&swf_tag_callback);
  73. }
  74. # Checking for SWF url
  75. sub swf_search_get_url {
  76. my $actions = shift;
  77. my $saved_pool_str = "";
  78. for my $action (@$actions) {
  79. if ($action->tag_name eq 'ActionConstantPool') {
  80. my $pool = $action->ConstantPool;
  81. for my $string (@$pool) {
  82. if ($string =~ /^https?:\/\//) {
  83. $saved_pool_str = $string->value;
  84. }
  85. }
  86. }
  87. elsif ($action->tag_name eq 'ActionGetURL2') {
  88. if ($saved_pool_str ne "") {
  89. $saved_swf_url = $saved_pool_str;
  90. }
  91. }
  92. elsif ($action->tag_name =~ 'ActionGetURL') {
  93. $saved_swf_url = $action->UrlString->value;
  94. }
  95. }
  96. }
  97. # SWF check tag utility
  98. sub swf_check_tag {
  99. my ($t, $stream) = @_;
  100. my ($tagname) = $t->tag_name;
  101. for ($tagname) {
  102. (/^Do(Init)?Action$/ or /^DefineButton$/) and do {
  103. swf_search_get_url ($t->Actions);
  104. last;
  105. };
  106. /^PlaceObject2$/ and do {
  107. for my $ca (@{$t->ClipActions}) {
  108. swf_search_get_url ($ca->Actions);
  109. }
  110. last;
  111. };
  112. /^DefineButton2$/ and do {
  113. for my $ba (@{$t->Actions}) {
  114. swf_search_get_url ($ba->Actions);
  115. }
  116. last;
  117. };
  118. /^DefineSprite$/ and do {
  119. for my $tag (@{$t->ControlTags}) {
  120. swf_search_get_url ($tag, $stream);
  121. }
  122. last;
  123. };
  124. }
  125. }
  126. # Callback for swf parser
  127. sub swf_tag_callback {
  128. my ($self, $tag, $length, $stream)=@_;
  129. my $t = SWF::Element::Tag->new (Tag=>$tag, Length=>$length);
  130. my ($tagname) = $t->tag_name;
  131. return unless
  132. $tagname eq 'DoAction' or
  133. $tagname eq 'DoInitAction' or
  134. $tagname eq 'PlaceObject2' or
  135. $tagname eq 'DefineButton' or
  136. $tagname eq 'DefineButton2' or
  137. $tagname eq 'DefineSprite';
  138. if ($tagname eq 'DefineSprite') {
  139. # Tags in the sprite are not unpacked here.
  140. $t->shallow_unpack ($stream);
  141. $t->TagStream->parse (callback => \&swf_tag_callback);
  142. return;
  143. } elsif ($tagname eq 'PlaceObject2') {
  144. # Most of PlaceObject2 tags don't have ClipActions.
  145. $t->lookahead_Flags ($stream);
  146. return unless $t->PlaceFlagHasClipActions;
  147. }
  148. # unpack the tag and search actions.
  149. $t->unpack ($stream);
  150. swf_check_tag ($t);
  151. }
  152. # Check url from memcached cache first
  153. sub memcached_check_url {
  154. my ( $url ) = @_;
  155. my $context = Digest::SHA256::new($cfg{digest_bits});
  156. return $memd->get(unpack("H*", ($context->hash($url))));
  157. }
  158. # Write url to memcached key
  159. sub memcached_cache_url {
  160. my ( $url, $url_real ) = @_;
  161. my $context = Digest::SHA256::new($cfg{digest_bits});
  162. $memd->set(unpack("H*", ($context->hash($url))), $url_real, $cfg{cache_expire});
  163. }
  164. # POE http client callback
  165. sub process_client {
  166. my ( $kernel, $heap ) = @_[ KERNEL, HEAP ];
  167. my $http_request = $_[ARG0]->[0];
  168. my $rec = $_[ARG0]->[1][0];
  169. my $http_response = $_[ARG1]->[0];
  170. my $base_url = $_[ARG0]->[1][1];
  171. $saved_swf_url = "";
  172. if ($rec == 0) {
  173. $base_url = $http_request->uri;
  174. }
  175. else {
  176. # Check cache for each url
  177. my $redirect = memcached_check_url($http_request->uri);
  178. if ($redirect) {
  179. _log (LOG_INFO, "Memcached redirect from %s to %s for request from: %s", $http_response->base, $redirect, $heap->{remote_ip});
  180. my $new_response = HTTP::Response->new(200);
  181. $new_response->header("Uri", $redirect);
  182. # Avoid sending the response if the client has gone away.
  183. $heap->{client}->put($new_response) if defined $heap->{client};
  184. # Shut down the client's connection when the response is sent.
  185. return;
  186. }
  187. }
  188. if ($do_reopen_log) {
  189. $do_reopen_log = 0;
  190. reopen_log();
  191. }
  192. if ($rec > $cfg{max_rec}) {
  193. _log (LOG_INFO, "Max recursion exceeded: %d from %s to %s for request from: %s", $rec, $base_url, $http_request->uri, $heap->{remote_ip});
  194. # Write to cache
  195. memcached_cache_url ($base_url, $http_request->uri);
  196. my $new_response = HTTP::Response->new(200);
  197. $new_response->header("Uri", $http_request->uri);
  198. # Avoid sending the response if the client has gone away.
  199. $heap->{client}->put($new_response) if defined $heap->{client};
  200. # Shut down the client's connection when the response is sent.
  201. $kernel->yield("shutdown");
  202. return;
  203. }
  204. # Detect HTTP redirects
  205. if ($http_response->is_redirect) {
  206. my $redirect = $http_response->header('Location');
  207. if ($redirect) {
  208. if ($redirect =~ /^https?:\/\//) {
  209. _log (LOG_INFO, "HTML redirect from %s to %s for request from: %s", $http_response->base, $redirect, $heap->{remote_ip});
  210. my $request = HTTP::Request->new('GET', $redirect);
  211. $request->header( "Connection", "close" );
  212. $request->header( "Proxy-Connection", "close" );
  213. $kernel->post( "cl", "request", "got_response", $request, [$rec + 1, $base_url]);
  214. return;
  215. }
  216. else {
  217. _log (LOG_INFO, "ignoring internal redirect from %s to %s for request from: %s", $http_request->uri, $redirect, $heap->{remote_ip});
  218. my $new_response = HTTP::Response->new(200);
  219. $new_response->header("Uri", $http_request->uri);
  220. # Avoid sending the response if the client has gone away.
  221. $heap->{client}->put($new_response) if defined $heap->{client};
  222. # Shut down the client's connection when the response is sent.
  223. $kernel->yield("shutdown");
  224. return;
  225. }
  226. }
  227. }
  228. elsif ($http_response->code != 200) {
  229. _log (LOG_INFO, "HTTP response was %d, for request to %s", $http_response->code, $http_request->uri);
  230. my $new_response = HTTP::Response->new($http_response->code);
  231. # Avoid sending the response if the client has gone away.
  232. $heap->{client}->put($new_response) if defined $heap->{client};
  233. # Shut down the client's connection when the response is sent.
  234. $kernel->yield("shutdown");
  235. return;
  236. }
  237. my $response_type = $http_response->content_type();
  238. if ( $response_type =~ /^text/i ) {
  239. my $content = $http_response->decoded_content();
  240. my $p = HTML::HeadParser->new($http_response);
  241. $p->parse($content);
  242. my $expire = $http_response->header('Refresh');
  243. if ($http_response->is_redirect || $expire) {
  244. my $redirect;
  245. if ($expire) {
  246. $expire =~ /URL=(\S+)/;
  247. $redirect = $1;
  248. }
  249. else {
  250. $redirect = $http_response->header('Location');
  251. }
  252. if ($redirect) {
  253. if ($redirect =~ /^https?:\/\//) {
  254. _log (LOG_INFO, "HTML redirect from %s to %s for request from: %s", $http_response->base, $redirect, $heap->{remote_ip});
  255. my $request = HTTP::Request->new('GET', $redirect);
  256. $request->header( "Connection", "close" );
  257. $request->header( "Proxy-Connection", "close" );
  258. $kernel->post( "cl", "request", "got_response", $request, [$rec + 1, $base_url]);
  259. return;
  260. }
  261. else {
  262. _log (LOG_INFO, "ignoring internal redirect from %s to %s for request from: %s", $http_response->base, $redirect, $heap->{remote_ip});
  263. }
  264. }
  265. }
  266. if ($content =~ /location\s*=\s*["']*(https?:\/\/[^"'\s]+)["']*/im) {
  267. my $redir = uri_unescape ($1);
  268. _log (LOG_INFO, "js redirect from %s to %s for request from: %s", $http_response->base, $1, $heap->{remote_ip});
  269. my $request = HTTP::Request->new('GET', $redir);
  270. $request->header("Connection", "close" );
  271. $request->header("Proxy-Connection", "close" );
  272. $kernel->post("cl", "request", "got_response", $request, [$rec + 1, $base_url]);
  273. return;
  274. }
  275. }
  276. elsif ( $response_type eq 'application/x-shockwave-flash' ||
  277. ($http_request->uri =~ /\.swf(\?.*)?$/i && $http_response->code == 200)) {
  278. my $content = $http_response->decoded_content();
  279. $swf_parser->parse($content);
  280. if ($saved_swf_url ne "") {
  281. _log (LOG_INFO, "flash redirect from %s to %s for request from: %s", $http_response->base, $saved_swf_url, $heap->{remote_ip});
  282. my $request = HTTP::Request->new('GET', $saved_swf_url);
  283. # Reset swf redirect global variable
  284. $saved_swf_url = "";
  285. $request->header( "Connection", "close" );
  286. $request->header( "Proxy-Connection", "close" );
  287. $kernel->post( "cl", "request", "got_response", $request, [$rec + 1, $base_url]);
  288. return;
  289. }
  290. }
  291. else {
  292. _log (LOG_INFO, "response wasn't text request from: %s, response is: %s", $heap->{remote_ip}, $response_type);
  293. }
  294. _log (LOG_INFO, "redirect from %s to %s for request from: %s", $base_url, $http_request->uri, $heap->{remote_ip});
  295. # Write to cache
  296. memcached_cache_url ($base_url, $http_request->uri);
  297. my $new_response = HTTP::Response->new($http_response->code);
  298. $new_response->header("Uri", $http_request->uri);
  299. # Avoid sending the response if the client has gone away.
  300. $heap->{client}->put($new_response) if defined $heap->{client};
  301. # Shut down the client's connection when the response is sent.
  302. $kernel->yield("shutdown");
  303. }
  304. sub process_input {
  305. my ( $kernel, $heap, $request ) = @_[ KERNEL, HEAP, ARG0 ];
  306. if ($request->isa ("HTTP::Response")) {
  307. $heap->{client}->put($request);
  308. $kernel->yield("shutdown");
  309. return;
  310. }
  311. my $domain;
  312. if ($request->uri =~ /^http:\/\/([^\/]+)\//) {
  313. my @parts = split(/\./, $1);
  314. my $c1 = pop @parts;
  315. my $c2 = pop @parts;
  316. $domain = "$c2.$c1";
  317. }
  318. if ((defined($cfg{check_regexp}) && $request->uri !~ $cfg{check_regexp}) ||
  319. (defined($cfg{check_domains}) && scalar(grep {$_ eq $domain} @{$cfg{check_domains}}) == 0)) {
  320. my $new_response = HTTP::Response->new(200);
  321. $new_response->header("Uri", $request->uri);
  322. $new_response->header("Connection", "close");
  323. $new_response->header("Proxy-Connection", "close");
  324. # Avoid sending the response if the client has gone away.
  325. $heap->{client}->put($new_response) if defined $heap->{client};
  326. $kernel->yield("shutdown");
  327. # Shut down the client's connection when the response is sent.
  328. return;
  329. }
  330. # Check cache first
  331. my $redirect = memcached_check_url($request->uri);
  332. if ($redirect) {
  333. _log (LOG_INFO, "Memcached redirect from %s to %s for request from: %s", $request->uri, $redirect, $heap->{remote_ip});
  334. my $new_response = HTTP::Response->new(200);
  335. $new_response->header("Uri", $redirect);
  336. $new_response->header("Connection", "close");
  337. $new_response->header("Proxy-Connection", "close");
  338. # Avoid sending the response if the client has gone away.
  339. $heap->{client}->put($new_response) if defined $heap->{client};
  340. $kernel->yield("shutdown");
  341. # Shut down the client's connection when the response is sent.
  342. return;
  343. }
  344. # Start http request
  345. my $new_request = HTTP::Request->new('GET', $request->uri);
  346. $new_request->header( "Connection", "close" );
  347. $new_request->header( "Proxy-Connection", "close" );
  348. $kernel->post( "cl", "request", "got_response", $new_request, [0, ""]);
  349. }
  350. sub sig_DIE {
  351. my( $sig, $ex ) = @_[ ARG0, ARG1 ];
  352. _log(LOG_ERR, "$$: error in $ex->{event}: $ex->{error_str}");
  353. $poe_kernel->sig_handled();
  354. # Send the signal to session that sent the original event.
  355. if( $ex->{source_session} ne $_[SESSION] ) {
  356. $poe_kernel->signal( $ex->{source_session}, 'DIE', $sig, $ex );
  357. }
  358. }
  359. ############################### Main code fragment ##################################
  360. # Do daemonization
  361. if (!$DEBUG) {
  362. Proc::Daemon::Init;
  363. POE::Kernel->has_forked;
  364. setlogsock('unix');
  365. openlog('redirector', 'ndelay,pid', $cfg{'facility'})
  366. }
  367. # Try to eval config file
  368. if (-f $cfg{cfg_file}) {
  369. my $config = read_file ($cfg{cfg_file});
  370. eval $config;
  371. }
  372. die "Process is already started, check $cfg{pidfile}" if Proc::PidUtil::is_running($cfg{pidfile});
  373. # Drop privilleges
  374. if ($> == 0) {
  375. my $uid = getpwnam($cfg{user}) or die "user $cfg{user} unknown";
  376. my $gid = getgrnam($cfg{group}) or die "group $cfg{group} unknown";
  377. $< = $> = $uid;
  378. $) = $( = $gid;
  379. }
  380. if (!$DEBUG) {
  381. die "Cannot write to pidfile $cfg{pidfile}" if ! open(PID, "> $cfg{pidfile}");
  382. close(PID);
  383. }
  384. # Reopen log on SIGUSR1
  385. $poe_kernel->sig(DIE => 'sig_DIE');
  386. $SIG{USR1} = sub { $do_reopen_log = 1; $poe_kernel->sig_handled(); };
  387. $SIG{INT} = sub { $poe_kernel->stop(); };
  388. $SIG{QUIT} = sub { $poe_kernel->stop(); };
  389. $SIG{PIPE} = 'IGNORE';
  390. if (!$DEBUG) {
  391. Proc::PidUtil::make_pidfile($cfg{pidfile}, $$) or die "Cannot write pidfile $cfg{pidfile}";
  392. }
  393. # Init memcached connection
  394. _log(LOG_INFO, "Starting memcached connection");
  395. $memd = new Cache::Memcached::Fast({
  396. servers => $cfg{memcached_servers},
  397. connect_timeout => 0.2,
  398. io_timeout => 0.5,
  399. max_failures => 3,
  400. failure_timeout => 2,
  401. ketama_points => 150,
  402. hash_namespace => 1,
  403. serialize_methods => [ \&Storable::freeze, \&Storable::thaw ],
  404. utf8 => ($^V ge v5.8.1 ? 1 : 0),
  405. });
  406. # POE part
  407. POE::Component::Client::HTTP->spawn(
  408. Alias => 'cl',
  409. MaxSize => $cfg{max_size}, # Remove for unlimited page sizes
  410. Timeout => $cfg{http_timeout},
  411. ConnectionManager => POE::Component::Client::Keepalive->new(
  412. max_per_host => 256,
  413. max_open => 1024,
  414. keep_alive => 1,
  415. timeout => $cfg{http_timeout},
  416. ),
  417. );
  418. _log(LOG_INFO, "Starting HTTP server");
  419. POE::Component::Server::TCP->new
  420. ( Alias => "",
  421. Port => $cfg{port},
  422. ClientFilter => 'POE::Filter::HTTPD',
  423. ClientInput => \&process_input,
  424. InlineStates => { got_response => \&process_client, },
  425. );
  426. swf_init_parser ();
  427. _log(LOG_NOTICE, "Starting URL resolver");
  428. # Start POE. This will run the server until it exits.
  429. POE::Kernel->run();
  430. exit 0;
  431. ############################## Final block ####################################
  432. END {
  433. _log(LOG_NOTICE, 'redirector stopped');
  434. closelog();
  435. }