diff options
author | Larry Hynes <larry@larryhynes.com> | 2016-06-09 20:09:17 +0100 |
---|---|---|
committer | Larry Hynes <larry@larryhynes.com> | 2016-06-09 20:09:17 +0100 |
commit | 6ec2e81512e068e798da20e4ab486ade81d3b9dc (patch) | |
tree | c84faf049dea2e52bc8da120d25f242fa9c0c397 | |
parent | 25083072e809ac328b8c4b4c98cdf6c9e35c112e (diff) | |
parent | fb326efc2b3fa1c25705d218987199a608b87b87 (diff) | |
download | rspamd-6ec2e81512e068e798da20e4ab486ade81d3b9dc.tar.gz rspamd-6ec2e81512e068e798da20e4ab486ade81d3b9dc.zip |
Merge remote-tracking branch 'upstream/master' into documentation
* upstream/master: (90 commits)
[Fix] Plug memory leak in proxy
[Feature] Do not print garbadge in --compact output
[Fix] Fix encrypted proxy requests
[Fix] Do not delete uninitialized events
[Feature] Add protection against open files limit and accepting sockets
[Fix] Another fix for redis timeouts
[Fix] Fix order of initialization
[Feature] Use file lock in logger to avoid deadlocks
[Fix] Fix errors handling in the proxy
[Fix] More fixes for redis refcounts
[Fix] Initialize parser scripts properly
[Fix] Try to fix issue in redis stats backend when task is closed
[Fix] Fix usage of rdns reply structure
[Fix] Fix symbol name for spf soft fail
[Fix] Fix setting path for lua
[Doc] Update regexp module documentation
[Minor] Fix names
[Fix] Add missing types
[Feature] Implement braced regexp quantifiers
[Fix] Implement new automata to skip empty lines for dkim signing
...
80 files changed, 2869 insertions, 622 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 19da2632d..219fdce41 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -486,6 +486,7 @@ INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/src" "${CMAKE_SOURCE_DIR}/contrib/librdns" "${CMAKE_SOURCE_DIR}/contrib/aho-corasick" "${CMAKE_SOURCE_DIR}/contrib/lc-btrie" + "${CMAKE_SOURCE_DIR}/contrib/lpeg" "${CMAKE_BINARY_DIR}/src" #Stored in the binary dir "${CMAKE_BINARY_DIR}/src/libcryptobox") @@ -1,3 +1,23 @@ +1.2.7: + * Slightly reduce weights of rules with high FP rate + * Add workround for rspamd-1.3 + * Fix possible FP in TRACKER_ID rule + * Simplify MISSING_MIMEOLE rule + * Add workaround for gmime CTE stupidity + * Fix mime headers processing + * Fix false positive URL detections in text parts + * Fix Exim shutdown patch + * Enable workaround for exim mailbox format + * Backport shingles static test + * Fix levenshtein distance calculations + * Fix max_train setup in ANN module + * Fix redis structure by adding {NULL, NULL} member + * Fix build with unmodified LibreSSL opensslv.h + * Repair optional dependencies + * Really skip filters in case of pre-result set + * Restore the intended pre-filters behaviour + * Fix ipv6 mask application + 1.2.6: * Fix parsing of URLs in texts * Fix creating of URLs from LUA diff --git a/centos/rspamd.spec b/centos/rspamd.spec index 7e6f4b758..c3ba247c5 100644 --- a/centos/rspamd.spec +++ b/centos/rspamd.spec @@ -198,6 +198,7 @@ fi %config(noreplace) %{rspamd_confdir}/common.conf %config(noreplace) %{rspamd_confdir}/logging.inc %config(noreplace) %{rspamd_confdir}/options.inc +%config(noreplace) %{rspamd_confdir}/redirectors.inc %config(noreplace) %{rspamd_confdir}/worker-controller.inc %config(noreplace) %{rspamd_confdir}/worker-normal.inc %config(noreplace) %{rspamd_confdir}/modules.d/* diff --git a/conf/metrics.conf b/conf/metrics.conf index 90b815fcf..a3c8b27c8 100644 --- a/conf/metrics.conf +++ b/conf/metrics.conf @@ -44,7 +44,7 @@ metric { description = "Recipients seems to be autogenerated (works if recipients count is more than 5)"; } symbol "MIME_HTML_ONLY" { - weight = 1.0; + weight = 0.2; description = "Messages that have only HTML part"; } symbol "FORGED_MSGID_YAHOO" { @@ -56,7 +56,7 @@ metric { description = "Forged The Bat! MUA headers"; } symbol "R_MISSING_CHARSET" { - weight = 5.0; + weight = 2.5; description = "Charset is missing in a message"; } symbol "RCVD_DOUBLE_IP_SPAM" { @@ -68,7 +68,7 @@ metric { description = "Forged outlook HTML signature"; } symbol "R_UNDISC_RCPT" { - weight = 5.0; + weight = 3.0; description = "Recipients are absent or undisclosed"; } symbol "FM_FAKE_HELO_VERIZON" { diff --git a/contrib/exim/shutdown.patch b/contrib/exim/shutdown.patch index 39007130f..e8bf8a057 100644 --- a/contrib/exim/shutdown.patch +++ b/contrib/exim/shutdown.patch @@ -1,14 +1,14 @@ -diff -ru exim-4.86.orig/src/spam.c exim-4.86/src/spam.c ---- exim-4.86.orig/src/spam.c 2016-04-09 13:54:51.583800284 +0200 -+++ exim-4.86/src/spam.c 2016-04-09 13:55:16.659806242 +0200 -@@ -499,7 +499,9 @@ +--- exim4-4.86.2.orig/src/spam.c ++++ exim4-4.86.2/src/spam.c +@@ -499,7 +499,10 @@ if (ferror(mbox_file)) (void)fclose(mbox_file); /* we're done sending, close socket for writing */ -shutdown(spamd_sock,SHUT_WR); -+if (!is_rspamd) { ++if (!sd->is_rspamd) ++ { + shutdown(spamd_sock,SHUT_WR); -+} ++ } /* read spamd response using what's left of the timeout. */ memset(spamd_buffer, 0, sizeof(spamd_buffer)); diff --git a/contrib/lpeg/lptree.c b/contrib/lpeg/lptree.c index ac5f51503..bc82ae8da 100644 --- a/contrib/lpeg/lptree.c +++ b/contrib/lpeg/lptree.c @@ -21,7 +21,7 @@ /* number of siblings for each tree */ const byte numsiblings[] = { 0, 0, 0, /* char, set, any */ - 0, 0, /* true, false */ + 0, 0, /* true, false */ 1, /* rep */ 2, 2, /* seq, choice */ 1, 1, /* not, and */ @@ -1280,8 +1280,6 @@ static struct luaL_Reg metareg[] = { {NULL, NULL} }; - -int luaopen_lpeg (lua_State *L); int luaopen_lpeg (lua_State *L) { luaL_newmetatable(L, PATTERN_T); lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */ diff --git a/contrib/lpeg/lptree.h b/contrib/lpeg/lptree.h index b69528a6f..38a668e9f 100644 --- a/contrib/lpeg/lptree.h +++ b/contrib/lpeg/lptree.h @@ -1,4 +1,4 @@ -/* +/* ** $Id: lptree.h,v 1.2 2013/03/24 13:51:12 roberto Exp $ */ @@ -6,7 +6,7 @@ #define lptree_h -#include "lptypes.h" +#include "lptypes.h" /* @@ -69,7 +69,7 @@ extern const byte numsiblings[]; #define sib2(t) ((t) + (t)->u.ps) - +int luaopen_lpeg (lua_State *L); diff --git a/contrib/mumhash/mum.h b/contrib/mumhash/mum.h index 1daebf3de..161c5390e 100644 --- a/contrib/mumhash/mum.h +++ b/contrib/mumhash/mum.h @@ -43,12 +43,13 @@ #ifndef __MUM_HASH__ #define __MUM_HASH__ -#include "config.h" #include <stddef.h> #include <stdlib.h> #include <string.h> #include <limits.h> + #ifdef _MSC_VER +typedef unsigned __int16 uint16_t; typedef unsigned __int32 uint32_t; typedef unsigned __int64 uint64_t; #else @@ -57,8 +58,13 @@ typedef unsigned __int64 uint64_t; #ifdef __GNUC__ #define _MUM_ATTRIBUTE_UNUSED __attribute__((unused)) -#define _MUM_OPTIMIZE(opts) RSPAMD_OPTIMIZE(opts) +#ifndef __clang__ +#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts))) +#define _MUM_TARGET(opts) __attribute__((__target__ (opts))) +#else +#define _MUM_OPTIMIZE(opts) #define _MUM_TARGET(opts) +#endif #else #define _MUM_ATTRIBUTE_UNUSED #define _MUM_OPTIMIZE(opts) @@ -145,14 +151,18 @@ _mum (uint64_t v, uint64_t p) { } #if defined(_MSC_VER) +#define _mum_bswap_32(x) _byteswap_uint32_t (x) #define _mum_bswap_64(x) _byteswap_uint64_t (x) #elif defined(__APPLE__) #include <libkern/OSByteOrder.h> +#define _mum_bswap_32(x) OSSwapInt32 (x) #define _mum_bswap_64(x) OSSwapInt64 (x) #elif defined(__GNUC__) +#define _mum_bswap32(x) __builtin_bswap32 (x) #define _mum_bswap64(x) __builtin_bswap64 (x) #else #include <byteswap.h> +#define _mum_bswap32(x) bswap32 (x) #define _mum_bswap64(x) bswap64 (x) #endif @@ -166,6 +176,18 @@ _mum_le (uint64_t v) { #error "Unknown endianess" #endif } + +static inline uint32_t +_mum_le32 (uint32_t v) { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH) + return v; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return _mum_bswap32 (v); +#else +#error "Unknown endianess" +#endif +} + /* Macro defining how many times the most nested loop in _mum_hash_aligned will be unrolled by the compiler (although it can make an own decision:). Use only a constant here to help a @@ -196,7 +218,7 @@ static inline uint64_t _MUM_OPTIMIZE("unroll-loops") _mum_hash_aligned (uint64_t start, const void *key, size_t len) { uint64_t result = start; const unsigned char *str = (const unsigned char *) key; - union {uint64_t i; unsigned char s[sizeof (uint64_t)];} p; + uint64_t u64; int i; size_t n; @@ -217,9 +239,37 @@ _mum_hash_aligned (uint64_t start, const void *key, size_t len) { for (i = 0; i < n; i++) result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]); len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t); - if (len) { - p.i = 0; memcpy (p.s, str, len); - result ^= _mum (_mum_le (((uint64_t *) p.s)[0]), _mum_tail_prime); + switch (len) { + case 7: + u64 = _mum_le32 (*(uint32_t *) str); + u64 |= (uint64_t) str[4] << 32; + u64 |= (uint64_t) str[5] << 40; + u64 |= (uint64_t) str[6] << 48; + return result ^ _mum (u64, _mum_tail_prime); + case 6: + u64 = _mum_le32 (*(uint32_t *) str); + u64 |= (uint64_t) str[4] << 32; + u64 |= (uint64_t) str[5] << 40; + return result ^ _mum (u64, _mum_tail_prime); + case 5: + u64 = _mum_le32 (*(uint32_t *) str); + u64 |= (uint64_t) str[4] << 32; + return result ^ _mum (u64, _mum_tail_prime); + case 4: + u64 = _mum_le32 (*(uint32_t *) str); + return result ^ _mum (u64, _mum_tail_prime); + case 3: + u64 = str[0]; + u64 |= (uint64_t) str[1] << 8; + u64 |= (uint64_t) str[2] << 16; + return result ^ _mum (u64, _mum_tail_prime); + case 2: + u64 = str[0]; + u64 |= (uint64_t) str[1] << 8; + return result ^ _mum (u64, _mum_tail_prime); + case 1: + u64 = str[0]; + return result ^ _mum (u64, _mum_tail_prime); } return result; } @@ -232,8 +282,8 @@ _mum_final (uint64_t h) { return h; } -#if defined(__x86_64__) && defined(__GNUC__) -#if 0 +#if defined(__x86_64__) && defined(__GNUC__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 9) && !defined(__clang__) + /* We want to use AVX2 insn MULX instead of generic x86-64 MULQ where it is possible. Although on modern Intel processors MULQ takes 3-cycles vs. 4 for MULX, MULX permits more freedom in insn @@ -243,7 +293,6 @@ _mum_hash_avx2 (const void * key, size_t len, uint64_t seed) { return _mum_final (_mum_hash_aligned (seed + len, key, len)); } #endif -#endif #ifndef _MUM_UNALIGNED_ACCESS #if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \ @@ -283,7 +332,7 @@ _mum_hash_default (const void *key, size_t len, uint64_t seed) { else { while (len != 0) { block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN; - memcpy (buf, str, block_len); + memmove (buf, str, block_len); result = _mum_hash_aligned (result, buf, block_len); len -= block_len; str += block_len; @@ -351,13 +400,11 @@ mum_hash64 (uint64_t key, uint64_t seed) { target endianess and the unroll factor. */ static inline uint64_t mum_hash (const void *key, size_t len, uint64_t seed) { -#if defined(__x86_64__) && defined(__GNUC__) -#if 0 - static int avx2_support = 0; +#if defined(__x86_64__) && defined(__GNUC__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 9) && !defined(__clang__) + static int avx2_support = 0; if (avx2_support > 0) return _mum_hash_avx2 (key, len, seed); - else if (! avx2_support) { __builtin_cpu_init (); avx2_support = __builtin_cpu_supports ("avx2") ? 1 : -1; @@ -365,7 +412,6 @@ mum_hash (const void *key, size_t len, uint64_t seed) { return _mum_hash_avx2 (key, len, seed); } #endif -#endif return _mum_hash_default (key, len, seed); } diff --git a/doc/markdown/modules/multimap.md b/doc/markdown/modules/multimap.md index 290df9e49..cede3bc94 100644 --- a/doc/markdown/modules/multimap.md +++ b/doc/markdown/modules/multimap.md @@ -22,6 +22,7 @@ Multimap module allows to build rules based on the dynamic maps content. Rspamd map types in this module: * `hash map` - a list of domains or `user@domain` +* `regexp map` - a list of regular expressions * `ip map` - an effective radix trie of `ip/mask` values (supports both IPv4 and IPv6 addresses) * `cdb` - constant database format (files only) @@ -36,6 +37,8 @@ The module itself contains a set of rules in form: symbol { type = type; map = uri; [optional params] } +### Map types + Type attribute means what is matched with this map. The following types are supported: * `ip` - matches source IP of message (radix map) @@ -52,7 +55,7 @@ Maps can also be specified as [CDB](http://www.corpit.ru/mjt/tinycdb.html) datab map = "cdb:///path/to/file.cdb"; -Here is an example configuration of multimap module: +### Pre-filter maps To enable pre-filter support, you should specify `action` parameter which can take the following values: @@ -73,13 +76,28 @@ multimap { } ~~~ +### Regexp maps + + All maps but `ip` and `dnsbl` support `regexp` mode. In this mode, all keys in maps are treated as regular expressions, for example: /example\d+\.com/i /other\d+\.com/i test + # Comments are still enabled For performance considerations, use only expressions supported by [hyperscan](http://01org.github.io/hyperscan/dev-reference/compilation.html#pattern-support) as this engine provides blazing performance at no additional cost. Currently, there is no way to distinguish what particular regexp was matched in case if multiple regexp were matched. +To enable regexp mode, you should set `regexp` option to `true`: + +~~~ucl +sender_from_whitelist_user { + type = "from"; + map = "file:///tmp/from.map"; + symbol = "SENDER_FROM_WHITELIST"; + regexp = true; +} +~~~ + ### Map filters It is also possible to apply a filtering expression before checking value against some map. This is mainly useful diff --git a/doc/markdown/modules/regexp.md b/doc/markdown/modules/regexp.md index f08079bff..01d7a0635 100644 --- a/doc/markdown/modules/regexp.md +++ b/doc/markdown/modules/regexp.md @@ -60,12 +60,24 @@ The match type is defined by special flags after the last `/` symbol: * `B` - MIME header regexp (applied for headers in MIME parts only) * `R` - full headers content (applied for all headers undecoded and for the message only - **not** including MIME headers) * `M` - raw message regexp -* `P` - part regexp +* `P` - part regexp without HTML tags +* `Q` - part regexp with HTML tags +* `C` - spamassassin `BODY` regexp analogue(see http://spamassassin.apache.org/full/3.4.x/doc/Mail_SpamAssassin_Conf.txt) +* `D` - spamassassin `RAWBODY` regexp analogue * `U` - URL regexp +From 1.3, it is also possible to specify long regexp types for convenience in curly braces: -We strongly discourage from using of raw message regexps as they are expensive and -should be replaced by [trie](trie.md) rules if possible. +* `{header}` - header regexp +* `{raw_header}` - undecoded header regexp (e.g. without quoted-printable decoding) +* `{mime_header}` - MIME header regexp (applied for headers in MIME parts only) +* `{all_header}` - full headers content (applied for all headers undecoded and for the message only - **not** including MIME headers) +* `{body}` - raw message regexp +* `{mime}` - part regexp without HTML tags +* `{raw_mime}` - part regexp with HTML tags +* `{sa_body}` - spamassassin `BODY` regexp analogue(see http://spamassassin.apache.org/full/3.4.x/doc/Mail_SpamAssassin_Conf.txt) +* `{sa_raw_body}` - spamassassin `RAWBODY` regexp analogue +* `{url}` - URL regexp Each regexp also supports the following flags: @@ -122,7 +134,7 @@ Here is an example of table form definition of regexp rule: ~~~lua config['regexp']['RE_TEST'] = { - re = '/test/P', + re = '/test/i{mime}', score = 10.0, condition = function(task) if task:get_header('Subject') then diff --git a/doc/markdown/modules/whitelist.md b/doc/markdown/modules/whitelist.md index ec4671e58..5b2417194 100644 --- a/doc/markdown/modules/whitelist.md +++ b/doc/markdown/modules/whitelist.md @@ -1,6 +1,6 @@ # Whitelist module -Whitelist module is intended to negate scores for some messages that are known to +Whitelist module is intended to negate or increase scores for some messages that are known to be from the trusted sources. Due to `SMTP` protocol design flaws, it is quite easy to forge sender. Therefore, rspamd tries to validate sender based on the following additional properties: @@ -14,15 +14,30 @@ properties: Whitelist configuration is quite straightforward. You can define a set of rules within `rules` section. Each rule **must** have `domains` attribute that specifies either map of domains (if specified as a string) or a direct list of domains (if specified as an array). -The following optional parameters are allowed: + +### Whitelist constraints + +The following constraints are allowed: - `valid_spf`: require a valid SPF policy - `valid_dkim`: require DKIM validation - `valid_dmarc`: require a valid DMARC policy -These options are combined using `AND` operator, therefore `valid_dkim = true` and +### Whitelist rules modes + +Each whitelist rule can work in 3 modes: + +- `whitelist` (default): add symbol when a domain has been found and one of constraints defined is satisfied (e.g. `valid_dmarc`) +- `blacklist`: add symbol when a domain has been found and one of constraints defined is *NOT* satisfied (e.g. `valid_dmarc`) +- `strict`: add symbol with negative (ham) score when a domain has been found and one of constraints defined is satisfied (e.g. `valid_dmarc`) and add symbol with **POSITIVE** (spam) score when some of constraints defined has failed + +If you do not define any constraints, then all both `strict` and `whitelist` rules just insert result for all mail from the specified domains. For `blacklist` rules the result has normally positive score. + +These options are combined using `AND` operator for `whitelist` and using `OR` for `blacklist` and `strict` rules. Therefore, if `valid_dkim = true` and `valid_spf = true` would require both DKIM and SPF validation to whitelist domains from -the list. +the list. On the contrary, for blacklist and strict rules any violation would cause positive score symbol being inserted. + +### Optional settings You can also set the default metric settings using the ordinary attributes, such as: @@ -50,34 +65,52 @@ whitelist { valid_spf = true; domains = [ "github.com", - ] - score = -1.0 + ]; + score = -1.0; } - + WHITELIST_DKIM = { valid_dkim = true; domains = [ "github.com", - ] - score = -2.0 + ]; + score = -2.0; } - + WHITELIST_SPF_DKIM = { valid_spf = true; valid_dkim = true; domains = [ ["github.com", 2.0], - ] - score = -3.0 + ]; + score = -3.0; } - + + STRICT_SPF_DKIM = { + valid_spf = true; + valid_dkim = true; + strict = true; + domains = [ + ["paypal.com", 2.0], + ]; + score = -3.0; # For strict rules negative score should be defined + } + + BLACKLIST_DKIM = { + valid_spf = true; + valid_dkim = true; + blacklist = true; + domains = "/some/file/blacklist_dkim.map"; + score = 3.0; # Mention positive score here + } + WHITELIST_DMARC_DKIM = { valid_dkim = true; valid_dmarc = true; domains = [ "github.com", - ] - score = -7.0 + ]; + score = -7.0; } } } diff --git a/interface/css/rspamd.css b/interface/css/rspamd.css index c47abbb08..87ecc3bb3 100644 --- a/interface/css/rspamd.css +++ b/interface/css/rspamd.css @@ -653,3 +653,11 @@ td.maps-cell { #historyLog_wrapper div.row:last-child > div { padding: 5px 20px 0 20px; } + +/* Throughput graph controls */ +#graph_controls select { + margin: 10px 20px 0; + display: inline-block; + width: auto; + border: 1px solid grey; +} diff --git a/interface/index.html b/interface/index.html index a02a2d668..eb6925736 100644 --- a/interface/index.html +++ b/interface/index.html @@ -9,6 +9,7 @@ <link href="//cdnjs.cloudflare.com/ajax/libs/file-uploader/3.7.0/fineuploader.min.css" rel="stylesheet"> <link href="//maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css" rel="stylesheet"> <link rel="stylesheet" type="text/css" href="./css/datatables.min.css"/> + <link rel="stylesheet" type="text/css" href="//cdn.rawgit.com/moisseev/D3Evolution/fb6ea62c43e26e728b48a43012fb796c5ab6689c/d3evolution.css"> <link href="./css/rspamd.css" rel="stylesheet"> </head> @@ -25,6 +26,9 @@ </ul> <ul class="nav navbar-nav nav-pills" role="tablist"> <li role="presentation" class="active"><a id="status_nav" aria-controls="status" role="tab" href="#status" data-toggle="tab">Status</a></li> +<!-- + <li role="presentation"><a id="throughput_nav" aria-controls="throughput" role="tab" href="#throughput" data-toggle="tab">Throughput</a></li> +--> <li role="presentation"><a id="configuration_nav" aria-controls="configuration" role="tab" href="#configuration" data-toggle="tab">Configuration</a></li> <li role="presentation"><a id="learning_nav" aria-controls="learning" role="tab" href="#learning" data-toggle="tab">Learning</a></li> <li role="presentation"><a id="scan_nav"aria-controls="scan" role="tab" href="#scan" data-toggle="tab">Scan</a></li> @@ -64,6 +68,49 @@ </div> </div> + <div class="tab-pane" id="throughput"> + <div class="widget-box"> + <div class="widget-title"> + <span class="icon"><i class="glyphicon glyphicon-stats"></i></span> + <h5>Throughput</h5> + </div> + <div class="widget-content chart-content"> + <div class="row row-chart"> + <div class="chart" id="graph"> + <span class="notice">Loading..</span> + <noscript>Please enable Javascript</noscript> + </div> + </div> + <form id="graph_controls" action="#"> + Select dataset: + <select id="selData" class="form-control"> + <option value="hourly" selected>Hourly</option> + <option value="daily">Daily</option> + <option value="weekly">Weekly</option> + <option value="monthly">Monthly</option> + </select> + Select chart type: + <select id="selType" class="form-control"> + <option value="line" selected>Line</option> + <option value="area">Stacked area</option> + </select> + Select <a title="View Mike Bostock's Block." href="https://bl.ocks.org/mbostock/4342190" target="_blank">interpolation mode</a>: + <select id="selInterpolate" class="form-control"> + <option value="linear" selected>linear</option> + <option value="step-before">step-before</option> + <option value="step-after">step-after</option> + <option value="basis">basis</option> + <option value="basis-open">basis-open</option> + <option value="bundle">bundle</option> + <option value="cardinal">cardinal</option> + <option value="cardinal-open">cardinal-open</option> + <option value="monotone">monotone</option> + </select> + </form> + </div> + </div> + </div> + <div class="tab-pane" id="configuration"> <div class="widget-box"> <div class="widget-title"> @@ -313,6 +360,7 @@ <script src="//cdnjs.cloudflare.com/ajax/libs/file-uploader/3.7.0/fineuploader.min.js"></script> <script src="//cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min.js"></script> <script src="./js/d3pie.min.js"></script> +<script src="//cdn.rawgit.com/moisseev/D3Evolution/fb6ea62c43e26e728b48a43012fb796c5ab6689c/d3evolution.js"></script> <script src="./js/rspamd.js"></script> <script type="text/javascript" src="./js/datatables.min.js"></script> diff --git a/interface/js/rspamd.js b/interface/js/rspamd.js index 2fcc6bfc3..7b55e1cf9 100644 --- a/interface/js/rspamd.js +++ b/interface/js/rspamd.js @@ -28,10 +28,30 @@ //$.cookie.json = true; var pie; var history; + var graph; + + var selected = []; // Keep graph selectors state + + // Bind event handlers to selectors + $("#selData").change(function () { + selected.selData = this.value; + getGraphData(this.value); + }); + $("#selType").change(function () { + graph.type(this.value); + }); + $("#selInterpolate").change(function () { + graph.interpolate(this.value); + }); + $('#disconnect').on('click', function (event) { if (pie) { pie.destroy(); } + if (graph) { + graph.destroy(); + graph = undefined; + } if (history) { history.destroy(); } @@ -43,6 +63,7 @@ $('#refresh').on('click', function (event) { statWidgets(); getChart(); + getGraphData(selected.selData); }); // @supports session storage function supportsSessionStorage() { @@ -461,6 +482,54 @@ } }); } + + function initGraph() { + // Get selectors' current state + var selIds = ["selData", "selType", "selInterpolate"]; + selIds.forEach(function (id) { + var e = document.getElementById(id); + selected[id] = e.options[e.selectedIndex].value; + }); + + var options = { + title: "Rspamd throughput", + width: 1060, + height: 370, + + type: selected.selType, + interpolate: selected.selInterpolate, + + legend: { + entries: [ + {label: "Rejected", color: "#FF0000"}, + {label: "Probable spam", color: "#FFD700"}, + {label: "Greylisted", color: "#436EEE"}, + {label: "Clean", color: "#66cc00"} + ] + } + }; + graph = new D3Evolution("graph", options); + } + + function getGraphData(type) { + $.ajax({ + dataType: 'json', + type: 'GET', + url: 'graph?type=', + data: type, + beforeSend: function (xhr) { + xhr.setRequestHeader('Password', getPassword()); + }, + success: function (data) { + graph.data(data); + }, + error: function (jqXHR, textStatus, errorThrown) { + alertMessage('alert-error', 'Cannot receive throughput data: ' + + textStatus + ' ' + jqXHR.status + ' ' + errorThrown); + } + }); + } + // @get history log // function getChart() { // //console.log(data) @@ -1115,6 +1184,7 @@ getSymbols(); getHistory(); getChart(); + initGraph(); $('#progress').hide(); $(disconnect).show(); } @@ -1128,5 +1198,8 @@ $('#status_nav').bind('click', function (e) { getChart(); }); + $('#throughput_nav').bind('click', function () { + getGraphData(selected.selData); + }); }); })(); diff --git a/src/rspamadm/lua_preprocess.pl b/lua_preprocess.pl index d1e8f4689..d1e8f4689 100644 --- a/src/rspamadm/lua_preprocess.pl +++ b/lua_preprocess.pl diff --git a/rules/forwarding.lua b/rules/forwarding.lua index 6ee0b9a97..c5c8912af 100644 --- a/rules/forwarding.lua +++ b/rules/forwarding.lua @@ -81,24 +81,27 @@ rspamd_config.FORWARDED = { local matches = 0 -- Retrieve and loop through all Received headers local rcvds = task:get_header_full('Received') - for _, rcvd in ipairs(rcvds) do + + if rcvds then + for _, rcvd in ipairs(rcvds) do local _,_,addr = rcvd['decoded']:lower():find("%sfor%s<(.-)>") if addr then - matches = matches + 1 - -- Check that it doesn't match the envrcpt - -- TODO: remove any plus addressing? - if addr ~= envrcpts[1].addr:lower() then - -- Check for mailing-lists as they will have the same signature - if matches < 2 and lu and to and to[1].addr:lower() == addr then - return false - else - return true, addr - end + matches = matches + 1 + -- Check that it doesn't match the envrcpt + -- TODO: remove any plus addressing? + if addr ~= envrcpts[1].addr:lower() then + -- Check for mailing-lists as they will have the same signature + if matches < 2 and lu and to and to[1].addr:lower() == addr then + return false + else + return true, addr end - -- Prevent any other iterations as we only want - -- process the first matching Received header - return false + end + -- Prevent any other iterations as we only want + -- process the first matching Received header + return false end + end end return false end, diff --git a/rules/http_headers.lua b/rules/http_headers.lua index f8d7f2be6..0252ccce9 100644 --- a/rules/http_headers.lua +++ b/rules/http_headers.lua @@ -69,7 +69,7 @@ rspamd_config:add_condition("R_SPF_ALLOW", function(task) elseif obj['result'] == 'neutral' then task:insert_result('R_SPF_NEUTRAL', 1.0, 'http header') elseif obj['result'] == 'tempfail' or obj['result'] == 'softfail' then - task:insert_result('R_SPF_TEMPFAIL', 1.0, 'http header') + task:insert_result('R_SPF_SOFTFAIL', 1.0, 'http header') end return false diff --git a/rules/regexp/headers.lua b/rules/regexp/headers.lua index 8a3dad9e7..afd0633cd 100644 --- a/rules/regexp/headers.lua +++ b/rules/regexp/headers.lua @@ -87,7 +87,7 @@ reconf['SUSPICIOUS_RECIPS'] = 'compare_recipients_distance(0.65)' reconf['SORTED_RECIPS'] = 'is_recipients_sorted()' -- Spam string at the end of message to make statistics faults -reconf['TRACKER_ID'] = '/^[a-z0-9]{6,24}[-_a-z0-9]{2,36}[a-z0-9]{6,24}\\s*\\z/isPr' +reconf['TRACKER_ID'] = '/^[a-z0-9]{6,24}[-_a-z0-9]{12,36}[a-z0-9]{6,24}\\s*\\z/isPr' -- From that contains encoded characters while base 64 is not needed as all symbols are 7bit @@ -355,12 +355,11 @@ reconf['FAKE_REPLY_C'] = string.format('(%s) & (%s) & (%s) & !(%s)', subj_re, mi local has_msmail_pri = 'header_exists(X-MSMail-Priority)' local has_mimeole = 'header_exists(X-MimeOLE)' local has_squirrelmail_in_mailer = 'X-Mailer=/SquirrelMail\\b/H' -local has_ips_php_in_mailer = 'X-Mailer=/^IPS PHP Mailer/' local has_office12145_in_mailer = 'X-Mailer=/^Microsoft (?:Office )?Outlook 1[245]\\.0/' -reconf['MISSING_MIMEOLE'] = string.format('(%s) & !(%s) & !(%s) & !(%s) & !(%s) & !(%s) & !(%s)', - has_msmail_pri, has_mimeole, - has_squirrelmail_in_mailer, xm_mso12, - xm_cgpmapi, has_ips_php_in_mailer, +reconf['MISSING_MIMEOLE'] = string.format('(%s) & !(%s) & !(%s) & !(%s)', + has_msmail_pri, + has_mimeole, + has_squirrelmail_in_mailer, has_office12145_in_mailer) -- Header delimiters diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a7b03f55c..0c1e31dbe 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -111,6 +111,8 @@ TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb) TARGET_LINK_LIBRARIES(rspamd-server rspamd-lpeg) TARGET_LINK_LIBRARIES(rspamd-server lcbtrie) +ADD_DEPENDENCIES(rspamd-server rspamd_lua_preprocess) + IF (ENABLE_CLANG_PLUGIN MATCHES "ON") ADD_DEPENDENCIES(rspamd-server rspamd-clang) ENDIF() diff --git a/src/client/rspamc.c b/src/client/rspamc.c index 5280914b9..8f7f463bb 100644 --- a/src/client/rspamc.c +++ b/src/client/rspamc.c @@ -67,12 +67,17 @@ static GList *children; g_queue_push_tail ((o), nh); \ } while (0) +static gboolean rspamc_password_callback (const gchar *option_name, + const gchar *value, + gpointer data, + GError **error); + static GOptionEntry entries[] = { { "connect", 'h', 0, G_OPTION_ARG_STRING, &connect_str, "Specify host and port", NULL }, - { "password", 'P', 0, G_OPTION_ARG_STRING, &password, - "Specify control password", NULL }, + { "password", 'P', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, + &rspamc_password_callback, "Specify control password", NULL }, { "classifier", 'c', 0, G_OPTION_ARG_STRING, &classifier, "Classifier to learn spam or ham", NULL }, { "weight", 'w', 0, G_OPTION_ARG_INT, &weight, @@ -292,6 +297,31 @@ struct rspamc_callback_data { gdouble start; }; +gboolean +rspamc_password_callback (const gchar *option_name, + const gchar *value, + gpointer data, + GError **error) +{ + guint plen = 8192; + + if (value != NULL) { + password = g_strdup (value); + } + else { + /* Read password from console */ + password = g_malloc0 (plen); + plen = rspamd_read_passphrase (password, plen, 0, NULL); + } + + if (plen == 0) { + rspamd_fprintf (stderr, "Invalid password\n"); + exit (EXIT_FAILURE); + } + + return TRUE; +} + /* * Parse command line */ @@ -1208,12 +1238,16 @@ rspamc_client_cb (struct rspamd_client_connection *conn, } else { if (cmd->need_input) { - rspamd_fprintf (out, "Results for file: %s (%.3f seconds)\n", - cbdata->filename, diff); + if (!compact) { + rspamd_fprintf (out, "Results for file: %s (%.3f seconds)\n", + cbdata->filename, diff); + } } else { - rspamd_fprintf (out, "Results for command: %s (%.3f seconds)\n", - cmd->name, diff); + if (!compact) { + rspamd_fprintf (out, "Results for command: %s (%.3f seconds)\n", + cmd->name, diff); + } } if (result != NULL) { diff --git a/src/controller.c b/src/controller.c index 8b16eda19..f5efb4535 100644 --- a/src/controller.c +++ b/src/controller.c @@ -25,6 +25,7 @@ #include "ottery.h" #include "libutil/rrd.h" #include "unix-std.h" +#include "utlist.h" #include <math.h> /* 60 seconds for worker's IO */ @@ -109,7 +110,7 @@ worker_t controller_worker = { init_controller_worker, /* Init function */ start_controller_worker, /* Start function */ RSPAMD_WORKER_HAS_SOCKET | RSPAMD_WORKER_KILLABLE, - SOCK_STREAM, /* TCP socket */ + RSPAMD_WORKER_SOCKET_TCP, /* TCP socket */ RSPAMD_WORKER_VER /* Version info */ }; /* @@ -503,9 +504,15 @@ static gboolean rspamd_controller_check_password( "using password as enable_password for a privileged command"); check = ctx->password; } + if (check != NULL) { if (!rspamd_is_encrypted_password (check, &pbkdf)) { - ret = rspamd_constant_memcmp (password->begin, check, password->len); + ret = FALSE; + + if (strlen (check) == password->len) { + ret = rspamd_constant_memcmp (password->begin, check, + password->len); + } } else { ret = rspamd_check_encrypted_password (ctx, password, check, @@ -526,9 +533,15 @@ static gboolean rspamd_controller_check_password( /* Accept both normal and enable passwords */ if (ctx->password != NULL) { check = ctx->password; + if (!rspamd_is_encrypted_password (check, &pbkdf)) { - check_normal = rspamd_constant_memcmp (password->begin, check, - password->len); + check_normal = FALSE; + + if (strlen (check) == password->len) { + check_normal = rspamd_constant_memcmp (password->begin, + check, + password->len); + } } else { check_normal = rspamd_check_encrypted_password (ctx, @@ -540,11 +553,18 @@ static gboolean rspamd_controller_check_password( else { check_normal = FALSE; } + if (ctx->enable_password != NULL) { check = ctx->enable_password; + if (!rspamd_is_encrypted_password (check, &pbkdf)) { - check_enable = rspamd_constant_memcmp (password->begin, check, - password->len); + check_enable = FALSE; + + if (strlen (check) == password->len) { + check_enable = rspamd_constant_memcmp (password->begin, + check, + password->len); + } } else { check_enable = rspamd_check_encrypted_password (ctx, @@ -563,7 +583,7 @@ static gboolean rspamd_controller_check_password( } if (check_normal == FALSE && check_enable == FALSE) { - msg_info("absent or incorrect password has been specified"); + msg_info ("absent or incorrect password has been specified"); ret = FALSE; } @@ -2196,7 +2216,7 @@ rspamd_controller_accept_socket (gint fd, short what, void *arg) ctx = worker->ctx; if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, worker->accept_events)) == -1) { msg_warn_ctx ("accept failed: %s", strerror (errno)); return; } @@ -2564,9 +2584,10 @@ start_controller_worker (struct rspamd_worker *worker) ctx->srv = worker->srv; ctx->custom_commands = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); - if (ctx->secure_ip != NULL) { + if (ctx->secure_ip != NULL) { if (ucl_object_type (ctx->secure_ip) == UCL_ARRAY) { + while ((cur = ucl_object_iterate (ctx->secure_ip, &it, true)) != NULL) { /* Try map syntax */ if (ucl_object_type (cur) == UCL_STRING && @@ -2586,10 +2607,22 @@ start_controller_worker (struct rspamd_worker *worker) } } else { - rspamd_map_add_from_ucl (worker->srv->cfg, ctx->secure_ip, - "Allow webui access from the specified IP", - rspamd_radix_read, rspamd_radix_fin, - (void **)&ctx->secure_map); + LL_FOREACH (ctx->secure_ip, cur) { + if (ucl_object_type (cur) == UCL_STRING && + !rspamd_map_is_map (ucl_object_tostring (cur))) { + if (!radix_add_generic_iplist (ucl_object_tostring (cur), + &ctx->secure_map)) { + msg_warn_ctx ("cannot load or parse ip list from '%s'", + ucl_object_tostring (cur)); + } + } + else { + rspamd_map_add_from_ucl (worker->srv->cfg, ctx->secure_ip, + "Allow webui access from the specified IP", + rspamd_radix_read, rspamd_radix_fin, + (void **)&ctx->secure_map); + } + } } } diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c index 7803307b0..d96346ce4 100644 --- a/src/fuzzy_storage.c +++ b/src/fuzzy_storage.c @@ -40,10 +40,12 @@ /* Resync value in seconds */ #define DEFAULT_SYNC_TIMEOUT 60.0 #define DEFAULT_KEYPAIR_CACHE_SIZE 512 - +#define DEFAULT_MASTER_TIMEOUT 10.0 #define INVALID_NODE_TIME (guint64) - 1 +static const gchar *local_db_name = "local"; + /* Init functions */ gpointer init_fuzzy (struct rspamd_config *cfg); void start_fuzzy (struct rspamd_worker *worker); @@ -53,7 +55,7 @@ worker_t fuzzy_worker = { init_fuzzy, /* Init function */ start_fuzzy, /* Start function */ RSPAMD_WORKER_HAS_SOCKET, - SOCK_DGRAM, /* UDP socket */ + RSPAMD_WORKER_SOCKET_UDP|RSPAMD_WORKER_SOCKET_TCP, /* Both socket */ RSPAMD_WORKER_VER /* Version info */ }; @@ -84,6 +86,12 @@ struct fuzzy_key_stat { rspamd_lru_hash_t *last_ips; }; +struct rspamd_fuzzy_mirror { + gchar *name; + struct upstream_list *u; + struct rspamd_cryptobox_pubkey *key; +}; + static const guint64 rspamd_fuzzy_storage_magic = 0x291a3253eb1b3ea5ULL; struct rspamd_fuzzy_storage_ctx { @@ -93,7 +101,14 @@ struct rspamd_fuzzy_storage_ctx { gdouble expire; gdouble sync_timeout; radix_compressed_t *update_ips; + radix_compressed_t *master_ips; + struct rspamd_cryptobox_keypair *sync_keypair; + struct rspamd_cryptobox_pubkey *master_key; + struct timeval master_io_tv; + gdouble master_timeout; + GPtrArray *mirrors; gchar *update_map; + gchar *masters_map; guint keypair_cache_size; struct event_base *ev_base; gint peer_fd; @@ -108,6 +123,7 @@ struct rspamd_fuzzy_storage_ctx { struct rspamd_fuzzy_backend *backend; GQueue *updates_pending; struct rspamd_dns_resolver *resolver; + struct rspamd_config *cfg; }; enum fuzzy_cmd_type { @@ -160,6 +176,12 @@ struct fuzzy_key { struct fuzzy_key_stat *stat; }; +struct fuzzy_master_update_session { + struct rspamd_http_connection *conn; + struct rspamd_fuzzy_storage_ctx *ctx; + rspamd_inet_addr_t *addr; +}; + static void rspamd_fuzzy_write_reply (struct fuzzy_session *session); static gboolean @@ -170,8 +192,12 @@ rspamd_fuzzy_check_client (struct fuzzy_session *session) session->addr) == RADIX_NO_VALUE) { return FALSE; } + else { + return TRUE; + } } - return TRUE; + + return FALSE; } static void @@ -201,20 +227,179 @@ fuzzy_key_dtor (gpointer p) g_slice_free1 (sizeof (*key), key); } +struct fuzzy_slave_connection { + struct rspamd_cryptobox_keypair *local_key; + struct rspamd_cryptobox_pubkey *remote_key; + struct upstream *up; + struct rspamd_http_connection *http_conn; + struct rspamd_fuzzy_mirror *mirror; + gint sock; +}; + +static void +fuzzy_mirror_close_connection (struct fuzzy_slave_connection *conn) +{ + if (conn) { + if (conn->http_conn) { + rspamd_http_connection_reset (conn->http_conn); + rspamd_http_connection_unref (conn->http_conn); + } + + close (conn->sock); + + g_slice_free1 (sizeof (*conn), conn); + } +} + +static void +fuzzy_mirror_updates_to_http (struct rspamd_fuzzy_storage_ctx *ctx, + struct rspamd_http_message *msg) +{ + GList *cur; + struct fuzzy_peer_cmd *io_cmd; + gsize len; + guint32 rev; + const gchar *p; + + rev = rspamd_fuzzy_backend_version (ctx->backend, local_db_name); + rev = GUINT32_TO_LE (rev); + len = sizeof (guint32) * 2; /* revision + last chunk */ + + for (cur = ctx->updates_pending->head; cur != NULL; cur = g_list_next (cur)) { + io_cmd = cur->data; + + if (io_cmd->is_shingle) { + len += sizeof (guint32) + sizeof (gboolean) + + sizeof (struct rspamd_fuzzy_shingle_cmd); + } + else { + len += sizeof (guint32) + sizeof (gboolean) + + sizeof (struct rspamd_fuzzy_cmd); + } + } + + msg->body = rspamd_fstring_sized_new (len); + msg->body = rspamd_fstring_append (msg->body, (const char *)&rev, + sizeof (rev)); + + for (cur = ctx->updates_pending->head; cur != NULL; cur = g_list_next (cur)) { + io_cmd = cur->data; + + if (io_cmd->is_shingle) { + len = sizeof (gboolean) + + sizeof (struct rspamd_fuzzy_shingle_cmd); + } + else { + len = sizeof (gboolean) + + sizeof (struct rspamd_fuzzy_cmd); + } + + p = (const char *)io_cmd; + msg->body = rspamd_fstring_append (msg->body, (const char *)&len, + sizeof (len)); + msg->body = rspamd_fstring_append (msg->body, p, len); + } + + /* Last chunk */ + len = 0; + msg->body = rspamd_fstring_append (msg->body, (const char *)&len, + sizeof (len)); +} + +static void +fuzzy_mirror_error_handler (struct rspamd_http_connection *conn, GError *err) +{ + struct fuzzy_slave_connection *bk_conn = conn->ud; + msg_info ("abnormally closing connection from backend: %s:%s, " + "error: %e", + bk_conn->mirror->name, + rspamd_inet_address_to_string (rspamd_upstream_addr (bk_conn->up)), + err); + + fuzzy_mirror_close_connection (bk_conn); +} + +static gint +fuzzy_mirror_finish_handler (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg) +{ + struct fuzzy_slave_connection *bk_conn = conn->ud; + + msg_info ("finished mirror connection to %s", bk_conn->mirror->name); + fuzzy_mirror_close_connection (bk_conn); + + return 0; +} + static void -rspamd_fuzzy_process_updates_queue (struct rspamd_fuzzy_storage_ctx *ctx) +rspamd_fuzzy_send_update_mirror (struct rspamd_fuzzy_storage_ctx *ctx, + struct rspamd_fuzzy_mirror *m) +{ + struct fuzzy_slave_connection *conn; + struct rspamd_http_message *msg; + struct timeval tv; + + conn = g_slice_alloc0 (sizeof (*conn)); + conn->up = rspamd_upstream_get (m->u, + RSPAMD_UPSTREAM_MASTER_SLAVE, NULL, 0); + conn->mirror = m; + + if (conn->up == NULL) { + msg_err ("cannot select upstream for %s", m->name); + return; + } + + conn->sock = rspamd_inet_address_connect ( + rspamd_upstream_addr (conn->up), + SOCK_STREAM, TRUE); + + if (conn->sock == -1) { + msg_err ("cannot connect upstream for %s", m->name); + rspamd_upstream_fail (conn->up); + return; + } + + msg = rspamd_http_new_message (HTTP_REQUEST); + rspamd_printf_fstring (&msg->url, "/update_v1/%s", m->name); + + conn->http_conn = rspamd_http_connection_new ( + NULL, + fuzzy_mirror_error_handler, + fuzzy_mirror_finish_handler, + RSPAMD_HTTP_CLIENT_SIMPLE, + RSPAMD_HTTP_CLIENT, + ctx->keypair_cache); + + rspamd_http_connection_set_key (conn->http_conn, + ctx->sync_keypair); + msg->peer_key = rspamd_pubkey_ref (m->key); + double_to_tv (ctx->sync_timeout, &tv); + fuzzy_mirror_updates_to_http (ctx, msg); + + rspamd_http_connection_write_message (conn->http_conn, + msg, NULL, NULL, conn, + conn->sock, + &tv, ctx->ev_base); + msg_info ("send update request to %s", m->name); +} + +static void +rspamd_fuzzy_process_updates_queue (struct rspamd_fuzzy_storage_ctx *ctx, + const gchar *source) { GList *cur; struct fuzzy_peer_cmd *io_cmd; struct rspamd_fuzzy_cmd *cmd; gpointer ptr; - guint nupdates = 0; + struct rspamd_fuzzy_mirror *m; + guint nupdates = 0, i; time_t now = time (NULL); if (ctx->updates_pending && g_queue_get_length (ctx->updates_pending) > 0 && - rspamd_fuzzy_backend_prepare_update (ctx->backend)) { + rspamd_fuzzy_backend_prepare_update (ctx->backend, source)) { cur = ctx->updates_pending->head; + while (cur) { io_cmd = cur->data; @@ -238,8 +423,16 @@ rspamd_fuzzy_process_updates_queue (struct rspamd_fuzzy_storage_ctx *ctx) cur = g_list_next (cur); } - if (rspamd_fuzzy_backend_finish_update (ctx->backend)) { + if (rspamd_fuzzy_backend_finish_update (ctx->backend, source)) { ctx->stat.fuzzy_hashes = rspamd_fuzzy_backend_count (ctx->backend); + + for (i = 0; i < ctx->mirrors->len; i ++) { + m = g_ptr_array_index (ctx->mirrors, i); + + rspamd_fuzzy_send_update_mirror (ctx, m); + } + + /* Clear updates */ cur = ctx->updates_pending->head; while (cur) { @@ -250,7 +443,7 @@ rspamd_fuzzy_process_updates_queue (struct rspamd_fuzzy_storage_ctx *ctx) g_queue_clear (ctx->updates_pending); msg_info ("updated fuzzy storage: %ud updates processed, version: %d", - nupdates, rspamd_fuzzy_backend_version (ctx->backend)); + nupdates, rspamd_fuzzy_backend_version (ctx->backend, source)); } else { msg_err ("cannot commit update transaction to fuzzy backend, " @@ -492,6 +685,25 @@ rspamd_fuzzy_process_command (struct fuzzy_session *session) reply: result.tag = cmd->tag; + + if (session->epoch < RSPAMD_FUZZY_EPOCH11) { + /* We need to convert flags to legacy format */ + guint32 flag = 0; + + /* We select the least significant flag if multiple flags are set */ + for (flag = 0; flag < 32; flag ++) { + if (result.flag & (1U << flag)) { + break; + } + } + + if (flag == (1U << 31)) { + flag = 0; + } + + result.flag = flag + 1; + } + memcpy (&session->reply.rep, &result, sizeof (result)); rspamd_fuzzy_update_stats (session->ctx, @@ -523,19 +735,32 @@ rspamd_fuzzy_command_valid (struct rspamd_fuzzy_cmd *cmd, gint r) { enum rspamd_fuzzy_epoch ret = RSPAMD_FUZZY_EPOCH_MAX; - if (cmd->version == RSPAMD_FUZZY_VERSION) { + switch (cmd->version) { + case 4: if (cmd->shingles_count > 0) { if (r == sizeof (struct rspamd_fuzzy_shingle_cmd)) { - ret = RSPAMD_FUZZY_EPOCH9; + ret = RSPAMD_FUZZY_EPOCH11; } } else { if (r == sizeof (*cmd)) { - ret = RSPAMD_FUZZY_EPOCH9; + ret = RSPAMD_FUZZY_EPOCH11; } } - } - else if (cmd->version == 2) { + break; + case 3: + if (cmd->shingles_count > 0) { + if (r == sizeof (struct rspamd_fuzzy_shingle_cmd)) { + ret = RSPAMD_FUZZY_EPOCH10; + } + } + else { + if (r == sizeof (*cmd)) { + ret = RSPAMD_FUZZY_EPOCH10; + } + } + break; + case 2: /* * rspamd 0.8 has slightly different tokenizer then it might be not * 100% compatible @@ -548,6 +773,9 @@ rspamd_fuzzy_command_valid (struct rspamd_fuzzy_cmd *cmd, gint r) else { ret = RSPAMD_FUZZY_EPOCH8; } + break; + default: + break; } return ret; @@ -665,7 +893,7 @@ rspamd_fuzzy_cmd_from_wire (guchar *buf, guint buflen, struct fuzzy_session *s) return FALSE; } /* Encrypted is epoch 10 at least */ - s->epoch = RSPAMD_FUZZY_EPOCH10; + s->epoch = epoch; break; case sizeof (struct rspamd_fuzzy_encrypted_shingle_cmd): s->cmd_type = CMD_ENCRYPTED_SHINGLE; @@ -682,7 +910,7 @@ rspamd_fuzzy_cmd_from_wire (guchar *buf, guint buflen, struct fuzzy_session *s) return FALSE; } - s->epoch = RSPAMD_FUZZY_EPOCH10; + s->epoch = epoch; break; default: msg_debug ("invalid fuzzy command of size %d received", buflen); @@ -693,6 +921,165 @@ rspamd_fuzzy_cmd_from_wire (guchar *buf, guint buflen, struct fuzzy_session *s) } static void +rspamd_fuzzy_mirror_process_update (struct fuzzy_master_update_session *session, + struct rspamd_http_message *msg) +{ + const guchar *p; + gchar *src = NULL, *psrc; + gsize remain; + guint32 revision, our_rev, len, cnt = 0; + struct fuzzy_peer_cmd cmd, *pcmd; + enum { + read_len = 0, + read_data, + finish_processing + } state = read_len; + GList *updates = NULL, *cur; + + if (!msg->body || msg->body->len == 0 || !msg->url || msg->url->len == 0) { + msg_err ("empty update message, not processing"); + + return; + } + + /* Detect source from url: /update_v1/<source>, so we look for the last '/' */ + remain = msg->url->len; + psrc = rspamd_fstringdup (msg->url); + src = psrc; + + while (remain--) { + if (src[remain] == '/') { + src = &src[remain + 1]; + break; + } + } + + /* + * Message format: + * <uint32_le> - revision + * <uint32_le> - size of the next element + * <data> - command data + * ... + * <0> - end of data + * ... - ignored + */ + p = (const guchar *)msg->body->str; + remain = msg->body->len; + + if (remain > sizeof (guint32) * 2) { + memcpy (&revision, p, sizeof (guint32)); + revision = GUINT32_TO_LE (revision); + our_rev = rspamd_fuzzy_backend_version (session->ctx->backend, src); + + if (revision <= our_rev) { + msg_err ("remote revision:d %d is older than ours: %d, refusing update", + revision, our_rev); + g_free (psrc); + + return; + } + else if (revision - our_rev > 1) { + msg_warn ("remote revision:d %d is newer more than 1 revision " + "than ours: %d, cold sync is recommended", + revision, our_rev); + } + + remain -= sizeof (guint32); + p += sizeof (guint32); + } + else { + msg_err ("short update message, not processing"); + goto err; + } + + while (remain > 0) { + switch (state) { + case read_len: + if (remain < sizeof (guint32)) { + msg_err ("short update message while reading length, not processing"); + goto err; + } + + memcpy (&len, p, sizeof (guint32)); + len = GUINT32_TO_LE (len); + remain -= sizeof (guint32); + p += sizeof (guint32); + + if (len == 0) { + remain = 0; + state = finish_processing; + } + else { + state = read_data; + } + break; + case read_data: + if (remain < len) { + msg_err ("short update message while reading data, not processing"); + return; + } + + if (len < sizeof (struct rspamd_fuzzy_cmd) + sizeof (gboolean) || + len > sizeof (cmd)) { + /* Bad size command */ + msg_err ("incorrect element size: %d, at least %d expected", len, + (gint)(sizeof (struct rspamd_fuzzy_cmd) + sizeof (gboolean))); + goto err; + } + + memcpy (&cmd, p, sizeof (gboolean)); + if (cmd.is_shingle && len < sizeof (cmd)) { + /* Short command */ + msg_err ("incorrect element size: %d, at least %d expected", len, + (gint)(sizeof (cmd))); + goto err; + } + + pcmd = g_slice_alloc (sizeof (cmd)); + memcpy (pcmd, p, len); + updates = g_list_prepend (updates, pcmd); + + p += len; + remain -= len; + len = 0; + state = read_len; + cnt ++; + break; + case finish_processing: + /* Do nothing */ + remain = 0; + break; + } + } + + /* Insert elements to the updates from head */ + for (cur = updates; cur != NULL; cur = g_list_next (cur)) { + g_queue_push_head (session->ctx->updates_pending, cur->data); + cur->data = NULL; + } + + rspamd_fuzzy_process_updates_queue (session->ctx, src); + msg_info ("processed updates from the master, %ud operations processed," + " revision: %ud", cnt, revision); + +err: + g_free (psrc); + + if (updates) { + /* We still need to clear queue */ + for (cur = updates; cur != NULL; cur = g_list_next (cur)) { + if (cur->data) { + g_slice_free1 (sizeof (cmd), cur->data); + } + } + + /* This also update our version id */ + g_list_free (updates); + } +} + + +static void fuzzy_session_destroy (gpointer d) { struct fuzzy_session *session = d; @@ -703,6 +1090,135 @@ fuzzy_session_destroy (gpointer d) g_slice_free1 (sizeof (*session), session); } +static void +rspamd_fuzzy_mirror_session_destroy (struct fuzzy_master_update_session *session) +{ + if (session) { + rspamd_http_connection_unref (session->conn); + rspamd_inet_address_destroy (session->addr); + g_slice_free1 (sizeof (*session), session); + } +} + +static void +rspamd_fuzzy_mirror_error_handler (struct rspamd_http_connection *conn, GError *err) +{ + struct fuzzy_master_update_session *session = conn->ud; + + msg_err ("abnormally closing connection from: %s, error: %e", + rspamd_inet_address_to_string (session->addr), err); + /* Terminate session immediately */ + rspamd_fuzzy_mirror_session_destroy (session); +} + +static gint +rspamd_fuzzy_mirror_finish_handler (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg) +{ + struct fuzzy_master_update_session *session = conn->ud; + const struct rspamd_cryptobox_pubkey *rk; + + /* Check key */ + if (!rspamd_http_connection_is_encrypted (conn)) { + msg_err ("refuse unencrypted update from: %s", + rspamd_inet_address_to_string (session->addr)); + goto end; + } + else { + + if (session->ctx->master_key) { + rk = rspamd_http_connection_get_peer_key (conn); + g_assert (rk != NULL); + + if (!rspamd_pubkey_equal (rk, session->ctx->master_key)) { + msg_err ("refuse unknown pubkey update from: %s", + rspamd_inet_address_to_string (session->addr)); + goto end; + } + } + else { + msg_warn ("no trusted key specified, accept any update from %s", + rspamd_inet_address_to_string (session->addr)); + } + + rspamd_fuzzy_mirror_process_update (session, msg); + } + +end: + rspamd_fuzzy_mirror_session_destroy (session); + + return 0; +} + +static void +accept_fuzzy_mirror_socket (gint fd, short what, void *arg) +{ + struct rspamd_worker *worker = (struct rspamd_worker *)arg; + rspamd_inet_addr_t *addr; + gint nfd; + struct rspamd_http_connection *http_conn; + struct rspamd_fuzzy_storage_ctx *ctx; + struct fuzzy_master_update_session *session; + + if ((nfd = + rspamd_accept_from_socket (fd, &addr, worker->accept_events)) == -1) { + msg_warn ("accept failed: %s", strerror (errno)); + return; + } + /* Check for EAGAIN */ + if (nfd == 0) { + return; + } + + ctx = worker->ctx; + + if (!ctx->master_ips) { + msg_err ("deny update request from %s as no masters defined", + rspamd_inet_address_to_string (addr)); + rspamd_inet_address_destroy (addr); + close (nfd); + + return; + } + else if (radix_find_compressed_addr (ctx->master_ips, addr) == RADIX_NO_VALUE) { + msg_err ("deny update request from %s", + rspamd_inet_address_to_string (addr)); + rspamd_inet_address_destroy (addr); + close (nfd); + + return; + } + + if (!ctx->sync_keypair) { + msg_err ("deny update request from %s, as no local keypair is specified", + rspamd_inet_address_to_string (addr)); + rspamd_inet_address_destroy (addr); + close (nfd); + + return; + } + + session = g_slice_alloc0 (sizeof (*session)); + http_conn = rspamd_http_connection_new ( + NULL, + rspamd_fuzzy_mirror_error_handler, + rspamd_fuzzy_mirror_finish_handler, + 0, + RSPAMD_HTTP_SERVER, + ctx->keypair_cache); + + rspamd_http_connection_set_key (http_conn, ctx->sync_keypair); + session->ctx = ctx; + session->conn = http_conn; + session->addr = addr; + + rspamd_http_connection_read_message (http_conn, + session, + nfd, + &ctx->master_io_tv, + ctx->ev_base); +} + /* * Accept new connection and construct task */ @@ -791,7 +1307,7 @@ sync_callback (gint fd, short what, void *arg) ctx = worker->ctx; if (ctx->backend) { - rspamd_fuzzy_process_updates_queue (ctx); + rspamd_fuzzy_process_updates_queue (ctx, local_db_name); /* Call backend sync */ old_expired = rspamd_fuzzy_backend_expired (ctx->backend); rspamd_fuzzy_backend_sync (ctx->backend, ctx->expire, TRUE); @@ -825,7 +1341,7 @@ rspamd_fuzzy_storage_sync (struct rspamd_main *rspamd_main, struct rspamd_control_reply rep; if (ctx->backend) { - rspamd_fuzzy_process_updates_queue (ctx); + rspamd_fuzzy_process_updates_queue (ctx, local_db_name); /* Call backend sync */ old_expired = rspamd_fuzzy_backend_expired (ctx->backend); rspamd_fuzzy_backend_sync (ctx->backend, ctx->expire, TRUE); @@ -1118,6 +1634,88 @@ rspamd_fuzzy_storage_stat (struct rspamd_main *rspamd_main, } static gboolean +fuzzy_storage_parse_mirror (rspamd_mempool_t *pool, + const ucl_object_t *obj, + gpointer ud, + struct rspamd_rcl_section *section, + GError **err) +{ + const ucl_object_t *elt; + struct rspamd_fuzzy_mirror *up = NULL; + struct rspamd_rcl_struct_parser *pd = ud; + struct rspamd_fuzzy_storage_ctx *ctx; + + ctx = pd->user_struct; + + if (ucl_object_type (obj) != UCL_OBJECT) { + g_set_error (err, g_quark_try_string ("fuzzy"), 100, + "mirror/slave option must be an object"); + + return FALSE; + } + + elt = ucl_object_lookup (obj, "name"); + if (elt == NULL) { + g_set_error (err, g_quark_try_string ("fuzzy"), 100, + "mirror option must have some name definition"); + + return FALSE; + } + + up = g_slice_alloc0 (sizeof (*up)); + up->name = g_strdup (ucl_object_tostring (elt)); + + elt = ucl_object_lookup (obj, "key"); + if (elt != NULL) { + up->key = rspamd_pubkey_from_base32 (ucl_object_tostring (elt), 0, + RSPAMD_KEYPAIR_KEX, RSPAMD_CRYPTOBOX_MODE_25519); + } + + if (up->key == NULL) { + g_set_error (err, g_quark_try_string ("fuzzy"), 100, + "cannot read mirror key"); + + goto err; + } + + elt = ucl_object_lookup (obj, "hosts"); + + if (elt == NULL) { + g_set_error (err, g_quark_try_string ("fuzzy"), 100, + "mirror option must have some hosts definition"); + + goto err; + } + + up->u = rspamd_upstreams_create (ctx->cfg->ups_ctx); + if (!rspamd_upstreams_from_ucl (up->u, elt, 11335, NULL)) { + g_set_error (err, g_quark_try_string ("fuzzy"), 100, + "mirror has bad hosts definition"); + + goto err; + } + + g_ptr_array_add (ctx->mirrors, up); + + return TRUE; + +err: + + if (up) { + g_free (up->name); + rspamd_upstreams_destroy (up->u); + + if (up->key) { + rspamd_pubkey_unref (up->key); + } + + g_slice_free1 (sizeof (*up), up); + } + + return FALSE; +} + +static gboolean fuzzy_parse_keypair (rspamd_mempool_t *pool, const ucl_object_t *obj, gpointer ud, @@ -1211,6 +1809,7 @@ init_fuzzy (struct rspamd_config *cfg) ctx->magic = rspamd_fuzzy_storage_magic; ctx->sync_timeout = DEFAULT_SYNC_TIMEOUT; + ctx->master_timeout = DEFAULT_MASTER_TIMEOUT; ctx->expire = DEFAULT_EXPIRE; ctx->keypair_cache_size = DEFAULT_KEYPAIR_CACHE_SIZE; ctx->keys = g_hash_table_new_full (fuzzy_kp_hash, fuzzy_kp_equal, @@ -1218,6 +1817,8 @@ init_fuzzy (struct rspamd_config *cfg) ctx->errors_ips = rspamd_lru_hash_new_full (1024, (GDestroyNotify) rspamd_inet_address_destroy, g_free, rspamd_inet_address_hash, rspamd_inet_address_equal); + ctx->cfg = cfg; + ctx->mirrors = g_ptr_array_new (); rspamd_rcl_register_worker_option (cfg, type, @@ -1315,6 +1916,59 @@ init_fuzzy (struct rspamd_config *cfg) 0, "Allow encrypted requests only (and forbid all unknown keys or plaintext requests)"); + rspamd_rcl_register_worker_option (cfg, + type, + "master_timeout", + rspamd_rcl_parse_struct_time, + ctx, + G_STRUCT_OFFSET (struct rspamd_fuzzy_storage_ctx, master_timeout), + RSPAMD_CL_FLAG_TIME_FLOAT, + "Master protocol IO timeout"); + + rspamd_rcl_register_worker_option (cfg, + type, + "sync_keypair", + rspamd_rcl_parse_struct_keypair, + ctx, + G_STRUCT_OFFSET (struct rspamd_fuzzy_storage_ctx, sync_keypair), + 0, + "Encryption key for master/slave updates"); + + rspamd_rcl_register_worker_option (cfg, + type, + "masters", + rspamd_rcl_parse_struct_string, + ctx, + G_STRUCT_OFFSET (struct rspamd_fuzzy_storage_ctx, masters_map), + 0, + "Allow master/slave updates from the following IP addresses"); + + rspamd_rcl_register_worker_option (cfg, + type, + "master_key", + rspamd_rcl_parse_struct_pubkey, + ctx, + G_STRUCT_OFFSET (struct rspamd_fuzzy_storage_ctx, master_key), + 0, + "Allow master/slave updates merely using the specified key"); + + rspamd_rcl_register_worker_option (cfg, + type, + "mirror", + fuzzy_storage_parse_mirror, + ctx, + 0, + RSPAMD_CL_FLAG_MULTIPLE, + "List of slave hosts"); + + rspamd_rcl_register_worker_option (cfg, + type, + "slave", + fuzzy_storage_parse_mirror, + ctx, + 0, + RSPAMD_CL_FLAG_MULTIPLE, + "List of slave hosts"); return ctx; } @@ -1351,8 +2005,8 @@ fuzzy_peer_rep (struct rspamd_worker *worker, { struct rspamd_fuzzy_storage_ctx *ctx = ud; GList *cur; - gint listen_socket; - struct event *accept_event; + struct rspamd_worker_listen_socket *ls; + struct event *accept_events; gdouble next_check; ctx->peer_fd = rep_fd; @@ -1368,16 +2022,30 @@ fuzzy_peer_rep (struct rspamd_worker *worker, /* Start listening */ cur = worker->cf->listen_socks; while (cur) { - listen_socket = GPOINTER_TO_INT (cur->data); - if (listen_socket != -1) { - accept_event = g_slice_alloc0 (sizeof (struct event)); - event_set (accept_event, listen_socket, EV_READ | EV_PERSIST, - accept_fuzzy_socket, worker); - event_base_set (ctx->ev_base, accept_event); - event_add (accept_event, NULL); - worker->accept_events = g_list_prepend (worker->accept_events, - accept_event); + ls = cur->data; + + if (ls->fd != -1) { + if (ls->type == RSPAMD_WORKER_SOCKET_UDP) { + accept_events = g_slice_alloc0 (sizeof (struct event) * 2); + event_set (&accept_events[0], ls->fd, EV_READ | EV_PERSIST, + accept_fuzzy_socket, worker); + event_base_set (ctx->ev_base, &accept_events[0]); + event_add (&accept_events[0], NULL); + worker->accept_events = g_list_prepend (worker->accept_events, + accept_events); + } + else if (worker->index == 0) { + /* We allow TCP listeners only for a update worker */ + accept_events = g_slice_alloc0 (sizeof (struct event) * 2); + event_set (&accept_events[0], ls->fd, EV_READ | EV_PERSIST, + accept_fuzzy_mirror_socket, worker); + event_base_set (ctx->ev_base, &accept_events[0]); + event_add (&accept_events[0], NULL); + worker->accept_events = g_list_prepend (worker->accept_events, + accept_events); + } } + cur = g_list_next (cur); } @@ -1413,6 +2081,7 @@ start_fuzzy (struct rspamd_worker *worker) "fuzzy", NULL); ctx->peer_fd = -1; + double_to_tv (ctx->master_timeout, &ctx->master_io_tv); /* * Open DB and perform VACUUM @@ -1441,7 +2110,7 @@ start_fuzzy (struct rspamd_worker *worker) rspamd_fuzzy_storage_stat, ctx); rspamd_control_worker_add_cmd_handler (worker, RSPAMD_CONTROL_FUZZY_SYNC, rspamd_fuzzy_storage_sync, ctx); - /* Create radix tree */ + /* Create radix trees */ if (ctx->update_map != NULL) { if (!rspamd_map_is_map (ctx->update_map)) { if (!radix_add_generic_iplist (ctx->update_map, @@ -1458,6 +2127,22 @@ start_fuzzy (struct rspamd_worker *worker) } } + if (ctx->masters_map != NULL) { + if (!rspamd_map_is_map (ctx->masters_map)) { + if (!radix_add_generic_iplist (ctx->masters_map, + &ctx->master_ips)) { + msg_warn ("cannot load or parse ip list from '%s'", + ctx->masters_map); + } + } + else { + rspamd_map_add (worker->srv->cfg, ctx->masters_map, + "Allow fuzzy master/slave updates from specified addresses", + rspamd_radix_read, rspamd_radix_fin, + (void **)&ctx->master_ips); + + } + } /* Maps events */ ctx->resolver = dns_resolver_init (worker->srv->logger, @@ -1481,7 +2166,7 @@ start_fuzzy (struct rspamd_worker *worker) rspamd_worker_block_signals (); if (worker->index == 0) { - rspamd_fuzzy_process_updates_queue (ctx); + rspamd_fuzzy_process_updates_queue (ctx, local_db_name); rspamd_fuzzy_backend_sync (ctx->backend, ctx->expire, TRUE); } diff --git a/src/fuzzy_storage.h b/src/fuzzy_storage.h index ff3438b38..aca6ab952 100644 --- a/src/fuzzy_storage.h +++ b/src/fuzzy_storage.h @@ -6,7 +6,7 @@ #include "shingles.h" #include "cryptobox.h" -#define RSPAMD_FUZZY_VERSION 3 +#define RSPAMD_FUZZY_VERSION 4 #define RSPAMD_FUZZY_KEYLEN 8 /* Commands for fuzzy storage */ @@ -22,7 +22,8 @@ enum rspamd_fuzzy_epoch { RSPAMD_FUZZY_EPOCH6 = 0, /**< pre 0.6.x */ RSPAMD_FUZZY_EPOCH8, /**< 0.8 till 0.9 */ RSPAMD_FUZZY_EPOCH9, /**< 0.9 + */ - RSPAMD_FUZZY_EPOCH10, /**< 1.0 + encryption */ + RSPAMD_FUZZY_EPOCH10, /**< 1.0+ encryption */ + RSPAMD_FUZZY_EPOCH11, /**< 1.3+ multiple flags */ RSPAMD_FUZZY_EPOCH_MAX }; diff --git a/src/hs_helper.c b/src/hs_helper.c index 1292d6460..127cf2785 100644 --- a/src/hs_helper.c +++ b/src/hs_helper.c @@ -33,7 +33,7 @@ worker_t hs_helper_worker = { init_hs_helper, /* Init function */ start_hs_helper, /* Start function */ RSPAMD_WORKER_UNIQUE|RSPAMD_WORKER_KILLABLE|RSPAMD_WORKER_ALWAYS_START, - SOCK_STREAM, /* TCP socket */ + RSPAMD_WORKER_SOCKET_NONE, /* No socket */ RSPAMD_WORKER_VER /* Version info */ }; diff --git a/src/libcryptobox/keypair.c b/src/libcryptobox/keypair.c index 1568293e3..51e023128 100644 --- a/src/libcryptobox/keypair.c +++ b/src/libcryptobox/keypair.c @@ -92,7 +92,7 @@ rspamd_cryptobox_keypair_pk (struct rspamd_cryptobox_keypair *kp, } static void * -rspamd_cryptobox_pubkey_pk (struct rspamd_cryptobox_pubkey *kp, +rspamd_cryptobox_pubkey_pk (const struct rspamd_cryptobox_pubkey *kp, guint *len) { g_assert (kp != NULL); @@ -880,3 +880,23 @@ rspamd_keypair_verify (struct rspamd_cryptobox_pubkey *pk, return TRUE; } + +gboolean +rspamd_pubkey_equal (const struct rspamd_cryptobox_pubkey *k1, + const struct rspamd_cryptobox_pubkey *k2) +{ + guchar *p1 = NULL, *p2 = NULL; + guint len1, len2; + + + if (k1->alg == k2->alg && k1->type == k2->type) { + p1 = rspamd_cryptobox_pubkey_pk (k1, &len1); + p2 = rspamd_cryptobox_pubkey_pk (k2, &len2); + + if (len1 == len2) { + return (memcmp (p1, p2, len1) == 0); + } + } + + return FALSE; +} diff --git a/src/libcryptobox/keypair.h b/src/libcryptobox/keypair.h index 6c30c5134..b50bc84db 100644 --- a/src/libcryptobox/keypair.h +++ b/src/libcryptobox/keypair.h @@ -261,5 +261,14 @@ gboolean rspamd_keypair_verify (struct rspamd_cryptobox_pubkey *pk, const void *data, gsize len, guchar *sig, gsize siglen, GError **err); +/** + * Compares two public keys + * @param k1 key to compare + * @param k2 key to compare + * @return TRUE if two keys are equal + */ +gboolean rspamd_pubkey_equal (const struct rspamd_cryptobox_pubkey *k1, + const struct rspamd_cryptobox_pubkey *k2); + #endif /* SRC_LIBCRYPTOBOX_KEYPAIR_H_ */ diff --git a/src/libmime/message.c b/src/libmime/message.c index 791bd6837..b20da368a 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -1650,8 +1650,37 @@ rspamd_message_parse (struct rspamd_task *task) len --; } + /* + * Exim somehow uses mailbox format for messages being scanned: + * From xxx@xxx.com Fri May 13 19:08:48 2016 + * + * So we check if a task has non-http format then we check for such a line + * at the beginning to avoid errors + */ + if (!(task->flags & RSPAMD_TASK_FLAG_JSON)) { + if (len > sizeof ("From ") - 1) { + if (memcmp (p, "From ", sizeof ("From ") - 1) == 0) { + /* Skip to CRLF */ + msg_info_task ("mailbox input detected, enable workaround"); + p += sizeof ("From ") - 1; + len -= sizeof ("From ") - 1; + + while (len > 0 && *p != '\n') { + p ++; + len --; + } + while (len > 0 && g_ascii_isspace (*p)) { + p ++; + len --; + } + } + } + } + tmp->data = (guint8 *)p; tmp->len = len; + task->msg.begin = p; + task->msg.len = len; stream = g_mime_stream_mem_new_with_byte_array (tmp); /* diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c index af0378e1d..065e63d6d 100644 --- a/src/libmime/mime_expressions.c +++ b/src/libmime/mime_expressions.c @@ -177,6 +177,56 @@ rspamd_mime_expr_quark (void) return g_quark_from_static_string ("mime-expressions"); } +static gboolean +rspamd_parse_long_option (const gchar *start, gsize len, + struct rspamd_regexp_atom *a) +{ + gboolean ret = FALSE; + + if (rspamd_lc_cmp (start, "body", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_BODY; + } + else if (rspamd_lc_cmp (start, "part", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_MIME; + } + else if (rspamd_lc_cmp (start, "raw_part", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_RAWMIME; + } + else if (rspamd_lc_cmp (start, "header", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_HEADER; + } + else if (rspamd_lc_cmp (start, "mime_header", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_MIMEHEADER; + } + else if (rspamd_lc_cmp (start, "raw_header", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_RAWHEADER; + } + else if (rspamd_lc_cmp (start, "all_header", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_ALLHEADER; + } + else if (rspamd_lc_cmp (start, "url", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_URL; + } + else if (rspamd_lc_cmp (start, "sa_body", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_SABODY; + } + else if (rspamd_lc_cmp (start, "sa_raw_body", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_SARAWBODY; + } + + return ret; +} + /* * Rspamd regexp utility functions */ @@ -184,7 +234,7 @@ static struct rspamd_regexp_atom * rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, struct rspamd_config *cfg) { - const gchar *begin, *end, *p, *src, *start; + const gchar *begin, *end, *p, *src, *start, *brace; gchar *dbegin, *dend; struct rspamd_regexp_atom *result; GError *err = NULL; @@ -291,6 +341,14 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, result->type = RSPAMD_RE_MIMEHEADER; p++; break; + case 'C': + result->type = RSPAMD_RE_SABODY; + p++; + break; + case 'D': + result->type = RSPAMD_RE_SARAWBODY; + p++; + break; case 'M': result->type = RSPAMD_RE_BODY; p++; @@ -311,6 +369,20 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, result->type = RSPAMD_RE_RAWHEADER; p++; break; + case '{': + /* Long definition */ + if ((brace = strchr (p + 1, '}')) != NULL) { + if (!rspamd_parse_long_option (p + 1, brace - (p + 1), result)) { + p = NULL; + } + else { + p = brace + 1; + } + } + else { + p = NULL; + } + break; /* Other flags */ case 'T': result->is_test = TRUE; @@ -636,7 +708,8 @@ set: else { /* Register new item in the cache */ if (mime_atom->d.re->type == RSPAMD_RE_HEADER || - mime_atom->d.re->type == RSPAMD_RE_RAWHEADER) { + mime_atom->d.re->type == RSPAMD_RE_RAWHEADER || + mime_atom->d.re->type == RSPAMD_RE_MIMEHEADER) { if (mime_atom->d.re->header != NULL) { own_re = mime_atom->d.re->regexp; @@ -1261,13 +1334,11 @@ rspamd_compare_transfer_encoding (struct rspamd_task * task, GArray * args, void *unused) { - GMimeObject *part; -#ifndef GMIME24 - GMimePartEncodingType enc_req, part_enc; -#else - GMimeContentEncoding enc_req, part_enc; -#endif + GPtrArray *headerlist; struct expression_argument *arg; + guint i; + struct raw_header *rh; + static const char *hname = "Content-Transfer-Encoding"; if (args == NULL) { msg_warn_task ("no parameters to function"); @@ -1280,47 +1351,42 @@ rspamd_compare_transfer_encoding (struct rspamd_task * task, return FALSE; } -#ifndef GMIME24 - enc_req = g_mime_part_encoding_from_string (arg->data); - if (enc_req == GMIME_PART_ENCODING_DEFAULT) { -#else - enc_req = g_mime_content_encoding_from_string (arg->data); - if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) { -#endif - msg_warn_task ("bad encoding type: %s", (gchar *)arg->data); - return FALSE; - } + headerlist = rspamd_message_get_header_array (task, hname, FALSE); - part = g_mime_message_get_mime_part (task->message); - if (part) { - if (GMIME_IS_PART (part)) { -#ifndef GMIME24 - part_enc = g_mime_part_get_encoding (GMIME_PART (part)); - if (part_enc == GMIME_PART_ENCODING_DEFAULT) { - /* Assume 7bit as default transfer encoding */ - part_enc = GMIME_PART_ENCODING_7BIT; + if (headerlist) { + for (i = 0; i < headerlist->len; i ++) { + rh = g_ptr_array_index (headerlist, i); + + if (rh->decoded == NULL) { + continue; } -#else - part_enc = g_mime_part_get_content_encoding (GMIME_PART (part)); - if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) { - /* Assume 7bit as default transfer encoding */ - part_enc = GMIME_CONTENT_ENCODING_7BIT; + + if (g_ascii_strcasecmp (rh->decoded, arg->data) == 0) { + return TRUE; } -#endif + } + } + /* + * In fact, we need to check 'Content-Transfer-Encoding' for each part + * as gmime has 'strange' assumptions + */ + headerlist = rspamd_message_get_mime_header_array (task, + arg->data, + FALSE); - debug_task ("got encoding in part: %d and compare with %d", - (gint)part_enc, - (gint)enc_req); -#ifndef GMIME24 - g_object_unref (part); -#endif + if (headerlist) { + for (i = 0; i < headerlist->len; i ++) { + rh = g_ptr_array_index (headerlist, i); - return part_enc == enc_req; + if (rh->decoded == NULL) { + continue; + } + + if (g_ascii_strcasecmp (rh->decoded, arg->data) == 0) { + return TRUE; + } } -#ifndef GMIME24 - g_object_unref (part); -#endif } return FALSE; diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c index f774ac126..ebbc29d61 100644 --- a/src/libserver/cfg_rcl.c +++ b/src/libserver/cfg_rcl.c @@ -2668,7 +2668,6 @@ rspamd_rcl_parse_struct_keypair (rspamd_mempool_t *pool, struct rspamd_rcl_struct_parser *pd = ud; struct rspamd_cryptobox_keypair **target, *kp; - target = (struct rspamd_cryptobox_keypair **)(((gchar *)pd->user_struct) + pd->offset); if (obj->type == UCL_OBJECT) { @@ -2698,6 +2697,49 @@ rspamd_rcl_parse_struct_keypair (rspamd_mempool_t *pool, return TRUE; } +gboolean +rspamd_rcl_parse_struct_pubkey (rspamd_mempool_t *pool, + const ucl_object_t *obj, + gpointer ud, + struct rspamd_rcl_section *section, + GError **err) +{ + struct rspamd_rcl_struct_parser *pd = ud; + struct rspamd_cryptobox_pubkey **target, *pk; + gsize len; + const gchar *str; + + target = (struct rspamd_cryptobox_pubkey **)(((gchar *)pd->user_struct) + + pd->offset); + if (obj->type == UCL_STRING) { + str = ucl_object_tolstring (obj, &len); + pk = rspamd_pubkey_from_base32 (str, len, RSPAMD_KEYPAIR_KEX, + RSPAMD_CRYPTOBOX_MODE_25519); + + if (pk != NULL) { + *target = pk; + } + else { + g_set_error (err, + CFG_RCL_ERROR, + EINVAL, + "cannot load the pubkey specified: %s", + ucl_object_key (obj)); + return FALSE; + } + } + else { + g_set_error (err, + CFG_RCL_ERROR, + EINVAL, + "no sane pubkey found in the element: %s", + ucl_object_key (obj)); + return FALSE; + } + + return TRUE; +} + static void rspamd_rcl_insert_string_list_item (gpointer *target, rspamd_mempool_t *pool, const gchar *src, gboolean is_hash) diff --git a/src/libserver/cfg_rcl.h b/src/libserver/cfg_rcl.h index ee0a1b526..1a27b056f 100644 --- a/src/libserver/cfg_rcl.h +++ b/src/libserver/cfg_rcl.h @@ -278,6 +278,21 @@ gboolean rspamd_rcl_parse_struct_keypair (rspamd_mempool_t *pool, GError **err); /** + * Parse a pubkey field of a structure + * @param cfg config pointer + * @param obj object to parse + * @param ud struct_parser structure (flags mean the exact structure used) + * @param section the current section + * @param err error pointer + * @return TRUE if a value has been successfully parsed + */ +gboolean rspamd_rcl_parse_struct_pubkey (rspamd_mempool_t *pool, + const ucl_object_t *obj, + gpointer ud, + struct rspamd_rcl_section *section, + GError **err); + +/** * Parse a inet addr field of a structure * @param cfg config pointer * @param obj object to parse diff --git a/src/libserver/dkim.c b/src/libserver/dkim.c index 5052960f0..1f65733ba 100644 --- a/src/libserver/dkim.c +++ b/src/libserver/dkim.c @@ -1260,6 +1260,147 @@ rspamd_dkim_simple_body_step (rspamd_dkim_context_t *ctx, return (len != 0); } +static const gchar * +rspamd_dkim_skip_empty_lines (const gchar *start, const gchar *end, + guint type, gboolean *need_crlf) +{ + const gchar *p = end - 1, *t; + enum { + init = 0, + init_2, + got_cr, + got_lf, + got_crlf, + test_spaces, + } state = init; + guint skip = 0; + + while (p >= start + 2) { + switch (state) { + case init: + if (*p == '\r') { + state = got_cr; + } + else if (*p == '\n') { + state = got_lf; + } + else if (type == DKIM_CANON_RELAXED && *p == ' ') { + skip = 0; + state = test_spaces; + } + else { + if (type == DKIM_CANON_SIMPLE) { + *need_crlf = TRUE; + } + + goto end; + } + break; + case init_2: + if (*p == '\r') { + state = got_cr; + } + else if (*p == '\n') { + state = got_lf; + } + else if (type == DKIM_CANON_RELAXED && *p == ' ') { + skip = 0; + state = test_spaces; + } + else { + goto end; + } + break; + case got_cr: + if (*(p - 1) == '\r') { + p --; + state = got_cr; + } + else if (*(p - 1) == '\n') { + if ((*p - 2) == '\r') { + /* \r\n\r -> we know about one line */ + p -= 1; + state = got_crlf; + } + else { + /* \n\r -> we know about one line */ + p -= 1; + state = got_lf; + } + } + else if (type == DKIM_CANON_RELAXED && *(p - 1) == ' ') { + skip = 1; + state = test_spaces; + } + else { + goto end; + } + break; + case got_lf: + if (*(p - 1) == '\r') { + state = got_crlf; + } + else if (*(p - 1) == '\n') { + /* We know about one line */ + p --; + state = got_lf; + } + else if (type == DKIM_CANON_RELAXED && *(p - 1) == ' ') { + skip = 1; + state = test_spaces; + } + else { + goto end; + } + break; + case got_crlf: + if (p > start - 2) { + if (*(p - 3) == '\r') { + p -= 2; + state = got_cr; + } + else if (*(p - 3) == '\n') { + p -= 2; + state = got_lf; + } + else if (type == DKIM_CANON_RELAXED && *(p - 3) == ' ') { + skip = 2; + state = test_spaces; + } + else { + goto end; + } + } + else { + goto end; + } + break; + case test_spaces: + t = p - skip; + + while (t > start - 2 && *t == ' ') { + t --; + } + + if (*t == '\r') { + p = t; + state = got_cr; + } + else if (*t == '\n') { + p = t; + state = got_lf; + } + else { + goto end; + } + break; + } + } + +end: + return p; +} + static gboolean rspamd_dkim_canonize_body (rspamd_dkim_context_t *ctx, const gchar *start, @@ -1267,6 +1408,7 @@ rspamd_dkim_canonize_body (rspamd_dkim_context_t *ctx, { const gchar *p; guint remain = ctx->len ? ctx->len : (guint)(end - start); + gboolean need_crlf = FALSE; if (start == NULL) { /* Empty body */ @@ -1279,22 +1421,9 @@ rspamd_dkim_canonize_body (rspamd_dkim_context_t *ctx, } else { /* Strip extra ending CRLF */ - p = end - 1; - while (p >= start + 2) { - if (*p == '\n' && *(p - 1) == '\r' && *(p - 2) == '\n') { - p -= 2; - } - else if (*p == '\n' && *(p - 1) == '\n') { - p--; - } - else if (*p == '\r' && *(p - 1) == '\r') { - p--; - } - else { - break; - } - } + p = rspamd_dkim_skip_empty_lines (start, end, ctx->body_canon_type, &need_crlf); end = p + 1; + if (end == start) { /* Empty body */ if (ctx->body_canon_type == DKIM_CANON_SIMPLE) { @@ -1308,7 +1437,15 @@ rspamd_dkim_canonize_body (rspamd_dkim_context_t *ctx, if (ctx->body_canon_type == DKIM_CANON_SIMPLE) { /* Simple canonization */ while (rspamd_dkim_simple_body_step (ctx, ctx->body_hash, - &start, end - start, &remain)) ; + &start, end - start, &remain)); + + if (need_crlf) { + start = "\r\n"; + end = start + 2; + remain = 2; + rspamd_dkim_simple_body_step (ctx, ctx->body_hash, + &start, end - start, &remain); + } } else { while (rspamd_dkim_relaxed_body_step (ctx, ctx->body_hash, diff --git a/src/libserver/fuzzy_backend.c b/src/libserver/fuzzy_backend.c index 26e595e5f..e58e8a546 100644 --- a/src/libserver/fuzzy_backend.c +++ b/src/libserver/fuzzy_backend.c @@ -32,6 +32,7 @@ struct rspamd_fuzzy_backend { static const gdouble sql_sleep_time = 0.1; static const guint max_retries = 10; +static const guint32 flags_mask = (1U << 31); #define msg_err_fuzzy_backend(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \ backend->pool->tag.tagname, backend->pool->tag.uid, \ @@ -52,17 +53,21 @@ static const guint max_retries = 10; static const char *create_tables_sql = "BEGIN;" - "CREATE TABLE digests(" - "id INTEGER PRIMARY KEY," - "flag INTEGER NOT NULL," - "digest TEXT NOT NULL," - "value INTEGER," - "time INTEGER);" - "CREATE TABLE shingles(" - "value INTEGER NOT NULL," - "number INTEGER NOT NULL," - "digest_id INTEGER REFERENCES digests(id) ON DELETE CASCADE " - "ON UPDATE CASCADE);" + "CREATE TABLE IF NOT EXISTS digests(" + " id INTEGER PRIMARY KEY," + " flag INTEGER NOT NULL," + " digest TEXT NOT NULL," + " value INTEGER," + " time INTEGER);" + "CREATE TABLE IF NOT EXISTS shingles(" + " value INTEGER NOT NULL," + " number INTEGER NOT NULL," + " digest_id INTEGER REFERENCES digests(id) ON DELETE CASCADE " + " ON UPDATE CASCADE);" + "CREATE TABLE IF NOT EXISTS sources(" + " name TEXT UNIQUE," + " version INTEGER," + " last INTEGER);" "CREATE UNIQUE INDEX IF NOT EXISTS d ON digests(digest);" "CREATE INDEX IF NOT EXISTS t ON digests(time);" "CREATE INDEX IF NOT EXISTS dgst_id ON shingles(digest_id);" @@ -93,7 +98,9 @@ enum rspamd_fuzzy_statement_idx { RSPAMD_FUZZY_BACKEND_EXPIRE, RSPAMD_FUZZY_BACKEND_VACUUM, RSPAMD_FUZZY_BACKEND_DELETE_ORPHANED, + RSPAMD_FUZZY_BACKEND_ADD_SOURCE, RSPAMD_FUZZY_BACKEND_VERSION, + RSPAMD_FUZZY_BACKEND_SET_VERSION, RSPAMD_FUZZY_BACKEND_MAX }; static struct rspamd_fuzzy_stmts { @@ -214,12 +221,26 @@ static struct rspamd_fuzzy_stmts { .result = SQLITE_DONE }, { + .idx = RSPAMD_FUZZY_BACKEND_ADD_SOURCE, + .sql = "INSERT OR IGNORE INTO sources(name, version, last) VALUES (?1, ?2, ?3);", + .args = "TII", + .stmt = NULL, + .result = SQLITE_DONE + }, + { .idx = RSPAMD_FUZZY_BACKEND_VERSION, - .sql = "PRAGMA user_version;", - .args = "", + .sql = "SELECT version FROM sources WHERE name=?1;", + .args = "T", .stmt = NULL, .result = SQLITE_ROW }, + { + .idx = RSPAMD_FUZZY_BACKEND_SET_VERSION, + .sql = "UPDATE sources SET version=?1, last=?2 WHERE name=?3;", + .args = "IIT", + .stmt = NULL, + .result = SQLITE_DONE + }, }; static GQuark @@ -410,7 +431,7 @@ rspamd_fuzzy_backend_open_db (const gchar *path, GError **err) bk->expired = 0; bk->pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "fuzzy_backend"); bk->db = rspamd_sqlite3_open_or_create (bk->pool, bk->path, - create_tables_sql, err); + create_tables_sql, 1, err); if (bk->db == NULL) { rspamd_fuzzy_backend_close (bk); @@ -506,6 +527,10 @@ rspamd_fuzzy_backend_check (struct rspamd_fuzzy_backend *backend, rep.prob = 1.0; rep.flag = sqlite3_column_int ( prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt, 2); + + if (!(rep.flag & flags_mask) && rep.flag > 0) { + rep.flag = (1U << (rep.flag - 1)) | flags_mask; + } } } else if (cmd->shingles_count > 0) { @@ -589,6 +614,10 @@ rspamd_fuzzy_backend_check (struct rspamd_fuzzy_backend *backend, rep.flag = sqlite3_column_int ( prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt, 3); + + if (!(rep.flag & flags_mask) && rep.flag > 0) { + rep.flag = (1U << (rep.flag - 1)) | flags_mask; + } } } } @@ -610,7 +639,8 @@ rspamd_fuzzy_backend_check (struct rspamd_fuzzy_backend *backend, } gboolean -rspamd_fuzzy_backend_prepare_update (struct rspamd_fuzzy_backend *backend) +rspamd_fuzzy_backend_prepare_update (struct rspamd_fuzzy_backend *backend, + const gchar *source) { gint rc; @@ -643,6 +673,11 @@ rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend, return FALSE; } + if (cmd->flag > 31 || cmd->flag == 0) { + msg_err_fuzzy_backend ("flag more than 31 is no longer supported"); + return FALSE; + } + rc = rspamd_fuzzy_backend_run_stmt (backend, FALSE, RSPAMD_FUZZY_BACKEND_CHECK, cmd->digest); @@ -654,7 +689,7 @@ rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend, 2); rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK); - if (flag == cmd->flag) { + if (flag & (1U << (cmd->flag - 1))) { /* We need to increase weight */ rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE, RSPAMD_FUZZY_BACKEND_UPDATE, @@ -669,11 +704,28 @@ rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend, } else { /* We need to relearn actually */ + if (flag & flags_mask) { + /* This is already new format */ + flag |= (1U << (cmd->flag - 1)); + } + else { + /* Convert to the new format */ + if (flag > 31 || flag == 0) { + msg_warn_fuzzy_backend ("storage had flag more than 31, remove " + "it"); + flag = cmd->flag | flags_mask; + } + else { + flag = (1U << (flag - 1)) | (1U << (cmd->flag - 1)) | flags_mask; + } + } + rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE, RSPAMD_FUZZY_BACKEND_UPDATE_FLAG, (gint64) cmd->value, - (gint64) cmd->flag, + (gint64) flag, cmd->digest); + if (rc != SQLITE_OK) { msg_warn_fuzzy_backend ("cannot update hash to %d -> " "%*xs: %s", (gint) cmd->flag, @@ -686,7 +738,7 @@ rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend, rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK); rc = rspamd_fuzzy_backend_run_stmt (backend, FALSE, RSPAMD_FUZZY_BACKEND_INSERT, - (gint) cmd->flag, + (gint) (1U << (cmd->flag - 1)), cmd->digest, (gint64) cmd->value, (gint64) timestamp); @@ -729,42 +781,47 @@ rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend, } gboolean -rspamd_fuzzy_backend_finish_update (struct rspamd_fuzzy_backend *backend) +rspamd_fuzzy_backend_finish_update (struct rspamd_fuzzy_backend *backend, + const gchar *source) { gint rc, wal_frames, wal_checkpointed, ver; - gint64 version = 0; - gchar version_buf[128]; + + /* Get and update version */ + ver = rspamd_fuzzy_backend_version (backend, source); + ++ver; rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE, - RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT); + RSPAMD_FUZZY_BACKEND_SET_VERSION, + (gint64)ver, (gint64)time (NULL), source); - if (rc != SQLITE_OK) { - msg_warn_fuzzy_backend ("cannot commit updates: %s", - sqlite3_errmsg (backend->db)); - rspamd_fuzzy_backend_run_stmt (backend, TRUE, - RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK); - return FALSE; - } - else { - if (!rspamd_sqlite3_sync (backend->db, &wal_frames, &wal_checkpointed)) { - msg_warn_fuzzy_backend ("cannot commit checkpoint: %s", + if (rc == SQLITE_OK) { + rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE, + RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT); + + if (rc != SQLITE_OK) { + msg_warn_fuzzy_backend ("cannot commit updates: %s", sqlite3_errmsg (backend->db)); + rspamd_fuzzy_backend_run_stmt (backend, TRUE, + RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK); + return FALSE; } - else if (wal_checkpointed > 0) { - msg_info_fuzzy_backend ("total number of frames in the wal file: " - "%d, checkpointed: %d", wal_frames, wal_checkpointed); + else { + if (!rspamd_sqlite3_sync (backend->db, &wal_frames, &wal_checkpointed)) { + msg_warn_fuzzy_backend ("cannot commit checkpoint: %s", + sqlite3_errmsg (backend->db)); + } + else if (wal_checkpointed > 0) { + msg_info_fuzzy_backend ("total number of frames in the wal file: " + "%d, checkpointed: %d", wal_frames, wal_checkpointed); + } } } - - /* Get and update version */ - ver = rspamd_fuzzy_backend_version (backend); - ++ver; - rspamd_snprintf (version_buf, sizeof (version_buf), "PRAGMA user_version=%d;", - ver); - - if (sqlite3_exec (backend->db, version_buf, NULL, NULL, NULL) != SQLITE_OK) { - msg_err_fuzzy_backend ("cannot set database version to %L: %s", - version, sqlite3_errmsg (backend->db)); + else { + msg_warn_fuzzy_backend ("cannot update version for %s: %s", source, + sqlite3_errmsg (backend->db)); + rspamd_fuzzy_backend_run_stmt (backend, TRUE, + RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK); + return FALSE; } return TRUE; @@ -774,16 +831,61 @@ gboolean rspamd_fuzzy_backend_del (struct rspamd_fuzzy_backend *backend, const struct rspamd_fuzzy_cmd *cmd) { - int rc; + int rc = -1; + guint32 flag; if (backend == NULL) { return FALSE; } - rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE, - RSPAMD_FUZZY_BACKEND_DELETE, + rc = rspamd_fuzzy_backend_run_stmt (backend, FALSE, + RSPAMD_FUZZY_BACKEND_CHECK, cmd->digest); + if (rc == SQLITE_OK) { + /* Check flag */ + flag = sqlite3_column_int64 ( + prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt, + 2); + rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK); + + if (!(flag & flags_mask)) { + flag = (1U << (flag - 1)) | flags_mask; + } + + if (flag & (1U << (cmd->flag - 1))) { + flag &= ~(1U << (cmd->flag - 1)); + + if (flag == 0) { + /* It is the last flag, so delete hash completely */ + rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE, + RSPAMD_FUZZY_BACKEND_DELETE, + cmd->digest); + } + else { + /* We need to delete specific flag */ + rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE, + RSPAMD_FUZZY_BACKEND_UPDATE_FLAG, + (gint64) cmd->value, + (gint64) flag, + cmd->digest); + if (rc != SQLITE_OK) { + msg_warn_fuzzy_backend ("cannot update hash to %d -> " + "%*xs: %s", (gint) cmd->flag, + (gint) sizeof (cmd->digest), cmd->digest, + sqlite3_errmsg (backend->db)); + } + } + } + else { + /* The hash has a wrong flag, ignoring */ + } + } + else { + /* Hash is missing */ + rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK); + } + return (rc == SQLITE_OK); } @@ -976,13 +1078,14 @@ rspamd_fuzzy_backend_count (struct rspamd_fuzzy_backend *backend) } gint -rspamd_fuzzy_backend_version (struct rspamd_fuzzy_backend *backend) +rspamd_fuzzy_backend_version (struct rspamd_fuzzy_backend *backend, + const gchar *source) { - gint ret = 0; + gint ret = -1; if (backend) { if (rspamd_fuzzy_backend_run_stmt (backend, FALSE, - RSPAMD_FUZZY_BACKEND_VERSION) == SQLITE_OK) { + RSPAMD_FUZZY_BACKEND_VERSION, source) == SQLITE_OK) { ret = sqlite3_column_int64 ( prepared_stmts[RSPAMD_FUZZY_BACKEND_VERSION].stmt, 0); } diff --git a/src/libserver/fuzzy_backend.h b/src/libserver/fuzzy_backend.h index bcd199d1a..91a613f2a 100644 --- a/src/libserver/fuzzy_backend.h +++ b/src/libserver/fuzzy_backend.h @@ -46,7 +46,8 @@ struct rspamd_fuzzy_reply rspamd_fuzzy_backend_check ( /** * Prepare storage for updates (by starting transaction) */ -gboolean rspamd_fuzzy_backend_prepare_update (struct rspamd_fuzzy_backend *backend); +gboolean rspamd_fuzzy_backend_prepare_update (struct rspamd_fuzzy_backend *backend, + const gchar *source); /** * Add digest to the database @@ -72,7 +73,8 @@ gboolean rspamd_fuzzy_backend_del ( /** * Commit updates to storage */ -gboolean rspamd_fuzzy_backend_finish_update (struct rspamd_fuzzy_backend *backend); +gboolean rspamd_fuzzy_backend_finish_update (struct rspamd_fuzzy_backend *backend, + const gchar *source); /** * Sync storage @@ -90,7 +92,7 @@ gboolean rspamd_fuzzy_backend_sync (struct rspamd_fuzzy_backend *backend, void rspamd_fuzzy_backend_close (struct rspamd_fuzzy_backend *backend); gsize rspamd_fuzzy_backend_count (struct rspamd_fuzzy_backend *backend); -gint rspamd_fuzzy_backend_version (struct rspamd_fuzzy_backend *backend); +gint rspamd_fuzzy_backend_version (struct rspamd_fuzzy_backend *backend, const gchar *source); gsize rspamd_fuzzy_backend_expired (struct rspamd_fuzzy_backend *backend); const gchar * rspamd_fuzzy_backend_id (struct rspamd_fuzzy_backend *backend); diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index d314d3fdc..a48de05dc 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -873,9 +873,8 @@ rspamd_metric_result_ucl (struct rspamd_task *task, return obj; } -static void -rspamd_ucl_torspamc_output (struct rspamd_task *task, - ucl_object_t *top, +void +rspamd_ucl_torspamc_output (const ucl_object_t *top, rspamd_fstring_t **out) { const ucl_object_t *metric, *score, @@ -927,12 +926,15 @@ rspamd_ucl_torspamc_output (struct rspamd_task *task, } } - rspamd_printf_fstring (out, "Message-ID: %s\r\n", task->message_id); + elt = ucl_object_lookup (top, "message-id"); + if (elt != NULL) { + rspamd_printf_fstring (out, "Message-ID: %s\r\n", + ucl_object_tostring (elt)); + } } static void -rspamd_ucl_tospamc_output (struct rspamd_task *task, - ucl_object_t *top, +rspamd_ucl_tospamc_output (const ucl_object_t *top, rspamd_fstring_t **out) { const ucl_object_t *metric, *score, @@ -1055,10 +1057,10 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg, } else { if (RSPAMD_TASK_IS_SPAMC (task)) { - rspamd_ucl_tospamc_output (task, top, &msg->body); + rspamd_ucl_tospamc_output (top, &msg->body); } else { - rspamd_ucl_torspamc_output (task, top, &msg->body); + rspamd_ucl_torspamc_output (top, &msg->body); } } diff --git a/src/libserver/protocol.h b/src/libserver/protocol.h index 3c8383565..1f7acbab2 100644 --- a/src/libserver/protocol.h +++ b/src/libserver/protocol.h @@ -82,5 +82,13 @@ ucl_object_t * rspamd_protocol_write_ucl (struct rspamd_task *task); */ void rspamd_protocol_write_reply (struct rspamd_task *task); +/** + * Convert rspamd output to legacy protocol reply + * @param task + * @param top + * @param out + */ +void rspamd_ucl_torspamc_output (const ucl_object_t *top, + rspamd_fstring_t **out); #endif diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c index 3e308415d..332486cdd 100644 --- a/src/libserver/re_cache.c +++ b/src/libserver/re_cache.c @@ -746,7 +746,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, re_class->type_data, is_strong); - if (headerlist) { + if (headerlist && headerlist->len > 0) { scvec = g_malloc (sizeof (*scvec) * headerlist->len); lenvec = g_malloc (sizeof (*lenvec) * headerlist->len); @@ -795,7 +795,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task, re_class->type_data, is_strong); - if (headerlist) { + if (headerlist && headerlist->len > 0) { scvec = g_malloc (sizeof (*scvec) * headerlist->len); lenvec = g_malloc (sizeof (*lenvec) * headerlist->len); @@ -1494,7 +1494,8 @@ rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache, * crc - 8 bytes checksum * <hyperscan blob> */ - crc = XXH64 (hs_serialized, serialized_len, 0xdeadbabe); + crc = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_XXHASH64, + hs_serialized, serialized_len, 0xdeadbabe); if (cache->vectorized_hyperscan) { iov[0].iov_base = (void *) rspamd_hs_magic_vector; diff --git a/src/libserver/worker_util.c b/src/libserver/worker_util.c index 740c4cd7a..bf596a343 100644 --- a/src/libserver/worker_util.c +++ b/src/libserver/worker_util.c @@ -248,9 +248,9 @@ rspamd_prepare_worker (struct rspamd_worker *worker, const char *name, void (*accept_handler)(int, short, void *)) { struct event_base *ev_base; - struct event *accept_event; + struct event *accept_events; GList *cur; - gint listen_socket; + struct rspamd_worker_listen_socket *ls; #ifdef WITH_PROFILER extern void _start (void), etext (void); @@ -271,17 +271,20 @@ rspamd_prepare_worker (struct rspamd_worker *worker, const char *name, /* Accept all sockets */ if (accept_handler) { cur = worker->cf->listen_socks; + while (cur) { - listen_socket = GPOINTER_TO_INT (cur->data); - if (listen_socket != -1) { - accept_event = g_slice_alloc0 (sizeof (struct event)); - event_set (accept_event, listen_socket, EV_READ | EV_PERSIST, + ls = cur->data; + + if (ls->fd != -1) { + accept_events = g_slice_alloc0 (sizeof (struct event) * 2); + event_set (&accept_events[0], ls->fd, EV_READ | EV_PERSIST, accept_handler, worker); - event_base_set (ev_base, accept_event); - event_add (accept_event, NULL); + event_base_set (ev_base, &accept_events[0]); + event_add (&accept_events[0], NULL); worker->accept_events = g_list_prepend (worker->accept_events, - accept_event); + accept_events); } + cur = g_list_next (cur); } } @@ -293,7 +296,7 @@ void rspamd_worker_stop_accept (struct rspamd_worker *worker) { GList *cur; - struct event *event; + struct event *events; GHashTableIter it; struct rspamd_worker_signal_handler *sigh; gpointer k, v; @@ -302,10 +305,18 @@ rspamd_worker_stop_accept (struct rspamd_worker *worker) /* Remove all events */ cur = worker->accept_events; while (cur) { - event = cur->data; - event_del (event); + events = cur->data; + + if (event_get_base (&events[0])) { + event_del (&events[0]); + } + + if (event_get_base (&events[1])) { + event_del (&events[1]); + } + cur = g_list_next (cur); - g_slice_free1 (sizeof (struct event), event); + g_slice_free1 (sizeof (struct event) * 2, events); } if (worker->accept_events != NULL) { diff --git a/src/libstat/backends/redis_backend.c b/src/libstat/backends/redis_backend.c index 4f65a673c..a2924d054 100644 --- a/src/libstat/backends/redis_backend.c +++ b/src/libstat/backends/redis_backend.c @@ -22,6 +22,7 @@ #ifdef WITH_HIREDIS #include "hiredis.h" #include "adapters/libevent.h" +#include "ref.h" #define REDIS_CTX(p) (struct redis_stat_ctx *)(p) @@ -64,6 +65,7 @@ struct redis_stat_runtime { guint64 learned; gint id; enum rspamd_redis_connection_state conn_state; + ref_entry_t ref; }; /* Used to get statistics from redis */ @@ -678,9 +680,9 @@ rspamd_redis_fin (gpointer data) if (rt->conn_state != RSPAMD_REDIS_CONNECTED) { rt->conn_state = RSPAMD_REDIS_DISCONNECTED; + event_del (&rt->timeout_event); + REF_RELEASE (rt); } - - event_del (&rt->timeout_event); } static void @@ -690,9 +692,9 @@ rspamd_redis_fin_learn (gpointer data) if (rt->conn_state != RSPAMD_REDIS_CONNECTED) { rt->conn_state = RSPAMD_REDIS_DISCONNECTED; + event_del (&rt->timeout_event); + REF_RELEASE (rt); } - - event_del (&rt->timeout_event); } static void @@ -703,9 +705,14 @@ rspamd_redis_timeout (gint fd, short what, gpointer d) task = rt->task; - msg_err_task ("connection to redis server %s timed out", + msg_err_task_check ("connection to redis server %s timed out", rspamd_upstream_name (rt->selected)); rspamd_upstream_fail (rt->selected); + + if (rt->conn_state != RSPAMD_REDIS_CONNECTED) { + rspamd_session_remove_event (task->s, rspamd_redis_fin, rt); + } + rt->conn_state = RSPAMD_REDIS_TIMEDOUT; redisAsyncFree (rt->redis); rt->redis = NULL; @@ -722,6 +729,12 @@ rspamd_redis_connected (redisAsyncContext *c, gpointer r, gpointer priv) task = rt->task; + if (rt->conn_state != RSPAMD_REDIS_CONNECTED) { + /* Task has disappeared already */ + REF_RELEASE (rt); + return; + } + if (c->err == 0) { if (r != NULL) { if (G_LIKELY (reply->type == REDIS_REPLY_INTEGER)) { @@ -748,6 +761,7 @@ rspamd_redis_connected (redisAsyncContext *c, gpointer r, gpointer priv) rt->learned = val; rt->conn_state = RSPAMD_REDIS_CONNECTED; + REF_RETAIN (rt); msg_debug_task ("connected to redis server, tokens learned for %s: %uL", rt->redis_object_expanded, rt->learned); @@ -765,6 +779,8 @@ rspamd_redis_connected (redisAsyncContext *c, gpointer r, gpointer priv) rspamd_upstream_fail (rt->selected); rspamd_session_remove_event (task->s, rspamd_redis_fin, rt); } + + REF_RELEASE (rt); } /* Called when we have received tokens values from redis */ @@ -781,6 +797,12 @@ rspamd_redis_processed (redisAsyncContext *c, gpointer r, gpointer priv) task = rt->task; + if (rt->conn_state != RSPAMD_REDIS_CONNECTED) { + /* Task has disappeared already */ + REF_RELEASE (rt); + return; + } + if (c->err == 0) { if (r != NULL) { if (reply->type == REDIS_REPLY_ARRAY) { @@ -848,6 +870,8 @@ rspamd_redis_processed (redisAsyncContext *c, gpointer r, gpointer priv) rspamd_upstream_fail (rt->selected); rspamd_session_remove_event (task->s, rspamd_redis_fin, rt); } + + REF_RELEASE (rt); } /* Called when we have set tokens during learning */ @@ -859,6 +883,12 @@ rspamd_redis_learned (redisAsyncContext *c, gpointer r, gpointer priv) task = rt->task; + if (rt->conn_state != RSPAMD_REDIS_CONNECTED) { + /* Task has disappeared already */ + REF_RELEASE (rt); + return; + } + if (c->err == 0) { rspamd_upstream_ok (rt->selected); rspamd_session_remove_event (task->s, rspamd_redis_fin_learn, rt); @@ -874,6 +904,8 @@ rspamd_redis_learned (redisAsyncContext *c, gpointer r, gpointer priv) redisAsyncFree (rt->redis); rt->conn_state = RSPAMD_REDIS_DISCONNECTED; } + + REF_RELEASE (rt); } static gboolean @@ -1053,6 +1085,12 @@ rspamd_redis_init (struct rspamd_stat_ctx *ctx, return (gpointer)backend; } +static void +rspamd_redis_runtime_dtor (struct redis_stat_runtime *rt) +{ + g_slice_free1 (sizeof (*rt), rt); +} + gpointer rspamd_redis_runtime (struct rspamd_task *task, struct rspamd_statfile_config *stcf, @@ -1090,7 +1128,8 @@ rspamd_redis_runtime (struct rspamd_task *task, return NULL; } - rt = rspamd_mempool_alloc0 (task->task_pool, sizeof (*rt)); + rt = g_slice_alloc0 (sizeof (*rt)); + REF_INIT_RETAIN (rt, rspamd_redis_runtime_dtor); rspamd_redis_expand_object (ctx->redis_object, ctx, task, &rt->redis_object_expanded); rt->selected = up; @@ -1114,6 +1153,8 @@ rspamd_redis_runtime (struct rspamd_task *task, event_base_set (task->ev_base, &rt->timeout_event); double_to_tv (ctx->timeout, &tv); event_add (&rt->timeout_event, &tv); + /* Cleared by timeout */ + REF_RETAIN (rt); rspamd_redis_maybe_auth (ctx, rt->redis); redisAsyncCommand (rt->redis, rspamd_redis_connected, rt, "HGET %s %s", @@ -1192,6 +1233,8 @@ rspamd_redis_finalize_process (struct rspamd_task *task, gpointer runtime, rt->redis = NULL; rt->conn_state = RSPAMD_REDIS_DISCONNECTED; + + REF_RELEASE (rt); } } @@ -1329,6 +1372,7 @@ rspamd_redis_finalize_learn (struct rspamd_task *task, gpointer runtime, rt->redis = NULL; rt->conn_state = RSPAMD_REDIS_DISCONNECTED; + REF_RELEASE (rt); } } diff --git a/src/libstat/backends/sqlite3_backend.c b/src/libstat/backends/sqlite3_backend.c index f90e6b93a..30c9c74a2 100644 --- a/src/libstat/backends/sqlite3_backend.c +++ b/src/libstat/backends/sqlite3_backend.c @@ -438,7 +438,8 @@ rspamd_sqlite3_opendb (rspamd_mempool_t *pool, }; bk = g_slice_alloc0 (sizeof (*bk)); - bk->sqlite = rspamd_sqlite3_open_or_create (pool, path, create_tables_sql, err); + bk->sqlite = rspamd_sqlite3_open_or_create (pool, path, create_tables_sql, + 0, err); bk->pool = pool; if (bk->sqlite == NULL) { diff --git a/src/libstat/learn_cache/sqlite3_cache.c b/src/libstat/learn_cache/sqlite3_cache.c index 61335ab34..48cfe4af7 100644 --- a/src/libstat/learn_cache/sqlite3_cache.c +++ b/src/libstat/learn_cache/sqlite3_cache.c @@ -135,7 +135,7 @@ rspamd_stat_cache_sqlite3_init (struct rspamd_stat_ctx *ctx, rspamd_snprintf (dbpath, sizeof (dbpath), "%s", path); sqlite = rspamd_sqlite3_open_or_create (cfg->cfg_pool, - dbpath, create_tables_sql, &err); + dbpath, create_tables_sql, 0, &err); if (sqlite == NULL) { msg_err ("cannot open sqlite3 cache: %e", err); diff --git a/src/libutil/addr.c b/src/libutil/addr.c index 18414a98c..3fee0b4bc 100644 --- a/src/libutil/addr.c +++ b/src/libutil/addr.c @@ -199,8 +199,41 @@ rspamd_ip_is_valid (const rspamd_inet_addr_t *addr) return ret; } +static void +rspamd_enable_accept_event (gint fd, short what, gpointer d) +{ + struct event *events = d; + + event_del (&events[1]); + event_add (&events[0], NULL); +} + +static void +rspamd_disable_accept_events (gint sock, GList *accept_events) +{ + GList *cur; + struct event *events; + const gdouble throttling = 0.5; + struct timeval tv; + struct event_base *ev_base; + + double_to_tv (throttling, &tv); + + for (cur = accept_events; cur != NULL; cur = g_list_next (cur)) { + events = cur->data; + + ev_base = event_get_base (&events[0]); + event_del (&events[0]); + event_set (&events[1], sock, EV_TIMEOUT, rspamd_enable_accept_event, + events); + event_base_set (ev_base, &events[1]); + event_add (&events[1], &tv); + } +} + gint -rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t **target) +rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t **target, + GList *accept_events) { gint nfd, serrno; union sa_union su; @@ -215,6 +248,13 @@ rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t **target) if (errno == EAGAIN || errno == EINTR || errno == EWOULDBLOCK) { return 0; } + else if (errno == EMFILE || errno == ENFILE) { + /* Temporary disable accept event */ + rspamd_disable_accept_events (sock, accept_events); + + return 0; + } + return -1; } @@ -667,6 +707,36 @@ rspamd_inet_address_to_string (const rspamd_inet_addr_t *addr) return "undefined"; } +const char * +rspamd_inet_address_to_string_pretty (const rspamd_inet_addr_t *addr) +{ + static char addr_str[PATH_MAX + 5]; + + if (addr == NULL) { + return "<empty inet address>"; + } + + switch (addr->af) { + case AF_INET: + rspamd_snprintf (addr_str, sizeof (addr_str), "%s:%d", + rspamd_inet_address_to_string (addr), + rspamd_inet_address_get_port (addr)); + break; + case AF_INET6: + rspamd_snprintf (addr_str, sizeof (addr_str), "[%s]:%d", + rspamd_inet_address_to_string (addr), + rspamd_inet_address_get_port (addr)); + break; + case AF_UNIX: + rspamd_snprintf (addr_str, sizeof (addr_str), "unix:%s", + rspamd_inet_address_to_string (addr), + rspamd_inet_address_get_port (addr)); + break; + } + + return addr_str; +} + uint16_t rspamd_inet_address_get_port (const rspamd_inet_addr_t *addr) { diff --git a/src/libutil/addr.h b/src/libutil/addr.h index afed3f8fc..200543d6f 100644 --- a/src/libutil/addr.h +++ b/src/libutil/addr.h @@ -108,6 +108,13 @@ gboolean rspamd_parse_inet_address (rspamd_inet_addr_t **target, const char * rspamd_inet_address_to_string (const rspamd_inet_addr_t *addr); /** + * Returns pretty string representation of inet address + * @param addr + * @return statically allocated string pointer (not thread safe) + */ +const char * rspamd_inet_address_to_string_pretty (const rspamd_inet_addr_t *addr); + +/** * Returns port number for the specified inet address in host byte order * @param addr * @return @@ -186,10 +193,12 @@ gboolean rspamd_ip_is_valid (const rspamd_inet_addr_t *addr); /** * Accept from listening socket filling addr structure * @param sock listening socket - * @param addr allocated inet addr structur + * @param addr allocated inet addr structure + * @param accept_events events for accepting new sockets * @return */ -gint rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t **addr); +gint rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t **addr, + GList *accept_events); /** * Parse host[:port[:priority]] line diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c index 285940f9c..a70290a0b 100644 --- a/src/libutil/fstring.c +++ b/src/libutil/fstring.c @@ -416,3 +416,19 @@ rspamd_ftokdup (const rspamd_ftok_t *src) return newstr; } + +gchar * +rspamd_fstringdup (const rspamd_fstring_t *src) +{ + gchar *newstr; + + if (src == NULL) { + return NULL; + } + + newstr = g_malloc (src->len + 1); + memcpy (newstr, src->str, src->len); + newstr[src->len] = '\0'; + + return newstr; +} diff --git a/src/libutil/fstring.h b/src/libutil/fstring.h index 10916d876..0bd9a271d 100644 --- a/src/libutil/fstring.h +++ b/src/libutil/fstring.h @@ -81,6 +81,8 @@ rspamd_fstring_t *rspamd_fstring_append_chars (rspamd_fstring_t *str, */ void rspamd_fstring_erase (rspamd_fstring_t *str, gsize pos, gsize len); +#define rspamd_fstring_clear(s) rspamd_fstring_erase(s, 0, s->len) + /** * Convert fixed string to a zero terminated string. This string should be * freed by a caller @@ -160,5 +162,10 @@ rspamd_fstring_t * rspamd_fstring_grow (rspamd_fstring_t *str, */ gchar *rspamd_ftokdup (const rspamd_ftok_t *src) G_GNUC_WARN_UNUSED_RESULT; - +/** + * Copies fstring to zero terminated string (must be freed using g_free) + * @param src + * @return + */ +gchar *rspamd_fstringdup (const rspamd_fstring_t *src) G_GNUC_WARN_UNUSED_RESULT; #endif diff --git a/src/libutil/http.c b/src/libutil/http.c index fef9cb73c..fc8263ddd 100644 --- a/src/libutil/http.c +++ b/src/libutil/http.c @@ -1204,8 +1204,14 @@ rspamd_http_connection_copy_msg (struct rspamd_http_connection *conn) } if (msg->url) { - new_msg->url = rspamd_fstring_new_init (msg->url->str, - msg->url->len); + if (new_msg->url) { + new_msg->url = rspamd_fstring_append (new_msg->url, msg->url->str, + msg->url->len); + } + else { + new_msg->url = rspamd_fstring_new_init (msg->url->str, + msg->url->len); + } } if (msg->host) { @@ -2391,6 +2397,21 @@ rspamd_http_connection_set_key (struct rspamd_http_connection *conn, priv->local_key = rspamd_keypair_ref (key); } +const struct rspamd_cryptobox_pubkey* +rspamd_http_connection_get_peer_key (struct rspamd_http_connection *conn) +{ + struct rspamd_http_connection_private *priv = conn->priv; + + if (priv->peer_key) { + return priv->peer_key; + } + else if (priv->msg) { + return priv->msg->peer_key; + } + + return NULL; +} + gboolean rspamd_http_connection_is_encrypted (struct rspamd_http_connection *conn) { diff --git a/src/libutil/http.h b/src/libutil/http.h index 9793e577b..d9fb73b82 100644 --- a/src/libutil/http.h +++ b/src/libutil/http.h @@ -166,6 +166,14 @@ void rspamd_http_connection_set_key (struct rspamd_http_connection *conn, struct rspamd_cryptobox_keypair *key); /** + * Get peer's public key + * @param conn connection structure + * @return pubkey structure or NULL + */ +const struct rspamd_cryptobox_pubkey* rspamd_http_connection_get_peer_key ( + struct rspamd_http_connection *conn); + +/** * Returns TRUE if a connection is encrypted * @param conn * @return diff --git a/src/libutil/logger.c b/src/libutil/logger.c index f81730448..7f46c2a50 100644 --- a/src/libutil/logger.c +++ b/src/libutil/logger.c @@ -61,8 +61,6 @@ struct rspamd_logger_s { gchar *saved_module; gchar *saved_id; guint saved_loglevel; - rspamd_mempool_t *pool; - rspamd_mempool_mutex_t *mtx; guint64 log_cnt[4]; }; @@ -70,18 +68,6 @@ static const gchar lf_chr = '\n'; static rspamd_logger_t *default_logger = NULL; -#define RSPAMD_LOGGER_LOCK(l) do { \ - if ((l) != NULL && !(l)->no_lock) { \ - rspamd_mempool_lock_mutex ((l)->mtx); \ - } \ -} while (0) - -#define RSPAMD_LOGGER_UNLOCK(l) do { \ - if ((l) != NULL && !(l)->no_lock) { \ - rspamd_mempool_unlock_mutex ((l)->mtx); \ - } \ -} while (0) - static void syslog_log_function (const gchar *log_domain, const gchar *module, const gchar *id, const gchar *function, @@ -118,6 +104,10 @@ direct_write_log_line (rspamd_logger_t *rspamd_log, glong r; if (rspamd_log->enabled) { + if (!rspamd_log->no_lock) { + rspamd_file_lock (rspamd_log->fd, FALSE); + } + if (is_iov) { iov = (struct iovec *) data; r = writev (rspamd_log->fd, iov, count); @@ -126,6 +116,11 @@ direct_write_log_line (rspamd_logger_t *rspamd_log, line = (const gchar *) data; r = write (rspamd_log->fd, line, count); } + + if (!rspamd_log->no_lock) { + rspamd_file_unlock (rspamd_log->fd, FALSE); + } + if (r == -1) { /* We cannot write message to file, so we need to detect error and make decision */ if (errno == EINTR) { @@ -321,11 +316,7 @@ rspamd_set_logger (struct rspamd_config *cfg, struct rspamd_main *rspamd) { if (rspamd->logger == NULL) { - rspamd->logger = g_malloc (sizeof (rspamd_logger_t)); - memset (rspamd->logger, 0, sizeof (rspamd_logger_t)); - /* Small pool for interlocking */ - rspamd->logger->pool = rspamd_mempool_new (512, NULL); - rspamd->logger->mtx = rspamd_mempool_get_mutex (rspamd->logger->pool); + rspamd->logger = g_slice_alloc0 (sizeof (rspamd_logger_t)); } rspamd->logger->type = cfg->log_type; @@ -468,14 +459,12 @@ rspamd_common_logv (rspamd_logger_t *rspamd_log, GLogLevelFlags log_level, else { if (rspamd_logger_need_log (rspamd_log, log_level, module)) { rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, args); - RSPAMD_LOGGER_LOCK (rspamd_log); rspamd_log->log_func (NULL, module, id, function, log_level, logbuf, FALSE, rspamd_log); - RSPAMD_LOGGER_UNLOCK (rspamd_log); } switch (log_level) { @@ -936,7 +925,6 @@ rspamd_conditional_debug (rspamd_logger_t *rspamd_log, } } - RSPAMD_LOGGER_LOCK (rspamd_log); va_start (vp, fmt); end = rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, vp); *end = '\0'; @@ -947,7 +935,6 @@ rspamd_conditional_debug (rspamd_logger_t *rspamd_log, logbuf, TRUE, rspamd_log); - RSPAMD_LOGGER_UNLOCK (rspamd_log); } } @@ -964,14 +951,12 @@ rspamd_glib_log_function (const gchar *log_domain, if (rspamd_log->enabled && rspamd_logger_need_log (rspamd_log, log_level, NULL)) { - RSPAMD_LOGGER_LOCK (rspamd_log); rspamd_log->log_func (log_domain, "glib", NULL, NULL, log_level, message, FALSE, rspamd_log); - RSPAMD_LOGGER_UNLOCK (rspamd_log); } } diff --git a/src/libutil/map.c b/src/libutil/map.c index f99c35784..86cd3e5ee 100644 --- a/src/libutil/map.c +++ b/src/libutil/map.c @@ -784,9 +784,12 @@ rspamd_map_file_check_callback (gint fd, short what, void *ud) if (stat (data->filename, &st) != -1 && (st.st_mtime > data->st.st_mtime || data->st.st_mtime == -1)) { /* File was modified since last check */ + msg_info_map ("old mtime is %t, new mtime is %t for map file %s", + data->st.st_mtime, st.st_mtime, data->filename); memcpy (&data->st, &st, sizeof (struct stat)); periodic->need_modify = TRUE; periodic->cur_backend = 0; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, periodic); return; @@ -1062,6 +1065,7 @@ rspamd_map_parse_backend (struct rspamd_config *cfg, const gchar *map_line) /* Now check for each proto separately */ if (bk->protocol == MAP_PROTO_FILE) { fdata = g_slice_alloc0 (sizeof (struct file_map_data)); + fdata->st.st_mtime = -1; if (access (bk->uri, R_OK) == -1) { if (errno != ENOENT) { @@ -1072,11 +1076,6 @@ rspamd_map_parse_backend (struct rspamd_config *cfg, const gchar *map_line) msg_info_config ( "map '%s' is not found, but it can be loaded automatically later", bk->uri); - /* We still can add this file */ - fdata->st.st_mtime = -1; - } - else { - stat (bk->uri, &fdata->st); } fdata->filename = g_strdup (bk->uri); @@ -1329,6 +1328,11 @@ rspamd_map_add_from_ucl (struct rspamd_config *cfg, } } + rspamd_map_calculate_hash (map); + msg_info_map ("added map from ucl"); + + cfg->maps = g_list_prepend (cfg->maps, map); + return map; err: @@ -1956,27 +1960,34 @@ rspamd_re_map_finalize (struct rspamd_regexp_map *re_map) re_map->ids[i] = i; } - if (hs_compile_multi (re_map->patterns, - re_map->flags, - re_map->ids, - re_map->regexps->len, - HS_MODE_BLOCK, - &plt, - &re_map->hs_db, - &err) != HS_SUCCESS) { - - msg_err_map ("cannot create tree of regexp when processing '%s': %s", - re_map->patterns[err->expression], err->message); - re_map->hs_db = NULL; - hs_free_compile_error (err); + if (re_map->regexps->len > 0 && re_map->patterns) { + if (hs_compile_multi (re_map->patterns, + re_map->flags, + re_map->ids, + re_map->regexps->len, + HS_MODE_BLOCK, + &plt, + &re_map->hs_db, + &err) != HS_SUCCESS) { + + msg_err_map ("cannot create tree of regexp when processing '%s': %s", + err->expression >= 0 ? + re_map->patterns[err->expression] : + "unknown regexp", err->message); + re_map->hs_db = NULL; + hs_free_compile_error (err); + + return; + } - return; + if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) { + msg_err_map ("cannot allocate scratch space for hyperscan"); + hs_free_database (re_map->hs_db); + re_map->hs_db = NULL; + } } - - if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) { - msg_err_map ("cannot allocate scratch space for hyperscan"); - hs_free_database (re_map->hs_db); - re_map->hs_db = NULL; + else { + msg_err_map ("regexp map is empty"); } #endif } @@ -2058,6 +2069,8 @@ rspamd_match_regexp_map (struct rspamd_regexp_map *map, res = 1; ret = g_ptr_array_index (map->values, i); } + + return ret; } #endif diff --git a/src/libutil/sqlite_utils.c b/src/libutil/sqlite_utils.c index 452559c76..e7768142c 100644 --- a/src/libutil/sqlite_utils.c +++ b/src/libutil/sqlite_utils.c @@ -248,7 +248,7 @@ rspamd_sqlite3_wait (rspamd_mempool_t *pool, const gchar *lock) sqlite3 * rspamd_sqlite3_open_or_create (rspamd_mempool_t *pool, const gchar *path, const - gchar *create_sql, GError **err) + gchar *create_sql, guint version, GError **err) { sqlite3 *sqlite; gint rc, flags, lock_fd; @@ -268,7 +268,8 @@ rspamd_sqlite3_open_or_create (rspamd_mempool_t *pool, const gchar *path, const other_pragmas[] = "PRAGMA read_uncommitted=\"ON\";" "PRAGMA cache_size=" - G_STRINGIFY(RSPAMD_SQLITE_CACHE_SIZE) ";"; + G_STRINGIFY(RSPAMD_SQLITE_CACHE_SIZE) ";", + db_version[] = "PRAGMA user_version;"; gboolean create = FALSE, has_lock = FALSE; flags = SQLITE_OPEN_READWRITE; @@ -401,6 +402,79 @@ rspamd_sqlite3_open_or_create (rspamd_mempool_t *pool, const gchar *path, const return NULL; } } + else if (has_lock && version > 0) { + /* Check user version */ + sqlite3_stmt *stmt = NULL; + guint32 db_ver; + GString *new_ver_sql; + + if (sqlite3_prepare (sqlite, db_version, -1, &stmt, NULL) != SQLITE_OK) { + msg_warn_pool_check ("Cannot get user version pragma", + sqlite3_errmsg (sqlite)); + } + else { + if (sqlite3_step (stmt) != SQLITE_ROW) { + msg_warn_pool_check ("Cannot get user version pragma, step failed", + sqlite3_errmsg (sqlite)); + sqlite3_finalize (stmt); + } + else { + db_ver = sqlite3_column_int (stmt, 0); + sqlite3_reset (stmt); + sqlite3_finalize (stmt); + + if (version > db_ver) { + msg_warn_pool_check ("Database version %ud is less than " + "desired version %ud, run create script", db_ver, + version); + + if (create_sql) { + if (sqlite3_exec (sqlite, create_sql, NULL, NULL, NULL) != SQLITE_OK) { + g_set_error (err, rspamd_sqlite3_quark (), + -1, "cannot execute create sql `%s`: %s", + create_sql, sqlite3_errmsg (sqlite)); + sqlite3_close (sqlite); + rspamd_file_unlock (lock_fd, FALSE); + unlink (lock_path); + if (lock_fd != -1) { + close (lock_fd); + } + + return NULL; + } + } + + new_ver_sql = g_string_new ("PRAGMA user_version="); + rspamd_printf_gstring (new_ver_sql, "%ud", version); + + if (sqlite3_exec (sqlite, new_ver_sql->str, NULL, NULL, NULL) + != SQLITE_OK) { + g_set_error (err, rspamd_sqlite3_quark (), + -1, "cannot execute update version sql `%s`: %s", + new_ver_sql->str, sqlite3_errmsg (sqlite)); + sqlite3_close (sqlite); + rspamd_file_unlock (lock_fd, FALSE); + unlink (lock_path); + if (lock_fd != -1) { + close (lock_fd); + } + + g_string_free (new_ver_sql, TRUE); + + return NULL; + } + + g_string_free (new_ver_sql, TRUE); + } + else if (db_ver > version) { + msg_warn_pool_check ("Database version %ud is more than " + "desired version %ud, this could cause" + " unexpected behaviour", db_ver, + version); + } + } + } + } if (sqlite3_exec (sqlite, sqlite_wal, NULL, NULL, NULL) != SQLITE_OK) { msg_warn_pool_check ("WAL mode is not supported (%s), locking issues might occur", diff --git a/src/libutil/sqlite_utils.h b/src/libutil/sqlite_utils.h index f25be655e..78ee26fc6 100644 --- a/src/libutil/sqlite_utils.h +++ b/src/libutil/sqlite_utils.h @@ -69,7 +69,7 @@ void rspamd_sqlite3_close_prstmt (sqlite3 *db, GArray *stmts); * @return */ sqlite3 * rspamd_sqlite3_open_or_create (rspamd_mempool_t *pool, const gchar *path, const - gchar *create_sql, GError **err); + gchar *create_sql, guint32 version, GError **err); /** diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index a25dc32d5..67aa63aa8 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -1237,7 +1237,7 @@ rspamd_substring_search_caseless (const gchar *in, gsize inlen, j = 0; while (j <= inlen - srchlen) { - if (hash_srch == hash_in && g_ascii_strncasecmp (srch, in + j, srchlen) == 0) { + if (hash_srch == hash_in && rspamd_lc_cmp (srch, in + j, srchlen) == 0) { return (goffset) j; } diff --git a/src/log_helper.c b/src/log_helper.c index d01e102c6..a118f1181 100644 --- a/src/log_helper.c +++ b/src/log_helper.c @@ -37,8 +37,8 @@ worker_t log_helper_worker = { init_log_helper, /* Init function */ start_log_helper, /* Start function */ RSPAMD_WORKER_UNIQUE | RSPAMD_WORKER_KILLABLE, - SOCK_STREAM, /* TCP socket */ - RSPAMD_WORKER_VER /* Version info */ + RSPAMD_WORKER_SOCKET_NONE, /* No socket */ + RSPAMD_WORKER_VER /* Version info */ }; static const guint64 rspamd_log_helper_magic = 0x1090bb46aaa74c9aULL; @@ -129,7 +129,14 @@ rspamd_log_helper_read (gint fd, short what, gpointer ud) } } else if (r == -1) { - msg_warn ("cannot read data from log pipe: %s", strerror (errno)); + if (errno != EAGAIN || errno != EINTR) { + msg_warn ("cannot read data from log pipe: %s", strerror (errno)); + event_del (&ctx->log_ev); + } + } + else if (r == 0) { + msg_warn ("cannot read data from log pipe: EOF"); + event_del (&ctx->log_ev); } } diff --git a/src/lua/CMakeLists.txt b/src/lua/CMakeLists.txt index cb97ca3ed..fefd074d5 100644 --- a/src/lua/CMakeLists.txt +++ b/src/lua/CMakeLists.txt @@ -30,3 +30,10 @@ SET(LUASRC ${CMAKE_CURRENT_SOURCE_DIR}/lua_common.c ${CMAKE_CURRENT_SOURCE_DIR}/lua_map.c) SET(RSPAMD_LUA ${LUASRC} PARENT_SCOPE) +SET(RSPAMDMLUASRC "${CMAKE_CURRENT_SOURCE_DIR}/global_functions.lua") +ADD_CUSTOM_TARGET(rspamd_lua_preprocess + ${PERL_EXECUTABLE} + "${CMAKE_SOURCE_DIR}/lua_preprocess.pl" + "${CMAKE_CURRENT_SOURCE_DIR}" + "${CMAKE_CURRENT_BINARY_DIR}" + SOURCES ${RSPAMDMLUASRC} ${CMAKE_SOURCE_DIR}/lua_preprocess.pl)
\ No newline at end of file diff --git a/src/lua/global_functions.lua b/src/lua/global_functions.lua new file mode 100644 index 000000000..9cf5f9a6f --- /dev/null +++ b/src/lua/global_functions.lua @@ -0,0 +1,65 @@ + +-- This function parses redis server definition using either +-- specific server string for this module or global +-- redis section +function rspamd_parse_redis_server(module_name) + + local default_port = 6379 + local logger = require "rspamd_logger" + local upstream_list = require "rspamd_upstream_list" + + local function try_load_redis_servers(options) + local key = options['servers'] + + if not key then key = options['server'] end + + if key then + local upstreams = upstream_list.create(rspamd_config, key, default_port) + + if upstreams then + return upstreams + end + end + + return nil + end + + local opts = rspamd_config:get_all_opt(module_name) + local ret + + if opts then + ret = try_load_redis_servers(opts) + end + + if ret then + return ret + end + + opts = rspamd_config:get_all_opt('redis') + + if opts then + if opts[module_name] then + ret = try_load_redis_servers(opts[module_name]) + if ret then + return ret + end + else + ret = try_load_redis_servers(opts) + + if ret then + logger.infox(rspamd_config, "using default redis server for module %s", + module_name) + end + end + end + + return ret +end + +function rspamd_str_split(s, sep) + local lpeg = require "lpeg" + sep = lpeg.P(sep) + local elem = lpeg.C((1 - sep)^0) + local p = lpeg.Ct(elem * (sep * elem)^0) -- make a table capture + return lpeg.match(p, s) +end
\ No newline at end of file diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index 4563d9f84..85dbfa372 100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -14,6 +14,8 @@ * limitations under the License. */ #include "lua_common.h" +#include "lua/global_functions.lua.h" +#include "lptree.h" /* Lua module init function */ #define MODULE_INIT_FUNC "module_init" @@ -185,13 +187,13 @@ rspamd_lua_set_path (lua_State *L, struct rspamd_config *cfg) if (additional_path) { rspamd_snprintf (path_buf, sizeof (path_buf), - "%s/lua/?.lua;%s/lua/?.lua;%s;%s;%s", + "%s/lua/?.lua;%s/lua/?.lua;%s/?.lua;%s;%s", RSPAMD_PLUGINSDIR, RSPAMD_CONFDIR, RSPAMD_RULESDIR, additional_path, old_path); } else { rspamd_snprintf (path_buf, sizeof (path_buf), - "%s/lua/?.lua;%s/lua/?.lua;%s;%s", + "%s/lua/?.lua;%s/lua/?.lua;%s/?.lua;%s", RSPAMD_PLUGINSDIR, RSPAMD_CONFDIR, RSPAMD_RULESDIR, old_path); } @@ -241,9 +243,15 @@ rspamd_lua_init () luaopen_fann (L); luaopen_sqlite3 (L); luaopen_cryptobox (L); + luaopen_lpeg (L); rspamd_lua_add_preload (L, "ucl", luaopen_ucl); + if (luaL_dostring (L, rspamadm_script_global_functions) != 0) { + msg_err ("cannot execute lua global script: %s", + lua_tostring (L, -1)); + } + return L; } diff --git a/src/lua/lua_sqlite3.c b/src/lua/lua_sqlite3.c index ca8481117..e15673702 100644 --- a/src/lua/lua_sqlite3.c +++ b/src/lua/lua_sqlite3.c @@ -98,7 +98,7 @@ lua_sqlite3_open (lua_State *L) return 1; } - db = rspamd_sqlite3_open_or_create (NULL, path, NULL, &err); + db = rspamd_sqlite3_open_or_create (NULL, path, NULL, 0, &err); if (db == NULL) { if (err) { diff --git a/src/lua/lua_tcp.c b/src/lua/lua_tcp.c index e6ccd85ee..094ebf12b 100644 --- a/src/lua/lua_tcp.c +++ b/src/lua/lua_tcp.c @@ -348,10 +348,12 @@ static void lua_tcp_dns_handler (struct rdns_reply *reply, gpointer ud) { struct lua_tcp_cbdata *cbd = (struct lua_tcp_cbdata *)ud; + const struct rdns_request_name *rn; if (reply->code != RDNS_RC_NOERROR) { + rn = rdns_request_get_name (reply->request, NULL); lua_tcp_push_error (cbd, "unable to resolve host: %s", - reply->requested_name); + rn->name); lua_tcp_maybe_free (cbd); } else { @@ -368,7 +370,7 @@ lua_tcp_dns_handler (struct rdns_reply *reply, gpointer ud) if (!lua_tcp_make_connection (cbd)) { lua_tcp_push_error (cbd, "unable to make connection to the host %s", - reply->requested_name); + rspamd_inet_address_to_string (cbd->addr)); lua_tcp_maybe_free (cbd); } } diff --git a/src/lua/lua_trie.c b/src/lua/lua_trie.c index ac6ebb559..a0bf3afd4 100644 --- a/src/lua/lua_trie.c +++ b/src/lua/lua_trie.c @@ -42,12 +42,14 @@ LUA_FUNCTION_DEF (trie, create); LUA_FUNCTION_DEF (trie, match); LUA_FUNCTION_DEF (trie, search_mime); LUA_FUNCTION_DEF (trie, search_rawmsg); +LUA_FUNCTION_DEF (trie, search_rawbody); LUA_FUNCTION_DEF (trie, destroy); static const struct luaL_reg trielib_m[] = { LUA_INTERFACE_DEF (trie, match), LUA_INTERFACE_DEF (trie, search_mime), LUA_INTERFACE_DEF (trie, search_rawmsg), + LUA_INTERFACE_DEF (trie, search_rawbody), {"__tostring", rspamd_lua_class_tostring}, {"__gc", lua_trie_destroy}, {NULL, NULL} @@ -303,6 +305,43 @@ lua_trie_search_rawmsg (lua_State *L) return 1; } +/*** + * @method trie:search_rawbody(task, cb[, caseless]) + * This is a helper mehthod to search pattern within the whole undecoded content of task's body (not including headers) + * @param {task} task object + * @param {function} cb callback called on each pattern match @see trie:match + * @param {boolean} caseless if `true` then match ignores symbols case (ASCII only) + * @return {boolean} `true` if any pattern has been found (`cb` might be called multiple times however) + */ +static gint +lua_trie_search_rawbody (lua_State *L) +{ + struct rspamd_multipattern *trie = lua_check_trie (L, 1); + struct rspamd_task *task = lua_check_task (L, 2); + const gchar *text; + gsize len; + gboolean found = FALSE; + + if (trie) { + if (task->raw_headers_content.len > 0) { + text = task->msg.begin + task->raw_headers_content.len; + len = task->msg.len - task->raw_headers_content.len; + } + else { + /* Treat as raw message */ + text = task->msg.begin; + len = task->msg.len; + } + + if (lua_trie_search_str (L, trie, text, len) != 0) { + found = TRUE; + } + } + + lua_pushboolean (L, found); + return 1; +} + static gint lua_load_trie (lua_State *L) { diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index 05a9a4452..a5b5eac84 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -80,6 +80,14 @@ LUA_FUNCTION_DEF (util, encode_base32); LUA_FUNCTION_DEF (util, decode_base32); /*** + * @function util.decode_url(input) + * Decodes data from url encoding + * @param {text or string} input data to decode + * @return {rspamd_text} decoded data chunk + */ +LUA_FUNCTION_DEF (util, decode_url); + +/*** * @function util.tokenize_text(input[, exceptions]) * Create tokens from a text using optional exceptions list * @param {text/string} input input data @@ -325,6 +333,7 @@ static const struct luaL_reg utillib_f[] = { LUA_INTERFACE_DEF (util, decode_base64), LUA_INTERFACE_DEF (util, encode_base32), LUA_INTERFACE_DEF (util, decode_base32), + LUA_INTERFACE_DEF (util, decode_url), LUA_INTERFACE_DEF (util, tokenize_text), LUA_INTERFACE_DEF (util, tanh), LUA_INTERFACE_DEF (util, parse_html), @@ -659,11 +668,45 @@ lua_util_decode_base32 (lua_State *L) } if (s != NULL) { - t = lua_newuserdata (L, sizeof (*t)); - rspamd_lua_setclass (L, "rspamd{text}", -1); - t->start = rspamd_decode_base32 (s, inlen, &outlen); - t->len = outlen; - t->own = TRUE; + t = lua_newuserdata (L, sizeof (*t)); + rspamd_lua_setclass (L, "rspamd{text}", -1); + t->start = rspamd_decode_base32 (s, inlen, &outlen); + t->len = outlen; + t->own = TRUE; + } + else { + lua_pushnil (L); + } + + return 1; +} + +static gint +lua_util_decode_url (lua_State *L) +{ + struct rspamd_lua_text *t; + const gchar *s = NULL; + gsize inlen; + + if (lua_type (L, 1) == LUA_TSTRING) { + s = luaL_checklstring (L, 1, &inlen); + } + else if (lua_type (L, 1) == LUA_TUSERDATA) { + t = lua_check_text (L, 1); + + if (t != NULL) { + s = t->start; + inlen = t->len; + } + } + + if (s != NULL) { + t = lua_newuserdata (L, sizeof (*t)); + rspamd_lua_setclass (L, "rspamd{text}", -1); + t->start = g_malloc (inlen); + memcpy ((char *)t->start, s, inlen); + t->len = rspamd_decode_url ((char *)t->start, s, inlen); + t->own = TRUE; } else { lua_pushnil (L); @@ -672,6 +715,7 @@ lua_util_decode_base32 (lua_State *L) return 1; } + static gint lua_util_tokenize_text (lua_State *L) { diff --git a/src/lua_worker.c b/src/lua_worker.c index 87f597e28..b74b8d422 100644 --- a/src/lua_worker.c +++ b/src/lua_worker.c @@ -39,12 +39,12 @@ gpointer init_lua_worker (struct rspamd_config *cfg); void start_lua_worker (struct rspamd_worker *worker); worker_t lua_worker = { - "lua", /* Name */ - init_lua_worker, /* Init function */ - start_lua_worker, /* Start function */ + "lua", /* Name */ + init_lua_worker, /* Init function */ + start_lua_worker, /* Start function */ RSPAMD_WORKER_HAS_SOCKET | RSPAMD_WORKER_KILLABLE, - SOCK_STREAM, /* TCP socket */ - RSPAMD_WORKER_VER /* Version info */ + RSPAMD_WORKER_SOCKET_TCP, /* TCP socket */ + RSPAMD_WORKER_VER /* Version info */ }; static const guint64 rspamd_lua_ctx_magic = 0x8055e2652aacf96eULL; @@ -261,7 +261,7 @@ lua_accept_socket (gint fd, short what, void *arg) L = ctx->L; if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, worker->accept_events)) == -1) { msg_warn ("accept failed: %s", strerror (errno)); return; } diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index bf4cd3a0a..30eccf0c3 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -53,6 +53,13 @@ #define DEFAULT_RETRANSMITS 3 #define DEFAULT_PORT 11335 +/* + * WARNING: + * As 1.3 is not yet stable, we want to keep compatibility here as 1.2 won't + * recognize version 4 unless 1.2.7 + */ +#define RSPAMD_FUZZY_PLUGIN_VERSION 3 + static const gint rspamd_fuzzy_hash_len = 5; struct fuzzy_mapping { @@ -188,6 +195,13 @@ parse_flags (struct fuzzy_rule *rule, if (elt != NULL) { map->fuzzy_flag = ucl_obj_toint (elt); + + if (map->fuzzy_flag > 31) { + msg_err_config ("flags more than 31 are no longer " + "supported by rspamd"); + return; + } + elt = ucl_object_lookup (val, "max_score"); if (elt != NULL) { @@ -1007,7 +1021,7 @@ fuzzy_cmd_from_task_meta (struct fuzzy_rule *rule, } cmd->cmd = c; - cmd->version = RSPAMD_FUZZY_VERSION; + cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION; if (c != FUZZY_CHECK) { cmd->flag = flag; cmd->value = weight; @@ -1165,7 +1179,7 @@ fuzzy_cmd_from_text_part (struct fuzzy_rule *rule, shcmd->basic.tag = ottery_rand_uint32 (); shcmd->basic.cmd = c; - shcmd->basic.version = RSPAMD_FUZZY_VERSION; + shcmd->basic.version = RSPAMD_FUZZY_PLUGIN_VERSION; if (c != FUZZY_CHECK) { shcmd->basic.flag = flag; @@ -1214,7 +1228,7 @@ fuzzy_cmd_from_data_part (struct fuzzy_rule *rule, } cmd->cmd = c; - cmd->version = RSPAMD_FUZZY_VERSION; + cmd->version = RSPAMD_FUZZY_PLUGIN_VERSION; if (c != FUZZY_CHECK) { cmd->flag = flag; cmd->value = weight; @@ -1390,6 +1404,66 @@ fuzzy_process_reply (guchar **pos, gint *r, GPtrArray *req, return NULL; } +static void +fuzzy_insert_result (struct fuzzy_client_session *session, + const struct rspamd_fuzzy_reply *rep, + struct rspamd_fuzzy_cmd *cmd, guint flag) +{ + const gchar *symbol; + struct fuzzy_mapping *map; + struct rspamd_task *task = session->task; + double nval; + guchar buf[2048]; + + /* Get mapping by flag */ + if ((map = + g_hash_table_lookup (session->rule->mappings, + GINT_TO_POINTER (rep->flag))) == NULL) { + /* Default symbol and default weight */ + symbol = session->rule->symbol; + + } + else { + /* Get symbol and weight from map */ + symbol = map->symbol; + } + + + /* + * Hash is assumed to be found if probability is more than 0.5 + * In that case `value` means number of matches + * Otherwise `value` means error code + */ + + nval = fuzzy_normalize (rep->value, + session->rule->max_score); + nval *= rep->prob; + msg_info_task ( + "found fuzzy hash %*xs with weight: " + "%.2f, in list: %s:%d%s", + rspamd_fuzzy_hash_len, cmd->digest, + nval, + symbol, + rep->flag, + map == NULL ? "(unknown)" : ""); + if (map != NULL || !session->rule->skip_unknown) { + rspamd_snprintf (buf, + sizeof (buf), + "%d:%*xs:%.2f", + rep->flag, + rspamd_fuzzy_hash_len, cmd->digest, + rep->prob, + nval); + rspamd_task_insert_result_single (session->task, + symbol, + nval, + g_list_prepend (NULL, + rspamd_mempool_strdup ( + session->task->task_pool, + buf))); + } +} + /* Fuzzy check callback */ static void fuzzy_check_io_callback (gint fd, short what, void *arg) @@ -1397,14 +1471,12 @@ fuzzy_check_io_callback (gint fd, short what, void *arg) struct fuzzy_client_session *session = arg; const struct rspamd_fuzzy_reply *rep; struct rspamd_task *task; - struct fuzzy_mapping *map; guchar buf[2048], *p; - const gchar *symbol; struct fuzzy_cmd_io *io; struct rspamd_fuzzy_cmd *cmd = NULL; guint i; gint r; - double nval; + enum { return_error = 0, return_want_more, @@ -1427,68 +1499,30 @@ fuzzy_check_io_callback (gint fd, short what, void *arg) while ((rep = fuzzy_process_reply (&p, &r, session->commands, session->rule, &cmd)) != NULL) { - /* Get mapping by flag */ - if ((map = - g_hash_table_lookup (session->rule->mappings, - GINT_TO_POINTER (rep->flag))) == NULL) { - /* Default symbol and default weight */ - symbol = session->rule->symbol; - - } - else { - /* Get symbol and weight from map */ - symbol = map->symbol; - } - - - /* - * Hash is assumed to be found if probability is more than 0.5 - * In that case `value` means number of matches - * Otherwise `value` means error code - */ if (rep->prob > 0.5) { - nval = fuzzy_normalize (rep->value, - session->rule->max_score); - nval *= rep->prob; - msg_info_task ( - "found fuzzy hash %*xs with weight: " - "%.2f, in list: %s:%d%s", - rspamd_fuzzy_hash_len, cmd->digest, - nval, - symbol, - rep->flag, - map == NULL ? "(unknown)" : ""); - if (map != NULL || !session->rule->skip_unknown) { - rspamd_snprintf (buf, - sizeof (buf), - "%d:%*xs:%.2f", - rep->flag, - rspamd_fuzzy_hash_len, cmd->digest, - rep->prob, - nval); - rspamd_task_insert_result_single (session->task, - symbol, - nval, - g_list_prepend (NULL, - rspamd_mempool_strdup ( - session->task->task_pool, - buf))); + if (rep->flag & (1U << 31)) { + /* Multi-flag */ + for (i = 0; i < 31; i ++) { + if ((1U << i) & rep->flag) { + fuzzy_insert_result (session, rep, cmd, i + 1); + } + } + } + else { + fuzzy_insert_result (session, rep, cmd, rep->flag); } } else if (rep->value == 403) { msg_info_task ( - "fuzzy check error for %s(%d): forbidden", - symbol, + "fuzzy check error for %d: forbidden", rep->flag); } else if (rep->value != 0) { msg_info_task ( - "fuzzy check error for %s(%d): unknown error (%d)", - symbol, + "fuzzy check error for %d: unknown error (%d)", rep->flag, rep->value); } - /* Not found */ ret = return_finished; } diff --git a/src/plugins/lua/dmarc.lua b/src/plugins/lua/dmarc.lua index 924b02017..063142f24 100644 --- a/src/plugins/lua/dmarc.lua +++ b/src/plugins/lua/dmarc.lua @@ -35,7 +35,6 @@ local symbols = { dkim_deny_symbol = 'R_DKIM_REJECT', } -- Default port for redis upstreams -local default_port = 6379 local upstreams = nil local dmarc_redis_key_prefix = "dmarc_" local dmarc_domain = nil @@ -272,13 +271,9 @@ if not opts or type(opts) ~= 'table' then return end -if not opts['servers'] then - rspamd_logger.infox(rspamd_config, 'no servers are specified for dmarc stats') -else - upstreams = upstream_list.create(rspamd_config, opts['servers'], default_port) - if not upstreams then - rspamd_logger.errx(rspamd_config, 'cannot parse servers parameter') - end +upstreams = rspamd_parse_redis_server('dmarc') +if not upstreams then + rspamd_logger.errx(rspamd_config, 'cannot parse servers parameter') end if opts['key_prefix'] then diff --git a/src/plugins/lua/greylist.lua b/src/plugins/lua/greylist.lua index 81d4604a0..ebb9a4586 100644 --- a/src/plugins/lua/greylist.lua +++ b/src/plugins/lua/greylist.lua @@ -16,8 +16,6 @@ limitations under the License. -- A plugin that implements greylisting using redis --- Default port for redis upstreams -local default_port = 6379 local upstreams local whitelisted_ip local settings = { @@ -295,16 +293,13 @@ if opts then whitelisted_ip = rspamd_config:add_radix_map(opts['whitelisted_ip'], 'Greylist whitelist ip map') end - if not opts['servers'] then + + upstreams = rspamd_parse_redis_server('greylist') + if not upstreams then rspamd_logger.infox(rspamd_config, 'no servers are specified, disabling module') else - upstreams = upstream_list.create(rspamd_config, opts['servers'], default_port) - if not upstreams then - rspamd_logger.infox(rspamd_config, 'no servers are specified, disabling module') - else - rspamd_config:register_pre_filter(greylist_check) - rspamd_config:register_post_filter(greylist_set, 10) - end + rspamd_config:register_pre_filter(greylist_check) + rspamd_config:register_post_filter(greylist_set, 10) end for k,v in pairs(opts) do diff --git a/src/plugins/lua/hfilter.lua b/src/plugins/lua/hfilter.lua index 1785e8a7b..3e39638a6 100644 --- a/src/plugins/lua/hfilter.lua +++ b/src/plugins/lua/hfilter.lua @@ -125,31 +125,6 @@ local function check_regexp(str, regexp_text) return false end -local function split(str, delim, maxNb) - -- Eliminate bad cases... - if string.find(str, delim) == nil then - return { str } - end - if maxNb == nil or maxNb < 1 then - maxNb = 0 -- No limit - end - local result = {} - local pat = "(.-)" .. delim .. "()" - local nb = 0 - local lastPos - for part, pos in string.gmatch(str, pat) do - nb = nb + 1 - result[nb] = part - lastPos = pos - if nb == maxNb then break end - end - -- Handle the last field - if nb ~= maxNb then - result[nb + 1] = string.sub(str, lastPos) - end - return result -end - local function check_fqdn(domain) if check_regexp(domain, '(?=^.{4,253}$)(^((?!-)[a-zA-Z0-9-]{1,63}(?<!-)\\.)+[a-zA-Z0-9-]{2,63}\\.?$)') then return true @@ -407,7 +382,7 @@ local function hfilter(task) if from then --FROM host check for _,fr in ipairs(from) do - local fr_split = split(fr['addr'], '@', 0) + local fr_split = rspamd_str_split(fr['addr'], '@') if table.maxn(fr_split) == 2 then check_host(task, fr_split[2], 'FROMHOST', '', '') if fr_split[1] == 'postmaster' then @@ -440,7 +415,7 @@ local function hfilter(task) if config['mid_enabled'] then local message_id = task:get_message_id() if message_id then - local mid_split = split(message_id, '@', 0) + local mid_split = rspamd_str_split(message_id, '@') if table.maxn(mid_split) == 2 and not string.find(mid_split[2], 'local') then check_host(task, mid_split[2], 'MID') end diff --git a/src/plugins/lua/ip_score.lua b/src/plugins/lua/ip_score.lua index 7d12f1a5e..bb34003cb 100644 --- a/src/plugins/lua/ip_score.lua +++ b/src/plugins/lua/ip_score.lua @@ -23,7 +23,6 @@ local rspamd_util = require "rspamd_util" local _ = require "fun" -- Default settings -local default_port = 6379 local upstreams = nil local whitelist = nil local asn_cc_whitelist = nil @@ -333,19 +332,17 @@ local configure_ip_score_module = function() for k,v in pairs(opts) do options[k] = v end - if options['servers'] and options['servers'] ~= '' then - upstreams = upstream_list.create(rspamd_config, options['servers'], default_port) - if not upstreams then - rspamd_logger.infox(rspamd_config, 'no servers are specified') - end - end - if options['whitelist'] then - whitelist = rspamd_config:add_radix_map(opts['whitelist']) - end - if options['asn_cc_whitelist'] then - asn_cc_whitelist = rspamd_config:add_hash_map(opts['asn_cc_whitelist']) + upstreams = rspamd_parse_redis_server('ip_score') + if not upstreams then + rspamd_logger.infox(rspamd_config, 'no servers are specified') end end + if options['whitelist'] then + whitelist = rspamd_config:add_radix_map(opts['whitelist']) + end + if options['asn_cc_whitelist'] then + asn_cc_whitelist = rspamd_config:add_hash_map(opts['asn_cc_whitelist']) + end end diff --git a/src/plugins/lua/ratelimit.lua b/src/plugins/lua/ratelimit.lua index d8bbf0dfa..1032a35b9 100644 --- a/src/plugins/lua/ratelimit.lua +++ b/src/plugins/lua/ratelimit.lua @@ -16,8 +16,6 @@ limitations under the License. -- A plugin that implements ratelimits using redis or kvstorage server --- Default port for redis upstreams -local default_port = 6379 -- Default settings for limits, 1-st member is burst, second is rate and the third is numeric type local settings = { -- Limit for all mail per recipient (burst 100, rate 2 per minute) @@ -53,36 +51,10 @@ local rspamd_util = require "rspamd_util" local _ = require "fun" --local dumper = require 'pl.pretty'.dump ---- Utility function for split string to table -local function split(str, delim, maxNb) - -- Eliminate bad cases... - if string.find(str, delim) == nil then - return { str } - end - if maxNb == nil or maxNb < 1 then - maxNb = 0 -- No limit - end - local result = {} - local pat = "(.-)" .. delim .. "()" - local nb = 0 - local lastPos - for part, pos in string.gmatch(str, pat) do - nb = nb + 1 - result[nb] = part - lastPos = pos - if nb == maxNb then break end - end - -- Handle the last field - if nb ~= maxNb then - result[nb + 1] = string.sub(str, lastPos) - end - return result -end - --- Parse atime and bucket of limit local function parse_limits(data) local function parse_limit_elt(str) - local elts = split(str, ':', 3) + local elts = rspamd_str_split(str, ':') if not elts or #elts < 2 then return {0, 0, 0} else @@ -343,7 +315,7 @@ end --- Parse a single limit description local function parse_limit(str) - local params = split(str, ':', 0) + local params = rspamd_str_split(str, ':') local function set_limit(limit, burst, rate) limit[1] = tonumber(burst) @@ -398,7 +370,7 @@ if opts then rspamd_logger.infox(rspamd_config, 'enabled rate buckets: %s', enabled_limits) if opts['whitelisted_rcpts'] and type(opts['whitelisted_rcpts']) == 'string' then - whitelisted_rcpts = split(opts['whitelisted_rcpts'], ',') + whitelisted_rcpts = rspamd_str_split(opts['whitelisted_rcpts'], ',') elseif type(opts['whitelisted_rcpts']) == 'table' then whitelisted_rcpts = opts['whitelisted_rcpts'] end @@ -420,25 +392,21 @@ if opts then max_rcpt = tonumber(opts['max_delay']) end - if not opts['servers'] then + upstreams = rspamd_parse_redis_server('ratelimit') + if not upstreams then rspamd_logger.infox(rspamd_config, 'no servers are specified, disabling module') else - upstreams = upstream_list.create(rspamd_config, opts['servers'], default_port) - if not upstreams then - rspamd_logger.infox(rspamd_config, 'no servers are specified, disabling module') + if not ratelimit_symbol then + rspamd_config:register_pre_filter(rate_test) else - if not ratelimit_symbol then - rspamd_config:register_pre_filter(rate_test) - else - rspamd_config:register_symbol({ - name = ratelimit_symbol, - callback = rate_test, - flags = 'empty' - }) - end - - rspamd_config:register_post_filter(rate_set) + rspamd_config:register_symbol({ + name = ratelimit_symbol, + callback = rate_test, + flags = 'empty' + }) end + + rspamd_config:register_post_filter(rate_set) end end diff --git a/src/plugins/lua/replies.lua b/src/plugins/lua/replies.lua index 70228f0dc..98a48bb1f 100644 --- a/src/plugins/lua/replies.lua +++ b/src/plugins/lua/replies.lua @@ -18,7 +18,6 @@ limitations under the License. -- A plugin that implements replies check using redis -- Default port for redis upstreams -local default_port = 6379 local upstreams local whitelisted_ip local settings = { @@ -102,17 +101,13 @@ end local opts = rspamd_config:get_all_opt('replies') if opts then - if not opts['servers'] then - rspamd_logger.infox(rspamd_config, 'no servers are specified, disabling module') - else - upstreams = upstream_list.create(rspamd_config, opts['servers'], default_port) + upstreams = rspamd_parse_redis_server('replies') if not upstreams then rspamd_logger.infox(rspamd_config, 'no servers are specified, disabling module') else rspamd_config:register_pre_filter(replies_check) rspamd_config:register_post_filter(replies_set, 10) end - end for k,v in pairs(opts) do settings[k] = v diff --git a/src/plugins/lua/trie.lua b/src/plugins/lua/trie.lua index 4a1fdfd4e..da8bba560 100644 --- a/src/plugins/lua/trie.lua +++ b/src/plugins/lua/trie.lua @@ -22,26 +22,32 @@ local _ = require "fun" local mime_trie local raw_trie +local body_trie -- here we store all patterns as text local mime_patterns = {} local raw_patterns = {} +local body_patterns = {} -- here we store params for each pattern, so for each i = 1..n patterns[i] -- should have corresponding params[i] local mime_params = {} local raw_params = {} +local body_params = {} local function tries_callback(task) local matched = {} - local function gen_trie_cb(raw) + local function gen_trie_cb(type) local patterns = mime_patterns local params = mime_params - if raw then + if type == 'rawmessage' then patterns = raw_patterns params = raw_params + elseif type == 'rawbody' then + patterns = body_patterns + params = body_params end return function (idx, pos) @@ -51,7 +57,7 @@ local function tries_callback(task) if param['multi'] or not matched[pattern] then rspamd_logger.debugx(task, "<%1> matched pattern %2 at pos %3", task:get_message_id(), pattern, pos) - task:insert_result(param['symbol'], 1.0) + task:insert_result(param['symbol'], 1.0, type) if not param['multi'] then matched[pattern] = true end @@ -60,10 +66,13 @@ local function tries_callback(task) end if mime_trie then - mime_trie:search_mime(task, gen_trie_cb(false)) + mime_trie:search_mime(task, gen_trie_cb('mime')) end if raw_trie then - raw_trie:search_rawmsg(task, gen_trie_cb(true)) + raw_trie:search_rawmsg(task, gen_trie_cb('rawmessage')) + end + if body_trie then + raw_trie:search_rawbody(task, gen_trie_cb('rawbody')) end end @@ -75,6 +84,9 @@ local function process_single_pattern(pat, symbol, cf) if cf['raw'] then table.insert(raw_patterns, pat) table.insert(raw_params, {symbol=symbol, multi=multi}) + elseif cf['body'] then + table.insert(body_patterns, pat) + table.insert(body_params, {symbol=symbol, multi=multi}) else table.insert(mime_patterns, pat) table.insert(mime_params, {symbol=symbol, multi=multi}) @@ -101,16 +113,12 @@ local function process_trie_file(symbol, cf) end local function process_trie_conf(symbol, cf) - local raw = false - if type(cf) ~= 'table' then rspamd_logger.errx(rspamd_config, 'invalid value for symbol %1: "%2", expected table', symbol, cf) return end - if cf['raw'] then raw = true end - if cf['file'] then process_trie_file(symbol, cf) elseif cf['patterns'] then @@ -136,8 +144,13 @@ if opts then rspamd_logger.infox(rspamd_config, 'registered mime search trie from %1 patterns', #mime_patterns) end + if #body_patterns > 0 then + body_trie = rspamd_trie.create(body_patterns) + rspamd_logger.infox(rspamd_config, 'registered body search trie from %1 patterns', #body_patterns) + end + local id = -1 - if mime_trie or raw_trie then + if mime_trie or raw_trie or body_trie then id = rspamd_config:register_symbol({ type = 'callback', callback = tries_callback diff --git a/src/plugins/lua/whitelist.lua b/src/plugins/lua/whitelist.lua index 8c4f4cbcc..946e47adf 100644 --- a/src/plugins/lua/whitelist.lua +++ b/src/plugins/lua/whitelist.lua @@ -58,11 +58,18 @@ local function whitelist_cb(symbol, rule, task) local from = task:get_from(1) local found = false local mult = 1.0 + local spf_violated = false + local dkim_violated = false + local dmarc_violated = false if rule['valid_spf'] then if not task:has_symbol(options['spf_allow_symbol']) then -- Not whitelisted - return + if not rule['blacklist'] or rule['strict'] then + return + end + + spf_violated = true end -- Now we can check from domain or helo @@ -90,7 +97,11 @@ local function whitelist_cb(symbol, rule, task) if rule['valid_dkim'] then local sym = task:get_symbol(options['dkim_allow_symbol']) if not sym then - return + if not rule['blacklist'] or rule['strict'] then + return + end + + dkim_violated = true end local dkim_opts = sym[1]['options'] @@ -109,7 +120,11 @@ local function whitelist_cb(symbol, rule, task) if rule['valid_dmarc'] then if not task:has_symbol(options['dmarc_allow_symbol']) then - return + if not rule['blacklist'] or rule['strict'] then + return + end + + dmarc_violated = true end local from = task:get_from(2) @@ -123,7 +138,28 @@ local function whitelist_cb(symbol, rule, task) end if found then - task:insert_result(symbol, mult, domains) + if not rule['blacklist'] or rule['strict'] then + task:insert_result(symbol, mult, domains) + else + -- Additional constraints for blacklist + if rule['valid_spf'] or rule['valid_dkim'] or rule['valid_dmarc'] then + if dmarc_violated or dkim_violated or spf_violated then + + if rule['strict'] then + -- Inverse multiplier to convert whitelist to blacklist + mult = -mult + end + + task:insert_result(symbol, mult, domains) + elseif rule['strict'] then + -- Add whitelist score (negative) + task:insert_result(symbol, mult, domains) + end + else + -- Unconstrained input + task:insert_result(symbol, mult, domains) + end + end end end @@ -180,9 +216,14 @@ local configure_whitelist_module = function() return end + local flags = 'nice,empty' + if rule['blacklist'] then + flags = 'empty' + end + local id = rspamd_config:register_symbol({ name = symbol, - flags = 'nice,empty', + flags = flags, callback = gen_whitelist_cb(symbol, rule) }) diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index 87b8effa7..1063013c7 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -80,6 +80,15 @@ module_t surbl_module = { }; static void +exceptions_free_value (gpointer v) +{ + rspamd_ftok_t *val = v; + + g_free ((gpointer)val->begin); + g_slice_free1 (sizeof (*val), val); +} + +static void exception_insert (gpointer st, gconstpointer key, gconstpointer value) { GHashTable **t = st; @@ -100,18 +109,18 @@ exception_insert (gpointer st, gconstpointer key, gconstpointer value) return; } - val = g_malloc (sizeof (rspamd_ftok_t)); - val->begin = key; + val = g_slice_alloc (sizeof (rspamd_ftok_t)); + val->begin = g_strdup (key); val->len = strlen (key); if (t[level] == NULL) { t[level] = g_hash_table_new_full (rspamd_ftok_icase_hash, rspamd_ftok_icase_equal, - g_free, + exceptions_free_value, g_free); } - g_hash_table_insert (t[level], val, g_strdup (value)); + g_hash_table_replace (t[level], val, g_strdup (value)); } static gchar * @@ -121,13 +130,13 @@ read_exceptions_list (gchar * chunk, gboolean final) { if (data->cur_data == NULL) { - data->cur_data = g_malloc (sizeof (GHashTable *) * MAX_LEVELS); + data->cur_data = g_malloc0 (sizeof (GHashTable *) * MAX_LEVELS); } return rspamd_parse_kv_list ( chunk, len, data, - (insert_func) exception_insert, + exception_insert, "", final); } @@ -256,9 +265,7 @@ surbl_module_init (struct rspamd_config *cfg, struct module_ctx **ctx) surbl_module_ctx->whitelist = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); /* Zero exceptions hashes */ - surbl_module_ctx->exceptions = rspamd_mempool_alloc0 ( - surbl_module_ctx->surbl_pool, - MAX_LEVELS * sizeof (GHashTable *)); + surbl_module_ctx->exceptions = g_malloc0 (MAX_LEVELS * sizeof (GHashTable *)); /* Register destructors */ rspamd_mempool_add_destructor (surbl_module_ctx->surbl_pool, (rspamd_mempool_destruct_t) g_hash_table_destroy, diff --git a/src/rspamadm/CMakeLists.txt b/src/rspamadm/CMakeLists.txt index 65b8669bc..992e2a3aa 100644 --- a/src/rspamadm/CMakeLists.txt +++ b/src/rspamadm/CMakeLists.txt @@ -25,10 +25,10 @@ SET(RSPAMADMLUASRC INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) ADD_CUSTOM_TARGET(rspamadm_lua_preprocess ${PERL_EXECUTABLE} - "${CMAKE_CURRENT_SOURCE_DIR}/lua_preprocess.pl" + "${CMAKE_SOURCE_DIR}/lua_preprocess.pl" "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" - SOURCES ${RSPAMADMLUASRC} ${CMAKE_CURRENT_SOURCE_DIR}/lua_preprocess.pl) + SOURCES ${RSPAMADMLUASRC} ${CMAKE_SOURCE_DIR}/lua_preprocess.pl) IF (ENABLE_HYPERSCAN MATCHES "ON") LIST(APPEND RSPAMADMSRC "${CMAKE_SOURCE_DIR}/src/hs_helper.c") ENDIF() diff --git a/src/rspamadm/fuzzy_merge.c b/src/rspamadm/fuzzy_merge.c index 13bef43ef..931bea0f0 100644 --- a/src/rspamadm/fuzzy_merge.c +++ b/src/rspamadm/fuzzy_merge.c @@ -256,7 +256,7 @@ rspamadm_fuzzy_merge (gint argc, gchar **argv) pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "fuzzy_merge"); dest_db = rspamd_sqlite3_open_or_create (pool, target, create_tables_sql, - &error); + 0, &error); if (dest_db == NULL) { rspamd_fprintf(stderr, "cannot open destination: %s\n", error->message); @@ -281,7 +281,7 @@ rspamadm_fuzzy_merge (gint argc, gchar **argv) unique_ops = g_hash_table_new (rspamadm_op_hash, rspamadm_op_equal); for (i = 0; i < nsrc; i++) { - src = rspamd_sqlite3_open_or_create (pool, sources[i], NULL, &error); + src = rspamd_sqlite3_open_or_create (pool, sources[i], NULL, 0, &error); if (src == NULL) { rspamd_fprintf(stderr, "cannot open source %s: %s\n", sources[i], @@ -299,7 +299,7 @@ rspamadm_fuzzy_merge (gint argc, gchar **argv) nsrc_shingles = 0; src = g_ptr_array_index (source_dbs, i); - + if (!quiet) { rspamd_printf ("reading data from %s\n", sources[i]); } @@ -449,7 +449,7 @@ rspamadm_fuzzy_merge (gint argc, gchar **argv) sqlite3_finalize (stmt); sqlite3_close (src); } - + if (!quiet) { rspamd_printf ("start writing to %s, %ud ops pending\n", target, ops->len); } diff --git a/src/rspamadm/lua_repl.c b/src/rspamadm/lua_repl.c index c7f873df2..f96aa2890 100644 --- a/src/rspamadm/lua_repl.c +++ b/src/rspamadm/lua_repl.c @@ -23,6 +23,7 @@ #include "task.h" #include "unix-std.h" #include "linenoise.h" +#include "worker_util.h" #ifdef WITH_LUAJIT #include <luajit.h> #endif @@ -31,6 +32,7 @@ static gchar **paths = NULL; static gchar **scripts = NULL; static gchar *histfile = NULL; static guint max_history = 2000; +static gchar *serve = NULL; static const char *default_history_file = ".rspamd_repl.hist"; @@ -94,6 +96,8 @@ static GOptionEntry entries[] = { "Load history from the specified file", NULL}, {"max-history", 'm', 0, G_OPTION_ARG_INT, &max_history, "Store this number of history entries", NULL}, + {"serve", 'S', 0, G_OPTION_ARG_STRING, &serve, + "Serve http lua server", NULL}, {NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL} }; @@ -108,6 +112,7 @@ rspamadm_lua_help (gboolean full_help) "Where options are:\n\n" "-p: add additional lua paths (may be repeated)\n" "-s: load scripts on start from specified files (may be repeated)\n" + "-S: listen on a specified address as HTTP server\n" "--help: shows available options and commands"; } else { @@ -456,6 +461,141 @@ rspamadm_lua_run_repl (lua_State *L) } } +struct rspamadm_lua_repl_context { + struct rspamd_http_connection_router *rt; + lua_State *L; +}; + +struct rspamadm_lua_repl_session { + struct rspamd_http_connection_router *rt; + rspamd_inet_addr_t *addr; + struct rspamadm_lua_repl_context *ctx; + gint sock; +}; + +static void +rspamadm_lua_accept_cb (gint fd, short what, void *arg) +{ + struct rspamadm_lua_repl_context *ctx = arg; + rspamd_inet_addr_t *addr; + struct rspamadm_lua_repl_session *session; + gint nfd; + + if ((nfd = + rspamd_accept_from_socket (fd, &addr, NULL)) == -1) { + rspamd_fprintf (stderr, "accept failed: %s", strerror (errno)); + return; + } + /* Check for EAGAIN */ + if (nfd == 0) { + return; + } + + session = g_slice_alloc0 (sizeof (*session)); + session->rt = ctx->rt; + session->ctx = ctx; + session->addr = addr; + session->sock = nfd; + + rspamd_http_router_handle_socket (ctx->rt, nfd, session); +} + +static void +rspamadm_lua_error_handler (struct rspamd_http_connection_entry *conn_ent, + GError *err) +{ + struct rspamadm_lua_repl_session *session = conn_ent->ud; + + rspamd_fprintf (stderr, "http error occurred: %s\n", err->message); +} + +static void +rspamadm_lua_finish_handler (struct rspamd_http_connection_entry *conn_ent) +{ + struct rspamadm_lua_repl_session *session = conn_ent->ud; + + g_slice_free1 (sizeof (*session), session); +} + +/* + * Exec command handler: + * request: /exec + * body: lua script + * reply: json {"status": "ok", "reply": {<lua json object>}} + */ +static int +rspamadm_lua_handle_exec (struct rspamd_http_connection_entry *conn_ent, + struct rspamd_http_message *msg) +{ + GString *tb; + gint err_idx, i; + lua_State *L; + struct rspamadm_lua_repl_context *ctx; + struct rspamadm_lua_repl_session *session = conn_ent->ud; + ucl_object_t *obj, *elt; + + ctx = session->ctx; + L = ctx->L; + + if (msg->body == NULL || msg->body->len == 0) { + rspamd_controller_send_error (conn_ent, 400, "Empty lua script"); + + return 0; + } + + lua_pushcfunction (L, &rspamd_lua_traceback); + err_idx = lua_gettop (L); + + /* First try return + input */ + tb = g_string_sized_new (msg->body->len + sizeof ("return ")); + rspamd_printf_gstring (tb, "return %V", msg->body); + + if (luaL_loadstring (L, tb->str) != 0) { + /* Reset stack */ + lua_settop (L, 0); + lua_pushcfunction (L, &rspamd_lua_traceback); + err_idx = lua_gettop (L); + /* Try with no return */ + if (luaL_loadbuffer (L, msg->body->str, msg->body->len, "http input") != 0) { + rspamd_controller_send_error (conn_ent, 400, "Invalid lua script"); + + return 0; + } + } + + g_string_free (tb, TRUE); + + if (lua_pcall (L, 0, LUA_MULTRET, err_idx) != 0) { + tb = lua_touserdata (L, -1); + rspamd_controller_send_error (conn_ent, 500, "call failed: %v\n", tb); + g_string_free (tb, TRUE); + lua_settop (L, 0); + + return 0; + } + + obj = ucl_object_typed_new (UCL_ARRAY); + + for (i = err_idx + 1; i <= lua_gettop (L); i ++) { + if (lua_isfunction (L, i)) { + /* XXX: think about API */ + } + else { + elt = ucl_object_lua_import (L, i); + + if (elt) { + ucl_array_append (obj, elt); + } + } + } + + rspamd_controller_send_ucl (conn_ent, obj); + ucl_object_unref (obj); + lua_settop (L, 0); + + return 0; +} + static void rspamadm_lua (gint argc, gchar **argv) { @@ -496,6 +636,55 @@ rspamadm_lua (gint argc, gchar **argv) } } + if (serve) { + /* HTTP Server mode */ + GPtrArray *addrs = NULL; + gchar *name = NULL; + struct event_base *ev_base; + struct rspamd_http_connection_router *http; + gint fd; + struct rspamadm_lua_repl_context *ctx; + + if (!rspamd_parse_host_port_priority (serve, &addrs, NULL, &name, + 10000, NULL)) { + fprintf (stderr, "cannot listen on %s", serve); + exit (EXIT_FAILURE); + } + + ev_base = event_init (); + ctx = g_slice_alloc0 (sizeof (*ctx)); + http = rspamd_http_router_new (rspamadm_lua_error_handler, + rspamadm_lua_finish_handler, + NULL, ev_base, + NULL, NULL); + ctx->L = L; + ctx->rt = http; + rspamd_http_router_add_path (http, + "/exec", + rspamadm_lua_handle_exec); + + for (i = 0; i < addrs->len; i ++) { + rspamd_inet_addr_t *addr = g_ptr_array_index (addrs, i); + + fd = rspamd_inet_address_listen (addr, SOCK_STREAM, TRUE); + if (fd != -1) { + struct event *ev; + + ev = g_slice_alloc0 (sizeof (*ev)); + event_set (ev, fd, EV_READ|EV_PERSIST, rspamadm_lua_accept_cb, + ctx); + event_base_set (ev_base, ev); + event_add (ev, NULL); + rspamd_printf ("listen on %s\n", + rspamd_inet_address_to_string_pretty (addr)); + } + } + + event_base_loop (ev_base, 0); + + exit (EXIT_SUCCESS); + } + if (histfile == NULL) { const gchar *homedir; GString *hist_path; diff --git a/src/rspamd.c b/src/rspamd.c index 72e676267..922327f38 100644 --- a/src/rspamd.c +++ b/src/rspamd.c @@ -340,20 +340,38 @@ rspamd_fork_delayed (struct rspamd_worker_conf *cf, } static GList * -create_listen_socket (GPtrArray *addrs, guint cnt, gint listen_type) +create_listen_socket (GPtrArray *addrs, guint cnt, + enum rspamd_worker_socket_type listen_type) { GList *result = NULL; gint fd; guint i; - gpointer p; + struct rspamd_worker_listen_socket *ls; g_ptr_array_sort (addrs, rspamd_inet_address_compare_ptr); for (i = 0; i < cnt; i ++) { - fd = rspamd_inet_address_listen (g_ptr_array_index (addrs, i), - listen_type, TRUE); - if (fd != -1) { - p = GINT_TO_POINTER (fd); - result = g_list_prepend (result, p); + + if (listen_type & RSPAMD_WORKER_SOCKET_TCP) { + fd = rspamd_inet_address_listen (g_ptr_array_index (addrs, i), + SOCK_STREAM, TRUE); + if (fd != -1) { + ls = g_slice_alloc0 (sizeof (*ls)); + ls->addr = g_ptr_array_index (addrs, i); + ls->fd = fd; + ls->type = RSPAMD_WORKER_SOCKET_TCP; + result = g_list_prepend (result, ls); + } + } + if (listen_type & RSPAMD_WORKER_SOCKET_UDP) { + fd = rspamd_inet_address_listen (g_ptr_array_index (addrs, i), + SOCK_DGRAM, TRUE); + if (fd != -1) { + ls = g_slice_alloc0 (sizeof (*ls)); + ls->addr = g_ptr_array_index (addrs, i); + ls->fd = fd; + ls->type = RSPAMD_WORKER_SOCKET_UDP; + result = g_list_prepend (result, ls); + } } } @@ -492,7 +510,7 @@ spawn_workers (struct rspamd_main *rspamd_main, struct event_base *ev_base) key = make_listen_key (bcf); if ((p = g_hash_table_lookup (listen_sockets, - GINT_TO_POINTER (key))) == NULL) { + GINT_TO_POINTER (key))) == NULL) { if (!bcf->is_systemd) { /* Create listen socket */ @@ -941,7 +959,7 @@ rspamd_control_handler (gint fd, short what, gpointer arg) gint nfd; if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, NULL)) == -1) { msg_warn_main ("accept failed: %s", strerror (errno)); return; } diff --git a/src/rspamd.h b/src/rspamd.h index bd4ca9684..c0c60185d 100644 --- a/src/rspamd.h +++ b/src/rspamd.h @@ -165,12 +165,24 @@ enum rspamd_worker_flags { RSPAMD_WORKER_ALWAYS_START = (1 << 4), }; +enum rspamd_worker_socket_type { + RSPAMD_WORKER_SOCKET_NONE = 0, + RSPAMD_WORKER_SOCKET_TCP = (1 << 0), + RSPAMD_WORKER_SOCKET_UDP = (1 << 1), +}; + +struct rspamd_worker_listen_socket { + const rspamd_inet_addr_t *addr; + gint fd; + enum rspamd_worker_socket_type type; +}; + typedef struct worker_s { const gchar *name; gpointer (*worker_init_func)(struct rspamd_config *cfg); void (*worker_start_func)(struct rspamd_worker *worker); enum rspamd_worker_flags flags; - gint listen_type; + enum rspamd_worker_socket_type listen_type; guint worker_version; guint64 rspamd_version; const gchar *rspamd_features; diff --git a/src/rspamd_proxy.c b/src/rspamd_proxy.c index 32a2937e8..5864d6020 100644 --- a/src/rspamd_proxy.c +++ b/src/rspamd_proxy.c @@ -57,7 +57,7 @@ worker_t rspamd_proxy_worker = { init_rspamd_proxy, /* Init function */ start_rspamd_proxy, /* Start function */ RSPAMD_WORKER_HAS_SOCKET | RSPAMD_WORKER_KILLABLE, - SOCK_STREAM, /* TCP socket */ + RSPAMD_WORKER_SOCKET_TCP, /* TCP socket */ RSPAMD_WORKER_VER }; @@ -65,6 +65,8 @@ struct rspamd_http_upstream { gchar *name; struct upstream_list *u; struct rspamd_cryptobox_pubkey *key; + gint parser_from_ref; + gint parser_to_ref; }; struct rspamd_http_mirror { @@ -73,7 +75,8 @@ struct rspamd_http_mirror { struct upstream_list *u; struct rspamd_cryptobox_pubkey *key; gdouble prob; - gint parser_ref; + gint parser_from_ref; + gint parser_to_ref; }; static const guint64 rspamd_rspamd_proxy_magic = 0xcdeb4fd1fc351980ULL; @@ -125,6 +128,8 @@ struct rspamd_proxy_backend_connection { struct rspamd_proxy_session *s; gint backend_sock; enum rspamd_backend_flags flags; + gint parser_from_ref; + gint parser_to_ref; }; struct rspamd_proxy_session { @@ -134,9 +139,10 @@ struct rspamd_proxy_session { struct rspamd_http_connection *client_conn; gpointer map; gsize map_len; - gint client_sock; struct rspamd_proxy_backend_connection *master_conn; GPtrArray *mirror_conns; + gint client_sock; + gboolean is_spamc; ref_entry_t ref; }; @@ -147,6 +153,101 @@ rspamd_proxy_quark (void) } static gboolean +rspamd_proxy_parse_lua_parser (lua_State *L, const ucl_object_t *obj, + gint *ref_from, gint *ref_to, GError **err) +{ + const gchar *lua_script; + gsize slen; + gint err_idx, ref_idx; + GString *tb = NULL; + gboolean has_ref = FALSE; + + g_assert (obj != NULL); + g_assert (ref_from != NULL); + g_assert (ref_to != NULL); + + *ref_from = -1; + *ref_to = -1; + + lua_script = ucl_object_tolstring (obj, &slen); + lua_pushcfunction (L, &rspamd_lua_traceback); + err_idx = lua_gettop (L); + + /* Load data */ + if (luaL_loadbuffer (L, lua_script, slen, "proxy parser") != 0) { + g_set_error (err, + rspamd_proxy_quark (), + EINVAL, + "cannot load lua parser script: %s", + lua_tostring (L, -1)); + lua_settop (L, 0); /* Error function */ + + return FALSE; + } + + /* Now do it */ + if (lua_pcall (L, 0, 1, err_idx) != 0) { + tb = lua_touserdata (L, -1); + g_set_error (err, + rspamd_proxy_quark (), + EINVAL, + "cannot init lua parser script: %s", + tb->str); + g_string_free (tb, TRUE); + lua_settop (L, 0); + + return FALSE; + } + + if (lua_istable (L, -1)) { + /* + * We have a table, so we check for two keys: + * 'from' -> function + * 'to' -> function + * + * From converts parent request to a client one + * To converts client request to a parent one + */ + lua_pushstring (L, "from"); + lua_gettable (L, -2); + + if (lua_isfunction (L, -1)) { + ref_idx = luaL_ref (L, LUA_REGISTRYINDEX); + *ref_from = ref_idx; + has_ref = TRUE; + } + + lua_pushstring (L, "to"); + lua_gettable (L, -2); + + if (lua_isfunction (L, -1)) { + ref_idx = luaL_ref (L, LUA_REGISTRYINDEX); + *ref_to = ref_idx; + has_ref = TRUE; + } + } + else if (!lua_isfunction (L, -1)) { + g_set_error (err, + rspamd_proxy_quark (), + EINVAL, + "cannot init lua parser script: " + "must return function"); + lua_settop (L, 0); + + return FALSE; + } + else { + /* Just parser from protocol */ + ref_idx = luaL_ref (L, LUA_REGISTRYINDEX); + *ref_from = ref_idx; + lua_settop (L, 0); + has_ref = TRUE; + } + + return has_ref; +} + +static gboolean rspamd_proxy_parse_upstream (rspamd_mempool_t *pool, const ucl_object_t *obj, gpointer ud, @@ -157,8 +258,10 @@ rspamd_proxy_parse_upstream (rspamd_mempool_t *pool, struct rspamd_http_upstream *up = NULL; struct rspamd_proxy_ctx *ctx; struct rspamd_rcl_struct_parser *pd = ud; + lua_State *L; ctx = pd->user_struct; + L = ctx->lua_state; if (ucl_object_type (obj) != UCL_OBJECT) { g_set_error (err, rspamd_proxy_quark (), 100, @@ -176,6 +279,8 @@ rspamd_proxy_parse_upstream (rspamd_mempool_t *pool, } up = g_slice_alloc0 (sizeof (*up)); + up->parser_from_ref = -1; + up->parser_to_ref = -1; up->name = g_strdup (ucl_object_tostring (elt)); elt = ucl_object_lookup (obj, "key"); @@ -213,6 +318,18 @@ rspamd_proxy_parse_upstream (rspamd_mempool_t *pool, ctx->default_upstream = up; } + /* + * Accept lua function here in form + * fun :: String -> UCL + */ + elt = ucl_object_lookup (obj, "parser"); + if (elt) { + if (!rspamd_proxy_parse_lua_parser (L, elt, &up->parser_from_ref, + &up->parser_to_ref, err)) { + goto err; + } + } + g_hash_table_insert (ctx->upstreams, up->name, up); return TRUE; @@ -227,6 +344,13 @@ err: rspamd_pubkey_unref (up->key); } + if (up->parser_from_ref != -1) { + luaL_unref (L, LUA_REGISTRYINDEX, up->parser_from_ref); + } + if (up->parser_to_ref != -1) { + luaL_unref (L, LUA_REGISTRYINDEX, up->parser_to_ref); + } + g_slice_free1 (sizeof (*up), up); } @@ -266,7 +390,8 @@ rspamd_proxy_parse_mirror (rspamd_mempool_t *pool, up = g_slice_alloc0 (sizeof (*up)); up->name = g_strdup (ucl_object_tostring (elt)); - up->parser_ref = -1; + up->parser_to_ref = -1; + up->parser_from_ref = -1; elt = ucl_object_lookup (obj, "key"); if (elt != NULL) { @@ -312,55 +437,10 @@ rspamd_proxy_parse_mirror (rspamd_mempool_t *pool, */ elt = ucl_object_lookup (obj, "parser"); if (elt) { - const gchar *lua_script; - gsize slen; - gint err_idx, ref_idx; - GString *tb = NULL; - - lua_script = ucl_object_tolstring (elt, &slen); - lua_pushcfunction (L, &rspamd_lua_traceback); - err_idx = lua_gettop (L); - - /* Load data */ - if (luaL_loadbuffer (L, lua_script, slen, "proxy parser") != 0) { - g_set_error (err, - rspamd_proxy_quark (), - EINVAL, - "cannot load lua parser script: %s", - lua_tostring (L, -1)); - lua_settop (L, 0); /* Error function */ - - goto err; - } - - /* Now do it */ - if (lua_pcall (L, 0, 1, err_idx) != 0) { - tb = lua_touserdata (L, -1); - g_set_error (err, - rspamd_proxy_quark (), - EINVAL, - "cannot init lua parser script: %s", - tb->str); - g_string_free (tb, TRUE); - lua_settop (L, 0); - - goto err; - } - - if (!lua_isfunction (L, -1)) { - g_set_error (err, - rspamd_proxy_quark (), - EINVAL, - "cannot init lua parser script: " - "must return function"); - lua_settop (L, 0); - + if (!rspamd_proxy_parse_lua_parser (L, elt, &up->parser_from_ref, + &up->parser_to_ref, err)) { goto err; } - - ref_idx = luaL_ref (L, LUA_REGISTRYINDEX); - up->parser_ref = ref_idx; - lua_settop (L, 0); } elt = ucl_object_lookup_any (obj, "settings", "settings_id", NULL); @@ -382,8 +462,11 @@ err: rspamd_pubkey_unref (up->key); } - if (up->parser_ref != -1) { - luaL_unref (L, LUA_REGISTRYINDEX, up->parser_ref); + if (up->parser_from_ref != -1) { + luaL_unref (L, LUA_REGISTRYINDEX, up->parser_from_ref); + } + if (up->parser_to_ref != -1) { + luaL_unref (L, LUA_REGISTRYINDEX, up->parser_to_ref); } g_slice_free1 (sizeof (*up), up); @@ -647,7 +730,7 @@ proxy_call_cmp_script (struct rspamd_proxy_session *session, gint cbref) lua_createtable (L, 0, session->mirror_conns->len + 1); /* Now push master results */ - if (session->master_conn->results) { + if (session->master_conn && session->master_conn->results) { lua_pushstring (L, "master"); ucl_object_push_lua (L, session->master_conn->results, true); lua_settable (L, -3); @@ -721,6 +804,10 @@ proxy_session_dtor (struct rspamd_proxy_session *session) } } + if (session->master_conn->results) { + ucl_object_unref (session->master_conn->results); + } + g_ptr_array_free (session->mirror_conns, TRUE); rspamd_inet_address_destroy (session->client_addr); close (session->client_sock); @@ -846,7 +933,7 @@ proxy_backend_mirror_finish_handler (struct rspamd_http_connection *conn, session = bk_conn->s; if (!proxy_backend_parse_results (session, bk_conn, session->ctx->lua_state, - -1, msg->body_buf.begin, msg->body_buf.len)) { + bk_conn->parser_from_ref, msg->body_buf.begin, msg->body_buf.len)) { msg_warn_session ("cannot parse results from the mirror backend %s:%s", bk_conn->name, rspamd_inet_address_to_string (rspamd_upstream_addr (bk_conn->up))); @@ -888,6 +975,8 @@ proxy_open_mirror_connections (struct rspamd_proxy_session *session) bk_conn->up = rspamd_upstream_get (m->u, RSPAMD_UPSTREAM_ROUND_ROBIN, NULL, 0); + bk_conn->parser_from_ref = m->parser_from_ref; + bk_conn->parser_to_ref = m->parser_to_ref; if (bk_conn->up == NULL) { msg_err_session ("cannot select upstream for %s", m->name); @@ -907,6 +996,11 @@ proxy_open_mirror_connections (struct rspamd_proxy_session *session) msg = rspamd_http_connection_copy_msg (session->client_conn); rspamd_http_message_remove_header (msg, "Content-Length"); rspamd_http_message_remove_header (msg, "Key"); + msg->method = HTTP_GET; + + if (msg->url->len == 0) { + msg->url = rspamd_fstring_append (msg->url, "/check", strlen ("/check")); + } if (m->settings_id != NULL) { rspamd_http_message_remove_header (msg, "Settings-ID"); @@ -974,18 +1068,33 @@ proxy_backend_master_finish_handler (struct rspamd_http_connection *conn, session = bk_conn->s; rspamd_http_connection_steal_msg (session->master_conn->backend_conn); + rspamd_http_message_remove_header (msg, "Content-Length"); rspamd_http_message_remove_header (msg, "Key"); rspamd_http_connection_reset (session->master_conn->backend_conn); - rspamd_http_connection_write_message (session->client_conn, - msg, NULL, NULL, session, session->client_sock, - &session->ctx->io_tv, session->ctx->ev_base); if (!proxy_backend_parse_results (session, bk_conn, session->ctx->lua_state, - -1, msg->body_buf.begin, msg->body_buf.len)) { + bk_conn->parser_from_ref, msg->body_buf.begin, msg->body_buf.len)) { msg_warn_session ("cannot parse results from the master backend"); } + + if (session->is_spamc) { + /* We need to reformat ucl to fit with legacy spamc protocol */ + if (bk_conn->results) { + rspamd_fstring_clear (msg->body); + rspamd_ucl_torspamc_output (bk_conn->results, &msg->body); + } + else { + msg_warn_session ("cannot parse results from the master backend, " + "return them as is"); + } + } + + rspamd_http_connection_write_message (session->client_conn, + msg, NULL, NULL, session, session->client_sock, + &session->ctx->io_tv, session->ctx->ev_base); + return 0; } @@ -1017,6 +1126,17 @@ proxy_client_finish_handler (struct rspamd_http_connection *conn, session->master_conn->name = "master"; host = rspamd_http_message_find_header (msg, "Host"); + /* Reset spamc legacy */ + if (msg->method >= HTTP_SYMBOLS) { + msg->method = HTTP_GET; + session->is_spamc = TRUE; + msg_info_session ("enabling legacy rspamc mode for session"); + } + + if (msg->url->len == 0) { + msg->url = rspamd_fstring_append (msg->url, "/check", strlen ("/check")); + } + if (host == NULL) { backend = session->ctx->default_upstream; } @@ -1048,7 +1168,9 @@ proxy_client_finish_handler (struct rspamd_http_connection *conn, SOCK_STREAM, TRUE); if (session->master_conn->backend_sock == -1) { - msg_err_session ("cannot connect upstream for %s", host ? hostbuf : "default"); + msg_err_session ("cannot connect upstream: %s(%s)", + host ? hostbuf : "default", + rspamd_inet_address_to_string (rspamd_upstream_addr (session->master_conn->up))); rspamd_upstream_fail (session->master_conn->up); goto err; } @@ -1070,6 +1192,8 @@ proxy_client_finish_handler (struct rspamd_http_connection *conn, RSPAMD_HTTP_CLIENT_SIMPLE, RSPAMD_HTTP_CLIENT, session->ctx->keys_cache); + session->master_conn->parser_from_ref = backend->parser_from_ref; + session->master_conn->parser_to_ref = backend->parser_to_ref; rspamd_http_connection_set_key (session->master_conn->backend_conn, session->ctx->local_key); @@ -1090,6 +1214,10 @@ proxy_client_finish_handler (struct rspamd_http_connection *conn, return 0; err: + rspamd_http_connection_steal_msg (session->client_conn); + rspamd_http_message_remove_header (msg, "Content-Length"); + rspamd_http_message_remove_header (msg, "Key"); + rspamd_http_connection_reset (session->client_conn); proxy_client_write_error (session, 404, "Backend not found"); return 0; @@ -1107,7 +1235,7 @@ proxy_accept_socket (gint fd, short what, void *arg) ctx = worker->ctx; if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, worker->accept_events)) == -1) { msg_warn ("accept failed: %s", strerror (errno)); return; } diff --git a/src/smtp_proxy.c b/src/smtp_proxy.c index 0202e3cfe..8eebc2c86 100644 --- a/src/smtp_proxy.c +++ b/src/smtp_proxy.c @@ -44,7 +44,7 @@ worker_t smtp_proxy_worker = { init_smtp_proxy, /* Init function */ start_smtp_proxy, /* Start function */ RSPAMD_WORKER_HAS_SOCKET | RSPAMD_WORKER_KILLABLE, - SOCK_STREAM, /* TCP socket */ + RSPAMD_WORKER_SOCKET_TCP, /* TCP socket */ RSPAMD_WORKER_VER /* Version info */ }; @@ -902,7 +902,7 @@ accept_socket (gint fd, short what, void *arg) ctx = worker->ctx; if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, worker->accept_events)) == -1) { msg_warn ("accept failed: %s", strerror (errno)); return; } diff --git a/src/worker.c b/src/worker.c index 735cbc6a9..81c5b1c63 100644 --- a/src/worker.c +++ b/src/worker.c @@ -50,7 +50,7 @@ worker_t normal_worker = { init_worker, /* Init function */ start_worker, /* Start function */ RSPAMD_WORKER_HAS_SOCKET|RSPAMD_WORKER_KILLABLE, - SOCK_STREAM, /* TCP socket */ + RSPAMD_WORKER_SOCKET_TCP, /* TCP socket */ RSPAMD_WORKER_VER /* Version info */ }; @@ -266,7 +266,7 @@ accept_socket (gint fd, short what, void *arg) } if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, worker->accept_events)) == -1) { msg_warn_ctx ("accept failed: %s", strerror (errno)); return; } @@ -320,9 +320,9 @@ accept_socket (gint fd, short what, void *arg) rspamd_http_connection_read_message (task->http_conn, task, - nfd, - &ctx->io_tv, - ctx->ev_base); + nfd, + &ctx->io_tv, + ctx->ev_base); } #ifdef WITH_HYPERSCAN diff --git a/test/rspamd_http_test.c b/test/rspamd_http_test.c index 428c510c4..88ac48676 100644 --- a/test/rspamd_http_test.c +++ b/test/rspamd_http_test.c @@ -55,7 +55,7 @@ rspamd_server_accept (gint fd, short what, void *arg) gint nfd; if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, NULL)) == -1) { msg_warn ("accept failed: %s", strerror (errno)); return; } diff --git a/utils/rspamd_http_server.c b/utils/rspamd_http_server.c index ad01085df..9af96fad9 100644 --- a/utils/rspamd_http_server.c +++ b/utils/rspamd_http_server.c @@ -134,7 +134,7 @@ rspamd_server_accept (gint fd, short what, void *arg) do { if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, NULL)) == -1) { rspamd_fprintf (stderr, "accept failed: %s", strerror (errno)); return; } |