diff options
author | Larry Hynes <larry@larryhynes.com> | 2016-06-16 21:58:42 +0100 |
---|---|---|
committer | Larry Hynes <larry@larryhynes.com> | 2016-06-16 21:58:42 +0100 |
commit | dc9be11f4a10c3181f285cad626877a47a1f32cd (patch) | |
tree | a3c4e2ef0e517172c1bb4681c6b0eb26bbec38d0 | |
parent | 21fb842ee0d504778986e7a21630632cb17bad26 (diff) | |
parent | 5e87d49bc798d06a698db4735129f514b88be503 (diff) | |
download | rspamd-dc9be11f4a10c3181f285cad626877a47a1f32cd.tar.gz rspamd-dc9be11f4a10c3181f285cad626877a47a1f32cd.zip |
Merge remote-tracking branch 'upstream/master'
* upstream/master: (110 commits)
[Fix] Fix detection of URLs in text parts
[Fix] Strip '\r\n' properly
[Fix] More fixes about shared memory in proxy
[Fix] One more try to fix redis
[Feature] Use one pass to remove newlines and store their positions
[Fix] Fix descriptors leak on shmem detaching
[Fix] More and more fixes to redis states
[Fix] Another try to fix redis states
[Fix] Set terminated state before calling of async free
[Fix] Fix state on timeout
[Fix] Another try to fix redis mess
[Fix] Fix redis timeout events handling
[Minor] Add some more tests
[Feature] Try to read on fuzzy timeout to avoid fake timeouts
[Feature] Add ESMTPSA received type
[Fix] Fix stack growing
[Feature] Further relax parser
[Fix] Fix parsing of nested braces in SMTP comments
[Minor] Add `application/octet-stream` mime type for `pdf` extension
[Fix] Fix parser
...
99 files changed, 4264 insertions, 2615 deletions
diff --git a/.travis.yml b/.travis.yml index cf1a0b50f..d2c8bf754 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ language: c before_script: - sudo apt-get update -qq - - sudo apt-get install -qq cmake libevent-dev libglib2.0-dev libgmime-2.6-dev libluajit-5.1-dev liblua5.1-0-dev libpcre3-dev libsqlite3-dev libmagic-dev + - sudo apt-get install -qq cmake libevent-dev libglib2.0-dev libgmime-2.6-dev libluajit-5.1-dev liblua5.1-0-dev libpcre3-dev libsqlite3-dev libmagic-dev ragel make libssl-dev # - echo $TRAVIS_OS_NAME # - if [ "$TRAVIS_OS_NAME" == "linux" ]; then sudo apt-get update -qq ; fi # - if [ "$TRAVIS_OS_NAME" == "linux" ]; then sudo apt-get install -qq cmake libevent-dev libglib2.0-dev libgmime-2.6-dev libluajit-5.1-dev libpcre3-dev libsqlite3-dev libhiredis-dev ; fi diff --git a/CMakeLists.txt b/CMakeLists.txt index 219fdce41..84a3f5005 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,6 +65,11 @@ OPTION(ENABLE_JEMALLOC "Build rspamd with jemalloc allocator [default: OFF] INCLUDE(FindArch.cmake) TARGET_ARCHITECTURE(ARCH) +INCLUDE(FindRagel.cmake) +IF(NOT RAGEL_FOUND) + MESSAGE(FATAL_ERROR "Ragel is required to build rspamd") +ENDIF() + IF ("${ARCH}" STREQUAL "x86_64") IF (ENABLE_HYPERSCAN MATCHES "ON") ENABLE_LANGUAGE(CXX) @@ -613,7 +618,6 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") LIST(APPEND CMAKE_REQUIRED_LIBRARIES socket) LIST(APPEND CMAKE_REQUIRED_LIBRARIES umem) # Ugly hack, but FindOpenSSL on Solaris does not link with libcrypto - LIST(APPEND CMAKE_REQUIRED_LIBRARIES crypto) SET(CMAKE_VERBOSE_MAKEFILE ON) SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE) SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib:${RSPAMD_LIBDIR}") @@ -671,8 +675,10 @@ ProcessPackage(SQLITE3 LIBRARY sqlite3 INCLUDE sqlite3.h INCLUDE_SUFFIXES includ ROOT ${SQLITE3_ROOT_DIR} MODULES sqlite3 sqlite) ProcessPackage(ICONV LIBRARY iconv libiconv libiconv-2 c INCLUDE iconv.h INCLUDE_SUFFIXES include/libiconv ROOT ${ICONV_ROOT_DIR} MODULES iconv) 
-ProcessPackage(OPENSSL LIBRARY crypto INCLUDE err.h INCLUDE_SUFFIXES include/openssl - ROOT ${OPENSSL_ROOT_DIR} MODULES openssl) +ProcessPackage(LIBCRYPT LIBRARY crypto INCLUDE err.h INCLUDE_SUFFIXES include/openssl + ROOT ${OPENSSL_ROOT_DIR} MODULES openssl libcrypt) +ProcessPackage(LIBSSL LIBRARY ssl INCLUDE ssl.h INCLUDE_SUFFIXES include/openssl + ROOT ${OPENSSL_ROOT_DIR} MODULES openssl libssl) ProcessPackage(MAGIC LIBRARY magic INCLUDE magic.h INCLUDE_SUFFIXES include/libmagic ROOT ${LIBMAGIC_ROOT_DIR} MODULES magic) @@ -690,9 +696,7 @@ IF (ENABLE_FANN MATCHES "ON") ENDIF () #Check for openssl (required for dkim) -IF(WITH_OPENSSL) - SET(HAVE_OPENSSL 1) -ENDIF(WITH_OPENSSL) +SET(HAVE_OPENSSL 1) IF(GMIME2_VERSION VERSION_GREATER "2.4.0" OR NOT GMIME2_VERSION) SET(GMIME24 1) @@ -725,10 +729,11 @@ CHECK_C_COMPILER_FLAG(-Wunused-variable SUPPORT_WUNUSED_VAR) CHECK_C_COMPILER_FLAG(-Wno-pointer-sign SUPPORT_WPOINTER_SIGN) CHECK_C_COMPILER_FLAG(-Wstrict-prototypes SUPPORT_WSTRICT_PROTOTYPES) CHECK_C_COMPILER_FLAG(-pedantic SUPPORT_PEDANTIC_FLAG) +CHECK_C_COMPILER_FLAG(-Wno-unused-const-variable SUPPORT_WNO_UNUSED_CONST) # GCC 6 specific CHECK_C_COMPILER_FLAG(-Wnull-dereference SUPPORT_WNULL_DEREFERENCE) CHECK_C_COMPILER_FLAG(-Wduplicated-cond SUPPORT_WDUPLICATED_COND) -CHECK_C_COMPILER_FLAG(-Wlogical-op SUPPORT_WLOGICAL_OP) + IF(NOT "${CMAKE_C_COMPILER_ID}" MATCHES SunPro) CHECK_C_COMPILER_FLAG("-std=c11" SUPPORT_STD11_FLAG) CHECK_C_COMPILER_FLAG("-std=c99" SUPPORT_STD99_FLAG) @@ -769,6 +774,9 @@ ENDIF() IF(SUPPORT_WLOGICAL_OP) SET(CMAKE_C_WARN_FLAGS "${CMAKE_C_WARN_FLAGS} -Wlogical-op") ENDIF() +IF(SUPPORT_WNO_UNUSED_CONST) + SET(CMAKE_C_WARN_FLAGS "${CMAKE_C_WARN_FLAGS} -Wno-unused-const-variable") +ENDIF() IF(SUPPORT_STD11_FLAG) SET(CMAKE_C_WARN_FLAGS "${CMAKE_C_WARN_FLAGS} -std=c11") ELSE(SUPPORT_STD11_FLAG) @@ -927,6 +935,8 @@ CHECK_SYMBOL_EXISTS(SOCK_SEQPACKET "sys/types.h;sys/socket.h" HAVE_SOCK_SEQPACKE CHECK_SYMBOL_EXISTS(I_SETSIG "sys/types.h;sys/ioctl.h" 
HAVE_SETSIG) CHECK_SYMBOL_EXISTS(O_ASYNC "sys/types.h;sys/fcntl.h" HAVE_OASYNC) CHECK_SYMBOL_EXISTS(O_NOFOLLOW "sys/types.h;sys/fcntl.h" HAVE_ONOFOLLOW) +LIST(APPEND CMAKE_REQUIRED_INCLUDES "${LIBSSL_INCLUDE}") +CHECK_SYMBOL_EXISTS(SSL_set_tlsext_host_name "openssl/ssl.h" HAVE_SSL_TLSEXT_HOSTNAME) IF(ENABLE_PCRE2 MATCHES "ON") IF(HAVE_PCRE_JIT) diff --git a/FindRagel.cmake b/FindRagel.cmake new file mode 100644 index 000000000..a058b7fb1 --- /dev/null +++ b/FindRagel.cmake @@ -0,0 +1,96 @@ +# - Find Ragel executable and provides macros to generate custom build rules +# The module defines the following variables: +# +# RAGEL_EXECUTABLE - path to the ragel program +# RAGEL_VERSION - version of ragel +# RAGEL_FOUND - true if the program was found +# +# If ragel is found, the module defines the macros: +# +# RAGEL_TARGET(<Name> INPUTS <inputs> OUTPUT <output> +# [COMPILE_FLAGS <string>] [DEPENDS <depends>]) +# +# which will create a custom rule to generate a state machine. <RagelInp> is +# the path to a Ragel file. <CodeOutput> is the name of the source file +# generated by ragel. If COMPILE_FLAGS option is specified, the next +# parameter is added in the ragel command line. 
+# +# The macro defines a set of variables: +# RAGEL_${Name}_DEFINED - true if the macro ran successfully +# RAGEL_${Name}_INPUT - The input source file, an alias for <RagelInp> +# RAGEL_${Name}_OUTPUT_SOURCE - The source file generated by ragel +# RAGEL_${Name}_OUTPUT_HEADER - The header file generated by ragel +# RAGEL_${Name}_OUTPUTS - The source files generated by ragel +# RAGEL_${Name}_COMPILE_FLAGS - Options used in the ragel command line +# +# ==================================================================== +# Example: +# +# find_package(RAGEL) # or e.g.: find_package(RAGEL 6.6 REQUIRED) +# RAGEL_TARGET(MyMachine machine.rl ${CMAKE_CURRENT_BINARY_DIR}/machine.cc) +# add_executable(Foo main.cc ${RAGEL_MyMachine_OUTPUTS}) +# ==================================================================== + +# 2014-02-09, Georg Sauthoff <mail@georg.so> +# +# I don't think that these few lines are even copyrightable material, +# but I am fine with using the BSD/MIT/GPL license on it ... +# +# I've used following references: +# http://www.cmake.org/cmake/help/v2.8.12/cmake.html +# /usr/share/cmake/Modules/FindFLEX.cmake +# /usr/share/cmake/Modules/FindBISON.cmake + +# uses some features which are not available in 2.6 +cmake_minimum_required(VERSION 2.8) + +find_program(RAGEL_EXECUTABLE NAMES ragel DOC "path to the ragel executable") +mark_as_advanced(RAGEL_EXECUTABLE) + +if(RAGEL_EXECUTABLE) + + execute_process(COMMAND ${RAGEL_EXECUTABLE} --version + OUTPUT_VARIABLE RAGEL_version_output + ERROR_VARIABLE RAGEL_version_error + RESULT_VARIABLE RAGEL_version_result + OUTPUT_STRIP_TRAILING_WHITESPACE) + + if(${RAGEL_version_result} EQUAL 0) + string(REGEX REPLACE "^Ragel State Machine Compiler version ([^ ]+) .*$" + "\\1" + RAGEL_VERSION "${RAGEL_version_output}") + else() + message(SEND_ERROR + "Command \"${RAGEL_EXECUTABLE} --version\" failed with output: +${RAGEL_version_error}") + endif() + + #============================================================ + # RAGEL_TARGET 
(public macro) + #============================================================ + # + macro(RAGEL_TARGET Name) + CMAKE_PARSE_ARGUMENTS(RAGEL "" "OUTPUT" + "INPUTS;DEPENDS;COMPILE_FLAGS" ${ARGN}) + add_custom_command(OUTPUT ${RAGEL_OUTPUT} + COMMAND ${RAGEL_EXECUTABLE} + ARGS ${RAGEL_COMPILE_FLAGS} -o${RAGEL_OUTPUT} ${RAGEL_INPUTS} + DEPENDS ${RAGEL_INPUTS} ${RAGEL_DEPENDS} + COMMENT + "[RAGEL][${Name}] Compiling state machine with Ragel ${RAGEL_VERSION}" + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + + set(RAGEL_${Name}_DEFINED TRUE) + set(RAGEL_${Name}_OUTPUTS ${RAGEL_OUTPUT}) + set(RAGEL_${Name}_INPUT ${RAGEL_INPUTS}) + set(RAGEL_${Name}_COMPILE_FLAGS ${RAGEL_COMPILE_FLAGS}) + endmacro() + +endif() + +# use this include when module file is located under /usr/share/cmake/Modules +#include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake) +# use this include when module file is located in build tree +include(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(RAGEL REQUIRED_VARS RAGEL_EXECUTABLE + VERSION_VAR RAGEL_VERSION)
\ No newline at end of file diff --git a/conf/metrics.conf b/conf/metrics.conf index a3c8b27c8..aa4dab022 100644 --- a/conf/metrics.conf +++ b/conf/metrics.conf @@ -834,9 +834,13 @@ metric { group "phishing" { symbol "PHISHING" { weight = 4.0; - description = "Phished mail"; + description = "Phished URL"; one_shot = true; } + symbol "PHISHED_OPENPHISH" { + weight = 7.0; + description = "Phished URL found in openphish.com"; + } } group "date" { diff --git a/conf/modules.d/mime_types.conf b/conf/modules.d/mime_types.conf index eb00e04c7..e4c80e0a1 100644 --- a/conf/modules.d/mime_types.conf +++ b/conf/modules.d/mime_types.conf @@ -27,6 +27,9 @@ mime_types { "text/plain", "text/rfc822-headers" ]; - pdf = "application/pdf"; + pdf = [ + "application/octet-stream", + "application/pdf" + ]; } } diff --git a/conf/modules.d/phishing.conf b/conf/modules.d/phishing.conf index c1b63909a..392708cde 100644 --- a/conf/modules.d/phishing.conf +++ b/conf/modules.d/phishing.conf @@ -18,6 +18,7 @@ phishing { .include(try=true,priority=1) "$LOCAL_CONFDIR/local.d/phishing.conf" .include(try=true,priority=10) "$LOCAL_CONFDIR/override.d/phishing.conf" symbol = "PHISHING"; + openphish_map = "https://www.openphish.com/feed.txt"; # Make exclusions for known redirectors redirector_domains = [ diff --git a/conf/statistic.conf b/conf/statistic.conf index f5359f2b0..2b64b7e84 100644 --- a/conf/statistic.conf +++ b/conf/statistic.conf @@ -68,6 +68,9 @@ return function(task, is_spam, is_unlearn) return true end EOD + + .include(try=true; priority=1) "$LOCAL_CONFDIR/local.d/classifier-bayes.conf" + .include(try=true; priority=10) "$LOCAL_CONFDIR/override.d/classifier-bayes.conf" } .include(try=true; priority=1) "$LOCAL_CONFDIR/local.d/statistic.conf" diff --git a/config.h.in b/config.h.in index f7bdedbfb..30b07797a 100644 --- a/config.h.in +++ b/config.h.in @@ -78,6 +78,7 @@ #cmakedefine HAVE_SETSIG 1 #cmakedefine HAVE_SIGINFO_H 1 #cmakedefine HAVE_SOCK_SEQPACKET 1 +#cmakedefine 
HAVE_SSL_TLSEXT_HOSTNAME 1 #cmakedefine HAVE_STDBOOL_H 1 #cmakedefine HAVE_STDINT_H 1 #cmakedefine HAVE_STDIO_H 1 diff --git a/contrib/mumhash/mum.h b/contrib/mumhash/mum.h index 161c5390e..ae6eec16c 100644 --- a/contrib/mumhash/mum.h +++ b/contrib/mumhash/mum.h @@ -56,21 +56,6 @@ typedef unsigned __int64 uint64_t; #include <stdint.h> #endif -#ifdef __GNUC__ -#define _MUM_ATTRIBUTE_UNUSED __attribute__((unused)) -#ifndef __clang__ -#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts))) -#define _MUM_TARGET(opts) __attribute__((__target__ (opts))) -#else -#define _MUM_OPTIMIZE(opts) -#define _MUM_TARGET(opts) -#endif -#else -#define _MUM_ATTRIBUTE_UNUSED -#define _MUM_OPTIMIZE(opts) -#define _MUM_TARGET(opts) -#endif - /* Macro saying to use 128-bit integers implemented by GCC for some targets. */ #ifndef _MUM_USE_INT128 @@ -84,6 +69,20 @@ typedef unsigned __int64 uint64_t; #endif #endif +#if defined(__GNUC__) && ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 9) || (__GNUC__ > 4)) +#define _MUM_FRESH_GCC +#endif + +#if defined(__GNUC__) && !defined(__llvm__) +#define _MUM_ATTRIBUTE_UNUSED __attribute__((unused)) +#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts))) +#define _MUM_TARGET(opts) __attribute__((__target__ (opts))) +#else +#define _MUM_ATTRIBUTE_UNUSED +#define _MUM_OPTIMIZE(opts) +#define _MUM_TARGET(opts) +#endif + /* Here are different primes randomly generated with the equal probability of their bit values. 
They are used to randomize input @@ -236,7 +235,7 @@ _mum_hash_aligned (uint64_t start, const void *key, size_t len) { result = _mum (result, _mum_unroll_prime); } n = len / sizeof (uint64_t); - for (i = 0; i < n; i++) + for (i = 0; i < (int)n; i++) result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]); len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t); switch (len) { @@ -282,7 +281,7 @@ _mum_final (uint64_t h) { return h; } -#if defined(__x86_64__) && defined(__GNUC__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 9) && !defined(__clang__) +#if defined(__x86_64__) && defined(_MUM_FRESH_GCC) /* We want to use AVX2 insn MULX instead of generic x86-64 MULQ where it is possible. Although on modern Intel processors MULQ takes @@ -366,7 +365,7 @@ mum_hash_randomize (uint64_t seed) { _mum_block_start_prime = _mum_next_factor (); _mum_unroll_prime = _mum_next_factor (); _mum_tail_prime = _mum_next_factor (); - for (i = 0; i < sizeof (_mum_primes) / sizeof (uint64_t); i++) + for (i = 0; i < (int)(sizeof (_mum_primes) / sizeof (uint64_t)); i++) _mum_primes[i] = _mum_next_factor (); } @@ -400,7 +399,7 @@ mum_hash64 (uint64_t key, uint64_t seed) { target endianess and the unroll factor. */ static inline uint64_t mum_hash (const void *key, size_t len, uint64_t seed) { -#if defined(__x86_64__) && defined(__GNUC__) && (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 9) && !defined(__clang__) +#if defined(__x86_64__) && defined(_MUM_FRESH_GCC) static int avx2_support = 0; if (avx2_support > 0) diff --git a/doc/markdown/architecture/index.md b/doc/markdown/architecture/index.md index 45e52fa8e..f93ce9818 100644 --- a/doc/markdown/architecture/index.md +++ b/doc/markdown/architecture/index.md @@ -1,28 +1,20 @@ -# Rspamd architecture +# rspamd architecture ## Introduction -Rspamd is a universal spam filtering system based on event-driven processing -model. It means that rspamd is intended not to block anywhere in the code. 
To -process messages rspamd uses a set of so called `rules`. Each `rule` is a symbolic -name associated with some message property. For example, we can define the following -rules: +rspamd is a universal spam filtering system based on an event-driven processing model, which means that rspamd is not intended to block anywhere in the code. To process messages rspamd uses a set of `rules`. Each `rule` is a symbolic name associated with a message property. For example, we can define the following rules: - `SPF_ALLOW` - means that a message is validated by SPF; - `BAYES_SPAM` - means that a message is statistically considered as spam; -- `FORGED_OUTLOOK_MID` - message ID seems to be forged for Outlook MUA. +- `FORGED_OUTLOOK_MID` - message ID seems to be forged for the Outlook MUA. -Rules are defined by [modules](../modules/). So far, if there is a module that -performs SPF checks it may define several rules according to SPF policy: +Rules are defined by [modules](../modules/). If there is a module, for example, that performs SPF checks it may define several rules according to SPF policy: - `SPF_ALLOW` - a sender is allowed to send messages for this domain; - `SPF_DENY` - a sender is denied by SPF policy; - `SPF_SOFTFAIL` - there is no affinity defined by SPF policy. -Rspamd supports two main types of modules: internal written in C and external -written in Lua. There is no real difference between these two types with the exception -that C modules are embeded all the time and can be enabled in `filters` attribute -in the `options` section of the config: +rspamd supports two main types of modules: internal modules written in C and external modules written in lua. There is no real difference between the two types with the exception that C modules are embedded and can be enabled in a `filters` attribute in the `options` section of the config: ~~~ucl options { @@ -33,29 +25,21 @@ options { ## Protocol -Rspamd uses HTTP protocol for all operations. 
This protocol is described in the [protocol section](protocol.md). +rspamd uses the HTTP protocol for all operations. This protocol is described in the [protocol section](protocol.md). ## Metrics -Rules in rspamd, defines merely a logic of checks, however it is required to -set up weights for each rule. Weight means `significance` in terms of rspamd. So -far, rules with greater absolute value of weight are considered as more important -than the recent rules. The weight of rules is defined in `metrics`. Each metric -is a set of grouped rules with specific weights. For example, we may define the -following weights for our SPF rules: +Rules in rspamd define a logic of checks, but it is required to set up weights for each rule. (For rspamd, weight means `significance`.) Rules with a greater absolute value of weight are considered more important. The weight of rules is defined in `metrics`. Each metric is a set of grouped rules with specific weights. For example, we may define the following weights for our SPF rules: - `SPF_ALLOW`: -1 - `SPF_DENY`: 2 - `SPF_SOFTFAIL`: 0.5 -Positive weights means that this rule turns message to more spammy, while negative -means the opposite. +Positive weights mean that this rule increases a message's 'spamminess', while negative weights mean the opposite. ### Rules scheduler -To avoid unnecessary checks rspamd uses scheduler of rules for each message. So far, -if a message is considered as `definite spam` then further checks are not performed. -This scheduler is rather naive and it performs the following logic: +To avoid unnecessary checks rspamd uses a scheduler of rules for each message. If a message is considered as definite spam then further checks are not performed. 
This scheduler is rather naive and it performs the following logic: - select negative rules *before* positive ones to prevent false positives; - prefer rules with the following characteristics: @@ -65,77 +49,39 @@ This scheduler is rather naive and it performs the following logic: These optimizations can filter definite spam more quickly than a generic queue. -Since rspamd-0.9 there are more optimizations for rules and expressions that are -roughly described in the [following presentation](http://highsecure.ru/ast-rspamd.pdf). +Since rspamd-0.9 there are further optimizations for rules and expressions that are described generally in the [following presentation](http://highsecure.ru/ast-rspamd.pdf). ## Actions -Another important property of metrics is their actions set. This set defines recommended -actions for a message if it reach a certain score defined by all rules triggered. -Rspamd defines the following actions: +Another important property of metrics is their actions set. This set defines recommended actions for a message if it reaches a certain score defined by all rules which have been triggered. rspamd defines the following actions: -- `No action`: a message is likely ham; -- `Greylist`: greylist message is it is not certainly ham; +- `No action`: a message is likely to be ham; +- `Greylist`: greylist a message if it is not certainly ham; - `Add header`: a message is likely spam, so add a specific header; - `Rewrite subject`: a message is likely spam, so rewrite its subject; - `Reject`: a message is very likely spam, so reject it completely -These actions are just recommendations for MTA and are not to be strictly followed. -For all actions that are greater or equal than `greylist` it is recommended to -perform explicit greylisting. `Add header` and `rewrite subject` actions are very -close in semantics and are both considered as `probable spam`. `Reject` is a -strong rule that usually means that a message should be really rejected by MTA. 
-The triggering score for these actions should be specified according to their logic -priorities. If two actions have the same weight, the result is unspecified. +These actions are just recommendations for the MTA and are not to be strictly followed. For all actions that are greater or equal than `greylist` it is recommended to perform explicit greylisting. `Add header` and `rewrite subject` actions are very close in semantics and are both considered as probable spam. `Reject` is a strong rule which usually means that a message should be really rejected by the MTA. The triggering score for these actions should be specified according to their logic priorities. If two actions have the same weight, the result is unspecified. ## Rules weight -The weights of rules is not necessarily constant. For example, for statistics rules -we have no certain confidence if a message is spam or not. We have some probability -instead. To allow fuzzy rules weight, rspamd supports `dynamic weights`. Generally, -it means that a rule may add a dynamic range from 0 to a defined weight in the metric. -So far if we define symbol `BAYES_SPAM` with weight 5.0, then this rule can add -a resulting symbol with weight from 0 to 5.0. To distribute values in the proper -way, rspamd usually uses some sort of Sigma function to provide fair distribution curve. -Nevertheless, the most of rspamd rules uses static weights with the exception of -fuzzy rules. - -## Statistic - -Rspamd uses statistic algorithms to precise the final score of a message. Currently, -the only algorithm defined is OSB-Bayes. You may find the concrete details of this -algorithm in the following [paper](http://osbf-lua.luaforge.net/papers/osbf-eddc.pdf). -Rspamd uses window size of 5 words in its classification. During classification procedure, -rspamd split a message to a set of tokens. - -Tokens are separated by punctiation or space characters. Short tokens (less than 3 symbols) are ignored. 
For each token rspamd -calculates two non-cryptographic hashes used subsequently as indices. All these tokens -are stored in memory-mapped files called `statistic files` (or `statfiles`). Each statfile -is a set of token chains, indexed by the first hash. A new token may be inserted to some -chain, and if this chain is full then rspamd tries to expire less significant tokens to -insert a new one. It is possible to obtain the current state of tokens by running - - rspamc stat - -command that asks controller for free and used tokens in each statfile. -Please note that if a statfile is close to be completely filled then during subsequent -learning you will loose existing data. Therefore, it is recommended to increase size for -such statfiles. +The weight of rules is not necessarily constant. For example, for statistics rules we have no certain confidence if a message is spam or not; instead we have a measure of probability. To allow fuzzy rules weight, rspamd supports `dynamic weights`. Generally, it means that a rule may add a dynamic range from 0 to a defined weight in the metric. So if we define the symbol `BAYES_SPAM` with a weight of 5.0, then this rule can add a resulting symbol with a weight from 0 to 5.0. To distribute values, rspamd uses a form of Sigma function to provide a fair distribution curve. The majority of rspamd rules, with the exception of fuzzy rules, use static weights. -## Running rspamd +## Statistics -There are several command-line options that can be passed to rspamd. All of them can be displayed by passing `--help` argument: +rspamd uses statistic algorithms to precisely calculate the final score of a message. Currently, the only algorithm defined is OSB-Bayes. You can find details of this algorithm in the following [paper](http://osbf-lua.luaforge.net/papers/osbf-eddc.pdf). rspamd uses a window size of 5 words in its classification. During the classification procedure, rspamd splits a message into a set of tokens. 
Tokens are separated by punctuation or whitespace characters. Short tokens (less than 3 symbols) are ignored. For each token, rspamd calculates two non-cryptographic hashes used subsequently as indices. All these tokens are stored in different statistics backends (mmapped files, sqlite3 database or redis server). Currently, the recommended backend for statistics is `redis`. -All options are optional: by default rspamd would try to read `etc/rspamd.conf` config file and run as daemon. Also there is test mode that can be turned on by passing `-t` argument. In test mode, rspamd reads config file and checks its syntax. If a configuration file is OK, then exit code is zero. Test mode is useful for testing new config file withou rspamd restart. `--convert-config` option can be used to convert old style (pre 0.6.0) config to [ucl](../configuration/ucl.md) one: +## Running rspamd - $ rspamd -c ./rspamd.xml --convert-conf ./rspamd.conf +There are several command-line options that can be passed to rspamd. All of them can be displayed by passing the `--help` argument. +All options are optional: by default rspamd will try to read the `etc/rspamd.conf` config file and run as a daemon. Also there is a test mode that can be turned on by passing the `-t` argument. In test mode, rspamd reads the config file and checks its syntax. If a configuration file is OK, the exit code is zero. Test mode is useful for testing new config files without restarting rspamd. ## Managing rspamd using signals -First of all, it is important to note that all user's signals should be sent to rspamd main process and not to its children (as for child processes these signals can have other meanings). To determine which process is main you can use two ways: +It is important to note that all user signals should be sent to the rspamd main process and not to its children (as for child processes these signals can have other meanings). 
You can identify the main process: -- by reading pidfile: +- by reading the pidfile: $ cat pidfile @@ -151,10 +97,10 @@ First of all, it is important to note that all user's signals should be sent to $ ps auxwww | grep rspamd | grep main nobody 28378 0.0 0.2 49744 9424 rspamd: main process -After getting the pid of main process it is possible to manage rspamd with signals: +After getting the pid of the main process it is possible to manage rspamd with signals, as follows: -- `SIGHUP` - restart rspamd: reread config file, start new workers (as well as controller and other processes), stop accepting connections by old workers, reopen all log files. Note that old workers would be terminated after one minute that should allow to process all pending requests. All new requests to rspamd will be processed by newly started workers. -- `SIGTERM` - terminate rspamd system. -- `SIGUSR1` - reopen log files (useful for log files rotation). +- `SIGHUP` - restart rspamd: reread config file, start new workers (as well as controller and other processes), stop accepting connections by old workers, reopen all log files. Note that old workers would be terminated after one minute which should allow processing of all pending requests. All new requests to rspamd will be processed by the newly started workers. +- `SIGTERM` - terminate rspamd. +- `SIGUSR1` - reopen log files (useful for log file rotation). -These signals may be used in start scripts as it is done in `FreeBSD` start script. Restarting of rspamd is performed softly: no connections are dropped and if a new config is incorrect then the old config is used. +These signals may be used in rc-style scripts. Restarting of rspamd is performed softly: no connections are dropped and if a new config is incorrect then the old config is used. 
diff --git a/doc/markdown/architecture/protocol.md b/doc/markdown/architecture/protocol.md index 51ac8e9c6..56e1da1d4 100644 --- a/doc/markdown/architecture/protocol.md +++ b/doc/markdown/architecture/protocol.md @@ -1,15 +1,12 @@ -# Rspamd protocol +# rspamd protocol ## Protocol basics -Rspamd uses HTTP protocol of either version 1.0 or 1.1. However, there is compatibility layer described further in this document. -Rspamd defines some servicing headers that allows to pass extra information about a message scanned, such as envelope data, IP address, -SMTP sasl authentication data and so on. Rspamd supports both normal and chunked encoded HTTP request, however, form URL encoding is **NOT** supported currently. +rspamd uses the HTTP protocol, either version 1.0 or 1.1. (There is also a compatibility layer described further in this document.) rspamd defines some headers which allow the passing of extra information about a scanned message, such as envelope data, IP address or SMTP sasl authentication data, etc. rspamd supports normal and chunked encoded HTTP requests. -## Rspamd HTTP request +## rspamd HTTP request -Rspamd encourages usage of HTTP protocol since it is standard and can be used by literally every programming language without exotic libraries. -The typical HTTP request looks like the following: +rspamd encourages the use of the HTTP protocol since it is standard and can be used by every programming language without the use of exotic libraries. A typical HTTP request looks like the following: POST /check HTTP/1.0 Content-Length: 26969 @@ -21,32 +18,31 @@ The typical HTTP request looks like the following: <your message goes here> -You can also use chunked encoding that allows streamlined data transfer which is useful if you don't know the length of the message. +You can also use chunked encoding that allows streamlined data transfer which is useful if you don't know the length of a message. ### HTTP request -Normally, you should just use '/check' here. 
However, if you talk to the controller then you might want to use controllers commands here. +Normally, you should just use '/check' here. However, if you want to communicate with the controller then you might want to use the controller's commands. (TODO: write this part) ### HTTP headers -To avoid unnecessary work, rspamd allows MTA to pass pre-processed data about the message by using either HTTP headers or JSON control block (described further in this document). -Rspamd supports the following non-standard HTTP headers: +To avoid unnecessary work, rspamd allows an MTA to pass pre-processed data about the message by using either HTTP headers or a JSON control block (described further in this document). rspamd supports the following non-standard HTTP headers: | Header | Description | -| :-------------- | :-------------------------------- | -| **Deliver-To:** | Defines actual delivery recipient of message. Can be used for personalized statistic and for user specific options.| -| **IP:** | Defines IP from which this message is received. | -| **Helo:** | Defines SMTP helo. | -| **Hostname:** | Defines resolved hostname. | -| **From:** | Defines SMTP mail from command data. | -| **Queue-Id:** | Defines SMTP queue id for message (can be used instead of message id in logging). | -| **Rcpt:** | Defines SMTP recipient (it may be several `Rcpt` headers). | -| **Pass:** | If this header has `all` value, all filters would be checked for this message. | -| **Subject:** | Defines subject of message (is used for non-mime messages). | +| :-------------- | :-------------------------------- | +| **Deliver-To:** | Defines actual delivery recipient of message. Can be used for personalized statistics and for user specific options. | +| **IP:** | Defines IP from which this message is received. 
| +| **Helo:** | Defines SMTP helo | +| **Hostname:** | Defines resolved hostname | +| **From:** | Defines SMTP mail from command data | +| **Queue-Id:** | Defines SMTP queue id for message (can be used instead of message id in logging). | +| **Rcpt:** | Defines SMTP recipient (there may be several `Rcpt` headers) | +| **Pass:** | If this header has `all` value, all filters would be checked for this message. | +| **Subject:** | Defines subject of message (is used for non-mime messages). | | **User:** | Defines SMTP user. | -| **Message-Length:** | Defines the length of message excluding the control block. | +| **Message-Length:** | Defines the length of message excluding the control block. | Controller also defines certain headers: @@ -54,9 +50,9 @@ Controller also defines certain headers: Standard HTTP headers, such as `Content-Length`, are also supported. -## Rspamd HTTP reply +## rspamd HTTP reply -Rspamd reply is encoded using `json` format. Here is a typical HTTP reply: +rspamd reply is encoded in `JSON`. Here is a typical HTTP reply: HTTP/1.1 200 OK Connection: close @@ -111,7 +107,7 @@ Rspamd reply is encoded using `json` format. Here is a typical HTTP reply: For convenience, the reply is LINTed using [jsonlint](http://jsonlint.com). The actual reply is compressed for speed. -The reply can be treated as the JSON object where keys are metric names (namely `default`) and values are objects that represent metric. +The reply can be treated as a JSON object where keys are metric names (namely `default`) and values are objects that represent metrics. 
Each metric has the following fields: @@ -124,27 +120,26 @@ Each metric has the following fields: - `greylist` - message should be greylisted; - `add header` - message is suspicious and should be marked as spam - `rewrite subject` - message is suspicious and should have subject rewritten - - `soft reject` - message should be temporary rejected at the moment (for example, due to rate limit exhausting) + - `soft reject` - message should be temporary rejected (for example, due to rate limit exhausting) - `reject` - message should be rejected as spam -Additionally, metric contains all symbols added during message's processing indexed by symbols' names. +Additionally, metric contains all symbols added during a message's processing, indexed by symbol names. -Moreover, some other keys might be in the reply: +Additional keys which may be in the reply include: -* `subject` - if action is `rewrite subject` then this value defines the desired subject for a message +* `subject` - if action is `rewrite subject` this value defines the desired subject for a message * `urls` - a list of urls found in a message (only hostnames) * `emails` - a list of emails found in a message * `message-id` - ID of message (useful for logging) -* `messages` - array of optional messages added by some rspamd filters (such as `SPF`) +* `messages` - array of optional messages added by rspamd filters (such as `SPF`) -## Rspamd JSON control block +## rspamd JSON control block -Since rspamd 0.9 it is also possible to pass additional data by using request body prepending JSON control block to the message. Hence, you can use either headers or JSON block to pass data from MTA to rspamd. -The advantage of JSON block is that it can be encrypted using `httpcrypt`. Headers encryption is currently unsupported. +Since rspamd version 0.9 it is also possible to pass additional data by prepending a JSON control block to a message. So you can use either headers or a JSON block to pass data from the MTA to rspamd. 
-To use JSON control block, you need to pass extra header to rspamd called `Message-Length`. This header should be equal to the size of the message **excluding** JSON control block. Therefore, the size of control block is equal to `Content-Length` - `Message-Length`. Rspamd assumes that a message starts immediately after control block (with no extra CRLF). This method is equally compatible with streaming transfer, however even if not specifying `Content-Length` you are still required to specify `Message-Length`. +To use a JSON control block, you need to pass an extra header called `Message-Length` to rspamd. This header should be equal to the size of the message **excluding** the JSON control block. Therefore, the size of the control block is equal to `Content-Length - Message-Length`. rspamd assumes that a message starts immediately after the control block (with no extra CRLF). This method is equally compatible with streaming transfer, however even if you are not specifying `Content-Length` you are still required to specify `Message-Length`. -Here is an example of JSON control block: +Here is an example of a JSON control block: ~~~json { @@ -156,43 +151,4 @@ Here is an example of JSON control block: } ~~~ -Moreover, [UCL](https://github.com/vstakhov/libucl) json extensions and syntax conventions are also supported inside control block. - -## Legacy RSPAMC protocol - -For compatibility, rspamd also supports legacy `RSPAMC` and also spamassassin `SPAMC` protocols. Thought their usage is discouraged, these protocols could be still used as last resort to communicate with rspamd from legacy applications. 
-The rspamc dialog looks as following: - - SYMBOLS RSPAMC/1.1 - Content-Length: 2200 - - <message octets> - - RSPAMD/1.1 0 OK - Metric: default; True; 10.40 / 10.00 / 0.00 - Symbol: R_UNDISC_RCPT - Symbol: ONCE_RECEIVED - Symbol: R_MISSING_CHARSET - Urls: - -Rspamc protocol support different commands as well: - -| Command | Description | -| :-------| :----- | -| CHECK | Check a message and output results for each metric. But do not output symbols. | -| SYMBOLS | Same as `CHECK` but output symbols. | -| PROCESS | Same as `SYMBOLS` but output also original message with inserted X-Spam headers. | -| PING | Do not do any processing, just check rspamd state: | - - -After command there should be one mandatory header: `Content-Length` that defines message's length in bytes and optional headers (same as for HTTP). - -Rspamd supports spamassassin `spamc` protocol and you can even pass rspamc headers in spamc mode, but reply of rspamd in `spamc` mode is truncated to "default" metric only with no options for symbols being displayed. Rspamc reply looks as following: - - RSPAMD/1.1 0 OK - Metric: default; True; 10.40 / 10.00 / 0.00 - Symbol: R_UNDISC_RCPT - Symbol: ONCE_RECEIVED - Symbol: R_MISSING_CHARSET - Urls: - +Moreover, [UCL](https://github.com/vstakhov/libucl) json extensions and syntax conventions are also supported inside the control block.
\ No newline at end of file diff --git a/doc/markdown/modules/regexp.md b/doc/markdown/modules/regexp.md index f08079bff..01d7a0635 100644 --- a/doc/markdown/modules/regexp.md +++ b/doc/markdown/modules/regexp.md @@ -60,12 +60,24 @@ The match type is defined by special flags after the last `/` symbol: * `B` - MIME header regexp (applied for headers in MIME parts only) * `R` - full headers content (applied for all headers undecoded and for the message only - **not** including MIME headers) * `M` - raw message regexp -* `P` - part regexp +* `P` - part regexp without HTML tags +* `Q` - part regexp with HTML tags +* `C` - spamassassin `BODY` regexp analogue(see http://spamassassin.apache.org/full/3.4.x/doc/Mail_SpamAssassin_Conf.txt) +* `D` - spamassassin `RAWBODY` regexp analogue * `U` - URL regexp +From 1.3, it is also possible to specify long regexp types for convenience in curly braces: -We strongly discourage from using of raw message regexps as they are expensive and -should be replaced by [trie](trie.md) rules if possible. +* `{header}` - header regexp +* `{raw_header}` - undecoded header regexp (e.g. 
without quoted-printable decoding) +* `{mime_header}` - MIME header regexp (applied for headers in MIME parts only) +* `{all_header}` - full headers content (applied for all headers undecoded and for the message only - **not** including MIME headers) +* `{body}` - raw message regexp +* `{mime}` - part regexp without HTML tags +* `{raw_mime}` - part regexp with HTML tags +* `{sa_body}` - spamassassin `BODY` regexp analogue(see http://spamassassin.apache.org/full/3.4.x/doc/Mail_SpamAssassin_Conf.txt) +* `{sa_raw_body}` - spamassassin `RAWBODY` regexp analogue +* `{url}` - URL regexp Each regexp also supports the following flags: @@ -122,7 +134,7 @@ Here is an example of table form definition of regexp rule: ~~~lua config['regexp']['RE_TEST'] = { - re = '/test/P', + re = '/test/i{mime}', score = 10.0, condition = function(task) if task:get_header('Subject') then diff --git a/doc/rspamc.1 b/doc/rspamc.1 index b29bbb20d..331bdada8 100644 --- a/doc/rspamc.1 +++ b/doc/rspamc.1 @@ -100,22 +100,22 @@ Emulate that message was received from specified ip address .RE .TP .B \-u \f[I]username\f[], \-\-user=\f[I]username\f[] -Emulate that message was from specified user +Emulate that message was received from specified authenticated user .RS .RE .TP .B \-d \f[I]user\@domain\f[], \-\-deliver=\f[I]user\@domain\f[] -Emulate that message is delivered to specified user +Emulate that message is delivered to specified user (for LDA/statistics) .RS .RE .TP .B \-F \f[I]user\@domain\f[], \-\-from=\f[I]user\@domain\f[] -Emulate that message is from specified user +Emulate that message has specified SMTP FROM address .RS .RE .TP .B \-r \f[I]user\@domain\f[], \-\-rcpt=\f[I]user\@domain\f[] -Emulate that message is for specified user +Emulate that message has specified SMTP RCPT address .RS .RE .TP diff --git a/doc/rspamc.1.md b/doc/rspamc.1.md index 8e76d01b2..fb0fa58c8 100644 --- a/doc/rspamc.1.md +++ b/doc/rspamc.1.md @@ -64,16 +64,16 @@ requires input. 
: Emulate that message was received from specified ip address -u *username*, \--user=*username* -: Emulate that message was from specified user +: Emulate that message was received from specified authenticated user -d *user@domain*, \--deliver=*user@domain* -: Emulate that message is delivered to specified user +: Emulate that message is delivered to specified user (for LDA/statistics) -F *user@domain*, \--from=*user@domain* -: Emulate that message is from specified user +: Emulate that message has specified SMTP FROM address -r *user@domain*, \--rcpt=*user@domain* -: Emulate that message is for specified user +: Emulate that message has specified SMTP RCPT address \--helo=*helo_string* : Imitate SMTP HELO passing from MTA @@ -167,4 +167,4 @@ Add custom action's weight: Rspamd documentation and source codes may be downloaded from <https://rspamd.com/>. -[rspamd-workers]: https://rspamd.com/doc/workers/
\ No newline at end of file +[rspamd-workers]: https://rspamd.com/doc/workers/ diff --git a/rules/forwarding.lua b/rules/forwarding.lua index 6ee0b9a97..c5c8912af 100644 --- a/rules/forwarding.lua +++ b/rules/forwarding.lua @@ -81,24 +81,27 @@ rspamd_config.FORWARDED = { local matches = 0 -- Retrieve and loop through all Received headers local rcvds = task:get_header_full('Received') - for _, rcvd in ipairs(rcvds) do + + if rcvds then + for _, rcvd in ipairs(rcvds) do local _,_,addr = rcvd['decoded']:lower():find("%sfor%s<(.-)>") if addr then - matches = matches + 1 - -- Check that it doesn't match the envrcpt - -- TODO: remove any plus addressing? - if addr ~= envrcpts[1].addr:lower() then - -- Check for mailing-lists as they will have the same signature - if matches < 2 and lu and to and to[1].addr:lower() == addr then - return false - else - return true, addr - end + matches = matches + 1 + -- Check that it doesn't match the envrcpt + -- TODO: remove any plus addressing? + if addr ~= envrcpts[1].addr:lower() then + -- Check for mailing-lists as they will have the same signature + if matches < 2 and lu and to and to[1].addr:lower() == addr then + return false + else + return true, addr end - -- Prevent any other iterations as we only want - -- process the first matching Received header - return false + end + -- Prevent any other iterations as we only want + -- process the first matching Received header + return false end + end end return false end, diff --git a/rules/http_headers.lua b/rules/http_headers.lua index f8d7f2be6..0252ccce9 100644 --- a/rules/http_headers.lua +++ b/rules/http_headers.lua @@ -69,7 +69,7 @@ rspamd_config:add_condition("R_SPF_ALLOW", function(task) elseif obj['result'] == 'neutral' then task:insert_result('R_SPF_NEUTRAL', 1.0, 'http header') elseif obj['result'] == 'tempfail' or obj['result'] == 'softfail' then - task:insert_result('R_SPF_TEMPFAIL', 1.0, 'http header') + task:insert_result('R_SPF_SOFTFAIL', 1.0, 'http header') end return 
false diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0c1e31dbe..55e76fcfa 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -101,21 +101,44 @@ ENDIF() AddModules(MODULES_LIST WORKERS_LIST) LIST(LENGTH PLUGINSSRC RSPAMD_MODULES_NUM) + +SET(RAGEL_DEPENDS "${CMAKE_SOURCE_DIR}/src/ragel/smtp_address.rl" + "${CMAKE_SOURCE_DIR}/src/ragel/smtp_date.rl" + "${CMAKE_SOURCE_DIR}/src/ragel/smtp_ip.rl" + "${CMAKE_SOURCE_DIR}/src/ragel/smtp_whitespace.rl" + "${CMAKE_SOURCE_DIR}/src/ragel/smtp_received.rl") +RAGEL_TARGET(ragel_smtp_addr + INPUTS ragel/smtp_addr_parser.rl + DEPENDS ${RAGEL_DEPENDS} + COMPILE_FLAGS -T1 + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/smtp_addr_parser.rl.c) +RAGEL_TARGET(ragel_smtp_received + INPUTS ragel/smtp_received_parser.rl + DEPENDS ${RAGEL_DEPENDS} + COMPILE_FLAGS -T1 + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/smtp_received_parser.rl.c) ######################### LINK SECTION ############################### -ADD_LIBRARY(rspamd-server STATIC ${RSPAMD_CRYPTOBOX} ${RSPAMD_UTIL} ${RSPAMD_LUA} ${RSPAMD_SERVER} - ${RSPAMD_STAT} ${RSPAMD_MIME} - ${CMAKE_CURRENT_BINARY_DIR}/modules.c ${PLUGINSSRC}) +ADD_LIBRARY(rspamd-server STATIC + ${RSPAMD_CRYPTOBOX} + ${RSPAMD_UTIL} + ${RSPAMD_LUA} + ${RSPAMD_SERVER} + ${RSPAMD_STAT} + ${RSPAMD_MIME} + ${CMAKE_CURRENT_BINARY_DIR}/modules.c + ${PLUGINSSRC} + "${RAGEL_ragel_smtp_addr_OUTPUTS}" + "${RAGEL_ragel_smtp_received_OUTPUTS}") TARGET_LINK_LIBRARIES(rspamd-server rspamd-http-parser) TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb) TARGET_LINK_LIBRARIES(rspamd-server rspamd-lpeg) TARGET_LINK_LIBRARIES(rspamd-server lcbtrie) -ADD_DEPENDENCIES(rspamd-server rspamd_lua_preprocess) - IF (ENABLE_CLANG_PLUGIN MATCHES "ON") ADD_DEPENDENCIES(rspamd-server rspamd-clang) ENDIF() +ADD_DEPENDENCIES(rspamd-server rspamd_lua_preprocess) ADD_EXECUTABLE(rspamd ${RSPAMDSRC} ${CMAKE_CURRENT_BINARY_DIR}/workers.c) SET_TARGET_PROPERTIES(rspamd PROPERTIES LINKER_LANGUAGE C) diff --git a/src/client/rspamc.c b/src/client/rspamc.c 
index 5280914b9..56cbeec16 100644 --- a/src/client/rspamc.c +++ b/src/client/rspamc.c @@ -14,8 +14,9 @@ * limitations under the License. */ #include "config.h" -#include "util.h" -#include "http.h" +#include "libutil/util.h" +#include "libutil/http.h" +#include "libutil/http_private.h" #include "rspamdclient.h" #include "utlist.h" #include "unix-std.h" @@ -67,12 +68,17 @@ static GList *children; g_queue_push_tail ((o), nh); \ } while (0) +static gboolean rspamc_password_callback (const gchar *option_name, + const gchar *value, + gpointer data, + GError **error); + static GOptionEntry entries[] = { { "connect", 'h', 0, G_OPTION_ARG_STRING, &connect_str, "Specify host and port", NULL }, - { "password", 'P', 0, G_OPTION_ARG_STRING, &password, - "Specify control password", NULL }, + { "password", 'P', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, + &rspamc_password_callback, "Specify control password", NULL }, { "classifier", 'c', 0, G_OPTION_ARG_STRING, &classifier, "Classifier to learn spam or ham", NULL }, { "weight", 'w', 0, G_OPTION_ARG_INT, &weight, @@ -87,13 +93,13 @@ static GOptionEntry entries[] = "Emulate that message was received from specified ip address", NULL }, { "user", 'u', 0, G_OPTION_ARG_STRING, &user, - "Emulate that message was from specified user", NULL }, + "Emulate that message was received from specified authenticated user", NULL }, { "deliver", 'd', 0, G_OPTION_ARG_STRING, &deliver_to, - "Emulate that message is delivered to specified user", NULL }, + "Emulate that message is delivered to specified user (for LDA/statistics)", NULL }, { "from", 'F', 0, G_OPTION_ARG_STRING, &from, - "Emulate that message is from specified user", NULL }, + "Emulate that message has specified SMTP FROM address", NULL }, { "rcpt", 'r', 0, G_OPTION_ARG_STRING_ARRAY, &rcpts, - "Emulate that message is for specified user", NULL }, + "Emulate that message has specified SMTP RCPT address", NULL }, { "helo", 0, 0, G_OPTION_ARG_STRING, &helo, "Imitate SMTP HELO 
passing from MTA", NULL }, { "hostname", 0, 0, G_OPTION_ARG_STRING, &hostname, @@ -292,6 +298,31 @@ struct rspamc_callback_data { gdouble start; }; +gboolean +rspamc_password_callback (const gchar *option_name, + const gchar *value, + gpointer data, + GError **error) +{ + guint plen = 8192; + + if (value != NULL) { + password = g_strdup (value); + } + else { + /* Read password from console */ + password = g_malloc0 (plen); + plen = rspamd_read_passphrase (password, plen, 0, NULL); + } + + if (plen == 0) { + rspamd_fprintf (stderr, "Invalid password\n"); + exit (EXIT_FAILURE); + } + + return TRUE; +} + /* * Parse command line */ @@ -920,12 +951,12 @@ rspamc_stat_output (FILE *out, ucl_object_t *obj) static void rspamc_output_headers (FILE *out, struct rspamd_http_message *msg) { - struct rspamd_http_header *h; + struct rspamd_http_header *h, *htmp; - LL_FOREACH (msg->headers, h) - { + HASH_ITER (hh, msg->headers, h, htmp) { rspamd_fprintf (out, "%T: %T\n", h->name, h->value); } + rspamd_fprintf (out, "\n"); } @@ -1193,6 +1224,8 @@ rspamc_client_cb (struct rspamd_client_connection *conn, struct rspamc_command *cmd; FILE *out = stdout; gdouble finish = rspamd_get_ticks (), diff; + const gchar *body; + gsize body_len; cmd = cbdata->cmd; diff = finish - cbdata->start; @@ -1208,12 +1241,16 @@ rspamc_client_cb (struct rspamd_client_connection *conn, } else { if (cmd->need_input) { - rspamd_fprintf (out, "Results for file: %s (%.3f seconds)\n", - cbdata->filename, diff); + if (!compact) { + rspamd_fprintf (out, "Results for file: %s (%.3f seconds)\n", + cbdata->filename, diff); + } } else { - rspamd_fprintf (out, "Results for command: %s (%.3f seconds)\n", - cmd->name, diff); + if (!compact) { + rspamd_fprintf (out, "Results for command: %s (%.3f seconds)\n", + cmd->name, diff); + } } if (result != NULL) { @@ -1241,9 +1278,13 @@ rspamc_client_cb (struct rspamd_client_connection *conn, else if (err != NULL) { rspamd_fprintf (out, "%s\n", err->message); - if (json && msg != 
NULL && msg->body != NULL) { - /* We can also output the resulting json */ - rspamd_fprintf (out, "%V\n", msg->body); + if (json && msg != NULL) { + body = rspamd_http_message_get_body (msg, &body_len); + + if (body) { + /* We can also output the resulting json */ + rspamd_fprintf (out, "%*s\n", (gint)body_len, body); + } } } } diff --git a/src/client/rspamdclient.c b/src/client/rspamdclient.c index d386664dc..753f64c74 100644 --- a/src/client/rspamdclient.c +++ b/src/client/rspamdclient.c @@ -14,8 +14,9 @@ * limitations under the License. */ #include "rspamdclient.h" -#include "util.h" -#include "http.h" +#include "libutil/util.h" +#include "libutil/http.h" +#include "libutil/http_private.h" #include "unix-std.h" #ifdef HAVE_FETCH_H @@ -115,7 +116,7 @@ rspamd_client_finish_handler (struct rspamd_http_connection *conn, return 0; } else { - if (msg->body == NULL || msg->body_buf.len == 0 || msg->code != 200) { + if (rspamd_http_message_get_body (msg, NULL) == NULL || msg->code != 200) { err = g_error_new (RCLIENT_ERROR, msg->code, "HTTP error: %d, %.*s", msg->code, (gint)msg->status->len, msg->status->str); @@ -166,7 +167,8 @@ rspamd_client_init (struct event_base *ev_base, const gchar *name, rspamd_client_finish_handler, 0, RSPAMD_HTTP_CLIENT, - conn->keys_cache); + conn->keys_cache, + NULL); conn->server_name = g_string_new (name); if (port != 0) { @@ -205,6 +207,7 @@ rspamd_client_command (struct rspamd_client_connection *conn, gsize remain, old_len; GList *cur; GString *input = NULL; + rspamd_fstring_t *body; req = g_slice_alloc0 (sizeof (struct rspamd_client_request)); req->conn = conn; @@ -243,11 +246,11 @@ rspamd_client_command (struct rspamd_client_connection *conn, return FALSE; } - req->msg->body = rspamd_fstring_new_init (input->str, input->len); + body = rspamd_fstring_new_init (input->str, input->len); + rspamd_http_message_set_body_from_fstring_steal (req->msg, body); req->input = input; } else { - req->msg->body = NULL; req->input = NULL; } diff --git 
a/src/controller.c b/src/controller.c index 91f4cfed6..f2352e514 100644 --- a/src/controller.c +++ b/src/controller.c @@ -18,6 +18,7 @@ #include "libutil/rrd.h" #include "libutil/map.h" #include "libutil/map_private.h" +#include "libutil/http_private.h" #include "libstat/stat_api.h" #include "rspamd.h" #include "libserver/worker_util.h" @@ -871,7 +872,6 @@ rspamd_controller_handle_get_map (struct rspamd_http_connection_entry *conn_ent, gboolean found = FALSE; struct rspamd_http_message *reply; - if (!rspamd_controller_check_password (conn_ent, session, msg, FALSE)) { return 0; } @@ -919,10 +919,8 @@ rspamd_controller_handle_get_map (struct rspamd_http_connection_entry *conn_ent, reply = rspamd_http_new_message (HTTP_RESPONSE); reply->date = time (NULL); reply->code = 200; - reply->body = rspamd_fstring_sized_new (st.st_size); - /* Read the whole buffer */ - if (read (fd, reply->body->str, st.st_size) == -1) { + if (!rspamd_http_message_set_body_from_fd (reply, fd)) { close (fd); rspamd_http_message_free (reply); msg_err_session ("cannot read map %s: %s", bk->uri, strerror (errno)); @@ -930,8 +928,6 @@ rspamd_controller_handle_get_map (struct rspamd_http_connection_entry *conn_ent, return 0; } - reply->body->len = st.st_size; - close (fd); rspamd_http_connection_reset (conn_ent->conn); @@ -1420,7 +1416,7 @@ rspamd_controller_handle_learn_common ( return 0; } - if (msg->body == NULL || msg->body->len == 0) { + if (rspamd_http_message_get_body (msg, NULL) == NULL) { msg_err_session ("got zero length body, cannot continue"); rspamd_controller_send_error (conn_ent, 400, @@ -1521,7 +1517,7 @@ rspamd_controller_handle_scan (struct rspamd_http_connection_entry *conn_ent, return 0; } - if (msg->body == NULL || msg->body->len == 0) { + if (rspamd_http_message_get_body (msg, NULL) == NULL) { msg_err_session ("got zero length body, cannot continue"); rspamd_controller_send_error (conn_ent, 400, @@ -1595,7 +1591,7 @@ rspamd_controller_handle_saveactions ( return 0; } - if 
(msg->body == NULL || msg->body->len == 0) { + if (rspamd_http_message_get_body (msg, NULL) == NULL) { msg_err_session ("got zero length body, cannot continue"); rspamd_controller_send_error (conn_ent, 400, @@ -1714,7 +1710,7 @@ rspamd_controller_handle_savesymbols ( return 0; } - if (msg->body == NULL || msg->body->len == 0) { + if (rspamd_http_message_get_body (msg, NULL) == NULL) { msg_err_session ("got zero length body, cannot continue"); rspamd_controller_send_error (conn_ent, 400, @@ -1840,7 +1836,7 @@ rspamd_controller_handle_savemap (struct rspamd_http_connection_entry *conn_ent, return 0; } - if (msg->body == NULL || msg->body->len == 0) { + if (rspamd_http_message_get_body (msg, NULL) == NULL) { msg_err_session ("got zero length body, cannot continue"); rspamd_controller_send_error (conn_ent, 400, @@ -2162,7 +2158,7 @@ rspamd_controller_handle_custom (struct rspamd_http_connection_entry *conn_ent, cmd->privilleged)) { return 0; } - if (cmd->require_message && (msg->body == NULL || msg->body->len == 0)) { + if (cmd->require_message && (rspamd_http_message_get_body (msg, NULL) == NULL)) { msg_err_session ("got zero length body, cannot continue"); rspamd_controller_send_error (conn_ent, 400, @@ -2216,7 +2212,7 @@ rspamd_controller_accept_socket (gint fd, short what, void *arg) ctx = worker->ctx; if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, worker->accept_events)) == -1) { msg_warn_ctx ("accept failed: %s", strerror (errno)); return; } diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c index bd888fd3c..04498f84d 100644 --- a/src/fuzzy_storage.c +++ b/src/fuzzy_storage.c @@ -33,6 +33,7 @@ #include "ref.h" #include "xxhash.h" #include "libutil/hash.h" +#include "libutil/http_private.h" #include "unix-std.h" /* This number is used as expire time in seconds for cache items (2 days) */ @@ -260,6 +261,7 @@ fuzzy_mirror_updates_to_http (struct rspamd_fuzzy_storage_ctx *ctx, gsize len; guint32 rev; const 
gchar *p; + rspamd_fstring_t *reply; rev = rspamd_fuzzy_backend_version (ctx->backend, local_db_name); rev = GUINT32_TO_LE (rev); @@ -278,8 +280,8 @@ fuzzy_mirror_updates_to_http (struct rspamd_fuzzy_storage_ctx *ctx, } } - msg->body = rspamd_fstring_sized_new (len); - msg->body = rspamd_fstring_append (msg->body, (const char *)&rev, + reply = rspamd_fstring_sized_new (len); + reply = rspamd_fstring_append (reply, (const char *)&rev, sizeof (rev)); for (cur = ctx->updates_pending->head; cur != NULL; cur = g_list_next (cur)) { @@ -295,15 +297,14 @@ fuzzy_mirror_updates_to_http (struct rspamd_fuzzy_storage_ctx *ctx, } p = (const char *)io_cmd; - msg->body = rspamd_fstring_append (msg->body, (const char *)&len, - sizeof (len)); - msg->body = rspamd_fstring_append (msg->body, p, len); + reply = rspamd_fstring_append (reply, (const char *)&len, sizeof (len)); + reply = rspamd_fstring_append (reply, p, len); } /* Last chunk */ len = 0; - msg->body = rspamd_fstring_append (msg->body, (const char *)&len, - sizeof (len)); + reply = rspamd_fstring_append (reply, (const char *)&len, sizeof (len)); + rspamd_http_message_set_body_from_fstring_steal (msg, reply); } static void @@ -362,13 +363,13 @@ rspamd_fuzzy_send_update_mirror (struct rspamd_fuzzy_storage_ctx *ctx, msg = rspamd_http_new_message (HTTP_REQUEST); rspamd_printf_fstring (&msg->url, "/update_v1/%s", m->name); - conn->http_conn = rspamd_http_connection_new ( - NULL, + conn->http_conn = rspamd_http_connection_new (NULL, fuzzy_mirror_error_handler, fuzzy_mirror_finish_handler, RSPAMD_HTTP_CLIENT_SIMPLE, RSPAMD_HTTP_CLIENT, - ctx->keypair_cache); + ctx->keypair_cache, + NULL); rspamd_http_connection_set_key (conn->http_conn, ctx->sync_keypair); @@ -936,7 +937,8 @@ rspamd_fuzzy_mirror_process_update (struct fuzzy_master_update_session *session, } state = read_len; GList *updates = NULL, *cur; - if (!msg->body || msg->body->len == 0 || !msg->url || msg->url->len == 0) { + if (!rspamd_http_message_get_body (msg, NULL) || 
!msg->url + || msg->url->len == 0) { msg_err ("empty update message, not processing"); return; @@ -963,8 +965,7 @@ rspamd_fuzzy_mirror_process_update (struct fuzzy_master_update_session *session, * <0> - end of data * ... - ignored */ - p = (const guchar *)msg->body->str; - remain = msg->body->len; + p = rspamd_http_message_get_body (msg, &remain); if (remain > sizeof (guint32) * 2) { memcpy (&revision, p, sizeof (guint32)); @@ -1161,7 +1162,7 @@ accept_fuzzy_mirror_socket (gint fd, short what, void *arg) struct fuzzy_master_update_session *session; if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, worker->accept_events)) == -1) { msg_warn ("accept failed: %s", strerror (errno)); return; } @@ -1199,13 +1200,13 @@ accept_fuzzy_mirror_socket (gint fd, short what, void *arg) } session = g_slice_alloc0 (sizeof (*session)); - http_conn = rspamd_http_connection_new ( - NULL, + http_conn = rspamd_http_connection_new (NULL, rspamd_fuzzy_mirror_error_handler, rspamd_fuzzy_mirror_finish_handler, 0, RSPAMD_HTTP_SERVER, - ctx->keypair_cache); + ctx->keypair_cache, + NULL); rspamd_http_connection_set_key (http_conn, ctx->sync_keypair); session->ctx = ctx; @@ -2006,7 +2007,7 @@ fuzzy_peer_rep (struct rspamd_worker *worker, struct rspamd_fuzzy_storage_ctx *ctx = ud; GList *cur; struct rspamd_worker_listen_socket *ls; - struct event *accept_event; + struct event *accept_events; gdouble next_check; ctx->peer_fd = rep_fd; @@ -2026,23 +2027,23 @@ fuzzy_peer_rep (struct rspamd_worker *worker, if (ls->fd != -1) { if (ls->type == RSPAMD_WORKER_SOCKET_UDP) { - accept_event = g_slice_alloc0 (sizeof (struct event)); - event_set (accept_event, ls->fd, EV_READ | EV_PERSIST, + accept_events = g_slice_alloc0 (sizeof (struct event) * 2); + event_set (&accept_events[0], ls->fd, EV_READ | EV_PERSIST, accept_fuzzy_socket, worker); - event_base_set (ctx->ev_base, accept_event); - event_add (accept_event, NULL); + event_base_set (ctx->ev_base, 
&accept_events[0]); + event_add (&accept_events[0], NULL); worker->accept_events = g_list_prepend (worker->accept_events, - accept_event); + accept_events); } else if (worker->index == 0) { /* We allow TCP listeners only for a update worker */ - accept_event = g_slice_alloc0 (sizeof (struct event)); - event_set (accept_event, ls->fd, EV_READ | EV_PERSIST, + accept_events = g_slice_alloc0 (sizeof (struct event) * 2); + event_set (&accept_events[0], ls->fd, EV_READ | EV_PERSIST, accept_fuzzy_mirror_socket, worker); - event_base_set (ctx->ev_base, accept_event); - event_add (accept_event, NULL); + event_base_set (ctx->ev_base, &accept_events[0]); + event_add (&accept_events[0], NULL); worker->accept_events = g_list_prepend (worker->accept_events, - accept_event); + accept_events); } } diff --git a/src/libmime/email_addr.c b/src/libmime/email_addr.c index 9305e4693..2c7964f87 100644 --- a/src/libmime/email_addr.c +++ b/src/libmime/email_addr.c @@ -18,8 +18,7 @@ #include "email_addr.h" #include "message.h" #include "printf.h" - -#include "./parsers/smtp_addr_parser.c" +#include "smtp_parsers.h" static void rspamd_email_addr_dtor (struct rspamd_email_address *addr) diff --git a/src/libmime/message.c b/src/libmime/message.c index b20da368a..da9ded5ee 100644 --- a/src/libmime/message.c +++ b/src/libmime/message.c @@ -25,6 +25,7 @@ #include "utlist.h" #include "tokenizers/tokenizers.h" #include "cryptobox.h" +#include "smtp_parsers.h" #ifdef WITH_SNOWBALL #include "libstemmer.h" @@ -51,338 +52,6 @@ rspamd_message_quark (void) } static void -parse_qmail_recv (rspamd_mempool_t * pool, - gchar *line, - struct received_header *r) -{ - gchar *s, *p, t; - - /* We are interested only with received from network headers */ - if ((p = strstr (line, "from network")) == NULL) { - r->is_error = 2; - return; - } - - p += sizeof ("from network") - 1; - while (g_ascii_isspace (*p) || *p == '[') { - p++; - } - /* format is ip/host */ - s = p; - if (*p) { - while (g_ascii_isdigit (*++p) || 
*p == '.') ; - if (*p != '/') { - r->is_error = 1; - return; - } - else { - *p = '\0'; - r->real_ip = rspamd_mempool_strdup (pool, s); - *p = '/'; - /* Now try to parse hostname */ - s = ++p; - while (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == - '_') { - p++; - } - t = *p; - *p = '\0'; - r->real_hostname = rspamd_mempool_strdup (pool, s); - *p = t; - } - } -} - -static void -parse_recv_header (rspamd_mempool_t * pool, - struct raw_header *rh, - struct received_header *r) -{ - gchar *p, *s, t, **res = NULL; - gchar *line; - enum { - RSPAMD_RECV_STATE_INIT = 0, - RSPAMD_RECV_STATE_FROM, - RSPAMD_RECV_STATE_IP_BLOCK, - RSPAMD_RECV_STATE_BRACES_BLOCK, - RSPAMD_RECV_STATE_BY_BLOCK, - RSPAMD_RECV_STATE_PARSE_IP, - RSPAMD_RECV_STATE_PARSE_IP6, - RSPAMD_RECV_STATE_SKIP_SPACES, - RSPAMD_RECV_STATE_ERROR - } state = RSPAMD_RECV_STATE_INIT, next_state = RSPAMD_RECV_STATE_INIT; - gboolean is_exim = FALSE; - - line = rh->decoded; - if (line == NULL) { - return; - } - - g_strstrip (line); - p = line; - s = line; - - while (*p) { - switch (state) { - /* Initial state, search for from */ - case RSPAMD_RECV_STATE_INIT: - if (*p == 'f' || *p == 'F') { - if (g_ascii_tolower (*++p) == 'r' && g_ascii_tolower (*++p) == - 'o' && g_ascii_tolower (*++p) == 'm') { - p++; - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_FROM; - } - } - else if (g_ascii_tolower (*p) == 'b' && - g_ascii_tolower (*(p + 1)) == 'y') { - state = RSPAMD_RECV_STATE_IP_BLOCK; - } - else { - /* This can be qmail header, parse it separately */ - parse_qmail_recv (pool, line, r); - return; - } - break; - /* Read hostname */ - case RSPAMD_RECV_STATE_FROM: - if (*p == '[') { - /* This should be IP address */ - res = &r->from_ip; - state = RSPAMD_RECV_STATE_PARSE_IP; - next_state = RSPAMD_RECV_STATE_IP_BLOCK; - s = ++p; - } - else if (g_ascii_isalnum (*p) || *p == '.' 
|| *p == '-' || *p == - '_') { - p++; - } - else { - t = *p; - *p = '\0'; - r->from_hostname = rspamd_mempool_strdup (pool, s); - *p = t; - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_IP_BLOCK; - } - break; - /* Try to extract additional info */ - case RSPAMD_RECV_STATE_IP_BLOCK: - /* Try to extract ip or () info or by */ - if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == - 'y') { - p += 2; - /* Skip spaces after by */ - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_BY_BLOCK; - } - else if (*p == '(') { - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; - p++; - } - else if (*p == '[') { - /* Got ip before '(' so extract it */ - s = ++p; - res = &r->from_ip; - state = RSPAMD_RECV_STATE_PARSE_IP; - next_state = RSPAMD_RECV_STATE_IP_BLOCK; - } - else { - p++; - } - break; - /* We are in () block. Here can be found real hostname and real ip, this is written by some MTA */ - case RSPAMD_RECV_STATE_BRACES_BLOCK: - /* End of block */ - if (g_ascii_isalnum (*p) || *p == '.' 
|| *p == '-' || - *p == '_' || *p == ':') { - p++; - } - else if (*p == '[') { - s = ++p; - state = RSPAMD_RECV_STATE_PARSE_IP; - res = &r->real_ip; - next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; - } - else { - if (p > s) { - /* Got some real hostname */ - /* check whether it is helo or p is not space symbol */ - if (!g_ascii_isspace (*p) || *(p + 1) != '[') { - /* Exim style ([ip]:port helo=hostname) */ - if (*s == ':' && (g_ascii_isspace (*p) || *p == ')')) { - /* Ip ending */ - is_exim = TRUE; - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; - } - else if (p - s == 4 && memcmp (s, "helo=", 5) == 0) { - p++; - is_exim = TRUE; - if (r->real_hostname == NULL && r->from_hostname != - NULL) { - r->real_hostname = r->from_hostname; - } - s = p; - while (*p != ')' && !g_ascii_isspace (*p) && *p != - '\0') { - p++; - } - if (p > s) { - r->from_hostname = rspamd_mempool_alloc (pool, - p - s + 1); - rspamd_strlcpy (r->from_hostname, s, p - s + 1); - } - } - else if (p - s == 4 && memcmp (s, "port=", 5) == 0) { - p++; - is_exim = TRUE; - while (g_ascii_isdigit (*p)) { - p++; - } - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; - } - else if (*p == '=' && is_exim) { - /* Just skip unknown pairs */ - p++; - while (!g_ascii_isspace (*p) && *p != ')' && *p != - '\0') { - p++; - } - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; - } - else { - /* skip all */ - while (*p++ != ')' && *p != '\0') ; - state = RSPAMD_RECV_STATE_IP_BLOCK; - } - } - else { - /* Postfix style (hostname [ip]) */ - t = *p; - *p = '\0'; - r->real_hostname = rspamd_mempool_strdup (pool, s); - *p = t; - /* Now parse ip */ - p += 2; - s = p; - res = &r->real_ip; - state = RSPAMD_RECV_STATE_PARSE_IP; - next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; - continue; - } - if (*p == ')') { - p++; - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_IP_BLOCK; - } - } - else if (*p == 
')') { - p++; - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_IP_BLOCK; - } - else { - r->is_error = 1; - return; - } - } - break; - /* Got by word */ - case RSPAMD_RECV_STATE_BY_BLOCK: - /* Here can be only hostname */ - if ((g_ascii_isalnum (*p) || *p == '.' || *p == '-' - || *p == '_') && p[1] != '\0') { - p++; - } - else { - /* We got something like hostname */ - if (p[1] != '\0') { - t = *p; - *p = '\0'; - r->by_hostname = rspamd_mempool_strdup (pool, s); - *p = t; - } - else { - r->by_hostname = rspamd_mempool_strdup (pool, s); - } - /* Now end of parsing */ - if (is_exim) { - /* Adjust for exim received */ - if (r->real_ip == NULL && r->from_ip != NULL) { - r->real_ip = r->from_ip; - } - else if (r->from_ip == NULL && r->real_ip != NULL) { - r->from_ip = r->real_ip; - if (r->real_hostname == NULL && r->from_hostname != - NULL) { - r->real_hostname = r->from_hostname; - } - } - } - return; - } - break; - - /* Extract ip */ - case RSPAMD_RECV_STATE_PARSE_IP: - if (*p == 'I') { - /* IPv6: */ - state = RSPAMD_RECV_STATE_PARSE_IP6; - } - else { - while (g_ascii_isxdigit (*p) || *p == '.' 
|| *p == ':') { - p++; - } - if (*p != ']') { - /* Not an ip in fact */ - state = RSPAMD_RECV_STATE_SKIP_SPACES; - p++; - } - else { - *p = '\0'; - *res = rspamd_mempool_strdup (pool, s); - *p = ']'; - p++; - state = RSPAMD_RECV_STATE_SKIP_SPACES; - } - } - break; - case RSPAMD_RECV_STATE_PARSE_IP6: - if (g_ascii_strncasecmp (p, "IPv6:", sizeof ("IPv6") - 1) == 0) { - p += sizeof ("IPv6") - 1; - s = p; - state = RSPAMD_RECV_STATE_PARSE_IP; - } - else { - state = RSPAMD_RECV_STATE_SKIP_SPACES; - } - break; - /* Skip spaces */ - case RSPAMD_RECV_STATE_SKIP_SPACES: - if (!g_ascii_isspace (*p)) { - state = next_state; - s = p; - } - else { - p++; - } - break; - default: - r->is_error = 1; - return; - break; - } - } - - r->is_error = 1; - return; -} - -static void append_raw_header (struct rspamd_task *task, GHashTable *target, struct raw_header *rh) { @@ -1001,23 +670,41 @@ rspamd_normalize_text_part (struct rspamd_task *task, #endif /* Strip newlines */ part->stripped_content = g_byte_array_sized_new (part->content->len); + part->newlines = g_ptr_array_sized_new (128); p = part->content->data; c = p; end = p + part->content->len; while (p < end) { - if (*p == '\r' || *p == '\n') { + p = memchr (c, '\n', end - c); + + if (p) { + if (*(p - 1) == '\r') { + p --; + } + if (p > c) { g_byte_array_append (part->stripped_content, c, p - c); } + /* As it could cause reallocation, we initially store offsets */ + g_ptr_array_add (part->newlines, + GUINT_TO_POINTER (part->stripped_content->len)); + part->nlines ++; + p ++; + while (p < end && (*p == '\r' || *p == '\n')) { + if (*p == '\n') { + part->nlines ++; + } + p ++; } c = p; } else { - p ++; + p = end; + break; } } @@ -1025,9 +712,18 @@ rspamd_normalize_text_part (struct rspamd_task *task, g_byte_array_append (part->stripped_content, c, p - c); } + /* Now convert offsets to real pointers for convenience */ + for (i = 0; i < part->newlines->len; i ++) { + guint off = GPOINTER_TO_UINT (g_ptr_array_index (part->newlines, i)); 
+ g_ptr_array_index (part->newlines, i) = part->stripped_content->data + off; + } + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) free_byte_array_callback, part->stripped_content); + rspamd_mempool_add_destructor (task->task_pool, + (rspamd_mempool_destruct_t) rspamd_ptr_array_free_hard, + part->newlines); /* Ugly workaround */ part->normalized_words = rspamd_tokenize_text (part->content->data, @@ -1175,8 +871,7 @@ process_text_part (struct rspamd_task *task, gboolean is_empty) { struct mime_text_part *text_part; - const gchar *cd, *p, *c; - guint remain; + const gchar *cd; /* Skip attachments */ #ifndef GMIME24 @@ -1295,21 +990,6 @@ process_text_part (struct rspamd_task *task, detect_text_language (text_part); rspamd_normalize_text_part (task, text_part); - /* Calculate number of lines */ - p = text_part->content->data; - remain = text_part->content->len; - c = p; - - while (p != NULL && remain > 0) { - p = memchr (c, '\n', remain); - - if (p != NULL) { - text_part->nlines ++; - remain -= p - c + 1; - c = p + 1; - } - } - if (!IS_PART_HTML (text_part)) { rspamd_url_text_extract (task->task_pool, task, text_part, FALSE); } @@ -1778,15 +1458,13 @@ rspamd_message_parse (struct rspamd_task *task) rspamd_images_process (task); /* Parse received headers */ - first = - rspamd_message_get_header (task, "Received", FALSE); + first = rspamd_message_get_header (task, "Received", FALSE); for (cur = first, i = 0; cur != NULL; cur = g_list_next (cur), i ++) { - recv = - rspamd_mempool_alloc0 (task->task_pool, - sizeof (struct received_header)); - parse_recv_header (task->task_pool, cur->data, recv); - + recv = rspamd_mempool_alloc0 (task->task_pool, + sizeof (struct received_header)); + rh = cur->data; + rspamd_smtp_recieved_parse (task, rh->decoded, strlen (rh->decoded), recv); /* * For the first header we must ensure that * received is consistent with the IP that we obtain through @@ -1794,22 +1472,19 @@ rspamd_message_parse (struct rspamd_task 
*task) */ if (i == 0) { gboolean need_recv_correction = FALSE; + rspamd_inet_addr_t *raddr = recv->addr; if (recv->real_ip == NULL || (task->cfg && task->cfg->ignore_received)) { need_recv_correction = TRUE; } else if (!(task->flags & RSPAMD_TASK_FLAG_NO_IP) && task->from_addr) { - rspamd_inet_addr_t *raddr = NULL; - - if (!rspamd_parse_inet_address (&raddr, recv->real_ip, 0)) { + if (raddr) { need_recv_correction = TRUE; } else { if (rspamd_inet_address_compare (raddr, task->from_addr) != 0) { need_recv_correction = TRUE; } - - rspamd_inet_address_destroy (raddr); } } @@ -1824,11 +1499,14 @@ rspamd_message_parse (struct rspamd_task *task) trecv->real_ip = rspamd_mempool_strdup (task->task_pool, rspamd_inet_address_to_string (task->from_addr)); trecv->from_ip = trecv->real_ip; + trecv->addr = task->from_addr; if (task->hostname) { trecv->real_hostname = task->hostname; trecv->from_hostname = trecv->real_hostname; } + + g_ptr_array_add (task->received, trecv); } } @@ -1910,7 +1588,7 @@ rspamd_message_parse (struct rspamd_task *task) rh = cur->data; p = rh->decoded; len = strlen (p); - rspamd_url_find_multiple (task->task_pool, p, len, FALSE, + rspamd_url_find_multiple (task->task_pool, p, len, FALSE, NULL, rspamd_url_task_callback, task); } diff --git a/src/libmime/message.h b/src/libmime/message.h index 3994bd102..8017514f3 100644 --- a/src/libmime/message.h +++ b/src/libmime/message.h @@ -7,6 +7,8 @@ #define RSPAMD_MESSAGE_H #include "config.h" +#include "email_addr.h" +#include "addr.h" #include <gmime/gmime.h> struct rspamd_task; @@ -44,6 +46,7 @@ struct mime_text_part { GByteArray *orig; GByteArray *content; GByteArray *stripped_content; /**< no newlines or html tags */ + GPtrArray *newlines; /**< positions of newlines in text */ struct html_content *html; GList *urls_offset; /**< list of offsets of urls */ GMimeObject *parent; @@ -54,13 +57,26 @@ struct mime_text_part { guint64 hash; }; +enum rspamd_received_type { + RSPAMD_RECEIVED_SMTP = 0, + 
RSPAMD_RECEIVED_ESMTP, + RSPAMD_RECEIVED_ESMTPA, + RSPAMD_RECEIVED_ESMTPS, + RSPAMD_RECEIVED_ESMTPSA, + RSPAMD_RECEIVED_LMTP, + RSPAMD_RECEIVED_IMAP, + RSPAMD_RECEIVED_UNKNOWN +}; + struct received_header { gchar *from_hostname; gchar *from_ip; gchar *real_hostname; gchar *real_ip; gchar *by_hostname; - gint is_error; + rspamd_inet_addr_t *addr; + time_t timestamp; + enum rspamd_received_type type; }; struct raw_header { diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c index 427b3654c..065e63d6d 100644 --- a/src/libmime/mime_expressions.c +++ b/src/libmime/mime_expressions.c @@ -177,6 +177,56 @@ rspamd_mime_expr_quark (void) return g_quark_from_static_string ("mime-expressions"); } +static gboolean +rspamd_parse_long_option (const gchar *start, gsize len, + struct rspamd_regexp_atom *a) +{ + gboolean ret = FALSE; + + if (rspamd_lc_cmp (start, "body", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_BODY; + } + else if (rspamd_lc_cmp (start, "part", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_MIME; + } + else if (rspamd_lc_cmp (start, "raw_part", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_RAWMIME; + } + else if (rspamd_lc_cmp (start, "header", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_HEADER; + } + else if (rspamd_lc_cmp (start, "mime_header", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_MIMEHEADER; + } + else if (rspamd_lc_cmp (start, "raw_header", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_RAWHEADER; + } + else if (rspamd_lc_cmp (start, "all_header", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_ALLHEADER; + } + else if (rspamd_lc_cmp (start, "url", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_URL; + } + else if (rspamd_lc_cmp (start, "sa_body", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_SABODY; + } + else if (rspamd_lc_cmp (start, "sa_raw_body", len) == 0) { + ret = TRUE; + a->type = RSPAMD_RE_SARAWBODY; + } + + return ret; +} + /* * Rspamd regexp utility functions */ @@ -184,7 +234,7 @@ static 
struct rspamd_regexp_atom * rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, struct rspamd_config *cfg) { - const gchar *begin, *end, *p, *src, *start; + const gchar *begin, *end, *p, *src, *start, *brace; gchar *dbegin, *dend; struct rspamd_regexp_atom *result; GError *err = NULL; @@ -291,6 +341,14 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, result->type = RSPAMD_RE_MIMEHEADER; p++; break; + case 'C': + result->type = RSPAMD_RE_SABODY; + p++; + break; + case 'D': + result->type = RSPAMD_RE_SARAWBODY; + p++; + break; case 'M': result->type = RSPAMD_RE_BODY; p++; @@ -311,6 +369,20 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line, result->type = RSPAMD_RE_RAWHEADER; p++; break; + case '{': + /* Long definition */ + if ((brace = strchr (p + 1, '}')) != NULL) { + if (!rspamd_parse_long_option (p + 1, brace - (p + 1), result)) { + p = NULL; + } + else { + p = brace + 1; + } + } + else { + p = NULL; + } + break; /* Other flags */ case 'T': result->is_test = TRUE; diff --git a/src/libmime/parsers/smtp_addr_parser.c b/src/libmime/parsers/smtp_addr_parser.c deleted file mode 100644 index d97116faa..000000000 --- a/src/libmime/parsers/smtp_addr_parser.c +++ /dev/null @@ -1,1210 +0,0 @@ - -#line 1 "src/ragel/smtp_addr_parser.rl" - -#line 72 "src/ragel/smtp_addr_parser.rl" - - - -#line 9 "src/libmime/parsers/smtp_addr_parser.c" -static const char _smtp_addr_parser_eof_actions[] = { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 14, 0, 15, 16, 17 -}; - -static const int smtp_addr_parser_start = 1; -static const int smtp_addr_parser_first_final = 75; -static const int smtp_addr_parser_error = 0; - -static const int smtp_addr_parser_en_main = 1; - - -#line 75 
"src/ragel/smtp_addr_parser.rl" - -static int -rspamd_smtp_addr_parse (const char *data, size_t len, struct rspamd_email_address *addr) -{ - const char *p = data, *pe = data + len, *eof; - int cs; - - g_assert (addr != NULL); - memset (addr, 0, sizeof (*addr)); - addr->raw = data; - addr->raw_len = len; - eof = pe; - - -#line 45 "src/libmime/parsers/smtp_addr_parser.c" - { - cs = smtp_addr_parser_start; - } - -#line 89 "src/ragel/smtp_addr_parser.rl" - -#line 52 "src/libmime/parsers/smtp_addr_parser.c" - { - if ( p == pe ) - goto _test_eof; - if ( cs == 0 ) - goto _out; -_resume: - switch ( cs ) { -case 1: - switch( (*p) ) { - case 32: goto tr0; - case 34: goto tr3; - case 45: goto tr2; - case 60: goto tr4; - case 61: goto tr2; - case 64: goto tr5; - } - if ( (*p) < 42 ) { - if ( (*p) > 13 ) { - if ( 33 <= (*p) && (*p) <= 39 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto tr0; - } else if ( (*p) > 43 ) { - if ( (*p) < 63 ) { - if ( 47 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) > 90 ) { - if ( 94 <= (*p) && (*p) <= 126 ) - goto tr2; - } else - goto tr2; - } else - goto tr2; - goto tr1; -case 0: - goto _out; -case 2: - switch( (*p) ) { - case 33: goto tr6; - case 46: goto tr7; - case 61: goto tr6; - case 64: goto tr8; - } - if ( (*p) < 45 ) { - if ( (*p) > 39 ) { - if ( 42 <= (*p) && (*p) <= 43 ) - goto tr6; - } else if ( (*p) >= 35 ) - goto tr6; - } else if ( (*p) > 57 ) { - if ( (*p) > 90 ) { - if ( 94 <= (*p) && (*p) <= 126 ) - goto tr6; - } else if ( (*p) >= 63 ) - goto tr6; - } else - goto tr6; - goto tr1; -case 3: - switch( (*p) ) { - case 33: goto tr6; - case 45: goto tr6; - case 61: goto tr6; - case 63: goto tr6; - } - if ( (*p) < 47 ) { - if ( (*p) > 39 ) { - if ( 42 <= (*p) && (*p) <= 43 ) - goto tr6; - } else if ( (*p) >= 35 ) - goto tr6; - } else if ( (*p) > 57 ) { - if ( (*p) > 90 ) { - if ( 94 <= (*p) && (*p) <= 126 ) - goto tr6; - } else if ( (*p) >= 65 ) - goto tr6; - } else - goto tr6; - goto tr1; -case 4: - if ( (*p) == 91 ) - goto tr10; 
- if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr9; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr9; - } else - goto tr9; - goto tr1; -case 75: - switch( (*p) ) { - case 32: goto tr101; - case 45: goto tr11; - case 46: goto tr102; - case 95: goto tr11; - } - if ( (*p) < 48 ) { - if ( 9 <= (*p) && (*p) <= 13 ) - goto tr101; - } else if ( (*p) > 57 ) { - if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr12; - } else if ( (*p) >= 65 ) - goto tr12; - } else - goto tr12; - goto tr1; -case 76: - if ( (*p) == 32 ) - goto tr103; - if ( 9 <= (*p) && (*p) <= 13 ) - goto tr103; - goto tr1; -case 5: - switch( (*p) ) { - case 45: goto tr11; - case 95: goto tr11; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr12; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr12; - } else - goto tr12; - goto tr1; -case 6: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr12; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr12; - } else - goto tr12; - goto tr1; -case 7: - switch( (*p) ) { - case 45: goto tr13; - case 95: goto tr13; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr14; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr15; - } else - goto tr15; - goto tr1; -case 8: - switch( (*p) ) { - case 45: goto tr16; - case 95: goto tr16; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr17; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr17; - } else - goto tr17; - goto tr1; -case 9: - switch( (*p) ) { - case 45: goto tr16; - case 58: goto tr18; - case 95: goto tr16; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr17; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr17; - } else - goto tr17; - goto tr1; -case 10: - if ( (*p) > 90 ) { - if ( 94 <= (*p) && (*p) <= 126 ) - goto tr19; - } else if ( (*p) >= 33 ) - goto tr19; - goto tr1; 
-case 11: - if ( (*p) == 93 ) - goto tr20; - if ( (*p) > 90 ) { - if ( 94 <= (*p) && (*p) <= 126 ) - goto tr19; - } else if ( (*p) >= 33 ) - goto tr19; - goto tr1; -case 77: - if ( (*p) == 32 ) - goto tr104; - if ( 9 <= (*p) && (*p) <= 13 ) - goto tr104; - goto tr1; -case 12: - switch( (*p) ) { - case 45: goto tr16; - case 46: goto tr21; - case 58: goto tr18; - case 95: goto tr16; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr22; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr17; - } else - goto tr17; - goto tr1; -case 13: - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr23; - goto tr1; -case 14: - if ( (*p) == 46 ) - goto tr24; - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr25; - goto tr1; -case 15: - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr26; - goto tr1; -case 16: - if ( (*p) == 46 ) - goto tr27; - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr28; - goto tr1; -case 17: - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr29; - goto tr1; -case 18: - if ( (*p) == 93 ) - goto tr20; - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr30; - goto tr1; -case 19: - if ( (*p) == 93 ) - goto tr20; - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr31; - goto tr1; -case 20: - if ( (*p) == 93 ) - goto tr20; - goto tr1; -case 21: - if ( (*p) == 46 ) - goto tr27; - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr32; - goto tr1; -case 22: - if ( (*p) == 46 ) - goto tr27; - goto tr1; -case 23: - if ( (*p) == 46 ) - goto tr24; - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr33; - goto tr1; -case 24: - if ( (*p) == 46 ) - goto tr24; - goto tr1; -case 25: - switch( (*p) ) { - case 45: goto tr16; - case 46: goto tr21; - case 58: goto tr18; - case 95: goto tr16; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr34; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr17; - } else - goto tr17; - goto tr1; -case 26: - switch( (*p) ) { - case 45: goto tr16; - case 46: goto tr21; - case 58: goto tr18; - case 95: goto tr16; - } - if ( (*p) < 65 ) { - if ( 
48 <= (*p) && (*p) <= 57 ) - goto tr17; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr17; - } else - goto tr17; - goto tr1; -case 27: - switch( (*p) ) { - case 34: goto tr36; - case 92: goto tr37; - } - if ( 32 <= (*p) && (*p) <= 126 ) - goto tr35; - goto tr1; -case 28: - switch( (*p) ) { - case 34: goto tr39; - case 92: goto tr40; - } - if ( 32 <= (*p) && (*p) <= 126 ) - goto tr38; - goto tr1; -case 29: - if ( (*p) == 64 ) - goto tr41; - goto tr1; -case 30: - if ( 32 <= (*p) && (*p) <= 126 ) - goto tr42; - goto tr1; -case 31: - switch( (*p) ) { - case 34: goto tr44; - case 92: goto tr45; - } - if ( 32 <= (*p) && (*p) <= 126 ) - goto tr43; - goto tr1; -case 32: - switch( (*p) ) { - case 34: goto tr47; - case 45: goto tr46; - case 62: goto tr48; - case 64: goto tr49; - } - if ( (*p) < 47 ) { - if ( (*p) > 39 ) { - if ( 42 <= (*p) && (*p) <= 43 ) - goto tr46; - } else if ( (*p) >= 33 ) - goto tr46; - } else if ( (*p) > 57 ) { - if ( (*p) > 90 ) { - if ( 94 <= (*p) && (*p) <= 126 ) - goto tr46; - } else if ( (*p) >= 61 ) - goto tr46; - } else - goto tr46; - goto tr1; -case 33: - switch( (*p) ) { - case 33: goto tr50; - case 46: goto tr51; - case 61: goto tr50; - case 64: goto tr52; - } - if ( (*p) < 45 ) { - if ( (*p) > 39 ) { - if ( 42 <= (*p) && (*p) <= 43 ) - goto tr50; - } else if ( (*p) >= 35 ) - goto tr50; - } else if ( (*p) > 57 ) { - if ( (*p) > 90 ) { - if ( 94 <= (*p) && (*p) <= 126 ) - goto tr50; - } else if ( (*p) >= 63 ) - goto tr50; - } else - goto tr50; - goto tr1; -case 34: - switch( (*p) ) { - case 33: goto tr50; - case 45: goto tr50; - case 61: goto tr50; - case 63: goto tr50; - } - if ( (*p) < 47 ) { - if ( (*p) > 39 ) { - if ( 42 <= (*p) && (*p) <= 43 ) - goto tr50; - } else if ( (*p) >= 35 ) - goto tr50; - } else if ( (*p) > 57 ) { - if ( (*p) > 90 ) { - if ( 94 <= (*p) && (*p) <= 126 ) - goto tr50; - } else if ( (*p) >= 65 ) - goto tr50; - } else - goto tr50; - goto tr1; -case 35: - if ( (*p) == 91 ) - goto tr54; - if ( 
(*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr53; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr53; - } else - goto tr53; - goto tr1; -case 36: - switch( (*p) ) { - case 45: goto tr55; - case 46: goto tr56; - case 62: goto tr58; - case 95: goto tr55; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr57; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr57; - } else - goto tr57; - goto tr1; -case 37: - switch( (*p) ) { - case 45: goto tr55; - case 95: goto tr55; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr57; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr57; - } else - goto tr57; - goto tr1; -case 38: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr57; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr57; - } else - goto tr57; - goto tr1; -case 78: - if ( (*p) == 32 ) - goto tr105; - if ( 9 <= (*p) && (*p) <= 13 ) - goto tr105; - goto tr1; -case 39: - switch( (*p) ) { - case 45: goto tr59; - case 95: goto tr59; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr60; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr61; - } else - goto tr61; - goto tr1; -case 40: - switch( (*p) ) { - case 45: goto tr62; - case 95: goto tr62; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr63; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr63; - } else - goto tr63; - goto tr1; -case 41: - switch( (*p) ) { - case 45: goto tr62; - case 58: goto tr64; - case 95: goto tr62; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr63; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr63; - } else - goto tr63; - goto tr1; -case 42: - if ( (*p) > 90 ) { - if ( 94 <= (*p) && (*p) <= 126 ) - goto tr65; - } else if ( (*p) >= 33 ) - goto tr65; - goto tr1; -case 43: - if ( (*p) == 93 ) - goto tr66; - if ( (*p) > 90 ) { - 
if ( 94 <= (*p) && (*p) <= 126 ) - goto tr65; - } else if ( (*p) >= 33 ) - goto tr65; - goto tr1; -case 44: - if ( (*p) == 62 ) - goto tr67; - goto tr1; -case 45: - switch( (*p) ) { - case 45: goto tr62; - case 46: goto tr68; - case 58: goto tr64; - case 95: goto tr62; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr69; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr63; - } else - goto tr63; - goto tr1; -case 46: - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr70; - goto tr1; -case 47: - if ( (*p) == 46 ) - goto tr71; - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr72; - goto tr1; -case 48: - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr73; - goto tr1; -case 49: - if ( (*p) == 46 ) - goto tr74; - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr75; - goto tr1; -case 50: - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr76; - goto tr1; -case 51: - if ( (*p) == 93 ) - goto tr66; - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr77; - goto tr1; -case 52: - if ( (*p) == 93 ) - goto tr66; - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr78; - goto tr1; -case 53: - if ( (*p) == 93 ) - goto tr66; - goto tr1; -case 54: - if ( (*p) == 46 ) - goto tr74; - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr79; - goto tr1; -case 55: - if ( (*p) == 46 ) - goto tr74; - goto tr1; -case 56: - if ( (*p) == 46 ) - goto tr71; - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr80; - goto tr1; -case 57: - if ( (*p) == 46 ) - goto tr71; - goto tr1; -case 58: - switch( (*p) ) { - case 45: goto tr62; - case 46: goto tr68; - case 58: goto tr64; - case 95: goto tr62; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr81; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr63; - } else - goto tr63; - goto tr1; -case 59: - switch( (*p) ) { - case 45: goto tr62; - case 46: goto tr68; - case 58: goto tr64; - case 95: goto tr62; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr63; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr63; 
- } else - goto tr63; - goto tr1; -case 60: - switch( (*p) ) { - case 34: goto tr83; - case 92: goto tr84; - } - if ( 32 <= (*p) && (*p) <= 126 ) - goto tr82; - goto tr1; -case 61: - switch( (*p) ) { - case 34: goto tr86; - case 92: goto tr87; - } - if ( 32 <= (*p) && (*p) <= 126 ) - goto tr85; - goto tr1; -case 62: - if ( (*p) == 64 ) - goto tr88; - goto tr1; -case 63: - if ( 32 <= (*p) && (*p) <= 126 ) - goto tr89; - goto tr1; -case 64: - switch( (*p) ) { - case 34: goto tr91; - case 92: goto tr92; - } - if ( 32 <= (*p) && (*p) <= 126 ) - goto tr90; - goto tr1; -case 79: - if ( (*p) == 32 ) - goto tr106; - if ( 9 <= (*p) && (*p) <= 13 ) - goto tr106; - goto tr1; -case 65: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr93; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr93; - } else - goto tr93; - goto tr1; -case 66: - switch( (*p) ) { - case 44: goto tr94; - case 45: goto tr95; - case 46: goto tr49; - case 58: goto tr96; - case 95: goto tr95; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr93; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr93; - } else - goto tr93; - goto tr1; -case 67: - if ( (*p) == 64 ) - goto tr49; - goto tr1; -case 68: - switch( (*p) ) { - case 45: goto tr95; - case 95: goto tr95; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr93; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr93; - } else - goto tr93; - goto tr1; -case 69: - switch( (*p) ) { - case 34: goto tr47; - case 45: goto tr46; - case 61: goto tr46; - case 63: goto tr46; - } - if ( (*p) < 47 ) { - if ( (*p) > 39 ) { - if ( 42 <= (*p) && (*p) <= 43 ) - goto tr46; - } else if ( (*p) >= 33 ) - goto tr46; - } else if ( (*p) > 57 ) { - if ( (*p) > 90 ) { - if ( 94 <= (*p) && (*p) <= 126 ) - goto tr46; - } else if ( (*p) >= 65 ) - goto tr46; - } else - goto tr46; - goto tr1; -case 70: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr97; - } else 
if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr97; - } else - goto tr97; - goto tr1; -case 71: - switch( (*p) ) { - case 44: goto tr98; - case 45: goto tr99; - case 46: goto tr5; - case 58: goto tr100; - case 95: goto tr99; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr97; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr97; - } else - goto tr97; - goto tr1; -case 72: - if ( (*p) == 64 ) - goto tr5; - goto tr1; -case 73: - switch( (*p) ) { - case 45: goto tr99; - case 95: goto tr99; - } - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr97; - } else if ( (*p) > 90 ) { - if ( 97 <= (*p) && (*p) <= 122 ) - goto tr97; - } else - goto tr97; - goto tr1; -case 74: - switch( (*p) ) { - case 34: goto tr3; - case 45: goto tr2; - case 61: goto tr2; - case 63: goto tr2; - } - if ( (*p) < 47 ) { - if ( (*p) > 39 ) { - if ( 42 <= (*p) && (*p) <= 43 ) - goto tr2; - } else if ( (*p) >= 33 ) - goto tr2; - } else if ( (*p) > 57 ) { - if ( (*p) > 90 ) { - if ( 94 <= (*p) && (*p) <= 126 ) - goto tr2; - } else if ( (*p) >= 65 ) - goto tr2; - } else - goto tr2; - goto tr1; - } - - tr1: cs = 0; goto _again; - tr0: cs = 1; goto _again; - tr6: cs = 2; goto _again; - tr2: cs = 2; goto f0; - tr7: cs = 3; goto _again; - tr8: cs = 4; goto f2; - tr41: cs = 4; goto f8; - tr11: cs = 5; goto _again; - tr102: cs = 6; goto _again; - tr10: cs = 7; goto _again; - tr16: cs = 8; goto _again; - tr13: cs = 8; goto f4; - tr17: cs = 9; goto _again; - tr15: cs = 9; goto f4; - tr18: cs = 10; goto _again; - tr19: cs = 11; goto _again; - tr14: cs = 12; goto f4; - tr21: cs = 13; goto _again; - tr23: cs = 14; goto _again; - tr24: cs = 15; goto _again; - tr26: cs = 16; goto _again; - tr27: cs = 17; goto _again; - tr29: cs = 18; goto _again; - tr30: cs = 19; goto _again; - tr31: cs = 20; goto _again; - tr28: cs = 21; goto _again; - tr32: cs = 22; goto _again; - tr25: cs = 23; goto _again; - tr33: cs = 24; goto _again; - tr22: cs = 25; goto 
_again; - tr34: cs = 26; goto _again; - tr3: cs = 27; goto f1; - tr38: cs = 28; goto _again; - tr35: cs = 28; goto f6; - tr43: cs = 28; goto f9; - tr39: cs = 29; goto f2; - tr36: cs = 29; goto f7; - tr44: cs = 29; goto f10; - tr40: cs = 30; goto _again; - tr37: cs = 30; goto f6; - tr45: cs = 30; goto f9; - tr42: cs = 31; goto _again; - tr4: cs = 32; goto _again; - tr50: cs = 33; goto _again; - tr46: cs = 33; goto f0; - tr51: cs = 34; goto _again; - tr52: cs = 35; goto f2; - tr88: cs = 35; goto f8; - tr57: cs = 36; goto _again; - tr53: cs = 36; goto f3; - tr55: cs = 37; goto _again; - tr56: cs = 38; goto _again; - tr54: cs = 39; goto _again; - tr62: cs = 40; goto _again; - tr59: cs = 40; goto f4; - tr63: cs = 41; goto _again; - tr61: cs = 41; goto f4; - tr64: cs = 42; goto _again; - tr65: cs = 43; goto _again; - tr66: cs = 44; goto f5; - tr60: cs = 45; goto f4; - tr68: cs = 46; goto _again; - tr70: cs = 47; goto _again; - tr71: cs = 48; goto _again; - tr73: cs = 49; goto _again; - tr74: cs = 50; goto _again; - tr76: cs = 51; goto _again; - tr77: cs = 52; goto _again; - tr78: cs = 53; goto _again; - tr75: cs = 54; goto _again; - tr79: cs = 55; goto _again; - tr72: cs = 56; goto _again; - tr80: cs = 57; goto _again; - tr69: cs = 58; goto _again; - tr81: cs = 59; goto _again; - tr47: cs = 60; goto f1; - tr85: cs = 61; goto _again; - tr82: cs = 61; goto f6; - tr90: cs = 61; goto f9; - tr86: cs = 62; goto f2; - tr83: cs = 62; goto f7; - tr91: cs = 62; goto f10; - tr87: cs = 63; goto _again; - tr84: cs = 63; goto f6; - tr92: cs = 63; goto f9; - tr89: cs = 64; goto _again; - tr49: cs = 65; goto _again; - tr93: cs = 66; goto _again; - tr94: cs = 67; goto _again; - tr95: cs = 68; goto _again; - tr96: cs = 69; goto _again; - tr5: cs = 70; goto _again; - tr97: cs = 71; goto _again; - tr98: cs = 72; goto _again; - tr99: cs = 73; goto _again; - tr100: cs = 74; goto _again; - tr12: cs = 75; goto _again; - tr9: cs = 75; goto f3; - tr103: cs = 76; goto _again; - tr101: cs = 76; 
goto f13; - tr104: cs = 76; goto f14; - tr105: cs = 76; goto f15; - tr106: cs = 76; goto f16; - tr20: cs = 77; goto f5; - tr58: cs = 78; goto f11; - tr67: cs = 78; goto f12; - tr48: cs = 79; goto _again; - -f6: -#line 5 "src/ragel/smtp_addr_parser.rl" - { - addr->user = p; - } - goto _again; -f2: -#line 9 "src/ragel/smtp_addr_parser.rl" - { - if (addr->user) { - addr->user_len = p - addr->user; - } - } - goto _again; -f3: -#line 15 "src/ragel/smtp_addr_parser.rl" - { - addr->domain = p; - } - goto _again; -f4: -#line 25 "src/ragel/smtp_addr_parser.rl" - { - addr->domain = p; - addr->flags |= RSPAMD_EMAIL_ADDR_IP; - } - goto _again; -f5: -#line 30 "src/ragel/smtp_addr_parser.rl" - { - if (addr->domain) { - addr->domain_len = p - addr->domain; - } - } - goto _again; -f9: -#line 36 "src/ragel/smtp_addr_parser.rl" - { - addr->flags |= RSPAMD_EMAIL_ADDR_HAS_BACKSLASH; - } - goto _again; -f8: -#line 40 "src/ragel/smtp_addr_parser.rl" - { - addr->flags |= RSPAMD_EMAIL_ADDR_QUOTED; - } - goto _again; -f1: -#line 59 "src/ragel/smtp_addr_parser.rl" - { - addr->addr = p; - } - goto _again; -f12: -#line 63 "src/ragel/smtp_addr_parser.rl" - { - if (addr->addr) { - addr->addr_len = p - addr->addr; - } - } - goto _again; -f7: -#line 5 "src/ragel/smtp_addr_parser.rl" - { - addr->user = p; - } -#line 9 "src/ragel/smtp_addr_parser.rl" - { - if (addr->user) { - addr->user_len = p - addr->user; - } - } - goto _again; -f11: -#line 19 "src/ragel/smtp_addr_parser.rl" - { - if (addr->domain) { - addr->domain_len = p - addr->domain; - } - } -#line 63 "src/ragel/smtp_addr_parser.rl" - { - if (addr->addr) { - addr->addr_len = p - addr->addr; - } - } - goto _again; -f10: -#line 36 "src/ragel/smtp_addr_parser.rl" - { - addr->flags |= RSPAMD_EMAIL_ADDR_HAS_BACKSLASH; - } -#line 9 "src/ragel/smtp_addr_parser.rl" - { - if (addr->user) { - addr->user_len = p - addr->user; - } - } - goto _again; -f16: -#line 44 "src/ragel/smtp_addr_parser.rl" - { - addr->flags |= RSPAMD_EMAIL_ADDR_EMPTY; - 
addr->addr = ""; - addr->user = addr->addr; - addr->domain = addr->addr; - } -#line 51 "src/ragel/smtp_addr_parser.rl" - { - addr->flags |= RSPAMD_EMAIL_ADDR_VALID; - } - goto _again; -f15: -#line 55 "src/ragel/smtp_addr_parser.rl" - { - addr->flags |= RSPAMD_EMAIL_ADDR_BRACED; - } -#line 51 "src/ragel/smtp_addr_parser.rl" - { - addr->flags |= RSPAMD_EMAIL_ADDR_VALID; - } - goto _again; -f0: -#line 59 "src/ragel/smtp_addr_parser.rl" - { - addr->addr = p; - } -#line 5 "src/ragel/smtp_addr_parser.rl" - { - addr->user = p; - } - goto _again; -f14: -#line 63 "src/ragel/smtp_addr_parser.rl" - { - if (addr->addr) { - addr->addr_len = p - addr->addr; - } - } -#line 51 "src/ragel/smtp_addr_parser.rl" - { - addr->flags |= RSPAMD_EMAIL_ADDR_VALID; - } - goto _again; -f13: -#line 19 "src/ragel/smtp_addr_parser.rl" - { - if (addr->domain) { - addr->domain_len = p - addr->domain; - } - } -#line 63 "src/ragel/smtp_addr_parser.rl" - { - if (addr->addr) { - addr->addr_len = p - addr->addr; - } - } -#line 51 "src/ragel/smtp_addr_parser.rl" - { - addr->flags |= RSPAMD_EMAIL_ADDR_VALID; - } - goto _again; - -_again: - if ( cs == 0 ) - goto _out; - if ( ++p != pe ) - goto _resume; - _test_eof: {} - if ( p == eof ) - { - switch ( _smtp_addr_parser_eof_actions[cs] ) { - case 17: -#line 44 "src/ragel/smtp_addr_parser.rl" - { - addr->flags |= RSPAMD_EMAIL_ADDR_EMPTY; - addr->addr = ""; - addr->user = addr->addr; - addr->domain = addr->addr; - } -#line 51 "src/ragel/smtp_addr_parser.rl" - { - addr->flags |= RSPAMD_EMAIL_ADDR_VALID; - } - break; - case 16: -#line 55 "src/ragel/smtp_addr_parser.rl" - { - addr->flags |= RSPAMD_EMAIL_ADDR_BRACED; - } -#line 51 "src/ragel/smtp_addr_parser.rl" - { - addr->flags |= RSPAMD_EMAIL_ADDR_VALID; - } - break; - case 15: -#line 63 "src/ragel/smtp_addr_parser.rl" - { - if (addr->addr) { - addr->addr_len = p - addr->addr; - } - } -#line 51 "src/ragel/smtp_addr_parser.rl" - { - addr->flags |= RSPAMD_EMAIL_ADDR_VALID; - } - break; - case 14: -#line 19 
"src/ragel/smtp_addr_parser.rl" - { - if (addr->domain) { - addr->domain_len = p - addr->domain; - } - } -#line 63 "src/ragel/smtp_addr_parser.rl" - { - if (addr->addr) { - addr->addr_len = p - addr->addr; - } - } -#line 51 "src/ragel/smtp_addr_parser.rl" - { - addr->flags |= RSPAMD_EMAIL_ADDR_VALID; - } - break; -#line 1201 "src/libmime/parsers/smtp_addr_parser.c" - } - } - - _out: {} - } - -#line 90 "src/ragel/smtp_addr_parser.rl" - - return cs; -} diff --git a/src/libmime/smtp_parsers.h b/src/libmime/smtp_parsers.h new file mode 100644 index 000000000..62e7738e3 --- /dev/null +++ b/src/libmime/smtp_parsers.h @@ -0,0 +1,29 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef SRC_LIBMIME_SMTP_PARSERS_H_ +#define SRC_LIBMIME_SMTP_PARSERS_H_ + +#include "config.h" +#include "email_addr.h" +#include "task.h" +#include "message.h" + +int rspamd_smtp_recieved_parse (struct rspamd_task *task, + const char *data, size_t len, struct received_header *rh); +int rspamd_smtp_addr_parse (const char *data, size_t len, + struct rspamd_email_address *addr); + +#endif /* SRC_LIBMIME_SMTP_PARSERS_H_ */ diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h index e14fbd90a..93470c0f3 100644 --- a/src/libserver/cfg_file.h +++ b/src/libserver/cfg_file.h @@ -405,6 +405,9 @@ struct rspamd_config { struct rspamd_config_post_load_script *on_load; /**< list of scripts executed on config load */ + gchar *ssl_ca_path; /**< path to CA certs */ + gchar *ssl_ciphers; /**< set of preferred ciphers */ + ref_entry_t ref; /**< reference counter */ }; diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c index ebbc29d61..7b7c76420 100644 --- a/src/libserver/cfg_rcl.c +++ b/src/libserver/cfg_rcl.c @@ -15,6 +15,8 @@ */ #include "cfg_rcl.h" #include "rspamd.h" +#include "../../contrib/mumhash/mum.h" +#define HASH_CASELESS #include "uthash_strcase.h" #include "utlist.h" #include "cfg_file.h" @@ -1922,6 +1924,18 @@ rspamd_rcl_config_init (struct rspamd_config *cfg) G_STRUCT_OFFSET (struct rspamd_config, ignore_received), 0, "Ignore data from the first received header"); + rspamd_rcl_add_default_handler (sub, + "ssl_ca_path", + rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct rspamd_config, ssl_ca_path), + RSPAMD_CL_FLAG_STRING_PATH, + "Path to ssl CA file"); + rspamd_rcl_add_default_handler (sub, + "ssl_ciphers", + rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct rspamd_config, ssl_ciphers), + 0, + "List of ssl ciphers (e.g. 
HIGH:!aNULL:!kRSA:!PSK:!SRP:!MD5:!RC4)"); /* New DNS configuration */ ssub = rspamd_rcl_add_section_doc (&sub->subsections, "dns", NULL, NULL, UCL_OBJECT, FALSE, TRUE, diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c index 85fd6af80..9e80cbaa8 100644 --- a/src/libserver/cfg_utils.c +++ b/src/libserver/cfg_utils.c @@ -161,6 +161,8 @@ rspamd_config_new (void) */ cfg->enable_shutdown_workaround = TRUE; + cfg->ssl_ciphers = "HIGH:!aNULL:!kRSA:!PSK:!SRP:!MD5:!RC4"; + REF_INIT_RETAIN (cfg, rspamd_config_free); return cfg; diff --git a/src/libserver/dkim.c b/src/libserver/dkim.c index 5052960f0..1f65733ba 100644 --- a/src/libserver/dkim.c +++ b/src/libserver/dkim.c @@ -1260,6 +1260,147 @@ rspamd_dkim_simple_body_step (rspamd_dkim_context_t *ctx, return (len != 0); } +static const gchar * +rspamd_dkim_skip_empty_lines (const gchar *start, const gchar *end, + guint type, gboolean *need_crlf) +{ + const gchar *p = end - 1, *t; + enum { + init = 0, + init_2, + got_cr, + got_lf, + got_crlf, + test_spaces, + } state = init; + guint skip = 0; + + while (p >= start + 2) { + switch (state) { + case init: + if (*p == '\r') { + state = got_cr; + } + else if (*p == '\n') { + state = got_lf; + } + else if (type == DKIM_CANON_RELAXED && *p == ' ') { + skip = 0; + state = test_spaces; + } + else { + if (type == DKIM_CANON_SIMPLE) { + *need_crlf = TRUE; + } + + goto end; + } + break; + case init_2: + if (*p == '\r') { + state = got_cr; + } + else if (*p == '\n') { + state = got_lf; + } + else if (type == DKIM_CANON_RELAXED && *p == ' ') { + skip = 0; + state = test_spaces; + } + else { + goto end; + } + break; + case got_cr: + if (*(p - 1) == '\r') { + p --; + state = got_cr; + } + else if (*(p - 1) == '\n') { + if ((*p - 2) == '\r') { + /* \r\n\r -> we know about one line */ + p -= 1; + state = got_crlf; + } + else { + /* \n\r -> we know about one line */ + p -= 1; + state = got_lf; + } + } + else if (type == DKIM_CANON_RELAXED && *(p - 1) == ' ') { + skip = 1; + state 
= test_spaces; + } + else { + goto end; + } + break; + case got_lf: + if (*(p - 1) == '\r') { + state = got_crlf; + } + else if (*(p - 1) == '\n') { + /* We know about one line */ + p --; + state = got_lf; + } + else if (type == DKIM_CANON_RELAXED && *(p - 1) == ' ') { + skip = 1; + state = test_spaces; + } + else { + goto end; + } + break; + case got_crlf: + if (p > start - 2) { + if (*(p - 3) == '\r') { + p -= 2; + state = got_cr; + } + else if (*(p - 3) == '\n') { + p -= 2; + state = got_lf; + } + else if (type == DKIM_CANON_RELAXED && *(p - 3) == ' ') { + skip = 2; + state = test_spaces; + } + else { + goto end; + } + } + else { + goto end; + } + break; + case test_spaces: + t = p - skip; + + while (t > start - 2 && *t == ' ') { + t --; + } + + if (*t == '\r') { + p = t; + state = got_cr; + } + else if (*t == '\n') { + p = t; + state = got_lf; + } + else { + goto end; + } + break; + } + } + +end: + return p; +} + static gboolean rspamd_dkim_canonize_body (rspamd_dkim_context_t *ctx, const gchar *start, @@ -1267,6 +1408,7 @@ rspamd_dkim_canonize_body (rspamd_dkim_context_t *ctx, { const gchar *p; guint remain = ctx->len ? 
ctx->len : (guint)(end - start); + gboolean need_crlf = FALSE; if (start == NULL) { /* Empty body */ @@ -1279,22 +1421,9 @@ rspamd_dkim_canonize_body (rspamd_dkim_context_t *ctx, } else { /* Strip extra ending CRLF */ - p = end - 1; - while (p >= start + 2) { - if (*p == '\n' && *(p - 1) == '\r' && *(p - 2) == '\n') { - p -= 2; - } - else if (*p == '\n' && *(p - 1) == '\n') { - p--; - } - else if (*p == '\r' && *(p - 1) == '\r') { - p--; - } - else { - break; - } - } + p = rspamd_dkim_skip_empty_lines (start, end, ctx->body_canon_type, &need_crlf); end = p + 1; + if (end == start) { /* Empty body */ if (ctx->body_canon_type == DKIM_CANON_SIMPLE) { @@ -1308,7 +1437,15 @@ rspamd_dkim_canonize_body (rspamd_dkim_context_t *ctx, if (ctx->body_canon_type == DKIM_CANON_SIMPLE) { /* Simple canonization */ while (rspamd_dkim_simple_body_step (ctx, ctx->body_hash, - &start, end - start, &remain)) ; + &start, end - start, &remain)); + + if (need_crlf) { + start = "\r\n"; + end = start + 2; + remain = 2; + rspamd_dkim_simple_body_step (ctx, ctx->body_hash, + &start, end - start, &remain); + } } else { while (rspamd_dkim_relaxed_body_step (ctx, ctx->body_hash, diff --git a/src/libserver/fuzzy_backend.c b/src/libserver/fuzzy_backend.c index 57cbb729d..e58e8a546 100644 --- a/src/libserver/fuzzy_backend.c +++ b/src/libserver/fuzzy_backend.c @@ -431,7 +431,7 @@ rspamd_fuzzy_backend_open_db (const gchar *path, GError **err) bk->expired = 0; bk->pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "fuzzy_backend"); bk->db = rspamd_sqlite3_open_or_create (bk->pool, bk->path, - create_tables_sql, 0, err); + create_tables_sql, 1, err); if (bk->db == NULL) { rspamd_fuzzy_backend_close (bk); diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c index d314d3fdc..becaca01b 100644 --- a/src/libserver/protocol.c +++ b/src/libserver/protocol.c @@ -21,6 +21,7 @@ #include "message.h" #include "utlist.h" #include "http.h" +#include "http_private.h" #include "email_addr.h" 
#include "worker_private.h" #include "cryptobox.h" @@ -270,11 +271,10 @@ rspamd_protocol_handle_headers (struct rspamd_task *task, rspamd_fstring_t *hn, *hv; rspamd_ftok_t *hn_tok, *hv_tok, srch; gboolean fl, has_ip = FALSE; - struct rspamd_http_header *h; + struct rspamd_http_header *h, *htmp; struct rspamd_email_address *addr; - LL_FOREACH (msg->headers, h) - { + HASH_ITER (hh, msg->headers, h, htmp) { hn = rspamd_fstring_new_init (h->name->begin, h->name->len); hv = rspamd_fstring_new_init (h->value->begin, h->value->len); hn_tok = rspamd_ftok_map (hn); @@ -873,9 +873,8 @@ rspamd_metric_result_ucl (struct rspamd_task *task, return obj; } -static void -rspamd_ucl_torspamc_output (struct rspamd_task *task, - ucl_object_t *top, +void +rspamd_ucl_torspamc_output (const ucl_object_t *top, rspamd_fstring_t **out) { const ucl_object_t *metric, *score, @@ -927,12 +926,15 @@ rspamd_ucl_torspamc_output (struct rspamd_task *task, } } - rspamd_printf_fstring (out, "Message-ID: %s\r\n", task->message_id); + elt = ucl_object_lookup (top, "message-id"); + if (elt != NULL) { + rspamd_printf_fstring (out, "Message-ID: %s\r\n", + ucl_object_tostring (elt)); + } } static void -rspamd_ucl_tospamc_output (struct rspamd_task *task, - ucl_object_t *top, +rspamd_ucl_tospamc_output (const ucl_object_t *top, rspamd_fstring_t **out) { const ucl_object_t *metric, *score, @@ -1015,6 +1017,7 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg, const struct rspamd_re_cache_stat *restat; gpointer h, v; ucl_object_t *top = NULL; + rspamd_fstring_t *reply; gint action; /* Write custom headers */ @@ -1048,21 +1051,22 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg, restat->bytes_scanned); } - msg->body = rspamd_fstring_sized_new (1000); + reply = rspamd_fstring_sized_new (1000); if (msg->method < HTTP_SYMBOLS && !RSPAMD_TASK_IS_SPAMC (task)) { - rspamd_ucl_emit_fstring (top, UCL_EMIT_JSON_COMPACT, &msg->body); + rspamd_ucl_emit_fstring (top, UCL_EMIT_JSON_COMPACT, 
&reply); } else { if (RSPAMD_TASK_IS_SPAMC (task)) { - rspamd_ucl_tospamc_output (task, top, &msg->body); + rspamd_ucl_tospamc_output (top, &reply); } else { - rspamd_ucl_torspamc_output (task, top, &msg->body); + rspamd_ucl_torspamc_output (top, &reply); } } ucl_object_unref (top); + rspamd_http_message_set_body_from_fstring_steal (msg, reply); if (!(task->flags & RSPAMD_TASK_FLAG_NO_STAT)) { /* Update stat for default metric */ @@ -1185,6 +1189,7 @@ rspamd_protocol_write_reply (struct rspamd_task *task) struct rspamd_http_message *msg; const gchar *ctype = "application/json"; struct rspamd_abstract_worker_ctx *actx; + rspamd_fstring_t *reply; msg = rspamd_http_new_message (HTTP_RESPONSE); @@ -1216,9 +1221,10 @@ rspamd_protocol_write_reply (struct rspamd_task *task) ucl_object_insert_key (top, ucl_object_fromstring (g_quark_to_string (task->err->domain)), "error_domain", 0, false); - msg->body = rspamd_fstring_sized_new (256); - rspamd_ucl_emit_fstring (top, UCL_EMIT_JSON_COMPACT, &msg->body); + reply = rspamd_fstring_sized_new (256); + rspamd_ucl_emit_fstring (top, UCL_EMIT_JSON_COMPACT, &reply); ucl_object_unref (top); + rspamd_http_message_set_body_from_fstring_steal (msg, reply); } else { msg->status = rspamd_fstring_new_init ("OK", 2); @@ -1241,7 +1247,7 @@ rspamd_protocol_write_reply (struct rspamd_task *task) } break; case CMD_PING: - msg->body = rspamd_fstring_new_init ("pong" CRLF, 6); + rspamd_http_message_set_body (msg, "pong" CRLF, 6); ctype = "text/plain"; break; case CMD_OTHER: diff --git a/src/libserver/protocol.h b/src/libserver/protocol.h index 3c8383565..1f7acbab2 100644 --- a/src/libserver/protocol.h +++ b/src/libserver/protocol.h @@ -82,5 +82,13 @@ ucl_object_t * rspamd_protocol_write_ucl (struct rspamd_task *task); */ void rspamd_protocol_write_reply (struct rspamd_task *task); +/** + * Convert rspamd output to legacy protocol reply + * @param task + * @param top + * @param out + */ +void rspamd_ucl_torspamc_output (const ucl_object_t *top, + 
rspamd_fstring_t **out); #endif diff --git a/src/libserver/rspamd_control.c b/src/libserver/rspamd_control.c index 8c0a150e2..da573fa94 100644 --- a/src/libserver/rspamd_control.c +++ b/src/libserver/rspamd_control.c @@ -16,7 +16,8 @@ #include "config.h" #include "rspamd.h" #include "rspamd_control.h" -#include "http.h" +#include "libutil/http.h" +#include "libutil/http_private.h" #include "unix-std.h" #include "utlist.h" @@ -107,6 +108,7 @@ rspamd_control_send_error (struct rspamd_control_session *session, gint code, const gchar *error_msg, ...) { struct rspamd_http_message *msg; + rspamd_fstring_t *reply; va_list args; msg = rspamd_http_new_message (HTTP_RESPONSE); @@ -118,8 +120,9 @@ rspamd_control_send_error (struct rspamd_control_session *session, msg->date = time (NULL); msg->code = code; - msg->body = rspamd_fstring_new (); - rspamd_printf_fstring (&msg->body, "{\"error\":\"%V\"}", msg->status); + reply = rspamd_fstring_sized_new (msg->status->len + 16); + rspamd_printf_fstring (&reply, "{\"error\":\"%V\"}", msg->status); + rspamd_http_message_set_body_from_fstring_steal (msg, reply); rspamd_http_connection_reset (session->conn); rspamd_http_connection_write_message (session->conn, msg, @@ -136,13 +139,15 @@ rspamd_control_send_ucl (struct rspamd_control_session *session, ucl_object_t *obj) { struct rspamd_http_message *msg; + rspamd_fstring_t *reply; msg = rspamd_http_new_message (HTTP_RESPONSE); msg->date = time (NULL); msg->code = 200; msg->status = rspamd_fstring_new_init ("OK", 2); - msg->body = rspamd_fstring_sized_new (BUFSIZ); - rspamd_ucl_emit_fstring (obj, UCL_EMIT_JSON_COMPACT, &msg->body); + reply = rspamd_fstring_sized_new (BUFSIZ); + rspamd_ucl_emit_fstring (obj, UCL_EMIT_JSON_COMPACT, &reply); + rspamd_http_message_set_body_from_fstring_steal (msg, reply); rspamd_http_connection_reset (session->conn); rspamd_http_connection_write_message (session->conn, msg, @@ -498,8 +503,13 @@ rspamd_control_process_client_socket (struct rspamd_main 
*rspamd_main, session = g_slice_alloc0 (sizeof (*session)); session->fd = fd; - session->conn = rspamd_http_connection_new (NULL, rspamd_control_error_handler, - rspamd_control_finish_handler, 0, RSPAMD_HTTP_SERVER, NULL); + session->conn = rspamd_http_connection_new (NULL, + rspamd_control_error_handler, + rspamd_control_finish_handler, + 0, + RSPAMD_HTTP_SERVER, + NULL, + NULL); session->rspamd_main = rspamd_main; rspamd_http_connection_read_message (session->conn, session, session->fd, &io_timeout, rspamd_main->ev_base); diff --git a/src/libserver/task.c b/src/libserver/task.c index ce95b927d..f8ebe81f1 100644 --- a/src/libserver/task.c +++ b/src/libserver/task.c @@ -264,12 +264,17 @@ rspamd_task_free (struct rspamd_task *task) } } +struct rspamd_task_map { + gpointer begin; + gulong len; +}; + static void rspamd_task_unmapper (gpointer ud) { - struct rspamd_task *task = ud; + struct rspamd_task_map *m = ud; - munmap ((void *)task->msg.begin, task->msg.len); + munmap (m->begin, m->len); } gboolean @@ -281,14 +286,112 @@ rspamd_task_load_message (struct rspamd_task *task, ucl_object_t *control_obj; gchar filepath[PATH_MAX], *fp; gint fd, flen; + gulong offset = 0, shmem_size = 0; rspamd_ftok_t srch, *tok; gpointer map; struct stat st; + struct rspamd_task_map *m; if (msg) { rspamd_protocol_handle_headers (task, msg); } + srch.begin = "shm"; + srch.len = 3; + tok = g_hash_table_lookup (task->request_headers, &srch); + + if (tok) { + /* Shared memory part */ + r = rspamd_strlcpy (filepath, tok->begin, + MIN (sizeof (filepath), tok->len + 1)); + + rspamd_decode_url (filepath, filepath, r + 1); + flen = strlen (filepath); + + if (filepath[0] == '"' && flen > 2) { + /* We need to unquote filepath */ + fp = &filepath[1]; + fp[flen - 2] = '\0'; + } + else { + fp = &filepath[0]; + } + + fd = shm_open (fp, O_RDONLY, 00600); + + if (fd == -1) { + g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR, + "Cannot open shm segment (%s): %s", fp, strerror 
(errno)); + return FALSE; + } + + if (fstat (fd, &st) == -1) { + g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR, + "Cannot stat shm segment (%s): %s", fp, strerror (errno)); + close (fd); + + return FALSE; + } + + map = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + + if (map == MAP_FAILED) { + close (fd); + g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR, + "Cannot mmap file (%s): %s", fp, strerror (errno)); + return FALSE; + } + + close (fd); + + srch.begin = "shm-offset"; + srch.len = 10; + tok = g_hash_table_lookup (task->request_headers, &srch); + + if (tok) { + rspamd_strtoul (tok->begin, tok->len, &offset); + + if (offset > (gulong)st.st_size) { + msg_err_task ("invalid offset %ul (%ul available) for shm " + "segment %s", offset, st.st_size, fp); + munmap (map, st.st_size); + + return FALSE; + } + } + + srch.begin = "shm-length"; + srch.len = 10; + tok = g_hash_table_lookup (task->request_headers, &srch); + shmem_size = st.st_size; + + if (tok) { + rspamd_strtoul (tok->begin, tok->len, &shmem_size); + + if (shmem_size > (gulong)st.st_size) { + msg_err_task ("invalid length %ul (%ul available) for shm " + "segment %s", shmem_size, st.st_size, fp); + munmap (map, st.st_size); + + return FALSE; + } + } + + task->msg.begin = ((guchar *)map) + offset; + task->msg.len = shmem_size; + task->flags |= RSPAMD_TASK_FLAG_FILE; + m = rspamd_mempool_alloc (task->task_pool, sizeof (*m)); + m->begin = map; + m->len = st.st_size; + + msg_info_task ("loaded message from shared memory %s (%ul size, %ul offset)", + fp, shmem_size, offset); + + rspamd_mempool_add_destructor (task->task_pool, rspamd_task_unmapper, m); + + return TRUE; + } + srch.begin = "file"; srch.len = 4; tok = g_hash_table_lookup (task->request_headers, &srch); @@ -317,7 +420,7 @@ rspamd_task_load_message (struct rspamd_task *task, fp = &filepath[0]; } - if (access (fp, R_OK) == -1 || stat (fp, &st) == -1) { + if (stat (fp, &st) == -1) { g_set_error 
(&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR, "Invalid file (%s): %s", fp, strerror (errno)); return FALSE; @@ -346,46 +449,53 @@ rspamd_task_load_message (struct rspamd_task *task, task->msg.len = st.st_size; task->flags |= RSPAMD_TASK_FLAG_FILE; - rspamd_mempool_add_destructor (task->task_pool, rspamd_task_unmapper, task); + msg_info_task ("loaded message from file %s", fp); + m = rspamd_mempool_alloc (task->task_pool, sizeof (*m)); + m->begin = map; + m->len = st.st_size; + + rspamd_mempool_add_destructor (task->task_pool, rspamd_task_unmapper, m); + + return TRUE; } - else { - debug_task ("got input of length %z", task->msg.len); - task->msg.begin = start; - task->msg.len = len; - if (task->msg.len == 0) { - task->flags |= RSPAMD_TASK_FLAG_EMPTY; - } + /* Plain data */ + debug_task ("got input of length %z", task->msg.len); + task->msg.begin = start; + task->msg.len = len; - if (task->flags & RSPAMD_TASK_FLAG_HAS_CONTROL) { - /* We have control chunk, so we need to process it separately */ - if (task->msg.len < task->message_len) { - msg_warn_task ("message has invalid message length: %ul and total len: %ul", - task->message_len, task->msg.len); - g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR, - "Invalid length"); - return FALSE; - } - control_len = task->msg.len - task->message_len; + if (task->msg.len == 0) { + task->flags |= RSPAMD_TASK_FLAG_EMPTY; + } - if (control_len > 0) { - parser = ucl_parser_new (UCL_PARSER_KEY_LOWERCASE); + if (task->flags & RSPAMD_TASK_FLAG_HAS_CONTROL) { + /* We have control chunk, so we need to process it separately */ + if (task->msg.len < task->message_len) { + msg_warn_task ("message has invalid message length: %ul and total len: %ul", + task->message_len, task->msg.len); + g_set_error (&task->err, rspamd_task_quark(), RSPAMD_PROTOCOL_ERROR, + "Invalid length"); + return FALSE; + } + control_len = task->msg.len - task->message_len; - if (!ucl_parser_add_chunk (parser, task->msg.begin, 
control_len)) { - msg_warn_task ("processing of control chunk failed: %s", - ucl_parser_get_error (parser)); - ucl_parser_free (parser); - } - else { - control_obj = ucl_parser_get_object (parser); - ucl_parser_free (parser); - rspamd_protocol_handle_control (task, control_obj); - ucl_object_unref (control_obj); - } + if (control_len > 0) { + parser = ucl_parser_new (UCL_PARSER_KEY_LOWERCASE); - task->msg.begin += control_len; - task->msg.len -= control_len; + if (!ucl_parser_add_chunk (parser, task->msg.begin, control_len)) { + msg_warn_task ("processing of control chunk failed: %s", + ucl_parser_get_error (parser)); + ucl_parser_free (parser); } + else { + control_obj = ucl_parser_get_object (parser); + ucl_parser_free (parser); + rspamd_protocol_handle_control (task, control_obj); + ucl_object_unref (control_obj); + } + + task->msg.begin += control_len; + task->msg.len -= control_len; } } diff --git a/src/libserver/url.c b/src/libserver/url.c index 70a5f3c9b..068057cd7 100644 --- a/src/libserver/url.c +++ b/src/libserver/url.c @@ -54,6 +54,8 @@ typedef struct url_match_s { gsize m_len; const gchar *pattern; const gchar *prefix; + const gchar *newline_pos; + const gchar *prev_newline_pos; gboolean add_prefix; gchar st; } url_match_t; @@ -156,6 +158,8 @@ struct url_callback_data { rspamd_mempool_t *pool; gint len; gboolean is_html; + guint newline_idx; + GPtrArray *newlines; const gchar *start; const gchar *fin; const gchar *end; @@ -1744,14 +1748,21 @@ url_tld_start (struct url_callback_data *cb, /* Try to find the start of the url by finding any non-urlsafe character or whitespace/punctuation */ while (p >= cb->begin) { - if (!is_domain (*p) || g_ascii_isspace (*p) || is_url_start (*p)) { - if (!is_url_start (*p) && !g_ascii_isspace (*p)) { + if (!is_domain (*p) || g_ascii_isspace (*p) || is_url_start (*p) || + p == match->prev_newline_pos) { + if (!is_url_start (*p) && !g_ascii_isspace (*p) && + p != match->prev_newline_pos) { return FALSE; } - match->st = *p; 
+ if (p != match->prev_newline_pos) { + match->st = *p; - p++; + p++; + } + else { + match->st = '\n'; + } if (!g_ascii_isalnum (*p)) { /* Urls cannot start with strange symbols */ @@ -1801,7 +1812,8 @@ url_tld_end (struct url_callback_data *cb, match->m_len = p - match->m_begin; return TRUE; } - else if (*p == '/' || *p == ':' || is_url_end (*p)) { + else if (*p == '/' || *p == ':' || is_url_end (*p) || + (match->st != '<' && p == match->newline_pos)) { /* Parse arguments, ports by normal way by url default function */ p = match->m_begin; /* Check common prefix */ @@ -1838,7 +1850,8 @@ url_web_start (struct url_callback_data *cb, (g_ascii_strncasecmp (pos, "www", 3) == 0 || g_ascii_strncasecmp (pos, "ftp", 3) == 0)) { - if (!is_url_start (*(pos - 1)) && !g_ascii_isspace (*(pos - 1))) { + if (!is_url_start (*(pos - 1)) && !g_ascii_isspace (*(pos - 1)) && + pos - 1 != match->prev_newline_pos) { return FALSE; } } @@ -1866,8 +1879,14 @@ url_web_end (struct url_callback_data *cb, url_match_t *match) { const gchar *last = NULL; + gint len = cb->end - pos; + + if (match->newline_pos && match->st != '<') { + /* We should also limit our match end to the newline */ + len = MIN (len, match->newline_pos - pos); + } - if (rspamd_web_parse (NULL, pos, cb->end - pos, &last, FALSE) != 0) { + if (rspamd_web_parse (NULL, pos, len, &last, FALSE) != 0) { return FALSE; } @@ -1921,10 +1940,16 @@ url_email_end (struct url_callback_data *cb, { const gchar *last = NULL; struct http_parser_url u; + gint len = cb->end - pos; + + if (match->newline_pos && match->st != '<') { + /* We should also limit our match end to the newline */ + len = MIN (len, match->newline_pos - pos); + } if (!match->prefix || match->prefix[0] == '\0') { /* We have mailto:// at the beginning */ - if (rspamd_mailto_parse (&u, pos, cb->end - pos, &last, FALSE) != 0) { + if (rspamd_mailto_parse (&u, pos, len, &last, FALSE) != 0) { return FALSE; } @@ -1992,12 +2017,13 @@ url_email_end (struct url_callback_data *cb, 
static gboolean rspamd_url_trie_is_match (struct url_matcher *matcher, const gchar *pos, - const gchar *end) + const gchar *end, const gchar *newline_pos) { if (matcher->flags & URL_FLAG_TLD_MATCH) { /* Immediately check pos for valid chars */ if (pos < end) { - if (!g_ascii_isspace (*pos) && *pos != '/' && *pos != '?' && + if (pos != newline_pos && !g_ascii_isspace (*pos) + && *pos != '/' && *pos != '?' && *pos != ':' && !is_url_end (*pos)) { if (*pos == '.') { /* We allow . at the end of the domain however */ @@ -2030,7 +2056,7 @@ rspamd_url_trie_callback (struct rspamd_multipattern *mp, { struct url_matcher *matcher; url_match_t m; - const gchar *pos; + const gchar *pos, *newline_pos = NULL; struct url_callback_data *cb = context; matcher = &g_array_index (url_scanner->matchers, struct url_matcher, @@ -2042,16 +2068,36 @@ rspamd_url_trie_callback (struct rspamd_multipattern *mp, } pos = text + match_pos; + memset (&m, 0, sizeof (m)); m.m_begin = text + match_start; m.m_len = match_pos - match_start; - if (!rspamd_url_trie_is_match (matcher, pos, cb->end)) { + if (cb->newlines && cb->newlines->len > 0) { + newline_pos = g_ptr_array_index (cb->newlines, cb->newline_idx); + + while (pos > newline_pos && cb->newline_idx < cb->newlines->len) { + cb->newline_idx ++; + newline_pos = g_ptr_array_index (cb->newlines, cb->newline_idx); + } + + if (pos > newline_pos) { + newline_pos = NULL; + } + + if (cb->newline_idx > 0) { + m.prev_newline_pos = g_ptr_array_index (cb->newlines, + cb->newline_idx - 1); + } + } + + if (!rspamd_url_trie_is_match (matcher, pos, cb->end, newline_pos)) { return 0; } m.pattern = matcher->pattern; m.prefix = matcher->prefix; m.add_prefix = FALSE; + m.newline_pos = newline_pos; pos = cb->begin + match_start; if (matcher->start (cb, pos, &m) && @@ -2127,7 +2173,7 @@ rspamd_url_trie_generic_callback_common (struct rspamd_multipattern *mp, struct rspamd_url *url; struct url_matcher *matcher; url_match_t m; - const gchar *pos; + const gchar *pos, 
*newline_pos = NULL; struct url_callback_data *cb = context; gint rc; rspamd_mempool_t *pool; @@ -2141,9 +2187,28 @@ rspamd_url_trie_generic_callback_common (struct rspamd_multipattern *mp, return 0; } + memset (&m, 0, sizeof (m)); pos = text + match_pos; - if (!rspamd_url_trie_is_match (matcher, pos, text + len)) { + /* Find the next newline after our pos */ + if (cb->newlines && cb->newlines->len > 0) { + newline_pos = g_ptr_array_index (cb->newlines, cb->newline_idx); + + while (pos > newline_pos && cb->newline_idx < cb->newlines->len) { + cb->newline_idx ++; + newline_pos = g_ptr_array_index (cb->newlines, cb->newline_idx); + } + + if (pos > newline_pos) { + newline_pos = NULL; + } + if (cb->newline_idx > 0) { + m.prev_newline_pos = g_ptr_array_index (cb->newlines, + cb->newline_idx - 1); + } + } + + if (!rspamd_url_trie_is_match (matcher, pos, text + len, newline_pos)) { return 0; } @@ -2153,6 +2218,7 @@ rspamd_url_trie_generic_callback_common (struct rspamd_multipattern *mp, m.add_prefix = FALSE; m.m_begin = text + match_start; m.m_len = match_pos - match_start; + m.newline_pos = newline_pos; if (matcher->start (cb, pos, &m) && matcher->end (cb, pos, &m)) { @@ -2310,7 +2376,7 @@ rspamd_url_text_extract (rspamd_mempool_t *pool, mcbd.part = part; rspamd_url_find_multiple (task->task_pool, part->stripped_content->data, - part->stripped_content->len, is_html, + part->stripped_content->len, is_html, part->newlines, rspamd_url_text_part_callback, &mcbd); /* Handle offsets of this part */ @@ -2323,7 +2389,7 @@ rspamd_url_text_extract (rspamd_mempool_t *pool, void rspamd_url_find_multiple (rspamd_mempool_t *pool, const gchar *in, - gsize inlen, gboolean is_html, + gsize inlen, gboolean is_html, GPtrArray *nlines, url_insert_function func, gpointer ud) { struct url_callback_data cb; @@ -2342,6 +2408,7 @@ rspamd_url_find_multiple (rspamd_mempool_t *pool, const gchar *in, cb.funcd = ud; cb.func = func; + cb.newlines = nlines; rspamd_multipattern_lookup 
(url_scanner->search_trie, in, inlen, diff --git a/src/libserver/url.h b/src/libserver/url.h index f42ab5dde..36fbb2c76 100644 --- a/src/libserver/url.h +++ b/src/libserver/url.h @@ -136,7 +136,7 @@ typedef void (*url_insert_function) (struct rspamd_url *url, * @param ud */ void rspamd_url_find_multiple (rspamd_mempool_t *pool, const gchar *in, - gsize inlen, gboolean is_html, + gsize inlen, gboolean is_html, GPtrArray *nlines, url_insert_function func, gpointer ud); /** * Search for a single url in text and call `func` for each url found diff --git a/src/libserver/worker_util.c b/src/libserver/worker_util.c index d3e296d6d..9924671cf 100644 --- a/src/libserver/worker_util.c +++ b/src/libserver/worker_util.c @@ -24,6 +24,7 @@ #include "rspamd_control.h" #include "libutil/map.h" #include "libutil/map_private.h" +#include "libutil/http_private.h" #ifdef WITH_GPERF_TOOLS #include <gperftools/profiler.h> @@ -248,7 +249,7 @@ rspamd_prepare_worker (struct rspamd_worker *worker, const char *name, void (*accept_handler)(int, short, void *)) { struct event_base *ev_base; - struct event *accept_event; + struct event *accept_events; GList *cur; struct rspamd_worker_listen_socket *ls; @@ -276,13 +277,13 @@ rspamd_prepare_worker (struct rspamd_worker *worker, const char *name, ls = cur->data; if (ls->fd != -1) { - accept_event = g_slice_alloc0 (sizeof (struct event)); - event_set (accept_event, ls->fd, EV_READ | EV_PERSIST, + accept_events = g_slice_alloc0 (sizeof (struct event) * 2); + event_set (&accept_events[0], ls->fd, EV_READ | EV_PERSIST, accept_handler, worker); - event_base_set (ev_base, accept_event); - event_add (accept_event, NULL); + event_base_set (ev_base, &accept_events[0]); + event_add (&accept_events[0], NULL); worker->accept_events = g_list_prepend (worker->accept_events, - accept_event); + accept_events); } cur = g_list_next (cur); @@ -296,7 +297,7 @@ void rspamd_worker_stop_accept (struct rspamd_worker *worker) { GList *cur; - struct event *event; + struct 
event *events; GHashTableIter it; struct rspamd_worker_signal_handler *sigh; gpointer k, v; @@ -305,10 +306,18 @@ rspamd_worker_stop_accept (struct rspamd_worker *worker) /* Remove all events */ cur = worker->accept_events; while (cur) { - event = cur->data; - event_del (event); + events = cur->data; + + if (event_get_base (&events[0])) { + event_del (&events[0]); + } + + if (event_get_base (&events[1])) { + event_del (&events[1]); + } + cur = g_list_next (cur); - g_slice_free1 (sizeof (struct event), event); + g_slice_free1 (sizeof (struct event) * 2, events); } if (worker->accept_events != NULL) { @@ -344,6 +353,7 @@ rspamd_controller_send_error (struct rspamd_http_connection_entry *entry, { struct rspamd_http_message *msg; va_list args; + rspamd_fstring_t *reply; msg = rspamd_http_new_message (HTTP_RESPONSE); @@ -354,8 +364,9 @@ rspamd_controller_send_error (struct rspamd_http_connection_entry *entry, msg->date = time (NULL); msg->code = code; - msg->body = rspamd_fstring_new (); - rspamd_printf_fstring (&msg->body, "{\"error\":\"%V\"}", msg->status); + reply = rspamd_fstring_sized_new (msg->status->len + 16); + rspamd_printf_fstring (&reply, "{\"error\":\"%V\"}", msg->status); + rspamd_http_message_set_body_from_fstring_steal (msg, reply); rspamd_http_connection_reset (entry->conn); rspamd_http_connection_write_message (entry->conn, msg, @@ -373,12 +384,14 @@ rspamd_controller_send_string (struct rspamd_http_connection_entry *entry, const gchar *str) { struct rspamd_http_message *msg; + rspamd_fstring_t *reply; msg = rspamd_http_new_message (HTTP_RESPONSE); msg->date = time (NULL); msg->code = 200; msg->status = rspamd_fstring_new_init ("OK", 2); - msg->body = rspamd_fstring_new_init (str, strlen (str)); + reply = rspamd_fstring_new_init (str, strlen (str)); + rspamd_http_message_set_body_from_fstring_steal (msg, reply); rspamd_http_connection_reset (entry->conn); rspamd_http_connection_write_message (entry->conn, msg, @@ -396,13 +409,15 @@ 
rspamd_controller_send_ucl (struct rspamd_http_connection_entry *entry, ucl_object_t *obj) { struct rspamd_http_message *msg; + rspamd_fstring_t *reply; msg = rspamd_http_new_message (HTTP_RESPONSE); msg->date = time (NULL); msg->code = 200; msg->status = rspamd_fstring_new_init ("OK", 2); - msg->body = rspamd_fstring_sized_new (BUFSIZ); - rspamd_ucl_emit_fstring (obj, UCL_EMIT_JSON_COMPACT, &msg->body); + reply = rspamd_fstring_sized_new (BUFSIZ); + rspamd_ucl_emit_fstring (obj, UCL_EMIT_JSON_COMPACT, &reply); + rspamd_http_message_set_body_from_fstring_steal (msg, reply); rspamd_http_connection_reset (entry->conn); rspamd_http_connection_write_message (entry->conn, msg, diff --git a/src/libstat/backends/redis_backend.c b/src/libstat/backends/redis_backend.c index 4f65a673c..8aed06994 100644 --- a/src/libstat/backends/redis_backend.c +++ b/src/libstat/backends/redis_backend.c @@ -22,6 +22,7 @@ #ifdef WITH_HIREDIS #include "hiredis.h" #include "adapters/libevent.h" +#include "ref.h" #define REDIS_CTX(p) (struct redis_stat_ctx *)(p) @@ -49,7 +50,9 @@ struct redis_stat_ctx { enum rspamd_redis_connection_state { RSPAMD_REDIS_DISCONNECTED = 0, RSPAMD_REDIS_CONNECTED, - RSPAMD_REDIS_TIMEDOUT + RSPAMD_REDIS_REQUEST_SENT, + RSPAMD_REDIS_TIMEDOUT, + RSPAMD_REDIS_TERMINATED }; struct redis_stat_runtime { @@ -64,6 +67,7 @@ struct redis_stat_runtime { guint64 learned; gint id; enum rspamd_redis_connection_state conn_state; + ref_entry_t ref; }; /* Used to get statistics from redis */ @@ -676,11 +680,11 @@ rspamd_redis_fin (gpointer data) { struct redis_stat_runtime *rt = REDIS_RUNTIME (data); - if (rt->conn_state != RSPAMD_REDIS_CONNECTED) { - rt->conn_state = RSPAMD_REDIS_DISCONNECTED; + if (rt->conn_state != RSPAMD_REDIS_TERMINATED) { + rt->conn_state = RSPAMD_REDIS_TERMINATED; + event_del (&rt->timeout_event); + REF_RELEASE (rt); } - - event_del (&rt->timeout_event); } static void @@ -688,11 +692,11 @@ rspamd_redis_fin_learn (gpointer data) { struct redis_stat_runtime *rt 
= REDIS_RUNTIME (data); - if (rt->conn_state != RSPAMD_REDIS_CONNECTED) { - rt->conn_state = RSPAMD_REDIS_DISCONNECTED; + if (rt->conn_state != RSPAMD_REDIS_TERMINATED) { + rt->conn_state = RSPAMD_REDIS_TERMINATED; + event_del (&rt->timeout_event); + REF_RELEASE (rt); } - - event_del (&rt->timeout_event); } static void @@ -703,12 +707,23 @@ rspamd_redis_timeout (gint fd, short what, gpointer d) task = rt->task; - msg_err_task ("connection to redis server %s timed out", + REF_RETAIN (rt); + msg_err_task_check ("connection to redis server %s timed out", rspamd_upstream_name (rt->selected)); rspamd_upstream_fail (rt->selected); - rt->conn_state = RSPAMD_REDIS_TIMEDOUT; - redisAsyncFree (rt->redis); + + if (rt->conn_state == RSPAMD_REDIS_REQUEST_SENT && rt->task) { + rspamd_session_remove_event (task->s, rspamd_redis_fin, rt); + } + + rt->conn_state = RSPAMD_REDIS_TERMINATED; + + if (rt->redis) { + redisAsyncFree (rt->redis); + } + rt->redis = NULL; + REF_RELEASE (rt); } /* Called when we have connected to the redis server and got stats */ @@ -722,6 +737,12 @@ rspamd_redis_connected (redisAsyncContext *c, gpointer r, gpointer priv) task = rt->task; + if (rt->conn_state == RSPAMD_REDIS_TERMINATED) { + /* Task has disappeared already */ + REF_RELEASE (rt); + return; + } + if (c->err == 0) { if (r != NULL) { if (G_LIKELY (reply->type == REDIS_REPLY_INTEGER)) { @@ -748,6 +769,7 @@ rspamd_redis_connected (redisAsyncContext *c, gpointer r, gpointer priv) rt->learned = val; rt->conn_state = RSPAMD_REDIS_CONNECTED; + REF_RETAIN (rt); msg_debug_task ("connected to redis server, tokens learned for %s: %uL", rt->redis_object_expanded, rt->learned); @@ -765,6 +787,8 @@ rspamd_redis_connected (redisAsyncContext *c, gpointer r, gpointer priv) rspamd_upstream_fail (rt->selected); rspamd_session_remove_event (task->s, rspamd_redis_fin, rt); } + + REF_RELEASE (rt); } /* Called when we have received tokens values from redis */ @@ -781,6 +805,12 @@ rspamd_redis_processed 
(redisAsyncContext *c, gpointer r, gpointer priv) task = rt->task; + if (rt->conn_state == RSPAMD_REDIS_TERMINATED) { + /* Task has disappeared already */ + REF_RELEASE (rt); + return; + } + if (c->err == 0) { if (r != NULL) { if (reply->type == REDIS_REPLY_ARRAY) { @@ -841,6 +871,8 @@ rspamd_redis_processed (redisAsyncContext *c, gpointer r, gpointer priv) else { rspamd_session_remove_event (task->s, rspamd_redis_fin, rt); } + + rt->conn_state = RSPAMD_REDIS_CONNECTED; } else { msg_err_task ("error getting reply from redis server %s: %s", @@ -848,6 +880,8 @@ rspamd_redis_processed (redisAsyncContext *c, gpointer r, gpointer priv) rspamd_upstream_fail (rt->selected); rspamd_session_remove_event (task->s, rspamd_redis_fin, rt); } + + REF_RELEASE (rt); } /* Called when we have set tokens during learning */ @@ -859,6 +893,12 @@ rspamd_redis_learned (redisAsyncContext *c, gpointer r, gpointer priv) task = rt->task; + if (rt->conn_state == RSPAMD_REDIS_TERMINATED) { + /* Task has disappeared already */ + REF_RELEASE (rt); + return; + } + if (c->err == 0) { rspamd_upstream_ok (rt->selected); rspamd_session_remove_event (task->s, rspamd_redis_fin_learn, rt); @@ -870,10 +910,12 @@ rspamd_redis_learned (redisAsyncContext *c, gpointer r, gpointer priv) rspamd_session_remove_event (task->s, rspamd_redis_fin_learn, rt); } - if (rt->conn_state == RSPAMD_REDIS_CONNECTED) { + if (rt->conn_state != RSPAMD_REDIS_TERMINATED) { + rt->conn_state = RSPAMD_REDIS_TERMINATED; redisAsyncFree (rt->redis); - rt->conn_state = RSPAMD_REDIS_DISCONNECTED; } + + REF_RELEASE (rt); } static gboolean @@ -1053,6 +1095,16 @@ rspamd_redis_init (struct rspamd_stat_ctx *ctx, return (gpointer)backend; } +static void +rspamd_redis_runtime_dtor (struct redis_stat_runtime *rt) +{ + if (event_get_base (&rt->timeout_event)) { + event_del (&rt->timeout_event); + } + + g_slice_free1 (sizeof (*rt), rt); +} + gpointer rspamd_redis_runtime (struct rspamd_task *task, struct rspamd_statfile_config *stcf, @@ -1090,7 
+1142,8 @@ rspamd_redis_runtime (struct rspamd_task *task, return NULL; } - rt = rspamd_mempool_alloc0 (task->task_pool, sizeof (*rt)); + rt = g_slice_alloc0 (sizeof (*rt)); + REF_INIT_RETAIN (rt, rspamd_redis_runtime_dtor); rspamd_redis_expand_object (ctx->redis_object, ctx, task, &rt->redis_object_expanded); rt->selected = up; @@ -1106,18 +1159,22 @@ rspamd_redis_runtime (struct rspamd_task *task, g_assert (rt->redis != NULL); redisLibeventAttach (rt->redis, task->ev_base); - rspamd_session_add_event (task->s, rspamd_redis_fin, rt, - rspamd_redis_stat_quark ()); + rspamd_redis_maybe_auth (ctx, rt->redis); - /* Now check stats */ - event_set (&rt->timeout_event, -1, EV_TIMEOUT, rspamd_redis_timeout, rt); - event_base_set (task->ev_base, &rt->timeout_event); - double_to_tv (ctx->timeout, &tv); - event_add (&rt->timeout_event, &tv); + if (redisAsyncCommand (rt->redis, rspamd_redis_connected, rt, "HGET %s %s", + rt->redis_object_expanded, "learns") == REDIS_OK) { + rt->conn_state = RSPAMD_REDIS_REQUEST_SENT; - rspamd_redis_maybe_auth (ctx, rt->redis); - redisAsyncCommand (rt->redis, rspamd_redis_connected, rt, "HGET %s %s", - rt->redis_object_expanded, "learns"); + rspamd_session_add_event (task->s, rspamd_redis_fin, rt, + rspamd_redis_stat_quark ()); + + event_set (&rt->timeout_event, -1, EV_TIMEOUT, rspamd_redis_timeout, rt); + event_base_set (task->ev_base, &rt->timeout_event); + double_to_tv (ctx->timeout, &tv); + event_add (&rt->timeout_event, &tv); + /* Cleared by timeout */ + REF_RETAIN (rt); + } return rt; } @@ -1164,6 +1221,7 @@ rspamd_redis_process_tokens (struct rspamd_task *task, ret = redisAsyncFormattedCommand (rt->redis, rspamd_redis_processed, rt, query->str, query->len); if (ret == REDIS_OK) { + rt->conn_state = RSPAMD_REDIS_REQUEST_SENT; rspamd_session_add_event (task->s, rspamd_redis_fin, rt, rspamd_redis_stat_quark ()); /* Reset timeout */ @@ -1186,12 +1244,13 @@ rspamd_redis_finalize_process (struct rspamd_task *task, gpointer runtime, { struct 
redis_stat_runtime *rt = REDIS_RUNTIME (runtime); - if (rt->conn_state == RSPAMD_REDIS_CONNECTED) { + if (rt->conn_state != RSPAMD_REDIS_TERMINATED) { event_del (&rt->timeout_event); + rt->conn_state = RSPAMD_REDIS_TERMINATED; + redisAsyncFree (rt->redis); rt->redis = NULL; - - rt->conn_state = RSPAMD_REDIS_DISCONNECTED; + REF_RELEASE (rt); } } @@ -1208,7 +1267,7 @@ rspamd_redis_learn_tokens (struct rspamd_task *task, GPtrArray *tokens, rspamd_token_t *tok; gint ret; - if (rt->conn_state != RSPAMD_REDIS_DISCONNECTED) { + if (rt->conn_state == RSPAMD_REDIS_CONNECTED) { /* We are likely in some bad state */ msg_err_task ("invalid state for function: %d", rt->conn_state); @@ -1325,10 +1384,10 @@ rspamd_redis_finalize_learn (struct rspamd_task *task, gpointer runtime, if (rt->conn_state == RSPAMD_REDIS_CONNECTED) { event_del (&rt->timeout_event); + rt->conn_state = RSPAMD_REDIS_TERMINATED; redisAsyncFree (rt->redis); rt->redis = NULL; - - rt->conn_state = RSPAMD_REDIS_DISCONNECTED; + REF_RELEASE (rt); } } @@ -1381,11 +1440,13 @@ rspamd_redis_get_stat (gpointer runtime, st = rt->ctx->stat_elt->ud; if (rt->redis) { + if (rt->conn_state == RSPAMD_REDIS_REQUEST_SENT && rt->task) { + rspamd_session_remove_event (rt->task->s, rspamd_redis_fin, rt); + } event_del (&rt->timeout_event); + rt->conn_state = RSPAMD_REDIS_TERMINATED; redisAsyncFree (rt->redis); rt->redis = NULL; - - rt->conn_state = RSPAMD_REDIS_DISCONNECTED; } if (st->stat) { diff --git a/src/libutil/CMakeLists.txt b/src/libutil/CMakeLists.txt index a229c7f0d..0bf4590e2 100644 --- a/src/libutil/CMakeLists.txt +++ b/src/libutil/CMakeLists.txt @@ -20,6 +20,7 @@ SET(LIBRSPAMDUTILSRC ${CMAKE_CURRENT_SOURCE_DIR}/upstream.c ${CMAKE_CURRENT_SOURCE_DIR}/util.c ${CMAKE_CURRENT_SOURCE_DIR}/heap.c - ${CMAKE_CURRENT_SOURCE_DIR}/multipattern.c) + ${CMAKE_CURRENT_SOURCE_DIR}/multipattern.c + ${CMAKE_CURRENT_SOURCE_DIR}/ssl_util.c) # Rspamdutil SET(RSPAMD_UTIL ${LIBRSPAMDUTILSRC} PARENT_SCOPE)
\ No newline at end of file diff --git a/src/libutil/addr.c b/src/libutil/addr.c index 5c5a51e13..3fee0b4bc 100644 --- a/src/libutil/addr.c +++ b/src/libutil/addr.c @@ -199,8 +199,41 @@ rspamd_ip_is_valid (const rspamd_inet_addr_t *addr) return ret; } +static void +rspamd_enable_accept_event (gint fd, short what, gpointer d) +{ + struct event *events = d; + + event_del (&events[1]); + event_add (&events[0], NULL); +} + +static void +rspamd_disable_accept_events (gint sock, GList *accept_events) +{ + GList *cur; + struct event *events; + const gdouble throttling = 0.5; + struct timeval tv; + struct event_base *ev_base; + + double_to_tv (throttling, &tv); + + for (cur = accept_events; cur != NULL; cur = g_list_next (cur)) { + events = cur->data; + + ev_base = event_get_base (&events[0]); + event_del (&events[0]); + event_set (&events[1], sock, EV_TIMEOUT, rspamd_enable_accept_event, + events); + event_base_set (ev_base, &events[1]); + event_add (&events[1], &tv); + } +} + gint -rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t **target) +rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t **target, + GList *accept_events) { gint nfd, serrno; union sa_union su; @@ -215,6 +248,13 @@ rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t **target) if (errno == EAGAIN || errno == EINTR || errno == EWOULDBLOCK) { return 0; } + else if (errno == EMFILE || errno == ENFILE) { + /* Temporary disable accept event */ + rspamd_disable_accept_events (sock, accept_events); + + return 0; + } + return -1; } diff --git a/src/libutil/addr.h b/src/libutil/addr.h index bb9fd2573..200543d6f 100644 --- a/src/libutil/addr.h +++ b/src/libutil/addr.h @@ -193,10 +193,12 @@ gboolean rspamd_ip_is_valid (const rspamd_inet_addr_t *addr); /** * Accept from listening socket filling addr structure * @param sock listening socket - * @param addr allocated inet addr structur + * @param addr allocated inet addr structure + * @param accept_events events for accepting new sockets * @return 
*/ -gint rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t **addr); +gint rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t **addr, + GList *accept_events); /** * Parse host[:port[:priority]] line diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c index a70290a0b..25eda60e7 100644 --- a/src/libutil/fstring.c +++ b/src/libutil/fstring.c @@ -17,8 +17,6 @@ #include "str_util.h" static const gsize default_initial_size = 48; -/* Maximum size when we double the size of new string */ -static const gsize max_grow = 1024 * 1024; #define fstravail(s) ((s)->allocated - (s)->len) @@ -27,7 +25,13 @@ rspamd_fstring_new (void) { rspamd_fstring_t *s; - g_assert (posix_memalign ((void**)&s, 16, default_initial_size + sizeof (*s)) == 0); + if (posix_memalign ((void**)&s, 16, default_initial_size + sizeof (*s)) != 0) { + g_error ("%s: failed to allocate %"G_GSIZE_FORMAT" bytes", + G_STRLOC, default_initial_size + sizeof (*s)); + + return NULL; + } + s->len = 0; s->allocated = default_initial_size; @@ -40,7 +44,12 @@ rspamd_fstring_sized_new (gsize initial_size) rspamd_fstring_t *s; gsize real_size = MAX (default_initial_size, initial_size); - g_assert (posix_memalign ((void **)&s, 16, real_size + sizeof (*s)) == 0); + if (posix_memalign ((void **)&s, 16, real_size + sizeof (*s)) != 0) { + g_error ("%s: failed to allocate %"G_GSIZE_FORMAT" bytes", + G_STRLOC, real_size + sizeof (*s)); + + return NULL; + } s->len = 0; s->allocated = real_size; @@ -53,7 +62,13 @@ rspamd_fstring_new_init (const gchar *init, gsize len) rspamd_fstring_t *s; gsize real_size = MAX (default_initial_size, len); - g_assert (posix_memalign ((void **) &s, 16, real_size + sizeof (*s)) == 0); + if (posix_memalign ((void **) &s, 16, real_size + sizeof (*s)) != 0) { + g_error ("%s: failed to allocate %"G_GSIZE_FORMAT" bytes", + G_STRLOC, real_size + sizeof (*s)); + + return NULL; + } + s->len = len; s->allocated = real_size; memcpy (s->str, init, len); @@ -85,13 +100,14 @@ rspamd_fstring_free 
(rspamd_fstring_t *str) free (str); } -rspamd_fstring_t * -rspamd_fstring_grow (rspamd_fstring_t *str, gsize needed_len) +inline gsize +rspamd_fstring_suggest_size (gsize len, gsize allocated, gsize needed_len) { gsize newlen; - gpointer nptr; + /* Maximum size when we double the size of new string */ + static const gsize max_grow = 1024 * 1024; - newlen = str->allocated; + newlen = allocated; /* * Stop exponential grow at some point, since it might be slow for the @@ -107,8 +123,8 @@ rspamd_fstring_grow (rspamd_fstring_t *str, gsize needed_len) /* * Check for overflow */ - if (newlen <= str->len + needed_len) { - newlen = str->len + needed_len; + if (newlen <= len + needed_len) { + newlen = len + needed_len; if (newlen < max_grow) { newlen *= 2; @@ -118,12 +134,26 @@ rspamd_fstring_grow (rspamd_fstring_t *str, gsize needed_len) } } + return newlen; +} + +rspamd_fstring_t * +rspamd_fstring_grow (rspamd_fstring_t *str, gsize needed_len) +{ + gsize newlen; + gpointer nptr; + + newlen = rspamd_fstring_suggest_size (str->len, str->allocated, needed_len); + nptr = realloc (str, newlen + sizeof (*str)); if (nptr == NULL) { /* Avoid memory leak */ free (str); - g_assert (nptr); + g_error ("%s: failed to re-allocate %"G_GSIZE_FORMAT" bytes", + G_STRLOC, newlen + sizeof (*str)); + + return NULL; } str = nptr; diff --git a/src/libutil/fstring.h b/src/libutil/fstring.h index db8b49085..e23bd5e19 100644 --- a/src/libutil/fstring.h +++ b/src/libutil/fstring.h @@ -81,6 +81,8 @@ rspamd_fstring_t *rspamd_fstring_append_chars (rspamd_fstring_t *str, */ void rspamd_fstring_erase (rspamd_fstring_t *str, gsize pos, gsize len); +#define rspamd_fstring_clear(s) rspamd_fstring_erase(s, 0, s->len) + /** * Convert fixed string to a zero terminated string. 
This string should be * freed by a caller @@ -145,6 +147,15 @@ void rspamd_fstring_mapped_ftok_free (gpointer p); rspamd_ftok_t *rspamd_ftok_map (const rspamd_fstring_t *s); /** + * Suggest suitable size to grow fstring + * @param len + * @param allocated + * @param needed_len + * @return + */ +gsize rspamd_fstring_suggest_size (gsize len, gsize allocated, gsize needed_len); + +/** * Grow the specified fixed string * @param str * @param needed_len diff --git a/src/libutil/http.c b/src/libutil/http.c index fc8263ddd..30ec29b61 100644 --- a/src/libutil/http.c +++ b/src/libutil/http.c @@ -14,7 +14,7 @@ * limitations under the License. */ #include "config.h" -#include "http.h" +#include "http_private.h" #include "utlist.h" #include "util.h" #include "printf.h" @@ -24,6 +24,7 @@ #include "keypair_private.h" #include "cryptobox.h" #include "unix-std.h" +#include "libutil/ssl_util.h" #define ENCRYPTED_VERSION " HTTP/1.0" @@ -42,6 +43,8 @@ enum rspamd_http_priv_flags { #define IS_CONN_RESETED(c) ((c)->flags & RSPAMD_HTTP_CONN_FLAG_RESETED) struct rspamd_http_connection_private { + gpointer ssl_ctx; + struct rspamd_ssl_connection *ssl; struct _rspamd_http_privbuf *buf; struct rspamd_cryptobox_pubkey *peer_key; struct rspamd_cryptobox_keypair *local_key; @@ -96,6 +99,8 @@ static const rspamd_ftok_t last_modified_header = { .len = 13 }; +static void rspamd_http_message_storage_cleanup (struct rspamd_http_message *msg); + #define HTTP_ERROR http_error_quark () GQuark http_error_quark (void) @@ -517,7 +522,8 @@ rspamd_http_finish_header (struct rspamd_http_connection *conn, priv->header->value->begin = priv->header->combined->str + priv->header->name->len + 2; priv->header->name->begin = priv->header->combined->str; - DL_APPEND (priv->msg->headers, priv->header); + HASH_ADD_KEYPTR (hh, priv->msg->headers, priv->header->name->begin, + priv->header->name->len, priv->header); rspamd_http_check_special_header (conn, priv); } @@ -592,8 +598,10 @@ rspamd_http_on_headers_complete 
(http_parser * parser) struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; struct rspamd_http_connection_private *priv; + struct rspamd_http_message *msg; priv = conn->priv; + msg = priv->msg; if (priv->header != NULL) { rspamd_http_finish_header (conn, priv); @@ -602,20 +610,17 @@ rspamd_http_on_headers_complete (http_parser * parser) priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_NEW_HEADER; } - if (parser->content_length != 0 && parser->content_length != ULLONG_MAX) { - priv->msg->body = rspamd_fstring_sized_new (parser->content_length); - } - else { - priv->msg->body = rspamd_fstring_new (); + if (!rspamd_http_message_set_body (msg, NULL, parser->content_length)) { + return -1; } if (parser->flags & F_SPAMC) { - priv->msg->flags |= RSPAMD_HTTP_FLAG_SPAMC; + msg->flags |= RSPAMD_HTTP_FLAG_SPAMC; } - priv->msg->body_buf.begin = priv->msg->body->str; - priv->msg->method = parser->method; - priv->msg->code = parser->status_code; + + msg->method = parser->method; + msg->code = parser->status_code; return 0; } @@ -626,18 +631,18 @@ rspamd_http_on_body (http_parser * parser, const gchar *at, size_t length) struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; struct rspamd_http_connection_private *priv; + struct rspamd_http_message *msg; priv = conn->priv; + msg = priv->msg; - priv->msg->body = rspamd_fstring_append (priv->msg->body, at, length); - - /* Append might cause realloc */ - priv->msg->body_buf.begin = priv->msg->body->str; - priv->msg->body_buf.len = priv->msg->body->len; + if (!rspamd_http_message_append_body (msg, at, length)) { + return -1; + } if ((conn->opts & RSPAMD_HTTP_BODY_PARTIAL) && !IS_CONN_ENCRYPTED (priv)) { /* Incremental update is impossible for encrypted requests so far */ - return (conn->body_handler (conn, priv->msg, at, length)); + return (conn->body_handler (conn, msg, at, length)); } return 0; @@ -710,10 +715,10 @@ rspamd_http_decrypt_message (struct rspamd_http_connection *conn, 
enum rspamd_cryptobox_mode mode; mode = rspamd_keypair_alg (priv->local_key); - nonce = msg->body->str; - m = msg->body->str + rspamd_cryptobox_nonce_bytes (mode) + + nonce = msg->body_buf.str; + m = msg->body_buf.str + rspamd_cryptobox_nonce_bytes (mode) + rspamd_cryptobox_mac_bytes (mode); - dec_len = msg->body->len - rspamd_cryptobox_nonce_bytes (mode) - + dec_len = msg->body_buf.len - rspamd_cryptobox_nonce_bytes (mode) - rspamd_cryptobox_mac_bytes (mode); if ((nm = rspamd_pubkey_get_nm (peer_key)) == NULL) { @@ -727,7 +732,8 @@ rspamd_http_decrypt_message (struct rspamd_http_connection *conn, } /* Cleanup message */ - DL_FOREACH_SAFE (msg->headers, hdr, hdrtmp) { + HASH_ITER (hh, msg->headers, hdr, hdrtmp) { + HASH_DELETE (hh, msg->headers, hdr); rspamd_fstring_free (hdr->combined); g_slice_free1 (sizeof (*hdr->name), hdr->name); g_slice_free1 (sizeof (*hdr->value), hdr->value); @@ -781,7 +787,7 @@ rspamd_http_on_message_complete (http_parser * parser) mode = rspamd_keypair_alg (priv->local_key); if (priv->local_key == NULL || priv->msg->peer_key == NULL || - priv->msg->body->len < rspamd_cryptobox_nonce_bytes (mode) + + priv->msg->body_buf.len < rspamd_cryptobox_nonce_bytes (mode) + rspamd_cryptobox_mac_bytes (mode)) { msg_err ("cannot decrypt message"); return -1; @@ -831,9 +837,15 @@ static void rspamd_http_simple_client_helper (struct rspamd_http_connection *conn) { struct event_base *base; + struct rspamd_http_connection_private *priv; + gpointer ssl; + priv = conn->priv; base = conn->priv->ev.ev_base; + ssl = priv->ssl; + priv->ssl = NULL; rspamd_http_connection_reset (conn); + priv->ssl = ssl; /* Plan read message */ rspamd_http_connection_read_message (conn, conn->ud, conn->fd, conn->priv->ptv, base); @@ -886,16 +898,23 @@ rspamd_http_write_helper (struct rspamd_http_connection *conn) #ifdef MSG_NOSIGNAL flags = MSG_NOSIGNAL; #endif - r = sendmsg (conn->fd, &msg, flags); + + if (priv->ssl) { + r = rspamd_ssl_writev (priv->ssl, msg.msg_iov, 
msg.msg_iovlen); + } + else { + r = sendmsg (conn->fd, &msg, flags); + } if (r == -1) { - err = - g_error_new (HTTP_ERROR, errno, "IO write error: %s", strerror ( - errno)); - rspamd_http_connection_ref (conn); - conn->error_handler (conn, err); - rspamd_http_connection_unref (conn); - g_error_free (err); + if (!priv->ssl) { + err = g_error_new (HTTP_ERROR, errno, "IO write error: %s", strerror (errno)); + rspamd_http_connection_ref (conn); + conn->error_handler (conn, err); + rspamd_http_connection_unref (conn); + g_error_free (err); + } + return; } else { @@ -936,7 +955,13 @@ rspamd_http_try_read (gint fd, rspamd_fstring_t *buf; buf = priv->buf->data; - r = read (fd, buf->str, buf->allocated); + + if (priv->ssl) { + r = rspamd_ssl_read (priv->ssl, buf->str, buf->allocated); + } + else { + r = read (fd, buf->str, buf->allocated); + } if (r <= 0) { return r; @@ -949,6 +974,16 @@ rspamd_http_try_read (gint fd, } static void +rspamd_http_ssl_err_handler (gpointer ud, GError *err) +{ + struct rspamd_http_connection *conn = (struct rspamd_http_connection *)ud; + + rspamd_http_connection_ref (conn); + conn->error_handler (conn, err); + rspamd_http_connection_unref (conn); +} + +static void rspamd_http_event_handler (int fd, short what, gpointer ud) { struct rspamd_http_connection *conn = (struct rspamd_http_connection *)ud; @@ -998,12 +1033,14 @@ rspamd_http_event_handler (int fd, short what, gpointer ud) return; } else { - err = g_error_new (HTTP_ERROR, - errno, - "IO read error: %s", - strerror (errno)); - conn->error_handler (conn, err); - g_error_free (err); + if (!priv->ssl) { + err = g_error_new (HTTP_ERROR, + errno, + "IO read error: %s", + strerror (errno)); + conn->error_handler (conn, err); + g_error_free (err); + } REF_RELEASE (pbuf); rspamd_http_connection_unref (conn); @@ -1081,39 +1118,42 @@ rspamd_http_parser_reset (struct rspamd_http_connection *conn) } struct rspamd_http_connection * -rspamd_http_connection_new (rspamd_http_body_handler_t body_handler, 
- rspamd_http_error_handler_t error_handler, - rspamd_http_finish_handler_t finish_handler, - unsigned opts, - enum rspamd_http_connection_type type, - struct rspamd_keypair_cache *cache) +rspamd_http_connection_new ( + rspamd_http_body_handler_t body_handler, + rspamd_http_error_handler_t error_handler, + rspamd_http_finish_handler_t finish_handler, + unsigned opts, + enum rspamd_http_connection_type type, + struct rspamd_keypair_cache *cache, + gpointer ssl_ctx) { - struct rspamd_http_connection *new; + struct rspamd_http_connection *conn; struct rspamd_http_connection_private *priv; if (error_handler == NULL || finish_handler == NULL) { return NULL; } - new = g_slice_alloc0 (sizeof (struct rspamd_http_connection)); - new->opts = opts; - new->type = type; - new->body_handler = body_handler; - new->error_handler = error_handler; - new->finish_handler = finish_handler; - new->fd = -1; - new->ref = 1; - new->finished = FALSE; - new->cache = cache; + conn = g_slice_alloc0 (sizeof (struct rspamd_http_connection)); + conn->opts = opts; + conn->type = type; + conn->body_handler = body_handler; + conn->error_handler = error_handler; + conn->finish_handler = finish_handler; + conn->fd = -1; + conn->ref = 1; + conn->finished = FALSE; + conn->cache = cache; /* Init priv */ priv = g_slice_alloc0 (sizeof (struct rspamd_http_connection_private)); - new->priv = priv; + conn->priv = priv; + priv->ssl_ctx = ssl_ctx; - rspamd_http_parser_reset (new); - priv->parser.data = new; + rspamd_http_parser_reset (conn); + priv->parser.data = conn; - return new; + return conn; } void @@ -1172,13 +1212,6 @@ rspamd_http_connection_steal_msg (struct rspamd_http_connection *conn) msg->peer_key = NULL; } priv->msg = NULL; - - /* We also might need to adjust body/body_buf */ - if (msg->body_buf.begin > msg->body->str) { - memmove (msg->body->str, msg->body_buf.begin, msg->body_buf.len); - msg->body->len = msg->body_buf.len; - msg->body_buf.begin = msg->body->str; - } } return msg; @@ -1189,18 
+1222,66 @@ rspamd_http_connection_copy_msg (struct rspamd_http_connection *conn) { struct rspamd_http_connection_private *priv; struct rspamd_http_message *new_msg, *msg; - struct rspamd_http_header *hdr, *nhdr; + struct rspamd_http_header *hdr, *nhdr, *thdr; + const gchar *old_body; + gsize old_len; + struct stat st; + union _rspamd_storage_u *storage; priv = conn->priv; msg = priv->msg; new_msg = rspamd_http_new_message (msg->type); + new_msg->flags = msg->flags; + + if (msg->body_buf.len > 0) { + + if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) { + /* Avoid copying by just maping a shared segment */ + new_msg->flags |= RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE; - if (msg->body) { - new_msg->body = rspamd_fstring_new_init (msg->body->str, - msg->body->len); - new_msg->body_buf.begin = msg->body_buf.begin; - new_msg->body_buf.len = msg->body_buf.len; + storage = &new_msg->body_buf.c; + storage->shared.shm_fd = dup (msg->body_buf.c.shared.shm_fd); + + if (storage->shared.shm_fd == -1) { + rspamd_http_message_free (new_msg); + return NULL; + } + + if (fstat (storage->shared.shm_fd, &st) == -1) { + rspamd_http_message_free (new_msg); + return NULL; + } + + /* We don't own segment, so do not try to touch it */ + + if (msg->body_buf.c.shared.name) { + storage->shared.name = msg->body_buf.c.shared.name; + REF_RETAIN (storage->shared.name); + } + + new_msg->body_buf.str = mmap (NULL, st.st_size, + PROT_READ, MAP_SHARED, + storage->shared.shm_fd, 0); + + if (new_msg->body_buf.str == MAP_FAILED) { + rspamd_http_message_free (new_msg); + return NULL; + } + + new_msg->body_buf.begin = new_msg->body_buf.str; + new_msg->body_buf.len = msg->body_buf.len; + new_msg->body_buf.begin = new_msg->body_buf.str + + (msg->body_buf.begin - msg->body_buf.str); + } + else { + old_body = rspamd_http_message_get_body (msg, &old_len); + + if (!rspamd_http_message_set_body (new_msg, old_body, old_len)) { + rspamd_http_message_free (new_msg); + return NULL; + } + } } if (msg->url) { @@ -1222,10 +1303,9 @@ 
rspamd_http_connection_copy_msg (struct rspamd_http_connection *conn) new_msg->method = msg->method; new_msg->port = msg->port; new_msg->date = msg->date; - new_msg->flags = msg->flags; new_msg->last_modified = msg->last_modified; - LL_FOREACH (msg->headers, hdr) { + HASH_ITER (hh, msg->headers, hdr, thdr) { nhdr = g_slice_alloc (sizeof (struct rspamd_http_header)); nhdr->name = g_slice_alloc (sizeof (*nhdr->name)); nhdr->value = g_slice_alloc (sizeof (*nhdr->value)); @@ -1238,7 +1318,8 @@ rspamd_http_connection_copy_msg (struct rspamd_http_connection *conn) (hdr->value->begin - hdr->combined->str); nhdr->value->len = hdr->value->len; - DL_APPEND (new_msg->headers, nhdr); + HASH_ADD_KEYPTR (hh, new_msg->headers, nhdr->name->begin, + nhdr->name->len, nhdr); } return new_msg; @@ -1254,6 +1335,11 @@ rspamd_http_connection_free (struct rspamd_http_connection *conn) if (priv != NULL) { rspamd_http_connection_reset (conn); + if (priv->ssl) { + rspamd_ssl_connection_free (priv->ssl); + priv->ssl = NULL; + } + if (priv->local_key) { rspamd_keypair_unref (priv->local_key); } @@ -1267,9 +1353,10 @@ rspamd_http_connection_free (struct rspamd_http_connection *conn) g_slice_free1 (sizeof (struct rspamd_http_connection), conn); } -void -rspamd_http_connection_read_message (struct rspamd_http_connection *conn, - gpointer ud, gint fd, struct timeval *timeout, struct event_base *base) +static void +rspamd_http_connection_read_message_common (struct rspamd_http_connection *conn, + gpointer ud, gint fd, struct timeval *timeout, struct event_base *base, + gint flags) { struct rspamd_http_connection_private *priv = conn->priv; struct rspamd_http_message *req; @@ -1279,6 +1366,7 @@ rspamd_http_connection_read_message (struct rspamd_http_connection *conn, req = rspamd_http_new_message ( conn->type == RSPAMD_HTTP_SERVER ? 
HTTP_REQUEST : HTTP_RESPONSE); priv->msg = req; + req->flags = flags; if (priv->peer_key) { priv->msg->peer_key = priv->peer_key; @@ -1313,6 +1401,21 @@ rspamd_http_connection_read_message (struct rspamd_http_connection *conn, event_add (&priv->ev, priv->ptv); } +void +rspamd_http_connection_read_message (struct rspamd_http_connection *conn, + gpointer ud, gint fd, struct timeval *timeout, struct event_base *base) +{ + rspamd_http_connection_read_message_common (conn, ud, fd, timeout, base, 0); +} + +void +rspamd_http_connection_read_message_shared (struct rspamd_http_connection *conn, + gpointer ud, gint fd, struct timeval *timeout, struct event_base *base) +{ + rspamd_http_connection_read_message_common (conn, ud, fd, timeout, base, + RSPAMD_HTTP_FLAG_SHMEM); +} + static void rspamd_http_connection_encrypt_message ( struct rspamd_http_connection *conn, @@ -1333,7 +1436,7 @@ rspamd_http_connection_encrypt_message ( const guchar *nm; gint i, cnt; guint outlen; - struct rspamd_http_header *hdr; + struct rspamd_http_header *hdr, *htmp; enum rspamd_cryptobox_mode mode; mode = rspamd_keypair_alg (priv->local_key); @@ -1368,7 +1471,7 @@ rspamd_http_connection_encrypt_message ( } - LL_FOREACH (msg->headers, hdr) { + HASH_ITER (hh, msg->headers, hdr, htmp) { segments[i].data = hdr->combined->str; segments[i++].len = hdr->combined->len; } @@ -1416,13 +1519,30 @@ rspamd_http_connection_encrypt_message ( g_free (segments); } -void -rspamd_http_connection_write_message (struct rspamd_http_connection *conn, - struct rspamd_http_message *msg, const gchar *host, const gchar *mime_type, - gpointer ud, gint fd, struct timeval *timeout, struct event_base *base) +static void +rspamd_http_detach_shared (struct rspamd_http_message *msg) +{ + rspamd_fstring_t *cpy_str; + + if (msg->body_buf.c.shared.shm_fd != -1) { + close (msg->body_buf.c.shared.shm_fd); + msg->body_buf.c.shared.shm_fd = -1; + } + + REF_RELEASE (msg->body_buf.c.shared.name); + + cpy_str = rspamd_fstring_new_init 
(msg->body_buf.begin, msg->body_buf.len); + rspamd_http_message_set_body_from_fstring_steal (msg, cpy_str); +} + +static void +rspamd_http_connection_write_message_common (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, const gchar *host, const gchar *mime_type, + gpointer ud, gint fd, struct timeval *timeout, struct event_base *base, + gboolean allow_shared) { struct rspamd_http_connection_private *priv = conn->priv; - struct rspamd_http_header *hdr; + struct rspamd_http_header *hdr, *htmp; struct tm t, *ptm; gchar datebuf[64], repbuf[512], *pbody; gint i, hdrcount, meth_len = 0, preludelen = 0; @@ -1433,6 +1553,7 @@ rspamd_http_connection_write_message (struct rspamd_http_connection *conn, guchar *np = NULL, *mp = NULL, *meth_pos = NULL; struct rspamd_cryptobox_pubkey *peer_key = NULL; enum rspamd_cryptobox_mode mode; + GError *err; conn->fd = fd; conn->ud = ud; @@ -1466,17 +1587,44 @@ rspamd_http_connection_write_message (struct rspamd_http_connection *conn, } } + if (encrypted && (msg->flags & + (RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE|RSPAMD_HTTP_FLAG_SHMEM))) { + /* We cannot use immutable body to encrypt message in place */ + allow_shared = FALSE; + rspamd_http_detach_shared (msg); + } + + if (allow_shared) { + if (!(msg->flags & RSPAMD_HTTP_FLAG_SHMEM) || + msg->body_buf.c.shared.name == NULL) { + allow_shared = FALSE; + } + else { + /* Insert new headers */ + rspamd_http_message_add_header (msg, "Shm", + msg->body_buf.c.shared.name->shm_name); + rspamd_snprintf (datebuf, sizeof (datebuf), "%d", + (int)(msg->body_buf.begin - msg->body_buf.str)); + rspamd_http_message_add_header (msg, "Shm-Offset", + datebuf); + rspamd_snprintf (datebuf, sizeof (datebuf), "%z", + msg->body_buf.len); + rspamd_http_message_add_header (msg, "Shm-Length", + datebuf); + } + } + if (encrypted) { mode = rspamd_keypair_alg (priv->local_key); - if (msg->body == NULL || msg->body->len == 0) { + if (msg->body_buf.len == 0) { pbody = NULL; bodylen = 0; msg->method = 
HTTP_GET; } else { - pbody = msg->body->str; - bodylen = msg->body->len; + pbody = (gchar *)msg->body_buf.begin; + bodylen = msg->body_buf.len; msg->method = HTTP_POST; } @@ -1534,22 +1682,23 @@ rspamd_http_connection_write_message (struct rspamd_http_connection *conn, } else { if (msg->method < HTTP_SYMBOLS) { - if (msg->body == NULL || msg->body->len == 0) { + if (msg->body_buf.len == 0 || allow_shared) { pbody = NULL; bodylen = 0; priv->outlen = 2; msg->method = HTTP_GET; } else { - pbody = msg->body->str; - bodylen = msg->body->len; + pbody = (gchar *)msg->body_buf.begin; + bodylen = msg->body_buf.len; priv->outlen = 3; msg->method = HTTP_POST; } } - else if (msg->body != NULL) { - pbody = msg->body->str; - bodylen = msg->body->len; + else if (msg->body_buf.len > 0) { + allow_shared = FALSE; + pbody = (gchar *)msg->body_buf.begin; + bodylen = msg->body_buf.len; priv->outlen = 2; } else { @@ -1563,7 +1712,7 @@ rspamd_http_connection_write_message (struct rspamd_http_connection *conn, priv->wr_total = bodylen + buf->len + 2; hdrcount = 0; - DL_FOREACH (msg->headers, hdr) { + HASH_ITER (hh, msg->headers, hdr, htmp) { /* <name: value\r\n> */ priv->wr_total += hdr->combined->len; enclen += hdr->combined->len; @@ -1782,7 +1931,7 @@ rspamd_http_connection_write_message (struct rspamd_http_connection *conn, } else { i = 1; - LL_FOREACH (msg->headers, hdr) { + HASH_ITER (hh, msg->headers, hdr, htmp) { priv->out[i].iov_base = hdr->combined->str; priv->out[i++].iov_len = hdr->combined->len; } @@ -1796,28 +1945,79 @@ rspamd_http_connection_write_message (struct rspamd_http_connection *conn, } if (pbody != NULL) { - - if (msg->body_buf.begin == NULL && msg->body_buf.len == 0) { - msg->body_buf.begin = msg->body->str; - } - priv->out[i].iov_base = pbody; priv->out[i++].iov_len = bodylen; } } + priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_RESETED; + if (base != NULL && event_get_base (&priv->ev) == base) { event_del (&priv->ev); } - event_set (&priv->ev, fd, EV_WRITE, 
rspamd_http_event_handler, conn); + if (msg->flags & RSPAMD_HTTP_FLAG_SSL) { + if (base != NULL) { + event_base_set (base, &priv->ev); + } + if (!priv->ssl_ctx) { + err = g_error_new (HTTP_ERROR, errno, "ssl message requested " + "with no ssl ctx"); + rspamd_http_connection_ref (conn); + conn->error_handler (conn, err); + rspamd_http_connection_unref (conn); + g_error_free (err); + return; + } + else { + if (priv->ssl) { + /* Cleanup the existing connection */ + rspamd_ssl_connection_free (priv->ssl); + } + + priv->ssl = rspamd_ssl_connection_new (priv->ssl_ctx, base); + g_assert (priv->ssl != NULL); + + if (!rspamd_ssl_connect_fd (priv->ssl, fd, host, &priv->ev, + priv->ptv, rspamd_http_event_handler, + rspamd_http_ssl_err_handler, conn)) { + + err = g_error_new (HTTP_ERROR, errno, "ssl connection error"); + rspamd_http_connection_ref (conn); + conn->error_handler (conn, err); + rspamd_http_connection_unref (conn); + g_error_free (err); + return; + } + } + } + else { + event_set (&priv->ev, fd, EV_WRITE, rspamd_http_event_handler, conn); + + if (base != NULL) { + event_base_set (base, &priv->ev); + } - if (base != NULL) { - event_base_set (base, &priv->ev); + event_add (&priv->ev, priv->ptv); } +} - priv->flags &= ~RSPAMD_HTTP_CONN_FLAG_RESETED; - event_add (&priv->ev, priv->ptv); +void +rspamd_http_connection_write_message (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, const gchar *host, const gchar *mime_type, + gpointer ud, gint fd, struct timeval *timeout, struct event_base *base) +{ + rspamd_http_connection_write_message_common (conn, msg, host, mime_type, + ud, fd, timeout, base, FALSE); +} + +void +rspamd_http_connection_write_message_shared (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, const gchar *host, const gchar *mime_type, + gpointer ud, gint fd, struct timeval *timeout, struct event_base *base) +{ + rspamd_http_connection_write_message_common (conn, msg, host, mime_type, + ud, fd, timeout, base, 
TRUE); } struct rspamd_http_message * @@ -1825,7 +2025,8 @@ rspamd_http_new_message (enum http_parser_type type) { struct rspamd_http_message *new; - new = g_slice_alloc (sizeof (struct rspamd_http_message)); + new = g_slice_alloc0 (sizeof (struct rspamd_http_message)); + if (type == HTTP_REQUEST) { new->url = rspamd_fstring_new (); } @@ -1834,17 +2035,9 @@ rspamd_http_new_message (enum http_parser_type type) new->code = 200; } - new->headers = NULL; - new->date = 0; - new->body = NULL; - memset (&new->body_buf, 0, sizeof (new->body_buf)); - new->status = NULL; - new->host = NULL; new->port = 80; new->type = type; new->method = HTTP_GET; - new->peer_key = NULL; - new->flags = 0; return new; } @@ -1856,6 +2049,7 @@ rspamd_http_message_from_url (const gchar *url) struct rspamd_http_message *msg; const gchar *host, *path; size_t pathlen, urllen; + guint flags = 0; if (url == NULL) { return NULL; @@ -1872,6 +2066,14 @@ rspamd_http_message_from_url (const gchar *url) msg_warn ("no host argument in URL: %s", url); return NULL; } + + if ((pu.field_set & (1 << UF_SCHEMA))) { + if (pu.field_data[UF_SCHEMA].len == sizeof ("https") - 1 && + memcmp (url + pu.field_data[UF_SCHEMA].off, "https", 5) == 0) { + flags |= RSPAMD_HTTP_FLAG_SSL; + } + } + if ((pu.field_set & (1 << UF_PATH)) == 0) { path = "/"; pathlen = 1; @@ -1883,13 +2085,19 @@ rspamd_http_message_from_url (const gchar *url) msg = rspamd_http_new_message (HTTP_REQUEST); host = url + pu.field_data[UF_HOST].off; + msg->flags = flags; if ((pu.field_set & (1 << UF_PORT)) != 0) { msg->port = pu.port; } else { /* XXX: magic constant */ - msg->port = 80; + if (flags & RSPAMD_HTTP_FLAG_SSL) { + msg->port = 443; + } + else { + msg->port = 80; + } } msg->host = rspamd_fstring_new_init (host, pu.field_data[UF_HOST].len); @@ -1898,21 +2106,306 @@ rspamd_http_message_from_url (const gchar *url) return msg; } +const gchar * +rspamd_http_message_get_body (struct rspamd_http_message *msg, + gsize *blen) +{ + const gchar *ret = NULL; 
+ + if (msg->body_buf.len > 0) { + ret = msg->body_buf.begin; + } + + if (blen) { + *blen = msg->body_buf.len; + } + + return ret; +} + +static void +rspamd_http_shname_dtor (void *p) +{ + struct _rspamd_storage_shmem_s *n = p; + + shm_unlink (n->shm_name); + g_free (n->shm_name); + g_slice_free1 (sizeof (*n), n); +} + +void * +rspamd_http_message_shmem_ref (struct rspamd_http_message *msg) +{ + if ((msg->flags & RSPAMD_HTTP_FLAG_SHMEM) && msg->body_buf.c.shared.name) { + REF_RETAIN (msg->body_buf.c.shared.name); + return msg->body_buf.c.shared.name; + } + + return NULL; +} + +void +rspamd_http_message_shmem_unref (void *p) +{ + struct _rspamd_storage_shmem_s *n = p; + + if (n) { + REF_RELEASE (n); + } +} + +gboolean +rspamd_http_message_set_body (struct rspamd_http_message *msg, + const gchar *data, gsize len) +{ + union _rspamd_storage_u *storage; + storage = &msg->body_buf.c; + + rspamd_http_message_storage_cleanup (msg); + + if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) { + storage->shared.name = g_slice_alloc (sizeof (*storage->shared.name)); + REF_INIT_RETAIN (storage->shared.name, rspamd_http_shname_dtor); + storage->shared.name->shm_name = g_strdup ("/rhm.XXXXXXXXXXXXXXXXXXXX"); + storage->shared.shm_fd = rspamd_shmem_mkstemp (storage->shared.name->shm_name); + + if (storage->shared.shm_fd == -1) { + return FALSE; + } + + if (len != 0 && len != ULLONG_MAX) { + if (ftruncate (storage->shared.shm_fd, len) == -1) { + return FALSE; + } + + msg->body_buf.str = mmap (NULL, len, + PROT_WRITE|PROT_READ, MAP_SHARED, + storage->shared.shm_fd, 0); + + if (msg->body_buf.str == MAP_FAILED) { + return FALSE; + } + + msg->body_buf.begin = msg->body_buf.str; + + if (data != NULL) { + memcpy (msg->body_buf.str, data, len); + msg->body_buf.len = len; + } + } + else { + msg->body_buf.len = 0; + msg->body_buf.begin = NULL; + msg->body_buf.str = NULL; + } + } + else { + if (len != 0 && len != ULLONG_MAX) { + if (data == NULL) { + storage->normal = rspamd_fstring_sized_new (len); + 
msg->body_buf.len = 0; + } + else { + storage->normal = rspamd_fstring_new_init (data, len); + msg->body_buf.len = len; + } + } + else { + storage->normal = rspamd_fstring_new (); + } + + msg->body_buf.begin = storage->normal->str; + msg->body_buf.str = storage->normal->str; + } + + return TRUE; +} + +gboolean +rspamd_http_message_set_body_from_fd (struct rspamd_http_message *msg, + gint fd) +{ + union _rspamd_storage_u *storage; + struct stat st; + + rspamd_http_message_storage_cleanup (msg); + + storage = &msg->body_buf.c; + msg->flags |= RSPAMD_HTTP_FLAG_SHMEM|RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE; + + storage->shared.shm_fd = dup (fd); + msg->body_buf.str = MAP_FAILED; + + if (storage->shared.shm_fd == -1) { + return FALSE; + } + + if (fstat (storage->shared.shm_fd, &st) == -1) { + return FALSE; + } + + msg->body_buf.str = mmap (NULL, st.st_size, + PROT_READ, MAP_SHARED, + storage->shared.shm_fd, 0); + + if (msg->body_buf.str == MAP_FAILED) { + return FALSE; + } + + msg->body_buf.begin = msg->body_buf.str; + msg->body_buf.len = st.st_size; + + return TRUE; +} + +gboolean +rspamd_http_message_set_body_from_fstring_steal (struct rspamd_http_message *msg, + rspamd_fstring_t *fstr) +{ + union _rspamd_storage_u *storage; + + rspamd_http_message_storage_cleanup (msg); + + storage = &msg->body_buf.c; + msg->flags &= ~(RSPAMD_HTTP_FLAG_SHMEM|RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE); + + storage->normal = fstr; + msg->body_buf.str = fstr->str; + msg->body_buf.begin = msg->body_buf.str; + msg->body_buf.len = fstr->len; + + return TRUE; +} + +gboolean +rspamd_http_message_set_body_from_fstring_copy (struct rspamd_http_message *msg, + const rspamd_fstring_t *fstr) +{ + union _rspamd_storage_u *storage; + + rspamd_http_message_storage_cleanup (msg); + + storage = &msg->body_buf.c; + msg->flags &= ~(RSPAMD_HTTP_FLAG_SHMEM|RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE); + + storage->normal = rspamd_fstring_new_init (fstr->str, fstr->len); + msg->body_buf.str = storage->normal->str; + 
msg->body_buf.begin = msg->body_buf.str; + msg->body_buf.len = storage->normal->len; + + return TRUE; +} + +gboolean +rspamd_http_message_append_body (struct rspamd_http_message *msg, + const gchar *data, gsize len) +{ + struct stat st; + union _rspamd_storage_u *storage; + gsize newlen; + + storage = &msg->body_buf.c; + + if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) { + if (storage->shared.shm_fd == -1) { + return FALSE; + } + + if (fstat (storage->shared.shm_fd, &st) == -1) { + return FALSE; + } + + /* Check if we need to grow */ + if (st.st_size < msg->body_buf.len + len) { + /* Need to grow */ + newlen = rspamd_fstring_suggest_size (msg->body_buf.len, st.st_size, + len); + /* Unmap as we need another size of segment */ + if (msg->body_buf.str != MAP_FAILED) { + munmap (msg->body_buf.str, st.st_size); + } + + if (ftruncate (storage->shared.shm_fd, newlen) == -1) { + return FALSE; + } + + msg->body_buf.str = mmap (NULL, newlen, + PROT_WRITE|PROT_READ, MAP_SHARED, + storage->shared.shm_fd, 0); + if (msg->body_buf.str == MAP_FAILED) { + return FALSE; + } + } + + memcpy (msg->body_buf.str + msg->body_buf.len, data, len); + msg->body_buf.len += len; + msg->body_buf.begin = msg->body_buf.str; + } + else { + storage->normal = rspamd_fstring_append (storage->normal, data, len); + + /* Append might cause realloc */ + msg->body_buf.begin = storage->normal->str; + msg->body_buf.len = storage->normal->len; + msg->body_buf.str = storage->normal->str; + } + + return TRUE; +} + +static void +rspamd_http_message_storage_cleanup (struct rspamd_http_message *msg) +{ + union _rspamd_storage_u *storage; + struct stat st; + + if (msg->body_buf.len != 0) { + if (msg->flags & RSPAMD_HTTP_FLAG_SHMEM) { + storage = &msg->body_buf.c; + + if (storage->shared.shm_fd != -1) { + g_assert (fstat (storage->shared.shm_fd, &st) != -1); + + if (msg->body_buf.str != MAP_FAILED) { + munmap (msg->body_buf.str, st.st_size); + } + + close (storage->shared.shm_fd); + } + + if (storage->shared.name != NULL) 
{ + REF_RELEASE (storage->shared.name); + } + + storage->shared.shm_fd = -1; + msg->body_buf.str = MAP_FAILED; + } + else { + rspamd_fstring_free (msg->body_buf.c.normal); + msg->body_buf.c.normal = NULL; + } + + msg->body_buf.len = 0; + } +} + void rspamd_http_message_free (struct rspamd_http_message *msg) { - struct rspamd_http_header *hdr, *tmp_hdr; + struct rspamd_http_header *hdr, *htmp; - LL_FOREACH_SAFE (msg->headers, hdr, tmp_hdr) - { + + HASH_ITER (hh, msg->headers, hdr, htmp) { + HASH_DEL (msg->headers, hdr); rspamd_fstring_free (hdr->combined); g_slice_free1 (sizeof (*hdr->name), hdr->name); g_slice_free1 (sizeof (*hdr->value), hdr->value); g_slice_free1 (sizeof (struct rspamd_http_header), hdr); } - if (msg->body != NULL) { - rspamd_fstring_free (msg->body); - } + + + rspamd_http_message_storage_cleanup (msg); + if (msg->url != NULL) { rspamd_fstring_free (msg->url); } @@ -1949,7 +2442,7 @@ rspamd_http_message_add_header (struct rspamd_http_message *msg, hdr->name->len = nlen; hdr->value->begin = hdr->combined->str + nlen + 2; hdr->value->len = vlen; - DL_APPEND (msg->headers, hdr); + HASH_ADD_KEYPTR (hh, msg->headers, hdr->name->begin, hdr->name->len, hdr); } } @@ -1959,46 +2452,37 @@ rspamd_http_message_find_header (struct rspamd_http_message *msg, { struct rspamd_http_header *hdr; const rspamd_ftok_t *res = NULL; - rspamd_ftok_t cmp; guint slen = strlen (name); if (msg != NULL) { - cmp.begin = name; - cmp.len = slen; + HASH_FIND (hh, msg->headers, name, slen, hdr); - LL_FOREACH (msg->headers, hdr) { - if (rspamd_ftok_casecmp (hdr->name, &cmp) == 0) { - res = hdr->value; - break; - } + if (hdr) { + res = hdr->value; } } return res; } -gboolean rspamd_http_message_remove_header (struct rspamd_http_message *msg, +gboolean +rspamd_http_message_remove_header (struct rspamd_http_message *msg, const gchar *name) { - struct rspamd_http_header *hdr, *tmp; + struct rspamd_http_header *hdr; gboolean res = FALSE; guint slen = strlen (name); - rspamd_ftok_t cmp; 
if (msg != NULL) { - cmp.begin = name; - cmp.len = slen; - - DL_FOREACH_SAFE (msg->headers, hdr, tmp) { - if (rspamd_ftok_casecmp (hdr->name, &cmp) == 0) { - res = TRUE; - DL_DELETE (msg->headers, hdr); - - rspamd_fstring_free (hdr->combined); - g_slice_free1 (sizeof (*hdr->value), hdr->value); - g_slice_free1 (sizeof (*hdr->name), hdr->name); - g_slice_free1 (sizeof (*hdr), hdr); - } + HASH_FIND (hh, msg->headers, name, slen, hdr); + + if (hdr) { + HASH_DEL (msg->headers, hdr); + res = TRUE; + rspamd_fstring_free (hdr->combined); + g_slice_free1 (sizeof (*hdr->value), hdr->value); + g_slice_free1 (sizeof (*hdr->name), hdr->name); + g_slice_free1 (sizeof (*hdr), hdr); } } @@ -2046,7 +2530,7 @@ rspamd_http_router_error_handler (struct rspamd_http_connection *conn, msg = rspamd_http_new_message (HTTP_RESPONSE); msg->date = time (NULL); msg->code = err->code; - msg->body = rspamd_fstring_new_init (err->message, strlen (err->message)); + rspamd_http_message_set_body (msg, err->message, strlen (err->message)); rspamd_http_connection_reset (entry->conn); rspamd_http_connection_write_message (entry->conn, msg, @@ -2160,14 +2644,8 @@ rspamd_http_router_try_file (struct rspamd_http_connection_entry *entry, reply_msg->date = time (NULL); reply_msg->code = 200; - reply_msg->body = rspamd_fstring_sized_new (st.st_size); - reply_msg->body->len = st.st_size; - reply_msg->body_buf.len = st.st_size; - reply_msg->body_buf.begin = reply_msg->body->str; - - if (read (fd, reply_msg->body->str, st.st_size) != st.st_size) { + if (!rspamd_http_message_set_body_from_fd (reply_msg, fd)) { close (fd); - rspamd_http_message_free (reply_msg); return FALSE; } @@ -2239,7 +2717,7 @@ rspamd_http_router_finish_handler (struct rspamd_http_connection *conn, err_msg = rspamd_http_new_message (HTTP_RESPONSE); err_msg->date = time (NULL); err_msg->code = err->code; - err_msg->body = rspamd_fstring_new_init (err->message, + rspamd_http_message_set_body (err_msg, err->message, strlen (err->message)); 
rspamd_http_connection_reset (entry->conn); rspamd_http_connection_write_message (entry->conn, @@ -2349,7 +2827,9 @@ rspamd_http_router_handle_socket (struct rspamd_http_connection_router *router, rspamd_http_router_error_handler, rspamd_http_router_finish_handler, 0, - RSPAMD_HTTP_SERVER, router->cache); + RSPAMD_HTTP_SERVER, + router->cache, + NULL); if (router->key) { rspamd_http_connection_set_key (conn->conn, router->key); diff --git a/src/libutil/http.h b/src/libutil/http.h index d9fb73b82..e85e7ccee 100644 --- a/src/libutil/http.h +++ b/src/libutil/http.h @@ -34,41 +34,29 @@ enum rspamd_http_connection_type { RSPAMD_HTTP_CLIENT }; -/** - * HTTP header structure - */ -struct rspamd_http_header { - rspamd_ftok_t *name; - rspamd_ftok_t *value; - rspamd_fstring_t *combined; - struct rspamd_http_header *next, *prev; -}; +struct rspamd_http_header; +struct rspamd_http_message; +struct rspamd_http_connection_private; +struct rspamd_http_connection; +struct rspamd_http_connection_router; +struct rspamd_http_connection_entry; /** * Legacy spamc protocol */ -#define RSPAMD_HTTP_FLAG_SPAMC 1 << 1 - -/** - * HTTP message structure, used for requests and replies - */ -struct rspamd_http_message { - rspamd_fstring_t *url; - rspamd_fstring_t *host; - rspamd_fstring_t *status; - struct rspamd_http_header *headers; - rspamd_fstring_t *body; - rspamd_ftok_t body_buf; - struct rspamd_cryptobox_pubkey *peer_key; - time_t date; - time_t last_modified; - unsigned port; - enum http_parser_type type; - gint code; - enum http_method method; - gint flags; -}; - +#define RSPAMD_HTTP_FLAG_SPAMC (1 << 0) +/** + * Store body of the message in a shared memory segment + */ +#define RSPAMD_HTTP_FLAG_SHMEM (1 << 2) +/** + * Store body of the message in an immutable shared memory segment + */ +#define RSPAMD_HTTP_FLAG_SHMEM_IMMUTABLE (1 << 3) +/** + * Use tls for this message + */ +#define RSPAMD_HTTP_FLAG_SSL (1 << 4) /** * Options for HTTP connection @@ -79,30 +67,25 @@ enum 
rspamd_http_options { RSPAMD_HTTP_CLIENT_ENCRYPTED = 0x4 /**< Encrypt data for client */ }; -struct rspamd_http_connection_private; -struct rspamd_http_connection; -struct rspamd_http_connection_router; -struct rspamd_http_connection_entry; - typedef int (*rspamd_http_body_handler_t) (struct rspamd_http_connection *conn, - struct rspamd_http_message *msg, - const gchar *chunk, - gsize len); + struct rspamd_http_message *msg, + const gchar *chunk, + gsize len); typedef void (*rspamd_http_error_handler_t) (struct rspamd_http_connection *conn, - GError *err); + GError *err); typedef int (*rspamd_http_finish_handler_t) (struct rspamd_http_connection *conn, - struct rspamd_http_message *msg); + struct rspamd_http_message *msg); typedef int (*rspamd_http_router_handler_t) (struct rspamd_http_connection_entry - *conn_ent, - struct rspamd_http_message *msg); + *conn_ent, + struct rspamd_http_message *msg); typedef void (*rspamd_http_router_error_handler_t) (struct - rspamd_http_connection_entry *conn_ent, - GError *err); + rspamd_http_connection_entry *conn_ent, + GError *err); typedef void (*rspamd_http_router_finish_handler_t) (struct - rspamd_http_connection_entry *conn_ent); + rspamd_http_connection_entry *conn_ent); /** * HTTP connection structure @@ -148,13 +131,14 @@ struct rspamd_http_connection_router { * @param opts options * @return new connection structure */ -struct rspamd_http_connection * rspamd_http_connection_new ( - rspamd_http_body_handler_t body_handler, - rspamd_http_error_handler_t error_handler, - rspamd_http_finish_handler_t finish_handler, - unsigned opts, - enum rspamd_http_connection_type type, - struct rspamd_keypair_cache *cache); +struct rspamd_http_connection *rspamd_http_connection_new ( + rspamd_http_body_handler_t body_handler, + rspamd_http_error_handler_t error_handler, + rspamd_http_finish_handler_t finish_handler, + unsigned opts, + enum rspamd_http_connection_type type, + struct rspamd_keypair_cache *cache, + gpointer ssl_ctx); /** @@ 
-187,11 +171,18 @@ gboolean rspamd_http_connection_is_encrypted (struct rspamd_http_connection *con * @param fd fd to read/write */ void rspamd_http_connection_read_message ( - struct rspamd_http_connection *conn, - gpointer ud, - gint fd, - struct timeval *timeout, - struct event_base *base); + struct rspamd_http_connection *conn, + gpointer ud, + gint fd, + struct timeval *timeout, + struct event_base *base); + +void rspamd_http_connection_read_message_shared ( + struct rspamd_http_connection *conn, + gpointer ud, + gint fd, + struct timeval *timeout, + struct event_base *base); /** * Send reply using initialised connection @@ -201,14 +192,24 @@ void rspamd_http_connection_read_message ( * @param fd fd to read/write */ void rspamd_http_connection_write_message ( - struct rspamd_http_connection *conn, - struct rspamd_http_message *msg, - const gchar *host, - const gchar *mime_type, - gpointer ud, - gint fd, - struct timeval *timeout, - struct event_base *base); + struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, + const gchar *host, + const gchar *mime_type, + gpointer ud, + gint fd, + struct timeval *timeout, + struct event_base *base); + +void rspamd_http_connection_write_message_shared ( + struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, + const gchar *host, + const gchar *mime_type, + gpointer ud, + gint fd, + struct timeval *timeout, + struct event_base *base); /** * Free connection structure @@ -277,14 +278,70 @@ struct rspamd_http_message * rspamd_http_new_message (enum http_parser_type type struct rspamd_http_message* rspamd_http_message_from_url (const gchar *url); /** + * Returns body for a message + * @param msg + * @param blen pointer where to save body length + * @return pointer to body start + */ +const gchar *rspamd_http_message_get_body (struct rspamd_http_message *msg, + gsize *blen); + +/** + * Set message's body from the string + * @param msg + * @param data + * @param len + * @return TRUE if a 
message's body has been set + */ +gboolean rspamd_http_message_set_body (struct rspamd_http_message *msg, + const gchar *data, gsize len); + +/** + * Maps fd as message's body + * @param msg + * @param fd + * @return TRUE if a message's body has been set + */ +gboolean rspamd_http_message_set_body_from_fd (struct rspamd_http_message *msg, + gint fd); + +/** + * Uses rspamd_fstring_t as message's body, string is consumed by this operation + * @param msg + * @param fstr + * @return TRUE if a message's body has been set + */ +gboolean rspamd_http_message_set_body_from_fstring_steal (struct rspamd_http_message *msg, + rspamd_fstring_t *fstr); + +/** + * Uses rspamd_fstring_t as message's body, string is copied by this operation + * @param msg + * @param fstr + * @return TRUE if a message's body has been set + */ +gboolean rspamd_http_message_set_body_from_fstring_copy (struct rspamd_http_message *msg, + const rspamd_fstring_t *fstr); + +/** + * Appends data to message's body + * @param msg + * @param data + * @param len + * @return TRUE if a message's body has been set + */ +gboolean rspamd_http_message_append_body (struct rspamd_http_message *msg, + const gchar *data, gsize len); + +/** * Append a header to reply * @param rep * @param name * @param value */ void rspamd_http_message_add_header (struct rspamd_http_message *msg, - const gchar *name, - const gchar *value); + const gchar *name, + const gchar *value); /** * Search for a specified header in message @@ -302,7 +359,7 @@ const rspamd_ftok_t * rspamd_http_message_find_header ( * @return */ gboolean rspamd_http_message_remove_header (struct rspamd_http_message *msg, - const gchar *name); + const gchar *name); /** * Free HTTP message @@ -311,6 +368,17 @@ gboolean rspamd_http_message_remove_header (struct rspamd_http_message *msg, void rspamd_http_message_free (struct rspamd_http_message *msg); /** + * Increase refcount for shared file (if any) to prevent early memory unlinking + * @param msg + */ +void* 
rspamd_http_message_shmem_ref (struct rspamd_http_message *msg); +/** + * Decrease external ref for shmem segment associated with a message + * @param msg + */ +void rspamd_http_message_shmem_unref (void *p); + +/** * Parse HTTP date header and return it as time_t * @param header HTTP date header * @param len length of header @@ -327,12 +395,12 @@ time_t rspamd_http_parse_date (const gchar *header, gsize len); * @return */ struct rspamd_http_connection_router * rspamd_http_router_new ( - rspamd_http_router_error_handler_t eh, - rspamd_http_router_finish_handler_t fh, - struct timeval *timeout, - struct event_base *base, - const char *default_fs_path, - struct rspamd_keypair_cache *cache); + rspamd_http_router_error_handler_t eh, + rspamd_http_router_finish_handler_t fh, + struct timeval *timeout, + struct event_base *base, + const char *default_fs_path, + struct rspamd_keypair_cache *cache); /** * Set encryption key for the HTTP router @@ -346,7 +414,7 @@ void rspamd_http_router_set_key (struct rspamd_http_connection_router *router, * Add new path to the router */ void rspamd_http_router_add_path (struct rspamd_http_connection_router *router, - const gchar *path, rspamd_http_router_handler_t handler); + const gchar *path, rspamd_http_router_handler_t handler); /** * Handle new accepted socket @@ -355,9 +423,9 @@ void rspamd_http_router_add_path (struct rspamd_http_connection_router *router, * @param ud opaque userdata */ void rspamd_http_router_handle_socket ( - struct rspamd_http_connection_router *router, - gint fd, - gpointer ud); + struct rspamd_http_connection_router *router, + gint fd, + gpointer ud); /** * Free router and all connections associated diff --git a/src/libutil/http_private.h b/src/libutil/http_private.h new file mode 100644 index 000000000..38fbec742 --- /dev/null +++ b/src/libutil/http_private.h @@ -0,0 +1,77 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use 
this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef SRC_LIBUTIL_HTTP_PRIVATE_H_ +#define SRC_LIBUTIL_HTTP_PRIVATE_H_ + +#include "http.h" +#include "str_util.h" +#include "ref.h" +#include "../../contrib/mumhash/mum.h" +#define HASH_CASELESS +#include "uthash_strcase.h" + +/** + * HTTP header structure + */ +struct rspamd_http_header { + rspamd_ftok_t *name; + rspamd_ftok_t *value; + rspamd_fstring_t *combined; + UT_hash_handle hh; +}; + +/** + * HTTP message structure, used for requests and replies + */ +struct rspamd_http_message { + rspamd_fstring_t *url; + rspamd_fstring_t *host; + rspamd_fstring_t *status; + struct rspamd_http_header *headers; + + struct _rspamd_body_buf_s { + /* Data start */ + const gchar *begin; + /* Data len */ + gsize len; + /* Data buffer (used to write data inside) */ + gchar *str; + + /* Internal storage */ + union _rspamd_storage_u { + rspamd_fstring_t *normal; + struct _rspamd_storage_shared_s { + struct _rspamd_storage_shmem_s { + gchar *shm_name; + ref_entry_t ref; + } *name; + gint shm_fd; + } shared; + } c; + } body_buf; + + struct rspamd_cryptobox_pubkey *peer_key; + time_t date; + time_t last_modified; + unsigned port; + enum http_parser_type type; + gint code; + enum http_method method; + gint flags; +}; + + +#endif /* SRC_LIBUTIL_HTTP_PRIVATE_H_ */ diff --git a/src/libutil/logger.c b/src/libutil/logger.c index f81730448..7f46c2a50 100644 --- a/src/libutil/logger.c +++ b/src/libutil/logger.c @@ -61,8 +61,6 @@ struct rspamd_logger_s { gchar *saved_module; gchar *saved_id; guint saved_loglevel; - 
rspamd_mempool_t *pool; - rspamd_mempool_mutex_t *mtx; guint64 log_cnt[4]; }; @@ -70,18 +68,6 @@ static const gchar lf_chr = '\n'; static rspamd_logger_t *default_logger = NULL; -#define RSPAMD_LOGGER_LOCK(l) do { \ - if ((l) != NULL && !(l)->no_lock) { \ - rspamd_mempool_lock_mutex ((l)->mtx); \ - } \ -} while (0) - -#define RSPAMD_LOGGER_UNLOCK(l) do { \ - if ((l) != NULL && !(l)->no_lock) { \ - rspamd_mempool_unlock_mutex ((l)->mtx); \ - } \ -} while (0) - static void syslog_log_function (const gchar *log_domain, const gchar *module, const gchar *id, const gchar *function, @@ -118,6 +104,10 @@ direct_write_log_line (rspamd_logger_t *rspamd_log, glong r; if (rspamd_log->enabled) { + if (!rspamd_log->no_lock) { + rspamd_file_lock (rspamd_log->fd, FALSE); + } + if (is_iov) { iov = (struct iovec *) data; r = writev (rspamd_log->fd, iov, count); @@ -126,6 +116,11 @@ direct_write_log_line (rspamd_logger_t *rspamd_log, line = (const gchar *) data; r = write (rspamd_log->fd, line, count); } + + if (!rspamd_log->no_lock) { + rspamd_file_unlock (rspamd_log->fd, FALSE); + } + if (r == -1) { /* We cannot write message to file, so we need to detect error and make decision */ if (errno == EINTR) { @@ -321,11 +316,7 @@ rspamd_set_logger (struct rspamd_config *cfg, struct rspamd_main *rspamd) { if (rspamd->logger == NULL) { - rspamd->logger = g_malloc (sizeof (rspamd_logger_t)); - memset (rspamd->logger, 0, sizeof (rspamd_logger_t)); - /* Small pool for interlocking */ - rspamd->logger->pool = rspamd_mempool_new (512, NULL); - rspamd->logger->mtx = rspamd_mempool_get_mutex (rspamd->logger->pool); + rspamd->logger = g_slice_alloc0 (sizeof (rspamd_logger_t)); } rspamd->logger->type = cfg->log_type; @@ -468,14 +459,12 @@ rspamd_common_logv (rspamd_logger_t *rspamd_log, GLogLevelFlags log_level, else { if (rspamd_logger_need_log (rspamd_log, log_level, module)) { rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, args); - RSPAMD_LOGGER_LOCK (rspamd_log); rspamd_log->log_func (NULL, 
module, id, function, log_level, logbuf, FALSE, rspamd_log); - RSPAMD_LOGGER_UNLOCK (rspamd_log); } switch (log_level) { @@ -936,7 +925,6 @@ rspamd_conditional_debug (rspamd_logger_t *rspamd_log, } } - RSPAMD_LOGGER_LOCK (rspamd_log); va_start (vp, fmt); end = rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, vp); *end = '\0'; @@ -947,7 +935,6 @@ rspamd_conditional_debug (rspamd_logger_t *rspamd_log, logbuf, TRUE, rspamd_log); - RSPAMD_LOGGER_UNLOCK (rspamd_log); } } @@ -964,14 +951,12 @@ rspamd_glib_log_function (const gchar *log_domain, if (rspamd_log->enabled && rspamd_logger_need_log (rspamd_log, log_level, NULL)) { - RSPAMD_LOGGER_LOCK (rspamd_log); rspamd_log->log_func (log_domain, "glib", NULL, NULL, log_level, message, FALSE, rspamd_log); - RSPAMD_LOGGER_UNLOCK (rspamd_log); } } diff --git a/src/libutil/map.c b/src/libutil/map.c index 86cd3e5ee..11d760fe5 100644 --- a/src/libutil/map.c +++ b/src/libutil/map.c @@ -20,6 +20,7 @@ #include "map.h" #include "map_private.h" #include "http.h" +#include "http_private.h" #include "rspamd.h" #include "cryptobox.h" #include "unix-std.h" @@ -79,6 +80,10 @@ write_http_request (struct http_callback_data *cbd) if (cbd->fd != -1) { msg = rspamd_http_new_message (HTTP_REQUEST); + if (cbd->bk->protocol == MAP_PROTO_HTTPS) { + msg->flags |= RSPAMD_HTTP_FLAG_SSL; + } + if (cbd->check) { msg->method = HTTP_HEAD; } @@ -470,6 +475,9 @@ http_map_finish (struct rspamd_http_connection *conn, else { cbd->data->last_checked = msg->date; } + + cbd->periodic->cur_backend ++; + rspamd_map_periodic_callback (-1, EV_TIMEOUT, cbd->periodic); } else { msg_info_map ("cannot load map %s from %s: HTTP error %d", @@ -561,6 +569,11 @@ read_map_file (struct rspamd_map *map, struct file_map_data *data, static void rspamd_map_periodic_dtor (struct map_periodic_cbdata *periodic) { + struct rspamd_map *map; + + map = periodic->map; + msg_debug_map ("periodic dtor %p", periodic); + if (periodic->need_modify) { /* We are done */ 
periodic->map->fin_callback (&periodic->cbdata); @@ -605,6 +618,8 @@ rspamd_map_schedule_periodic (struct rspamd_map *map, cbd->map = map; REF_INIT_RETAIN (cbd, rspamd_map_periodic_dtor); + msg_debug_map ("schedule new periodic event %p in %.2f seconds", cbd, timeout); + if (initial) { evtimer_set (&map->ev, rspamd_map_periodic_callback, cbd); event_base_set (map->ev_base, &map->ev); @@ -645,9 +660,13 @@ rspamd_map_dns_callback (struct rdns_reply *reply, void *arg) if (cbd->fd != -1) { cbd->stage = map_load_file; cbd->conn = rspamd_http_connection_new (http_map_read, - http_map_error, http_map_finish, - RSPAMD_HTTP_BODY_PARTIAL|RSPAMD_HTTP_CLIENT_SIMPLE, - RSPAMD_HTTP_CLIENT, NULL); + http_map_error, + http_map_finish, + RSPAMD_HTTP_BODY_PARTIAL | + RSPAMD_HTTP_CLIENT_SIMPLE, + RSPAMD_HTTP_CLIENT, + NULL, + cbd->map->cfg->libs_ctx->ssl_ctx); write_http_request (cbd); } @@ -852,7 +871,7 @@ rspamd_map_periodic_callback (gint fd, short what, void *ud) if (cbd->need_modify) { /* Load data from the next backend */ - if (bk->protocol == MAP_PROTO_HTTP) { + if (bk->protocol == MAP_PROTO_HTTP || bk->protocol == MAP_PROTO_HTTPS) { rspamd_map_http_read_callback (fd, what, cbd); } else { @@ -861,7 +880,7 @@ rspamd_map_periodic_callback (gint fd, short what, void *ud) } else { /* Check the next backend */ - if (bk->protocol == MAP_PROTO_HTTP) { + if (bk->protocol == MAP_PROTO_HTTP || bk->protocol == MAP_PROTO_HTTPS) { rspamd_map_http_check_callback (fd, what, cbd); } else { @@ -980,15 +999,19 @@ rspamd_map_check_proto (struct rspamd_config *cfg, bk->protocol = MAP_PROTO_FILE; - if (g_ascii_strncasecmp (pos, "http://", - sizeof ("http://") - 1) == 0) { + if (g_ascii_strncasecmp (pos, "http://", sizeof ("http://") - 1) == 0) { bk->protocol = MAP_PROTO_HTTP; /* Include http:// */ bk->uri = g_strdup (pos); pos += sizeof ("http://") - 1; } - else if (g_ascii_strncasecmp (pos, "file://", sizeof ("file://") - - 1) == 0) { + else if (g_ascii_strncasecmp (pos, "https://", sizeof 
("https://") - 1) == 0) { + bk->protocol = MAP_PROTO_HTTPS; + /* Include https:// */ + bk->uri = g_strdup (pos); + pos += sizeof ("https://") - 1; + } + else if (g_ascii_strncasecmp (pos, "file://", sizeof ("file://") - 1) == 0) { pos += sizeof ("file://") - 1; /* Exclude file:// */ bk->uri = g_strdup (pos); @@ -1023,7 +1046,10 @@ rspamd_map_is_map (const gchar *map_line) else if (g_ascii_strncasecmp (map_line, "file://", sizeof ("file://") - 1) == 0) { ret = TRUE; } - else if (g_ascii_strncasecmp (map_line, "http://", sizeof ("file://") - 1) == 0) { + else if (g_ascii_strncasecmp (map_line, "http://", sizeof ("http://") - 1) == 0) { + ret = TRUE; + } + else if (g_ascii_strncasecmp (map_line, "https://", sizeof ("https://") - 1) == 0) { ret = TRUE; } @@ -1081,7 +1107,7 @@ rspamd_map_parse_backend (struct rspamd_config *cfg, const gchar *map_line) fdata->filename = g_strdup (bk->uri); bk->data.fd = fdata; } - else if (bk->protocol == MAP_PROTO_HTTP) { + else if (bk->protocol == MAP_PROTO_HTTP || bk->protocol == MAP_PROTO_HTTPS) { hdata = g_slice_alloc0 (sizeof (struct http_map_data)); memset (&up, 0, sizeof (up)); @@ -1104,7 +1130,12 @@ rspamd_map_parse_backend (struct rspamd_config *cfg, const gchar *map_line) hdata->port = up.port; } else { - hdata->port = 80; + if (bk->protocol == MAP_PROTO_HTTP) { + hdata->port = 80; + } + else { + hdata->port = 443; + } } if (up.field_set & 1 << UF_PATH) { diff --git a/src/libutil/map_private.h b/src/libutil/map_private.h index 9bdca5f90..0370fc607 100644 --- a/src/libutil/map_private.h +++ b/src/libutil/map_private.h @@ -44,6 +44,7 @@ typedef void (*rspamd_map_dtor) (gpointer p); enum fetch_proto { MAP_PROTO_FILE, MAP_PROTO_HTTP, + MAP_PROTO_HTTPS }; struct rspamd_map_backend { diff --git a/src/libutil/ssl_util.c b/src/libutil/ssl_util.c new file mode 100644 index 000000000..55d5a1ad4 --- /dev/null +++ b/src/libutil/ssl_util.c @@ -0,0 +1,699 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "config.h" +#include "libutil/util.h" +#include "ssl_util.h" + +#include <openssl/ssl.h> +#include <openssl/err.h> +#include <openssl/rand.h> +#include <openssl/conf.h> +#include <openssl/x509v3.h> + +struct rspamd_ssl_connection { + gint fd; + enum { + ssl_conn_reset = 0, + ssl_conn_init, + ssl_conn_connected, + ssl_next_read, + ssl_next_write + } state; + SSL *ssl; + gchar *hostname; + struct event *ev; + struct event_base *ev_base; + struct timeval *tv; + rspamd_ssl_handler_t handler; + rspamd_ssl_error_handler_t err_handler; + gpointer handler_data; +}; + +static GQuark +rspamd_ssl_quark (void) +{ + return g_quark_from_static_string ("rspamd-ssl"); +} + +/* $OpenBSD: tls_verify.c,v 1.14 2015/09/29 10:17:04 deraadt Exp $ */ +/* + * Copyright (c) 2014 Jeremie Courreges-Anglas <jca@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +static gboolean +rspamd_tls_match_name (const char *cert_name, const char *name) +{ + const char *cert_domain, *domain, *next_dot; + + if (g_ascii_strcasecmp (cert_name, name) == 0) { + return TRUE; + } + + /* Wildcard match? */ + if (cert_name[0] == '*') { + /* + * Valid wildcards: + * - "*.domain.tld" + * - "*.sub.domain.tld" + * - etc. + * Reject "*.tld". + * No attempt to prevent the use of eg. "*.co.uk". + */ + cert_domain = &cert_name[1]; + /* Disallow "*" */ + if (cert_domain[0] == '\0') { + return FALSE; + } + + /* Disallow "*foo" */ + if (cert_domain[0] != '.') { + return FALSE; + } + /* Disallow "*.." */ + if (cert_domain[1] == '.') { + return FALSE; + } + next_dot = strchr (&cert_domain[1], '.'); + /* Disallow "*.bar" */ + if (next_dot == NULL) { + return FALSE; + } + /* Disallow "*.bar.." */ + if (next_dot[1] == '.') { + return FALSE; + } + + domain = strchr (name, '.'); + + /* No wildcard match against a name with no host part. */ + if (name[0] == '.') { + return FALSE; + } + /* No wildcard match against a name with no domain part. */ + if (domain == NULL || strlen (domain) == 1) { + return FALSE; + } + + if (g_ascii_strcasecmp (cert_domain, domain) == 0) { + return TRUE; + } + } + + return FALSE; +} + +/* See RFC 5280 section 4.2.1.6 for SubjectAltName details. 
*/ +static gboolean +rspamd_tls_check_subject_altname (X509 *cert, const char *name) +{ + STACK_OF(GENERAL_NAME) *altname_stack = NULL; + int addrlen, type; + int count, i; + union { + struct in_addr ip4; + struct in6_addr ip6; + } addrbuf; + gboolean ret = FALSE; + + altname_stack = X509_get_ext_d2i (cert, NID_subject_alt_name, NULL, NULL); + + if (altname_stack == NULL) { + return FALSE; + } + + if (inet_pton (AF_INET, name, &addrbuf) == 1) { + type = GEN_IPADD; + addrlen = 4; + } + else if (inet_pton (AF_INET6, name, &addrbuf) == 1) { + type = GEN_IPADD; + addrlen = 16; + } + else { + type = GEN_DNS; + addrlen = 0; + } + + count = sk_GENERAL_NAME_num (altname_stack); + + for (i = 0; i < count; i++) { + GENERAL_NAME *altname; + + altname = sk_GENERAL_NAME_value (altname_stack, i); + + if (altname->type != type) { + continue; + } + + if (type == GEN_DNS) { + unsigned char *data; + int format, len; + + format = ASN1_STRING_type (altname->d.dNSName); + + if (format == V_ASN1_IA5STRING) { + data = ASN1_STRING_data (altname->d.dNSName); + len = ASN1_STRING_length (altname->d.dNSName); + + if (len < 0 || len != (gint)strlen (data)) { + ret = FALSE; + break; + } + + /* + * Per RFC 5280 section 4.2.1.6: + * " " is a legal domain name, but that + * dNSName must be rejected. + */ + if (strcmp (data, " ") == 0) { + ret = FALSE; + break; + } + + if (rspamd_tls_match_name (data, name)) { + ret = TRUE; + break; + } + } + } + else if (type == GEN_IPADD) { + unsigned char *data; + int datalen; + + datalen = ASN1_STRING_length (altname->d.iPAddress); + data = ASN1_STRING_data (altname->d.iPAddress); + + if (datalen < 0) { + ret = FALSE; + break; + } + + /* + * Per RFC 5280 section 4.2.1.6: + * IPv4 must use 4 octets and IPv6 must use 16 octets. 
+ */ + if (datalen == addrlen && memcmp (data, &addrbuf, addrlen) == 0) { + ret = TRUE; + break; + } + } + } + + sk_GENERAL_NAME_pop_free (altname_stack, GENERAL_NAME_free); + return ret; +} + +static gboolean +rspamd_tls_check_common_name (X509 *cert, const char *name) +{ + X509_NAME *subject_name; + char *common_name = NULL; + union { + struct in_addr ip4; + struct in6_addr ip6; + } addrbuf; + int common_name_len; + gboolean ret = FALSE; + + subject_name = X509_get_subject_name (cert); + if (subject_name == NULL) { + goto out; + } + + common_name_len = X509_NAME_get_text_by_NID (subject_name, NID_commonName, NULL, 0); + + if (common_name_len < 0) { + goto out; + } + + common_name = g_malloc0 (common_name_len + 1); + X509_NAME_get_text_by_NID (subject_name, NID_commonName, common_name, + common_name_len + 1); + + /* NUL bytes in CN? */ + if (common_name_len != (gint)strlen (common_name)) { + goto out; + } + + if (inet_pton (AF_INET, name, &addrbuf) == 1 + || inet_pton (AF_INET6, name, &addrbuf) == 1) { + /* + * We don't want to attempt wildcard matching against IP + * addresses, so perform a simple comparison here. 
+ */ + if (strcmp (common_name, name) == 0) { + ret = TRUE; + } + else { + ret = FALSE; + } + + goto out; + } + + if (rspamd_tls_match_name (common_name, name)) { + ret = TRUE; + } + +out: + g_free (common_name); + + return ret; +} + +static gboolean +rspamd_tls_check_name (X509 *cert, const char *name) +{ + gboolean ret; + + ret = rspamd_tls_check_subject_altname (cert, name); + if (ret) { + return ret; + } + + return rspamd_tls_check_common_name (cert, name); +} + +static gboolean +rspamd_ssl_peer_verify (struct rspamd_ssl_connection *c) +{ + X509 *server_cert; + glong ver_err; + GError *err = NULL; + + ver_err = SSL_get_verify_result (c->ssl); + + if (ver_err != X509_V_OK) { + g_set_error (&err, rspamd_ssl_quark (), ver_err, "certificate validation " + "failed: %s", X509_verify_cert_error_string (ver_err)); + c->err_handler (c->handler_data, err); + g_error_free (err); + + return FALSE; + } + + /* Get server's certificate */ + server_cert = SSL_get_peer_certificate (c->ssl); + if (server_cert == NULL) { + g_set_error (&err, rspamd_ssl_quark (), ver_err, "peer certificate is absent"); + c->err_handler (c->handler_data, err); + g_error_free (err); + + return FALSE; + } + + if (c->hostname) { + if (!rspamd_tls_check_name (server_cert, c->hostname)) { + g_set_error (&err, rspamd_ssl_quark (), ver_err, "peer certificate fails " + "hostname verification for %s", c->hostname); + c->err_handler (c->handler_data, err); + g_error_free (err); + + return FALSE; + } + } + + return TRUE; +} + +static void +rspamd_ssl_event_handler (gint fd, short what, gpointer ud) +{ + struct rspamd_ssl_connection *c = ud; + gint ret; + GError *err = NULL; + + switch (c->state) { + case ssl_conn_init: + /* Continue connection */ + ret = SSL_connect (c->ssl); + + if (ret == 1) { + event_del (c->ev); + /* Verify certificate */ + if (rspamd_ssl_peer_verify (c)) { + c->state = ssl_conn_connected; + c->handler (fd, EV_WRITE, c->handler_data); + } + else { + g_assert (0); + } + } + else { + ret = 
SSL_get_error (c->ssl, ret); + + if (ret == SSL_ERROR_WANT_READ) { + what = EV_READ; + } + else if (ret == SSL_ERROR_WANT_WRITE) { + what = EV_WRITE; + } + else { + g_set_error (&err, rspamd_ssl_quark (), ret, + "ssl connect error: %s", ERR_error_string (ret, NULL)); + c->err_handler (c->handler_data, err); + g_error_free (err); + return; + } + + event_del (c->ev); + event_set (c->ev, fd, what, rspamd_ssl_event_handler, c); + event_base_set (c->ev_base, c->ev); + event_add (c->ev, c->tv); + } + break; + case ssl_next_read: + event_del (c->ev); + /* Restore handler */ + event_set (c->ev, c->fd, EV_READ|EV_PERSIST, + c->handler, c->handler_data); + event_base_set (c->ev_base, c->ev); + event_add (c->ev, c->tv); + c->state = ssl_conn_connected; + c->handler (fd, EV_READ, c->handler_data); + break; + case ssl_next_write: + case ssl_conn_connected: + event_del (c->ev); + /* Restore handler */ + event_set (c->ev, c->fd, EV_WRITE, + c->handler, c->handler_data); + event_base_set (c->ev_base, c->ev); + event_add (c->ev, c->tv); + c->state = ssl_conn_connected; + c->handler (fd, EV_WRITE, c->handler_data); + break; + default: + g_set_error (&err, rspamd_ssl_quark (), EINVAL, + "ssl bad state error: %d", c->state); + c->err_handler (c->handler_data, err); + g_error_free (err); + break; + } +} + +struct rspamd_ssl_connection * +rspamd_ssl_connection_new (gpointer ssl_ctx, struct event_base *ev_base) +{ + struct rspamd_ssl_connection *c; + + g_assert (ssl_ctx != NULL); + c = g_slice_alloc0 (sizeof (*c)); + c->ssl = SSL_new (ssl_ctx); + c->ev_base = ev_base; + + return c; +} + + +gboolean +rspamd_ssl_connect_fd (struct rspamd_ssl_connection *conn, gint fd, + const gchar *hostname, struct event *ev, struct timeval *tv, + rspamd_ssl_handler_t handler, rspamd_ssl_error_handler_t err_handler, + gpointer handler_data) +{ + gint ret; + short what; + + g_assert (conn != NULL); + + if (conn->state != ssl_conn_reset) { + return FALSE; + } + + conn->fd = fd; + conn->ev = ev; + 
conn->handler = handler; + conn->err_handler = err_handler; + conn->handler_data = handler_data; + + if (SSL_set_fd (conn->ssl, fd) != 1) { + return FALSE; + } + + if (hostname) { + conn->hostname = g_strdup (hostname); +#ifdef HAVE_SSL_TLSEXT_HOSTNAME + SSL_set_tlsext_host_name (conn->ssl, hostname); +#endif + } + + conn->state = ssl_conn_init; + + ret = SSL_connect (conn->ssl); + + if (ret == 1) { + conn->state = ssl_conn_connected; + + if (event_get_base (ev)) { + event_del (ev); + } + + event_set (ev, fd, EV_WRITE, rspamd_ssl_event_handler, conn); + + if (conn->ev_base) { + event_base_set (conn->ev_base, ev); + } + + event_add (ev, tv); + } + else { + ret = SSL_get_error (conn->ssl, ret); + + if (ret == SSL_ERROR_WANT_READ) { + what = EV_READ; + } + else if (ret == SSL_ERROR_WANT_WRITE) { + what = EV_WRITE; + } + else { + return FALSE; + } + + if (event_get_base (ev)) { + event_del (ev); + } + + event_set (ev, fd, what, rspamd_ssl_event_handler, conn); + event_base_set (conn->ev_base, ev); + event_add (ev, tv); + } + + return TRUE; +} + +gssize +rspamd_ssl_read (struct rspamd_ssl_connection *conn, gpointer buf, + gsize buflen) +{ + gint ret; + short what; + GError *err = NULL; + + g_assert (conn != NULL); + + if (conn->state != ssl_conn_connected && conn->state != ssl_next_read) { + errno = EINVAL; + return -1; + } + + ret = SSL_read (conn->ssl, buf, buflen); + + if (ret > 0) { + conn->state = ssl_conn_connected; + return ret; + } + else if (ret == 0) { + ret = SSL_get_error (conn->ssl, ret); + + if (ret == SSL_ERROR_ZERO_RETURN) { + conn->state = ssl_conn_reset; + return 0; + } + else { + g_set_error (&err, rspamd_ssl_quark (), ret, + "ssl write error: %s", ERR_error_string (ret, NULL)); + conn->err_handler (conn->handler_data, err); + g_error_free (err); + errno = EINVAL; + + return -1; + } + } + else { + ret = SSL_get_error (conn->ssl, ret); + conn->state = ssl_next_read; + + if (ret == SSL_ERROR_WANT_READ) { + what = EV_READ; + } + else if (ret == 
SSL_ERROR_WANT_WRITE) { + what = EV_WRITE; + } + else { + g_set_error (&err, rspamd_ssl_quark (), ret, + "ssl read error: %s", ERR_error_string (ret, NULL)); + conn->err_handler (conn->handler_data, err); + g_error_free (err); + errno = EINVAL; + + return -1; + } + + event_del (conn->ev); + event_set (conn->ev, conn->fd, what, rspamd_ssl_event_handler, conn); + event_base_set (conn->ev_base, conn->ev); + event_add (conn->ev, conn->tv); + + errno = EAGAIN; + + } + + return -1; +} + +gssize +rspamd_ssl_write (struct rspamd_ssl_connection *conn, gconstpointer buf, + gsize buflen) +{ + gint ret; + short what; + GError *err = NULL; + + g_assert (conn != NULL); + + if (conn->state != ssl_conn_connected && conn->state != ssl_next_write) { + errno = EINVAL; + return -1; + } + + ret = SSL_write (conn->ssl, buf, buflen); + + if (ret > 0) { + conn->state = ssl_conn_connected; + return ret; + } + else if (ret == 0) { + ret = SSL_get_error (conn->ssl, ret); + + if (ret == SSL_ERROR_ZERO_RETURN) { + conn->state = ssl_conn_reset; + return 0; + } + else { + g_set_error (&err, rspamd_ssl_quark (), ret, + "ssl write error: %s", ERR_error_string (ret, NULL)); + conn->err_handler (conn->handler_data, err); + g_error_free (err); + errno = EINVAL; + + return -1; + } + } + else { + ret = SSL_get_error (conn->ssl, ret); + conn->state = ssl_next_read; + + if (ret == SSL_ERROR_WANT_READ) { + what = EV_READ; + } + else if (ret == SSL_ERROR_WANT_WRITE) { + what = EV_WRITE; + } + else { + g_set_error (&err, rspamd_ssl_quark (), ret, + "ssl fatal write error: %s", ERR_error_string (ret, NULL)); + conn->err_handler (conn->handler_data, err); + g_error_free (err); + errno = EINVAL; + + return -1; + } + + event_del (conn->ev); + event_set (conn->ev, conn->fd, what, rspamd_ssl_event_handler, conn); + event_base_set (conn->ev_base, conn->ev); + event_add (conn->ev, conn->tv); + + errno = EAGAIN; + } + + return -1; +} + +gssize +rspamd_ssl_writev (struct rspamd_ssl_connection *conn, struct iovec 
*iov, + gsize iovlen) +{ + static guchar ssl_buf[16000]; + guchar *p; + struct iovec *cur; + guint i, remain; + + remain = sizeof (ssl_buf); + p = ssl_buf; + + for (i = 0; i < iovlen; i ++) { + cur = &iov[i]; + + if (cur->iov_len > 0) { + if (remain >= cur->iov_len) { + memcpy (p, cur->iov_base, cur->iov_len); + p += cur->iov_len; + remain -= cur->iov_len; + } + else { + memcpy (p, cur->iov_base, remain); + p += remain; + remain = 0; + break; + } + } + } + + return rspamd_ssl_write (conn, ssl_buf, p - ssl_buf); +} + +/** + * Removes connection data + * @param conn + */ +void +rspamd_ssl_connection_free (struct rspamd_ssl_connection *conn) +{ + if (conn) { + SSL_free (conn->ssl); + + if (conn->hostname) { + g_free (conn->hostname); + } + + g_slice_free1 (sizeof (*conn), conn); + } +} diff --git a/src/libutil/ssl_util.h b/src/libutil/ssl_util.h new file mode 100644 index 000000000..64e6a413e --- /dev/null +++ b/src/libutil/ssl_util.h @@ -0,0 +1,89 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef SRC_LIBUTIL_SSL_UTIL_H_ +#define SRC_LIBUTIL_SSL_UTIL_H_ + +#include "config.h" +#include "libutil/addr.h" + +struct rspamd_ssl_connection; + +typedef void (*rspamd_ssl_handler_t)(gint fd, short what, gpointer d); +typedef void (*rspamd_ssl_error_handler_t)(gpointer d, GError *err); + +/** + * Creates a new ssl connection data structure + * @param ssl_ctx initialized SSL_CTX structure + * @return opaque connection data + */ +struct rspamd_ssl_connection * rspamd_ssl_connection_new (gpointer ssl_ctx, + struct event_base *ev_base); + +/** + * Connects SSL session using the specified (connected) FD + * @param conn connection + * @param fd fd to use + * @param hostname hostname for SNI + * @param ev event to use + * @param tv timeout for connection + * @param handler connected session handler + * @param handler_data opaque data + * @return TRUE if a session has been connected + */ +gboolean rspamd_ssl_connect_fd (struct rspamd_ssl_connection *conn, gint fd, + const gchar *hostname, struct event *ev, struct timeval *tv, + rspamd_ssl_handler_t handler, rspamd_ssl_error_handler_t err_handler, + gpointer handler_data); + +/** + * Perform async read from SSL socket + * @param conn + * @param buf + * @param buflen + * @return + */ +gssize rspamd_ssl_read (struct rspamd_ssl_connection *conn, gpointer buf, + gsize buflen); + +/** + * Perform async write to ssl buffer + * @param conn + * @param buf + * @param buflen + * @param ev + * @param tv + * @return + */ +gssize rspamd_ssl_write (struct rspamd_ssl_connection *conn, gconstpointer buf, + gsize buflen); + +/** + * Emulate writev by copying iovec to a temporary buffer + * @param conn + * @param buf + * @param buflen + * @return + */ +gssize rspamd_ssl_writev (struct rspamd_ssl_connection *conn, struct iovec *iov, + gsize iovlen); + +/** + * Removes connection data + * @param conn + */ +void rspamd_ssl_connection_free (struct rspamd_ssl_connection *conn); + +#endif /* SRC_LIBUTIL_SSL_UTIL_H_ */ diff --git 
a/src/libutil/str_util.c b/src/libutil/str_util.c index 67aa63aa8..1ce81bc9e 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -17,9 +17,10 @@ #include "util.h" #include "cryptobox.h" #include "url.h" +#include "str_util.h" #include <math.h> -static const guchar lc_map[256] = { +const guchar lc_map[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h index a63b160dd..695a8d022 100644 --- a/src/libutil/str_util.h +++ b/src/libutil/str_util.h @@ -308,4 +308,6 @@ gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b); /* Compare two urls for building emails hash */ gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b); +extern const guchar lc_map[256]; + #endif /* SRC_LIBUTIL_STR_UTIL_H_ */ diff --git a/src/libutil/uthash_strcase.h b/src/libutil/uthash_strcase.h index 5d1df130f..77c807630 100644 --- a/src/libutil/uthash_strcase.h +++ b/src/libutil/uthash_strcase.h @@ -16,30 +16,66 @@ #ifndef UTHASH_STRCASE_H_ #define UTHASH_STRCASE_H_ -#include "xxhash.h" - /* Utils for uthash tuning */ #ifndef HASH_CASELESS #define HASH_FUNCTION(key,keylen,num_bkts,hashv,bkt) do {\ - hashv = XXH32(key, keylen, 0); \ + hashv = mum(key, keylen, 0xdeadbabe); \ bkt = (hashv) & (num_bkts-1); \ } while (0) #define HASH_KEYCMP(a,b,len) memcmp(a,b,len) #else #define HASH_FUNCTION(key,keylen,num_bkts,hashv,bkt) do {\ - XXH32_state_t xxh; \ - XXH32_reset(&xxh, 0xdead); \ - unsigned char *p = (unsigned char *)key, t; \ - for (unsigned int i = 0; i < keylen; i ++) { \ - t = g_ascii_tolower(p[i]); \ - XXH32_update(&xxh, &t, 1); \ - } \ - hashv = XXH32_digest(&xxh); \ - bkt = (hashv) & (num_bkts-1); \ + unsigned _len = keylen; \ + unsigned _leftover = keylen % 8; \ + unsigned _fp, _i; \ + const uint8_t* _s = (const uint8_t*)(key); \ + union { \ + struct { \ + unsigned char c1, c2, c3, c4, c5, c6, c7, c8; \ + 
} c; \ + uint64_t pp; \ + } _u; \ + uint64_t _r; \ + _fp = _len - _leftover; \ + _r = 0xdeadbabe; \ + for (_i = 0; _i != _fp; _i += 8) { \ + _u.c.c1 = _s[_i], _u.c.c2 = _s[_i + 1], _u.c.c3 = _s[_i + 2], _u.c.c4 = _s[_i + 3]; \ + _u.c.c5 = _s[_i + 4], _u.c.c6 = _s[_i + 5], _u.c.c7 = _s[_i + 6], _u.c.c8 = _s[_i + 7]; \ + _u.c.c1 = lc_map[_u.c.c1]; \ + _u.c.c2 = lc_map[_u.c.c2]; \ + _u.c.c3 = lc_map[_u.c.c3]; \ + _u.c.c4 = lc_map[_u.c.c4]; \ + _u.c.c1 = lc_map[_u.c.c5]; \ + _u.c.c2 = lc_map[_u.c.c6]; \ + _u.c.c3 = lc_map[_u.c.c7]; \ + _u.c.c4 = lc_map[_u.c.c8]; \ + _r = mum_hash_step (_r, _u.pp); \ + } \ + _u.pp = 0; \ + switch (_leftover) { \ + case 7: \ + _u.c.c7 = lc_map[(unsigned char)_s[_i++]]; \ + case 6: \ + _u.c.c6 = lc_map[(unsigned char)_s[_i++]]; \ + case 5: \ + _u.c.c5 = lc_map[(unsigned char)_s[_i++]]; \ + case 4: \ + _u.c.c4 = lc_map[(unsigned char)_s[_i++]]; \ + case 3: \ + _u.c.c3 = lc_map[(unsigned char)_s[_i++]]; \ + case 2: \ + _u.c.c2 = lc_map[(unsigned char)_s[_i++]]; \ + case 1: \ + _u.c.c1 = lc_map[(unsigned char)_s[_i]]; \ + _r = mum_hash_step (_r, _u.pp); \ + break; \ + } \ + hashv = mum_hash_finish (_r); \ + bkt = (hashv) & (num_bkts-1); \ } while (0) -#define HASH_KEYCMP(a,b,len) strncasecmp(a,b,len) +#define HASH_KEYCMP(a,b,len) rspamd_lc_cmp(a,b,len) #endif #include "uthash.h" diff --git a/src/libutil/util.c b/src/libutil/util.c index 4ce90ba06..aaaa09f27 100644 --- a/src/libutil/util.c +++ b/src/libutil/util.c @@ -28,6 +28,9 @@ #include <openssl/rand.h> #include <openssl/err.h> #include <openssl/evp.h> +#include <openssl/ssl.h> +#include <openssl/conf.h> +#include <openssl/engine.h> #endif #ifdef HAVE_TERMIOS_H @@ -1875,6 +1878,63 @@ randombytes (guchar *buf, guint64 len) ottery_rand_bytes (buf, (size_t)len); } +void +rspamd_random_hex (guchar *buf, guint64 len) +{ + static const gchar hexdigests[16] = "0123456789abcdef"; + gint64 i; + + g_assert (len > 0); + + ottery_rand_bytes (buf, (len / 2.0 + 0.5)); + + for (i = (gint64)len - 1; i >= 
0; i -= 2) { + buf[i] = hexdigests[buf[i / 2] & 0xf]; + + if (i > 0) { + buf[i - 1] = hexdigests[(buf[i / 2] >> 4) & 0xf]; + } + } +} + +gint +rspamd_shmem_mkstemp (gchar *pattern) +{ + gint fd = -1; + gchar *nbuf, *xpos; + gsize blen; + + xpos = strchr (pattern, 'X'); + + if (xpos == NULL) { + errno = EINVAL; + return -1; + } + + blen = strlen (pattern); + nbuf = g_malloc (blen + 1); + rspamd_strlcpy (nbuf, pattern, blen + 1); + xpos = nbuf + (xpos - pattern); + + for (;;) { + rspamd_random_hex (xpos, blen - (xpos - nbuf)); + + fd = shm_open (nbuf, O_RDWR | O_EXCL | O_CREAT, 0600); + + if (fd != -1) { + rspamd_strlcpy (pattern, nbuf, blen + 1); + break; + } + else if (errno != EEXIST) { + g_error ("%s: failed to create temp shmem %s: %s", + G_STRLOC, nbuf, strerror (errno)); + } + } + + g_free (nbuf); + + return fd; +} void rspamd_ptr_array_free_hard (gpointer p) @@ -1950,6 +2010,36 @@ rspamd_init_libs (void) OpenSSL_add_all_algorithms (); OpenSSL_add_all_digests (); OpenSSL_add_all_ciphers (); + +#if OPENSSL_VERSION_NUMBER >= 0x1000104fL + ENGINE_load_builtin_engines (); + + if ((ctx->crypto_ctx->cpu_config & CPUID_RDRAND) == 0) { + RAND_set_rand_engine (NULL); + } +#endif +#if OPENSSL_VERSION_NUMBER < 0x10100000L || defined(LIBRESSL_VERSION_NUMBER) + SSL_library_init (); +#else + OPENSSL_init_ssl (0, NULL); +#endif + SSL_library_init (); + SSL_load_error_strings (); + OPENSSL_config (NULL); + + if (RAND_poll () == 0) { + guchar seed[128]; + + /* Try to use ottery to seed rand */ + ottery_rand_bytes (seed, sizeof (seed)); + RAND_seed (seed, sizeof (seed)); + rspamd_explicit_memzero (seed, sizeof (seed)); + } + + ctx->ssl_ctx = SSL_CTX_new (SSLv23_method ()); + SSL_CTX_set_verify (ctx->ssl_ctx, SSL_VERIFY_PEER, NULL); + SSL_CTX_set_verify_depth (ctx->ssl_ctx, 4); + SSL_CTX_set_options(ctx->ssl_ctx, SSL_OP_NO_SSLv2|SSL_OP_NO_SSLv3|SSL_OP_NO_COMPRESSION); #endif g_random_set_seed (ottery_rand_uint32 ()); @@ -1977,6 +2067,8 @@ void rspamd_config_libs (struct 
rspamd_external_libs_ctx *ctx, struct rspamd_config *cfg) { + static const char secure_ciphers[] = "HIGH:!aNULL:!kRSA:!PSK:!SRP:!MD5:!RC4"; + g_assert (cfg != NULL); if (ctx != NULL) { @@ -1992,6 +2084,30 @@ rspamd_config_libs (struct rspamd_external_libs_ctx *ctx, (void **) ctx->local_addrs); } } + + if (cfg->ssl_ca_path) { + if (SSL_CTX_load_verify_locations (ctx->ssl_ctx, cfg->ssl_ca_path, + NULL) != 1) { + msg_err_config ("cannot load CA certs from %s: %s", + cfg->ssl_ca_path, + ERR_error_string (ERR_get_error (), NULL)); + } + } + else { + msg_warn_config ("ssl_ca_path is not set, using default CA path"); + SSL_CTX_set_default_verify_paths (ctx->ssl_ctx); + } + + if (cfg->ssl_ciphers) { + if (SSL_CTX_set_cipher_list (ctx->ssl_ctx, cfg->ssl_ciphers) != 1) { + msg_err_config ("cannot set ciphers set to %s: %s; fallback to %s", + cfg->ssl_ciphers, + ERR_error_string (ERR_get_error (), NULL), + secure_ciphers); + /* Default settings */ + SSL_CTX_set_cipher_list (ctx->ssl_ctx, secure_ciphers); + } + } } } @@ -2010,6 +2126,7 @@ rspamd_deinit_libs (struct rspamd_external_libs_ctx *ctx) #ifdef HAVE_OPENSSL EVP_cleanup (); ERR_free_strings (); + SSL_CTX_free (ctx->ssl_ctx); #endif rspamd_inet_library_destroy (); } diff --git a/src/libutil/util.h b/src/libutil/util.h index 48e91cb4f..0c293ccbe 100644 --- a/src/libutil/util.h +++ b/src/libutil/util.h @@ -405,6 +405,20 @@ void rspamd_deinit_libs (struct rspamd_external_libs_ctx *ctx); guint64 rspamd_hash_seed (void); /** + * Returns random hex string of the specified length + * @param buf + * @param len + */ +void rspamd_random_hex (guchar *buf, guint64 len); + +/** + * Returns + * @param pattern pattern to create (should end with some number of X symbols), modified by this function + * @return + */ +gint rspamd_shmem_mkstemp (gchar *pattern); + +/** * Return jittered time value */ gdouble rspamd_time_jitter (gdouble in, gdouble jitter); diff --git a/src/lua/lua_common.c b/src/lua/lua_common.c index 4c5051aeb..371567695 
100644 --- a/src/lua/lua_common.c +++ b/src/lua/lua_common.c @@ -68,36 +68,63 @@ rspamd_lua_new_class_full (lua_State *L, luaL_register (L, static_name, func); } -gint -rspamd_lua_class_tostring (lua_State * L) +static const gchar * +rspamd_lua_class_tostring_buf (lua_State *L, gboolean print_pointer, gint pos) { - gchar buf[32]; + static gchar buf[64]; + const gchar *ret = NULL; + gint pop = 0; - if (!lua_getmetatable (L, 1)) { - goto error; + if (!lua_getmetatable (L, pos)) { + goto err; } + lua_pushstring (L, "__index"); lua_gettable (L, -2); + pop ++; if (!lua_istable (L, -1)) { - goto error; + goto err; } + lua_pushstring (L, "class"); lua_gettable (L, -2); + pop ++; if (!lua_isstring (L, -1)) { - goto error; + goto err; } - snprintf (buf, sizeof (buf), "%p", lua_touserdata (L, 1)); + if (print_pointer) { + rspamd_snprintf (buf, sizeof (buf), "%s(%p)", lua_tostring (L, -1), + lua_touserdata (L, 1)); + } + else { + rspamd_snprintf (buf, sizeof (buf), "%s", lua_tostring (L, -1)); + } - lua_pushfstring (L, "%s: %s", lua_tostring (L, -1), buf); + ret = buf; - return 1; +err: + lua_pop (L, pop); + + return ret; +} + +gint +rspamd_lua_class_tostring (lua_State * L) +{ + const gchar *p; + + p = rspamd_lua_class_tostring_buf (L, TRUE, 1); + + if (!p) { + lua_pushstring (L, "invalid object passed to 'lua_common.c:__tostring'"); + return lua_error (L); + } + + lua_pushstring (L, p); -error: - lua_pushstring (L, "invalid object passed to 'lua_common.c:__tostring'"); - lua_error (L); return 1; } @@ -187,13 +214,13 @@ rspamd_lua_set_path (lua_State *L, struct rspamd_config *cfg) if (additional_path) { rspamd_snprintf (path_buf, sizeof (path_buf), - "%s/lua/?.lua;%s/lua/?.lua;%s;%s;%s", + "%s/lua/?.lua;%s/lua/?.lua;%s/?.lua;%s;%s", RSPAMD_PLUGINSDIR, RSPAMD_CONFDIR, RSPAMD_RULESDIR, additional_path, old_path); } else { rspamd_snprintf (path_buf, sizeof (path_buf), - "%s/lua/?.lua;%s/lua/?.lua;%s;%s", + "%s/lua/?.lua;%s/lua/?.lua;%s/?.lua;%s", RSPAMD_PLUGINSDIR, 
RSPAMD_CONFDIR, RSPAMD_RULESDIR, old_path); } @@ -702,6 +729,7 @@ rspamd_lua_parse_table_arguments (lua_State *L, gint pos, case 'U': if (t == LUA_TNIL || t == LUA_TNONE) { failed = TRUE; + *(va_arg (ap, void **)) = NULL; } else if (t != LUA_TUSERDATA) { g_set_error (err, @@ -804,11 +832,11 @@ rspamd_lua_parse_table_arguments (lua_State *L, gint pos, g_set_error (err, lua_error_quark (), 2, - "invalid class for key %*.s, expected %s, got %s", + "invalid class for key %.*s, expected %s, got %s", (gint) keylen, key, classbuf, - lua_tostring (L, idx)); + rspamd_lua_class_tostring_buf (L, FALSE, idx)); va_end (ap); return FALSE; diff --git a/src/lua/lua_common.h b/src/lua/lua_common.h index 63039533c..ba389d7a6 100644 --- a/src/lua/lua_common.h +++ b/src/lua/lua_common.h @@ -302,6 +302,7 @@ struct rspamd_config * lua_check_config (lua_State * L, gint pos); * - V - size_t + const char * * - U{classname} - userdata of the following class (stored in gpointer) * - F - function + * - O - ucl_object_t * * * If any of keys is prefixed with `*` then it is treated as required argument * @param L lua state diff --git a/src/lua/lua_dns.c b/src/lua/lua_dns.c index 1a778c53d..cb834cf88 100644 --- a/src/lua/lua_dns.c +++ b/src/lua/lua_dns.c @@ -256,92 +256,34 @@ lua_dns_resolver_resolve_common (lua_State *L, enum rdns_request_type type, int first) { - struct rspamd_async_session *session = NULL, **psession; - rspamd_mempool_t *pool = NULL, **ppool; - const gchar *to_resolve, *user_str = NULL; + struct rspamd_async_session *session = NULL; + rspamd_mempool_t *pool = NULL; + const gchar *to_resolve = NULL, *user_str = NULL; struct lua_dns_cbdata *cbdata; - gint cbref = -1; + gint cbref = -1, ret; struct rspamd_task *task = NULL; + GError *err = NULL; + gboolean forced = FALSE; /* Check arguments */ - if (lua_type (L, first) == LUA_TUSERDATA) { - /* Legacy version */ - psession = rspamd_lua_check_udata (L, first, "rspamd{session}"); - luaL_argcheck (L, psession != NULL, first, 
"'session' expected"); - session = psession ? *(psession) : NULL; - ppool = rspamd_lua_check_udata (L, first + 1, "rspamd{mempool}"); - luaL_argcheck (L, ppool != NULL, first + 1, "'mempool' expected"); - pool = ppool ? *(ppool) : NULL; - to_resolve = luaL_checkstring (L, first + 2); - - lua_pushvalue (L, first + 3); - cbref = luaL_ref (L, LUA_REGISTRYINDEX); - - if (lua_gettop (L) > first + 3) { - user_str = lua_tostring (L, first + 4); - } - else { - user_str = NULL; - } - } - else if (lua_type (L, first) == LUA_TTABLE) { - lua_pushvalue (L, first); - - lua_pushstring (L, "name"); - lua_gettable (L, -2); - to_resolve = luaL_checkstring (L, -1); - lua_pop (L, 1); - - lua_pushstring (L, "callback"); - lua_gettable (L, -2); - - if (to_resolve == NULL || lua_type (L, -1) != LUA_TFUNCTION) { - lua_pop (L, 2); - msg_err ("DNS request has bad params"); - lua_pushboolean (L, FALSE); - return 1; - } - cbref = luaL_ref (L, LUA_REGISTRYINDEX); - - lua_pushstring (L, "task"); - lua_gettable (L, -2); - if (lua_type (L, -1) == LUA_TUSERDATA) { - task = lua_check_task (L, -1); - session = task->s; - pool = task->task_pool; - } - lua_pop (L, 1); + if (!rspamd_lua_parse_table_arguments (L, first, &err, + "session=U{session};mempool=U{mempool};*name=S;*callback=F;" + "option=S;task=U{task};forced=B", + &session, &pool, &to_resolve, &cbref, &user_str, &task, &forced)) { - if (task == NULL) { - lua_pushstring (L, "session"); - lua_gettable (L, -2); - if (rspamd_lua_check_udata (L, -1, "rspamd{session}")) { - session = *(struct rspamd_async_session **)lua_touserdata (L, -1); - } - else { - session = NULL; - } - lua_pop (L, 1); + if (err) { + ret = luaL_error (L, "invalid arguments: %s", err->message); + g_error_free (err); - lua_pushstring (L, "pool"); - lua_gettable (L, -2); - if (rspamd_lua_check_udata (L, -1, "rspamd{mempool}")) { - pool = *(rspamd_mempool_t **)lua_touserdata (L, -1); - } - else { - pool = NULL; - } - lua_pop (L, 1); + return ret; } - lua_pushstring (L, "option"); 
- lua_gettable (L, -2); - if (lua_type (L, -1) == LUA_TSTRING) { - user_str = luaL_checkstring (L, -1); - } - lua_pop (L, 1); + return luaL_error (L, "invalid arguments"); + } - lua_pop (L, 1); + if (task) { + pool = task->task_pool; + session = task->s; } if (pool != NULL && session != NULL && to_resolve != NULL && cbref != -1) { @@ -360,8 +302,10 @@ lua_dns_resolver_resolve_common (lua_State *L, ptr_str = rdns_generate_ptr_from_str (to_resolve); if (ptr_str == NULL) { - msg_err ("wrong resolve string to PTR request: %s", to_resolve); + msg_err_task_check ("wrong resolve string to PTR request: %s", + to_resolve); lua_pushnil (L); + return 1; } @@ -392,11 +336,22 @@ lua_dns_resolver_resolve_common (lua_State *L, } } else { - if (make_dns_request_task (task, - lua_dns_callback, - cbdata, - type, - to_resolve)) { + if (forced) { + ret = make_dns_request_task_forced (task, + lua_dns_callback, + cbdata, + type, + to_resolve); + } + else { + ret = make_dns_request_task (task, + lua_dns_callback, + cbdata, + type, + to_resolve); + } + + if (ret) { lua_pushboolean (L, TRUE); cbdata->s = session; cbdata->w = rspamd_session_get_watcher (session); @@ -408,8 +363,7 @@ lua_dns_resolver_resolve_common (lua_State *L, } } else { - msg_err ("invalid arguments to lua_resolve"); - lua_pushnil (L); + return luaL_error (L, "invalid arguments to lua_resolve"); } return 1; diff --git a/src/lua/lua_http.c b/src/lua/lua_http.c index 84ab2de16..0c4eb976d 100644 --- a/src/lua/lua_http.c +++ b/src/lua/lua_http.c @@ -17,6 +17,7 @@ #include "buffer.h" #include "dns.h" #include "http.h" +#include "http_private.h" #include "utlist.h" #include "unix-std.h" @@ -64,6 +65,7 @@ struct lua_http_cbdata { struct timeval tv; rspamd_inet_addr_t *addr; gchar *mime_type; + gchar *host; gint fd; gint cbref; }; @@ -109,6 +111,10 @@ lua_http_fin (gpointer arg) g_free (cbd->mime_type); } + if (cbd->host) { + g_free (cbd->host); + } + g_slice_free1 (sizeof (struct lua_http_cbdata), cbd); } @@ -150,7 +156,9 @@ 
lua_http_finish_handler (struct rspamd_http_connection *conn, struct rspamd_http_message *msg) { struct lua_http_cbdata *cbd = (struct lua_http_cbdata *)conn->ud; - struct rspamd_http_header *h; + struct rspamd_http_header *h, *htmp; + const gchar *body; + gsize body_len; lua_rawgeti (cbd->L, LUA_REGISTRYINDEX, cbd->cbref); /* Error */ @@ -158,14 +166,23 @@ lua_http_finish_handler (struct rspamd_http_connection *conn, /* Reply code */ lua_pushinteger (cbd->L, msg->code); /* Body */ - lua_pushlstring (cbd->L, msg->body->str, msg->body->len); + body = rspamd_http_message_get_body (msg, &body_len); + + if (body_len > 0) { + lua_pushlstring (cbd->L, body, body_len); + } + else { + lua_pushnil (cbd->L); + } /* Headers */ lua_newtable (cbd->L); - LL_FOREACH (msg->headers, h) { + + HASH_ITER (hh, msg->headers, h, htmp) { lua_pushlstring (cbd->L, h->name->begin, h->name->len); lua_pushlstring (cbd->L, h->value->begin, h->value->len); lua_settable (cbd->L, -3); } + if (lua_pcall (cbd->L, 4, 0, 0) != 0) { msg_info ("callback call failed: %s", lua_tostring (cbd->L, -1)); lua_pop (cbd->L, 1); @@ -189,12 +206,17 @@ lua_http_make_connection (struct lua_http_cbdata *cbd) return FALSE; } cbd->fd = fd; - cbd->conn = rspamd_http_connection_new (NULL, lua_http_error_handler, - lua_http_finish_handler, RSPAMD_HTTP_CLIENT_SIMPLE, - RSPAMD_HTTP_CLIENT, NULL); + cbd->conn = rspamd_http_connection_new (NULL, + lua_http_error_handler, + lua_http_finish_handler, + RSPAMD_HTTP_CLIENT_SIMPLE, + RSPAMD_HTTP_CLIENT, + NULL, + NULL); rspamd_http_connection_write_message (cbd->conn, cbd->msg, - NULL, cbd->mime_type, cbd, fd, &cbd->tv, cbd->ev_base); + cbd->host, cbd->mime_type, cbd, fd, + &cbd->tv, cbd->ev_base); /* Message is now owned by a connection object */ cbd->msg = NULL; @@ -296,19 +318,23 @@ lua_http_request (lua_State *L) else { ev_base = NULL; } + if (lua_gettop (L) >= 4 && rspamd_lua_check_udata (L, 4, "rspamd{resolver}")) { resolver = *(struct rspamd_dns_resolver **)lua_touserdata 
(L, 4); } else { resolver = lua_http_global_resolver (ev_base); } + if (lua_gettop (L) >= 5 && rspamd_lua_check_udata (L, 5, "rspamd{session}")) { session = *(struct rspamd_async_session **)lua_touserdata (L, 5); } else { session = NULL; } + msg = rspamd_http_message_from_url (url); + if (msg == NULL) { lua_pushboolean (L, FALSE); return 1; @@ -403,13 +429,13 @@ lua_http_request (lua_State *L) lua_gettable (L, -2); if (lua_type (L, -1) == LUA_TSTRING) { lua_body = lua_tolstring (L, -1, &bodylen); - msg->body = rspamd_fstring_new_init (lua_body, bodylen); + rspamd_http_message_set_body (msg, lua_body, bodylen); } else if (lua_type (L, -1) == LUA_TUSERDATA) { t = lua_check_text (L, -1); /* TODO: think about zero-copy possibilities */ if (t) { - msg->body = rspamd_fstring_new_init (t->start, t->len); + rspamd_http_message_set_body (msg, t->start, t->len); } } @@ -431,6 +457,10 @@ lua_http_request (lua_State *L) msec_to_tv (timeout, &cbd->tv); cbd->fd = -1; + if (msg->host) { + cbd->host = rspamd_fstring_cstr (msg->host); + } + if (session) { cbd->session = session; rspamd_session_add_event (session, diff --git a/src/lua/lua_map.c b/src/lua/lua_map.c index a74ee205c..570287c13 100644 --- a/src/lua/lua_map.c +++ b/src/lua/lua_map.c @@ -309,10 +309,6 @@ lua_map_fin (struct map_cb_data *data) map = data->map; - if (data->prev_data) { - data->prev_data = NULL; - } - if (data->cur_data) { cbdata = (struct lua_map_callback_data *)data->cur_data; } @@ -321,6 +317,10 @@ lua_map_fin (struct map_cb_data *data) return; } + if (data->prev_data) { + data->prev_data = NULL; + } + if (cbdata->ref == -1) { msg_err_map ("map has no callback set"); } @@ -621,6 +621,9 @@ lua_map_get_proto (lua_State *L) case MAP_PROTO_HTTP: ret = "http"; break; + case MAP_PROTO_HTTPS: + ret = "https"; + break; } lua_pushstring (L, ret); } diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 7a756679b..15b0ae0b7 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -1427,6 +1427,7 @@ 
lua_task_get_received_headers (lua_State * L) { struct rspamd_task *task = lua_check_task (L, 1); struct received_header *rh; + const gchar *proto; guint i, k = 1; if (task) { @@ -1435,23 +1436,56 @@ lua_task_get_received_headers (lua_State * L) for (i = 0; i < task->received->len; i ++) { rh = g_ptr_array_index (task->received, i); - if (rh->is_error || G_UNLIKELY ( - rh->from_ip == NULL && + if (G_UNLIKELY (rh->from_ip == NULL && rh->real_ip == NULL && rh->real_hostname == NULL && - rh->by_hostname == NULL)) { + rh->by_hostname == NULL && rh->timestamp == 0)) { continue; } lua_newtable (L); rspamd_lua_table_set (L, "from_hostname", rh->from_hostname); - lua_pushstring (L, "from_ip"); - rspamd_lua_ip_push_fromstring (L, rh->from_ip); - lua_settable (L, -3); + rspamd_lua_table_set (L, "from_ip", rh->from_ip); rspamd_lua_table_set (L, "real_hostname", rh->real_hostname); lua_pushstring (L, "real_ip"); - rspamd_lua_ip_push_fromstring (L, rh->real_ip); + rspamd_lua_ip_push (L, rh->addr); lua_settable (L, -3); + lua_pushstring (L, "proto"); + + switch (rh->type) { + case RSPAMD_RECEIVED_SMTP: + proto = "smtp"; + break; + case RSPAMD_RECEIVED_ESMTP: + proto = "esmtp"; + break; + case RSPAMD_RECEIVED_ESMTPS: + proto = "esmtps"; + break; + case RSPAMD_RECEIVED_ESMTPA: + proto = "esmtpa"; + break; + case RSPAMD_RECEIVED_ESMTPSA: + proto = "esmtpsa"; + break; + case RSPAMD_RECEIVED_LMTP: + proto = "lmtp"; + break; + case RSPAMD_RECEIVED_IMAP: + proto = "imap"; + break; + case RSPAMD_RECEIVED_UNKNOWN: + default: + proto = "unknown"; + break; + } + lua_pushstring (L, proto); + lua_settable (L, -3); + + lua_pushstring (L, "timestamp"); + lua_pushnumber (L, rh->timestamp); + lua_settable (L, -3); + rspamd_lua_table_set (L, "by_hostname", rh->by_hostname); lua_rawseti (L, -2, k ++); } diff --git a/src/lua/lua_tcp.c b/src/lua/lua_tcp.c index e6ccd85ee..094ebf12b 100644 --- a/src/lua/lua_tcp.c +++ b/src/lua/lua_tcp.c @@ -348,10 +348,12 @@ static void lua_tcp_dns_handler (struct 
rdns_reply *reply, gpointer ud) { struct lua_tcp_cbdata *cbd = (struct lua_tcp_cbdata *)ud; + const struct rdns_request_name *rn; if (reply->code != RDNS_RC_NOERROR) { + rn = rdns_request_get_name (reply->request, NULL); lua_tcp_push_error (cbd, "unable to resolve host: %s", - reply->requested_name); + rn->name); lua_tcp_maybe_free (cbd); } else { @@ -368,7 +370,7 @@ lua_tcp_dns_handler (struct rdns_reply *reply, gpointer ud) if (!lua_tcp_make_connection (cbd)) { lua_tcp_push_error (cbd, "unable to make connection to the host %s", - reply->requested_name); + rspamd_inet_address_to_string (cbd->addr)); lua_tcp_maybe_free (cbd); } } diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c index af49624b9..9c43494ad 100644 --- a/src/lua/lua_url.c +++ b/src/lua/lua_url.c @@ -537,7 +537,7 @@ lua_url_all (lua_State *L) if (text != NULL) { lua_newtable (L); - rspamd_url_find_multiple (pool, text, length, FALSE, + rspamd_url_find_multiple (pool, text, length, FALSE, NULL, lua_url_table_inserter, L); } diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index a5b5eac84..272e39463 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -324,6 +324,15 @@ LUA_FUNCTION_DEF (util, create_file); */ LUA_FUNCTION_DEF (util, close_file); +/** + * @function util.random_hex(size) + * Returns random hex string of the specified size + * + * @param {number} len length of desired string in bytes + * @return {string} string with random hex digests + */ +LUA_FUNCTION_DEF (util, random_hex); + static const struct luaL_reg utillib_f[] = { LUA_INTERFACE_DEF (util, create_event_base), LUA_INTERFACE_DEF (util, load_rspamd_config), @@ -358,6 +367,7 @@ static const struct luaL_reg utillib_f[] = { LUA_INTERFACE_DEF (util, unlock_file), LUA_INTERFACE_DEF (util, create_file), LUA_INTERFACE_DEF (util, close_file), + LUA_INTERFACE_DEF (util, random_hex), {NULL, NULL} }; @@ -1471,6 +1481,26 @@ lua_util_close_file (lua_State *L) } static gint +lua_util_random_hex (lua_State *L) +{ + gchar *buf; + gint 
buflen; + + buflen = lua_tonumber (L, 1); + + if (buflen <= 0) { + return luaL_error (L, "invalid arguments"); + } + + buf = g_malloc (buflen); + rspamd_random_hex (buf, buflen); + lua_pushlstring (L, buf, buflen); + g_free (buf); + + return 1; +} + +static gint lua_load_util (lua_State * L) { lua_newtable (L); diff --git a/src/lua_worker.c b/src/lua_worker.c index df6970efa..b74b8d422 100644 --- a/src/lua_worker.c +++ b/src/lua_worker.c @@ -261,7 +261,7 @@ lua_accept_socket (gint fd, short what, void *arg) L = ctx->L; if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, worker->accept_events)) == -1) { msg_warn ("accept failed: %s", strerror (errno)); return; } diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index 30eccf0c3..a37eea148 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -42,6 +42,7 @@ #include "keypair.h" #include "lua/lua_common.h" #include "unix-std.h" +#include "libutil/http_private.h" #include <math.h> #define DEFAULT_SYMBOL "R_FUZZY_HASH" @@ -1464,17 +1465,97 @@ fuzzy_insert_result (struct fuzzy_client_session *session, } } +static gint +fuzzy_check_try_read (struct fuzzy_client_session *session) +{ + struct rspamd_task *task; + const struct rspamd_fuzzy_reply *rep; + struct rspamd_fuzzy_cmd *cmd = NULL; + guint i; + gint r, ret; + guchar buf[2048], *p; + + task = session->task; + + if ((r = read (session->fd, buf, sizeof (buf) - 1)) == -1) { + if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) { + return 0; + } + else { + return -1; + } + } + else { + p = buf; + + ret = 0; + + while ((rep = fuzzy_process_reply (&p, &r, + session->commands, session->rule, &cmd)) != NULL) { + if (rep->prob > 0.5) { + if (rep->flag & (1U << 31)) { + /* Multi-flag */ + for (i = 0; i < 31; i ++) { + if ((1U << i) & rep->flag) { + fuzzy_insert_result (session, rep, cmd, i + 1); + } + } + } + else { + fuzzy_insert_result (session, rep, cmd, rep->flag); + } + } + else 
if (rep->value == 403) { + msg_info_task ( + "fuzzy check error for %d: forbidden", + rep->flag); + } + else if (rep->value != 0) { + msg_info_task ( + "fuzzy check error for %d: unknown error (%d)", + rep->flag, + rep->value); + } + + ret = 1; + } + } + + return ret; +} + +static gboolean +fuzzy_check_session_is_completed (struct fuzzy_client_session *session) +{ + struct fuzzy_cmd_io *io; + guint nreplied = 0, i; + + rspamd_upstream_ok (session->server); + + for (i = 0; i < session->commands->len; i++) { + io = g_ptr_array_index (session->commands, i); + + if (io->flags & FUZZY_CMD_FLAG_REPLIED) { + nreplied++; + } + } + + if (nreplied == session->commands->len) { + rspamd_session_remove_event (session->task->s, fuzzy_io_fin, session); + + return TRUE; + } + + return FALSE; +} + /* Fuzzy check callback */ static void fuzzy_check_io_callback (gint fd, short what, void *arg) { struct fuzzy_client_session *session = arg; - const struct rspamd_fuzzy_reply *rep; struct rspamd_task *task; - guchar buf[2048], *p; - struct fuzzy_cmd_io *io; - struct rspamd_fuzzy_cmd *cmd = NULL; - guint i; + struct event_base *ev_base; gint r; enum { @@ -1487,45 +1568,18 @@ fuzzy_check_io_callback (gint fd, short what, void *arg) if ((what & EV_READ) || session->state == 1) { /* Try to read reply */ - if ((r = read (fd, buf, sizeof (buf) - 1)) == -1) { - if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) { - event_add (&session->ev, NULL); - return; - } - } - else { - p = buf; - ret = return_want_more; - - while ((rep = fuzzy_process_reply (&p, &r, - session->commands, session->rule, &cmd)) != NULL) { - if (rep->prob > 0.5) { - if (rep->flag & (1U << 31)) { - /* Multi-flag */ - for (i = 0; i < 31; i ++) { - if ((1U << i) & rep->flag) { - fuzzy_insert_result (session, rep, cmd, i + 1); - } - } - } - else { - fuzzy_insert_result (session, rep, cmd, rep->flag); - } - } - else if (rep->value == 403) { - msg_info_task ( - "fuzzy check error for %d: forbidden", - rep->flag); - } - 
else if (rep->value != 0) { - msg_info_task ( - "fuzzy check error for %d: unknown error (%d)", - rep->flag, - rep->value); - } + r = fuzzy_check_try_read (session); - ret = return_finished; - } + switch (r) { + case 0: + ret = return_want_more; + break; + case 1: + ret = return_finished; + break; + default: + ret = return_error; + break; } } else if (what & EV_WRITE) { @@ -1544,9 +1598,11 @@ fuzzy_check_io_callback (gint fd, short what, void *arg) if (ret == return_want_more) { /* Processed write, switch to reading */ + ev_base = event_get_base (&session->ev); event_del (&session->ev); event_set (&session->ev, fd, EV_READ, fuzzy_check_io_callback, session); + event_base_set (ev_base, &session->ev); event_add (&session->ev, NULL); } else if (ret == return_error) { @@ -1561,25 +1617,13 @@ fuzzy_check_io_callback (gint fd, short what, void *arg) } else { /* Read something from network */ - rspamd_upstream_ok (session->server); - guint nreplied = 0; - - for (i = 0; i < session->commands->len; i++) { - io = g_ptr_array_index (session->commands, i); - - if (io->flags & FUZZY_CMD_FLAG_REPLIED) { - nreplied++; - } - } - - if (nreplied == session->commands->len) { - rspamd_session_remove_event (session->task->s, fuzzy_io_fin, session); - } - else { + if (!fuzzy_check_session_is_completed (session)) { /* Need to read more */ + ev_base = event_get_base (&session->ev); event_del (&session->ev); - event_set (&session->ev, fd, EV_READ, + event_set (&session->ev, session->fd, EV_READ, fuzzy_check_io_callback, session); + event_base_set (ev_base, &session->ev); event_add (&session->ev, NULL); } } @@ -1591,9 +1635,17 @@ fuzzy_check_timer_callback (gint fd, short what, void *arg) { struct fuzzy_client_session *session = arg; struct rspamd_task *task; + struct event_base *ev_base; task = session->task; + /* We might be here because of other checks being slow */ + if (fuzzy_check_try_read (session) > 0) { + if (fuzzy_check_session_is_completed (session)) { + return; + } + } + if 
(session->retransmits >= fuzzy_module_ctx->retransmits) { msg_err_task ("got IO timeout with server %s, after %d retransmits", rspamd_upstream_name (session->server), @@ -1603,13 +1655,17 @@ fuzzy_check_timer_callback (gint fd, short what, void *arg) } else { /* Plan write event */ + ev_base = event_get_base (&session->ev); event_del (&session->ev); event_set (&session->ev, fd, EV_WRITE|EV_READ, fuzzy_check_io_callback, session); + event_base_set (ev_base, &session->ev); event_add (&session->ev, NULL); /* Plan new retransmit timer */ + ev_base = event_get_base (&session->timev); event_del (&session->timev); + event_base_set (ev_base, &session->timev); event_add (&session->timev, &session->tv); session->retransmits ++; } @@ -1627,6 +1683,7 @@ fuzzy_controller_io_callback (gint fd, short what, void *arg) struct fuzzy_cmd_io *io; struct rspamd_fuzzy_cmd *cmd = NULL; const gchar *symbol; + struct event_base *ev_base; gint r; enum { return_error = 0, @@ -1728,10 +1785,13 @@ fuzzy_controller_io_callback (gint fd, short what, void *arg) } if (ret == return_want_more) { + ev_base = event_get_base (&session->ev); event_del (&session->ev); event_set (&session->ev, fd, EV_READ, fuzzy_controller_io_callback, session); + event_base_set (ev_base, &session->ev); event_add (&session->ev, NULL); + return; } else if (ret == return_error) { @@ -1790,6 +1850,7 @@ fuzzy_controller_timer_callback (gint fd, short what, void *arg) { struct fuzzy_learn_session *session = arg; struct rspamd_task *task; + struct event_base *ev_base; task = session->task; @@ -1816,13 +1877,17 @@ fuzzy_controller_timer_callback (gint fd, short what, void *arg) } else { /* Plan write event */ + ev_base = event_get_base (&session->ev); event_del (&session->ev); event_set (&session->ev, fd, EV_WRITE|EV_READ, fuzzy_controller_io_callback, session); + event_base_set (ev_base, &session->ev); event_add (&session->ev, NULL); /* Plan new retransmit timer */ + ev_base = event_get_base (&session->timev); event_del 
(&session->timev); + event_base_set (ev_base, &session->timev); event_add (&session->timev, &session->tv); session->retransmits ++; } diff --git a/src/plugins/lua/dmarc.lua b/src/plugins/lua/dmarc.lua index 063142f24..cb65a6875 100644 --- a/src/plugins/lua/dmarc.lua +++ b/src/plugins/lua/dmarc.lua @@ -80,7 +80,8 @@ local function dmarc_callback(task) task:get_resolver():resolve_txt({ task=task, name = resolve_name, - callback = dmarc_dns_cb}) + callback = dmarc_dns_cb, + forced = true}) return end @@ -182,7 +183,8 @@ local function dmarc_callback(task) task:get_resolver():resolve_txt({ task=task, name = resolve_name, - callback = dmarc_dns_cb}) + callback = dmarc_dns_cb, + forced = true}) return else @@ -263,7 +265,8 @@ local function dmarc_callback(task) task:get_resolver():resolve_txt({ task=task, name = resolve_name, - callback = dmarc_dns_cb}) + callback = dmarc_dns_cb, + forced = true}) end local opts = rspamd_config:get_all_opt('dmarc') diff --git a/src/plugins/lua/once_received.lua b/src/plugins/lua/once_received.lua index 341618429..63de22776 100644 --- a/src/plugins/lua/once_received.lua +++ b/src/plugins/lua/once_received.lua @@ -80,7 +80,8 @@ local function check_quantity_received (task) if (not hn or hn == 'unknown') and task_ip and task_ip:is_valid() then task:get_resolver():resolve_ptr({task = task, name = task_ip:to_string(), - callback = recv_dns_cb + callback = recv_dns_cb, + forced = true }) return end diff --git a/src/plugins/lua/phishing.lua b/src/plugins/lua/phishing.lua index ecf88679f..22a792223 100644 --- a/src/plugins/lua/phishing.lua +++ b/src/plugins/lua/phishing.lua @@ -18,9 +18,12 @@ limitations under the License. 
-- -- local symbol = 'PHISHED_URL' +local openphish_symbol = 'PHISHED_OPENPHISH' local domains = nil local strict_domains = {} local redirector_domains = {} +local openphish_map = 'https://www.openphish.com/feed.txt' +local openphish_hash local rspamd_logger = require "rspamd_logger" local util = require "rspamd_util" local opts = rspamd_config:get_all_opt('phishing') @@ -30,6 +33,14 @@ local function phishing_cb(task) if urls then for _,url in ipairs(urls) do + if openphish_hash then + local t = url:get_text() + + if openphish_hash:get_key(t) then + task:insert_result(openphish_symbol, 1.0, url:get_tld()) + end + end + if url:is_phished() and not url:is_redirected() then local found = false local purl = url:get_phished() @@ -94,7 +105,11 @@ local function phishing_map(mapname, phishmap) local sym = string.sub(d, s + 1, -1) local map = string.sub(d, 1, s - 1) rspamd_config:register_virtual_symbol(sym, 1, id) - local rmap = rspamd_config:add_hash_map (map, 'Phishing ' .. mapname .. ' map') + local rmap = rspamd_config:add_map ({ + type = 'set', + url = map, + description = 'Phishing ' .. mapname .. 
' map', + }) if rmap then local rule = {symbol = sym, map = rmap} table.insert(phishmap, rule) @@ -113,13 +128,35 @@ if opts then if opts['symbol'] then symbol = opts['symbol'] -- Register symbol's callback - rspamd_config:register_symbol({ + local id = rspamd_config:register_symbol({ name = symbol, callback = phishing_cb }) + + if opts['openphish_map'] then + openphish_map = opts['openphish_map'] + end + + openphish_hash = rspamd_config:add_map({ + type = 'set', + url = openphish_map, + description = 'Open phishing feed map (see https://www.openphish.com for details)' + }) + + if openphish_hash then + rspamd_config:register_symbol({ + type = 'virtual', + parent = id, + name = openphish_symbol, + }) + end end if opts['domains'] and type(opt['domains']) == 'string' then - domains = rspamd_config:add_hash_map (opts['domains']) + domains = rspamd_config:add_map({ + url = opts['domains'], + type = 'set', + description = 'Phishing domains' + }) end phishing_map('strict_domains', strict_domains) phishing_map('redirector_domains', redirector_domains) diff --git a/src/plugins/lua/rbl.lua b/src/plugins/lua/rbl.lua index 91e20dc5b..2a0042019 100644 --- a/src/plugins/lua/rbl.lua +++ b/src/plugins/lua/rbl.lua @@ -147,7 +147,8 @@ local function rbl_cb (task) task:get_resolver():resolve_a({task = task, name = havegot['helo'] .. '.' .. rbl['rbl'], callback = rbl_dns_cb, - option = k}) + option = k, + forced = true}) end)() end @@ -173,7 +174,8 @@ local function rbl_cb (task) task:get_resolver():resolve_a({task = task, name = d .. '.' .. rbl['rbl'], callback = rbl_dns_cb, - option = k}) + option = k, + forced = true}) end end)() end @@ -214,14 +216,16 @@ local function rbl_cb (task) task:get_resolver():resolve_a({task = task, name = domain .. '.' .. rbl['rbl'], callback = rbl_dns_cb, - option = k}) + option = k, + forced = true}) end else for _, email in pairs(havegot['emails']) do task:get_resolver():resolve_a({task = task, name = email .. '.' .. 
rbl['rbl'], callback = rbl_dns_cb, - option = k}) + option = k, + forced = true}) end end end)() @@ -242,7 +246,8 @@ local function rbl_cb (task) task:get_resolver():resolve_a({task = task, name = havegot['rdns'] .. '.' .. rbl['rbl'], callback = rbl_dns_cb, - option = k}) + option = k, + forced = true}) end)() end @@ -263,7 +268,8 @@ local function rbl_cb (task) task:get_resolver():resolve_a({task = task, name = ip_to_rbl(havegot['from'], rbl['rbl']), callback = rbl_dns_cb, - option = k}) + option = k, + forced = true}) end end)() end @@ -287,10 +293,13 @@ local function rbl_cb (task) ((rbl['exclude_private_ips'] and not rh['real_ip']:is_local()) or not rbl['exclude_private_ips']) and ((rbl['exclude_local_ips'] and not is_excluded_ip(rh['real_ip'])) or not rbl['exclude_local_ips']) then + -- Disable forced for received resolving, as we have no control on + -- those headers count task:get_resolver():resolve_a({task = task, name = ip_to_rbl(rh['real_ip'], rbl['rbl']), callback = rbl_dns_cb, - option = k}) + option = k, + forced = false}) end end end diff --git a/src/plugins/lua/rspamd_update.lua b/src/plugins/lua/rspamd_update.lua index 25898d712..9af937263 100644 --- a/src/plugins/lua/rspamd_update.lua +++ b/src/plugins/lua/rspamd_update.lua @@ -144,7 +144,8 @@ if section then each(function(k, map) -- Check sanity for maps - if map:get_proto() == 'http' and not map:get_sign_key() then + local proto = map:get_proto() + if (proto == 'http' or proto == 'https') and not map:get_sign_key() then if trusted_key then map:set_sign_key(trusted_key) else diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c index 1063013c7..1e0eacff1 100644 --- a/src/plugins/surbl.c +++ b/src/plugins/surbl.c @@ -40,6 +40,7 @@ #include "surbl.h" #include "utlist.h" #include "libserver/html.h" +#include "libutil/http_private.h" #include "unix-std.h" static struct surbl_ctx *surbl_module_ctx = NULL; @@ -1328,10 +1329,13 @@ register_redirector_call (struct rspamd_url *url, struct rspamd_task 
*task, sizeof (struct redirector_param)); param->url = url; param->task = task; - param->conn = rspamd_http_connection_new (NULL, surbl_redirector_error, + param->conn = rspamd_http_connection_new (NULL, + surbl_redirector_error, surbl_redirector_finish, RSPAMD_HTTP_CLIENT_SIMPLE, - RSPAMD_HTTP_CLIENT, NULL); + RSPAMD_HTTP_CLIENT, + NULL, + NULL); msg = rspamd_http_new_message (HTTP_REQUEST); msg->url = rspamd_fstring_assign (msg->url, url->string, url->urllen); param->sock = s; diff --git a/src/ragel/smtp_addr_parser.rl b/src/ragel/smtp_addr_parser.rl index a480970ec..7e8498966 100644 --- a/src/ragel/smtp_addr_parser.rl +++ b/src/ragel/smtp_addr_parser.rl @@ -2,6 +2,11 @@ machine smtp_addr_parser; + action IP6_start {} + action IP6_end {} + action IP4_start {} + action IP4_end {} + action User_start { addr->user = p; } @@ -71,9 +76,11 @@ main := SMTPAddr; }%% +#include "smtp_parsers.h" + %% write data; -static int +int rspamd_smtp_addr_parse (const char *data, size_t len, struct rspamd_email_address *addr) { const char *p = data, *pe = data + len, *eof; diff --git a/src/ragel/smtp_address.rl b/src/ragel/smtp_address.rl index dd148d654..fc69a0138 100644 --- a/src/ragel/smtp_address.rl +++ b/src/ragel/smtp_address.rl @@ -2,28 +2,11 @@ machine smtp_address; include smtp_ip "smtp_ip.rl"; + include smtp_whitespace "smtp_whitespace.rl"; # SMTP address spec # Obtained from: https://tools.ietf.org/html/rfc5321#section-4.1.2 - LF = "\n"; - CR = "\r"; - CRLF = "\r\n"; - DQUOTE = '"'; - - atext = alpha | digit | "!" | "#" | "$" | "%" | "&" | - "'" | "*" | "+" | "_" | "/" | "=" | "?" | "^" | - "-" | "`" | "{" | "|" | "}" | "~"; - - dcontent = 33..90 | 94..126; - Let_dig = alpha | digit; - Ldh_str = ( alpha | digit | "_" | "-" )* Let_dig; - - quoted_pairSMTP = "\\" 32..126; - qtextSMTP = 32..33 | 35..91 | 93..126; - Atom = atext+; - Dot_string = Atom ("." 
Atom)*; - QcontentSMTP = qtextSMTP | quoted_pairSMTP %User_has_backslash; Quoted_string = ( DQUOTE QcontentSMTP* >User_start %User_end DQUOTE ) %Quoted_addr; Local_part = Dot_string >User_start %User_end | Quoted_string; diff --git a/src/ragel/smtp_date.rl b/src/ragel/smtp_date.rl new file mode 100644 index 000000000..eb5d0cdc5 --- /dev/null +++ b/src/ragel/smtp_date.rl @@ -0,0 +1,27 @@ +%%{ + machine smtp_date; + + include smtp_whitespace "smtp_whitespace.rl"; + + # SMTP date spec + # Obtained from: http://tools.ietf.org/html/rfc5322#section_3.3 + + digit_2 = digit{2}; + digit_4 = digit{4}; + day_name = "Mon" | "Tue" | "Wed" | "Thu" | + "Fri" | "Sat" | "Sun"; + day_of_week = FWS? day_name; + day = FWS? digit{1,2} FWS; + month = "Jan" | "Feb" | "Mar" | "Apr" | + "May" | "Jun" | "Jul" | "Aug" | + "Sep" | "Oct" | "Nov" | "Dec"; + year = FWS digit{4,} FWS; + date = day month year; + hour = digit_2; + minute = digit_2; + second = digit_2; + time_of_day = hour ":" minute (":" second )?; + zone = FWS ("+" | "-") digit_4; + time = time_of_day zone; + date_time = (day_of_week ",")? date time; +}%%
\ No newline at end of file diff --git a/src/ragel/smtp_ip.rl b/src/ragel/smtp_ip.rl index b6b0080f3..dae90a096 100644 --- a/src/ragel/smtp_ip.rl +++ b/src/ragel/smtp_ip.rl @@ -5,7 +5,7 @@ # Source: https://tools.ietf.org/html/rfc5321#section-4.1.3 Snum = digit{1,3}; - IPv4_address_literal = Snum ("." Snum){3}; + IPv4_address_literal = (Snum ("." Snum){3}) >IP4_start %IP4_end; IPv6_hex = xdigit{1,4}; IPv6_full = IPv6_hex (":" IPv6_hex){7}; IPv6_comp = (IPv6_hex (":" IPv6_hex){0,5})? "::" @@ -15,5 +15,5 @@ (IPv6_hex (":" IPv6_hex){0,3} ":")? IPv4_address_literal; IPv6_addr = IPv6_full | IPv6_comp | IPv6v4_full | IPv6v4_comp; - IPv6_address_literal = "IPv6:" IPv6_addr; + IPv6_address_literal = "IPv6:" %IP6_start IPv6_addr %IP6_end; }%%
\ No newline at end of file diff --git a/src/ragel/smtp_received.rl b/src/ragel/smtp_received.rl new file mode 100644 index 000000000..e005dcc9c --- /dev/null +++ b/src/ragel/smtp_received.rl @@ -0,0 +1,61 @@ +%%{ + machine smtp_received; + + include smtp_whitespace "smtp_whitespace.rl"; + include smtp_ip "smtp_ip.rl"; + include smtp_date "smtp_date.rl"; + include smtp_address"smtp_address.rl"; + + # http://tools.ietf.org/html/rfc5321#section-4.4 + + Addtl_Link = Atom; + Link = "TCP" | Addtl_Link; + Attdl_Protocol = Atom; + Protocol = "ESMTP" %ESMTP_proto | + "SMTP" %SMTP_proto | + "ESMTPS" %ESMTPS_proto | + "ESMTPA" %ESMTPA_proto | + "ESMTPSA" %ESMTPSA_proto | + "LMTP" %LMTP_proto | + "IMAP" %IMAP_proto | + Attdl_Protocol; + + TCP_info = address_literal >Real_IP_Start %Real_IP_End | + ( Domain >Real_Domain_Start %Real_Domain_End FWS address_literal >Real_IP_Start %Real_IP_End ); + Extended_Domain = Domain >Real_Domain_Start %Real_Domain_End | # Used to be a real domain + ( Domain >Reported_Domain_Start %Reported_Domain_End FWS "(" TCP_info ")" ) | # Here domain is something specified by remote side + ( address_literal >Real_Domain_Start %Real_Domain_End FWS "(" TCP_info ")" ) | + address_literal >Real_IP_Start %Real_IP_End; # Not RFC conforming, but many MTA try this + + ccontent = ctext | FWS | '(' @{ fcall balanced_ccontent; }; + balanced_ccontent := ccontent* ')' @{ fret; }; + comment = "(" (FWS? ccontent)* FWS? ")"; + CFWS = ((FWS? comment)+ FWS?) 
| FWS; + + From_domain = "FROM"i FWS Extended_Domain >From_Start %From_End; + By_domain = "BY"i FWS Extended_Domain >By_Start %By_End; + + Via = CFWS "VIA"i FWS Link; + With = CFWS "WITH"i FWS Protocol; + + id_left = dot_atom_text; + no_fold_literal = "[" dtext* "]"; + id_right = dot_atom_text | no_fold_literal; + msg_id = "<" id_left "@" id_right ">"; + ID = CFWS "ID"i FWS ( Atom | msg_id ); + + For = CFWS "FOR"i FWS ( Path | Mailbox ) %For_End; + Additional_Registered_Clauses = CFWS Atom FWS String; + Opt_info = Via? With? ID? For? Additional_Registered_Clauses?; + # Here we make From part optional just because many received headers lack it + Received = From_domain? CFWS? By_domain? CFWS? Opt_info CFWS? ";" FWS date_time >Date_Start %Date_End CFWS?; + + prepush { + if (top >= st_storage.size) { + st_storage.size = (top + 1) * 2; + st_storage.data = realloc (st_storage.data, st_storage.size * sizeof (int)); + g_assert (st_storage.data != NULL); + stack = st_storage.data; + } + } +}%% diff --git a/src/ragel/smtp_received_parser.rl b/src/ragel/smtp_received_parser.rl new file mode 100644 index 000000000..b2c73cab3 --- /dev/null +++ b/src/ragel/smtp_received_parser.rl @@ -0,0 +1,309 @@ +%%{ + + machine smtp_received_parser; + + + action IP6_start { + in_v6 = 1; + ip_start = p; + } + action IP6_end { + in_v6 = 0; + ip_end = p; + } + action IP4_start { + if (!in_v6) { + ip_start = p; + } + } + action IP4_end { + if (!in_v6) { + ip_end = p; + } + } + + action User_start { + addr->user = p; + } + + action User_end { + if (addr->user) { + addr->user_len = p - addr->user; + } + } + + action Domain_start { + addr->domain = p; + } + + action Domain_end { + if (addr->domain) { + addr->domain_len = p - addr->domain; + } + } + + action Domain_addr_start { + addr->domain = p; + addr->flags |= RSPAMD_EMAIL_ADDR_IP; + } + + action Domain_addr_end { + if (addr->domain) { + addr->domain_len = p - addr->domain; + } + } + + action User_has_backslash { + addr->flags |= 
RSPAMD_EMAIL_ADDR_HAS_BACKSLASH; + } + + action Quoted_addr { + addr->flags |= RSPAMD_EMAIL_ADDR_QUOTED; + } + + action Empty_addr { + addr->flags |= RSPAMD_EMAIL_ADDR_EMPTY; + addr->addr = ""; + addr->user = addr->addr; + addr->domain = addr->addr; + } + + action Valid_addr { + addr->flags |= RSPAMD_EMAIL_ADDR_VALID; + } + + action Addr_has_angle { + addr->flags |= RSPAMD_EMAIL_ADDR_BRACED; + } + + action Addr_start { + addr->addr = p; + } + + action Addr_end { + if (addr->addr) { + addr->addr_len = p - addr->addr; + } + } + + action Real_Domain_Start { + real_domain_start = p; + } + action Real_Domain_End { + real_domain_end = p; + } + action Reported_Domain_Start { + reported_domain_start = p; + } + action Reported_Domain_End { + reported_domain_end = p; + } + + action Real_IP_Start { + real_ip_start = p; + } + action Real_IP_End { + if (ip_start && ip_end && ip_end > ip_start) { + real_ip_start = ip_start; + real_ip_end = ip_end; + } + else { + real_ip_end = p; + } + + ip_start = NULL; + ip_end = NULL; + } + action Reported_IP_Start { + reported_ip_start = p; + } + action Reported_IP_End { + + if (ip_start && ip_end && ip_end > ip_start) { + reported_ip_start = ip_start; + reported_ip_end = ip_end; + } + else { + reported_ip_end = p; + } + + ip_start = NULL; + ip_end = NULL; + } + + action From_Start { + real_domain_start = NULL; + real_domain_end = NULL; + real_ip_start = NULL; + real_ip_end = NULL; + reported_domain_start = NULL; + reported_domain_end = NULL; + reported_ip_start = NULL; + reported_ip_end = NULL; + ip_start = NULL; + ip_end = NULL; + } + + action By_Start { + real_domain_start = NULL; + real_domain_end = NULL; + real_ip_start = NULL; + real_ip_end = NULL; + reported_domain_start = NULL; + reported_domain_end = NULL; + reported_ip_start = NULL; + reported_ip_end = NULL; + ip_start = NULL; + ip_end = NULL; + } + + action By_End { + guint len; + + if (real_domain_end && real_domain_start && real_domain_end > real_domain_start) { + len = 
real_domain_end - real_domain_start; + rh->by_hostname = rspamd_mempool_alloc (task->task_pool, len + 1); + rspamd_strlcpy (rh->by_hostname, real_domain_start, len + 1); + } + else if (reported_domain_end && reported_domain_start && reported_domain_end > reported_domain_start) { + len = reported_domain_end - reported_domain_start; + rh->by_hostname = rspamd_mempool_alloc (task->task_pool, len + 1); + rspamd_strlcpy (rh->by_hostname, reported_domain_start, len + 1); + } + } + + action From_End { + guint len; + + if (real_domain_end && real_domain_start && real_domain_end > real_domain_start) { + len = real_domain_end - real_domain_start; + rh->real_hostname = rspamd_mempool_alloc (task->task_pool, len + 1); + rspamd_strlcpy (rh->real_hostname, real_domain_start, len + 1); + } + if (reported_domain_end && reported_domain_start && reported_domain_end > reported_domain_start) { + len = reported_domain_end - reported_domain_start; + rh->from_hostname = rspamd_mempool_alloc (task->task_pool, len + 1); + rspamd_strlcpy (rh->from_hostname, reported_domain_start, len + 1); + } + if (real_ip_end && real_ip_start && real_ip_end > real_ip_start) { + len = real_ip_end - real_ip_start; + rh->real_ip = rspamd_mempool_alloc (task->task_pool, len + 1); + rspamd_strlcpy (rh->real_ip, real_ip_start, len + 1); + } + if (reported_ip_end && reported_ip_start && reported_ip_end > reported_ip_start) { + len = reported_ip_end - reported_ip_start; + rh->from_ip = rspamd_mempool_alloc (task->task_pool, len + 1); + rspamd_strlcpy (rh->from_ip, reported_ip_start, len + 1); + } + + if (rh->real_ip && !rh->from_ip) { + rh->from_ip = rh->real_ip; + } + if (rh->real_hostname && !rh->from_hostname) { + rh->from_hostname = rh->real_hostname; + } + + if (rh->real_ip) { + if (rspamd_parse_inet_address (&rh->addr, rh->real_ip, strlen (rh->real_ip))) { + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)rspamd_inet_address_destroy, rh->addr); + } + } + } + + action For_End { + + 
} + + action SMTP_proto { + rh->type = RSPAMD_RECEIVED_SMTP; + } + action ESMTPS_proto { + rh->type = RSPAMD_RECEIVED_ESMTPS; + } + action ESMTPA_proto { + rh->type = RSPAMD_RECEIVED_ESMTPA; + } + action ESMTP_proto { + rh->type = RSPAMD_RECEIVED_ESMTP; + } + action ESMTPSA_proto { + rh->type = RSPAMD_RECEIVED_ESMTPSA; + } + action LMTP_proto { + rh->type = RSPAMD_RECEIVED_LMTP; + } + action IMAP_proto { + rh->type = RSPAMD_RECEIVED_IMAP; + } + + action Date_Start { + date_start = p; + } + action Date_End { + if (date_start && p > date_start) { + guint len; + char *tdate; + + len = p - date_start; + tdate = g_malloc (len + 1); + rspamd_strlcpy (tdate, date_start, len + 1); + rh->timestamp = g_mime_utils_header_decode_date (tdate, NULL); + g_free (tdate); + } + } + + include smtp_received "smtp_received.rl"; + + main := Received; + +}%% + +#include "smtp_parsers.h" + +%% write data; + +int +rspamd_smtp_recieved_parse (struct rspamd_task *task, const char *data, size_t len, struct received_header *rh) +{ + struct rspamd_email_address for_addr, *addr; + const char *real_domain_start, *real_domain_end, + *real_ip_start, *real_ip_end, + *reported_domain_start, *reported_domain_end, + *reported_ip_start, *reported_ip_end, + *ip_start, *ip_end, *date_start; + const char *p = data, *pe = data + len, *eof; + int cs, in_v6 = 0, *stack = NULL; + gsize top = 0; + struct _ragel_st_storage { + int *data; + gsize size; + } st_storage; + + memset (&st_storage, 0, sizeof (st_storage)); + memset (rh, 0, sizeof (*rh)); + real_domain_start = NULL; + real_domain_end = NULL; + real_ip_start = NULL; + real_ip_end = NULL; + reported_domain_start = NULL; + reported_domain_end = NULL; + reported_ip_start = NULL; + reported_ip_end = NULL; + ip_start = NULL; + ip_end = NULL; + date_start = NULL; + rh->type = RSPAMD_RECEIVED_UNKNOWN; + + memset (&for_addr, 0, sizeof (for_addr)); + addr = &for_addr; + eof = pe; + + %% write init; + %% write exec; + + if (st_storage.data) { + free 
(st_storage.data); + } + + return cs; +} diff --git a/src/ragel/smtp_whitespace.rl b/src/ragel/smtp_whitespace.rl new file mode 100644 index 000000000..3b8563e8b --- /dev/null +++ b/src/ragel/smtp_whitespace.rl @@ -0,0 +1,28 @@ +%%{ + machine smtp_whitespace; + + WSP = " "; + CRLF = "\r\n" | ("\r" [^\n]) | ([^\r] "\n"); + DQUOTE = '"'; + + # Printable US-ASCII characters not including specials + atext = alpha | digit | "!" | "#" | "$" | "%" | "&" | + "'" | "*" | "+" | "_" | "/" | "=" | "?" | "^" | + "-" | "`" | "{" | "|" | "}" | "~"; + # Printable US-ASCII characters not including "[", "]", or "\" + dtext = 33..90 | 94..126; + # Printable US-ASCII characters not including "(", ")", or "\" + ctext = 33..39 | 42..91 | 93..126; + + dcontent = 33..90 | 94..126; + Let_dig = alpha | digit; + Ldh_str = ( alpha | digit | "_" | "-" )* Let_dig; + + quoted_pairSMTP = "\\" 32..126; + qtextSMTP = 32..33 | 35..91 | 93..126; + Atom = atext+; + Dot_string = Atom ("." Atom)*; + dot_atom_text = atext+ ("." atext+)*; + #FWS = ((WSP* CRLF)? WSP+); + FWS = WSP+; # We work with unfolded headers, so we can simplify machine +}%%
\ No newline at end of file diff --git a/src/rspamadm/configtest.c b/src/rspamadm/configtest.c index 80c2ec759..7ba9af3fe 100644 --- a/src/rspamadm/configtest.c +++ b/src/rspamadm/configtest.c @@ -40,7 +40,7 @@ struct rspamadm_command configtest_command = { static GOptionEntry entries[] = { {"quiet", 'q', 0, G_OPTION_ARG_NONE, &quiet, - "Supress output", NULL}, + "Suppress output", NULL}, {"config", 'c', 0, G_OPTION_ARG_STRING, &config, "Config file to test", NULL}, {"strict", 's', 0, G_OPTION_ARG_NONE, &strict, diff --git a/src/rspamadm/control.c b/src/rspamadm/control.c index de6e48346..2bdccc876 100644 --- a/src/rspamadm/control.c +++ b/src/rspamadm/control.c @@ -17,7 +17,8 @@ #include "rspamadm.h" #include "cryptobox.h" #include "printf.h" -#include "http.h" +#include "libutil/http.h" +#include "libutil/http_private.h" #include "addr.h" #include "unix-std.h" #include <event.h> @@ -100,11 +101,14 @@ rspamd_control_finish_handler (struct rspamd_http_connection *conn, struct ucl_parser *parser; ucl_object_t *obj; rspamd_fstring_t *out; + const gchar *body; + gsize body_len; struct rspamadm_control_cbdata *cbdata = conn->ud; + body = rspamd_http_message_get_body (msg, &body_len); parser = ucl_parser_new (0); - if (!ucl_parser_add_chunk (parser, msg->body->str, msg->body->len)) { + if (!body || !ucl_parser_add_chunk (parser, body, body_len)) { rspamd_fprintf (stderr, "cannot parse server's reply: %s\n", ucl_parser_get_error (parser)); ucl_parser_free (parser); @@ -226,9 +230,13 @@ rspamadm_control (gint argc, gchar **argv) L = rspamd_lua_init (); - conn = rspamd_http_connection_new (NULL, rspamd_control_error_handler, - rspamd_control_finish_handler, RSPAMD_HTTP_CLIENT_SIMPLE, - RSPAMD_HTTP_CLIENT, NULL); + conn = rspamd_http_connection_new (NULL, + rspamd_control_error_handler, + rspamd_control_finish_handler, + RSPAMD_HTTP_CLIENT_SIMPLE, + RSPAMD_HTTP_CLIENT, + NULL, + NULL); msg = rspamd_http_new_message (HTTP_REQUEST); msg->url = rspamd_fstring_new_init (path, 
strlen (path)); double_to_tv (timeout, &tv); diff --git a/src/rspamadm/fuzzy_merge.c b/src/rspamadm/fuzzy_merge.c index 931bea0f0..92a434064 100644 --- a/src/rspamadm/fuzzy_merge.c +++ b/src/rspamadm/fuzzy_merge.c @@ -38,7 +38,7 @@ static GOptionEntry entries[] = { {"destination", 'd', 0, G_OPTION_ARG_STRING, &target, "Destination db", NULL}, {"quiet", 'q', 0, G_OPTION_ARG_NONE, &quiet, - "Supress output", NULL}, + "Suppress output", NULL}, {NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL} }; diff --git a/src/rspamadm/lua_repl.c b/src/rspamadm/lua_repl.c index f0d337ad9..a7c598be0 100644 --- a/src/rspamadm/lua_repl.c +++ b/src/rspamadm/lua_repl.c @@ -17,6 +17,8 @@ #include "config.h" #include "rspamadm.h" #include "cryptobox.h" +#include "libutil/http.h" +#include "libutil/http_private.h" #include "printf.h" #include "lua/lua_common.h" #include "message.h" @@ -482,7 +484,7 @@ rspamadm_lua_accept_cb (gint fd, short what, void *arg) gint nfd; if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, NULL)) == -1) { rspamd_fprintf (stderr, "accept failed: %s", strerror (errno)); return; } @@ -533,11 +535,14 @@ rspamadm_lua_handle_exec (struct rspamd_http_connection_entry *conn_ent, struct rspamadm_lua_repl_context *ctx; struct rspamadm_lua_repl_session *session = conn_ent->ud; ucl_object_t *obj, *elt; + const gchar *body; + gsize body_len; ctx = session->ctx; L = ctx->L; + body = rspamd_http_message_get_body (msg, &body_len); - if (msg->body == NULL || msg->body->len == 0) { + if (body == NULL) { rspamd_controller_send_error (conn_ent, 400, "Empty lua script"); return 0; @@ -547,8 +552,8 @@ rspamadm_lua_handle_exec (struct rspamd_http_connection_entry *conn_ent, err_idx = lua_gettop (L); /* First try return + input */ - tb = g_string_sized_new (msg->body->len + sizeof ("return ")); - rspamd_printf_gstring (tb, "return %V", msg->body); + tb = g_string_sized_new (body_len + sizeof ("return ")); + rspamd_printf_gstring (tb, 
"return %*s", (gint)body_len, body); if (luaL_loadstring (L, tb->str) != 0) { /* Reset stack */ @@ -556,7 +561,7 @@ rspamadm_lua_handle_exec (struct rspamd_http_connection_entry *conn_ent, lua_pushcfunction (L, &rspamd_lua_traceback); err_idx = lua_gettop (L); /* Try with no return */ - if (luaL_loadbuffer (L, msg->body->str, msg->body->len, "http input") != 0) { + if (luaL_loadbuffer (L, body, body_len, "http input") != 0) { rspamd_controller_send_error (conn_ent, 400, "Invalid lua script"); return 0; diff --git a/src/rspamadm/pw.c b/src/rspamadm/pw.c index 47c111335..fb2817c59 100644 --- a/src/rspamadm/pw.c +++ b/src/rspamadm/pw.c @@ -44,7 +44,7 @@ static GOptionEntry entries[] = { {"check", 'c', 0, G_OPTION_ARG_NONE, &do_check, "Check password", NULL}, {"quiet", 'q', 0, G_OPTION_ARG_NONE, &quiet, - "Supress output", NULL}, + "Suppress output", NULL}, {"password", 'p', 0, G_OPTION_ARG_STRING, &password, "Input password", NULL}, {"type", 't', 0, G_OPTION_ARG_STRING, &type, diff --git a/src/rspamd.c b/src/rspamd.c index b6b5a271c..922327f38 100644 --- a/src/rspamd.c +++ b/src/rspamd.c @@ -959,7 +959,7 @@ rspamd_control_handler (gint fd, short what, gpointer arg) gint nfd; if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, NULL)) == -1) { msg_warn_main ("accept failed: %s", strerror (errno)); return; } diff --git a/src/rspamd.h b/src/rspamd.h index c0c60185d..6a24370aa 100644 --- a/src/rspamd.h +++ b/src/rspamd.h @@ -19,6 +19,7 @@ #include "libserver/events.h" #include "libserver/roll_history.h" #include "libserver/task.h" +#include <openssl/ssl.h> #include <magic.h> @@ -295,6 +296,7 @@ struct rspamd_external_libs_ctx { void **local_addrs; struct rspamd_cryptobox_library_ctx *crypto_ctx; struct ottery_config *ottery_cfg; + SSL_CTX *ssl_ctx; ref_entry_t ref; }; diff --git a/src/rspamd_proxy.c b/src/rspamd_proxy.c index bac7af32f..b7e0c9618 100644 --- a/src/rspamd_proxy.c +++ b/src/rspamd_proxy.c @@ -17,6 +17,8 @@ 
#include "libutil/util.h" #include "libutil/map.h" #include "libutil/upstream.h" +#include "libutil/http.h" +#include "libutil/http_private.h" #include "libserver/protocol.h" #include "libserver/cfg_file.h" #include "libserver/url.h" @@ -67,6 +69,7 @@ struct rspamd_http_upstream { struct rspamd_cryptobox_pubkey *key; gint parser_from_ref; gint parser_to_ref; + gboolean local; }; struct rspamd_http_mirror { @@ -77,6 +80,7 @@ struct rspamd_http_mirror { gdouble prob; gint parser_from_ref; gint parser_to_ref; + gboolean local; }; static const guint64 rspamd_rspamd_proxy_magic = 0xcdeb4fd1fc351980ULL; @@ -138,10 +142,12 @@ struct rspamd_proxy_session { rspamd_inet_addr_t *client_addr; struct rspamd_http_connection *client_conn; gpointer map; - gsize map_len; - gint client_sock; + gpointer shmem_ref; struct rspamd_proxy_backend_connection *master_conn; GPtrArray *mirror_conns; + gsize map_len; + gint client_sock; + gboolean is_spamc; ref_entry_t ref; }; @@ -278,6 +284,8 @@ rspamd_proxy_parse_upstream (rspamd_mempool_t *pool, } up = g_slice_alloc0 (sizeof (*up)); + up->parser_from_ref = -1; + up->parser_to_ref = -1; up->name = g_strdup (ucl_object_tostring (elt)); elt = ucl_object_lookup (obj, "key"); @@ -315,6 +323,11 @@ rspamd_proxy_parse_upstream (rspamd_mempool_t *pool, ctx->default_upstream = up; } + elt = ucl_object_lookup (obj, "local"); + if (elt && ucl_object_toboolean (elt)) { + up->local = TRUE; + } + /* * Accept lua function here in form * fun :: String -> UCL @@ -428,6 +441,11 @@ rspamd_proxy_parse_mirror (rspamd_mempool_t *pool, up->prob = 1.0; } + elt = ucl_object_lookup (obj, "local"); + if (elt && ucl_object_toboolean (elt)) { + up->local = TRUE; + } + /* * Accept lua function here in form * fun :: String -> UCL @@ -801,7 +819,12 @@ proxy_session_dtor (struct rspamd_proxy_session *session) } } + if (session->master_conn && session->master_conn->results) { + ucl_object_unref (session->master_conn->results); + } + g_ptr_array_free (session->mirror_conns, 
TRUE); + rspamd_http_message_shmem_unref (session->shmem_ref); rspamd_inet_address_destroy (session->client_addr); close (session->client_sock); rspamd_mempool_delete (session->pool); @@ -1000,22 +1023,31 @@ proxy_open_mirror_connections (struct rspamd_proxy_session *session) rspamd_http_message_add_header (msg, "Settings-ID", m->settings_id); } - bk_conn->backend_conn = rspamd_http_connection_new ( - NULL, + bk_conn->backend_conn = rspamd_http_connection_new (NULL, proxy_backend_mirror_error_handler, proxy_backend_mirror_finish_handler, RSPAMD_HTTP_CLIENT_SIMPLE, RSPAMD_HTTP_CLIENT, - session->ctx->keys_cache); + session->ctx->keys_cache, + NULL); rspamd_http_connection_set_key (bk_conn->backend_conn, session->ctx->local_key); msg->peer_key = rspamd_pubkey_ref (m->key); - rspamd_http_connection_write_message (bk_conn->backend_conn, - msg, NULL, NULL, bk_conn, - bk_conn->backend_sock, - &session->ctx->io_tv, session->ctx->ev_base); + if (m->local || + rspamd_inet_address_is_local (rspamd_upstream_addr (bk_conn->up))) { + rspamd_http_connection_write_message_shared (bk_conn->backend_conn, + msg, NULL, NULL, bk_conn, + bk_conn->backend_sock, + &session->ctx->io_tv, session->ctx->ev_base); + } + else { + rspamd_http_connection_write_message (bk_conn->backend_conn, + msg, NULL, NULL, bk_conn, + bk_conn->backend_sock, + &session->ctx->io_tv, session->ctx->ev_base); + } g_ptr_array_add (session->mirror_conns, bk_conn); REF_RETAIN (session); @@ -1058,31 +1090,38 @@ proxy_backend_master_finish_handler (struct rspamd_http_connection *conn, { struct rspamd_proxy_backend_connection *bk_conn = conn->ud; struct rspamd_proxy_session *session; + rspamd_fstring_t *reply; session = bk_conn->s; rspamd_http_connection_steal_msg (session->master_conn->backend_conn); - /* Reset spamc legacy */ - if (msg->method >= HTTP_CHECK) { - msg->method = HTTP_GET; - } - - if (msg->url->len == 0) { - msg->url = rspamd_fstring_append (msg->url, "/check", strlen ("/check")); - } - 
rspamd_http_message_remove_header (msg, "Content-Length"); rspamd_http_message_remove_header (msg, "Key"); rspamd_http_connection_reset (session->master_conn->backend_conn); - rspamd_http_connection_write_message (session->client_conn, - msg, NULL, NULL, session, session->client_sock, - &session->ctx->io_tv, session->ctx->ev_base); if (!proxy_backend_parse_results (session, bk_conn, session->ctx->lua_state, bk_conn->parser_from_ref, msg->body_buf.begin, msg->body_buf.len)) { msg_warn_session ("cannot parse results from the master backend"); } + + if (session->is_spamc) { + /* We need to reformat ucl to fit with legacy spamc protocol */ + if (bk_conn->results) { + reply = rspamd_fstring_new (); + rspamd_ucl_torspamc_output (bk_conn->results, &reply); + rspamd_http_message_set_body_from_fstring_steal (msg, reply); + } + else { + msg_warn_session ("cannot parse results from the master backend, " + "return them as is"); + } + } + + rspamd_http_connection_write_message (session->client_conn, + msg, NULL, NULL, session, session->client_sock, + &session->ctx->io_tv, session->ctx->ev_base); + return 0; } @@ -1114,6 +1153,17 @@ proxy_client_finish_handler (struct rspamd_http_connection *conn, session->master_conn->name = "master"; host = rspamd_http_message_find_header (msg, "Host"); + /* Reset spamc legacy */ + if (msg->method >= HTTP_SYMBOLS) { + msg->method = HTTP_GET; + session->is_spamc = TRUE; + msg_info_session ("enabling legacy rspamc mode for session"); + } + + if (msg->url->len == 0) { + msg->url = rspamd_fstring_append (msg->url, "/check", strlen ("/check")); + } + if (host == NULL) { backend = session->ctx->default_upstream; } @@ -1145,7 +1195,9 @@ proxy_client_finish_handler (struct rspamd_http_connection *conn, SOCK_STREAM, TRUE); if (session->master_conn->backend_sock == -1) { - msg_err_session ("cannot connect upstream for %s", host ? hostbuf : "default"); + msg_err_session ("cannot connect upstream: %s(%s)", + host ? 
hostbuf : "default", + rspamd_inet_address_to_string (rspamd_upstream_addr (session->master_conn->up))); rspamd_upstream_fail (session->master_conn->up); goto err; } @@ -1159,15 +1211,7 @@ proxy_client_finish_handler (struct rspamd_http_connection *conn, rspamd_http_message_remove_header (msg, "Content-Length"); rspamd_http_message_remove_header (msg, "Key"); rspamd_http_connection_reset (session->client_conn); - - /* Reset spamc legacy */ - if (msg->method >= HTTP_CHECK) { - msg->method = HTTP_GET; - } - - if (msg->url->len == 0) { - msg->url = rspamd_fstring_append (msg->url, "/check", strlen ("/check")); - } + session->shmem_ref = rspamd_http_message_shmem_ref (msg); session->master_conn->backend_conn = rspamd_http_connection_new ( NULL, @@ -1175,7 +1219,8 @@ proxy_client_finish_handler (struct rspamd_http_connection *conn, proxy_backend_master_finish_handler, RSPAMD_HTTP_CLIENT_SIMPLE, RSPAMD_HTTP_CLIENT, - session->ctx->keys_cache); + session->ctx->keys_cache, + NULL); session->master_conn->parser_from_ref = backend->parser_from_ref; session->master_conn->parser_to_ref = backend->parser_to_ref; @@ -1183,10 +1228,22 @@ proxy_client_finish_handler (struct rspamd_http_connection *conn, session->ctx->local_key); msg->peer_key = rspamd_pubkey_ref (backend->key); - rspamd_http_connection_write_message (session->master_conn->backend_conn, - msg, NULL, NULL, session->master_conn, - session->master_conn->backend_sock, - &session->ctx->io_tv, session->ctx->ev_base); + if (backend->local || + rspamd_inet_address_is_local ( + rspamd_upstream_addr (session->master_conn->up))) { + rspamd_http_connection_write_message_shared ( + session->master_conn->backend_conn, + msg, NULL, NULL, session->master_conn, + session->master_conn->backend_sock, + &session->ctx->io_tv, session->ctx->ev_base); + } + else { + rspamd_http_connection_write_message ( + session->master_conn->backend_conn, + msg, NULL, NULL, session->master_conn, + session->master_conn->backend_sock, + 
&session->ctx->io_tv, session->ctx->ev_base); + } } } else { @@ -1198,6 +1255,10 @@ proxy_client_finish_handler (struct rspamd_http_connection *conn, return 0; err: + rspamd_http_connection_steal_msg (session->client_conn); + rspamd_http_message_remove_header (msg, "Content-Length"); + rspamd_http_message_remove_header (msg, "Key"); + rspamd_http_connection_reset (session->client_conn); proxy_client_write_error (session, 404, "Backend not found"); return 0; @@ -1215,7 +1276,7 @@ proxy_accept_socket (gint fd, short what, void *arg) ctx = worker->ctx; if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, worker->accept_events)) == -1) { msg_warn ("accept failed: %s", strerror (errno)); return; } @@ -1231,13 +1292,13 @@ proxy_accept_socket (gint fd, short what, void *arg) session->mirror_conns = g_ptr_array_sized_new (ctx->mirrors->len); session->pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "proxy"); - session->client_conn = rspamd_http_connection_new ( - NULL, - proxy_client_error_handler, - proxy_client_finish_handler, - 0, - RSPAMD_HTTP_SERVER, - ctx->keys_cache); + session->client_conn = rspamd_http_connection_new (NULL, + proxy_client_error_handler, + proxy_client_finish_handler, + 0, + RSPAMD_HTTP_SERVER, + ctx->keys_cache, + NULL); session->ctx = ctx; if (ctx->key) { @@ -1248,7 +1309,7 @@ proxy_accept_socket (gint fd, short what, void *arg) rspamd_inet_address_to_string (addr), rspamd_inet_address_get_port (addr)); - rspamd_http_connection_read_message (session->client_conn, + rspamd_http_connection_read_message_shared (session->client_conn, session, nfd, &ctx->io_tv, diff --git a/src/smtp_proxy.c b/src/smtp_proxy.c index 6b0a4fe2e..8eebc2c86 100644 --- a/src/smtp_proxy.c +++ b/src/smtp_proxy.c @@ -902,7 +902,7 @@ accept_socket (gint fd, short what, void *arg) ctx = worker->ctx; if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, worker->accept_events)) 
== -1) { msg_warn ("accept failed: %s", strerror (errno)); return; } diff --git a/src/worker.c b/src/worker.c index ac104f7f0..a099e8177 100644 --- a/src/worker.c +++ b/src/worker.c @@ -266,7 +266,7 @@ accept_socket (gint fd, short what, void *arg) } if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, worker->accept_events)) == -1) { msg_warn_ctx ("accept failed: %s", strerror (errno)); return; } @@ -298,13 +298,13 @@ accept_socket (gint fd, short what, void *arg) /* TODO: allow to disable autolearn in protocol */ task->flags |= RSPAMD_TASK_FLAG_LEARN_AUTO; - task->http_conn = rspamd_http_connection_new ( - rspamd_worker_body_handler, - rspamd_worker_error_handler, - rspamd_worker_finish_handler, - 0, - RSPAMD_HTTP_SERVER, - ctx->keys_cache); + task->http_conn = rspamd_http_connection_new (rspamd_worker_body_handler, + rspamd_worker_error_handler, + rspamd_worker_finish_handler, + 0, + RSPAMD_HTTP_SERVER, + ctx->keys_cache, + NULL); task->ev_base = ctx->ev_base; worker->nconns++; rspamd_mempool_add_destructor (task->task_pool, diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c613d0cfd..e94a1a04b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -26,6 +26,7 @@ ENDIF(NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin") TARGET_LINK_LIBRARIES(rspamd-test rspamd-cdb) TARGET_LINK_LIBRARIES(rspamd-test lcbtrie) TARGET_LINK_LIBRARIES(rspamd-test rspamd-http-parser) +TARGET_LINK_LIBRARIES(rspamd-test rspamd-lpeg) TARGET_LINK_LIBRARIES(rspamd-test ${RSPAMD_REQUIRED_LIBRARIES}) IF (ENABLE_SNOWBALL MATCHES "ON") TARGET_LINK_LIBRARIES(rspamd-test stemmer) diff --git a/test/lua/unit/expressions.lua b/test/lua/unit/expressions.lua index f2c4014b5..1e5d09cb5 100644 --- a/test/lua/unit/expressions.lua +++ b/test/lua/unit/expressions.lua @@ -93,7 +93,9 @@ context("Rspamd expressions", function() {'F && ((A + B + C + D) > 1)', 0}, {'(E) && ((B + B + B + B) >= 1)', 0}, {'!!C', 1}, - {'(B) & (D) & ((G) | (H) | (I) | (A))', 0} 
+ {'(B) & (D) & ((G) | (H) | (I) | (A))', 0}, + {'A & C & (!D || !C || !E)', 1}, + {'A & C & !(D || C || E)', 0}, } for _,c in ipairs(cases) do local expr,err = rspamd_expression.create(c[1], @@ -102,8 +104,8 @@ context("Rspamd expressions", function() assert_not_nil(expr, "Cannot parse " .. c[1]) --print(expr) res = expr:process(atoms) - assert_equal(res, c[2], string.format("Processed expr '%s' returned '%d', expected: '%d'", - expr:to_string(), res, c[2])) + assert_equal(res, c[2], string.format("Processed expr '%s'{%s} returned '%d', expected: '%d'", + expr:to_string(), c[1], res, c[2])) end pool:destroy() diff --git a/test/rspamd_http_test.c b/test/rspamd_http_test.c index 428c510c4..4ce859efd 100644 --- a/test/rspamd_http_test.c +++ b/test/rspamd_http_test.c @@ -16,7 +16,8 @@ #include "config.h" #include "rspamd.h" #include "util.h" -#include "http.h" +#include "libutil/http.h" +#include "libutil/http_private.h" #include "tests.h" #include "ottery.h" #include "cryptobox.h" @@ -55,7 +56,7 @@ rspamd_server_accept (gint fd, short what, void *arg) gint nfd; if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, NULL)) == -1) { msg_warn ("accept failed: %s", strerror (errno)); return; } @@ -155,9 +156,13 @@ rspamd_http_client_func (const gchar *path, rspamd_inet_addr_t *addr, gint fd; g_assert ((fd = rspamd_inet_address_connect (addr, SOCK_STREAM, TRUE)) != -1); - conn = rspamd_http_connection_new (rspamd_client_body, rspamd_client_err, - rspamd_client_finish, RSPAMD_HTTP_CLIENT_SIMPLE, - RSPAMD_HTTP_CLIENT, c); + conn = rspamd_http_connection_new (rspamd_client_body, + rspamd_client_err, + rspamd_client_finish, + RSPAMD_HTTP_CLIENT_SIMPLE, + RSPAMD_HTTP_CLIENT, + c, + NULL); rspamd_snprintf (urlbuf, sizeof (urlbuf), "http://127.0.0.1/%s", path); msg = rspamd_http_message_from_url (urlbuf); diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 672325924..e0bb5c886 100644 --- a/utils/CMakeLists.txt +++ 
b/utils/CMakeLists.txt @@ -1,5 +1,6 @@ SET(UTILSERVERSRC rspamd_http_server.c) SET(UTILBENCHSRC rspamd_http_bench.c) +SET(RECVBENCHSRC received_parser_bench.c) ADD_EXECUTABLE(rspamd-http-server ${UTILSERVERSRC}) SET_TARGET_PROPERTIES(rspamd-http-server PROPERTIES LINKER_LANGUAGE C) @@ -9,13 +10,25 @@ TARGET_LINK_LIBRARIES(rspamd-http-server ${RSPAMD_REQUIRED_LIBRARIES}) ADD_EXECUTABLE(rspamd-http-bench ${UTILBENCHSRC}) SET_TARGET_PROPERTIES(rspamd-http-bench PROPERTIES LINKER_LANGUAGE C) -TARGET_LINK_LIBRARIES(rspamd-http-bench rspamd-server) TARGET_LINK_LIBRARIES(rspamd-http-bench rspamd-http-parser) +TARGET_LINK_LIBRARIES(rspamd-http-bench rspamd-server) TARGET_LINK_LIBRARIES(rspamd-http-bench ${RSPAMD_REQUIRED_LIBRARIES}) +ADD_EXECUTABLE(rspamd-received-bench ${RECVBENCHSRC}) +SET_TARGET_PROPERTIES(rspamd-received-bench PROPERTIES LINKER_LANGUAGE C) +TARGET_LINK_LIBRARIES(rspamd-received-bench rspamd-server) +IF (ENABLE_SNOWBALL MATCHES "ON") + TARGET_LINK_LIBRARIES(rspamd-received-bench stemmer) +ENDIF() +IF(ENABLE_HIREDIS MATCHES "ON") + TARGET_LINK_LIBRARIES(rspamd-received-bench rspamd-hiredis) +ENDIF() +TARGET_LINK_LIBRARIES(rspamd-received-bench ${RSPAMD_REQUIRED_LIBRARIES}) + IF (ENABLE_HYPERSCAN MATCHES "ON") SET_TARGET_PROPERTIES(rspamd-http-bench PROPERTIES LINKER_LANGUAGE CXX) SET_TARGET_PROPERTIES(rspamd-http-server PROPERTIES LINKER_LANGUAGE CXX) + SET_TARGET_PROPERTIES(rspamd-received-bench PROPERTIES LINKER_LANGUAGE CXX) ENDIF() # Redirector diff --git a/utils/received_parser_bench.c b/utils/received_parser_bench.c new file mode 100644 index 000000000..cb304b631 --- /dev/null +++ b/utils/received_parser_bench.c @@ -0,0 +1,121 @@ +/*- + * Copyright 2016 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "config.h" +#include "printf.h" +#include "message.h" +#include "smtp_parsers.h" + +static gdouble total_time = 0; +static gint total_parsed = 0; +static gint total_valid = 0; +static gint total_real_ip = 0; +static gint total_real_host = 0; +static gint total_known_proto = 0; +static gint total_known_ts = 0; + +static void +rspamd_process_file (const gchar *fname) +{ + struct rspamd_task *task; + GIOChannel *f; + GError *err = NULL; + GString *buf; + struct received_header rh; + gdouble t1, t2; + + f = g_io_channel_new_file (fname, "r", &err); + + if (!f) { + rspamd_fprintf (stderr, "cannot open %s: %e\n", fname, err); + g_error_free (err); + + return; + } + + g_io_channel_set_encoding (f, NULL, NULL); + buf = g_string_sized_new (8192); + task = g_malloc0 (sizeof (*task)); + task->task_pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "test"); + + while (g_io_channel_read_line_string (f, buf, NULL, &err) + == G_IO_STATUS_NORMAL) { + + while (buf->len > 0 && g_ascii_isspace (buf->str[buf->len - 1])) { + buf->len --; + } + + t1 = rspamd_get_virtual_ticks (); + rspamd_smtp_recieved_parse (task, buf->str, buf->len, &rh); + t2 = rspamd_get_virtual_ticks (); + + total_time += t2 - t1; + total_parsed ++; + + if (rh.addr) { + total_real_ip ++; + } + if (rh.real_hostname) { + total_real_host ++; + } + if (rh.type != RSPAMD_RECEIVED_UNKNOWN) { + total_known_proto ++; + } + + if (rh.by_hostname || rh.timestamp > 0) { + total_valid ++; + } + + if (rh.timestamp != 0) { + total_known_ts ++; + } + } + + if (err) { + rspamd_fprintf 
(stderr, "cannot read %s: %e\n", fname, err); + g_error_free (err); + } + + g_io_channel_unref (f); + g_string_free (buf, TRUE); + rspamd_mempool_delete (task->task_pool); + g_free (task); +} + +int +main (int argc, char **argv) +{ + gint i; + + for (i = 1; i < argc; i ++) { + if (argv[i]) { + rspamd_process_file (argv[i]); + } + } + + rspamd_printf ("Parsed %d received headers in %.3f seconds\n" + "Total valid (has by part): %d\n" + "Total real ip: %d\n" + "Total real host: %d\n" + "Total known proto: %d\n" + "Total known timestamp: %d\n", + total_parsed, total_time, + total_valid, total_real_ip, + total_real_host, total_known_proto, + total_known_ts); + + return 0; +} diff --git a/utils/rspamd_http_bench.c b/utils/rspamd_http_bench.c index c9a752243..32aedb334 100644 --- a/utils/rspamd_http_bench.c +++ b/utils/rspamd_http_bench.c @@ -16,7 +16,8 @@ #include "config.h" #include "rspamd.h" #include "util.h" -#include "http.h" +#include "libutil/http.h" +#include "libutil/http_private.h" #include "ottery.h" #include "cryptobox.h" #include "unix-std.h" @@ -145,9 +146,13 @@ rspamd_http_client_func (struct event_base *ev_base, struct lat_elt *latency, g_assert (fd != -1); flags = 1; (void)setsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &flags, sizeof (flags)); - conn = rspamd_http_connection_new (rspamd_client_body, rspamd_client_err, - rspamd_client_finish, RSPAMD_HTTP_CLIENT_SIMPLE, - RSPAMD_HTTP_CLIENT, c); + conn = rspamd_http_connection_new (rspamd_client_body, + rspamd_client_err, + rspamd_client_finish, + RSPAMD_HTTP_CLIENT_SIMPLE, + RSPAMD_HTTP_CLIENT, + c, + NULL); rspamd_snprintf (urlbuf, sizeof (urlbuf), "http://%s/%d", host, file_size); msg = rspamd_http_message_from_url (urlbuf); diff --git a/utils/rspamd_http_server.c b/utils/rspamd_http_server.c index ad01085df..69ef852e6 100644 --- a/utils/rspamd_http_server.c +++ b/utils/rspamd_http_server.c @@ -16,7 +16,9 @@ #include "config.h" #include "rspamd.h" #include "util.h" -#include "http.h" +#include 
"libutil/fstring.h" +#include "libutil/http.h" +#include "libutil/http_private.h" #include "ottery.h" #include "cryptobox.h" #include "keypair.h" @@ -83,6 +85,7 @@ rspamd_server_finish (struct rspamd_http_connection *conn, gulong size; const gchar *url_str; guint url_len; + rspamd_fstring_t *body; if (!session->reply) { session->reply = TRUE; @@ -100,9 +103,11 @@ rspamd_server_finish (struct rspamd_http_connection *conn, reply->code = 200; reply->status = rspamd_fstring_new_init ("OK", 2); - reply->body = rspamd_fstring_sized_new (size); - reply->body->len = size; - memset (reply->body->str, 0, size); + body = rspamd_fstring_sized_new (size); + body->len = size; + memset (body->str, 0, size); + rspamd_http_message_set_body_from_fstring_steal (msg, body); + } else { reply->code = 404; @@ -134,7 +139,7 @@ rspamd_server_accept (gint fd, short what, void *arg) do { if ((nfd = - rspamd_accept_from_socket (fd, &addr)) == -1) { + rspamd_accept_from_socket (fd, &addr, NULL)) == -1) { rspamd_fprintf (stderr, "accept failed: %s", strerror (errno)); return; } @@ -145,8 +150,13 @@ rspamd_server_accept (gint fd, short what, void *arg) rspamd_inet_address_destroy (addr); session = g_slice_alloc (sizeof (*session)); - session->conn = rspamd_http_connection_new (NULL, rspamd_server_error, - rspamd_server_finish, 0, RSPAMD_HTTP_SERVER, c); + session->conn = rspamd_http_connection_new (NULL, + rspamd_server_error, + rspamd_server_finish, + 0, + RSPAMD_HTTP_SERVER, + c, + NULL); rspamd_http_connection_set_key (session->conn, server_key); rspamd_http_connection_read_message (session->conn, session, |