From 61555065f3d1c8badcc9573691232f1b6e42988c Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 21 Apr 2014 16:25:51 +0100 Subject: Rework project structure, remove trash files. --- CMakeLists.txt | 142 +-- src/CMakeLists.txt | 250 +++-- src/aio_event.c | 487 --------- src/aio_event.h | 67 -- src/binlog.c | 579 ----------- src/binlog.h | 93 -- src/bloom.c | 153 --- src/bloom.h | 48 - src/buffer.c | 786 -------------- src/buffer.h | 158 --- src/cfg_file.h | 516 ---------- src/cfg_rcl.c | 1471 -------------------------- src/cfg_rcl.h | 238 ----- src/cfg_utils.c | 969 ------------------ src/diff.c | 445 -------- src/diff.h | 74 -- src/dkim.c | 1480 --------------------------- src/dkim.h | 207 ---- src/dns.c | 151 --- src/dns.h | 60 -- src/dns_private.h | 209 ---- src/dynamic_cfg.c | 599 ----------- src/dynamic_cfg.h | 66 -- src/events.c | 250 ----- src/events.h | 88 -- src/expressions.c | 1452 -------------------------- src/expressions.h | 133 --- src/filter.c | 1096 -------------------- src/filter.h | 167 --- src/fstring.c | 461 --------- src/fstring.h | 120 --- src/fuzzy.c | 498 --------- src/fuzzy.h | 69 -- src/hash.c | 489 --------- src/hash.h | 160 --- src/html.c | 942 ----------------- src/html.h | 226 ---- src/http.c | 1222 ---------------------- src/http.h | 278 ----- src/images.c | 255 ----- src/images.h | 33 - src/libmime/CMakeLists.txt | 29 + src/libmime/expressions.c | 1452 ++++++++++++++++++++++++++ src/libmime/expressions.h | 133 +++ src/libmime/filter.c | 1096 ++++++++++++++++++++ src/libmime/filter.h | 167 +++ src/libmime/images.c | 255 +++++ src/libmime/images.h | 33 + src/libmime/message.c | 1764 ++++++++++++++++++++++++++++++++ src/libmime/message.h | 91 ++ src/libmime/protocol.c | 821 +++++++++++++++ src/libmime/protocol.h | 46 + src/libmime/smtp_proto.c | 701 +++++++++++++ src/libmime/smtp_proto.h | 95 ++ src/libmime/smtp_utils.c | 362 +++++++ src/libmime/smtp_utils.h | 63 ++ src/libmime/worker_util.c | 255 +++++ 
src/libserver/CMakeLists.txt | 63 ++ src/libserver/binlog.c | 579 +++++++++++ src/libserver/binlog.h | 93 ++ src/libserver/buffer.c | 786 ++++++++++++++ src/libserver/buffer.h | 158 +++ src/libserver/cfg_file.h | 516 ++++++++++ src/libserver/cfg_rcl.c | 1471 ++++++++++++++++++++++++++ src/libserver/cfg_rcl.h | 238 +++++ src/libserver/cfg_utils.c | 969 ++++++++++++++++++ src/libserver/dkim.c | 1480 +++++++++++++++++++++++++++ src/libserver/dkim.h | 207 ++++ src/libserver/dns.c | 151 +++ src/libserver/dns.h | 60 ++ src/libserver/dynamic_cfg.c | 599 +++++++++++ src/libserver/dynamic_cfg.h | 66 ++ src/libserver/events.c | 250 +++++ src/libserver/events.h | 88 ++ src/libserver/html.c | 942 +++++++++++++++++ src/libserver/html.h | 226 ++++ src/libserver/proxy.c | 241 +++++ src/libserver/proxy.h | 69 ++ src/libserver/roll_history.c | 212 ++++ src/libserver/roll_history.h | 106 ++ src/libserver/settings.c | 657 ++++++++++++ src/libserver/settings.h | 55 + src/libserver/spf.c | 1465 ++++++++++++++++++++++++++ src/libserver/spf.h | 84 ++ src/libserver/statfile.c | 927 +++++++++++++++++ src/libserver/statfile.h | 284 +++++ src/libserver/statfile_sync.c | 350 +++++++ src/libserver/statfile_sync.h | 14 + src/libserver/symbols_cache.c | 1055 +++++++++++++++++++ src/libserver/symbols_cache.h | 150 +++ src/libserver/task.c | 159 +++ src/libserver/task.h | 165 +++ src/libserver/url.c | 1620 +++++++++++++++++++++++++++++ src/libserver/url.h | 111 ++ src/libutil/CMakeLists.txt | 50 + src/libutil/aio_event.c | 487 +++++++++ src/libutil/aio_event.h | 67 ++ src/libutil/bloom.c | 153 +++ src/libutil/bloom.h | 48 + src/libutil/diff.c | 445 ++++++++ src/libutil/diff.h | 74 ++ src/libutil/fstring.c | 461 +++++++++ src/libutil/fstring.h | 120 +++ src/libutil/fuzzy.c | 498 +++++++++ src/libutil/fuzzy.h | 69 ++ src/libutil/hash.c | 489 +++++++++ src/libutil/hash.h | 160 +++ src/libutil/http.c | 1222 ++++++++++++++++++++++ src/libutil/http.h | 278 +++++ src/libutil/logger.c | 769 ++++++++++++++ 
src/libutil/logger.h | 117 +++ src/libutil/map.c | 1148 +++++++++++++++++++++ src/libutil/map.h | 134 +++ src/libutil/mem_pool.c | 776 ++++++++++++++ src/libutil/mem_pool.h | 299 ++++++ src/libutil/memcached.c | 831 +++++++++++++++ src/libutil/memcached.h | 142 +++ src/libutil/printf.c | 635 ++++++++++++ src/libutil/printf.h | 75 ++ src/libutil/radix.c | 311 ++++++ src/libutil/radix.h | 82 ++ src/libutil/rrd.c | 1015 ++++++++++++++++++ src/libutil/rrd.h | 374 +++++++ src/libutil/trie.c | 230 +++++ src/libutil/trie.h | 86 ++ src/libutil/upstream.c | 525 ++++++++++ src/libutil/upstream.h | 127 +++ src/libutil/util.c | 2275 +++++++++++++++++++++++++++++++++++++++++ src/libutil/util.h | 491 +++++++++ src/logger.c | 769 -------------- src/logger.h | 117 --- src/map.c | 1148 --------------------- src/map.h | 134 --- src/mem_pool.c | 776 -------------- src/mem_pool.h | 299 ------ src/memcached.c | 831 --------------- src/memcached.h | 142 --- src/message.c | 1764 -------------------------------- src/message.h | 91 -- src/printf.c | 635 ------------ src/printf.h | 75 -- src/protocol.c | 821 --------------- src/protocol.h | 46 - src/proxy.c | 241 ----- src/proxy.h | 69 -- src/radix.c | 311 ------ src/radix.h | 82 -- src/roll_history.c | 212 ---- src/roll_history.h | 106 -- src/rrd.c | 1015 ------------------ src/rrd.h | 374 ------- src/settings.c | 657 ------------ src/settings.h | 55 - src/smtp_proto.c | 701 ------------- src/smtp_proto.h | 95 -- src/smtp_utils.c | 362 ------- src/smtp_utils.h | 63 -- src/spf.c | 1465 -------------------------- src/spf.h | 84 -- src/statfile.c | 927 ----------------- src/statfile.h | 284 ----- src/statfile_sync.c | 350 ------- src/statfile_sync.h | 14 - src/symbols_cache.c | 1055 ------------------- src/symbols_cache.h | 150 --- src/task.c | 159 --- src/task.h | 165 --- src/trie.c | 230 ----- src/trie.h | 86 -- src/upstream.c | 525 ---------- src/upstream.h | 127 --- src/url.c | 1620 ----------------------------- src/url.h | 111 -- 
src/util.c | 2275 ----------------------------------------- src/util.h | 491 --------- src/worker_util.c | 255 ----- 176 files changed, 39217 insertions(+), 39426 deletions(-) delete mode 100644 src/aio_event.c delete mode 100644 src/aio_event.h delete mode 100644 src/binlog.c delete mode 100644 src/binlog.h delete mode 100644 src/bloom.c delete mode 100644 src/bloom.h delete mode 100644 src/buffer.c delete mode 100644 src/buffer.h delete mode 100644 src/cfg_file.h delete mode 100644 src/cfg_rcl.c delete mode 100644 src/cfg_rcl.h delete mode 100644 src/cfg_utils.c delete mode 100644 src/diff.c delete mode 100644 src/diff.h delete mode 100644 src/dkim.c delete mode 100644 src/dkim.h delete mode 100644 src/dns.c delete mode 100644 src/dns.h delete mode 100644 src/dns_private.h delete mode 100644 src/dynamic_cfg.c delete mode 100644 src/dynamic_cfg.h delete mode 100644 src/events.c delete mode 100644 src/events.h delete mode 100644 src/expressions.c delete mode 100644 src/expressions.h delete mode 100644 src/filter.c delete mode 100644 src/filter.h delete mode 100644 src/fstring.c delete mode 100644 src/fstring.h delete mode 100644 src/fuzzy.c delete mode 100644 src/fuzzy.h delete mode 100644 src/hash.c delete mode 100644 src/hash.h delete mode 100644 src/html.c delete mode 100644 src/html.h delete mode 100644 src/http.c delete mode 100644 src/http.h delete mode 100644 src/images.c delete mode 100644 src/images.h create mode 100644 src/libmime/CMakeLists.txt create mode 100644 src/libmime/expressions.c create mode 100644 src/libmime/expressions.h create mode 100644 src/libmime/filter.c create mode 100644 src/libmime/filter.h create mode 100644 src/libmime/images.c create mode 100644 src/libmime/images.h create mode 100644 src/libmime/message.c create mode 100644 src/libmime/message.h create mode 100644 src/libmime/protocol.c create mode 100644 src/libmime/protocol.h create mode 100644 src/libmime/smtp_proto.c create mode 100644 src/libmime/smtp_proto.h create mode 
100644 src/libmime/smtp_utils.c create mode 100644 src/libmime/smtp_utils.h create mode 100644 src/libmime/worker_util.c create mode 100644 src/libserver/CMakeLists.txt create mode 100644 src/libserver/binlog.c create mode 100644 src/libserver/binlog.h create mode 100644 src/libserver/buffer.c create mode 100644 src/libserver/buffer.h create mode 100644 src/libserver/cfg_file.h create mode 100644 src/libserver/cfg_rcl.c create mode 100644 src/libserver/cfg_rcl.h create mode 100644 src/libserver/cfg_utils.c create mode 100644 src/libserver/dkim.c create mode 100644 src/libserver/dkim.h create mode 100644 src/libserver/dns.c create mode 100644 src/libserver/dns.h create mode 100644 src/libserver/dynamic_cfg.c create mode 100644 src/libserver/dynamic_cfg.h create mode 100644 src/libserver/events.c create mode 100644 src/libserver/events.h create mode 100644 src/libserver/html.c create mode 100644 src/libserver/html.h create mode 100644 src/libserver/proxy.c create mode 100644 src/libserver/proxy.h create mode 100644 src/libserver/roll_history.c create mode 100644 src/libserver/roll_history.h create mode 100644 src/libserver/settings.c create mode 100644 src/libserver/settings.h create mode 100644 src/libserver/spf.c create mode 100644 src/libserver/spf.h create mode 100644 src/libserver/statfile.c create mode 100644 src/libserver/statfile.h create mode 100644 src/libserver/statfile_sync.c create mode 100644 src/libserver/statfile_sync.h create mode 100644 src/libserver/symbols_cache.c create mode 100644 src/libserver/symbols_cache.h create mode 100644 src/libserver/task.c create mode 100644 src/libserver/task.h create mode 100644 src/libserver/url.c create mode 100644 src/libserver/url.h create mode 100644 src/libutil/CMakeLists.txt create mode 100644 src/libutil/aio_event.c create mode 100644 src/libutil/aio_event.h create mode 100644 src/libutil/bloom.c create mode 100644 src/libutil/bloom.h create mode 100644 src/libutil/diff.c create mode 100644 src/libutil/diff.h 
create mode 100644 src/libutil/fstring.c create mode 100644 src/libutil/fstring.h create mode 100644 src/libutil/fuzzy.c create mode 100644 src/libutil/fuzzy.h create mode 100644 src/libutil/hash.c create mode 100644 src/libutil/hash.h create mode 100644 src/libutil/http.c create mode 100644 src/libutil/http.h create mode 100644 src/libutil/logger.c create mode 100644 src/libutil/logger.h create mode 100644 src/libutil/map.c create mode 100644 src/libutil/map.h create mode 100644 src/libutil/mem_pool.c create mode 100644 src/libutil/mem_pool.h create mode 100644 src/libutil/memcached.c create mode 100644 src/libutil/memcached.h create mode 100644 src/libutil/printf.c create mode 100644 src/libutil/printf.h create mode 100644 src/libutil/radix.c create mode 100644 src/libutil/radix.h create mode 100644 src/libutil/rrd.c create mode 100644 src/libutil/rrd.h create mode 100644 src/libutil/trie.c create mode 100644 src/libutil/trie.h create mode 100644 src/libutil/upstream.c create mode 100644 src/libutil/upstream.h create mode 100644 src/libutil/util.c create mode 100644 src/libutil/util.h delete mode 100644 src/logger.c delete mode 100644 src/logger.h delete mode 100644 src/map.c delete mode 100644 src/map.h delete mode 100644 src/mem_pool.c delete mode 100644 src/mem_pool.h delete mode 100644 src/memcached.c delete mode 100644 src/memcached.h delete mode 100644 src/message.c delete mode 100644 src/message.h delete mode 100644 src/printf.c delete mode 100644 src/printf.h delete mode 100644 src/protocol.c delete mode 100644 src/protocol.h delete mode 100644 src/proxy.c delete mode 100644 src/proxy.h delete mode 100644 src/radix.c delete mode 100644 src/radix.h delete mode 100644 src/roll_history.c delete mode 100644 src/roll_history.h delete mode 100644 src/rrd.c delete mode 100644 src/rrd.h delete mode 100644 src/settings.c delete mode 100644 src/settings.h delete mode 100644 src/smtp_proto.c delete mode 100644 src/smtp_proto.h delete mode 100644 src/smtp_utils.c 
delete mode 100644 src/smtp_utils.h delete mode 100644 src/spf.c delete mode 100644 src/spf.h delete mode 100644 src/statfile.c delete mode 100644 src/statfile.h delete mode 100644 src/statfile_sync.c delete mode 100644 src/statfile_sync.h delete mode 100644 src/symbols_cache.c delete mode 100644 src/symbols_cache.h delete mode 100644 src/task.c delete mode 100644 src/task.h delete mode 100644 src/trie.c delete mode 100644 src/trie.h delete mode 100644 src/upstream.c delete mode 100644 src/upstream.h delete mode 100644 src/url.c delete mode 100644 src/url.h delete mode 100644 src/util.c delete mode 100644 src/util.h delete mode 100644 src/worker_util.c diff --git a/CMakeLists.txt b/CMakeLists.txt index f465da28d..b970df796 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,61 +107,7 @@ INCLUDE(CheckCCompilerFlag) INCLUDE(FindPerl) INCLUDE(FindOpenSSL) -############################# MODULES SECTION ############################################# - -MACRO(_AddModulesForced MLIST WLIST) -# Generate unique string for this build - STRING(RANDOM LENGTH 8 _MODULES_ID) - SET(MODULES_ID ${_MODULES_ID} CACHE INTERNAL "Modules ID" FORCE) - FILE(WRITE "src/modules.c" "/* ${MODULES_ID} */\n#include \"config.h\"\n") - - # Handle even old cmake - LIST(LENGTH ${MLIST} MLIST_COUNT) - LIST(LENGTH ${WLIST} WLIST_COUNT) - MATH(EXPR MLIST_MAX ${MLIST_COUNT}-1) - MATH(EXPR WLIST_MAX ${WLIST_COUNT}-1) - - FOREACH(MOD_IDX RANGE ${MLIST_MAX}) - LIST(GET ${MLIST} ${MOD_IDX} MOD) - FILE(APPEND "src/modules.c" "extern module_t ${MOD}_module;\n") - ENDFOREACH(MOD_IDX RANGE ${MLIST_MAX}) - - FILE(APPEND "src/modules.c" "\n\nmodule_t *modules[] = {\n") - - FOREACH(MOD_IDX RANGE ${MLIST_MAX}) - LIST(GET ${MLIST} ${MOD_IDX} MOD) - FILE(APPEND "src/modules.c" "&${MOD}_module,\n") - ENDFOREACH(MOD_IDX RANGE ${MLIST_MAX}) - - FILE(APPEND "src/modules.c" "NULL\n};\n") - - FOREACH(MOD_IDX RANGE ${WLIST_MAX}) - LIST(GET ${WLIST} ${MOD_IDX} WRK) - FILE(APPEND "src/modules.c" "extern worker_t 
${WRK}_worker;\n") - ENDFOREACH(MOD_IDX RANGE ${WLIST_MAX}) - - FILE(APPEND "src/modules.c" "\n\nworker_t *workers[] = {\n") - - FOREACH(MOD_IDX RANGE ${WLIST_MAX}) - LIST(GET ${WLIST} ${MOD_IDX} WRK) - FILE(APPEND "src/modules.c" "&${WRK}_worker,\n") - ENDFOREACH(MOD_IDX RANGE ${WLIST_MAX}) - FILE(APPEND "src/modules.c" "NULL\n};\n") -ENDMACRO(_AddModulesForced MLIST WLIST) - -MACRO(AddModules MLIST WLIST) - _AddModulesForced(${MLIST} ${WLIST}) - #IF(NOT EXISTS "src/modules.c") - # _AddModulesForced(${MLIST} ${WLIST}) - #ELSE(NOT EXISTS "src/modules.c") - # FILE(STRINGS "src/modules.c" FILE_ID_RAW REGEX "^/.*[a-zA-Z0-9]+.*/$") - # STRING(REGEX MATCH "[a-zA-Z0-9]+" FILE_ID "${FILE_ID_RAW}") - # IF(NOT FILE_ID STREQUAL MODULES_ID) - # MESSAGE("Regenerate modules info") - # _AddModulesForced(${MLIST} ${WLIST}) - # ENDIF(NOT FILE_ID STREQUAL MODULES_ID) - #ENDIF(NOT EXISTS "src/modules.c") -ENDMACRO(AddModules MLIST WLIST) +############################# MACRO SECTION ############################################# # Find lua installation MACRO(FindLua _major _minor) @@ -839,7 +785,6 @@ ENDIF(HG) ################################ SOURCES SECTION ########################### - INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/src" "${CMAKE_BINARY_DIR}/src" "${CMAKE_SOURCE_DIR}/src/ucl/include" @@ -849,28 +794,6 @@ INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/src" "${CMAKE_SOURCE_DIR}/contrib/xxhash" "${CMAKE_SOURCE_DIR}/src/rdns/include") -SET(RSPAMDSRC src/modules.c - src/controller.c - src/fuzzy_storage.c - src/lua_worker.c - src/main.c - src/map.c - src/smtp_proxy.c - src/webui.c - src/worker.c) - -SET(PLUGINSSRC src/plugins/surbl.c - src/plugins/regexp.c - src/plugins/chartable.c - src/plugins/fuzzy_check.c - src/plugins/spf.c - src/plugins/dkim_check.c) - -SET(MODULES_LIST surbl regexp chartable fuzzy_check spf dkim) -SET(WORKERS_LIST normal controller smtp_proxy fuzzy lua webui) - -AddModules(MODULES_LIST WORKERS_LIST) - ################################ SUBDIRS SECTION 
########################### ADD_SUBDIRECTORY(contrib/lgpl) @@ -888,21 +811,10 @@ ADD_DEFINITIONS(-DHAVE_CONFIG_H) ADD_SUBDIRECTORY(contrib/xxhash) ADD_SUBDIRECTORY(contrib/http-parser) ADD_SUBDIRECTORY(contrib/libottery) -ADD_SUBDIRECTORY(src/lua) -ADD_SUBDIRECTORY(src/json) -ADD_SUBDIRECTORY(src/cdb) -ADD_SUBDIRECTORY(src/ucl/cmake) ADD_SUBDIRECTORY(src) -ADD_SUBDIRECTORY(src/client) - -SET(SLAVE_BUILD 1) -ADD_SUBDIRECTORY(src/rdns) -UNSET(SLAVE_BUILD) ADD_SUBDIRECTORY(test) -LIST(LENGTH PLUGINSSRC RSPAMD_MODULES_NUM) - ############################ TARGETS SECTION ############################### @@ -921,54 +833,9 @@ SET(CONFFILES conf/2tld.inc conf/surbl-whitelist.inc) -######################### LINK SECTION ############################### - -ADD_EXECUTABLE(rspamd ${RSPAMDSRC} ${PLUGINSSRC}) -SET_TARGET_PROPERTIES(rspamd PROPERTIES LINKER_LANGUAGE C) -SET_TARGET_PROPERTIES(rspamd PROPERTIES COMPILE_FLAGS "-DRSPAMD_MAIN") -IF(NOT DEBIAN_BUILD) -SET_TARGET_PROPERTIES(rspamd PROPERTIES VERSION ${RSPAMD_VERSION}) -ENDIF(NOT DEBIAN_BUILD) - -TARGET_LINK_LIBRARIES(rspamd rspamd-mime) -TARGET_LINK_LIBRARIES(rspamd rspamd-server) -TARGET_LINK_LIBRARIES(rspamd rspamd-util) -TARGET_LINK_LIBRARIES(rspamd rspamd-lua) - -TARGET_LINK_LIBRARIES(rspamd event) -IF(HAVE_LIBEVENT2) - TARGET_LINK_LIBRARIES(rspamd event_pthreads) -ENDIF(HAVE_LIBEVENT2) -IF(WITH_DB) - TARGET_LINK_LIBRARIES(rspamd db) -ENDIF(WITH_DB) - -IF(OPENSSL_FOUND) - TARGET_LINK_LIBRARIES(rspamd ${OPENSSL_LIBRARIES}) -ENDIF(OPENSSL_FOUND) -IF(HAVE_FETCH_H) - TARGET_LINK_LIBRARIES(rspamd fetch) -ENDIF(HAVE_FETCH_H) -TARGET_LINK_LIBRARIES(rspamd ${RSPAMD_REQUIRED_LIBRARIES}) - -IF(ENABLE_LUAJIT MATCHES "ON") - TARGET_LINK_LIBRARIES(rspamd "${LUAJIT_LIBRARY}") -ELSE(ENABLE_LUAJIT MATCHES "ON") - TARGET_LINK_LIBRARIES(rspamd "${LUA_LIBRARY}") -ENDIF(ENABLE_LUAJIT MATCHES "ON") - -IF(ENABLE_GPERF_TOOLS MATCHES "ON") - TARGET_LINK_LIBRARIES(rspamd profiler) -ENDIF(ENABLE_GPERF_TOOLS MATCHES "ON") 
-TARGET_LINK_LIBRARIES(rspamd hiredis) -IF(GLIB_COMPAT) - TARGET_LINK_LIBRARIES(rspamd glibadditions) -ENDIF(GLIB_COMPAT) - ##################### INSTALLATION ########################################## # Binaries -INSTALL(TARGETS rspamd RUNTIME DESTINATION bin) # Configs INSTALL(CODE "FILE(MAKE_DIRECTORY \$ENV{DESTDIR}${CONFDIR})") @@ -1012,9 +879,10 @@ FOREACH(LUA_CONF ${LUA_CONFIGS}) ENDFOREACH(LUA_CONF) # systemd unit - -INSTALL(FILES "rspamd.service" DESTINATION ${SYSTEMDDIR}) -INSTALL(FILES "rspamd.socket" DESTINATION ${SYSTEMDDIR}) +IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") + INSTALL(FILES "rspamd.service" DESTINATION ${SYSTEMDDIR}) + INSTALL(FILES "rspamd.socket" DESTINATION ${SYSTEMDDIR}) +ENDIF(CMAKE_SYSTEM_NAME STREQUAL "Linux") # Manual pages INSTALL(FILES "doc/rspamd.8" DESTINATION ${MANDIR}/man8) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 70c96f99d..270648697 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,149 +1,139 @@ -# Librspamd-util -SET(LIBRSPAMDUTILSRC aio_event.c - bloom.c - diff.c - fstring.c - fuzzy.c - hash.c - http.c - logger.c - map.c - memcached.c - mem_pool.c - printf.c - radix.c - rrd.c - trie.c - upstream.c - util.c) +MACRO(_AddModulesForced MLIST WLIST) +# Generate unique string for this build + STRING(RANDOM LENGTH 8 _MODULES_ID) + SET(MODULES_ID ${_MODULES_ID} CACHE INTERNAL "Modules ID" FORCE) + FILE(WRITE "src/modules.c" "/* ${MODULES_ID} */\n#include \"config.h\"\n") + + # Handle even old cmake + LIST(LENGTH ${MLIST} MLIST_COUNT) + LIST(LENGTH ${WLIST} WLIST_COUNT) + MATH(EXPR MLIST_MAX ${MLIST_COUNT}-1) + MATH(EXPR WLIST_MAX ${WLIST_COUNT}-1) + + FOREACH(MOD_IDX RANGE ${MLIST_MAX}) + LIST(GET ${MLIST} ${MOD_IDX} MOD) + FILE(APPEND "src/modules.c" "extern module_t ${MOD}_module;\n") + ENDFOREACH(MOD_IDX RANGE ${MLIST_MAX}) + + FILE(APPEND "src/modules.c" "\n\nmodule_t *modules[] = {\n") + + FOREACH(MOD_IDX RANGE ${MLIST_MAX}) + LIST(GET ${MLIST} ${MOD_IDX} MOD) + FILE(APPEND "src/modules.c" 
"&${MOD}_module,\n") + ENDFOREACH(MOD_IDX RANGE ${MLIST_MAX}) + + FILE(APPEND "src/modules.c" "NULL\n};\n") + + FOREACH(MOD_IDX RANGE ${WLIST_MAX}) + LIST(GET ${WLIST} ${MOD_IDX} WRK) + FILE(APPEND "src/modules.c" "extern worker_t ${WRK}_worker;\n") + ENDFOREACH(MOD_IDX RANGE ${WLIST_MAX}) + + FILE(APPEND "src/modules.c" "\n\nworker_t *workers[] = {\n") + + FOREACH(MOD_IDX RANGE ${WLIST_MAX}) + LIST(GET ${WLIST} ${MOD_IDX} WRK) + FILE(APPEND "src/modules.c" "&${WRK}_worker,\n") + ENDFOREACH(MOD_IDX RANGE ${WLIST_MAX}) + FILE(APPEND "src/modules.c" "NULL\n};\n") +ENDMACRO(_AddModulesForced MLIST WLIST) -# Librspamdserver -SET(LIBRSPAMDSERVERSRC - binlog.c - buffer.c - cfg_utils.c - cfg_rcl.c - dkim.c - dns.c - dynamic_cfg.c - events.c - html.c - proxy.c - roll_history.c - settings.c - spf.c - statfile.c - statfile_sync.c - symbols_cache.c - task.c - url.c) - -# Librspamd mime -SET(LIBRSPAMDMIMESRC - expressions.c - filter.c - images.c - message.c - protocol.c - smtp_utils.c - smtp_proto.c - worker_util.c) - -SET(TOKENIZERSSRC tokenizers/tokenizers.c - tokenizers/osb.c) - -SET(CLASSIFIERSSRC classifiers/classifiers.c - classifiers/bayes.c - classifiers/winnow.c) -# Add targets +MACRO(AddModules MLIST WLIST) + _AddModulesForced(${MLIST} ${WLIST}) + #IF(NOT EXISTS "src/modules.c") + # _AddModulesForced(${MLIST} ${WLIST}) + #ELSE(NOT EXISTS "src/modules.c") + # FILE(STRINGS "src/modules.c" FILE_ID_RAW REGEX "^/.*[a-zA-Z0-9]+.*/$") + # STRING(REGEX MATCH "[a-zA-Z0-9]+" FILE_ID "${FILE_ID_RAW}") + # IF(NOT FILE_ID STREQUAL MODULES_ID) + # MESSAGE("Regenerate modules info") + # _AddModulesForced(${MLIST} ${WLIST}) + # ENDIF(NOT FILE_ID STREQUAL MODULES_ID) + #ENDIF(NOT EXISTS "src/modules.c") +ENDMACRO(AddModules MLIST WLIST) -# Rspamdutil -ADD_LIBRARY(rspamd-util ${LINK_TYPE} ${LIBRSPAMDUTILSRC}) -IF(CMAKE_COMPILER_IS_GNUCC) -SET_TARGET_PROPERTIES(rspamd-util PROPERTIES COMPILE_FLAGS "-fno-strict-aliasing") -ENDIF(CMAKE_COMPILER_IS_GNUCC) +# Contrib software 
+ADD_SUBDIRECTORY(json) +ADD_SUBDIRECTORY(cdb) +ADD_SUBDIRECTORY(ucl/cmake) +SET(SLAVE_BUILD 1) +ADD_SUBDIRECTORY(rdns) +UNSET(SLAVE_BUILD) +INCLUDE_DIRECTORIES(libutil libserver libmime) -TARGET_LINK_LIBRARIES(rspamd-util ${RSPAMD_REQUIRED_LIBRARIES}) -TARGET_LINK_LIBRARIES(rspamd-util pcre) -TARGET_LINK_LIBRARIES(rspamd-util ucl) -TARGET_LINK_LIBRARIES(rspamd-util ottery) -TARGET_LINK_LIBRARIES(rspamd-util rspamd-http-parser) -TARGET_LINK_LIBRARIES(rspamd-util event) -TARGET_LINK_LIBRARIES(rspamd-util xxhash) -IF(OPENSSL_FOUND) - TARGET_LINK_LIBRARIES(rspamd-util ${OPENSSL_LIBRARIES}) -ENDIF(OPENSSL_FOUND) +# Rspamd core components +ADD_SUBDIRECTORY(lua) +ADD_SUBDIRECTORY(libutil) +ADD_SUBDIRECTORY(libserver) +ADD_SUBDIRECTORY(libmime) +ADD_SUBDIRECTORY(client) + +SET(RSPAMDSRC modules.c + controller.c + fuzzy_storage.c + lua_worker.c + main.c + smtp_proxy.c + webui.c + worker.c) -IF(NOT DEBIAN_BUILD) -SET_TARGET_PROPERTIES(rspamd-util PROPERTIES VERSION ${RSPAMD_VERSION}) -ENDIF(NOT DEBIAN_BUILD) +SET(PLUGINSSRC plugins/surbl.c + plugins/regexp.c + plugins/chartable.c + plugins/fuzzy_check.c + plugins/spf.c + plugins/dkim_check.c) + +SET(MODULES_LIST surbl regexp chartable fuzzy_check spf dkim) +SET(WORKERS_LIST normal controller smtp_proxy fuzzy lua webui) -IF(GLIB_COMPAT) - INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/contrib/lgpl") - TARGET_LINK_LIBRARIES(rspamd-util glibadditions) -ENDIF(GLIB_COMPAT) +AddModules(MODULES_LIST WORKERS_LIST) +LIST(LENGTH PLUGINSSRC RSPAMD_MODULES_NUM) -IF(NO_SHARED MATCHES "OFF") - INSTALL(TARGETS rspamd-util - LIBRARY DESTINATION ${LIBDIR} - PUBLIC_HEADER DESTINATION include) -ENDIF(NO_SHARED MATCHES "OFF") - -# Librspamd-server +######################### LINK SECTION ############################### -#IF(WITH_DB) -# LIST(APPEND LIBRSPAMDSERVERSRC kvstorage_bdb.c) -#ENDIF(WITH_DB) -#IF(WITH_SQLITE) -# LIST(APPEND LIBRSPAMDSERVERSRC kvstorage_sqlite.c) -#ENDIF(WITH_SQLITE) - -ADD_LIBRARY(rspamd-server ${LINK_TYPE} 
${LIBRSPAMDSERVERSRC} ${TOKENIZERSSRC} ${CLASSIFIERSSRC}) +ADD_EXECUTABLE(rspamd ${RSPAMDSRC} ${PLUGINSSRC}) +SET_TARGET_PROPERTIES(rspamd PROPERTIES LINKER_LANGUAGE C) +SET_TARGET_PROPERTIES(rspamd PROPERTIES COMPILE_FLAGS "-DRSPAMD_MAIN") IF(NOT DEBIAN_BUILD) -SET_TARGET_PROPERTIES(rspamd-server PROPERTIES VERSION ${RSPAMD_VERSION}) +SET_TARGET_PROPERTIES(rspamd PROPERTIES VERSION ${RSPAMD_VERSION}) ENDIF(NOT DEBIAN_BUILD) -SET_TARGET_PROPERTIES(rspamd-server PROPERTIES LINKER_LANGUAGE C COMPILE_FLAGS "-DRSPAMD_LIB") -TARGET_LINK_LIBRARIES(rspamd-server rspamd-lua) -TARGET_LINK_LIBRARIES(rspamd-server rspamd-json) -TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb) -TARGET_LINK_LIBRARIES(rspamd-server rspamd-util) -TARGET_LINK_LIBRARIES(rspamd-server rdns) -IF(CMAKE_COMPILER_IS_GNUCC) -SET_TARGET_PROPERTIES(rspamd-server PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB -fno-strict-aliasing") -ENDIF(CMAKE_COMPILER_IS_GNUCC) +TARGET_LINK_LIBRARIES(rspamd rspamd-mime) +TARGET_LINK_LIBRARIES(rspamd rspamd-server) +TARGET_LINK_LIBRARIES(rspamd rspamd-util) +TARGET_LINK_LIBRARIES(rspamd rspamd-lua) + +TARGET_LINK_LIBRARIES(rspamd event) +IF(HAVE_LIBEVENT2) + TARGET_LINK_LIBRARIES(rspamd event_pthreads) +ENDIF(HAVE_LIBEVENT2) IF(WITH_DB) - TARGET_LINK_LIBRARIES(rspamd-server db) + TARGET_LINK_LIBRARIES(rspamd db) ENDIF(WITH_DB) IF(OPENSSL_FOUND) - TARGET_LINK_LIBRARIES(rspamd-server ${OPENSSL_LIBRARIES}) + TARGET_LINK_LIBRARIES(rspamd ${OPENSSL_LIBRARIES}) ENDIF(OPENSSL_FOUND) +IF(HAVE_FETCH_H) + TARGET_LINK_LIBRARIES(rspamd fetch) +ENDIF(HAVE_FETCH_H) +TARGET_LINK_LIBRARIES(rspamd ${RSPAMD_REQUIRED_LIBRARIES}) -IF(NO_SHARED MATCHES "OFF") - INSTALL(TARGETS rspamd-server - LIBRARY DESTINATION ${LIBDIR} - PUBLIC_HEADER DESTINATION ${INCLUDEDIR}) -ENDIF(NO_SHARED MATCHES "OFF") - -# Librspamdmime -ADD_LIBRARY(rspamd-mime ${LINK_TYPE} ${LIBRSPAMDMIMESRC}) -IF(NOT DEBIAN_BUILD) -SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES VERSION ${RSPAMD_VERSION}) -ENDIF(NOT DEBIAN_BUILD) 
-SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES LINKER_LANGUAGE C) -SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB") -TARGET_LINK_LIBRARIES(rspamd-mime rspamd-server) -TARGET_LINK_LIBRARIES(rspamd-mime rspamd-util) -IF(CMAKE_COMPILER_IS_GNUCC) -SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB -fno-strict-aliasing") -ENDIF(CMAKE_COMPILER_IS_GNUCC) +IF(ENABLE_LUAJIT MATCHES "ON") + TARGET_LINK_LIBRARIES(rspamd "${LUAJIT_LIBRARY}") +ELSE(ENABLE_LUAJIT MATCHES "ON") + TARGET_LINK_LIBRARIES(rspamd "${LUA_LIBRARY}") +ENDIF(ENABLE_LUAJIT MATCHES "ON") + +IF(ENABLE_GPERF_TOOLS MATCHES "ON") + TARGET_LINK_LIBRARIES(rspamd profiler) +ENDIF(ENABLE_GPERF_TOOLS MATCHES "ON") +TARGET_LINK_LIBRARIES(rspamd hiredis) +IF(GLIB_COMPAT) + TARGET_LINK_LIBRARIES(rspamd glibadditions) +ENDIF(GLIB_COMPAT) -IF(NO_SHARED MATCHES "OFF") - INSTALL(TARGETS rspamd-mime - LIBRARY DESTINATION ${LIBDIR} - PUBLIC_HEADER DESTINATION ${INCLUDEDIR}) -ENDIF(NO_SHARED MATCHES "OFF") -INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/src") \ No newline at end of file +INSTALL(TARGETS rspamd RUNTIME DESTINATION bin) \ No newline at end of file diff --git a/src/aio_event.c b/src/aio_event.c deleted file mode 100644 index ccda37083..000000000 --- a/src/aio_event.c +++ /dev/null @@ -1,487 +0,0 @@ -/* Copyright (c) 2010-2011, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "aio_event.h" -#include "main.h" - -#ifdef HAVE_SYS_EVENTFD_H -#include -#endif - -#ifdef HAVE_AIO_H -#include -#endif - -/* Linux syscall numbers */ -#if defined(__i386__) -# define SYS_io_setup 245 -# define SYS_io_destroy 246 -# define SYS_io_getevents 247 -# define SYS_io_submit 248 -# define SYS_io_cancel 249 -#elif defined(__x86_64__) -# define SYS_io_setup 206 -# define SYS_io_destroy 207 -# define SYS_io_getevents 208 -# define SYS_io_submit 209 -# define SYS_io_cancel 210 -#else -# warning "aio is not supported on this platform, please contact author for details" -# define SYS_io_setup 0 -# define SYS_io_destroy 0 -# define SYS_io_getevents 0 -# define SYS_io_submit 0 -# define SYS_io_cancel 0 -#endif - -#define SYS_eventfd 323 -#define MAX_AIO_EV 64 - -struct io_cbdata { - gint fd; - rspamd_aio_cb cb; - guint64 len; - gpointer buf; - gpointer io_buf; - gpointer ud; -}; - -#ifdef LINUX - -/* Linux specific mappings and utilities to avoid using of libaio */ - -typedef unsigned long aio_context_t; - -typedef enum io_iocb_cmd { - IO_CMD_PREAD = 0, - IO_CMD_PWRITE = 1, - - IO_CMD_FSYNC = 2, - IO_CMD_FDSYNC = 3, - - IO_CMD_POLL = 5, - IO_CMD_NOOP = 6, -} io_iocb_cmd_t; - -#if defined(__LITTLE_ENDIAN) 
-#define PADDED(x,y) x, y -#elif defined(__BIG_ENDIAN) -#define PADDED(x,y) y, x -#else -#error edit for your odd byteorder. -#endif - -/* - * we always use a 64bit off_t when communicating - * with userland. its up to libraries to do the - * proper padding and aio_error abstraction - */ - -struct iocb { - /* these are internal to the kernel/libc. */ - guint64 aio_data; /* data to be returned in event's data */ - guint32 PADDED(aio_key, aio_reserved1); - /* the kernel sets aio_key to the req # */ - - /* common fields */ - guint16 aio_lio_opcode; /* see IOCB_CMD_ above */ - gint16 aio_reqprio; - guint32 aio_fildes; - - guint64 aio_buf; - guint64 aio_nbytes; - gint64 aio_offset; - - /* extra parameters */ - guint64 aio_reserved2; /* TODO: use this for a (struct sigevent *) */ - - /* flags for the "struct iocb" */ - guint32 aio_flags; - - /* - * if the IOCB_FLAG_RESFD flag of "aio_flags" is set, this is an - * eventfd to signal AIO readiness to - */ - guint32 aio_resfd; -}; - -struct io_event { - guint64 data; /* the data field from the iocb */ - guint64 obj; /* what iocb this event came from */ - gint64 res; /* result code for this event */ - gint64 res2; /* secondary result */ -}; - -/* Linux specific io calls */ -static int -io_setup (guint nr_reqs, aio_context_t *ctx) -{ - return syscall (SYS_io_setup, nr_reqs, ctx); -} - -static int -io_destroy (aio_context_t ctx) -{ - return syscall (SYS_io_destroy, ctx); -} - -static int -io_getevents (aio_context_t ctx, long min_nr, long nr, struct io_event *events, struct timespec *tmo) -{ - return syscall (SYS_io_getevents, ctx, min_nr, nr, events, tmo); -} - -static int -io_submit (aio_context_t ctx, long n, struct iocb **paiocb) -{ - return syscall (SYS_io_submit, ctx, n, paiocb); -} - -static int -io_cancel (aio_context_t ctx, struct iocb *iocb, struct io_event *result) -{ - return syscall (SYS_io_cancel, ctx, iocb, result); -} - -# ifndef HAVE_SYS_EVENTFD_H -static int -eventfd (guint initval, guint flags) -{ - return 
syscall (SYS_eventfd, initval); -} -# endif - -#endif - -/** - * AIO context - */ -struct aio_context { - struct event_base *base; - gboolean has_aio; /**< Whether we have aio support on a system */ -#ifdef LINUX - /* Eventfd variant */ - gint event_fd; - struct event eventfd_ev; - aio_context_t io_ctx; -#elif defined(HAVE_AIO_H) - /* POSIX aio */ - struct event rtsigs[128]; -#endif -}; - -#ifdef LINUX -/* Eventfd read callback */ -static void -rspamd_eventfdcb (gint fd, gshort what, gpointer ud) -{ - struct aio_context *ctx = ud; - guint64 ready; - gint done, i; - struct io_event event[32]; - struct timespec ts; - struct io_cbdata *ev_data; - - /* Eventfd returns number of events ready got from kernel */ - if (read (fd, &ready, 8) != 8) { - if (errno == EAGAIN) { - return; - } - msg_err ("eventfd read returned error: %s", strerror (errno)); - } - - ts.tv_sec = 0; - ts.tv_nsec = 0; - - while (ready) { - /* Get events ready */ - done = io_getevents (ctx->io_ctx, 1, 32, event, &ts); - - if (done > 0) { - ready -= done; - - for (i = 0; i < done; i ++) { - ev_data = (struct io_cbdata *) (uintptr_t) event[i].data; - /* Call this callback */ - ev_data->cb (ev_data->fd, event[i].res, ev_data->len, ev_data->buf, ev_data->ud); - if (ev_data->io_buf) { - free (ev_data->io_buf); - } - g_slice_free1 (sizeof (struct io_cbdata), ev_data); - } - } - else if (done == 0) { - /* No more events are ready */ - return; - } - else { - msg_err ("io_getevents failed: %s", strerror (errno)); - return; - } - } -} - -#endif - -/** - * Initialize aio with specified event base - */ -struct aio_context* -rspamd_aio_init (struct event_base *base) -{ - struct aio_context *new; - - /* First of all we need to detect which type of aio we can try to use */ - new = g_malloc0 (sizeof (struct aio_context)); - new->base = base; - -#ifdef LINUX - /* On linux we are trying to use io (3) and eventfd for notifying */ - new->event_fd = eventfd (0, 0); - if (new->event_fd == -1) { - msg_err ("eventfd failed: 
%s", strerror (errno)); - } - else { - /* Set this socket non-blocking */ - if (make_socket_nonblocking (new->event_fd) == -1) { - msg_err ("non blocking for eventfd failed: %s", strerror (errno)); - close (new->event_fd); - } - else { - event_set (&new->eventfd_ev, new->event_fd, EV_READ|EV_PERSIST, rspamd_eventfdcb, new); - event_base_set (new->base, &new->eventfd_ev); - event_add (&new->eventfd_ev, NULL); - if (io_setup (MAX_AIO_EV, &new->io_ctx) == -1) { - msg_err ("io_setup failed: %s", strerror (errno)); - close (new->event_fd); - } - else { - new->has_aio = TRUE; - } - } - } -#elif defined(HAVE_AIO_H) - /* TODO: implement this */ -#endif - - return new; -} - -/** - * Open file for aio - */ -gint -rspamd_aio_open (struct aio_context *ctx, const gchar *path, int flags) -{ - gint fd = -1; - /* Fallback */ - if (!ctx->has_aio) { - return open (path, flags); - } -#ifdef LINUX - - fd = open (path, flags | O_DIRECT); - - return fd; -#elif defined(HAVE_AIO_H) - fd = open (path, flags); -#endif - - return fd; -} - -/** - * Asynchronous read of file - */ -gint -rspamd_aio_read (gint fd, gpointer buf, guint64 len, guint64 offset, struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud) -{ - struct io_cbdata *cbdata; - gint r = -1; - - if (ctx->has_aio) { -#ifdef LINUX - struct iocb *iocb[1]; - - cbdata = g_slice_alloc (sizeof (struct io_cbdata)); - cbdata->cb = cb; - cbdata->buf = buf; - cbdata->len = len; - cbdata->ud = ud; - cbdata->fd = fd; - cbdata->io_buf = NULL; - - iocb[0] = alloca (sizeof (struct iocb)); - memset (iocb[0], 0, sizeof (struct iocb)); - iocb[0]->aio_fildes = fd; - iocb[0]->aio_lio_opcode = IO_CMD_PREAD; - iocb[0]->aio_reqprio = 0; - iocb[0]->aio_buf = (guint64)((uintptr_t)buf); - iocb[0]->aio_nbytes = len; - iocb[0]->aio_offset = offset; - iocb[0]->aio_flags |= (1 << 0) /* IOCB_FLAG_RESFD */; - iocb[0]->aio_resfd = ctx->event_fd; - iocb[0]->aio_data = (guint64)((uintptr_t)cbdata); - - /* Iocb is copied to kernel internally, so it is safe to put it 
on stack */ - if (io_submit (ctx->io_ctx, 1, iocb) == 1) { - return len; - } - else { - if (errno == EAGAIN || errno == ENOSYS) { - /* Fall back to sync read */ - goto blocking; - } - return -1; - } - -#elif defined(HAVE_AIO_H) -#endif - } - else { - /* Blocking variant */ -blocking: -#ifdef _LARGEFILE64_SOURCE - r = lseek64 (fd, offset, SEEK_SET); -#else - r = lseek (fd, offset, SEEK_SET); -#endif - if (r > 0) { - r = read (fd, buf, len); - if (r >= 0) { - cb (fd, 0, r, buf, ud); - } - else { - cb (fd, r, -1, buf, ud); - } - } - } - - return r; -} - -/** - * Asynchronous write of file - */ -gint -rspamd_aio_write (gint fd, gpointer buf, guint64 len, guint64 offset, struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud) -{ - struct io_cbdata *cbdata; - gint r = -1; - - if (ctx->has_aio) { -#ifdef LINUX - struct iocb *iocb[1]; - - cbdata = g_slice_alloc (sizeof (struct io_cbdata)); - cbdata->cb = cb; - cbdata->buf = buf; - cbdata->len = len; - cbdata->ud = ud; - cbdata->fd = fd; - /* We need to align pointer on boundary of 512 bytes here */ - if (posix_memalign (&cbdata->io_buf, 512, len) != 0) { - return -1; - } - memcpy (cbdata->io_buf, buf, len); - - iocb[0] = alloca (sizeof (struct iocb)); - memset (iocb[0], 0, sizeof (struct iocb)); - iocb[0]->aio_fildes = fd; - iocb[0]->aio_lio_opcode = IO_CMD_PWRITE; - iocb[0]->aio_reqprio = 0; - iocb[0]->aio_buf = (guint64)((uintptr_t)cbdata->io_buf); - iocb[0]->aio_nbytes = len; - iocb[0]->aio_offset = offset; - iocb[0]->aio_flags |= (1 << 0) /* IOCB_FLAG_RESFD */; - iocb[0]->aio_resfd = ctx->event_fd; - iocb[0]->aio_data = (guint64)((uintptr_t)cbdata); - - /* Iocb is copied to kernel internally, so it is safe to put it on stack */ - if (io_submit (ctx->io_ctx, 1, iocb) == 1) { - return len; - } - else { - if (errno == EAGAIN || errno == ENOSYS) { - /* Fall back to sync read */ - goto blocking; - } - return -1; - } - -#elif defined(HAVE_AIO_H) -#endif - } - else { - /* Blocking variant */ -blocking: -#ifdef 
_LARGEFILE64_SOURCE - r = lseek64 (fd, offset, SEEK_SET); -#else - r = lseek (fd, offset, SEEK_SET); -#endif - if (r > 0) { - r = write (fd, buf, len); - if (r >= 0) { - cb (fd, 0, r, buf, ud); - } - else { - cb (fd, r, -1, buf, ud); - } - } - } - - return r; -} - -/** - * Close of aio operations - */ -gint -rspamd_aio_close (gint fd, struct aio_context *ctx) -{ - gint r = -1; - - if (ctx->has_aio) { -#ifdef LINUX - struct iocb iocb; - struct io_event ev; - - memset (&iocb, 0, sizeof (struct iocb)); - iocb.aio_fildes = fd; - iocb.aio_lio_opcode = IO_CMD_NOOP; - - /* Iocb is copied to kernel internally, so it is safe to put it on stack */ - r = io_cancel (ctx->io_ctx, &iocb, &ev); - close (fd); - return r; - -#elif defined(HAVE_AIO_H) -#endif - } - - r = close (fd); - - return r; -} diff --git a/src/aio_event.h b/src/aio_event.h deleted file mode 100644 index 45f6015de..000000000 --- a/src/aio_event.h +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright (c) 2010-2011, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - -#ifndef AIO_EVENT_H_ -#define AIO_EVENT_H_ - -#include "config.h" - -/** - * AIO context - */ -struct aio_context; - -/** - * Callback for notifying - */ -typedef void (*rspamd_aio_cb) (gint fd, gint res, guint64 len, gpointer data, gpointer ud); - -/** - * Initialize aio with specified event base - */ -struct aio_context* rspamd_aio_init (struct event_base *base); - -/** - * Open file for aio - */ -gint rspamd_aio_open (struct aio_context *ctx, const gchar *path, int flags); - -/** - * Asynchronous read of file - */ -gint rspamd_aio_read (gint fd, gpointer buf, guint64 len, guint64 offset, - struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud); - -/** - * Asynchronous write of file - */ -gint rspamd_aio_write (gint fd, gpointer buf, guint64 len, guint64 offset, - struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud); - -/** - * Close of aio operations - */ -gint rspamd_aio_close (gint fd, struct aio_context *ctx); - -#endif /* AIO_EVENT_H_ */ diff --git a/src/binlog.c b/src/binlog.c deleted file mode 100644 index f085a7de0..000000000 --- a/src/binlog.c +++ /dev/null @@ -1,579 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "binlog.h" -#include "cfg_file.h" -#include "tokenizers/tokenizers.h" - -#define BINLOG_SUFFIX ".binlog" -#define BACKUP_SUFFIX ".old" -#define VALID_MAGIC { 'r', 's', 'l' } -#define VALID_VERSION { '1', '0' } - -static GHashTable *binlog_opened = NULL; -static rspamd_mempool_t *binlog_pool = NULL; - -static gboolean -binlog_write_header (struct rspamd_binlog *log) -{ - struct rspamd_binlog_header header = { - .magic = VALID_MAGIC, - .version = VALID_VERSION, - .padding = { '\0', '\0' }, - }; - - header.create_time = time (NULL); - lock_file (log->fd, FALSE); - - if (write (log->fd, &header, sizeof (struct rspamd_binlog_header)) == -1) { - msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno)); - return FALSE; - } - - - memcpy (&log->header, &header, sizeof (struct rspamd_binlog_header)); - - /* Metaindex */ - log->metaindex = g_malloc (sizeof (struct rspamd_binlog_metaindex)); - bzero (log->metaindex, sizeof (struct rspamd_binlog_metaindex)); - /* Offset 
to metaindex */ - log->metaindex->indexes[0] = sizeof (struct rspamd_binlog_metaindex) + sizeof (struct rspamd_binlog_header); - - if (write (log->fd, log->metaindex, sizeof (struct rspamd_binlog_metaindex)) == -1) { - g_free (log->metaindex); - msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno)); - unlock_file (log->fd, FALSE); - return FALSE; - } - - /* Alloc, write, mmap */ - log->cur_idx = g_malloc (sizeof (struct rspamd_index_block)); - bzero (log->cur_idx, sizeof (struct rspamd_index_block)); - if (write (log->fd, log->cur_idx, sizeof (struct rspamd_index_block)) == -1) { - g_free (log->cur_idx); - msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno)); - unlock_file (log->fd, FALSE); - return FALSE; - } - - unlock_file (log->fd, FALSE); - - return TRUE; -} - -static gboolean -binlog_check_file (struct rspamd_binlog *log) -{ - static gchar valid_magic[] = VALID_MAGIC, valid_version[] = VALID_VERSION; - - if (read (log->fd, &log->header, sizeof (struct rspamd_binlog_header)) != sizeof (struct rspamd_binlog_header)) { - msg_warn ("cannot read file %s, error %d, %s", log->filename, errno, strerror (errno)); - return FALSE; - } - - /* Now check all fields */ - if (memcmp (&log->header.magic, valid_magic, sizeof (valid_magic)) != 0 || - memcmp (&log->header.version, valid_version, sizeof (valid_version)) != 0) { - msg_warn ("cannot validate file %s"); - return FALSE; - } - /* Now mmap metaindex and current index */ - if (log->metaindex == NULL) { - log->metaindex = g_malloc (sizeof (struct rspamd_binlog_metaindex)); - } - if ((read (log->fd, log->metaindex, sizeof (struct rspamd_binlog_metaindex))) != sizeof (struct rspamd_binlog_metaindex)) { - msg_warn ("cannot read metaindex of file %s, error %d, %s", log->filename, errno, strerror (errno)); - return FALSE; - } - /* Current index */ - if (log->cur_idx == NULL) { - log->cur_idx = g_malloc (sizeof (struct rspamd_index_block)); - } - if (lseek 
(log->fd, log->metaindex->indexes[log->metaindex->last_index], SEEK_SET) == -1) { - msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno)); - return FALSE; - } - if ((read (log->fd, log->cur_idx, sizeof (struct rspamd_index_block))) != sizeof (struct rspamd_index_block)) { - msg_warn ("cannot read index in file %s, error %d, %s", log->filename, errno, strerror (errno)); - return FALSE; - } - - log->cur_seq = log->metaindex->last_index * BINLOG_IDX_LEN + log->cur_idx->last_index; - log->cur_time = log->cur_idx->indexes[log->cur_idx->last_index].time; - - return TRUE; - -} - -static gboolean -binlog_create (struct rspamd_binlog *log) -{ - if ((log->fd = open (log->filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { - msg_info ("cannot create file %s, error %d, %s", log->filename, errno, strerror (errno)); - return FALSE; - } - - return binlog_write_header (log); -} - -static gboolean -binlog_open_real (struct rspamd_binlog *log) -{ - if ((log->fd = open (log->filename, O_RDWR)) == -1) { - msg_info ("cannot open file %s, error %d, %s", log->filename, errno, strerror (errno)); - return FALSE; - } - - return binlog_check_file (log); -} - - -struct rspamd_binlog* -binlog_open (rspamd_mempool_t *pool, const gchar *path, time_t rotate_time, gint rotate_jitter) -{ - struct rspamd_binlog *new; - gint len = strlen (path); - struct stat st; - - new = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_binlog)); - new->pool = pool; - new->rotate_time = rotate_time; - new->fd = -1; - - if (rotate_time) { - new->rotate_jitter = g_random_int_range (0, rotate_jitter); - } - - new->filename = rspamd_mempool_alloc (pool, len + sizeof (BINLOG_SUFFIX)); - rspamd_strlcpy (new->filename, path, len + 1); - rspamd_strlcpy (new->filename + len, BINLOG_SUFFIX, sizeof (BINLOG_SUFFIX)); - - if (stat (new->filename, &st) == -1) { - /* Check errno to check whether we should create this file */ - if (errno != ENOENT) { - msg_err ("cannot stat file: %s, 
error %s", new->filename, strerror (errno)); - return NULL; - } - else { - /* In case of ENOENT try to create binlog */ - if (!binlog_create (new)) { - return NULL; - } - } - } - else { - /* Try to open binlog */ - if (!binlog_open_real (new)) { - return NULL; - } - } - - return new; -} - -void -binlog_close (struct rspamd_binlog *log) -{ - if (log) { - if (log->metaindex) { - g_free (log->metaindex); - } - if (log->cur_idx) { - g_free (log->cur_idx); - } - close (log->fd); - } -} - -static gboolean -binlog_tree_callback (gpointer key, gpointer value, gpointer data) -{ - token_node_t *node = key; - struct rspamd_binlog *log = data; - struct rspamd_binlog_element elt; - - elt.h1 = node->h1; - elt.h2 = node->h2; - elt.value = node->value; - - if (write (log->fd, &elt, sizeof (elt)) == -1) { - msg_info ("cannot write token to file: %s, error: %s", log->filename, strerror (errno)); - return TRUE; - } - - return FALSE; -} - -static gboolean -write_binlog_tree (struct rspamd_binlog *log, GTree *nodes) -{ - off_t seek; - struct rspamd_binlog_index *idx; - - lock_file (log->fd, FALSE); - log->cur_seq ++; - - /* Seek to end of file */ - if ((seek = lseek (log->fd, 0, SEEK_END)) == -1) { - unlock_file (log->fd, FALSE); - msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno)); - return FALSE; - } - - /* Now write all nodes to file */ - g_tree_foreach (nodes, binlog_tree_callback, (gpointer)log); - - /* Write index */ - idx = &log->cur_idx->indexes[log->cur_idx->last_index]; - idx->seek = seek; - idx->time = (guint64)time (NULL); - log->cur_time = idx->time; - idx->len = g_tree_nnodes (nodes) * sizeof (struct rspamd_binlog_element); - if (lseek (log->fd, log->metaindex->indexes[log->metaindex->last_index], SEEK_SET) == -1) { - unlock_file (log->fd, FALSE); - msg_info ("cannot seek in file: %s, error: %s, seek: %L, op: insert index", log->filename, - strerror (errno), log->metaindex->indexes[log->metaindex->last_index]); - return FALSE; - } - 
log->cur_idx->last_index ++; - if (write (log->fd, log->cur_idx, sizeof (struct rspamd_index_block)) == -1) { - unlock_file (log->fd, FALSE); - msg_info ("cannot write index to file: %s, error: %s", log->filename, strerror (errno)); - return FALSE; - } - - unlock_file (log->fd, FALSE); - - return TRUE; -} - -static gboolean -create_new_metaindex_block (struct rspamd_binlog *log) -{ - off_t seek; - - lock_file (log->fd, FALSE); - - log->metaindex->last_index ++; - /* Seek to end of file */ - if ((seek = lseek (log->fd, 0, SEEK_END)) == -1) { - unlock_file (log->fd, FALSE); - msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno)); - return FALSE; - } - if (write (log->fd, log->cur_idx, sizeof (struct rspamd_index_block)) == -1) { - unlock_file (log->fd, FALSE); - g_free (log->cur_idx); - msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno)); - return FALSE; - } - /* Offset to metaindex */ - log->metaindex->indexes[log->metaindex->last_index] = seek; - /* Overwrite all metaindexes */ - if (lseek (log->fd, sizeof (struct rspamd_binlog_header), SEEK_SET) == -1) { - unlock_file (log->fd, FALSE); - msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno)); - return FALSE; - } - if (write (log->fd, log->metaindex, sizeof (struct rspamd_binlog_metaindex)) == -1) { - unlock_file (log->fd, FALSE); - msg_info ("cannot write metaindex in file: %s, error: %s", log->filename, strerror (errno)); - return FALSE; - } - bzero (log->cur_idx, sizeof (struct rspamd_index_block)); - unlock_file (log->fd, FALSE); - - return TRUE; -} - -static gboolean -maybe_rotate_binlog (struct rspamd_binlog *log) -{ - guint64 now = time (NULL); - - if (log->rotate_time && ((now - log->header.create_time) > (guint)(log->rotate_time + log->rotate_jitter))) { - return TRUE; - } - return FALSE; -} - -static gboolean -rotate_binlog (struct rspamd_binlog *log) -{ - gchar *backup_name; - struct stat st; - - lock_file (log->fd, 
FALSE); - - /* Unmap mapped fragments */ - if (log->metaindex) { - g_free (log->metaindex); - log->metaindex = NULL; - } - if (log->cur_idx) { - g_free (log->cur_idx); - log->cur_idx = NULL; - } - /* Format backup name */ - backup_name = g_strdup_printf ("%s.%s", log->filename, BACKUP_SUFFIX); - - if (stat (backup_name, &st) != -1) { - msg_info ("replace old %s", backup_name); - unlink (backup_name); - } - - rename (log->filename, backup_name); - g_free (backup_name); - - /* XXX: maybe race condition here */ - unlock_file (log->fd, FALSE); - close (log->fd); - - return binlog_create (log); - -} - -gboolean -binlog_insert (struct rspamd_binlog *log, GTree *nodes) -{ - off_t seek; - - if (!log || !log->metaindex || !log->cur_idx || !nodes) { - msg_info ("improperly opened binlog: %s", log != NULL ? log->filename : "unknown"); - return FALSE; - } - - if (maybe_rotate_binlog (log)) { - if (!rotate_binlog (log)) { - return FALSE; - } - } - /* First of all try to place new tokens in current index */ - if (log->cur_idx->last_index < BINLOG_IDX_LEN) { - /* All is ok */ - return write_binlog_tree (log, nodes); - } - /* Current index table is all busy, try to allocate new index */ - - /* Check metaindex free space */ - if (log->metaindex->last_index < METAINDEX_LEN) { - /* Create new index block */ - if ((seek = lseek (log->fd, 0, SEEK_END)) == (off_t)-1) { - msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno)); - return FALSE; - } - if (!create_new_metaindex_block (log)) { - return FALSE; - } - return write_binlog_tree (log, nodes); - } - - /* All binlog is filled, we need to rotate it forcefully */ - if (!rotate_binlog (log)) { - return FALSE; - } - - return write_binlog_tree (log, nodes); -} - -gboolean -binlog_sync (struct rspamd_binlog *log, guint64 from_rev, guint64 *from_time, GByteArray **rep) -{ - guint32 metaindex_num; - struct rspamd_index_block *idxb; - struct rspamd_binlog_index *idx; - gboolean idx_mapped = FALSE, res = TRUE, 
is_first = FALSE; - - if (!log || !log->metaindex || !log->cur_idx) { - msg_info ("improperly opened binlog: %s", log != NULL ? log->filename : "unknown"); - return FALSE; - } - - if (*rep == NULL) { - *rep = g_malloc (sizeof (GByteArray)); - is_first = TRUE; - } - else { - /* Unmap old fragment */ - g_free ((*rep)->data); - } - - if (from_rev == log->cur_seq) { - /* Last record */ - *rep = NULL; - return FALSE; - } - else if (from_rev > log->cur_seq) { - /* Slave has more actual copy, write this to log and abort sync */ - msg_warn ("slave has more recent revision of statfile %s: %uL and our is: %uL", log->filename, from_rev, log->cur_seq); - *rep = NULL; - *from_time = 0; - return FALSE; - } - - metaindex_num = from_rev / BINLOG_IDX_LEN; - /* First of all try to find this revision */ - if (metaindex_num > log->metaindex->last_index) { - return FALSE; - } - else if (metaindex_num != log->metaindex->last_index) { - /* Need to remap index block */ - lock_file (log->fd, FALSE); - idxb = g_malloc (sizeof (struct rspamd_index_block)); - idx_mapped = TRUE; - if (lseek (log->fd, log->metaindex->indexes[metaindex_num], SEEK_SET) == -1) { - unlock_file (log->fd, FALSE); - msg_warn ("cannot seek file %s, error %d, %s", log->filename, errno, strerror (errno)); - res = FALSE; - goto end; - } - if ((read (log->fd, idxb, sizeof (struct rspamd_index_block))) != sizeof (struct rspamd_index_block)) { - unlock_file (log->fd, FALSE); - msg_warn ("cannot read index from file %s, error %d, %s", log->filename, errno, strerror (errno)); - res = FALSE; - goto end; - } - unlock_file (log->fd, FALSE); - } - else { - idxb = log->cur_idx; - } - /* Now check specified index */ - idx = &idxb->indexes[from_rev % BINLOG_IDX_LEN]; - if (is_first && idx->time != *from_time) { - res = FALSE; - *from_time = 0; - goto end; - } - else { - *from_time = idx->time; - } - - /* Now fill reply structure */ - (*rep)->len = idx->len; - /* Read result */ - msg_info ("update from binlog '%s' from revision: %uL 
to revision %uL size is %uL", - log->filename, from_rev, log->cur_seq, idx->len); - if (lseek (log->fd, idx->seek, SEEK_SET) == -1) { - msg_warn ("cannot seek file %s, error %d, %s", log->filename, errno, strerror (errno)); - res = FALSE; - goto end; - } - - (*rep)->data = g_malloc (idx->len); - if ((read (log->fd, (*rep)->data, idx->len)) != (ssize_t)idx->len) { - msg_warn ("cannot read file %s, error %d, %s", log->filename, errno, strerror (errno)); - res = FALSE; - goto end; - } - -end: - if (idx_mapped) { - g_free (idxb); - } - - return res; -} - -static gboolean -maybe_init_static (void) -{ - if (!binlog_opened) { - binlog_opened = g_hash_table_new (g_direct_hash, g_direct_equal); - if (!binlog_opened) { - return FALSE; - } - } - - if (!binlog_pool) { - binlog_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); - if (!binlog_pool) { - return FALSE; - } - } - - return TRUE; -} - -gboolean -maybe_write_binlog (struct classifier_config *ccf, struct statfile *st, stat_file_t *file, GTree *nodes) -{ - struct rspamd_binlog *log; - - if (ccf == NULL) { - return FALSE; - } - - - if (st == NULL || nodes == NULL || st->binlog == NULL || st->binlog->affinity != AFFINITY_MASTER) { - return FALSE; - } - - if (!maybe_init_static ()) { - return FALSE; - } - - if ((log = g_hash_table_lookup (binlog_opened, st)) == NULL) { - if ((log = binlog_open (binlog_pool, st->path, st->binlog->rotate_time, st->binlog->rotate_time / 2)) != NULL) { - g_hash_table_insert (binlog_opened, st, log); - } - else { - return FALSE; - } - } - - if (binlog_insert (log, nodes)) { - msg_info ("set new revision of statfile %s: %uL", st->symbol, log->cur_seq); - (void)statfile_set_revision (file, log->cur_seq, log->cur_time); - return TRUE; - } - - return FALSE; -} - -struct rspamd_binlog* -get_binlog_by_statfile (struct statfile *st) -{ - struct rspamd_binlog *log; - - if (st == NULL || st->binlog == NULL || st->binlog->affinity != AFFINITY_MASTER) { - return NULL; - } - - if 
(!maybe_init_static ()) { - return NULL; - } - - if ((log = g_hash_table_lookup (binlog_opened, st)) == NULL) { - if ((log = binlog_open (binlog_pool, st->path, st->binlog->rotate_time, st->binlog->rotate_time / 2)) != NULL) { - g_hash_table_insert (binlog_opened, st, log); - } - else { - return NULL; - } - } - - return log; -} diff --git a/src/binlog.h b/src/binlog.h deleted file mode 100644 index 9e1a786d3..000000000 --- a/src/binlog.h +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef RSPAMD_BINLOG_H -#define RSPAMD_BINLOG_H - -#include "config.h" -#include "main.h" -#include "statfile.h" - -/* How much records are in a single index */ -#define BINLOG_IDX_LEN 200 -#define METAINDEX_LEN 1024 - -/* Assume 8 bytes words */ -struct rspamd_binlog_header { - gchar magic[3]; - gchar version[2]; - gchar padding[3]; - guint64 create_time; -}; - -struct rspamd_binlog_index { - guint64 time; - guint64 seek; - guint32 len; -}; - -struct rspamd_index_block { - struct rspamd_binlog_index indexes[BINLOG_IDX_LEN]; - guint32 last_index; -}; - -struct rspamd_binlog_metaindex { - guint64 indexes[METAINDEX_LEN]; - guint64 last_index; -}; - -struct rspamd_binlog_element { - guint32 h1; - guint32 h2; - float value; -} __attribute__((__packed__)); - -struct rspamd_binlog { - gchar *filename; - time_t rotate_time; - gint rotate_jitter; - guint64 cur_seq; - guint64 cur_time; - gint fd; - rspamd_mempool_t *pool; - - struct rspamd_binlog_header header; - struct rspamd_binlog_metaindex *metaindex; - struct rspamd_index_block *cur_idx; -}; - -struct classifier_config; - -/* - * Open binlog at specified path with specified rotate params - */ -struct rspamd_binlog* binlog_open (rspamd_mempool_t *pool, const gchar *path, time_t rotate_time, gint rotate_jitter); - -/* - * Get and open binlog for specified statfile - */ -struct rspamd_binlog* get_binlog_by_statfile (struct statfile *st); - -/* - * Close binlog - */ -void binlog_close (struct rspamd_binlog *log); - -/* - * Insert new nodes inside binlog - 
*/ -gboolean binlog_insert (struct rspamd_binlog *log, GTree *nodes); - -/* - * Sync binlog from specified revision - * @param log binlog structure - * @param from_rev from revision - * @param from_time from time - * @param rep a portion of changes for revision is stored here - * @return TRUE if there are more revisions to get and FALSE if synchronization is complete - */ -gboolean binlog_sync (struct rspamd_binlog *log, guint64 from_rev, guint64 *from_time, GByteArray **rep); - -/* - * Conditional write to a binlog for specified statfile - */ -gboolean maybe_write_binlog (struct classifier_config *ccf, struct statfile *st, stat_file_t *file, GTree *nodes); - -#endif diff --git a/src/bloom.c b/src/bloom.c deleted file mode 100644 index f857d2e49..000000000 --- a/src/bloom.c +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "bloom.h" -#include "xxhash.h" - -/* 4 bits are used for counting (implementing delete operation) */ -#define SIZE_BIT 4 - -/* These macroes are for 4 bits for counting element */ -#define INCBIT(a, n, acc) do { \ - acc = a[n * SIZE_BIT / CHAR_BIT] & (0xF << (n % (CHAR_BIT / SIZE_BIT) * SIZE_BIT)); \ - acc ++; \ - acc &= 0xF; \ - \ - a[n * SIZE_BIT / CHAR_BIT] &= (0xF << (4 - (n % (CHAR_BIT/SIZE_BIT) * SIZE_BIT))); \ - a[n * SIZE_BIT / CHAR_BIT] |= (acc << (n % (CHAR_BIT/SIZE_BIT) * SIZE_BIT)); \ -} while (0); - -#define DECBIT(a, n, acc) do { \ - acc = a[n * SIZE_BIT / CHAR_BIT] & (0xF << (n % (CHAR_BIT / SIZE_BIT) * SIZE_BIT)); \ - acc --; \ - acc &= 0xF; \ - \ - a[n * SIZE_BIT / CHAR_BIT] &= (0xF << (4 - (n % (CHAR_BIT/SIZE_BIT) * SIZE_BIT))); \ - a[n * SIZE_BIT / CHAR_BIT] |= (acc << (n % (CHAR_BIT/SIZE_BIT) * SIZE_BIT)); \ -} while (0); - -#define GETBIT(a, n) (a[n * SIZE_BIT / CHAR_BIT] & (0xF << (n % (CHAR_BIT/SIZE_BIT) * SIZE_BIT))) - -/* Common hash functions */ - - -rspamd_bloom_filter_t * -rspamd_bloom_create (size_t size, size_t nfuncs, ...) 
-{ - rspamd_bloom_filter_t *bloom; - va_list l; - gsize n; - - if (!(bloom = g_malloc (sizeof (rspamd_bloom_filter_t)))) { - return NULL; - } - if (!(bloom->a = g_new0 (gchar, (size + CHAR_BIT - 1) / CHAR_BIT * SIZE_BIT))) { - g_free (bloom); - return NULL; - } - if (!(bloom->seeds = g_new0 (guint32, nfuncs))) { - g_free (bloom->a); - g_free (bloom); - return NULL; - } - - va_start (l, nfuncs); - for (n = 0; n < nfuncs; ++n) { - bloom->seeds[n] = va_arg (l, guint32); - } - va_end (l); - - bloom->nfuncs = nfuncs; - bloom->asize = size; - - return bloom; -} - -void -rspamd_bloom_destroy (rspamd_bloom_filter_t * bloom) -{ - g_free (bloom->a); - g_free (bloom->seeds); - g_free (bloom); -} - -gboolean -rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s) -{ - size_t n, len; - u_char t; - guint v; - - if (s == NULL) { - return FALSE; - } - len = strlen (s); - for (n = 0; n < bloom->nfuncs; ++n) { - v = XXH32 (s, len, bloom->seeds[n]) % bloom->asize; - INCBIT (bloom->a, v, t); - } - - return TRUE; -} - -gboolean -rspamd_bloom_del (rspamd_bloom_filter_t * bloom, const gchar *s) -{ - size_t n, len; - u_char t; - guint v; - - if (s == NULL) { - return FALSE; - } - len = strlen (s); - for (n = 0; n < bloom->nfuncs; ++n) { - v = XXH32 (s, len, bloom->seeds[n]) % bloom->asize; - DECBIT (bloom->a, v, t); - } - - return TRUE; - -} - -gboolean -rspamd_bloom_check (rspamd_bloom_filter_t * bloom, const gchar *s) -{ - size_t n, len; - guint v; - - if (s == NULL) { - return FALSE; - } - len = strlen (s); - for (n = 0; n < bloom->nfuncs; ++n) { - v = XXH32 (s, len, bloom->seeds[n]) % bloom->asize; - if (!(GETBIT (bloom->a, v))) { - return FALSE; - } - } - - return TRUE; -} diff --git a/src/bloom.h b/src/bloom.h deleted file mode 100644 index 380143c80..000000000 --- a/src/bloom.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef __RSPAMD_BLOOM_H__ -#define __RSPAMD_BLOOM_H__ - -#include "config.h" - -typedef struct rspamd_bloom_filter_s { - size_t asize; - gchar *a; - size_t nfuncs; 
- guint32 *seeds; -} rspamd_bloom_filter_t; - - -/* - * Some random uint32 seeds for hashing - */ -#define RSPAMD_DEFAULT_BLOOM_HASHES 8, 0x61782caaU, 0x79ab8141U, 0xe45ee2d1U, \ - 0xf97542d1U, 0x1e2623edU, 0xf5a23cfeU, 0xa41b2508U, 0x85abdce8U - -/* - * Create new bloom filter - * @param size length of bloom buffer - * @param nfuncs number of hash functions - * @param ... hash functions list - */ -rspamd_bloom_filter_t* rspamd_bloom_create (size_t size, size_t nfuncs, ...); - -/* - * Destroy bloom filter - */ -void rspamd_bloom_destroy (rspamd_bloom_filter_t * bloom); - -/* - * Add a string to bloom filter - */ -gboolean rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s); - -/* - * Delete a string from bloom filter - */ -gboolean rspamd_bloom_del (rspamd_bloom_filter_t * bloom, const gchar *s); - -/* - * Check whether this string is in bloom filter (algorithm produces FALSE-POSITIVES, so result must be checked if it is positive) - */ -gboolean rspamd_bloom_check (rspamd_bloom_filter_t * bloom, const gchar *s); - -#endif diff --git a/src/buffer.c b/src/buffer.c deleted file mode 100644 index 864f2fad6..000000000 --- a/src/buffer.c +++ /dev/null @@ -1,786 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "buffer.h" -#include "main.h" -#ifdef HAVE_SYS_SENDFILE_H -#include -#endif - -#define G_DISPATCHER_ERROR dispatcher_error_quark() -#define debug_ip(...) rspamd_conditional_debug(rspamd_main->logger, NULL, __FUNCTION__, __VA_ARGS__) - -static void dispatcher_cb (gint fd, short what, void *arg); - -static inline GQuark -dispatcher_error_quark (void) -{ - return g_quark_from_static_string ("g-dispatcher-error-quark"); -} - -static gboolean -sendfile_callback (rspamd_io_dispatcher_t *d) -{ - - GError *err; - -#ifdef HAVE_SENDFILE -# if defined(FREEBSD) || defined(DARWIN) - off_t off = 0; - #if defined(FREEBSD) - /* FreeBSD version */ - if (sendfile (d->sendfile_fd, d->fd, d->offset, 0, NULL, &off, 0) != 0) { - #elif defined(DARWIN) - /* Darwin version */ - if (sendfile (d->sendfile_fd, d->fd, d->offset, &off, NULL, 0) != 0) { - #endif - if (errno != EAGAIN) { - if (d->err_callback) { - err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno)); - d->err_callback (err, d->user_data); - return FALSE; - } - } - else { - debug_ip("partially write data, retry"); - /* Wait for other event */ - d->offset += off; - event_del (d->ev); - event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); - 
event_base_set (d->ev_base, d->ev); - event_add (d->ev, d->tv); - } - } - else { - if (d->write_callback) { - if (!d->write_callback (d->user_data)) { - debug_ip("callback set wanna_die flag, terminating"); - return FALSE; - } - } - event_del (d->ev); - event_set (d->ev, d->fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d); - event_base_set (d->ev_base, d->ev); - event_add (d->ev, d->tv); - d->in_sendfile = FALSE; - } -# else - ssize_t r; - /* Linux version */ - r = sendfile (d->fd, d->sendfile_fd, &d->offset, d->file_size); - if (r == -1) { - if (errno != EAGAIN) { - if (d->err_callback) { - err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno)); - d->err_callback (err, d->user_data); - return FALSE; - } - } - else { - debug_ip("partially write data, retry"); - /* Wait for other event */ - event_del (d->ev); - event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); - event_base_set (d->ev_base, d->ev); - event_add (d->ev, d->tv); - } - } - else if (r + d->offset < (ssize_t)d->file_size) { - debug_ip("partially write data, retry"); - /* Wait for other event */ - event_del (d->ev); - event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); - event_base_set (d->ev_base, d->ev); - event_add (d->ev, d->tv); - } - else { - if (d->write_callback) { - if (!d->write_callback (d->user_data)) { - debug_ip("callback set wanna_die flag, terminating"); - return FALSE; - } - } - event_del (d->ev); - event_set (d->ev, d->fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d); - event_base_set (d->ev_base, d->ev); - event_add (d->ev, d->tv); - d->in_sendfile = FALSE; - } -# endif -#else - ssize_t r; - r = write (d->fd, d->map, d->file_size - d->offset); - if (r == -1) { - if (errno != EAGAIN) { - if (d->err_callback) { - err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno)); - d->err_callback (err, d->user_data); - return FALSE; - } - } - else { - debug_ip("partially write data, retry"); - /* Wait for other event */ - event_del (d->ev); - 
event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); - event_base_set (d->ev_base, d->ev); - event_add (d->ev, d->tv); - } - } - else if (r + d->offset < d->file_size) { - d->offset += r; - debug_ip("partially write data, retry"); - /* Wait for other event */ - event_del (d->ev); - event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); - event_base_set (d->ev_base, d->ev); - event_add (d->ev, d->tv); - } - else { - if (d->write_callback) { - if (!d->write_callback (d->user_data)) { - debug_ip("callback set wanna_die flag, terminating"); - return FALSE; - } - } - event_del (d->ev); - event_set (d->ev, d->fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d); - event_base_set (d->ev_base, d->ev); - event_add (d->ev, d->tv); - d->in_sendfile = FALSE; - } -#endif - return TRUE; -} - -#define BUFREMAIN(x) (x)->data->size - ((x)->pos - (x)->data->begin) - -#define APPEND_OUT_BUFFER(d, buf) do { \ - DL_APPEND((d)->out_buffers.buffers, buf); \ - (d)->out_buffers.pending ++; \ - } while (0) -#define DELETE_OUT_BUFFER(d, buf) do { \ - DL_DELETE((d)->out_buffers.buffers, (buf)); \ - g_string_free((buf->data), (buf)->allocated); \ - g_slice_free1(sizeof (struct rspamd_out_buffer_s), (buf)); \ - (d)->out_buffers.pending --; \ - } while (0) - -static gboolean -write_buffers (gint fd, rspamd_io_dispatcher_t * d, gboolean is_delayed) -{ - GError *err = NULL; - struct rspamd_out_buffer_s *cur = NULL, *tmp; - ssize_t r; - struct iovec *iov; - guint i, len; - - len = d->out_buffers.pending; - while (len > 0) { - /* Unset delayed as actually we HAVE buffers to write */ - is_delayed = TRUE; - iov = g_slice_alloc (len * sizeof (struct iovec)); - i = 0; - DL_FOREACH_SAFE (d->out_buffers.buffers, cur, tmp) { - iov[i].iov_base = cur->data->str; - iov[i].iov_len = cur->data->len; - i ++; - } - /* Now try to write the whole vector */ - r = writev (fd, iov, len); - if (r == -1 && errno != EAGAIN) { - g_slice_free1 (len * sizeof (struct iovec), iov); - if (d->err_callback) { - 
err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno)); - d->err_callback (err, d->user_data); - return FALSE; - } - } - else if (r > 0) { - /* Find pos inside buffers */ - DL_FOREACH_SAFE (d->out_buffers.buffers, cur, tmp) { - if (r >= (ssize_t)cur->data->len) { - /* Mark this buffer as read */ - r -= cur->data->len; - DELETE_OUT_BUFFER (d, cur); - } - else { - /* This buffer was not written completely */ - g_string_erase (cur->data, 0, r); - break; - } - } - g_slice_free1 (len * sizeof (struct iovec), iov); - if (d->out_buffers.pending > 0) { - /* Wait for other event */ - event_del (d->ev); - event_set (d->ev, fd, EV_WRITE, dispatcher_cb, (void *)d); - event_base_set (d->ev_base, d->ev); - event_add (d->ev, d->tv); - return TRUE; - } - } - else if (r == 0) { - /* Got EOF while we wait for data */ - g_slice_free1 (len * sizeof (struct iovec), iov); - if (d->err_callback) { - err = g_error_new (G_DISPATCHER_ERROR, EOF, "got EOF"); - d->err_callback (err, d->user_data); - return FALSE; - } - } - else if (r == -1 && errno == EAGAIN) { - g_slice_free1 (len * sizeof (struct iovec), iov); - debug_ip("partially write data, retry"); - /* Wait for other event */ - event_del (d->ev); - event_set (d->ev, fd, EV_WRITE, dispatcher_cb, (void *)d); - event_base_set (d->ev_base, d->ev); - event_add (d->ev, d->tv); - return TRUE; - } - len = d->out_buffers.pending; - } - - if (d->out_buffers.pending == 0) { - /* Disable write event for this time */ - - debug_ip ("all buffers were written successfully"); - - if (is_delayed && d->write_callback) { - if (!d->write_callback (d->user_data)) { - debug_ip("callback set wanna_die flag, terminating"); - return FALSE; - } - } - - event_del (d->ev); - event_set (d->ev, fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d); - event_base_set (d->ev_base, d->ev); - event_add (d->ev, d->tv); - } - else { - /* Plan other write event */ - event_del (d->ev); - event_set (d->ev, fd, EV_WRITE, dispatcher_cb, (void *)d); - event_base_set 
(d->ev_base, d->ev); - event_add (d->ev, d->tv); - } - - return TRUE; -} - -static void -read_buffers (gint fd, rspamd_io_dispatcher_t * d, gboolean skip_read) -{ - ssize_t r; - GError *err = NULL; - f_str_t res; - gchar *c, *b; - gchar *end; - size_t len; - enum io_policy saved_policy; - - if (d->wanna_die) { - rspamd_remove_dispatcher (d); - return; - } - - if (d->in_buf == NULL) { - d->in_buf = rspamd_mempool_alloc_tmp (d->pool, sizeof (rspamd_buffer_t)); - if (d->policy == BUFFER_LINE || d->policy == BUFFER_ANY) { - d->in_buf->data = fstralloc_tmp (d->pool, d->default_buf_size); - } - else { - d->in_buf->data = fstralloc_tmp (d->pool, d->nchars + 1); - } - d->in_buf->pos = d->in_buf->data->begin; - } - - end = d->in_buf->pos; - len = d->in_buf->data->len; - - if (BUFREMAIN (d->in_buf) == 0) { - /* Buffer is full, try to call callback with overflow error */ - if (d->err_callback) { - err = g_error_new (G_DISPATCHER_ERROR, E2BIG, "buffer overflow"); - d->err_callback (err, d->user_data); - return; - } - } - else if (!skip_read) { - /* Try to read the whole buffer */ - r = read (fd, end, BUFREMAIN (d->in_buf)); - if (r == -1 && errno != EAGAIN) { - if (d->err_callback) { - err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno)); - d->err_callback (err, d->user_data); - return; - } - } - else if (r == 0) { - /* Got EOF while we wait for data */ -#if 0 - if (d->err_callback) { - err = g_error_new (G_DISPATCHER_ERROR, EOF, "got EOF"); - d->err_callback (err, d->user_data); - return; - } -#endif - /* Read returned 0, it may be shutdown or full quit */ - if (!d->want_read) { - d->half_closed = TRUE; - /* Do not expect any read after this */ - event_del (d->ev); - } - else { - if (d->err_callback) { - err = g_error_new (G_DISPATCHER_ERROR, EOF, "got EOF"); - d->err_callback (err, d->user_data); - return; - } - } - } - else if (r == -1 && errno == EAGAIN) { - debug_ip("partially read data, retry"); - return; - } - else { - /* Set current position in buffer 
*/ - d->in_buf->pos += r; - d->in_buf->data->len += r; - } - debug_ip("read %z characters, policy is %s, watermark is: %z, buffer has %z bytes", r, - d->policy == BUFFER_LINE ? "LINE" : "CHARACTER", d->nchars, d->in_buf->data->len); - } - - saved_policy = d->policy; - c = d->in_buf->data->begin; - end = d->in_buf->pos; - len = d->in_buf->data->len; - b = c; - r = 0; - - switch (d->policy) { - case BUFFER_LINE: - /** Variables: - * b - begin of line - * r - current position in buffer - * *len - length of remaining buffer - * c - pointer to current position (buffer->begin + r) - * res - result string - */ - while (r < (ssize_t)len) { - if (*c == '\n') { - res.begin = b; - res.len = c - b; - /* Strip EOL */ - if (d->strip_eol) { - if (r != 0 && *(c - 1) == '\r') { - res.len--; - } - } - else { - /* Include EOL in reply */ - res.len ++; - } - /* Call callback for a line */ - if (d->read_callback) { - if (!d->read_callback (&res, d->user_data)) { - return; - } - if (d->policy != saved_policy) { - /* Drain buffer as policy is changed */ - /* Note that d->in_buffer is other pointer now, so we need to reinit all pointers */ - /* First detect how much symbols do we have */ - if (end == c) { - /* In fact we read the whole buffer and change input policy, so just set current pos to begin of buffer */ - d->in_buf->pos = d->in_buf->data->begin; - d->in_buf->data->len = 0; - } - else { - /* Otherwise we need to move buffer */ - /* Reinit pointers */ - len = d->in_buf->data->len - r - 1; - end = d->in_buf->data->begin + r + 1; - memmove (d->in_buf->data->begin, end, len); - d->in_buf->data->len = len; - d->in_buf->pos = d->in_buf->data->begin + len; - /* Process remaining buffer */ - read_buffers (fd, d, TRUE); - } - return; - } - } - /* Set new begin of line */ - b = c + 1; - } - r++; - c++; - } - /* Now drain remaining characters in buffer */ - memmove (d->in_buf->data->begin, b, c - b); - d->in_buf->data->len = c - b; - d->in_buf->pos = d->in_buf->data->begin + (c - b); - 
break; - case BUFFER_CHARACTER: - r = d->nchars; - if ((ssize_t)len >= r) { - res.begin = b; - res.len = r; - c = b + r; - if (d->read_callback) { - if (!d->read_callback (&res, d->user_data)) { - return; - } - /* Move remaining string to begin of buffer (draining) */ - if ((ssize_t)len > r) { - len -= r; - memmove (d->in_buf->data->begin, c, len); - d->in_buf->data->len = len; - d->in_buf->pos = d->in_buf->data->begin + len; - b = d->in_buf->data->begin; - } - else { - d->in_buf->data->len = 0; - d->in_buf->pos = d->in_buf->data->begin; - } - if (d->policy != saved_policy && (ssize_t)len != r) { - debug_ip("policy changed during callback, restart buffer's processing"); - read_buffers (fd, d, TRUE); - return; - } - } - } - break; - case BUFFER_ANY: - res.begin = d->in_buf->data->begin; - res.len = len; - - if (d->read_callback) { - /* - * Actually we do not want to send zero sized - * buffers to a read callback - */ - if (! (d->want_read && res.len == 0)) { - if (!d->read_callback (&res, d->user_data)) { - return; - } - } - if (d->policy != saved_policy) { - debug_ip("policy changed during callback, restart buffer's processing"); - read_buffers (fd, d, TRUE); - return; - } - } - d->in_buf->pos = d->in_buf->data->begin; - d->in_buf->data->len = 0; - break; - } -} - -#undef BUFREMAIN - -static void -dispatcher_cb (gint fd, short what, void *arg) -{ - rspamd_io_dispatcher_t *d = (rspamd_io_dispatcher_t *) arg; - GError *err = NULL; - - debug_ip("in dispatcher callback, what: %d, fd: %d", (gint)what, fd); - - if ((what & EV_TIMEOUT) != 0) { - if (d->err_callback) { - err = g_error_new (G_DISPATCHER_ERROR, ETIMEDOUT, "IO timeout"); - d->err_callback (err, d->user_data); - } - } - else if ((what & EV_READ) != 0) { - read_buffers (fd, d, FALSE); - } - else if ((what & EV_WRITE) != 0) { - /* No data to write, disable further EV_WRITE to this fd */ - if (d->in_sendfile) { - sendfile_callback (d); - } - else { - if (d->out_buffers.pending == 0) { - if (d->half_closed && 
!d->is_restored) { - /* Socket is half closed and there is nothing more to write, closing connection */ - if (d->err_callback) { - err = g_error_new (G_DISPATCHER_ERROR, EOF, "got EOF"); - d->err_callback (err, d->user_data); - return; - } - } - else { - /* Want read again */ - event_del (d->ev); - event_set (d->ev, fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d); - event_base_set (d->ev_base, d->ev); - event_add (d->ev, d->tv); - if (d->is_restored && d->write_callback) { - if (!d->write_callback (d->user_data)) { - return; - } - d->is_restored = FALSE; - } - } - } - else { - /* Delayed write */ - write_buffers (fd, d, TRUE); - } - } - } -} - - -rspamd_io_dispatcher_t * -rspamd_create_dispatcher (struct event_base *base, gint fd, enum io_policy policy, - dispatcher_read_callback_t read_cb, dispatcher_write_callback_t write_cb, dispatcher_err_callback_t err_cb, struct timeval *tv, void *user_data) -{ - rspamd_io_dispatcher_t *new; - - if (fd == -1) { - return NULL; - } - - new = g_slice_alloc0 (sizeof (rspamd_io_dispatcher_t)); - - new->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); - if (tv != NULL) { - new->tv = rspamd_mempool_alloc (new->pool, sizeof (struct timeval)); - memcpy (new->tv, tv, sizeof (struct timeval)); - } - else { - new->tv = NULL; - } - new->nchars = 0; - new->in_sendfile = FALSE; - new->policy = policy; - new->read_callback = read_cb; - new->write_callback = write_cb; - new->err_callback = err_cb; - new->user_data = user_data; - new->strip_eol = TRUE; - new->half_closed = FALSE; - new->want_read = TRUE; - new->is_restored = FALSE; - new->default_buf_size = sysconf (_SC_PAGESIZE); - - new->ev = rspamd_mempool_alloc0 (new->pool, sizeof (struct event)); - new->fd = fd; - new->ev_base = base; - - event_set (new->ev, fd, EV_WRITE, dispatcher_cb, (void *)new); - event_base_set (new->ev_base, new->ev); - event_add (new->ev, new->tv); - - return new; -} - -void -rspamd_remove_dispatcher (rspamd_io_dispatcher_t * d) -{ - struct 
rspamd_out_buffer_s *cur, *tmp; - - if (d != NULL) { - DL_FOREACH_SAFE (d->out_buffers.buffers, cur, tmp) { - DELETE_OUT_BUFFER (d, cur); - } - event_del (d->ev); - rspamd_mempool_delete (d->pool); - g_slice_free1 (sizeof (rspamd_io_dispatcher_t), d); - } -} - -void -rspamd_set_dispatcher_policy (rspamd_io_dispatcher_t * d, enum io_policy policy, size_t nchars) -{ - f_str_t *tmp; - gint t; - - if (d->policy != policy || nchars != d->nchars) { - d->policy = policy; - d->nchars = nchars ? nchars : d->default_buf_size; - /* Resize input buffer if needed */ - if (policy == BUFFER_CHARACTER && nchars != 0) { - if (d->in_buf && d->in_buf->data->size < nchars) { - tmp = fstralloc_tmp (d->pool, d->nchars + 1); - memcpy (tmp->begin, d->in_buf->data->begin, d->in_buf->data->len); - t = d->in_buf->pos - d->in_buf->data->begin; - tmp->len = d->in_buf->data->len; - d->in_buf->data = tmp; - d->in_buf->pos = d->in_buf->data->begin + t; - } - } - else if (policy == BUFFER_LINE || policy == BUFFER_ANY) { - if (d->in_buf && d->nchars < d->default_buf_size) { - tmp = fstralloc_tmp (d->pool, d->default_buf_size); - memcpy (tmp->begin, d->in_buf->data->begin, d->in_buf->data->len); - t = d->in_buf->pos - d->in_buf->data->begin; - tmp->len = d->in_buf->data->len; - d->in_buf->data = tmp; - d->in_buf->pos = d->in_buf->data->begin + t; - } - d->strip_eol = TRUE; - } - } - - debug_ip("new input length watermark is %uz", d->nchars); -} - -gboolean -rspamd_dispatcher_write (rspamd_io_dispatcher_t * d, - const void *data, size_t len, gboolean delayed, gboolean allocated) -{ - struct rspamd_out_buffer_s *newbuf; - - newbuf = g_slice_alloc (sizeof (struct rspamd_out_buffer_s)); - if (len == 0) { - /* Assume NULL terminated */ - len = strlen ((const gchar *)data); - } - - if (!allocated) { - newbuf->data = g_string_new_len (data, len); - newbuf->allocated = TRUE; - } - else { - newbuf->data = g_string_new (NULL); - newbuf->data->str = (gchar *)data; - newbuf->data->len = len; - 
newbuf->data->allocated_len = len; - newbuf->allocated = FALSE; - } - - APPEND_OUT_BUFFER (d, newbuf); - - if (!delayed) { - debug_ip("plan write event"); - return write_buffers (d->fd, d, FALSE); - } - /* Otherwise plan write event */ - event_del (d->ev); - event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); - event_base_set (d->ev_base, d->ev); - event_add (d->ev, d->tv); - - return TRUE; -} - -gboolean rspamd_dispatcher_write_string (rspamd_io_dispatcher_t *d, - GString *str, - gboolean delayed, - gboolean free_on_write) -{ - struct rspamd_out_buffer_s *newbuf; - - newbuf = g_slice_alloc (sizeof (struct rspamd_out_buffer_s)); - newbuf->data = str; - newbuf->allocated = free_on_write; - - APPEND_OUT_BUFFER (d, newbuf); - - if (!delayed) { - debug_ip("plan write event"); - return write_buffers (d->fd, d, FALSE); - } - /* Otherwise plan write event */ - event_del (d->ev); - event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); - event_base_set (d->ev_base, d->ev); - event_add (d->ev, d->tv); - - return TRUE; -} - -gboolean -rspamd_dispatcher_sendfile (rspamd_io_dispatcher_t *d, gint fd, size_t len) -{ - if (lseek (fd, 0, SEEK_SET) == -1) { - msg_warn ("lseek failed: %s", strerror (errno)); - return FALSE; - } - - d->offset = 0; - d->in_sendfile = TRUE; - d->sendfile_fd = fd; - d->file_size = len; - -#ifndef HAVE_SENDFILE - #ifdef HAVE_MMAP_NOCORE - if ((d->map = mmap (NULL, len, PROT_READ, MAP_SHARED | MAP_NOCORE, fd, 0)) == MAP_FAILED) { - #else - if ((d->map = mmap (NULL, len, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { - #endif - msg_warn ("mmap failed: %s", strerror (errno)); - return FALSE; - } -#endif - - return sendfile_callback (d); -} - -void -rspamd_dispatcher_pause (rspamd_io_dispatcher_t * d) -{ - debug_ip ("paused dispatcher"); - event_del (d->ev); - d->is_restored = FALSE; -} - -void -rspamd_dispatcher_restore (rspamd_io_dispatcher_t * d) -{ - if (!d->is_restored) { - debug_ip ("restored dispatcher"); - event_del (d->ev); - 
event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, d); - event_base_set (d->ev_base, d->ev); - event_add (d->ev, d->tv); - d->is_restored = TRUE; - } -} - -void -rspamd_dispacther_cleanup (rspamd_io_dispatcher_t *d) -{ - struct rspamd_out_buffer_s *cur, *tmp; - - DL_FOREACH_SAFE (d->out_buffers.buffers, cur, tmp) { - DELETE_OUT_BUFFER (d, cur); - } - /* Cleanup temporary data */ - rspamd_mempool_cleanup_tmp (d->pool); - d->in_buf = NULL; -} - -#undef debug_ip - -/* - * vi:ts=4 - */ diff --git a/src/buffer.h b/src/buffer.h deleted file mode 100644 index 5ed42bfb3..000000000 --- a/src/buffer.h +++ /dev/null @@ -1,158 +0,0 @@ -/** - * @file buffer.h - * Implements buffered IO - */ - -#ifndef RSPAMD_BUFFER_H -#define RSPAMD_BUFFER_H - -#include "config.h" -#include "mem_pool.h" -#include "fstring.h" - -typedef gboolean (*dispatcher_read_callback_t)(f_str_t *in, void *user_data); -typedef gboolean (*dispatcher_write_callback_t)(void *user_data); -typedef void (*dispatcher_err_callback_t)(GError *err, void *user_data); - -/** - * Types of IO handling - */ -enum io_policy { - BUFFER_LINE, /**< call handler when we have line ready */ - BUFFER_CHARACTER, /**< call handler when we have some characters */ - BUFFER_ANY /**< call handler whenever we got data in buffer */ -}; - -/** - * Buffer structure - */ -typedef struct rspamd_buffer_s { - f_str_t *data; /**< buffer logic */ - gchar *pos; /**< current position */ -} rspamd_buffer_t; - -struct rspamd_out_buffer_s { - GString *data; - gboolean allocated; - struct rspamd_out_buffer_s *prev, *next; -}; - -typedef struct rspamd_io_dispatcher_s { - rspamd_buffer_t *in_buf; /**< input buffer */ - struct { - guint pending; - struct rspamd_out_buffer_s *buffers; - } out_buffers; /**< output buffers chain */ - struct timeval *tv; /**< io timeout */ - struct event *ev; /**< libevent io event */ - rspamd_mempool_t *pool; /**< where to store data */ - enum io_policy policy; /**< IO policy */ - size_t nchars; /**< how many chars to read */ 
- gint fd; /**< descriptor */ - guint32 peer_addr; /**< address of peer for debugging */ - gboolean wanna_die; /**< if dispatcher should be stopped */ - dispatcher_read_callback_t read_callback; /**< read callback */ - dispatcher_write_callback_t write_callback; /**< write callback */ - dispatcher_err_callback_t err_callback; /**< error callback */ - void *user_data; /**< user's data for callbacks */ - gulong default_buf_size; /**< default size for buffering */ - off_t offset; /**< for sendfile use */ - size_t file_size; - gint sendfile_fd; - gboolean in_sendfile; /**< whether buffer is in sendfile mode */ - gboolean strip_eol; /**< strip or not line ends in BUFFER_LINE policy */ - gboolean is_restored; /**< call a callback when dispatcher is restored */ - gboolean half_closed; /**< connection is half closed */ - gboolean want_read; /**< whether we want to read more data */ - struct event_base *ev_base; /**< event base for io operations */ -#ifndef HAVE_SENDFILE - void *map; -#endif -} rspamd_io_dispatcher_t; - -/** - * Creates rspamd IO dispatcher for specified descriptor - * @param fd descriptor to IO - * @param policy IO policy - * @param read_cb read callback handler - * @param write_cb write callback handler - * @param err_cb error callback handler - * @param tv IO timeout - * @param user_data pointer to user's data - * @return new dispatcher object or NULL in case of failure - */ -rspamd_io_dispatcher_t* rspamd_create_dispatcher (struct event_base *base, gint fd, - enum io_policy policy, - dispatcher_read_callback_t read_cb, - dispatcher_write_callback_t write_cb, - dispatcher_err_callback_t err_cb, - struct timeval *tv, - void *user_data); - -/** - * Set new policy for dispatcher - * @param d pointer to dispatcher's object - * @param policy IO policy - * @param nchars number of characters in buffer for character policy - */ -void rspamd_set_dispatcher_policy (rspamd_io_dispatcher_t *d, - enum io_policy policy, - size_t nchars); - -/** - * Write data when it 
would be possible - * @param d pointer to dispatcher's object - * @param data data to write - * @param len length of data - */ -gboolean rspamd_dispatcher_write (rspamd_io_dispatcher_t *d, - const void *data, - size_t len, gboolean delayed, - gboolean allocated) G_GNUC_WARN_UNUSED_RESULT; - -/** - * Write a GString to dispatcher - * @param d dipatcher object - * @param str string to write - * @param delayed delay write - * @param free_on_write free string after writing to a socket - * @return TRUE if write has been queued successfully - */ -gboolean rspamd_dispatcher_write_string (rspamd_io_dispatcher_t *d, - GString *str, - gboolean delayed, - gboolean free_on_write) G_GNUC_WARN_UNUSED_RESULT; - -/** - * Send specified descriptor to dispatcher - * @param d pointer to dispatcher's object - * @param fd descriptor of file - * @param len length of data - */ -gboolean rspamd_dispatcher_sendfile (rspamd_io_dispatcher_t *d, gint fd, size_t len) G_GNUC_WARN_UNUSED_RESULT; - -/** - * Pause IO events on dispatcher - * @param d pointer to dispatcher's object - */ -void rspamd_dispatcher_pause (rspamd_io_dispatcher_t *d); - -/** - * Restore IO events on dispatcher - * @param d pointer to dispatcher's object - */ -void rspamd_dispatcher_restore (rspamd_io_dispatcher_t *d); - -/** - * Frees dispatcher object - * @param dispatcher pointer to dispatcher's object - */ -void rspamd_remove_dispatcher (rspamd_io_dispatcher_t *dispatcher); - -/** - * Cleanup dispatcher freeing all temporary data - * @param dispatcher pointer to dispatcher's object - */ -void rspamd_dispacther_cleanup (rspamd_io_dispatcher_t *dispatcher); - -#endif diff --git a/src/cfg_file.h b/src/cfg_file.h deleted file mode 100644 index 6ecb441fd..000000000 --- a/src/cfg_file.h +++ /dev/null @@ -1,516 +0,0 @@ -/** - * @file cfg_file.h - * Config file parser and config routines API - */ - -#ifndef CFG_FILE_H -#define CFG_FILE_H - -#include "config.h" -#include "mem_pool.h" -#include "upstream.h" -#include 
"memcached.h" -#include "symbols_cache.h" -#include "cfg_rcl.h" -#include "utlist.h" -#include "ucl.h" - -#define DEFAULT_BIND_PORT 11333 -#define DEFAULT_CONTROL_PORT 11334 -#define MAX_MEMCACHED_SERVERS 4 -#define DEFAULT_MEMCACHED_PORT 11211 -/* Memcached timeouts */ -#define DEFAULT_MEMCACHED_CONNECT_TIMEOUT 1000 -/* Upstream timeouts */ -#define DEFAULT_UPSTREAM_ERROR_TIME 10 -#define DEFAULT_UPSTREAM_ERROR_TIME 10 -#define DEFAULT_UPSTREAM_DEAD_TIME 300 -#define DEFAULT_UPSTREAM_MAXERRORS 10 - -struct expression; -struct tokenizer; -struct classifier; - -enum { VAL_UNDEF=0, VAL_TRUE, VAL_FALSE }; - -/** - * Type of time configuration parameter - */ -enum time_type { - TIME_SECONDS = 0, - TIME_MILLISECONDS, - TIME_MINUTES, - TIME_HOURS -}; -/** - * Types of rspamd bind lines - */ -enum rspamd_cred_type { - CRED_NORMAL, - CRED_CONTROL, - CRED_LMTP, - CRED_DELIVERY -}; - -/** - * Regexp type: /H - header, /M - mime, /U - url /X - raw header - */ -enum rspamd_regexp_type { - REGEXP_NONE = 0, - REGEXP_HEADER, - REGEXP_MIME, - REGEXP_MESSAGE, - REGEXP_URL, - REGEXP_RAW_HEADER -}; - -/** - * Logging type - */ -enum rspamd_log_type { - RSPAMD_LOG_CONSOLE, - RSPAMD_LOG_SYSLOG, - RSPAMD_LOG_FILE -}; - -/** - * Regexp structure - */ -struct rspamd_regexp { - enum rspamd_regexp_type type; /**< regexp type */ - gchar *regexp_text; /**< regexp text representation */ - GRegex *regexp; /**< glib regexp structure */ - GRegex *raw_regexp; /**< glib regexp structure for raw matching */ - gchar *header; /**< header name for header regexps */ - gboolean is_test; /**< true if this expression must be tested */ - gboolean is_raw; /**< true if this regexp is done by raw matching */ - gboolean is_strong; /**< true if headers search must be case sensitive */ -}; - -/** - * Memcached server object - */ -struct memcached_server { - struct upstream up; /**< common upstream base */ - struct in_addr addr; /**< address of server */ - guint16 port; /**< port to connect */ - short alive; /**< 
is this server alive */ - gint16 num; /**< number of servers in case of mirror */ -}; - -/** - * script module list item - */ -struct script_module { - gchar *name; /**< name of module */ - gchar *path; /**< path to module */ -}; - -/** - * Type of lua variable - */ -enum lua_var_type { - LUA_VAR_NUM, - LUA_VAR_BOOLEAN, - LUA_VAR_STRING, - LUA_VAR_FUNCTION, - LUA_VAR_UNKNOWN -}; -/** - * Module option - */ -struct module_opt { - gchar *param; /**< parameter name */ - gchar *value; /**< parameter value */ - gchar *description; /**< parameter description */ - gchar *group; /**< parameter group */ - gpointer actual_data; /**< parsed data */ - gboolean is_lua; /**< actually this is lua variable */ - enum lua_var_type lua_type; /**< type of lua variable */ -}; - -struct module_meta_opt { - gchar *name; /**< Name of meta option */ - GList *options; /**< List of struct module_opt */ -}; - -/** - * Symbol definition - */ -struct symbol_def { - gchar *name; - gchar *description; - gdouble *weight_ptr; -}; - -/** - * Symbols group - */ -struct symbols_group { - gchar *name; - GList *symbols; -}; - -/** - * Statfile section definition - */ -struct statfile_section { - guint32 code; /**< section's code */ - guint64 size; /**< size of section */ - double weight; /**< weight coefficient for section */ -}; - -/** - * Statfile autolearn parameters - */ -struct statfile_autolearn_params { - const gchar *metric; /**< metric name for autolearn triggering */ - double threshold_min; /**< threshold mark */ - double threshold_max; /**< threshold mark */ - GList *symbols; /**< list of symbols */ -}; - -/** - * Sync affinity - */ -enum sync_affinity { - AFFINITY_NONE = 0, - AFFINITY_MASTER, - AFFINITY_SLAVE -}; - -/** - * Binlog params - */ -struct statfile_binlog_params { - enum sync_affinity affinity; - time_t rotate_time; - gchar *master_addr; - guint16 master_port; -}; - -typedef double (*statfile_normalize_func)(struct config_file *cfg, long double score, void *params); - -/** - * 
Statfile config definition - */ -struct statfile { - gchar *symbol; /**< symbol of statfile */ - gchar *path; /**< filesystem pattern (with %r or %f) */ - gchar *label; /**< label of this statfile */ - gsize size; /**< size of statfile */ - GList *sections; /**< list of sections in statfile */ - struct statfile_autolearn_params *autolearn; /**< autolearn params */ - struct statfile_binlog_params *binlog; /**< binlog params */ - statfile_normalize_func normalizer; /**< function that is used as normaliser */ - void *normalizer_data; /**< normalizer function params */ - gchar *normalizer_str; /**< source string (for dump) */ - ucl_object_t *opts; /**< other options */ - gboolean is_spam; /**< spam flag */ -}; - -/** - * Classifier config definition - */ -struct classifier_config { - GList *statfiles; /**< statfiles list */ - GHashTable *labels; /**< statfiles with labels */ - gchar *metric; /**< metric of this classifier */ - struct classifier *classifier; /**< classifier interface */ - struct tokenizer *tokenizer; /**< tokenizer used for classifier */ - GHashTable *opts; /**< other options */ - GList *pre_callbacks; /**< list of callbacks that are called before classification */ - GList *post_callbacks; /**< list of callbacks that are called after classification */ -}; - -struct rspamd_worker_bind_conf { - gchar *bind_host; - guint16 bind_port; - gint ai; - gboolean is_systemd; - struct rspamd_worker_bind_conf *next; -}; - -struct rspamd_worker_param_parser { - rspamd_rcl_handler_t handler; /**< handler function */ - struct rspamd_rcl_struct_parser parser; /**< parser attributes */ - const gchar *name; /**< parameter's name */ - UT_hash_handle hh; /**< hash by name */ -}; - -struct rspamd_worker_cfg_parser { - struct rspamd_worker_param_parser *parsers; /**< parsers hash */ - gint type; /**< workers quark */ - gboolean (*def_obj_parser)(const ucl_object_t *obj, gpointer ud); /**< default object parser */ - gpointer def_ud; - UT_hash_handle hh; /**< hash by type */ 
-}; - -/** - * Config params for rspamd worker - */ -struct worker_conf { - worker_t *worker; /**< pointer to worker type */ - GQuark type; /**< type of worker */ - struct rspamd_worker_bind_conf *bind_conf; /**< bind configuration */ - guint16 count; /**< number of workers */ - GList *listen_socks; /**< listening sockets desctiptors */ - guint32 rlimit_nofile; /**< max files limit */ - guint32 rlimit_maxcore; /**< maximum core file size */ - GHashTable *params; /**< params for worker */ - GQueue *active_workers; /**< linked list of spawned workers */ - gboolean has_socket; /**< whether we should make listening socket in main process */ - gpointer *ctx; /**< worker's context */ - ucl_object_t *options; /**< other worker's options */ -}; - -/** - * Structure that stores all config data - */ -struct config_file { - gchar *rspamd_user; /**< user to run as */ - gchar *rspamd_group; /**< group to run as */ - rspamd_mempool_t *cfg_pool; /**< memory pool for config */ - gchar *cfg_name; /**< name of config file */ - gchar *pid_file; /**< name of pid file */ - gchar *temp_dir; /**< dir for temp files */ -#ifdef WITH_GPERF_TOOLS - gchar *profile_path; -#endif - - gboolean no_fork; /**< if 1 do not call daemon() */ - gboolean config_test; /**< if TRUE do only config file test */ - gboolean raw_mode; /**< work in raw mode instead of utf one */ - gboolean one_shot_mode; /**< rules add only one symbol */ - gboolean check_text_attachements; /**< check text attachements as text */ - gboolean convert_config; /**< convert config to XML format */ - gboolean strict_protocol_headers; /**< strictly check protocol headers */ - - gsize max_diff; /**< maximum diff size for text parts */ - - enum rspamd_log_type log_type; /**< log type */ - gint log_facility; /**< log facility in case of syslog */ - gint log_level; /**< log level trigger */ - gchar *log_file; /**< path to logfile in case of file logging */ - gboolean log_buffered; /**< whether logging is buffered */ - guint32 log_buf_size; 
/**< length of log buffer */ - gchar *debug_ip_map; /**< turn on debugging for specified ip addresses */ - gboolean log_urls; /**< whether we should log URLs */ - GList *debug_symbols; /**< symbols to debug */ - gboolean log_color; /**< output colors for console output */ - gboolean log_extended; /**< log extended information */ - - guint32 statfile_sync_interval; /**< synchronization interval */ - guint32 statfile_sync_timeout; /**< synchronization timeout */ - gboolean mlock_statfile_pool; /**< use mlock (2) for locking statfiles */ - - struct memcached_server memcached_servers[MAX_MEMCACHED_SERVERS]; /**< memcached servers */ - gsize memcached_servers_num; /**< number of memcached servers */ - memc_proto_t memcached_protocol; /**< memcached protocol */ - guint memcached_error_time; /**< memcached error time (see upstream documentation) */ - guint memcached_dead_time; /**< memcached dead time */ - guint memcached_maxerrors; /**< maximum number of errors */ - guint memcached_connect_timeout; /**< connection timeout */ - - gboolean delivery_enable; /**< is delivery agent is enabled */ - gchar *deliver_host; /**< host for mail deliviring */ - struct in_addr deliver_addr; /**< its address */ - guint16 deliver_port; /**< port for deliviring */ - guint16 deliver_family; /**< socket family for delivirnig */ - gchar *deliver_agent_path; /**< deliver to pipe instead of socket */ - gboolean deliver_lmtp; /**< use LMTP instead of SMTP */ - - GList *script_modules; /**< linked list of script modules to load */ - - GList *filters; /**< linked list of all filters */ - GList *workers; /**< linked list of all workers params */ - struct rspamd_worker_cfg_parser *wrk_parsers; /**< hash for worker config parsers, indexed by worker quarks */ - gchar *filters_str; /**< string of filters */ - ucl_object_t *rcl_obj; /**< rcl object */ - GHashTable* metrics; /**< hash of metrics indexed by metric name */ - GList* symbols_groups; /**< groups of symbols */ - GList* metrics_list; /**< 
linked list of metrics */ - GHashTable* metrics_symbols; /**< hash table of metrics indexed by symbol */ - GHashTable* c_modules; /**< hash of c modules indexed by module name */ - GHashTable* composite_symbols; /**< hash of composite symbols indexed by its name */ - GList *classifiers; /**< list of all classifiers defined */ - GList *statfiles; /**< list of all statfiles in config file order */ - GHashTable *classifiers_symbols; /**< hashtable indexed by symbol name of classifiers */ - GHashTable* cfg_params; /**< all cfg params indexed by its name in this structure */ - GList *pre_filters; /**< list of pre-processing lua filters */ - GList *post_filters; /**< list of post-processing lua filters */ - gchar *dynamic_conf; /**< path to dynamic configuration */ - GList *current_dynamic_conf; /**< currently loaded dynamic configuration */ - GHashTable* domain_settings; /**< settings per-domains */ - GHashTable* user_settings; /**< settings per-user */ - gchar* domain_settings_str; /**< string representation of settings */ - gchar* user_settings_str; - gint clock_res; /**< resolution of clock used */ - - GList *maps; /**< maps active */ - rspamd_mempool_t *map_pool; /**< static maps pool */ - gdouble map_timeout; /**< maps watch timeout */ - - struct symbols_cache *cache; /**< symbols cache object */ - gchar *cache_filename; /**< filename of cache file */ - struct metric *default_metric; /**< default metric */ - - gchar* checksum; /**< real checksum of config file */ - gchar* dump_checksum; /**< dump checksum of config file */ - gpointer lua_state; /**< pointer to lua state */ - - gchar* rrd_file; /**< rrd file to store statistics */ - - gchar* history_file; /**< file to save rolling history */ - - gdouble dns_timeout; /**< timeout in milliseconds for waiting for dns reply */ - guint32 dns_retransmits; /**< maximum retransmits count */ - guint32 dns_throttling_errors; /**< maximum errors for starting resolver throttling */ - guint32 dns_throttling_time; /**< time in 
seconds for DNS throttling */ - guint32 dns_io_per_server; /**< number of sockets per DNS server */ - GList *nameservers; /**< list of nameservers or NULL to parse resolv.conf */ -}; - - -/** - * Parse host[:port[:priority]] line - * @param ina host address - * @param port port - * @param priority priority - * @return TRUE if string was parsed - */ -gboolean parse_host_port_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port, guint *priority); - -/** - * Parse host:port line - * @param ina host address - * @param port port - * @return TRUE if string was parsed - */ -gboolean parse_host_port (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port); - -/** - * Parse host:priority line - * @param ina host address - * @param priority priority - * @return TRUE if string was parsed - */ -gboolean parse_host_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint *priority); - -/** - * Parse bind credits - * @param cf config file to use - * @param str line that presents bind line - * @param type type of credits - * @return 1 if line was successfully parsed and 0 in case of error - */ -gboolean parse_bind_line (struct config_file *cfg, struct worker_conf *cf, const gchar *str); - -/** - * Init default values - * @param cfg config file - */ -void init_defaults (struct config_file *cfg); - -/** - * Free memory used by config structure - * @param cfg config file - */ -void free_config (struct config_file *cfg); - -/** - * Gets module option with specified name - * @param cfg config file - * @param module_name name of module - * @param opt_name name of option to get - * @return module value or NULL if option does not defined - */ -const ucl_object_t* get_module_opt (struct config_file *cfg, const gchar *module_name, - const gchar *opt_name); - -/** - * Parse limit - * @param limit string representation of limit (eg. 
1M) - * @return numeric value of limit - */ -guint64 parse_limit (const gchar *limit, guint len); - -/** - * Parse flag - * @param str string representation of flag (eg. 'on') - * @return numeric value of flag (0 or 1) - */ -gchar parse_flag (const gchar *str); - -/** - * Do post load actions for config - * @param cfg config file - */ -void post_load_config (struct config_file *cfg); - -/** - * Calculate checksum for config file - * @param cfg config file - */ -gboolean get_config_checksum (struct config_file *cfg); - - -/** - * Replace all \" with a single " in given string - * @param line input string - */ -void unescape_quotes (gchar *line); - -/* - * Convert comma separated string to a list of strings - */ -GList* parse_comma_list (rspamd_mempool_t *pool, const gchar *line); - -/* - * Return a new classifier_config structure, setting default and non-conflicting attributes - */ -struct classifier_config* check_classifier_conf (struct config_file *cfg, struct classifier_config *c); -/* - * Return a new worker_conf structure, setting default and non-conflicting attributes - */ -struct worker_conf* check_worker_conf (struct config_file *cfg, struct worker_conf *c); -/* - * Return a new metric structure, setting default and non-conflicting attributes - */ -struct metric* check_metric_conf (struct config_file *cfg, struct metric *c); -/* - * Return a new statfile structure, setting default and non-conflicting attributes - */ -struct statfile* check_statfile_conf (struct config_file *cfg, struct statfile *c); - -/* - * Read XML configuration file - */ -gboolean read_rspamd_config (struct config_file *cfg, - const gchar *filename, const gchar *convert_to, - rspamd_rcl_section_fin_t logger_fin, gpointer logger_ud); - -/* - * Register symbols of classifiers inside metrics - */ -void insert_classifier_symbols (struct config_file *cfg); - -/* - * Check statfiles inside a classifier - */ -gboolean check_classifier_statfiles (struct classifier_config *cf); - -/* - * Find 
classifier config by name - */ -struct classifier_config* find_classifier_conf (struct config_file *cfg, const gchar *name); - -/* - * Parse input `ip_list` to radix tree `tree`. Now supports only IPv4 addresses. - */ -gboolean rspamd_parse_ip_list (const gchar *ip_list, radix_tree_t **tree); - -#endif /* ifdef CFG_FILE_H */ -/* - * vi:ts=4 - */ diff --git a/src/cfg_rcl.c b/src/cfg_rcl.c deleted file mode 100644 index 37b554dec..000000000 --- a/src/cfg_rcl.c +++ /dev/null @@ -1,1471 +0,0 @@ -/* Copyright (c) 2013, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "cfg_rcl.h" -#include "main.h" -#include "settings.h" -#include "cfg_file.h" -#include "lua/lua_common.h" -#include "expressions.h" -#include "classifiers/classifiers.h" -#include "tokenizers/tokenizers.h" - -/* - * Common section handlers - */ -static gboolean -rspamd_rcl_logging_handler (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - const ucl_object_t *val; - const gchar *facility, *log_type, *log_level; - - val = ucl_object_find_key (obj, "type"); - if (val != NULL && ucl_object_tostring_safe (val, &log_type)) { - if (g_ascii_strcasecmp (log_type, "file") == 0) { - /* Need to get filename */ - val = ucl_object_find_key (obj, "filename"); - if (val == NULL || val->type != UCL_STRING) { - g_set_error (err, CFG_RCL_ERROR, ENOENT, "filename attribute must be specified for file logging type"); - return FALSE; - } - cfg->log_type = RSPAMD_LOG_FILE; - cfg->log_file = rspamd_mempool_strdup (cfg->cfg_pool, ucl_object_tostring (val)); - } - else if (g_ascii_strcasecmp (log_type, "syslog") == 0) { - /* Need to get facility */ - cfg->log_facility = LOG_DAEMON; - cfg->log_type = RSPAMD_LOG_SYSLOG; - val = ucl_object_find_key (obj, "facility"); - if (val != NULL && ucl_object_tostring_safe (val, &facility)) { - if (g_ascii_strcasecmp (facility, "LOG_AUTH") == 0 || - g_ascii_strcasecmp (facility, "auth") == 0 ) { - cfg->log_facility = LOG_AUTH; - } - else if (g_ascii_strcasecmp (facility, "LOG_CRON") == 0 || - g_ascii_strcasecmp (facility, "cron") == 0 ) { - cfg->log_facility = LOG_CRON; - } - else if (g_ascii_strcasecmp (facility, "LOG_DAEMON") == 0 || - g_ascii_strcasecmp (facility, "daemon") == 0 ) { - cfg->log_facility = LOG_DAEMON; - } - else if (g_ascii_strcasecmp (facility, "LOG_MAIL") == 0 || - g_ascii_strcasecmp (facility, "mail") == 0) { - cfg->log_facility = LOG_MAIL; - } - else if (g_ascii_strcasecmp (facility, "LOG_USER") == 0 || - g_ascii_strcasecmp (facility, "user") == 0 
) { - cfg->log_facility = LOG_USER; - } - else if (g_ascii_strcasecmp (facility, "LOG_LOCAL0") == 0 || - g_ascii_strcasecmp (facility, "local0") == 0) { - cfg->log_facility = LOG_LOCAL0; - } - else if (g_ascii_strcasecmp (facility, "LOG_LOCAL1") == 0 || - g_ascii_strcasecmp (facility, "local1") == 0) { - cfg->log_facility = LOG_LOCAL1; - } - else if (g_ascii_strcasecmp (facility, "LOG_LOCAL2") == 0 || - g_ascii_strcasecmp (facility, "local2") == 0) { - cfg->log_facility = LOG_LOCAL2; - } - else if (g_ascii_strcasecmp (facility, "LOG_LOCAL3") == 0 || - g_ascii_strcasecmp (facility, "local3") == 0) { - cfg->log_facility = LOG_LOCAL3; - } - else if (g_ascii_strcasecmp (facility, "LOG_LOCAL4") == 0 || - g_ascii_strcasecmp (facility, "local4") == 0) { - cfg->log_facility = LOG_LOCAL4; - } - else if (g_ascii_strcasecmp (facility, "LOG_LOCAL5") == 0 || - g_ascii_strcasecmp (facility, "local5") == 0) { - cfg->log_facility = LOG_LOCAL5; - } - else if (g_ascii_strcasecmp (facility, "LOG_LOCAL6") == 0 || - g_ascii_strcasecmp (facility, "local6") == 0) { - cfg->log_facility = LOG_LOCAL6; - } - else if (g_ascii_strcasecmp (facility, "LOG_LOCAL7") == 0 || - g_ascii_strcasecmp (facility, "local7") == 0) { - cfg->log_facility = LOG_LOCAL7; - } - else { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid log facility: %s", facility); - return FALSE; - } - } - } - else if (g_ascii_strcasecmp (log_type, "stderr") == 0 || g_ascii_strcasecmp (log_type, "console") == 0) { - cfg->log_type = RSPAMD_LOG_CONSOLE; - } - else { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid log type: %s", log_type); - return FALSE; - } - } - else { - /* No type specified */ - msg_warn ("logging type is not specified correctly, log output to the console"); - } - - /* Handle log level */ - val = ucl_object_find_key (obj, "level"); - if (val != NULL && ucl_object_tostring_safe (val, &log_level)) { - if (g_ascii_strcasecmp (log_level, "error") == 0) { - cfg->log_level = G_LOG_LEVEL_ERROR | 
G_LOG_LEVEL_CRITICAL; - } - else if (g_ascii_strcasecmp (log_level, "warning") == 0) { - cfg->log_level = G_LOG_LEVEL_WARNING; - } - else if (g_ascii_strcasecmp (log_level, "info") == 0) { - cfg->log_level = G_LOG_LEVEL_INFO | G_LOG_LEVEL_MESSAGE; - } - else if (g_ascii_strcasecmp (log_level, "debug") == 0) { - cfg->log_level = G_LOG_LEVEL_DEBUG; - } - else { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid log level: %s", log_level); - return FALSE; - } - } - - return rspamd_rcl_section_parse_defaults (section, cfg, obj, cfg, err); -} - -static gboolean -rspamd_rcl_options_handler (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - const ucl_object_t *val; - const gchar *user_settings, *domain_settings; - - /* Handle user and domain settings */ - val = ucl_object_find_key (obj, "user_settings"); - if (val != NULL && ucl_object_tostring_safe (val, &user_settings)) { - if (!read_settings (user_settings, "Users' settings", cfg, cfg->user_settings)) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot read settings: %s", user_settings); - return FALSE; - } - cfg->user_settings_str = rspamd_mempool_strdup (cfg->cfg_pool, user_settings); - } - - val = ucl_object_find_key (obj, "domain_settings"); - if (val != NULL && ucl_object_tostring_safe (val, &domain_settings)) { - if (!read_settings (domain_settings, "Domains settings", cfg, cfg->domain_settings)) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot read settings: %s", domain_settings); - return FALSE; - } - cfg->domain_settings_str = rspamd_mempool_strdup (cfg->cfg_pool, domain_settings); - } - - return rspamd_rcl_section_parse_defaults (section, cfg, obj, cfg, err); -} - -static gint -rspamd_symbols_group_find_func (gconstpointer a, gconstpointer b) -{ - const struct symbols_group *gr = a; - const gchar *uv = b; - - return g_ascii_strcasecmp (gr->name, uv); -} - -/** - * Insert a symbol to the metric - * @param cfg - * @param metric - * 
@param obj symbol rcl object (either float value or an object) - * @param err - * @return - */ -static gboolean -rspamd_rcl_insert_symbol (struct config_file *cfg, struct metric *metric, - const ucl_object_t *obj, gboolean is_legacy, GError **err) -{ - const gchar *group = "ungrouped", *description = NULL, *sym_name; - gdouble symbol_score, *score_ptr; - const ucl_object_t *val; - struct symbols_group *sym_group; - struct symbol_def *sym_def; - GList *metric_list, *group_list; - - /* - * We allow two type of definitions: - * symbol = weight - * or - * symbol { - * weight = ...; - * description = ...; - * group = ...; - * } - */ - if (is_legacy) { - val = ucl_object_find_key (obj, "name"); - if (val == NULL) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "symbol name is missing"); - return FALSE; - } - sym_name = ucl_object_tostring (val); - } - else { - sym_name = ucl_object_key (obj); - } - if (ucl_object_todouble_safe (obj, &symbol_score)) { - description = NULL; - } - else if (obj->type == UCL_OBJECT) { - val = ucl_object_find_key (obj, "weight"); - if (val == NULL || !ucl_object_todouble_safe (val, &symbol_score)) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid symbol score: %s", sym_name); - return FALSE; - } - val = ucl_object_find_key (obj, "description"); - if (val != NULL) { - description = ucl_object_tostring (val); - } - val = ucl_object_find_key (obj, "group"); - if (val != NULL) { - ucl_object_tostring_safe (val, &group); - } - } - else { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid symbol type: %s", sym_name); - return FALSE; - } - - sym_def = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct symbol_def)); - score_ptr = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (gdouble)); - - *score_ptr = symbol_score; - sym_def->weight_ptr = score_ptr; - sym_def->name = rspamd_mempool_strdup (cfg->cfg_pool, sym_name); - sym_def->description = (gchar *)description; - - g_hash_table_insert (metric->symbols, sym_def->name, score_ptr); - - if 
((metric_list = g_hash_table_lookup (cfg->metrics_symbols, sym_def->name)) == NULL) { - metric_list = g_list_prepend (NULL, metric); - rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_list_free, metric_list); - g_hash_table_insert (cfg->metrics_symbols, sym_def->name, metric_list); - } - else { - /* Slow but keep start element of list in safe */ - if (!g_list_find (metric_list, metric)) { - metric_list = g_list_append (metric_list, metric); - } - } - - /* Search for symbol group */ - group_list = g_list_find_custom (cfg->symbols_groups, group, rspamd_symbols_group_find_func); - if (group_list == NULL) { - /* Create new group */ - sym_group = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct symbols_group)); - sym_group->name = rspamd_mempool_strdup (cfg->cfg_pool, group); - sym_group->symbols = NULL; - cfg->symbols_groups = g_list_prepend (cfg->symbols_groups, sym_group); - } - else { - sym_group = group_list->data; - } - /* Insert symbol */ - sym_group->symbols = g_list_prepend (sym_group->symbols, sym_def); - - return TRUE; -} - -static gboolean -rspamd_rcl_metric_handler (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - const ucl_object_t *val, *cur; - const gchar *metric_name, *subject_name, *semicolon, *act_str; - struct metric *metric; - struct metric_action *action; - gdouble action_score, grow_factor; - gint action_value; - gboolean new = TRUE, have_actions = FALSE; - ucl_object_iter_t it = NULL; - - val = ucl_object_find_key (obj, "name"); - if (val == NULL || !ucl_object_tostring_safe (val, &metric_name)) { - metric_name = DEFAULT_METRIC; - } - - metric = g_hash_table_lookup (cfg->metrics, metric_name); - if (metric == NULL) { - metric = check_metric_conf (cfg, metric); - metric->name = metric_name; - } - else { - new = FALSE; - } - - /* Handle actions */ - val = ucl_object_find_key (obj, "actions"); - if (val != NULL) { - if (val->type != UCL_OBJECT) { - 
g_set_error (err, CFG_RCL_ERROR, EINVAL, "actions must be an object"); - return FALSE; - } - while ((cur = ucl_iterate_object (val, &it, true)) != NULL) { - if (!check_action_str (ucl_object_key (cur), &action_value) || - !ucl_object_todouble_safe (cur, &action_score)) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid action definition: %s", ucl_object_key (cur)); - return FALSE; - } - action = &metric->actions[action_value]; - action->action = action_value; - action->score = action_score; - } - } - else if (new) { - /* Switch to legacy mode */ - val = ucl_object_find_key (obj, "required_score"); - if (val != NULL && ucl_object_todouble_safe (val, &action_score)) { - action = &metric->actions[METRIC_ACTION_REJECT]; - action->action = METRIC_ACTION_REJECT; - action->score = action_score; - have_actions = TRUE; - } - val = ucl_object_find_key (obj, "action"); - LL_FOREACH (val, cur) { - if (cur->type == UCL_STRING) { - act_str = ucl_object_tostring (cur); - semicolon = strchr (act_str, ':'); - if (semicolon != NULL) { - if (check_action_str (act_str, &action_value)) { - action_score = strtod (semicolon + 1, NULL); - action = &metric->actions[action_value]; - action->action = action_value; - action->score = action_score; - have_actions = TRUE; - } - } - } - } - if (new && !have_actions) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "metric %s has no actions", metric_name); - return FALSE; - } - } - - /* Handle symbols */ - val = ucl_object_find_key (obj, "symbols"); - if (val != NULL) { - if (val->type == UCL_ARRAY) { - val = val->value.ov; - } - if (val->type != UCL_OBJECT) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "symbols must be an object"); - return FALSE; - } - it = NULL; - while ((cur = ucl_iterate_object (val, &it, true)) != NULL) { - if (!rspamd_rcl_insert_symbol (cfg, metric, cur, FALSE, err)) { - return FALSE; - } - } - } - else { - /* Legacy variant */ - val = ucl_object_find_key (obj, "symbol"); - if (val != NULL) { - if (val->type == UCL_ARRAY) { - 
val = val->value.ov; - } - if (val->type != UCL_OBJECT) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "symbols must be an object"); - return FALSE; - } - LL_FOREACH (val, cur) { - if (!rspamd_rcl_insert_symbol (cfg, metric, cur, TRUE, err)) { - return FALSE; - } - } - } - else if (new) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "metric %s has no symbols", metric_name); - return FALSE; - } - } - - val = ucl_object_find_key (obj, "grow_factor"); - if (val && ucl_object_todouble_safe (val, &grow_factor)) { - metric->grow_factor = grow_factor; - } - - val = ucl_object_find_key (obj, "subject"); - if (val && ucl_object_tostring_safe (val, &subject_name)) { - metric->subject = (gchar *)subject_name; - } - - /* Insert the resulting metric */ - if (new) { - g_hash_table_insert (cfg->metrics, (void *)metric->name, metric); - cfg->metrics_list = g_list_prepend (cfg->metrics_list, metric); - } - - return TRUE; -} - -static gboolean -rspamd_rcl_worker_handler (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - const ucl_object_t *val, *cur; - ucl_object_iter_t it = NULL; - const gchar *worker_type, *worker_bind; - GQuark qtype; - struct worker_conf *wrk; - struct rspamd_worker_cfg_parser *wparser; - struct rspamd_worker_param_parser *whandler; - - val = ucl_object_find_key (obj, "type"); - if (val != NULL && ucl_object_tostring_safe (val, &worker_type)) { - qtype = g_quark_try_string (worker_type); - if (qtype != 0) { - wrk = check_worker_conf (cfg, NULL); - wrk->worker = get_worker_by_type (qtype); - if (wrk->worker == NULL) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "unknown worker type: %s", worker_type); - return FALSE; - } - wrk->type = qtype; - if (wrk->worker->worker_init_func) { - wrk->ctx = wrk->worker->worker_init_func (cfg); - } - } - else { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "unknown worker type: %s", worker_type); - return FALSE; - } - } - else { - g_set_error (err, CFG_RCL_ERROR, 
EINVAL, "undefined worker type"); - return FALSE; - } - - val = ucl_object_find_key (obj, "bind_socket"); - if (val != NULL) { - if (val->type == UCL_ARRAY) { - val = val->value.ov; - } - LL_FOREACH (val, cur) { - if (!ucl_object_tostring_safe (cur, &worker_bind)) { - continue; - } - if (!parse_bind_line (cfg, wrk, worker_bind)) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot parse bind line: %s", worker_bind); - return FALSE; - } - } - } - - wrk->options = (ucl_object_t *)obj; - - if (!rspamd_rcl_section_parse_defaults (section, cfg, obj, wrk, err)) { - return FALSE; - } - - /* Parse other attributes */ - HASH_FIND_INT (cfg->wrk_parsers, (gint *)&qtype, wparser); - if (wparser != NULL && obj->type == UCL_OBJECT) { - while ((cur = ucl_iterate_object (obj, &it, true)) != NULL) { - HASH_FIND_STR (wparser->parsers, ucl_object_key (cur), whandler); - if (whandler != NULL) { - if (!whandler->handler (cfg, cur, &whandler->parser, section, err)) { - return FALSE; - } - } - } - if (wparser->def_obj_parser != NULL) { - if (! 
wparser->def_obj_parser (obj, wparser->def_ud)) { - return FALSE; - } - } - } - - cfg->workers = g_list_prepend (cfg->workers, wrk); - - return TRUE; -} - -static void -rspamd_rcl_set_lua_globals (struct config_file *cfg, lua_State *L) -{ - struct config_file **pcfg; - - /* First check for global variable 'config' */ - lua_getglobal (L, "config"); - if (lua_isnil (L, -1)) { - /* Assign global table to set up attributes */ - lua_newtable (L); - lua_setglobal (L, "config"); - } - - lua_getglobal (L, "metrics"); - if (lua_isnil (L, -1)) { - lua_newtable (L); - lua_setglobal (L, "metrics"); - } - - lua_getglobal (L, "composites"); - if (lua_isnil (L, -1)) { - lua_newtable (L); - lua_setglobal (L, "composites"); - } - - lua_getglobal (L, "classifiers"); - if (lua_isnil (L, -1)) { - lua_newtable (L); - lua_setglobal (L, "classifiers"); - } - - pcfg = lua_newuserdata (L, sizeof (struct config_file *)); - lua_setclass (L, "rspamd{config}", -1); - *pcfg = cfg; - lua_setglobal (L, "rspamd_config"); - - /* Clear stack from globals */ - lua_pop (L, 4); -} - -static gboolean -rspamd_rcl_lua_handler (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - const gchar *lua_src = rspamd_mempool_strdup (cfg->cfg_pool, ucl_object_tostring (obj)); - gchar *cur_dir, *lua_dir, *lua_file, *tmp1, *tmp2; - lua_State *L = cfg->lua_state; - - tmp1 = g_strdup (lua_src); - tmp2 = g_strdup (lua_src); - lua_dir = dirname (tmp1); - lua_file = basename (tmp2); - if (lua_dir && lua_file) { - cur_dir = g_malloc (PATH_MAX); - if (getcwd (cur_dir, PATH_MAX) != NULL && chdir (lua_dir) != -1) { - /* Load file */ - if (luaL_loadfile (L, lua_file) != 0) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot load lua file %s: %s", - lua_src, lua_tostring (L, -1)); - if (chdir (cur_dir) == -1) { - msg_err ("cannot chdir to %s: %s", cur_dir, strerror (errno));; - } - g_free (cur_dir); - g_free (tmp1); - g_free (tmp2); - return FALSE; - } - 
rspamd_rcl_set_lua_globals (cfg, L); - /* Now do it */ - if (lua_pcall (L, 0, LUA_MULTRET, 0) != 0) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot init lua file %s: %s", - lua_src, lua_tostring (L, -1)); - if (chdir (cur_dir) == -1) { - msg_err ("cannot chdir to %s: %s", cur_dir, strerror (errno));; - } - g_free (cur_dir); - g_free (tmp1); - g_free (tmp2); - return FALSE; - } - } - else { - g_set_error (err, CFG_RCL_ERROR, ENOENT, "cannot chdir to %s: %s", - lua_src, strerror (errno)); - if (chdir (cur_dir) == -1) { - msg_err ("cannot chdir to %s: %s", cur_dir, strerror (errno));; - } - g_free (cur_dir); - g_free (tmp1); - g_free (tmp2); - return FALSE; - - } - if (chdir (cur_dir) == -1) { - msg_err ("cannot chdir to %s: %s", cur_dir, strerror (errno));; - } - g_free (cur_dir); - g_free (tmp1); - g_free (tmp2); - } - else { - g_set_error (err, CFG_RCL_ERROR, ENOENT, "cannot find to %s: %s", - lua_src, strerror (errno)); - return FALSE; - } - - return TRUE; -} - -static gboolean -rspamd_rcl_add_module_path (struct config_file *cfg, const gchar *path, GError **err) -{ - struct stat st; - struct script_module *cur_mod; - glob_t globbuf; - gchar *pattern; - size_t len; - guint i; - - if (stat (path, &st) == -1) { - g_set_error (err, CFG_RCL_ERROR, errno, "cannot stat path %s, %s", path, strerror (errno)); - return FALSE; - } - - /* Handle directory */ - if (S_ISDIR (st.st_mode)) { - globbuf.gl_offs = 0; - len = strlen (path) + sizeof ("*.lua"); - pattern = g_malloc (len); - snprintf (pattern, len, "%s%s", path, "*.lua"); - - if (glob (pattern, GLOB_DOOFFS, NULL, &globbuf) == 0) { - for (i = 0; i < globbuf.gl_pathc; i ++) { - cur_mod = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct script_module)); - cur_mod->path = rspamd_mempool_strdup (cfg->cfg_pool, globbuf.gl_pathv[i]); - cfg->script_modules = g_list_prepend (cfg->script_modules, cur_mod); - } - globfree (&globbuf); - g_free (pattern); - } - else { - g_set_error (err, CFG_RCL_ERROR, errno, "glob failed 
for %s, %s", pattern, strerror (errno)); - g_free (pattern); - return FALSE; - } - } - else { - /* Handle single file */ - cur_mod = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct script_module)); - cur_mod->path = rspamd_mempool_strdup (cfg->cfg_pool, path); - cfg->script_modules = g_list_prepend (cfg->script_modules, cur_mod); - } - - return TRUE; -} - -static gboolean -rspamd_rcl_modules_handler (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - const ucl_object_t *val, *cur; - const gchar *data; - - if (obj->type == UCL_OBJECT) { - val = ucl_object_find_key (obj, "path"); - - LL_FOREACH (val, cur) { - if (ucl_object_tostring_safe (cur, &data)) { - if (!rspamd_rcl_add_module_path (cfg, rspamd_mempool_strdup (cfg->cfg_pool, data), err)) { - return FALSE; - } - } - } - } - else if (ucl_object_tostring_safe (obj, &data)) { - if (!rspamd_rcl_add_module_path (cfg, rspamd_mempool_strdup (cfg->cfg_pool, data), err)) { - return FALSE; - } - } - else { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "module parameter has wrong type (must be an object or a string)"); - return FALSE; - } - - return TRUE; -} - -static gboolean -rspamd_rcl_statfile_handler (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - struct classifier_config *ccf = ud; - const ucl_object_t *val; - struct statfile *st; - const gchar *data; - gdouble binlog_rotate; - GList *labels; - - st = check_statfile_conf (cfg, NULL); - - val = ucl_object_find_key (obj, "binlog"); - if (val != NULL && ucl_object_tostring_safe (val, &data)) { - if (st->binlog == NULL) { - st->binlog = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_binlog_params)); - } - if (g_ascii_strcasecmp (data, "master") == 0) { - st->binlog->affinity = AFFINITY_MASTER; - } - else if (g_ascii_strcasecmp (data, "slave") == 0) { - st->binlog->affinity = AFFINITY_SLAVE; - } - else { - 
st->binlog->affinity = AFFINITY_NONE; - } - /* Parse remaining binlog attributes */ - val = ucl_object_find_key (obj, "binlog_rotate"); - if (val != NULL && ucl_object_todouble_safe (val, &binlog_rotate)) { - st->binlog->rotate_time = binlog_rotate; - } - val = ucl_object_find_key (obj, "binlog_master"); - if (val != NULL && ucl_object_tostring_safe (val, &data)) { - if (!parse_host_port (cfg->cfg_pool, data, &st->binlog->master_addr, &st->binlog->master_port)) { - msg_err ("cannot parse master address: %s", data); - return FALSE; - } - } - } - - - if (rspamd_rcl_section_parse_defaults (section, cfg, obj, st, err)) { - ccf->statfiles = g_list_prepend (ccf->statfiles, st); - if (st->label != NULL) { - labels = g_hash_table_lookup (ccf->labels, st->label); - if (labels != NULL) { - labels = g_list_append (labels, st); - } - else { - g_hash_table_insert (ccf->labels, st->label, g_list_prepend (NULL, st)); - } - } - if (st->symbol != NULL) { - g_hash_table_insert (cfg->classifiers_symbols, st->symbol, st); - } - else { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "statfile must have a symbol defined"); - return FALSE; - } - - if (st->path == NULL) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "statfile must have a path defined"); - return FALSE; - } - - st->opts = (ucl_object_t *)obj; - - val = ucl_object_find_key (obj, "spam"); - if (val == NULL) { - msg_info ("statfile %s has no explicit 'spam' setting, trying to guess by symbol", st->symbol); - if (rspamd_strncasestr (st->symbol, "spam", strlen (st->symbol)) != NULL) { - st->is_spam = TRUE; - } - else if (rspamd_strncasestr (st->symbol, "ham", strlen (st->symbol)) != NULL) { - st->is_spam = FALSE; - } - else { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot guess spam setting from %s", st->symbol); - return FALSE; - } - msg_info ("guessed that statfile with symbol %s is %s", st->symbol, st->is_spam ? 
- "spam" : "ham"); - } - return TRUE; - } - - return FALSE; -} - -static gboolean -rspamd_rcl_classifier_handler (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - const ucl_object_t *val, *cur; - ucl_object_iter_t it = NULL; - const gchar *key, *type; - struct classifier_config *ccf, *found = NULL; - gboolean res = TRUE; - struct rspamd_rcl_section *stat_section; - GList *cur_cl; - - val = ucl_object_find_key (obj, "type"); - if (val == NULL || !ucl_object_tostring_safe (val, &type)) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "classifier should have type defined"); - return FALSE; - } - - cur_cl = cfg->classifiers; - while (cur_cl != NULL) { - ccf = cur_cl->data; - if (g_ascii_strcasecmp (ccf->classifier->name, type) == 0) { - found = ccf; - break; - } - cur_cl = g_list_next (cur_cl); - } - - if (found == NULL) { - ccf = check_classifier_conf (cfg, NULL); - ccf->classifier = get_classifier (type); - } - else { - ccf = found; - } - - HASH_FIND_STR (section->subsections, "statfile", stat_section); - - while ((val = ucl_iterate_object (obj, &it, true)) != NULL && res) { - key = ucl_object_key (val); - if (key != NULL) { - if (g_ascii_strcasecmp (key, "statfile") == 0) { - LL_FOREACH (val, cur) { - res = rspamd_rcl_statfile_handler (cfg, cur, ccf, stat_section, err); - if (!res) { - return FALSE; - } - } - } - else if (g_ascii_strcasecmp (key, "type") == 0 && val->type == UCL_STRING) { - continue; - } - else if (g_ascii_strcasecmp (key, "tokenizer") == 0 && val->type == UCL_STRING) { - ccf->tokenizer = get_tokenizer (ucl_object_tostring (val)); - } - else { - /* Just insert a value of option to the hash */ - g_hash_table_insert (ccf->opts, (gpointer)key, (gpointer)ucl_object_tostring_forced (val)); - } - } - } - - if (found == NULL) { - cfg->classifiers = g_list_prepend (cfg->classifiers, ccf); - } - - - return res; -} - -static gboolean -rspamd_rcl_composite_handler (struct config_file *cfg, 
const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - const ucl_object_t *val; - struct expression *expr; - struct rspamd_composite *composite; - const gchar *composite_name, *composite_expression; - gboolean new = TRUE; - - val = ucl_object_find_key (obj, "name"); - if (val == NULL || !ucl_object_tostring_safe (val, &composite_name)) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "composite must have a name defined"); - return FALSE; - } - - if (g_hash_table_lookup (cfg->composite_symbols, composite_name) != NULL) { - msg_warn ("composite %s is redefined", composite_name); - new = FALSE; - } - - val = ucl_object_find_key (obj, "expression"); - if (val == NULL || !ucl_object_tostring_safe (val, &composite_expression)) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "composite must have an expression defined"); - return FALSE; - } - - if ((expr = parse_expression (cfg->cfg_pool, (gchar *)composite_expression)) == NULL) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot parse composite expression: %s", composite_expression); - return FALSE; - } - - composite = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct rspamd_composite)); - composite->expr = expr; - composite->id = g_hash_table_size (cfg->composite_symbols) + 1; - g_hash_table_insert (cfg->composite_symbols, (gpointer)composite_name, composite); - - if (new) { - register_virtual_symbol (&cfg->cache, composite_name, 1); - } - - return TRUE; -} - -/** - * Fake handler to parse default options only, uses struct cfg_file as pointer - * for default handlers - */ -static gboolean -rspamd_rcl_empty_handler (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - return rspamd_rcl_section_parse_defaults (section, cfg, obj, cfg, err); -} - -/** - * Add new section to the configuration - * @param top top section - * @param name the name of the section - * @param handler handler function for all attributes - * @param 
type type of object handled by a handler - * @param required whether at least one of these sections is required - * @param strict_type turn on strict check for types for this section - * @return newly created structure - */ -static inline struct rspamd_rcl_section* -rspamd_rcl_add_section (struct rspamd_rcl_section **top, - const gchar *name, rspamd_rcl_handler_t handler, - enum ucl_type type, gboolean required, gboolean strict_type) -{ - struct rspamd_rcl_section *new; - - new = g_slice_alloc0 (sizeof (struct rspamd_rcl_section)); - new->name = name; - new->handler = handler; - new->type = type; - new->strict_type = strict_type; - - HASH_ADD_KEYPTR (hh, *top, new->name, strlen (new->name), new); - return new; -} - -/** - * Add a default handler for a section - * @param section section pointer - * @param name name of param - * @param handler handler of param - * @param offset offset in a structure - * @param flags flags for the parser - * @return newly created structure - */ -static inline struct rspamd_rcl_default_handler_data * -rspamd_rcl_add_default_handler (struct rspamd_rcl_section *section, const gchar *name, - rspamd_rcl_handler_t handler, gsize offset, gint flags) -{ - struct rspamd_rcl_default_handler_data *new; - - new = g_slice_alloc0 (sizeof (struct rspamd_rcl_default_handler_data)); - new->key = name; - new->handler = handler; - new->pd.offset = offset; - new->pd.flags = flags; - - HASH_ADD_KEYPTR (hh, section->default_parser, new->key, strlen (new->key), new); - return new; -} - -struct rspamd_rcl_section* -rspamd_rcl_config_init (void) -{ - struct rspamd_rcl_section *new = NULL, *sub, *ssub; - - /* TODO: add all known rspamd sections here */ - /** - * Logging section - */ - sub = rspamd_rcl_add_section (&new, "logging", rspamd_rcl_logging_handler, UCL_OBJECT, - FALSE, TRUE); - /* Default handlers */ - rspamd_rcl_add_default_handler (sub, "log_buffer", rspamd_rcl_parse_struct_integer, - G_STRUCT_OFFSET (struct config_file, log_buf_size), 0); - 
rspamd_rcl_add_default_handler (sub, "log_urls", rspamd_rcl_parse_struct_boolean, - G_STRUCT_OFFSET (struct config_file, log_urls), 0); - rspamd_rcl_add_default_handler (sub, "debug_ip", rspamd_rcl_parse_struct_string, - G_STRUCT_OFFSET (struct config_file, debug_ip_map), 0); - rspamd_rcl_add_default_handler (sub, "debug_symbols", rspamd_rcl_parse_struct_string_list, - G_STRUCT_OFFSET (struct config_file, debug_symbols), 0); - rspamd_rcl_add_default_handler (sub, "log_color", rspamd_rcl_parse_struct_boolean, - G_STRUCT_OFFSET (struct config_file, log_color), 0); - /** - * Options section - */ - sub = rspamd_rcl_add_section (&new, "options", rspamd_rcl_options_handler, UCL_OBJECT, - FALSE, TRUE); - rspamd_rcl_add_default_handler (sub, "cache_file", rspamd_rcl_parse_struct_string, - G_STRUCT_OFFSET (struct config_file, cache_filename), RSPAMD_CL_FLAG_STRING_PATH); - rspamd_rcl_add_default_handler (sub, "dns_nameserver", rspamd_rcl_parse_struct_string_list, - G_STRUCT_OFFSET (struct config_file, nameservers), 0); - rspamd_rcl_add_default_handler (sub, "dns_timeout", rspamd_rcl_parse_struct_time, - G_STRUCT_OFFSET (struct config_file, dns_timeout), RSPAMD_CL_FLAG_TIME_FLOAT); - rspamd_rcl_add_default_handler (sub, "dns_retransmits", rspamd_rcl_parse_struct_integer, - G_STRUCT_OFFSET (struct config_file, dns_retransmits), RSPAMD_CL_FLAG_INT_32); - rspamd_rcl_add_default_handler (sub, "dns_sockets", rspamd_rcl_parse_struct_integer, - G_STRUCT_OFFSET (struct config_file, dns_io_per_server), RSPAMD_CL_FLAG_INT_32); - rspamd_rcl_add_default_handler (sub, "raw_mode", rspamd_rcl_parse_struct_boolean, - G_STRUCT_OFFSET (struct config_file, raw_mode), 0); - rspamd_rcl_add_default_handler (sub, "one_shot", rspamd_rcl_parse_struct_boolean, - G_STRUCT_OFFSET (struct config_file, one_shot_mode), 0); - rspamd_rcl_add_default_handler (sub, "check_attachements", rspamd_rcl_parse_struct_boolean, - G_STRUCT_OFFSET (struct config_file, check_text_attachements), 0); - 
rspamd_rcl_add_default_handler (sub, "tempdir", rspamd_rcl_parse_struct_string, - G_STRUCT_OFFSET (struct config_file, temp_dir), RSPAMD_CL_FLAG_STRING_PATH); - rspamd_rcl_add_default_handler (sub, "pidfile", rspamd_rcl_parse_struct_string, - G_STRUCT_OFFSET (struct config_file, pid_file), RSPAMD_CL_FLAG_STRING_PATH); - rspamd_rcl_add_default_handler (sub, "filters", rspamd_rcl_parse_struct_string, - G_STRUCT_OFFSET (struct config_file, filters_str), 0); - rspamd_rcl_add_default_handler (sub, "sync_interval", rspamd_rcl_parse_struct_time, - G_STRUCT_OFFSET (struct config_file, statfile_sync_interval), RSPAMD_CL_FLAG_TIME_INTEGER); - rspamd_rcl_add_default_handler (sub, "sync_timeout", rspamd_rcl_parse_struct_time, - G_STRUCT_OFFSET (struct config_file, statfile_sync_timeout), RSPAMD_CL_FLAG_TIME_INTEGER); - rspamd_rcl_add_default_handler (sub, "max_diff", rspamd_rcl_parse_struct_integer, - G_STRUCT_OFFSET (struct config_file, max_diff), RSPAMD_CL_FLAG_INT_SIZE); - rspamd_rcl_add_default_handler (sub, "map_watch_interval", rspamd_rcl_parse_struct_time, - G_STRUCT_OFFSET (struct config_file, map_timeout), RSPAMD_CL_FLAG_TIME_FLOAT); - rspamd_rcl_add_default_handler (sub, "dynamic_conf", rspamd_rcl_parse_struct_string, - G_STRUCT_OFFSET (struct config_file, dynamic_conf), 0); - rspamd_rcl_add_default_handler (sub, "rrd", rspamd_rcl_parse_struct_string, - G_STRUCT_OFFSET (struct config_file, rrd_file), RSPAMD_CL_FLAG_STRING_PATH); - rspamd_rcl_add_default_handler (sub, "history_file", rspamd_rcl_parse_struct_string, - G_STRUCT_OFFSET (struct config_file, history_file), RSPAMD_CL_FLAG_STRING_PATH); - rspamd_rcl_add_default_handler (sub, "use_mlock", rspamd_rcl_parse_struct_boolean, - G_STRUCT_OFFSET (struct config_file, mlock_statfile_pool), 0); - rspamd_rcl_add_default_handler (sub, "strict_protocol_headers", rspamd_rcl_parse_struct_boolean, - G_STRUCT_OFFSET (struct config_file, strict_protocol_headers), 0); - - /** - * Metric section - */ - sub = 
rspamd_rcl_add_section (&new, "metric", rspamd_rcl_metric_handler, UCL_OBJECT, - FALSE, TRUE); - - /** - * Worker section - */ - sub = rspamd_rcl_add_section (&new, "worker", rspamd_rcl_worker_handler, UCL_OBJECT, - FALSE, TRUE); - rspamd_rcl_add_default_handler (sub, "count", rspamd_rcl_parse_struct_integer, - G_STRUCT_OFFSET (struct worker_conf, count), RSPAMD_CL_FLAG_INT_16); - rspamd_rcl_add_default_handler (sub, "max_files", rspamd_rcl_parse_struct_integer, - G_STRUCT_OFFSET (struct worker_conf, rlimit_nofile), RSPAMD_CL_FLAG_INT_32); - rspamd_rcl_add_default_handler (sub, "max_core", rspamd_rcl_parse_struct_integer, - G_STRUCT_OFFSET (struct worker_conf, rlimit_maxcore), RSPAMD_CL_FLAG_INT_32); - - /** - * Lua handler - */ - sub = rspamd_rcl_add_section (&new, "lua", rspamd_rcl_lua_handler, UCL_STRING, - FALSE, TRUE); - - /** - * Modules handler - */ - sub = rspamd_rcl_add_section (&new, "modules", rspamd_rcl_modules_handler, UCL_OBJECT, - FALSE, FALSE); - - /** - * Classifiers handler - */ - sub = rspamd_rcl_add_section (&new, "classifier", rspamd_rcl_classifier_handler, UCL_OBJECT, - FALSE, TRUE); - ssub = rspamd_rcl_add_section (&sub->subsections, "statfile", rspamd_rcl_statfile_handler, - UCL_OBJECT, TRUE, TRUE); - rspamd_rcl_add_default_handler (ssub, "symbol", rspamd_rcl_parse_struct_string, - G_STRUCT_OFFSET (struct statfile, symbol), 0); - rspamd_rcl_add_default_handler (ssub, "path", rspamd_rcl_parse_struct_string, - G_STRUCT_OFFSET (struct statfile, path), RSPAMD_CL_FLAG_STRING_PATH); - rspamd_rcl_add_default_handler (ssub, "label", rspamd_rcl_parse_struct_string, - G_STRUCT_OFFSET (struct statfile, label), 0); - rspamd_rcl_add_default_handler (ssub, "size", rspamd_rcl_parse_struct_integer, - G_STRUCT_OFFSET (struct statfile, size), RSPAMD_CL_FLAG_INT_SIZE); - rspamd_rcl_add_default_handler (ssub, "spam", rspamd_rcl_parse_struct_boolean, - G_STRUCT_OFFSET (struct statfile, is_spam), 0); - - /** - * Composites handler - */ - sub = 
rspamd_rcl_add_section (&new, "composite", rspamd_rcl_composite_handler, UCL_OBJECT, - FALSE, TRUE); - - return new; -} - -struct rspamd_rcl_section * -rspamd_rcl_config_get_section (struct rspamd_rcl_section *top, - const char *path) -{ - struct rspamd_rcl_section *cur, *found; - char **path_components; - gint ncomponents, i; - - - if (path == NULL) { - return top; - } - - path_components = g_strsplit_set (path, "/", -1); - ncomponents = g_strv_length (path_components); - - cur = top; - for (i = 0; i < ncomponents; i ++) { - if (cur == NULL) { - g_strfreev (path_components); - return NULL; - } - HASH_FIND_STR (cur, path_components[i], found); - if (found == NULL) { - g_strfreev (path_components); - return NULL; - } - cur = found; - } - - g_strfreev (path_components); - return found; -} - -gboolean -rspamd_read_rcl_config (struct rspamd_rcl_section *top, - struct config_file *cfg, const ucl_object_t *obj, GError **err) -{ - const ucl_object_t *found, *cur_obj; - struct rspamd_rcl_section *cur, *tmp; - - if (obj->type != UCL_OBJECT) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "top configuration must be an object"); - return FALSE; - } - - /* Iterate over known sections and ignore unknown ones */ - HASH_ITER (hh, top, cur, tmp) { - found = ucl_object_find_key (obj, cur->name); - if (found == NULL) { - if (cur->required) { - g_set_error (err, CFG_RCL_ERROR, ENOENT, "required section %s is missing", cur->name); - return FALSE; - } - } - else { - /* Check type */ - if (cur->strict_type) { - if (cur->type != found->type) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "object in section %s has invalid type", cur->name); - return FALSE; - } - } - LL_FOREACH (found, cur_obj) { - if (!cur->handler (cfg, cur_obj, NULL, cur, err)) { - return FALSE; - } - } - } - if (cur->fin) { - cur->fin (cfg, cur->fin_ud); - } - } - - cfg->rcl_obj = (ucl_object_t *)obj; - - return TRUE; -} - -gboolean rspamd_rcl_section_parse_defaults (struct rspamd_rcl_section *section, - struct config_file 
*cfg, const ucl_object_t *obj, gpointer ptr, - GError **err) -{ - const ucl_object_t *found; - struct rspamd_rcl_default_handler_data *cur, *tmp; - - if (obj->type != UCL_OBJECT) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "default configuration must be an object"); - return FALSE; - } - - HASH_ITER (hh, section->default_parser, cur, tmp) { - found = ucl_object_find_key (obj, cur->key); - if (found != NULL) { - cur->pd.user_struct = ptr; - if (!cur->handler (cfg, found, &cur->pd, section, err)) { - return FALSE; - } - } - } - - return TRUE; -} - -gboolean -rspamd_rcl_parse_struct_string (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - struct rspamd_rcl_struct_parser *pd = ud; - gchar **target; - const gsize num_str_len = 32; - - target = (gchar **)(((gchar *)pd->user_struct) + pd->offset); - switch (obj->type) { - case UCL_STRING: - *target = rspamd_mempool_strdup (cfg->cfg_pool, ucl_copy_value_trash (obj)); - break; - case UCL_INT: - *target = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); - rspamd_snprintf (*target, num_str_len, "%L", obj->value.iv); - break; - case UCL_FLOAT: - *target = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); - rspamd_snprintf (*target, num_str_len, "%f", obj->value.dv); - break; - case UCL_BOOLEAN: - *target = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); - rspamd_snprintf (*target, num_str_len, "%b", (gboolean)obj->value.iv); - break; - default: - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert object or array to string"); - return FALSE; - } - - return TRUE; -} - -gboolean -rspamd_rcl_parse_struct_integer (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - struct rspamd_rcl_struct_parser *pd = ud; - union { - gint *ip; - gint32 *i32p; - gint16 *i16p; - gint64 *i64p; - gsize *sp; - } target; - gint64 val; - - if (pd->flags == RSPAMD_CL_FLAG_INT_32) { - target.i32p = (gint32 
*)(((gchar *)pd->user_struct) + pd->offset); - if (!ucl_object_toint_safe (obj, &val)) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer"); - return FALSE; - } - *target.i32p = val; - } - else if (pd->flags == RSPAMD_CL_FLAG_INT_64) { - target.i64p = (gint64 *)(((gchar *)pd->user_struct) + pd->offset); - if (!ucl_object_toint_safe (obj, &val)) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer"); - return FALSE; - } - *target.i64p = val; - } - else if (pd->flags == RSPAMD_CL_FLAG_INT_SIZE) { - target.sp = (gsize *)(((gchar *)pd->user_struct) + pd->offset); - if (!ucl_object_toint_safe (obj, &val)) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer"); - return FALSE; - } - *target.sp = val; - } - else if (pd->flags == RSPAMD_CL_FLAG_INT_16) { - target.i16p = (gint16 *)(((gchar *)pd->user_struct) + pd->offset); - if (!ucl_object_toint_safe (obj, &val)) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer"); - return FALSE; - } - *target.i16p = val; - } - else { - target.ip = (gint *)(((gchar *)pd->user_struct) + pd->offset); - if (!ucl_object_toint_safe (obj, &val)) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer"); - return FALSE; - } - *target.ip = val; - } - - return TRUE; -} - -gboolean -rspamd_rcl_parse_struct_double (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - struct rspamd_rcl_struct_parser *pd = ud; - gdouble *target; - - target = (gdouble *)(((gchar *)pd->user_struct) + pd->offset); - - if (!ucl_object_todouble_safe (obj, target)) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to double"); - return FALSE; - } - - return TRUE; -} - -gboolean -rspamd_rcl_parse_struct_time (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - struct rspamd_rcl_struct_parser *pd = ud; 
- union { - gint *psec; - guint32 *pu32; - gdouble *pdv; - struct timeval *ptv; - struct timespec *pts; - } target; - gdouble val; - - if (!ucl_object_todouble_safe (obj, &val)) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to double"); - return FALSE; - } - - if (pd->flags == RSPAMD_CL_FLAG_TIME_TIMEVAL) { - target.ptv = (struct timeval *)(((gchar *)pd->user_struct) + pd->offset); - target.ptv->tv_sec = (glong)val; - target.ptv->tv_usec = (val - (glong)val) * 1000000; - } - else if (pd->flags == RSPAMD_CL_FLAG_TIME_TIMESPEC) { - target.pts = (struct timespec *)(((gchar *)pd->user_struct) + pd->offset); - target.pts->tv_sec = (glong)val; - target.pts->tv_nsec = (val - (glong)val) * 1000000000000LL; - } - else if (pd->flags == RSPAMD_CL_FLAG_TIME_FLOAT) { - target.pdv = (double *)(((gchar *)pd->user_struct) + pd->offset); - *target.pdv = val; - } - else if (pd->flags == RSPAMD_CL_FLAG_TIME_INTEGER) { - target.psec = (gint *)(((gchar *)pd->user_struct) + pd->offset); - *target.psec = val * 1000; - } - else if (pd->flags == RSPAMD_CL_FLAG_TIME_UINT_32) { - target.pu32 = (guint32 *)(((gchar *)pd->user_struct) + pd->offset); - *target.pu32 = val * 1000; - } - else { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid flags to parse time value"); - return FALSE; - } - - return TRUE; -} - -gboolean -rspamd_rcl_parse_struct_string_list (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - struct rspamd_rcl_struct_parser *pd = ud; - GList **target; - gchar *val; - const ucl_object_t *cur; - const gsize num_str_len = 32; - ucl_object_iter_t iter = NULL; - - target = (GList **)(((gchar *)pd->user_struct) + pd->offset); - - if (obj->type != UCL_ARRAY) { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "an array of strings is expected"); - return FALSE; - } - - while ((cur = ucl_iterate_object (obj, &iter, true)) != NULL) { - switch (cur->type) { - case UCL_STRING: - val = rspamd_mempool_strdup 
(cfg->cfg_pool, ucl_copy_value_trash (cur)); - break; - case UCL_INT: - val = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); - rspamd_snprintf (val, num_str_len, "%L", cur->value.iv); - break; - case UCL_FLOAT: - val = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); - rspamd_snprintf (val, num_str_len, "%f", cur->value.dv); - break; - case UCL_BOOLEAN: - val = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); - rspamd_snprintf (val, num_str_len, "%b", (gboolean)cur->value.iv); - break; - default: - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert an object or array to string"); - return FALSE; - } - *target = g_list_prepend (*target, val); - } - - /* Add a destructor */ - rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_list_free, *target); - - return TRUE; -} - -gboolean -rspamd_rcl_parse_struct_boolean (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err) -{ - struct rspamd_rcl_struct_parser *pd = ud; - gboolean *target; - - target = (gboolean *)(((gchar *)pd->user_struct) + pd->offset); - - if (obj->type == UCL_BOOLEAN) { - *target = obj->value.iv; - } - else if (obj->type == UCL_INT) { - *target = obj->value.iv; - } - else { - g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert an object to boolean"); - return FALSE; - } - - return TRUE; -} - -void -rspamd_rcl_register_worker_option (struct config_file *cfg, gint type, const gchar *name, - rspamd_rcl_handler_t handler, gpointer target, gsize offset, gint flags) -{ - struct rspamd_worker_param_parser *nhandler; - struct rspamd_worker_cfg_parser *nparser; - - HASH_FIND_INT (cfg->wrk_parsers, &type, nparser); - if (nparser == NULL) { - /* Allocate new parser for this worker */ - nparser = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_cfg_parser)); - nparser->type = type; - HASH_ADD_INT (cfg->wrk_parsers, type, nparser); - } - - HASH_FIND_STR (nparser->parsers, name, nhandler); - if 
(nhandler != NULL) { - msg_warn ("handler for parameter %s is already registered for worker type %s", - name, g_quark_to_string (type)); - return; - } - nhandler = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_param_parser)); - nhandler->name = name; - nhandler->parser.flags = flags; - nhandler->parser.offset = offset; - nhandler->parser.user_struct = target; - nhandler->handler = handler; - HASH_ADD_KEYPTR (hh, nparser->parsers, name, strlen (name), nhandler); -} - - -void -rspamd_rcl_register_worker_parser (struct config_file *cfg, gint type, - gboolean (*func)(ucl_object_t *, gpointer), gpointer ud) -{ - struct rspamd_worker_cfg_parser *nparser; - HASH_FIND_INT (cfg->wrk_parsers, &type, nparser); - if (nparser == NULL) { - /* Allocate new parser for this worker */ - nparser = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_cfg_parser)); - nparser->type = type; - HASH_ADD_INT (cfg->wrk_parsers, type, nparser); - } - - nparser->def_obj_parser = func; - nparser->def_ud = ud; -} diff --git a/src/cfg_rcl.h b/src/cfg_rcl.h deleted file mode 100644 index 99839d1ea..000000000 --- a/src/cfg_rcl.h +++ /dev/null @@ -1,238 +0,0 @@ -/* Copyright (c) 2013, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef CFG_RCL_H_ -#define CFG_RCL_H_ - -#include "config.h" -#include "ucl/include/ucl.h" -#include "uthash.h" - -#define CFG_RCL_ERROR cfg_rcl_error_quark () -static inline GQuark -cfg_rcl_error_quark (void) -{ - return g_quark_from_static_string ("cfg-rcl-error-quark"); -} - -struct rspamd_rcl_section; -struct config_file; - -struct rspamd_rcl_struct_parser { - gpointer user_struct; - goffset offset; - enum { - RSPAMD_CL_FLAG_TIME_FLOAT = 0x1 << 0, - RSPAMD_CL_FLAG_TIME_TIMEVAL = 0x1 << 1, - RSPAMD_CL_FLAG_TIME_TIMESPEC = 0x1 << 2, - RSPAMD_CL_FLAG_TIME_INTEGER = 0x1 << 3, - RSPAMD_CL_FLAG_TIME_UINT_32 = 0x1 << 4, - RSPAMD_CL_FLAG_INT_16 = 0x1 << 5, - RSPAMD_CL_FLAG_INT_32 = 0x1 << 6, - RSPAMD_CL_FLAG_INT_64 = 0x1 << 7, - RSPAMD_CL_FLAG_INT_SIZE = 0x1 << 8, - RSPAMD_CL_FLAG_STRING_PATH = 0x1 << 9 - } flags; -}; - -/** - * Common handler type - * @param cfg configuration - * @param obj object to parse - * @param ud user data (depends on section) - * @param err error object - * @return TRUE if a section has been parsed - */ -typedef gboolean (*rspamd_rcl_handler_t) (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err); - -/** - * A handler type that is called at the end of section parsing - * @param cfg configuration - * @param ud user data - */ -typedef void (*rspamd_rcl_section_fin_t)(struct config_file *cfg, gpointer ud); - -struct rspamd_rcl_default_handler_data { - struct 
rspamd_rcl_struct_parser pd; - const gchar *key; - rspamd_rcl_handler_t handler; - UT_hash_handle hh; -}; - -struct rspamd_rcl_section { - const gchar *name; /**< name of section */ - rspamd_rcl_handler_t handler; /**< handler of section attributes */ - enum ucl_type type; /**< type of attribute */ - gboolean required; /**< whether this param is required */ - gboolean strict_type; /**< whether we need strict type */ - UT_hash_handle hh; /** hash handle */ - struct rspamd_rcl_section *subsections; /**< hash table of subsections */ - struct rspamd_rcl_default_handler_data *default_parser; /**< generic parsing fields */ - rspamd_rcl_section_fin_t fin; /** called at the end of section parsing */ - gpointer fin_ud; -}; - -/** - * Init common sections known to rspamd - * @return top section - */ -struct rspamd_rcl_section* rspamd_rcl_config_init (void); - -/** - * Get a section specified by path, it understand paths separated by '/' character - * @param top top section - * @param path '/' divided path - * @return - */ -struct rspamd_rcl_section *rspamd_rcl_config_get_section (struct rspamd_rcl_section *top, - const char *path); - -/** - * Read RCL configuration and parse it to a config file - * @param top top section - * @param cfg target configuration - * @param obj object to handle - * @return TRUE if an object can be parsed - */ -gboolean rspamd_read_rcl_config (struct rspamd_rcl_section *top, - struct config_file *cfg, const ucl_object_t *obj, GError **err); - - -/** - * Parse default structure for a section - * @param section section - * @param cfg config file - * @param obj object to parse - * @param ptr ptr to pass - * @param err error ptr - * @return TRUE if the object has been parsed - */ -gboolean rspamd_rcl_section_parse_defaults (struct rspamd_rcl_section *section, - struct config_file *cfg, const ucl_object_t *obj, gpointer ptr, - GError **err); -/** - * Here is a section of common handlers that accepts rcl_struct_parser - * which itself contains a struct 
pointer and the offset of a member in a - * specific structure - */ - -/** - * Parse a string field of a structure - * @param cfg config pointer - * @param obj object to parse - * @param ud struct_parser structure - * @param section the current section - * @param err error pointer - * @return TRUE if a string value has been successfully parsed - */ -gboolean rspamd_rcl_parse_struct_string (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err); - -/** - * Parse an integer field of a structure - * @param cfg config pointer - * @param obj object to parse - * @param ud struct_parser structure - * @param section the current section - * @param err error pointer - * @return TRUE if a value has been successfully parsed - */ -gboolean rspamd_rcl_parse_struct_integer (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err); - - -/** - * Parse a float field of a structure - * @param cfg config pointer - * @param obj object to parse - * @param ud struct_parser structure - * @param section the current section - * @param err error pointer - * @return TRUE if a value has been successfully parsed - */ -gboolean rspamd_rcl_parse_struct_double (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err); - -/** - * Parse a time field of a structure - * @param cfg config pointer - * @param obj object to parse - * @param ud struct_parser structure (flags mean the exact structure used) - * @param section the current section - * @param err error pointer - * @return TRUE if a value has been successfully parsed - */ -gboolean rspamd_rcl_parse_struct_time (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err); - -/** - * Parse a string list field of a structure presented by a GList* object - * @param cfg config pointer - * @param obj object to parse - * 
@param ud struct_parser structure (flags mean the exact structure used) - * @param section the current section - * @param err error pointer - * @return TRUE if a value has been successfully parsed - */ -gboolean rspamd_rcl_parse_struct_string_list (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err); - -/** - * Parse a boolean field of a structure - * @param cfg config pointer - * @param obj object to parse - * @param ud struct_parser structure (flags mean the exact structure used) - * @param section the current section - * @param err error pointer - * @return TRUE if a value has been successfully parsed - */ -gboolean rspamd_rcl_parse_struct_boolean (struct config_file *cfg, const ucl_object_t *obj, - gpointer ud, struct rspamd_rcl_section *section, GError **err); - -/** - * Utility functions - */ - -/** - * Register new parser for a worker type of an option with the specified name - * @param cfg config structure - * @param type type of worker (GQuark) - * @param name name of option - * @param handler handler of option - * @param target opaque target structure - * @param offset offset inside a structure - */ -void rspamd_rcl_register_worker_option (struct config_file *cfg, gint type, const gchar *name, - rspamd_rcl_handler_t handler, gpointer target, gsize offset, gint flags); - -/** - * Regiester a default parser for a worker - * @param cfg config structure - * @param type type of worker (GQuark) - * @param func handler function - * @param ud userdata for handler function - */ -void rspamd_rcl_register_worker_parser (struct config_file *cfg, gint type, - gboolean (*func)(ucl_object_t *, gpointer), gpointer ud); -#endif /* CFG_RCL_H_ */ diff --git a/src/cfg_utils.c b/src/cfg_utils.c deleted file mode 100644 index 2ca846ebd..000000000 --- a/src/cfg_utils.c +++ /dev/null @@ -1,969 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#include "config.h" - -#include "cfg_file.h" -#include "main.h" -#include "filter.h" -#include "settings.h" -#include "classifiers/classifiers.h" -#include "lua/lua_common.h" -#include "kvstorage_config.h" -#include "map.h" -#include "dynamic_cfg.h" - -#define DEFAULT_SCORE 10.0 - -#define DEFAULT_RLIMIT_NOFILE 2048 -#define DEFAULT_RLIMIT_MAXCORE 0 -#define DEFAULT_MAP_TIMEOUT 10 - -struct rspamd_ucl_map_cbdata { - struct config_file *cfg; - GString *buf; -}; -static gchar* rspamd_ucl_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data); -static void rspamd_ucl_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data); - -static gboolean -parse_host_port_priority_strv (rspamd_mempool_t *pool, gchar **tokens, - gchar **addr, guint16 *port, guint *priority, guint default_port) -{ - gchar *err_str, portbuf[8]; - const gchar *cur_tok, *cur_port; - struct addrinfo hints, *res; - guint port_parsed, priority_parsed, saved_errno = errno; - gint r; - union { - struct sockaddr_in v4; - struct sockaddr_in6 v6; - } addr_holder; - - /* Now try to parse host and write address to ina */ - memset (&hints, 0, sizeof (hints)); - hints.ai_socktype = SOCK_STREAM; /* Type of the socket */ - hints.ai_flags = AI_NUMERICSERV; - - cur_tok = tokens[0]; - - if (strcmp (cur_tok, "*v6") == 0) { - hints.ai_family = AF_INET6; - hints.ai_flags |= AI_PASSIVE; - cur_tok = NULL; - } - else if (strcmp (cur_tok, "*v4") == 0) { - hints.ai_family = AF_INET; - hints.ai_flags |= AI_PASSIVE; - cur_tok = NULL; - } - else { - hints.ai_family = AF_UNSPEC; - } - - if (tokens[1] != NULL) { - /* Port part */ - rspamd_strlcpy (portbuf, tokens[1], sizeof (portbuf)); - cur_port = portbuf; - if (port != NULL) { - errno = 0; - port_parsed = strtoul (tokens[1], &err_str, 10); - if (*err_str != '\0' || errno != 0) { - msg_warn ("cannot parse port: %s, at symbol %c, error: %s", tokens[1], *err_str, strerror (errno)); - hints.ai_flags ^= AI_NUMERICSERV; - } - else if 
(port_parsed > G_MAXUINT16) { - errno = ERANGE; - msg_warn ("cannot parse port: %s, error: %s", tokens[1], *err_str, strerror (errno)); - hints.ai_flags ^= AI_NUMERICSERV; - } - else { - *port = port_parsed; - } - } - if (priority != NULL) { - if (port != NULL) { - cur_tok = tokens[2]; - } - else { - cur_tok = tokens[1]; - } - if (cur_tok != NULL) { - /* Priority part */ - errno = 0; - priority_parsed = strtoul (cur_tok, &err_str, 10); - if (*err_str != '\0' || errno != 0) { - msg_warn ("cannot parse priority: %s, at symbol %c, error: %s", tokens[1], *err_str, strerror (errno)); - } - else { - *priority = priority_parsed; - } - } - } - } - else if (default_port != 0) { - rspamd_snprintf (portbuf, sizeof (portbuf), "%ud", default_port); - cur_port = portbuf; - } - else { - cur_port = NULL; - } - - if ((r = getaddrinfo (cur_tok, cur_port, &hints, &res)) == 0) { - memcpy (&addr_holder, res->ai_addr, MIN (sizeof (addr_holder), res->ai_addrlen)); - if (res->ai_family == AF_INET) { - if (pool != NULL) { - *addr = rspamd_mempool_alloc (pool, INET_ADDRSTRLEN + 1); - } - inet_ntop (res->ai_family, &addr_holder.v4.sin_addr, *addr, INET_ADDRSTRLEN + 1); - } - else { - if (pool != NULL) { - *addr = rspamd_mempool_alloc (pool, INET6_ADDRSTRLEN + 1); - } - inet_ntop (res->ai_family, &addr_holder.v6.sin6_addr, *addr, INET6_ADDRSTRLEN + 1); - } - freeaddrinfo (res); - } - else { - msg_err ("address resolution for %s failed: %s", tokens[0], gai_strerror (r)); - goto err; - } - - /* Restore errno */ - errno = saved_errno; - return TRUE; - -err: - errno = saved_errno; - return FALSE; -} - -gboolean -parse_host_port_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port, guint *priority) -{ - gchar **tokens; - gboolean ret; - - tokens = g_strsplit_set (str, ":", 0); - if (!tokens || !tokens[0]) { - return FALSE; - } - - ret = parse_host_port_priority_strv (pool, tokens, addr, port, priority, 0); - - g_strfreev (tokens); - - return ret; -} - -gboolean 
-parse_host_port (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port) -{ - return parse_host_port_priority (pool, str, addr, port, NULL); -} - -gboolean -parse_host_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint *priority) -{ - return parse_host_port_priority (pool, str, addr, NULL, priority); -} - -gboolean -parse_bind_line (struct config_file *cfg, struct worker_conf *cf, const gchar *str) -{ - struct rspamd_worker_bind_conf *cnf; - gchar **tokens, *tmp, *err; - gboolean ret = TRUE; - - if (str == NULL) { - return FALSE; - } - - tokens = g_strsplit_set (str, ":", 0); - if (!tokens || !tokens[0]) { - return FALSE; - } - - cnf = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_bind_conf)); - cnf->bind_port = DEFAULT_BIND_PORT; - cnf->bind_host = rspamd_mempool_strdup (cfg->cfg_pool, str); - cnf->ai = AF_UNSPEC; - - if (*tokens[0] == '/' || *tokens[0] == '.') { - cnf->ai = AF_UNIX; - LL_PREPEND (cf->bind_conf, cnf); - return TRUE; - } - else if (strcmp (tokens[0], "*") == 0) { - /* We need to add two listen entries: one for ipv4 and one for ipv6 */ - tmp = tokens[0]; - tokens[0] = "*v4"; - cnf->ai = AF_INET; - if ((ret = parse_host_port_priority_strv (cfg->cfg_pool, tokens, - &cnf->bind_host, &cnf->bind_port, NULL, DEFAULT_BIND_PORT))) { - LL_PREPEND (cf->bind_conf, cnf); - } - cnf = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_bind_conf)); - cnf->bind_port = DEFAULT_BIND_PORT; - cnf->bind_host = rspamd_mempool_strdup (cfg->cfg_pool, str); - cnf->ai = AF_INET6; - tokens[0] = "*v6"; - if ((ret &= parse_host_port_priority_strv (cfg->cfg_pool, tokens, - &cnf->bind_host, &cnf->bind_port, NULL, DEFAULT_BIND_PORT))) { - LL_PREPEND (cf->bind_conf, cnf); - } - tokens[0] = tmp; - } - else if (strcmp (tokens[0], "systemd") == 0) { - /* The actual socket will be passed by systemd environment */ - cnf->bind_host = rspamd_mempool_strdup (cfg->cfg_pool, str); - cnf->ai = strtoul (tokens[1], &err, 
10); - cnf->is_systemd = TRUE; - if (err == NULL || *err == '\0') { - LL_PREPEND (cf->bind_conf, cnf); - } - } - else { - if ((ret = parse_host_port_priority_strv (cfg->cfg_pool, tokens, - &cnf->bind_host, &cnf->bind_port, NULL, DEFAULT_BIND_PORT))) { - LL_PREPEND (cf->bind_conf, cnf); - } - } - - g_strfreev (tokens); - - return ret; -} - -void -init_defaults (struct config_file *cfg) -{ - - cfg->memcached_error_time = DEFAULT_UPSTREAM_ERROR_TIME; - cfg->memcached_dead_time = DEFAULT_UPSTREAM_DEAD_TIME; - cfg->memcached_maxerrors = DEFAULT_UPSTREAM_MAXERRORS; - cfg->memcached_protocol = TCP_TEXT; - - cfg->dns_timeout = 1000; - cfg->dns_retransmits = 5; - /* After 20 errors do throttling for 10 seconds */ - cfg->dns_throttling_errors = 20; - cfg->dns_throttling_time = 10000; - /* 16 sockets per DNS server */ - cfg->dns_io_per_server = 16; - - cfg->statfile_sync_interval = 60000; - cfg->statfile_sync_timeout = 20000; - - /* 20 Kb */ - cfg->max_diff = 20480; - - cfg->metrics = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - cfg->c_modules = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - cfg->composite_symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - cfg->classifiers_symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - cfg->cfg_params = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - cfg->metrics_symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - - cfg->map_timeout = DEFAULT_MAP_TIMEOUT; - - cfg->log_level = G_LOG_LEVEL_WARNING; - cfg->log_extended = TRUE; - - init_settings (cfg); - -} - -void -free_config (struct config_file *cfg) -{ - GList *cur; - struct symbols_group *gr; - - remove_all_maps (cfg); - ucl_obj_unref (cfg->rcl_obj); - g_hash_table_remove_all (cfg->metrics); - g_hash_table_unref (cfg->metrics); - g_hash_table_remove_all (cfg->c_modules); - g_hash_table_unref (cfg->c_modules); - g_hash_table_remove_all (cfg->composite_symbols); - g_hash_table_unref (cfg->composite_symbols); - 
g_hash_table_remove_all (cfg->cfg_params); - g_hash_table_unref (cfg->cfg_params); - g_hash_table_destroy (cfg->metrics_symbols); - g_hash_table_destroy (cfg->classifiers_symbols); - /* Free symbols groups */ - cur = cfg->symbols_groups; - while (cur) { - gr = cur->data; - if (gr->symbols) { - g_list_free (gr->symbols); - } - cur = g_list_next (cur); - } - if (cfg->symbols_groups) { - g_list_free (cfg->symbols_groups); - } - - if (cfg->checksum) { - g_free (cfg->checksum); - } - g_list_free (cfg->classifiers); - g_list_free (cfg->metrics_list); - rspamd_mempool_delete (cfg->cfg_pool); -} - -const ucl_object_t * -get_module_opt (struct config_file *cfg, const gchar *module_name, const gchar *opt_name) -{ - const ucl_object_t *res = NULL, *sec; - - sec = ucl_obj_get_key (cfg->rcl_obj, module_name); - if (sec != NULL) { - res = ucl_obj_get_key (sec, opt_name); - } - - return res; -} - -guint64 -parse_limit (const gchar *limit, guint len) -{ - guint64 result = 0; - const gchar *err_str; - - if (!limit || *limit == '\0' || len == 0) { - return 0; - } - - errno = 0; - result = strtoull (limit, (gchar **)&err_str, 10); - - if (*err_str != '\0') { - /* Megabytes */ - if (*err_str == 'm' || *err_str == 'M') { - result *= 1048576L; - } - /* Kilobytes */ - else if (*err_str == 'k' || *err_str == 'K') { - result *= 1024; - } - /* Gigabytes */ - else if (*err_str == 'g' || *err_str == 'G') { - result *= 1073741824L; - } - else if (len > 0 && err_str - limit != (gint)len) { - msg_warn ("invalid limit value '%s' at position '%s'", limit, err_str); - result = 0; - } - } - - return result; -} - -gchar -parse_flag (const gchar *str) -{ - guint len; - gchar c; - - if (!str || !*str) { - return -1; - } - - len = strlen (str); - - switch (len) { - case 1: - c = g_ascii_tolower (*str); - if (c == 'y' || c == '1') { - return 1; - } - else if (c == 'n' || c == '0') { - return 0; - } - break; - case 2: - if (g_ascii_strncasecmp (str, "no", len) == 0) { - return 0; - } - else if 
(g_ascii_strncasecmp (str, "on", len) == 0) { - return 1; - } - break; - case 3: - if (g_ascii_strncasecmp (str, "yes", len) == 0) { - return 1; - } - else if (g_ascii_strncasecmp (str, "off", len) == 0) { - return 0; - } - break; - case 4: - if (g_ascii_strncasecmp (str, "true", len) == 0) { - return 1; - } - break; - case 5: - if (g_ascii_strncasecmp (str, "false", len) == 0) { - return 0; - } - break; - } - - return -1; -} - -gboolean -get_config_checksum (struct config_file *cfg) -{ - gint fd; - void *map; - struct stat st; - - /* Compute checksum for config file that should be used by xml dumper */ - if ((fd = open (cfg->cfg_name, O_RDONLY)) == -1) { - msg_err ("config file %s is no longer available, cannot calculate checksum"); - return FALSE; - } - if (stat (cfg->cfg_name, &st) == -1) { - msg_err ("cannot stat %s: %s", cfg->cfg_name, strerror (errno)); - return FALSE; - } - - /* Now mmap this file to simplify reading process */ - if ((map = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { - msg_err ("cannot mmap %s: %s", cfg->cfg_name, strerror (errno)); - close (fd); - return FALSE; - } - close (fd); - - /* Get checksum for a file */ - cfg->checksum = g_compute_checksum_for_string (G_CHECKSUM_MD5, map, st.st_size); - munmap (map, st.st_size); - - return TRUE; -} -/* - * Perform post load actions - */ -void -post_load_config (struct config_file *cfg) -{ -#ifdef HAVE_CLOCK_GETTIME - struct timespec ts; -#endif - struct metric *def_metric; - -#ifdef HAVE_CLOCK_GETTIME -#ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID - clock_getres (CLOCK_PROCESS_CPUTIME_ID, &ts); -# elif defined(HAVE_CLOCK_VIRTUAL) - clock_getres (CLOCK_VIRTUAL, &ts); -# else - clock_getres (CLOCK_REALTIME, &ts); -# endif - - cfg->clock_res = (gint)log10 (1000000 / ts.tv_nsec); - if (cfg->clock_res < 0) { - cfg->clock_res = 0; - } - if (cfg->clock_res > 3) { - cfg->clock_res = 3; - } -#else - /* For gettimeofday */ - cfg->clock_res = 1; -#endif - - if ((def_metric = 
g_hash_table_lookup (cfg->metrics, DEFAULT_METRIC)) == NULL) { - def_metric = check_metric_conf (cfg, NULL); - def_metric->name = DEFAULT_METRIC; - def_metric->actions[METRIC_ACTION_REJECT].score = DEFAULT_SCORE; - cfg->metrics_list = g_list_prepend (cfg->metrics_list, def_metric); - g_hash_table_insert (cfg->metrics, DEFAULT_METRIC, def_metric); - } - - cfg->default_metric = def_metric; - - /* Lua options */ - (void)lua_post_load_config (cfg); - init_dynamic_config (cfg); -} - -#if 0 -void -parse_err (const gchar *fmt, ...) -{ - va_list aq; - gchar logbuf[BUFSIZ], readbuf[32]; - gint r; - - va_start (aq, fmt); - rspamd_strlcpy (readbuf, yytext, sizeof (readbuf)); - - r = snprintf (logbuf, sizeof (logbuf), "config file parse error! line: %d, text: %s, reason: ", yylineno, readbuf); - r += vsnprintf (logbuf + r, sizeof (logbuf) - r, fmt, aq); - - va_end (aq); - g_critical ("%s", logbuf); -} - -void -parse_warn (const gchar *fmt, ...) -{ - va_list aq; - gchar logbuf[BUFSIZ], readbuf[32]; - gint r; - - va_start (aq, fmt); - rspamd_strlcpy (readbuf, yytext, sizeof (readbuf)); - - r = snprintf (logbuf, sizeof (logbuf), "config file parse warning! 
line: %d, text: %s, reason: ", yylineno, readbuf); - r += vsnprintf (logbuf + r, sizeof (logbuf) - r, fmt, aq); - - va_end (aq); - g_warning ("%s", logbuf); -} -#endif - -void -unescape_quotes (gchar *line) -{ - gchar *c = line, *t; - - while (*c) { - if (*c == '\\' && *(c + 1) == '"') { - t = c; - while (*t) { - *t = *(t + 1); - t++; - } - } - c++; - } -} - -GList * -parse_comma_list (rspamd_mempool_t * pool, const gchar *line) -{ - GList *res = NULL; - const gchar *c, *p; - gchar *str; - - c = line; - p = c; - - while (*p) { - if (*p == ',' && *c != *p) { - str = rspamd_mempool_alloc (pool, p - c + 1); - rspamd_strlcpy (str, c, p - c + 1); - res = g_list_prepend (res, str); - /* Skip spaces */ - while (g_ascii_isspace (*(++p))); - c = p; - continue; - } - p++; - } - if (res != NULL) { - rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_list_free, res); - } - - return res; -} - -struct classifier_config * -check_classifier_conf (struct config_file *cfg, struct classifier_config *c) -{ - if (c == NULL) { - c = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct classifier_config)); - } - if (c->opts == NULL) { - c->opts = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t) g_hash_table_destroy, c->opts); - } - if (c->labels == NULL) { - c->labels = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, NULL, (GDestroyNotify)g_list_free); - rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t) g_hash_table_destroy, c->labels); - } - - return c; -} - -struct statfile* -check_statfile_conf (struct config_file *cfg, struct statfile *c) -{ - if (c == NULL) { - c = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct statfile)); - } - - return c; -} - -struct metric * -check_metric_conf (struct config_file *cfg, struct metric *c) -{ - int i; - if (c == NULL) { - c = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct metric)); - c->grow_factor = 1.0; - 
c->symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - c->descriptions = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) { - c->actions[i].score = -1.0; - } - rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t) g_hash_table_destroy, c->symbols); - rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t) g_hash_table_destroy, c->descriptions); - } - - return c; -} - -struct worker_conf * -check_worker_conf (struct config_file *cfg, struct worker_conf *c) -{ - if (c == NULL) { - c = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct worker_conf)); - c->params = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - c->active_workers = g_queue_new (); - rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_hash_table_destroy, c->params); - rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_queue_free, c->active_workers); -#ifdef HAVE_SC_NPROCESSORS_ONLN - c->count = sysconf (_SC_NPROCESSORS_ONLN); -#else - c->count = DEFAULT_WORKERS_NUM; -#endif - c->rlimit_nofile = DEFAULT_RLIMIT_NOFILE; - c->rlimit_maxcore = DEFAULT_RLIMIT_MAXCORE; - } - - return c; -} - - -static bool -rspamd_include_map_handler (const guchar *data, gsize len, void* ud) -{ - struct config_file *cfg = (struct config_file *)ud; - struct rspamd_ucl_map_cbdata *cbdata, **pcbdata; - gchar *map_line; - - map_line = rspamd_mempool_alloc (cfg->cfg_pool, len + 1); - rspamd_strlcpy (map_line, data, len + 1); - - cbdata = g_malloc (sizeof (struct rspamd_ucl_map_cbdata)); - pcbdata = g_malloc (sizeof (struct rspamd_ucl_map_cbdata *)); - cbdata->buf = NULL; - cbdata->cfg = cfg; - *pcbdata = cbdata; - - return add_map (cfg, map_line, "ucl include", rspamd_ucl_read_cb, rspamd_ucl_fin_cb, (void **)pcbdata); -} - -/* - * Variables: - * $CONFDIR - configuration directory - * $RUNDIR - local states directory - * $DBDIR - databases dir - * $LOGDIR - 
logs dir - * $PLUGINSDIR - pluggins dir - * $PREFIX - installation prefix - * $VERSION - rspamd version - */ - -#define RSPAMD_CONFDIR_MACRO "CONFDIR" -#define RSPAMD_RUNDIR_MACRO "RUNDIR" -#define RSPAMD_DBDIR_MACRO "DBDIR" -#define RSPAMD_LOGDIR_MACRO "LOGDIR" -#define RSPAMD_PLUGINSDIR_MACRO "PLUGINSDIR" -#define RSPAMD_PREFIX_MACRO "PREFIX" -#define RSPAMD_VERSION_MACRO "VERSION" - -static void -rspamd_ucl_add_conf_variables (struct ucl_parser *parser) -{ - ucl_parser_register_variable (parser, RSPAMD_CONFDIR_MACRO, RSPAMD_CONFDIR); - ucl_parser_register_variable (parser, RSPAMD_RUNDIR_MACRO, RSPAMD_RUNDIR); - ucl_parser_register_variable (parser, RSPAMD_DBDIR_MACRO, RSPAMD_DBDIR); - ucl_parser_register_variable (parser, RSPAMD_LOGDIR_MACRO, RSPAMD_LOGDIR); - ucl_parser_register_variable (parser, RSPAMD_PLUGINSDIR_MACRO, RSPAMD_PLUGINSDIR); - ucl_parser_register_variable (parser, RSPAMD_PREFIX_MACRO, RSPAMD_PREFIX); - ucl_parser_register_variable (parser, RSPAMD_VERSION_MACRO, RVERSION); -} - -static void -rspamd_ucl_add_conf_macros (struct ucl_parser *parser, struct config_file *cfg) -{ - ucl_parser_register_macro (parser, "include_map", rspamd_include_map_handler, cfg); -} - -gboolean -read_rspamd_config (struct config_file *cfg, const gchar *filename, - const gchar *convert_to, rspamd_rcl_section_fin_t logger_fin, - gpointer logger_ud) -{ - struct stat st; - gint fd; - gchar *data; - GError *err = NULL; - struct rspamd_rcl_section *top, *logger; - gboolean res; - struct ucl_parser *parser; - - if (stat (filename, &st) == -1) { - msg_err ("cannot stat %s: %s", filename, strerror (errno)); - return FALSE; - } - if ((fd = open (filename, O_RDONLY)) == -1) { - msg_err ("cannot open %s: %s", filename, strerror (errno)); - return FALSE; - - } - /* Now mmap this file to simplify reading process */ - if ((data = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { - msg_err ("cannot mmap %s: %s", filename, strerror (errno)); - close (fd); - return 
FALSE; - } - close (fd); - - parser = ucl_parser_new (0); - rspamd_ucl_add_conf_variables (parser); - rspamd_ucl_add_conf_macros (parser, cfg); - if (!ucl_parser_add_chunk (parser, data, st.st_size)) { - msg_err ("ucl parser error: %s", ucl_parser_get_error (parser)); - ucl_parser_free (parser); - munmap (data, st.st_size); - return FALSE; - } - munmap (data, st.st_size); - cfg->rcl_obj = ucl_parser_get_object (parser); - ucl_parser_free (parser); - res = TRUE; - - if (!res) { - return FALSE; - } - - top = rspamd_rcl_config_init (); - err = NULL; - - HASH_FIND_STR(top, "logging", logger); - if (logger != NULL) { - logger->fin = logger_fin; - logger->fin_ud = logger_ud; - } - - if (!rspamd_read_rcl_config (top, cfg, cfg->rcl_obj, &err)) { - msg_err ("rcl parse error: %s", err->message); - return FALSE; - } - - return TRUE; -} - -static void -symbols_classifiers_callback (gpointer key, gpointer value, gpointer ud) -{ - struct config_file *cfg = ud; - - register_virtual_symbol (&cfg->cache, key, 1.0); -} - -void -insert_classifier_symbols (struct config_file *cfg) -{ - g_hash_table_foreach (cfg->classifiers_symbols, symbols_classifiers_callback, cfg); -} - -struct classifier_config* -find_classifier_conf (struct config_file *cfg, const gchar *name) -{ - GList *cur; - struct classifier_config *cf; - - if (name == NULL) { - return NULL; - } - - cur = cfg->classifiers; - while (cur) { - cf = cur->data; - - if (g_ascii_strcasecmp (cf->classifier->name, name) == 0) { - return cf; - } - - cur = g_list_next (cur); - } - - return NULL; -} - -gboolean -check_classifier_statfiles (struct classifier_config *cf) -{ - struct statfile *st; - gboolean has_other = FALSE, res = FALSE, cur_class; - GList *cur; - - /* First check classes directly */ - cur = cf->statfiles; - while (cur) { - st = cur->data; - if (!has_other) { - cur_class = st->is_spam; - has_other = TRUE; - } - else { - if (cur_class != st->is_spam) { - return TRUE; - } - } - - cur = g_list_next (cur); - } - - if 
(!has_other) { - /* We have only one statfile */ - return FALSE; - } - /* We have not detected any statfile that has different class, so turn on euristic based on symbol's name */ - has_other = FALSE; - cur = cf->statfiles; - while (cur) { - st = cur->data; - if (rspamd_strncasestr (st->symbol, "spam", -1) != NULL) { - st->is_spam = TRUE; - } - else if (rspamd_strncasestr (st->symbol, "ham", -1) != NULL) { - st->is_spam = FALSE; - } - - if (!has_other) { - cur_class = st->is_spam; - has_other = TRUE; - } - else { - if (cur_class != st->is_spam) { - res = TRUE; - } - } - - cur = g_list_next (cur); - } - - return res; -} - -static gchar* -rspamd_ucl_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) -{ - struct rspamd_ucl_map_cbdata *cbdata = data->cur_data, *prev; - - if (cbdata == NULL) { - cbdata = g_malloc (sizeof (struct rspamd_ucl_map_cbdata)); - prev = data->prev_data; - cbdata->buf = g_string_sized_new (BUFSIZ); - cbdata->cfg = prev->cfg; - data->cur_data = cbdata; - } - g_string_append_len (cbdata->buf, chunk, len); - - /* Say not to copy any part of this buffer */ - return NULL; -} - -static void -rspamd_ucl_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data) -{ - struct rspamd_ucl_map_cbdata *cbdata = data->cur_data, *prev = data->prev_data; - ucl_object_t *obj; - struct ucl_parser *parser; - guint32 checksum; - - if (prev != NULL) { - if (prev->buf != NULL) { - g_string_free (prev->buf, TRUE); - } - g_free (prev); - } - - if (cbdata == NULL) { - msg_err ("map fin error: new data is NULL"); - return; - } - - checksum = murmur32_hash (cbdata->buf->str, cbdata->buf->len); - if (data->map->checksum != checksum) { - /* New data available */ - parser = ucl_parser_new (0); - if (!ucl_parser_add_chunk (parser, cbdata->buf->str, cbdata->buf->len)) { - msg_err ("cannot parse map %s: %s", data->map->uri, ucl_parser_get_error (parser)); - ucl_parser_free (parser); - } - else { - obj = ucl_parser_get_object (parser); - 
ucl_parser_free (parser); - /* XXX: add replace objects code */ - ucl_object_unref (obj); - data->map->checksum = checksum; - } - } - else { - msg_info ("do not reload map %s, checksum is the same: %d", data->map->uri, checksum); - } -} - -gboolean -rspamd_parse_ip_list (const gchar *ip_list, radix_tree_t **tree) -{ - gchar **strvec, **cur; - struct in_addr ina; - guint32 mask; - - strvec = g_strsplit_set (ip_list, ",", 0); - cur = strvec; - - while (*cur != NULL) { - /* XXX: handle only ipv4 addresses */ - if (parse_ipmask_v4 (*cur, &ina, &mask)) { - if (*tree == NULL) { - *tree = radix_tree_create (); - } - radix32tree_add (*tree, htonl (ina.s_addr), mask, 1); - } - cur ++; - } - - return (*tree != NULL); -} - -/* - * vi:ts=4 - */ diff --git a/src/diff.c b/src/diff.c deleted file mode 100644 index 4038d8680..000000000 --- a/src/diff.c +++ /dev/null @@ -1,445 +0,0 @@ -/* diff - compute a shortest edit script (SES) given two sequences - * Copyright (c) 2004 Michael B. Allen - * Copyright (c) 2010-2014, Vsevolod Stakhov - * - * The MIT License - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* This algorithm is basically Myers' solution to SES/LCS with - * the Hirschberg linear space refinement as described in the - * following publication: - * - * E. Myers, ``An O(ND) Difference Algorithm and Its Variations,'' - * Algorithmica 1, 2 (1986), 251-266. - * http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps - * - * This is the same algorithm used by GNU diff(1). - */ - - -#include "config.h" -#include "diff.h" - - -#define FV(k) _v(ctx, (k), 0) -#define RV(k) _v(ctx, (k), 1) - -#define MAX_DIFF 1024 - -struct _ctx -{ - GArray *buf; - GArray *ses; - gint si; - gint dmax; -}; - -struct middle_snake -{ - gint x, y, u, v; -}; - -static -void maybe_resize_array(GArray *arr, guint k) -{ - if (k > arr->len) { - g_array_set_size (arr, k); - } - -} - -static void -_setv(struct _ctx *ctx, gint k, gint r, gint val) -{ - gint j; - gint *i; - /* Pack -N to N ginto 0 to N * 2 - */ - j = k <= 0 ? -k * 4 + r : k * 4 + (r - 2); - - maybe_resize_array (ctx->buf, j); - i = (gint *) &g_array_index (ctx->buf, gint, j); - *i = val; -} - -static gint -_v(struct _ctx *ctx, gint k, gint r) -{ - gint j; - - j = k <= 0 ? 
-k * 4 + r : k * 4 + (r - 2); - - return *((gint *) &g_array_index (ctx->buf, gint, j)); -} - -static gint -_find_middle_snake(const void *a, gint aoff, gint n, const void *b, - gint boff, gint m, struct _ctx *ctx, struct middle_snake *ms) -{ - gint delta, odd, mid, d; - - delta = n - m; - odd = delta & 1; - mid = (n + m) / 2; - mid += odd; - - _setv (ctx, 1, 0, 0); - _setv (ctx, delta - 1, 1, n); - - for (d = 0; d <= mid; d++) { - gint k, x, y; - - if ((2 * d - 1) >= ctx->dmax) { - return ctx->dmax; - } - - for (k = d; k >= -d; k -= 2) { - if (k == -d || (k != d && FV(k - 1) < FV(k + 1))) { - x = FV(k + 1); - } - else { - x = FV(k - 1) + 1; - } - y = x - k; - - ms->x = x; - ms->y = y; - const guchar *a0 = (const guchar *) a + aoff; - const guchar *b0 = (const guchar *) b + boff; - while (x < n && y < m && a0[x] == b0[y]) { - x++; - y++; - } - _setv (ctx, k, 0, x); - - if (odd && k >= (delta - (d - 1)) && k <= (delta + (d - 1))) { - if (x >= RV(k)) { - ms->u = x; - ms->v = y; - return 2 * d - 1; - } - } - } - for (k = d; k >= -d; k -= 2) { - gint kr = (n - m) + k; - - if (k == d || (k != -d && RV(kr - 1) < RV(kr + 1))) { - x = RV(kr - 1); - } - else { - x = RV(kr + 1) - 1; - } - y = x - kr; - - ms->u = x; - ms->v = y; - const guchar *a0 = (const guchar *) a + aoff; - const guchar *b0 = (const guchar *) b + boff; - while (x > 0 && y > 0 && a0[x - 1] == b0[y - 1]) { - x--; - y--; - } - _setv (ctx, kr, 1, x); - - if (!odd && kr >= -d && kr <= d) { - if (x <= FV(kr)) { - ms->x = x; - ms->y = y; - return 2 * d; - } - } - } - } - - errno = EFAULT; - - return -1; -} - -static void -_edit(struct _ctx *ctx, gint op, gint off, gint len) -{ - struct diff_edit *e = NULL, newe; - - if (len == 0 || ctx->ses == NULL) { - return; - } - /* - * Add an edit to the SES (or - * coalesce if the op is the same) - */ - if (ctx->ses->len != 0) { - e = &g_array_index (ctx->ses, struct diff_edit, ctx->ses->len - 1); - } - if (e == NULL || e->op != op) { - newe.op = op; - newe.off = off; - 
newe.len = len; - g_array_append_val (ctx->ses, newe); - } - else { - e->len += len; - } -} - -static gint -_ses(const void *a, gint aoff, gint n, const void *b, gint boff, - gint m, struct _ctx *ctx) -{ - struct middle_snake ms = { - .x = 0, - .y = 0, - .u = 0, - .v = 0 - }; - gint d; - - if (n == 0) { - _edit (ctx, DIFF_INSERT, boff, m); - d = m; - } - else if (m == 0) { - _edit (ctx, DIFF_DELETE, aoff, n); - d = n; - } - else { - /* Find the middle "snake" around which we - * recursively solve the sub-problems. - */ - d = _find_middle_snake (a, aoff, n, b, boff, m, ctx, &ms); - if (d == -1) { - return -1; - } - else if (d >= ctx->dmax) { - return ctx->dmax; - } - else if (ctx->ses == NULL) { - return d; - } - else if (d > 1) { - if (_ses (a, aoff, ms.x, b, boff, ms.y, ctx) == -1) { - return -1; - } - - _edit (ctx, DIFF_MATCH, aoff + ms.x, ms.u - ms.x); - - aoff += ms.u; - boff += ms.v; - n -= ms.u; - m -= ms.v; - if (_ses (a, aoff, n, b, boff, m, ctx) == -1) { - return -1; - } - } - else { - gint x = ms.x; - gint u = ms.u; - - /* There are only 4 base cases when the - * edit distance is 1. 
- * - * n > m m > n - * - * - | - * \ \ x != u - * \ \ - * - * \ \ - * \ \ x == u - * - | - */ - - if (m > n) { - if (x == u) { - _edit (ctx, DIFF_MATCH, aoff, n); - _edit (ctx, DIFF_INSERT, boff + (m - 1), 1); - } - else { - _edit (ctx, DIFF_INSERT, boff, 1); - _edit (ctx, DIFF_MATCH, aoff, n); - } - } - else { - if (x == u) { - _edit (ctx, DIFF_MATCH, aoff, m); - _edit (ctx, DIFF_DELETE, aoff + (n - 1), 1); - } - else { - _edit (ctx, DIFF_DELETE, aoff, 1); - _edit (ctx, DIFF_MATCH, aoff + 1, m); - } - } - } - } - - return d; -} - -gint -rspamd_diff(const void *a, gint aoff, gint n, const void *b, gint boff, gint m, - gint dmax, GArray *ses, gint *sn) -{ - struct _ctx ctx; - gint d, x, y; - struct diff_edit *e = NULL; - GArray *tmp; - - tmp = g_array_sized_new (FALSE, TRUE, sizeof(gint), dmax); - ctx.buf = tmp; - ctx.ses = ses; - ctx.si = 0; - ctx.dmax = dmax; - - /* The _ses function assumes the SES will begin or end with a delete - * or insert. The following will insure this is true by eating any - * beginning matches. This is also a quick to process sequences - * that match entirely. - */ - x = y = 0; - const guchar *a0 = (const guchar *) a + aoff; - const guchar *b0 = (const guchar *) b + boff; - while (x < n && y < m && a0[x] == b0[y]) { - x++; - y++; - } - _edit (&ctx, DIFF_MATCH, aoff, x); - - if ((d = _ses (a, aoff + x, n - x, b, boff + y, m - y, &ctx)) == -1) { - g_array_free (tmp, TRUE); - return -1; - } - if (ses && sn && e) { - *sn = e->op ? 
ctx.si + 1 : 0; - } - - g_array_free (tmp, TRUE); - return d; -} - -static guint32 -compare_diff_distance_unnormalized (f_str_t *s1, f_str_t *s2) -{ - GArray *ses; - struct diff_edit *e; - guint i; - guint32 distance = 0; - - ses = g_array_sized_new (FALSE, TRUE, sizeof (struct diff_edit), MAX_DIFF); - - if (rspamd_diff (s1->begin, 0, s1->len, - s2->begin, 0, s2->len, MAX_DIFF, ses, NULL) == -1) { - /* Diff failed, strings are different */ - g_array_free (ses, TRUE); - return 0; - } - - for (i = 0; i < ses->len; i ++) { - e = &g_array_index(ses, struct diff_edit, i); - if (e->op != DIFF_MATCH) { - distance += e->len; - } - } - - g_array_free (ses, TRUE); - - return distance; -} - -guint32 -compare_diff_distance (f_str_t *s1, f_str_t *s2) -{ - - return 100 - (2 * compare_diff_distance_unnormalized (s1, s2) * 100) / (s1->len + s2->len); -} - - -guint32 -compare_diff_distance_normalized (f_str_t *s1, f_str_t *s2) -{ - gchar b1[BUFSIZ], b2[BUFSIZ], *t, *h, *p1, *p2; - gsize r1, r2; - f_str_t t1, t2; - guint32 cur_diff = 0; - - r1 = s1->len; - r2 = s2->len; - p1 = s1->begin; - p2 = s2->begin; - - while (r1 > 0 && r2 > 0) { - /* Copy strings to the buffer normalized */ - h = p1; - t = b1; - - /* The first string */ - while (r1 > 0 && t - b1 < (gint)sizeof (b1)) { - if (!g_ascii_isspace (*h)) { - *t++ = g_ascii_tolower (*h); - } - h ++; - p1 ++; - r1 --; - } - - t1.begin = b1; - t1.len = t - b1; - - /* The second string */ - h = p2; - t = b2; - while (r2 > 0 && t - b2 < (gint)sizeof (b2)) { - if (!g_ascii_isspace (*h)) { - *t++ = g_ascii_tolower (*h); - } - h ++; - p2 ++; - r2 --; - } - - t2.begin = b2; - t2.len = t - b2; - - cur_diff += compare_diff_distance_unnormalized (&t1, &t2); - } - - if (r1 > 0) { - h = p1; - while (r1 > 0) { - if (!g_ascii_isspace (*h)) { - cur_diff ++; - } - r1 --; - h ++; - } - } - else if (r2 > 0) { - h = p2; - while (r2 > 0) { - if (!g_ascii_isspace (*h)) { - cur_diff ++; - } - r2 --; - h ++; - } - } - - return 100 - (2 * cur_diff * 100) / 
(s1->len + s2->len); -} diff --git a/src/diff.h b/src/diff.h deleted file mode 100644 index cea5e5d4a..000000000 --- a/src/diff.h +++ /dev/null @@ -1,74 +0,0 @@ -/* Copyright (c) 2010, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#ifndef DIFF_H_ -#define DIFF_H_ - -#include "config.h" -#include "fstring.h" - -typedef enum -{ - DIFF_MATCH = 1, - DIFF_DELETE, - DIFF_INSERT -} diff_op; - -struct diff_edit -{ - gshort op; - gint off; /* off ginto s1 if MATCH or DELETE but s2 if INSERT */ - gint len; -}; - -/* - * Calculate difference between two strings using diff algorithm - * @param a the first line begin - * @param aoff the first line offset - * @param n the first line length - * @param b the second line begin - * @param boff the second line offset - * @param b the second line length - * @param dmax maximum differences number - * @param ses here would be stored the shortest script to transform a to b - * @param sn here would be stored a number of differences between a and b - * @return distance between strings or -1 in case of error - */ -gint rspamd_diff(const void *a, gint aoff, gint n, const void *b, gint boff, gint m, - gint dmax, GArray *ses, gint *sn); - -/* - * Calculate distance between two strings (in percentage) using diff algorithm. - * @return 100 in case of identical strings and 0 in case of totally different strings. - */ -guint32 compare_diff_distance (f_str_t *s1, f_str_t *s2); - -/* - * Calculate distance between two strings (in percentage) using diff algorithm. Strings are normalized before: - * all spaces are removed and all characters are lowercased. - * @return 100 in case of identical strings and 0 in case of totally different strings. -*/ -guint32 compare_diff_distance_normalized (f_str_t *s1, f_str_t *s2); - -#endif /* DIFF_H_ */ diff --git a/src/dkim.c b/src/dkim.c deleted file mode 100644 index c7c8a35e1..000000000 --- a/src/dkim.c +++ /dev/null @@ -1,1480 +0,0 @@ -/* Copyright (c) 2010-2011, Vsevolod Stakhov - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "main.h" -#include "message.h" -#include "dkim.h" -#include "dns.h" - -/* Parser of dkim params */ -typedef gboolean (*dkim_parse_param_f) (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); - -static gboolean rspamd_dkim_parse_signature (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); -static gboolean rspamd_dkim_parse_signalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); -static gboolean rspamd_dkim_parse_domain (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); -static gboolean rspamd_dkim_parse_canonalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); -static gboolean rspamd_dkim_parse_ignore (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); -static gboolean rspamd_dkim_parse_selector (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); -static gboolean rspamd_dkim_parse_hdrlist (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); -static gboolean rspamd_dkim_parse_version (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); -static gboolean rspamd_dkim_parse_timestamp (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); -static gboolean rspamd_dkim_parse_expiration (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); -static gboolean rspamd_dkim_parse_bodyhash (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); -static gboolean rspamd_dkim_parse_bodylength (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); - - -static const dkim_parse_param_f parser_funcs[] = { - [DKIM_PARAM_SIGNATURE] = rspamd_dkim_parse_signature, - [DKIM_PARAM_SIGNALG] = rspamd_dkim_parse_signalg, - [DKIM_PARAM_DOMAIN] = rspamd_dkim_parse_domain, - [DKIM_PARAM_CANONALG] = rspamd_dkim_parse_canonalg, - [DKIM_PARAM_QUERYMETHOD] = 
rspamd_dkim_parse_ignore, - [DKIM_PARAM_SELECTOR] = rspamd_dkim_parse_selector, - [DKIM_PARAM_HDRLIST] = rspamd_dkim_parse_hdrlist, - [DKIM_PARAM_VERSION] = rspamd_dkim_parse_version, - [DKIM_PARAM_IDENTITY] = rspamd_dkim_parse_ignore, - [DKIM_PARAM_TIMESTAMP] = rspamd_dkim_parse_timestamp, - [DKIM_PARAM_EXPIRATION] = rspamd_dkim_parse_expiration, - [DKIM_PARAM_COPIEDHDRS] = rspamd_dkim_parse_ignore, - [DKIM_PARAM_BODYHASH] = rspamd_dkim_parse_bodyhash, - [DKIM_PARAM_BODYLENGTH] = rspamd_dkim_parse_bodylength -}; - -struct rspamd_dkim_header { - gchar *name; - guint count; -}; - -#define DKIM_ERROR dkim_error_quark () -GQuark -dkim_error_quark (void) -{ - return g_quark_from_static_string ("dkim-error-quark"); -} - -/* Parsers implementation */ -static gboolean -rspamd_dkim_parse_signature (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) -{ - ctx->b = rspamd_mempool_alloc (ctx->pool, len + 1); - rspamd_strlcpy (ctx->b, param, len + 1); -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 20)) - gchar *tmp; - gsize tmp_len = len; - tmp = g_base64_decode (ctx->b, &tmp_len); - rspamd_strlcpy (ctx->b, tmp, len + 1); - g_free (tmp); -#else - g_base64_decode_inplace (ctx->b, &len); -#endif - ctx->blen = len; - return TRUE; -} - -static gboolean -rspamd_dkim_parse_signalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) -{ - if (len == 8) { - if (memcmp (param, "rsa-sha1", len) == 0) { - ctx->sig_alg = DKIM_SIGN_RSASHA1; - return TRUE; - } - } - else if (len == 10) { - if (memcmp (param, "rsa-sha256", len) == 0) { - ctx->sig_alg = DKIM_SIGN_RSASHA256; - return TRUE; - } - } - - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_A, "invalid dkim sign algorithm"); - return FALSE; -} - -static gboolean -rspamd_dkim_parse_domain (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) -{ - ctx->domain = rspamd_mempool_alloc (ctx->pool, len + 1); - rspamd_strlcpy (ctx->domain, param, len + 1); - return 
TRUE; -} - -static gboolean -rspamd_dkim_parse_canonalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) -{ - const gchar *p, *slash = NULL, *end = param + len; - gsize sl = 0; - - p = param; - while (p != end) { - if (*p == '/') { - slash = p; - break; - } - p ++; - sl ++; - } - - if (slash == NULL) { - /* Only check header */ - if (len == 6 && memcmp (param, "simple", len) == 0) { - ctx->header_canon_type = DKIM_CANON_SIMPLE; - return TRUE; - } - else if (len == 7 && memcmp (param, "relaxed", len) == 0) { - ctx->header_canon_type = DKIM_CANON_RELAXED; - return TRUE; - } - } - else { - /* First check header */ - if (sl == 6 && memcmp (param, "simple", sl) == 0) { - ctx->header_canon_type = DKIM_CANON_SIMPLE; - } - else if (sl == 7 && memcmp (param, "relaxed", sl) == 0) { - ctx->header_canon_type = DKIM_CANON_RELAXED; - } - else { - goto err; - } - /* Check body */ - len -= sl + 1; - slash ++; - if (len == 6 && memcmp (slash, "simple", len) == 0) { - ctx->body_canon_type = DKIM_CANON_SIMPLE; - return TRUE; - } - else if (len == 7 && memcmp (slash, "relaxed", len) == 0) { - ctx->body_canon_type = DKIM_CANON_RELAXED; - return TRUE; - } - } - -err: - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_A, "invalid dkim canonization algorithm"); - return FALSE; -} - -static gboolean -rspamd_dkim_parse_ignore (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) -{ - /* Just ignore unused params */ - return TRUE; -} - -static gboolean -rspamd_dkim_parse_selector (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) -{ - ctx->selector = rspamd_mempool_alloc (ctx->pool, len + 1); - rspamd_strlcpy (ctx->selector, param, len + 1); - return TRUE; -} - -static struct rspamd_dkim_header* -rspamd_dkim_find_header (GPtrArray *arr, const gchar *name, gsize len) -{ - guint i; - struct rspamd_dkim_header *h; - - for (i = 0; i < arr->len; i ++) { - h = g_ptr_array_index (arr, i); - if (g_ascii_strncasecmp (h->name, 
name, len) == 0) { - return h; - } - } - - return NULL; -} - -static void -rspamd_dkim_hlist_free (void *ud) -{ - GPtrArray *a = ud; - - g_ptr_array_free (a, TRUE); -} - -static gboolean -rspamd_dkim_parse_hdrlist (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) -{ - const gchar *c, *p, *end = param + len; - gchar *h; - gboolean from_found = FALSE; - guint count = 0; - struct rspamd_dkim_header *new; - - p = param; - while (p <= end) { - if ((*p == ':' || p == end)) { - count ++; - } - p ++; - } - - if (count > 0) { - ctx->hlist = g_ptr_array_sized_new (count); - } - else { - return FALSE; - } - - c = param; - p = param; - while (p <= end) { - if ((*p == ':' || p == end) && p - c > 0) { - if ((new = rspamd_dkim_find_header (ctx->hlist, c, p - c)) != NULL) { - new->count ++; - } - else { - /* Insert new header to the list */ - new = rspamd_mempool_alloc (ctx->pool, sizeof (struct rspamd_dkim_header)); - h = rspamd_mempool_alloc (ctx->pool, p - c + 1); - rspamd_strlcpy (h, c, p - c + 1); - g_strstrip (h); - new->name = h; - new->count = 1; - /* Check mandatory from */ - if (!from_found && g_ascii_strcasecmp (h, "from") == 0) { - from_found = TRUE; - } - g_ptr_array_add (ctx->hlist, new); - } - c = p + 1; - p ++; - } - else { - p ++; - } - } - - if (!ctx->hlist) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_H, "invalid dkim header list"); - return FALSE; - } - else { - if (!from_found) { - g_ptr_array_free (ctx->hlist, TRUE); - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_H, "invalid dkim header list, from header is missing"); - return FALSE; - } - /* Reverse list */ - rspamd_mempool_add_destructor (ctx->pool, (rspamd_mempool_destruct_t)rspamd_dkim_hlist_free, ctx->hlist); - } - - return TRUE; -} - -static gboolean -rspamd_dkim_parse_version (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) -{ - if (len != 1 || *param != '1') { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_VERSION, "invalid dkim 
version"); - return FALSE; - } - - ctx->ver = 1; - return TRUE; -} - -static gboolean -rspamd_dkim_parse_timestamp (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) -{ - gulong val; - - if (!rspamd_strtoul (param, len, &val)) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim timestamp"); - return FALSE; - } - ctx->timestamp = val; - - return TRUE; -} - -static gboolean -rspamd_dkim_parse_expiration (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) -{ - gulong val; - - if (!rspamd_strtoul (param, len, &val)) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim expiration"); - return FALSE; - } - ctx->expiration = val; - - return TRUE; -} - -static gboolean -rspamd_dkim_parse_bodyhash (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) -{ - ctx->bh = rspamd_mempool_alloc (ctx->pool, len + 1); - rspamd_strlcpy (ctx->bh, param, len + 1); -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 20)) - gchar *tmp; - gsize tmp_len = len; - tmp = g_base64_decode (ctx->bh, &tmp_len); - rspamd_strlcpy (ctx->bh, tmp, len + 1); - g_free (tmp); -#else - g_base64_decode_inplace (ctx->bh, &len); -#endif - ctx->bhlen = len; - return TRUE; -} - -static gboolean -rspamd_dkim_parse_bodylength (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) -{ - gulong val; - - if (!rspamd_strtoul (param, len, &val)) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_L, "invalid dkim body length"); - return FALSE; - } - ctx->len = val; - - return TRUE; -} - -/** - * Create new dkim context from signature - * @param sig message's signature - * @param pool pool to allocate memory from - * @param err pointer to error object - * @return new context or NULL - */ -rspamd_dkim_context_t* -rspamd_create_dkim_context (const gchar *sig, rspamd_mempool_t *pool, guint time_jitter, GError **err) -{ - const gchar *p, *c, *tag = NULL, *end; - gsize taglen; - gint param = 
DKIM_PARAM_UNKNOWN; - time_t now; - rspamd_dkim_context_t *new; - enum { - DKIM_STATE_TAG = 0, - DKIM_STATE_AFTER_TAG, - DKIM_STATE_VALUE, - DKIM_STATE_SKIP_SPACES = 99, - DKIM_STATE_ERROR = 100 - } state, next_state; - - - new = rspamd_mempool_alloc0 (pool, sizeof (rspamd_dkim_context_t)); - new->pool = pool; - new->header_canon_type = DKIM_CANON_DEFAULT; - new->body_canon_type = DKIM_CANON_DEFAULT; - new->sig_alg = DKIM_SIGN_UNKNOWN; - /* A simple state machine of parsing tags */ - state = DKIM_STATE_SKIP_SPACES; - next_state = DKIM_STATE_TAG; - taglen = 0; - p = sig; - c = sig; - end = p + strlen (p); - while (p <= end) { - switch (state) { - case DKIM_STATE_TAG: - if (g_ascii_isspace (*p)) { - taglen = p - c; - while (*p && g_ascii_isspace (*p)) { - /* Skip spaces before '=' sign */ - p ++; - } - if (*p != '=') { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param"); - state = DKIM_STATE_ERROR; - } - else { - state = DKIM_STATE_SKIP_SPACES; - next_state = DKIM_STATE_AFTER_TAG; - param = DKIM_PARAM_UNKNOWN; - p ++; - tag = c; - } - } - else if (*p == '=') { - state = DKIM_STATE_SKIP_SPACES; - next_state = DKIM_STATE_AFTER_TAG; - param = DKIM_PARAM_UNKNOWN; - p ++; - tag = c; - } - else { - taglen ++; - p ++; - } - break; - case DKIM_STATE_AFTER_TAG: - /* We got tag at tag and len at taglen */ - switch (taglen) { - case 0: - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "zero length dkim param"); - state = DKIM_STATE_ERROR; - break; - case 1: - /* Simple tags */ - switch (*tag) { - case 'v': - param = DKIM_PARAM_VERSION; - break; - case 'a': - param = DKIM_PARAM_SIGNALG; - break; - case 'b': - param = DKIM_PARAM_SIGNATURE; - break; - case 'c': - param = DKIM_PARAM_CANONALG; - break; - case 'd': - param = DKIM_PARAM_DOMAIN; - break; - case 'h': - param = DKIM_PARAM_HDRLIST; - break; - case 'i': - param = DKIM_PARAM_IDENTITY; - break; - case 'l': - param = DKIM_PARAM_BODYLENGTH; - break; - case 'q': - param = DKIM_PARAM_QUERYMETHOD; - 
break; - case 's': - param = DKIM_PARAM_SELECTOR; - break; - case 't': - param = DKIM_PARAM_TIMESTAMP; - break; - case 'x': - param = DKIM_PARAM_EXPIRATION; - break; - case 'z': - param = DKIM_PARAM_COPIEDHDRS; - break; - default: - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param: %c", *tag); - state = DKIM_STATE_ERROR; - break; - } - break; - case 2: - if (tag[0] == 'b' && tag[1] == 'h') { - param = DKIM_PARAM_BODYHASH; - } - else { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param: %c%c", tag[0], tag[1]); - state = DKIM_STATE_ERROR; - } - break; - default: - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param length: %zd", taglen); - state = DKIM_STATE_ERROR; - break; - } - if (state != DKIM_STATE_ERROR) { - /* Skip spaces */ - state = DKIM_STATE_SKIP_SPACES; - next_state = DKIM_STATE_VALUE; - } - break; - case DKIM_STATE_VALUE: - if (*p == ';') { - if (param == DKIM_PARAM_UNKNOWN || !parser_funcs[param](new, c, p - c, err)) { - state = DKIM_STATE_ERROR; - } - else { - state = DKIM_STATE_SKIP_SPACES; - next_state = DKIM_STATE_TAG; - p ++; - taglen = 0; - } - } - else if (p == end) { - if (param == DKIM_PARAM_UNKNOWN || !parser_funcs[param](new, c, p - c + 1, err)) { - state = DKIM_STATE_ERROR; - } - else { - /* Finish processing */ - p ++; - } - } - else { - p ++; - } - break; - case DKIM_STATE_SKIP_SPACES: - if (g_ascii_isspace (*p)) { - p ++; - } - else { - c = p; - state = next_state; - } - break; - case DKIM_STATE_ERROR: - if (err) { - msg_info ("dkim parse failed: %s", (*err)->message); - return NULL; - } - else { - msg_info ("dkim parse failed: unknown error"); - return NULL; - } - break; - } - } - - /* Now check validity of signature */ - if (new->b == NULL) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_B, "b parameter missing"); - return NULL; - } - if (new->bh == NULL) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_BH, "bh parameter missing"); - return NULL; - } - if 
(new->domain == NULL) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_D, "domain parameter missing"); - return NULL; - } - if (new->selector == NULL) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_S, "selector parameter missing"); - return NULL; - } - if (new->ver == 0) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_V, "v parameter missing"); - return NULL; - } - if (new->hlist == NULL) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_H, "h parameter missing"); - return NULL; - } - if (new->sig_alg == DKIM_SIGN_UNKNOWN) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_S, "s parameter missing"); - return NULL; - } - if (new->sig_alg == DKIM_SIGN_RSASHA1) { - /* Check bh length */ - if (new->bhlen != (guint)g_checksum_type_get_length (G_CHECKSUM_SHA1)) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_BADSIG, "signature has incorrect length: %ud", new->bhlen); - return NULL; - } - - } - else if (new->sig_alg == DKIM_SIGN_RSASHA256) { - if (new->bhlen != (guint)g_checksum_type_get_length (G_CHECKSUM_SHA256)) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_BADSIG, "signature has incorrect length: %ud", new->bhlen); - return NULL; - } - } - /* Check expiration */ - now = time (NULL); - if (new->timestamp && now < new->timestamp && new->timestamp - now > (gint)time_jitter) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_FUTURE, "signature was made in future, ignoring"); - return NULL; - } - if (new->expiration && new->expiration < now) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EXPIRED, "signature has expired"); - return NULL; - } - - /* Now create dns key to request further */ - taglen = strlen (new->domain) + strlen (new->selector) + sizeof (DKIM_DNSKEYNAME) + 2; - new->dns_key = rspamd_mempool_alloc (new->pool, taglen); - rspamd_snprintf (new->dns_key, taglen, "%s.%s.%s", new->selector, DKIM_DNSKEYNAME, new->domain); - - /* Create checksums for further operations */ - if (new->sig_alg == DKIM_SIGN_RSASHA1) { - new->body_hash = 
g_checksum_new (G_CHECKSUM_SHA1); - new->headers_hash = g_checksum_new (G_CHECKSUM_SHA1); - } - else if (new->sig_alg == DKIM_SIGN_RSASHA256) { - new->body_hash = g_checksum_new (G_CHECKSUM_SHA256); - new->headers_hash = g_checksum_new (G_CHECKSUM_SHA256); - } - else { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_BADSIG, "signature has unsupported signature algorithm"); - return NULL; - } - - rspamd_mempool_add_destructor (new->pool, (rspamd_mempool_destruct_t)g_checksum_free, new->body_hash); - rspamd_mempool_add_destructor (new->pool, (rspamd_mempool_destruct_t)g_checksum_free, new->headers_hash); - - return new; -} - -struct rspamd_dkim_key_cbdata { - rspamd_dkim_context_t *ctx; - dkim_key_handler_f handler; - gpointer ud; -}; - -static rspamd_dkim_key_t* -rspamd_dkim_make_key (const gchar *keydata, guint keylen, GError **err) -{ - rspamd_dkim_key_t *key = NULL; - - key = g_slice_alloc0 (sizeof (rspamd_dkim_key_t)); - key->keydata = g_slice_alloc (keylen + 1); - rspamd_strlcpy (key->keydata, keydata, keylen + 1); - key->keylen = keylen + 1; - key->decoded_len = keylen + 1; -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 20)) - gchar *tmp; - gsize tmp_len = keylen; - tmp = g_base64_decode (key->keydata, &tmp_len); - rspamd_strlcpy (key->keydata, tmp, keylen + 1); - g_free (tmp); - key->decoded_len = tmp_len; -#else - g_base64_decode_inplace (key->keydata, &key->decoded_len); -#endif -#ifdef HAVE_OPENSSL - key->key_bio = BIO_new_mem_buf (key->keydata, key->decoded_len); - if (key->key_bio == NULL) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "cannot make ssl bio from key"); - rspamd_dkim_key_free (key); - return NULL; - } - - key->key_evp = d2i_PUBKEY_bio (key->key_bio, NULL); - if (key->key_evp == NULL) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "cannot extract pubkey from bio"); - rspamd_dkim_key_free (key); - return NULL; - } - - key->key_rsa = EVP_PKEY_get1_RSA (key->key_evp); - if (key->key_rsa == NULL) { - g_set_error 
(err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "cannot extract rsa key from evp key"); - rspamd_dkim_key_free (key); - return NULL; - } - -#endif - - return key; -} - -/** - * Free DKIM key - * @param key - */ -void -rspamd_dkim_key_free (rspamd_dkim_key_t *key) -{ -#ifdef HAVE_OPENSSL - if (key->key_rsa) { - RSA_free (key->key_rsa); - } - if (key->key_bio) { - BIO_free (key->key_bio); - } -#endif - g_slice_free1 (key->keylen, key->keydata); - g_slice_free1 (sizeof (rspamd_dkim_key_t), key); -} - -static rspamd_dkim_key_t* -rspamd_dkim_parse_key (const gchar *txt, gsize *keylen, GError **err) -{ - const gchar *c, *p, *end; - gint state = 0; - gsize len; - - c = txt; - p = txt; - end = txt + strlen (txt); - - while (p <= end) { - switch (state) { - case 0: - if (p != end && p[0] == 'p' && p[1] == '=') { - /* We got something like public key */ - c = p + 2; - p = c; - state = 1; - } - else { - /* Ignore everything */ - p ++; - } - break; - case 1: - /* State when we got p= and looking for some public key */ - if ((*p == ';' || p == end) && p > c) { - len = p - c; - return rspamd_dkim_make_key (c, len, err); - } - else { - p ++; - } - break; - } - } - - if (p - c == 0) { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYREVOKED, "key was revoked"); - } - else { - g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "key was not found"); - } - - return NULL; -} - -/* Get TXT request data and parse it */ -static void -rspamd_dkim_dns_cb (struct rdns_reply *reply, gpointer arg) -{ - struct rspamd_dkim_key_cbdata *cbdata = arg; - rspamd_dkim_key_t *key = NULL; - GError *err = NULL; - struct rdns_reply_entry *elt; - gsize keylen = 0; - - if (reply->code != RDNS_RC_NOERROR) { - g_set_error (&err, DKIM_ERROR, DKIM_SIGERROR_NOKEY, "dns request to %s failed: %s", cbdata->ctx->dns_key, - rdns_strerror (reply->code)); - cbdata->handler (NULL, 0, cbdata->ctx, cbdata->ud, err); - } - else { - LL_FOREACH (reply->entries, elt) { - if (elt->type == RDNS_REQUEST_TXT) { - key = 
rspamd_dkim_parse_key (elt->content.txt.data, &keylen, &err); - if (key) { - key->ttl = elt->ttl; - break; - } - } - } - if (key != NULL && err != NULL) { - /* Free error as it is insignificant */ - g_error_free (err); - err = NULL; - } - cbdata->handler (key, keylen, cbdata->ctx, cbdata->ud, err); - } -} - -/** - * Make DNS request for specified context and obtain and parse key - * @param ctx dkim context from signature - * @param resolver dns resolver object - * @param s async session to make request - * @return - */ -gboolean -rspamd_get_dkim_key (rspamd_dkim_context_t *ctx, struct rspamd_dns_resolver *resolver, - struct rspamd_async_session *s, dkim_key_handler_f handler, gpointer ud) -{ - struct rspamd_dkim_key_cbdata *cbdata; - - g_return_val_if_fail (ctx != NULL, FALSE); - g_return_val_if_fail (ctx->dns_key != NULL, FALSE); - - cbdata = rspamd_mempool_alloc (ctx->pool, sizeof (struct rspamd_dkim_key_cbdata)); - cbdata->ctx = ctx; - cbdata->handler = handler; - cbdata->ud = ud; - - return make_dns_request (resolver, s, ctx->pool, rspamd_dkim_dns_cb, cbdata, RDNS_REQUEST_TXT, ctx->dns_key); -} - -static gboolean -rspamd_dkim_relaxed_body_step (GChecksum *ck, const gchar **start, guint remain) -{ - const gchar *h; - static gchar buf[BUFSIZ]; - gchar *t; - guint len, inlen; - gboolean got_sp, finished = FALSE; - - if (remain > sizeof (buf)) { - len = sizeof (buf); - } - else { - len = remain; - finished = TRUE; - } - inlen = sizeof (buf) - 1; - h = *start; - t = &buf[0]; - got_sp = FALSE; - - while (len && inlen) { - if (*h == '\r' || *h == '\n') { - /* Ignore spaces at the end of line */ - if (got_sp) { - got_sp = FALSE; - t --; - } - /* Replace a single \n or \r with \r\n */ - if (*h == '\n' && *(h - 1) != '\r') { - *t ++ = '\r'; - inlen --; - } - else if (*h == '\r' && *(h + 1) != '\n') { - *t ++ = *h ++; - *t ++ = '\n'; - if (inlen > 1) { - inlen -= 2; - } - else { - /* It is safe as inlen = sizeof (buf) - 1 */ - inlen = 0; - } - len --; - continue; - } - } 
- else if (g_ascii_isspace (*h)) { - if (got_sp) { - /* Ignore multiply spaces */ - h ++; - len --; - continue; - } - else { - *t++ = ' '; - h ++; - inlen --; - len --; - got_sp = TRUE; - continue; - } - } - else { - got_sp = FALSE; - } - *t++ = *h++; - inlen --; - len --; - } - - *start = h; - - if (!finished && *(t - 1) == ' ' && g_ascii_isspace (*h)) { - /* Avoid border problems */ - t --; - } -#if 0 - msg_debug ("update signature with buffer: %*s", t - buf, buf); -#endif - g_checksum_update (ck, buf, t - buf); - - return !finished; -} - -static gboolean -rspamd_dkim_simple_body_step (GChecksum *ck, const gchar **start, guint remain) -{ - const gchar *h; - static gchar buf[BUFSIZ]; - gchar *t; - guint len, inlen; - gboolean finished = FALSE; - - if (remain > sizeof (buf)) { - len = sizeof (buf); - } - else { - len = remain; - finished = TRUE; - } - inlen = sizeof (buf) - 1; - h = *start; - t = &buf[0]; - - while (len && inlen) { - if (*h == '\r' || *h == '\n') { - /* Replace a single \n or \r with \r\n */ - if (*h == '\n' && *(h - 1) != '\r') { - *t ++ = '\r'; - inlen --; - } - else if (*h == '\r' && *(h + 1) != '\n') { - *t ++ = *h ++; - *t ++ = '\n'; - if (inlen > 1) { - inlen -= 2; - } - else { - /* It is safe as inlen = sizeof (buf) - 1 */ - inlen = 0; - } - len --; - continue; - } - } - *t++ = *h++; - inlen --; - len --; - } - - *start = h; - -#if 0 - msg_debug ("update signature with buffer: %*s", t - buf, buf); -#endif - g_checksum_update (ck, buf, t - buf); - - return !finished; -} - -static gboolean -rspamd_dkim_canonize_body (rspamd_dkim_context_t *ctx, const gchar *start, const gchar *end) -{ - const gchar *p; - - if (start == NULL) { - /* Empty body */ - if (ctx->body_canon_type == DKIM_CANON_SIMPLE) { - g_checksum_update (ctx->body_hash, CRLF, sizeof (CRLF) - 1); - } - else { - g_checksum_update (ctx->body_hash, "", 0); - } - } - else { - /* Strip extra ending CRLF */ - p = end - 1; - while (p >= start + 2) { - if (*p == '\n' && *(p - 1) == '\r' && 
*(p - 2) == '\n') { - p -= 2; - } - else if (*p == '\n' && *(p - 1) == '\n') { - p --; - } - else if (*p == '\r' && *(p - 1) == '\r') { - p --; - } - else { - break; - } - } - end = p + 1; - if (end == start || end == start + 2) { - /* Empty body */ - if (ctx->body_canon_type == DKIM_CANON_SIMPLE) { - g_checksum_update (ctx->body_hash, CRLF, sizeof (CRLF) - 1); - } - else { - g_checksum_update (ctx->body_hash, "", 0); - } - } - else { - if (ctx->body_canon_type == DKIM_CANON_SIMPLE) { - /* Simple canonization */ - while (rspamd_dkim_simple_body_step (ctx->body_hash, &start, end - start)); - } - else { - while (rspamd_dkim_relaxed_body_step (ctx->body_hash, &start, end - start)); - } - } - return TRUE; - } - - /* TODO: Implement relaxed algorithm */ - return FALSE; -} - -/* Update hash converting all CR and LF to CRLF */ -static void -rspamd_dkim_hash_update (GChecksum *ck, const gchar *begin, gsize len) -{ - const gchar *p, *c, *end; - - end = begin + len; - p = begin; - c = p; - while (p != end) { - if (*p == '\r') { - g_checksum_update (ck, c, p - c); - g_checksum_update (ck, CRLF, sizeof (CRLF) - 1); - p ++; - if (*p == '\n') { - p ++; - } - c = p; - } - else if (*p == '\n') { - g_checksum_update (ck, c, p - c); - g_checksum_update (ck, CRLF, sizeof (CRLF) - 1); - p ++; - c = p; - } - else { - p ++; - } - } - if (p != c) { - g_checksum_update (ck, c, p - c); - } -} - -/* Update hash by signature value (ignoring b= tag) */ -static void -rspamd_dkim_signature_update (rspamd_dkim_context_t *ctx, const gchar *begin, guint len) -{ - const gchar *p, *c, *end; - gboolean tag, skip; - - end = begin + len; - p = begin; - c = begin; - tag = TRUE; - skip = FALSE; - - while (p < end) { - if (tag && p[0] == 'b' && p[1] == '=') { - /* Add to signature */ - msg_debug ("initial update hash with signature part: %*s", p - c + 2, c); - rspamd_dkim_hash_update (ctx->headers_hash, c, p - c + 2); - skip = TRUE; - } - else if (skip && (*p == ';' || p == end - 1)) { - skip = FALSE; - c 
= p; - } - else if (!tag && *p == ';') { - tag = TRUE; - } - else if (tag && *p == '=') { - tag = FALSE; - } - p ++; - } - - p --; - /* Skip \r\n at the end */ - while ((*p == '\r' || *p == '\n') && p >= c) { - p --; - } - - if (p - c + 1 > 0) { - msg_debug ("final update hash with signature part: %*s", p - c + 1, c); - rspamd_dkim_hash_update (ctx->headers_hash, c, p - c + 1); - } -} - -static gboolean -rspamd_dkim_canonize_header_relaxed (rspamd_dkim_context_t *ctx, const gchar *header, const gchar *header_name, gboolean is_sign) -{ - const gchar *h; - gchar *t, *buf; - guint inlen; - gboolean got_sp, allocated = FALSE; - - inlen = strlen (header) + strlen (header_name) + sizeof (":" CRLF); - if (inlen > BUFSIZ) { - buf = g_malloc (inlen); - allocated = TRUE; - } - else { - /* Faster */ - buf = g_alloca (inlen); - } - - /* Name part */ - t = buf; - h = header_name; - while (*h) { - *t ++ = g_ascii_tolower (*h++); - } - *t++ = ':'; - - /* Value part */ - h = header; - /* Skip spaces at the beginning */ - while (g_ascii_isspace (*h)) { - h ++; - } - got_sp = FALSE; - - while (*h) { - if (g_ascii_isspace (*h)) { - if (got_sp) { - h ++; - continue; - } - else { - got_sp = TRUE; - *t ++ = ' '; - h ++; - continue; - } - } - else { - got_sp = FALSE; - } - *t ++ = *h ++; - } - if (g_ascii_isspace (*(t - 1))) { - t --; - } - *t++ = '\r'; - *t++ = '\n'; - *t = '\0'; - - if (!is_sign) { - msg_debug ("update signature with header: %s", buf); - g_checksum_update (ctx->headers_hash, buf, t - buf); - } - else { - rspamd_dkim_signature_update (ctx, buf, t - buf); - } - - if (allocated) { - g_free (buf); - } - - return TRUE; -} - -struct rspamd_dkim_sign_chunk { - const gchar *begin; - gsize len; - gboolean append_crlf; -}; - -static gboolean -rspamd_dkim_canonize_header_simple (rspamd_dkim_context_t *ctx, const gchar *headers, - const gchar *header_name, guint count, gboolean is_sign) -{ - const gchar *p, *c; - gint state = 0, hlen; - gboolean found = FALSE; - GArray *to_sign; - 
struct rspamd_dkim_sign_chunk chunk, *elt; - gint i; - - /* This process is very similar to raw headers processing */ - to_sign = g_array_sized_new (FALSE, FALSE, sizeof (struct rspamd_dkim_sign_chunk), count); - p = headers; - c = p; - hlen = strlen (header_name); - - while (*p) { - switch (state) { - case 0: - /* Compare state */ - if (*p == ':') { - /* Compare header's name with desired one */ - if (p - c == hlen) { - if (g_ascii_strncasecmp (c, header_name, hlen) == 0) { - /* Get value */ - state = 2; - } - else { - /* Skip the whole header */ - state = 1; - } - } - else { - /* Skip the whole header */ - state = 1; - } - } - p ++; - break; - case 1: - /* Skip header state */ - if (*p == '\n' && !g_ascii_isspace (p[1])) { - /* Header is skipped */ - state = 0; - c = p + 1; - } - p ++; - break; - case 2: - /* c contains the beginning of header */ - if (*p == '\n' && (!g_ascii_isspace (p[1]) || p[1] == '\0')) { - chunk.begin = c; - if (*(p - 1) == '\r') { - chunk.len = p - c + 1; - chunk.append_crlf = FALSE; - } - else { - /* Need append CRLF as linefeed is not proper */ - chunk.len = p - c; - chunk.append_crlf = TRUE; - } - g_array_append_val (to_sign, chunk); - c = p + 1; - state = 0; - found = TRUE; - } - p ++; - break; - } - } - - if (found) { - if (!is_sign) { - - for (i = to_sign->len - 1; i >= 0 && count > 0; i --, count --) { - elt = &g_array_index (to_sign, struct rspamd_dkim_sign_chunk, i); - - if (!chunk.append_crlf) { - msg_debug ("update signature with header: %*s", elt->len, elt->begin); - rspamd_dkim_hash_update (ctx->headers_hash, elt->begin, elt->len); - } - else { - msg_debug ("update signature with header: %*s", elt->len + 1, elt->begin); - rspamd_dkim_hash_update (ctx->headers_hash, elt->begin, elt->len + 1); - } - } - } - else { - elt = &g_array_index (to_sign, struct rspamd_dkim_sign_chunk, 0); - if (elt->append_crlf) { - rspamd_dkim_signature_update (ctx, elt->begin, elt->len + 1); - } - else { - rspamd_dkim_signature_update (ctx, 
elt->begin, elt->len); - } - } - } - - g_array_free (to_sign, TRUE); - - return found; -} - -static gboolean -rspamd_dkim_canonize_header (rspamd_dkim_context_t *ctx, struct rspamd_task *task, const gchar *header_name, - guint count, gboolean is_sig) -{ - struct raw_header *rh, *rh_iter; - guint rh_num = 0; - GList *nh = NULL, *cur; - - if (ctx->header_canon_type == DKIM_CANON_SIMPLE) { - return rspamd_dkim_canonize_header_simple (ctx, task->raw_headers_str, header_name, count, is_sig); - } - else { - rh = g_hash_table_lookup (task->raw_headers, header_name); - if (rh) { - if (!is_sig) { - rh_iter = rh; - while (rh_iter) { - rh_num ++; - rh_iter = rh_iter->next; - } - - if (rh_num > count) { - /* Set skip count */ - rh_num -= count; - } - else { - rh_num = 0; - } - rh_iter = rh; - while (rh_num) { - rh_iter = rh_iter->next; - rh_num --; - } - /* Now insert required headers */ - while (rh_iter) { - nh = g_list_prepend (nh, rh_iter); - rh_iter = rh_iter->next; - } - cur = nh; - while (cur) { - rh = cur->data; - if (! 
rspamd_dkim_canonize_header_relaxed (ctx, rh->value, header_name, is_sig)) { - g_list_free (nh); - return FALSE; - } - cur = g_list_next (cur); - } - if (nh != NULL) { - g_list_free (nh); - } - } - else { - /* For signature check just use the first dkim header */ - rspamd_dkim_canonize_header_relaxed (ctx, rh->value, header_name, is_sig); - } - return TRUE; - } - } - - /* TODO: Implement relaxed algorithm */ - return FALSE; -} - -/** - * Check task for dkim context using dkim key - * @param ctx dkim verify context - * @param key dkim key (from cache or from dns request) - * @param task task to check - * @return - */ -gint -rspamd_dkim_check (rspamd_dkim_context_t *ctx, rspamd_dkim_key_t *key, struct rspamd_task *task) -{ - const gchar *p, *headers_end = NULL, *end, *body_end; - gboolean got_cr = FALSE, got_crlf = FALSE, got_lf = FALSE; - gchar *digest; - gsize dlen; - gint res = DKIM_CONTINUE; - guint i; - struct rspamd_dkim_header *dh; -#ifdef HAVE_OPENSSL - gint nid; -#endif - - g_return_val_if_fail (ctx != NULL, DKIM_ERROR); - g_return_val_if_fail (key != NULL, DKIM_ERROR); - g_return_val_if_fail (task->msg != NULL, DKIM_ERROR); - - /* First of all find place of body */ - p = task->msg->str; - - end = task->msg->str + task->msg->len; - - while (p <= end) { - /* Search for \r\n\r\n at the end of headers */ - if (*p == '\n') { - if (got_cr && *(p - 1) == '\r') { - if (got_crlf) { - /* \r\n\r\n */ - headers_end = p + 1; - break; - } - else if (got_lf) { - /* \n\r\n */ - headers_end = p + 1; - break; - } - else { - /* Set got crlf flag */ - got_crlf = TRUE; - got_cr = FALSE; - got_lf = FALSE; - } - } - else if (got_cr && *(p - 1) != '\r') { - /* We got CR somewhere but not right before */ - got_cr = FALSE; - if (*(p - 1) == '\n') { - /* \r\n\n case */ - headers_end = p + 1; - break; - } - got_lf = TRUE; - } - else if (got_lf && *(p - 1) == '\n') { - /* \n\n case */ - headers_end = p + 1; - break; - } - else { - got_lf = TRUE; - } - } - else if (*p == '\r') { - if 
(got_cr && *(p - 1) == '\r') { - /* \r\r case */ - headers_end = p + 1; - break; - } - else if (got_lf && *(p - 1) != '\n') { - /* Sequence is broken */ - got_lf = FALSE; - got_cr = TRUE; - } - else { - got_cr = TRUE; - } - } - else { - got_cr = FALSE; - got_crlf = FALSE; - } - p ++; - } - - /* Start canonization of body part */ - if (headers_end) { - if (ctx->len == 0 || (gint)ctx->len > end - headers_end) { - body_end = end; - } - else { - /* Strip message */ - body_end = headers_end + ctx->len; - } - } - else { - body_end = end; - } - if (!rspamd_dkim_canonize_body (ctx, headers_end, body_end)) { - return DKIM_RECORD_ERROR; - } - /* Now canonize headers */ - for (i = 0; i < ctx->hlist->len; i ++) { - dh = g_ptr_array_index (ctx->hlist, i); - rspamd_dkim_canonize_header (ctx, task, dh->name, dh->count, FALSE); - } - - /* Canonize dkim signature */ - rspamd_dkim_canonize_header (ctx, task, DKIM_SIGNHEADER, 1, TRUE); - - dlen = ctx->bhlen; - digest = g_alloca (dlen); - g_checksum_get_digest (ctx->body_hash, digest, &dlen); - - /* Check bh field */ - if (memcmp (ctx->bh, digest, dlen) != 0) { - msg_debug ("bh value missmatch"); - return DKIM_REJECT; - } - - g_checksum_get_digest (ctx->headers_hash, digest, &dlen); -#ifdef HAVE_OPENSSL - /* Check headers signature */ - - if (ctx->sig_alg == DKIM_SIGN_RSASHA1) { - nid = NID_sha1; - } - else if (ctx->sig_alg == DKIM_SIGN_RSASHA256) { - nid = NID_sha256; - } - else { - /* Not reached */ - nid = NID_sha1; - } - - if (RSA_verify (nid, digest, dlen, ctx->b, ctx->blen, key->key_rsa) != 1) { - msg_debug ("rsa verify failed"); - res = DKIM_REJECT; - } -#endif - return res; -} diff --git a/src/dkim.h b/src/dkim.h deleted file mode 100644 index 29ec479b7..000000000 --- a/src/dkim.h +++ /dev/null @@ -1,207 +0,0 @@ -/* Copyright (c) 2010-2011, Vsevolod Stakhov - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#ifndef DKIM_H_ -#define DKIM_H_ - -#include "config.h" -#include "event.h" -#include "dns.h" -#ifdef HAVE_OPENSSL -#include -#include -#endif - -/* Main types and definitions */ - -#define DKIM_SIGNHEADER "DKIM-Signature" - /* DKIM signature header */ - -/* special DNS tokens */ -#define DKIM_DNSKEYNAME "_domainkey" - /* reserved DNS sub-zone */ -#define DKIM_DNSPOLICYNAME "_adsp" /* reserved DNS sub-zone */ - -/* Canonization methods */ -#define DKIM_CANON_UNKNOWN (-1) /* unknown method */ -#define DKIM_CANON_SIMPLE 0 /* as specified in DKIM spec */ -#define DKIM_CANON_RELAXED 1 /* as specified in DKIM spec */ - -#define DKIM_CANON_DEFAULT DKIM_CANON_SIMPLE - -/* Signature methods */ -#define DKIM_SIGN_UNKNOWN (-2) /* unknown method */ -#define DKIM_SIGN_DEFAULT (-1) /* use internal default */ -#define DKIM_SIGN_RSASHA1 0 /* an RSA-signed SHA1 digest */ -#define DKIM_SIGN_RSASHA256 1 /* an RSA-signed SHA256 digest */ - -/* Params */ -#define DKIM_PARAM_UNKNOWN (-1) /* unknown */ -#define DKIM_PARAM_SIGNATURE 0 /* b */ -#define DKIM_PARAM_SIGNALG 1 /* a */ -#define DKIM_PARAM_DOMAIN 2 /* d */ -#define DKIM_PARAM_CANONALG 3 /* c */ -#define DKIM_PARAM_QUERYMETHOD 4 /* q */ -#define DKIM_PARAM_SELECTOR 5 /* s */ -#define DKIM_PARAM_HDRLIST 6 /* h */ -#define DKIM_PARAM_VERSION 7 /* v */ -#define DKIM_PARAM_IDENTITY 8 /* i */ -#define DKIM_PARAM_TIMESTAMP 9 /* t */ -#define DKIM_PARAM_EXPIRATION 10 /* x */ -#define DKIM_PARAM_COPIEDHDRS 11 /* z */ -#define DKIM_PARAM_BODYHASH 12 /* bh */ -#define DKIM_PARAM_BODYLENGTH 13 /* l */ - -/* Errors (from OpenDKIM) */ - -#define DKIM_SIGERROR_UNKNOWN (-1) /* unknown error */ -#define DKIM_SIGERROR_OK 0 /* no error */ -#define DKIM_SIGERROR_VERSION 1 /* unsupported version */ -#define DKIM_SIGERROR_DOMAIN 2 /* invalid domain (d=/i=) */ -#define DKIM_SIGERROR_EXPIRED 3 /* signature expired */ -#define DKIM_SIGERROR_FUTURE 4 /* signature in the future */ -#define DKIM_SIGERROR_TIMESTAMPS 5 /* x= < t= */ -#define 
DKIM_SIGERROR_UNUSED 6 /* OBSOLETE */ -#define DKIM_SIGERROR_INVALID_HC 7 /* c= invalid (header) */ -#define DKIM_SIGERROR_INVALID_BC 8 /* c= invalid (body) */ -#define DKIM_SIGERROR_MISSING_A 9 /* a= missing */ -#define DKIM_SIGERROR_INVALID_A 10 /* a= invalid */ -#define DKIM_SIGERROR_MISSING_H 11 /* h= missing */ -#define DKIM_SIGERROR_INVALID_L 12 /* l= invalid */ -#define DKIM_SIGERROR_INVALID_Q 13 /* q= invalid */ -#define DKIM_SIGERROR_INVALID_QO 14 /* q= option invalid */ -#define DKIM_SIGERROR_MISSING_D 15 /* d= missing */ -#define DKIM_SIGERROR_EMPTY_D 16 /* d= empty */ -#define DKIM_SIGERROR_MISSING_S 17 /* s= missing */ -#define DKIM_SIGERROR_EMPTY_S 18 /* s= empty */ -#define DKIM_SIGERROR_MISSING_B 19 /* b= missing */ -#define DKIM_SIGERROR_EMPTY_B 20 /* b= empty */ -#define DKIM_SIGERROR_CORRUPT_B 21 /* b= corrupt */ -#define DKIM_SIGERROR_NOKEY 22 /* no key found in DNS */ -#define DKIM_SIGERROR_DNSSYNTAX 23 /* DNS reply corrupt */ -#define DKIM_SIGERROR_KEYFAIL 24 /* DNS query failed */ -#define DKIM_SIGERROR_MISSING_BH 25 /* bh= missing */ -#define DKIM_SIGERROR_EMPTY_BH 26 /* bh= empty */ -#define DKIM_SIGERROR_CORRUPT_BH 27 /* bh= corrupt */ -#define DKIM_SIGERROR_BADSIG 28 /* signature mismatch */ -#define DKIM_SIGERROR_SUBDOMAIN 29 /* unauthorized subdomain */ -#define DKIM_SIGERROR_MULTIREPLY 30 /* multiple records returned */ -#define DKIM_SIGERROR_EMPTY_H 31 /* h= empty */ -#define DKIM_SIGERROR_INVALID_H 32 /* h= missing req'd entries */ -#define DKIM_SIGERROR_TOOLARGE_L 33 /* l= value exceeds body size */ -#define DKIM_SIGERROR_MBSFAILED 34 /* "must be signed" failure */ -#define DKIM_SIGERROR_KEYVERSION 35 /* unknown key version */ -#define DKIM_SIGERROR_KEYUNKNOWNHASH 36 /* unknown key hash */ -#define DKIM_SIGERROR_KEYHASHMISMATCH 37 /* sig-key hash mismatch */ -#define DKIM_SIGERROR_NOTEMAILKEY 38 /* not an e-mail key */ -#define DKIM_SIGERROR_UNUSED2 39 /* OBSOLETE */ -#define DKIM_SIGERROR_KEYTYPEMISSING 40 /* key type missing */ 
-#define DKIM_SIGERROR_KEYTYPEUNKNOWN 41 /* key type unknown */ -#define DKIM_SIGERROR_KEYREVOKED 42 /* key revoked */ -#define DKIM_SIGERROR_KEYDECODE 43 /* key couldn't be decoded */ -#define DKIM_SIGERROR_MISSING_V 44 /* v= tag missing */ -#define DKIM_SIGERROR_EMPTY_V 45 /* v= tag empty */ - -/* Check results */ -#define DKIM_CONTINUE 0 /* continue */ -#define DKIM_REJECT 1 /* reject */ -#define DKIM_TRYAGAIN 2 /* try again later */ -#define DKIM_NOTFOUND 3 /* requested record not found */ -#define DKIM_RECORD_ERROR 4 /* error requesting record */ - -typedef struct rspamd_dkim_context_s { - rspamd_mempool_t *pool; - gint sig_alg; - gint header_canon_type; - gint body_canon_type; - gsize len; - gchar *domain; - gchar *selector; - time_t timestamp; - time_t expiration; - gint8 *b; - gint8 *bh; - guint bhlen; - guint blen; - GPtrArray *hlist; - guint ver; - gchar *dns_key; - GChecksum *headers_hash; - GChecksum *body_hash; -} rspamd_dkim_context_t; - -typedef struct rspamd_dkim_key_s { - guint8 *keydata; - guint keylen; - gsize decoded_len; - guint ttl; -#ifdef HAVE_OPENSSL - RSA *key_rsa; - BIO *key_bio; - EVP_PKEY *key_evp; -#endif -} -rspamd_dkim_key_t; - -struct rspamd_task; - -/* Err MUST be freed if it is not NULL, key is allocated by slice allocator */ -typedef void (*dkim_key_handler_f)(rspamd_dkim_key_t *key, gsize keylen, rspamd_dkim_context_t *ctx, gpointer ud, GError *err); - -/** - * Create new dkim context from signature - * @param sig message's signature - * @param pool pool to allocate memory from - * @param time_jitter jitter in seconds to allow time diff while checking - * @param err pointer to error object - * @return new context or NULL - */ -rspamd_dkim_context_t* rspamd_create_dkim_context (const gchar *sig, rspamd_mempool_t *pool, guint time_jitter, GError **err); - -/** - * Make DNS request for specified context and obtain and parse key - * @param ctx dkim context from signature - * @param resolver dns resolver object - * @param s async 
session to make request - * @return - */ -gboolean rspamd_get_dkim_key (rspamd_dkim_context_t *ctx, struct rspamd_dns_resolver *resolver, - struct rspamd_async_session *s, dkim_key_handler_f handler, gpointer ud); - -/** - * Check task for dkim context using dkim key - * @param ctx dkim verify context - * @param key dkim key (from cache or from dns request) - * @param task task to check - * @return - */ -gint rspamd_dkim_check (rspamd_dkim_context_t *ctx, rspamd_dkim_key_t *key, struct rspamd_task *task); - -/** - * Free DKIM key - * @param key - */ -void rspamd_dkim_key_free (rspamd_dkim_key_t *key); - -#endif /* DKIM_H_ */ diff --git a/src/dns.c b/src/dns.c deleted file mode 100644 index e20cca9df..000000000 --- a/src/dns.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2009-2013, Vsevolod Stakhov - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "dns.h" -#include "main.h" -#include "utlist.h" -#include "uthash.h" -#include "rdns_event.h" - -struct rspamd_dns_resolver { - struct rdns_resolver *r; - struct event_base *ev_base; - gdouble request_timeout; - guint max_retransmits; -}; - -struct rspamd_dns_request_ud { - struct rspamd_async_session *session; - dns_callback_type cb; - gpointer ud; - struct rdns_request *req; -}; - -static void -rspamd_dns_fin_cb (gpointer arg) -{ - struct rdns_request *req = arg; - - rdns_request_release (req); -} - -static void -rspamd_dns_callback (struct rdns_reply *reply, gpointer ud) -{ - struct rspamd_dns_request_ud *reqdata = ud; - - reqdata->cb (reply, reqdata->ud); - - remove_normal_event (reqdata->session, rspamd_dns_fin_cb, reqdata->req); -} - -gboolean -make_dns_request (struct rspamd_dns_resolver *resolver, - struct rspamd_async_session *session, rspamd_mempool_t *pool, dns_callback_type cb, - gpointer ud, enum rdns_request_type type, const char *name) -{ - struct rdns_request *req; - struct rspamd_dns_request_ud *reqdata; - - reqdata = rspamd_mempool_alloc (pool, sizeof (struct rspamd_dns_request_ud)); - reqdata->session = session; - reqdata->cb = cb; - reqdata->ud = ud; - - req = rdns_make_request_full (resolver->r, rspamd_dns_callback, reqdata, - resolver->request_timeout, resolver->max_retransmits, 1, name, type); - - if (req != NULL) { - register_async_event (session, (event_finalizer_t)rspamd_dns_fin_cb, req, - 
g_quark_from_static_string ("dns resolver")); - /* Ref event to free it only when according async event is deleted from the session */ - rdns_request_retain (req); - reqdata->req = req; - } - else { - return FALSE; - } - - return TRUE; -} - - -struct rspamd_dns_resolver * -dns_resolver_init (rspamd_logger_t *logger, struct event_base *ev_base, struct config_file *cfg) -{ - GList *cur; - struct rspamd_dns_resolver *new; - gchar *begin, *p, *err; - gint priority; - - new = g_slice_alloc0 (sizeof (struct rspamd_dns_resolver)); - new->ev_base = ev_base; - new->request_timeout = cfg->dns_timeout; - new->max_retransmits = cfg->dns_retransmits; - - new->r = rdns_resolver_new (); - rdns_bind_libevent (new->r, new->ev_base); - rdns_resolver_set_log_level (new->r, cfg->log_level); - rdns_resolver_set_logger (new->r, (rdns_log_function)rspamd_common_logv, logger); - - if (cfg->nameservers == NULL) { - /* Parse resolv.conf */ - if (!rdns_resolver_parse_resolv_conf (new->r, "/etc/resolv.conf")) { - msg_err ("cannot parse resolv.conf and no nameservers defined, so no ways to resolve addresses"); - return new; - } - } - else { - cur = cfg->nameservers; - while (cur) { - begin = cur->data; - p = strchr (begin, ':'); - if (p != NULL) { - *p = '\0'; - p ++; - priority = strtoul (p, &err, 10); - if (err != NULL && *err != '\0') { - msg_info ("bad character '%x', must be 'm' or 's' or a numeric priority", *err); - } - } - else { - priority = 0; - } - if (!rdns_resolver_add_server (new->r, begin, 53, priority, cfg->dns_io_per_server)) { - msg_warn ("cannot parse ip address of nameserver: %s", begin); - cur = g_list_next (cur); - continue; - } - - cur = g_list_next (cur); - } - - } - - rdns_resolver_init (new->r); - - return new; -} diff --git a/src/dns.h b/src/dns.h deleted file mode 100644 index 26ae71387..000000000 --- a/src/dns.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2013, Vsevolod Stakhov - * - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef RSPAMD_DNS_H -#define RSPAMD_DNS_H - -#include "config.h" -#include "mem_pool.h" -#include "events.h" -#include "logger.h" -#include "rdns.h" - -struct rspamd_dns_resolver; - -/* Rspamd DNS API */ - -/** - * Init DNS resolver, params are obtained from a config file or system file /etc/resolv.conf - */ -struct rspamd_dns_resolver *dns_resolver_init (rspamd_logger_t *logger, - struct event_base *ev_base, struct config_file *cfg); - -/** - * Make a DNS request - * @param resolver resolver object - * @param session async session to register event - * @param pool memory pool for storage - * @param cb callback to call on resolve completing - * @param ud user data for callback - * @param type request type - * @param ... 
string or ip address based on a request type - * @return TRUE if request was sent. - */ -gboolean make_dns_request (struct rspamd_dns_resolver *resolver, - struct rspamd_async_session *session, rspamd_mempool_t *pool, - dns_callback_type cb, gpointer ud, enum rdns_request_type type, const char *name); - -#endif diff --git a/src/dns_private.h b/src/dns_private.h deleted file mode 100644 index 1c1df8ded..000000000 --- a/src/dns_private.h +++ /dev/null @@ -1,209 +0,0 @@ -/* Copyright (c) 2014, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef DNS_PRIVATE_H_ -#define DNS_PRIVATE_H_ - -#include "config.h" - -#define MAX_SERVERS 16 -/* Upstream timeouts */ -#define DEFAULT_UPSTREAM_ERROR_TIME 10 -#define DEFAULT_UPSTREAM_DEAD_TIME 300 -#define DEFAULT_UPSTREAM_MAXERRORS 10 - -static const unsigned base = 36; -static const unsigned t_min = 1; -static const unsigned t_max = 26; -static const unsigned skew = 38; -static const unsigned damp = 700; -static const unsigned initial_n = 128; -static const unsigned initial_bias = 72; - -static const gint dns_port = 53; - -#define UDP_PACKET_SIZE 4096 - -#define DNS_COMPRESSION_BITS 0xC0 - -#define DNS_D_MAXLABEL 63 /* + 1 '\0' */ -#define DNS_D_MAXNAME 255 /* + 1 '\0' */ - -#define RESOLV_CONF "/etc/resolv.conf" - -/** - * Represents DNS server - */ -struct rspamd_dns_server { - struct upstream up; /**< upstream structure */ - gchar *name; /**< name of DNS server */ - struct rspamd_dns_io_channel *io_channels; - struct rspamd_dns_io_channel *cur_io_channel; -}; - -/** - * IO channel for a specific DNS server - */ -struct rspamd_dns_io_channel { - struct rspamd_dns_server *srv; - struct rspamd_dns_resolver *resolver; - gint sock; /**< persistent socket */ - struct event ev; - GHashTable *requests; /**< requests in flight */ - struct rspamd_dns_io_channel *prev, *next; -}; - - -struct rspamd_dns_resolver { - struct rspamd_dns_server servers[MAX_SERVERS]; - gint servers_num; /**< number of DNS servers registered */ - guint request_timeout; - guint max_retransmits; - guint max_errors; - GHashTable *io_channels; /**< hash of io chains indexed by socket */ - gboolean throttling; /**< dns servers are busy */ - gboolean is_master_slave; /**< if this is true, then select upstreams as master/slave */ - guint errors; /**< resolver errors */ - struct timeval throttling_time; /**< throttling time */ - struct event throttling_event; /**< throttling event */ - struct event_base *ev_base; /**< base for event ops */ -}; - -struct dns_header; -struct dns_query; - -/* 
Internal DNS structs */ - -struct dns_header { - guint qid :16; - -#if BYTE_ORDER == BIG_ENDIAN - guint qr:1; - guint opcode:4; - guint aa:1; - guint tc:1; - guint rd:1; - - guint ra:1; - guint unused:3; - guint rcode:4; -#else - guint rd :1; - guint tc :1; - guint aa :1; - guint opcode :4; - guint qr :1; - - guint rcode :4; - guint unused :3; - guint ra :1; -#endif - - guint qdcount :16; - guint ancount :16; - guint nscount :16; - guint arcount :16; -}; - -enum dns_section { - DNS_S_QD = 0x01, -#define DNS_S_QUESTION DNS_S_QD - - DNS_S_AN = 0x02, -#define DNS_S_ANSWER DNS_S_AN - - DNS_S_NS = 0x04, -#define DNS_S_AUTHORITY DNS_S_NS - - DNS_S_AR = 0x08, -#define DNS_S_ADDITIONAL DNS_S_AR - - DNS_S_ALL = 0x0f -}; -/* enum dns_section */ - -enum dns_opcode { - DNS_OP_QUERY = 0, - DNS_OP_IQUERY = 1, - DNS_OP_STATUS = 2, - DNS_OP_NOTIFY = 4, - DNS_OP_UPDATE = 5, -}; -/* dns_opcode */ - -enum dns_class { - DNS_C_IN = 1, - - DNS_C_ANY = 255 -}; -/* enum dns_class */ - -struct dns_query { - gchar *qname; - guint qtype :16; - guint qclass :16; -}; - -enum dns_type { - DNS_T_A = 1, - DNS_T_NS = 2, - DNS_T_CNAME = 5, - DNS_T_SOA = 6, - DNS_T_PTR = 12, - DNS_T_MX = 15, - DNS_T_TXT = 16, - DNS_T_AAAA = 28, - DNS_T_SRV = 33, - DNS_T_OPT = 41, - DNS_T_SSHFP = 44, - DNS_T_SPF = 99, - - DNS_T_ALL = 255 -}; -/* enum dns_type */ - -static const gchar dns_rcodes[16][16] = { - [DNS_RC_NOERROR] = "NOERROR", - [DNS_RC_FORMERR] = "FORMERR", - [DNS_RC_SERVFAIL] = "SERVFAIL", - [DNS_RC_NXDOMAIN] = "NXDOMAIN", - [DNS_RC_NOTIMP] = "NOTIMP", - [DNS_RC_REFUSED] = "REFUSED", - [DNS_RC_YXDOMAIN] = "YXDOMAIN", - [DNS_RC_YXRRSET] = "YXRRSET", - [DNS_RC_NXRRSET] = "NXRRSET", - [DNS_RC_NOTAUTH] = "NOTAUTH", - [DNS_RC_NOTZONE] = "NOTZONE", -}; - -static const gchar dns_types[7][16] = { - [DNS_REQUEST_A] = "A request", - [DNS_REQUEST_PTR] = "PTR request", - [DNS_REQUEST_MX] = "MX request", - [DNS_REQUEST_TXT] = "TXT request", - [DNS_REQUEST_SRV] = "SRV request", - [DNS_REQUEST_SPF] = "SPF request", - 
[DNS_REQUEST_AAA] = "AAA request" -}; - -#endif /* DNS_PRIVATE_H_ */ diff --git a/src/dynamic_cfg.c b/src/dynamic_cfg.c deleted file mode 100644 index 7f5e8530d..000000000 --- a/src/dynamic_cfg.c +++ /dev/null @@ -1,599 +0,0 @@ -/* Copyright (c) 2010-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "main.h" -#include "map.h" -#include "filter.h" -#include "dynamic_cfg.h" -#include "json/jansson.h" - -struct dynamic_cfg_symbol { - gchar *name; - gdouble value; -}; - -struct dynamic_cfg_action { - enum rspamd_metric_action action; - gdouble value; -}; - -struct dynamic_cfg_metric { - GList *symbols; - struct dynamic_cfg_action actions[METRIC_ACTION_MAX]; - gchar *name; -}; - -struct config_json_buf { - gchar *buf; - gchar *pos; - size_t buflen; - struct config_file *cfg; - GList *config_metrics; -}; - -/** - * Free dynamic configuration - * @param conf_metrics - */ -static void -dynamic_cfg_free (GList *conf_metrics) -{ - GList *cur, *cur_elt; - struct dynamic_cfg_metric *metric; - struct dynamic_cfg_symbol *sym; - - if (conf_metrics) { - cur = conf_metrics; - while (cur) { - metric = cur->data; - if (metric->symbols) { - cur_elt = metric->symbols; - while (cur_elt) { - sym = cur_elt->data; - g_free (sym->name); - g_slice_free1 (sizeof (struct dynamic_cfg_symbol), sym); - cur_elt = g_list_next (cur_elt); - } - g_list_free (metric->symbols); - } - g_slice_free1 (sizeof (struct dynamic_cfg_metric), metric); - cur = g_list_next (cur); - } - g_list_free (conf_metrics); - } -} -/** - * Apply configuration to the specified configuration - * @param conf_metrics - * @param cfg - */ -static void -apply_dynamic_conf (GList *conf_metrics, struct config_file *cfg) -{ - GList *cur, *cur_elt; - struct dynamic_cfg_metric *metric; - struct dynamic_cfg_symbol *sym; - struct dynamic_cfg_action *act; - struct metric *real_metric; - struct metric_action *real_act; - gdouble *w; - gint i, j; - - cur = conf_metrics; - while (cur) { - metric = cur->data; - if ((real_metric = g_hash_table_lookup (cfg->metrics, metric->name)) != NULL) { - cur_elt = metric->symbols; - while (cur_elt) { - sym = cur_elt->data; - if ((w = g_hash_table_lookup (real_metric->symbols, sym->name)) != NULL) { - *w = sym->value; - } - else { - msg_info ("symbol %s is not 
found in the main configuration", sym->name); - } - cur_elt = g_list_next (cur_elt); - } - - for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) { - act = &metric->actions[i]; - if (act->value < 0) { - continue; - } - for (j = METRIC_ACTION_REJECT; j < METRIC_ACTION_MAX; j ++) { - real_act = &real_metric->actions[j]; - if (real_act->action == act->action) { - real_act->score = act->value; - } - /* Update required score accordingly to metric's action */ - if (act->action == METRIC_ACTION_REJECT) { - real_metric->actions[METRIC_ACTION_REJECT].score = act->value; - } - } - } - } - cur = g_list_next (cur); - } -} - -/* Callbacks for reading json dynamic rules */ -gchar * -json_config_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) -{ - struct config_json_buf *jb; - gint free, off; - - if (data->cur_data == NULL) { - jb = g_malloc (sizeof (struct config_json_buf)); - jb->cfg = ((struct config_json_buf *)data->prev_data)->cfg; - jb->buf = NULL; - jb->pos = NULL; - jb->config_metrics = NULL; - data->cur_data = jb; - } - else { - jb = data->cur_data; - } - - if (jb->buf == NULL) { - /* Allocate memory for buffer */ - jb->buflen = len * 2; - jb->buf = g_malloc (jb->buflen); - jb->pos = jb->buf; - } - - off = jb->pos - jb->buf; - free = jb->buflen - off; - - if (free < len) { - jb->buflen = MAX (jb->buflen * 2, jb->buflen + len * 2); - jb->buf = g_realloc (jb->buf, jb->buflen); - jb->pos = jb->buf + off; - } - - memcpy (jb->pos, chunk, len); - jb->pos += len; - - /* Say not to copy any part of this buffer */ - return NULL; -} - -void -json_config_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data) -{ - struct config_json_buf *jb; - guint nelts, i, j, selts; - gint test_act; - json_t *js, *cur_elt, *cur_nm, *it_val; - json_error_t je; - struct dynamic_cfg_metric *cur_metric; - struct dynamic_cfg_symbol *cur_symbol; - struct dynamic_cfg_action *cur_action; - - if (data->prev_data) { - jb = data->prev_data; - /* Clean prev 
data */ - if (jb->buf) { - g_free (jb->buf); - } - g_free (jb); - } - - /* Now parse json */ - if (data->cur_data) { - jb = data->cur_data; - } - else { - msg_err ("no data read"); - return; - } - if (jb->buf == NULL) { - msg_err ("no data read"); - return; - } - /* NULL terminate current buf */ - *jb->pos = '\0'; - - js = json_loads (jb->buf, &je); - if (!js) { - msg_err ("cannot load json data: parse error %s, on line %d", je.text, je.line); - return; - } - - if (!json_is_array (js)) { - json_decref (js); - msg_err ("loaded json is not an array"); - return; - } - - jb->cfg->current_dynamic_conf = NULL; - dynamic_cfg_free (jb->config_metrics); - jb->config_metrics = NULL; - - /* Parse configuration */ - nelts = json_array_size (js); - for (i = 0; i < nelts; i++) { - cur_elt = json_array_get (js, i); - if (!cur_elt || !json_is_object (cur_elt)) { - msg_err ("loaded json array element is not an object"); - continue; - } - - cur_nm = json_object_get (cur_elt, "metric"); - if (!cur_nm || !json_is_string (cur_nm)) { - msg_err ("loaded json metric object element has no 'metric' attribute"); - continue; - } - cur_metric = g_slice_alloc0 (sizeof (struct dynamic_cfg_metric)); - for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) { - cur_metric->actions[i].value = -1.0; - } - cur_metric->name = g_strdup (json_string_value (cur_nm)); - cur_nm = json_object_get (cur_elt, "symbols"); - /* Parse symbols */ - if (cur_nm && json_is_array (cur_nm)) { - selts = json_array_size (cur_nm); - for (j = 0; j < selts; j ++) { - it_val = json_array_get (cur_nm, j); - if (it_val && json_is_object (it_val)) { - if (json_object_get (it_val, "name") && json_object_get (it_val, "value")) { - cur_symbol = g_slice_alloc0 (sizeof (struct dynamic_cfg_symbol)); - cur_symbol->name = g_strdup (json_string_value (json_object_get (it_val, "name"))); - cur_symbol->value = json_number_value (json_object_get (it_val, "value")); - /* Insert symbol */ - cur_metric->symbols = g_list_prepend 
(cur_metric->symbols, cur_symbol); - } - else { - msg_info ("json symbol object has no mandatory 'name' and 'value' attributes"); - } - } - } - } - cur_nm = json_object_get (cur_elt, "actions"); - /* Parse actions */ - if (cur_nm && json_is_array (cur_nm)) { - selts = json_array_size (cur_nm); - for (j = 0; j < selts; j ++) { - it_val = json_array_get (cur_nm, j); - if (it_val && json_is_object (it_val)) { - if (json_object_get (it_val, "name") && json_object_get (it_val, "value")) { - if (!check_action_str (json_string_value (json_object_get (it_val, "name")), &test_act)) { - msg_err ("unknown action: %s", json_string_value (json_object_get (it_val, "name"))); - g_slice_free1 (sizeof (struct dynamic_cfg_action), cur_action); - continue; - } - cur_action = &cur_metric->actions[test_act]; - cur_action->action = test_act; - cur_action->value = json_number_value (json_object_get (it_val, "value")); - } - else { - msg_info ("json symbol object has no mandatory 'name' and 'value' attributes"); - } - } - } - } - jb->config_metrics = g_list_prepend (jb->config_metrics, cur_metric); - } - /* - * Note about thread safety: we are updating values that are gdoubles so it is not atomic in general case - * but on the other hand all that data is used only in the main thread, so why it is *likely* safe - * to do this task in this way without explicit lock. 
- */ - apply_dynamic_conf (jb->config_metrics, jb->cfg); - - jb->cfg->current_dynamic_conf = jb->config_metrics; - - json_decref (js); -} - -/** - * Init dynamic configuration using map logic and specific configuration - * @param cfg config file - */ -void -init_dynamic_config (struct config_file *cfg) -{ - struct config_json_buf *jb, **pjb; - - if (cfg->dynamic_conf == NULL) { - /* No dynamic conf has been specified, so do not try to load it */ - return; - } - - /* Now try to add map with json data */ - jb = g_malloc0 (sizeof (struct config_json_buf)); - pjb = g_malloc (sizeof (struct config_json_buf *)); - jb->buf = NULL; - jb->cfg = cfg; - *pjb = jb; - if (!add_map (cfg, cfg->dynamic_conf, "Dynamic configuration map", json_config_read_cb, json_config_fin_cb, (void **)pjb)) { - msg_err ("cannot add map for configuration %s", cfg->dynamic_conf); - } -} - -static gboolean -dump_dynamic_list (gint fd, GList *rules) -{ - GList *cur, *cur_elt; - struct dynamic_cfg_metric *metric; - struct dynamic_cfg_symbol *sym; - struct dynamic_cfg_action *act; - FILE *f; - gint i; - gboolean start = TRUE; - - /* Open buffered stream for the descriptor */ - if ((f = fdopen (fd, "a+")) == NULL) { - msg_err ("fdopen failed: %s", strerror (errno)); - return FALSE; - } - - - if (rules) { - fprintf (f, "[\n"); - cur = rules; - while (cur) { - metric = cur->data; - fprintf (f, "{\n \"metric\": \"%s\",\n", metric->name); - if (metric->symbols) { - fprintf (f, " \"symbols\": [\n"); - cur_elt = metric->symbols; - while (cur_elt) { - sym = cur_elt->data; - cur_elt = g_list_next (cur_elt); - if (cur_elt) { - fprintf (f, " {\"name\": \"%s\",\"value\": %.2f},\n", sym->name, sym->value); - } - else { - fprintf (f, " {\"name\": \"%s\",\"value\": %.2f}\n", sym->name, sym->value); - } - } - if (metric->actions) { - fprintf (f, " ],\n"); - } - else { - fprintf (f, " ]\n"); - } - } - - if (metric->actions) { - fprintf (f, " \"actions\": [\n"); - for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i 
++) { - act = &metric->actions[i]; - if (act->value < 0) { - continue; - } - fprintf (f, " %s{\"name\": \"%s\",\"value\": %.2f}\n", - (start ? "" : ","), str_action_metric (act->action), act->value); - if (start) { - start = FALSE; - } - } - fprintf (f, " ]\n"); - } - cur = g_list_next (cur); - if (cur) { - fprintf (f, "},\n"); - } - else { - fprintf (f, "}\n]\n"); - } - } - } - fclose (f); - - return TRUE; -} - -/** - * Dump dynamic configuration to the disk - * @param cfg - * @return - */ -gboolean -dump_dynamic_config (struct config_file *cfg) -{ - struct stat st; - gchar *dir, pathbuf[PATH_MAX]; - gint fd; - - if (cfg->dynamic_conf == NULL || cfg->current_dynamic_conf == NULL) { - /* No dynamic conf has been specified, so do not try to dump it */ - return FALSE; - } - - dir = g_path_get_dirname (cfg->dynamic_conf); - if (dir == NULL) { - /* Inaccessible path */ - if (dir != NULL) { - g_free (dir); - } - msg_err ("invalid file: %s", cfg->dynamic_conf); - return FALSE; - } - - if (stat (cfg->dynamic_conf, &st) == -1) { - msg_debug ("%s is unavailable: %s", cfg->dynamic_conf, strerror (errno)); - st.st_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH; - } - if (access (dir, W_OK | R_OK) == -1) { - msg_warn ("%s is inaccessible: %s", dir, strerror (errno)); - g_free (dir); - return FALSE; - } - rspamd_snprintf (pathbuf, sizeof (pathbuf), "%s%crconf-XXXXXX", dir, G_DIR_SEPARATOR); - g_free (dir); -#ifdef HAVE_MKSTEMP - /* Umask is set before */ - fd = mkstemp (pathbuf); -#else - fd = g_mkstemp_full (pathbuf, O_RDWR, S_IWUSR | S_IRUSR); -#endif - if (fd == -1) { - msg_err ("mkstemp error: %s", strerror (errno)); - - return FALSE; - } - - if (!dump_dynamic_list (fd, cfg->current_dynamic_conf)) { - close (fd); - unlink (pathbuf); - return FALSE; - } - - (void)unlink (cfg->dynamic_conf); - - /* Rename old config */ - if (rename (pathbuf, cfg->dynamic_conf) == -1) { - msg_err ("rename error: %s", strerror (errno)); - close (fd); - unlink (pathbuf); - return FALSE; - } - /* Set 
permissions */ - - if (chmod (cfg->dynamic_conf, st.st_mode) == -1) { - msg_warn ("chmod failed: %s", strerror (errno)); - } - - close (fd); - return TRUE; -} - -/** - * Add symbol for specified metric - * @param cfg config file object - * @param metric metric's name - * @param symbol symbol's name - * @param value value of symbol - * @return - */ -gboolean -add_dynamic_symbol (struct config_file *cfg, const gchar *metric_name, const gchar *symbol, gdouble value) -{ - GList *cur; - struct dynamic_cfg_metric *metric = NULL; - struct dynamic_cfg_symbol *sym = NULL; - - if (cfg->dynamic_conf == NULL) { - msg_info ("dynamic conf is disabled"); - return FALSE; - } - - cur = cfg->current_dynamic_conf; - while (cur) { - metric = cur->data; - if (g_ascii_strcasecmp (metric->name, metric_name) == 0) { - break; - } - metric = NULL; - cur = g_list_next (cur); - } - - if (metric != NULL) { - /* Search for a symbol */ - cur = metric->symbols; - while (cur) { - sym = cur->data; - if (g_ascii_strcasecmp (sym->name, symbol) == 0) { - sym->value = value; - msg_debug ("change value of action %s to %.2f", symbol, value); - break; - } - sym = NULL; - cur = g_list_next (cur); - } - if (sym == NULL) { - /* Symbol not found, insert it */ - sym = g_slice_alloc (sizeof (struct dynamic_cfg_symbol)); - sym->name = g_strdup (symbol); - sym->value = value; - metric->symbols = g_list_prepend (metric->symbols, sym); - msg_debug ("create symbol %s in metric %s", symbol, metric_name); - } - } - else { - /* Metric not found, create it */ - metric = g_slice_alloc0 (sizeof (struct dynamic_cfg_metric)); - sym = g_slice_alloc (sizeof (struct dynamic_cfg_symbol)); - sym->name = g_strdup (symbol); - sym->value = value; - metric->symbols = g_list_prepend (metric->symbols, sym); - metric->name = g_strdup (metric_name); - cfg->current_dynamic_conf = g_list_prepend (cfg->current_dynamic_conf, metric); - msg_debug ("create metric %s for symbol %s", metric_name, symbol); - } - - apply_dynamic_conf 
(cfg->current_dynamic_conf, cfg); - - return TRUE; -} - - -/** - * Add action for specified metric - * @param cfg config file object - * @param metric metric's name - * @param action action's name - * @param value value of symbol - * @return - */ -gboolean -add_dynamic_action (struct config_file *cfg, const gchar *metric_name, guint action, gdouble value) -{ - GList *cur; - struct dynamic_cfg_metric *metric = NULL; - - if (cfg->dynamic_conf == NULL) { - msg_info ("dynamic conf is disabled"); - return FALSE; - } - - cur = cfg->current_dynamic_conf; - while (cur) { - metric = cur->data; - if (g_ascii_strcasecmp (metric->name, metric_name) == 0) { - break; - } - metric = NULL; - cur = g_list_next (cur); - } - - if (metric != NULL) { - /* Search for an action */ - metric->actions[action].value = value; - } - else { - /* Metric not found, create it */ - metric = g_slice_alloc0 (sizeof (struct dynamic_cfg_metric)); - metric->actions[action].value = value; - metric->name = g_strdup (metric_name); - cfg->current_dynamic_conf = g_list_prepend (cfg->current_dynamic_conf, metric); - msg_debug ("create metric %s for action %d", metric_name, action); - } - - apply_dynamic_conf (cfg->current_dynamic_conf, cfg); - - return TRUE; -} diff --git a/src/dynamic_cfg.h b/src/dynamic_cfg.h deleted file mode 100644 index b65d7aa9a..000000000 --- a/src/dynamic_cfg.h +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright (c) 2010-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - -#ifndef DYNAMIC_CFG_H_ -#define DYNAMIC_CFG_H_ - -#include "config.h" -#include "cfg_file.h" - -/** - * Init dynamic configuration using map logic and specific configuration - * @param cfg config file - */ -void init_dynamic_config (struct config_file *cfg); - -/** - * Dump dynamic configuration to the disk - * @param cfg - * @return - */ -gboolean dump_dynamic_config (struct config_file *cfg); - -/** - * Add symbol for specified metric - * @param cfg config file object - * @param metric metric's name - * @param symbol symbol's name - * @param value value of symbol - * @return - */ -gboolean add_dynamic_symbol (struct config_file *cfg, const gchar *metric, const gchar *symbol, gdouble value); - - -/** - * Add action for specified metric - * @param cfg config file object - * @param metric metric's name - * @param action action's name - * @param value value of symbol - * @return - */ -gboolean add_dynamic_action (struct config_file *cfg, const gchar *metric, guint action, gdouble value); - - -#endif /* DYNAMIC_CFG_H_ */ diff --git a/src/events.c b/src/events.c deleted file mode 100644 index 85843fd05..000000000 --- a/src/events.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "main.h" -#include "events.h" - -static gboolean -rspamd_event_equal (gconstpointer a, gconstpointer b) -{ - const struct rspamd_async_event *ev1 = a, *ev2 = b; - - if (ev1->fin == ev2->fin) { - return ev1->user_data == ev2->user_data; - } - - return FALSE; -} - -static guint -rspamd_event_hash (gconstpointer a) -{ - const struct rspamd_async_event *ev = a; - - return GPOINTER_TO_UINT (ev->user_data); -} - -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) -static void -event_mutex_free (gpointer data) -{ - GMutex *mtx = data; - - g_mutex_free (mtx); -} - -static void -event_cond_free (gpointer data) -{ - GCond *cond = data; - - g_cond_free (cond); -} -#endif - -struct rspamd_async_session * -new_async_session (rspamd_mempool_t * pool, session_finalizer_t fin, - event_finalizer_t restore, event_finalizer_t cleanup, void *user_data) -{ - struct rspamd_async_session *new; - - new = rspamd_mempool_alloc (pool, sizeof (struct rspamd_async_session)); - new->pool = pool; - new->fin = fin; - new->restore = restore; - new->cleanup = cleanup; - new->user_data = user_data; - new->wanna_die = FALSE; - new->events = g_hash_table_new (rspamd_event_hash, rspamd_event_equal); -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) - new->mtx = g_mutex_new (); - new->cond = g_cond_new (); - rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) event_mutex_free, new->mtx); - rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) event_cond_free, new->cond); -#else - new->mtx = rspamd_mempool_alloc (pool, sizeof (GMutex)); - g_mutex_init (new->mtx); - new->cond = rspamd_mempool_alloc (pool, sizeof (GCond)); - g_cond_init (new->cond); - rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_mutex_clear, new->mtx); - rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_cond_clear, new->cond); -#endif - new->threads = 0; - - rspamd_mempool_add_destructor (pool, 
(rspamd_mempool_destruct_t) g_hash_table_destroy, new->events); - - return new; -} - -void -register_async_event (struct rspamd_async_session *session, event_finalizer_t fin, void *user_data, GQuark subsystem) -{ - struct rspamd_async_event *new; - - if (session == NULL) { - msg_info ("session is NULL"); - return; - } - - g_mutex_lock (session->mtx); - new = rspamd_mempool_alloc (session->pool, sizeof (struct rspamd_async_event)); - new->fin = fin; - new->user_data = user_data; - new->subsystem = subsystem; - - g_hash_table_insert (session->events, new, new); - - msg_debug ("added event: %p, pending %d events, subsystem: %s", user_data, g_hash_table_size (session->events), - g_quark_to_string (subsystem)); - - g_mutex_unlock (session->mtx); -} - -void -remove_normal_event (struct rspamd_async_session *session, event_finalizer_t fin, void *ud) -{ - struct rspamd_async_event search_ev, *found_ev; - - if (session == NULL) { - msg_info ("session is NULL"); - return; - } - - g_mutex_lock (session->mtx); - /* Search for event */ - search_ev.fin = fin; - search_ev.user_data = ud; - if ((found_ev = g_hash_table_lookup (session->events, &search_ev)) != NULL) { - g_hash_table_remove (session->events, found_ev); - msg_debug ("removed event: %p, subsystem: %s, pending %d events", ud, - g_quark_to_string (found_ev->subsystem), g_hash_table_size (session->events)); - /* Remove event */ - fin (ud); - } - g_mutex_unlock (session->mtx); - - check_session_pending (session); -} - -static gboolean -rspamd_session_destroy (gpointer k, gpointer v, gpointer unused) -{ - struct rspamd_async_event *ev = v; - - /* Call event's finalizer */ - if (ev->fin != NULL) { - ev->fin (ev->user_data); - } - - return TRUE; -} - -gboolean -destroy_session (struct rspamd_async_session *session) -{ - if (session == NULL) { - msg_info ("session is NULL"); - return FALSE; - } - - g_mutex_lock (session->mtx); - if (session->threads > 0) { - /* Wait for conditional variable to finish processing */ - 
g_mutex_unlock (session->mtx); - g_cond_wait (session->cond, session->mtx); - } - - session->wanna_die = TRUE; - - g_hash_table_foreach_remove (session->events, rspamd_session_destroy, session); - - /* Mutex can be destroyed here */ - g_mutex_unlock (session->mtx); - - if (session->cleanup != NULL) { - session->cleanup (session->user_data); - } - return TRUE; -} - -gboolean -check_session_pending (struct rspamd_async_session *session) -{ - g_mutex_lock (session->mtx); - if (session->wanna_die && g_hash_table_size (session->events) == 0) { - session->wanna_die = FALSE; - if (session->threads > 0) { - /* Wait for conditional variable to finish processing */ - g_cond_wait (session->cond, session->mtx); - } - if (session->fin != NULL) { - g_mutex_unlock (session->mtx); - if (! session->fin (session->user_data)) { - /* Session finished incompletely, perform restoration */ - if (session->restore != NULL) { - session->restore (session->user_data); - /* Call pending once more */ - return check_session_pending (session); - } - return TRUE; - } - else { - return FALSE; - } - } - g_mutex_unlock (session->mtx); - return FALSE; - } - g_mutex_unlock (session->mtx); - return TRUE; -} - - -/** - * Add new async thread to session - * @param session session object - */ -void -register_async_thread (struct rspamd_async_session *session) -{ - g_atomic_int_inc (&session->threads); - msg_debug ("added thread: pending %d thread", session->threads); -} - -/** - * Remove async thread from session and check whether session can be terminated - * @param session session object - */ -void -remove_async_thread (struct rspamd_async_session *session) -{ - if (g_atomic_int_dec_and_test (&session->threads)) { - /* Signal if there are any sessions waiting */ - g_mutex_lock (session->mtx); - g_cond_signal (session->cond); - g_mutex_unlock (session->mtx); - } - msg_debug ("removed thread: pending %d thread", session->threads); -} diff --git a/src/events.h b/src/events.h deleted file mode 100644 index 
6728288eb..000000000 --- a/src/events.h +++ /dev/null @@ -1,88 +0,0 @@ -#ifndef RSPAMD_EVENTS_H -#define RSPAMD_EVENTS_H - -#include "config.h" -#include "mem_pool.h" - -struct rspamd_async_event; - -typedef void (*event_finalizer_t)(void *user_data); -typedef gboolean (*session_finalizer_t)(void *user_data); - -struct rspamd_async_event { - GQuark subsystem; - event_finalizer_t fin; - void *user_data; - guint ref; -}; - -struct rspamd_async_session { - session_finalizer_t fin; - event_finalizer_t restore; - event_finalizer_t cleanup; - GHashTable *events; - void *user_data; - rspamd_mempool_t *pool; - gboolean wanna_die; - guint threads; - GMutex *mtx; - GCond *cond; -}; - -/** - * Make new async session - * @param pool pool to alloc memory from - * @param fin a callback called when no events are found in session - * @param restore a callback is called to restore processing of session - * @param cleanup a callback called when session is forcefully destroyed - * @param user_data abstract user data - * @return - */ -struct rspamd_async_session *new_async_session (rspamd_mempool_t *pool, - session_finalizer_t fin, event_finalizer_t restore, - event_finalizer_t cleanup, void *user_data); - -/** - * Insert new event to the session - * @param session session object - * @param fin finalizer callback - * @param user_data abstract user_data - * @param forced unused - */ -void register_async_event (struct rspamd_async_session *session, - event_finalizer_t fin, void *user_data, GQuark subsystem); - -/** - * Remove normal event - * @param session session object - * @param fin final callback - * @param ud user data object - */ -void remove_normal_event (struct rspamd_async_session *session, event_finalizer_t fin, void *ud); - -/** - * Must be called at the end of session, it calls fin functions for all non-forced callbacks - * @return true if the whole session was destroyed and false if there are forced events - */ -gboolean destroy_session (struct rspamd_async_session 
*session); - -/** - * Check session for events pending and call fin callback if no events are pending - * @param session session object - * @return TRUE if session has pending events - */ -gboolean check_session_pending (struct rspamd_async_session *session); - -/** - * Add new async thread to session - * @param session session object - */ -void register_async_thread (struct rspamd_async_session *session); - -/** - * Remove async thread from session and check whether session can be terminated - * @param session session object - */ -void remove_async_thread (struct rspamd_async_session *session); - -#endif /* RSPAMD_EVENTS_H */ diff --git a/src/expressions.c b/src/expressions.c deleted file mode 100644 index 5d19626bb..000000000 --- a/src/expressions.c +++ /dev/null @@ -1,1452 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "util.h" -#include "cfg_file.h" -#include "main.h" -#include "message.h" -#include "fuzzy.h" -#include "expressions.h" -#include "html.h" -#include "lua/lua_common.h" -#include "diff.h" - -gboolean rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused); -gboolean rspamd_header_exists (struct rspamd_task *task, GList * args, void *unused); -gboolean rspamd_parts_distance (struct rspamd_task *task, GList * args, void *unused); -gboolean rspamd_recipients_distance (struct rspamd_task *task, GList * args, void *unused); -gboolean rspamd_has_only_html_part (struct rspamd_task *task, GList * args, void *unused); -gboolean rspamd_is_recipients_sorted (struct rspamd_task *task, GList * args, void *unused); -gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task, GList * args, void *unused); -gboolean rspamd_is_html_balanced (struct rspamd_task *task, GList * args, void *unused); -gboolean rspamd_has_html_tag (struct rspamd_task *task, GList * args, void *unused); -gboolean rspamd_has_fake_html (struct rspamd_task *task, GList * args, void *unused); - -/* - * List of internal functions of rspamd - * Sorted by name to use bsearch - */ -static struct _fl { - const gchar *name; - rspamd_internal_func_t func; - void *user_data; -} rspamd_functions_list[] = { - {"compare_encoding", rspamd_compare_encoding, NULL}, - {"compare_parts_distance", rspamd_parts_distance, NULL}, - {"compare_recipients_distance", 
rspamd_recipients_distance, NULL}, - {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL}, - {"has_fake_html", rspamd_has_fake_html, NULL}, - {"has_html_tag", rspamd_has_html_tag, NULL}, - {"has_only_html_part", rspamd_has_only_html_part, NULL}, - {"header_exists", rspamd_header_exists, NULL}, - {"is_html_balanced", rspamd_is_html_balanced, NULL}, - {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL} -}; - -static struct _fl *list_ptr = &rspamd_functions_list[0]; -static guint32 functions_number = sizeof (rspamd_functions_list) / sizeof (struct _fl); -static gboolean list_allocated = FALSE; - -/* Bsearch routine */ -static gint -fl_cmp (const void *s1, const void *s2) -{ - struct _fl *fl1 = (struct _fl *)s1; - struct _fl *fl2 = (struct _fl *)s2; - return strcmp (fl1->name, fl2->name); -} - -/* Cache for regular expressions that are used in functions */ -void * -re_cache_check (const gchar *line, rspamd_mempool_t *pool) -{ - GHashTable *re_cache; - - re_cache = rspamd_mempool_get_variable (pool, "re_cache"); - - if (re_cache == NULL) { - re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - rspamd_mempool_set_variable (pool, "re_cache", re_cache, (rspamd_mempool_destruct_t)g_hash_table_destroy); - return NULL; - } - return g_hash_table_lookup (re_cache, line); -} - -void -re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool) -{ - GHashTable *re_cache; - - re_cache = rspamd_mempool_get_variable (pool, "re_cache"); - - if (re_cache == NULL) { - re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - rspamd_mempool_set_variable (pool, "re_cache", re_cache, (rspamd_mempool_destruct_t)g_hash_table_destroy); - } - - g_hash_table_insert (re_cache, (gpointer)line, pointer); -} - -void -re_cache_del (const gchar *line, rspamd_mempool_t *pool) -{ - GHashTable *re_cache; - - re_cache = rspamd_mempool_get_variable (pool, "re_cache"); - - if (re_cache != NULL) { - g_hash_table_remove (re_cache, line); - } - -} - 
-/* - * Functions for parsing expressions - */ -struct expression_stack { - gchar op; - struct expression_stack *next; -}; - -/* - * Push operand or operator to stack - */ -static struct expression_stack * -push_expression_stack (rspamd_mempool_t * pool, struct expression_stack *head, gchar op) -{ - struct expression_stack *new; - new = rspamd_mempool_alloc (pool, sizeof (struct expression_stack)); - new->op = op; - new->next = head; - return new; -} - -/* - * Delete symbol from stack, return pointer to operand or operator (casted to void* ) - */ -static gchar -delete_expression_stack (struct expression_stack **head) -{ - struct expression_stack *cur; - gchar res; - - if (*head == NULL) - return 0; - - cur = *head; - res = cur->op; - - *head = cur->next; - return res; -} - -/* - * Return operation priority - */ -static gint -logic_priority (gchar a) -{ - switch (a) { - case '!': - return 3; - case '|': - case '&': - return 2; - case '(': - return 1; - default: - return 0; - } -} - -/* - * Return FALSE if symbol is not operation symbol (operand) - * Return TRUE if symbol is operation symbol - */ -static gboolean -is_operation_symbol (gchar *a) -{ - switch (*a) { - case '!': - case '&': - case '|': - case '(': - case ')': - return TRUE; - case 'O': - case 'o': - if (g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0&& g_ascii_isspace (a[2])) { - return TRUE; - } - break; - case 'A': - case 'a': - if (g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0&& g_ascii_isspace (a[3])) { - return TRUE; - } - break; - case 'N': - case 'n': - if (g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0 && g_ascii_isspace (a[3])) { - return TRUE; - } - break; - } - - return FALSE; -} - -/* Return character representation of operation */ -static gchar -op_to_char (gchar *a, gchar **next) -{ - switch (*a) { - case '!': - case '&': - case '|': - case '(': - case ')': - *next = a + 1; - return *a; - case 'O': - case 'o': - if (g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) 
== 0) { - *next = a + sizeof ("or") - 1; - return '|'; - } - break; - case 'A': - case 'a': - if (g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0) { - *next = a + sizeof ("and") - 1; - return '&'; - } - break; - case 'N': - case 'n': - if (g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0) { - *next = a + sizeof ("not") - 1; - return '!'; - } - break; - } - - return '\0'; -} - -/* - * Return TRUE if symbol can be regexp flag - */ -static gboolean -is_regexp_flag (gchar a) -{ - switch (a) { - case 'i': - case 'm': - case 'x': - case 's': - case 'u': - case 'o': - case 'r': - case 'H': - case 'M': - case 'P': - case 'U': - case 'X': - case 'T': - case 'S': - return TRUE; - default: - return FALSE; - } -} - -static void -insert_expression (rspamd_mempool_t * pool, struct expression **head, gint type, gchar op, void *operand, const gchar *orig) -{ - struct expression *new, *cur; - - new = rspamd_mempool_alloc (pool, sizeof (struct expression)); - new->type = type; - new->orig = orig; - if (new->type != EXPR_OPERATION) { - new->content.operand = operand; - } - else { - new->content.operation = op; - } - new->next = NULL; - - if (!*head) { - *head = new; - } - else { - cur = *head; - while (cur->next) { - cur = cur->next; - } - cur->next = new; - } -} - -static struct expression * -maybe_parse_expression (rspamd_mempool_t * pool, gchar *line) -{ - struct expression *expr; - gchar *p = line; - - while (*p) { - if (is_operation_symbol (p)) { - return parse_expression (pool, line); - } - p++; - } - - expr = rspamd_mempool_alloc (pool, sizeof (struct expression)); - expr->type = EXPR_STR; - expr->content.operand = rspamd_mempool_strdup (pool, line); - expr->next = NULL; - - return expr; -} - -/* - * Make inverse polish record for specified expression - * Memory is allocated from given pool - */ -struct expression * -parse_expression (rspamd_mempool_t * pool, gchar *line) -{ - struct expression *expr = NULL; - struct expression_stack *stack = NULL; - struct 
expression_function *func = NULL; - struct expression *arg; - GQueue *function_stack; - gchar *p, *c, *str, op, newop, *copy, *next; - gboolean in_regexp = FALSE; - gint brackets = 0; - - enum { - SKIP_SPACES, - READ_OPERATOR, - READ_REGEXP, - READ_REGEXP_FLAGS, - READ_FUNCTION, - READ_FUNCTION_ARGUMENT, - } state = SKIP_SPACES; - - if (line == NULL || pool == NULL) { - return NULL; - } - - msg_debug ("parsing expression {{ %s }}", line); - - function_stack = g_queue_new (); - copy = rspamd_mempool_strdup (pool, line); - p = line; - c = p; - while (*p) { - switch (state) { - case SKIP_SPACES: - if (!g_ascii_isspace (*p)) { - if (is_operation_symbol (p)) { - state = READ_OPERATOR; - } - else if (*p == '/') { - c = ++p; - state = READ_REGEXP; - } - else { - c = p; - state = READ_FUNCTION; - } - } - else { - p++; - } - break; - case READ_OPERATOR: - if (*p == ')') { - if (stack == NULL) { - return NULL; - } - /* Pop all operators from stack to nearest '(' or to head */ - while (stack && stack->op != '(') { - op = delete_expression_stack (&stack); - if (op != '(') { - insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy); - } - } - if (stack) { - op = delete_expression_stack (&stack); - } - } - else if (*p == '(') { - /* Push it to stack */ - stack = push_expression_stack (pool, stack, *p); - } - else { - if (stack == NULL) { - newop = op_to_char (p, &next); - if (newop != '\0') { - stack = push_expression_stack (pool, stack, newop); - p = next; - state = SKIP_SPACES; - continue; - } - } - /* Check priority of logic operation */ - else { - newop = op_to_char (p, &next); - if (newop != '\0') { - if (logic_priority (stack->op) < logic_priority (newop)) { - stack = push_expression_stack (pool, stack, newop); - } - else { - /* Pop all operations that have higher priority than this one */ - while ((stack != NULL) && (logic_priority (stack->op) >= logic_priority (newop))) { - op = delete_expression_stack (&stack); - if (op != '(') { - insert_expression (pool, 
&expr, EXPR_OPERATION, op, NULL, copy); - } - } - stack = push_expression_stack (pool, stack, newop); - } - } - p = next; - state = SKIP_SPACES; - continue; - } - } - p++; - state = SKIP_SPACES; - break; - - case READ_REGEXP: - if (*p == '/' && *(p - 1) != '\\') { - if (*(p + 1)) { - p++; - } - state = READ_REGEXP_FLAGS; - } - else { - p++; - } - break; - - case READ_REGEXP_FLAGS: - if (!is_regexp_flag (*p) || *(p + 1) == '\0') { - if (c != p) { - if ((is_regexp_flag (*p) || *p == '/') && *(p + 1) == '\0') { - p++; - } - str = rspamd_mempool_alloc (pool, p - c + 2); - rspamd_strlcpy (str, c - 1, (p - c + 2)); - g_strstrip (str); - msg_debug ("found regexp: %s", str); - if (strlen (str) > 0) { - insert_expression (pool, &expr, EXPR_REGEXP, 0, str, copy); - } - } - c = p; - state = SKIP_SPACES; - } - else { - p++; - } - break; - - case READ_FUNCTION: - if (*p == '/') { - /* In fact it is regexp */ - state = READ_REGEXP; - c++; - p++; - } - else if (*p == '(') { - func = rspamd_mempool_alloc (pool, sizeof (struct expression_function)); - func->name = rspamd_mempool_alloc (pool, p - c + 1); - func->args = NULL; - rspamd_strlcpy (func->name, c, (p - c + 1)); - g_strstrip (func->name); - state = READ_FUNCTION_ARGUMENT; - g_queue_push_tail (function_stack, func); - insert_expression (pool, &expr, EXPR_FUNCTION, 0, func, copy); - c = ++p; - } - else if (is_operation_symbol (p)) { - /* In fact it is not function, but symbol */ - if (c != p) { - str = rspamd_mempool_alloc (pool, p - c + 1); - rspamd_strlcpy (str, c, (p - c + 1)); - g_strstrip (str); - if (strlen (str) > 0) { - insert_expression (pool, &expr, EXPR_STR, 0, str, copy); - } - } - state = READ_OPERATOR; - } - else if (*(p + 1) == '\0') { - /* In fact it is not function, but symbol */ - p++; - if (c != p) { - str = rspamd_mempool_alloc (pool, p - c + 1); - rspamd_strlcpy (str, c, (p - c + 1)); - g_strstrip (str); - if (strlen (str) > 0) { - insert_expression (pool, &expr, EXPR_STR, 0, str, copy); - } - } - state = 
SKIP_SPACES; - } - else { - p++; - } - break; - - case READ_FUNCTION_ARGUMENT: - if (*p == '/' && !in_regexp) { - in_regexp = TRUE; - p++; - } - if (!in_regexp) { - /* Append argument to list */ - if (*p == ',' || (*p == ')' && brackets == 0)) { - arg = NULL; - str = rspamd_mempool_alloc (pool, p - c + 1); - rspamd_strlcpy (str, c, (p - c + 1)); - g_strstrip (str); - /* Recursive call */ - arg = maybe_parse_expression (pool, str); - func->args = g_list_append (func->args, arg); - /* Pop function */ - if (*p == ')') { - /* Last function in chain, goto skipping spaces state */ - func = g_queue_pop_tail (function_stack); - if (g_queue_get_length (function_stack) == 0) { - state = SKIP_SPACES; - } - } - c = p + 1; - } - else if (*p == '(') { - brackets++; - } - else if (*p == ')') { - brackets--; - } - } - else if (*p == '/' && *(p - 1) != '\\') { - in_regexp = FALSE; - } - p++; - break; - } - } - - g_queue_free (function_stack); - if (state != SKIP_SPACES) { - /* In fact we got bad expression */ - msg_warn ("expression \"%s\" is invalid", line); - return NULL; - } - /* Pop everything from stack */ - while (stack != NULL) { - op = delete_expression_stack (&stack); - if (op != '(') { - insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy); - } - } - - return expr; -} - -/* - * Rspamd regexp utility functions - */ -struct rspamd_regexp * -parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode) -{ - const gchar *begin, *end, *p, *src, *start; - gchar *dbegin, *dend; - struct rspamd_regexp *result, *check; - gint regexp_flags = G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE; - GError *err = NULL; - - if (line == NULL) { - msg_err ("cannot parse NULL line"); - return NULL; - } - - src = line; - result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp)); - /* Skip whitespaces */ - while (g_ascii_isspace (*line)) { - line++; - } - if (*line == '\0') { - msg_warn ("got empty regexp"); - return NULL; - } - start = line; - /* First try to 
find header name */ - begin = strchr (line, '/'); - if (begin != NULL) { - p = begin; - end = NULL; - while (p != line) { - if (*p == '=') { - end = p; - break; - } - p --; - } - if (end) { - result->header = rspamd_mempool_alloc (pool, end - line + 1); - rspamd_strlcpy (result->header, line, end - line + 1); - result->type = REGEXP_HEADER; - line = end; - } - } - else { - result->header = rspamd_mempool_strdup (pool, line); - result->type = REGEXP_HEADER; - line = start; - } - /* Find begin of regexp */ - while (*line && *line != '/') { - line++; - } - if (*line != '\0') { - begin = line + 1; - } - else if (result->header == NULL) { - /* Assume that line without // is just a header name */ - result->header = rspamd_mempool_strdup (pool, line); - result->type = REGEXP_HEADER; - return result; - } - else { - /* We got header name earlier but have not found // expression, so it is invalid regexp */ - msg_warn ("got no header name (eg. header=) but without corresponding regexp, %s", src); - return NULL; - } - /* Find end */ - end = begin; - while (*end && (*end != '/' || *(end - 1) == '\\')) { - end++; - } - if (end == begin || *end != '/') { - msg_warn ("no trailing / in regexp %s", src); - return NULL; - } - /* Parse flags */ - p = end + 1; - while (p != NULL) { - switch (*p) { - case 'i': - regexp_flags |= G_REGEX_CASELESS; - p++; - break; - case 'm': - regexp_flags |= G_REGEX_MULTILINE; - p++; - break; - case 's': - regexp_flags |= G_REGEX_DOTALL; - p++; - break; - case 'x': - regexp_flags |= G_REGEX_EXTENDED; - p++; - break; - case 'u': - regexp_flags |= G_REGEX_UNGREEDY; - p++; - break; - case 'o': - regexp_flags |= G_REGEX_OPTIMIZE; - p++; - break; - case 'r': - regexp_flags |= G_REGEX_RAW; - result->is_raw = TRUE; - p++; - break; - /* Type flags */ - case 'H': - if (result->type == REGEXP_NONE) { - result->type = REGEXP_HEADER; - } - p++; - break; - case 'M': - if (result->type == REGEXP_NONE) { - result->type = REGEXP_MESSAGE; - } - p++; - break; - case 'P': 
- if (result->type == REGEXP_NONE) { - result->type = REGEXP_MIME; - } - p++; - break; - case 'U': - if (result->type == REGEXP_NONE) { - result->type = REGEXP_URL; - } - p++; - break; - case 'X': - if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) { - result->type = REGEXP_RAW_HEADER; - } - p++; - break; - case 'T': - result->is_test = TRUE; - p ++; - break; - case 'S': - result->is_strong = TRUE; - p ++; - break; - /* Stop flags parsing */ - default: - p = NULL; - break; - } - } - - result->regexp_text = rspamd_mempool_strdup (pool, start); - dbegin = result->regexp_text + (begin - start); - dend = result->regexp_text + (end - start); - *dend = '\0'; - - if (raw_mode) { - regexp_flags |= G_REGEX_RAW; - } - - /* Avoid multiply regexp structures for similar regexps */ - if ((check = (struct rspamd_regexp *)re_cache_check (result->regexp_text, pool)) != NULL) { - /* Additional check for headers */ - if (result->type == REGEXP_HEADER || result->type == REGEXP_RAW_HEADER) { - if (result->header && check->header) { - if (strcmp (result->header, check->header) == 0) { - return check; - } - } - } - else { - return check; - } - } - result->regexp = g_regex_new (dbegin, regexp_flags, 0, &err); - if ((regexp_flags & G_REGEX_RAW) != 0) { - result->raw_regexp = result->regexp; - } - else { - result->raw_regexp = g_regex_new (dbegin, regexp_flags | G_REGEX_RAW, 0, &err); - rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_regex_unref, (void *)result->raw_regexp); - } - rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_regex_unref, (void *)result->regexp); - - *dend = '/'; - - if (result->regexp == NULL || err != NULL) { - msg_warn ("could not read regexp: %s while reading regexp %s", err->message, src); - return NULL; - } - - if (result->raw_regexp == NULL || err != NULL) { - msg_warn ("could not read raw regexp: %s while reading regexp %s", err->message, src); - return NULL; - } - - /* Add to cache for further usage */ - 
re_cache_add (result->regexp_text, result, pool); - return result; -} - -gboolean -call_expression_function (struct expression_function * func, struct rspamd_task * task, lua_State *L) -{ - struct _fl *selected, key; - - key.name = func->name; - - selected = bsearch (&key, list_ptr, functions_number, sizeof (struct _fl), fl_cmp); - if (selected == NULL) { - /* Try to check lua function */ - return FALSE; - } - - return selected->func (task, func->args, selected->user_data); -} - -struct expression_argument * -get_function_arg (struct expression *expr, struct rspamd_task *task, gboolean want_string) -{ - GQueue *stack; - gsize cur, op1, op2; - struct expression_argument *res; - struct expression *it; - - if (expr == NULL) { - msg_warn ("NULL expression passed"); - return NULL; - } - if (expr->next == NULL) { - res = rspamd_mempool_alloc (task->task_pool, sizeof (struct expression_argument)); - if (expr->type == EXPR_REGEXP || expr->type == EXPR_STR || expr->type == EXPR_REGEXP_PARSED) { - res->type = EXPRESSION_ARGUMENT_NORMAL; - res->data = expr->content.operand; - } - else if (expr->type == EXPR_FUNCTION && !want_string) { - res->type = EXPRESSION_ARGUMENT_BOOL; - cur = call_expression_function (expr->content.operand, task, NULL); - res->data = GSIZE_TO_POINTER (cur); - } - else { - msg_warn ("cannot parse argument: it contains operator or bool expression that is not wanted"); - return NULL; - } - return res; - } - else if (!want_string) { - res = rspamd_mempool_alloc (task->task_pool, sizeof (struct expression_argument)); - res->type = EXPRESSION_ARGUMENT_BOOL; - stack = g_queue_new (); - it = expr; - - while (it) { - if (it->type == EXPR_REGEXP || it->type == EXPR_REGEXP_PARSED || it->type == EXPR_STR) { - g_queue_free (stack); - res->type = EXPRESSION_ARGUMENT_EXPR; - res->data = expr; - return res; - } - else if (it->type == EXPR_FUNCTION) { - cur = (gsize) call_expression_function ((struct expression_function *)it->content.operand, task, NULL); - debug_task 
("function %s returned %s", ((struct expression_function *)it->content.operand)->name, cur ? "true" : "false"); - } - else if (it->type == EXPR_OPERATION) { - if (g_queue_is_empty (stack)) { - /* Queue has no operands for operation, exiting */ - debug_task ("invalid expression"); - g_queue_free (stack); - return NULL; - } - switch (it->content.operation) { - case '!': - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - op1 = !op1; - g_queue_push_head (stack, GSIZE_TO_POINTER (op1)); - break; - case '&': - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2)); - break; - case '|': - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2)); - break; - default: - it = it->next; - continue; - } - } - if (it) { - it = it->next; - } - } - if (!g_queue_is_empty (stack)) { - res->data = g_queue_pop_head (stack); - } - else { - res->data = GSIZE_TO_POINTER (FALSE); - } - - return res; - } - - msg_warn ("invalid expression argument"); - - return NULL; -} - -void -register_expression_function (const gchar *name, rspamd_internal_func_t func, void *user_data) -{ - static struct _fl *new; - - functions_number++; - - new = g_new (struct _fl, functions_number); - memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl)); - if (list_allocated) { - g_free (list_ptr); - } - - list_allocated = TRUE; - new[functions_number - 1].name = name; - new[functions_number - 1].func = func; - new[functions_number - 1].user_data = user_data; - qsort (new, functions_number, sizeof (struct _fl), fl_cmp); - list_ptr = new; -} - -gboolean -rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused) -{ - struct expression_argument *arg; - - if (args == NULL || task == NULL) { - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); - if 
(arg->type == EXPRESSION_ARGUMENT_BOOL) { - msg_warn ("invalid argument to function is passed"); - return FALSE; - } - - /* XXX: really write this function */ - return TRUE; -} - -gboolean -rspamd_header_exists (struct rspamd_task * task, GList * args, void *unused) -{ - struct expression_argument *arg; - GList *headerlist; - - if (args == NULL || task == NULL) { - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); - if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) { - msg_warn ("invalid argument to function is passed"); - return FALSE; - } - - debug_task ("try to get header %s", (gchar *)arg->data); - headerlist = message_get_header (task->task_pool, task->message, (gchar *)arg->data, FALSE); - if (headerlist) { - g_list_free (headerlist); - return TRUE; - } - return FALSE; -} - -/* - * This function is designed to find difference between text/html and text/plain parts - * It takes one argument: difference threshold, if we have two text parts, compare - * its hashes and check for threshold, if value is greater than threshold, return TRUE - * and return FALSE otherwise. 
- */ -gboolean -rspamd_parts_distance (struct rspamd_task * task, GList * args, void *unused) -{ - gint threshold, threshold2 = -1, diff; - struct mime_text_part *p1, *p2; - GList *cur; - struct expression_argument *arg; - GMimeObject *parent; - const GMimeContentType *ct; - gint *pdiff; - - if (args == NULL) { - debug_task ("no threshold is specified, assume it 100"); - threshold = 100; - } - else { - errno = 0; - arg = get_function_arg (args->data, task, TRUE); - threshold = strtoul ((gchar *)arg->data, NULL, 10); - if (errno != 0) { - msg_info ("bad numeric value for threshold \"%s\", assume it 100", (gchar *)args->data); - threshold = 100; - } - if (args->next) { - arg = get_function_arg (args->next->data, task, TRUE); - errno = 0; - threshold2 = strtoul ((gchar *)arg->data, NULL, 10); - if (errno != 0) { - msg_info ("bad numeric value for threshold \"%s\", ignore it", (gchar *)arg->data); - threshold2 = -1; - } - } - } - - if ((pdiff = rspamd_mempool_get_variable (task->task_pool, "parts_distance")) != NULL) { - diff = *pdiff; - if (diff != -1) { - if (threshold2 > 0) { - if (diff >= MIN (threshold, threshold2) && diff < MAX (threshold, threshold2)) { - return TRUE; - } - } - else { - if (diff <= threshold) { - return TRUE; - } - } - return FALSE; - } - else { - return FALSE; - } - } - - if (g_list_length (task->text_parts) == 2) { - cur = g_list_first (task->text_parts); - p1 = cur->data; - cur = g_list_next (cur); - pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint)); - *pdiff = -1; - - if (cur == NULL) { - msg_info ("bad parts list"); - return FALSE; - } - p2 = cur->data; - /* First of all check parent object */ - if (p1->parent && p1->parent == p2->parent) { - parent = p1->parent; - ct = g_mime_object_get_content_type (parent); -#ifndef GMIME24 - if (ct == NULL || ! g_mime_content_type_is_type (ct, "multipart", "alternative")) { -#else - if (ct == NULL || ! 
g_mime_content_type_is_type ((GMimeContentType *)ct, "multipart", "alternative")) { -#endif - debug_task ("two parts are not belong to multipart/alternative container, skip check"); - rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); - return FALSE; - } - } - else { - debug_task ("message contains two parts but they are in different multi-parts"); - rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); - return FALSE; - } - if (!p1->is_empty && !p2->is_empty) { - if (p1->diff_str != NULL && p2->diff_str != NULL) { - diff = compare_diff_distance_normalized (p1->diff_str, p2->diff_str); - } - else { - diff = fuzzy_compare_parts (p1, p2); - } - debug_task ("got likeliness between parts of %d%%, threshold is %d%%", diff, threshold); - *pdiff = diff; - rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); - if (threshold2 > 0) { - if (diff >= MIN (threshold, threshold2) && diff < MAX (threshold, threshold2)) { - return TRUE; - } - } - else { - if (diff <= threshold) { - return TRUE; - } - } - } - else if ((p1->is_empty && !p2->is_empty) || (!p1->is_empty && p2->is_empty)) { - /* Empty and non empty parts are different */ - *pdiff = 0; - rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); - return TRUE; - } - } - else { - debug_task ("message has too many text parts, so do not try to compare them with each other"); - rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); - return FALSE; - } - - rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); - return FALSE; -} - -struct addr_list { - const gchar *name; - const gchar *addr; -}; - -#define COMPARE_RCPT_LEN 3 -#define MIN_RCPT_TO_COMPARE 7 - -gboolean -rspamd_recipients_distance (struct rspamd_task *task, GList * args, void *unused) -{ - struct expression_argument *arg; - InternetAddressList *cur; - InternetAddress *addr; - double threshold; - struct addr_list *ar; - 
gchar *c; - gint num, i, j, hits = 0, total = 0; - - if (args == NULL) { - msg_warn ("no parameters to function"); - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); - errno = 0; - threshold = strtod ((gchar *)arg->data, NULL); - if (errno != 0) { - msg_warn ("invalid numeric value '%s': %s", (gchar *)arg->data, strerror (errno)); - return FALSE; - } - - if (!task->rcpts) { - return FALSE; - } - num = internet_address_list_length (task->rcpts); - if (num < MIN_RCPT_TO_COMPARE) { - return FALSE; - } - ar = rspamd_mempool_alloc0 (task->task_pool, num * sizeof (struct addr_list)); - - /* Fill array */ - cur = task->rcpts; -#ifdef GMIME24 - for (i = 0; i < num; i ++) { - addr = internet_address_list_get_address (cur, i); - ar[i].name = rspamd_mempool_strdup (task->task_pool, internet_address_get_name (addr)); - if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) { - *c = '\0'; - ar[i].addr = c + 1; - } - } -#else - i = 0; - while (cur) { - addr = internet_address_list_get_address (cur); - if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) { - ar[i].name = rspamd_mempool_strdup (task->task_pool, internet_address_get_addr (addr)); - if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) { - *c = '\0'; - ar[i].addr = c + 1; - } - cur = internet_address_list_next (cur); - i++; - } - else { - cur = internet_address_list_next (cur); - } - } -#endif - - /* Cycle all elements in array */ - for (i = 0; i < num; i++) { - for (j = i + 1; j < num; j++) { - if (ar[i].name && ar[j].name && g_ascii_strncasecmp (ar[i].name, ar[j].name, COMPARE_RCPT_LEN) == 0) { - /* Common name part */ - hits++; - } - else if (ar[i].addr && ar[j].addr && g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) { - /* Common address part, but different name */ - hits++; - } - total++; - } - } - - if ((double)(hits * num / 2.) 
/ (double)total >= threshold) { - return TRUE; - } - - return FALSE; -} - -gboolean -rspamd_has_only_html_part (struct rspamd_task * task, GList * args, void *unused) -{ - struct mime_text_part *p; - GList *cur; - gboolean res = FALSE; - - cur = g_list_first (task->text_parts); - while (cur) { - p = cur->data; - if (p->is_html) { - res = TRUE; - } - else { - res = FALSE; - break; - } - cur = g_list_next (cur); - } - - return res; -} - -static gboolean -is_recipient_list_sorted (const InternetAddressList * ia) -{ - const InternetAddressList *cur; - InternetAddress *addr; - gboolean res = TRUE; - struct addr_list current = { NULL, NULL }, previous = { - NULL, NULL}; -#ifdef GMIME24 - gint num, i; -#endif - - /* Do not check to short address lists */ - if (internet_address_list_length ((InternetAddressList *)ia) < MIN_RCPT_TO_COMPARE) { - return FALSE; - } -#ifdef GMIME24 - num = internet_address_list_length ((InternetAddressList *)ia); - cur = ia; - for (i = 0; i < num; i ++) { - addr = internet_address_list_get_address ((InternetAddressList *)cur, i); - current.addr = (gchar *)internet_address_get_name (addr); - if (previous.addr != NULL) { - if (current.addr && g_ascii_strcasecmp (current.addr, previous.addr) < 0) { - res = FALSE; - break; - } - } - previous.addr = current.addr; - } -#else - cur = ia; - while (cur) { - addr = internet_address_list_get_address (cur); - if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) { - current.addr = internet_address_get_addr (addr); - if (previous.addr != NULL) { - if (current.addr && g_ascii_strcasecmp (current.addr, previous.addr) < 0) { - res = FALSE; - break; - } - } - previous.addr = current.addr; - } - cur = internet_address_list_next (cur); - } -#endif - - return res; -} - -gboolean -rspamd_is_recipients_sorted (struct rspamd_task * task, GList * args, void *unused) -{ - /* Check all types of addresses */ - if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_TO)) 
== TRUE) { - return TRUE; - } - if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_BCC)) == TRUE) { - return TRUE; - } - if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_CC)) == TRUE) { - return TRUE; - } - - return FALSE; -} - -gboolean -rspamd_compare_transfer_encoding (struct rspamd_task * task, GList * args, void *unused) -{ - GMimeObject *part; -#ifndef GMIME24 - GMimePartEncodingType enc_req, part_enc; -#else - GMimeContentEncoding enc_req, part_enc; -#endif - struct expression_argument *arg; - - if (args == NULL) { - msg_warn ("no parameters to function"); - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); -#ifndef GMIME24 - enc_req = g_mime_part_encoding_from_string (arg->data); - if (enc_req == GMIME_PART_ENCODING_DEFAULT) { -#else - enc_req = g_mime_content_encoding_from_string (arg->data); - if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) { -#endif - msg_warn ("bad encoding type: %s", (gchar *)arg->data); - return FALSE; - } - - part = g_mime_message_get_mime_part (task->message); - if (part) { - if (GMIME_IS_PART (part)) { -#ifndef GMIME24 - part_enc = g_mime_part_get_encoding (GMIME_PART (part)); - if (part_enc == GMIME_PART_ENCODING_DEFAULT) { - /* Assume 7bit as default transfer encoding */ - part_enc = GMIME_PART_ENCODING_7BIT; - } -#else - part_enc = g_mime_part_get_content_encoding (GMIME_PART (part)); - if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) { - /* Assume 7bit as default transfer encoding */ - part_enc = GMIME_CONTENT_ENCODING_7BIT; - } -#endif - - - debug_task ("got encoding in part: %d and compare with %d", (gint)part_enc, (gint)enc_req); -#ifndef GMIME24 - g_object_unref (part); -#endif - - return part_enc == enc_req; - } -#ifndef GMIME24 - g_object_unref (part); -#endif - } - - return FALSE; -} - -gboolean -rspamd_is_html_balanced (struct rspamd_task * task, GList * args, void *unused) -{ - struct mime_text_part *p; - 
GList *cur; - gboolean res = TRUE; - - cur = g_list_first (task->text_parts); - while (cur) { - p = cur->data; - if (!p->is_empty && p->is_html) { - if (p->is_balanced) { - res = TRUE; - } - else { - res = FALSE; - break; - } - } - cur = g_list_next (cur); - } - - return res; - -} - -struct html_callback_data { - struct html_tag *tag; - gboolean *res; -}; - -static gboolean -search_html_node_callback (GNode * node, gpointer data) -{ - struct html_callback_data *cd = data; - struct html_node *nd; - - nd = node->data; - if (nd) { - if (nd->tag == cd->tag) { - *cd->res = TRUE; - return TRUE; - } - } - - return FALSE; -} - -gboolean -rspamd_has_html_tag (struct rspamd_task * task, GList * args, void *unused) -{ - struct mime_text_part *p; - GList *cur; - struct expression_argument *arg; - struct html_tag *tag; - gboolean res = FALSE; - struct html_callback_data cd; - - if (args == NULL) { - msg_warn ("no parameters to function"); - return FALSE; - } - - arg = get_function_arg (args->data, task, TRUE); - tag = get_tag_by_name (arg->data); - if (tag == NULL) { - msg_warn ("unknown tag type passed as argument: %s", (gchar *)arg->data); - return FALSE; - } - - cur = g_list_first (task->text_parts); - cd.res = &res; - cd.tag = tag; - - while (cur && res == FALSE) { - p = cur->data; - if (!p->is_empty && p->is_html && p->html_nodes) { - g_node_traverse (p->html_nodes, G_PRE_ORDER, G_TRAVERSE_ALL, -1, search_html_node_callback, &cd); - } - cur = g_list_next (cur); - } - - return res; - -} - -gboolean -rspamd_has_fake_html (struct rspamd_task * task, GList * args, void *unused) -{ - struct mime_text_part *p; - GList *cur; - gboolean res = FALSE; - - cur = g_list_first (task->text_parts); - - while (cur && res == FALSE) { - p = cur->data; - if (!p->is_empty && p->is_html && p->html_nodes == NULL) { - res = TRUE; - } - cur = g_list_next (cur); - } - - return res; - -} - - -/* - * vi:ts=4 - */ diff --git a/src/expressions.h b/src/expressions.h deleted file mode 100644 index 
954cc74f7..000000000 --- a/src/expressions.h +++ /dev/null @@ -1,133 +0,0 @@ -/** - * @file expressions.h - * Rspamd expressions API - */ - -#ifndef RSPAMD_EXPRESSIONS_H -#define RSPAMD_EXPRESSIONS_H - -#include "config.h" -#include - -struct rspamd_task; -struct rspamd_regexp; - -/** - * Rspamd expression function - */ -struct expression_function { - gchar *name; /**< name of function */ - GList *args; /**< its args */ -}; - -/** - * Function's argument - */ -struct expression_argument { - enum { - EXPRESSION_ARGUMENT_NORMAL, - EXPRESSION_ARGUMENT_BOOL, - EXPRESSION_ARGUMENT_EXPR, - } type; /**< type of argument (text or other function) */ - void *data; /**< pointer to its data */ -}; - -/** - * Logic expression - */ -struct expression { - enum { - EXPR_REGEXP, - EXPR_OPERATION, - EXPR_FUNCTION, - EXPR_STR, - EXPR_REGEXP_PARSED, - } type; /**< expression type */ - union { - void *operand; - gchar operation; - } content; /**< union for storing operand or operation code */ - const gchar *orig; /**< original line */ - struct expression *next; /**< chain link */ -}; - -typedef gboolean (*rspamd_internal_func_t)(struct rspamd_task *, GList *args, void *user_data); - -/** - * Parse regexp line to regexp structure - * @param pool memory pool to use - * @param line incoming line - * @return regexp structure or NULL in case of error - */ -struct rspamd_regexp* parse_regexp (rspamd_mempool_t *pool, const gchar *line, gboolean raw_mode); - -/** - * Parse composites line to composites structure (eg. 
"SYMBOL1&SYMBOL2|!SYMBOL3") - * @param pool memory pool to use - * @param line incoming line - * @return expression structure or NULL in case of error - */ -struct expression* parse_expression (rspamd_mempool_t *pool, gchar *line); - -/** - * Call specified fucntion and return boolean result - * @param func function to call - * @param task task object - * @param L lua specific state - * @return TRUE or FALSE depending on function result - */ -gboolean call_expression_function (struct expression_function *func, struct rspamd_task *task, lua_State *L); - -/** - * Register specified function to rspamd internal functions list - * @param name name of function - * @param func pointer to function - */ -void register_expression_function (const gchar *name, rspamd_internal_func_t func, void *user_data); - -/** - * Add regexp to regexp cache - * @param line symbolic representation - * @param pointer regexp data - */ -void re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool); - -/** - * Check regexp in cache - * @param line symbolic representation - * @return pointer to regexp data or NULL if regexp is not found - */ -void * re_cache_check (const gchar *line, rspamd_mempool_t *pool); - -/** - * Remove regexp from regexp cache - * @param line symbolic representation - */ -void re_cache_del (const gchar *line, rspamd_mempool_t *pool); - -/** - * Add regexp to regexp task cache - * @param task task object - * @param pointer regexp data - * @param result numeric result of this regexp - */ -void task_cache_add (struct rspamd_task *task, struct rspamd_regexp *re, gint32 result); - -/** - * Check regexp in cache - * @param task task object - * @param pointer regexp data - * @return numeric result if value exists or -1 if not - */ -gint32 task_cache_check (struct rspamd_task *task, struct rspamd_regexp *re); - -/** - * Parse and return a single function argument for a function (may recurse) - * @param expr expression structure that represents function's argument - 
* @param task task object - * @param want_string return NULL if argument is not a string - * @return expression argument structure or NULL if failed - */ -struct expression_argument *get_function_arg (struct expression *expr, struct rspamd_task *task, gboolean want_string); - -#endif diff --git a/src/filter.c b/src/filter.c deleted file mode 100644 index cb0630d9d..000000000 --- a/src/filter.c +++ /dev/null @@ -1,1096 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "mem_pool.h" -#include "filter.h" -#include "main.h" -#include "message.h" -#include "cfg_file.h" -#include "util.h" -#include "expressions.h" -#include "settings.h" -#include "binlog.h" -#include "diff.h" -#include "classifiers/classifiers.h" -#include "tokenizers/tokenizers.h" - -#ifdef WITH_LUA -# include "lua/lua_common.h" -#endif - -#define COMMON_PART_FACTOR 95 - -#ifndef PARAM_H_HAS_BITSET -/* Bit map related macros. */ -#define NBBY 8 /* number of bits in a byte */ -#define setbit(a,i) (((unsigned char *)(a))[(i)/NBBY] |= 1<<((i)%NBBY)) -#define clrbit(a,i) (((unsigned char *)(a))[(i)/NBBY] &= ~(1<<((i)%NBBY))) -#define isset(a,i) \ - (((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) -#define isclr(a,i) \ - ((((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) == 0) -#endif -#define BITSPERBYTE (8*sizeof (gchar)) -#define NBYTES(nbits) (((nbits) + BITSPERBYTE - 1) / BITSPERBYTE) - -static inline GQuark -filter_error_quark (void) -{ - return g_quark_from_static_string ("g-filter-error-quark"); -} - -static void -insert_metric_result (struct rspamd_task *task, struct metric *metric, const gchar *symbol, - double flag, GList * opts, gboolean single) -{ - struct metric_result *metric_res; - struct symbol *s; - gdouble *weight, w; - - metric_res = g_hash_table_lookup (task->results, metric->name); - - if (metric_res == NULL) { - /* Create new metric chain */ - metric_res = rspamd_mempool_alloc (task->task_pool, sizeof (struct metric_result)); - metric_res->symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - metric_res->checked = FALSE; - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_hash_table_unref, metric_res->symbols); - metric_res->metric = metric; - metric_res->grow_factor = 0; - metric_res->score = 0; - metric_res->domain_settings = NULL; - metric_res->user_settings = NULL; - apply_metric_settings (task, metric, metric_res); - g_hash_table_insert 
(task->results, (gpointer) metric->name, metric_res); - } - - weight = g_hash_table_lookup (metric->symbols, symbol); - if (weight == NULL) { - w = 0.0; - } - else { - w = (*weight) * flag; - } - - - /* Add metric score */ - if ((s = g_hash_table_lookup (metric_res->symbols, symbol)) != NULL) { - if (s->options && opts && opts != s->options) { - /* Append new options */ - s->options = g_list_concat (s->options, g_list_copy(opts)); - /* - * Note that there is no need to add new destructor of GList as elements of appended - * GList are used directly, so just free initial GList - */ - } - else if (opts) { - s->options = g_list_copy (opts); - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_list_free, s->options); - } - if (!single) { - /* Handle grow factor */ - if (metric_res->grow_factor && w > 0) { - w *= metric_res->grow_factor; - metric_res->grow_factor *= metric->grow_factor; - } - s->score += w; - metric_res->score += w; - } - else { - if (fabs (s->score) < fabs (w)) { - /* Replace less weight with a bigger one */ - metric_res->score = metric_res->score - s->score + w; - s->score = w; - } - } - } - else { - s = rspamd_mempool_alloc (task->task_pool, sizeof (struct symbol)); - - /* Handle grow factor */ - if (metric_res->grow_factor && w > 0) { - w *= metric_res->grow_factor; - metric_res->grow_factor *= metric->grow_factor; - } - else if (w > 0) { - metric_res->grow_factor = metric->grow_factor; - } - - s->score = w; - s->name = symbol; - metric_res->score += w; - - if (opts) { - s->options = g_list_copy (opts); - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_list_free, s->options); - } - else { - s->options = NULL; - } - - g_hash_table_insert (metric_res->symbols, (gpointer) symbol, s); - } - debug_task ("symbol %s, score %.2f, metric %s, factor: %f", symbol, s->score, metric->name, w); - -} - -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) -static GStaticMutex result_mtx = 
G_STATIC_MUTEX_INIT; -#else -G_LOCK_DEFINE (result_mtx); -#endif - -static void -insert_result_common (struct rspamd_task *task, const gchar *symbol, double flag, GList * opts, gboolean single) -{ - struct metric *metric; - struct cache_item *item; - GList *cur, *metric_list; - - /* Avoid concurrenting inserting of results */ -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) - g_static_mutex_lock (&result_mtx); -#else - G_LOCK (result_mtx); -#endif - metric_list = g_hash_table_lookup (task->cfg->metrics_symbols, symbol); - if (metric_list) { - cur = metric_list; - - while (cur) { - metric = cur->data; - insert_metric_result (task, metric, symbol, flag, opts, single); - cur = g_list_next (cur); - } - } - else { - /* Insert symbol to default metric */ - insert_metric_result (task, task->cfg->default_metric, symbol, flag, opts, single); - } - - /* Process cache item */ - if (task->cfg->cache) { - item = g_hash_table_lookup (task->cfg->cache->items_by_symbol, symbol); - if (item != NULL) { - item->s->frequency++; - } - } - - if (opts != NULL) { - /* XXX: it is not wise to destroy them here */ - g_list_free (opts); - } -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) - g_static_mutex_unlock (&result_mtx); -#else - G_UNLOCK (result_mtx); -#endif -} - -/* Insert result that may be increased on next insertions */ -void -insert_result (struct rspamd_task *task, const gchar *symbol, double flag, GList * opts) -{ - insert_result_common (task, symbol, flag, opts, task->cfg->one_shot_mode); -} - -/* Insert result as a single option */ -void -insert_result_single (struct rspamd_task *task, const gchar *symbol, double flag, GList * opts) -{ - insert_result_common (task, symbol, flag, opts, TRUE); -} - -/* Return true if metric has score that is more than spam score for it */ -static gboolean -check_metric_is_spam (struct rspamd_task *task, struct metric *metric) -{ - struct metric_result *res; - double ms, rs; - - /* Avoid concurrency while checking 
results */ -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) - g_static_mutex_lock (&result_mtx); -#else - G_LOCK (result_mtx); -#endif - res = g_hash_table_lookup (task->results, metric->name); - if (res) { -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) - g_static_mutex_unlock (&result_mtx); -#else - G_UNLOCK (result_mtx); -#endif - if (!check_metric_settings (res, &ms, &rs)) { - ms = metric->actions[METRIC_ACTION_REJECT].score; - } - return (ms > 0 && res->score >= ms); - } - -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) - g_static_mutex_unlock (&result_mtx); -#else - G_UNLOCK (result_mtx); -#endif - - return FALSE; -} - -gint -process_filters (struct rspamd_task *task) -{ - GList *cur; - struct metric *metric; - gpointer item = NULL; - - /* Process metrics symbols */ - while (call_symbol_callback (task, task->cfg->cache, &item)) { - /* Check reject actions */ - cur = task->cfg->metrics_list; - while (cur) { - metric = cur->data; - if (!task->pass_all_filters && - metric->actions[METRIC_ACTION_REJECT].score > 0 && - check_metric_is_spam (task, metric)) { - task->state = WRITE_REPLY; - return 1; - } - cur = g_list_next (cur); - } - } - - task->state = WAIT_FILTER; - - return 1; -} - - -struct composites_data { - struct rspamd_task *task; - struct metric_result *metric_res; - GTree *symbols_to_remove; - guint8 *checked; -}; - -struct symbol_remove_data { - struct symbol *ms; - gboolean remove_weight; - gboolean remove_symbol; -}; - -static gint -remove_compare_data (gconstpointer a, gconstpointer b) -{ - const gchar *ca = a, *cb = b; - - return strcmp (ca, cb); -} - -static void -composites_foreach_callback (gpointer key, gpointer value, void *data) -{ - struct composites_data *cd = (struct composites_data *)data; - struct rspamd_composite *composite = value, *ncomp; - struct expression *expr; - GQueue *stack; - GList *symbols = NULL, *s; - gsize cur, op1, op2; - gchar logbuf[256], *sym, *check_sym; - gint r; - struct 
symbol *ms; - struct symbol_remove_data *rd; - - - expr = composite->expr; - if (isset (cd->checked, composite->id)) { - /* Symbol was already checked */ - return; - } - - stack = g_queue_new (); - - while (expr) { - if (expr->type == EXPR_STR) { - /* Find corresponding symbol */ - sym = expr->content.operand; - if (*sym == '~' || *sym == '-') { - sym ++; - } - if (g_hash_table_lookup (cd->metric_res->symbols, sym) == NULL) { - cur = 0; - if ((ncomp = g_hash_table_lookup (cd->task->cfg->composite_symbols, sym)) != NULL) { - /* Set checked for this symbol to avoid cyclic references */ - if (isclr (cd->checked, ncomp->id)) { - setbit (cd->checked, composite->id); - composites_foreach_callback (sym, ncomp, cd); - if (g_hash_table_lookup (cd->metric_res->symbols, sym) != NULL) { - cur = 1; - } - } - } - } - else { - cur = 1; - symbols = g_list_prepend (symbols, expr->content.operand); - } - g_queue_push_head (stack, GSIZE_TO_POINTER (cur)); - } - else { - if (g_queue_is_empty (stack)) { - /* Queue has no operands for operation, exiting */ - g_list_free (symbols); - g_queue_free (stack); - setbit (cd->checked, composite->id); - return; - } - switch (expr->content.operation) { - case '!': - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - op1 = !op1; - g_queue_push_head (stack, GSIZE_TO_POINTER (op1)); - break; - case '&': - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2)); - break; - case '|': - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2)); - break; - default: - expr = expr->next; - continue; - } - } - expr = expr->next; - } - if (!g_queue_is_empty (stack)) { - op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); - if (op1) { - /* Remove all symbols that are in composite symbol */ - s = g_list_first (symbols); - r = rspamd_snprintf (logbuf, 
sizeof (logbuf), "<%s>, insert symbol %s instead of symbols: ", cd->task->message_id, key); - while (s) { - sym = s->data; - if (*sym == '~' || *sym == '-') { - check_sym = sym + 1; - } - else { - check_sym = sym; - } - ms = g_hash_table_lookup (cd->metric_res->symbols, check_sym); - - if (ms == NULL) { - /* Try to process other composites */ - if ((ncomp = g_hash_table_lookup (cd->task->cfg->composite_symbols, check_sym)) != NULL) { - /* Set checked for this symbol to avoid cyclic references */ - if (isclr (cd->checked, ncomp->id)) { - setbit (cd->checked, composite->id); - composites_foreach_callback (check_sym, ncomp, cd); - ms = g_hash_table_lookup (cd->metric_res->symbols, check_sym); - } - } - } - - if (ms != NULL) { - rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (struct symbol_remove_data)); - rd->ms = ms; - if (G_UNLIKELY (*sym == '~')) { - rd->remove_weight = FALSE; - rd->remove_symbol = TRUE; - } - else if (G_UNLIKELY (*sym == '-')) { - rd->remove_symbol = FALSE; - rd->remove_weight = FALSE; - } - else { - rd->remove_symbol = TRUE; - rd->remove_weight = TRUE; - } - if (!g_tree_lookup (cd->symbols_to_remove, rd)) { - g_tree_insert (cd->symbols_to_remove, (gpointer)ms->name, rd); - } - } - else { - - } - - if (s->next) { - r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s, ", s->data); - } - else { - r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s", s->data); - } - s = g_list_next (s); - } - /* Add new symbol */ - insert_result_single (cd->task, key, 1.0, NULL); - msg_info ("%s", logbuf); - } - } - - setbit (cd->checked, composite->id); - g_queue_free (stack); - g_list_free (symbols); - - return; -} - -static gboolean -check_autolearn (struct statfile_autolearn_params *params, struct rspamd_task *task) -{ - gchar *metric_name = DEFAULT_METRIC; - struct metric_result *metric_res; - GList *cur; - - if (params->metric != NULL) { - metric_name = (gchar *)params->metric; - } - - /* First check threshold */ - metric_res = 
g_hash_table_lookup (task->results, metric_name); - if (metric_res == NULL) { - if (params->symbols == NULL && params->threshold_max > 0) { - /* For ham messages */ - return TRUE; - } - debug_task ("metric %s has no results", metric_name); - return FALSE; - } - else { - /* Process score of metric */ - if ((params->threshold_min != 0 && metric_res->score > params->threshold_min) || (params->threshold_max != 0 && metric_res->score < params->threshold_max)) { - /* Now check for specific symbols */ - if (params->symbols) { - cur = params->symbols; - while (cur) { - if (g_hash_table_lookup (metric_res->symbols, cur->data) == NULL) { - return FALSE; - } - cur = g_list_next (cur); - } - } - /* Now allow processing of actual autolearn */ - return TRUE; - } - } - - return FALSE; -} - -void -process_autolearn (struct statfile *st, struct rspamd_task *task, GTree * tokens, struct classifier *classifier, gchar *filename, struct classifier_ctx *ctx) -{ - stat_file_t *statfile; - struct statfile *unused; - - if (check_autolearn (st->autolearn, task)) { - if (tokens) { - /* Take care of subject */ - tokenize_subject (task, &tokens); - msg_info ("message with id <%s> autolearned statfile '%s'", task->message_id, filename); - - /* Get or create statfile */ - statfile = get_statfile_by_symbol (task->worker->srv->statfile_pool, ctx->cfg, - st->symbol, &unused, TRUE); - - if (statfile == NULL) { - return; - } - - classifier->learn_func (ctx, task->worker->srv->statfile_pool, st->symbol, tokens, TRUE, NULL, 1., NULL); - maybe_write_binlog (ctx->cfg, st, statfile, tokens); - statfile_pool_plan_invalidate (task->worker->srv->statfile_pool, DEFAULT_STATFILE_INVALIDATE_TIME, DEFAULT_STATFILE_INVALIDATE_JITTER); - } - } -} - -static gboolean -composites_remove_symbols (gpointer key, gpointer value, gpointer data) -{ - struct composites_data *cd = data; - struct symbol_remove_data *rd = value; - - if (rd->remove_symbol) { - g_hash_table_remove (cd->metric_res->symbols, key); - } - if 
(rd->remove_weight) { - cd->metric_res->score -= rd->ms->score; - } - - return FALSE; -} - -static void -composites_metric_callback (gpointer key, gpointer value, gpointer data) -{ - struct rspamd_task *task = (struct rspamd_task *)data; - struct composites_data *cd = rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data)); - struct metric_result *metric_res = (struct metric_result *)value; - - cd->task = task; - cd->metric_res = (struct metric_result *)metric_res; - cd->symbols_to_remove = g_tree_new (remove_compare_data); - cd->checked = rspamd_mempool_alloc0 (task->task_pool, NBYTES (g_hash_table_size (task->cfg->composite_symbols))); - - /* Process hash table */ - g_hash_table_foreach (task->cfg->composite_symbols, composites_foreach_callback, cd); - - /* Remove symbols that are in composites */ - g_tree_foreach (cd->symbols_to_remove, composites_remove_symbols, cd); - /* Free list */ - g_tree_destroy (cd->symbols_to_remove); -} - -void -make_composites (struct rspamd_task *task) -{ - g_hash_table_foreach (task->results, composites_metric_callback, task); -} - -struct classifiers_cbdata { - struct rspamd_task *task; - struct lua_locked_state *nL; -}; - -static void -classifiers_callback (gpointer value, void *arg) -{ - struct classifiers_cbdata *cbdata = arg; - struct rspamd_task *task; - struct classifier_config *cl = value; - struct classifier_ctx *ctx; - struct mime_text_part *text_part, *p1, *p2; - struct statfile *st; - GTree *tokens = NULL; - GList *cur; - f_str_t c; - gchar *header = NULL; - gint *dist = NULL, diff; - gboolean is_twopart = FALSE; - - task = cbdata->task; - - if ((header = g_hash_table_lookup (cl->opts, "header")) != NULL) { - cur = message_get_header (task->task_pool, task->message, header, FALSE); - if (cur) { - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, cur); - } - } - else { - cur = g_list_first (task->text_parts); - dist = rspamd_mempool_get_variable (task->task_pool, 
"parts_distance"); - if (cur != NULL && cur->next != NULL && cur->next->next == NULL) { - is_twopart = TRUE; - } - } - ctx = cl->classifier->init_func (task->task_pool, cl); - - if ((tokens = g_hash_table_lookup (task->tokens, cl->tokenizer)) == NULL) { - while (cur != NULL) { - if (header) { - c.len = strlen (cur->data); - if (c.len > 0) { - c.begin = cur->data; - if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE, FALSE, NULL)) { - msg_info ("cannot tokenize input"); - return; - } - } - } - else { - text_part = (struct mime_text_part *)cur->data; - if (text_part->is_empty) { - cur = g_list_next (cur); - continue; - } - if (dist != NULL && cur->next == NULL) { - /* Compare part's content */ - - if (*dist >= COMMON_PART_FACTOR) { - msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id); - break; - } - } - else if (cur->next == NULL && is_twopart) { - p1 = cur->prev->data; - p2 = text_part; - if (p1->diff_str != NULL && p2->diff_str != NULL) { - diff = compare_diff_distance (p1->diff_str, p2->diff_str); - } - else { - diff = fuzzy_compare_parts (p1, p2); - } - if (diff >= COMMON_PART_FACTOR) { - msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id); - break; - } - } - c.begin = (gchar *)text_part->content->data; - c.len = text_part->content->len; - /* Tree would be freed at task pool freeing */ - if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, - FALSE, text_part->is_utf, text_part->urls_offset)) { - msg_info ("cannot tokenize input"); - return; - } - } - cur = g_list_next (cur); - } - g_hash_table_insert (task->tokens, cl->tokenizer, tokens); - } - - /* Take care of subject */ - tokenize_subject (task, &tokens); - - if (tokens == NULL) { - return; - } - - if (cbdata->nL != NULL) { - rspamd_mutex_lock (cbdata->nL->m); - cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task, cbdata->nL->L); - 
rspamd_mutex_unlock (cbdata->nL->m); - } - else { - /* Non-threaded case */ - cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task, task->cfg->lua_state); - } - - /* Autolearning */ - cur = g_list_first (cl->statfiles); - while (cur) { - st = cur->data; - if (st->autolearn) { - if (check_autolearn (st->autolearn, task)) { - /* Process autolearn */ - process_autolearn (st, task, tokens, cl->classifier, st->path, ctx); - } - } - cur = g_list_next (cur); - } -} - - -void -process_statfiles (struct rspamd_task *task) -{ - struct classifiers_cbdata cbdata; - - if (task->is_skipped) { - return; - } - - if (task->tokens == NULL) { - task->tokens = g_hash_table_new (g_direct_hash, g_direct_equal); - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_hash_table_unref, task->tokens); - } - cbdata.task = task; - cbdata.nL = NULL; - g_list_foreach (task->cfg->classifiers, classifiers_callback, &cbdata); - - /* Process results */ - make_composites (task); -} - -void -process_statfiles_threaded (gpointer data, gpointer user_data) -{ - struct rspamd_task *task = (struct rspamd_task *)data; - struct lua_locked_state *nL = user_data; - struct classifiers_cbdata cbdata; - - if (task->is_skipped) { - remove_async_thread (task->s); - return; - } - - if (task->tokens == NULL) { - task->tokens = g_hash_table_new (g_direct_hash, g_direct_equal); - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_hash_table_unref, task->tokens); - } - - cbdata.task = task; - cbdata.nL = nL; - g_list_foreach (task->cfg->classifiers, classifiers_callback, &cbdata); - remove_async_thread (task->s); -} - -static void -insert_metric_header (gpointer metric_name, gpointer metric_value, gpointer data) -{ -#ifndef GLIB_HASH_COMPAT - struct rspamd_task *task = (struct rspamd_task *)data; - gint r = 0; - /* Try to be rfc2822 compatible and avoid long headers with folding */ - gchar header_name[128], outbuf[1000]; - GList *symbols = 
NULL, *cur; - struct metric_result *metric_res = (struct metric_result *)metric_value; - double ms, rs; - - rspamd_snprintf (header_name, sizeof (header_name), "X-Spam-%s", metric_res->metric->name); - - if (!check_metric_settings (metric_res, &ms, &rs)) { - ms = metric_res->metric->actions[METRIC_ACTION_REJECT].score; - } - if (ms > 0 && metric_res->score >= ms) { - r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "yes; %.2f/%.2f/%.2f; ", metric_res->score, ms, rs); - } - else { - r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "no; %.2f/%.2f/%.2f; ", metric_res->score, ms, rs); - } - - symbols = g_hash_table_get_keys (metric_res->symbols); - cur = symbols; - while (cur) { - if (g_list_next (cur) != NULL) { - r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "%s,", (gchar *)cur->data); - } - else { - r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "%s", (gchar *)cur->data); - } - cur = g_list_next (cur); - } - g_list_free (symbols); -#ifdef GMIME24 - g_mime_object_append_header (GMIME_OBJECT (task->message), header_name, outbuf); -#else - g_mime_message_add_header (task->message, header_name, outbuf); -#endif - -#endif /* GLIB_COMPAT */ -} - -void -insert_headers (struct rspamd_task *task) -{ - g_hash_table_foreach (task->results, insert_metric_header, task); -} - -gboolean -check_action_str (const gchar *data, gint *result) -{ - if (g_ascii_strncasecmp (data, "reject", sizeof ("reject") - 1) == 0) { - *result = METRIC_ACTION_REJECT; - } - else if (g_ascii_strncasecmp (data, "greylist", sizeof ("greylist") - 1) == 0) { - *result = METRIC_ACTION_GREYLIST; - } - else if (g_ascii_strncasecmp (data, "add_header", sizeof ("add_header") - 1) == 0) { - *result = METRIC_ACTION_ADD_HEADER; - } - else if (g_ascii_strncasecmp (data, "rewrite_subject", sizeof ("rewrite_subject") - 1) == 0) { - *result = METRIC_ACTION_REWRITE_SUBJECT; - } - else { - return FALSE; - } - return TRUE; -} - -const gchar * -str_action_metric (enum rspamd_metric_action 
action) -{ - switch (action) { - case METRIC_ACTION_REJECT: - return "reject"; - case METRIC_ACTION_SOFT_REJECT: - return "soft_reject"; - case METRIC_ACTION_REWRITE_SUBJECT: - return "rewrite_subject"; - case METRIC_ACTION_ADD_HEADER: - return "add_header"; - case METRIC_ACTION_GREYLIST: - return "greylist"; - case METRIC_ACTION_NOACTION: - return "no_action"; - case METRIC_ACTION_MAX: - return "invalid max action"; - } - - return "unknown action"; -} - -gint -check_metric_action (double score, double required_score, struct metric *metric) -{ - struct metric_action *action, *selected_action = NULL; - double max_score = 0; - int i; - - if (score >= required_score) { - return METRIC_ACTION_REJECT; - } - else if (metric->actions == NULL) { - return METRIC_ACTION_NOACTION; - } - else { - for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) { - action = &metric->actions[i]; - if (action->score < 0) { - continue; - } - if (score >= action->score && action->score > max_score) { - selected_action = action; - max_score = action->score; - } - } - if (selected_action) { - return selected_action->action; - } - else { - return METRIC_ACTION_NOACTION; - } - } -} - -gboolean -learn_task (const gchar *statfile, struct rspamd_task *task, GError **err) -{ - GList *cur, *ex; - struct classifier_config *cl; - struct classifier_ctx *cls_ctx; - gchar *s; - f_str_t c; - GTree *tokens = NULL; - struct statfile *st; - stat_file_t *stf; - gdouble sum; - struct mime_text_part *part, *p1, *p2; - gboolean is_utf = FALSE, is_twopart = FALSE; - gint diff; - - - /* Load classifier by symbol */ - cl = g_hash_table_lookup (task->cfg->classifiers_symbols, statfile); - if (cl == NULL) { - g_set_error (err, filter_error_quark(), 1, "Statfile %s is not configured in any classifier", statfile); - return FALSE; - } - - /* If classifier has 'header' option just classify header of this type */ - if ((s = g_hash_table_lookup (cl->opts, "header")) != NULL) { - cur = message_get_header 
(task->task_pool, task->message, s, FALSE); - if (cur) { - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, cur); - } - } - else { - /* Classify message otherwise */ - cur = g_list_first (task->text_parts); - if (cur != NULL && cur->next != NULL && cur->next->next == NULL) { - is_twopart = TRUE; - } - } - - /* Get tokens from each element */ - while (cur) { - if (s != NULL) { - c.len = strlen (cur->data); - c.begin = cur->data; - ex = NULL; - } - else { - part = cur->data; - /* Skip empty parts */ - if (part->is_empty) { - cur = g_list_next (cur); - continue; - } - c.begin = (gchar *)part->content->data; - c.len = part->content->len; - is_utf = part->is_utf; - ex = part->urls_offset; - if (is_twopart && cur->next == NULL) { - /* Compare part's content */ - p1 = cur->prev->data; - p2 = part; - if (p1->diff_str != NULL && p2->diff_str != NULL) { - diff = compare_diff_distance (p1->diff_str, p2->diff_str); - } - else { - diff = fuzzy_compare_parts (p1, p2); - } - if (diff >= COMMON_PART_FACTOR) { - msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id); - break; - } - } - } - /* Get tokens */ - if (!cl->tokenizer->tokenize_func ( - cl->tokenizer, task->task_pool, - &c, &tokens, FALSE, is_utf, ex)) { - g_set_error (err, filter_error_quark(), 2, "Cannot tokenize message"); - return FALSE; - } - cur = g_list_next (cur); - } - - /* Handle messages without text */ - if (tokens == NULL) { - g_set_error (err, filter_error_quark(), 3, "Cannot tokenize message, no text data"); - msg_info ("learn failed for message <%s>, no tokens to extract", task->message_id); - return FALSE; - } - - /* Take care of subject */ - tokenize_subject (task, &tokens); - - /* Init classifier */ - cls_ctx = cl->classifier->init_func ( - task->task_pool, cl); - /* Get or create statfile */ - stf = get_statfile_by_symbol (task->worker->srv->statfile_pool, - cl, statfile, &st, TRUE); - - /* Learn */ - if (stf== NULL || 
!cl->classifier->learn_func ( - cls_ctx, task->worker->srv->statfile_pool, - statfile, tokens, TRUE, &sum, - 1.0, err)) { - if (*err) { - msg_info ("learn failed for message <%s>, learn error: %s", task->message_id, (*err)->message); - return FALSE; - } - else { - g_set_error (err, filter_error_quark(), 4, "Learn failed, unknown learn classifier error"); - msg_info ("learn failed for message <%s>, unknown learn error", task->message_id); - return FALSE; - } - } - /* Increase statistics */ - task->worker->srv->stat->messages_learned++; - - maybe_write_binlog (cl, st, stf, tokens); - msg_info ("learn success for message <%s>, for statfile: %s, sum weight: %.2f", - task->message_id, statfile, sum); - statfile_pool_plan_invalidate (task->worker->srv->statfile_pool, - DEFAULT_STATFILE_INVALIDATE_TIME, - DEFAULT_STATFILE_INVALIDATE_JITTER); - - return TRUE; -} - -gboolean -learn_task_spam (struct classifier_config *cl, struct rspamd_task *task, gboolean is_spam, GError **err) -{ - GList *cur, *ex; - struct classifier_ctx *cls_ctx; - f_str_t c; - GTree *tokens = NULL; - struct mime_text_part *part, *p1, *p2; - gboolean is_utf = FALSE, is_twopart = FALSE; - gint diff; - - cur = g_list_first (task->text_parts); - if (cur != NULL && cur->next != NULL && cur->next->next == NULL) { - is_twopart = TRUE; - } - - /* Get tokens from each element */ - while (cur) { - part = cur->data; - /* Skip empty parts */ - if (part->is_empty) { - cur = g_list_next (cur); - continue; - } - c.begin = (gchar *)part->content->data; - c.len = part->content->len; - is_utf = part->is_utf; - ex = part->urls_offset; - if (is_twopart && cur->next == NULL) { - /* - * Compare part's content - * Note: here we don't have filters proceeded this message, so using pool variable is a bad idea - */ - p1 = cur->prev->data; - p2 = part; - if (p1->diff_str != NULL && p2->diff_str != NULL) { - diff = compare_diff_distance (p1->diff_str, p2->diff_str); - } - else { - diff = fuzzy_compare_parts (p1, p2); - } - if 
(diff >= COMMON_PART_FACTOR) { - msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id); - break; - } - } - /* Get tokens */ - if (!cl->tokenizer->tokenize_func ( - cl->tokenizer, task->task_pool, - &c, &tokens, FALSE, is_utf, ex)) { - g_set_error (err, filter_error_quark(), 2, "Cannot tokenize message"); - return FALSE; - } - cur = g_list_next (cur); - } - - /* Handle messages without text */ - if (tokens == NULL) { - g_set_error (err, filter_error_quark(), 3, "Cannot tokenize message, no text data"); - msg_info ("learn failed for message <%s>, no tokens to extract", task->message_id); - return FALSE; - } - - /* Take care of subject */ - tokenize_subject (task, &tokens); - - /* Init classifier */ - cls_ctx = cl->classifier->init_func ( - task->task_pool, cl); - /* Learn */ - if (!cl->classifier->learn_spam_func ( - cls_ctx, task->worker->srv->statfile_pool, - tokens, task, is_spam, task->cfg->lua_state, err)) { - if (*err) { - msg_info ("learn failed for message <%s>, learn error: %s", task->message_id, (*err)->message); - return FALSE; - } - else { - g_set_error (err, filter_error_quark(), 4, "Learn failed, unknown learn classifier error"); - msg_info ("learn failed for message <%s>, unknown learn error", task->message_id); - return FALSE; - } - } - /* Increase statistics */ - task->worker->srv->stat->messages_learned++; - - msg_info ("learn success for message <%s>", - task->message_id); - statfile_pool_plan_invalidate (task->worker->srv->statfile_pool, - DEFAULT_STATFILE_INVALIDATE_TIME, - DEFAULT_STATFILE_INVALIDATE_JITTER); - - return TRUE; -} - -/* - * vi:ts=4 - */ diff --git a/src/filter.h b/src/filter.h deleted file mode 100644 index 258bd9447..000000000 --- a/src/filter.h +++ /dev/null @@ -1,167 +0,0 @@ -/** - * @file filter.h - * Filters logic implemetation - */ - -#ifndef RSPAMD_FILTER_H -#define RSPAMD_FILTER_H - -#include "config.h" -#include "symbols_cache.h" -#include "task.h" - -struct rspamd_task; -struct 
rspamd_settings; -struct classifier_config; - -typedef double (*metric_cons_func)(struct rspamd_task *task, const gchar *metric_name, const gchar *func_name); -typedef void (*filter_func)(struct rspamd_task *task); - -enum filter_type { C_FILTER, PERL_FILTER }; - -/** - * Filter structure - */ -struct filter { - gchar *func_name; /**< function name */ - enum filter_type type; /**< filter type (c or perl) */ - module_t *module; -}; - -/** - * Rspamd symbol - */ -struct symbol { - double score; /**< symbol's score */ - GList *options; /**< list of symbol's options */ - const gchar *name; -}; - -struct metric_action { - enum rspamd_metric_action action; - gdouble score; -}; - -/** - * Common definition of metric - */ -struct metric { - const gchar *name; /**< name of metric */ - gchar *func_name; /**< name of consolidation function */ - metric_cons_func func; /**< c consolidation function */ - double grow_factor; /**< grow factor for metric */ - GHashTable *symbols; /**< weights of symbols in metric */ - GHashTable *descriptions; /**< descriptions of symbols in metric */ - struct metric_action actions[METRIC_ACTION_MAX]; /**< all actions of the metric */ - gchar *subject; /**< subject rewrite string */ -}; - -/** - * Result of metric processing - */ -struct metric_result { - struct metric *metric; /**< pointer to metric structure */ - double score; /**< total score */ - GHashTable *symbols; /**< symbols of metric */ - gboolean checked; /**< whether metric result is consolidated */ - double grow_factor; /**< current grow factor */ - struct rspamd_settings *user_settings; /**< settings for metric */ - struct rspamd_settings *domain_settings; /**< settings for metric */ -}; - -/** - * Composite structure - */ -struct rspamd_composite { - struct expression *expr; - gint id; -}; - -/** - * Process all filters - * @param task worker's task that present message from user - * @return 0 - if there is non-finished tasks and 1 if processing is completed - */ -gint 
process_filters (struct rspamd_task *task); - -/** - * Process message with statfiles - * @param task worker's task that present message from user - */ -void process_statfiles (struct rspamd_task *task); - -/** - * Process message with statfiles threaded - * @param data worker's task that present message from user - */ -void process_statfiles_threaded (gpointer data, gpointer user_data); - -/** - * Insert a result to task - * @param task worker's task that present message from user - * @param metric_name metric's name to which we need to insert result - * @param symbol symbol to insert - * @param flag numeric weight for symbol - * @param opts list of symbol's options - */ -void insert_result (struct rspamd_task *task, const gchar *symbol, double flag, GList *opts); - -/** - * Insert a single result to task - * @param task worker's task that present message from user - * @param metric_name metric's name to which we need to insert result - * @param symbol symbol to insert - * @param flag numeric weight for symbol - * @param opts list of symbol's options - */ -void insert_result_single (struct rspamd_task *task, const gchar *symbol, double flag, GList *opts); - -/** - * Process all results and form composite metrics from existent metrics as it is defined in config - * @param task worker's task that present message from user - */ -void make_composites (struct rspamd_task *task); - -/** - * Default consolidation function for metric, it get all symbols and multiply symbol - * weight by some factor that is specified in config. Default factor is 1. 
- * @param task worker's task that present message from user - * @param metric_name name of metric - * @return result metric weight - */ -double factor_consolidation_func (struct rspamd_task *task, const gchar *metric_name, const gchar *unused); - -/* - * Learn specified statfile with message in a task - * @param statfile symbol of statfile - * @param task worker's task object - * @param err pointer to GError - * @return true if learn succeed - */ -gboolean learn_task (const gchar *statfile, struct rspamd_task *task, GError **err); - -/* - * Learn specified statfile with message in a task - * @param statfile symbol of statfile - * @param task worker's task object - * @param err pointer to GError - * @return true if learn succeed - */ -gboolean learn_task_spam (struct classifier_config *cl, struct rspamd_task *task, gboolean is_spam, GError **err); - -/* - * Get action from a string - */ -gboolean check_action_str (const gchar *data, gint *result); - -/* - * Return textual representation of action enumeration - */ -const gchar *str_action_metric (enum rspamd_metric_action action); - -/* - * Get action for specific metric - */ -gint check_metric_action (double score, double required_score, struct metric *metric); - -#endif diff --git a/src/fstring.c b/src/fstring.c deleted file mode 100644 index 098824101..000000000 --- a/src/fstring.c +++ /dev/null @@ -1,461 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "fstring.h" - -/* - * Search first occurence of character in string - */ -ssize_t -fstrchr (f_str_t * src, gchar c) -{ - register size_t cur = 0; - - while (cur < src->len) { - if (*(src->begin + cur) == c) { - return cur; - } - cur++; - } - - return -1; -} - -/* - * Search last occurence of character in string - */ -ssize_t -fstrrchr (f_str_t * src, gchar c) -{ - register ssize_t cur = src->len; - - while (cur > 0) { - if (*(src->begin + cur) == c) { - return cur; - } - cur--; - } - - return -1; -} - -/* - * Search for pattern in orig - */ -ssize_t -fstrstr (f_str_t * orig, f_str_t * pattern) -{ - register size_t cur = 0, pcur = 0; - - if (pattern->len > orig->len) { - return -1; - } - - while (cur < orig->len) { - if (*(orig->begin + cur) == *pattern->begin) { - while (cur < orig->len && pcur < pattern->len) { - if (*(orig->begin + cur) != *(pattern->begin + pcur)) { - pcur = 0; - break; - } - cur++; - pcur++; - } - return cur - pattern->len; - } - cur++; - } - - return -1; - -} - -/* - * Search for pattern in orig ignoring case - */ -ssize_t -fstrstri (f_str_t * orig, f_str_t * pattern) -{ - register size_t cur = 0, pcur = 0; - - if (pattern->len > orig->len) { - return -1; - } - - while (cur < orig->len) { - if 
(g_ascii_tolower (*(orig->begin + cur)) == g_ascii_tolower (*pattern->begin)) { - while (cur < orig->len && pcur < pattern->len) { - if (g_ascii_tolower (*(orig->begin + cur)) != g_ascii_tolower (*(pattern->begin + pcur))) { - pcur = 0; - break; - } - cur++; - pcur++; - } - return cur - pattern->len; - } - cur++; - } - - return -1; - -} - -/* - * Split string by tokens - * word contains parsed word - * - * Return: -1 - no new words can be extracted - * 1 - word was extracted and there are more words - * 0 - last word extracted - */ -gint -fstrtok (f_str_t * text, const gchar *sep, f_tok_t * state) -{ - register size_t cur; - const gchar *csep = sep; - - if (state->pos >= text->len) { - return -1; - } - - cur = state->pos; - - while (cur < text->len) { - while (*csep) { - if (*(text->begin + cur) == *csep) { - state->word.begin = (text->begin + state->pos); - state->word.len = cur - state->pos; - state->pos = cur + 1; - return 1; - } - csep++; - } - csep = sep; - cur++; - } - - /* Last word */ - state->word.begin = (text->begin + state->pos); - state->word.len = cur - state->pos; - state->pos = cur; - - return 0; -} - -/* - * Copy one string into other - */ -size_t -fstrcpy (f_str_t * dest, f_str_t * src) -{ - register size_t cur = 0; - - if (dest->size < src->len) { - return 0; - } - - while (cur < src->len && cur < dest->size) { - *(dest->begin + cur) = *(src->begin + cur); - cur++; - } - - return cur; -} - -/* - * Concatenate two strings - */ -size_t -fstrcat (f_str_t * dest, f_str_t * src) -{ - register size_t cur = 0; - gchar *p = dest->begin + dest->len; - - if (dest->size < src->len + dest->len) { - return 0; - } - - while (cur < src->len) { - *p = *(src->begin + cur); - p++; - cur++; - } - - dest->len += src->len; - - return cur; - -} - -/* - * Make copy of string to 0-terminated string - */ -gchar * -fstrcstr (f_str_t * str, rspamd_mempool_t * pool) -{ - gchar *res; - res = rspamd_mempool_alloc (pool, str->len + 1); - - /* Do not allow multiply \0 
characters */ - memccpy (res, str->begin, '\0', str->len); - res[str->len] = 0; - - return res; -} - -/* - * Push one character to fstr - */ -gint -fstrpush (f_str_t * dest, gchar c) -{ - if (dest->size < dest->len) { - /* Need to reallocate string */ - return 0; - } - - *(dest->begin + dest->len) = c; - dest->len++; - return 1; -} - -/* - * Push one character to fstr - */ -gint -fstrpush_unichar (f_str_t * dest, gunichar c) -{ - int l; - if (dest->size < dest->len) { - /* Need to reallocate string */ - return 0; - } - - l = g_unichar_to_utf8 (c, dest->begin + dest->len); - dest->len += l; - return l; -} - -/* - * Allocate memory for f_str_t - */ -f_str_t * -fstralloc (rspamd_mempool_t * pool, size_t len) -{ - f_str_t *res = rspamd_mempool_alloc (pool, sizeof (f_str_t)); - - res->begin = rspamd_mempool_alloc (pool, len); - - res->size = len; - res->len = 0; - return res; -} - -/* - * Allocate memory for f_str_t from temporary pool - */ -f_str_t * -fstralloc_tmp (rspamd_mempool_t * pool, size_t len) -{ - f_str_t *res = rspamd_mempool_alloc_tmp (pool, sizeof (f_str_t)); - - res->begin = rspamd_mempool_alloc_tmp (pool, len); - - res->size = len; - res->len = 0; - return res; -} - -/* - * Truncate string to its len - */ -f_str_t * -fstrtruncate (rspamd_mempool_t * pool, f_str_t * orig) -{ - f_str_t *res; - - if (orig == NULL || orig->len == 0 || orig->size <= orig->len) { - return orig; - } - - res = fstralloc (pool, orig->len); - if (res == NULL) { - return NULL; - } - fstrcpy (res, orig); - - return res; -} - -/* - * Enlarge string to new size - */ -f_str_t * -fstrgrow (rspamd_mempool_t * pool, f_str_t * orig, size_t newlen) -{ - f_str_t *res; - - if (orig == NULL || orig->len == 0 || orig->size >= newlen) { - return orig; - } - - res = fstralloc (pool, newlen); - if (res == NULL) { - return NULL; - } - fstrcpy (res, orig); - - return res; -} - -static guint32 -fstrhash_c (gchar c, guint32 hval) -{ - guint32 tmp; - /* - * xor in the current byte against each byte of 
hval - * (which alone gaurantees that every bit of input will have - * an effect on the output) - */ - tmp = c & 0xFF; - tmp = tmp | (tmp << 8) | (tmp << 16) | (tmp << 24); - hval ^= tmp; - - /* add some bits out of the middle as low order bits */ - hval = hval + ((hval >> 12) & 0x0000ffff); - - /* swap most and min significative bytes */ - tmp = (hval << 24) | ((hval >> 24) & 0xff); - /* zero most and min significative bytes of hval */ - hval &= 0x00ffff00; - hval |= tmp; - /* - * rotate hval 3 bits to the left (thereby making the - * 3rd msb of the above mess the hsb of the output hash) - */ - return (hval << 3) + (hval >> 29); -} - -/* - * Return hash value for a string - */ -guint32 -fstrhash (f_str_t * str) -{ - size_t i; - guint32 hval; - gchar *c = str->begin; - - if (str == NULL) { - return 0; - } - hval = str->len; - - for (i = 0; i < str->len; i++, c++) { - hval = fstrhash_c (*c, hval); - } - return hval; -} - -/* - * Return hash value for a string - */ -guint32 -fstrhash_lowercase (f_str_t * str, gboolean is_utf) -{ - gsize i; - guint32 j, hval; - const gchar *p = str->begin, *end = NULL; - gchar t; - gunichar uc; - - if (str == NULL) { - return 0; - } - hval = str->len; - - if (is_utf) { - while (end < str->begin + str->len) { - if (!g_utf8_validate (p, str->len, &end)) { - return fstrhash_lowercase (str, FALSE); - } - while (p < end) { - uc = g_unichar_tolower (g_utf8_get_char (p)); - for (j = 0; j < sizeof (gunichar); j ++) { - t = (uc >> (j * 8)) & 0xff; - if (t != 0) { - hval = fstrhash_c (t, hval); - } - } - p = g_utf8_next_char (p); - } - p = end + 1; - } - - } - else { - for (i = 0; i < str->len; i++, p++) { - hval = fstrhash_c (g_ascii_tolower (*p), hval); - } - } - - return hval; -} - -void -fstrstrip (f_str_t * str) -{ - gchar *p = str->begin; - guint r = 0; - - while (r < str->len) { - if (g_ascii_isspace (*p)) { - p++; - r++; - } - else { - break; - } - } - - if (r > 0) { - memmove (str->begin, p, str->len - r); - str->len -= r; - } - - r = 
str->len; - p = str->begin + str->len; - while (r > 0) { - if (g_ascii_isspace (*p)) { - p--; - r--; - } - else { - break; - } - } - - str->len = r; -} diff --git a/src/fstring.h b/src/fstring.h deleted file mode 100644 index bd680e365..000000000 --- a/src/fstring.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Functions for handling with fixed size strings - */ - -#ifndef FSTRING_H -#define FSTRING_H - -#include "config.h" -#include "mem_pool.h" - -#define update_buf_size(x) (x)->free = (x)->buf->size - ((x)->pos - (x)->buf->begin); (x)->buf->len = (x)->pos - (x)->buf->begin - -typedef struct f_str_s { - gchar *begin; - size_t len; - size_t size; -} f_str_t; - -typedef struct f_str_buf_s { - f_str_t *buf; - gchar *pos; - size_t free; -} f_str_buf_t; - -typedef struct f_tok_s { - f_str_t word; - size_t pos; -} f_tok_t; - -/* - * Search first occurence of character in string - */ -ssize_t fstrchr (f_str_t *src, gchar c); - -/* - * Search last occurence of character in string - */ -ssize_t fstrrchr (f_str_t *src, gchar c); - -/* - * Search for pattern in orig - */ -ssize_t fstrstr (f_str_t *orig, f_str_t *pattern); - -/* - * Search for pattern in orig ignoring case - */ -ssize_t fstrstri (f_str_t *orig, f_str_t *pattern); - -/* - * Split string by tokens - * word contains parsed word - */ -gint fstrtok (f_str_t *text, const gchar *sep, f_tok_t *state); - -/* - * Copy one string into other - */ -size_t fstrcpy (f_str_t *dest, f_str_t *src); - -/* - * Concatenate two strings - */ -size_t fstrcat (f_str_t *dest, f_str_t *src); - -/* - * Push one character to fstr - */ -gint fstrpush (f_str_t *dest, gchar c); - -/* - * Push one character to fstr - */ -gint fstrpush_unichar (f_str_t *dest, gunichar c); - -/* - * Allocate memory for f_str_t - */ -f_str_t* fstralloc (rspamd_mempool_t *pool, size_t len); - -/* - * Allocate memory for f_str_t from temporary pool - */ -f_str_t* fstralloc_tmp (rspamd_mempool_t *pool, size_t len); - -/* - * Truncate string to its len - */ -f_str_t* 
fstrtruncate (rspamd_mempool_t *pool, f_str_t *orig); - -/* - * Enlarge string to new size - */ -f_str_t* fstrgrow (rspamd_mempool_t *pool, f_str_t *orig, size_t newlen); - -/* - * Return specified character - */ -#define fstridx(str, pos) *((str)->begin + (pos)) - -/* - * Return fast hash value for fixed string - */ -guint32 fstrhash (f_str_t *str); - -/* - * Return fast hash value for fixed string converted to lowercase - */ -guint32 fstrhash_lowercase (f_str_t *str, gboolean is_utf); -/* - * Make copy of string to 0-terminated string - */ -gchar* fstrcstr (f_str_t *str, rspamd_mempool_t *pool); - -/* - * Strip fstr string from space symbols - */ -void fstrstrip (f_str_t *str); - -#endif diff --git a/src/fuzzy.c b/src/fuzzy.c deleted file mode 100644 index 7e8a01ce3..000000000 --- a/src/fuzzy.c +++ /dev/null @@ -1,498 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - -#include "config.h" -#include "mem_pool.h" -#include "fstring.h" -#include "fuzzy.h" -#include "message.h" -#include "url.h" -#include "main.h" - -#define ROLL_WINDOW_SIZE 9 -#define MIN_FUZZY_BLOCK_SIZE 3 -#define HASH_INIT 0x28021967 - -static const char *b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - -struct roll_state { - guint32 h[3]; - gchar window[ROLL_WINDOW_SIZE]; - gint n; -}; - -static struct roll_state rs; - - -/* Rolling hash function based on Adler-32 checksum */ -static guint32 -fuzzy_roll_hash (guint c) -{ - /* Check window position */ - if (rs.n == ROLL_WINDOW_SIZE) { - rs.n = 0; - } - - rs.h[1] -= rs.h[0]; - rs.h[1] += ROLL_WINDOW_SIZE * c; - - rs.h[0] += c; - rs.h[0] -= rs.window[rs.n]; - - /* Save current symbol */ - rs.window[rs.n] = c; - rs.n++; - - rs.h[2] <<= 5; - rs.h[2] ^= c; - - return rs.h[0] + rs.h[1] + rs.h[2]; -} - -/* A simple non-rolling hash, based on the FNV hash */ -static guint32 -fuzzy_fnv_hash (guint c, guint32 hval) -{ - hval ^= c; - hval += (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) + (hval << 24); - return hval; -} - -/* Calculate blocksize depending on length of input */ -static guint32 -fuzzy_blocksize (guint32 len) -{ - guint32 nlen = MIN_FUZZY_BLOCK_SIZE; - - while (nlen * (FUZZY_HASHLEN - 1) < len) { - nlen *= 2; - } - return nlen; -} - - -/* Update hash with new symbol */ -static void -fuzzy_update (fuzzy_hash_t * h, guint c) -{ - h->rh = fuzzy_roll_hash (c); - h->h = 
fuzzy_fnv_hash (c, h->h); - - if (h->rh % h->block_size == (h->block_size - 1)) { - h->hash_pipe[h->hi] = b64[h->h % 64]; - if (h->hi < FUZZY_HASHLEN - 2) { - h->h = HASH_INIT; - h->hi++; - } - } -} - -static void -fuzzy_update2 (fuzzy_hash_t * h1, fuzzy_hash_t *h2, guint c) -{ - h1->rh = fuzzy_roll_hash (c); - h1->h = fuzzy_fnv_hash (c, h1->h); - h2->rh = h1->rh; - h2->h = fuzzy_fnv_hash (c, h2->h); - - if (h1->rh % h1->block_size == (h1->block_size - 1)) { - h1->hash_pipe[h1->hi] = b64[h1->h % 64]; - if (h1->hi < FUZZY_HASHLEN - 2) { - h1->h = HASH_INIT; - h1->hi++; - } - } - if (h2->rh % h2->block_size == (h2->block_size - 1)) { - h2->hash_pipe[h2->hi] = b64[h2->h % 64]; - if (h2->hi < FUZZY_HASHLEN - 2) { - h2->h = HASH_INIT; - h2->hi++; - } - } -} - -/* - * Levenshtein distance between string1 and string2. - * - * Replace cost is normally 1, and 2 with nonzero xcost. - */ -guint32 -lev_distance (gchar *s1, gint len1, gchar *s2, gint len2) -{ - gint i; - gint *row; /* we only need to keep one row of costs */ - gint *end; - gint half, nx; - gchar *sx, *char2p, char1; - gint *p, D, x, offset, c3; - - /* strip common prefix */ - while (len1 > 0 && len2 > 0 && *s1 == *s2) { - len1--; - len2--; - s1++; - s2++; - } - - /* strip common suffix */ - while (len1 > 0 && len2 > 0 && s1[len1 - 1] == s2[len2 - 1]) { - len1--; - len2--; - } - - /* catch trivial cases */ - if (len1 == 0) { - return len2; - } - - if (len2 == 0) { - return len1; - } - - /* make the inner cycle (i.e. 
string2) the longer one */ - if (len1 > len2) { - nx = len1; - sx = s1; - len1 = len2; - len2 = nx; - s1 = s2; - s2 = sx; - } - /* check len1 == 1 separately */ - if (len1 == 1) { - return len2 - (memchr (s2, *s1, len2) != NULL); - } - - len1++; - len2++; - half = len1 >> 1; - - /* initalize first row */ - row = g_malloc (len2 * sizeof (gint)); - end = row + len2 - 1; - for (i = 0; i < len2; i++) { - row[i] = i; - } - - /* in this case we don't have to scan two corner triangles (of size len1/2) - * in the matrix because no best path can go throught them. note this - * breaks when len1 == len2 == 2 so the memchr() special case above is - * necessary */ - row[0] = len1 - half - 1; - for (i = 1; i < len1; i++) { - char1 = s1[i - 1]; - /* skip the upper triangle */ - if (i >= len1 - half) { - offset = i - (len1 - half); - char2p = s2 + offset; - p = row + offset; - c3 = *(p++) + (char1 != *(char2p++)); - x = *p; - x++; - D = x; - if (x > c3) - x = c3; - *(p++) = x; - } - else { - p = row + 1; - char2p = s2; - D = x = i; - } - /* skip the lower triangle */ - if (i <= half + 1) - end = row + len2 + i - half - 2; - /* main */ - while (p <= end) { - c3 = --D + (char1 != *(char2p++)); - x++; - if (x > c3) - x = c3; - D = *p; - D++; - if (x > D) - x = D; - *(p++) = x; - } - /* lower triangle sentinel */ - if (i <= half) { - c3 = --D + (char1 != *char2p); - x++; - if (x > c3) - x = c3; - *p = x; - } - } - - i = *end; - g_free (row); - return i; -} - -/* Calculate fuzzy hash for specified string */ -fuzzy_hash_t * -fuzzy_init (f_str_t * in, rspamd_mempool_t * pool) -{ - fuzzy_hash_t *new; - guint i, repeats = 0; - gchar *c = in->begin, last = '\0'; - gsize real_len = 0; - - new = rspamd_mempool_alloc0 (pool, sizeof (fuzzy_hash_t)); - bzero (&rs, sizeof (rs)); - for (i = 0; i < in->len; i++) { - if (*c == last) { - repeats++; - } - else { - repeats = 0; - } - if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c) && repeats < 3) { - real_len ++; - } - last = *c; - c++; - } - - 
new->block_size = fuzzy_blocksize (real_len); - c = in->begin; - - for (i = 0; i < in->len; i++) { - if (*c == last) { - repeats++; - } - else { - repeats = 0; - } - if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c) && repeats < 3) { - fuzzy_update (new, *c); - } - last = *c; - c++; - } - - /* Check whether we have more bytes in a rolling window */ - if (new->rh != 0) { - new->hash_pipe[new->hi] = b64[new->h % 64]; - } - - return new; -} - -fuzzy_hash_t * -fuzzy_init_byte_array (GByteArray * in, rspamd_mempool_t * pool) -{ - f_str_t f; - - f.begin = (gchar *)in->data; - f.len = in->len; - - return fuzzy_init (&f, pool); -} - -void -fuzzy_init_part (struct mime_text_part *part, rspamd_mempool_t *pool, gsize max_diff) -{ - fuzzy_hash_t *new, *new2; - gchar *c, *end, *begin; - gsize real_len = 0, len = part->content->len; - GList *cur_offset; - struct process_exception *cur_ex = NULL; - gunichar uc; - gboolean write_diff = FALSE; - - cur_offset = part->urls_offset; - if (cur_offset != NULL) { - cur_ex = cur_offset->data; - } - - begin = (gchar *)part->content->data; - c = begin; - new = rspamd_mempool_alloc0 (pool, sizeof (fuzzy_hash_t)); - new2 = rspamd_mempool_alloc0 (pool, sizeof (fuzzy_hash_t)); - bzero (&rs, sizeof (rs)); - end = c + len; - - if (part->is_utf) { - while (c < end) { - if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) { - c += cur_ex->len + 1; - cur_offset = g_list_next (cur_offset); - if (cur_offset != NULL) { - cur_ex = cur_offset->data; - } - } - else { - uc = g_utf8_get_char (c); - if (g_unichar_isalnum (uc)) { - real_len ++; - } - c = g_utf8_next_char (c); - } - } - } - else { - while (c < end) { - if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) { - c += cur_ex->len + 1; - cur_offset = g_list_next (cur_offset); - if (cur_offset != NULL) { - cur_ex = cur_offset->data; - } - } - else { - if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c)) { - real_len ++; - } - c++; - } - } - } - - write_diff = real_len > 0 && real_len < max_diff; - - 
if (write_diff) { - part->diff_str = fstralloc (pool, real_len); - } - else { - part->diff_str = NULL; - } - - new->block_size = fuzzy_blocksize (real_len); - new2->block_size = new->block_size * 2; - - cur_offset = part->urls_offset; - if (cur_offset != NULL) { - cur_ex = cur_offset->data; - } - - begin = (gchar *)part->content->data; - c = begin; - end = c + len; - if (part->is_utf) { - - while (c < end) { - if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) { - c += cur_ex->len + 1; - cur_offset = g_list_next (cur_offset); - if (cur_offset != NULL) { - cur_ex = cur_offset->data; - } - } - else { - uc = g_utf8_get_char (c); - if (g_unichar_isalnum (uc)) { - fuzzy_update2 (new, new2, uc); - if (write_diff) { - fstrpush_unichar (part->diff_str, uc); - } - } - c = g_utf8_next_char (c); - } - } - } - else { - while (c < end) { - if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) { - c += cur_ex->len + 1; - cur_offset = g_list_next (cur_offset); - if (cur_offset != NULL) { - cur_ex = cur_offset->data; - } - } - else { - if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c)) { - fuzzy_update2 (new, new2, *c); - if (write_diff) { - fstrpush (part->diff_str, *c); - } - } - c++; - } - } - } - - /* Check whether we have more bytes in a rolling window */ - if (new->rh != 0) { - new->hash_pipe[new->hi] = b64[new->h % 64]; - } - if (new2->rh != 0) { - new2->hash_pipe[new2->hi] = b64[new2->h % 64]; - } - - part->fuzzy = new; - part->double_fuzzy = new2; -} - -/* Compare score of difference between two hashes 0 - different hashes, 100 - identical hashes */ -gint -fuzzy_compare_hashes (fuzzy_hash_t * h1, fuzzy_hash_t * h2) -{ - gint res, l1, l2; - - /* If we have hashes of different size, input strings are too different */ - if (h1->block_size != h2->block_size) { - return 0; - } - - l1 = strlen (h1->hash_pipe); - l2 = strlen (h2->hash_pipe); - - if (l1 == 0 || l2 == 0) { - if (l1 == 0 && l2 == 0) { - return 100; - } - else { - return 0; - } - } - - res = lev_distance 
(h1->hash_pipe, l1, h2->hash_pipe, l2); - res = 100 - (2 * res * 100) / (l1 + l2); - - return res; -} - -gint -fuzzy_compare_parts (struct mime_text_part *p1, struct mime_text_part *p2) -{ - if (p1->fuzzy != NULL && p2->fuzzy != NULL) { - if (p1->fuzzy->block_size == p2->fuzzy->block_size) { - return fuzzy_compare_hashes (p1->fuzzy, p2->fuzzy); - } - else if (p1->double_fuzzy->block_size == p2->fuzzy->block_size) { - return fuzzy_compare_hashes (p1->double_fuzzy, p2->fuzzy); - } - else if (p2->double_fuzzy->block_size == p1->fuzzy->block_size) { - return fuzzy_compare_hashes (p2->double_fuzzy, p1->fuzzy); - } - } - - return 0; -} - -/* - * vi:ts=4 - */ diff --git a/src/fuzzy.h b/src/fuzzy.h deleted file mode 100644 index c226c5765..000000000 --- a/src/fuzzy.h +++ /dev/null @@ -1,69 +0,0 @@ -/** - * @file fuzzy.h - * Fuzzy hashes API - */ - -#ifndef RSPAMD_FUZZY_H -#define RSPAMD_FUZZY_H - -#include "config.h" -#include "mem_pool.h" -#include "fstring.h" - -#define FUZZY_HASHLEN 64 - -typedef struct fuzzy_hash_s { - gchar hash_pipe[FUZZY_HASHLEN]; /**< result hash */ - guint32 block_size; /**< current blocksize */ - guint32 rh; /**< roll hash value */ - guint32 h; /**< hash of block */ - guint32 hi; /**< current index in hash pipe */ -} fuzzy_hash_t; - -struct mime_text_part; - -/** - * Calculate fuzzy hash for specified string - * @param in input string - * @param pool pool object - * @return fuzzy_hash object allocated in pool - */ -fuzzy_hash_t * fuzzy_init (f_str_t *in, rspamd_mempool_t *pool); -/** - * Calculate fuzzy hash for specified byte array - * @param in input string - * @param pool pool object - * @return fuzzy_hash object allocated in pool - */ -fuzzy_hash_t * fuzzy_init_byte_array (GByteArray *in, rspamd_mempool_t *pool); - -/** - * Calculate fuzzy hash for specified text part - * @param part text part object - * @param pool pool object - * @param max_diff maximum text length to use diff algorithm in comparasions - * @return fuzzy_hash object 
allocated in pool - */ -void fuzzy_init_part (struct mime_text_part *part, rspamd_mempool_t *pool, gsize max_diff); - -/** - * Compare score of difference between two hashes - * @param h1 first hash - * @param h2 second hash - * @return result in percents 0 - different hashes, 100 - identical hashes - */ -gint fuzzy_compare_hashes (fuzzy_hash_t *h1, fuzzy_hash_t *h2); - -/* - * Compare two text parts and return percents of difference - */ -gint fuzzy_compare_parts (struct mime_text_part *p1, struct mime_text_part *p2); - -/* - * Calculate levenstein distance between two strings. Note: this algorithm should be used - * only for short texts - it runs too slow on long ones. - */ -guint32 lev_distance (gchar *s1, gint len1, gchar *s2, gint len2); - - -#endif diff --git a/src/hash.c b/src/hash.c deleted file mode 100644 index 3bb381651..000000000 --- a/src/hash.c +++ /dev/null @@ -1,489 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "hash.h" - -#define HASH_TABLE_MIN_SIZE 19 -#define HASH_TABLE_MAX_SIZE 13845163 - -/* - * Performs a lookup in the hash table. Virtually all hash operations - * will use this function internally. - */ -static inline struct rspamd_hash_node ** -rspamd_hash_lookup_node (rspamd_hash_t * hash, gconstpointer key, guint * hash_return) -{ - struct rspamd_hash_node **node_ptr, *node; - guint hash_value; - hash_value = (*hash->hash_func) (key); - - if (hash->shared) { - rspamd_mempool_rlock_rwlock (hash->lock); - } - node_ptr = &hash->nodes[hash_value % hash->size]; - - if (hash_return) - *hash_return = hash_value; - - /* Hash table lookup needs to be fast. - * We therefore remove the extra conditional of testing - * whether to call the key_equal_func or not from - * the inner loop. - * - * Additional optimisation: first check if our full hash - * values are equal so we can avoid calling the full-blown - * key equality function in most cases. - */ - if (hash->key_equal_func) { - while ((node = *node_ptr)) { - if (node->key_hash == hash_value && hash->key_equal_func (node->key, key)) { - break; - } - node_ptr = &(*node_ptr)->next; - } - } - else { - while ((node = *node_ptr)) { - if (node->key == key) { - break; - } - node_ptr = &(*node_ptr)->next; - } - } - if (hash->shared) { - rspamd_mempool_runlock_rwlock (hash->lock); - } - return node_ptr; -} - -/* - * Removes a node from the hash table and updates the node count. 
- * No table resize is performed. - */ -static void -rspamd_hash_remove_node (rspamd_hash_t * hash, struct rspamd_hash_node ***node_ptr_ptr) -{ - struct rspamd_hash_node **node_ptr, *node; - - if (hash->shared) { - rspamd_mempool_wlock_rwlock (hash->lock); - } - node_ptr = *node_ptr_ptr; - node = *node_ptr; - - *node_ptr = node->next; - - hash->nnodes--; - if (hash->shared) { - rspamd_mempool_wunlock_rwlock (hash->lock); - } -} - -/* - * Resizes the hash table to the optimal size based on the number of - * nodes currently held. - */ -static void -rspamd_hash_resize (rspamd_hash_t * hash) -{ - struct rspamd_hash_node **new_nodes; - struct rspamd_hash_node *node, *next; - guint hash_val; - gint new_size, i; - - new_size = g_spaced_primes_closest (hash->nnodes); - new_size = CLAMP (new_size, HASH_TABLE_MIN_SIZE, HASH_TABLE_MAX_SIZE); - - if (hash->shared) { - new_nodes = rspamd_mempool_alloc_shared (hash->pool, sizeof (struct rspamd_hash_node *) * new_size); - } - else { - new_nodes = rspamd_mempool_alloc (hash->pool, sizeof (struct rspamd_hash_node *) * new_size); - } - - if (hash->shared) { - rspamd_mempool_wlock_rwlock (hash->lock); - } - - for (i = 0; i < hash->size; i++) { - for (node = hash->nodes[i]; node; node = next) { - next = node->next; - hash_val = node->key_hash % new_size; - node->next = new_nodes[hash_val]; - new_nodes[hash_val] = node; - } - } - - hash->nodes = new_nodes; - hash->size = new_size; - - if (hash->shared) { - rspamd_mempool_wunlock_rwlock (hash->lock); - } -} - -/* - * Resizes the hash table, if needed. 
- */ -static inline void -rspamd_hash_maybe_resize (rspamd_hash_t * hash) -{ - gint nnodes = hash->nnodes; - gint size = hash->size; - - if ((size >= 3 * nnodes && size > HASH_TABLE_MIN_SIZE) || (3 * size <= nnodes && size < HASH_TABLE_MAX_SIZE)) { - rspamd_hash_resize (hash); - } -} - -/* Create new hash in specified pool */ -rspamd_hash_t * -rspamd_hash_new (rspamd_mempool_t * pool, GHashFunc hash_func, GEqualFunc key_equal_func) -{ - rspamd_hash_t *hash; - - hash = rspamd_mempool_alloc (pool, sizeof (rspamd_hash_t)); - hash->size = HASH_TABLE_MIN_SIZE; - hash->nnodes = 0; - hash->hash_func = hash_func ? hash_func : g_direct_hash; - hash->key_equal_func = key_equal_func; - hash->nodes = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_hash_node *) * hash->size); - hash->shared = 0; - hash->pool = pool; - - return hash; -} - -/* - * Create new hash in specified pool using shared memory - */ -rspamd_hash_t * -rspamd_hash_new_shared (rspamd_mempool_t * pool, GHashFunc hash_func, GEqualFunc key_equal_func, gint size) -{ - rspamd_hash_t *hash; - - hash = rspamd_mempool_alloc_shared (pool, sizeof (rspamd_hash_t)); - hash->size = size; - hash->nnodes = 0; - hash->hash_func = hash_func ? 
hash_func : g_direct_hash; - hash->key_equal_func = key_equal_func; - hash->nodes = rspamd_mempool_alloc0_shared (pool, sizeof (struct rspamd_hash_node *) * hash->size); - hash->shared = 1; - /* Get mutex from pool for locking on insert/remove operations */ - hash->lock = rspamd_mempool_get_rwlock (pool); - hash->pool = pool; - - return hash; -} - -/* - * Insert item in hash - */ -void -rspamd_hash_insert (rspamd_hash_t * hash, gpointer key, gpointer value) -{ - struct rspamd_hash_node **node_ptr, *node; - guint key_hash; - - g_return_if_fail (hash != NULL); - node_ptr = rspamd_hash_lookup_node (hash, key, &key_hash); - - if (hash->shared) { - rspamd_mempool_wlock_rwlock (hash->lock); - } - if ((node = *node_ptr)) { - node->key = key; - node->value = value; - } - else { - if (hash->shared) { - node = rspamd_mempool_alloc_shared (hash->pool, sizeof (struct rspamd_hash_node)); - } - else { - node = rspamd_mempool_alloc (hash->pool, sizeof (struct rspamd_hash_node)); - } - - node->key = key; - node->value = value; - node->key_hash = key_hash; - node->next = NULL; - - *node_ptr = node; - hash->nnodes++; - } - if (hash->shared) { - rspamd_mempool_wunlock_rwlock (hash->lock); - } - - if (!hash->shared) { - rspamd_hash_maybe_resize (hash); - } -} - -/* - * Remove item from hash - */ -gboolean -rspamd_hash_remove (rspamd_hash_t * hash, gpointer key) -{ - struct rspamd_hash_node **node_ptr; - - g_return_val_if_fail (hash != NULL, FALSE); - - node_ptr = rspamd_hash_lookup_node (hash, key, NULL); - if (*node_ptr == NULL) - return FALSE; - - rspamd_hash_remove_node (hash, &node_ptr); - rspamd_hash_maybe_resize (hash); - - return TRUE; -} - -/* - * Lookup item from hash - */ -gpointer -rspamd_hash_lookup (rspamd_hash_t * hash, gpointer key) -{ - struct rspamd_hash_node *node; - g_return_val_if_fail (hash != NULL, NULL); - - node = *rspamd_hash_lookup_node (hash, key, NULL); - - return node ? 
node->value : NULL; -} - -/* - * Iterate throught hash - */ -void -rspamd_hash_foreach (rspamd_hash_t * hash, GHFunc func, gpointer user_data) -{ - struct rspamd_hash_node *node; - gint i; - - g_return_if_fail (hash != NULL); - g_return_if_fail (func != NULL); - - if (hash->shared) { - rspamd_mempool_rlock_rwlock (hash->lock); - } - for (i = 0; i < hash->size; i++) { - for (node = hash->nodes[i]; node; node = node->next) { - (*func) (node->key, node->value, user_data); - } - } - if (hash->shared) { - rspamd_mempool_runlock_rwlock (hash->lock); - } -} - -/** - * LRU hashing - */ - -static void -rspamd_lru_hash_destroy_node (gpointer v) -{ - rspamd_lru_element_t *node = v; - - if (node->hash->value_destroy) { - node->hash->value_destroy (node->data); - } - g_queue_delete_link (node->hash->q, node->link); - g_slice_free1 (sizeof (rspamd_lru_element_t), node); -} - -static rspamd_lru_element_t* -rspamd_lru_create_node (rspamd_lru_hash_t *hash, gpointer key, gpointer value, time_t now, guint ttl) -{ - rspamd_lru_element_t *node; - - node = g_slice_alloc (sizeof (rspamd_lru_element_t)); - node->data = value; - node->key = key; - node->store_time = now; - node->ttl = ttl; - node->hash = hash; - - return node; -} - -/** - * Create new lru hash with GHashTable as storage - * @param maxsize maximum elements in a hash - * @param maxage maximum age of elemnt - * @param hash_func pointer to hash function - * @param key_equal_func pointer to function for comparing keys - * @return new rspamd_hash object - */ -rspamd_lru_hash_t* -rspamd_lru_hash_new (GHashFunc hash_func, GEqualFunc key_equal_func, gint maxsize, gint maxage, - GDestroyNotify key_destroy, GDestroyNotify value_destroy) -{ - rspamd_lru_hash_t *new; - - new = g_malloc (sizeof (rspamd_lru_hash_t)); - new->storage = g_hash_table_new_full (hash_func, key_equal_func, key_destroy, rspamd_lru_hash_destroy_node); - new->maxage = maxage; - new->maxsize = maxsize; - new->value_destroy = value_destroy; - new->key_destroy = 
NULL; - new->q = g_queue_new (); - new->insert_func = (lru_cache_insert_func)g_hash_table_replace; - new->lookup_func = (lru_cache_lookup_func)g_hash_table_lookup; - new->delete_func = (lru_cache_delete_func)g_hash_table_remove; - new->destroy_func = (lru_cache_destroy_func)g_hash_table_destroy; - - return new; -} -/** - * Create new lru hash with custom storage - * @param maxsize maximum elements in a hash - * @param maxage maximum age of elemnt - * @param hash_func pointer to hash function - * @param key_equal_func pointer to function for comparing keys - * @return new rspamd_hash object - */ -rspamd_lru_hash_t* -rspamd_lru_hash_new_full (GHashFunc hash_func, GEqualFunc key_equal_func, - gint maxsize, gint maxage, GDestroyNotify key_destroy, GDestroyNotify value_destroy, - gpointer storage, lru_cache_insert_func insert_func, lru_cache_lookup_func lookup_func, - lru_cache_delete_func delete_func) -{ - rspamd_lru_hash_t *new; - - new = g_malloc (sizeof (rspamd_lru_hash_t)); - new->storage = storage; - new->maxage = maxage; - new->maxsize = maxsize; - new->value_destroy = value_destroy; - new->key_destroy = key_destroy; - new->q = g_queue_new (); - new->insert_func = insert_func; - new->lookup_func = lookup_func; - new->delete_func = delete_func; - new->destroy_func = NULL; - - return new; -} - -/** - * Lookup item from hash - * @param hash hash object - * @param key key to find - * @return value of key or NULL if key is not found - */ -gpointer -rspamd_lru_hash_lookup (rspamd_lru_hash_t *hash, gpointer key, time_t now) -{ - rspamd_lru_element_t *res; - - if ((res = hash->lookup_func (hash->storage, key)) != NULL) { - if (res->ttl != 0) { - if (now - res->store_time > res->ttl) { - hash->delete_func (hash->storage, key); - return NULL; - } - } - if (hash->maxage > 0) { - if (now - res->store_time > hash->maxage) { - res = g_queue_peek_tail (hash->q); - /* Expire elements from queue tail */ - while (res != NULL && now - res->store_time > hash->maxage) { - 
hash->delete_func (hash->storage, res->key); - res = g_queue_peek_tail (hash->q); - } - - return NULL; - } - } - return res->data; - } - - return NULL; -} -/** - * Insert item in hash - * @param hash hash object - * @param key key to insert - * @param value value of key - */ -void -rspamd_lru_hash_insert (rspamd_lru_hash_t *hash, gpointer key, gpointer value, - time_t now, guint ttl) -{ - rspamd_lru_element_t *res; - gint removed = 0; - - if ((res = hash->lookup_func (hash->storage, key)) != NULL) { - hash->delete_func (hash->storage, res->key); - } - else { - if (hash->maxsize > 0 && - (gint)g_queue_get_length (hash->q) >= hash->maxsize) { - /* Expire some elements */ - res = g_queue_peek_tail (hash->q); - if (hash->maxage > 0) { - while (res != NULL && now - res->store_time > hash->maxage) { - if (res->key != NULL) { - hash->delete_func (hash->storage, res->key); - } - else { - break; - } - res = g_queue_peek_tail (hash->q); - removed ++; - } - } - if (removed == 0) { - /* Remove explicitly */ - if (res->key != NULL) { - hash->delete_func (hash->storage, res->key); - } - } - } - } - - res = rspamd_lru_create_node (hash, key, value, now, ttl); - hash->insert_func (hash->storage, key, res); - g_queue_push_head (hash->q, res); - res->link = g_queue_peek_head_link (hash->q); -} - -void -rspamd_lru_hash_destroy (rspamd_lru_hash_t *hash) -{ - if (hash->destroy_func) { - hash->destroy_func (hash->storage); - } - g_queue_free (hash->q); - g_free (hash); -} - -/* - * vi:ts=4 - */ diff --git a/src/hash.h b/src/hash.h deleted file mode 100644 index c5d4639af..000000000 --- a/src/hash.h +++ /dev/null @@ -1,160 +0,0 @@ -/** - * @file hash.h - * Hash table implementation that allows using memory pools for storage as well as using - * shared memory for this purpose - */ - -#ifndef RSPAMD_HASH_H -#define RSPAMD_HASH_H - -#include "mem_pool.h" - -struct rspamd_hash_node { - gpointer key; - gpointer value; - guint key_hash; - struct rspamd_hash_node *next; -}; - -typedef struct 
rspamd_hash_s { - gint size; - gint nnodes; - struct rspamd_hash_node **nodes; - - GHashFunc hash_func; - GEqualFunc key_equal_func; - gint shared; - rspamd_mempool_rwlock_t *lock; - rspamd_mempool_t *pool; -} rspamd_hash_t; - -typedef void (*lru_cache_insert_func)(gpointer storage, gpointer key, gpointer value); -typedef gpointer (*lru_cache_lookup_func)(gpointer storage, gpointer key); -typedef gboolean (*lru_cache_delete_func)(gpointer storage, gpointer key); -typedef void (*lru_cache_destroy_func)(gpointer storage); - -typedef struct rspamd_lru_hash_s { - gint maxsize; - gint maxage; - GDestroyNotify value_destroy; - GDestroyNotify key_destroy; - GQueue *q; - gpointer storage; - lru_cache_insert_func insert_func; - lru_cache_lookup_func lookup_func; - lru_cache_delete_func delete_func; - lru_cache_destroy_func destroy_func; -} rspamd_lru_hash_t; - -typedef struct rspamd_lru_element_s { - gpointer data; - gpointer key; - time_t store_time; - guint ttl; - rspamd_lru_hash_t *hash; - GList *link; -} rspamd_lru_element_t; - - -#define rspamd_hash_size(x) (x)->nnodes - -/** - * Create new hash in specified pool - * @param pool memory pool object - * @param hash_func pointer to hash function - * @param key_equal_func pointer to function for comparing keys - * @return new rspamd_hash object - */ -rspamd_hash_t* rspamd_hash_new (rspamd_mempool_t *pool, GHashFunc hash_func, GEqualFunc key_equal_func); - -/** - * Create new hash in specified pool using shared memory - * @param pool memory pool object - * @param hash_func pointer to hash function - * @param key_equal_func pointer to function for comparing keys - * @return new rspamd_hash object - */ -rspamd_hash_t* rspamd_hash_new_shared (rspamd_mempool_t *pool, GHashFunc hash_func, GEqualFunc key_equal_func, gint size); - -/** - * Insert item in hash - * @param hash hash object - * @param key key to insert - * @param value value of key - */ -void rspamd_hash_insert (rspamd_hash_t *hash, gpointer key, gpointer value); - 
-/** - * Remove item from hash - * @param hash hash object - * @param key key to delete - */ -gboolean rspamd_hash_remove (rspamd_hash_t *hash, gpointer key); - -/** - * Lookup item from hash - * @param hash hash object - * @param key key to find - * @return value of key or NULL if key is not found - */ -gpointer rspamd_hash_lookup (rspamd_hash_t *hash, gpointer key); - -/** - * Iterate throught hash - * @param hash hash object - * @param func user's function that would be called for each key/value pair - * @param user_data pointer to user's data that would be passed to user's function - */ -void rspamd_hash_foreach (rspamd_hash_t *hash, GHFunc func, gpointer user_data); - -/** - * Create new lru hash - * @param maxsize maximum elements in a hash - * @param maxage maximum age of elemnt - * @param hash_func pointer to hash function - * @param key_equal_func pointer to function for comparing keys - * @return new rspamd_hash object - */ -rspamd_lru_hash_t* rspamd_lru_hash_new (GHashFunc hash_func, GEqualFunc key_equal_func, - gint maxsize, gint maxage, GDestroyNotify key_destroy, GDestroyNotify value_destroy); - -/** - * Create new lru hash with custom storage - * @param maxsize maximum elements in a hash - * @param maxage maximum age of elemnt - * @param hash_func pointer to hash function - * @param key_equal_func pointer to function for comparing keys - * @return new rspamd_hash object - */ -rspamd_lru_hash_t* rspamd_lru_hash_new_full (GHashFunc hash_func, GEqualFunc key_equal_func, - gint maxsize, gint maxage, GDestroyNotify key_destroy, GDestroyNotify value_destroy, - gpointer storage, lru_cache_insert_func insert_func, lru_cache_lookup_func lookup_func, - lru_cache_delete_func delete_func); -/** - * Lookup item from hash - * @param hash hash object - * @param key key to find - * @return value of key or NULL if key is not found - */ -gpointer rspamd_lru_hash_lookup (rspamd_lru_hash_t *hash, gpointer key, time_t now); -/** - * Insert item in hash - * @param hash 
hash object - * @param key key to insert - * @param value value of key - */ -void rspamd_lru_hash_insert (rspamd_lru_hash_t *hash, gpointer key, gpointer value, - time_t now, guint ttl); - -/** - * Remove lru hash - * @param hash hash object - */ - -void rspamd_lru_hash_destroy (rspamd_lru_hash_t *hash); - -#endif - -/* - * vi:ts=4 - */ diff --git a/src/html.c b/src/html.c deleted file mode 100644 index 028c54f6c..000000000 --- a/src/html.c +++ /dev/null @@ -1,942 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "util.h" -#include "main.h" -#include "message.h" -#include "html.h" -#include "url.h" - -static sig_atomic_t tags_sorted = 0; - -static struct html_tag tag_defs[] = { - /* W3C defined elements */ - {Tag_A, "a", (CM_INLINE)}, - {Tag_ABBR, "abbr", (CM_INLINE)}, - {Tag_ACRONYM, "acronym", (CM_INLINE)}, - {Tag_ADDRESS, "address", (CM_BLOCK)}, - {Tag_APPLET, "applet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)}, - {Tag_AREA, "area", (CM_BLOCK | CM_EMPTY)}, - {Tag_B, "b", (CM_INLINE)}, - {Tag_BASE, "base", (CM_HEAD | CM_EMPTY)}, - {Tag_BASEFONT, "basefont", (CM_INLINE | CM_EMPTY)}, - {Tag_BDO, "bdo", (CM_INLINE)}, - {Tag_BIG, "big", (CM_INLINE)}, - {Tag_BLOCKQUOTE, "blockquote", (CM_BLOCK)}, - {Tag_BODY, "body", (CM_HTML | CM_OPT | CM_OMITST)}, - {Tag_BR, "br", (CM_INLINE | CM_EMPTY)}, - {Tag_BUTTON, "button", (CM_INLINE)}, - {Tag_CAPTION, "caption", (CM_TABLE)}, - {Tag_CENTER, "center", (CM_BLOCK)}, - {Tag_CITE, "cite", (CM_INLINE)}, - {Tag_CODE, "code", (CM_INLINE)}, - {Tag_COL, "col", (CM_TABLE | CM_EMPTY)}, - {Tag_COLGROUP, "colgroup", (CM_TABLE | CM_OPT)}, - {Tag_DD, "dd", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)}, - {Tag_DEL, "del", (CM_INLINE | CM_BLOCK | CM_MIXED)}, - {Tag_DFN, "dfn", (CM_INLINE)}, - {Tag_DIR, "dir", (CM_BLOCK | CM_OBSOLETE)}, - {Tag_DIV, "div", (CM_BLOCK)}, - {Tag_DL, "dl", (CM_BLOCK)}, - {Tag_DT, "dt", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)}, - {Tag_EM, "em", (CM_INLINE)}, - {Tag_FIELDSET, "fieldset", (CM_BLOCK)}, - {Tag_FONT, "font", (CM_INLINE)}, - {Tag_FORM, "form", (CM_BLOCK)}, - {Tag_FRAME, "frame", (CM_FRAMES | CM_EMPTY)}, - {Tag_FRAMESET, "frameset", (CM_HTML | CM_FRAMES)}, - {Tag_H1, "h1", (CM_BLOCK | CM_HEADING)}, - {Tag_H2, "h2", (CM_BLOCK | CM_HEADING)}, - {Tag_H3, "h3", (CM_BLOCK | CM_HEADING)}, - {Tag_H4, "h4", (CM_BLOCK | CM_HEADING)}, - {Tag_H5, "h5", (CM_BLOCK | CM_HEADING)}, - {Tag_H6, "h6", (CM_BLOCK | CM_HEADING)}, - {Tag_HEAD, "head", (CM_HTML | CM_OPT | CM_OMITST)}, - {Tag_HR, "hr", 
(CM_BLOCK | CM_EMPTY)}, - {Tag_HTML, "html", (CM_HTML | CM_OPT | CM_OMITST)}, - {Tag_I, "i", (CM_INLINE)}, - {Tag_IFRAME, "iframe", (CM_INLINE)}, - {Tag_IMG, "img", (CM_INLINE | CM_IMG | CM_EMPTY)}, - {Tag_INPUT, "input", (CM_INLINE | CM_IMG | CM_EMPTY)}, - {Tag_INS, "ins", (CM_INLINE | CM_BLOCK | CM_MIXED)}, - {Tag_ISINDEX, "isindex", (CM_BLOCK | CM_EMPTY)}, - {Tag_KBD, "kbd", (CM_INLINE)}, - {Tag_LABEL, "label", (CM_INLINE)}, - {Tag_LEGEND, "legend", (CM_INLINE)}, - {Tag_LI, "li", (CM_LIST | CM_OPT | CM_NO_INDENT)}, - {Tag_LINK, "link", (CM_HEAD | CM_EMPTY)}, - {Tag_LISTING, "listing", (CM_BLOCK | CM_OBSOLETE)}, - {Tag_MAP, "map", (CM_INLINE)}, - {Tag_MENU, "menu", (CM_BLOCK | CM_OBSOLETE)}, - {Tag_META, "meta", (CM_HEAD | CM_EMPTY)}, - {Tag_NOFRAMES, "noframes", (CM_BLOCK | CM_FRAMES)}, - {Tag_NOSCRIPT, "noscript", (CM_BLOCK | CM_INLINE | CM_MIXED)}, - {Tag_OBJECT, "object", (CM_OBJECT | CM_HEAD | CM_IMG | CM_INLINE | CM_PARAM)}, - {Tag_OL, "ol", (CM_BLOCK)}, - {Tag_OPTGROUP, "optgroup", (CM_FIELD | CM_OPT)}, - {Tag_OPTION, "option", (CM_FIELD | CM_OPT)}, - {Tag_P, "p", (CM_BLOCK | CM_OPT)}, - {Tag_PARAM, "param", (CM_INLINE | CM_EMPTY)}, - {Tag_PLAINTEXT, "plaintext", (CM_BLOCK | CM_OBSOLETE)}, - {Tag_PRE, "pre", (CM_BLOCK)}, - {Tag_Q, "q", (CM_INLINE)}, - {Tag_RB, "rb", (CM_INLINE)}, - {Tag_RBC, "rbc", (CM_INLINE)}, - {Tag_RP, "rp", (CM_INLINE)}, - {Tag_RT, "rt", (CM_INLINE)}, - {Tag_RTC, "rtc", (CM_INLINE)}, - {Tag_RUBY, "ruby", (CM_INLINE)}, - {Tag_S, "s", (CM_INLINE)}, - {Tag_SAMP, "samp", (CM_INLINE)}, - {Tag_SCRIPT, "script", (CM_HEAD | CM_MIXED | CM_BLOCK | CM_INLINE)}, - {Tag_SELECT, "select", (CM_INLINE | CM_FIELD)}, - {Tag_SMALL, "small", (CM_INLINE)}, - {Tag_SPAN, "span", (CM_INLINE)}, - {Tag_STRIKE, "strike", (CM_INLINE)}, - {Tag_STRONG, "strong", (CM_INLINE)}, - {Tag_STYLE, "style", (CM_HEAD)}, - {Tag_SUB, "sub", (CM_INLINE)}, - {Tag_SUP, "sup", (CM_INLINE)}, - {Tag_TABLE, "table", (CM_BLOCK)}, - {Tag_TBODY, "tbody", (CM_TABLE | CM_ROWGRP | 
CM_OPT)}, - {Tag_TD, "td", (CM_ROW | CM_OPT | CM_NO_INDENT)}, - {Tag_TEXTAREA, "textarea", (CM_INLINE | CM_FIELD)}, - {Tag_TFOOT, "tfoot", (CM_TABLE | CM_ROWGRP | CM_OPT)}, - {Tag_TH, "th", (CM_ROW | CM_OPT | CM_NO_INDENT)}, - {Tag_THEAD, "thead", (CM_TABLE | CM_ROWGRP | CM_OPT)}, - {Tag_TITLE, "title", (CM_HEAD)}, - {Tag_TR, "tr", (CM_TABLE | CM_OPT)}, - {Tag_TT, "tt", (CM_INLINE)}, - {Tag_U, "u", (CM_INLINE)}, - {Tag_UL, "ul", (CM_BLOCK)}, - {Tag_VAR, "var", (CM_INLINE)}, - {Tag_XMP, "xmp", (CM_BLOCK | CM_OBSOLETE)}, - {Tag_NEXTID, "nextid", (CM_HEAD | CM_EMPTY)}, - - /* proprietary elements */ - {Tag_ALIGN, "align", (CM_BLOCK)}, - {Tag_BGSOUND, "bgsound", (CM_HEAD | CM_EMPTY)}, - {Tag_BLINK, "blink", (CM_INLINE)}, - {Tag_COMMENT, "comment", (CM_INLINE)}, - {Tag_EMBED, "embed", (CM_INLINE | CM_IMG | CM_EMPTY)}, - {Tag_ILAYER, "ilayer", (CM_INLINE)}, - {Tag_KEYGEN, "keygen", (CM_INLINE | CM_EMPTY)}, - {Tag_LAYER, "layer", (CM_BLOCK)}, - {Tag_MARQUEE, "marquee", (CM_INLINE | CM_OPT)}, - {Tag_MULTICOL, "multicol", (CM_BLOCK)}, - {Tag_NOBR, "nobr", (CM_INLINE)}, - {Tag_NOEMBED, "noembed", (CM_INLINE)}, - {Tag_NOLAYER, "nolayer", (CM_BLOCK | CM_INLINE | CM_MIXED)}, - {Tag_NOSAVE, "nosave", (CM_BLOCK)}, - {Tag_SERVER, "server", (CM_HEAD | CM_MIXED | CM_BLOCK | CM_INLINE)}, - {Tag_SERVLET, "servlet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)}, - {Tag_SPACER, "spacer", (CM_INLINE | CM_EMPTY)}, - {Tag_WBR, "wbr", (CM_INLINE | CM_EMPTY)}, -}; - -static sig_atomic_t entities_sorted = 0; -struct _entity; -typedef struct _entity entity; - -struct _entity { - gchar *name; - uint code; - gchar *replacement; -}; - - -static entity entities_defs[] = { - /* - ** Markup pre-defined character entities - */ - {"quot", 34, "\""}, - {"amp", 38, "&"}, - {"apos", 39, "'"}, - {"lt", 60, "<"}, - {"gt", 62, ">"}, - - /* - ** Latin-1 character entities - */ - {"nbsp", 160, " "}, - {"iexcl", 161, "!"}, - {"cent", 162, "cent"}, - {"pound", 163, "pound"}, - {"curren", 164, "current"}, - 
{"yen", 165, "yen"}, - {"brvbar", 166, NULL}, - {"sect", 167, NULL}, - {"uml", 168, "uml"}, - {"copy", 169, "c"}, - {"ordf", 170, NULL}, - {"laquo", 171, "\""}, - {"not", 172, "!"}, - {"shy", 173, NULL}, - {"reg", 174, "r"}, - {"macr", 175, NULL}, - {"deg", 176, "deg"}, - {"plusmn", 177, "+-"}, - {"sup2", 178, "2"}, - {"sup3", 179, "3"}, - {"acute", 180, NULL}, - {"micro", 181, NULL}, - {"para", 182, NULL}, - {"middot", 183, "."}, - {"cedil", 184, NULL}, - {"sup1", 185, "1"}, - {"ordm", 186, NULL}, - {"raquo", 187, "\""}, - {"frac14", 188, "1/4"}, - {"frac12", 189, "1/2"}, - {"frac34", 190, "3/4"}, - {"iquest", 191, "i"}, - {"Agrave", 192, "a"}, - {"Aacute", 193, "a"}, - {"Acirc", 194, "a"}, - {"Atilde", 195, "a"}, - {"Auml", 196, "a"}, - {"Aring", 197, "a"}, - {"AElig", 198, "a"}, - {"Ccedil", 199, "c"}, - {"Egrave", 200, "e"}, - {"Eacute", 201, "e"}, - {"Ecirc", 202, "e"}, - {"Euml", 203, "e"}, - {"Igrave", 204, "i"}, - {"Iacute", 205, "i"}, - {"Icirc", 206, "i"}, - {"Iuml", 207, "i"}, - {"ETH", 208, "e"}, - {"Ntilde", 209, "n"}, - {"Ograve", 210, "o"}, - {"Oacute", 211, "o"}, - {"Ocirc", 212, "o"}, - {"Otilde", 213, "o"}, - {"Ouml", 214, "o"}, - {"times", 215, "t"}, - {"Oslash", 216, "o"}, - {"Ugrave", 217, "u"}, - {"Uacute", 218, "u"}, - {"Ucirc", 219, "u"}, - {"Uuml", 220, "u"}, - {"Yacute", 221, "y"}, - {"THORN", 222, "t"}, - {"szlig", 223, "s"}, - {"agrave", 224, "a"}, - {"aacute", 225, "a"}, - {"acirc", 226, "a"}, - {"atilde", 227, "a"}, - {"auml", 228, "a"}, - {"aring", 229, "a"}, - {"aelig", 230, "a"}, - {"ccedil", 231, "c"}, - {"egrave", 232, "e"}, - {"eacute", 233, "e"}, - {"ecirc", 234, "e"}, - {"euml", 235, "e"}, - {"igrave", 236, "e"}, - {"iacute", 237, "e"}, - {"icirc", 238, "e"}, - {"iuml", 239, "e"}, - {"eth", 240, "e"}, - {"ntilde", 241, "n"}, - {"ograve", 242, "o"}, - {"oacute", 243, "o"}, - {"ocirc", 244, "o"}, - {"otilde", 245, "o"}, - {"ouml", 246, "o"}, - {"divide", 247, "/"}, - {"oslash", 248, "/"}, - {"ugrave", 249, "u"}, - {"uacute", 250, 
"u"}, - {"ucirc", 251, "u"}, - {"uuml", 252, "u"}, - {"yacute", 253, "y"}, - {"thorn", 254, "t"}, - {"yuml", 255, "y"}, - - /* - ** Extended Entities defined in HTML 4: Symbols - */ - {"fnof", 402, "f"}, - {"Alpha", 913, "alpha"}, - {"Beta", 914, "beta"}, - {"Gamma", 915, "gamma"}, - {"Delta", 916, "delta"}, - {"Epsilon", 917, "epsilon"}, - {"Zeta", 918, "zeta"}, - {"Eta", 919, "eta"}, - {"Theta", 920, "theta"}, - {"Iota", 921, "iota"}, - {"Kappa", 922, "kappa"}, - {"Lambda", 923, "lambda"}, - {"Mu", 924, "mu"}, - {"Nu", 925, "nu"}, - {"Xi", 926, "xi"}, - {"Omicron", 927, "omicron"}, - {"Pi", 928, "pi"}, - {"Rho", 929, "rho"}, - {"Sigma", 931, "sigma"}, - {"Tau", 932, "tau"}, - {"Upsilon", 933, "upsilon"}, - {"Phi", 934, "phi"}, - {"Chi", 935, "chi"}, - {"Psi", 936, "psi"}, - {"Omega", 937, "omega"}, - {"alpha", 945, "alpha"}, - {"beta", 946, "beta"}, - {"gamma", 947, "gamma"}, - {"delta", 948, "delta"}, - {"epsilon", 949, "epsilon"}, - {"zeta", 950, "zeta"}, - {"eta", 951, "eta"}, - {"theta", 952, "theta"}, - {"iota", 953, "iota"}, - {"kappa", 954, "kappa"}, - {"lambda", 955, "lambda"}, - {"mu", 956, "mu"}, - {"nu", 957, "nu"}, - {"xi", 958, "xi"}, - {"omicron", 959, "omicron"}, - {"pi", 960, "pi"}, - {"rho", 961, "rho"}, - {"sigmaf", 962, "sigmaf"}, - {"sigma", 963, "sigma"}, - {"tau", 964, "tau"}, - {"upsilon", 965, "upsilon"}, - {"phi", 966, "phi"}, - {"chi", 967, "chi"}, - {"psi", 968, "psi"}, - {"omega", 969, "omega"}, - {"thetasym", 977, "thetasym"}, - {"upsih", 978, "upsih"}, - {"piv", 982, "piv"}, - {"bull", 8226, "bull"}, - {"hellip", 8230, "..."}, - {"prime", 8242, "'"}, - {"Prime", 8243, "'"}, - {"oline", 8254, "-"}, - {"frasl", 8260, NULL}, - {"weierp", 8472, NULL}, - {"image", 8465, NULL}, - {"real", 8476, NULL}, - {"trade", 8482, NULL}, - {"alefsym", 8501, "a"}, - {"larr", 8592, NULL}, - {"uarr", 8593, NULL}, - {"rarr", 8594, NULL}, - {"darr", 8595, NULL}, - {"harr", 8596, NULL}, - {"crarr", 8629, NULL}, - {"lArr", 8656, NULL}, - {"uArr", 8657, 
NULL}, - {"rArr", 8658, NULL}, - {"dArr", 8659, NULL}, - {"hArr", 8660, NULL}, - {"forall", 8704, NULL}, - {"part", 8706, NULL}, - {"exist", 8707, NULL}, - {"empty", 8709, NULL}, - {"nabla", 8711, NULL}, - {"isin", 8712, NULL}, - {"notin", 8713, NULL}, - {"ni", 8715, NULL}, - {"prod", 8719, NULL}, - {"sum", 8721, "E"}, - {"minus", 8722, "-"}, - {"lowast", 8727, NULL}, - {"radic", 8730, NULL}, - {"prop", 8733, NULL}, - {"infin", 8734, NULL}, - {"ang", 8736, "'"}, - {"and", 8743, "&"}, - {"or", 8744, "|"}, - {"cap", 8745, NULL}, - {"cup", 8746, NULL}, - {"gint", 8747, NULL}, - {"there4", 8756, NULL}, - {"sim", 8764, NULL}, - {"cong", 8773, NULL}, - {"asymp", 8776, NULL}, - {"ne", 8800, "!="}, - {"equiv", 8801, "=="}, - {"le", 8804, "<="}, - {"ge", 8805, ">="}, - {"sub", 8834, NULL}, - {"sup", 8835, NULL}, - {"nsub", 8836, NULL}, - {"sube", 8838, NULL}, - {"supe", 8839, NULL}, - {"oplus", 8853, NULL}, - {"otimes", 8855, NULL}, - {"perp", 8869, NULL}, - {"sdot", 8901, NULL}, - {"lceil", 8968, NULL}, - {"rceil", 8969, NULL}, - {"lfloor", 8970, NULL}, - {"rfloor", 8971, NULL}, - {"lang", 9001, NULL}, - {"rang", 9002, NULL}, - {"loz", 9674, NULL}, - {"spades", 9824, NULL}, - {"clubs", 9827, NULL}, - {"hearts", 9829, NULL}, - {"diams", 9830, NULL}, - - /* - ** Extended Entities defined in HTML 4: Special (less Markup at top) - */ - {"OElig", 338, NULL}, - {"oelig", 339, NULL}, - {"Scaron", 352, NULL}, - {"scaron", 353, NULL}, - {"Yuml", 376, NULL}, - {"circ", 710, NULL}, - {"tilde", 732, NULL}, - {"ensp", 8194, NULL}, - {"emsp", 8195, NULL}, - {"thinsp", 8201, NULL}, - {"zwnj", 8204, NULL}, - {"zwj", 8205, NULL}, - {"lrm", 8206, NULL}, - {"rlm", 8207, NULL}, - {"ndash", 8211, "-"}, - {"mdash", 8212, "-"}, - {"lsquo", 8216, "'"}, - {"rsquo", 8217, "'"}, - {"sbquo", 8218, "\""}, - {"ldquo", 8220, "\""}, - {"rdquo", 8221, "\""}, - {"bdquo", 8222, "\""}, - {"dagger", 8224, "T"}, - {"Dagger", 8225, "T"}, - {"permil", 8240, NULL}, - {"lsaquo", 8249, "\""}, - {"rsaquo", 8250, 
"\""}, - {"euro", 8364, "E"}, -}; - -static entity entities_defs_num[ (G_N_ELEMENTS (entities_defs)) ]; - -static gint -tag_cmp (const void *m1, const void *m2) -{ - const struct html_tag *p1 = m1; - const struct html_tag *p2 = m2; - - return g_ascii_strcasecmp (p1->name, p2->name); -} - -static gint -entity_cmp (const void *m1, const void *m2) -{ - const entity *p1 = m1; - const entity *p2 = m2; - - return g_ascii_strcasecmp (p1->name, p2->name); -} - -static gint -entity_cmp_num (const void *m1, const void *m2) -{ - const entity *p1 = m1; - const entity *p2 = m2; - - return p1->code - p2->code; -} - -static GNode * -construct_html_node (rspamd_mempool_t * pool, gchar *text, gsize tag_len) -{ - struct html_node *html; - GNode *n = NULL; - struct html_tag key, *found; - gchar t; - - if (text == NULL || *text == '\0') { - return NULL; - } - - html = rspamd_mempool_alloc0 (pool, sizeof (struct html_node)); - - /* Check whether this tag is fully closed */ - if (*(text + tag_len - 1) == '/') { - html->flags |= FL_CLOSED; - } - - /* Check xml tag */ - if (*text == '?' 
&& g_ascii_strncasecmp (text + 1, "xml", sizeof ("xml") - 1) == 0) { - html->flags |= FL_XML; - html->tag = NULL; - } - else { - if (*text == '/') { - html->flags |= FL_CLOSING; - text++; - } - - /* Find end of tag name */ - key.name = text; - while (*text && g_ascii_isalnum (*(++text))); - - t = *text; - *text = '\0'; - - /* Match tag id by tag name */ - if ((found = bsearch (&key, tag_defs, G_N_ELEMENTS (tag_defs), sizeof (struct html_tag), tag_cmp)) != NULL) { - *text = t; - html->tag = found; - } - else { - *text = t; - return NULL; - } - } - - n = g_node_new (html); - - return n; -} - -static gboolean -check_balance (GNode * node, GNode ** cur_level) -{ - struct html_node *arg = node->data, *tmp; - GNode *cur; - - if (arg->flags & FL_CLOSING) { - /* First of all check whether this tag is closing tag for parent node */ - cur = node->parent; - while (cur && cur->data) { - tmp = cur->data; - if ((tmp->tag && arg->tag) && tmp->tag->id == arg->tag->id && (tmp->flags & FL_CLOSED) == 0) { - tmp->flags |= FL_CLOSED; - /* Destroy current node as we find corresponding parent node */ - g_node_destroy (node); - /* Change level */ - *cur_level = cur->parent; - return TRUE; - } - cur = cur->parent; - } - } - else { - return TRUE; - } - - return FALSE; -} - -struct html_tag * -get_tag_by_name (const gchar *name) -{ - struct html_tag key; - - key.name = name; - - return bsearch (&key, tag_defs, G_N_ELEMENTS (tag_defs), sizeof (struct html_tag), tag_cmp); -} - -/* Decode HTML entitles in text */ -void -decode_entitles (gchar *s, guint * len) -{ - guint l, rep_len; - gchar *t = s; /* t - tortoise */ - gchar *h = s; /* h - hare */ - gchar *e = s; - gchar *end_ptr; - gint state = 0, val, base; - entity *found, key; - - if (len == NULL || *len == 0) { - l = strlen (s); - } - else { - l = *len; - } - - while (h - s < (gint)l) { - switch (state) { - /* Out of entitle */ - case 0: - if (*h == '&') { - state = 1; - e = h; - h++; - continue; - } - else { - *t = *h; - h++; - t++; - } - 
break; - case 1: - if (*h == ';') { - /* Determine base */ - /* First find in entities table */ - - key.name = e + 1; - *h = '\0'; - if (*(e + 1) != '#' && (found = bsearch (&key, entities_defs, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp)) != NULL) { - if (found->replacement) { - rep_len = strlen (found->replacement); - memcpy (t, found->replacement, rep_len); - t += rep_len; - } - } - else { - if (*(e + 2) == 'x' || *(e + 2) == 'X') { - base = 16; - } - else if (*(e + 2) == 'o' || *(e + 2) == 'O') { - base = 8; - } - else { - base = 10; - } - if (base == 10) { - val = strtoul ((e + 2), &end_ptr, base); - } - else { - val = strtoul ((e + 3), &end_ptr, base); - } - if (end_ptr != NULL && *end_ptr != '\0') { - /* Skip undecoded */ - t = h; - } - else { - /* Search for a replacement */ - key.code = val; - found = bsearch (&key, entities_defs_num, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp_num); - if (found) { - if (found->replacement) { - rep_len = strlen (found->replacement); - memcpy (t, found->replacement, rep_len); - t += rep_len; - } - } - } - } - *h = ';'; - state = 0; - } - h++; - break; - } - } - *t = '\0'; - - if (len != NULL) { - *len = t - s; - } -} - -static void -check_phishing (struct rspamd_task *task, struct uri *href_url, const gchar *url_text, gsize remain, tag_id_t id) -{ - struct uri *new; - gchar *url_str; - const gchar *p, *c; - gchar tagbuf[128]; - struct html_tag *tag; - gsize len = 0; - gint rc; - - p = url_text; - while (len < remain) { - if (*p == '<') { - /* Check tag name */ - if (*(p + 1) == '/') { - c = p + 2; - } - else { - c = p + 1; - } - while (len < remain) { - if (!g_ascii_isspace (*p) && *p != '>') { - p ++; - len ++; - } - else { - break; - } - } - rspamd_strlcpy (tagbuf, c, MIN ((gint)sizeof(tagbuf), p - c + 1)); - if ((tag = get_tag_by_name (tagbuf)) != NULL) { - if (tag->id == id) { - break; - } - else if (tag->id == Tag_IMG) { - /* We should ignore IMG tag here */ - while (len < remain && *p != 
'>' && *p != '<') { - p ++; - len ++; - } - if (*p == '>' && len < remain) { - p ++; - } - - remain -= p - url_text; - url_text = p; - len = 0; - continue; - } - } - } - len ++; - p ++; - } - - if (url_try_text (task->task_pool, url_text, len, NULL, NULL, &url_str, TRUE) && url_str != NULL) { - new = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct uri)); - if (new != NULL) { - g_strstrip (url_str); - rc = parse_uri (new, url_str, task->task_pool); - - if (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) { - if (g_ascii_strncasecmp (href_url->host, new->host, - MAX (href_url->hostlen, new->hostlen)) != 0) { - /* Special check for urls beginning with 'www' */ - if (new->hostlen > 4 && href_url->hostlen > 4) { - p = new->host; - c = NULL; - if ((p[0] == 'w' || p[0] == 'W') && - (p[1] == 'w' || p[1] == 'W') && - (p[2] == 'w' || p[2] == 'W') && - (p[3] == '.')) { - p += 4; - c = href_url->host; - len = MAX (href_url->hostlen, new->hostlen - 4); - } - else { - p = href_url->host; - if ((p[0] == 'w' || p[0] == 'W') && - (p[1] == 'w' || p[1] == 'W') && - (p[2] == 'w' || p[2] == 'W') && - (p[3] == '.')) { - p += 4; - c = new->host; - len = MAX (href_url->hostlen - 4, new->hostlen); - } - } - /* Compare parts and check for phished hostname */ - if (c != NULL) { - if (g_ascii_strncasecmp (p, c, len) != 0) { - href_url->is_phished = TRUE; - href_url->phished_url = new; - } - } - else { - href_url->is_phished = TRUE; - href_url->phished_url = new; - } - } - else { - href_url->is_phished = TRUE; - href_url->phished_url = new; - } - } - } - else { - msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc)); - } - } - } - -} - -static void -parse_tag_url (struct rspamd_task *task, struct mime_text_part *part, tag_id_t id, - gchar *tag_text, gsize tag_len, gsize remain) -{ - gchar *c = NULL, *p, *url_text; - gint len, rc; - struct uri *url; - gboolean got_single_quote = FALSE, got_double_quote = FALSE; - - /* For A tags search 
for href= and for IMG tags search for src= */ - if (id == Tag_A) { - c = rspamd_strncasestr (tag_text, "href=", tag_len); - len = sizeof ("href=") - 1; - } - else if (id == Tag_IMG) { - c = rspamd_strncasestr (tag_text, "src=", tag_len); - len = sizeof ("src=") - 1; - } - - if (c != NULL) { - /* First calculate length */ - c += len; - /* Skip spaces after eqsign */ - while (g_ascii_isspace (*c)) { - c++; - } - len = 0; - p = c; - while (*p && (guint)(p - tag_text) < tag_len) { - if (got_double_quote) { - if (*p == '"') { - break; - } - else { - len++; - } - } - else if (got_single_quote) { - if (*p == '\'') { - break; - } - else { - len++; - } - } - else if (g_ascii_isspace (*p) || *p == '>' || (*p == '/' && *(p + 1) == '>') || *p == '\r' || *p == '\n') { - break; - } - else { - if (*p == '"' && !got_single_quote) { - got_double_quote = !got_double_quote; - } - else if (*p == '\'' && !got_double_quote) { - got_single_quote = !got_single_quote; - } - else { - len++; - } - } - p++; - } - - if (got_single_quote || got_double_quote) { - c++; - } - - if (len == 0) { - return; - } - - url_text = rspamd_mempool_alloc (task->task_pool, len + 1); - rspamd_strlcpy (url_text, c, len + 1); - decode_entitles (url_text, NULL); - - if (g_ascii_strncasecmp (url_text, "http://", sizeof ("http://") - 1) != 0 && - g_ascii_strncasecmp (url_text, "www", sizeof ("www") - 1) != 0 && - g_ascii_strncasecmp (url_text, "ftp://", sizeof ("ftp://") - 1) != 0 && - g_ascii_strncasecmp (url_text, "mailto:", sizeof ("mailto:") - 1) != 0) { - return; - } - - url = rspamd_mempool_alloc (task->task_pool, sizeof (struct uri)); - rc = parse_uri (url, url_text, task->task_pool); - - if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST && url->hostlen != 0) { - /* - * Check for phishing - */ - if ((p = strchr (c, '>')) != NULL && id == Tag_A) { - p ++; - check_phishing (task, url, p, remain - (p - tag_text), id); - } - if (g_tree_lookup (task->urls, url) == NULL) { - g_tree_insert (task->urls, url, url); 
- } - } - } -} - -gboolean -add_html_node (struct rspamd_task *task, rspamd_mempool_t * pool, struct mime_text_part *part, - gchar *tag_text, gsize tag_len, gsize remain, GNode ** cur_level) -{ - GNode *new; - struct html_node *data; - - if (!tags_sorted) { - qsort (tag_defs, G_N_ELEMENTS (tag_defs), sizeof (struct html_tag), tag_cmp); - tags_sorted = 1; - } - if (!entities_sorted) { - qsort (entities_defs, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp); - memcpy (entities_defs_num, entities_defs, sizeof (entities_defs)); - qsort (entities_defs_num, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp_num); - entities_sorted = 1; - } - - /* First call of this function */ - if (part->html_nodes == NULL) { - /* Insert root node */ - new = g_node_new (NULL); - *cur_level = new; - part->html_nodes = new; - rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_node_destroy, part->html_nodes); - /* Call once again with root node */ - return add_html_node (task, pool, part, tag_text, tag_len, remain, cur_level); - } - else { - new = construct_html_node (pool, tag_text, tag_len); - if (new == NULL) { - debug_task ("cannot construct HTML node for text '%*s'", tag_len, tag_text); - return FALSE; - } - data = new->data; - if (data->tag && (data->tag->id == Tag_A || data->tag->id == Tag_IMG) && ((data->flags & FL_CLOSING) == 0)) { - parse_tag_url (task, part, data->tag->id, tag_text, tag_len, remain); - } - - if (data->flags & FL_CLOSING) { - if (!*cur_level) { - debug_task ("bad parent node"); - return FALSE; - } - g_node_append (*cur_level, new); - if (!check_balance (new, cur_level)) { - debug_task ("mark part as unbalanced as it has not pairable closing tags"); - part->is_balanced = FALSE; - } - } - else { - - g_node_append (*cur_level, new); - if ((data->flags & FL_CLOSED) == 0) { - *cur_level = new; - } - /* Skip some tags */ - if (data->tag && (data->tag->id == Tag_STYLE || - data->tag->id == Tag_SCRIPT || - data->tag->id == Tag_OBJECT || - 
data->tag->id == Tag_TITLE)) { - return FALSE; - } - } - } - - return TRUE; -} - -/* - * vi:ts=4 - */ diff --git a/src/html.h b/src/html.h deleted file mode 100644 index 3ea758e60..000000000 --- a/src/html.h +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Functions for simple html parsing - */ - -#ifndef RSPAMD_HTML_H -#define RSPAMD_HTML_H - -#include "config.h" -#include "mem_pool.h" - -/* Known HTML tags */ -typedef enum -{ - Tag_UNKNOWN, /**< Unknown tag! */ - Tag_A, /**< A */ - Tag_ABBR, /**< ABBR */ - Tag_ACRONYM, /**< ACRONYM */ - Tag_ADDRESS, /**< ADDRESS */ - Tag_ALIGN, /**< ALIGN */ - Tag_APPLET, /**< APPLET */ - Tag_AREA, /**< AREA */ - Tag_B, /**< B */ - Tag_BASE, /**< BASE */ - Tag_BASEFONT, /**< BASEFONT */ - Tag_BDO, /**< BDO */ - Tag_BGSOUND, /**< BGSOUND */ - Tag_BIG, /**< BIG */ - Tag_BLINK, /**< BLINK */ - Tag_BLOCKQUOTE, /**< BLOCKQUOTE */ - Tag_BODY, /**< BODY */ - Tag_BR, /**< BR */ - Tag_BUTTON, /**< BUTTON */ - Tag_CAPTION, /**< CAPTION */ - Tag_CENTER, /**< CENTER */ - Tag_CITE, /**< CITE */ - Tag_CODE, /**< CODE */ - Tag_COL, /**< COL */ - Tag_COLGROUP, /**< COLGROUP */ - Tag_COMMENT, /**< COMMENT */ - Tag_DD, /**< DD */ - Tag_DEL, /**< DEL */ - Tag_DFN, /**< DFN */ - Tag_DIR, /**< DIR */ - Tag_DIV, /**< DIF */ - Tag_DL, /**< DL */ - Tag_DT, /**< DT */ - Tag_EM, /**< EM */ - Tag_EMBED, /**< EMBED */ - Tag_FIELDSET, /**< FIELDSET */ - Tag_FONT, /**< FONT */ - Tag_FORM, /**< FORM */ - Tag_FRAME, /**< FRAME */ - Tag_FRAMESET, /**< FRAMESET */ - Tag_H1, /**< H1 */ - Tag_H2, /**< H2 */ - Tag_H3, /**< H3 */ - Tag_H4, /**< H4 */ - Tag_H5, /**< H5 */ - Tag_H6, /**< H6 */ - Tag_HEAD, /**< HEAD */ - Tag_HR, /**< HR */ - Tag_HTML, /**< HTML */ - Tag_I, /**< I */ - Tag_IFRAME, /**< IFRAME */ - Tag_ILAYER, /**< ILAYER */ - Tag_IMG, /**< IMG */ - Tag_INPUT, /**< INPUT */ - Tag_INS, /**< INS */ - Tag_ISINDEX, /**< ISINDEX */ - Tag_KBD, /**< KBD */ - Tag_KEYGEN, /**< KEYGEN */ - Tag_LABEL, /**< LABEL */ - Tag_LAYER, /**< LAYER */ - Tag_LEGEND, /**< LEGEND */ - 
Tag_LI, /**< LI */ - Tag_LINK, /**< LINK */ - Tag_LISTING, /**< LISTING */ - Tag_MAP, /**< MAP */ - Tag_MARQUEE, /**< MARQUEE */ - Tag_MENU, /**< MENU */ - Tag_META, /**< META */ - Tag_MULTICOL, /**< MULTICOL */ - Tag_NOBR, /**< NOBR */ - Tag_NOEMBED, /**< NOEMBED */ - Tag_NOFRAMES, /**< NOFRAMES */ - Tag_NOLAYER, /**< NOLAYER */ - Tag_NOSAVE, /**< NOSAVE */ - Tag_NOSCRIPT, /**< NOSCRIPT */ - Tag_OBJECT, /**< OBJECT */ - Tag_OL, /**< OL */ - Tag_OPTGROUP, /**< OPTGROUP */ - Tag_OPTION, /**< OPTION */ - Tag_P, /**< P */ - Tag_PARAM, /**< PARAM */ - Tag_PLAINTEXT,/**< PLAINTEXT */ - Tag_PRE, /**< PRE */ - Tag_Q, /**< Q */ - Tag_RB, /**< RB */ - Tag_RBC, /**< RBC */ - Tag_RP, /**< RP */ - Tag_RT, /**< RT */ - Tag_RTC, /**< RTC */ - Tag_RUBY, /**< RUBY */ - Tag_S, /**< S */ - Tag_SAMP, /**< SAMP */ - Tag_SCRIPT, /**< SCRIPT */ - Tag_SELECT, /**< SELECT */ - Tag_SERVER, /**< SERVER */ - Tag_SERVLET, /**< SERVLET */ - Tag_SMALL, /**< SMALL */ - Tag_SPACER, /**< SPACER */ - Tag_SPAN, /**< SPAN */ - Tag_STRIKE, /**< STRIKE */ - Tag_STRONG, /**< STRONG */ - Tag_STYLE, /**< STYLE */ - Tag_SUB, /**< SUB */ - Tag_SUP, /**< SUP */ - Tag_TABLE, /**< TABLE */ - Tag_TBODY, /**< TBODY */ - Tag_TD, /**< TD */ - Tag_TEXTAREA, /**< TEXTAREA */ - Tag_TFOOT, /**< TFOOT */ - Tag_TH, /**< TH */ - Tag_THEAD, /**< THEAD */ - Tag_TITLE, /**< TITLE */ - Tag_TR, /**< TR */ - Tag_TT, /**< TT */ - Tag_U, /**< U */ - Tag_UL, /**< UL */ - Tag_VAR, /**< VAR */ - Tag_WBR, /**< WBR */ - Tag_XMP, /**< XMP */ - Tag_XML, /**< XML */ - Tag_NEXTID, /**< NEXTID */ - - N_TAGS /**< Must be last */ -} tag_id_t; - -#define CM_UNKNOWN 0 -/* Elements with no content. Map to HTML specification. */ -#define CM_EMPTY (1 << 0) -/* Elements that appear outside of "BODY". */ -#define CM_HTML (1 << 1) -/* Elements that can appear within HEAD. */ -#define CM_HEAD (1 << 2) -/* HTML "block" elements. */ -#define CM_BLOCK (1 << 3) -/* HTML "inline" elements. 
*/ -#define CM_INLINE (1 << 4) -/* Elements that mark list item ("LI"). */ -#define CM_LIST (1 << 5) -/* Elements that mark definition list item ("DL", "DT"). */ -#define CM_DEFLIST (1 << 6) -/* Elements that can appear inside TABLE. */ -#define CM_TABLE (1 << 7) -/* Used for "THEAD", "TFOOT" or "TBODY". */ -#define CM_ROWGRP (1 << 8) -/* Used for "TD", "TH" */ -#define CM_ROW (1 << 9) -/* Elements whose content must be protected against white space movement. - Includes some elements that can found in forms. */ -#define CM_FIELD (1 << 10) -/* Used to avoid propagating inline emphasis inside some elements - such as OBJECT or APPLET. */ -#define CM_OBJECT (1 << 11) -/* Elements that allows "PARAM". */ -#define CM_PARAM (1 << 12) -/* "FRAME", "FRAMESET", "NOFRAMES". Used in ParseFrameSet. */ -#define CM_FRAMES (1 << 13) -/* Heading elements (h1, h2, ...). */ -#define CM_HEADING (1 << 14) -/* Elements with an optional end tag. */ -#define CM_OPT (1 << 15) -/* Elements that use "align" attribute for vertical position. */ -#define CM_IMG (1 << 16) -/* Elements with inline and block model. Used to avoid calling InlineDup. */ -#define CM_MIXED (1 << 17) -/* Elements whose content needs to be indented only if containing one - CM_BLOCK element. */ -#define CM_NO_INDENT (1 << 18) -/* Elements that are obsolete (such as "dir", "menu"). */ -#define CM_OBSOLETE (1 << 19) -/* User defined elements. Used to determine how attributes wihout value - should be printed. */ -#define CM_NEW (1 << 20) -/* Elements that cannot be omitted. */ -#define CM_OMITST (1 << 21) - -/* XML tag */ -#define FL_XML (1 << 0) -/* Closing tag */ -#define FL_CLOSING (1 << 1) -/* Fully closed tag (e.g. 
) */ -#define FL_CLOSED (1 << 2) - -struct html_tag { - tag_id_t id; - const gchar *name; - gint flags; -}; - -struct html_node { - struct html_tag *tag; - gint flags; -}; - -/* Forwarded declaration */ -struct rspamd_task; - -/* - * Add a single node to the tags tree - */ -gboolean add_html_node (struct rspamd_task *task, rspamd_mempool_t *pool, - struct mime_text_part *part, gchar *tag_text, gsize tag_len, gsize remain, GNode **cur_level); - -/* - * Get tag structure by its name (binary search is used) - */ -struct html_tag * get_tag_by_name (const gchar *name); - -/* - * Decode HTML entitles in text. Text is modified in place. - */ -void decode_entitles (gchar *s, guint *len); - -#endif diff --git a/src/http.c b/src/http.c deleted file mode 100644 index 491468352..000000000 --- a/src/http.c +++ /dev/null @@ -1,1222 +0,0 @@ -/* Copyright (c) 2014, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "http.h" -#include "utlist.h" -#include "util.h" -#include "printf.h" -#include "logger.h" - -struct rspamd_http_connection_private { - GString *buf; - gboolean new_header; - struct rspamd_http_header *header; - struct http_parser parser; - struct http_parser_settings parser_cb; - struct event ev; - struct timeval tv; - struct timeval *ptv; - struct rspamd_http_message *msg; - struct iovec *out; - guint outlen; - gsize wr_pos; - gsize wr_total; -}; - -enum http_magic_type { - HTTP_MAGIC_PLAIN = 0, - HTTP_MAGIC_HTML, - HTTP_MAGIC_CSS, - HTTP_MAGIC_JS, - HTTP_MAGIC_PNG, - HTTP_MAGIC_JPG -}; - -static const struct _rspamd_http_magic { - const gchar *ext; - const gchar *ct; -} http_file_types[] = { - [HTTP_MAGIC_PLAIN] = { "txt", "text/plain" }, - [HTTP_MAGIC_HTML] = { "html", "text/html" }, - [HTTP_MAGIC_CSS] = { "css", "text/css" }, - [HTTP_MAGIC_JS] = { "js", "application/javascript" }, - [HTTP_MAGIC_PNG] = { "png", "image/png" }, - [HTTP_MAGIC_JPG] = { "jpg", "image/jpeg" }, -}; - -static gchar *http_week[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" }; -static gchar *http_month[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; - - -#define HTTP_ERROR http_error_quark () -GQuark -http_error_quark (void) -{ - return g_quark_from_static_string ("http-error-quark"); -} - -static const gchar * -rspamd_http_code_to_str (gint code) -{ - if (code == 200) { - return "OK"; - } - else if 
(code == 404) { - return "Not found"; - } - else if (code == 403 || code == 401) { - return "Not authorized"; - } - else if (code >= 400 && code < 500) { - return "Bad request"; - } - else if (code >= 300 && code < 400) { - return "See Other"; - } - else if (code >= 500 && code < 600) { - return "Internal server error"; - } - - return "Unknown error"; -} - -/* - * Obtained from nginx - * Copyright (C) Igor Sysoev - */ -static guint mday[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; - -time_t -rspamd_http_parse_date (const gchar *header, gsize len) -{ - const gchar *p, *end; - gint month; - guint day, year, hour, min, sec; - guint64 time; - enum { - no = 0, rfc822, /* Tue, 10 Nov 2002 23:50:13 */ - rfc850, /* Tuesday, 10-Dec-02 23:50:13 */ - isoc /* Tue Dec 10 23:50:13 2002 */ - } fmt; - - fmt = 0; - if (len > 0) { - end = header + len; - } - else { - end = header + strlen (header); - } - -#if (NGX_SUPPRESS_WARN) - day = 32; - year = 2038; -#endif - - for (p = header; p < end; p++) { - if (*p == ',') { - break; - } - - if (*p == ' ') { - fmt = isoc; - break; - } - } - - for (p++; p < end; p++) - if (*p != ' ') { - break; - } - - if (end - p < 18) { - return (time_t)-1; - } - - if (fmt != isoc) { - if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') { - return (time_t)-1; - } - - day = (*p - '0') * 10 + *(p + 1) - '0'; - p += 2; - - if (*p == ' ') { - if (end - p < 18) { - return (time_t)-1; - } - fmt = rfc822; - - } - else if (*p == '-') { - fmt = rfc850; - - } - else { - return (time_t)-1; - } - - p++; - } - - switch (*p) { - - case 'J': - month = *(p + 1) == 'a' ? 0 : *(p + 2) == 'n' ? 5 : 6; - break; - - case 'F': - month = 1; - break; - - case 'M': - month = *(p + 2) == 'r' ? 2 : 4; - break; - - case 'A': - month = *(p + 1) == 'p' ? 
3 : 7; - break; - - case 'S': - month = 8; - break; - - case 'O': - month = 9; - break; - - case 'N': - month = 10; - break; - - case 'D': - month = 11; - break; - - default: - return (time_t)-1; - } - - p += 3; - - if ((fmt == rfc822 && *p != ' ') || (fmt == rfc850 && *p != '-')) { - return (time_t)-1; - } - - p++; - - if (fmt == rfc822) { - if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9' - || *(p + 2) < '0' || *(p + 2) > '9' || *(p + 3) < '0' - || *(p + 3) > '9') { - return (time_t)-1; - } - - year = (*p - '0') * 1000 + (*(p + 1) - '0') * 100 - + (*(p + 2) - '0') * 10 + *(p + 3) - '0'; - p += 4; - - } - else if (fmt == rfc850) { - if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') { - return (time_t)-1; - } - - year = (*p - '0') * 10 + *(p + 1) - '0'; - year += (year < 70) ? 2000 : 1900; - p += 2; - } - - if (fmt == isoc) { - if (*p == ' ') { - p++; - } - - if (*p < '0' || *p > '9') { - return (time_t)-1; - } - - day = *p++ - '0'; - - if (*p != ' ') { - if (*p < '0' || *p > '9') { - return (time_t)-1; - } - - day = day * 10 + *p++ - '0'; - } - - if (end - p < 14) { - return (time_t)-1; - } - } - - if (*p++ != ' ') { - return (time_t)-1; - } - - if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') { - return (time_t)-1; - } - - hour = (*p - '0') * 10 + *(p + 1) - '0'; - p += 2; - - if (*p++ != ':') { - return (time_t)-1; - } - - if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') { - return (time_t)-1; - } - - min = (*p - '0') * 10 + *(p + 1) - '0'; - p += 2; - - if (*p++ != ':') { - return (time_t)-1; - } - - if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') { - return (time_t)-1; - } - - sec = (*p - '0') * 10 + *(p + 1) - '0'; - - if (fmt == isoc) { - p += 2; - - if (*p++ != ' ') { - return (time_t)-1; - } - - if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9' - || *(p + 2) < '0' || *(p + 2) > '9' || *(p + 3) < '0' - || *(p + 3) > '9') { - return (time_t)-1; - } - - year = (*p - '0') * 1000 + (*(p 
+ 1) - '0') * 100 - + (*(p + 2) - '0') * 10 + *(p + 3) - '0'; - } - - if (hour > 23 || min > 59 || sec > 59) { - return (time_t)-1; - } - - if (day == 29 && month == 1) { - if ((year & 3) || ((year % 100 == 0) && (year % 400) != 0)) { - return (time_t)-1; - } - - } - else if (day > mday[month]) { - return (time_t)-1; - } - - /* - * shift new year to March 1 and start months from 1 (not 0), - * it is needed for Gauss' formula - */ - - if (--month <= 0) { - month += 12; - year -= 1; - } - - /* Gauss' formula for Gregorian days since March 1, 1 BC */ - - time = (guint64) ( - /* days in years including leap years since March 1, 1 BC */ - - 365 * year + year / 4 - year / 100 + year / 400 - - /* days before the month */ - - + 367 * month / 12 - 30 - - /* days before the day */ - - + day - 1 - - /* - * 719527 days were between March 1, 1 BC and March 1, 1970, - * 31 and 28 days were in January and February 1970 - */ - - - 719527 + 31 + 28) * 86400 + hour * 3600 + min * 60 + sec; - - return (time_t) time; -} - -static inline void -rspamd_http_check_date (struct rspamd_http_connection_private *priv) -{ - if (g_ascii_strcasecmp (priv->header->name->str, "date") == 0) { - priv->msg->date = rspamd_http_parse_date (priv->header->value->str, - priv->header->value->len); - } -} - -static gint -rspamd_http_on_url (http_parser* parser, const gchar *at, size_t length) -{ - struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; - struct rspamd_http_connection_private *priv; - - priv = conn->priv; - - g_string_append_len (priv->msg->url, at, length); - - return 0; -} - -static gint -rspamd_http_on_header_field (http_parser* parser, const gchar *at, size_t length) -{ - struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; - struct rspamd_http_connection_private *priv; - - priv = conn->priv; - - if (priv->header == NULL) { - priv->header = g_slice_alloc (sizeof (struct rspamd_http_header)); - priv->header->name = 
g_string_sized_new (32); - priv->header->value = g_string_sized_new (32); - } - else if (priv->new_header) { - DL_APPEND (priv->msg->headers, priv->header); - rspamd_http_check_date (priv); - priv->header = g_slice_alloc (sizeof (struct rspamd_http_header)); - priv->header->name = g_string_sized_new (32); - priv->header->value = g_string_sized_new (32); - } - - priv->new_header = FALSE; - g_string_append_len (priv->header->name, at, length); - - return 0; -} - -static gint -rspamd_http_on_header_value (http_parser* parser, const gchar *at, size_t length) -{ - struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; - struct rspamd_http_connection_private *priv; - - priv = conn->priv; - - if (priv->header == NULL) { - /* Should not happen */ - return -1; - } - - priv->new_header = TRUE; - g_string_append_len (priv->header->value, at, length); - - return 0; -} - -static int -rspamd_http_on_headers_complete (http_parser* parser) -{ - struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; - struct rspamd_http_connection_private *priv; - - priv = conn->priv; - - if (priv->header != NULL) { - DL_APPEND (priv->msg->headers, priv->header); - rspamd_http_check_date (priv); - priv->header = NULL; - } - - if (parser->content_length != 0 && parser->content_length != ULLONG_MAX) { - priv->msg->body = g_string_sized_new (parser->content_length + 1); - } - else { - priv->msg->body = g_string_sized_new (BUFSIZ); - } - - priv->msg->method = parser->method; - - return 0; -} - -static int -rspamd_http_on_body (http_parser* parser, const gchar *at, size_t length) -{ - struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; - struct rspamd_http_connection_private *priv; - - priv = conn->priv; - - g_string_append_len (priv->msg->body, at, length); - - if (conn->opts & RSPAMD_HTTP_BODY_PARTIAL) { - return (conn->body_handler (conn, priv->msg, at, length)); - } - - return 0; -} - -static int 
-rspamd_http_on_message_complete (http_parser* parser) -{ - struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; - struct rspamd_http_connection_private *priv; - int ret = 0; - - priv = conn->priv; - - if (conn->body_handler != NULL) { - rspamd_http_connection_ref (conn); - if (conn->opts & RSPAMD_HTTP_BODY_PARTIAL) { - ret = conn->body_handler (conn, priv->msg, NULL, 0); - } - else { - ret = conn->body_handler (conn, priv->msg, priv->msg->body->str, priv->msg->body->len); - } - rspamd_http_connection_unref (conn); - } - - if (ret == 0) { - rspamd_http_connection_ref (conn); - ret = conn->finish_handler (conn, priv->msg); - rspamd_http_connection_unref (conn); - } - - return ret; -} - -static void -rspamd_http_write_helper (struct rspamd_http_connection *conn) -{ - struct rspamd_http_connection_private *priv; - struct iovec *start; - guint niov, i; - gsize remain; - gssize r; - GError *err; - - priv = conn->priv; - - if (priv->wr_pos == priv->wr_total) { - rspamd_http_connection_ref (conn); - conn->finish_handler (conn, priv->msg); - rspamd_http_connection_unref (conn); - return; - } - - start = &priv->out[0]; - niov = priv->outlen; - remain = priv->wr_pos; - for (i = 0; i < priv->outlen && remain > 0; i ++) { - /* Find out the first iov required */ - start = &priv->out[i]; - if (start->iov_len <= remain) { - remain -= start->iov_len; - start = &priv->out[i + 1]; - niov --; - } - else { - start->iov_base = (void *)((char *)start->iov_base + remain); - start->iov_len -= remain; - remain = 0; - } - } - - r = writev (conn->fd, start, MIN (IOV_MAX, niov)); - - if (r == -1) { - err = g_error_new (HTTP_ERROR, errno, "IO write error: %s", strerror (errno)); - rspamd_http_connection_ref (conn); - conn->error_handler (conn, err); - rspamd_http_connection_unref (conn); - g_error_free (err); - return; - } - else { - priv->wr_pos += r; - } - - if (priv->wr_pos >= priv->wr_total) { - rspamd_http_connection_ref (conn); - conn->finish_handler 
(conn, priv->msg); - rspamd_http_connection_unref (conn); - } - else { - /* Want to write more */ - event_add (&priv->ev, priv->ptv); - } -} - -static void -rspamd_http_event_handler (int fd, short what, gpointer ud) -{ - struct rspamd_http_connection *conn = (struct rspamd_http_connection *)ud; - struct rspamd_http_connection_private *priv; - GString *buf; - gssize r; - GError *err; - - priv = conn->priv; - buf = priv->buf; - - if (what == EV_READ) { - r = read (fd, buf->str, buf->allocated_len); - if (r == -1) { - err = g_error_new (HTTP_ERROR, errno, "IO read error: %s", strerror (errno)); - conn->error_handler (conn, err); - g_error_free (err); - return; - } - else { - buf->len = r; - rspamd_http_connection_ref (conn); - if (http_parser_execute (&priv->parser, &priv->parser_cb, buf->str, r) != (size_t)r) { - err = g_error_new (HTTP_ERROR, priv->parser.http_errno, - "HTTP parser error: %s", http_errno_description (priv->parser.http_errno)); - conn->error_handler (conn, err); - g_error_free (err); - rspamd_http_connection_unref (conn); - return; - } - rspamd_http_connection_unref (conn); - } - } - else if (what == EV_TIMEOUT) { - err = g_error_new (HTTP_ERROR, ETIMEDOUT, - "IO timeout"); - rspamd_http_connection_ref (conn); - conn->error_handler (conn, err); - rspamd_http_connection_unref (conn); - g_error_free (err); - return; - } - else if (what == EV_WRITE) { - rspamd_http_write_helper (conn); - } -} - -struct rspamd_http_connection* -rspamd_http_connection_new (rspamd_http_body_handler_t body_handler, - rspamd_http_error_handler_t error_handler, - rspamd_http_finish_handler_t finish_handler, - enum rspamd_http_options opts, - enum rspamd_http_connection_type type) -{ - struct rspamd_http_connection *new; - struct rspamd_http_connection_private *priv; - - if (error_handler == NULL || finish_handler == NULL) { - return NULL; - } - - new = g_slice_alloc0 (sizeof (struct rspamd_http_connection)); - new->opts = opts; - new->type = type; - new->body_handler = 
body_handler; - new->error_handler = error_handler; - new->finish_handler = finish_handler; - new->fd = -1; - new->ref = 1; - - /* Init priv */ - priv = g_slice_alloc0 (sizeof (struct rspamd_http_connection_private)); - http_parser_init (&priv->parser, type == RSPAMD_HTTP_SERVER ? HTTP_REQUEST : HTTP_RESPONSE); - priv->parser.data = new; - priv->parser_cb.on_url = rspamd_http_on_url; - priv->parser_cb.on_header_field = rspamd_http_on_header_field; - priv->parser_cb.on_header_value = rspamd_http_on_header_value; - priv->parser_cb.on_headers_complete = rspamd_http_on_headers_complete; - priv->parser_cb.on_body = rspamd_http_on_body; - priv->parser_cb.on_message_complete = rspamd_http_on_message_complete; - - new->priv = priv; - - return new; -} - -void -rspamd_http_connection_reset (struct rspamd_http_connection *conn) -{ - struct rspamd_http_connection_private *priv; - struct rspamd_http_message *msg; - - priv = conn->priv; - msg = priv->msg; - - /* Clear request */ - if (msg != NULL) { - rspamd_http_message_free (msg); - priv->msg = NULL; - } - - /* Clear priv */ - event_del (&priv->ev); - if (priv->buf != NULL) { - g_string_free (priv->buf, TRUE); - priv->buf = NULL; - } - if (priv->out != NULL) { - g_slice_free1 (sizeof (struct iovec) * priv->outlen, priv->out); - priv->out = NULL; - } -} - -void -rspamd_http_connection_free (struct rspamd_http_connection *conn) -{ - struct rspamd_http_connection_private *priv; - - priv = conn->priv; - rspamd_http_connection_reset (conn); - g_slice_free1 (sizeof (struct rspamd_http_connection_private), priv); - g_slice_free1 (sizeof (struct rspamd_http_connection), conn); -} - -void -rspamd_http_connection_read_message (struct rspamd_http_connection *conn, - gpointer ud, gint fd, struct timeval *timeout, struct event_base *base) -{ - struct rspamd_http_connection_private *priv = conn->priv; - struct rspamd_http_message *req; - - conn->fd = fd; - conn->ud = ud; - req = rspamd_http_new_message (conn->type == RSPAMD_HTTP_SERVER ? 
HTTP_REQUEST : HTTP_RESPONSE); - priv->msg = req; - - if (timeout == NULL) { - priv->ptv = NULL; - } - else { - memcpy (&priv->tv, timeout, sizeof (struct timeval)); - priv->ptv = &priv->tv; - } - priv->header = NULL; - priv->buf = g_string_sized_new (BUFSIZ); - priv->new_header = TRUE; - - event_set (&priv->ev, fd, EV_READ | EV_PERSIST, rspamd_http_event_handler, conn); - event_base_set (base, &priv->ev); - event_add (&priv->ev, priv->ptv); -} - -void -rspamd_http_connection_write_message (struct rspamd_http_connection *conn, - struct rspamd_http_message *msg, const gchar *host, const gchar *mime_type, - gpointer ud, gint fd, struct timeval *timeout, struct event_base *base) -{ - struct rspamd_http_connection_private *priv = conn->priv; - struct rspamd_http_header *hdr; - struct tm t, *ptm; - gchar datebuf[64], *pbody; - gint i; - gsize bodylen; - - conn->fd = fd; - conn->ud = ud; - priv->msg = msg; - - if (timeout == NULL) { - priv->ptv = NULL; - } - else { - memcpy (&priv->tv, timeout, sizeof (struct timeval)); - priv->ptv = &priv->tv; - } - priv->header = NULL; - priv->buf = g_string_sized_new (128); - - if (msg->method < HTTP_SYMBOLS) { - if (msg->body == NULL || msg->body->len == 0) { - pbody = NULL; - bodylen = 0; - priv->outlen = 2; - msg->method = HTTP_GET; - } - else { - pbody = msg->body->str; - bodylen = msg->body->len; - priv->outlen = 3; - msg->method = HTTP_POST; - } - } - else if (msg->body != NULL) { - pbody = msg->body->str; - bodylen = msg->body->len; - priv->outlen = 2; - } - else { - /* Invalid body for spamc method */ - return; - } - - if (conn->type == RSPAMD_HTTP_SERVER) { - /* Format reply */ - if (msg->method < HTTP_SYMBOLS) { - ptm = gmtime (&msg->date); - t = *ptm; - rspamd_snprintf (datebuf, sizeof (datebuf), "%s, %02d %s %4d %02d:%02d:%02d GMT", - http_week[t.tm_wday], - t.tm_mday, - http_month[t.tm_mon], - t.tm_year + 1900, - t.tm_hour, - t.tm_min, - t.tm_sec); - if (mime_type == NULL) { - mime_type = "text/plain"; - } - 
rspamd_printf_gstring (priv->buf, "HTTP/1.1 %d %s\r\n" - "Connection: close\r\n" - "Server: %s\r\n" - "Date: %s\r\n" - "Content-Length: %z\r\n" - "Content-Type: %s\r\n", - msg->code, rspamd_http_code_to_str (msg->code), - "rspamd/" RVERSION, - datebuf, - msg->body->len, - mime_type); - } - else { - /* Legacy spamd reply */ - rspamd_printf_gstring (priv->buf, "RSPAMD/1.3 0 EX_OK\r\n"); - } - } - else { - /* Format request */ - if (host != NULL) { - rspamd_printf_gstring (priv->buf, "%s %v HTTP/1.1\r\n" - "Connection: close\r\n" - "Host: %s\r\n" - "Content-Length: %z\r\n", - http_method_str (msg->method), msg->url, host, msg->body->len); - } - else { - /* Fallback to HTTP/1.0 */ - rspamd_printf_gstring (priv->buf, "%s %v HTTP/1.0\r\n" - "Content-Length: %z\r\n", - http_method_str (msg->method), msg->url, msg->body->len); - } - } - /* Allocate iov */ - priv->wr_total = bodylen + priv->buf->len + 2; - DL_FOREACH (msg->headers, hdr) { - /* <: ><\r\n> */ - priv->wr_total += hdr->name->len + hdr->value->len + 4; - priv->outlen += 4; - } - priv->out = g_slice_alloc (sizeof (struct iovec) * priv->outlen); - priv->wr_pos = 0; - - /* Now set up all iov */ - priv->out[0].iov_base = priv->buf->str; - priv->out[0].iov_len = priv->buf->len; - i = 1; - LL_FOREACH (msg->headers, hdr) { - priv->out[i].iov_base = hdr->name->str; - priv->out[i++].iov_len = hdr->name->len; - priv->out[i].iov_base = ": "; - priv->out[i++].iov_len = 2; - priv->out[i].iov_base = hdr->value->str; - priv->out[i++].iov_len = hdr->value->len; - priv->out[i].iov_base = "\r\n"; - priv->out[i++].iov_len = 2; - } - if (msg->method < HTTP_SYMBOLS) { - priv->out[i].iov_base = "\r\n"; - priv->out[i++].iov_len = 2; - } - else { - /* No CRLF for compatibility reply */ - priv->wr_total -= 2; - } - if (msg->body != NULL) { - priv->out[i].iov_base = pbody; - priv->out[i++].iov_len = bodylen; - } - - event_set (&priv->ev, fd, EV_WRITE, rspamd_http_event_handler, conn); - event_base_set (base, &priv->ev); - event_add 
(&priv->ev, priv->ptv); -} - -struct rspamd_http_message* -rspamd_http_new_message (enum http_parser_type type) -{ - struct rspamd_http_message *new; - - new = g_slice_alloc (sizeof (struct rspamd_http_message)); - if (type == HTTP_REQUEST) { - new->url = g_string_sized_new (32); - } - else { - new->url = NULL; - new->code = 200; - } - new->headers = NULL; - new->date = 0; - new->body = NULL; - new->type = type; - new->method = HTTP_GET; - - return new; -} - -void -rspamd_http_message_free (struct rspamd_http_message *msg) -{ - struct rspamd_http_header *hdr, *tmp_hdr; - - LL_FOREACH_SAFE (msg->headers, hdr, tmp_hdr) { - g_string_free (hdr->name, TRUE); - g_string_free (hdr->value, TRUE); - g_slice_free1 (sizeof (struct rspamd_http_header), hdr); - } - if (msg->body != NULL) { - g_string_free (msg->body, TRUE); - } - if (msg->url != NULL) { - g_string_free (msg->url, TRUE); - } - g_slice_free1 (sizeof (struct rspamd_http_message), msg); -} - -void rspamd_http_message_add_header (struct rspamd_http_message *msg, - const gchar *name, - const gchar *value) -{ - struct rspamd_http_header *hdr; - - if (msg != NULL && name != NULL && value != NULL) { - hdr = g_slice_alloc (sizeof (struct rspamd_http_header)); - hdr->name = g_string_new (name); - hdr->value = g_string_new (value); - DL_APPEND (msg->headers, hdr); - } -} - -const gchar* -rspamd_http_message_find_header (struct rspamd_http_message *msg, const gchar *name) -{ - struct rspamd_http_header *hdr; - const gchar *res = NULL; - guint slen = strlen (name); - - if (msg != NULL) { - LL_FOREACH (msg->headers, hdr) { - if (hdr->name->len == slen) { - if (memcmp (hdr->name->str, name, slen) == 0) { - res = hdr->value->str; - break; - } - } - } - } - - return res; -} - -/* - * HTTP router functions - */ - -static void -rspamd_http_entry_free (struct rspamd_http_connection_entry *entry) -{ - if (entry != NULL) { - close (entry->conn->fd); - rspamd_http_connection_unref (entry->conn); - g_slice_free1 (sizeof (struct 
rspamd_http_connection_entry), entry); - if (entry->rt->finish_handler) { - entry->rt->finish_handler (entry); - } - } -} - -static void -rspamd_http_router_error_handler (struct rspamd_http_connection *conn, GError *err) -{ - struct rspamd_http_connection_entry *entry = conn->ud; - struct rspamd_http_message *msg; - - if (entry->is_reply) { - /* At this point we need to finish this session and close owned socket */ - if (entry->rt->error_handler != NULL) { - entry->rt->error_handler (entry, err); - } - rspamd_http_entry_free (entry); - } - else { - /* Here we can write a reply to a client */ - if (entry->rt->error_handler != NULL) { - entry->rt->error_handler (entry, err); - } - msg = rspamd_http_new_message (HTTP_RESPONSE); - msg->date = time (NULL); - msg->code = err->code; - msg->body = g_string_new (err->message); - rspamd_http_connection_reset (entry->conn); - rspamd_http_connection_write_message (entry->conn, msg, NULL, - "text/plain", entry, entry->conn->fd, entry->rt->ptv, entry->rt->ev_base); - entry->is_reply = TRUE; - } -} - -static const gchar * -rspamd_http_router_detect_ct (const gchar *path) -{ - const gchar *dot; - guint i; - - dot = strrchr (path, '.'); - if (dot == NULL) { - return http_file_types[HTTP_MAGIC_PLAIN].ct; - } - dot ++; - - for (i = 0; i < G_N_ELEMENTS (http_file_types); i ++) { - if (strcmp (http_file_types[i].ext, dot) == 0) { - return http_file_types[i].ct; - } - } - - return http_file_types[HTTP_MAGIC_PLAIN].ct; -} - -static gboolean -rspamd_http_router_try_file (struct rspamd_http_connection_entry *entry, - struct rspamd_http_message *msg, gboolean expand_path) -{ - struct stat st; - gint fd; - gchar filebuf[PATH_MAX], realbuf[PATH_MAX], *dir; - struct rspamd_http_message *reply_msg; - - /* XXX: filter filename component only */ - if (expand_path) { - rspamd_snprintf (filebuf, sizeof (filebuf), "%s%c%v", - entry->rt->default_fs_path, G_DIR_SEPARATOR, msg->url); - } - else { - rspamd_snprintf (filebuf, sizeof (filebuf), "%v", - 
msg->url); - } - - if (realpath (filebuf, realbuf) == NULL || - lstat (realbuf, &st) == -1) { - return FALSE; - } - - if (S_ISDIR (st.st_mode) && expand_path) { - /* Try to append 'index.html' to the url */ - g_string_append_printf (msg->url, "%c%s", G_DIR_SEPARATOR, - "index.html"); - return rspamd_http_router_try_file (entry, msg, FALSE); - } - else if (!S_ISREG (st.st_mode)) { - return FALSE; - } - - /* We also need to ensure that file is inside the defined dir */ - dir = dirname (realbuf); - if (dir == NULL || strncmp (dir, entry->rt->default_fs_path, - strlen (entry->rt->default_fs_path)) != 0) { - return FALSE; - } - - fd = open (realbuf, O_RDONLY); - if (fd == -1) { - return FALSE; - } - - reply_msg = rspamd_http_new_message (HTTP_RESPONSE); - reply_msg->date = time (NULL); - reply_msg->code = 200; - reply_msg->body = g_string_sized_new (st.st_size); - - if (read (fd, reply_msg->body->str, st.st_size) != st.st_size) { - close (fd); - rspamd_http_message_free (reply_msg); - return FALSE; - } - - reply_msg->body->len = st.st_size; - reply_msg->body->str[st.st_size] = '\0'; - close (fd); - - rspamd_http_connection_reset (entry->conn); - - /* XXX: detect content type */ - rspamd_http_connection_write_message (entry->conn, reply_msg, NULL, - rspamd_http_router_detect_ct (realbuf), entry, entry->conn->fd, - entry->rt->ptv, entry->rt->ev_base); - - return TRUE; -} - -static int -rspamd_http_router_finish_handler (struct rspamd_http_connection *conn, - struct rspamd_http_message *msg) -{ - struct rspamd_http_connection_entry *entry = conn->ud; - rspamd_http_router_handler_t handler = NULL; - gpointer found; - struct rspamd_http_message *err_msg; - GError *err; - - G_STATIC_ASSERT (sizeof (rspamd_http_router_handler_t) == sizeof (gpointer)); - - if (entry->is_reply) { - /* Request is finished, it is safe to free a connection */ - rspamd_http_entry_free (entry); - } - else { - /* Search for path */ - if (msg->url != NULL && msg->url->len != 0) { - found = 
g_hash_table_lookup (entry->rt->paths, msg->url->str); - memcpy (&handler, &found, sizeof (found)); - } - entry->is_reply = TRUE; - if (handler != NULL) { - return handler (entry, msg); - } - else { - if (entry->rt->default_fs_path == NULL || - rspamd_http_router_try_file (entry, msg, TRUE)) { - err = g_error_new (HTTP_ERROR, 404, - "Not found"); - if (entry->rt->error_handler != NULL) { - entry->rt->error_handler (entry, err); - } - err_msg = rspamd_http_new_message (HTTP_RESPONSE); - err_msg->date = time (NULL); - err_msg->code = err->code; - err_msg->body = g_string_new (err->message); - rspamd_http_connection_reset (entry->conn); - rspamd_http_connection_write_message (entry->conn, err_msg, NULL, - "text/plain", entry, entry->conn->fd, - entry->rt->ptv, entry->rt->ev_base); - g_error_free (err); - } - } - } - - return 0; -} - -struct rspamd_http_connection_router* -rspamd_http_router_new (rspamd_http_router_error_handler_t eh, - rspamd_http_router_finish_handler_t fh, - struct timeval *timeout, struct event_base *base, - const char *default_fs_path) -{ - struct rspamd_http_connection_router* new; - struct stat st; - - new = g_slice_alloc (sizeof (struct rspamd_http_connection_router)); - new->paths = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); - new->conns = NULL; - new->error_handler = eh; - new->finish_handler = fh; - new->ev_base = base; - if (timeout) { - new->tv = *timeout; - new->ptv = &new->tv; - } - else { - new->ptv = NULL; - } - - new->default_fs_path = NULL; - if (default_fs_path != NULL) { - if (stat (default_fs_path, &st) == -1) { - msg_err ("cannot stat %s", default_fs_path); - } - else { - if (!S_ISDIR (st.st_mode)) { - msg_err ("path %s is not a directory", default_fs_path); - } - else { - new->default_fs_path = g_strdup (default_fs_path); - } - } - } - - return new; -} - -void -rspamd_http_router_add_path (struct rspamd_http_connection_router *router, - const gchar *path, rspamd_http_router_handler_t handler) -{ - gpointer 
ptr; - G_STATIC_ASSERT (sizeof (rspamd_http_router_handler_t) == sizeof (gpointer)); - - if (path != NULL && handler != NULL && router != NULL) { - memcpy (&ptr, &handler, sizeof (ptr)); - g_hash_table_insert (router->paths, (gpointer)path, ptr); - } -} - -void -rspamd_http_router_handle_socket (struct rspamd_http_connection_router *router, - gint fd, gpointer ud) -{ - struct rspamd_http_connection_entry *conn; - - conn = g_slice_alloc (sizeof (struct rspamd_http_connection_entry)); - conn->rt = router; - conn->ud = ud; - conn->is_reply = FALSE; - - conn->conn = rspamd_http_connection_new (NULL, rspamd_http_router_error_handler, - rspamd_http_router_finish_handler, 0, RSPAMD_HTTP_SERVER); - - rspamd_http_connection_read_message (conn->conn, conn, fd, router->ptv, - router->ev_base); - LL_PREPEND (router->conns, conn); -} - -void -rspamd_http_router_free (struct rspamd_http_connection_router *router) -{ - struct rspamd_http_connection_entry *conn, *tmp; - - if (router) { - LL_FOREACH_SAFE (router->conns, conn, tmp) { - rspamd_http_entry_free (conn); - } - - if (router->default_fs_path != NULL) { - g_free (router->default_fs_path); - } - g_hash_table_unref (router->paths); - g_slice_free1 (sizeof (struct rspamd_http_connection_router), router); - } -} diff --git a/src/http.h b/src/http.h deleted file mode 100644 index 8af4429c6..000000000 --- a/src/http.h +++ /dev/null @@ -1,278 +0,0 @@ -/* Copyright (c) 2014, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef HTTP_H_ -#define HTTP_H_ - -/** - * @file http.h - * - * This is an interface for HTTP client and conn. This code uses HTTP parser written - * by Joyent Inc based on nginx code. - */ - -#include "config.h" -#include "http_parser.h" - -enum rspamd_http_connection_type { - RSPAMD_HTTP_SERVER, - RSPAMD_HTTP_CLIENT -}; - -/** - * HTTP header structure - */ -struct rspamd_http_header { - GString *name; - GString *value; - struct rspamd_http_header *next, *prev; -}; - -/** - * HTTP message structure, used for requests and replies - */ -struct rspamd_http_message { - GString *url; - struct rspamd_http_header *headers; - GString *body; - enum http_parser_type type; - time_t date; - gint code; - enum http_method method; -}; - - -/** - * Options for HTTP connection - */ -enum rspamd_http_options { - RSPAMD_HTTP_BODY_PARTIAL = 0x1//!< RSPAMD_HTTP_BODY_PARTIAL -}; - -struct rspamd_http_connection_private; -struct rspamd_http_connection; -struct rspamd_http_connection_router; -struct rspamd_http_connection_entry; - -typedef int (*rspamd_http_body_handler_t) (struct rspamd_http_connection *conn, - struct rspamd_http_message *msg, - const gchar *chunk, - gsize len); - -typedef void (*rspamd_http_error_handler_t) (struct rspamd_http_connection 
*conn, GError *err); - -typedef int (*rspamd_http_finish_handler_t) (struct rspamd_http_connection *conn, - struct rspamd_http_message *msg); - -typedef int (*rspamd_http_router_handler_t) (struct rspamd_http_connection_entry *conn_ent, - struct rspamd_http_message *msg); -typedef void (*rspamd_http_router_error_handler_t) (struct rspamd_http_connection_entry *conn_ent, - GError *err); -typedef void (*rspamd_http_router_finish_handler_t) (struct rspamd_http_connection_entry *conn_ent); - -/** - * HTTP connection structure - */ -struct rspamd_http_connection { - struct rspamd_http_connection_private *priv; - rspamd_http_body_handler_t body_handler; - rspamd_http_error_handler_t error_handler; - rspamd_http_finish_handler_t finish_handler; - gpointer ud; - enum rspamd_http_options opts; - enum rspamd_http_connection_type type; - gint fd; - gint ref; -}; - -struct rspamd_http_connection_entry { - struct rspamd_http_connection_router *rt; - struct rspamd_http_connection *conn; - gpointer ud; - gboolean is_reply; - struct rspamd_http_connection_entry *next; -}; - -struct rspamd_http_connection_router { - struct rspamd_http_connection_entry *conns; - GHashTable *paths; - struct timeval tv; - struct timeval *ptv; - struct event_base *ev_base; - gchar *default_fs_path; - rspamd_http_router_error_handler_t error_handler; - rspamd_http_router_finish_handler_t finish_handler; -}; - -/** - * Create new http connection - * @param handler_t handler_t for body - * @param opts options - * @return new connection structure - */ -struct rspamd_http_connection* rspamd_http_connection_new ( - rspamd_http_body_handler_t body_handler, - rspamd_http_error_handler_t error_handler, - rspamd_http_finish_handler_t finish_handler, - enum rspamd_http_options opts, - enum rspamd_http_connection_type type); - -/** - * Handle a request using socket fd and user data ud - * @param conn connection structure - * @param ud opaque user data - * @param fd fd to read/write - */ -void 
rspamd_http_connection_read_message ( - struct rspamd_http_connection *conn, - gpointer ud, - gint fd, - struct timeval *timeout, - struct event_base *base); - -/** - * Send reply using initialised connection - * @param conn connection structure - * @param msg HTTP message - * @param ud opaque user data - * @param fd fd to read/write - */ -void rspamd_http_connection_write_message ( - struct rspamd_http_connection *conn, - struct rspamd_http_message *msg, - const gchar *host, - const gchar *mime_type, - gpointer ud, - gint fd, - struct timeval *timeout, - struct event_base *base); - -/** - * Free connection structure - * @param conn - */ -void rspamd_http_connection_free (struct rspamd_http_connection *conn); - -/** - * Increase refcount for a connection - * @param conn - * @return - */ -static inline struct rspamd_http_connection * -rspamd_http_connection_ref (struct rspamd_http_connection *conn) -{ - conn->ref ++; - return conn; -} - -/** - * Decrease a refcount for a connection and free it if refcount is equal to zero - * @param conn - */ -static void -rspamd_http_connection_unref (struct rspamd_http_connection *conn) -{ - if (--conn->ref <= 0) { - rspamd_http_connection_free (conn); - } -} - -/** - * Reset connection for a new request - * @param conn - */ -void rspamd_http_connection_reset (struct rspamd_http_connection *conn); - -/** - * Create new HTTP reply - * @param code code to pass - * @return new reply object - */ -struct rspamd_http_message* rspamd_http_new_message (enum http_parser_type type); - -/** - * Append a header to reply - * @param rep - * @param name - * @param value - */ -void rspamd_http_message_add_header (struct rspamd_http_message *rep, const gchar *name, const gchar *value); - -/** - * Search for a specified header in message - * @param rep message - * @param name name of header - */ -const gchar* rspamd_http_message_find_header (struct rspamd_http_message *rep, const gchar *name); - -/** - * Free HTTP reply - * @param rep - */ -void 
rspamd_http_message_free (struct rspamd_http_message *msg); - -/** - * Parse HTTP date header and return it as time_t - * @param header HTTP date header - * @param len length of header - * @return time_t or (time_t)-1 in case of error - */ -time_t rspamd_http_parse_date (const gchar *header, gsize len); - -/** - * Create new http connection router and the associated HTTP connection - * @param eh error handler callback - * @param fh finish handler callback - * @param default_fs_path if not NULL try to serve static files from - * the specified directory - * @return - */ -struct rspamd_http_connection_router* rspamd_http_router_new ( - rspamd_http_router_error_handler_t eh, - rspamd_http_router_finish_handler_t fh, - struct timeval *timeout, - struct event_base *base, - const char *default_fs_path); - -/** - * Add new path to the router - */ -void rspamd_http_router_add_path (struct rspamd_http_connection_router *router, - const gchar *path, rspamd_http_router_handler_t handler); - -/** - * Handle new accepted socket - * @param router router object - * @param fd server socket - * @param ud opaque userdata - */ -void rspamd_http_router_handle_socket (struct rspamd_http_connection_router *router, - gint fd, gpointer ud); - -/** - * Free router and all connections associated - * @param router - */ -void rspamd_http_router_free (struct rspamd_http_connection_router *router); - -#endif /* HTTP_H_ */ diff --git a/src/images.c b/src/images.c deleted file mode 100644 index ff07bbd72..000000000 --- a/src/images.c +++ /dev/null @@ -1,255 +0,0 @@ -/* Copyright (c) 2010, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "images.h" -#include "main.h" -#include "message.h" - -static const guint8 png_signature[] = {137, 80, 78, 71, 13, 10, 26, 10}; -static const guint8 jpg_sig1[] = {0xff, 0xd8}; -static const guint8 jpg_sig2[] = {'J', 'F', 'I', 'F'}; -static const guint8 gif_signature[] = {'G', 'I', 'F', '8'}; -static const guint8 bmp_signature[] = {'B', 'M'}; - -static void process_image (struct rspamd_task *task, struct mime_part *part); - - -void -process_images (struct rspamd_task *task) -{ - GList *cur; - struct mime_part *part; - - cur = task->parts; - while (cur) { - part = cur->data; - if (g_mime_content_type_is_type (part->type, "image", "*") && part->content->len > 0) { - process_image (task, part); - } - cur = g_list_next (cur); - } - -} - -static enum known_image_types -detect_image_type (GByteArray *data) -{ - if (data->len > sizeof (png_signature) / sizeof (png_signature[0])) { - if (memcmp (data->data, png_signature, sizeof (png_signature)) == 0) { - return IMAGE_TYPE_PNG; - } - } - if 
(data->len > 10) { - if (memcmp (data->data, jpg_sig1, sizeof (jpg_sig1)) == 0) { - if (memcmp (data->data + 6, jpg_sig2, sizeof (jpg_sig2)) == 0) { - return IMAGE_TYPE_JPG; - } - } - } - if (data->len > sizeof (gif_signature) / sizeof (gif_signature[0])) { - if (memcmp (data->data, gif_signature, sizeof (gif_signature)) == 0) { - return IMAGE_TYPE_GIF; - } - } - if (data->len > sizeof (bmp_signature) / sizeof (bmp_signature[0])) { - if (memcmp (data->data, bmp_signature, sizeof (bmp_signature)) == 0) { - return IMAGE_TYPE_BMP; - } - } - - return IMAGE_TYPE_UNKNOWN; -} - - -static struct rspamd_image * -process_png_image (struct rspamd_task *task, GByteArray *data) -{ - struct rspamd_image *img; - guint32 t; - guint8 *p; - - if (data->len < 24) { - msg_info ("bad png detected (maybe striped): <%s>", task->message_id); - return NULL; - } - - /* In png we should find iHDR section and get data from it */ - /* Skip signature and read header section */ - p = data->data + 12; - if (memcmp (p, "IHDR", 4) != 0) { - msg_info ("png doesn't begins with IHDR section", task->message_id); - return NULL; - } - - img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image)); - img->type = IMAGE_TYPE_PNG; - img->data = data; - - p += 4; - memcpy (&t, p, sizeof (guint32)); - img->width = ntohl (t); - p += 4; - memcpy (&t, p, sizeof (guint32)); - img->height = ntohl (t); - - return img; -} - -static struct rspamd_image * -process_jpg_image (struct rspamd_task *task, GByteArray *data) -{ - guint8 *p; - guint16 t; - gsize remain; - struct rspamd_image *img; - - img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image)); - img->type = IMAGE_TYPE_JPG; - img->data = data; - - p = data->data; - remain = data->len; - /* In jpeg we should find any data stream (ff c0 .. 
ff c3) and extract its height and width */ - while (remain --) { - if (*p == 0xFF && remain > 8 && (*(p + 1) >= 0xC0 && *(p + 1) <= 0xC3)) { - memcpy (&t, p + 5, sizeof (guint16)); - img->height = ntohs (t); - memcpy (&t, p + 7, sizeof (guint16)); - img->width = ntohs (t); - return img; - } - p ++; - } - - return NULL; -} - -static struct rspamd_image * -process_gif_image (struct rspamd_task *task, GByteArray *data) -{ - struct rspamd_image *img; - guint8 *p; - guint16 t; - - if (data->len < 10) { - msg_info ("bad gif detected (maybe striped): <%s>", task->message_id); - return NULL; - } - - img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image)); - img->type = IMAGE_TYPE_GIF; - img->data = data; - - p = data->data + 6; - memcpy (&t, p, sizeof (guint16)); - img->width = GUINT16_FROM_LE (t); - memcpy (&t, p + 2, sizeof (guint16)); - img->height = GUINT16_FROM_LE (t); - - return img; -} - -static struct rspamd_image * -process_bmp_image (struct rspamd_task *task, GByteArray *data) -{ - struct rspamd_image *img; - gint32 t; - guint8 *p; - - - - if (data->len < 28) { - msg_info ("bad bmp detected (maybe striped): <%s>", task->message_id); - return NULL; - } - - img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image)); - img->type = IMAGE_TYPE_BMP; - img->data = data; - p = data->data + 18; - memcpy (&t, p, sizeof (gint32)); - img->width = abs (GINT32_FROM_LE (t)); - memcpy (&t, p + 4, sizeof (gint32)); - img->height = abs (GINT32_FROM_LE (t)); - - return img; -} - -static void -process_image (struct rspamd_task *task, struct mime_part *part) -{ - enum known_image_types type; - struct rspamd_image *img = NULL; - if ((type = detect_image_type (part->content)) != IMAGE_TYPE_UNKNOWN) { - switch (type) { - case IMAGE_TYPE_PNG: - img = process_png_image (task, part->content); - break; - case IMAGE_TYPE_JPG: - img = process_jpg_image (task, part->content); - break; - case IMAGE_TYPE_GIF: - img = process_gif_image (task, part->content); - 
break; - case IMAGE_TYPE_BMP: - img = process_bmp_image (task, part->content); - break; - default: - img = NULL; - break; - } - } - - if (img != NULL) { - debug_task ("detected %s image of size %ud x %ud in message <%s>", - image_type_str (img->type), - img->width, img->height, - task->message_id); - img->filename = part->filename; - task->images = g_list_prepend (task->images, img); - } -} - -const gchar * -image_type_str (enum known_image_types type) -{ - switch (type) { - case IMAGE_TYPE_PNG: - return "PNG"; - break; - case IMAGE_TYPE_JPG: - return "JPEG"; - break; - case IMAGE_TYPE_GIF: - return "GIF"; - break; - case IMAGE_TYPE_BMP: - return "BMP"; - break; - default: - return "unknown"; - } - - return "unknown"; -} diff --git a/src/images.h b/src/images.h deleted file mode 100644 index c43941ebc..000000000 --- a/src/images.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef IMAGES_H_ -#define IMAGES_H_ - -#include "config.h" -#include "main.h" - -enum known_image_types { - IMAGE_TYPE_PNG, - IMAGE_TYPE_JPG, - IMAGE_TYPE_GIF, - IMAGE_TYPE_BMP, - IMAGE_TYPE_UNKNOWN = 9000 -}; - -struct rspamd_image { - enum known_image_types type; - GByteArray *data; - guint32 width; - guint32 height; - const gchar *filename; -}; - -/* - * Process images from a worker task - */ -void process_images (struct rspamd_task *task); - -/* - * Get textual representation of an image's type - */ -const gchar *image_type_str (enum known_image_types type); - -#endif /* IMAGES_H_ */ diff --git a/src/libmime/CMakeLists.txt b/src/libmime/CMakeLists.txt new file mode 100644 index 000000000..303b7a088 --- /dev/null +++ b/src/libmime/CMakeLists.txt @@ -0,0 +1,29 @@ +# Librspamd mime +SET(LIBRSPAMDMIMESRC + expressions.c + filter.c + images.c + message.c + protocol.c + smtp_utils.c + smtp_proto.c + worker_util.c) + +# Librspamdmime +ADD_LIBRARY(rspamd-mime ${LINK_TYPE} ${LIBRSPAMDMIMESRC}) +IF(NOT DEBIAN_BUILD) +SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES VERSION ${RSPAMD_VERSION}) +ENDIF(NOT DEBIAN_BUILD) 
+SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES LINKER_LANGUAGE C) +SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB") +TARGET_LINK_LIBRARIES(rspamd-mime rspamd-server) +TARGET_LINK_LIBRARIES(rspamd-mime rspamd-util) +IF(CMAKE_COMPILER_IS_GNUCC) +SET_TARGET_PROPERTIES(rspamd-mime PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB -fno-strict-aliasing") +ENDIF(CMAKE_COMPILER_IS_GNUCC) + +IF(NO_SHARED MATCHES "OFF") + INSTALL(TARGETS rspamd-mime + LIBRARY DESTINATION ${LIBDIR} + PUBLIC_HEADER DESTINATION ${INCLUDEDIR}) +ENDIF(NO_SHARED MATCHES "OFF") \ No newline at end of file diff --git a/src/libmime/expressions.c b/src/libmime/expressions.c new file mode 100644 index 000000000..5d19626bb --- /dev/null +++ b/src/libmime/expressions.c @@ -0,0 +1,1452 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "util.h" +#include "cfg_file.h" +#include "main.h" +#include "message.h" +#include "fuzzy.h" +#include "expressions.h" +#include "html.h" +#include "lua/lua_common.h" +#include "diff.h" + +gboolean rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_header_exists (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_parts_distance (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_recipients_distance (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_has_only_html_part (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_is_recipients_sorted (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_compare_transfer_encoding (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_is_html_balanced (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_has_html_tag (struct rspamd_task *task, GList * args, void *unused); +gboolean rspamd_has_fake_html (struct rspamd_task *task, GList * args, void *unused); + +/* + * List of internal functions of rspamd + * Sorted by name to use bsearch + */ +static struct _fl { + const gchar *name; + rspamd_internal_func_t func; + void *user_data; +} rspamd_functions_list[] = { + {"compare_encoding", rspamd_compare_encoding, NULL}, + {"compare_parts_distance", rspamd_parts_distance, NULL}, + {"compare_recipients_distance", 
rspamd_recipients_distance, NULL}, + {"compare_transfer_encoding", rspamd_compare_transfer_encoding, NULL}, + {"has_fake_html", rspamd_has_fake_html, NULL}, + {"has_html_tag", rspamd_has_html_tag, NULL}, + {"has_only_html_part", rspamd_has_only_html_part, NULL}, + {"header_exists", rspamd_header_exists, NULL}, + {"is_html_balanced", rspamd_is_html_balanced, NULL}, + {"is_recipients_sorted", rspamd_is_recipients_sorted, NULL} +}; + +static struct _fl *list_ptr = &rspamd_functions_list[0]; +static guint32 functions_number = sizeof (rspamd_functions_list) / sizeof (struct _fl); +static gboolean list_allocated = FALSE; + +/* Bsearch routine */ +static gint +fl_cmp (const void *s1, const void *s2) +{ + struct _fl *fl1 = (struct _fl *)s1; + struct _fl *fl2 = (struct _fl *)s2; + return strcmp (fl1->name, fl2->name); +} + +/* Cache for regular expressions that are used in functions */ +void * +re_cache_check (const gchar *line, rspamd_mempool_t *pool) +{ + GHashTable *re_cache; + + re_cache = rspamd_mempool_get_variable (pool, "re_cache"); + + if (re_cache == NULL) { + re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + rspamd_mempool_set_variable (pool, "re_cache", re_cache, (rspamd_mempool_destruct_t)g_hash_table_destroy); + return NULL; + } + return g_hash_table_lookup (re_cache, line); +} + +void +re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool) +{ + GHashTable *re_cache; + + re_cache = rspamd_mempool_get_variable (pool, "re_cache"); + + if (re_cache == NULL) { + re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + rspamd_mempool_set_variable (pool, "re_cache", re_cache, (rspamd_mempool_destruct_t)g_hash_table_destroy); + } + + g_hash_table_insert (re_cache, (gpointer)line, pointer); +} + +void +re_cache_del (const gchar *line, rspamd_mempool_t *pool) +{ + GHashTable *re_cache; + + re_cache = rspamd_mempool_get_variable (pool, "re_cache"); + + if (re_cache != NULL) { + g_hash_table_remove (re_cache, line); + } + +} + 
+/* + * Functions for parsing expressions + */ +struct expression_stack { + gchar op; + struct expression_stack *next; +}; + +/* + * Push operand or operator to stack + */ +static struct expression_stack * +push_expression_stack (rspamd_mempool_t * pool, struct expression_stack *head, gchar op) +{ + struct expression_stack *new; + new = rspamd_mempool_alloc (pool, sizeof (struct expression_stack)); + new->op = op; + new->next = head; + return new; +} + +/* + * Delete symbol from stack, return pointer to operand or operator (casted to void* ) + */ +static gchar +delete_expression_stack (struct expression_stack **head) +{ + struct expression_stack *cur; + gchar res; + + if (*head == NULL) + return 0; + + cur = *head; + res = cur->op; + + *head = cur->next; + return res; +} + +/* + * Return operation priority + */ +static gint +logic_priority (gchar a) +{ + switch (a) { + case '!': + return 3; + case '|': + case '&': + return 2; + case '(': + return 1; + default: + return 0; + } +} + +/* + * Return FALSE if symbol is not operation symbol (operand) + * Return TRUE if symbol is operation symbol + */ +static gboolean +is_operation_symbol (gchar *a) +{ + switch (*a) { + case '!': + case '&': + case '|': + case '(': + case ')': + return TRUE; + case 'O': + case 'o': + if (g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) == 0&& g_ascii_isspace (a[2])) { + return TRUE; + } + break; + case 'A': + case 'a': + if (g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0&& g_ascii_isspace (a[3])) { + return TRUE; + } + break; + case 'N': + case 'n': + if (g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0 && g_ascii_isspace (a[3])) { + return TRUE; + } + break; + } + + return FALSE; +} + +/* Return character representation of operation */ +static gchar +op_to_char (gchar *a, gchar **next) +{ + switch (*a) { + case '!': + case '&': + case '|': + case '(': + case ')': + *next = a + 1; + return *a; + case 'O': + case 'o': + if (g_ascii_strncasecmp (a, "or", sizeof ("or") - 1) 
== 0) { + *next = a + sizeof ("or") - 1; + return '|'; + } + break; + case 'A': + case 'a': + if (g_ascii_strncasecmp (a, "and", sizeof ("and") - 1) == 0) { + *next = a + sizeof ("and") - 1; + return '&'; + } + break; + case 'N': + case 'n': + if (g_ascii_strncasecmp (a, "not", sizeof ("not") - 1) == 0) { + *next = a + sizeof ("not") - 1; + return '!'; + } + break; + } + + return '\0'; +} + +/* + * Return TRUE if symbol can be regexp flag + */ +static gboolean +is_regexp_flag (gchar a) +{ + switch (a) { + case 'i': + case 'm': + case 'x': + case 's': + case 'u': + case 'o': + case 'r': + case 'H': + case 'M': + case 'P': + case 'U': + case 'X': + case 'T': + case 'S': + return TRUE; + default: + return FALSE; + } +} + +static void +insert_expression (rspamd_mempool_t * pool, struct expression **head, gint type, gchar op, void *operand, const gchar *orig) +{ + struct expression *new, *cur; + + new = rspamd_mempool_alloc (pool, sizeof (struct expression)); + new->type = type; + new->orig = orig; + if (new->type != EXPR_OPERATION) { + new->content.operand = operand; + } + else { + new->content.operation = op; + } + new->next = NULL; + + if (!*head) { + *head = new; + } + else { + cur = *head; + while (cur->next) { + cur = cur->next; + } + cur->next = new; + } +} + +static struct expression * +maybe_parse_expression (rspamd_mempool_t * pool, gchar *line) +{ + struct expression *expr; + gchar *p = line; + + while (*p) { + if (is_operation_symbol (p)) { + return parse_expression (pool, line); + } + p++; + } + + expr = rspamd_mempool_alloc (pool, sizeof (struct expression)); + expr->type = EXPR_STR; + expr->content.operand = rspamd_mempool_strdup (pool, line); + expr->next = NULL; + + return expr; +} + +/* + * Make inverse polish record for specified expression + * Memory is allocated from given pool + */ +struct expression * +parse_expression (rspamd_mempool_t * pool, gchar *line) +{ + struct expression *expr = NULL; + struct expression_stack *stack = NULL; + struct 
expression_function *func = NULL; + struct expression *arg; + GQueue *function_stack; + gchar *p, *c, *str, op, newop, *copy, *next; + gboolean in_regexp = FALSE; + gint brackets = 0; + + enum { + SKIP_SPACES, + READ_OPERATOR, + READ_REGEXP, + READ_REGEXP_FLAGS, + READ_FUNCTION, + READ_FUNCTION_ARGUMENT, + } state = SKIP_SPACES; + + if (line == NULL || pool == NULL) { + return NULL; + } + + msg_debug ("parsing expression {{ %s }}", line); + + function_stack = g_queue_new (); + copy = rspamd_mempool_strdup (pool, line); + p = line; + c = p; + while (*p) { + switch (state) { + case SKIP_SPACES: + if (!g_ascii_isspace (*p)) { + if (is_operation_symbol (p)) { + state = READ_OPERATOR; + } + else if (*p == '/') { + c = ++p; + state = READ_REGEXP; + } + else { + c = p; + state = READ_FUNCTION; + } + } + else { + p++; + } + break; + case READ_OPERATOR: + if (*p == ')') { + if (stack == NULL) { + return NULL; + } + /* Pop all operators from stack to nearest '(' or to head */ + while (stack && stack->op != '(') { + op = delete_expression_stack (&stack); + if (op != '(') { + insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy); + } + } + if (stack) { + op = delete_expression_stack (&stack); + } + } + else if (*p == '(') { + /* Push it to stack */ + stack = push_expression_stack (pool, stack, *p); + } + else { + if (stack == NULL) { + newop = op_to_char (p, &next); + if (newop != '\0') { + stack = push_expression_stack (pool, stack, newop); + p = next; + state = SKIP_SPACES; + continue; + } + } + /* Check priority of logic operation */ + else { + newop = op_to_char (p, &next); + if (newop != '\0') { + if (logic_priority (stack->op) < logic_priority (newop)) { + stack = push_expression_stack (pool, stack, newop); + } + else { + /* Pop all operations that have higher priority than this one */ + while ((stack != NULL) && (logic_priority (stack->op) >= logic_priority (newop))) { + op = delete_expression_stack (&stack); + if (op != '(') { + insert_expression (pool, 
&expr, EXPR_OPERATION, op, NULL, copy); + } + } + stack = push_expression_stack (pool, stack, newop); + } + } + p = next; + state = SKIP_SPACES; + continue; + } + } + p++; + state = SKIP_SPACES; + break; + + case READ_REGEXP: + if (*p == '/' && *(p - 1) != '\\') { + if (*(p + 1)) { + p++; + } + state = READ_REGEXP_FLAGS; + } + else { + p++; + } + break; + + case READ_REGEXP_FLAGS: + if (!is_regexp_flag (*p) || *(p + 1) == '\0') { + if (c != p) { + if ((is_regexp_flag (*p) || *p == '/') && *(p + 1) == '\0') { + p++; + } + str = rspamd_mempool_alloc (pool, p - c + 2); + rspamd_strlcpy (str, c - 1, (p - c + 2)); + g_strstrip (str); + msg_debug ("found regexp: %s", str); + if (strlen (str) > 0) { + insert_expression (pool, &expr, EXPR_REGEXP, 0, str, copy); + } + } + c = p; + state = SKIP_SPACES; + } + else { + p++; + } + break; + + case READ_FUNCTION: + if (*p == '/') { + /* In fact it is regexp */ + state = READ_REGEXP; + c++; + p++; + } + else if (*p == '(') { + func = rspamd_mempool_alloc (pool, sizeof (struct expression_function)); + func->name = rspamd_mempool_alloc (pool, p - c + 1); + func->args = NULL; + rspamd_strlcpy (func->name, c, (p - c + 1)); + g_strstrip (func->name); + state = READ_FUNCTION_ARGUMENT; + g_queue_push_tail (function_stack, func); + insert_expression (pool, &expr, EXPR_FUNCTION, 0, func, copy); + c = ++p; + } + else if (is_operation_symbol (p)) { + /* In fact it is not function, but symbol */ + if (c != p) { + str = rspamd_mempool_alloc (pool, p - c + 1); + rspamd_strlcpy (str, c, (p - c + 1)); + g_strstrip (str); + if (strlen (str) > 0) { + insert_expression (pool, &expr, EXPR_STR, 0, str, copy); + } + } + state = READ_OPERATOR; + } + else if (*(p + 1) == '\0') { + /* In fact it is not function, but symbol */ + p++; + if (c != p) { + str = rspamd_mempool_alloc (pool, p - c + 1); + rspamd_strlcpy (str, c, (p - c + 1)); + g_strstrip (str); + if (strlen (str) > 0) { + insert_expression (pool, &expr, EXPR_STR, 0, str, copy); + } + } + state = 
SKIP_SPACES; + } + else { + p++; + } + break; + + case READ_FUNCTION_ARGUMENT: + if (*p == '/' && !in_regexp) { + in_regexp = TRUE; + p++; + } + if (!in_regexp) { + /* Append argument to list */ + if (*p == ',' || (*p == ')' && brackets == 0)) { + arg = NULL; + str = rspamd_mempool_alloc (pool, p - c + 1); + rspamd_strlcpy (str, c, (p - c + 1)); + g_strstrip (str); + /* Recursive call */ + arg = maybe_parse_expression (pool, str); + func->args = g_list_append (func->args, arg); + /* Pop function */ + if (*p == ')') { + /* Last function in chain, goto skipping spaces state */ + func = g_queue_pop_tail (function_stack); + if (g_queue_get_length (function_stack) == 0) { + state = SKIP_SPACES; + } + } + c = p + 1; + } + else if (*p == '(') { + brackets++; + } + else if (*p == ')') { + brackets--; + } + } + else if (*p == '/' && *(p - 1) != '\\') { + in_regexp = FALSE; + } + p++; + break; + } + } + + g_queue_free (function_stack); + if (state != SKIP_SPACES) { + /* In fact we got bad expression */ + msg_warn ("expression \"%s\" is invalid", line); + return NULL; + } + /* Pop everything from stack */ + while (stack != NULL) { + op = delete_expression_stack (&stack); + if (op != '(') { + insert_expression (pool, &expr, EXPR_OPERATION, op, NULL, copy); + } + } + + return expr; +} + +/* + * Rspamd regexp utility functions + */ +struct rspamd_regexp * +parse_regexp (rspamd_mempool_t * pool, const gchar *line, gboolean raw_mode) +{ + const gchar *begin, *end, *p, *src, *start; + gchar *dbegin, *dend; + struct rspamd_regexp *result, *check; + gint regexp_flags = G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE; + GError *err = NULL; + + if (line == NULL) { + msg_err ("cannot parse NULL line"); + return NULL; + } + + src = line; + result = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_regexp)); + /* Skip whitespaces */ + while (g_ascii_isspace (*line)) { + line++; + } + if (*line == '\0') { + msg_warn ("got empty regexp"); + return NULL; + } + start = line; + /* First try to 
find header name */ + begin = strchr (line, '/'); + if (begin != NULL) { + p = begin; + end = NULL; + while (p != line) { + if (*p == '=') { + end = p; + break; + } + p --; + } + if (end) { + result->header = rspamd_mempool_alloc (pool, end - line + 1); + rspamd_strlcpy (result->header, line, end - line + 1); + result->type = REGEXP_HEADER; + line = end; + } + } + else { + result->header = rspamd_mempool_strdup (pool, line); + result->type = REGEXP_HEADER; + line = start; + } + /* Find begin of regexp */ + while (*line && *line != '/') { + line++; + } + if (*line != '\0') { + begin = line + 1; + } + else if (result->header == NULL) { + /* Assume that line without // is just a header name */ + result->header = rspamd_mempool_strdup (pool, line); + result->type = REGEXP_HEADER; + return result; + } + else { + /* We got header name earlier but have not found // expression, so it is invalid regexp */ + msg_warn ("got no header name (eg. header=) but without corresponding regexp, %s", src); + return NULL; + } + /* Find end */ + end = begin; + while (*end && (*end != '/' || *(end - 1) == '\\')) { + end++; + } + if (end == begin || *end != '/') { + msg_warn ("no trailing / in regexp %s", src); + return NULL; + } + /* Parse flags */ + p = end + 1; + while (p != NULL) { + switch (*p) { + case 'i': + regexp_flags |= G_REGEX_CASELESS; + p++; + break; + case 'm': + regexp_flags |= G_REGEX_MULTILINE; + p++; + break; + case 's': + regexp_flags |= G_REGEX_DOTALL; + p++; + break; + case 'x': + regexp_flags |= G_REGEX_EXTENDED; + p++; + break; + case 'u': + regexp_flags |= G_REGEX_UNGREEDY; + p++; + break; + case 'o': + regexp_flags |= G_REGEX_OPTIMIZE; + p++; + break; + case 'r': + regexp_flags |= G_REGEX_RAW; + result->is_raw = TRUE; + p++; + break; + /* Type flags */ + case 'H': + if (result->type == REGEXP_NONE) { + result->type = REGEXP_HEADER; + } + p++; + break; + case 'M': + if (result->type == REGEXP_NONE) { + result->type = REGEXP_MESSAGE; + } + p++; + break; + case 'P': 
+ if (result->type == REGEXP_NONE) { + result->type = REGEXP_MIME; + } + p++; + break; + case 'U': + if (result->type == REGEXP_NONE) { + result->type = REGEXP_URL; + } + p++; + break; + case 'X': + if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) { + result->type = REGEXP_RAW_HEADER; + } + p++; + break; + case 'T': + result->is_test = TRUE; + p ++; + break; + case 'S': + result->is_strong = TRUE; + p ++; + break; + /* Stop flags parsing */ + default: + p = NULL; + break; + } + } + + result->regexp_text = rspamd_mempool_strdup (pool, start); + dbegin = result->regexp_text + (begin - start); + dend = result->regexp_text + (end - start); + *dend = '\0'; + + if (raw_mode) { + regexp_flags |= G_REGEX_RAW; + } + + /* Avoid multiply regexp structures for similar regexps */ + if ((check = (struct rspamd_regexp *)re_cache_check (result->regexp_text, pool)) != NULL) { + /* Additional check for headers */ + if (result->type == REGEXP_HEADER || result->type == REGEXP_RAW_HEADER) { + if (result->header && check->header) { + if (strcmp (result->header, check->header) == 0) { + return check; + } + } + } + else { + return check; + } + } + result->regexp = g_regex_new (dbegin, regexp_flags, 0, &err); + if ((regexp_flags & G_REGEX_RAW) != 0) { + result->raw_regexp = result->regexp; + } + else { + result->raw_regexp = g_regex_new (dbegin, regexp_flags | G_REGEX_RAW, 0, &err); + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_regex_unref, (void *)result->raw_regexp); + } + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_regex_unref, (void *)result->regexp); + + *dend = '/'; + + if (result->regexp == NULL || err != NULL) { + msg_warn ("could not read regexp: %s while reading regexp %s", err->message, src); + return NULL; + } + + if (result->raw_regexp == NULL || err != NULL) { + msg_warn ("could not read raw regexp: %s while reading regexp %s", err->message, src); + return NULL; + } + + /* Add to cache for further usage */ + 
re_cache_add (result->regexp_text, result, pool); + return result; +} + +gboolean +call_expression_function (struct expression_function * func, struct rspamd_task * task, lua_State *L) +{ + struct _fl *selected, key; + + key.name = func->name; + + selected = bsearch (&key, list_ptr, functions_number, sizeof (struct _fl), fl_cmp); + if (selected == NULL) { + /* Try to check lua function */ + return FALSE; + } + + return selected->func (task, func->args, selected->user_data); +} + +struct expression_argument * +get_function_arg (struct expression *expr, struct rspamd_task *task, gboolean want_string) +{ + GQueue *stack; + gsize cur, op1, op2; + struct expression_argument *res; + struct expression *it; + + if (expr == NULL) { + msg_warn ("NULL expression passed"); + return NULL; + } + if (expr->next == NULL) { + res = rspamd_mempool_alloc (task->task_pool, sizeof (struct expression_argument)); + if (expr->type == EXPR_REGEXP || expr->type == EXPR_STR || expr->type == EXPR_REGEXP_PARSED) { + res->type = EXPRESSION_ARGUMENT_NORMAL; + res->data = expr->content.operand; + } + else if (expr->type == EXPR_FUNCTION && !want_string) { + res->type = EXPRESSION_ARGUMENT_BOOL; + cur = call_expression_function (expr->content.operand, task, NULL); + res->data = GSIZE_TO_POINTER (cur); + } + else { + msg_warn ("cannot parse argument: it contains operator or bool expression that is not wanted"); + return NULL; + } + return res; + } + else if (!want_string) { + /* Chain of several items: evaluate it as a postfix boolean expression using a queue as operand stack */ + res = rspamd_mempool_alloc (task->task_pool, sizeof (struct expression_argument)); + res->type = EXPRESSION_ARGUMENT_BOOL; + stack = g_queue_new (); + it = expr; + + while (it) { + if (it->type == EXPR_REGEXP || it->type == EXPR_REGEXP_PARSED || it->type == EXPR_STR) { + g_queue_free (stack); + res->type = EXPRESSION_ARGUMENT_EXPR; + res->data = expr; + return res; + } + else if (it->type == EXPR_FUNCTION) { + cur = (gsize) call_expression_function ((struct expression_function *)it->content.operand, task, NULL); + debug_task 
("function %s returned %s", ((struct expression_function *)it->content.operand)->name, cur ? "true" : "false"); + /* Function result is an operand for the operations that follow, so it must go on the stack */ + g_queue_push_head (stack, GSIZE_TO_POINTER (cur)); + } + else if (it->type == EXPR_OPERATION) { + if (g_queue_is_empty (stack)) { + /* Queue has no operands for operation, exiting */ + debug_task ("invalid expression"); + g_queue_free (stack); + return NULL; + } + switch (it->content.operation) { + case '!': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op1 = !op1; + g_queue_push_head (stack, GSIZE_TO_POINTER (op1)); + break; + case '&': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2)); + break; + case '|': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2)); + break; + default: + it = it->next; + continue; + } + } + if (it) { + it = it->next; + } + } + if (!g_queue_is_empty (stack)) { + res->data = g_queue_pop_head (stack); + } + else { + res->data = GSIZE_TO_POINTER (FALSE); + } + /* The queue is heap-allocated by g_queue_new, release it on the normal path as well */ + g_queue_free (stack); + + return res; + } + + msg_warn ("invalid expression argument"); + + return NULL; +} + +void +register_expression_function (const gchar *name, rspamd_internal_func_t func, void *user_data) +{ + static struct _fl *new; + + functions_number++; + + new = g_new (struct _fl, functions_number); + memcpy (new, list_ptr, (functions_number - 1) * sizeof (struct _fl)); + if (list_allocated) { + g_free (list_ptr); + } + + list_allocated = TRUE; + new[functions_number - 1].name = name; + new[functions_number - 1].func = func; + new[functions_number - 1].user_data = user_data; + qsort (new, functions_number, sizeof (struct _fl), fl_cmp); + list_ptr = new; +} + +gboolean +rspamd_compare_encoding (struct rspamd_task *task, GList * args, void *unused) +{ + struct expression_argument *arg; + + if (args == NULL || task == NULL) { + return FALSE; + } + + arg = get_function_arg (args->data, task, TRUE); + if 
(arg->type == EXPRESSION_ARGUMENT_BOOL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + + /* XXX: really write this function */ + return TRUE; +} + +gboolean +rspamd_header_exists (struct rspamd_task * task, GList * args, void *unused) +{ + struct expression_argument *arg; + GList *headerlist; + + if (args == NULL || task == NULL) { + return FALSE; + } + + arg = get_function_arg (args->data, task, TRUE); + if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + + debug_task ("try to get header %s", (gchar *)arg->data); + headerlist = message_get_header (task->task_pool, task->message, (gchar *)arg->data, FALSE); + if (headerlist) { + g_list_free (headerlist); + return TRUE; + } + return FALSE; +} + +/* + * This function is designed to find difference between text/html and text/plain parts + * It takes one argument: difference threshold, if we have two text parts, compare + * its hashes and check for threshold, if value is greater than threshold, return TRUE + * and return FALSE otherwise. 
+ */ +gboolean +rspamd_parts_distance (struct rspamd_task * task, GList * args, void *unused) +{ + gint threshold, threshold2 = -1, diff; + struct mime_text_part *p1, *p2; + GList *cur; + struct expression_argument *arg; + GMimeObject *parent; + const GMimeContentType *ct; + gint *pdiff; + + if (args == NULL) { + debug_task ("no threshold is specified, assume it 100"); + threshold = 100; + } + else { + arg = get_function_arg (args->data, task, TRUE); + /* get_function_arg returns NULL for arguments that are not plain strings */ + if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + /* Reset errno right before the conversion so that only strtoul can set it */ + errno = 0; + threshold = strtoul ((gchar *)arg->data, NULL, 10); + if (errno != 0) { + msg_info ("bad numeric value for threshold \"%s\", assume it 100", (gchar *)arg->data); + threshold = 100; + } + if (args->next) { + arg = get_function_arg (args->next->data, task, TRUE); + if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + errno = 0; + threshold2 = strtoul ((gchar *)arg->data, NULL, 10); + if (errno != 0) { + msg_info ("bad numeric value for threshold \"%s\", ignore it", (gchar *)arg->data); + threshold2 = -1; + } + } + } + + if ((pdiff = rspamd_mempool_get_variable (task->task_pool, "parts_distance")) != NULL) { + diff = *pdiff; + if (diff != -1) { + if (threshold2 > 0) { + if (diff >= MIN (threshold, threshold2) && diff < MAX (threshold, threshold2)) { + return TRUE; + } + } + else { + if (diff <= threshold) { + return TRUE; + } + } + return FALSE; + } + else { + return FALSE; + } + } + + if (g_list_length (task->text_parts) == 2) { + cur = g_list_first (task->text_parts); + p1 = cur->data; + cur = g_list_next (cur); + pdiff = rspamd_mempool_alloc (task->task_pool, sizeof (gint)); + *pdiff = -1; + + if (cur == NULL) { + msg_info ("bad parts list"); + return FALSE; + } + p2 = cur->data; + /* First of all check parent object */ + if (p1->parent && p1->parent == p2->parent) { + parent = p1->parent; + ct = g_mime_object_get_content_type (parent); +#ifndef GMIME24 + if (ct == NULL || ! g_mime_content_type_is_type (ct, "multipart", "alternative")) { +#else + if (ct == NULL || ! 
g_mime_content_type_is_type ((GMimeContentType *)ct, "multipart", "alternative")) { +#endif + debug_task ("two parts are not belong to multipart/alternative container, skip check"); + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + return FALSE; + } + } + else { + debug_task ("message contains two parts but they are in different multi-parts"); + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + return FALSE; + } + if (!p1->is_empty && !p2->is_empty) { + if (p1->diff_str != NULL && p2->diff_str != NULL) { + diff = compare_diff_distance_normalized (p1->diff_str, p2->diff_str); + } + else { + diff = fuzzy_compare_parts (p1, p2); + } + debug_task ("got likeliness between parts of %d%%, threshold is %d%%", diff, threshold); + *pdiff = diff; + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + if (threshold2 > 0) { + if (diff >= MIN (threshold, threshold2) && diff < MAX (threshold, threshold2)) { + return TRUE; + } + } + else { + if (diff <= threshold) { + return TRUE; + } + } + } + else if ((p1->is_empty && !p2->is_empty) || (!p1->is_empty && p2->is_empty)) { + /* Empty and non empty parts are different */ + *pdiff = 0; + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + return TRUE; + } + } + else { + debug_task ("message has too many text parts, so do not try to compare them with each other"); + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + return FALSE; + } + + rspamd_mempool_set_variable (task->task_pool, "parts_distance", pdiff, NULL); + return FALSE; +} + +struct addr_list { + const gchar *name; + const gchar *addr; +}; + +#define COMPARE_RCPT_LEN 3 +#define MIN_RCPT_TO_COMPARE 7 + +gboolean +rspamd_recipients_distance (struct rspamd_task *task, GList * args, void *unused) +{ + struct expression_argument *arg; + InternetAddressList *cur; + InternetAddress *addr; + double threshold; + struct addr_list *ar; + 
gchar *c; + gint num, i, j, hits = 0, total = 0; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + + arg = get_function_arg (args->data, task, TRUE); + /* get_function_arg returns NULL for arguments that are not plain strings */ + if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + errno = 0; + threshold = strtod ((gchar *)arg->data, NULL); + if (errno != 0) { + msg_warn ("invalid numeric value '%s': %s", (gchar *)arg->data, strerror (errno)); + return FALSE; + } + + if (!task->rcpts) { + return FALSE; + } + num = internet_address_list_length (task->rcpts); + if (num < MIN_RCPT_TO_COMPARE) { + return FALSE; + } + ar = rspamd_mempool_alloc0 (task->task_pool, num * sizeof (struct addr_list)); + + /* Fill array */ + cur = task->rcpts; +#ifdef GMIME24 + for (i = 0; i < num; i ++) { + addr = internet_address_list_get_address (cur, i); + ar[i].name = rspamd_mempool_strdup (task->task_pool, internet_address_get_name (addr)); + if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) { + *c = '\0'; + ar[i].addr = c + 1; + } + } +#else + i = 0; + while (cur) { + addr = internet_address_list_get_address (cur); + if (addr && internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) { + ar[i].name = rspamd_mempool_strdup (task->task_pool, internet_address_get_addr (addr)); + if (ar[i].name != NULL && (c = strchr (ar[i].name, '@')) != NULL) { + *c = '\0'; + ar[i].addr = c + 1; + } + cur = internet_address_list_next (cur); + i++; + } + else { + cur = internet_address_list_next (cur); + } + } +#endif + + /* Cycle all elements in array */ + for (i = 0; i < num; i++) { + for (j = i + 1; j < num; j++) { + if (ar[i].name && ar[j].name && g_ascii_strncasecmp (ar[i].name, ar[j].name, COMPARE_RCPT_LEN) == 0) { + /* Common name part */ + hits++; + } + else if (ar[i].addr && ar[j].addr && g_ascii_strcasecmp (ar[i].addr, ar[j].addr) == 0) { + /* Common address part, but different name */ + hits++; + } + total++; + } + } + + if ((double)(hits * num / 2.) 
/ (double)total >= threshold) { + return TRUE; + } + + return FALSE; +} + +gboolean +rspamd_has_only_html_part (struct rspamd_task * task, GList * args, void *unused) +{ + struct mime_text_part *p; + GList *cur; + gboolean res = FALSE; + + cur = g_list_first (task->text_parts); + while (cur) { + p = cur->data; + if (p->is_html) { + res = TRUE; + } + else { + res = FALSE; + break; + } + cur = g_list_next (cur); + } + + return res; +} + +static gboolean +is_recipient_list_sorted (const InternetAddressList * ia) +{ + const InternetAddressList *cur; + InternetAddress *addr; + gboolean res = TRUE; + struct addr_list current = { NULL, NULL }, previous = { + NULL, NULL}; +#ifdef GMIME24 + gint num, i; +#endif + + /* Do not check to short address lists */ + if (internet_address_list_length ((InternetAddressList *)ia) < MIN_RCPT_TO_COMPARE) { + return FALSE; + } +#ifdef GMIME24 + num = internet_address_list_length ((InternetAddressList *)ia); + cur = ia; + for (i = 0; i < num; i ++) { + addr = internet_address_list_get_address ((InternetAddressList *)cur, i); + current.addr = (gchar *)internet_address_get_name (addr); + if (previous.addr != NULL) { + if (current.addr && g_ascii_strcasecmp (current.addr, previous.addr) < 0) { + res = FALSE; + break; + } + } + previous.addr = current.addr; + } +#else + cur = ia; + while (cur) { + addr = internet_address_list_get_address (cur); + if (internet_address_get_type (addr) == INTERNET_ADDRESS_NAME) { + current.addr = internet_address_get_addr (addr); + if (previous.addr != NULL) { + if (current.addr && g_ascii_strcasecmp (current.addr, previous.addr) < 0) { + res = FALSE; + break; + } + } + previous.addr = current.addr; + } + cur = internet_address_list_next (cur); + } +#endif + + return res; +} + +gboolean +rspamd_is_recipients_sorted (struct rspamd_task * task, GList * args, void *unused) +{ + /* Check all types of addresses */ + if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_TO)) 
== TRUE) { + return TRUE; + } + if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_BCC)) == TRUE) { + return TRUE; + } + if (is_recipient_list_sorted (g_mime_message_get_recipients (task->message, GMIME_RECIPIENT_TYPE_CC)) == TRUE) { + return TRUE; + } + + return FALSE; +} + +/* + * Compare the transfer encoding named by the first argument with the encoding + * of the top-level mime part of the message; a default part encoding is treated as 7bit + */ +gboolean +rspamd_compare_transfer_encoding (struct rspamd_task * task, GList * args, void *unused) +{ + GMimeObject *part; +#ifndef GMIME24 + GMimePartEncodingType enc_req, part_enc; +#else + GMimeContentEncoding enc_req, part_enc; +#endif + struct expression_argument *arg; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + + arg = get_function_arg (args->data, task, TRUE); + /* get_function_arg returns NULL for arguments that are not plain strings */ + if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } +#ifndef GMIME24 + enc_req = g_mime_part_encoding_from_string (arg->data); + if (enc_req == GMIME_PART_ENCODING_DEFAULT) { +#else + enc_req = g_mime_content_encoding_from_string (arg->data); + if (enc_req == GMIME_CONTENT_ENCODING_DEFAULT) { +#endif + msg_warn ("bad encoding type: %s", (gchar *)arg->data); + return FALSE; + } + + part = g_mime_message_get_mime_part (task->message); + if (part) { + if (GMIME_IS_PART (part)) { +#ifndef GMIME24 + part_enc = g_mime_part_get_encoding (GMIME_PART (part)); + if (part_enc == GMIME_PART_ENCODING_DEFAULT) { + /* Assume 7bit as default transfer encoding */ + part_enc = GMIME_PART_ENCODING_7BIT; + } +#else + part_enc = g_mime_part_get_content_encoding (GMIME_PART (part)); + if (part_enc == GMIME_CONTENT_ENCODING_DEFAULT) { + /* Assume 7bit as default transfer encoding */ + part_enc = GMIME_CONTENT_ENCODING_7BIT; + } +#endif + + + debug_task ("got encoding in part: %d and compare with %d", (gint)part_enc, (gint)enc_req); +#ifndef GMIME24 + g_object_unref (part); +#endif + + return part_enc == enc_req; + } +#ifndef GMIME24 + g_object_unref (part); +#endif + } + + return FALSE; +} + +gboolean +rspamd_is_html_balanced (struct rspamd_task * task, GList * args, void *unused) +{ + struct mime_text_part *p; + 
GList *cur; + gboolean res = TRUE; + + cur = g_list_first (task->text_parts); + while (cur) { + p = cur->data; + if (!p->is_empty && p->is_html) { + if (p->is_balanced) { + res = TRUE; + } + else { + res = FALSE; + break; + } + } + cur = g_list_next (cur); + } + + return res; + +} + +struct html_callback_data { + struct html_tag *tag; + gboolean *res; +}; + +static gboolean +search_html_node_callback (GNode * node, gpointer data) +{ + struct html_callback_data *cd = data; + struct html_node *nd; + + nd = node->data; + if (nd) { + if (nd->tag == cd->tag) { + *cd->res = TRUE; + return TRUE; + } + } + + return FALSE; +} + +/* + * Return TRUE if any non-empty html text part contains the tag named by the first argument + */ +gboolean +rspamd_has_html_tag (struct rspamd_task * task, GList * args, void *unused) +{ + struct mime_text_part *p; + GList *cur; + struct expression_argument *arg; + struct html_tag *tag; + gboolean res = FALSE; + struct html_callback_data cd; + + if (args == NULL) { + msg_warn ("no parameters to function"); + return FALSE; + } + + arg = get_function_arg (args->data, task, TRUE); + /* get_function_arg returns NULL for arguments that are not plain strings */ + if (!arg || arg->type == EXPRESSION_ARGUMENT_BOOL) { + msg_warn ("invalid argument to function is passed"); + return FALSE; + } + tag = get_tag_by_name (arg->data); + if (tag == NULL) { + msg_warn ("unknown tag type passed as argument: %s", (gchar *)arg->data); + return FALSE; + } + + cur = g_list_first (task->text_parts); + cd.res = &res; + cd.tag = tag; + + while (cur && res == FALSE) { + p = cur->data; + if (!p->is_empty && p->is_html && p->html_nodes) { + g_node_traverse (p->html_nodes, G_PRE_ORDER, G_TRAVERSE_ALL, -1, search_html_node_callback, &cd); + } + cur = g_list_next (cur); + } + + return res; + +} + +gboolean +rspamd_has_fake_html (struct rspamd_task * task, GList * args, void *unused) +{ + struct mime_text_part *p; + GList *cur; + gboolean res = FALSE; + + cur = g_list_first (task->text_parts); + + while (cur && res == FALSE) { + p = cur->data; + if (!p->is_empty && p->is_html && p->html_nodes == NULL) { + res = TRUE; + } + cur = g_list_next (cur); + } + + return res; + +} + + +/* + * vi:ts=4 + */ diff --git a/src/libmime/expressions.h b/src/libmime/expressions.h new file mode 100644 
index 000000000..954cc74f7 --- /dev/null +++ b/src/libmime/expressions.h @@ -0,0 +1,133 @@ +/** + * @file expressions.h + * Rspamd expressions API + */ + +#ifndef RSPAMD_EXPRESSIONS_H +#define RSPAMD_EXPRESSIONS_H + +#include "config.h" +#include + +struct rspamd_task; +struct rspamd_regexp; + +/** + * Rspamd expression function + */ +struct expression_function { + gchar *name; /**< name of function */ + GList *args; /**< its args */ +}; + +/** + * Function's argument + */ +struct expression_argument { + enum { + EXPRESSION_ARGUMENT_NORMAL, + EXPRESSION_ARGUMENT_BOOL, + EXPRESSION_ARGUMENT_EXPR, + } type; /**< type of argument (text or other function) */ + void *data; /**< pointer to its data */ +}; + +/** + * Logic expression + */ +struct expression { + enum { + EXPR_REGEXP, + EXPR_OPERATION, + EXPR_FUNCTION, + EXPR_STR, + EXPR_REGEXP_PARSED, + } type; /**< expression type */ + union { + void *operand; + gchar operation; + } content; /**< union for storing operand or operation code */ + const gchar *orig; /**< original line */ + struct expression *next; /**< chain link */ +}; + +typedef gboolean (*rspamd_internal_func_t)(struct rspamd_task *, GList *args, void *user_data); + +/** + * Parse regexp line to regexp structure + * @param pool memory pool to use + * @param line incoming line + * @return regexp structure or NULL in case of error + */ +struct rspamd_regexp* parse_regexp (rspamd_mempool_t *pool, const gchar *line, gboolean raw_mode); + +/** + * Parse composites line to composites structure (eg. 
"SYMBOL1&SYMBOL2|!SYMBOL3") + * @param pool memory pool to use + * @param line incoming line + * @return expression structure or NULL in case of error + */ +struct expression* parse_expression (rspamd_mempool_t *pool, gchar *line); + +/** + * Call specified fucntion and return boolean result + * @param func function to call + * @param task task object + * @param L lua specific state + * @return TRUE or FALSE depending on function result + */ +gboolean call_expression_function (struct expression_function *func, struct rspamd_task *task, lua_State *L); + +/** + * Register specified function to rspamd internal functions list + * @param name name of function + * @param func pointer to function + */ +void register_expression_function (const gchar *name, rspamd_internal_func_t func, void *user_data); + +/** + * Add regexp to regexp cache + * @param line symbolic representation + * @param pointer regexp data + */ +void re_cache_add (const gchar *line, void *pointer, rspamd_mempool_t *pool); + +/** + * Check regexp in cache + * @param line symbolic representation + * @return pointer to regexp data or NULL if regexp is not found + */ +void * re_cache_check (const gchar *line, rspamd_mempool_t *pool); + +/** + * Remove regexp from regexp cache + * @param line symbolic representation + */ +void re_cache_del (const gchar *line, rspamd_mempool_t *pool); + +/** + * Add regexp to regexp task cache + * @param task task object + * @param pointer regexp data + * @param result numeric result of this regexp + */ +void task_cache_add (struct rspamd_task *task, struct rspamd_regexp *re, gint32 result); + +/** + * Check regexp in cache + * @param task task object + * @param pointer regexp data + * @return numeric result if value exists or -1 if not + */ +gint32 task_cache_check (struct rspamd_task *task, struct rspamd_regexp *re); + +/** + * Parse and return a single function argument for a function (may recurse) + * @param expr expression structure that represents function's argument + 
* @param task task object + * @param want_string return NULL if argument is not a string + * @return expression argument structure or NULL if failed + */ +struct expression_argument *get_function_arg (struct expression *expr, struct rspamd_task *task, gboolean want_string); + +#endif diff --git a/src/libmime/filter.c b/src/libmime/filter.c new file mode 100644 index 000000000..cb0630d9d --- /dev/null +++ b/src/libmime/filter.c @@ -0,0 +1,1096 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "mem_pool.h" +#include "filter.h" +#include "main.h" +#include "message.h" +#include "cfg_file.h" +#include "util.h" +#include "expressions.h" +#include "settings.h" +#include "binlog.h" +#include "diff.h" +#include "classifiers/classifiers.h" +#include "tokenizers/tokenizers.h" + +#ifdef WITH_LUA +# include "lua/lua_common.h" +#endif + +#define COMMON_PART_FACTOR 95 + +#ifndef PARAM_H_HAS_BITSET +/* Bit map related macros. */ +#define NBBY 8 /* number of bits in a byte */ +#define setbit(a,i) (((unsigned char *)(a))[(i)/NBBY] |= 1<<((i)%NBBY)) +#define clrbit(a,i) (((unsigned char *)(a))[(i)/NBBY] &= ~(1<<((i)%NBBY))) +#define isset(a,i) \ + (((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) +#define isclr(a,i) \ + ((((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) == 0) +#endif +#define BITSPERBYTE (8*sizeof (gchar)) +#define NBYTES(nbits) (((nbits) + BITSPERBYTE - 1) / BITSPERBYTE) + +static inline GQuark +filter_error_quark (void) +{ + return g_quark_from_static_string ("g-filter-error-quark"); +} + +static void +insert_metric_result (struct rspamd_task *task, struct metric *metric, const gchar *symbol, + double flag, GList * opts, gboolean single) +{ + struct metric_result *metric_res; + struct symbol *s; + gdouble *weight, w; + + metric_res = g_hash_table_lookup (task->results, metric->name); + + if (metric_res == NULL) { + /* Create new metric chain */ + metric_res = rspamd_mempool_alloc (task->task_pool, sizeof (struct metric_result)); + metric_res->symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + metric_res->checked = FALSE; + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_hash_table_unref, metric_res->symbols); + metric_res->metric = metric; + metric_res->grow_factor = 0; + metric_res->score = 0; + metric_res->domain_settings = NULL; + metric_res->user_settings = NULL; + apply_metric_settings (task, metric, metric_res); + g_hash_table_insert 
(task->results, (gpointer) metric->name, metric_res); + } + + weight = g_hash_table_lookup (metric->symbols, symbol); + if (weight == NULL) { + w = 0.0; + } + else { + w = (*weight) * flag; + } + + + /* Add metric score */ + if ((s = g_hash_table_lookup (metric_res->symbols, symbol)) != NULL) { + if (s->options && opts && opts != s->options) { + /* Append new options */ + s->options = g_list_concat (s->options, g_list_copy(opts)); + /* + * Note that there is no need to add new destructor of GList as elements of appended + * GList are used directly, so just free initial GList + */ + } + else if (opts) { + s->options = g_list_copy (opts); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_list_free, s->options); + } + if (!single) { + /* Handle grow factor */ + if (metric_res->grow_factor && w > 0) { + w *= metric_res->grow_factor; + metric_res->grow_factor *= metric->grow_factor; + } + s->score += w; + metric_res->score += w; + } + else { + if (fabs (s->score) < fabs (w)) { + /* Replace less weight with a bigger one */ + metric_res->score = metric_res->score - s->score + w; + s->score = w; + } + } + } + else { + s = rspamd_mempool_alloc (task->task_pool, sizeof (struct symbol)); + + /* Handle grow factor */ + if (metric_res->grow_factor && w > 0) { + w *= metric_res->grow_factor; + metric_res->grow_factor *= metric->grow_factor; + } + else if (w > 0) { + metric_res->grow_factor = metric->grow_factor; + } + + s->score = w; + s->name = symbol; + metric_res->score += w; + + if (opts) { + s->options = g_list_copy (opts); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_list_free, s->options); + } + else { + s->options = NULL; + } + + g_hash_table_insert (metric_res->symbols, (gpointer) symbol, s); + } + debug_task ("symbol %s, score %.2f, metric %s, factor: %f", symbol, s->score, metric->name, w); + +} + +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) +static GStaticMutex result_mtx = 
G_STATIC_MUTEX_INIT; +#else +G_LOCK_DEFINE (result_mtx); +#endif + +static void +insert_result_common (struct rspamd_task *task, const gchar *symbol, double flag, GList * opts, gboolean single) +{ + struct metric *metric; + struct cache_item *item; + GList *cur, *metric_list; + + /* Avoid concurrenting inserting of results */ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + g_static_mutex_lock (&result_mtx); +#else + G_LOCK (result_mtx); +#endif + metric_list = g_hash_table_lookup (task->cfg->metrics_symbols, symbol); + if (metric_list) { + cur = metric_list; + + while (cur) { + metric = cur->data; + insert_metric_result (task, metric, symbol, flag, opts, single); + cur = g_list_next (cur); + } + } + else { + /* Insert symbol to default metric */ + insert_metric_result (task, task->cfg->default_metric, symbol, flag, opts, single); + } + + /* Process cache item */ + if (task->cfg->cache) { + item = g_hash_table_lookup (task->cfg->cache->items_by_symbol, symbol); + if (item != NULL) { + item->s->frequency++; + } + } + + if (opts != NULL) { + /* XXX: it is not wise to destroy them here */ + g_list_free (opts); + } +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + g_static_mutex_unlock (&result_mtx); +#else + G_UNLOCK (result_mtx); +#endif +} + +/* Insert result that may be increased on next insertions */ +void +insert_result (struct rspamd_task *task, const gchar *symbol, double flag, GList * opts) +{ + insert_result_common (task, symbol, flag, opts, task->cfg->one_shot_mode); +} + +/* Insert result as a single option */ +void +insert_result_single (struct rspamd_task *task, const gchar *symbol, double flag, GList * opts) +{ + insert_result_common (task, symbol, flag, opts, TRUE); +} + +/* Return true if metric has score that is more than spam score for it */ +static gboolean +check_metric_is_spam (struct rspamd_task *task, struct metric *metric) +{ + struct metric_result *res; + double ms, rs; + + /* Avoid concurrency while checking 
results */ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + g_static_mutex_lock (&result_mtx); +#else + G_LOCK (result_mtx); +#endif + res = g_hash_table_lookup (task->results, metric->name); + if (res) { +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + g_static_mutex_unlock (&result_mtx); +#else + G_UNLOCK (result_mtx); +#endif + if (!check_metric_settings (res, &ms, &rs)) { + ms = metric->actions[METRIC_ACTION_REJECT].score; + } + return (ms > 0 && res->score >= ms); + } + +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + g_static_mutex_unlock (&result_mtx); +#else + G_UNLOCK (result_mtx); +#endif + + return FALSE; +} + +gint +process_filters (struct rspamd_task *task) +{ + GList *cur; + struct metric *metric; + gpointer item = NULL; + + /* Process metrics symbols */ + while (call_symbol_callback (task, task->cfg->cache, &item)) { + /* Check reject actions */ + cur = task->cfg->metrics_list; + while (cur) { + metric = cur->data; + if (!task->pass_all_filters && + metric->actions[METRIC_ACTION_REJECT].score > 0 && + check_metric_is_spam (task, metric)) { + task->state = WRITE_REPLY; + return 1; + } + cur = g_list_next (cur); + } + } + + task->state = WAIT_FILTER; + + return 1; +} + + +struct composites_data { + struct rspamd_task *task; + struct metric_result *metric_res; + GTree *symbols_to_remove; + guint8 *checked; +}; + +struct symbol_remove_data { + struct symbol *ms; + gboolean remove_weight; + gboolean remove_symbol; +}; + +static gint +remove_compare_data (gconstpointer a, gconstpointer b) +{ + const gchar *ca = a, *cb = b; + + return strcmp (ca, cb); +} + +static void +composites_foreach_callback (gpointer key, gpointer value, void *data) +{ + struct composites_data *cd = (struct composites_data *)data; + struct rspamd_composite *composite = value, *ncomp; + struct expression *expr; + GQueue *stack; + GList *symbols = NULL, *s; + gsize cur, op1, op2; + gchar logbuf[256], *sym, *check_sym; + gint r; + struct 
symbol *ms; + struct symbol_remove_data *rd; + + + expr = composite->expr; + if (isset (cd->checked, composite->id)) { + /* Symbol was already checked */ + return; + } + + stack = g_queue_new (); + + while (expr) { + if (expr->type == EXPR_STR) { + /* Find corresponding symbol */ + sym = expr->content.operand; + if (*sym == '~' || *sym == '-') { + sym ++; + } + if (g_hash_table_lookup (cd->metric_res->symbols, sym) == NULL) { + cur = 0; + if ((ncomp = g_hash_table_lookup (cd->task->cfg->composite_symbols, sym)) != NULL) { + /* Set checked for this symbol to avoid cyclic references */ + if (isclr (cd->checked, ncomp->id)) { + setbit (cd->checked, composite->id); + composites_foreach_callback (sym, ncomp, cd); + if (g_hash_table_lookup (cd->metric_res->symbols, sym) != NULL) { + cur = 1; + } + } + } + } + else { + cur = 1; + symbols = g_list_prepend (symbols, expr->content.operand); + } + g_queue_push_head (stack, GSIZE_TO_POINTER (cur)); + } + else { + if (g_queue_is_empty (stack)) { + /* Queue has no operands for operation, exiting */ + g_list_free (symbols); + g_queue_free (stack); + setbit (cd->checked, composite->id); + return; + } + switch (expr->content.operation) { + case '!': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op1 = !op1; + g_queue_push_head (stack, GSIZE_TO_POINTER (op1)); + break; + case '&': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 && op2)); + break; + case '|': + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + op2 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + g_queue_push_head (stack, GSIZE_TO_POINTER (op1 || op2)); + break; + default: + expr = expr->next; + continue; + } + } + expr = expr->next; + } + if (!g_queue_is_empty (stack)) { + op1 = GPOINTER_TO_SIZE (g_queue_pop_head (stack)); + if (op1) { + /* Remove all symbols that are in composite symbol */ + s = g_list_first (symbols); + r = rspamd_snprintf (logbuf, 
sizeof (logbuf), "<%s>, insert symbol %s instead of symbols: ", cd->task->message_id, key); + while (s) { + sym = s->data; + if (*sym == '~' || *sym == '-') { + check_sym = sym + 1; + } + else { + check_sym = sym; + } + ms = g_hash_table_lookup (cd->metric_res->symbols, check_sym); + + if (ms == NULL) { + /* Try to process other composites */ + if ((ncomp = g_hash_table_lookup (cd->task->cfg->composite_symbols, check_sym)) != NULL) { + /* Set checked for this symbol to avoid cyclic references */ + if (isclr (cd->checked, ncomp->id)) { + setbit (cd->checked, composite->id); + composites_foreach_callback (check_sym, ncomp, cd); + ms = g_hash_table_lookup (cd->metric_res->symbols, check_sym); + } + } + } + + if (ms != NULL) { + rd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (struct symbol_remove_data)); + rd->ms = ms; + if (G_UNLIKELY (*sym == '~')) { + rd->remove_weight = FALSE; + rd->remove_symbol = TRUE; + } + else if (G_UNLIKELY (*sym == '-')) { + rd->remove_symbol = FALSE; + rd->remove_weight = FALSE; + } + else { + rd->remove_symbol = TRUE; + rd->remove_weight = TRUE; + } + if (!g_tree_lookup (cd->symbols_to_remove, rd)) { + g_tree_insert (cd->symbols_to_remove, (gpointer)ms->name, rd); + } + } + else { + + } + + if (s->next) { + r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s, ", s->data); + } + else { + r += rspamd_snprintf (logbuf + r, sizeof (logbuf) -r, "%s", s->data); + } + s = g_list_next (s); + } + /* Add new symbol */ + insert_result_single (cd->task, key, 1.0, NULL); + msg_info ("%s", logbuf); + } + } + + setbit (cd->checked, composite->id); + g_queue_free (stack); + g_list_free (symbols); + + return; +} + +static gboolean +check_autolearn (struct statfile_autolearn_params *params, struct rspamd_task *task) +{ + gchar *metric_name = DEFAULT_METRIC; + struct metric_result *metric_res; + GList *cur; + + if (params->metric != NULL) { + metric_name = (gchar *)params->metric; + } + + /* First check threshold */ + metric_res = 
g_hash_table_lookup (task->results, metric_name); + if (metric_res == NULL) { + if (params->symbols == NULL && params->threshold_max > 0) { + /* For ham messages */ + return TRUE; + } + debug_task ("metric %s has no results", metric_name); + return FALSE; + } + else { + /* Process score of metric */ + if ((params->threshold_min != 0 && metric_res->score > params->threshold_min) || (params->threshold_max != 0 && metric_res->score < params->threshold_max)) { + /* Now check for specific symbols */ + if (params->symbols) { + cur = params->symbols; + while (cur) { + if (g_hash_table_lookup (metric_res->symbols, cur->data) == NULL) { + return FALSE; + } + cur = g_list_next (cur); + } + } + /* Now allow processing of actual autolearn */ + return TRUE; + } + } + + return FALSE; +} + +void +process_autolearn (struct statfile *st, struct rspamd_task *task, GTree * tokens, struct classifier *classifier, gchar *filename, struct classifier_ctx *ctx) +{ + stat_file_t *statfile; + struct statfile *unused; + + if (check_autolearn (st->autolearn, task)) { + if (tokens) { + /* Take care of subject */ + tokenize_subject (task, &tokens); + msg_info ("message with id <%s> autolearned statfile '%s'", task->message_id, filename); + + /* Get or create statfile */ + statfile = get_statfile_by_symbol (task->worker->srv->statfile_pool, ctx->cfg, + st->symbol, &unused, TRUE); + + if (statfile == NULL) { + return; + } + + classifier->learn_func (ctx, task->worker->srv->statfile_pool, st->symbol, tokens, TRUE, NULL, 1., NULL); + maybe_write_binlog (ctx->cfg, st, statfile, tokens); + statfile_pool_plan_invalidate (task->worker->srv->statfile_pool, DEFAULT_STATFILE_INVALIDATE_TIME, DEFAULT_STATFILE_INVALIDATE_JITTER); + } + } +} + +static gboolean +composites_remove_symbols (gpointer key, gpointer value, gpointer data) +{ + struct composites_data *cd = data; + struct symbol_remove_data *rd = value; + + if (rd->remove_symbol) { + g_hash_table_remove (cd->metric_res->symbols, key); + } + if 
(rd->remove_weight) { + cd->metric_res->score -= rd->ms->score; + } + + return FALSE; +} + +static void +composites_metric_callback (gpointer key, gpointer value, gpointer data) +{ + struct rspamd_task *task = (struct rspamd_task *)data; + struct composites_data *cd = rspamd_mempool_alloc (task->task_pool, sizeof (struct composites_data)); + struct metric_result *metric_res = (struct metric_result *)value; + + cd->task = task; + cd->metric_res = (struct metric_result *)metric_res; + cd->symbols_to_remove = g_tree_new (remove_compare_data); + cd->checked = rspamd_mempool_alloc0 (task->task_pool, NBYTES (g_hash_table_size (task->cfg->composite_symbols))); + + /* Process hash table */ + g_hash_table_foreach (task->cfg->composite_symbols, composites_foreach_callback, cd); + + /* Remove symbols that are in composites */ + g_tree_foreach (cd->symbols_to_remove, composites_remove_symbols, cd); + /* Free list */ + g_tree_destroy (cd->symbols_to_remove); +} + +void +make_composites (struct rspamd_task *task) +{ + g_hash_table_foreach (task->results, composites_metric_callback, task); +} + +struct classifiers_cbdata { + struct rspamd_task *task; + struct lua_locked_state *nL; +}; + +static void +classifiers_callback (gpointer value, void *arg) +{ + struct classifiers_cbdata *cbdata = arg; + struct rspamd_task *task; + struct classifier_config *cl = value; + struct classifier_ctx *ctx; + struct mime_text_part *text_part, *p1, *p2; + struct statfile *st; + GTree *tokens = NULL; + GList *cur; + f_str_t c; + gchar *header = NULL; + gint *dist = NULL, diff; + gboolean is_twopart = FALSE; + + task = cbdata->task; + + if ((header = g_hash_table_lookup (cl->opts, "header")) != NULL) { + cur = message_get_header (task->task_pool, task->message, header, FALSE); + if (cur) { + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, cur); + } + } + else { + cur = g_list_first (task->text_parts); + dist = rspamd_mempool_get_variable (task->task_pool, 
"parts_distance"); + if (cur != NULL && cur->next != NULL && cur->next->next == NULL) { + is_twopart = TRUE; + } + } + ctx = cl->classifier->init_func (task->task_pool, cl); + + if ((tokens = g_hash_table_lookup (task->tokens, cl->tokenizer)) == NULL) { + while (cur != NULL) { + if (header) { + c.len = strlen (cur->data); + if (c.len > 0) { + c.begin = cur->data; + if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, FALSE, FALSE, NULL)) { + msg_info ("cannot tokenize input"); + return; + } + } + } + else { + text_part = (struct mime_text_part *)cur->data; + if (text_part->is_empty) { + cur = g_list_next (cur); + continue; + } + if (dist != NULL && cur->next == NULL) { + /* Compare part's content */ + + if (*dist >= COMMON_PART_FACTOR) { + msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id); + break; + } + } + else if (cur->next == NULL && is_twopart) { + p1 = cur->prev->data; + p2 = text_part; + if (p1->diff_str != NULL && p2->diff_str != NULL) { + diff = compare_diff_distance (p1->diff_str, p2->diff_str); + } + else { + diff = fuzzy_compare_parts (p1, p2); + } + if (diff >= COMMON_PART_FACTOR) { + msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id); + break; + } + } + c.begin = (gchar *)text_part->content->data; + c.len = text_part->content->len; + /* Tree would be freed at task pool freeing */ + if (!cl->tokenizer->tokenize_func (cl->tokenizer, task->task_pool, &c, &tokens, + FALSE, text_part->is_utf, text_part->urls_offset)) { + msg_info ("cannot tokenize input"); + return; + } + } + cur = g_list_next (cur); + } + g_hash_table_insert (task->tokens, cl->tokenizer, tokens); + } + + /* Take care of subject */ + tokenize_subject (task, &tokens); + + if (tokens == NULL) { + return; + } + + if (cbdata->nL != NULL) { + rspamd_mutex_lock (cbdata->nL->m); + cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task, cbdata->nL->L); + 
rspamd_mutex_unlock (cbdata->nL->m); + } + else { + /* Non-threaded case */ + cl->classifier->classify_func (ctx, task->worker->srv->statfile_pool, tokens, task, task->cfg->lua_state); + } + + /* Autolearning */ + cur = g_list_first (cl->statfiles); + while (cur) { + st = cur->data; + if (st->autolearn) { + if (check_autolearn (st->autolearn, task)) { + /* Process autolearn */ + process_autolearn (st, task, tokens, cl->classifier, st->path, ctx); + } + } + cur = g_list_next (cur); + } +} + + +void +process_statfiles (struct rspamd_task *task) +{ + struct classifiers_cbdata cbdata; + + if (task->is_skipped) { + return; + } + + if (task->tokens == NULL) { + task->tokens = g_hash_table_new (g_direct_hash, g_direct_equal); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_hash_table_unref, task->tokens); + } + cbdata.task = task; + cbdata.nL = NULL; + g_list_foreach (task->cfg->classifiers, classifiers_callback, &cbdata); + + /* Process results */ + make_composites (task); +} + +void +process_statfiles_threaded (gpointer data, gpointer user_data) +{ + struct rspamd_task *task = (struct rspamd_task *)data; + struct lua_locked_state *nL = user_data; + struct classifiers_cbdata cbdata; + + if (task->is_skipped) { + remove_async_thread (task->s); + return; + } + + if (task->tokens == NULL) { + task->tokens = g_hash_table_new (g_direct_hash, g_direct_equal); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_hash_table_unref, task->tokens); + } + + cbdata.task = task; + cbdata.nL = nL; + g_list_foreach (task->cfg->classifiers, classifiers_callback, &cbdata); + remove_async_thread (task->s); +} + +static void +insert_metric_header (gpointer metric_name, gpointer metric_value, gpointer data) +{ +#ifndef GLIB_HASH_COMPAT + struct rspamd_task *task = (struct rspamd_task *)data; + gint r = 0; + /* Try to be rfc2822 compatible and avoid long headers with folding */ + gchar header_name[128], outbuf[1000]; + GList *symbols = 
NULL, *cur; + struct metric_result *metric_res = (struct metric_result *)metric_value; + double ms, rs; + + rspamd_snprintf (header_name, sizeof (header_name), "X-Spam-%s", metric_res->metric->name); + + if (!check_metric_settings (metric_res, &ms, &rs)) { + ms = metric_res->metric->actions[METRIC_ACTION_REJECT].score; + } + if (ms > 0 && metric_res->score >= ms) { + r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "yes; %.2f/%.2f/%.2f; ", metric_res->score, ms, rs); + } + else { + r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "no; %.2f/%.2f/%.2f; ", metric_res->score, ms, rs); + } + + symbols = g_hash_table_get_keys (metric_res->symbols); + cur = symbols; + while (cur) { + if (g_list_next (cur) != NULL) { + r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "%s,", (gchar *)cur->data); + } + else { + r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "%s", (gchar *)cur->data); + } + cur = g_list_next (cur); + } + g_list_free (symbols); +#ifdef GMIME24 + g_mime_object_append_header (GMIME_OBJECT (task->message), header_name, outbuf); +#else + g_mime_message_add_header (task->message, header_name, outbuf); +#endif + +#endif /* GLIB_COMPAT */ +} + +void +insert_headers (struct rspamd_task *task) +{ + g_hash_table_foreach (task->results, insert_metric_header, task); +} + +gboolean +check_action_str (const gchar *data, gint *result) +{ + if (g_ascii_strncasecmp (data, "reject", sizeof ("reject") - 1) == 0) { + *result = METRIC_ACTION_REJECT; + } + else if (g_ascii_strncasecmp (data, "greylist", sizeof ("greylist") - 1) == 0) { + *result = METRIC_ACTION_GREYLIST; + } + else if (g_ascii_strncasecmp (data, "add_header", sizeof ("add_header") - 1) == 0) { + *result = METRIC_ACTION_ADD_HEADER; + } + else if (g_ascii_strncasecmp (data, "rewrite_subject", sizeof ("rewrite_subject") - 1) == 0) { + *result = METRIC_ACTION_REWRITE_SUBJECT; + } + else { + return FALSE; + } + return TRUE; +} + +const gchar * +str_action_metric (enum rspamd_metric_action 
action) +{ + switch (action) { + case METRIC_ACTION_REJECT: + return "reject"; + case METRIC_ACTION_SOFT_REJECT: + return "soft_reject"; + case METRIC_ACTION_REWRITE_SUBJECT: + return "rewrite_subject"; + case METRIC_ACTION_ADD_HEADER: + return "add_header"; + case METRIC_ACTION_GREYLIST: + return "greylist"; + case METRIC_ACTION_NOACTION: + return "no_action"; + case METRIC_ACTION_MAX: + return "invalid max action"; + } + + return "unknown action"; +} + +gint +check_metric_action (double score, double required_score, struct metric *metric) +{ + struct metric_action *action, *selected_action = NULL; + double max_score = 0; + int i; + + if (score >= required_score) { + return METRIC_ACTION_REJECT; + } + else if (metric->actions == NULL) { + return METRIC_ACTION_NOACTION; + } + else { + for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) { + action = &metric->actions[i]; + if (action->score < 0) { + continue; + } + if (score >= action->score && action->score > max_score) { + selected_action = action; + max_score = action->score; + } + } + if (selected_action) { + return selected_action->action; + } + else { + return METRIC_ACTION_NOACTION; + } + } +} + +gboolean +learn_task (const gchar *statfile, struct rspamd_task *task, GError **err) +{ + GList *cur, *ex; + struct classifier_config *cl; + struct classifier_ctx *cls_ctx; + gchar *s; + f_str_t c; + GTree *tokens = NULL; + struct statfile *st; + stat_file_t *stf; + gdouble sum; + struct mime_text_part *part, *p1, *p2; + gboolean is_utf = FALSE, is_twopart = FALSE; + gint diff; + + + /* Load classifier by symbol */ + cl = g_hash_table_lookup (task->cfg->classifiers_symbols, statfile); + if (cl == NULL) { + g_set_error (err, filter_error_quark(), 1, "Statfile %s is not configured in any classifier", statfile); + return FALSE; + } + + /* If classifier has 'header' option just classify header of this type */ + if ((s = g_hash_table_lookup (cl->opts, "header")) != NULL) { + cur = message_get_header 
(task->task_pool, task->message, s, FALSE); + if (cur) { + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, cur); + } + } + else { + /* Classify message otherwise */ + cur = g_list_first (task->text_parts); + if (cur != NULL && cur->next != NULL && cur->next->next == NULL) { + is_twopart = TRUE; + } + } + + /* Get tokens from each element */ + while (cur) { + if (s != NULL) { + c.len = strlen (cur->data); + c.begin = cur->data; + ex = NULL; + } + else { + part = cur->data; + /* Skip empty parts */ + if (part->is_empty) { + cur = g_list_next (cur); + continue; + } + c.begin = (gchar *)part->content->data; + c.len = part->content->len; + is_utf = part->is_utf; + ex = part->urls_offset; + if (is_twopart && cur->next == NULL) { + /* Compare part's content */ + p1 = cur->prev->data; + p2 = part; + if (p1->diff_str != NULL && p2->diff_str != NULL) { + diff = compare_diff_distance (p1->diff_str, p2->diff_str); + } + else { + diff = fuzzy_compare_parts (p1, p2); + } + if (diff >= COMMON_PART_FACTOR) { + msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id); + break; + } + } + } + /* Get tokens */ + if (!cl->tokenizer->tokenize_func ( + cl->tokenizer, task->task_pool, + &c, &tokens, FALSE, is_utf, ex)) { + g_set_error (err, filter_error_quark(), 2, "Cannot tokenize message"); + return FALSE; + } + cur = g_list_next (cur); + } + + /* Handle messages without text */ + if (tokens == NULL) { + g_set_error (err, filter_error_quark(), 3, "Cannot tokenize message, no text data"); + msg_info ("learn failed for message <%s>, no tokens to extract", task->message_id); + return FALSE; + } + + /* Take care of subject */ + tokenize_subject (task, &tokens); + + /* Init classifier */ + cls_ctx = cl->classifier->init_func ( + task->task_pool, cl); + /* Get or create statfile */ + stf = get_statfile_by_symbol (task->worker->srv->statfile_pool, + cl, statfile, &st, TRUE); + + /* Learn */ + if (stf== NULL || 
!cl->classifier->learn_func ( + cls_ctx, task->worker->srv->statfile_pool, + statfile, tokens, TRUE, &sum, + 1.0, err)) { + if (*err) { + msg_info ("learn failed for message <%s>, learn error: %s", task->message_id, (*err)->message); + return FALSE; + } + else { + g_set_error (err, filter_error_quark(), 4, "Learn failed, unknown learn classifier error"); + msg_info ("learn failed for message <%s>, unknown learn error", task->message_id); + return FALSE; + } + } + /* Increase statistics */ + task->worker->srv->stat->messages_learned++; + + maybe_write_binlog (cl, st, stf, tokens); + msg_info ("learn success for message <%s>, for statfile: %s, sum weight: %.2f", + task->message_id, statfile, sum); + statfile_pool_plan_invalidate (task->worker->srv->statfile_pool, + DEFAULT_STATFILE_INVALIDATE_TIME, + DEFAULT_STATFILE_INVALIDATE_JITTER); + + return TRUE; +} + +gboolean +learn_task_spam (struct classifier_config *cl, struct rspamd_task *task, gboolean is_spam, GError **err) +{ + GList *cur, *ex; + struct classifier_ctx *cls_ctx; + f_str_t c; + GTree *tokens = NULL; + struct mime_text_part *part, *p1, *p2; + gboolean is_utf = FALSE, is_twopart = FALSE; + gint diff; + + cur = g_list_first (task->text_parts); + if (cur != NULL && cur->next != NULL && cur->next->next == NULL) { + is_twopart = TRUE; + } + + /* Get tokens from each element */ + while (cur) { + part = cur->data; + /* Skip empty parts */ + if (part->is_empty) { + cur = g_list_next (cur); + continue; + } + c.begin = (gchar *)part->content->data; + c.len = part->content->len; + is_utf = part->is_utf; + ex = part->urls_offset; + if (is_twopart && cur->next == NULL) { + /* + * Compare part's content + * Note: here we don't have filters proceeded this message, so using pool variable is a bad idea + */ + p1 = cur->prev->data; + p2 = part; + if (p1->diff_str != NULL && p2->diff_str != NULL) { + diff = compare_diff_distance (p1->diff_str, p2->diff_str); + } + else { + diff = fuzzy_compare_parts (p1, p2); + } + if 
(diff >= COMMON_PART_FACTOR) { + msg_info ("message <%s> has two common text parts, ignore the last one", task->message_id); + break; + } + } + /* Get tokens */ + if (!cl->tokenizer->tokenize_func ( + cl->tokenizer, task->task_pool, + &c, &tokens, FALSE, is_utf, ex)) { + g_set_error (err, filter_error_quark(), 2, "Cannot tokenize message"); + return FALSE; + } + cur = g_list_next (cur); + } + + /* Handle messages without text */ + if (tokens == NULL) { + g_set_error (err, filter_error_quark(), 3, "Cannot tokenize message, no text data"); + msg_info ("learn failed for message <%s>, no tokens to extract", task->message_id); + return FALSE; + } + + /* Take care of subject */ + tokenize_subject (task, &tokens); + + /* Init classifier */ + cls_ctx = cl->classifier->init_func ( + task->task_pool, cl); + /* Learn */ + if (!cl->classifier->learn_spam_func ( + cls_ctx, task->worker->srv->statfile_pool, + tokens, task, is_spam, task->cfg->lua_state, err)) { + if (*err) { + msg_info ("learn failed for message <%s>, learn error: %s", task->message_id, (*err)->message); + return FALSE; + } + else { + g_set_error (err, filter_error_quark(), 4, "Learn failed, unknown learn classifier error"); + msg_info ("learn failed for message <%s>, unknown learn error", task->message_id); + return FALSE; + } + } + /* Increase statistics */ + task->worker->srv->stat->messages_learned++; + + msg_info ("learn success for message <%s>", + task->message_id); + statfile_pool_plan_invalidate (task->worker->srv->statfile_pool, + DEFAULT_STATFILE_INVALIDATE_TIME, + DEFAULT_STATFILE_INVALIDATE_JITTER); + + return TRUE; +} + +/* + * vi:ts=4 + */ diff --git a/src/libmime/filter.h b/src/libmime/filter.h new file mode 100644 index 000000000..258bd9447 --- /dev/null +++ b/src/libmime/filter.h @@ -0,0 +1,167 @@ +/** + * @file filter.h + * Filters logic implemetation + */ + +#ifndef RSPAMD_FILTER_H +#define RSPAMD_FILTER_H + +#include "config.h" +#include "symbols_cache.h" +#include "task.h" + +struct 
rspamd_task; +struct rspamd_settings; +struct classifier_config; + +typedef double (*metric_cons_func)(struct rspamd_task *task, const gchar *metric_name, const gchar *func_name); +typedef void (*filter_func)(struct rspamd_task *task); + +enum filter_type { C_FILTER, PERL_FILTER }; + +/** + * Filter structure + */ +struct filter { + gchar *func_name; /**< function name */ + enum filter_type type; /**< filter type (c or perl) */ + module_t *module; +}; + +/** + * Rspamd symbol + */ +struct symbol { + double score; /**< symbol's score */ + GList *options; /**< list of symbol's options */ + const gchar *name; +}; + +struct metric_action { + enum rspamd_metric_action action; + gdouble score; +}; + +/** + * Common definition of metric + */ +struct metric { + const gchar *name; /**< name of metric */ + gchar *func_name; /**< name of consolidation function */ + metric_cons_func func; /**< c consolidation function */ + double grow_factor; /**< grow factor for metric */ + GHashTable *symbols; /**< weights of symbols in metric */ + GHashTable *descriptions; /**< descriptions of symbols in metric */ + struct metric_action actions[METRIC_ACTION_MAX]; /**< all actions of the metric */ + gchar *subject; /**< subject rewrite string */ +}; + +/** + * Result of metric processing + */ +struct metric_result { + struct metric *metric; /**< pointer to metric structure */ + double score; /**< total score */ + GHashTable *symbols; /**< symbols of metric */ + gboolean checked; /**< whether metric result is consolidated */ + double grow_factor; /**< current grow factor */ + struct rspamd_settings *user_settings; /**< settings for metric */ + struct rspamd_settings *domain_settings; /**< settings for metric */ +}; + +/** + * Composite structure + */ +struct rspamd_composite { + struct expression *expr; + gint id; +}; + +/** + * Process all filters + * @param task worker's task that present message from user + * @return 0 - if there is non-finished tasks and 1 if processing is completed + */ 
+gint process_filters (struct rspamd_task *task); + +/** + * Process message with statfiles + * @param task worker's task that present message from user + */ +void process_statfiles (struct rspamd_task *task); + +/** + * Process message with statfiles threaded + * @param data worker's task that present message from user + */ +void process_statfiles_threaded (gpointer data, gpointer user_data); + +/** + * Insert a result to task + * @param task worker's task that present message from user + * @param metric_name metric's name to which we need to insert result + * @param symbol symbol to insert + * @param flag numeric weight for symbol + * @param opts list of symbol's options + */ +void insert_result (struct rspamd_task *task, const gchar *symbol, double flag, GList *opts); + +/** + * Insert a single result to task + * @param task worker's task that present message from user + * @param metric_name metric's name to which we need to insert result + * @param symbol symbol to insert + * @param flag numeric weight for symbol + * @param opts list of symbol's options + */ +void insert_result_single (struct rspamd_task *task, const gchar *symbol, double flag, GList *opts); + +/** + * Process all results and form composite metrics from existent metrics as it is defined in config + * @param task worker's task that present message from user + */ +void make_composites (struct rspamd_task *task); + +/** + * Default consolidation function for metric, it get all symbols and multiply symbol + * weight by some factor that is specified in config. Default factor is 1. 
+ * @param task worker's task that present message from user + * @param metric_name name of metric + * @return result metric weight + */ +double factor_consolidation_func (struct rspamd_task *task, const gchar *metric_name, const gchar *unused); + +/* + * Learn specified statfile with message in a task + * @param statfile symbol of statfile + * @param task worker's task object + * @param err pointer to GError + * @return true if learn succeed + */ +gboolean learn_task (const gchar *statfile, struct rspamd_task *task, GError **err); + +/* + * Learn specified statfile with message in a task + * @param statfile symbol of statfile + * @param task worker's task object + * @param err pointer to GError + * @return true if learn succeed + */ +gboolean learn_task_spam (struct classifier_config *cl, struct rspamd_task *task, gboolean is_spam, GError **err); + +/* + * Get action from a string + */ +gboolean check_action_str (const gchar *data, gint *result); + +/* + * Return textual representation of action enumeration + */ +const gchar *str_action_metric (enum rspamd_metric_action action); + +/* + * Get action for specific metric + */ +gint check_metric_action (double score, double required_score, struct metric *metric); + +#endif diff --git a/src/libmime/images.c b/src/libmime/images.c new file mode 100644 index 000000000..ff07bbd72 --- /dev/null +++ b/src/libmime/images.c @@ -0,0 +1,255 @@ +/* Copyright (c) 2010, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "images.h" +#include "main.h" +#include "message.h" + +static const guint8 png_signature[] = {137, 80, 78, 71, 13, 10, 26, 10}; +static const guint8 jpg_sig1[] = {0xff, 0xd8}; +static const guint8 jpg_sig2[] = {'J', 'F', 'I', 'F'}; +static const guint8 gif_signature[] = {'G', 'I', 'F', '8'}; +static const guint8 bmp_signature[] = {'B', 'M'}; + +static void process_image (struct rspamd_task *task, struct mime_part *part); + + +void +process_images (struct rspamd_task *task) +{ + GList *cur; + struct mime_part *part; + + cur = task->parts; + while (cur) { + part = cur->data; + if (g_mime_content_type_is_type (part->type, "image", "*") && part->content->len > 0) { + process_image (task, part); + } + cur = g_list_next (cur); + } + +} + +static enum known_image_types +detect_image_type (GByteArray *data) +{ + if (data->len > sizeof (png_signature) / sizeof (png_signature[0])) { + if (memcmp (data->data, png_signature, sizeof (png_signature)) == 0) { + return IMAGE_TYPE_PNG; + } + } + if (data->len > 10) { + if (memcmp (data->data, jpg_sig1, sizeof (jpg_sig1)) == 0) { + if (memcmp (data->data + 6, jpg_sig2, sizeof (jpg_sig2)) == 0) { + return IMAGE_TYPE_JPG; + } + } + } + if (data->len > sizeof 
(gif_signature) / sizeof (gif_signature[0])) { + if (memcmp (data->data, gif_signature, sizeof (gif_signature)) == 0) { + return IMAGE_TYPE_GIF; + } + } + if (data->len > sizeof (bmp_signature) / sizeof (bmp_signature[0])) { + if (memcmp (data->data, bmp_signature, sizeof (bmp_signature)) == 0) { + return IMAGE_TYPE_BMP; + } + } + + return IMAGE_TYPE_UNKNOWN; +} + + +static struct rspamd_image * +process_png_image (struct rspamd_task *task, GByteArray *data) +{ + struct rspamd_image *img; + guint32 t; + guint8 *p; + + if (data->len < 24) { + msg_info ("bad png detected (maybe striped): <%s>", task->message_id); + return NULL; + } + + /* In png we should find iHDR section and get data from it */ + /* Skip signature and read header section */ + p = data->data + 12; + if (memcmp (p, "IHDR", 4) != 0) { + msg_info ("png doesn't begins with IHDR section", task->message_id); + return NULL; + } + + img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image)); + img->type = IMAGE_TYPE_PNG; + img->data = data; + + p += 4; + memcpy (&t, p, sizeof (guint32)); + img->width = ntohl (t); + p += 4; + memcpy (&t, p, sizeof (guint32)); + img->height = ntohl (t); + + return img; +} + +static struct rspamd_image * +process_jpg_image (struct rspamd_task *task, GByteArray *data) +{ + guint8 *p; + guint16 t; + gsize remain; + struct rspamd_image *img; + + img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image)); + img->type = IMAGE_TYPE_JPG; + img->data = data; + + p = data->data; + remain = data->len; + /* In jpeg we should find any data stream (ff c0 .. 
ff c3) and extract its height and width */ + while (remain --) { + if (*p == 0xFF && remain > 8 && (*(p + 1) >= 0xC0 && *(p + 1) <= 0xC3)) { + memcpy (&t, p + 5, sizeof (guint16)); + img->height = ntohs (t); + memcpy (&t, p + 7, sizeof (guint16)); + img->width = ntohs (t); + return img; + } + p ++; + } + + return NULL; +} + +static struct rspamd_image * +process_gif_image (struct rspamd_task *task, GByteArray *data) +{ + struct rspamd_image *img; + guint8 *p; + guint16 t; + + if (data->len < 10) { + msg_info ("bad gif detected (maybe striped): <%s>", task->message_id); + return NULL; + } + + img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image)); + img->type = IMAGE_TYPE_GIF; + img->data = data; + + p = data->data + 6; + memcpy (&t, p, sizeof (guint16)); + img->width = GUINT16_FROM_LE (t); + memcpy (&t, p + 2, sizeof (guint16)); + img->height = GUINT16_FROM_LE (t); + + return img; +} + +static struct rspamd_image * +process_bmp_image (struct rspamd_task *task, GByteArray *data) +{ + struct rspamd_image *img; + gint32 t; + guint8 *p; + + + + if (data->len < 28) { + msg_info ("bad bmp detected (maybe striped): <%s>", task->message_id); + return NULL; + } + + img = rspamd_mempool_alloc (task->task_pool, sizeof (struct rspamd_image)); + img->type = IMAGE_TYPE_BMP; + img->data = data; + p = data->data + 18; + memcpy (&t, p, sizeof (gint32)); + img->width = abs (GINT32_FROM_LE (t)); + memcpy (&t, p + 4, sizeof (gint32)); + img->height = abs (GINT32_FROM_LE (t)); + + return img; +} + +static void +process_image (struct rspamd_task *task, struct mime_part *part) +{ + enum known_image_types type; + struct rspamd_image *img = NULL; + if ((type = detect_image_type (part->content)) != IMAGE_TYPE_UNKNOWN) { + switch (type) { + case IMAGE_TYPE_PNG: + img = process_png_image (task, part->content); + break; + case IMAGE_TYPE_JPG: + img = process_jpg_image (task, part->content); + break; + case IMAGE_TYPE_GIF: + img = process_gif_image (task, part->content); + 
break; + case IMAGE_TYPE_BMP: + img = process_bmp_image (task, part->content); + break; + default: + img = NULL; + break; + } + } + + if (img != NULL) { + debug_task ("detected %s image of size %ud x %ud in message <%s>", + image_type_str (img->type), + img->width, img->height, + task->message_id); + img->filename = part->filename; + task->images = g_list_prepend (task->images, img); + } +} + +const gchar * +image_type_str (enum known_image_types type) +{ + switch (type) { + case IMAGE_TYPE_PNG: + return "PNG"; + break; + case IMAGE_TYPE_JPG: + return "JPEG"; + break; + case IMAGE_TYPE_GIF: + return "GIF"; + break; + case IMAGE_TYPE_BMP: + return "BMP"; + break; + default: + return "unknown"; + } + + return "unknown"; +} diff --git a/src/libmime/images.h b/src/libmime/images.h new file mode 100644 index 000000000..c43941ebc --- /dev/null +++ b/src/libmime/images.h @@ -0,0 +1,33 @@ +#ifndef IMAGES_H_ +#define IMAGES_H_ + +#include "config.h" +#include "main.h" + +enum known_image_types { + IMAGE_TYPE_PNG, + IMAGE_TYPE_JPG, + IMAGE_TYPE_GIF, + IMAGE_TYPE_BMP, + IMAGE_TYPE_UNKNOWN = 9000 +}; + +struct rspamd_image { + enum known_image_types type; + GByteArray *data; + guint32 width; + guint32 height; + const gchar *filename; +}; + +/* + * Process images from a worker task + */ +void process_images (struct rspamd_task *task); + +/* + * Get textual representation of an image's type + */ +const gchar *image_type_str (enum known_image_types type); + +#endif /* IMAGES_H_ */ diff --git a/src/libmime/message.c b/src/libmime/message.c new file mode 100644 index 000000000..4567869e9 --- /dev/null +++ b/src/libmime/message.c @@ -0,0 +1,1764 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "util.h"
+#include "main.h"
+#include "message.h"
+#include "cfg_file.h"
+#include "html.h"
+#include "images.h"
+
+#define RECURSION_LIMIT 30
+#define UTF8_CHARSET "UTF-8"
+
+/*
+ * Copy src into a newly allocated byte array with HTML/XML tags, PHP
+ * blocks and comments removed. Every completed HTML tag is handed to
+ * add_html_node (); while that call reports failure, following text is
+ * erased from the output.
+ *
+ * @param task task passed through to add_html_node ()
+ * @param pool memory pool passed through to add_html_node ()
+ * @param part text part; is_balanced is cleared when tags are left open
+ * @param src input bytes, not modified
+ * @param stateptr optional in/out parser state (0 text, 1 tag, 2 PHP,
+ *        3 "<!" declaration, 4 comment); may be NULL
+ * @return new GByteArray owned by the caller
+ *
+ * NOTE(review): the *(p - 1) look-behind reads assume that a non-zero
+ * state is never resumed at the very first byte of src; the caller visible
+ * in this file passes a NULL stateptr - confirm for any other caller.
+ */
+GByteArray *
+strip_html_tags (struct rspamd_task *task, rspamd_mempool_t * pool, struct mime_text_part *part, GByteArray * src, gint *stateptr)
+{
+	uint8_t *p, *rp, *tbegin = NULL, *end, c, lc;
+	gint br, i = 0, depth = 0, in_q = 0;
+	gint state = 0;
+	GByteArray *buf;
+	GNode *level_ptr = NULL;
+	gboolean erase = FALSE;
+
+	if (stateptr)
+		state = *stateptr;
+
+	buf = g_byte_array_sized_new (src->len);
+	g_byte_array_append (buf, src->data, src->len);
+
+	/* an empty array may have no data pointer at all */
+	c = src->len > 0 ? *src->data : '\0';
+	lc = '\0';
+	p = src->data;
+	rp = buf->data;
+	end = src->data + src->len;
+	br = 0;
+
+	while (i < (gint)src->len) {
+		switch (c) {
+		case '\0':
+			break;
+		case '<':
+			/* look ahead only when '<' is not the last byte of the input */
+			if (p + 1 < end && g_ascii_isspace (*(p + 1))) {
+				goto reg_char;
+			}
+			if (state == 0) {
+				lc = '<';
+				tbegin = p + 1;
+				state = 1;
+			}
+			else if (state == 1) {
+				/* Opening bracket without closing one */
+				p --;
+				while (g_ascii_isspace (*p) && p > src->data) {
+					p --;
+				}
+				p ++;
+				goto unbreak_tag;
+			}
+			break;
+
+		case '(':
+			if (state == 2) {
+				if (lc != '"' && lc != '\'') {
+					lc = '(';
+					br++;
+				}
+			}
+			else if (state == 0 && !erase) {
+				*(rp++) = c;
+			}
+			break;
+
+		case ')':
+			if (state == 2) {
+				if (lc != '"' && lc != '\'') {
+					lc = ')';
+					br--;
+				}
+			}
+			else if (state == 0 && !erase) {
+				*(rp++) = c;
+			}
+			break;
+
+		case '>':
+			if (depth) {
+				depth--;
+				break;
+			}
+
+			if (in_q) {
+				break;
+			}
+unbreak_tag:
+			switch (state) {
+			case 1:		/* HTML/XML */
+				lc = '>';
+				in_q = state = 0;
+				erase = !add_html_node (task, pool, part, tbegin, p - tbegin, end - tbegin, &level_ptr);
+				break;
+
+			case 2:		/* PHP */
+				if (!br && lc != '\"' && *(p - 1) == '?') {
+					in_q = state = 0;
+				}
+				break;
+
+			case 3:
+				in_q = state = 0;
+				break;
+
+			case 4:		/* JavaScript/CSS/etc... */
+				if (p >= src->data + 2 && *(p - 1) == '-' && *(p - 2) == '-') {
+					in_q = state = 0;
+				}
+				break;
+
+			default:
+				if (!erase) {
+					*(rp++) = c;
+				}
+				break;
+			}
+			break;
+
+		case '"':
+		case '\'':
+			if (state == 2 && *(p - 1) != '\\') {
+				if (lc == c) {
+					lc = '\0';
+				}
+				else if (lc != '\\') {
+					lc = c;
+				}
+			}
+			else if (state == 0 && !erase) {
+				*(rp++) = c;
+			}
+			if (state && p != src->data && *(p - 1) != '\\' && (!in_q || *p == in_q)) {
+				if (in_q) {
+					in_q = 0;
+				}
+				else {
+					in_q = *p;
+				}
+			}
+			break;
+
+		case '!':
+			/* JavaScript & Other HTML scripting languages */
+			if (state == 1 && *(p - 1) == '<') {
+				state = 3;
+				lc = c;
+			}
+			else {
+				if (state == 0 && !erase) {
+					*(rp++) = c;
+				}
+			}
+			break;
+
+		case '-':
+			if (state == 3 && p >= src->data + 2 && *(p - 1) == '-' && *(p - 2) == '!') {
+				state = 4;
+			}
+			else {
+				goto reg_char;
+			}
+			break;
+
+		case '?':
+
+			if (state == 1 && *(p - 1) == '<') {
+				br = 0;
+				state = 2;
+				break;
+			}
+			/* fall-through */
+
+		case 'E':
+		case 'e':
+			/* !DOCTYPE exception */
+			if (state == 3 && p > src->data + 6
+				&& g_ascii_tolower (*(p - 1)) == 'p'
+				&& g_ascii_tolower (*(p - 2)) == 'y'
+				&& g_ascii_tolower (*(p - 3)) == 't' && g_ascii_tolower (*(p - 4)) == 'c' && g_ascii_tolower (*(p - 5)) == 'o' && g_ascii_tolower (*(p - 6)) == 'd') {
+				state = 1;
+				break;
+			}
+			/* fall-through */
+
+		case 'l':
+
+			/*
+			 * swm: If we encounter '<?xml' then we shouldn't be in
+			 * state == 2 (PHP). Switch back to HTML.
+			 */
+			if (state == 2 && p > src->data + 2 && *(p - 1) == 'm' && *(p - 2) == 'x') {
+				state = 1;
+				break;
+			}
+
+			/* fall-through */
+		default:
+		reg_char:
+			if (state == 0 && !erase) {
+				*(rp++) = c;
+			}
+			break;
+		}
+		i++;
+		if (i < (gint)src->len) {
+			c = *(++p);
+		}
+	}
+	/* shrink the copy to the bytes actually kept */
+	if (rp < buf->data + src->len) {
+		*rp = '\0';
+		g_byte_array_set_size (buf, rp - buf->data);
+	}
+
+	/* Check tag balancing */
+	if (level_ptr && level_ptr->data != NULL) {
+		part->is_balanced = FALSE;
+	}
+
+	if (stateptr) {
+		*stateptr = state;
+	}
+
+	return buf;
+}
+
+static void
+parse_qmail_recv (rspamd_mempool_t * pool, gchar *line, struct received_header *r)
+{
+
gchar *s, *p, t;
+
+	/* We are interested only with received from network headers */
+	if ((p = strstr (line, "from network")) == NULL) {
+		r->is_error = 2;
+		return;
+	}
+
+	p += sizeof ("from network") - 1;
+	while (g_ascii_isspace (*p) || *p == '[') {
+		p++;
+	}
+	/* format is ip/host */
+	s = p;
+	if (*p) {
+		while (g_ascii_isdigit (*++p) || *p == '.');
+		if (*p != '/') {
+			r->is_error = 1;
+			return;
+		}
+		else {
+			*p = '\0';
+			r->real_ip = rspamd_mempool_strdup (pool, s);
+			*p = '/';
+			/* Now try to parse hostname */
+			s = ++p;
+			while (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') {
+				p++;
+			}
+			t = *p;
+			*p = '\0';
+			r->real_hostname = rspamd_mempool_strdup (pool, s);
+			*p = t;
+		}
+	}
+}
+
+/*
+ * Parse one Received: header value with a small state machine and fill the
+ * from/real hostname and ip fields and by_hostname of r. The line is
+ * stripped and temporarily NUL-patched in place while substrings are
+ * copied into the pool. Lines that start with neither "from" nor "by" are
+ * handed to parse_qmail_recv (). r->is_error is set to 1 whenever the end
+ * of the line is reached without a complete "by" hostname.
+ */
+static void
+parse_recv_header (rspamd_mempool_t * pool, gchar *line, struct received_header *r)
+{
+	gchar *p, *s, t, **res = NULL;
+	enum {
+		RSPAMD_RECV_STATE_INIT = 0,
+		RSPAMD_RECV_STATE_FROM,
+		RSPAMD_RECV_STATE_IP_BLOCK,
+		RSPAMD_RECV_STATE_BRACES_BLOCK,
+		RSPAMD_RECV_STATE_BY_BLOCK,
+		RSPAMD_RECV_STATE_PARSE_IP,
+		RSPAMD_RECV_STATE_SKIP_SPACES,
+		RSPAMD_RECV_STATE_ERROR
+	} state = RSPAMD_RECV_STATE_INIT,
+	  next_state = RSPAMD_RECV_STATE_INIT;
+	gboolean is_exim = FALSE;
+
+	g_strstrip (line);
+	p = line;
+	s = line;
+
+	while (*p) {
+		switch (state) {
+		/* Initial state, search for from */
+		case RSPAMD_RECV_STATE_INIT:
+			if (*p == 'f' || *p == 'F') {
+				if (g_ascii_tolower (*++p) == 'r' && g_ascii_tolower (*++p) == 'o' && g_ascii_tolower (*++p) == 'm') {
+					p++;
+					state = RSPAMD_RECV_STATE_SKIP_SPACES;
+					next_state = RSPAMD_RECV_STATE_FROM;
+				}
+			}
+			else if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == 'y') {
+				state = RSPAMD_RECV_STATE_IP_BLOCK;
+			}
+			else {
+				/* This can be qmail header, parse it separately */
+				parse_qmail_recv (pool, line, r);
+				return;
+			}
+			break;
+		/* Read hostname */
+		case RSPAMD_RECV_STATE_FROM:
+			if (*p == '[') {
+				/* This should be IP address */
+				res = &r->from_ip;
+				state = RSPAMD_RECV_STATE_PARSE_IP;
+				next_state = RSPAMD_RECV_STATE_IP_BLOCK;
+				s = ++p;
+			}
+			else if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') {
+				p++;
+			}
+			else {
+				t = *p;
+				*p = '\0';
+				r->from_hostname = rspamd_mempool_strdup (pool, s);
+				*p = t;
+				state = RSPAMD_RECV_STATE_SKIP_SPACES;
+				next_state = RSPAMD_RECV_STATE_IP_BLOCK;
+			}
+			break;
+		/* Try to extract additional info */
+		case RSPAMD_RECV_STATE_IP_BLOCK:
+			/* Try to extract ip or () info or by */
+			if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == 'y') {
+				p += 2;
+				/* Skip spaces after by */
+				state = RSPAMD_RECV_STATE_SKIP_SPACES;
+				next_state = RSPAMD_RECV_STATE_BY_BLOCK;
+			}
+			else if (*p == '(') {
+				state = RSPAMD_RECV_STATE_SKIP_SPACES;
+				next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+				p++;
+			}
+			else if (*p == '[') {
+				/* Got ip before '(' so extract it */
+				s = ++p;
+				res = &r->from_ip;
+				state = RSPAMD_RECV_STATE_PARSE_IP;
+				next_state = RSPAMD_RECV_STATE_IP_BLOCK;
+			}
+			else {
+				p++;
+			}
+			break;
+		/* We are in () block. Here can be found real hostname and real ip, this is written by some MTA */
+		case RSPAMD_RECV_STATE_BRACES_BLOCK:
+			/* End of block */
+			if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' ||
+					*p == '_' || *p == ':') {
+				p++;
+			}
+			else if (*p == '[') {
+				s = ++p;
+				state = RSPAMD_RECV_STATE_PARSE_IP;
+				res = &r->real_ip;
+				next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+			}
+			else {
+				if (p > s) {
+					/* Got some real hostname */
+					/* check whether it is helo or p is not space symbol */
+					if (!g_ascii_isspace (*p) || *(p + 1) != '[') {
+						/* Exim style ([ip]:port helo=hostname) */
+						if (*s == ':' && (g_ascii_isspace (*p) || *p == ')')) {
+							/* Ip ending */
+							is_exim = TRUE;
+							state = RSPAMD_RECV_STATE_SKIP_SPACES;
+							next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+						}
+						else if (p - s == 4 && memcmp (s, "helo=", 5) == 0) {
+							p ++;
+							is_exim = TRUE;
+							if (r->real_hostname == NULL && r->from_hostname != NULL) {
+								r->real_hostname = r->from_hostname;
+							}
+							s = p;
+							while (*p != ')' && !g_ascii_isspace (*p) && *p != '\0') {
+								p ++;
+							}
+							if (p > s) {
+								r->from_hostname = rspamd_mempool_alloc (pool, p - s + 1);
+								rspamd_strlcpy (r->from_hostname, s, p - s + 1);
+							}
+						}
+						else if (p - s == 4 && memcmp (s, "port=", 5) == 0) {
+							p ++;
+							is_exim = TRUE;
+							while (g_ascii_isdigit (*p)) {
+								p ++;
+							}
+							state = RSPAMD_RECV_STATE_SKIP_SPACES;
+							next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+						}
+						else if (*p == '=' && is_exim) {
+							/* Just skip unknown pairs */
+							p ++;
+							while (!g_ascii_isspace (*p) && *p != ')' && *p != '\0') {
+								p ++;
+							}
+							state = RSPAMD_RECV_STATE_SKIP_SPACES;
+							next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+						}
+						else {
+							/* skip all  */
+							while (*p++ != ')' && *p != '\0');
+							state = RSPAMD_RECV_STATE_IP_BLOCK;
+						}
+					}
+					else {
+						/* Postfix style (hostname [ip]) */
+						t = *p;
+						*p = '\0';
+						r->real_hostname = rspamd_mempool_strdup (pool, s);
+						*p = t;
+						/* Now parse ip */
+						p += 2;
+						s = p;
+						res = &r->real_ip;
+						state = RSPAMD_RECV_STATE_PARSE_IP;
+						next_state = RSPAMD_RECV_STATE_BRACES_BLOCK;
+						continue;
+					}
+					if (*p == ')') {
+						p ++;
+						state = RSPAMD_RECV_STATE_SKIP_SPACES;
+						next_state = RSPAMD_RECV_STATE_IP_BLOCK;
+					}
+				}
+				else if (*p == ')') {
+					p ++;
+					state = RSPAMD_RECV_STATE_SKIP_SPACES;
+					next_state = RSPAMD_RECV_STATE_IP_BLOCK;
+				}
+				else {
+					r->is_error = 1;
+					return;
+				}
+			}
+			break;
+		/* Got by word */
+		case RSPAMD_RECV_STATE_BY_BLOCK:
+			/* Here can be only hostname */
+			if ((g_ascii_isalnum (*p) || *p == '.' || *p == '-'
+						|| *p == '_') && p[1] != '\0') {
+				p++;
+			}
+			else {
+				/* We got something like hostname */
+				if (p[1] != '\0') {
+					t = *p;
+					*p = '\0';
+					r->by_hostname = rspamd_mempool_strdup (pool, s);
+					*p = t;
+				}
+				else {
+					r->by_hostname = rspamd_mempool_strdup (pool, s);
+				}
+				/* Now end of parsing */
+				if (is_exim) {
+					/* Adjust for exim received */
+					if (r->real_ip == NULL && r->from_ip != NULL) {
+						r->real_ip = r->from_ip;
+					}
+					else if (r->from_ip == NULL && r->real_ip != NULL) {
+						r->from_ip = r->real_ip;
+						if (r->real_hostname == NULL && r->from_hostname != NULL) {
+							r->real_hostname = r->from_hostname;
+						}
+					}
+				}
+				return;
+			}
+			break;
+
+		/* Extract ip */
+		case RSPAMD_RECV_STATE_PARSE_IP:
+			while (g_ascii_isxdigit (*p) || *p == '.' || *p == ':') {
+				p ++;
+			}
+			if (*p != ']') {
+				/* Not an ip in fact */
+				state = RSPAMD_RECV_STATE_SKIP_SPACES;
+				/*
+				 * Never step over the terminating NUL: the outer loop
+				 * tests *p and would otherwise read past the buffer
+				 * for input such as "from [1.2.3.4".
+				 */
+				if (*p != '\0') {
+					p++;
+				}
+			}
+			else {
+				*p = '\0';
+				*res = rspamd_mempool_strdup (pool, s);
+				*p = ']';
+				p++;
+				state = RSPAMD_RECV_STATE_SKIP_SPACES;
+			}
+			break;
+
+		/* Skip spaces */
+		case RSPAMD_RECV_STATE_SKIP_SPACES:
+			if (!g_ascii_isspace (*p)) {
+				state = next_state;
+				s = p;
+			}
+			else {
+				p++;
+			}
+			break;
+		default:
+			r->is_error = 1;
+			return;
+			break;
+		}
+	}
+
+	r->is_error = 1;
+	return;
+}
+
+/* Convert raw headers to a list of struct raw_header * */
+static void
+process_raw_headers (struct rspamd_task *task)
+{
+	struct raw_header *new = NULL, *lp;
+	gchar *p, *c, *tmp, *tp;
+	gint state = 0, l, next_state = 100, err_state = 100, t_state;
+	gboolean valid_folding = FALSE;
+
+	p = task->raw_headers_str;
+	c = p;
+	while (*p) {
+		/* FSM for processing headers */
+		switch (state) {
+		case 0:
+			/* Begin processing headers */
+			if (!g_ascii_isalpha (*p)) {
+				/* We have some garbage at the beginning of headers, skip this line */
+				state = 100;
+				next_state = 0;
+			}
+			else {
+				state = 1;
+				c = p;
+			}
+			break;
+		case 1:
+			/* We got something like header's name */
+			if (*p == ':') {
+				new = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct raw_header));
+				l = p - c;
+				tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+				rspamd_strlcpy (tmp, c, l + 1);
+				new->name = tmp;
+				new->empty_separator = TRUE;
+				p ++;
+				state = 2;
+				c = p;
+			}
+			else if (g_ascii_isspace (*p)) {
+				/* Not header but some garbage */
+				state = 100;
+				next_state = 0;
+			}
+			else {
+				p ++;
+			}
+			break;
+		case 2:
+			/* We got header's name, so skip any \t or spaces */
+			if (*p == '\t') {
+				new->tab_separated = TRUE;
+				new->empty_separator = FALSE;
+				p ++;
+			}
+			else if (*p == ' ') {
+				new->empty_separator = FALSE;
+				p ++;
+			}
+			else if (*p == '\n' || *p == '\r') {
+				/* Process folding */
+				state = 99;
+				l = p - c;
+				if (l > 0) {
+					tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+
rspamd_strlcpy (tmp, c, l + 1);
+					new->separator = tmp;
+				}
+				next_state = 3;
+				err_state = 5;
+				c = p;
+			}
+			else {
+				/* Process value */
+				l = p - c;
+				if (l >= 0) {
+					tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+					rspamd_strlcpy (tmp, c, l + 1);
+					new->separator = tmp;
+				}
+				c = p;
+				state = 3;
+			}
+			break;
+		case 3:
+			if (*p == '\r' || *p == '\n') {
+				/* Hold folding */
+				state = 99;
+				next_state = 3;
+				err_state = 4;
+			}
+			else if (*(p + 1) == '\0') {
+				state = 4;
+			}
+			else {
+				p ++;
+			}
+			break;
+		case 4:
+			/* Copy header's value */
+			l = p - c;
+			tmp = rspamd_mempool_alloc (task->task_pool, l + 1);
+			tp = tmp;
+			t_state = 0;
+			/* every folding (line break plus following whitespace) becomes one space */
+			while (l --) {
+				if (t_state == 0) {
+					/* Before folding */
+					if (*c == '\n' || *c == '\r') {
+						t_state = 1;
+						c ++;
+						*tp ++ = ' ';
+					}
+					else {
+						*tp ++ = *c ++;
+					}
+				}
+				else if (t_state == 1) {
+					/* Inside folding */
+					if (g_ascii_isspace (*c)) {
+						c++;
+					}
+					else {
+						t_state = 0;
+						*tp ++ = *c ++;
+					}
+				}
+			}
+			/*
+			 * Strip last space that can be added by \r\n parsing. The
+			 * copied value can be empty (a one character value at the
+			 * very end of the headers), so never look in front of the
+			 * buffer.
+			 */
+			if (tp > tmp && *(tp - 1) == ' ') {
+				tp --;
+			}
+			*tp = '\0';
+			new->value = tmp;
+			new->next = NULL;
+			/* headers with the same name are chained in order of appearance */
+			if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) {
+				while (lp->next != NULL) {
+					lp = lp->next;
+				}
+				lp->next = new;
+			}
+			else {
+				g_hash_table_insert (task->raw_headers, new->name, new);
+			}
+			debug_task ("add raw header %s: %s", new->name, new->value);
+			state = 0;
+			break;
+		case 5:
+			/* Header has only name, no value */
+			new->next = NULL;
+			new->value = "";
+			if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) {
+				while (lp->next != NULL) {
+					lp = lp->next;
+				}
+				lp->next = new;
+			}
+			else {
+				g_hash_table_insert (task->raw_headers, new->name, new);
+			}
+			state = 0;
+			debug_task ("add raw header %s: %s", new->name, new->value);
+			break;
+		case 99:
+			/* Folding state */
+			if (*(p + 1) == '\0') {
+				state = err_state;
+			}
+			else {
+				if (*p == '\r' || *p == '\n') {
+					p ++;
+					valid_folding = FALSE;
+				}
+				else if (*p == '\t' || *p == ' ') {
+					/* Valid folding */
+					p ++;
+					valid_folding = TRUE;
+				}
+				else {
+					if (valid_folding) {
+						debug_task ("go to state: %d->%d", state, next_state);
+						state = next_state;
+					}
+					else {
+						/* Fall back */
+						debug_task ("go to state: %d->%d", state, err_state);
+						state = err_state;
+					}
+				}
+			}
+			break;
+		case 100:
+			/* Fail state, skip line */
+			if (*p == '\r') {
+				if (*(p + 1) == '\n') {
+					p ++;
+				}
+				p ++;
+				state = next_state;
+			}
+			else if (*p == '\n') {
+				if (*(p + 1) == '\r') {
+					p ++;
+				}
+				p ++;
+				state = next_state;
+			}
+			else if (*(p + 1) == '\0') {
+				state = next_state;
+				p ++;
+			}
+			else {
+				p ++;
+			}
+			break;
+		}
+	}
+}
+
+/* Memory pool destructor: free a GByteArray together with its data */
+static void
+free_byte_array_callback (void *pointer)
+{
+	GByteArray *arr = (GByteArray *) pointer;
+	g_byte_array_free (arr, TRUE);
+}
+
+static GByteArray *
+convert_text_to_utf (struct rspamd_task *task, GByteArray * part_content, GMimeContentType * type, struct mime_text_part *text_part)
+{
+	GError *err = NULL;
+	gsize read_bytes, write_bytes;
+	const gchar *charset;
+	gchar *res_str;
+	GByteArray *result_array;
+
+	if (task->cfg->raw_mode) {
+		text_part->is_raw = TRUE;
+		return part_content;
+	}
+
+	if ((charset = g_mime_content_type_get_parameter (type, "charset")) == NULL) {
+		text_part->is_raw = TRUE;
+		return part_content;
+	}
+
+	if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) {
+		if (g_utf8_validate (part_content->data, part_content->len, NULL)) {
+			text_part->is_raw = FALSE;
+			text_part->is_utf = TRUE;
+			return part_content;
+		}
+		else {
+			msg_info ("<%s>: contains invalid utf8 characters, assume it as raw", task->message_id);
+			text_part->is_raw = TRUE;
+			return part_content;
+		}
+	}
+
+	res_str = g_convert_with_fallback (part_content->data, part_content->len, UTF8_CHARSET, charset, NULL, &read_bytes, &write_bytes, &err);
+	if (res_str == NULL) {
+		msg_warn ("<%s>: cannot convert from %s to utf8: %s", task->message_id, charset, err ?
err->message : "unknown problem"); + text_part->is_raw = TRUE; + return part_content; + } + + result_array = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray)); + result_array->data = res_str; + result_array->len = write_bytes; + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, res_str); + text_part->is_raw = FALSE; + text_part->is_utf = TRUE; + + return result_array; +} + +static void +process_text_part (struct rspamd_task *task, GByteArray *part_content, GMimeContentType *type, + GMimeObject *part, GMimeObject *parent, gboolean is_empty) +{ + struct mime_text_part *text_part; + const gchar *cd; + + /* Skip attachements */ +#ifndef GMIME24 + cd = g_mime_part_get_content_disposition (GMIME_PART (part)); + if (cd && g_ascii_strcasecmp (cd, "attachment") == 0 && !task->cfg->check_text_attachements) { + debug_task ("skip attachments for checking as text parts"); + return; + } +#else + cd = g_mime_object_get_disposition (GMIME_OBJECT (part)); + if (cd && g_ascii_strcasecmp (cd, GMIME_DISPOSITION_ATTACHMENT) == 0 && !task->cfg->check_text_attachements) { + debug_task ("skip attachments for checking as text parts"); + return; + } +#endif + + if (g_mime_content_type_is_type (type, "text", "html") || g_mime_content_type_is_type (type, "text", "xhtml")) { + debug_task ("got urls from text/html part"); + + text_part = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct mime_text_part)); + text_part->is_html = TRUE; + if (is_empty) { + text_part->is_empty = TRUE; + text_part->orig = NULL; + text_part->content = NULL; + task->text_parts = g_list_prepend (task->text_parts, text_part); + return; + } + text_part->orig = convert_text_to_utf (task, part_content, type, text_part); + text_part->is_balanced = TRUE; + text_part->html_nodes = NULL; + text_part->parent = parent; + + text_part->content = strip_html_tags (task, task->task_pool, text_part, text_part->orig, NULL); + + if (text_part->html_nodes == NULL) { + url_parse_text 
(task->task_pool, task, text_part, FALSE); + } + else { + decode_entitles (text_part->content->data, &text_part->content->len); + url_parse_text (task->task_pool, task, text_part, FALSE); +#if 0 + url_parse_text (task->task_pool, task, text_part, TRUE); +#endif + } + + fuzzy_init_part (text_part, task->task_pool, task->cfg->max_diff); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) free_byte_array_callback, text_part->content); + task->text_parts = g_list_prepend (task->text_parts, text_part); + } + else if (g_mime_content_type_is_type (type, "text", "*")) { + debug_task ("got urls from text/plain part"); + + text_part = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct mime_text_part)); + text_part->is_html = FALSE; + text_part->parent = parent; + if (is_empty) { + text_part->is_empty = TRUE; + text_part->orig = NULL; + text_part->content = NULL; + task->text_parts = g_list_prepend (task->text_parts, text_part); + return; + } + text_part->orig = convert_text_to_utf (task, part_content, type, text_part); + text_part->content = text_part->orig; + url_parse_text (task->task_pool, task, text_part, FALSE); + fuzzy_init_part (text_part, task->task_pool, task->cfg->max_diff); + task->text_parts = g_list_prepend (task->text_parts, text_part); + } +} + +#ifdef GMIME24 +static void +mime_foreach_callback (GMimeObject * parent, GMimeObject * part, gpointer user_data) +#else +static void +mime_foreach_callback (GMimeObject * part, gpointer user_data) +#endif +{ + struct rspamd_task *task = (struct rspamd_task *)user_data; + struct mime_part *mime_part; + GMimeContentType *type; + GMimeDataWrapper *wrapper; + GMimeStream *part_stream; + GByteArray *part_content; + + task->parts_count++; + + /* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */ + + /* find out what class 'part' is... 
*/ + if (GMIME_IS_MESSAGE_PART (part)) { + /* message/rfc822 or message/news */ + GMimeMessage *message; + + /* g_mime_message_foreach_part() won't descend into + child message parts, so if we want to count any + subparts of this child message, we'll have to call + g_mime_message_foreach_part() again here. */ + + message = g_mime_message_part_get_message ((GMimeMessagePart *) part); + if (task->parser_recursion++ < RECURSION_LIMIT) { +#ifdef GMIME24 + g_mime_message_foreach (message, mime_foreach_callback, task); +#else + g_mime_message_foreach_part (message, mime_foreach_callback, task); +#endif + } + else { + msg_err ("endless recursion detected: %d", task->parser_recursion); + return; + } +#ifndef GMIME24 + g_object_unref (message); +#endif + } + else if (GMIME_IS_MESSAGE_PARTIAL (part)) { + /* message/partial */ + + /* this is an incomplete message part, probably a + large message that the sender has broken into + smaller parts and is sending us bit by bit. we + could save some info about it so that we could + piece this back together again once we get all the + parts? */ + } + else if (GMIME_IS_MULTIPART (part)) { + /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */ + task->parser_parent_part = part; +#ifndef GMIME24 + debug_task ("detected multipart part"); + /* we'll get to finding out if this is a signed/encrypted multipart later... 
*/ + if (task->parser_recursion++ < RECURSION_LIMIT) { + g_mime_multipart_foreach ((GMimeMultipart *) part, mime_foreach_callback, task); + } + else { + msg_err ("endless recursion detected: %d", task->parser_recursion); + return; + } +#endif + } + else if (GMIME_IS_PART (part)) { + /* a normal leaf part, could be text/plain or image/jpeg etc */ +#ifdef GMIME24 + type = (GMimeContentType *) g_mime_object_get_content_type (GMIME_OBJECT (part)); +#else + type = (GMimeContentType *) g_mime_part_get_content_type (GMIME_PART (part)); +#endif + if (type == NULL) { + msg_warn ("type of part is unknown, assume text/plain"); + type = g_mime_content_type_new ("text", "plain"); +#ifdef GMIME24 + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, type); +#else + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_mime_content_type_destroy, type); +#endif + } + wrapper = g_mime_part_get_content_object (GMIME_PART (part)); +#ifdef GMIME24 + if (wrapper != NULL && GMIME_IS_DATA_WRAPPER (wrapper)) { +#else + if (wrapper != NULL) { +#endif + part_stream = g_mime_stream_mem_new (); + if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) { + g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (part_stream), FALSE); + part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream)); + g_object_unref (part_stream); + mime_part = rspamd_mempool_alloc (task->task_pool, sizeof (struct mime_part)); + mime_part->type = type; + mime_part->content = part_content; + mime_part->parent = task->parser_parent_part; + mime_part->filename = g_mime_part_get_filename (GMIME_PART (part)); + debug_task ("found part with content-type: %s/%s", type->type, type->subtype); + task->parts = g_list_prepend (task->parts, mime_part); + /* Skip empty parts */ + process_text_part (task, part_content, type, part, task->parser_parent_part, (part_content->len <= 0)); + } + else { + msg_warn ("write to stream failed: %d, %s", 
errno, strerror (errno)); + } +#ifndef GMIME24 + g_object_unref (wrapper); +#endif + } + else { + msg_warn ("cannot get wrapper for mime part, type of part: %s/%s", type->type, type->subtype); + } + } + else { + g_assert_not_reached (); + } +} + +static void +destroy_message (void *pointer) +{ + GMimeMessage *msg = pointer; + + msg_debug ("freeing pointer %p", msg); + g_object_unref (msg); +} + +/* + * Parse the raw message stored in task->msg. + * For MIME input (task->is_mime) a GMime parser builds task->message; otherwise + * a synthetic message with a single text/html part is wrapped around the data. + * Returns -1 if the MIME parser cannot construct a message and 0 otherwise. + */ +gint +process_message (struct rspamd_task *task) +{ + GMimeMessage *message; + GMimeParser *parser; + GMimeStream *stream; + GByteArray *tmp; + GList *first, *cur; + GMimePart *part; + GMimeDataWrapper *wrapper; + struct received_header *recv; + gchar *mid, *url_str, *p, *end, *url_end; + struct uri *subject_url; + gsize len; + gint rc; + + tmp = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray)); + tmp->data = task->msg->str; + tmp->len = task->msg->len; + + stream = g_mime_stream_mem_new_with_byte_array (tmp); + /* + * This causes g_mime_stream not to free memory by itself as it is memory allocated by + * pool allocator + */ + g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (stream), FALSE); + + if (task->is_mime) { + + debug_task ("construct mime parser from string length %d", (gint)task->msg->len); + /* create a new parser object to parse the stream */ + parser = g_mime_parser_new_with_stream (stream); + g_object_unref (stream); + + /* parse the message from the stream */ + message = g_mime_parser_construct_message (parser); + + if (message == NULL) { + msg_warn ("cannot construct mime from stream"); + /* release our parser reference here: the success path only drops it at the end of this branch */ + g_object_unref (parser); + return -1; + } + + task->message = message; + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) destroy_message, task->message); + + /* Save message id for future use */ + task->message_id = g_mime_message_get_message_id (task->message); + if (task->message_id == NULL) { + task->message_id = "undef"; + } + + task->parser_recursion = 0; +#ifdef GMIME24 + g_mime_message_foreach (message, mime_foreach_callback, task); +#else + /* + * This is
rather strange, but gmime 2.2 do NOT pass top-level part to foreach callback + * so we need to set up parent part by hands + */ + task->parser_parent_part = g_mime_message_get_mime_part (message); + g_object_unref (task->parser_parent_part); + g_mime_message_foreach_part (message, mime_foreach_callback, task); +#endif + + debug_task ("found %d parts in message", task->parts_count); + if (task->queue_id == NULL) { + task->queue_id = "undef"; + } + +#ifdef GMIME24 + task->raw_headers_str = g_mime_object_get_headers (GMIME_OBJECT (task->message)); +#else + task->raw_headers_str = g_mime_message_get_headers (task->message); +#endif + + process_images (task); + + /* Parse received headers */ + first = message_get_header (task->task_pool, message, "Received", FALSE); + cur = first; + while (cur) { + recv = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct received_header)); + parse_recv_header (task->task_pool, cur->data, recv); + task->received = g_list_prepend (task->received, recv); + cur = g_list_next (cur); + } + if (first) { + g_list_free (first); + } + + if (task->raw_headers_str) { + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, task->raw_headers_str); + process_raw_headers (task); + } + + task->rcpts = g_mime_message_get_all_recipients (message); + if (task->rcpts) { +#ifdef GMIME24 + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, task->rcpts); +#else + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) internet_address_list_destroy, task->rcpts); +#endif + } + + + /* free the parser (and the stream) */ + g_object_unref (parser); + } + else { + /* We got only message, no mime headers or anything like this */ + /* Construct fake message for it */ + task->message = g_mime_message_new (TRUE); + if (task->from) { + g_mime_message_set_sender (task->message, task->from); + } + /* Construct part for it */ + part = g_mime_part_new_with_type ("text", "html"); 
+#ifdef GMIME24 + wrapper = g_mime_data_wrapper_new_with_stream (stream, GMIME_CONTENT_ENCODING_8BIT); +#else + wrapper = g_mime_data_wrapper_new_with_stream (stream, GMIME_PART_ENCODING_8BIT); +#endif + g_mime_part_set_content_object (part, wrapper); + g_mime_message_set_mime_part (task->message, GMIME_OBJECT (part)); + /* Register destructors */ + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, wrapper); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, part); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) destroy_message, task->message); + /* Now parse in a normal way */ + task->parser_recursion = 0; +#ifdef GMIME24 + g_mime_message_foreach (task->message, mime_foreach_callback, task); +#else + g_mime_message_foreach_part (task->message, mime_foreach_callback, task); +#endif + /* Generate message ID */ + mid = g_mime_utils_generate_message_id ("localhost.localdomain"); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, mid); + g_mime_message_set_message_id (task->message, mid); + task->message_id = mid; + task->queue_id = mid; + /* Set headers for message */ + if (task->subject) { + g_mime_message_set_subject (task->message, task->subject); + } + + /* Add recipients */ +#ifndef GMIME24 + if (task->rcpt) { + cur = task->rcpt; + while (cur) { + g_mime_message_add_recipient (task->message, GMIME_RECIPIENT_TYPE_TO, NULL, (gchar *)cur->data); + cur = g_list_next (cur); + } + } +#endif + } + + /* Parse urls inside Subject header */ + cur = message_get_header (task->task_pool, task->message, "Subject", FALSE); + if (cur) { + p = cur->data; + len = strlen (p); + end = p + len; + + while (p < end) { + /* Search to the end of url */ + if (url_try_text (task->task_pool, p, end - p, NULL, &url_end, &url_str, FALSE)) { + if (url_str != NULL) { + subject_url = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct uri)); + 
if (subject_url != NULL) { + /* Try to parse url */ + rc = parse_uri (subject_url, url_str, task->task_pool); + if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) && + subject_url->hostlen > 0) { + if (subject_url->protocol != PROTOCOL_MAILTO) { + if (!g_tree_lookup (task->urls, subject_url)) { + g_tree_insert (task->urls, subject_url, subject_url); + } + } + } + else if (rc != URI_ERRNO_OK) { + msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc)); + } + } + } + } + else { + break; + } + p = url_end + 1; + } + /* Free header's list */ + g_list_free (cur); + } + + return 0; +} + +struct gmime_raw_header { + struct raw_header *next; + gchar *name; + gchar *value; +}; + +typedef struct _GMimeHeader { + GHashTable *hash; + GHashTable *writers; + struct raw_header *headers; +} local_GMimeHeader; + + +/* known header field types */ +enum { + HEADER_FROM = 0, + HEADER_REPLY_TO, + HEADER_TO, + HEADER_CC, + HEADER_BCC, + HEADER_SUBJECT, + HEADER_DATE, + HEADER_MESSAGE_ID, + HEADER_UNKNOWN +}; + +/* + * Iterate throught all headers and make a list + */ +#ifndef GMIME24 +static void +header_iterate (rspamd_mempool_t * pool, struct gmime_raw_header *h, GList ** ret, const gchar *field, gboolean strong) +{ + while (h) { + if (G_LIKELY (!strong)) { + if (h->value && !g_ascii_strncasecmp (field, h->name, strlen (field))) { + if (pool != NULL) { + *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, h->value)); + } + else { + *ret = g_list_prepend (*ret, g_strdup (h->value)); + } + } + } + else { + if (h->value && !strncmp (field, h->name, strlen (field))) { + if (pool != NULL) { + *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, h->value)); + } + else { + *ret = g_list_prepend (*ret, g_strdup (h->value)); + } + } + } + h = (struct gmime_raw_header *)h->next; + } +} +#else +static void +header_iterate (rspamd_mempool_t * pool, GMimeHeaderList * ls, GList ** ret, const gchar *field, gboolean strong) +{ + /* Use 
iterator in case of gmime 2.4 */ + GMimeHeaderIter *iter; + const gchar *name; + + if (ls == NULL) { + *ret = NULL; + return; + } + + iter = g_mime_header_iter_new (); + if (g_mime_header_list_get_iter (ls, iter) && g_mime_header_iter_first (iter)) { + /* Iterate throught headers */ + while (g_mime_header_iter_is_valid (iter)) { + name = g_mime_header_iter_get_name (iter); + if (G_LIKELY (!strong)) { + if (!g_ascii_strncasecmp (field, name, strlen (name))) { + if (pool != NULL) { + *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, g_mime_header_iter_get_value (iter))); + } + else { + *ret = g_list_prepend (*ret, g_strdup (g_mime_header_iter_get_value (iter))); + } + } + } + else { + if (!strncmp (field, name, strlen (name))) { + if (pool != NULL) { + *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, g_mime_header_iter_get_value (iter))); + } + else { + *ret = g_list_prepend (*ret, g_strdup (g_mime_header_iter_get_value (iter))); + } + } + } + if (!g_mime_header_iter_next (iter)) { + break; + } + } + } + g_mime_header_iter_free (iter); +} +#endif + + +struct multipart_cb_data { + GList *ret; + rspamd_mempool_t *pool; + const gchar *field; + gboolean try_search; + gboolean strong; + gint rec; +}; + +#define MAX_REC 10 + +static void +#ifdef GMIME24 +multipart_iterate (GMimeObject * parent, GMimeObject * part, gpointer user_data) +#else +multipart_iterate (GMimeObject * part, gpointer user_data) +#endif +{ + struct multipart_cb_data *data = user_data; +#ifndef GMIME24 + struct gmime_raw_header *h; +#endif + GList *l = NULL; + + if (data->try_search && part != NULL && GMIME_IS_PART (part)) { +#ifdef GMIME24 + GMimeHeaderList *ls; + + ls = g_mime_object_get_header_list (GMIME_OBJECT (part)); + header_iterate (data->pool, ls, &l, data->field, data->strong); +#else + h = (struct gmime_raw_header *)part->headers->headers; + header_iterate (data->pool, h, &l, data->field, data->strong); +#endif + if (l == NULL) { + /* Header not found, abandon search results 
*/ + data->try_search = FALSE; + g_list_free (data->ret); + data->ret = NULL; + } + else { + data->ret = g_list_concat (l, data->ret); + } + } + else if (data->try_search && GMIME_IS_MULTIPART (part)) { + /* Maybe endless recursion here ? */ + if (data->rec++ < MAX_REC) { + g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, data); + } + else { + msg_info ("maximum recurse limit is over, stop recursing, %d", data->rec); + data->try_search = FALSE; + } + } +} + +static GList * +local_message_get_header (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *field, gboolean strong) +{ + GList *gret = NULL; + GMimeObject *part; + struct multipart_cb_data cb = { + .try_search = TRUE, + .rec = 0, + .ret = NULL, + }; + cb.pool = pool; + cb.field = field; + cb.strong = strong; + +#ifndef GMIME24 + struct gmime_raw_header *h; + + if (field == NULL) { + return NULL; + } + + msg_debug ("iterate over headers to find header %s", field); + h = (struct gmime_raw_header *) (GMIME_OBJECT (message)->headers->headers); + header_iterate (pool, h, &gret, field, strong); + + if (gret == NULL) { + /* Try to iterate with mime part headers */ + msg_debug ("iterate over headers of mime part to find header %s", field); + part = g_mime_message_get_mime_part (message); + if (part) { + h = (struct gmime_raw_header *)part->headers->headers; + header_iterate (pool, h, &gret, field, strong); + if (gret == NULL && GMIME_IS_MULTIPART (part)) { + msg_debug ("iterate over headers of each multipart's subparts %s", field); + g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, &cb); + if (cb.ret != NULL) { + gret = cb.ret; + } + } +#ifndef GMIME24 + g_object_unref (part); +#endif + } + } + + return gret; +#else + GMimeHeaderList *ls; + + ls = g_mime_object_get_header_list (GMIME_OBJECT (message)); + header_iterate (pool, ls, &gret, field, strong); + if (gret == NULL) { + /* Try to iterate with mime part headers */ + part = g_mime_message_get_mime_part (message); 
+ if (part) { + ls = g_mime_object_get_header_list (GMIME_OBJECT (part)); + header_iterate (pool, ls, &gret, field, strong); + if (gret == NULL && GMIME_IS_MULTIPART (part)) { + g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, &cb); + if (cb.ret != NULL) { + gret = cb.ret; + } + } +#ifndef GMIME24 + g_object_unref (part); +#endif + } + } + + + return gret; +#endif +} + +/** +* g_mime_message_set_date_from_string: Set the message sent-date +* @message: MIME Message +* @string: A string of date +* +* Set the sent-date on a MIME Message. +**/ +void +local_mime_message_set_date_from_string (GMimeMessage * message, const gchar * string) +{ + time_t date; + gint offset = 0; + + date = g_mime_utils_header_decode_date (string, &offset); + g_mime_message_set_date (message, date, offset); +} + +/* + * Replacements for standart gmime functions but converting adresses to IA + */ +static const gchar * +local_message_get_sender (GMimeMessage * message) +{ + gchar *res; + const gchar *from = g_mime_message_get_sender (message); + InternetAddressList *ia; + +#ifndef GMIME24 + ia = internet_address_parse_string (from); +#else + ia = internet_address_list_parse_string (from); +#endif + if (!ia) { + return NULL; + } + res = internet_address_list_to_string (ia, FALSE); +#ifndef GMIME24 + internet_address_list_destroy (ia); +#else + g_object_unref (ia); +#endif + + return res; +} + +static const gchar * +local_message_get_reply_to (GMimeMessage * message) +{ + gchar *res; + const gchar *from = g_mime_message_get_reply_to (message); + InternetAddressList *ia; + +#ifndef GMIME24 + ia = internet_address_parse_string (from); +#else + ia = internet_address_list_parse_string (from); +#endif + if (!ia) { + return NULL; + } + res = internet_address_list_to_string (ia, FALSE); +#ifndef GMIME24 + internet_address_list_destroy (ia); +#else + g_object_unref (ia); +#endif + + return res; +} + +#ifdef GMIME24 + +# define ADD_RECIPIENT_TEMPLATE(type,def) \ +static void \ 
+local_message_add_recipients_from_string_##type (GMimeMessage *message, const gchar *string, const gchar *value) \ +{ \ + InternetAddressList *il, *new; \ + \ + il = g_mime_message_get_recipients (message, (def)); \ + new = internet_address_list_parse_string (string); \ + internet_address_list_append (il, new); \ +} \ + +ADD_RECIPIENT_TEMPLATE (to, GMIME_RECIPIENT_TYPE_TO) + ADD_RECIPIENT_TEMPLATE (cc, GMIME_RECIPIENT_TYPE_CC) + ADD_RECIPIENT_TEMPLATE (bcc, GMIME_RECIPIENT_TYPE_BCC) +# define GET_RECIPIENT_TEMPLATE(type,def) \ +static InternetAddressList* \ +local_message_get_recipients_##type (GMimeMessage *message, const gchar *unused) \ +{ \ + return g_mime_message_get_recipients (message, (def)); \ +} + GET_RECIPIENT_TEMPLATE (to, GMIME_RECIPIENT_TYPE_TO) + GET_RECIPIENT_TEMPLATE (cc, GMIME_RECIPIENT_TYPE_CC) + GET_RECIPIENT_TEMPLATE (bcc, GMIME_RECIPIENT_TYPE_BCC) +#endif +/* different declarations for different types of set and get functions */ + typedef const gchar *(*GetFunc) (GMimeMessage * message); + typedef InternetAddressList *(*GetRcptFunc) (GMimeMessage * message, const gchar *type); + typedef GList *(*GetListFunc) (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *type, gboolean strong); + typedef void (*SetFunc) (GMimeMessage * message, const gchar *value); + typedef void (*SetListFunc) (GMimeMessage * message, const gchar *field, const gchar *value); + +/** different types of functions +* +* FUNC_CHARPTR +* - function with no arguments +* - get returns gchar* +* +* FUNC_IA (from Internet Address) +* - function with additional "field" argument from the fieldfunc table, +* - get returns Glist* +* +* FUNC_LIST +* - function with additional "field" argument (given arbitrary header field name) +* - get returns Glist* +**/ + enum { + FUNC_CHARPTR = 0, + FUNC_CHARFREEPTR, + FUNC_IA, + FUNC_LIST + }; + +/** +* fieldfunc struct: structure of MIME fields and corresponding get and set +* functions. 
+**/ + static struct { + gchar *name; + GetFunc func; + GetRcptFunc rcptfunc; + GetListFunc getlistfunc; + SetFunc setfunc; + SetListFunc setlfunc; + gint functype; + } fieldfunc[] = +{ + { + "From", local_message_get_sender, NULL, NULL, g_mime_message_set_sender, NULL, FUNC_CHARFREEPTR}, { + "Reply-To", local_message_get_reply_to, NULL, NULL, g_mime_message_set_reply_to, NULL, FUNC_CHARFREEPTR}, +#ifndef GMIME24 + { + "To", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, { + "Cc", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, { + "Bcc", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, { + "Date", (GetFunc) g_mime_message_get_date_string, NULL, NULL, local_mime_message_set_date_from_string, NULL, FUNC_CHARFREEPTR}, +#else + { + "To", NULL, local_message_get_recipients_to, NULL, NULL, local_message_add_recipients_from_string_to, FUNC_IA}, { + "Cc", NULL, local_message_get_recipients_cc, NULL, NULL, local_message_add_recipients_from_string_cc, FUNC_IA}, { + "Bcc", NULL, local_message_get_recipients_bcc, NULL, NULL, local_message_add_recipients_from_string_bcc, FUNC_IA}, { + "Date", (GetFunc)g_mime_message_get_date_as_string, NULL, NULL, local_mime_message_set_date_from_string, NULL, FUNC_CHARFREEPTR}, +#endif + { + "Subject", g_mime_message_get_subject, NULL, NULL, g_mime_message_set_subject, NULL, FUNC_CHARPTR}, { + "Message-Id", g_mime_message_get_message_id, NULL, NULL, g_mime_message_set_message_id, NULL, FUNC_CHARPTR}, +#ifndef GMIME24 + { + NULL, NULL, NULL, local_message_get_header, NULL, g_mime_message_add_header, FUNC_LIST} +#else + { + NULL, NULL, NULL, local_message_get_header, NULL, (SetListFunc)g_mime_object_append_header, FUNC_LIST} +#endif +}; + +/** +* message_set_header: set header of any type excluding 
special (Content- and MIME-Version:) +**/ +void +message_set_header (GMimeMessage * message, const gchar *field, const gchar *value) +{ + gint i; + + if (!g_ascii_strcasecmp (field, "MIME-Version:") || !g_ascii_strncasecmp (field, "Content-", 8)) { + return; + } + for (i = 0; i <= HEADER_UNKNOWN; ++i) { + if (!fieldfunc[i].name || !g_ascii_strncasecmp (field, fieldfunc[i].name, strlen (fieldfunc[i].name))) { + switch (fieldfunc[i].functype) { + case FUNC_CHARFREEPTR: + /* shares the plain string setter with FUNC_CHARPTR; without this label From, Reply-To and Date were silently skipped */ + case FUNC_CHARPTR: + (*(fieldfunc[i].setfunc)) (message, value); + break; + case FUNC_IA: + (*(fieldfunc[i].setlfunc)) (message, fieldfunc[i].name, value); + break; + case FUNC_LIST: + (*(fieldfunc[i].setlfunc)) (message, field, value); + break; + } + break; + } + } +} + + +/** +* message_get_header: returns the list of 'any header' values +* (except of unsupported yet Content- and MIME-Version special headers) +* +* The first fieldfunc entry whose name prefixes @field (case-insensitively) is used; +* the final entry has no name and therefore handles every other header. +* +* You should free the GList list by yourself. When @pool is not NULL the returned +* strings belong to the pool, otherwise they must be freed by the caller as well. +**/ +GList * +message_get_header (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *field, gboolean strong) +{ + gint i; + gchar *ret = NULL, *ia_string; + GList *gret = NULL; + InternetAddressList *ia_list = NULL, *ia; + + for (i = 0; i <= HEADER_UNKNOWN; ++i) { + if (!fieldfunc[i].name || !g_ascii_strncasecmp (field, fieldfunc[i].name, strlen (fieldfunc[i].name))) { + switch (fieldfunc[i].functype) { + case FUNC_CHARFREEPTR: + ret = (gchar *)(*(fieldfunc[i].func)) (message); + break; + case FUNC_CHARPTR: + ret = (gchar *)(*(fieldfunc[i].func)) (message); + break; + case FUNC_IA: + ia_list = (*(fieldfunc[i].rcptfunc)) (message, field); + ia = ia_list; +#ifndef GMIME24 + while (ia && ia->address) { + + ia_string = internet_address_to_string ((InternetAddress *) ia->address, FALSE); + if (pool != NULL) { + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_free, ia_string); + } + gret = g_list_prepend (gret, ia_string); + ia = ia->next; + } +#else + { + /* + * Use a dedicated counter: reusing `i` here left it at -1, so the + * fieldfunc[i] check after the loop read outside the table. + */ + gint n = internet_address_list_length (ia); + + /* walk backwards so that prepending keeps the addresses in list order */ + while (--n >= 0) { + ia_string =
internet_address_to_string (internet_address_list_get_address (ia, n), FALSE); + if (pool != NULL) { + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_free, ia_string); + } + gret = g_list_prepend (gret, ia_string); + } + } +#endif + break; + case FUNC_LIST: + gret = (*(fieldfunc[i].getlistfunc)) (pool, message, field, strong); + break; + } + break; + } + } + if (gret == NULL && ret != NULL) { + if (pool != NULL) { + gret = g_list_prepend (gret, rspamd_mempool_strdup (pool, ret)); + } + else { + gret = g_list_prepend (gret, g_strdup (ret)); + } + } + /* i still indexes the matched fieldfunc entry; FUNC_CHARFREEPTR getters return a string we own */ + if (fieldfunc[i].functype == FUNC_CHARFREEPTR && ret) { + g_free (ret); + } + + return gret; +} + +GList* +message_get_raw_header (struct rspamd_task *task, const gchar *field, gboolean strong) +{ + GList *gret = NULL; + struct raw_header *rh; + + rh = g_hash_table_lookup (task->raw_headers, field); + + if (rh == NULL) { + return NULL; + } + + while (rh) { + if (strong) { + if (strcmp (rh->name, field) == 0) { + gret = g_list_prepend (gret, rh); + } + } + else { + if (g_ascii_strcasecmp (rh->name, field) == 0) { + gret = g_list_prepend (gret, rh); + } + } + rh = rh->next; + } + + if (gret != NULL) { + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, gret); + } + + return gret; +} diff --git a/src/libmime/message.h b/src/libmime/message.h new file mode 100644 index 000000000..5e27579d1 --- /dev/null +++ b/src/libmime/message.h @@ -0,0 +1,91 @@ +/** + * @file message.h + * Message processing functions and structures + */ + +#ifndef RSPAMD_MESSAGE_H +#define RSPAMD_MESSAGE_H + +#include "config.h" +#include "fuzzy.h" + +struct rspamd_task; +struct controller_session; + +struct mime_part { + GMimeContentType *type; + GByteArray *content; + GMimeObject *parent; + gchar *checksum; + const gchar *filename; +}; + +struct mime_text_part { + gboolean is_html; + gboolean is_raw; + gboolean is_balanced; + gboolean is_empty; + gboolean is_utf; + const gchar *real_charset; +
GByteArray *orig; + GByteArray *content; + GNode *html_nodes; + GList *urls_offset; /**< list of offsets of urls */ + fuzzy_hash_t *fuzzy; + fuzzy_hash_t *double_fuzzy; + GMimeObject *parent; + GUnicodeScript script; + f_str_t *diff_str; +}; + +struct received_header { + gchar *from_hostname; + gchar *from_ip; + gchar *real_hostname; + gchar *real_ip; + gchar *by_hostname; + gint is_error; +}; + +struct raw_header { + gchar *name; + gchar *value; + gboolean tab_separated; + gboolean empty_separator; + gchar *separator; + struct raw_header *next; +}; + +/** + * Process message with all filters/statfiles, extract mime parts, urls and + * call metrics consolidation functions + * @param task worker_task object + * @return 0 if we have delayed filters to process and 1 if we have finished with processing + */ +gint process_message (struct rspamd_task *task); + +/* + * Set header with specified name and value + */ +void message_set_header (GMimeMessage *message, const gchar *field, const gchar *value); + +/* + * Get a list of header's values with specified header's name + * @param pool if not NULL this pool would be used for storing header's values + * @param message g_mime_message object + * @param field header's name + * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not + * @return A list of header's values or NULL. If list is not NULL it MUST be freed. If pool is NULL elements must be freed as well. + */ +GList* message_get_header (rspamd_mempool_t *pool, GMimeMessage *message, const gchar *field, gboolean strong); + +/* + * Get a list of header's values with specified header's name using raw headers + * @param task worker task structure + * @param field header's name + * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not + * @return A list of header's values or NULL. Unlike previous function it is NOT required to free list or values. I should rework one of these functions some time. 
+ */ +GList* message_get_raw_header (struct rspamd_task *task, const gchar *field, gboolean strong); + +#endif diff --git a/src/libmime/protocol.c b/src/libmime/protocol.c new file mode 100644 index 000000000..8a5c3f0df --- /dev/null +++ b/src/libmime/protocol.c @@ -0,0 +1,821 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "main.h" +#include "util.h" +#include "cfg_file.h" +#include "settings.h" +#include "message.h" + +/* Max line size */ +#define OUTBUFSIZ BUFSIZ +/* + * Just check if the passed message is spam or not and reply as + * described below + */ +#define MSG_CMD_CHECK "check" +/* + * Check if message is spam or not, and return score plus list + * of symbols hit + */ +#define MSG_CMD_SYMBOLS "symbols" +/* + * Check if message is spam or not, and return score plus report + */ +#define MSG_CMD_REPORT "report" +/* + * Check if message is spam or not, and return score plus report + * if the message is spam + */ +#define MSG_CMD_REPORT_IFSPAM "report_ifspam" +/* + * Ignore this message -- client opened connection then changed + */ +#define MSG_CMD_SKIP "skip" +/* + * Return a confirmation that spamd is alive + */ +#define MSG_CMD_PING "ping" +/* + * Process this message as described above and return modified message + */ +#define MSG_CMD_PROCESS "process" + +/* + * Learn specified statfile using message + */ +#define MSG_CMD_LEARN "learn" + +/* + * spamassassin greeting: + */ +#define SPAMC_GREETING "SPAMC" +/* + * rspamd greeting: + */ +#define RSPAMC_GREETING "RSPAMC" +/* + * Headers + */ +#define CONTENT_LENGTH_HEADER "Content-length" +#define HELO_HEADER "Helo" +#define FROM_HEADER "From" +#define IP_ADDR_HEADER "IP" +#define NRCPT_HEADER "Recipient-Number" +#define RCPT_HEADER "Rcpt" +#define SUBJECT_HEADER "Subject" +#define STATFILE_HEADER "Statfile" +#define QUEUE_ID_HEADER "Queue-ID" +#define ERROR_HEADER "Error" +#define USER_HEADER "User" +#define PASS_HEADER "Pass" +#define JSON_HEADER "Json" +#define HOSTNAME_HEADER "Hostname" +#define DELIVER_TO_HEADER "Deliver-To" +#define NO_LOG_HEADER "Log" + +static GList *custom_commands = NULL; + + +/* + * Remove <> from the fixed string and copy it to the pool + */ +static gchar * +rspamd_protocol_escape_braces (GString *in) +{ + gint len = 0; + gchar *orig, *p; + + orig = in->str; + 
while ((g_ascii_isspace (*orig) || *orig == '<') && orig - in->str < (gint)in->len) { + orig ++; + } + + g_string_erase (in, 0, orig - in->str); + + p = orig; + while ((!g_ascii_isspace (*p) && *p != '>') && p - in->str < (gint)in->len) { + p ++; + len ++; + } + + g_string_truncate (in, len); + + return in->str; +} + +static gboolean +rspamd_protocol_handle_url (struct rspamd_task *task, struct rspamd_http_message *msg) +{ + GList *cur; + struct custom_command *cmd; + const gchar *p; + + if (msg->url == NULL || msg->url->len == 0) { + task->last_error = "command is absent"; + task->error_code = 400; + return FALSE; + } + + if (msg->url->str[0] == '/') { + p = &msg->url->str[1]; + } + else { + p = msg->url->str; + } + + switch (*p) { + case 'c': + case 'C': + /* check */ + if (g_ascii_strcasecmp (p + 1, MSG_CMD_CHECK + 1) == 0) { + task->cmd = CMD_CHECK; + } + else { + goto err; + } + break; + case 's': + case 'S': + /* symbols, skip */ + if (g_ascii_strcasecmp (p + 1, MSG_CMD_SYMBOLS + 1) == 0) { + task->cmd = CMD_SYMBOLS; + } + else if (g_ascii_strcasecmp (p + 1, MSG_CMD_SKIP + 1) == 0) { + task->cmd = CMD_SKIP; + } + else { + goto err; + } + break; + case 'p': + case 'P': + /* ping, process */ + if (g_ascii_strcasecmp (p + 1, MSG_CMD_PING + 1) == 0) { + task->cmd = CMD_PING; + } + else if (g_ascii_strcasecmp (p + 1, MSG_CMD_PROCESS + 1) == 0) { + task->cmd = CMD_PROCESS; + } + else { + goto err; + } + break; + case 'r': + case 'R': + /* report, report_ifspam */ + if (g_ascii_strcasecmp (p + 1, MSG_CMD_REPORT + 1) == 0) { + task->cmd = CMD_REPORT; + } + else if (g_ascii_strcasecmp (p + 1, MSG_CMD_REPORT_IFSPAM + 1) == 0) { + task->cmd = CMD_REPORT_IFSPAM; + } + else { + goto err; + } + break; + default: + cur = custom_commands; + while (cur) { + cmd = cur->data; + if (g_ascii_strcasecmp (p, cmd->name) == 0) { + task->cmd = CMD_OTHER; + task->custom_cmd = cmd; + break; + } + cur = g_list_next (cur); + } + + if (cur == NULL) { + goto err; + } + break; + } + + return 
TRUE; + +err: + debug_task ("bad command: %s", p); + task->last_error = "invalid command"; + task->error_code = 400; + return FALSE; +} + +static gboolean +rspamd_protocol_handle_headers (struct rspamd_task *task, struct rspamd_http_message *msg) +{ + gchar *headern, *err, *tmp; + gboolean res = TRUE; + struct rspamd_http_header *h; + + LL_FOREACH (msg->headers, h) { + headern = h->name->str; + + switch (headern[0]) { + case 'd': + case 'D': + if (g_ascii_strcasecmp (headern, DELIVER_TO_HEADER) == 0) { + task->deliver_to = rspamd_protocol_escape_braces (h->value); + debug_task ("read deliver-to header, value: %s", task->deliver_to); + } + else { + debug_task ("wrong header: %s", headern); + res = FALSE; + } + break; + case 'h': + case 'H': + if (g_ascii_strcasecmp (headern, HELO_HEADER) == 0) { + task->helo = h->value->str; + debug_task ("read helo header, value: %s", task->helo); + } + else if (g_ascii_strcasecmp (headern, HOSTNAME_HEADER) == 0) { + task->hostname = h->value->str; + debug_task ("read hostname header, value: %s", task->hostname); + } + else { + debug_task ("wrong header: %s", headern); + res = FALSE; + } + break; + case 'f': + case 'F': + if (g_ascii_strcasecmp (headern, FROM_HEADER) == 0) { + task->from = rspamd_protocol_escape_braces (h->value); + debug_task ("read from header, value: %s", task->from); + } + else { + debug_task ("wrong header: %s", headern); + res = FALSE; + } + break; + case 'j': + case 'J': + if (g_ascii_strcasecmp (headern, JSON_HEADER) == 0) { + task->is_json = parse_flag (h->value->str); + } + else { + debug_task ("wrong header: %s", headern); + res = FALSE; + } + break; + case 'q': + case 'Q': + if (g_ascii_strcasecmp (headern, QUEUE_ID_HEADER) == 0) { + task->queue_id = h->value->str; + debug_task ("read queue_id header, value: %s", task->queue_id); + } + else { + debug_task ("wrong header: %s", headern); + res = FALSE; + } + break; + case 'r': + case 'R': + if (g_ascii_strcasecmp (headern, RCPT_HEADER) == 0) { + tmp = 
rspamd_protocol_escape_braces (h->value); + task->rcpt = g_list_prepend (task->rcpt, tmp); + debug_task ("read rcpt header, value: %s", tmp); + } + else if (g_ascii_strcasecmp (headern, NRCPT_HEADER) == 0) { + task->nrcpt = strtoul (h->value->str, &err, 10); + debug_task ("read rcpt header, value: %d", (gint)task->nrcpt); + } + else { + msg_info ("wrong header: %s", headern); + res = FALSE; + } + break; + case 'i': + case 'I': + if (g_ascii_strcasecmp (headern, IP_ADDR_HEADER) == 0) { + tmp = h->value->str; + if (!rspamd_parse_inet_address (&task->from_addr, tmp)) { + msg_err ("bad ip header: '%s'", tmp); + return FALSE; + } + debug_task ("read IP header, value: %s", tmp); + } + else { + debug_task ("wrong header: %s", headern); + res = FALSE; + } + break; + case 'p': + case 'P': + if (g_ascii_strcasecmp (headern, PASS_HEADER) == 0) { + if (h->value->len == sizeof ("all") - 1 && + g_ascii_strcasecmp (h->value->str, "all") == 0) { + task->pass_all_filters = TRUE; + debug_task ("pass all filters"); + } + } + else { + res = FALSE; + } + break; + case 's': + case 'S': + if (g_ascii_strcasecmp (headern, SUBJECT_HEADER) == 0) { + task->subject = h->value->str; + } + else { + res = FALSE; + } + break; + case 'u': + case 'U': + if (g_ascii_strcasecmp (headern, USER_HEADER) == 0) { + task->user = h->value->str; + } + else { + res = FALSE; + } + break; + case 'l': + case 'L': + if (g_ascii_strcasecmp (headern, NO_LOG_HEADER) == 0) { + if (g_ascii_strcasecmp (h->value->str, "no") == 0) { + task->no_log = TRUE; + } + } + else { + res = FALSE; + } + break; + default: + debug_task ("wrong header: %s", headern); + res = FALSE; + break; + } + } + + if (!res && task->cfg->strict_protocol_headers) { + msg_err ("deny processing of a request with incorrect or unknown headers"); + task->last_error = "invalid header"; + task->error_code = 400; + return FALSE; + } + + return TRUE; +} + +gboolean +rspamd_protocol_handle_request (struct rspamd_task *task, + struct rspamd_http_message *msg) 
+{ + gboolean ret = TRUE; + + if (msg->method == HTTP_SYMBOLS) { + task->cmd = CMD_SYMBOLS; + task->is_json = FALSE; + } + else if (msg->method == HTTP_CHECK) { + task->cmd = CMD_CHECK; + task->is_json = FALSE; + } + else { + task->is_json = TRUE; + ret = rspamd_protocol_handle_url (task, msg); + } + + if (ret) { + ret = rspamd_protocol_handle_headers (task, msg); + } + + return ret; +} + +static void +write_hashes_to_log (struct rspamd_task *task, GString *logbuf) +{ + GList *cur; + struct mime_text_part *text_part; + + cur = task->text_parts; + + while (cur) { + text_part = cur->data; + if (text_part->fuzzy) { + if (cur->next != NULL) { + rspamd_printf_gstring (logbuf, " part: %Xd,", text_part->fuzzy->h); + } + else { + rspamd_printf_gstring (logbuf, " part: %Xd", text_part->fuzzy->h); + } + } + cur = g_list_next (cur); + } +} + + +/* Structure for writing tree data */ +struct tree_cb_data { + ucl_object_t *top; + struct rspamd_task *task; +}; + +/* + * Callback for writing urls + */ +static gboolean +urls_protocol_cb (gpointer key, gpointer value, gpointer ud) +{ + struct tree_cb_data *cb = ud; + struct uri *url = value; + ucl_object_t *obj; + + obj = ucl_object_fromlstring (url->host, url->hostlen); + DL_APPEND (cb->top->value.av, obj); + + if (cb->task->cfg->log_urls) { + msg_info ("<%s> URL: %s - %s: %s", cb->task->message_id, cb->task->user ? + cb->task->user : (cb->task->from ? 
cb->task->from : "unknown"), + rspamd_inet_address_to_string (&cb->task->from_addr), + struri (url)); + } + + return FALSE; +} + +static ucl_object_t * +rspamd_urls_tree_ucl (GTree *input, struct rspamd_task *task) +{ + struct tree_cb_data cb; + ucl_object_t *obj; + + obj = ucl_object_typed_new (UCL_ARRAY); + cb.top = obj; + cb.task = task; + + g_tree_foreach (input, urls_protocol_cb, &cb); + + return obj; +} + +static gboolean +emails_protocol_cb (gpointer key, gpointer value, gpointer ud) +{ + struct tree_cb_data *cb = ud; + struct uri *url = value; + ucl_object_t *obj; + + obj = ucl_object_fromlstring (url->user, url->userlen + url->hostlen + 1); + DL_APPEND (cb->top->value.av, obj); + + return FALSE; +} + +static ucl_object_t * +rspamd_emails_tree_ucl (GTree *input, struct rspamd_task *task) +{ + struct tree_cb_data cb; + ucl_object_t *obj; + + obj = ucl_object_typed_new (UCL_ARRAY); + cb.top = obj; + cb.task = task; + + g_tree_foreach (input, emails_protocol_cb, &cb); + + return obj; +} + + +/* Write new subject */ +static const gchar * +make_rewritten_subject (struct metric *metric, struct rspamd_task *task) +{ + static gchar subj_buf[1024]; + gchar *p = subj_buf, *end, *c, *res; + const gchar *s; + + end = p + sizeof(subj_buf); + c = metric->subject; + s = g_mime_message_get_subject (task->message); + + while (p < end) { + if (*c == '\0') { + *p = '\0'; + break; + } + else if (*c == '%' && *(c + 1) == 's') { + p += rspamd_strlcpy (p, (s != NULL) ? 
s : "", end - p); + c += 2; + } + else { + *p = *c ++; + } + p ++; + } + res = g_mime_utils_header_encode_text (subj_buf); + + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_free, res); + + return res; +} + +static ucl_object_t * +rspamd_str_list_ucl (GList *str_list) +{ + ucl_object_t *top = NULL, *obj; + GList *cur; + + top = ucl_object_typed_new (UCL_ARRAY); + cur = str_list; + while (cur) { + obj = ucl_object_fromstring (cur->data); + DL_APPEND (top->value.av, obj); + cur = g_list_next (cur); + } + + return top; +} + +static ucl_object_t * +rspamd_metric_symbol_ucl (struct rspamd_task *task, struct metric *m, + struct symbol *sym, GString *logbuf) +{ + ucl_object_t *obj = NULL; + const gchar *description = NULL; + + rspamd_printf_gstring (logbuf, "%s,", sym->name); + description = g_hash_table_lookup (m->descriptions, sym->name); + + obj = ucl_object_typed_new (UCL_OBJECT); + ucl_object_insert_key (obj, ucl_object_fromstring (sym->name), "name", 0, false); + ucl_object_insert_key (obj, ucl_object_fromdouble (sym->score), "score", 0, false); + if (description) { + ucl_object_insert_key (obj, ucl_object_fromstring (description), "description", 0, false); + } + if (sym->options != NULL) { + ucl_object_insert_key (obj, rspamd_str_list_ucl (sym->options), "options", 0, false); + } + + return obj; +} + +static ucl_object_t * +rspamd_metric_result_ucl (struct rspamd_task *task, struct metric_result *mres, GString *logbuf) +{ + GHashTableIter hiter; + struct symbol *sym; + struct metric *m; + gboolean is_spam; + enum rspamd_metric_action action = METRIC_ACTION_NOACTION; + ucl_object_t *obj = NULL, *sobj; + gdouble required_score; + gpointer h, v; + const gchar *subject; + gchar action_char; + + m = mres->metric; + + /* XXX: handle settings */ + required_score = m->actions[METRIC_ACTION_REJECT].score; + is_spam = (mres->score >= required_score); + action = check_metric_action (mres->score, required_score, m); + if (task->is_skipped) { + 
action_char = 'S'; + } + else if (is_spam) { + action_char = 'T'; + } + else { + action_char = 'F'; + } + rspamd_printf_gstring (logbuf, "(%s: %c (%s): [%.2f/%.2f] [", + m->name, action_char, + str_action_metric (action), + mres->score, required_score); + + obj = ucl_object_typed_new (UCL_OBJECT); + ucl_object_insert_key (obj, ucl_object_frombool (is_spam), + "is_spam", 0, false); + ucl_object_insert_key (obj, ucl_object_frombool (task->is_skipped), + "is_skipped", 0, false); + ucl_object_insert_key (obj, ucl_object_fromdouble (mres->score), + "score", 0, false); + ucl_object_insert_key (obj, ucl_object_fromdouble (required_score), + "required_score", 0, false); + ucl_object_insert_key (obj, ucl_object_fromstring (str_action_metric (action)), + "action", 0, false); + + if (action == METRIC_ACTION_REWRITE_SUBJECT) { + subject = make_rewritten_subject (m, task); + ucl_object_insert_key (obj, ucl_object_fromstring (subject), + "subject", 0, false); + } + /* Now handle symbols */ + g_hash_table_iter_init (&hiter, mres->symbols); + while (g_hash_table_iter_next (&hiter, &h, &v)) { + sym = (struct symbol *)v; + sobj = rspamd_metric_symbol_ucl (task, m, sym, logbuf); + ucl_object_insert_key (obj, sobj, h, 0, false); + } + + /* Cut the trailing comma if needed */ + if (logbuf->str[logbuf->len - 1] == ',') { + logbuf->len --; + } + +#ifdef HAVE_CLOCK_GETTIME + rspamd_printf_gstring (logbuf, "]), len: %z, time: %s, dns req: %d,", + task->msg->len, calculate_check_time (&task->tv, &task->ts, + task->cfg->clock_res, &task->scan_milliseconds), task->dns_requests); +#else + rspamd_printf_gstring (logbuf, "]), len: %z, time: %s, dns req: %d,", + task->msg->len, + calculate_check_time (&task->tv, task->cfg->clock_res, &task->scan_milliseconds), + task->dns_requests); +#endif + + return obj; +} + +static void +rspamd_ucl_tolegacy_output (struct rspamd_task *task, ucl_object_t *top, GString *out) +{ + const ucl_object_t *metric, *score, + *required_score, *is_spam, *elt, *symbols; + 
ucl_object_iter_t iter = NULL; + + metric = ucl_object_find_key (top, DEFAULT_METRIC); + if (metric != NULL) { + score = ucl_object_find_key (metric, "score"); + required_score = ucl_object_find_key (metric, "required_score"); + is_spam = ucl_object_find_key (metric, "is_spam"); + g_string_append_printf (out, "Metric: default; %s; %.2f / %.2f / 0.0\r\n", + ucl_object_toboolean (is_spam) ? "True" : "False", + ucl_object_todouble (score), + ucl_object_todouble (required_score)); + elt = ucl_object_find_key (metric, "action"); + if (elt != NULL) { + g_string_append_printf (out, "Action: %s\r\n", + ucl_object_tostring (elt)); + } + + symbols = ucl_object_find_key (metric, "symbols"); + while ((elt = ucl_iterate_object (symbols, &iter, true)) != NULL) { + const ucl_object_t *sym_score; + sym_score = ucl_object_find_key (elt, "score"); + g_string_append_printf (out, "Symbol: %s; %.2f\r\n", + ucl_object_key (elt), + ucl_object_todouble (sym_score)); + } + + elt = ucl_object_find_key (metric, "subject"); + if (elt != NULL) { + g_string_append_printf (out, "Subject: %s\r\n", + ucl_object_tostring (elt)); + } + } + g_string_append_printf (out, "Message-ID: %s\r\n", task->message_id); +} + +static void +write_check_reply (struct rspamd_http_message *msg, struct rspamd_task *task) +{ + GString *logbuf; + struct metric_result *metric_res; + GHashTableIter hiter; + gpointer h, v; + ucl_object_t *top = NULL, *obj; + + /* Output the first line - check status */ + logbuf = g_string_sized_new (BUFSIZ); + rspamd_printf_gstring (logbuf, "id: <%s>, qid: <%s>, ", task->message_id, task->queue_id); + + if (task->user) { + rspamd_printf_gstring (logbuf, "user: %s, ", task->user); + } + + if (!task->no_log) { + rspamd_roll_history_update (task->worker->srv->history, task); + } + g_hash_table_iter_init (&hiter, task->results); + + top = ucl_object_typed_new (UCL_OBJECT); + /* Convert results to an ucl object */ + while (g_hash_table_iter_next (&hiter, &h, &v)) { + metric_res = (struct 
metric_result *)v; + obj = rspamd_metric_result_ucl (task, metric_res, logbuf); + ucl_object_insert_key (top, obj, h, 0, false); + } + + if (task->messages != NULL) { + ucl_object_insert_key (top, rspamd_str_list_ucl (task->messages), "messages", 0, false); + } + if (g_tree_nnodes (task->urls) > 0) { + ucl_object_insert_key (top, rspamd_urls_tree_ucl (task->urls, task), "urls", 0, false); + } + if (g_tree_nnodes (task->emails) > 0) { + ucl_object_insert_key (top, rspamd_emails_tree_ucl (task->emails, task), + "emails", 0, false); + } + + ucl_object_insert_key (top, ucl_object_fromstring (task->message_id), + "message-id", 0, false); + + write_hashes_to_log (task, logbuf); + if (!task->no_log) { + msg_info ("%v", logbuf); + } + g_string_free (logbuf, TRUE); + + msg->body = g_string_sized_new (BUFSIZ); + + if (msg->method < HTTP_SYMBOLS) { + rspamd_ucl_emit_gstring (top, UCL_EMIT_JSON_COMPACT, msg->body); + } + else { + rspamd_ucl_tolegacy_output (task, top, msg->body); + } + ucl_object_unref (top); + + /* Increase counters */ + task->worker->srv->stat->messages_scanned++; +} + +void +rspamd_protocol_write_reply (struct rspamd_task *task) +{ + struct rspamd_http_message *msg; + const gchar *ctype = "application/json"; + ucl_object_t *top = NULL; + + msg = rspamd_http_new_message (HTTP_RESPONSE); + if (!task->is_json) { + /* Turn compatibility on */ + msg->method = HTTP_SYMBOLS; + } + msg->date = time (NULL); + + task->state = CLOSING_CONNECTION; + + top = ucl_object_typed_new (UCL_OBJECT); + debug_task ("writing reply to client"); + if (task->error_code != 0) { + msg->code = task->error_code; + ucl_object_insert_key (top, ucl_object_fromstring (task->last_error), "error", 0, false); + msg->body = g_string_sized_new (256); + rspamd_ucl_emit_gstring (top, UCL_EMIT_JSON_COMPACT, msg->body); + ucl_object_unref (top); + } + else { + switch (task->cmd) { + case CMD_REPORT_IFSPAM: + case CMD_REPORT: + case CMD_CHECK: + case CMD_SYMBOLS: + case CMD_PROCESS: + case CMD_SKIP: 
+ write_check_reply (msg, task); + break; + case CMD_PING: + msg->body = g_string_new ("pong"); + break; + case CMD_OTHER: + msg_err ("BROKEN"); + break; + } + } + + rspamd_http_connection_reset (task->http_conn); + rspamd_http_connection_write_message (task->http_conn, msg, NULL, + ctype, task, task->sock, &task->tv, task->ev_base); +} + +void +register_protocol_command (const gchar *name, protocol_reply_func func) +{ + struct custom_command *cmd; + + cmd = g_malloc (sizeof (struct custom_command)); + cmd->name = name; + cmd->func = func; + + custom_commands = g_list_prepend (custom_commands, cmd); +} diff --git a/src/libmime/protocol.h b/src/libmime/protocol.h new file mode 100644 index 000000000..8d2efe118 --- /dev/null +++ b/src/libmime/protocol.h @@ -0,0 +1,46 @@ +/** + * @file protocol.h + * Rspamd protocol definition + */ + +#ifndef RSPAMD_PROTOCOL_H +#define RSPAMD_PROTOCOL_H + +#include "config.h" +#include "filter.h" +#include "http.h" +#include "task.h" + +#define RSPAMD_BASE_ERROR 500 +#define RSPAMD_FILTER_ERROR RSPAMD_BASE_ERROR + 1 +#define RSPAMD_NETWORK_ERROR RSPAMD_BASE_ERROR + 2 +#define RSPAMD_PROTOCOL_ERROR RSPAMD_BASE_ERROR + 3 +#define RSPAMD_LENGTH_ERROR RSPAMD_BASE_ERROR + 4 +#define RSPAMD_STATFILE_ERROR RSPAMD_BASE_ERROR + 5 + +struct metric; + +/** + * Process HTTP request to the task structure + * @param task + * @param msg + * @return + */ +gboolean rspamd_protocol_handle_request (struct rspamd_task *task, struct rspamd_http_message *msg); + +/** + * Write reply for specified task command + * @param task task object + * @return 0 if we wrote reply and -1 if there was some error + */ +void rspamd_protocol_write_reply (struct rspamd_task *task); + + +/** + * Register custom fucntion to extend protocol + * @param name symbolic name of custom function + * @param func callback function for writing reply + */ +void register_protocol_command (const gchar *name, protocol_reply_func func); + +#endif diff --git a/src/libmime/smtp_proto.c 
b/src/libmime/smtp_proto.c new file mode 100644 index 000000000..3af1c3910 --- /dev/null +++ b/src/libmime/smtp_proto.c @@ -0,0 +1,701 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "main.h" +#include "cfg_file.h" +#include "util.h" +#include "smtp.h" +#include "smtp_proto.h" +#include "smtp_utils.h" + +gchar * +make_smtp_error (rspamd_mempool_t *pool, gint error_code, const gchar *format, ...) 
+{ + va_list vp; + gchar *result = NULL, *p; + size_t len; + + va_start (vp, format); + len = g_printf_string_upper_bound (format, vp); + va_end (vp); + va_start (vp, format); + len += sizeof ("65535 ") + sizeof (CRLF) - 1; + result = rspamd_mempool_alloc (pool, len); + p = result + rspamd_snprintf (result, len, "%d ", error_code); + p = rspamd_vsnprintf (p, len - (p - result), format, vp); + *p++ = CR; *p++ = LF; *p = '\0'; + va_end (vp); + + return result; +} + + +gboolean +parse_smtp_command (struct smtp_session *session, f_str_t *line, struct smtp_command **cmd) +{ + enum { + SMTP_PARSE_START = 0, + SMTP_PARSE_SPACES, + SMTP_PARSE_ARGUMENT, + SMTP_PARSE_DONE + } state; + gchar *p, *c, ch, cmd_buf[4]; + guint i; + f_str_t *arg = NULL; + struct smtp_command *pcmd; + + if (line->len == 0) { + return FALSE; + } + + state = SMTP_PARSE_START; + c = line->begin; + p = c; + *cmd = rspamd_mempool_alloc0 (session->pool, sizeof (struct smtp_command)); + pcmd = *cmd; + + for (i = 0; i < line->len; i ++, p ++) { + ch = *p; + switch (state) { + case SMTP_PARSE_START: + if (ch == ' ' || ch == ':' || ch == CR || ch == LF || i == line->len - 1) { + if (i == line->len - 1) { + p ++; + } + if (p - c == 4) { + cmd_buf[0] = g_ascii_toupper (c[0]); + cmd_buf[1] = g_ascii_toupper (c[1]); + cmd_buf[2] = g_ascii_toupper (c[2]); + cmd_buf[3] = g_ascii_toupper (c[3]); + + if (memcmp (cmd_buf, "HELO", 4) == 0) { + pcmd->command = SMTP_COMMAND_HELO; + } + else if (memcmp (cmd_buf, "EHLO", 4) == 0) { + pcmd->command = SMTP_COMMAND_EHLO; + } + else if (memcmp (cmd_buf, "MAIL", 4) == 0) { + pcmd->command = SMTP_COMMAND_MAIL; + } + else if (memcmp (cmd_buf, "RCPT", 4) == 0) { + pcmd->command = SMTP_COMMAND_RCPT; + } + else if (memcmp (cmd_buf, "DATA", 4) == 0) { + pcmd->command = SMTP_COMMAND_DATA; + } + else if (memcmp (cmd_buf, "QUIT", 4) == 0) { + pcmd->command = SMTP_COMMAND_QUIT; + } + else if (memcmp (cmd_buf, "NOOP", 4) == 0) { + pcmd->command = SMTP_COMMAND_NOOP; + } + else if (memcmp 
(cmd_buf, "EXPN", 4) == 0) { + pcmd->command = SMTP_COMMAND_EXPN; + } + else if (memcmp (cmd_buf, "RSET", 4) == 0) { + pcmd->command = SMTP_COMMAND_RSET; + } + else if (memcmp (cmd_buf, "HELP", 4) == 0) { + pcmd->command = SMTP_COMMAND_HELP; + } + else if (memcmp (cmd_buf, "VRFY", 4) == 0) { + pcmd->command = SMTP_COMMAND_VRFY; + } + else { + msg_info ("invalid command: %*s", 4, cmd_buf); + return FALSE; + } + } + else { + /* Invalid command */ + msg_info ("invalid command: %*s", 4, c); + return FALSE; + } + /* Now check what we have */ + if (ch == ' ' || ch == ':') { + state = SMTP_PARSE_SPACES; + } + else if (ch == CR) { + state = SMTP_PARSE_DONE; + } + else if (ch == LF) { + return TRUE; + } + } + else if ((ch < 'A' || ch > 'Z') && (ch < 'a' || ch > 'z')) { + msg_info ("invalid letter code in SMTP command: %d", (gint)ch); + return FALSE; + } + break; + case SMTP_PARSE_SPACES: + if (ch == CR) { + state = SMTP_PARSE_DONE; + } + else if (ch == LF) { + goto end; + } + else if (ch != ' ' && ch != ':') { + state = SMTP_PARSE_ARGUMENT; + arg = rspamd_mempool_alloc (session->pool, sizeof (f_str_t)); + c = p; + } + break; + case SMTP_PARSE_ARGUMENT: + if (ch == ' ' || ch == ':' || ch == CR || ch == LF || i == line->len - 1) { + if (i == line->len - 1 && (ch != ' ' && ch != CR && ch != LF)) { + p ++; + } + arg->len = p - c; + arg->begin = rspamd_mempool_alloc (session->pool, arg->len); + memcpy (arg->begin, c, arg->len); + pcmd->args = g_list_prepend (pcmd->args, arg); + if (ch == ' ' || ch == ':') { + state = SMTP_PARSE_SPACES; + } + else if (ch == CR) { + state = SMTP_PARSE_DONE; + } + else { + goto end; + } + } + break; + case SMTP_PARSE_DONE: + if (ch == LF) { + goto end; + } + msg_info ("CR without LF in SMTP command"); + return FALSE; + } + } + +end: + if (pcmd->args) { + pcmd->args = g_list_reverse (pcmd->args); + rspamd_mempool_add_destructor (session->pool, (rspamd_mempool_destruct_t)g_list_free, pcmd->args); + } + return TRUE; +} + +static gboolean 
+check_smtp_path (f_str_t *path) +{ + guint i; + gchar *p; + + p = path->begin; + if (*p != '<' || path->len < 2) { + return FALSE; + } + for (i = 0; i < path->len; i++, p ++) { + if (*p == '>' && i != path->len - 1) { + return FALSE; + } + } + + return *(p - 1) == '>'; +} + +gboolean +parse_smtp_helo (struct smtp_session *session, struct smtp_command *cmd) +{ + f_str_t *arg; + + if (cmd->args == NULL) { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + arg = cmd->args->data; + session->helo = rspamd_mempool_alloc (session->pool, arg->len + 1); + rspamd_strlcpy (session->helo, arg->begin, arg->len + 1); + /* Now try to write reply */ + if (cmd->command == SMTP_COMMAND_HELO) { + /* No ESMTP */ + session->error = SMTP_ERROR_OK; + session->esmtp = FALSE; + return TRUE; + } + else { + /* Try to write all capabilities */ + session->esmtp = TRUE; + if (session->ctx->smtp_capabilities == NULL) { + session->error = SMTP_ERROR_OK; + return TRUE; + } + else { + session->error = session->ctx->smtp_capabilities; + return TRUE; + } + } + + return FALSE; +} + +gboolean +parse_smtp_from (struct smtp_session *session, struct smtp_command *cmd) +{ + f_str_t *arg; + GList *cur = cmd->args; + + if (cmd->args == NULL) { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + arg = cur->data; + /* First argument MUST be FROM */ + if (arg->len != 4 || ( + g_ascii_toupper (arg->begin[0]) != 'F' || + g_ascii_toupper (arg->begin[1]) != 'R' || + g_ascii_toupper (arg->begin[2]) != 'O' || + g_ascii_toupper (arg->begin[3]) != 'M')) { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + /* Next one is from address */ + cur = g_list_next (cur); + if (cur == NULL) { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + arg = cur->data; + if (check_smtp_path (arg)) { + session->from = cur; + } + else { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + + return TRUE; +} + +gboolean +parse_smtp_rcpt (struct smtp_session 
*session, struct smtp_command *cmd) +{ + f_str_t *arg; + GList *cur = cmd->args; + + if (cmd->args == NULL) { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + arg = cur->data; + /* First argument MUST be FROM */ + if (arg->len != 2 || ( + g_ascii_toupper (arg->begin[0]) != 'T' || + g_ascii_toupper (arg->begin[1]) != 'O')) { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + /* Next one is from address */ + cur = g_list_next (cur); + if (cur == NULL) { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + arg = cur->data; + if (check_smtp_path (arg)) { + session->rcpt = g_list_prepend (session->rcpt, cur); + } + else { + session->error = SMTP_ERROR_BAD_ARGUMENTS; + return FALSE; + } + + return TRUE; + +} + +/* Return -1 if there are some error, 1 if all is ok and 0 in case of incomplete reply */ +static gint +check_smtp_ustream_reply (f_str_t *in, gchar success_code) +{ + gchar *p; + + /* Check for 250 at the begin of line */ + if (in->len >= sizeof ("220 ") - 1) { + p = in->begin; + if (p[0] == success_code) { + /* Last reply line */ + if (p[3] == ' ') { + return 1; + } + else { + return 0; + } + } + else { + return -1; + } + } + + return -1; +} + +size_t +smtp_upstream_write_list (GList *args, gchar *buf, size_t buflen) +{ + GList *cur = args; + size_t r = 0; + f_str_t *arg; + + while (cur && r < buflen - 3) { + arg = cur->data; + r += rspamd_snprintf (buf + r, buflen - r, " %V", arg); + cur = g_list_next (cur); + } + + buf[r++] = CR; + buf[r++] = LF; + buf[r] = '\0'; + + return r; +} + +gboolean +smtp_upstream_write_socket (void *arg) +{ + struct smtp_session *session = arg; + + if (session->upstream_state == SMTP_STATE_IN_SENDFILE) { + session->upstream_state = SMTP_STATE_AFTER_DATA; + return rspamd_dispatcher_write (session->upstream_dispatcher, CRLF DATA_END_TRAILER, sizeof (CRLF DATA_END_TRAILER) - 1, FALSE, TRUE); + } + + return TRUE; +} + +gboolean +smtp_upstream_read_socket (f_str_t * in, void *arg) +{ + 
struct smtp_session *session = arg; + gchar outbuf[BUFSIZ]; + gint r; + + msg_debug ("in: %V, state: %d", in, session->upstream_state); + switch (session->upstream_state) { + case SMTP_STATE_GREETING: + r = check_smtp_ustream_reply (in, '2'); + if (r == -1) { + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + /* XXX: assume upstream errors as critical errors */ + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + else if (r == 1) { + if (session->ctx->use_xclient) { + r = rspamd_snprintf (outbuf, sizeof (outbuf), "XCLIENT NAME=%s ADDR=%s" CRLF, + session->resolved ? session->hostname : "[UNDEFINED]", + inet_ntoa (session->client_addr)); + session->upstream_state = SMTP_STATE_HELO; + return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE); + } + else { + session->upstream_state = SMTP_STATE_FROM; + if (session->helo) { + r = rspamd_snprintf (outbuf, sizeof (outbuf), "%s %s" CRLF, + session->esmtp ? "EHLO" : "HELO", + session->helo); + } + else { + return smtp_upstream_read_socket (in, arg); + } + return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE); + } + } + break; + case SMTP_STATE_HELO: + r = check_smtp_ustream_reply (in, '2'); + if (r == -1) { + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + /* XXX: assume upstream errors as critical errors */ + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! 
rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + else if (r == 1) { + session->upstream_state = SMTP_STATE_FROM; + if (session->helo) { + r = rspamd_snprintf (outbuf, sizeof (outbuf), "%s %s" CRLF, + session->esmtp ? "EHLO" : "HELO", + session->helo); + } + else { + return smtp_upstream_read_socket (in, arg); + } + return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE); + } + break; + case SMTP_STATE_FROM: + r = check_smtp_ustream_reply (in, '2'); + if (r == -1) { + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + /* XXX: assume upstream errors as critical errors */ + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + else if (r == 1) { + r = rspamd_snprintf (outbuf, sizeof (outbuf), "MAIL FROM: "); + r += smtp_upstream_write_list (session->from, outbuf + r, sizeof (outbuf) - r); + session->upstream_state = SMTP_STATE_RCPT; + return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE); + } + break; + case SMTP_STATE_RCPT: + r = check_smtp_ustream_reply (in, '2'); + if (r == -1) { + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + /* XXX: assume upstream errors as critical errors */ + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! 
rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + else if (r == 1) { + r = rspamd_snprintf (outbuf, sizeof (outbuf), "RCPT TO: "); + session->cur_rcpt = g_list_first (session->rcpt); + r += smtp_upstream_write_list (session->cur_rcpt->data, outbuf + r, sizeof (outbuf) - r); + session->cur_rcpt = g_list_next (session->cur_rcpt); + session->upstream_state = SMTP_STATE_BEFORE_DATA; + return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE); + } + break; + case SMTP_STATE_BEFORE_DATA: + r = check_smtp_ustream_reply (in, '2'); + if (r == -1) { + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + if (session->cur_rcpt) { + session->rcpt = g_list_delete_link (session->rcpt, session->cur_rcpt); + } + else { + session->rcpt = g_list_delete_link (session->rcpt, session->rcpt); + } + session->errors ++; + session->state = SMTP_STATE_RCPT; + return TRUE; + } + else if (r == 1) { + if (session->cur_rcpt != NULL) { + r = rspamd_snprintf (outbuf, sizeof (outbuf), "RCPT TO: "); + r += smtp_upstream_write_list (session->cur_rcpt, outbuf + r, sizeof (outbuf) - r); + session->cur_rcpt = g_list_next (session->cur_rcpt); + if (! 
rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE)) { + goto err; + } + } + else { + session->upstream_state = SMTP_STATE_DATA; + rspamd_dispatcher_pause (session->upstream_dispatcher); + } + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + /* Write to client */ + if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + if (session->state == SMTP_STATE_WAIT_UPSTREAM) { + rspamd_dispatcher_restore (session->dispatcher); + session->state = SMTP_STATE_RCPT; + } + } + break; + case SMTP_STATE_DATA: + r = check_smtp_ustream_reply (in, '3'); + if (r == -1) { + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + /* XXX: assume upstream errors as critical errors */ + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + else if (r == 1) { + if (! make_smtp_tempfile (session)) { + session->error = SMTP_ERROR_FILE; + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + session->state = SMTP_STATE_AFTER_DATA; + session->error = SMTP_ERROR_DATA_OK; + rspamd_dispatcher_restore (session->dispatcher); + if (! 
rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + rspamd_dispatcher_pause (session->upstream_dispatcher); + rspamd_set_dispatcher_policy (session->dispatcher, BUFFER_LINE, 0); + session->dispatcher->strip_eol = FALSE; + return TRUE; + } + break; + case SMTP_STATE_AFTER_DATA: + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + session->state = SMTP_STATE_DATA; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->upstream_dispatcher, "QUIT" CRLF, sizeof ("QUIT" CRLF) - 1, FALSE, TRUE)) { + goto err; + } + session->upstream_state = SMTP_STATE_END; + return TRUE; + break; + case SMTP_STATE_END: + r = check_smtp_ustream_reply (in, '5'); + if (r == -1) { + session->error = rspamd_mempool_alloc (session->pool, in->len + 1); + rspamd_strlcpy (session->error, in->begin, in->len + 1); + /* XXX: assume upstream errors as critical errors */ + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (!rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + else { + remove_normal_event (session->s, (event_finalizer_t)smtp_upstream_finalize_connection, session); + } + return FALSE; + break; + default: + msg_err ("got upstream reply at unexpected state: %d, reply: %V", session->upstream_state, in); + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! 
rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + + return TRUE; +err: + msg_warn ("write error occured"); + return FALSE; +} + +void +smtp_upstream_err_socket (GError *err, void *arg) +{ + struct smtp_session *session = arg; + + msg_info ("abnormally closing connection with upstream %s, error: %s", session->upstream->name, err->message); + session->error = SMTP_ERROR_UPSTREAM; + session->state = SMTP_STATE_CRITICAL_ERROR; + /* XXX: assume upstream errors as critical errors */ + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + return; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + return; + } + upstream_fail (&session->upstream->up, session->session_time); + destroy_session (session->s); +} + +void +smtp_upstream_finalize_connection (gpointer data) +{ + struct smtp_session *session = data; + + if (session->state != SMTP_STATE_CRITICAL_ERROR) { + if (! 
rspamd_dispatcher_write (session->upstream_dispatcher, "QUIT" CRLF, 0, FALSE, TRUE)) { + msg_warn ("cannot send correctly closing message to upstream"); + } + } + rspamd_remove_dispatcher (session->upstream_dispatcher); + session->upstream_dispatcher = NULL; + close (session->upstream_sock); + session->upstream_sock = -1; +} diff --git a/src/libmime/smtp_proto.h b/src/libmime/smtp_proto.h new file mode 100644 index 000000000..42fecd255 --- /dev/null +++ b/src/libmime/smtp_proto.h @@ -0,0 +1,95 @@ +#ifndef RSPAMD_SMTP_PROTO_H +#define RSPAMD_SMTP_PROTO_H + +#include "config.h" +#include "smtp.h" + +/* SMTP errors */ +#define SMTP_ERROR_BAD_COMMAND "500 Syntax error, command unrecognized" CRLF +#define SMTP_ERROR_BAD_ARGUMENTS "501 Syntax error in parameters or arguments" CRLF +#define SMTP_ERROR_SEQUENCE "503 Bad sequence of commands" CRLF +#define SMTP_ERROR_RECIPIENTS "554 No valid recipients" CRLF +#define SMTP_ERROR_UNIMPLIMENTED "502 Command not implemented" CRLF +#define SMTP_ERROR_LIMIT "505 Too many errors. Aborting." CRLF +#define SMTP_ERROR_UPSTREAM "421 Service not available, closing transmission channel" CRLF +#define SMTP_ERROR_FILE "420 Service not available, filesystem error" CRLF +#define SMTP_ERROR_OK "250 Requested mail action okay, completed" CRLF +#define SMTP_ERROR_DATA_OK "354 Start mail input; end with ." CRLF + +#define DATA_END_TRAILER "." 
CRLF + +#define XCLIENT_HOST_UNAVAILABLE "[UNAVAILABLE]" +#define XCLIENT_HOST_TEMPFAIL "[TEMPUNAVAIL]" + +#define MAX_SMTP_UPSTREAMS 128 + +struct smtp_command { + enum { + SMTP_COMMAND_HELO, + SMTP_COMMAND_EHLO, + SMTP_COMMAND_QUIT, + SMTP_COMMAND_NOOP, + SMTP_COMMAND_MAIL, + SMTP_COMMAND_RCPT, + SMTP_COMMAND_RSET, + SMTP_COMMAND_DATA, + SMTP_COMMAND_VRFY, + SMTP_COMMAND_EXPN, + SMTP_COMMAND_HELP + } command; + GList *args; +}; + +/* + * Generate SMTP error message + */ +gchar * make_smtp_error (rspamd_mempool_t *pool, gint error_code, const gchar *format, ...); + +/* + * Parse a single SMTP command + */ +gboolean parse_smtp_command (struct smtp_session *session, f_str_t *line, struct smtp_command **cmd); + +/* + * Parse HELO command + */ +gboolean parse_smtp_helo (struct smtp_session *session, struct smtp_command *cmd); + +/* + * Parse MAIL command + */ +gboolean parse_smtp_from (struct smtp_session *session, struct smtp_command *cmd); + +/* + * Parse RCPT command + */ +gboolean parse_smtp_rcpt (struct smtp_session *session, struct smtp_command *cmd); + +/* Upstream SMTP */ + +/* + * Read a line from SMTP upstream + */ +gboolean smtp_upstream_read_socket (f_str_t * in, void *arg); + +/* + * Write to SMTP upstream + */ +gboolean smtp_upstream_write_socket (void *arg); + +/* + * Error handler for SMTP upstream + */ +void smtp_upstream_err_socket (GError *err, void *arg); + +/* + * Terminate connection with upstream + */ +void smtp_upstream_finalize_connection (gpointer data); + +/* + * Write a list of strings to the upstream + */ +size_t smtp_upstream_write_list (GList *args, gchar *buf, size_t buflen); + +#endif diff --git a/src/libmime/smtp_utils.c b/src/libmime/smtp_utils.c new file mode 100644 index 000000000..5178de9dd --- /dev/null +++ b/src/libmime/smtp_utils.c @@ -0,0 +1,362 @@ +/* Copyright (c) 2010, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "main.h" +#include "filter.h" +#include "settings.h" +#include "smtp.h" +#include "smtp_proto.h" + +/* + * Release everything owned by an SMTP session: the task and its mapped + * message, the recipients list, the client dispatcher and socket, the + * temporary file, the memory pool and the session structure itself. + * A NULL argument is ignored. + */ +void +free_smtp_session (gpointer arg) +{ + struct smtp_session *session = arg; + + if (session) { + if (session->task) { + /* Unmap the message first: session->task is not dereferenced after rspamd_task_free (), which may release it */ + if (session->task->msg->str) { + munmap (session->task->msg->str, session->task->msg->len); + } + rspamd_task_free (session->task, FALSE); + } + if (session->rcpt) { + g_list_free (session->rcpt); + } + if (session->dispatcher) { + rspamd_remove_dispatcher (session->dispatcher); + } + close (session->sock); + if (session->temp_name != NULL) { + unlink (session->temp_name); + } + if (session->temp_fd != -1) { + close (session->temp_fd); + } + rspamd_mempool_delete (session->pool); + g_free (session); + } +} + +gboolean +create_smtp_upstream_connection (struct smtp_session *session) +{ + struct smtp_upstream *selected; + + /* Try to select upstream */ + selected = (struct smtp_upstream *)get_upstream_round_robin (session->ctx->upstreams, + session->ctx->upstream_num, sizeof (struct smtp_upstream), + session->session_time, DEFAULT_UPSTREAM_ERROR_TIME, DEFAULT_UPSTREAM_DEAD_TIME, DEFAULT_UPSTREAM_MAXERRORS); + if (selected == NULL) { + msg_err ("no upstreams suitable found"); + return FALSE; + } + + session->upstream = selected; + + /* Now try to create socket */ + session->upstream_sock = make_universal_socket (selected->addr, selected->port, SOCK_STREAM, TRUE, FALSE, FALSE); + if (session->upstream_sock == -1) { + msg_err ("cannot make a connection to %s", selected->name); + upstream_fail (&selected->up, session->session_time); + return FALSE; + } + /* Create a dispatcher for upstream connection */ + session->upstream_dispatcher = rspamd_create_dispatcher (session->ev_base, session->upstream_sock, BUFFER_LINE, + smtp_upstream_read_socket, smtp_upstream_write_socket, smtp_upstream_err_socket, + &session->ctx->smtp_timeout, session); + session->state = SMTP_STATE_WAIT_UPSTREAM; + session->upstream_state =
SMTP_STATE_GREETING; + register_async_event (session->s, (event_finalizer_t)smtp_upstream_finalize_connection, session, g_quark_from_static_string ("smtp proxy")); + return TRUE; +} + +/* Pause the client dispatcher and pass the spooled message (temp_fd, temp_size) to the upstream dispatcher via sendfile; on failure report SMTP_ERROR_FILE to the client, destroy the session and return FALSE */ gboolean +smtp_send_upstream_message (struct smtp_session *session) +{ + rspamd_dispatcher_pause (session->dispatcher); + rspamd_dispatcher_restore (session->upstream_dispatcher); + + session->upstream_state = SMTP_STATE_IN_SENDFILE; + session->state = SMTP_STATE_WAIT_UPSTREAM; + if (! rspamd_dispatcher_sendfile (session->upstream_dispatcher, session->temp_fd, session->temp_size)) { + msg_err ("sendfile failed: %s", strerror (errno)); + goto err; + } + return TRUE; + +err: + session->error = SMTP_ERROR_FILE; + session->state = SMTP_STATE_CRITICAL_ERROR; + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + return FALSE; + } + destroy_session (session->s); + return FALSE; +} + +/* State passed to the metric callbacks: the session, the numerically smallest action seen so far with its metric result, and the log buffer (start, current offset, total size) */ struct smtp_metric_callback_data { + struct smtp_session *session; + enum rspamd_metric_action action; + struct metric_result *res; + gchar *log_buf; + gint log_offset; + gint log_size; + gboolean alive; +}; + +/* Hash table foreach callback: append the key (printed as a string) followed by ',' to the log buffer */ static void +smtp_metric_symbols_callback (gpointer key, gpointer value, void *user_data) +{ + struct smtp_metric_callback_data *cd = user_data; + + cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "%s,", (gchar *)key); +} + +/* Hash table foreach callback invoked with a metric name (key) and its struct metric_result (value) */ static void +smtp_metric_callback (gpointer key, gpointer value, gpointer ud) +{ + struct smtp_metric_callback_data *cd = ud; + struct metric_result *metric_res = value; + enum rspamd_metric_action action = METRIC_ACTION_NOACTION; + double ms = 0, rs = 0; + gboolean is_spam = FALSE; + struct rspamd_task *task; + + task = cd->session->task; + + /* Fall back to the metric's reject score when no per-task settings apply */ + if (!check_metric_settings (metric_res, &ms, &rs)) { + ms = metric_res->metric->actions[METRIC_ACTION_REJECT].score; + rs = metric_res->metric->actions[METRIC_ACTION_REJECT].score; + } + if (!
check_metric_action_settings (task, metric_res, metric_res->score, &action)) { + action = check_metric_action (metric_res->score, ms, metric_res->metric); + } + if (metric_res->score >= ms) { + is_spam = 1; + } + if (action < cd->action) { + cd->action = action; + cd->res = metric_res; + } + + if (!task->is_skipped) { + cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "(%s: %c (%s): [%.2f/%.2f/%.2f] [", + (gchar *)key, is_spam ? 'T' : 'F', str_action_metric (action), metric_res->score, ms, rs); + } + else { + cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "(%s: %c (default): [%.2f/%.2f/%.2f] [", + (gchar *)key, 'S', metric_res->score, ms, rs); + + } + g_hash_table_foreach (metric_res->symbols, smtp_metric_symbols_callback, cd); + /* Remove last , from log buf */ + if (cd->log_buf[cd->log_offset - 1] == ',') { + cd->log_buf[--cd->log_offset] = '\0'; + } + +#ifdef HAVE_CLOCK_GETTIME + cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "]), len: %z, time: %s,", + task->msg->len, calculate_check_time (&task->tv, &task->ts, task->cfg->clock_res, &task->scan_milliseconds)); +#else + cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "]), len: %z, time: %s,", + task->msg->len, calculate_check_time (&task->tv, task->cfg->clock_res, &task->scan_milliseconds)); +#endif +} + +gboolean +make_smtp_tempfile (struct smtp_session *session) +{ + gsize r; + + r = strlen (session->cfg->temp_dir) + sizeof ("/rspamd-XXXXXX"); + session->temp_name = rspamd_mempool_alloc (session->pool, r); + rspamd_snprintf (session->temp_name, r, "%s%crspamd-XXXXXX", session->cfg->temp_dir, G_DIR_SEPARATOR); +#ifdef HAVE_MKSTEMP + /* Umask is set before */ + session->temp_fd = mkstemp (session->temp_name); +#else + session->temp_fd = g_mkstemp_full (session->temp_name, O_RDWR, S_IWUSR | S_IRUSR); +#endif + if 
(session->temp_fd == -1) { + msg_err ("mkstemp error: %s", strerror (errno)); + + return FALSE; + } + + return TRUE; +} + +gboolean +write_smtp_reply (struct smtp_session *session) +{ + gchar logbuf[1024], *new_subject; + const gchar *old_subject; + struct smtp_metric_callback_data cd; + GMimeStream *stream; + gint old_fd, sublen; + + /* Check metrics */ + cd.session = session; + cd.action = METRIC_ACTION_NOACTION; + cd.res = NULL; + cd.log_buf = logbuf; + cd.log_offset = rspamd_snprintf (logbuf, sizeof (logbuf), "id: <%s>, qid: <%s>, ", + session->task->message_id, session->task->queue_id); + cd.log_size = sizeof (logbuf); + if (session->task->user) { + cd.log_offset += rspamd_snprintf (logbuf + cd.log_offset, sizeof (logbuf) - cd.log_offset, + "user: %s, ", session->task->user); + } + + g_hash_table_foreach (session->task->results, smtp_metric_callback, &cd); + + msg_info ("%s", logbuf); + + if (cd.action <= METRIC_ACTION_REJECT) { + if (! rspamd_dispatcher_write (session->dispatcher, session->ctx->reject_message, 0, FALSE, TRUE)) { + return FALSE; + } + if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { + return FALSE; + } + destroy_session (session->s); + return FALSE; + } + else if (cd.action <= METRIC_ACTION_ADD_HEADER || cd.action <= METRIC_ACTION_REWRITE_SUBJECT) { + old_fd = session->temp_fd; + if (! make_smtp_tempfile (session)) { + session->error = SMTP_ERROR_FILE; + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! 
rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + + if (cd.action <= METRIC_ACTION_REWRITE_SUBJECT) { + /* XXX: add this action */ + old_subject = g_mime_message_get_subject (session->task->message); + if (old_subject != NULL) { + sublen = strlen (old_subject) + sizeof (SPAM_SUBJECT); + new_subject = rspamd_mempool_alloc (session->pool, sublen); + rspamd_snprintf (new_subject, sublen, "%s%s", SPAM_SUBJECT, old_subject); + } + else { + new_subject = SPAM_SUBJECT; + } + g_mime_message_set_subject (session->task->message, new_subject); + } + else if (cd.action <= METRIC_ACTION_ADD_HEADER) { +#ifndef GMIME24 + g_mime_message_add_header (session->task->message, "X-Spam", "true"); +#else + g_mime_object_append_header (GMIME_OBJECT (session->task->message), "X-Spam", "true"); +#endif + } + stream = g_mime_stream_fs_new (session->temp_fd); + g_mime_stream_fs_set_owner (GMIME_STREAM_FS (stream), FALSE); + close (old_fd); + + if (g_mime_object_write_to_stream (GMIME_OBJECT (session->task->message), stream) == -1) { + msg_err ("cannot write MIME object to stream: %s", strerror (errno)); + session->error = SMTP_ERROR_FILE; + session->state = SMTP_STATE_CRITICAL_ERROR; + rspamd_dispatcher_restore (session->dispatcher); + if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + goto err; + } + destroy_session (session->s); + return FALSE; + } + g_object_unref (stream); + } + /* XXX: Add other actions */ + return smtp_send_upstream_message (session); +err: + session->error = SMTP_ERROR_FILE; + session->state = SMTP_STATE_CRITICAL_ERROR; + if (! 
rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { + return FALSE; + } + destroy_session (session->s); + return FALSE; +} + +gboolean +parse_upstreams_line (rspamd_mempool_t *pool, struct smtp_upstream *upstreams, const gchar *line, gsize *count) +{ + gchar **strv, *p, *t, *tt, *err_str; + guint32 num, i; + struct smtp_upstream *cur; + gchar resolved_path[PATH_MAX]; + + strv = g_strsplit_set (line, ",; ", -1); + num = g_strv_length (strv); + + if (num >= MAX_SMTP_UPSTREAMS) { + msg_err ("cannot define %d upstreams %d is max", num, MAX_SMTP_UPSTREAMS); + return FALSE; + } + *count = 0; + + for (i = 0; i < num; i ++) { + p = strv[i]; + cur = &upstreams[*count]; + if ((t = strrchr (p, ':')) != NULL && (tt = strchr (p, ':')) != t) { + /* Assume that after last `:' we have weigth */ + *t = '\0'; + t ++; + errno = 0; + cur->up.priority = strtoul (t, &err_str, 10); + if (errno != 0 || (err_str && *err_str != '\0')) { + msg_err ("cannot convert weight: %s, %s", t, strerror (errno)); + g_strfreev (strv); + return FALSE; + } + } + if (*p == '/') { + cur->is_unix = TRUE; + if (realpath (p, resolved_path) == NULL) { + msg_err ("cannot resolve path: %s", resolved_path); + g_strfreev (strv); + return FALSE; + } + cur->name = rspamd_mempool_strdup (pool, resolved_path); + (*count) ++; + } + else { + if (! 
parse_host_port (pool, p, &cur->addr, &cur->port)) { + g_strfreev (strv); + return FALSE; + } + cur->name = rspamd_mempool_strdup (pool, p); + (*count) ++; + } + } + + g_strfreev (strv); + return TRUE; +} diff --git a/src/libmime/smtp_utils.h b/src/libmime/smtp_utils.h new file mode 100644 index 000000000..652b6759f --- /dev/null +++ b/src/libmime/smtp_utils.h @@ -0,0 +1,63 @@ +#ifndef SMTP_UTILS_H_ +#define SMTP_UTILS_H_ + +#include "config.h" +#include "main.h" +#include "smtp.h" + +/** + * @file smtp_utils.h + * Contains utilities for smtp protocol handling + */ + +struct smtp_upstream { + struct upstream up; + + const gchar *name; + gchar *addr; + guint16 port; + gboolean is_unix; +}; + +#define MAX_SMTP_UPSTREAMS 128 + +struct smtp_session; + +/** + * Send message to upstream + * @param session session object + */ +gboolean smtp_send_upstream_message (struct smtp_session *session); + +/** + * Create connection to upstream + * @param session session object + */ +gboolean create_smtp_upstream_connection (struct smtp_session *session); + +/** + * Create temporary file for smtp session + */ +gboolean make_smtp_tempfile (struct smtp_session *session); + +/** + * Write reply to upstream + * @param session session object + */ +gboolean write_smtp_reply (struct smtp_session *session); + +/** + * Frees smtp session object + */ +void free_smtp_session (gpointer arg); + +/** + * Parse upstreams line + * @param upstreams pointer to the array of upstreams (must be at least MAX_SMTP_UPSTREAMS size) + * @param line description line + * @param count targeted count + * @return + */ +gboolean parse_upstreams_line (rspamd_mempool_t *pool, struct smtp_upstream *upstreams, const gchar *line, gsize *count); + +#endif /* SMTP_UTILS_H_ */ diff --git a/src/libmime/worker_util.c b/src/libmime/worker_util.c new file mode 100644 index 000000000..d029f5dc4 --- /dev/null +++ b/src/libmime/worker_util.c @@ -0,0 +1,255 @@ +/* Copyright (c) 2010-2011, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "main.h" +#include "message.h" +#include "lua/lua_common.h" + +extern struct rspamd_main *rspamd_main; + +/** + * Return worker's control structure by its type + * @param type + * @return worker's control structure or NULL + */ +worker_t* +get_worker_by_type (GQuark type) +{ + worker_t **cur; + + cur = &workers[0]; + while (*cur) { + if (g_quark_from_string ((*cur)->name) == type) { + return *cur; + } + cur ++; + } + + return NULL; +} + +double +set_counter (const gchar *name, guint32 value) +{ + struct counter_data *cd; + double alpha; + gchar *key; + + cd = rspamd_hash_lookup (rspamd_main->counters, (gpointer) name); + + if (cd == NULL) { + cd = rspamd_mempool_alloc_shared (rspamd_main->counters->pool, sizeof (struct counter_data)); + cd->value = value; + cd->number = 0; + key = rspamd_mempool_strdup_shared (rspamd_main->counters->pool, name); + rspamd_hash_insert (rspamd_main->counters, (gpointer) key, (gpointer) cd); + } + else { + /* Calculate new value */ + rspamd_mempool_wlock_rwlock (rspamd_main->counters->lock); + + alpha = 2. / (++cd->number + 1); + cd->value = cd->value * (1. 
- alpha) + value * alpha; + + rspamd_mempool_wunlock_rwlock (rspamd_main->counters->lock); + } + + return cd->value; +} + +/* + * Common worker start-up: initialises profiling, stores the worker pid, + * creates the event base, sets up and unblocks the signals and registers + * a persistent read event calling accept_handler for every listening + * socket that is not -1. The events are remembered in worker->accept_events. + * Returns the created event base. + */ +struct event_base * +prepare_worker (struct rspamd_worker *worker, const char *name, + rspamd_sig_handler_t sig_handler, + void (*accept_handler)(int, short, void *)) +{ + struct event_base *ev_base; + struct event *accept_event; + struct sigaction signals; + GList *cur; + gint listen_socket; + +#ifdef WITH_PROFILER + extern void _start (void), etext (void); + monstartup ((u_long) & _start, (u_long) & etext); +#endif + + gperf_profiler_init (worker->srv->cfg, name); + + worker->srv->pid = getpid (); + + ev_base = event_init (); + + init_signals (&signals, sig_handler); + sigprocmask (SIG_UNBLOCK, &signals.sa_mask, NULL); + + /* Accept all sockets */ + cur = worker->cf->listen_socks; + while (cur) { + listen_socket = GPOINTER_TO_INT (cur->data); + if (listen_socket != -1) { + accept_event = g_slice_alloc0 (sizeof (struct event)); + event_set (accept_event, listen_socket, EV_READ | EV_PERSIST, + accept_handler, worker); + event_base_set (ev_base, accept_event); + event_add (accept_event, NULL); + worker->accept_events = g_list_prepend (worker->accept_events, accept_event); + } + cur = g_list_next (cur); + } + + return ev_base; +} + +/* + * Stop accepting new connections: delete and free every accept event of + * the worker and reset the list, so that a repeated call is harmless + */ +void +worker_stop_accept (struct rspamd_worker *worker) +{ + GList *cur; + struct event *event; + + /* Remove all events */ + cur = worker->accept_events; + while (cur) { + event = cur->data; + event_del (event); + cur = g_list_next (cur); + g_slice_free1 (sizeof (struct event), event); + } + + if (worker->accept_events != NULL) { + g_list_free (worker->accept_events); + /* Do not keep a pointer to the freed list */ + worker->accept_events = NULL; + } +} + +/* + * Called if all filters are processed + * @return TRUE if session should be terminated + */ +gboolean +rspamd_task_fin (void *arg) +{ + struct rspamd_task *task = (struct rspamd_task *) arg; + gint r; + GError *err = NULL; + + /* Task is already finished or skipped */ + if (task->state == WRITE_REPLY) { + if (task->fin_callback) { +
task->fin_callback (task->fin_arg); + } + else { + rspamd_protocol_write_reply (task); + } + return TRUE; + } + + /* We processed all filters and want to process statfiles */ + if (task->state != WAIT_POST_FILTER && task->state != WAIT_PRE_FILTER) { + /* Process all statfiles */ + if (task->classify_pool == NULL) { + /* Non-threaded version */ + process_statfiles (task); + } + else { + /* Just process composites */ + make_composites (task); + } + if (task->cfg->post_filters) { + /* More to process */ + /* Special state */ + task->state = WAIT_POST_FILTER; + return FALSE; + } + + } + + /* We are on post-filter waiting state */ + if (task->state != WAIT_PRE_FILTER) { + /* Check if we have all events finished */ + task->state = WRITE_REPLY; + if (task->fin_callback) { + task->fin_callback (task->fin_arg); + } + else { + rspamd_protocol_write_reply (task); + } + } + else { + /* We were waiting for pre-filter */ + if (task->pre_result.action != METRIC_ACTION_NOACTION) { + /* Write result based on pre filters */ + task->state = WRITE_REPLY; + if (task->fin_callback) { + task->fin_callback (task->fin_arg); + } + else { + rspamd_protocol_write_reply (task); + } + return TRUE; + } + else { + task->state = WAIT_FILTER; + r = process_filters (task); + if (r == -1) { + task->last_error = "Filter processing error"; + task->error_code = RSPAMD_FILTER_ERROR; + task->state = WRITE_REPLY; + rspamd_protocol_write_reply (task); + return TRUE; + } + /* Add task to classify to classify pool */ + if (!task->is_skipped && task->classify_pool) { + register_async_thread (task->s); + g_thread_pool_push (task->classify_pool, task, &err); + if (err != NULL) { + msg_err ("cannot pull task to the pool: %s", err->message); + remove_async_thread (task->s); + g_error_free (err); + } + } + if (task->is_skipped) { + rspamd_protocol_write_reply (task); + } + else { + return FALSE; + } + } + } + + return TRUE; +} + +/* + * Called if session was restored inside fin callback + */ +void 
+rspamd_task_restore (void *arg) +{ + struct rspamd_task *task = (struct rspamd_task *) arg; + + /* Call post filters */ + if (task->state == WAIT_POST_FILTER) { + lua_call_post_filters (task); + } + task->s->wanna_die = TRUE; +} diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt new file mode 100644 index 000000000..bd5df18b9 --- /dev/null +++ b/src/libserver/CMakeLists.txt @@ -0,0 +1,63 @@ +# Librspamdserver +SET(LIBRSPAMDSERVERSRC + binlog.c + buffer.c + cfg_utils.c + cfg_rcl.c + dkim.c + dns.c + dynamic_cfg.c + events.c + html.c + proxy.c + roll_history.c + settings.c + spf.c + statfile.c + statfile_sync.c + symbols_cache.c + task.c + url.c) +SET(TOKENIZERSSRC ../tokenizers/tokenizers.c + ../tokenizers/osb.c) + +SET(CLASSIFIERSSRC ../classifiers/classifiers.c + ../classifiers/bayes.c + ../classifiers/winnow.c) + +# Librspamd-server + +#IF(WITH_DB) +# LIST(APPEND LIBRSPAMDSERVERSRC kvstorage_bdb.c) +#ENDIF(WITH_DB) +#IF(WITH_SQLITE) +# LIST(APPEND LIBRSPAMDSERVERSRC kvstorage_sqlite.c) +#ENDIF(WITH_SQLITE) + +ADD_LIBRARY(rspamd-server ${LINK_TYPE} ${LIBRSPAMDSERVERSRC} ${TOKENIZERSSRC} ${CLASSIFIERSSRC}) +IF(NOT DEBIAN_BUILD) +SET_TARGET_PROPERTIES(rspamd-server PROPERTIES VERSION ${RSPAMD_VERSION}) +ENDIF(NOT DEBIAN_BUILD) +SET_TARGET_PROPERTIES(rspamd-server PROPERTIES LINKER_LANGUAGE C COMPILE_FLAGS "-DRSPAMD_LIB") +TARGET_LINK_LIBRARIES(rspamd-server rspamd-lua) +TARGET_LINK_LIBRARIES(rspamd-server rspamd-json) +TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb) +TARGET_LINK_LIBRARIES(rspamd-server rspamd-util) +TARGET_LINK_LIBRARIES(rspamd-server rdns) +IF(CMAKE_COMPILER_IS_GNUCC) +SET_TARGET_PROPERTIES(rspamd-server PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB -fno-strict-aliasing") +ENDIF(CMAKE_COMPILER_IS_GNUCC) + +IF(WITH_DB) + TARGET_LINK_LIBRARIES(rspamd-server db) +ENDIF(WITH_DB) + +IF(OPENSSL_FOUND) + TARGET_LINK_LIBRARIES(rspamd-server ${OPENSSL_LIBRARIES}) +ENDIF(OPENSSL_FOUND) + +IF(NO_SHARED MATCHES "OFF") + INSTALL(TARGETS 
rspamd-server + LIBRARY DESTINATION ${LIBDIR} + PUBLIC_HEADER DESTINATION ${INCLUDEDIR}) +ENDIF(NO_SHARED MATCHES "OFF") \ No newline at end of file diff --git a/src/libserver/binlog.c b/src/libserver/binlog.c new file mode 100644 index 000000000..f085a7de0 --- /dev/null +++ b/src/libserver/binlog.c @@ -0,0 +1,579 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "binlog.h" +#include "cfg_file.h" +#include "tokenizers/tokenizers.h" + +#define BINLOG_SUFFIX ".binlog" +#define BACKUP_SUFFIX ".old" +#define VALID_MAGIC { 'r', 's', 'l' } +#define VALID_VERSION { '1', '0' } + +static GHashTable *binlog_opened = NULL; +static rspamd_mempool_t *binlog_pool = NULL; + +static gboolean +binlog_write_header (struct rspamd_binlog *log) +{ + struct rspamd_binlog_header header = { + .magic = VALID_MAGIC, + .version = VALID_VERSION, + .padding = { '\0', '\0' }, + }; + + header.create_time = time (NULL); + lock_file (log->fd, FALSE); + + if (write (log->fd, &header, sizeof (struct rspamd_binlog_header)) == -1) { + msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno)); + return FALSE; + } + + + memcpy (&log->header, &header, sizeof (struct rspamd_binlog_header)); + + /* Metaindex */ + log->metaindex = g_malloc (sizeof (struct rspamd_binlog_metaindex)); + bzero (log->metaindex, sizeof (struct rspamd_binlog_metaindex)); + /* Offset to metaindex */ + log->metaindex->indexes[0] = sizeof (struct rspamd_binlog_metaindex) + sizeof (struct rspamd_binlog_header); + + if (write (log->fd, log->metaindex, sizeof (struct rspamd_binlog_metaindex)) == -1) { + g_free (log->metaindex); + msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno)); + unlock_file (log->fd, FALSE); + return FALSE; + } + + /* Alloc, write, mmap */ + log->cur_idx = g_malloc (sizeof (struct rspamd_index_block)); + bzero (log->cur_idx, sizeof (struct rspamd_index_block)); + if (write (log->fd, log->cur_idx, sizeof (struct rspamd_index_block)) == -1) { + g_free (log->cur_idx); + msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno)); + unlock_file (log->fd, FALSE); + return FALSE; + } + + unlock_file (log->fd, FALSE); + + return TRUE; +} + +static gboolean +binlog_check_file (struct rspamd_binlog *log) +{ + static gchar valid_magic[] = VALID_MAGIC, 
valid_version[] = VALID_VERSION; + + if (read (log->fd, &log->header, sizeof (struct rspamd_binlog_header)) != sizeof (struct rspamd_binlog_header)) { + msg_warn ("cannot read file %s, error %d, %s", log->filename, errno, strerror (errno)); + return FALSE; + } + + /* Now check all fields */ + if (memcmp (&log->header.magic, valid_magic, sizeof (valid_magic)) != 0 || + memcmp (&log->header.version, valid_version, sizeof (valid_version)) != 0) { + msg_warn ("cannot validate file %s"); + return FALSE; + } + /* Now mmap metaindex and current index */ + if (log->metaindex == NULL) { + log->metaindex = g_malloc (sizeof (struct rspamd_binlog_metaindex)); + } + if ((read (log->fd, log->metaindex, sizeof (struct rspamd_binlog_metaindex))) != sizeof (struct rspamd_binlog_metaindex)) { + msg_warn ("cannot read metaindex of file %s, error %d, %s", log->filename, errno, strerror (errno)); + return FALSE; + } + /* Current index */ + if (log->cur_idx == NULL) { + log->cur_idx = g_malloc (sizeof (struct rspamd_index_block)); + } + if (lseek (log->fd, log->metaindex->indexes[log->metaindex->last_index], SEEK_SET) == -1) { + msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno)); + return FALSE; + } + if ((read (log->fd, log->cur_idx, sizeof (struct rspamd_index_block))) != sizeof (struct rspamd_index_block)) { + msg_warn ("cannot read index in file %s, error %d, %s", log->filename, errno, strerror (errno)); + return FALSE; + } + + log->cur_seq = log->metaindex->last_index * BINLOG_IDX_LEN + log->cur_idx->last_index; + log->cur_time = log->cur_idx->indexes[log->cur_idx->last_index].time; + + return TRUE; + +} + +static gboolean +binlog_create (struct rspamd_binlog *log) +{ + if ((log->fd = open (log->filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { + msg_info ("cannot create file %s, error %d, %s", log->filename, errno, strerror (errno)); + return FALSE; + } + + return binlog_write_header (log); +} + +static gboolean +binlog_open_real 
(struct rspamd_binlog *log) +{ + if ((log->fd = open (log->filename, O_RDWR)) == -1) { + msg_info ("cannot open file %s, error %d, %s", log->filename, errno, strerror (errno)); + return FALSE; + } + + return binlog_check_file (log); +} + + +struct rspamd_binlog* +binlog_open (rspamd_mempool_t *pool, const gchar *path, time_t rotate_time, gint rotate_jitter) +{ + struct rspamd_binlog *new; + gint len = strlen (path); + struct stat st; + + new = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_binlog)); + new->pool = pool; + new->rotate_time = rotate_time; + new->fd = -1; + + if (rotate_time) { + new->rotate_jitter = g_random_int_range (0, rotate_jitter); + } + + new->filename = rspamd_mempool_alloc (pool, len + sizeof (BINLOG_SUFFIX)); + rspamd_strlcpy (new->filename, path, len + 1); + rspamd_strlcpy (new->filename + len, BINLOG_SUFFIX, sizeof (BINLOG_SUFFIX)); + + if (stat (new->filename, &st) == -1) { + /* Check errno to check whether we should create this file */ + if (errno != ENOENT) { + msg_err ("cannot stat file: %s, error %s", new->filename, strerror (errno)); + return NULL; + } + else { + /* In case of ENOENT try to create binlog */ + if (!binlog_create (new)) { + return NULL; + } + } + } + else { + /* Try to open binlog */ + if (!binlog_open_real (new)) { + return NULL; + } + } + + return new; +} + +void +binlog_close (struct rspamd_binlog *log) +{ + if (log) { + if (log->metaindex) { + g_free (log->metaindex); + } + if (log->cur_idx) { + g_free (log->cur_idx); + } + close (log->fd); + } +} + +static gboolean +binlog_tree_callback (gpointer key, gpointer value, gpointer data) +{ + token_node_t *node = key; + struct rspamd_binlog *log = data; + struct rspamd_binlog_element elt; + + elt.h1 = node->h1; + elt.h2 = node->h2; + elt.value = node->value; + + if (write (log->fd, &elt, sizeof (elt)) == -1) { + msg_info ("cannot write token to file: %s, error: %s", log->filename, strerror (errno)); + return TRUE; + } + + return FALSE; +} + +static gboolean 
+write_binlog_tree (struct rspamd_binlog *log, GTree *nodes) +{ + off_t seek; + struct rspamd_binlog_index *idx; + + lock_file (log->fd, FALSE); + log->cur_seq ++; + + /* Seek to end of file */ + if ((seek = lseek (log->fd, 0, SEEK_END)) == -1) { + unlock_file (log->fd, FALSE); + msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno)); + return FALSE; + } + + /* Now write all nodes to file */ + g_tree_foreach (nodes, binlog_tree_callback, (gpointer)log); + + /* Write index */ + idx = &log->cur_idx->indexes[log->cur_idx->last_index]; + idx->seek = seek; + idx->time = (guint64)time (NULL); + log->cur_time = idx->time; + idx->len = g_tree_nnodes (nodes) * sizeof (struct rspamd_binlog_element); + if (lseek (log->fd, log->metaindex->indexes[log->metaindex->last_index], SEEK_SET) == -1) { + unlock_file (log->fd, FALSE); + msg_info ("cannot seek in file: %s, error: %s, seek: %L, op: insert index", log->filename, + strerror (errno), log->metaindex->indexes[log->metaindex->last_index]); + return FALSE; + } + log->cur_idx->last_index ++; + if (write (log->fd, log->cur_idx, sizeof (struct rspamd_index_block)) == -1) { + unlock_file (log->fd, FALSE); + msg_info ("cannot write index to file: %s, error: %s", log->filename, strerror (errno)); + return FALSE; + } + + unlock_file (log->fd, FALSE); + + return TRUE; +} + +static gboolean +create_new_metaindex_block (struct rspamd_binlog *log) +{ + off_t seek; + + lock_file (log->fd, FALSE); + + log->metaindex->last_index ++; + /* Seek to end of file */ + if ((seek = lseek (log->fd, 0, SEEK_END)) == -1) { + unlock_file (log->fd, FALSE); + msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno)); + return FALSE; + } + if (write (log->fd, log->cur_idx, sizeof (struct rspamd_index_block)) == -1) { + unlock_file (log->fd, FALSE); + g_free (log->cur_idx); + msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno)); + return FALSE; + } + /* Offset to metaindex */ 
+ log->metaindex->indexes[log->metaindex->last_index] = seek; + /* Overwrite all metaindexes */ + if (lseek (log->fd, sizeof (struct rspamd_binlog_header), SEEK_SET) == -1) { + unlock_file (log->fd, FALSE); + msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno)); + return FALSE; + } + if (write (log->fd, log->metaindex, sizeof (struct rspamd_binlog_metaindex)) == -1) { + unlock_file (log->fd, FALSE); + msg_info ("cannot write metaindex in file: %s, error: %s", log->filename, strerror (errno)); + return FALSE; + } + bzero (log->cur_idx, sizeof (struct rspamd_index_block)); + unlock_file (log->fd, FALSE); + + return TRUE; +} + +static gboolean +maybe_rotate_binlog (struct rspamd_binlog *log) +{ + guint64 now = time (NULL); + + if (log->rotate_time && ((now - log->header.create_time) > (guint)(log->rotate_time + log->rotate_jitter))) { + return TRUE; + } + return FALSE; +} + +static gboolean +rotate_binlog (struct rspamd_binlog *log) +{ + gchar *backup_name; + struct stat st; + + lock_file (log->fd, FALSE); + + /* Unmap mapped fragments */ + if (log->metaindex) { + g_free (log->metaindex); + log->metaindex = NULL; + } + if (log->cur_idx) { + g_free (log->cur_idx); + log->cur_idx = NULL; + } + /* Format backup name */ + backup_name = g_strdup_printf ("%s.%s", log->filename, BACKUP_SUFFIX); + + if (stat (backup_name, &st) != -1) { + msg_info ("replace old %s", backup_name); + unlink (backup_name); + } + + rename (log->filename, backup_name); + g_free (backup_name); + + /* XXX: maybe race condition here */ + unlock_file (log->fd, FALSE); + close (log->fd); + + return binlog_create (log); + +} + +gboolean +binlog_insert (struct rspamd_binlog *log, GTree *nodes) +{ + off_t seek; + + if (!log || !log->metaindex || !log->cur_idx || !nodes) { + msg_info ("improperly opened binlog: %s", log != NULL ? 
log->filename : "unknown"); + return FALSE; + } + + if (maybe_rotate_binlog (log)) { + if (!rotate_binlog (log)) { + return FALSE; + } + } + /* First of all try to place new tokens in current index */ + if (log->cur_idx->last_index < BINLOG_IDX_LEN) { + /* All is ok */ + return write_binlog_tree (log, nodes); + } + /* Current index table is all busy, try to allocate new index */ + + /* Check metaindex free space */ + if (log->metaindex->last_index < METAINDEX_LEN) { + /* Create new index block */ + if ((seek = lseek (log->fd, 0, SEEK_END)) == (off_t)-1) { + msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno)); + return FALSE; + } + if (!create_new_metaindex_block (log)) { + return FALSE; + } + return write_binlog_tree (log, nodes); + } + + /* All binlog is filled, we need to rotate it forcefully */ + if (!rotate_binlog (log)) { + return FALSE; + } + + return write_binlog_tree (log, nodes); +} + +gboolean +binlog_sync (struct rspamd_binlog *log, guint64 from_rev, guint64 *from_time, GByteArray **rep) +{ + guint32 metaindex_num; + struct rspamd_index_block *idxb; + struct rspamd_binlog_index *idx; + gboolean idx_mapped = FALSE, res = TRUE, is_first = FALSE; + + if (!log || !log->metaindex || !log->cur_idx) { + msg_info ("improperly opened binlog: %s", log != NULL ? 
log->filename : "unknown"); + return FALSE; + } + + if (*rep == NULL) { + *rep = g_malloc (sizeof (GByteArray)); + is_first = TRUE; + } + else { + /* Unmap old fragment */ + g_free ((*rep)->data); + } + + if (from_rev == log->cur_seq) { + /* Last record */ + *rep = NULL; + return FALSE; + } + else if (from_rev > log->cur_seq) { + /* Slave has more actual copy, write this to log and abort sync */ + msg_warn ("slave has more recent revision of statfile %s: %uL and our is: %uL", log->filename, from_rev, log->cur_seq); + *rep = NULL; + *from_time = 0; + return FALSE; + } + + metaindex_num = from_rev / BINLOG_IDX_LEN; + /* First of all try to find this revision */ + if (metaindex_num > log->metaindex->last_index) { + return FALSE; + } + else if (metaindex_num != log->metaindex->last_index) { + /* Need to remap index block */ + lock_file (log->fd, FALSE); + idxb = g_malloc (sizeof (struct rspamd_index_block)); + idx_mapped = TRUE; + if (lseek (log->fd, log->metaindex->indexes[metaindex_num], SEEK_SET) == -1) { + unlock_file (log->fd, FALSE); + msg_warn ("cannot seek file %s, error %d, %s", log->filename, errno, strerror (errno)); + res = FALSE; + goto end; + } + if ((read (log->fd, idxb, sizeof (struct rspamd_index_block))) != sizeof (struct rspamd_index_block)) { + unlock_file (log->fd, FALSE); + msg_warn ("cannot read index from file %s, error %d, %s", log->filename, errno, strerror (errno)); + res = FALSE; + goto end; + } + unlock_file (log->fd, FALSE); + } + else { + idxb = log->cur_idx; + } + /* Now check specified index */ + idx = &idxb->indexes[from_rev % BINLOG_IDX_LEN]; + if (is_first && idx->time != *from_time) { + res = FALSE; + *from_time = 0; + goto end; + } + else { + *from_time = idx->time; + } + + /* Now fill reply structure */ + (*rep)->len = idx->len; + /* Read result */ + msg_info ("update from binlog '%s' from revision: %uL to revision %uL size is %uL", + log->filename, from_rev, log->cur_seq, idx->len); + if (lseek (log->fd, idx->seek, SEEK_SET) == 
-1) { + msg_warn ("cannot seek file %s, error %d, %s", log->filename, errno, strerror (errno)); + res = FALSE; + goto end; + } + + (*rep)->data = g_malloc (idx->len); + if ((read (log->fd, (*rep)->data, idx->len)) != (ssize_t)idx->len) { + msg_warn ("cannot read file %s, error %d, %s", log->filename, errno, strerror (errno)); + res = FALSE; + goto end; + } + +end: + if (idx_mapped) { + g_free (idxb); + } + + return res; +} + +static gboolean +maybe_init_static (void) +{ + if (!binlog_opened) { + binlog_opened = g_hash_table_new (g_direct_hash, g_direct_equal); + if (!binlog_opened) { + return FALSE; + } + } + + if (!binlog_pool) { + binlog_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + if (!binlog_pool) { + return FALSE; + } + } + + return TRUE; +} + +gboolean +maybe_write_binlog (struct classifier_config *ccf, struct statfile *st, stat_file_t *file, GTree *nodes) +{ + struct rspamd_binlog *log; + + if (ccf == NULL) { + return FALSE; + } + + + if (st == NULL || nodes == NULL || st->binlog == NULL || st->binlog->affinity != AFFINITY_MASTER) { + return FALSE; + } + + if (!maybe_init_static ()) { + return FALSE; + } + + if ((log = g_hash_table_lookup (binlog_opened, st)) == NULL) { + if ((log = binlog_open (binlog_pool, st->path, st->binlog->rotate_time, st->binlog->rotate_time / 2)) != NULL) { + g_hash_table_insert (binlog_opened, st, log); + } + else { + return FALSE; + } + } + + if (binlog_insert (log, nodes)) { + msg_info ("set new revision of statfile %s: %uL", st->symbol, log->cur_seq); + (void)statfile_set_revision (file, log->cur_seq, log->cur_time); + return TRUE; + } + + return FALSE; +} + +struct rspamd_binlog* +get_binlog_by_statfile (struct statfile *st) +{ + struct rspamd_binlog *log; + + if (st == NULL || st->binlog == NULL || st->binlog->affinity != AFFINITY_MASTER) { + return NULL; + } + + if (!maybe_init_static ()) { + return NULL; + } + + if ((log = g_hash_table_lookup (binlog_opened, st)) == NULL) { + if ((log = binlog_open 
(binlog_pool, st->path, st->binlog->rotate_time, st->binlog->rotate_time / 2)) != NULL) { + g_hash_table_insert (binlog_opened, st, log); + } + else { + return NULL; + } + } + + return log; +} diff --git a/src/libserver/binlog.h b/src/libserver/binlog.h new file mode 100644 index 000000000..9e1a786d3 --- /dev/null +++ b/src/libserver/binlog.h @@ -0,0 +1,93 @@ +#ifndef RSPAMD_BINLOG_H +#define RSPAMD_BINLOG_H + +#include "config.h" +#include "main.h" +#include "statfile.h" + +/* How much records are in a single index */ +#define BINLOG_IDX_LEN 200 +#define METAINDEX_LEN 1024 + +/* Assume 8 bytes words */ +struct rspamd_binlog_header { + gchar magic[3]; + gchar version[2]; + gchar padding[3]; + guint64 create_time; +}; + +struct rspamd_binlog_index { + guint64 time; + guint64 seek; + guint32 len; +}; + +struct rspamd_index_block { + struct rspamd_binlog_index indexes[BINLOG_IDX_LEN]; + guint32 last_index; +}; + +struct rspamd_binlog_metaindex { + guint64 indexes[METAINDEX_LEN]; + guint64 last_index; +}; + +struct rspamd_binlog_element { + guint32 h1; + guint32 h2; + float value; +} __attribute__((__packed__)); + +struct rspamd_binlog { + gchar *filename; + time_t rotate_time; + gint rotate_jitter; + guint64 cur_seq; + guint64 cur_time; + gint fd; + rspamd_mempool_t *pool; + + struct rspamd_binlog_header header; + struct rspamd_binlog_metaindex *metaindex; + struct rspamd_index_block *cur_idx; +}; + +struct classifier_config; + +/* + * Open binlog at specified path with specified rotate params + */ +struct rspamd_binlog* binlog_open (rspamd_mempool_t *pool, const gchar *path, time_t rotate_time, gint rotate_jitter); + +/* + * Get and open binlog for specified statfile + */ +struct rspamd_binlog* get_binlog_by_statfile (struct statfile *st); + +/* + * Close binlog + */ +void binlog_close (struct rspamd_binlog *log); + +/* + * Insert new nodes inside binlog + */ +gboolean binlog_insert (struct rspamd_binlog *log, GTree *nodes); + +/* + * Sync binlog from specified 
revision + * @param log binlog structure + * @param from_rev from revision + * @param from_time from time + * @param rep a portion of changes for revision is stored here + * @return TRUE if there are more revisions to get and FALSE if synchronization is complete + */ +gboolean binlog_sync (struct rspamd_binlog *log, guint64 from_rev, guint64 *from_time, GByteArray **rep); + +/* + * Conditional write to a binlog for specified statfile + */ +gboolean maybe_write_binlog (struct classifier_config *ccf, struct statfile *st, stat_file_t *file, GTree *nodes); + +#endif diff --git a/src/libserver/buffer.c b/src/libserver/buffer.c new file mode 100644 index 000000000..864f2fad6 --- /dev/null +++ b/src/libserver/buffer.c @@ -0,0 +1,786 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "buffer.h" +#include "main.h" +#ifdef HAVE_SYS_SENDFILE_H +#include +#endif + +#define G_DISPATCHER_ERROR dispatcher_error_quark() +#define debug_ip(...) rspamd_conditional_debug(rspamd_main->logger, NULL, __FUNCTION__, __VA_ARGS__) + +static void dispatcher_cb (gint fd, short what, void *arg); + +static inline GQuark +dispatcher_error_quark (void) +{ + return g_quark_from_static_string ("g-dispatcher-error-quark"); +} + +static gboolean +sendfile_callback (rspamd_io_dispatcher_t *d) +{ + + GError *err; + +#ifdef HAVE_SENDFILE +# if defined(FREEBSD) || defined(DARWIN) + off_t off = 0; + #if defined(FREEBSD) + /* FreeBSD version */ + if (sendfile (d->sendfile_fd, d->fd, d->offset, 0, NULL, &off, 0) != 0) { + #elif defined(DARWIN) + /* Darwin version */ + if (sendfile (d->sendfile_fd, d->fd, d->offset, &off, NULL, 0) != 0) { + #endif + if (errno != EAGAIN) { + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno)); + d->err_callback (err, d->user_data); + return FALSE; + } + } + else { + debug_ip("partially write data, retry"); + /* Wait for other event */ + d->offset += off; + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + } + } + else { + if (d->write_callback) { + if (!d->write_callback (d->user_data)) { + debug_ip("callback set wanna_die flag, terminating"); + return FALSE; + } + } + 
event_del (d->ev); + event_set (d->ev, d->fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + d->in_sendfile = FALSE; + } +# else + ssize_t r; + /* Linux version */ + r = sendfile (d->fd, d->sendfile_fd, &d->offset, d->file_size); + if (r == -1) { + if (errno != EAGAIN) { + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno)); + d->err_callback (err, d->user_data); + return FALSE; + } + } + else { + debug_ip("partially write data, retry"); + /* Wait for other event */ + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + } + } + else if (r + d->offset < (ssize_t)d->file_size) { + debug_ip("partially write data, retry"); + /* Wait for other event */ + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + } + else { + if (d->write_callback) { + if (!d->write_callback (d->user_data)) { + debug_ip("callback set wanna_die flag, terminating"); + return FALSE; + } + } + event_del (d->ev); + event_set (d->ev, d->fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + d->in_sendfile = FALSE; + } +# endif +#else + ssize_t r; + r = write (d->fd, d->map, d->file_size - d->offset); + if (r == -1) { + if (errno != EAGAIN) { + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno)); + d->err_callback (err, d->user_data); + return FALSE; + } + } + else { + debug_ip("partially write data, retry"); + /* Wait for other event */ + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + } + } + else if (r + d->offset < d->file_size) { + d->offset += r; + debug_ip("partially write data, 
retry"); + /* Wait for other event */ + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + } + else { + if (d->write_callback) { + if (!d->write_callback (d->user_data)) { + debug_ip("callback set wanna_die flag, terminating"); + return FALSE; + } + } + event_del (d->ev); + event_set (d->ev, d->fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + d->in_sendfile = FALSE; + } +#endif + return TRUE; +} + +#define BUFREMAIN(x) (x)->data->size - ((x)->pos - (x)->data->begin) + +#define APPEND_OUT_BUFFER(d, buf) do { \ + DL_APPEND((d)->out_buffers.buffers, buf); \ + (d)->out_buffers.pending ++; \ + } while (0) +#define DELETE_OUT_BUFFER(d, buf) do { \ + DL_DELETE((d)->out_buffers.buffers, (buf)); \ + g_string_free((buf->data), (buf)->allocated); \ + g_slice_free1(sizeof (struct rspamd_out_buffer_s), (buf)); \ + (d)->out_buffers.pending --; \ + } while (0) + +static gboolean +write_buffers (gint fd, rspamd_io_dispatcher_t * d, gboolean is_delayed) +{ + GError *err = NULL; + struct rspamd_out_buffer_s *cur = NULL, *tmp; + ssize_t r; + struct iovec *iov; + guint i, len; + + len = d->out_buffers.pending; + while (len > 0) { + /* Unset delayed as actually we HAVE buffers to write */ + is_delayed = TRUE; + iov = g_slice_alloc (len * sizeof (struct iovec)); + i = 0; + DL_FOREACH_SAFE (d->out_buffers.buffers, cur, tmp) { + iov[i].iov_base = cur->data->str; + iov[i].iov_len = cur->data->len; + i ++; + } + /* Now try to write the whole vector */ + r = writev (fd, iov, len); + if (r == -1 && errno != EAGAIN) { + g_slice_free1 (len * sizeof (struct iovec), iov); + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno)); + d->err_callback (err, d->user_data); + return FALSE; + } + } + else if (r > 0) { + /* Find pos inside buffers */ + DL_FOREACH_SAFE (d->out_buffers.buffers, cur, 
tmp) { + if (r >= (ssize_t)cur->data->len) { + /* Mark this buffer as read */ + r -= cur->data->len; + DELETE_OUT_BUFFER (d, cur); + } + else { + /* This buffer was not written completely */ + g_string_erase (cur->data, 0, r); + break; + } + } + g_slice_free1 (len * sizeof (struct iovec), iov); + if (d->out_buffers.pending > 0) { + /* Wait for other event */ + event_del (d->ev); + event_set (d->ev, fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + return TRUE; + } + } + else if (r == 0) { + /* Got EOF while we wait for data */ + g_slice_free1 (len * sizeof (struct iovec), iov); + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, EOF, "got EOF"); + d->err_callback (err, d->user_data); + return FALSE; + } + } + else if (r == -1 && errno == EAGAIN) { + g_slice_free1 (len * sizeof (struct iovec), iov); + debug_ip("partially write data, retry"); + /* Wait for other event */ + event_del (d->ev); + event_set (d->ev, fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + return TRUE; + } + len = d->out_buffers.pending; + } + + if (d->out_buffers.pending == 0) { + /* Disable write event for this time */ + + debug_ip ("all buffers were written successfully"); + + if (is_delayed && d->write_callback) { + if (!d->write_callback (d->user_data)) { + debug_ip("callback set wanna_die flag, terminating"); + return FALSE; + } + } + + event_del (d->ev); + event_set (d->ev, fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + } + else { + /* Plan other write event */ + event_del (d->ev); + event_set (d->ev, fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + } + + return TRUE; +} + +static void +read_buffers (gint fd, rspamd_io_dispatcher_t * d, gboolean skip_read) +{ + ssize_t r; + GError *err = NULL; + f_str_t res; + gchar *c, *b; + 
gchar *end; + size_t len; + enum io_policy saved_policy; + + if (d->wanna_die) { + rspamd_remove_dispatcher (d); + return; + } + + if (d->in_buf == NULL) { + d->in_buf = rspamd_mempool_alloc_tmp (d->pool, sizeof (rspamd_buffer_t)); + if (d->policy == BUFFER_LINE || d->policy == BUFFER_ANY) { + d->in_buf->data = fstralloc_tmp (d->pool, d->default_buf_size); + } + else { + d->in_buf->data = fstralloc_tmp (d->pool, d->nchars + 1); + } + d->in_buf->pos = d->in_buf->data->begin; + } + + end = d->in_buf->pos; + len = d->in_buf->data->len; + + if (BUFREMAIN (d->in_buf) == 0) { + /* Buffer is full, try to call callback with overflow error */ + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, E2BIG, "buffer overflow"); + d->err_callback (err, d->user_data); + return; + } + } + else if (!skip_read) { + /* Try to read the whole buffer */ + r = read (fd, end, BUFREMAIN (d->in_buf)); + if (r == -1 && errno != EAGAIN) { + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno)); + d->err_callback (err, d->user_data); + return; + } + } + else if (r == 0) { + /* Got EOF while we wait for data */ +#if 0 + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, EOF, "got EOF"); + d->err_callback (err, d->user_data); + return; + } +#endif + /* Read returned 0, it may be shutdown or full quit */ + if (!d->want_read) { + d->half_closed = TRUE; + /* Do not expect any read after this */ + event_del (d->ev); + } + else { + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, EOF, "got EOF"); + d->err_callback (err, d->user_data); + return; + } + } + } + else if (r == -1 && errno == EAGAIN) { + debug_ip("partially read data, retry"); + return; + } + else { + /* Set current position in buffer */ + d->in_buf->pos += r; + d->in_buf->data->len += r; + } + debug_ip("read %z characters, policy is %s, watermark is: %z, buffer has %z bytes", r, + d->policy == BUFFER_LINE ? 
"LINE" : "CHARACTER", d->nchars, d->in_buf->data->len); + } + + saved_policy = d->policy; + c = d->in_buf->data->begin; + end = d->in_buf->pos; + len = d->in_buf->data->len; + b = c; + r = 0; + + switch (d->policy) { + case BUFFER_LINE: + /** Variables: + * b - begin of line + * r - current position in buffer + * *len - length of remaining buffer + * c - pointer to current position (buffer->begin + r) + * res - result string + */ + while (r < (ssize_t)len) { + if (*c == '\n') { + res.begin = b; + res.len = c - b; + /* Strip EOL */ + if (d->strip_eol) { + if (r != 0 && *(c - 1) == '\r') { + res.len--; + } + } + else { + /* Include EOL in reply */ + res.len ++; + } + /* Call callback for a line */ + if (d->read_callback) { + if (!d->read_callback (&res, d->user_data)) { + return; + } + if (d->policy != saved_policy) { + /* Drain buffer as policy is changed */ + /* Note that d->in_buffer is other pointer now, so we need to reinit all pointers */ + /* First detect how much symbols do we have */ + if (end == c) { + /* In fact we read the whole buffer and change input policy, so just set current pos to begin of buffer */ + d->in_buf->pos = d->in_buf->data->begin; + d->in_buf->data->len = 0; + } + else { + /* Otherwise we need to move buffer */ + /* Reinit pointers */ + len = d->in_buf->data->len - r - 1; + end = d->in_buf->data->begin + r + 1; + memmove (d->in_buf->data->begin, end, len); + d->in_buf->data->len = len; + d->in_buf->pos = d->in_buf->data->begin + len; + /* Process remaining buffer */ + read_buffers (fd, d, TRUE); + } + return; + } + } + /* Set new begin of line */ + b = c + 1; + } + r++; + c++; + } + /* Now drain remaining characters in buffer */ + memmove (d->in_buf->data->begin, b, c - b); + d->in_buf->data->len = c - b; + d->in_buf->pos = d->in_buf->data->begin + (c - b); + break; + case BUFFER_CHARACTER: + r = d->nchars; + if ((ssize_t)len >= r) { + res.begin = b; + res.len = r; + c = b + r; + if (d->read_callback) { + if (!d->read_callback (&res, 
d->user_data)) { + return; + } + /* Move remaining string to begin of buffer (draining) */ + if ((ssize_t)len > r) { + len -= r; + memmove (d->in_buf->data->begin, c, len); + d->in_buf->data->len = len; + d->in_buf->pos = d->in_buf->data->begin + len; + b = d->in_buf->data->begin; + } + else { + d->in_buf->data->len = 0; + d->in_buf->pos = d->in_buf->data->begin; + } + if (d->policy != saved_policy && (ssize_t)len != r) { + debug_ip("policy changed during callback, restart buffer's processing"); + read_buffers (fd, d, TRUE); + return; + } + } + } + break; + case BUFFER_ANY: + res.begin = d->in_buf->data->begin; + res.len = len; + + if (d->read_callback) { + /* + * Actually we do not want to send zero sized + * buffers to a read callback + */ + if (! (d->want_read && res.len == 0)) { + if (!d->read_callback (&res, d->user_data)) { + return; + } + } + if (d->policy != saved_policy) { + debug_ip("policy changed during callback, restart buffer's processing"); + read_buffers (fd, d, TRUE); + return; + } + } + d->in_buf->pos = d->in_buf->data->begin; + d->in_buf->data->len = 0; + break; + } +} + +#undef BUFREMAIN + +static void +dispatcher_cb (gint fd, short what, void *arg) +{ + rspamd_io_dispatcher_t *d = (rspamd_io_dispatcher_t *) arg; + GError *err = NULL; + + debug_ip("in dispatcher callback, what: %d, fd: %d", (gint)what, fd); + + if ((what & EV_TIMEOUT) != 0) { + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, ETIMEDOUT, "IO timeout"); + d->err_callback (err, d->user_data); + } + } + else if ((what & EV_READ) != 0) { + read_buffers (fd, d, FALSE); + } + else if ((what & EV_WRITE) != 0) { + /* No data to write, disable further EV_WRITE to this fd */ + if (d->in_sendfile) { + sendfile_callback (d); + } + else { + if (d->out_buffers.pending == 0) { + if (d->half_closed && !d->is_restored) { + /* Socket is half closed and there is nothing more to write, closing connection */ + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, EOF, "got 
EOF"); + d->err_callback (err, d->user_data); + return; + } + } + else { + /* Want read again */ + event_del (d->ev); + event_set (d->ev, fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + if (d->is_restored && d->write_callback) { + if (!d->write_callback (d->user_data)) { + return; + } + d->is_restored = FALSE; + } + } + } + else { + /* Delayed write */ + write_buffers (fd, d, TRUE); + } + } + } +} + + +rspamd_io_dispatcher_t * +rspamd_create_dispatcher (struct event_base *base, gint fd, enum io_policy policy, + dispatcher_read_callback_t read_cb, dispatcher_write_callback_t write_cb, dispatcher_err_callback_t err_cb, struct timeval *tv, void *user_data) +{ + rspamd_io_dispatcher_t *new; + + if (fd == -1) { + return NULL; + } + + new = g_slice_alloc0 (sizeof (rspamd_io_dispatcher_t)); + + new->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + if (tv != NULL) { + new->tv = rspamd_mempool_alloc (new->pool, sizeof (struct timeval)); + memcpy (new->tv, tv, sizeof (struct timeval)); + } + else { + new->tv = NULL; + } + new->nchars = 0; + new->in_sendfile = FALSE; + new->policy = policy; + new->read_callback = read_cb; + new->write_callback = write_cb; + new->err_callback = err_cb; + new->user_data = user_data; + new->strip_eol = TRUE; + new->half_closed = FALSE; + new->want_read = TRUE; + new->is_restored = FALSE; + new->default_buf_size = sysconf (_SC_PAGESIZE); + + new->ev = rspamd_mempool_alloc0 (new->pool, sizeof (struct event)); + new->fd = fd; + new->ev_base = base; + + event_set (new->ev, fd, EV_WRITE, dispatcher_cb, (void *)new); + event_base_set (new->ev_base, new->ev); + event_add (new->ev, new->tv); + + return new; +} + +void +rspamd_remove_dispatcher (rspamd_io_dispatcher_t * d) +{ + struct rspamd_out_buffer_s *cur, *tmp; + + if (d != NULL) { + DL_FOREACH_SAFE (d->out_buffers.buffers, cur, tmp) { + DELETE_OUT_BUFFER (d, cur); + } + event_del (d->ev); + rspamd_mempool_delete 
(d->pool); + g_slice_free1 (sizeof (rspamd_io_dispatcher_t), d); + } +} + +void +rspamd_set_dispatcher_policy (rspamd_io_dispatcher_t * d, enum io_policy policy, size_t nchars) +{ + f_str_t *tmp; + gint t; + + if (d->policy != policy || nchars != d->nchars) { + d->policy = policy; + d->nchars = nchars ? nchars : d->default_buf_size; + /* Resize input buffer if needed */ + if (policy == BUFFER_CHARACTER && nchars != 0) { + if (d->in_buf && d->in_buf->data->size < nchars) { + tmp = fstralloc_tmp (d->pool, d->nchars + 1); + memcpy (tmp->begin, d->in_buf->data->begin, d->in_buf->data->len); + t = d->in_buf->pos - d->in_buf->data->begin; + tmp->len = d->in_buf->data->len; + d->in_buf->data = tmp; + d->in_buf->pos = d->in_buf->data->begin + t; + } + } + else if (policy == BUFFER_LINE || policy == BUFFER_ANY) { + if (d->in_buf && d->nchars < d->default_buf_size) { + tmp = fstralloc_tmp (d->pool, d->default_buf_size); + memcpy (tmp->begin, d->in_buf->data->begin, d->in_buf->data->len); + t = d->in_buf->pos - d->in_buf->data->begin; + tmp->len = d->in_buf->data->len; + d->in_buf->data = tmp; + d->in_buf->pos = d->in_buf->data->begin + t; + } + d->strip_eol = TRUE; + } + } + + debug_ip("new input length watermark is %uz", d->nchars); +} + +gboolean +rspamd_dispatcher_write (rspamd_io_dispatcher_t * d, + const void *data, size_t len, gboolean delayed, gboolean allocated) +{ + struct rspamd_out_buffer_s *newbuf; + + newbuf = g_slice_alloc (sizeof (struct rspamd_out_buffer_s)); + if (len == 0) { + /* Assume NULL terminated */ + len = strlen ((const gchar *)data); + } + + if (!allocated) { + newbuf->data = g_string_new_len (data, len); + newbuf->allocated = TRUE; + } + else { + newbuf->data = g_string_new (NULL); + newbuf->data->str = (gchar *)data; + newbuf->data->len = len; + newbuf->data->allocated_len = len; + newbuf->allocated = FALSE; + } + + APPEND_OUT_BUFFER (d, newbuf); + + if (!delayed) { + debug_ip("plan write event"); + return write_buffers (d->fd, d, FALSE); + } + 
/* Otherwise plan write event */ + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + + return TRUE; +} + +gboolean rspamd_dispatcher_write_string (rspamd_io_dispatcher_t *d, + GString *str, + gboolean delayed, + gboolean free_on_write) +{ + struct rspamd_out_buffer_s *newbuf; + + newbuf = g_slice_alloc (sizeof (struct rspamd_out_buffer_s)); + newbuf->data = str; + newbuf->allocated = free_on_write; + + APPEND_OUT_BUFFER (d, newbuf); + + if (!delayed) { + debug_ip("plan write event"); + return write_buffers (d->fd, d, FALSE); + } + /* Otherwise plan write event */ + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + + return TRUE; +} + +gboolean +rspamd_dispatcher_sendfile (rspamd_io_dispatcher_t *d, gint fd, size_t len) +{ + if (lseek (fd, 0, SEEK_SET) == -1) { + msg_warn ("lseek failed: %s", strerror (errno)); + return FALSE; + } + + d->offset = 0; + d->in_sendfile = TRUE; + d->sendfile_fd = fd; + d->file_size = len; + +#ifndef HAVE_SENDFILE + #ifdef HAVE_MMAP_NOCORE + if ((d->map = mmap (NULL, len, PROT_READ, MAP_SHARED | MAP_NOCORE, fd, 0)) == MAP_FAILED) { + #else + if ((d->map = mmap (NULL, len, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { + #endif + msg_warn ("mmap failed: %s", strerror (errno)); + return FALSE; + } +#endif + + return sendfile_callback (d); +} + +void +rspamd_dispatcher_pause (rspamd_io_dispatcher_t * d) +{ + debug_ip ("paused dispatcher"); + event_del (d->ev); + d->is_restored = FALSE; +} + +void +rspamd_dispatcher_restore (rspamd_io_dispatcher_t * d) +{ + if (!d->is_restored) { + debug_ip ("restored dispatcher"); + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + d->is_restored = TRUE; + } +} + +void +rspamd_dispacther_cleanup 
(rspamd_io_dispatcher_t *d) +{ + struct rspamd_out_buffer_s *cur, *tmp; + + DL_FOREACH_SAFE (d->out_buffers.buffers, cur, tmp) { + DELETE_OUT_BUFFER (d, cur); + } + /* Cleanup temporary data */ + rspamd_mempool_cleanup_tmp (d->pool); + d->in_buf = NULL; +} + +#undef debug_ip + +/* + * vi:ts=4 + */ diff --git a/src/libserver/buffer.h b/src/libserver/buffer.h new file mode 100644 index 000000000..5ed42bfb3 --- /dev/null +++ b/src/libserver/buffer.h @@ -0,0 +1,158 @@ +/** + * @file buffer.h + * Implements buffered IO + */ + +#ifndef RSPAMD_BUFFER_H +#define RSPAMD_BUFFER_H + +#include "config.h" +#include "mem_pool.h" +#include "fstring.h" + +typedef gboolean (*dispatcher_read_callback_t)(f_str_t *in, void *user_data); +typedef gboolean (*dispatcher_write_callback_t)(void *user_data); +typedef void (*dispatcher_err_callback_t)(GError *err, void *user_data); + +/** + * Types of IO handling + */ +enum io_policy { + BUFFER_LINE, /**< call handler when we have line ready */ + BUFFER_CHARACTER, /**< call handler when we have some characters */ + BUFFER_ANY /**< call handler whenever we got data in buffer */ +}; + +/** + * Buffer structure + */ +typedef struct rspamd_buffer_s { + f_str_t *data; /**< buffer logic */ + gchar *pos; /**< current position */ +} rspamd_buffer_t; + +struct rspamd_out_buffer_s { + GString *data; + gboolean allocated; + struct rspamd_out_buffer_s *prev, *next; +}; + +typedef struct rspamd_io_dispatcher_s { + rspamd_buffer_t *in_buf; /**< input buffer */ + struct { + guint pending; + struct rspamd_out_buffer_s *buffers; + } out_buffers; /**< output buffers chain */ + struct timeval *tv; /**< io timeout */ + struct event *ev; /**< libevent io event */ + rspamd_mempool_t *pool; /**< where to store data */ + enum io_policy policy; /**< IO policy */ + size_t nchars; /**< how many chars to read */ + gint fd; /**< descriptor */ + guint32 peer_addr; /**< address of peer for debugging */ + gboolean wanna_die; /**< if dispatcher should be stopped */ + 
dispatcher_read_callback_t read_callback; /**< read callback */ + dispatcher_write_callback_t write_callback; /**< write callback */ + dispatcher_err_callback_t err_callback; /**< error callback */ + void *user_data; /**< user's data for callbacks */ + gulong default_buf_size; /**< default size for buffering */ + off_t offset; /**< for sendfile use */ + size_t file_size; /**< length passed to rspamd_dispatcher_sendfile */ + gint sendfile_fd; /**< descriptor of file passed to rspamd_dispatcher_sendfile */ + gboolean in_sendfile; /**< whether buffer is in sendfile mode */ + gboolean strip_eol; /**< strip or not line ends in BUFFER_LINE policy */ + gboolean is_restored; /**< call a callback when dispatcher is restored */ + gboolean half_closed; /**< connection is half closed */ + gboolean want_read; /**< whether we want to read more data */ + struct event_base *ev_base; /**< event base for io operations */ +#ifndef HAVE_SENDFILE + void *map; /**< mmap'ed file contents, used when sendfile is not available */ +#endif +} rspamd_io_dispatcher_t; + +/** + * Creates rspamd IO dispatcher for specified descriptor + * @param fd descriptor to IO + * @param policy IO policy + * @param read_cb read callback handler + * @param write_cb write callback handler + * @param err_cb error callback handler + * @param tv IO timeout + * @param user_data pointer to user's data + * @return new dispatcher object or NULL in case of failure + */ +rspamd_io_dispatcher_t* rspamd_create_dispatcher (struct event_base *base, gint fd, + enum io_policy policy, + dispatcher_read_callback_t read_cb, + dispatcher_write_callback_t write_cb, + dispatcher_err_callback_t err_cb, + struct timeval *tv, + void *user_data); + +/** + * Set new policy for dispatcher + * @param d pointer to dispatcher's object + * @param policy IO policy + * @param nchars number of characters in buffer for character policy + */ +void rspamd_set_dispatcher_policy (rspamd_io_dispatcher_t *d, + enum io_policy policy, + size_t nchars); + +/** + * Write data when it would be possible + * @param d pointer to dispatcher's object + * @param data data to write + * @param len length of data (0 means data is NUL-terminated and its length is taken with strlen) + */ +gboolean 
rspamd_dispatcher_write (rspamd_io_dispatcher_t *d, + const void *data, + size_t len, gboolean delayed, + gboolean allocated) G_GNUC_WARN_UNUSED_RESULT; + +/** + * Write a GString to dispatcher + * @param d dispatcher object + * @param str string to write + * @param delayed if TRUE only plan a write event, otherwise try to write buffers immediately + * @param free_on_write free string after writing to a socket + * @return TRUE if write has been queued successfully + */ +gboolean rspamd_dispatcher_write_string (rspamd_io_dispatcher_t *d, + GString *str, + gboolean delayed, + gboolean free_on_write) G_GNUC_WARN_UNUSED_RESULT; + +/** + * Send specified descriptor to dispatcher + * @param d pointer to dispatcher's object + * @param fd descriptor of file + * @param len length of data + */ +gboolean rspamd_dispatcher_sendfile (rspamd_io_dispatcher_t *d, gint fd, size_t len) G_GNUC_WARN_UNUSED_RESULT; + +/** + * Pause IO events on dispatcher + * @param d pointer to dispatcher's object + */ +void rspamd_dispatcher_pause (rspamd_io_dispatcher_t *d); + +/** + * Restore IO events on dispatcher + * @param d pointer to dispatcher's object + */ +void rspamd_dispatcher_restore (rspamd_io_dispatcher_t *d); + +/** + * Frees dispatcher object + * @param dispatcher pointer to dispatcher's object + */ +void rspamd_remove_dispatcher (rspamd_io_dispatcher_t *dispatcher); + +/** + * Cleanup dispatcher freeing all temporary data + * @param dispatcher pointer to dispatcher's object + */ +void rspamd_dispacther_cleanup (rspamd_io_dispatcher_t *dispatcher); + +#endif diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h new file mode 100644 index 000000000..6ecb441fd --- /dev/null +++ b/src/libserver/cfg_file.h @@ -0,0 +1,516 @@ +/** + * @file cfg_file.h + * Config file parser and config routines API + */ + +#ifndef CFG_FILE_H +#define CFG_FILE_H + +#include "config.h" +#include "mem_pool.h" +#include "upstream.h" +#include "memcached.h" +#include "symbols_cache.h" +#include "cfg_rcl.h" +#include "utlist.h" +#include "ucl.h" + +#define 
DEFAULT_BIND_PORT 11333 +#define DEFAULT_CONTROL_PORT 11334 +#define MAX_MEMCACHED_SERVERS 4 +#define DEFAULT_MEMCACHED_PORT 11211 +/* Memcached timeouts */ +#define DEFAULT_MEMCACHED_CONNECT_TIMEOUT 1000 +/* Upstream timeouts */ +#define DEFAULT_UPSTREAM_ERROR_TIME 10 +#define DEFAULT_UPSTREAM_ERROR_TIME 10 +#define DEFAULT_UPSTREAM_DEAD_TIME 300 +#define DEFAULT_UPSTREAM_MAXERRORS 10 + +struct expression; +struct tokenizer; +struct classifier; + +enum { VAL_UNDEF=0, VAL_TRUE, VAL_FALSE }; + +/** + * Type of time configuration parameter + */ +enum time_type { + TIME_SECONDS = 0, + TIME_MILLISECONDS, + TIME_MINUTES, + TIME_HOURS +}; +/** + * Types of rspamd bind lines + */ +enum rspamd_cred_type { + CRED_NORMAL, + CRED_CONTROL, + CRED_LMTP, + CRED_DELIVERY +}; + +/** + * Regexp type: /H - header, /M - mime, /U - url /X - raw header + */ +enum rspamd_regexp_type { + REGEXP_NONE = 0, + REGEXP_HEADER, + REGEXP_MIME, + REGEXP_MESSAGE, + REGEXP_URL, + REGEXP_RAW_HEADER +}; + +/** + * Logging type + */ +enum rspamd_log_type { + RSPAMD_LOG_CONSOLE, + RSPAMD_LOG_SYSLOG, + RSPAMD_LOG_FILE +}; + +/** + * Regexp structure + */ +struct rspamd_regexp { + enum rspamd_regexp_type type; /**< regexp type */ + gchar *regexp_text; /**< regexp text representation */ + GRegex *regexp; /**< glib regexp structure */ + GRegex *raw_regexp; /**< glib regexp structure for raw matching */ + gchar *header; /**< header name for header regexps */ + gboolean is_test; /**< true if this expression must be tested */ + gboolean is_raw; /**< true if this regexp is done by raw matching */ + gboolean is_strong; /**< true if headers search must be case sensitive */ +}; + +/** + * Memcached server object + */ +struct memcached_server { + struct upstream up; /**< common upstream base */ + struct in_addr addr; /**< address of server */ + guint16 port; /**< port to connect */ + short alive; /**< is this server alive */ + gint16 num; /**< number of servers in case of mirror */ +}; + +/** + * script module 
list item + */ +struct script_module { + gchar *name; /**< name of module */ + gchar *path; /**< path to module */ +}; + +/** + * Type of lua variable + */ +enum lua_var_type { + LUA_VAR_NUM, + LUA_VAR_BOOLEAN, + LUA_VAR_STRING, + LUA_VAR_FUNCTION, + LUA_VAR_UNKNOWN +}; +/** + * Module option + */ +struct module_opt { + gchar *param; /**< parameter name */ + gchar *value; /**< parameter value */ + gchar *description; /**< parameter description */ + gchar *group; /**< parameter group */ + gpointer actual_data; /**< parsed data */ + gboolean is_lua; /**< actually this is lua variable */ + enum lua_var_type lua_type; /**< type of lua variable */ +}; + +struct module_meta_opt { + gchar *name; /**< Name of meta option */ + GList *options; /**< List of struct module_opt */ +}; + +/** + * Symbol definition + */ +struct symbol_def { + gchar *name; + gchar *description; + gdouble *weight_ptr; +}; + +/** + * Symbols group + */ +struct symbols_group { + gchar *name; + GList *symbols; +}; + +/** + * Statfile section definition + */ +struct statfile_section { + guint32 code; /**< section's code */ + guint64 size; /**< size of section */ + double weight; /**< weight coefficient for section */ +}; + +/** + * Statfile autolearn parameters + */ +struct statfile_autolearn_params { + const gchar *metric; /**< metric name for autolearn triggering */ + double threshold_min; /**< threshold mark */ + double threshold_max; /**< threshold mark */ + GList *symbols; /**< list of symbols */ +}; + +/** + * Sync affinity + */ +enum sync_affinity { + AFFINITY_NONE = 0, + AFFINITY_MASTER, + AFFINITY_SLAVE +}; + +/** + * Binlog params + */ +struct statfile_binlog_params { + enum sync_affinity affinity; + time_t rotate_time; + gchar *master_addr; + guint16 master_port; +}; + +typedef double (*statfile_normalize_func)(struct config_file *cfg, long double score, void *params); + +/** + * Statfile config definition + */ +struct statfile { + gchar *symbol; /**< symbol of statfile */ + gchar *path; /**< 
filesystem pattern (with %r or %f) */ + gchar *label; /**< label of this statfile */ + gsize size; /**< size of statfile */ + GList *sections; /**< list of sections in statfile */ + struct statfile_autolearn_params *autolearn; /**< autolearn params */ + struct statfile_binlog_params *binlog; /**< binlog params */ + statfile_normalize_func normalizer; /**< function that is used as normaliser */ + void *normalizer_data; /**< normalizer function params */ + gchar *normalizer_str; /**< source string (for dump) */ + ucl_object_t *opts; /**< other options */ + gboolean is_spam; /**< spam flag */ +}; + +/** + * Classifier config definition + */ +struct classifier_config { + GList *statfiles; /**< statfiles list */ + GHashTable *labels; /**< statfiles with labels */ + gchar *metric; /**< metric of this classifier */ + struct classifier *classifier; /**< classifier interface */ + struct tokenizer *tokenizer; /**< tokenizer used for classifier */ + GHashTable *opts; /**< other options */ + GList *pre_callbacks; /**< list of callbacks that are called before classification */ + GList *post_callbacks; /**< list of callbacks that are called after classification */ +}; + +struct rspamd_worker_bind_conf { + gchar *bind_host; + guint16 bind_port; + gint ai; + gboolean is_systemd; + struct rspamd_worker_bind_conf *next; +}; + +struct rspamd_worker_param_parser { + rspamd_rcl_handler_t handler; /**< handler function */ + struct rspamd_rcl_struct_parser parser; /**< parser attributes */ + const gchar *name; /**< parameter's name */ + UT_hash_handle hh; /**< hash by name */ +}; + +struct rspamd_worker_cfg_parser { + struct rspamd_worker_param_parser *parsers; /**< parsers hash */ + gint type; /**< workers quark */ + gboolean (*def_obj_parser)(const ucl_object_t *obj, gpointer ud); /**< default object parser */ + gpointer def_ud; + UT_hash_handle hh; /**< hash by type */ +}; + +/** + * Config params for rspamd worker + */ +struct worker_conf { + worker_t *worker; /**< pointer to worker 
type */ + GQuark type; /**< type of worker */ + struct rspamd_worker_bind_conf *bind_conf; /**< bind configuration */ + guint16 count; /**< number of workers */ + GList *listen_socks; /**< listening sockets descriptors */ + guint32 rlimit_nofile; /**< max files limit */ + guint32 rlimit_maxcore; /**< maximum core file size */ + GHashTable *params; /**< params for worker */ + GQueue *active_workers; /**< linked list of spawned workers */ + gboolean has_socket; /**< whether we should make listening socket in main process */ + gpointer *ctx; /**< worker's context */ + ucl_object_t *options; /**< other worker's options */ +}; + +/** + * Structure that stores all config data + */ +struct config_file { + gchar *rspamd_user; /**< user to run as */ + gchar *rspamd_group; /**< group to run as */ + rspamd_mempool_t *cfg_pool; /**< memory pool for config */ + gchar *cfg_name; /**< name of config file */ + gchar *pid_file; /**< name of pid file */ + gchar *temp_dir; /**< dir for temp files */ +#ifdef WITH_GPERF_TOOLS + gchar *profile_path; +#endif + + gboolean no_fork; /**< if 1 do not call daemon() */ + gboolean config_test; /**< if TRUE do only config file test */ + gboolean raw_mode; /**< work in raw mode instead of utf one */ + gboolean one_shot_mode; /**< rules add only one symbol */ + gboolean check_text_attachements; /**< check text attachments as text */ + gboolean convert_config; /**< convert config to XML format */ + gboolean strict_protocol_headers; /**< strictly check protocol headers */ + + gsize max_diff; /**< maximum diff size for text parts */ + + enum rspamd_log_type log_type; /**< log type */ + gint log_facility; /**< log facility in case of syslog */ + gint log_level; /**< log level trigger */ + gchar *log_file; /**< path to logfile in case of file logging */ + gboolean log_buffered; /**< whether logging is buffered */ + guint32 log_buf_size; /**< length of log buffer */ + gchar *debug_ip_map; /**< turn on debugging for specified ip addresses */ + gboolean 
log_urls; /**< whether we should log URLs */ + GList *debug_symbols; /**< symbols to debug */ + gboolean log_color; /**< output colors for console output */ + gboolean log_extended; /**< log extended information */ + + guint32 statfile_sync_interval; /**< synchronization interval */ + guint32 statfile_sync_timeout; /**< synchronization timeout */ + gboolean mlock_statfile_pool; /**< use mlock (2) for locking statfiles */ + + struct memcached_server memcached_servers[MAX_MEMCACHED_SERVERS]; /**< memcached servers */ + gsize memcached_servers_num; /**< number of memcached servers */ + memc_proto_t memcached_protocol; /**< memcached protocol */ + guint memcached_error_time; /**< memcached error time (see upstream documentation) */ + guint memcached_dead_time; /**< memcached dead time */ + guint memcached_maxerrors; /**< maximum number of errors */ + guint memcached_connect_timeout; /**< connection timeout */ + + gboolean delivery_enable; /**< whether delivery agent is enabled */ + gchar *deliver_host; /**< host for mail delivering */ + struct in_addr deliver_addr; /**< its address */ + guint16 deliver_port; /**< port for delivering */ + guint16 deliver_family; /**< socket family for delivering */ + gchar *deliver_agent_path; /**< deliver to pipe instead of socket */ + gboolean deliver_lmtp; /**< use LMTP instead of SMTP */ + + GList *script_modules; /**< linked list of script modules to load */ + + GList *filters; /**< linked list of all filters */ + GList *workers; /**< linked list of all workers params */ + struct rspamd_worker_cfg_parser *wrk_parsers; /**< hash for worker config parsers, indexed by worker quarks */ + gchar *filters_str; /**< string of filters */ + ucl_object_t *rcl_obj; /**< rcl object */ + GHashTable* metrics; /**< hash of metrics indexed by metric name */ + GList* symbols_groups; /**< groups of symbols (list of struct symbols_group) */ + GList* metrics_list; /**< linked list of metrics */ + GHashTable* metrics_symbols; /**< hash table of metric lists (GList) indexed by symbol name */ + GHashTable* 
c_modules; /**< hash of c modules indexed by module name */ + GHashTable* composite_symbols; /**< hash of composite symbols indexed by its name */ + GList *classifiers; /**< list of all classifiers defined */ + GList *statfiles; /**< list of all statfiles in config file order */ + GHashTable *classifiers_symbols; /**< hashtable indexed by symbol name of classifiers */ + GHashTable* cfg_params; /**< all cfg params indexed by its name in this structure */ + GList *pre_filters; /**< list of pre-processing lua filters */ + GList *post_filters; /**< list of post-processing lua filters */ + gchar *dynamic_conf; /**< path to dynamic configuration */ + GList *current_dynamic_conf; /**< currently loaded dynamic configuration */ + GHashTable* domain_settings; /**< settings per-domains */ + GHashTable* user_settings; /**< settings per-user */ + gchar* domain_settings_str; /**< string representation of settings */ + gchar* user_settings_str; + gint clock_res; /**< resolution of clock used */ + + GList *maps; /**< maps active */ + rspamd_mempool_t *map_pool; /**< static maps pool */ + gdouble map_timeout; /**< maps watch timeout */ + + struct symbols_cache *cache; /**< symbols cache object */ + gchar *cache_filename; /**< filename of cache file */ + struct metric *default_metric; /**< default metric */ + + gchar* checksum; /**< real checksum of config file */ + gchar* dump_checksum; /**< dump checksum of config file */ + gpointer lua_state; /**< pointer to lua state */ + + gchar* rrd_file; /**< rrd file to store statistics */ + + gchar* history_file; /**< file to save rolling history */ + + gdouble dns_timeout; /**< timeout in milliseconds for waiting for dns reply */ + guint32 dns_retransmits; /**< maximum retransmits count */ + guint32 dns_throttling_errors; /**< maximum errors for starting resolver throttling */ + guint32 dns_throttling_time; /**< time in seconds for DNS throttling */ + guint32 dns_io_per_server; /**< number of sockets per DNS server */ + GList *nameservers; 
/**< list of nameservers or NULL to parse resolv.conf */ +}; + + +/** + * Parse host[:port[:priority]] line + * @param addr host address + * @param port port + * @param priority priority + * @return TRUE if string was parsed + */ +gboolean parse_host_port_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port, guint *priority); + +/** + * Parse host:port line + * @param addr host address + * @param port port + * @return TRUE if string was parsed + */ +gboolean parse_host_port (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port); + +/** + * Parse host:priority line + * @param addr host address + * @param priority priority + * @return TRUE if string was parsed + */ +gboolean parse_host_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint *priority); + +/** + * Parse bind line + * @param cfg config file to use + * @param cf worker configuration + * @param str line that presents bind line + * @return TRUE if line was successfully parsed and FALSE in case of error + */ +gboolean parse_bind_line (struct config_file *cfg, struct worker_conf *cf, const gchar *str); + +/** + * Init default values + * @param cfg config file + */ +void init_defaults (struct config_file *cfg); + +/** + * Free memory used by config structure + * @param cfg config file + */ +void free_config (struct config_file *cfg); + +/** + * Gets module option with specified name + * @param cfg config file + * @param module_name name of module + * @param opt_name name of option to get + * @return module value or NULL if option is not defined + */ +const ucl_object_t* get_module_opt (struct config_file *cfg, const gchar *module_name, + const gchar *opt_name); + +/** + * Parse limit + * @param limit string representation of limit (eg. 1M) + * @return numeric value of limit + */ +guint64 parse_limit (const gchar *limit, guint len); + +/** + * Parse flag + * @param str string representation of flag (eg. 
'on') + * @return numeric value of flag (0 or 1) + */ +gchar parse_flag (const gchar *str); + +/** + * Do post load actions for config + * @param cfg config file + */ +void post_load_config (struct config_file *cfg); + +/** + * Calculate checksum for config file + * @param cfg config file + */ +gboolean get_config_checksum (struct config_file *cfg); + + +/** + * Replace all \" with a single " in given string + * @param line input string + */ +void unescape_quotes (gchar *line); + +/* + * Convert comma separated string to a list of strings + */ +GList* parse_comma_list (rspamd_mempool_t *pool, const gchar *line); + +/* + * Return a new classifier_config structure, setting default and non-conflicting attributes + */ +struct classifier_config* check_classifier_conf (struct config_file *cfg, struct classifier_config *c); +/* + * Return a new worker_conf structure, setting default and non-conflicting attributes + */ +struct worker_conf* check_worker_conf (struct config_file *cfg, struct worker_conf *c); +/* + * Return a new metric structure, setting default and non-conflicting attributes + */ +struct metric* check_metric_conf (struct config_file *cfg, struct metric *c); +/* + * Return a new statfile structure, setting default and non-conflicting attributes + */ +struct statfile* check_statfile_conf (struct config_file *cfg, struct statfile *c); + +/* + * Read XML configuration file + */ +gboolean read_rspamd_config (struct config_file *cfg, + const gchar *filename, const gchar *convert_to, + rspamd_rcl_section_fin_t logger_fin, gpointer logger_ud); + +/* + * Register symbols of classifiers inside metrics + */ +void insert_classifier_symbols (struct config_file *cfg); + +/* + * Check statfiles inside a classifier + */ +gboolean check_classifier_statfiles (struct classifier_config *cf); + +/* + * Find classifier config by name + */ +struct classifier_config* find_classifier_conf (struct config_file *cfg, const gchar *name); + +/* + * Parse input `ip_list` to radix tree 
`tree`. Now supports only IPv4 addresses. + */ +gboolean rspamd_parse_ip_list (const gchar *ip_list, radix_tree_t **tree); + +#endif /* ifdef CFG_FILE_H */ +/* + * vi:ts=4 + */ diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c new file mode 100644 index 000000000..37b554dec --- /dev/null +++ b/src/libserver/cfg_rcl.c @@ -0,0 +1,1471 @@ +/* Copyright (c) 2013, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cfg_rcl.h" +#include "main.h" +#include "settings.h" +#include "cfg_file.h" +#include "lua/lua_common.h" +#include "expressions.h" +#include "classifiers/classifiers.h" +#include "tokenizers/tokenizers.h" + +/* + * Common section handlers + */ +static gboolean +rspamd_rcl_logging_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const ucl_object_t *val; + const gchar *facility, *log_type, *log_level; + + val = ucl_object_find_key (obj, "type"); + if (val != NULL && ucl_object_tostring_safe (val, &log_type)) { + if (g_ascii_strcasecmp (log_type, "file") == 0) { + /* Need to get filename */ + val = ucl_object_find_key (obj, "filename"); + if (val == NULL || val->type != UCL_STRING) { + g_set_error (err, CFG_RCL_ERROR, ENOENT, "filename attribute must be specified for file logging type"); + return FALSE; + } + cfg->log_type = RSPAMD_LOG_FILE; + cfg->log_file = rspamd_mempool_strdup (cfg->cfg_pool, ucl_object_tostring (val)); + } + else if (g_ascii_strcasecmp (log_type, "syslog") == 0) { + /* Need to get facility */ + cfg->log_facility = LOG_DAEMON; + cfg->log_type = RSPAMD_LOG_SYSLOG; + val = ucl_object_find_key (obj, "facility"); + if (val != NULL && ucl_object_tostring_safe (val, &facility)) { + if (g_ascii_strcasecmp (facility, "LOG_AUTH") == 0 || + g_ascii_strcasecmp (facility, "auth") == 0 ) { + cfg->log_facility = LOG_AUTH; + } + else if (g_ascii_strcasecmp (facility, "LOG_CRON") == 0 || + g_ascii_strcasecmp (facility, "cron") == 0 ) { + cfg->log_facility = LOG_CRON; + } + else if (g_ascii_strcasecmp (facility, "LOG_DAEMON") == 0 || + g_ascii_strcasecmp (facility, "daemon") == 0 ) { + cfg->log_facility = LOG_DAEMON; + } + else if (g_ascii_strcasecmp (facility, "LOG_MAIL") == 0 || + g_ascii_strcasecmp (facility, "mail") == 0) { + cfg->log_facility = LOG_MAIL; + } + else if (g_ascii_strcasecmp (facility, "LOG_USER") == 0 || + g_ascii_strcasecmp (facility, "user") == 0 
) { + cfg->log_facility = LOG_USER; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL0") == 0 || + g_ascii_strcasecmp (facility, "local0") == 0) { + cfg->log_facility = LOG_LOCAL0; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL1") == 0 || + g_ascii_strcasecmp (facility, "local1") == 0) { + cfg->log_facility = LOG_LOCAL1; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL2") == 0 || + g_ascii_strcasecmp (facility, "local2") == 0) { + cfg->log_facility = LOG_LOCAL2; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL3") == 0 || + g_ascii_strcasecmp (facility, "local3") == 0) { + cfg->log_facility = LOG_LOCAL3; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL4") == 0 || + g_ascii_strcasecmp (facility, "local4") == 0) { + cfg->log_facility = LOG_LOCAL4; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL5") == 0 || + g_ascii_strcasecmp (facility, "local5") == 0) { + cfg->log_facility = LOG_LOCAL5; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL6") == 0 || + g_ascii_strcasecmp (facility, "local6") == 0) { + cfg->log_facility = LOG_LOCAL6; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL7") == 0 || + g_ascii_strcasecmp (facility, "local7") == 0) { + cfg->log_facility = LOG_LOCAL7; + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid log facility: %s", facility); + return FALSE; + } + } + } + else if (g_ascii_strcasecmp (log_type, "stderr") == 0 || g_ascii_strcasecmp (log_type, "console") == 0) { + cfg->log_type = RSPAMD_LOG_CONSOLE; + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid log type: %s", log_type); + return FALSE; + } + } + else { + /* No type specified */ + msg_warn ("logging type is not specified correctly, log output to the console"); + } + + /* Handle log level */ + val = ucl_object_find_key (obj, "level"); + if (val != NULL && ucl_object_tostring_safe (val, &log_level)) { + if (g_ascii_strcasecmp (log_level, "error") == 0) { + cfg->log_level = G_LOG_LEVEL_ERROR | 
G_LOG_LEVEL_CRITICAL; + } + else if (g_ascii_strcasecmp (log_level, "warning") == 0) { + cfg->log_level = G_LOG_LEVEL_WARNING; + } + else if (g_ascii_strcasecmp (log_level, "info") == 0) { + cfg->log_level = G_LOG_LEVEL_INFO | G_LOG_LEVEL_MESSAGE; + } + else if (g_ascii_strcasecmp (log_level, "debug") == 0) { + cfg->log_level = G_LOG_LEVEL_DEBUG; + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid log level: %s", log_level); + return FALSE; + } + } + + return rspamd_rcl_section_parse_defaults (section, cfg, obj, cfg, err); +} + +static gboolean +rspamd_rcl_options_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const ucl_object_t *val; + const gchar *user_settings, *domain_settings; + + /* Handle user and domain settings */ + val = ucl_object_find_key (obj, "user_settings"); + if (val != NULL && ucl_object_tostring_safe (val, &user_settings)) { + if (!read_settings (user_settings, "Users' settings", cfg, cfg->user_settings)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot read settings: %s", user_settings); + return FALSE; + } + cfg->user_settings_str = rspamd_mempool_strdup (cfg->cfg_pool, user_settings); + } + + val = ucl_object_find_key (obj, "domain_settings"); + if (val != NULL && ucl_object_tostring_safe (val, &domain_settings)) { + if (!read_settings (domain_settings, "Domains settings", cfg, cfg->domain_settings)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot read settings: %s", domain_settings); + return FALSE; + } + cfg->domain_settings_str = rspamd_mempool_strdup (cfg->cfg_pool, domain_settings); + } + + return rspamd_rcl_section_parse_defaults (section, cfg, obj, cfg, err); +} + +static gint +rspamd_symbols_group_find_func (gconstpointer a, gconstpointer b) +{ + const struct symbols_group *gr = a; + const gchar *uv = b; + + return g_ascii_strcasecmp (gr->name, uv); +} + +/** + * Insert a symbol to the metric + * @param cfg + * @param metric + * 
@param obj symbol rcl object (either float value or an object) + * @param err + * @return + */ +static gboolean +rspamd_rcl_insert_symbol (struct config_file *cfg, struct metric *metric, + const ucl_object_t *obj, gboolean is_legacy, GError **err) +{ + const gchar *group = "ungrouped", *description = NULL, *sym_name; + gdouble symbol_score, *score_ptr; + const ucl_object_t *val; + struct symbols_group *sym_group; + struct symbol_def *sym_def; + GList *metric_list, *group_list; + + /* + * We allow two type of definitions: + * symbol = weight + * or + * symbol { + * weight = ...; + * description = ...; + * group = ...; + * } + */ + if (is_legacy) { + val = ucl_object_find_key (obj, "name"); + if (val == NULL) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "symbol name is missing"); + return FALSE; + } + sym_name = ucl_object_tostring (val); + } + else { + sym_name = ucl_object_key (obj); + } + if (ucl_object_todouble_safe (obj, &symbol_score)) { + description = NULL; + } + else if (obj->type == UCL_OBJECT) { + val = ucl_object_find_key (obj, "weight"); + if (val == NULL || !ucl_object_todouble_safe (val, &symbol_score)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid symbol score: %s", sym_name); + return FALSE; + } + val = ucl_object_find_key (obj, "description"); + if (val != NULL) { + description = ucl_object_tostring (val); + } + val = ucl_object_find_key (obj, "group"); + if (val != NULL) { + ucl_object_tostring_safe (val, &group); + } + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid symbol type: %s", sym_name); + return FALSE; + } + + sym_def = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct symbol_def)); + score_ptr = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (gdouble)); + + *score_ptr = symbol_score; + sym_def->weight_ptr = score_ptr; + sym_def->name = rspamd_mempool_strdup (cfg->cfg_pool, sym_name); + sym_def->description = (gchar *)description; + + g_hash_table_insert (metric->symbols, sym_def->name, score_ptr); + + if 
((metric_list = g_hash_table_lookup (cfg->metrics_symbols, sym_def->name)) == NULL) { + metric_list = g_list_prepend (NULL, metric); + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_list_free, metric_list); + g_hash_table_insert (cfg->metrics_symbols, sym_def->name, metric_list); + } + else { + /* Slow but keep start element of list in safe */ + if (!g_list_find (metric_list, metric)) { + metric_list = g_list_append (metric_list, metric); + } + } + + /* Search for symbol group */ + group_list = g_list_find_custom (cfg->symbols_groups, group, rspamd_symbols_group_find_func); + if (group_list == NULL) { + /* Create new group */ + sym_group = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct symbols_group)); + sym_group->name = rspamd_mempool_strdup (cfg->cfg_pool, group); + sym_group->symbols = NULL; + cfg->symbols_groups = g_list_prepend (cfg->symbols_groups, sym_group); + } + else { + sym_group = group_list->data; + } + /* Insert symbol */ + sym_group->symbols = g_list_prepend (sym_group->symbols, sym_def); + + return TRUE; +} + +static gboolean +rspamd_rcl_metric_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const ucl_object_t *val, *cur; + const gchar *metric_name, *subject_name, *semicolon, *act_str; + struct metric *metric; + struct metric_action *action; + gdouble action_score, grow_factor; + gint action_value; + gboolean new = TRUE, have_actions = FALSE; + ucl_object_iter_t it = NULL; + + val = ucl_object_find_key (obj, "name"); + if (val == NULL || !ucl_object_tostring_safe (val, &metric_name)) { + metric_name = DEFAULT_METRIC; + } + + metric = g_hash_table_lookup (cfg->metrics, metric_name); + if (metric == NULL) { + metric = check_metric_conf (cfg, metric); + metric->name = metric_name; + } + else { + new = FALSE; + } + + /* Handle actions */ + val = ucl_object_find_key (obj, "actions"); + if (val != NULL) { + if (val->type != UCL_OBJECT) { + 
g_set_error (err, CFG_RCL_ERROR, EINVAL, "actions must be an object"); + return FALSE; + } + while ((cur = ucl_iterate_object (val, &it, true)) != NULL) { + if (!check_action_str (ucl_object_key (cur), &action_value) || + !ucl_object_todouble_safe (cur, &action_score)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid action definition: %s", ucl_object_key (cur)); + return FALSE; + } + action = &metric->actions[action_value]; + action->action = action_value; + action->score = action_score; + } + } + else if (new) { + /* Switch to legacy mode */ + val = ucl_object_find_key (obj, "required_score"); + if (val != NULL && ucl_object_todouble_safe (val, &action_score)) { + action = &metric->actions[METRIC_ACTION_REJECT]; + action->action = METRIC_ACTION_REJECT; + action->score = action_score; + have_actions = TRUE; + } + val = ucl_object_find_key (obj, "action"); + LL_FOREACH (val, cur) { + if (cur->type == UCL_STRING) { + act_str = ucl_object_tostring (cur); + semicolon = strchr (act_str, ':'); + if (semicolon != NULL) { + if (check_action_str (act_str, &action_value)) { + action_score = strtod (semicolon + 1, NULL); + action = &metric->actions[action_value]; + action->action = action_value; + action->score = action_score; + have_actions = TRUE; + } + } + } + } + if (new && !have_actions) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "metric %s has no actions", metric_name); + return FALSE; + } + } + + /* Handle symbols */ + val = ucl_object_find_key (obj, "symbols"); + if (val != NULL) { + if (val->type == UCL_ARRAY) { + val = val->value.ov; + } + if (val->type != UCL_OBJECT) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "symbols must be an object"); + return FALSE; + } + it = NULL; + while ((cur = ucl_iterate_object (val, &it, true)) != NULL) { + if (!rspamd_rcl_insert_symbol (cfg, metric, cur, FALSE, err)) { + return FALSE; + } + } + } + else { + /* Legacy variant */ + val = ucl_object_find_key (obj, "symbol"); + if (val != NULL) { + if (val->type == UCL_ARRAY) { + 
val = val->value.ov; + } + if (val->type != UCL_OBJECT) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "symbols must be an object"); + return FALSE; + } + LL_FOREACH (val, cur) { + if (!rspamd_rcl_insert_symbol (cfg, metric, cur, TRUE, err)) { + return FALSE; + } + } + } + else if (new) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "metric %s has no symbols", metric_name); + return FALSE; + } + } + + val = ucl_object_find_key (obj, "grow_factor"); + if (val && ucl_object_todouble_safe (val, &grow_factor)) { + metric->grow_factor = grow_factor; + } + + val = ucl_object_find_key (obj, "subject"); + if (val && ucl_object_tostring_safe (val, &subject_name)) { + metric->subject = (gchar *)subject_name; + } + + /* Insert the resulting metric */ + if (new) { + g_hash_table_insert (cfg->metrics, (void *)metric->name, metric); + cfg->metrics_list = g_list_prepend (cfg->metrics_list, metric); + } + + return TRUE; +} + +static gboolean +rspamd_rcl_worker_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const ucl_object_t *val, *cur; + ucl_object_iter_t it = NULL; + const gchar *worker_type, *worker_bind; + GQuark qtype; + struct worker_conf *wrk; + struct rspamd_worker_cfg_parser *wparser; + struct rspamd_worker_param_parser *whandler; + + val = ucl_object_find_key (obj, "type"); + if (val != NULL && ucl_object_tostring_safe (val, &worker_type)) { + qtype = g_quark_try_string (worker_type); + if (qtype != 0) { + wrk = check_worker_conf (cfg, NULL); + wrk->worker = get_worker_by_type (qtype); + if (wrk->worker == NULL) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "unknown worker type: %s", worker_type); + return FALSE; + } + wrk->type = qtype; + if (wrk->worker->worker_init_func) { + wrk->ctx = wrk->worker->worker_init_func (cfg); + } + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "unknown worker type: %s", worker_type); + return FALSE; + } + } + else { + g_set_error (err, CFG_RCL_ERROR, 
EINVAL, "undefined worker type"); + return FALSE; + } + + val = ucl_object_find_key (obj, "bind_socket"); + if (val != NULL) { + if (val->type == UCL_ARRAY) { + val = val->value.ov; + } + LL_FOREACH (val, cur) { + if (!ucl_object_tostring_safe (cur, &worker_bind)) { + continue; + } + if (!parse_bind_line (cfg, wrk, worker_bind)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot parse bind line: %s", worker_bind); + return FALSE; + } + } + } + + wrk->options = (ucl_object_t *)obj; + + if (!rspamd_rcl_section_parse_defaults (section, cfg, obj, wrk, err)) { + return FALSE; + } + + /* Parse other attributes */ + HASH_FIND_INT (cfg->wrk_parsers, (gint *)&qtype, wparser); + if (wparser != NULL && obj->type == UCL_OBJECT) { + while ((cur = ucl_iterate_object (obj, &it, true)) != NULL) { + HASH_FIND_STR (wparser->parsers, ucl_object_key (cur), whandler); + if (whandler != NULL) { + if (!whandler->handler (cfg, cur, &whandler->parser, section, err)) { + return FALSE; + } + } + } + if (wparser->def_obj_parser != NULL) { + if (! 
wparser->def_obj_parser (obj, wparser->def_ud)) { + return FALSE; + } + } + } + + cfg->workers = g_list_prepend (cfg->workers, wrk); + + return TRUE; +} + +static void +rspamd_rcl_set_lua_globals (struct config_file *cfg, lua_State *L) +{ + struct config_file **pcfg; + + /* First check for global variable 'config' */ + lua_getglobal (L, "config"); + if (lua_isnil (L, -1)) { + /* Assign global table to set up attributes */ + lua_newtable (L); + lua_setglobal (L, "config"); + } + + lua_getglobal (L, "metrics"); + if (lua_isnil (L, -1)) { + lua_newtable (L); + lua_setglobal (L, "metrics"); + } + + lua_getglobal (L, "composites"); + if (lua_isnil (L, -1)) { + lua_newtable (L); + lua_setglobal (L, "composites"); + } + + lua_getglobal (L, "classifiers"); + if (lua_isnil (L, -1)) { + lua_newtable (L); + lua_setglobal (L, "classifiers"); + } + + pcfg = lua_newuserdata (L, sizeof (struct config_file *)); + lua_setclass (L, "rspamd{config}", -1); + *pcfg = cfg; + lua_setglobal (L, "rspamd_config"); + + /* Clear stack from globals */ + lua_pop (L, 4); +} + +static gboolean +rspamd_rcl_lua_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const gchar *lua_src = rspamd_mempool_strdup (cfg->cfg_pool, ucl_object_tostring (obj)); + gchar *cur_dir, *lua_dir, *lua_file, *tmp1, *tmp2; + lua_State *L = cfg->lua_state; + + tmp1 = g_strdup (lua_src); + tmp2 = g_strdup (lua_src); + lua_dir = dirname (tmp1); + lua_file = basename (tmp2); + if (lua_dir && lua_file) { + cur_dir = g_malloc (PATH_MAX); + if (getcwd (cur_dir, PATH_MAX) != NULL && chdir (lua_dir) != -1) { + /* Load file */ + if (luaL_loadfile (L, lua_file) != 0) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot load lua file %s: %s", + lua_src, lua_tostring (L, -1)); + if (chdir (cur_dir) == -1) { + msg_err ("cannot chdir to %s: %s", cur_dir, strerror (errno));; + } + g_free (cur_dir); + g_free (tmp1); + g_free (tmp2); + return FALSE; + } + 
rspamd_rcl_set_lua_globals (cfg, L); + /* Now do it */ + if (lua_pcall (L, 0, LUA_MULTRET, 0) != 0) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot init lua file %s: %s", + lua_src, lua_tostring (L, -1)); + if (chdir (cur_dir) == -1) { + msg_err ("cannot chdir to %s: %s", cur_dir, strerror (errno));; + } + g_free (cur_dir); + g_free (tmp1); + g_free (tmp2); + return FALSE; + } + } + else { + g_set_error (err, CFG_RCL_ERROR, ENOENT, "cannot chdir to %s: %s", + lua_src, strerror (errno)); + if (chdir (cur_dir) == -1) { + msg_err ("cannot chdir to %s: %s", cur_dir, strerror (errno));; + } + g_free (cur_dir); + g_free (tmp1); + g_free (tmp2); + return FALSE; + + } + if (chdir (cur_dir) == -1) { + msg_err ("cannot chdir to %s: %s", cur_dir, strerror (errno));; + } + g_free (cur_dir); + g_free (tmp1); + g_free (tmp2); + } + else { + g_set_error (err, CFG_RCL_ERROR, ENOENT, "cannot find to %s: %s", + lua_src, strerror (errno)); + return FALSE; + } + + return TRUE; +} + +static gboolean +rspamd_rcl_add_module_path (struct config_file *cfg, const gchar *path, GError **err) +{ + struct stat st; + struct script_module *cur_mod; + glob_t globbuf; + gchar *pattern; + size_t len; + guint i; + + if (stat (path, &st) == -1) { + g_set_error (err, CFG_RCL_ERROR, errno, "cannot stat path %s, %s", path, strerror (errno)); + return FALSE; + } + + /* Handle directory */ + if (S_ISDIR (st.st_mode)) { + globbuf.gl_offs = 0; + len = strlen (path) + sizeof ("*.lua"); + pattern = g_malloc (len); + snprintf (pattern, len, "%s%s", path, "*.lua"); + + if (glob (pattern, GLOB_DOOFFS, NULL, &globbuf) == 0) { + for (i = 0; i < globbuf.gl_pathc; i ++) { + cur_mod = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct script_module)); + cur_mod->path = rspamd_mempool_strdup (cfg->cfg_pool, globbuf.gl_pathv[i]); + cfg->script_modules = g_list_prepend (cfg->script_modules, cur_mod); + } + globfree (&globbuf); + g_free (pattern); + } + else { + g_set_error (err, CFG_RCL_ERROR, errno, "glob failed 
for %s, %s", pattern, strerror (errno)); + g_free (pattern); + return FALSE; + } + } + else { + /* Handle single file */ + cur_mod = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct script_module)); + cur_mod->path = rspamd_mempool_strdup (cfg->cfg_pool, path); + cfg->script_modules = g_list_prepend (cfg->script_modules, cur_mod); + } + + return TRUE; +} + +static gboolean +rspamd_rcl_modules_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const ucl_object_t *val, *cur; + const gchar *data; + + if (obj->type == UCL_OBJECT) { + val = ucl_object_find_key (obj, "path"); + + LL_FOREACH (val, cur) { + if (ucl_object_tostring_safe (cur, &data)) { + if (!rspamd_rcl_add_module_path (cfg, rspamd_mempool_strdup (cfg->cfg_pool, data), err)) { + return FALSE; + } + } + } + } + else if (ucl_object_tostring_safe (obj, &data)) { + if (!rspamd_rcl_add_module_path (cfg, rspamd_mempool_strdup (cfg->cfg_pool, data), err)) { + return FALSE; + } + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "module parameter has wrong type (must be an object or a string)"); + return FALSE; + } + + return TRUE; +} + +static gboolean +rspamd_rcl_statfile_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + struct classifier_config *ccf = ud; + const ucl_object_t *val; + struct statfile *st; + const gchar *data; + gdouble binlog_rotate; + GList *labels; + + st = check_statfile_conf (cfg, NULL); + + val = ucl_object_find_key (obj, "binlog"); + if (val != NULL && ucl_object_tostring_safe (val, &data)) { + if (st->binlog == NULL) { + st->binlog = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_binlog_params)); + } + if (g_ascii_strcasecmp (data, "master") == 0) { + st->binlog->affinity = AFFINITY_MASTER; + } + else if (g_ascii_strcasecmp (data, "slave") == 0) { + st->binlog->affinity = AFFINITY_SLAVE; + } + else { + 
st->binlog->affinity = AFFINITY_NONE; + } + /* Parse remaining binlog attributes */ + val = ucl_object_find_key (obj, "binlog_rotate"); + if (val != NULL && ucl_object_todouble_safe (val, &binlog_rotate)) { + st->binlog->rotate_time = binlog_rotate; + } + val = ucl_object_find_key (obj, "binlog_master"); + if (val != NULL && ucl_object_tostring_safe (val, &data)) { + if (!parse_host_port (cfg->cfg_pool, data, &st->binlog->master_addr, &st->binlog->master_port)) { + msg_err ("cannot parse master address: %s", data); + return FALSE; + } + } + } + + + if (rspamd_rcl_section_parse_defaults (section, cfg, obj, st, err)) { + ccf->statfiles = g_list_prepend (ccf->statfiles, st); + if (st->label != NULL) { + labels = g_hash_table_lookup (ccf->labels, st->label); + if (labels != NULL) { + labels = g_list_append (labels, st); + } + else { + g_hash_table_insert (ccf->labels, st->label, g_list_prepend (NULL, st)); + } + } + if (st->symbol != NULL) { + g_hash_table_insert (cfg->classifiers_symbols, st->symbol, st); + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "statfile must have a symbol defined"); + return FALSE; + } + + if (st->path == NULL) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "statfile must have a path defined"); + return FALSE; + } + + st->opts = (ucl_object_t *)obj; + + val = ucl_object_find_key (obj, "spam"); + if (val == NULL) { + msg_info ("statfile %s has no explicit 'spam' setting, trying to guess by symbol", st->symbol); + if (rspamd_strncasestr (st->symbol, "spam", strlen (st->symbol)) != NULL) { + st->is_spam = TRUE; + } + else if (rspamd_strncasestr (st->symbol, "ham", strlen (st->symbol)) != NULL) { + st->is_spam = FALSE; + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot guess spam setting from %s", st->symbol); + return FALSE; + } + msg_info ("guessed that statfile with symbol %s is %s", st->symbol, st->is_spam ? 
+ "spam" : "ham"); + } + return TRUE; + } + + return FALSE; +} + +static gboolean +rspamd_rcl_classifier_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const ucl_object_t *val, *cur; + ucl_object_iter_t it = NULL; + const gchar *key, *type; + struct classifier_config *ccf, *found = NULL; + gboolean res = TRUE; + struct rspamd_rcl_section *stat_section; + GList *cur_cl; + + val = ucl_object_find_key (obj, "type"); + if (val == NULL || !ucl_object_tostring_safe (val, &type)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "classifier should have type defined"); + return FALSE; + } + + cur_cl = cfg->classifiers; + while (cur_cl != NULL) { + ccf = cur_cl->data; + if (g_ascii_strcasecmp (ccf->classifier->name, type) == 0) { + found = ccf; + break; + } + cur_cl = g_list_next (cur_cl); + } + + if (found == NULL) { + ccf = check_classifier_conf (cfg, NULL); + ccf->classifier = get_classifier (type); + } + else { + ccf = found; + } + + HASH_FIND_STR (section->subsections, "statfile", stat_section); + + while ((val = ucl_iterate_object (obj, &it, true)) != NULL && res) { + key = ucl_object_key (val); + if (key != NULL) { + if (g_ascii_strcasecmp (key, "statfile") == 0) { + LL_FOREACH (val, cur) { + res = rspamd_rcl_statfile_handler (cfg, cur, ccf, stat_section, err); + if (!res) { + return FALSE; + } + } + } + else if (g_ascii_strcasecmp (key, "type") == 0 && val->type == UCL_STRING) { + continue; + } + else if (g_ascii_strcasecmp (key, "tokenizer") == 0 && val->type == UCL_STRING) { + ccf->tokenizer = get_tokenizer (ucl_object_tostring (val)); + } + else { + /* Just insert a value of option to the hash */ + g_hash_table_insert (ccf->opts, (gpointer)key, (gpointer)ucl_object_tostring_forced (val)); + } + } + } + + if (found == NULL) { + cfg->classifiers = g_list_prepend (cfg->classifiers, ccf); + } + + + return res; +} + +static gboolean +rspamd_rcl_composite_handler (struct config_file *cfg, 
const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const ucl_object_t *val; + struct expression *expr; + struct rspamd_composite *composite; + const gchar *composite_name, *composite_expression; + gboolean new = TRUE; + + val = ucl_object_find_key (obj, "name"); + if (val == NULL || !ucl_object_tostring_safe (val, &composite_name)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "composite must have a name defined"); + return FALSE; + } + + if (g_hash_table_lookup (cfg->composite_symbols, composite_name) != NULL) { + msg_warn ("composite %s is redefined", composite_name); + new = FALSE; + } + + val = ucl_object_find_key (obj, "expression"); + if (val == NULL || !ucl_object_tostring_safe (val, &composite_expression)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "composite must have an expression defined"); + return FALSE; + } + + if ((expr = parse_expression (cfg->cfg_pool, (gchar *)composite_expression)) == NULL) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot parse composite expression: %s", composite_expression); + return FALSE; + } + + composite = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct rspamd_composite)); + composite->expr = expr; + composite->id = g_hash_table_size (cfg->composite_symbols) + 1; + g_hash_table_insert (cfg->composite_symbols, (gpointer)composite_name, composite); + + if (new) { + register_virtual_symbol (&cfg->cache, composite_name, 1); + } + + return TRUE; +} + +/** + * Fake handler to parse default options only, uses struct cfg_file as pointer + * for default handlers + */ +static gboolean +rspamd_rcl_empty_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + return rspamd_rcl_section_parse_defaults (section, cfg, obj, cfg, err); +} + +/** + * Add new section to the configuration + * @param top top section + * @param name the name of the section + * @param handler handler function for all attributes + * @param 
type type of object handled by a handler + * @param required whether at least one of these sections is required + * @param strict_type turn on strict check for types for this section + * @return newly created structure + */ +static inline struct rspamd_rcl_section* +rspamd_rcl_add_section (struct rspamd_rcl_section **top, + const gchar *name, rspamd_rcl_handler_t handler, + enum ucl_type type, gboolean required, gboolean strict_type) +{ + struct rspamd_rcl_section *new; + + new = g_slice_alloc0 (sizeof (struct rspamd_rcl_section)); + new->name = name; + new->handler = handler; + new->type = type; + new->strict_type = strict_type; + + HASH_ADD_KEYPTR (hh, *top, new->name, strlen (new->name), new); + return new; +} + +/** + * Add a default handler for a section + * @param section section pointer + * @param name name of param + * @param handler handler of param + * @param offset offset in a structure + * @param flags flags for the parser + * @return newly created structure + */ +static inline struct rspamd_rcl_default_handler_data * +rspamd_rcl_add_default_handler (struct rspamd_rcl_section *section, const gchar *name, + rspamd_rcl_handler_t handler, gsize offset, gint flags) +{ + struct rspamd_rcl_default_handler_data *new; + + new = g_slice_alloc0 (sizeof (struct rspamd_rcl_default_handler_data)); + new->key = name; + new->handler = handler; + new->pd.offset = offset; + new->pd.flags = flags; + + HASH_ADD_KEYPTR (hh, section->default_parser, new->key, strlen (new->key), new); + return new; +} + +struct rspamd_rcl_section* +rspamd_rcl_config_init (void) +{ + struct rspamd_rcl_section *new = NULL, *sub, *ssub; + + /* TODO: add all known rspamd sections here */ + /** + * Logging section + */ + sub = rspamd_rcl_add_section (&new, "logging", rspamd_rcl_logging_handler, UCL_OBJECT, + FALSE, TRUE); + /* Default handlers */ + rspamd_rcl_add_default_handler (sub, "log_buffer", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct config_file, log_buf_size), 0); + 
rspamd_rcl_add_default_handler (sub, "log_urls", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct config_file, log_urls), 0); + rspamd_rcl_add_default_handler (sub, "debug_ip", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, debug_ip_map), 0); + rspamd_rcl_add_default_handler (sub, "debug_symbols", rspamd_rcl_parse_struct_string_list, + G_STRUCT_OFFSET (struct config_file, debug_symbols), 0); + rspamd_rcl_add_default_handler (sub, "log_color", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct config_file, log_color), 0); + /** + * Options section + */ + sub = rspamd_rcl_add_section (&new, "options", rspamd_rcl_options_handler, UCL_OBJECT, + FALSE, TRUE); + rspamd_rcl_add_default_handler (sub, "cache_file", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, cache_filename), RSPAMD_CL_FLAG_STRING_PATH); + rspamd_rcl_add_default_handler (sub, "dns_nameserver", rspamd_rcl_parse_struct_string_list, + G_STRUCT_OFFSET (struct config_file, nameservers), 0); + rspamd_rcl_add_default_handler (sub, "dns_timeout", rspamd_rcl_parse_struct_time, + G_STRUCT_OFFSET (struct config_file, dns_timeout), RSPAMD_CL_FLAG_TIME_FLOAT); + rspamd_rcl_add_default_handler (sub, "dns_retransmits", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct config_file, dns_retransmits), RSPAMD_CL_FLAG_INT_32); + rspamd_rcl_add_default_handler (sub, "dns_sockets", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct config_file, dns_io_per_server), RSPAMD_CL_FLAG_INT_32); + rspamd_rcl_add_default_handler (sub, "raw_mode", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct config_file, raw_mode), 0); + rspamd_rcl_add_default_handler (sub, "one_shot", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct config_file, one_shot_mode), 0); + rspamd_rcl_add_default_handler (sub, "check_attachements", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct config_file, check_text_attachements), 0); + 
rspamd_rcl_add_default_handler (sub, "tempdir", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, temp_dir), RSPAMD_CL_FLAG_STRING_PATH); + rspamd_rcl_add_default_handler (sub, "pidfile", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, pid_file), RSPAMD_CL_FLAG_STRING_PATH); + rspamd_rcl_add_default_handler (sub, "filters", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, filters_str), 0); + rspamd_rcl_add_default_handler (sub, "sync_interval", rspamd_rcl_parse_struct_time, + G_STRUCT_OFFSET (struct config_file, statfile_sync_interval), RSPAMD_CL_FLAG_TIME_INTEGER); + rspamd_rcl_add_default_handler (sub, "sync_timeout", rspamd_rcl_parse_struct_time, + G_STRUCT_OFFSET (struct config_file, statfile_sync_timeout), RSPAMD_CL_FLAG_TIME_INTEGER); + rspamd_rcl_add_default_handler (sub, "max_diff", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct config_file, max_diff), RSPAMD_CL_FLAG_INT_SIZE); + rspamd_rcl_add_default_handler (sub, "map_watch_interval", rspamd_rcl_parse_struct_time, + G_STRUCT_OFFSET (struct config_file, map_timeout), RSPAMD_CL_FLAG_TIME_FLOAT); + rspamd_rcl_add_default_handler (sub, "dynamic_conf", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, dynamic_conf), 0); + rspamd_rcl_add_default_handler (sub, "rrd", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, rrd_file), RSPAMD_CL_FLAG_STRING_PATH); + rspamd_rcl_add_default_handler (sub, "history_file", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, history_file), RSPAMD_CL_FLAG_STRING_PATH); + rspamd_rcl_add_default_handler (sub, "use_mlock", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct config_file, mlock_statfile_pool), 0); + rspamd_rcl_add_default_handler (sub, "strict_protocol_headers", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct config_file, strict_protocol_headers), 0); + + /** + * Metric section + */ + sub = 
rspamd_rcl_add_section (&new, "metric", rspamd_rcl_metric_handler, UCL_OBJECT, + FALSE, TRUE); + + /** + * Worker section + */ + sub = rspamd_rcl_add_section (&new, "worker", rspamd_rcl_worker_handler, UCL_OBJECT, + FALSE, TRUE); + rspamd_rcl_add_default_handler (sub, "count", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct worker_conf, count), RSPAMD_CL_FLAG_INT_16); + rspamd_rcl_add_default_handler (sub, "max_files", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct worker_conf, rlimit_nofile), RSPAMD_CL_FLAG_INT_32); + rspamd_rcl_add_default_handler (sub, "max_core", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct worker_conf, rlimit_maxcore), RSPAMD_CL_FLAG_INT_32); + + /** + * Lua handler + */ + sub = rspamd_rcl_add_section (&new, "lua", rspamd_rcl_lua_handler, UCL_STRING, + FALSE, TRUE); + + /** + * Modules handler + */ + sub = rspamd_rcl_add_section (&new, "modules", rspamd_rcl_modules_handler, UCL_OBJECT, + FALSE, FALSE); + + /** + * Classifiers handler + */ + sub = rspamd_rcl_add_section (&new, "classifier", rspamd_rcl_classifier_handler, UCL_OBJECT, + FALSE, TRUE); + ssub = rspamd_rcl_add_section (&sub->subsections, "statfile", rspamd_rcl_statfile_handler, + UCL_OBJECT, TRUE, TRUE); + rspamd_rcl_add_default_handler (ssub, "symbol", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct statfile, symbol), 0); + rspamd_rcl_add_default_handler (ssub, "path", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct statfile, path), RSPAMD_CL_FLAG_STRING_PATH); + rspamd_rcl_add_default_handler (ssub, "label", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct statfile, label), 0); + rspamd_rcl_add_default_handler (ssub, "size", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct statfile, size), RSPAMD_CL_FLAG_INT_SIZE); + rspamd_rcl_add_default_handler (ssub, "spam", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct statfile, is_spam), 0); + + /** + * Composites handler + */ + sub = 
rspamd_rcl_add_section (&new, "composite", rspamd_rcl_composite_handler, UCL_OBJECT, + FALSE, TRUE); + + return new; +} + +struct rspamd_rcl_section * +rspamd_rcl_config_get_section (struct rspamd_rcl_section *top, + const char *path) +{ + struct rspamd_rcl_section *cur, *found; + char **path_components; + gint ncomponents, i; + + + if (path == NULL) { + return top; + } + + path_components = g_strsplit_set (path, "/", -1); + ncomponents = g_strv_length (path_components); + + cur = top; + for (i = 0; i < ncomponents; i ++) { + if (cur == NULL) { + g_strfreev (path_components); + return NULL; + } + HASH_FIND_STR (cur, path_components[i], found); + if (found == NULL) { + g_strfreev (path_components); + return NULL; + } + cur = found; + } + + g_strfreev (path_components); + return found; +} + +gboolean +rspamd_read_rcl_config (struct rspamd_rcl_section *top, + struct config_file *cfg, const ucl_object_t *obj, GError **err) +{ + const ucl_object_t *found, *cur_obj; + struct rspamd_rcl_section *cur, *tmp; + + if (obj->type != UCL_OBJECT) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "top configuration must be an object"); + return FALSE; + } + + /* Iterate over known sections and ignore unknown ones */ + HASH_ITER (hh, top, cur, tmp) { + found = ucl_object_find_key (obj, cur->name); + if (found == NULL) { + if (cur->required) { + g_set_error (err, CFG_RCL_ERROR, ENOENT, "required section %s is missing", cur->name); + return FALSE; + } + } + else { + /* Check type */ + if (cur->strict_type) { + if (cur->type != found->type) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "object in section %s has invalid type", cur->name); + return FALSE; + } + } + LL_FOREACH (found, cur_obj) { + if (!cur->handler (cfg, cur_obj, NULL, cur, err)) { + return FALSE; + } + } + } + if (cur->fin) { + cur->fin (cfg, cur->fin_ud); + } + } + + cfg->rcl_obj = (ucl_object_t *)obj; + + return TRUE; +} + +gboolean rspamd_rcl_section_parse_defaults (struct rspamd_rcl_section *section, + struct config_file 
*cfg, const ucl_object_t *obj, gpointer ptr, + GError **err) +{ + const ucl_object_t *found; + struct rspamd_rcl_default_handler_data *cur, *tmp; + + if (obj->type != UCL_OBJECT) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "default configuration must be an object"); + return FALSE; + } + + HASH_ITER (hh, section->default_parser, cur, tmp) { + found = ucl_object_find_key (obj, cur->key); + if (found != NULL) { + cur->pd.user_struct = ptr; + if (!cur->handler (cfg, found, &cur->pd, section, err)) { + return FALSE; + } + } + } + + return TRUE; +} + +gboolean +rspamd_rcl_parse_struct_string (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + struct rspamd_rcl_struct_parser *pd = ud; + gchar **target; + const gsize num_str_len = 32; + + target = (gchar **)(((gchar *)pd->user_struct) + pd->offset); + switch (obj->type) { + case UCL_STRING: + *target = rspamd_mempool_strdup (cfg->cfg_pool, ucl_copy_value_trash (obj)); + break; + case UCL_INT: + *target = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); + rspamd_snprintf (*target, num_str_len, "%L", obj->value.iv); + break; + case UCL_FLOAT: + *target = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); + rspamd_snprintf (*target, num_str_len, "%f", obj->value.dv); + break; + case UCL_BOOLEAN: + *target = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); + rspamd_snprintf (*target, num_str_len, "%b", (gboolean)obj->value.iv); + break; + default: + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert object or array to string"); + return FALSE; + } + + return TRUE; +} + +gboolean +rspamd_rcl_parse_struct_integer (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + struct rspamd_rcl_struct_parser *pd = ud; + union { + gint *ip; + gint32 *i32p; + gint16 *i16p; + gint64 *i64p; + gsize *sp; + } target; + gint64 val; + + if (pd->flags == RSPAMD_CL_FLAG_INT_32) { + target.i32p = (gint32 
*)(((gchar *)pd->user_struct) + pd->offset); + if (!ucl_object_toint_safe (obj, &val)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer"); + return FALSE; + } + *target.i32p = val; + } + else if (pd->flags == RSPAMD_CL_FLAG_INT_64) { + target.i64p = (gint64 *)(((gchar *)pd->user_struct) + pd->offset); + if (!ucl_object_toint_safe (obj, &val)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer"); + return FALSE; + } + *target.i64p = val; + } + else if (pd->flags == RSPAMD_CL_FLAG_INT_SIZE) { + target.sp = (gsize *)(((gchar *)pd->user_struct) + pd->offset); + if (!ucl_object_toint_safe (obj, &val)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer"); + return FALSE; + } + *target.sp = val; + } + else if (pd->flags == RSPAMD_CL_FLAG_INT_16) { + target.i16p = (gint16 *)(((gchar *)pd->user_struct) + pd->offset); + if (!ucl_object_toint_safe (obj, &val)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer"); + return FALSE; + } + *target.i16p = val; + } + else { + target.ip = (gint *)(((gchar *)pd->user_struct) + pd->offset); + if (!ucl_object_toint_safe (obj, &val)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer"); + return FALSE; + } + *target.ip = val; + } + + return TRUE; +} + +gboolean +rspamd_rcl_parse_struct_double (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + struct rspamd_rcl_struct_parser *pd = ud; + gdouble *target; + + target = (gdouble *)(((gchar *)pd->user_struct) + pd->offset); + + if (!ucl_object_todouble_safe (obj, target)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to double"); + return FALSE; + } + + return TRUE; +} + +gboolean +rspamd_rcl_parse_struct_time (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + struct rspamd_rcl_struct_parser *pd = ud; 
+ union { + gint *psec; + guint32 *pu32; + gdouble *pdv; + struct timeval *ptv; + struct timespec *pts; + } target; + gdouble val; + + if (!ucl_object_todouble_safe (obj, &val)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to double"); + return FALSE; + } + + if (pd->flags == RSPAMD_CL_FLAG_TIME_TIMEVAL) { + target.ptv = (struct timeval *)(((gchar *)pd->user_struct) + pd->offset); + target.ptv->tv_sec = (glong)val; + target.ptv->tv_usec = (val - (glong)val) * 1000000; + } + else if (pd->flags == RSPAMD_CL_FLAG_TIME_TIMESPEC) { + target.pts = (struct timespec *)(((gchar *)pd->user_struct) + pd->offset); + target.pts->tv_sec = (glong)val; + target.pts->tv_nsec = (val - (glong)val) * 1000000000LL; /* fractional part of a second -> nanoseconds (1e9 per second) */ + } + else if (pd->flags == RSPAMD_CL_FLAG_TIME_FLOAT) { + target.pdv = (double *)(((gchar *)pd->user_struct) + pd->offset); + *target.pdv = val; + } + else if (pd->flags == RSPAMD_CL_FLAG_TIME_INTEGER) { + target.psec = (gint *)(((gchar *)pd->user_struct) + pd->offset); + *target.psec = val * 1000; + } + else if (pd->flags == RSPAMD_CL_FLAG_TIME_UINT_32) { + target.pu32 = (guint32 *)(((gchar *)pd->user_struct) + pd->offset); + *target.pu32 = val * 1000; + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid flags to parse time value"); + return FALSE; + } + + return TRUE; +} + +gboolean +rspamd_rcl_parse_struct_string_list (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + struct rspamd_rcl_struct_parser *pd = ud; + GList **target; + gchar *val; + const ucl_object_t *cur; + const gsize num_str_len = 32; + ucl_object_iter_t iter = NULL; + + target = (GList **)(((gchar *)pd->user_struct) + pd->offset); + + if (obj->type != UCL_ARRAY) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "an array of strings is expected"); + return FALSE; + } + + while ((cur = ucl_iterate_object (obj, &iter, true)) != NULL) { + switch (cur->type) { + case UCL_STRING: + val = rspamd_mempool_strdup
(cfg->cfg_pool, ucl_copy_value_trash (cur)); + break; + case UCL_INT: + val = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); + rspamd_snprintf (val, num_str_len, "%L", cur->value.iv); + break; + case UCL_FLOAT: + val = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); + rspamd_snprintf (val, num_str_len, "%f", cur->value.dv); + break; + case UCL_BOOLEAN: + val = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); + rspamd_snprintf (val, num_str_len, "%b", (gboolean)cur->value.iv); + break; + default: + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert an object or array to string"); + return FALSE; + } + *target = g_list_prepend (*target, val); + } + + /* Add a destructor */ + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_list_free, *target); + + return TRUE; +} + +gboolean +rspamd_rcl_parse_struct_boolean (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + struct rspamd_rcl_struct_parser *pd = ud; + gboolean *target; + + target = (gboolean *)(((gchar *)pd->user_struct) + pd->offset); + + if (obj->type == UCL_BOOLEAN) { + *target = obj->value.iv; + } + else if (obj->type == UCL_INT) { + *target = obj->value.iv; + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert an object to boolean"); + return FALSE; + } + + return TRUE; +} + +void +rspamd_rcl_register_worker_option (struct config_file *cfg, gint type, const gchar *name, + rspamd_rcl_handler_t handler, gpointer target, gsize offset, gint flags) +{ + struct rspamd_worker_param_parser *nhandler; + struct rspamd_worker_cfg_parser *nparser; + + HASH_FIND_INT (cfg->wrk_parsers, &type, nparser); + if (nparser == NULL) { + /* Allocate new parser for this worker */ + nparser = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_cfg_parser)); + nparser->type = type; + HASH_ADD_INT (cfg->wrk_parsers, type, nparser); + } + + HASH_FIND_STR (nparser->parsers, name, nhandler); + if 
(nhandler != NULL) { + msg_warn ("handler for parameter %s is already registered for worker type %s", + name, g_quark_to_string (type)); + return; + } + nhandler = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_param_parser)); + nhandler->name = name; + nhandler->parser.flags = flags; + nhandler->parser.offset = offset; + nhandler->parser.user_struct = target; + nhandler->handler = handler; + HASH_ADD_KEYPTR (hh, nparser->parsers, name, strlen (name), nhandler); +} + + +void +rspamd_rcl_register_worker_parser (struct config_file *cfg, gint type, + gboolean (*func)(ucl_object_t *, gpointer), gpointer ud) +{ + struct rspamd_worker_cfg_parser *nparser; + HASH_FIND_INT (cfg->wrk_parsers, &type, nparser); + if (nparser == NULL) { + /* Allocate new parser for this worker */ + nparser = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_cfg_parser)); + nparser->type = type; + HASH_ADD_INT (cfg->wrk_parsers, type, nparser); + } + + nparser->def_obj_parser = func; + nparser->def_ud = ud; +} diff --git a/src/libserver/cfg_rcl.h b/src/libserver/cfg_rcl.h new file mode 100644 index 000000000..99839d1ea --- /dev/null +++ b/src/libserver/cfg_rcl.h @@ -0,0 +1,238 @@ +/* Copyright (c) 2013, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CFG_RCL_H_ +#define CFG_RCL_H_ + +#include "config.h" +#include "ucl/include/ucl.h" +#include "uthash.h" + +#define CFG_RCL_ERROR cfg_rcl_error_quark () +static inline GQuark +cfg_rcl_error_quark (void) +{ + return g_quark_from_static_string ("cfg-rcl-error-quark"); +} + +struct rspamd_rcl_section; +struct config_file; + +struct rspamd_rcl_struct_parser { + gpointer user_struct; + goffset offset; + enum { + RSPAMD_CL_FLAG_TIME_FLOAT = 0x1 << 0, + RSPAMD_CL_FLAG_TIME_TIMEVAL = 0x1 << 1, + RSPAMD_CL_FLAG_TIME_TIMESPEC = 0x1 << 2, + RSPAMD_CL_FLAG_TIME_INTEGER = 0x1 << 3, + RSPAMD_CL_FLAG_TIME_UINT_32 = 0x1 << 4, + RSPAMD_CL_FLAG_INT_16 = 0x1 << 5, + RSPAMD_CL_FLAG_INT_32 = 0x1 << 6, + RSPAMD_CL_FLAG_INT_64 = 0x1 << 7, + RSPAMD_CL_FLAG_INT_SIZE = 0x1 << 8, + RSPAMD_CL_FLAG_STRING_PATH = 0x1 << 9 + } flags; +}; + +/** + * Common handler type + * @param cfg configuration + * @param obj object to parse + * @param ud user data (depends on section) + * @param err error object + * @return TRUE if a section has been parsed + */ +typedef gboolean (*rspamd_rcl_handler_t) (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err); + +/** + * A handler type that is called at the end of section parsing + * @param cfg configuration + * @param ud user data + */ +typedef void (*rspamd_rcl_section_fin_t)(struct config_file *cfg, gpointer ud); + +struct rspamd_rcl_default_handler_data { + struct 
rspamd_rcl_struct_parser pd; + const gchar *key; + rspamd_rcl_handler_t handler; + UT_hash_handle hh; +}; + +struct rspamd_rcl_section { + const gchar *name; /**< name of section */ + rspamd_rcl_handler_t handler; /**< handler of section attributes */ + enum ucl_type type; /**< type of attribute */ + gboolean required; /**< whether this param is required */ + gboolean strict_type; /**< whether we need strict type */ + UT_hash_handle hh; /** hash handle */ + struct rspamd_rcl_section *subsections; /**< hash table of subsections */ + struct rspamd_rcl_default_handler_data *default_parser; /**< generic parsing fields */ + rspamd_rcl_section_fin_t fin; /** called at the end of section parsing */ + gpointer fin_ud; +}; + +/** + * Init common sections known to rspamd + * @return top section + */ +struct rspamd_rcl_section* rspamd_rcl_config_init (void); + +/** + * Get a section specified by path, it understand paths separated by '/' character + * @param top top section + * @param path '/' divided path + * @return + */ +struct rspamd_rcl_section *rspamd_rcl_config_get_section (struct rspamd_rcl_section *top, + const char *path); + +/** + * Read RCL configuration and parse it to a config file + * @param top top section + * @param cfg target configuration + * @param obj object to handle + * @return TRUE if an object can be parsed + */ +gboolean rspamd_read_rcl_config (struct rspamd_rcl_section *top, + struct config_file *cfg, const ucl_object_t *obj, GError **err); + + +/** + * Parse default structure for a section + * @param section section + * @param cfg config file + * @param obj object to parse + * @param ptr ptr to pass + * @param err error ptr + * @return TRUE if the object has been parsed + */ +gboolean rspamd_rcl_section_parse_defaults (struct rspamd_rcl_section *section, + struct config_file *cfg, const ucl_object_t *obj, gpointer ptr, + GError **err); +/** + * Here is a section of common handlers that accepts rcl_struct_parser + * which itself contains a struct 
pointer and the offset of a member in a + * specific structure + */ + +/** + * Parse a string field of a structure + * @param cfg config pointer + * @param obj object to parse + * @param ud struct_parser structure + * @param section the current section + * @param err error pointer + * @return TRUE if a string value has been successfully parsed + */ +gboolean rspamd_rcl_parse_struct_string (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err); + +/** + * Parse an integer field of a structure + * @param cfg config pointer + * @param obj object to parse + * @param ud struct_parser structure + * @param section the current section + * @param err error pointer + * @return TRUE if a value has been successfully parsed + */ +gboolean rspamd_rcl_parse_struct_integer (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err); + + +/** + * Parse a float field of a structure + * @param cfg config pointer + * @param obj object to parse + * @param ud struct_parser structure + * @param section the current section + * @param err error pointer + * @return TRUE if a value has been successfully parsed + */ +gboolean rspamd_rcl_parse_struct_double (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err); + +/** + * Parse a time field of a structure + * @param cfg config pointer + * @param obj object to parse + * @param ud struct_parser structure (flags mean the exact structure used) + * @param section the current section + * @param err error pointer + * @return TRUE if a value has been successfully parsed + */ +gboolean rspamd_rcl_parse_struct_time (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err); + +/** + * Parse a string list field of a structure presented by a GList* object + * @param cfg config pointer + * @param obj object to parse + * 
@param ud struct_parser structure (flags mean the exact structure used) + * @param section the current section + * @param err error pointer + * @return TRUE if a value has been successfully parsed + */ +gboolean rspamd_rcl_parse_struct_string_list (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err); + +/** + * Parse a boolean field of a structure + * @param cfg config pointer + * @param obj object to parse + * @param ud struct_parser structure (flags mean the exact structure used) + * @param section the current section + * @param err error pointer + * @return TRUE if a value has been successfully parsed + */ +gboolean rspamd_rcl_parse_struct_boolean (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err); + +/** + * Utility functions + */ + +/** + * Register new parser for a worker type of an option with the specified name + * @param cfg config structure + * @param type type of worker (GQuark) + * @param name name of option + * @param handler handler of option + * @param target opaque target structure + * @param offset offset inside a structure + */ +void rspamd_rcl_register_worker_option (struct config_file *cfg, gint type, const gchar *name, + rspamd_rcl_handler_t handler, gpointer target, gsize offset, gint flags); + +/** + * Register a default parser for a worker + * @param cfg config structure + * @param type type of worker (GQuark) + * @param func handler function + * @param ud userdata for handler function + */ +void rspamd_rcl_register_worker_parser (struct config_file *cfg, gint type, + gboolean (*func)(ucl_object_t *, gpointer), gpointer ud); +#endif /* CFG_RCL_H_ */ diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c new file mode 100644 index 000000000..2ca846ebd --- /dev/null +++ b/src/libserver/cfg_utils.c @@ -0,0 +1,969 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include "config.h" + +#include "cfg_file.h" +#include "main.h" +#include "filter.h" +#include "settings.h" +#include "classifiers/classifiers.h" +#include "lua/lua_common.h" +#include "kvstorage_config.h" +#include "map.h" +#include "dynamic_cfg.h" + +#define DEFAULT_SCORE 10.0 + +#define DEFAULT_RLIMIT_NOFILE 2048 +#define DEFAULT_RLIMIT_MAXCORE 0 +#define DEFAULT_MAP_TIMEOUT 10 + +struct rspamd_ucl_map_cbdata { + struct config_file *cfg; + GString *buf; +}; +static gchar* rspamd_ucl_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data); +static void rspamd_ucl_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data); + +static gboolean +parse_host_port_priority_strv (rspamd_mempool_t *pool, gchar **tokens, + gchar **addr, guint16 *port, guint *priority, guint default_port) +{ + gchar *err_str, portbuf[8]; + const gchar *cur_tok, *cur_port; + struct addrinfo hints, *res; + guint port_parsed, priority_parsed, saved_errno = errno; + gint r; + union { + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } addr_holder; + + /* Now try to parse host and write address to ina */ + memset (&hints, 0, sizeof (hints)); + hints.ai_socktype = SOCK_STREAM; /* Type of the socket */ + hints.ai_flags = AI_NUMERICSERV; + + cur_tok = tokens[0]; + + if (strcmp (cur_tok, "*v6") == 0) { + hints.ai_family = AF_INET6; + hints.ai_flags |= AI_PASSIVE; + cur_tok = NULL; + } + else if (strcmp (cur_tok, "*v4") == 0) { + hints.ai_family = AF_INET; + hints.ai_flags |= AI_PASSIVE; + cur_tok = NULL; + } + else { + hints.ai_family = AF_UNSPEC; + } + + if (tokens[1] != NULL) { + /* Port part */ + rspamd_strlcpy (portbuf, tokens[1], sizeof (portbuf)); + cur_port = portbuf; + if (port != NULL) { + errno = 0; + port_parsed = strtoul (tokens[1], &err_str, 10); + if (*err_str != '\0' || errno != 0) { + msg_warn ("cannot parse port: %s, at symbol %c, error: %s", tokens[1], *err_str, strerror (errno)); + hints.ai_flags ^= AI_NUMERICSERV; + } + else if 
(port_parsed > G_MAXUINT16) { + errno = ERANGE; + msg_warn ("cannot parse port: %s, error: %s", tokens[1], strerror (errno)); + hints.ai_flags ^= AI_NUMERICSERV; + } + else { + *port = port_parsed; + } + } + if (priority != NULL) { + /* Keep the priority token in its own variable: cur_tok still holds the host that getaddrinfo () resolves below */ + const gchar *prio_tok = (port != NULL) ? tokens[2] : tokens[1]; + + if (prio_tok != NULL) { + /* Priority part */ + errno = 0; + priority_parsed = strtoul (prio_tok, &err_str, 10); + if (*err_str != '\0' || errno != 0) { + msg_warn ("cannot parse priority: %s, at symbol %c, error: %s", prio_tok, *err_str, strerror (errno)); + } + else { + *priority = priority_parsed; + } + } + } + } + else if (default_port != 0) { + rspamd_snprintf (portbuf, sizeof (portbuf), "%ud", default_port); + cur_port = portbuf; + } + else { + cur_port = NULL; + } + + if ((r = getaddrinfo (cur_tok, cur_port, &hints, &res)) == 0) { + memcpy (&addr_holder, res->ai_addr, MIN (sizeof (addr_holder), res->ai_addrlen)); + if (res->ai_family == AF_INET) { + if (pool != NULL) { + *addr = rspamd_mempool_alloc (pool, INET_ADDRSTRLEN + 1); + } + inet_ntop (res->ai_family, &addr_holder.v4.sin_addr, *addr, INET_ADDRSTRLEN + 1); + } + else { + if (pool != NULL) { + *addr = rspamd_mempool_alloc (pool, INET6_ADDRSTRLEN + 1); + } + inet_ntop (res->ai_family, &addr_holder.v6.sin6_addr, *addr, INET6_ADDRSTRLEN + 1); + } + freeaddrinfo (res); + } + else { + msg_err ("address resolution for %s failed: %s", tokens[0], gai_strerror (r)); + goto err; + } + + /* Restore errno */ + errno = saved_errno; + return TRUE; + +err: + errno = saved_errno; + return FALSE; +} + +gboolean +parse_host_port_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port, guint *priority) +{ + gchar **tokens; + gboolean ret; + + tokens = g_strsplit_set (str, ":", 0); + if (!tokens || !tokens[0]) { + /* An empty (non-NULL) vector must still be released; g_strfreev () accepts NULL */ + g_strfreev (tokens); + return FALSE; + } + + ret = parse_host_port_priority_strv (pool, tokens, addr, port, priority, 0); + + g_strfreev (tokens); + + return ret; +} + +gboolean
+parse_host_port (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port) +{ + return parse_host_port_priority (pool, str, addr, port, NULL); +} + +gboolean +parse_host_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint *priority) +{ + return parse_host_port_priority (pool, str, addr, NULL, priority); +} + +gboolean +parse_bind_line (struct config_file *cfg, struct worker_conf *cf, const gchar *str) +{ + struct rspamd_worker_bind_conf *cnf; + gchar **tokens, *tmp, *err; + gboolean ret = TRUE; + + if (str == NULL) { + return FALSE; + } + + tokens = g_strsplit_set (str, ":", 0); + if (!tokens || !tokens[0]) { + /* An empty (non-NULL) vector must still be released; g_strfreev () accepts NULL */ + g_strfreev (tokens); + return FALSE; + } + + cnf = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_bind_conf)); + cnf->bind_port = DEFAULT_BIND_PORT; + cnf->bind_host = rspamd_mempool_strdup (cfg->cfg_pool, str); + cnf->ai = AF_UNSPEC; + + if (*tokens[0] == '/' || *tokens[0] == '.') { + cnf->ai = AF_UNIX; + LL_PREPEND (cf->bind_conf, cnf); + /* Do not leak the token vector on the early return */ + g_strfreev (tokens); + return TRUE; + } + else if (strcmp (tokens[0], "*") == 0) { + /* We need to add two listen entries: one for ipv4 and one for ipv6 */ + tmp = tokens[0]; + tokens[0] = "*v4"; + cnf->ai = AF_INET; + if ((ret = parse_host_port_priority_strv (cfg->cfg_pool, tokens, + &cnf->bind_host, &cnf->bind_port, NULL, DEFAULT_BIND_PORT))) { + LL_PREPEND (cf->bind_conf, cnf); + } + cnf = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_bind_conf)); + cnf->bind_port = DEFAULT_BIND_PORT; + cnf->bind_host = rspamd_mempool_strdup (cfg->cfg_pool, str); + cnf->ai = AF_INET6; + tokens[0] = "*v6"; + if ((ret &= parse_host_port_priority_strv (cfg->cfg_pool, tokens, + &cnf->bind_host, &cnf->bind_port, NULL, DEFAULT_BIND_PORT))) { + LL_PREPEND (cf->bind_conf, cnf); + } + tokens[0] = tmp; + } + else if (strcmp (tokens[0], "systemd") == 0) { + /* The actual socket will be passed by systemd environment */ + cnf->bind_host = rspamd_mempool_strdup (cfg->cfg_pool, str); + cnf->is_systemd = TRUE; + if (tokens[1] == NULL) { + /* Nothing follows "systemd": strtoul () must not be handed a NULL pointer */ + msg_err ("cannot parse bind line: %s", str); + ret = FALSE; + } + else { + cnf->ai = strtoul (tokens[1], &err,
10); + if (err == NULL || *err == '\0') { + LL_PREPEND (cf->bind_conf, cnf); + } + else { + /* Report a malformed number instead of silently dropping the entry */ + msg_err ("cannot parse bind line: %s", str); + ret = FALSE; + } + } + } + else { + if ((ret = parse_host_port_priority_strv (cfg->cfg_pool, tokens, + &cnf->bind_host, &cnf->bind_port, NULL, DEFAULT_BIND_PORT))) { + LL_PREPEND (cf->bind_conf, cnf); + } + } + + g_strfreev (tokens); + + return ret; +} + +void +init_defaults (struct config_file *cfg) +{ + + cfg->memcached_error_time = DEFAULT_UPSTREAM_ERROR_TIME; + cfg->memcached_dead_time = DEFAULT_UPSTREAM_DEAD_TIME; + cfg->memcached_maxerrors = DEFAULT_UPSTREAM_MAXERRORS; + cfg->memcached_protocol = TCP_TEXT; + + cfg->dns_timeout = 1000; + cfg->dns_retransmits = 5; + /* After 20 errors do throttling for 10 seconds */ + cfg->dns_throttling_errors = 20; + cfg->dns_throttling_time = 10000; + /* 16 sockets per DNS server */ + cfg->dns_io_per_server = 16; + + cfg->statfile_sync_interval = 60000; + cfg->statfile_sync_timeout = 20000; + + /* 20 Kb */ + cfg->max_diff = 20480; + + cfg->metrics = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + cfg->c_modules = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + cfg->composite_symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + cfg->classifiers_symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + cfg->cfg_params = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + cfg->metrics_symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + + cfg->map_timeout = DEFAULT_MAP_TIMEOUT; + + cfg->log_level = G_LOG_LEVEL_WARNING; + cfg->log_extended = TRUE; + + init_settings (cfg); + +} + +void +free_config (struct config_file *cfg) +{ + GList *cur; + struct symbols_group *gr; + + remove_all_maps (cfg); + ucl_obj_unref (cfg->rcl_obj); + g_hash_table_remove_all (cfg->metrics); + g_hash_table_unref (cfg->metrics); + g_hash_table_remove_all (cfg->c_modules); + g_hash_table_unref (cfg->c_modules); + g_hash_table_remove_all (cfg->composite_symbols); + g_hash_table_unref (cfg->composite_symbols); +
g_hash_table_remove_all (cfg->cfg_params); + g_hash_table_unref (cfg->cfg_params); + g_hash_table_destroy (cfg->metrics_symbols); + g_hash_table_destroy (cfg->classifiers_symbols); + /* Free symbols groups */ + cur = cfg->symbols_groups; + while (cur) { + gr = cur->data; + if (gr->symbols) { + g_list_free (gr->symbols); + } + cur = g_list_next (cur); + } + if (cfg->symbols_groups) { + g_list_free (cfg->symbols_groups); + } + + if (cfg->checksum) { + g_free (cfg->checksum); + } + g_list_free (cfg->classifiers); + g_list_free (cfg->metrics_list); + rspamd_mempool_delete (cfg->cfg_pool); +} + +const ucl_object_t * +get_module_opt (struct config_file *cfg, const gchar *module_name, const gchar *opt_name) +{ + const ucl_object_t *res = NULL, *sec; + + sec = ucl_obj_get_key (cfg->rcl_obj, module_name); + if (sec != NULL) { + res = ucl_obj_get_key (sec, opt_name); + } + + return res; +} + +guint64 +parse_limit (const gchar *limit, guint len) +{ + guint64 result = 0; + const gchar *err_str; + + if (!limit || *limit == '\0' || len == 0) { + return 0; + } + + errno = 0; + result = strtoull (limit, (gchar **)&err_str, 10); + + if (*err_str != '\0') { + /* Megabytes */ + if (*err_str == 'm' || *err_str == 'M') { + result *= 1048576L; + } + /* Kilobytes */ + else if (*err_str == 'k' || *err_str == 'K') { + result *= 1024; + } + /* Gigabytes */ + else if (*err_str == 'g' || *err_str == 'G') { + result *= 1073741824L; + } + else if (len > 0 && err_str - limit != (gint)len) { + msg_warn ("invalid limit value '%s' at position '%s'", limit, err_str); + result = 0; + } + } + + return result; +} + +gchar +parse_flag (const gchar *str) +{ + guint len; + gchar c; + + if (!str || !*str) { + return -1; + } + + len = strlen (str); + + switch (len) { + case 1: + c = g_ascii_tolower (*str); + if (c == 'y' || c == '1') { + return 1; + } + else if (c == 'n' || c == '0') { + return 0; + } + break; + case 2: + if (g_ascii_strncasecmp (str, "no", len) == 0) { + return 0; + } + else if 
(g_ascii_strncasecmp (str, "on", len) == 0) { + return 1; + } + break; + case 3: + if (g_ascii_strncasecmp (str, "yes", len) == 0) { + return 1; + } + else if (g_ascii_strncasecmp (str, "off", len) == 0) { + return 0; + } + break; + case 4: + if (g_ascii_strncasecmp (str, "true", len) == 0) { + return 1; + } + break; + case 5: + if (g_ascii_strncasecmp (str, "false", len) == 0) { + return 0; + } + break; + } + + return -1; +} + +gboolean +get_config_checksum (struct config_file *cfg) +{ + gint fd; + void *map; + struct stat st; + + /* Compute checksum for config file that should be used by xml dumper */ + if ((fd = open (cfg->cfg_name, O_RDONLY)) == -1) { + msg_err ("config file %s is no longer available, cannot calculate checksum", cfg->cfg_name); + return FALSE; + } + if (stat (cfg->cfg_name, &st) == -1) { + msg_err ("cannot stat %s: %s", cfg->cfg_name, strerror (errno)); + /* The descriptor opened above must not leak on this error path */ + close (fd); + return FALSE; + } + + /* Now mmap this file to simplify reading process */ + if ((map = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { + msg_err ("cannot mmap %s: %s", cfg->cfg_name, strerror (errno)); + close (fd); + return FALSE; + } + close (fd); + + /* Get checksum for a file */ + cfg->checksum = g_compute_checksum_for_string (G_CHECKSUM_MD5, map, st.st_size); + munmap (map, st.st_size); + + return TRUE; +} +/* + * Perform post load actions + */ +void +post_load_config (struct config_file *cfg) +{ +#ifdef HAVE_CLOCK_GETTIME + struct timespec ts; +#endif + struct metric *def_metric; + +#ifdef HAVE_CLOCK_GETTIME +#ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID + clock_getres (CLOCK_PROCESS_CPUTIME_ID, &ts); +# elif defined(HAVE_CLOCK_VIRTUAL) + clock_getres (CLOCK_VIRTUAL, &ts); +# else + clock_getres (CLOCK_REALTIME, &ts); +# endif + + cfg->clock_res = (gint)log10 (1000000 / ts.tv_nsec); + if (cfg->clock_res < 0) { + cfg->clock_res = 0; + } + if (cfg->clock_res > 3) { + cfg->clock_res = 3; + } +#else + /* For gettimeofday */ + cfg->clock_res = 1; +#endif + + if ((def_metric =
g_hash_table_lookup (cfg->metrics, DEFAULT_METRIC)) == NULL) { + def_metric = check_metric_conf (cfg, NULL); + def_metric->name = DEFAULT_METRIC; + def_metric->actions[METRIC_ACTION_REJECT].score = DEFAULT_SCORE; + cfg->metrics_list = g_list_prepend (cfg->metrics_list, def_metric); + g_hash_table_insert (cfg->metrics, DEFAULT_METRIC, def_metric); + } + + cfg->default_metric = def_metric; + + /* Lua options */ + (void)lua_post_load_config (cfg); + init_dynamic_config (cfg); +} + +#if 0 +void +parse_err (const gchar *fmt, ...) +{ + va_list aq; + gchar logbuf[BUFSIZ], readbuf[32]; + gint r; + + va_start (aq, fmt); + rspamd_strlcpy (readbuf, yytext, sizeof (readbuf)); + + r = snprintf (logbuf, sizeof (logbuf), "config file parse error! line: %d, text: %s, reason: ", yylineno, readbuf); + r += vsnprintf (logbuf + r, sizeof (logbuf) - r, fmt, aq); + + va_end (aq); + g_critical ("%s", logbuf); +} + +void +parse_warn (const gchar *fmt, ...) +{ + va_list aq; + gchar logbuf[BUFSIZ], readbuf[32]; + gint r; + + va_start (aq, fmt); + rspamd_strlcpy (readbuf, yytext, sizeof (readbuf)); + + r = snprintf (logbuf, sizeof (logbuf), "config file parse warning! 
line: %d, text: %s, reason: ", yylineno, readbuf); + r += vsnprintf (logbuf + r, sizeof (logbuf) - r, fmt, aq); + + va_end (aq); + g_warning ("%s", logbuf); +} +#endif + +void +unescape_quotes (gchar *line) +{ + gchar *c = line, *t; + + while (*c) { + if (*c == '\\' && *(c + 1) == '"') { + t = c; + while (*t) { + *t = *(t + 1); + t++; + } + } + c++; + } +} + +GList * +parse_comma_list (rspamd_mempool_t * pool, const gchar *line) +{ + GList *res = NULL; + const gchar *c, *p; + gchar *str; + + c = line; + p = c; + + while (*p) { + if (*p == ',' && *c != *p) { + str = rspamd_mempool_alloc (pool, p - c + 1); + rspamd_strlcpy (str, c, p - c + 1); + res = g_list_prepend (res, str); + /* Skip spaces */ + while (g_ascii_isspace (*(++p))); + c = p; + continue; + } + p++; + } + if (res != NULL) { + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_list_free, res); + } + + return res; +} + +struct classifier_config * +check_classifier_conf (struct config_file *cfg, struct classifier_config *c) +{ + if (c == NULL) { + c = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct classifier_config)); + } + if (c->opts == NULL) { + c->opts = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t) g_hash_table_destroy, c->opts); + } + if (c->labels == NULL) { + c->labels = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, NULL, (GDestroyNotify)g_list_free); + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t) g_hash_table_destroy, c->labels); + } + + return c; +} + +struct statfile* +check_statfile_conf (struct config_file *cfg, struct statfile *c) +{ + if (c == NULL) { + c = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct statfile)); + } + + return c; +} + +struct metric * +check_metric_conf (struct config_file *cfg, struct metric *c) +{ + int i; + if (c == NULL) { + c = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct metric)); + c->grow_factor = 1.0; + 
c->symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + c->descriptions = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) { + c->actions[i].score = -1.0; + } + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t) g_hash_table_destroy, c->symbols); + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t) g_hash_table_destroy, c->descriptions); + } + + return c; +} + +struct worker_conf * +check_worker_conf (struct config_file *cfg, struct worker_conf *c) +{ + if (c == NULL) { + c = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct worker_conf)); + c->params = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + c->active_workers = g_queue_new (); + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_hash_table_destroy, c->params); + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_queue_free, c->active_workers); +#ifdef HAVE_SC_NPROCESSORS_ONLN + c->count = sysconf (_SC_NPROCESSORS_ONLN); +#else + c->count = DEFAULT_WORKERS_NUM; +#endif + c->rlimit_nofile = DEFAULT_RLIMIT_NOFILE; + c->rlimit_maxcore = DEFAULT_RLIMIT_MAXCORE; + } + + return c; +} + + +static bool +rspamd_include_map_handler (const guchar *data, gsize len, void* ud) +{ + struct config_file *cfg = (struct config_file *)ud; + struct rspamd_ucl_map_cbdata *cbdata, **pcbdata; + gchar *map_line; + + map_line = rspamd_mempool_alloc (cfg->cfg_pool, len + 1); + rspamd_strlcpy (map_line, data, len + 1); + + cbdata = g_malloc (sizeof (struct rspamd_ucl_map_cbdata)); + pcbdata = g_malloc (sizeof (struct rspamd_ucl_map_cbdata *)); + cbdata->buf = NULL; + cbdata->cfg = cfg; + *pcbdata = cbdata; + + return add_map (cfg, map_line, "ucl include", rspamd_ucl_read_cb, rspamd_ucl_fin_cb, (void **)pcbdata); +} + +/* + * Variables: + * $CONFDIR - configuration directory + * $RUNDIR - local states directory + * $DBDIR - databases dir + * $LOGDIR - 
logs dir + * $PLUGINSDIR - pluggins dir + * $PREFIX - installation prefix + * $VERSION - rspamd version + */ + +#define RSPAMD_CONFDIR_MACRO "CONFDIR" +#define RSPAMD_RUNDIR_MACRO "RUNDIR" +#define RSPAMD_DBDIR_MACRO "DBDIR" +#define RSPAMD_LOGDIR_MACRO "LOGDIR" +#define RSPAMD_PLUGINSDIR_MACRO "PLUGINSDIR" +#define RSPAMD_PREFIX_MACRO "PREFIX" +#define RSPAMD_VERSION_MACRO "VERSION" + +static void +rspamd_ucl_add_conf_variables (struct ucl_parser *parser) +{ + ucl_parser_register_variable (parser, RSPAMD_CONFDIR_MACRO, RSPAMD_CONFDIR); + ucl_parser_register_variable (parser, RSPAMD_RUNDIR_MACRO, RSPAMD_RUNDIR); + ucl_parser_register_variable (parser, RSPAMD_DBDIR_MACRO, RSPAMD_DBDIR); + ucl_parser_register_variable (parser, RSPAMD_LOGDIR_MACRO, RSPAMD_LOGDIR); + ucl_parser_register_variable (parser, RSPAMD_PLUGINSDIR_MACRO, RSPAMD_PLUGINSDIR); + ucl_parser_register_variable (parser, RSPAMD_PREFIX_MACRO, RSPAMD_PREFIX); + ucl_parser_register_variable (parser, RSPAMD_VERSION_MACRO, RVERSION); +} + +static void +rspamd_ucl_add_conf_macros (struct ucl_parser *parser, struct config_file *cfg) +{ + ucl_parser_register_macro (parser, "include_map", rspamd_include_map_handler, cfg); +} + +gboolean +read_rspamd_config (struct config_file *cfg, const gchar *filename, + const gchar *convert_to, rspamd_rcl_section_fin_t logger_fin, + gpointer logger_ud) +{ + struct stat st; + gint fd; + gchar *data; + GError *err = NULL; + struct rspamd_rcl_section *top, *logger; + gboolean res; + struct ucl_parser *parser; + + if (stat (filename, &st) == -1) { + msg_err ("cannot stat %s: %s", filename, strerror (errno)); + return FALSE; + } + if ((fd = open (filename, O_RDONLY)) == -1) { + msg_err ("cannot open %s: %s", filename, strerror (errno)); + return FALSE; + + } + /* Now mmap this file to simplify reading process */ + if ((data = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { + msg_err ("cannot mmap %s: %s", filename, strerror (errno)); + close (fd); + return 
FALSE; + } + close (fd); + + parser = ucl_parser_new (0); + rspamd_ucl_add_conf_variables (parser); + rspamd_ucl_add_conf_macros (parser, cfg); + if (!ucl_parser_add_chunk (parser, data, st.st_size)) { + msg_err ("ucl parser error: %s", ucl_parser_get_error (parser)); + ucl_parser_free (parser); + munmap (data, st.st_size); + return FALSE; + } + munmap (data, st.st_size); + cfg->rcl_obj = ucl_parser_get_object (parser); + ucl_parser_free (parser); + res = TRUE; + + if (!res) { + return FALSE; + } + + top = rspamd_rcl_config_init (); + err = NULL; + + HASH_FIND_STR(top, "logging", logger); + if (logger != NULL) { + logger->fin = logger_fin; + logger->fin_ud = logger_ud; + } + + if (!rspamd_read_rcl_config (top, cfg, cfg->rcl_obj, &err)) { + msg_err ("rcl parse error: %s", err->message); + return FALSE; + } + + return TRUE; +} + +static void +symbols_classifiers_callback (gpointer key, gpointer value, gpointer ud) +{ + struct config_file *cfg = ud; + + register_virtual_symbol (&cfg->cache, key, 1.0); +} + +void +insert_classifier_symbols (struct config_file *cfg) +{ + g_hash_table_foreach (cfg->classifiers_symbols, symbols_classifiers_callback, cfg); +} + +struct classifier_config* +find_classifier_conf (struct config_file *cfg, const gchar *name) +{ + GList *cur; + struct classifier_config *cf; + + if (name == NULL) { + return NULL; + } + + cur = cfg->classifiers; + while (cur) { + cf = cur->data; + + if (g_ascii_strcasecmp (cf->classifier->name, name) == 0) { + return cf; + } + + cur = g_list_next (cur); + } + + return NULL; +} + +gboolean +check_classifier_statfiles (struct classifier_config *cf) +{ + struct statfile *st; + gboolean has_other = FALSE, res = FALSE, cur_class; + GList *cur; + + /* First check classes directly */ + cur = cf->statfiles; + while (cur) { + st = cur->data; + if (!has_other) { + cur_class = st->is_spam; + has_other = TRUE; + } + else { + if (cur_class != st->is_spam) { + return TRUE; + } + } + + cur = g_list_next (cur); + } + + if 
(!has_other) { + /* We have only one statfile */ + return FALSE; + } + /* We have not detected any statfile that has different class, so turn on euristic based on symbol's name */ + has_other = FALSE; + cur = cf->statfiles; + while (cur) { + st = cur->data; + if (rspamd_strncasestr (st->symbol, "spam", -1) != NULL) { + st->is_spam = TRUE; + } + else if (rspamd_strncasestr (st->symbol, "ham", -1) != NULL) { + st->is_spam = FALSE; + } + + if (!has_other) { + cur_class = st->is_spam; + has_other = TRUE; + } + else { + if (cur_class != st->is_spam) { + res = TRUE; + } + } + + cur = g_list_next (cur); + } + + return res; +} + +static gchar* +rspamd_ucl_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) +{ + struct rspamd_ucl_map_cbdata *cbdata = data->cur_data, *prev; + + if (cbdata == NULL) { + cbdata = g_malloc (sizeof (struct rspamd_ucl_map_cbdata)); + prev = data->prev_data; + cbdata->buf = g_string_sized_new (BUFSIZ); + cbdata->cfg = prev->cfg; + data->cur_data = cbdata; + } + g_string_append_len (cbdata->buf, chunk, len); + + /* Say not to copy any part of this buffer */ + return NULL; +} + +static void +rspamd_ucl_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data) +{ + struct rspamd_ucl_map_cbdata *cbdata = data->cur_data, *prev = data->prev_data; + ucl_object_t *obj; + struct ucl_parser *parser; + guint32 checksum; + + if (prev != NULL) { + if (prev->buf != NULL) { + g_string_free (prev->buf, TRUE); + } + g_free (prev); + } + + if (cbdata == NULL) { + msg_err ("map fin error: new data is NULL"); + return; + } + + checksum = murmur32_hash (cbdata->buf->str, cbdata->buf->len); + if (data->map->checksum != checksum) { + /* New data available */ + parser = ucl_parser_new (0); + if (!ucl_parser_add_chunk (parser, cbdata->buf->str, cbdata->buf->len)) { + msg_err ("cannot parse map %s: %s", data->map->uri, ucl_parser_get_error (parser)); + ucl_parser_free (parser); + } + else { + obj = ucl_parser_get_object (parser); + 
ucl_parser_free (parser); + /* XXX: add replace objects code */ + ucl_object_unref (obj); + data->map->checksum = checksum; + } + } + else { + msg_info ("do not reload map %s, checksum is the same: %d", data->map->uri, checksum); + } +} + +gboolean +rspamd_parse_ip_list (const gchar *ip_list, radix_tree_t **tree) +{ + gchar **strvec, **cur; + struct in_addr ina; + guint32 mask; + + strvec = g_strsplit_set (ip_list, ",", 0); + cur = strvec; + + while (*cur != NULL) { + /* XXX: handle only ipv4 addresses */ + if (parse_ipmask_v4 (*cur, &ina, &mask)) { + if (*tree == NULL) { + *tree = radix_tree_create (); + } + radix32tree_add (*tree, htonl (ina.s_addr), mask, 1); + } + cur ++; + } + + return (*tree != NULL); +} + +/* + * vi:ts=4 + */ diff --git a/src/libserver/dkim.c b/src/libserver/dkim.c new file mode 100644 index 000000000..c7c8a35e1 --- /dev/null +++ b/src/libserver/dkim.c @@ -0,0 +1,1480 @@ +/* Copyright (c) 2010-2011, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "main.h" +#include "message.h" +#include "dkim.h" +#include "dns.h" + +/* Parser of dkim params */ +typedef gboolean (*dkim_parse_param_f) (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); + +static gboolean rspamd_dkim_parse_signature (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_signalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_domain (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_canonalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_ignore (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_selector (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_hdrlist (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_version (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_timestamp (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_expiration (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_bodyhash 
(rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_bodylength (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); + + +static const dkim_parse_param_f parser_funcs[] = { + [DKIM_PARAM_SIGNATURE] = rspamd_dkim_parse_signature, + [DKIM_PARAM_SIGNALG] = rspamd_dkim_parse_signalg, + [DKIM_PARAM_DOMAIN] = rspamd_dkim_parse_domain, + [DKIM_PARAM_CANONALG] = rspamd_dkim_parse_canonalg, + [DKIM_PARAM_QUERYMETHOD] = rspamd_dkim_parse_ignore, + [DKIM_PARAM_SELECTOR] = rspamd_dkim_parse_selector, + [DKIM_PARAM_HDRLIST] = rspamd_dkim_parse_hdrlist, + [DKIM_PARAM_VERSION] = rspamd_dkim_parse_version, + [DKIM_PARAM_IDENTITY] = rspamd_dkim_parse_ignore, + [DKIM_PARAM_TIMESTAMP] = rspamd_dkim_parse_timestamp, + [DKIM_PARAM_EXPIRATION] = rspamd_dkim_parse_expiration, + [DKIM_PARAM_COPIEDHDRS] = rspamd_dkim_parse_ignore, + [DKIM_PARAM_BODYHASH] = rspamd_dkim_parse_bodyhash, + [DKIM_PARAM_BODYLENGTH] = rspamd_dkim_parse_bodylength +}; + +struct rspamd_dkim_header { + gchar *name; + guint count; +}; + +#define DKIM_ERROR dkim_error_quark () +GQuark +dkim_error_quark (void) +{ + return g_quark_from_static_string ("dkim-error-quark"); +} + +/* Parsers implementation */ +static gboolean +rspamd_dkim_parse_signature (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + ctx->b = rspamd_mempool_alloc (ctx->pool, len + 1); + rspamd_strlcpy (ctx->b, param, len + 1); +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 20)) + gchar *tmp; + gsize tmp_len = len; + tmp = g_base64_decode (ctx->b, &tmp_len); + rspamd_strlcpy (ctx->b, tmp, len + 1); + g_free (tmp); +#else + g_base64_decode_inplace (ctx->b, &len); +#endif + ctx->blen = len; + return TRUE; +} + +static gboolean +rspamd_dkim_parse_signalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + if (len == 8) { + if (memcmp (param, "rsa-sha1", len) == 0) { + ctx->sig_alg = DKIM_SIGN_RSASHA1; + return TRUE; 
+ } + } + else if (len == 10) { + if (memcmp (param, "rsa-sha256", len) == 0) { + ctx->sig_alg = DKIM_SIGN_RSASHA256; + return TRUE; + } + } + + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_A, "invalid dkim sign algorithm"); + return FALSE; +} + +static gboolean +rspamd_dkim_parse_domain (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + ctx->domain = rspamd_mempool_alloc (ctx->pool, len + 1); + rspamd_strlcpy (ctx->domain, param, len + 1); + return TRUE; +} + +static gboolean +rspamd_dkim_parse_canonalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + const gchar *p, *slash = NULL, *end = param + len; + gsize sl = 0; + + p = param; + while (p != end) { + if (*p == '/') { + slash = p; + break; + } + p ++; + sl ++; + } + + if (slash == NULL) { + /* Only check header */ + if (len == 6 && memcmp (param, "simple", len) == 0) { + ctx->header_canon_type = DKIM_CANON_SIMPLE; + return TRUE; + } + else if (len == 7 && memcmp (param, "relaxed", len) == 0) { + ctx->header_canon_type = DKIM_CANON_RELAXED; + return TRUE; + } + } + else { + /* First check header */ + if (sl == 6 && memcmp (param, "simple", sl) == 0) { + ctx->header_canon_type = DKIM_CANON_SIMPLE; + } + else if (sl == 7 && memcmp (param, "relaxed", sl) == 0) { + ctx->header_canon_type = DKIM_CANON_RELAXED; + } + else { + goto err; + } + /* Check body */ + len -= sl + 1; + slash ++; + if (len == 6 && memcmp (slash, "simple", len) == 0) { + ctx->body_canon_type = DKIM_CANON_SIMPLE; + return TRUE; + } + else if (len == 7 && memcmp (slash, "relaxed", len) == 0) { + ctx->body_canon_type = DKIM_CANON_RELAXED; + return TRUE; + } + } + +err: + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_A, "invalid dkim canonization algorithm"); + return FALSE; +} + +static gboolean +rspamd_dkim_parse_ignore (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + /* Just ignore unused params */ + return TRUE; +} + +static gboolean 
+rspamd_dkim_parse_selector (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + ctx->selector = rspamd_mempool_alloc (ctx->pool, len + 1); + rspamd_strlcpy (ctx->selector, param, len + 1); + return TRUE; +} + +static struct rspamd_dkim_header* +rspamd_dkim_find_header (GPtrArray *arr, const gchar *name, gsize len) +{ + guint i; + struct rspamd_dkim_header *h; + + for (i = 0; i < arr->len; i ++) { + h = g_ptr_array_index (arr, i); + if (g_ascii_strncasecmp (h->name, name, len) == 0) { + return h; + } + } + + return NULL; +} + +static void +rspamd_dkim_hlist_free (void *ud) +{ + GPtrArray *a = ud; + + g_ptr_array_free (a, TRUE); +} + +static gboolean +rspamd_dkim_parse_hdrlist (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + const gchar *c, *p, *end = param + len; + gchar *h; + gboolean from_found = FALSE; + guint count = 0; + struct rspamd_dkim_header *new; + + p = param; + while (p <= end) { + if ((*p == ':' || p == end)) { + count ++; + } + p ++; + } + + if (count > 0) { + ctx->hlist = g_ptr_array_sized_new (count); + } + else { + return FALSE; + } + + c = param; + p = param; + while (p <= end) { + if ((*p == ':' || p == end) && p - c > 0) { + if ((new = rspamd_dkim_find_header (ctx->hlist, c, p - c)) != NULL) { + new->count ++; + } + else { + /* Insert new header to the list */ + new = rspamd_mempool_alloc (ctx->pool, sizeof (struct rspamd_dkim_header)); + h = rspamd_mempool_alloc (ctx->pool, p - c + 1); + rspamd_strlcpy (h, c, p - c + 1); + g_strstrip (h); + new->name = h; + new->count = 1; + /* Check mandatory from */ + if (!from_found && g_ascii_strcasecmp (h, "from") == 0) { + from_found = TRUE; + } + g_ptr_array_add (ctx->hlist, new); + } + c = p + 1; + p ++; + } + else { + p ++; + } + } + + if (!ctx->hlist) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_H, "invalid dkim header list"); + return FALSE; + } + else { + if (!from_found) { + g_ptr_array_free (ctx->hlist, TRUE); + g_set_error (err, 
DKIM_ERROR, DKIM_SIGERROR_INVALID_H, "invalid dkim header list, from header is missing"); + return FALSE; + } + /* Reverse list */ + rspamd_mempool_add_destructor (ctx->pool, (rspamd_mempool_destruct_t)rspamd_dkim_hlist_free, ctx->hlist); + } + + return TRUE; +} + +static gboolean +rspamd_dkim_parse_version (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + if (len != 1 || *param != '1') { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_VERSION, "invalid dkim version"); + return FALSE; + } + + ctx->ver = 1; + return TRUE; +} + +static gboolean +rspamd_dkim_parse_timestamp (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + gulong val; + + if (!rspamd_strtoul (param, len, &val)) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim timestamp"); + return FALSE; + } + ctx->timestamp = val; + + return TRUE; +} + +static gboolean +rspamd_dkim_parse_expiration (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + gulong val; + + if (!rspamd_strtoul (param, len, &val)) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim expiration"); + return FALSE; + } + ctx->expiration = val; + + return TRUE; +} + +static gboolean +rspamd_dkim_parse_bodyhash (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + ctx->bh = rspamd_mempool_alloc (ctx->pool, len + 1); + rspamd_strlcpy (ctx->bh, param, len + 1); +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 20)) + gchar *tmp; + gsize tmp_len = len; + tmp = g_base64_decode (ctx->bh, &tmp_len); + rspamd_strlcpy (ctx->bh, tmp, len + 1); + g_free (tmp); +#else + g_base64_decode_inplace (ctx->bh, &len); +#endif + ctx->bhlen = len; + return TRUE; +} + +static gboolean +rspamd_dkim_parse_bodylength (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + gulong val; + + if (!rspamd_strtoul (param, len, &val)) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_L, "invalid dkim 
body length"); + return FALSE; + } + ctx->len = val; + + return TRUE; +} + +/** + * Create new dkim context from signature + * @param sig message's signature + * @param pool pool to allocate memory from + * @param err pointer to error object + * @return new context or NULL + */ +rspamd_dkim_context_t* +rspamd_create_dkim_context (const gchar *sig, rspamd_mempool_t *pool, guint time_jitter, GError **err) +{ + const gchar *p, *c, *tag = NULL, *end; + gsize taglen; + gint param = DKIM_PARAM_UNKNOWN; + time_t now; + rspamd_dkim_context_t *new; + enum { + DKIM_STATE_TAG = 0, + DKIM_STATE_AFTER_TAG, + DKIM_STATE_VALUE, + DKIM_STATE_SKIP_SPACES = 99, + DKIM_STATE_ERROR = 100 + } state, next_state; + + + new = rspamd_mempool_alloc0 (pool, sizeof (rspamd_dkim_context_t)); + new->pool = pool; + new->header_canon_type = DKIM_CANON_DEFAULT; + new->body_canon_type = DKIM_CANON_DEFAULT; + new->sig_alg = DKIM_SIGN_UNKNOWN; + /* A simple state machine of parsing tags */ + state = DKIM_STATE_SKIP_SPACES; + next_state = DKIM_STATE_TAG; + taglen = 0; + p = sig; + c = sig; + end = p + strlen (p); + while (p <= end) { + switch (state) { + case DKIM_STATE_TAG: + if (g_ascii_isspace (*p)) { + taglen = p - c; + while (*p && g_ascii_isspace (*p)) { + /* Skip spaces before '=' sign */ + p ++; + } + if (*p != '=') { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param"); + state = DKIM_STATE_ERROR; + } + else { + state = DKIM_STATE_SKIP_SPACES; + next_state = DKIM_STATE_AFTER_TAG; + param = DKIM_PARAM_UNKNOWN; + p ++; + tag = c; + } + } + else if (*p == '=') { + state = DKIM_STATE_SKIP_SPACES; + next_state = DKIM_STATE_AFTER_TAG; + param = DKIM_PARAM_UNKNOWN; + p ++; + tag = c; + } + else { + taglen ++; + p ++; + } + break; + case DKIM_STATE_AFTER_TAG: + /* We got tag at tag and len at taglen */ + switch (taglen) { + case 0: + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "zero length dkim param"); + state = DKIM_STATE_ERROR; + break; + case 1: + /* Simple tags 
*/ + switch (*tag) { + case 'v': + param = DKIM_PARAM_VERSION; + break; + case 'a': + param = DKIM_PARAM_SIGNALG; + break; + case 'b': + param = DKIM_PARAM_SIGNATURE; + break; + case 'c': + param = DKIM_PARAM_CANONALG; + break; + case 'd': + param = DKIM_PARAM_DOMAIN; + break; + case 'h': + param = DKIM_PARAM_HDRLIST; + break; + case 'i': + param = DKIM_PARAM_IDENTITY; + break; + case 'l': + param = DKIM_PARAM_BODYLENGTH; + break; + case 'q': + param = DKIM_PARAM_QUERYMETHOD; + break; + case 's': + param = DKIM_PARAM_SELECTOR; + break; + case 't': + param = DKIM_PARAM_TIMESTAMP; + break; + case 'x': + param = DKIM_PARAM_EXPIRATION; + break; + case 'z': + param = DKIM_PARAM_COPIEDHDRS; + break; + default: + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param: %c", *tag); + state = DKIM_STATE_ERROR; + break; + } + break; + case 2: + if (tag[0] == 'b' && tag[1] == 'h') { + param = DKIM_PARAM_BODYHASH; + } + else { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param: %c%c", tag[0], tag[1]); + state = DKIM_STATE_ERROR; + } + break; + default: + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param length: %zd", taglen); + state = DKIM_STATE_ERROR; + break; + } + if (state != DKIM_STATE_ERROR) { + /* Skip spaces */ + state = DKIM_STATE_SKIP_SPACES; + next_state = DKIM_STATE_VALUE; + } + break; + case DKIM_STATE_VALUE: + if (*p == ';') { + if (param == DKIM_PARAM_UNKNOWN || !parser_funcs[param](new, c, p - c, err)) { + state = DKIM_STATE_ERROR; + } + else { + state = DKIM_STATE_SKIP_SPACES; + next_state = DKIM_STATE_TAG; + p ++; + taglen = 0; + } + } + else if (p == end) { + if (param == DKIM_PARAM_UNKNOWN || !parser_funcs[param](new, c, p - c + 1, err)) { + state = DKIM_STATE_ERROR; + } + else { + /* Finish processing */ + p ++; + } + } + else { + p ++; + } + break; + case DKIM_STATE_SKIP_SPACES: + if (g_ascii_isspace (*p)) { + p ++; + } + else { + c = p; + state = next_state; + } + break; + case 
DKIM_STATE_ERROR: + if (err) { + msg_info ("dkim parse failed: %s", (*err)->message); + return NULL; + } + else { + msg_info ("dkim parse failed: unknown error"); + return NULL; + } + break; + } + } + + /* Now check validity of signature */ + if (new->b == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_B, "b parameter missing"); + return NULL; + } + if (new->bh == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_BH, "bh parameter missing"); + return NULL; + } + if (new->domain == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_D, "domain parameter missing"); + return NULL; + } + if (new->selector == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_S, "selector parameter missing"); + return NULL; + } + if (new->ver == 0) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_V, "v parameter missing"); + return NULL; + } + if (new->hlist == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_H, "h parameter missing"); + return NULL; + } + if (new->sig_alg == DKIM_SIGN_UNKNOWN) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_S, "s parameter missing"); + return NULL; + } + if (new->sig_alg == DKIM_SIGN_RSASHA1) { + /* Check bh length */ + if (new->bhlen != (guint)g_checksum_type_get_length (G_CHECKSUM_SHA1)) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_BADSIG, "signature has incorrect length: %ud", new->bhlen); + return NULL; + } + + } + else if (new->sig_alg == DKIM_SIGN_RSASHA256) { + if (new->bhlen != (guint)g_checksum_type_get_length (G_CHECKSUM_SHA256)) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_BADSIG, "signature has incorrect length: %ud", new->bhlen); + return NULL; + } + } + /* Check expiration */ + now = time (NULL); + if (new->timestamp && now < new->timestamp && new->timestamp - now > (gint)time_jitter) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_FUTURE, "signature was made in future, ignoring"); + return NULL; + } + if (new->expiration && new->expiration < now) { + g_set_error (err, 
DKIM_ERROR, DKIM_SIGERROR_EXPIRED, "signature has expired"); + return NULL; + } + + /* Now create dns key to request further */ + taglen = strlen (new->domain) + strlen (new->selector) + sizeof (DKIM_DNSKEYNAME) + 2; + new->dns_key = rspamd_mempool_alloc (new->pool, taglen); + rspamd_snprintf (new->dns_key, taglen, "%s.%s.%s", new->selector, DKIM_DNSKEYNAME, new->domain); + + /* Create checksums for further operations */ + if (new->sig_alg == DKIM_SIGN_RSASHA1) { + new->body_hash = g_checksum_new (G_CHECKSUM_SHA1); + new->headers_hash = g_checksum_new (G_CHECKSUM_SHA1); + } + else if (new->sig_alg == DKIM_SIGN_RSASHA256) { + new->body_hash = g_checksum_new (G_CHECKSUM_SHA256); + new->headers_hash = g_checksum_new (G_CHECKSUM_SHA256); + } + else { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_BADSIG, "signature has unsupported signature algorithm"); + return NULL; + } + + rspamd_mempool_add_destructor (new->pool, (rspamd_mempool_destruct_t)g_checksum_free, new->body_hash); + rspamd_mempool_add_destructor (new->pool, (rspamd_mempool_destruct_t)g_checksum_free, new->headers_hash); + + return new; +} + +struct rspamd_dkim_key_cbdata { + rspamd_dkim_context_t *ctx; + dkim_key_handler_f handler; + gpointer ud; +}; + +static rspamd_dkim_key_t* +rspamd_dkim_make_key (const gchar *keydata, guint keylen, GError **err) +{ + rspamd_dkim_key_t *key = NULL; + + key = g_slice_alloc0 (sizeof (rspamd_dkim_key_t)); + key->keydata = g_slice_alloc (keylen + 1); + rspamd_strlcpy (key->keydata, keydata, keylen + 1); + key->keylen = keylen + 1; + key->decoded_len = keylen + 1; +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 20)) + gchar *tmp; + gsize tmp_len = keylen; + tmp = g_base64_decode (key->keydata, &tmp_len); + rspamd_strlcpy (key->keydata, tmp, keylen + 1); + g_free (tmp); + key->decoded_len = tmp_len; +#else + g_base64_decode_inplace (key->keydata, &key->decoded_len); +#endif +#ifdef HAVE_OPENSSL + key->key_bio = BIO_new_mem_buf (key->keydata, key->decoded_len); + if 
(key->key_bio == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "cannot make ssl bio from key"); + rspamd_dkim_key_free (key); + return NULL; + } + + key->key_evp = d2i_PUBKEY_bio (key->key_bio, NULL); + if (key->key_evp == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "cannot extract pubkey from bio"); + rspamd_dkim_key_free (key); + return NULL; + } + + key->key_rsa = EVP_PKEY_get1_RSA (key->key_evp); + if (key->key_rsa == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "cannot extract rsa key from evp key"); + rspamd_dkim_key_free (key); + return NULL; + } + +#endif + + return key; +} + +/** + * Free DKIM key + * @param key + */ +void +rspamd_dkim_key_free (rspamd_dkim_key_t *key) +{ +#ifdef HAVE_OPENSSL + if (key->key_rsa) { + RSA_free (key->key_rsa); + } + if (key->key_bio) { + BIO_free (key->key_bio); + } +#endif + g_slice_free1 (key->keylen, key->keydata); + g_slice_free1 (sizeof (rspamd_dkim_key_t), key); +} + +static rspamd_dkim_key_t* +rspamd_dkim_parse_key (const gchar *txt, gsize *keylen, GError **err) +{ + const gchar *c, *p, *end; + gint state = 0; + gsize len; + + c = txt; + p = txt; + end = txt + strlen (txt); + + while (p <= end) { + switch (state) { + case 0: + if (p != end && p[0] == 'p' && p[1] == '=') { + /* We got something like public key */ + c = p + 2; + p = c; + state = 1; + } + else { + /* Ignore everything */ + p ++; + } + break; + case 1: + /* State when we got p= and looking for some public key */ + if ((*p == ';' || p == end) && p > c) { + len = p - c; + return rspamd_dkim_make_key (c, len, err); + } + else { + p ++; + } + break; + } + } + + if (p - c == 0) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYREVOKED, "key was revoked"); + } + else { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "key was not found"); + } + + return NULL; +} + +/* Get TXT request data and parse it */ +static void +rspamd_dkim_dns_cb (struct rdns_reply *reply, gpointer arg) +{ + struct 
rspamd_dkim_key_cbdata *cbdata = arg; + rspamd_dkim_key_t *key = NULL; + GError *err = NULL; + struct rdns_reply_entry *elt; + gsize keylen = 0; + + if (reply->code != RDNS_RC_NOERROR) { + g_set_error (&err, DKIM_ERROR, DKIM_SIGERROR_NOKEY, "dns request to %s failed: %s", cbdata->ctx->dns_key, + rdns_strerror (reply->code)); + cbdata->handler (NULL, 0, cbdata->ctx, cbdata->ud, err); + } + else { + LL_FOREACH (reply->entries, elt) { + if (elt->type == RDNS_REQUEST_TXT) { + key = rspamd_dkim_parse_key (elt->content.txt.data, &keylen, &err); + if (key) { + key->ttl = elt->ttl; + break; + } + } + } + if (key != NULL && err != NULL) { + /* Free error as it is insignificant */ + g_error_free (err); + err = NULL; + } + cbdata->handler (key, keylen, cbdata->ctx, cbdata->ud, err); + } +} + +/** + * Make DNS request for specified context and obtain and parse key + * @param ctx dkim context from signature + * @param resolver dns resolver object + * @param s async session to make request + * @return + */ +gboolean +rspamd_get_dkim_key (rspamd_dkim_context_t *ctx, struct rspamd_dns_resolver *resolver, + struct rspamd_async_session *s, dkim_key_handler_f handler, gpointer ud) +{ + struct rspamd_dkim_key_cbdata *cbdata; + + g_return_val_if_fail (ctx != NULL, FALSE); + g_return_val_if_fail (ctx->dns_key != NULL, FALSE); + + cbdata = rspamd_mempool_alloc (ctx->pool, sizeof (struct rspamd_dkim_key_cbdata)); + cbdata->ctx = ctx; + cbdata->handler = handler; + cbdata->ud = ud; + + return make_dns_request (resolver, s, ctx->pool, rspamd_dkim_dns_cb, cbdata, RDNS_REQUEST_TXT, ctx->dns_key); +} + +static gboolean +rspamd_dkim_relaxed_body_step (GChecksum *ck, const gchar **start, guint remain) +{ + const gchar *h; + static gchar buf[BUFSIZ]; + gchar *t; + guint len, inlen; + gboolean got_sp, finished = FALSE; + + if (remain > sizeof (buf)) { + len = sizeof (buf); + } + else { + len = remain; + finished = TRUE; + } + inlen = sizeof (buf) - 1; + h = *start; + t = &buf[0]; + got_sp = FALSE; 
+ + while (len && inlen) { + if (*h == '\r' || *h == '\n') { + /* Ignore spaces at the end of line */ + if (got_sp) { + got_sp = FALSE; + t --; + } + /* Replace a single \n or \r with \r\n */ + if (*h == '\n' && *(h - 1) != '\r') { + *t ++ = '\r'; + inlen --; + } + else if (*h == '\r' && *(h + 1) != '\n') { + *t ++ = *h ++; + *t ++ = '\n'; + if (inlen > 1) { + inlen -= 2; + } + else { + /* It is safe as inlen = sizeof (buf) - 1 */ + inlen = 0; + } + len --; + continue; + } + } + else if (g_ascii_isspace (*h)) { + if (got_sp) { + /* Ignore multiply spaces */ + h ++; + len --; + continue; + } + else { + *t++ = ' '; + h ++; + inlen --; + len --; + got_sp = TRUE; + continue; + } + } + else { + got_sp = FALSE; + } + *t++ = *h++; + inlen --; + len --; + } + + *start = h; + + if (!finished && *(t - 1) == ' ' && g_ascii_isspace (*h)) { + /* Avoid border problems */ + t --; + } +#if 0 + msg_debug ("update signature with buffer: %*s", t - buf, buf); +#endif + g_checksum_update (ck, buf, t - buf); + + return !finished; +} + +static gboolean +rspamd_dkim_simple_body_step (GChecksum *ck, const gchar **start, guint remain) +{ + const gchar *h; + static gchar buf[BUFSIZ]; + gchar *t; + guint len, inlen; + gboolean finished = FALSE; + + if (remain > sizeof (buf)) { + len = sizeof (buf); + } + else { + len = remain; + finished = TRUE; + } + inlen = sizeof (buf) - 1; + h = *start; + t = &buf[0]; + + while (len && inlen) { + if (*h == '\r' || *h == '\n') { + /* Replace a single \n or \r with \r\n */ + if (*h == '\n' && *(h - 1) != '\r') { + *t ++ = '\r'; + inlen --; + } + else if (*h == '\r' && *(h + 1) != '\n') { + *t ++ = *h ++; + *t ++ = '\n'; + if (inlen > 1) { + inlen -= 2; + } + else { + /* It is safe as inlen = sizeof (buf) - 1 */ + inlen = 0; + } + len --; + continue; + } + } + *t++ = *h++; + inlen --; + len --; + } + + *start = h; + +#if 0 + msg_debug ("update signature with buffer: %*s", t - buf, buf); +#endif + g_checksum_update (ck, buf, t - buf); + + return !finished; +} 
+ +static gboolean +rspamd_dkim_canonize_body (rspamd_dkim_context_t *ctx, const gchar *start, const gchar *end) +{ + const gchar *p; + + if (start == NULL) { + /* Empty body */ + if (ctx->body_canon_type == DKIM_CANON_SIMPLE) { + g_checksum_update (ctx->body_hash, CRLF, sizeof (CRLF) - 1); + } + else { + g_checksum_update (ctx->body_hash, "", 0); + } + } + else { + /* Strip extra ending CRLF */ + p = end - 1; + while (p >= start + 2) { + if (*p == '\n' && *(p - 1) == '\r' && *(p - 2) == '\n') { + p -= 2; + } + else if (*p == '\n' && *(p - 1) == '\n') { + p --; + } + else if (*p == '\r' && *(p - 1) == '\r') { + p --; + } + else { + break; + } + } + end = p + 1; + if (end == start || end == start + 2) { + /* Empty body */ + if (ctx->body_canon_type == DKIM_CANON_SIMPLE) { + g_checksum_update (ctx->body_hash, CRLF, sizeof (CRLF) - 1); + } + else { + g_checksum_update (ctx->body_hash, "", 0); + } + } + else { + if (ctx->body_canon_type == DKIM_CANON_SIMPLE) { + /* Simple canonization */ + while (rspamd_dkim_simple_body_step (ctx->body_hash, &start, end - start)); + } + else { + while (rspamd_dkim_relaxed_body_step (ctx->body_hash, &start, end - start)); + } + } + return TRUE; + } + + /* TODO: Implement relaxed algorithm */ + return FALSE; +} + +/* Update hash converting all CR and LF to CRLF */ +static void +rspamd_dkim_hash_update (GChecksum *ck, const gchar *begin, gsize len) +{ + const gchar *p, *c, *end; + + end = begin + len; + p = begin; + c = p; + while (p != end) { + if (*p == '\r') { + g_checksum_update (ck, c, p - c); + g_checksum_update (ck, CRLF, sizeof (CRLF) - 1); + p ++; + if (*p == '\n') { + p ++; + } + c = p; + } + else if (*p == '\n') { + g_checksum_update (ck, c, p - c); + g_checksum_update (ck, CRLF, sizeof (CRLF) - 1); + p ++; + c = p; + } + else { + p ++; + } + } + if (p != c) { + g_checksum_update (ck, c, p - c); + } +} + +/* Update hash by signature value (ignoring b= tag) */ +static void +rspamd_dkim_signature_update (rspamd_dkim_context_t 
*ctx, const gchar *begin, guint len) +{ + const gchar *p, *c, *end; + gboolean tag, skip; + + end = begin + len; + p = begin; + c = begin; + tag = TRUE; + skip = FALSE; + + while (p < end) { + if (tag && p[0] == 'b' && p[1] == '=') { + /* Add to signature */ + msg_debug ("initial update hash with signature part: %*s", p - c + 2, c); + rspamd_dkim_hash_update (ctx->headers_hash, c, p - c + 2); + skip = TRUE; + } + else if (skip && (*p == ';' || p == end - 1)) { + skip = FALSE; + c = p; + } + else if (!tag && *p == ';') { + tag = TRUE; + } + else if (tag && *p == '=') { + tag = FALSE; + } + p ++; + } + + p --; + /* Skip \r\n at the end */ + while ((*p == '\r' || *p == '\n') && p >= c) { + p --; + } + + if (p - c + 1 > 0) { + msg_debug ("final update hash with signature part: %*s", p - c + 1, c); + rspamd_dkim_hash_update (ctx->headers_hash, c, p - c + 1); + } +} + +static gboolean +rspamd_dkim_canonize_header_relaxed (rspamd_dkim_context_t *ctx, const gchar *header, const gchar *header_name, gboolean is_sign) +{ + const gchar *h; + gchar *t, *buf; + guint inlen; + gboolean got_sp, allocated = FALSE; + + inlen = strlen (header) + strlen (header_name) + sizeof (":" CRLF); + if (inlen > BUFSIZ) { + buf = g_malloc (inlen); + allocated = TRUE; + } + else { + /* Faster */ + buf = g_alloca (inlen); + } + + /* Name part */ + t = buf; + h = header_name; + while (*h) { + *t ++ = g_ascii_tolower (*h++); + } + *t++ = ':'; + + /* Value part */ + h = header; + /* Skip spaces at the beginning */ + while (g_ascii_isspace (*h)) { + h ++; + } + got_sp = FALSE; + + while (*h) { + if (g_ascii_isspace (*h)) { + if (got_sp) { + h ++; + continue; + } + else { + got_sp = TRUE; + *t ++ = ' '; + h ++; + continue; + } + } + else { + got_sp = FALSE; + } + *t ++ = *h ++; + } + if (g_ascii_isspace (*(t - 1))) { + t --; + } + *t++ = '\r'; + *t++ = '\n'; + *t = '\0'; + + if (!is_sign) { + msg_debug ("update signature with header: %s", buf); + g_checksum_update (ctx->headers_hash, buf, t - buf); + } 
+ else { + rspamd_dkim_signature_update (ctx, buf, t - buf); + } + + if (allocated) { + g_free (buf); + } + + return TRUE; +} + +struct rspamd_dkim_sign_chunk { + const gchar *begin; + gsize len; + gboolean append_crlf; +}; + +static gboolean +rspamd_dkim_canonize_header_simple (rspamd_dkim_context_t *ctx, const gchar *headers, + const gchar *header_name, guint count, gboolean is_sign) +{ + const gchar *p, *c; + gint state = 0, hlen; + gboolean found = FALSE; + GArray *to_sign; + struct rspamd_dkim_sign_chunk chunk, *elt; + gint i; + + /* This process is very similar to raw headers processing */ + to_sign = g_array_sized_new (FALSE, FALSE, sizeof (struct rspamd_dkim_sign_chunk), count); + p = headers; + c = p; + hlen = strlen (header_name); + + while (*p) { + switch (state) { + case 0: + /* Compare state */ + if (*p == ':') { + /* Compare header's name with desired one */ + if (p - c == hlen) { + if (g_ascii_strncasecmp (c, header_name, hlen) == 0) { + /* Get value */ + state = 2; + } + else { + /* Skip the whole header */ + state = 1; + } + } + else { + /* Skip the whole header */ + state = 1; + } + } + p ++; + break; + case 1: + /* Skip header state */ + if (*p == '\n' && !g_ascii_isspace (p[1])) { + /* Header is skipped */ + state = 0; + c = p + 1; + } + p ++; + break; + case 2: + /* c contains the beginning of header */ + if (*p == '\n' && (!g_ascii_isspace (p[1]) || p[1] == '\0')) { + chunk.begin = c; + if (*(p - 1) == '\r') { + chunk.len = p - c + 1; + chunk.append_crlf = FALSE; + } + else { + /* Need append CRLF as linefeed is not proper */ + chunk.len = p - c; + chunk.append_crlf = TRUE; + } + g_array_append_val (to_sign, chunk); + c = p + 1; + state = 0; + found = TRUE; + } + p ++; + break; + } + } + + if (found) { + if (!is_sign) { + + for (i = to_sign->len - 1; i >= 0 && count > 0; i --, count --) { + elt = &g_array_index (to_sign, struct rspamd_dkim_sign_chunk, i); + + if (!chunk.append_crlf) { + msg_debug ("update signature with header: %*s", elt->len, 
elt->begin); + rspamd_dkim_hash_update (ctx->headers_hash, elt->begin, elt->len); + } + else { + msg_debug ("update signature with header: %*s", elt->len + 1, elt->begin); + rspamd_dkim_hash_update (ctx->headers_hash, elt->begin, elt->len + 1); + } + } + } + else { + elt = &g_array_index (to_sign, struct rspamd_dkim_sign_chunk, 0); + if (elt->append_crlf) { + rspamd_dkim_signature_update (ctx, elt->begin, elt->len + 1); + } + else { + rspamd_dkim_signature_update (ctx, elt->begin, elt->len); + } + } + } + + g_array_free (to_sign, TRUE); + + return found; +} + +static gboolean +rspamd_dkim_canonize_header (rspamd_dkim_context_t *ctx, struct rspamd_task *task, const gchar *header_name, + guint count, gboolean is_sig) +{ + struct raw_header *rh, *rh_iter; + guint rh_num = 0; + GList *nh = NULL, *cur; + + if (ctx->header_canon_type == DKIM_CANON_SIMPLE) { + return rspamd_dkim_canonize_header_simple (ctx, task->raw_headers_str, header_name, count, is_sig); + } + else { + rh = g_hash_table_lookup (task->raw_headers, header_name); + if (rh) { + if (!is_sig) { + rh_iter = rh; + while (rh_iter) { + rh_num ++; + rh_iter = rh_iter->next; + } + + if (rh_num > count) { + /* Set skip count */ + rh_num -= count; + } + else { + rh_num = 0; + } + rh_iter = rh; + while (rh_num) { + rh_iter = rh_iter->next; + rh_num --; + } + /* Now insert required headers */ + while (rh_iter) { + nh = g_list_prepend (nh, rh_iter); + rh_iter = rh_iter->next; + } + cur = nh; + while (cur) { + rh = cur->data; + if (! 
rspamd_dkim_canonize_header_relaxed (ctx, rh->value, header_name, is_sig)) { + g_list_free (nh); + return FALSE; + } + cur = g_list_next (cur); + } + if (nh != NULL) { + g_list_free (nh); + } + } + else { + /* For signature check just use the first dkim header */ + rspamd_dkim_canonize_header_relaxed (ctx, rh->value, header_name, is_sig); + } + return TRUE; + } + } + + /* TODO: Implement relaxed algorithm */ + return FALSE; +} + +/** + * Check task for dkim context using dkim key + * @param ctx dkim verify context + * @param key dkim key (from cache or from dns request) + * @param task task to check + * @return + */ +gint +rspamd_dkim_check (rspamd_dkim_context_t *ctx, rspamd_dkim_key_t *key, struct rspamd_task *task) +{ + const gchar *p, *headers_end = NULL, *end, *body_end; + gboolean got_cr = FALSE, got_crlf = FALSE, got_lf = FALSE; + gchar *digest; + gsize dlen; + gint res = DKIM_CONTINUE; + guint i; + struct rspamd_dkim_header *dh; +#ifdef HAVE_OPENSSL + gint nid; +#endif + + g_return_val_if_fail (ctx != NULL, DKIM_ERROR); + g_return_val_if_fail (key != NULL, DKIM_ERROR); + g_return_val_if_fail (task->msg != NULL, DKIM_ERROR); + + /* First of all find place of body */ + p = task->msg->str; + + end = task->msg->str + task->msg->len; + + while (p <= end) { + /* Search for \r\n\r\n at the end of headers */ + if (*p == '\n') { + if (got_cr && *(p - 1) == '\r') { + if (got_crlf) { + /* \r\n\r\n */ + headers_end = p + 1; + break; + } + else if (got_lf) { + /* \n\r\n */ + headers_end = p + 1; + break; + } + else { + /* Set got crlf flag */ + got_crlf = TRUE; + got_cr = FALSE; + got_lf = FALSE; + } + } + else if (got_cr && *(p - 1) != '\r') { + /* We got CR somewhere but not right before */ + got_cr = FALSE; + if (*(p - 1) == '\n') { + /* \r\n\n case */ + headers_end = p + 1; + break; + } + got_lf = TRUE; + } + else if (got_lf && *(p - 1) == '\n') { + /* \n\n case */ + headers_end = p + 1; + break; + } + else { + got_lf = TRUE; + } + } + else if (*p == '\r') { + if 
(got_cr && *(p - 1) == '\r') { + /* \r\r case */ + headers_end = p + 1; + break; + } + else if (got_lf && *(p - 1) != '\n') { + /* Sequence is broken */ + got_lf = FALSE; + got_cr = TRUE; + } + else { + got_cr = TRUE; + } + } + else { + got_cr = FALSE; + got_crlf = FALSE; + } + p ++; + } + + /* Start canonization of body part */ + if (headers_end) { + if (ctx->len == 0 || (gint)ctx->len > end - headers_end) { + body_end = end; + } + else { + /* Strip message */ + body_end = headers_end + ctx->len; + } + } + else { + body_end = end; + } + if (!rspamd_dkim_canonize_body (ctx, headers_end, body_end)) { + return DKIM_RECORD_ERROR; + } + /* Now canonize headers */ + for (i = 0; i < ctx->hlist->len; i ++) { + dh = g_ptr_array_index (ctx->hlist, i); + rspamd_dkim_canonize_header (ctx, task, dh->name, dh->count, FALSE); + } + + /* Canonize dkim signature */ + rspamd_dkim_canonize_header (ctx, task, DKIM_SIGNHEADER, 1, TRUE); + + dlen = ctx->bhlen; + digest = g_alloca (dlen); + g_checksum_get_digest (ctx->body_hash, digest, &dlen); + + /* Check bh field */ + if (memcmp (ctx->bh, digest, dlen) != 0) { + msg_debug ("bh value missmatch"); + return DKIM_REJECT; + } + + g_checksum_get_digest (ctx->headers_hash, digest, &dlen); +#ifdef HAVE_OPENSSL + /* Check headers signature */ + + if (ctx->sig_alg == DKIM_SIGN_RSASHA1) { + nid = NID_sha1; + } + else if (ctx->sig_alg == DKIM_SIGN_RSASHA256) { + nid = NID_sha256; + } + else { + /* Not reached */ + nid = NID_sha1; + } + + if (RSA_verify (nid, digest, dlen, ctx->b, ctx->blen, key->key_rsa) != 1) { + msg_debug ("rsa verify failed"); + res = DKIM_REJECT; + } +#endif + return res; +} diff --git a/src/libserver/dkim.h b/src/libserver/dkim.h new file mode 100644 index 000000000..29ec479b7 --- /dev/null +++ b/src/libserver/dkim.h @@ -0,0 +1,207 @@ +/* Copyright (c) 2010-2011, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#ifndef DKIM_H_ +#define DKIM_H_ + +#include "config.h" +#include "event.h" +#include "dns.h" +#ifdef HAVE_OPENSSL +#include +#include +#endif + +/* Main types and definitions */ + +#define DKIM_SIGNHEADER "DKIM-Signature" + /* DKIM signature header */ + +/* special DNS tokens */ +#define DKIM_DNSKEYNAME "_domainkey" + /* reserved DNS sub-zone */ +#define DKIM_DNSPOLICYNAME "_adsp" /* reserved DNS sub-zone */ + +/* Canonization methods */ +#define DKIM_CANON_UNKNOWN (-1) /* unknown method */ +#define DKIM_CANON_SIMPLE 0 /* as specified in DKIM spec */ +#define DKIM_CANON_RELAXED 1 /* as specified in DKIM spec */ + +#define DKIM_CANON_DEFAULT DKIM_CANON_SIMPLE + +/* Signature methods */ +#define DKIM_SIGN_UNKNOWN (-2) /* unknown method */ +#define DKIM_SIGN_DEFAULT (-1) /* use internal default */ +#define DKIM_SIGN_RSASHA1 0 /* an RSA-signed SHA1 digest */ +#define DKIM_SIGN_RSASHA256 1 /* an RSA-signed SHA256 digest */ + +/* Params */ +#define DKIM_PARAM_UNKNOWN (-1) /* unknown */ +#define DKIM_PARAM_SIGNATURE 0 /* b */ +#define DKIM_PARAM_SIGNALG 1 /* a */ +#define DKIM_PARAM_DOMAIN 2 /* d */ +#define DKIM_PARAM_CANONALG 3 /* c */ +#define DKIM_PARAM_QUERYMETHOD 4 /* q */ +#define DKIM_PARAM_SELECTOR 5 /* s */ +#define DKIM_PARAM_HDRLIST 6 /* h */ +#define DKIM_PARAM_VERSION 7 /* v */ +#define DKIM_PARAM_IDENTITY 8 /* i */ +#define DKIM_PARAM_TIMESTAMP 9 /* t */ +#define DKIM_PARAM_EXPIRATION 10 /* x */ +#define DKIM_PARAM_COPIEDHDRS 11 /* z */ +#define DKIM_PARAM_BODYHASH 12 /* bh */ +#define DKIM_PARAM_BODYLENGTH 13 /* l */ + +/* Errors (from OpenDKIM) */ + +#define DKIM_SIGERROR_UNKNOWN (-1) /* unknown error */ +#define DKIM_SIGERROR_OK 0 /* no error */ +#define DKIM_SIGERROR_VERSION 1 /* unsupported version */ +#define DKIM_SIGERROR_DOMAIN 2 /* invalid domain (d=/i=) */ +#define DKIM_SIGERROR_EXPIRED 3 /* signature expired */ +#define DKIM_SIGERROR_FUTURE 4 /* signature in the future */ +#define DKIM_SIGERROR_TIMESTAMPS 5 /* x= < t= */ +#define 
DKIM_SIGERROR_UNUSED 6 /* OBSOLETE */ +#define DKIM_SIGERROR_INVALID_HC 7 /* c= invalid (header) */ +#define DKIM_SIGERROR_INVALID_BC 8 /* c= invalid (body) */ +#define DKIM_SIGERROR_MISSING_A 9 /* a= missing */ +#define DKIM_SIGERROR_INVALID_A 10 /* a= invalid */ +#define DKIM_SIGERROR_MISSING_H 11 /* h= missing */ +#define DKIM_SIGERROR_INVALID_L 12 /* l= invalid */ +#define DKIM_SIGERROR_INVALID_Q 13 /* q= invalid */ +#define DKIM_SIGERROR_INVALID_QO 14 /* q= option invalid */ +#define DKIM_SIGERROR_MISSING_D 15 /* d= missing */ +#define DKIM_SIGERROR_EMPTY_D 16 /* d= empty */ +#define DKIM_SIGERROR_MISSING_S 17 /* s= missing */ +#define DKIM_SIGERROR_EMPTY_S 18 /* s= empty */ +#define DKIM_SIGERROR_MISSING_B 19 /* b= missing */ +#define DKIM_SIGERROR_EMPTY_B 20 /* b= empty */ +#define DKIM_SIGERROR_CORRUPT_B 21 /* b= corrupt */ +#define DKIM_SIGERROR_NOKEY 22 /* no key found in DNS */ +#define DKIM_SIGERROR_DNSSYNTAX 23 /* DNS reply corrupt */ +#define DKIM_SIGERROR_KEYFAIL 24 /* DNS query failed */ +#define DKIM_SIGERROR_MISSING_BH 25 /* bh= missing */ +#define DKIM_SIGERROR_EMPTY_BH 26 /* bh= empty */ +#define DKIM_SIGERROR_CORRUPT_BH 27 /* bh= corrupt */ +#define DKIM_SIGERROR_BADSIG 28 /* signature mismatch */ +#define DKIM_SIGERROR_SUBDOMAIN 29 /* unauthorized subdomain */ +#define DKIM_SIGERROR_MULTIREPLY 30 /* multiple records returned */ +#define DKIM_SIGERROR_EMPTY_H 31 /* h= empty */ +#define DKIM_SIGERROR_INVALID_H 32 /* h= missing req'd entries */ +#define DKIM_SIGERROR_TOOLARGE_L 33 /* l= value exceeds body size */ +#define DKIM_SIGERROR_MBSFAILED 34 /* "must be signed" failure */ +#define DKIM_SIGERROR_KEYVERSION 35 /* unknown key version */ +#define DKIM_SIGERROR_KEYUNKNOWNHASH 36 /* unknown key hash */ +#define DKIM_SIGERROR_KEYHASHMISMATCH 37 /* sig-key hash mismatch */ +#define DKIM_SIGERROR_NOTEMAILKEY 38 /* not an e-mail key */ +#define DKIM_SIGERROR_UNUSED2 39 /* OBSOLETE */ +#define DKIM_SIGERROR_KEYTYPEMISSING 40 /* key type missing */ 
+#define DKIM_SIGERROR_KEYTYPEUNKNOWN 41 /* key type unknown */ +#define DKIM_SIGERROR_KEYREVOKED 42 /* key revoked */ +#define DKIM_SIGERROR_KEYDECODE 43 /* key couldn't be decoded */ +#define DKIM_SIGERROR_MISSING_V 44 /* v= tag missing */ +#define DKIM_SIGERROR_EMPTY_V 45 /* v= tag empty */ + +/* Check results */ +#define DKIM_CONTINUE 0 /* continue */ +#define DKIM_REJECT 1 /* reject */ +#define DKIM_TRYAGAIN 2 /* try again later */ +#define DKIM_NOTFOUND 3 /* requested record not found */ +#define DKIM_RECORD_ERROR 4 /* error requesting record */ + +typedef struct rspamd_dkim_context_s { + rspamd_mempool_t *pool; + gint sig_alg; + gint header_canon_type; + gint body_canon_type; + gsize len; + gchar *domain; + gchar *selector; + time_t timestamp; + time_t expiration; + gint8 *b; + gint8 *bh; + guint bhlen; + guint blen; + GPtrArray *hlist; + guint ver; + gchar *dns_key; + GChecksum *headers_hash; + GChecksum *body_hash; +} rspamd_dkim_context_t; + +typedef struct rspamd_dkim_key_s { + guint8 *keydata; + guint keylen; + gsize decoded_len; + guint ttl; +#ifdef HAVE_OPENSSL + RSA *key_rsa; + BIO *key_bio; + EVP_PKEY *key_evp; +#endif +} +rspamd_dkim_key_t; + +struct rspamd_task; + +/* Err MUST be freed if it is not NULL, key is allocated by slice allocator */ +typedef void (*dkim_key_handler_f)(rspamd_dkim_key_t *key, gsize keylen, rspamd_dkim_context_t *ctx, gpointer ud, GError *err); + +/** + * Create new dkim context from signature + * @param sig message's signature + * @param pool pool to allocate memory from + * @param time_jitter jitter in seconds to allow time diff while checking + * @param err pointer to error object + * @return new context or NULL + */ +rspamd_dkim_context_t* rspamd_create_dkim_context (const gchar *sig, rspamd_mempool_t *pool, guint time_jitter, GError **err); + +/** + * Make DNS request for specified context and obtain and parse key + * @param ctx dkim context from signature + * @param resolver dns resolver object + * @param s async 
session to make request + * @return + */ +gboolean rspamd_get_dkim_key (rspamd_dkim_context_t *ctx, struct rspamd_dns_resolver *resolver, + struct rspamd_async_session *s, dkim_key_handler_f handler, gpointer ud); + +/** + * Check task for dkim context using dkim key + * @param ctx dkim verify context + * @param key dkim key (from cache or from dns request) + * @param task task to check + * @return + */ +gint rspamd_dkim_check (rspamd_dkim_context_t *ctx, rspamd_dkim_key_t *key, struct rspamd_task *task); + +/** + * Free DKIM key + * @param key + */ +void rspamd_dkim_key_free (rspamd_dkim_key_t *key); + +#endif /* DKIM_H_ */ diff --git a/src/libserver/dns.c b/src/libserver/dns.c new file mode 100644 index 000000000..e20cca9df --- /dev/null +++ b/src/libserver/dns.c @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2009-2013, Vsevolod Stakhov + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "dns.h" +#include "main.h" +#include "utlist.h" +#include "uthash.h" +#include "rdns_event.h" + +struct rspamd_dns_resolver { + struct rdns_resolver *r; + struct event_base *ev_base; + gdouble request_timeout; + guint max_retransmits; +}; + +struct rspamd_dns_request_ud { + struct rspamd_async_session *session; + dns_callback_type cb; + gpointer ud; + struct rdns_request *req; +}; + +static void +rspamd_dns_fin_cb (gpointer arg) +{ + struct rdns_request *req = arg; + + rdns_request_release (req); +} + +static void +rspamd_dns_callback (struct rdns_reply *reply, gpointer ud) +{ + struct rspamd_dns_request_ud *reqdata = ud; + + reqdata->cb (reply, reqdata->ud); + + remove_normal_event (reqdata->session, rspamd_dns_fin_cb, reqdata->req); +} + +gboolean +make_dns_request (struct rspamd_dns_resolver *resolver, + struct rspamd_async_session *session, rspamd_mempool_t *pool, dns_callback_type cb, + gpointer ud, enum rdns_request_type type, const char *name) +{ + struct rdns_request *req; + struct rspamd_dns_request_ud *reqdata; + + reqdata = rspamd_mempool_alloc (pool, sizeof (struct rspamd_dns_request_ud)); + reqdata->session = session; + reqdata->cb = cb; + reqdata->ud = ud; + + req = rdns_make_request_full (resolver->r, rspamd_dns_callback, reqdata, + resolver->request_timeout, resolver->max_retransmits, 1, name, type); + + if (req != NULL) { + register_async_event (session, (event_finalizer_t)rspamd_dns_fin_cb, req, + 
g_quark_from_static_string ("dns resolver")); + /* Ref event to free it only when according async event is deleted from the session */ + rdns_request_retain (req); + reqdata->req = req; + } + else { + return FALSE; + } + + return TRUE; +} + + +struct rspamd_dns_resolver * +dns_resolver_init (rspamd_logger_t *logger, struct event_base *ev_base, struct config_file *cfg) +{ + GList *cur; + struct rspamd_dns_resolver *new; + gchar *begin, *p, *err; + gint priority; + + new = g_slice_alloc0 (sizeof (struct rspamd_dns_resolver)); + new->ev_base = ev_base; + new->request_timeout = cfg->dns_timeout; + new->max_retransmits = cfg->dns_retransmits; + + new->r = rdns_resolver_new (); + rdns_bind_libevent (new->r, new->ev_base); + rdns_resolver_set_log_level (new->r, cfg->log_level); + rdns_resolver_set_logger (new->r, (rdns_log_function)rspamd_common_logv, logger); + + if (cfg->nameservers == NULL) { + /* Parse resolv.conf */ + if (!rdns_resolver_parse_resolv_conf (new->r, "/etc/resolv.conf")) { + msg_err ("cannot parse resolv.conf and no nameservers defined, so no ways to resolve addresses"); + return new; + } + } + else { + cur = cfg->nameservers; + while (cur) { + begin = cur->data; + p = strchr (begin, ':'); + if (p != NULL) { + *p = '\0'; + p ++; + priority = strtoul (p, &err, 10); + if (err != NULL && *err != '\0') { + msg_info ("bad character '%x', must be 'm' or 's' or a numeric priority", *err); + } + } + else { + priority = 0; + } + if (!rdns_resolver_add_server (new->r, begin, 53, priority, cfg->dns_io_per_server)) { + msg_warn ("cannot parse ip address of nameserver: %s", begin); + cur = g_list_next (cur); + continue; + } + + cur = g_list_next (cur); + } + + } + + rdns_resolver_init (new->r); + + return new; +} diff --git a/src/libserver/dns.h b/src/libserver/dns.h new file mode 100644 index 000000000..26ae71387 --- /dev/null +++ b/src/libserver/dns.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2013, Vsevolod Stakhov + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RSPAMD_DNS_H +#define RSPAMD_DNS_H + +#include "config.h" +#include "mem_pool.h" +#include "events.h" +#include "logger.h" +#include "rdns.h" + +struct rspamd_dns_resolver; + +/* Rspamd DNS API */ + +/** + * Init DNS resolver, params are obtained from a config file or system file /etc/resolv.conf + */ +struct rspamd_dns_resolver *dns_resolver_init (rspamd_logger_t *logger, + struct event_base *ev_base, struct config_file *cfg); + +/** + * Make a DNS request + * @param resolver resolver object + * @param session async session to register event + * @param pool memory pool for storage + * @param cb callback to call on resolve completing + * @param ud user data for callback + * @param type request type + * @param ... 
string or ip address based on a request type + * @return TRUE if request was sent. + */ +gboolean make_dns_request (struct rspamd_dns_resolver *resolver, + struct rspamd_async_session *session, rspamd_mempool_t *pool, + dns_callback_type cb, gpointer ud, enum rdns_request_type type, const char *name); + +#endif diff --git a/src/libserver/dynamic_cfg.c b/src/libserver/dynamic_cfg.c new file mode 100644 index 000000000..7f5e8530d --- /dev/null +++ b/src/libserver/dynamic_cfg.c @@ -0,0 +1,599 @@ +/* Copyright (c) 2010-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "main.h"
+#include "map.h"
+#include "filter.h"
+#include "dynamic_cfg.h"
+#include "json/jansson.h"
+
+/* Dynamic weight of a single symbol */
+struct dynamic_cfg_symbol {
+	gchar *name;
+	gdouble value;
+};
+
+/* Dynamic score of a metric action; negative value means `not set' */
+struct dynamic_cfg_action {
+	enum rspamd_metric_action action;
+	gdouble value;
+};
+
+/* Dynamic settings of one metric */
+struct dynamic_cfg_metric {
+	GList *symbols;
+	struct dynamic_cfg_action actions[METRIC_ACTION_MAX];
+	gchar *name;
+};
+
+/* Accumulator for the json map data and the configuration parsed from it */
+struct config_json_buf {
+	gchar *buf;
+	gchar *pos;
+	size_t buflen;
+	struct config_file *cfg;
+	GList *config_metrics;
+};
+
+/**
+ * Free dynamic configuration
+ * @param conf_metrics list of struct dynamic_cfg_metric
+ */
+static void
+dynamic_cfg_free (GList *conf_metrics)
+{
+	GList *cur, *cur_elt;
+	struct dynamic_cfg_metric *metric;
+	struct dynamic_cfg_symbol *sym;
+
+	/* NOTE(review): metric->name is not released here - confirm who owns it */
+	for (cur = conf_metrics; cur != NULL; cur = g_list_next (cur)) {
+		metric = cur->data;
+		for (cur_elt = metric->symbols; cur_elt != NULL; cur_elt = g_list_next (cur_elt)) {
+			sym = cur_elt->data;
+			g_free (sym->name);
+			g_slice_free1 (sizeof (struct dynamic_cfg_symbol), sym);
+		}
+		g_list_free (metric->symbols);
+		g_slice_free1 (sizeof (struct dynamic_cfg_metric), metric);
+	}
+	g_list_free (conf_metrics);
+}
+
+/**
+ * Apply configuration to the specified configuration
+ * @param conf_metrics dynamic metrics to apply
+ * @param cfg main configuration to update
+ */
+static void
+apply_dynamic_conf (GList *conf_metrics, struct config_file *cfg)
+{
+	GList *cur, *cur_elt;
+	struct dynamic_cfg_metric *metric;
+	struct dynamic_cfg_symbol *sym;
+	struct dynamic_cfg_action *act;
+	struct metric *real_metric;
+	struct metric_action *real_act;
+	gdouble *w;
+	gint i, j;
+
+	cur = conf_metrics;
+	while (cur) {
+		metric = cur->data;
+		if ((real_metric = g_hash_table_lookup (cfg->metrics, metric->name)) != NULL) {
+			/* Override weights of the symbols known to the main configuration */
+			cur_elt = metric->symbols;
+			while (cur_elt) {
+				sym = cur_elt->data;
+				if ((w = g_hash_table_lookup (real_metric->symbols, sym->name)) != NULL) {
+					*w = sym->value;
+				}
+				else {
+					msg_info ("symbol %s is not found in the main configuration", sym->name);
+				}
+				cur_elt = g_list_next (cur_elt);
+			}
+
+			for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) {
+				act = &metric->actions[i];
+				if (act->value < 0) {
+					/* Action is not set in the dynamic configuration */
+					continue;
+				}
+				for (j = METRIC_ACTION_REJECT; j < METRIC_ACTION_MAX; j ++) {
+					real_act = &real_metric->actions[j];
+					if (real_act->action == act->action) {
+						real_act->score = act->value;
+					}
+					/* Update required score accordingly to metric's action */
+					if (act->action == METRIC_ACTION_REJECT) {
+						real_metric->actions[METRIC_ACTION_REJECT].score = act->value;
+					}
+				}
+			}
+		}
+		cur = g_list_next (cur);
+	}
+}
+
+/**
+ * Map callback: append a chunk of json data to the buffer kept in cur_data
+ * @param pool memory pool (unused)
+ * @param chunk data read
+ * @param len length of the chunk
+ * @param data map callback data
+ * @return always NULL, i.e. no part of the chunk has to be kept by the caller
+ */
+gchar *
+json_config_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data)
+{
+	struct config_json_buf *jb;
+	gint avail, off;
+
+	if (data->cur_data == NULL) {
+		jb = g_malloc (sizeof (struct config_json_buf));
+		jb->cfg = ((struct config_json_buf *)data->prev_data)->cfg;
+		jb->buf = NULL;
+		jb->pos = NULL;
+		jb->config_metrics = NULL;
+		data->cur_data = jb;
+	}
+	else {
+		jb = data->cur_data;
+	}
+
+	if (jb->buf == NULL) {
+		/* Allocate memory for buffer */
+		jb->buflen = len * 2;
+		jb->buf = g_malloc (jb->buflen);
+		jb->pos = jb->buf;
+	}
+
+	off = jb->pos - jb->buf;
+	avail = jb->buflen - off;
+
+	/*
+	 * Grow when the chunk would fill the buffer completely as well:
+	 * json_config_fin_cb writes a terminating zero at jb->pos, so one spare
+	 * byte must always remain after the data.
+	 */
+	if (avail <= len) {
+		jb->buflen = MAX (jb->buflen * 2, jb->buflen + len * 2);
+		jb->buf = g_realloc (jb->buf, jb->buflen);
+		jb->pos = jb->buf + off;
+	}
+
+	memcpy (jb->pos, chunk, len);
+	jb->pos += len;
+
+	/* Say not to copy any part of this buffer */
+	return NULL;
+}
+
+/**
+ * Map callback: parse the accumulated json and apply it to the configuration.
+ * The json must be an array of objects having a "metric" name and optional
+ * "symbols" and "actions" arrays of {"name", "value"} objects.
+ * @param pool memory pool (unused)
+ * @param data map callback data
+ */
+void
+json_config_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data)
+{
+	struct config_json_buf *jb;
+	guint nelts, i, j, selts;
+	gint test_act, k;
+	json_t *js, *cur_elt, *cur_nm, *it_val;
+	json_error_t je;
+	const gchar *action_name;
+	struct dynamic_cfg_metric *cur_metric;
+	struct dynamic_cfg_symbol *cur_symbol;
+	struct dynamic_cfg_action *cur_action;
+
+	if (data->prev_data) {
+		jb = data->prev_data;
+		/*
+		 * Clean prev data.
+		 * NOTE(review): config_metrics of the previous buffer is not freed
+		 * here - confirm that cfg->current_dynamic_conf may still refer to it.
+		 */
+		g_free (jb->buf);
+		g_free (jb);
+	}
+
+	/* Now parse json */
+	if (data->cur_data) {
+		jb = data->cur_data;
+	}
+	else {
+		msg_err ("no data read");
+		return;
+	}
+	if (jb->buf == NULL) {
+		msg_err ("no data read");
+		return;
+	}
+	/* NULL terminate current buf */
+	*jb->pos = '\0';
+
+	js = json_loads (jb->buf, &je);
+	if (!js) {
+		msg_err ("cannot load json data: parse error %s, on line %d", je.text, je.line);
+		return;
+	}
+
+	if (!json_is_array (js)) {
+		json_decref (js);
+		msg_err ("loaded json is not an array");
+		return;
+	}
+
+	jb->cfg->current_dynamic_conf = NULL;
+	dynamic_cfg_free (jb->config_metrics);
+	jb->config_metrics = NULL;
+
+	/* Parse configuration */
+	nelts = json_array_size (js);
+	for (i = 0; i < nelts; i++) {
+		cur_elt = json_array_get (js, i);
+		if (!cur_elt || !json_is_object (cur_elt)) {
+			msg_err ("loaded json array element is not an object");
+			continue;
+		}
+
+		cur_nm = json_object_get (cur_elt, "metric");
+		if (!cur_nm || !json_is_string (cur_nm)) {
+			msg_err ("loaded json metric object element has no 'metric' attribute");
+			continue;
+		}
+		cur_metric = g_slice_alloc0 (sizeof (struct dynamic_cfg_metric));
+		/*
+		 * Mark all actions as unset. A dedicated counter is required: this
+		 * loop used to reuse `i' and thus destroyed the index of the
+		 * enclosing loop over the json array.
+		 */
+		for (k = METRIC_ACTION_REJECT; k < METRIC_ACTION_MAX; k ++) {
+			cur_metric->actions[k].value = -1.0;
+		}
+		cur_metric->name = g_strdup (json_string_value (cur_nm));
+		cur_nm = json_object_get (cur_elt, "symbols");
+		/* Parse symbols */
+		if (cur_nm && json_is_array (cur_nm)) {
+			selts = json_array_size (cur_nm);
+			for (j = 0; j < selts; j ++) {
+				it_val = json_array_get (cur_nm, j);
+				if (it_val && json_is_object (it_val)) {
+					if (json_object_get (it_val, "name") && json_object_get (it_val, "value")) {
+						cur_symbol = g_slice_alloc0 (sizeof (struct dynamic_cfg_symbol));
+						cur_symbol->name = g_strdup (json_string_value (json_object_get (it_val, "name")));
+						cur_symbol->value = json_number_value (json_object_get (it_val, "value"));
+						/* Insert symbol */
+						cur_metric->symbols = g_list_prepend (cur_metric->symbols, cur_symbol);
+					}
+					else {
+						msg_info ("json symbol object has no mandatory 'name' and 'value' attributes");
+					}
+				}
+			}
+		}
+		cur_nm = json_object_get (cur_elt, "actions");
+		/* Parse actions */
+		if (cur_nm && json_is_array (cur_nm)) {
+			selts = json_array_size (cur_nm);
+			for (j = 0; j < selts; j ++) {
+				it_val = json_array_get (cur_nm, j);
+				if (it_val && json_is_object (it_val)) {
+					if (json_object_get (it_val, "name") && json_object_get (it_val, "value")) {
+						action_name = json_string_value (json_object_get (it_val, "name"));
+						if (!check_action_str (action_name, &test_act)) {
+							/*
+							 * Nothing to release here: actions live inside
+							 * cur_metric. The old code passed cur_action
+							 * (uninitialised, or pointing into that array)
+							 * to g_slice_free1.
+							 */
+							msg_err ("unknown action: %s", action_name);
+							continue;
+						}
+						cur_action = &cur_metric->actions[test_act];
+						cur_action->action = test_act;
+						cur_action->value = json_number_value (json_object_get (it_val, "value"));
+					}
+					else {
+						msg_info ("json symbol object has no mandatory 'name' and 'value' attributes");
+					}
+				}
+			}
+		}
+		jb->config_metrics = g_list_prepend (jb->config_metrics, cur_metric);
+	}
+	/*
+	 * Note about thread safety: we are updating values that are gdoubles so it is not atomic in general case
+	 * but on the other hand all that data is used only in the main thread, so why it is *likely* safe
+	 * to do this task in this way without explicit lock.
+	 */
+	apply_dynamic_conf (jb->config_metrics, jb->cfg);
+
+	jb->cfg->current_dynamic_conf = jb->config_metrics;
+
+	json_decref (js);
+}
+
+/**
+ * Init dynamic configuration using map logic and specific configuration
+ * @param cfg config file
+ */
+void
+init_dynamic_config (struct config_file *cfg)
+{
+	struct config_json_buf *jb, **pjb;
+
+	if (cfg->dynamic_conf == NULL) {
+		/* No dynamic conf has been specified, so do not try to load it */
+		return;
+	}
+
+	/* Now try to add map with json data */
+	jb = g_malloc0 (sizeof (struct config_json_buf));
+	pjb = g_malloc (sizeof (struct config_json_buf *));
+	jb->buf = NULL;
+	jb->cfg = cfg;
+	*pjb = jb;
+	/* NOTE(review): jb and pjb are not released if add_map fails - confirm ownership */
+	if (!add_map (cfg, cfg->dynamic_conf, "Dynamic configuration map", json_config_read_cb, json_config_fin_cb, (void **)pjb)) {
+		msg_err ("cannot add map for configuration %s", cfg->dynamic_conf);
+	}
+}
+
+static gboolean +dump_dynamic_list (gint fd, GList *rules) +{ + GList *cur, *cur_elt; + struct dynamic_cfg_metric *metric; + struct dynamic_cfg_symbol *sym; + struct dynamic_cfg_action *act; + FILE *f; + gint i; + gboolean start = TRUE; + + /* Open buffered stream for the descriptor */ + if ((f = fdopen (fd, "a+")) == NULL) { + msg_err ("fdopen failed: %s", strerror (errno)); + return FALSE; + } + + + if (rules) { + fprintf (f, "[\n"); + cur = rules; + while (cur) { + metric = cur->data; + fprintf (f, "{\n \"metric\": \"%s\",\n", metric->name); + if (metric->symbols) { + fprintf (f, " \"symbols\": [\n"); + cur_elt = metric->symbols; + while (cur_elt) { + sym = cur_elt->data; + cur_elt = g_list_next (cur_elt); + if (cur_elt) { + fprintf (f, " {\"name\": \"%s\",\"value\": %.2f},\n", sym->name, sym->value); + } + else { + fprintf (f, " {\"name\": \"%s\",\"value\": %.2f}\n", sym->name, sym->value); + } + } + if (metric->actions) { + fprintf (f, " ],\n"); + } + else { + fprintf (f, " ]\n"); + } + } + + if (metric->actions) { + fprintf (f, " \"actions\": [\n"); + for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i 
++) { + act = &metric->actions[i]; + if (act->value < 0) { + continue; + } + fprintf (f, " %s{\"name\": \"%s\",\"value\": %.2f}\n", + (start ? "" : ","), str_action_metric (act->action), act->value); + if (start) { + start = FALSE; + } + } + fprintf (f, " ]\n"); + } + cur = g_list_next (cur); + if (cur) { + fprintf (f, "},\n"); + } + else { + fprintf (f, "}\n]\n"); + } + } + } + fclose (f); + + return TRUE; +} + +/** + * Dump dynamic configuration to the disk + * @param cfg + * @return + */ +gboolean +dump_dynamic_config (struct config_file *cfg) +{ + struct stat st; + gchar *dir, pathbuf[PATH_MAX]; + gint fd; + + if (cfg->dynamic_conf == NULL || cfg->current_dynamic_conf == NULL) { + /* No dynamic conf has been specified, so do not try to dump it */ + return FALSE; + } + + dir = g_path_get_dirname (cfg->dynamic_conf); + if (dir == NULL) { + /* Inaccessible path */ + if (dir != NULL) { + g_free (dir); + } + msg_err ("invalid file: %s", cfg->dynamic_conf); + return FALSE; + } + + if (stat (cfg->dynamic_conf, &st) == -1) { + msg_debug ("%s is unavailable: %s", cfg->dynamic_conf, strerror (errno)); + st.st_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH; + } + if (access (dir, W_OK | R_OK) == -1) { + msg_warn ("%s is inaccessible: %s", dir, strerror (errno)); + g_free (dir); + return FALSE; + } + rspamd_snprintf (pathbuf, sizeof (pathbuf), "%s%crconf-XXXXXX", dir, G_DIR_SEPARATOR); + g_free (dir); +#ifdef HAVE_MKSTEMP + /* Umask is set before */ + fd = mkstemp (pathbuf); +#else + fd = g_mkstemp_full (pathbuf, O_RDWR, S_IWUSR | S_IRUSR); +#endif + if (fd == -1) { + msg_err ("mkstemp error: %s", strerror (errno)); + + return FALSE; + } + + if (!dump_dynamic_list (fd, cfg->current_dynamic_conf)) { + close (fd); + unlink (pathbuf); + return FALSE; + } + + (void)unlink (cfg->dynamic_conf); + + /* Rename old config */ + if (rename (pathbuf, cfg->dynamic_conf) == -1) { + msg_err ("rename error: %s", strerror (errno)); + close (fd); + unlink (pathbuf); + return FALSE; + } + /* Set 
permissions */ + + if (chmod (cfg->dynamic_conf, st.st_mode) == -1) { + msg_warn ("chmod failed: %s", strerror (errno)); + } + + close (fd); + return TRUE; +} + +/** + * Add symbol for specified metric + * @param cfg config file object + * @param metric metric's name + * @param symbol symbol's name + * @param value value of symbol + * @return + */ +gboolean +add_dynamic_symbol (struct config_file *cfg, const gchar *metric_name, const gchar *symbol, gdouble value) +{ + GList *cur; + struct dynamic_cfg_metric *metric = NULL; + struct dynamic_cfg_symbol *sym = NULL; + + if (cfg->dynamic_conf == NULL) { + msg_info ("dynamic conf is disabled"); + return FALSE; + } + + cur = cfg->current_dynamic_conf; + while (cur) { + metric = cur->data; + if (g_ascii_strcasecmp (metric->name, metric_name) == 0) { + break; + } + metric = NULL; + cur = g_list_next (cur); + } + + if (metric != NULL) { + /* Search for a symbol */ + cur = metric->symbols; + while (cur) { + sym = cur->data; + if (g_ascii_strcasecmp (sym->name, symbol) == 0) { + sym->value = value; + msg_debug ("change value of action %s to %.2f", symbol, value); + break; + } + sym = NULL; + cur = g_list_next (cur); + } + if (sym == NULL) { + /* Symbol not found, insert it */ + sym = g_slice_alloc (sizeof (struct dynamic_cfg_symbol)); + sym->name = g_strdup (symbol); + sym->value = value; + metric->symbols = g_list_prepend (metric->symbols, sym); + msg_debug ("create symbol %s in metric %s", symbol, metric_name); + } + } + else { + /* Metric not found, create it */ + metric = g_slice_alloc0 (sizeof (struct dynamic_cfg_metric)); + sym = g_slice_alloc (sizeof (struct dynamic_cfg_symbol)); + sym->name = g_strdup (symbol); + sym->value = value; + metric->symbols = g_list_prepend (metric->symbols, sym); + metric->name = g_strdup (metric_name); + cfg->current_dynamic_conf = g_list_prepend (cfg->current_dynamic_conf, metric); + msg_debug ("create metric %s for symbol %s", metric_name, symbol); + } + + apply_dynamic_conf 
(cfg->current_dynamic_conf, cfg); + + return TRUE; +} + + +/** + * Add action for specified metric + * @param cfg config file object + * @param metric metric's name + * @param action action's name + * @param value value of symbol + * @return + */ +gboolean +add_dynamic_action (struct config_file *cfg, const gchar *metric_name, guint action, gdouble value) +{ + GList *cur; + struct dynamic_cfg_metric *metric = NULL; + + if (cfg->dynamic_conf == NULL) { + msg_info ("dynamic conf is disabled"); + return FALSE; + } + + cur = cfg->current_dynamic_conf; + while (cur) { + metric = cur->data; + if (g_ascii_strcasecmp (metric->name, metric_name) == 0) { + break; + } + metric = NULL; + cur = g_list_next (cur); + } + + if (metric != NULL) { + /* Search for an action */ + metric->actions[action].value = value; + } + else { + /* Metric not found, create it */ + metric = g_slice_alloc0 (sizeof (struct dynamic_cfg_metric)); + metric->actions[action].value = value; + metric->name = g_strdup (metric_name); + cfg->current_dynamic_conf = g_list_prepend (cfg->current_dynamic_conf, metric); + msg_debug ("create metric %s for action %d", metric_name, action); + } + + apply_dynamic_conf (cfg->current_dynamic_conf, cfg); + + return TRUE; +} diff --git a/src/libserver/dynamic_cfg.h b/src/libserver/dynamic_cfg.h new file mode 100644 index 000000000..b65d7aa9a --- /dev/null +++ b/src/libserver/dynamic_cfg.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2010-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef DYNAMIC_CFG_H_ +#define DYNAMIC_CFG_H_ + +#include "config.h" +#include "cfg_file.h" + +/** + * Init dynamic configuration using map logic and specific configuration + * @param cfg config file + */ +void init_dynamic_config (struct config_file *cfg); + +/** + * Dump dynamic configuration to the disk + * @param cfg + * @return + */ +gboolean dump_dynamic_config (struct config_file *cfg); + +/** + * Add symbol for specified metric + * @param cfg config file object + * @param metric metric's name + * @param symbol symbol's name + * @param value value of symbol + * @return + */ +gboolean add_dynamic_symbol (struct config_file *cfg, const gchar *metric, const gchar *symbol, gdouble value); + + +/** + * Add action for specified metric + * @param cfg config file object + * @param metric metric's name + * @param action action's name + * @param value value of symbol + * @return + */ +gboolean add_dynamic_action (struct config_file *cfg, const gchar *metric, guint action, gdouble value); + + +#endif /* DYNAMIC_CFG_H_ */ diff --git a/src/libserver/events.c b/src/libserver/events.c new file mode 100644 index 000000000..85843fd05 --- /dev/null +++ b/src/libserver/events.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "main.h" +#include "events.h" + +static gboolean +rspamd_event_equal (gconstpointer a, gconstpointer b) +{ + const struct rspamd_async_event *ev1 = a, *ev2 = b; + + if (ev1->fin == ev2->fin) { + return ev1->user_data == ev2->user_data; + } + + return FALSE; +} + +static guint +rspamd_event_hash (gconstpointer a) +{ + const struct rspamd_async_event *ev = a; + + return GPOINTER_TO_UINT (ev->user_data); +} + +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) +static void +event_mutex_free (gpointer data) +{ + GMutex *mtx = data; + + g_mutex_free (mtx); +} + +static void +event_cond_free (gpointer data) +{ + GCond *cond = data; + + g_cond_free (cond); +} +#endif + +struct rspamd_async_session * +new_async_session (rspamd_mempool_t * pool, session_finalizer_t fin, + event_finalizer_t restore, event_finalizer_t cleanup, void *user_data) +{ + struct rspamd_async_session *new; + + new = rspamd_mempool_alloc (pool, sizeof (struct rspamd_async_session)); + new->pool = pool; + new->fin = fin; + new->restore = restore; + new->cleanup = cleanup; + new->user_data = user_data; + new->wanna_die = FALSE; + new->events = g_hash_table_new (rspamd_event_hash, rspamd_event_equal); +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + new->mtx = g_mutex_new (); + new->cond = g_cond_new (); + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) event_mutex_free, new->mtx); + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) event_cond_free, new->cond); +#else + new->mtx = rspamd_mempool_alloc (pool, sizeof (GMutex)); + g_mutex_init (new->mtx); + new->cond = rspamd_mempool_alloc (pool, sizeof (GCond)); + g_cond_init (new->cond); + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_mutex_clear, new->mtx); + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_cond_clear, new->cond); +#endif + new->threads = 0; + + rspamd_mempool_add_destructor (pool, 
(rspamd_mempool_destruct_t) g_hash_table_destroy, new->events); + + return new; +} + +void +register_async_event (struct rspamd_async_session *session, event_finalizer_t fin, void *user_data, GQuark subsystem) +{ + struct rspamd_async_event *new; + + if (session == NULL) { + msg_info ("session is NULL"); + return; + } + + g_mutex_lock (session->mtx); + new = rspamd_mempool_alloc (session->pool, sizeof (struct rspamd_async_event)); + new->fin = fin; + new->user_data = user_data; + new->subsystem = subsystem; + + g_hash_table_insert (session->events, new, new); + + msg_debug ("added event: %p, pending %d events, subsystem: %s", user_data, g_hash_table_size (session->events), + g_quark_to_string (subsystem)); + + g_mutex_unlock (session->mtx); +} + +void +remove_normal_event (struct rspamd_async_session *session, event_finalizer_t fin, void *ud) +{ + struct rspamd_async_event search_ev, *found_ev; + + if (session == NULL) { + msg_info ("session is NULL"); + return; + } + + g_mutex_lock (session->mtx); + /* Search for event */ + search_ev.fin = fin; + search_ev.user_data = ud; + if ((found_ev = g_hash_table_lookup (session->events, &search_ev)) != NULL) { + g_hash_table_remove (session->events, found_ev); + msg_debug ("removed event: %p, subsystem: %s, pending %d events", ud, + g_quark_to_string (found_ev->subsystem), g_hash_table_size (session->events)); + /* Remove event */ + fin (ud); + } + g_mutex_unlock (session->mtx); + + check_session_pending (session); +} + +static gboolean +rspamd_session_destroy (gpointer k, gpointer v, gpointer unused) +{ + struct rspamd_async_event *ev = v; + + /* Call event's finalizer */ + if (ev->fin != NULL) { + ev->fin (ev->user_data); + } + + return TRUE; +} + +gboolean +destroy_session (struct rspamd_async_session *session) +{ + if (session == NULL) { + msg_info ("session is NULL"); + return FALSE; + } + + g_mutex_lock (session->mtx); + if (session->threads > 0) { + /* Wait for conditional variable to finish processing */ + 
g_mutex_unlock (session->mtx); + g_cond_wait (session->cond, session->mtx); + } + + session->wanna_die = TRUE; + + g_hash_table_foreach_remove (session->events, rspamd_session_destroy, session); + + /* Mutex can be destroyed here */ + g_mutex_unlock (session->mtx); + + if (session->cleanup != NULL) { + session->cleanup (session->user_data); + } + return TRUE; +} + +gboolean +check_session_pending (struct rspamd_async_session *session) +{ + g_mutex_lock (session->mtx); + if (session->wanna_die && g_hash_table_size (session->events) == 0) { + session->wanna_die = FALSE; + if (session->threads > 0) { + /* Wait for conditional variable to finish processing */ + g_cond_wait (session->cond, session->mtx); + } + if (session->fin != NULL) { + g_mutex_unlock (session->mtx); + if (! session->fin (session->user_data)) { + /* Session finished incompletely, perform restoration */ + if (session->restore != NULL) { + session->restore (session->user_data); + /* Call pending once more */ + return check_session_pending (session); + } + return TRUE; + } + else { + return FALSE; + } + } + g_mutex_unlock (session->mtx); + return FALSE; + } + g_mutex_unlock (session->mtx); + return TRUE; +} + + +/** + * Add new async thread to session + * @param session session object + */ +void +register_async_thread (struct rspamd_async_session *session) +{ + g_atomic_int_inc (&session->threads); + msg_debug ("added thread: pending %d thread", session->threads); +} + +/** + * Remove async thread from session and check whether session can be terminated + * @param session session object + */ +void +remove_async_thread (struct rspamd_async_session *session) +{ + if (g_atomic_int_dec_and_test (&session->threads)) { + /* Signal if there are any sessions waiting */ + g_mutex_lock (session->mtx); + g_cond_signal (session->cond); + g_mutex_unlock (session->mtx); + } + msg_debug ("removed thread: pending %d thread", session->threads); +} diff --git a/src/libserver/events.h b/src/libserver/events.h new file 
mode 100644 index 000000000..6728288eb --- /dev/null +++ b/src/libserver/events.h @@ -0,0 +1,88 @@
#ifndef RSPAMD_EVENTS_H
#define RSPAMD_EVENTS_H

#include "config.h"
#include "mem_pool.h"

struct rspamd_async_event;

/* Callback that finalizes a single event (also used for restore/cleanup hooks) */
typedef void (*event_finalizer_t)(void *user_data);
/* Callback that finalizes a whole session; a FALSE result triggers the restore hook */
typedef gboolean (*session_finalizer_t)(void *user_data);

/* A pending event; events are identified by the (fin, user_data) pair */
struct rspamd_async_event {
	GQuark subsystem;		/* owner of the event, printed in debug logs */
	event_finalizer_t fin;	/* called when the event is removed or the session is destroyed */
	void *user_data;		/* argument passed to fin */
	guint ref;				/* NOTE(review): not touched by events.c — confirm whether it is still needed */
};

/* Set of pending events plus the callbacks that drive session completion */
struct rspamd_async_session {
	session_finalizer_t fin;	/* called when a dying session has no events left */
	event_finalizer_t restore;	/* called when fin returns FALSE */
	event_finalizer_t cleanup;	/* called at the end of destroy_session */
	GHashTable *events;			/* pending events, keyed by the event itself */
	void *user_data;			/* argument passed to fin, restore and cleanup */
	rspamd_mempool_t *pool;		/* pool that events are allocated from */
	gboolean wanna_die;			/* set by destroy_session, cleared by check_session_pending */
	guint threads;				/* number of registered async threads */
	GMutex *mtx;				/* protects the events table */
	GCond *cond;				/* signalled when the thread counter drops to zero */
};

/**
 * Make new async session
 * @param pool pool to alloc memory from
 * @param fin a callback called when no events are found in session
 * @param restore a callback is called to restore processing of session
 * @param cleanup a callback called when session is forcefully destroyed
 * @param user_data abstract user data
 * @return new session object allocated from the pool
 */
struct rspamd_async_session *new_async_session (rspamd_mempool_t *pool,
	session_finalizer_t fin, event_finalizer_t restore,
	event_finalizer_t cleanup, void *user_data);

/**
 * Insert new event to the session
 * @param session session object
 * @param fin finalizer callback
 * @param user_data abstract user_data
 * @param subsystem quark naming the subsystem that registers the event
 */
void register_async_event (struct rspamd_async_session *session,
	event_finalizer_t fin, void *user_data, GQuark subsystem);

/**
 * Remove normal event: if an event with this finalizer and user data is
 * registered, it is removed and the finalizer is called
 * @param session session object
 * @param fin final callback
 * @param ud user data object
 */
void remove_normal_event (struct rspamd_async_session *session, event_finalizer_t fin, void *ud);

/**
 * Must be called at the end of session: it calls fin functions for all
 * pending events, removes them and then calls the cleanup callback
 * @return FALSE if session is NULL, TRUE otherwise
 */
gboolean destroy_session (struct
	rspamd_async_session *session);

/**
 * Check session for events pending and call fin callback if no events are pending
 * @param session session object
 * @return FALSE if the session finalizer reported completion (or there is no
 * finalizer), TRUE otherwise
 */
gboolean check_session_pending (struct rspamd_async_session *session);

/**
 * Add new async thread to session
 * @param session session object
 */
void register_async_thread (struct rspamd_async_session *session);

/**
 * Remove async thread from session and check whether session can be terminated
 * @param session session object
 */
void remove_async_thread (struct rspamd_async_session *session);

#endif /* RSPAMD_EVENTS_H */
diff --git a/src/libserver/html.c b/src/libserver/html.c new file mode 100644 index 000000000..028c54f6c --- /dev/null +++ b/src/libserver/html.c @@ -0,0 +1,942 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED.
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */

#include "config.h"
#include "util.h"
#include "main.h"
#include "message.h"
#include "html.h"
#include "url.h"

/* NOTE(review): presumably set once tag_defs has been sorted for lookup — the code using it is not visible here */
static sig_atomic_t tags_sorted = 0;

/*
 * Table of known HTML tags. Each entry holds the tag identifier, the
 * lower-case tag name and a mask of CM_* content model flags.
 */
static struct html_tag tag_defs[] = {
	/* W3C defined elements */
	{Tag_A, "a", (CM_INLINE)},
	{Tag_ABBR, "abbr", (CM_INLINE)},
	{Tag_ACRONYM, "acronym", (CM_INLINE)},
	{Tag_ADDRESS, "address", (CM_BLOCK)},
	{Tag_APPLET, "applet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)},
	{Tag_AREA, "area", (CM_BLOCK | CM_EMPTY)},
	{Tag_B, "b", (CM_INLINE)},
	{Tag_BASE, "base", (CM_HEAD | CM_EMPTY)},
	{Tag_BASEFONT, "basefont", (CM_INLINE | CM_EMPTY)},
	{Tag_BDO, "bdo", (CM_INLINE)},
	{Tag_BIG, "big", (CM_INLINE)},
	{Tag_BLOCKQUOTE, "blockquote", (CM_BLOCK)},
	{Tag_BODY, "body", (CM_HTML | CM_OPT | CM_OMITST)},
	{Tag_BR, "br", (CM_INLINE | CM_EMPTY)},
	{Tag_BUTTON, "button", (CM_INLINE)},
	{Tag_CAPTION, "caption", (CM_TABLE)},
	{Tag_CENTER, "center", (CM_BLOCK)},
	{Tag_CITE, "cite", (CM_INLINE)},
	{Tag_CODE, "code", (CM_INLINE)},
	{Tag_COL, "col", (CM_TABLE | CM_EMPTY)},
	{Tag_COLGROUP, "colgroup", (CM_TABLE | CM_OPT)},
	{Tag_DD, "dd", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)},
	{Tag_DEL, "del", (CM_INLINE | CM_BLOCK | CM_MIXED)},
	{Tag_DFN, "dfn", (CM_INLINE)},
	{Tag_DIR, "dir", (CM_BLOCK | CM_OBSOLETE)},
	{Tag_DIV, "div", (CM_BLOCK)},
	{Tag_DL, "dl", (CM_BLOCK)},
	{Tag_DT, "dt", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)},
	{Tag_EM, "em", (CM_INLINE)},
	{Tag_FIELDSET, "fieldset", (CM_BLOCK)},
	{Tag_FONT, "font", (CM_INLINE)},
	{Tag_FORM, "form", (CM_BLOCK)},
	{Tag_FRAME, "frame", (CM_FRAMES | CM_EMPTY)},
	{Tag_FRAMESET, "frameset", (CM_HTML | CM_FRAMES)},
	{Tag_H1, "h1", (CM_BLOCK | CM_HEADING)},
	{Tag_H2, "h2", (CM_BLOCK | CM_HEADING)},
	{Tag_H3, "h3", (CM_BLOCK | CM_HEADING)},
	{Tag_H4, "h4", (CM_BLOCK | CM_HEADING)},
	{Tag_H5, "h5", (CM_BLOCK | CM_HEADING)},
	{Tag_H6, "h6", (CM_BLOCK | CM_HEADING)},
	{Tag_HEAD, "head", (CM_HTML | CM_OPT | CM_OMITST)},
	{Tag_HR, "hr", (CM_BLOCK | CM_EMPTY)},
	{Tag_HTML, "html", (CM_HTML | CM_OPT | CM_OMITST)},
	{Tag_I, "i", (CM_INLINE)},
	{Tag_IFRAME, "iframe", (CM_INLINE)},
	{Tag_IMG, "img", (CM_INLINE | CM_IMG | CM_EMPTY)},
	{Tag_INPUT, "input", (CM_INLINE | CM_IMG | CM_EMPTY)},
	{Tag_INS, "ins", (CM_INLINE | CM_BLOCK | CM_MIXED)},
	{Tag_ISINDEX, "isindex", (CM_BLOCK | CM_EMPTY)},
	{Tag_KBD, "kbd", (CM_INLINE)},
	{Tag_LABEL, "label", (CM_INLINE)},
	{Tag_LEGEND, "legend", (CM_INLINE)},
	{Tag_LI, "li", (CM_LIST | CM_OPT | CM_NO_INDENT)},
	{Tag_LINK, "link", (CM_HEAD | CM_EMPTY)},
	{Tag_LISTING, "listing", (CM_BLOCK | CM_OBSOLETE)},
	{Tag_MAP, "map", (CM_INLINE)},
	{Tag_MENU, "menu", (CM_BLOCK | CM_OBSOLETE)},
	{Tag_META, "meta", (CM_HEAD | CM_EMPTY)},
	{Tag_NOFRAMES, "noframes", (CM_BLOCK | CM_FRAMES)},
	{Tag_NOSCRIPT, "noscript", (CM_BLOCK | CM_INLINE | CM_MIXED)},
	{Tag_OBJECT, "object", (CM_OBJECT | CM_HEAD | CM_IMG | CM_INLINE | CM_PARAM)},
	{Tag_OL, "ol", (CM_BLOCK)},
	{Tag_OPTGROUP, "optgroup", (CM_FIELD | CM_OPT)},
	{Tag_OPTION, "option", (CM_FIELD | CM_OPT)},
	{Tag_P, "p", (CM_BLOCK | CM_OPT)},
	{Tag_PARAM, "param", (CM_INLINE | CM_EMPTY)},
	{Tag_PLAINTEXT, "plaintext", (CM_BLOCK | CM_OBSOLETE)},
	{Tag_PRE, "pre", (CM_BLOCK)},
	{Tag_Q, "q", (CM_INLINE)},
	{Tag_RB, "rb", (CM_INLINE)},
	{Tag_RBC, "rbc", (CM_INLINE)},
	{Tag_RP, "rp", (CM_INLINE)},
	{Tag_RT, "rt", (CM_INLINE)},
	{Tag_RTC, "rtc", (CM_INLINE)},
	{Tag_RUBY, "ruby", (CM_INLINE)},
	{Tag_S, "s", (CM_INLINE)},
	{Tag_SAMP, "samp", (CM_INLINE)},
	{Tag_SCRIPT, "script", (CM_HEAD | CM_MIXED | CM_BLOCK | CM_INLINE)},
	{Tag_SELECT, "select", (CM_INLINE | CM_FIELD)},
	{Tag_SMALL, "small", (CM_INLINE)},
	{Tag_SPAN, "span", (CM_INLINE)},
	{Tag_STRIKE, "strike", (CM_INLINE)},
	{Tag_STRONG, "strong", (CM_INLINE)},
	{Tag_STYLE, "style", (CM_HEAD)},
	{Tag_SUB, "sub", (CM_INLINE)},
	{Tag_SUP, "sup", (CM_INLINE)},
	{Tag_TABLE, "table", (CM_BLOCK)},
	{Tag_TBODY, "tbody", (CM_TABLE | CM_ROWGRP | CM_OPT)},
	{Tag_TD, "td", (CM_ROW | CM_OPT | CM_NO_INDENT)},
	{Tag_TEXTAREA, "textarea", (CM_INLINE | CM_FIELD)},
	{Tag_TFOOT, "tfoot", (CM_TABLE | CM_ROWGRP | CM_OPT)},
	{Tag_TH, "th", (CM_ROW | CM_OPT | CM_NO_INDENT)},
	{Tag_THEAD, "thead", (CM_TABLE | CM_ROWGRP | CM_OPT)},
	{Tag_TITLE, "title", (CM_HEAD)},
	{Tag_TR, "tr", (CM_TABLE | CM_OPT)},
	{Tag_TT, "tt", (CM_INLINE)},
	{Tag_U, "u", (CM_INLINE)},
	{Tag_UL, "ul", (CM_BLOCK)},
	{Tag_VAR, "var", (CM_INLINE)},
	{Tag_XMP, "xmp", (CM_BLOCK | CM_OBSOLETE)},
	{Tag_NEXTID, "nextid", (CM_HEAD | CM_EMPTY)},

	/* proprietary elements */
	{Tag_ALIGN, "align", (CM_BLOCK)},
	{Tag_BGSOUND, "bgsound", (CM_HEAD | CM_EMPTY)},
	{Tag_BLINK, "blink", (CM_INLINE)},
	{Tag_COMMENT, "comment", (CM_INLINE)},
	{Tag_EMBED, "embed", (CM_INLINE | CM_IMG | CM_EMPTY)},
	{Tag_ILAYER, "ilayer", (CM_INLINE)},
	{Tag_KEYGEN, "keygen", (CM_INLINE | CM_EMPTY)},
	{Tag_LAYER, "layer", (CM_BLOCK)},
	{Tag_MARQUEE, "marquee", (CM_INLINE | CM_OPT)},
	{Tag_MULTICOL, "multicol", (CM_BLOCK)},
	{Tag_NOBR, "nobr", (CM_INLINE)},
	{Tag_NOEMBED, "noembed", (CM_INLINE)},
	{Tag_NOLAYER, "nolayer", (CM_BLOCK | CM_INLINE | CM_MIXED)},
	{Tag_NOSAVE, "nosave", (CM_BLOCK)},
	{Tag_SERVER, "server", (CM_HEAD | CM_MIXED | CM_BLOCK | CM_INLINE)},
	{Tag_SERVLET, "servlet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)},
	{Tag_SPACER, "spacer", (CM_INLINE | CM_EMPTY)},
	{Tag_WBR, "wbr", (CM_INLINE | CM_EMPTY)},
};

static sig_atomic_t
entities_sorted = 0; +struct _entity; +typedef struct _entity entity; + +struct _entity { + gchar *name; + uint code; + gchar *replacement; +}; + + +static entity entities_defs[] = { + /* + ** Markup pre-defined character entities + */ + {"quot", 34, "\""}, + {"amp", 38, "&"}, + {"apos", 39, "'"}, + {"lt", 60, "<"}, + {"gt", 62, ">"}, + + /* + ** Latin-1 character entities + */ + {"nbsp", 160, " "}, + {"iexcl", 161, "!"}, + {"cent", 162, "cent"}, + {"pound", 163, "pound"}, + {"curren", 164, "current"}, + {"yen", 165, "yen"}, + {"brvbar", 166, NULL}, + {"sect", 167, NULL}, + {"uml", 168, "uml"}, + {"copy", 169, "c"}, + {"ordf", 170, NULL}, + {"laquo", 171, "\""}, + {"not", 172, "!"}, + {"shy", 173, NULL}, + {"reg", 174, "r"}, + {"macr", 175, NULL}, + {"deg", 176, "deg"}, + {"plusmn", 177, "+-"}, + {"sup2", 178, "2"}, + {"sup3", 179, "3"}, + {"acute", 180, NULL}, + {"micro", 181, NULL}, + {"para", 182, NULL}, + {"middot", 183, "."}, + {"cedil", 184, NULL}, + {"sup1", 185, "1"}, + {"ordm", 186, NULL}, + {"raquo", 187, "\""}, + {"frac14", 188, "1/4"}, + {"frac12", 189, "1/2"}, + {"frac34", 190, "3/4"}, + {"iquest", 191, "i"}, + {"Agrave", 192, "a"}, + {"Aacute", 193, "a"}, + {"Acirc", 194, "a"}, + {"Atilde", 195, "a"}, + {"Auml", 196, "a"}, + {"Aring", 197, "a"}, + {"AElig", 198, "a"}, + {"Ccedil", 199, "c"}, + {"Egrave", 200, "e"}, + {"Eacute", 201, "e"}, + {"Ecirc", 202, "e"}, + {"Euml", 203, "e"}, + {"Igrave", 204, "i"}, + {"Iacute", 205, "i"}, + {"Icirc", 206, "i"}, + {"Iuml", 207, "i"}, + {"ETH", 208, "e"}, + {"Ntilde", 209, "n"}, + {"Ograve", 210, "o"}, + {"Oacute", 211, "o"}, + {"Ocirc", 212, "o"}, + {"Otilde", 213, "o"}, + {"Ouml", 214, "o"}, + {"times", 215, "t"}, + {"Oslash", 216, "o"}, + {"Ugrave", 217, "u"}, + {"Uacute", 218, "u"}, + {"Ucirc", 219, "u"}, + {"Uuml", 220, "u"}, + {"Yacute", 221, "y"}, + {"THORN", 222, "t"}, + {"szlig", 223, "s"}, + {"agrave", 224, "a"}, + {"aacute", 225, "a"}, + {"acirc", 226, "a"}, + {"atilde", 227, "a"}, + {"auml", 228, 
"a"}, + {"aring", 229, "a"}, + {"aelig", 230, "a"}, + {"ccedil", 231, "c"}, + {"egrave", 232, "e"}, + {"eacute", 233, "e"}, + {"ecirc", 234, "e"}, + {"euml", 235, "e"}, + {"igrave", 236, "e"}, + {"iacute", 237, "e"}, + {"icirc", 238, "e"}, + {"iuml", 239, "e"}, + {"eth", 240, "e"}, + {"ntilde", 241, "n"}, + {"ograve", 242, "o"}, + {"oacute", 243, "o"}, + {"ocirc", 244, "o"}, + {"otilde", 245, "o"}, + {"ouml", 246, "o"}, + {"divide", 247, "/"}, + {"oslash", 248, "/"}, + {"ugrave", 249, "u"}, + {"uacute", 250, "u"}, + {"ucirc", 251, "u"}, + {"uuml", 252, "u"}, + {"yacute", 253, "y"}, + {"thorn", 254, "t"}, + {"yuml", 255, "y"}, + + /* + ** Extended Entities defined in HTML 4: Symbols + */ + {"fnof", 402, "f"}, + {"Alpha", 913, "alpha"}, + {"Beta", 914, "beta"}, + {"Gamma", 915, "gamma"}, + {"Delta", 916, "delta"}, + {"Epsilon", 917, "epsilon"}, + {"Zeta", 918, "zeta"}, + {"Eta", 919, "eta"}, + {"Theta", 920, "theta"}, + {"Iota", 921, "iota"}, + {"Kappa", 922, "kappa"}, + {"Lambda", 923, "lambda"}, + {"Mu", 924, "mu"}, + {"Nu", 925, "nu"}, + {"Xi", 926, "xi"}, + {"Omicron", 927, "omicron"}, + {"Pi", 928, "pi"}, + {"Rho", 929, "rho"}, + {"Sigma", 931, "sigma"}, + {"Tau", 932, "tau"}, + {"Upsilon", 933, "upsilon"}, + {"Phi", 934, "phi"}, + {"Chi", 935, "chi"}, + {"Psi", 936, "psi"}, + {"Omega", 937, "omega"}, + {"alpha", 945, "alpha"}, + {"beta", 946, "beta"}, + {"gamma", 947, "gamma"}, + {"delta", 948, "delta"}, + {"epsilon", 949, "epsilon"}, + {"zeta", 950, "zeta"}, + {"eta", 951, "eta"}, + {"theta", 952, "theta"}, + {"iota", 953, "iota"}, + {"kappa", 954, "kappa"}, + {"lambda", 955, "lambda"}, + {"mu", 956, "mu"}, + {"nu", 957, "nu"}, + {"xi", 958, "xi"}, + {"omicron", 959, "omicron"}, + {"pi", 960, "pi"}, + {"rho", 961, "rho"}, + {"sigmaf", 962, "sigmaf"}, + {"sigma", 963, "sigma"}, + {"tau", 964, "tau"}, + {"upsilon", 965, "upsilon"}, + {"phi", 966, "phi"}, + {"chi", 967, "chi"}, + {"psi", 968, "psi"}, + {"omega", 969, "omega"}, + {"thetasym", 977, "thetasym"}, + 
{"upsih", 978, "upsih"}, + {"piv", 982, "piv"}, + {"bull", 8226, "bull"}, + {"hellip", 8230, "..."}, + {"prime", 8242, "'"}, + {"Prime", 8243, "'"}, + {"oline", 8254, "-"}, + {"frasl", 8260, NULL}, + {"weierp", 8472, NULL}, + {"image", 8465, NULL}, + {"real", 8476, NULL}, + {"trade", 8482, NULL}, + {"alefsym", 8501, "a"}, + {"larr", 8592, NULL}, + {"uarr", 8593, NULL}, + {"rarr", 8594, NULL}, + {"darr", 8595, NULL}, + {"harr", 8596, NULL}, + {"crarr", 8629, NULL}, + {"lArr", 8656, NULL}, + {"uArr", 8657, NULL}, + {"rArr", 8658, NULL}, + {"dArr", 8659, NULL}, + {"hArr", 8660, NULL}, + {"forall", 8704, NULL}, + {"part", 8706, NULL}, + {"exist", 8707, NULL}, + {"empty", 8709, NULL}, + {"nabla", 8711, NULL}, + {"isin", 8712, NULL}, + {"notin", 8713, NULL}, + {"ni", 8715, NULL}, + {"prod", 8719, NULL}, + {"sum", 8721, "E"}, + {"minus", 8722, "-"}, + {"lowast", 8727, NULL}, + {"radic", 8730, NULL}, + {"prop", 8733, NULL}, + {"infin", 8734, NULL}, + {"ang", 8736, "'"}, + {"and", 8743, "&"}, + {"or", 8744, "|"}, + {"cap", 8745, NULL}, + {"cup", 8746, NULL}, + {"gint", 8747, NULL}, + {"there4", 8756, NULL}, + {"sim", 8764, NULL}, + {"cong", 8773, NULL}, + {"asymp", 8776, NULL}, + {"ne", 8800, "!="}, + {"equiv", 8801, "=="}, + {"le", 8804, "<="}, + {"ge", 8805, ">="}, + {"sub", 8834, NULL}, + {"sup", 8835, NULL}, + {"nsub", 8836, NULL}, + {"sube", 8838, NULL}, + {"supe", 8839, NULL}, + {"oplus", 8853, NULL}, + {"otimes", 8855, NULL}, + {"perp", 8869, NULL}, + {"sdot", 8901, NULL}, + {"lceil", 8968, NULL}, + {"rceil", 8969, NULL}, + {"lfloor", 8970, NULL}, + {"rfloor", 8971, NULL}, + {"lang", 9001, NULL}, + {"rang", 9002, NULL}, + {"loz", 9674, NULL}, + {"spades", 9824, NULL}, + {"clubs", 9827, NULL}, + {"hearts", 9829, NULL}, + {"diams", 9830, NULL}, + + /* + ** Extended Entities defined in HTML 4: Special (less Markup at top) + */ + {"OElig", 338, NULL}, + {"oelig", 339, NULL}, + {"Scaron", 352, NULL}, + {"scaron", 353, NULL}, + {"Yuml", 376, NULL}, + {"circ", 710, NULL}, + 
{"tilde", 732, NULL}, + {"ensp", 8194, NULL}, + {"emsp", 8195, NULL}, + {"thinsp", 8201, NULL}, + {"zwnj", 8204, NULL}, + {"zwj", 8205, NULL}, + {"lrm", 8206, NULL}, + {"rlm", 8207, NULL}, + {"ndash", 8211, "-"}, + {"mdash", 8212, "-"}, + {"lsquo", 8216, "'"}, + {"rsquo", 8217, "'"}, + {"sbquo", 8218, "\""}, + {"ldquo", 8220, "\""}, + {"rdquo", 8221, "\""}, + {"bdquo", 8222, "\""}, + {"dagger", 8224, "T"}, + {"Dagger", 8225, "T"}, + {"permil", 8240, NULL}, + {"lsaquo", 8249, "\""}, + {"rsaquo", 8250, "\""}, + {"euro", 8364, "E"}, +}; + +static entity entities_defs_num[ (G_N_ELEMENTS (entities_defs)) ]; + +static gint +tag_cmp (const void *m1, const void *m2) +{ + const struct html_tag *p1 = m1; + const struct html_tag *p2 = m2; + + return g_ascii_strcasecmp (p1->name, p2->name); +} + +static gint +entity_cmp (const void *m1, const void *m2) +{ + const entity *p1 = m1; + const entity *p2 = m2; + + return g_ascii_strcasecmp (p1->name, p2->name); +} + +static gint +entity_cmp_num (const void *m1, const void *m2) +{ + const entity *p1 = m1; + const entity *p2 = m2; + + return p1->code - p2->code; +} + +static GNode * +construct_html_node (rspamd_mempool_t * pool, gchar *text, gsize tag_len) +{ + struct html_node *html; + GNode *n = NULL; + struct html_tag key, *found; + gchar t; + + if (text == NULL || *text == '\0') { + return NULL; + } + + html = rspamd_mempool_alloc0 (pool, sizeof (struct html_node)); + + /* Check whether this tag is fully closed */ + if (*(text + tag_len - 1) == '/') { + html->flags |= FL_CLOSED; + } + + /* Check xml tag */ + if (*text == '?' 
&& g_ascii_strncasecmp (text + 1, "xml", sizeof ("xml") - 1) == 0) { + html->flags |= FL_XML; + html->tag = NULL; + } + else { + if (*text == '/') { + html->flags |= FL_CLOSING; + text++; + } + + /* Find end of tag name */ + key.name = text; + while (*text && g_ascii_isalnum (*(++text))); + + t = *text; + *text = '\0'; + + /* Match tag id by tag name */ + if ((found = bsearch (&key, tag_defs, G_N_ELEMENTS (tag_defs), sizeof (struct html_tag), tag_cmp)) != NULL) { + *text = t; + html->tag = found; + } + else { + *text = t; + return NULL; + } + } + + n = g_node_new (html); + + return n; +} + +static gboolean +check_balance (GNode * node, GNode ** cur_level) +{ + struct html_node *arg = node->data, *tmp; + GNode *cur; + + if (arg->flags & FL_CLOSING) { + /* First of all check whether this tag is closing tag for parent node */ + cur = node->parent; + while (cur && cur->data) { + tmp = cur->data; + if ((tmp->tag && arg->tag) && tmp->tag->id == arg->tag->id && (tmp->flags & FL_CLOSED) == 0) { + tmp->flags |= FL_CLOSED; + /* Destroy current node as we find corresponding parent node */ + g_node_destroy (node); + /* Change level */ + *cur_level = cur->parent; + return TRUE; + } + cur = cur->parent; + } + } + else { + return TRUE; + } + + return FALSE; +} + +struct html_tag * +get_tag_by_name (const gchar *name) +{ + struct html_tag key; + + key.name = name; + + return bsearch (&key, tag_defs, G_N_ELEMENTS (tag_defs), sizeof (struct html_tag), tag_cmp); +} + +/* Decode HTML entitles in text */ +void +decode_entitles (gchar *s, guint * len) +{ + guint l, rep_len; + gchar *t = s; /* t - tortoise */ + gchar *h = s; /* h - hare */ + gchar *e = s; + gchar *end_ptr; + gint state = 0, val, base; + entity *found, key; + + if (len == NULL || *len == 0) { + l = strlen (s); + } + else { + l = *len; + } + + while (h - s < (gint)l) { + switch (state) { + /* Out of entitle */ + case 0: + if (*h == '&') { + state = 1; + e = h; + h++; + continue; + } + else { + *t = *h; + h++; + t++; + } + 
break; + case 1: + if (*h == ';') { + /* Determine base */ + /* First find in entities table */ + + key.name = e + 1; + *h = '\0'; + if (*(e + 1) != '#' && (found = bsearch (&key, entities_defs, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp)) != NULL) { + if (found->replacement) { + rep_len = strlen (found->replacement); + memcpy (t, found->replacement, rep_len); + t += rep_len; + } + } + else { + if (*(e + 2) == 'x' || *(e + 2) == 'X') { + base = 16; + } + else if (*(e + 2) == 'o' || *(e + 2) == 'O') { + base = 8; + } + else { + base = 10; + } + if (base == 10) { + val = strtoul ((e + 2), &end_ptr, base); + } + else { + val = strtoul ((e + 3), &end_ptr, base); + } + if (end_ptr != NULL && *end_ptr != '\0') { + /* Skip undecoded */ + t = h; + } + else { + /* Search for a replacement */ + key.code = val; + found = bsearch (&key, entities_defs_num, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp_num); + if (found) { + if (found->replacement) { + rep_len = strlen (found->replacement); + memcpy (t, found->replacement, rep_len); + t += rep_len; + } + } + } + } + *h = ';'; + state = 0; + } + h++; + break; + } + } + *t = '\0'; + + if (len != NULL) { + *len = t - s; + } +} + +static void +check_phishing (struct rspamd_task *task, struct uri *href_url, const gchar *url_text, gsize remain, tag_id_t id) +{ + struct uri *new; + gchar *url_str; + const gchar *p, *c; + gchar tagbuf[128]; + struct html_tag *tag; + gsize len = 0; + gint rc; + + p = url_text; + while (len < remain) { + if (*p == '<') { + /* Check tag name */ + if (*(p + 1) == '/') { + c = p + 2; + } + else { + c = p + 1; + } + while (len < remain) { + if (!g_ascii_isspace (*p) && *p != '>') { + p ++; + len ++; + } + else { + break; + } + } + rspamd_strlcpy (tagbuf, c, MIN ((gint)sizeof(tagbuf), p - c + 1)); + if ((tag = get_tag_by_name (tagbuf)) != NULL) { + if (tag->id == id) { + break; + } + else if (tag->id == Tag_IMG) { + /* We should ignore IMG tag here */ + while (len < remain && *p != 
'>' && *p != '<') { + p ++; + len ++; + } + if (*p == '>' && len < remain) { + p ++; + } + + remain -= p - url_text; + url_text = p; + len = 0; + continue; + } + } + } + len ++; + p ++; + } + + if (url_try_text (task->task_pool, url_text, len, NULL, NULL, &url_str, TRUE) && url_str != NULL) { + new = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct uri)); + if (new != NULL) { + g_strstrip (url_str); + rc = parse_uri (new, url_str, task->task_pool); + + if (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) { + if (g_ascii_strncasecmp (href_url->host, new->host, + MAX (href_url->hostlen, new->hostlen)) != 0) { + /* Special check for urls beginning with 'www' */ + if (new->hostlen > 4 && href_url->hostlen > 4) { + p = new->host; + c = NULL; + if ((p[0] == 'w' || p[0] == 'W') && + (p[1] == 'w' || p[1] == 'W') && + (p[2] == 'w' || p[2] == 'W') && + (p[3] == '.')) { + p += 4; + c = href_url->host; + len = MAX (href_url->hostlen, new->hostlen - 4); + } + else { + p = href_url->host; + if ((p[0] == 'w' || p[0] == 'W') && + (p[1] == 'w' || p[1] == 'W') && + (p[2] == 'w' || p[2] == 'W') && + (p[3] == '.')) { + p += 4; + c = new->host; + len = MAX (href_url->hostlen - 4, new->hostlen); + } + } + /* Compare parts and check for phished hostname */ + if (c != NULL) { + if (g_ascii_strncasecmp (p, c, len) != 0) { + href_url->is_phished = TRUE; + href_url->phished_url = new; + } + } + else { + href_url->is_phished = TRUE; + href_url->phished_url = new; + } + } + else { + href_url->is_phished = TRUE; + href_url->phished_url = new; + } + } + } + else { + msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc)); + } + } + } + +} + +static void +parse_tag_url (struct rspamd_task *task, struct mime_text_part *part, tag_id_t id, + gchar *tag_text, gsize tag_len, gsize remain) +{ + gchar *c = NULL, *p, *url_text; + gint len, rc; + struct uri *url; + gboolean got_single_quote = FALSE, got_double_quote = FALSE; + + /* For A tags search 
for href= and for IMG tags search for src= */ + if (id == Tag_A) { + c = rspamd_strncasestr (tag_text, "href=", tag_len); + len = sizeof ("href=") - 1; + } + else if (id == Tag_IMG) { + c = rspamd_strncasestr (tag_text, "src=", tag_len); + len = sizeof ("src=") - 1; + } + + if (c != NULL) { + /* First calculate length */ + c += len; + /* Skip spaces after eqsign */ + while (g_ascii_isspace (*c)) { + c++; + } + len = 0; + p = c; + while (*p && (guint)(p - tag_text) < tag_len) { + if (got_double_quote) { + if (*p == '"') { + break; + } + else { + len++; + } + } + else if (got_single_quote) { + if (*p == '\'') { + break; + } + else { + len++; + } + } + else if (g_ascii_isspace (*p) || *p == '>' || (*p == '/' && *(p + 1) == '>') || *p == '\r' || *p == '\n') { + break; + } + else { + if (*p == '"' && !got_single_quote) { + got_double_quote = !got_double_quote; + } + else if (*p == '\'' && !got_double_quote) { + got_single_quote = !got_single_quote; + } + else { + len++; + } + } + p++; + } + + if (got_single_quote || got_double_quote) { + c++; + } + + if (len == 0) { + return; + } + + url_text = rspamd_mempool_alloc (task->task_pool, len + 1); + rspamd_strlcpy (url_text, c, len + 1); + decode_entitles (url_text, NULL); + + if (g_ascii_strncasecmp (url_text, "http://", sizeof ("http://") - 1) != 0 && + g_ascii_strncasecmp (url_text, "www", sizeof ("www") - 1) != 0 && + g_ascii_strncasecmp (url_text, "ftp://", sizeof ("ftp://") - 1) != 0 && + g_ascii_strncasecmp (url_text, "mailto:", sizeof ("mailto:") - 1) != 0) { + return; + } + + url = rspamd_mempool_alloc (task->task_pool, sizeof (struct uri)); + rc = parse_uri (url, url_text, task->task_pool); + + if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST && url->hostlen != 0) { + /* + * Check for phishing + */ + if ((p = strchr (c, '>')) != NULL && id == Tag_A) { + p ++; + check_phishing (task, url, p, remain - (p - tag_text), id); + } + if (g_tree_lookup (task->urls, url) == NULL) { + g_tree_insert (task->urls, url, url); 
+ } + } + } +} + +gboolean +add_html_node (struct rspamd_task *task, rspamd_mempool_t * pool, struct mime_text_part *part, + gchar *tag_text, gsize tag_len, gsize remain, GNode ** cur_level) +{ + GNode *new; + struct html_node *data; + + if (!tags_sorted) { + qsort (tag_defs, G_N_ELEMENTS (tag_defs), sizeof (struct html_tag), tag_cmp); + tags_sorted = 1; + } + if (!entities_sorted) { + qsort (entities_defs, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp); + memcpy (entities_defs_num, entities_defs, sizeof (entities_defs)); + qsort (entities_defs_num, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp_num); + entities_sorted = 1; + } + + /* First call of this function */ + if (part->html_nodes == NULL) { + /* Insert root node */ + new = g_node_new (NULL); + *cur_level = new; + part->html_nodes = new; + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_node_destroy, part->html_nodes); + /* Call once again with root node */ + return add_html_node (task, pool, part, tag_text, tag_len, remain, cur_level); + } + else { + new = construct_html_node (pool, tag_text, tag_len); + if (new == NULL) { + debug_task ("cannot construct HTML node for text '%*s'", tag_len, tag_text); + return FALSE; + } + data = new->data; + if (data->tag && (data->tag->id == Tag_A || data->tag->id == Tag_IMG) && ((data->flags & FL_CLOSING) == 0)) { + parse_tag_url (task, part, data->tag->id, tag_text, tag_len, remain); + } + + if (data->flags & FL_CLOSING) { + if (!*cur_level) { + debug_task ("bad parent node"); + return FALSE; + } + g_node_append (*cur_level, new); + if (!check_balance (new, cur_level)) { + debug_task ("mark part as unbalanced as it has not pairable closing tags"); + part->is_balanced = FALSE; + } + } + else { + + g_node_append (*cur_level, new); + if ((data->flags & FL_CLOSED) == 0) { + *cur_level = new; + } + /* Skip some tags */ + if (data->tag && (data->tag->id == Tag_STYLE || + data->tag->id == Tag_SCRIPT || + data->tag->id == Tag_OBJECT || + 
data->tag->id == Tag_TITLE)) { + return FALSE; + } + } + } + + return TRUE; +} + +/* + * vi:ts=4 + */ diff --git a/src/libserver/html.h b/src/libserver/html.h new file mode 100644 index 000000000..3ea758e60 --- /dev/null +++ b/src/libserver/html.h @@ -0,0 +1,226 @@ +/* + * Functions for simple html parsing + */ + +#ifndef RSPAMD_HTML_H +#define RSPAMD_HTML_H + +#include "config.h" +#include "mem_pool.h" + +/* Known HTML tags */ +typedef enum +{ + Tag_UNKNOWN, /**< Unknown tag! */ + Tag_A, /**< A */ + Tag_ABBR, /**< ABBR */ + Tag_ACRONYM, /**< ACRONYM */ + Tag_ADDRESS, /**< ADDRESS */ + Tag_ALIGN, /**< ALIGN */ + Tag_APPLET, /**< APPLET */ + Tag_AREA, /**< AREA */ + Tag_B, /**< B */ + Tag_BASE, /**< BASE */ + Tag_BASEFONT, /**< BASEFONT */ + Tag_BDO, /**< BDO */ + Tag_BGSOUND, /**< BGSOUND */ + Tag_BIG, /**< BIG */ + Tag_BLINK, /**< BLINK */ + Tag_BLOCKQUOTE, /**< BLOCKQUOTE */ + Tag_BODY, /**< BODY */ + Tag_BR, /**< BR */ + Tag_BUTTON, /**< BUTTON */ + Tag_CAPTION, /**< CAPTION */ + Tag_CENTER, /**< CENTER */ + Tag_CITE, /**< CITE */ + Tag_CODE, /**< CODE */ + Tag_COL, /**< COL */ + Tag_COLGROUP, /**< COLGROUP */ + Tag_COMMENT, /**< COMMENT */ + Tag_DD, /**< DD */ + Tag_DEL, /**< DEL */ + Tag_DFN, /**< DFN */ + Tag_DIR, /**< DIR */ + Tag_DIV, /**< DIF */ + Tag_DL, /**< DL */ + Tag_DT, /**< DT */ + Tag_EM, /**< EM */ + Tag_EMBED, /**< EMBED */ + Tag_FIELDSET, /**< FIELDSET */ + Tag_FONT, /**< FONT */ + Tag_FORM, /**< FORM */ + Tag_FRAME, /**< FRAME */ + Tag_FRAMESET, /**< FRAMESET */ + Tag_H1, /**< H1 */ + Tag_H2, /**< H2 */ + Tag_H3, /**< H3 */ + Tag_H4, /**< H4 */ + Tag_H5, /**< H5 */ + Tag_H6, /**< H6 */ + Tag_HEAD, /**< HEAD */ + Tag_HR, /**< HR */ + Tag_HTML, /**< HTML */ + Tag_I, /**< I */ + Tag_IFRAME, /**< IFRAME */ + Tag_ILAYER, /**< ILAYER */ + Tag_IMG, /**< IMG */ + Tag_INPUT, /**< INPUT */ + Tag_INS, /**< INS */ + Tag_ISINDEX, /**< ISINDEX */ + Tag_KBD, /**< KBD */ + Tag_KEYGEN, /**< KEYGEN */ + Tag_LABEL, /**< LABEL */ + Tag_LAYER, /**< LAYER */ + 
Tag_LEGEND, /**< LEGEND */ + Tag_LI, /**< LI */ + Tag_LINK, /**< LINK */ + Tag_LISTING, /**< LISTING */ + Tag_MAP, /**< MAP */ + Tag_MARQUEE, /**< MARQUEE */ + Tag_MENU, /**< MENU */ + Tag_META, /**< META */ + Tag_MULTICOL, /**< MULTICOL */ + Tag_NOBR, /**< NOBR */ + Tag_NOEMBED, /**< NOEMBED */ + Tag_NOFRAMES, /**< NOFRAMES */ + Tag_NOLAYER, /**< NOLAYER */ + Tag_NOSAVE, /**< NOSAVE */ + Tag_NOSCRIPT, /**< NOSCRIPT */ + Tag_OBJECT, /**< OBJECT */ + Tag_OL, /**< OL */ + Tag_OPTGROUP, /**< OPTGROUP */ + Tag_OPTION, /**< OPTION */ + Tag_P, /**< P */ + Tag_PARAM, /**< PARAM */ + Tag_PLAINTEXT,/**< PLAINTEXT */ + Tag_PRE, /**< PRE */ + Tag_Q, /**< Q */ + Tag_RB, /**< RB */ + Tag_RBC, /**< RBC */ + Tag_RP, /**< RP */ + Tag_RT, /**< RT */ + Tag_RTC, /**< RTC */ + Tag_RUBY, /**< RUBY */ + Tag_S, /**< S */ + Tag_SAMP, /**< SAMP */ + Tag_SCRIPT, /**< SCRIPT */ + Tag_SELECT, /**< SELECT */ + Tag_SERVER, /**< SERVER */ + Tag_SERVLET, /**< SERVLET */ + Tag_SMALL, /**< SMALL */ + Tag_SPACER, /**< SPACER */ + Tag_SPAN, /**< SPAN */ + Tag_STRIKE, /**< STRIKE */ + Tag_STRONG, /**< STRONG */ + Tag_STYLE, /**< STYLE */ + Tag_SUB, /**< SUB */ + Tag_SUP, /**< SUP */ + Tag_TABLE, /**< TABLE */ + Tag_TBODY, /**< TBODY */ + Tag_TD, /**< TD */ + Tag_TEXTAREA, /**< TEXTAREA */ + Tag_TFOOT, /**< TFOOT */ + Tag_TH, /**< TH */ + Tag_THEAD, /**< THEAD */ + Tag_TITLE, /**< TITLE */ + Tag_TR, /**< TR */ + Tag_TT, /**< TT */ + Tag_U, /**< U */ + Tag_UL, /**< UL */ + Tag_VAR, /**< VAR */ + Tag_WBR, /**< WBR */ + Tag_XMP, /**< XMP */ + Tag_XML, /**< XML */ + Tag_NEXTID, /**< NEXTID */ + + N_TAGS /**< Must be last */ +} tag_id_t; + +#define CM_UNKNOWN 0 +/* Elements with no content. Map to HTML specification. */ +#define CM_EMPTY (1 << 0) +/* Elements that appear outside of "BODY". */ +#define CM_HTML (1 << 1) +/* Elements that can appear within HEAD. */ +#define CM_HEAD (1 << 2) +/* HTML "block" elements. */ +#define CM_BLOCK (1 << 3) +/* HTML "inline" elements. 
*/ +#define CM_INLINE (1 << 4) +/* Elements that mark list item ("LI"). */ +#define CM_LIST (1 << 5) +/* Elements that mark definition list item ("DL", "DT"). */ +#define CM_DEFLIST (1 << 6) +/* Elements that can appear inside TABLE. */ +#define CM_TABLE (1 << 7) +/* Used for "THEAD", "TFOOT" or "TBODY". */ +#define CM_ROWGRP (1 << 8) +/* Used for "TD", "TH" */ +#define CM_ROW (1 << 9) +/* Elements whose content must be protected against white space movement. + Includes some elements that can found in forms. */ +#define CM_FIELD (1 << 10) +/* Used to avoid propagating inline emphasis inside some elements + such as OBJECT or APPLET. */ +#define CM_OBJECT (1 << 11) +/* Elements that allows "PARAM". */ +#define CM_PARAM (1 << 12) +/* "FRAME", "FRAMESET", "NOFRAMES". Used in ParseFrameSet. */ +#define CM_FRAMES (1 << 13) +/* Heading elements (h1, h2, ...). */ +#define CM_HEADING (1 << 14) +/* Elements with an optional end tag. */ +#define CM_OPT (1 << 15) +/* Elements that use "align" attribute for vertical position. */ +#define CM_IMG (1 << 16) +/* Elements with inline and block model. Used to avoid calling InlineDup. */ +#define CM_MIXED (1 << 17) +/* Elements whose content needs to be indented only if containing one + CM_BLOCK element. */ +#define CM_NO_INDENT (1 << 18) +/* Elements that are obsolete (such as "dir", "menu"). */ +#define CM_OBSOLETE (1 << 19) +/* User defined elements. Used to determine how attributes wihout value + should be printed. */ +#define CM_NEW (1 << 20) +/* Elements that cannot be omitted. */ +#define CM_OMITST (1 << 21) + +/* XML tag */ +#define FL_XML (1 << 0) +/* Closing tag */ +#define FL_CLOSING (1 << 1) +/* Fully closed tag (e.g. 
) */ +#define FL_CLOSED (1 << 2) + +struct html_tag { + tag_id_t id; + const gchar *name; + gint flags; +}; + +struct html_node { + struct html_tag *tag; + gint flags; +}; + +/* Forwarded declaration */ +struct rspamd_task; + +/* + * Add a single node to the tags tree + */ +gboolean add_html_node (struct rspamd_task *task, rspamd_mempool_t *pool, + struct mime_text_part *part, gchar *tag_text, gsize tag_len, gsize remain, GNode **cur_level); + +/* + * Get tag structure by its name (binary search is used) + */ +struct html_tag * get_tag_by_name (const gchar *name); + +/* + * Decode HTML entitles in text. Text is modified in place. + */ +void decode_entitles (gchar *s, guint *len); + +#endif diff --git a/src/libserver/proxy.c b/src/libserver/proxy.c new file mode 100644 index 000000000..67c7665b8 --- /dev/null +++ b/src/libserver/proxy.c @@ -0,0 +1,241 @@ +/* Copyright (c) 2010-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "proxy.h"
+
+static void rspamd_proxy_backend_handler (gint fd, gshort what, gpointer data);
+static void rspamd_proxy_client_handler (gint fd, gshort what, gpointer data);
+
+/* Error domain used for every GError produced by the proxy */
+static inline GQuark
+proxy_error_quark (void)
+{
+	return g_quark_from_static_string ("proxy-error");
+}
+
+/**
+ * Stop the proxy: remove both events and close both descriptors.
+ * Only the first call has an effect, later calls are ignored.
+ * @param proxy proxy object
+ */
+void
+rspamd_proxy_close (rspamd_proxy_t *proxy)
+{
+	if (!proxy->closed) {
+		/* Remove the events first so that no event refers to a closed descriptor */
+		event_del (&proxy->client_ev);
+		event_del (&proxy->backend_ev);
+
+		close (proxy->cfd);
+		close (proxy->bfd);
+		proxy->closed = TRUE;
+	}
+}
+
+/*
+ * Re-register ev on fd for the events in what.
+ * event_set () resets the base of an event, so the event is attached to the
+ * proxy's base again before it is added.
+ */
+static void
+rspamd_proxy_rearm (rspamd_proxy_t *proxy, struct event *ev, gint fd, gshort what,
+	void (*handler) (gint, gshort, gpointer))
+{
+	event_del (ev);
+	event_set (ev, fd, what, handler, proxy);
+	event_base_set (proxy->base, ev);
+	event_add (ev, proxy->tv);
+}
+
+/* Close the proxy and pass err (NULL if the peer has just closed connection) to the callback */
+static void
+rspamd_proxy_finish (rspamd_proxy_t *proxy, GError *err)
+{
+	rspamd_proxy_close (proxy);
+	proxy->err_cb (err, proxy->user_data);
+}
+
+/*
+ * Handle a non-positive result of read () or write ():
+ * r < 0 is an I/O error described by saved_errno, r == 0 means closed connection.
+ * op names the failed operation in the error message.
+ */
+static void
+rspamd_proxy_io_done (rspamd_proxy_t *proxy, gint r, gint saved_errno, const gchar *op)
+{
+	GError *err = NULL;
+
+	if (r < 0) {
+		/* Use errno as the error code, the return value itself is always -1 */
+		g_set_error (&err, proxy_error_quark (), saved_errno, "%s error: %s", op, strerror (saved_errno));
+	}
+	rspamd_proxy_finish (proxy, err);
+}
+
+/*
+ * Event handler for the client's socket:
+ * EV_READ - read data from the client and schedule writing it to the backend
+ * EV_WRITE - write the buffered backend's data to the client
+ * anything else is treated as a timeout
+ */
+static void
+rspamd_proxy_client_handler (gint fd, gshort what, gpointer data)
+{
+	rspamd_proxy_t *proxy = data;
+	gint r, saved_errno;
+	GError *err = NULL;
+
+	if (what == EV_READ) {
+		/* Got data from client */
+		event_del (&proxy->client_ev);
+		r = read (proxy->cfd, proxy->buf, proxy->bufsize);
+		saved_errno = errno;
+		if (r > 0) {
+			/* Write this buffer to backend */
+			proxy->read_len = r;
+			proxy->buf_offset = 0;
+			rspamd_proxy_rearm (proxy, &proxy->backend_ev, proxy->bfd, EV_WRITE,
+				rspamd_proxy_backend_handler);
+		}
+		else {
+			/* Error case or client has closed connection */
+			rspamd_proxy_io_done (proxy, r, saved_errno, "Client read");
+		}
+	}
+	else if (what == EV_WRITE) {
+		/* Can write to client */
+		r = write (proxy->cfd, proxy->buf + proxy->buf_offset, proxy->read_len - proxy->buf_offset);
+		saved_errno = errno;
+		if (r > 0) {
+			/* We wrote something */
+			proxy->buf_offset += r;
+			if (proxy->buf_offset == proxy->read_len) {
+				/* We wrote everything, wait for new data from both sides */
+				rspamd_proxy_rearm (proxy, &proxy->client_ev, proxy->cfd, EV_READ,
+					rspamd_proxy_client_handler);
+				rspamd_proxy_rearm (proxy, &proxy->backend_ev, proxy->bfd, EV_READ,
+					rspamd_proxy_backend_handler);
+			}
+			else {
+				/* Plan another write event for the client's socket */
+				event_add (&proxy->client_ev, proxy->tv);
+			}
+		}
+		else {
+			/* Error case or client has closed connection */
+			rspamd_proxy_io_done (proxy, r, saved_errno, "Client write");
+		}
+	}
+	else {
+		/* Got timeout */
+		g_set_error (&err, proxy_error_quark (), ETIMEDOUT, "Client timeout");
+		rspamd_proxy_finish (proxy, err);
+	}
+}
+
+/*
+ * Event handler for the backend's socket:
+ * EV_READ - read data from the backend and schedule writing it to the client
+ * EV_WRITE - write the buffered client's data to the backend
+ * anything else is treated as a timeout
+ */
+static void
+rspamd_proxy_backend_handler (gint fd, gshort what, gpointer data)
+{
+	rspamd_proxy_t *proxy = data;
+	gint r, saved_errno;
+	GError *err = NULL;
+
+	if (what == EV_READ) {
+		/* Got data from backend */
+		event_del (&proxy->backend_ev);
+		r = read (proxy->bfd, proxy->buf, proxy->bufsize);
+		saved_errno = errno;
+		if (r > 0) {
+			/* Write this buffer to client, so wait for the client's socket */
+			proxy->read_len = r;
+			proxy->buf_offset = 0;
+			rspamd_proxy_rearm (proxy, &proxy->client_ev, proxy->cfd, EV_WRITE,
+				rspamd_proxy_client_handler);
+		}
+		else {
+			/* Error case or backend has closed connection */
+			rspamd_proxy_io_done (proxy, r, saved_errno, "Backend read");
+		}
+	}
+	else if (what == EV_WRITE) {
+		/* Can write to backend */
+		r = write (proxy->bfd, proxy->buf + proxy->buf_offset, proxy->read_len - proxy->buf_offset);
+		saved_errno = errno;
+		if (r > 0) {
+			/* We wrote something */
+			proxy->buf_offset += r;
+			if (proxy->buf_offset == proxy->read_len) {
+				/* We wrote everything, wait for new data from both sides */
+				rspamd_proxy_rearm (proxy, &proxy->backend_ev, proxy->bfd, EV_READ,
+					rspamd_proxy_backend_handler);
+				rspamd_proxy_rearm (proxy, &proxy->client_ev, proxy->cfd, EV_READ,
+					rspamd_proxy_client_handler);
+			}
+			else {
+				/* Plan another write event for the backend's socket */
+				event_add (&proxy->backend_ev, proxy->tv);
+			}
+		}
+		else {
+			/* Error case or backend has closed connection */
+			rspamd_proxy_io_done (proxy, r, saved_errno, "Backend write");
+		}
+	}
+	else {
+		/* Got timeout */
+		g_set_error (&err, proxy_error_quark (), ETIMEDOUT, "Backend timeout");
+		rspamd_proxy_finish (proxy, err);
+	}
+}
+
+/**
+ * Create new proxy between cfd and bfd
+ * @param cfd client's socket (the proxy works with its dup () copy)
+ * @param bfd backend's socket (the proxy works with its dup () copy)
+ * @param pool memory pool used to allocate the proxy object and the exchange buffer
+ * @param base event base for both events
+ * @param bufsize size of exchange buffer
+ * @param tv timeout passed to each event_add () call
+ * @param err_cb callback for errors or completion (called with NULL error on closed connection)
+ * @param ud user data for callback
+ * @return new proxy object
+ */
+rspamd_proxy_t*
+rspamd_create_proxy (gint cfd, gint bfd, rspamd_mempool_t *pool, struct event_base *base,
+		gsize bufsize, struct timeval *tv, dispatcher_err_callback_t err_cb, gpointer ud)
+{
+	rspamd_proxy_t *new;
+
+	new = rspamd_mempool_alloc0 (pool, sizeof (rspamd_proxy_t));
+
+	/* NOTE(review): dup () results are not checked here - confirm whether callers expect that */
+	new->cfd = dup (cfd);
+	new->bfd = dup (bfd);
+	new->pool = pool;
+	new->base = base;
+	new->bufsize = bufsize;
+	new->buf = rspamd_mempool_alloc (pool, bufsize);
+	new->err_cb = err_cb;
+	new->user_data = ud;
+	new->tv = tv;
+
+	/* Set client's and backend's interfaces to read events */
+	event_set (&new->client_ev, new->cfd, EV_READ, rspamd_proxy_client_handler, new);
+	event_base_set (new->base, &new->client_ev);
+	event_add (&new->client_ev, new->tv);
+
+	event_set (&new->backend_ev, new->bfd, EV_READ, rspamd_proxy_backend_handler, new);
+	event_base_set (new->base, &new->backend_ev);
+	event_add (&new->backend_ev, new->tv);
+
+	return new;
+}
diff --git a/src/libserver/proxy.h b/src/libserver/proxy.h
new file mode 100644
index 000000000..c505fe83d
--- /dev/null
+++ b/src/libserver/proxy.h
@@ -0,0 +1,69 @@
+/* Copyright (c) 2010-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef PROXY_H_ +#define PROXY_H_ + +#include "config.h" +#include "buffer.h" + +/** + * @file proxy.h + * Direct asynchronous proxy implementation: data read from the client's socket + * is written to the backend's socket and vice versa, using a single exchange buffer + */ + +typedef struct rspamd_proxy_s { + struct event client_ev; /**< event for client's communication */ + struct event backend_ev; /**< event for backend communication */ + struct event_base *base; /**< event base that both events are attached to */ + rspamd_mempool_t *pool; /**< memory pool that holds the proxy and its buffer */ + dispatcher_err_callback_t err_cb; /**< callback for errors, also called with NULL error on closed connection */ + struct event_base *ev_base; /**< event base; NOTE(review): never assigned in proxy.c, looks like a duplicate of base - confirm */ + gint cfd; /**< client's socket (dup'ed copy) */ + gint bfd; /**< backend's socket (dup'ed copy) */ + guint8 *buf; /**< exchange buffer */ + gsize bufsize; /**< buffer size */ + gint read_len; /**< number of bytes placed in buf by the last read */ + gint buf_offset; /**< number of bytes from buf that are already written */ + gpointer user_data; /**< user's data for callbacks */ + struct timeval *tv; /**< timeout for communications */ + gboolean closed; /**< whether descriptors are closed */ +} rspamd_proxy_t; + +/** + * Create new proxy between cfd and bfd + * @param cfd client's socket (the proxy works with its dup() copy) + * @param bfd backend's socket (the proxy works with its dup() copy) + * @param pool memory pool used to allocate the proxy object and the exchange buffer + * @param base event base for both events + * @param bufsize size of exchange buffer + * @param tv timeout for communications + * @param err_cb callback for errors or completion + * @param ud user data for callback + * @return new proxy object + */ +rspamd_proxy_t* rspamd_create_proxy (gint cfd, gint bfd, rspamd_mempool_t *pool, + struct event_base *base, gsize bufsize, struct timeval *tv, + dispatcher_err_callback_t err_cb, gpointer ud); +/** + * Close both descriptors of the proxy and remove both of its events; + * does nothing if the proxy is already marked as closed + * @param proxy proxy object + */ +void rspamd_proxy_close (rspamd_proxy_t *proxy);
+ +#endif /* PROXY_H_ */ diff --git a/src/libserver/roll_history.c b/src/libserver/roll_history.c new file mode 100644 index 000000000..504f8ae3b --- /dev/null +++ b/src/libserver/roll_history.c @@ -0,0 +1,212 @@ +/* Copyright (c) 2010-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + + + +#include "config.h" +#include "main.h" +#include "roll_history.h" + + +/** + * Returns new roll history + * @param pool pool for shared memory + * @return new structure + */ +struct roll_history* +rspamd_roll_history_new (rspamd_mempool_t *pool) +{ + struct roll_history *new; + + if (pool == NULL) { + return NULL; + } + + new = rspamd_mempool_alloc0_shared (pool, sizeof (struct roll_history)); + new->pool = pool; + new->mtx = rspamd_mempool_get_mutex (pool); + + return new; +} + +struct history_metric_callback_data { + gchar *pos; + gint remain; +}; + +static void +roll_history_symbols_callback (gpointer key, gpointer value, void *user_data) +{ + struct history_metric_callback_data *cb = user_data; + struct symbol *s = value; + guint wr; + + if (cb->remain > 0) { + wr = rspamd_snprintf (cb->pos, cb->remain, "%s, ", s->name); + cb->pos += wr; + cb->remain -= wr; + } +} + +/** + * Update roll history with data from task + * @param history roll history object + * @param task task object + */ +void +rspamd_roll_history_update (struct roll_history *history, struct rspamd_task *task) +{ + gint row_num; + struct roll_history_row *row; + struct metric_result *metric_res; + struct history_metric_callback_data cbdata; + + if (history->need_lock) { + /* Some process is getting history, so wait on a mutex */ + rspamd_mempool_lock_mutex (history->mtx); + history->need_lock = FALSE; + rspamd_mempool_unlock_mutex (history->mtx); + } + + /* First of all obtain check and obtain row number */ + g_atomic_int_compare_and_exchange (&history->cur_row, HISTORY_MAX_ROWS, 0); +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + row_num = g_atomic_int_add (&history->cur_row, 1); +#else + row_num = g_atomic_int_exchange_and_add (&history->cur_row, 1); +#endif + + if (row_num < HISTORY_MAX_ROWS) { + row = &history->rows[row_num]; + row->completed = FALSE; + } + else { + /* Race condition */ + history->cur_row = 0; + return; + } + + /* Add information from task to roll 
history */ + memcpy (&row->from_addr, &task->from_addr, sizeof (row->from_addr)); + memcpy (&row->tv, &task->tv, sizeof (row->tv)); + + /* Strings */ + rspamd_strlcpy (row->message_id, task->message_id, sizeof (row->message_id)); + if (task->user) { + rspamd_strlcpy (row->user, task->user, sizeof (row->message_id)); + } + else { + row->user[0] = '\0'; + } + + /* Get default metric */ + metric_res = g_hash_table_lookup (task->results, DEFAULT_METRIC); + if (metric_res == NULL) { + row->symbols[0] = '\0'; + row->action = METRIC_ACTION_NOACTION; + } + else { + row->score = metric_res->score; + row->required_score = metric_res->metric->actions[METRIC_ACTION_REJECT].score; + row->action = check_metric_action (metric_res->score, + metric_res->metric->actions[METRIC_ACTION_REJECT].score, metric_res->metric); + cbdata.pos = row->symbols; + cbdata.remain = sizeof (row->symbols); + g_hash_table_foreach (metric_res->symbols, roll_history_symbols_callback, &cbdata); + if (cbdata.remain > 0) { + /* Remove last whitespace and comma */ + *cbdata.pos-- = '\0'; + *cbdata.pos-- = '\0'; + *cbdata.pos = '\0'; + } + } + + row->scan_time = task->scan_milliseconds; + row->len = (task->msg == NULL ? 
0 : task->msg->len); + row->completed = TRUE; +} + +/** + * Load previously saved history from file + * @param history roll history object + * @param filename filename to load from + * @return TRUE if history has been loaded + */ +gboolean +rspamd_roll_history_load (struct roll_history *history, const gchar *filename) +{ + gint fd; + struct stat st; + + if (stat (filename, &st) == -1) { + msg_info ("cannot load history from %s: %s", filename, strerror (errno)); + return FALSE; + } + + if (st.st_size != sizeof (history->rows)) { + msg_info ("cannot load history from %s: size mismatch", filename); + return FALSE; + } + + if ((fd = open (filename, O_RDONLY)) == -1) { + msg_info ("cannot load history from %s: %s", filename, strerror (errno)); + return FALSE; + } + + if (read (fd, history->rows, sizeof (history->rows)) == -1) { + close (fd); + msg_info ("cannot read history from %s: %s", filename, strerror (errno)); + return FALSE; + } + + close (fd); + + return TRUE; +} + +/** + * Save history to file + * @param history roll history object + * @param filename filename to load from + * @return TRUE if history has been saved + */ +gboolean +rspamd_roll_history_save (struct roll_history *history, const gchar *filename) +{ + gint fd; + + if ((fd = open (filename, O_WRONLY | O_CREAT | O_TRUNC, 00600)) == -1) { + msg_info ("cannot save history to %s: %s", filename, strerror (errno)); + return FALSE; + } + + if (write (fd, history->rows, sizeof (history->rows)) == -1) { + close (fd); + msg_info ("cannot write history to %s: %s", filename, strerror (errno)); + return FALSE; + } + + close (fd); + + return TRUE; +} diff --git a/src/libserver/roll_history.h b/src/libserver/roll_history.h new file mode 100644 index 000000000..1dff93a4f --- /dev/null +++ b/src/libserver/roll_history.h @@ -0,0 +1,106 @@ +/* Copyright (c) 2010-2012, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+
+#ifndef ROLL_HISTORY_H_
+#define ROLL_HISTORY_H_
+
+#include "config.h"
+#include "mem_pool.h"
+
+/*
+ * Roll history is a cyclic buffer of checked messages; it is designed for
+ * recording the history of scanned messages and displaying it in the web UI.
+ *
+ * The rows array is saved to and loaded from disk as raw bytes, so changing
+ * any of the sizes below or the layout of struct roll_history_row makes
+ * previously saved files unloadable (the loader rejects a size mismatch).
+ */
+
+#define HISTORY_MAX_ID 100 /* size of the message_id buffer in a row */
+#define HISTORY_MAX_SYMBOLS 200 /* size of the joined symbols buffer in a row */
+#define HISTORY_MAX_USER 20 /* size of the user buffer in a row */
+#define HISTORY_MAX_ROWS 200 /* number of rows in the cyclic buffer */
+
+struct rspamd_task;
+
+struct roll_history_row {
+	struct timeval tv;						/**< copied from task->tv */
+	gchar message_id[HISTORY_MAX_ID];		/**< message id of the task, truncated to fit */
+	gchar symbols[HISTORY_MAX_SYMBOLS];		/**< ", "-separated symbol names of the default metric */
+	gchar user[HISTORY_MAX_USER];			/**< task->user or empty string */
+#ifdef HAVE_INET_PTON
+	struct {
+		union {
+			struct in_addr in4;
+			struct in6_addr in6;
+		} d;
+		gboolean ipv6;
+		gboolean has_addr;
+	} from_addr;							/**< raw copy of task->from_addr */
+#else
+	struct in_addr from_addr;				/**< raw copy of task->from_addr */
+#endif
+	gsize len;								/**< task->msg->len, 0 when the task has no message */
+	guint scan_time;						/**< task->scan_milliseconds */
+	gint action;							/**< action chosen for the default metric */
+	gdouble score;							/**< score of the default metric */
+	gdouble required_score;					/**< reject threshold of the default metric */
+	guint8 completed;						/**< FALSE while the row is being filled, TRUE afterwards */
+};
+
+struct roll_history {
+	struct roll_history_row rows[HISTORY_MAX_ROWS];	/**< the cyclic buffer itself */
+	gint cur_row;							/**< next row to fill, updated atomically and wrapped at HISTORY_MAX_ROWS */
+	rspamd_mempool_t *pool;					/**< pool the history was allocated from */
+	gboolean need_lock;						/**< when set, the writer passes through mtx once and clears the flag */
+	rspamd_mempool_mutex_t *mtx;			/**< mutex obtained from the pool */
+};
+
+/**
+ * Returns new roll history
+ * @param pool pool for shared memory
+ * @return new structure, or NULL if pool is NULL
+ */
+struct roll_history* rspamd_roll_history_new (rspamd_mempool_t *pool);
+
+/**
+ * Update roll history with data from task
+ * @param history roll history object
+ * @param task task object
+ */
+void rspamd_roll_history_update (struct roll_history *history, struct rspamd_task *task);
+
+/**
+ * Load previously saved history from file
+ * @param history roll history object
+ * @param filename filename to load from
+ * @return TRUE if history has been loaded
+ */
+gboolean rspamd_roll_history_load (struct roll_history *history, const gchar *filename);
+
+/**
+ * Save history to file
+ * @param history roll history object
+ * @param filename filename to save to
+ * @return TRUE if history has been saved
+ */
+gboolean rspamd_roll_history_save (struct roll_history *history, const gchar *filename);
+
+#endif /* ROLL_HISTORY_H_ */
diff --git
a/src/libserver/settings.c b/src/libserver/settings.c new file mode 100644 index 000000000..c3292c8ab --- /dev/null +++ b/src/libserver/settings.c @@ -0,0 +1,657 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "cfg_file.h" +#include "map.h" +#include "main.h" +#include "settings.h" +#include "filter.h" +#include "json/jansson.h" + +struct json_buf { + GHashTable *table; + gchar *buf; + gchar *pos; + size_t buflen; +}; + +static void +settings_actions_free (gpointer data) +{ + GList *cur = data; + + while (cur) { + g_free (cur->data); + cur = g_list_next (cur); + } + + g_list_free ((GList *)data); +} + +static void +settings_free (gpointer data) +{ + struct rspamd_settings *s = data; + + if (s->statfile_alias) { + g_free (s->statfile_alias); + } + if (s->factors) { + g_hash_table_destroy (s->factors); + } + if (s->metric_scores) { + g_hash_table_destroy (s->metric_scores); + } + if (s->reject_scores) { + g_hash_table_destroy (s->reject_scores); + } + if (s->whitelist) { + g_hash_table_destroy (s->whitelist); + } + if (s->blacklist) { + g_hash_table_destroy (s->blacklist); + } + if (s->metric_actions) { + g_hash_table_destroy (s->metric_actions); + } + + g_slice_free1 (sizeof (struct rspamd_settings), s); +} + +static struct rspamd_settings * +settings_ref (struct rspamd_settings *s) +{ + if (s == NULL) { + s = g_slice_alloc (sizeof (struct rspamd_settings)); + s->metric_scores = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free); + s->reject_scores = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free); + s->metric_actions = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, settings_actions_free); + s->factors = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free); + s->whitelist = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free); + s->blacklist = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free); + s->statfile_alias = NULL; + s->want_spam = FALSE; + s->ref_count = 1; + } + else { + s->ref_count ++; + } + + return s; +} + +static void +settings_unref (struct rspamd_settings *s) +{ + if (s != NULL) { + 
s->ref_count --; + if (s->ref_count <= 0) { + settings_free (s); + } + } +} + + +gchar * +json_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) +{ + struct json_buf *jb; + size_t free, off; + + if (data->cur_data == NULL) { + jb = g_malloc (sizeof (struct json_buf)); + jb->table = g_hash_table_ref (((struct json_buf *)data->prev_data)->table); + jb->buf = NULL; + jb->pos = NULL; + data->cur_data = jb; + } + else { + jb = data->cur_data; + } + + if (jb->buf == NULL) { + /* Allocate memory for buffer */ + jb->buflen = len * 2; + jb->buf = g_malloc (jb->buflen); + jb->pos = jb->buf; + } + + off = jb->pos - jb->buf; + free = jb->buflen - off; + + if ((gint)free < len) { + jb->buflen = MAX (jb->buflen * 2, jb->buflen + len * 2); + jb->buf = g_realloc (jb->buf, jb->buflen); + jb->pos = jb->buf + off; + } + + memcpy (jb->pos, chunk, len); + jb->pos += len; + + /* Say not to copy any part of this buffer */ + return NULL; +} + +void +json_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data) +{ + struct json_buf *jb; + gint nelts, i, n, j; + json_t *js, *cur_elt, *cur_nm, *it_val, *act_it, *act_value; + json_error_t je; + struct metric_action *new_act; + struct rspamd_settings *cur_settings; + GList *cur_act; + gchar *cur_name; + void *json_it; + double *score; + + if (data->prev_data) { + jb = data->prev_data; + /* Clean prev data */ + if (jb->table) { + g_hash_table_unref (jb->table); + } + if (jb->buf) { + g_free (jb->buf); + } + g_free (jb); + } + + /* Now parse json */ + if (data->cur_data) { + jb = data->cur_data; + } + else { + msg_err ("no data read"); + return; + } + if (jb->buf == NULL) { + msg_err ("no data read"); + return; + } + /* NULL terminate current buf */ + *jb->pos = '\0'; + + js = json_loads (jb->buf, &je); + if (!js) { + msg_err ("cannot load json data: parse error %s, on line %d", je.text, je.line); + return; + } + + if (!json_is_array (js)) { + json_decref (js); + msg_err ("loaded json is not an array"); + 
return; + } + + nelts = json_array_size (js); + for (i = 0; i < nelts; i++) { + cur_settings = settings_ref (NULL); + + cur_elt = json_array_get (js, i); + if (!cur_elt || !json_is_object (cur_elt)) { + json_decref (js); + msg_err ("loaded json is not an object"); + settings_unref (cur_settings); + return; + } + cur_nm = json_object_get (cur_elt, "name"); + if (cur_nm == NULL || !json_is_string (cur_nm)) { + json_decref (js); + msg_err ("name is not a string or not exists"); + settings_unref (cur_settings); + return; + } + cur_name = g_strdup (json_string_value (cur_nm)); + /* Now check other settings */ + /* Statfile */ + cur_nm = json_object_get (cur_elt, "statfile"); + if (cur_nm != NULL && json_is_string (cur_nm)) { + cur_settings->statfile_alias = g_strdup (json_string_value (cur_nm)); + } + /* Factors object */ + cur_nm = json_object_get (cur_elt, "factors"); + if (cur_nm != NULL && json_is_object (cur_nm)) { + json_it = json_object_iter (cur_nm); + while (json_it) { + it_val = json_object_iter_value (json_it); + if (it_val && json_is_string (it_val)) { + g_hash_table_insert (cur_settings->factors, g_strdup (json_object_iter_key (json_it)), g_strdup (json_string_value (it_val))); + } + json_it = json_object_iter_next (cur_nm, json_it); + } + } + /* Metrics object */ + cur_nm = json_object_get (cur_elt, "metrics"); + if (cur_nm != NULL && json_is_object (cur_nm)) { + json_it = json_object_iter (cur_nm); + while (json_it) { + it_val = json_object_iter_value (json_it); + if (it_val && json_is_number (it_val)) { + score = g_malloc (sizeof (double)); + *score = json_number_value (it_val); + g_hash_table_insert (cur_settings->metric_scores, + g_strdup (json_object_iter_key (json_it)), score); + } + else if (it_val && json_is_object (it_val)) { + /* Assume this as actions hash */ + cur_act = NULL; + act_it = json_object_iter (it_val); + while (act_it) { + act_value = json_object_iter_value (act_it); + + if (act_value && json_is_number (act_value)) { + /* Special 
cases */ + if (g_ascii_strcasecmp (json_object_iter_key (act_it), "spam_score") == 0) { + score = g_malloc (sizeof (double)); + *score = json_number_value (act_value); + g_hash_table_insert (cur_settings->metric_scores, + g_strdup (json_object_iter_key (json_it)), score); + } + else if (g_ascii_strcasecmp (json_object_iter_key (act_it), "reject_score") == 0) { + score = g_malloc (sizeof (double)); + *score = json_number_value (act_value); + g_hash_table_insert (cur_settings->reject_scores, + g_strdup (json_object_iter_key (json_it)), score); + } + else if (check_action_str (json_object_iter_key (act_it), &j)) { + new_act = g_malloc (sizeof (struct metric_action)); + new_act->action = j; + new_act->score = json_number_value (act_value); + cur_act = g_list_prepend (cur_act, new_act); + } + } + act_it = json_object_iter_next (it_val, act_it); + } + if (cur_act != NULL) { + g_hash_table_insert (cur_settings->metric_actions, + g_strdup (json_object_iter_key (json_it)), cur_act); + cur_act = NULL; + } + } + json_it = json_object_iter_next (cur_nm, json_it); + } + } + /* Rejects object */ + cur_nm = json_object_get (cur_elt, "rejects"); + if (cur_nm != NULL && json_is_object (cur_nm)) { + json_it = json_object_iter (cur_nm); + while (json_it) { + it_val = json_object_iter_value (json_it); + if (it_val && json_is_number (it_val)) { + score = g_malloc (sizeof (double)); + *score = json_number_value (it_val); + g_hash_table_insert (cur_settings->reject_scores, g_strdup (json_object_iter_key (json_it)), + score); + } + json_it = json_object_iter_next(cur_nm, json_it); + } + } + /* Whitelist object */ + cur_nm = json_object_get (cur_elt, "whitelist"); + if (cur_nm != NULL && json_is_array (cur_nm)) { + n = json_array_size(cur_nm); + for(j = 0; j < n; j++) { + it_val = json_array_get(cur_nm, j); + if (it_val && json_is_string (it_val)) { + if (strlen (json_string_value (it_val)) > 0) { + g_hash_table_insert (cur_settings->whitelist, + g_strdup (json_string_value (it_val)), 
g_strdup (json_string_value (it_val))); + } + } + + } + } + /* Blacklist object */ + cur_nm = json_object_get (cur_elt, "blacklist"); + if (cur_nm != NULL && json_is_array (cur_nm)) { + n = json_array_size(cur_nm); + for(j = 0; j < n; j++) { + it_val = json_array_get(cur_nm, j); + if (it_val && json_is_string (it_val)) { + if (strlen (json_string_value (it_val)) > 0) { + g_hash_table_insert (cur_settings->blacklist, + g_strdup (json_string_value (it_val)), g_strdup (json_string_value (it_val))); + } + } + + } + } + /* Want spam */ + cur_nm = json_object_get (cur_elt, "want_spam"); + if (cur_nm != NULL) { + if (json_is_true (cur_nm)) { + cur_settings->want_spam = TRUE; + } + } + g_hash_table_replace (((struct json_buf *)data->cur_data)->table, cur_name, cur_settings); + } + json_decref (js); +} + +gboolean +read_settings (const gchar *path, const gchar *description, struct config_file *cfg, GHashTable * table) +{ + struct json_buf *jb = g_malloc (sizeof (struct json_buf)), **pjb; + + pjb = g_malloc (sizeof (struct json_buf *)); + + jb->table = table; + jb->buf = NULL; + *pjb = jb; + + if (!add_map (cfg, path, description, json_read_cb, json_fin_cb, (void **)pjb)) { + msg_err ("cannot add map %s", path); + return FALSE; + } + + return TRUE; +} + +void +init_settings (struct config_file *cfg) +{ + cfg->domain_settings = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, + g_free, (GDestroyNotify)settings_unref); + cfg->user_settings = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, + g_free, (GDestroyNotify)settings_unref); +} + +static gboolean +check_setting (struct rspamd_task *task, struct rspamd_settings **user_settings, struct rspamd_settings **domain_settings) +{ + gchar *field = NULL, *domain = NULL; + gchar cmp_buf[1024]; + gint len; + + if (task->deliver_to != NULL) { + /* First try to use deliver-to field */ + field = task->deliver_to; + } + else if (task->user != NULL) { + /* Then user field */ + field = task->user; + } + 
else if (task->rcpt != NULL) { + /* Then first recipient */ + field = task->rcpt->data; + } + else { + return FALSE; + } + + domain = strchr (field, '@'); + if (domain == NULL) { + /* First try to search in first recipient */ + if (task->rcpt) { + domain = strchr (task->rcpt->data, '@'); + } + } + if (domain != NULL) { + domain++; + } + + /* First try to search per-user settings */ + if (field != NULL) { + if (*field == '<') { + field ++; + } + len = strcspn (field, ">"); + rspamd_strlcpy (cmp_buf, field, MIN ((gint)sizeof (cmp_buf), len + 1)); + *user_settings = g_hash_table_lookup (task->cfg->user_settings, cmp_buf); + } + if (domain != NULL) { + len = strcspn (domain, ">"); + rspamd_strlcpy (cmp_buf, domain, MIN ((gint)sizeof (cmp_buf), len + 1)); + *domain_settings = g_hash_table_lookup (task->cfg->domain_settings, cmp_buf); + } + + if (*domain_settings != NULL || *user_settings != NULL) { + return TRUE; + } + + return FALSE; +} + +static gboolean +check_bwhitelist (struct rspamd_task *task, struct rspamd_settings *s, gboolean *is_black) +{ + gchar *src_email = NULL, *src_domain = NULL, *data; + + if (task->from != NULL && *task->from != '\0') { + src_email = task->from; + } else { + return FALSE; + } + + src_domain = strchr (src_email, '@'); + if(src_domain != NULL) { + src_domain++; + } + + if ((((data = g_hash_table_lookup (s->blacklist, src_email)) != NULL) || + ( (src_domain != NULL) && ((data = g_hash_table_lookup (s->blacklist, src_domain)) != NULL)) )) { + *is_black = TRUE; + msg_info ("<%s> blacklisted as domain %s is in settings blacklist", task->message_id, data); + return TRUE; + } + if ((((data = g_hash_table_lookup (s->whitelist, src_email)) != NULL) || + ( (src_domain != NULL) && ((data = g_hash_table_lookup (s->whitelist, src_domain)) != NULL)) )) { + *is_black = FALSE; + msg_info ("<%s> whitelisted as domain %s is in settings blacklist", task->message_id, data); + return TRUE; + } + return FALSE; +} + +gboolean +check_metric_settings (struct 
metric_result *res, double *score, double *rscore) +{ + struct rspamd_settings *us = res->user_settings, *ds = res->domain_settings; + double *sc, *rs; + struct metric *metric = res->metric; + + /* XXX: what the fuck is that? */ + *rscore = 10.0; + + if (us != NULL) { + if ((rs = g_hash_table_lookup (us->reject_scores, metric->name)) != NULL) { + *rscore = *rs; + } + if ((sc = g_hash_table_lookup (us->metric_scores, metric->name)) != NULL) { + *score = *sc; + return TRUE; + } + /* Now check in domain settings */ + if (ds && ((rs = g_hash_table_lookup (ds->reject_scores, metric->name)) != NULL)) { + *rscore = *rs; + } + if (ds && (sc = g_hash_table_lookup (ds->metric_scores, metric->name)) != NULL) { + *score = *sc; + return TRUE; + } + } + else if (ds != NULL) { + if ((rs = g_hash_table_lookup (ds->reject_scores, metric->name)) != NULL) { + *rscore = *rs; + } + if ((sc = g_hash_table_lookup (ds->metric_scores, metric->name)) != NULL) { + *score = *sc; + return TRUE; + } + } + + return FALSE; +} + +gboolean +check_metric_action_settings (struct rspamd_task *task, struct metric_result *res, + double score, enum rspamd_metric_action *result) +{ + struct rspamd_settings *us = res->user_settings, *ds = res->domain_settings; + struct metric_action *act, *sel = NULL; + GList *cur; + enum rspamd_metric_action r = METRIC_ACTION_NOACTION; + gboolean black; + + if (us != NULL) { + /* Check whitelist and set appropriate action for whitelisted users */ + if (check_bwhitelist(task, us, &black)) { + if (black) { + *result = METRIC_ACTION_REJECT; + } + else { + *result = METRIC_ACTION_NOACTION; + } + return TRUE; + } + if ((cur = g_hash_table_lookup (us->metric_actions, res->metric->name)) != NULL) { + while (cur) { + act = cur->data; + if (score >= act->score) { + r = act->action; + sel = act; + } + cur = g_list_next (cur); + } + } + } + else if (ds != NULL) { + /* Check whitelist and set appropriate action for whitelisted users */ + if (check_bwhitelist(task, ds, &black)) { + if 
(black) { + *result = METRIC_ACTION_REJECT; + } + else { + *result = METRIC_ACTION_NOACTION; + } + return TRUE; + } + if ((cur = g_hash_table_lookup (ds->metric_actions, res->metric->name)) != NULL) { + while (cur) { + act = cur->data; + if (score >= act->score) { + r = act->action; + sel = act; + } + cur = g_list_next (cur); + } + } + } + + if (sel != NULL && result != NULL) { + *result = r; + return TRUE; + } + + return FALSE; +} + +gboolean +apply_metric_settings (struct rspamd_task *task, struct metric *metric, struct metric_result *res) +{ + struct rspamd_settings *us = NULL, *ds = NULL; + + if (check_setting (task, &us, &ds)) { + if (us != NULL || ds != NULL) { + if (us != NULL) { + res->user_settings = settings_ref (us); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)settings_unref, + us); + } + if (ds != NULL) { + /* Need to ref hash table to avoid occasional data corruption */ + res->domain_settings = settings_ref (ds); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)settings_unref, + ds); + } + } + else { + return FALSE; + } + } + + return TRUE; +} + +gboolean +check_factor_settings (struct metric_result *res, const gchar *symbol, double *factor) +{ + double *fc; + + if (res->user_settings != NULL) { + /* First search in user's settings */ + if ((fc = g_hash_table_lookup (res->user_settings->factors, symbol)) != NULL) { + *factor = *fc; + return TRUE; + } + /* Now check in domain settings */ + if (res->domain_settings && (fc = g_hash_table_lookup (res->domain_settings->factors, symbol)) != NULL) { + *factor = *fc; + return TRUE; + } + } + else if (res->domain_settings != NULL) { + if ((fc = g_hash_table_lookup (res->domain_settings->factors, symbol)) != NULL) { + *factor = *fc; + return TRUE; + } + } + + return FALSE; + +} + + +gboolean +check_want_spam (struct rspamd_task *task) +{ + struct rspamd_settings *us = NULL, *ds = NULL; + + if (check_setting (task, &us, &ds)) { + if (us != NULL) { + /* 
First search in user's settings */ + if (us->want_spam) { + return TRUE; + } + /* Now check in domain settings */ + if (ds && ds->want_spam) { + return TRUE; + } + } + else if (ds != NULL) { + if (ds->want_spam) { + return TRUE; + } + } + } + + return FALSE; +} + +/* + * vi:ts=4 + */ diff --git a/src/libserver/settings.h b/src/libserver/settings.h new file mode 100644 index 000000000..361700094 --- /dev/null +++ b/src/libserver/settings.h @@ -0,0 +1,55 @@ +#ifndef RSPAMD_SETTINGS_H +#define RSPAMD_SETTINGS_H + +#include "config.h" +#include "main.h" + +struct rspamd_settings { + GHashTable *metric_scores; /**< hash table of metric require scores for this setting */ + GHashTable *reject_scores; /**< hash table of metric reject scores for this setting */ + GHashTable *metric_actions; /**< hash table of metric actions for this setting */ + GHashTable *factors; /**< hash table of new factors for this setting */ + GHashTable *whitelist; /**< hash table of whitelist for this setting */ + GHashTable *blacklist; /**< hash table of whitelist for this setting */ + gchar *statfile_alias; /**< alias for statfile used */ + gboolean want_spam; /**< if true disable rspamd checks */ + gint ref_count; /**< reference counter */ +}; + + +/* + * Read settings from specified path + */ +gboolean read_settings (const gchar *path, const gchar *description, struct config_file *cfg, GHashTable *table); + +/* + * Init configuration structures for settings + */ +void init_settings (struct config_file *cfg); + +/* + * Check scores settings + */ +gboolean check_metric_settings (struct metric_result *res, double *score, double *rscore); + +/* + * Check actions settings + */ +gboolean check_metric_action_settings (struct rspamd_task *task, struct metric_result *res, double score, enum rspamd_metric_action *result); + +/* + * Check individual weights for settings + */ +gboolean check_factor_settings (struct metric_result *res, const gchar *symbol, double *factor); + +/* + * Check want_spam flag + 
*/ +gboolean check_want_spam (struct rspamd_task *task); + +/* + * Search settings for metric and store pointers to settings into metric_result structure + */ +gboolean apply_metric_settings (struct rspamd_task *task, struct metric *metric, struct metric_result *res); + +#endif diff --git a/src/libserver/spf.c b/src/libserver/spf.c new file mode 100644 index 000000000..12f1513d4 --- /dev/null +++ b/src/libserver/spf.c @@ -0,0 +1,1465 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "dns.h" +#include "spf.h" +#include "main.h" +#include "message.h" +#include "filter.h" + +#define SPF_VER1_STR "v=spf1" +#define SPF_VER2_STR "spf2." 
+#define SPF_SCOPE_PRA "pra" +#define SPF_SCOPE_MFROM "mfrom" +#define SPF_ALL "all" +#define SPF_A "a" +#define SPF_IP4 "ip4" +#define SPF_IP6 "ip6" +#define SPF_PTR "ptr" +#define SPF_MX "mx" +#define SPF_EXISTS "exists" +#define SPF_INCLUDE "include" +#define SPF_REDIRECT "redirect" +#define SPF_EXP "exp" + +/** SPF limits for avoiding abuse **/ +#define SPF_MAX_NESTING 10 +#define SPF_MAX_DNS_REQUESTS 30 + +/** + * State machine for SPF record: + * + * spf_mech ::= +|-|~|? + * + * spf_body ::= spf=v1 [] + * spf_command ::= [spf_mech]all|a|||ptr|mx||| + * + * spf_domain ::= [:domain][/mask] + * spf_ip4 ::= ip[/mask] + * ip4 ::= ip4: + * mx ::= mx + * a ::= a + * ptr ::= ptr[:domain] + * exists ::= exists:domain + * include ::= include:domain + * redirect ::= redirect:domain + * exp ::= exp:domain + * + */ + +#undef SPF_DEBUG + +struct spf_dns_cb { + struct spf_record *rec; + struct spf_addr *addr; + spf_action_t cur_action; + gboolean in_include; +}; + +#define CHECK_REC(rec) \ +do { \ + if ((rec)->nested > SPF_MAX_NESTING || \ + (rec)->dns_requests > SPF_MAX_DNS_REQUESTS) { \ + msg_info ("<%s> spf recursion limit %d is reached, domain: %s", \ + (rec)->task->message_id, (rec)->dns_requests, \ + (rec)->sender_domain); \ + return FALSE; \ + } \ +} while (0) \ + +static gboolean parse_spf_record (struct rspamd_task *task, struct spf_record *rec); +static void start_spf_parse (struct spf_record *rec, gchar *begin, guint ttl); + +/* Determine spf mech */ +static spf_mech_t +check_spf_mech (const gchar *elt, gboolean *need_shift) +{ + g_assert (elt != NULL); + + *need_shift = TRUE; + + switch (*elt) { + case '-': + return SPF_FAIL; + case '~': + return SPF_SOFT_FAIL; + case '+': + return SPF_PASS; + case '?': + return SPF_NEUTRAL; + default: + *need_shift = FALSE; + return SPF_PASS; + } +} + +/* Debugging function that dumps spf record in log */ +static void +dump_spf_record (GList *addrs) +{ + struct spf_addr *addr; + GList *cur; + gint r = 0; + gchar logbuf[BUFSIZ], 
c; +#ifdef HAVE_INET_PTON + gchar ipbuf[INET6_ADDRSTRLEN]; +#else + struct in_addr ina; +#endif + + cur = addrs; + + while (cur) { + addr = cur->data; + if (!addr->is_list) { + switch (addr->mech) { + case SPF_FAIL: + c = '-'; + break; + case SPF_SOFT_FAIL: + case SPF_NEUTRAL: + c = '~'; + break; + case SPF_PASS: + c = '+'; + break; + } +#ifdef HAVE_INET_PTON + if (addr->data.normal.ipv6) { + inet_ntop (AF_INET6, &addr->data.normal.d.in6, ipbuf, sizeof (ipbuf)); + + } + else { + inet_ntop (AF_INET, &addr->data.normal.d.in4, ipbuf, sizeof (ipbuf)); + } + r += snprintf (logbuf + r, sizeof (logbuf) - r, "%c%s/%d; ", c, ipbuf, addr->data.normal.mask); +#else + ina.s_addr = addr->data.normal.d.in4.s_addr; + r += snprintf (logbuf + r, sizeof (logbuf) - r, "%c%s/%d; ", c, inet_ntoa (ina), addr->data.normal.mask); +#endif + } + else { + r += snprintf (logbuf + r, sizeof (logbuf) - r, "%s; ", addr->spf_string); + dump_spf_record (addr->data.list); + } + cur = g_list_next (cur); + } + msg_info ("spf record: %s", logbuf); +} + +/* Find position of address inside addrs list */ +static GList * +spf_addr_find (GList *addrs, gpointer to_find) +{ + struct spf_addr *addr; + GList *cur, *res = NULL; + + cur = addrs; + while (cur) { + addr = cur->data; + if (addr->is_list) { + if ((res = spf_addr_find (addr->data.list, to_find)) != NULL) { + return cur; + } + } + else { + if (cur->data == to_find) { + return cur; + } + } + cur = g_list_next (cur); + } + + return res; +} + +/* + * Destructor for spf record + */ +static void +spf_record_destructor (gpointer r) +{ + struct spf_record *rec = r; + GList *cur; + struct spf_addr *addr; + + if (rec->addrs) { + cur = rec->addrs; + while (cur) { + addr = cur->data; + if (addr->is_list && addr->data.list != NULL) { + g_list_free (addr->data.list); + } + cur = g_list_next (cur); + } + g_list_free (rec->addrs); + } +} + +static gboolean +parse_spf_ipmask (const gchar *begin, struct spf_addr *addr, struct spf_record *rec) +{ + const gchar *pos; + 
gchar mask_buf[5] = {'\0'}, *p; + gint state = 0, dots = 0; +#ifdef HAVE_INET_PTON + gchar ip_buf[INET6_ADDRSTRLEN]; +#else + gchar ip_buf[INET_ADDRSTRLEN]; +#endif + + bzero (ip_buf, sizeof (ip_buf)); + bzero (mask_buf, sizeof (mask_buf)); + pos = begin; + p = ip_buf; + + while (*pos) { + switch (state) { + case 0: + /* Require ':' */ + if (*pos != ':') { + msg_info ("<%s>: spf error for domain %s: semicolon missing", + rec->task->message_id, rec->sender_domain); + return FALSE; + } + state = 1; + pos ++; + p = ip_buf; + dots = 0; + break; + case 1: +#ifdef HAVE_INET_PTON + if (p - ip_buf >= (gint)sizeof (ip_buf)) { + return FALSE; + } + if (g_ascii_isxdigit (*pos)) { + *p ++ = *pos ++; + } + else if (*pos == '.' || *pos == ':') { + *p ++ = *pos ++; + dots ++; + } +#else + /* Begin parse ip */ + if (p - ip_buf >= (gint)sizeof (ip_buf) || dots > 3) { + return FALSE; + } + if (g_ascii_isdigit (*pos)) { + *p ++ = *pos ++; + } + else if (*pos == '.') { + *p ++ = *pos ++; + dots ++; + } +#endif + else if (*pos == '/') { + pos ++; + p = mask_buf; + state = 2; + } + else { + /* Invalid character */ + msg_info ("<%s>: spf error for domain %s: invalid ip address", + rec->task->message_id, rec->sender_domain); + return FALSE; + } + break; + case 2: + /* Parse mask */ + if (p - mask_buf >= (gint)sizeof (mask_buf)) { + msg_info ("<%s>: spf error for domain %s: too long mask", + rec->task->message_id, rec->sender_domain); + return FALSE; + } + if (g_ascii_isdigit (*pos)) { + *p ++ = *pos ++; + } + else { + return FALSE; + } + break; + } + } + +#ifdef HAVE_INET_PTON + if (inet_pton (AF_INET, ip_buf, &addr->data.normal.d.in4) != 1) { + if (inet_pton (AF_INET6, ip_buf, &addr->data.normal.d.in6) == 1) { + addr->data.normal.ipv6 = TRUE; + } + else { + msg_info ("<%s>: spf error for domain %s: invalid ip address", + rec->task->message_id, rec->sender_domain); + return FALSE; + } + } + else { + addr->data.normal.ipv6 = FALSE; + } +#else + if (!inet_aton (ip_buf, 
&addr->data.normal.d.in4)) { + return FALSE; + } +#endif + if (state == 2) { + /* Also parse mask */ + if (!addr->data.normal.ipv6) { + addr->data.normal.mask = strtoul (mask_buf, NULL, 10); + if (addr->data.normal.mask > 32) { + msg_info ("<%s>: spf error for domain %s: bad ipmask value: '%s'", + rec->task->message_id, rec->sender_domain, begin); + return FALSE; + } + } + else { + addr->data.normal.mask = strtoul (mask_buf, NULL, 10); + if (addr->data.normal.mask > 128) { + msg_info ("<%s>: spf error for domain %s: bad ipmask value: '%s'", + rec->task->message_id, rec->sender_domain, begin); + return FALSE; + } + } + } + else { + addr->data.normal.mask = addr->data.normal.ipv6 ? 128 : 32; + } + addr->data.normal.parsed = TRUE; + return TRUE; + +} + +static gchar * +parse_spf_hostmask (struct rspamd_task *task, const gchar *begin, struct spf_addr *addr, struct spf_record *rec) +{ + gchar *host = NULL, *p, mask_buf[3]; + gint hostlen; + + bzero (mask_buf, sizeof (mask_buf)); + if (*begin == '\0' || *begin == '/') { + /* Assume host as host to resolve from record */ + host = rec->cur_domain; + } + p = strchr (begin, '/'); + if (p != NULL) { + /* Extract mask */ + rspamd_strlcpy (mask_buf, p + 1, sizeof (mask_buf)); + addr->data.normal.mask = strtoul (mask_buf, NULL, 10); + if (addr->data.normal.mask > 32) { + msg_info ("<%s>: spf error for domain %s: too long mask", + rec->task->message_id, rec->sender_domain); + return FALSE; + } + if (host == NULL) { + hostlen = p - begin; + host = rspamd_mempool_alloc (task->task_pool, hostlen); + rspamd_strlcpy (host, begin, hostlen); + } + } + else { + addr->data.normal.mask = 32; + if (host == NULL) { + host = rspamd_mempool_strdup (task->task_pool, begin); + } + } + + return host; +} + +static void +spf_record_dns_callback (struct rdns_reply *reply, gpointer arg) +{ + struct spf_dns_cb *cb = arg; + gchar *begin; + struct rdns_reply_entry *elt_data; + GList *tmp = NULL; + struct rspamd_task *task; + struct spf_addr *new_addr; + 
+ task = cb->rec->task; + + cb->rec->requests_inflight --; + + if (reply->code == RDNS_RC_NOERROR) { + /* Add all logic for all DNS states here */ + LL_FOREACH (reply->entries, elt_data) { + switch (cb->cur_action) { + case SPF_RESOLVE_MX: + if (elt_data->type == RDNS_REQUEST_MX) { + /* Now resolve A record for this MX */ + if (make_dns_request (task->resolver, task->s, task->task_pool, + spf_record_dns_callback, (void *)cb, RDNS_REQUEST_A, elt_data->content.mx.name)) { + task->dns_requests ++; + cb->rec->requests_inflight ++; + } + } + else if (elt_data->type == RDNS_REQUEST_A) { + if (!cb->addr->data.normal.parsed) { + cb->addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr; + cb->addr->data.normal.mask = 32; + cb->addr->data.normal.parsed = TRUE; + } + else { + /* Insert one more address */ + tmp = spf_addr_find (cb->rec->addrs, cb->addr); + if (tmp) { + new_addr = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_addr)); + memcpy (new_addr, cb->addr, sizeof (struct spf_addr)); + new_addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr; + new_addr->data.normal.parsed = TRUE; + cb->rec->addrs = g_list_insert_before (cb->rec->addrs, tmp, new_addr); + } + else { + msg_info ("<%s>: spf error for domain %s: addresses mismatch", + task->message_id, cb->rec->sender_domain); + } + } + + } +#ifdef HAVE_INET_PTON + else if (elt_data->type == RDNS_REQUEST_AAAA) { + if (!cb->addr->data.normal.parsed) { + memcpy (&cb->addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr)); + cb->addr->data.normal.mask = 32; + cb->addr->data.normal.parsed = TRUE; + cb->addr->data.normal.ipv6 = TRUE; + } + else { + /* Insert one more address */ + tmp = spf_addr_find (cb->rec->addrs, cb->addr); + if (tmp) { + new_addr = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_addr)); + memcpy (new_addr, cb->addr, sizeof (struct spf_addr)); + memcpy (&new_addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr)); + 
new_addr->data.normal.parsed = TRUE; + new_addr->data.normal.ipv6 = TRUE; + cb->rec->addrs = g_list_insert_before (cb->rec->addrs, tmp, new_addr); + } + else { + msg_info ("<%s>: spf error for domain %s: addresses mismatch", + task->message_id, cb->rec->sender_domain); + } + } + + } +#endif + break; + case SPF_RESOLVE_A: + if (elt_data->type == RDNS_REQUEST_A) { + /* XXX: process only one record */ + cb->addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr; + cb->addr->data.normal.mask = 32; + cb->addr->data.normal.parsed = TRUE; + } +#ifdef HAVE_INET_PTON + else if (elt_data->type == RDNS_REQUEST_AAAA) { + memcpy (&cb->addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr)); + cb->addr->data.normal.mask = 32; + cb->addr->data.normal.parsed = TRUE; + cb->addr->data.normal.ipv6 = TRUE; + } +#endif + break; +#ifdef HAVE_INET_PTON + case SPF_RESOLVE_AAA: + if (elt_data->type == RDNS_REQUEST_A) { + /* XXX: process only one record */ + cb->addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr; + cb->addr->data.normal.mask = 32; + cb->addr->data.normal.parsed = TRUE; + } + else if (elt_data->type == RDNS_REQUEST_AAAA) { + memcpy (&cb->addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr)); + cb->addr->data.normal.mask = 32; + cb->addr->data.normal.parsed = TRUE; + cb->addr->data.normal.ipv6 = TRUE; + } +#endif + break; + case SPF_RESOLVE_PTR: + break; + case SPF_RESOLVE_REDIRECT: + if (elt_data->type == RDNS_REQUEST_TXT) { + begin = elt_data->content.txt.data; + + if (!cb->in_include && cb->rec->addrs) { + g_list_free (cb->rec->addrs); + cb->rec->addrs = NULL; + } + start_spf_parse (cb->rec, begin, elt_data->ttl); + + } + break; + case SPF_RESOLVE_INCLUDE: + if (elt_data->type == RDNS_REQUEST_TXT) { + begin = elt_data->content.txt.data; +#ifdef SPF_DEBUG + msg_info ("before include"); + dump_spf_record (cb->rec->addrs); +#endif + tmp = cb->rec->addrs; + cb->rec->addrs = NULL; + cb->rec->in_include = 
TRUE; + start_spf_parse (cb->rec, begin, 0); + cb->rec->in_include = FALSE; + +#ifdef SPF_DEBUG + msg_info ("after include"); + dump_spf_record (cb->rec->addrs); +#endif + /* Insert new list */ + cb->addr->is_list = TRUE; + cb->addr->data.list = cb->rec->addrs; + cb->rec->addrs = tmp; + } + break; + case SPF_RESOLVE_EXP: + break; + case SPF_RESOLVE_EXISTS: + if (elt_data->type == RDNS_REQUEST_A) { + /* If specified address resolves, we can accept connection from every IP */ + cb->addr->data.normal.d.in4.s_addr = INADDR_NONE; + cb->addr->data.normal.mask = 0; + } + break; + } + } + } + else if (reply->code == RDNS_RC_NXDOMAIN) { + switch (cb->cur_action) { + case SPF_RESOLVE_MX: + if (rdns_request_has_type (reply->request, RDNS_REQUEST_MX)) { + msg_info ("<%s>: spf error for domain %s: cannot find MX record for %s", + task->message_id, cb->rec->sender_domain, cb->rec->cur_domain); + cb->addr->data.normal.d.in4.s_addr = INADDR_NONE; + cb->addr->data.normal.mask = 32; + } + else { + msg_info ("<%s>: spf error for domain %s: cannot resolve MX record for %s", + task->message_id, cb->rec->sender_domain, cb->rec->cur_domain); + cb->addr->data.normal.d.in4.s_addr = INADDR_NONE; + cb->addr->data.normal.mask = 32; + } + break; + case SPF_RESOLVE_A: + if (rdns_request_has_type (reply->request, RDNS_REQUEST_A)) { + cb->addr->data.normal.d.in4.s_addr = INADDR_NONE; + cb->addr->data.normal.mask = 32; + } + break; +#ifdef HAVE_INET_PTON + case SPF_RESOLVE_AAA: + if (rdns_request_has_type (reply->request, RDNS_REQUEST_AAAA)) { + memset (&cb->addr->data.normal.d.in6, 0xff, sizeof (struct in6_addr)); + cb->addr->data.normal.mask = 32; + } + break; +#endif + case SPF_RESOLVE_PTR: + break; + case SPF_RESOLVE_REDIRECT: + msg_info ("<%s>: spf error for domain %s: cannot resolve TXT record for %s", + task->message_id, cb->rec->sender_domain, cb->rec->cur_domain); + break; + case SPF_RESOLVE_INCLUDE: + msg_info ("<%s>: spf error for domain %s: cannot resolve TXT record for %s", + 
task->message_id, cb->rec->sender_domain, cb->rec->cur_domain); + break; + case SPF_RESOLVE_EXP: + break; + case SPF_RESOLVE_EXISTS: + cb->addr->data.normal.d.in4.s_addr = INADDR_NONE; + cb->addr->data.normal.mask = 32; + break; + } + } + + if (cb->rec->requests_inflight == 0) { + cb->rec->callback (cb->rec, cb->rec->task); + } +} + +static gboolean +parse_spf_a (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + struct spf_dns_cb *cb; + gchar *host; + + CHECK_REC (rec); + + if (begin == NULL || *begin != ':') { + return FALSE; + } + begin ++; + + host = parse_spf_hostmask (task, begin, addr, rec); + + if (!host) { + return FALSE; + } + rec->dns_requests ++; + cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb)); + cb->rec = rec; + cb->addr = addr; + cb->cur_action = SPF_RESOLVE_A; + cb->in_include = rec->in_include; + if (make_dns_request (task->resolver, task->s, task->task_pool, + spf_record_dns_callback, (void *)cb, RDNS_REQUEST_A, host)) { + task->dns_requests ++; + rec->requests_inflight ++; + return TRUE; + } + + return FALSE; + +} + +static gboolean +parse_spf_ptr (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + CHECK_REC (rec); + + msg_info ("<%s>: spf error for domain %s: ptr elements are not implemented", + rec->task->message_id, rec->sender_domain); + return FALSE; +} + +static gboolean +parse_spf_mx (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + struct spf_dns_cb *cb; + gchar *host; + + CHECK_REC (rec); + + if (begin == NULL) { + return FALSE; + } + if (*begin == ':') { + begin ++; + } + + host = parse_spf_hostmask (task, begin, addr, rec); + + if (!host) { + return FALSE; + } + rec->dns_requests ++; + cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb)); + cb->rec = rec; + cb->addr = addr; + memset (&addr->data.normal, 0, sizeof (addr->data.normal)); + cb->cur_action 
= SPF_RESOLVE_MX; + cb->in_include = rec->in_include; + if (make_dns_request (task->resolver, task->s, task->task_pool, + spf_record_dns_callback, (void *)cb, RDNS_REQUEST_MX, host)) { + task->dns_requests ++; + rec->requests_inflight ++; + + return TRUE; + } + + return FALSE; +} + +static gboolean +parse_spf_all (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + /* All is 0/0 */ + memset (&addr->data.normal.d, 0, sizeof (addr->data.normal.d)); + if (rec->in_include) { + /* Ignore all record in include */ + addr->data.normal.mask = 32; + } + else { + addr->data.normal.mask = 0; + addr->data.normal.addr_any = TRUE; + } + + return TRUE; +} + +static gboolean +parse_spf_ip4 (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + /* ip4:addr[/mask] */ + + CHECK_REC (rec); + return parse_spf_ipmask (begin, addr, rec); +} + +#ifdef HAVE_INET_PTON +static gboolean +parse_spf_ip6 (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + /* ip6:addr[/mask] */ + + CHECK_REC (rec); + return parse_spf_ipmask (begin, addr, rec); +} +#endif + +static gboolean +parse_spf_include (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + struct spf_dns_cb *cb; + gchar *domain; + + CHECK_REC (rec); + + if (begin == NULL || *begin != ':') { + return FALSE; + } + begin ++; + rec->dns_requests ++; + + cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb)); + cb->rec = rec; + cb->addr = addr; + cb->cur_action = SPF_RESOLVE_INCLUDE; + cb->in_include = rec->in_include; + addr->is_list = TRUE; + addr->data.list = NULL; + domain = rspamd_mempool_strdup (task->task_pool, begin); + if (make_dns_request (task->resolver, task->s, task->task_pool, + spf_record_dns_callback, (void *)cb, RDNS_REQUEST_TXT, domain)) { + task->dns_requests ++; + rec->requests_inflight ++; + + return TRUE; + } + + + return FALSE; 
+} + +static gboolean +parse_spf_exp (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + CHECK_REC (rec); + + msg_info ("exp record is ignored"); + return TRUE; +} + +static gboolean +parse_spf_redirect (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + struct spf_dns_cb *cb; + gchar *domain; + + CHECK_REC (rec); + + if (begin == NULL || *begin != '=') { + return FALSE; + } + begin ++; + rec->dns_requests ++; + + cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb)); + cb->rec = rec; + cb->addr = addr; + cb->cur_action = SPF_RESOLVE_REDIRECT; + cb->in_include = rec->in_include; + domain = rspamd_mempool_strdup (task->task_pool, begin); + if (make_dns_request (task->resolver, task->s, task->task_pool, + spf_record_dns_callback, (void *)cb, RDNS_REQUEST_TXT, domain)) { + task->dns_requests ++; + rec->requests_inflight ++; + + return TRUE; + } + + return FALSE; +} + +static gboolean +parse_spf_exists (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + struct spf_dns_cb *cb; + gchar *host; + + CHECK_REC (rec); + + if (begin == NULL || *begin != ':') { + return FALSE; + } + begin ++; + rec->dns_requests ++; + + addr->data.normal.mask = 32; + cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb)); + cb->rec = rec; + cb->addr = addr; + cb->cur_action = SPF_RESOLVE_EXISTS; + cb->in_include = rec->in_include; + host = rspamd_mempool_strdup (task->task_pool, begin); + + if (make_dns_request (task->resolver, task->s, task->task_pool, + spf_record_dns_callback, (void *)cb, RDNS_REQUEST_A, host)) { + task->dns_requests ++; + rec->requests_inflight ++; + + return TRUE; + } + + return FALSE; +} + +static void +reverse_spf_ip (gchar *ip, gint len) +{ + gchar ipbuf[sizeof("255.255.255.255") - 1], *p, *c; + gint t = 0, l = len; + + if (len > (gint)sizeof (ipbuf)) { + msg_info ("cannot reverse string of length 
%d", len); + return; + } + + p = ipbuf + len; + c = ip; + while (-- l) { + if (*c == '.') { + memcpy (p, c - t, t); + *--p = '.'; + c ++; + t = 0; + continue; + } + + t ++; + c ++; + p --; + } + + memcpy (p - 1, c - t, t + 1); + + memcpy (ip, ipbuf, len); +} + +static gchar * +expand_spf_macro (struct rspamd_task *task, struct spf_record *rec, gchar *begin) +{ + gchar *p, *c, *new, *tmp; + gint len = 0, slen = 0, state = 0; +#ifdef HAVE_INET_PTON + gchar ip_buf[INET6_ADDRSTRLEN]; +#endif + gboolean need_expand = FALSE; + + p = begin; + /* Calculate length */ + while (*p) { + switch (state) { + case 0: + /* Skip any character and wait for % in input */ + if (*p == '%') { + state = 1; + } + else { + len ++; + } + + slen ++; + p ++; + break; + case 1: + /* We got % sign, so we should whether wait for { or for - or for _ or for % */ + if (*p == '%' || *p == '-') { + /* Just a single % sign or space */ + len ++; + } + else if (*p == '_') { + /* %20 */ + len += sizeof ("%20") - 1; + } + else if (*p == '{') { + state = 2; + } + else { + /* Something unknown */ + msg_info ("<%s>: spf error for domain %s: unknown spf element", + task->message_id, rec->sender_domain); + return begin; + } + p ++; + slen ++; + break; + case 2: + /* Read macro name */ + switch (g_ascii_tolower (*p)) { + case 'i': +#ifdef HAVE_INET_PTON + len += sizeof (INET6_ADDRSTRLEN) - 1; +#else + len += sizeof (INET_ADDRSTRLEN) - 1; +#endif + break; + case 's': + len += strlen (rec->sender); + break; + case 'l': + len += strlen (rec->local_part); + break; + case 'o': + len += strlen (rec->sender_domain); + break; + case 'd': + len += strlen (rec->cur_domain); + break; + case 'v': + len += sizeof ("in-addr") - 1; + break; + case 'h': + if (task->helo) { + len += strlen (task->helo); + } + break; + default: + msg_info ("<%s>: spf error for domain %s: unknown or unsupported spf macro %c in %s", + task->message_id, rec->sender_domain, *p, begin); + return begin; + } + p ++; + slen ++; + state = 3; + break; + 
case 3: + /* Read modifier */ + if (*p == '}') { + state = 0; + need_expand = TRUE; + } + else if (*p != 'r' && !g_ascii_isdigit (*p)) { + msg_info ("<%s>: spf error for domain %s: unknown or unsupported spf modifier %c in %s", + task->message_id, rec->sender_domain, *p, begin); + return begin; + } + p ++; + slen ++; + break; + } + } + + if (!need_expand) { + /* No expansion needed */ + return begin; + } + + new = rspamd_mempool_alloc (task->task_pool, len + 1); + + c = new; + p = begin; + state = 0; + /* Begin macro expansion */ + + while (*p) { + switch (state) { + case 0: + /* Skip any character and wait for % in input */ + if (*p == '%') { + state = 1; + } + else { + *c = *p; + c ++; + } + + p ++; + break; + case 1: + /* We got % sign, so we should whether wait for { or for - or for _ or for % */ + if (*p == '%') { + /* Just a single % sign or space */ + *c++ = '%'; + } + else if (*p == '-') { + *c++ = ' '; + } + else if (*p == '_') { + /* %20 */ + *c++ = '%'; + *c++ = '2'; + *c++ = '0'; + } + else if (*p == '{') { + state = 2; + } + else { + /* Something unknown */ + msg_info ("<%s>: spf error for domain %s: unknown spf element", + task->message_id, rec->sender_domain); + return begin; + } + p ++; + break; + case 2: + /* Read macro name */ + switch (g_ascii_tolower (*p)) { + case 'i': +#ifdef HAVE_INET_PTON + len = rspamd_strlcpy (ip_buf, + rspamd_inet_address_to_string (&task->from_addr), + sizeof (ip_buf)); + memcpy (c, ip_buf, len); +#else + tmp = inet_ntoa (task->from_addr); + len = strlen (tmp); + memcpy (c, tmp, len); +#endif + c += len; + break; + case 's': + len = strlen (rec->sender); + memcpy (c, rec->sender, len); + c += len; + break; + case 'l': + len = strlen (rec->local_part); + memcpy (c, rec->local_part, len); + c += len; + break; + case 'o': + len = strlen (rec->sender_domain); + memcpy (c, rec->sender_domain, len); + c += len; + break; + case 'd': + len = strlen (rec->cur_domain); + memcpy (c, rec->cur_domain, len); + c += len; + break; + 
case 'v': + len = sizeof ("in-addr") - 1; + memcpy (c, "in-addr", len); + c += len; + break; + case 'h': + if (task->helo) { + tmp = strchr (task->helo, '@'); + if (tmp) { + len = strlen (tmp + 1); + memcpy (c, tmp + 1, len); + c += len; + } + } + break; + default: + msg_info ("<%s>: spf error for domain %s: unknown or unsupported spf macro %c in %s", + task->message_id, rec->sender_domain, *p, begin); + return begin; + } + p ++; + state = 3; + break; + case 3: + /* Read modifier */ + if (*p == '}') { + state = 0; + } + else if (*p == 'r' && len != 0) { + reverse_spf_ip (c - len, len); + len = 0; + } + else if (g_ascii_isdigit (*p)) { + /*XXX: try to implement domain strimming */ + } + else { + msg_info ("<%s>: spf error for domain %s: unknown or unsupported spf macro %c in %s", + task->message_id, rec->sender_domain, *p, begin); + return begin; + } + p ++; + break; + } + } + /* Null terminate */ + *c = '\0'; + + return new; + +} + +#define NEW_ADDR(x) do { \ + (x) = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_addr)); \ + (x)->mech = check_spf_mech (rec->cur_elt, &need_shift); \ + (x)->spf_string = rspamd_mempool_strdup (task->task_pool, begin); \ + memset (&(x)->data.normal, 0, sizeof ((x)->data.normal)); \ + (x)->data.normal.mask = 32; \ + (x)->is_list = FALSE; \ +} while (0); + +/* Read current element and try to parse record */ +static gboolean +parse_spf_record (struct rspamd_task *task, struct spf_record *rec) +{ + struct spf_addr *new = NULL; + gboolean need_shift, res = FALSE; + gchar *begin; + + rec->cur_elt = rec->elts[rec->elt_num]; + if (rec->cur_elt == NULL) { + return FALSE; + } + else if (*rec->cur_elt == '\0') { + /* Silently skip empty elements */ + rec->elt_num ++; + return TRUE; + } + else { + begin = expand_spf_macro (task, rec, rec->cur_elt); + if (*begin == '?' 
|| *begin == '+' || *begin == '-' || *begin == '~') { + begin ++; + } + + + /* Now check what we have */ + switch (g_ascii_tolower (*begin)) { + case 'a': + /* all or a */ + if (g_ascii_strncasecmp (begin, SPF_ALL, sizeof (SPF_ALL) - 1) == 0) { + NEW_ADDR (new); + begin += sizeof (SPF_ALL) - 1; + res = parse_spf_all (task, begin, rec, new); + } + else if (g_ascii_strncasecmp (begin, SPF_A, sizeof (SPF_A) - 1) == 0) { + NEW_ADDR (new); + begin += sizeof (SPF_A) - 1; + res = parse_spf_a (task, begin, rec, new); + } + else { + msg_info ("<%s>: spf error for domain %s: bad spf command %s", + task->message_id, rec->sender_domain, begin); + } + break; + case 'i': + /* include or ip4 */ + if (g_ascii_strncasecmp (begin, SPF_IP4, sizeof (SPF_IP4) - 1) == 0) { + NEW_ADDR (new); + begin += sizeof (SPF_IP4) - 1; + res = parse_spf_ip4 (task, begin, rec, new); + } + else if (g_ascii_strncasecmp (begin, SPF_INCLUDE, sizeof (SPF_INCLUDE) - 1) == 0) { + NEW_ADDR (new); + begin += sizeof (SPF_INCLUDE) - 1; + res = parse_spf_include (task, begin, rec, new); + } + else if (g_ascii_strncasecmp (begin, SPF_IP6, sizeof (SPF_IP6) - 1) == 0) { +#ifdef HAVE_INET_PTON + NEW_ADDR (new); + begin += sizeof (SPF_IP6) - 1; + res = parse_spf_ip6 (task, begin, rec, new); +#else + msg_info ("ignoring ip6 spf command as IPv6 is not supported: %s", begin); + new = NULL; + res = TRUE; + begin += sizeof (SPF_IP6) - 1; +#endif + } + else { + msg_info ("<%s>: spf error for domain %s: bad spf command %s", + task->message_id, rec->sender_domain, begin); + } + break; + case 'm': + /* mx */ + if (g_ascii_strncasecmp (begin, SPF_MX, sizeof (SPF_MX) - 1) == 0) { + NEW_ADDR (new); + begin += sizeof (SPF_MX) - 1; + res = parse_spf_mx (task, begin, rec, new); + } + else { + msg_info ("<%s>: spf error for domain %s: bad spf command %s", + task->message_id, rec->sender_domain, begin); + } + break; + case 'p': + /* ptr */ + if (g_ascii_strncasecmp (begin, SPF_PTR, sizeof (SPF_PTR) - 1) == 0) { + NEW_ADDR (new); + 
begin += sizeof (SPF_PTR) - 1; + res = parse_spf_ptr (task, begin, rec, new); + } + else { + msg_info ("<%s>: spf error for domain %s: bad spf command %s", + task->message_id, rec->sender_domain, begin); + } + break; + case 'e': + /* exp or exists */ + if (g_ascii_strncasecmp (begin, SPF_EXP, sizeof (SPF_EXP) - 1) == 0) { + begin += sizeof (SPF_EXP) - 1; + res = parse_spf_exp (task, begin, rec, NULL); + } + else if (g_ascii_strncasecmp (begin, SPF_EXISTS, sizeof (SPF_EXISTS) - 1) == 0) { + NEW_ADDR (new); + begin += sizeof (SPF_EXISTS) - 1; + res = parse_spf_exists (task, begin, rec, new); + } + else { + msg_info ("<%s>: spf error for domain %s: bad spf command %s", + task->message_id, rec->sender_domain, begin); + } + break; + case 'r': + /* redirect */ + if (g_ascii_strncasecmp (begin, SPF_REDIRECT, sizeof (SPF_REDIRECT) - 1) == 0) { + begin += sizeof (SPF_REDIRECT) - 1; + res = parse_spf_redirect (task, begin, rec, NULL); + } + else { + msg_info ("<%s>: spf error for domain %s: bad spf command %s", + task->message_id, rec->sender_domain, begin); + } + break; + case 'v': + if (g_ascii_strncasecmp (begin, "v=spf", sizeof ("v=spf") - 1) == 0) { + /* Skip this element till the end of record */ + while (*begin && !g_ascii_isspace (*begin)) { + begin ++; + } + } + break; + default: + msg_info ("<%s>: spf error for domain %s: bad spf command %s", + task->message_id, rec->sender_domain, begin); + break; + } + if (res) { + if (new != NULL) { + rec->addrs = g_list_prepend (rec->addrs, new); + } + rec->elt_num ++; + } + } + + return res; +} +#undef NEW_ADDR + +static void +parse_spf_scopes (struct spf_record *rec, gchar **begin) +{ + for (;;) { + if (g_ascii_strncasecmp (*begin, SPF_SCOPE_PRA, sizeof (SPF_SCOPE_PRA) - 1) == 0) { + *begin += sizeof (SPF_SCOPE_PRA) - 1; + /* XXX: Implement actual PRA check */ + /* extract_pra_info (rec); */ + continue; + } + else if (g_ascii_strncasecmp (*begin, SPF_SCOPE_MFROM, sizeof (SPF_SCOPE_MFROM) - 1) == 0) { + /* mfrom is standart 
spf1 check */ + *begin += sizeof (SPF_SCOPE_MFROM) - 1; + continue; + } + else if (**begin != ',') { + break; + } + (*begin) ++; + } +} + +static void +start_spf_parse (struct spf_record *rec, gchar *begin, guint ttl) +{ + /* Skip spaces */ + while (g_ascii_isspace (*begin)) { + begin ++; + } + + if (g_ascii_strncasecmp (begin, SPF_VER1_STR, sizeof (SPF_VER1_STR) - 1) == 0) { + begin += sizeof (SPF_VER1_STR) - 1; + while (g_ascii_isspace (*begin) && *begin) { + begin ++; + } + rec->elts = g_strsplit_set (begin, " ", 0); + rec->elt_num = 0; + if (rec->elts) { + rspamd_mempool_add_destructor (rec->task->task_pool, (rspamd_mempool_destruct_t)g_strfreev, rec->elts); + rec->cur_elt = rec->elts[0]; + while (parse_spf_record (rec->task, rec)); + if (ttl != 0) { + rec->ttl = ttl; + } + } + } + else if (g_ascii_strncasecmp (begin, SPF_VER2_STR, sizeof (SPF_VER2_STR) - 1) == 0) { + /* Skip one number of record, so no we are here spf2.0/ */ + begin += sizeof (SPF_VER2_STR); + if (*begin != '/') { + msg_info ("<%s>: spf error for domain %s: sender id is invalid", + rec->task->message_id, rec->sender_domain); + } + else { + begin ++; + parse_spf_scopes (rec, &begin); + } + /* Now common spf record */ + while (g_ascii_isspace (*begin) && *begin) { + begin ++; + } + rec->elts = g_strsplit_set (begin, " ", 0); + rec->elt_num = 0; + if (rec->elts) { + rspamd_mempool_add_destructor (rec->task->task_pool, (rspamd_mempool_destruct_t)g_strfreev, rec->elts); + rec->cur_elt = rec->elts[0]; + while (parse_spf_record (rec->task, rec)); + if (ttl != 0) { + rec->ttl = ttl; + } + } + } + else { + msg_debug ("<%s>: spf error for domain %s: bad spf record version: %*s", + rec->task->message_id, rec->sender_domain, sizeof (SPF_VER1_STR) - 1, begin); + } +} + +static void +spf_dns_callback (struct rdns_reply *reply, gpointer arg) +{ + struct spf_record *rec = arg; + struct rdns_reply_entry *elt; + + rec->requests_inflight --; + if (reply->code == RDNS_RC_NOERROR) { + LL_FOREACH (reply->entries, 
elt) { + start_spf_parse (rec, elt->content.txt.data, elt->ttl); + } + } + + if (rec->requests_inflight == 0) { + rec->callback (rec, rec->task); + } +} + +gchar * +get_spf_domain (struct rspamd_task *task) +{ + gchar *domain, *res = NULL; + GList *domains; + + if (task->from && (domain = strchr (task->from, '@')) != NULL && *domain == '@') { + res = rspamd_mempool_strdup (task->task_pool, domain + 1); + if ((domain = strchr (res, '>')) != NULL) { + *domain = '\0'; + } + } + else { + /* Extract from header */ + domains = message_get_header (task->task_pool, task->message, "From", FALSE); + + if (domains != NULL) { + res = rspamd_mempool_strdup (task->task_pool, domains->data); + + if ((domain = strrchr (res, '@')) == NULL) { + g_list_free (domains); + return NULL; + } + res = rspamd_mempool_strdup (task->task_pool, domain + 1); + g_list_free (domains); + + if ((domain = strchr (res, '>')) != NULL) { + *domain = '\0'; + } + } + } + + return res; +} + +gboolean +resolve_spf (struct rspamd_task *task, spf_cb_t callback) +{ + struct spf_record *rec; + gchar *domain; + GList *domains; + + rec = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct spf_record)); + rec->task = task; + rec->callback = callback; + /* Add destructor */ + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)spf_record_destructor, rec); + + /* Extract from data */ + if (task->from && (domain = strchr (task->from, '@')) != NULL && *domain == '@') { + rec->sender = task->from; + + rec->local_part = rspamd_mempool_strdup (task->task_pool, task->from); + *(rec->local_part + (domain - task->from)) = '\0'; + if (*rec->local_part == '<') { + memmove (rec->local_part, rec->local_part + 1, strlen (rec->local_part)); + } + rec->cur_domain = rspamd_mempool_strdup (task->task_pool, domain + 1); + if ((domain = strchr (rec->cur_domain, '>')) != NULL) { + *domain = '\0'; + } + rec->sender_domain = rec->cur_domain; + + if (make_dns_request (task->resolver, task->s, task->task_pool, 
spf_dns_callback, + (void *)rec, RDNS_REQUEST_TXT, rec->cur_domain)) { + task->dns_requests ++; + rec->requests_inflight ++; + return TRUE; + } + } + else { + /* Extract from header */ + domains = message_get_header (task->task_pool, task->message, "From", FALSE); + + if (domains != NULL) { + rec->cur_domain = rspamd_mempool_strdup (task->task_pool, domains->data); + g_list_free (domains); + + if ((domain = strrchr (rec->cur_domain, '@')) == NULL) { + return FALSE; + } + rec->sender = rspamd_mempool_strdup (task->task_pool, rec->cur_domain); + rec->local_part = rec->cur_domain; + *domain = '\0'; + rec->cur_domain = domain + 1; + + if ((domain = strchr (rec->local_part, '<')) != NULL) { + memmove (rec->local_part, domain + 1, strlen (domain)); + } + + if ((domain = strchr (rec->cur_domain, '>')) != NULL) { + *domain = '\0'; + } + rec->sender_domain = rec->cur_domain; + if (make_dns_request (task->resolver, task->s, task->task_pool, + spf_dns_callback, (void *)rec, RDNS_REQUEST_TXT, rec->cur_domain)) { + task->dns_requests ++; + rec->requests_inflight ++; + return TRUE; + } + } + } + + return FALSE; +} + +/* + * vi:ts=4 + */ diff --git a/src/libserver/spf.h b/src/libserver/spf.h new file mode 100644 index 000000000..94c613e42 --- /dev/null +++ b/src/libserver/spf.h @@ -0,0 +1,84 @@ +#ifndef RSPAMD_SPF_H +#define RSPAMD_SPF_H + +#include "config.h" + +struct rspamd_task; +struct spf_record; + +typedef void (*spf_cb_t)(struct spf_record *record, struct rspamd_task *task); + +typedef enum spf_mech_e { + SPF_FAIL, + SPF_SOFT_FAIL, + SPF_PASS, + SPF_NEUTRAL +} spf_mech_t; + +typedef enum spf_action_e { + SPF_RESOLVE_MX, + SPF_RESOLVE_A, + SPF_RESOLVE_PTR, + SPF_RESOLVE_AAA, + SPF_RESOLVE_REDIRECT, + SPF_RESOLVE_INCLUDE, + SPF_RESOLVE_EXISTS, + SPF_RESOLVE_EXP +} spf_action_t; + +struct spf_addr { + union { + struct { + union { + struct in_addr in4; +#ifdef HAVE_INET_PTON + struct in6_addr in6; +#endif + } d; + guint32 mask; + gboolean ipv6; + gboolean parsed; + gboolean 
addr_any; + } normal; + GList *list; + } data; + gboolean is_list; + spf_mech_t mech; + gchar *spf_string; +}; + +struct spf_record { + gchar **elts; + + gchar *cur_elt; + gint elt_num; + gint nested; + gint dns_requests; + gint requests_inflight; + + guint ttl; + + GList *addrs; + gchar *cur_domain; + gchar *sender; + gchar *sender_domain; + gchar *local_part; + struct rspamd_task *task; + spf_cb_t callback; + + gboolean in_include; +}; + + +/* + * Resolve spf record for specified task and call a callback after resolution fails/succeed + */ +gboolean resolve_spf (struct rspamd_task *task, spf_cb_t callback); + +/* + * Get a domain for spf for specified task + */ +gchar *get_spf_domain (struct rspamd_task *task); + + +#endif diff --git a/src/libserver/statfile.c b/src/libserver/statfile.c new file mode 100644 index 000000000..4c1cc13fb --- /dev/null +++ b/src/libserver/statfile.c @@ -0,0 +1,927 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "statfile.h" +#include "main.h" + +#define RSPAMD_STATFILE_VERSION {'1', '2'} +#define BACKUP_SUFFIX ".old" + +/* Maximum number of statistics files */ +#define STATFILES_MAX 255 +static void statfile_pool_set_block_common ( + statfile_pool_t * pool, stat_file_t * file, + guint32 h1, guint32 h2, + time_t t, double value, + gboolean from_now); + +static gint +cmpstatfile (const void *a, const void *b) +{ + const stat_file_t *s1 = a, *s2 = b; + + return g_ascii_strcasecmp (s1->filename, s2->filename); +} + +/* Convert statfile version 1.0 to statfile version 1.2, saving backup */ +struct stat_file_header_10 { + u_char magic[3]; /**< magic signature ('r' 's' 'd') */ + u_char version[2]; /**< version of statfile */ + u_char padding[3]; /**< padding */ + guint64 create_time; /**< create time (time_t->guint64) */ +}; + +static gboolean +convert_statfile_10 (stat_file_t * file) +{ + gchar *backup_name; + struct stat st; + struct stat_file_header header = { + .magic = {'r', 's', 'd'}, + .version = RSPAMD_STATFILE_VERSION, + .padding = {0, 0, 0}, + .revision = 0, + .rev_time = 0 + }; + + + /* Format backup name */ + backup_name = g_strdup_printf ("%s.%s", file->filename, BACKUP_SUFFIX); + + msg_info ("convert old statfile %s to version %c.%c, backup in %s", file->filename, + header.version[0], header.version[1], backup_name); + + if (stat (backup_name, &st) != -1) { + msg_info ("replace old %s", backup_name); + unlink (backup_name); + 
} + + rename (file->filename, backup_name); + g_free (backup_name); + + /* XXX: maybe race condition here */ + unlock_file (file->fd, FALSE); + close (file->fd); + if ((file->fd = open (file->filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { + msg_info ("cannot create file %s, error %d, %s", file->filename, errno, strerror (errno)); + return FALSE; + } + lock_file (file->fd, FALSE); + /* Now make new header and copy it to new file */ + if (write (file->fd, &header, sizeof (header)) == -1) { + msg_info ("cannot write to file %s, error %d, %s", file->filename, errno, strerror (errno)); + return FALSE; + } + /* Now write old map to new file */ + if (write (file->fd, ((u_char *)file->map + sizeof (struct stat_file_header_10)), + file->len - sizeof (struct stat_file_header_10)) == -1) { + msg_info ("cannot write to file %s, error %d, %s", file->filename, errno, strerror (errno)); + return FALSE; + } + /* Unmap old memory and map new */ + munmap (file->map, file->len); + file->len = file->len + sizeof (struct stat_file_header) - sizeof (struct stat_file_header_10); +#ifdef HAVE_MMAP_NOCORE + if ((file->map = mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NOCORE, file->fd, 0)) == MAP_FAILED) { +#else + if ((file->map = mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED, file->fd, 0)) == MAP_FAILED) { +#endif + msg_info ("cannot mmap file %s, error %d, %s", file->filename, errno, strerror (errno)); + return FALSE; + } + + return TRUE; +} + +/* Check whether specified file is statistic file and calculate its len in blocks */ +static gint +statfile_pool_check (stat_file_t * file) +{ + struct stat_file *f; + gchar *c; + static gchar valid_version[] = RSPAMD_STATFILE_VERSION; + + + if (!file || !file->map) { + return -1; + } + + if (file->len < sizeof (struct stat_file)) { + msg_info ("file %s is too short to be stat file: %z", file->filename, file->len); + return -1; + } + + f = (struct stat_file *)file->map; + c = f->header.magic; + 
/* Check magic and version */ + if (*c++ != 'r' || *c++ != 's' || *c++ != 'd') { + msg_info ("file %s is invalid stat file", file->filename); + return -1; + } + /* Now check version and convert old version to new one (that can be used for sync */ + if (*c == 1 && *(c + 1) == 0) { + if (!convert_statfile_10 (file)) { + return -1; + } + f = (struct stat_file *)file->map; + } + else if (memcmp (c, valid_version, sizeof (valid_version)) != 0) { + /* Unknown version */ + msg_info ("file %s has invalid version %c.%c", file->filename, '0' + *c, '0' + *(c + 1)); + return -1; + } + + /* Check first section and set new offset */ + file->cur_section.code = f->section.code; + file->cur_section.length = f->section.length; + if (file->cur_section.length * sizeof (struct stat_file_block) > file->len) { + msg_info ("file %s is truncated: %z, must be %z", file->filename, file->len, file->cur_section.length * sizeof (struct stat_file_block)); + return -1; + } + file->seek_pos = sizeof (struct stat_file) - sizeof (struct stat_file_block); + + return 0; +} + + +statfile_pool_t * +statfile_pool_new (rspamd_mempool_t *pool, gboolean use_mlock) +{ + statfile_pool_t *new; + + new = rspamd_mempool_alloc0 (pool, sizeof (statfile_pool_t)); + new->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + new->files = rspamd_mempool_alloc0 (new->pool, STATFILES_MAX * sizeof (stat_file_t)); + new->lock = rspamd_mempool_get_mutex (new->pool); + new->mlock_ok = use_mlock; + + return new; +} + +static stat_file_t * +statfile_pool_reindex (statfile_pool_t * pool, gchar *filename, size_t old_size, size_t size) +{ + gchar *backup; + gint fd; + stat_file_t *new; + u_char *map, *pos; + struct stat_file_block *block; + struct stat_file_header *header; + + /* A usable file holds the header, one section header and at least one whole block; block is only a pointer here, so measure the struct */ + if (size < + sizeof (struct stat_file_header) + sizeof (struct stat_file_section) + sizeof (struct stat_file_block)) { + msg_err ("file %s is too small to carry any statistic: %z", filename, size); + return NULL; + } + + /* First of all rename old file */ +
rspamd_mempool_lock_mutex (pool->lock); + + backup = g_strconcat (filename, ".old", NULL); + if (rename (filename, backup) == -1) { + msg_err ("cannot rename %s to %s: %s", filename, backup, strerror (errno)); + g_free (backup); + rspamd_mempool_unlock_mutex (pool->lock); + return NULL; + } + + rspamd_mempool_unlock_mutex (pool->lock); + + /* Now create new file with required size */ + if (statfile_pool_create (pool, filename, size) != 0) { + msg_err ("cannot create new file"); + g_free (backup); + return NULL; + } + /* Now open new file and start copying */ + fd = open (backup, O_RDONLY); + new = statfile_pool_open (pool, filename, size, TRUE); + + if (fd == -1 || new == NULL) { + msg_err ("cannot open file: %s", strerror (errno)); + /* The backup may have been opened even though the new statfile was not: do not leak its descriptor */ + if (fd != -1) { + close (fd); + } + g_free (backup); + return NULL; + } + + /* Now start reading blocks from old statfile */ + if ((map = mmap (NULL, old_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { + msg_err ("cannot mmap file: %s", strerror (errno)); + close (fd); + g_free (backup); + return NULL; + } + + pos = map + (sizeof (struct stat_file) - sizeof (struct stat_file_block)); + while (old_size - (pos - map) >= sizeof (struct stat_file_block)) { + block = (struct stat_file_block *)pos; + if (block->hash1 != 0 && block->value != 0) { + statfile_pool_set_block_common (pool, new, block->hash1, block->hash2, 0, block->value, FALSE); + } + /* Step over one whole block; sizeof (block) would only be the size of the pointer */ + pos += sizeof (struct stat_file_block); + } + + header = (struct stat_file_header *)map; + statfile_set_revision (new, header->revision, header->rev_time); + + munmap (map, old_size); + close (fd); + unlink (backup); + g_free (backup); + + return new; + +} + +/* + * Pre-load mmaped file into memory + */ +static void +statfile_preload (stat_file_t *file) +{ + guint8 *pos, *end; + volatile guint8 t; + gsize size; + + pos = (guint8 *)file->map; + end = (guint8 *)file->map + file->len; + + if (madvise (pos, end - pos, MADV_SEQUENTIAL) == -1) { + msg_info ("madvise failed: %s", strerror (errno)); + } + else { + /* Load pages of file */ +#ifdef
HAVE_GETPAGESIZE + size = getpagesize (); +#else + size = sysconf (_SC_PAGESIZE); +#endif + while (pos < end) { + t = *pos; + (void)t; + pos += size; + } + } +} + +stat_file_t * +statfile_pool_open (statfile_pool_t * pool, gchar *filename, size_t size, gboolean forced) +{ + struct stat st; + stat_file_t *new_file; + + if ((new_file = statfile_pool_is_open (pool, filename)) != NULL) { + return new_file; + } + + if (pool->opened >= STATFILES_MAX - 1) { + msg_err ("reached hard coded limit of statfiles opened: %d", STATFILES_MAX); + return NULL; + } + + if (stat (filename, &st) == -1) { + msg_info ("cannot stat file %s, error %s, %d", filename, strerror (errno), errno); + return NULL; + } + + rspamd_mempool_lock_mutex (pool->lock); + if (!forced && labs (size - st.st_size) > (long)sizeof (struct stat_file) * 2 + && size > sizeof (struct stat_file)) { + rspamd_mempool_unlock_mutex (pool->lock); + msg_warn ("need to reindex statfile old size: %Hz, new size: %Hz", (size_t)st.st_size, size); + return statfile_pool_reindex (pool, filename, st.st_size, size); + } + else if (size < sizeof (struct stat_file)) { + msg_err ("requested to shrink statfile to %Hz but it is too small", size); + } + + new_file = &pool->files[pool->opened++]; + bzero (new_file, sizeof (stat_file_t)); + if ((new_file->fd = open (filename, O_RDWR)) == -1) { + msg_info ("cannot open file %s, error %d, %s", filename, errno, strerror (errno)); + rspamd_mempool_unlock_mutex (pool->lock); + pool->opened--; + return NULL; + } + + if ((new_file->map = mmap (NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, new_file->fd, 0)) == MAP_FAILED) { + close (new_file->fd); + rspamd_mempool_unlock_mutex (pool->lock); + msg_info ("cannot mmap file %s, error %d, %s", filename, errno, strerror (errno)); + pool->opened--; + return NULL; + + } + + rspamd_strlcpy (new_file->filename, filename, sizeof (new_file->filename)); + new_file->len = st.st_size; + /* Try to lock pages in RAM */ + if (pool->mlock_ok) { + if (mlock 
(new_file->map, new_file->len) == -1) { + msg_warn ("mlock of statfile failed, maybe you need to increase RLIMIT_MEMLOCK limit for a process: %s", strerror (errno)); + pool->mlock_ok = FALSE; + } + } + /* Acquire lock for this operation */ + lock_file (new_file->fd, FALSE); + if (statfile_pool_check (new_file) == -1) { + pool->opened--; + rspamd_mempool_unlock_mutex (pool->lock); + unlock_file (new_file->fd, FALSE); + munmap (new_file->map, st.st_size); + /* The slot is given back to the pool, so the descriptor must be released as well */ + close (new_file->fd); + return NULL; + } + unlock_file (new_file->fd, FALSE); + + new_file->open_time = time (NULL); + new_file->access_time = new_file->open_time; + new_file->lock = rspamd_mempool_get_mutex (pool->pool); + + statfile_preload (new_file); + + rspamd_mempool_unlock_mutex (pool->lock); + + return statfile_pool_is_open (pool, filename); +} + +gint +statfile_pool_close (statfile_pool_t * pool, stat_file_t * file, gboolean keep_sorted) +{ + stat_file_t *pos; + + if ((pos = statfile_pool_is_open (pool, file->filename)) == NULL) { + msg_info ("file %s is not opened", file->filename); + return -1; + } + + rspamd_mempool_lock_mutex (pool->lock); + + if (file->map) { + msg_info ("syncing statfile %s", file->filename); + msync (file->map, file->len, MS_ASYNC); + munmap (file->map, file->len); + } + if (file->fd != -1) { + close (file->fd); + } + /* Move the remain statfiles */ + memmove (pos, ((guint8 *)pos) + sizeof (stat_file_t), + (--pool->opened - (pos - pool->files)) * sizeof (stat_file_t)); + + rspamd_mempool_unlock_mutex (pool->lock); + + return 0; +} + +gint +statfile_pool_create (statfile_pool_t * pool, gchar *filename, size_t size) +{ + struct stat_file_header header = { + .magic = {'r', 's', 'd'}, + .version = RSPAMD_STATFILE_VERSION, + .padding = {0, 0, 0}, + .revision = 0, + .rev_time = 0, + .used_blocks = 0 + }; + struct stat_file_section section = { + .code = STATFILE_SECTION_COMMON, + }; + struct stat_file_block block = { 0, 0, 0 }; + gint fd; + guint buflen = 0, nblocks; + gchar *buf = NULL; + + if
(statfile_pool_is_open (pool, filename) != NULL) { + msg_info ("file %s is already opened", filename); + return 0; + } + + if (size < + sizeof (struct stat_file_header) + sizeof (struct stat_file_section) + sizeof (block)) { + msg_err ("file %s is too small to carry any statistic: %z", filename, size); + return -1; + } + + rspamd_mempool_lock_mutex (pool->lock); + nblocks = (size - sizeof (struct stat_file_header) - sizeof (struct stat_file_section)) / sizeof (struct stat_file_block); + header.total_blocks = nblocks; + + if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { + msg_info ("cannot create file %s, error %d, %s", filename, errno, strerror (errno)); + rspamd_mempool_unlock_mutex (pool->lock); + return -1; + } + + rspamd_fallocate (fd, 0, sizeof (header) + sizeof (section) + sizeof (block) * nblocks); + + header.create_time = (guint64) time (NULL); + if (write (fd, &header, sizeof (header)) == -1) { + msg_info ("cannot write header to file %s, error %d, %s", filename, errno, strerror (errno)); + close (fd); + rspamd_mempool_unlock_mutex (pool->lock); + return -1; + } + + /* The single section of a fresh statfile spans all of its blocks */ + section.length = (guint64) nblocks; + if (write (fd, &section, sizeof (section)) == -1) { + msg_info ("cannot write section header to file %s, error %d, %s", filename, errno, strerror (errno)); + close (fd); + rspamd_mempool_unlock_mutex (pool->lock); + return -1; + } + + /* Buffer for write 256 blocks at once */ + if (nblocks > 256) { + buflen = sizeof (block) * 256; + buf = g_malloc0 (buflen); + } + + while (nblocks) { + if (nblocks > 256) { + /* Just write buffer */ + if (write (fd, buf, buflen) == -1) { + msg_info ("cannot write blocks buffer to file %s, error %d, %s", filename, errno, strerror (errno)); + close (fd); + rspamd_mempool_unlock_mutex (pool->lock); + g_free (buf); + return -1; + } + nblocks -= 256; + } + else { + if (write (fd, &block, sizeof (block)) == -1) { + msg_info ("cannot write block to file %s, error %d, %s", filename, errno, strerror
(errno)); + close (fd); + if (buf) { + g_free (buf); + } + rspamd_mempool_unlock_mutex (pool->lock); + return -1; + } + nblocks --; + } + } + + close (fd); + rspamd_mempool_unlock_mutex (pool->lock); + + if (buf) { + g_free (buf); + } + + return 0; +} + +void +statfile_pool_delete (statfile_pool_t * pool) +{ + gint i; + + /* statfile_pool_close compacts the array and decrements pool->opened, so walk from the tail to visit every file exactly once */ + for (i = pool->opened - 1; i >= 0; i--) { + statfile_pool_close (pool, &pool->files[i], FALSE); + } + rspamd_mempool_delete (pool->pool); +} + +void +statfile_pool_lock_file (statfile_pool_t * pool, stat_file_t * file) +{ + + rspamd_mempool_lock_mutex (file->lock); +} + +void +statfile_pool_unlock_file (statfile_pool_t * pool, stat_file_t * file) +{ + + rspamd_mempool_unlock_mutex (file->lock); +} + +double +statfile_pool_get_block (statfile_pool_t * pool, stat_file_t * file, guint32 h1, guint32 h2, time_t now) +{ + struct stat_file_block *block; + guint i, blocknum; + u_char *c; + + + file->access_time = now; + /* An empty section has no chain to search and would make the modulo below divide by zero */ + if (!file->map || file->cur_section.length == 0) { + return 0; + } + + blocknum = h1 % file->cur_section.length; + c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct stat_file_block); + block = (struct stat_file_block *)c; + + for (i = 0; i < CHAIN_LENGTH; i++) { + if (i + blocknum >= file->cur_section.length) { + break; + } + if (block->hash1 == h1 && block->hash2 == h2) { + return block->value; + } + c += sizeof (struct stat_file_block); + block = (struct stat_file_block *)c; + } + + + return 0; +} + +static void +statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, guint32 h1, guint32 h2, time_t t, double value, gboolean from_now) +{ + struct stat_file_block *block, *to_expire = NULL; + struct stat_file_header *header; + guint i, blocknum; + u_char *c; + double min = G_MAXDOUBLE; + + if (from_now) { + file->access_time = t; + } + /* Nothing can be stored without a mapping or in an empty section (the modulo below would divide by zero) */ + if (!file->map || file->cur_section.length == 0) { + return; + } + + blocknum = h1 % file->cur_section.length; + header = (struct stat_file_header *)file->map; + c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct
stat_file_block); + block = (struct stat_file_block *)c; + + for (i = 0; i < CHAIN_LENGTH; i++) { + if (i + blocknum >= file->cur_section.length) { + /* Need to expire some block in chain */ + msg_info ("chain %ud is full in statfile %s, starting expire", blocknum, file->filename); + break; + } + /* First try to find block in chain */ + if (block->hash1 == h1 && block->hash2 == h2) { + block->value = value; + return; + } + /* Check whether we have a free block in chain */ + if (block->hash1 == 0 && block->hash2 == 0) { + /* Write new block here */ + msg_debug ("found free block %ud in chain %ud, set h1=%ud, h2=%ud", i, blocknum, h1, h2); + block->hash1 = h1; + block->hash2 = h2; + block->value = value; + header->used_blocks ++; + + return; + } + + /* Expire block with minimum value otherwise */ + if (block->value < min) { + to_expire = block; + min = block->value; + } + c += sizeof (struct stat_file_block); + block = (struct stat_file_block *)c; + } + + /* Try expire some block */ + if (to_expire) { + block = to_expire; + } + else { + /* Expire first block in chain */ + c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct stat_file_block); + block = (struct stat_file_block *)c; + } + + block->hash1 = h1; + block->hash2 = h2; + block->value = value; +} + +void +statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, guint32 h1, guint32 h2, time_t now, double value) +{ + statfile_pool_set_block_common (pool, file, h1, h2, now, value, TRUE); +} + +stat_file_t * +statfile_pool_is_open (statfile_pool_t * pool, gchar *filename) +{ + static stat_file_t f, *ret; + /* lfind takes the element count through a size_t pointer; pool->opened is a gint, so pass a correctly sized copy instead of casting its address */ + size_t nelts = pool->opened; + + rspamd_strlcpy (f.filename, filename, sizeof (f.filename)); + ret = lfind (&f, pool->files, &nelts, sizeof (stat_file_t), cmpstatfile); + return ret; +} + +guint32 +statfile_pool_get_section (statfile_pool_t * pool, stat_file_t * file) +{ + + return file->cur_section.code; +} + +gboolean +statfile_pool_set_section (statfile_pool_t * pool, stat_file_t * file, guint32
code, gboolean from_begin) +{ + struct stat_file_section *sec; + off_t cur_offset; + + + /* Try to find section */ + if (from_begin) { + cur_offset = sizeof (struct stat_file_header); + } + else { + cur_offset = file->seek_pos - sizeof (struct stat_file_section); + } + while (cur_offset < (off_t)file->len) { + sec = (struct stat_file_section *)((gchar *)file->map + cur_offset); + if (sec->code == code) { + file->cur_section.code = code; + file->cur_section.length = sec->length; + file->seek_pos = cur_offset + sizeof (struct stat_file_section); + return TRUE; + } + /* A section is its header followed by length blocks: skip both to reach the next header (this also guarantees progress for an empty section) */ + cur_offset += sizeof (struct stat_file_section) + sec->length * sizeof (struct stat_file_block); + } + + return FALSE; +} + +gboolean +statfile_pool_add_section (statfile_pool_t * pool, stat_file_t * file, guint32 code, guint64 length) +{ + struct stat_file_section sect; + struct stat_file_block block = { 0, 0, 0 }; + guint64 i; + + if (lseek (file->fd, 0, SEEK_END) == -1) { + msg_info ("cannot lseek file %s, error %d, %s", file->filename, errno, strerror (errno)); + return FALSE; + } + + sect.code = code; + sect.length = length; + + if (write (file->fd, &sect, sizeof (sect)) == -1) { + msg_info ("cannot write block to file %s, error %d, %s", file->filename, errno, strerror (errno)); + return FALSE; + } + + /* Count with a separate index: length is still needed to compute the new file size */ + for (i = 0; i < length; i ++) { + if (write (file->fd, &block, sizeof (block)) == -1) { + msg_info ("cannot write block to file %s, error %d, %s", file->filename, errno, strerror (errno)); + return FALSE; + } + } + + /* Lock statfile to remap memory */ + statfile_pool_lock_file (pool, file); + munmap (file->map, file->len); + fsync (file->fd); + /* The file grew by one section header plus length empty blocks */ + file->len += sizeof (sect) + length * sizeof (block); + + if ((file->map = mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED, file->fd, 0)) == MAP_FAILED) { + msg_info ("cannot mmap file %s, error %d, %s", file->filename, errno, strerror (errno)); + /* mmap reports failure as MAP_FAILED; store NULL so the !file->map checks elsewhere keep working, and release the file mutex */ + file->map = NULL; + statfile_pool_unlock_file (pool, file); + return FALSE; + } + statfile_pool_unlock_file (pool, file); + + return TRUE; + +} + +guint32 +statfile_get_section_by_name (const gchar *name) +{ + if (g_ascii_strcasecmp (name, "common") == 0) { + return STATFILE_SECTION_COMMON; + } + else if
(g_ascii_strcasecmp (name, "header") == 0) { + return STATFILE_SECTION_HEADERS; + } + else if (g_ascii_strcasecmp (name, "url") == 0) { + return STATFILE_SECTION_URLS; + } + else if (g_ascii_strcasecmp (name, "regexp") == 0) { + return STATFILE_SECTION_REGEXP; + } + + return 0; +} + +gboolean +statfile_set_revision (stat_file_t *file, guint64 rev, time_t time) +{ + struct stat_file_header *header; + + if (file == NULL || file->map == NULL) { + return FALSE; + } + + header = (struct stat_file_header *)file->map; + + header->revision = rev; + header->rev_time = time; + + return TRUE; +} + +gboolean +statfile_inc_revision (stat_file_t *file) +{ + struct stat_file_header *header; + + if (file == NULL || file->map == NULL) { + return FALSE; + } + + header = (struct stat_file_header *)file->map; + + header->revision ++; + + return TRUE; +} + +gboolean +statfile_get_revision (stat_file_t *file, guint64 *rev, time_t *time) +{ + struct stat_file_header *header; + + if (file == NULL || file->map == NULL) { + return FALSE; + } + + header = (struct stat_file_header *)file->map; + + if (rev != NULL) { + *rev = header->revision; + } + if (time != NULL) { + *time = header->rev_time; + } + + return TRUE; +} + +guint64 +statfile_get_used_blocks (stat_file_t *file) +{ + struct stat_file_header *header; + + if (file == NULL || file->map == NULL) { + return (guint64)-1; + } + + header = (struct stat_file_header *)file->map; + + return header->used_blocks; +} + +guint64 +statfile_get_total_blocks (stat_file_t *file) +{ + struct stat_file_header *header; + + if (file == NULL || file->map == NULL) { + return (guint64)-1; + } + + header = (struct stat_file_header *)file->map; + + /* If total blocks is 0 we have old version of header, so set total blocks correctly */ + if (header->total_blocks == 0) { + header->total_blocks = file->cur_section.length; + } + + return header->total_blocks; +} + +static void +statfile_pool_invalidate_callback (gint fd, short what, void *ud) +{ + 
statfile_pool_t *pool = ud; + stat_file_t *file; + gint i; + + msg_info ("invalidating %d statfiles", pool->opened); + + for (i = 0; i < pool->opened; i ++) { + file = &pool->files[i]; + msync (file->map, file->len, MS_ASYNC); + } + +} + + +void +statfile_pool_plan_invalidate (statfile_pool_t *pool, time_t seconds, time_t jitter) +{ + gboolean pending; + + + if (pool->invalidate_event != NULL) { + pending = evtimer_pending (pool->invalidate_event, NULL); + if (pending) { + /* Replan event */ + pool->invalidate_tv.tv_sec = seconds + g_random_int_range (0, jitter); + pool->invalidate_tv.tv_usec = 0; + evtimer_add (pool->invalidate_event, &pool->invalidate_tv); + } + } + else { + pool->invalidate_event = rspamd_mempool_alloc (pool->pool, sizeof (struct event)); + pool->invalidate_tv.tv_sec = seconds + g_random_int_range (0, jitter); + pool->invalidate_tv.tv_usec = 0; + evtimer_set (pool->invalidate_event, statfile_pool_invalidate_callback, pool); + evtimer_add (pool->invalidate_event, &pool->invalidate_tv); + msg_info ("invalidate of statfile pool is planned in %d seconds", (gint)pool->invalidate_tv.tv_sec); + } +} + + +stat_file_t * +get_statfile_by_symbol (statfile_pool_t *pool, struct classifier_config *ccf, + const gchar *symbol, struct statfile **st, gboolean try_create) +{ + stat_file_t *res = NULL; + GList *cur; + + if (pool == NULL || ccf == NULL || symbol == NULL || st == NULL) { + msg_err ("invalid input arguments"); + return NULL; + } + + /* Start from "not found": with an empty statfiles list the loop below never assigns *st */ + *st = NULL; + cur = g_list_first (ccf->statfiles); + while (cur) { + *st = cur->data; + if (strcmp (symbol, (*st)->symbol) == 0) { + break; + } + *st = NULL; + cur = g_list_next (cur); + } + if (*st == NULL) { + msg_info ("cannot find statfile with symbol %s", symbol); + return NULL; + } + + if ((res = statfile_pool_is_open (pool, (*st)->path)) == NULL) { + if ((res = statfile_pool_open (pool, (*st)->path, (*st)->size, FALSE)) == NULL) { + msg_warn ("cannot open %s", (*st)->path); + if (try_create) { + if (statfile_pool_create (pool, (*st)->path,
(*st)->size) == -1) { + msg_err ("cannot create statfile %s", (*st)->path); + return NULL; + } + res = statfile_pool_open (pool, (*st)->path, (*st)->size, FALSE); + if (res == NULL) { + msg_err ("cannot open statfile %s after creation", (*st)->path); + } + } + } + } + + return res; +} + +void +statfile_pool_lockall (statfile_pool_t *pool) +{ + stat_file_t *file; + gint i; + + if (pool->mlock_ok) { + for (i = 0; i < pool->opened; i ++) { + file = &pool->files[i]; + if (mlock (file->map, file->len) == -1) { + msg_warn ("mlock of statfile failed, maybe you need to increase RLIMIT_MEMLOCK limit for a process: %s", strerror (errno)); + pool->mlock_ok = FALSE; + return; + } + } + } + /* Do not try to lock if mlock failed */ +} + diff --git a/src/libserver/statfile.h b/src/libserver/statfile.h new file mode 100644 index 000000000..5786c4927 --- /dev/null +++ b/src/libserver/statfile.h @@ -0,0 +1,284 @@ +/** + * @file statfile.h + * Describes common methods for accessing statistics files and caching them in memory + */ + +#ifndef RSPAMD_STATFILE_H +#define RSPAMD_STATFILE_H + +#include "config.h" +#include "mem_pool.h" +#include "hash.h" + +#define CHAIN_LENGTH 128 + +/* Section types */ +#define STATFILE_SECTION_COMMON 1 +#define STATFILE_SECTION_HEADERS 2 +#define STATFILE_SECTION_URLS 3 +#define STATFILE_SECTION_REGEXP 4 + +#define DEFAULT_STATFILE_INVALIDATE_TIME 30 +#define DEFAULT_STATFILE_INVALIDATE_JITTER 30 + +/** + * Common statfile header + */ +struct stat_file_header { + u_char magic[3]; /**< magic signature ('r' 's' 'd') */ + u_char version[2]; /**< version of statfile */ + u_char padding[3]; /**< padding */ + guint64 create_time; /**< create time (time_t->guint64) */ + guint64 revision; /**< revision number */ + guint64 rev_time; /**< revision time */ + guint64 used_blocks; /**< used blocks number */ + guint64 total_blocks; /**< total number of blocks */ + u_char unused[239]; /**< some bytes that can be used in future */ +}; + +/** + * Section header + */ 
+struct stat_file_section { + guint64 code; /**< section's code */ + guint64 length; /**< section's length in blocks */ +}; + +/** + * Block of data in statfile + */ +struct stat_file_block { + guint32 hash1; /**< hash1 (also acts as index) */ + guint32 hash2; /**< hash2 */ + double value; /**< double value */ +}; + +/** + * Statistic file + */ +struct stat_file { + struct stat_file_header header; /**< header */ + struct stat_file_section section; /**< first section */ + struct stat_file_block blocks[1]; /**< first block of data */ +}; + +/** + * Common view of statfile object + */ +typedef struct stat_file_s { +#ifdef HAVE_PATH_MAX + gchar filename[PATH_MAX]; /**< name of file */ +#else + gchar filename[MAXPATHLEN]; /**< name of file */ +#endif + gint fd; /**< descriptor */ + void *map; /**< mmaped area */ + off_t seek_pos; /**< current seek position */ + struct stat_file_section cur_section; /**< current section */ + time_t open_time; /**< time when file was opened */ + time_t access_time; /**< last access time */ + size_t len; /**< length of file(in bytes) */ + rspamd_mempool_mutex_t *lock; /**< mutex */ +} stat_file_t; + +/** + * Statfiles pool + */ +typedef struct statfile_pool_s { + stat_file_t *files; /**< hash table of opened files indexed by name */ + void **maps; /**< shared hash table of mmaped areas indexed by name */ + gint opened; /**< number of opened files */ + rspamd_mempool_t *pool; /**< memory pool object */ + rspamd_mempool_mutex_t *lock; /**< mutex */ + struct event *invalidate_event; /**< event for pool invalidation */ + struct timeval invalidate_tv; + gboolean mlock_ok; /**< whether it is possible to use mlock (2) to avoid statfiles unloading */ +} statfile_pool_t; + +/* Forwarded declarations */ +struct classifier_config; +struct statfile; + +/** + * Create new statfile pool + * @param max_size maximum size + * @return statfile pool object + */ +statfile_pool_t* statfile_pool_new (rspamd_mempool_t *pool, gboolean use_mlock); + +/** + * Open 
statfile and attach it to pool + * @param pool statfile pool object + * @param filename name of statfile to open + * @return 0 if specified statfile is attached and -1 in case of error + */ +stat_file_t* statfile_pool_open (statfile_pool_t *pool, gchar *filename, size_t len, gboolean forced); + +/** + * Create new statfile but DOES NOT attach it to pool, use @see statfile_pool_open for attaching + * @param pool statfile pool object + * @param filename name of statfile to create + * @param len length of new statfile + * @return 0 if file was created and -1 in case of error + */ +gint statfile_pool_create (statfile_pool_t *pool, gchar *filename, size_t len); + +/** + * Close specified statfile + * @param pool statfile pool object + * @param filename name of statfile to close + * @param remove_hash remove filename from opened files hash also + * @return 0 if file was closed and -1 if statfile was not opened + */ +gint statfile_pool_close (statfile_pool_t *pool, stat_file_t *file, gboolean keep_sorted); + +/** + * Delete statfile pool and close all attached statfiles + * @param pool statfile pool object + */ +void statfile_pool_delete (statfile_pool_t *pool); + +/** + * Try to lock all statfiles in memory + * @param pool statfile pool object + */ +void statfile_pool_lockall (statfile_pool_t *pool); + +/** + * Lock specified file for exclusive use (eg. 
learning) + * @param pool statfile pool object + * @param filename name of statfile + */ +void statfile_pool_lock_file (statfile_pool_t *pool, stat_file_t *file); + +/** + * Unlock specified file + * @param pool statfile pool object + * @param filename name of statfile + */ +void statfile_pool_unlock_file (statfile_pool_t *pool, stat_file_t *file); + +/** + * Get block from statfile with h1 and h2 values, use time argument for current time + * @param pool statfile pool object + * @param filename name of statfile + * @param h1 h1 in file + * @param h2 h2 in file + * @param now current time + * @return block value or 0 if block is not found + */ +double statfile_pool_get_block (statfile_pool_t *pool, stat_file_t *file, guint32 h1, guint32 h2, time_t now); + +/** + * Set specified block in statfile + * @param pool statfile pool object + * @param filename name of statfile + * @param h1 h1 in file + * @param h2 h2 in file + * @param now current time + * @param value value of block + */ +void statfile_pool_set_block (statfile_pool_t *pool, stat_file_t *file, guint32 h1, guint32 h2, time_t now, double value); + +/** + * Check whether statfile is opened + * @param pool statfile pool object + * @param filename name of statfile + * @return TRUE if specified statfile is opened and FALSE otherwise + */ +stat_file_t* statfile_pool_is_open (statfile_pool_t *pool, gchar *filename); + +/** + * Returns current statfile section + * @param pool statfile pool object + * @param filename name of statfile + * @return code of section or 0 if file is not opened + */ +guint32 statfile_pool_get_section (statfile_pool_t *pool, stat_file_t *file); + +/** + * Go to other section of statfile + * @param pool statfile pool object + * @param filename name of statfile + * @param code code of section to seek to + * @param from_begin search for section from begin of file if true + * @return TRUE if section was set and FALSE otherwise + */ +gboolean statfile_pool_set_section (statfile_pool_t *pool, 
stat_file_t *file, guint32 code, gboolean from_begin); + +/** + * Add new section to statfile + * @param pool statfile pool object + * @param filename name of statfile + * @param code code of section to seek to + * @param length length in blocks of new section + * @return TRUE if section was successfully added and FALSE in case of error + */ +gboolean statfile_pool_add_section (statfile_pool_t *pool, stat_file_t *file, guint32 code, guint64 length); + + +/** + * Return code of section identified by name + * @param name name of section + * @return code of section or 0 if name of section is unknown + */ +guint32 statfile_get_section_by_name (const gchar *name); + +/** + * Set statfile revision and revision time + * @param filename name of statfile + * @param revision number of revision + * @param time time of revision + * @return TRUE if revision was set + */ +gboolean statfile_set_revision (stat_file_t *file, guint64 rev, time_t time); + +/** + * Increment statfile revision and revision time + * @param filename name of statfile + * @param time time of revision + * @return TRUE if revision was set + */ +gboolean statfile_inc_revision (stat_file_t *file); + +/** + * Set statfile revision and revision time + * @param filename name of statfile + * @param revision saved number of revision + * @param time saved time of revision + * @return TRUE if revision was saved in rev and time + */ +gboolean statfile_get_revision (stat_file_t *file, guint64 *rev, time_t *time); + +/** + * Get statfile used blocks + * @param file file to get number of used blocks + * @return number of used blocks or (guint64)-1 in case of error + */ +guint64 statfile_get_used_blocks (stat_file_t *file); + +/** + * Get statfile total blocks + * @param file file to get number of used blocks + * @return number of used blocks or (guint64)-1 in case of error + */ +guint64 statfile_get_total_blocks (stat_file_t *file); + + +/** + * Plan statfile pool invalidation + */ +void statfile_pool_plan_invalidate 
(statfile_pool_t *pool, time_t seconds, time_t jitter); + +/** + * Get a statfile by symbol + * @param pool pool object + * @param ccf ccf classifier config + * @param symbol symbol to search + * @param st statfile to get + * @param try_create whether we need to create statfile if it is absent + */ +stat_file_t* get_statfile_by_symbol (statfile_pool_t *pool, struct classifier_config *ccf, + const gchar *symbol, struct statfile **st, gboolean try_create); + +#endif diff --git a/src/libserver/statfile_sync.c b/src/libserver/statfile_sync.c new file mode 100644 index 000000000..6b545af17 --- /dev/null +++ b/src/libserver/statfile_sync.c @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "cfg_file.h"
+#include "tokenizers/tokenizers.h"
+#include "classifiers/classifiers.h"
+#include "statfile.h"
+#include "binlog.h"
+#include "buffer.h"
+#include "statfile_sync.h"
+
+enum rspamd_sync_state {
+	SYNC_STATE_GREETING,
+	SYNC_STATE_READ_LINE,
+	SYNC_STATE_READ_REV,
+	SYNC_STATE_QUIT,
+};
+
+/* Context of sync process */
+struct rspamd_sync_ctx {
+	struct statfile *st;
+	stat_file_t *real_statfile;
+	statfile_pool_t *pool;
+	rspamd_io_dispatcher_t *dispatcher;
+	struct event_base *ev_base;
+
+	struct event tm_ev;
+
+	struct timeval interval;
+	struct timeval io_tv;
+	gint sock;
+	guint32 timeout;
+	guint32 sync_interval;
+	enum rspamd_sync_state state;
+	gboolean is_busy;
+
+	guint64 new_rev;
+	guint64 new_time;
+	guint64 new_len;
+};
+
+/*
+ * Log the local wall-clock time at which the next synchronization of the
+ * statfile `symbol' is planned, i.e. `delay' seconds from now.
+ * Nothing is logged if localtime() or strftime() fails.
+ */
+static void
+log_next_sync (const gchar *symbol, time_t delay)
+{
+	gchar outstr[200];
+	time_t t;
+	struct tm *tmp;
+	gint r;
+
+	t = time (NULL);
+	t += delay;
+	tmp = localtime (&t);
+
+	if (tmp) {
+		r = rspamd_snprintf (outstr, sizeof (outstr), "statfile_sync: next sync of %s at ", symbol);
+		if (strftime (outstr + r, sizeof (outstr) - r, "%T", tmp) != 0) {
+			/* outstr embeds the symbol name, so it must never be used as the format itself */
+			msg_info ("%s", outstr);
+		}
+	}
+}
+
+static gboolean
+parse_revision_line (struct rspamd_sync_ctx *ctx, f_str_t *in)
+{
+	guint i, state = 0;
+	gchar *p, *c, numbuf[sizeof("18446744073709551615")];
+	guint64 *val;
+
+	/* First of all try to find END line */
+	if (in->len >= sizeof ("END") - 1 && memcmp (in->begin, "END", sizeof ("END") - 1) == 0) {
+		ctx->state = SYNC_STATE_QUIT;
+		ctx->is_busy = FALSE;
+		return TRUE;
+	}
+
+	/* Next check for error line */
+	if (in->len >= sizeof ("FAIL") - 1 && memcmp (in->begin, "FAIL", sizeof ("FAIL") - 1) == 0) {
+		ctx->state = SYNC_STATE_QUIT;
+		ctx->is_busy = FALSE;
+		return TRUE;
+	}
+
+	/* Now try to extract 3 numbers from string: revision, time and length */
+	p = in->begin;
+	val = &ctx->new_rev;
+	for (i = 0; i < in->len; i ++, p ++) {
+		if (g_ascii_isspace (*p) || i == in->len -
1) { + if (state == 1) { + if (i == in->len - 1) { + /* One more character */ + p ++; + } + rspamd_strlcpy (numbuf, c, MIN (p - c + 1, (gint)sizeof (numbuf))); + errno = 0; + *val = strtoull (numbuf, NULL, 10); + if (errno != 0) { + msg_info ("cannot parse number %s", strerror (errno)); + return FALSE; + } + state = 2; + } + } + else { + if (state == 0) { + c = p; + state = 1; + } + else if (state == 2) { + if (val == &ctx->new_rev) { + val = &ctx->new_time; + } + else if (val == &ctx->new_time) { + val = &ctx->new_len; + } + c = p; + state = 1; + } + } + } + + /* Current value must be len value and its value must not be 0 */ + return ((val == &ctx->new_len)); +} + +static gboolean +read_blocks (struct rspamd_sync_ctx *ctx, f_str_t *in) +{ + struct rspamd_binlog_element *elt; + guint i; + + statfile_pool_lock_file (ctx->pool, ctx->real_statfile); + elt = (struct rspamd_binlog_element *)in->begin; + for (i = 0; i < in->len / sizeof (struct rspamd_binlog_element); i ++, elt ++) { + statfile_pool_set_block (ctx->pool, ctx->real_statfile, elt->h1, elt->h2, ctx->new_time, elt->value); + } + statfile_pool_unlock_file (ctx->pool, ctx->real_statfile); + + return TRUE; +} + +static gboolean +sync_read (f_str_t * in, void *arg) +{ + struct rspamd_sync_ctx *ctx = arg; + gchar buf[256]; + guint64 rev = 0; + time_t ti = 0; + + if (in->len == 0) { + /* Skip empty lines */ + return TRUE; + } + switch (ctx->state) { + case SYNC_STATE_GREETING: + /* Skip greeting line and write sync command */ + /* Write initial data */ + statfile_get_revision (ctx->real_statfile, &rev, &ti); + rev = rspamd_snprintf (buf, sizeof (buf), "sync %s %uL %T" CRLF, ctx->st->symbol, rev, ti); + ctx->state = SYNC_STATE_READ_LINE; + return rspamd_dispatcher_write (ctx->dispatcher, buf, rev, FALSE, FALSE); + break; + case SYNC_STATE_READ_LINE: + /* Try to parse line from server */ + if (!parse_revision_line (ctx, in)) { + msg_info ("cannot parse line of length %z: '%*s'", in->len, (gint)in->len, in->begin); + 
close (ctx->sock); + rspamd_remove_dispatcher (ctx->dispatcher); + ctx->is_busy = FALSE; + return FALSE; + } + else if (ctx->state != SYNC_STATE_QUIT) { + if (ctx->new_len > 0) { + ctx->state = SYNC_STATE_READ_REV; + rspamd_set_dispatcher_policy (ctx->dispatcher, BUFFER_CHARACTER, ctx->new_len); + } + } + else { + /* Quit this session */ + msg_info ("sync ended for: %s", ctx->st->symbol); + close (ctx->sock); + rspamd_remove_dispatcher (ctx->dispatcher); + ctx->is_busy = FALSE; + /* Immediately return from callback */ + return FALSE; + } + break; + case SYNC_STATE_READ_REV: + /* In now contains all blocks of specified revision, so we can read them directly */ + if (!read_blocks (ctx, in)) { + msg_info ("cannot read blocks"); + close (ctx->sock); + rspamd_remove_dispatcher (ctx->dispatcher); + ctx->is_busy = FALSE; + return FALSE; + } + statfile_set_revision (ctx->real_statfile, ctx->new_rev, ctx->new_time); + msg_info ("set new revision: %uL, readed %z bytes", ctx->new_rev, in->len); + /* Now try to read other revision or END line */ + ctx->state = SYNC_STATE_READ_LINE; + rspamd_set_dispatcher_policy (ctx->dispatcher, BUFFER_LINE, 0); + break; + case SYNC_STATE_QUIT: + close (ctx->sock); + rspamd_remove_dispatcher (ctx->dispatcher); + ctx->is_busy = FALSE; + return FALSE; + } + + return TRUE; +} + +static void +sync_err (GError *err, void *arg) +{ + struct rspamd_sync_ctx *ctx = arg; + + msg_info ("abnormally closing connection, error: %s", err->message); + ctx->is_busy = FALSE; + close (ctx->sock); + rspamd_remove_dispatcher (ctx->dispatcher); +} + + +static void +sync_timer_callback (gint fd, short what, void *ud) +{ + struct rspamd_sync_ctx *ctx = ud; + guint32 jittered_interval; + + /* Plan new event */ + evtimer_del (&ctx->tm_ev); + /* Add some jittering for synchronization */ + jittered_interval = g_random_int_range (ctx->sync_interval, ctx->sync_interval * 2); + msec_to_tv (jittered_interval, &ctx->interval); + evtimer_add (&ctx->tm_ev, &ctx->interval); + 
log_next_sync (ctx->st->symbol, ctx->interval.tv_sec);
+
+	if (ctx->is_busy) {
+		/* Sync is in progress */
+		msg_info ("syncronization process is in progress, do not start new one");
+		return;
+	}
+
+	if ((ctx->sock = make_universal_socket (ctx->st->binlog->master_addr, ctx->st->binlog->master_port,
+			SOCK_STREAM, TRUE, FALSE, TRUE)) == -1) {
+		msg_info ("cannot connect to %s", ctx->st->binlog->master_addr);
+		return;
+	}
+	/* Now create and activate dispatcher */
+	msec_to_tv (ctx->timeout, &ctx->io_tv);
+	ctx->dispatcher = rspamd_create_dispatcher (ctx->ev_base, ctx->sock, BUFFER_LINE, sync_read, NULL, sync_err, &ctx->io_tv, ctx);
+
+	ctx->state = SYNC_STATE_GREETING;
+	ctx->is_busy = TRUE;
+
+	msg_info ("starting synchronization of %s", ctx->st->symbol);
+
+}
+
+/*
+ * Create a sync context for statfile `st' and plan the first run of
+ * sync_timer_callback after a jittered interval.
+ * The statfile is opened (and created first if it cannot be opened) so
+ * that received blocks can be written to it.
+ * @param pool statfile pool; the context is allocated from pool->pool
+ * @param st statfile description, st->binlog must not be NULL
+ * @param cfg config providing sync timeout and interval
+ * @param ev_base event base used for the timer and the dispatcher
+ * @return TRUE if the timer was planned, FALSE if no master is defined or
+ * the statfile can be neither opened nor created
+ */
+static gboolean
+add_statfile_watch (statfile_pool_t *pool, struct statfile *st, struct config_file *cfg, struct event_base *ev_base)
+{
+	struct rspamd_sync_ctx *ctx;
+	guint32 jittered_interval;
+
+	if (st->binlog->master_addr != NULL) {
+		/*
+		 * Context must be zeroed: sync_timer_callback reads is_busy
+		 * before anything assigns it
+		 */
+		ctx = rspamd_mempool_alloc0 (pool->pool, sizeof (struct rspamd_sync_ctx));
+		ctx->st = st;
+		/* read_blocks locks and updates the statfile via ctx->pool */
+		ctx->pool = pool;
+		ctx->timeout = cfg->statfile_sync_timeout;
+		ctx->sync_interval = cfg->statfile_sync_interval;
+		ctx->ev_base = ev_base;
+		/* Add some jittering for synchronization */
+		jittered_interval = g_random_int_range (ctx->sync_interval, ctx->sync_interval * 2);
+		msec_to_tv (jittered_interval, &ctx->interval);
+		/* Open statfile and attach it to pool */
+		if ((ctx->real_statfile = statfile_pool_is_open (pool, st->path)) == NULL) {
+			if ((ctx->real_statfile = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) {
+				msg_warn ("cannot open %s", st->path);
+				if (statfile_pool_create (pool, st->path, st->size) == -1) {
+					msg_err ("cannot create statfile %s", st->path);
+					return FALSE;
+				}
+				ctx->real_statfile = statfile_pool_open (pool, st->path, st->size, FALSE);
+				if (ctx->real_statfile == NULL) {
+					/* Do not plan sync against a statfile we cannot write to */
+					msg_err ("cannot open statfile %s after creation", st->path);
+					return FALSE;
+				}
+			}
+		}
+		/* Now plan event for it's future executing */
+		evtimer_set (&ctx->tm_ev, sync_timer_callback, ctx);
+		event_base_set (ctx->ev_base, &ctx->tm_ev);
+		evtimer_add (&ctx->tm_ev, &ctx->interval);
+		log_next_sync (st->symbol, ctx->interval.tv_sec);
+	}
+	else {
+		msg_err ("cannot add statfile watch for statfile %s: no master defined", st->symbol);
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+gboolean
+start_statfile_sync (statfile_pool_t *pool, struct config_file *cfg, struct event_base *ev_base)
+{
+	GList *cur, *l;
+	struct classifier_config *cl;
+	struct statfile *st;
+
+	/*
+	 * First of all walk through all classifiers and find those statfiles
+	 * for which we should do sync (slave affinity)
+	 */
+	cur = cfg->classifiers;
+	while (cur) {
+		cl = cur->data;
+		l = cl->statfiles;
+		while (l) {
+			st = l->data;
+			if (st->binlog != NULL && st->binlog->affinity == AFFINITY_SLAVE) {
+				if (!add_statfile_watch (pool, st, cfg, ev_base)) {
+					return FALSE;
+				}
+			}
+			l = g_list_next (l);
+		}
+		cur = g_list_next (cur);
+	}
+
+	return TRUE;
+}
diff --git a/src/libserver/statfile_sync.h b/src/libserver/statfile_sync.h
new file mode 100644
index 000000000..b3abb8b91
--- /dev/null
+++ b/src/libserver/statfile_sync.h
@@ -0,0 +1,14 @@
+#ifndef RSPAMD_STATFILE_SYNC_H
+#define RSPAMD_STATFILE_SYNC_H
+
+#include "config.h"
+#include "main.h"
+#include "statfile.h"
+#include "cfg_file.h"
+
+/*
+ * Start synchronization of statfiles. Must be called after event_init as it adds events
+ */
+gboolean start_statfile_sync (statfile_pool_t *pool, struct config_file *cfg, struct event_base *ev_base);
+
+#endif
diff --git a/src/libserver/symbols_cache.c b/src/libserver/symbols_cache.c
new file mode 100644
index 000000000..dfca57c66
--- /dev/null
+++ b/src/libserver/symbols_cache.c
@@ -0,0 +1,1055 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "util.h"
+#include "main.h"
+#include "message.h"
+#include "symbols_cache.h"
+#include "cfg_file.h"
+
+#define WEIGHT_MULT 4.0
+#define FREQUENCY_MULT 10.0
+#define TIME_MULT -1.0
+
+/* After which number of messages try to resort cache */
+#define MAX_USES 100
+/*
+ * Symbols cache utility functions
+ */
+
+#define MIN_CACHE 17
+
+static guint64 total_frequency = 0;
+static guint32 nsymbols = 0;
+
+gint
+cache_cmp (const void *p1, const void *p2)
+{
+	const struct cache_item *i1 = p1, *i2 = p2;
+
+	return strcmp (i1->s->symbol, i2->s->symbol);
+}
+
+/*
+ * Comparator that orders cache items by descending score.
+ * If both items have priority 0 the score is
+ *   |weight| * WEIGHT_MULT + frequency share * FREQUENCY_MULT + avg_time * TIME_MULT
+ * where weight is metric_weight when it is non-zero and the saved weight
+ * otherwise; in any other case the score is |priority|.
+ * @return negative, zero or positive value if p1 should be placed before,
+ * at the same position as, or after p2
+ */
+gint
+cache_logic_cmp (const void *p1, const void *p2)
+{
+	const struct cache_item *i1 = p1, *i2 = p2;
+	double w1, w2;
+	double weight1, weight2;
+	double f1 = 0, f2 = 0;
+
+	if (i1->priority == 0 && i2->priority == 0) {
+		if (total_frequency > 0) {
+			f1 = ((double)i1->s->frequency * nsymbols) / (double)total_frequency;
+			f2 = ((double)i2->s->frequency * nsymbols) / (double)total_frequency;
+		}
+		weight1 = i1->metric_weight == 0 ? i1->s->weight : i1->metric_weight;
+		weight2 = i2->metric_weight == 0 ? i2->s->weight : i2->metric_weight;
+		/* ABS keeps the fractional part; abs() would truncate the weight to an integer */
+		w1 = ABS (weight1) * WEIGHT_MULT + f1 * FREQUENCY_MULT + i1->s->avg_time * TIME_MULT;
+		w2 = ABS (weight2) * WEIGHT_MULT + f2 * FREQUENCY_MULT + i2->s->avg_time * TIME_MULT;
+	}
+	else {
+		/* Strict sorting */
+		w1 = abs (i1->priority);
+		w2 = abs (i2->priority);
+	}
+
+	/* Compare explicitly: converting the difference to gint loses sub-unit differences */
+	if (w2 > w1) {
+		return 1;
+	}
+	else if (w2 < w1) {
+		return -1;
+	}
+
+	return 0;
+}
+
+static GChecksum *
+get_mem_cksum (struct symbols_cache *cache)
+{
+	GChecksum *result;
+	GList *cur, *l;
+	struct cache_item *item;
+
+	result = g_checksum_new (G_CHECKSUM_SHA1);
+
+	l = g_list_copy (cache->negative_items);
+	l = g_list_sort (l, cache_cmp);
+	cur = g_list_first (l);
+	while (cur) {
+		item = cur->data;
+		if (item->s->symbol[0] != '\0') {
+			g_checksum_update (result, item->s->symbol, strlen (item->s->symbol));
+		}
+		cur = g_list_next (cur);
+	}
+	g_list_free (l);
+
+
+	l = g_list_copy (cache->static_items);
+	l = g_list_sort (l, cache_cmp);
+	cur = g_list_first (l);
+	while (cur) {
+		item = cur->data;
+		if (item->s->symbol[0] != '\0') {
+			g_checksum_update (result, item->s->symbol, strlen (item->s->symbol));
+		}
+		total_frequency += item->s->frequency;
+		cur = g_list_next (cur);
+	}
+	g_list_free (l);
+
+	return result;
+}
+
+/* Sort items in logical order */
+static void
+post_cache_init (struct symbols_cache *cache)
+{
+	GList *cur;
+	struct cache_item *item;
+
+	total_frequency = 0;
+	nsymbols = cache->used_items;
+	cur = g_list_first (cache->negative_items);
+	while (cur) {
+		item = cur->data;
+		total_frequency += item->s->frequency;
+		cur = g_list_next (cur);
+	}
+	cur = g_list_first (cache->static_items);
+	while (cur) {
+		item = cur->data;
+		total_frequency += item->s->frequency;
+		cur = g_list_next (cur);
+	}
+
+	cache->negative_items = g_list_sort (cache->negative_items, cache_logic_cmp);
+	cache->static_items = g_list_sort (cache->static_items, cache_logic_cmp);
+}
+
+/* Unmap cache file */
+static void
+unmap_cache_file (gpointer arg)
+{
+	struct symbols_cache *cache = arg;
+
+	/* A bit ugly usage */
+
munmap (cache->map, cache->used_items * sizeof (struct saved_cache_item));
+}
+
+static gboolean
+mmap_cache_file (struct symbols_cache *cache, gint fd, rspamd_mempool_t *pool)
+{
+	guint8 *map;
+	gint i;
+	GList *cur;
+	struct cache_item *item;
+
+	if (cache->used_items > 0) {
+		map = mmap (NULL, cache->used_items * sizeof (struct saved_cache_item), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+		if (map == MAP_FAILED) {
+			msg_err ("cannot mmap cache file: %d, %s", errno, strerror (errno));
+			close (fd);
+			return FALSE;
+		}
+		/* Close descriptor as it would never be used */
+		close (fd);
+		cache->map = map;
+		/* Now free old values for saved cache items and fill them with mmapped ones */
+		i = 0;
+		cur = g_list_first (cache->negative_items);
+		while (cur) {
+			item = cur->data;
+			item->s = (struct saved_cache_item *)(map + i * sizeof (struct saved_cache_item));
+			cur = g_list_next (cur);
+			i ++;
+		}
+		cur = g_list_first (cache->static_items);
+		while (cur) {
+			item = cur->data;
+			item->s = (struct saved_cache_item *)(map + i * sizeof (struct saved_cache_item));
+			cur = g_list_next (cur);
+			i ++;
+		}
+
+		post_cache_init (cache);
+	}
+
+	return TRUE;
+}
+
+/*
+ * Write a fresh cache file and map it.
+ * The saved part of every negative item and then of every static item is
+ * written to fd, followed by the SHA1 digest returned by get_mem_cksum.
+ * After that fd is closed, the file is reopened by name and passed to
+ * mmap_cache_file.
+ * Fd must be opened for writing; it is closed on every return path.
+ * @param cache cache to save
+ * @param filename name of cache file, used to reopen it and in log messages
+ * @param fd descriptor of cache file opened for writing
+ * @param pool memory pool passed to mmap_cache_file
+ * @return FALSE if any write or the reopening fails, otherwise the result
+ * of mmap_cache_file
+ */
+static gboolean
+create_cache_file (struct symbols_cache *cache, const gchar *filename, gint fd, rspamd_mempool_t *pool)
+{
+	GChecksum *cksum;
+	u_char *digest;
+	gsize cklen;
+	GList *cur;
+	struct cache_item *item;
+
+	/* Calculate checksum */
+	cksum = get_mem_cksum (cache);
+	if (cksum == NULL) {
+		msg_err ("cannot calculate checksum for symbols");
+		close (fd);
+		return FALSE;
+	}
+
+	cklen = g_checksum_type_get_length (G_CHECKSUM_SHA1);
+	digest = g_malloc (cklen);
+
+	g_checksum_get_digest (cksum, digest, &cklen);
+	/* Now write data to file */
+	cur = g_list_first (cache->negative_items);
+	while (cur) {
+		item = cur->data;
+		if (write (fd, item->s, sizeof (struct saved_cache_item)) == -1) {
+			msg_err ("cannot write to file %d, %s", errno, strerror (errno));
+			close (fd);
+			g_checksum_free (cksum);
+			g_free (digest);
+			return FALSE;
+		}
+		cur = g_list_next (cur);
+	}
+	cur = g_list_first (cache->static_items);
+	while (cur) {
+		item = cur->data;
+		if (write (fd, item->s, sizeof (struct saved_cache_item)) == -1) {
+			msg_err ("cannot write to file %d, %s", errno, strerror (errno));
+			close (fd);
+			g_checksum_free (cksum);
+			g_free (digest);
+			return FALSE;
+		}
+		cur = g_list_next (cur);
+	}
+	/* Write checksum */
+	if (write (fd, digest, cklen) == -1) {
+		msg_err ("cannot write to file %d, %s", errno, strerror (errno));
+		close (fd);
+		g_checksum_free (cksum);
+		g_free (digest);
+		return FALSE;
+	}
+
+	close (fd);
+	g_checksum_free (cksum);
+	g_free (digest);
+	/* Reopen for reading */
+	if ((fd = open (filename, O_RDWR)) == -1) {
+		/* The format expects the file name first; it was missing from the argument list */
+		msg_info ("cannot open file %s, error %d, %s", filename, errno, strerror (errno));
+		return FALSE;
+	}
+
+	return mmap_cache_file (cache, fd, pool);
+}
+
+enum rspamd_symbol_type {
+	SYMBOL_TYPE_NORMAL,
+	SYMBOL_TYPE_VIRTUAL,
+	SYMBOL_TYPE_CALLBACK
+};
+
+static void
+register_symbol_common (struct symbols_cache **cache, const gchar *name, double weight, gint priority,
+		symbol_func_t func, gpointer user_data, enum rspamd_symbol_type type)
+{
+	struct cache_item *item = NULL;
+	struct symbols_cache *pcache = *cache;
+	GList **target;
+	double *w;
+
+	if (*cache == NULL) {
+		pcache = g_new0 (struct symbols_cache, 1);
+		*cache = pcache;
+		pcache->static_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ());
+		pcache->items_by_symbol = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+	}
+
+	item = rspamd_mempool_alloc0 (pcache->static_pool, sizeof (struct cache_item));
+	item->s = rspamd_mempool_alloc0 (pcache->static_pool, sizeof (struct saved_cache_item));
+	rspamd_strlcpy (item->s->symbol, name, sizeof (item->s->symbol));
+	item->func = func;
+	item->user_data = user_data;
+	item->priority = priority;
+
+	switch (type) {
+	case SYMBOL_TYPE_NORMAL:
+		break;
+	case
SYMBOL_TYPE_VIRTUAL:
+		item->is_virtual = TRUE;
+		break;
+	case SYMBOL_TYPE_CALLBACK:
+		item->is_callback = TRUE;
+		break;
+	}
+
+	/* Handle weight using default metric */
+	if (pcache->cfg && pcache->cfg->default_metric && (w = g_hash_table_lookup (pcache->cfg->default_metric->symbols, name)) != NULL) {
+		item->s->weight = weight * (*w);
+	}
+	else {
+		item->s->weight = weight;
+	}
+
+	/* If we have undefined priority determine list according to weight */
+	if (priority == 0) {
+		if (item->s->weight > 0) {
+			target = &(*cache)->static_items;
+		}
+		else {
+			target = &(*cache)->negative_items;
+		}
+	}
+	else {
+		/* Items with more priority are called before items with less priority */
+		if (priority < 0) {
+			target = &(*cache)->negative_items;
+		}
+		else {
+			target = &(*cache)->static_items;
+		}
+	}
+
+	pcache->used_items++;
+	g_hash_table_insert (pcache->items_by_symbol, item->s->symbol, item);
+	msg_debug ("used items: %d, added symbol: %s", (*cache)->used_items, name);
+	set_counter (item->s->symbol, 0);
+
+	*target = g_list_prepend (*target, item);
+}
+
+void
+register_symbol (struct symbols_cache **cache, const gchar *name, double weight,
+		symbol_func_t func, gpointer user_data)
+{
+	register_symbol_common (cache, name, weight, 0, func, user_data, SYMBOL_TYPE_NORMAL);
+}
+
+void
+register_virtual_symbol (struct symbols_cache **cache, const gchar *name, double weight)
+{
+	register_symbol_common (cache, name, weight, 0, NULL, NULL, SYMBOL_TYPE_VIRTUAL);
+}
+
+void
+register_callback_symbol (struct symbols_cache **cache, const gchar *name, double weight,
+		symbol_func_t func, gpointer user_data)
+{
+	register_symbol_common (cache, name, weight, 0, func, user_data, SYMBOL_TYPE_CALLBACK);
+}
+
+void
+register_callback_symbol_priority (struct symbols_cache **cache, const gchar *name, double weight, gint priority,
+		symbol_func_t func, gpointer user_data)
+{
+	register_symbol_common (cache, name, weight, priority, func, user_data, SYMBOL_TYPE_CALLBACK);
+}
+
+/*
+ * Register a dynamic symbol whose item is allocated from dynamic_pool.
+ * If networks is NULL the item is only added to the list of dynamic items.
+ * Otherwise, for each network the item is prepended to the list stored in
+ * the radix tree for that network: the negative map for negative networks
+ * (such an item is added to the dynamic items list as well) and the
+ * ordinary map for the others.
+ * @param dynamic_pool pool for the item and for the list destructors
+ * @param cache pointer to cache, the cache is created if *cache is NULL
+ * @param name name of symbol
+ * @param weight weight of symbol, multiplied by the default metric weight
+ * of this symbol if the default metric defines one
+ * @param func callback of symbol
+ * @param user_data opaque data stored in the item
+ * @param networks list of struct dynamic_map_item or NULL
+ */
+void
+register_dynamic_symbol (rspamd_mempool_t *dynamic_pool, struct symbols_cache **cache,
+		const gchar *name, double weight, symbol_func_t func,
+		gpointer user_data, GList *networks)
+{
+	struct cache_item *item = NULL;
+	struct symbols_cache *pcache = *cache;
+	GList *t, *cur;
+	uintptr_t r;
+	double *w;
+	guint32 mask;
+	guint bits;
+	struct dynamic_map_item *it;
+	gint rr;
+
+	if (*cache == NULL) {
+		pcache = g_new0 (struct symbols_cache, 1);
+		*cache = pcache;
+		pcache->static_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ());
+	}
+	if (pcache->items_by_symbol == NULL) {
+		/* The table is used below, so it must exist even for a cache created here */
+		pcache->items_by_symbol = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+	}
+
+	item = rspamd_mempool_alloc0 (dynamic_pool, sizeof (struct cache_item));
+	/* Zeroed as in register_symbol_common, only symbol and weight are assigned here */
+	item->s = rspamd_mempool_alloc0 (dynamic_pool, sizeof (struct saved_cache_item));
+	rspamd_strlcpy (item->s->symbol, name, sizeof (item->s->symbol));
+	item->func = func;
+	item->user_data = user_data;
+	/* Handle weight using default metric */
+	if (pcache->cfg && pcache->cfg->default_metric && (w = g_hash_table_lookup (pcache->cfg->default_metric->symbols, name)) != NULL) {
+		item->s->weight = weight * (*w);
+	}
+	else {
+		item->s->weight = weight;
+	}
+	item->is_dynamic = TRUE;
+	item->priority = 0;
+
+	pcache->used_items++;
+	msg_debug ("used items: %d, added symbol: %s", (*cache)->used_items, name);
+	set_counter (item->s->symbol, 0);
+
+	g_hash_table_insert (pcache->items_by_symbol, item->s->symbol, item);
+
+	if (networks == NULL) {
+		pcache->dynamic_items = g_list_prepend (pcache->dynamic_items, item);
+	}
+	else {
+		if (pcache->dynamic_map == NULL) {
+			pcache->dynamic_map = radix_tree_create ();
+			pcache->negative_dynamic_map = radix_tree_create ();
+		}
+		cur = networks;
+		while (cur) {
+			it = cur->data;
+			/*
+			 * Build the mask for this network only (the previous network
+			 * must not affect it) and never shift by 32 bits or more
+			 */
+			bits = (guint)it->mask;
+			if (bits == 0) {
+				mask = 0;
+			}
+			else if (bits >= 32) {
+				mask = 0xFFFFFFFF;
+			}
+			else {
+				mask = 0xFFFFFFFF << (32 - bits);
+			}
+			/* Mask is in host byte order, so convert the address before applying it */
+			r = ntohl (it->addr.s_addr) & mask;
+			if (it->negative) {
+				/* For negatve items insert into list and into negative cache map */
+				if ((r = radix32tree_find (pcache->negative_dynamic_map, r)) != RADIX_NO_VALUE) {
+					t = (GList *)((gpointer)r);
+					t = g_list_prepend (t, item);
+					/* Replace pointers in radix tree and in destructor function */
+					rspamd_mempool_replace_destructor (dynamic_pool, (rspamd_mempool_destruct_t)g_list_free, (gpointer)r, t);
+					rr = radix32tree_replace (pcache->negative_dynamic_map, ntohl (it->addr.s_addr), mask, (uintptr_t)t);
+					if (rr == -1) {
+						msg_warn ("cannot replace ip to tree: %s, mask %X", inet_ntoa (it->addr), mask);
+					}
+				}
+				else {
+					t = g_list_prepend (NULL, item);
+					rspamd_mempool_add_destructor (dynamic_pool, (rspamd_mempool_destruct_t)g_list_free, t);
+					rr = radix32tree_insert (pcache->negative_dynamic_map, ntohl (it->addr.s_addr), mask, (uintptr_t)t);
+					if (rr == -1) {
+						msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (it->addr), mask);
+					}
+					else if (rr == 1) {
+						msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (it->addr), mask);
+					}
+				}
+				/* Insert into list */
+				pcache->dynamic_items = g_list_prepend (pcache->dynamic_items, item);
+			}
+			else {
+				if ((r = radix32tree_find (pcache->dynamic_map, r)) != RADIX_NO_VALUE) {
+					t = (GList *)((gpointer)r);
+					t = g_list_prepend (t, item);
+					/* Replace pointers in radix tree and in destructor function */
+					rspamd_mempool_replace_destructor (dynamic_pool, (rspamd_mempool_destruct_t)g_list_free, (gpointer)r, t);
+					rr = radix32tree_replace (pcache->dynamic_map, ntohl (it->addr.s_addr), mask, (uintptr_t)t);
+					if (rr == -1) {
+						msg_warn ("cannot replace ip to tree: %s, mask %X", inet_ntoa (it->addr), mask);
+					}
+				}
+				else {
+					t = g_list_prepend (NULL, item);
+					rspamd_mempool_add_destructor (dynamic_pool, (rspamd_mempool_destruct_t)g_list_free, t);
+					rr = radix32tree_insert (pcache->dynamic_map, ntohl (it->addr.s_addr), mask, (uintptr_t)t);
+					if (rr == -1) {
+						msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (it->addr), mask);
+					}
+					else if (rr == 1) {
+						msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (it->addr), mask);
+					}
+				}
+			}
+			cur = g_list_next (cur);
+		}
+	}
+}
+
+void
+remove_dynamic_rules (struct symbols_cache *cache)
+{
+	if
(cache->dynamic_items) { + g_list_free (cache->dynamic_items); + cache->dynamic_items = NULL; + } + + if (cache->dynamic_map) { + radix_tree_free (cache->dynamic_map); + cache->dynamic_map = NULL; + } + if (cache->negative_dynamic_map) { + radix_tree_free (cache->negative_dynamic_map); + cache->negative_dynamic_map = NULL; + } +} + +static void +free_cache (gpointer arg) +{ + struct symbols_cache *cache = arg; + + if (cache->map != NULL) { + unmap_cache_file (cache); + } + + if (cache->static_items) { + g_list_free (cache->static_items); + } + if (cache->negative_items) { + g_list_free (cache->negative_items); + } + if (cache->dynamic_items) { + g_list_free (cache->dynamic_items); + } + if (cache->dynamic_map) { + radix_tree_free (cache->dynamic_map); + } + if (cache->negative_dynamic_map) { + radix_tree_free (cache->negative_dynamic_map); + } + g_hash_table_destroy (cache->items_by_symbol); + rspamd_mempool_delete (cache->static_pool); + + g_free (cache); +} + +gboolean +init_symbols_cache (rspamd_mempool_t * pool, struct symbols_cache *cache, struct config_file *cfg, + const gchar *filename, gboolean ignore_checksum) +{ + struct stat st; + gint fd; + GChecksum *cksum; + u_char *mem_sum, *file_sum; + gsize cklen; + gboolean res; + + if (cache == NULL) { + return FALSE; + } + + /* Init locking */ + cache->lock = rspamd_mempool_get_rwlock (pool); + + cache->cfg = cfg; + + /* Just in-memory cache */ + if (filename == NULL) { + post_cache_init (cache); + return TRUE; + } + + /* First of all try to stat file */ + if (stat (filename, &st) == -1) { + /* Check errno */ + if (errno == ENOENT) { + /* Try to create file */ + if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { + msg_info ("cannot create file %s, error %d, %s", filename, errno, strerror (errno)); + return FALSE; + } + else { + return create_cache_file (cache, filename, fd, pool); + } + } + else { + msg_info ("cannot stat file %s, error %d, %s", filename, errno, strerror (errno)); 
+ return FALSE; + } + } + else { + if ((fd = open (filename, O_RDWR)) == -1) { + msg_info ("cannot open file %s, error %d, %s", filename, errno, strerror (errno)); + return FALSE; + } + } + + if (!ignore_checksum) { + /* Calculate checksum */ + cksum = get_mem_cksum (cache); + if (cksum == NULL) { + msg_err ("cannot calculate checksum for symbols"); + close (fd); + return FALSE; + } + + cklen = g_checksum_type_get_length (G_CHECKSUM_SHA1); + mem_sum = g_malloc (cklen); + + g_checksum_get_digest (cksum, mem_sum, &cklen); + /* Now try to read file sum */ + if (lseek (fd, -(cklen), SEEK_END) == -1) { + if (errno == EINVAL) { + /* Try to create file */ + msg_info ("recreate cache file"); + if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { + msg_info ("cannot create file %s, error %d, %s", filename, errno, strerror (errno)); + return FALSE; + } + else { + return create_cache_file (cache, filename, fd, pool); + } + } + close (fd); + g_free (mem_sum); + g_checksum_free (cksum); + msg_err ("cannot seek to read checksum, %d, %s", errno, strerror (errno)); + return FALSE; + } + file_sum = g_malloc (cklen); + if (read (fd, file_sum, cklen) == -1) { + close (fd); + g_free (mem_sum); + g_free (file_sum); + g_checksum_free (cksum); + msg_err ("cannot read checksum, %d, %s", errno, strerror (errno)); + return FALSE; + } + + if (memcmp (file_sum, mem_sum, cklen) != 0) { + close (fd); + g_free (mem_sum); + g_free (file_sum); + g_checksum_free (cksum); + msg_info ("checksum mismatch, recreating file"); + /* Reopen with rw permissions */ + if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { + msg_info ("cannot create file %s, error %d, %s", filename, errno, strerror (errno)); + return FALSE; + } + else { + return create_cache_file (cache, filename, fd, pool); + } + } + + g_free (mem_sum); + g_free (file_sum); + g_checksum_free (cksum); + } + /* MMap cache file and copy saved_cache structures */ + res = mmap_cache_file 
(cache, fd, pool); + + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t)free_cache, cache); + + return res; +} + +static GList * +check_dynamic_item (struct rspamd_task *task, struct symbols_cache *cache) +{ +#ifdef HAVE_INET_PTON + /* TODO: radix doesn't support ipv6 addrs */ + return NULL; +#else + GList *res = NULL; + uintptr_t r; + if (cache->dynamic_map != NULL && task->from_addr.s_addr != INADDR_NONE) { + if ((r = radix32tree_find (cache->dynamic_map, ntohl (task->from_addr.s_addr))) != RADIX_NO_VALUE) { + res = (GList *)((gpointer)r); + return res; + } + else { + return NULL; + } + } + return res; +#endif +} + +static gboolean +check_negative_dynamic_item (struct rspamd_task *task, struct symbols_cache *cache, struct cache_item *item) +{ + +#ifdef HAVE_INET_PTON + /* TODO: radix doesn't support ipv6 addrs */ + return FALSE; +#else + GList *res = NULL; + uintptr_t r; + + if (cache->negative_dynamic_map != NULL && task->from_addr.s_addr != INADDR_NONE) { + if ((r = radix32tree_find (cache->negative_dynamic_map, ntohl (task->from_addr.s_addr))) != RADIX_NO_VALUE) { + res = (GList *)((gpointer)r); + while (res) { + if (res->data == (gpointer)item) { + return TRUE; + } + res = g_list_next (res); + } + } + } + return FALSE; +#endif + +} + +static gboolean +check_debug_symbol (struct config_file *cfg, const gchar *symbol) +{ + GList *cur; + + cur = cfg->debug_symbols; + while (cur) { + if (strcmp (symbol, (const gchar *)cur->data) == 0) { + return TRUE; + } + cur = g_list_next (cur); + } + + return FALSE; +} + +static void +rspamd_symbols_cache_metric_cb (gpointer k, gpointer v, gpointer ud) +{ + struct symbols_cache *cache = (struct symbols_cache *)ud; + GList *cur; + const gchar *sym = k; + gdouble weight = *(gdouble *)v; + struct cache_item *item; + + cur = cache->negative_items; + while (cur) { + item = cur->data; + if (strcmp (item->s->symbol, sym) == 0) { + item->metric_weight = weight; + return; + } + cur = g_list_next (cur); + } + cur = 
cache->static_items; + while (cur) { + item = cur->data; + if (strcmp (item->s->symbol, sym) == 0) { + item->metric_weight = weight; + return; + } + cur = g_list_next (cur); + } +} + +gboolean +validate_cache (struct symbols_cache *cache, struct config_file *cfg, gboolean strict) +{ + struct cache_item *item; + GList *cur, *p, *metric_symbols; + gboolean res; + + if (cache == NULL) { + msg_err ("empty cache is invalid"); + return FALSE; + } + + /* Check each symbol in a cache and find its weight definition */ + cur = cache->negative_items; + while (cur) { + item = cur->data; + if (!item->is_callback) { + if (g_hash_table_lookup (cfg->metrics_symbols, item->s->symbol) == NULL) { + if (strict) { + msg_warn ("no weight registered for symbol %s", item->s->symbol); + return FALSE; + } + else { + msg_info ("no weight registered for symbol %s", item->s->symbol); + } + } + } + cur = g_list_next (cur); + } + cur = cache->static_items; + while (cur) { + item = cur->data; + if (!item->is_callback) { + if (g_hash_table_lookup (cfg->metrics_symbols, item->s->symbol) == NULL) { + if (strict) { + msg_warn ("no weight registered for symbol %s", item->s->symbol); + return FALSE; + } + else { + msg_info ("no weight registered for symbol %s", item->s->symbol); + } + } + } + cur = g_list_next (cur); + } +#ifndef GLIB_HASH_COMPAT + /* Now check each metric item and find corresponding symbol in a cache */ + metric_symbols = g_hash_table_get_keys (cfg->metrics_symbols); + cur = metric_symbols; + while (cur) { + res = FALSE; + p = cache->negative_items; + while (p) { + item = p->data; + if (strcmp (item->s->symbol, cur->data) == 0) { + res = TRUE; + break; + } + p = g_list_next (p); + } + if (!res) { + p = cache->static_items; + while (p) { + item = p->data; + if (strcmp (item->s->symbol, cur->data) == 0) { + res = TRUE; + break; + } + p = g_list_next (p); + } + } + if (!res) { + msg_warn ("symbol '%s' is registered in metric but not found in cache", cur->data); + if (strict) { + return 
FALSE; + } + } + cur = g_list_next (cur); + } + g_list_free (metric_symbols); +#endif /* GLIB_COMPAT */ + + /* Now adjust symbol weights according to default metric */ + if (cfg->default_metric != NULL) { + g_hash_table_foreach (cfg->default_metric->symbols, rspamd_symbols_cache_metric_cb, cache); + /* Resort caches */ + cache->negative_items = g_list_sort (cache->negative_items, cache_logic_cmp); + cache->static_items = g_list_sort (cache->static_items, cache_logic_cmp); + } + + return TRUE; +} + +struct symbol_callback_data { + enum { + CACHE_STATE_NEGATIVE, + CACHE_STATE_DYNAMIC_MAP, + CACHE_STATE_DYNAMIC, + CACHE_STATE_STATIC + } state; + struct cache_item *saved_item; + GList *list_pointer; +}; + +gboolean +call_symbol_callback (struct rspamd_task * task, struct symbols_cache * cache, gpointer *save) +{ +#ifdef HAVE_CLOCK_GETTIME + struct timespec ts1, ts2; +#else + struct timeval tv1, tv2; +#endif + guint64 diff; + struct cache_item *item = NULL; + struct symbol_callback_data *s = *save; + + if (s == NULL) { + if (cache == NULL) { + return FALSE; + } + if (cache->uses++ >= MAX_USES) { + msg_info ("resort symbols cache"); + rspamd_mempool_wlock_rwlock (cache->lock); + cache->uses = 0; + /* Resort while having write lock */ + post_cache_init (cache); + rspamd_mempool_wunlock_rwlock (cache->lock); + } + s = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct symbol_callback_data)); + *save = s; + if (cache->negative_items != NULL) { + s->list_pointer = g_list_first (cache->negative_items); + s->saved_item = s->list_pointer->data; + s->state = CACHE_STATE_NEGATIVE; + } + else if ((s->list_pointer = check_dynamic_item (task, cache)) || cache->dynamic_items != NULL) { + if (s->list_pointer == NULL) { + s->list_pointer = g_list_first (cache->dynamic_items); + s->saved_item = s->list_pointer->data; + s->state = CACHE_STATE_DYNAMIC; + } + else { + s->saved_item = s->list_pointer->data; + s->state = CACHE_STATE_DYNAMIC_MAP; + } + } + else { + s->state = 
CACHE_STATE_STATIC; + s->list_pointer = g_list_first (cache->static_items); + if (s->list_pointer) { + s->saved_item = s->list_pointer->data; + } + else { + return FALSE; + } + } + item = s->saved_item; + } + else { + if (cache == NULL) { + return FALSE; + } + switch (s->state) { + case CACHE_STATE_NEGATIVE: + s->list_pointer = g_list_next (s->list_pointer); + if (s->list_pointer == NULL) { + if ((s->list_pointer = check_dynamic_item (task, cache)) || cache->dynamic_items != NULL) { + if (s->list_pointer == NULL) { + s->list_pointer = g_list_first (cache->dynamic_items); + s->saved_item = s->list_pointer->data; + s->state = CACHE_STATE_DYNAMIC; + } + else { + s->saved_item = s->list_pointer->data; + s->state = CACHE_STATE_DYNAMIC_MAP; + } + } + else { + s->state = CACHE_STATE_STATIC; + s->list_pointer = g_list_first (cache->static_items); + if (s->list_pointer) { + s->saved_item = s->list_pointer->data; + } + else { + return FALSE; + } + } + } + else { + s->saved_item = s->list_pointer->data; + } + item = s->saved_item; + break; + case CACHE_STATE_DYNAMIC_MAP: + s->list_pointer = g_list_next (s->list_pointer); + if (s->list_pointer == NULL) { + s->list_pointer = g_list_first (cache->dynamic_items); + if (s->list_pointer) { + s->saved_item = s->list_pointer->data; + s->state = CACHE_STATE_DYNAMIC; + } + else { + s->state = CACHE_STATE_STATIC; + s->list_pointer = g_list_first (cache->static_items); + if (s->list_pointer) { + s->saved_item = s->list_pointer->data; + } + else { + return FALSE; + } + } + } + else { + s->saved_item = s->list_pointer->data; + } + item = s->saved_item; + break; + case CACHE_STATE_DYNAMIC: + s->list_pointer = g_list_next (s->list_pointer); + if (s->list_pointer == NULL) { + s->state = CACHE_STATE_STATIC; + s->list_pointer = g_list_first (cache->static_items); + if (s->list_pointer) { + s->saved_item = s->list_pointer->data; + } + else { + return FALSE; + } + } + else { + s->saved_item = s->list_pointer->data; + /* Skip items that are in 
negative map */ + while (s->list_pointer != NULL && check_negative_dynamic_item (task, cache, s->saved_item)) { + s->list_pointer = g_list_next (s->list_pointer); + if (s->list_pointer != NULL) { + s->saved_item = s->list_pointer->data; + } + } + if (s->list_pointer == NULL) { + s->state = CACHE_STATE_STATIC; + s->list_pointer = g_list_first (cache->static_items); + if (s->list_pointer) { + s->saved_item = s->list_pointer->data; + } + else { + return FALSE; + } + } + } + item = s->saved_item; + break; + case CACHE_STATE_STATIC: + /* Next pointer */ + s->list_pointer = g_list_next (s->list_pointer); + if (s->list_pointer) { + s->saved_item = s->list_pointer->data; + } + else { + return FALSE; + } + item = s->saved_item; + break; + } + } + if (!item) { + return FALSE; + } + if (!item->is_virtual) { +#ifdef HAVE_CLOCK_GETTIME +# ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID + clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts1); +# elif defined(HAVE_CLOCK_VIRTUAL) + clock_gettime (CLOCK_VIRTUAL, &ts1); +# else + clock_gettime (CLOCK_REALTIME, &ts1); +# endif +#else + if (gettimeofday (&tv1, NULL) == -1) { + msg_warn ("gettimeofday failed: %s", strerror (errno)); + } +#endif + if (G_UNLIKELY (check_debug_symbol (task->cfg, item->s->symbol))) { + rspamd_log_debug (rspamd_main->logger); + item->func (task, item->user_data); + rspamd_log_nodebug (rspamd_main->logger); + } + else { + item->func (task, item->user_data); + } + + +#ifdef HAVE_CLOCK_GETTIME +# ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID + clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts2); +# elif defined(HAVE_CLOCK_VIRTUAL) + clock_gettime (CLOCK_VIRTUAL, &ts2); +# else + clock_gettime (CLOCK_REALTIME, &ts2); +# endif +#else + if (gettimeofday (&tv2, NULL) == -1) { + msg_warn ("gettimeofday failed: %s", strerror (errno)); + } +#endif + +#ifdef HAVE_CLOCK_GETTIME + diff = (ts2.tv_sec - ts1.tv_sec) * 1000000 + (ts2.tv_nsec - ts1.tv_nsec) / 1000; +#else + diff = (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec); +#endif + 
item->s->avg_time = set_counter (item->s->symbol, diff); + } + + s->saved_item = item; + + return TRUE; + +} diff --git a/src/libserver/symbols_cache.h b/src/libserver/symbols_cache.h new file mode 100644 index 000000000..bb2100fc1 --- /dev/null +++ b/src/libserver/symbols_cache.h @@ -0,0 +1,150 @@ +#ifndef RSPAMD_SYMBOLS_CACHE_H +#define RSPAMD_SYMBOLS_CACHE_H + +#include "config.h" +#include "radix.h" + +#define MAX_SYMBOL 128 + +struct rspamd_task; +struct config_file; + +typedef void (*symbol_func_t)(struct rspamd_task *task, gpointer user_data); + +struct saved_cache_item { + gchar symbol[MAX_SYMBOL]; + double weight; + guint32 frequency; + double avg_time; +}; + +struct dynamic_map_item { + struct in_addr addr; + guint32 mask; + gboolean negative; +}; + +struct cache_item { + /* Static item's data */ + struct saved_cache_item *s; + + /* For dynamic rules */ + struct dynamic_map_item *networks; + guint32 networks_number; + gboolean is_dynamic; + + /* Callback data */ + symbol_func_t func; + gpointer user_data; + + /* Flags of virtual symbols */ + gboolean is_virtual; + gboolean is_callback; + + /* Priority */ + gint priority; + gdouble metric_weight; +}; + + +struct symbols_cache { + /* Normal cache items */ + GList *static_items; + + /* Items that have negative weights */ + GList *negative_items; + + /* Radix map of dynamic rules with ip mappings */ + radix_tree_t *dynamic_map; + radix_tree_t *negative_dynamic_map; + + /* Common dynamic rules */ + GList *dynamic_items; + + /* Hash table for fast access */ + GHashTable *items_by_symbol; + + rspamd_mempool_t *static_pool; + + guint cur_items; + guint used_items; + guint uses; + gpointer map; + rspamd_mempool_rwlock_t *lock; + struct config_file *cfg; +}; + +/** + * Load symbols cache from file, must be called _after_ init_symbols_cache + */ +gboolean init_symbols_cache (rspamd_mempool_t *pool, struct symbols_cache *cache, struct config_file *cfg, + const gchar *filename, gboolean ignore_checksum); + +/** + * 
Register function for symbols parsing + * @param name name of symbol + * @param func pointer to handler + * @param user_data pointer to user_data + */ +void register_symbol (struct symbols_cache **cache, const gchar *name, double weight, + symbol_func_t func, gpointer user_data); + + +/** + * Register virtual symbol + * @param name name of symbol + */ +void register_virtual_symbol (struct symbols_cache **cache, const gchar *name, double weight); + +/** + * Register callback function for symbols parsing + * @param name name of symbol + * @param func pointer to handler + * @param user_data pointer to user_data + */ +void register_callback_symbol (struct symbols_cache **cache, const gchar *name, double weight, + symbol_func_t func, gpointer user_data); + +/** + * Register function for symbols parsing with strict priority + * @param name name of symbol + * @param func pointer to handler + * @param user_data pointer to user_data + */ +void register_callback_symbol_priority (struct symbols_cache **cache, const gchar *name, double weight, + gint priority, symbol_func_t func, gpointer user_data); + +/** + * Register function for dynamic symbols parsing + * @param name name of symbol + * @param func pointer to handler + * @param user_data pointer to user_data + */ +void register_dynamic_symbol (rspamd_mempool_t *pool, struct symbols_cache **cache, const gchar *name, + double weight, symbol_func_t func, + gpointer user_data, GList *networks); + +/** + * Call function for cached symbol using saved callback + * @param task task object + * @param cache symbols cache + * @param save pointer to currently saved item + */ +gboolean call_symbol_callback (struct rspamd_task *task, struct symbols_cache *cache, gpointer *save); + +/** + * Remove all dynamic rules from cache + * @param cache symbols cache + */ +void remove_dynamic_rules (struct symbols_cache *cache); + +/** + * Validate cache items against their weights defined in metrics + * @param cache symbols cache + * @param
cfg configuration + * @param strict do strict checks - symbols MUST be described in metrics + */ +gboolean validate_cache (struct symbols_cache *cache, struct config_file *cfg, gboolean strict); + + +#endif diff --git a/src/libserver/task.c b/src/libserver/task.c new file mode 100644 index 000000000..f389793dd --- /dev/null +++ b/src/libserver/task.c @@ -0,0 +1,159 @@ +/* Copyright (c) 2014, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "task.h" +#include "main.h" +#include "filter.h" +#include "message.h" + +/* + * Destructor for recipients list in a task + */ +static void +rcpt_destruct (void *pointer) +{ + struct rspamd_task *task = (struct rspamd_task *) pointer; + + if (task->rcpt) { + g_list_free (task->rcpt); + } +} + +/* + * Create new task + */ +struct rspamd_task * +rspamd_task_new (struct rspamd_worker *worker) +{ + struct rspamd_task *new_task; + + new_task = g_slice_alloc0 (sizeof (struct rspamd_task)); + + new_task->worker = worker; + new_task->state = READ_MESSAGE; + if (worker) { + new_task->cfg = worker->srv->cfg; + } +#ifdef HAVE_CLOCK_GETTIME +# ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID + clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &new_task->ts); +# elif defined(HAVE_CLOCK_VIRTUAL) + clock_gettime (CLOCK_VIRTUAL, &new_task->ts); +# else + clock_gettime (CLOCK_REALTIME, &new_task->ts); +# endif +#endif + if (gettimeofday (&new_task->tv, NULL) == -1) { + msg_warn ("gettimeofday failed: %s", strerror (errno)); + } + + new_task->task_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + + /* Add destructor for recipients list (it would be better to use anonymous function here */ + rspamd_mempool_add_destructor (new_task->task_pool, + (rspamd_mempool_destruct_t) rcpt_destruct, new_task); + new_task->results = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + rspamd_mempool_add_destructor (new_task->task_pool, + (rspamd_mempool_destruct_t) g_hash_table_destroy, + new_task->results); + new_task->re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + rspamd_mempool_add_destructor (new_task->task_pool, + (rspamd_mempool_destruct_t) g_hash_table_destroy, + new_task->re_cache); + new_task->raw_headers = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); + rspamd_mempool_add_destructor (new_task->task_pool, + (rspamd_mempool_destruct_t) g_hash_table_destroy, + new_task->raw_headers); + new_task->emails = g_tree_new (compare_email_func); + 
rspamd_mempool_add_destructor (new_task->task_pool, + (rspamd_mempool_destruct_t) g_tree_destroy, + new_task->emails); + new_task->urls = g_tree_new (compare_url_func); + rspamd_mempool_add_destructor (new_task->task_pool, + (rspamd_mempool_destruct_t) g_tree_destroy, + new_task->urls); + new_task->sock = -1; + new_task->is_mime = TRUE; + new_task->pre_result.action = METRIC_ACTION_NOACTION; + + new_task->message_id = new_task->queue_id = "undef"; + + return new_task; +} + + +/* + * Free all structures of worker_task + */ +void +rspamd_task_free (struct rspamd_task *task, gboolean is_soft) +{ + GList *part; + struct mime_part *p; + + if (task) { + debug_task ("free pointer %p", task); + while ((part = g_list_first (task->parts))) { + task->parts = g_list_remove_link (task->parts, part); + p = (struct mime_part *) part->data; + g_byte_array_free (p->content, TRUE); + g_list_free_1 (part); + } + if (task->text_parts) { + g_list_free (task->text_parts); + } + if (task->images) { + g_list_free (task->images); + } + if (task->messages) { + g_list_free (task->messages); + } + if (task->received) { + g_list_free (task->received); + } + if (task->http_conn != NULL) { + rspamd_http_connection_unref (task->http_conn); + } + if (task->sock != -1) { + close (task->sock); + } + rspamd_mempool_delete (task->task_pool); + g_slice_free1 (sizeof (struct rspamd_task), task); + } +} + +void +rspamd_task_free_hard (gpointer ud) +{ + struct rspamd_task *task = ud; + + rspamd_task_free (task, FALSE); +} + +void +rspamd_task_free_soft (gpointer ud) +{ + struct rspamd_task *task = ud; + + rspamd_task_free (task, FALSE); +} diff --git a/src/libserver/task.h b/src/libserver/task.h new file mode 100644 index 000000000..f8f7c89e3 --- /dev/null +++ b/src/libserver/task.h @@ -0,0 +1,165 @@ +/* Copyright (c) 2014, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef TASK_H_ +#define TASK_H_ + +#include "config.h" +#include "http.h" +#include "events.h" +#include "util.h" +#include "mem_pool.h" +#include "dns.h" + +enum rspamd_command { + CMD_CHECK, + CMD_SYMBOLS, + CMD_REPORT, + CMD_REPORT_IFSPAM, + CMD_SKIP, + CMD_PING, + CMD_PROCESS, + CMD_OTHER +}; + +enum rspamd_metric_action { + METRIC_ACTION_REJECT = 0, + METRIC_ACTION_SOFT_REJECT, + METRIC_ACTION_REWRITE_SUBJECT, + METRIC_ACTION_ADD_HEADER, + METRIC_ACTION_GREYLIST, + METRIC_ACTION_NOACTION, + METRIC_ACTION_MAX +}; + +typedef gint (*protocol_reply_func)(struct rspamd_task *task); + +struct custom_command { + const gchar *name; + protocol_reply_func func; +}; + +/** + * Worker task structure + */ +struct rspamd_task { + struct rspamd_worker *worker; /**< pointer to worker object */ + enum { + READ_MESSAGE, + WAIT_PRE_FILTER, + WAIT_FILTER, + WAIT_POST_FILTER, + WRITE_REPLY, + CLOSING_CONNECTION + } state; /**< current session state */ + enum rspamd_command cmd; /**< command */ + struct custom_command *custom_cmd; /**< custom command if any */ + gint sock; /**< socket descriptor */ + gboolean is_mime; /**< if this task is mime task */ + gboolean is_json; /**< output is JSON */ + gboolean allow_learn; /**< allow learning */ + gboolean is_skipped; /**< whether message was skipped by configuration */ + + gchar *helo; /**< helo header value */ + gchar *from; /**< from header value */ + gchar *queue_id; /**< queue id if specified */ + const gchar *message_id; /**< message id */ + GList *rcpt; /**< recipients list */ + guint nrcpt; /**< number of recipients */ + rspamd_inet_addr_t from_addr; /**< from addr for a task */ + rspamd_inet_addr_t client_addr; /**< address of connected socket */ + gchar *deliver_to; /**< address to deliver */ + gchar *user; /**< user to deliver */ + gchar *subject; /**< subject (for non-mime) */ + gchar *hostname; /**< hostname reported by MTA */ + GString *msg; /**< message buffer */ + struct rspamd_http_connection *http_conn; /**< HTTP 
server connection */ + struct rspamd_async_session* s; /**< async session object */ + gint parts_count; /**< mime parts count */ + GMimeMessage *message; /**< message, parsed with GMime */ + GMimeObject *parser_parent_part; /**< current parent part */ + InternetAddressList *rcpts; /**< list of all recipients */ + GList *parts; /**< list of parsed parts */ + GList *text_parts; /**< list of text parts */ + gchar *raw_headers_str; /**< list of raw headers */ + GList *received; /**< list of received headers */ + GTree *urls; /**< list of parsed urls */ + GTree *emails; /**< list of parsed emails */ + GList *images; /**< list of images */ + GHashTable *raw_headers; /**< list of raw headers */ + GHashTable *results; /**< hash table of metric_result indexed by + * metric's name */ + GHashTable *tokens; /**< hash table of tokens indexed by tokenizer + * pointer */ + GList *messages; /**< list of messages that would be reported */ + GHashTable *re_cache; /**< cache for matched or not matched regexps */ + struct config_file *cfg; /**< pointer to config object */ + gchar *last_error; /**< last error */ + gint error_code; /**< code of last error */ + rspamd_mempool_t *task_pool; /**< memory pool for task */ +#ifdef HAVE_CLOCK_GETTIME + struct timespec ts; /**< time of connection */ +#endif + struct timeval tv; /**< time of connection */ + guint32 scan_milliseconds; /**< how much milliseconds passed */ + gboolean pass_all_filters; /**< pass task throught every rule */ + gboolean no_log; /**< do not log or write this task to the history */ + guint32 parser_recursion; /**< for avoiding recursion stack overflow */ + gboolean (*fin_callback)(void *arg); /**< calback for filters finalizing */ + void *fin_arg; /**< argument for fin callback */ + + guint32 dns_requests; /**< number of DNS requests per this task */ + + struct rspamd_dns_resolver *resolver; /**< DNS resolver */ + struct event_base *ev_base; /**< Event base */ + + GThreadPool *classify_pool; /**< A pool of classify 
threads */ + + struct { + enum rspamd_metric_action action; /**< Action of pre filters */ + gchar *str; /**< String describing action */ + } pre_result; /**< Result of pre-filters */ +}; + +/** + * Construct new task for worker + */ +struct rspamd_task* rspamd_task_new (struct rspamd_worker *worker); +/** + * Destroy task object and remove its IO dispatcher if it exists + */ +void rspamd_task_free (struct rspamd_task *task, gboolean is_soft); +void rspamd_task_free_hard (gpointer ud); +void rspamd_task_free_soft (gpointer ud); + +/** + * Called if session was restored inside fin callback + */ +void rspamd_task_restore (void *arg); + +/** + * Called if all filters are processed + * @return TRUE if session should be terminated + */ +gboolean rspamd_task_fin (void *arg); + +#endif /* TASK_H_ */ diff --git a/src/libserver/url.c b/src/libserver/url.c new file mode 100644 index 000000000..c4313e8a9 --- /dev/null +++ b/src/libserver/url.c @@ -0,0 +1,1620 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "url.h" +#include "util.h" +#include "fstring.h" +#include "main.h" +#include "message.h" +#include "trie.h" + +#define POST_CHAR 1 +#define POST_CHAR_S "\001" + +/* Tcp port range */ +#define LOWEST_PORT 0 +#define HIGHEST_PORT 65535 + +#define uri_port_is_valid(port) \ + (LOWEST_PORT <= (port) && (port) <= HIGHEST_PORT) + +struct _proto { + guchar *name; + gint port; + uintptr_t *unused; + guint need_slashes:1; + guint need_slash_after_host:1; + guint free_syntax:1; + guint need_ssl:1; +}; + +typedef struct url_match_s { + const gchar *m_begin; + gsize m_len; + const gchar *pattern; + const gchar *prefix; + gboolean add_prefix; +} url_match_t; + +#define URL_FLAG_NOHTML 0x1 +#define URL_FLAG_STRICT_MATCH 0x2 + +struct url_matcher { + const gchar *pattern; + const gchar *prefix; + gboolean (*start)(const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); + gboolean (*end)(const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); + gint flags; +}; + +static gboolean url_file_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); +static gboolean url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); + +static gboolean url_web_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); +static gboolean url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); + +static 
gboolean url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); +static gboolean url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); + +static gboolean url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); +static gboolean url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); + +struct url_matcher matchers[] = { + /* Common prefixes */ + { "file://", "", url_file_start, url_file_end, 0 }, + { "ftp://", "", url_web_start, url_web_end, 0 }, + { "sftp://", "", url_web_start, url_web_end, 0 }, + { "http://", "", url_web_start, url_web_end, 0 }, + { "https://", "", url_web_start, url_web_end, 0 }, + { "news://", "", url_web_start, url_web_end, 0 }, + { "nntp://", "", url_web_start, url_web_end, 0 }, + { "telnet://", "", url_web_start, url_web_end, 0 }, + { "webcal://", "", url_web_start, url_web_end, 0 }, + { "mailto://", "", url_email_start, url_email_end, 0 }, + { "callto://", "", url_web_start, url_web_end, 0 }, + { "h323:", "", url_web_start, url_web_end, 0 }, + { "sip:", "", url_web_start, url_web_end, 0 }, + { "www.", "http://", url_web_start, url_web_end, 0 }, + { "ftp.", "ftp://", url_web_start, url_web_end, URL_FLAG_NOHTML }, + /* TLD domains parts */ + { ".ac", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ad", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ae", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".aero", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".af", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ag", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ai", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".al", 
"http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".am", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".an", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ao", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".aq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ar", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".arpa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".as", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".asia", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".at", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".au", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".aw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ax", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".az", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ba", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".be", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bi", "http://", url_tld_start, url_tld_end, 
URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".biz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".br", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".by", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ca", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cat", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ch", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ci", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ck", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + 
{ ".cl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".co", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".com", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".coop", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".de", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".dj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".dk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".dm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".do", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".dz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ec", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".edu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ee", "http://", url_tld_start, 
url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".eg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".er", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".es", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".et", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".eu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ga", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ge", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | 
URL_FLAG_STRICT_MATCH }, + { ".gn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gov", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".hk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".hm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".hn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".hr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ht", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".hu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".id", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ie", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".il", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".im", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".in", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".info", 
"http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".int", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".io", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".iq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ir", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".is", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".it", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".je", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".jm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".jo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".jobs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".jp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ke", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ki", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".km", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ky", "http://", url_tld_start, url_tld_end, 
URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".la", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".li", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ls", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ly", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ma", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".md", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".me", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mil", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ml", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + 
{ ".mm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mobi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ms", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".museum", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".my", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".na", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".name", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ne", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".net", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nf", "http://", url_tld_start, 
url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ng", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ni", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".no", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".np", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".om", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".org", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pe", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ph", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pro", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | 
URL_FLAG_STRICT_MATCH }, + { ".ps", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".py", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".qa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".re", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ro", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".rs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ru", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".rw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".se", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".si", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sm", "http://", 
url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".so", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".st", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".su", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".td", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tel", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".th", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".to", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | 
URL_FLAG_STRICT_MATCH }, + { ".tp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".travel", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ua", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ug", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".uk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".us", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".uy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".uz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".va", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".vc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ve", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".vg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".vi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".vn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".vu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".wf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ws", 
"http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".xxx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ye", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".yt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".za", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".zm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".zw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + /* Likely emails */ + { "@", "mailto://",url_email_start, url_email_end, URL_FLAG_NOHTML } +}; + +struct url_match_scanner { + struct url_matcher *matchers; + gsize matchers_count; + rspamd_trie_t *patterns; +}; + +struct url_match_scanner *url_scanner = NULL; + +static const struct _proto protocol_backends[] = { + {"file", 0, NULL, 1, 0, 0, 0}, + {"ftp", 21, NULL, 1, 0, 0, 0}, + {"http", 80, NULL, 1, 0, 0, 0}, + {"https", 443, NULL, 1, 0, 0, 1}, + {"mailto", 25, NULL, 1, 0, 0, 0}, + /* Keep these last! */ + {NULL, 0, NULL, 0, 0, 1, 0} +}; + +/* Convert an ASCII hex digit to the corresponding number between 0 + and 15. H should be a hexadecimal digit that satisfies isxdigit; + otherwise, the result is undefined. */ +#define XDIGIT_TO_NUM(h) ((h) < 'A' ? (h) - '0' : g_ascii_toupper (h) - 'A' + 10) +#define X2DIGITS_TO_NUM(h1, h2) ((XDIGIT_TO_NUM (h1) << 4) + XDIGIT_TO_NUM (h2)) +/* The reverse of the above: convert a number in the [0, 16) range to + the ASCII representation of the corresponding hexadecimal digit. + `+ 0' is there so you can't accidentally use it as an lvalue. 
*/ +#define XNUM_TO_DIGIT(x) ("0123456789ABCDEF"[x] + 0) +#define XNUM_TO_digit(x) ("0123456789abcdef"[x] + 0) + +static guchar url_scanner_table[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 1, 1, 9, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 24,128,160,128,128,128,128,128,160,160,128,128,160,192,160,160, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,160,160, 32,128, 32,128, + 160, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,160,160,160,128,192, + 128, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,128,128,128,128, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +enum { + IS_CTRL = (1 << 0), + IS_ALPHA = (1 << 1), + IS_DIGIT = (1 << 2), + IS_LWSP = (1 << 3), + IS_SPACE = (1 << 4), + IS_SPECIAL = (1 << 5), + IS_DOMAIN = (1 << 6), + IS_URLSAFE = (1 << 7) +}; + +#define is_ctrl(x) ((url_scanner_table[(guchar)(x)] & IS_CTRL) != 0) +#define is_lwsp(x) ((url_scanner_table[(guchar)(x)] & IS_LWSP) != 0) +#define is_atom(x) ((url_scanner_table[(guchar)(x)] & (IS_SPECIAL|IS_SPACE|IS_CTRL)) == 0) +#define is_alpha(x) ((url_scanner_table[(guchar)(x)] & IS_ALPHA) != 0) +#define is_digit(x) ((url_scanner_table[(guchar)(x)] & IS_DIGIT) != 0) +#define is_domain(x) ((url_scanner_table[(guchar)(x)] & IS_DOMAIN) != 0) +#define is_urlsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_ALPHA|IS_DIGIT|IS_URLSAFE)) != 0) + + +const gchar * +url_strerror (enum uri_errno err) +{ + switch (err) { + case URI_ERRNO_OK: + return "Parsing went well"; + case URI_ERRNO_EMPTY: + return "The URI string was empty"; + case 
URI_ERRNO_INVALID_PROTOCOL: + return "No protocol was found"; + case URI_ERRNO_NO_SLASHES: + return "Slashes after protocol missing"; + case URI_ERRNO_TOO_MANY_SLASHES: + return "Too many slashes after protocol"; + case URI_ERRNO_TRAILING_DOTS: + return "'.' after host"; + case URI_ERRNO_NO_HOST: + return "Host part is missing"; + case URI_ERRNO_NO_PORT_COLON: + return "':' after host without port"; + case URI_ERRNO_NO_HOST_SLASH: + return "Slash after host missing"; + case URI_ERRNO_IPV6_SECURITY: + return "IPv6 security bug detected"; + case URI_ERRNO_INVALID_PORT: + return "Port number is bad"; + case URI_ERRNO_INVALID_PORT_RANGE: + return "Port number is not within 0-65535"; + } + return NULL; +} + +static gint +check_uri_file (gchar *name) +{ + static const gchar chars[] = POST_CHAR_S "#?"; + + return strcspn (name, chars); +} + +static gint +url_init (void) +{ + guint i; + gchar patbuf[128]; + + if (url_scanner == NULL) { + url_scanner = g_malloc (sizeof (struct url_match_scanner)); + url_scanner->matchers = matchers; + url_scanner->matchers_count = G_N_ELEMENTS (matchers); + url_scanner->patterns = rspamd_trie_create (TRUE); + for (i = 0; i < url_scanner->matchers_count; i ++) { + if (matchers[i].flags & URL_FLAG_STRICT_MATCH) { + /* Insert more specific patterns */ + + /* some.tld/ */ + rspamd_snprintf (patbuf, sizeof (patbuf), "%s/", matchers[i].pattern); + rspamd_trie_insert (url_scanner->patterns, patbuf, i); + /* some.tld */ + rspamd_snprintf (patbuf, sizeof (patbuf), "%s ", matchers[i].pattern); + rspamd_trie_insert (url_scanner->patterns, patbuf, i); + /* some.tld: */ + rspamd_snprintf (patbuf, sizeof (patbuf), "%s:", matchers[i].pattern); + rspamd_trie_insert (url_scanner->patterns, patbuf, i); + } + else { + rspamd_trie_insert (url_scanner->patterns, matchers[i].pattern, i); + } + } + } + + return 0; +} + +enum protocol +get_protocol (gchar *name, gint namelen) +{ + /* These are really enum protocol values but can take on negative + * values and 
since 0 <= -1 for enum values it's better to use clean + * integer type. */ + gint start, end; + enum protocol protocol; + guchar *pname; + gint pnamelen, minlen, compare; + + /* Almost dichotomic search is used here */ + /* Starting at the HTTP entry which is the most common that will make + * file and NNTP the next entries checked and amongst the third checks + * are proxy and FTP. */ + start = 0; + end = PROTOCOL_UNKNOWN - 1; + protocol = PROTOCOL_HTTP; + + while (start <= end) { + pname = protocol_backends[protocol].name; + pnamelen = strlen (pname); + minlen = MIN (pnamelen, namelen); + compare = g_ascii_strncasecmp (pname, name, minlen); + + if (compare == 0) { + if (pnamelen == namelen) + return protocol; + + /* If the current protocol name is longer than the + * protocol name being searched for move @end else move + * @start. */ + compare = pnamelen > namelen ? 1 : -1; + } + + if (compare > 0) + end = protocol - 1; + else + start = protocol + 1; + + protocol = (start + end) / 2; + } + + return PROTOCOL_UNKNOWN; +} + + +gint +get_protocol_port (enum protocol protocol) +{ + return protocol_backends[protocol].port; +} + +gint +get_protocol_need_slashes (enum protocol protocol) +{ + return protocol_backends[protocol].need_slashes; +} + +gint +get_protocol_need_slash_after_host (enum protocol protocol) +{ + return protocol_backends[protocol].need_slash_after_host; +} + +gint +get_protocol_free_syntax (enum protocol protocol) +{ + return protocol_backends[protocol].free_syntax; +} + +static gint +get_protocol_length (const gchar *url) +{ + gchar *end = (gchar *)url; + + /* Seek the end of the protocol name if any. */ + /* RFC1738: + * scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] + * (but per its recommendations we accept "upalpha" too) */ + while (g_ascii_isalnum (*end) || *end == '+' || *end == '-' || *end == '.') + end++; + + /* Also return 0 if there's no protocol name (@end == @url). */ + return (*end == ':') ? 
end - url : 0; +} + + +/* + * Calcualte new length of unescaped hostlen + */ +static guint +url_calculate_escaped_hostlen (gchar *host, guint hostlen) +{ + guint i, result = hostlen; + gchar *p = host, c; + + for (i = 0; i < hostlen; i++, p++) { + if (*p == '%' && g_ascii_isxdigit (*(p + 1)) && g_ascii_isxdigit (*(p + 2)) && i < hostlen - 2) { + c = X2DIGITS_TO_NUM (*(p + 1), *(p + 2)); + if (c != '\0') { + result -= 2; + } + } + } + + return result; +} + +/* URL-unescape the string S. + + This is done by transforming the sequences "%HH" to the character + represented by the hexadecimal digits HH. If % is not followed by + two hexadecimal digits, it is inserted literally. + + The transformation is done in place. If you need the original + string intact, make a copy before calling this function. */ + +static void +url_unescape (gchar *s) +{ + gchar *t = s; /* t - tortoise */ + gchar *h = s; /* h - hare */ + + for (; *h; h++, t++) { + if (*h != '%') { + copychar: + *t = *h; + } + else { + gchar c; + /* Do nothing if '%' is not followed by two hex digits. */ + if (!h[1] || !h[2] || !(g_ascii_isxdigit (h[1]) && g_ascii_isxdigit (h[2]))) + goto copychar; + c = X2DIGITS_TO_NUM (h[1], h[2]); + /* Don't unescape %00 because there is no way to insert it + * into a C string without effectively truncating it. 
*/ + if (c == '\0') + goto copychar; + *t = c; + h += 2; + } + } + *t = '\0'; +} + +static void +url_strip (gchar *s) +{ + gchar *t = s; /* t - tortoise */ + gchar *h = s; /* h - hare */ + + while (*h) { + if (g_ascii_isgraph (*h)) { + *t = *h; + t++; + } + h++; + } + *t = '\0'; +} + +static gchar * +url_escape_1 (const gchar *s, gint allow_passthrough, rspamd_mempool_t * pool) +{ + const gchar *p1; + gchar *p2, *newstr; + gint newlen; + gint addition = 0; + + for (p1 = s; *p1; p1++) + if (!is_urlsafe (*p1)) { + addition += 2; /* Two more characters (hex digits) */ + } + + if (!addition) { + if (allow_passthrough) { + return (gchar *)s; + } + else { + return rspamd_mempool_strdup (pool, s); + } + } + + newlen = (p1 - s) + addition; + newstr = (gchar *)rspamd_mempool_alloc (pool, newlen + 1); + + p1 = s; + p2 = newstr; + while (*p1) { + /* Quote the characters that match the test mask. */ + if (!is_urlsafe (*p1)) { + guchar c = *p1++; + *p2++ = '%'; + *p2++ = XNUM_TO_DIGIT (c >> 4); + *p2++ = XNUM_TO_DIGIT (c & 0xf); + } + else + *p2++ = *p1++; + } + *p2 = '\0'; + + return newstr; +} + +/* URL-escape the unsafe characters (see urlchr_table) in a given + string, returning a freshly allocated string. */ + +gchar * +url_escape (const gchar *s, rspamd_mempool_t * pool) +{ + return url_escape_1 (s, 0, pool); +} + +/* Decide whether the gchar at position P needs to be encoded. (It is + not enough to pass a single gchar *P because the function may need + to inspect the surrounding context.) + + Return 1 if the gchar should be escaped as %XX, 0 otherwise. */ + +static inline gboolean +char_needs_escaping (const gchar *p) +{ + if (*p == '%') { + if (g_ascii_isxdigit (*(p + 1)) && g_ascii_isxdigit (*(p + 2))) { + return FALSE; + } + else { + return TRUE; + } + } + else if (! is_urlsafe (*p)) { + return TRUE; + } + return FALSE; +} + +/* Translate a %-escaped (but possibly non-conformant) input string S + into a %-escaped (and conformant) output string. 
+*/ + +static gchar * +reencode_escapes (gchar *s, rspamd_mempool_t * pool) +{ + const gchar *p1; + gchar *newstr, *p2; + gint oldlen, newlen; + + gint encode_count = 0; + + /* First pass: inspect the string to see if there's anything to do, + and to calculate the new length. */ + for (p1 = s; *p1; p1++) + if (char_needs_escaping (p1)) + ++encode_count; + + if (!encode_count) { + /* The string is good as it is. */ + return s; + } + + oldlen = p1 - s; + /* Each encoding adds two characters (hex digits). */ + newlen = oldlen + 2 * encode_count; + newstr = rspamd_mempool_alloc (pool, newlen + 1); + + /* Second pass: copy the string to the destination address, encoding + chars when needed. */ + p1 = s; + p2 = newstr; + + while (*p1) + if (char_needs_escaping (p1)) { + guchar c = *p1++; + *p2++ = '%'; + *p2++ = XNUM_TO_DIGIT (c >> 4); + *p2++ = XNUM_TO_DIGIT (c & 0xf); + } + else { + *p2++ = *p1++; + } + + *p2 = '\0'; + return newstr; +} + +/* Unescape CHR in an otherwise escaped STR. Used to selectively + escaping of certain characters, such as "/" and ":". Returns a + count of unescaped chars. */ + +static void +unescape_single_char (gchar *str, gchar chr) +{ + const gchar c1 = XNUM_TO_DIGIT (chr >> 4); + const gchar c2 = XNUM_TO_DIGIT (chr & 0xf); + gchar *h = str; /* hare */ + gchar *t = str; /* tortoise */ + + for (; *h; h++, t++) { + if (h[0] == '%' && h[1] == c1 && h[2] == c2) { + *t = chr; + h += 2; + } + else { + *t = *h; + } + } + *t = '\0'; +} + + +/* + * Resolve "." and ".." elements of PATH by destructively modifying + * PATH and return non-zero if PATH has been modified, zero otherwise. + */ + +static gboolean +path_simplify (gchar *path) +{ + gchar *h = path; /* hare */ + gchar *t = path; /* tortoise */ + gchar *beg = path; /* boundary for backing the tortoise */ + gchar *end = path + strlen (path); + + while (h < end) { + /* Hare should be at the beginning of a path element. */ + if (h[0] == '.' && (h[1] == '/' || h[1] == '\0')) { + /* Ignore "./". 
*/ + h += 2; + } + else if (h[0] == '.' && h[1] == '.' && (h[2] == '/' || h[2] == '\0')) { + /* Handle "../" by retreating the tortoise by one path + element -- but not past beginning. */ + if (t > beg) { + /* Move backwards until T hits the beginning of the + previous path element or the beginning of path. */ + for (--t; t > beg && t[-1] != '/'; t--); + } + else { + /* If we're at the beginning, copy the "../" literally + move the beginning so a later ".." doesn't remove + it. */ + beg = t + 3; + goto regular; + } + h += 3; + } + else { + regular: + /* A regular path element. If H hasn't advanced past T, + simply skip to the next path element. Otherwise, copy + the path element until the next slash. */ + if (t == h) { + /* Skip the path element, including the slash. */ + while (h < end && *h != '/') + t++, h++; + if (h < end) + t++, h++; + } + else { + /* Copy the path element, including the final slash. */ + while (h < end && *h != '/') + *t++ = *h++; + if (h < end) + *t++ = *h++; + } + } + } + + if (t != h) + *t = '\0'; + + return t != h; +} + +enum uri_errno +parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool) +{ + guchar *prefix_end, *host_end, *p; + guchar *lbracket, *rbracket; + gint datalen, n, addrlen; + guchar *frag_or_post, *user_end, *port_end; + + memset (uri, 0, sizeof (*uri)); + + /* Nothing to do for an empty url. 
*/ + if (!*uristring) + return URI_ERRNO_EMPTY; + + uri->string = reencode_escapes (uristring, pool); + msg_debug ("reencoding escapes in original url: '%s'", struri (uri)); + uri->protocollen = get_protocol_length (struri (uri)); + + /* Assume http as default protocol */ + if (!uri->protocollen || (uri->protocol = get_protocol (struri (uri), uri->protocollen)) == PROTOCOL_UNKNOWN) { + /* Make exception for numeric urls */ + p = uri->string; + while (*p && (g_ascii_isalnum (*p) || *p == ':')) { + p ++; + } + if (*p == '\0') { + return URI_ERRNO_INVALID_PROTOCOL; + } + p = g_strconcat ("http://", uri->string, NULL); + uri->string = rspamd_mempool_strdup (pool, p); + g_free (p); + uri->protocol = PROTOCOL_HTTP; + prefix_end = struri (uri) + 7; + } + else { + /* Figure out whether the protocol is known */ + msg_debug ("getting protocol from url: %d", uri->protocol); + + prefix_end = struri (uri) + uri->protocollen; /* ':' */ + + /* Check if there's a digit after the protocol name. */ + if (g_ascii_isdigit (*prefix_end)) { + p = struri (uri); + uri->ip_family = p[uri->protocollen] - '0'; + prefix_end++; + } + if (*prefix_end != ':') { + msg_debug ("invalid protocol in uri"); + return URI_ERRNO_INVALID_PROTOCOL; + } + prefix_end++; + + /* Skip slashes */ + + if (prefix_end[0] == '/' && prefix_end[1] == '/') { + if (prefix_end[2] == '/') { + msg_debug ("too many '/' in uri"); + return URI_ERRNO_TOO_MANY_SLASHES; + } + + prefix_end += 2; + + } + else { + msg_debug ("no '/' in uri"); + return URI_ERRNO_NO_SLASHES; + } + } + + if (get_protocol_free_syntax (uri->protocol)) { + uri->data = prefix_end; + uri->datalen = strlen (prefix_end); + return URI_ERRNO_OK; + + } + else if (uri->protocol == PROTOCOL_FILE) { + datalen = check_uri_file (prefix_end); + frag_or_post = prefix_end + datalen; + + /* Extract the fragment part. 
*/ + if (datalen >= 0) { + if (*frag_or_post == '#') { + uri->fragment = frag_or_post + 1; + uri->fragmentlen = strcspn (uri->fragment, POST_CHAR_S); + frag_or_post = uri->fragment + uri->fragmentlen; + } + if (*frag_or_post == POST_CHAR) { + uri->post = frag_or_post + 1; + } + } + else { + datalen = strlen (prefix_end); + } + + uri->data = prefix_end; + uri->datalen = datalen; + + return URI_ERRNO_OK; + } + + /* Isolate host */ + + /* Get brackets enclosing IPv6 address */ + lbracket = strchr (prefix_end, '['); + if (lbracket) { + rbracket = strchr (lbracket, ']'); + /* [address] is handled only inside of hostname part (surprisingly). */ + if (rbracket && rbracket < prefix_end + strcspn (prefix_end, "/")) + uri->ipv6 = 1; + else + lbracket = rbracket = NULL; + } + else { + rbracket = NULL; + } + + /* Possibly skip auth part */ + host_end = prefix_end + strcspn (prefix_end, "@"); + + if (prefix_end + strcspn (prefix_end, "/?") > host_end && *host_end) { /* we have auth info here */ + + /* Allow '@' in the password component */ + while (strcspn (host_end + 1, "@") < strcspn (host_end + 1, "/?")) + host_end = host_end + 1 + strcspn (host_end + 1, "@"); + + user_end = strchr (prefix_end, ':'); + + if (!user_end || user_end > host_end) { + uri->user = prefix_end; + uri->userlen = host_end - prefix_end; + } + else { + uri->user = prefix_end; + uri->userlen = user_end - prefix_end; + uri->password = user_end + 1; + uri->passwordlen = host_end - user_end - 1; + } + prefix_end = host_end + 1; + } + + if (uri->ipv6 && rbracket != NULL) { + host_end = rbracket + strcspn (rbracket, ":/?"); + } + else { + host_end = prefix_end + strcspn (prefix_end, ":/?"); + } + + if (uri->ipv6) { + addrlen = rbracket - lbracket - 1; + + + uri->host = lbracket + 1; + uri->hostlen = addrlen; + } + else { + uri->host = prefix_end; + uri->hostlen = host_end - prefix_end; + + /* Trim trailing '.'s */ + if (uri->hostlen && uri->host[uri->hostlen - 1] == '.') + return URI_ERRNO_TRAILING_DOTS; + } + 
+ if (*host_end == ':') { /* we have port here */ + port_end = host_end + 1 + strcspn (host_end + 1, "/"); + + host_end++; + + uri->port = host_end; + uri->portlen = port_end - host_end; + + if (uri->portlen == 0) + return URI_ERRNO_NO_PORT_COLON; + + /* We only use 8 bits for portlen so better check */ + if ((gint)uri->portlen != port_end - host_end) + return URI_ERRNO_INVALID_PORT; + + /* test if port is number */ + for (; host_end < port_end; host_end++) + if (!g_ascii_isdigit (*host_end)) + return URI_ERRNO_INVALID_PORT; + + /* Check valid port value, and let show an error message + * about invalid url syntax. */ + if (uri->port && uri->portlen) { + + errno = 0; + n = strtol (uri->port, NULL, 10); + if (errno || !uri_port_is_valid (n)) + return URI_ERRNO_INVALID_PORT; + } + } + + if (*host_end == '/') { + host_end++; + + } + else if (get_protocol_need_slash_after_host (uri->protocol) && *host_end != '?') { + /* The need for slash after the host component depends on the + * need for a host component. -- The dangerous mind of Jonah */ + if (!uri->hostlen) + return URI_ERRNO_NO_HOST; + + return URI_ERRNO_NO_HOST_SLASH; + } + + /* Look for #fragment or POST_CHAR */ + prefix_end = host_end + strcspn (host_end, "#" POST_CHAR_S); + uri->data = host_end; + uri->datalen = prefix_end - host_end; + + if (*prefix_end == '#') { + uri->fragment = prefix_end + 1; + uri->fragmentlen = strcspn (uri->fragment, POST_CHAR_S); + prefix_end = uri->fragment + uri->fragmentlen; + } + + if (*prefix_end == POST_CHAR) { + uri->post = prefix_end + 1; + } + + convert_to_lowercase (uri->string, uri->protocollen); + convert_to_lowercase (uri->host, uri->hostlen); + /* Decode %HH sequences in host name. This is important not so much + to support %HH sequences in host names (which other browser + don't), but to support binary characters (which will have been + converted to %HH by reencode_escapes). 
*/ + if (strchr (uri->host, '%')) { + uri->hostlen = url_calculate_escaped_hostlen (uri->host, uri->hostlen); + } + + url_strip (struri (uri)); + url_unescape (uri->host); + + path_simplify (uri->data); + + return URI_ERRNO_OK; +} + +static const gchar url_braces[] = { + '(', ')' , + '{', '}' , + '[', ']' , + '<', '>' , + '|', '|' , + '\'', '\'' +}; + +static gboolean +is_open_brace (gchar c) +{ + if (c == '(' || + c == '{' || + c == '[' || + c == '<' || + c == '|' || + c == '\'') { + return TRUE; + } + + return FALSE; +} + +static gboolean +url_file_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + match->m_begin = pos; + return TRUE; +} +static gboolean +url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + const gchar *p; + gchar stop; + guint i; + + p = pos + strlen (match->pattern); + stop = *p; + if (*p == '/') { + p ++; + } + + for (i = 0; i < G_N_ELEMENTS (url_braces) / 2; i += 2) { + if (*p == url_braces[i]) { + stop = url_braces[i + 1]; + break; + } + } + + while (p < end && *p != stop && is_urlsafe (*p)) { + p ++; + } + + if (p == begin) { + return FALSE; + } + match->m_len = p - match->m_begin; + + return TRUE; + +} + +static gboolean +url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + const gchar *p = pos; + + /* Try to find the start of the url by finding any non-urlsafe character or whitespace/punctuation */ + while (p >= begin) { + if ((!is_domain (*p) && *p != '.' 
&& *p != '/') || g_ascii_isspace (*p)) { + p ++; + if (!g_ascii_isalnum (*p)) { + /* Urls cannot start with strange symbols */ + return FALSE; + } + match->m_begin = p; + return TRUE; + } + else if (p == begin && p != pos) { + match->m_begin = p; + return TRUE; + } + else if (*p == '.') { + if (p == begin) { + /* Urls cannot start with a dot */ + return FALSE; + } + if (!g_ascii_isalnum (p[1])) { + /* Wrong we have an invalid character after dot */ + return FALSE; + } + } + else if (*p == '/') { + /* Urls cannot contain '/' in their body */ + return FALSE; + } + p --; + } + + return FALSE; +} + +static gboolean +url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + const gchar *p; + + /* A url must be finished by tld, so it must be followed by space character */ + p = pos + strlen (match->pattern); + if (p == end || g_ascii_isspace (*p) || *p == ',') { + match->m_len = p - match->m_begin; + return TRUE; + } + else if (*p == '/' || *p == ':') { + /* Parse arguments, ports by normal way by url default function */ + p = match->m_begin; + /* Check common prefix */ + if (g_ascii_strncasecmp (p, "http://", sizeof ("http://") - 1) == 0) { + return url_web_end (begin, end, match->m_begin + sizeof ("http://") - 1, match); + } + else { + return url_web_end (begin, end, match->m_begin, match); + } + + } + return FALSE; +} + +static gboolean +url_web_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + /* Check what we have found */ + if (pos > begin && (g_ascii_strncasecmp (pos, "www", 3) == 0 || g_ascii_strncasecmp (pos, "ftp", 3) == 0)) { + if (!is_open_brace (*(pos - 1)) && !g_ascii_isspace (*(pos - 1))) { + return FALSE; + } + } + if (*pos == '.') { + /* Urls cannot start with . 
*/ + return FALSE; + } + match->m_begin = pos; + + return TRUE; +} + +static gboolean +url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + const gchar *p, *c; + gchar open_brace = '\0', close_brace = '\0'; + gint brace_stack = 0; + gboolean passwd = FALSE; + guint port, i; + + p = pos + strlen (match->pattern); + for (i = 0; i < G_N_ELEMENTS (url_braces) / 2; i += 2) { + if (*p == url_braces[i]) { + close_brace = url_braces[i + 1]; + open_brace = *p; + break; + } + } + + /* find the end of the domain */ + if (is_atom (*p)) { + /* might be a domain or user@domain */ + c = p; + while (p < end) { + if (!is_atom (*p)) { + break; + } + + p++; + + while (p < end && is_atom (*p)) { + p++; + } + + if ((p + 1) < end && *p == '.' && (is_atom (*(p + 1)) || *(p + 1) == '/')) { + p++; + } + } + + if (*p != '@') { + p = c; + } + else { + p++; + } + + goto domain; + } + else if (is_domain (*p) || (*p & 0x80)) { +domain: + while (p < end) { + if (!is_domain (*p) && !(*p & 0x80)) { + break; + } + + p++; + + while (p < end && (is_domain (*p) || (*p & 0x80))) { + p++; + } + + if ((p + 1) < end && *p == '.' && (is_domain (*(p + 1)) || *(p + 1) == '/' || (*(p + 1) & 0x80))) { + p++; + } + } + } + else { + return FALSE; + } + + if (p < end) { + switch (*p) { + case ':': /* we either have a port or a password */ + p++; + + if (is_digit (*p) || passwd) { + port = (*p++ - '0'); + + while (p < end && is_digit (*p) && port < 65536) { + port = (port * 10) + (*p++ - '0'); + } + + if (!passwd && (port >= 65536 || *p == '@')) { + if (p < end && *p == '@') { + /* this must be a password? 
*/ + goto passwd; + } + else if (p < end) { + return FALSE; + } + + p--; + } + } + else { + passwd: + passwd = TRUE; + c = p; + + while (p < end && is_atom (*p)) { + p++; + } + + if ((p + 2) < end) { + if (*p == '@') { + p++; + if (is_domain (*p)) { + goto domain; + } + } + + return FALSE; + } + } + + if (p >= end || *p != '/') { + break; + } + + /* we have a '/' so there could be a path - fall through */ + case '/': /* we've detected a path component to our url */ + p++; + case '?': + while (p < end && is_urlsafe (*p)) { + if (*p == open_brace) { + brace_stack++; + } + else if (*p == close_brace) { + brace_stack--; + if (brace_stack == -1) { + break; + } + } + p++; + } + + break; + default: + break; + } + } + + /* urls are extremely unlikely to end with any + * punctuation, so strip any trailing + * punctuation off. Also strip off any closing + * double-quotes. */ + while (p > pos && strchr (",.:;?!-|}])\"", p[-1])) { + p--; + } + + match->m_len = (p - pos); + + return TRUE; +} + + +static gboolean +url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + const gchar *p; + /* Check what we have found */ + if (pos > begin && *pos == '@') { + /* Try to extract it with username */ + p = pos - 1; + while (p > begin && (is_domain (*p) || *p == '.' || *p == '_')) { + p --; + } + if (!is_domain (*p) && p != pos - 1) { + match->m_begin = p + 1; + return TRUE; + } + else if (p == begin) { + match->m_begin = p; + return TRUE; + } + } + else { + p = pos + strlen (match->pattern); + if (is_domain (*p)) { + match->m_begin = pos; + return TRUE; + } + } + return FALSE; +} + +static gboolean +url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + const gchar *p; + gboolean got_at = FALSE; + + p = pos + strlen (match->pattern); + if (*pos == '@') { + got_at = TRUE; + } + + while (p < end && (is_domain (*p) || *p == '_' + || (*p == '@' && !got_at) || + (*p == '.' 
&& p + 1 < end && is_domain (*(p + 1))))) { + if (*p == '@') { + got_at = TRUE; + } + p ++; + } + match->m_len = p - match->m_begin; + match->add_prefix = TRUE; + return got_at; +} + +void +url_parse_text (rspamd_mempool_t * pool, struct rspamd_task *task, struct mime_text_part *part, gboolean is_html) +{ + gint rc; + gchar *url_str = NULL, *url_start, *url_end; + struct uri *new; + struct process_exception *ex; + gchar *p, *end, *begin; + + + if (!part->orig->data || part->orig->len == 0) { + msg_warn ("got empty text part"); + return; + } + + if (url_init () == 0) { + if (is_html) { + begin = part->orig->data; + end = begin + part->orig->len; + p = begin; + } + else { + begin = part->content->data; + end = begin + part->content->len; + p = begin; + } + while (p < end) { + if (url_try_text (pool, p, end - p, &url_start, &url_end, &url_str, is_html)) { + if (url_str != NULL) { + new = rspamd_mempool_alloc0 (pool, sizeof (struct uri)); + ex = rspamd_mempool_alloc0 (pool, sizeof (struct process_exception)); + if (new != NULL) { + g_strstrip (url_str); + rc = parse_uri (new, url_str, pool); + if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) && + new->hostlen > 0) { + ex->pos = url_start - begin; + ex->len = url_end - url_start; + if (new->protocol == PROTOCOL_MAILTO) { + if (new->userlen > 0) { + if (!g_tree_lookup (task->emails, new)) { + g_tree_insert (task->emails, new, new); + } + } + } + else { + if (!g_tree_lookup (task->urls, new)) { + g_tree_insert (task->urls, new, new); + } + } + part->urls_offset = g_list_prepend (part->urls_offset, ex); + } + else if (rc != URI_ERRNO_OK) { + msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc)); + } + } + } + } + else { + break; + } + p = url_end + 1; + } + } + /* Handle offsets of this part */ + if (part->urls_offset != NULL) { + part->urls_offset = g_list_reverse (part->urls_offset); + rspamd_mempool_add_destructor (task->task_pool, 
(rspamd_mempool_destruct_t)g_list_free, part->urls_offset); + } +} + +gboolean +url_try_text (rspamd_mempool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **fin, gchar **url_str, gboolean is_html) +{ + const gchar *end, *pos; + gint idx, l; + struct url_matcher *matcher; + url_match_t m; + + end = begin + len; + if (url_init () == 0) { + if ((pos = rspamd_trie_lookup (url_scanner->patterns, begin, len, &idx)) == NULL) { + return FALSE; + } + else { + matcher = &matchers[idx]; + if ((matcher->flags & URL_FLAG_NOHTML) && is_html) { + /* Do not try to match non-html like urls in html texts */ + return FALSE; + } + m.pattern = matcher->pattern; + m.prefix = matcher->prefix; + m.add_prefix = FALSE; + if (matcher->start (begin, end, pos, &m) && matcher->end (begin, end, pos, &m)) { + if (m.add_prefix) { + l = m.m_len + 1 + strlen (m.prefix); + *url_str = rspamd_mempool_alloc (pool, l); + rspamd_snprintf (*url_str, l, "%s%*s", m.prefix, m.m_len, m.m_begin); + } + else { + *url_str = rspamd_mempool_alloc (pool, m.m_len + 1); + memcpy (*url_str, m.m_begin, m.m_len); + (*url_str)[m.m_len] = '\0'; + } + if (start != NULL) { + *start = (gchar *)m.m_begin; + } + if (fin != NULL) { + *fin = (gchar *)m.m_begin + m.m_len; + } + } + else { + *url_str = NULL; + if (start != NULL) { + *start = (gchar *)pos; + } + if (fin != NULL) { + *fin = (gchar *)pos + strlen (m.prefix); + } + } + + return TRUE; + } + } + + return FALSE; +} + +/* + * vi: ts=4 + */ diff --git a/src/libserver/url.h b/src/libserver/url.h new file mode 100644 index 000000000..60535ba5c --- /dev/null +++ b/src/libserver/url.h @@ -0,0 +1,111 @@ +/* URL check functions */ +#ifndef URL_H +#define URL_H + +#include "config.h" +#include "mem_pool.h" + +struct rspamd_task; +struct mime_text_part; + +struct uri { + /* The start of the uri (and thus start of the protocol string). */ + gchar *string; + + /* The internal type of protocol. Can _never_ be PROTOCOL_UNKNOWN. 
*/ + gint protocol; /* enum protocol */ + + gint ip_family; + + gchar *user; + gchar *password; + gchar *host; + gchar *port; + /* @data can contain both the path and query uri fields. + * It can never be NULL but can have zero length. */ + gchar *data; + gchar *fragment; + /* @post can contain some special encoded form data, used internally + * to make form data handling more efficient. The data is marked by + * POST_CHAR in the uri string. */ + gchar *post; + + struct uri *phished_url; + + /* @protocollen should only be usable if @protocol is either + * PROTOCOL_USER or an uri string should be composed. */ + guint protocollen; + guint userlen; + guint passwordlen; + guint hostlen; + guint portlen; + guint datalen; + guint fragmentlen; + + /* Flags */ + gboolean ipv6; /* URI contains IPv6 host */ + gboolean form; /* URI originated from form */ + gboolean is_phished; /* URI maybe phishing */ +}; + +enum uri_errno { + URI_ERRNO_OK, /* Parsing went well */ + URI_ERRNO_EMPTY, /* The URI string was empty */ + URI_ERRNO_INVALID_PROTOCOL, /* No protocol was found */ + URI_ERRNO_NO_SLASHES, /* Slashes after protocol missing */ + URI_ERRNO_TOO_MANY_SLASHES, /* Too many slashes after protocol */ + URI_ERRNO_TRAILING_DOTS, /* '.' 
/*
 * Parse urls inside text
 *
 * Urls that parse successfully and have a host are added to the task's
 * url tree; mailto urls with a user part go to the task's email tree
 * instead.  The position of each such url is recorded in the part's
 * urls_offset list.
 *
 * @param pool memory pool
 * @param task task object
 * @param part current text part
 * @param is_html turn on html heuristic: the original part data is
 *        scanned instead of the processed content, and matchers that
 *        are not meant for html are not applied
 */
void url_parse_text (rspamd_mempool_t *pool, struct rspamd_task *task, struct mime_text_part *part, gboolean is_html);

/*
 * Parse a single url into an uri structure
 *
 * The uri fields point into the parsed string rather than owning
 * copies.  When the input needs no re-encoding and no default protocol,
 * that string is uristring itself, which is then modified in place
 * (lowercasing, stripping, unescaping, path simplification).
 *
 * @param uri url object, must be pre allocated; it is zeroed first
 * @param uristring text form of url
 * @param pool memory pool
 * @return URI_ERRNO_OK on success, another uri_errno value otherwise
 */
enum uri_errno parse_uri(struct uri *uri, gchar *uristring, rspamd_mempool_t *pool);

/*
 * Try to extract url from a text
 * @param pool memory pool
 * @param begin begin of text
 * @param len length of text
 * @param start storage for start position of url found (or NULL)
 * @param end storage for end position of url found (or NULL)
 * @param url_str storage for url string; receives NULL when a url
 *        pattern was found but the text around it is not a valid url.
 *        NOTE(review): this parameter used to be documented as
 *        "(or NULL)" - confirm the implementation checks for NULL
 *        before relying on that.
 * @param is_html when TRUE, patterns that are not meant for html text
 *        are rejected
 * @return TRUE if a url pattern is found in specified text (check
 *         url_str to see whether a url was actually extracted)
 */
gboolean url_try_text (rspamd_mempool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **end, gchar **url_str, gboolean is_html);

/*
 * Return text representation of url parsing error
 */
const gchar* url_strerror (enum uri_errno err);
http.c + logger.c + map.c + memcached.c + mem_pool.c + printf.c + radix.c + rrd.c + trie.c + upstream.c + util.c) +# Rspamdutil +ADD_LIBRARY(rspamd-util ${LINK_TYPE} ${LIBRSPAMDUTILSRC}) +IF(CMAKE_COMPILER_IS_GNUCC) +SET_TARGET_PROPERTIES(rspamd-util PROPERTIES COMPILE_FLAGS "-fno-strict-aliasing") +ENDIF(CMAKE_COMPILER_IS_GNUCC) + + +TARGET_LINK_LIBRARIES(rspamd-util ${RSPAMD_REQUIRED_LIBRARIES}) +TARGET_LINK_LIBRARIES(rspamd-util pcre) +TARGET_LINK_LIBRARIES(rspamd-util ucl) +TARGET_LINK_LIBRARIES(rspamd-util ottery) +TARGET_LINK_LIBRARIES(rspamd-util rspamd-http-parser) +TARGET_LINK_LIBRARIES(rspamd-util event) +TARGET_LINK_LIBRARIES(rspamd-util xxhash) +IF(OPENSSL_FOUND) + TARGET_LINK_LIBRARIES(rspamd-util ${OPENSSL_LIBRARIES}) +ENDIF(OPENSSL_FOUND) + +IF(NOT DEBIAN_BUILD) +SET_TARGET_PROPERTIES(rspamd-util PROPERTIES VERSION ${RSPAMD_VERSION}) +ENDIF(NOT DEBIAN_BUILD) + +IF(GLIB_COMPAT) + INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/contrib/lgpl") + TARGET_LINK_LIBRARIES(rspamd-util glibadditions) +ENDIF(GLIB_COMPAT) + +IF(NO_SHARED MATCHES "OFF") + INSTALL(TARGETS rspamd-util + LIBRARY DESTINATION ${LIBDIR} + PUBLIC_HEADER DESTINATION include) +ENDIF(NO_SHARED MATCHES "OFF") \ No newline at end of file diff --git a/src/libutil/aio_event.c b/src/libutil/aio_event.c new file mode 100644 index 000000000..ccda37083 --- /dev/null +++ b/src/libutil/aio_event.c @@ -0,0 +1,487 @@ +/* Copyright (c) 2010-2011, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
#ifdef HAVE_SYS_EVENTFD_H
#include <sys/eventfd.h>
#endif

#ifdef HAVE_AIO_H
#include <aio.h>
#endif

/*
 * Linux syscall numbers for the native aio interface.  They are spelled
 * out here so that the io_*() wrappers below can use syscall() directly
 * instead of depending on libaio.
 */
#if defined(__i386__)
# define SYS_io_setup 245
# define SYS_io_destroy 246
# define SYS_io_getevents 247
# define SYS_io_submit 248
# define SYS_io_cancel 249
#elif defined(__x86_64__)
# define SYS_io_setup 206
# define SYS_io_destroy 207
# define SYS_io_getevents 208
# define SYS_io_submit 209
# define SYS_io_cancel 210
#else
# warning "aio is not supported on this platform, please contact author for details"
# define SYS_io_setup 0
# define SYS_io_destroy 0
# define SYS_io_getevents 0
# define SYS_io_submit 0
# define SYS_io_cancel 0
#endif

/*
 * eventfd(2) is numbered per architecture as well: 323 is the i386
 * number, x86_64 uses 284.  Only the eventfd() fallback defined when
 * <sys/eventfd.h> is missing uses this value.
 */
#if defined(__x86_64__)
# define SYS_eventfd 284
#else
# define SYS_eventfd 323
#endif

/* Number of requests the aio context is set up for (see io_setup) */
#define MAX_AIO_EV 64
/*
 * Request block handed to the kernel by io_submit().  The field order
 * and widths are the kernel's; do not reorder or resize anything.
 * rspamd_aio_read() and rspamd_aio_write() fill one of these per
 * request and store the io_cbdata pointer in aio_data, which comes back
 * in io_event.data when the request completes.
 */
struct iocb {
	/* these are internal to the kernel/libc. */
	guint64 aio_data; /* data to be returned in event's data */
	/* PADDED() orders the two 32 bit halves according to byte order */
	guint32 PADDED(aio_key, aio_reserved1);
	/* the kernel sets aio_key to the req # */

	/* common fields */
	guint16 aio_lio_opcode; /* one of the IO_CMD_* values of io_iocb_cmd_t above */
	gint16 aio_reqprio;
	guint32 aio_fildes; /* file descriptor the request operates on */

	guint64 aio_buf; /* address of the data buffer, stored as an integer */
	guint64 aio_nbytes; /* number of bytes to transfer */
	gint64 aio_offset; /* file offset of the transfer */

	/* extra parameters */
	guint64 aio_reserved2; /* TODO: use this for a (struct sigevent *) */

	/* flags for the "struct iocb" */
	guint32 aio_flags;

	/*
	 * if the IOCB_FLAG_RESFD flag of "aio_flags" is set, this is an
	 * eventfd to signal AIO readiness to
	 */
	guint32 aio_resfd;
};
#ifdef LINUX
/*
 * Eventfd read callback.
 *
 * Runs when the eventfd registered by rspamd_aio_init() becomes
 * readable.  Reads the counter of completed requests, collects the
 * completion events in batches, calls the user callback of each request
 * and releases the per-request data (including the aligned write
 * buffer, if any).
 *
 * @param fd the eventfd descriptor
 * @param what event flags (unused)
 * @param ud the struct aio_context that owns the eventfd
 */
static void
rspamd_eventfdcb (gint fd, gshort what, gpointer ud)
{
	struct aio_context *ctx = ud;
	guint64 ready;
	gint done, i;
	struct io_event event[32];
	struct timespec ts;
	struct io_cbdata *ev_data;

	/* Eventfd returns number of events ready got from kernel */
	if (read (fd, &ready, 8) != 8) {
		if (errno == EAGAIN) {
			return;
		}
		msg_err ("eventfd read returned error: %s", strerror (errno));
		/* `ready' was not filled in, so there is nothing to collect */
		return;
	}

	/* Zero timeout: never block inside the event loop */
	ts.tv_sec = 0;
	ts.tv_nsec = 0;

	while (ready) {
		/* Get events ready */
		done = io_getevents (ctx->io_ctx, 1, G_N_ELEMENTS (event), event, &ts);

		if (done > 0) {
			ready -= done;

			for (i = 0; i < done; i ++) {
				ev_data = (struct io_cbdata *) (uintptr_t) event[i].data;
				/* Call this callback */
				ev_data->cb (ev_data->fd, event[i].res, ev_data->len, ev_data->buf, ev_data->ud);
				if (ev_data->io_buf) {
					free (ev_data->io_buf);
				}
				g_slice_free1 (sizeof (struct io_cbdata), ev_data);
			}
		}
		else if (done == 0) {
			/* No more events are ready */
			return;
		}
		else {
			msg_err ("io_getevents failed: %s", strerror (errno));
			return;
		}
	}
}

#endif
%s", strerror (errno)); + } + else { + /* Set this socket non-blocking */ + if (make_socket_nonblocking (new->event_fd) == -1) { + msg_err ("non blocking for eventfd failed: %s", strerror (errno)); + close (new->event_fd); + } + else { + event_set (&new->eventfd_ev, new->event_fd, EV_READ|EV_PERSIST, rspamd_eventfdcb, new); + event_base_set (new->base, &new->eventfd_ev); + event_add (&new->eventfd_ev, NULL); + if (io_setup (MAX_AIO_EV, &new->io_ctx) == -1) { + msg_err ("io_setup failed: %s", strerror (errno)); + close (new->event_fd); + } + else { + new->has_aio = TRUE; + } + } + } +#elif defined(HAVE_AIO_H) + /* TODO: implement this */ +#endif + + return new; +} + +/** + * Open file for aio + */ +gint +rspamd_aio_open (struct aio_context *ctx, const gchar *path, int flags) +{ + gint fd = -1; + /* Fallback */ + if (!ctx->has_aio) { + return open (path, flags); + } +#ifdef LINUX + + fd = open (path, flags | O_DIRECT); + + return fd; +#elif defined(HAVE_AIO_H) + fd = open (path, flags); +#endif + + return fd; +} + +/** + * Asynchronous read of file + */ +gint +rspamd_aio_read (gint fd, gpointer buf, guint64 len, guint64 offset, struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud) +{ + struct io_cbdata *cbdata; + gint r = -1; + + if (ctx->has_aio) { +#ifdef LINUX + struct iocb *iocb[1]; + + cbdata = g_slice_alloc (sizeof (struct io_cbdata)); + cbdata->cb = cb; + cbdata->buf = buf; + cbdata->len = len; + cbdata->ud = ud; + cbdata->fd = fd; + cbdata->io_buf = NULL; + + iocb[0] = alloca (sizeof (struct iocb)); + memset (iocb[0], 0, sizeof (struct iocb)); + iocb[0]->aio_fildes = fd; + iocb[0]->aio_lio_opcode = IO_CMD_PREAD; + iocb[0]->aio_reqprio = 0; + iocb[0]->aio_buf = (guint64)((uintptr_t)buf); + iocb[0]->aio_nbytes = len; + iocb[0]->aio_offset = offset; + iocb[0]->aio_flags |= (1 << 0) /* IOCB_FLAG_RESFD */; + iocb[0]->aio_resfd = ctx->event_fd; + iocb[0]->aio_data = (guint64)((uintptr_t)cbdata); + + /* Iocb is copied to kernel internally, so it is safe to put it 
on stack */ + if (io_submit (ctx->io_ctx, 1, iocb) == 1) { + return len; + } + else { + if (errno == EAGAIN || errno == ENOSYS) { + /* Fall back to sync read */ + goto blocking; + } + return -1; + } + +#elif defined(HAVE_AIO_H) +#endif + } + else { + /* Blocking variant */ +blocking: +#ifdef _LARGEFILE64_SOURCE + r = lseek64 (fd, offset, SEEK_SET); +#else + r = lseek (fd, offset, SEEK_SET); +#endif + if (r > 0) { + r = read (fd, buf, len); + if (r >= 0) { + cb (fd, 0, r, buf, ud); + } + else { + cb (fd, r, -1, buf, ud); + } + } + } + + return r; +} + +/** + * Asynchronous write of file + */ +gint +rspamd_aio_write (gint fd, gpointer buf, guint64 len, guint64 offset, struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud) +{ + struct io_cbdata *cbdata; + gint r = -1; + + if (ctx->has_aio) { +#ifdef LINUX + struct iocb *iocb[1]; + + cbdata = g_slice_alloc (sizeof (struct io_cbdata)); + cbdata->cb = cb; + cbdata->buf = buf; + cbdata->len = len; + cbdata->ud = ud; + cbdata->fd = fd; + /* We need to align pointer on boundary of 512 bytes here */ + if (posix_memalign (&cbdata->io_buf, 512, len) != 0) { + return -1; + } + memcpy (cbdata->io_buf, buf, len); + + iocb[0] = alloca (sizeof (struct iocb)); + memset (iocb[0], 0, sizeof (struct iocb)); + iocb[0]->aio_fildes = fd; + iocb[0]->aio_lio_opcode = IO_CMD_PWRITE; + iocb[0]->aio_reqprio = 0; + iocb[0]->aio_buf = (guint64)((uintptr_t)cbdata->io_buf); + iocb[0]->aio_nbytes = len; + iocb[0]->aio_offset = offset; + iocb[0]->aio_flags |= (1 << 0) /* IOCB_FLAG_RESFD */; + iocb[0]->aio_resfd = ctx->event_fd; + iocb[0]->aio_data = (guint64)((uintptr_t)cbdata); + + /* Iocb is copied to kernel internally, so it is safe to put it on stack */ + if (io_submit (ctx->io_ctx, 1, iocb) == 1) { + return len; + } + else { + if (errno == EAGAIN || errno == ENOSYS) { + /* Fall back to sync read */ + goto blocking; + } + return -1; + } + +#elif defined(HAVE_AIO_H) +#endif + } + else { + /* Blocking variant */ +blocking: +#ifdef 
_LARGEFILE64_SOURCE + r = lseek64 (fd, offset, SEEK_SET); +#else + r = lseek (fd, offset, SEEK_SET); +#endif + if (r > 0) { + r = write (fd, buf, len); + if (r >= 0) { + cb (fd, 0, r, buf, ud); + } + else { + cb (fd, r, -1, buf, ud); + } + } + } + + return r; +} + +/** + * Close of aio operations + */ +gint +rspamd_aio_close (gint fd, struct aio_context *ctx) +{ + gint r = -1; + + if (ctx->has_aio) { +#ifdef LINUX + struct iocb iocb; + struct io_event ev; + + memset (&iocb, 0, sizeof (struct iocb)); + iocb.aio_fildes = fd; + iocb.aio_lio_opcode = IO_CMD_NOOP; + + /* Iocb is copied to kernel internally, so it is safe to put it on stack */ + r = io_cancel (ctx->io_ctx, &iocb, &ev); + close (fd); + return r; + +#elif defined(HAVE_AIO_H) +#endif + } + + r = close (fd); + + return r; +} diff --git a/src/libutil/aio_event.h b/src/libutil/aio_event.h new file mode 100644 index 000000000..45f6015de --- /dev/null +++ b/src/libutil/aio_event.h @@ -0,0 +1,67 @@ +/* Copyright (c) 2010-2011, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef AIO_EVENT_H_ +#define AIO_EVENT_H_ + +#include "config.h" + +/** + * AIO context + */ +struct aio_context; + +/** + * Callback for notifying + */ +typedef void (*rspamd_aio_cb) (gint fd, gint res, guint64 len, gpointer data, gpointer ud); + +/** + * Initialize aio with specified event base + */ +struct aio_context* rspamd_aio_init (struct event_base *base); + +/** + * Open file for aio + */ +gint rspamd_aio_open (struct aio_context *ctx, const gchar *path, int flags); + +/** + * Asynchronous read of file + */ +gint rspamd_aio_read (gint fd, gpointer buf, guint64 len, guint64 offset, + struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud); + +/** + * Asynchronous write of file + */ +gint rspamd_aio_write (gint fd, gpointer buf, guint64 len, guint64 offset, + struct aio_context *ctx, rspamd_aio_cb cb, gpointer ud); + +/** + * Close of aio operations + */ +gint rspamd_aio_close (gint fd, struct aio_context *ctx); + +#endif /* AIO_EVENT_H_ */ diff --git a/src/libutil/bloom.c b/src/libutil/bloom.c new file mode 100644 index 000000000..f857d2e49 --- /dev/null +++ b/src/libutil/bloom.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "bloom.h" +#include "xxhash.h" + +/* 4 bits are used for counting (implementing delete operation) */ +#define SIZE_BIT 4 + +/* These macroes are for 4 bits for counting element */ +#define INCBIT(a, n, acc) do { \ + acc = a[n * SIZE_BIT / CHAR_BIT] & (0xF << (n % (CHAR_BIT / SIZE_BIT) * SIZE_BIT)); \ + acc ++; \ + acc &= 0xF; \ + \ + a[n * SIZE_BIT / CHAR_BIT] &= (0xF << (4 - (n % (CHAR_BIT/SIZE_BIT) * SIZE_BIT))); \ + a[n * SIZE_BIT / CHAR_BIT] |= (acc << (n % (CHAR_BIT/SIZE_BIT) * SIZE_BIT)); \ +} while (0); + +#define DECBIT(a, n, acc) do { \ + acc = a[n * SIZE_BIT / CHAR_BIT] & (0xF << (n % (CHAR_BIT / SIZE_BIT) * SIZE_BIT)); \ + acc --; \ + acc &= 0xF; \ + \ + a[n * SIZE_BIT / CHAR_BIT] &= (0xF << (4 - (n % (CHAR_BIT/SIZE_BIT) * SIZE_BIT))); \ + a[n * SIZE_BIT / CHAR_BIT] |= (acc << (n % (CHAR_BIT/SIZE_BIT) * SIZE_BIT)); \ +} while (0); + +#define GETBIT(a, n) (a[n * SIZE_BIT / CHAR_BIT] & (0xF << (n % (CHAR_BIT/SIZE_BIT) * SIZE_BIT))) + +/* Common hash functions */ + + +rspamd_bloom_filter_t * +rspamd_bloom_create (size_t size, size_t nfuncs, ...) 
+{ + rspamd_bloom_filter_t *bloom; + va_list l; + gsize n; + + if (!(bloom = g_malloc (sizeof (rspamd_bloom_filter_t)))) { + return NULL; + } + if (!(bloom->a = g_new0 (gchar, (size + CHAR_BIT - 1) / CHAR_BIT * SIZE_BIT))) { + g_free (bloom); + return NULL; + } + if (!(bloom->seeds = g_new0 (guint32, nfuncs))) { + g_free (bloom->a); + g_free (bloom); + return NULL; + } + + va_start (l, nfuncs); + for (n = 0; n < nfuncs; ++n) { + bloom->seeds[n] = va_arg (l, guint32); + } + va_end (l); + + bloom->nfuncs = nfuncs; + bloom->asize = size; + + return bloom; +} + +void +rspamd_bloom_destroy (rspamd_bloom_filter_t * bloom) +{ + g_free (bloom->a); + g_free (bloom->seeds); + g_free (bloom); +} + +gboolean +rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s) +{ + size_t n, len; + u_char t; + guint v; + + if (s == NULL) { + return FALSE; + } + len = strlen (s); + for (n = 0; n < bloom->nfuncs; ++n) { + v = XXH32 (s, len, bloom->seeds[n]) % bloom->asize; + INCBIT (bloom->a, v, t); + } + + return TRUE; +} + +gboolean +rspamd_bloom_del (rspamd_bloom_filter_t * bloom, const gchar *s) +{ + size_t n, len; + u_char t; + guint v; + + if (s == NULL) { + return FALSE; + } + len = strlen (s); + for (n = 0; n < bloom->nfuncs; ++n) { + v = XXH32 (s, len, bloom->seeds[n]) % bloom->asize; + DECBIT (bloom->a, v, t); + } + + return TRUE; + +} + +gboolean +rspamd_bloom_check (rspamd_bloom_filter_t * bloom, const gchar *s) +{ + size_t n, len; + guint v; + + if (s == NULL) { + return FALSE; + } + len = strlen (s); + for (n = 0; n < bloom->nfuncs; ++n) { + v = XXH32 (s, len, bloom->seeds[n]) % bloom->asize; + if (!(GETBIT (bloom->a, v))) { + return FALSE; + } + } + + return TRUE; +} diff --git a/src/libutil/bloom.h b/src/libutil/bloom.h new file mode 100644 index 000000000..380143c80 --- /dev/null +++ b/src/libutil/bloom.h @@ -0,0 +1,48 @@ +#ifndef __RSPAMD_BLOOM_H__ +#define __RSPAMD_BLOOM_H__ + +#include "config.h" + +typedef struct rspamd_bloom_filter_s { + size_t asize; + gchar 
*a; + size_t nfuncs; + guint32 *seeds; +} rspamd_bloom_filter_t; + + +/* + * Some random uint32 seeds for hashing + */ +#define RSPAMD_DEFAULT_BLOOM_HASHES 8, 0x61782caaU, 0x79ab8141U, 0xe45ee2d1U, \ + 0xf97542d1U, 0x1e2623edU, 0xf5a23cfeU, 0xa41b2508U, 0x85abdce8U + +/* + * Create new bloom filter + * @param size length of bloom buffer + * @param nfuncs number of hash functions + * @param ... hash functions list + */ +rspamd_bloom_filter_t* rspamd_bloom_create (size_t size, size_t nfuncs, ...); + +/* + * Destroy bloom filter + */ +void rspamd_bloom_destroy (rspamd_bloom_filter_t * bloom); + +/* + * Add a string to bloom filter + */ +gboolean rspamd_bloom_add (rspamd_bloom_filter_t * bloom, const gchar *s); + +/* + * Delete a string from bloom filter + */ +gboolean rspamd_bloom_del (rspamd_bloom_filter_t * bloom, const gchar *s); + +/* + * Check whether this string is in bloom filter (algorithm produces FALSE-POSITIVES, so result must be checked if it is positive) + */ +gboolean rspamd_bloom_check (rspamd_bloom_filter_t * bloom, const gchar *s); + +#endif diff --git a/src/libutil/diff.c b/src/libutil/diff.c new file mode 100644 index 000000000..4038d8680 --- /dev/null +++ b/src/libutil/diff.c @@ -0,0 +1,445 @@ +/* diff - compute a shortest edit script (SES) given two sequences + * Copyright (c) 2004 Michael B. 
Allen + * Copyright (c) 2010-2014, Vsevolod Stakhov + * + * The MIT License + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* This algorithm is basically Myers' solution to SES/LCS with + * the Hirschberg linear space refinement as described in the + * following publication: + * + * E. Myers, ``An O(ND) Difference Algorithm and Its Variations,'' + * Algorithmica 1, 2 (1986), 251-266. + * http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps + * + * This is the same algorithm used by GNU diff(1). 
+ */ + + +#include "config.h" +#include "diff.h" + + +#define FV(k) _v(ctx, (k), 0) +#define RV(k) _v(ctx, (k), 1) + +#define MAX_DIFF 1024 + +struct _ctx +{ + GArray *buf; + GArray *ses; + gint si; + gint dmax; +}; + +struct middle_snake +{ + gint x, y, u, v; +}; + +static +void maybe_resize_array(GArray *arr, guint k) +{ + if (k > arr->len) { + g_array_set_size (arr, k); + } + +} + +static void +_setv(struct _ctx *ctx, gint k, gint r, gint val) +{ + gint j; + gint *i; + /* Pack -N to N ginto 0 to N * 2 + */ + j = k <= 0 ? -k * 4 + r : k * 4 + (r - 2); + + maybe_resize_array (ctx->buf, j); + i = (gint *) &g_array_index (ctx->buf, gint, j); + *i = val; +} + +static gint +_v(struct _ctx *ctx, gint k, gint r) +{ + gint j; + + j = k <= 0 ? -k * 4 + r : k * 4 + (r - 2); + + return *((gint *) &g_array_index (ctx->buf, gint, j)); +} + +static gint +_find_middle_snake(const void *a, gint aoff, gint n, const void *b, + gint boff, gint m, struct _ctx *ctx, struct middle_snake *ms) +{ + gint delta, odd, mid, d; + + delta = n - m; + odd = delta & 1; + mid = (n + m) / 2; + mid += odd; + + _setv (ctx, 1, 0, 0); + _setv (ctx, delta - 1, 1, n); + + for (d = 0; d <= mid; d++) { + gint k, x, y; + + if ((2 * d - 1) >= ctx->dmax) { + return ctx->dmax; + } + + for (k = d; k >= -d; k -= 2) { + if (k == -d || (k != d && FV(k - 1) < FV(k + 1))) { + x = FV(k + 1); + } + else { + x = FV(k - 1) + 1; + } + y = x - k; + + ms->x = x; + ms->y = y; + const guchar *a0 = (const guchar *) a + aoff; + const guchar *b0 = (const guchar *) b + boff; + while (x < n && y < m && a0[x] == b0[y]) { + x++; + y++; + } + _setv (ctx, k, 0, x); + + if (odd && k >= (delta - (d - 1)) && k <= (delta + (d - 1))) { + if (x >= RV(k)) { + ms->u = x; + ms->v = y; + return 2 * d - 1; + } + } + } + for (k = d; k >= -d; k -= 2) { + gint kr = (n - m) + k; + + if (k == d || (k != -d && RV(kr - 1) < RV(kr + 1))) { + x = RV(kr - 1); + } + else { + x = RV(kr + 1) - 1; + } + y = x - kr; + + ms->u = x; + ms->v = y; + const guchar 
*a0 = (const guchar *) a + aoff; + const guchar *b0 = (const guchar *) b + boff; + while (x > 0 && y > 0 && a0[x - 1] == b0[y - 1]) { + x--; + y--; + } + _setv (ctx, kr, 1, x); + + if (!odd && kr >= -d && kr <= d) { + if (x <= FV(kr)) { + ms->x = x; + ms->y = y; + return 2 * d; + } + } + } + } + + errno = EFAULT; + + return -1; +} + +static void +_edit(struct _ctx *ctx, gint op, gint off, gint len) +{ + struct diff_edit *e = NULL, newe; + + if (len == 0 || ctx->ses == NULL) { + return; + } + /* + * Add an edit to the SES (or + * coalesce if the op is the same) + */ + if (ctx->ses->len != 0) { + e = &g_array_index (ctx->ses, struct diff_edit, ctx->ses->len - 1); + } + if (e == NULL || e->op != op) { + newe.op = op; + newe.off = off; + newe.len = len; + g_array_append_val (ctx->ses, newe); + } + else { + e->len += len; + } +} + +static gint +_ses(const void *a, gint aoff, gint n, const void *b, gint boff, + gint m, struct _ctx *ctx) +{ + struct middle_snake ms = { + .x = 0, + .y = 0, + .u = 0, + .v = 0 + }; + gint d; + + if (n == 0) { + _edit (ctx, DIFF_INSERT, boff, m); + d = m; + } + else if (m == 0) { + _edit (ctx, DIFF_DELETE, aoff, n); + d = n; + } + else { + /* Find the middle "snake" around which we + * recursively solve the sub-problems. + */ + d = _find_middle_snake (a, aoff, n, b, boff, m, ctx, &ms); + if (d == -1) { + return -1; + } + else if (d >= ctx->dmax) { + return ctx->dmax; + } + else if (ctx->ses == NULL) { + return d; + } + else if (d > 1) { + if (_ses (a, aoff, ms.x, b, boff, ms.y, ctx) == -1) { + return -1; + } + + _edit (ctx, DIFF_MATCH, aoff + ms.x, ms.u - ms.x); + + aoff += ms.u; + boff += ms.v; + n -= ms.u; + m -= ms.v; + if (_ses (a, aoff, n, b, boff, m, ctx) == -1) { + return -1; + } + } + else { + gint x = ms.x; + gint u = ms.u; + + /* There are only 4 base cases when the + * edit distance is 1. 
+ * + * n > m m > n + * + * - | + * \ \ x != u + * \ \ + * + * \ \ + * \ \ x == u + * - | + */ + + if (m > n) { + if (x == u) { + _edit (ctx, DIFF_MATCH, aoff, n); + _edit (ctx, DIFF_INSERT, boff + (m - 1), 1); + } + else { + _edit (ctx, DIFF_INSERT, boff, 1); + _edit (ctx, DIFF_MATCH, aoff, n); + } + } + else { + if (x == u) { + _edit (ctx, DIFF_MATCH, aoff, m); + _edit (ctx, DIFF_DELETE, aoff + (n - 1), 1); + } + else { + _edit (ctx, DIFF_DELETE, aoff, 1); + _edit (ctx, DIFF_MATCH, aoff + 1, m); + } + } + } + } + + return d; +} + +gint +rspamd_diff(const void *a, gint aoff, gint n, const void *b, gint boff, gint m, + gint dmax, GArray *ses, gint *sn) +{ + struct _ctx ctx; + gint d, x, y; + struct diff_edit *e = NULL; + GArray *tmp; + + tmp = g_array_sized_new (FALSE, TRUE, sizeof(gint), dmax); + ctx.buf = tmp; + ctx.ses = ses; + ctx.si = 0; + ctx.dmax = dmax; + + /* The _ses function assumes the SES will begin or end with a delete + * or insert. The following will insure this is true by eating any + * beginning matches. This is also a quick to process sequences + * that match entirely. + */ + x = y = 0; + const guchar *a0 = (const guchar *) a + aoff; + const guchar *b0 = (const guchar *) b + boff; + while (x < n && y < m && a0[x] == b0[y]) { + x++; + y++; + } + _edit (&ctx, DIFF_MATCH, aoff, x); + + if ((d = _ses (a, aoff + x, n - x, b, boff + y, m - y, &ctx)) == -1) { + g_array_free (tmp, TRUE); + return -1; + } + if (ses && sn && e) { + *sn = e->op ? 
ctx.si + 1 : 0; + } + + g_array_free (tmp, TRUE); + return d; +} + +static guint32 +compare_diff_distance_unnormalized (f_str_t *s1, f_str_t *s2) +{ + GArray *ses; + struct diff_edit *e; + guint i; + guint32 distance = 0; + + ses = g_array_sized_new (FALSE, TRUE, sizeof (struct diff_edit), MAX_DIFF); + + if (rspamd_diff (s1->begin, 0, s1->len, + s2->begin, 0, s2->len, MAX_DIFF, ses, NULL) == -1) { + /* Diff failed, strings are different */ + g_array_free (ses, TRUE); + return 0; + } + + for (i = 0; i < ses->len; i ++) { + e = &g_array_index(ses, struct diff_edit, i); + if (e->op != DIFF_MATCH) { + distance += e->len; + } + } + + g_array_free (ses, TRUE); + + return distance; +} + +guint32 +compare_diff_distance (f_str_t *s1, f_str_t *s2) +{ + + return 100 - (2 * compare_diff_distance_unnormalized (s1, s2) * 100) / (s1->len + s2->len); +} + + +guint32 +compare_diff_distance_normalized (f_str_t *s1, f_str_t *s2) +{ + gchar b1[BUFSIZ], b2[BUFSIZ], *t, *h, *p1, *p2; + gsize r1, r2; + f_str_t t1, t2; + guint32 cur_diff = 0; + + r1 = s1->len; + r2 = s2->len; + p1 = s1->begin; + p2 = s2->begin; + + while (r1 > 0 && r2 > 0) { + /* Copy strings to the buffer normalized */ + h = p1; + t = b1; + + /* The first string */ + while (r1 > 0 && t - b1 < (gint)sizeof (b1)) { + if (!g_ascii_isspace (*h)) { + *t++ = g_ascii_tolower (*h); + } + h ++; + p1 ++; + r1 --; + } + + t1.begin = b1; + t1.len = t - b1; + + /* The second string */ + h = p2; + t = b2; + while (r2 > 0 && t - b2 < (gint)sizeof (b2)) { + if (!g_ascii_isspace (*h)) { + *t++ = g_ascii_tolower (*h); + } + h ++; + p2 ++; + r2 --; + } + + t2.begin = b2; + t2.len = t - b2; + + cur_diff += compare_diff_distance_unnormalized (&t1, &t2); + } + + if (r1 > 0) { + h = p1; + while (r1 > 0) { + if (!g_ascii_isspace (*h)) { + cur_diff ++; + } + r1 --; + h ++; + } + } + else if (r2 > 0) { + h = p2; + while (r2 > 0) { + if (!g_ascii_isspace (*h)) { + cur_diff ++; + } + r2 --; + h ++; + } + } + + return 100 - (2 * cur_diff * 100) / 
(s1->len + s2->len); +} diff --git a/src/libutil/diff.h b/src/libutil/diff.h new file mode 100644 index 000000000..cea5e5d4a --- /dev/null +++ b/src/libutil/diff.h @@ -0,0 +1,74 @@ +/* Copyright (c) 2010, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#ifndef DIFF_H_ +#define DIFF_H_ + +#include "config.h" +#include "fstring.h" + +typedef enum +{ + DIFF_MATCH = 1, + DIFF_DELETE, + DIFF_INSERT +} diff_op; + +struct diff_edit +{ + gshort op; + gint off; /* off ginto s1 if MATCH or DELETE but s2 if INSERT */ + gint len; +}; + +/* + * Calculate difference between two strings using diff algorithm + * @param a the first line begin + * @param aoff the first line offset + * @param n the first line length + * @param b the second line begin + * @param boff the second line offset + * @param b the second line length + * @param dmax maximum differences number + * @param ses here would be stored the shortest script to transform a to b + * @param sn here would be stored a number of differences between a and b + * @return distance between strings or -1 in case of error + */ +gint rspamd_diff(const void *a, gint aoff, gint n, const void *b, gint boff, gint m, + gint dmax, GArray *ses, gint *sn); + +/* + * Calculate distance between two strings (in percentage) using diff algorithm. + * @return 100 in case of identical strings and 0 in case of totally different strings. + */ +guint32 compare_diff_distance (f_str_t *s1, f_str_t *s2); + +/* + * Calculate distance between two strings (in percentage) using diff algorithm. Strings are normalized before: + * all spaces are removed and all characters are lowercased. + * @return 100 in case of identical strings and 0 in case of totally different strings. +*/ +guint32 compare_diff_distance_normalized (f_str_t *s1, f_str_t *s2); + +#endif /* DIFF_H_ */ diff --git a/src/libutil/fstring.c b/src/libutil/fstring.c new file mode 100644 index 000000000..098824101 --- /dev/null +++ b/src/libutil/fstring.c @@ -0,0 +1,461 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "fstring.h" + +/* + * Search first occurence of character in string + */ +ssize_t +fstrchr (f_str_t * src, gchar c) +{ + register size_t cur = 0; + + while (cur < src->len) { + if (*(src->begin + cur) == c) { + return cur; + } + cur++; + } + + return -1; +} + +/* + * Search last occurence of character in string + */ +ssize_t +fstrrchr (f_str_t * src, gchar c) +{ + register ssize_t cur = src->len; + + while (cur > 0) { + if (*(src->begin + cur) == c) { + return cur; + } + cur--; + } + + return -1; +} + +/* + * Search for pattern in orig + */ +ssize_t +fstrstr (f_str_t * orig, f_str_t * pattern) +{ + register size_t cur = 0, pcur = 0; + + if (pattern->len > orig->len) { + return -1; + } + + while (cur < orig->len) { + if (*(orig->begin + cur) == *pattern->begin) { + while (cur < orig->len && pcur < pattern->len) { + if (*(orig->begin + cur) != *(pattern->begin + pcur)) { + pcur = 0; + break; + } + cur++; + pcur++; + } + return cur - pattern->len; + } + cur++; + } + + return -1; + +} + +/* + * Search for pattern in orig ignoring case + */ +ssize_t +fstrstri (f_str_t * orig, f_str_t * pattern) +{ + register size_t cur = 0, pcur = 0; + + if (pattern->len > orig->len) { + return -1; + } + + while (cur < orig->len) { + if (g_ascii_tolower (*(orig->begin + cur)) == g_ascii_tolower (*pattern->begin)) { + while (cur < orig->len && pcur < pattern->len) { + if (g_ascii_tolower (*(orig->begin + cur)) != g_ascii_tolower (*(pattern->begin + pcur))) { + pcur = 0; + break; + } + cur++; + pcur++; + } + return cur - pattern->len; + } + cur++; + } + + return -1; + +} + +/* + * Split string by tokens + * word contains parsed word + * + * Return: -1 - no new words can be extracted + * 1 - word was extracted and there are more words + * 0 - last word extracted + */ +gint +fstrtok (f_str_t * text, const gchar *sep, f_tok_t * state) +{ + register size_t cur; + const gchar *csep = sep; + + if (state->pos >= text->len) { + return -1; + } + + cur = state->pos; + + while (cur 
< text->len) { + while (*csep) { + if (*(text->begin + cur) == *csep) { + state->word.begin = (text->begin + state->pos); + state->word.len = cur - state->pos; + state->pos = cur + 1; + return 1; + } + csep++; + } + csep = sep; + cur++; + } + + /* Last word */ + state->word.begin = (text->begin + state->pos); + state->word.len = cur - state->pos; + state->pos = cur; + + return 0; +} + +/* + * Copy one string into other + */ +size_t +fstrcpy (f_str_t * dest, f_str_t * src) +{ + register size_t cur = 0; + + if (dest->size < src->len) { + return 0; + } + + while (cur < src->len && cur < dest->size) { + *(dest->begin + cur) = *(src->begin + cur); + cur++; + } + + return cur; +} + +/* + * Concatenate two strings + */ +size_t +fstrcat (f_str_t * dest, f_str_t * src) +{ + register size_t cur = 0; + gchar *p = dest->begin + dest->len; + + if (dest->size < src->len + dest->len) { + return 0; + } + + while (cur < src->len) { + *p = *(src->begin + cur); + p++; + cur++; + } + + dest->len += src->len; + + return cur; + +} + +/* + * Make copy of string to 0-terminated string + */ +gchar * +fstrcstr (f_str_t * str, rspamd_mempool_t * pool) +{ + gchar *res; + res = rspamd_mempool_alloc (pool, str->len + 1); + + /* Do not allow multiply \0 characters */ + memccpy (res, str->begin, '\0', str->len); + res[str->len] = 0; + + return res; +} + +/* + * Push one character to fstr + */ +gint +fstrpush (f_str_t * dest, gchar c) +{ + if (dest->size < dest->len) { + /* Need to reallocate string */ + return 0; + } + + *(dest->begin + dest->len) = c; + dest->len++; + return 1; +} + +/* + * Push one character to fstr + */ +gint +fstrpush_unichar (f_str_t * dest, gunichar c) +{ + int l; + if (dest->size < dest->len) { + /* Need to reallocate string */ + return 0; + } + + l = g_unichar_to_utf8 (c, dest->begin + dest->len); + dest->len += l; + return l; +} + +/* + * Allocate memory for f_str_t + */ +f_str_t * +fstralloc (rspamd_mempool_t * pool, size_t len) +{ + f_str_t *res = rspamd_mempool_alloc 
(pool, sizeof (f_str_t)); + + res->begin = rspamd_mempool_alloc (pool, len); + + res->size = len; + res->len = 0; + return res; +} + +/* + * Allocate memory for f_str_t from temporary pool + */ +f_str_t * +fstralloc_tmp (rspamd_mempool_t * pool, size_t len) +{ + f_str_t *res = rspamd_mempool_alloc_tmp (pool, sizeof (f_str_t)); + + res->begin = rspamd_mempool_alloc_tmp (pool, len); + + res->size = len; + res->len = 0; + return res; +} + +/* + * Truncate string to its len + */ +f_str_t * +fstrtruncate (rspamd_mempool_t * pool, f_str_t * orig) +{ + f_str_t *res; + + if (orig == NULL || orig->len == 0 || orig->size <= orig->len) { + return orig; + } + + res = fstralloc (pool, orig->len); + if (res == NULL) { + return NULL; + } + fstrcpy (res, orig); + + return res; +} + +/* + * Enlarge string to new size + */ +f_str_t * +fstrgrow (rspamd_mempool_t * pool, f_str_t * orig, size_t newlen) +{ + f_str_t *res; + + if (orig == NULL || orig->len == 0 || orig->size >= newlen) { + return orig; + } + + res = fstralloc (pool, newlen); + if (res == NULL) { + return NULL; + } + fstrcpy (res, orig); + + return res; +} + +static guint32 +fstrhash_c (gchar c, guint32 hval) +{ + guint32 tmp; + /* + * xor in the current byte against each byte of hval + * (which alone gaurantees that every bit of input will have + * an effect on the output) + */ + tmp = c & 0xFF; + tmp = tmp | (tmp << 8) | (tmp << 16) | (tmp << 24); + hval ^= tmp; + + /* add some bits out of the middle as low order bits */ + hval = hval + ((hval >> 12) & 0x0000ffff); + + /* swap most and min significative bytes */ + tmp = (hval << 24) | ((hval >> 24) & 0xff); + /* zero most and min significative bytes of hval */ + hval &= 0x00ffff00; + hval |= tmp; + /* + * rotate hval 3 bits to the left (thereby making the + * 3rd msb of the above mess the hsb of the output hash) + */ + return (hval << 3) + (hval >> 29); +} + +/* + * Return hash value for a string + */ +guint32 +fstrhash (f_str_t * str) +{ + size_t i; + guint32 hval; + 
gchar *c = str->begin; + + if (str == NULL) { + return 0; + } + hval = str->len; + + for (i = 0; i < str->len; i++, c++) { + hval = fstrhash_c (*c, hval); + } + return hval; +} + +/* + * Return hash value for a string + */ +guint32 +fstrhash_lowercase (f_str_t * str, gboolean is_utf) +{ + gsize i; + guint32 j, hval; + const gchar *p = str->begin, *end = NULL; + gchar t; + gunichar uc; + + if (str == NULL) { + return 0; + } + hval = str->len; + + if (is_utf) { + while (end < str->begin + str->len) { + if (!g_utf8_validate (p, str->len, &end)) { + return fstrhash_lowercase (str, FALSE); + } + while (p < end) { + uc = g_unichar_tolower (g_utf8_get_char (p)); + for (j = 0; j < sizeof (gunichar); j ++) { + t = (uc >> (j * 8)) & 0xff; + if (t != 0) { + hval = fstrhash_c (t, hval); + } + } + p = g_utf8_next_char (p); + } + p = end + 1; + } + + } + else { + for (i = 0; i < str->len; i++, p++) { + hval = fstrhash_c (g_ascii_tolower (*p), hval); + } + } + + return hval; +} + +void +fstrstrip (f_str_t * str) +{ + gchar *p = str->begin; + guint r = 0; + + while (r < str->len) { + if (g_ascii_isspace (*p)) { + p++; + r++; + } + else { + break; + } + } + + if (r > 0) { + memmove (str->begin, p, str->len - r); + str->len -= r; + } + + r = str->len; + p = str->begin + str->len; + while (r > 0) { + if (g_ascii_isspace (*p)) { + p--; + r--; + } + else { + break; + } + } + + str->len = r; +} diff --git a/src/libutil/fstring.h b/src/libutil/fstring.h new file mode 100644 index 000000000..bd680e365 --- /dev/null +++ b/src/libutil/fstring.h @@ -0,0 +1,120 @@ +/* + * Functions for handling with fixed size strings + */ + +#ifndef FSTRING_H +#define FSTRING_H + +#include "config.h" +#include "mem_pool.h" + +#define update_buf_size(x) (x)->free = (x)->buf->size - ((x)->pos - (x)->buf->begin); (x)->buf->len = (x)->pos - (x)->buf->begin + +typedef struct f_str_s { + gchar *begin; + size_t len; + size_t size; +} f_str_t; + +typedef struct f_str_buf_s { + f_str_t *buf; + gchar *pos; + size_t 
free; +} f_str_buf_t; + +typedef struct f_tok_s { + f_str_t word; + size_t pos; +} f_tok_t; + +/* + * Search first occurence of character in string + */ +ssize_t fstrchr (f_str_t *src, gchar c); + +/* + * Search last occurence of character in string + */ +ssize_t fstrrchr (f_str_t *src, gchar c); + +/* + * Search for pattern in orig + */ +ssize_t fstrstr (f_str_t *orig, f_str_t *pattern); + +/* + * Search for pattern in orig ignoring case + */ +ssize_t fstrstri (f_str_t *orig, f_str_t *pattern); + +/* + * Split string by tokens + * word contains parsed word + */ +gint fstrtok (f_str_t *text, const gchar *sep, f_tok_t *state); + +/* + * Copy one string into other + */ +size_t fstrcpy (f_str_t *dest, f_str_t *src); + +/* + * Concatenate two strings + */ +size_t fstrcat (f_str_t *dest, f_str_t *src); + +/* + * Push one character to fstr + */ +gint fstrpush (f_str_t *dest, gchar c); + +/* + * Push one character to fstr + */ +gint fstrpush_unichar (f_str_t *dest, gunichar c); + +/* + * Allocate memory for f_str_t + */ +f_str_t* fstralloc (rspamd_mempool_t *pool, size_t len); + +/* + * Allocate memory for f_str_t from temporary pool + */ +f_str_t* fstralloc_tmp (rspamd_mempool_t *pool, size_t len); + +/* + * Truncate string to its len + */ +f_str_t* fstrtruncate (rspamd_mempool_t *pool, f_str_t *orig); + +/* + * Enlarge string to new size + */ +f_str_t* fstrgrow (rspamd_mempool_t *pool, f_str_t *orig, size_t newlen); + +/* + * Return specified character + */ +#define fstridx(str, pos) *((str)->begin + (pos)) + +/* + * Return fast hash value for fixed string + */ +guint32 fstrhash (f_str_t *str); + +/* + * Return fast hash value for fixed string converted to lowercase + */ +guint32 fstrhash_lowercase (f_str_t *str, gboolean is_utf); +/* + * Make copy of string to 0-terminated string + */ +gchar* fstrcstr (f_str_t *str, rspamd_mempool_t *pool); + +/* + * Strip fstr string from space symbols + */ +void fstrstrip (f_str_t *str); + +#endif diff --git a/src/libutil/fuzzy.c 
b/src/libutil/fuzzy.c new file mode 100644 index 000000000..7e8a01ce3 --- /dev/null +++ b/src/libutil/fuzzy.c @@ -0,0 +1,498 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include "config.h" +#include "mem_pool.h" +#include "fstring.h" +#include "fuzzy.h" +#include "message.h" +#include "url.h" +#include "main.h" + +#define ROLL_WINDOW_SIZE 9 +#define MIN_FUZZY_BLOCK_SIZE 3 +#define HASH_INIT 0x28021967 + +static const char *b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +struct roll_state { + guint32 h[3]; + gchar window[ROLL_WINDOW_SIZE]; + gint n; +}; + +static struct roll_state rs; + + +/* Rolling hash function based on Adler-32 checksum */ +static guint32 +fuzzy_roll_hash (guint c) +{ + /* Check window position */ + if (rs.n == ROLL_WINDOW_SIZE) { + rs.n = 0; + } + + rs.h[1] -= rs.h[0]; + rs.h[1] += ROLL_WINDOW_SIZE * c; + + rs.h[0] += c; + rs.h[0] -= rs.window[rs.n]; + + /* Save current symbol */ + rs.window[rs.n] = c; + rs.n++; + + rs.h[2] <<= 5; + rs.h[2] ^= c; + + return rs.h[0] + rs.h[1] + rs.h[2]; +} + +/* A simple non-rolling hash, based on the FNV hash */ +static guint32 +fuzzy_fnv_hash (guint c, guint32 hval) +{ + hval ^= c; + hval += (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) + (hval << 24); + return hval; +} + +/* Calculate blocksize depending on length of input */ +static guint32 +fuzzy_blocksize (guint32 len) +{ + guint32 nlen = MIN_FUZZY_BLOCK_SIZE; + + while (nlen * (FUZZY_HASHLEN - 1) < len) { + nlen *= 2; + } + return nlen; +} + + +/* Update hash with new symbol */ +static void +fuzzy_update (fuzzy_hash_t * h, guint c) +{ + h->rh = fuzzy_roll_hash (c); + h->h = fuzzy_fnv_hash (c, h->h); + + if (h->rh % h->block_size == (h->block_size - 1)) { + h->hash_pipe[h->hi] = b64[h->h % 64]; + if (h->hi < FUZZY_HASHLEN - 2) { + h->h = HASH_INIT; + h->hi++; + } + } +} + +static void +fuzzy_update2 (fuzzy_hash_t * h1, fuzzy_hash_t *h2, guint c) +{ + h1->rh = fuzzy_roll_hash (c); + h1->h = fuzzy_fnv_hash (c, h1->h); + h2->rh = h1->rh; + h2->h = fuzzy_fnv_hash (c, h2->h); + + if (h1->rh % h1->block_size == (h1->block_size - 1)) { + h1->hash_pipe[h1->hi] = b64[h1->h % 64]; + if 
(h1->hi < FUZZY_HASHLEN - 2) { + h1->h = HASH_INIT; + h1->hi++; + } + } + if (h2->rh % h2->block_size == (h2->block_size - 1)) { + h2->hash_pipe[h2->hi] = b64[h2->h % 64]; + if (h2->hi < FUZZY_HASHLEN - 2) { + h2->h = HASH_INIT; + h2->hi++; + } + } +} + +/* + * Levenshtein distance between string1 and string2. + * + * Replace cost is normally 1, and 2 with nonzero xcost. + */ +guint32 +lev_distance (gchar *s1, gint len1, gchar *s2, gint len2) +{ + gint i; + gint *row; /* we only need to keep one row of costs */ + gint *end; + gint half, nx; + gchar *sx, *char2p, char1; + gint *p, D, x, offset, c3; + + /* strip common prefix */ + while (len1 > 0 && len2 > 0 && *s1 == *s2) { + len1--; + len2--; + s1++; + s2++; + } + + /* strip common suffix */ + while (len1 > 0 && len2 > 0 && s1[len1 - 1] == s2[len2 - 1]) { + len1--; + len2--; + } + + /* catch trivial cases */ + if (len1 == 0) { + return len2; + } + + if (len2 == 0) { + return len1; + } + + /* make the inner cycle (i.e. string2) the longer one */ + if (len1 > len2) { + nx = len1; + sx = s1; + len1 = len2; + len2 = nx; + s1 = s2; + s2 = sx; + } + /* check len1 == 1 separately */ + if (len1 == 1) { + return len2 - (memchr (s2, *s1, len2) != NULL); + } + + len1++; + len2++; + half = len1 >> 1; + + /* initalize first row */ + row = g_malloc (len2 * sizeof (gint)); + end = row + len2 - 1; + for (i = 0; i < len2; i++) { + row[i] = i; + } + + /* in this case we don't have to scan two corner triangles (of size len1/2) + * in the matrix because no best path can go throught them. 
note this + * breaks when len1 == len2 == 2 so the memchr() special case above is + * necessary */ + row[0] = len1 - half - 1; + for (i = 1; i < len1; i++) { + char1 = s1[i - 1]; + /* skip the upper triangle */ + if (i >= len1 - half) { + offset = i - (len1 - half); + char2p = s2 + offset; + p = row + offset; + c3 = *(p++) + (char1 != *(char2p++)); + x = *p; + x++; + D = x; + if (x > c3) + x = c3; + *(p++) = x; + } + else { + p = row + 1; + char2p = s2; + D = x = i; + } + /* skip the lower triangle */ + if (i <= half + 1) + end = row + len2 + i - half - 2; + /* main */ + while (p <= end) { + c3 = --D + (char1 != *(char2p++)); + x++; + if (x > c3) + x = c3; + D = *p; + D++; + if (x > D) + x = D; + *(p++) = x; + } + /* lower triangle sentinel */ + if (i <= half) { + c3 = --D + (char1 != *char2p); + x++; + if (x > c3) + x = c3; + *p = x; + } + } + + i = *end; + g_free (row); + return i; +} + +/* Calculate fuzzy hash for specified string */ +fuzzy_hash_t * +fuzzy_init (f_str_t * in, rspamd_mempool_t * pool) +{ + fuzzy_hash_t *new; + guint i, repeats = 0; + gchar *c = in->begin, last = '\0'; + gsize real_len = 0; + + new = rspamd_mempool_alloc0 (pool, sizeof (fuzzy_hash_t)); + bzero (&rs, sizeof (rs)); + for (i = 0; i < in->len; i++) { + if (*c == last) { + repeats++; + } + else { + repeats = 0; + } + if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c) && repeats < 3) { + real_len ++; + } + last = *c; + c++; + } + + new->block_size = fuzzy_blocksize (real_len); + c = in->begin; + + for (i = 0; i < in->len; i++) { + if (*c == last) { + repeats++; + } + else { + repeats = 0; + } + if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c) && repeats < 3) { + fuzzy_update (new, *c); + } + last = *c; + c++; + } + + /* Check whether we have more bytes in a rolling window */ + if (new->rh != 0) { + new->hash_pipe[new->hi] = b64[new->h % 64]; + } + + return new; +} + +fuzzy_hash_t * +fuzzy_init_byte_array (GByteArray * in, rspamd_mempool_t * pool) +{ + f_str_t f; + + f.begin = (gchar 
*)in->data; + f.len = in->len; + + return fuzzy_init (&f, pool); +} + +void +fuzzy_init_part (struct mime_text_part *part, rspamd_mempool_t *pool, gsize max_diff) +{ + fuzzy_hash_t *new, *new2; + gchar *c, *end, *begin; + gsize real_len = 0, len = part->content->len; + GList *cur_offset; + struct process_exception *cur_ex = NULL; + gunichar uc; + gboolean write_diff = FALSE; + + cur_offset = part->urls_offset; + if (cur_offset != NULL) { + cur_ex = cur_offset->data; + } + + begin = (gchar *)part->content->data; + c = begin; + new = rspamd_mempool_alloc0 (pool, sizeof (fuzzy_hash_t)); + new2 = rspamd_mempool_alloc0 (pool, sizeof (fuzzy_hash_t)); + bzero (&rs, sizeof (rs)); + end = c + len; + + if (part->is_utf) { + while (c < end) { + if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) { + c += cur_ex->len + 1; + cur_offset = g_list_next (cur_offset); + if (cur_offset != NULL) { + cur_ex = cur_offset->data; + } + } + else { + uc = g_utf8_get_char (c); + if (g_unichar_isalnum (uc)) { + real_len ++; + } + c = g_utf8_next_char (c); + } + } + } + else { + while (c < end) { + if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) { + c += cur_ex->len + 1; + cur_offset = g_list_next (cur_offset); + if (cur_offset != NULL) { + cur_ex = cur_offset->data; + } + } + else { + if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c)) { + real_len ++; + } + c++; + } + } + } + + write_diff = real_len > 0 && real_len < max_diff; + + if (write_diff) { + part->diff_str = fstralloc (pool, real_len); + } + else { + part->diff_str = NULL; + } + + new->block_size = fuzzy_blocksize (real_len); + new2->block_size = new->block_size * 2; + + cur_offset = part->urls_offset; + if (cur_offset != NULL) { + cur_ex = cur_offset->data; + } + + begin = (gchar *)part->content->data; + c = begin; + end = c + len; + if (part->is_utf) { + + while (c < end) { + if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) { + c += cur_ex->len + 1; + cur_offset = g_list_next (cur_offset); + if (cur_offset != NULL) { 
+ cur_ex = cur_offset->data; + } + } + else { + uc = g_utf8_get_char (c); + if (g_unichar_isalnum (uc)) { + fuzzy_update2 (new, new2, uc); + if (write_diff) { + fstrpush_unichar (part->diff_str, uc); + } + } + c = g_utf8_next_char (c); + } + } + } + else { + while (c < end) { + if (cur_ex != NULL && (gint)cur_ex->pos == c - begin) { + c += cur_ex->len + 1; + cur_offset = g_list_next (cur_offset); + if (cur_offset != NULL) { + cur_ex = cur_offset->data; + } + } + else { + if (!g_ascii_isspace (*c) && !g_ascii_ispunct (*c)) { + fuzzy_update2 (new, new2, *c); + if (write_diff) { + fstrpush (part->diff_str, *c); + } + } + c++; + } + } + } + + /* Check whether we have more bytes in a rolling window */ + if (new->rh != 0) { + new->hash_pipe[new->hi] = b64[new->h % 64]; + } + if (new2->rh != 0) { + new2->hash_pipe[new2->hi] = b64[new2->h % 64]; + } + + part->fuzzy = new; + part->double_fuzzy = new2; +} + +/* Compare score of difference between two hashes 0 - different hashes, 100 - identical hashes */ +gint +fuzzy_compare_hashes (fuzzy_hash_t * h1, fuzzy_hash_t * h2) +{ + gint res, l1, l2; + + /* If we have hashes of different size, input strings are too different */ + if (h1->block_size != h2->block_size) { + return 0; + } + + l1 = strlen (h1->hash_pipe); + l2 = strlen (h2->hash_pipe); + + if (l1 == 0 || l2 == 0) { + if (l1 == 0 && l2 == 0) { + return 100; + } + else { + return 0; + } + } + + res = lev_distance (h1->hash_pipe, l1, h2->hash_pipe, l2); + res = 100 - (2 * res * 100) / (l1 + l2); + + return res; +} + +gint +fuzzy_compare_parts (struct mime_text_part *p1, struct mime_text_part *p2) +{ + if (p1->fuzzy != NULL && p2->fuzzy != NULL) { + if (p1->fuzzy->block_size == p2->fuzzy->block_size) { + return fuzzy_compare_hashes (p1->fuzzy, p2->fuzzy); + } + else if (p1->double_fuzzy->block_size == p2->fuzzy->block_size) { + return fuzzy_compare_hashes (p1->double_fuzzy, p2->fuzzy); + } + else if (p2->double_fuzzy->block_size == p1->fuzzy->block_size) { + return 
fuzzy_compare_hashes (p2->double_fuzzy, p1->fuzzy); + } + } + + return 0; +} + +/* + * vi:ts=4 + */ diff --git a/src/libutil/fuzzy.h b/src/libutil/fuzzy.h new file mode 100644 index 000000000..c226c5765 --- /dev/null +++ b/src/libutil/fuzzy.h @@ -0,0 +1,69 @@ +/** + * @file fuzzy.h + * Fuzzy hashes API + */ + +#ifndef RSPAMD_FUZZY_H +#define RSPAMD_FUZZY_H + +#include "config.h" +#include "mem_pool.h" +#include "fstring.h" + +#define FUZZY_HASHLEN 64 + +typedef struct fuzzy_hash_s { + gchar hash_pipe[FUZZY_HASHLEN]; /**< result hash */ + guint32 block_size; /**< current blocksize */ + guint32 rh; /**< roll hash value */ + guint32 h; /**< hash of block */ + guint32 hi; /**< current index in hash pipe */ +} fuzzy_hash_t; + +struct mime_text_part; + +/** + * Calculate fuzzy hash for specified string + * @param in input string + * @param pool pool object + * @return fuzzy_hash object allocated in pool + */ +fuzzy_hash_t * fuzzy_init (f_str_t *in, rspamd_mempool_t *pool); +/** + * Calculate fuzzy hash for specified byte array + * @param in input string + * @param pool pool object + * @return fuzzy_hash object allocated in pool + */ +fuzzy_hash_t * fuzzy_init_byte_array (GByteArray *in, rspamd_mempool_t *pool); + +/** + * Calculate fuzzy hash for specified text part + * @param part text part object + * @param pool pool object + * @param max_diff maximum text length to use diff algorithm in comparasions + * @return fuzzy_hash object allocated in pool + */ +void fuzzy_init_part (struct mime_text_part *part, rspamd_mempool_t *pool, gsize max_diff); + +/** + * Compare score of difference between two hashes + * @param h1 first hash + * @param h2 second hash + * @return result in percents 0 - different hashes, 100 - identical hashes + */ +gint fuzzy_compare_hashes (fuzzy_hash_t *h1, fuzzy_hash_t *h2); + +/* + * Compare two text parts and return percents of difference + */ +gint fuzzy_compare_parts (struct mime_text_part *p1, struct mime_text_part *p2); + +/* + * Calculate 
levenstein distance between two strings. Note: this algorithm should be used + * only for short texts - it runs too slow on long ones. + */ +guint32 lev_distance (gchar *s1, gint len1, gchar *s2, gint len2); + + +#endif diff --git a/src/libutil/hash.c b/src/libutil/hash.c new file mode 100644 index 000000000..3bb381651 --- /dev/null +++ b/src/libutil/hash.c @@ -0,0 +1,489 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "hash.h" + +#define HASH_TABLE_MIN_SIZE 19 +#define HASH_TABLE_MAX_SIZE 13845163 + +/* + * Performs a lookup in the hash table. Virtually all hash operations + * will use this function internally. 
+ */ +static inline struct rspamd_hash_node ** +rspamd_hash_lookup_node (rspamd_hash_t * hash, gconstpointer key, guint * hash_return) +{ + struct rspamd_hash_node **node_ptr, *node; + guint hash_value; + hash_value = (*hash->hash_func) (key); + + if (hash->shared) { + rspamd_mempool_rlock_rwlock (hash->lock); + } + node_ptr = &hash->nodes[hash_value % hash->size]; + + if (hash_return) + *hash_return = hash_value; + + /* Hash table lookup needs to be fast. + * We therefore remove the extra conditional of testing + * whether to call the key_equal_func or not from + * the inner loop. + * + * Additional optimisation: first check if our full hash + * values are equal so we can avoid calling the full-blown + * key equality function in most cases. + */ + if (hash->key_equal_func) { + while ((node = *node_ptr)) { + if (node->key_hash == hash_value && hash->key_equal_func (node->key, key)) { + break; + } + node_ptr = &(*node_ptr)->next; + } + } + else { + while ((node = *node_ptr)) { + if (node->key == key) { + break; + } + node_ptr = &(*node_ptr)->next; + } + } + if (hash->shared) { + rspamd_mempool_runlock_rwlock (hash->lock); + } + return node_ptr; +} + +/* + * Removes a node from the hash table and updates the node count. + * No table resize is performed. + */ +static void +rspamd_hash_remove_node (rspamd_hash_t * hash, struct rspamd_hash_node ***node_ptr_ptr) +{ + struct rspamd_hash_node **node_ptr, *node; + + if (hash->shared) { + rspamd_mempool_wlock_rwlock (hash->lock); + } + node_ptr = *node_ptr_ptr; + node = *node_ptr; + + *node_ptr = node->next; + + hash->nnodes--; + if (hash->shared) { + rspamd_mempool_wunlock_rwlock (hash->lock); + } +} + +/* + * Resizes the hash table to the optimal size based on the number of + * nodes currently held. 
+ */ +static void +rspamd_hash_resize (rspamd_hash_t * hash) +{ + struct rspamd_hash_node **new_nodes; + struct rspamd_hash_node *node, *next; + guint hash_val; + gint new_size, i; + + new_size = g_spaced_primes_closest (hash->nnodes); + new_size = CLAMP (new_size, HASH_TABLE_MIN_SIZE, HASH_TABLE_MAX_SIZE); + + if (hash->shared) { + new_nodes = rspamd_mempool_alloc_shared (hash->pool, sizeof (struct rspamd_hash_node *) * new_size); + } + else { + new_nodes = rspamd_mempool_alloc (hash->pool, sizeof (struct rspamd_hash_node *) * new_size); + } + + if (hash->shared) { + rspamd_mempool_wlock_rwlock (hash->lock); + } + + for (i = 0; i < hash->size; i++) { + for (node = hash->nodes[i]; node; node = next) { + next = node->next; + hash_val = node->key_hash % new_size; + node->next = new_nodes[hash_val]; + new_nodes[hash_val] = node; + } + } + + hash->nodes = new_nodes; + hash->size = new_size; + + if (hash->shared) { + rspamd_mempool_wunlock_rwlock (hash->lock); + } +} + +/* + * Resizes the hash table, if needed. + */ +static inline void +rspamd_hash_maybe_resize (rspamd_hash_t * hash) +{ + gint nnodes = hash->nnodes; + gint size = hash->size; + + if ((size >= 3 * nnodes && size > HASH_TABLE_MIN_SIZE) || (3 * size <= nnodes && size < HASH_TABLE_MAX_SIZE)) { + rspamd_hash_resize (hash); + } +} + +/* Create new hash in specified pool */ +rspamd_hash_t * +rspamd_hash_new (rspamd_mempool_t * pool, GHashFunc hash_func, GEqualFunc key_equal_func) +{ + rspamd_hash_t *hash; + + hash = rspamd_mempool_alloc (pool, sizeof (rspamd_hash_t)); + hash->size = HASH_TABLE_MIN_SIZE; + hash->nnodes = 0; + hash->hash_func = hash_func ? 
hash_func : g_direct_hash; + hash->key_equal_func = key_equal_func; + hash->nodes = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_hash_node *) * hash->size); + hash->shared = 0; + hash->pool = pool; + + return hash; +} + +/* + * Create new hash in specified pool using shared memory + */ +rspamd_hash_t * +rspamd_hash_new_shared (rspamd_mempool_t * pool, GHashFunc hash_func, GEqualFunc key_equal_func, gint size) +{ + rspamd_hash_t *hash; + + hash = rspamd_mempool_alloc_shared (pool, sizeof (rspamd_hash_t)); + hash->size = size; + hash->nnodes = 0; + hash->hash_func = hash_func ? hash_func : g_direct_hash; + hash->key_equal_func = key_equal_func; + hash->nodes = rspamd_mempool_alloc0_shared (pool, sizeof (struct rspamd_hash_node *) * hash->size); + hash->shared = 1; + /* Get mutex from pool for locking on insert/remove operations */ + hash->lock = rspamd_mempool_get_rwlock (pool); + hash->pool = pool; + + return hash; +} + +/* + * Insert item in hash + */ +void +rspamd_hash_insert (rspamd_hash_t * hash, gpointer key, gpointer value) +{ + struct rspamd_hash_node **node_ptr, *node; + guint key_hash; + + g_return_if_fail (hash != NULL); + node_ptr = rspamd_hash_lookup_node (hash, key, &key_hash); + + if (hash->shared) { + rspamd_mempool_wlock_rwlock (hash->lock); + } + if ((node = *node_ptr)) { + node->key = key; + node->value = value; + } + else { + if (hash->shared) { + node = rspamd_mempool_alloc_shared (hash->pool, sizeof (struct rspamd_hash_node)); + } + else { + node = rspamd_mempool_alloc (hash->pool, sizeof (struct rspamd_hash_node)); + } + + node->key = key; + node->value = value; + node->key_hash = key_hash; + node->next = NULL; + + *node_ptr = node; + hash->nnodes++; + } + if (hash->shared) { + rspamd_mempool_wunlock_rwlock (hash->lock); + } + + if (!hash->shared) { + rspamd_hash_maybe_resize (hash); + } +} + +/* + * Remove item from hash + */ +gboolean +rspamd_hash_remove (rspamd_hash_t * hash, gpointer key) +{ + struct rspamd_hash_node **node_ptr; + + 
g_return_val_if_fail (hash != NULL, FALSE); + + node_ptr = rspamd_hash_lookup_node (hash, key, NULL); + if (*node_ptr == NULL) + return FALSE; + + rspamd_hash_remove_node (hash, &node_ptr); + rspamd_hash_maybe_resize (hash); + + return TRUE; +} + +/* + * Lookup item from hash + */ +gpointer +rspamd_hash_lookup (rspamd_hash_t * hash, gpointer key) +{ + struct rspamd_hash_node *node; + g_return_val_if_fail (hash != NULL, NULL); + + node = *rspamd_hash_lookup_node (hash, key, NULL); + + return node ? node->value : NULL; +} + +/* + * Iterate throught hash + */ +void +rspamd_hash_foreach (rspamd_hash_t * hash, GHFunc func, gpointer user_data) +{ + struct rspamd_hash_node *node; + gint i; + + g_return_if_fail (hash != NULL); + g_return_if_fail (func != NULL); + + if (hash->shared) { + rspamd_mempool_rlock_rwlock (hash->lock); + } + for (i = 0; i < hash->size; i++) { + for (node = hash->nodes[i]; node; node = node->next) { + (*func) (node->key, node->value, user_data); + } + } + if (hash->shared) { + rspamd_mempool_runlock_rwlock (hash->lock); + } +} + +/** + * LRU hashing + */ + +static void +rspamd_lru_hash_destroy_node (gpointer v) +{ + rspamd_lru_element_t *node = v; + + if (node->hash->value_destroy) { + node->hash->value_destroy (node->data); + } + g_queue_delete_link (node->hash->q, node->link); + g_slice_free1 (sizeof (rspamd_lru_element_t), node); +} + +static rspamd_lru_element_t* +rspamd_lru_create_node (rspamd_lru_hash_t *hash, gpointer key, gpointer value, time_t now, guint ttl) +{ + rspamd_lru_element_t *node; + + node = g_slice_alloc (sizeof (rspamd_lru_element_t)); + node->data = value; + node->key = key; + node->store_time = now; + node->ttl = ttl; + node->hash = hash; + + return node; +} + +/** + * Create new lru hash with GHashTable as storage + * @param maxsize maximum elements in a hash + * @param maxage maximum age of elemnt + * @param hash_func pointer to hash function + * @param key_equal_func pointer to function for comparing keys + * @return new 
rspamd_hash object + */ +rspamd_lru_hash_t* +rspamd_lru_hash_new (GHashFunc hash_func, GEqualFunc key_equal_func, gint maxsize, gint maxage, + GDestroyNotify key_destroy, GDestroyNotify value_destroy) +{ + rspamd_lru_hash_t *new; + + new = g_malloc (sizeof (rspamd_lru_hash_t)); + new->storage = g_hash_table_new_full (hash_func, key_equal_func, key_destroy, rspamd_lru_hash_destroy_node); + new->maxage = maxage; + new->maxsize = maxsize; + new->value_destroy = value_destroy; + new->key_destroy = NULL; + new->q = g_queue_new (); + new->insert_func = (lru_cache_insert_func)g_hash_table_replace; + new->lookup_func = (lru_cache_lookup_func)g_hash_table_lookup; + new->delete_func = (lru_cache_delete_func)g_hash_table_remove; + new->destroy_func = (lru_cache_destroy_func)g_hash_table_destroy; + + return new; +} +/** + * Create new lru hash with custom storage + * @param maxsize maximum elements in a hash + * @param maxage maximum age of elemnt + * @param hash_func pointer to hash function + * @param key_equal_func pointer to function for comparing keys + * @return new rspamd_hash object + */ +rspamd_lru_hash_t* +rspamd_lru_hash_new_full (GHashFunc hash_func, GEqualFunc key_equal_func, + gint maxsize, gint maxage, GDestroyNotify key_destroy, GDestroyNotify value_destroy, + gpointer storage, lru_cache_insert_func insert_func, lru_cache_lookup_func lookup_func, + lru_cache_delete_func delete_func) +{ + rspamd_lru_hash_t *new; + + new = g_malloc (sizeof (rspamd_lru_hash_t)); + new->storage = storage; + new->maxage = maxage; + new->maxsize = maxsize; + new->value_destroy = value_destroy; + new->key_destroy = key_destroy; + new->q = g_queue_new (); + new->insert_func = insert_func; + new->lookup_func = lookup_func; + new->delete_func = delete_func; + new->destroy_func = NULL; + + return new; +} + +/** + * Lookup item from hash + * @param hash hash object + * @param key key to find + * @return value of key or NULL if key is not found + */ +gpointer +rspamd_lru_hash_lookup 
(rspamd_lru_hash_t *hash, gpointer key, time_t now) +{ + rspamd_lru_element_t *res; + + if ((res = hash->lookup_func (hash->storage, key)) != NULL) { + if (res->ttl != 0) { + if (now - res->store_time > res->ttl) { + hash->delete_func (hash->storage, key); + return NULL; + } + } + if (hash->maxage > 0) { + if (now - res->store_time > hash->maxage) { + res = g_queue_peek_tail (hash->q); + /* Expire elements from queue tail */ + while (res != NULL && now - res->store_time > hash->maxage) { + hash->delete_func (hash->storage, res->key); + res = g_queue_peek_tail (hash->q); + } + + return NULL; + } + } + return res->data; + } + + return NULL; +} +/** + * Insert item in hash + * @param hash hash object + * @param key key to insert + * @param value value of key + */ +void +rspamd_lru_hash_insert (rspamd_lru_hash_t *hash, gpointer key, gpointer value, + time_t now, guint ttl) +{ + rspamd_lru_element_t *res; + gint removed = 0; + + if ((res = hash->lookup_func (hash->storage, key)) != NULL) { + hash->delete_func (hash->storage, res->key); + } + else { + if (hash->maxsize > 0 && + (gint)g_queue_get_length (hash->q) >= hash->maxsize) { + /* Expire some elements */ + res = g_queue_peek_tail (hash->q); + if (hash->maxage > 0) { + while (res != NULL && now - res->store_time > hash->maxage) { + if (res->key != NULL) { + hash->delete_func (hash->storage, res->key); + } + else { + break; + } + res = g_queue_peek_tail (hash->q); + removed ++; + } + } + if (removed == 0) { + /* Remove explicitly */ + if (res->key != NULL) { + hash->delete_func (hash->storage, res->key); + } + } + } + } + + res = rspamd_lru_create_node (hash, key, value, now, ttl); + hash->insert_func (hash->storage, key, res); + g_queue_push_head (hash->q, res); + res->link = g_queue_peek_head_link (hash->q); +} + +void +rspamd_lru_hash_destroy (rspamd_lru_hash_t *hash) +{ + if (hash->destroy_func) { + hash->destroy_func (hash->storage); + } + g_queue_free (hash->q); + g_free (hash); +} + +/* + * vi:ts=4 + */ diff 
--git a/src/libutil/hash.h b/src/libutil/hash.h new file mode 100644 index 000000000..c5d4639af --- /dev/null +++ b/src/libutil/hash.h @@ -0,0 +1,160 @@ +/** + * @file hash.h + * Hash table implementation that allows using memory pools for storage as well as using + * shared memory for this purpose + */ + +#ifndef RSPAMD_HASH_H +#define RSPAMD_HASH_H + +#include "mem_pool.h" + +struct rspamd_hash_node { + gpointer key; + gpointer value; + guint key_hash; + struct rspamd_hash_node *next; +}; + +typedef struct rspamd_hash_s { + gint size; + gint nnodes; + struct rspamd_hash_node **nodes; + + GHashFunc hash_func; + GEqualFunc key_equal_func; + gint shared; + rspamd_mempool_rwlock_t *lock; + rspamd_mempool_t *pool; +} rspamd_hash_t; + +typedef void (*lru_cache_insert_func)(gpointer storage, gpointer key, gpointer value); +typedef gpointer (*lru_cache_lookup_func)(gpointer storage, gpointer key); +typedef gboolean (*lru_cache_delete_func)(gpointer storage, gpointer key); +typedef void (*lru_cache_destroy_func)(gpointer storage); + +typedef struct rspamd_lru_hash_s { + gint maxsize; + gint maxage; + GDestroyNotify value_destroy; + GDestroyNotify key_destroy; + GQueue *q; + gpointer storage; + lru_cache_insert_func insert_func; + lru_cache_lookup_func lookup_func; + lru_cache_delete_func delete_func; + lru_cache_destroy_func destroy_func; +} rspamd_lru_hash_t; + +typedef struct rspamd_lru_element_s { + gpointer data; + gpointer key; + time_t store_time; + guint ttl; + rspamd_lru_hash_t *hash; + GList *link; +} rspamd_lru_element_t; + + +#define rspamd_hash_size(x) (x)->nnodes + +/** + * Create new hash in specified pool + * @param pool memory pool object + * @param hash_func pointer to hash function + * @param key_equal_func pointer to function for comparing keys + * @return new rspamd_hash object + */ +rspamd_hash_t* rspamd_hash_new (rspamd_mempool_t *pool, GHashFunc hash_func, GEqualFunc key_equal_func); + +/** + * Create new hash in specified pool using shared memory + 
* @param pool memory pool object + * @param hash_func pointer to hash function + * @param key_equal_func pointer to function for comparing keys + * @return new rspamd_hash object + */ +rspamd_hash_t* rspamd_hash_new_shared (rspamd_mempool_t *pool, GHashFunc hash_func, GEqualFunc key_equal_func, gint size); + +/** + * Insert item in hash + * @param hash hash object + * @param key key to insert + * @param value value of key + */ +void rspamd_hash_insert (rspamd_hash_t *hash, gpointer key, gpointer value); + +/** + * Remove item from hash + * @param hash hash object + * @param key key to delete + */ +gboolean rspamd_hash_remove (rspamd_hash_t *hash, gpointer key); + +/** + * Lookup item from hash + * @param hash hash object + * @param key key to find + * @return value of key or NULL if key is not found + */ +gpointer rspamd_hash_lookup (rspamd_hash_t *hash, gpointer key); + +/** + * Iterate throught hash + * @param hash hash object + * @param func user's function that would be called for each key/value pair + * @param user_data pointer to user's data that would be passed to user's function + */ +void rspamd_hash_foreach (rspamd_hash_t *hash, GHFunc func, gpointer user_data); + +/** + * Create new lru hash + * @param maxsize maximum elements in a hash + * @param maxage maximum age of elemnt + * @param hash_func pointer to hash function + * @param key_equal_func pointer to function for comparing keys + * @return new rspamd_hash object + */ +rspamd_lru_hash_t* rspamd_lru_hash_new (GHashFunc hash_func, GEqualFunc key_equal_func, + gint maxsize, gint maxage, GDestroyNotify key_destroy, GDestroyNotify value_destroy); + +/** + * Create new lru hash with custom storage + * @param maxsize maximum elements in a hash + * @param maxage maximum age of elemnt + * @param hash_func pointer to hash function + * @param key_equal_func pointer to function for comparing keys + * @return new rspamd_hash object + */ +rspamd_lru_hash_t* rspamd_lru_hash_new_full (GHashFunc hash_func, 
GEqualFunc key_equal_func, + gint maxsize, gint maxage, GDestroyNotify key_destroy, GDestroyNotify value_destroy, + gpointer storage, lru_cache_insert_func insert_func, lru_cache_lookup_func lookup_func, + lru_cache_delete_func delete_func); +/** + * Lookup item from hash + * @param hash hash object + * @param key key to find + * @return value of key or NULL if key is not found + */ +gpointer rspamd_lru_hash_lookup (rspamd_lru_hash_t *hash, gpointer key, time_t now); +/** + * Insert item in hash + * @param hash hash object + * @param key key to insert + * @param value value of key + */ +void rspamd_lru_hash_insert (rspamd_lru_hash_t *hash, gpointer key, gpointer value, + time_t now, guint ttl); + +/** + * Remove lru hash + * @param hash hash object + */ + +void rspamd_lru_hash_destroy (rspamd_lru_hash_t *hash); + +#endif + +/* + * vi:ts=4 + */ diff --git a/src/libutil/http.c b/src/libutil/http.c new file mode 100644 index 000000000..491468352 --- /dev/null +++ b/src/libutil/http.c @@ -0,0 +1,1222 @@ +/* Copyright (c) 2014, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "http.h" +#include "utlist.h" +#include "util.h" +#include "printf.h" +#include "logger.h" + +struct rspamd_http_connection_private { + GString *buf; + gboolean new_header; + struct rspamd_http_header *header; + struct http_parser parser; + struct http_parser_settings parser_cb; + struct event ev; + struct timeval tv; + struct timeval *ptv; + struct rspamd_http_message *msg; + struct iovec *out; + guint outlen; + gsize wr_pos; + gsize wr_total; +}; + +enum http_magic_type { + HTTP_MAGIC_PLAIN = 0, + HTTP_MAGIC_HTML, + HTTP_MAGIC_CSS, + HTTP_MAGIC_JS, + HTTP_MAGIC_PNG, + HTTP_MAGIC_JPG +}; + +static const struct _rspamd_http_magic { + const gchar *ext; + const gchar *ct; +} http_file_types[] = { + [HTTP_MAGIC_PLAIN] = { "txt", "text/plain" }, + [HTTP_MAGIC_HTML] = { "html", "text/html" }, + [HTTP_MAGIC_CSS] = { "css", "text/css" }, + [HTTP_MAGIC_JS] = { "js", "application/javascript" }, + [HTTP_MAGIC_PNG] = { "png", "image/png" }, + [HTTP_MAGIC_JPG] = { "jpg", "image/jpeg" }, +}; + +static gchar *http_week[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" }; +static gchar *http_month[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; + + +#define HTTP_ERROR http_error_quark () +GQuark +http_error_quark (void) +{ + return g_quark_from_static_string ("http-error-quark"); +} + +static const gchar * +rspamd_http_code_to_str (gint code) +{ + if (code == 200) { + return "OK"; + } + else if 
(code == 404) { + return "Not found"; + } + else if (code == 403 || code == 401) { + return "Not authorized"; + } + else if (code >= 400 && code < 500) { + return "Bad request"; + } + else if (code >= 300 && code < 400) { + return "See Other"; + } + else if (code >= 500 && code < 600) { + return "Internal server error"; + } + + return "Unknown error"; +} + +/* + * Obtained from nginx + * Copyright (C) Igor Sysoev + */ +static guint mday[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; + +time_t +rspamd_http_parse_date (const gchar *header, gsize len) +{ + const gchar *p, *end; + gint month; + guint day, year, hour, min, sec; + guint64 time; + enum { + no = 0, rfc822, /* Tue, 10 Nov 2002 23:50:13 */ + rfc850, /* Tuesday, 10-Dec-02 23:50:13 */ + isoc /* Tue Dec 10 23:50:13 2002 */ + } fmt; + + fmt = 0; + if (len > 0) { + end = header + len; + } + else { + end = header + strlen (header); + } + +#if (NGX_SUPPRESS_WARN) + day = 32; + year = 2038; +#endif + + for (p = header; p < end; p++) { + if (*p == ',') { + break; + } + + if (*p == ' ') { + fmt = isoc; + break; + } + } + + for (p++; p < end; p++) + if (*p != ' ') { + break; + } + + if (end - p < 18) { + return (time_t)-1; + } + + if (fmt != isoc) { + if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') { + return (time_t)-1; + } + + day = (*p - '0') * 10 + *(p + 1) - '0'; + p += 2; + + if (*p == ' ') { + if (end - p < 18) { + return (time_t)-1; + } + fmt = rfc822; + + } + else if (*p == '-') { + fmt = rfc850; + + } + else { + return (time_t)-1; + } + + p++; + } + + switch (*p) { + + case 'J': + month = *(p + 1) == 'a' ? 0 : *(p + 2) == 'n' ? 5 : 6; + break; + + case 'F': + month = 1; + break; + + case 'M': + month = *(p + 2) == 'r' ? 2 : 4; + break; + + case 'A': + month = *(p + 1) == 'p' ? 
3 : 7; + break; + + case 'S': + month = 8; + break; + + case 'O': + month = 9; + break; + + case 'N': + month = 10; + break; + + case 'D': + month = 11; + break; + + default: + return (time_t)-1; + } + + p += 3; + + if ((fmt == rfc822 && *p != ' ') || (fmt == rfc850 && *p != '-')) { + return (time_t)-1; + } + + p++; + + if (fmt == rfc822) { + if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9' + || *(p + 2) < '0' || *(p + 2) > '9' || *(p + 3) < '0' + || *(p + 3) > '9') { + return (time_t)-1; + } + + year = (*p - '0') * 1000 + (*(p + 1) - '0') * 100 + + (*(p + 2) - '0') * 10 + *(p + 3) - '0'; + p += 4; + + } + else if (fmt == rfc850) { + if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') { + return (time_t)-1; + } + + year = (*p - '0') * 10 + *(p + 1) - '0'; + year += (year < 70) ? 2000 : 1900; + p += 2; + } + + if (fmt == isoc) { + if (*p == ' ') { + p++; + } + + if (*p < '0' || *p > '9') { + return (time_t)-1; + } + + day = *p++ - '0'; + + if (*p != ' ') { + if (*p < '0' || *p > '9') { + return (time_t)-1; + } + + day = day * 10 + *p++ - '0'; + } + + if (end - p < 14) { + return (time_t)-1; + } + } + + if (*p++ != ' ') { + return (time_t)-1; + } + + if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') { + return (time_t)-1; + } + + hour = (*p - '0') * 10 + *(p + 1) - '0'; + p += 2; + + if (*p++ != ':') { + return (time_t)-1; + } + + if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') { + return (time_t)-1; + } + + min = (*p - '0') * 10 + *(p + 1) - '0'; + p += 2; + + if (*p++ != ':') { + return (time_t)-1; + } + + if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9') { + return (time_t)-1; + } + + sec = (*p - '0') * 10 + *(p + 1) - '0'; + + if (fmt == isoc) { + p += 2; + + if (*p++ != ' ') { + return (time_t)-1; + } + + if (*p < '0' || *p > '9' || *(p + 1) < '0' || *(p + 1) > '9' + || *(p + 2) < '0' || *(p + 2) > '9' || *(p + 3) < '0' + || *(p + 3) > '9') { + return (time_t)-1; + } + + year = (*p - '0') * 1000 + (*(p 
+ 1) - '0') * 100 + + (*(p + 2) - '0') * 10 + *(p + 3) - '0'; + } + + if (hour > 23 || min > 59 || sec > 59) { + return (time_t)-1; + } + + if (day == 29 && month == 1) { + if ((year & 3) || ((year % 100 == 0) && (year % 400) != 0)) { + return (time_t)-1; + } + + } + else if (day > mday[month]) { + return (time_t)-1; + } + + /* + * shift new year to March 1 and start months from 1 (not 0), + * it is needed for Gauss' formula + */ + + if (--month <= 0) { + month += 12; + year -= 1; + } + + /* Gauss' formula for Gregorian days since March 1, 1 BC */ + + time = (guint64) ( + /* days in years including leap years since March 1, 1 BC */ + + 365 * year + year / 4 - year / 100 + year / 400 + + /* days before the month */ + + + 367 * month / 12 - 30 + + /* days before the day */ + + + day - 1 + + /* + * 719527 days were between March 1, 1 BC and March 1, 1970, + * 31 and 28 days were in January and February 1970 + */ + + - 719527 + 31 + 28) * 86400 + hour * 3600 + min * 60 + sec; + + return (time_t) time; +} + +static inline void +rspamd_http_check_date (struct rspamd_http_connection_private *priv) +{ + if (g_ascii_strcasecmp (priv->header->name->str, "date") == 0) { + priv->msg->date = rspamd_http_parse_date (priv->header->value->str, + priv->header->value->len); + } +} + +static gint +rspamd_http_on_url (http_parser* parser, const gchar *at, size_t length) +{ + struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + + g_string_append_len (priv->msg->url, at, length); + + return 0; +} + +static gint +rspamd_http_on_header_field (http_parser* parser, const gchar *at, size_t length) +{ + struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + + if (priv->header == NULL) { + priv->header = g_slice_alloc (sizeof (struct rspamd_http_header)); + priv->header->name = 
g_string_sized_new (32); + priv->header->value = g_string_sized_new (32); + } + else if (priv->new_header) { + DL_APPEND (priv->msg->headers, priv->header); + rspamd_http_check_date (priv); + priv->header = g_slice_alloc (sizeof (struct rspamd_http_header)); + priv->header->name = g_string_sized_new (32); + priv->header->value = g_string_sized_new (32); + } + + priv->new_header = FALSE; + g_string_append_len (priv->header->name, at, length); + + return 0; +} + +static gint +rspamd_http_on_header_value (http_parser* parser, const gchar *at, size_t length) +{ + struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + + if (priv->header == NULL) { + /* Should not happen */ + return -1; + } + + priv->new_header = TRUE; + g_string_append_len (priv->header->value, at, length); + + return 0; +} + +static int +rspamd_http_on_headers_complete (http_parser* parser) +{ + struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + + if (priv->header != NULL) { + DL_APPEND (priv->msg->headers, priv->header); + rspamd_http_check_date (priv); + priv->header = NULL; + } + + if (parser->content_length != 0 && parser->content_length != ULLONG_MAX) { + priv->msg->body = g_string_sized_new (parser->content_length + 1); + } + else { + priv->msg->body = g_string_sized_new (BUFSIZ); + } + + priv->msg->method = parser->method; + + return 0; +} + +static int +rspamd_http_on_body (http_parser* parser, const gchar *at, size_t length) +{ + struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + + g_string_append_len (priv->msg->body, at, length); + + if (conn->opts & RSPAMD_HTTP_BODY_PARTIAL) { + return (conn->body_handler (conn, priv->msg, at, length)); + } + + return 0; +} + +static int 
+rspamd_http_on_message_complete (http_parser* parser) +{ + struct rspamd_http_connection *conn = (struct rspamd_http_connection *)parser->data; + struct rspamd_http_connection_private *priv; + int ret = 0; + + priv = conn->priv; + + if (conn->body_handler != NULL) { + rspamd_http_connection_ref (conn); + if (conn->opts & RSPAMD_HTTP_BODY_PARTIAL) { + ret = conn->body_handler (conn, priv->msg, NULL, 0); + } + else { + ret = conn->body_handler (conn, priv->msg, priv->msg->body->str, priv->msg->body->len); + } + rspamd_http_connection_unref (conn); + } + + if (ret == 0) { + rspamd_http_connection_ref (conn); + ret = conn->finish_handler (conn, priv->msg); + rspamd_http_connection_unref (conn); + } + + return ret; +} + +static void +rspamd_http_write_helper (struct rspamd_http_connection *conn) +{ + struct rspamd_http_connection_private *priv; + struct iovec *start; + guint niov, i; + gsize remain; + gssize r; + GError *err; + + priv = conn->priv; + + if (priv->wr_pos == priv->wr_total) { + rspamd_http_connection_ref (conn); + conn->finish_handler (conn, priv->msg); + rspamd_http_connection_unref (conn); + return; + } + + start = &priv->out[0]; + niov = priv->outlen; + remain = priv->wr_pos; + for (i = 0; i < priv->outlen && remain > 0; i ++) { + /* Find out the first iov required */ + start = &priv->out[i]; + if (start->iov_len <= remain) { + remain -= start->iov_len; + start = &priv->out[i + 1]; + niov --; + } + else { + start->iov_base = (void *)((char *)start->iov_base + remain); + start->iov_len -= remain; + remain = 0; + } + } + + r = writev (conn->fd, start, MIN (IOV_MAX, niov)); + + if (r == -1) { + err = g_error_new (HTTP_ERROR, errno, "IO write error: %s", strerror (errno)); + rspamd_http_connection_ref (conn); + conn->error_handler (conn, err); + rspamd_http_connection_unref (conn); + g_error_free (err); + return; + } + else { + priv->wr_pos += r; + } + + if (priv->wr_pos >= priv->wr_total) { + rspamd_http_connection_ref (conn); + conn->finish_handler 
(conn, priv->msg); + rspamd_http_connection_unref (conn); + } + else { + /* Want to write more */ + event_add (&priv->ev, priv->ptv); + } +} + +static void +rspamd_http_event_handler (int fd, short what, gpointer ud) +{ + struct rspamd_http_connection *conn = (struct rspamd_http_connection *)ud; + struct rspamd_http_connection_private *priv; + GString *buf; + gssize r; + GError *err; + + priv = conn->priv; + buf = priv->buf; + + if (what == EV_READ) { + r = read (fd, buf->str, buf->allocated_len); + if (r == -1) { + err = g_error_new (HTTP_ERROR, errno, "IO read error: %s", strerror (errno)); + conn->error_handler (conn, err); + g_error_free (err); + return; + } + else { + buf->len = r; + rspamd_http_connection_ref (conn); + if (http_parser_execute (&priv->parser, &priv->parser_cb, buf->str, r) != (size_t)r) { + err = g_error_new (HTTP_ERROR, priv->parser.http_errno, + "HTTP parser error: %s", http_errno_description (priv->parser.http_errno)); + conn->error_handler (conn, err); + g_error_free (err); + rspamd_http_connection_unref (conn); + return; + } + rspamd_http_connection_unref (conn); + } + } + else if (what == EV_TIMEOUT) { + err = g_error_new (HTTP_ERROR, ETIMEDOUT, + "IO timeout"); + rspamd_http_connection_ref (conn); + conn->error_handler (conn, err); + rspamd_http_connection_unref (conn); + g_error_free (err); + return; + } + else if (what == EV_WRITE) { + rspamd_http_write_helper (conn); + } +} + +struct rspamd_http_connection* +rspamd_http_connection_new (rspamd_http_body_handler_t body_handler, + rspamd_http_error_handler_t error_handler, + rspamd_http_finish_handler_t finish_handler, + enum rspamd_http_options opts, + enum rspamd_http_connection_type type) +{ + struct rspamd_http_connection *new; + struct rspamd_http_connection_private *priv; + + if (error_handler == NULL || finish_handler == NULL) { + return NULL; + } + + new = g_slice_alloc0 (sizeof (struct rspamd_http_connection)); + new->opts = opts; + new->type = type; + new->body_handler = 
body_handler; + new->error_handler = error_handler; + new->finish_handler = finish_handler; + new->fd = -1; + new->ref = 1; + + /* Init priv */ + priv = g_slice_alloc0 (sizeof (struct rspamd_http_connection_private)); + http_parser_init (&priv->parser, type == RSPAMD_HTTP_SERVER ? HTTP_REQUEST : HTTP_RESPONSE); + priv->parser.data = new; + priv->parser_cb.on_url = rspamd_http_on_url; + priv->parser_cb.on_header_field = rspamd_http_on_header_field; + priv->parser_cb.on_header_value = rspamd_http_on_header_value; + priv->parser_cb.on_headers_complete = rspamd_http_on_headers_complete; + priv->parser_cb.on_body = rspamd_http_on_body; + priv->parser_cb.on_message_complete = rspamd_http_on_message_complete; + + new->priv = priv; + + return new; +} + +void +rspamd_http_connection_reset (struct rspamd_http_connection *conn) +{ + struct rspamd_http_connection_private *priv; + struct rspamd_http_message *msg; + + priv = conn->priv; + msg = priv->msg; + + /* Clear request */ + if (msg != NULL) { + rspamd_http_message_free (msg); + priv->msg = NULL; + } + + /* Clear priv */ + event_del (&priv->ev); + if (priv->buf != NULL) { + g_string_free (priv->buf, TRUE); + priv->buf = NULL; + } + if (priv->out != NULL) { + g_slice_free1 (sizeof (struct iovec) * priv->outlen, priv->out); + priv->out = NULL; + } +} + +void +rspamd_http_connection_free (struct rspamd_http_connection *conn) +{ + struct rspamd_http_connection_private *priv; + + priv = conn->priv; + rspamd_http_connection_reset (conn); + g_slice_free1 (sizeof (struct rspamd_http_connection_private), priv); + g_slice_free1 (sizeof (struct rspamd_http_connection), conn); +} + +void +rspamd_http_connection_read_message (struct rspamd_http_connection *conn, + gpointer ud, gint fd, struct timeval *timeout, struct event_base *base) +{ + struct rspamd_http_connection_private *priv = conn->priv; + struct rspamd_http_message *req; + + conn->fd = fd; + conn->ud = ud; + req = rspamd_http_new_message (conn->type == RSPAMD_HTTP_SERVER ? 
HTTP_REQUEST : HTTP_RESPONSE); + priv->msg = req; + + if (timeout == NULL) { + priv->ptv = NULL; + } + else { + memcpy (&priv->tv, timeout, sizeof (struct timeval)); + priv->ptv = &priv->tv; + } + priv->header = NULL; + priv->buf = g_string_sized_new (BUFSIZ); + priv->new_header = TRUE; + + event_set (&priv->ev, fd, EV_READ | EV_PERSIST, rspamd_http_event_handler, conn); + event_base_set (base, &priv->ev); + event_add (&priv->ev, priv->ptv); +} + +void +rspamd_http_connection_write_message (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, const gchar *host, const gchar *mime_type, + gpointer ud, gint fd, struct timeval *timeout, struct event_base *base) +{ + struct rspamd_http_connection_private *priv = conn->priv; + struct rspamd_http_header *hdr; + struct tm t, *ptm; + gchar datebuf[64], *pbody; + gint i; + gsize bodylen; + + conn->fd = fd; + conn->ud = ud; + priv->msg = msg; + + if (timeout == NULL) { + priv->ptv = NULL; + } + else { + memcpy (&priv->tv, timeout, sizeof (struct timeval)); + priv->ptv = &priv->tv; + } + priv->header = NULL; + priv->buf = g_string_sized_new (128); + + if (msg->method < HTTP_SYMBOLS) { + if (msg->body == NULL || msg->body->len == 0) { + pbody = NULL; + bodylen = 0; + priv->outlen = 2; + msg->method = HTTP_GET; + } + else { + pbody = msg->body->str; + bodylen = msg->body->len; + priv->outlen = 3; + msg->method = HTTP_POST; + } + } + else if (msg->body != NULL) { + pbody = msg->body->str; + bodylen = msg->body->len; + priv->outlen = 2; + } + else { + /* Invalid body for spamc method */ + return; + } + + if (conn->type == RSPAMD_HTTP_SERVER) { + /* Format reply */ + if (msg->method < HTTP_SYMBOLS) { + ptm = gmtime (&msg->date); + t = *ptm; + rspamd_snprintf (datebuf, sizeof (datebuf), "%s, %02d %s %4d %02d:%02d:%02d GMT", + http_week[t.tm_wday], + t.tm_mday, + http_month[t.tm_mon], + t.tm_year + 1900, + t.tm_hour, + t.tm_min, + t.tm_sec); + if (mime_type == NULL) { + mime_type = "text/plain"; + } + 
rspamd_printf_gstring (priv->buf, "HTTP/1.1 %d %s\r\n" + "Connection: close\r\n" + "Server: %s\r\n" + "Date: %s\r\n" + "Content-Length: %z\r\n" + "Content-Type: %s\r\n", + msg->code, rspamd_http_code_to_str (msg->code), + "rspamd/" RVERSION, + datebuf, + msg->body->len, + mime_type); + } + else { + /* Legacy spamd reply */ + rspamd_printf_gstring (priv->buf, "RSPAMD/1.3 0 EX_OK\r\n"); + } + } + else { + /* Format request */ + if (host != NULL) { + rspamd_printf_gstring (priv->buf, "%s %v HTTP/1.1\r\n" + "Connection: close\r\n" + "Host: %s\r\n" + "Content-Length: %z\r\n", + http_method_str (msg->method), msg->url, host, msg->body->len); + } + else { + /* Fallback to HTTP/1.0 */ + rspamd_printf_gstring (priv->buf, "%s %v HTTP/1.0\r\n" + "Content-Length: %z\r\n", + http_method_str (msg->method), msg->url, msg->body->len); + } + } + /* Allocate iov */ + priv->wr_total = bodylen + priv->buf->len + 2; + DL_FOREACH (msg->headers, hdr) { + /* <: ><\r\n> */ + priv->wr_total += hdr->name->len + hdr->value->len + 4; + priv->outlen += 4; + } + priv->out = g_slice_alloc (sizeof (struct iovec) * priv->outlen); + priv->wr_pos = 0; + + /* Now set up all iov */ + priv->out[0].iov_base = priv->buf->str; + priv->out[0].iov_len = priv->buf->len; + i = 1; + LL_FOREACH (msg->headers, hdr) { + priv->out[i].iov_base = hdr->name->str; + priv->out[i++].iov_len = hdr->name->len; + priv->out[i].iov_base = ": "; + priv->out[i++].iov_len = 2; + priv->out[i].iov_base = hdr->value->str; + priv->out[i++].iov_len = hdr->value->len; + priv->out[i].iov_base = "\r\n"; + priv->out[i++].iov_len = 2; + } + if (msg->method < HTTP_SYMBOLS) { + priv->out[i].iov_base = "\r\n"; + priv->out[i++].iov_len = 2; + } + else { + /* No CRLF for compatibility reply */ + priv->wr_total -= 2; + } + if (msg->body != NULL) { + priv->out[i].iov_base = pbody; + priv->out[i++].iov_len = bodylen; + } + + event_set (&priv->ev, fd, EV_WRITE, rspamd_http_event_handler, conn); + event_base_set (base, &priv->ev); + event_add 
(&priv->ev, priv->ptv); +} + +struct rspamd_http_message* +rspamd_http_new_message (enum http_parser_type type) +{ + struct rspamd_http_message *new; + + new = g_slice_alloc (sizeof (struct rspamd_http_message)); + if (type == HTTP_REQUEST) { + new->url = g_string_sized_new (32); + } + else { + new->url = NULL; + new->code = 200; + } + new->headers = NULL; + new->date = 0; + new->body = NULL; + new->type = type; + new->method = HTTP_GET; + + return new; +} + +void +rspamd_http_message_free (struct rspamd_http_message *msg) +{ + struct rspamd_http_header *hdr, *tmp_hdr; + + LL_FOREACH_SAFE (msg->headers, hdr, tmp_hdr) { + g_string_free (hdr->name, TRUE); + g_string_free (hdr->value, TRUE); + g_slice_free1 (sizeof (struct rspamd_http_header), hdr); + } + if (msg->body != NULL) { + g_string_free (msg->body, TRUE); + } + if (msg->url != NULL) { + g_string_free (msg->url, TRUE); + } + g_slice_free1 (sizeof (struct rspamd_http_message), msg); +} + +void rspamd_http_message_add_header (struct rspamd_http_message *msg, + const gchar *name, + const gchar *value) +{ + struct rspamd_http_header *hdr; + + if (msg != NULL && name != NULL && value != NULL) { + hdr = g_slice_alloc (sizeof (struct rspamd_http_header)); + hdr->name = g_string_new (name); + hdr->value = g_string_new (value); + DL_APPEND (msg->headers, hdr); + } +} + +const gchar* +rspamd_http_message_find_header (struct rspamd_http_message *msg, const gchar *name) +{ + struct rspamd_http_header *hdr; + const gchar *res = NULL; + guint slen = strlen (name); + + if (msg != NULL) { + LL_FOREACH (msg->headers, hdr) { + if (hdr->name->len == slen) { + if (memcmp (hdr->name->str, name, slen) == 0) { + res = hdr->value->str; + break; + } + } + } + } + + return res; +} + +/* + * HTTP router functions + */ + +static void +rspamd_http_entry_free (struct rspamd_http_connection_entry *entry) +{ + if (entry != NULL) { + close (entry->conn->fd); + rspamd_http_connection_unref (entry->conn); + g_slice_free1 (sizeof (struct 
rspamd_http_connection_entry), entry); + if (entry->rt->finish_handler) { + entry->rt->finish_handler (entry); + } + } +} + +static void +rspamd_http_router_error_handler (struct rspamd_http_connection *conn, GError *err) +{ + struct rspamd_http_connection_entry *entry = conn->ud; + struct rspamd_http_message *msg; + + if (entry->is_reply) { + /* At this point we need to finish this session and close owned socket */ + if (entry->rt->error_handler != NULL) { + entry->rt->error_handler (entry, err); + } + rspamd_http_entry_free (entry); + } + else { + /* Here we can write a reply to a client */ + if (entry->rt->error_handler != NULL) { + entry->rt->error_handler (entry, err); + } + msg = rspamd_http_new_message (HTTP_RESPONSE); + msg->date = time (NULL); + msg->code = err->code; + msg->body = g_string_new (err->message); + rspamd_http_connection_reset (entry->conn); + rspamd_http_connection_write_message (entry->conn, msg, NULL, + "text/plain", entry, entry->conn->fd, entry->rt->ptv, entry->rt->ev_base); + entry->is_reply = TRUE; + } +} + +static const gchar * +rspamd_http_router_detect_ct (const gchar *path) +{ + const gchar *dot; + guint i; + + dot = strrchr (path, '.'); + if (dot == NULL) { + return http_file_types[HTTP_MAGIC_PLAIN].ct; + } + dot ++; + + for (i = 0; i < G_N_ELEMENTS (http_file_types); i ++) { + if (strcmp (http_file_types[i].ext, dot) == 0) { + return http_file_types[i].ct; + } + } + + return http_file_types[HTTP_MAGIC_PLAIN].ct; +} + +static gboolean +rspamd_http_router_try_file (struct rspamd_http_connection_entry *entry, + struct rspamd_http_message *msg, gboolean expand_path) +{ + struct stat st; + gint fd; + gchar filebuf[PATH_MAX], realbuf[PATH_MAX], *dir; + struct rspamd_http_message *reply_msg; + + /* XXX: filter filename component only */ + if (expand_path) { + rspamd_snprintf (filebuf, sizeof (filebuf), "%s%c%v", + entry->rt->default_fs_path, G_DIR_SEPARATOR, msg->url); + } + else { + rspamd_snprintf (filebuf, sizeof (filebuf), "%v", + 
msg->url); + } + + if (realpath (filebuf, realbuf) == NULL || + lstat (realbuf, &st) == -1) { + return FALSE; + } + + if (S_ISDIR (st.st_mode) && expand_path) { + /* Try to append 'index.html' to the url */ + g_string_append_printf (msg->url, "%c%s", G_DIR_SEPARATOR, + "index.html"); + return rspamd_http_router_try_file (entry, msg, FALSE); + } + else if (!S_ISREG (st.st_mode)) { + return FALSE; + } + + /* We also need to ensure that file is inside the defined dir */ + dir = dirname (realbuf); + if (dir == NULL || strncmp (dir, entry->rt->default_fs_path, + strlen (entry->rt->default_fs_path)) != 0) { + return FALSE; + } + + fd = open (realbuf, O_RDONLY); + if (fd == -1) { + return FALSE; + } + + reply_msg = rspamd_http_new_message (HTTP_RESPONSE); + reply_msg->date = time (NULL); + reply_msg->code = 200; + reply_msg->body = g_string_sized_new (st.st_size); + + if (read (fd, reply_msg->body->str, st.st_size) != st.st_size) { + close (fd); + rspamd_http_message_free (reply_msg); + return FALSE; + } + + reply_msg->body->len = st.st_size; + reply_msg->body->str[st.st_size] = '\0'; + close (fd); + + rspamd_http_connection_reset (entry->conn); + + /* XXX: detect content type */ + rspamd_http_connection_write_message (entry->conn, reply_msg, NULL, + rspamd_http_router_detect_ct (realbuf), entry, entry->conn->fd, + entry->rt->ptv, entry->rt->ev_base); + + return TRUE; +} + +static int +rspamd_http_router_finish_handler (struct rspamd_http_connection *conn, + struct rspamd_http_message *msg) +{ + struct rspamd_http_connection_entry *entry = conn->ud; + rspamd_http_router_handler_t handler = NULL; + gpointer found; + struct rspamd_http_message *err_msg; + GError *err; + + G_STATIC_ASSERT (sizeof (rspamd_http_router_handler_t) == sizeof (gpointer)); + + if (entry->is_reply) { + /* Request is finished, it is safe to free a connection */ + rspamd_http_entry_free (entry); + } + else { + /* Search for path */ + if (msg->url != NULL && msg->url->len != 0) { + found = 
g_hash_table_lookup (entry->rt->paths, msg->url->str); + memcpy (&handler, &found, sizeof (found)); + } + entry->is_reply = TRUE; + if (handler != NULL) { + return handler (entry, msg); + } + else { + if (entry->rt->default_fs_path == NULL || + rspamd_http_router_try_file (entry, msg, TRUE)) { + err = g_error_new (HTTP_ERROR, 404, + "Not found"); + if (entry->rt->error_handler != NULL) { + entry->rt->error_handler (entry, err); + } + err_msg = rspamd_http_new_message (HTTP_RESPONSE); + err_msg->date = time (NULL); + err_msg->code = err->code; + err_msg->body = g_string_new (err->message); + rspamd_http_connection_reset (entry->conn); + rspamd_http_connection_write_message (entry->conn, err_msg, NULL, + "text/plain", entry, entry->conn->fd, + entry->rt->ptv, entry->rt->ev_base); + g_error_free (err); + } + } + } + + return 0; +} + +struct rspamd_http_connection_router* +rspamd_http_router_new (rspamd_http_router_error_handler_t eh, + rspamd_http_router_finish_handler_t fh, + struct timeval *timeout, struct event_base *base, + const char *default_fs_path) +{ + struct rspamd_http_connection_router* new; + struct stat st; + + new = g_slice_alloc (sizeof (struct rspamd_http_connection_router)); + new->paths = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); + new->conns = NULL; + new->error_handler = eh; + new->finish_handler = fh; + new->ev_base = base; + if (timeout) { + new->tv = *timeout; + new->ptv = &new->tv; + } + else { + new->ptv = NULL; + } + + new->default_fs_path = NULL; + if (default_fs_path != NULL) { + if (stat (default_fs_path, &st) == -1) { + msg_err ("cannot stat %s", default_fs_path); + } + else { + if (!S_ISDIR (st.st_mode)) { + msg_err ("path %s is not a directory", default_fs_path); + } + else { + new->default_fs_path = g_strdup (default_fs_path); + } + } + } + + return new; +} + +void +rspamd_http_router_add_path (struct rspamd_http_connection_router *router, + const gchar *path, rspamd_http_router_handler_t handler) +{ + gpointer 
ptr; + G_STATIC_ASSERT (sizeof (rspamd_http_router_handler_t) == sizeof (gpointer)); + + if (path != NULL && handler != NULL && router != NULL) { + memcpy (&ptr, &handler, sizeof (ptr)); + g_hash_table_insert (router->paths, (gpointer)path, ptr); + } +} + +void +rspamd_http_router_handle_socket (struct rspamd_http_connection_router *router, + gint fd, gpointer ud) +{ + struct rspamd_http_connection_entry *conn; + + conn = g_slice_alloc (sizeof (struct rspamd_http_connection_entry)); + conn->rt = router; + conn->ud = ud; + conn->is_reply = FALSE; + + conn->conn = rspamd_http_connection_new (NULL, rspamd_http_router_error_handler, + rspamd_http_router_finish_handler, 0, RSPAMD_HTTP_SERVER); + + rspamd_http_connection_read_message (conn->conn, conn, fd, router->ptv, + router->ev_base); + LL_PREPEND (router->conns, conn); +} + +void +rspamd_http_router_free (struct rspamd_http_connection_router *router) +{ + struct rspamd_http_connection_entry *conn, *tmp; + + if (router) { + LL_FOREACH_SAFE (router->conns, conn, tmp) { + rspamd_http_entry_free (conn); + } + + if (router->default_fs_path != NULL) { + g_free (router->default_fs_path); + } + g_hash_table_unref (router->paths); + g_slice_free1 (sizeof (struct rspamd_http_connection_router), router); + } +} diff --git a/src/libutil/http.h b/src/libutil/http.h new file mode 100644 index 000000000..8af4429c6 --- /dev/null +++ b/src/libutil/http.h @@ -0,0 +1,278 @@ +/* Copyright (c) 2014, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HTTP_H_ +#define HTTP_H_ + +/** + * @file http.h + * + * This is an interface for HTTP client and conn. This code uses HTTP parser written + * by Joyent Inc based on nginx code. 
/**
 * Role of a connection: a server connection parses requests, a client
 * connection parses responses
 */
enum rspamd_http_connection_type {
	RSPAMD_HTTP_SERVER,
	RSPAMD_HTTP_CLIENT
};

/**
 * HTTP header structure
 */
struct rspamd_http_header {
	GString *name;							/**< header name */
	GString *value;							/**< header value */
	struct rspamd_http_header *next, *prev;	/**< links of the headers list */
};

/**
 * HTTP message structure, used for requests and replies
 */
struct rspamd_http_message {
	GString *url;							/**< request url, NULL for messages created as replies */
	struct rspamd_http_header *headers;		/**< list of headers */
	GString *body;							/**< body, NULL until it is set or the headers are parsed */
	enum http_parser_type type;				/**< request or response */
	time_t date;							/**< value of the Date header */
	gint code;								/**< reply status code, 200 by default for replies */
	enum http_method method;				/**< request method, HTTP_GET by default */
};


/**
 * Options for HTTP connection
 */
enum rspamd_http_options {
	RSPAMD_HTTP_BODY_PARTIAL = 0x1//!< pass each body chunk to the body handler as soon as it is parsed
};

struct rspamd_http_connection_private;
struct rspamd_http_connection;
struct rspamd_http_connection_router;
struct rspamd_http_connection_entry;

/**
 * Body callback. Without RSPAMD_HTTP_BODY_PARTIAL it is called once with the
 * whole body when the message is complete; with it, it is called for every
 * chunk and finally with chunk == NULL and len == 0.
 * A non-zero result is returned to the parser and the finish handler is not
 * called.
 */
typedef int (*rspamd_http_body_handler_t) (struct rspamd_http_connection *conn,
		struct rspamd_http_message *msg,
		const gchar *chunk,
		gsize len);

/**
 * Error callback for IO errors, timeouts and parser errors; err is freed by
 * the caller after the callback returns
 */
typedef void (*rspamd_http_error_handler_t) (struct rspamd_http_connection *conn, GError *err);

/**
 * Called when a message has been completely read or completely written
 */
typedef int (*rspamd_http_finish_handler_t) (struct rspamd_http_connection *conn,
		struct rspamd_http_message *msg);

/** Router callback for a request whose url matches a registered path */
typedef int (*rspamd_http_router_handler_t) (struct rspamd_http_connection_entry *conn_ent,
		struct rspamd_http_message *msg);
/** Router callback for errors in a session */
typedef void (*rspamd_http_router_error_handler_t) (struct rspamd_http_connection_entry *conn_ent,
		GError *err);
/** Router callback invoked when a session is destroyed */
typedef void (*rspamd_http_router_finish_handler_t) (struct rspamd_http_connection_entry *conn_ent);

/**
 * HTTP connection structure
 */
struct rspamd_http_connection {
	struct rspamd_http_connection_private *priv;	/**< parser, buffers and event state */
	rspamd_http_body_handler_t body_handler;		/**< optional, may be NULL */
	rspamd_http_error_handler_t error_handler;		/**< mandatory */
	rspamd_http_finish_handler_t finish_handler;	/**< mandatory */
	gpointer ud;									/**< opaque user data set by read/write calls */
	enum rspamd_http_options opts;
	enum rspamd_http_connection_type type;
	gint fd;										/**< socket, -1 until a read or write is started */
	gint ref;										/**< reference counter, starts at 1 */
};
rspamd_http_connection_router *rt; + struct rspamd_http_connection *conn; + gpointer ud; + gboolean is_reply; + struct rspamd_http_connection_entry *next; +}; + +struct rspamd_http_connection_router { + struct rspamd_http_connection_entry *conns; + GHashTable *paths; + struct timeval tv; + struct timeval *ptv; + struct event_base *ev_base; + gchar *default_fs_path; + rspamd_http_router_error_handler_t error_handler; + rspamd_http_router_finish_handler_t finish_handler; +}; + +/** + * Create new http connection + * @param handler_t handler_t for body + * @param opts options + * @return new connection structure + */ +struct rspamd_http_connection* rspamd_http_connection_new ( + rspamd_http_body_handler_t body_handler, + rspamd_http_error_handler_t error_handler, + rspamd_http_finish_handler_t finish_handler, + enum rspamd_http_options opts, + enum rspamd_http_connection_type type); + +/** + * Handle a request using socket fd and user data ud + * @param conn connection structure + * @param ud opaque user data + * @param fd fd to read/write + */ +void rspamd_http_connection_read_message ( + struct rspamd_http_connection *conn, + gpointer ud, + gint fd, + struct timeval *timeout, + struct event_base *base); + +/** + * Send reply using initialised connection + * @param conn connection structure + * @param msg HTTP message + * @param ud opaque user data + * @param fd fd to read/write + */ +void rspamd_http_connection_write_message ( + struct rspamd_http_connection *conn, + struct rspamd_http_message *msg, + const gchar *host, + const gchar *mime_type, + gpointer ud, + gint fd, + struct timeval *timeout, + struct event_base *base); + +/** + * Free connection structure + * @param conn + */ +void rspamd_http_connection_free (struct rspamd_http_connection *conn); + +/** + * Increase refcount for a connection + * @param conn + * @return + */ +static inline struct rspamd_http_connection * +rspamd_http_connection_ref (struct rspamd_http_connection *conn) +{ + conn->ref ++; + 
return conn; +} + +/** + * Decrease a refcount for a connection and free it if refcount is equal to zero + * @param conn + */ +static void +rspamd_http_connection_unref (struct rspamd_http_connection *conn) +{ + if (--conn->ref <= 0) { + rspamd_http_connection_free (conn); + } +} + +/** + * Reset connection for a new request + * @param conn + */ +void rspamd_http_connection_reset (struct rspamd_http_connection *conn); + +/** + * Create new HTTP reply + * @param code code to pass + * @return new reply object + */ +struct rspamd_http_message* rspamd_http_new_message (enum http_parser_type type); + +/** + * Append a header to reply + * @param rep + * @param name + * @param value + */ +void rspamd_http_message_add_header (struct rspamd_http_message *rep, const gchar *name, const gchar *value); + +/** + * Search for a specified header in message + * @param rep message + * @param name name of header + */ +const gchar* rspamd_http_message_find_header (struct rspamd_http_message *rep, const gchar *name); + +/** + * Free HTTP reply + * @param rep + */ +void rspamd_http_message_free (struct rspamd_http_message *msg); + +/** + * Parse HTTP date header and return it as time_t + * @param header HTTP date header + * @param len length of header + * @return time_t or (time_t)-1 in case of error + */ +time_t rspamd_http_parse_date (const gchar *header, gsize len); + +/** + * Create new http connection router and the associated HTTP connection + * @param eh error handler callback + * @param fh finish handler callback + * @param default_fs_path if not NULL try to serve static files from + * the specified directory + * @return + */ +struct rspamd_http_connection_router* rspamd_http_router_new ( + rspamd_http_router_error_handler_t eh, + rspamd_http_router_finish_handler_t fh, + struct timeval *timeout, + struct event_base *base, + const char *default_fs_path); + +/** + * Add new path to the router + */ +void rspamd_http_router_add_path (struct rspamd_http_connection_router *router, + 
const gchar *path, rspamd_http_router_handler_t handler); + +/** + * Handle new accepted socket + * @param router router object + * @param fd server socket + * @param ud opaque userdata + */ +void rspamd_http_router_handle_socket (struct rspamd_http_connection_router *router, + gint fd, gpointer ud); + +/** + * Free router and all connections associated + * @param router + */ +void rspamd_http_router_free (struct rspamd_http_connection_router *router); + +#endif /* HTTP_H_ */ diff --git a/src/libutil/logger.c b/src/libutil/logger.c new file mode 100644 index 000000000..01814d24d --- /dev/null +++ b/src/libutil/logger.c @@ -0,0 +1,769 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+
+#include "config.h"
+#include "logger.h"
+#include "util.h"
+#include "main.h"
+#include "map.h"
+
+/* How many times a message must repeat before it is treated as a repeated one */
+#define REPEATS_MIN 3
+#define REPEATS_MAX 300
+
+/**
+ * Static structure that store logging parameters
+ * It is NOT shared between processes and is created by main process
+ */
+struct rspamd_logger_s {
+	rspamd_log_func_t log_func;
+	struct config_file *cfg;
+	struct {
+		guint32 size;
+		guint32 used;
+		u_char *buf;
+	} io_buf;
+	gint fd;
+	gboolean is_buffered;
+	gboolean enabled;
+	gboolean is_debug;
+	gboolean throttling;
+	time_t throttling_time;
+	sig_atomic_t do_reopen_log;
+	enum rspamd_log_type type;
+	pid_t pid;
+	GQuark process_type;
+	radix_tree_t *debug_ip;
+	guint32 last_line_cksum;
+	guint32 repeats;
+	gchar *saved_message;
+	gchar *saved_function;
+	GMutex *mtx;
+};
+
+static const gchar lf_chr = '\n';
+
+static rspamd_logger_t *default_logger = NULL;
+
+
+static void
+syslog_log_function (const gchar * log_domain, const gchar *function,
+		GLogLevelFlags log_level, const gchar * message,
+		gboolean forced, gpointer arg);
+static void
+file_log_function (const gchar * log_domain, const gchar *function,
+		GLogLevelFlags log_level, const gchar * message,
+		gboolean forced, gpointer arg);
+
+/**
+ * Calculate checksum for log line (used for repeating logic)
+ */
+static inline guint32
+rspamd_log_calculate_cksum (const gchar *message, size_t mlen)
+{
+	const gchar *bp = message;
+	const gchar *be = bp + mlen;
+	guint32 hval = 0;
+
+	while (bp < be) {
+		hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24);
+		hval ^= (guint32)*bp++;
+	}
+
+	/* return our new hash value */
+	return hval;
+
+}
+
+/*
+ * Write a line to log file (unbuffered)
+ * `data' is an iovec array of `count' elements when is_iov is TRUE and
+ * a plain buffer of `count' bytes otherwise
+ */
+static void
+direct_write_log_line (rspamd_logger_t *rspamd_log, void *data, gint count, gboolean is_iov)
+{
+	gchar errmsg[128];
+	struct iovec *iov;
+	const gchar *line;
+	gint r;
+
+	if (rspamd_log->enabled) {
+		if (is_iov) {
+			iov = (struct iovec *)data;
+			r = writev (rspamd_log->fd, iov, count);
+		}
+		else {
+			line = (const gchar *)data;
+			r = write (rspamd_log->fd, line, count);
+		}
+		if (r == -1) {
+			/* We cannot write message to file, so we need to detect error and make decision */
+			r = rspamd_snprintf (errmsg, sizeof (errmsg), "direct_write_log_line: cannot write log line: %s", strerror (errno));
+			if (errno == EIO || errno == EINTR) {
+				/* Descriptor is somehow invalid, try to restart */
+				reopen_log (rspamd_log);
+				if (write (rspamd_log->fd, errmsg, r) != -1) {
+					/* Try again */
+					direct_write_log_line (rspamd_log, data, count, is_iov);
+				}
+			}
+			else if (errno == EFAULT || errno == EINVAL || errno == EFBIG || errno == ENOSPC) {
+				/* Rare case */
+				rspamd_log->throttling = TRUE;
+				rspamd_log->throttling_time = time (NULL);
+			}
+			else if (errno == EPIPE || errno == EBADF) {
+				/* We write to some pipe and it disappears, disable logging or we has opened bad file descriptor */
+				rspamd_log->enabled = FALSE;
+			}
+		}
+		else if (rspamd_log->throttling) {
+			rspamd_log->throttling = FALSE;
+		}
+	}
+}
+
+/*
+ * Make a message safe for a single log line (in place): line breaks become
+ * spaces, any other non-printable or 8-bit byte becomes '?'
+ */
+static void
+rspamd_escape_log_string (gchar *str)
+{
+	guchar *p = (guchar *)str;
+
+	while (*p) {
+		/* Line breaks must be tested first: they are not printable either */
+		if (*p == '\n' || *p == '\r') {
+			*p = ' ';
+		}
+		else if ((*p & 0x80) || !g_ascii_isprint (*p)) {
+			*p = '?';
+		}
+		p ++;
+	}
+}
+
+/* Logging utility functions */
+/*
+ * Open the log according to cfg->log_type; for file logs the file is created
+ * if needed and its ownership is changed to uid/gid
+ * Returns 0 on success and -1 on failure
+ */
+gint
+open_log_priv (rspamd_logger_t *rspamd_log, uid_t uid, gid_t gid)
+{
+	switch (rspamd_log->cfg->log_type) {
+	case RSPAMD_LOG_CONSOLE:
+		/* Do nothing with console */
+		rspamd_log->enabled = TRUE;
+		return 0;
+	case RSPAMD_LOG_SYSLOG:
+		openlog ("rspamd", LOG_NDELAY | LOG_PID, rspamd_log->cfg->log_facility);
+		rspamd_log->enabled = TRUE;
+		return 0;
+	case RSPAMD_LOG_FILE:
+		rspamd_log->fd = open (rspamd_log->cfg->log_file, O_CREAT | O_WRONLY | O_APPEND,
+				S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH);
+		if (rspamd_log->fd == -1) {
+			fprintf (stderr, "open_log: cannot open desired log file: %s, %s",
+					rspamd_log->cfg->log_file, strerror (errno));
+			return -1;
+		}
+		if (fchown (rspamd_log->fd, uid, gid) == -1) {
+			fprintf (stderr, "open_log: cannot chown desired log file: %s, %s",
+					rspamd_log->cfg->log_file, strerror (errno));
+			close (rspamd_log->fd);
+			return -1;
+		}
+		rspamd_log->enabled = TRUE;
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * Close the log: flush the buffer, emit a pending "repeated" summary for
+ * file logs, sync and close the descriptor and disable the logger
+ * uid and gid are not used here
+ */
+void
+close_log_priv (rspamd_logger_t *rspamd_log, uid_t uid, gid_t gid)
+{
+	gchar tmpbuf[256];
+	flush_log_buf (rspamd_log);
+
+	switch (rspamd_log->type) {
+	case RSPAMD_LOG_CONSOLE:
+		/* Do nothing special */
+		break;
+	case RSPAMD_LOG_SYSLOG:
+		closelog ();
+		break;
+	case RSPAMD_LOG_FILE:
+		if (rspamd_log->enabled) {
+			if (rspamd_log->repeats > REPEATS_MIN) {
+				rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "Last message repeated %ud times", rspamd_log->repeats);
+				rspamd_log->repeats = 0;
+				if (rspamd_log->saved_message) {
+					file_log_function (NULL, rspamd_log->saved_function, rspamd_log->cfg->log_level, rspamd_log->saved_message, TRUE, rspamd_log);
+					g_free (rspamd_log->saved_message);
+					g_free (rspamd_log->saved_function);
+					rspamd_log->saved_message = NULL;
+					rspamd_log->saved_function = NULL;
+				}
+				/* It is safe to use temporary buffer here as it is not static */
+				file_log_function (NULL, __FUNCTION__, rspamd_log->cfg->log_level, tmpbuf, TRUE, rspamd_log);
+				/*
+				 * The summary may have landed in the io buffer: push it out and
+				 * fall through so the descriptor is still synced and closed
+				 * (returning here used to leak it and leave the logger enabled)
+				 */
+				flush_log_buf (rspamd_log);
+			}
+
+			if (fsync (rspamd_log->fd) == -1) {
+				msg_err ("error syncing log file: %s", strerror (errno));
+			}
+			close (rspamd_log->fd);
+		}
+		break;
+	}
+
+	rspamd_log->enabled = FALSE;
+}
+
+/*
+ * Close and open the log again, returns 0 on success and -1 on failure
+ */
+gint
+reopen_log_priv (rspamd_logger_t *rspamd_log, uid_t uid, gid_t gid)
+{
+	close_log_priv (rspamd_log, uid, gid);
+	if (open_log_priv (rspamd_log, uid, gid) == 0) {
+		msg_info ("log file reopened");
+		return 0;
+	}
+
+	return -1;
+}
+
+/**
+ * Open log file or initialize other structures
+ */
+gint
+open_log (rspamd_logger_t *logger)
+{
+	return open_log_priv (logger, -1, -1);
+}
+/**
+ * Close log file or destroy other structures
+ */
+void
+close_log (rspamd_logger_t *logger)
+{
+	close_log_priv (logger, -1, -1);
+}
+/**
+ * Close and open log again
+ */
+gint
+reopen_log (rspamd_logger_t *logger)
+{
+	return reopen_log_priv (logger, -1, -1);
+}
+
+/*
+ * Setup logger: allocate it on first use, select the output function, set up
+ * buffering and the list of addresses for conditional debug, then make it
+ * the default logger
+ */
+void
+rspamd_set_logger (struct config_file *cfg, GQuark ptype, struct rspamd_main *rspamd)
+{
+	gchar **strvec, *p, *err;
+	gint num, i, k;
+	struct in_addr addr;
+	guint32 mask;
+
+	if (rspamd->logger == NULL) {
+		rspamd->logger = g_malloc (sizeof (rspamd_logger_t));
+		memset (rspamd->logger, 0, sizeof (rspamd_logger_t));
+	}
+
+	rspamd->logger->type = cfg->log_type;
+	rspamd->logger->pid = getpid ();
+	rspamd->logger->process_type = ptype;
+
+	/* NOTE(review): a new mutex is created on every call, the previous one is not released */
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+	rspamd->logger->mtx = g_mutex_new ();
+#else
+	rspamd->logger->mtx = g_malloc (sizeof (GMutex));
+	g_mutex_init (rspamd->logger->mtx);
+#endif
+
+	switch (cfg->log_type) {
+	case RSPAMD_LOG_CONSOLE:
+		rspamd->logger->log_func = file_log_function;
+		rspamd->logger->fd = STDERR_FILENO;
+		break;
+	case RSPAMD_LOG_SYSLOG:
+		rspamd->logger->log_func = syslog_log_function;
+		break;
+	case RSPAMD_LOG_FILE:
+		rspamd->logger->log_func = file_log_function;
+		break;
+	}
+
+	rspamd->logger->cfg = cfg;
+	/* Set up buffer */
+	if (rspamd->cfg->log_buffered) {
+		if (rspamd->cfg->log_buf_size != 0) {
+			rspamd->logger->io_buf.size = rspamd->cfg->log_buf_size;
+		}
+		else {
+			rspamd->logger->io_buf.size = BUFSIZ;
+		}
+		rspamd->logger->is_buffered = TRUE;
+		rspamd->logger->io_buf.buf = g_malloc (rspamd->logger->io_buf.size);
+	}
+	/* Set up conditional logging */
+	if (rspamd->cfg->debug_ip_map != NULL) {
+		/* Try to add it as map first of all */
+		if (rspamd->logger->debug_ip) {
+			radix_tree_free (rspamd->logger->debug_ip);
+		}
+		rspamd->logger->debug_ip = radix_tree_create ();
+		if (!add_map (rspamd->cfg, rspamd->cfg->debug_ip_map, "IP addresses for which debug logs are enabled",
+				read_radix_list, fin_radix_list, (void **)&rspamd->logger->debug_ip)) {
+			/* Try to parse it as list */
+			strvec = g_strsplit_set (rspamd->cfg->debug_ip_map, ",; ", 0);
+			num = g_strv_length (strvec);
+
+			for (i = 0; i < num; i++) {
+				g_strstrip (strvec[i]);
+
+				if ((p = strchr (strvec[i], '/')) != NULL) {
+					/* Try to extract mask */
+					*p = '\0';
+					p ++;
+					errno = 0;
+					k = strtoul (p, &err, 10);
+					/* Skip empty, malformed or out of range prefix lengths */
+					if (errno != 0 || err == p || *err != '\0' || k < 0 || k > 32) {
+						continue;
+					}
+				}
+				else {
+					k = 32;
+				}
+				if (inet_aton (strvec[i], &addr)) {
+					/*
+					 * Build the mask from scratch for every entry (it must not
+					 * accumulate shifts of the previous ones); /0 is handled
+					 * separately as shifting a 32 bit value by 32 is undefined
+					 */
+					mask = (k == 0) ? 0 : (0xFFFFFFFF << (32 - k));
+					radix32tree_insert (rspamd->logger->debug_ip, ntohl (addr.s_addr), mask, 1);
+				}
+			}
+			g_strfreev (strvec);
+		}
+	}
+	else if (rspamd->logger->debug_ip) {
+		radix_tree_free (rspamd->logger->debug_ip);
+		rspamd->logger->debug_ip = NULL;
+	}
+
+	default_logger = rspamd->logger;
+}
+
+/**
+ * Used after fork() for updating structure params
+ */
+void
+update_log_pid (GQuark ptype, rspamd_logger_t *rspamd_log)
+{
+	rspamd_log->pid = getpid ();
+	rspamd_log->process_type = ptype;
+}
+
+/**
+ * Flush logging buffer (only for buffered console and file logs)
+ */
+void
+flush_log_buf (rspamd_logger_t *rspamd_log)
+{
+	if (rspamd_log->is_buffered && (rspamd_log->type == RSPAMD_LOG_CONSOLE || rspamd_log->type == RSPAMD_LOG_FILE)) {
+		direct_write_log_line (rspamd_log, rspamd_log->io_buf.buf, rspamd_log->io_buf.used, FALSE);
+		rspamd_log->io_buf.used = 0;
+	}
+}
+
+
+/*
+ * Format a message and pass it to the logger (the default logger is used
+ * when rspamd_log is NULL); without any logger the message goes to stderr
+ */
+void
+rspamd_common_logv (rspamd_logger_t *rspamd_log, GLogLevelFlags log_level, const gchar *function,
+		const gchar *fmt, va_list args)
+{
+	static gchar logbuf[BUFSIZ];
+	u_char *end;
+
+	if (rspamd_log == NULL) {
+		rspamd_log = default_logger;
+	}
+
+	if (rspamd_log == NULL) {
+		/* Just fprintf message to stderr */
+		/* NOTE(review): this comparison passes info/debug levels only - confirm it is intended */
+		if (log_level >= G_LOG_LEVEL_INFO) {
+			end = rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, args);
+			*end = '\0';
+			rspamd_escape_log_string (logbuf);
+			fprintf (stderr, "%s\n", logbuf);
+		}
+	}
+	else if (log_level <= rspamd_log->cfg->log_level) {
+		g_mutex_lock (rspamd_log->mtx);
+		end = rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, args);
+		*end = '\0';
+		rspamd_escape_log_string (logbuf);
+		rspamd_log->log_func (NULL, function, log_level, logbuf, FALSE, rspamd_log);
+		g_mutex_unlock (rspamd_log->mtx);
+	}
+}
+
+/**
+ * This log functions select real logger and write message if level is less or equal to configured log level
+ */
+void
+rspamd_common_log_function (rspamd_logger_t *rspamd_log, GLogLevelFlags log_level,
+		const gchar *function, const gchar *fmt, ...)
+{
+	va_list vp;
+
+	va_start (vp, fmt);
+	rspamd_common_logv (rspamd_log, log_level, function, fmt, vp);
+	va_end (vp);
+}
+
+/* Varargs logging through the default logger */
+void
+rspamd_default_logv (GLogLevelFlags log_level, const gchar *function,
+		const gchar *fmt, va_list args)
+{
+	rspamd_common_logv (NULL, log_level, function, fmt, args);
+}
+
+/* Printf-like logging through the default logger */
+void
+rspamd_default_log_function (GLogLevelFlags log_level,
+		const gchar *function, const gchar *fmt, ...)
+{
+
+	va_list vp;
+
+	va_start (vp, fmt);
+	rspamd_default_logv (log_level, function, fmt, vp);
+	va_end (vp);
+}
+
+
+/**
+ * Fill buffer with message (limits must be checked BEFORE this call)
+ */
+static void
+fill_buffer (rspamd_logger_t *rspamd_log, const struct iovec *iov, gint iovcnt)
+{
+	gint i;
+
+	for (i = 0; i < iovcnt; i ++) {
+		memcpy (rspamd_log->io_buf.buf + rspamd_log->io_buf.used, iov[i].iov_base, iov[i].iov_len);
+		rspamd_log->io_buf.used += iov[i].iov_len;
+	}
+
+}
+
+/*
+ * Write message to buffer or to file (using direct_write_log_line function)
+ */
+static void
+file_log_helper (rspamd_logger_t *rspamd_log, const struct iovec *iov, gint iovcnt)
+{
+	size_t len = 0;
+	gint i;
+
+	if (! rspamd_log->is_buffered) {
+		/* Write string directly */
+		direct_write_log_line (rspamd_log, (void *)iov, iovcnt, TRUE);
+	}
+	else {
+		/* Calculate total length */
+		for (i = 0; i < iovcnt; i ++) {
+			len += iov[i].iov_len;
+		}
+		/* Fill buffer */
+		if (rspamd_log->io_buf.size < len) {
+			/* Buffer is too small to hold this string, so write it directly */
+			flush_log_buf (rspamd_log);
+			direct_write_log_line (rspamd_log, (void *)iov, iovcnt, TRUE);
+		}
+		else if (rspamd_log->io_buf.used + len >= rspamd_log->io_buf.size) {
+			/* Buffer is full, flush it and start filling it again */
+			flush_log_buf (rspamd_log);
+			fill_buffer (rspamd_log, iov, iovcnt);
+		}
+		else {
+			/* Copy incoming string to buffer */
+			fill_buffer (rspamd_log, iov, iovcnt);
+		}
+	}
+}
+
+/**
+ * Syslog interface for logging
+ */
+static void
+syslog_log_function (const gchar * log_domain, const gchar *function, GLogLevelFlags log_level, const gchar * message, gboolean forced, gpointer arg)
+{
+	rspamd_logger_t *rspamd_log = arg;
+
+	if (! rspamd_log->enabled) {
+		return;
+	}
+	if (function == NULL) {
+		if (forced || log_level <= rspamd_log->cfg->log_level) {
+			if (forced || log_level >= G_LOG_LEVEL_DEBUG) {
+				syslog (LOG_DEBUG, "%s", message);
+			}
+			else if (log_level >= G_LOG_LEVEL_INFO) {
+				syslog (LOG_INFO, "%s", message);
+			}
+			else if (log_level >= G_LOG_LEVEL_WARNING) {
+				syslog (LOG_WARNING, "%s", message);
+			}
+			else if (log_level >= G_LOG_LEVEL_CRITICAL) {
+				syslog (LOG_ERR, "%s", message);
+			}
+		}
+	}
+	else {
+		if (forced || log_level <= rspamd_log->cfg->log_level) {
+			if (log_level >= G_LOG_LEVEL_DEBUG) {
+				syslog (LOG_DEBUG, "%s: %s", function, message);
+			}
+			else if (log_level >= G_LOG_LEVEL_INFO) {
+				syslog (LOG_INFO, "%s: %s", function, message);
+			}
+			else if (log_level >= G_LOG_LEVEL_WARNING) {
+				syslog (LOG_WARNING, "%s: %s", function, message);
+			}
+			else if (log_level >= G_LOG_LEVEL_CRITICAL) {
+				syslog (LOG_ERR, "%s: %s", function, message);
+			}
+		}
+	}
+}
+
+/**
+ * Main file interface for logging
+ * Handles throttling after write errors and collapsing of repeated lines,
+ * then builds the line and passes it to file_log_helper
+ */
+static void
+file_log_function (const gchar * log_domain, const gchar *function, GLogLevelFlags log_level, const gchar * message, gboolean forced, gpointer arg)
+{
+	gchar tmpbuf[256], timebuf[32];
+	time_t now;
+	struct tm *tms;
+	struct iovec iov[4];
+	gint r = 0;
+	guint32 cksum;
+	size_t mlen;
+	const gchar *cptype = NULL;
+	gboolean got_time = FALSE;
+	rspamd_logger_t *rspamd_log = arg;
+
+	if (! rspamd_log->enabled) {
+		return;
+	}
+
+
+	if (forced || log_level <= rspamd_log->cfg->log_level) {
+		/* Check throttling due to write errors */
+		if (rspamd_log->throttling) {
+			now = time (NULL);
+			if (rspamd_log->throttling_time != now) {
+				rspamd_log->throttling_time = now;
+				got_time = TRUE;
+			}
+			else {
+				/* Do not try to write to file too often while throttling */
+				return;
+			}
+		}
+		/* Check repeats */
+		mlen = strlen (message);
+		cksum = rspamd_log_calculate_cksum (message, mlen);
+		if (cksum == rspamd_log->last_line_cksum) {
+			rspamd_log->repeats ++;
+			if (rspamd_log->repeats > REPEATS_MIN && rspamd_log->repeats < REPEATS_MAX) {
+				/* Do not log anything */
+				if (rspamd_log->saved_message == 0) {
+					rspamd_log->saved_message = g_strdup (message);
+					rspamd_log->saved_function = g_strdup (function);
+				}
+				return;
+			}
+			else if (rspamd_log->repeats > REPEATS_MAX) {
+				rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "Last message repeated %ud times", rspamd_log->repeats);
+				rspamd_log->repeats = 0;
+				/* It is safe to use temporary buffer here as it is not static */
+				if (rspamd_log->saved_message) {
+					file_log_function (log_domain, rspamd_log->saved_function, log_level, rspamd_log->saved_message, forced, arg);
+				}
+				file_log_function (log_domain, __FUNCTION__, log_level, tmpbuf, forced, arg);
+				file_log_function (log_domain, function, log_level, message, forced, arg);
+				rspamd_log->repeats = REPEATS_MIN + 1;
+				return;
+			}
+		}
+		else {
+			/* Reset counter if new message differs from saved message */
+			rspamd_log->last_line_cksum = cksum;
+			if (rspamd_log->repeats > REPEATS_MIN) {
+				rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "Last message repeated %ud times", rspamd_log->repeats);
+				rspamd_log->repeats = 0;
+				if (rspamd_log->saved_message) {
+					file_log_function (log_domain, rspamd_log->saved_function, log_level, rspamd_log->saved_message, forced, arg);
+					g_free (rspamd_log->saved_message);
+					g_free (rspamd_log->saved_function);
+					rspamd_log->saved_message = NULL;
+					rspamd_log->saved_function = NULL;
+				}
+				file_log_function (log_domain, __FUNCTION__, log_level, tmpbuf, forced, arg);
+				/* It is safe to use temporary buffer here as it is not static */
+				file_log_function (log_domain, function, log_level, message, forced, arg);
+				return;
+			}
+			else {
+				rspamd_log->repeats = 0;
+			}
+		}
+
+		if (rspamd_log->cfg->log_extended) {
+			if (! got_time) {
+				now = time (NULL);
+			}
+
+			/* Format time */
+			tms = localtime (&now);
+
+			strftime (timebuf, sizeof (timebuf), "%F %H:%M:%S", tms);
+			cptype = g_quark_to_string (rspamd_log->process_type);
+
+			if (rspamd_log->cfg->log_color) {
+				if (log_level >= G_LOG_LEVEL_INFO) {
+					/* White */
+					r = rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "\033[1;37m");
+				}
+				else if (log_level >= G_LOG_LEVEL_WARNING) {
+					/* Green (SGR colour code 32) */
+					r = rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "\033[2;32m");
+				}
+				else if (log_level >= G_LOG_LEVEL_CRITICAL) {
+					/* Red */
+					r = rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "\033[1;31m");
+				}
+			}
+			else {
+				r = 0;
+			}
+			if (function == NULL) {
+				r += rspamd_snprintf (tmpbuf + r, sizeof (tmpbuf) - r, "%s #%P(%s) ", timebuf, rspamd_log->pid, cptype);
+			}
+			else {
+				r += rspamd_snprintf (tmpbuf + r, sizeof (tmpbuf) -r, "%s #%P(%s) %s: ", timebuf, rspamd_log->pid, cptype, function);
+			}
+			/* Construct IOV for log line */
+			iov[0].iov_base = tmpbuf;
+			iov[0].iov_len = r;
+			iov[1].iov_base = (void *)message;
+			iov[1].iov_len = mlen;
+			iov[2].iov_base = (void *)&lf_chr;
+			iov[2].iov_len = 1;
+			if (rspamd_log->cfg->log_color) {
+				iov[3].iov_base = "\033[0m";
+				iov[3].iov_len = sizeof ("\033[0m") - 1;
+				/* Call helper (for buffering) */
+				file_log_helper (rspamd_log, iov, 4);
+			}
+			else {
+				/* Call helper (for buffering) */
+				file_log_helper (rspamd_log, iov, 3);
+			}
+		}
+		else {
+			iov[0].iov_base = (void *)message;
+			iov[0].iov_len = mlen;
+			iov[1].iov_base = (void *)&lf_chr;
+			iov[1].iov_len = 1;
+			if (rspamd_log->cfg->log_color) {
+				iov[2].iov_base = "\033[0m";
+				iov[2].iov_len = sizeof ("\033[0m") - 1;
+				/* Call helper (for buffering) */
+				file_log_helper (rspamd_log, iov, 3);
+			}
+			else {
+				/* Call helper (for buffering) */
+				file_log_helper (rspamd_log, iov, 2);
+			}
+		}
+	}
+}
+
+/**
+ * Write log line depending on ip
+ * The message is written when the configured level includes debug or when
+ * temporary debugging is on; for IPv4 addresses absent from the debug_ip
+ * tree nothing is written
+ */
+void
+rspamd_conditional_debug (rspamd_logger_t *rspamd_log,
+		rspamd_inet_addr_t *addr, const gchar *function, const gchar *fmt, ...)
+{
+	static gchar logbuf[BUFSIZ];
+	va_list vp;
+	u_char *end;
+
+	/* Same fallback as in rspamd_common_logv */
+	if (rspamd_log == NULL) {
+		rspamd_log = default_logger;
+	}
+	if (rspamd_log == NULL) {
+		/* No logger is configured, so there is no level to check against */
+		return;
+	}
+
+	if (rspamd_log->cfg->log_level >= G_LOG_LEVEL_DEBUG || rspamd_log->is_debug) {
+		if (rspamd_log->debug_ip && addr != NULL) {
+			if (addr->af == AF_INET && radix32tree_find (rspamd_log->debug_ip,
+					ntohl (addr->addr.s4.sin_addr.s_addr)) == RADIX_NO_VALUE) {
+				return;
+			}
+		}
+		g_mutex_lock (rspamd_log->mtx);
+		va_start (vp, fmt);
+		end = rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, vp);
+		*end = '\0';
+		rspamd_escape_log_string (logbuf);
+		va_end (vp);
+		rspamd_log->log_func (NULL, function, G_LOG_LEVEL_DEBUG, logbuf, TRUE, rspamd_log);
+		g_mutex_unlock (rspamd_log->mtx);
+	}
+}
+/**
+ * Wrapper for glib logger
+ */
+void
+rspamd_glib_log_function (const gchar *log_domain, GLogLevelFlags log_level, const gchar *message, gpointer arg)
+{
+	rspamd_logger_t *rspamd_log = arg;
+
+	if (rspamd_log->enabled) {
+		g_mutex_lock (rspamd_log->mtx);
+		rspamd_log->log_func (log_domain, NULL, log_level, message, FALSE, rspamd_log);
+		g_mutex_unlock (rspamd_log->mtx);
+	}
+}
+
+/**
+ * Temporary turn on debugging
+ */
+void
+rspamd_log_debug (rspamd_logger_t *rspamd_log)
+{
+	rspamd_log->is_debug = TRUE;
+}
+
+/**
+ * Turn off temporary debugging
+ */
+void
+rspamd_log_nodebug (rspamd_logger_t *rspamd_log)
+{
+	rspamd_log->is_debug = FALSE;
+}
diff --git a/src/libutil/logger.h b/src/libutil/logger.h
new file mode 100644
index 000000000..b0766b938
--- /dev/null
+++ b/src/libutil/logger.h
@@ -0,0 +1,117 @@
+#ifndef RSPAMD_LOGGER_H
+#define RSPAMD_LOGGER_H
+
+#include "config.h"
+#include "cfg_file.h"
+#include "radix.h"
+#include "util.h"
+
+
+typedef void (*rspamd_log_func_t)(const gchar * log_domain, const gchar *function,
+		GLogLevelFlags log_level, const gchar * message,
+		gboolean forced, gpointer arg); /* forced: write even if log_level is above the configured level */
+
+typedef struct rspamd_logger_s rspamd_logger_t;
+/**
+ * Init logger
+ * @param cfg configuration (log type and other log settings are read from it)
+ * @param ptype process type stored in the logger
+ * @param main main structure whose logger is created (if absent) and set up
+ */
+void rspamd_set_logger (struct config_file *cfg, GQuark ptype, struct rspamd_main *main);
+/**
+ * Open log file or initialize other structures
+ * @return 0 on success, -1 on failure
+ */
+gint open_log (rspamd_logger_t *logger);
+/**
+ * Close log file or destroy other structures
+ */
+void close_log (rspamd_logger_t *logger);
+/**
+ * Close and open log again
+ * @return 0 on success, -1 on failure
+ */
+gint reopen_log (rspamd_logger_t *logger);
+
+/**
+ * Open log file or initialize other structures for privileged processes
+ * @param uid owner to set for the log file
+ * @param gid group to set for the log file
+ * @return 0 on success, -1 on failure
+ */
+gint open_log_priv (rspamd_logger_t *logger, uid_t uid, gid_t gid);
+/**
+ * Close log file or destroy other structures for privileged processes
+ */
+void close_log_priv (rspamd_logger_t *logger, uid_t uid, gid_t gid);
+/**
+ * Close and open log again for privileged processes
+ * @return 0 on success, -1 on failure
+ */
+gint reopen_log_priv (rspamd_logger_t *logger, uid_t uid, gid_t gid);
+
+/**
+ * Set log pid (to the pid of the calling process) and process type
+ */
+void update_log_pid (GQuark ptype, rspamd_logger_t *logger);
+
+/**
+ * Flush log buffer for some types of logging
+ */
+void flush_log_buf (rspamd_logger_t *logger);
+/**
+ * Log function that is compatible with glib messages
+ * @param arg logger object (rspamd_logger_t *)
+ */
+void rspamd_glib_log_function (const gchar *log_domain,
+		GLogLevelFlags log_level, const gchar *message, gpointer arg);
+
+/**
+ * Function with variable number of arguments support
+ * @param logger logger object, NULL selects the default logger
+ * @param function name of the calling function
+ */
+void rspamd_common_log_function (rspamd_logger_t *logger,
+		GLogLevelFlags log_level, const gchar *function, const gchar *fmt, ...);
+/**
+ * Varargs version of the common log function
+ */
+void rspamd_common_logv (rspamd_logger_t *logger,
+		GLogLevelFlags log_level, const gchar *function, const gchar *fmt, va_list args);
+
+/**
+ * Conditional debug function
+ * @param addr address of the peer, NULL disables filtering by address
+ */
+void rspamd_conditional_debug (rspamd_logger_t *logger,
+		rspamd_inet_addr_t *addr, const gchar *function, const gchar *fmt, ...);
+
+/**
+ * Function with variable number of arguments support that uses static default logger
+ */
+void rspamd_default_log_function (GLogLevelFlags log_level, const gchar *function,
+		const gchar *fmt, ...);
+
+/**
+ * Varargs version of default log function
+ * @param log_level
+ * @param function
+ * @param fmt
+ * @param args
+ */
+void rspamd_default_logv (GLogLevelFlags log_level, const gchar *function, const gchar *fmt, va_list args);
+
+/**
+ * Temporary turn on debug
+ */
+void rspamd_log_debug (rspamd_logger_t *logger);
+
+/**
+ * Turn off debug
+ */
+void rspamd_log_nodebug (rspamd_logger_t *logger);
+
+/* Typical functions */
+
+/*
+ * Logging in postfix style
+ * With RSPAMD_MAIN defined the macros use rspamd_main->logger (debug_task also
+ * needs a `task' variable in scope), otherwise they use the default logger
+ */
+#if defined(RSPAMD_MAIN)
+#define msg_err(...) rspamd_common_log_function(rspamd_main->logger, G_LOG_LEVEL_CRITICAL, __FUNCTION__, __VA_ARGS__)
+#define msg_warn(...) rspamd_common_log_function(rspamd_main->logger, G_LOG_LEVEL_WARNING, __FUNCTION__, __VA_ARGS__)
+#define msg_info(...) rspamd_common_log_function(rspamd_main->logger, G_LOG_LEVEL_INFO, __FUNCTION__, __VA_ARGS__)
+#define msg_debug(...) rspamd_conditional_debug(rspamd_main->logger, NULL, __FUNCTION__, __VA_ARGS__)
+#define debug_task(...) rspamd_conditional_debug(rspamd_main->logger, &task->from_addr, __FUNCTION__, __VA_ARGS__)
+#else
+#define msg_err(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, __FUNCTION__, __VA_ARGS__)
+#define msg_warn(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, __FUNCTION__, __VA_ARGS__)
+#define msg_info(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, __FUNCTION__, __VA_ARGS__)
+#define msg_debug(...) rspamd_default_log_function(G_LOG_LEVEL_DEBUG, __FUNCTION__, __VA_ARGS__)
+#define debug_task(...) rspamd_default_log_function(G_LOG_LEVEL_DEBUG, __FUNCTION__, __VA_ARGS__)
+#endif
+
+#endif
diff --git a/src/libutil/map.c b/src/libutil/map.c
new file mode 100644
index 000000000..703622585
--- /dev/null
+++ b/src/libutil/map.c
@@ -0,0 +1,1148 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Implementation of map files handling
+ */
+#include "config.h"
+#include "map.h"
+#include "http.h"
+#include "main.h"
+#include "util.h"
+#include "mem_pool.h"
+
+static const gchar *hash_fill = "1";
+
+/* Http reply: status code, parsed headers and the state of the reply parser */
+struct http_reply {
+	gint code;
+	GHashTable *headers;
+	gchar *cur_header;
+	gint parser_state;
+};
+
+struct http_callback_data {
+	struct event ev;
+	struct event_base *ev_base;
+	struct timeval tv;
+	struct rspamd_map *map;
+	struct http_map_data *data;
+	struct http_reply *reply;
+	struct map_cb_data cbdata;
+
+	gint state;
+	gint fd;
+};
+
+/* Value in seconds after which we would try to do stat on list file */
+
+/* HTTP timeouts */
+#define HTTP_CONNECT_TIMEOUT 2
+#define HTTP_READ_TIMEOUT 10
+
+/**
+ * Helper for HTTP connection establishment
+ * Creates a TCP socket for data->addr using make_tcp_socket; the map
+ * argument is not used here
+ * @param data http map data (addr is used for the socket, host for the log message)
+ * @param is_async passed to make_tcp_socket as is
+ * @return socket descriptor or -1 on failure (the failure is logged)
+ */
+static gint
+connect_http (struct rspamd_map *map, struct http_map_data *data, gboolean is_async)
+{
+	gint sock;
+
+	if ((sock = make_tcp_socket (data->addr, FALSE, is_async)) == -1) {
+		msg_info ("cannot connect to http server %s: %d, %s", data->host, errno, strerror (errno));
+		return -1;
+	}
+
+	return sock;
+}
+
+/**
+ * Write HTTP request
+ * Builds a GET request for data->path (a leading slash is added when it is
+ * missing) with Host and Connection: close headers; If-Modified-Since is
+ * added only when data->last_checked is not zero
+ * The request is sent with a single write call, a failure is only logged
+ * @param data http map data (path, host and last_checked are used)
+ * @param sock socket to write the request to
+ */
+static void
+write_http_request (struct rspamd_map *map, struct http_map_data *data, gint sock)
+{
+	gchar outbuf[BUFSIZ], datebuf[128];
+	gint r;
+	struct tm *tm;
+
+	tm = gmtime (&data->last_checked);
+	strftime (datebuf, sizeof (datebuf), "%a, %d %b %Y %H:%M:%S %Z", tm);
+	r = rspamd_snprintf (outbuf, sizeof (outbuf), "GET %s%s HTTP/1.1" CRLF "Connection: close" CRLF "Host: %s" CRLF, (*data->path == '/') ? "" : "/", data->path, data->host);
+	if (data->last_checked != 0) {
+		r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "If-Modified-Since: %s" CRLF, datebuf);
+	}
+
+	r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, CRLF);
+
+	if (write (sock, outbuf, r) == -1) {
+		msg_err ("failed to write request: %d, %s", errno, strerror (errno));
+	}
+}
+
+/**
+ * FSM for parsing HTTP reply
+ */
+static gchar *
+parse_http_reply (gchar * chunk, gint len, struct http_reply *reply)
+{
+	gchar *s, *p, *err_str, *tmp;
+	p = chunk;
+	s = chunk;
+
+	while (p - chunk < len) {
+		switch (reply->parser_state) {
+			/* Search status code */
+		case 0:
+			/* Search for status code */
+			if (*p != ' ') {
+				p++;
+			}
+			else {
+				/* Try to parse HTTP reply code */
+				reply->code = strtoul (++p, (gchar **)&err_str, 10);
+				if (*err_str != ' ') {
+					msg_info ("error while reading HTTP status code: %s", p);
+					return NULL;
+				}
+				/* Now skip to end of status string */
+				reply->parser_state = 1;
+				continue;
+			}
+			break;
+			/* Skip to end of line */
+		case 1:
+			if (*p == '\n') {
+				/* Switch to read header state */
+				reply->parser_state = 2;
+			}
+			/* Each skipped symbol is proceeded */
+			s = ++p;
+			break;
+			/* Read header value */
+		case 2:
+			if (*p == ':') {
+				reply->cur_header = g_malloc (p - s + 1);
+				rspamd_strlcpy (reply->cur_header, s, p - s + 1);
+				reply->parser_state = 3;
+			}
+			else if (*p == '\r' && *(p + 1) == '\n') {
+				/* Last empty line */
+				reply->parser_state = 5;
+			}
+			p++;
+			break;
+			/* Skip spaces after header name */
+		case 3:
+			if (*p != ' ') {
+				s = p;
+				reply->parser_state = 4;
+			}
+			else {
+				p++;
+			}
+			break;
+			/* Read header value */
+		case 4:
+			if (*p == '\r') {
+				if (reply->cur_header != NULL) {
+					tmp = g_malloc (p - s + 1);
+					rspamd_strlcpy (tmp, s, p - s + 1);
+					g_hash_table_insert (reply->headers, reply->cur_header, tmp);
+					reply->cur_header = NULL;
+				}
+				reply->parser_state = 1;
+			}
+			p++;
+			break;
+		case 5:
+			/* Set pointer to begining of HTTP body */
+			p++;
+			s = p;
+			
reply->parser_state = 6; + break; + case 6: + /* Headers parsed, just return */ + return p; + break; + } + } + + return s; +} + +/** + * Read and parse chunked header + */ +static gint +read_chunk_header (gchar * buf, gint len, struct http_map_data *data) +{ + gchar chunkbuf[32], *p, *c, *err_str; + gint skip = 0; + + p = chunkbuf; + c = buf; + /* Find hex digits */ + while (g_ascii_isxdigit (*c) && p - chunkbuf < (gint)(sizeof (chunkbuf) - 1) && skip < len) { + *p++ = *c++; + skip++; + } + *p = '\0'; + data->chunk = strtoul (chunkbuf, &err_str, 16); + if (*err_str != '\0') { + return -1; + } + + /* Now skip to CRLF */ + while (*c != '\n' && skip < len) { + c++; + skip++; + } + if (*c == '\n' && skip < len) { + skip++; + c++; + } + data->chunk_remain = data->chunk; + + return skip; +} + +/** + * Helper callback for reading chunked reply + */ +static gboolean +read_http_chunked (gchar * buf, size_t len, struct rspamd_map *map, struct http_map_data *data, struct map_cb_data *cbdata) +{ + gchar *p = buf, *remain; + gint skip = 0; + + if (data->chunked == 1) { + /* Read first chunk data */ + if ((skip = read_chunk_header (buf, len, data)) != -1) { + p += skip; + len -= skip; + data->chunked = 2; + } + else { + msg_info ("invalid chunked reply: %*s", (gint)len, buf); + return FALSE; + } + } + + if (data->chunk_remain == 0) { + /* Read another chunk */ + if ((skip = read_chunk_header (buf, len, data)) != -1) { + p += skip; + len -= skip; + } + else { + msg_info ("invalid chunked reply: %*s", (gint)len, buf); + return FALSE; + } + if (data->chunk == 0) { + return FALSE; + } + } + + if (data->chunk_remain <= len ) { + /* Call callback and move remaining buffer */ + remain = map->read_callback (map->pool, p, data->chunk_remain, cbdata); + if (remain != NULL && remain != p + data->chunk_remain) { + /* Copy remaining buffer to start of buffer */ + data->rlen = len - (remain - p); + memmove (buf, remain, data->rlen); + data->chunk_remain -= data->rlen; + } + else { + /* Copy 
other part */ + data->rlen = len - data->chunk_remain; + if (data->rlen > 0) { + memmove (buf, p + data->chunk_remain, data->rlen); + } + data->chunk_remain = 0; + } + + } + else { + /* Just read another portion of chunk */ + data->chunk_remain -= len; + remain = map->read_callback (map->pool, p, len, cbdata); + if (remain != NULL && remain != p + len) { + /* copy remaining buffer to start of buffer */ + data->rlen = len - (remain - p); + memmove (buf, remain, data->rlen); + } + } + + return TRUE; +} + +/** + * Callback for reading HTTP reply + */ +static gboolean +read_http_common (struct rspamd_map *map, struct http_map_data *data, struct http_reply *reply, struct map_cb_data *cbdata, gint fd) +{ + gchar *remain, *pos; + ssize_t r; + gchar *te, *date; + + if ((r = read (fd, data->read_buf + data->rlen, sizeof (data->read_buf) - data->rlen)) > 0) { + r += data->rlen; + data->rlen = 0; + remain = parse_http_reply (data->read_buf, r, reply); + if (remain != NULL && remain != data->read_buf) { + /* copy remaining data->read_buffer to start of data->read_buffer */ + data->rlen = r - (remain - data->read_buf); + memmove (data->read_buf, remain, data->rlen); + r = data->rlen; + data->rlen = 0; + } + if (r <= 0) { + return TRUE; + } + if (reply->parser_state == 6) { + /* If reply header is parsed successfully, try to read further data */ + if (reply->code != 200 && reply->code != 304) { + msg_err ("got error reply from server %s, %d", data->host, reply->code); + return FALSE; + } + else if (reply->code == 304) { + /* Do not read anything */ + return FALSE; + } + pos = data->read_buf; + /* Check for chunked */ + if (data->chunked == 0) { + if ((te = g_hash_table_lookup (reply->headers, "Transfer-Encoding")) != NULL) { + if (g_ascii_strcasecmp (te, "chunked") == 0) { + data->chunked = 1; + } + else { + data->chunked = -1; + } + } + else { + data->chunked = -1; + } + } + /* Check for date */ + date = g_hash_table_lookup (reply->headers, "Date"); + if (date != NULL) { + 
data->last_checked = rspamd_http_parse_date (date, -1); + } + else { + data->last_checked = (time_t)-1; + } + + if (data->chunked > 0) { + return read_http_chunked (data->read_buf, r, map, data, cbdata); + } + /* Read more data */ + remain = map->read_callback (map->pool, pos, r, cbdata); + if (remain != NULL && remain != pos + r) { + /* copy remaining data->read_buffer to start of data->read_buffer */ + data->rlen = r - (remain - pos); + memmove (pos, remain, data->rlen); + } + } + } + else { + return FALSE; + } + + return TRUE; +} + +/** + * Sync read of HTTP reply + */ +static void +read_http_sync (struct rspamd_map *map, struct http_map_data *data) +{ + struct map_cb_data cbdata; + gint fd; + struct http_reply *repl; + + if (map->read_callback == NULL || map->fin_callback == NULL) { + msg_err ("bad callback for reading map file"); + return; + } + + /* Connect synced */ + if ((fd = connect_http (map, data, FALSE)) == -1) { + return; + } + write_http_request (map, data, fd); + + cbdata.state = 0; + cbdata.map = map; + cbdata.prev_data = *map->user_data; + cbdata.cur_data = NULL; + + repl = g_malloc (sizeof (struct http_reply)); + repl->parser_state = 0; + repl->code = 404; + repl->headers = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, g_free, g_free); + + while (read_http_common (map, data, repl, &cbdata, fd)); + + close (fd); + + map->fin_callback (map->pool, &cbdata); + *map->user_data = cbdata.cur_data; + if (data->last_checked == (time_t)-1) { + data->last_checked = time (NULL); + } + + g_hash_table_destroy (repl->headers); + g_free (repl); +} + +/** + * Callback for reading data from file + */ +static void +read_map_file (struct rspamd_map *map, struct file_map_data *data) +{ + struct map_cb_data cbdata; + gchar buf[BUFSIZ], *remain; + ssize_t r; + gint fd, rlen; + + if (map->read_callback == NULL || map->fin_callback == NULL) { + msg_err ("bad callback for reading map file"); + return; + } + + if ((fd = open (data->filename, O_RDONLY)) 
== -1) { + msg_warn ("cannot open file '%s': %s", data->filename, strerror (errno)); + return; + } + + cbdata.state = 0; + cbdata.prev_data = *map->user_data; + cbdata.cur_data = NULL; + cbdata.map = map; + + rlen = 0; + while ((r = read (fd, buf + rlen, sizeof (buf) - rlen - 1)) > 0) { + r += rlen; + buf[r] = '\0'; + remain = map->read_callback (map->pool, buf, r, &cbdata); + if (remain != NULL) { + /* copy remaining buffer to start of buffer */ + rlen = r - (remain - buf); + memmove (buf, remain, rlen); + } + } + + close (fd); + + map->fin_callback (map->pool, &cbdata); + *map->user_data = cbdata.cur_data; +} + +/** + * FSM for parsing lists + */ +gchar * +abstract_parse_kv_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data, insert_func func) +{ + gchar *c, *p, *key = NULL, *value = NULL; + + p = chunk; + c = p; + + while (p - chunk < len) { + switch (data->state) { + case 0: + /* read key */ + /* Check here comments, eol and end of buffer */ + if (*p == '#') { + if (key != NULL && p - c >= 0) { + value = rspamd_mempool_alloc (pool, p - c + 1); + memcpy (value, c, p - c); + value[p - c] = '\0'; + value = g_strstrip (value); + func (data->cur_data, key, value); + msg_debug ("insert kv pair: %s -> %s", key, value); + } + data->state = 99; + } + else if (*p == '\r' || *p == '\n' || p - chunk == len - 1) { + if (key != NULL && p - c >= 0) { + value = rspamd_mempool_alloc (pool, p - c + 1); + memcpy (value, c, p - c); + value[p - c] = '\0'; + + value = g_strstrip (value); + func (data->cur_data, key, value); + msg_debug ("insert kv pair: %s -> %s", key, value); + } + else if (key == NULL && p - c > 0) { + /* Key only line */ + key = rspamd_mempool_alloc (pool, p - c + 1); + memcpy (key, c, p - c); + key[p - c] = '\0'; + value = rspamd_mempool_alloc (pool, 1); + *value = '\0'; + func (data->cur_data, key, value); + msg_debug ("insert kv pair: %s -> %s", key, value); + } + data->state = 100; + key = NULL; + } + else if (g_ascii_isspace 
(*p)) { + if (p - c > 0) { + key = rspamd_mempool_alloc (pool, p - c + 1); + memcpy (key, c, p - c); + key[p - c] = '\0'; + data->state = 2; + } + else { + key = NULL; + } + } + else { + p ++; + } + break; + case 2: + /* Skip spaces before value */ + if (!g_ascii_isspace (*p)) { + c = p; + data->state = 0; + } + else { + p ++; + } + break; + case 99: + /* SKIP_COMMENT */ + /* Skip comment till end of line */ + if (*p == '\r' || *p == '\n') { + while ((*p == '\r' || *p == '\n') && p - chunk < len) { + p++; + } + c = p; + key = NULL; + data->state = 0; + } + else { + p++; + } + break; + case 100: + /* Skip \r\n and whitespaces */ + if (*p == '\r' || *p == '\n' || g_ascii_isspace (*p)) { + p ++; + } + else { + c = p; + key = NULL; + data->state = 0; + } + break; + } + } + + return c; +} + +gchar * +abstract_parse_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data, insert_func func) +{ + gchar *s, *p, *str, *start; + + p = chunk; + start = p; + + str = g_malloc (len + 1); + s = str; + + while (p - chunk < len) { + switch (data->state) { + /* READ_SYMBOL */ + case 0: + if (*p == '#') { + /* Got comment */ + if (s != str) { + /* Save previous string in lines like: "127.0.0.1 #localhost" */ + *s = '\0'; + s = rspamd_mempool_strdup (pool, g_strstrip (str)); + if (strlen (s) > 0) { + func (data->cur_data, s, hash_fill); + } + s = str; + start = p; + } + data->state = 1; + } + else if (*p == '\r' || *p == '\n') { + /* Got EOL marker, save stored string */ + if (s != str) { + *s = '\0'; + s = rspamd_mempool_strdup (pool, g_strstrip (str)); + if (strlen (s) > 0) { + func (data->cur_data, s, hash_fill); + } + s = str; + } + /* Skip EOL symbols */ + while ((*p == '\r' || *p == '\n') && p - chunk < len) { + p++; + } + start = p; + } + else { + /* Store new string in s */ + *s = *p; + s++; + p++; + } + break; + /* SKIP_COMMENT */ + case 1: + /* Skip comment till end of line */ + if (*p == '\r' || *p == '\n') { + while ((*p == '\r' || *p == '\n') && p 
- chunk < len) { + p++; + } + s = str; + start = p; + data->state = 0; + } + else { + p++; + } + break; + } + } + + g_free (str); + + return start; +} + +/** + * Radix tree helper function + */ +static void +radix_tree_insert_helper (gpointer st, gconstpointer key, gpointer value) +{ + radix_tree_t *tree = st; + + guint32 mask = 0xFFFFFFFF; + guint32 ip; + gchar *token, *ipnet, *err_str, **strv, **cur; + struct in_addr ina; + gint k; + + /* Split string if there are multiple items inside a single string */ + strv = g_strsplit_set ((gchar *)key, " ,;", 0); + cur = strv; + while (*cur) { + if (**cur == '\0') { + cur++; + continue; + } + /* Extract ipnet */ + ipnet = *cur; + token = strsep (&ipnet, "/"); + + if (ipnet != NULL) { + errno = 0; + /* Get mask */ + k = strtoul (ipnet, &err_str, 10); + if (errno != 0) { + msg_warn ("invalid netmask, error detected on symbol: %s, erorr: %s", err_str, strerror (errno)); + k = 32; + } + else if (k > 32 || k < 0) { + msg_warn ("invalid netmask value: %d", k); + k = 32; + } + /* Calculate mask based on CIDR presentation */ + mask = mask << (32 - k); + } + + /* Check IP */ + if (inet_aton (token, &ina) == 0) { + msg_err ("invalid ip address: %s", token); + return; + } + + /* Insert ip in a tree */ + ip = ntohl ((guint32) ina.s_addr); + k = radix32tree_insert (tree, ip, mask, 1); + if (k == -1) { + msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (ina), mask); + } + else if (k == 1) { + msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (ina), mask); + } + cur++; + } + + g_strfreev (strv); +} + +/* Helpers */ +gchar * +read_host_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) +{ + if (data->cur_data == NULL) { + data->cur_data = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); + } + return abstract_parse_list (pool, chunk, len, data, (insert_func) g_hash_table_insert); +} + +void +fin_host_list (rspamd_mempool_t * pool, struct map_cb_data *data) +{ + if 
(data->prev_data) { + g_hash_table_destroy (data->prev_data); + } +} + +gchar * +read_kv_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) +{ + if (data->cur_data == NULL) { + data->cur_data = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); + } + return abstract_parse_kv_list (pool, chunk, len, data, (insert_func) g_hash_table_insert); +} + +void +fin_kv_list (rspamd_mempool_t * pool, struct map_cb_data *data) +{ + if (data->prev_data) { + g_hash_table_destroy (data->prev_data); + } +} + +gchar * +read_radix_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) +{ + if (data->cur_data == NULL) { + data->cur_data = radix_tree_create (); + } + return abstract_parse_list (pool, chunk, len, data, (insert_func) radix_tree_insert_helper); +} + +void +fin_radix_list (rspamd_mempool_t * pool, struct map_cb_data *data) +{ + if (data->prev_data) { + radix_tree_free (data->prev_data); + } +} + +/** + * Common file callback + */ +static void +file_callback (gint fd, short what, void *ud) +{ + struct rspamd_map *map = ud; + struct file_map_data *data = map->map_data; + struct stat st; + gdouble jittered_sec; + + /* Plan event again with jitter */ + evtimer_del (&map->ev); + jittered_sec = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout); + double_to_tv (jittered_sec, &map->tv); + + evtimer_add (&map->ev, &map->tv); + + if (g_atomic_int_get (map->locked)) { + msg_info ("don't try to reread map as it is locked by other process, will reread it later"); + return; + } + + if (stat (data->filename, &st) != -1 && (st.st_mtime > data->st.st_mtime || data->st.st_mtime == -1)) { + /* File was modified since last check */ + memcpy (&data->st, &st, sizeof (struct stat)); + } + else { + return; + } + + msg_info ("rereading map file %s", data->filename); + read_map_file (map, data); +} + +/** + * Callback for destroying HTTP callback data + */ +static void +free_http_cbdata (struct 
http_callback_data *cbd) +{ + if (cbd->reply) { + g_hash_table_destroy (cbd->reply->headers); + g_free (cbd->reply); + } + g_atomic_int_set (cbd->map->locked, 0); + event_del (&cbd->ev); + close (cbd->fd); + g_free (cbd); +} + +/** + * Async HTTP request parser + */ +static void +http_async_callback (gint fd, short what, void *ud) +{ + struct http_callback_data *cbd = ud; + + /* Begin of connection */ + if (what == EV_WRITE) { + if (cbd->state == 0) { + /* Can write request */ + write_http_request (cbd->map, cbd->data, fd); + /* Plan reading */ + event_set (&cbd->ev, cbd->fd, EV_READ | EV_PERSIST, http_async_callback, cbd); + event_base_set (cbd->ev_base, &cbd->ev); + cbd->tv.tv_sec = HTTP_READ_TIMEOUT; + cbd->tv.tv_usec = 0; + cbd->state = 1; + /* Allocate reply structure */ + cbd->reply = g_malloc (sizeof (struct http_reply)); + cbd->reply->parser_state = 0; + cbd->reply->code = 404; + cbd->reply->headers = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, g_free, g_free); + cbd->cbdata.state = 0; + cbd->cbdata.prev_data = *cbd->map->user_data; + cbd->cbdata.cur_data = NULL; + cbd->cbdata.map = cbd->map; + cbd->data->rlen = 0; + cbd->data->chunk = 0; + cbd->data->chunk_remain = 0; + cbd->data->chunked = FALSE; + cbd->data->read_buf[0] = '\0'; + + event_add (&cbd->ev, &cbd->tv); + } + else { + msg_err ("bad state when got write readiness"); + free_http_cbdata (cbd); + return; + } + } + /* Got reply, parse it */ + else if (what == EV_READ) { + if (cbd->state >= 1) { + if (!read_http_common (cbd->map, cbd->data, cbd->reply, &cbd->cbdata, cbd->fd)) { + /* Handle Not-Modified in a special way */ + if (cbd->reply->code == 304) { + if (cbd->data->last_checked == (time_t)-1) { + cbd->data->last_checked = time (NULL); + } + msg_info ("data is not modified for server %s", cbd->data->host); + } + else if (cbd->cbdata.cur_data != NULL) { + /* Destroy old data and start reading request data */ + cbd->map->fin_callback (cbd->map->pool, &cbd->cbdata); + 
*cbd->map->user_data = cbd->cbdata.cur_data; + if (cbd->data->last_checked == (time_t)-1) { + cbd->data->last_checked = time (NULL); + } + } + if (cbd->state == 1 && cbd->reply->code == 200) { + /* Write to log that data is modified */ + msg_info ("rereading map data from %s", cbd->data->host); + } + + free_http_cbdata (cbd); + return; + } + else if (cbd->state == 1) { + /* Write to log that data is modified */ + msg_info ("rereading map data from %s", cbd->data->host); + } + cbd->state = 2; + } + } + else { + msg_err ("connection with http server terminated incorrectly"); + free_http_cbdata (cbd); + } +} + +/** + * Async HTTP callback + */ +static void +http_callback (gint fd, short what, void *ud) +{ + struct rspamd_map *map = ud; + struct http_map_data *data = map->map_data; + gint sock; + struct http_callback_data *cbd; + gdouble jittered_sec; + + /* Plan event again with jitter */ + evtimer_del (&map->ev); + jittered_sec = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout); + double_to_tv (jittered_sec, &map->tv); + evtimer_add (&map->ev, &map->tv); + + if (g_atomic_int_get (map->locked)) { + msg_info ("don't try to reread map as it is locked by other process, will reread it later"); + return; + } + + g_atomic_int_inc (map->locked); + + /* Connect asynced */ + if ((sock = connect_http (map, data, TRUE)) == -1) { + g_atomic_int_set (map->locked, 0); + return; + } + else { + /* Plan event */ + cbd = g_malloc (sizeof (struct http_callback_data)); + cbd->ev_base = map->ev_base; + event_set (&cbd->ev, sock, EV_WRITE, http_async_callback, cbd); + event_base_set (cbd->ev_base, &cbd->ev); + cbd->tv.tv_sec = HTTP_CONNECT_TIMEOUT; + cbd->tv.tv_usec = 0; + cbd->map = map; + cbd->data = data; + cbd->state = 0; + cbd->fd = sock; + cbd->reply = NULL; + event_add (&cbd->ev, &cbd->tv); + } +} + +/* Start watching event for all maps */ +void +start_map_watch (struct config_file *cfg, struct event_base *ev_base) +{ + GList *cur = cfg->maps; + struct rspamd_map 
*map; + struct file_map_data *fdata; + gdouble jittered_sec; + + /* First of all do synced read of data */ + while (cur) { + map = cur->data; + map->ev_base = ev_base; + if (map->protocol == MAP_PROTO_FILE) { + evtimer_set (&map->ev, file_callback, map); + event_base_set (map->ev_base, &map->ev); + /* Read initial data */ + fdata = map->map_data; + if (fdata->st.st_mtime != -1) { + /* Do not try to read non-existent file */ + read_map_file (map, map->map_data); + } + /* Plan event with jitter */ + jittered_sec = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout) / 2.; + double_to_tv (jittered_sec, &map->tv); + evtimer_add (&map->ev, &map->tv); + } + else if (map->protocol == MAP_PROTO_HTTP) { + evtimer_set (&map->ev, http_callback, map); + event_base_set (map->ev_base, &map->ev); + /* Read initial data */ + read_http_sync (map, map->map_data); + /* Plan event with jitter */ + jittered_sec = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout); + double_to_tv (jittered_sec, &map->tv); + evtimer_add (&map->ev, &map->tv); + } + cur = g_list_next (cur); + } +} + +void +remove_all_maps (struct config_file *cfg) +{ + g_list_free (cfg->maps); + cfg->maps = NULL; + if (cfg->map_pool != NULL) { + rspamd_mempool_delete (cfg->map_pool); + cfg->map_pool = NULL; + } +} + +gboolean +check_map_proto (const gchar *map_line, gint *res, const gchar **pos) +{ + if (g_ascii_strncasecmp (map_line, "http://", sizeof ("http://") - 1) == 0) { + if (res && pos) { + *res = MAP_PROTO_HTTP; + *pos = map_line + sizeof ("http://") - 1; + } + } + else if (g_ascii_strncasecmp (map_line, "file://", sizeof ("file://") - 1) == 0) { + if (res && pos) { + *res = MAP_PROTO_FILE; + *pos = map_line + sizeof ("file://") - 1; + } + } + else if (*map_line == '/') { + /* Trivial file case */ + *res = MAP_PROTO_FILE; + *pos = map_line; + } + else { + msg_debug ("invalid map fetching protocol: %s", map_line); + return FALSE; + } + + return TRUE; +} + +gboolean +add_map (struct 
config_file *cfg, const gchar *map_line, const gchar *description, + map_cb_t read_callback, map_fin_cb_t fin_callback, void **user_data) +{ + struct rspamd_map *new_map; + enum fetch_proto proto; + const gchar *def, *p, *hostend; + struct file_map_data *fdata; + struct http_map_data *hdata; + gchar portbuf[6]; + gint i, s, r; + struct addrinfo hints, *res; + + /* First of all detect protocol line */ + if (!check_map_proto (map_line, (int *)&proto, &def)) { + return FALSE; + } + /* Constant pool */ + if (cfg->map_pool == NULL) { + cfg->map_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + } + new_map = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct rspamd_map)); + new_map->read_callback = read_callback; + new_map->fin_callback = fin_callback; + new_map->user_data = user_data; + new_map->protocol = proto; + new_map->cfg = cfg; + new_map->id = g_random_int (); + new_map->locked = rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint)); + + if (proto == MAP_PROTO_FILE) { + new_map->uri = rspamd_mempool_strdup (cfg->cfg_pool, def); + def = new_map->uri; + } + else { + new_map->uri = rspamd_mempool_strdup (cfg->cfg_pool, map_line); + } + if (description != NULL) { + new_map->description = rspamd_mempool_strdup (cfg->cfg_pool, description); + } + + /* Now check for each proto separately */ + if (proto == MAP_PROTO_FILE) { + fdata = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct file_map_data)); + if (access (def, R_OK) == -1) { + if (errno != ENOENT) { + msg_err ("cannot open file '%s': %s", def, strerror (errno)); + return FALSE; + + } + msg_info ("map '%s' is not found, but it can be loaded automatically later", def); + /* We still can add this file */ + fdata->st.st_mtime = -1; + } + else { + stat (def, &fdata->st); + } + fdata->filename = rspamd_mempool_strdup (cfg->map_pool, def); + new_map->map_data = fdata; + } + else if (proto == MAP_PROTO_HTTP) { + hdata = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct http_map_data)); + /* Try 
to search port */ + if ((p = strchr (def, ':')) != NULL) { + hostend = p; + i = 0; + p++; + while (g_ascii_isdigit (*p) && i < (gint)sizeof (portbuf) - 1) { + portbuf[i++] = *p++; + } + if (*p != '/') { + msg_info ("bad http map definition: %s", def); + return FALSE; + } + portbuf[i] = '\0'; + hdata->port = atoi (portbuf); + } + else { + /* Default http port */ + rspamd_snprintf (portbuf, sizeof (portbuf), "80"); + hdata->port = 80; + /* Now separate host from path */ + if ((p = strchr (def, '/')) == NULL) { + msg_info ("bad http map definition: %s", def); + return FALSE; + } + hostend = p; + } + hdata->host = rspamd_mempool_alloc (cfg->map_pool, hostend - def + 1); + rspamd_strlcpy (hdata->host, def, hostend - def + 1); + hdata->path = rspamd_mempool_strdup (cfg->map_pool, p); + hdata->rlen = 0; + /* Now try to resolve */ + memset (&hints, 0, sizeof (hints)); + hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */ + hints.ai_socktype = SOCK_STREAM; /* Stream socket */ + hints.ai_flags = 0; + hints.ai_protocol = 0; /* Any protocol */ + hints.ai_canonname = NULL; + hints.ai_addr = NULL; + hints.ai_next = NULL; + + if ((r = getaddrinfo (hdata->host, portbuf, &hints, &res)) == 0) { + hdata->addr = res; + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)freeaddrinfo, hdata->addr); + } + else { + msg_err ("address resolution for %s failed: %s", hdata->host, gai_strerror (r)); + return FALSE; + } + /* Now try to connect */ + if ((s = make_tcp_socket (hdata->addr, FALSE, FALSE)) == -1) { + msg_info ("cannot connect to http server %s: %d, %s", hdata->host, errno, strerror (errno)); + return FALSE; + } + close (s); + new_map->map_data = hdata; + } + /* Temp pool */ + new_map->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + + cfg->maps = g_list_prepend (cfg->maps, new_map); + + return TRUE; +} diff --git a/src/libutil/map.h b/src/libutil/map.h new file mode 100644 index 000000000..1f34cdcc0 --- /dev/null +++ b/src/libutil/map.h @@ -0,0 
+1,134 @@ +#ifndef RSPAMD_MAP_H +#define RSPAMD_MAP_H + +#include "config.h" +#include "mem_pool.h" +#include "radix.h" + +/** + * Maps API is designed to load lists data from different dynamic sources. + * It monitor files and HTTP locations for modifications and reload them if they are + * modified. + */ + +enum fetch_proto { + MAP_PROTO_FILE, + MAP_PROTO_HTTP, +}; + +/** + * Data specific to file maps + */ +struct file_map_data { + const gchar *filename; + struct stat st; +}; + +/** + * Data specific to HTTP maps + */ +struct http_map_data { + struct addrinfo *addr; + guint16 port; + gchar *path; + gchar *host; + time_t last_checked; + gshort chunked; + gchar read_buf[BUFSIZ]; + guint32 rlen; + guint32 chunk; + guint32 chunk_remain; +}; + +struct map_cb_data; + +/** + * Callback types + */ +typedef gchar* (*map_cb_t)(rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data); +typedef void (*map_fin_cb_t)(rspamd_mempool_t *pool, struct map_cb_data *data); + +/** + * Common map object + */ +struct config_file; +struct rspamd_map { + rspamd_mempool_t *pool; + struct config_file *cfg; + enum fetch_proto protocol; + map_cb_t read_callback; + map_fin_cb_t fin_callback; + void **user_data; + struct event ev; + struct timeval tv; + struct event_base *ev_base; + void *map_data; + gchar *uri; + gchar *description; + guint32 id; + guint32 checksum; + /* Shared lock for temporary disabling of map reading (e.g. 
when this map is written by UI) */ + gint *locked; +}; + +/** + * Callback data for async load + */ +struct map_cb_data { + struct rspamd_map *map; + gint state; + void *prev_data; + void *cur_data; +}; + + +/** + * Check map protocol + */ +gboolean check_map_proto (const gchar *map_line, gint *res, const gchar **pos); +/** + * Add map from line + */ +gboolean add_map (struct config_file *cfg, const gchar *map_line, const gchar *description, + map_cb_t read_callback, map_fin_cb_t fin_callback, void **user_data); + +/** + * Start watching of maps by adding events to libevent event loop + */ +void start_map_watch (struct config_file *cfg, struct event_base *ev_base); + +/** + * Remove all maps watched (remove events) + */ +void remove_all_maps (struct config_file *cfg); + +typedef void (*insert_func) (gpointer st, gconstpointer key, gconstpointer value); + +/** + * Common callbacks for frequent types of lists + */ + +/** + * Radix list is a list like ip/mask + */ +gchar* read_radix_list (rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data); +void fin_radix_list (rspamd_mempool_t *pool, struct map_cb_data *data); + +/** + * Host list is an ordinal list of hosts or domains + */ +gchar* read_host_list (rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data); +void fin_host_list (rspamd_mempool_t *pool, struct map_cb_data *data); + +/** + * Kv list is an ordinal list of keys and values separated by whitespace + */ +gchar* read_kv_list (rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data); +void fin_kv_list (rspamd_mempool_t *pool, struct map_cb_data *data); + +/** + * FSM for lists parsing (support comments, blank lines and partial replies) + */ +gchar * abstract_parse_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data, insert_func func); + +#endif diff --git a/src/libutil/mem_pool.c b/src/libutil/mem_pool.c new file mode 100644 index 000000000..8f1105add --- /dev/null +++ 
b/src/libutil/mem_pool.c @@ -0,0 +1,776 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/**
 * Return the free space left in a pool page
 *
 * The value is the page length minus the distance between the start of the
 * page and the current position, with MEM_ALIGNMENT more bytes held back.
 *
 * @param chain pool page struct
 * @return free space of the page converted to gint
 */
static gint
pool_chain_free (struct _pool_chain *chain)
{
	return (gint)chain->len - (chain->pos - chain->begin + MEM_ALIGNMENT);
}
chain->pos = align_ptr (chain->begin, MEM_ALIGNMENT); + chain->len = size; + chain->next = NULL; + STAT_LOCK (); + mem_pool_stat->bytes_allocated += size; + mem_pool_stat->chunks_allocated++; + STAT_UNLOCK (); + + return chain; +} + +static struct _pool_chain_shared * +pool_chain_new_shared (gsize size) +{ + struct _pool_chain_shared *chain; + gpointer map; + + +#if defined(HAVE_MMAP_ANON) + map = mmap (NULL, size + sizeof (struct _pool_chain_shared), PROT_READ | PROT_WRITE, MAP_ANON | MAP_SHARED, -1, 0); + if (map == MAP_FAILED) { + msg_err ("cannot allocate %z bytes, aborting", size + sizeof (struct _pool_chain)); + abort (); + } + chain = (struct _pool_chain_shared *)map; + chain->begin = ((guint8 *) chain) + sizeof (struct _pool_chain_shared); +#elif defined(HAVE_MMAP_ZERO) + gint fd; + + fd = open ("/dev/zero", O_RDWR); + if (fd == -1) { + return NULL; + } + map = mmap (NULL, size + sizeof (struct _pool_chain_shared), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (map == MAP_FAILED) { + msg_err ("cannot allocate %z bytes, aborting", size + sizeof (struct _pool_chain)); + abort (); + } + chain = (struct _pool_chain_shared *)map; + chain->begin = ((guint8 *) chain) + sizeof (struct _pool_chain_shared); +#else +# error No mmap methods are defined +#endif + chain->pos = align_ptr (chain->begin, MEM_ALIGNMENT); + chain->len = size; + chain->lock = NULL; + chain->next = NULL; + STAT_LOCK (); + mem_pool_stat->shared_chunks_allocated++; + mem_pool_stat->bytes_allocated += size; + STAT_UNLOCK (); + + return chain; +} + + +/** + * Allocate new memory poll + * @param size size of pool's page + * @return new memory pool object + */ +rspamd_mempool_t * +rspamd_mempool_new (gsize size) +{ + rspamd_mempool_t *new; + gpointer map; + + g_return_val_if_fail (size > 0, NULL); + /* Allocate statistic structure if it is not allocated before */ + if (mem_pool_stat == NULL) { +#if defined(HAVE_MMAP_ANON) + map = mmap (NULL, sizeof (rspamd_mempool_stat_t), PROT_READ | PROT_WRITE, 
MAP_ANON | MAP_SHARED, -1, 0); + if (map == MAP_FAILED) { + msg_err ("cannot allocate %z bytes, aborting", sizeof (rspamd_mempool_stat_t)); + abort (); + } + mem_pool_stat = (rspamd_mempool_stat_t *)map; +#elif defined(HAVE_MMAP_ZERO) + gint fd; + + fd = open ("/dev/zero", O_RDWR); + g_assert (fd != -1); + map = mmap (NULL, sizeof (rspamd_mempool_stat_t), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (map == MAP_FAILED) { + msg_err ("cannot allocate %z bytes, aborting", sizeof (rspamd_mempool_stat_t)); + abort (); + } + mem_pool_stat = (rspamd_mempool_stat_t *)map; +#else +# error No mmap methods are defined +#endif + memset (map, 0, sizeof (rspamd_mempool_stat_t)); + } + + new = g_slice_alloc (sizeof (rspamd_mempool_t)); + if (new == NULL) { + msg_err ("cannot allocate %z bytes, aborting", sizeof (rspamd_mempool_t)); + abort (); + } + + new->cur_pool = pool_chain_new (size); + new->shared_pool = NULL; + new->first_pool = new->cur_pool; + new->cur_pool_tmp = NULL; + new->first_pool_tmp = NULL; + new->destructors = NULL; + /* Set it upon first call of set variable */ + new->variables = NULL; + new->mtx = rspamd_mutex_new (); + + mem_pool_stat->pools_allocated++; + + return new; +} + +static void * +memory_pool_alloc_common (rspamd_mempool_t * pool, gsize size, gboolean is_tmp) +{ + guint8 *tmp; + struct _pool_chain *new, *cur; + gint free; + + if (pool) { + POOL_MTX_LOCK (); +#ifdef MEMORY_GREEDY + if (is_tmp) { + cur = pool->first_pool_tmp; + } + else { + cur = pool->first_pool; + } +#else + if (is_tmp) { + cur = pool->cur_pool_tmp; + } + else { + cur = pool->cur_pool; + } +#endif + /* Find free space in pool chain */ + while (cur != NULL && + (free = pool_chain_free (cur)) < (gint)size && + cur->next != NULL) { + cur = cur->next; + } + + if (cur == NULL || (free < (gint)size && cur->next == NULL)) { + /* Allocate new pool */ + if (cur == NULL) { + if (pool->first_pool->len >= size + MEM_ALIGNMENT) { + new = pool_chain_new (pool->first_pool->len); + } + else { + 
new = pool_chain_new (size + pool->first_pool->len + MEM_ALIGNMENT); + } + /* Connect to pool subsystem */ + if (is_tmp) { + pool->first_pool_tmp = new; + } + else { + pool->first_pool = new; + } + } + else { + if (cur->len >= size + MEM_ALIGNMENT) { + new = pool_chain_new (cur->len); + } + else { + mem_pool_stat->oversized_chunks++; + new = pool_chain_new (size + pool->first_pool->len + MEM_ALIGNMENT); + } + /* Attach new pool to chain */ + cur->next = new; + } + if (is_tmp) { + pool->cur_pool_tmp = new; + } + else { + pool->cur_pool = new; + } + /* No need to align again */ + tmp = new->pos; + new->pos = tmp + size; + POOL_MTX_UNLOCK (); + return tmp; + } + /* No need to allocate page */ + tmp = align_ptr (cur->pos, MEM_ALIGNMENT); + cur->pos = tmp + size; + POOL_MTX_UNLOCK (); + return tmp; + } + return NULL; +} + + +void * +rspamd_mempool_alloc (rspamd_mempool_t * pool, gsize size) +{ + return memory_pool_alloc_common (pool, size, FALSE); +} + +void * +rspamd_mempool_alloc_tmp (rspamd_mempool_t * pool, gsize size) +{ + return memory_pool_alloc_common (pool, size, TRUE); +} + +void * +rspamd_mempool_alloc0 (rspamd_mempool_t * pool, gsize size) +{ + void *pointer = rspamd_mempool_alloc (pool, size); + if (pointer) { + memset (pointer, 0, size); + } + return pointer; +} + +void * +rspamd_mempool_alloc0_tmp (rspamd_mempool_t * pool, gsize size) +{ + void *pointer = rspamd_mempool_alloc_tmp (pool, size); + if (pointer) { + memset (pointer, 0, size); + } + return pointer; +} + +void * +rspamd_mempool_alloc0_shared (rspamd_mempool_t * pool, gsize size) +{ + void *pointer = rspamd_mempool_alloc_shared (pool, size); + if (pointer) { + memset (pointer, 0, size); + } + return pointer; +} + +void * +rspamd_mempool_alloc_shared (rspamd_mempool_t * pool, gsize size) +{ + guint8 *tmp; + struct _pool_chain_shared *new, *cur; + gint free; + + if (pool) { + g_return_val_if_fail(size > 0, NULL); + + POOL_MTX_LOCK () + ; + cur = pool->shared_pool; + if (!cur) { + cur = 
pool_chain_new_shared (pool->first_pool->len); + pool->shared_pool = cur; + } + + /* Find free space in pool chain */ + while ((free = pool_chain_free ((struct _pool_chain *) cur)) + < (gint) size && cur->next) { + cur = cur->next; + } + if (free < (gint) size && cur->next == NULL) { + /* Allocate new pool */ + + if (cur->len >= size + MEM_ALIGNMENT) { + new = pool_chain_new_shared (cur->len); + } + else { + mem_pool_stat->oversized_chunks++; + new = pool_chain_new_shared ( + size + pool->first_pool->len + MEM_ALIGNMENT); + } + /* Attach new pool to chain */ + cur->next = new; + new->pos += size; + STAT_LOCK (); + mem_pool_stat->bytes_allocated += size; + STAT_UNLOCK (); + POOL_MTX_UNLOCK () + ; + return new->begin; + } + tmp = align_ptr(cur->pos, MEM_ALIGNMENT); + cur->pos = tmp + size; + POOL_MTX_UNLOCK () + ; + return tmp; + } + return NULL; +} + + +gchar * +rspamd_mempool_strdup (rspamd_mempool_t * pool, const gchar *src) +{ + gsize len; + gchar *newstr; + + if (src == NULL) { + return NULL; + } + + len = strlen (src); + newstr = rspamd_mempool_alloc (pool, len + 1); + memcpy (newstr, src, len); + newstr[len] = '\0'; + return newstr; +} + +gchar * +rspamd_mempool_fstrdup (rspamd_mempool_t * pool, const struct f_str_s *src) +{ + gchar *newstr; + + if (src == NULL) { + return NULL; + } + + newstr = rspamd_mempool_alloc (pool, src->len + 1); + memcpy (newstr, src->begin, src->len); + newstr[src->len] = '\0'; + return newstr; +} + + +gchar * +rspamd_mempool_strdup_shared (rspamd_mempool_t * pool, const gchar *src) +{ + gsize len; + gchar *newstr; + + if (src == NULL) { + return NULL; + } + + len = strlen (src); + newstr = rspamd_mempool_alloc_shared (pool, len + 1); + memcpy (newstr, src, len); + newstr[len] = '\0'; + return newstr; +} + +/* Find pool for a pointer, returns NULL if pointer is not in pool */ +static struct _pool_chain_shared * +memory_pool_find_pool (rspamd_mempool_t * pool, void *pointer) +{ + struct _pool_chain_shared *cur = pool->shared_pool; + + 
while (cur) { + if ((guint8 *) pointer >= cur->begin && (guint8 *) pointer <= (cur->begin + cur->len)) { + return cur; + } + cur = cur->next; + } + + return NULL; +} + +static inline gint +__mutex_spin (rspamd_mempool_mutex_t * mutex) +{ + /* check spin count */ + if (g_atomic_int_dec_and_test (&mutex->spin)) { + /* This may be deadlock, so check owner of this lock */ + if (mutex->owner == getpid ()) { + /* This mutex was locked by calling process, so it is just double lock and we can easily unlock it */ + g_atomic_int_set (&mutex->spin, MUTEX_SPIN_COUNT); + return 0; + } + else if (kill (mutex->owner, 0) == -1) { + /* Owner process was not found, so release lock */ + g_atomic_int_set (&mutex->spin, MUTEX_SPIN_COUNT); + return 0; + } + /* Spin again */ + g_atomic_int_set (&mutex->spin, MUTEX_SPIN_COUNT); + } +#ifdef HAVE_ASM_PAUSE + __asm __volatile ("pause"); +#elif defined(HAVE_SCHED_YIELD) + (void)sched_yield (); +#endif + +#if defined(HAVE_NANOSLEEP) + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = MUTEX_SLEEP_TIME; + /* Spin */ + while (nanosleep (&ts, &ts) == -1 && errno == EINTR); +#else +# error No methods to spin are defined +#endif + return 1; +} + +static void +memory_pool_mutex_spin (rspamd_mempool_mutex_t * mutex) +{ + while (!g_atomic_int_compare_and_exchange (&mutex->lock, 0, 1)) { + if (!__mutex_spin (mutex)) { + return; + } + } +} + +/* Simple implementation of spinlock */ +void +rspamd_mempool_lock_shared (rspamd_mempool_t * pool, void *pointer) +{ + struct _pool_chain_shared *chain; + + chain = memory_pool_find_pool (pool, pointer); + if (chain == NULL) { + return; + } + if (chain->lock == NULL) { + chain->lock = rspamd_mempool_get_mutex (pool); + } + rspamd_mempool_lock_mutex (chain->lock); +} + +void +rspamd_mempool_unlock_shared (rspamd_mempool_t * pool, void *pointer) +{ + struct _pool_chain_shared *chain; + + chain = memory_pool_find_pool (pool, pointer); + if (chain == NULL) { + return; + } + if (chain->lock == NULL) { + chain->lock = 
rspamd_mempool_get_mutex (pool); + return; + } + + rspamd_mempool_unlock_mutex (chain->lock); +} + +void +rspamd_mempool_add_destructor_full (rspamd_mempool_t * pool, rspamd_mempool_destruct_t func, void *data, + const gchar *function, const gchar *line) +{ + struct _pool_destructors *cur; + + cur = rspamd_mempool_alloc (pool, sizeof (struct _pool_destructors)); + if (cur) { + POOL_MTX_LOCK (); + cur->func = func; + cur->data = data; + cur->function = function; + cur->loc = line; + cur->prev = pool->destructors; + pool->destructors = cur; + POOL_MTX_UNLOCK (); + } +} + +void +rspamd_mempool_replace_destructor (rspamd_mempool_t * pool, rspamd_mempool_destruct_t func, void *old_data, void *new_data) +{ + struct _pool_destructors *tmp; + + tmp = pool->destructors; + while (tmp) { + if (tmp->func == func && tmp->data == old_data) { + tmp->func = func; + tmp->data = new_data; + break; + } + tmp = tmp->prev; + } + +} + +void +rspamd_mempool_delete (rspamd_mempool_t * pool) +{ + struct _pool_chain *cur = pool->first_pool, *tmp; + struct _pool_chain_shared *cur_shared = pool->shared_pool, *tmp_shared; + struct _pool_destructors *destructor = pool->destructors; + + POOL_MTX_LOCK (); + /* Call all pool destructors */ + while (destructor) { + /* Avoid calling destructors for NULL pointers */ + if (destructor->data != NULL) { + destructor->func (destructor->data); + } + destructor = destructor->prev; + } + + while (cur) { + tmp = cur; + cur = cur->next; + STAT_LOCK (); + mem_pool_stat->chunks_freed++; + mem_pool_stat->bytes_allocated -= tmp->len; + STAT_UNLOCK (); + g_slice_free1 (tmp->len, tmp->begin); + g_slice_free (struct _pool_chain, tmp); + } + /* Clean temporary pools */ + cur = pool->first_pool_tmp; + while (cur) { + tmp = cur; + cur = cur->next; + STAT_LOCK (); + mem_pool_stat->chunks_freed++; + mem_pool_stat->bytes_allocated -= tmp->len; + STAT_UNLOCK (); + g_slice_free1 (tmp->len, tmp->begin); + g_slice_free (struct _pool_chain, tmp); + } + /* Unmap shared memory */ 
+ while (cur_shared) { + tmp_shared = cur_shared; + cur_shared = cur_shared->next; + STAT_LOCK (); + mem_pool_stat->chunks_freed++; + mem_pool_stat->bytes_allocated -= tmp_shared->len; + STAT_UNLOCK (); + munmap ((void *)tmp_shared, tmp_shared->len + sizeof (struct _pool_chain_shared)); + } + if (pool->variables) { + g_hash_table_destroy (pool->variables); + } + + mem_pool_stat->pools_freed++; + POOL_MTX_UNLOCK (); + rspamd_mutex_free (pool->mtx); + g_slice_free (rspamd_mempool_t, pool); +} + +/** + * Free every chunk of the temporary chain and reset the chain so that the pool can be used again + * @param pool memory pool object + */ +void +rspamd_mempool_cleanup_tmp (rspamd_mempool_t* pool) +{ + struct _pool_chain *cur, *tmp; + + POOL_MTX_LOCK (); + cur = pool->first_pool_tmp; + while (cur) { + tmp = cur; + cur = cur->next; + STAT_LOCK (); + mem_pool_stat->chunks_freed++; + mem_pool_stat->bytes_allocated -= tmp->len; + STAT_UNLOCK (); + g_slice_free1 (tmp->len, tmp->begin); + g_slice_free (struct _pool_chain, tmp); + } + /* Forget the freed chunks: a later tmp allocation, cleanup or pool deletion must not touch them again */ + pool->first_pool_tmp = NULL; + pool->cur_pool_tmp = NULL; + /* NOTE(review): pools_freed is bumped although the pool itself stays alive - confirm that stats consumers expect it */ + mem_pool_stat->pools_freed++; + POOL_MTX_UNLOCK (); +} + +/** + * Copy the global allocator counters to the caller's structure + * @param st structure to fill + */ +void +rspamd_mempool_stat (rspamd_mempool_stat_t * st) +{ + st->pools_allocated = mem_pool_stat->pools_allocated; + st->pools_freed = mem_pool_stat->pools_freed; + st->shared_chunks_allocated = mem_pool_stat->shared_chunks_allocated; + st->bytes_allocated = mem_pool_stat->bytes_allocated; + st->chunks_allocated = mem_pool_stat->chunks_allocated; + st->chunks_freed = mem_pool_stat->chunks_freed; + st->oversized_chunks = mem_pool_stat->oversized_chunks; +} + +/* By default allocate 8Kb chunks of memory */ +#define FIXED_POOL_SIZE 8192 +gsize +rspamd_mempool_suggest_size (void) +{ +#ifdef HAVE_GETPAGESIZE + return MAX (getpagesize (), FIXED_POOL_SIZE); +#else + return MAX (sysconf (_SC_PAGESIZE), FIXED_POOL_SIZE); +#endif +} + +rspamd_mempool_mutex_t * +rspamd_mempool_get_mutex (rspamd_mempool_t * pool) +{ + rspamd_mempool_mutex_t *res; + if (pool != NULL) { + res = rspamd_mempool_alloc_shared (pool, sizeof (rspamd_mempool_mutex_t)); + res->lock 
= 0; + res->owner = 0; + res->spin = MUTEX_SPIN_COUNT; + return res; + } + return NULL; +} + +void +rspamd_mempool_lock_mutex (rspamd_mempool_mutex_t * mutex) +{ + memory_pool_mutex_spin (mutex); + mutex->owner = getpid (); +} + +void +rspamd_mempool_unlock_mutex (rspamd_mempool_mutex_t * mutex) +{ + mutex->owner = 0; + (void)g_atomic_int_compare_and_exchange (&mutex->lock, 1, 0); +} + +rspamd_mempool_rwlock_t * +rspamd_mempool_get_rwlock (rspamd_mempool_t * pool) +{ + rspamd_mempool_rwlock_t *lock; + + lock = rspamd_mempool_alloc_shared (pool, sizeof (rspamd_mempool_rwlock_t)); + lock->__r_lock = rspamd_mempool_get_mutex (pool); + lock->__w_lock = rspamd_mempool_get_mutex (pool); + + return lock; +} + +void +rspamd_mempool_rlock_rwlock (rspamd_mempool_rwlock_t * lock) +{ + /* Spin on write lock */ + while (g_atomic_int_get (&lock->__w_lock->lock)) { + if (!__mutex_spin (lock->__w_lock)) { + break; + } + } + + g_atomic_int_inc (&lock->__r_lock->lock); + lock->__r_lock->owner = getpid (); +} + +void +rspamd_mempool_wlock_rwlock (rspamd_mempool_rwlock_t * lock) +{ + /* Spin on write lock first */ + rspamd_mempool_lock_mutex (lock->__w_lock); + /* Now we have write lock set up */ + /* Wait all readers */ + while (g_atomic_int_get (&lock->__r_lock->lock)) { + __mutex_spin (lock->__r_lock); + } +} + +void +rspamd_mempool_runlock_rwlock (rspamd_mempool_rwlock_t * lock) +{ + if (g_atomic_int_get (&lock->__r_lock->lock)) { + (void)g_atomic_int_dec_and_test (&lock->__r_lock->lock); + } +} + +void +rspamd_mempool_wunlock_rwlock (rspamd_mempool_rwlock_t * lock) +{ + rspamd_mempool_unlock_mutex (lock->__w_lock); +} + +void +rspamd_mempool_set_variable (rspamd_mempool_t *pool, const gchar *name, gpointer value, rspamd_mempool_destruct_t destructor) +{ + if (pool->variables == NULL) { + pool->variables = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + } + + g_hash_table_insert (pool->variables, rspamd_mempool_strdup (pool, name), value); + if (destructor != NULL) { + 
rspamd_mempool_add_destructor (pool, destructor, value); + } +} + +gpointer +rspamd_mempool_get_variable (rspamd_mempool_t *pool, const gchar *name) +{ + if (pool->variables == NULL) { + return NULL; + } + + return g_hash_table_lookup (pool->variables, name); +} + + +/* + * vi:ts=4 + */ diff --git a/src/libutil/mem_pool.h b/src/libutil/mem_pool.h new file mode 100644 index 000000000..f759ed60a --- /dev/null +++ b/src/libutil/mem_pool.h @@ -0,0 +1,299 @@ +/** + * @file mem_pool.h + * \brief Memory pools library. + * + * Memory pools library. Library is designed to implement efficient way to + * store data in memory avoiding calling of many malloc/free. It has overhead + * because of fact that objects live in pool for rather long time and are not freed + * immediately after use, but if we know certainly when these objects can be used, we + * can use pool for them + */ + +#ifndef RSPAMD_MEM_POOL_H +#define RSPAMD_MEM_POOL_H + +#include "config.h" + + +struct f_str_s; + +#define MEM_ALIGNMENT sizeof(unsigned long) /* platform word */ +#define align_ptr(p, a) \ + (guint8 *) (((uintptr_t) (p) + ((uintptr_t) a - 1)) & ~((uintptr_t) a - 1)) + +/** + * Destructor type definition + */ +typedef void (*rspamd_mempool_destruct_t)(void *ptr); + +/** + * Pool mutex structure + */ +typedef struct memory_pool_mutex_s { + gint lock; + pid_t owner; + guint spin; +} rspamd_mempool_mutex_t; + +/** + * Pool page structure + */ +struct _pool_chain { + guint8 *begin; /**< begin of pool chain block */ + guint8 *pos; /**< current start of free space in block */ + gsize len; /**< length of block */ + struct _pool_chain *next; /**< chain link */ +}; + +/** + * Shared pool page + */ +struct _pool_chain_shared { + guint8 *begin; + guint8 *pos; + gsize len; + struct _pool_chain_shared *next; + rspamd_mempool_mutex_t *lock; +}; + +/** + * Destructors list item structure + */ +struct _pool_destructors { + rspamd_mempool_destruct_t func; /**< pointer to destructor */ + void *data; /**< data to free 
*/ + const gchar *function; /**< function from which this destructor was added */ + const gchar *loc; /**< line number */ + struct _pool_destructors *prev; /**< chain link */ +}; + +/** + * Memory pool type + */ +struct rspamd_mutex_s; +typedef struct memory_pool_s { + struct _pool_chain *cur_pool; /**< currently used page */ + struct _pool_chain *first_pool; /**< first page */ + struct _pool_chain *cur_pool_tmp; /**< currently used temporary page */ + struct _pool_chain *first_pool_tmp; /**< first temporary page */ + struct _pool_chain_shared *shared_pool; /**< shared chain */ + struct _pool_destructors *destructors; /**< destructors chain */ + GHashTable *variables; /**< private memory pool variables */ + struct rspamd_mutex_s *mtx; /**< threads lock */ +} rspamd_mempool_t; + +/** + * Statistics structure + */ +typedef struct memory_pool_stat_s { + gsize pools_allocated; /**< total number of allocated pools */ + gsize pools_freed; /**< number of freed pools */ + gsize bytes_allocated; /**< bytes that are allocated with pool allocator */ + gsize chunks_allocated; /**< number of chunks that are allocated */ + gsize shared_chunks_allocated; /**< shared chunks allocated */ + gsize chunks_freed; /**< chunks freed */ + gsize oversized_chunks; /**< oversized chunks */ +} rspamd_mempool_stat_t; + +/** + * Rwlock for locking shared memory regions + */ +typedef struct memory_pool_rwlock_s { + rspamd_mempool_mutex_t *__r_lock; /**< read mutex (private) */ + rspamd_mempool_mutex_t *__w_lock; /**< write mutex (private) */ +} rspamd_mempool_rwlock_t; + +/** + * Allocate new memory poll + * @param size size of pool's page + * @return new memory pool object + */ +rspamd_mempool_t* rspamd_mempool_new (gsize size); + +/** + * Get memory from pool + * @param pool memory pool object + * @param size bytes to allocate + * @return pointer to allocated object + */ +void* rspamd_mempool_alloc (rspamd_mempool_t* pool, gsize size); + +/** + * Get memory from temporary pool + * @param pool 
memory pool object + * @param size bytes to allocate + * @return pointer to allocated object + */ +void* rspamd_mempool_alloc_tmp (rspamd_mempool_t* pool, gsize size); + +/** + * Get memory and set it to zero + * @param pool memory pool object + * @param size bytes to allocate + * @return pointer to allocated object + */ +void* rspamd_mempool_alloc0 (rspamd_mempool_t* pool, gsize size); + +/** + * Get memory and set it to zero + * @param pool memory pool object + * @param size bytes to allocate + * @return pointer to allocated object + */ +void* rspamd_mempool_alloc0_tmp (rspamd_mempool_t* pool, gsize size); + +/** + * Cleanup temporary data in pool + */ +void rspamd_mempool_cleanup_tmp (rspamd_mempool_t* pool); + +/** + * Make a copy of string in pool + * @param pool memory pool object + * @param src source string + * @return pointer to newly created string that is copy of src + */ +gchar* rspamd_mempool_strdup (rspamd_mempool_t* pool, const gchar *src); + +/** + * Make a copy of fixed string in pool as null terminated string + * @param pool memory pool object + * @param src source string + * @return pointer to newly created string that is copy of src + */ +gchar* rspamd_mempool_fstrdup (rspamd_mempool_t* pool, const struct f_str_s *src); + +/** + * Allocate piece of shared memory + * @param pool memory pool object + * @param size bytes to allocate + */ +void* rspamd_mempool_alloc_shared (rspamd_mempool_t* pool, gsize size); +void* rspamd_mempool_alloc0_shared (rspamd_mempool_t *pool, gsize size); +gchar* rspamd_mempool_strdup_shared (rspamd_mempool_t* pool, const gchar *src); + +/** + * Lock chunk of shared memory in which pointer is placed + * @param pool memory pool object + * @param pointer pointer of shared memory object that is to be locked (the whole page that contains that object is locked) + */ +void rspamd_mempool_lock_shared (rspamd_mempool_t *pool, void *pointer); + +/** + * Unlock chunk of shared memory in which pointer is placed + * @param pool 
memory pool object + * @param pointer pointer of shared memory object that is to be unlocked (the whole page that contains that object is unlocked) + */ +void rspamd_mempool_unlock_shared (rspamd_mempool_t *pool, void *pointer); + +/** + * Add destructor callback to pool + * @param pool memory pool object + * @param func pointer to function-destructor + * @param data pointer to data that would be passed to destructor + * @param function name of the function that registers the destructor (G_STRFUNC when the macro below is used) + * @param line source location of the registration (G_STRLOC when the macro below is used) + */ +void rspamd_mempool_add_destructor_full (rspamd_mempool_t *pool, rspamd_mempool_destruct_t func, void *data, + const gchar *function, const gchar *line); + +/* Macros for common usage */ +#define rspamd_mempool_add_destructor(pool, func, data) \ + rspamd_mempool_add_destructor_full(pool, func, data, G_STRFUNC, G_STRLOC) + +/** + * Replace destructor callback to pool for specified pointer + * @param pool memory pool object + * @param func pointer to function-destructor + * @param old_data pointer to old data + * @param new_data pointer to data that would be passed to destructor + */ +void rspamd_mempool_replace_destructor (rspamd_mempool_t *pool, + rspamd_mempool_destruct_t func, void *old_data, void *new_data); + +/** + * Delete pool, free all its chunks and call destructors chain + * @param pool memory pool object + */ +void rspamd_mempool_delete (rspamd_mempool_t *pool); + +/** + * Get new mutex from pool (allocated in shared memory) + * @param pool memory pool object + * @return mutex object + */ +rspamd_mempool_mutex_t* rspamd_mempool_get_mutex (rspamd_mempool_t *pool); + +/** + * Lock mutex + * @param mutex mutex to lock + */ +void rspamd_mempool_lock_mutex (rspamd_mempool_mutex_t *mutex); + +/** + * Unlock mutex + * @param mutex mutex to unlock + */ +void rspamd_mempool_unlock_mutex (rspamd_mempool_mutex_t *mutex); + +/** + * Create new rwlock and place it in shared memory + * @param pool memory pool object + * @return rwlock object + */ +rspamd_mempool_rwlock_t* rspamd_mempool_get_rwlock (rspamd_mempool_t *pool); + +/** + * Acquire read lock + 
* @param lock rwlock object + */ +void rspamd_mempool_rlock_rwlock (rspamd_mempool_rwlock_t *lock); + +/** + * Acquire write lock + * @param lock rwlock object + */ +void rspamd_mempool_wlock_rwlock (rspamd_mempool_rwlock_t *lock); + +/** + * Release read lock + * @param lock rwlock object + */ +void rspamd_mempool_runlock_rwlock (rspamd_mempool_rwlock_t *lock); + +/** + * Release write lock + * @param lock rwlock object + */ +void rspamd_mempool_wunlock_rwlock (rspamd_mempool_rwlock_t *lock); + +/** + * Get pool allocator statistics + * @param st stat pool struct + */ +void rspamd_mempool_stat (rspamd_mempool_stat_t *st); + +/** + * Get optimal pool size based on page size for this system + * @return the larger of the system page size and the default 8Kb chunk size + */ +gsize rspamd_mempool_suggest_size (void); + +/** + * Set memory pool variable + * @param pool memory pool object + * @param name name of variable + * @param value value of variable + * @param destructor pointer to function-destructor + */ +void rspamd_mempool_set_variable (rspamd_mempool_t *pool, const gchar *name, + gpointer value, rspamd_mempool_destruct_t destructor); + +/** + * Get memory pool variable + * @param pool memory pool object + * @param name name of variable + * @return NULL or pointer to variable data + */ +gpointer rspamd_mempool_get_variable (rspamd_mempool_t *pool, const gchar *name); + + +#endif diff --git a/src/libutil/memcached.c b/src/libutil/memcached.c new file mode 100644 index 000000000..e4c9be9d2 --- /dev/null +++ b/src/libutil/memcached.c @@ -0,0 +1,831 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef _THREAD_SAFE +# include +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "memcached.h" + +#define CRLF "\r\n" +#define END_TRAILER "END" CRLF +#define STORED_TRAILER "STORED" CRLF +#define NOT_STORED_TRAILER "NOT STORED" CRLF +#define EXISTS_TRAILER "EXISTS" CRLF +#define DELETED_TRAILER "DELETED" CRLF +#define NOT_FOUND_TRAILER "NOT_FOUND" CRLF +#define CLIENT_ERROR_TRAILER "CLIENT_ERROR" +#define SERVER_ERROR_TRAILER "SERVER_ERROR" + +#define READ_BUFSIZ 1500 +#define MAX_RETRIES 3 + +/* Header for udp protocol */ +struct memc_udp_header { + guint16 req_id; + guint16 seq_num; + guint16 dg_sent; + guint16 unused; +}; + +static void socket_callback (gint fd, short what, void *arg); +static gint memc_parse_header (gchar *buf, size_t * len, gchar **end); + +/* + * Write to syslog if OPT_DEBUG is specified + */ +static void +memc_log (const 
memcached_ctx_t * ctx, gint line, const gchar *fmt, ...) +{ + va_list args; + if (ctx->options & MEMC_OPT_DEBUG) { + va_start (args, fmt); + g_log (G_LOG_DOMAIN, G_LOG_LEVEL_DEBUG, "memc_debug(%d): host: %s, port: %d", line, inet_ntoa (ctx->addr), ntohs (ctx->port)); + g_logv (G_LOG_DOMAIN, G_LOG_LEVEL_DEBUG, fmt, args); + va_end (args); + } +} + +/* + * Callback for write command + */ +static void +write_handler (gint fd, short what, memcached_ctx_t * ctx) +{ + gchar read_buf[READ_BUFSIZ]; + gint retries; + ssize_t r; + struct memc_udp_header header; + struct iovec iov[4]; + + /* Write something to memcached */ + if (what == EV_WRITE) { + if (ctx->protocol == UDP_TEXT) { + /* Send udp header */ + bzero (&header, sizeof (header)); + header.dg_sent = htons (1); + header.req_id = ctx->count; + } + + r = snprintf (read_buf, READ_BUFSIZ, "%s %s 0 %d %zu" CRLF, ctx->cmd, ctx->param->key, ctx->param->expire, ctx->param->bufsize); + memc_log (ctx, __LINE__, "memc_write: send write request to memcached: %s", read_buf); + + if (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + if (ctx->param->bufpos == 0) { + iov[1].iov_base = read_buf; + iov[1].iov_len = r; + } + else { + iov[1].iov_base = NULL; + iov[1].iov_len = 0; + } + iov[2].iov_base = ctx->param->buf + ctx->param->bufpos; + iov[2].iov_len = ctx->param->bufsize - ctx->param->bufpos; + iov[3].iov_base = CRLF; + iov[3].iov_len = sizeof (CRLF) - 1; + if (writev (ctx->sock, iov, 4) == -1) { + memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno)); + } + } + else { + iov[0].iov_base = read_buf; + iov[0].iov_len = r; + iov[1].iov_base = ctx->param->buf + ctx->param->bufpos; + iov[1].iov_len = ctx->param->bufsize - ctx->param->bufpos; + iov[2].iov_base = CRLF; + iov[2].iov_len = sizeof (CRLF) - 1; + if (writev (ctx->sock, iov, 3) == -1) { + memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno)); + } + } + event_del 
(&ctx->mem_ev); + event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + } + else if (what == EV_READ) { + /* Read header */ + retries = 0; + while (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + iov[1].iov_base = read_buf; + iov[1].iov_len = READ_BUFSIZ; + if ((r = readv (ctx->sock, iov, 2)) == -1) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + /* Stop here: the header and the buffer were not filled and the callback has already been invoked */ + return; + } + if (header.req_id != ctx->count && retries < MAX_RETRIES) { + retries++; + /* Not our reply packet */ + continue; + } + break; + } + if (ctx->protocol != UDP_TEXT) { + r = read (ctx->sock, read_buf, READ_BUFSIZ - 1); + if (r <= 0) { + /* Nothing usable was read: do not log or compare an unset buffer */ + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + return; + } + /* At most READ_BUFSIZ - 1 bytes were read, so the terminator always fits */ + read_buf[r] = '\0'; + } + memc_log (ctx, __LINE__, "memc_write: read reply from memcached: %s", read_buf); + /* Increment count */ + ctx->count++; + event_del (&ctx->mem_ev); + if (strncmp (read_buf, STORED_TRAILER, sizeof (STORED_TRAILER) - 1) == 0) { + ctx->callback (ctx, OK, ctx->callback_data); + } + else if (strncmp (read_buf, NOT_STORED_TRAILER, sizeof (NOT_STORED_TRAILER) - 1) == 0) { + ctx->callback (ctx, CLIENT_ERROR, ctx->callback_data); + } + else if (strncmp (read_buf, EXISTS_TRAILER, sizeof (EXISTS_TRAILER) - 1) == 0) { + ctx->callback (ctx, EXISTS, ctx->callback_data); + } + else { + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + } + } + else if (what == EV_TIMEOUT) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); + } +} + +/* + * Callback for read command + */ +static void +read_handler (gint fd, short what, memcached_ctx_t * ctx) +{ + gchar read_buf[READ_BUFSIZ]; + gchar *p; + ssize_t r; + size_t datalen; + struct memc_udp_header header; + struct iovec iov[2]; + gint retries = 0, t; + + if (what == EV_WRITE) { + /* Send command to memcached */ + if (ctx->protocol == UDP_TEXT) { + /* Send udp header */ + bzero (&header, sizeof (header)); + header.dg_sent = htons (1); 
+ header.req_id = ctx->count; + } + + r = snprintf (read_buf, READ_BUFSIZ, "%s %s" CRLF, ctx->cmd, ctx->param->key); + memc_log (ctx, __LINE__, "memc_read: send read request to memcached: %s", read_buf); + if (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + iov[1].iov_base = read_buf; + iov[1].iov_len = r; + if (writev (ctx->sock, iov, 2) == -1) { + memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno)); + } + } + else { + if (write (ctx->sock, read_buf, r) == -1) { + memc_log (ctx, __LINE__, "memc_write: write failed: %s", strerror (errno)); + } + } + event_del (&ctx->mem_ev); + event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + } + else if (what == EV_READ) { + while (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + iov[1].iov_base = read_buf; + iov[1].iov_len = READ_BUFSIZ; + if ((r = readv (ctx->sock, iov, 2)) == -1) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + return; + } + memc_log (ctx, __LINE__, "memc_read: got read_buf: %s", read_buf); + if (header.req_id != ctx->count && retries < MAX_RETRIES) { + memc_log (ctx, __LINE__, "memc_read: got wrong packet id: %d, %d was awaited", header.req_id, ctx->count); + retries++; + /* Not our reply packet */ + continue; + } + break; + } + if (ctx->protocol != UDP_TEXT) { + r = read (ctx->sock, read_buf, READ_BUFSIZ - 1); + } + + if (r > 0) { + read_buf[r] = 0; + if (ctx->param->bufpos == 0) { + t = memc_parse_header (read_buf, &datalen, &p); + if (t < 0) { + event_del (&ctx->mem_ev); + memc_log (ctx, __LINE__, "memc_read: cannot parse memcached reply"); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + return; + } + else if (t == 0) { + memc_log (ctx, __LINE__, "memc_read: record does not exists"); + event_del (&ctx->mem_ev); + 
ctx->callback (ctx, NOT_EXISTS, ctx->callback_data); + return; + } + + if (datalen > ctx->param->bufsize) { + memc_log (ctx, __LINE__, "memc_read: user's buffer is too small: %zd, %zd required", ctx->param->bufsize, datalen); + event_del (&ctx->mem_ev); + ctx->callback (ctx, WRONG_LENGTH, ctx->callback_data); + return; + } + /* Check if we already have all data in buffer */ + if (r >= (ssize_t)(datalen + sizeof (END_TRAILER) + sizeof (CRLF) - 2)) { + /* Store all data in param's buffer */ + memcpy (ctx->param->buf + ctx->param->bufpos, p, datalen); + /* Increment count */ + ctx->count++; + event_del (&ctx->mem_ev); + ctx->callback (ctx, OK, ctx->callback_data); + return; + } + /* Subtract from sum parsed header's length */ + r -= p - read_buf; + } + else { + p = read_buf; + } + + if (strncmp (ctx->param->buf + ctx->param->bufpos + r - sizeof (END_TRAILER) - sizeof (CRLF) + 2, END_TRAILER, sizeof (END_TRAILER) - 1) == 0) { + r -= sizeof (END_TRAILER) - sizeof (CRLF) - 2; + memcpy (ctx->param->buf + ctx->param->bufpos, p, r); + event_del (&ctx->mem_ev); + ctx->callback (ctx, OK, ctx->callback_data); + return; + } + /* Store this part of data in param's buffer */ + memcpy (ctx->param->buf + ctx->param->bufpos, p, r); + ctx->param->bufpos += r; + } + else { + memc_log (ctx, __LINE__, "memc_read: read(v) failed: %d, %s", r, strerror (errno)); + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + return; + } + + ctx->count++; + } + else if (what == EV_TIMEOUT) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); + } + +} + +/* + * Callback for delete command + */ +static void +delete_handler (gint fd, short what, memcached_ctx_t * ctx) +{ + gchar read_buf[READ_BUFSIZ]; + gint retries; + ssize_t r; + struct memc_udp_header header; + struct iovec iov[2]; + + /* Write something to memcached */ + if (what == EV_WRITE) { + if (ctx->protocol == UDP_TEXT) { + /* Send udp header */ + bzero (&header, sizeof 
(header)); + header.dg_sent = htons (1); + header.req_id = ctx->count; + } + r = snprintf (read_buf, READ_BUFSIZ, "delete %s" CRLF, ctx->param->key); + memc_log (ctx, __LINE__, "memc_delete: send delete request to memcached: %s", read_buf); + + if (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + iov[1].iov_base = read_buf; + iov[1].iov_len = r; + ctx->param->bufpos = writev (ctx->sock, iov, 2); + if (ctx->param->bufpos == (size_t)-1) { + memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno)); + } + } + else { + if (write (ctx->sock, read_buf, r) == -1) { + memc_log (ctx, __LINE__, "memc_write: write failed: %s", strerror (errno)); + } + } + event_del (&ctx->mem_ev); + event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + } + else if (what == EV_READ) { + /* Read header */ + retries = 0; + while (ctx->protocol == UDP_TEXT) { + iov[0].iov_base = &header; + iov[0].iov_len = sizeof (struct memc_udp_header); + iov[1].iov_base = read_buf; + iov[1].iov_len = READ_BUFSIZ; + if ((r = readv (ctx->sock, iov, 2)) == -1) { + event_del (&ctx->mem_ev); + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + return; + } + if (header.req_id != ctx->count && retries < MAX_RETRIES) { + retries++; + /* Not our reply packet */ + continue; + } + break; + } + if (ctx->protocol != UDP_TEXT) { + r = read (ctx->sock, read_buf, READ_BUFSIZ - 1); + } + /* Increment count */ + ctx->count++; + event_del (&ctx->mem_ev); + if (strncmp (read_buf, DELETED_TRAILER, sizeof (STORED_TRAILER) - 1) == 0) { + ctx->callback (ctx, OK, ctx->callback_data); + } + else if (strncmp (read_buf, NOT_FOUND_TRAILER, sizeof (NOT_FOUND_TRAILER) - 1) == 0) { + ctx->callback (ctx, NOT_EXISTS, ctx->callback_data); + } + else { + ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); + } + } + else if (what == EV_TIMEOUT) { + event_del 
(&ctx->mem_ev); + ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); + } +} + +/* + * Callback for our socket events + */ +static void +socket_callback (gint fd, short what, void *arg) +{ + memcached_ctx_t *ctx = (memcached_ctx_t *) arg; + + switch (ctx->op) { + case CMD_NULL: + /* Do nothing here */ + break; + case CMD_CONNECT: + /* We have write readiness after connect call, so reinit event */ + ctx->cmd = "connect"; + if (what == EV_WRITE) { + event_del (&ctx->mem_ev); + event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, NULL); + ctx->callback (ctx, OK, ctx->callback_data); + ctx->alive = 1; + } + else { + ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); + ctx->alive = 0; + } + break; + case CMD_WRITE: + write_handler (fd, what, ctx); + break; + case CMD_READ: + read_handler (fd, what, ctx); + break; + case CMD_DELETE: + delete_handler (fd, what, ctx); + break; + } +} + +/* + * Common callback function for memcached operations if no user's callback is specified + */ +static void +common_memc_callback (memcached_ctx_t * ctx, memc_error_t error, void *data) +{ + memc_log (ctx, __LINE__, "common_memc_callback: result of memc command '%s' is '%s'", ctx->cmd, memc_strerror (error)); +} + +/* + * Make socket for udp connection + */ +static gint +memc_make_udp_sock (memcached_ctx_t * ctx) +{ + struct sockaddr_in sc; + gint ofl; + + bzero (&sc, sizeof (struct sockaddr_in *)); + sc.sin_family = AF_INET; + sc.sin_port = ctx->port; + memcpy (&sc.sin_addr, &ctx->addr, sizeof (struct in_addr)); + + ctx->sock = socket (PF_INET, SOCK_DGRAM, 0); + + if (ctx->sock == -1) { + memc_log (ctx, __LINE__, "memc_make_udp_sock: socket() failed: %s", strerror (errno)); + return -1; + } + + /* set nonblocking */ + ofl = fcntl (ctx->sock, F_GETFL, 0); + fcntl (ctx->sock, F_SETFL, ofl | O_NONBLOCK); + + /* + * Call connect to set default destination for datagrams + * May not block + */ + ctx->op = 
CMD_CONNECT; + event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, NULL); + return connect (ctx->sock, (struct sockaddr *)&sc, sizeof (struct sockaddr_in)); +} + +/* + * Make socket for tcp connection + */ +static gint +memc_make_tcp_sock (memcached_ctx_t * ctx) +{ + struct sockaddr_in sc; + gint ofl, r; + + bzero (&sc, sizeof (struct sockaddr_in *)); + sc.sin_family = AF_INET; + sc.sin_port = ctx->port; + memcpy (&sc.sin_addr, &ctx->addr, sizeof (struct in_addr)); + + ctx->sock = socket (PF_INET, SOCK_STREAM, 0); + + if (ctx->sock == -1) { + memc_log (ctx, __LINE__, "memc_make_tcp_sock: socket() failed: %s", strerror (errno)); + return -1; + } + + /* set nonblocking */ + ofl = fcntl (ctx->sock, F_GETFL, 0); + fcntl (ctx->sock, F_SETFL, ofl | O_NONBLOCK); + + if ((r = connect (ctx->sock, (struct sockaddr *)&sc, sizeof (struct sockaddr_in))) == -1) { + if (errno != EINPROGRESS) { + close (ctx->sock); + ctx->sock = -1; + memc_log (ctx, __LINE__, "memc_make_tcp_sock: connect() failed: %s", strerror (errno)); + return -1; + } + } + ctx->op = CMD_CONNECT; + event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + return 0; +} + +/* + * Parse VALUE reply from server and set len argument to value returned by memcached + */ +static gint +memc_parse_header (gchar *buf, size_t * len, gchar **end) +{ + gchar *p, *c; + gint i; + + /* VALUE []\r\n */ + c = strstr (buf, CRLF); + if (c == NULL) { + return -1; + } + *end = c + sizeof (CRLF) - 1; + + if (strncmp (buf, "VALUE ", sizeof ("VALUE ") - 1) == 0) { + p = buf + sizeof ("VALUE ") - 1; + + /* Read bytes value and ignore all other fields, such as flags and key */ + for (i = 0; i < 2; i++) { + while (p++ < c && *p != ' '); + + if (p > c) { + return -1; + } + } + *len = strtoul (p, &c, 10); + return 1; + } + /* If value not found memcached return just END\r\n , in this case return 0 */ + 
else if (strncmp (buf, END_TRAILER, sizeof (END_TRAILER) - 1) == 0) { + return 0; + } + + return -1; +} + + +/* + * Common read command handler for memcached + */ +memc_error_t +memc_read (memcached_ctx_t * ctx, const gchar *cmd, memcached_param_t * param) +{ + ctx->cmd = cmd; + ctx->op = CMD_READ; + ctx->param = param; + event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + + return OK; +} + +/* + * Common write command handler for memcached + */ +memc_error_t +memc_write (memcached_ctx_t * ctx, const gchar *cmd, memcached_param_t * param, gint expire) +{ + ctx->cmd = cmd; + ctx->op = CMD_WRITE; + ctx->param = param; + param->expire = expire; + event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + + return OK; +} + +/* + * Delete command handler + */ +memc_error_t +memc_delete (memcached_ctx_t * ctx, memcached_param_t * param) +{ + ctx->cmd = "delete"; + ctx->op = CMD_DELETE; + ctx->param = param; + event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); + event_add (&ctx->mem_ev, &ctx->timeout); + + return OK; +} + +/* + * Write handler for memcached mirroring + * writing is done to each memcached server + */ +memc_error_t +memc_write_mirror (memcached_ctx_t * ctx, size_t memcached_num, const gchar *cmd, memcached_param_t * param, gint expire) +{ + memc_error_t r, result = OK; + + while (memcached_num--) { + if (ctx[memcached_num].alive == 1) { + r = memc_write (&ctx[memcached_num], cmd, param, expire); + if (r != OK) { + memc_log (&ctx[memcached_num], __LINE__, "memc_write_mirror: cannot write to mirror server: %s", memc_strerror (r)); + result = r; + ctx[memcached_num].alive = 0; + } + } + } + + return result; +} + +/* + * Read handler for memcached mirroring + * reading is done from first active memcached server + */ +memc_error_t +memc_read_mirror (memcached_ctx_t * 
ctx, size_t memcached_num, const gchar *cmd, memcached_param_t * param) +{ + memc_error_t r, result = OK; + + while (memcached_num--) { + if (ctx[memcached_num].alive == 1) { + r = memc_read (&ctx[memcached_num], cmd, param); + if (r != OK) { + result = r; + if (r != NOT_EXISTS) { + ctx[memcached_num].alive = 0; + memc_log (&ctx[memcached_num], __LINE__, "memc_read_mirror: cannot write read from mirror server: %s", memc_strerror (r)); + } + else { + memc_log (&ctx[memcached_num], __LINE__, "memc_read_mirror: record not exists", memc_strerror (r)); + } + } + else { + break; + } + } + } + + return result; +} + +/* + * Delete handler for memcached mirroring + * deleting is done for each active memcached server + */ +memc_error_t +memc_delete_mirror (memcached_ctx_t * ctx, size_t memcached_num, const gchar *cmd, memcached_param_t * param) +{ + memc_error_t r, result = OK; + + while (memcached_num--) { + if (ctx[memcached_num].alive == 1) { + r = memc_delete (&ctx[memcached_num], param); + if (r != OK) { + result = r; + if (r != NOT_EXISTS) { + ctx[memcached_num].alive = 0; + memc_log (&ctx[memcached_num], __LINE__, "memc_delete_mirror: cannot delete from mirror server: %s", memc_strerror (r)); + } + } + } + } + + return result; +} + + +/* + * Initialize memcached context for specified protocol + */ +gint +memc_init_ctx (memcached_ctx_t * ctx) +{ + if (ctx == NULL) { + return -1; + } + + ctx->count = 0; + ctx->alive = 0; + ctx->op = CMD_NULL; + /* Set default callback */ + if (ctx->callback == NULL) { + ctx->callback = common_memc_callback; + } + + switch (ctx->protocol) { + case UDP_TEXT: + return memc_make_udp_sock (ctx); + break; + case TCP_TEXT: + return memc_make_tcp_sock (ctx); + break; + /* Not implemented */ + case UDP_BIN: + case TCP_BIN: + default: + return -1; + } +} + +/* + * Mirror init + */ +gint +memc_init_ctx_mirror (memcached_ctx_t * ctx, size_t memcached_num) +{ + gint r, result = -1; + while (memcached_num--) { + if (ctx[memcached_num].alive == 1) { + 
r = memc_init_ctx (&ctx[memcached_num]); + if (r == -1) { + ctx[memcached_num].alive = 0; + memc_log (&ctx[memcached_num], __LINE__, "memc_init_ctx_mirror: cannot connect to server"); + } + else { + result = 1; + } + } + } + + return result; +} + +/* + * Close context connection + */ +gint +memc_close_ctx (memcached_ctx_t * ctx) +{ + if (ctx != NULL && ctx->sock != -1) { + event_del (&ctx->mem_ev); + return close (ctx->sock); + } + + return -1; +} + +/* + * Mirror close + */ +gint +memc_close_ctx_mirror (memcached_ctx_t * ctx, size_t memcached_num) +{ + gint r = 0; + while (memcached_num--) { + if (ctx[memcached_num].alive == 1) { + r = memc_close_ctx (&ctx[memcached_num]); + if (r == -1) { + memc_log (&ctx[memcached_num], __LINE__, "memc_close_ctx_mirror: cannot close connection to server properly"); + ctx[memcached_num].alive = 0; + } + } + } + + return r; +} + + +const gchar * +memc_strerror (memc_error_t err) +{ + const gchar *p; + + switch (err) { + case OK: + p = "Ok"; + break; + case BAD_COMMAND: + p = "Bad command"; + break; + case CLIENT_ERROR: + p = "Client error"; + break; + case SERVER_ERROR: + p = "Server error"; + break; + case SERVER_TIMEOUT: + p = "Server timeout"; + break; + case NOT_EXISTS: + p = "Key not found"; + break; + case EXISTS: + p = "Key already exists"; + break; + case WRONG_LENGTH: + p = "Wrong result length"; + break; + default: + p = "Unknown error"; + break; + } + + return p; +} + +/* + * vi:ts=4 + */ diff --git a/src/libutil/memcached.h b/src/libutil/memcached.h new file mode 100644 index 000000000..098e26eea --- /dev/null +++ b/src/libutil/memcached.h @@ -0,0 +1,142 @@ +#ifndef MEMCACHED_H +#define MEMCACHED_H + +#include +#include +#include +#include + +#define MAXKEYLEN 250 + +#define MEMC_OPT_DEBUG 0x1 + +struct event; + +typedef enum memc_error { + OK, + BAD_COMMAND, + CLIENT_ERROR, + SERVER_ERROR, + SERVER_TIMEOUT, + NOT_EXISTS, + EXISTS, + WRONG_LENGTH +} memc_error_t; + +/* XXX: Only UDP_TEXT is supported at present */ 
/*
 * State of one connection to a memcached server.
 * Port must be in network byte order: it is copied into sockaddr_in as is.
 */
typedef struct memcached_ctx_s {
	/* Transport/encoding; memc_init_ctx only creates sockets for UDP_TEXT and TCP_TEXT */
	memc_proto_t protocol;
	/* Server address */
	struct in_addr addr;
	/* Server port */
	guint16 port;
	/* Socket descriptor, filled by memc_init_ctx */
	gint sock;
	/* Timeout passed to event_add for each operation */
	struct timeval timeout;
	/*
	 * Request counter: copied into the UDP frame header as request id and
	 * incremented after a reply is processed.
	 * NOTE(review): described as "network byte order", but it is incremented
	 * natively and copied without conversion - confirm.
	 */
	guint16 count;
	/* Flag signalling that this memcached server is alive (set after a successful connect) */
	short alive;
	/* Options that can be specified for memcached connection (see MEMC_OPT_DEBUG) */
	short options;
	/* Current operation; selects the handler in socket_callback */
	memc_opt_t op;
	/* Current command name, e.g. "get", "set" or "delete" */
	const gchar *cmd;
	/* Current param (key and data buffer) */
	memcached_param_t *param;
	/* Callback invoked with the result of the current operation */
	void (*callback) (struct memcached_ctx_s *ctx, memc_error_t error, void *data);
	/* Data for callback function */
	void *callback_data;
	/* Event structure registered for the socket */
	struct event mem_ev;
} memcached_ctx_t;
extracted data + * + * "set" means "store this data". + * + * "add" means "store this data, but only if the server *doesn't* already + * hold data for this key". + + * "replace" means "store this data, but only if the server *does* + * already hold data for this key". + + * "append" means "add this data to an existing key after existing data". + + * "prepend" means "add this data to an existing key before existing data". + */ +#define memc_get(ctx, param) memc_read(ctx, "get", param) +#define memc_set(ctx, param, expire) memc_write(ctx, "set", param, expire) +#define memc_add(ctx, param, expire) memc_write(ctx, "add", param, expire) +#define memc_replace(ctx, param, expire) memc_write(ctx, "replace", param, expire) +#define memc_append(ctx, param, expire) memc_write(ctx, "append", param, expire) +#define memc_prepend(ctx, param, expire) memc_write(ctx, "prepend", param, expire) + +/* Functions that works with mirror of memcached servers */ +#define memc_get_mirror(ctx, num, param) memc_read_mirror(ctx, num, "get", param) +#define memc_set_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "set", param, expire) +#define memc_add_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "add", param, expire) +#define memc_replace_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "replace", param, expire) +#define memc_append_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "append", param, expire) +#define memc_prepend_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "prepend", param, expire) + + +memc_error_t memc_read (memcached_ctx_t *ctx, const gchar *cmd, memcached_param_t *param); +memc_error_t memc_write (memcached_ctx_t *ctx, const gchar *cmd, memcached_param_t *param, gint expire); +memc_error_t memc_delete (memcached_ctx_t *ctx, memcached_param_t *params); + +memc_error_t memc_write_mirror (memcached_ctx_t *ctx, size_t memcached_num, const gchar *cmd, memcached_param_t *param, gint expire); +memc_error_t 
memc_read_mirror (memcached_ctx_t *ctx, size_t memcached_num, const gchar *cmd, memcached_param_t *param); +memc_error_t memc_delete_mirror (memcached_ctx_t *ctx, size_t memcached_num, const gchar *cmd, memcached_param_t *param); + +/* Return symbolic name of memcached error*/ +const gchar * memc_strerror (memc_error_t err); + +/* Destroy socket from ctx */ +gint memc_close_ctx (memcached_ctx_t *ctx); +gint memc_close_ctx_mirror (memcached_ctx_t *ctx, size_t memcached_num); + +#endif diff --git a/src/libutil/printf.c b/src/libutil/printf.c new file mode 100644 index 000000000..d72ec95c8 --- /dev/null +++ b/src/libutil/printf.c @@ -0,0 +1,635 @@ +/* Copyright (c) 2010, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "printf.h" +#include "fstring.h" +#include "main.h" + +/** + * From FreeBSD libutil code + */ +static const int maxscale = 6; + +static gchar * +rspamd_humanize_number (gchar *buf, gchar *last, gint64 num, gboolean bytes) +{ + const gchar *prefixes; + int i, r, remainder, sign; + gint64 divisor; + gsize baselen, len = last - buf; + + remainder = 0; + + baselen = 1; + if (!bytes) { + divisor = 1000; + prefixes = "\0\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E"; + } + else { + divisor = 1024; + prefixes = "B\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E"; + } + + +#define SCALE2PREFIX(scale) (&prefixes[(scale) * 3]) + + if (num < 0) { + sign = -1; + num = -num; + baselen += 2; /* sign, digit */ + } + else { + sign = 1; + baselen += 1; /* digit */ + } + + /* Check if enough room for `x y' + suffix + `\0' */ + if (len < baselen + 1) { + return buf; + } + + /* + * Divide the number until it fits the given column. + * If there will be an overflow by the rounding below, + * divide once more. + */ + for (i = 0; i < maxscale && num > divisor; i++) { + remainder = num % divisor; + num /= divisor; + } + + r = rspamd_snprintf (buf, len, "%L%s", + sign * (num + (remainder + 50) / 1000), + SCALE2PREFIX (i)); + +#undef SCALE2PREFIX + + return buf + r; +} + + +static gchar * +rspamd_sprintf_num (gchar *buf, gchar *last, guint64 ui64, gchar zero, + guint hexadecimal, guint width) +{ + gchar *p, temp[sizeof ("18446744073709551615")]; + size_t len; + guint32 ui32; + static gchar hex[] = "0123456789abcdef"; + static gchar HEX[] = "0123456789ABCDEF"; + + p = temp + sizeof(temp); + + if (hexadecimal == 0) { + + if (ui64 <= G_MAXUINT32) { + + /* + * To divide 64-bit numbers and to find remainders + * on the x86 platform gcc and icc call the libc functions + * [u]divdi3() and [u]moddi3(), they call another function + * in its turn. On FreeBSD it is the qdivrem() function, + * its source code is about 170 lines of the code. + * The glibc counterpart is about 150 lines of the code. 
+ * + * For 32-bit numbers and some divisors gcc and icc use + * a inlined multiplication and shifts. For example, + * guint "i32 / 10" is compiled to + * + * (i32 * 0xCCCCCCCD) >> 35 + */ + + ui32 = (guint32) ui64; + + do { + *--p = (gchar) (ui32 % 10 + '0'); + } while (ui32 /= 10); + + } else { + do { + *--p = (gchar) (ui64 % 10 + '0'); + } while (ui64 /= 10); + } + + } else if (hexadecimal == 1) { + + do { + + /* the "(guint32)" cast disables the BCC's warning */ + *--p = hex[(guint32) (ui64 & 0xf)]; + + } while (ui64 >>= 4); + + } else { /* hexadecimal == 2 */ + + do { + + /* the "(guint32)" cast disables the BCC's warning */ + *--p = HEX[(guint32) (ui64 & 0xf)]; + + } while (ui64 >>= 4); + } + + /* zero or space padding */ + + len = (temp + sizeof (temp)) - p; + + while (len++ < width && buf < last) { + *buf++ = zero; + } + + /* number safe copy */ + + len = (temp + sizeof (temp)) - p; + + if (buf + len > last) { + len = last - buf; + } + + return ((gchar *)memcpy (buf, p, len)) + len; +} + +struct rspamd_printf_char_buf { + char *begin; + char *pos; + glong remain; +}; + +static glong +rspamd_printf_append_char (const gchar *buf, glong buflen, gpointer ud) +{ + struct rspamd_printf_char_buf *dst = (struct rspamd_printf_char_buf *)ud; + glong wr; + + if (dst->remain <= 0) { + return dst->remain; + } + + wr = MIN (dst->remain, buflen); + memcpy (dst->pos, buf, wr); + dst->remain -= wr; + dst->pos += wr; + + return wr; +} + +static glong +rspamd_printf_append_file (const gchar *buf, glong buflen, gpointer ud) +{ + FILE *dst = (FILE *)ud; + + return fwrite (buf, 1, buflen, dst); +} + +static glong +rspamd_printf_append_gstring (const gchar *buf, glong buflen, gpointer ud) +{ + GString *dst = (GString *)ud; + + g_string_append_len (dst, buf, buflen); + + return buflen; +} + +glong +rspamd_fprintf (FILE *f, const gchar *fmt, ...) 
+{ + va_list args; + glong r; + + va_start (args, fmt); + r = rspamd_vprintf_common (rspamd_printf_append_file, f, fmt, args); + va_end (args); + + return r; +} + +glong +rspamd_log_fprintf (FILE *f, const gchar *fmt, ...) +{ + va_list args; + glong r; + + va_start (args, fmt); + r = rspamd_vprintf_common (rspamd_printf_append_file, f, fmt, args); + va_end (args); + + fflush (f); + + return r; +} + + +glong +rspamd_snprintf (gchar *buf, glong max, const gchar *fmt, ...) +{ + gchar *r; + va_list args; + + va_start (args, fmt); + r = rspamd_vsnprintf (buf, max, fmt, args); + va_end (args); + + return (r - buf); +} + +gchar * +rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args) +{ + struct rspamd_printf_char_buf dst; + + dst.begin = buf; + dst.pos = dst.begin; + dst.remain = max - 1; + (void)rspamd_vprintf_common (rspamd_printf_append_char, &dst, fmt, args); + *dst.pos = '\0'; + + return dst.pos; +} + +glong +rspamd_printf_gstring (GString *s, const gchar *fmt, ...) +{ + va_list args; + glong r; + + va_start (args, fmt); + r = rspamd_vprintf_common (rspamd_printf_append_gstring, s, fmt, args); + va_end (args); + + return r; +} + +#define RSPAMD_PRINTF_APPEND(buf, len) \ + do { \ + wr = func ((buf), (len), apd); \ + if (wr <= 0) { \ + goto oob; \ + } \ + written += wr; \ + fmt ++; \ + buf_start = fmt; \ + } while(0) + +glong +rspamd_vprintf_common (rspamd_printf_append_func func, gpointer apd, const gchar *fmt, va_list args) +{ + gchar zero, numbuf[G_ASCII_DTOSTR_BUF_SIZE], *p, *last, c; + const gchar *buf_start = fmt; + gint d; + long double f, scale; + glong written = 0, wr, slen; + gint64 i64; + guint64 ui64; + guint width, sign, hex, humanize, bytes, frac_width, i; + f_str_t *v; + GString *gs; + gboolean bv; + + while (*fmt) { + + /* + * "buf < last" means that we could copy at least one character: + * the plain character, "%%", "%c", and minus without the checking + */ + + if (*fmt == '%') { + + /* Append what we have in buf */ + if (fmt > 
buf_start) { + wr = func (buf_start, fmt - buf_start, apd); + if (wr <= 0) { + goto oob; + } + written += wr; + } + + i64 = 0; + ui64 = 0; + + zero = (gchar) ((*++fmt == '0') ? '0' : ' '); + width = 0; + sign = 1; + hex = 0; + bytes = 0; + humanize = 0; + frac_width = 0; + slen = -1; + + while (*fmt >= '0' && *fmt <= '9') { + width = width * 10 + *fmt++ - '0'; + } + + + for ( ;; ) { + switch (*fmt) { + + case 'u': + sign = 0; + fmt++; + continue; + + case 'm': + fmt++; + continue; + + case 'X': + hex = 2; + sign = 0; + fmt++; + continue; + + case 'x': + hex = 1; + sign = 0; + fmt++; + continue; + case 'H': + humanize = 1; + bytes = 1; + sign = 0; + fmt ++; + continue; + case 'h': + humanize = 1; + sign = 0; + fmt ++; + continue; + case '.': + fmt++; + + while (*fmt >= '0' && *fmt <= '9') { + frac_width = frac_width * 10 + *fmt++ - '0'; + } + + break; + + case '*': + d = (gint)va_arg (args, gint); + if (G_UNLIKELY (d < 0)) { + msg_err ("critical error: size is less than 0"); + return 0; + } + slen = (glong)d; + fmt++; + continue; + + default: + break; + } + + break; + } + + + switch (*fmt) { + + case 'V': + v = va_arg (args, f_str_t *); + RSPAMD_PRINTF_APPEND (v->begin, v->len); + + continue; + + case 'v': + gs = va_arg (args, GString *); + RSPAMD_PRINTF_APPEND (gs->str, gs->len); + + continue; + + case 's': + p = va_arg (args, gchar *); + if (p == NULL) { + p = "(NULL)"; + } + + if (slen == -1) { + /* NULL terminated string */ + slen = strlen (p); + } + + RSPAMD_PRINTF_APPEND (p, slen); + + continue; + + case 'O': + i64 = (gint64) va_arg (args, off_t); + sign = 1; + break; + + case 'P': + i64 = (gint64) va_arg (args, pid_t); + sign = 1; + break; + + case 'T': + i64 = (gint64) va_arg (args, time_t); + sign = 1; + break; + + case 'z': + if (sign) { + i64 = (gint64) va_arg (args, ssize_t); + } else { + ui64 = (guint64) va_arg (args, size_t); + } + break; + + case 'd': + if (sign) { + i64 = (gint64) va_arg (args, gint); + } else { + ui64 = (guint64) va_arg (args, 
guint); + } + break; + + case 'l': + if (sign) { + i64 = (gint64) va_arg(args, glong); + } else { + ui64 = (guint64) va_arg(args, gulong); + } + break; + + case 'D': + if (sign) { + i64 = (gint64) va_arg(args, gint32); + } else { + ui64 = (guint64) va_arg(args, guint32); + } + break; + + case 'L': + if (sign) { + i64 = va_arg (args, gint64); + } else { + ui64 = va_arg (args, guint64); + } + break; + + + case 'f': + case 'F': + if (*fmt == 'f') { + f = (long double) va_arg (args, double); + } + else { + f = (long double) va_arg (args, long double); + } + p = numbuf; + last = p + sizeof (numbuf); + if (f < 0) { + *p++ = '-'; + f = -f; + } + + ui64 = (gint64) f; + + p = rspamd_sprintf_num (p, last, ui64, zero, 0, width); + + if (frac_width) { + + if (p < last) { + *p++ = '.'; + } + + scale = 1.0; + + for (i = 0; i < frac_width; i++) { + scale *= 10.0; + } + + /* + * (gint64) cast is required for msvc6: + * it can not convert guint64 to double + */ + ui64 = (guint64) ((f - (gint64) ui64) * scale); + + p = rspamd_sprintf_num (p, last, ui64, '0', 0, frac_width); + } + + slen = p - numbuf; + RSPAMD_PRINTF_APPEND (numbuf, slen); + + continue; + + case 'g': + case 'G': + if (*fmt == 'g') { + f = (long double) va_arg (args, double); + } + else { + f = (long double) va_arg (args, long double); + } + + g_ascii_formatd (numbuf, sizeof (numbuf), "%g", (double)f); + slen = strlen (numbuf); + RSPAMD_PRINTF_APPEND (numbuf, slen); + + continue; + + case 'b': + bv = (gboolean) va_arg (args, double); + RSPAMD_PRINTF_APPEND (bv ? "true" : "false", bv ? 
4 : 5); + + continue; + + case 'p': + ui64 = (uintptr_t) va_arg (args, void *); + hex = 2; + sign = 0; + zero = '0'; + width = sizeof (void *) * 2; + break; + + case 'c': + c = va_arg (args, gint); + c &= 0xff; + RSPAMD_PRINTF_APPEND (&c, 1); + + continue; + + case 'Z': + c = '\0'; + RSPAMD_PRINTF_APPEND (&c, 1); + + continue; + + case 'N': + c = LF; + RSPAMD_PRINTF_APPEND (&c, 1); + + continue; + + case '%': + c = '%'; + RSPAMD_PRINTF_APPEND (&c, 1); + + continue; + + default: + c = *fmt; + RSPAMD_PRINTF_APPEND (&c, 1); + + continue; + } + + /* Print number */ + p = numbuf; + last = p + sizeof (numbuf); + if (sign) { + if (i64 < 0) { + *p++ = '-'; + ui64 = (guint64) -i64; + + } else { + ui64 = (guint64) i64; + } + } + + if (!humanize) { + p = rspamd_sprintf_num (p, last, ui64, zero, hex, width); + } + else { + p = rspamd_humanize_number (p, last, ui64, bytes); + } + slen = p - numbuf; + RSPAMD_PRINTF_APPEND (numbuf, slen); + + } else { + fmt++; + } + } + + /* Finish buffer */ + if (fmt > buf_start) { + wr = func (buf_start, fmt - buf_start, apd); + if (wr <= 0) { + goto oob; + } + written += wr; + } + +oob: + return written; +} + diff --git a/src/libutil/printf.h b/src/libutil/printf.h new file mode 100644 index 000000000..a4e03791d --- /dev/null +++ b/src/libutil/printf.h @@ -0,0 +1,75 @@ +/* Copyright (c) 2010, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef PRINTF_H_ +#define PRINTF_H_ + +#include "config.h" + +/* + * supported formats: + * %[0][width][x][X]O off_t + * %[0][width]T time_t + * %[0][width][u][x|X|h|H]z ssize_t/size_t + * %[0][width][u][x|X|h|H]d gint/guint + * %[0][width][u][x|X|h|H]l long + * %[0][width][u][x|X|h|H]D gint32/guint32 + * %[0][width][u][x|X|h|H]L gint64/guint64 + * %[0][width][.width]f double + * %[0][width][.width]F long double + * %[0][width][.width]g double + * %[0][width][.width]G long double + * %b boolean (true or false) + * %P pid_t + * %r rlim_t + * %p void * + * %V f_str_t * + * %v GString * + * %s null-terminated string + * %*s length and string + * %Z '\0' + * %N '\n' + * %c gchar + * %% % + * + */ + +/** + * Callback used for common printf operations + * @param buf buffer to append + * @param buflen lenght of the buffer + * @param ud opaque pointer + * @return number of characters written + */ +typedef glong (*rspamd_printf_append_func)(const gchar *buf, glong buflen, gpointer ud); + +glong rspamd_fprintf (FILE *f, const gchar *fmt, ...); +glong rspamd_log_fprintf (FILE *f, const gchar *fmt, ...); +glong rspamd_snprintf (gchar *buf, glong max, const gchar *fmt, ...); +gchar *rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list 
args); +glong rspamd_printf_gstring (GString *s, const gchar *fmt, ...); + +glong rspamd_vprintf_common (rspamd_printf_append_func func, gpointer apd, const gchar *fmt, va_list args); + +#endif /* PRINTF_H_ */ diff --git a/src/libutil/radix.c b/src/libutil/radix.c new file mode 100644 index 000000000..1a05db178 --- /dev/null +++ b/src/libutil/radix.c @@ -0,0 +1,311 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+
+#include "config.h"
+#include "radix.h"
+#include "mem_pool.h"
+
+static void *radix_alloc (radix_tree_t * tree);
+
+/**
+ * Create an empty radix tree that owns its own memory pool
+ * @return new tree whose root node carries RADIX_NO_VALUE, or NULL if the
+ * tree or its root node cannot be allocated
+ */
+radix_tree_t *
+radix_tree_create (void)
+{
+	radix_tree_t *tree;
+
+	tree = g_malloc (sizeof (radix_tree_t));
+	if (tree == NULL) {
+		return NULL;
+	}
+
+	tree->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ());
+	tree->size = 0;
+
+	tree->root = radix_alloc (tree);
+	if (tree->root == NULL) {
+		/* Release the pool and the tree itself, nobody else can free them */
+		rspamd_mempool_delete (tree->pool);
+		g_free (tree);
+		return NULL;
+	}
+
+	tree->root->right = NULL;
+	tree->root->left = NULL;
+	tree->root->parent = NULL;
+	tree->root->value = RADIX_NO_VALUE;
+
+	return tree;
+}
+
+enum radix_insert_type {
+	RADIX_INSERT,
+	RADIX_ADD,
+	RADIX_REPLACE
+};
+
+/**
+ * Common implementation of insert, add and replace operations
+ * @param tree radix tree
+ * @param key key bits, examined from the most significant bit
+ * @param mask only the bits set in mask are walked
+ * @param value value to store
+ * @param type what to do if the node already has a value
+ * @return 0 if a new value was stored; for an existing value: 1 for
+ * RADIX_INSERT (value is kept) and RADIX_REPLACE (value is overwritten),
+ * `value` for RADIX_ADD (value is added to the stored one);
+ * (uintptr_t)-1 if a node cannot be allocated
+ */
+static uintptr_t
+radix32tree_insert_common (radix_tree_t * tree, guint32 key, guint32 mask, uintptr_t value, enum radix_insert_type type)
+{
+	guint32 bit;
+	radix_node_t *node, *next;
+
+	bit = 0x80000000;
+
+	node = tree->root;
+	next = tree->root;
+	/* Find a place in trie to insert */
+	while (bit & mask) {
+		if (key & bit) {
+			next = node->right;
+		}
+		else {
+			next = node->left;
+		}
+
+		if (next == NULL) {
+			break;
+		}
+
+		bit >>= 1;
+		node = next;
+	}
+
+	if (next) {
+		/* The whole masked path already exists */
+		if (node->value != RADIX_NO_VALUE) {
+			/* Value was found, switch on insert type */
+			switch (type) {
+			case RADIX_INSERT:
+				return 1;
+			case RADIX_ADD:
+				node->value += value;
+				return value;
+			case RADIX_REPLACE:
+				node->value = value;
+				return 1;
+			}
+		}
+
+		node->value = value;
+		node->key = key;
+		return 0;
+	}
+	/* Inserting value in trie creating all path components */
+	while (bit & mask) {
+		next = radix_alloc (tree);
+		if (next == NULL) {
+			return -1;
+		}
+
+		next->right = NULL;
+		next->left = NULL;
+		next->parent = node;
+		next->value = RADIX_NO_VALUE;
+
+		if (key & bit) {
+			node->right = next;
+
+		}
+		else {
+			node->left = next;
+		}
+
+		bit >>= 1;
+		node = next;
+	}
+
+	node->value = value;
+	node->key = key;
+
+	return 0;
+}
+
+gint
+radix32tree_insert (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value)
+{ + return (gint)radix32tree_insert_common (tree, key, mask, value, RADIX_INSERT); +} + +uintptr_t +radix32tree_add (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value) +{ + return radix32tree_insert_common (tree, key, mask, value, RADIX_ADD); +} + +gint +radix32tree_replace (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value) +{ + return (gint)radix32tree_insert_common (tree, key, mask, value, RADIX_REPLACE); +} + +/* + * per recursion step: + * ptr + ptr + ptr + gint = 4 words + * result = 1 word + * 5 words total in stack + */ +static gboolean +radix_recurse_nodes (radix_node_t *node, radix_tree_traverse_func func, void *user_data, gint level) +{ + if (node->left) { + if (radix_recurse_nodes (node->left, func, user_data, level + 1)) { + return TRUE; + } + } + + if (node->value != RADIX_NO_VALUE) { + if (func (node->key, level, node->value, user_data)) { + return TRUE; + } + } + + if (node->right) { + if (radix_recurse_nodes (node->right, func, user_data, level + 1)) { + return TRUE; + } + } + + return FALSE; +} + +void +radix32tree_traverse (radix_tree_t *tree, radix_tree_traverse_func func, void *user_data) +{ + radix_recurse_nodes (tree->root, func, user_data, 0); +} + + +gint +radix32tree_delete (radix_tree_t * tree, guint32 key, guint32 mask) +{ + guint32 bit; + radix_node_t *node; + + bit = 0x80000000; + node = tree->root; + + while (node && (bit & mask)) { + if (key & bit) { + node = node->right; + + } + else { + node = node->left; + } + + bit >>= 1; + } + + if (node == NULL || node->parent == NULL) { + return -1; + } + + if (node->right || node->left) { + if (node->value != RADIX_NO_VALUE) { + node->value = RADIX_NO_VALUE; + return 0; + } + + return -1; + } + + for (;;) { + if (node->parent->right == node) { + node->parent->right = NULL; + + } + else { + node->parent->left = NULL; + } + + node = node->parent; + + if (node->right || node->left) { + break; + } + + if (node->value != RADIX_NO_VALUE) { + break; + } + + if 
(node->parent == NULL) { + break; + } + } + + return 0; +} + + +uintptr_t +radix32tree_find (radix_tree_t * tree, guint32 key) +{ + guint32 bit; + uintptr_t value; + radix_node_t *node; + + bit = 0x80000000; + value = RADIX_NO_VALUE; + node = tree->root; + + while (node) { + if (node->value != RADIX_NO_VALUE) { + value = node->value; + } + + if (key & bit) { + node = node->right; + + } + else { + node = node->left; + } + + bit >>= 1; + } + + return value; +} + + +static void * +radix_alloc (radix_tree_t * tree) +{ + gchar *p; + + p = rspamd_mempool_alloc (tree->pool, sizeof (radix_node_t)); + + tree->size += sizeof (radix_node_t); + + return p; +} + +void +radix_tree_free (radix_tree_t * tree) +{ + + g_return_if_fail (tree != NULL); + rspamd_mempool_delete (tree->pool); + g_free (tree); +} + +/* + * vi:ts=4 + */ diff --git a/src/libutil/radix.h b/src/libutil/radix.h new file mode 100644 index 000000000..4cc2873c7 --- /dev/null +++ b/src/libutil/radix.h @@ -0,0 +1,82 @@ +#ifndef RADIX_H +#define RADIX_H + +#include "config.h" +#include "mem_pool.h" + +#define RADIX_NO_VALUE (uintptr_t)-1 + +typedef struct radix_node_s radix_node_t; + +struct radix_node_s { + radix_node_t *right; + radix_node_t *left; + radix_node_t *parent; + uintptr_t value; + guint32 key; +}; + + +typedef struct { + radix_node_t *root; + size_t size; + rspamd_mempool_t *pool; +} radix_tree_t; + +typedef gboolean (*radix_tree_traverse_func)(guint32 key, guint32 mask, uintptr_t value, void *user_data); + +/** + * Create new radix tree + */ +radix_tree_t *radix_tree_create (void); + +/** + * Insert value to radix tree + * returns: 1 if value already exists + * 0 if operation was successfull + * -1 if there was some error + */ +gint radix32tree_insert (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value); + +/** + * Add value to radix tree or insert it if value does not exists + * returns: value if value already exists and was added + * 0 if value was inserted + * -1 if there was some error 
+ */ +uintptr_t radix32tree_add (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value); + +/** + * Replace value in radix tree or insert it if value does not exists + * returns: 1 if value already exists and was replaced + * 0 if value was inserted + * -1 if there was some error + */ +gint radix32tree_replace (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value); + +/** + * Delete value from radix tree + * returns: 1 if value does not exist + * 0 if value was deleted + * -1 if there was some error + */ +gint radix32tree_delete (radix_tree_t *tree, guint32 key, guint32 mask); + +/** + * Find value in radix tree + * returns: value if value was found + * RADIX_NO_VALUE if value was not found + */ +uintptr_t radix32tree_find (radix_tree_t *tree, guint32 key); + +/** + * Traverse via the whole tree calling specified callback + */ +void radix32tree_traverse (radix_tree_t *tree, radix_tree_traverse_func func, void *user_data); + +/** + * Frees radix tree + */ +void radix_tree_free (radix_tree_t *tree); + +#endif diff --git a/src/libutil/rrd.c b/src/libutil/rrd.c new file mode 100644 index 000000000..a0e21eaed --- /dev/null +++ b/src/libutil/rrd.c @@ -0,0 +1,1015 @@ +/* Copyright (c) 2010-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "rrd.h" +#include "util.h" + +static GQuark +rrd_error_quark (void) +{ + return g_quark_from_static_string ("rrd-error"); +} + +/** + * Convert rrd dst type from string to numeric value + */ +enum rrd_dst_type +rrd_dst_from_string (const gchar *str) +{ + if (g_ascii_strcasecmp (str, "counter") == 0) { + return RRD_DST_COUNTER; + } + else if (g_ascii_strcasecmp (str, "absolute") == 0) { + return RRD_DST_ABSOLUTE; + } + else if (g_ascii_strcasecmp (str, "gauge") == 0) { + return RRD_DST_GAUGE; + } + else if (g_ascii_strcasecmp (str, "cdef") == 0) { + return RRD_DST_CDEF; + } + else if (g_ascii_strcasecmp (str, "derive") == 0) { + return RRD_DST_DERIVE; + } + return -1; +} + +/** + * Convert numeric presentation of dst to string + */ +const gchar* +rrd_dst_to_string (enum rrd_dst_type type) +{ + switch (type) { + case RRD_DST_COUNTER: + return "COUNTER"; + case RRD_DST_ABSOLUTE: + return "ABSOLUTE"; + case RRD_DST_GAUGE: + return "GAUGE"; + case RRD_DST_CDEF: + return "CDEF"; + case RRD_DST_DERIVE: + return "DERIVE"; + default: + return "U"; + } + + return "U"; +} + +/** + * Convert rrd consolidation function type from string to numeric value + */ +enum rrd_cf_type +rrd_cf_from_string (const gchar *str) +{ + if (g_ascii_strcasecmp (str, "average") == 0) { + return RRD_CF_AVERAGE; + } + else if (g_ascii_strcasecmp (str, "minimum") == 0) { + return RRD_CF_MINIMUM; + } + else if (g_ascii_strcasecmp (str, "maximum") == 0) { + return 
RRD_CF_MAXIMUM; + } + else if (g_ascii_strcasecmp (str, "last") == 0) { + return RRD_CF_LAST; + } + /* XXX: add other CF functions supported by rrd */ + + return -1; +} + +/** + * Convert numeric presentation of cf to string + */ +const gchar* +rrd_cf_to_string (enum rrd_cf_type type) +{ + switch (type) { + case RRD_CF_AVERAGE: + return "AVERAGE"; + case RRD_CF_MINIMUM: + return "MINIMUM"; + case RRD_CF_MAXIMUM: + return "MAXIMUM"; + case RRD_CF_LAST: + return "LAST"; + default: + return "U"; + } + + /* XXX: add other CF functions supported by rrd */ + + return "U"; +} + +void +rrd_make_default_rra (const gchar *cf_name, gulong pdp_cnt, gulong rows, struct rrd_rra_def *rra) +{ + rra->pdp_cnt = pdp_cnt; + rra->row_cnt = rows; + rspamd_strlcpy (rra->cf_nam, cf_name, sizeof (rra->cf_nam)); + memset (rra->par, 0, sizeof (rra->par)); + rra->par[RRA_cdp_xff_val].dv = 0.5; +} + +void +rrd_make_default_ds (const gchar *name, gulong pdp_step, struct rrd_ds_def *ds) +{ + rspamd_strlcpy (ds->ds_nam, name, sizeof (ds->ds_nam)); + rspamd_strlcpy (ds->dst, "COUNTER", sizeof (ds->dst)); + memset (ds->par, 0, sizeof (ds->par)); + ds->par[RRD_DS_mrhb_cnt].lv = pdp_step * 2; + ds->par[RRD_DS_min_val].dv = NAN; + ds->par[RRD_DS_max_val].dv = NAN; +} + +/** + * Check rrd file for correctness (size, cookies, etc) + */ +static gboolean +rspamd_rrd_check_file (const gchar *filename, gboolean need_data, GError **err) +{ + gint fd, i; + struct stat st; + struct rrd_file_head head; + struct rrd_rra_def rra; + gint head_size; + + fd = open (filename, O_RDWR); + if (fd == -1) { + g_set_error (err, rrd_error_quark (), errno, "rrd open error: %s", strerror (errno)); + return FALSE; + } + + if (fstat (fd, &st) == -1) { + g_set_error (err, rrd_error_quark (), errno, "rrd stat error: %s", strerror (errno)); + close (fd); + return FALSE; + } + if (st.st_size < (goffset)sizeof (struct rrd_file_head)) { + /* We have trimmed file */ + g_set_error (err, rrd_error_quark (), EINVAL, "rrd size is bad: 
%u", (guint)st.st_size);
+		close (fd);
+		return FALSE;
+	}
+
+	/* Try to read header */
+	if (read (fd, &head, sizeof (head)) != sizeof (head)) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd read head error: %s", strerror (errno));
+		close (fd);
+		return FALSE;
+	}
+	/* Check magic */
+	if (memcmp (head.cookie, RRD_COOKIE, sizeof (head.cookie)) != 0 ||
+			memcmp (head.version, RRD_VERSION, sizeof (head.version)) != 0 ||
+			head.float_cookie != RRD_FLOAT_COOKIE) {
+		/* No system call has failed here, so errno must not be reported */
+		g_set_error (err, rrd_error_quark (), EINVAL, "rrd head cookies error");
+		close (fd);
+		return FALSE;
+	}
+	/* Check for other params */
+	if (head.ds_cnt <= 0 || head.rra_cnt <= 0) {
+		g_set_error (err, rrd_error_quark (), EINVAL, "rrd head has no data sources or archives");
+		close (fd);
+		return FALSE;
+	}
+	/* Now we can calculate the overall size of rrd */
+	head_size = sizeof (struct rrd_file_head) +
+		sizeof (struct rrd_ds_def) * head.ds_cnt +
+		sizeof (struct rrd_rra_def) * head.rra_cnt +
+		sizeof (struct rrd_live_head) +
+		sizeof (struct rrd_pdp_prep) * head.ds_cnt +
+		sizeof (struct rrd_cdp_prep) * head.ds_cnt * head.rra_cnt +
+		sizeof (struct rrd_rra_ptr) * head.rra_cnt;
+	if (st.st_size < (goffset)head_size) {
+		/* A size mismatch is a format error, errno is meaningless here */
+		g_set_error (err, rrd_error_quark (), EINVAL, "rrd file seems to have stripped header: %d", head_size);
+		close (fd);
+		return FALSE;
+	}
+
+	if (need_data) {
+		/* Now check rra */
+		if (lseek (fd, sizeof (struct rrd_ds_def) * head.ds_cnt, SEEK_CUR) == -1) {
+			g_set_error (err, rrd_error_quark (), errno, "rrd head lseek error: %s", strerror (errno));
+			close (fd);
+			return FALSE;
+		}
+		for (i = 0; i < (gint)head.rra_cnt; i ++) {
+			if (read (fd, &rra, sizeof (rra)) != sizeof (rra)) {
+				g_set_error (err, rrd_error_quark (), errno, "rrd read rra error: %s", strerror (errno));
+				close (fd);
+				return FALSE;
+			}
+			/* Each archive stores row_cnt rows of ds_cnt values */
+			head_size += rra.row_cnt * head.ds_cnt * sizeof (gdouble);
+		}
+
+		if (st.st_size != head_size) {
+			g_set_error (err, rrd_error_quark (), EINVAL, "rrd file seems to have incorrect size: %d, must be %d", (gint)st.st_size, head_size);
+			close (fd);
+			return FALSE;
+		}
+	}
+
+	close (fd);
+	return TRUE;
+}
+
+/**
+ * Adjust pointers in mmapped rrd file
+ * @param file rrd file with a valid `map` pointer
+ * @param completed if TRUE then the values area is expected after the
+ * header and rrd_value is set, otherwise rrd_value is reset to NULL
+ */
+static void
+rspamd_rrd_adjust_pointers (struct rspamd_rrd_file *file, gboolean completed)
+{
+	guint8 *ptr;
+
+	ptr = file->map;
+	file->stat_head = (struct rrd_file_head *)ptr;
+	ptr += sizeof (struct rrd_file_head);
+	file->ds_def = (struct rrd_ds_def *)ptr;
+	ptr += sizeof (struct rrd_ds_def) * file->stat_head->ds_cnt;
+	file->rra_def = (struct rrd_rra_def *)ptr;
+	ptr += sizeof (struct rrd_rra_def) * file->stat_head->rra_cnt;
+	file->live_head = (struct rrd_live_head *)ptr;
+	ptr += sizeof (struct rrd_live_head);
+	file->pdp_prep = (struct rrd_pdp_prep *)ptr;
+	ptr += sizeof (struct rrd_pdp_prep) * file->stat_head->ds_cnt;
+	file->cdp_prep = (struct rrd_cdp_prep *)ptr;
+	ptr += sizeof (struct rrd_cdp_prep) * file->stat_head->rra_cnt * file->stat_head->ds_cnt;
+	file->rra_ptr = (struct rrd_rra_ptr *)ptr;
+	if (completed) {
+		ptr += sizeof (struct rrd_rra_ptr) * file->stat_head->rra_cnt;
+		file->rrd_value = (gdouble *)ptr;
+	}
+	else {
+		file->rrd_value = NULL;
+	}
+}
+
+/**
+ * Open completed or incompleted rrd file
+ * @param filename path to the file
+ * @param completed TRUE if the file must already contain the values area
+ * @param err error pointer
+ * @return mmapped rrd file structure or NULL in case of error
+ */
+static struct rspamd_rrd_file*
+rspamd_rrd_open_common (const gchar *filename, gboolean completed, GError **err)
+{
+	struct rspamd_rrd_file *new;
+	gint fd;
+	struct stat st;
+
+	if (!rspamd_rrd_check_file (filename, completed, err)) {
+		return NULL;
+	}
+
+	new = g_slice_alloc0 (sizeof (struct rspamd_rrd_file));
+
+	if (new == NULL) {
+		g_set_error (err, rrd_error_quark (), ENOMEM, "not enough memory");
+		return NULL;
+	}
+
+	/* Open file */
+	fd = open (filename, O_RDWR);
+	if (fd == -1) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd open error: %s", strerror (errno));
+		/* Do not leak the structure allocated above */
+		g_slice_free1 (sizeof (struct rspamd_rrd_file), new);
+		return NULL;
+	}
+
+	if (fstat (fd, &st) == -1) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd stat error: %s", strerror (errno));
+		close (fd);
+		g_slice_free1 (sizeof (struct rspamd_rrd_file), new);
+		return NULL;
+	}
+	/* Mmap file */
+	new->size = st.st_size;
+	if ((new->map = mmap (NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED) {
+		/* Report the error before close () gets a chance to change errno */
+		g_set_error (err, rrd_error_quark (), ENOMEM, "mmap failed: %s", strerror (errno));
+		close (fd);
+		g_slice_free1 (sizeof (struct rspamd_rrd_file), new);
+		return NULL;
+	}
+
+	close (fd);
+
+	/* Adjust pointers */
+	rspamd_rrd_adjust_pointers (new, completed);
+
+	/* Mark it as finalized */
+	new->finalized = completed;
+
+	new->filename = g_strdup (filename);
+
+	return new;
+}
+
+/**
+ * Open (and mmap) existing RRD file
+ * @param filename path
+ * @param err error pointer
+ * @return rrd file structure
+ */
+struct rspamd_rrd_file*
+rspamd_rrd_open (const gchar *filename, GError **err)
+{
+	return rspamd_rrd_open_common (filename, TRUE, err);
+}
+
+/**
+ * Create basic header for rrd file
+ * @param filename file path
+ * @param ds_count number of data sources
+ * @param rra_count number of round robin archives
+ * @param pdp_step step of primary data points
+ * @param err error pointer
+ * @return not yet finalized rrd file structure or NULL in case of error
+ */
+struct rspamd_rrd_file*
+rspamd_rrd_create (const gchar *filename, gulong ds_count, gulong rra_count, gulong pdp_step, GError **err)
+{
+	struct rspamd_rrd_file *new;
+	struct rrd_file_head head;
+	struct rrd_ds_def ds;
+	struct rrd_rra_def rra;
+	struct rrd_live_head lh;
+	struct rrd_pdp_prep pdp;
+	struct rrd_cdp_prep cdp;
+	struct rrd_rra_ptr rra_ptr;
+	gint fd;
+	guint i, j;
+	struct timeval tv;
+
+	/* Open file */
+	fd = open (filename, O_RDWR | O_CREAT | O_TRUNC, 0644);
+	if (fd == -1) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd create error: %s", strerror (errno));
+		return NULL;
+	}
+
+	/* Fill header */
+	memset (&head, 0, sizeof (head));
+	head.rra_cnt = rra_count;
+	head.ds_cnt = ds_count;
+	head.pdp_step = pdp_step;
+	memcpy (head.cookie, RRD_COOKIE, sizeof
(head.cookie));
+	memcpy (head.version, RRD_VERSION, sizeof (head.version));
+	head.float_cookie = RRD_FLOAT_COOKIE;
+
+	if (write (fd, &head, sizeof (head)) != sizeof (head)) {
+		close (fd);
+		g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno));
+		return NULL;
+	}
+
+	/*
+	 * Every template structure below is written to the file as raw bytes,
+	 * so each one is zeroed as a whole first: otherwise the unset tail of
+	 * the name buffers and untouched members would be stack garbage.
+	 */
+
+	/* Fill DS section */
+	memset (&ds, 0, sizeof (ds));
+	memcpy (&ds.dst, "COUNTER", sizeof ("COUNTER"));
+	for (i = 0; i < ds_count; i ++) {
+		if (write (fd, &ds, sizeof (ds)) != sizeof (ds)) {
+			close (fd);
+			g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno));
+			return NULL;
+		}
+	}
+
+	/* Fill RRA section */
+	memset (&rra, 0, sizeof (rra));
+	memcpy (&rra.cf_nam, "AVERAGE", sizeof ("AVERAGE"));
+	rra.pdp_cnt = 1;
+	for (i = 0; i < rra_count; i ++) {
+		if (write (fd, &rra, sizeof (rra)) != sizeof (rra)) {
+			close (fd);
+			g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno));
+			return NULL;
+		}
+	}
+
+	/* Fill live header */
+	memset (&lh, 0, sizeof (lh));
+	gettimeofday (&tv, NULL);
+	lh.last_up = tv.tv_sec;
+	lh.last_up_usec = tv.tv_usec;
+
+	if (write (fd, &lh, sizeof (lh)) != sizeof (lh)) {
+		close (fd);
+		g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno));
+		return NULL;
+	}
+
+	/* Fill pdp prep */
+	memset (&pdp, 0, sizeof (pdp));
+	memcpy (&pdp.last_ds, "U", sizeof ("U"));
+	pdp.scratch[PDP_val].dv = 0.;
+	pdp.scratch[PDP_unkn_sec_cnt].lv = 0;
+	for (i = 0; i < ds_count; i ++) {
+		if (write (fd, &pdp, sizeof (pdp)) != sizeof (pdp)) {
+			close (fd);
+			g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno));
+			return NULL;
+		}
+	}
+
+	/* Fill cdp prep */
+	memset (&cdp, 0, sizeof (cdp));
+	cdp.scratch[CDP_val].dv = NAN;
+	for (i = 0; i < rra_count; i ++) {
+		cdp.scratch[CDP_unkn_pdp_cnt].lv = 0;
+		for (j = 0; j < ds_count; j ++) {
+			if (write (fd, &cdp, sizeof (cdp)) !=
sizeof (cdp)) { + close (fd); + g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); + return NULL; + } + } + } + + /* Set row pointers */ + memset (&rra_ptr, 0, sizeof (rra_ptr)); + for (i = 0; i < rra_count; i ++) { + if (write (fd, &rra_ptr, sizeof (rra_ptr)) != sizeof (rra_ptr)) { + close (fd); + g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); + return NULL; + } + } + + close (fd); + new = rspamd_rrd_open_common (filename, FALSE, err); + + return new; +} + +/** + * Add data sources to rrd file + * @param filename path to file + * @param ds array of struct rrd_ds_def + * @param err error pointer + * @return TRUE if data sources were added + */ +gboolean +rspamd_rrd_add_ds (struct rspamd_rrd_file *file, GArray *ds, GError **err) +{ + + if (file == NULL || file->stat_head->ds_cnt * sizeof (struct rrd_ds_def) != ds->len) { + g_set_error (err, rrd_error_quark (), EINVAL, "rrd add ds failed: wrong arguments"); + return FALSE; + } + + /* Straightforward memcpy */ + memcpy (file->ds_def, ds->data, ds->len); + + return TRUE; +} + +/** + * Add round robin archives to rrd file + * @param filename path to file + * @param ds array of struct rrd_rra_def + * @param err error pointer + * @return TRUE if archives were added + */ +gboolean +rspamd_rrd_add_rra (struct rspamd_rrd_file *file, GArray *rra, GError **err) +{ + if (file == NULL || file->stat_head->rra_cnt * sizeof (struct rrd_rra_def) != rra->len) { + g_set_error (err, rrd_error_quark (), EINVAL, "rrd add rra failed: wrong arguments"); + return FALSE; + } + + /* Straightforward memcpy */ + memcpy (file->rra_def, rra->data, rra->len); + + return TRUE; +} + +/** + * Finalize rrd file header and initialize all RRA in the file + * @param filename file path + * @param err error pointer + * @return TRUE if rrd file is ready for use + */ +gboolean +rspamd_rrd_finalize (struct rspamd_rrd_file *file, GError **err) +{ + gint fd; + guint i; + gint count = 0; 
+	gdouble vbuf[1024];
+	struct stat st;
+	guint j;
+
+	if (file == NULL || file->filename == NULL) {
+		g_set_error (err, rrd_error_quark (), EINVAL, "rrd finalize failed: wrong arguments");
+		return FALSE;
+	}
+
+	fd = open (file->filename, O_RDWR);
+	if (fd == -1) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd open error: %s", strerror (errno));
+		return FALSE;
+	}
+
+	if (lseek (fd, 0, SEEK_END) == -1) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd seek error: %s", strerror (errno));
+		close (fd);
+		return FALSE;
+	}
+
+	/* Adjust CDP */
+	for (i = 0; i < file->stat_head->rra_cnt; i ++) {
+		/* Each RRA owns ds_cnt consecutive cdp_prep slots */
+		for (j = 0; j < file->stat_head->ds_cnt; j ++) {
+			file->cdp_prep[i * file->stat_head->ds_cnt + j].scratch[CDP_unkn_pdp_cnt].lv = 0;
+		}
+		/* Randomize row pointer of this RRA, an empty RRA stays at row 0 */
+		if (file->rra_def[i].row_cnt > 0) {
+			file->rra_ptr[i].cur_row = g_random_int () % file->rra_def[i].row_cnt;
+		}
+		else {
+			file->rra_ptr[i].cur_row = 0;
+		}
+		/* Calculate values count */
+		count += file->rra_def[i].row_cnt * file->stat_head->ds_cnt;
+	}
+
+	munmap (file->map, file->size);
+	/* Write values */
+	for (i = 0; i < G_N_ELEMENTS (vbuf); i ++) {
+		vbuf[i] = NAN;
+	}
+
+	while (count > 0) {
+		/* Write values in buffered matter */
+		if (write (fd, vbuf, MIN ((gint)G_N_ELEMENTS (vbuf), count) * sizeof (gdouble)) == -1) {
+			g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno));
+			close (fd);
+			return FALSE;
+		}
+		count -= G_N_ELEMENTS (vbuf);
+	}
+
+	if (fstat (fd, &st) == -1) {
+		g_set_error (err, rrd_error_quark (), errno, "rrd stat error: %s", strerror (errno));
+		close (fd);
+		return FALSE;
+	}
+
+	/* Mmap again */
+	file->size = st.st_size;
+	if ((file->map = mmap (NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED) {
+		close (fd);
+		g_set_error (err, rrd_error_quark (), ENOMEM, "mmap failed: %s", strerror (errno));
+		/* NOTE(review): the caller still holds `file` after this free - confirm nobody uses it on FALSE */
+		g_slice_free1 (sizeof (struct rspamd_rrd_file), file);
+		return FALSE;
+	}
+	close (fd);
+	/* Adjust pointers */
+	rspamd_rrd_adjust_pointers (file, TRUE);
+
+	file->finalized = TRUE;
+
+	return TRUE;
+}
+
+/**
+ * Update pdp_prep data
+ * @param file rrd file
+ * @param vals new values
+ * @param
pdp_new new pdp array + * @param interval time elapsed from the last update + * @return + */ +static gboolean +rspamd_rrd_update_pdp_prep (struct rspamd_rrd_file *file, gdouble *vals, gdouble *pdp_new, gdouble interval) +{ + guint i; + enum rrd_dst_type type; + + for (i = 0; i < file->stat_head->ds_cnt; i ++) { + type = rrd_dst_from_string (file->ds_def[i].dst); + + if (file->ds_def[i].par[RRD_DS_mrhb_cnt].lv < interval) { + rspamd_strlcpy (file->pdp_prep[i].last_ds, "U", sizeof (file->pdp_prep[i].last_ds)); + } + + if (file->ds_def[i].par[RRD_DS_mrhb_cnt].lv >= interval) { + switch (type) { + case RRD_DST_COUNTER: + case RRD_DST_DERIVE: + if (file->pdp_prep[i].last_ds[0] == 'U') { + pdp_new[i] = NAN; + } + else { + pdp_new[i] = vals[i] - strtod (file->pdp_prep[i].last_ds, NULL); + } + break; + case RRD_DST_GAUGE: + pdp_new[i] = vals[i] * interval; + break; + case RRD_DST_ABSOLUTE: + pdp_new[i] = vals[i]; + break; + default: + return FALSE; + } + } + else { + pdp_new[i] = NAN; + } + /* Copy value to the last_ds */ + if (!isnan (vals[i])) { + rspamd_snprintf (file->pdp_prep[i].last_ds, sizeof (file->pdp_prep[i].last_ds), "%.4f", vals[i]); + } + else { + file->pdp_prep[i].last_ds[0] = 'U'; + file->pdp_prep[i].last_ds[1] = '\0'; + } + } + + + return TRUE; +} + +/** + * Update step for this pdp + * @param file + * @param pdp_new new pdp array + * @param pdp_temp temp pdp array + * @param interval time till last update + * @param pre_int pre interval + * @param post_int post intervall + * @param pdp_diff time till last pdp update + */ +static void +rspamd_rrd_update_pdp_step (struct rspamd_rrd_file *file, gdouble *pdp_new, gdouble *pdp_temp, gdouble interval, + gdouble pre_int, gdouble post_int, gulong pdp_diff) +{ + guint i; + rrd_value_t *scratch; + gulong heartbeat; + + + for (i = 0; i < file->stat_head->ds_cnt; i ++) { + scratch = file->pdp_prep[i].scratch; + heartbeat = file->ds_def[i].par[RRD_DS_mrhb_cnt].lv; + if (!isnan (pdp_new[i])) { + if (isnan 
(scratch[PDP_val].dv)) { + scratch[PDP_val].dv = 0; + } + scratch[PDP_val].dv += pdp_new[i] / interval * pre_int; + pre_int = 0.0; + } + /* Check interval value for heartbeat for this DS */ + if ((interval > heartbeat) || (file->stat_head->pdp_step / 2.0 < scratch[PDP_unkn_sec_cnt].lv)) { + pdp_temp[i] = NAN; + } + else { + pdp_temp[i] = scratch[PDP_val].dv / + ((double) (pdp_diff - scratch[PDP_unkn_sec_cnt].lv) - pre_int); + } + + if (isnan (pdp_new[i])) { + scratch[PDP_unkn_sec_cnt].lv = floor (post_int); + scratch[PDP_val].dv = NAN; + } else { + scratch[PDP_unkn_sec_cnt].lv = 0; + scratch[PDP_val].dv = pdp_new[i] / interval * post_int; + } + } +} + +/** + * Update CDP for this rra + * @param file rrd file + * @param pdp_steps how much pdp steps elapsed from the last update + * @param pdp_offset offset from pdp + * @param rra_steps how much steps must be updated for this rra + * @param rra_index index of desired rra + * @param pdp_temp temporary pdp points + */ +static void +rspamd_rrd_update_cdp (struct rspamd_rrd_file *file, gdouble pdp_steps, gdouble pdp_offset, gulong *rra_steps, gulong rra_index, + gdouble *pdp_temp) +{ + guint i; + struct rrd_rra_def *rra; + rrd_value_t *scratch; + enum rrd_cf_type cf; + gdouble last_cdp, cur_cdp; + gulong pdp_in_cdp; + + rra = &file->rra_def[rra_index]; + cf = rrd_cf_from_string (rra->cf_nam); + + /* Iterate over all DS for this RRA */ + for (i = 0; i < file->stat_head->ds_cnt; i ++) { + /* Get CDP for this RRA and DS */ + scratch = file->cdp_prep[rra_index * file->stat_head->ds_cnt + i].scratch; + if (rra->pdp_cnt > 1) { + /* Do we have any CDP to update for this rra ? 
*/
+			if (rra_steps[rra_index] > 0) {
+				if (isnan (pdp_temp[i])) {
+					/* New pdp is nan */
+					/* Increment unknown points count */
+					scratch[CDP_unkn_pdp_cnt].lv += pdp_offset;
+					/* Reset secondary value */
+					scratch[CDP_secondary_val].dv = NAN;
+				}
+				else {
+					scratch[CDP_secondary_val].dv = pdp_temp[i];
+				}
+
+				/*
+				 * Check XFF for this rra: the factor is a fraction kept in
+				 * the double member (rrd_make_default_rra stores 0.5 via .dv),
+				 * so it must not be read through the integer member
+				 */
+				if (scratch[CDP_unkn_pdp_cnt].lv > rra->pdp_cnt * rra->par[RRA_cdp_xff_val].dv) {
+					/* XFF is reached */
+					scratch[CDP_primary_val].dv = NAN;
+				}
+				else {
+					/* Need to initialize CDP using specified consolidation */
+					switch (cf) {
+					case RRD_CF_AVERAGE:
+						last_cdp = isnan (scratch[CDP_val].dv) ? 0.0 : scratch[CDP_val].dv;
+						cur_cdp = isnan (pdp_temp[i]) ? 0.0 : pdp_temp[i];
+						scratch[CDP_primary_val].dv = (last_cdp + cur_cdp * pdp_offset) / (rra->pdp_cnt - scratch[CDP_unkn_pdp_cnt].lv);
+						break;
+					case RRD_CF_MAXIMUM:
+						last_cdp = isnan (scratch[CDP_val].dv) ? -INFINITY : scratch[CDP_val].dv;
+						cur_cdp = isnan (pdp_temp[i]) ? -INFINITY : pdp_temp[i];
+						scratch[CDP_primary_val].dv = MAX (last_cdp, cur_cdp);
+						break;
+					case RRD_CF_MINIMUM:
+						last_cdp = isnan (scratch[CDP_val].dv) ? INFINITY : scratch[CDP_val].dv;
+						cur_cdp = isnan (pdp_temp[i]) ?
INFINITY : pdp_temp[i]; + scratch[CDP_primary_val].dv = MIN (last_cdp, cur_cdp); + break; + case RRD_CF_LAST: + default: + scratch[CDP_primary_val].dv = pdp_temp[i]; + break; + } + } + /* Init carry of this CDP */ + pdp_in_cdp = (pdp_steps - pdp_offset) / rra->pdp_cnt; + if (pdp_in_cdp == 0 || isnan (pdp_temp[i])) { + /* Set overflow */ + switch (cf) { + case RRD_CF_AVERAGE: + scratch[CDP_val].dv = 0; + break; + case RRD_CF_MAXIMUM: + scratch[CDP_val].dv = -INFINITY; + break; + case RRD_CF_MINIMUM: + scratch[CDP_val].dv = INFINITY; + break; + default: + scratch[CDP_val].dv = NAN; + break; + } + } + else { + /* Special carry for average */ + if (cf == RRD_CF_AVERAGE) { + scratch[CDP_val].dv = pdp_temp[i] * pdp_in_cdp; + } + else { + scratch[CDP_val].dv = pdp_temp[i]; + } + } + } + /* In this case we just need to update cdp_prep for this RRA */ + else { + if (isnan (pdp_temp[i])) { + /* Just increase undefined zone */ + scratch[CDP_unkn_pdp_cnt].lv += pdp_steps; + } + else { + /* Calculate cdp value */ + last_cdp = scratch[CDP_val].dv; + switch (cf) { + case RRD_CF_AVERAGE: + if (isnan (last_cdp)) { + scratch[CDP_val].dv = pdp_temp[i] * pdp_steps; + } + else { + scratch[CDP_val].dv = last_cdp + pdp_temp[i] * pdp_steps; + } + break; + case RRD_CF_MAXIMUM: + scratch[CDP_val].dv = MAX (last_cdp, pdp_temp[i]); + break; + case RRD_CF_MINIMUM: + scratch[CDP_val].dv = MIN (last_cdp, pdp_temp[i]); + break; + case RRD_CF_LAST: + scratch[CDP_val].dv = pdp_temp[i]; + break; + default: + scratch[CDP_val].dv = NAN; + break; + } + } + } + } + else { + /* We have nothing to consolidate, but we may miss some pdp */ + if (pdp_steps > 2) { + /* Just write PDP value */ + scratch[CDP_primary_val].dv = pdp_temp[i]; + scratch[CDP_secondary_val].dv = pdp_temp[i]; + } + } + } +} + +/** + * Update RRA in a file + * @param file rrd file + * @param rra_steps steps for each rra + * @param now current time + */ +void +rspamd_rrd_write_rra (struct rspamd_rrd_file *file, gulong *rra_steps) +{ + 
guint i, j, scratch_idx, cdp_idx, k; + struct rrd_rra_def *rra; + gdouble *rra_row; + + /* Iterate over all RRA */ + for (i = 0; i < file->stat_head->rra_cnt; i ++) { + rra = &file->rra_def[i]; + /* How much steps need to be updated */ + for (j = 0, scratch_idx = CDP_primary_val; j < rra_steps[i]; j ++, scratch_idx = CDP_secondary_val) { + /* Move row ptr */ + if (++file->rra_ptr[i].cur_row >= rra->row_cnt) { + file->rra_ptr[i].cur_row = 0; + } + /* Calculate seek */ + rra_row = file->rrd_value + (file->stat_head->ds_cnt * i + file->rra_ptr[i].cur_row); + /* Iterate over DS */ + for (k = 0; k < file->stat_head->ds_cnt; k ++) { + cdp_idx = i * file->stat_head->ds_cnt + k; + memcpy (rra_row, &file->cdp_prep[cdp_idx].scratch[scratch_idx].dv, sizeof (gdouble)); + rra_row ++; + } + } + } +} + +/** + * Add record to rrd file + * @param file rrd file object + * @param points points (must be row suitable for this RRA, depending on ds count) + * @param err error pointer + * @return TRUE if a row has been added + */ +gboolean +rspamd_rrd_add_record (struct rspamd_rrd_file* file, GArray *points, GError **err) +{ + gdouble interval, *pdp_new, *pdp_temp, pre_int, post_int; + guint i; + gulong pdp_steps, cur_pdp_count, prev_pdp_step, cur_pdp_step, + prev_pdp_age, cur_pdp_age, *rra_steps, pdp_offset; + struct timeval tv; + + if (file == NULL || file->stat_head->ds_cnt * sizeof (gdouble) != points->len) { + g_set_error (err, rrd_error_quark (), EINVAL, "rrd add points failed: wrong arguments"); + return FALSE; + } + + /* Get interval */ + gettimeofday (&tv, NULL); + interval = (gdouble)(tv.tv_sec - file->live_head->last_up) + + (gdouble)(tv.tv_usec - file->live_head->last_up_usec) / 1e6f; + + /* Update PDP preparation values */ + pdp_new = g_malloc (sizeof (gdouble) * file->stat_head->ds_cnt); + pdp_temp = g_malloc (sizeof (gdouble) * file->stat_head->ds_cnt); + /* How much steps need to be updated in each RRA */ + rra_steps = g_malloc0 (sizeof (gulong) * 
file->stat_head->rra_cnt); + + if (!rspamd_rrd_update_pdp_prep (file, (gdouble *)points->data, pdp_new, interval)) { + g_set_error (err, rrd_error_quark (), EINVAL, "rrd update pdp failed: wrong arguments"); + g_free (pdp_new); + g_free (pdp_temp); + g_free (rra_steps); + return FALSE; + } + + /* Calculate elapsed steps */ + /* Age in seconds for previous pdp store */ + prev_pdp_age = file->live_head->last_up % file->stat_head->pdp_step; + /* Time in seconds for last pdp update */ + prev_pdp_step = file->live_head->last_up - prev_pdp_age; + /* Age in seconds from current time to required pdp time */ + cur_pdp_age = tv.tv_sec % file->stat_head->pdp_step; + /* Time of desired pdp step */ + cur_pdp_step = tv.tv_sec - cur_pdp_age; + + if (cur_pdp_step > prev_pdp_step) { + pre_int = (gdouble)(cur_pdp_step - file->live_head->last_up) - ((double)file->live_head->last_up_usec) / 1e6f; + post_int = (gdouble)cur_pdp_age + ((double)tv.tv_usec) / 1e6f; + } + else { + pre_int = interval; + post_int = 0; + } + cur_pdp_count = cur_pdp_step / file->stat_head->pdp_step; + pdp_steps = (cur_pdp_step - prev_pdp_step) / file->stat_head->pdp_step; + + + if (pdp_steps == 0) { + /* Simple update of pdp prep */ + for (i = 0; i < file->stat_head->ds_cnt; i ++) { + if (isnan (pdp_new[i])) { + /* Increment unknown period */ + file->pdp_prep[i].scratch[PDP_unkn_sec_cnt].lv += floor (interval); + } + else { + if (isnan (file->pdp_prep[i].scratch[PDP_val].dv)) { + /* Reset pdp to the current value */ + file->pdp_prep[i].scratch[PDP_val].dv = pdp_new[i]; + } + else { + /* Increment pdp value */ + file->pdp_prep[i].scratch[PDP_val].dv += pdp_new[i]; + } + } + } + } + else { + /* Complex update of PDP, CDP and RRA */ + + /* Update PDP for this step */ + rspamd_rrd_update_pdp_step (file, pdp_new, pdp_temp, interval, pre_int, post_int, pdp_steps * file->stat_head->pdp_step); + + + /* Update CDP points for each RRA*/ + for (i = 0; i < file->stat_head->rra_cnt; i ++) { + /* Calculate pdp offset for 
this RRA */ + pdp_offset = file->rra_def[i].pdp_cnt - cur_pdp_count % file->rra_def[i].pdp_cnt; + /* How much steps we got for this RRA */ + if (pdp_offset <= pdp_steps) { + rra_steps[i] = (pdp_steps - pdp_offset) / file->rra_def[i].pdp_cnt + 1; + } + else { + /* This rra have not passed enough pdp steps */ + rra_steps[i] = 0; + } + /* Update this specific CDP */ + rspamd_rrd_update_cdp (file, pdp_steps, pdp_offset, rra_steps, i, pdp_temp); + /* Write RRA */ + rspamd_rrd_write_rra (file, rra_steps); + } + } + file->live_head->last_up = tv.tv_sec; + file->live_head->last_up_usec = tv.tv_usec; + + /* Sync and invalidate */ + msync (file->map, file->size, MS_ASYNC | MS_INVALIDATE); + + g_free (pdp_new); + g_free (pdp_temp); + g_free (rra_steps); + + return TRUE; +} + +/** + * Close rrd file + * @param file + * @return + */ +gint +rspamd_rrd_close (struct rspamd_rrd_file* file) +{ + if (file == NULL) { + errno = EINVAL; + return -1; + } + + munmap (file->map, file->size); + if (file->filename != NULL) { + g_free (file->filename); + } + g_slice_free1 (sizeof (struct rspamd_rrd_file), file); + + return 0; +} diff --git a/src/libutil/rrd.h b/src/libutil/rrd.h new file mode 100644 index 000000000..ff6902894 --- /dev/null +++ b/src/libutil/rrd.h @@ -0,0 +1,374 @@ +/* Copyright (c) 2010-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef RRD_H_ +#define RRD_H_ + +#include "config.h" + +/** + * This file contains basic structure and functions to operate with round-robin databases + */ + +#define RRD_COOKIE "RRD" +#define RRD_VERSION "0003" +#define RRD_FLOAT_COOKIE ((double)8.642135E130) + +typedef union { + unsigned long lv; + double dv; +} rrd_value_t; + +struct rrd_file_head { + /* Data Base Identification Section ** */ + gchar cookie[4]; /* RRD */ + gchar version[5]; /* version of the format */ + gdouble float_cookie; /* is it the correct double representation ? */ + + /* Data Base Structure Definition **** */ + gulong ds_cnt; /* how many different ds provid input to the rrd */ + gulong rra_cnt; /* how many rras will be maintained in the rrd */ + gulong pdp_step; /* pdp interval in seconds */ + + rrd_value_t par[10]; /* global parameters ... unused + at the moment */ +}; + +enum rrd_dst_type { + RRD_DST_COUNTER = 0, /* data source types available */ + RRD_DST_ABSOLUTE, + RRD_DST_GAUGE, + RRD_DST_DERIVE, + RRD_DST_CDEF +}; +enum rrd_ds_param { + RRD_DS_mrhb_cnt = 0, /* minimum required heartbeat */ + RRD_DS_min_val, /* the processed input of a ds must */ + RRD_DS_max_val, /* be between max_val and min_val + * both can be set to UNKNOWN if you + * do not care. 
Data outside the limits + * set to UNKNOWN */ + RRD_DS_cdef = RRD_DS_mrhb_cnt +}; /* pointer to encoded rpn expression only applies to DST_CDEF */ + + +/* The magic number here is one less than DS_NAM_SIZE */ +#define RRD_DS_NAM_SIZE 20 + +#define RRD_DST_SIZE 20 + +struct rrd_ds_def { + gchar ds_nam[RRD_DS_NAM_SIZE]; /* Name of the data source (null terminated) */ + gchar dst[RRD_DST_SIZE]; /* Type of data source (null terminated) */ + rrd_value_t par[10]; /* index of this array see ds_param_en */ +}; + +/* RRA definition */ + +enum rrd_cf_type { + RRD_CF_AVERAGE = 0, /* data consolidation functions */ + RRD_CF_MINIMUM, + RRD_CF_MAXIMUM, + RRD_CF_LAST, + RRD_CF_HWPREDICT, + /* An array of predictions using the seasonal + * Holt-Winters algorithm. Requires an RRA of type + * CF_SEASONAL for this data source. */ + RRD_CF_SEASONAL, + /* An array of seasonal effects. Requires an RRA of + * type CF_HWPREDICT for this data source. */ + RRD_CF_DEVPREDICT, + /* An array of deviation predictions based upon + * smoothed seasonal deviations. Requires an RRA of + * type CF_DEVSEASONAL for this data source. */ + RRD_CF_DEVSEASONAL, + /* An array of smoothed seasonal deviations. Requires + * an RRA of type CF_HWPREDICT for this data source. + * */ + RRD_CF_FAILURES, + /* HWPREDICT that follows a moving baseline */ + RRD_CF_MHWPREDICT + /* new entries must come last !!! */ +}; + + +#define MAX_RRA_PAR_EN 10 + +enum rrd_rra_param { + RRA_cdp_xff_val = 0, /* what part of the consolidated + * datapoint must be known, to produce a + * valid entry in the rra */ + /* CF_HWPREDICT: */ + RRA_hw_alpha = 1, + /* exponential smoothing parameter for the intercept in + * the Holt-Winters prediction algorithm. */ + RRA_hw_beta = 2, + /* exponential smoothing parameter for the slope in + * the Holt-Winters prediction algorithm. */ + + RRA_dependent_rra_idx = 3, + /* For CF_HWPREDICT: index of the RRA with the seasonal + * effects of the Holt-Winters algorithm (of type + * CF_SEASONAL). 
+ * For CF_DEVPREDICT: index of the RRA with the seasonal + * deviation predictions (of type CF_DEVSEASONAL). + * For CF_SEASONAL: index of the RRA with the Holt-Winters + * intercept and slope coefficient (of type CF_HWPREDICT). + * For CF_DEVSEASONAL: index of the RRA with the + * Holt-Winters prediction (of type CF_HWPREDICT). + * For CF_FAILURES: index of the CF_DEVSEASONAL array. + * */ + + /* CF_SEASONAL and CF_DEVSEASONAL: */ + RRA_seasonal_gamma = 1, + /* exponential smoothing parameter for seasonal effects. */ + + RRA_seasonal_smoothing_window = 2, + /* fraction of the season to include in the running average + * smoother */ + + /* RRA_dependent_rra_idx = 3, */ + + RRA_seasonal_smooth_idx = 4, + /* an integer between 0 and row_count - 1 which + * is index in the seasonal cycle for applying + * the period smoother. */ + + /* CF_FAILURES: */ + RRA_delta_pos = 1, /* confidence bound scaling parameters */ + RRA_delta_neg = 2, + /* RRA_dependent_rra_idx = 3, */ + RRA_window_len = 4, + RRA_failure_threshold = 5 + /* For CF_FAILURES, number of violations within the last + * window required to mark a failure. */ +}; + + +#define RRD_CF_NAM_SIZE 20 + +struct rrd_rra_def { + gchar cf_nam[RRD_CF_NAM_SIZE]; /* consolidation function (null term) */ + gulong row_cnt; /* number of entries in the store */ + gulong pdp_cnt; /* how many primary data points are + * required for a consolidated data point?*/ + rrd_value_t par[MAX_RRA_PAR_EN]; /* index see rra_param_en */ + +}; + +struct rrd_live_head { + time_t last_up; /* when was rrd last updated */ + glong last_up_usec; /* micro seconds part of the update timestamp. Always >= 0 */ +}; + +#define RRD_LAST_DS_LEN 30 + +enum rrd_pdp_param { + PDP_unkn_sec_cnt = 0, /* how many seconds of the current + * pdp value is unknown data? */ + PDP_val +}; /* current value of the pdp. + this depends on dst */ + +struct rrd_pdp_prep { + gchar last_ds[RRD_LAST_DS_LEN]; /* the last reading from the data + * source. 
this is stored in ASCII + * to cater for very large counters + * we might encounter in connection + * with SNMP. */ + rrd_value_t scratch[10]; /* contents according to pdp_par_en */ +}; + +#define RRD_MAX_CDP_PAR_EN 10 +#define RRD_MAX_CDP_FAILURES_IDX 8 +/* max CDP scratch entries avail to record violations for a FAILURES RRA */ +#define RRD_MAX_FAILURES_WINDOW_LEN 28 + +enum rrd_cdp_param { + CDP_val = 0, + /* the base_interval is always an + * average */ + CDP_unkn_pdp_cnt, + /* how many unknown pdp were + * integrated. This and the cdp_xff + * will decide if this is going to + * be a UNKNOWN or a valid value */ + CDP_hw_intercept, + /* Current intercept coefficient for the Holt-Winters + * prediction algorithm. */ + CDP_hw_last_intercept, + /* Last iteration intercept coefficient for the Holt-Winters + * prediction algorihtm. */ + CDP_hw_slope, + /* Current slope coefficient for the Holt-Winters + * prediction algorithm. */ + CDP_hw_last_slope, + /* Last iteration slope coeffient. */ + CDP_null_count, + /* Number of sequential Unknown (DNAN) values + 1 preceding + * the current prediction. + * */ + CDP_last_null_count, + /* Last iteration count of Unknown (DNAN) values. */ + CDP_primary_val = 8, + /* optimization for bulk updates: the value of the first CDP + * value to be written in the bulk update. */ + CDP_secondary_val = 9, + /* optimization for bulk updates: the value of subsequent + * CDP values to be written in the bulk update. */ + CDP_hw_seasonal = CDP_hw_intercept, + /* Current seasonal coefficient for the Holt-Winters + * prediction algorithm. This is stored in CDP prep to avoid + * redundant seek operations. */ + CDP_hw_last_seasonal = CDP_hw_last_intercept, + /* Last iteration seasonal coefficient. 
*/ + CDP_seasonal_deviation = CDP_hw_intercept, + CDP_last_seasonal_deviation = CDP_hw_last_intercept, + CDP_init_seasonal = CDP_null_count +}; + +struct rrd_cdp_prep { + rrd_value_t scratch[RRD_MAX_CDP_PAR_EN]; + /* contents according to cdp_par_en * + * init state should be NAN */ +}; + +struct rrd_rra_ptr { + gulong cur_row; /* current row in the rra */ +}; + +/* Final rrd file structure */ +struct rspamd_rrd_file { + struct rrd_file_head *stat_head; /* the static header */ + struct rrd_ds_def *ds_def; /* list of data source definitions */ + struct rrd_rra_def *rra_def; /* list of round robin archive def */ + struct rrd_live_head *live_head; /* rrd v >= 3 last_up with us */ + struct rrd_pdp_prep *pdp_prep; /* pdp data prep area */ + struct rrd_cdp_prep *cdp_prep; /* cdp prep area */ + struct rrd_rra_ptr *rra_ptr; /* list of rra pointers */ + gdouble *rrd_value; /* list of rrd values */ + + gchar *filename; + guint8* map; /* mmapped area */ + gsize size; /* its size */ + gboolean finalized; +}; + + +/* Public API */ + +/** + * Open (and mmap) existing RRD file + * @param filename path + * @param err error pointer + * @return rrd file structure + */ +struct rspamd_rrd_file* rspamd_rrd_open (const gchar *filename, GError **err); + +/** + * Create basic header for rrd file + * @param filename file path + * @param ds_count number of data sources + * @param rra_count number of round robin archives + * @param pdp_step step of primary data points + * @param err error pointer + * @return TRUE if file has been created + */ +struct rspamd_rrd_file* rspamd_rrd_create (const gchar *filename, gulong ds_count, gulong rra_count, gulong pdp_step, GError **err); + +/** + * Add data sources to rrd file + * @param filename path to file + * @param ds array of struct rrd_ds_def + * @param err error pointer + * @return TRUE if data sources were added + */ +gboolean rspamd_rrd_add_ds (struct rspamd_rrd_file* file, GArray *ds, GError **err); + +/** + * Add round robin archives to rrd 
file + * @param filename path to file + * @param ds array of struct rrd_rra_def + * @param err error pointer + * @return TRUE if archives were added + */ +gboolean rspamd_rrd_add_rra (struct rspamd_rrd_file *file, GArray *rra, GError **err); + +/** + * Finalize rrd file header and initialize all RRA in the file + * @param filename file path + * @param err error pointer + * @return TRUE if rrd file is ready for use + */ +gboolean rspamd_rrd_finalize (struct rspamd_rrd_file *file, GError **err); + +/** + * Add record to rrd file + * @param file rrd file object + * @param points points (must be row suitable for this RRA, depending on ds count) + * @param err error pointer + * @return TRUE if a row has been added + */ +gboolean rspamd_rrd_add_record (struct rspamd_rrd_file* file, GArray *points, GError **err); + +/** + * Close rrd file + * @param file + * @return + */ +gint rspamd_rrd_close (struct rspamd_rrd_file* file); + +/* + * Conversion functions + */ + +/** + * Convert rrd dst type from string to numeric value + */ +enum rrd_dst_type rrd_dst_from_string (const gchar *str); +/** + * Convert numeric presentation of dst to string + */ +const gchar* rrd_dst_to_string (enum rrd_dst_type type); +/** + * Convert rrd consolidation function type from string to numeric value + */ +enum rrd_cf_type rrd_cf_from_string (const gchar *str); +/** + * Convert numeric presentation of cf to string + */ +const gchar* rrd_cf_to_string (enum rrd_cf_type type); + +/* Default RRA and DS */ + +/** + * Create default RRA + */ +void rrd_make_default_rra (const gchar *cf_name, gulong pdp_cnt, gulong rows, struct rrd_rra_def *rra); + +/** + * Create default DS + */ +void rrd_make_default_ds (const gchar *name, gulong pdp_step, struct rrd_ds_def *ds); +#endif /* RRD_H_ */ diff --git a/src/libutil/trie.c b/src/libutil/trie.c new file mode 100644 index 000000000..394c4e939 --- /dev/null +++ b/src/libutil/trie.c @@ -0,0 +1,230 @@ +/* Copyright (c) 2010, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "mem_pool.h" +#include "trie.h" + +rspamd_trie_t* +rspamd_trie_create (gboolean icase) +{ + rspamd_trie_t *new; + + new = g_malloc (sizeof (rspamd_trie_t)); + + new->icase = icase; + new->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + new->root.fail = NULL; + new->root.final = 0; + new->root.id = 0; + new->root.next = NULL; + new->root.match = NULL; + new->fail_states = g_ptr_array_sized_new (8); + + return new; +} + +/* + * Insert a single character as the specified level of the suffix tree + */ +static struct rspamd_trie_state * +rspamd_trie_insert_char (rspamd_trie_t *trie, guint depth, struct rspamd_trie_state *pos, gchar c) +{ + struct rspamd_trie_match *new_match; + struct rspamd_trie_state *new_pos; + + /* New match is inserted before pos */ + new_match = rspamd_mempool_alloc (trie->pool, sizeof (struct rspamd_trie_match)); + new_match->next = pos->match; + new_match->c = c; + + /* Now set match link */ + pos->match = new_match; + + new_match->state = rspamd_mempool_alloc (trie->pool, sizeof (struct rspamd_trie_state)); + new_pos = new_match->state; + new_pos->match = NULL; + new_pos->fail = &trie->root; + new_pos->final = 0; + new_pos->id = -1; + + if (trie->fail_states->len < depth + 1) { + /* Grow fail states array if depth is more than its size */ + guint size = trie->fail_states->len; + + size = MAX (size * 2, depth + 1); + g_ptr_array_set_size (trie->fail_states, size); + } + + new_pos->next = trie->fail_states->pdata[depth]; + trie->fail_states->pdata[depth] = new_pos; + + return new_pos; +} + +/* Traverse the specified node to find corresponding match */ +static inline struct rspamd_trie_match * +check_match (struct rspamd_trie_state *s, gchar c) +{ + struct rspamd_trie_match *match = s->match; + + while (match && match->c != c) { + match = match->next; + } + + return match; +} + +void +rspamd_trie_insert (rspamd_trie_t *trie, const gchar *pattern, gint pattern_id) +{ + const guchar *p = pattern; + 
struct rspamd_trie_state *q, *q1, *r, *cur_node; + struct rspamd_trie_match *m, *n; + guint i, depth = 0; + gchar c; + + /* Insert pattern to the trie */ + + cur_node = &trie->root; + + while (*p) { + c = trie->icase ? g_ascii_tolower (*p) : *p; + m = check_match (cur_node, c); + if (m == NULL) { + /* Insert a character at specified level depth */ + cur_node = rspamd_trie_insert_char (trie, depth, cur_node, c); + } + else { + cur_node = m->state; + } + p ++; + depth ++; + } + + cur_node->final = depth; + cur_node->id = pattern_id; + + /* Update fail states and build fail states graph */ + /* Go through the whole depth of prefixes */ + for (i = 0; i < trie->fail_states->len; i++) { + q = trie->fail_states->pdata[i]; + while (q) { + m = q->match; + while (m) { + c = m->c; + q1 = m->state; + r = q->fail; + /* Move q->fail to last known fail location for this character (or to NULL) */ + while (r && (n = check_match (r, c)) == NULL) { + r = r->fail; + } + + /* We have found new fail location for character c, so set it in q1 */ + if (r != NULL) { + q1->fail = n->state; + if (q1->fail->final > q1->final) { + q1->final = q1->fail->final; + } + } + else { + /* Search from root */ + if ((n = check_match (&trie->root, c))) { + q1->fail = n->state; + } + else { + q1->fail = &trie->root; + } + } + + m = m->next; + } + + q = q->next; + } + } +} + +const gchar* +rspamd_trie_lookup (rspamd_trie_t *trie, const gchar *buffer, gsize buflen, gint *matched_id) +{ + const guchar *p = buffer, *prev, *ret; + struct rspamd_trie_state *cur_node; + struct rspamd_trie_match *m = NULL; + gchar c; + + + cur_node = &trie->root; + prev = p; + ret = p; + + while (buflen) { + c = trie->icase ? 
g_ascii_tolower (*p) : *p; + + /* Match pattern or use fail-path to restore state */ + while (cur_node != NULL && (m = check_match (cur_node, c)) == NULL) { + cur_node = cur_node->fail; + } + + /* Shift left in the text */ + if (cur_node == &trie->root) { + /* 1 character pattern found */ + ret = prev; + } + else if (cur_node == NULL) { + /* We have tried the pattern but eventually it was not found */ + cur_node = &trie->root; + ret = p; + p ++; + prev = p; + buflen --; + continue; + } + + if (m != NULL) { + /* Match found */ + cur_node = m->state; + + if (cur_node->final) { + /* The complete pattern found */ + if (matched_id != NULL) { + *matched_id = cur_node->id; + } + return (const gchar *) ret; + } + } + p ++; + prev = p; + buflen --; + } + + return NULL; +} + +void +rspamd_trie_free (rspamd_trie_t *trie) +{ + g_ptr_array_free (trie->fail_states, TRUE); + rspamd_mempool_delete (trie->pool); + g_free (trie); +} diff --git a/src/libutil/trie.h b/src/libutil/trie.h new file mode 100644 index 000000000..2792ee4a5 --- /dev/null +++ b/src/libutil/trie.h @@ -0,0 +1,86 @@ +/* Copyright (c) 2010, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL Rambler BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef TRIE_H_ +#define TRIE_H_ + +#include "config.h" +#include "mem_pool.h" + +/* + * Rspamd implements basic bitwise prefixed trie structure + */ + +struct rspamd_trie_match; + +struct rspamd_trie_state { + struct rspamd_trie_state *next; + struct rspamd_trie_state *fail; + struct rspamd_trie_match *match; + guint final; + gint id; +}; + +struct rspamd_trie_match { + struct rspamd_trie_match *next; + struct rspamd_trie_state *state; + gchar c; +}; + +typedef struct rspamd_trie_s { + struct rspamd_trie_state root; + GPtrArray *fail_states; + gboolean icase; + rspamd_mempool_t *pool; +} rspamd_trie_t; + +/* + * Create a new suffix trie + */ +rspamd_trie_t* rspamd_trie_create (gboolean icase); + +/* + * Insert a pattern into the trie + * @param trie suffix trie + * @param pattern text of element + * @param pattern_id id of element + */ +void rspamd_trie_insert (rspamd_trie_t *trie, const gchar *pattern, gint pattern_id); + +/* + * Search for a text using suffix trie + * @param trie suffix trie + * @param buffer a text where to search for trie patterns + * @param buflen a length of text + * @param mached_id on a successfull search here would be stored id of pattern found + * @return Position in a text where pattern was found or NULL if no patterns were found + */ +const gchar* rspamd_trie_lookup (rspamd_trie_t *trie, const gchar *buffer, gsize buflen, gint *matched_id); + +/* + * Deallocate suffix trie + */ +void rspamd_trie_free (rspamd_trie_t *trie); + 
+#endif /* TRIE_H_ */ diff --git a/src/libutil/upstream.c b/src/libutil/upstream.c new file mode 100644 index 000000000..f82d3ba50 --- /dev/null +++ b/src/libutil/upstream.c @@ -0,0 +1,525 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "upstream.h" + + +#ifdef _THREAD_SAFE +pthread_rwlock_t upstream_mtx = PTHREAD_RWLOCK_INITIALIZER; +# define U_RLOCK() do { pthread_rwlock_rdlock (&upstream_mtx); } while (0) +# define U_WLOCK() do { pthread_rwlock_wrlock (&upstream_mtx); } while (0) +# define U_UNLOCK() do { pthread_rwlock_unlock (&upstream_mtx); } while (0) +#else +# define U_RLOCK() do {} while (0) +# define U_WLOCK() do {} while (0) +# define U_UNLOCK() do {} while (0) +#endif + +#define MAX_TRIES 20 +#define HASH_COMPAT + +/* + * Poly: 0xedb88320 + * Init: 0x0 + */ + +static const guint32 crc32lookup[256] = { + 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U, 0x706af48fU, + 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U, + 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, + 0xf3b97148U, 0x84be41deU, 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, + 0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U, + 0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U, + 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU, 0x35b5a8faU, 0x42b2986cU, + 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, + 0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, + 0xcfba9599U, 0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U, + 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U, 0x01db7106U, + 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U, + 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, + 0x91646c97U, 0xe6635c01U, 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, + 0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U, + 0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U, + 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U, 0x4adfa541U, 0x3dd895d7U, + 
0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U, + 0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, + 0xbe0b1010U, 0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU, + 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U, 0x2eb40d81U, + 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU, + 0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U, 0xe3630b12U, 0x94643b84U, + 0x0d6d6a3eU, 0x7a6a5aa8U, 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, + 0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU, + 0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU, + 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U, 0xd6d6a3e8U, 0xa1d1937eU, + 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, + 0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, + 0x316e8eefU, 0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U, + 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU, 0xb2bd0b28U, + 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU, + 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, + 0x72076785U, 0x05005713U, 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, + 0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U, + 0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U, + 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU, 0x8f659effU, 0xf862ae69U, + 0x616bffd3U, 0x166ccf45U, 0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, + 0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, + 0x40df0b66U, 0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U, + 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U, 0xcdd70693U, + 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U, + 
0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU +}; + +/* + * Check upstream parameters and mark it whether valid or dead + */ +static void +check_upstream (struct upstream *up, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors) +{ + if (up->dead) { + if (now - up->time >= revive_timeout) { + U_WLOCK (); + up->dead = 0; + up->errors = 0; + up->time = 0; + up->weight = up->priority; + U_UNLOCK (); + } + } + else { + if (now - up->time >= error_timeout && up->errors >= max_errors) { + U_WLOCK (); + up->dead = 1; + up->time = now; + up->weight = 0; + U_UNLOCK (); + } + } +} + +/* + * Call this function after failed upstream request + */ +void +upstream_fail (struct upstream *up, time_t now) +{ + if (up->time != 0) { + up->errors++; + } + else { + U_WLOCK (); + up->time = now; + up->errors++; + U_UNLOCK (); + } +} + +/* + * Call this function after successfull upstream request + */ +void +upstream_ok (struct upstream *up, time_t now) +{ + if (up->errors != 0) { + U_WLOCK (); + up->errors = 0; + up->time = 0; + U_UNLOCK (); + } + + up->weight--; +} + +/* + * Mark all upstreams as active. 
This function is used when all upstreams are marked as inactive + */ +void +revive_all_upstreams (void *ups, size_t members, size_t msize) +{ + guint i; + struct upstream *cur; + guchar *p; + + U_WLOCK (); + p = ups; + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + cur->time = 0; + cur->errors = 0; + cur->dead = 0; + cur->weight = cur->priority; + p += msize; + } + U_UNLOCK (); +} + +/* + * Scan all upstreams for errors and mark upstreams dead or alive depends on conditions, + * return number of alive upstreams + */ +static gint +rescan_upstreams (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors) +{ + guint i, alive; + struct upstream *cur; + guchar *p; + + /* Recheck all upstreams */ + p = ups; + alive = members; + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + check_upstream (cur, now, error_timeout, revive_timeout, max_errors); + alive -= cur->dead; + p += msize; + } + + /* All upstreams are dead */ + if (alive == 0) { + revive_all_upstreams (ups, members, msize); + alive = members; + } + + + return alive; + +} + +/* Return alive upstream by its number */ +static struct upstream * +get_upstream_by_number (void *ups, size_t members, size_t msize, gint selected) +{ + guint i; + u_char *p, *c; + struct upstream *cur; + + i = 0; + p = ups; + c = ups; + U_RLOCK (); + for (;;) { + /* Out of range, return NULL */ + if (p > c + members * msize) { + break; + } + + cur = (struct upstream *)p; + p += msize; + + if (cur->dead) { + /* Skip inactive upstreams */ + continue; + } + /* Return selected upstream */ + if ((gint)i == selected) { + U_UNLOCK (); + return cur; + } + i++; + } + U_UNLOCK (); + + /* Error */ + return NULL; + +} + +/* + * Get hash key for specified key (perl hash) + */ +static guint32 +get_hash_for_key (guint32 hash, const gchar *key, size_t keylen) +{ + guint32 h, index; + const gchar *end = key + keylen; + + h = ~hash; + + if (end != key) { + while (key < 
end) { + index = (h ^ (u_char) * key) & 0x000000ffU; + h = (h >> 8) ^ crc32lookup[index]; + ++key; + } + } + else { + while (*key) { + index = (h ^ (u_char) * key) & 0x000000ffU; + h = (h >> 8) ^ crc32lookup[index]; + ++key; + } + } + + return (~h); +} + +/* + * Recheck all upstreams and return random active upstream + */ +struct upstream * +get_random_upstream (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors) +{ + gint alive, selected; + + alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); + selected = rand () % alive; + + return get_upstream_by_number (ups, members, msize, selected); +} + +/* + * Return upstream by hash, that is calculated from active upstreams number + */ +struct upstream * +get_upstream_by_hash (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors, const gchar *key, size_t keylen) +{ + gint alive, tries = 0, r; + guint32 h = 0, ht; + gchar *p, numbuf[4]; + struct upstream *cur; + + alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); + + if (alive == 0) { + return NULL; + } + + h = get_hash_for_key (0, key, keylen); +#ifdef HASH_COMPAT + h = (h >> 16) & 0x7fff; +#endif + h %= members; + + for (;;) { + p = (gchar *)ups + msize * h; + cur = (struct upstream *)p; + if (!cur->dead) { + break; + } + r = snprintf (numbuf, sizeof (numbuf), "%d", tries); + ht = get_hash_for_key (0, numbuf, r); + ht = get_hash_for_key (ht, key, keylen); +#ifdef HASH_COMPAT + h += (ht >> 16) & 0x7fff; +#else + h += ht; +#endif + h %= members; + tries++; + if (tries > MAX_TRIES) { + return NULL; + } + } + + U_RLOCK (); + p = ups; + U_UNLOCK (); + return cur; +} + +/* + * Recheck all upstreams and return upstream in round-robin order according to weight and priority + */ +struct upstream * +get_upstream_round_robin (void *ups, size_t members, size_t 
msize, time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors) +{ + guint max_weight, i; + struct upstream *cur, *selected = NULL; + u_char *p; + + /* Recheck all upstreams */ + (void)rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); + + p = ups; + max_weight = 0; + selected = (struct upstream *)p; + U_RLOCK (); + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + if (!cur->dead) { + if (max_weight < (guint)cur->weight) { + max_weight = cur->weight; + selected = cur; + } + } + p += msize; + } + U_UNLOCK (); + + if (max_weight == 0) { + p = ups; + U_WLOCK (); + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + cur->weight = cur->priority; + if (!cur->dead) { + if (max_weight < cur->priority) { + max_weight = cur->priority; + selected = cur; + } + } + p += msize; + } + U_UNLOCK (); + } + + return selected; +} + +/* + * Recheck all upstreams and return upstream in round-robin order according to only priority (master-slaves) + */ +struct upstream * +get_upstream_master_slave (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors) +{ + guint max_weight, i; + struct upstream *cur, *selected = NULL; + u_char *p; + + /* Recheck all upstreams */ + (void)rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); + + p = ups; + max_weight = 0; + selected = (struct upstream *)p; + U_RLOCK (); + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + if (!cur->dead) { + if (max_weight < cur->priority) { + max_weight = cur->priority; + selected = cur; + } + } + p += msize; + } + U_UNLOCK (); + + return selected; +} + +/* + * Ketama manipulation functions + */ + +static gint +ketama_sort_cmp (const void *a1, const void *a2) +{ + return *((guint32 *) a1) - *((guint32 *) a2); +} + +/* + * Add ketama points for specified upstream + */ +gint +upstream_ketama_add (struct upstream *up, gchar *up_key, 
size_t keylen, size_t keypoints) +{ + guint32 h = 0; + gchar tmp[4]; + guint i; + + /* Allocate ketama points array */ + if (up->ketama_points == NULL) { + up->ketama_points_size = keypoints; + up->ketama_points = malloc (sizeof (guint32) * up->ketama_points_size); + if (up->ketama_points == NULL) { + return -1; + } + } + + h = get_hash_for_key (h, up_key, keylen); + + for (i = 0; i < keypoints; i++) { + tmp[0] = i & 0xff; + tmp[1] = (i >> 8) & 0xff; + tmp[2] = (i >> 16) & 0xff; + tmp[3] = (i >> 24) & 0xff; + + h = get_hash_for_key (h, tmp, sizeof (tmp) * sizeof (gchar)); + up->ketama_points[i] = h; + } + /* Keep points sorted */ + qsort (up->ketama_points, keypoints, sizeof (guint32), ketama_sort_cmp); + + return 0; +} + +/* + * Return upstream by hash and find nearest ketama point in some server + */ +struct upstream * +get_upstream_by_hash_ketama (void *ups, size_t members, size_t msize, + time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors, const gchar *key, size_t keylen) +{ + guint alive, i; + guint32 h = 0, step, middle, d, min_diff = UINT_MAX; + gchar *p; + struct upstream *cur = NULL, *nearest = NULL; + + alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); + + if (alive == 0) { + return NULL; + } + + h = get_hash_for_key (h, key, keylen); + + U_RLOCK (); + p = ups; + nearest = (struct upstream *)p; + for (i = 0; i < members; i++) { + cur = (struct upstream *)p; + if (!cur->dead && cur->ketama_points != NULL) { + /* Find nearest ketama point for this key */ + step = cur->ketama_points_size / 2; + middle = step; + while (step != 1) { + d = cur->ketama_points[middle] - h; + if (abs (d) < (gint)min_diff) { + min_diff = abs (d); + nearest = cur; + } + step /= 2; + if (d > 0) { + middle -= step; + } + else { + middle += step; + } + } + } + } + U_UNLOCK (); + return nearest; +} + +#undef U_LOCK +#undef U_UNLOCK +/* + * vi:ts=4 + */ diff --git a/src/libutil/upstream.h b/src/libutil/upstream.h new 
file mode 100644 index 000000000..da0a00013 --- /dev/null +++ b/src/libutil/upstream.h @@ -0,0 +1,127 @@ +#ifndef UPSTREAM_H +#define UPSTREAM_H + +#include +#include + +/** + * Structure of generic upstream + */ +struct upstream { + guint errors; /**< Errors for this upstream */ + time_t time; /**< Time of marking */ + guint dead; /**< Dead flag */ + guint priority; /**< Fixed priority */ + gint16 weight; /**< Dynamic weight */ + guint32 *ketama_points; /**< Ketama points array */ + size_t ketama_points_size; /**< Ketama array size */ +}; + +/** + * Upstream error logic + * 1. During error time we count upstream_ok and upstream_fail + * 2. If failcount is more then maxerrors then we mark upstream as unavailable for dead time + * 3. After dead time we mark upstream as alive and go to the step 1 + * 4. If all upstreams are dead, marks every upstream as alive + */ + +/** + * Add an error to an upstream + */ +void upstream_fail (struct upstream *up, time_t now); + +/** + * Increase upstream successes count + */ +void upstream_ok (struct upstream *up, time_t now); + +/** + * Make all upstreams alive + */ +void revive_all_upstreams (void *ups, size_t members, size_t msize); + +/** + * Add ketama points for upstream + */ +gint upstream_ketama_add (struct upstream *up, gchar *up_key, size_t keylen, size_t keypoints); + +/** + * Get a random upstream from array of upstreams + * @param ups array of structures that contains struct upstream as their first element + * @param members number of elements in array + * @param msize size of each member + * @param now current time + * @param error_timeout time during which we are counting errors + * @param revive_timeout time during which we counts upstream dead + * @param max_errors maximum errors during error_timeout to mark upstream dead + */ +struct upstream* get_random_upstream (void *ups, size_t members, size_t msize, + time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors); + +/** + * Get upstream based 
on hash from array of upstreams + * @param ups array of structures that contains struct upstream as their first element + * @param members number of elements in array + * @param msize size of each member + * @param now current time + * @param error_timeout time during which we are counting errors + * @param revive_timeout time during which we counts upstream dead + * @param max_errors maximum errors during error_timeout to mark upstream dead + * @param key key for hashing + * @param keylen length of the key + */ +struct upstream* get_upstream_by_hash (void *ups, size_t members, size_t msize, + time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors, + const gchar *key, size_t keylen); + +/** + * Get an upstream from array of upstreams based on its current weight + * @param ups array of structures that contains struct upstream as their first element + * @param members number of elements in array + * @param msize size of each member + * @param now current time + * @param error_timeout time during which we are counting errors + * @param revive_timeout time during which we counts upstream dead + * @param max_errors maximum errors during error_timeout to mark upstream dead + */ +struct upstream* get_upstream_round_robin (void *ups, size_t members, size_t msize, + time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors); + +/** + * Get upstream based on hash from array of upstreams, this functions is using ketama algorithm + * @param ups array of structures that contains struct upstream as their first element + * @param members number of elements in array + * @param msize size of each member + * @param now current time + * @param error_timeout time during which we are counting errors + * @param revive_timeout time during which we counts upstream dead + * @param max_errors maximum errors during error_timeout to mark upstream dead + * @param key key for hashing + * @param keylen length of the key + */ +struct upstream* 
get_upstream_by_hash_ketama (void *ups, size_t members, size_t msize, time_t now, + time_t error_timeout, time_t revive_timeout, size_t max_errors, + const gchar *key, size_t keylen); + +/** + * Get an upstream from array of upstreams based on its current priority (not weight) + * @param ups array of structures that contains struct upstream as their first element + * @param members number of elements in array + * @param msize size of each member + * @param now current time + * @param error_timeout time during which we are counting errors + * @param revive_timeout time during which we counts upstream dead + * @param max_errors maximum errors during error_timeout to mark upstream dead + */ +struct upstream* get_upstream_master_slave (void *ups, size_t members, size_t msize, + time_t now, time_t error_timeout, + time_t revive_timeout, size_t max_errors); + + +#endif /* UPSTREAM_H */ +/* + * vi:ts=4 + */ diff --git a/src/libutil/util.c b/src/libutil/util.c new file mode 100644 index 000000000..03b38e087 --- /dev/null +++ b/src/libutil/util.c @@ -0,0 +1,2275 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#include "config.h" +#include "util.h" +#include "cfg_file.h" +#include "main.h" +#include "statfile.h" +#include "filter.h" +#include "message.h" + +#ifdef HAVE_OPENSSL +#include +#include +#endif + +#ifdef HAVE_TERMIOS_H +#include +#endif +#ifdef HAVE_READPASSPHRASE_H +#include +#endif + +/* Check log messages intensity once per minute */ +#define CHECK_TIME 60 +/* More than 2 log messages per second */ +#define BUF_INTENSITY 2 +/* Default connect timeout for sync sockets */ +#define CONNECT_TIMEOUT 3 + +gint +make_socket_nonblocking (gint fd) +{ + gint ofl; + + ofl = fcntl (fd, F_GETFL, 0); + + if (fcntl (fd, F_SETFL, ofl | O_NONBLOCK) == -1) { + msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); + return -1; + } + return 0; +} + +gint +make_socket_blocking (gint fd) +{ + gint ofl; + + ofl = fcntl (fd, F_GETFL, 0); + + if (fcntl (fd, F_SETFL, ofl & (~O_NONBLOCK)) == -1) { + msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); + return -1; + } + return 0; +} + +gint +poll_sync_socket (gint fd, gint timeout, short events) +{ + gint r; + struct pollfd fds[1]; + + fds->fd = fd; + fds->events = events; + fds->revents = 0; + while ((r = poll (fds, 1, timeout)) < 0) { + if (errno != EINTR) { + break; + } + } + + return r; +} + +static gint +make_inet_socket (gint type, struct addrinfo *addr, gboolean is_server, gboolean async, GList **list) +{ + gint fd, r, optlen, on = 1, s_error; + struct addrinfo *cur; + + cur = addr; + while (cur) { + 
/* Create socket */ + fd = socket (cur->ai_family, type, 0); + if (fd == -1) { + msg_warn ("socket failed: %d, '%s'", errno, strerror (errno)); + goto out; + } + + if (make_socket_nonblocking (fd) < 0) { + goto out; + } + + /* Set close on exec */ + if (fcntl (fd, F_SETFD, FD_CLOEXEC) == -1) { + msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); + goto out; + } + + if (is_server) { + setsockopt (fd, SOL_SOCKET, SO_REUSEADDR, (const void *)&on, sizeof (gint)); +#ifdef HAVE_IPV6_V6ONLY + if (cur->ai_family == AF_INET6) { + setsockopt (fd, IPPROTO_IPV6, IPV6_V6ONLY, (const void *)&on, sizeof (gint)); + } +#endif + r = bind (fd, cur->ai_addr, cur->ai_addrlen); + } + else { + r = connect (fd, cur->ai_addr, cur->ai_addrlen); + } + + if (r == -1) { + if (errno != EINPROGRESS) { + msg_warn ("bind/connect failed: %d, '%s'", errno, strerror (errno)); + goto out; + } + if (!async) { + /* Try to poll */ + if (poll_sync_socket (fd, CONNECT_TIMEOUT * 1000, POLLOUT) <= 0) { + errno = ETIMEDOUT; + msg_warn ("bind/connect failed: timeout"); + goto out; + } + else { + /* Make synced again */ + if (make_socket_blocking (fd) < 0) { + goto out; + } + } + } + } + else { + /* Still need to check SO_ERROR on socket */ + optlen = sizeof (s_error); + getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&s_error, &optlen); + if (s_error) { + errno = s_error; + goto out; + } + } + if (list == NULL) { + /* Go out immediately */ + break; + } + else if (fd != -1) { + *list = g_list_prepend (*list, GINT_TO_POINTER (fd)); + cur = cur->ai_next; + continue; + } +out: + if (fd != -1) { + close (fd); + } + fd = -1; + cur = cur->ai_next; + } + return (fd); +} + +gint +make_tcp_socket (struct addrinfo *addr, gboolean is_server, gboolean async) +{ + return make_inet_socket (SOCK_STREAM, addr, is_server, async, NULL); +} + +gint +make_udp_socket (struct addrinfo *addr, gboolean is_server, gboolean async) +{ + return make_inet_socket (SOCK_DGRAM, addr, is_server, async, NULL); +} + +gint 
+make_unix_socket (const gchar *path, struct sockaddr_un *addr, gint type, gboolean is_server, gboolean async) +{ + gint fd = -1, s_error, r, optlen, serrno, on = 1; + struct stat st; + + if (path == NULL) + return -1; + + addr->sun_family = AF_UNIX; + + rspamd_strlcpy (addr->sun_path, path, sizeof (addr->sun_path)); +#ifdef FREEBSD + addr->sun_len = SUN_LEN (addr); +#endif + + if (is_server) { + /* Unlink socket if it exists already */ + if (lstat (addr->sun_path, &st) != -1) { + if (S_ISSOCK (st.st_mode)) { + if (unlink (addr->sun_path) == -1) { + msg_warn ("unlink %s failed: %d, '%s'", addr->sun_path, errno, strerror (errno)); + goto out; + } + } + else { + msg_warn ("%s is not a socket", addr->sun_path); + goto out; + } + } + } + fd = socket (PF_LOCAL, type, 0); + + if (fd == -1) { + msg_warn ("socket failed %s: %d, '%s'", addr->sun_path, errno, strerror (errno)); + return -1; + } + + if (make_socket_nonblocking (fd) < 0) { + goto out; + } + + /* Set close on exec */ + if (fcntl (fd, F_SETFD, FD_CLOEXEC) == -1) { + msg_warn ("fcntl failed %s: %d, '%s'", addr->sun_path, errno, strerror (errno)); + goto out; + } + if (is_server) { + setsockopt (fd, SOL_SOCKET, SO_REUSEADDR, (const void *)&on, sizeof (gint)); + r = bind (fd, (struct sockaddr *)addr, SUN_LEN (addr)); + } + else { + r = connect (fd, (struct sockaddr *)addr, SUN_LEN (addr)); + } + + if (r == -1) { + if (errno != EINPROGRESS) { + msg_warn ("bind/connect failed %s: %d, '%s'", addr->sun_path, errno, strerror (errno)); + goto out; + } + if (!async) { + /* Try to poll */ + if (poll_sync_socket (fd, CONNECT_TIMEOUT * 1000, POLLOUT) <= 0) { + errno = ETIMEDOUT; + msg_warn ("bind/connect failed %s: timeout", addr->sun_path); + goto out; + } + else { + /* Make synced again */ + if (make_socket_blocking (fd) < 0) { + goto out; + } + } + } + } + else { + /* Still need to check SO_ERROR on socket */ + optlen = sizeof (s_error); + getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&s_error, &optlen); + if (s_error) { 
+ errno = s_error; + goto out; + } + } + + + return (fd); + + out: + serrno = errno; + if (fd != -1) { + close (fd); + } + errno = serrno; + return (-1); +} + +/** + * Make a universal socket + * @param credits host, ip or path to unix socket + * @param port port (used for network sockets) + * @param async make this socket asynced + * @param is_server make this socket as server socket + * @param try_resolve try name resolution for a socket (BLOCKING) + */ +gint +make_universal_socket (const gchar *credits, guint16 port, + gint type, gboolean async, gboolean is_server, gboolean try_resolve) +{ + struct sockaddr_un un; + struct stat st; + struct addrinfo hints, *res; + gint r; + gchar portbuf[8]; + + if (*credits == '/') { + if (is_server) { + return make_unix_socket (credits, &un, type, is_server, async); + } + else { + r = stat (credits, &st); + if (r == -1) { + /* Unix socket doesn't exists it must be created first */ + errno = ENOENT; + return -1; + } + else { + if ((st.st_mode & S_IFSOCK) == 0) { + /* Path is not valid socket */ + errno = EINVAL; + return -1; + } + else { + return make_unix_socket (credits, &un, type, is_server, async); + } + } + } + } + else { + /* TCP related part */ + memset (&hints, 0, sizeof (hints)); + hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */ + hints.ai_socktype = type; /* Type of the socket */ + hints.ai_flags = is_server ? 
AI_PASSIVE : 0; + hints.ai_protocol = 0; /* Any protocol */ + hints.ai_canonname = NULL; + hints.ai_addr = NULL; + hints.ai_next = NULL; + + if (!try_resolve) { + hints.ai_flags |= AI_NUMERICHOST | AI_NUMERICSERV; + } + + rspamd_snprintf (portbuf, sizeof (portbuf), "%d", (int)port); + if ((r = getaddrinfo (credits, portbuf, &hints, &res)) == 0) { + r = make_inet_socket (type, res, is_server, async, NULL); + freeaddrinfo (res); + return r; + } + else { + msg_err ("address resolution for %s failed: %s", credits, gai_strerror (r)); + return FALSE; + } + } +} + +/** + * Make universal stream socket + * @param credits host, ip or path to unix socket + * @param port port (used for network sockets) + * @param async make this socket asynced + * @param is_server make this socket as server socket + * @param try_resolve try name resolution for a socket (BLOCKING) + */ +GList* +make_universal_sockets_list (const gchar *credits, guint16 port, + gint type, gboolean async, gboolean is_server, gboolean try_resolve) +{ + struct sockaddr_un un; + struct stat st; + struct addrinfo hints, *res; + gint r, fd, serrno; + gchar portbuf[8], **strv, **cur; + GList *result = NULL, *rcur; + + strv = g_strsplit_set (credits, ",", -1); + if (strv == NULL) { + msg_err ("invalid sockets credentials: %s", credits); + return NULL; + } + cur = strv; + while (*cur != NULL) { + if (*credits == '/') { + if (is_server) { + fd = make_unix_socket (credits, &un, type, is_server, async); + } + else { + r = stat (credits, &st); + if (r == -1) { + /* Unix socket doesn't exists it must be created first */ + errno = ENOENT; + goto err; + } + else { + if ((st.st_mode & S_IFSOCK) == 0) { + /* Path is not valid socket */ + errno = EINVAL; + goto err; + } + else { + fd = make_unix_socket (credits, &un, type, is_server, async); + } + } + } + if (fd != -1) { + result = g_list_prepend (result, GINT_TO_POINTER (fd)); + } + else { + goto err; + } + } + else { + /* TCP related part */ + memset (&hints, 0, sizeof 
(hints)); + hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */ + hints.ai_socktype = type; /* Type of the socket */ + hints.ai_flags = is_server ? AI_PASSIVE : 0; + hints.ai_protocol = 0; /* Any protocol */ + hints.ai_canonname = NULL; + hints.ai_addr = NULL; + hints.ai_next = NULL; + + if (!try_resolve) { + hints.ai_flags |= AI_NUMERICHOST | AI_NUMERICSERV; + } + + rspamd_snprintf (portbuf, sizeof (portbuf), "%d", (int)port); + if ((r = getaddrinfo (credits, portbuf, &hints, &res)) == 0) { + r = make_inet_socket (type, res, is_server, async, &result); + freeaddrinfo (res); + if (r == -1) { + goto err; + } + } + else { + msg_err ("address resolution for %s failed: %s", credits, gai_strerror (r)); + goto err; + } + } + cur ++; + } + + g_strfreev (strv); + return result; + +err: + g_strfreev (strv); + serrno = errno; + rcur = result; + while (rcur != NULL) { + fd = GPOINTER_TO_INT (rcur->data); + if (fd != -1) { + close (fd); + } + rcur = g_list_next (rcur); + } + if (result != NULL) { + g_list_free (result); + } + + errno = serrno; + return NULL; +} + +gint +make_socketpair (gint pair[2]) +{ + gint r; + + r = socketpair (AF_LOCAL, SOCK_STREAM, 0, pair); + + if (r == -1) { + msg_warn ("socketpair failed: %d, '%s'", errno, strerror (errno), pair[0], pair[1]); + return -1; + } + /* Set close on exec */ + if (fcntl (pair[0], F_SETFD, FD_CLOEXEC) == -1) { + msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); + goto out; + } + if (fcntl (pair[1], F_SETFD, FD_CLOEXEC) == -1) { + msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); + goto out; + } + + return 0; + +out: + close (pair[0]); + close (pair[1]); + return (-1); +} + +gint +write_pid (struct rspamd_main *main) +{ + pid_t pid; + + if (main->cfg->pid_file == NULL) { + return -1; + } + main->pfh = rspamd_pidfile_open (main->cfg->pid_file, 0644, &pid); + + if (main->pfh == NULL) { + return -1; + } + + if (main->is_privilleged) { + /* Force root user as owner of pid file */ +#ifdef 
HAVE_PIDFILE_FILENO + if (fchown (pidfile_fileno (main->pfh), 0, 0) == -1) { +#else + if (fchown (main->pfh->pf_fd, 0, 0) == -1) { +#endif + msg_err ("cannot chown of pidfile %s to 0:0 user", main->cfg->pid_file); + } + } + + rspamd_pidfile_write (main->pfh); + + return 0; +} + +#ifdef HAVE_SA_SIGINFO +void +init_signals (struct sigaction *signals, void (*sig_handler)(gint, siginfo_t *, void *)) +#else +void +init_signals (struct sigaction *signals, void (*sig_handler)(gint)) +#endif +{ + struct sigaction sigpipe_act; + /* Setting up signal handlers */ + /* SIGUSR1 - reopen config file */ + /* SIGUSR2 - worker is ready for accept */ + sigemptyset (&signals->sa_mask); + sigaddset (&signals->sa_mask, SIGTERM); + sigaddset (&signals->sa_mask, SIGINT); + sigaddset (&signals->sa_mask, SIGHUP); + sigaddset (&signals->sa_mask, SIGCHLD); + sigaddset (&signals->sa_mask, SIGUSR1); + sigaddset (&signals->sa_mask, SIGUSR2); + sigaddset (&signals->sa_mask, SIGALRM); + + +#ifdef HAVE_SA_SIGINFO + signals->sa_flags = SA_SIGINFO; + signals->sa_handler = NULL; + signals->sa_sigaction = sig_handler; +#else + signals->sa_handler = sig_handler; + signals->sa_flags = 0; +#endif + sigaction (SIGTERM, signals, NULL); + sigaction (SIGINT, signals, NULL); + sigaction (SIGHUP, signals, NULL); + sigaction (SIGCHLD, signals, NULL); + sigaction (SIGUSR1, signals, NULL); + sigaction (SIGUSR2, signals, NULL); + sigaction (SIGALRM, signals, NULL); + + /* Ignore SIGPIPE as we handle write errors manually */ + sigemptyset (&sigpipe_act.sa_mask); + sigaddset (&sigpipe_act.sa_mask, SIGPIPE); + sigpipe_act.sa_handler = SIG_IGN; + sigpipe_act.sa_flags = 0; + sigaction (SIGPIPE, &sigpipe_act, NULL); +} + +static void +pass_signal_cb (gpointer key, gpointer value, gpointer ud) +{ + struct rspamd_worker *cur = value; + gint signo = GPOINTER_TO_INT (ud); + + kill (cur->pid, signo); +} + +void +pass_signal_worker (GHashTable * workers, gint signo) +{ + g_hash_table_foreach (workers, pass_signal_cb, 
GINT_TO_POINTER (signo)); +} + +void +convert_to_lowercase (gchar *str, guint size) +{ + while (size--) { + *str = g_ascii_tolower (*str); + str++; + } +} + +#ifndef HAVE_SETPROCTITLE + +static gchar *title_buffer = 0; +static size_t title_buffer_size = 0; +static gchar *title_progname, *title_progname_full; + +gint +setproctitle (const gchar *fmt, ...) +{ + if (!title_buffer || !title_buffer_size) { + errno = ENOMEM; + return -1; + } + + memset (title_buffer, '\0', title_buffer_size); + + ssize_t written; + + if (fmt) { + ssize_t written2; + va_list ap; + + written = snprintf (title_buffer, title_buffer_size, "%s: ", title_progname); + if (written < 0 || (size_t) written >= title_buffer_size) + return -1; + + va_start (ap, fmt); + written2 = vsnprintf (title_buffer + written, title_buffer_size - written, fmt, ap); + va_end (ap); + if (written2 < 0 || (size_t) written2 >= title_buffer_size - written) + return -1; + } + else { + written = snprintf (title_buffer, title_buffer_size, "%s", title_progname); + if (written < 0 || (size_t) written >= title_buffer_size) + return -1; + } + + written = strlen (title_buffer); + memset (title_buffer + written, '\0', title_buffer_size - written); + + return 0; +} + +/* + It has to be _init function, because __attribute__((constructor)) + functions gets called without arguments. 
+*/ + +gint +init_title (gint argc, gchar *argv[], gchar *envp[]) +{ +#if defined(DARWIN) || defined(SOLARIS) + /* XXX: try to handle these OSes too */ + return 0; +#else + gchar *begin_of_buffer = 0, *end_of_buffer = 0; + gint i; + + for (i = 0; i < argc; ++i) { + if (!begin_of_buffer) + begin_of_buffer = argv[i]; + if (!end_of_buffer || end_of_buffer + 1 == argv[i]) + end_of_buffer = argv[i] + strlen (argv[i]); + } + + for (i = 0; envp[i]; ++i) { + if (!begin_of_buffer) + begin_of_buffer = envp[i]; + if (!end_of_buffer || end_of_buffer + 1 == envp[i]) + end_of_buffer = envp[i] + strlen (envp[i]); + } + + if (!end_of_buffer) + return 0; + + gchar **new_environ = g_malloc ((i + 1) * sizeof (envp[0])); + + if (!new_environ) + return 0; + + for (i = 0; envp[i]; ++i) { + if (!(new_environ[i] = g_strdup (envp[i]))) + goto cleanup_enomem; + } + new_environ[i] = 0; + + if (program_invocation_name) { + title_progname_full = g_strdup (program_invocation_name); + + if (!title_progname_full) + goto cleanup_enomem; + + gchar *p = strrchr (title_progname_full, '/'); + + if (p) + title_progname = p + 1; + else + title_progname = title_progname_full; + + program_invocation_name = title_progname_full; + program_invocation_short_name = title_progname; + } + + environ = new_environ; + title_buffer = begin_of_buffer; + title_buffer_size = end_of_buffer - begin_of_buffer; + + return 0; + + cleanup_enomem: + for (--i; i >= 0; --i) { + g_free (new_environ[i]); + } + g_free (new_environ); + return 0; +#endif +} +#endif + +#ifndef HAVE_PIDFILE +extern gchar *__progname; +static gint _rspamd_pidfile_remove (rspamd_pidfh_t *pfh, gint freeit); + +static gint +rspamd_pidfile_verify (rspamd_pidfh_t *pfh) +{ + struct stat sb; + + if (pfh == NULL || pfh->pf_fd == -1) + return (-1); + /* + * Check remembered descriptor. 
+ */ + if (fstat (pfh->pf_fd, &sb) == -1) + return (errno); + if (sb.st_dev != pfh->pf_dev || sb.st_ino != pfh->pf_ino) + return -1; + return 0; +} + +static gint +rspamd_pidfile_read (const gchar *path, pid_t * pidptr) +{ + gchar buf[16], *endptr; + gint error, fd, i; + + fd = open (path, O_RDONLY); + if (fd == -1) + return (errno); + + i = read (fd, buf, sizeof (buf) - 1); + error = errno; /* Remember errno in case close() wants to change it. */ + close (fd); + if (i == -1) + return error; + else if (i == 0) + return EAGAIN; + buf[i] = '\0'; + + *pidptr = strtol (buf, &endptr, 10); + if (endptr != &buf[i]) + return EINVAL; + + return 0; +} + +rspamd_pidfh_t * +rspamd_pidfile_open (const gchar *path, mode_t mode, pid_t * pidptr) +{ + rspamd_pidfh_t *pfh; + struct stat sb; + gint error, fd, len, count; + struct timespec rqtp; + + pfh = g_malloc (sizeof (*pfh)); + if (pfh == NULL) + return NULL; + + if (path == NULL) + len = snprintf (pfh->pf_path, sizeof (pfh->pf_path), "/var/run/%s.pid", g_get_prgname ()); + else + len = snprintf (pfh->pf_path, sizeof (pfh->pf_path), "%s", path); + if (len >= (gint)sizeof (pfh->pf_path)) { + g_free (pfh); + errno = ENAMETOOLONG; + return NULL; + } + + /* + * Open the PID file and obtain exclusive lock. + * We truncate PID file here only to remove old PID immediatelly, + * PID file will be truncated again in pidfile_write(), so + * pidfile_write() can be called multiple times. 
+ */ + fd = open (pfh->pf_path, O_WRONLY | O_CREAT | O_TRUNC | O_NONBLOCK, mode); + lock_file (fd, TRUE); + if (fd == -1) { + count = 0; + rqtp.tv_sec = 0; + rqtp.tv_nsec = 5000000; + if (errno == EWOULDBLOCK && pidptr != NULL) { + again: + errno = rspamd_pidfile_read (pfh->pf_path, pidptr); + if (errno == 0) + errno = EEXIST; + else if (errno == EAGAIN) { + if (++count <= 3) { + nanosleep (&rqtp, 0); + goto again; + } + } + } + g_free (pfh); + return NULL; + } + /* + * Remember file information, so in pidfile_write() we are sure we write + * to the proper descriptor. + */ + if (fstat (fd, &sb) == -1) { + error = errno; + unlink (pfh->pf_path); + close (fd); + g_free (pfh); + errno = error; + return NULL; + } + + pfh->pf_fd = fd; + pfh->pf_dev = sb.st_dev; + pfh->pf_ino = sb.st_ino; + + return pfh; +} + +gint +rspamd_pidfile_write (rspamd_pidfh_t *pfh) +{ + gchar pidstr[16]; + gint error, fd; + + /* + * Check remembered descriptor, so we don't overwrite some other + * file if pidfile was closed and descriptor reused. + */ + errno = rspamd_pidfile_verify (pfh); + if (errno != 0) { + /* + * Don't close descriptor, because we are not sure if it's ours. + */ + return -1; + } + fd = pfh->pf_fd; + + /* + * Truncate PID file, so multiple calls of pidfile_write() are allowed. 
+ */ + if (ftruncate (fd, 0) == -1) { + error = errno; + _rspamd_pidfile_remove (pfh, 0); + errno = error; + return -1; + } + + rspamd_snprintf (pidstr, sizeof (pidstr), "%P", getpid ()); + if (pwrite (fd, pidstr, strlen (pidstr), 0) != (ssize_t) strlen (pidstr)) { + error = errno; + _rspamd_pidfile_remove (pfh, 0); + errno = error; + return -1; + } + + return 0; +} + +gint +rspamd_pidfile_close (rspamd_pidfh_t *pfh) +{ + gint error; + + error = rspamd_pidfile_verify (pfh); + if (error != 0) { + errno = error; + return -1; + } + + if (close (pfh->pf_fd) == -1) + error = errno; + g_free (pfh); + if (error != 0) { + errno = error; + return -1; + } + return 0; +} + +static gint +_rspamd_pidfile_remove (rspamd_pidfh_t *pfh, gint freeit) +{ + gint error; + + error = rspamd_pidfile_verify (pfh); + if (error != 0) { + errno = error; + return -1; + } + + if (unlink (pfh->pf_path) == -1) + error = errno; + if (!unlock_file (pfh->pf_fd, FALSE)) { + if (error == 0) + error = errno; + } + if (close (pfh->pf_fd) == -1) { + if (error == 0) + error = errno; + } + if (freeit) + g_free (pfh); + else + pfh->pf_fd = -1; + if (error != 0) { + errno = error; + return -1; + } + return 0; +} + +gint +rspamd_pidfile_remove (rspamd_pidfh_t *pfh) +{ + + return (_rspamd_pidfile_remove (pfh, 1)); +} +#endif + +/* Replace %r with rcpt value and %f with from value, new string is allocated in pool */ +gchar * +resolve_stat_filename (rspamd_mempool_t * pool, gchar *pattern, gchar *rcpt, gchar *from) +{ + gint need_to_format = 0, len = 0; + gint rcptlen, fromlen; + gchar *c = pattern, *new, *s; + + if (rcpt) { + rcptlen = strlen (rcpt); + } + else { + rcptlen = 0; + } + + if (from) { + fromlen = strlen (from); + } + else { + fromlen = 0; + } + + /* Calculate length */ + while (*c++) { + if (*c == '%' && *(c + 1) == 'r') { + len += rcptlen; + c += 2; + need_to_format = 1; + continue; + } + else if (*c == '%' && *(c + 1) == 'f') { + len += fromlen; + c += 2; + need_to_format = 1; + continue; + } + 
len++; + } + + /* Do not allocate extra memory if we do not need to format string */ + if (!need_to_format) { + return pattern; + } + + /* Allocate new string */ + new = rspamd_mempool_alloc (pool, len); + c = pattern; + s = new; + + /* Format string */ + while (*c++) { + if (*c == '%' && *(c + 1) == 'r') { + c += 2; + memcpy (s, rcpt, rcptlen); + s += rcptlen; + continue; + } + else if (*c == '%' && *(c + 1) == 'r') { + c += 2; + memcpy (s, from, fromlen); + s += fromlen; + continue; + } + *s++ = *c; + } + + *s = '\0'; + + return new; +} + +#ifdef HAVE_CLOCK_GETTIME +const gchar * +calculate_check_time (struct timeval *tv, struct timespec *begin, gint resolution, guint32 *scan_time) +#else +const gchar * +calculate_check_time (struct timeval *begin, gint resolution, guint32 *scan_time) +#endif +{ + double vdiff, diff; + static gchar res[64]; + static gchar fmt[sizeof ("%.10f ms real, %.10f ms virtual")]; + struct timeval tv_now; + + if (gettimeofday (&tv_now, NULL) == -1) { + msg_warn ("gettimeofday failed: %s", strerror (errno)); + } +#ifdef HAVE_CLOCK_GETTIME + struct timespec ts; + + diff = (tv_now.tv_sec - tv->tv_sec) * 1000. + /* Seconds */ + (tv_now.tv_usec - tv->tv_usec) / 1000.; /* Microseconds */ +#ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID + clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts); +#elif defined(HAVE_CLOCK_VIRTUAL) + clock_gettime (CLOCK_VIRTUAL, &ts); +#else + clock_gettime (CLOCK_REALTIME, &ts); +#endif + + vdiff = (ts.tv_sec - begin->tv_sec) * 1000. + /* Seconds */ + (ts.tv_nsec - begin->tv_nsec) / 1000000.; /* Nanoseconds */ +#else + diff = (tv_now.tv_sec - begin->tv_sec) * 1000. + /* Seconds */ + (tv_now.tv_usec - begin->tv_usec) / 1000.; /* Microseconds */ + + vdiff = diff; +#endif + + *scan_time = diff; + + sprintf (fmt, "%%.%dfms real, %%.%dfms virtual", resolution, resolution); + snprintf (res, sizeof (res), fmt, diff, vdiff); + + return (const gchar *)res; +} + +#ifndef g_tolower +# define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? 
(x) - 'A' + 'a' : (x)) +#endif + + +gboolean +rspamd_strcase_equal (gconstpointer v, gconstpointer v2) +{ + if (g_ascii_strcasecmp ((const gchar *)v, (const gchar *)v2) == 0) { + return TRUE; + } + + return FALSE; +} + + +guint +rspamd_strcase_hash (gconstpointer key) +{ + const gchar *p = key; + gchar buf[256]; + guint h = 0, i = 0; + + + while (*p != '\0') { + buf[i] = g_ascii_tolower (*p); + i++; + p++; + if (i == sizeof (buf)) { + h ^= murmur32_hash (buf, i); + i = 0; + } + } + + if (i > 0) { + h ^= murmur32_hash (buf, i); + } + + return h; +} + +guint +rspamd_str_hash (gconstpointer key) +{ + gsize len; + + len = strlen ((const gchar *)key); + + return murmur32_hash (key, len); +} + +gboolean +rspamd_str_equal (gconstpointer v, gconstpointer v2) +{ + return strcmp ((const gchar *)v, (const gchar *)v2) == 0; +} + +gboolean +fstr_strcase_equal (gconstpointer v, gconstpointer v2) +{ + const f_str_t *f1 = v, *f2 = v2; + if (f1->len == f2->len && g_ascii_strncasecmp (f1->begin, f2->begin, f1->len) == 0) { + return TRUE; + } + + return FALSE; +} + + +guint +fstr_strcase_hash (gconstpointer key) +{ + const f_str_t *f = key; + const gchar *p; + guint h = 0, i = 0; + gchar buf[256]; + + p = f->begin; + while (p - f->begin < (gint)f->len) { + buf[i] = g_ascii_tolower (*p); + i++; + p++; + if (i == sizeof (buf)) { + h ^= murmur32_hash (buf, i); + i = 0; + } + } + + if (i > 0) { + h ^= murmur32_hash (buf, i); + } + + return h; +} + +void +gperf_profiler_init (struct config_file *cfg, const gchar *descr) +{ +#if defined(WITH_GPERF_TOOLS) + gchar prof_path[PATH_MAX]; + + if (getenv ("CPUPROFILE")) { + + /* disable inherited Profiler enabled in master process */ + ProfilerStop (); + } + /* Try to create temp directory for gmon.out and chdir to it */ + if (cfg->profile_path == NULL) { + cfg->profile_path = g_strdup_printf ("%s/rspamd-profile", cfg->temp_dir); + } + + snprintf (prof_path, sizeof (prof_path), "%s-%s.%d", cfg->profile_path, descr, (gint)getpid ()); + if 
(ProfilerStart (prof_path)) { + /* start ITIMER_PROF timer */ + ProfilerRegisterThread (); + } + else { + msg_warn ("cannot start google perftools profiler"); + } + +#endif +} + +#ifdef HAVE_FLOCK +/* Flock version */ +gboolean +lock_file (gint fd, gboolean async) +{ + gint flags; + + if (async) { + flags = LOCK_EX | LOCK_NB; + } + else { + flags = LOCK_EX; + } + + if (flock (fd, flags) == -1) { + if (async && errno == EAGAIN) { + return FALSE; + } + msg_warn ("lock on file failed: %s", strerror (errno)); + return FALSE; + } + + return TRUE; +} + +gboolean +unlock_file (gint fd, gboolean async) +{ + gint flags; + + if (async) { + flags = LOCK_UN | LOCK_NB; + } + else { + flags = LOCK_UN; + } + + if (flock (fd, flags) == -1) { + if (async && errno == EAGAIN) { + return FALSE; + } + msg_warn ("lock on file failed: %s", strerror (errno)); + return FALSE; + } + + return TRUE; + +} +#else /* HAVE_FLOCK */ +/* Fctnl version */ +gboolean +lock_file (gint fd, gboolean async) +{ + struct flock fl = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0 + }; + + if (fcntl (fd, async ? F_SETLK : F_SETLKW, &fl) == -1) { + if (async && (errno == EAGAIN || errno == EACCES)) { + return FALSE; + } + msg_warn ("lock on file failed: %s", strerror (errno)); + return FALSE; + } + + return TRUE; +} + +gboolean +unlock_file (gint fd, gboolean async) +{ + struct flock fl = { + .l_type = F_UNLCK, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0 + }; + + if (fcntl (fd, async ? 
F_SETLK : F_SETLKW, &fl) == -1) { + if (async && (errno == EAGAIN || errno == EACCES)) { + return FALSE; + } + msg_warn ("lock on file failed: %s", strerror (errno)); + return FALSE; + } + + return TRUE; + +} +#endif /* HAVE_FLOCK */ + + +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 22)) +void +g_ptr_array_unref (GPtrArray *array) +{ + g_ptr_array_free (array, TRUE); +} +#endif +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 14)) +void +g_queue_clear (GQueue *queue) +{ + g_return_if_fail (queue != NULL); + + g_list_free (queue->head); + queue->head = queue->tail = NULL; + queue->length = 0; +} +#endif + +gsize +rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz) +{ + gchar *d = dst; + const gchar *s = src; + gsize n = siz; + + /* Copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') { + break; + } + } + } + + if (n == 0 && siz != 0) { + *d = '\0'; + } + + return (s - src - 1); /* count does not include NUL */ +} + +gsize +rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz) +{ + gchar *d = dst; + const gchar *s = src; + gsize n = siz; + + /* Copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = g_ascii_tolower (*s++)) == '\0') { + break; + } + } + } + + if (n == 0 && siz != 0) { + *d = '\0'; + } + + return (s - src - 1); /* count does not include NUL */ +} + +/* Compare two emails for building emails tree */ +gint +compare_email_func (gconstpointer a, gconstpointer b) +{ + const struct uri *u1 = a, *u2 = b; + gint r; + + if (u1->hostlen != u2->hostlen || u1->hostlen == 0) { + return u1->hostlen - u2->hostlen; + } + else { + if ((r = g_ascii_strncasecmp (u1->host, u2->host, u1->hostlen)) == 0){ + if (u1->userlen != u2->userlen || u1->userlen == 0) { + return u1->userlen - u2->userlen; + } + else { + return g_ascii_strncasecmp (u1->user, u2->user, u1->userlen); + } + } + else { + return r; + } + } + + return 0; +} + +gint +compare_url_func (gconstpointer 
a, gconstpointer b) +{ + const struct uri *u1 = a, *u2 = b; + int r; + + if (u1->hostlen != u2->hostlen || u1->hostlen == 0) { + return u1->hostlen - u2->hostlen; + } + else { + r = g_ascii_strncasecmp (u1->host, u2->host, u1->hostlen); + if (r == 0 && u1->is_phished != u2->is_phished) { + /* Always insert phished urls to the tree */ + return -1; + } + } + + return r; +} + +/* + * Find the first occurrence of find in s, ignore case. + */ +gchar * +rspamd_strncasestr (const gchar *s, const gchar *find, gint len) +{ + gchar c, sc; + gsize mlen; + + if ((c = *find++) != 0) { + c = g_ascii_tolower (c); + mlen = strlen (find); + do { + do { + if ((sc = *s++) == 0 || len -- == 0) + return (NULL); + } while (g_ascii_tolower (sc) != c); + } while (g_ascii_strncasecmp (s, find, mlen) != 0); + s--; + } + return ((gchar *)s); +} + +/* + * Try to convert string of length to long + */ +gboolean +rspamd_strtol (const gchar *s, gsize len, glong *value) +{ + const gchar *p = s, *end = s + len; + gchar c; + glong v = 0; + const glong cutoff = G_MAXLONG / 10, cutlim = G_MAXLONG % 10; + gboolean neg; + + /* Case negative values */ + if (*p == '-') { + neg = TRUE; + p ++; + } + else { + neg = FALSE; + } + /* Some preparations for range errors */ + + while (p < end) { + c = *p; + if (c >= '0' && c <= '9') { + c -= '0'; + if (v > cutoff || (v == cutoff && c > cutlim)) { + /* Range error */ + *value = neg ? G_MINLONG : G_MAXLONG; + return FALSE; + } + else { + v *= 10; + v += c; + } + } + else { + return FALSE; + } + p ++; + } + + *value = neg ? 
-(v) : v; + return TRUE; +} + +/* + * Try to convert string of length to long + */ +gboolean +rspamd_strtoul (const gchar *s, gsize len, gulong *value) +{ + const gchar *p = s, *end = s + len; + gchar c; + gulong v = 0; + const gulong cutoff = G_MAXULONG / 10, cutlim = G_MAXULONG % 10; + + /* Some preparations for range errors */ + while (p < end) { + c = *p; + if (c >= '0' && c <= '9') { + c -= '0'; + if (v > cutoff || (v == cutoff && (guint8)c > cutlim)) { + /* Range error */ + *value = G_MAXULONG; + return FALSE; + } + else { + v *= 10; + v += c; + } + } + else { + return FALSE; + } + p ++; + } + + *value = v; + return TRUE; +} + +gint +rspamd_fallocate (gint fd, off_t offset, off_t len) +{ +#if defined(HAVE_FALLOCATE) + return fallocate (fd, 0, offset, len); +#elif defined(HAVE_POSIX_FALLOCATE) + return posix_fallocate (fd, offset, len); +#else + /* Return 0 as nothing can be done on this system */ + return 0; +#endif +} + + +/** + * Create new mutex + * @return mutex or NULL + */ +inline rspamd_mutex_t* +rspamd_mutex_new (void) +{ + rspamd_mutex_t *new; + + new = g_slice_alloc (sizeof (rspamd_mutex_t)); +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_mutex_init (&new->mtx); +#else + g_static_mutex_init (&new->mtx); +#endif + + return new; +} + +/** + * Lock mutex + * @param mtx + */ +inline void +rspamd_mutex_lock (rspamd_mutex_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_mutex_lock (&mtx->mtx); +#else + g_static_mutex_lock (&mtx->mtx); +#endif +} + +/** + * Unlock mutex + * @param mtx + */ +inline void +rspamd_mutex_unlock (rspamd_mutex_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_mutex_unlock (&mtx->mtx); +#else + g_static_mutex_unlock (&mtx->mtx); +#endif +} + +void +rspamd_mutex_free (rspamd_mutex_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_mutex_clear (&mtx->mtx); +#endif + g_slice_free1 (sizeof (rspamd_mutex_t), mtx); +} + +/** + * Create 
new rwlock + * @return + */ +rspamd_rwlock_t* +rspamd_rwlock_new (void) +{ + rspamd_rwlock_t *new; + + new = g_malloc (sizeof (rspamd_rwlock_t)); +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_rw_lock_init (&new->rwlock); +#else + g_static_rw_lock_init (&new->rwlock); +#endif + + return new; +} + +/** + * Lock rwlock for writing + * @param mtx + */ +inline void +rspamd_rwlock_writer_lock (rspamd_rwlock_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_rw_lock_writer_lock (&mtx->rwlock); +#else + g_static_rw_lock_writer_lock (&mtx->rwlock); +#endif +} + +/** + * Lock rwlock for reading + * @param mtx + */ +inline void +rspamd_rwlock_reader_lock (rspamd_rwlock_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_rw_lock_reader_lock (&mtx->rwlock); +#else + g_static_rw_lock_reader_lock (&mtx->rwlock); +#endif +} + +/** + * Unlock rwlock from writing + * @param mtx + */ +inline void +rspamd_rwlock_writer_unlock (rspamd_rwlock_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_rw_lock_writer_unlock (&mtx->rwlock); +#else + g_static_rw_lock_writer_unlock (&mtx->rwlock); +#endif +} + +/** + * Unlock rwlock from reading + * @param mtx + */ +inline void +rspamd_rwlock_reader_unlock (rspamd_rwlock_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_rw_lock_reader_unlock (&mtx->rwlock); +#else + g_static_rw_lock_reader_unlock (&mtx->rwlock); +#endif +} + +void +rspamd_rwlock_free (rspamd_rwlock_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_rw_lock_clear (&mtx->rwlock); +#endif + g_slice_free1 (sizeof (rspamd_rwlock_t), mtx); +} + +struct rspamd_thread_data { + gchar *name; + gint id; + GThreadFunc func; + gpointer data; +}; + +static gpointer +rspamd_thread_func (gpointer ud) +{ + struct rspamd_thread_data *td = ud; + sigset_t s_mask; + + /* Ignore signals in thread */ + sigemptyset (&s_mask); + sigaddset (&s_mask, SIGTERM); + 
sigaddset (&s_mask, SIGINT); + sigaddset (&s_mask, SIGHUP); + sigaddset (&s_mask, SIGCHLD); + sigaddset (&s_mask, SIGUSR1); + sigaddset (&s_mask, SIGUSR2); + sigaddset (&s_mask, SIGALRM); + sigaddset (&s_mask, SIGPIPE); + + sigprocmask (SIG_BLOCK, &s_mask, NULL); + + ud = td->func (td->data); + g_free (td->name); + g_free (td); + + return ud; +} + +/** + * Create new named thread + * @param name name pattern + * @param func function to start + * @param data data to pass to function + * @param err error pointer + * @return new thread object that can be joined + */ +GThread* +rspamd_create_thread (const gchar *name, GThreadFunc func, gpointer data, GError **err) +{ + GThread *new; + struct rspamd_thread_data *td; + static gint32 id; + guint r; + + r = strlen (name); + td = g_malloc (sizeof (struct rspamd_thread_data)); + td->id = ++id; + td->name = g_malloc (r + sizeof ("4294967296")); + td->func = func; + td->data = data; + + rspamd_snprintf (td->name, r + sizeof ("4294967296"), "%s-%d", name, id); +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + new = g_thread_try_new (td->name, rspamd_thread_func, td, err); +#else + new = g_thread_create (rspamd_thread_func, td, TRUE, err); +#endif + + return new; +} + +guint32 +murmur32_hash (const guint8 *in, gsize len) +{ + + + const guint32 c1 = 0xcc9e2d51; + const guint32 c2 = 0x1b873593; + + const int nblocks = len / 4; + const guint32 *blocks = (const guint32 *)(in); + const guint8 *tail; + guint32 h = 0; + gint i; + guint32 k; + + if (in == NULL || len == 0) { + return 0; + } + + tail = (const guint8 *)(in + (nblocks * 4)); + + for (i = 0; i < nblocks; i++) { + k = blocks[i]; + + k *= c1; + k = (k << 15) | (k >> (32 - 15)); + k *= c2; + + h ^= k; + h = (h << 13) | (h >> (32 - 13)); + h = (h * 5) + 0xe6546b64; + } + + k = 0; + switch (len & 3) { + case 3: + k ^= tail[2] << 16; + case 2: + k ^= tail[1] << 8; + case 1: + k ^= tail[0]; + k *= c1; + k = (k << 13) | (k >> (32 - 15)); + k *= c2; + h ^= k; + }; + + 
h ^= len; + + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +void +murmur128_hash (const guint8 *in, gsize len, guint64 out[]) +{ + const guint64 c1 = 0x87c37b91114253d5ULL; + const guint64 c2 = 0x4cf5ad432745937fULL; + const gint nblocks = len / 16; + const guint64 *blocks = (const guint64 *)(in); + const guint8 *tail; + guint64 h1 = 0; + guint64 h2 = 0; + int i; + guint64 k1, k2; + + if (in == NULL || len == 0 || out == NULL) { + return; + } + + tail = (const guint8 *)(in + (nblocks * 16)); + + for (i = 0; i < nblocks; i++) { + k1 = blocks[i*2+0]; + k2 = blocks[i*2+1]; + + k1 *= c1; + k1 = (k1 << 31) | (k1 >> (64 - 31)); + k1 *= c2; + h1 ^= k1; + + h1 = (h1 << 27) | (h1 >> (64 - 27)); + h1 += h2; + h1 = h1*5+0x52dce729; + + k2 *= c2; + k2 = (k2 << 33) | (k2 >> (64 - 33)); + k2 *= c1; + h2 ^= k2; + + h2 = (h2 << 31) | (h2 >> (64 - 31)); + h2 += h1; + h2 = h2*5+0x38495ab5; + } + + k1 = k2 = 0; + switch (len & 15) { + case 15: + k2 ^= (guint64)(tail[14]) << 48; + case 14: + k2 ^= (guint64)(tail[13]) << 40; + case 13: + k2 ^= (guint64)(tail[12]) << 32; + case 12: + k2 ^= (guint64)(tail[11]) << 24; + case 11: + k2 ^= (guint64)(tail[10]) << 16; + case 10: + k2 ^= (guint64)(tail[ 9]) << 8; + case 9: + k2 ^= (guint64)(tail[ 8]) << 0; + k2 *= c2; + k2 = (k2 << 33) | (k2 >> (64 - 33)); + k2 *= c1; + h2 ^= k2; + + case 8: + k1 ^= (guint64)(tail[ 7]) << 56; + case 7: + k1 ^= (guint64)(tail[ 6]) << 48; + case 6: + k1 ^= (guint64)(tail[ 5]) << 40; + case 5: + k1 ^= (guint64)(tail[ 4]) << 32; + case 4: + k1 ^= (guint64)(tail[ 3]) << 24; + case 3: + k1 ^= (guint64)(tail[ 2]) << 16; + case 2: + k1 ^= (guint64)(tail[ 1]) << 8; + case 1: + k1 ^= (guint64)(tail[ 0]) << 0; + k1 *= c1; + k1 = (k1 << 31) | (k1 >> (64 - 31)); + k1 *= c2; + h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 ^= h1 >> 33; + h1 *= 0xff51afd7ed558ccdULL; + h1 ^= h1 >> 33; + h1 *= 
0xc4ceb9fe1a85ec53ULL; + h1 ^= h1 >> 33; + + h2 ^= h2 >> 33; + h2 *= 0xff51afd7ed558ccdULL; + h2 ^= h2 >> 33; + h2 *= 0xc4ceb9fe1a85ec53ULL; + h2 ^= h2 >> 33; + + h1 += h2; + h2 += h1; + + out[0] = h1; + out[1] = h2; +} + +struct hash_copy_callback_data { + gpointer (*key_copy_func)(gconstpointer data, gpointer ud); + gpointer (*value_copy_func)(gconstpointer data, gpointer ud); + gpointer ud; + GHashTable *dst; +}; + +static void +copy_foreach_callback (gpointer key, gpointer value, gpointer ud) +{ + struct hash_copy_callback_data *cb = ud; + gpointer nkey, nvalue; + + nkey = cb->key_copy_func ? cb->key_copy_func (key, cb->ud) : (gpointer)key; + nvalue = cb->value_copy_func ? cb->value_copy_func (value, cb->ud) : (gpointer)value; + g_hash_table_insert (cb->dst, nkey, nvalue); +} +/** + * Deep copy of one hash table to another + * @param src source hash + * @param dst destination hash + * @param key_copy_func function called to copy or modify keys (or NULL) + * @param value_copy_func function called to copy or modify values (or NULL) + * @param ud user data for copy functions + */ +void rspamd_hash_table_copy (GHashTable *src, GHashTable *dst, + gpointer (*key_copy_func)(gconstpointer data, gpointer ud), + gpointer (*value_copy_func)(gconstpointer data, gpointer ud), + gpointer ud) +{ + struct hash_copy_callback_data cb; + if (src != NULL && dst != NULL) { + cb.key_copy_func = key_copy_func; + cb.value_copy_func = value_copy_func; + cb.ud = ud; + cb.dst = dst; + g_hash_table_foreach (src, copy_foreach_callback, &cb); + } +} + +/** + * Utility function to provide mem_pool copy for rspamd_hash_table_copy function + * @param data string to copy + * @param ud memory pool to use + * @return + */ +gpointer +rspamd_str_pool_copy (gconstpointer data, gpointer ud) +{ + rspamd_mempool_t *pool = ud; + + return data ? 
rspamd_mempool_strdup (pool, data) : NULL; +} + +gboolean +parse_ipmask_v4 (const char *line, struct in_addr *ina, int *mask) +{ + const char *pos; + char ip_buf[INET_ADDRSTRLEN + 1], mask_buf[3] = { '\0', '\0', '\0' }; + + bzero (ip_buf, sizeof (ip_buf)); + + if ((pos = strchr (line, '/')) != NULL) { + rspamd_strlcpy (ip_buf, line, MIN ((gsize)(pos - line), sizeof (ip_buf))); + rspamd_strlcpy (mask_buf, pos + 1, sizeof (mask_buf)); + } + else { + rspamd_strlcpy (ip_buf, line, sizeof (ip_buf)); + } + + if (!inet_aton (ip_buf, ina)) { + return FALSE; + } + + if (mask_buf[0] != '\0') { + /* Also parse mask */ + *mask = (mask_buf[0] - '0') * 10 + mask_buf[1] - '0'; + if (*mask > 32) { + return FALSE; + } + } + else { + *mask = 32; + } + + *mask = G_MAXUINT32 << (32 - *mask); + + return TRUE; +} + +static volatile sig_atomic_t saved_signo[NSIG]; + +static +void read_pass_tmp_sig_handler (int s) +{ + + saved_signo[s] = 1; +} + +#ifndef _PATH_TTY +# define _PATH_TTY "/dev/tty" +#endif + +gint +rspamd_read_passphrase (gchar *buf, gint size, gint rwflag, gpointer key) +{ +#ifdef HAVE_PASSPHRASE_H + gint len = 0; + gchar pass[BUFSIZ]; + + if (readpassphrase ("Enter passphrase: ", buf, size, RPP_ECHO_OFF | RPP_REQUIRE_TTY) == NULL) { + return 0; + } + + return strlen (buf); +#else + struct sigaction sa, savealrm, saveint, savehup, savequit, saveterm; + struct sigaction savetstp, savettin, savettou, savepipe; + struct termios term, oterm; + gint input, output, i; + gchar *end, *p, ch; + +restart: + if ((input = output = open (_PATH_TTY, O_RDWR)) == -1) { + errno = ENOTTY; + return 0; + } + if (fcntl (input, F_SETFD, FD_CLOEXEC) == -1) { + msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); + } + + /* Turn echo off */ + if (tcgetattr (input, &oterm) != 0) { + errno = ENOTTY; + return 0; + } + memcpy(&term, &oterm, sizeof(term)); + term.c_lflag &= ~(ECHO | ECHONL); + (void)tcsetattr(input, TCSAFLUSH, &term); + (void)write (output, "Enter passphrase: ", sizeof ("Enter 
passphrase: ") - 1); + + /* Save the current sighandler */ + for (i = 0; i < NSIG; i++) { + saved_signo[i] = 0; + } + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sa.sa_handler = read_pass_tmp_sig_handler; + (void)sigaction (SIGALRM, &sa, &savealrm); + (void)sigaction (SIGHUP, &sa, &savehup); + (void)sigaction (SIGINT, &sa, &saveint); + (void)sigaction (SIGPIPE, &sa, &savepipe); + (void)sigaction (SIGQUIT, &sa, &savequit); + (void)sigaction (SIGTERM, &sa, &saveterm); + (void)sigaction (SIGTSTP, &sa, &savetstp); + (void)sigaction (SIGTTIN, &sa, &savettin); + (void)sigaction (SIGTTOU, &sa, &savettou); + + /* Now read a passphrase */ + p = buf; + end = p + size - 1; + while (read (input, &ch, 1) == 1 && ch != '\n' && ch != '\r') { + if (p < end) { + *p++ = ch; + } + } + *p = '\0'; + (void)write (output, "\n", 1); + + /* Restore terminal state */ + if (memcmp (&term, &oterm, sizeof (term)) != 0) { + while (tcsetattr (input, TCSAFLUSH, &oterm) == -1 && + errno == EINTR && !saved_signo[SIGTTOU]); + } + + /* Restore signal handlers */ + (void)sigaction (SIGALRM, &savealrm, NULL); + (void)sigaction (SIGHUP, &savehup, NULL); + (void)sigaction (SIGINT, &saveint, NULL); + (void)sigaction (SIGQUIT, &savequit, NULL); + (void)sigaction (SIGPIPE, &savepipe, NULL); + (void)sigaction (SIGTERM, &saveterm, NULL); + (void)sigaction (SIGTSTP, &savetstp, NULL); + (void)sigaction (SIGTTIN, &savettin, NULL); + (void)sigaction (SIGTTOU, &savettou, NULL); + + close (input); + + /* Send signals pending */ + for (i = 0; i < NSIG; i++) { + if (saved_signo[i]) { + kill(getpid(), i); + switch (i) { + case SIGTSTP: + case SIGTTIN: + case SIGTTOU: + goto restart; + } + } + } + + return p - buf; +#endif +} + +gboolean +rspamd_ip_is_valid (rspamd_inet_addr_t *addr) +{ + const struct in_addr ip4_any = { INADDR_ANY }, ip4_none = { INADDR_NONE }; + const struct in6_addr ip6_any = IN6ADDR_ANY_INIT; + + gboolean ret = FALSE; + + if (G_LIKELY (addr->af == AF_INET)) { + if (memcmp 
(&addr->addr.s4.sin_addr, &ip4_any, sizeof (struct in_addr)) != 0 && + memcmp (&addr->addr.s4.sin_addr, &ip4_none, + sizeof (struct in_addr)) != 0) { + ret = TRUE; + } + } + else if (G_UNLIKELY (addr->af == AF_INET6)) { + if (memcmp (&addr->addr.s6.sin6_addr, &ip6_any, + sizeof (struct in6_addr)) != 0) { + ret = TRUE; + } + } + + return ret; +} + +/* + * GString ucl emitting functions + */ +static int +rspamd_gstring_append_character (unsigned char c, size_t len, void *ud) +{ + GString *buf = ud; + gsize old_len; + + if (len == 1) { + g_string_append_c (buf, c); + } + else { + if (buf->allocated_len - buf->len <= len) { + old_len = buf->len; + g_string_set_size (buf, buf->len + len + 1); + buf->len = old_len; + } + memset (&buf->str[buf->len], c, len); + buf->len += len; + } + + return 0; +} + +static int +rspamd_gstring_append_len (const unsigned char *str, size_t len, void *ud) +{ + GString *buf = ud; + + g_string_append_len (buf, str, len); + + return 0; +} + +static int +rspamd_gstring_append_int (int64_t val, void *ud) +{ + GString *buf = ud; + + rspamd_printf_gstring (buf, "%L", (intmax_t)val); + return 0; +} + +static int +rspamd_gstring_append_double (double val, void *ud) +{ + GString *buf = ud; + const double delta = 0.0000001; + + if (val == (double)(int)val) { + rspamd_printf_gstring (buf, "%.1f", val); + } + else if (fabs (val - (double)(int)val) < delta) { + /* Write at maximum precision */ + rspamd_printf_gstring (buf, "%.*g", DBL_DIG, val); + } + else { + rspamd_printf_gstring (buf, "%f", val); + } + + return 0; +} + +void +rspamd_ucl_emit_gstring (ucl_object_t *obj, enum ucl_emitter emit_type, GString *target) +{ + struct ucl_emitter_functions func = { + .ucl_emitter_append_character = rspamd_gstring_append_character, + .ucl_emitter_append_len = rspamd_gstring_append_len, + .ucl_emitter_append_int = rspamd_gstring_append_int, + .ucl_emitter_append_double = rspamd_gstring_append_double + }; + + func.ud = target; + ucl_object_emit_full (obj, 
emit_type, &func); +} + +gint +rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t *addr) +{ + gint nfd, serrno; + socklen_t len = sizeof (addr->addr.ss); + + if ((nfd = accept (sock, &addr->addr.sa, &len)) == -1) { + if (errno == EAGAIN || errno == EINTR || errno == EWOULDBLOCK) { + return 0; + } + return -1; + } + + addr->slen = len; + addr->af = addr->addr.sa.sa_family; + + if (make_socket_nonblocking (nfd) < 0) { + goto out; + } + + /* Set close on exec */ + if (fcntl (nfd, F_SETFD, FD_CLOEXEC) == -1) { + msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); + goto out; + } + + return (nfd); + + out: + serrno = errno; + close (nfd); + errno = serrno; + return (-1); + +} + +gboolean +rspamd_parse_inet_address (rspamd_inet_addr_t *target, const char *src) +{ + gboolean ret = FALSE; + + if (inet_pton (AF_INET6, src, &target->addr.s6.sin6_addr) == 1) { + target->af = AF_INET6; + target->slen = sizeof (target->addr.s6); + ret = TRUE; + } + else if (inet_pton (AF_INET, src, &target->addr.s4.sin_addr) == 1) { + target->af = AF_INET; + target->slen = sizeof (target->addr.s4); + ret = TRUE; + } + + target->addr.sa.sa_family = target->af; + + return ret; +} + +const char* +rspamd_inet_address_to_string (rspamd_inet_addr_t *addr) +{ + static char addr_str[INET6_ADDRSTRLEN + 1]; + + switch (addr->af) { + case AF_INET: + return inet_ntop (addr->af, &addr->addr.s4.sin_addr, addr_str, + sizeof (addr_str)); + case AF_INET6: + return inet_ntop (addr->af, &addr->addr.s6.sin6_addr, addr_str, + sizeof (addr_str)); + case AF_UNIX: + return addr->addr.su.sun_path; + } + + return "undefined"; +} + +uint16_t +rspamd_inet_address_get_port (rspamd_inet_addr_t *addr) +{ + switch (addr->af) { + case AF_INET: + return ntohs (addr->addr.s4.sin_port); + case AF_INET6: + return ntohs (addr->addr.s6.sin6_port); + } + + return 0; +} diff --git a/src/libutil/util.h b/src/libutil/util.h new file mode 100644 index 000000000..0605fe87d --- /dev/null +++ b/src/libutil/util.h @@ -0,0 
/**
 * Socket address together with its length and address family
 */
typedef struct _rspamd_inet_addr_s {
	union sa_union addr;	/* the address itself, viewed through the matching sockaddr type */
	socklen_t slen;			/* length of the sockaddr stored in addr */
	int af;					/* address family (AF_INET, AF_INET6 or AF_UNIX) */
} rspamd_inet_addr_t;
/*
 * Handle of an opened PID file, as returned by rspamd_pidfile_open
 */
typedef struct rspamd_pidfh_s {
	gint pf_fd;						/* descriptor of the opened PID file */
#ifdef HAVE_PATH_MAX
	gchar pf_path[PATH_MAX + 1];	/* path of the PID file */
#elif defined(HAVE_MAXPATHLEN)
	gchar pf_path[MAXPATHLEN + 1];	/* path of the PID file */
#else
	gchar pf_path[1024 + 1];		/* path of the PID file */
#endif
	dev_t pf_dev;					/* st_dev of the file at open time */
	ino_t pf_ino;					/* st_ino of the file at open time */
} rspamd_pidfh_t;
+ * Replace %r with rcpt value and %f with from value, new string is allocated in pool + */ +gchar* resolve_stat_filename (rspamd_mempool_t *pool, gchar *pattern, gchar *rcpt, gchar *from); +#ifdef HAVE_CLOCK_GETTIME +/* + * Calculate check time with specified resolution of timer + */ +const gchar* calculate_check_time (struct timeval *tv, struct timespec *begin, gint resolution, guint32 *scan_ms); +#else +const gchar* calculate_check_time (struct timeval *begin, gint resolution, guint32 *scan_ms); +#endif + +/* + * File locking functions + */ +gboolean lock_file (gint fd, gboolean async); +gboolean unlock_file (gint fd, gboolean async); + +/* + * Hash table utility functions for case insensitive hashing + */ +guint rspamd_strcase_hash (gconstpointer key); +gboolean rspamd_strcase_equal (gconstpointer v, gconstpointer v2); + +/* + * Hash table utility functions for case sensitive hashing + */ +guint rspamd_str_hash (gconstpointer key); +gboolean rspamd_str_equal (gconstpointer v, gconstpointer v2); + + +/* + * Hash table utility functions for hashing fixed strings + */ +guint fstr_strcase_hash (gconstpointer key); +gboolean fstr_strcase_equal (gconstpointer v, gconstpointer v2); + +/* + * Google perf-tools initialization function + */ +void gperf_profiler_init (struct config_file *cfg, const gchar *descr); + +/* + * Workarounds for older versions of glib + */ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 22)) +void g_ptr_array_unref (GPtrArray *array); +#endif +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 14)) +void g_queue_clear (GQueue *queue); +#endif + + +/** + * Copy src to dest limited to len, in compare with standart strlcpy(3) rspamd strlcpy does not + * traverse the whole string and it is possible to use it for non NULL terminated strings. 
This is + * more like memccpy(dst, src, size, '\0') + * + * @param dst destination string + * @param src source string + * @param siz length of destination buffer + * @return bytes copied + */ +gsize rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz); + +/** + * Lowercase strlcpy variant + * @param dst + * @param src + * @param siz + * @return + */ +gsize rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz); + +/* + * Convert milliseconds to timeval fields + */ +#define msec_to_tv(msec, tv) do { (tv)->tv_sec = (msec) / 1000; (tv)->tv_usec = ((msec) - (tv)->tv_sec * 1000) * 1000; } while(0) +#define double_to_tv(dbl, tv) do { (tv)->tv_sec = (int)(dbl); (tv)->tv_usec = ((dbl) - (int)(dbl))*1000*1000; } while(0) +#define tv_to_msec(tv) (tv)->tv_sec * 1000 + (tv)->tv_usec / 1000 + +/* Compare two emails for building emails tree */ +gint compare_email_func (gconstpointer a, gconstpointer b); + +/* Compare two urls for building emails tree */ +gint compare_url_func (gconstpointer a, gconstpointer b); + +/* + * Find string find in string s ignoring case + */ +gchar* rspamd_strncasestr (const gchar *s, const gchar *find, gint len); + +/* + * Try to convert string of length to long + */ +gboolean rspamd_strtol (const gchar *s, gsize len, glong *value); + +/* + * Try to convert string of length to unsigned long + */ +gboolean rspamd_strtoul (const gchar *s, gsize len, gulong *value); + +/** + * Try to allocate a file on filesystem (using fallocate or posix_fallocate) + * @param fd descriptor + * @param offset offset of file + * @param len length to allocate + * @return -1 in case of failure + */ +gint rspamd_fallocate (gint fd, off_t offset, off_t len); + +/** + * Return worker's control structure by its type + * @param type + * @return worker's control structure or NULL + */ +extern worker_t* get_worker_by_type (GQuark type); + +/** + * Utils for working with threads to be compatible with all glib versions + */ +typedef struct rspamd_mutex_s { +#if 
((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + GMutex mtx; +#else + GStaticMutex mtx; +#endif +} rspamd_mutex_t; + +typedef struct rspamd_rwlock_s { +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + GRWLock rwlock; +#else + GStaticRWLock rwlock; +#endif +} rspamd_rwlock_t; + + +/** + * Create new mutex + * @return mutex or NULL + */ +rspamd_mutex_t* rspamd_mutex_new (void); + +/** + * Lock mutex + * @param mtx + */ +void rspamd_mutex_lock (rspamd_mutex_t *mtx); + +/** + * Unlock mutex + * @param mtx + */ +void rspamd_mutex_unlock (rspamd_mutex_t *mtx); + +/** + * Clear rspamd mutex + * @param mtx + */ +void rspamd_mutex_free (rspamd_mutex_t *mtx); + +/** + * Create new rwloc + * @return + */ +rspamd_rwlock_t* rspamd_rwlock_new (void); + +/** + * Lock rwlock for writing + * @param mtx + */ +void rspamd_rwlock_writer_lock (rspamd_rwlock_t *mtx); + +/** + * Lock rwlock for reading + * @param mtx + */ +void rspamd_rwlock_reader_lock (rspamd_rwlock_t *mtx); + +/** + * Unlock rwlock from writing + * @param mtx + */ +void rspamd_rwlock_writer_unlock (rspamd_rwlock_t *mtx); + +/** + * Unlock rwlock from reading + * @param mtx + */ +void rspamd_rwlock_reader_unlock (rspamd_rwlock_t *mtx); + +/** + * Free rwlock + * @param mtx + */ +void rspamd_rwlock_free (rspamd_rwlock_t *mtx); + +static inline void +rspamd_cond_wait (GCond *cond, rspamd_mutex_t *mtx) +{ +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + g_cond_wait (cond, &mtx->mtx); +#else + g_cond_wait (cond, g_static_mutex_get_mutex (&mtx->mtx)); +#endif +} + +/** + * Create new named thread + * @param name name pattern + * @param func function to start + * @param data data to pass to function + * @param err error pointer + * @return new thread object that can be joined + */ +GThread* rspamd_create_thread (const gchar *name, GThreadFunc func, gpointer data, GError **err); + +/** + * Return 32bit murmur hash value for specified input + * @param in input data + * @param len length 
of the input data + * @code + * MurmurHash3 was created by Austin Appleby in 2008. The cannonical + * implementations are in C++ and placed in the public. + * + * https://sites.google.com/site/murmurhash/ + * + * Seungyoung Kim has ported it's cannonical implementation to C language + * in 2012 and published it as a part of qLibc component. + * @endcode + * @return + */ +guint32 murmur32_hash (const guint8 *in, gsize len); + +/** + * Return 32bit murmur hash value for specified input + * @param in input data + * @param len length of the input data + * @param out array of 2 guint64 variables + * @code + * MurmurHash3 was created by Austin Appleby in 2008. The cannonical + * implementations are in C++ and placed in the public. + * + * https://sites.google.com/site/murmurhash/ + * + * Seungyoung Kim has ported it's cannonical implementation to C language + * in 2012 and published it as a part of qLibc component. + * @endcode + * @return + */ +void murmur128_hash (const guint8 *in, gsize len, guint64 out[]); + +/** + * Deep copy of one hash table to another + * @param src source hash + * @param dst destination hash + * @param key_copy_func function called to copy or modify keys (or NULL) + * @param value_copy_func function called to copy or modify values (or NULL) + * @param ud user data for copy functions + */ +void rspamd_hash_table_copy (GHashTable *src, GHashTable *dst, + gpointer (*key_copy_func)(gconstpointer data, gpointer ud), + gpointer (*value_copy_func)(gconstpointer data, gpointer ud), + gpointer ud); + +/** + * Utility function to provide mem_pool copy for rspamd_hash_table_copy function + * @param data string to copy + * @param ud memory pool to use + * @return + */ +gpointer rspamd_str_pool_copy (gconstpointer data, gpointer ud); + +/** + * Parse ipv4 address with optional mask in CIDR format + * @param line cidr notation of ipv4 address + * @param ina destination address + * @param mask destination mask + * @return + */ +gboolean parse_ipmask_v4 (const 
char *line, struct in_addr *ina, int *mask); + +/** + * Read passphrase from tty + * @param buf buffer to fill with a password + * @param size size of the buffer + * @param rwflag unused flag + * @param key unused key + * @return size of password read + */ +gint rspamd_read_passphrase (gchar *buf, gint size, gint rwflag, gpointer key); + +/** + * Check whether specified ip is valid (not INADDR_ANY or INADDR_NONE) for ipv4 or ipv6 + * @param ptr pointer to struct in_addr or struct in6_addr + * @param af address family (AF_INET or AF_INET6) + * @return TRUE if the address is valid + */ +gboolean rspamd_ip_is_valid (rspamd_inet_addr_t *addr); + +/** + * Emit UCL object to gstring + * @param obj object to emit + * @param emit_type emitter type + * @param target target string + */ +void rspamd_ucl_emit_gstring (ucl_object_t *obj, enum ucl_emitter emit_type, GString *target); + +/** + * Accept from listening socket filling addr structure + * @param sock listening socket + * @param addr + * @return + */ +gint rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t *addr); + +/** + * Try to parse address from string + * @param target target to fill + * @param src IP string representation + * @return TRUE if addr has been parsed + */ +gboolean rspamd_parse_inet_address (rspamd_inet_addr_t *target, const char *src); + +/** + * Returns string representation of inet address + * @param addr + * @return statically allocated string pointer (not thread safe) + */ +const char* rspamd_inet_address_to_string (rspamd_inet_addr_t *addr); + +/** + * Returns port number for the specified inet address in host byte order + * @param addr + * @return + */ +uint16_t rspamd_inet_address_get_port (rspamd_inet_addr_t *addr); + +#endif diff --git a/src/logger.c b/src/logger.c deleted file mode 100644 index 01814d24d..000000000 --- a/src/logger.c +++ /dev/null @@ -1,769 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#include "config.h" -#include "logger.h" -#include "util.h" -#include "main.h" -#include "map.h" - -/* How much message should be repeated before it is count to be repeated one */ -#define REPEATS_MIN 3 -#define REPEATS_MAX 300 - -/** - * Static structure that store logging parameters - * It is NOT shared between processes and is created by main process - */ -struct rspamd_logger_s { - rspamd_log_func_t log_func; - struct config_file *cfg; - struct { - guint32 size; - guint32 used; - u_char *buf; - } io_buf; - gint fd; - gboolean is_buffered; - gboolean enabled; - gboolean is_debug; - gboolean throttling; - time_t throttling_time; - sig_atomic_t do_reopen_log; - enum rspamd_log_type type; - pid_t pid; - GQuark process_type; - radix_tree_t *debug_ip; - guint32 last_line_cksum; - guint32 repeats; - gchar *saved_message; - gchar *saved_function; - GMutex *mtx; -}; - -static const gchar lf_chr = '\n'; - -static rspamd_logger_t *default_logger = NULL; - - -static void -syslog_log_function (const gchar * log_domain, const gchar *function, - GLogLevelFlags log_level, const gchar * message, - gboolean forced, gpointer arg); -static void -file_log_function (const gchar * log_domain, const gchar *function, - GLogLevelFlags log_level, const gchar * message, - gboolean forced, gpointer arg); - -/** - * Calculate checksum for log line (used for repeating logic) - */ -static inline guint32 -rspamd_log_calculate_cksum (const gchar *message, size_t mlen) -{ - const gchar *bp = message; - const gchar *be = bp + mlen; - guint32 hval = 0; - - while (bp < be) { - hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24); - hval ^= (guint32)*bp++; - } - - /* return our new hash value */ - return hval; - -} - -/* - * Write a line to log file (unbuffered) - */ -static void -direct_write_log_line (rspamd_logger_t *rspamd_log, void *data, gint count, gboolean is_iov) -{ - gchar errmsg[128]; - struct iovec *iov; - const gchar *line; - gint r; - - if (rspamd_log->enabled) 
{ - if (is_iov) { - iov = (struct iovec *)data; - r = writev (rspamd_log->fd, iov, count); - } - else { - line = (const gchar *)data; - r = write (rspamd_log->fd, line, count); - } - if (r == -1) { - /* We cannot write message to file, so we need to detect error and make decision */ - r = rspamd_snprintf (errmsg, sizeof (errmsg), "direct_write_log_line: cannot write log line: %s", strerror (errno)); - if (errno == EIO || errno == EINTR) { - /* Descriptor is somehow invalid, try to restart */ - reopen_log (rspamd_log); - if (write (rspamd_log->fd, errmsg, r) != -1) { - /* Try again */ - direct_write_log_line (rspamd_log, data, count, is_iov); - } - } - else if (errno == EFAULT || errno == EINVAL || errno == EFBIG || errno == ENOSPC) { - /* Rare case */ - rspamd_log->throttling = TRUE; - rspamd_log->throttling_time = time (NULL); - } - else if (errno == EPIPE || errno == EBADF) { - /* We write to some pipe and it disappears, disable logging or we has opened bad file descriptor */ - rspamd_log->enabled = FALSE; - } - } - else if (rspamd_log->throttling) { - rspamd_log->throttling = FALSE; - } - } -} - -static void -rspamd_escape_log_string (gchar *str) -{ - guchar *p = (guchar *)str; - - while (*p) { - if ((*p & 0x80) || !g_ascii_isprint (*p)) { - *p = '?'; - } - else if (*p == '\n' || *p == '\r') { - *p = ' '; - } - p ++; - } -} - -/* Logging utility functions */ -gint -open_log_priv (rspamd_logger_t *rspamd_log, uid_t uid, gid_t gid) -{ - switch (rspamd_log->cfg->log_type) { - case RSPAMD_LOG_CONSOLE: - /* Do nothing with console */ - rspamd_log->enabled = TRUE; - return 0; - case RSPAMD_LOG_SYSLOG: - openlog ("rspamd", LOG_NDELAY | LOG_PID, rspamd_log->cfg->log_facility); - rspamd_log->enabled = TRUE; - return 0; - case RSPAMD_LOG_FILE: - rspamd_log->fd = open (rspamd_log->cfg->log_file, O_CREAT | O_WRONLY | O_APPEND, - S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH); - if (rspamd_log->fd == -1) { - fprintf (stderr, "open_log: cannot open desired log file: %s, %s", - 
rspamd_log->cfg->log_file, strerror (errno)); - return -1; - } - if (fchown (rspamd_log->fd, uid, gid) == -1) { - fprintf (stderr, "open_log: cannot chown desired log file: %s, %s", - rspamd_log->cfg->log_file, strerror (errno)); - close (rspamd_log->fd); - return -1; - } - rspamd_log->enabled = TRUE; - return 0; - } - return -1; -} - -void -close_log_priv (rspamd_logger_t *rspamd_log, uid_t uid, gid_t gid) -{ - gchar tmpbuf[256]; - flush_log_buf (rspamd_log); - - switch (rspamd_log->type) { - case RSPAMD_LOG_CONSOLE: - /* Do nothing special */ - break; - case RSPAMD_LOG_SYSLOG: - closelog (); - break; - case RSPAMD_LOG_FILE: - if (rspamd_log->enabled) { - if (rspamd_log->repeats > REPEATS_MIN) { - rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "Last message repeated %ud times", rspamd_log->repeats); - rspamd_log->repeats = 0; - if (rspamd_log->saved_message) { - file_log_function (NULL, rspamd_log->saved_function, rspamd_log->cfg->log_level, rspamd_log->saved_message, TRUE, rspamd_log); - g_free (rspamd_log->saved_message); - g_free (rspamd_log->saved_function); - rspamd_log->saved_message = NULL; - rspamd_log->saved_function = NULL; - } - /* It is safe to use temporary buffer here as it is not static */ - file_log_function (NULL, __FUNCTION__, rspamd_log->cfg->log_level, tmpbuf, TRUE, rspamd_log); - return; - } - - if (fsync (rspamd_log->fd) == -1) { - msg_err ("error syncing log file: %s", strerror (errno)); - } - close (rspamd_log->fd); - } - break; - } - - rspamd_log->enabled = FALSE; -} - -gint -reopen_log_priv (rspamd_logger_t *rspamd_log, uid_t uid, gid_t gid) -{ - close_log_priv (rspamd_log, uid, gid); - if (open_log_priv (rspamd_log, uid, gid) == 0) { - msg_info ("log file reopened"); - return 0; - } - - return -1; -} - -/** - * Open log file or initialize other structures - */ -gint -open_log (rspamd_logger_t *logger) -{ - return open_log_priv (logger, -1, -1); -} -/** - * Close log file or destroy other structures - */ -void -close_log (rspamd_logger_t 
*logger) -{ - close_log_priv (logger, -1, -1); -} -/** - * Close and open log again - */ -gint -reopen_log (rspamd_logger_t *logger) -{ - return reopen_log_priv (logger, -1, -1); -} - -/* - * Setup logger - */ -void -rspamd_set_logger (struct config_file *cfg, GQuark ptype, struct rspamd_main *rspamd) -{ - gchar **strvec, *p, *err; - gint num, i, k; - struct in_addr addr; - guint32 mask = 0xFFFFFFFF; - - if (rspamd->logger == NULL) { - rspamd->logger = g_malloc (sizeof (rspamd_logger_t)); - memset (rspamd->logger, 0, sizeof (rspamd_logger_t)); - } - - rspamd->logger->type = cfg->log_type; - rspamd->logger->pid = getpid (); - rspamd->logger->process_type = ptype; - -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) - rspamd->logger->mtx = g_mutex_new (); -#else - rspamd->logger->mtx = g_malloc (sizeof (GMutex)); - g_mutex_init (rspamd->logger->mtx); -#endif - - switch (cfg->log_type) { - case RSPAMD_LOG_CONSOLE: - rspamd->logger->log_func = file_log_function; - rspamd->logger->fd = STDERR_FILENO; - break; - case RSPAMD_LOG_SYSLOG: - rspamd->logger->log_func = syslog_log_function; - break; - case RSPAMD_LOG_FILE: - rspamd->logger->log_func = file_log_function; - break; - } - - rspamd->logger->cfg = cfg; - /* Set up buffer */ - if (rspamd->cfg->log_buffered) { - if (rspamd->cfg->log_buf_size != 0) { - rspamd->logger->io_buf.size = rspamd->cfg->log_buf_size; - } - else { - rspamd->logger->io_buf.size = BUFSIZ; - } - rspamd->logger->is_buffered = TRUE; - rspamd->logger->io_buf.buf = g_malloc (rspamd->logger->io_buf.size); - } - /* Set up conditional logging */ - if (rspamd->cfg->debug_ip_map != NULL) { - /* Try to add it as map first of all */ - if (rspamd->logger->debug_ip) { - radix_tree_free (rspamd->logger->debug_ip); - } - rspamd->logger->debug_ip = radix_tree_create (); - if (!add_map (rspamd->cfg, rspamd->cfg->debug_ip_map, "IP addresses for which debug logs are enabled", - read_radix_list, fin_radix_list, (void **)&rspamd->logger->debug_ip)) { - /* 
Try to parse it as list */ - strvec = g_strsplit_set (rspamd->cfg->debug_ip_map, ",; ", 0); - num = g_strv_length (strvec); - - for (i = 0; i < num; i++) { - g_strstrip (strvec[i]); - - if ((p = strchr (strvec[i], '/')) != NULL) { - /* Try to extract mask */ - *p = '\0'; - p ++; - errno = 0; - k = strtoul (p, &err, 10); - if (errno != 0 || *err != '\0' || k > 32) { - continue; - } - } - else { - k = 32; - } - if (inet_aton (strvec[i], &addr)) { - /* Check ip */ - mask = mask << (32 - k); - radix32tree_insert (rspamd->logger->debug_ip, ntohl (addr.s_addr), mask, 1); - } - } - g_strfreev (strvec); - } - } - else if (rspamd->logger->debug_ip) { - radix_tree_free (rspamd->logger->debug_ip); - rspamd->logger->debug_ip = NULL; - } - - default_logger = rspamd->logger; -} - -/** - * Used after fork() for updating structure params - */ -void -update_log_pid (GQuark ptype, rspamd_logger_t *rspamd_log) -{ - rspamd_log->pid = getpid (); - rspamd_log->process_type = ptype; -} - -/** - * Flush logging buffer - */ -void -flush_log_buf (rspamd_logger_t *rspamd_log) -{ - if (rspamd_log->is_buffered && (rspamd_log->type == RSPAMD_LOG_CONSOLE || rspamd_log->type == RSPAMD_LOG_FILE)) { - direct_write_log_line (rspamd_log, rspamd_log->io_buf.buf, rspamd_log->io_buf.used, FALSE); - rspamd_log->io_buf.used = 0; - } -} - - -void -rspamd_common_logv (rspamd_logger_t *rspamd_log, GLogLevelFlags log_level, const gchar *function, - const gchar *fmt, va_list args) -{ - static gchar logbuf[BUFSIZ]; - u_char *end; - - if (rspamd_log == NULL) { - rspamd_log = default_logger; - } - - if (rspamd_log == NULL) { - /* Just fprintf message to stderr */ - if (log_level >= G_LOG_LEVEL_INFO) { - end = rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, args); - *end = '\0'; - rspamd_escape_log_string (logbuf); - fprintf (stderr, "%s\n", logbuf); - } - } - else if (log_level <= rspamd_log->cfg->log_level) { - g_mutex_lock (rspamd_log->mtx); - end = rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, args); - *end 
= '\0'; - rspamd_escape_log_string (logbuf); - rspamd_log->log_func (NULL, function, log_level, logbuf, FALSE, rspamd_log); - g_mutex_unlock (rspamd_log->mtx); - } -} - -/** - * This log functions select real logger and write message if level is less or equal to configured log level - */ -void -rspamd_common_log_function (rspamd_logger_t *rspamd_log, GLogLevelFlags log_level, - const gchar *function, const gchar *fmt, ...) -{ - va_list vp; - - va_start (vp, fmt); - rspamd_common_logv (rspamd_log, log_level, function, fmt, vp); - va_end (vp); -} - -void -rspamd_default_logv (GLogLevelFlags log_level, const gchar *function, - const gchar *fmt, va_list args) -{ - rspamd_common_logv (NULL, log_level, function, fmt, args); -} - -void -rspamd_default_log_function (GLogLevelFlags log_level, - const gchar *function, const gchar *fmt, ...) -{ - - va_list vp; - - va_start (vp, fmt); - rspamd_default_logv (log_level, function, fmt, vp); - va_end (vp); -} - - -/** - * Fill buffer with message (limits must be checked BEFORE this call) - */ -static void -fill_buffer (rspamd_logger_t *rspamd_log, const struct iovec *iov, gint iovcnt) -{ - gint i; - - for (i = 0; i < iovcnt; i ++) { - memcpy (rspamd_log->io_buf.buf + rspamd_log->io_buf.used, iov[i].iov_base, iov[i].iov_len); - rspamd_log->io_buf.used += iov[i].iov_len; - } - -} - -/* - * Write message to buffer or to file (using direct_write_log_line function) - */ -static void -file_log_helper (rspamd_logger_t *rspamd_log, const struct iovec *iov, gint iovcnt) -{ - size_t len = 0; - gint i; - - if (! 
rspamd_log->is_buffered) { - /* Write string directly */ - direct_write_log_line (rspamd_log, (void *)iov, iovcnt, TRUE); - } - else { - /* Calculate total length */ - for (i = 0; i < iovcnt; i ++) { - len += iov[i].iov_len; - } - /* Fill buffer */ - if (rspamd_log->io_buf.size < len) { - /* Buffer is too small to hold this string, so write it dirrectly */ - flush_log_buf (rspamd_log); - direct_write_log_line (rspamd_log, (void *)iov, iovcnt, TRUE); - } - else if (rspamd_log->io_buf.used + len >= rspamd_log->io_buf.size) { - /* Buffer is full, try to write it dirrectly */ - flush_log_buf (rspamd_log); - fill_buffer (rspamd_log, iov, iovcnt); - } - else { - /* Copy incoming string to buffer */ - fill_buffer (rspamd_log, iov, iovcnt); - } - } -} - -/** - * Syslog interface for logging - */ -static void -syslog_log_function (const gchar * log_domain, const gchar *function, GLogLevelFlags log_level, const gchar * message, gboolean forced, gpointer arg) -{ - rspamd_logger_t *rspamd_log = arg; - - if (! 
rspamd_log->enabled) { - return; - } - if (function == NULL) { - if (forced || log_level <= rspamd_log->cfg->log_level) { - if (forced || log_level >= G_LOG_LEVEL_DEBUG) { - syslog (LOG_DEBUG, "%s", message); - } - else if (log_level >= G_LOG_LEVEL_INFO) { - syslog (LOG_INFO, "%s", message); - } - else if (log_level >= G_LOG_LEVEL_WARNING) { - syslog (LOG_WARNING, "%s", message); - } - else if (log_level >= G_LOG_LEVEL_CRITICAL) { - syslog (LOG_ERR, "%s", message); - } - } - } - else { - if (forced || log_level <= rspamd_log->cfg->log_level) { - if (log_level >= G_LOG_LEVEL_DEBUG) { - syslog (LOG_DEBUG, "%s: %s", function, message); - } - else if (log_level >= G_LOG_LEVEL_INFO) { - syslog (LOG_INFO, "%s: %s", function, message); - } - else if (log_level >= G_LOG_LEVEL_WARNING) { - syslog (LOG_WARNING, "%s: %s", function, message); - } - else if (log_level >= G_LOG_LEVEL_CRITICAL) { - syslog (LOG_ERR, "%s: %s", function, message); - } - } - } -} - -/** - * Main file interface for logging - */ -static void -file_log_function (const gchar * log_domain, const gchar *function, GLogLevelFlags log_level, const gchar * message, gboolean forced, gpointer arg) -{ - gchar tmpbuf[256], timebuf[32]; - time_t now; - struct tm *tms; - struct iovec iov[4]; - gint r = 0; - guint32 cksum; - size_t mlen; - const gchar *cptype = NULL; - gboolean got_time = FALSE; - rspamd_logger_t *rspamd_log = arg; - - if (! 
rspamd_log->enabled) { - return; - } - - - if (forced || log_level <= rspamd_log->cfg->log_level) { - /* Check throttling due to write errors */ - if (rspamd_log->throttling) { - now = time (NULL); - if (rspamd_log->throttling_time != now) { - rspamd_log->throttling_time = now; - got_time = TRUE; - } - else { - /* Do not try to write to file too often while throttling */ - return; - } - } - /* Check repeats */ - mlen = strlen (message); - cksum = rspamd_log_calculate_cksum (message, mlen); - if (cksum == rspamd_log->last_line_cksum) { - rspamd_log->repeats ++; - if (rspamd_log->repeats > REPEATS_MIN && rspamd_log->repeats < REPEATS_MAX) { - /* Do not log anything */ - if (rspamd_log->saved_message == 0) { - rspamd_log->saved_message = g_strdup (message); - rspamd_log->saved_function = g_strdup (function); - } - return; - } - else if (rspamd_log->repeats > REPEATS_MAX) { - rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "Last message repeated %ud times", rspamd_log->repeats); - rspamd_log->repeats = 0; - /* It is safe to use temporary buffer here as it is not static */ - if (rspamd_log->saved_message) { - file_log_function (log_domain, rspamd_log->saved_function, log_level, rspamd_log->saved_message, forced, arg); - } - file_log_function (log_domain, __FUNCTION__, log_level, tmpbuf, forced, arg); - file_log_function (log_domain, function, log_level, message, forced, arg); - rspamd_log->repeats = REPEATS_MIN + 1; - return; - } - } - else { - /* Reset counter if new message differs from saved message */ - rspamd_log->last_line_cksum = cksum; - if (rspamd_log->repeats > REPEATS_MIN) { - rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "Last message repeated %ud times", rspamd_log->repeats); - rspamd_log->repeats = 0; - if (rspamd_log->saved_message) { - file_log_function (log_domain, rspamd_log->saved_function, log_level, rspamd_log->saved_message, forced, arg); - g_free (rspamd_log->saved_message); - g_free (rspamd_log->saved_function); - rspamd_log->saved_message = NULL; - 
rspamd_log->saved_function = NULL; - } - file_log_function (log_domain, __FUNCTION__, log_level, tmpbuf, forced, arg); - /* It is safe to use temporary buffer here as it is not static */ - file_log_function (log_domain, function, log_level, message, forced, arg); - return; - } - else { - rspamd_log->repeats = 0; - } - } - - if (rspamd_log->cfg->log_extended) { - if (! got_time) { - now = time (NULL); - } - - /* Format time */ - tms = localtime (&now); - - strftime (timebuf, sizeof (timebuf), "%F %H:%M:%S", tms); - cptype = g_quark_to_string (rspamd_log->process_type); - - if (rspamd_log->cfg->log_color) { - if (log_level >= G_LOG_LEVEL_INFO) { - /* White */ - r = rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "\033[1;37m"); - } - else if (log_level >= G_LOG_LEVEL_WARNING) { - /* Magenta */ - r = rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "\033[2;32m"); - } - else if (log_level >= G_LOG_LEVEL_CRITICAL) { - /* Red */ - r = rspamd_snprintf (tmpbuf, sizeof (tmpbuf), "\033[1;31m"); - } - } - else { - r = 0; - } - if (function == NULL) { - r += rspamd_snprintf (tmpbuf + r, sizeof (tmpbuf) - r, "%s #%P(%s) ", timebuf, rspamd_log->pid, cptype); - } - else { - r += rspamd_snprintf (tmpbuf + r, sizeof (tmpbuf) -r, "%s #%P(%s) %s: ", timebuf, rspamd_log->pid, cptype, function); - } - /* Construct IOV for log line */ - iov[0].iov_base = tmpbuf; - iov[0].iov_len = r; - iov[1].iov_base = (void *)message; - iov[1].iov_len = mlen; - iov[2].iov_base = (void *)&lf_chr; - iov[2].iov_len = 1; - if (rspamd_log->cfg->log_color) { - iov[3].iov_base = "\033[0m"; - iov[3].iov_len = sizeof ("\033[0m") - 1; - /* Call helper (for buffering) */ - file_log_helper (rspamd_log, iov, 4); - } - else { - /* Call helper (for buffering) */ - file_log_helper (rspamd_log, iov, 3); - } - } - else { - iov[0].iov_base = (void *)message; - iov[0].iov_len = mlen; - iov[1].iov_base = (void *)&lf_chr; - iov[1].iov_len = 1; - if (rspamd_log->cfg->log_color) { - iov[2].iov_base = "\033[0m"; - iov[2].iov_len = sizeof 
("\033[0m") - 1; - /* Call helper (for buffering) */ - file_log_helper (rspamd_log, iov, 3); - } - else { - /* Call helper (for buffering) */ - file_log_helper (rspamd_log, iov, 2); - } - } - } -} - -/** - * Write log line depending on ip - */ -void -rspamd_conditional_debug (rspamd_logger_t *rspamd_log, - rspamd_inet_addr_t *addr, const gchar *function, const gchar *fmt, ...) -{ - static gchar logbuf[BUFSIZ]; - va_list vp; - u_char *end; - - if (rspamd_log->cfg->log_level >= G_LOG_LEVEL_DEBUG || rspamd_log->is_debug) { - if (rspamd_log->debug_ip && addr != NULL) { - if (addr->af == AF_INET && radix32tree_find (rspamd_log->debug_ip, - ntohl (addr->addr.s4.sin_addr.s_addr)) == RADIX_NO_VALUE) { - return; - } - } - g_mutex_lock (rspamd_log->mtx); - va_start (vp, fmt); - end = rspamd_vsnprintf (logbuf, sizeof (logbuf), fmt, vp); - *end = '\0'; - rspamd_escape_log_string (logbuf); - va_end (vp); - rspamd_log->log_func (NULL, function, G_LOG_LEVEL_DEBUG, logbuf, TRUE, rspamd_log); - g_mutex_unlock (rspamd_log->mtx); - } -} -/** - * Wrapper for glib logger - */ -void -rspamd_glib_log_function (const gchar *log_domain, GLogLevelFlags log_level, const gchar *message, gpointer arg) -{ - rspamd_logger_t *rspamd_log = arg; - - if (rspamd_log->enabled) { - g_mutex_lock (rspamd_log->mtx); - rspamd_log->log_func (log_domain, NULL, log_level, message, FALSE, rspamd_log); - g_mutex_unlock (rspamd_log->mtx); - } -} - -/** - * Temporary turn on debugging - */ -void -rspamd_log_debug (rspamd_logger_t *rspamd_log) -{ - rspamd_log->is_debug = TRUE; -} - -/** - * Turn off temporary debugging - */ -void -rspamd_log_nodebug (rspamd_logger_t *rspamd_log) -{ - rspamd_log->is_debug = FALSE; -} diff --git a/src/logger.h b/src/logger.h deleted file mode 100644 index b0766b938..000000000 --- a/src/logger.h +++ /dev/null @@ -1,117 +0,0 @@ -#ifndef RSPAMD_LOGGER_H -#define RSPAMD_LOGGER_H - -#include "config.h" -#include "cfg_file.h" -#include "radix.h" -#include "util.h" - - -typedef void 
(*rspamd_log_func_t)(const gchar * log_domain, const gchar *function, - GLogLevelFlags log_level, const gchar * message, - gboolean forced, gpointer arg); - -typedef struct rspamd_logger_s rspamd_logger_t; -/** - * Init logger - */ -void rspamd_set_logger (struct config_file *cfg, GQuark ptype, struct rspamd_main *main); -/** - * Open log file or initialize other structures - */ -gint open_log (rspamd_logger_t *logger); -/** - * Close log file or destroy other structures - */ -void close_log (rspamd_logger_t *logger); -/** - * Close and open log again - */ -gint reopen_log (rspamd_logger_t *logger); - -/** - * Open log file or initialize other structures for privileged processes - */ -gint open_log_priv (rspamd_logger_t *logger, uid_t uid, gid_t gid); -/** - * Close log file or destroy other structures for privileged processes - */ -void close_log_priv (rspamd_logger_t *logger, uid_t uid, gid_t gid); -/** - * Close and open log again for privileged processes - */ -gint reopen_log_priv (rspamd_logger_t *logger, uid_t uid, gid_t gid); - -/** - * Set log pid - */ -void update_log_pid (GQuark ptype, rspamd_logger_t *logger); - -/** - * Flush log buffer for some types of logging - */ -void flush_log_buf (rspamd_logger_t *logger); -/** - * Log function that is compatible for glib messages - */ -void rspamd_glib_log_function (const gchar *log_domain, - GLogLevelFlags log_level, const gchar *message, gpointer arg); - -/** - * Function with variable number of arguments support - */ -void rspamd_common_log_function (rspamd_logger_t *logger, - GLogLevelFlags log_level, const gchar *function, const gchar *fmt, ...); - -void rspamd_common_logv (rspamd_logger_t *logger, - GLogLevelFlags log_level, const gchar *function, const gchar *fmt, va_list args); - -/** - * Conditional debug function - */ -void rspamd_conditional_debug (rspamd_logger_t *logger, - rspamd_inet_addr_t *addr, const gchar *function, const gchar *fmt, ...) 
; - -/** - * Function with variable number of arguments support that uses static default logger - */ -void rspamd_default_log_function (GLogLevelFlags log_level, const gchar *function, - const gchar *fmt, ...); - -/** - * Varargs version of default log function - * @param log_level - * @param function - * @param fmt - * @param args - */ -void rspamd_default_logv (GLogLevelFlags log_level, const gchar *function, const gchar *fmt, va_list args); - -/** - * Temporary turn on debug - */ -void rspamd_log_debug (rspamd_logger_t *logger); - -/** - * Turn off debug - */ -void rspamd_log_nodebug (rspamd_logger_t *logger); - -/* Typical functions */ - -/* Logging in postfix style */ -#if defined(RSPAMD_MAIN) -#define msg_err(...) rspamd_common_log_function(rspamd_main->logger, G_LOG_LEVEL_CRITICAL, __FUNCTION__, __VA_ARGS__) -#define msg_warn(...) rspamd_common_log_function(rspamd_main->logger, G_LOG_LEVEL_WARNING, __FUNCTION__, __VA_ARGS__) -#define msg_info(...) rspamd_common_log_function(rspamd_main->logger, G_LOG_LEVEL_INFO, __FUNCTION__, __VA_ARGS__) -#define msg_debug(...) rspamd_conditional_debug(rspamd_main->logger, NULL, __FUNCTION__, __VA_ARGS__) -#define debug_task(...) rspamd_conditional_debug(rspamd_main->logger, &task->from_addr, __FUNCTION__, __VA_ARGS__) -#else -#define msg_err(...) rspamd_default_log_function(G_LOG_LEVEL_CRITICAL, __FUNCTION__, __VA_ARGS__) -#define msg_warn(...) rspamd_default_log_function(G_LOG_LEVEL_WARNING, __FUNCTION__, __VA_ARGS__) -#define msg_info(...) rspamd_default_log_function(G_LOG_LEVEL_INFO, __FUNCTION__, __VA_ARGS__) -#define msg_debug(...) rspamd_default_log_function(G_LOG_LEVEL_DEBUG, __FUNCTION__, __VA_ARGS__) -#define debug_task(...) 
rspamd_default_log_function(G_LOG_LEVEL_DEBUG, __FUNCTION__, __VA_ARGS__) -#endif - -#endif diff --git a/src/map.c b/src/map.c deleted file mode 100644 index 703622585..000000000 --- a/src/map.c +++ /dev/null @@ -1,1148 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -/* - * Implementation of map files handling - */ -#include "config.h" -#include "map.h" -#include "http.h" -#include "main.h" -#include "util.h" -#include "mem_pool.h" - -static const gchar *hash_fill = "1"; - -/* Http reply */ -struct http_reply { - gint code; - GHashTable *headers; - gchar *cur_header; - gint parser_state; -}; - -struct http_callback_data { - struct event ev; - struct event_base *ev_base; - struct timeval tv; - struct rspamd_map *map; - struct http_map_data *data; - struct http_reply *reply; - struct map_cb_data cbdata; - - gint state; - gint fd; -}; - -/* Value in seconds after whitch we would try to do stat on list file */ - -/* HTTP timeouts */ -#define HTTP_CONNECT_TIMEOUT 2 -#define HTTP_READ_TIMEOUT 10 - -/** - * Helper for HTTP connection establishment - */ -static gint -connect_http (struct rspamd_map *map, struct http_map_data *data, gboolean is_async) -{ - gint sock; - - if ((sock = make_tcp_socket (data->addr, FALSE, is_async)) == -1) { - msg_info ("cannot connect to http server %s: %d, %s", data->host, errno, strerror (errno)); - return -1; - } - - return sock; -} - -/** - * Write HTTP request - */ -static void -write_http_request (struct rspamd_map *map, struct http_map_data *data, gint sock) -{ - gchar outbuf[BUFSIZ], datebuf[128]; - gint r; - struct tm *tm; - - tm = gmtime (&data->last_checked); - strftime (datebuf, sizeof (datebuf), "%a, %d %b %Y %H:%M:%S %Z", tm); - r = rspamd_snprintf (outbuf, sizeof (outbuf), "GET %s%s HTTP/1.1" CRLF "Connection: close" CRLF "Host: %s" CRLF, (*data->path == '/') ? 
"" : "/", data->path, data->host); - if (data->last_checked != 0) { - r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, "If-Modified-Since: %s" CRLF, datebuf); - } - - r += rspamd_snprintf (outbuf + r, sizeof (outbuf) - r, CRLF); - - if (write (sock, outbuf, r) == -1) { - msg_err ("failed to write request: %d, %s", errno, strerror (errno)); - } -} - -/** - * FSM for parsing HTTP reply - */ -static gchar * -parse_http_reply (gchar * chunk, gint len, struct http_reply *reply) -{ - gchar *s, *p, *err_str, *tmp; - p = chunk; - s = chunk; - - while (p - chunk < len) { - switch (reply->parser_state) { - /* Search status code */ - case 0: - /* Search for status code */ - if (*p != ' ') { - p++; - } - else { - /* Try to parse HTTP reply code */ - reply->code = strtoul (++p, (gchar **)&err_str, 10); - if (*err_str != ' ') { - msg_info ("error while reading HTTP status code: %s", p); - return NULL; - } - /* Now skip to end of status string */ - reply->parser_state = 1; - continue; - } - break; - /* Skip to end of line */ - case 1: - if (*p == '\n') { - /* Switch to read header state */ - reply->parser_state = 2; - } - /* Each skipped symbol is proceeded */ - s = ++p; - break; - /* Read header value */ - case 2: - if (*p == ':') { - reply->cur_header = g_malloc (p - s + 1); - rspamd_strlcpy (reply->cur_header, s, p - s + 1); - reply->parser_state = 3; - } - else if (*p == '\r' && *(p + 1) == '\n') { - /* Last empty line */ - reply->parser_state = 5; - } - p++; - break; - /* Skip spaces after header name */ - case 3: - if (*p != ' ') { - s = p; - reply->parser_state = 4; - } - else { - p++; - } - break; - /* Read header value */ - case 4: - if (*p == '\r') { - if (reply->cur_header != NULL) { - tmp = g_malloc (p - s + 1); - rspamd_strlcpy (tmp, s, p - s + 1); - g_hash_table_insert (reply->headers, reply->cur_header, tmp); - reply->cur_header = NULL; - } - reply->parser_state = 1; - } - p++; - break; - case 5: - /* Set pointer to begining of HTTP body */ - p++; - s = p; - 
reply->parser_state = 6; - break; - case 6: - /* Headers parsed, just return */ - return p; - break; - } - } - - return s; -} - -/** - * Read and parse chunked header - */ -static gint -read_chunk_header (gchar * buf, gint len, struct http_map_data *data) -{ - gchar chunkbuf[32], *p, *c, *err_str; - gint skip = 0; - - p = chunkbuf; - c = buf; - /* Find hex digits */ - while (g_ascii_isxdigit (*c) && p - chunkbuf < (gint)(sizeof (chunkbuf) - 1) && skip < len) { - *p++ = *c++; - skip++; - } - *p = '\0'; - data->chunk = strtoul (chunkbuf, &err_str, 16); - if (*err_str != '\0') { - return -1; - } - - /* Now skip to CRLF */ - while (*c != '\n' && skip < len) { - c++; - skip++; - } - if (*c == '\n' && skip < len) { - skip++; - c++; - } - data->chunk_remain = data->chunk; - - return skip; -} - -/** - * Helper callback for reading chunked reply - */ -static gboolean -read_http_chunked (gchar * buf, size_t len, struct rspamd_map *map, struct http_map_data *data, struct map_cb_data *cbdata) -{ - gchar *p = buf, *remain; - gint skip = 0; - - if (data->chunked == 1) { - /* Read first chunk data */ - if ((skip = read_chunk_header (buf, len, data)) != -1) { - p += skip; - len -= skip; - data->chunked = 2; - } - else { - msg_info ("invalid chunked reply: %*s", (gint)len, buf); - return FALSE; - } - } - - if (data->chunk_remain == 0) { - /* Read another chunk */ - if ((skip = read_chunk_header (buf, len, data)) != -1) { - p += skip; - len -= skip; - } - else { - msg_info ("invalid chunked reply: %*s", (gint)len, buf); - return FALSE; - } - if (data->chunk == 0) { - return FALSE; - } - } - - if (data->chunk_remain <= len ) { - /* Call callback and move remaining buffer */ - remain = map->read_callback (map->pool, p, data->chunk_remain, cbdata); - if (remain != NULL && remain != p + data->chunk_remain) { - /* Copy remaining buffer to start of buffer */ - data->rlen = len - (remain - p); - memmove (buf, remain, data->rlen); - data->chunk_remain -= data->rlen; - } - else { - /* Copy 
other part */ - data->rlen = len - data->chunk_remain; - if (data->rlen > 0) { - memmove (buf, p + data->chunk_remain, data->rlen); - } - data->chunk_remain = 0; - } - - } - else { - /* Just read another portion of chunk */ - data->chunk_remain -= len; - remain = map->read_callback (map->pool, p, len, cbdata); - if (remain != NULL && remain != p + len) { - /* copy remaining buffer to start of buffer */ - data->rlen = len - (remain - p); - memmove (buf, remain, data->rlen); - } - } - - return TRUE; -} - -/** - * Callback for reading HTTP reply - */ -static gboolean -read_http_common (struct rspamd_map *map, struct http_map_data *data, struct http_reply *reply, struct map_cb_data *cbdata, gint fd) -{ - gchar *remain, *pos; - ssize_t r; - gchar *te, *date; - - if ((r = read (fd, data->read_buf + data->rlen, sizeof (data->read_buf) - data->rlen)) > 0) { - r += data->rlen; - data->rlen = 0; - remain = parse_http_reply (data->read_buf, r, reply); - if (remain != NULL && remain != data->read_buf) { - /* copy remaining data->read_buffer to start of data->read_buffer */ - data->rlen = r - (remain - data->read_buf); - memmove (data->read_buf, remain, data->rlen); - r = data->rlen; - data->rlen = 0; - } - if (r <= 0) { - return TRUE; - } - if (reply->parser_state == 6) { - /* If reply header is parsed successfully, try to read further data */ - if (reply->code != 200 && reply->code != 304) { - msg_err ("got error reply from server %s, %d", data->host, reply->code); - return FALSE; - } - else if (reply->code == 304) { - /* Do not read anything */ - return FALSE; - } - pos = data->read_buf; - /* Check for chunked */ - if (data->chunked == 0) { - if ((te = g_hash_table_lookup (reply->headers, "Transfer-Encoding")) != NULL) { - if (g_ascii_strcasecmp (te, "chunked") == 0) { - data->chunked = 1; - } - else { - data->chunked = -1; - } - } - else { - data->chunked = -1; - } - } - /* Check for date */ - date = g_hash_table_lookup (reply->headers, "Date"); - if (date != NULL) { - 
data->last_checked = rspamd_http_parse_date (date, -1); - } - else { - data->last_checked = (time_t)-1; - } - - if (data->chunked > 0) { - return read_http_chunked (data->read_buf, r, map, data, cbdata); - } - /* Read more data */ - remain = map->read_callback (map->pool, pos, r, cbdata); - if (remain != NULL && remain != pos + r) { - /* copy remaining data->read_buffer to start of data->read_buffer */ - data->rlen = r - (remain - pos); - memmove (pos, remain, data->rlen); - } - } - } - else { - return FALSE; - } - - return TRUE; -} - -/** - * Sync read of HTTP reply - */ -static void -read_http_sync (struct rspamd_map *map, struct http_map_data *data) -{ - struct map_cb_data cbdata; - gint fd; - struct http_reply *repl; - - if (map->read_callback == NULL || map->fin_callback == NULL) { - msg_err ("bad callback for reading map file"); - return; - } - - /* Connect synced */ - if ((fd = connect_http (map, data, FALSE)) == -1) { - return; - } - write_http_request (map, data, fd); - - cbdata.state = 0; - cbdata.map = map; - cbdata.prev_data = *map->user_data; - cbdata.cur_data = NULL; - - repl = g_malloc (sizeof (struct http_reply)); - repl->parser_state = 0; - repl->code = 404; - repl->headers = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, g_free, g_free); - - while (read_http_common (map, data, repl, &cbdata, fd)); - - close (fd); - - map->fin_callback (map->pool, &cbdata); - *map->user_data = cbdata.cur_data; - if (data->last_checked == (time_t)-1) { - data->last_checked = time (NULL); - } - - g_hash_table_destroy (repl->headers); - g_free (repl); -} - -/** - * Callback for reading data from file - */ -static void -read_map_file (struct rspamd_map *map, struct file_map_data *data) -{ - struct map_cb_data cbdata; - gchar buf[BUFSIZ], *remain; - ssize_t r; - gint fd, rlen; - - if (map->read_callback == NULL || map->fin_callback == NULL) { - msg_err ("bad callback for reading map file"); - return; - } - - if ((fd = open (data->filename, O_RDONLY)) 
== -1) { - msg_warn ("cannot open file '%s': %s", data->filename, strerror (errno)); - return; - } - - cbdata.state = 0; - cbdata.prev_data = *map->user_data; - cbdata.cur_data = NULL; - cbdata.map = map; - - rlen = 0; - while ((r = read (fd, buf + rlen, sizeof (buf) - rlen - 1)) > 0) { - r += rlen; - buf[r] = '\0'; - remain = map->read_callback (map->pool, buf, r, &cbdata); - if (remain != NULL) { - /* copy remaining buffer to start of buffer */ - rlen = r - (remain - buf); - memmove (buf, remain, rlen); - } - } - - close (fd); - - map->fin_callback (map->pool, &cbdata); - *map->user_data = cbdata.cur_data; -} - -/** - * FSM for parsing lists - */ -gchar * -abstract_parse_kv_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data, insert_func func) -{ - gchar *c, *p, *key = NULL, *value = NULL; - - p = chunk; - c = p; - - while (p - chunk < len) { - switch (data->state) { - case 0: - /* read key */ - /* Check here comments, eol and end of buffer */ - if (*p == '#') { - if (key != NULL && p - c >= 0) { - value = rspamd_mempool_alloc (pool, p - c + 1); - memcpy (value, c, p - c); - value[p - c] = '\0'; - value = g_strstrip (value); - func (data->cur_data, key, value); - msg_debug ("insert kv pair: %s -> %s", key, value); - } - data->state = 99; - } - else if (*p == '\r' || *p == '\n' || p - chunk == len - 1) { - if (key != NULL && p - c >= 0) { - value = rspamd_mempool_alloc (pool, p - c + 1); - memcpy (value, c, p - c); - value[p - c] = '\0'; - - value = g_strstrip (value); - func (data->cur_data, key, value); - msg_debug ("insert kv pair: %s -> %s", key, value); - } - else if (key == NULL && p - c > 0) { - /* Key only line */ - key = rspamd_mempool_alloc (pool, p - c + 1); - memcpy (key, c, p - c); - key[p - c] = '\0'; - value = rspamd_mempool_alloc (pool, 1); - *value = '\0'; - func (data->cur_data, key, value); - msg_debug ("insert kv pair: %s -> %s", key, value); - } - data->state = 100; - key = NULL; - } - else if (g_ascii_isspace 
(*p)) { - if (p - c > 0) { - key = rspamd_mempool_alloc (pool, p - c + 1); - memcpy (key, c, p - c); - key[p - c] = '\0'; - data->state = 2; - } - else { - key = NULL; - } - } - else { - p ++; - } - break; - case 2: - /* Skip spaces before value */ - if (!g_ascii_isspace (*p)) { - c = p; - data->state = 0; - } - else { - p ++; - } - break; - case 99: - /* SKIP_COMMENT */ - /* Skip comment till end of line */ - if (*p == '\r' || *p == '\n') { - while ((*p == '\r' || *p == '\n') && p - chunk < len) { - p++; - } - c = p; - key = NULL; - data->state = 0; - } - else { - p++; - } - break; - case 100: - /* Skip \r\n and whitespaces */ - if (*p == '\r' || *p == '\n' || g_ascii_isspace (*p)) { - p ++; - } - else { - c = p; - key = NULL; - data->state = 0; - } - break; - } - } - - return c; -} - -gchar * -abstract_parse_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data, insert_func func) -{ - gchar *s, *p, *str, *start; - - p = chunk; - start = p; - - str = g_malloc (len + 1); - s = str; - - while (p - chunk < len) { - switch (data->state) { - /* READ_SYMBOL */ - case 0: - if (*p == '#') { - /* Got comment */ - if (s != str) { - /* Save previous string in lines like: "127.0.0.1 #localhost" */ - *s = '\0'; - s = rspamd_mempool_strdup (pool, g_strstrip (str)); - if (strlen (s) > 0) { - func (data->cur_data, s, hash_fill); - } - s = str; - start = p; - } - data->state = 1; - } - else if (*p == '\r' || *p == '\n') { - /* Got EOL marker, save stored string */ - if (s != str) { - *s = '\0'; - s = rspamd_mempool_strdup (pool, g_strstrip (str)); - if (strlen (s) > 0) { - func (data->cur_data, s, hash_fill); - } - s = str; - } - /* Skip EOL symbols */ - while ((*p == '\r' || *p == '\n') && p - chunk < len) { - p++; - } - start = p; - } - else { - /* Store new string in s */ - *s = *p; - s++; - p++; - } - break; - /* SKIP_COMMENT */ - case 1: - /* Skip comment till end of line */ - if (*p == '\r' || *p == '\n') { - while ((*p == '\r' || *p == '\n') && p 
- chunk < len) { - p++; - } - s = str; - start = p; - data->state = 0; - } - else { - p++; - } - break; - } - } - - g_free (str); - - return start; -} - -/** - * Radix tree helper function - */ -static void -radix_tree_insert_helper (gpointer st, gconstpointer key, gpointer value) -{ - radix_tree_t *tree = st; - - guint32 mask = 0xFFFFFFFF; - guint32 ip; - gchar *token, *ipnet, *err_str, **strv, **cur; - struct in_addr ina; - gint k; - - /* Split string if there are multiple items inside a single string */ - strv = g_strsplit_set ((gchar *)key, " ,;", 0); - cur = strv; - while (*cur) { - if (**cur == '\0') { - cur++; - continue; - } - /* Extract ipnet */ - ipnet = *cur; - token = strsep (&ipnet, "/"); - - if (ipnet != NULL) { - errno = 0; - /* Get mask */ - k = strtoul (ipnet, &err_str, 10); - if (errno != 0) { - msg_warn ("invalid netmask, error detected on symbol: %s, erorr: %s", err_str, strerror (errno)); - k = 32; - } - else if (k > 32 || k < 0) { - msg_warn ("invalid netmask value: %d", k); - k = 32; - } - /* Calculate mask based on CIDR presentation */ - mask = mask << (32 - k); - } - - /* Check IP */ - if (inet_aton (token, &ina) == 0) { - msg_err ("invalid ip address: %s", token); - return; - } - - /* Insert ip in a tree */ - ip = ntohl ((guint32) ina.s_addr); - k = radix32tree_insert (tree, ip, mask, 1); - if (k == -1) { - msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (ina), mask); - } - else if (k == 1) { - msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (ina), mask); - } - cur++; - } - - g_strfreev (strv); -} - -/* Helpers */ -gchar * -read_host_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) -{ - if (data->cur_data == NULL) { - data->cur_data = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); - } - return abstract_parse_list (pool, chunk, len, data, (insert_func) g_hash_table_insert); -} - -void -fin_host_list (rspamd_mempool_t * pool, struct map_cb_data *data) -{ - if 
(data->prev_data) { - g_hash_table_destroy (data->prev_data); - } -} - -gchar * -read_kv_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) -{ - if (data->cur_data == NULL) { - data->cur_data = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); - } - return abstract_parse_kv_list (pool, chunk, len, data, (insert_func) g_hash_table_insert); -} - -void -fin_kv_list (rspamd_mempool_t * pool, struct map_cb_data *data) -{ - if (data->prev_data) { - g_hash_table_destroy (data->prev_data); - } -} - -gchar * -read_radix_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) -{ - if (data->cur_data == NULL) { - data->cur_data = radix_tree_create (); - } - return abstract_parse_list (pool, chunk, len, data, (insert_func) radix_tree_insert_helper); -} - -void -fin_radix_list (rspamd_mempool_t * pool, struct map_cb_data *data) -{ - if (data->prev_data) { - radix_tree_free (data->prev_data); - } -} - -/** - * Common file callback - */ -static void -file_callback (gint fd, short what, void *ud) -{ - struct rspamd_map *map = ud; - struct file_map_data *data = map->map_data; - struct stat st; - gdouble jittered_sec; - - /* Plan event again with jitter */ - evtimer_del (&map->ev); - jittered_sec = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout); - double_to_tv (jittered_sec, &map->tv); - - evtimer_add (&map->ev, &map->tv); - - if (g_atomic_int_get (map->locked)) { - msg_info ("don't try to reread map as it is locked by other process, will reread it later"); - return; - } - - if (stat (data->filename, &st) != -1 && (st.st_mtime > data->st.st_mtime || data->st.st_mtime == -1)) { - /* File was modified since last check */ - memcpy (&data->st, &st, sizeof (struct stat)); - } - else { - return; - } - - msg_info ("rereading map file %s", data->filename); - read_map_file (map, data); -} - -/** - * Callback for destroying HTTP callback data - */ -static void -free_http_cbdata (struct 
http_callback_data *cbd) -{ - if (cbd->reply) { - g_hash_table_destroy (cbd->reply->headers); - g_free (cbd->reply); - } - g_atomic_int_set (cbd->map->locked, 0); - event_del (&cbd->ev); - close (cbd->fd); - g_free (cbd); -} - -/** - * Async HTTP request parser - */ -static void -http_async_callback (gint fd, short what, void *ud) -{ - struct http_callback_data *cbd = ud; - - /* Begin of connection */ - if (what == EV_WRITE) { - if (cbd->state == 0) { - /* Can write request */ - write_http_request (cbd->map, cbd->data, fd); - /* Plan reading */ - event_set (&cbd->ev, cbd->fd, EV_READ | EV_PERSIST, http_async_callback, cbd); - event_base_set (cbd->ev_base, &cbd->ev); - cbd->tv.tv_sec = HTTP_READ_TIMEOUT; - cbd->tv.tv_usec = 0; - cbd->state = 1; - /* Allocate reply structure */ - cbd->reply = g_malloc (sizeof (struct http_reply)); - cbd->reply->parser_state = 0; - cbd->reply->code = 404; - cbd->reply->headers = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, g_free, g_free); - cbd->cbdata.state = 0; - cbd->cbdata.prev_data = *cbd->map->user_data; - cbd->cbdata.cur_data = NULL; - cbd->cbdata.map = cbd->map; - cbd->data->rlen = 0; - cbd->data->chunk = 0; - cbd->data->chunk_remain = 0; - cbd->data->chunked = FALSE; - cbd->data->read_buf[0] = '\0'; - - event_add (&cbd->ev, &cbd->tv); - } - else { - msg_err ("bad state when got write readiness"); - free_http_cbdata (cbd); - return; - } - } - /* Got reply, parse it */ - else if (what == EV_READ) { - if (cbd->state >= 1) { - if (!read_http_common (cbd->map, cbd->data, cbd->reply, &cbd->cbdata, cbd->fd)) { - /* Handle Not-Modified in a special way */ - if (cbd->reply->code == 304) { - if (cbd->data->last_checked == (time_t)-1) { - cbd->data->last_checked = time (NULL); - } - msg_info ("data is not modified for server %s", cbd->data->host); - } - else if (cbd->cbdata.cur_data != NULL) { - /* Destroy old data and start reading request data */ - cbd->map->fin_callback (cbd->map->pool, &cbd->cbdata); - 
*cbd->map->user_data = cbd->cbdata.cur_data; - if (cbd->data->last_checked == (time_t)-1) { - cbd->data->last_checked = time (NULL); - } - } - if (cbd->state == 1 && cbd->reply->code == 200) { - /* Write to log that data is modified */ - msg_info ("rereading map data from %s", cbd->data->host); - } - - free_http_cbdata (cbd); - return; - } - else if (cbd->state == 1) { - /* Write to log that data is modified */ - msg_info ("rereading map data from %s", cbd->data->host); - } - cbd->state = 2; - } - } - else { - msg_err ("connection with http server terminated incorrectly"); - free_http_cbdata (cbd); - } -} - -/** - * Async HTTP callback - */ -static void -http_callback (gint fd, short what, void *ud) -{ - struct rspamd_map *map = ud; - struct http_map_data *data = map->map_data; - gint sock; - struct http_callback_data *cbd; - gdouble jittered_sec; - - /* Plan event again with jitter */ - evtimer_del (&map->ev); - jittered_sec = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout); - double_to_tv (jittered_sec, &map->tv); - evtimer_add (&map->ev, &map->tv); - - if (g_atomic_int_get (map->locked)) { - msg_info ("don't try to reread map as it is locked by other process, will reread it later"); - return; - } - - g_atomic_int_inc (map->locked); - - /* Connect asynced */ - if ((sock = connect_http (map, data, TRUE)) == -1) { - g_atomic_int_set (map->locked, 0); - return; - } - else { - /* Plan event */ - cbd = g_malloc (sizeof (struct http_callback_data)); - cbd->ev_base = map->ev_base; - event_set (&cbd->ev, sock, EV_WRITE, http_async_callback, cbd); - event_base_set (cbd->ev_base, &cbd->ev); - cbd->tv.tv_sec = HTTP_CONNECT_TIMEOUT; - cbd->tv.tv_usec = 0; - cbd->map = map; - cbd->data = data; - cbd->state = 0; - cbd->fd = sock; - cbd->reply = NULL; - event_add (&cbd->ev, &cbd->tv); - } -} - -/* Start watching event for all maps */ -void -start_map_watch (struct config_file *cfg, struct event_base *ev_base) -{ - GList *cur = cfg->maps; - struct rspamd_map 
*map; - struct file_map_data *fdata; - gdouble jittered_sec; - - /* First of all do synced read of data */ - while (cur) { - map = cur->data; - map->ev_base = ev_base; - if (map->protocol == MAP_PROTO_FILE) { - evtimer_set (&map->ev, file_callback, map); - event_base_set (map->ev_base, &map->ev); - /* Read initial data */ - fdata = map->map_data; - if (fdata->st.st_mtime != -1) { - /* Do not try to read non-existent file */ - read_map_file (map, map->map_data); - } - /* Plan event with jitter */ - jittered_sec = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout) / 2.; - double_to_tv (jittered_sec, &map->tv); - evtimer_add (&map->ev, &map->tv); - } - else if (map->protocol == MAP_PROTO_HTTP) { - evtimer_set (&map->ev, http_callback, map); - event_base_set (map->ev_base, &map->ev); - /* Read initial data */ - read_http_sync (map, map->map_data); - /* Plan event with jitter */ - jittered_sec = (map->cfg->map_timeout + g_random_double () * map->cfg->map_timeout); - double_to_tv (jittered_sec, &map->tv); - evtimer_add (&map->ev, &map->tv); - } - cur = g_list_next (cur); - } -} - -void -remove_all_maps (struct config_file *cfg) -{ - g_list_free (cfg->maps); - cfg->maps = NULL; - if (cfg->map_pool != NULL) { - rspamd_mempool_delete (cfg->map_pool); - cfg->map_pool = NULL; - } -} - -gboolean -check_map_proto (const gchar *map_line, gint *res, const gchar **pos) -{ - if (g_ascii_strncasecmp (map_line, "http://", sizeof ("http://") - 1) == 0) { - if (res && pos) { - *res = MAP_PROTO_HTTP; - *pos = map_line + sizeof ("http://") - 1; - } - } - else if (g_ascii_strncasecmp (map_line, "file://", sizeof ("file://") - 1) == 0) { - if (res && pos) { - *res = MAP_PROTO_FILE; - *pos = map_line + sizeof ("file://") - 1; - } - } - else if (*map_line == '/') { - /* Trivial file case */ - *res = MAP_PROTO_FILE; - *pos = map_line; - } - else { - msg_debug ("invalid map fetching protocol: %s", map_line); - return FALSE; - } - - return TRUE; -} - -gboolean -add_map (struct 
config_file *cfg, const gchar *map_line, const gchar *description, - map_cb_t read_callback, map_fin_cb_t fin_callback, void **user_data) -{ - struct rspamd_map *new_map; - enum fetch_proto proto; - const gchar *def, *p, *hostend; - struct file_map_data *fdata; - struct http_map_data *hdata; - gchar portbuf[6]; - gint i, s, r; - struct addrinfo hints, *res; - - /* First of all detect protocol line */ - if (!check_map_proto (map_line, (int *)&proto, &def)) { - return FALSE; - } - /* Constant pool */ - if (cfg->map_pool == NULL) { - cfg->map_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); - } - new_map = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct rspamd_map)); - new_map->read_callback = read_callback; - new_map->fin_callback = fin_callback; - new_map->user_data = user_data; - new_map->protocol = proto; - new_map->cfg = cfg; - new_map->id = g_random_int (); - new_map->locked = rspamd_mempool_alloc0_shared (cfg->cfg_pool, sizeof (gint)); - - if (proto == MAP_PROTO_FILE) { - new_map->uri = rspamd_mempool_strdup (cfg->cfg_pool, def); - def = new_map->uri; - } - else { - new_map->uri = rspamd_mempool_strdup (cfg->cfg_pool, map_line); - } - if (description != NULL) { - new_map->description = rspamd_mempool_strdup (cfg->cfg_pool, description); - } - - /* Now check for each proto separately */ - if (proto == MAP_PROTO_FILE) { - fdata = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct file_map_data)); - if (access (def, R_OK) == -1) { - if (errno != ENOENT) { - msg_err ("cannot open file '%s': %s", def, strerror (errno)); - return FALSE; - - } - msg_info ("map '%s' is not found, but it can be loaded automatically later", def); - /* We still can add this file */ - fdata->st.st_mtime = -1; - } - else { - stat (def, &fdata->st); - } - fdata->filename = rspamd_mempool_strdup (cfg->map_pool, def); - new_map->map_data = fdata; - } - else if (proto == MAP_PROTO_HTTP) { - hdata = rspamd_mempool_alloc0 (cfg->map_pool, sizeof (struct http_map_data)); - /* Try 
to search port */ - if ((p = strchr (def, ':')) != NULL) { - hostend = p; - i = 0; - p++; - while (g_ascii_isdigit (*p) && i < (gint)sizeof (portbuf) - 1) { - portbuf[i++] = *p++; - } - if (*p != '/') { - msg_info ("bad http map definition: %s", def); - return FALSE; - } - portbuf[i] = '\0'; - hdata->port = atoi (portbuf); - } - else { - /* Default http port */ - rspamd_snprintf (portbuf, sizeof (portbuf), "80"); - hdata->port = 80; - /* Now separate host from path */ - if ((p = strchr (def, '/')) == NULL) { - msg_info ("bad http map definition: %s", def); - return FALSE; - } - hostend = p; - } - hdata->host = rspamd_mempool_alloc (cfg->map_pool, hostend - def + 1); - rspamd_strlcpy (hdata->host, def, hostend - def + 1); - hdata->path = rspamd_mempool_strdup (cfg->map_pool, p); - hdata->rlen = 0; - /* Now try to resolve */ - memset (&hints, 0, sizeof (hints)); - hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */ - hints.ai_socktype = SOCK_STREAM; /* Stream socket */ - hints.ai_flags = 0; - hints.ai_protocol = 0; /* Any protocol */ - hints.ai_canonname = NULL; - hints.ai_addr = NULL; - hints.ai_next = NULL; - - if ((r = getaddrinfo (hdata->host, portbuf, &hints, &res)) == 0) { - hdata->addr = res; - rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)freeaddrinfo, hdata->addr); - } - else { - msg_err ("address resolution for %s failed: %s", hdata->host, gai_strerror (r)); - return FALSE; - } - /* Now try to connect */ - if ((s = make_tcp_socket (hdata->addr, FALSE, FALSE)) == -1) { - msg_info ("cannot connect to http server %s: %d, %s", hdata->host, errno, strerror (errno)); - return FALSE; - } - close (s); - new_map->map_data = hdata; - } - /* Temp pool */ - new_map->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); - - cfg->maps = g_list_prepend (cfg->maps, new_map); - - return TRUE; -} diff --git a/src/map.h b/src/map.h deleted file mode 100644 index 1f34cdcc0..000000000 --- a/src/map.h +++ /dev/null @@ -1,134 +0,0 @@ -#ifndef 
RSPAMD_MAP_H -#define RSPAMD_MAP_H - -#include "config.h" -#include "mem_pool.h" -#include "radix.h" - -/** - * Maps API is designed to load lists data from different dynamic sources. - * It monitor files and HTTP locations for modifications and reload them if they are - * modified. - */ - -enum fetch_proto { - MAP_PROTO_FILE, - MAP_PROTO_HTTP, -}; - -/** - * Data specific to file maps - */ -struct file_map_data { - const gchar *filename; - struct stat st; -}; - -/** - * Data specific to HTTP maps - */ -struct http_map_data { - struct addrinfo *addr; - guint16 port; - gchar *path; - gchar *host; - time_t last_checked; - gshort chunked; - gchar read_buf[BUFSIZ]; - guint32 rlen; - guint32 chunk; - guint32 chunk_remain; -}; - -struct map_cb_data; - -/** - * Callback types - */ -typedef gchar* (*map_cb_t)(rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data); -typedef void (*map_fin_cb_t)(rspamd_mempool_t *pool, struct map_cb_data *data); - -/** - * Common map object - */ -struct config_file; -struct rspamd_map { - rspamd_mempool_t *pool; - struct config_file *cfg; - enum fetch_proto protocol; - map_cb_t read_callback; - map_fin_cb_t fin_callback; - void **user_data; - struct event ev; - struct timeval tv; - struct event_base *ev_base; - void *map_data; - gchar *uri; - gchar *description; - guint32 id; - guint32 checksum; - /* Shared lock for temporary disabling of map reading (e.g. 
when this map is written by UI) */ - gint *locked; -}; - -/** - * Callback data for async load - */ -struct map_cb_data { - struct rspamd_map *map; - gint state; - void *prev_data; - void *cur_data; -}; - - -/** - * Check map protocol - */ -gboolean check_map_proto (const gchar *map_line, gint *res, const gchar **pos); -/** - * Add map from line - */ -gboolean add_map (struct config_file *cfg, const gchar *map_line, const gchar *description, - map_cb_t read_callback, map_fin_cb_t fin_callback, void **user_data); - -/** - * Start watching of maps by adding events to libevent event loop - */ -void start_map_watch (struct config_file *cfg, struct event_base *ev_base); - -/** - * Remove all maps watched (remove events) - */ -void remove_all_maps (struct config_file *cfg); - -typedef void (*insert_func) (gpointer st, gconstpointer key, gconstpointer value); - -/** - * Common callbacks for frequent types of lists - */ - -/** - * Radix list is a list like ip/mask - */ -gchar* read_radix_list (rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data); -void fin_radix_list (rspamd_mempool_t *pool, struct map_cb_data *data); - -/** - * Host list is an ordinal list of hosts or domains - */ -gchar* read_host_list (rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data); -void fin_host_list (rspamd_mempool_t *pool, struct map_cb_data *data); - -/** - * Kv list is an ordinal list of keys and values separated by whitespace - */ -gchar* read_kv_list (rspamd_mempool_t *pool, gchar *chunk, gint len, struct map_cb_data *data); -void fin_kv_list (rspamd_mempool_t *pool, struct map_cb_data *data); - -/** - * FSM for lists parsing (support comments, blank lines and partial replies) - */ -gchar * abstract_parse_list (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data, insert_func func); - -#endif diff --git a/src/mem_pool.c b/src/mem_pool.c deleted file mode 100644 index 8f1105add..000000000 --- a/src/mem_pool.c +++ /dev/null @@ 
-1,776 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "mem_pool.h" -#include "fstring.h" -#include "logger.h" -#include "util.h" -#include "main.h" - -/* Sleep time for spin lock in nanoseconds */ -#define MUTEX_SLEEP_TIME 10000000L -#define MUTEX_SPIN_COUNT 100 - -#ifdef _THREAD_SAFE -pthread_mutex_t stat_mtx = PTHREAD_MUTEX_INITIALIZER; -# define STAT_LOCK() do { pthread_mutex_lock (&stat_mtx); } while (0) -# define STAT_UNLOCK() do { pthread_mutex_unlock (&stat_mtx); } while (0) -#else -# define STAT_LOCK() do {} while (0) -# define STAT_UNLOCK() do {} while (0) -#endif - -#define POOL_MTX_LOCK() do { rspamd_mutex_lock (pool->mtx); } while (0) -#define POOL_MTX_UNLOCK() do { rspamd_mutex_unlock (pool->mtx); } while (0) - -/* - * This define specify whether we should check all pools for free space for new object - * or just begin scan from current (recently attached) pool - * If MEMORY_GREEDY is defined, then we scan all pools to find free space (more CPU usage, slower - * but requires less memory). If it is not defined check only current pool and if object is too large - * to place in it allocate new one (this may cause huge CPU usage in some cases too, but generally faster than - * greedy method) - */ -#undef MEMORY_GREEDY - -/* Internal statistic */ -static rspamd_mempool_stat_t *mem_pool_stat = NULL; - -/** - * Function that return free space in pool page - * @param x pool page struct - */ -static gint -pool_chain_free (struct _pool_chain *chain) -{ - return (gint)chain->len - (chain->pos - chain->begin + MEM_ALIGNMENT); -} - -static struct _pool_chain * -pool_chain_new (gsize size) -{ - struct _pool_chain *chain; - - g_return_val_if_fail (size > 0, NULL); - - chain = g_slice_alloc (sizeof (struct _pool_chain)); - - if (chain == NULL) { - msg_err ("cannot allocate %z bytes, aborting", sizeof (struct _pool_chain)); - abort (); - } - - chain->begin = g_slice_alloc (size); - if (chain->begin == NULL) { - msg_err ("cannot allocate %z bytes, aborting", size); - abort (); - } - - 
chain->pos = align_ptr (chain->begin, MEM_ALIGNMENT); - chain->len = size; - chain->next = NULL; - STAT_LOCK (); - mem_pool_stat->bytes_allocated += size; - mem_pool_stat->chunks_allocated++; - STAT_UNLOCK (); - - return chain; -} - -static struct _pool_chain_shared * -pool_chain_new_shared (gsize size) -{ - struct _pool_chain_shared *chain; - gpointer map; - - -#if defined(HAVE_MMAP_ANON) - map = mmap (NULL, size + sizeof (struct _pool_chain_shared), PROT_READ | PROT_WRITE, MAP_ANON | MAP_SHARED, -1, 0); - if (map == MAP_FAILED) { - msg_err ("cannot allocate %z bytes, aborting", size + sizeof (struct _pool_chain)); - abort (); - } - chain = (struct _pool_chain_shared *)map; - chain->begin = ((guint8 *) chain) + sizeof (struct _pool_chain_shared); -#elif defined(HAVE_MMAP_ZERO) - gint fd; - - fd = open ("/dev/zero", O_RDWR); - if (fd == -1) { - return NULL; - } - map = mmap (NULL, size + sizeof (struct _pool_chain_shared), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (map == MAP_FAILED) { - msg_err ("cannot allocate %z bytes, aborting", size + sizeof (struct _pool_chain)); - abort (); - } - chain = (struct _pool_chain_shared *)map; - chain->begin = ((guint8 *) chain) + sizeof (struct _pool_chain_shared); -#else -# error No mmap methods are defined -#endif - chain->pos = align_ptr (chain->begin, MEM_ALIGNMENT); - chain->len = size; - chain->lock = NULL; - chain->next = NULL; - STAT_LOCK (); - mem_pool_stat->shared_chunks_allocated++; - mem_pool_stat->bytes_allocated += size; - STAT_UNLOCK (); - - return chain; -} - - -/** - * Allocate new memory poll - * @param size size of pool's page - * @return new memory pool object - */ -rspamd_mempool_t * -rspamd_mempool_new (gsize size) -{ - rspamd_mempool_t *new; - gpointer map; - - g_return_val_if_fail (size > 0, NULL); - /* Allocate statistic structure if it is not allocated before */ - if (mem_pool_stat == NULL) { -#if defined(HAVE_MMAP_ANON) - map = mmap (NULL, sizeof (rspamd_mempool_stat_t), PROT_READ | PROT_WRITE, 
MAP_ANON | MAP_SHARED, -1, 0); - if (map == MAP_FAILED) { - msg_err ("cannot allocate %z bytes, aborting", sizeof (rspamd_mempool_stat_t)); - abort (); - } - mem_pool_stat = (rspamd_mempool_stat_t *)map; -#elif defined(HAVE_MMAP_ZERO) - gint fd; - - fd = open ("/dev/zero", O_RDWR); - g_assert (fd != -1); - map = mmap (NULL, sizeof (rspamd_mempool_stat_t), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (map == MAP_FAILED) { - msg_err ("cannot allocate %z bytes, aborting", sizeof (rspamd_mempool_stat_t)); - abort (); - } - mem_pool_stat = (rspamd_mempool_stat_t *)map; -#else -# error No mmap methods are defined -#endif - memset (map, 0, sizeof (rspamd_mempool_stat_t)); - } - - new = g_slice_alloc (sizeof (rspamd_mempool_t)); - if (new == NULL) { - msg_err ("cannot allocate %z bytes, aborting", sizeof (rspamd_mempool_t)); - abort (); - } - - new->cur_pool = pool_chain_new (size); - new->shared_pool = NULL; - new->first_pool = new->cur_pool; - new->cur_pool_tmp = NULL; - new->first_pool_tmp = NULL; - new->destructors = NULL; - /* Set it upon first call of set variable */ - new->variables = NULL; - new->mtx = rspamd_mutex_new (); - - mem_pool_stat->pools_allocated++; - - return new; -} - -static void * -memory_pool_alloc_common (rspamd_mempool_t * pool, gsize size, gboolean is_tmp) -{ - guint8 *tmp; - struct _pool_chain *new, *cur; - gint free; - - if (pool) { - POOL_MTX_LOCK (); -#ifdef MEMORY_GREEDY - if (is_tmp) { - cur = pool->first_pool_tmp; - } - else { - cur = pool->first_pool; - } -#else - if (is_tmp) { - cur = pool->cur_pool_tmp; - } - else { - cur = pool->cur_pool; - } -#endif - /* Find free space in pool chain */ - while (cur != NULL && - (free = pool_chain_free (cur)) < (gint)size && - cur->next != NULL) { - cur = cur->next; - } - - if (cur == NULL || (free < (gint)size && cur->next == NULL)) { - /* Allocate new pool */ - if (cur == NULL) { - if (pool->first_pool->len >= size + MEM_ALIGNMENT) { - new = pool_chain_new (pool->first_pool->len); - } - else { - 
new = pool_chain_new (size + pool->first_pool->len + MEM_ALIGNMENT); - } - /* Connect to pool subsystem */ - if (is_tmp) { - pool->first_pool_tmp = new; - } - else { - pool->first_pool = new; - } - } - else { - if (cur->len >= size + MEM_ALIGNMENT) { - new = pool_chain_new (cur->len); - } - else { - mem_pool_stat->oversized_chunks++; - new = pool_chain_new (size + pool->first_pool->len + MEM_ALIGNMENT); - } - /* Attach new pool to chain */ - cur->next = new; - } - if (is_tmp) { - pool->cur_pool_tmp = new; - } - else { - pool->cur_pool = new; - } - /* No need to align again */ - tmp = new->pos; - new->pos = tmp + size; - POOL_MTX_UNLOCK (); - return tmp; - } - /* No need to allocate page */ - tmp = align_ptr (cur->pos, MEM_ALIGNMENT); - cur->pos = tmp + size; - POOL_MTX_UNLOCK (); - return tmp; - } - return NULL; -} - - -void * -rspamd_mempool_alloc (rspamd_mempool_t * pool, gsize size) -{ - return memory_pool_alloc_common (pool, size, FALSE); -} - -void * -rspamd_mempool_alloc_tmp (rspamd_mempool_t * pool, gsize size) -{ - return memory_pool_alloc_common (pool, size, TRUE); -} - -void * -rspamd_mempool_alloc0 (rspamd_mempool_t * pool, gsize size) -{ - void *pointer = rspamd_mempool_alloc (pool, size); - if (pointer) { - memset (pointer, 0, size); - } - return pointer; -} - -void * -rspamd_mempool_alloc0_tmp (rspamd_mempool_t * pool, gsize size) -{ - void *pointer = rspamd_mempool_alloc_tmp (pool, size); - if (pointer) { - memset (pointer, 0, size); - } - return pointer; -} - -void * -rspamd_mempool_alloc0_shared (rspamd_mempool_t * pool, gsize size) -{ - void *pointer = rspamd_mempool_alloc_shared (pool, size); - if (pointer) { - memset (pointer, 0, size); - } - return pointer; -} - -void * -rspamd_mempool_alloc_shared (rspamd_mempool_t * pool, gsize size) -{ - guint8 *tmp; - struct _pool_chain_shared *new, *cur; - gint free; - - if (pool) { - g_return_val_if_fail(size > 0, NULL); - - POOL_MTX_LOCK () - ; - cur = pool->shared_pool; - if (!cur) { - cur = 
pool_chain_new_shared (pool->first_pool->len); - pool->shared_pool = cur; - } - - /* Find free space in pool chain */ - while ((free = pool_chain_free ((struct _pool_chain *) cur)) - < (gint) size && cur->next) { - cur = cur->next; - } - if (free < (gint) size && cur->next == NULL) { - /* Allocate new pool */ - - if (cur->len >= size + MEM_ALIGNMENT) { - new = pool_chain_new_shared (cur->len); - } - else { - mem_pool_stat->oversized_chunks++; - new = pool_chain_new_shared ( - size + pool->first_pool->len + MEM_ALIGNMENT); - } - /* Attach new pool to chain */ - cur->next = new; - new->pos += size; - STAT_LOCK (); - mem_pool_stat->bytes_allocated += size; - STAT_UNLOCK (); - POOL_MTX_UNLOCK () - ; - return new->begin; - } - tmp = align_ptr(cur->pos, MEM_ALIGNMENT); - cur->pos = tmp + size; - POOL_MTX_UNLOCK () - ; - return tmp; - } - return NULL; -} - - -gchar * -rspamd_mempool_strdup (rspamd_mempool_t * pool, const gchar *src) -{ - gsize len; - gchar *newstr; - - if (src == NULL) { - return NULL; - } - - len = strlen (src); - newstr = rspamd_mempool_alloc (pool, len + 1); - memcpy (newstr, src, len); - newstr[len] = '\0'; - return newstr; -} - -gchar * -rspamd_mempool_fstrdup (rspamd_mempool_t * pool, const struct f_str_s *src) -{ - gchar *newstr; - - if (src == NULL) { - return NULL; - } - - newstr = rspamd_mempool_alloc (pool, src->len + 1); - memcpy (newstr, src->begin, src->len); - newstr[src->len] = '\0'; - return newstr; -} - - -gchar * -rspamd_mempool_strdup_shared (rspamd_mempool_t * pool, const gchar *src) -{ - gsize len; - gchar *newstr; - - if (src == NULL) { - return NULL; - } - - len = strlen (src); - newstr = rspamd_mempool_alloc_shared (pool, len + 1); - memcpy (newstr, src, len); - newstr[len] = '\0'; - return newstr; -} - -/* Find pool for a pointer, returns NULL if pointer is not in pool */ -static struct _pool_chain_shared * -memory_pool_find_pool (rspamd_mempool_t * pool, void *pointer) -{ - struct _pool_chain_shared *cur = pool->shared_pool; - - 
while (cur) { - if ((guint8 *) pointer >= cur->begin && (guint8 *) pointer <= (cur->begin + cur->len)) { - return cur; - } - cur = cur->next; - } - - return NULL; -} - -static inline gint -__mutex_spin (rspamd_mempool_mutex_t * mutex) -{ - /* check spin count */ - if (g_atomic_int_dec_and_test (&mutex->spin)) { - /* This may be deadlock, so check owner of this lock */ - if (mutex->owner == getpid ()) { - /* This mutex was locked by calling process, so it is just double lock and we can easily unlock it */ - g_atomic_int_set (&mutex->spin, MUTEX_SPIN_COUNT); - return 0; - } - else if (kill (mutex->owner, 0) == -1) { - /* Owner process was not found, so release lock */ - g_atomic_int_set (&mutex->spin, MUTEX_SPIN_COUNT); - return 0; - } - /* Spin again */ - g_atomic_int_set (&mutex->spin, MUTEX_SPIN_COUNT); - } -#ifdef HAVE_ASM_PAUSE - __asm __volatile ("pause"); -#elif defined(HAVE_SCHED_YIELD) - (void)sched_yield (); -#endif - -#if defined(HAVE_NANOSLEEP) - struct timespec ts; - ts.tv_sec = 0; - ts.tv_nsec = MUTEX_SLEEP_TIME; - /* Spin */ - while (nanosleep (&ts, &ts) == -1 && errno == EINTR); -#else -# error No methods to spin are defined -#endif - return 1; -} - -static void -memory_pool_mutex_spin (rspamd_mempool_mutex_t * mutex) -{ - while (!g_atomic_int_compare_and_exchange (&mutex->lock, 0, 1)) { - if (!__mutex_spin (mutex)) { - return; - } - } -} - -/* Simple implementation of spinlock */ -void -rspamd_mempool_lock_shared (rspamd_mempool_t * pool, void *pointer) -{ - struct _pool_chain_shared *chain; - - chain = memory_pool_find_pool (pool, pointer); - if (chain == NULL) { - return; - } - if (chain->lock == NULL) { - chain->lock = rspamd_mempool_get_mutex (pool); - } - rspamd_mempool_lock_mutex (chain->lock); -} - -void -rspamd_mempool_unlock_shared (rspamd_mempool_t * pool, void *pointer) -{ - struct _pool_chain_shared *chain; - - chain = memory_pool_find_pool (pool, pointer); - if (chain == NULL) { - return; - } - if (chain->lock == NULL) { - chain->lock = 
rspamd_mempool_get_mutex (pool); - return; - } - - rspamd_mempool_unlock_mutex (chain->lock); -} - -void -rspamd_mempool_add_destructor_full (rspamd_mempool_t * pool, rspamd_mempool_destruct_t func, void *data, - const gchar *function, const gchar *line) -{ - struct _pool_destructors *cur; - - cur = rspamd_mempool_alloc (pool, sizeof (struct _pool_destructors)); - if (cur) { - POOL_MTX_LOCK (); - cur->func = func; - cur->data = data; - cur->function = function; - cur->loc = line; - cur->prev = pool->destructors; - pool->destructors = cur; - POOL_MTX_UNLOCK (); - } -} - -void -rspamd_mempool_replace_destructor (rspamd_mempool_t * pool, rspamd_mempool_destruct_t func, void *old_data, void *new_data) -{ - struct _pool_destructors *tmp; - - tmp = pool->destructors; - while (tmp) { - if (tmp->func == func && tmp->data == old_data) { - tmp->func = func; - tmp->data = new_data; - break; - } - tmp = tmp->prev; - } - -} - -void -rspamd_mempool_delete (rspamd_mempool_t * pool) -{ - struct _pool_chain *cur = pool->first_pool, *tmp; - struct _pool_chain_shared *cur_shared = pool->shared_pool, *tmp_shared; - struct _pool_destructors *destructor = pool->destructors; - - POOL_MTX_LOCK (); - /* Call all pool destructors */ - while (destructor) { - /* Avoid calling destructors for NULL pointers */ - if (destructor->data != NULL) { - destructor->func (destructor->data); - } - destructor = destructor->prev; - } - - while (cur) { - tmp = cur; - cur = cur->next; - STAT_LOCK (); - mem_pool_stat->chunks_freed++; - mem_pool_stat->bytes_allocated -= tmp->len; - STAT_UNLOCK (); - g_slice_free1 (tmp->len, tmp->begin); - g_slice_free (struct _pool_chain, tmp); - } - /* Clean temporary pools */ - cur = pool->first_pool_tmp; - while (cur) { - tmp = cur; - cur = cur->next; - STAT_LOCK (); - mem_pool_stat->chunks_freed++; - mem_pool_stat->bytes_allocated -= tmp->len; - STAT_UNLOCK (); - g_slice_free1 (tmp->len, tmp->begin); - g_slice_free (struct _pool_chain, tmp); - } - /* Unmap shared memory */ 
- while (cur_shared) { - tmp_shared = cur_shared; - cur_shared = cur_shared->next; - STAT_LOCK (); - mem_pool_stat->chunks_freed++; - mem_pool_stat->bytes_allocated -= tmp_shared->len; - STAT_UNLOCK (); - munmap ((void *)tmp_shared, tmp_shared->len + sizeof (struct _pool_chain_shared)); - } - if (pool->variables) { - g_hash_table_destroy (pool->variables); - } - - mem_pool_stat->pools_freed++; - POOL_MTX_UNLOCK (); - rspamd_mutex_free (pool->mtx); - g_slice_free (rspamd_mempool_t, pool); -} - -void -rspamd_mempool_cleanup_tmp (rspamd_mempool_t* pool) -{ - struct _pool_chain *cur = pool->first_pool, *tmp; - - POOL_MTX_LOCK (); - cur = pool->first_pool_tmp; - while (cur) { - tmp = cur; - cur = cur->next; - STAT_LOCK (); - mem_pool_stat->chunks_freed++; - mem_pool_stat->bytes_allocated -= tmp->len; - STAT_UNLOCK (); - g_slice_free1 (tmp->len, tmp->begin); - g_slice_free (struct _pool_chain, tmp); - } - mem_pool_stat->pools_freed++; - POOL_MTX_UNLOCK (); -} - -void -rspamd_mempool_stat (rspamd_mempool_stat_t * st) -{ - st->pools_allocated = mem_pool_stat->pools_allocated; - st->pools_freed = mem_pool_stat->pools_freed; - st->shared_chunks_allocated = mem_pool_stat->shared_chunks_allocated; - st->bytes_allocated = mem_pool_stat->bytes_allocated; - st->chunks_allocated = mem_pool_stat->chunks_allocated; - st->shared_chunks_allocated = mem_pool_stat->shared_chunks_allocated; - st->chunks_freed = mem_pool_stat->chunks_freed; - st->oversized_chunks = mem_pool_stat->oversized_chunks; -} - -/* By default allocate 8Kb chunks of memory */ -#define FIXED_POOL_SIZE 8192 -gsize -rspamd_mempool_suggest_size (void) -{ -#ifdef HAVE_GETPAGESIZE - return MAX (getpagesize (), FIXED_POOL_SIZE); -#else - return MAX (sysconf (_SC_PAGESIZE), FIXED_POOL_SIZE); -#endif -} - -rspamd_mempool_mutex_t * -rspamd_mempool_get_mutex (rspamd_mempool_t * pool) -{ - rspamd_mempool_mutex_t *res; - if (pool != NULL) { - res = rspamd_mempool_alloc_shared (pool, sizeof (rspamd_mempool_mutex_t)); - res->lock 
= 0; - res->owner = 0; - res->spin = MUTEX_SPIN_COUNT; - return res; - } - return NULL; -} - -void -rspamd_mempool_lock_mutex (rspamd_mempool_mutex_t * mutex) -{ - memory_pool_mutex_spin (mutex); - mutex->owner = getpid (); -} - -void -rspamd_mempool_unlock_mutex (rspamd_mempool_mutex_t * mutex) -{ - mutex->owner = 0; - (void)g_atomic_int_compare_and_exchange (&mutex->lock, 1, 0); -} - -rspamd_mempool_rwlock_t * -rspamd_mempool_get_rwlock (rspamd_mempool_t * pool) -{ - rspamd_mempool_rwlock_t *lock; - - lock = rspamd_mempool_alloc_shared (pool, sizeof (rspamd_mempool_rwlock_t)); - lock->__r_lock = rspamd_mempool_get_mutex (pool); - lock->__w_lock = rspamd_mempool_get_mutex (pool); - - return lock; -} - -void -rspamd_mempool_rlock_rwlock (rspamd_mempool_rwlock_t * lock) -{ - /* Spin on write lock */ - while (g_atomic_int_get (&lock->__w_lock->lock)) { - if (!__mutex_spin (lock->__w_lock)) { - break; - } - } - - g_atomic_int_inc (&lock->__r_lock->lock); - lock->__r_lock->owner = getpid (); -} - -void -rspamd_mempool_wlock_rwlock (rspamd_mempool_rwlock_t * lock) -{ - /* Spin on write lock first */ - rspamd_mempool_lock_mutex (lock->__w_lock); - /* Now we have write lock set up */ - /* Wait all readers */ - while (g_atomic_int_get (&lock->__r_lock->lock)) { - __mutex_spin (lock->__r_lock); - } -} - -void -rspamd_mempool_runlock_rwlock (rspamd_mempool_rwlock_t * lock) -{ - if (g_atomic_int_get (&lock->__r_lock->lock)) { - (void)g_atomic_int_dec_and_test (&lock->__r_lock->lock); - } -} - -void -rspamd_mempool_wunlock_rwlock (rspamd_mempool_rwlock_t * lock) -{ - rspamd_mempool_unlock_mutex (lock->__w_lock); -} - -void -rspamd_mempool_set_variable (rspamd_mempool_t *pool, const gchar *name, gpointer value, rspamd_mempool_destruct_t destructor) -{ - if (pool->variables == NULL) { - pool->variables = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - } - - g_hash_table_insert (pool->variables, rspamd_mempool_strdup (pool, name), value); - if (destructor != NULL) { - 
rspamd_mempool_add_destructor (pool, destructor, value); - } -} - -gpointer -rspamd_mempool_get_variable (rspamd_mempool_t *pool, const gchar *name) -{ - if (pool->variables == NULL) { - return NULL; - } - - return g_hash_table_lookup (pool->variables, name); -} - - -/* - * vi:ts=4 - */ diff --git a/src/mem_pool.h b/src/mem_pool.h deleted file mode 100644 index f759ed60a..000000000 --- a/src/mem_pool.h +++ /dev/null @@ -1,299 +0,0 @@ -/** - * @file mem_pool.h - * \brief Memory pools library. - * - * Memory pools library. Library is designed to implement efficient way to - * store data in memory avoiding calling of many malloc/free. It has overhead - * because of fact that objects live in pool for rather long time and are not freed - * immediately after use, but if we know certainly when these objects can be used, we - * can use pool for them - */ - -#ifndef RSPAMD_MEM_POOL_H -#define RSPAMD_MEM_POOL_H - -#include "config.h" - - -struct f_str_s; - -#define MEM_ALIGNMENT sizeof(unsigned long) /* platform word */ -#define align_ptr(p, a) \ - (guint8 *) (((uintptr_t) (p) + ((uintptr_t) a - 1)) & ~((uintptr_t) a - 1)) - -/** - * Destructor type definition - */ -typedef void (*rspamd_mempool_destruct_t)(void *ptr); - -/** - * Pool mutex structure - */ -typedef struct memory_pool_mutex_s { - gint lock; - pid_t owner; - guint spin; -} rspamd_mempool_mutex_t; - -/** - * Pool page structure - */ -struct _pool_chain { - guint8 *begin; /**< begin of pool chain block */ - guint8 *pos; /**< current start of free space in block */ - gsize len; /**< length of block */ - struct _pool_chain *next; /**< chain link */ -}; - -/** - * Shared pool page - */ -struct _pool_chain_shared { - guint8 *begin; - guint8 *pos; - gsize len; - struct _pool_chain_shared *next; - rspamd_mempool_mutex_t *lock; -}; - -/** - * Destructors list item structure - */ -struct _pool_destructors { - rspamd_mempool_destruct_t func; /**< pointer to destructor */ - void *data; /**< data to free */ - const gchar 
*function; /**< function from which this destructor was added */ - const gchar *loc; /**< line number */ - struct _pool_destructors *prev; /**< chain link */ -}; - -/** - * Memory pool type - */ -struct rspamd_mutex_s; -typedef struct memory_pool_s { - struct _pool_chain *cur_pool; /**< currently used page */ - struct _pool_chain *first_pool; /**< first page */ - struct _pool_chain *cur_pool_tmp; /**< currently used temporary page */ - struct _pool_chain *first_pool_tmp; /**< first temporary page */ - struct _pool_chain_shared *shared_pool; /**< shared chain */ - struct _pool_destructors *destructors; /**< destructors chain */ - GHashTable *variables; /**< private memory pool variables */ - struct rspamd_mutex_s *mtx; /**< threads lock */ -} rspamd_mempool_t; - -/** - * Statistics structure - */ -typedef struct memory_pool_stat_s { - gsize pools_allocated; /**< total number of allocated pools */ - gsize pools_freed; /**< number of freed pools */ - gsize bytes_allocated; /**< bytes that are allocated with pool allocator */ - gsize chunks_allocated; /**< number of chunks that are allocated */ - gsize shared_chunks_allocated; /**< shared chunks allocated */ - gsize chunks_freed; /**< chunks freed */ - gsize oversized_chunks; /**< oversized chunks */ -} rspamd_mempool_stat_t; - -/** - * Rwlock for locking shared memory regions - */ -typedef struct memory_pool_rwlock_s { - rspamd_mempool_mutex_t *__r_lock; /**< read mutex (private) */ - rspamd_mempool_mutex_t *__w_lock; /**< write mutex (private) */ -} rspamd_mempool_rwlock_t; - -/** - * Allocate new memory poll - * @param size size of pool's page - * @return new memory pool object - */ -rspamd_mempool_t* rspamd_mempool_new (gsize size); - -/** - * Get memory from pool - * @param pool memory pool object - * @param size bytes to allocate - * @return pointer to allocated object - */ -void* rspamd_mempool_alloc (rspamd_mempool_t* pool, gsize size); - -/** - * Get memory from temporary pool - * @param pool memory pool 
object - * @param size bytes to allocate - * @return pointer to allocated object - */ -void* rspamd_mempool_alloc_tmp (rspamd_mempool_t* pool, gsize size); - -/** - * Get memory and set it to zero - * @param pool memory pool object - * @param size bytes to allocate - * @return pointer to allocated object - */ -void* rspamd_mempool_alloc0 (rspamd_mempool_t* pool, gsize size); - -/** - * Get memory and set it to zero - * @param pool memory pool object - * @param size bytes to allocate - * @return pointer to allocated object - */ -void* rspamd_mempool_alloc0_tmp (rspamd_mempool_t* pool, gsize size); - -/** - * Cleanup temporary data in pool - */ -void rspamd_mempool_cleanup_tmp (rspamd_mempool_t* pool); - -/** - * Make a copy of string in pool - * @param pool memory pool object - * @param src source string - * @return pointer to newly created string that is copy of src - */ -gchar* rspamd_mempool_strdup (rspamd_mempool_t* pool, const gchar *src); - -/** - * Make a copy of fixed string in pool as null terminated string - * @param pool memory pool object - * @param src source string - * @return pointer to newly created string that is copy of src - */ -gchar* rspamd_mempool_fstrdup (rspamd_mempool_t* pool, const struct f_str_s *src); - -/** - * Allocate piece of shared memory - * @param pool memory pool object - * @param size bytes to allocate - */ -void* rspamd_mempool_alloc_shared (rspamd_mempool_t* pool, gsize size); -void* rspamd_mempool_alloc0_shared (rspamd_mempool_t *pool, gsize size); -gchar* rspamd_mempool_strdup_shared (rspamd_mempool_t* pool, const gchar *src); - -/** - * Lock chunk of shared memory in which pointer is placed - * @param pool memory pool object - * @param pointer pointer of shared memory object that is to be locked (the whole page that contains that object is locked) - */ -void rspamd_mempool_lock_shared (rspamd_mempool_t *pool, void *pointer); - -/** - * Unlock chunk of shared memory in which pointer is placed - * @param pool memory pool 
object - * @param pointer pointer of shared memory object that is to be unlocked (the whole page that contains that object is locked) - */ -void rspamd_mempool_lock_shared (rspamd_mempool_t *pool, void *pointer); - -/** - * Add destructor callback to pool - * @param pool memory pool object - * @param func pointer to function-destructor - * @param data pointer to data that would be passed to destructor - */ -void rspamd_mempool_add_destructor_full (rspamd_mempool_t *pool, rspamd_mempool_destruct_t func, void *data, - const gchar *function, const gchar *line); - -/* Macros for common usage */ -#define rspamd_mempool_add_destructor(pool, func, data) \ - rspamd_mempool_add_destructor_full(pool, func, data, G_STRFUNC, G_STRLOC) - -/** - * Replace destructor callback to pool for specified pointer - * @param pool memory pool object - * @param func pointer to function-destructor - * @param old_data pointer to old data - * @param new_data pointer to data that would be passed to destructor - */ -void rspamd_mempool_replace_destructor (rspamd_mempool_t *pool, - rspamd_mempool_destruct_t func, void *old_data, void *new_data); - -/** - * Delete pool, free all its chunks and call destructors chain - * @param pool memory pool object - */ -void rspamd_mempool_delete (rspamd_mempool_t *pool); - -/** - * Get new mutex from pool (allocated in shared memory) - * @param pool memory pool object - * @return mutex object - */ -rspamd_mempool_mutex_t* rspamd_mempool_get_mutex (rspamd_mempool_t *pool); - -/** - * Lock mutex - * @param mutex mutex to lock - */ -void rspamd_mempool_lock_mutex (rspamd_mempool_mutex_t *mutex); - -/** - * Unlock mutex - * @param mutex mutex to unlock - */ -void rspamd_mempool_unlock_mutex (rspamd_mempool_mutex_t *mutex); - -/** - * Create new rwlock and place it in shared memory - * @param pool memory pool object - * @return rwlock object - */ -rspamd_mempool_rwlock_t* rspamd_mempool_get_rwlock (rspamd_mempool_t *pool); - -/** - * Aquire read lock - * @param 
lock rwlock object - */ -void rspamd_mempool_rlock_rwlock (rspamd_mempool_rwlock_t *lock); - -/** - * Aquire write lock - * @param lock rwlock object - */ -void rspamd_mempool_wlock_rwlock (rspamd_mempool_rwlock_t *lock); - -/** - * Release read lock - * @param lock rwlock object - */ -void rspamd_mempool_runlock_rwlock (rspamd_mempool_rwlock_t *lock); - -/** - * Release write lock - * @param lock rwlock object - */ -void rspamd_mempool_wunlock_rwlock (rspamd_mempool_rwlock_t *lock); - -/** - * Get pool allocator statistics - * @param st stat pool struct - */ -void rspamd_mempool_stat (rspamd_mempool_stat_t *st); - -/** - * Get optimal pool size based on page size for this system - * @return size of memory page in system - */ -gsize rspamd_mempool_suggest_size (void); - -/** - * Set memory pool variable - * @param pool memory pool object - * @param name name of variable - * @param gpointer value value of variable - * @param destructor pointer to function-destructor - */ -void rspamd_mempool_set_variable (rspamd_mempool_t *pool, const gchar *name, - gpointer value, rspamd_mempool_destruct_t destructor); - -/** - * Get memory pool variable - * @param pool memory pool object - * @param name name of variable - * @return NULL or pointer to variable data - */ -gpointer rspamd_mempool_get_variable (rspamd_mempool_t *pool, const gchar *name); - - -#endif diff --git a/src/memcached.c b/src/memcached.c deleted file mode 100644 index e4c9be9d2..000000000 --- a/src/memcached.c +++ /dev/null @@ -1,831 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifdef _THREAD_SAFE -# include -#endif - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "memcached.h" - -#define CRLF "\r\n" -#define END_TRAILER "END" CRLF -#define STORED_TRAILER "STORED" CRLF -#define NOT_STORED_TRAILER "NOT STORED" CRLF -#define EXISTS_TRAILER "EXISTS" CRLF -#define DELETED_TRAILER "DELETED" CRLF -#define NOT_FOUND_TRAILER "NOT_FOUND" CRLF -#define CLIENT_ERROR_TRAILER "CLIENT_ERROR" -#define SERVER_ERROR_TRAILER "SERVER_ERROR" - -#define READ_BUFSIZ 1500 -#define MAX_RETRIES 3 - -/* Header for udp protocol */ -struct memc_udp_header { - guint16 req_id; - guint16 seq_num; - guint16 dg_sent; - guint16 unused; -}; - -static void socket_callback (gint fd, short what, void *arg); -static gint memc_parse_header (gchar *buf, size_t * len, gchar **end); - -/* - * Write to syslog if OPT_DEBUG is specified - */ -static void -memc_log (const 
memcached_ctx_t * ctx, gint line, const gchar *fmt, ...) -{ - va_list args; - if (ctx->options & MEMC_OPT_DEBUG) { - va_start (args, fmt); - g_log (G_LOG_DOMAIN, G_LOG_LEVEL_DEBUG, "memc_debug(%d): host: %s, port: %d", line, inet_ntoa (ctx->addr), ntohs (ctx->port)); - g_logv (G_LOG_DOMAIN, G_LOG_LEVEL_DEBUG, fmt, args); - va_end (args); - } -} - -/* - * Callback for write command - */ -static void -write_handler (gint fd, short what, memcached_ctx_t * ctx) -{ - gchar read_buf[READ_BUFSIZ]; - gint retries; - ssize_t r; - struct memc_udp_header header; - struct iovec iov[4]; - - /* Write something to memcached */ - if (what == EV_WRITE) { - if (ctx->protocol == UDP_TEXT) { - /* Send udp header */ - bzero (&header, sizeof (header)); - header.dg_sent = htons (1); - header.req_id = ctx->count; - } - - r = snprintf (read_buf, READ_BUFSIZ, "%s %s 0 %d %zu" CRLF, ctx->cmd, ctx->param->key, ctx->param->expire, ctx->param->bufsize); - memc_log (ctx, __LINE__, "memc_write: send write request to memcached: %s", read_buf); - - if (ctx->protocol == UDP_TEXT) { - iov[0].iov_base = &header; - iov[0].iov_len = sizeof (struct memc_udp_header); - if (ctx->param->bufpos == 0) { - iov[1].iov_base = read_buf; - iov[1].iov_len = r; - } - else { - iov[1].iov_base = NULL; - iov[1].iov_len = 0; - } - iov[2].iov_base = ctx->param->buf + ctx->param->bufpos; - iov[2].iov_len = ctx->param->bufsize - ctx->param->bufpos; - iov[3].iov_base = CRLF; - iov[3].iov_len = sizeof (CRLF) - 1; - if (writev (ctx->sock, iov, 4) == -1) { - memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno)); - } - } - else { - iov[0].iov_base = read_buf; - iov[0].iov_len = r; - iov[1].iov_base = ctx->param->buf + ctx->param->bufpos; - iov[1].iov_len = ctx->param->bufsize - ctx->param->bufpos; - iov[2].iov_base = CRLF; - iov[2].iov_len = sizeof (CRLF) - 1; - if (writev (ctx->sock, iov, 3) == -1) { - memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno)); - } - } - event_del 
(&ctx->mem_ev); - event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); - event_add (&ctx->mem_ev, &ctx->timeout); - } - else if (what == EV_READ) { - /* Read header */ - retries = 0; - while (ctx->protocol == UDP_TEXT) { - iov[0].iov_base = &header; - iov[0].iov_len = sizeof (struct memc_udp_header); - iov[1].iov_base = read_buf; - iov[1].iov_len = READ_BUFSIZ; - if ((r = readv (ctx->sock, iov, 2)) == -1) { - event_del (&ctx->mem_ev); - ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); - } - if (header.req_id != ctx->count && retries < MAX_RETRIES) { - retries++; - /* Not our reply packet */ - continue; - } - break; - } - if (ctx->protocol != UDP_TEXT) { - r = read (ctx->sock, read_buf, READ_BUFSIZ - 1); - } - memc_log (ctx, __LINE__, "memc_write: read reply from memcached: %s", read_buf); - /* Increment count */ - ctx->count++; - event_del (&ctx->mem_ev); - if (strncmp (read_buf, STORED_TRAILER, sizeof (STORED_TRAILER) - 1) == 0) { - ctx->callback (ctx, OK, ctx->callback_data); - } - else if (strncmp (read_buf, NOT_STORED_TRAILER, sizeof (NOT_STORED_TRAILER) - 1) == 0) { - ctx->callback (ctx, CLIENT_ERROR, ctx->callback_data); - } - else if (strncmp (read_buf, EXISTS_TRAILER, sizeof (EXISTS_TRAILER) - 1) == 0) { - ctx->callback (ctx, EXISTS, ctx->callback_data); - } - else { - ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); - } - } - else if (what == EV_TIMEOUT) { - event_del (&ctx->mem_ev); - ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); - } -} - -/* - * Callback for read command - */ -static void -read_handler (gint fd, short what, memcached_ctx_t * ctx) -{ - gchar read_buf[READ_BUFSIZ]; - gchar *p; - ssize_t r; - size_t datalen; - struct memc_udp_header header; - struct iovec iov[2]; - gint retries = 0, t; - - if (what == EV_WRITE) { - /* Send command to memcached */ - if (ctx->protocol == UDP_TEXT) { - /* Send udp header */ - bzero (&header, sizeof (header)); - header.dg_sent = htons (1); 
- header.req_id = ctx->count; - } - - r = snprintf (read_buf, READ_BUFSIZ, "%s %s" CRLF, ctx->cmd, ctx->param->key); - memc_log (ctx, __LINE__, "memc_read: send read request to memcached: %s", read_buf); - if (ctx->protocol == UDP_TEXT) { - iov[0].iov_base = &header; - iov[0].iov_len = sizeof (struct memc_udp_header); - iov[1].iov_base = read_buf; - iov[1].iov_len = r; - if (writev (ctx->sock, iov, 2) == -1) { - memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno)); - } - } - else { - if (write (ctx->sock, read_buf, r) == -1) { - memc_log (ctx, __LINE__, "memc_write: write failed: %s", strerror (errno)); - } - } - event_del (&ctx->mem_ev); - event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); - event_add (&ctx->mem_ev, &ctx->timeout); - } - else if (what == EV_READ) { - while (ctx->protocol == UDP_TEXT) { - iov[0].iov_base = &header; - iov[0].iov_len = sizeof (struct memc_udp_header); - iov[1].iov_base = read_buf; - iov[1].iov_len = READ_BUFSIZ; - if ((r = readv (ctx->sock, iov, 2)) == -1) { - event_del (&ctx->mem_ev); - ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); - return; - } - memc_log (ctx, __LINE__, "memc_read: got read_buf: %s", read_buf); - if (header.req_id != ctx->count && retries < MAX_RETRIES) { - memc_log (ctx, __LINE__, "memc_read: got wrong packet id: %d, %d was awaited", header.req_id, ctx->count); - retries++; - /* Not our reply packet */ - continue; - } - break; - } - if (ctx->protocol != UDP_TEXT) { - r = read (ctx->sock, read_buf, READ_BUFSIZ - 1); - } - - if (r > 0) { - read_buf[r] = 0; - if (ctx->param->bufpos == 0) { - t = memc_parse_header (read_buf, &datalen, &p); - if (t < 0) { - event_del (&ctx->mem_ev); - memc_log (ctx, __LINE__, "memc_read: cannot parse memcached reply"); - ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); - return; - } - else if (t == 0) { - memc_log (ctx, __LINE__, "memc_read: record does not exists"); - event_del (&ctx->mem_ev); - 
ctx->callback (ctx, NOT_EXISTS, ctx->callback_data); - return; - } - - if (datalen > ctx->param->bufsize) { - memc_log (ctx, __LINE__, "memc_read: user's buffer is too small: %zd, %zd required", ctx->param->bufsize, datalen); - event_del (&ctx->mem_ev); - ctx->callback (ctx, WRONG_LENGTH, ctx->callback_data); - return; - } - /* Check if we already have all data in buffer */ - if (r >= (ssize_t)(datalen + sizeof (END_TRAILER) + sizeof (CRLF) - 2)) { - /* Store all data in param's buffer */ - memcpy (ctx->param->buf + ctx->param->bufpos, p, datalen); - /* Increment count */ - ctx->count++; - event_del (&ctx->mem_ev); - ctx->callback (ctx, OK, ctx->callback_data); - return; - } - /* Subtract from sum parsed header's length */ - r -= p - read_buf; - } - else { - p = read_buf; - } - - if (strncmp (ctx->param->buf + ctx->param->bufpos + r - sizeof (END_TRAILER) - sizeof (CRLF) + 2, END_TRAILER, sizeof (END_TRAILER) - 1) == 0) { - r -= sizeof (END_TRAILER) - sizeof (CRLF) - 2; - memcpy (ctx->param->buf + ctx->param->bufpos, p, r); - event_del (&ctx->mem_ev); - ctx->callback (ctx, OK, ctx->callback_data); - return; - } - /* Store this part of data in param's buffer */ - memcpy (ctx->param->buf + ctx->param->bufpos, p, r); - ctx->param->bufpos += r; - } - else { - memc_log (ctx, __LINE__, "memc_read: read(v) failed: %d, %s", r, strerror (errno)); - event_del (&ctx->mem_ev); - ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); - return; - } - - ctx->count++; - } - else if (what == EV_TIMEOUT) { - event_del (&ctx->mem_ev); - ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); - } - -} - -/* - * Callback for delete command - */ -static void -delete_handler (gint fd, short what, memcached_ctx_t * ctx) -{ - gchar read_buf[READ_BUFSIZ]; - gint retries; - ssize_t r; - struct memc_udp_header header; - struct iovec iov[2]; - - /* Write something to memcached */ - if (what == EV_WRITE) { - if (ctx->protocol == UDP_TEXT) { - /* Send udp header */ - bzero (&header, sizeof 
(header)); - header.dg_sent = htons (1); - header.req_id = ctx->count; - } - r = snprintf (read_buf, READ_BUFSIZ, "delete %s" CRLF, ctx->param->key); - memc_log (ctx, __LINE__, "memc_delete: send delete request to memcached: %s", read_buf); - - if (ctx->protocol == UDP_TEXT) { - iov[0].iov_base = &header; - iov[0].iov_len = sizeof (struct memc_udp_header); - iov[1].iov_base = read_buf; - iov[1].iov_len = r; - ctx->param->bufpos = writev (ctx->sock, iov, 2); - if (ctx->param->bufpos == (size_t)-1) { - memc_log (ctx, __LINE__, "memc_write: writev failed: %s", strerror (errno)); - } - } - else { - if (write (ctx->sock, read_buf, r) == -1) { - memc_log (ctx, __LINE__, "memc_write: write failed: %s", strerror (errno)); - } - } - event_del (&ctx->mem_ev); - event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); - event_add (&ctx->mem_ev, &ctx->timeout); - } - else if (what == EV_READ) { - /* Read header */ - retries = 0; - while (ctx->protocol == UDP_TEXT) { - iov[0].iov_base = &header; - iov[0].iov_len = sizeof (struct memc_udp_header); - iov[1].iov_base = read_buf; - iov[1].iov_len = READ_BUFSIZ; - if ((r = readv (ctx->sock, iov, 2)) == -1) { - event_del (&ctx->mem_ev); - ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); - return; - } - if (header.req_id != ctx->count && retries < MAX_RETRIES) { - retries++; - /* Not our reply packet */ - continue; - } - break; - } - if (ctx->protocol != UDP_TEXT) { - r = read (ctx->sock, read_buf, READ_BUFSIZ - 1); - } - /* Increment count */ - ctx->count++; - event_del (&ctx->mem_ev); - if (strncmp (read_buf, DELETED_TRAILER, sizeof (STORED_TRAILER) - 1) == 0) { - ctx->callback (ctx, OK, ctx->callback_data); - } - else if (strncmp (read_buf, NOT_FOUND_TRAILER, sizeof (NOT_FOUND_TRAILER) - 1) == 0) { - ctx->callback (ctx, NOT_EXISTS, ctx->callback_data); - } - else { - ctx->callback (ctx, SERVER_ERROR, ctx->callback_data); - } - } - else if (what == EV_TIMEOUT) { - event_del 
(&ctx->mem_ev); - ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); - } -} - -/* - * Callback for our socket events - */ -static void -socket_callback (gint fd, short what, void *arg) -{ - memcached_ctx_t *ctx = (memcached_ctx_t *) arg; - - switch (ctx->op) { - case CMD_NULL: - /* Do nothing here */ - break; - case CMD_CONNECT: - /* We have write readiness after connect call, so reinit event */ - ctx->cmd = "connect"; - if (what == EV_WRITE) { - event_del (&ctx->mem_ev); - event_set (&ctx->mem_ev, ctx->sock, EV_READ | EV_PERSIST | EV_TIMEOUT, socket_callback, (void *)ctx); - event_add (&ctx->mem_ev, NULL); - ctx->callback (ctx, OK, ctx->callback_data); - ctx->alive = 1; - } - else { - ctx->callback (ctx, SERVER_TIMEOUT, ctx->callback_data); - ctx->alive = 0; - } - break; - case CMD_WRITE: - write_handler (fd, what, ctx); - break; - case CMD_READ: - read_handler (fd, what, ctx); - break; - case CMD_DELETE: - delete_handler (fd, what, ctx); - break; - } -} - -/* - * Common callback function for memcached operations if no user's callback is specified - */ -static void -common_memc_callback (memcached_ctx_t * ctx, memc_error_t error, void *data) -{ - memc_log (ctx, __LINE__, "common_memc_callback: result of memc command '%s' is '%s'", ctx->cmd, memc_strerror (error)); -} - -/* - * Make socket for udp connection - */ -static gint -memc_make_udp_sock (memcached_ctx_t * ctx) -{ - struct sockaddr_in sc; - gint ofl; - - bzero (&sc, sizeof (struct sockaddr_in *)); - sc.sin_family = AF_INET; - sc.sin_port = ctx->port; - memcpy (&sc.sin_addr, &ctx->addr, sizeof (struct in_addr)); - - ctx->sock = socket (PF_INET, SOCK_DGRAM, 0); - - if (ctx->sock == -1) { - memc_log (ctx, __LINE__, "memc_make_udp_sock: socket() failed: %s", strerror (errno)); - return -1; - } - - /* set nonblocking */ - ofl = fcntl (ctx->sock, F_GETFL, 0); - fcntl (ctx->sock, F_SETFL, ofl | O_NONBLOCK); - - /* - * Call connect to set default destination for datagrams - * May not block - */ - ctx->op = 
CMD_CONNECT; - event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); - event_add (&ctx->mem_ev, NULL); - return connect (ctx->sock, (struct sockaddr *)&sc, sizeof (struct sockaddr_in)); -} - -/* - * Make socket for tcp connection - */ -static gint -memc_make_tcp_sock (memcached_ctx_t * ctx) -{ - struct sockaddr_in sc; - gint ofl, r; - - bzero (&sc, sizeof (struct sockaddr_in *)); - sc.sin_family = AF_INET; - sc.sin_port = ctx->port; - memcpy (&sc.sin_addr, &ctx->addr, sizeof (struct in_addr)); - - ctx->sock = socket (PF_INET, SOCK_STREAM, 0); - - if (ctx->sock == -1) { - memc_log (ctx, __LINE__, "memc_make_tcp_sock: socket() failed: %s", strerror (errno)); - return -1; - } - - /* set nonblocking */ - ofl = fcntl (ctx->sock, F_GETFL, 0); - fcntl (ctx->sock, F_SETFL, ofl | O_NONBLOCK); - - if ((r = connect (ctx->sock, (struct sockaddr *)&sc, sizeof (struct sockaddr_in))) == -1) { - if (errno != EINPROGRESS) { - close (ctx->sock); - ctx->sock = -1; - memc_log (ctx, __LINE__, "memc_make_tcp_sock: connect() failed: %s", strerror (errno)); - return -1; - } - } - ctx->op = CMD_CONNECT; - event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); - event_add (&ctx->mem_ev, &ctx->timeout); - return 0; -} - -/* - * Parse VALUE reply from server and set len argument to value returned by memcached - */ -static gint -memc_parse_header (gchar *buf, size_t * len, gchar **end) -{ - gchar *p, *c; - gint i; - - /* VALUE []\r\n */ - c = strstr (buf, CRLF); - if (c == NULL) { - return -1; - } - *end = c + sizeof (CRLF) - 1; - - if (strncmp (buf, "VALUE ", sizeof ("VALUE ") - 1) == 0) { - p = buf + sizeof ("VALUE ") - 1; - - /* Read bytes value and ignore all other fields, such as flags and key */ - for (i = 0; i < 2; i++) { - while (p++ < c && *p != ' '); - - if (p > c) { - return -1; - } - } - *len = strtoul (p, &c, 10); - return 1; - } - /* If value not found memcached return just END\r\n , in this case return 0 */ - 
else if (strncmp (buf, END_TRAILER, sizeof (END_TRAILER) - 1) == 0) { - return 0; - } - - return -1; -} - - -/* - * Common read command handler for memcached - */ -memc_error_t -memc_read (memcached_ctx_t * ctx, const gchar *cmd, memcached_param_t * param) -{ - ctx->cmd = cmd; - ctx->op = CMD_READ; - ctx->param = param; - event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); - event_add (&ctx->mem_ev, &ctx->timeout); - - return OK; -} - -/* - * Common write command handler for memcached - */ -memc_error_t -memc_write (memcached_ctx_t * ctx, const gchar *cmd, memcached_param_t * param, gint expire) -{ - ctx->cmd = cmd; - ctx->op = CMD_WRITE; - ctx->param = param; - param->expire = expire; - event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); - event_add (&ctx->mem_ev, &ctx->timeout); - - return OK; -} - -/* - * Delete command handler - */ -memc_error_t -memc_delete (memcached_ctx_t * ctx, memcached_param_t * param) -{ - ctx->cmd = "delete"; - ctx->op = CMD_DELETE; - ctx->param = param; - event_set (&ctx->mem_ev, ctx->sock, EV_WRITE | EV_TIMEOUT, socket_callback, (void *)ctx); - event_add (&ctx->mem_ev, &ctx->timeout); - - return OK; -} - -/* - * Write handler for memcached mirroring - * writing is done to each memcached server - */ -memc_error_t -memc_write_mirror (memcached_ctx_t * ctx, size_t memcached_num, const gchar *cmd, memcached_param_t * param, gint expire) -{ - memc_error_t r, result = OK; - - while (memcached_num--) { - if (ctx[memcached_num].alive == 1) { - r = memc_write (&ctx[memcached_num], cmd, param, expire); - if (r != OK) { - memc_log (&ctx[memcached_num], __LINE__, "memc_write_mirror: cannot write to mirror server: %s", memc_strerror (r)); - result = r; - ctx[memcached_num].alive = 0; - } - } - } - - return result; -} - -/* - * Read handler for memcached mirroring - * reading is done from first active memcached server - */ -memc_error_t -memc_read_mirror (memcached_ctx_t * 
ctx, size_t memcached_num, const gchar *cmd, memcached_param_t * param) -{ - memc_error_t r, result = OK; - - while (memcached_num--) { - if (ctx[memcached_num].alive == 1) { - r = memc_read (&ctx[memcached_num], cmd, param); - if (r != OK) { - result = r; - if (r != NOT_EXISTS) { - ctx[memcached_num].alive = 0; - memc_log (&ctx[memcached_num], __LINE__, "memc_read_mirror: cannot write read from mirror server: %s", memc_strerror (r)); - } - else { - memc_log (&ctx[memcached_num], __LINE__, "memc_read_mirror: record not exists", memc_strerror (r)); - } - } - else { - break; - } - } - } - - return result; -} - -/* - * Delete handler for memcached mirroring - * deleting is done for each active memcached server - */ -memc_error_t -memc_delete_mirror (memcached_ctx_t * ctx, size_t memcached_num, const gchar *cmd, memcached_param_t * param) -{ - memc_error_t r, result = OK; - - while (memcached_num--) { - if (ctx[memcached_num].alive == 1) { - r = memc_delete (&ctx[memcached_num], param); - if (r != OK) { - result = r; - if (r != NOT_EXISTS) { - ctx[memcached_num].alive = 0; - memc_log (&ctx[memcached_num], __LINE__, "memc_delete_mirror: cannot delete from mirror server: %s", memc_strerror (r)); - } - } - } - } - - return result; -} - - -/* - * Initialize memcached context for specified protocol - */ -gint -memc_init_ctx (memcached_ctx_t * ctx) -{ - if (ctx == NULL) { - return -1; - } - - ctx->count = 0; - ctx->alive = 0; - ctx->op = CMD_NULL; - /* Set default callback */ - if (ctx->callback == NULL) { - ctx->callback = common_memc_callback; - } - - switch (ctx->protocol) { - case UDP_TEXT: - return memc_make_udp_sock (ctx); - break; - case TCP_TEXT: - return memc_make_tcp_sock (ctx); - break; - /* Not implemented */ - case UDP_BIN: - case TCP_BIN: - default: - return -1; - } -} - -/* - * Mirror init - */ -gint -memc_init_ctx_mirror (memcached_ctx_t * ctx, size_t memcached_num) -{ - gint r, result = -1; - while (memcached_num--) { - if (ctx[memcached_num].alive == 1) { - 
r = memc_init_ctx (&ctx[memcached_num]); - if (r == -1) { - ctx[memcached_num].alive = 0; - memc_log (&ctx[memcached_num], __LINE__, "memc_init_ctx_mirror: cannot connect to server"); - } - else { - result = 1; - } - } - } - - return result; -} - -/* - * Close context connection - */ -gint -memc_close_ctx (memcached_ctx_t * ctx) -{ - if (ctx != NULL && ctx->sock != -1) { - event_del (&ctx->mem_ev); - return close (ctx->sock); - } - - return -1; -} - -/* - * Mirror close - */ -gint -memc_close_ctx_mirror (memcached_ctx_t * ctx, size_t memcached_num) -{ - gint r = 0; - while (memcached_num--) { - if (ctx[memcached_num].alive == 1) { - r = memc_close_ctx (&ctx[memcached_num]); - if (r == -1) { - memc_log (&ctx[memcached_num], __LINE__, "memc_close_ctx_mirror: cannot close connection to server properly"); - ctx[memcached_num].alive = 0; - } - } - } - - return r; -} - - -const gchar * -memc_strerror (memc_error_t err) -{ - const gchar *p; - - switch (err) { - case OK: - p = "Ok"; - break; - case BAD_COMMAND: - p = "Bad command"; - break; - case CLIENT_ERROR: - p = "Client error"; - break; - case SERVER_ERROR: - p = "Server error"; - break; - case SERVER_TIMEOUT: - p = "Server timeout"; - break; - case NOT_EXISTS: - p = "Key not found"; - break; - case EXISTS: - p = "Key already exists"; - break; - case WRONG_LENGTH: - p = "Wrong result length"; - break; - default: - p = "Unknown error"; - break; - } - - return p; -} - -/* - * vi:ts=4 - */ diff --git a/src/memcached.h b/src/memcached.h deleted file mode 100644 index 098e26eea..000000000 --- a/src/memcached.h +++ /dev/null @@ -1,142 +0,0 @@ -#ifndef MEMCACHED_H -#define MEMCACHED_H - -#include -#include -#include -#include - -#define MAXKEYLEN 250 - -#define MEMC_OPT_DEBUG 0x1 - -struct event; - -typedef enum memc_error { - OK, - BAD_COMMAND, - CLIENT_ERROR, - SERVER_ERROR, - SERVER_TIMEOUT, - NOT_EXISTS, - EXISTS, - WRONG_LENGTH -} memc_error_t; - -/* XXX: Only UDP_TEXT is supported at present */ -typedef enum memc_proto 
{ - UDP_TEXT, - TCP_TEXT, - UDP_BIN, - TCP_BIN -} memc_proto_t; - -typedef enum memc_op { - CMD_NULL, - CMD_CONNECT, - CMD_READ, - CMD_WRITE, - CMD_DELETE, -} memc_opt_t; - -typedef struct memcached_param_s { - gchar key[MAXKEYLEN]; - u_char *buf; - size_t bufsize; - size_t bufpos; - gint expire; -} memcached_param_t; - - -/* Port must be in network byte order */ -typedef struct memcached_ctx_s { - memc_proto_t protocol; - struct in_addr addr; - guint16 port; - gint sock; - struct timeval timeout; - /* Counter that is used for memcached operations in network byte order */ - guint16 count; - /* Flag that signalize that this memcached is alive */ - short alive; - /* Options that can be specified for memcached connection */ - short options; - /* Current operation */ - memc_opt_t op; - /* Current command */ - const gchar *cmd; - /* Current param */ - memcached_param_t *param; - /* Callback for current operation */ - void (*callback) (struct memcached_ctx_s *ctx, memc_error_t error, void *data); - /* Data for callback function */ - void *callback_data; - /* Event structure */ - struct event mem_ev; -} memcached_ctx_t; - -typedef void (*memcached_callback_t) (memcached_ctx_t *ctx, memc_error_t error, void *data); - -/* - * Initialize connection to memcached server: - * addr, port and timeout fields in ctx must be filled with valid values - * Return: - * 0 - success - * -1 - error (error is stored in errno) - */ -gint memc_init_ctx (memcached_ctx_t *ctx); -gint memc_init_ctx_mirror (memcached_ctx_t *ctx, size_t memcached_num); -/* - * Memcached function for getting, setting, adding values to memcached server - * ctx - valid memcached context - * key - key to extract (max 250 characters as it specified in memcached API) - * buf, elemsize, nelem - allocated buffer of length nelem structures each of elemsize - * that would contain extracted data (NOT NULL TERMINATED) - * Return: - * memc_error_t - * nelem is changed according to actual number of extracted data - * - * "set" 
means "store this data". - * - * "add" means "store this data, but only if the server *doesn't* already - * hold data for this key". - - * "replace" means "store this data, but only if the server *does* - * already hold data for this key". - - * "append" means "add this data to an existing key after existing data". - - * "prepend" means "add this data to an existing key before existing data". - */ -#define memc_get(ctx, param) memc_read(ctx, "get", param) -#define memc_set(ctx, param, expire) memc_write(ctx, "set", param, expire) -#define memc_add(ctx, param, expire) memc_write(ctx, "add", param, expire) -#define memc_replace(ctx, param, expire) memc_write(ctx, "replace", param, expire) -#define memc_append(ctx, param, expire) memc_write(ctx, "append", param, expire) -#define memc_prepend(ctx, param, expire) memc_write(ctx, "prepend", param, expire) - -/* Functions that works with mirror of memcached servers */ -#define memc_get_mirror(ctx, num, param) memc_read_mirror(ctx, num, "get", param) -#define memc_set_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "set", param, expire) -#define memc_add_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "add", param, expire) -#define memc_replace_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "replace", param, expire) -#define memc_append_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "append", param, expire) -#define memc_prepend_mirror(ctx, num, param, expire) memc_write_mirror(ctx, num, "prepend", param, expire) - - -memc_error_t memc_read (memcached_ctx_t *ctx, const gchar *cmd, memcached_param_t *param); -memc_error_t memc_write (memcached_ctx_t *ctx, const gchar *cmd, memcached_param_t *param, gint expire); -memc_error_t memc_delete (memcached_ctx_t *ctx, memcached_param_t *params); - -memc_error_t memc_write_mirror (memcached_ctx_t *ctx, size_t memcached_num, const gchar *cmd, memcached_param_t *param, gint expire); -memc_error_t memc_read_mirror (memcached_ctx_t *ctx, 
size_t memcached_num, const gchar *cmd, memcached_param_t *param); -memc_error_t memc_delete_mirror (memcached_ctx_t *ctx, size_t memcached_num, const gchar *cmd, memcached_param_t *param); - -/* Return symbolic name of memcached error*/ -const gchar * memc_strerror (memc_error_t err); - -/* Destroy socket from ctx */ -gint memc_close_ctx (memcached_ctx_t *ctx); -gint memc_close_ctx_mirror (memcached_ctx_t *ctx, size_t memcached_num); - -#endif diff --git a/src/message.c b/src/message.c deleted file mode 100644 index 4567869e9..000000000 --- a/src/message.c +++ /dev/null @@ -1,1764 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "util.h" -#include "main.h" -#include "message.h" -#include "cfg_file.h" -#include "html.h" -#include "images.h" - -#define RECURSION_LIMIT 30 -#define UTF8_CHARSET "UTF-8" - -GByteArray * -strip_html_tags (struct rspamd_task *task, rspamd_mempool_t * pool, struct mime_text_part *part, GByteArray * src, gint *stateptr) -{ - uint8_t *p, *rp, *tbegin = NULL, *end, c, lc; - gint br, i = 0, depth = 0, in_q = 0; - gint state = 0; - GByteArray *buf; - GNode *level_ptr = NULL; - gboolean erase = FALSE; - - if (stateptr) - state = *stateptr; - - buf = g_byte_array_sized_new (src->len); - g_byte_array_append (buf, src->data, src->len); - - c = *src->data; - lc = '\0'; - p = src->data; - rp = buf->data; - end = src->data + src->len; - br = 0; - - while (i < (gint)src->len) { - switch (c) { - case '\0': - break; - case '<': - if (g_ascii_isspace (*(p + 1))) { - goto reg_char; - } - if (state == 0) { - lc = '<'; - tbegin = p + 1; - state = 1; - } - else if (state == 1) { - /* Opening bracket without closing one */ - p --; - while (g_ascii_isspace (*p) && p > src->data) { - p --; - } - p ++; - goto unbreak_tag; - } - break; - - case '(': - if (state == 2) { - if (lc != '"' && lc != '\'') { - lc = '('; - br++; - } - } - else if (state == 0 && !erase) { - *(rp++) = c; - } - break; - - case ')': - if (state == 2) { - if (lc != '"' && lc != '\'') { - lc = ')'; - br--; - } - } - else if (state == 0 && !erase) { - *(rp++) = c; - } - break; - - case '>': - if (depth) { - depth--; - break; - } - - if (in_q) { - break; - } -unbreak_tag: - switch (state) { - case 1: /* HTML/XML */ - lc = '>'; - in_q = state = 0; - erase = !add_html_node (task, pool, part, tbegin, p - tbegin, end - tbegin, &level_ptr); - break; - - case 2: /* PHP */ - if (!br && lc != '\"' && *(p - 1) == '?') { - in_q = state = 0; - } - break; - - case 3: - in_q = state = 0; - break; - - case 4: /* JavaScript/CSS/etc... 
*/ - if (p >= src->data + 2 && *(p - 1) == '-' && *(p - 2) == '-') { - in_q = state = 0; - } - break; - - default: - if (!erase) { - *(rp++) = c; - } - break; - } - break; - - case '"': - case '\'': - if (state == 2 && *(p - 1) != '\\') { - if (lc == c) { - lc = '\0'; - } - else if (lc != '\\') { - lc = c; - } - } - else if (state == 0 && !erase) { - *(rp++) = c; - } - if (state && p != src->data && *(p - 1) != '\\' && (!in_q || *p == in_q)) { - if (in_q) { - in_q = 0; - } - else { - in_q = *p; - } - } - break; - - case '!': - /* JavaScript & Other HTML scripting languages */ - if (state == 1 && *(p - 1) == '<') { - state = 3; - lc = c; - } - else { - if (state == 0 && !erase) { - *(rp++) = c; - } - } - break; - - case '-': - if (state == 3 && p >= src->data + 2 && *(p - 1) == '-' && *(p - 2) == '!') { - state = 4; - } - else { - goto reg_char; - } - break; - - case '?': - - if (state == 1 && *(p - 1) == '<') { - br = 0; - state = 2; - break; - } - - case 'E': - case 'e': - /* !DOCTYPE exception */ - if (state == 3 && p > src->data + 6 - && g_ascii_tolower (*(p - 1)) == 'p' - && g_ascii_tolower (*(p - 2)) == 'y' - && g_ascii_tolower (*(p - 3)) == 't' && g_ascii_tolower (*(p - 4)) == 'c' && g_ascii_tolower (*(p - 5)) == 'o' && g_ascii_tolower (*(p - 6)) == 'd') { - state = 1; - break; - } - /* fall-through */ - - case 'l': - - /* swm: If we encounter ' src->data + 2 && *(p - 1) == 'm' && *(p - 2) == 'x') { - state = 1; - break; - } - - /* fall-through */ - default: - reg_char: - if (state == 0 && !erase) { - *(rp++) = c; - } - break; - } - i++; - if (i < (gint)src->len) { - c = *(++p); - } - } - if (rp < buf->data + src->len) { - *rp = '\0'; - g_byte_array_set_size (buf, rp - buf->data); - } - - /* Check tag balancing */ - if (level_ptr && level_ptr->data != NULL) { - part->is_balanced = FALSE; - } - - if (stateptr) { - *stateptr = state; - } - - return buf; -} - -static void -parse_qmail_recv (rspamd_mempool_t * pool, gchar *line, struct received_header *r) -{ - 
gchar *s, *p, t; - - /* We are interested only with received from network headers */ - if ((p = strstr (line, "from network")) == NULL) { - r->is_error = 2; - return; - } - - p += sizeof ("from network") - 1; - while (g_ascii_isspace (*p) || *p == '[') { - p++; - } - /* format is ip/host */ - s = p; - if (*p) { - while (g_ascii_isdigit (*++p) || *p == '.'); - if (*p != '/') { - r->is_error = 1; - return; - } - else { - *p = '\0'; - r->real_ip = rspamd_mempool_strdup (pool, s); - *p = '/'; - /* Now try to parse hostname */ - s = ++p; - while (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') { - p++; - } - t = *p; - *p = '\0'; - r->real_hostname = rspamd_mempool_strdup (pool, s); - *p = t; - } - } -} - -static void -parse_recv_header (rspamd_mempool_t * pool, gchar *line, struct received_header *r) -{ - gchar *p, *s, t, **res = NULL; - enum { - RSPAMD_RECV_STATE_INIT = 0, - RSPAMD_RECV_STATE_FROM, - RSPAMD_RECV_STATE_IP_BLOCK, - RSPAMD_RECV_STATE_BRACES_BLOCK, - RSPAMD_RECV_STATE_BY_BLOCK, - RSPAMD_RECV_STATE_PARSE_IP, - RSPAMD_RECV_STATE_SKIP_SPACES, - RSPAMD_RECV_STATE_ERROR - } state = RSPAMD_RECV_STATE_INIT, - next_state = RSPAMD_RECV_STATE_INIT; - gboolean is_exim = FALSE; - - g_strstrip (line); - p = line; - s = line; - - while (*p) { - switch (state) { - /* Initial state, search for from */ - case RSPAMD_RECV_STATE_INIT: - if (*p == 'f' || *p == 'F') { - if (g_ascii_tolower (*++p) == 'r' && g_ascii_tolower (*++p) == 'o' && g_ascii_tolower (*++p) == 'm') { - p++; - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_FROM; - } - } - else if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == 'y') { - state = RSPAMD_RECV_STATE_IP_BLOCK; - } - else { - /* This can be qmail header, parse it separately */ - parse_qmail_recv (pool, line, r); - return; - } - break; - /* Read hostname */ - case RSPAMD_RECV_STATE_FROM: - if (*p == '[') { - /* This should be IP address */ - res = &r->from_ip; - state = RSPAMD_RECV_STATE_PARSE_IP; 
- next_state = RSPAMD_RECV_STATE_IP_BLOCK; - s = ++p; - } - else if (g_ascii_isalnum (*p) || *p == '.' || *p == '-' || *p == '_') { - p++; - } - else { - t = *p; - *p = '\0'; - r->from_hostname = rspamd_mempool_strdup (pool, s); - *p = t; - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_IP_BLOCK; - } - break; - /* Try to extract additional info */ - case RSPAMD_RECV_STATE_IP_BLOCK: - /* Try to extract ip or () info or by */ - if (g_ascii_tolower (*p) == 'b' && g_ascii_tolower (*(p + 1)) == 'y') { - p += 2; - /* Skip spaces after by */ - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_BY_BLOCK; - } - else if (*p == '(') { - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; - p++; - } - else if (*p == '[') { - /* Got ip before '(' so extract it */ - s = ++p; - res = &r->from_ip; - state = RSPAMD_RECV_STATE_PARSE_IP; - next_state = RSPAMD_RECV_STATE_IP_BLOCK; - } - else { - p++; - } - break; - /* We are in () block. Here can be found real hostname and real ip, this is written by some MTA */ - case RSPAMD_RECV_STATE_BRACES_BLOCK: - /* End of block */ - if (g_ascii_isalnum (*p) || *p == '.' 
|| *p == '-' || - *p == '_' || *p == ':') { - p++; - } - else if (*p == '[') { - s = ++p; - state = RSPAMD_RECV_STATE_PARSE_IP; - res = &r->real_ip; - next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; - } - else { - if (p > s) { - /* Got some real hostname */ - /* check whether it is helo or p is not space symbol */ - if (!g_ascii_isspace (*p) || *(p + 1) != '[') { - /* Exim style ([ip]:port helo=hostname) */ - if (*s == ':' && (g_ascii_isspace (*p) || *p == ')')) { - /* Ip ending */ - is_exim = TRUE; - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; - } - else if (p - s == 4 && memcmp (s, "helo=", 5) == 0) { - p ++; - is_exim = TRUE; - if (r->real_hostname == NULL && r->from_hostname != NULL) { - r->real_hostname = r->from_hostname; - } - s = p; - while (*p != ')' && !g_ascii_isspace (*p) && *p != '\0') { - p ++; - } - if (p > s) { - r->from_hostname = rspamd_mempool_alloc (pool, p - s + 1); - rspamd_strlcpy (r->from_hostname, s, p - s + 1); - } - } - else if (p - s == 4 && memcmp (s, "port=", 5) == 0) { - p ++; - is_exim = TRUE; - while (g_ascii_isdigit (*p)) { - p ++; - } - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; - } - else if (*p == '=' && is_exim) { - /* Just skip unknown pairs */ - p ++; - while (!g_ascii_isspace (*p) && *p != ')' && *p != '\0') { - p ++; - } - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; - } - else { - /* skip all */ - while (*p++ != ')' && *p != '\0'); - state = RSPAMD_RECV_STATE_IP_BLOCK; - } - } - else { - /* Postfix style (hostname [ip]) */ - t = *p; - *p = '\0'; - r->real_hostname = rspamd_mempool_strdup (pool, s); - *p = t; - /* Now parse ip */ - p += 2; - s = p; - res = &r->real_ip; - state = RSPAMD_RECV_STATE_PARSE_IP; - next_state = RSPAMD_RECV_STATE_BRACES_BLOCK; - continue; - } - if (*p == ')') { - p ++; - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_IP_BLOCK; - } - } - else if (*p == 
')') { - p ++; - state = RSPAMD_RECV_STATE_SKIP_SPACES; - next_state = RSPAMD_RECV_STATE_IP_BLOCK; - } - else { - r->is_error = 1; - return; - } - } - break; - /* Got by word */ - case RSPAMD_RECV_STATE_BY_BLOCK: - /* Here can be only hostname */ - if ((g_ascii_isalnum (*p) || *p == '.' || *p == '-' - || *p == '_') && p[1] != '\0') { - p++; - } - else { - /* We got something like hostname */ - if (p[1] != '\0') { - t = *p; - *p = '\0'; - r->by_hostname = rspamd_mempool_strdup (pool, s); - *p = t; - } - else { - r->by_hostname = rspamd_mempool_strdup (pool, s); - } - /* Now end of parsing */ - if (is_exim) { - /* Adjust for exim received */ - if (r->real_ip == NULL && r->from_ip != NULL) { - r->real_ip = r->from_ip; - } - else if (r->from_ip == NULL && r->real_ip != NULL) { - r->from_ip = r->real_ip; - if (r->real_hostname == NULL && r->from_hostname != NULL) { - r->real_hostname = r->from_hostname; - } - } - } - return; - } - break; - - /* Extract ip */ - case RSPAMD_RECV_STATE_PARSE_IP: - while (g_ascii_isxdigit (*p) || *p == '.' 
|| *p == ':') { - p ++; - } - if (*p != ']') { - /* Not an ip in fact */ - state = RSPAMD_RECV_STATE_SKIP_SPACES; - p++; - } - else { - *p = '\0'; - *res = rspamd_mempool_strdup (pool, s); - *p = ']'; - p++; - state = RSPAMD_RECV_STATE_SKIP_SPACES; - } - break; - - /* Skip spaces */ - case RSPAMD_RECV_STATE_SKIP_SPACES: - if (!g_ascii_isspace (*p)) { - state = next_state; - s = p; - } - else { - p++; - } - break; - default: - r->is_error = 1; - return; - break; - } - } - - r->is_error = 1; - return; -} - -/* Convert raw headers to a list of struct raw_header * */ -static void -process_raw_headers (struct rspamd_task *task) -{ - struct raw_header *new = NULL, *lp; - gchar *p, *c, *tmp, *tp; - gint state = 0, l, next_state = 100, err_state = 100, t_state; - gboolean valid_folding = FALSE; - - p = task->raw_headers_str; - c = p; - while (*p) { - /* FSM for processing headers */ - switch (state) { - case 0: - /* Begin processing headers */ - if (!g_ascii_isalpha (*p)) { - /* We have some garbage at the beginning of headers, skip this line */ - state = 100; - next_state = 0; - } - else { - state = 1; - c = p; - } - break; - case 1: - /* We got something like header's name */ - if (*p == ':') { - new = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct raw_header)); - l = p - c; - tmp = rspamd_mempool_alloc (task->task_pool, l + 1); - rspamd_strlcpy (tmp, c, l + 1); - new->name = tmp; - new->empty_separator = TRUE; - p ++; - state = 2; - c = p; - } - else if (g_ascii_isspace (*p)) { - /* Not header but some garbage */ - state = 100; - next_state = 0; - } - else { - p ++; - } - break; - case 2: - /* We got header's name, so skip any \t or spaces */ - if (*p == '\t') { - new->tab_separated = TRUE; - new->empty_separator = FALSE; - p ++; - } - else if (*p == ' ') { - new->empty_separator = FALSE; - p ++; - } - else if (*p == '\n' || *p == '\r') { - /* Process folding */ - state = 99; - l = p - c; - if (l > 0) { - tmp = rspamd_mempool_alloc (task->task_pool, l + 1); - 
rspamd_strlcpy (tmp, c, l + 1); - new->separator = tmp; - } - next_state = 3; - err_state = 5; - c = p; - } - else { - /* Process value */ - l = p - c; - if (l >= 0) { - tmp = rspamd_mempool_alloc (task->task_pool, l + 1); - rspamd_strlcpy (tmp, c, l + 1); - new->separator = tmp; - } - c = p; - state = 3; - } - break; - case 3: - if (*p == '\r' || *p == '\n') { - /* Hold folding */ - state = 99; - next_state = 3; - err_state = 4; - } - else if (*(p + 1) == '\0') { - state = 4; - } - else { - p ++; - } - break; - case 4: - /* Copy header's value */ - l = p - c; - tmp = rspamd_mempool_alloc (task->task_pool, l + 1); - tp = tmp; - t_state = 0; - while (l --) { - if (t_state == 0) { - /* Before folding */ - if (*c == '\n' || *c == '\r') { - t_state = 1; - c ++; - *tp ++ = ' '; - } - else { - *tp ++ = *c ++; - } - } - else if (t_state == 1) { - /* Inside folding */ - if (g_ascii_isspace (*c)) { - c++; - } - else { - t_state = 0; - *tp ++ = *c ++; - } - } - } - /* Strip last space that can be added by \r\n parsing */ - if (*(tp - 1) == ' ') { - tp --; - } - *tp = '\0'; - new->value = tmp; - new->next = NULL; - if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) { - while (lp->next != NULL) { - lp = lp->next; - } - lp->next = new; - } - else { - g_hash_table_insert (task->raw_headers, new->name, new); - } - debug_task ("add raw header %s: %s", new->name, new->value); - state = 0; - break; - case 5: - /* Header has only name, no value */ - new->next = NULL; - new->value = ""; - if ((lp = g_hash_table_lookup (task->raw_headers, new->name)) != NULL) { - while (lp->next != NULL) { - lp = lp->next; - } - lp->next = new; - } - else { - g_hash_table_insert (task->raw_headers, new->name, new); - } - state = 0; - debug_task ("add raw header %s: %s", new->name, new->value); - break; - case 99: - /* Folding state */ - if (*(p + 1) == '\0') { - state = err_state; - } - else { - if (*p == '\r' || *p == '\n') { - p ++; - valid_folding = FALSE; - } - else if (*p == 
'\t' || *p == ' ') { - /* Valid folding */ - p ++; - valid_folding = TRUE; - } - else { - if (valid_folding) { - debug_task ("go to state: %d->%d", state, next_state); - state = next_state; - } - else { - /* Fall back */ - debug_task ("go to state: %d->%d", state, err_state); - state = err_state; - } - } - } - break; - case 100: - /* Fail state, skip line */ - if (*p == '\r') { - if (*(p + 1) == '\n') { - p ++; - } - p ++; - state = next_state; - } - else if (*p == '\n') { - if (*(p + 1) == '\r') { - p ++; - } - p ++; - state = next_state; - } - else if (*(p + 1) == '\0') { - state = next_state; - p ++; - } - else { - p ++; - } - break; - } - } -} - -static void -free_byte_array_callback (void *pointer) -{ - GByteArray *arr = (GByteArray *) pointer; - g_byte_array_free (arr, TRUE); -} - -static GByteArray * -convert_text_to_utf (struct rspamd_task *task, GByteArray * part_content, GMimeContentType * type, struct mime_text_part *text_part) -{ - GError *err = NULL; - gsize read_bytes, write_bytes; - const gchar *charset; - gchar *res_str; - GByteArray *result_array; - - if (task->cfg->raw_mode) { - text_part->is_raw = TRUE; - return part_content; - } - - if ((charset = g_mime_content_type_get_parameter (type, "charset")) == NULL) { - text_part->is_raw = TRUE; - return part_content; - } - - if (g_ascii_strcasecmp (charset, "utf-8") == 0 || g_ascii_strcasecmp (charset, "utf8") == 0) { - if (g_utf8_validate (part_content->data, part_content->len, NULL)) { - text_part->is_raw = FALSE; - text_part->is_utf = TRUE; - return part_content; - } - else { - msg_info ("<%s>: contains invalid utf8 characters, assume it as raw", task->message_id); - text_part->is_raw = TRUE; - return part_content; - } - } - - res_str = g_convert_with_fallback (part_content->data, part_content->len, UTF8_CHARSET, charset, NULL, &read_bytes, &write_bytes, &err); - if (res_str == NULL) { - msg_warn ("<%s>: cannot convert from %s to utf8: %s", task->message_id, charset, err ? 
err->message : "unknown problem"); - text_part->is_raw = TRUE; - return part_content; - } - - result_array = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray)); - result_array->data = res_str; - result_array->len = write_bytes; - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, res_str); - text_part->is_raw = FALSE; - text_part->is_utf = TRUE; - - return result_array; -} - -static void -process_text_part (struct rspamd_task *task, GByteArray *part_content, GMimeContentType *type, - GMimeObject *part, GMimeObject *parent, gboolean is_empty) -{ - struct mime_text_part *text_part; - const gchar *cd; - - /* Skip attachements */ -#ifndef GMIME24 - cd = g_mime_part_get_content_disposition (GMIME_PART (part)); - if (cd && g_ascii_strcasecmp (cd, "attachment") == 0 && !task->cfg->check_text_attachements) { - debug_task ("skip attachments for checking as text parts"); - return; - } -#else - cd = g_mime_object_get_disposition (GMIME_OBJECT (part)); - if (cd && g_ascii_strcasecmp (cd, GMIME_DISPOSITION_ATTACHMENT) == 0 && !task->cfg->check_text_attachements) { - debug_task ("skip attachments for checking as text parts"); - return; - } -#endif - - if (g_mime_content_type_is_type (type, "text", "html") || g_mime_content_type_is_type (type, "text", "xhtml")) { - debug_task ("got urls from text/html part"); - - text_part = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct mime_text_part)); - text_part->is_html = TRUE; - if (is_empty) { - text_part->is_empty = TRUE; - text_part->orig = NULL; - text_part->content = NULL; - task->text_parts = g_list_prepend (task->text_parts, text_part); - return; - } - text_part->orig = convert_text_to_utf (task, part_content, type, text_part); - text_part->is_balanced = TRUE; - text_part->html_nodes = NULL; - text_part->parent = parent; - - text_part->content = strip_html_tags (task, task->task_pool, text_part, text_part->orig, NULL); - - if (text_part->html_nodes == NULL) { - url_parse_text 
(task->task_pool, task, text_part, FALSE); - } - else { - decode_entitles (text_part->content->data, &text_part->content->len); - url_parse_text (task->task_pool, task, text_part, FALSE); -#if 0 - url_parse_text (task->task_pool, task, text_part, TRUE); -#endif - } - - fuzzy_init_part (text_part, task->task_pool, task->cfg->max_diff); - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) free_byte_array_callback, text_part->content); - task->text_parts = g_list_prepend (task->text_parts, text_part); - } - else if (g_mime_content_type_is_type (type, "text", "*")) { - debug_task ("got urls from text/plain part"); - - text_part = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct mime_text_part)); - text_part->is_html = FALSE; - text_part->parent = parent; - if (is_empty) { - text_part->is_empty = TRUE; - text_part->orig = NULL; - text_part->content = NULL; - task->text_parts = g_list_prepend (task->text_parts, text_part); - return; - } - text_part->orig = convert_text_to_utf (task, part_content, type, text_part); - text_part->content = text_part->orig; - url_parse_text (task->task_pool, task, text_part, FALSE); - fuzzy_init_part (text_part, task->task_pool, task->cfg->max_diff); - task->text_parts = g_list_prepend (task->text_parts, text_part); - } -} - -#ifdef GMIME24 -static void -mime_foreach_callback (GMimeObject * parent, GMimeObject * part, gpointer user_data) -#else -static void -mime_foreach_callback (GMimeObject * part, gpointer user_data) -#endif -{ - struct rspamd_task *task = (struct rspamd_task *)user_data; - struct mime_part *mime_part; - GMimeContentType *type; - GMimeDataWrapper *wrapper; - GMimeStream *part_stream; - GByteArray *part_content; - - task->parts_count++; - - /* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */ - - /* find out what class 'part' is... 
*/ - if (GMIME_IS_MESSAGE_PART (part)) { - /* message/rfc822 or message/news */ - GMimeMessage *message; - - /* g_mime_message_foreach_part() won't descend into - child message parts, so if we want to count any - subparts of this child message, we'll have to call - g_mime_message_foreach_part() again here. */ - - message = g_mime_message_part_get_message ((GMimeMessagePart *) part); - if (task->parser_recursion++ < RECURSION_LIMIT) { -#ifdef GMIME24 - g_mime_message_foreach (message, mime_foreach_callback, task); -#else - g_mime_message_foreach_part (message, mime_foreach_callback, task); -#endif - } - else { - msg_err ("endless recursion detected: %d", task->parser_recursion); - return; - } -#ifndef GMIME24 - g_object_unref (message); -#endif - } - else if (GMIME_IS_MESSAGE_PARTIAL (part)) { - /* message/partial */ - - /* this is an incomplete message part, probably a - large message that the sender has broken into - smaller parts and is sending us bit by bit. we - could save some info about it so that we could - piece this back together again once we get all the - parts? */ - } - else if (GMIME_IS_MULTIPART (part)) { - /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */ - task->parser_parent_part = part; -#ifndef GMIME24 - debug_task ("detected multipart part"); - /* we'll get to finding out if this is a signed/encrypted multipart later... 
*/ - if (task->parser_recursion++ < RECURSION_LIMIT) { - g_mime_multipart_foreach ((GMimeMultipart *) part, mime_foreach_callback, task); - } - else { - msg_err ("endless recursion detected: %d", task->parser_recursion); - return; - } -#endif - } - else if (GMIME_IS_PART (part)) { - /* a normal leaf part, could be text/plain or image/jpeg etc */ -#ifdef GMIME24 - type = (GMimeContentType *) g_mime_object_get_content_type (GMIME_OBJECT (part)); -#else - type = (GMimeContentType *) g_mime_part_get_content_type (GMIME_PART (part)); -#endif - if (type == NULL) { - msg_warn ("type of part is unknown, assume text/plain"); - type = g_mime_content_type_new ("text", "plain"); -#ifdef GMIME24 - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, type); -#else - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_mime_content_type_destroy, type); -#endif - } - wrapper = g_mime_part_get_content_object (GMIME_PART (part)); -#ifdef GMIME24 - if (wrapper != NULL && GMIME_IS_DATA_WRAPPER (wrapper)) { -#else - if (wrapper != NULL) { -#endif - part_stream = g_mime_stream_mem_new (); - if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) { - g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (part_stream), FALSE); - part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream)); - g_object_unref (part_stream); - mime_part = rspamd_mempool_alloc (task->task_pool, sizeof (struct mime_part)); - mime_part->type = type; - mime_part->content = part_content; - mime_part->parent = task->parser_parent_part; - mime_part->filename = g_mime_part_get_filename (GMIME_PART (part)); - debug_task ("found part with content-type: %s/%s", type->type, type->subtype); - task->parts = g_list_prepend (task->parts, mime_part); - /* Skip empty parts */ - process_text_part (task, part_content, type, part, task->parser_parent_part, (part_content->len <= 0)); - } - else { - msg_warn ("write to stream failed: %d, %s", 
errno, strerror (errno)); - } -#ifndef GMIME24 - g_object_unref (wrapper); -#endif - } - else { - msg_warn ("cannot get wrapper for mime part, type of part: %s/%s", type->type, type->subtype); - } - } - else { - g_assert_not_reached (); - } -} - -static void -destroy_message (void *pointer) -{ - GMimeMessage *msg = pointer; - - msg_debug ("freeing pointer %p", msg); - g_object_unref (msg); -} - -gint -process_message (struct rspamd_task *task) -{ - GMimeMessage *message; - GMimeParser *parser; - GMimeStream *stream; - GByteArray *tmp; - GList *first, *cur; - GMimePart *part; - GMimeDataWrapper *wrapper; - struct received_header *recv; - gchar *mid, *url_str, *p, *end, *url_end; - struct uri *subject_url; - gsize len; - gint rc; - - tmp = rspamd_mempool_alloc (task->task_pool, sizeof (GByteArray)); - tmp->data = task->msg->str; - tmp->len = task->msg->len; - - stream = g_mime_stream_mem_new_with_byte_array (tmp); - /* - * This causes g_mime_stream not to free memory by itself as it is memory allocated by - * pool allocator - */ - g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (stream), FALSE); - - if (task->is_mime) { - - debug_task ("construct mime parser from string length %d", (gint)task->msg->len); - /* create a new parser object to parse the stream */ - parser = g_mime_parser_new_with_stream (stream); - g_object_unref (stream); - - /* parse the message from the stream */ - message = g_mime_parser_construct_message (parser); - - if (message == NULL) { - msg_warn ("cannot construct mime from stream"); - return -1; - } - - task->message = message; - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) destroy_message, task->message); - - /* Save message id for future use */ - task->message_id = g_mime_message_get_message_id (task->message); - if (task->message_id == NULL) { - task->message_id = "undef"; - } - - task->parser_recursion = 0; -#ifdef GMIME24 - g_mime_message_foreach (message, mime_foreach_callback, task); -#else - /* - * This is 
rather strange, but gmime 2.2 do NOT pass top-level part to foreach callback - * so we need to set up parent part by hands - */ - task->parser_parent_part = g_mime_message_get_mime_part (message); - g_object_unref (task->parser_parent_part); - g_mime_message_foreach_part (message, mime_foreach_callback, task); -#endif - - debug_task ("found %d parts in message", task->parts_count); - if (task->queue_id == NULL) { - task->queue_id = "undef"; - } - -#ifdef GMIME24 - task->raw_headers_str = g_mime_object_get_headers (GMIME_OBJECT (task->message)); -#else - task->raw_headers_str = g_mime_message_get_headers (task->message); -#endif - - process_images (task); - - /* Parse received headers */ - first = message_get_header (task->task_pool, message, "Received", FALSE); - cur = first; - while (cur) { - recv = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct received_header)); - parse_recv_header (task->task_pool, cur->data, recv); - task->received = g_list_prepend (task->received, recv); - cur = g_list_next (cur); - } - if (first) { - g_list_free (first); - } - - if (task->raw_headers_str) { - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, task->raw_headers_str); - process_raw_headers (task); - } - - task->rcpts = g_mime_message_get_all_recipients (message); - if (task->rcpts) { -#ifdef GMIME24 - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, task->rcpts); -#else - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) internet_address_list_destroy, task->rcpts); -#endif - } - - - /* free the parser (and the stream) */ - g_object_unref (parser); - } - else { - /* We got only message, no mime headers or anything like this */ - /* Construct fake message for it */ - task->message = g_mime_message_new (TRUE); - if (task->from) { - g_mime_message_set_sender (task->message, task->from); - } - /* Construct part for it */ - part = g_mime_part_new_with_type ("text", "html"); 
-#ifdef GMIME24 - wrapper = g_mime_data_wrapper_new_with_stream (stream, GMIME_CONTENT_ENCODING_8BIT); -#else - wrapper = g_mime_data_wrapper_new_with_stream (stream, GMIME_PART_ENCODING_8BIT); -#endif - g_mime_part_set_content_object (part, wrapper); - g_mime_message_set_mime_part (task->message, GMIME_OBJECT (part)); - /* Register destructors */ - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, wrapper); - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_object_unref, part); - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) destroy_message, task->message); - /* Now parse in a normal way */ - task->parser_recursion = 0; -#ifdef GMIME24 - g_mime_message_foreach (task->message, mime_foreach_callback, task); -#else - g_mime_message_foreach_part (task->message, mime_foreach_callback, task); -#endif - /* Generate message ID */ - mid = g_mime_utils_generate_message_id ("localhost.localdomain"); - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t) g_free, mid); - g_mime_message_set_message_id (task->message, mid); - task->message_id = mid; - task->queue_id = mid; - /* Set headers for message */ - if (task->subject) { - g_mime_message_set_subject (task->message, task->subject); - } - - /* Add recipients */ -#ifndef GMIME24 - if (task->rcpt) { - cur = task->rcpt; - while (cur) { - g_mime_message_add_recipient (task->message, GMIME_RECIPIENT_TYPE_TO, NULL, (gchar *)cur->data); - cur = g_list_next (cur); - } - } -#endif - } - - /* Parse urls inside Subject header */ - cur = message_get_header (task->task_pool, task->message, "Subject", FALSE); - if (cur) { - p = cur->data; - len = strlen (p); - end = p + len; - - while (p < end) { - /* Search to the end of url */ - if (url_try_text (task->task_pool, p, end - p, NULL, &url_end, &url_str, FALSE)) { - if (url_str != NULL) { - subject_url = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct uri)); - 
if (subject_url != NULL) { - /* Try to parse url */ - rc = parse_uri (subject_url, url_str, task->task_pool); - if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) && - subject_url->hostlen > 0) { - if (subject_url->protocol != PROTOCOL_MAILTO) { - if (!g_tree_lookup (task->urls, subject_url)) { - g_tree_insert (task->urls, subject_url, subject_url); - } - } - } - else if (rc != URI_ERRNO_OK) { - msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc)); - } - } - } - } - else { - break; - } - p = url_end + 1; - } - /* Free header's list */ - g_list_free (cur); - } - - return 0; -} - -struct gmime_raw_header { - struct raw_header *next; - gchar *name; - gchar *value; -}; - -typedef struct _GMimeHeader { - GHashTable *hash; - GHashTable *writers; - struct raw_header *headers; -} local_GMimeHeader; - - -/* known header field types */ -enum { - HEADER_FROM = 0, - HEADER_REPLY_TO, - HEADER_TO, - HEADER_CC, - HEADER_BCC, - HEADER_SUBJECT, - HEADER_DATE, - HEADER_MESSAGE_ID, - HEADER_UNKNOWN -}; - -/* - * Iterate throught all headers and make a list - */ -#ifndef GMIME24 -static void -header_iterate (rspamd_mempool_t * pool, struct gmime_raw_header *h, GList ** ret, const gchar *field, gboolean strong) -{ - while (h) { - if (G_LIKELY (!strong)) { - if (h->value && !g_ascii_strncasecmp (field, h->name, strlen (field))) { - if (pool != NULL) { - *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, h->value)); - } - else { - *ret = g_list_prepend (*ret, g_strdup (h->value)); - } - } - } - else { - if (h->value && !strncmp (field, h->name, strlen (field))) { - if (pool != NULL) { - *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, h->value)); - } - else { - *ret = g_list_prepend (*ret, g_strdup (h->value)); - } - } - } - h = (struct gmime_raw_header *)h->next; - } -} -#else -static void -header_iterate (rspamd_mempool_t * pool, GMimeHeaderList * ls, GList ** ret, const gchar *field, gboolean strong) -{ - /* Use 
iterator in case of gmime 2.4 */ - GMimeHeaderIter *iter; - const gchar *name; - - if (ls == NULL) { - *ret = NULL; - return; - } - - iter = g_mime_header_iter_new (); - if (g_mime_header_list_get_iter (ls, iter) && g_mime_header_iter_first (iter)) { - /* Iterate throught headers */ - while (g_mime_header_iter_is_valid (iter)) { - name = g_mime_header_iter_get_name (iter); - if (G_LIKELY (!strong)) { - if (!g_ascii_strncasecmp (field, name, strlen (name))) { - if (pool != NULL) { - *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, g_mime_header_iter_get_value (iter))); - } - else { - *ret = g_list_prepend (*ret, g_strdup (g_mime_header_iter_get_value (iter))); - } - } - } - else { - if (!strncmp (field, name, strlen (name))) { - if (pool != NULL) { - *ret = g_list_prepend (*ret, rspamd_mempool_strdup (pool, g_mime_header_iter_get_value (iter))); - } - else { - *ret = g_list_prepend (*ret, g_strdup (g_mime_header_iter_get_value (iter))); - } - } - } - if (!g_mime_header_iter_next (iter)) { - break; - } - } - } - g_mime_header_iter_free (iter); -} -#endif - - -struct multipart_cb_data { - GList *ret; - rspamd_mempool_t *pool; - const gchar *field; - gboolean try_search; - gboolean strong; - gint rec; -}; - -#define MAX_REC 10 - -static void -#ifdef GMIME24 -multipart_iterate (GMimeObject * parent, GMimeObject * part, gpointer user_data) -#else -multipart_iterate (GMimeObject * part, gpointer user_data) -#endif -{ - struct multipart_cb_data *data = user_data; -#ifndef GMIME24 - struct gmime_raw_header *h; -#endif - GList *l = NULL; - - if (data->try_search && part != NULL && GMIME_IS_PART (part)) { -#ifdef GMIME24 - GMimeHeaderList *ls; - - ls = g_mime_object_get_header_list (GMIME_OBJECT (part)); - header_iterate (data->pool, ls, &l, data->field, data->strong); -#else - h = (struct gmime_raw_header *)part->headers->headers; - header_iterate (data->pool, h, &l, data->field, data->strong); -#endif - if (l == NULL) { - /* Header not found, abandon search results 
*/ - data->try_search = FALSE; - g_list_free (data->ret); - data->ret = NULL; - } - else { - data->ret = g_list_concat (l, data->ret); - } - } - else if (data->try_search && GMIME_IS_MULTIPART (part)) { - /* Maybe endless recursion here ? */ - if (data->rec++ < MAX_REC) { - g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, data); - } - else { - msg_info ("maximum recurse limit is over, stop recursing, %d", data->rec); - data->try_search = FALSE; - } - } -} - -static GList * -local_message_get_header (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *field, gboolean strong) -{ - GList *gret = NULL; - GMimeObject *part; - struct multipart_cb_data cb = { - .try_search = TRUE, - .rec = 0, - .ret = NULL, - }; - cb.pool = pool; - cb.field = field; - cb.strong = strong; - -#ifndef GMIME24 - struct gmime_raw_header *h; - - if (field == NULL) { - return NULL; - } - - msg_debug ("iterate over headers to find header %s", field); - h = (struct gmime_raw_header *) (GMIME_OBJECT (message)->headers->headers); - header_iterate (pool, h, &gret, field, strong); - - if (gret == NULL) { - /* Try to iterate with mime part headers */ - msg_debug ("iterate over headers of mime part to find header %s", field); - part = g_mime_message_get_mime_part (message); - if (part) { - h = (struct gmime_raw_header *)part->headers->headers; - header_iterate (pool, h, &gret, field, strong); - if (gret == NULL && GMIME_IS_MULTIPART (part)) { - msg_debug ("iterate over headers of each multipart's subparts %s", field); - g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, &cb); - if (cb.ret != NULL) { - gret = cb.ret; - } - } -#ifndef GMIME24 - g_object_unref (part); -#endif - } - } - - return gret; -#else - GMimeHeaderList *ls; - - ls = g_mime_object_get_header_list (GMIME_OBJECT (message)); - header_iterate (pool, ls, &gret, field, strong); - if (gret == NULL) { - /* Try to iterate with mime part headers */ - part = g_mime_message_get_mime_part (message); 
- if (part) { - ls = g_mime_object_get_header_list (GMIME_OBJECT (part)); - header_iterate (pool, ls, &gret, field, strong); - if (gret == NULL && GMIME_IS_MULTIPART (part)) { - g_mime_multipart_foreach (GMIME_MULTIPART (part), multipart_iterate, &cb); - if (cb.ret != NULL) { - gret = cb.ret; - } - } -#ifndef GMIME24 - g_object_unref (part); -#endif - } - } - - - return gret; -#endif -} - -/** -* g_mime_message_set_date_from_string: Set the message sent-date -* @message: MIME Message -* @string: A string of date -* -* Set the sent-date on a MIME Message. -**/ -void -local_mime_message_set_date_from_string (GMimeMessage * message, const gchar * string) -{ - time_t date; - gint offset = 0; - - date = g_mime_utils_header_decode_date (string, &offset); - g_mime_message_set_date (message, date, offset); -} - -/* - * Replacements for standart gmime functions but converting adresses to IA - */ -static const gchar * -local_message_get_sender (GMimeMessage * message) -{ - gchar *res; - const gchar *from = g_mime_message_get_sender (message); - InternetAddressList *ia; - -#ifndef GMIME24 - ia = internet_address_parse_string (from); -#else - ia = internet_address_list_parse_string (from); -#endif - if (!ia) { - return NULL; - } - res = internet_address_list_to_string (ia, FALSE); -#ifndef GMIME24 - internet_address_list_destroy (ia); -#else - g_object_unref (ia); -#endif - - return res; -} - -static const gchar * -local_message_get_reply_to (GMimeMessage * message) -{ - gchar *res; - const gchar *from = g_mime_message_get_reply_to (message); - InternetAddressList *ia; - -#ifndef GMIME24 - ia = internet_address_parse_string (from); -#else - ia = internet_address_list_parse_string (from); -#endif - if (!ia) { - return NULL; - } - res = internet_address_list_to_string (ia, FALSE); -#ifndef GMIME24 - internet_address_list_destroy (ia); -#else - g_object_unref (ia); -#endif - - return res; -} - -#ifdef GMIME24 - -# define ADD_RECIPIENT_TEMPLATE(type,def) \ -static void \ 
-local_message_add_recipients_from_string_##type (GMimeMessage *message, const gchar *string, const gchar *value) \ -{ \ - InternetAddressList *il, *new; \ - \ - il = g_mime_message_get_recipients (message, (def)); \ - new = internet_address_list_parse_string (string); \ - internet_address_list_append (il, new); \ -} \ - -ADD_RECIPIENT_TEMPLATE (to, GMIME_RECIPIENT_TYPE_TO) - ADD_RECIPIENT_TEMPLATE (cc, GMIME_RECIPIENT_TYPE_CC) - ADD_RECIPIENT_TEMPLATE (bcc, GMIME_RECIPIENT_TYPE_BCC) -# define GET_RECIPIENT_TEMPLATE(type,def) \ -static InternetAddressList* \ -local_message_get_recipients_##type (GMimeMessage *message, const gchar *unused) \ -{ \ - return g_mime_message_get_recipients (message, (def)); \ -} - GET_RECIPIENT_TEMPLATE (to, GMIME_RECIPIENT_TYPE_TO) - GET_RECIPIENT_TEMPLATE (cc, GMIME_RECIPIENT_TYPE_CC) - GET_RECIPIENT_TEMPLATE (bcc, GMIME_RECIPIENT_TYPE_BCC) -#endif -/* different declarations for different types of set and get functions */ - typedef const gchar *(*GetFunc) (GMimeMessage * message); - typedef InternetAddressList *(*GetRcptFunc) (GMimeMessage * message, const gchar *type); - typedef GList *(*GetListFunc) (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *type, gboolean strong); - typedef void (*SetFunc) (GMimeMessage * message, const gchar *value); - typedef void (*SetListFunc) (GMimeMessage * message, const gchar *field, const gchar *value); - -/** different types of functions -* -* FUNC_CHARPTR -* - function with no arguments -* - get returns gchar* -* -* FUNC_IA (from Internet Address) -* - function with additional "field" argument from the fieldfunc table, -* - get returns Glist* -* -* FUNC_LIST -* - function with additional "field" argument (given arbitrary header field name) -* - get returns Glist* -**/ - enum { - FUNC_CHARPTR = 0, - FUNC_CHARFREEPTR, - FUNC_IA, - FUNC_LIST - }; - -/** -* fieldfunc struct: structure of MIME fields and corresponding get and set -* functions. 
-**/ - static struct { - gchar *name; - GetFunc func; - GetRcptFunc rcptfunc; - GetListFunc getlistfunc; - SetFunc setfunc; - SetListFunc setlfunc; - gint functype; - } fieldfunc[] = -{ - { - "From", local_message_get_sender, NULL, NULL, g_mime_message_set_sender, NULL, FUNC_CHARFREEPTR}, { - "Reply-To", local_message_get_reply_to, NULL, NULL, g_mime_message_set_reply_to, NULL, FUNC_CHARFREEPTR}, -#ifndef GMIME24 - { - "To", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, { - "Cc", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, { - "Bcc", NULL, (GetRcptFunc) g_mime_message_get_recipients, NULL, NULL, (SetListFunc) g_mime_message_add_recipients_from_string, FUNC_IA}, { - "Date", (GetFunc) g_mime_message_get_date_string, NULL, NULL, local_mime_message_set_date_from_string, NULL, FUNC_CHARFREEPTR}, -#else - { - "To", NULL, local_message_get_recipients_to, NULL, NULL, local_message_add_recipients_from_string_to, FUNC_IA}, { - "Cc", NULL, local_message_get_recipients_cc, NULL, NULL, local_message_add_recipients_from_string_cc, FUNC_IA}, { - "Bcc", NULL, local_message_get_recipients_bcc, NULL, NULL, local_message_add_recipients_from_string_bcc, FUNC_IA}, { - "Date", (GetFunc)g_mime_message_get_date_as_string, NULL, NULL, local_mime_message_set_date_from_string, NULL, FUNC_CHARFREEPTR}, -#endif - { - "Subject", g_mime_message_get_subject, NULL, NULL, g_mime_message_set_subject, NULL, FUNC_CHARPTR}, { - "Message-Id", g_mime_message_get_message_id, NULL, NULL, g_mime_message_set_message_id, NULL, FUNC_CHARPTR}, -#ifndef GMIME24 - { - NULL, NULL, NULL, local_message_get_header, NULL, g_mime_message_add_header, FUNC_LIST} -#else - { - NULL, NULL, NULL, local_message_get_header, NULL, (SetListFunc)g_mime_object_append_header, FUNC_LIST} -#endif -}; - -/** -* message_set_header: set header of any type excluding 
special (Content- and MIME-Version:) -**/ -void -message_set_header (GMimeMessage * message, const gchar *field, const gchar *value) -{ - gint i; - - if (!g_ascii_strcasecmp (field, "MIME-Version:") || !g_ascii_strncasecmp (field, "Content-", 8)) { - return; - } - for (i = 0; i <= HEADER_UNKNOWN; ++i) { - if (!fieldfunc[i].name || !g_ascii_strncasecmp (field, fieldfunc[i].name, strlen (fieldfunc[i].name))) { - switch (fieldfunc[i].functype) { - case FUNC_CHARPTR: - (*(fieldfunc[i].setfunc)) (message, value); - break; - case FUNC_IA: - (*(fieldfunc[i].setlfunc)) (message, fieldfunc[i].name, value); - break; - case FUNC_LIST: - (*(fieldfunc[i].setlfunc)) (message, field, value); - break; - } - break; - } - } -} - - -/** -* message_get_header: returns the list of 'any header' values -* (except of unsupported yet Content- and MIME-Version special headers) -* -* You should free the GList list by yourself. -**/ -GList * -message_get_header (rspamd_mempool_t * pool, GMimeMessage * message, const gchar *field, gboolean strong) -{ - gint i; - gchar *ret = NULL, *ia_string; - GList *gret = NULL; - InternetAddressList *ia_list = NULL, *ia; - - for (i = 0; i <= HEADER_UNKNOWN; ++i) { - if (!fieldfunc[i].name || !g_ascii_strncasecmp (field, fieldfunc[i].name, strlen (fieldfunc[i].name))) { - switch (fieldfunc[i].functype) { - case FUNC_CHARFREEPTR: - ret = (gchar *)(*(fieldfunc[i].func)) (message); - break; - case FUNC_CHARPTR: - ret = (gchar *)(*(fieldfunc[i].func)) (message); - break; - case FUNC_IA: - ia_list = (*(fieldfunc[i].rcptfunc)) (message, field); - ia = ia_list; -#ifndef GMIME24 - while (ia && ia->address) { - - ia_string = internet_address_to_string ((InternetAddress *) ia->address, FALSE); - if (pool != NULL) { - rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_free, ia_string); - } - gret = g_list_prepend (gret, ia_string); - ia = ia->next; - } -#else - i = internet_address_list_length (ia); - while (--i >= 0) { - ia_string = 
internet_address_to_string (internet_address_list_get_address (ia, i), FALSE); - if (pool != NULL) { - rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_free, ia_string); - } - gret = g_list_prepend (gret, ia_string); - } -#endif - break; - case FUNC_LIST: - gret = (*(fieldfunc[i].getlistfunc)) (pool, message, field, strong); - break; - } - break; - } - } - if (gret == NULL && ret != NULL) { - if (pool != NULL) { - gret = g_list_prepend (gret, rspamd_mempool_strdup (pool, ret)); - } - else { - gret = g_list_prepend (gret, g_strdup (ret)); - } - } - if (fieldfunc[i].functype == FUNC_CHARFREEPTR && ret) { - g_free (ret); - } - - return gret; -} - -GList* -message_get_raw_header (struct rspamd_task *task, const gchar *field, gboolean strong) -{ - GList *gret = NULL; - struct raw_header *rh; - - rh = g_hash_table_lookup (task->raw_headers, field); - - if (rh == NULL) { - return NULL; - } - - while (rh) { - if (strong) { - if (strcmp (rh->name, field) == 0) { - gret = g_list_prepend (gret, rh); - } - } - else { - if (g_ascii_strcasecmp (rh->name, field) == 0) { - gret = g_list_prepend (gret, rh); - } - } - rh = rh->next; - } - - if (gret != NULL) { - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, gret); - } - - return gret; -} diff --git a/src/message.h b/src/message.h deleted file mode 100644 index 5e27579d1..000000000 --- a/src/message.h +++ /dev/null @@ -1,91 +0,0 @@ -/** - * @file message.h - * Message processing functions and structures - */ - -#ifndef RSPAMD_MESSAGE_H -#define RSPAMD_MESSAGE_H - -#include "config.h" -#include "fuzzy.h" - -struct rspamd_task; -struct controller_session; - -struct mime_part { - GMimeContentType *type; - GByteArray *content; - GMimeObject *parent; - gchar *checksum; - const gchar *filename; -}; - -struct mime_text_part { - gboolean is_html; - gboolean is_raw; - gboolean is_balanced; - gboolean is_empty; - gboolean is_utf; - const gchar *real_charset; - GByteArray *orig; - 
GByteArray *content; - GNode *html_nodes; - GList *urls_offset; /**< list of offsets of urls */ - fuzzy_hash_t *fuzzy; - fuzzy_hash_t *double_fuzzy; - GMimeObject *parent; - GUnicodeScript script; - f_str_t *diff_str; -}; - -struct received_header { - gchar *from_hostname; - gchar *from_ip; - gchar *real_hostname; - gchar *real_ip; - gchar *by_hostname; - gint is_error; -}; - -struct raw_header { - gchar *name; - gchar *value; - gboolean tab_separated; - gboolean empty_separator; - gchar *separator; - struct raw_header *next; -}; - -/** - * Process message with all filters/statfiles, extract mime parts, urls and - * call metrics consolidation functions - * @param task worker_task object - * @return 0 if we have delayed filters to process and 1 if we have finished with processing - */ -gint process_message (struct rspamd_task *task); - -/* - * Set header with specified name and value - */ -void message_set_header (GMimeMessage *message, const gchar *field, const gchar *value); - -/* - * Get a list of header's values with specified header's name - * @param pool if not NULL this pool would be used for storing header's values - * @param message g_mime_message object - * @param field header's name - * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not - * @return A list of header's values or NULL. If list is not NULL it MUST be freed. If pool is NULL elements must be freed as well. - */ -GList* message_get_header (rspamd_mempool_t *pool, GMimeMessage *message, const gchar *field, gboolean strong); - -/* - * Get a list of header's values with specified header's name using raw headers - * @param task worker task structure - * @param field header's name - * @param strong if this flag is TRUE header's name is case sensitive, otherwise it is not - * @return A list of header's values or NULL. Unlike previous function it is NOT required to free list or values. I should rework one of these functions some time. 
- */ -GList* message_get_raw_header (struct rspamd_task *task, const gchar *field, gboolean strong); - -#endif diff --git a/src/printf.c b/src/printf.c deleted file mode 100644 index d72ec95c8..000000000 --- a/src/printf.c +++ /dev/null @@ -1,635 +0,0 @@ -/* Copyright (c) 2010, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "printf.h" -#include "fstring.h" -#include "main.h" - -/** - * From FreeBSD libutil code - */ -static const int maxscale = 6; - -static gchar * -rspamd_humanize_number (gchar *buf, gchar *last, gint64 num, gboolean bytes) -{ - const gchar *prefixes; - int i, r, remainder, sign; - gint64 divisor; - gsize baselen, len = last - buf; - - remainder = 0; - - baselen = 1; - if (!bytes) { - divisor = 1000; - prefixes = "\0\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E"; - } - else { - divisor = 1024; - prefixes = "B\0\0k\0\0M\0\0G\0\0T\0\0P\0\0E"; - } - - -#define SCALE2PREFIX(scale) (&prefixes[(scale) * 3]) - - if (num < 0) { - sign = -1; - num = -num; - baselen += 2; /* sign, digit */ - } - else { - sign = 1; - baselen += 1; /* digit */ - } - - /* Check if enough room for `x y' + suffix + `\0' */ - if (len < baselen + 1) { - return buf; - } - - /* - * Divide the number until it fits the given column. - * If there will be an overflow by the rounding below, - * divide once more. - */ - for (i = 0; i < maxscale && num > divisor; i++) { - remainder = num % divisor; - num /= divisor; - } - - r = rspamd_snprintf (buf, len, "%L%s", - sign * (num + (remainder + 50) / 1000), - SCALE2PREFIX (i)); - -#undef SCALE2PREFIX - - return buf + r; -} - - -static gchar * -rspamd_sprintf_num (gchar *buf, gchar *last, guint64 ui64, gchar zero, - guint hexadecimal, guint width) -{ - gchar *p, temp[sizeof ("18446744073709551615")]; - size_t len; - guint32 ui32; - static gchar hex[] = "0123456789abcdef"; - static gchar HEX[] = "0123456789ABCDEF"; - - p = temp + sizeof(temp); - - if (hexadecimal == 0) { - - if (ui64 <= G_MAXUINT32) { - - /* - * To divide 64-bit numbers and to find remainders - * on the x86 platform gcc and icc call the libc functions - * [u]divdi3() and [u]moddi3(), they call another function - * in its turn. On FreeBSD it is the qdivrem() function, - * its source code is about 170 lines of the code. - * The glibc counterpart is about 150 lines of the code. 
- * - * For 32-bit numbers and some divisors gcc and icc use - * a inlined multiplication and shifts. For example, - * guint "i32 / 10" is compiled to - * - * (i32 * 0xCCCCCCCD) >> 35 - */ - - ui32 = (guint32) ui64; - - do { - *--p = (gchar) (ui32 % 10 + '0'); - } while (ui32 /= 10); - - } else { - do { - *--p = (gchar) (ui64 % 10 + '0'); - } while (ui64 /= 10); - } - - } else if (hexadecimal == 1) { - - do { - - /* the "(guint32)" cast disables the BCC's warning */ - *--p = hex[(guint32) (ui64 & 0xf)]; - - } while (ui64 >>= 4); - - } else { /* hexadecimal == 2 */ - - do { - - /* the "(guint32)" cast disables the BCC's warning */ - *--p = HEX[(guint32) (ui64 & 0xf)]; - - } while (ui64 >>= 4); - } - - /* zero or space padding */ - - len = (temp + sizeof (temp)) - p; - - while (len++ < width && buf < last) { - *buf++ = zero; - } - - /* number safe copy */ - - len = (temp + sizeof (temp)) - p; - - if (buf + len > last) { - len = last - buf; - } - - return ((gchar *)memcpy (buf, p, len)) + len; -} - -struct rspamd_printf_char_buf { - char *begin; - char *pos; - glong remain; -}; - -static glong -rspamd_printf_append_char (const gchar *buf, glong buflen, gpointer ud) -{ - struct rspamd_printf_char_buf *dst = (struct rspamd_printf_char_buf *)ud; - glong wr; - - if (dst->remain <= 0) { - return dst->remain; - } - - wr = MIN (dst->remain, buflen); - memcpy (dst->pos, buf, wr); - dst->remain -= wr; - dst->pos += wr; - - return wr; -} - -static glong -rspamd_printf_append_file (const gchar *buf, glong buflen, gpointer ud) -{ - FILE *dst = (FILE *)ud; - - return fwrite (buf, 1, buflen, dst); -} - -static glong -rspamd_printf_append_gstring (const gchar *buf, glong buflen, gpointer ud) -{ - GString *dst = (GString *)ud; - - g_string_append_len (dst, buf, buflen); - - return buflen; -} - -glong -rspamd_fprintf (FILE *f, const gchar *fmt, ...) 
-{ - va_list args; - glong r; - - va_start (args, fmt); - r = rspamd_vprintf_common (rspamd_printf_append_file, f, fmt, args); - va_end (args); - - return r; -} - -glong -rspamd_log_fprintf (FILE *f, const gchar *fmt, ...) -{ - va_list args; - glong r; - - va_start (args, fmt); - r = rspamd_vprintf_common (rspamd_printf_append_file, f, fmt, args); - va_end (args); - - fflush (f); - - return r; -} - - -glong -rspamd_snprintf (gchar *buf, glong max, const gchar *fmt, ...) -{ - gchar *r; - va_list args; - - va_start (args, fmt); - r = rspamd_vsnprintf (buf, max, fmt, args); - va_end (args); - - return (r - buf); -} - -gchar * -rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list args) -{ - struct rspamd_printf_char_buf dst; - - dst.begin = buf; - dst.pos = dst.begin; - dst.remain = max - 1; - (void)rspamd_vprintf_common (rspamd_printf_append_char, &dst, fmt, args); - *dst.pos = '\0'; - - return dst.pos; -} - -glong -rspamd_printf_gstring (GString *s, const gchar *fmt, ...) -{ - va_list args; - glong r; - - va_start (args, fmt); - r = rspamd_vprintf_common (rspamd_printf_append_gstring, s, fmt, args); - va_end (args); - - return r; -} - -#define RSPAMD_PRINTF_APPEND(buf, len) \ - do { \ - wr = func ((buf), (len), apd); \ - if (wr <= 0) { \ - goto oob; \ - } \ - written += wr; \ - fmt ++; \ - buf_start = fmt; \ - } while(0) - -glong -rspamd_vprintf_common (rspamd_printf_append_func func, gpointer apd, const gchar *fmt, va_list args) -{ - gchar zero, numbuf[G_ASCII_DTOSTR_BUF_SIZE], *p, *last, c; - const gchar *buf_start = fmt; - gint d; - long double f, scale; - glong written = 0, wr, slen; - gint64 i64; - guint64 ui64; - guint width, sign, hex, humanize, bytes, frac_width, i; - f_str_t *v; - GString *gs; - gboolean bv; - - while (*fmt) { - - /* - * "buf < last" means that we could copy at least one character: - * the plain character, "%%", "%c", and minus without the checking - */ - - if (*fmt == '%') { - - /* Append what we have in buf */ - if (fmt > 
buf_start) { - wr = func (buf_start, fmt - buf_start, apd); - if (wr <= 0) { - goto oob; - } - written += wr; - } - - i64 = 0; - ui64 = 0; - - zero = (gchar) ((*++fmt == '0') ? '0' : ' '); - width = 0; - sign = 1; - hex = 0; - bytes = 0; - humanize = 0; - frac_width = 0; - slen = -1; - - while (*fmt >= '0' && *fmt <= '9') { - width = width * 10 + *fmt++ - '0'; - } - - - for ( ;; ) { - switch (*fmt) { - - case 'u': - sign = 0; - fmt++; - continue; - - case 'm': - fmt++; - continue; - - case 'X': - hex = 2; - sign = 0; - fmt++; - continue; - - case 'x': - hex = 1; - sign = 0; - fmt++; - continue; - case 'H': - humanize = 1; - bytes = 1; - sign = 0; - fmt ++; - continue; - case 'h': - humanize = 1; - sign = 0; - fmt ++; - continue; - case '.': - fmt++; - - while (*fmt >= '0' && *fmt <= '9') { - frac_width = frac_width * 10 + *fmt++ - '0'; - } - - break; - - case '*': - d = (gint)va_arg (args, gint); - if (G_UNLIKELY (d < 0)) { - msg_err ("critical error: size is less than 0"); - return 0; - } - slen = (glong)d; - fmt++; - continue; - - default: - break; - } - - break; - } - - - switch (*fmt) { - - case 'V': - v = va_arg (args, f_str_t *); - RSPAMD_PRINTF_APPEND (v->begin, v->len); - - continue; - - case 'v': - gs = va_arg (args, GString *); - RSPAMD_PRINTF_APPEND (gs->str, gs->len); - - continue; - - case 's': - p = va_arg (args, gchar *); - if (p == NULL) { - p = "(NULL)"; - } - - if (slen == -1) { - /* NULL terminated string */ - slen = strlen (p); - } - - RSPAMD_PRINTF_APPEND (p, slen); - - continue; - - case 'O': - i64 = (gint64) va_arg (args, off_t); - sign = 1; - break; - - case 'P': - i64 = (gint64) va_arg (args, pid_t); - sign = 1; - break; - - case 'T': - i64 = (gint64) va_arg (args, time_t); - sign = 1; - break; - - case 'z': - if (sign) { - i64 = (gint64) va_arg (args, ssize_t); - } else { - ui64 = (guint64) va_arg (args, size_t); - } - break; - - case 'd': - if (sign) { - i64 = (gint64) va_arg (args, gint); - } else { - ui64 = (guint64) va_arg (args, 
guint); - } - break; - - case 'l': - if (sign) { - i64 = (gint64) va_arg(args, glong); - } else { - ui64 = (guint64) va_arg(args, gulong); - } - break; - - case 'D': - if (sign) { - i64 = (gint64) va_arg(args, gint32); - } else { - ui64 = (guint64) va_arg(args, guint32); - } - break; - - case 'L': - if (sign) { - i64 = va_arg (args, gint64); - } else { - ui64 = va_arg (args, guint64); - } - break; - - - case 'f': - case 'F': - if (*fmt == 'f') { - f = (long double) va_arg (args, double); - } - else { - f = (long double) va_arg (args, long double); - } - p = numbuf; - last = p + sizeof (numbuf); - if (f < 0) { - *p++ = '-'; - f = -f; - } - - ui64 = (gint64) f; - - p = rspamd_sprintf_num (p, last, ui64, zero, 0, width); - - if (frac_width) { - - if (p < last) { - *p++ = '.'; - } - - scale = 1.0; - - for (i = 0; i < frac_width; i++) { - scale *= 10.0; - } - - /* - * (gint64) cast is required for msvc6: - * it can not convert guint64 to double - */ - ui64 = (guint64) ((f - (gint64) ui64) * scale); - - p = rspamd_sprintf_num (p, last, ui64, '0', 0, frac_width); - } - - slen = p - numbuf; - RSPAMD_PRINTF_APPEND (numbuf, slen); - - continue; - - case 'g': - case 'G': - if (*fmt == 'g') { - f = (long double) va_arg (args, double); - } - else { - f = (long double) va_arg (args, long double); - } - - g_ascii_formatd (numbuf, sizeof (numbuf), "%g", (double)f); - slen = strlen (numbuf); - RSPAMD_PRINTF_APPEND (numbuf, slen); - - continue; - - case 'b': - bv = (gboolean) va_arg (args, double); - RSPAMD_PRINTF_APPEND (bv ? "true" : "false", bv ? 
4 : 5); - - continue; - - case 'p': - ui64 = (uintptr_t) va_arg (args, void *); - hex = 2; - sign = 0; - zero = '0'; - width = sizeof (void *) * 2; - break; - - case 'c': - c = va_arg (args, gint); - c &= 0xff; - RSPAMD_PRINTF_APPEND (&c, 1); - - continue; - - case 'Z': - c = '\0'; - RSPAMD_PRINTF_APPEND (&c, 1); - - continue; - - case 'N': - c = LF; - RSPAMD_PRINTF_APPEND (&c, 1); - - continue; - - case '%': - c = '%'; - RSPAMD_PRINTF_APPEND (&c, 1); - - continue; - - default: - c = *fmt; - RSPAMD_PRINTF_APPEND (&c, 1); - - continue; - } - - /* Print number */ - p = numbuf; - last = p + sizeof (numbuf); - if (sign) { - if (i64 < 0) { - *p++ = '-'; - ui64 = (guint64) -i64; - - } else { - ui64 = (guint64) i64; - } - } - - if (!humanize) { - p = rspamd_sprintf_num (p, last, ui64, zero, hex, width); - } - else { - p = rspamd_humanize_number (p, last, ui64, bytes); - } - slen = p - numbuf; - RSPAMD_PRINTF_APPEND (numbuf, slen); - - } else { - fmt++; - } - } - - /* Finish buffer */ - if (fmt > buf_start) { - wr = func (buf_start, fmt - buf_start, apd); - if (wr <= 0) { - goto oob; - } - written += wr; - } - -oob: - return written; -} - diff --git a/src/printf.h b/src/printf.h deleted file mode 100644 index a4e03791d..000000000 --- a/src/printf.h +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright (c) 2010, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - -#ifndef PRINTF_H_ -#define PRINTF_H_ - -#include "config.h" - -/* - * supported formats: - * %[0][width][x][X]O off_t - * %[0][width]T time_t - * %[0][width][u][x|X|h|H]z ssize_t/size_t - * %[0][width][u][x|X|h|H]d gint/guint - * %[0][width][u][x|X|h|H]l long - * %[0][width][u][x|X|h|H]D gint32/guint32 - * %[0][width][u][x|X|h|H]L gint64/guint64 - * %[0][width][.width]f double - * %[0][width][.width]F long double - * %[0][width][.width]g double - * %[0][width][.width]G long double - * %b boolean (true or false) - * %P pid_t - * %r rlim_t - * %p void * - * %V f_str_t * - * %v GString * - * %s null-terminated string - * %*s length and string - * %Z '\0' - * %N '\n' - * %c gchar - * %% % - * - */ - -/** - * Callback used for common printf operations - * @param buf buffer to append - * @param buflen lenght of the buffer - * @param ud opaque pointer - * @return number of characters written - */ -typedef glong (*rspamd_printf_append_func)(const gchar *buf, glong buflen, gpointer ud); - -glong rspamd_fprintf (FILE *f, const gchar *fmt, ...); -glong rspamd_log_fprintf (FILE *f, const gchar *fmt, ...); -glong rspamd_snprintf (gchar *buf, glong max, const gchar *fmt, ...); -gchar *rspamd_vsnprintf (gchar *buf, glong max, const gchar *fmt, va_list 
args); -glong rspamd_printf_gstring (GString *s, const gchar *fmt, ...); - -glong rspamd_vprintf_common (rspamd_printf_append_func func, gpointer apd, const gchar *fmt, va_list args); - -#endif /* PRINTF_H_ */ diff --git a/src/protocol.c b/src/protocol.c deleted file mode 100644 index 8a5c3f0df..000000000 --- a/src/protocol.c +++ /dev/null @@ -1,821 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "main.h" -#include "util.h" -#include "cfg_file.h" -#include "settings.h" -#include "message.h" - -/* Max line size */ -#define OUTBUFSIZ BUFSIZ -/* - * Just check if the passed message is spam or not and reply as - * described below - */ -#define MSG_CMD_CHECK "check" -/* - * Check if message is spam or not, and return score plus list - * of symbols hit - */ -#define MSG_CMD_SYMBOLS "symbols" -/* - * Check if message is spam or not, and return score plus report - */ -#define MSG_CMD_REPORT "report" -/* - * Check if message is spam or not, and return score plus report - * if the message is spam - */ -#define MSG_CMD_REPORT_IFSPAM "report_ifspam" -/* - * Ignore this message -- client opened connection then changed - */ -#define MSG_CMD_SKIP "skip" -/* - * Return a confirmation that spamd is alive - */ -#define MSG_CMD_PING "ping" -/* - * Process this message as described above and return modified message - */ -#define MSG_CMD_PROCESS "process" - -/* - * Learn specified statfile using message - */ -#define MSG_CMD_LEARN "learn" - -/* - * spamassassin greeting: - */ -#define SPAMC_GREETING "SPAMC" -/* - * rspamd greeting: - */ -#define RSPAMC_GREETING "RSPAMC" -/* - * Headers - */ -#define CONTENT_LENGTH_HEADER "Content-length" -#define HELO_HEADER "Helo" -#define FROM_HEADER "From" -#define IP_ADDR_HEADER "IP" -#define NRCPT_HEADER "Recipient-Number" -#define RCPT_HEADER "Rcpt" -#define SUBJECT_HEADER "Subject" -#define STATFILE_HEADER "Statfile" -#define QUEUE_ID_HEADER "Queue-ID" -#define ERROR_HEADER "Error" -#define USER_HEADER "User" -#define PASS_HEADER "Pass" -#define JSON_HEADER "Json" -#define HOSTNAME_HEADER "Hostname" -#define DELIVER_TO_HEADER "Deliver-To" -#define NO_LOG_HEADER "Log" - -static GList *custom_commands = NULL; - - -/* - * Remove <> from the fixed string and copy it to the pool - */ -static gchar * -rspamd_protocol_escape_braces (GString *in) -{ - gint len = 0; - gchar *orig, *p; - - orig = in->str; - 
while ((g_ascii_isspace (*orig) || *orig == '<') && orig - in->str < (gint)in->len) { - orig ++; - } - - g_string_erase (in, 0, orig - in->str); - - p = orig; - while ((!g_ascii_isspace (*p) && *p != '>') && p - in->str < (gint)in->len) { - p ++; - len ++; - } - - g_string_truncate (in, len); - - return in->str; -} - -static gboolean -rspamd_protocol_handle_url (struct rspamd_task *task, struct rspamd_http_message *msg) -{ - GList *cur; - struct custom_command *cmd; - const gchar *p; - - if (msg->url == NULL || msg->url->len == 0) { - task->last_error = "command is absent"; - task->error_code = 400; - return FALSE; - } - - if (msg->url->str[0] == '/') { - p = &msg->url->str[1]; - } - else { - p = msg->url->str; - } - - switch (*p) { - case 'c': - case 'C': - /* check */ - if (g_ascii_strcasecmp (p + 1, MSG_CMD_CHECK + 1) == 0) { - task->cmd = CMD_CHECK; - } - else { - goto err; - } - break; - case 's': - case 'S': - /* symbols, skip */ - if (g_ascii_strcasecmp (p + 1, MSG_CMD_SYMBOLS + 1) == 0) { - task->cmd = CMD_SYMBOLS; - } - else if (g_ascii_strcasecmp (p + 1, MSG_CMD_SKIP + 1) == 0) { - task->cmd = CMD_SKIP; - } - else { - goto err; - } - break; - case 'p': - case 'P': - /* ping, process */ - if (g_ascii_strcasecmp (p + 1, MSG_CMD_PING + 1) == 0) { - task->cmd = CMD_PING; - } - else if (g_ascii_strcasecmp (p + 1, MSG_CMD_PROCESS + 1) == 0) { - task->cmd = CMD_PROCESS; - } - else { - goto err; - } - break; - case 'r': - case 'R': - /* report, report_ifspam */ - if (g_ascii_strcasecmp (p + 1, MSG_CMD_REPORT + 1) == 0) { - task->cmd = CMD_REPORT; - } - else if (g_ascii_strcasecmp (p + 1, MSG_CMD_REPORT_IFSPAM + 1) == 0) { - task->cmd = CMD_REPORT_IFSPAM; - } - else { - goto err; - } - break; - default: - cur = custom_commands; - while (cur) { - cmd = cur->data; - if (g_ascii_strcasecmp (p, cmd->name) == 0) { - task->cmd = CMD_OTHER; - task->custom_cmd = cmd; - break; - } - cur = g_list_next (cur); - } - - if (cur == NULL) { - goto err; - } - break; - } - - return 
TRUE; - -err: - debug_task ("bad command: %s", p); - task->last_error = "invalid command"; - task->error_code = 400; - return FALSE; -} - -static gboolean -rspamd_protocol_handle_headers (struct rspamd_task *task, struct rspamd_http_message *msg) -{ - gchar *headern, *err, *tmp; - gboolean res = TRUE; - struct rspamd_http_header *h; - - LL_FOREACH (msg->headers, h) { - headern = h->name->str; - - switch (headern[0]) { - case 'd': - case 'D': - if (g_ascii_strcasecmp (headern, DELIVER_TO_HEADER) == 0) { - task->deliver_to = rspamd_protocol_escape_braces (h->value); - debug_task ("read deliver-to header, value: %s", task->deliver_to); - } - else { - debug_task ("wrong header: %s", headern); - res = FALSE; - } - break; - case 'h': - case 'H': - if (g_ascii_strcasecmp (headern, HELO_HEADER) == 0) { - task->helo = h->value->str; - debug_task ("read helo header, value: %s", task->helo); - } - else if (g_ascii_strcasecmp (headern, HOSTNAME_HEADER) == 0) { - task->hostname = h->value->str; - debug_task ("read hostname header, value: %s", task->hostname); - } - else { - debug_task ("wrong header: %s", headern); - res = FALSE; - } - break; - case 'f': - case 'F': - if (g_ascii_strcasecmp (headern, FROM_HEADER) == 0) { - task->from = rspamd_protocol_escape_braces (h->value); - debug_task ("read from header, value: %s", task->from); - } - else { - debug_task ("wrong header: %s", headern); - res = FALSE; - } - break; - case 'j': - case 'J': - if (g_ascii_strcasecmp (headern, JSON_HEADER) == 0) { - task->is_json = parse_flag (h->value->str); - } - else { - debug_task ("wrong header: %s", headern); - res = FALSE; - } - break; - case 'q': - case 'Q': - if (g_ascii_strcasecmp (headern, QUEUE_ID_HEADER) == 0) { - task->queue_id = h->value->str; - debug_task ("read queue_id header, value: %s", task->queue_id); - } - else { - debug_task ("wrong header: %s", headern); - res = FALSE; - } - break; - case 'r': - case 'R': - if (g_ascii_strcasecmp (headern, RCPT_HEADER) == 0) { - tmp = 
rspamd_protocol_escape_braces (h->value); - task->rcpt = g_list_prepend (task->rcpt, tmp); - debug_task ("read rcpt header, value: %s", tmp); - } - else if (g_ascii_strcasecmp (headern, NRCPT_HEADER) == 0) { - task->nrcpt = strtoul (h->value->str, &err, 10); - debug_task ("read rcpt header, value: %d", (gint)task->nrcpt); - } - else { - msg_info ("wrong header: %s", headern); - res = FALSE; - } - break; - case 'i': - case 'I': - if (g_ascii_strcasecmp (headern, IP_ADDR_HEADER) == 0) { - tmp = h->value->str; - if (!rspamd_parse_inet_address (&task->from_addr, tmp)) { - msg_err ("bad ip header: '%s'", tmp); - return FALSE; - } - debug_task ("read IP header, value: %s", tmp); - } - else { - debug_task ("wrong header: %s", headern); - res = FALSE; - } - break; - case 'p': - case 'P': - if (g_ascii_strcasecmp (headern, PASS_HEADER) == 0) { - if (h->value->len == sizeof ("all") - 1 && - g_ascii_strcasecmp (h->value->str, "all") == 0) { - task->pass_all_filters = TRUE; - debug_task ("pass all filters"); - } - } - else { - res = FALSE; - } - break; - case 's': - case 'S': - if (g_ascii_strcasecmp (headern, SUBJECT_HEADER) == 0) { - task->subject = h->value->str; - } - else { - res = FALSE; - } - break; - case 'u': - case 'U': - if (g_ascii_strcasecmp (headern, USER_HEADER) == 0) { - task->user = h->value->str; - } - else { - res = FALSE; - } - break; - case 'l': - case 'L': - if (g_ascii_strcasecmp (headern, NO_LOG_HEADER) == 0) { - if (g_ascii_strcasecmp (h->value->str, "no") == 0) { - task->no_log = TRUE; - } - } - else { - res = FALSE; - } - break; - default: - debug_task ("wrong header: %s", headern); - res = FALSE; - break; - } - } - - if (!res && task->cfg->strict_protocol_headers) { - msg_err ("deny processing of a request with incorrect or unknown headers"); - task->last_error = "invalid header"; - task->error_code = 400; - return FALSE; - } - - return TRUE; -} - -gboolean -rspamd_protocol_handle_request (struct rspamd_task *task, - struct rspamd_http_message *msg) 
-{ - gboolean ret = TRUE; - - if (msg->method == HTTP_SYMBOLS) { - task->cmd = CMD_SYMBOLS; - task->is_json = FALSE; - } - else if (msg->method == HTTP_CHECK) { - task->cmd = CMD_CHECK; - task->is_json = FALSE; - } - else { - task->is_json = TRUE; - ret = rspamd_protocol_handle_url (task, msg); - } - - if (ret) { - ret = rspamd_protocol_handle_headers (task, msg); - } - - return ret; -} - -static void -write_hashes_to_log (struct rspamd_task *task, GString *logbuf) -{ - GList *cur; - struct mime_text_part *text_part; - - cur = task->text_parts; - - while (cur) { - text_part = cur->data; - if (text_part->fuzzy) { - if (cur->next != NULL) { - rspamd_printf_gstring (logbuf, " part: %Xd,", text_part->fuzzy->h); - } - else { - rspamd_printf_gstring (logbuf, " part: %Xd", text_part->fuzzy->h); - } - } - cur = g_list_next (cur); - } -} - - -/* Structure for writing tree data */ -struct tree_cb_data { - ucl_object_t *top; - struct rspamd_task *task; -}; - -/* - * Callback for writing urls - */ -static gboolean -urls_protocol_cb (gpointer key, gpointer value, gpointer ud) -{ - struct tree_cb_data *cb = ud; - struct uri *url = value; - ucl_object_t *obj; - - obj = ucl_object_fromlstring (url->host, url->hostlen); - DL_APPEND (cb->top->value.av, obj); - - if (cb->task->cfg->log_urls) { - msg_info ("<%s> URL: %s - %s: %s", cb->task->message_id, cb->task->user ? - cb->task->user : (cb->task->from ? 
cb->task->from : "unknown"), - rspamd_inet_address_to_string (&cb->task->from_addr), - struri (url)); - } - - return FALSE; -} - -static ucl_object_t * -rspamd_urls_tree_ucl (GTree *input, struct rspamd_task *task) -{ - struct tree_cb_data cb; - ucl_object_t *obj; - - obj = ucl_object_typed_new (UCL_ARRAY); - cb.top = obj; - cb.task = task; - - g_tree_foreach (input, urls_protocol_cb, &cb); - - return obj; -} - -static gboolean -emails_protocol_cb (gpointer key, gpointer value, gpointer ud) -{ - struct tree_cb_data *cb = ud; - struct uri *url = value; - ucl_object_t *obj; - - obj = ucl_object_fromlstring (url->user, url->userlen + url->hostlen + 1); - DL_APPEND (cb->top->value.av, obj); - - return FALSE; -} - -static ucl_object_t * -rspamd_emails_tree_ucl (GTree *input, struct rspamd_task *task) -{ - struct tree_cb_data cb; - ucl_object_t *obj; - - obj = ucl_object_typed_new (UCL_ARRAY); - cb.top = obj; - cb.task = task; - - g_tree_foreach (input, emails_protocol_cb, &cb); - - return obj; -} - - -/* Write new subject */ -static const gchar * -make_rewritten_subject (struct metric *metric, struct rspamd_task *task) -{ - static gchar subj_buf[1024]; - gchar *p = subj_buf, *end, *c, *res; - const gchar *s; - - end = p + sizeof(subj_buf); - c = metric->subject; - s = g_mime_message_get_subject (task->message); - - while (p < end) { - if (*c == '\0') { - *p = '\0'; - break; - } - else if (*c == '%' && *(c + 1) == 's') { - p += rspamd_strlcpy (p, (s != NULL) ? 
s : "", end - p); - c += 2; - } - else { - *p = *c ++; - } - p ++; - } - res = g_mime_utils_header_encode_text (subj_buf); - - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_free, res); - - return res; -} - -static ucl_object_t * -rspamd_str_list_ucl (GList *str_list) -{ - ucl_object_t *top = NULL, *obj; - GList *cur; - - top = ucl_object_typed_new (UCL_ARRAY); - cur = str_list; - while (cur) { - obj = ucl_object_fromstring (cur->data); - DL_APPEND (top->value.av, obj); - cur = g_list_next (cur); - } - - return top; -} - -static ucl_object_t * -rspamd_metric_symbol_ucl (struct rspamd_task *task, struct metric *m, - struct symbol *sym, GString *logbuf) -{ - ucl_object_t *obj = NULL; - const gchar *description = NULL; - - rspamd_printf_gstring (logbuf, "%s,", sym->name); - description = g_hash_table_lookup (m->descriptions, sym->name); - - obj = ucl_object_typed_new (UCL_OBJECT); - ucl_object_insert_key (obj, ucl_object_fromstring (sym->name), "name", 0, false); - ucl_object_insert_key (obj, ucl_object_fromdouble (sym->score), "score", 0, false); - if (description) { - ucl_object_insert_key (obj, ucl_object_fromstring (description), "description", 0, false); - } - if (sym->options != NULL) { - ucl_object_insert_key (obj, rspamd_str_list_ucl (sym->options), "options", 0, false); - } - - return obj; -} - -static ucl_object_t * -rspamd_metric_result_ucl (struct rspamd_task *task, struct metric_result *mres, GString *logbuf) -{ - GHashTableIter hiter; - struct symbol *sym; - struct metric *m; - gboolean is_spam; - enum rspamd_metric_action action = METRIC_ACTION_NOACTION; - ucl_object_t *obj = NULL, *sobj; - gdouble required_score; - gpointer h, v; - const gchar *subject; - gchar action_char; - - m = mres->metric; - - /* XXX: handle settings */ - required_score = m->actions[METRIC_ACTION_REJECT].score; - is_spam = (mres->score >= required_score); - action = check_metric_action (mres->score, required_score, m); - if (task->is_skipped) { - 
action_char = 'S'; - } - else if (is_spam) { - action_char = 'T'; - } - else { - action_char = 'F'; - } - rspamd_printf_gstring (logbuf, "(%s: %c (%s): [%.2f/%.2f] [", - m->name, action_char, - str_action_metric (action), - mres->score, required_score); - - obj = ucl_object_typed_new (UCL_OBJECT); - ucl_object_insert_key (obj, ucl_object_frombool (is_spam), - "is_spam", 0, false); - ucl_object_insert_key (obj, ucl_object_frombool (task->is_skipped), - "is_skipped", 0, false); - ucl_object_insert_key (obj, ucl_object_fromdouble (mres->score), - "score", 0, false); - ucl_object_insert_key (obj, ucl_object_fromdouble (required_score), - "required_score", 0, false); - ucl_object_insert_key (obj, ucl_object_fromstring (str_action_metric (action)), - "action", 0, false); - - if (action == METRIC_ACTION_REWRITE_SUBJECT) { - subject = make_rewritten_subject (m, task); - ucl_object_insert_key (obj, ucl_object_fromstring (subject), - "subject", 0, false); - } - /* Now handle symbols */ - g_hash_table_iter_init (&hiter, mres->symbols); - while (g_hash_table_iter_next (&hiter, &h, &v)) { - sym = (struct symbol *)v; - sobj = rspamd_metric_symbol_ucl (task, m, sym, logbuf); - ucl_object_insert_key (obj, sobj, h, 0, false); - } - - /* Cut the trailing comma if needed */ - if (logbuf->str[logbuf->len - 1] == ',') { - logbuf->len --; - } - -#ifdef HAVE_CLOCK_GETTIME - rspamd_printf_gstring (logbuf, "]), len: %z, time: %s, dns req: %d,", - task->msg->len, calculate_check_time (&task->tv, &task->ts, - task->cfg->clock_res, &task->scan_milliseconds), task->dns_requests); -#else - rspamd_printf_gstring (logbuf, "]), len: %z, time: %s, dns req: %d,", - task->msg->len, - calculate_check_time (&task->tv, task->cfg->clock_res, &task->scan_milliseconds), - task->dns_requests); -#endif - - return obj; -} - -static void -rspamd_ucl_tolegacy_output (struct rspamd_task *task, ucl_object_t *top, GString *out) -{ - const ucl_object_t *metric, *score, - *required_score, *is_spam, *elt, *symbols; - 
ucl_object_iter_t iter = NULL; - - metric = ucl_object_find_key (top, DEFAULT_METRIC); - if (metric != NULL) { - score = ucl_object_find_key (metric, "score"); - required_score = ucl_object_find_key (metric, "required_score"); - is_spam = ucl_object_find_key (metric, "is_spam"); - g_string_append_printf (out, "Metric: default; %s; %.2f / %.2f / 0.0\r\n", - ucl_object_toboolean (is_spam) ? "True" : "False", - ucl_object_todouble (score), - ucl_object_todouble (required_score)); - elt = ucl_object_find_key (metric, "action"); - if (elt != NULL) { - g_string_append_printf (out, "Action: %s\r\n", - ucl_object_tostring (elt)); - } - - symbols = ucl_object_find_key (metric, "symbols"); - while ((elt = ucl_iterate_object (symbols, &iter, true)) != NULL) { - const ucl_object_t *sym_score; - sym_score = ucl_object_find_key (elt, "score"); - g_string_append_printf (out, "Symbol: %s; %.2f\r\n", - ucl_object_key (elt), - ucl_object_todouble (sym_score)); - } - - elt = ucl_object_find_key (metric, "subject"); - if (elt != NULL) { - g_string_append_printf (out, "Subject: %s\r\n", - ucl_object_tostring (elt)); - } - } - g_string_append_printf (out, "Message-ID: %s\r\n", task->message_id); -} - -static void -write_check_reply (struct rspamd_http_message *msg, struct rspamd_task *task) -{ - GString *logbuf; - struct metric_result *metric_res; - GHashTableIter hiter; - gpointer h, v; - ucl_object_t *top = NULL, *obj; - - /* Output the first line - check status */ - logbuf = g_string_sized_new (BUFSIZ); - rspamd_printf_gstring (logbuf, "id: <%s>, qid: <%s>, ", task->message_id, task->queue_id); - - if (task->user) { - rspamd_printf_gstring (logbuf, "user: %s, ", task->user); - } - - if (!task->no_log) { - rspamd_roll_history_update (task->worker->srv->history, task); - } - g_hash_table_iter_init (&hiter, task->results); - - top = ucl_object_typed_new (UCL_OBJECT); - /* Convert results to an ucl object */ - while (g_hash_table_iter_next (&hiter, &h, &v)) { - metric_res = (struct 
metric_result *)v; - obj = rspamd_metric_result_ucl (task, metric_res, logbuf); - ucl_object_insert_key (top, obj, h, 0, false); - } - - if (task->messages != NULL) { - ucl_object_insert_key (top, rspamd_str_list_ucl (task->messages), "messages", 0, false); - } - if (g_tree_nnodes (task->urls) > 0) { - ucl_object_insert_key (top, rspamd_urls_tree_ucl (task->urls, task), "urls", 0, false); - } - if (g_tree_nnodes (task->emails) > 0) { - ucl_object_insert_key (top, rspamd_emails_tree_ucl (task->emails, task), - "emails", 0, false); - } - - ucl_object_insert_key (top, ucl_object_fromstring (task->message_id), - "message-id", 0, false); - - write_hashes_to_log (task, logbuf); - if (!task->no_log) { - msg_info ("%v", logbuf); - } - g_string_free (logbuf, TRUE); - - msg->body = g_string_sized_new (BUFSIZ); - - if (msg->method < HTTP_SYMBOLS) { - rspamd_ucl_emit_gstring (top, UCL_EMIT_JSON_COMPACT, msg->body); - } - else { - rspamd_ucl_tolegacy_output (task, top, msg->body); - } - ucl_object_unref (top); - - /* Increase counters */ - task->worker->srv->stat->messages_scanned++; -} - -void -rspamd_protocol_write_reply (struct rspamd_task *task) -{ - struct rspamd_http_message *msg; - const gchar *ctype = "application/json"; - ucl_object_t *top = NULL; - - msg = rspamd_http_new_message (HTTP_RESPONSE); - if (!task->is_json) { - /* Turn compatibility on */ - msg->method = HTTP_SYMBOLS; - } - msg->date = time (NULL); - - task->state = CLOSING_CONNECTION; - - top = ucl_object_typed_new (UCL_OBJECT); - debug_task ("writing reply to client"); - if (task->error_code != 0) { - msg->code = task->error_code; - ucl_object_insert_key (top, ucl_object_fromstring (task->last_error), "error", 0, false); - msg->body = g_string_sized_new (256); - rspamd_ucl_emit_gstring (top, UCL_EMIT_JSON_COMPACT, msg->body); - ucl_object_unref (top); - } - else { - switch (task->cmd) { - case CMD_REPORT_IFSPAM: - case CMD_REPORT: - case CMD_CHECK: - case CMD_SYMBOLS: - case CMD_PROCESS: - case CMD_SKIP: 
- write_check_reply (msg, task); - break; - case CMD_PING: - msg->body = g_string_new ("pong"); - break; - case CMD_OTHER: - msg_err ("BROKEN"); - break; - } - } - - rspamd_http_connection_reset (task->http_conn); - rspamd_http_connection_write_message (task->http_conn, msg, NULL, - ctype, task, task->sock, &task->tv, task->ev_base); -} - -void -register_protocol_command (const gchar *name, protocol_reply_func func) -{ - struct custom_command *cmd; - - cmd = g_malloc (sizeof (struct custom_command)); - cmd->name = name; - cmd->func = func; - - custom_commands = g_list_prepend (custom_commands, cmd); -} diff --git a/src/protocol.h b/src/protocol.h deleted file mode 100644 index 8d2efe118..000000000 --- a/src/protocol.h +++ /dev/null @@ -1,46 +0,0 @@ -/** - * @file protocol.h - * Rspamd protocol definition - */ - -#ifndef RSPAMD_PROTOCOL_H -#define RSPAMD_PROTOCOL_H - -#include "config.h" -#include "filter.h" -#include "http.h" -#include "task.h" - -#define RSPAMD_BASE_ERROR 500 -#define RSPAMD_FILTER_ERROR RSPAMD_BASE_ERROR + 1 -#define RSPAMD_NETWORK_ERROR RSPAMD_BASE_ERROR + 2 -#define RSPAMD_PROTOCOL_ERROR RSPAMD_BASE_ERROR + 3 -#define RSPAMD_LENGTH_ERROR RSPAMD_BASE_ERROR + 4 -#define RSPAMD_STATFILE_ERROR RSPAMD_BASE_ERROR + 5 - -struct metric; - -/** - * Process HTTP request to the task structure - * @param task - * @param msg - * @return - */ -gboolean rspamd_protocol_handle_request (struct rspamd_task *task, struct rspamd_http_message *msg); - -/** - * Write reply for specified task command - * @param task task object - * @return 0 if we wrote reply and -1 if there was some error - */ -void rspamd_protocol_write_reply (struct rspamd_task *task); - - -/** - * Register custom fucntion to extend protocol - * @param name symbolic name of custom function - * @param func callback function for writing reply - */ -void register_protocol_command (const gchar *name, protocol_reply_func func); - -#endif diff --git a/src/proxy.c b/src/proxy.c deleted file mode 100644 
index 67c7665b8..000000000 --- a/src/proxy.c +++ /dev/null @@ -1,241 +0,0 @@ -/* Copyright (c) 2010-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "main.h" -#include "proxy.h" - -static void rspamd_proxy_backend_handler (gint fd, gshort what, gpointer data); -static void rspamd_proxy_client_handler (gint fd, gshort what, gpointer data); - -static inline GQuark -proxy_error_quark (void) -{ - return g_quark_from_static_string ("proxy-error"); -} - -void -rspamd_proxy_close (rspamd_proxy_t *proxy) -{ - if (!proxy->closed) { - close (proxy->cfd); - close (proxy->bfd); - - event_del (&proxy->client_ev); - event_del (&proxy->backend_ev); - proxy->closed = TRUE; - } -} - -static void -rspamd_proxy_client_handler (gint fd, gshort what, gpointer data) -{ - rspamd_proxy_t *proxy = data; - gint r; - GError *err = NULL; - - if (what == EV_READ) { - /* Got data from client */ - event_del (&proxy->client_ev); - r = read (proxy->cfd, proxy->buf, proxy->bufsize); - if (r > 0) { - /* Write this buffer to backend */ - proxy->read_len = r; - proxy->buf_offset = 0; - event_del (&proxy->backend_ev); - event_set (&proxy->backend_ev, proxy->bfd, EV_WRITE, rspamd_proxy_backend_handler, proxy); - event_add (&proxy->backend_ev, proxy->tv); - } - else { - /* Error case or zero reply */ - if (r < 0) { - /* Error case */ - g_set_error (&err, proxy_error_quark(), r, "Client read error: %s", strerror (errno)); - rspamd_proxy_close (proxy); - proxy->err_cb (err, proxy->user_data); - } - else { - /* Client closes connection */ - rspamd_proxy_close (proxy); - proxy->err_cb (NULL, proxy->user_data); - } - } - } - else if (what == EV_WRITE) { - /* Can write to client */ - r = write (proxy->cfd, proxy->buf + proxy->buf_offset, proxy->read_len - proxy->buf_offset); - if (r > 0) { - /* We wrote something */ - proxy->buf_offset +=r; - if (proxy->buf_offset == proxy->read_len) { - /* We wrote everything */ - event_del (&proxy->client_ev); - event_set (&proxy->client_ev, proxy->cfd, EV_READ, rspamd_proxy_client_handler, proxy); - event_add (&proxy->client_ev, proxy->tv); - event_del (&proxy->backend_ev); - 
event_set (&proxy->backend_ev, proxy->bfd, EV_READ, rspamd_proxy_backend_handler, proxy); - event_add (&proxy->backend_ev, proxy->tv); - } - else { - /* Plan another write event */ - event_add (&proxy->backend_ev, proxy->tv); - } - } - else { - /* Error case or zero reply */ - if (r < 0) { - /* Error case */ - g_set_error (&err, proxy_error_quark(), r, "Client write error: %s", strerror (errno)); - rspamd_proxy_close (proxy); - proxy->err_cb (err, proxy->user_data); - } - else { - /* Client closes connection */ - rspamd_proxy_close (proxy); - proxy->err_cb (NULL, proxy->user_data); - } - } - } - else { - /* Got timeout */ - g_set_error (&err, proxy_error_quark(), ETIMEDOUT, "Client timeout"); - rspamd_proxy_close (proxy); - proxy->err_cb (err, proxy->user_data); - } -} - -static void -rspamd_proxy_backend_handler (gint fd, gshort what, gpointer data) -{ - rspamd_proxy_t *proxy = data; - gint r; - GError *err = NULL; - - if (what == EV_READ) { - /* Got data from backend */ - event_del (&proxy->backend_ev); - r = read (proxy->bfd, proxy->buf, proxy->bufsize); - if (r > 0) { - /* Write this buffer to client */ - proxy->read_len = r; - proxy->buf_offset = 0; - event_del (&proxy->client_ev); - event_set (&proxy->client_ev, proxy->bfd, EV_WRITE, rspamd_proxy_client_handler, proxy); - event_add (&proxy->client_ev, proxy->tv); - } - else { - /* Error case or zero reply */ - if (r < 0) { - /* Error case */ - g_set_error (&err, proxy_error_quark(), r, "Backend read error: %s", strerror (errno)); - rspamd_proxy_close (proxy); - proxy->err_cb (err, proxy->user_data); - } - else { - /* Client closes connection */ - rspamd_proxy_close (proxy); - proxy->err_cb (NULL, proxy->user_data); - } - } - } - else if (what == EV_WRITE) { - /* Can write to backend */ - r = write (proxy->bfd, proxy->buf + proxy->buf_offset, proxy->read_len - proxy->buf_offset); - if (r > 0) { - /* We wrote something */ - proxy->buf_offset +=r; - if (proxy->buf_offset == proxy->read_len) { - /* We wrote 
everything */ - event_del (&proxy->backend_ev); - event_set (&proxy->backend_ev, proxy->bfd, EV_READ, rspamd_proxy_backend_handler, proxy); - event_add (&proxy->backend_ev, proxy->tv); - event_del (&proxy->client_ev); - event_set (&proxy->client_ev, proxy->cfd, EV_READ, rspamd_proxy_client_handler, proxy); - event_add (&proxy->client_ev, proxy->tv); - } - else { - /* Plan another write event */ - event_add (&proxy->backend_ev, proxy->tv); - } - } - else { - /* Error case or zero reply */ - if (r < 0) { - /* Error case */ - g_set_error (&err, proxy_error_quark(), r, "Backend write error: %s", strerror (errno)); - rspamd_proxy_close (proxy); - proxy->err_cb (err, proxy->user_data); - } - else { - /* Client closes connection */ - rspamd_proxy_close (proxy); - proxy->err_cb (NULL, proxy->user_data); - } - } - } - else { - /* Got timeout */ - g_set_error (&err, proxy_error_quark(), ETIMEDOUT, "Client timeout"); - rspamd_proxy_close (proxy); - proxy->err_cb (err, proxy->user_data); - } -} - -/** - * Create new proxy between cfd and bfd - * @param cfd client's socket - * @param bfd backend's socket - * @param bufsize size of exchange buffer - * @param err_cb callback for erorrs or completing - * @param ud user data for callback - * @return new proxy object - */ -rspamd_proxy_t* -rspamd_create_proxy (gint cfd, gint bfd, rspamd_mempool_t *pool, struct event_base *base, - gsize bufsize, struct timeval *tv, dispatcher_err_callback_t err_cb, gpointer ud) -{ - rspamd_proxy_t *new; - - new = rspamd_mempool_alloc0 (pool, sizeof (rspamd_proxy_t)); - - new->cfd = dup (cfd); - new->bfd = dup (bfd); - new->pool = pool; - new->base = base; - new->bufsize = bufsize; - new->buf = rspamd_mempool_alloc (pool, bufsize); - new->err_cb = err_cb; - new->user_data = ud; - new->tv = tv; - - /* Set client's and backend's interfaces to read events */ - event_set (&new->client_ev, new->cfd, EV_READ, rspamd_proxy_client_handler, new); - event_base_set (new->base, &new->client_ev); - event_add 
(&new->client_ev, new->tv); - - event_set (&new->backend_ev, new->bfd, EV_READ, rspamd_proxy_backend_handler, new); - event_base_set (new->base, &new->backend_ev); - event_add (&new->backend_ev, new->tv); - - return new; -} diff --git a/src/proxy.h b/src/proxy.h deleted file mode 100644 index c505fe83d..000000000 --- a/src/proxy.h +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright (c) 2010-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#ifndef PROXY_H_ -#define PROXY_H_ - -#include "config.h" -#include "buffer.h" - -/** - * @file proxy.h - * Direct asynchronous proxy implementation - */ - -typedef struct rspamd_proxy_s { - struct event client_ev; /**< event for client's communication */ - struct event backend_ev; /**< event for backend communication */ - struct event_base *base; /**< base for event operations */ - rspamd_mempool_t *pool; /**< memory pool */ - dispatcher_err_callback_t err_cb; /**< error callback */ - struct event_base *ev_base; /**< event base */ - gint cfd; /**< client's socket */ - gint bfd; /**< backend's socket */ - guint8 *buf; /**< exchange buffer */ - gsize bufsize; /**< buffer size */ - gint read_len; /**< read length */ - gint buf_offset; /**< offset to write */ - gpointer user_data; /**< user's data for callbacks */ - struct timeval *tv; /**< timeout for communications */ - gboolean closed; /**< whether descriptors are closed */ -} rspamd_proxy_t; - -/** - * Create new proxy between cfd and bfd - * @param cfd client's socket - * @param bfd backend's socket - * @param bufsize size of exchange buffer - * @param err_cb callback for erorrs or completing - * @param ud user data for callback - * @return new proxy object - */ -rspamd_proxy_t* rspamd_create_proxy (gint cfd, gint bfd, rspamd_mempool_t *pool, - struct event_base *base, gsize bufsize, struct timeval *tv, - dispatcher_err_callback_t err_cb, gpointer ud); - -void rspamd_proxy_close (rspamd_proxy_t *proxy); - -#endif /* PROXY_H_ */ diff --git a/src/radix.c b/src/radix.c deleted file mode 100644 index 1a05db178..000000000 --- a/src/radix.c +++ /dev/null @@ -1,311 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#include "config.h" -#include "radix.h" -#include "mem_pool.h" - -static void *radix_alloc (radix_tree_t * tree); - -radix_tree_t * -radix_tree_create (void) -{ - radix_tree_t *tree; - - tree = g_malloc (sizeof (radix_tree_t)); - if (tree == NULL) { - return NULL; - } - - tree->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); - tree->size = 0; - - tree->root = radix_alloc (tree); - if (tree->root == NULL) { - return NULL; - } - - tree->root->right = NULL; - tree->root->left = NULL; - tree->root->parent = NULL; - tree->root->value = RADIX_NO_VALUE; - - return tree; -} - -enum radix_insert_type { - RADIX_INSERT, - RADIX_ADD, - RADIX_REPLACE -}; - -static uintptr_t -radix32tree_insert_common (radix_tree_t * tree, guint32 key, guint32 mask, uintptr_t value, enum radix_insert_type type) -{ - guint32 bit; - radix_node_t *node, *next; - - bit = 0x80000000; - - node = tree->root; - next = tree->root; - /* Find a place in trie to insert */ - while (bit & mask) { - if (key & bit) { - next = node->right; - } - else { - next = node->left; - } - - if (next == NULL) { - break; - } - - bit >>= 1; - node = next; - } - - if (next) { - if (node->value != RADIX_NO_VALUE) { - /* Value was found, switch on insert type */ - switch (type) { - case RADIX_INSERT: - return 1; - case RADIX_ADD: - node->value += value; - return value; - case RADIX_REPLACE: - node->value = value; - return 1; - } - } - - node->value = value; - node->key = key; - return 0; - } - /* Inserting value in trie creating all path components */ - while (bit & mask) { - next = radix_alloc (tree); - if (next == NULL) { - return -1; - } - - next->right = NULL; - next->left = NULL; - next->parent = node; - next->value = RADIX_NO_VALUE; - - if (key & bit) { - node->right = next; - - } - else { - node->left = next; - } - - bit >>= 1; - node = next; - } - - node->value = value; - node->key = key; - - return 0; -} - -gint -radix32tree_insert (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value) 
-{ - return (gint)radix32tree_insert_common (tree, key, mask, value, RADIX_INSERT); -} - -uintptr_t -radix32tree_add (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value) -{ - return radix32tree_insert_common (tree, key, mask, value, RADIX_ADD); -} - -gint -radix32tree_replace (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value) -{ - return (gint)radix32tree_insert_common (tree, key, mask, value, RADIX_REPLACE); -} - -/* - * per recursion step: - * ptr + ptr + ptr + gint = 4 words - * result = 1 word - * 5 words total in stack - */ -static gboolean -radix_recurse_nodes (radix_node_t *node, radix_tree_traverse_func func, void *user_data, gint level) -{ - if (node->left) { - if (radix_recurse_nodes (node->left, func, user_data, level + 1)) { - return TRUE; - } - } - - if (node->value != RADIX_NO_VALUE) { - if (func (node->key, level, node->value, user_data)) { - return TRUE; - } - } - - if (node->right) { - if (radix_recurse_nodes (node->right, func, user_data, level + 1)) { - return TRUE; - } - } - - return FALSE; -} - -void -radix32tree_traverse (radix_tree_t *tree, radix_tree_traverse_func func, void *user_data) -{ - radix_recurse_nodes (tree->root, func, user_data, 0); -} - - -gint -radix32tree_delete (radix_tree_t * tree, guint32 key, guint32 mask) -{ - guint32 bit; - radix_node_t *node; - - bit = 0x80000000; - node = tree->root; - - while (node && (bit & mask)) { - if (key & bit) { - node = node->right; - - } - else { - node = node->left; - } - - bit >>= 1; - } - - if (node == NULL || node->parent == NULL) { - return -1; - } - - if (node->right || node->left) { - if (node->value != RADIX_NO_VALUE) { - node->value = RADIX_NO_VALUE; - return 0; - } - - return -1; - } - - for (;;) { - if (node->parent->right == node) { - node->parent->right = NULL; - - } - else { - node->parent->left = NULL; - } - - node = node->parent; - - if (node->right || node->left) { - break; - } - - if (node->value != RADIX_NO_VALUE) { - break; - } - - if 
(node->parent == NULL) { - break; - } - } - - return 0; -} - - -uintptr_t -radix32tree_find (radix_tree_t * tree, guint32 key) -{ - guint32 bit; - uintptr_t value; - radix_node_t *node; - - bit = 0x80000000; - value = RADIX_NO_VALUE; - node = tree->root; - - while (node) { - if (node->value != RADIX_NO_VALUE) { - value = node->value; - } - - if (key & bit) { - node = node->right; - - } - else { - node = node->left; - } - - bit >>= 1; - } - - return value; -} - - -static void * -radix_alloc (radix_tree_t * tree) -{ - gchar *p; - - p = rspamd_mempool_alloc (tree->pool, sizeof (radix_node_t)); - - tree->size += sizeof (radix_node_t); - - return p; -} - -void -radix_tree_free (radix_tree_t * tree) -{ - - g_return_if_fail (tree != NULL); - rspamd_mempool_delete (tree->pool); - g_free (tree); -} - -/* - * vi:ts=4 - */ diff --git a/src/radix.h b/src/radix.h deleted file mode 100644 index 4cc2873c7..000000000 --- a/src/radix.h +++ /dev/null @@ -1,82 +0,0 @@ -#ifndef RADIX_H -#define RADIX_H - -#include "config.h" -#include "mem_pool.h" - -#define RADIX_NO_VALUE (uintptr_t)-1 - -typedef struct radix_node_s radix_node_t; - -struct radix_node_s { - radix_node_t *right; - radix_node_t *left; - radix_node_t *parent; - uintptr_t value; - guint32 key; -}; - - -typedef struct { - radix_node_t *root; - size_t size; - rspamd_mempool_t *pool; -} radix_tree_t; - -typedef gboolean (*radix_tree_traverse_func)(guint32 key, guint32 mask, uintptr_t value, void *user_data); - -/** - * Create new radix tree - */ -radix_tree_t *radix_tree_create (void); - -/** - * Insert value to radix tree - * returns: 1 if value already exists - * 0 if operation was successfull - * -1 if there was some error - */ -gint radix32tree_insert (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value); - -/** - * Add value to radix tree or insert it if value does not exists - * returns: value if value already exists and was added - * 0 if value was inserted - * -1 if there was some error - */ -uintptr_t 
radix32tree_add (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value); - -/** - * Replace value in radix tree or insert it if value does not exists - * returns: 1 if value already exists and was replaced - * 0 if value was inserted - * -1 if there was some error - */ -gint radix32tree_replace (radix_tree_t *tree, guint32 key, guint32 mask, uintptr_t value); - -/** - * Delete value from radix tree - * returns: 1 if value does not exist - * 0 if value was deleted - * -1 if there was some error - */ -gint radix32tree_delete (radix_tree_t *tree, guint32 key, guint32 mask); - -/** - * Find value in radix tree - * returns: value if value was found - * RADIX_NO_VALUE if value was not found - */ -uintptr_t radix32tree_find (radix_tree_t *tree, guint32 key); - -/** - * Traverse via the whole tree calling specified callback - */ -void radix32tree_traverse (radix_tree_t *tree, radix_tree_traverse_func func, void *user_data); - -/** - * Frees radix tree - */ -void radix_tree_free (radix_tree_t *tree); - -#endif diff --git a/src/roll_history.c b/src/roll_history.c deleted file mode 100644 index 504f8ae3b..000000000 --- a/src/roll_history.c +++ /dev/null @@ -1,212 +0,0 @@ -/* Copyright (c) 2010-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - - -#include "config.h" -#include "main.h" -#include "roll_history.h" - - -/** - * Returns new roll history - * @param pool pool for shared memory - * @return new structure - */ -struct roll_history* -rspamd_roll_history_new (rspamd_mempool_t *pool) -{ - struct roll_history *new; - - if (pool == NULL) { - return NULL; - } - - new = rspamd_mempool_alloc0_shared (pool, sizeof (struct roll_history)); - new->pool = pool; - new->mtx = rspamd_mempool_get_mutex (pool); - - return new; -} - -struct history_metric_callback_data { - gchar *pos; - gint remain; -}; - -static void -roll_history_symbols_callback (gpointer key, gpointer value, void *user_data) -{ - struct history_metric_callback_data *cb = user_data; - struct symbol *s = value; - guint wr; - - if (cb->remain > 0) { - wr = rspamd_snprintf (cb->pos, cb->remain, "%s, ", s->name); - cb->pos += wr; - cb->remain -= wr; - } -} - -/** - * Update roll history with data from task - * @param history roll history object - * @param task task object - */ -void -rspamd_roll_history_update (struct roll_history *history, struct rspamd_task *task) -{ - gint row_num; - struct roll_history_row *row; - struct metric_result *metric_res; - struct history_metric_callback_data cbdata; - - if (history->need_lock) { - /* Some process is getting history, so wait on a mutex */ - rspamd_mempool_lock_mutex (history->mtx); - history->need_lock = FALSE; - rspamd_mempool_unlock_mutex (history->mtx); - } - - /* First of all obtain check and 
obtain row number */ - g_atomic_int_compare_and_exchange (&history->cur_row, HISTORY_MAX_ROWS, 0); -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - row_num = g_atomic_int_add (&history->cur_row, 1); -#else - row_num = g_atomic_int_exchange_and_add (&history->cur_row, 1); -#endif - - if (row_num < HISTORY_MAX_ROWS) { - row = &history->rows[row_num]; - row->completed = FALSE; - } - else { - /* Race condition */ - history->cur_row = 0; - return; - } - - /* Add information from task to roll history */ - memcpy (&row->from_addr, &task->from_addr, sizeof (row->from_addr)); - memcpy (&row->tv, &task->tv, sizeof (row->tv)); - - /* Strings */ - rspamd_strlcpy (row->message_id, task->message_id, sizeof (row->message_id)); - if (task->user) { - rspamd_strlcpy (row->user, task->user, sizeof (row->message_id)); - } - else { - row->user[0] = '\0'; - } - - /* Get default metric */ - metric_res = g_hash_table_lookup (task->results, DEFAULT_METRIC); - if (metric_res == NULL) { - row->symbols[0] = '\0'; - row->action = METRIC_ACTION_NOACTION; - } - else { - row->score = metric_res->score; - row->required_score = metric_res->metric->actions[METRIC_ACTION_REJECT].score; - row->action = check_metric_action (metric_res->score, - metric_res->metric->actions[METRIC_ACTION_REJECT].score, metric_res->metric); - cbdata.pos = row->symbols; - cbdata.remain = sizeof (row->symbols); - g_hash_table_foreach (metric_res->symbols, roll_history_symbols_callback, &cbdata); - if (cbdata.remain > 0) { - /* Remove last whitespace and comma */ - *cbdata.pos-- = '\0'; - *cbdata.pos-- = '\0'; - *cbdata.pos = '\0'; - } - } - - row->scan_time = task->scan_milliseconds; - row->len = (task->msg == NULL ? 
0 : task->msg->len); - row->completed = TRUE; -} - -/** - * Load previously saved history from file - * @param history roll history object - * @param filename filename to load from - * @return TRUE if history has been loaded - */ -gboolean -rspamd_roll_history_load (struct roll_history *history, const gchar *filename) -{ - gint fd; - struct stat st; - - if (stat (filename, &st) == -1) { - msg_info ("cannot load history from %s: %s", filename, strerror (errno)); - return FALSE; - } - - if (st.st_size != sizeof (history->rows)) { - msg_info ("cannot load history from %s: size mismatch", filename); - return FALSE; - } - - if ((fd = open (filename, O_RDONLY)) == -1) { - msg_info ("cannot load history from %s: %s", filename, strerror (errno)); - return FALSE; - } - - if (read (fd, history->rows, sizeof (history->rows)) == -1) { - close (fd); - msg_info ("cannot read history from %s: %s", filename, strerror (errno)); - return FALSE; - } - - close (fd); - - return TRUE; -} - -/** - * Save history to file - * @param history roll history object - * @param filename filename to load from - * @return TRUE if history has been saved - */ -gboolean -rspamd_roll_history_save (struct roll_history *history, const gchar *filename) -{ - gint fd; - - if ((fd = open (filename, O_WRONLY | O_CREAT | O_TRUNC, 00600)) == -1) { - msg_info ("cannot save history to %s: %s", filename, strerror (errno)); - return FALSE; - } - - if (write (fd, history->rows, sizeof (history->rows)) == -1) { - close (fd); - msg_info ("cannot write history to %s: %s", filename, strerror (errno)); - return FALSE; - } - - close (fd); - - return TRUE; -} diff --git a/src/roll_history.h b/src/roll_history.h deleted file mode 100644 index 1dff93a4f..000000000 --- a/src/roll_history.h +++ /dev/null @@ -1,106 +0,0 @@ -/* Copyright (c) 2010-2012, Vsevolod Stakhov - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#ifndef ROLL_HISTORY_H_ -#define ROLL_HISTORY_H_ - -#include "config.h" -#include "mem_pool.h" - -/* - * Roll history is a special cycled buffer for checked messages, it is designed for writing history messages - * and displaying them in webui - */ - -#define HISTORY_MAX_ID 100 -#define HISTORY_MAX_SYMBOLS 200 -#define HISTORY_MAX_USER 20 -#define HISTORY_MAX_ROWS 200 - -struct rspamd_task; - -struct roll_history_row { - struct timeval tv; - gchar message_id[HISTORY_MAX_ID]; - gchar symbols[HISTORY_MAX_SYMBOLS]; - gchar user[HISTORY_MAX_USER]; -#ifdef HAVE_INET_PTON - struct { - union { - struct in_addr in4; - struct in6_addr in6; - } d; - gboolean ipv6; - gboolean has_addr; - } from_addr; -#else - struct in_addr from_addr; -#endif - gsize len; - guint scan_time; - gint action; - gdouble score; - gdouble required_score; - guint8 completed; -}; - -struct roll_history { - struct roll_history_row rows[HISTORY_MAX_ROWS]; - gint cur_row; - rspamd_mempool_t *pool; - gboolean need_lock; - rspamd_mempool_mutex_t *mtx; -}; - -/** - * Returns new roll history - * @param pool pool for shared memory - * @return new structure - */ -struct roll_history* rspamd_roll_history_new (rspamd_mempool_t *pool); - -/** - * Update roll history with data from task - * @param history roll history object - * @param task task object - */ -void rspamd_roll_history_update (struct roll_history *history, struct rspamd_task *task); - -/** - * Load previously saved history from file - * @param history roll history object - * @param filename filename to load from - * @return TRUE if history has been loaded - */ -gboolean rspamd_roll_history_load (struct roll_history *history, const gchar *filename); - -/** - * Save history to file - * @param history roll history object - * @param filename filename to load from - * @return TRUE if history has been saved - */ -gboolean rspamd_roll_history_save (struct roll_history *history, const gchar *filename); - -#endif /* ROLL_HISTORY_H_ */ diff --git 
a/src/rrd.c b/src/rrd.c deleted file mode 100644 index a0e21eaed..000000000 --- a/src/rrd.c +++ /dev/null @@ -1,1015 +0,0 @@ -/* Copyright (c) 2010-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "rrd.h" -#include "util.h" - -static GQuark -rrd_error_quark (void) -{ - return g_quark_from_static_string ("rrd-error"); -} - -/** - * Convert rrd dst type from string to numeric value - */ -enum rrd_dst_type -rrd_dst_from_string (const gchar *str) -{ - if (g_ascii_strcasecmp (str, "counter") == 0) { - return RRD_DST_COUNTER; - } - else if (g_ascii_strcasecmp (str, "absolute") == 0) { - return RRD_DST_ABSOLUTE; - } - else if (g_ascii_strcasecmp (str, "gauge") == 0) { - return RRD_DST_GAUGE; - } - else if (g_ascii_strcasecmp (str, "cdef") == 0) { - return RRD_DST_CDEF; - } - else if (g_ascii_strcasecmp (str, "derive") == 0) { - return RRD_DST_DERIVE; - } - return -1; -} - -/** - * Convert numeric presentation of dst to string - */ -const gchar* -rrd_dst_to_string (enum rrd_dst_type type) -{ - switch (type) { - case RRD_DST_COUNTER: - return "COUNTER"; - case RRD_DST_ABSOLUTE: - return "ABSOLUTE"; - case RRD_DST_GAUGE: - return "GAUGE"; - case RRD_DST_CDEF: - return "CDEF"; - case RRD_DST_DERIVE: - return "DERIVE"; - default: - return "U"; - } - - return "U"; -} - -/** - * Convert rrd consolidation function type from string to numeric value - */ -enum rrd_cf_type -rrd_cf_from_string (const gchar *str) -{ - if (g_ascii_strcasecmp (str, "average") == 0) { - return RRD_CF_AVERAGE; - } - else if (g_ascii_strcasecmp (str, "minimum") == 0) { - return RRD_CF_MINIMUM; - } - else if (g_ascii_strcasecmp (str, "maximum") == 0) { - return RRD_CF_MAXIMUM; - } - else if (g_ascii_strcasecmp (str, "last") == 0) { - return RRD_CF_LAST; - } - /* XXX: add other CF functions supported by rrd */ - - return -1; -} - -/** - * Convert numeric presentation of cf to string - */ -const gchar* -rrd_cf_to_string (enum rrd_cf_type type) -{ - switch (type) { - case RRD_CF_AVERAGE: - return "AVERAGE"; - case RRD_CF_MINIMUM: - return "MINIMUM"; - case RRD_CF_MAXIMUM: - return "MAXIMUM"; - case RRD_CF_LAST: - return "LAST"; - default: - return "U"; - } - - /* 
XXX: add other CF functions supported by rrd */ - - return "U"; -} - -void -rrd_make_default_rra (const gchar *cf_name, gulong pdp_cnt, gulong rows, struct rrd_rra_def *rra) -{ - rra->pdp_cnt = pdp_cnt; - rra->row_cnt = rows; - rspamd_strlcpy (rra->cf_nam, cf_name, sizeof (rra->cf_nam)); - memset (rra->par, 0, sizeof (rra->par)); - rra->par[RRA_cdp_xff_val].dv = 0.5; -} - -void -rrd_make_default_ds (const gchar *name, gulong pdp_step, struct rrd_ds_def *ds) -{ - rspamd_strlcpy (ds->ds_nam, name, sizeof (ds->ds_nam)); - rspamd_strlcpy (ds->dst, "COUNTER", sizeof (ds->dst)); - memset (ds->par, 0, sizeof (ds->par)); - ds->par[RRD_DS_mrhb_cnt].lv = pdp_step * 2; - ds->par[RRD_DS_min_val].dv = NAN; - ds->par[RRD_DS_max_val].dv = NAN; -} - -/** - * Check rrd file for correctness (size, cookies, etc) - */ -static gboolean -rspamd_rrd_check_file (const gchar *filename, gboolean need_data, GError **err) -{ - gint fd, i; - struct stat st; - struct rrd_file_head head; - struct rrd_rra_def rra; - gint head_size; - - fd = open (filename, O_RDWR); - if (fd == -1) { - g_set_error (err, rrd_error_quark (), errno, "rrd open error: %s", strerror (errno)); - return FALSE; - } - - if (fstat (fd, &st) == -1) { - g_set_error (err, rrd_error_quark (), errno, "rrd stat error: %s", strerror (errno)); - close (fd); - return FALSE; - } - if (st.st_size < (goffset)sizeof (struct rrd_file_head)) { - /* We have trimmed file */ - g_set_error (err, rrd_error_quark (), EINVAL, "rrd size is bad: %ud", (guint)st.st_size); - close (fd); - return FALSE; - } - - /* Try to read header */ - if (read (fd, &head, sizeof (head)) != sizeof (head)) { - g_set_error (err, rrd_error_quark (), errno, "rrd read head error: %s", strerror (errno)); - close (fd); - return FALSE; - } - /* Check magic */ - if (memcmp (head.cookie, RRD_COOKIE, sizeof (head.cookie)) != 0 || - memcmp (head.version, RRD_VERSION, sizeof (head.version)) != 0 || - head.float_cookie != RRD_FLOAT_COOKIE) { - g_set_error (err, rrd_error_quark 
(), EINVAL, "rrd head cookies error: %s", strerror (errno)); - close (fd); - return FALSE; - } - /* Check for other params */ - if (head.ds_cnt <= 0 || head.rra_cnt <= 0) { - g_set_error (err, rrd_error_quark (), EINVAL, "rrd head cookies error: %s", strerror (errno)); - close (fd); - return FALSE; - } - /* Now we can calculate the overall size of rrd */ - head_size = sizeof (struct rrd_file_head) + - sizeof (struct rrd_ds_def) * head.ds_cnt + - sizeof (struct rrd_rra_def) * head.rra_cnt + - sizeof (struct rrd_live_head) + - sizeof (struct rrd_pdp_prep) * head.ds_cnt + - sizeof (struct rrd_cdp_prep) * head.ds_cnt * head.rra_cnt + - sizeof (struct rrd_rra_ptr) * head.rra_cnt; - if (st.st_size < (goffset)head_size) { - g_set_error (err, rrd_error_quark (), errno, "rrd file seems to have stripped header: %d", head_size); - close (fd); - return FALSE; - } - - if (need_data) { - /* Now check rra */ - if (lseek (fd, sizeof (struct rrd_ds_def) * head.ds_cnt, SEEK_CUR) == -1) { - g_set_error (err, rrd_error_quark (), errno, "rrd head lseek error: %s", strerror (errno)); - close (fd); - return FALSE; - } - for (i = 0; i < (gint)head.rra_cnt; i ++) { - if (read (fd, &rra, sizeof (rra)) != sizeof (rra)) { - g_set_error (err, rrd_error_quark (), errno, "rrd read rra error: %s", strerror (errno)); - close (fd); - return FALSE; - } - head_size += rra.row_cnt * head.ds_cnt * sizeof (gdouble); - } - - if (st.st_size != head_size) { - g_set_error (err, rrd_error_quark (), EINVAL, "rrd file seems to have incorrect size: %d, must be %d", (gint)st.st_size, head_size); - close (fd); - return FALSE; - } - } - - close (fd); - return TRUE; -} - -/** - * Adjust pointers in mmapped rrd file - * @param file - */ -static void -rspamd_rrd_adjust_pointers (struct rspamd_rrd_file *file, gboolean completed) -{ - guint8 *ptr; - - ptr = file->map; - file->stat_head = (struct rrd_file_head *)ptr; - ptr += sizeof (struct rrd_file_head); - file->ds_def = (struct rrd_ds_def *)ptr; - ptr += sizeof 
(struct rrd_ds_def) * file->stat_head->ds_cnt; - file->rra_def = (struct rrd_rra_def *)ptr; - ptr += sizeof (struct rrd_rra_def) * file->stat_head->rra_cnt; - file->live_head = (struct rrd_live_head *)ptr; - ptr += sizeof (struct rrd_live_head); - file->pdp_prep = (struct rrd_pdp_prep *)ptr; - ptr += sizeof (struct rrd_pdp_prep) * file->stat_head->ds_cnt; - file->cdp_prep = (struct rrd_cdp_prep *)ptr; - ptr += sizeof (struct rrd_cdp_prep) * file->stat_head->rra_cnt * file->stat_head->ds_cnt; - file->rra_ptr = (struct rrd_rra_ptr *)ptr; - if (completed) { - ptr += sizeof (struct rrd_rra_ptr) * file->stat_head->rra_cnt; - file->rrd_value = (gdouble *)ptr; - } - else { - file->rrd_value = NULL; - } -} - -/** - * Open completed or incompleted rrd file - * @param filename - * @param completed - * @param err - * @return - */ -static struct rspamd_rrd_file* -rspamd_rrd_open_common (const gchar *filename, gboolean completed, GError **err) -{ - struct rspamd_rrd_file *new; - gint fd; - struct stat st; - - if (!rspamd_rrd_check_file (filename, completed, err)) { - return NULL; - } - - new = g_slice_alloc0 (sizeof (struct rspamd_rrd_file)); - - if (new == NULL) { - g_set_error (err, rrd_error_quark (), ENOMEM, "not enough memory"); - return NULL; - } - - /* Open file */ - fd = open (filename, O_RDWR); - if (fd == -1) { - g_set_error (err, rrd_error_quark (), errno, "rrd open error: %s", strerror (errno)); - return FALSE; - } - - if (fstat (fd, &st) == -1) { - g_set_error (err, rrd_error_quark (), errno, "rrd stat error: %s", strerror (errno)); - close (fd); - return FALSE; - } - /* Mmap file */ - new->size = st.st_size; - if ((new->map = mmap (NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED) { - close (fd); - g_set_error (err, rrd_error_quark (), ENOMEM, "mmap failed: %s", strerror (errno)); - g_slice_free1 (sizeof (struct rspamd_rrd_file), new); - return NULL; - } - - close (fd); - - /* Adjust pointers */ - rspamd_rrd_adjust_pointers (new, 
completed); - - /* Mark it as finalized */ - new->finalized = completed; - - new->filename = g_strdup (filename); - - return new; -} - -/** - * Open (and mmap) existing RRD file - * @param filename path - * @param err error pointer - * @return rrd file structure - */ -struct rspamd_rrd_file* -rspamd_rrd_open (const gchar *filename, GError **err) -{ - return rspamd_rrd_open_common (filename, TRUE, err); -} - -/** - * Create basic header for rrd file - * @param filename file path - * @param ds_count number of data sources - * @param rra_count number of round robin archives - * @param pdp_step step of primary data points - * @param err error pointer - * @return TRUE if file has been created - */ -struct rspamd_rrd_file* -rspamd_rrd_create (const gchar *filename, gulong ds_count, gulong rra_count, gulong pdp_step, GError **err) -{ - struct rspamd_rrd_file *new; - struct rrd_file_head head; - struct rrd_ds_def ds; - struct rrd_rra_def rra; - struct rrd_live_head lh; - struct rrd_pdp_prep pdp; - struct rrd_cdp_prep cdp; - struct rrd_rra_ptr rra_ptr; - gint fd; - guint i, j; - struct timeval tv; - - /* Open file */ - fd = open (filename, O_RDWR | O_CREAT | O_TRUNC, 0644); - if (fd == -1) { - g_set_error (err, rrd_error_quark (), errno, "rrd create error: %s", strerror (errno)); - return NULL; - } - - /* Fill header */ - memset (&head, 0, sizeof (head)); - head.rra_cnt = rra_count; - head.ds_cnt = ds_count; - head.pdp_step = pdp_step; - memcpy (head.cookie, RRD_COOKIE, sizeof (head.cookie)); - memcpy (head.version, RRD_VERSION, sizeof (head.version)); - head.float_cookie = RRD_FLOAT_COOKIE; - - if (write (fd, &head, sizeof (head)) != sizeof (head)) { - close (fd); - g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); - return NULL; - } - - /* Fill DS section */ - memset (&ds.ds_nam, 0, sizeof (ds.ds_nam)); - memcpy (&ds.dst, "COUNTER", sizeof ("COUNTER")); - memset (&ds.par, 0, sizeof (ds.par)); - for (i = 0; i < ds_count; i ++) { - if 
(write (fd, &ds, sizeof (ds)) != sizeof (ds)) { - close (fd); - g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); - return NULL; - } - } - - /* Fill RRA section */ - memcpy (&rra.cf_nam, "AVERAGE", sizeof ("AVERAGE")); - rra.pdp_cnt = 1; - memset (&rra.par, 0, sizeof (rra.par)); - for (i = 0; i < rra_count; i ++) { - if (write (fd, &rra, sizeof (rra)) != sizeof (rra)) { - close (fd); - g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); - return NULL; - } - } - - /* Fill live header */ - gettimeofday (&tv, NULL); - lh.last_up = tv.tv_sec; - lh.last_up_usec = tv.tv_usec; - - if (write (fd, &lh, sizeof (lh)) != sizeof (lh)) { - close (fd); - g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); - return NULL; - } - - /* Fill pdp prep */ - memcpy (&pdp.last_ds, "U", sizeof ("U")); - memset (&pdp.scratch, 0, sizeof (pdp.scratch)); - pdp.scratch[PDP_val].dv = 0.; - pdp.scratch[PDP_unkn_sec_cnt].lv = 0; - for (i = 0; i < ds_count; i ++) { - if (write (fd, &pdp, sizeof (pdp)) != sizeof (pdp)) { - close (fd); - g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); - return NULL; - } - } - - /* Fill cdp prep */ - memset (&cdp.scratch, 0, sizeof (cdp.scratch)); - cdp.scratch[CDP_val].dv = NAN; - for (i = 0; i < rra_count; i ++) { - cdp.scratch[CDP_unkn_pdp_cnt].lv = 0; - for (j = 0; j < ds_count; j ++) { - if (write (fd, &cdp, sizeof (cdp)) != sizeof (cdp)) { - close (fd); - g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); - return NULL; - } - } - } - - /* Set row pointers */ - memset (&rra_ptr, 0, sizeof (rra_ptr)); - for (i = 0; i < rra_count; i ++) { - if (write (fd, &rra_ptr, sizeof (rra_ptr)) != sizeof (rra_ptr)) { - close (fd); - g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); - return NULL; - } - } - - close (fd); - new = rspamd_rrd_open_common (filename, 
FALSE, err); - - return new; -} - -/** - * Add data sources to rrd file - * @param filename path to file - * @param ds array of struct rrd_ds_def - * @param err error pointer - * @return TRUE if data sources were added - */ -gboolean -rspamd_rrd_add_ds (struct rspamd_rrd_file *file, GArray *ds, GError **err) -{ - - if (file == NULL || file->stat_head->ds_cnt * sizeof (struct rrd_ds_def) != ds->len) { - g_set_error (err, rrd_error_quark (), EINVAL, "rrd add ds failed: wrong arguments"); - return FALSE; - } - - /* Straightforward memcpy */ - memcpy (file->ds_def, ds->data, ds->len); - - return TRUE; -} - -/** - * Add round robin archives to rrd file - * @param filename path to file - * @param ds array of struct rrd_rra_def - * @param err error pointer - * @return TRUE if archives were added - */ -gboolean -rspamd_rrd_add_rra (struct rspamd_rrd_file *file, GArray *rra, GError **err) -{ - if (file == NULL || file->stat_head->rra_cnt * sizeof (struct rrd_rra_def) != rra->len) { - g_set_error (err, rrd_error_quark (), EINVAL, "rrd add rra failed: wrong arguments"); - return FALSE; - } - - /* Straightforward memcpy */ - memcpy (file->rra_def, rra->data, rra->len); - - return TRUE; -} - -/** - * Finalize rrd file header and initialize all RRA in the file - * @param filename file path - * @param err error pointer - * @return TRUE if rrd file is ready for use - */ -gboolean -rspamd_rrd_finalize (struct rspamd_rrd_file *file, GError **err) -{ - gint fd; - guint i; - gint count = 0; - gdouble vbuf[1024]; - struct stat st; - - if (file == NULL || file->filename == NULL) { - g_set_error (err, rrd_error_quark (), EINVAL, "rrd add rra failed: wrong arguments"); - return FALSE; - } - - fd = open (file->filename, O_RDWR); - if (fd == -1) { - g_set_error (err, rrd_error_quark (), errno, "rrd open error: %s", strerror (errno)); - return FALSE; - } - - if (lseek (fd, 0, SEEK_END) == -1) { - g_set_error (err, rrd_error_quark (), errno, "rrd seek error: %s", strerror (errno)); - close 
(fd); - return FALSE; - } - - /* Adjust CDP */ - for (i = 0; i < file->stat_head->rra_cnt; i ++) { - file->cdp_prep->scratch[CDP_unkn_pdp_cnt].lv = 0; - /* Randomize row pointer */ - file->rra_ptr->cur_row = g_random_int () % file->rra_def[i].row_cnt; - /* Calculate values count */ - count += file->rra_def[i].row_cnt * file->stat_head->ds_cnt; - } - - munmap (file->map, file->size); - /* Write values */ - for (i = 0; i < G_N_ELEMENTS (vbuf); i ++) { - vbuf[i] = NAN; - } - - while (count > 0) { - /* Write values in buffered matter */ - if (write (fd, vbuf, MIN ((gint)G_N_ELEMENTS (vbuf), count) * sizeof (gdouble)) == -1) { - g_set_error (err, rrd_error_quark (), errno, "rrd write error: %s", strerror (errno)); - close (fd); - return FALSE; - } - count -= G_N_ELEMENTS (vbuf); - } - - if (fstat (fd, &st) == -1) { - g_set_error (err, rrd_error_quark (), errno, "rrd stat error: %s", strerror (errno)); - close (fd); - return FALSE; - } - - /* Mmap again */ - file->size = st.st_size; - if ((file->map = mmap (NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED) { - close (fd); - g_set_error (err, rrd_error_quark (), ENOMEM, "mmap failed: %s", strerror (errno)); - g_slice_free1 (sizeof (struct rspamd_rrd_file), file); - return FALSE; - } - close (fd); - /* Adjust pointers */ - rspamd_rrd_adjust_pointers (file, TRUE); - - file->finalized = TRUE; - - return TRUE; -} - -/** - * Update pdp_prep data - * @param file rrd file - * @param vals new values - * @param pdp_new new pdp array - * @param interval time elapsed from the last update - * @return - */ -static gboolean -rspamd_rrd_update_pdp_prep (struct rspamd_rrd_file *file, gdouble *vals, gdouble *pdp_new, gdouble interval) -{ - guint i; - enum rrd_dst_type type; - - for (i = 0; i < file->stat_head->ds_cnt; i ++) { - type = rrd_dst_from_string (file->ds_def[i].dst); - - if (file->ds_def[i].par[RRD_DS_mrhb_cnt].lv < interval) { - rspamd_strlcpy (file->pdp_prep[i].last_ds, "U", sizeof 
(file->pdp_prep[i].last_ds)); - } - - if (file->ds_def[i].par[RRD_DS_mrhb_cnt].lv >= interval) { - switch (type) { - case RRD_DST_COUNTER: - case RRD_DST_DERIVE: - if (file->pdp_prep[i].last_ds[0] == 'U') { - pdp_new[i] = NAN; - } - else { - pdp_new[i] = vals[i] - strtod (file->pdp_prep[i].last_ds, NULL); - } - break; - case RRD_DST_GAUGE: - pdp_new[i] = vals[i] * interval; - break; - case RRD_DST_ABSOLUTE: - pdp_new[i] = vals[i]; - break; - default: - return FALSE; - } - } - else { - pdp_new[i] = NAN; - } - /* Copy value to the last_ds */ - if (!isnan (vals[i])) { - rspamd_snprintf (file->pdp_prep[i].last_ds, sizeof (file->pdp_prep[i].last_ds), "%.4f", vals[i]); - } - else { - file->pdp_prep[i].last_ds[0] = 'U'; - file->pdp_prep[i].last_ds[1] = '\0'; - } - } - - - return TRUE; -} - -/** - * Update step for this pdp - * @param file - * @param pdp_new new pdp array - * @param pdp_temp temp pdp array - * @param interval time till last update - * @param pre_int pre interval - * @param post_int post intervall - * @param pdp_diff time till last pdp update - */ -static void -rspamd_rrd_update_pdp_step (struct rspamd_rrd_file *file, gdouble *pdp_new, gdouble *pdp_temp, gdouble interval, - gdouble pre_int, gdouble post_int, gulong pdp_diff) -{ - guint i; - rrd_value_t *scratch; - gulong heartbeat; - - - for (i = 0; i < file->stat_head->ds_cnt; i ++) { - scratch = file->pdp_prep[i].scratch; - heartbeat = file->ds_def[i].par[RRD_DS_mrhb_cnt].lv; - if (!isnan (pdp_new[i])) { - if (isnan (scratch[PDP_val].dv)) { - scratch[PDP_val].dv = 0; - } - scratch[PDP_val].dv += pdp_new[i] / interval * pre_int; - pre_int = 0.0; - } - /* Check interval value for heartbeat for this DS */ - if ((interval > heartbeat) || (file->stat_head->pdp_step / 2.0 < scratch[PDP_unkn_sec_cnt].lv)) { - pdp_temp[i] = NAN; - } - else { - pdp_temp[i] = scratch[PDP_val].dv / - ((double) (pdp_diff - scratch[PDP_unkn_sec_cnt].lv) - pre_int); - } - - if (isnan (pdp_new[i])) { - scratch[PDP_unkn_sec_cnt].lv = 
floor (post_int); - scratch[PDP_val].dv = NAN; - } else { - scratch[PDP_unkn_sec_cnt].lv = 0; - scratch[PDP_val].dv = pdp_new[i] / interval * post_int; - } - } -} - -/** - * Update CDP for this rra - * @param file rrd file - * @param pdp_steps how much pdp steps elapsed from the last update - * @param pdp_offset offset from pdp - * @param rra_steps how much steps must be updated for this rra - * @param rra_index index of desired rra - * @param pdp_temp temporary pdp points - */ -static void -rspamd_rrd_update_cdp (struct rspamd_rrd_file *file, gdouble pdp_steps, gdouble pdp_offset, gulong *rra_steps, gulong rra_index, - gdouble *pdp_temp) -{ - guint i; - struct rrd_rra_def *rra; - rrd_value_t *scratch; - enum rrd_cf_type cf; - gdouble last_cdp, cur_cdp; - gulong pdp_in_cdp; - - rra = &file->rra_def[rra_index]; - cf = rrd_cf_from_string (rra->cf_nam); - - /* Iterate over all DS for this RRA */ - for (i = 0; i < file->stat_head->ds_cnt; i ++) { - /* Get CDP for this RRA and DS */ - scratch = file->cdp_prep[rra_index * file->stat_head->ds_cnt + i].scratch; - if (rra->pdp_cnt > 1) { - /* Do we have any CDP to update for this rra ? */ - if (rra_steps[rra_index] > 0) { - if (isnan (pdp_temp[i])) { - /* New pdp is nan */ - /* Increment unknown points count */ - scratch[CDP_unkn_pdp_cnt].lv += pdp_offset; - /* Reset secondary value */ - scratch[CDP_secondary_val].dv = NAN; - } - else { - scratch[CDP_secondary_val].dv = pdp_temp[i]; - } - - /* Check XFF for this rra */ - if (scratch[CDP_unkn_pdp_cnt].lv > rra->pdp_cnt * rra->par[RRA_cdp_xff_val].lv) { - /* XFF is reached */ - scratch[CDP_primary_val].dv = NAN; - } - else { - /* Need to initialize CDP using specified consolidation */ - switch (cf) { - case RRD_CF_AVERAGE: - last_cdp = isnan (scratch[CDP_val].dv) ? 0.0 : scratch[CDP_val].dv; - cur_cdp = isnan (pdp_temp[i]) ? 
0.0 : pdp_temp[i]; - scratch[CDP_primary_val].dv = (last_cdp + cur_cdp * pdp_offset) / (rra->pdp_cnt - scratch[CDP_unkn_pdp_cnt].lv); - break; - case RRD_CF_MAXIMUM: - last_cdp = isnan (scratch[CDP_val].dv) ? -INFINITY : scratch[CDP_val].dv; - cur_cdp = isnan (pdp_temp[i]) ? -INFINITY : pdp_temp[i]; - scratch[CDP_primary_val].dv = MAX (last_cdp, cur_cdp); - break; - case RRD_CF_MINIMUM: - last_cdp = isnan (scratch[CDP_val].dv) ? INFINITY : scratch[CDP_val].dv; - cur_cdp = isnan (pdp_temp[i]) ? INFINITY : pdp_temp[i]; - scratch[CDP_primary_val].dv = MIN (last_cdp, cur_cdp); - break; - case RRD_CF_LAST: - default: - scratch[CDP_primary_val].dv = pdp_temp[i]; - break; - } - } - /* Init carry of this CDP */ - pdp_in_cdp = (pdp_steps - pdp_offset) / rra->pdp_cnt; - if (pdp_in_cdp == 0 || isnan (pdp_temp[i])) { - /* Set overflow */ - switch (cf) { - case RRD_CF_AVERAGE: - scratch[CDP_val].dv = 0; - break; - case RRD_CF_MAXIMUM: - scratch[CDP_val].dv = -INFINITY; - break; - case RRD_CF_MINIMUM: - scratch[CDP_val].dv = INFINITY; - break; - default: - scratch[CDP_val].dv = NAN; - break; - } - } - else { - /* Special carry for average */ - if (cf == RRD_CF_AVERAGE) { - scratch[CDP_val].dv = pdp_temp[i] * pdp_in_cdp; - } - else { - scratch[CDP_val].dv = pdp_temp[i]; - } - } - } - /* In this case we just need to update cdp_prep for this RRA */ - else { - if (isnan (pdp_temp[i])) { - /* Just increase undefined zone */ - scratch[CDP_unkn_pdp_cnt].lv += pdp_steps; - } - else { - /* Calculate cdp value */ - last_cdp = scratch[CDP_val].dv; - switch (cf) { - case RRD_CF_AVERAGE: - if (isnan (last_cdp)) { - scratch[CDP_val].dv = pdp_temp[i] * pdp_steps; - } - else { - scratch[CDP_val].dv = last_cdp + pdp_temp[i] * pdp_steps; - } - break; - case RRD_CF_MAXIMUM: - scratch[CDP_val].dv = MAX (last_cdp, pdp_temp[i]); - break; - case RRD_CF_MINIMUM: - scratch[CDP_val].dv = MIN (last_cdp, pdp_temp[i]); - break; - case RRD_CF_LAST: - scratch[CDP_val].dv = pdp_temp[i]; - break; - default: - 
scratch[CDP_val].dv = NAN; - break; - } - } - } - } - else { - /* We have nothing to consolidate, but we may miss some pdp */ - if (pdp_steps > 2) { - /* Just write PDP value */ - scratch[CDP_primary_val].dv = pdp_temp[i]; - scratch[CDP_secondary_val].dv = pdp_temp[i]; - } - } - } -} - -/** - * Update RRA in a file - * @param file rrd file - * @param rra_steps steps for each rra - * @param now current time - */ -void -rspamd_rrd_write_rra (struct rspamd_rrd_file *file, gulong *rra_steps) -{ - guint i, j, scratch_idx, cdp_idx, k; - struct rrd_rra_def *rra; - gdouble *rra_row; - - /* Iterate over all RRA */ - for (i = 0; i < file->stat_head->rra_cnt; i ++) { - rra = &file->rra_def[i]; - /* How much steps need to be updated */ - for (j = 0, scratch_idx = CDP_primary_val; j < rra_steps[i]; j ++, scratch_idx = CDP_secondary_val) { - /* Move row ptr */ - if (++file->rra_ptr[i].cur_row >= rra->row_cnt) { - file->rra_ptr[i].cur_row = 0; - } - /* Calculate seek */ - rra_row = file->rrd_value + (file->stat_head->ds_cnt * i + file->rra_ptr[i].cur_row); - /* Iterate over DS */ - for (k = 0; k < file->stat_head->ds_cnt; k ++) { - cdp_idx = i * file->stat_head->ds_cnt + k; - memcpy (rra_row, &file->cdp_prep[cdp_idx].scratch[scratch_idx].dv, sizeof (gdouble)); - rra_row ++; - } - } - } -} - -/** - * Add record to rrd file - * @param file rrd file object - * @param points points (must be row suitable for this RRA, depending on ds count) - * @param err error pointer - * @return TRUE if a row has been added - */ -gboolean -rspamd_rrd_add_record (struct rspamd_rrd_file* file, GArray *points, GError **err) -{ - gdouble interval, *pdp_new, *pdp_temp, pre_int, post_int; - guint i; - gulong pdp_steps, cur_pdp_count, prev_pdp_step, cur_pdp_step, - prev_pdp_age, cur_pdp_age, *rra_steps, pdp_offset; - struct timeval tv; - - if (file == NULL || file->stat_head->ds_cnt * sizeof (gdouble) != points->len) { - g_set_error (err, rrd_error_quark (), EINVAL, "rrd add points failed: wrong 
arguments"); - return FALSE; - } - - /* Get interval */ - gettimeofday (&tv, NULL); - interval = (gdouble)(tv.tv_sec - file->live_head->last_up) + - (gdouble)(tv.tv_usec - file->live_head->last_up_usec) / 1e6f; - - /* Update PDP preparation values */ - pdp_new = g_malloc (sizeof (gdouble) * file->stat_head->ds_cnt); - pdp_temp = g_malloc (sizeof (gdouble) * file->stat_head->ds_cnt); - /* How much steps need to be updated in each RRA */ - rra_steps = g_malloc0 (sizeof (gulong) * file->stat_head->rra_cnt); - - if (!rspamd_rrd_update_pdp_prep (file, (gdouble *)points->data, pdp_new, interval)) { - g_set_error (err, rrd_error_quark (), EINVAL, "rrd update pdp failed: wrong arguments"); - g_free (pdp_new); - g_free (pdp_temp); - g_free (rra_steps); - return FALSE; - } - - /* Calculate elapsed steps */ - /* Age in seconds for previous pdp store */ - prev_pdp_age = file->live_head->last_up % file->stat_head->pdp_step; - /* Time in seconds for last pdp update */ - prev_pdp_step = file->live_head->last_up - prev_pdp_age; - /* Age in seconds from current time to required pdp time */ - cur_pdp_age = tv.tv_sec % file->stat_head->pdp_step; - /* Time of desired pdp step */ - cur_pdp_step = tv.tv_sec - cur_pdp_age; - - if (cur_pdp_step > prev_pdp_step) { - pre_int = (gdouble)(cur_pdp_step - file->live_head->last_up) - ((double)file->live_head->last_up_usec) / 1e6f; - post_int = (gdouble)cur_pdp_age + ((double)tv.tv_usec) / 1e6f; - } - else { - pre_int = interval; - post_int = 0; - } - cur_pdp_count = cur_pdp_step / file->stat_head->pdp_step; - pdp_steps = (cur_pdp_step - prev_pdp_step) / file->stat_head->pdp_step; - - - if (pdp_steps == 0) { - /* Simple update of pdp prep */ - for (i = 0; i < file->stat_head->ds_cnt; i ++) { - if (isnan (pdp_new[i])) { - /* Increment unknown period */ - file->pdp_prep[i].scratch[PDP_unkn_sec_cnt].lv += floor (interval); - } - else { - if (isnan (file->pdp_prep[i].scratch[PDP_val].dv)) { - /* Reset pdp to the current value */ - 
file->pdp_prep[i].scratch[PDP_val].dv = pdp_new[i]; - } - else { - /* Increment pdp value */ - file->pdp_prep[i].scratch[PDP_val].dv += pdp_new[i]; - } - } - } - } - else { - /* Complex update of PDP, CDP and RRA */ - - /* Update PDP for this step */ - rspamd_rrd_update_pdp_step (file, pdp_new, pdp_temp, interval, pre_int, post_int, pdp_steps * file->stat_head->pdp_step); - - - /* Update CDP points for each RRA*/ - for (i = 0; i < file->stat_head->rra_cnt; i ++) { - /* Calculate pdp offset for this RRA */ - pdp_offset = file->rra_def[i].pdp_cnt - cur_pdp_count % file->rra_def[i].pdp_cnt; - /* How much steps we got for this RRA */ - if (pdp_offset <= pdp_steps) { - rra_steps[i] = (pdp_steps - pdp_offset) / file->rra_def[i].pdp_cnt + 1; - } - else { - /* This rra have not passed enough pdp steps */ - rra_steps[i] = 0; - } - /* Update this specific CDP */ - rspamd_rrd_update_cdp (file, pdp_steps, pdp_offset, rra_steps, i, pdp_temp); - /* Write RRA */ - rspamd_rrd_write_rra (file, rra_steps); - } - } - file->live_head->last_up = tv.tv_sec; - file->live_head->last_up_usec = tv.tv_usec; - - /* Sync and invalidate */ - msync (file->map, file->size, MS_ASYNC | MS_INVALIDATE); - - g_free (pdp_new); - g_free (pdp_temp); - g_free (rra_steps); - - return TRUE; -} - -/** - * Close rrd file - * @param file - * @return - */ -gint -rspamd_rrd_close (struct rspamd_rrd_file* file) -{ - if (file == NULL) { - errno = EINVAL; - return -1; - } - - munmap (file->map, file->size); - if (file->filename != NULL) { - g_free (file->filename); - } - g_slice_free1 (sizeof (struct rspamd_rrd_file), file); - - return 0; -} diff --git a/src/rrd.h b/src/rrd.h deleted file mode 100644 index ff6902894..000000000 --- a/src/rrd.h +++ /dev/null @@ -1,374 +0,0 @@ -/* Copyright (c) 2010-2012, Vsevolod Stakhov - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - -#ifndef RRD_H_ -#define RRD_H_ - -#include "config.h" - -/** - * This file contains basic structure and functions to operate with round-robin databases - */ - -#define RRD_COOKIE "RRD" -#define RRD_VERSION "0003" -#define RRD_FLOAT_COOKIE ((double)8.642135E130) - -typedef union { - unsigned long lv; - double dv; -} rrd_value_t; - -struct rrd_file_head { - /* Data Base Identification Section ** */ - gchar cookie[4]; /* RRD */ - gchar version[5]; /* version of the format */ - gdouble float_cookie; /* is it the correct double representation ? 
*/ - - /* Data Base Structure Definition **** */ - gulong ds_cnt; /* how many different ds provid input to the rrd */ - gulong rra_cnt; /* how many rras will be maintained in the rrd */ - gulong pdp_step; /* pdp interval in seconds */ - - rrd_value_t par[10]; /* global parameters ... unused - at the moment */ -}; - -enum rrd_dst_type { - RRD_DST_COUNTER = 0, /* data source types available */ - RRD_DST_ABSOLUTE, - RRD_DST_GAUGE, - RRD_DST_DERIVE, - RRD_DST_CDEF -}; -enum rrd_ds_param { - RRD_DS_mrhb_cnt = 0, /* minimum required heartbeat */ - RRD_DS_min_val, /* the processed input of a ds must */ - RRD_DS_max_val, /* be between max_val and min_val - * both can be set to UNKNOWN if you - * do not care. Data outside the limits - * set to UNKNOWN */ - RRD_DS_cdef = RRD_DS_mrhb_cnt -}; /* pointer to encoded rpn expression only applies to DST_CDEF */ - - -/* The magic number here is one less than DS_NAM_SIZE */ -#define RRD_DS_NAM_SIZE 20 - -#define RRD_DST_SIZE 20 - -struct rrd_ds_def { - gchar ds_nam[RRD_DS_NAM_SIZE]; /* Name of the data source (null terminated) */ - gchar dst[RRD_DST_SIZE]; /* Type of data source (null terminated) */ - rrd_value_t par[10]; /* index of this array see ds_param_en */ -}; - -/* RRA definition */ - -enum rrd_cf_type { - RRD_CF_AVERAGE = 0, /* data consolidation functions */ - RRD_CF_MINIMUM, - RRD_CF_MAXIMUM, - RRD_CF_LAST, - RRD_CF_HWPREDICT, - /* An array of predictions using the seasonal - * Holt-Winters algorithm. Requires an RRA of type - * CF_SEASONAL for this data source. */ - RRD_CF_SEASONAL, - /* An array of seasonal effects. Requires an RRA of - * type CF_HWPREDICT for this data source. */ - RRD_CF_DEVPREDICT, - /* An array of deviation predictions based upon - * smoothed seasonal deviations. Requires an RRA of - * type CF_DEVSEASONAL for this data source. */ - RRD_CF_DEVSEASONAL, - /* An array of smoothed seasonal deviations. Requires - * an RRA of type CF_HWPREDICT for this data source. 
- * */ - RRD_CF_FAILURES, - /* HWPREDICT that follows a moving baseline */ - RRD_CF_MHWPREDICT - /* new entries must come last !!! */ -}; - - -#define MAX_RRA_PAR_EN 10 - -enum rrd_rra_param { - RRA_cdp_xff_val = 0, /* what part of the consolidated - * datapoint must be known, to produce a - * valid entry in the rra */ - /* CF_HWPREDICT: */ - RRA_hw_alpha = 1, - /* exponential smoothing parameter for the intercept in - * the Holt-Winters prediction algorithm. */ - RRA_hw_beta = 2, - /* exponential smoothing parameter for the slope in - * the Holt-Winters prediction algorithm. */ - - RRA_dependent_rra_idx = 3, - /* For CF_HWPREDICT: index of the RRA with the seasonal - * effects of the Holt-Winters algorithm (of type - * CF_SEASONAL). - * For CF_DEVPREDICT: index of the RRA with the seasonal - * deviation predictions (of type CF_DEVSEASONAL). - * For CF_SEASONAL: index of the RRA with the Holt-Winters - * intercept and slope coefficient (of type CF_HWPREDICT). - * For CF_DEVSEASONAL: index of the RRA with the - * Holt-Winters prediction (of type CF_HWPREDICT). - * For CF_FAILURES: index of the CF_DEVSEASONAL array. - * */ - - /* CF_SEASONAL and CF_DEVSEASONAL: */ - RRA_seasonal_gamma = 1, - /* exponential smoothing parameter for seasonal effects. */ - - RRA_seasonal_smoothing_window = 2, - /* fraction of the season to include in the running average - * smoother */ - - /* RRA_dependent_rra_idx = 3, */ - - RRA_seasonal_smooth_idx = 4, - /* an integer between 0 and row_count - 1 which - * is index in the seasonal cycle for applying - * the period smoother. */ - - /* CF_FAILURES: */ - RRA_delta_pos = 1, /* confidence bound scaling parameters */ - RRA_delta_neg = 2, - /* RRA_dependent_rra_idx = 3, */ - RRA_window_len = 4, - RRA_failure_threshold = 5 - /* For CF_FAILURES, number of violations within the last - * window required to mark a failure. 
*/ -}; - - -#define RRD_CF_NAM_SIZE 20 - -struct rrd_rra_def { - gchar cf_nam[RRD_CF_NAM_SIZE]; /* consolidation function (null term) */ - gulong row_cnt; /* number of entries in the store */ - gulong pdp_cnt; /* how many primary data points are - * required for a consolidated data point?*/ - rrd_value_t par[MAX_RRA_PAR_EN]; /* index see rra_param_en */ - -}; - -struct rrd_live_head { - time_t last_up; /* when was rrd last updated */ - glong last_up_usec; /* micro seconds part of the update timestamp. Always >= 0 */ -}; - -#define RRD_LAST_DS_LEN 30 - -enum rrd_pdp_param { - PDP_unkn_sec_cnt = 0, /* how many seconds of the current - * pdp value is unknown data? */ - PDP_val -}; /* current value of the pdp. - this depends on dst */ - -struct rrd_pdp_prep { - gchar last_ds[RRD_LAST_DS_LEN]; /* the last reading from the data - * source. this is stored in ASCII - * to cater for very large counters - * we might encounter in connection - * with SNMP. */ - rrd_value_t scratch[10]; /* contents according to pdp_par_en */ -}; - -#define RRD_MAX_CDP_PAR_EN 10 -#define RRD_MAX_CDP_FAILURES_IDX 8 -/* max CDP scratch entries avail to record violations for a FAILURES RRA */ -#define RRD_MAX_FAILURES_WINDOW_LEN 28 - -enum rrd_cdp_param { - CDP_val = 0, - /* the base_interval is always an - * average */ - CDP_unkn_pdp_cnt, - /* how many unknown pdp were - * integrated. This and the cdp_xff - * will decide if this is going to - * be a UNKNOWN or a valid value */ - CDP_hw_intercept, - /* Current intercept coefficient for the Holt-Winters - * prediction algorithm. */ - CDP_hw_last_intercept, - /* Last iteration intercept coefficient for the Holt-Winters - * prediction algorihtm. */ - CDP_hw_slope, - /* Current slope coefficient for the Holt-Winters - * prediction algorithm. */ - CDP_hw_last_slope, - /* Last iteration slope coeffient. */ - CDP_null_count, - /* Number of sequential Unknown (DNAN) values + 1 preceding - * the current prediction. 
- * */ - CDP_last_null_count, - /* Last iteration count of Unknown (DNAN) values. */ - CDP_primary_val = 8, - /* optimization for bulk updates: the value of the first CDP - * value to be written in the bulk update. */ - CDP_secondary_val = 9, - /* optimization for bulk updates: the value of subsequent - * CDP values to be written in the bulk update. */ - CDP_hw_seasonal = CDP_hw_intercept, - /* Current seasonal coefficient for the Holt-Winters - * prediction algorithm. This is stored in CDP prep to avoid - * redundant seek operations. */ - CDP_hw_last_seasonal = CDP_hw_last_intercept, - /* Last iteration seasonal coefficient. */ - CDP_seasonal_deviation = CDP_hw_intercept, - CDP_last_seasonal_deviation = CDP_hw_last_intercept, - CDP_init_seasonal = CDP_null_count -}; - -struct rrd_cdp_prep { - rrd_value_t scratch[RRD_MAX_CDP_PAR_EN]; - /* contents according to cdp_par_en * - * init state should be NAN */ -}; - -struct rrd_rra_ptr { - gulong cur_row; /* current row in the rra */ -}; - -/* Final rrd file structure */ -struct rspamd_rrd_file { - struct rrd_file_head *stat_head; /* the static header */ - struct rrd_ds_def *ds_def; /* list of data source definitions */ - struct rrd_rra_def *rra_def; /* list of round robin archive def */ - struct rrd_live_head *live_head; /* rrd v >= 3 last_up with us */ - struct rrd_pdp_prep *pdp_prep; /* pdp data prep area */ - struct rrd_cdp_prep *cdp_prep; /* cdp prep area */ - struct rrd_rra_ptr *rra_ptr; /* list of rra pointers */ - gdouble *rrd_value; /* list of rrd values */ - - gchar *filename; - guint8* map; /* mmapped area */ - gsize size; /* its size */ - gboolean finalized; -}; - - -/* Public API */ - -/** - * Open (and mmap) existing RRD file - * @param filename path - * @param err error pointer - * @return rrd file structure - */ -struct rspamd_rrd_file* rspamd_rrd_open (const gchar *filename, GError **err); - -/** - * Create basic header for rrd file - * @param filename file path - * @param ds_count number of data sources 
- * @param rra_count number of round robin archives - * @param pdp_step step of primary data points - * @param err error pointer - * @return TRUE if file has been created - */ -struct rspamd_rrd_file* rspamd_rrd_create (const gchar *filename, gulong ds_count, gulong rra_count, gulong pdp_step, GError **err); - -/** - * Add data sources to rrd file - * @param filename path to file - * @param ds array of struct rrd_ds_def - * @param err error pointer - * @return TRUE if data sources were added - */ -gboolean rspamd_rrd_add_ds (struct rspamd_rrd_file* file, GArray *ds, GError **err); - -/** - * Add round robin archives to rrd file - * @param filename path to file - * @param ds array of struct rrd_rra_def - * @param err error pointer - * @return TRUE if archives were added - */ -gboolean rspamd_rrd_add_rra (struct rspamd_rrd_file *file, GArray *rra, GError **err); - -/** - * Finalize rrd file header and initialize all RRA in the file - * @param filename file path - * @param err error pointer - * @return TRUE if rrd file is ready for use - */ -gboolean rspamd_rrd_finalize (struct rspamd_rrd_file *file, GError **err); - -/** - * Add record to rrd file - * @param file rrd file object - * @param points points (must be row suitable for this RRA, depending on ds count) - * @param err error pointer - * @return TRUE if a row has been added - */ -gboolean rspamd_rrd_add_record (struct rspamd_rrd_file* file, GArray *points, GError **err); - -/** - * Close rrd file - * @param file - * @return - */ -gint rspamd_rrd_close (struct rspamd_rrd_file* file); - -/* - * Conversion functions - */ - -/** - * Convert rrd dst type from string to numeric value - */ -enum rrd_dst_type rrd_dst_from_string (const gchar *str); -/** - * Convert numeric presentation of dst to string - */ -const gchar* rrd_dst_to_string (enum rrd_dst_type type); -/** - * Convert rrd consolidation function type from string to numeric value - */ -enum rrd_cf_type rrd_cf_from_string (const gchar *str); -/** - * Convert 
numeric presentation of cf to string - */ -const gchar* rrd_cf_to_string (enum rrd_cf_type type); - -/* Default RRA and DS */ - -/** - * Create default RRA - */ -void rrd_make_default_rra (const gchar *cf_name, gulong pdp_cnt, gulong rows, struct rrd_rra_def *rra); - -/** - * Create default DS - */ -void rrd_make_default_ds (const gchar *name, gulong pdp_step, struct rrd_ds_def *ds); -#endif /* RRD_H_ */ diff --git a/src/settings.c b/src/settings.c deleted file mode 100644 index c3292c8ab..000000000 --- a/src/settings.c +++ /dev/null @@ -1,657 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "cfg_file.h" -#include "map.h" -#include "main.h" -#include "settings.h" -#include "filter.h" -#include "json/jansson.h" - -struct json_buf { - GHashTable *table; - gchar *buf; - gchar *pos; - size_t buflen; -}; - -static void -settings_actions_free (gpointer data) -{ - GList *cur = data; - - while (cur) { - g_free (cur->data); - cur = g_list_next (cur); - } - - g_list_free ((GList *)data); -} - -static void -settings_free (gpointer data) -{ - struct rspamd_settings *s = data; - - if (s->statfile_alias) { - g_free (s->statfile_alias); - } - if (s->factors) { - g_hash_table_destroy (s->factors); - } - if (s->metric_scores) { - g_hash_table_destroy (s->metric_scores); - } - if (s->reject_scores) { - g_hash_table_destroy (s->reject_scores); - } - if (s->whitelist) { - g_hash_table_destroy (s->whitelist); - } - if (s->blacklist) { - g_hash_table_destroy (s->blacklist); - } - if (s->metric_actions) { - g_hash_table_destroy (s->metric_actions); - } - - g_slice_free1 (sizeof (struct rspamd_settings), s); -} - -static struct rspamd_settings * -settings_ref (struct rspamd_settings *s) -{ - if (s == NULL) { - s = g_slice_alloc (sizeof (struct rspamd_settings)); - s->metric_scores = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free); - s->reject_scores = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free); - s->metric_actions = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, settings_actions_free); - s->factors = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free); - s->whitelist = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free); - s->blacklist = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free); - s->statfile_alias = NULL; - s->want_spam = FALSE; - s->ref_count = 1; - } - else { - s->ref_count ++; - } - - return s; -} - -static void -settings_unref (struct rspamd_settings *s) -{ - if (s != NULL) { - 
s->ref_count --; - if (s->ref_count <= 0) { - settings_free (s); - } - } -} - - -gchar * -json_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) -{ - struct json_buf *jb; - size_t free, off; - - if (data->cur_data == NULL) { - jb = g_malloc (sizeof (struct json_buf)); - jb->table = g_hash_table_ref (((struct json_buf *)data->prev_data)->table); - jb->buf = NULL; - jb->pos = NULL; - data->cur_data = jb; - } - else { - jb = data->cur_data; - } - - if (jb->buf == NULL) { - /* Allocate memory for buffer */ - jb->buflen = len * 2; - jb->buf = g_malloc (jb->buflen); - jb->pos = jb->buf; - } - - off = jb->pos - jb->buf; - free = jb->buflen - off; - - if ((gint)free < len) { - jb->buflen = MAX (jb->buflen * 2, jb->buflen + len * 2); - jb->buf = g_realloc (jb->buf, jb->buflen); - jb->pos = jb->buf + off; - } - - memcpy (jb->pos, chunk, len); - jb->pos += len; - - /* Say not to copy any part of this buffer */ - return NULL; -} - -void -json_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data) -{ - struct json_buf *jb; - gint nelts, i, n, j; - json_t *js, *cur_elt, *cur_nm, *it_val, *act_it, *act_value; - json_error_t je; - struct metric_action *new_act; - struct rspamd_settings *cur_settings; - GList *cur_act; - gchar *cur_name; - void *json_it; - double *score; - - if (data->prev_data) { - jb = data->prev_data; - /* Clean prev data */ - if (jb->table) { - g_hash_table_unref (jb->table); - } - if (jb->buf) { - g_free (jb->buf); - } - g_free (jb); - } - - /* Now parse json */ - if (data->cur_data) { - jb = data->cur_data; - } - else { - msg_err ("no data read"); - return; - } - if (jb->buf == NULL) { - msg_err ("no data read"); - return; - } - /* NULL terminate current buf */ - *jb->pos = '\0'; - - js = json_loads (jb->buf, &je); - if (!js) { - msg_err ("cannot load json data: parse error %s, on line %d", je.text, je.line); - return; - } - - if (!json_is_array (js)) { - json_decref (js); - msg_err ("loaded json is not an array"); - 
return; - } - - nelts = json_array_size (js); - for (i = 0; i < nelts; i++) { - cur_settings = settings_ref (NULL); - - cur_elt = json_array_get (js, i); - if (!cur_elt || !json_is_object (cur_elt)) { - json_decref (js); - msg_err ("loaded json is not an object"); - settings_unref (cur_settings); - return; - } - cur_nm = json_object_get (cur_elt, "name"); - if (cur_nm == NULL || !json_is_string (cur_nm)) { - json_decref (js); - msg_err ("name is not a string or not exists"); - settings_unref (cur_settings); - return; - } - cur_name = g_strdup (json_string_value (cur_nm)); - /* Now check other settings */ - /* Statfile */ - cur_nm = json_object_get (cur_elt, "statfile"); - if (cur_nm != NULL && json_is_string (cur_nm)) { - cur_settings->statfile_alias = g_strdup (json_string_value (cur_nm)); - } - /* Factors object */ - cur_nm = json_object_get (cur_elt, "factors"); - if (cur_nm != NULL && json_is_object (cur_nm)) { - json_it = json_object_iter (cur_nm); - while (json_it) { - it_val = json_object_iter_value (json_it); - if (it_val && json_is_string (it_val)) { - g_hash_table_insert (cur_settings->factors, g_strdup (json_object_iter_key (json_it)), g_strdup (json_string_value (it_val))); - } - json_it = json_object_iter_next (cur_nm, json_it); - } - } - /* Metrics object */ - cur_nm = json_object_get (cur_elt, "metrics"); - if (cur_nm != NULL && json_is_object (cur_nm)) { - json_it = json_object_iter (cur_nm); - while (json_it) { - it_val = json_object_iter_value (json_it); - if (it_val && json_is_number (it_val)) { - score = g_malloc (sizeof (double)); - *score = json_number_value (it_val); - g_hash_table_insert (cur_settings->metric_scores, - g_strdup (json_object_iter_key (json_it)), score); - } - else if (it_val && json_is_object (it_val)) { - /* Assume this as actions hash */ - cur_act = NULL; - act_it = json_object_iter (it_val); - while (act_it) { - act_value = json_object_iter_value (act_it); - - if (act_value && json_is_number (act_value)) { - /* Special 
cases */ - if (g_ascii_strcasecmp (json_object_iter_key (act_it), "spam_score") == 0) { - score = g_malloc (sizeof (double)); - *score = json_number_value (act_value); - g_hash_table_insert (cur_settings->metric_scores, - g_strdup (json_object_iter_key (json_it)), score); - } - else if (g_ascii_strcasecmp (json_object_iter_key (act_it), "reject_score") == 0) { - score = g_malloc (sizeof (double)); - *score = json_number_value (act_value); - g_hash_table_insert (cur_settings->reject_scores, - g_strdup (json_object_iter_key (json_it)), score); - } - else if (check_action_str (json_object_iter_key (act_it), &j)) { - new_act = g_malloc (sizeof (struct metric_action)); - new_act->action = j; - new_act->score = json_number_value (act_value); - cur_act = g_list_prepend (cur_act, new_act); - } - } - act_it = json_object_iter_next (it_val, act_it); - } - if (cur_act != NULL) { - g_hash_table_insert (cur_settings->metric_actions, - g_strdup (json_object_iter_key (json_it)), cur_act); - cur_act = NULL; - } - } - json_it = json_object_iter_next (cur_nm, json_it); - } - } - /* Rejects object */ - cur_nm = json_object_get (cur_elt, "rejects"); - if (cur_nm != NULL && json_is_object (cur_nm)) { - json_it = json_object_iter (cur_nm); - while (json_it) { - it_val = json_object_iter_value (json_it); - if (it_val && json_is_number (it_val)) { - score = g_malloc (sizeof (double)); - *score = json_number_value (it_val); - g_hash_table_insert (cur_settings->reject_scores, g_strdup (json_object_iter_key (json_it)), - score); - } - json_it = json_object_iter_next(cur_nm, json_it); - } - } - /* Whitelist object */ - cur_nm = json_object_get (cur_elt, "whitelist"); - if (cur_nm != NULL && json_is_array (cur_nm)) { - n = json_array_size(cur_nm); - for(j = 0; j < n; j++) { - it_val = json_array_get(cur_nm, j); - if (it_val && json_is_string (it_val)) { - if (strlen (json_string_value (it_val)) > 0) { - g_hash_table_insert (cur_settings->whitelist, - g_strdup (json_string_value (it_val)), 
g_strdup (json_string_value (it_val))); - } - } - - } - } - /* Blacklist object */ - cur_nm = json_object_get (cur_elt, "blacklist"); - if (cur_nm != NULL && json_is_array (cur_nm)) { - n = json_array_size(cur_nm); - for(j = 0; j < n; j++) { - it_val = json_array_get(cur_nm, j); - if (it_val && json_is_string (it_val)) { - if (strlen (json_string_value (it_val)) > 0) { - g_hash_table_insert (cur_settings->blacklist, - g_strdup (json_string_value (it_val)), g_strdup (json_string_value (it_val))); - } - } - - } - } - /* Want spam */ - cur_nm = json_object_get (cur_elt, "want_spam"); - if (cur_nm != NULL) { - if (json_is_true (cur_nm)) { - cur_settings->want_spam = TRUE; - } - } - g_hash_table_replace (((struct json_buf *)data->cur_data)->table, cur_name, cur_settings); - } - json_decref (js); -} - -gboolean -read_settings (const gchar *path, const gchar *description, struct config_file *cfg, GHashTable * table) -{ - struct json_buf *jb = g_malloc (sizeof (struct json_buf)), **pjb; - - pjb = g_malloc (sizeof (struct json_buf *)); - - jb->table = table; - jb->buf = NULL; - *pjb = jb; - - if (!add_map (cfg, path, description, json_read_cb, json_fin_cb, (void **)pjb)) { - msg_err ("cannot add map %s", path); - return FALSE; - } - - return TRUE; -} - -void -init_settings (struct config_file *cfg) -{ - cfg->domain_settings = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, - g_free, (GDestroyNotify)settings_unref); - cfg->user_settings = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, - g_free, (GDestroyNotify)settings_unref); -} - -static gboolean -check_setting (struct rspamd_task *task, struct rspamd_settings **user_settings, struct rspamd_settings **domain_settings) -{ - gchar *field = NULL, *domain = NULL; - gchar cmp_buf[1024]; - gint len; - - if (task->deliver_to != NULL) { - /* First try to use deliver-to field */ - field = task->deliver_to; - } - else if (task->user != NULL) { - /* Then user field */ - field = task->user; - } - 
else if (task->rcpt != NULL) { - /* Then first recipient */ - field = task->rcpt->data; - } - else { - return FALSE; - } - - domain = strchr (field, '@'); - if (domain == NULL) { - /* First try to search in first recipient */ - if (task->rcpt) { - domain = strchr (task->rcpt->data, '@'); - } - } - if (domain != NULL) { - domain++; - } - - /* First try to search per-user settings */ - if (field != NULL) { - if (*field == '<') { - field ++; - } - len = strcspn (field, ">"); - rspamd_strlcpy (cmp_buf, field, MIN ((gint)sizeof (cmp_buf), len + 1)); - *user_settings = g_hash_table_lookup (task->cfg->user_settings, cmp_buf); - } - if (domain != NULL) { - len = strcspn (domain, ">"); - rspamd_strlcpy (cmp_buf, domain, MIN ((gint)sizeof (cmp_buf), len + 1)); - *domain_settings = g_hash_table_lookup (task->cfg->domain_settings, cmp_buf); - } - - if (*domain_settings != NULL || *user_settings != NULL) { - return TRUE; - } - - return FALSE; -} - -static gboolean -check_bwhitelist (struct rspamd_task *task, struct rspamd_settings *s, gboolean *is_black) -{ - gchar *src_email = NULL, *src_domain = NULL, *data; - - if (task->from != NULL && *task->from != '\0') { - src_email = task->from; - } else { - return FALSE; - } - - src_domain = strchr (src_email, '@'); - if(src_domain != NULL) { - src_domain++; - } - - if ((((data = g_hash_table_lookup (s->blacklist, src_email)) != NULL) || - ( (src_domain != NULL) && ((data = g_hash_table_lookup (s->blacklist, src_domain)) != NULL)) )) { - *is_black = TRUE; - msg_info ("<%s> blacklisted as domain %s is in settings blacklist", task->message_id, data); - return TRUE; - } - if ((((data = g_hash_table_lookup (s->whitelist, src_email)) != NULL) || - ( (src_domain != NULL) && ((data = g_hash_table_lookup (s->whitelist, src_domain)) != NULL)) )) { - *is_black = FALSE; - msg_info ("<%s> whitelisted as domain %s is in settings blacklist", task->message_id, data); - return TRUE; - } - return FALSE; -} - -gboolean -check_metric_settings (struct 
metric_result *res, double *score, double *rscore) -{ - struct rspamd_settings *us = res->user_settings, *ds = res->domain_settings; - double *sc, *rs; - struct metric *metric = res->metric; - - /* XXX: what the fuck is that? */ - *rscore = 10.0; - - if (us != NULL) { - if ((rs = g_hash_table_lookup (us->reject_scores, metric->name)) != NULL) { - *rscore = *rs; - } - if ((sc = g_hash_table_lookup (us->metric_scores, metric->name)) != NULL) { - *score = *sc; - return TRUE; - } - /* Now check in domain settings */ - if (ds && ((rs = g_hash_table_lookup (ds->reject_scores, metric->name)) != NULL)) { - *rscore = *rs; - } - if (ds && (sc = g_hash_table_lookup (ds->metric_scores, metric->name)) != NULL) { - *score = *sc; - return TRUE; - } - } - else if (ds != NULL) { - if ((rs = g_hash_table_lookup (ds->reject_scores, metric->name)) != NULL) { - *rscore = *rs; - } - if ((sc = g_hash_table_lookup (ds->metric_scores, metric->name)) != NULL) { - *score = *sc; - return TRUE; - } - } - - return FALSE; -} - -gboolean -check_metric_action_settings (struct rspamd_task *task, struct metric_result *res, - double score, enum rspamd_metric_action *result) -{ - struct rspamd_settings *us = res->user_settings, *ds = res->domain_settings; - struct metric_action *act, *sel = NULL; - GList *cur; - enum rspamd_metric_action r = METRIC_ACTION_NOACTION; - gboolean black; - - if (us != NULL) { - /* Check whitelist and set appropriate action for whitelisted users */ - if (check_bwhitelist(task, us, &black)) { - if (black) { - *result = METRIC_ACTION_REJECT; - } - else { - *result = METRIC_ACTION_NOACTION; - } - return TRUE; - } - if ((cur = g_hash_table_lookup (us->metric_actions, res->metric->name)) != NULL) { - while (cur) { - act = cur->data; - if (score >= act->score) { - r = act->action; - sel = act; - } - cur = g_list_next (cur); - } - } - } - else if (ds != NULL) { - /* Check whitelist and set appropriate action for whitelisted users */ - if (check_bwhitelist(task, ds, &black)) { - if 
(black) { - *result = METRIC_ACTION_REJECT; - } - else { - *result = METRIC_ACTION_NOACTION; - } - return TRUE; - } - if ((cur = g_hash_table_lookup (ds->metric_actions, res->metric->name)) != NULL) { - while (cur) { - act = cur->data; - if (score >= act->score) { - r = act->action; - sel = act; - } - cur = g_list_next (cur); - } - } - } - - if (sel != NULL && result != NULL) { - *result = r; - return TRUE; - } - - return FALSE; -} - -gboolean -apply_metric_settings (struct rspamd_task *task, struct metric *metric, struct metric_result *res) -{ - struct rspamd_settings *us = NULL, *ds = NULL; - - if (check_setting (task, &us, &ds)) { - if (us != NULL || ds != NULL) { - if (us != NULL) { - res->user_settings = settings_ref (us); - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)settings_unref, - us); - } - if (ds != NULL) { - /* Need to ref hash table to avoid occasional data corruption */ - res->domain_settings = settings_ref (ds); - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)settings_unref, - ds); - } - } - else { - return FALSE; - } - } - - return TRUE; -} - -gboolean -check_factor_settings (struct metric_result *res, const gchar *symbol, double *factor) -{ - double *fc; - - if (res->user_settings != NULL) { - /* First search in user's settings */ - if ((fc = g_hash_table_lookup (res->user_settings->factors, symbol)) != NULL) { - *factor = *fc; - return TRUE; - } - /* Now check in domain settings */ - if (res->domain_settings && (fc = g_hash_table_lookup (res->domain_settings->factors, symbol)) != NULL) { - *factor = *fc; - return TRUE; - } - } - else if (res->domain_settings != NULL) { - if ((fc = g_hash_table_lookup (res->domain_settings->factors, symbol)) != NULL) { - *factor = *fc; - return TRUE; - } - } - - return FALSE; - -} - - -gboolean -check_want_spam (struct rspamd_task *task) -{ - struct rspamd_settings *us = NULL, *ds = NULL; - - if (check_setting (task, &us, &ds)) { - if (us != NULL) { - /* 
First search in user's settings */ - if (us->want_spam) { - return TRUE; - } - /* Now check in domain settings */ - if (ds && ds->want_spam) { - return TRUE; - } - } - else if (ds != NULL) { - if (ds->want_spam) { - return TRUE; - } - } - } - - return FALSE; -} - -/* - * vi:ts=4 - */ diff --git a/src/settings.h b/src/settings.h deleted file mode 100644 index 361700094..000000000 --- a/src/settings.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef RSPAMD_SETTINGS_H -#define RSPAMD_SETTINGS_H - -#include "config.h" -#include "main.h" - -struct rspamd_settings { - GHashTable *metric_scores; /**< hash table of metric require scores for this setting */ - GHashTable *reject_scores; /**< hash table of metric reject scores for this setting */ - GHashTable *metric_actions; /**< hash table of metric actions for this setting */ - GHashTable *factors; /**< hash table of new factors for this setting */ - GHashTable *whitelist; /**< hash table of whitelist for this setting */ - GHashTable *blacklist; /**< hash table of whitelist for this setting */ - gchar *statfile_alias; /**< alias for statfile used */ - gboolean want_spam; /**< if true disable rspamd checks */ - gint ref_count; /**< reference counter */ -}; - - -/* - * Read settings from specified path - */ -gboolean read_settings (const gchar *path, const gchar *description, struct config_file *cfg, GHashTable *table); - -/* - * Init configuration structures for settings - */ -void init_settings (struct config_file *cfg); - -/* - * Check scores settings - */ -gboolean check_metric_settings (struct metric_result *res, double *score, double *rscore); - -/* - * Check actions settings - */ -gboolean check_metric_action_settings (struct rspamd_task *task, struct metric_result *res, double score, enum rspamd_metric_action *result); - -/* - * Check individual weights for settings - */ -gboolean check_factor_settings (struct metric_result *res, const gchar *symbol, double *factor); - -/* - * Check want_spam flag - */ -gboolean 
check_want_spam (struct rspamd_task *task); - -/* - * Search settings for metric and store pointers to settings into metric_result structure - */ -gboolean apply_metric_settings (struct rspamd_task *task, struct metric *metric, struct metric_result *res); - -#endif diff --git a/src/smtp_proto.c b/src/smtp_proto.c deleted file mode 100644 index 3af1c3910..000000000 --- a/src/smtp_proto.c +++ /dev/null @@ -1,701 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "main.h" -#include "cfg_file.h" -#include "util.h" -#include "smtp.h" -#include "smtp_proto.h" -#include "smtp_utils.h" - -gchar * -make_smtp_error (rspamd_mempool_t *pool, gint error_code, const gchar *format, ...) 
-{ - va_list vp; - gchar *result = NULL, *p; - size_t len; - - va_start (vp, format); - len = g_printf_string_upper_bound (format, vp); - va_end (vp); - va_start (vp, format); - len += sizeof ("65535 ") + sizeof (CRLF) - 1; - result = rspamd_mempool_alloc (pool, len); - p = result + rspamd_snprintf (result, len, "%d ", error_code); - p = rspamd_vsnprintf (p, len - (p - result), format, vp); - *p++ = CR; *p++ = LF; *p = '\0'; - va_end (vp); - - return result; -} - - -gboolean -parse_smtp_command (struct smtp_session *session, f_str_t *line, struct smtp_command **cmd) -{ - enum { - SMTP_PARSE_START = 0, - SMTP_PARSE_SPACES, - SMTP_PARSE_ARGUMENT, - SMTP_PARSE_DONE - } state; - gchar *p, *c, ch, cmd_buf[4]; - guint i; - f_str_t *arg = NULL; - struct smtp_command *pcmd; - - if (line->len == 0) { - return FALSE; - } - - state = SMTP_PARSE_START; - c = line->begin; - p = c; - *cmd = rspamd_mempool_alloc0 (session->pool, sizeof (struct smtp_command)); - pcmd = *cmd; - - for (i = 0; i < line->len; i ++, p ++) { - ch = *p; - switch (state) { - case SMTP_PARSE_START: - if (ch == ' ' || ch == ':' || ch == CR || ch == LF || i == line->len - 1) { - if (i == line->len - 1) { - p ++; - } - if (p - c == 4) { - cmd_buf[0] = g_ascii_toupper (c[0]); - cmd_buf[1] = g_ascii_toupper (c[1]); - cmd_buf[2] = g_ascii_toupper (c[2]); - cmd_buf[3] = g_ascii_toupper (c[3]); - - if (memcmp (cmd_buf, "HELO", 4) == 0) { - pcmd->command = SMTP_COMMAND_HELO; - } - else if (memcmp (cmd_buf, "EHLO", 4) == 0) { - pcmd->command = SMTP_COMMAND_EHLO; - } - else if (memcmp (cmd_buf, "MAIL", 4) == 0) { - pcmd->command = SMTP_COMMAND_MAIL; - } - else if (memcmp (cmd_buf, "RCPT", 4) == 0) { - pcmd->command = SMTP_COMMAND_RCPT; - } - else if (memcmp (cmd_buf, "DATA", 4) == 0) { - pcmd->command = SMTP_COMMAND_DATA; - } - else if (memcmp (cmd_buf, "QUIT", 4) == 0) { - pcmd->command = SMTP_COMMAND_QUIT; - } - else if (memcmp (cmd_buf, "NOOP", 4) == 0) { - pcmd->command = SMTP_COMMAND_NOOP; - } - else if (memcmp 
(cmd_buf, "EXPN", 4) == 0) { - pcmd->command = SMTP_COMMAND_EXPN; - } - else if (memcmp (cmd_buf, "RSET", 4) == 0) { - pcmd->command = SMTP_COMMAND_RSET; - } - else if (memcmp (cmd_buf, "HELP", 4) == 0) { - pcmd->command = SMTP_COMMAND_HELP; - } - else if (memcmp (cmd_buf, "VRFY", 4) == 0) { - pcmd->command = SMTP_COMMAND_VRFY; - } - else { - msg_info ("invalid command: %*s", 4, cmd_buf); - return FALSE; - } - } - else { - /* Invalid command */ - msg_info ("invalid command: %*s", 4, c); - return FALSE; - } - /* Now check what we have */ - if (ch == ' ' || ch == ':') { - state = SMTP_PARSE_SPACES; - } - else if (ch == CR) { - state = SMTP_PARSE_DONE; - } - else if (ch == LF) { - return TRUE; - } - } - else if ((ch < 'A' || ch > 'Z') && (ch < 'a' || ch > 'z')) { - msg_info ("invalid letter code in SMTP command: %d", (gint)ch); - return FALSE; - } - break; - case SMTP_PARSE_SPACES: - if (ch == CR) { - state = SMTP_PARSE_DONE; - } - else if (ch == LF) { - goto end; - } - else if (ch != ' ' && ch != ':') { - state = SMTP_PARSE_ARGUMENT; - arg = rspamd_mempool_alloc (session->pool, sizeof (f_str_t)); - c = p; - } - break; - case SMTP_PARSE_ARGUMENT: - if (ch == ' ' || ch == ':' || ch == CR || ch == LF || i == line->len - 1) { - if (i == line->len - 1 && (ch != ' ' && ch != CR && ch != LF)) { - p ++; - } - arg->len = p - c; - arg->begin = rspamd_mempool_alloc (session->pool, arg->len); - memcpy (arg->begin, c, arg->len); - pcmd->args = g_list_prepend (pcmd->args, arg); - if (ch == ' ' || ch == ':') { - state = SMTP_PARSE_SPACES; - } - else if (ch == CR) { - state = SMTP_PARSE_DONE; - } - else { - goto end; - } - } - break; - case SMTP_PARSE_DONE: - if (ch == LF) { - goto end; - } - msg_info ("CR without LF in SMTP command"); - return FALSE; - } - } - -end: - if (pcmd->args) { - pcmd->args = g_list_reverse (pcmd->args); - rspamd_mempool_add_destructor (session->pool, (rspamd_mempool_destruct_t)g_list_free, pcmd->args); - } - return TRUE; -} - -static gboolean 
-check_smtp_path (f_str_t *path) -{ - guint i; - gchar *p; - - p = path->begin; - if (*p != '<' || path->len < 2) { - return FALSE; - } - for (i = 0; i < path->len; i++, p ++) { - if (*p == '>' && i != path->len - 1) { - return FALSE; - } - } - - return *(p - 1) == '>'; -} - -gboolean -parse_smtp_helo (struct smtp_session *session, struct smtp_command *cmd) -{ - f_str_t *arg; - - if (cmd->args == NULL) { - session->error = SMTP_ERROR_BAD_ARGUMENTS; - return FALSE; - } - arg = cmd->args->data; - session->helo = rspamd_mempool_alloc (session->pool, arg->len + 1); - rspamd_strlcpy (session->helo, arg->begin, arg->len + 1); - /* Now try to write reply */ - if (cmd->command == SMTP_COMMAND_HELO) { - /* No ESMTP */ - session->error = SMTP_ERROR_OK; - session->esmtp = FALSE; - return TRUE; - } - else { - /* Try to write all capabilities */ - session->esmtp = TRUE; - if (session->ctx->smtp_capabilities == NULL) { - session->error = SMTP_ERROR_OK; - return TRUE; - } - else { - session->error = session->ctx->smtp_capabilities; - return TRUE; - } - } - - return FALSE; -} - -gboolean -parse_smtp_from (struct smtp_session *session, struct smtp_command *cmd) -{ - f_str_t *arg; - GList *cur = cmd->args; - - if (cmd->args == NULL) { - session->error = SMTP_ERROR_BAD_ARGUMENTS; - return FALSE; - } - arg = cur->data; - /* First argument MUST be FROM */ - if (arg->len != 4 || ( - g_ascii_toupper (arg->begin[0]) != 'F' || - g_ascii_toupper (arg->begin[1]) != 'R' || - g_ascii_toupper (arg->begin[2]) != 'O' || - g_ascii_toupper (arg->begin[3]) != 'M')) { - session->error = SMTP_ERROR_BAD_ARGUMENTS; - return FALSE; - } - /* Next one is from address */ - cur = g_list_next (cur); - if (cur == NULL) { - session->error = SMTP_ERROR_BAD_ARGUMENTS; - return FALSE; - } - arg = cur->data; - if (check_smtp_path (arg)) { - session->from = cur; - } - else { - session->error = SMTP_ERROR_BAD_ARGUMENTS; - return FALSE; - } - - return TRUE; -} - -gboolean -parse_smtp_rcpt (struct smtp_session 
*session, struct smtp_command *cmd) -{ - f_str_t *arg; - GList *cur = cmd->args; - - if (cmd->args == NULL) { - session->error = SMTP_ERROR_BAD_ARGUMENTS; - return FALSE; - } - arg = cur->data; - /* First argument MUST be FROM */ - if (arg->len != 2 || ( - g_ascii_toupper (arg->begin[0]) != 'T' || - g_ascii_toupper (arg->begin[1]) != 'O')) { - session->error = SMTP_ERROR_BAD_ARGUMENTS; - return FALSE; - } - /* Next one is from address */ - cur = g_list_next (cur); - if (cur == NULL) { - session->error = SMTP_ERROR_BAD_ARGUMENTS; - return FALSE; - } - arg = cur->data; - if (check_smtp_path (arg)) { - session->rcpt = g_list_prepend (session->rcpt, cur); - } - else { - session->error = SMTP_ERROR_BAD_ARGUMENTS; - return FALSE; - } - - return TRUE; - -} - -/* Return -1 if there are some error, 1 if all is ok and 0 in case of incomplete reply */ -static gint -check_smtp_ustream_reply (f_str_t *in, gchar success_code) -{ - gchar *p; - - /* Check for 250 at the begin of line */ - if (in->len >= sizeof ("220 ") - 1) { - p = in->begin; - if (p[0] == success_code) { - /* Last reply line */ - if (p[3] == ' ') { - return 1; - } - else { - return 0; - } - } - else { - return -1; - } - } - - return -1; -} - -size_t -smtp_upstream_write_list (GList *args, gchar *buf, size_t buflen) -{ - GList *cur = args; - size_t r = 0; - f_str_t *arg; - - while (cur && r < buflen - 3) { - arg = cur->data; - r += rspamd_snprintf (buf + r, buflen - r, " %V", arg); - cur = g_list_next (cur); - } - - buf[r++] = CR; - buf[r++] = LF; - buf[r] = '\0'; - - return r; -} - -gboolean -smtp_upstream_write_socket (void *arg) -{ - struct smtp_session *session = arg; - - if (session->upstream_state == SMTP_STATE_IN_SENDFILE) { - session->upstream_state = SMTP_STATE_AFTER_DATA; - return rspamd_dispatcher_write (session->upstream_dispatcher, CRLF DATA_END_TRAILER, sizeof (CRLF DATA_END_TRAILER) - 1, FALSE, TRUE); - } - - return TRUE; -} - -gboolean -smtp_upstream_read_socket (f_str_t * in, void *arg) -{ - 
struct smtp_session *session = arg; - gchar outbuf[BUFSIZ]; - gint r; - - msg_debug ("in: %V, state: %d", in, session->upstream_state); - switch (session->upstream_state) { - case SMTP_STATE_GREETING: - r = check_smtp_ustream_reply (in, '2'); - if (r == -1) { - session->error = rspamd_mempool_alloc (session->pool, in->len + 1); - rspamd_strlcpy (session->error, in->begin, in->len + 1); - /* XXX: assume upstream errors as critical errors */ - session->state = SMTP_STATE_CRITICAL_ERROR; - rspamd_dispatcher_restore (session->dispatcher); - if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { - goto err; - } - if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { - goto err; - } - destroy_session (session->s); - return FALSE; - } - else if (r == 1) { - if (session->ctx->use_xclient) { - r = rspamd_snprintf (outbuf, sizeof (outbuf), "XCLIENT NAME=%s ADDR=%s" CRLF, - session->resolved ? session->hostname : "[UNDEFINED]", - inet_ntoa (session->client_addr)); - session->upstream_state = SMTP_STATE_HELO; - return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE); - } - else { - session->upstream_state = SMTP_STATE_FROM; - if (session->helo) { - r = rspamd_snprintf (outbuf, sizeof (outbuf), "%s %s" CRLF, - session->esmtp ? "EHLO" : "HELO", - session->helo); - } - else { - return smtp_upstream_read_socket (in, arg); - } - return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE); - } - } - break; - case SMTP_STATE_HELO: - r = check_smtp_ustream_reply (in, '2'); - if (r == -1) { - session->error = rspamd_mempool_alloc (session->pool, in->len + 1); - rspamd_strlcpy (session->error, in->begin, in->len + 1); - /* XXX: assume upstream errors as critical errors */ - session->state = SMTP_STATE_CRITICAL_ERROR; - rspamd_dispatcher_restore (session->dispatcher); - if (! 
rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { - goto err; - } - if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { - goto err; - } - destroy_session (session->s); - return FALSE; - } - else if (r == 1) { - session->upstream_state = SMTP_STATE_FROM; - if (session->helo) { - r = rspamd_snprintf (outbuf, sizeof (outbuf), "%s %s" CRLF, - session->esmtp ? "EHLO" : "HELO", - session->helo); - } - else { - return smtp_upstream_read_socket (in, arg); - } - return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE); - } - break; - case SMTP_STATE_FROM: - r = check_smtp_ustream_reply (in, '2'); - if (r == -1) { - session->error = rspamd_mempool_alloc (session->pool, in->len + 1); - rspamd_strlcpy (session->error, in->begin, in->len + 1); - /* XXX: assume upstream errors as critical errors */ - session->state = SMTP_STATE_CRITICAL_ERROR; - rspamd_dispatcher_restore (session->dispatcher); - if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { - goto err; - } - if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { - goto err; - } - destroy_session (session->s); - return FALSE; - } - else if (r == 1) { - r = rspamd_snprintf (outbuf, sizeof (outbuf), "MAIL FROM: "); - r += smtp_upstream_write_list (session->from, outbuf + r, sizeof (outbuf) - r); - session->upstream_state = SMTP_STATE_RCPT; - return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE); - } - break; - case SMTP_STATE_RCPT: - r = check_smtp_ustream_reply (in, '2'); - if (r == -1) { - session->error = rspamd_mempool_alloc (session->pool, in->len + 1); - rspamd_strlcpy (session->error, in->begin, in->len + 1); - /* XXX: assume upstream errors as critical errors */ - session->state = SMTP_STATE_CRITICAL_ERROR; - rspamd_dispatcher_restore (session->dispatcher); - if (! 
rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { - goto err; - } - if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { - goto err; - } - destroy_session (session->s); - return FALSE; - } - else if (r == 1) { - r = rspamd_snprintf (outbuf, sizeof (outbuf), "RCPT TO: "); - session->cur_rcpt = g_list_first (session->rcpt); - r += smtp_upstream_write_list (session->cur_rcpt->data, outbuf + r, sizeof (outbuf) - r); - session->cur_rcpt = g_list_next (session->cur_rcpt); - session->upstream_state = SMTP_STATE_BEFORE_DATA; - return rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE); - } - break; - case SMTP_STATE_BEFORE_DATA: - r = check_smtp_ustream_reply (in, '2'); - if (r == -1) { - session->error = rspamd_mempool_alloc (session->pool, in->len + 1); - rspamd_strlcpy (session->error, in->begin, in->len + 1); - rspamd_dispatcher_restore (session->dispatcher); - if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { - goto err; - } - if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { - goto err; - } - if (session->cur_rcpt) { - session->rcpt = g_list_delete_link (session->rcpt, session->cur_rcpt); - } - else { - session->rcpt = g_list_delete_link (session->rcpt, session->rcpt); - } - session->errors ++; - session->state = SMTP_STATE_RCPT; - return TRUE; - } - else if (r == 1) { - if (session->cur_rcpt != NULL) { - r = rspamd_snprintf (outbuf, sizeof (outbuf), "RCPT TO: "); - r += smtp_upstream_write_list (session->cur_rcpt, outbuf + r, sizeof (outbuf) - r); - session->cur_rcpt = g_list_next (session->cur_rcpt); - if (! 
rspamd_dispatcher_write (session->upstream_dispatcher, outbuf, r, FALSE, FALSE)) { - goto err; - } - } - else { - session->upstream_state = SMTP_STATE_DATA; - rspamd_dispatcher_pause (session->upstream_dispatcher); - } - session->error = rspamd_mempool_alloc (session->pool, in->len + 1); - rspamd_strlcpy (session->error, in->begin, in->len + 1); - /* Write to client */ - if (! rspamd_dispatcher_write (session->dispatcher, session->error, in->len, FALSE, TRUE)) { - goto err; - } - if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { - goto err; - } - if (session->state == SMTP_STATE_WAIT_UPSTREAM) { - rspamd_dispatcher_restore (session->dispatcher); - session->state = SMTP_STATE_RCPT; - } - } - break; - case SMTP_STATE_DATA: - r = check_smtp_ustream_reply (in, '3'); - if (r == -1) { - session->error = rspamd_mempool_alloc (session->pool, in->len + 1); - rspamd_strlcpy (session->error, in->begin, in->len + 1); - /* XXX: assume upstream errors as critical errors */ - session->state = SMTP_STATE_CRITICAL_ERROR; - rspamd_dispatcher_restore (session->dispatcher); - if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { - goto err; - } - if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { - goto err; - } - destroy_session (session->s); - return FALSE; - } - else if (r == 1) { - if (! make_smtp_tempfile (session)) { - session->error = SMTP_ERROR_FILE; - session->state = SMTP_STATE_CRITICAL_ERROR; - rspamd_dispatcher_restore (session->dispatcher); - if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { - goto err; - } - destroy_session (session->s); - return FALSE; - } - session->state = SMTP_STATE_AFTER_DATA; - session->error = SMTP_ERROR_DATA_OK; - rspamd_dispatcher_restore (session->dispatcher); - if (! 
rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { - goto err; - } - rspamd_dispatcher_pause (session->upstream_dispatcher); - rspamd_set_dispatcher_policy (session->dispatcher, BUFFER_LINE, 0); - session->dispatcher->strip_eol = FALSE; - return TRUE; - } - break; - case SMTP_STATE_AFTER_DATA: - session->error = rspamd_mempool_alloc (session->pool, in->len + 1); - rspamd_strlcpy (session->error, in->begin, in->len + 1); - session->state = SMTP_STATE_DATA; - rspamd_dispatcher_restore (session->dispatcher); - if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { - goto err; - } - if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { - goto err; - } - if (! rspamd_dispatcher_write (session->upstream_dispatcher, "QUIT" CRLF, sizeof ("QUIT" CRLF) - 1, FALSE, TRUE)) { - goto err; - } - session->upstream_state = SMTP_STATE_END; - return TRUE; - break; - case SMTP_STATE_END: - r = check_smtp_ustream_reply (in, '5'); - if (r == -1) { - session->error = rspamd_mempool_alloc (session->pool, in->len + 1); - rspamd_strlcpy (session->error, in->begin, in->len + 1); - /* XXX: assume upstream errors as critical errors */ - session->state = SMTP_STATE_CRITICAL_ERROR; - rspamd_dispatcher_restore (session->dispatcher); - if (!rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { - goto err; - } - if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { - goto err; - } - destroy_session (session->s); - return FALSE; - } - else { - remove_normal_event (session->s, (event_finalizer_t)smtp_upstream_finalize_connection, session); - } - return FALSE; - break; - default: - msg_err ("got upstream reply at unexpected state: %d, reply: %V", session->upstream_state, in); - session->state = SMTP_STATE_CRITICAL_ERROR; - rspamd_dispatcher_restore (session->dispatcher); - if (! 
rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { - goto err; - } - if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { - goto err; - } - destroy_session (session->s); - return FALSE; - } - - return TRUE; -err: - msg_warn ("write error occured"); - return FALSE; -} - -void -smtp_upstream_err_socket (GError *err, void *arg) -{ - struct smtp_session *session = arg; - - msg_info ("abnormally closing connection with upstream %s, error: %s", session->upstream->name, err->message); - session->error = SMTP_ERROR_UPSTREAM; - session->state = SMTP_STATE_CRITICAL_ERROR; - /* XXX: assume upstream errors as critical errors */ - rspamd_dispatcher_restore (session->dispatcher); - if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { - return; - } - if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { - return; - } - upstream_fail (&session->upstream->up, session->session_time); - destroy_session (session->s); -} - -void -smtp_upstream_finalize_connection (gpointer data) -{ - struct smtp_session *session = data; - - if (session->state != SMTP_STATE_CRITICAL_ERROR) { - if (! 
rspamd_dispatcher_write (session->upstream_dispatcher, "QUIT" CRLF, 0, FALSE, TRUE)) { - msg_warn ("cannot send correctly closing message to upstream"); - } - } - rspamd_remove_dispatcher (session->upstream_dispatcher); - session->upstream_dispatcher = NULL; - close (session->upstream_sock); - session->upstream_sock = -1; -} diff --git a/src/smtp_proto.h b/src/smtp_proto.h deleted file mode 100644 index 42fecd255..000000000 --- a/src/smtp_proto.h +++ /dev/null @@ -1,95 +0,0 @@ -#ifndef RSPAMD_SMTP_PROTO_H -#define RSPAMD_SMTP_PROTO_H - -#include "config.h" -#include "smtp.h" - -/* SMTP errors */ -#define SMTP_ERROR_BAD_COMMAND "500 Syntax error, command unrecognized" CRLF -#define SMTP_ERROR_BAD_ARGUMENTS "501 Syntax error in parameters or arguments" CRLF -#define SMTP_ERROR_SEQUENCE "503 Bad sequence of commands" CRLF -#define SMTP_ERROR_RECIPIENTS "554 No valid recipients" CRLF -#define SMTP_ERROR_UNIMPLIMENTED "502 Command not implemented" CRLF -#define SMTP_ERROR_LIMIT "505 Too many errors. Aborting." CRLF -#define SMTP_ERROR_UPSTREAM "421 Service not available, closing transmission channel" CRLF -#define SMTP_ERROR_FILE "420 Service not available, filesystem error" CRLF -#define SMTP_ERROR_OK "250 Requested mail action okay, completed" CRLF -#define SMTP_ERROR_DATA_OK "354 Start mail input; end with ." CRLF - -#define DATA_END_TRAILER "." 
CRLF - -#define XCLIENT_HOST_UNAVAILABLE "[UNAVAILABLE]" -#define XCLIENT_HOST_TEMPFAIL "[TEMPUNAVAIL]" - -#define MAX_SMTP_UPSTREAMS 128 - -struct smtp_command { - enum { - SMTP_COMMAND_HELO, - SMTP_COMMAND_EHLO, - SMTP_COMMAND_QUIT, - SMTP_COMMAND_NOOP, - SMTP_COMMAND_MAIL, - SMTP_COMMAND_RCPT, - SMTP_COMMAND_RSET, - SMTP_COMMAND_DATA, - SMTP_COMMAND_VRFY, - SMTP_COMMAND_EXPN, - SMTP_COMMAND_HELP - } command; - GList *args; -}; - -/* - * Generate SMTP error message - */ -gchar * make_smtp_error (rspamd_mempool_t *pool, gint error_code, const gchar *format, ...); - -/* - * Parse a single SMTP command - */ -gboolean parse_smtp_command (struct smtp_session *session, f_str_t *line, struct smtp_command **cmd); - -/* - * Parse HELO command - */ -gboolean parse_smtp_helo (struct smtp_session *session, struct smtp_command *cmd); - -/* - * Parse MAIL command - */ -gboolean parse_smtp_from (struct smtp_session *session, struct smtp_command *cmd); - -/* - * Parse RCPT command - */ -gboolean parse_smtp_rcpt (struct smtp_session *session, struct smtp_command *cmd); - -/* Upstream SMTP */ - -/* - * Read a line from SMTP upstream - */ -gboolean smtp_upstream_read_socket (f_str_t * in, void *arg); - -/* - * Write to SMTP upstream - */ -gboolean smtp_upstream_write_socket (void *arg); - -/* - * Error handler for SMTP upstream - */ -void smtp_upstream_err_socket (GError *err, void *arg); - -/* - * Terminate connection with upstream - */ -void smtp_upstream_finalize_connection (gpointer data); - -/* - * Write a list of strings to the upstream - */ -size_t smtp_upstream_write_list (GList *args, gchar *buf, size_t buflen); - -#endif diff --git a/src/smtp_utils.c b/src/smtp_utils.c deleted file mode 100644 index 5178de9dd..000000000 --- a/src/smtp_utils.c +++ /dev/null @@ -1,362 +0,0 @@ -/* Copyright (c) 2010, Vsevolod Stakhov - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "main.h" -#include "filter.h" -#include "settings.h" -#include "smtp.h" -#include "smtp_proto.h" - -void -free_smtp_session (gpointer arg) -{ - struct smtp_session *session = arg; - - if (session) { - if (session->task) { - rspamd_task_free (session->task, FALSE); - if (session->task->msg->str) { - munmap (session->task->msg->str, session->task->msg->len); - } - } - if (session->rcpt) { - g_list_free (session->rcpt); - } - if (session->dispatcher) { - rspamd_remove_dispatcher (session->dispatcher); - } - close (session->sock); - if (session->temp_name != NULL) { - unlink (session->temp_name); - } - if (session->temp_fd != -1) { - close (session->temp_fd); - } - rspamd_mempool_delete (session->pool); - g_free (session); - } -} - -gboolean -create_smtp_upstream_connection (struct smtp_session *session) -{ - struct smtp_upstream *selected; - - /* Try to select upstream */ - selected = (struct smtp_upstream *)get_upstream_round_robin (session->ctx->upstreams, - session->ctx->upstream_num, sizeof (struct smtp_upstream), - session->session_time, DEFAULT_UPSTREAM_ERROR_TIME, DEFAULT_UPSTREAM_DEAD_TIME, DEFAULT_UPSTREAM_MAXERRORS); - if (selected == NULL) { - msg_err ("no upstreams suitable found"); - return FALSE; - } - - session->upstream = selected; - - /* Now try to create socket */ - session->upstream_sock = make_universal_socket (selected->addr, selected->port, SOCK_STREAM, TRUE, FALSE, FALSE); - if (session->upstream_sock == -1) { - msg_err ("cannot make a connection to %s", selected->name); - upstream_fail (&selected->up, session->session_time); - return FALSE; - } - /* Create a dispatcher for upstream connection */ - session->upstream_dispatcher = rspamd_create_dispatcher (session->ev_base, session->upstream_sock, BUFFER_LINE, - smtp_upstream_read_socket, smtp_upstream_write_socket, smtp_upstream_err_socket, - &session->ctx->smtp_timeout, session); - session->state = SMTP_STATE_WAIT_UPSTREAM; - session->upstream_state = 
SMTP_STATE_GREETING; - register_async_event (session->s, (event_finalizer_t)smtp_upstream_finalize_connection, session, g_quark_from_static_string ("smtp proxy")); - return TRUE; -} - -gboolean -smtp_send_upstream_message (struct smtp_session *session) -{ - rspamd_dispatcher_pause (session->dispatcher); - rspamd_dispatcher_restore (session->upstream_dispatcher); - - session->upstream_state = SMTP_STATE_IN_SENDFILE; - session->state = SMTP_STATE_WAIT_UPSTREAM; - if (! rspamd_dispatcher_sendfile (session->upstream_dispatcher, session->temp_fd, session->temp_size)) { - msg_err ("sendfile failed: %s", strerror (errno)); - goto err; - } - return TRUE; - -err: - session->error = SMTP_ERROR_FILE; - session->state = SMTP_STATE_CRITICAL_ERROR; - if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { - return FALSE; - } - destroy_session (session->s); - return FALSE; -} - -struct smtp_metric_callback_data { - struct smtp_session *session; - enum rspamd_metric_action action; - struct metric_result *res; - gchar *log_buf; - gint log_offset; - gint log_size; - gboolean alive; -}; - -static void -smtp_metric_symbols_callback (gpointer key, gpointer value, void *user_data) -{ - struct smtp_metric_callback_data *cd = user_data; - - cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "%s,", (gchar *)key); -} - -static void -smtp_metric_callback (gpointer key, gpointer value, gpointer ud) -{ - struct smtp_metric_callback_data *cd = ud; - struct metric_result *metric_res = value; - enum rspamd_metric_action action = METRIC_ACTION_NOACTION; - double ms = 0, rs = 0; - gboolean is_spam = FALSE; - struct rspamd_task *task; - - task = cd->session->task; - - if (!check_metric_settings (metric_res, &ms, &rs)) { - ms = metric_res->metric->actions[METRIC_ACTION_REJECT].score; - rs = metric_res->metric->actions[METRIC_ACTION_REJECT].score; - } - if (! 
check_metric_action_settings (task, metric_res, metric_res->score, &action)) { - action = check_metric_action (metric_res->score, ms, metric_res->metric); - } - if (metric_res->score >= ms) { - is_spam = 1; - } - if (action < cd->action) { - cd->action = action; - cd->res = metric_res; - } - - if (!task->is_skipped) { - cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "(%s: %c (%s): [%.2f/%.2f/%.2f] [", - (gchar *)key, is_spam ? 'T' : 'F', str_action_metric (action), metric_res->score, ms, rs); - } - else { - cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "(%s: %c (default): [%.2f/%.2f/%.2f] [", - (gchar *)key, 'S', metric_res->score, ms, rs); - - } - g_hash_table_foreach (metric_res->symbols, smtp_metric_symbols_callback, cd); - /* Remove last , from log buf */ - if (cd->log_buf[cd->log_offset - 1] == ',') { - cd->log_buf[--cd->log_offset] = '\0'; - } - -#ifdef HAVE_CLOCK_GETTIME - cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "]), len: %z, time: %s,", - task->msg->len, calculate_check_time (&task->tv, &task->ts, task->cfg->clock_res, &task->scan_milliseconds)); -#else - cd->log_offset += rspamd_snprintf (cd->log_buf + cd->log_offset, cd->log_size - cd->log_offset, "]), len: %z, time: %s,", - task->msg->len, calculate_check_time (&task->tv, task->cfg->clock_res, &task->scan_milliseconds)); -#endif -} - -gboolean -make_smtp_tempfile (struct smtp_session *session) -{ - gsize r; - - r = strlen (session->cfg->temp_dir) + sizeof ("/rspamd-XXXXXX"); - session->temp_name = rspamd_mempool_alloc (session->pool, r); - rspamd_snprintf (session->temp_name, r, "%s%crspamd-XXXXXX", session->cfg->temp_dir, G_DIR_SEPARATOR); -#ifdef HAVE_MKSTEMP - /* Umask is set before */ - session->temp_fd = mkstemp (session->temp_name); -#else - session->temp_fd = g_mkstemp_full (session->temp_name, O_RDWR, S_IWUSR | S_IRUSR); -#endif - if 
(session->temp_fd == -1) { - msg_err ("mkstemp error: %s", strerror (errno)); - - return FALSE; - } - - return TRUE; -} - -gboolean -write_smtp_reply (struct smtp_session *session) -{ - gchar logbuf[1024], *new_subject; - const gchar *old_subject; - struct smtp_metric_callback_data cd; - GMimeStream *stream; - gint old_fd, sublen; - - /* Check metrics */ - cd.session = session; - cd.action = METRIC_ACTION_NOACTION; - cd.res = NULL; - cd.log_buf = logbuf; - cd.log_offset = rspamd_snprintf (logbuf, sizeof (logbuf), "id: <%s>, qid: <%s>, ", - session->task->message_id, session->task->queue_id); - cd.log_size = sizeof (logbuf); - if (session->task->user) { - cd.log_offset += rspamd_snprintf (logbuf + cd.log_offset, sizeof (logbuf) - cd.log_offset, - "user: %s, ", session->task->user); - } - - g_hash_table_foreach (session->task->results, smtp_metric_callback, &cd); - - msg_info ("%s", logbuf); - - if (cd.action <= METRIC_ACTION_REJECT) { - if (! rspamd_dispatcher_write (session->dispatcher, session->ctx->reject_message, 0, FALSE, TRUE)) { - return FALSE; - } - if (! rspamd_dispatcher_write (session->dispatcher, CRLF, sizeof (CRLF) - 1, FALSE, TRUE)) { - return FALSE; - } - destroy_session (session->s); - return FALSE; - } - else if (cd.action <= METRIC_ACTION_ADD_HEADER || cd.action <= METRIC_ACTION_REWRITE_SUBJECT) { - old_fd = session->temp_fd; - if (! make_smtp_tempfile (session)) { - session->error = SMTP_ERROR_FILE; - session->state = SMTP_STATE_CRITICAL_ERROR; - rspamd_dispatcher_restore (session->dispatcher); - if (! 
rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { - goto err; - } - destroy_session (session->s); - return FALSE; - } - - if (cd.action <= METRIC_ACTION_REWRITE_SUBJECT) { - /* XXX: add this action */ - old_subject = g_mime_message_get_subject (session->task->message); - if (old_subject != NULL) { - sublen = strlen (old_subject) + sizeof (SPAM_SUBJECT); - new_subject = rspamd_mempool_alloc (session->pool, sublen); - rspamd_snprintf (new_subject, sublen, "%s%s", SPAM_SUBJECT, old_subject); - } - else { - new_subject = SPAM_SUBJECT; - } - g_mime_message_set_subject (session->task->message, new_subject); - } - else if (cd.action <= METRIC_ACTION_ADD_HEADER) { -#ifndef GMIME24 - g_mime_message_add_header (session->task->message, "X-Spam", "true"); -#else - g_mime_object_append_header (GMIME_OBJECT (session->task->message), "X-Spam", "true"); -#endif - } - stream = g_mime_stream_fs_new (session->temp_fd); - g_mime_stream_fs_set_owner (GMIME_STREAM_FS (stream), FALSE); - close (old_fd); - - if (g_mime_object_write_to_stream (GMIME_OBJECT (session->task->message), stream) == -1) { - msg_err ("cannot write MIME object to stream: %s", strerror (errno)); - session->error = SMTP_ERROR_FILE; - session->state = SMTP_STATE_CRITICAL_ERROR; - rspamd_dispatcher_restore (session->dispatcher); - if (! rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { - goto err; - } - destroy_session (session->s); - return FALSE; - } - g_object_unref (stream); - } - /* XXX: Add other actions */ - return smtp_send_upstream_message (session); -err: - session->error = SMTP_ERROR_FILE; - session->state = SMTP_STATE_CRITICAL_ERROR; - if (! 
rspamd_dispatcher_write (session->dispatcher, session->error, 0, FALSE, TRUE)) { - return FALSE; - } - destroy_session (session->s); - return FALSE; -} - -gboolean -parse_upstreams_line (rspamd_mempool_t *pool, struct smtp_upstream *upstreams, const gchar *line, gsize *count) -{ - gchar **strv, *p, *t, *tt, *err_str; - guint32 num, i; - struct smtp_upstream *cur; - gchar resolved_path[PATH_MAX]; - - strv = g_strsplit_set (line, ",; ", -1); - num = g_strv_length (strv); - - if (num >= MAX_SMTP_UPSTREAMS) { - msg_err ("cannot define %d upstreams %d is max", num, MAX_SMTP_UPSTREAMS); - return FALSE; - } - *count = 0; - - for (i = 0; i < num; i ++) { - p = strv[i]; - cur = &upstreams[*count]; - if ((t = strrchr (p, ':')) != NULL && (tt = strchr (p, ':')) != t) { - /* Assume that after last `:' we have weigth */ - *t = '\0'; - t ++; - errno = 0; - cur->up.priority = strtoul (t, &err_str, 10); - if (errno != 0 || (err_str && *err_str != '\0')) { - msg_err ("cannot convert weight: %s, %s", t, strerror (errno)); - g_strfreev (strv); - return FALSE; - } - } - if (*p == '/') { - cur->is_unix = TRUE; - if (realpath (p, resolved_path) == NULL) { - msg_err ("cannot resolve path: %s", resolved_path); - g_strfreev (strv); - return FALSE; - } - cur->name = rspamd_mempool_strdup (pool, resolved_path); - (*count) ++; - } - else { - if (! 
parse_host_port (pool, p, &cur->addr, &cur->port)) { - g_strfreev (strv); - return FALSE; - } - cur->name = rspamd_mempool_strdup (pool, p); - (*count) ++; - } - } - - g_strfreev (strv); - return TRUE; -} diff --git a/src/smtp_utils.h b/src/smtp_utils.h deleted file mode 100644 index 652b6759f..000000000 --- a/src/smtp_utils.h +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef SMTP_UTILS_H_ -#define SMTP_UTILS_H_ - -#include "config.h" -#include "main.h" -#include "smtp.h" - -/** - * @file smtp_utils.h - * Contains utilities for smtp protocol handling - */ - -struct smtp_upstream { - struct upstream up; - - const gchar *name; - gchar *addr; - guint16 port; - gboolean is_unix; -}; - -#define MAX_SMTP_UPSTREAMS 128 - -struct smtp_session; - -/** - * Send message to upstream - * @param session session object - */ -gboolean smtp_send_upstream_message (struct smtp_session *session); - -/** - * Create connection to upstream - * @param session session object - */ -gboolean create_smtp_upstream_connection (struct smtp_session *session); - -/** - * Create temporary file for smtp session - */ -gboolean make_smtp_tempfile (struct smtp_session *session); - -/** - * Write reply to upstream - * @param session session object - */ -gboolean write_smtp_reply (struct smtp_session *session); - -/** - * Frees smtp session object - */ -void free_smtp_session (gpointer arg); - -/** - * Parse upstreams line - * @param upstreams pointer to the array of upstreams (must be at least MAX_SMTP_UPSTREAMS size) - * @param line description line - * @param count targeted count - * @return - */ -gboolean parse_upstreams_line (rspamd_mempool_t *pool, struct smtp_upstream *upstreams, const gchar *line, gsize *count); - -#endif /* SMTP_UTILS_H_ */ diff --git a/src/spf.c b/src/spf.c deleted file mode 100644 index 12f1513d4..000000000 --- a/src/spf.c +++ /dev/null @@ -1,1465 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "dns.h" -#include "spf.h" -#include "main.h" -#include "message.h" -#include "filter.h" - -#define SPF_VER1_STR "v=spf1" -#define SPF_VER2_STR "spf2." -#define SPF_SCOPE_PRA "pra" -#define SPF_SCOPE_MFROM "mfrom" -#define SPF_ALL "all" -#define SPF_A "a" -#define SPF_IP4 "ip4" -#define SPF_IP6 "ip6" -#define SPF_PTR "ptr" -#define SPF_MX "mx" -#define SPF_EXISTS "exists" -#define SPF_INCLUDE "include" -#define SPF_REDIRECT "redirect" -#define SPF_EXP "exp" - -/** SPF limits for avoiding abuse **/ -#define SPF_MAX_NESTING 10 -#define SPF_MAX_DNS_REQUESTS 30 - -/** - * State machine for SPF record: - * - * spf_mech ::= +|-|~|? 
- * - * spf_body ::= spf=v1 [] - * spf_command ::= [spf_mech]all|a|||ptr|mx||| - * - * spf_domain ::= [:domain][/mask] - * spf_ip4 ::= ip[/mask] - * ip4 ::= ip4: - * mx ::= mx - * a ::= a - * ptr ::= ptr[:domain] - * exists ::= exists:domain - * include ::= include:domain - * redirect ::= redirect:domain - * exp ::= exp:domain - * - */ - -#undef SPF_DEBUG - -struct spf_dns_cb { - struct spf_record *rec; - struct spf_addr *addr; - spf_action_t cur_action; - gboolean in_include; -}; - -#define CHECK_REC(rec) \ -do { \ - if ((rec)->nested > SPF_MAX_NESTING || \ - (rec)->dns_requests > SPF_MAX_DNS_REQUESTS) { \ - msg_info ("<%s> spf recursion limit %d is reached, domain: %s", \ - (rec)->task->message_id, (rec)->dns_requests, \ - (rec)->sender_domain); \ - return FALSE; \ - } \ -} while (0) \ - -static gboolean parse_spf_record (struct rspamd_task *task, struct spf_record *rec); -static void start_spf_parse (struct spf_record *rec, gchar *begin, guint ttl); - -/* Determine spf mech */ -static spf_mech_t -check_spf_mech (const gchar *elt, gboolean *need_shift) -{ - g_assert (elt != NULL); - - *need_shift = TRUE; - - switch (*elt) { - case '-': - return SPF_FAIL; - case '~': - return SPF_SOFT_FAIL; - case '+': - return SPF_PASS; - case '?': - return SPF_NEUTRAL; - default: - *need_shift = FALSE; - return SPF_PASS; - } -} - -/* Debugging function that dumps spf record in log */ -static void -dump_spf_record (GList *addrs) -{ - struct spf_addr *addr; - GList *cur; - gint r = 0; - gchar logbuf[BUFSIZ], c; -#ifdef HAVE_INET_PTON - gchar ipbuf[INET6_ADDRSTRLEN]; -#else - struct in_addr ina; -#endif - - cur = addrs; - - while (cur) { - addr = cur->data; - if (!addr->is_list) { - switch (addr->mech) { - case SPF_FAIL: - c = '-'; - break; - case SPF_SOFT_FAIL: - case SPF_NEUTRAL: - c = '~'; - break; - case SPF_PASS: - c = '+'; - break; - } -#ifdef HAVE_INET_PTON - if (addr->data.normal.ipv6) { - inet_ntop (AF_INET6, &addr->data.normal.d.in6, ipbuf, sizeof (ipbuf)); - - } - else { 
- inet_ntop (AF_INET, &addr->data.normal.d.in4, ipbuf, sizeof (ipbuf)); - } - r += snprintf (logbuf + r, sizeof (logbuf) - r, "%c%s/%d; ", c, ipbuf, addr->data.normal.mask); -#else - ina.s_addr = addr->data.normal.d.in4.s_addr; - r += snprintf (logbuf + r, sizeof (logbuf) - r, "%c%s/%d; ", c, inet_ntoa (ina), addr->data.normal.mask); -#endif - } - else { - r += snprintf (logbuf + r, sizeof (logbuf) - r, "%s; ", addr->spf_string); - dump_spf_record (addr->data.list); - } - cur = g_list_next (cur); - } - msg_info ("spf record: %s", logbuf); -} - -/* Find position of address inside addrs list */ -static GList * -spf_addr_find (GList *addrs, gpointer to_find) -{ - struct spf_addr *addr; - GList *cur, *res = NULL; - - cur = addrs; - while (cur) { - addr = cur->data; - if (addr->is_list) { - if ((res = spf_addr_find (addr->data.list, to_find)) != NULL) { - return cur; - } - } - else { - if (cur->data == to_find) { - return cur; - } - } - cur = g_list_next (cur); - } - - return res; -} - -/* - * Destructor for spf record - */ -static void -spf_record_destructor (gpointer r) -{ - struct spf_record *rec = r; - GList *cur; - struct spf_addr *addr; - - if (rec->addrs) { - cur = rec->addrs; - while (cur) { - addr = cur->data; - if (addr->is_list && addr->data.list != NULL) { - g_list_free (addr->data.list); - } - cur = g_list_next (cur); - } - g_list_free (rec->addrs); - } -} - -static gboolean -parse_spf_ipmask (const gchar *begin, struct spf_addr *addr, struct spf_record *rec) -{ - const gchar *pos; - gchar mask_buf[5] = {'\0'}, *p; - gint state = 0, dots = 0; -#ifdef HAVE_INET_PTON - gchar ip_buf[INET6_ADDRSTRLEN]; -#else - gchar ip_buf[INET_ADDRSTRLEN]; -#endif - - bzero (ip_buf, sizeof (ip_buf)); - bzero (mask_buf, sizeof (mask_buf)); - pos = begin; - p = ip_buf; - - while (*pos) { - switch (state) { - case 0: - /* Require ':' */ - if (*pos != ':') { - msg_info ("<%s>: spf error for domain %s: semicolon missing", - rec->task->message_id, rec->sender_domain); - return 
FALSE; - } - state = 1; - pos ++; - p = ip_buf; - dots = 0; - break; - case 1: -#ifdef HAVE_INET_PTON - if (p - ip_buf >= (gint)sizeof (ip_buf)) { - return FALSE; - } - if (g_ascii_isxdigit (*pos)) { - *p ++ = *pos ++; - } - else if (*pos == '.' || *pos == ':') { - *p ++ = *pos ++; - dots ++; - } -#else - /* Begin parse ip */ - if (p - ip_buf >= (gint)sizeof (ip_buf) || dots > 3) { - return FALSE; - } - if (g_ascii_isdigit (*pos)) { - *p ++ = *pos ++; - } - else if (*pos == '.') { - *p ++ = *pos ++; - dots ++; - } -#endif - else if (*pos == '/') { - pos ++; - p = mask_buf; - state = 2; - } - else { - /* Invalid character */ - msg_info ("<%s>: spf error for domain %s: invalid ip address", - rec->task->message_id, rec->sender_domain); - return FALSE; - } - break; - case 2: - /* Parse mask */ - if (p - mask_buf >= (gint)sizeof (mask_buf)) { - msg_info ("<%s>: spf error for domain %s: too long mask", - rec->task->message_id, rec->sender_domain); - return FALSE; - } - if (g_ascii_isdigit (*pos)) { - *p ++ = *pos ++; - } - else { - return FALSE; - } - break; - } - } - -#ifdef HAVE_INET_PTON - if (inet_pton (AF_INET, ip_buf, &addr->data.normal.d.in4) != 1) { - if (inet_pton (AF_INET6, ip_buf, &addr->data.normal.d.in6) == 1) { - addr->data.normal.ipv6 = TRUE; - } - else { - msg_info ("<%s>: spf error for domain %s: invalid ip address", - rec->task->message_id, rec->sender_domain); - return FALSE; - } - } - else { - addr->data.normal.ipv6 = FALSE; - } -#else - if (!inet_aton (ip_buf, &addr->data.normal.d.in4)) { - return FALSE; - } -#endif - if (state == 2) { - /* Also parse mask */ - if (!addr->data.normal.ipv6) { - addr->data.normal.mask = strtoul (mask_buf, NULL, 10); - if (addr->data.normal.mask > 32) { - msg_info ("<%s>: spf error for domain %s: bad ipmask value: '%s'", - rec->task->message_id, rec->sender_domain, begin); - return FALSE; - } - } - else { - addr->data.normal.mask = strtoul (mask_buf, NULL, 10); - if (addr->data.normal.mask > 128) { - msg_info ("<%s>: 
spf error for domain %s: bad ipmask value: '%s'", - rec->task->message_id, rec->sender_domain, begin); - return FALSE; - } - } - } - else { - addr->data.normal.mask = addr->data.normal.ipv6 ? 128 : 32; - } - addr->data.normal.parsed = TRUE; - return TRUE; - -} - -static gchar * -parse_spf_hostmask (struct rspamd_task *task, const gchar *begin, struct spf_addr *addr, struct spf_record *rec) -{ - gchar *host = NULL, *p, mask_buf[3]; - gint hostlen; - - bzero (mask_buf, sizeof (mask_buf)); - if (*begin == '\0' || *begin == '/') { - /* Assume host as host to resolve from record */ - host = rec->cur_domain; - } - p = strchr (begin, '/'); - if (p != NULL) { - /* Extract mask */ - rspamd_strlcpy (mask_buf, p + 1, sizeof (mask_buf)); - addr->data.normal.mask = strtoul (mask_buf, NULL, 10); - if (addr->data.normal.mask > 32) { - msg_info ("<%s>: spf error for domain %s: too long mask", - rec->task->message_id, rec->sender_domain); - return FALSE; - } - if (host == NULL) { - hostlen = p - begin; - host = rspamd_mempool_alloc (task->task_pool, hostlen); - rspamd_strlcpy (host, begin, hostlen); - } - } - else { - addr->data.normal.mask = 32; - if (host == NULL) { - host = rspamd_mempool_strdup (task->task_pool, begin); - } - } - - return host; -} - -static void -spf_record_dns_callback (struct rdns_reply *reply, gpointer arg) -{ - struct spf_dns_cb *cb = arg; - gchar *begin; - struct rdns_reply_entry *elt_data; - GList *tmp = NULL; - struct rspamd_task *task; - struct spf_addr *new_addr; - - task = cb->rec->task; - - cb->rec->requests_inflight --; - - if (reply->code == RDNS_RC_NOERROR) { - /* Add all logic for all DNS states here */ - LL_FOREACH (reply->entries, elt_data) { - switch (cb->cur_action) { - case SPF_RESOLVE_MX: - if (elt_data->type == RDNS_REQUEST_MX) { - /* Now resolve A record for this MX */ - if (make_dns_request (task->resolver, task->s, task->task_pool, - spf_record_dns_callback, (void *)cb, RDNS_REQUEST_A, elt_data->content.mx.name)) { - task->dns_requests 
++; - cb->rec->requests_inflight ++; - } - } - else if (elt_data->type == RDNS_REQUEST_A) { - if (!cb->addr->data.normal.parsed) { - cb->addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr; - cb->addr->data.normal.mask = 32; - cb->addr->data.normal.parsed = TRUE; - } - else { - /* Insert one more address */ - tmp = spf_addr_find (cb->rec->addrs, cb->addr); - if (tmp) { - new_addr = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_addr)); - memcpy (new_addr, cb->addr, sizeof (struct spf_addr)); - new_addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr; - new_addr->data.normal.parsed = TRUE; - cb->rec->addrs = g_list_insert_before (cb->rec->addrs, tmp, new_addr); - } - else { - msg_info ("<%s>: spf error for domain %s: addresses mismatch", - task->message_id, cb->rec->sender_domain); - } - } - - } -#ifdef HAVE_INET_PTON - else if (elt_data->type == RDNS_REQUEST_AAAA) { - if (!cb->addr->data.normal.parsed) { - memcpy (&cb->addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr)); - cb->addr->data.normal.mask = 32; - cb->addr->data.normal.parsed = TRUE; - cb->addr->data.normal.ipv6 = TRUE; - } - else { - /* Insert one more address */ - tmp = spf_addr_find (cb->rec->addrs, cb->addr); - if (tmp) { - new_addr = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_addr)); - memcpy (new_addr, cb->addr, sizeof (struct spf_addr)); - memcpy (&new_addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr)); - new_addr->data.normal.parsed = TRUE; - new_addr->data.normal.ipv6 = TRUE; - cb->rec->addrs = g_list_insert_before (cb->rec->addrs, tmp, new_addr); - } - else { - msg_info ("<%s>: spf error for domain %s: addresses mismatch", - task->message_id, cb->rec->sender_domain); - } - } - - } -#endif - break; - case SPF_RESOLVE_A: - if (elt_data->type == RDNS_REQUEST_A) { - /* XXX: process only one record */ - cb->addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr; - 
cb->addr->data.normal.mask = 32; - cb->addr->data.normal.parsed = TRUE; - } -#ifdef HAVE_INET_PTON - else if (elt_data->type == RDNS_REQUEST_AAAA) { - memcpy (&cb->addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr)); - cb->addr->data.normal.mask = 32; - cb->addr->data.normal.parsed = TRUE; - cb->addr->data.normal.ipv6 = TRUE; - } -#endif - break; -#ifdef HAVE_INET_PTON - case SPF_RESOLVE_AAA: - if (elt_data->type == RDNS_REQUEST_A) { - /* XXX: process only one record */ - cb->addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr; - cb->addr->data.normal.mask = 32; - cb->addr->data.normal.parsed = TRUE; - } - else if (elt_data->type == RDNS_REQUEST_AAAA) { - memcpy (&cb->addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr)); - cb->addr->data.normal.mask = 32; - cb->addr->data.normal.parsed = TRUE; - cb->addr->data.normal.ipv6 = TRUE; - } -#endif - break; - case SPF_RESOLVE_PTR: - break; - case SPF_RESOLVE_REDIRECT: - if (elt_data->type == RDNS_REQUEST_TXT) { - begin = elt_data->content.txt.data; - - if (!cb->in_include && cb->rec->addrs) { - g_list_free (cb->rec->addrs); - cb->rec->addrs = NULL; - } - start_spf_parse (cb->rec, begin, elt_data->ttl); - - } - break; - case SPF_RESOLVE_INCLUDE: - if (elt_data->type == RDNS_REQUEST_TXT) { - begin = elt_data->content.txt.data; -#ifdef SPF_DEBUG - msg_info ("before include"); - dump_spf_record (cb->rec->addrs); -#endif - tmp = cb->rec->addrs; - cb->rec->addrs = NULL; - cb->rec->in_include = TRUE; - start_spf_parse (cb->rec, begin, 0); - cb->rec->in_include = FALSE; - -#ifdef SPF_DEBUG - msg_info ("after include"); - dump_spf_record (cb->rec->addrs); -#endif - /* Insert new list */ - cb->addr->is_list = TRUE; - cb->addr->data.list = cb->rec->addrs; - cb->rec->addrs = tmp; - } - break; - case SPF_RESOLVE_EXP: - break; - case SPF_RESOLVE_EXISTS: - if (elt_data->type == RDNS_REQUEST_A) { - /* If specified address resolves, we can accept connection from every IP 
*/ - cb->addr->data.normal.d.in4.s_addr = INADDR_NONE; - cb->addr->data.normal.mask = 0; - } - break; - } - } - } - else if (reply->code == RDNS_RC_NXDOMAIN) { - switch (cb->cur_action) { - case SPF_RESOLVE_MX: - if (rdns_request_has_type (reply->request, RDNS_REQUEST_MX)) { - msg_info ("<%s>: spf error for domain %s: cannot find MX record for %s", - task->message_id, cb->rec->sender_domain, cb->rec->cur_domain); - cb->addr->data.normal.d.in4.s_addr = INADDR_NONE; - cb->addr->data.normal.mask = 32; - } - else { - msg_info ("<%s>: spf error for domain %s: cannot resolve MX record for %s", - task->message_id, cb->rec->sender_domain, cb->rec->cur_domain); - cb->addr->data.normal.d.in4.s_addr = INADDR_NONE; - cb->addr->data.normal.mask = 32; - } - break; - case SPF_RESOLVE_A: - if (rdns_request_has_type (reply->request, RDNS_REQUEST_A)) { - cb->addr->data.normal.d.in4.s_addr = INADDR_NONE; - cb->addr->data.normal.mask = 32; - } - break; -#ifdef HAVE_INET_PTON - case SPF_RESOLVE_AAA: - if (rdns_request_has_type (reply->request, RDNS_REQUEST_AAAA)) { - memset (&cb->addr->data.normal.d.in6, 0xff, sizeof (struct in6_addr)); - cb->addr->data.normal.mask = 32; - } - break; -#endif - case SPF_RESOLVE_PTR: - break; - case SPF_RESOLVE_REDIRECT: - msg_info ("<%s>: spf error for domain %s: cannot resolve TXT record for %s", - task->message_id, cb->rec->sender_domain, cb->rec->cur_domain); - break; - case SPF_RESOLVE_INCLUDE: - msg_info ("<%s>: spf error for domain %s: cannot resolve TXT record for %s", - task->message_id, cb->rec->sender_domain, cb->rec->cur_domain); - break; - case SPF_RESOLVE_EXP: - break; - case SPF_RESOLVE_EXISTS: - cb->addr->data.normal.d.in4.s_addr = INADDR_NONE; - cb->addr->data.normal.mask = 32; - break; - } - } - - if (cb->rec->requests_inflight == 0) { - cb->rec->callback (cb->rec, cb->rec->task); - } -} - -static gboolean -parse_spf_a (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) -{ - struct spf_dns_cb 
*cb; - gchar *host; - - CHECK_REC (rec); - - if (begin == NULL || *begin != ':') { - return FALSE; - } - begin ++; - - host = parse_spf_hostmask (task, begin, addr, rec); - - if (!host) { - return FALSE; - } - rec->dns_requests ++; - cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb)); - cb->rec = rec; - cb->addr = addr; - cb->cur_action = SPF_RESOLVE_A; - cb->in_include = rec->in_include; - if (make_dns_request (task->resolver, task->s, task->task_pool, - spf_record_dns_callback, (void *)cb, RDNS_REQUEST_A, host)) { - task->dns_requests ++; - rec->requests_inflight ++; - return TRUE; - } - - return FALSE; - -} - -static gboolean -parse_spf_ptr (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) -{ - CHECK_REC (rec); - - msg_info ("<%s>: spf error for domain %s: ptr elements are not implemented", - rec->task->message_id, rec->sender_domain); - return FALSE; -} - -static gboolean -parse_spf_mx (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) -{ - struct spf_dns_cb *cb; - gchar *host; - - CHECK_REC (rec); - - if (begin == NULL) { - return FALSE; - } - if (*begin == ':') { - begin ++; - } - - host = parse_spf_hostmask (task, begin, addr, rec); - - if (!host) { - return FALSE; - } - rec->dns_requests ++; - cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb)); - cb->rec = rec; - cb->addr = addr; - memset (&addr->data.normal, 0, sizeof (addr->data.normal)); - cb->cur_action = SPF_RESOLVE_MX; - cb->in_include = rec->in_include; - if (make_dns_request (task->resolver, task->s, task->task_pool, - spf_record_dns_callback, (void *)cb, RDNS_REQUEST_MX, host)) { - task->dns_requests ++; - rec->requests_inflight ++; - - return TRUE; - } - - return FALSE; -} - -static gboolean -parse_spf_all (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) -{ - /* All is 0/0 */ - memset (&addr->data.normal.d, 0, sizeof 
(addr->data.normal.d)); - if (rec->in_include) { - /* Ignore all record in include */ - addr->data.normal.mask = 32; - } - else { - addr->data.normal.mask = 0; - addr->data.normal.addr_any = TRUE; - } - - return TRUE; -} - -static gboolean -parse_spf_ip4 (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) -{ - /* ip4:addr[/mask] */ - - CHECK_REC (rec); - return parse_spf_ipmask (begin, addr, rec); -} - -#ifdef HAVE_INET_PTON -static gboolean -parse_spf_ip6 (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) -{ - /* ip6:addr[/mask] */ - - CHECK_REC (rec); - return parse_spf_ipmask (begin, addr, rec); -} -#endif - -static gboolean -parse_spf_include (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) -{ - struct spf_dns_cb *cb; - gchar *domain; - - CHECK_REC (rec); - - if (begin == NULL || *begin != ':') { - return FALSE; - } - begin ++; - rec->dns_requests ++; - - cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb)); - cb->rec = rec; - cb->addr = addr; - cb->cur_action = SPF_RESOLVE_INCLUDE; - cb->in_include = rec->in_include; - addr->is_list = TRUE; - addr->data.list = NULL; - domain = rspamd_mempool_strdup (task->task_pool, begin); - if (make_dns_request (task->resolver, task->s, task->task_pool, - spf_record_dns_callback, (void *)cb, RDNS_REQUEST_TXT, domain)) { - task->dns_requests ++; - rec->requests_inflight ++; - - return TRUE; - } - - - return FALSE; -} - -static gboolean -parse_spf_exp (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) -{ - CHECK_REC (rec); - - msg_info ("exp record is ignored"); - return TRUE; -} - -static gboolean -parse_spf_redirect (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) -{ - struct spf_dns_cb *cb; - gchar *domain; - - CHECK_REC (rec); - - if (begin == NULL || *begin != '=') { - return FALSE; - } - 
begin ++; - rec->dns_requests ++; - - cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb)); - cb->rec = rec; - cb->addr = addr; - cb->cur_action = SPF_RESOLVE_REDIRECT; - cb->in_include = rec->in_include; - domain = rspamd_mempool_strdup (task->task_pool, begin); - if (make_dns_request (task->resolver, task->s, task->task_pool, - spf_record_dns_callback, (void *)cb, RDNS_REQUEST_TXT, domain)) { - task->dns_requests ++; - rec->requests_inflight ++; - - return TRUE; - } - - return FALSE; -} - -static gboolean -parse_spf_exists (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) -{ - struct spf_dns_cb *cb; - gchar *host; - - CHECK_REC (rec); - - if (begin == NULL || *begin != ':') { - return FALSE; - } - begin ++; - rec->dns_requests ++; - - addr->data.normal.mask = 32; - cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb)); - cb->rec = rec; - cb->addr = addr; - cb->cur_action = SPF_RESOLVE_EXISTS; - cb->in_include = rec->in_include; - host = rspamd_mempool_strdup (task->task_pool, begin); - - if (make_dns_request (task->resolver, task->s, task->task_pool, - spf_record_dns_callback, (void *)cb, RDNS_REQUEST_A, host)) { - task->dns_requests ++; - rec->requests_inflight ++; - - return TRUE; - } - - return FALSE; -} - -static void -reverse_spf_ip (gchar *ip, gint len) -{ - gchar ipbuf[sizeof("255.255.255.255") - 1], *p, *c; - gint t = 0, l = len; - - if (len > (gint)sizeof (ipbuf)) { - msg_info ("cannot reverse string of length %d", len); - return; - } - - p = ipbuf + len; - c = ip; - while (-- l) { - if (*c == '.') { - memcpy (p, c - t, t); - *--p = '.'; - c ++; - t = 0; - continue; - } - - t ++; - c ++; - p --; - } - - memcpy (p - 1, c - t, t + 1); - - memcpy (ip, ipbuf, len); -} - -static gchar * -expand_spf_macro (struct rspamd_task *task, struct spf_record *rec, gchar *begin) -{ - gchar *p, *c, *new, *tmp; - gint len = 0, slen = 0, state = 0; -#ifdef HAVE_INET_PTON - gchar 
ip_buf[INET6_ADDRSTRLEN]; -#endif - gboolean need_expand = FALSE; - - p = begin; - /* Calculate length */ - while (*p) { - switch (state) { - case 0: - /* Skip any character and wait for % in input */ - if (*p == '%') { - state = 1; - } - else { - len ++; - } - - slen ++; - p ++; - break; - case 1: - /* We got % sign, so we should whether wait for { or for - or for _ or for % */ - if (*p == '%' || *p == '-') { - /* Just a single % sign or space */ - len ++; - } - else if (*p == '_') { - /* %20 */ - len += sizeof ("%20") - 1; - } - else if (*p == '{') { - state = 2; - } - else { - /* Something unknown */ - msg_info ("<%s>: spf error for domain %s: unknown spf element", - task->message_id, rec->sender_domain); - return begin; - } - p ++; - slen ++; - break; - case 2: - /* Read macro name */ - switch (g_ascii_tolower (*p)) { - case 'i': -#ifdef HAVE_INET_PTON - len += sizeof (INET6_ADDRSTRLEN) - 1; -#else - len += sizeof (INET_ADDRSTRLEN) - 1; -#endif - break; - case 's': - len += strlen (rec->sender); - break; - case 'l': - len += strlen (rec->local_part); - break; - case 'o': - len += strlen (rec->sender_domain); - break; - case 'd': - len += strlen (rec->cur_domain); - break; - case 'v': - len += sizeof ("in-addr") - 1; - break; - case 'h': - if (task->helo) { - len += strlen (task->helo); - } - break; - default: - msg_info ("<%s>: spf error for domain %s: unknown or unsupported spf macro %c in %s", - task->message_id, rec->sender_domain, *p, begin); - return begin; - } - p ++; - slen ++; - state = 3; - break; - case 3: - /* Read modifier */ - if (*p == '}') { - state = 0; - need_expand = TRUE; - } - else if (*p != 'r' && !g_ascii_isdigit (*p)) { - msg_info ("<%s>: spf error for domain %s: unknown or unsupported spf modifier %c in %s", - task->message_id, rec->sender_domain, *p, begin); - return begin; - } - p ++; - slen ++; - break; - } - } - - if (!need_expand) { - /* No expansion needed */ - return begin; - } - - new = rspamd_mempool_alloc (task->task_pool, len 
+ 1); - - c = new; - p = begin; - state = 0; - /* Begin macro expansion */ - - while (*p) { - switch (state) { - case 0: - /* Skip any character and wait for % in input */ - if (*p == '%') { - state = 1; - } - else { - *c = *p; - c ++; - } - - p ++; - break; - case 1: - /* We got % sign, so we should whether wait for { or for - or for _ or for % */ - if (*p == '%') { - /* Just a single % sign or space */ - *c++ = '%'; - } - else if (*p == '-') { - *c++ = ' '; - } - else if (*p == '_') { - /* %20 */ - *c++ = '%'; - *c++ = '2'; - *c++ = '0'; - } - else if (*p == '{') { - state = 2; - } - else { - /* Something unknown */ - msg_info ("<%s>: spf error for domain %s: unknown spf element", - task->message_id, rec->sender_domain); - return begin; - } - p ++; - break; - case 2: - /* Read macro name */ - switch (g_ascii_tolower (*p)) { - case 'i': -#ifdef HAVE_INET_PTON - len = rspamd_strlcpy (ip_buf, - rspamd_inet_address_to_string (&task->from_addr), - sizeof (ip_buf)); - memcpy (c, ip_buf, len); -#else - tmp = inet_ntoa (task->from_addr); - len = strlen (tmp); - memcpy (c, tmp, len); -#endif - c += len; - break; - case 's': - len = strlen (rec->sender); - memcpy (c, rec->sender, len); - c += len; - break; - case 'l': - len = strlen (rec->local_part); - memcpy (c, rec->local_part, len); - c += len; - break; - case 'o': - len = strlen (rec->sender_domain); - memcpy (c, rec->sender_domain, len); - c += len; - break; - case 'd': - len = strlen (rec->cur_domain); - memcpy (c, rec->cur_domain, len); - c += len; - break; - case 'v': - len = sizeof ("in-addr") - 1; - memcpy (c, "in-addr", len); - c += len; - break; - case 'h': - if (task->helo) { - tmp = strchr (task->helo, '@'); - if (tmp) { - len = strlen (tmp + 1); - memcpy (c, tmp + 1, len); - c += len; - } - } - break; - default: - msg_info ("<%s>: spf error for domain %s: unknown or unsupported spf macro %c in %s", - task->message_id, rec->sender_domain, *p, begin); - return begin; - } - p ++; - state = 3; - break; - case 
3: - /* Read modifier */ - if (*p == '}') { - state = 0; - } - else if (*p == 'r' && len != 0) { - reverse_spf_ip (c - len, len); - len = 0; - } - else if (g_ascii_isdigit (*p)) { - /*XXX: try to implement domain strimming */ - } - else { - msg_info ("<%s>: spf error for domain %s: unknown or unsupported spf macro %c in %s", - task->message_id, rec->sender_domain, *p, begin); - return begin; - } - p ++; - break; - } - } - /* Null terminate */ - *c = '\0'; - - return new; - -} - -#define NEW_ADDR(x) do { \ - (x) = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_addr)); \ - (x)->mech = check_spf_mech (rec->cur_elt, &need_shift); \ - (x)->spf_string = rspamd_mempool_strdup (task->task_pool, begin); \ - memset (&(x)->data.normal, 0, sizeof ((x)->data.normal)); \ - (x)->data.normal.mask = 32; \ - (x)->is_list = FALSE; \ -} while (0); - -/* Read current element and try to parse record */ -static gboolean -parse_spf_record (struct rspamd_task *task, struct spf_record *rec) -{ - struct spf_addr *new = NULL; - gboolean need_shift, res = FALSE; - gchar *begin; - - rec->cur_elt = rec->elts[rec->elt_num]; - if (rec->cur_elt == NULL) { - return FALSE; - } - else if (*rec->cur_elt == '\0') { - /* Silently skip empty elements */ - rec->elt_num ++; - return TRUE; - } - else { - begin = expand_spf_macro (task, rec, rec->cur_elt); - if (*begin == '?' 
|| *begin == '+' || *begin == '-' || *begin == '~') { - begin ++; - } - - - /* Now check what we have */ - switch (g_ascii_tolower (*begin)) { - case 'a': - /* all or a */ - if (g_ascii_strncasecmp (begin, SPF_ALL, sizeof (SPF_ALL) - 1) == 0) { - NEW_ADDR (new); - begin += sizeof (SPF_ALL) - 1; - res = parse_spf_all (task, begin, rec, new); - } - else if (g_ascii_strncasecmp (begin, SPF_A, sizeof (SPF_A) - 1) == 0) { - NEW_ADDR (new); - begin += sizeof (SPF_A) - 1; - res = parse_spf_a (task, begin, rec, new); - } - else { - msg_info ("<%s>: spf error for domain %s: bad spf command %s", - task->message_id, rec->sender_domain, begin); - } - break; - case 'i': - /* include or ip4 */ - if (g_ascii_strncasecmp (begin, SPF_IP4, sizeof (SPF_IP4) - 1) == 0) { - NEW_ADDR (new); - begin += sizeof (SPF_IP4) - 1; - res = parse_spf_ip4 (task, begin, rec, new); - } - else if (g_ascii_strncasecmp (begin, SPF_INCLUDE, sizeof (SPF_INCLUDE) - 1) == 0) { - NEW_ADDR (new); - begin += sizeof (SPF_INCLUDE) - 1; - res = parse_spf_include (task, begin, rec, new); - } - else if (g_ascii_strncasecmp (begin, SPF_IP6, sizeof (SPF_IP6) - 1) == 0) { -#ifdef HAVE_INET_PTON - NEW_ADDR (new); - begin += sizeof (SPF_IP6) - 1; - res = parse_spf_ip6 (task, begin, rec, new); -#else - msg_info ("ignoring ip6 spf command as IPv6 is not supported: %s", begin); - new = NULL; - res = TRUE; - begin += sizeof (SPF_IP6) - 1; -#endif - } - else { - msg_info ("<%s>: spf error for domain %s: bad spf command %s", - task->message_id, rec->sender_domain, begin); - } - break; - case 'm': - /* mx */ - if (g_ascii_strncasecmp (begin, SPF_MX, sizeof (SPF_MX) - 1) == 0) { - NEW_ADDR (new); - begin += sizeof (SPF_MX) - 1; - res = parse_spf_mx (task, begin, rec, new); - } - else { - msg_info ("<%s>: spf error for domain %s: bad spf command %s", - task->message_id, rec->sender_domain, begin); - } - break; - case 'p': - /* ptr */ - if (g_ascii_strncasecmp (begin, SPF_PTR, sizeof (SPF_PTR) - 1) == 0) { - NEW_ADDR (new); - 
begin += sizeof (SPF_PTR) - 1; - res = parse_spf_ptr (task, begin, rec, new); - } - else { - msg_info ("<%s>: spf error for domain %s: bad spf command %s", - task->message_id, rec->sender_domain, begin); - } - break; - case 'e': - /* exp or exists */ - if (g_ascii_strncasecmp (begin, SPF_EXP, sizeof (SPF_EXP) - 1) == 0) { - begin += sizeof (SPF_EXP) - 1; - res = parse_spf_exp (task, begin, rec, NULL); - } - else if (g_ascii_strncasecmp (begin, SPF_EXISTS, sizeof (SPF_EXISTS) - 1) == 0) { - NEW_ADDR (new); - begin += sizeof (SPF_EXISTS) - 1; - res = parse_spf_exists (task, begin, rec, new); - } - else { - msg_info ("<%s>: spf error for domain %s: bad spf command %s", - task->message_id, rec->sender_domain, begin); - } - break; - case 'r': - /* redirect */ - if (g_ascii_strncasecmp (begin, SPF_REDIRECT, sizeof (SPF_REDIRECT) - 1) == 0) { - begin += sizeof (SPF_REDIRECT) - 1; - res = parse_spf_redirect (task, begin, rec, NULL); - } - else { - msg_info ("<%s>: spf error for domain %s: bad spf command %s", - task->message_id, rec->sender_domain, begin); - } - break; - case 'v': - if (g_ascii_strncasecmp (begin, "v=spf", sizeof ("v=spf") - 1) == 0) { - /* Skip this element till the end of record */ - while (*begin && !g_ascii_isspace (*begin)) { - begin ++; - } - } - break; - default: - msg_info ("<%s>: spf error for domain %s: bad spf command %s", - task->message_id, rec->sender_domain, begin); - break; - } - if (res) { - if (new != NULL) { - rec->addrs = g_list_prepend (rec->addrs, new); - } - rec->elt_num ++; - } - } - - return res; -} -#undef NEW_ADDR - -static void -parse_spf_scopes (struct spf_record *rec, gchar **begin) -{ - for (;;) { - if (g_ascii_strncasecmp (*begin, SPF_SCOPE_PRA, sizeof (SPF_SCOPE_PRA) - 1) == 0) { - *begin += sizeof (SPF_SCOPE_PRA) - 1; - /* XXX: Implement actual PRA check */ - /* extract_pra_info (rec); */ - continue; - } - else if (g_ascii_strncasecmp (*begin, SPF_SCOPE_MFROM, sizeof (SPF_SCOPE_MFROM) - 1) == 0) { - /* mfrom is standart 
spf1 check */ - *begin += sizeof (SPF_SCOPE_MFROM) - 1; - continue; - } - else if (**begin != ',') { - break; - } - (*begin) ++; - } -} - -static void -start_spf_parse (struct spf_record *rec, gchar *begin, guint ttl) -{ - /* Skip spaces */ - while (g_ascii_isspace (*begin)) { - begin ++; - } - - if (g_ascii_strncasecmp (begin, SPF_VER1_STR, sizeof (SPF_VER1_STR) - 1) == 0) { - begin += sizeof (SPF_VER1_STR) - 1; - while (g_ascii_isspace (*begin) && *begin) { - begin ++; - } - rec->elts = g_strsplit_set (begin, " ", 0); - rec->elt_num = 0; - if (rec->elts) { - rspamd_mempool_add_destructor (rec->task->task_pool, (rspamd_mempool_destruct_t)g_strfreev, rec->elts); - rec->cur_elt = rec->elts[0]; - while (parse_spf_record (rec->task, rec)); - if (ttl != 0) { - rec->ttl = ttl; - } - } - } - else if (g_ascii_strncasecmp (begin, SPF_VER2_STR, sizeof (SPF_VER2_STR) - 1) == 0) { - /* Skip one number of record, so no we are here spf2.0/ */ - begin += sizeof (SPF_VER2_STR); - if (*begin != '/') { - msg_info ("<%s>: spf error for domain %s: sender id is invalid", - rec->task->message_id, rec->sender_domain); - } - else { - begin ++; - parse_spf_scopes (rec, &begin); - } - /* Now common spf record */ - while (g_ascii_isspace (*begin) && *begin) { - begin ++; - } - rec->elts = g_strsplit_set (begin, " ", 0); - rec->elt_num = 0; - if (rec->elts) { - rspamd_mempool_add_destructor (rec->task->task_pool, (rspamd_mempool_destruct_t)g_strfreev, rec->elts); - rec->cur_elt = rec->elts[0]; - while (parse_spf_record (rec->task, rec)); - if (ttl != 0) { - rec->ttl = ttl; - } - } - } - else { - msg_debug ("<%s>: spf error for domain %s: bad spf record version: %*s", - rec->task->message_id, rec->sender_domain, sizeof (SPF_VER1_STR) - 1, begin); - } -} - -static void -spf_dns_callback (struct rdns_reply *reply, gpointer arg) -{ - struct spf_record *rec = arg; - struct rdns_reply_entry *elt; - - rec->requests_inflight --; - if (reply->code == RDNS_RC_NOERROR) { - LL_FOREACH (reply->entries, 
elt) { - start_spf_parse (rec, elt->content.txt.data, elt->ttl); - } - } - - if (rec->requests_inflight == 0) { - rec->callback (rec, rec->task); - } -} - -gchar * -get_spf_domain (struct rspamd_task *task) -{ - gchar *domain, *res = NULL; - GList *domains; - - if (task->from && (domain = strchr (task->from, '@')) != NULL && *domain == '@') { - res = rspamd_mempool_strdup (task->task_pool, domain + 1); - if ((domain = strchr (res, '>')) != NULL) { - *domain = '\0'; - } - } - else { - /* Extract from header */ - domains = message_get_header (task->task_pool, task->message, "From", FALSE); - - if (domains != NULL) { - res = rspamd_mempool_strdup (task->task_pool, domains->data); - - if ((domain = strrchr (res, '@')) == NULL) { - g_list_free (domains); - return NULL; - } - res = rspamd_mempool_strdup (task->task_pool, domain + 1); - g_list_free (domains); - - if ((domain = strchr (res, '>')) != NULL) { - *domain = '\0'; - } - } - } - - return res; -} - -gboolean -resolve_spf (struct rspamd_task *task, spf_cb_t callback) -{ - struct spf_record *rec; - gchar *domain; - GList *domains; - - rec = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct spf_record)); - rec->task = task; - rec->callback = callback; - /* Add destructor */ - rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)spf_record_destructor, rec); - - /* Extract from data */ - if (task->from && (domain = strchr (task->from, '@')) != NULL && *domain == '@') { - rec->sender = task->from; - - rec->local_part = rspamd_mempool_strdup (task->task_pool, task->from); - *(rec->local_part + (domain - task->from)) = '\0'; - if (*rec->local_part == '<') { - memmove (rec->local_part, rec->local_part + 1, strlen (rec->local_part)); - } - rec->cur_domain = rspamd_mempool_strdup (task->task_pool, domain + 1); - if ((domain = strchr (rec->cur_domain, '>')) != NULL) { - *domain = '\0'; - } - rec->sender_domain = rec->cur_domain; - - if (make_dns_request (task->resolver, task->s, task->task_pool, 
spf_dns_callback, - (void *)rec, RDNS_REQUEST_TXT, rec->cur_domain)) { - task->dns_requests ++; - rec->requests_inflight ++; - return TRUE; - } - } - else { - /* Extract from header */ - domains = message_get_header (task->task_pool, task->message, "From", FALSE); - - if (domains != NULL) { - rec->cur_domain = rspamd_mempool_strdup (task->task_pool, domains->data); - g_list_free (domains); - - if ((domain = strrchr (rec->cur_domain, '@')) == NULL) { - return FALSE; - } - rec->sender = rspamd_mempool_strdup (task->task_pool, rec->cur_domain); - rec->local_part = rec->cur_domain; - *domain = '\0'; - rec->cur_domain = domain + 1; - - if ((domain = strchr (rec->local_part, '<')) != NULL) { - memmove (rec->local_part, domain + 1, strlen (domain)); - } - - if ((domain = strchr (rec->cur_domain, '>')) != NULL) { - *domain = '\0'; - } - rec->sender_domain = rec->cur_domain; - if (make_dns_request (task->resolver, task->s, task->task_pool, - spf_dns_callback, (void *)rec, RDNS_REQUEST_TXT, rec->cur_domain)) { - task->dns_requests ++; - rec->requests_inflight ++; - return TRUE; - } - } - } - - return FALSE; -} - -/* - * vi:ts=4 - */ diff --git a/src/spf.h b/src/spf.h deleted file mode 100644 index 94c613e42..000000000 --- a/src/spf.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef RSPAMD_SPF_H -#define RSPAMD_SPF_H - -#include "config.h" - -struct rspamd_task; -struct spf_record; - -typedef void (*spf_cb_t)(struct spf_record *record, struct rspamd_task *task); - -typedef enum spf_mech_e { - SPF_FAIL, - SPF_SOFT_FAIL, - SPF_PASS, - SPF_NEUTRAL -} spf_mech_t; - -typedef enum spf_action_e { - SPF_RESOLVE_MX, - SPF_RESOLVE_A, - SPF_RESOLVE_PTR, - SPF_RESOLVE_AAA, - SPF_RESOLVE_REDIRECT, - SPF_RESOLVE_INCLUDE, - SPF_RESOLVE_EXISTS, - SPF_RESOLVE_EXP -} spf_action_t; - -struct spf_addr { - union { - struct { - union { - struct in_addr in4; -#ifdef HAVE_INET_PTON - struct in6_addr in6; -#endif - } d; - guint32 mask; - gboolean ipv6; - gboolean parsed; - gboolean addr_any; - } normal; - 
GList *list; - } data; - gboolean is_list; - spf_mech_t mech; - gchar *spf_string; -}; - -struct spf_record { - gchar **elts; - - gchar *cur_elt; - gint elt_num; - gint nested; - gint dns_requests; - gint requests_inflight; - - guint ttl; - - GList *addrs; - gchar *cur_domain; - gchar *sender; - gchar *sender_domain; - gchar *local_part; - struct rspamd_task *task; - spf_cb_t callback; - - gboolean in_include; -}; - - -/* - * Resolve spf record for specified task and call a callback after resolution fails/succeed - */ -gboolean resolve_spf (struct rspamd_task *task, spf_cb_t callback); - -/* - * Get a domain for spf for specified task - */ -gchar *get_spf_domain (struct rspamd_task *task); - - -#endif diff --git a/src/statfile.c b/src/statfile.c deleted file mode 100644 index 4c1cc13fb..000000000 --- a/src/statfile.c +++ /dev/null @@ -1,927 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include "statfile.h" -#include "main.h" - -#define RSPAMD_STATFILE_VERSION {'1', '2'} -#define BACKUP_SUFFIX ".old" - -/* Maximum number of statistics files */ -#define STATFILES_MAX 255 -static void statfile_pool_set_block_common ( - statfile_pool_t * pool, stat_file_t * file, - guint32 h1, guint32 h2, - time_t t, double value, - gboolean from_now); - -static gint -cmpstatfile (const void *a, const void *b) -{ - const stat_file_t *s1 = a, *s2 = b; - - return g_ascii_strcasecmp (s1->filename, s2->filename); -} - -/* Convert statfile version 1.0 to statfile version 1.2, saving backup */ -struct stat_file_header_10 { - u_char magic[3]; /**< magic signature ('r' 's' 'd') */ - u_char version[2]; /**< version of statfile */ - u_char padding[3]; /**< padding */ - guint64 create_time; /**< create time (time_t->guint64) */ -}; - -static gboolean -convert_statfile_10 (stat_file_t * file) -{ - gchar *backup_name; - struct stat st; - struct stat_file_header header = { - .magic = {'r', 's', 'd'}, - .version = RSPAMD_STATFILE_VERSION, - .padding = {0, 0, 0}, - .revision = 0, - .rev_time = 0 - }; - - - /* Format backup name */ - backup_name = g_strdup_printf ("%s.%s", file->filename, BACKUP_SUFFIX); - - msg_info ("convert old statfile %s to version %c.%c, backup in %s", file->filename, - header.version[0], header.version[1], backup_name); - - if (stat (backup_name, &st) != -1) { - msg_info ("replace old %s", backup_name); - unlink (backup_name); - 
} - - rename (file->filename, backup_name); - g_free (backup_name); - - /* XXX: maybe race condition here */ - unlock_file (file->fd, FALSE); - close (file->fd); - if ((file->fd = open (file->filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { - msg_info ("cannot create file %s, error %d, %s", file->filename, errno, strerror (errno)); - return FALSE; - } - lock_file (file->fd, FALSE); - /* Now make new header and copy it to new file */ - if (write (file->fd, &header, sizeof (header)) == -1) { - msg_info ("cannot write to file %s, error %d, %s", file->filename, errno, strerror (errno)); - return FALSE; - } - /* Now write old map to new file */ - if (write (file->fd, ((u_char *)file->map + sizeof (struct stat_file_header_10)), - file->len - sizeof (struct stat_file_header_10)) == -1) { - msg_info ("cannot write to file %s, error %d, %s", file->filename, errno, strerror (errno)); - return FALSE; - } - /* Unmap old memory and map new */ - munmap (file->map, file->len); - file->len = file->len + sizeof (struct stat_file_header) - sizeof (struct stat_file_header_10); -#ifdef HAVE_MMAP_NOCORE - if ((file->map = mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NOCORE, file->fd, 0)) == MAP_FAILED) { -#else - if ((file->map = mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED, file->fd, 0)) == MAP_FAILED) { -#endif - msg_info ("cannot mmap file %s, error %d, %s", file->filename, errno, strerror (errno)); - return FALSE; - } - - return TRUE; -} - -/* Check whether specified file is statistic file and calculate its len in blocks */ -static gint -statfile_pool_check (stat_file_t * file) -{ - struct stat_file *f; - gchar *c; - static gchar valid_version[] = RSPAMD_STATFILE_VERSION; - - - if (!file || !file->map) { - return -1; - } - - if (file->len < sizeof (struct stat_file)) { - msg_info ("file %s is too short to be stat file: %z", file->filename, file->len); - return -1; - } - - f = (struct stat_file *)file->map; - c = f->header.magic; - 
/* Check magic and version */ - if (*c++ != 'r' || *c++ != 's' || *c++ != 'd') { - msg_info ("file %s is invalid stat file", file->filename); - return -1; - } - /* Now check version and convert old version to new one (that can be used for sync */ - if (*c == 1 && *(c + 1) == 0) { - if (!convert_statfile_10 (file)) { - return -1; - } - f = (struct stat_file *)file->map; - } - else if (memcmp (c, valid_version, sizeof (valid_version)) != 0) { - /* Unknown version */ - msg_info ("file %s has invalid version %c.%c", file->filename, '0' + *c, '0' + *(c + 1)); - return -1; - } - - /* Check first section and set new offset */ - file->cur_section.code = f->section.code; - file->cur_section.length = f->section.length; - if (file->cur_section.length * sizeof (struct stat_file_block) > file->len) { - msg_info ("file %s is truncated: %z, must be %z", file->filename, file->len, file->cur_section.length * sizeof (struct stat_file_block)); - return -1; - } - file->seek_pos = sizeof (struct stat_file) - sizeof (struct stat_file_block); - - return 0; -} - - -statfile_pool_t * -statfile_pool_new (rspamd_mempool_t *pool, gboolean use_mlock) -{ - statfile_pool_t *new; - - new = rspamd_mempool_alloc0 (pool, sizeof (statfile_pool_t)); - new->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); - new->files = rspamd_mempool_alloc0 (new->pool, STATFILES_MAX * sizeof (stat_file_t)); - new->lock = rspamd_mempool_get_mutex (new->pool); - new->mlock_ok = use_mlock; - - return new; -} - -static stat_file_t * -statfile_pool_reindex (statfile_pool_t * pool, gchar *filename, size_t old_size, size_t size) -{ - gchar *backup; - gint fd; - stat_file_t *new; - u_char *map, *pos; - struct stat_file_block *block; - struct stat_file_header *header; - - if (size < - sizeof (struct stat_file_header) + sizeof (struct stat_file_section) + sizeof (block)) { - msg_err ("file %s is too small to carry any statistic: %z", filename, size); - return NULL; - } - - /* First of all rename old file */ - 
rspamd_mempool_lock_mutex (pool->lock); - - backup = g_strconcat (filename, ".old", NULL); - if (rename (filename, backup) == -1) { - msg_err ("cannot rename %s to %s: %s", filename, backup, strerror (errno)); - g_free (backup); - rspamd_mempool_unlock_mutex (pool->lock); - return NULL; - } - - rspamd_mempool_unlock_mutex (pool->lock); - - /* Now create new file with required size */ - if (statfile_pool_create (pool, filename, size) != 0) { - msg_err ("cannot create new file"); - g_free (backup); - return NULL; - } - /* Now open new file and start copying */ - fd = open (backup, O_RDONLY); - new = statfile_pool_open (pool, filename, size, TRUE); - - if (fd == -1 || new == NULL) { - msg_err ("cannot open file: %s", strerror (errno)); - g_free (backup); - return NULL; - } - - /* Now start reading blocks from old statfile */ - if ((map = mmap (NULL, old_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { - msg_err ("cannot mmap file: %s", strerror (errno)); - close (fd); - g_free (backup); - return NULL; - } - - pos = map + (sizeof (struct stat_file) - sizeof (struct stat_file_block)); - while (old_size - (pos - map) >= sizeof (struct stat_file_block)) { - block = (struct stat_file_block *)pos; - if (block->hash1 != 0 && block->value != 0) { - statfile_pool_set_block_common (pool, new, block->hash1, block->hash2, 0, block->value, FALSE); - } - pos += sizeof (block); - } - - header = (struct stat_file_header *)map; - statfile_set_revision (new, header->revision, header->rev_time); - - munmap (map, old_size); - close (fd); - unlink (backup); - g_free (backup); - - return new; - -} - -/* - * Pre-load mmaped file into memory - */ -static void -statfile_preload (stat_file_t *file) -{ - guint8 *pos, *end; - volatile guint8 t; - gsize size; - - pos = (guint8 *)file->map; - end = (guint8 *)file->map + file->len; - - if (madvise (pos, end - pos, MADV_SEQUENTIAL) == -1) { - msg_info ("madvise failed: %s", strerror (errno)); - } - else { - /* Load pages of file */ -#ifdef 
HAVE_GETPAGESIZE - size = getpagesize (); -#else - size = sysconf (_SC_PAGESIZE); -#endif - while (pos < end) { - t = *pos; - (void)t; - pos += size; - } - } -} - -stat_file_t * -statfile_pool_open (statfile_pool_t * pool, gchar *filename, size_t size, gboolean forced) -{ - struct stat st; - stat_file_t *new_file; - - if ((new_file = statfile_pool_is_open (pool, filename)) != NULL) { - return new_file; - } - - if (pool->opened >= STATFILES_MAX - 1) { - msg_err ("reached hard coded limit of statfiles opened: %d", STATFILES_MAX); - return NULL; - } - - if (stat (filename, &st) == -1) { - msg_info ("cannot stat file %s, error %s, %d", filename, strerror (errno), errno); - return NULL; - } - - rspamd_mempool_lock_mutex (pool->lock); - if (!forced && labs (size - st.st_size) > (long)sizeof (struct stat_file) * 2 - && size > sizeof (struct stat_file)) { - rspamd_mempool_unlock_mutex (pool->lock); - msg_warn ("need to reindex statfile old size: %Hz, new size: %Hz", (size_t)st.st_size, size); - return statfile_pool_reindex (pool, filename, st.st_size, size); - } - else if (size < sizeof (struct stat_file)) { - msg_err ("requested to shrink statfile to %Hz but it is too small", size); - } - - new_file = &pool->files[pool->opened++]; - bzero (new_file, sizeof (stat_file_t)); - if ((new_file->fd = open (filename, O_RDWR)) == -1) { - msg_info ("cannot open file %s, error %d, %s", filename, errno, strerror (errno)); - rspamd_mempool_unlock_mutex (pool->lock); - pool->opened--; - return NULL; - } - - if ((new_file->map = mmap (NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, new_file->fd, 0)) == MAP_FAILED) { - close (new_file->fd); - rspamd_mempool_unlock_mutex (pool->lock); - msg_info ("cannot mmap file %s, error %d, %s", filename, errno, strerror (errno)); - pool->opened--; - return NULL; - - } - - rspamd_strlcpy (new_file->filename, filename, sizeof (new_file->filename)); - new_file->len = st.st_size; - /* Try to lock pages in RAM */ - if (pool->mlock_ok) { - if (mlock 
(new_file->map, new_file->len) == -1) { - msg_warn ("mlock of statfile failed, maybe you need to increase RLIMIT_MEMLOCK limit for a process: %s", strerror (errno)); - pool->mlock_ok = FALSE; - } - } - /* Acquire lock for this operation */ - lock_file (new_file->fd, FALSE); - if (statfile_pool_check (new_file) == -1) { - pool->opened--; - rspamd_mempool_unlock_mutex (pool->lock); - unlock_file (new_file->fd, FALSE); - munmap (new_file->map, st.st_size); - return NULL; - } - unlock_file (new_file->fd, FALSE); - - new_file->open_time = time (NULL); - new_file->access_time = new_file->open_time; - new_file->lock = rspamd_mempool_get_mutex (pool->pool); - - statfile_preload (new_file); - - rspamd_mempool_unlock_mutex (pool->lock); - - return statfile_pool_is_open (pool, filename); -} - -gint -statfile_pool_close (statfile_pool_t * pool, stat_file_t * file, gboolean keep_sorted) -{ - stat_file_t *pos; - - if ((pos = statfile_pool_is_open (pool, file->filename)) == NULL) { - msg_info ("file %s is not opened", file->filename); - return -1; - } - - rspamd_mempool_lock_mutex (pool->lock); - - if (file->map) { - msg_info ("syncing statfile %s", file->filename); - msync (file->map, file->len, MS_ASYNC); - munmap (file->map, file->len); - } - if (file->fd != -1) { - close (file->fd); - } - /* Move the remain statfiles */ - memmove (pos, ((guint8 *)pos) + sizeof (stat_file_t), - (--pool->opened - (pos - pool->files)) * sizeof (stat_file_t)); - - rspamd_mempool_unlock_mutex (pool->lock); - - return 0; -} - -gint -statfile_pool_create (statfile_pool_t * pool, gchar *filename, size_t size) -{ - struct stat_file_header header = { - .magic = {'r', 's', 'd'}, - .version = RSPAMD_STATFILE_VERSION, - .padding = {0, 0, 0}, - .revision = 0, - .rev_time = 0, - .used_blocks = 0 - }; - struct stat_file_section section = { - .code = STATFILE_SECTION_COMMON, - }; - struct stat_file_block block = { 0, 0, 0 }; - gint fd; - guint buflen = 0, nblocks; - gchar *buf = NULL; - - if 
(statfile_pool_is_open (pool, filename) != NULL) { - msg_info ("file %s is already opened", filename); - return 0; - } - - if (size < - sizeof (struct stat_file_header) + sizeof (struct stat_file_section) + sizeof (block)) { - msg_err ("file %s is too small to carry any statistic: %z", filename, size); - return -1; - } - - rspamd_mempool_lock_mutex (pool->lock); - nblocks = (size - sizeof (struct stat_file_header) - sizeof (struct stat_file_section)) / sizeof (struct stat_file_block); - header.total_blocks = nblocks; - - if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { - msg_info ("cannot create file %s, error %d, %s", filename, errno, strerror (errno)); - rspamd_mempool_unlock_mutex (pool->lock); - return -1; - } - - rspamd_fallocate (fd, 0, sizeof (header) + sizeof (section) + sizeof (block) * nblocks); - - header.create_time = (guint64) time (NULL); - if (write (fd, &header, sizeof (header)) == -1) { - msg_info ("cannot write header to file %s, error %d, %s", filename, errno, strerror (errno)); - close (fd); - rspamd_mempool_unlock_mutex (pool->lock); - return -1; - } - - section.length = (guint64) nblocks; - if (write (fd, §ion, sizeof (section)) == -1) { - msg_info ("cannot write section header to file %s, error %d, %s", filename, errno, strerror (errno)); - close (fd); - rspamd_mempool_unlock_mutex (pool->lock); - return -1; - } - - /* Buffer for write 256 blocks at once */ - if (nblocks > 256) { - buflen = sizeof (block) * 256; - buf = g_malloc0 (buflen); - } - - while (nblocks) { - if (nblocks > 256) { - /* Just write buffer */ - if (write (fd, buf, buflen) == -1) { - msg_info ("cannot write blocks buffer to file %s, error %d, %s", filename, errno, strerror (errno)); - close (fd); - rspamd_mempool_unlock_mutex (pool->lock); - g_free (buf); - return -1; - } - nblocks -= 256; - } - else { - if (write (fd, &block, sizeof (block)) == -1) { - msg_info ("cannot write block to file %s, error %d, %s", filename, errno, strerror 
(errno)); - close (fd); - if (buf) { - g_free (buf); - } - rspamd_mempool_unlock_mutex (pool->lock); - return -1; - } - nblocks --; - } - } - - close (fd); - rspamd_mempool_unlock_mutex (pool->lock); - - if (buf) { - g_free (buf); - } - - return 0; -} - -void -statfile_pool_delete (statfile_pool_t * pool) -{ - gint i; - - for (i = 0; i < pool->opened; i++) { - statfile_pool_close (pool, &pool->files[i], FALSE); - } - rspamd_mempool_delete (pool->pool); -} - -void -statfile_pool_lock_file (statfile_pool_t * pool, stat_file_t * file) -{ - - rspamd_mempool_lock_mutex (file->lock); -} - -void -statfile_pool_unlock_file (statfile_pool_t * pool, stat_file_t * file) -{ - - rspamd_mempool_unlock_mutex (file->lock); -} - -double -statfile_pool_get_block (statfile_pool_t * pool, stat_file_t * file, guint32 h1, guint32 h2, time_t now) -{ - struct stat_file_block *block; - guint i, blocknum; - u_char *c; - - - file->access_time = now; - if (!file->map) { - return 0; - } - - blocknum = h1 % file->cur_section.length; - c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct stat_file_block); - block = (struct stat_file_block *)c; - - for (i = 0; i < CHAIN_LENGTH; i++) { - if (i + blocknum >= file->cur_section.length) { - break; - } - if (block->hash1 == h1 && block->hash2 == h2) { - return block->value; - } - c += sizeof (struct stat_file_block); - block = (struct stat_file_block *)c; - } - - - return 0; -} - -static void -statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, guint32 h1, guint32 h2, time_t t, double value, gboolean from_now) -{ - struct stat_file_block *block, *to_expire = NULL; - struct stat_file_header *header; - guint i, blocknum; - u_char *c; - double min = G_MAXDOUBLE; - - if (from_now) { - file->access_time = t; - } - if (!file->map) { - return; - } - - blocknum = h1 % file->cur_section.length; - header = (struct stat_file_header *)file->map; - c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct 
stat_file_block); - block = (struct stat_file_block *)c; - - for (i = 0; i < CHAIN_LENGTH; i++) { - if (i + blocknum >= file->cur_section.length) { - /* Need to expire some block in chain */ - msg_info ("chain %ud is full in statfile %s, starting expire", blocknum, file->filename); - break; - } - /* First try to find block in chain */ - if (block->hash1 == h1 && block->hash2 == h2) { - block->value = value; - return; - } - /* Check whether we have a free block in chain */ - if (block->hash1 == 0 && block->hash2 == 0) { - /* Write new block here */ - msg_debug ("found free block %ud in chain %ud, set h1=%ud, h2=%ud", i, blocknum, h1, h2); - block->hash1 = h1; - block->hash2 = h2; - block->value = value; - header->used_blocks ++; - - return; - } - - /* Expire block with minimum value otherwise */ - if (block->value < min) { - to_expire = block; - min = block->value; - } - c += sizeof (struct stat_file_block); - block = (struct stat_file_block *)c; - } - - /* Try expire some block */ - if (to_expire) { - block = to_expire; - } - else { - /* Expire first block in chain */ - c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct stat_file_block); - block = (struct stat_file_block *)c; - } - - block->hash1 = h1; - block->hash2 = h2; - block->value = value; -} - -void -statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, guint32 h1, guint32 h2, time_t now, double value) -{ - statfile_pool_set_block_common (pool, file, h1, h2, now, value, TRUE); -} - -stat_file_t * -statfile_pool_is_open (statfile_pool_t * pool, gchar *filename) -{ - static stat_file_t f, *ret; - rspamd_strlcpy (f.filename, filename, sizeof (f.filename)); - ret = lfind (&f, pool->files, (size_t *)&pool->opened, sizeof (stat_file_t), cmpstatfile); - return ret; -} - -guint32 -statfile_pool_get_section (statfile_pool_t * pool, stat_file_t * file) -{ - - return file->cur_section.code; -} - -gboolean -statfile_pool_set_section (statfile_pool_t * pool, stat_file_t * file, guint32 
code, gboolean from_begin) -{ - struct stat_file_section *sec; - off_t cur_offset; - - - /* Try to find section */ - if (from_begin) { - cur_offset = sizeof (struct stat_file_header); - } - else { - cur_offset = file->seek_pos - sizeof (struct stat_file_section); - } - while (cur_offset < (off_t)file->len) { - sec = (struct stat_file_section *)((gchar *)file->map + cur_offset); - if (sec->code == code) { - file->cur_section.code = code; - file->cur_section.length = sec->length; - file->seek_pos = cur_offset + sizeof (struct stat_file_section); - return TRUE; - } - cur_offset += sec->length; - } - - return FALSE; -} - -gboolean -statfile_pool_add_section (statfile_pool_t * pool, stat_file_t * file, guint32 code, guint64 length) -{ - struct stat_file_section sect; - struct stat_file_block block = { 0, 0, 0 }; - - if (lseek (file->fd, 0, SEEK_END) == -1) { - msg_info ("cannot lseek file %s, error %d, %s", file->filename, errno, strerror (errno)); - return FALSE; - } - - sect.code = code; - sect.length = length; - - if (write (file->fd, §, sizeof (sect)) == -1) { - msg_info ("cannot write block to file %s, error %d, %s", file->filename, errno, strerror (errno)); - return FALSE; - } - - while (length--) { - if (write (file->fd, &block, sizeof (block)) == -1) { - msg_info ("cannot write block to file %s, error %d, %s", file->filename, errno, strerror (errno)); - return FALSE; - } - } - - /* Lock statfile to remap memory */ - statfile_pool_lock_file (pool, file); - munmap (file->map, file->len); - fsync (file->fd); - file->len += length; - - if ((file->map = mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED, file->fd, 0)) == NULL) { - msg_info ("cannot mmap file %s, error %d, %s", file->filename, errno, strerror (errno)); - return FALSE; - } - statfile_pool_unlock_file (pool, file); - - return TRUE; - -} - -guint32 -statfile_get_section_by_name (const gchar *name) -{ - if (g_ascii_strcasecmp (name, "common") == 0) { - return STATFILE_SECTION_COMMON; - } - else if 
(g_ascii_strcasecmp (name, "header") == 0) { - return STATFILE_SECTION_HEADERS; - } - else if (g_ascii_strcasecmp (name, "url") == 0) { - return STATFILE_SECTION_URLS; - } - else if (g_ascii_strcasecmp (name, "regexp") == 0) { - return STATFILE_SECTION_REGEXP; - } - - return 0; -} - -gboolean -statfile_set_revision (stat_file_t *file, guint64 rev, time_t time) -{ - struct stat_file_header *header; - - if (file == NULL || file->map == NULL) { - return FALSE; - } - - header = (struct stat_file_header *)file->map; - - header->revision = rev; - header->rev_time = time; - - return TRUE; -} - -gboolean -statfile_inc_revision (stat_file_t *file) -{ - struct stat_file_header *header; - - if (file == NULL || file->map == NULL) { - return FALSE; - } - - header = (struct stat_file_header *)file->map; - - header->revision ++; - - return TRUE; -} - -gboolean -statfile_get_revision (stat_file_t *file, guint64 *rev, time_t *time) -{ - struct stat_file_header *header; - - if (file == NULL || file->map == NULL) { - return FALSE; - } - - header = (struct stat_file_header *)file->map; - - if (rev != NULL) { - *rev = header->revision; - } - if (time != NULL) { - *time = header->rev_time; - } - - return TRUE; -} - -guint64 -statfile_get_used_blocks (stat_file_t *file) -{ - struct stat_file_header *header; - - if (file == NULL || file->map == NULL) { - return (guint64)-1; - } - - header = (struct stat_file_header *)file->map; - - return header->used_blocks; -} - -guint64 -statfile_get_total_blocks (stat_file_t *file) -{ - struct stat_file_header *header; - - if (file == NULL || file->map == NULL) { - return (guint64)-1; - } - - header = (struct stat_file_header *)file->map; - - /* If total blocks is 0 we have old version of header, so set total blocks correctly */ - if (header->total_blocks == 0) { - header->total_blocks = file->cur_section.length; - } - - return header->total_blocks; -} - -static void -statfile_pool_invalidate_callback (gint fd, short what, void *ud) -{ - 
statfile_pool_t *pool = ud; - stat_file_t *file; - gint i; - - msg_info ("invalidating %d statfiles", pool->opened); - - for (i = 0; i < pool->opened; i ++) { - file = &pool->files[i]; - msync (file->map, file->len, MS_ASYNC); - } - -} - - -void -statfile_pool_plan_invalidate (statfile_pool_t *pool, time_t seconds, time_t jitter) -{ - gboolean pending; - - - if (pool->invalidate_event != NULL) { - pending = evtimer_pending (pool->invalidate_event, NULL); - if (pending) { - /* Replan event */ - pool->invalidate_tv.tv_sec = seconds + g_random_int_range (0, jitter); - pool->invalidate_tv.tv_usec = 0; - evtimer_add (pool->invalidate_event, &pool->invalidate_tv); - } - } - else { - pool->invalidate_event = rspamd_mempool_alloc (pool->pool, sizeof (struct event)); - pool->invalidate_tv.tv_sec = seconds + g_random_int_range (0, jitter); - pool->invalidate_tv.tv_usec = 0; - evtimer_set (pool->invalidate_event, statfile_pool_invalidate_callback, pool); - evtimer_add (pool->invalidate_event, &pool->invalidate_tv); - msg_info ("invalidate of statfile pool is planned in %d seconds", (gint)pool->invalidate_tv.tv_sec); - } -} - - -stat_file_t * -get_statfile_by_symbol (statfile_pool_t *pool, struct classifier_config *ccf, - const gchar *symbol, struct statfile **st, gboolean try_create) -{ - stat_file_t *res = NULL; - GList *cur; - - if (pool == NULL || ccf == NULL || symbol == NULL) { - msg_err ("invalid input arguments"); - return NULL; - } - - cur = g_list_first (ccf->statfiles); - while (cur) { - *st = cur->data; - if (strcmp (symbol, (*st)->symbol) == 0) { - break; - } - *st = NULL; - cur = g_list_next (cur); - } - if (*st == NULL) { - msg_info ("cannot find statfile with symbol %s", symbol); - return NULL; - } - - if ((res = statfile_pool_is_open (pool, (*st)->path)) == NULL) { - if ((res = statfile_pool_open (pool, (*st)->path, (*st)->size, FALSE)) == NULL) { - msg_warn ("cannot open %s", (*st)->path); - if (try_create) { - if (statfile_pool_create (pool, (*st)->path, 
(*st)->size) == -1) { - msg_err ("cannot create statfile %s", (*st)->path); - return NULL; - } - res = statfile_pool_open (pool, (*st)->path, (*st)->size, FALSE); - if (res == NULL) { - msg_err ("cannot open statfile %s after creation", (*st)->path); - } - } - } - } - - return res; -} - -void -statfile_pool_lockall (statfile_pool_t *pool) -{ - stat_file_t *file; - gint i; - - if (pool->mlock_ok) { - for (i = 0; i < pool->opened; i ++) { - file = &pool->files[i]; - if (mlock (file->map, file->len) == -1) { - msg_warn ("mlock of statfile failed, maybe you need to increase RLIMIT_MEMLOCK limit for a process: %s", strerror (errno)); - pool->mlock_ok = FALSE; - return; - } - } - } - /* Do not try to lock if mlock failed */ -} - diff --git a/src/statfile.h b/src/statfile.h deleted file mode 100644 index 5786c4927..000000000 --- a/src/statfile.h +++ /dev/null @@ -1,284 +0,0 @@ -/** - * @file statfile.h - * Describes common methods for accessing statistics files and caching them in memory - */ - -#ifndef RSPAMD_STATFILE_H -#define RSPAMD_STATFILE_H - -#include "config.h" -#include "mem_pool.h" -#include "hash.h" - -#define CHAIN_LENGTH 128 - -/* Section types */ -#define STATFILE_SECTION_COMMON 1 -#define STATFILE_SECTION_HEADERS 2 -#define STATFILE_SECTION_URLS 3 -#define STATFILE_SECTION_REGEXP 4 - -#define DEFAULT_STATFILE_INVALIDATE_TIME 30 -#define DEFAULT_STATFILE_INVALIDATE_JITTER 30 - -/** - * Common statfile header - */ -struct stat_file_header { - u_char magic[3]; /**< magic signature ('r' 's' 'd') */ - u_char version[2]; /**< version of statfile */ - u_char padding[3]; /**< padding */ - guint64 create_time; /**< create time (time_t->guint64) */ - guint64 revision; /**< revision number */ - guint64 rev_time; /**< revision time */ - guint64 used_blocks; /**< used blocks number */ - guint64 total_blocks; /**< total number of blocks */ - u_char unused[239]; /**< some bytes that can be used in future */ -}; - -/** - * Section header - */ -struct stat_file_section { - 
guint64 code; /**< section's code */ - guint64 length; /**< section's length in blocks */ -}; - -/** - * Block of data in statfile - */ -struct stat_file_block { - guint32 hash1; /**< hash1 (also acts as index) */ - guint32 hash2; /**< hash2 */ - double value; /**< double value */ -}; - -/** - * Statistic file - */ -struct stat_file { - struct stat_file_header header; /**< header */ - struct stat_file_section section; /**< first section */ - struct stat_file_block blocks[1]; /**< first block of data */ -}; - -/** - * Common view of statfile object - */ -typedef struct stat_file_s { -#ifdef HAVE_PATH_MAX - gchar filename[PATH_MAX]; /**< name of file */ -#else - gchar filename[MAXPATHLEN]; /**< name of file */ -#endif - gint fd; /**< descriptor */ - void *map; /**< mmaped area */ - off_t seek_pos; /**< current seek position */ - struct stat_file_section cur_section; /**< current section */ - time_t open_time; /**< time when file was opened */ - time_t access_time; /**< last access time */ - size_t len; /**< length of file(in bytes) */ - rspamd_mempool_mutex_t *lock; /**< mutex */ -} stat_file_t; - -/** - * Statfiles pool - */ -typedef struct statfile_pool_s { - stat_file_t *files; /**< hash table of opened files indexed by name */ - void **maps; /**< shared hash table of mmaped areas indexed by name */ - gint opened; /**< number of opened files */ - rspamd_mempool_t *pool; /**< memory pool object */ - rspamd_mempool_mutex_t *lock; /**< mutex */ - struct event *invalidate_event; /**< event for pool invalidation */ - struct timeval invalidate_tv; - gboolean mlock_ok; /**< whether it is possible to use mlock (2) to avoid statfiles unloading */ -} statfile_pool_t; - -/* Forwarded declarations */ -struct classifier_config; -struct statfile; - -/** - * Create new statfile pool - * @param max_size maximum size - * @return statfile pool object - */ -statfile_pool_t* statfile_pool_new (rspamd_mempool_t *pool, gboolean use_mlock); - -/** - * Open statfile and attach it to pool 
- * @param pool statfile pool object - * @param filename name of statfile to open - * @return 0 if specified statfile is attached and -1 in case of error - */ -stat_file_t* statfile_pool_open (statfile_pool_t *pool, gchar *filename, size_t len, gboolean forced); - -/** - * Create new statfile but DOES NOT attach it to pool, use @see statfile_pool_open for attaching - * @param pool statfile pool object - * @param filename name of statfile to create - * @param len length of new statfile - * @return 0 if file was created and -1 in case of error - */ -gint statfile_pool_create (statfile_pool_t *pool, gchar *filename, size_t len); - -/** - * Close specified statfile - * @param pool statfile pool object - * @param filename name of statfile to close - * @param remove_hash remove filename from opened files hash also - * @return 0 if file was closed and -1 if statfile was not opened - */ -gint statfile_pool_close (statfile_pool_t *pool, stat_file_t *file, gboolean keep_sorted); - -/** - * Delete statfile pool and close all attached statfiles - * @param pool statfile pool object - */ -void statfile_pool_delete (statfile_pool_t *pool); - -/** - * Try to lock all statfiles in memory - * @param pool statfile pool object - */ -void statfile_pool_lockall (statfile_pool_t *pool); - -/** - * Lock specified file for exclusive use (eg. 
learning) - * @param pool statfile pool object - * @param filename name of statfile - */ -void statfile_pool_lock_file (statfile_pool_t *pool, stat_file_t *file); - -/** - * Unlock specified file - * @param pool statfile pool object - * @param filename name of statfile - */ -void statfile_pool_unlock_file (statfile_pool_t *pool, stat_file_t *file); - -/** - * Get block from statfile with h1 and h2 values, use time argument for current time - * @param pool statfile pool object - * @param filename name of statfile - * @param h1 h1 in file - * @param h2 h2 in file - * @param now current time - * @return block value or 0 if block is not found - */ -double statfile_pool_get_block (statfile_pool_t *pool, stat_file_t *file, guint32 h1, guint32 h2, time_t now); - -/** - * Set specified block in statfile - * @param pool statfile pool object - * @param filename name of statfile - * @param h1 h1 in file - * @param h2 h2 in file - * @param now current time - * @param value value of block - */ -void statfile_pool_set_block (statfile_pool_t *pool, stat_file_t *file, guint32 h1, guint32 h2, time_t now, double value); - -/** - * Check whether statfile is opened - * @param pool statfile pool object - * @param filename name of statfile - * @return TRUE if specified statfile is opened and FALSE otherwise - */ -stat_file_t* statfile_pool_is_open (statfile_pool_t *pool, gchar *filename); - -/** - * Returns current statfile section - * @param pool statfile pool object - * @param filename name of statfile - * @return code of section or 0 if file is not opened - */ -guint32 statfile_pool_get_section (statfile_pool_t *pool, stat_file_t *file); - -/** - * Go to other section of statfile - * @param pool statfile pool object - * @param filename name of statfile - * @param code code of section to seek to - * @param from_begin search for section from begin of file if true - * @return TRUE if section was set and FALSE otherwise - */ -gboolean statfile_pool_set_section (statfile_pool_t *pool, 
stat_file_t *file, guint32 code, gboolean from_begin); - -/** - * Add new section to statfile - * @param pool statfile pool object - * @param filename name of statfile - * @param code code of section to seek to - * @param length length in blocks of new section - * @return TRUE if section was successfully added and FALSE in case of error - */ -gboolean statfile_pool_add_section (statfile_pool_t *pool, stat_file_t *file, guint32 code, guint64 length); - - -/** - * Return code of section identified by name - * @param name name of section - * @return code of section or 0 if name of section is unknown - */ -guint32 statfile_get_section_by_name (const gchar *name); - -/** - * Set statfile revision and revision time - * @param filename name of statfile - * @param revision number of revision - * @param time time of revision - * @return TRUE if revision was set - */ -gboolean statfile_set_revision (stat_file_t *file, guint64 rev, time_t time); - -/** - * Increment statfile revision and revision time - * @param filename name of statfile - * @param time time of revision - * @return TRUE if revision was set - */ -gboolean statfile_inc_revision (stat_file_t *file); - -/** - * Set statfile revision and revision time - * @param filename name of statfile - * @param revision saved number of revision - * @param time saved time of revision - * @return TRUE if revision was saved in rev and time - */ -gboolean statfile_get_revision (stat_file_t *file, guint64 *rev, time_t *time); - -/** - * Get statfile used blocks - * @param file file to get number of used blocks - * @return number of used blocks or (guint64)-1 in case of error - */ -guint64 statfile_get_used_blocks (stat_file_t *file); - -/** - * Get statfile total blocks - * @param file file to get number of used blocks - * @return number of used blocks or (guint64)-1 in case of error - */ -guint64 statfile_get_total_blocks (stat_file_t *file); - - -/** - * Plan statfile pool invalidation - */ -void statfile_pool_plan_invalidate 
(statfile_pool_t *pool, time_t seconds, time_t jitter); - -/** - * Get a statfile by symbol - * @param pool pool object - * @param ccf ccf classifier config - * @param symbol symbol to search - * @param st statfile to get - * @param try_create whether we need to create statfile if it is absent - */ -stat_file_t* get_statfile_by_symbol (statfile_pool_t *pool, struct classifier_config *ccf, - const gchar *symbol, struct statfile **st, gboolean try_create); - -#endif diff --git a/src/statfile_sync.c b/src/statfile_sync.c deleted file mode 100644 index 6b545af17..000000000 --- a/src/statfile_sync.c +++ /dev/null @@ -1,350 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "cfg_file.h" -#include "tokenizers/tokenizers.h" -#include "classifiers/classifiers.h" -#include "statfile.h" -#include "binlog.h" -#include "buffer.h" -#include "statfile_sync.h" - -enum rspamd_sync_state { - SYNC_STATE_GREETING, - SYNC_STATE_READ_LINE, - SYNC_STATE_READ_REV, - SYNC_STATE_QUIT, -}; - -/* Context of sync process */ -struct rspamd_sync_ctx { - struct statfile *st; - stat_file_t *real_statfile; - statfile_pool_t *pool; - rspamd_io_dispatcher_t *dispatcher; - struct event_base *ev_base; - - struct event tm_ev; - - struct timeval interval; - struct timeval io_tv; - gint sock; - guint32 timeout; - guint32 sync_interval; - enum rspamd_sync_state state; - gboolean is_busy; - - guint64 new_rev; - guint64 new_time; - guint64 new_len; -}; - -static void -log_next_sync (const gchar *symbol, time_t delay) -{ - gchar outstr[200]; - time_t t; - struct tm *tmp; - gint r; - - t = time(NULL); - t += delay; - tmp = localtime(&t); - - if (tmp) { - r = rspamd_snprintf (outstr, sizeof (outstr), "statfile_sync: next sync of %s at ", symbol); - if ((r = strftime(outstr + r, sizeof(outstr) - r, "%T", tmp)) != 0) { - msg_info (outstr); - } - } -} - -static gboolean -parse_revision_line (struct rspamd_sync_ctx *ctx, f_str_t *in) -{ - guint i, state = 0; - gchar *p, *c, numbuf[sizeof("18446744073709551615")]; - guint64 *val; - - /* First of all try to find END line */ - if (in->len >= sizeof ("END") - 1 && memcmp (in->begin, "END", sizeof ("END") - 1) == 0) { - ctx->state = SYNC_STATE_QUIT; - ctx->is_busy = FALSE; - return TRUE; - } - - /* Next check for error line */ - if (in->len >= sizeof ("FAIL") - 1 && memcmp (in->begin, "FAIL", sizeof ("FAIL") - 1) == 0) { - ctx->state = SYNC_STATE_QUIT; - ctx->is_busy = FALSE; - return TRUE; - } - - /* Now try to extract 3 numbers from string: revision, time and length */ - p = in->begin; - val = &ctx->new_rev; - for (i = 0; i < in->len; i ++, p ++) { - if (g_ascii_isspace (*p) || i == in->len - 
1) { - if (state == 1) { - if (i == in->len - 1) { - /* One more character */ - p ++; - } - rspamd_strlcpy (numbuf, c, MIN (p - c + 1, (gint)sizeof (numbuf))); - errno = 0; - *val = strtoull (numbuf, NULL, 10); - if (errno != 0) { - msg_info ("cannot parse number %s", strerror (errno)); - return FALSE; - } - state = 2; - } - } - else { - if (state == 0) { - c = p; - state = 1; - } - else if (state == 2) { - if (val == &ctx->new_rev) { - val = &ctx->new_time; - } - else if (val == &ctx->new_time) { - val = &ctx->new_len; - } - c = p; - state = 1; - } - } - } - - /* Current value must be len value and its value must not be 0 */ - return ((val == &ctx->new_len)); -} - -static gboolean -read_blocks (struct rspamd_sync_ctx *ctx, f_str_t *in) -{ - struct rspamd_binlog_element *elt; - guint i; - - statfile_pool_lock_file (ctx->pool, ctx->real_statfile); - elt = (struct rspamd_binlog_element *)in->begin; - for (i = 0; i < in->len / sizeof (struct rspamd_binlog_element); i ++, elt ++) { - statfile_pool_set_block (ctx->pool, ctx->real_statfile, elt->h1, elt->h2, ctx->new_time, elt->value); - } - statfile_pool_unlock_file (ctx->pool, ctx->real_statfile); - - return TRUE; -} - -static gboolean -sync_read (f_str_t * in, void *arg) -{ - struct rspamd_sync_ctx *ctx = arg; - gchar buf[256]; - guint64 rev = 0; - time_t ti = 0; - - if (in->len == 0) { - /* Skip empty lines */ - return TRUE; - } - switch (ctx->state) { - case SYNC_STATE_GREETING: - /* Skip greeting line and write sync command */ - /* Write initial data */ - statfile_get_revision (ctx->real_statfile, &rev, &ti); - rev = rspamd_snprintf (buf, sizeof (buf), "sync %s %uL %T" CRLF, ctx->st->symbol, rev, ti); - ctx->state = SYNC_STATE_READ_LINE; - return rspamd_dispatcher_write (ctx->dispatcher, buf, rev, FALSE, FALSE); - break; - case SYNC_STATE_READ_LINE: - /* Try to parse line from server */ - if (!parse_revision_line (ctx, in)) { - msg_info ("cannot parse line of length %z: '%*s'", in->len, (gint)in->len, in->begin); - 
close (ctx->sock); - rspamd_remove_dispatcher (ctx->dispatcher); - ctx->is_busy = FALSE; - return FALSE; - } - else if (ctx->state != SYNC_STATE_QUIT) { - if (ctx->new_len > 0) { - ctx->state = SYNC_STATE_READ_REV; - rspamd_set_dispatcher_policy (ctx->dispatcher, BUFFER_CHARACTER, ctx->new_len); - } - } - else { - /* Quit this session */ - msg_info ("sync ended for: %s", ctx->st->symbol); - close (ctx->sock); - rspamd_remove_dispatcher (ctx->dispatcher); - ctx->is_busy = FALSE; - /* Immediately return from callback */ - return FALSE; - } - break; - case SYNC_STATE_READ_REV: - /* In now contains all blocks of specified revision, so we can read them directly */ - if (!read_blocks (ctx, in)) { - msg_info ("cannot read blocks"); - close (ctx->sock); - rspamd_remove_dispatcher (ctx->dispatcher); - ctx->is_busy = FALSE; - return FALSE; - } - statfile_set_revision (ctx->real_statfile, ctx->new_rev, ctx->new_time); - msg_info ("set new revision: %uL, readed %z bytes", ctx->new_rev, in->len); - /* Now try to read other revision or END line */ - ctx->state = SYNC_STATE_READ_LINE; - rspamd_set_dispatcher_policy (ctx->dispatcher, BUFFER_LINE, 0); - break; - case SYNC_STATE_QUIT: - close (ctx->sock); - rspamd_remove_dispatcher (ctx->dispatcher); - ctx->is_busy = FALSE; - return FALSE; - } - - return TRUE; -} - -static void -sync_err (GError *err, void *arg) -{ - struct rspamd_sync_ctx *ctx = arg; - - msg_info ("abnormally closing connection, error: %s", err->message); - ctx->is_busy = FALSE; - close (ctx->sock); - rspamd_remove_dispatcher (ctx->dispatcher); -} - - -static void -sync_timer_callback (gint fd, short what, void *ud) -{ - struct rspamd_sync_ctx *ctx = ud; - guint32 jittered_interval; - - /* Plan new event */ - evtimer_del (&ctx->tm_ev); - /* Add some jittering for synchronization */ - jittered_interval = g_random_int_range (ctx->sync_interval, ctx->sync_interval * 2); - msec_to_tv (jittered_interval, &ctx->interval); - evtimer_add (&ctx->tm_ev, &ctx->interval); - 
log_next_sync (ctx->st->symbol, ctx->interval.tv_sec); - - if (ctx->is_busy) { - /* Sync is in progress */ - msg_info ("syncronization process is in progress, do not start new one"); - return; - } - - if ((ctx->sock = make_universal_socket (ctx->st->binlog->master_addr, ctx->st->binlog->master_port, - SOCK_STREAM, TRUE, FALSE, TRUE)) == -1) { - msg_info ("cannot connect to %s", ctx->st->binlog->master_addr); - return; - } - /* Now create and activate dispatcher */ - msec_to_tv (ctx->timeout, &ctx->io_tv); - ctx->dispatcher = rspamd_create_dispatcher (ctx->ev_base, ctx->sock, BUFFER_LINE, sync_read, NULL, sync_err, &ctx->io_tv, ctx); - - ctx->state = SYNC_STATE_GREETING; - ctx->is_busy = TRUE; - - msg_info ("starting synchronization of %s", ctx->st->symbol); - -} - -static gboolean -add_statfile_watch (statfile_pool_t *pool, struct statfile *st, struct config_file *cfg, struct event_base *ev_base) -{ - struct rspamd_sync_ctx *ctx; - guint32 jittered_interval; - - if (st->binlog->master_addr != NULL) { - ctx = rspamd_mempool_alloc (pool->pool, sizeof (struct rspamd_sync_ctx)); - ctx->st = st; - ctx->timeout = cfg->statfile_sync_timeout; - ctx->sync_interval = cfg->statfile_sync_interval; - ctx->ev_base = ev_base; - /* Add some jittering for synchronization */ - jittered_interval = g_random_int_range (ctx->sync_interval, ctx->sync_interval * 2); - msec_to_tv (jittered_interval, &ctx->interval); - /* Open statfile and attach it to pool */ - if ((ctx->real_statfile = statfile_pool_is_open (pool, st->path)) == NULL) { - if ((ctx->real_statfile = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) { - msg_warn ("cannot open %s", st->path); - if (statfile_pool_create (pool, st->path, st->size) == -1) { - msg_err ("cannot create statfile %s", st->path); - return FALSE; - } - ctx->real_statfile = statfile_pool_open (pool, st->path, st->size, FALSE); - } - } - /* Now plan event for it's future executing */ - evtimer_set (&ctx->tm_ev, sync_timer_callback, ctx); - 
event_base_set (ctx->ev_base, &ctx->tm_ev); - evtimer_add (&ctx->tm_ev, &ctx->interval); - log_next_sync (st->symbol, ctx->interval.tv_sec); - } - else { - msg_err ("cannot add statfile watch for statfile %s: no master defined", st->symbol); - return FALSE; - } - - return TRUE; -} - -gboolean -start_statfile_sync (statfile_pool_t *pool, struct config_file *cfg, struct event_base *ev_base) -{ - GList *cur, *l; - struct classifier_config *cl; - struct statfile *st; - - /* - * First of all walk through all classifiers and find those statfiles - * for which we should do sync (slave affinity) - */ - cur = cfg->classifiers; - while (cur) { - cl = cur->data; - l = cl->statfiles; - while (l) { - st = l->data; - if (st->binlog != NULL && st->binlog->affinity == AFFINITY_SLAVE) { - if (!add_statfile_watch (pool, st, cfg, ev_base)) { - return FALSE; - } - } - l = g_list_next (l); - } - cur = g_list_next (cur); - } - - return TRUE; -} diff --git a/src/statfile_sync.h b/src/statfile_sync.h deleted file mode 100644 index b3abb8b91..000000000 --- a/src/statfile_sync.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef RSPAMD_STATFILE_SYNC_H -#define RSPAMD_STATFILE_SYNC_H - -#include "config.h" -#include "main.h" -#include "statfile.h" -#include "cfg_file.h" - -/* - * Start synchronization of statfiles. Must be called after event_init as it adds events - */ -gboolean start_statfile_sync (statfile_pool_t *pool, struct config_file *cfg, struct event_base *ev_base); - -#endif diff --git a/src/symbols_cache.c b/src/symbols_cache.c deleted file mode 100644 index dfca57c66..000000000 --- a/src/symbols_cache.c +++ /dev/null @@ -1,1055 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "util.h" -#include "main.h" -#include "message.h" -#include "symbols_cache.h" -#include "cfg_file.h" - -#define WEIGHT_MULT 4.0 -#define FREQUENCY_MULT 10.0 -#define TIME_MULT -1.0 - -/* After which number of messages try to resort cache */ -#define MAX_USES 100 -/* - * Symbols cache utility functions - */ - -#define MIN_CACHE 17 - -static guint64 total_frequency = 0; -static guint32 nsymbols = 0; - -gint -cache_cmp (const void *p1, const void *p2) -{ - const struct cache_item *i1 = p1, *i2 = p2; - - return strcmp (i1->s->symbol, i2->s->symbol); -} - -gint -cache_logic_cmp (const void *p1, const void *p2) -{ - const struct cache_item *i1 = p1, *i2 = p2; - double w1, w2; - double weight1, weight2; - double f1 = 0, f2 = 0; - - if (i1->priority == 0 && i2->priority == 0) { - if (total_frequency > 0) { - f1 = ((double)i1->s->frequency * nsymbols) / (double)total_frequency; - f2 = ((double)i2->s->frequency * nsymbols) / (double)total_frequency; - } - weight1 = i1->metric_weight == 0 ? i1->s->weight : i1->metric_weight; - weight2 = i2->metric_weight == 0 ? 
i2->s->weight : i2->metric_weight; - w1 = abs (weight1) * WEIGHT_MULT + f1 * FREQUENCY_MULT + i1->s->avg_time * TIME_MULT; - w2 = abs (weight2) * WEIGHT_MULT + f2 * FREQUENCY_MULT + i2->s->avg_time * TIME_MULT; - } - else { - /* Strict sorting */ - w1 = abs (i1->priority); - w2 = abs (i2->priority); - } - - return (gint)w2 - w1; -} - -static GChecksum * -get_mem_cksum (struct symbols_cache *cache) -{ - GChecksum *result; - GList *cur, *l; - struct cache_item *item; - - result = g_checksum_new (G_CHECKSUM_SHA1); - - l = g_list_copy (cache->negative_items); - l = g_list_sort (l, cache_cmp); - cur = g_list_first (l); - while (cur) { - item = cur->data; - if (item->s->symbol[0] != '\0') { - g_checksum_update (result, item->s->symbol, strlen (item->s->symbol)); - } - cur = g_list_next (cur); - } - g_list_free (l); - - - l = g_list_copy (cache->static_items); - l = g_list_sort (l, cache_cmp); - cur = g_list_first (l); - while (cur) { - item = cur->data; - if (item->s->symbol[0] != '\0') { - g_checksum_update (result, item->s->symbol, strlen (item->s->symbol)); - } - total_frequency += item->s->frequency; - cur = g_list_next (cur); - } - g_list_free (l); - - return result; -} - -/* Sort items in logical order */ -static void -post_cache_init (struct symbols_cache *cache) -{ - GList *cur; - struct cache_item *item; - - total_frequency = 0; - nsymbols = cache->used_items; - cur = g_list_first (cache->negative_items); - while (cur) { - item = cur->data; - total_frequency += item->s->frequency; - cur = g_list_next (cur); - } - cur = g_list_first (cache->static_items); - while (cur) { - item = cur->data; - total_frequency += item->s->frequency; - cur = g_list_next (cur); - } - - cache->negative_items = g_list_sort (cache->negative_items, cache_logic_cmp); - cache->static_items = g_list_sort (cache->static_items, cache_logic_cmp); -} - -/* Unmap cache file */ -static void -unmap_cache_file (gpointer arg) -{ - struct symbols_cache *cache = arg; - - /* A bit ugly usage */ - 
munmap (cache->map, cache->used_items * sizeof (struct saved_cache_item)); -} - -static gboolean -mmap_cache_file (struct symbols_cache *cache, gint fd, rspamd_mempool_t *pool) -{ - guint8 *map; - gint i; - GList *cur; - struct cache_item *item; - - if (cache->used_items > 0) { - map = mmap (NULL, cache->used_items * sizeof (struct saved_cache_item), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (map == MAP_FAILED) { - msg_err ("cannot mmap cache file: %d, %s", errno, strerror (errno)); - close (fd); - return FALSE; - } - /* Close descriptor as it would never be used */ - close (fd); - cache->map = map; - /* Now free old values for saved cache items and fill them with mmapped ones */ - i = 0; - cur = g_list_first (cache->negative_items); - while (cur) { - item = cur->data; - item->s = (struct saved_cache_item *)(map + i * sizeof (struct saved_cache_item)); - cur = g_list_next (cur); - i ++; - } - cur = g_list_first (cache->static_items); - while (cur) { - item = cur->data; - item->s = (struct saved_cache_item *)(map + i * sizeof (struct saved_cache_item)); - cur = g_list_next (cur); - i ++; - } - - post_cache_init (cache); - } - - return TRUE; -} - -/* Fd must be opened for writing, after creating file is mmapped */ -static gboolean -create_cache_file (struct symbols_cache *cache, const gchar *filename, gint fd, rspamd_mempool_t *pool) -{ - GChecksum *cksum; - u_char *digest; - gsize cklen; - GList *cur; - struct cache_item *item; - - /* Calculate checksum */ - cksum = get_mem_cksum (cache); - if (cksum == NULL) { - msg_err ("cannot calculate checksum for symbols"); - close (fd); - return FALSE; - } - - cklen = g_checksum_type_get_length (G_CHECKSUM_SHA1); - digest = g_malloc (cklen); - - g_checksum_get_digest (cksum, digest, &cklen); - /* Now write data to file */ - cur = g_list_first (cache->negative_items); - while (cur) { - item = cur->data; - if (write (fd, item->s, sizeof (struct saved_cache_item)) == -1) { - msg_err ("cannot write to file %d, %s", errno, 
strerror (errno)); - close (fd); - g_checksum_free (cksum); - g_free (digest); - return FALSE; - } - cur = g_list_next (cur); - } - cur = g_list_first (cache->static_items); - while (cur) { - item = cur->data; - if (write (fd, item->s, sizeof (struct saved_cache_item)) == -1) { - msg_err ("cannot write to file %d, %s", errno, strerror (errno)); - close (fd); - g_checksum_free (cksum); - g_free (digest); - return FALSE; - } - cur = g_list_next (cur); - } - /* Write checksum */ - if (write (fd, digest, cklen) == -1) { - msg_err ("cannot write to file %d, %s", errno, strerror (errno)); - close (fd); - g_checksum_free (cksum); - g_free (digest); - return FALSE; - } - - close (fd); - g_checksum_free (cksum); - g_free (digest); - /* Reopen for reading */ - if ((fd = open (filename, O_RDWR)) == -1) { - msg_info ("cannot open file %s, error %d, %s", errno, strerror (errno)); - return FALSE; - } - - return mmap_cache_file (cache, fd, pool); -} - -enum rspamd_symbol_type { - SYMBOL_TYPE_NORMAL, - SYMBOL_TYPE_VIRTUAL, - SYMBOL_TYPE_CALLBACK -}; - -static void -register_symbol_common (struct symbols_cache **cache, const gchar *name, double weight, gint priority, - symbol_func_t func, gpointer user_data, enum rspamd_symbol_type type) -{ - struct cache_item *item = NULL; - struct symbols_cache *pcache = *cache; - GList **target; - double *w; - - if (*cache == NULL) { - pcache = g_new0 (struct symbols_cache, 1); - *cache = pcache; - pcache->static_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); - pcache->items_by_symbol = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - } - - item = rspamd_mempool_alloc0 (pcache->static_pool, sizeof (struct cache_item)); - item->s = rspamd_mempool_alloc0 (pcache->static_pool, sizeof (struct saved_cache_item)); - rspamd_strlcpy (item->s->symbol, name, sizeof (item->s->symbol)); - item->func = func; - item->user_data = user_data; - item->priority = priority; - - switch (type) { - case SYMBOL_TYPE_NORMAL: - break; - case 
SYMBOL_TYPE_VIRTUAL: - item->is_virtual = TRUE; - break; - case SYMBOL_TYPE_CALLBACK: - item->is_callback = TRUE; - break; - } - - /* Handle weight using default metric */ - if (pcache->cfg && pcache->cfg->default_metric && (w = g_hash_table_lookup (pcache->cfg->default_metric->symbols, name)) != NULL) { - item->s->weight = weight * (*w); - } - else { - item->s->weight = weight; - } - - /* If we have undefined priority determine list according to weight */ - if (priority == 0) { - if (item->s->weight > 0) { - target = &(*cache)->static_items; - } - else { - target = &(*cache)->negative_items; - } - } - else { - /* Items with more priority are called before items with less priority */ - if (priority < 0) { - target = &(*cache)->negative_items; - } - else { - target = &(*cache)->static_items; - } - } - - pcache->used_items++; - g_hash_table_insert (pcache->items_by_symbol, item->s->symbol, item); - msg_debug ("used items: %d, added symbol: %s", (*cache)->used_items, name); - set_counter (item->s->symbol, 0); - - *target = g_list_prepend (*target, item); -} - -void -register_symbol (struct symbols_cache **cache, const gchar *name, double weight, - symbol_func_t func, gpointer user_data) -{ - register_symbol_common (cache, name, weight, 0, func, user_data, SYMBOL_TYPE_NORMAL); -} - -void -register_virtual_symbol (struct symbols_cache **cache, const gchar *name, double weight) -{ - register_symbol_common (cache, name, weight, 0, NULL, NULL, SYMBOL_TYPE_VIRTUAL); -} - -void -register_callback_symbol (struct symbols_cache **cache, const gchar *name, double weight, - symbol_func_t func, gpointer user_data) -{ - register_symbol_common (cache, name, weight, 0, func, user_data, SYMBOL_TYPE_CALLBACK); -} - -void -register_callback_symbol_priority (struct symbols_cache **cache, const gchar *name, double weight, gint priority, - symbol_func_t func, gpointer user_data) -{ - register_symbol_common (cache, name, weight, priority, func, user_data, SYMBOL_TYPE_CALLBACK); -} - -void 
-register_dynamic_symbol (rspamd_mempool_t *dynamic_pool, struct symbols_cache **cache, - const gchar *name, double weight, symbol_func_t func, - gpointer user_data, GList *networks) -{ - struct cache_item *item = NULL; - struct symbols_cache *pcache = *cache; - GList *t, *cur; - uintptr_t r; - double *w; - guint32 mask = 0xFFFFFFFF; - struct dynamic_map_item *it; - gint rr; - - if (*cache == NULL) { - pcache = g_new0 (struct symbols_cache, 1); - *cache = pcache; - pcache->static_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); - } - - item = rspamd_mempool_alloc0 (dynamic_pool, sizeof (struct cache_item)); - item->s = rspamd_mempool_alloc (dynamic_pool, sizeof (struct saved_cache_item)); - rspamd_strlcpy (item->s->symbol, name, sizeof (item->s->symbol)); - item->func = func; - item->user_data = user_data; - /* Handle weight using default metric */ - if (pcache->cfg && pcache->cfg->default_metric && (w = g_hash_table_lookup (pcache->cfg->default_metric->symbols, name)) != NULL) { - item->s->weight = weight * (*w); - } - else { - item->s->weight = weight; - } - item->is_dynamic = TRUE; - item->priority = 0; - - pcache->used_items++; - msg_debug ("used items: %d, added symbol: %s", (*cache)->used_items, name); - set_counter (item->s->symbol, 0); - - g_hash_table_insert (pcache->items_by_symbol, item->s->symbol, item); - - if (networks == NULL) { - pcache->dynamic_items = g_list_prepend (pcache->dynamic_items, item); - } - else { - if (pcache->dynamic_map == NULL) { - pcache->dynamic_map = radix_tree_create (); - pcache->negative_dynamic_map = radix_tree_create (); - } - cur = networks; - while (cur) { - it = cur->data; - mask = mask << (32 - it->mask); - r = ntohl (it->addr.s_addr & mask); - if (it->negative) { - /* For negatve items insert into list and into negative cache map */ - if ((r = radix32tree_find (pcache->negative_dynamic_map, r)) != RADIX_NO_VALUE) { - t = (GList *)((gpointer)r); - t = g_list_prepend (t, item); - /* Replace pointers in radix 
tree and in destructor function */ - rspamd_mempool_replace_destructor (dynamic_pool, (rspamd_mempool_destruct_t)g_list_free, (gpointer)r, t); - rr = radix32tree_replace (pcache->negative_dynamic_map, ntohl (it->addr.s_addr), mask, (uintptr_t)t); - if (rr == -1) { - msg_warn ("cannot replace ip to tree: %s, mask %X", inet_ntoa (it->addr), mask); - } - } - else { - t = g_list_prepend (NULL, item); - rspamd_mempool_add_destructor (dynamic_pool, (rspamd_mempool_destruct_t)g_list_free, t); - rr = radix32tree_insert (pcache->negative_dynamic_map, ntohl (it->addr.s_addr), mask, (uintptr_t)t); - if (rr == -1) { - msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (it->addr), mask); - } - else if (rr == 1) { - msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (it->addr), mask); - } - } - /* Insert into list */ - pcache->dynamic_items = g_list_prepend (pcache->dynamic_items, item); - } - else { - if ((r = radix32tree_find (pcache->dynamic_map, r)) != RADIX_NO_VALUE) { - t = (GList *)((gpointer)r); - t = g_list_prepend (t, item); - /* Replace pointers in radix tree and in destructor function */ - rspamd_mempool_replace_destructor (dynamic_pool, (rspamd_mempool_destruct_t)g_list_free, (gpointer)r, t); - rr = radix32tree_replace (pcache->dynamic_map, ntohl (it->addr.s_addr), mask, (uintptr_t)t); - if (rr == -1) { - msg_warn ("cannot replace ip to tree: %s, mask %X", inet_ntoa (it->addr), mask); - } - } - else { - t = g_list_prepend (NULL, item); - rspamd_mempool_add_destructor (dynamic_pool, (rspamd_mempool_destruct_t)g_list_free, t); - rr = radix32tree_insert (pcache->dynamic_map, ntohl (it->addr.s_addr), mask, (uintptr_t)t); - if (rr == -1) { - msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (it->addr), mask); - } - else if (rr == 1) { - msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (it->addr), mask); - } - } - } - cur = g_list_next (cur); - } - } -} - -void -remove_dynamic_rules (struct symbols_cache *cache) -{ - if 
(cache->dynamic_items) { - g_list_free (cache->dynamic_items); - cache->dynamic_items = NULL; - } - - if (cache->dynamic_map) { - radix_tree_free (cache->dynamic_map); - cache->dynamic_map = NULL; - } - if (cache->negative_dynamic_map) { - radix_tree_free (cache->negative_dynamic_map); - cache->negative_dynamic_map = NULL; - } -} - -static void -free_cache (gpointer arg) -{ - struct symbols_cache *cache = arg; - - if (cache->map != NULL) { - unmap_cache_file (cache); - } - - if (cache->static_items) { - g_list_free (cache->static_items); - } - if (cache->negative_items) { - g_list_free (cache->negative_items); - } - if (cache->dynamic_items) { - g_list_free (cache->dynamic_items); - } - if (cache->dynamic_map) { - radix_tree_free (cache->dynamic_map); - } - if (cache->negative_dynamic_map) { - radix_tree_free (cache->negative_dynamic_map); - } - g_hash_table_destroy (cache->items_by_symbol); - rspamd_mempool_delete (cache->static_pool); - - g_free (cache); -} - -gboolean -init_symbols_cache (rspamd_mempool_t * pool, struct symbols_cache *cache, struct config_file *cfg, - const gchar *filename, gboolean ignore_checksum) -{ - struct stat st; - gint fd; - GChecksum *cksum; - u_char *mem_sum, *file_sum; - gsize cklen; - gboolean res; - - if (cache == NULL) { - return FALSE; - } - - /* Init locking */ - cache->lock = rspamd_mempool_get_rwlock (pool); - - cache->cfg = cfg; - - /* Just in-memory cache */ - if (filename == NULL) { - post_cache_init (cache); - return TRUE; - } - - /* First of all try to stat file */ - if (stat (filename, &st) == -1) { - /* Check errno */ - if (errno == ENOENT) { - /* Try to create file */ - if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { - msg_info ("cannot create file %s, error %d, %s", filename, errno, strerror (errno)); - return FALSE; - } - else { - return create_cache_file (cache, filename, fd, pool); - } - } - else { - msg_info ("cannot stat file %s, error %d, %s", filename, errno, strerror (errno)); 
- return FALSE; - } - } - else { - if ((fd = open (filename, O_RDWR)) == -1) { - msg_info ("cannot open file %s, error %d, %s", filename, errno, strerror (errno)); - return FALSE; - } - } - - if (!ignore_checksum) { - /* Calculate checksum */ - cksum = get_mem_cksum (cache); - if (cksum == NULL) { - msg_err ("cannot calculate checksum for symbols"); - close (fd); - return FALSE; - } - - cklen = g_checksum_type_get_length (G_CHECKSUM_SHA1); - mem_sum = g_malloc (cklen); - - g_checksum_get_digest (cksum, mem_sum, &cklen); - /* Now try to read file sum */ - if (lseek (fd, -(cklen), SEEK_END) == -1) { - if (errno == EINVAL) { - /* Try to create file */ - msg_info ("recreate cache file"); - if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { - msg_info ("cannot create file %s, error %d, %s", filename, errno, strerror (errno)); - return FALSE; - } - else { - return create_cache_file (cache, filename, fd, pool); - } - } - close (fd); - g_free (mem_sum); - g_checksum_free (cksum); - msg_err ("cannot seek to read checksum, %d, %s", errno, strerror (errno)); - return FALSE; - } - file_sum = g_malloc (cklen); - if (read (fd, file_sum, cklen) == -1) { - close (fd); - g_free (mem_sum); - g_free (file_sum); - g_checksum_free (cksum); - msg_err ("cannot read checksum, %d, %s", errno, strerror (errno)); - return FALSE; - } - - if (memcmp (file_sum, mem_sum, cklen) != 0) { - close (fd); - g_free (mem_sum); - g_free (file_sum); - g_checksum_free (cksum); - msg_info ("checksum mismatch, recreating file"); - /* Reopen with rw permissions */ - if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { - msg_info ("cannot create file %s, error %d, %s", filename, errno, strerror (errno)); - return FALSE; - } - else { - return create_cache_file (cache, filename, fd, pool); - } - } - - g_free (mem_sum); - g_free (file_sum); - g_checksum_free (cksum); - } - /* MMap cache file and copy saved_cache structures */ - res = mmap_cache_file 
(cache, fd, pool); - - rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t)free_cache, cache); - - return res; -} - -static GList * -check_dynamic_item (struct rspamd_task *task, struct symbols_cache *cache) -{ -#ifdef HAVE_INET_PTON - /* TODO: radix doesn't support ipv6 addrs */ - return NULL; -#else - GList *res = NULL; - uintptr_t r; - if (cache->dynamic_map != NULL && task->from_addr.s_addr != INADDR_NONE) { - if ((r = radix32tree_find (cache->dynamic_map, ntohl (task->from_addr.s_addr))) != RADIX_NO_VALUE) { - res = (GList *)((gpointer)r); - return res; - } - else { - return NULL; - } - } - return res; -#endif -} - -static gboolean -check_negative_dynamic_item (struct rspamd_task *task, struct symbols_cache *cache, struct cache_item *item) -{ - -#ifdef HAVE_INET_PTON - /* TODO: radix doesn't support ipv6 addrs */ - return FALSE; -#else - GList *res = NULL; - uintptr_t r; - - if (cache->negative_dynamic_map != NULL && task->from_addr.s_addr != INADDR_NONE) { - if ((r = radix32tree_find (cache->negative_dynamic_map, ntohl (task->from_addr.s_addr))) != RADIX_NO_VALUE) { - res = (GList *)((gpointer)r); - while (res) { - if (res->data == (gpointer)item) { - return TRUE; - } - res = g_list_next (res); - } - } - } - return FALSE; -#endif - -} - -static gboolean -check_debug_symbol (struct config_file *cfg, const gchar *symbol) -{ - GList *cur; - - cur = cfg->debug_symbols; - while (cur) { - if (strcmp (symbol, (const gchar *)cur->data) == 0) { - return TRUE; - } - cur = g_list_next (cur); - } - - return FALSE; -} - -static void -rspamd_symbols_cache_metric_cb (gpointer k, gpointer v, gpointer ud) -{ - struct symbols_cache *cache = (struct symbols_cache *)ud; - GList *cur; - const gchar *sym = k; - gdouble weight = *(gdouble *)v; - struct cache_item *item; - - cur = cache->negative_items; - while (cur) { - item = cur->data; - if (strcmp (item->s->symbol, sym) == 0) { - item->metric_weight = weight; - return; - } - cur = g_list_next (cur); - } - cur = 
cache->static_items; - while (cur) { - item = cur->data; - if (strcmp (item->s->symbol, sym) == 0) { - item->metric_weight = weight; - return; - } - cur = g_list_next (cur); - } -} - -gboolean -validate_cache (struct symbols_cache *cache, struct config_file *cfg, gboolean strict) -{ - struct cache_item *item; - GList *cur, *p, *metric_symbols; - gboolean res; - - if (cache == NULL) { - msg_err ("empty cache is invalid"); - return FALSE; - } - - /* Check each symbol in a cache and find its weight definition */ - cur = cache->negative_items; - while (cur) { - item = cur->data; - if (!item->is_callback) { - if (g_hash_table_lookup (cfg->metrics_symbols, item->s->symbol) == NULL) { - if (strict) { - msg_warn ("no weight registered for symbol %s", item->s->symbol); - return FALSE; - } - else { - msg_info ("no weight registered for symbol %s", item->s->symbol); - } - } - } - cur = g_list_next (cur); - } - cur = cache->static_items; - while (cur) { - item = cur->data; - if (!item->is_callback) { - if (g_hash_table_lookup (cfg->metrics_symbols, item->s->symbol) == NULL) { - if (strict) { - msg_warn ("no weight registered for symbol %s", item->s->symbol); - return FALSE; - } - else { - msg_info ("no weight registered for symbol %s", item->s->symbol); - } - } - } - cur = g_list_next (cur); - } -#ifndef GLIB_HASH_COMPAT - /* Now check each metric item and find corresponding symbol in a cache */ - metric_symbols = g_hash_table_get_keys (cfg->metrics_symbols); - cur = metric_symbols; - while (cur) { - res = FALSE; - p = cache->negative_items; - while (p) { - item = p->data; - if (strcmp (item->s->symbol, cur->data) == 0) { - res = TRUE; - break; - } - p = g_list_next (p); - } - if (!res) { - p = cache->static_items; - while (p) { - item = p->data; - if (strcmp (item->s->symbol, cur->data) == 0) { - res = TRUE; - break; - } - p = g_list_next (p); - } - } - if (!res) { - msg_warn ("symbol '%s' is registered in metric but not found in cache", cur->data); - if (strict) { - return 
FALSE; - } - } - cur = g_list_next (cur); - } - g_list_free (metric_symbols); -#endif /* GLIB_COMPAT */ - - /* Now adjust symbol weights according to default metric */ - if (cfg->default_metric != NULL) { - g_hash_table_foreach (cfg->default_metric->symbols, rspamd_symbols_cache_metric_cb, cache); - /* Resort caches */ - cache->negative_items = g_list_sort (cache->negative_items, cache_logic_cmp); - cache->static_items = g_list_sort (cache->static_items, cache_logic_cmp); - } - - return TRUE; -} - -struct symbol_callback_data { - enum { - CACHE_STATE_NEGATIVE, - CACHE_STATE_DYNAMIC_MAP, - CACHE_STATE_DYNAMIC, - CACHE_STATE_STATIC - } state; - struct cache_item *saved_item; - GList *list_pointer; -}; - -gboolean -call_symbol_callback (struct rspamd_task * task, struct symbols_cache * cache, gpointer *save) -{ -#ifdef HAVE_CLOCK_GETTIME - struct timespec ts1, ts2; -#else - struct timeval tv1, tv2; -#endif - guint64 diff; - struct cache_item *item = NULL; - struct symbol_callback_data *s = *save; - - if (s == NULL) { - if (cache == NULL) { - return FALSE; - } - if (cache->uses++ >= MAX_USES) { - msg_info ("resort symbols cache"); - rspamd_mempool_wlock_rwlock (cache->lock); - cache->uses = 0; - /* Resort while having write lock */ - post_cache_init (cache); - rspamd_mempool_wunlock_rwlock (cache->lock); - } - s = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct symbol_callback_data)); - *save = s; - if (cache->negative_items != NULL) { - s->list_pointer = g_list_first (cache->negative_items); - s->saved_item = s->list_pointer->data; - s->state = CACHE_STATE_NEGATIVE; - } - else if ((s->list_pointer = check_dynamic_item (task, cache)) || cache->dynamic_items != NULL) { - if (s->list_pointer == NULL) { - s->list_pointer = g_list_first (cache->dynamic_items); - s->saved_item = s->list_pointer->data; - s->state = CACHE_STATE_DYNAMIC; - } - else { - s->saved_item = s->list_pointer->data; - s->state = CACHE_STATE_DYNAMIC_MAP; - } - } - else { - s->state = 
CACHE_STATE_STATIC; - s->list_pointer = g_list_first (cache->static_items); - if (s->list_pointer) { - s->saved_item = s->list_pointer->data; - } - else { - return FALSE; - } - } - item = s->saved_item; - } - else { - if (cache == NULL) { - return FALSE; - } - switch (s->state) { - case CACHE_STATE_NEGATIVE: - s->list_pointer = g_list_next (s->list_pointer); - if (s->list_pointer == NULL) { - if ((s->list_pointer = check_dynamic_item (task, cache)) || cache->dynamic_items != NULL) { - if (s->list_pointer == NULL) { - s->list_pointer = g_list_first (cache->dynamic_items); - s->saved_item = s->list_pointer->data; - s->state = CACHE_STATE_DYNAMIC; - } - else { - s->saved_item = s->list_pointer->data; - s->state = CACHE_STATE_DYNAMIC_MAP; - } - } - else { - s->state = CACHE_STATE_STATIC; - s->list_pointer = g_list_first (cache->static_items); - if (s->list_pointer) { - s->saved_item = s->list_pointer->data; - } - else { - return FALSE; - } - } - } - else { - s->saved_item = s->list_pointer->data; - } - item = s->saved_item; - break; - case CACHE_STATE_DYNAMIC_MAP: - s->list_pointer = g_list_next (s->list_pointer); - if (s->list_pointer == NULL) { - s->list_pointer = g_list_first (cache->dynamic_items); - if (s->list_pointer) { - s->saved_item = s->list_pointer->data; - s->state = CACHE_STATE_DYNAMIC; - } - else { - s->state = CACHE_STATE_STATIC; - s->list_pointer = g_list_first (cache->static_items); - if (s->list_pointer) { - s->saved_item = s->list_pointer->data; - } - else { - return FALSE; - } - } - } - else { - s->saved_item = s->list_pointer->data; - } - item = s->saved_item; - break; - case CACHE_STATE_DYNAMIC: - s->list_pointer = g_list_next (s->list_pointer); - if (s->list_pointer == NULL) { - s->state = CACHE_STATE_STATIC; - s->list_pointer = g_list_first (cache->static_items); - if (s->list_pointer) { - s->saved_item = s->list_pointer->data; - } - else { - return FALSE; - } - } - else { - s->saved_item = s->list_pointer->data; - /* Skip items that are in 
negative map */ - while (s->list_pointer != NULL && check_negative_dynamic_item (task, cache, s->saved_item)) { - s->list_pointer = g_list_next (s->list_pointer); - if (s->list_pointer != NULL) { - s->saved_item = s->list_pointer->data; - } - } - if (s->list_pointer == NULL) { - s->state = CACHE_STATE_STATIC; - s->list_pointer = g_list_first (cache->static_items); - if (s->list_pointer) { - s->saved_item = s->list_pointer->data; - } - else { - return FALSE; - } - } - } - item = s->saved_item; - break; - case CACHE_STATE_STATIC: - /* Next pointer */ - s->list_pointer = g_list_next (s->list_pointer); - if (s->list_pointer) { - s->saved_item = s->list_pointer->data; - } - else { - return FALSE; - } - item = s->saved_item; - break; - } - } - if (!item) { - return FALSE; - } - if (!item->is_virtual) { -#ifdef HAVE_CLOCK_GETTIME -# ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID - clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts1); -# elif defined(HAVE_CLOCK_VIRTUAL) - clock_gettime (CLOCK_VIRTUAL, &ts1); -# else - clock_gettime (CLOCK_REALTIME, &ts1); -# endif -#else - if (gettimeofday (&tv1, NULL) == -1) { - msg_warn ("gettimeofday failed: %s", strerror (errno)); - } -#endif - if (G_UNLIKELY (check_debug_symbol (task->cfg, item->s->symbol))) { - rspamd_log_debug (rspamd_main->logger); - item->func (task, item->user_data); - rspamd_log_nodebug (rspamd_main->logger); - } - else { - item->func (task, item->user_data); - } - - -#ifdef HAVE_CLOCK_GETTIME -# ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID - clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts2); -# elif defined(HAVE_CLOCK_VIRTUAL) - clock_gettime (CLOCK_VIRTUAL, &ts2); -# else - clock_gettime (CLOCK_REALTIME, &ts2); -# endif -#else - if (gettimeofday (&tv2, NULL) == -1) { - msg_warn ("gettimeofday failed: %s", strerror (errno)); - } -#endif - -#ifdef HAVE_CLOCK_GETTIME - diff = (ts2.tv_sec - ts1.tv_sec) * 1000000 + (ts2.tv_nsec - ts1.tv_nsec) / 1000; -#else - diff = (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec); -#endif - 
item->s->avg_time = set_counter (item->s->symbol, diff); - } - - s->saved_item = item; - - return TRUE; - -} diff --git a/src/symbols_cache.h b/src/symbols_cache.h deleted file mode 100644 index bb2100fc1..000000000 --- a/src/symbols_cache.h +++ /dev/null @@ -1,150 +0,0 @@ -#ifndef RSPAMD_SYMBOLS_CACHE_H -#define RSPAMD_SYMBOLS_CACHE_H - -#include "config.h" -#include "radix.h" - -#define MAX_SYMBOL 128 - -struct rspamd_task; -struct config_file; - -typedef void (*symbol_func_t)(struct rspamd_task *task, gpointer user_data); - -struct saved_cache_item { - gchar symbol[MAX_SYMBOL]; - double weight; - guint32 frequency; - double avg_time; -}; - -struct dynamic_map_item { - struct in_addr addr; - guint32 mask; - gboolean negative; -}; - -struct cache_item { - /* Static item's data */ - struct saved_cache_item *s; - - /* For dynamic rules */ - struct dynamic_map_item *networks; - guint32 networks_number; - gboolean is_dynamic; - - /* Callback data */ - symbol_func_t func; - gpointer user_data; - - /* Flags of virtual symbols */ - gboolean is_virtual; - gboolean is_callback; - - /* Priority */ - gint priority; - gdouble metric_weight; -}; - - -struct symbols_cache { - /* Normal cache items */ - GList *static_items; - - /* Items that have negative weights */ - GList *negative_items; - - /* Radix map of dynamic rules with ip mappings */ - radix_tree_t *dynamic_map; - radix_tree_t *negative_dynamic_map; - - /* Common dynamic rules */ - GList *dynamic_items; - - /* Hash table for fast access */ - GHashTable *items_by_symbol; - - rspamd_mempool_t *static_pool; - - guint cur_items; - guint used_items; - guint uses; - gpointer map; - rspamd_mempool_rwlock_t *lock; - struct config_file *cfg; -}; - -/** - * Load symbols cache from file, must be called _after_ init_symbols_cache - */ -gboolean init_symbols_cache (rspamd_mempool_t *pool, struct symbols_cache *cache, struct config_file *cfg, - const gchar *filename, gboolean ignore_checksum); - -/** - * Register function for 
symbols parsing - * @param name name of symbol - * @param func pointer to handler - * @param user_data pointer to user_data - */ -void register_symbol (struct symbols_cache **cache, const gchar *name, double weight, - symbol_func_t func, gpointer user_data); - - -/** - * Register virtual symbol - * @param name name of symbol - */ -void register_virtual_symbol (struct symbols_cache **cache, const gchar *name, double weight); - -/** - * Register callback function for symbols parsing - * @param name name of symbol - * @param func pointer to handler - * @param user_data pointer to user_data - */ -void register_callback_symbol (struct symbols_cache **cache, const gchar *name, double weight, - symbol_func_t func, gpointer user_data); - -/** - * Register function for symbols parsing with strict priority - * @param name name of symbol - * @param func pointer to handler - * @param user_data pointer to user_data - */ -void register_callback_symbol_priority (struct symbols_cache **cache, const gchar *name, double weight, - gint priority, symbol_func_t func, gpointer user_data); - -/** - * Register function for dynamic symbols parsing - * @param name name of symbol - * @param func pointer to handler - * @param user_data pointer to user_data - */ -void register_dynamic_symbol (rspamd_mempool_t *pool, struct symbols_cache **cache, const gchar *name, - double weight, symbol_func_t func, - gpointer user_data, GList *networks); - -/** - * Call function for cached symbol using saved callback - * @param task task object - * @param cache symbols cache - * @param saved_item pointer to currently saved item - */ -gboolean call_symbol_callback (struct rspamd_task *task, struct symbols_cache *cache, gpointer *save); - -/** - * Remove all dynamic rules from cache - * @param cache symbols cache - */ -void remove_dynamic_rules (struct symbols_cache *cache); - -/** - * Validate cache items agains theirs weights defined in metrics - * @param cache symbols cache - * @param cfg configuration - * 
@param strict do strict checks - symbols MUST be described in metrics - */ -gboolean validate_cache (struct symbols_cache *cache, struct config_file *cfg, gboolean strict); - - -#endif diff --git a/src/task.c b/src/task.c deleted file mode 100644 index f389793dd..000000000 --- a/src/task.c +++ /dev/null @@ -1,159 +0,0 @@ -/* Copyright (c) 2014, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "task.h" -#include "main.h" -#include "filter.h" -#include "message.h" - -/* - * Destructor for recipients list in a task - */ -static void -rcpt_destruct (void *pointer) -{ - struct rspamd_task *task = (struct rspamd_task *) pointer; - - if (task->rcpt) { - g_list_free (task->rcpt); - } -} - -/* - * Create new task - */ -struct rspamd_task * -rspamd_task_new (struct rspamd_worker *worker) -{ - struct rspamd_task *new_task; - - new_task = g_slice_alloc0 (sizeof (struct rspamd_task)); - - new_task->worker = worker; - new_task->state = READ_MESSAGE; - if (worker) { - new_task->cfg = worker->srv->cfg; - } -#ifdef HAVE_CLOCK_GETTIME -# ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID - clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &new_task->ts); -# elif defined(HAVE_CLOCK_VIRTUAL) - clock_gettime (CLOCK_VIRTUAL, &new_task->ts); -# else - clock_gettime (CLOCK_REALTIME, &new_task->ts); -# endif -#endif - if (gettimeofday (&new_task->tv, NULL) == -1) { - msg_warn ("gettimeofday failed: %s", strerror (errno)); - } - - new_task->task_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); - - /* Add destructor for recipients list (it would be better to use anonymous function here */ - rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) rcpt_destruct, new_task); - new_task->results = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_hash_table_destroy, - new_task->results); - new_task->re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); - rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_hash_table_destroy, - new_task->re_cache); - new_task->raw_headers = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); - rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_hash_table_destroy, - new_task->raw_headers); - new_task->emails = g_tree_new (compare_email_func); - 
rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_tree_destroy, - new_task->emails); - new_task->urls = g_tree_new (compare_url_func); - rspamd_mempool_add_destructor (new_task->task_pool, - (rspamd_mempool_destruct_t) g_tree_destroy, - new_task->urls); - new_task->sock = -1; - new_task->is_mime = TRUE; - new_task->pre_result.action = METRIC_ACTION_NOACTION; - - new_task->message_id = new_task->queue_id = "undef"; - - return new_task; -} - - -/* - * Free all structures of worker_task - */ -void -rspamd_task_free (struct rspamd_task *task, gboolean is_soft) -{ - GList *part; - struct mime_part *p; - - if (task) { - debug_task ("free pointer %p", task); - while ((part = g_list_first (task->parts))) { - task->parts = g_list_remove_link (task->parts, part); - p = (struct mime_part *) part->data; - g_byte_array_free (p->content, TRUE); - g_list_free_1 (part); - } - if (task->text_parts) { - g_list_free (task->text_parts); - } - if (task->images) { - g_list_free (task->images); - } - if (task->messages) { - g_list_free (task->messages); - } - if (task->received) { - g_list_free (task->received); - } - if (task->http_conn != NULL) { - rspamd_http_connection_unref (task->http_conn); - } - if (task->sock != -1) { - close (task->sock); - } - rspamd_mempool_delete (task->task_pool); - g_slice_free1 (sizeof (struct rspamd_task), task); - } -} - -void -rspamd_task_free_hard (gpointer ud) -{ - struct rspamd_task *task = ud; - - rspamd_task_free (task, FALSE); -} - -void -rspamd_task_free_soft (gpointer ud) -{ - struct rspamd_task *task = ud; - - rspamd_task_free (task, FALSE); -} diff --git a/src/task.h b/src/task.h deleted file mode 100644 index f8f7c89e3..000000000 --- a/src/task.h +++ /dev/null @@ -1,165 +0,0 @@ -/* Copyright (c) 2014, Vsevolod Stakhov - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ -#ifndef TASK_H_ -#define TASK_H_ - -#include "config.h" -#include "http.h" -#include "events.h" -#include "util.h" -#include "mem_pool.h" -#include "dns.h" - -enum rspamd_command { - CMD_CHECK, - CMD_SYMBOLS, - CMD_REPORT, - CMD_REPORT_IFSPAM, - CMD_SKIP, - CMD_PING, - CMD_PROCESS, - CMD_OTHER -}; - -enum rspamd_metric_action { - METRIC_ACTION_REJECT = 0, - METRIC_ACTION_SOFT_REJECT, - METRIC_ACTION_REWRITE_SUBJECT, - METRIC_ACTION_ADD_HEADER, - METRIC_ACTION_GREYLIST, - METRIC_ACTION_NOACTION, - METRIC_ACTION_MAX -}; - -typedef gint (*protocol_reply_func)(struct rspamd_task *task); - -struct custom_command { - const gchar *name; - protocol_reply_func func; -}; - -/** - * Worker task structure - */ -struct rspamd_task { - struct rspamd_worker *worker; /**< pointer to worker object */ - enum { - READ_MESSAGE, - WAIT_PRE_FILTER, - WAIT_FILTER, - WAIT_POST_FILTER, - WRITE_REPLY, - CLOSING_CONNECTION - } state; /**< current session state */ - enum rspamd_command cmd; /**< command */ - struct custom_command *custom_cmd; /**< custom command if any */ - gint sock; /**< socket descriptor */ - gboolean is_mime; /**< if this task is mime task */ - gboolean is_json; /**< output is JSON */ - gboolean allow_learn; /**< allow learning */ - gboolean is_skipped; /**< whether message was skipped by configuration */ - - gchar *helo; /**< helo header value */ - gchar *from; /**< from header value */ - gchar *queue_id; /**< queue id if specified */ - const gchar *message_id; /**< message id */ - GList *rcpt; /**< recipients list */ - guint nrcpt; /**< number of recipients */ - rspamd_inet_addr_t from_addr; /**< from addr for a task */ - rspamd_inet_addr_t client_addr; /**< address of connected socket */ - gchar *deliver_to; /**< address to deliver */ - gchar *user; /**< user to deliver */ - gchar *subject; /**< subject (for non-mime) */ - gchar *hostname; /**< hostname reported by MTA */ - GString *msg; /**< message buffer */ - struct rspamd_http_connection *http_conn; /**< HTTP 
server connection */ - struct rspamd_async_session* s; /**< async session object */ - gint parts_count; /**< mime parts count */ - GMimeMessage *message; /**< message, parsed with GMime */ - GMimeObject *parser_parent_part; /**< current parent part */ - InternetAddressList *rcpts; /**< list of all recipients */ - GList *parts; /**< list of parsed parts */ - GList *text_parts; /**< list of text parts */ - gchar *raw_headers_str; /**< list of raw headers */ - GList *received; /**< list of received headers */ - GTree *urls; /**< list of parsed urls */ - GTree *emails; /**< list of parsed emails */ - GList *images; /**< list of images */ - GHashTable *raw_headers; /**< list of raw headers */ - GHashTable *results; /**< hash table of metric_result indexed by - * metric's name */ - GHashTable *tokens; /**< hash table of tokens indexed by tokenizer - * pointer */ - GList *messages; /**< list of messages that would be reported */ - GHashTable *re_cache; /**< cache for matched or not matched regexps */ - struct config_file *cfg; /**< pointer to config object */ - gchar *last_error; /**< last error */ - gint error_code; /**< code of last error */ - rspamd_mempool_t *task_pool; /**< memory pool for task */ -#ifdef HAVE_CLOCK_GETTIME - struct timespec ts; /**< time of connection */ -#endif - struct timeval tv; /**< time of connection */ - guint32 scan_milliseconds; /**< how much milliseconds passed */ - gboolean pass_all_filters; /**< pass task throught every rule */ - gboolean no_log; /**< do not log or write this task to the history */ - guint32 parser_recursion; /**< for avoiding recursion stack overflow */ - gboolean (*fin_callback)(void *arg); /**< calback for filters finalizing */ - void *fin_arg; /**< argument for fin callback */ - - guint32 dns_requests; /**< number of DNS requests per this task */ - - struct rspamd_dns_resolver *resolver; /**< DNS resolver */ - struct event_base *ev_base; /**< Event base */ - - GThreadPool *classify_pool; /**< A pool of classify 
threads */ - - struct { - enum rspamd_metric_action action; /**< Action of pre filters */ - gchar *str; /**< String describing action */ - } pre_result; /**< Result of pre-filters */ -}; - -/** - * Construct new task for worker - */ -struct rspamd_task* rspamd_task_new (struct rspamd_worker *worker); -/** - * Destroy task object and remove its IO dispatcher if it exists - */ -void rspamd_task_free (struct rspamd_task *task, gboolean is_soft); -void rspamd_task_free_hard (gpointer ud); -void rspamd_task_free_soft (gpointer ud); - -/** - * Called if session was restored inside fin callback - */ -void rspamd_task_restore (void *arg); - -/** - * Called if all filters are processed - * @return TRUE if session should be terminated - */ -gboolean rspamd_task_fin (void *arg); - -#endif /* TASK_H_ */ diff --git a/src/trie.c b/src/trie.c deleted file mode 100644 index 394c4e939..000000000 --- a/src/trie.c +++ /dev/null @@ -1,230 +0,0 @@ -/* Copyright (c) 2010, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "mem_pool.h" -#include "trie.h" - -rspamd_trie_t* -rspamd_trie_create (gboolean icase) -{ - rspamd_trie_t *new; - - new = g_malloc (sizeof (rspamd_trie_t)); - - new->icase = icase; - new->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); - new->root.fail = NULL; - new->root.final = 0; - new->root.id = 0; - new->root.next = NULL; - new->root.match = NULL; - new->fail_states = g_ptr_array_sized_new (8); - - return new; -} - -/* - * Insert a single character as the specified level of the suffix tree - */ -static struct rspamd_trie_state * -rspamd_trie_insert_char (rspamd_trie_t *trie, guint depth, struct rspamd_trie_state *pos, gchar c) -{ - struct rspamd_trie_match *new_match; - struct rspamd_trie_state *new_pos; - - /* New match is inserted before pos */ - new_match = rspamd_mempool_alloc (trie->pool, sizeof (struct rspamd_trie_match)); - new_match->next = pos->match; - new_match->c = c; - - /* Now set match link */ - pos->match = new_match; - - new_match->state = rspamd_mempool_alloc (trie->pool, sizeof (struct rspamd_trie_state)); - new_pos = new_match->state; - new_pos->match = NULL; - new_pos->fail = &trie->root; - new_pos->final = 0; - new_pos->id = -1; - - if (trie->fail_states->len < depth + 1) { - /* Grow fail states array if depth is more than its size */ - guint size = trie->fail_states->len; - - size = MAX (size * 2, depth + 1); - g_ptr_array_set_size (trie->fail_states, size); - } - - new_pos->next = 
trie->fail_states->pdata[depth]; - trie->fail_states->pdata[depth] = new_pos; - - return new_pos; -} - -/* Traverse the specified node to find corresponding match */ -static inline struct rspamd_trie_match * -check_match (struct rspamd_trie_state *s, gchar c) -{ - struct rspamd_trie_match *match = s->match; - - while (match && match->c != c) { - match = match->next; - } - - return match; -} - -void -rspamd_trie_insert (rspamd_trie_t *trie, const gchar *pattern, gint pattern_id) -{ - const guchar *p = pattern; - struct rspamd_trie_state *q, *q1, *r, *cur_node; - struct rspamd_trie_match *m, *n; - guint i, depth = 0; - gchar c; - - /* Insert pattern to the trie */ - - cur_node = &trie->root; - - while (*p) { - c = trie->icase ? g_ascii_tolower (*p) : *p; - m = check_match (cur_node, c); - if (m == NULL) { - /* Insert a character at specified level depth */ - cur_node = rspamd_trie_insert_char (trie, depth, cur_node, c); - } - else { - cur_node = m->state; - } - p ++; - depth ++; - } - - cur_node->final = depth; - cur_node->id = pattern_id; - - /* Update fail states and build fail states graph */ - /* Go through the whole depth of prefixes */ - for (i = 0; i < trie->fail_states->len; i++) { - q = trie->fail_states->pdata[i]; - while (q) { - m = q->match; - while (m) { - c = m->c; - q1 = m->state; - r = q->fail; - /* Move q->fail to last known fail location for this character (or to NULL) */ - while (r && (n = check_match (r, c)) == NULL) { - r = r->fail; - } - - /* We have found new fail location for character c, so set it in q1 */ - if (r != NULL) { - q1->fail = n->state; - if (q1->fail->final > q1->final) { - q1->final = q1->fail->final; - } - } - else { - /* Search from root */ - if ((n = check_match (&trie->root, c))) { - q1->fail = n->state; - } - else { - q1->fail = &trie->root; - } - } - - m = m->next; - } - - q = q->next; - } - } -} - -const gchar* -rspamd_trie_lookup (rspamd_trie_t *trie, const gchar *buffer, gsize buflen, gint *matched_id) -{ - const guchar 
*p = buffer, *prev, *ret; - struct rspamd_trie_state *cur_node; - struct rspamd_trie_match *m = NULL; - gchar c; - - - cur_node = &trie->root; - prev = p; - ret = p; - - while (buflen) { - c = trie->icase ? g_ascii_tolower (*p) : *p; - - /* Match pattern or use fail-path to restore state */ - while (cur_node != NULL && (m = check_match (cur_node, c)) == NULL) { - cur_node = cur_node->fail; - } - - /* Shift left in the text */ - if (cur_node == &trie->root) { - /* 1 character pattern found */ - ret = prev; - } - else if (cur_node == NULL) { - /* We have tried the pattern but eventually it was not found */ - cur_node = &trie->root; - ret = p; - p ++; - prev = p; - buflen --; - continue; - } - - if (m != NULL) { - /* Match found */ - cur_node = m->state; - - if (cur_node->final) { - /* The complete pattern found */ - if (matched_id != NULL) { - *matched_id = cur_node->id; - } - return (const gchar *) ret; - } - } - p ++; - prev = p; - buflen --; - } - - return NULL; -} - -void -rspamd_trie_free (rspamd_trie_t *trie) -{ - g_ptr_array_free (trie->fail_states, TRUE); - rspamd_mempool_delete (trie->pool); - g_free (trie); -} diff --git a/src/trie.h b/src/trie.h deleted file mode 100644 index 2792ee4a5..000000000 --- a/src/trie.h +++ /dev/null @@ -1,86 +0,0 @@ -/* Copyright (c) 2010, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Rambler BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - -#ifndef TRIE_H_ -#define TRIE_H_ - -#include "config.h" -#include "mem_pool.h" - -/* - * Rspamd implements basic bitwise prefixed trie structure - */ - -struct rspamd_trie_match; - -struct rspamd_trie_state { - struct rspamd_trie_state *next; - struct rspamd_trie_state *fail; - struct rspamd_trie_match *match; - guint final; - gint id; -}; - -struct rspamd_trie_match { - struct rspamd_trie_match *next; - struct rspamd_trie_state *state; - gchar c; -}; - -typedef struct rspamd_trie_s { - struct rspamd_trie_state root; - GPtrArray *fail_states; - gboolean icase; - rspamd_mempool_t *pool; -} rspamd_trie_t; - -/* - * Create a new suffix trie - */ -rspamd_trie_t* rspamd_trie_create (gboolean icase); - -/* - * Insert a pattern into the trie - * @param trie suffix trie - * @param pattern text of element - * @param pattern_id id of element - */ -void rspamd_trie_insert (rspamd_trie_t *trie, const gchar *pattern, gint pattern_id); - -/* - * Search for a text using suffix trie - * @param trie suffix trie - * @param buffer a text where to search for trie patterns - * @param buflen a length of text - * @param mached_id on a successfull search here would be stored id of pattern found - * @return Position in a text where pattern was 
found or NULL if no patterns were found - */ -const gchar* rspamd_trie_lookup (rspamd_trie_t *trie, const gchar *buffer, gsize buflen, gint *matched_id); - -/* - * Deallocate suffix trie - */ -void rspamd_trie_free (rspamd_trie_t *trie); - -#endif /* TRIE_H_ */ diff --git a/src/upstream.c b/src/upstream.c deleted file mode 100644 index f82d3ba50..000000000 --- a/src/upstream.c +++ /dev/null @@ -1,525 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "upstream.h" - - -#ifdef _THREAD_SAFE -pthread_rwlock_t upstream_mtx = PTHREAD_RWLOCK_INITIALIZER; -# define U_RLOCK() do { pthread_rwlock_rdlock (&upstream_mtx); } while (0) -# define U_WLOCK() do { pthread_rwlock_wrlock (&upstream_mtx); } while (0) -# define U_UNLOCK() do { pthread_rwlock_unlock (&upstream_mtx); } while (0) -#else -# define U_RLOCK() do {} while (0) -# define U_WLOCK() do {} while (0) -# define U_UNLOCK() do {} while (0) -#endif - -#define MAX_TRIES 20 -#define HASH_COMPAT - -/* - * Poly: 0xedb88320 - * Init: 0x0 - */ - -static const guint32 crc32lookup[256] = { - 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U, 0x706af48fU, - 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U, - 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, - 0xf3b97148U, 0x84be41deU, 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, - 0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U, - 0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U, - 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU, 0x35b5a8faU, 0x42b2986cU, - 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, - 0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, - 0xcfba9599U, 0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U, - 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U, 0x01db7106U, - 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U, - 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, - 0x91646c97U, 0xe6635c01U, 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, - 0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U, - 0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U, - 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U, 0x4adfa541U, 0x3dd895d7U, - 
0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U, - 0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, - 0xbe0b1010U, 0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU, - 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U, 0x2eb40d81U, - 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU, - 0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U, 0xe3630b12U, 0x94643b84U, - 0x0d6d6a3eU, 0x7a6a5aa8U, 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, - 0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU, - 0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU, - 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U, 0xd6d6a3e8U, 0xa1d1937eU, - 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, - 0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, - 0x316e8eefU, 0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U, - 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU, 0xb2bd0b28U, - 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU, - 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, - 0x72076785U, 0x05005713U, 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, - 0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U, - 0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U, - 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU, 0x8f659effU, 0xf862ae69U, - 0x616bffd3U, 0x166ccf45U, 0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, - 0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, - 0x40df0b66U, 0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U, - 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U, 0xcdd70693U, - 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U, - 
0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU -}; - -/* - * Check upstream parameters and mark it whether valid or dead - */ -static void -check_upstream (struct upstream *up, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors) -{ - if (up->dead) { - if (now - up->time >= revive_timeout) { - U_WLOCK (); - up->dead = 0; - up->errors = 0; - up->time = 0; - up->weight = up->priority; - U_UNLOCK (); - } - } - else { - if (now - up->time >= error_timeout && up->errors >= max_errors) { - U_WLOCK (); - up->dead = 1; - up->time = now; - up->weight = 0; - U_UNLOCK (); - } - } -} - -/* - * Call this function after failed upstream request - */ -void -upstream_fail (struct upstream *up, time_t now) -{ - if (up->time != 0) { - up->errors++; - } - else { - U_WLOCK (); - up->time = now; - up->errors++; - U_UNLOCK (); - } -} - -/* - * Call this function after successfull upstream request - */ -void -upstream_ok (struct upstream *up, time_t now) -{ - if (up->errors != 0) { - U_WLOCK (); - up->errors = 0; - up->time = 0; - U_UNLOCK (); - } - - up->weight--; -} - -/* - * Mark all upstreams as active. 
This function is used when all upstreams are marked as inactive - */ -void -revive_all_upstreams (void *ups, size_t members, size_t msize) -{ - guint i; - struct upstream *cur; - guchar *p; - - U_WLOCK (); - p = ups; - for (i = 0; i < members; i++) { - cur = (struct upstream *)p; - cur->time = 0; - cur->errors = 0; - cur->dead = 0; - cur->weight = cur->priority; - p += msize; - } - U_UNLOCK (); -} - -/* - * Scan all upstreams for errors and mark upstreams dead or alive depends on conditions, - * return number of alive upstreams - */ -static gint -rescan_upstreams (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors) -{ - guint i, alive; - struct upstream *cur; - guchar *p; - - /* Recheck all upstreams */ - p = ups; - alive = members; - for (i = 0; i < members; i++) { - cur = (struct upstream *)p; - check_upstream (cur, now, error_timeout, revive_timeout, max_errors); - alive -= cur->dead; - p += msize; - } - - /* All upstreams are dead */ - if (alive == 0) { - revive_all_upstreams (ups, members, msize); - alive = members; - } - - - return alive; - -} - -/* Return alive upstream by its number */ -static struct upstream * -get_upstream_by_number (void *ups, size_t members, size_t msize, gint selected) -{ - guint i; - u_char *p, *c; - struct upstream *cur; - - i = 0; - p = ups; - c = ups; - U_RLOCK (); - for (;;) { - /* Out of range, return NULL */ - if (p > c + members * msize) { - break; - } - - cur = (struct upstream *)p; - p += msize; - - if (cur->dead) { - /* Skip inactive upstreams */ - continue; - } - /* Return selected upstream */ - if ((gint)i == selected) { - U_UNLOCK (); - return cur; - } - i++; - } - U_UNLOCK (); - - /* Error */ - return NULL; - -} - -/* - * Get hash key for specified key (perl hash) - */ -static guint32 -get_hash_for_key (guint32 hash, const gchar *key, size_t keylen) -{ - guint32 h, index; - const gchar *end = key + keylen; - - h = ~hash; - - if (end != key) { - while (key < 
end) { - index = (h ^ (u_char) * key) & 0x000000ffU; - h = (h >> 8) ^ crc32lookup[index]; - ++key; - } - } - else { - while (*key) { - index = (h ^ (u_char) * key) & 0x000000ffU; - h = (h >> 8) ^ crc32lookup[index]; - ++key; - } - } - - return (~h); -} - -/* - * Recheck all upstreams and return random active upstream - */ -struct upstream * -get_random_upstream (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, - time_t revive_timeout, size_t max_errors) -{ - gint alive, selected; - - alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); - selected = rand () % alive; - - return get_upstream_by_number (ups, members, msize, selected); -} - -/* - * Return upstream by hash, that is calculated from active upstreams number - */ -struct upstream * -get_upstream_by_hash (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, - time_t revive_timeout, size_t max_errors, const gchar *key, size_t keylen) -{ - gint alive, tries = 0, r; - guint32 h = 0, ht; - gchar *p, numbuf[4]; - struct upstream *cur; - - alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); - - if (alive == 0) { - return NULL; - } - - h = get_hash_for_key (0, key, keylen); -#ifdef HASH_COMPAT - h = (h >> 16) & 0x7fff; -#endif - h %= members; - - for (;;) { - p = (gchar *)ups + msize * h; - cur = (struct upstream *)p; - if (!cur->dead) { - break; - } - r = snprintf (numbuf, sizeof (numbuf), "%d", tries); - ht = get_hash_for_key (0, numbuf, r); - ht = get_hash_for_key (ht, key, keylen); -#ifdef HASH_COMPAT - h += (ht >> 16) & 0x7fff; -#else - h += ht; -#endif - h %= members; - tries++; - if (tries > MAX_TRIES) { - return NULL; - } - } - - U_RLOCK (); - p = ups; - U_UNLOCK (); - return cur; -} - -/* - * Recheck all upstreams and return upstream in round-robin order according to weight and priority - */ -struct upstream * -get_upstream_round_robin (void *ups, size_t members, size_t 
msize, time_t now, time_t error_timeout, - time_t revive_timeout, size_t max_errors) -{ - guint max_weight, i; - struct upstream *cur, *selected = NULL; - u_char *p; - - /* Recheck all upstreams */ - (void)rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); - - p = ups; - max_weight = 0; - selected = (struct upstream *)p; - U_RLOCK (); - for (i = 0; i < members; i++) { - cur = (struct upstream *)p; - if (!cur->dead) { - if (max_weight < (guint)cur->weight) { - max_weight = cur->weight; - selected = cur; - } - } - p += msize; - } - U_UNLOCK (); - - if (max_weight == 0) { - p = ups; - U_WLOCK (); - for (i = 0; i < members; i++) { - cur = (struct upstream *)p; - cur->weight = cur->priority; - if (!cur->dead) { - if (max_weight < cur->priority) { - max_weight = cur->priority; - selected = cur; - } - } - p += msize; - } - U_UNLOCK (); - } - - return selected; -} - -/* - * Recheck all upstreams and return upstream in round-robin order according to only priority (master-slaves) - */ -struct upstream * -get_upstream_master_slave (void *ups, size_t members, size_t msize, time_t now, time_t error_timeout, - time_t revive_timeout, size_t max_errors) -{ - guint max_weight, i; - struct upstream *cur, *selected = NULL; - u_char *p; - - /* Recheck all upstreams */ - (void)rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); - - p = ups; - max_weight = 0; - selected = (struct upstream *)p; - U_RLOCK (); - for (i = 0; i < members; i++) { - cur = (struct upstream *)p; - if (!cur->dead) { - if (max_weight < cur->priority) { - max_weight = cur->priority; - selected = cur; - } - } - p += msize; - } - U_UNLOCK (); - - return selected; -} - -/* - * Ketama manipulation functions - */ - -static gint -ketama_sort_cmp (const void *a1, const void *a2) -{ - return *((guint32 *) a1) - *((guint32 *) a2); -} - -/* - * Add ketama points for specified upstream - */ -gint -upstream_ketama_add (struct upstream *up, gchar *up_key, 
size_t keylen, size_t keypoints) -{ - guint32 h = 0; - gchar tmp[4]; - guint i; - - /* Allocate ketama points array */ - if (up->ketama_points == NULL) { - up->ketama_points_size = keypoints; - up->ketama_points = malloc (sizeof (guint32) * up->ketama_points_size); - if (up->ketama_points == NULL) { - return -1; - } - } - - h = get_hash_for_key (h, up_key, keylen); - - for (i = 0; i < keypoints; i++) { - tmp[0] = i & 0xff; - tmp[1] = (i >> 8) & 0xff; - tmp[2] = (i >> 16) & 0xff; - tmp[3] = (i >> 24) & 0xff; - - h = get_hash_for_key (h, tmp, sizeof (tmp) * sizeof (gchar)); - up->ketama_points[i] = h; - } - /* Keep points sorted */ - qsort (up->ketama_points, keypoints, sizeof (guint32), ketama_sort_cmp); - - return 0; -} - -/* - * Return upstream by hash and find nearest ketama point in some server - */ -struct upstream * -get_upstream_by_hash_ketama (void *ups, size_t members, size_t msize, - time_t now, time_t error_timeout, time_t revive_timeout, size_t max_errors, const gchar *key, size_t keylen) -{ - guint alive, i; - guint32 h = 0, step, middle, d, min_diff = UINT_MAX; - gchar *p; - struct upstream *cur = NULL, *nearest = NULL; - - alive = rescan_upstreams (ups, members, msize, now, error_timeout, revive_timeout, max_errors); - - if (alive == 0) { - return NULL; - } - - h = get_hash_for_key (h, key, keylen); - - U_RLOCK (); - p = ups; - nearest = (struct upstream *)p; - for (i = 0; i < members; i++) { - cur = (struct upstream *)p; - if (!cur->dead && cur->ketama_points != NULL) { - /* Find nearest ketama point for this key */ - step = cur->ketama_points_size / 2; - middle = step; - while (step != 1) { - d = cur->ketama_points[middle] - h; - if (abs (d) < (gint)min_diff) { - min_diff = abs (d); - nearest = cur; - } - step /= 2; - if (d > 0) { - middle -= step; - } - else { - middle += step; - } - } - } - } - U_UNLOCK (); - return nearest; -} - -#undef U_LOCK -#undef U_UNLOCK -/* - * vi:ts=4 - */ diff --git a/src/upstream.h b/src/upstream.h deleted file mode 
100644 index da0a00013..000000000 --- a/src/upstream.h +++ /dev/null @@ -1,127 +0,0 @@ -#ifndef UPSTREAM_H -#define UPSTREAM_H - -#include -#include - -/** - * Structure of generic upstream - */ -struct upstream { - guint errors; /**< Errors for this upstream */ - time_t time; /**< Time of marking */ - guint dead; /**< Dead flag */ - guint priority; /**< Fixed priority */ - gint16 weight; /**< Dynamic weight */ - guint32 *ketama_points; /**< Ketama points array */ - size_t ketama_points_size; /**< Ketama array size */ -}; - -/** - * Upstream error logic - * 1. During error time we count upstream_ok and upstream_fail - * 2. If failcount is more then maxerrors then we mark upstream as unavailable for dead time - * 3. After dead time we mark upstream as alive and go to the step 1 - * 4. If all upstreams are dead, marks every upstream as alive - */ - -/** - * Add an error to an upstream - */ -void upstream_fail (struct upstream *up, time_t now); - -/** - * Increase upstream successes count - */ -void upstream_ok (struct upstream *up, time_t now); - -/** - * Make all upstreams alive - */ -void revive_all_upstreams (void *ups, size_t members, size_t msize); - -/** - * Add ketama points for upstream - */ -gint upstream_ketama_add (struct upstream *up, gchar *up_key, size_t keylen, size_t keypoints); - -/** - * Get a random upstream from array of upstreams - * @param ups array of structures that contains struct upstream as their first element - * @param members number of elements in array - * @param msize size of each member - * @param now current time - * @param error_timeout time during which we are counting errors - * @param revive_timeout time during which we counts upstream dead - * @param max_errors maximum errors during error_timeout to mark upstream dead - */ -struct upstream* get_random_upstream (void *ups, size_t members, size_t msize, - time_t now, time_t error_timeout, - time_t revive_timeout, size_t max_errors); - -/** - * Get upstream based on hash from array 
of upstreams - * @param ups array of structures that contains struct upstream as their first element - * @param members number of elements in array - * @param msize size of each member - * @param now current time - * @param error_timeout time during which we are counting errors - * @param revive_timeout time during which we counts upstream dead - * @param max_errors maximum errors during error_timeout to mark upstream dead - * @param key key for hashing - * @param keylen length of the key - */ -struct upstream* get_upstream_by_hash (void *ups, size_t members, size_t msize, - time_t now, time_t error_timeout, - time_t revive_timeout, size_t max_errors, - const gchar *key, size_t keylen); - -/** - * Get an upstream from array of upstreams based on its current weight - * @param ups array of structures that contains struct upstream as their first element - * @param members number of elements in array - * @param msize size of each member - * @param now current time - * @param error_timeout time during which we are counting errors - * @param revive_timeout time during which we counts upstream dead - * @param max_errors maximum errors during error_timeout to mark upstream dead - */ -struct upstream* get_upstream_round_robin (void *ups, size_t members, size_t msize, - time_t now, time_t error_timeout, - time_t revive_timeout, size_t max_errors); - -/** - * Get upstream based on hash from array of upstreams, this functions is using ketama algorithm - * @param ups array of structures that contains struct upstream as their first element - * @param members number of elements in array - * @param msize size of each member - * @param now current time - * @param error_timeout time during which we are counting errors - * @param revive_timeout time during which we counts upstream dead - * @param max_errors maximum errors during error_timeout to mark upstream dead - * @param key key for hashing - * @param keylen length of the key - */ -struct upstream* get_upstream_by_hash_ketama 
(void *ups, size_t members, size_t msize, time_t now, - time_t error_timeout, time_t revive_timeout, size_t max_errors, - const gchar *key, size_t keylen); - -/** - * Get an upstream from array of upstreams based on its current priority (not weight) - * @param ups array of structures that contains struct upstream as their first element - * @param members number of elements in array - * @param msize size of each member - * @param now current time - * @param error_timeout time during which we are counting errors - * @param revive_timeout time during which we counts upstream dead - * @param max_errors maximum errors during error_timeout to mark upstream dead - */ -struct upstream* get_upstream_master_slave (void *ups, size_t members, size_t msize, - time_t now, time_t error_timeout, - time_t revive_timeout, size_t max_errors); - - -#endif /* UPSTREAM_H */ -/* - * vi:ts=4 - */ diff --git a/src/url.c b/src/url.c deleted file mode 100644 index c4313e8a9..000000000 --- a/src/url.c +++ /dev/null @@ -1,1620 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "url.h" -#include "util.h" -#include "fstring.h" -#include "main.h" -#include "message.h" -#include "trie.h" - -#define POST_CHAR 1 -#define POST_CHAR_S "\001" - -/* Tcp port range */ -#define LOWEST_PORT 0 -#define HIGHEST_PORT 65535 - -#define uri_port_is_valid(port) \ - (LOWEST_PORT <= (port) && (port) <= HIGHEST_PORT) - -struct _proto { - guchar *name; - gint port; - uintptr_t *unused; - guint need_slashes:1; - guint need_slash_after_host:1; - guint free_syntax:1; - guint need_ssl:1; -}; - -typedef struct url_match_s { - const gchar *m_begin; - gsize m_len; - const gchar *pattern; - const gchar *prefix; - gboolean add_prefix; -} url_match_t; - -#define URL_FLAG_NOHTML 0x1 -#define URL_FLAG_STRICT_MATCH 0x2 - -struct url_matcher { - const gchar *pattern; - const gchar *prefix; - gboolean (*start)(const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); - gboolean (*end)(const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); - gint flags; -}; - -static gboolean url_file_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); -static gboolean url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); - -static gboolean url_web_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); -static gboolean url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); - -static 
gboolean url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); -static gboolean url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); - -static gboolean url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); -static gboolean url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); - -struct url_matcher matchers[] = { - /* Common prefixes */ - { "file://", "", url_file_start, url_file_end, 0 }, - { "ftp://", "", url_web_start, url_web_end, 0 }, - { "sftp://", "", url_web_start, url_web_end, 0 }, - { "http://", "", url_web_start, url_web_end, 0 }, - { "https://", "", url_web_start, url_web_end, 0 }, - { "news://", "", url_web_start, url_web_end, 0 }, - { "nntp://", "", url_web_start, url_web_end, 0 }, - { "telnet://", "", url_web_start, url_web_end, 0 }, - { "webcal://", "", url_web_start, url_web_end, 0 }, - { "mailto://", "", url_email_start, url_email_end, 0 }, - { "callto://", "", url_web_start, url_web_end, 0 }, - { "h323:", "", url_web_start, url_web_end, 0 }, - { "sip:", "", url_web_start, url_web_end, 0 }, - { "www.", "http://", url_web_start, url_web_end, 0 }, - { "ftp.", "ftp://", url_web_start, url_web_end, URL_FLAG_NOHTML }, - /* TLD domains parts */ - { ".ac", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ad", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ae", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".aero", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".af", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ag", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ai", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".al", 
"http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".am", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".an", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ao", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".aq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ar", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".arpa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".as", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".asia", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".at", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".au", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".aw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ax", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".az", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ba", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".be", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bi", "http://", url_tld_start, url_tld_end, 
URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".biz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".br", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".by", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".bz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ca", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cat", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ch", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ci", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ck", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - 
{ ".cl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".co", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".com", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".coop", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".cz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".de", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".dj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".dk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".dm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".do", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".dz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ec", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".edu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ee", "http://", url_tld_start, 
url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".eg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".er", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".es", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".et", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".eu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".fi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".fj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".fk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".fm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".fo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".fr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ga", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ge", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | 
URL_FLAG_STRICT_MATCH }, - { ".gn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gov", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".gy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".hk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".hm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".hn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".hr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ht", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".hu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".id", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ie", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".il", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".im", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".in", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".info", 
"http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".int", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".io", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".iq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ir", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".is", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".it", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".je", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".jm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".jo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".jobs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".jp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ke", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".kg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".kh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ki", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".km", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".kn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".kp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".kr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".kw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ky", "http://", url_tld_start, url_tld_end, 
URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".kz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".la", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".lb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".lc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".li", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".lk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".lr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ls", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".lt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".lu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".lv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ly", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ma", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".md", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".me", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mil", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ml", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - 
{ ".mm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mobi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ms", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".museum", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".my", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".mz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".na", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".name", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".nc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ne", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".net", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".nf", "http://", url_tld_start, 
url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ng", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ni", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".nl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".no", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".np", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".nr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".nu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".nz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".om", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".org", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pe", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ph", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pro", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | 
URL_FLAG_STRICT_MATCH }, - { ".ps", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".pw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".py", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".qa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".re", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ro", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".rs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ru", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".rw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".se", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".si", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sm", "http://", 
url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".so", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".st", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".su", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".sz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".td", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tel", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".th", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".to", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | 
URL_FLAG_STRICT_MATCH }, - { ".tp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".travel", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".tz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ua", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ug", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".uk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".us", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".uy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".uz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".va", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".vc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ve", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".vg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".vi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".vn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".vu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".wf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ws", 
"http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".xxx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".ye", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".yt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".za", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".zm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - { ".zw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, - /* Likely emails */ - { "@", "mailto://",url_email_start, url_email_end, URL_FLAG_NOHTML } -}; - -struct url_match_scanner { - struct url_matcher *matchers; - gsize matchers_count; - rspamd_trie_t *patterns; -}; - -struct url_match_scanner *url_scanner = NULL; - -static const struct _proto protocol_backends[] = { - {"file", 0, NULL, 1, 0, 0, 0}, - {"ftp", 21, NULL, 1, 0, 0, 0}, - {"http", 80, NULL, 1, 0, 0, 0}, - {"https", 443, NULL, 1, 0, 0, 1}, - {"mailto", 25, NULL, 1, 0, 0, 0}, - /* Keep these last! */ - {NULL, 0, NULL, 0, 0, 1, 0} -}; - -/* Convert an ASCII hex digit to the corresponding number between 0 - and 15. H should be a hexadecimal digit that satisfies isxdigit; - otherwise, the result is undefined. */ -#define XDIGIT_TO_NUM(h) ((h) < 'A' ? (h) - '0' : g_ascii_toupper (h) - 'A' + 10) -#define X2DIGITS_TO_NUM(h1, h2) ((XDIGIT_TO_NUM (h1) << 4) + XDIGIT_TO_NUM (h2)) -/* The reverse of the above: convert a number in the [0, 16) range to - the ASCII representation of the corresponding hexadecimal digit. - `+ 0' is there so you can't accidentally use it as an lvalue. 
*/ -#define XNUM_TO_DIGIT(x) ("0123456789ABCDEF"[x] + 0) -#define XNUM_TO_digit(x) ("0123456789abcdef"[x] + 0) - -static guchar url_scanner_table[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 1, 1, 9, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 24,128,160,128,128,128,128,128,160,160,128,128,160,192,160,160, - 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,160,160, 32,128, 32,128, - 160, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, - 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,160,160,160,128,192, - 128, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, - 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,128,128,128,128, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 -}; - -enum { - IS_CTRL = (1 << 0), - IS_ALPHA = (1 << 1), - IS_DIGIT = (1 << 2), - IS_LWSP = (1 << 3), - IS_SPACE = (1 << 4), - IS_SPECIAL = (1 << 5), - IS_DOMAIN = (1 << 6), - IS_URLSAFE = (1 << 7) -}; - -#define is_ctrl(x) ((url_scanner_table[(guchar)(x)] & IS_CTRL) != 0) -#define is_lwsp(x) ((url_scanner_table[(guchar)(x)] & IS_LWSP) != 0) -#define is_atom(x) ((url_scanner_table[(guchar)(x)] & (IS_SPECIAL|IS_SPACE|IS_CTRL)) == 0) -#define is_alpha(x) ((url_scanner_table[(guchar)(x)] & IS_ALPHA) != 0) -#define is_digit(x) ((url_scanner_table[(guchar)(x)] & IS_DIGIT) != 0) -#define is_domain(x) ((url_scanner_table[(guchar)(x)] & IS_DOMAIN) != 0) -#define is_urlsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_ALPHA|IS_DIGIT|IS_URLSAFE)) != 0) - - -const gchar * -url_strerror (enum uri_errno err) -{ - switch (err) { - case URI_ERRNO_OK: - return "Parsing went well"; - case URI_ERRNO_EMPTY: - return "The URI string was empty"; - case 
URI_ERRNO_INVALID_PROTOCOL: - return "No protocol was found"; - case URI_ERRNO_NO_SLASHES: - return "Slashes after protocol missing"; - case URI_ERRNO_TOO_MANY_SLASHES: - return "Too many slashes after protocol"; - case URI_ERRNO_TRAILING_DOTS: - return "'.' after host"; - case URI_ERRNO_NO_HOST: - return "Host part is missing"; - case URI_ERRNO_NO_PORT_COLON: - return "':' after host without port"; - case URI_ERRNO_NO_HOST_SLASH: - return "Slash after host missing"; - case URI_ERRNO_IPV6_SECURITY: - return "IPv6 security bug detected"; - case URI_ERRNO_INVALID_PORT: - return "Port number is bad"; - case URI_ERRNO_INVALID_PORT_RANGE: - return "Port number is not within 0-65535"; - } - return NULL; -} - -static gint -check_uri_file (gchar *name) -{ - static const gchar chars[] = POST_CHAR_S "#?"; - - return strcspn (name, chars); -} - -static gint -url_init (void) -{ - guint i; - gchar patbuf[128]; - - if (url_scanner == NULL) { - url_scanner = g_malloc (sizeof (struct url_match_scanner)); - url_scanner->matchers = matchers; - url_scanner->matchers_count = G_N_ELEMENTS (matchers); - url_scanner->patterns = rspamd_trie_create (TRUE); - for (i = 0; i < url_scanner->matchers_count; i ++) { - if (matchers[i].flags & URL_FLAG_STRICT_MATCH) { - /* Insert more specific patterns */ - - /* some.tld/ */ - rspamd_snprintf (patbuf, sizeof (patbuf), "%s/", matchers[i].pattern); - rspamd_trie_insert (url_scanner->patterns, patbuf, i); - /* some.tld */ - rspamd_snprintf (patbuf, sizeof (patbuf), "%s ", matchers[i].pattern); - rspamd_trie_insert (url_scanner->patterns, patbuf, i); - /* some.tld: */ - rspamd_snprintf (patbuf, sizeof (patbuf), "%s:", matchers[i].pattern); - rspamd_trie_insert (url_scanner->patterns, patbuf, i); - } - else { - rspamd_trie_insert (url_scanner->patterns, matchers[i].pattern, i); - } - } - } - - return 0; -} - -enum protocol -get_protocol (gchar *name, gint namelen) -{ - /* These are really enum protocol values but can take on negative - * values and 
since 0 <= -1 for enum values it's better to use clean - * integer type. */ - gint start, end; - enum protocol protocol; - guchar *pname; - gint pnamelen, minlen, compare; - - /* Almost dichotomic search is used here */ - /* Starting at the HTTP entry which is the most common that will make - * file and NNTP the next entries checked and amongst the third checks - * are proxy and FTP. */ - start = 0; - end = PROTOCOL_UNKNOWN - 1; - protocol = PROTOCOL_HTTP; - - while (start <= end) { - pname = protocol_backends[protocol].name; - pnamelen = strlen (pname); - minlen = MIN (pnamelen, namelen); - compare = g_ascii_strncasecmp (pname, name, minlen); - - if (compare == 0) { - if (pnamelen == namelen) - return protocol; - - /* If the current protocol name is longer than the - * protocol name being searched for move @end else move - * @start. */ - compare = pnamelen > namelen ? 1 : -1; - } - - if (compare > 0) - end = protocol - 1; - else - start = protocol + 1; - - protocol = (start + end) / 2; - } - - return PROTOCOL_UNKNOWN; -} - - -gint -get_protocol_port (enum protocol protocol) -{ - return protocol_backends[protocol].port; -} - -gint -get_protocol_need_slashes (enum protocol protocol) -{ - return protocol_backends[protocol].need_slashes; -} - -gint -get_protocol_need_slash_after_host (enum protocol protocol) -{ - return protocol_backends[protocol].need_slash_after_host; -} - -gint -get_protocol_free_syntax (enum protocol protocol) -{ - return protocol_backends[protocol].free_syntax; -} - -static gint -get_protocol_length (const gchar *url) -{ - gchar *end = (gchar *)url; - - /* Seek the end of the protocol name if any. */ - /* RFC1738: - * scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] - * (but per its recommendations we accept "upalpha" too) */ - while (g_ascii_isalnum (*end) || *end == '+' || *end == '-' || *end == '.') - end++; - - /* Also return 0 if there's no protocol name (@end == @url). */ - return (*end == ':') ? 
end - url : 0; -} - - -/* - * Calcualte new length of unescaped hostlen - */ -static guint -url_calculate_escaped_hostlen (gchar *host, guint hostlen) -{ - guint i, result = hostlen; - gchar *p = host, c; - - for (i = 0; i < hostlen; i++, p++) { - if (*p == '%' && g_ascii_isxdigit (*(p + 1)) && g_ascii_isxdigit (*(p + 2)) && i < hostlen - 2) { - c = X2DIGITS_TO_NUM (*(p + 1), *(p + 2)); - if (c != '\0') { - result -= 2; - } - } - } - - return result; -} - -/* URL-unescape the string S. - - This is done by transforming the sequences "%HH" to the character - represented by the hexadecimal digits HH. If % is not followed by - two hexadecimal digits, it is inserted literally. - - The transformation is done in place. If you need the original - string intact, make a copy before calling this function. */ - -static void -url_unescape (gchar *s) -{ - gchar *t = s; /* t - tortoise */ - gchar *h = s; /* h - hare */ - - for (; *h; h++, t++) { - if (*h != '%') { - copychar: - *t = *h; - } - else { - gchar c; - /* Do nothing if '%' is not followed by two hex digits. */ - if (!h[1] || !h[2] || !(g_ascii_isxdigit (h[1]) && g_ascii_isxdigit (h[2]))) - goto copychar; - c = X2DIGITS_TO_NUM (h[1], h[2]); - /* Don't unescape %00 because there is no way to insert it - * into a C string without effectively truncating it. 
*/ - if (c == '\0') - goto copychar; - *t = c; - h += 2; - } - } - *t = '\0'; -} - -static void -url_strip (gchar *s) -{ - gchar *t = s; /* t - tortoise */ - gchar *h = s; /* h - hare */ - - while (*h) { - if (g_ascii_isgraph (*h)) { - *t = *h; - t++; - } - h++; - } - *t = '\0'; -} - -static gchar * -url_escape_1 (const gchar *s, gint allow_passthrough, rspamd_mempool_t * pool) -{ - const gchar *p1; - gchar *p2, *newstr; - gint newlen; - gint addition = 0; - - for (p1 = s; *p1; p1++) - if (!is_urlsafe (*p1)) { - addition += 2; /* Two more characters (hex digits) */ - } - - if (!addition) { - if (allow_passthrough) { - return (gchar *)s; - } - else { - return rspamd_mempool_strdup (pool, s); - } - } - - newlen = (p1 - s) + addition; - newstr = (gchar *)rspamd_mempool_alloc (pool, newlen + 1); - - p1 = s; - p2 = newstr; - while (*p1) { - /* Quote the characters that match the test mask. */ - if (!is_urlsafe (*p1)) { - guchar c = *p1++; - *p2++ = '%'; - *p2++ = XNUM_TO_DIGIT (c >> 4); - *p2++ = XNUM_TO_DIGIT (c & 0xf); - } - else - *p2++ = *p1++; - } - *p2 = '\0'; - - return newstr; -} - -/* URL-escape the unsafe characters (see urlchr_table) in a given - string, returning a freshly allocated string. */ - -gchar * -url_escape (const gchar *s, rspamd_mempool_t * pool) -{ - return url_escape_1 (s, 0, pool); -} - -/* Decide whether the gchar at position P needs to be encoded. (It is - not enough to pass a single gchar *P because the function may need - to inspect the surrounding context.) - - Return 1 if the gchar should be escaped as %XX, 0 otherwise. */ - -static inline gboolean -char_needs_escaping (const gchar *p) -{ - if (*p == '%') { - if (g_ascii_isxdigit (*(p + 1)) && g_ascii_isxdigit (*(p + 2))) { - return FALSE; - } - else { - return TRUE; - } - } - else if (! is_urlsafe (*p)) { - return TRUE; - } - return FALSE; -} - -/* Translate a %-escaped (but possibly non-conformant) input string S - into a %-escaped (and conformant) output string. 
-*/ - -static gchar * -reencode_escapes (gchar *s, rspamd_mempool_t * pool) -{ - const gchar *p1; - gchar *newstr, *p2; - gint oldlen, newlen; - - gint encode_count = 0; - - /* First pass: inspect the string to see if there's anything to do, - and to calculate the new length. */ - for (p1 = s; *p1; p1++) - if (char_needs_escaping (p1)) - ++encode_count; - - if (!encode_count) { - /* The string is good as it is. */ - return s; - } - - oldlen = p1 - s; - /* Each encoding adds two characters (hex digits). */ - newlen = oldlen + 2 * encode_count; - newstr = rspamd_mempool_alloc (pool, newlen + 1); - - /* Second pass: copy the string to the destination address, encoding - chars when needed. */ - p1 = s; - p2 = newstr; - - while (*p1) - if (char_needs_escaping (p1)) { - guchar c = *p1++; - *p2++ = '%'; - *p2++ = XNUM_TO_DIGIT (c >> 4); - *p2++ = XNUM_TO_DIGIT (c & 0xf); - } - else { - *p2++ = *p1++; - } - - *p2 = '\0'; - return newstr; -} - -/* Unescape CHR in an otherwise escaped STR. Used to selectively - escaping of certain characters, such as "/" and ":". Returns a - count of unescaped chars. */ - -static void -unescape_single_char (gchar *str, gchar chr) -{ - const gchar c1 = XNUM_TO_DIGIT (chr >> 4); - const gchar c2 = XNUM_TO_DIGIT (chr & 0xf); - gchar *h = str; /* hare */ - gchar *t = str; /* tortoise */ - - for (; *h; h++, t++) { - if (h[0] == '%' && h[1] == c1 && h[2] == c2) { - *t = chr; - h += 2; - } - else { - *t = *h; - } - } - *t = '\0'; -} - - -/* - * Resolve "." and ".." elements of PATH by destructively modifying - * PATH and return non-zero if PATH has been modified, zero otherwise. - */ - -static gboolean -path_simplify (gchar *path) -{ - gchar *h = path; /* hare */ - gchar *t = path; /* tortoise */ - gchar *beg = path; /* boundary for backing the tortoise */ - gchar *end = path + strlen (path); - - while (h < end) { - /* Hare should be at the beginning of a path element. */ - if (h[0] == '.' && (h[1] == '/' || h[1] == '\0')) { - /* Ignore "./". 
*/ - h += 2; - } - else if (h[0] == '.' && h[1] == '.' && (h[2] == '/' || h[2] == '\0')) { - /* Handle "../" by retreating the tortoise by one path - element -- but not past beginning. */ - if (t > beg) { - /* Move backwards until T hits the beginning of the - previous path element or the beginning of path. */ - for (--t; t > beg && t[-1] != '/'; t--); - } - else { - /* If we're at the beginning, copy the "../" literally - move the beginning so a later ".." doesn't remove - it. */ - beg = t + 3; - goto regular; - } - h += 3; - } - else { - regular: - /* A regular path element. If H hasn't advanced past T, - simply skip to the next path element. Otherwise, copy - the path element until the next slash. */ - if (t == h) { - /* Skip the path element, including the slash. */ - while (h < end && *h != '/') - t++, h++; - if (h < end) - t++, h++; - } - else { - /* Copy the path element, including the final slash. */ - while (h < end && *h != '/') - *t++ = *h++; - if (h < end) - *t++ = *h++; - } - } - } - - if (t != h) - *t = '\0'; - - return t != h; -} - -enum uri_errno -parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool) -{ - guchar *prefix_end, *host_end, *p; - guchar *lbracket, *rbracket; - gint datalen, n, addrlen; - guchar *frag_or_post, *user_end, *port_end; - - memset (uri, 0, sizeof (*uri)); - - /* Nothing to do for an empty url. 
*/ - if (!*uristring) - return URI_ERRNO_EMPTY; - - uri->string = reencode_escapes (uristring, pool); - msg_debug ("reencoding escapes in original url: '%s'", struri (uri)); - uri->protocollen = get_protocol_length (struri (uri)); - - /* Assume http as default protocol */ - if (!uri->protocollen || (uri->protocol = get_protocol (struri (uri), uri->protocollen)) == PROTOCOL_UNKNOWN) { - /* Make exception for numeric urls */ - p = uri->string; - while (*p && (g_ascii_isalnum (*p) || *p == ':')) { - p ++; - } - if (*p == '\0') { - return URI_ERRNO_INVALID_PROTOCOL; - } - p = g_strconcat ("http://", uri->string, NULL); - uri->string = rspamd_mempool_strdup (pool, p); - g_free (p); - uri->protocol = PROTOCOL_HTTP; - prefix_end = struri (uri) + 7; - } - else { - /* Figure out whether the protocol is known */ - msg_debug ("getting protocol from url: %d", uri->protocol); - - prefix_end = struri (uri) + uri->protocollen; /* ':' */ - - /* Check if there's a digit after the protocol name. */ - if (g_ascii_isdigit (*prefix_end)) { - p = struri (uri); - uri->ip_family = p[uri->protocollen] - '0'; - prefix_end++; - } - if (*prefix_end != ':') { - msg_debug ("invalid protocol in uri"); - return URI_ERRNO_INVALID_PROTOCOL; - } - prefix_end++; - - /* Skip slashes */ - - if (prefix_end[0] == '/' && prefix_end[1] == '/') { - if (prefix_end[2] == '/') { - msg_debug ("too many '/' in uri"); - return URI_ERRNO_TOO_MANY_SLASHES; - } - - prefix_end += 2; - - } - else { - msg_debug ("no '/' in uri"); - return URI_ERRNO_NO_SLASHES; - } - } - - if (get_protocol_free_syntax (uri->protocol)) { - uri->data = prefix_end; - uri->datalen = strlen (prefix_end); - return URI_ERRNO_OK; - - } - else if (uri->protocol == PROTOCOL_FILE) { - datalen = check_uri_file (prefix_end); - frag_or_post = prefix_end + datalen; - - /* Extract the fragment part. 
*/ - if (datalen >= 0) { - if (*frag_or_post == '#') { - uri->fragment = frag_or_post + 1; - uri->fragmentlen = strcspn (uri->fragment, POST_CHAR_S); - frag_or_post = uri->fragment + uri->fragmentlen; - } - if (*frag_or_post == POST_CHAR) { - uri->post = frag_or_post + 1; - } - } - else { - datalen = strlen (prefix_end); - } - - uri->data = prefix_end; - uri->datalen = datalen; - - return URI_ERRNO_OK; - } - - /* Isolate host */ - - /* Get brackets enclosing IPv6 address */ - lbracket = strchr (prefix_end, '['); - if (lbracket) { - rbracket = strchr (lbracket, ']'); - /* [address] is handled only inside of hostname part (surprisingly). */ - if (rbracket && rbracket < prefix_end + strcspn (prefix_end, "/")) - uri->ipv6 = 1; - else - lbracket = rbracket = NULL; - } - else { - rbracket = NULL; - } - - /* Possibly skip auth part */ - host_end = prefix_end + strcspn (prefix_end, "@"); - - if (prefix_end + strcspn (prefix_end, "/?") > host_end && *host_end) { /* we have auth info here */ - - /* Allow '@' in the password component */ - while (strcspn (host_end + 1, "@") < strcspn (host_end + 1, "/?")) - host_end = host_end + 1 + strcspn (host_end + 1, "@"); - - user_end = strchr (prefix_end, ':'); - - if (!user_end || user_end > host_end) { - uri->user = prefix_end; - uri->userlen = host_end - prefix_end; - } - else { - uri->user = prefix_end; - uri->userlen = user_end - prefix_end; - uri->password = user_end + 1; - uri->passwordlen = host_end - user_end - 1; - } - prefix_end = host_end + 1; - } - - if (uri->ipv6 && rbracket != NULL) { - host_end = rbracket + strcspn (rbracket, ":/?"); - } - else { - host_end = prefix_end + strcspn (prefix_end, ":/?"); - } - - if (uri->ipv6) { - addrlen = rbracket - lbracket - 1; - - - uri->host = lbracket + 1; - uri->hostlen = addrlen; - } - else { - uri->host = prefix_end; - uri->hostlen = host_end - prefix_end; - - /* Trim trailing '.'s */ - if (uri->hostlen && uri->host[uri->hostlen - 1] == '.') - return URI_ERRNO_TRAILING_DOTS; - } - 
- if (*host_end == ':') { /* we have port here */ - port_end = host_end + 1 + strcspn (host_end + 1, "/"); - - host_end++; - - uri->port = host_end; - uri->portlen = port_end - host_end; - - if (uri->portlen == 0) - return URI_ERRNO_NO_PORT_COLON; - - /* We only use 8 bits for portlen so better check */ - if ((gint)uri->portlen != port_end - host_end) - return URI_ERRNO_INVALID_PORT; - - /* test if port is number */ - for (; host_end < port_end; host_end++) - if (!g_ascii_isdigit (*host_end)) - return URI_ERRNO_INVALID_PORT; - - /* Check valid port value, and let show an error message - * about invalid url syntax. */ - if (uri->port && uri->portlen) { - - errno = 0; - n = strtol (uri->port, NULL, 10); - if (errno || !uri_port_is_valid (n)) - return URI_ERRNO_INVALID_PORT; - } - } - - if (*host_end == '/') { - host_end++; - - } - else if (get_protocol_need_slash_after_host (uri->protocol) && *host_end != '?') { - /* The need for slash after the host component depends on the - * need for a host component. -- The dangerous mind of Jonah */ - if (!uri->hostlen) - return URI_ERRNO_NO_HOST; - - return URI_ERRNO_NO_HOST_SLASH; - } - - /* Look for #fragment or POST_CHAR */ - prefix_end = host_end + strcspn (host_end, "#" POST_CHAR_S); - uri->data = host_end; - uri->datalen = prefix_end - host_end; - - if (*prefix_end == '#') { - uri->fragment = prefix_end + 1; - uri->fragmentlen = strcspn (uri->fragment, POST_CHAR_S); - prefix_end = uri->fragment + uri->fragmentlen; - } - - if (*prefix_end == POST_CHAR) { - uri->post = prefix_end + 1; - } - - convert_to_lowercase (uri->string, uri->protocollen); - convert_to_lowercase (uri->host, uri->hostlen); - /* Decode %HH sequences in host name. This is important not so much - to support %HH sequences in host names (which other browser - don't), but to support binary characters (which will have been - converted to %HH by reencode_escapes). 
*/ - if (strchr (uri->host, '%')) { - uri->hostlen = url_calculate_escaped_hostlen (uri->host, uri->hostlen); - } - - url_strip (struri (uri)); - url_unescape (uri->host); - - path_simplify (uri->data); - - return URI_ERRNO_OK; -} - -static const gchar url_braces[] = { - '(', ')' , - '{', '}' , - '[', ']' , - '<', '>' , - '|', '|' , - '\'', '\'' -}; - -static gboolean -is_open_brace (gchar c) -{ - if (c == '(' || - c == '{' || - c == '[' || - c == '<' || - c == '|' || - c == '\'') { - return TRUE; - } - - return FALSE; -} - -static gboolean -url_file_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) -{ - match->m_begin = pos; - return TRUE; -} -static gboolean -url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) -{ - const gchar *p; - gchar stop; - guint i; - - p = pos + strlen (match->pattern); - stop = *p; - if (*p == '/') { - p ++; - } - - for (i = 0; i < G_N_ELEMENTS (url_braces) / 2; i += 2) { - if (*p == url_braces[i]) { - stop = url_braces[i + 1]; - break; - } - } - - while (p < end && *p != stop && is_urlsafe (*p)) { - p ++; - } - - if (p == begin) { - return FALSE; - } - match->m_len = p - match->m_begin; - - return TRUE; - -} - -static gboolean -url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) -{ - const gchar *p = pos; - - /* Try to find the start of the url by finding any non-urlsafe character or whitespace/punctuation */ - while (p >= begin) { - if ((!is_domain (*p) && *p != '.' 
&& *p != '/') || g_ascii_isspace (*p)) { - p ++; - if (!g_ascii_isalnum (*p)) { - /* Urls cannot start with strange symbols */ - return FALSE; - } - match->m_begin = p; - return TRUE; - } - else if (p == begin && p != pos) { - match->m_begin = p; - return TRUE; - } - else if (*p == '.') { - if (p == begin) { - /* Urls cannot start with a dot */ - return FALSE; - } - if (!g_ascii_isalnum (p[1])) { - /* Wrong we have an invalid character after dot */ - return FALSE; - } - } - else if (*p == '/') { - /* Urls cannot contain '/' in their body */ - return FALSE; - } - p --; - } - - return FALSE; -} - -static gboolean -url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) -{ - const gchar *p; - - /* A url must be finished by tld, so it must be followed by space character */ - p = pos + strlen (match->pattern); - if (p == end || g_ascii_isspace (*p) || *p == ',') { - match->m_len = p - match->m_begin; - return TRUE; - } - else if (*p == '/' || *p == ':') { - /* Parse arguments, ports by normal way by url default function */ - p = match->m_begin; - /* Check common prefix */ - if (g_ascii_strncasecmp (p, "http://", sizeof ("http://") - 1) == 0) { - return url_web_end (begin, end, match->m_begin + sizeof ("http://") - 1, match); - } - else { - return url_web_end (begin, end, match->m_begin, match); - } - - } - return FALSE; -} - -static gboolean -url_web_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) -{ - /* Check what we have found */ - if (pos > begin && (g_ascii_strncasecmp (pos, "www", 3) == 0 || g_ascii_strncasecmp (pos, "ftp", 3) == 0)) { - if (!is_open_brace (*(pos - 1)) && !g_ascii_isspace (*(pos - 1))) { - return FALSE; - } - } - if (*pos == '.') { - /* Urls cannot start with . 
*/ - return FALSE; - } - match->m_begin = pos; - - return TRUE; -} - -static gboolean -url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) -{ - const gchar *p, *c; - gchar open_brace = '\0', close_brace = '\0'; - gint brace_stack = 0; - gboolean passwd = FALSE; - guint port, i; - - p = pos + strlen (match->pattern); - for (i = 0; i < G_N_ELEMENTS (url_braces) / 2; i += 2) { - if (*p == url_braces[i]) { - close_brace = url_braces[i + 1]; - open_brace = *p; - break; - } - } - - /* find the end of the domain */ - if (is_atom (*p)) { - /* might be a domain or user@domain */ - c = p; - while (p < end) { - if (!is_atom (*p)) { - break; - } - - p++; - - while (p < end && is_atom (*p)) { - p++; - } - - if ((p + 1) < end && *p == '.' && (is_atom (*(p + 1)) || *(p + 1) == '/')) { - p++; - } - } - - if (*p != '@') { - p = c; - } - else { - p++; - } - - goto domain; - } - else if (is_domain (*p) || (*p & 0x80)) { -domain: - while (p < end) { - if (!is_domain (*p) && !(*p & 0x80)) { - break; - } - - p++; - - while (p < end && (is_domain (*p) || (*p & 0x80))) { - p++; - } - - if ((p + 1) < end && *p == '.' && (is_domain (*(p + 1)) || *(p + 1) == '/' || (*(p + 1) & 0x80))) { - p++; - } - } - } - else { - return FALSE; - } - - if (p < end) { - switch (*p) { - case ':': /* we either have a port or a password */ - p++; - - if (is_digit (*p) || passwd) { - port = (*p++ - '0'); - - while (p < end && is_digit (*p) && port < 65536) { - port = (port * 10) + (*p++ - '0'); - } - - if (!passwd && (port >= 65536 || *p == '@')) { - if (p < end && *p == '@') { - /* this must be a password? 
*/ - goto passwd; - } - else if (p < end) { - return FALSE; - } - - p--; - } - } - else { - passwd: - passwd = TRUE; - c = p; - - while (p < end && is_atom (*p)) { - p++; - } - - if ((p + 2) < end) { - if (*p == '@') { - p++; - if (is_domain (*p)) { - goto domain; - } - } - - return FALSE; - } - } - - if (p >= end || *p != '/') { - break; - } - - /* we have a '/' so there could be a path - fall through */ - case '/': /* we've detected a path component to our url */ - p++; - case '?': - while (p < end && is_urlsafe (*p)) { - if (*p == open_brace) { - brace_stack++; - } - else if (*p == close_brace) { - brace_stack--; - if (brace_stack == -1) { - break; - } - } - p++; - } - - break; - default: - break; - } - } - - /* urls are extremely unlikely to end with any - * punctuation, so strip any trailing - * punctuation off. Also strip off any closing - * double-quotes. */ - while (p > pos && strchr (",.:;?!-|}])\"", p[-1])) { - p--; - } - - match->m_len = (p - pos); - - return TRUE; -} - - -static gboolean -url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) -{ - const gchar *p; - /* Check what we have found */ - if (pos > begin && *pos == '@') { - /* Try to extract it with username */ - p = pos - 1; - while (p > begin && (is_domain (*p) || *p == '.' || *p == '_')) { - p --; - } - if (!is_domain (*p) && p != pos - 1) { - match->m_begin = p + 1; - return TRUE; - } - else if (p == begin) { - match->m_begin = p; - return TRUE; - } - } - else { - p = pos + strlen (match->pattern); - if (is_domain (*p)) { - match->m_begin = pos; - return TRUE; - } - } - return FALSE; -} - -static gboolean -url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) -{ - const gchar *p; - gboolean got_at = FALSE; - - p = pos + strlen (match->pattern); - if (*pos == '@') { - got_at = TRUE; - } - - while (p < end && (is_domain (*p) || *p == '_' - || (*p == '@' && !got_at) || - (*p == '.' 
&& p + 1 < end && is_domain (*(p + 1))))) { - if (*p == '@') { - got_at = TRUE; - } - p ++; - } - match->m_len = p - match->m_begin; - match->add_prefix = TRUE; - return got_at; -} - -void -url_parse_text (rspamd_mempool_t * pool, struct rspamd_task *task, struct mime_text_part *part, gboolean is_html) -{ - gint rc; - gchar *url_str = NULL, *url_start, *url_end; - struct uri *new; - struct process_exception *ex; - gchar *p, *end, *begin; - - - if (!part->orig->data || part->orig->len == 0) { - msg_warn ("got empty text part"); - return; - } - - if (url_init () == 0) { - if (is_html) { - begin = part->orig->data; - end = begin + part->orig->len; - p = begin; - } - else { - begin = part->content->data; - end = begin + part->content->len; - p = begin; - } - while (p < end) { - if (url_try_text (pool, p, end - p, &url_start, &url_end, &url_str, is_html)) { - if (url_str != NULL) { - new = rspamd_mempool_alloc0 (pool, sizeof (struct uri)); - ex = rspamd_mempool_alloc0 (pool, sizeof (struct process_exception)); - if (new != NULL) { - g_strstrip (url_str); - rc = parse_uri (new, url_str, pool); - if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) && - new->hostlen > 0) { - ex->pos = url_start - begin; - ex->len = url_end - url_start; - if (new->protocol == PROTOCOL_MAILTO) { - if (new->userlen > 0) { - if (!g_tree_lookup (task->emails, new)) { - g_tree_insert (task->emails, new, new); - } - } - } - else { - if (!g_tree_lookup (task->urls, new)) { - g_tree_insert (task->urls, new, new); - } - } - part->urls_offset = g_list_prepend (part->urls_offset, ex); - } - else if (rc != URI_ERRNO_OK) { - msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc)); - } - } - } - } - else { - break; - } - p = url_end + 1; - } - } - /* Handle offsets of this part */ - if (part->urls_offset != NULL) { - part->urls_offset = g_list_reverse (part->urls_offset); - rspamd_mempool_add_destructor (task->task_pool, 
(rspamd_mempool_destruct_t)g_list_free, part->urls_offset); - } -} - -gboolean -url_try_text (rspamd_mempool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **fin, gchar **url_str, gboolean is_html) -{ - const gchar *end, *pos; - gint idx, l; - struct url_matcher *matcher; - url_match_t m; - - end = begin + len; - if (url_init () == 0) { - if ((pos = rspamd_trie_lookup (url_scanner->patterns, begin, len, &idx)) == NULL) { - return FALSE; - } - else { - matcher = &matchers[idx]; - if ((matcher->flags & URL_FLAG_NOHTML) && is_html) { - /* Do not try to match non-html like urls in html texts */ - return FALSE; - } - m.pattern = matcher->pattern; - m.prefix = matcher->prefix; - m.add_prefix = FALSE; - if (matcher->start (begin, end, pos, &m) && matcher->end (begin, end, pos, &m)) { - if (m.add_prefix) { - l = m.m_len + 1 + strlen (m.prefix); - *url_str = rspamd_mempool_alloc (pool, l); - rspamd_snprintf (*url_str, l, "%s%*s", m.prefix, m.m_len, m.m_begin); - } - else { - *url_str = rspamd_mempool_alloc (pool, m.m_len + 1); - memcpy (*url_str, m.m_begin, m.m_len); - (*url_str)[m.m_len] = '\0'; - } - if (start != NULL) { - *start = (gchar *)m.m_begin; - } - if (fin != NULL) { - *fin = (gchar *)m.m_begin + m.m_len; - } - } - else { - *url_str = NULL; - if (start != NULL) { - *start = (gchar *)pos; - } - if (fin != NULL) { - *fin = (gchar *)pos + strlen (m.prefix); - } - } - - return TRUE; - } - } - - return FALSE; -} - -/* - * vi: ts=4 - */ diff --git a/src/url.h b/src/url.h deleted file mode 100644 index 60535ba5c..000000000 --- a/src/url.h +++ /dev/null @@ -1,111 +0,0 @@ -/* URL check functions */ -#ifndef URL_H -#define URL_H - -#include "config.h" -#include "mem_pool.h" - -struct rspamd_task; -struct mime_text_part; - -struct uri { - /* The start of the uri (and thus start of the protocol string). */ - gchar *string; - - /* The internal type of protocol. Can _never_ be PROTOCOL_UNKNOWN. 
*/ - gint protocol; /* enum protocol */ - - gint ip_family; - - gchar *user; - gchar *password; - gchar *host; - gchar *port; - /* @data can contain both the path and query uri fields. - * It can never be NULL but can have zero length. */ - gchar *data; - gchar *fragment; - /* @post can contain some special encoded form data, used internally - * to make form data handling more efficient. The data is marked by - * POST_CHAR in the uri string. */ - gchar *post; - - struct uri *phished_url; - - /* @protocollen should only be usable if @protocol is either - * PROTOCOL_USER or an uri string should be composed. */ - guint protocollen; - guint userlen; - guint passwordlen; - guint hostlen; - guint portlen; - guint datalen; - guint fragmentlen; - - /* Flags */ - gboolean ipv6; /* URI contains IPv6 host */ - gboolean form; /* URI originated from form */ - gboolean is_phished; /* URI maybe phishing */ -}; - -enum uri_errno { - URI_ERRNO_OK, /* Parsing went well */ - URI_ERRNO_EMPTY, /* The URI string was empty */ - URI_ERRNO_INVALID_PROTOCOL, /* No protocol was found */ - URI_ERRNO_NO_SLASHES, /* Slashes after protocol missing */ - URI_ERRNO_TOO_MANY_SLASHES, /* Too many slashes after protocol */ - URI_ERRNO_TRAILING_DOTS, /* '.' 
after host */ - URI_ERRNO_NO_HOST, /* Host part is missing */ - URI_ERRNO_NO_PORT_COLON, /* ':' after host without port */ - URI_ERRNO_NO_HOST_SLASH, /* Slash after host missing */ - URI_ERRNO_IPV6_SECURITY, /* IPv6 security bug detected */ - URI_ERRNO_INVALID_PORT, /* Port number is bad */ - URI_ERRNO_INVALID_PORT_RANGE /* Port number is not within 0-65535 */ -}; - -enum protocol { - PROTOCOL_FILE, - PROTOCOL_FTP, - PROTOCOL_HTTP, - PROTOCOL_HTTPS, - PROTOCOL_MAILTO, - PROTOCOL_UNKNOWN -}; - -#define struri(uri) ((uri)->string) - -/* - * Parse urls inside text - * @param pool memory pool - * @param task task object - * @param part current text part - * @param is_html turn on html euristic - */ -void url_parse_text (rspamd_mempool_t *pool, struct rspamd_task *task, struct mime_text_part *part, gboolean is_html); - -/* - * Parse a single url into an uri structure - * @param pool memory pool - * @param uristring text form of url - * @param uri url object, must be pre allocated - */ -enum uri_errno parse_uri(struct uri *uri, gchar *uristring, rspamd_mempool_t *pool); - -/* - * Try to extract url from a text - * @param pool memory pool - * @param begin begin of text - * @param len length of text - * @param start storage for start position of url found (or NULL) - * @param end storage for end position of url found (or NULL) - * @param url_str storage for url string(or NULL) - * @return TRUE if url is found in specified text - */ -gboolean url_try_text (rspamd_mempool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **end, gchar **url_str, gboolean is_html); - -/* - * Return text representation of url parsing error - */ -const gchar* url_strerror (enum uri_errno err); - -#endif diff --git a/src/util.c b/src/util.c deleted file mode 100644 index 03b38e087..000000000 --- a/src/util.c +++ /dev/null @@ -1,2275 +0,0 @@ -/* - * Copyright (c) 2009-2012, Vsevolod Stakhov - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#include "config.h" -#include "util.h" -#include "cfg_file.h" -#include "main.h" -#include "statfile.h" -#include "filter.h" -#include "message.h" - -#ifdef HAVE_OPENSSL -#include -#include -#endif - -#ifdef HAVE_TERMIOS_H -#include -#endif -#ifdef HAVE_READPASSPHRASE_H -#include -#endif - -/* Check log messages intensity once per minute */ -#define CHECK_TIME 60 -/* More than 2 log messages per second */ -#define BUF_INTENSITY 2 -/* Default connect timeout for sync sockets */ -#define CONNECT_TIMEOUT 3 - -gint -make_socket_nonblocking (gint fd) -{ - gint ofl; - - ofl = fcntl (fd, F_GETFL, 0); - - if (fcntl (fd, F_SETFL, ofl | O_NONBLOCK) == -1) { - msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); - return -1; - } - return 0; -} - -gint -make_socket_blocking (gint fd) -{ - gint ofl; - - ofl = fcntl (fd, F_GETFL, 0); - - if (fcntl (fd, F_SETFL, ofl & (~O_NONBLOCK)) == -1) { - msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); - return -1; - } - return 0; -} - -gint -poll_sync_socket (gint fd, gint timeout, short events) -{ - gint r; - struct pollfd fds[1]; - - fds->fd = fd; - fds->events = events; - fds->revents = 0; - while ((r = poll (fds, 1, timeout)) < 0) { - if (errno != EINTR) { - break; - } - } - - return r; -} - -static gint -make_inet_socket (gint type, struct addrinfo *addr, gboolean is_server, gboolean async, GList **list) -{ - gint fd, r, optlen, on = 1, s_error; - struct addrinfo *cur; - - cur = addr; - while (cur) { - /* Create socket */ - fd = socket (cur->ai_family, type, 0); - if (fd == -1) { - msg_warn ("socket failed: %d, '%s'", errno, strerror (errno)); - goto out; - } - - if (make_socket_nonblocking (fd) < 0) { - goto out; - } - - /* Set close on exec */ - if (fcntl (fd, F_SETFD, FD_CLOEXEC) == -1) { - msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); - goto out; - } - - if (is_server) { - setsockopt (fd, SOL_SOCKET, SO_REUSEADDR, (const void *)&on, sizeof (gint)); -#ifdef HAVE_IPV6_V6ONLY - if 
(cur->ai_family == AF_INET6) { - setsockopt (fd, IPPROTO_IPV6, IPV6_V6ONLY, (const void *)&on, sizeof (gint)); - } -#endif - r = bind (fd, cur->ai_addr, cur->ai_addrlen); - } - else { - r = connect (fd, cur->ai_addr, cur->ai_addrlen); - } - - if (r == -1) { - if (errno != EINPROGRESS) { - msg_warn ("bind/connect failed: %d, '%s'", errno, strerror (errno)); - goto out; - } - if (!async) { - /* Try to poll */ - if (poll_sync_socket (fd, CONNECT_TIMEOUT * 1000, POLLOUT) <= 0) { - errno = ETIMEDOUT; - msg_warn ("bind/connect failed: timeout"); - goto out; - } - else { - /* Make synced again */ - if (make_socket_blocking (fd) < 0) { - goto out; - } - } - } - } - else { - /* Still need to check SO_ERROR on socket */ - optlen = sizeof (s_error); - getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&s_error, &optlen); - if (s_error) { - errno = s_error; - goto out; - } - } - if (list == NULL) { - /* Go out immediately */ - break; - } - else if (fd != -1) { - *list = g_list_prepend (*list, GINT_TO_POINTER (fd)); - cur = cur->ai_next; - continue; - } -out: - if (fd != -1) { - close (fd); - } - fd = -1; - cur = cur->ai_next; - } - return (fd); -} - -gint -make_tcp_socket (struct addrinfo *addr, gboolean is_server, gboolean async) -{ - return make_inet_socket (SOCK_STREAM, addr, is_server, async, NULL); -} - -gint -make_udp_socket (struct addrinfo *addr, gboolean is_server, gboolean async) -{ - return make_inet_socket (SOCK_DGRAM, addr, is_server, async, NULL); -} - -gint -make_unix_socket (const gchar *path, struct sockaddr_un *addr, gint type, gboolean is_server, gboolean async) -{ - gint fd = -1, s_error, r, optlen, serrno, on = 1; - struct stat st; - - if (path == NULL) - return -1; - - addr->sun_family = AF_UNIX; - - rspamd_strlcpy (addr->sun_path, path, sizeof (addr->sun_path)); -#ifdef FREEBSD - addr->sun_len = SUN_LEN (addr); -#endif - - if (is_server) { - /* Unlink socket if it exists already */ - if (lstat (addr->sun_path, &st) != -1) { - if (S_ISSOCK (st.st_mode)) { - if 
(unlink (addr->sun_path) == -1) { - msg_warn ("unlink %s failed: %d, '%s'", addr->sun_path, errno, strerror (errno)); - goto out; - } - } - else { - msg_warn ("%s is not a socket", addr->sun_path); - goto out; - } - } - } - fd = socket (PF_LOCAL, type, 0); - - if (fd == -1) { - msg_warn ("socket failed %s: %d, '%s'", addr->sun_path, errno, strerror (errno)); - return -1; - } - - if (make_socket_nonblocking (fd) < 0) { - goto out; - } - - /* Set close on exec */ - if (fcntl (fd, F_SETFD, FD_CLOEXEC) == -1) { - msg_warn ("fcntl failed %s: %d, '%s'", addr->sun_path, errno, strerror (errno)); - goto out; - } - if (is_server) { - setsockopt (fd, SOL_SOCKET, SO_REUSEADDR, (const void *)&on, sizeof (gint)); - r = bind (fd, (struct sockaddr *)addr, SUN_LEN (addr)); - } - else { - r = connect (fd, (struct sockaddr *)addr, SUN_LEN (addr)); - } - - if (r == -1) { - if (errno != EINPROGRESS) { - msg_warn ("bind/connect failed %s: %d, '%s'", addr->sun_path, errno, strerror (errno)); - goto out; - } - if (!async) { - /* Try to poll */ - if (poll_sync_socket (fd, CONNECT_TIMEOUT * 1000, POLLOUT) <= 0) { - errno = ETIMEDOUT; - msg_warn ("bind/connect failed %s: timeout", addr->sun_path); - goto out; - } - else { - /* Make synced again */ - if (make_socket_blocking (fd) < 0) { - goto out; - } - } - } - } - else { - /* Still need to check SO_ERROR on socket */ - optlen = sizeof (s_error); - getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&s_error, &optlen); - if (s_error) { - errno = s_error; - goto out; - } - } - - - return (fd); - - out: - serrno = errno; - if (fd != -1) { - close (fd); - } - errno = serrno; - return (-1); -} - -/** - * Make a universal socket - * @param credits host, ip or path to unix socket - * @param port port (used for network sockets) - * @param async make this socket asynced - * @param is_server make this socket as server socket - * @param try_resolve try name resolution for a socket (BLOCKING) - */ -gint -make_universal_socket (const gchar *credits, guint16 
port, - gint type, gboolean async, gboolean is_server, gboolean try_resolve) -{ - struct sockaddr_un un; - struct stat st; - struct addrinfo hints, *res; - gint r; - gchar portbuf[8]; - - if (*credits == '/') { - if (is_server) { - return make_unix_socket (credits, &un, type, is_server, async); - } - else { - r = stat (credits, &st); - if (r == -1) { - /* Unix socket doesn't exists it must be created first */ - errno = ENOENT; - return -1; - } - else { - if ((st.st_mode & S_IFSOCK) == 0) { - /* Path is not valid socket */ - errno = EINVAL; - return -1; - } - else { - return make_unix_socket (credits, &un, type, is_server, async); - } - } - } - } - else { - /* TCP related part */ - memset (&hints, 0, sizeof (hints)); - hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */ - hints.ai_socktype = type; /* Type of the socket */ - hints.ai_flags = is_server ? AI_PASSIVE : 0; - hints.ai_protocol = 0; /* Any protocol */ - hints.ai_canonname = NULL; - hints.ai_addr = NULL; - hints.ai_next = NULL; - - if (!try_resolve) { - hints.ai_flags |= AI_NUMERICHOST | AI_NUMERICSERV; - } - - rspamd_snprintf (portbuf, sizeof (portbuf), "%d", (int)port); - if ((r = getaddrinfo (credits, portbuf, &hints, &res)) == 0) { - r = make_inet_socket (type, res, is_server, async, NULL); - freeaddrinfo (res); - return r; - } - else { - msg_err ("address resolution for %s failed: %s", credits, gai_strerror (r)); - return FALSE; - } - } -} - -/** - * Make universal stream socket - * @param credits host, ip or path to unix socket - * @param port port (used for network sockets) - * @param async make this socket asynced - * @param is_server make this socket as server socket - * @param try_resolve try name resolution for a socket (BLOCKING) - */ -GList* -make_universal_sockets_list (const gchar *credits, guint16 port, - gint type, gboolean async, gboolean is_server, gboolean try_resolve) -{ - struct sockaddr_un un; - struct stat st; - struct addrinfo hints, *res; - gint r, fd, serrno; - gchar portbuf[8], 
**strv, **cur; - GList *result = NULL, *rcur; - - strv = g_strsplit_set (credits, ",", -1); - if (strv == NULL) { - msg_err ("invalid sockets credentials: %s", credits); - return NULL; - } - cur = strv; - while (*cur != NULL) { - if (*credits == '/') { - if (is_server) { - fd = make_unix_socket (credits, &un, type, is_server, async); - } - else { - r = stat (credits, &st); - if (r == -1) { - /* Unix socket doesn't exists it must be created first */ - errno = ENOENT; - goto err; - } - else { - if ((st.st_mode & S_IFSOCK) == 0) { - /* Path is not valid socket */ - errno = EINVAL; - goto err; - } - else { - fd = make_unix_socket (credits, &un, type, is_server, async); - } - } - } - if (fd != -1) { - result = g_list_prepend (result, GINT_TO_POINTER (fd)); - } - else { - goto err; - } - } - else { - /* TCP related part */ - memset (&hints, 0, sizeof (hints)); - hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */ - hints.ai_socktype = type; /* Type of the socket */ - hints.ai_flags = is_server ? 
AI_PASSIVE : 0; - hints.ai_protocol = 0; /* Any protocol */ - hints.ai_canonname = NULL; - hints.ai_addr = NULL; - hints.ai_next = NULL; - - if (!try_resolve) { - hints.ai_flags |= AI_NUMERICHOST | AI_NUMERICSERV; - } - - rspamd_snprintf (portbuf, sizeof (portbuf), "%d", (int)port); - if ((r = getaddrinfo (credits, portbuf, &hints, &res)) == 0) { - r = make_inet_socket (type, res, is_server, async, &result); - freeaddrinfo (res); - if (r == -1) { - goto err; - } - } - else { - msg_err ("address resolution for %s failed: %s", credits, gai_strerror (r)); - goto err; - } - } - cur ++; - } - - g_strfreev (strv); - return result; - -err: - g_strfreev (strv); - serrno = errno; - rcur = result; - while (rcur != NULL) { - fd = GPOINTER_TO_INT (rcur->data); - if (fd != -1) { - close (fd); - } - rcur = g_list_next (rcur); - } - if (result != NULL) { - g_list_free (result); - } - - errno = serrno; - return NULL; -} - -gint -make_socketpair (gint pair[2]) -{ - gint r; - - r = socketpair (AF_LOCAL, SOCK_STREAM, 0, pair); - - if (r == -1) { - msg_warn ("socketpair failed: %d, '%s'", errno, strerror (errno), pair[0], pair[1]); - return -1; - } - /* Set close on exec */ - if (fcntl (pair[0], F_SETFD, FD_CLOEXEC) == -1) { - msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); - goto out; - } - if (fcntl (pair[1], F_SETFD, FD_CLOEXEC) == -1) { - msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); - goto out; - } - - return 0; - -out: - close (pair[0]); - close (pair[1]); - return (-1); -} - -gint -write_pid (struct rspamd_main *main) -{ - pid_t pid; - - if (main->cfg->pid_file == NULL) { - return -1; - } - main->pfh = rspamd_pidfile_open (main->cfg->pid_file, 0644, &pid); - - if (main->pfh == NULL) { - return -1; - } - - if (main->is_privilleged) { - /* Force root user as owner of pid file */ -#ifdef HAVE_PIDFILE_FILENO - if (fchown (pidfile_fileno (main->pfh), 0, 0) == -1) { -#else - if (fchown (main->pfh->pf_fd, 0, 0) == -1) { -#endif - msg_err ("cannot chown of 
pidfile %s to 0:0 user", main->cfg->pid_file); - } - } - - rspamd_pidfile_write (main->pfh); - - return 0; -} - -#ifdef HAVE_SA_SIGINFO -void -init_signals (struct sigaction *signals, void (*sig_handler)(gint, siginfo_t *, void *)) -#else -void -init_signals (struct sigaction *signals, void (*sig_handler)(gint)) -#endif -{ - struct sigaction sigpipe_act; - /* Setting up signal handlers */ - /* SIGUSR1 - reopen config file */ - /* SIGUSR2 - worker is ready for accept */ - sigemptyset (&signals->sa_mask); - sigaddset (&signals->sa_mask, SIGTERM); - sigaddset (&signals->sa_mask, SIGINT); - sigaddset (&signals->sa_mask, SIGHUP); - sigaddset (&signals->sa_mask, SIGCHLD); - sigaddset (&signals->sa_mask, SIGUSR1); - sigaddset (&signals->sa_mask, SIGUSR2); - sigaddset (&signals->sa_mask, SIGALRM); - - -#ifdef HAVE_SA_SIGINFO - signals->sa_flags = SA_SIGINFO; - signals->sa_handler = NULL; - signals->sa_sigaction = sig_handler; -#else - signals->sa_handler = sig_handler; - signals->sa_flags = 0; -#endif - sigaction (SIGTERM, signals, NULL); - sigaction (SIGINT, signals, NULL); - sigaction (SIGHUP, signals, NULL); - sigaction (SIGCHLD, signals, NULL); - sigaction (SIGUSR1, signals, NULL); - sigaction (SIGUSR2, signals, NULL); - sigaction (SIGALRM, signals, NULL); - - /* Ignore SIGPIPE as we handle write errors manually */ - sigemptyset (&sigpipe_act.sa_mask); - sigaddset (&sigpipe_act.sa_mask, SIGPIPE); - sigpipe_act.sa_handler = SIG_IGN; - sigpipe_act.sa_flags = 0; - sigaction (SIGPIPE, &sigpipe_act, NULL); -} - -static void -pass_signal_cb (gpointer key, gpointer value, gpointer ud) -{ - struct rspamd_worker *cur = value; - gint signo = GPOINTER_TO_INT (ud); - - kill (cur->pid, signo); -} - -void -pass_signal_worker (GHashTable * workers, gint signo) -{ - g_hash_table_foreach (workers, pass_signal_cb, GINT_TO_POINTER (signo)); -} - -void -convert_to_lowercase (gchar *str, guint size) -{ - while (size--) { - *str = g_ascii_tolower (*str); - str++; - } -} - -#ifndef 
HAVE_SETPROCTITLE - -static gchar *title_buffer = 0; -static size_t title_buffer_size = 0; -static gchar *title_progname, *title_progname_full; - -gint -setproctitle (const gchar *fmt, ...) -{ - if (!title_buffer || !title_buffer_size) { - errno = ENOMEM; - return -1; - } - - memset (title_buffer, '\0', title_buffer_size); - - ssize_t written; - - if (fmt) { - ssize_t written2; - va_list ap; - - written = snprintf (title_buffer, title_buffer_size, "%s: ", title_progname); - if (written < 0 || (size_t) written >= title_buffer_size) - return -1; - - va_start (ap, fmt); - written2 = vsnprintf (title_buffer + written, title_buffer_size - written, fmt, ap); - va_end (ap); - if (written2 < 0 || (size_t) written2 >= title_buffer_size - written) - return -1; - } - else { - written = snprintf (title_buffer, title_buffer_size, "%s", title_progname); - if (written < 0 || (size_t) written >= title_buffer_size) - return -1; - } - - written = strlen (title_buffer); - memset (title_buffer + written, '\0', title_buffer_size - written); - - return 0; -} - -/* - It has to be _init function, because __attribute__((constructor)) - functions gets called without arguments. 
-*/ - -gint -init_title (gint argc, gchar *argv[], gchar *envp[]) -{ -#if defined(DARWIN) || defined(SOLARIS) - /* XXX: try to handle these OSes too */ - return 0; -#else - gchar *begin_of_buffer = 0, *end_of_buffer = 0; - gint i; - - for (i = 0; i < argc; ++i) { - if (!begin_of_buffer) - begin_of_buffer = argv[i]; - if (!end_of_buffer || end_of_buffer + 1 == argv[i]) - end_of_buffer = argv[i] + strlen (argv[i]); - } - - for (i = 0; envp[i]; ++i) { - if (!begin_of_buffer) - begin_of_buffer = envp[i]; - if (!end_of_buffer || end_of_buffer + 1 == envp[i]) - end_of_buffer = envp[i] + strlen (envp[i]); - } - - if (!end_of_buffer) - return 0; - - gchar **new_environ = g_malloc ((i + 1) * sizeof (envp[0])); - - if (!new_environ) - return 0; - - for (i = 0; envp[i]; ++i) { - if (!(new_environ[i] = g_strdup (envp[i]))) - goto cleanup_enomem; - } - new_environ[i] = 0; - - if (program_invocation_name) { - title_progname_full = g_strdup (program_invocation_name); - - if (!title_progname_full) - goto cleanup_enomem; - - gchar *p = strrchr (title_progname_full, '/'); - - if (p) - title_progname = p + 1; - else - title_progname = title_progname_full; - - program_invocation_name = title_progname_full; - program_invocation_short_name = title_progname; - } - - environ = new_environ; - title_buffer = begin_of_buffer; - title_buffer_size = end_of_buffer - begin_of_buffer; - - return 0; - - cleanup_enomem: - for (--i; i >= 0; --i) { - g_free (new_environ[i]); - } - g_free (new_environ); - return 0; -#endif -} -#endif - -#ifndef HAVE_PIDFILE -extern gchar *__progname; -static gint _rspamd_pidfile_remove (rspamd_pidfh_t *pfh, gint freeit); - -static gint -rspamd_pidfile_verify (rspamd_pidfh_t *pfh) -{ - struct stat sb; - - if (pfh == NULL || pfh->pf_fd == -1) - return (-1); - /* - * Check remembered descriptor. 
- */ - if (fstat (pfh->pf_fd, &sb) == -1) - return (errno); - if (sb.st_dev != pfh->pf_dev || sb.st_ino != pfh->pf_ino) - return -1; - return 0; -} - -static gint -rspamd_pidfile_read (const gchar *path, pid_t * pidptr) -{ - gchar buf[16], *endptr; - gint error, fd, i; - - fd = open (path, O_RDONLY); - if (fd == -1) - return (errno); - - i = read (fd, buf, sizeof (buf) - 1); - error = errno; /* Remember errno in case close() wants to change it. */ - close (fd); - if (i == -1) - return error; - else if (i == 0) - return EAGAIN; - buf[i] = '\0'; - - *pidptr = strtol (buf, &endptr, 10); - if (endptr != &buf[i]) - return EINVAL; - - return 0; -} - -rspamd_pidfh_t * -rspamd_pidfile_open (const gchar *path, mode_t mode, pid_t * pidptr) -{ - rspamd_pidfh_t *pfh; - struct stat sb; - gint error, fd, len, count; - struct timespec rqtp; - - pfh = g_malloc (sizeof (*pfh)); - if (pfh == NULL) - return NULL; - - if (path == NULL) - len = snprintf (pfh->pf_path, sizeof (pfh->pf_path), "/var/run/%s.pid", g_get_prgname ()); - else - len = snprintf (pfh->pf_path, sizeof (pfh->pf_path), "%s", path); - if (len >= (gint)sizeof (pfh->pf_path)) { - g_free (pfh); - errno = ENAMETOOLONG; - return NULL; - } - - /* - * Open the PID file and obtain exclusive lock. - * We truncate PID file here only to remove old PID immediatelly, - * PID file will be truncated again in pidfile_write(), so - * pidfile_write() can be called multiple times. 
- */ - fd = open (pfh->pf_path, O_WRONLY | O_CREAT | O_TRUNC | O_NONBLOCK, mode); - lock_file (fd, TRUE); - if (fd == -1) { - count = 0; - rqtp.tv_sec = 0; - rqtp.tv_nsec = 5000000; - if (errno == EWOULDBLOCK && pidptr != NULL) { - again: - errno = rspamd_pidfile_read (pfh->pf_path, pidptr); - if (errno == 0) - errno = EEXIST; - else if (errno == EAGAIN) { - if (++count <= 3) { - nanosleep (&rqtp, 0); - goto again; - } - } - } - g_free (pfh); - return NULL; - } - /* - * Remember file information, so in pidfile_write() we are sure we write - * to the proper descriptor. - */ - if (fstat (fd, &sb) == -1) { - error = errno; - unlink (pfh->pf_path); - close (fd); - g_free (pfh); - errno = error; - return NULL; - } - - pfh->pf_fd = fd; - pfh->pf_dev = sb.st_dev; - pfh->pf_ino = sb.st_ino; - - return pfh; -} - -gint -rspamd_pidfile_write (rspamd_pidfh_t *pfh) -{ - gchar pidstr[16]; - gint error, fd; - - /* - * Check remembered descriptor, so we don't overwrite some other - * file if pidfile was closed and descriptor reused. - */ - errno = rspamd_pidfile_verify (pfh); - if (errno != 0) { - /* - * Don't close descriptor, because we are not sure if it's ours. - */ - return -1; - } - fd = pfh->pf_fd; - - /* - * Truncate PID file, so multiple calls of pidfile_write() are allowed. 
- */ - if (ftruncate (fd, 0) == -1) { - error = errno; - _rspamd_pidfile_remove (pfh, 0); - errno = error; - return -1; - } - - rspamd_snprintf (pidstr, sizeof (pidstr), "%P", getpid ()); - if (pwrite (fd, pidstr, strlen (pidstr), 0) != (ssize_t) strlen (pidstr)) { - error = errno; - _rspamd_pidfile_remove (pfh, 0); - errno = error; - return -1; - } - - return 0; -} - -gint -rspamd_pidfile_close (rspamd_pidfh_t *pfh) -{ - gint error; - - error = rspamd_pidfile_verify (pfh); - if (error != 0) { - errno = error; - return -1; - } - - if (close (pfh->pf_fd) == -1) - error = errno; - g_free (pfh); - if (error != 0) { - errno = error; - return -1; - } - return 0; -} - -static gint -_rspamd_pidfile_remove (rspamd_pidfh_t *pfh, gint freeit) -{ - gint error; - - error = rspamd_pidfile_verify (pfh); - if (error != 0) { - errno = error; - return -1; - } - - if (unlink (pfh->pf_path) == -1) - error = errno; - if (!unlock_file (pfh->pf_fd, FALSE)) { - if (error == 0) - error = errno; - } - if (close (pfh->pf_fd) == -1) { - if (error == 0) - error = errno; - } - if (freeit) - g_free (pfh); - else - pfh->pf_fd = -1; - if (error != 0) { - errno = error; - return -1; - } - return 0; -} - -gint -rspamd_pidfile_remove (rspamd_pidfh_t *pfh) -{ - - return (_rspamd_pidfile_remove (pfh, 1)); -} -#endif - -/* Replace %r with rcpt value and %f with from value, new string is allocated in pool */ -gchar * -resolve_stat_filename (rspamd_mempool_t * pool, gchar *pattern, gchar *rcpt, gchar *from) -{ - gint need_to_format = 0, len = 0; - gint rcptlen, fromlen; - gchar *c = pattern, *new, *s; - - if (rcpt) { - rcptlen = strlen (rcpt); - } - else { - rcptlen = 0; - } - - if (from) { - fromlen = strlen (from); - } - else { - fromlen = 0; - } - - /* Calculate length */ - while (*c++) { - if (*c == '%' && *(c + 1) == 'r') { - len += rcptlen; - c += 2; - need_to_format = 1; - continue; - } - else if (*c == '%' && *(c + 1) == 'f') { - len += fromlen; - c += 2; - need_to_format = 1; - continue; - } - 
len++; - } - - /* Do not allocate extra memory if we do not need to format string */ - if (!need_to_format) { - return pattern; - } - - /* Allocate new string */ - new = rspamd_mempool_alloc (pool, len); - c = pattern; - s = new; - - /* Format string */ - while (*c++) { - if (*c == '%' && *(c + 1) == 'r') { - c += 2; - memcpy (s, rcpt, rcptlen); - s += rcptlen; - continue; - } - else if (*c == '%' && *(c + 1) == 'r') { - c += 2; - memcpy (s, from, fromlen); - s += fromlen; - continue; - } - *s++ = *c; - } - - *s = '\0'; - - return new; -} - -#ifdef HAVE_CLOCK_GETTIME -const gchar * -calculate_check_time (struct timeval *tv, struct timespec *begin, gint resolution, guint32 *scan_time) -#else -const gchar * -calculate_check_time (struct timeval *begin, gint resolution, guint32 *scan_time) -#endif -{ - double vdiff, diff; - static gchar res[64]; - static gchar fmt[sizeof ("%.10f ms real, %.10f ms virtual")]; - struct timeval tv_now; - - if (gettimeofday (&tv_now, NULL) == -1) { - msg_warn ("gettimeofday failed: %s", strerror (errno)); - } -#ifdef HAVE_CLOCK_GETTIME - struct timespec ts; - - diff = (tv_now.tv_sec - tv->tv_sec) * 1000. + /* Seconds */ - (tv_now.tv_usec - tv->tv_usec) / 1000.; /* Microseconds */ -#ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID - clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts); -#elif defined(HAVE_CLOCK_VIRTUAL) - clock_gettime (CLOCK_VIRTUAL, &ts); -#else - clock_gettime (CLOCK_REALTIME, &ts); -#endif - - vdiff = (ts.tv_sec - begin->tv_sec) * 1000. + /* Seconds */ - (ts.tv_nsec - begin->tv_nsec) / 1000000.; /* Nanoseconds */ -#else - diff = (tv_now.tv_sec - begin->tv_sec) * 1000. + /* Seconds */ - (tv_now.tv_usec - begin->tv_usec) / 1000.; /* Microseconds */ - - vdiff = diff; -#endif - - *scan_time = diff; - - sprintf (fmt, "%%.%dfms real, %%.%dfms virtual", resolution, resolution); - snprintf (res, sizeof (res), fmt, diff, vdiff); - - return (const gchar *)res; -} - -#ifndef g_tolower -# define g_tolower(x) (((x) >= 'A' && (x) <= 'Z') ? 
(x) - 'A' + 'a' : (x)) -#endif - - -gboolean -rspamd_strcase_equal (gconstpointer v, gconstpointer v2) -{ - if (g_ascii_strcasecmp ((const gchar *)v, (const gchar *)v2) == 0) { - return TRUE; - } - - return FALSE; -} - - -guint -rspamd_strcase_hash (gconstpointer key) -{ - const gchar *p = key; - gchar buf[256]; - guint h = 0, i = 0; - - - while (*p != '\0') { - buf[i] = g_ascii_tolower (*p); - i++; - p++; - if (i == sizeof (buf)) { - h ^= murmur32_hash (buf, i); - i = 0; - } - } - - if (i > 0) { - h ^= murmur32_hash (buf, i); - } - - return h; -} - -guint -rspamd_str_hash (gconstpointer key) -{ - gsize len; - - len = strlen ((const gchar *)key); - - return murmur32_hash (key, len); -} - -gboolean -rspamd_str_equal (gconstpointer v, gconstpointer v2) -{ - return strcmp ((const gchar *)v, (const gchar *)v2) == 0; -} - -gboolean -fstr_strcase_equal (gconstpointer v, gconstpointer v2) -{ - const f_str_t *f1 = v, *f2 = v2; - if (f1->len == f2->len && g_ascii_strncasecmp (f1->begin, f2->begin, f1->len) == 0) { - return TRUE; - } - - return FALSE; -} - - -guint -fstr_strcase_hash (gconstpointer key) -{ - const f_str_t *f = key; - const gchar *p; - guint h = 0, i = 0; - gchar buf[256]; - - p = f->begin; - while (p - f->begin < (gint)f->len) { - buf[i] = g_ascii_tolower (*p); - i++; - p++; - if (i == sizeof (buf)) { - h ^= murmur32_hash (buf, i); - i = 0; - } - } - - if (i > 0) { - h ^= murmur32_hash (buf, i); - } - - return h; -} - -void -gperf_profiler_init (struct config_file *cfg, const gchar *descr) -{ -#if defined(WITH_GPERF_TOOLS) - gchar prof_path[PATH_MAX]; - - if (getenv ("CPUPROFILE")) { - - /* disable inherited Profiler enabled in master process */ - ProfilerStop (); - } - /* Try to create temp directory for gmon.out and chdir to it */ - if (cfg->profile_path == NULL) { - cfg->profile_path = g_strdup_printf ("%s/rspamd-profile", cfg->temp_dir); - } - - snprintf (prof_path, sizeof (prof_path), "%s-%s.%d", cfg->profile_path, descr, (gint)getpid ()); - if 
(ProfilerStart (prof_path)) { - /* start ITIMER_PROF timer */ - ProfilerRegisterThread (); - } - else { - msg_warn ("cannot start google perftools profiler"); - } - -#endif -} - -#ifdef HAVE_FLOCK -/* Flock version */ -gboolean -lock_file (gint fd, gboolean async) -{ - gint flags; - - if (async) { - flags = LOCK_EX | LOCK_NB; - } - else { - flags = LOCK_EX; - } - - if (flock (fd, flags) == -1) { - if (async && errno == EAGAIN) { - return FALSE; - } - msg_warn ("lock on file failed: %s", strerror (errno)); - return FALSE; - } - - return TRUE; -} - -gboolean -unlock_file (gint fd, gboolean async) -{ - gint flags; - - if (async) { - flags = LOCK_UN | LOCK_NB; - } - else { - flags = LOCK_UN; - } - - if (flock (fd, flags) == -1) { - if (async && errno == EAGAIN) { - return FALSE; - } - msg_warn ("lock on file failed: %s", strerror (errno)); - return FALSE; - } - - return TRUE; - -} -#else /* HAVE_FLOCK */ -/* Fctnl version */ -gboolean -lock_file (gint fd, gboolean async) -{ - struct flock fl = { - .l_type = F_WRLCK, - .l_whence = SEEK_SET, - .l_start = 0, - .l_len = 0 - }; - - if (fcntl (fd, async ? F_SETLK : F_SETLKW, &fl) == -1) { - if (async && (errno == EAGAIN || errno == EACCES)) { - return FALSE; - } - msg_warn ("lock on file failed: %s", strerror (errno)); - return FALSE; - } - - return TRUE; -} - -gboolean -unlock_file (gint fd, gboolean async) -{ - struct flock fl = { - .l_type = F_UNLCK, - .l_whence = SEEK_SET, - .l_start = 0, - .l_len = 0 - }; - - if (fcntl (fd, async ? 
F_SETLK : F_SETLKW, &fl) == -1) { - if (async && (errno == EAGAIN || errno == EACCES)) { - return FALSE; - } - msg_warn ("lock on file failed: %s", strerror (errno)); - return FALSE; - } - - return TRUE; - -} -#endif /* HAVE_FLOCK */ - - -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 22)) -void -g_ptr_array_unref (GPtrArray *array) -{ - g_ptr_array_free (array, TRUE); -} -#endif -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 14)) -void -g_queue_clear (GQueue *queue) -{ - g_return_if_fail (queue != NULL); - - g_list_free (queue->head); - queue->head = queue->tail = NULL; - queue->length = 0; -} -#endif - -gsize -rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz) -{ - gchar *d = dst; - const gchar *s = src; - gsize n = siz; - - /* Copy as many bytes as will fit */ - if (n != 0) { - while (--n != 0) { - if ((*d++ = *s++) == '\0') { - break; - } - } - } - - if (n == 0 && siz != 0) { - *d = '\0'; - } - - return (s - src - 1); /* count does not include NUL */ -} - -gsize -rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz) -{ - gchar *d = dst; - const gchar *s = src; - gsize n = siz; - - /* Copy as many bytes as will fit */ - if (n != 0) { - while (--n != 0) { - if ((*d++ = g_ascii_tolower (*s++)) == '\0') { - break; - } - } - } - - if (n == 0 && siz != 0) { - *d = '\0'; - } - - return (s - src - 1); /* count does not include NUL */ -} - -/* Compare two emails for building emails tree */ -gint -compare_email_func (gconstpointer a, gconstpointer b) -{ - const struct uri *u1 = a, *u2 = b; - gint r; - - if (u1->hostlen != u2->hostlen || u1->hostlen == 0) { - return u1->hostlen - u2->hostlen; - } - else { - if ((r = g_ascii_strncasecmp (u1->host, u2->host, u1->hostlen)) == 0){ - if (u1->userlen != u2->userlen || u1->userlen == 0) { - return u1->userlen - u2->userlen; - } - else { - return g_ascii_strncasecmp (u1->user, u2->user, u1->userlen); - } - } - else { - return r; - } - } - - return 0; -} - -gint -compare_url_func (gconstpointer 
a, gconstpointer b) -{ - const struct uri *u1 = a, *u2 = b; - int r; - - if (u1->hostlen != u2->hostlen || u1->hostlen == 0) { - return u1->hostlen - u2->hostlen; - } - else { - r = g_ascii_strncasecmp (u1->host, u2->host, u1->hostlen); - if (r == 0 && u1->is_phished != u2->is_phished) { - /* Always insert phished urls to the tree */ - return -1; - } - } - - return r; -} - -/* - * Find the first occurrence of find in s, ignore case. - */ -gchar * -rspamd_strncasestr (const gchar *s, const gchar *find, gint len) -{ - gchar c, sc; - gsize mlen; - - if ((c = *find++) != 0) { - c = g_ascii_tolower (c); - mlen = strlen (find); - do { - do { - if ((sc = *s++) == 0 || len -- == 0) - return (NULL); - } while (g_ascii_tolower (sc) != c); - } while (g_ascii_strncasecmp (s, find, mlen) != 0); - s--; - } - return ((gchar *)s); -} - -/* - * Try to convert string of length to long - */ -gboolean -rspamd_strtol (const gchar *s, gsize len, glong *value) -{ - const gchar *p = s, *end = s + len; - gchar c; - glong v = 0; - const glong cutoff = G_MAXLONG / 10, cutlim = G_MAXLONG % 10; - gboolean neg; - - /* Case negative values */ - if (*p == '-') { - neg = TRUE; - p ++; - } - else { - neg = FALSE; - } - /* Some preparations for range errors */ - - while (p < end) { - c = *p; - if (c >= '0' && c <= '9') { - c -= '0'; - if (v > cutoff || (v == cutoff && c > cutlim)) { - /* Range error */ - *value = neg ? G_MINLONG : G_MAXLONG; - return FALSE; - } - else { - v *= 10; - v += c; - } - } - else { - return FALSE; - } - p ++; - } - - *value = neg ? 
-(v) : v; - return TRUE; -} - -/* - * Try to convert string of length to long - */ -gboolean -rspamd_strtoul (const gchar *s, gsize len, gulong *value) -{ - const gchar *p = s, *end = s + len; - gchar c; - gulong v = 0; - const gulong cutoff = G_MAXULONG / 10, cutlim = G_MAXULONG % 10; - - /* Some preparations for range errors */ - while (p < end) { - c = *p; - if (c >= '0' && c <= '9') { - c -= '0'; - if (v > cutoff || (v == cutoff && (guint8)c > cutlim)) { - /* Range error */ - *value = G_MAXULONG; - return FALSE; - } - else { - v *= 10; - v += c; - } - } - else { - return FALSE; - } - p ++; - } - - *value = v; - return TRUE; -} - -gint -rspamd_fallocate (gint fd, off_t offset, off_t len) -{ -#if defined(HAVE_FALLOCATE) - return fallocate (fd, 0, offset, len); -#elif defined(HAVE_POSIX_FALLOCATE) - return posix_fallocate (fd, offset, len); -#else - /* Return 0 as nothing can be done on this system */ - return 0; -#endif -} - - -/** - * Create new mutex - * @return mutex or NULL - */ -inline rspamd_mutex_t* -rspamd_mutex_new (void) -{ - rspamd_mutex_t *new; - - new = g_slice_alloc (sizeof (rspamd_mutex_t)); -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - g_mutex_init (&new->mtx); -#else - g_static_mutex_init (&new->mtx); -#endif - - return new; -} - -/** - * Lock mutex - * @param mtx - */ -inline void -rspamd_mutex_lock (rspamd_mutex_t *mtx) -{ -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - g_mutex_lock (&mtx->mtx); -#else - g_static_mutex_lock (&mtx->mtx); -#endif -} - -/** - * Unlock mutex - * @param mtx - */ -inline void -rspamd_mutex_unlock (rspamd_mutex_t *mtx) -{ -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - g_mutex_unlock (&mtx->mtx); -#else - g_static_mutex_unlock (&mtx->mtx); -#endif -} - -void -rspamd_mutex_free (rspamd_mutex_t *mtx) -{ -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - g_mutex_clear (&mtx->mtx); -#endif - g_slice_free1 (sizeof (rspamd_mutex_t), mtx); -} - -/** - * Create 
new rwlock - * @return - */ -rspamd_rwlock_t* -rspamd_rwlock_new (void) -{ - rspamd_rwlock_t *new; - - new = g_malloc (sizeof (rspamd_rwlock_t)); -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - g_rw_lock_init (&new->rwlock); -#else - g_static_rw_lock_init (&new->rwlock); -#endif - - return new; -} - -/** - * Lock rwlock for writing - * @param mtx - */ -inline void -rspamd_rwlock_writer_lock (rspamd_rwlock_t *mtx) -{ -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - g_rw_lock_writer_lock (&mtx->rwlock); -#else - g_static_rw_lock_writer_lock (&mtx->rwlock); -#endif -} - -/** - * Lock rwlock for reading - * @param mtx - */ -inline void -rspamd_rwlock_reader_lock (rspamd_rwlock_t *mtx) -{ -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - g_rw_lock_reader_lock (&mtx->rwlock); -#else - g_static_rw_lock_reader_lock (&mtx->rwlock); -#endif -} - -/** - * Unlock rwlock from writing - * @param mtx - */ -inline void -rspamd_rwlock_writer_unlock (rspamd_rwlock_t *mtx) -{ -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - g_rw_lock_writer_unlock (&mtx->rwlock); -#else - g_static_rw_lock_writer_unlock (&mtx->rwlock); -#endif -} - -/** - * Unlock rwlock from reading - * @param mtx - */ -inline void -rspamd_rwlock_reader_unlock (rspamd_rwlock_t *mtx) -{ -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - g_rw_lock_reader_unlock (&mtx->rwlock); -#else - g_static_rw_lock_reader_unlock (&mtx->rwlock); -#endif -} - -void -rspamd_rwlock_free (rspamd_rwlock_t *mtx) -{ -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - g_rw_lock_clear (&mtx->rwlock); -#endif - g_slice_free1 (sizeof (rspamd_rwlock_t), mtx); -} - -struct rspamd_thread_data { - gchar *name; - gint id; - GThreadFunc func; - gpointer data; -}; - -static gpointer -rspamd_thread_func (gpointer ud) -{ - struct rspamd_thread_data *td = ud; - sigset_t s_mask; - - /* Ignore signals in thread */ - sigemptyset (&s_mask); - sigaddset (&s_mask, SIGTERM); - 
sigaddset (&s_mask, SIGINT); - sigaddset (&s_mask, SIGHUP); - sigaddset (&s_mask, SIGCHLD); - sigaddset (&s_mask, SIGUSR1); - sigaddset (&s_mask, SIGUSR2); - sigaddset (&s_mask, SIGALRM); - sigaddset (&s_mask, SIGPIPE); - - sigprocmask (SIG_BLOCK, &s_mask, NULL); - - ud = td->func (td->data); - g_free (td->name); - g_free (td); - - return ud; -} - -/** - * Create new named thread - * @param name name pattern - * @param func function to start - * @param data data to pass to function - * @param err error pointer - * @return new thread object that can be joined - */ -GThread* -rspamd_create_thread (const gchar *name, GThreadFunc func, gpointer data, GError **err) -{ - GThread *new; - struct rspamd_thread_data *td; - static gint32 id; - guint r; - - r = strlen (name); - td = g_malloc (sizeof (struct rspamd_thread_data)); - td->id = ++id; - td->name = g_malloc (r + sizeof ("4294967296")); - td->func = func; - td->data = data; - - rspamd_snprintf (td->name, r + sizeof ("4294967296"), "%s-%d", name, id); -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - new = g_thread_try_new (td->name, rspamd_thread_func, td, err); -#else - new = g_thread_create (rspamd_thread_func, td, TRUE, err); -#endif - - return new; -} - -guint32 -murmur32_hash (const guint8 *in, gsize len) -{ - - - const guint32 c1 = 0xcc9e2d51; - const guint32 c2 = 0x1b873593; - - const int nblocks = len / 4; - const guint32 *blocks = (const guint32 *)(in); - const guint8 *tail; - guint32 h = 0; - gint i; - guint32 k; - - if (in == NULL || len == 0) { - return 0; - } - - tail = (const guint8 *)(in + (nblocks * 4)); - - for (i = 0; i < nblocks; i++) { - k = blocks[i]; - - k *= c1; - k = (k << 15) | (k >> (32 - 15)); - k *= c2; - - h ^= k; - h = (h << 13) | (h >> (32 - 13)); - h = (h * 5) + 0xe6546b64; - } - - k = 0; - switch (len & 3) { - case 3: - k ^= tail[2] << 16; - case 2: - k ^= tail[1] << 8; - case 1: - k ^= tail[0]; - k *= c1; - k = (k << 13) | (k >> (32 - 15)); - k *= c2; - h ^= k; - }; - - 
h ^= len; - - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - - return h; -} - -void -murmur128_hash (const guint8 *in, gsize len, guint64 out[]) -{ - const guint64 c1 = 0x87c37b91114253d5ULL; - const guint64 c2 = 0x4cf5ad432745937fULL; - const gint nblocks = len / 16; - const guint64 *blocks = (const guint64 *)(in); - const guint8 *tail; - guint64 h1 = 0; - guint64 h2 = 0; - int i; - guint64 k1, k2; - - if (in == NULL || len == 0 || out == NULL) { - return; - } - - tail = (const guint8 *)(in + (nblocks * 16)); - - for (i = 0; i < nblocks; i++) { - k1 = blocks[i*2+0]; - k2 = blocks[i*2+1]; - - k1 *= c1; - k1 = (k1 << 31) | (k1 >> (64 - 31)); - k1 *= c2; - h1 ^= k1; - - h1 = (h1 << 27) | (h1 >> (64 - 27)); - h1 += h2; - h1 = h1*5+0x52dce729; - - k2 *= c2; - k2 = (k2 << 33) | (k2 >> (64 - 33)); - k2 *= c1; - h2 ^= k2; - - h2 = (h2 << 31) | (h2 >> (64 - 31)); - h2 += h1; - h2 = h2*5+0x38495ab5; - } - - k1 = k2 = 0; - switch (len & 15) { - case 15: - k2 ^= (guint64)(tail[14]) << 48; - case 14: - k2 ^= (guint64)(tail[13]) << 40; - case 13: - k2 ^= (guint64)(tail[12]) << 32; - case 12: - k2 ^= (guint64)(tail[11]) << 24; - case 11: - k2 ^= (guint64)(tail[10]) << 16; - case 10: - k2 ^= (guint64)(tail[ 9]) << 8; - case 9: - k2 ^= (guint64)(tail[ 8]) << 0; - k2 *= c2; - k2 = (k2 << 33) | (k2 >> (64 - 33)); - k2 *= c1; - h2 ^= k2; - - case 8: - k1 ^= (guint64)(tail[ 7]) << 56; - case 7: - k1 ^= (guint64)(tail[ 6]) << 48; - case 6: - k1 ^= (guint64)(tail[ 5]) << 40; - case 5: - k1 ^= (guint64)(tail[ 4]) << 32; - case 4: - k1 ^= (guint64)(tail[ 3]) << 24; - case 3: - k1 ^= (guint64)(tail[ 2]) << 16; - case 2: - k1 ^= (guint64)(tail[ 1]) << 8; - case 1: - k1 ^= (guint64)(tail[ 0]) << 0; - k1 *= c1; - k1 = (k1 << 31) | (k1 >> (64 - 31)); - k1 *= c2; - h1 ^= k1; - }; - - //---------- - // finalization - - h1 ^= len; - h2 ^= len; - - h1 += h2; - h2 += h1; - - h1 ^= h1 >> 33; - h1 *= 0xff51afd7ed558ccdULL; - h1 ^= h1 >> 33; - h1 *= 
0xc4ceb9fe1a85ec53ULL; - h1 ^= h1 >> 33; - - h2 ^= h2 >> 33; - h2 *= 0xff51afd7ed558ccdULL; - h2 ^= h2 >> 33; - h2 *= 0xc4ceb9fe1a85ec53ULL; - h2 ^= h2 >> 33; - - h1 += h2; - h2 += h1; - - out[0] = h1; - out[1] = h2; -} - -struct hash_copy_callback_data { - gpointer (*key_copy_func)(gconstpointer data, gpointer ud); - gpointer (*value_copy_func)(gconstpointer data, gpointer ud); - gpointer ud; - GHashTable *dst; -}; - -static void -copy_foreach_callback (gpointer key, gpointer value, gpointer ud) -{ - struct hash_copy_callback_data *cb = ud; - gpointer nkey, nvalue; - - nkey = cb->key_copy_func ? cb->key_copy_func (key, cb->ud) : (gpointer)key; - nvalue = cb->value_copy_func ? cb->value_copy_func (value, cb->ud) : (gpointer)value; - g_hash_table_insert (cb->dst, nkey, nvalue); -} -/** - * Deep copy of one hash table to another - * @param src source hash - * @param dst destination hash - * @param key_copy_func function called to copy or modify keys (or NULL) - * @param value_copy_func function called to copy or modify values (or NULL) - * @param ud user data for copy functions - */ -void rspamd_hash_table_copy (GHashTable *src, GHashTable *dst, - gpointer (*key_copy_func)(gconstpointer data, gpointer ud), - gpointer (*value_copy_func)(gconstpointer data, gpointer ud), - gpointer ud) -{ - struct hash_copy_callback_data cb; - if (src != NULL && dst != NULL) { - cb.key_copy_func = key_copy_func; - cb.value_copy_func = value_copy_func; - cb.ud = ud; - cb.dst = dst; - g_hash_table_foreach (src, copy_foreach_callback, &cb); - } -} - -/** - * Utility function to provide mem_pool copy for rspamd_hash_table_copy function - * @param data string to copy - * @param ud memory pool to use - * @return - */ -gpointer -rspamd_str_pool_copy (gconstpointer data, gpointer ud) -{ - rspamd_mempool_t *pool = ud; - - return data ? 
rspamd_mempool_strdup (pool, data) : NULL; -} - -gboolean -parse_ipmask_v4 (const char *line, struct in_addr *ina, int *mask) -{ - const char *pos; - char ip_buf[INET_ADDRSTRLEN + 1], mask_buf[3] = { '\0', '\0', '\0' }; - - bzero (ip_buf, sizeof (ip_buf)); - - if ((pos = strchr (line, '/')) != NULL) { - rspamd_strlcpy (ip_buf, line, MIN ((gsize)(pos - line), sizeof (ip_buf))); - rspamd_strlcpy (mask_buf, pos + 1, sizeof (mask_buf)); - } - else { - rspamd_strlcpy (ip_buf, line, sizeof (ip_buf)); - } - - if (!inet_aton (ip_buf, ina)) { - return FALSE; - } - - if (mask_buf[0] != '\0') { - /* Also parse mask */ - *mask = (mask_buf[0] - '0') * 10 + mask_buf[1] - '0'; - if (*mask > 32) { - return FALSE; - } - } - else { - *mask = 32; - } - - *mask = G_MAXUINT32 << (32 - *mask); - - return TRUE; -} - -static volatile sig_atomic_t saved_signo[NSIG]; - -static -void read_pass_tmp_sig_handler (int s) -{ - - saved_signo[s] = 1; -} - -#ifndef _PATH_TTY -# define _PATH_TTY "/dev/tty" -#endif - -gint -rspamd_read_passphrase (gchar *buf, gint size, gint rwflag, gpointer key) -{ -#ifdef HAVE_PASSPHRASE_H - gint len = 0; - gchar pass[BUFSIZ]; - - if (readpassphrase ("Enter passphrase: ", buf, size, RPP_ECHO_OFF | RPP_REQUIRE_TTY) == NULL) { - return 0; - } - - return strlen (buf); -#else - struct sigaction sa, savealrm, saveint, savehup, savequit, saveterm; - struct sigaction savetstp, savettin, savettou, savepipe; - struct termios term, oterm; - gint input, output, i; - gchar *end, *p, ch; - -restart: - if ((input = output = open (_PATH_TTY, O_RDWR)) == -1) { - errno = ENOTTY; - return 0; - } - if (fcntl (input, F_SETFD, FD_CLOEXEC) == -1) { - msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); - } - - /* Turn echo off */ - if (tcgetattr (input, &oterm) != 0) { - errno = ENOTTY; - return 0; - } - memcpy(&term, &oterm, sizeof(term)); - term.c_lflag &= ~(ECHO | ECHONL); - (void)tcsetattr(input, TCSAFLUSH, &term); - (void)write (output, "Enter passphrase: ", sizeof ("Enter 
passphrase: ") - 1); - - /* Save the current sighandler */ - for (i = 0; i < NSIG; i++) { - saved_signo[i] = 0; - } - sigemptyset(&sa.sa_mask); - sa.sa_flags = 0; - sa.sa_handler = read_pass_tmp_sig_handler; - (void)sigaction (SIGALRM, &sa, &savealrm); - (void)sigaction (SIGHUP, &sa, &savehup); - (void)sigaction (SIGINT, &sa, &saveint); - (void)sigaction (SIGPIPE, &sa, &savepipe); - (void)sigaction (SIGQUIT, &sa, &savequit); - (void)sigaction (SIGTERM, &sa, &saveterm); - (void)sigaction (SIGTSTP, &sa, &savetstp); - (void)sigaction (SIGTTIN, &sa, &savettin); - (void)sigaction (SIGTTOU, &sa, &savettou); - - /* Now read a passphrase */ - p = buf; - end = p + size - 1; - while (read (input, &ch, 1) == 1 && ch != '\n' && ch != '\r') { - if (p < end) { - *p++ = ch; - } - } - *p = '\0'; - (void)write (output, "\n", 1); - - /* Restore terminal state */ - if (memcmp (&term, &oterm, sizeof (term)) != 0) { - while (tcsetattr (input, TCSAFLUSH, &oterm) == -1 && - errno == EINTR && !saved_signo[SIGTTOU]); - } - - /* Restore signal handlers */ - (void)sigaction (SIGALRM, &savealrm, NULL); - (void)sigaction (SIGHUP, &savehup, NULL); - (void)sigaction (SIGINT, &saveint, NULL); - (void)sigaction (SIGQUIT, &savequit, NULL); - (void)sigaction (SIGPIPE, &savepipe, NULL); - (void)sigaction (SIGTERM, &saveterm, NULL); - (void)sigaction (SIGTSTP, &savetstp, NULL); - (void)sigaction (SIGTTIN, &savettin, NULL); - (void)sigaction (SIGTTOU, &savettou, NULL); - - close (input); - - /* Send signals pending */ - for (i = 0; i < NSIG; i++) { - if (saved_signo[i]) { - kill(getpid(), i); - switch (i) { - case SIGTSTP: - case SIGTTIN: - case SIGTTOU: - goto restart; - } - } - } - - return p - buf; -#endif -} - -gboolean -rspamd_ip_is_valid (rspamd_inet_addr_t *addr) -{ - const struct in_addr ip4_any = { INADDR_ANY }, ip4_none = { INADDR_NONE }; - const struct in6_addr ip6_any = IN6ADDR_ANY_INIT; - - gboolean ret = FALSE; - - if (G_LIKELY (addr->af == AF_INET)) { - if (memcmp 
(&addr->addr.s4.sin_addr, &ip4_any, sizeof (struct in_addr)) != 0 && - memcmp (&addr->addr.s4.sin_addr, &ip4_none, - sizeof (struct in_addr)) != 0) { - ret = TRUE; - } - } - else if (G_UNLIKELY (addr->af == AF_INET6)) { - if (memcmp (&addr->addr.s6.sin6_addr, &ip6_any, - sizeof (struct in6_addr)) != 0) { - ret = TRUE; - } - } - - return ret; -} - -/* - * GString ucl emitting functions - */ -static int -rspamd_gstring_append_character (unsigned char c, size_t len, void *ud) -{ - GString *buf = ud; - gsize old_len; - - if (len == 1) { - g_string_append_c (buf, c); - } - else { - if (buf->allocated_len - buf->len <= len) { - old_len = buf->len; - g_string_set_size (buf, buf->len + len + 1); - buf->len = old_len; - } - memset (&buf->str[buf->len], c, len); - buf->len += len; - } - - return 0; -} - -static int -rspamd_gstring_append_len (const unsigned char *str, size_t len, void *ud) -{ - GString *buf = ud; - - g_string_append_len (buf, str, len); - - return 0; -} - -static int -rspamd_gstring_append_int (int64_t val, void *ud) -{ - GString *buf = ud; - - rspamd_printf_gstring (buf, "%L", (intmax_t)val); - return 0; -} - -static int -rspamd_gstring_append_double (double val, void *ud) -{ - GString *buf = ud; - const double delta = 0.0000001; - - if (val == (double)(int)val) { - rspamd_printf_gstring (buf, "%.1f", val); - } - else if (fabs (val - (double)(int)val) < delta) { - /* Write at maximum precision */ - rspamd_printf_gstring (buf, "%.*g", DBL_DIG, val); - } - else { - rspamd_printf_gstring (buf, "%f", val); - } - - return 0; -} - -void -rspamd_ucl_emit_gstring (ucl_object_t *obj, enum ucl_emitter emit_type, GString *target) -{ - struct ucl_emitter_functions func = { - .ucl_emitter_append_character = rspamd_gstring_append_character, - .ucl_emitter_append_len = rspamd_gstring_append_len, - .ucl_emitter_append_int = rspamd_gstring_append_int, - .ucl_emitter_append_double = rspamd_gstring_append_double - }; - - func.ud = target; - ucl_object_emit_full (obj, 
emit_type, &func); -} - -gint -rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t *addr) -{ - gint nfd, serrno; - socklen_t len = sizeof (addr->addr.ss); - - if ((nfd = accept (sock, &addr->addr.sa, &len)) == -1) { - if (errno == EAGAIN || errno == EINTR || errno == EWOULDBLOCK) { - return 0; - } - return -1; - } - - addr->slen = len; - addr->af = addr->addr.sa.sa_family; - - if (make_socket_nonblocking (nfd) < 0) { - goto out; - } - - /* Set close on exec */ - if (fcntl (nfd, F_SETFD, FD_CLOEXEC) == -1) { - msg_warn ("fcntl failed: %d, '%s'", errno, strerror (errno)); - goto out; - } - - return (nfd); - - out: - serrno = errno; - close (nfd); - errno = serrno; - return (-1); - -} - -gboolean -rspamd_parse_inet_address (rspamd_inet_addr_t *target, const char *src) -{ - gboolean ret = FALSE; - - if (inet_pton (AF_INET6, src, &target->addr.s6.sin6_addr) == 1) { - target->af = AF_INET6; - target->slen = sizeof (target->addr.s6); - ret = TRUE; - } - else if (inet_pton (AF_INET, src, &target->addr.s4.sin_addr) == 1) { - target->af = AF_INET; - target->slen = sizeof (target->addr.s4); - ret = TRUE; - } - - target->addr.sa.sa_family = target->af; - - return ret; -} - -const char* -rspamd_inet_address_to_string (rspamd_inet_addr_t *addr) -{ - static char addr_str[INET6_ADDRSTRLEN + 1]; - - switch (addr->af) { - case AF_INET: - return inet_ntop (addr->af, &addr->addr.s4.sin_addr, addr_str, - sizeof (addr_str)); - case AF_INET6: - return inet_ntop (addr->af, &addr->addr.s6.sin6_addr, addr_str, - sizeof (addr_str)); - case AF_UNIX: - return addr->addr.su.sun_path; - } - - return "undefined"; -} - -uint16_t -rspamd_inet_address_get_port (rspamd_inet_addr_t *addr) -{ - switch (addr->af) { - case AF_INET: - return ntohs (addr->addr.s4.sin_port); - case AF_INET6: - return ntohs (addr->addr.s6.sin6_port); - } - - return 0; -} diff --git a/src/util.h b/src/util.h deleted file mode 100644 index 0605fe87d..000000000 --- a/src/util.h +++ /dev/null @@ -1,491 +0,0 @@ -#ifndef 
RSPAMD_UTIL_H -#define RSPAMD_UTIL_H - -#include "config.h" -#include "mem_pool.h" -#include "radix.h" -#include "statfile.h" -#include "printf.h" -#include "fstring.h" -#include "ucl.h" - -struct config_file; -struct rspamd_main; -struct workq; -struct statfile; -struct classifier_config; - -/** - * Union that is used for storing sockaddrs - */ -union sa_union { - struct sockaddr_storage ss; - struct sockaddr sa; - struct sockaddr_in s4; - struct sockaddr_in6 s6; - struct sockaddr_un su; -}; - -typedef struct _rspamd_inet_addr_s { - union sa_union addr; - socklen_t slen; - int af; -} rspamd_inet_addr_t; - - -/* - * Create socket and bind or connect it to specified address and port - */ -gint make_tcp_socket (struct addrinfo *, gboolean is_server, gboolean async); -/* - * Create socket and bind or connect it to specified address and port - */ -gint make_udp_socket (struct addrinfo *, gboolean is_server, gboolean async); - -/* - * Create and bind or connect unix socket - */ -gint make_unix_socket (const gchar *, struct sockaddr_un *, gint type, gboolean is_server, gboolean async); - -/** - * Make a universal socket - * @param credits host, ip or path to unix socket - * @param port port (used for network sockets) - * @param type type of socket (SO_STREAM or SO_DGRAM) - * @param async make this socket asynced - * @param is_server make this socket as server socket - * @param try_resolve try name resolution for a socket (BLOCKING) - */ -gint make_universal_socket (const gchar *credits, guint16 port, gint type, - gboolean async, gboolean is_server, gboolean try_resolve); - -/** - * Make a universal sockets - * @param credits host, ip or path to unix socket (several items may be separated by ',') - * @param port port (used for network sockets) - * @param type type of socket (SO_STREAM or SO_DGRAM) - * @param async make this socket asynced - * @param is_server make this socket as server socket - * @param try_resolve try name resolution for a socket (BLOCKING) - */ -GList* 
make_universal_sockets_list (const gchar *credits, guint16 port, gint type, - gboolean async, gboolean is_server, gboolean try_resolve); -/* - * Create socketpair - */ -gint make_socketpair (gint pair[2]); - -/* - * Write pid to file - */ -gint write_pid (struct rspamd_main *); - -/* - * Make specified socket non-blocking - */ -gint make_socket_nonblocking (gint); -/* - * Make specified socket blocking - */ -gint make_socket_blocking (gint); - -/* - * Poll a sync socket for specified events - */ -gint poll_sync_socket (gint fd, gint timeout, short events); - -/* - * Init signals - */ -#ifdef HAVE_SA_SIGINFO -void init_signals (struct sigaction *sa, void (*sig_handler)(gint, siginfo_t *, void *)); -#else -void init_signals (struct sigaction *sa, void (*sig_handler)(gint)); -#endif - -/* - * Send specified signal to each worker - */ -void pass_signal_worker (GHashTable *, gint ); -/* - * Convert string to lowercase - */ -void convert_to_lowercase (gchar *str, guint size); - -#ifndef HAVE_SETPROCTITLE -/* - * Process title utility functions - */ -gint init_title(gint argc, gchar *argv[], gchar *envp[]); -gint setproctitle(const gchar *fmt, ...); -#endif - -#ifndef HAVE_PIDFILE -/* - * Pidfile functions from FreeBSD libutil code - */ -typedef struct rspamd_pidfh_s { - gint pf_fd; -#ifdef HAVE_PATH_MAX - gchar pf_path[PATH_MAX + 1]; -#elif defined(HAVE_MAXPATHLEN) - gchar pf_path[MAXPATHLEN + 1]; -#else - gchar pf_path[1024 + 1]; -#endif - dev_t pf_dev; - ino_t pf_ino; -} rspamd_pidfh_t; -rspamd_pidfh_t *rspamd_pidfile_open(const gchar *path, mode_t mode, pid_t *pidptr); -gint rspamd_pidfile_write(rspamd_pidfh_t *pfh); -gint rspamd_pidfile_close(rspamd_pidfh_t *pfh); -gint rspamd_pidfile_remove(rspamd_pidfh_t *pfh); -#else -typedef struct pidfh rspamd_pidfh_t; -#define rspamd_pidfile_open pidfile_open -#define rspamd_pidfile_write pidfile_write -#define rspamd_pidfile_close pidfile_close -#define rspamd_pidfile_remove pidfile_remove -#endif - -/* - * Replace %r with 
rcpt value and %f with from value, new string is allocated in pool - */ -gchar* resolve_stat_filename (rspamd_mempool_t *pool, gchar *pattern, gchar *rcpt, gchar *from); -#ifdef HAVE_CLOCK_GETTIME -/* - * Calculate check time with specified resolution of timer - */ -const gchar* calculate_check_time (struct timeval *tv, struct timespec *begin, gint resolution, guint32 *scan_ms); -#else -const gchar* calculate_check_time (struct timeval *begin, gint resolution, guint32 *scan_ms); -#endif - -/* - * File locking functions - */ -gboolean lock_file (gint fd, gboolean async); -gboolean unlock_file (gint fd, gboolean async); - -/* - * Hash table utility functions for case insensitive hashing - */ -guint rspamd_strcase_hash (gconstpointer key); -gboolean rspamd_strcase_equal (gconstpointer v, gconstpointer v2); - -/* - * Hash table utility functions for case sensitive hashing - */ -guint rspamd_str_hash (gconstpointer key); -gboolean rspamd_str_equal (gconstpointer v, gconstpointer v2); - - -/* - * Hash table utility functions for hashing fixed strings - */ -guint fstr_strcase_hash (gconstpointer key); -gboolean fstr_strcase_equal (gconstpointer v, gconstpointer v2); - -/* - * Google perf-tools initialization function - */ -void gperf_profiler_init (struct config_file *cfg, const gchar *descr); - -/* - * Workarounds for older versions of glib - */ -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 22)) -void g_ptr_array_unref (GPtrArray *array); -#endif -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 14)) -void g_queue_clear (GQueue *queue); -#endif - - -/** - * Copy src to dest limited to len, in compare with standart strlcpy(3) rspamd strlcpy does not - * traverse the whole string and it is possible to use it for non NULL terminated strings. 
This is - * more like memccpy(dst, src, size, '\0') - * - * @param dst destination string - * @param src source string - * @param siz length of destination buffer - * @return bytes copied - */ -gsize rspamd_strlcpy (gchar *dst, const gchar *src, gsize siz); - -/** - * Lowercase strlcpy variant - * @param dst - * @param src - * @param siz - * @return - */ -gsize rspamd_strlcpy_tolower (gchar *dst, const gchar *src, gsize siz); - -/* - * Convert milliseconds to timeval fields - */ -#define msec_to_tv(msec, tv) do { (tv)->tv_sec = (msec) / 1000; (tv)->tv_usec = ((msec) - (tv)->tv_sec * 1000) * 1000; } while(0) -#define double_to_tv(dbl, tv) do { (tv)->tv_sec = (int)(dbl); (tv)->tv_usec = ((dbl) - (int)(dbl))*1000*1000; } while(0) -#define tv_to_msec(tv) (tv)->tv_sec * 1000 + (tv)->tv_usec / 1000 - -/* Compare two emails for building emails tree */ -gint compare_email_func (gconstpointer a, gconstpointer b); - -/* Compare two urls for building emails tree */ -gint compare_url_func (gconstpointer a, gconstpointer b); - -/* - * Find string find in string s ignoring case - */ -gchar* rspamd_strncasestr (const gchar *s, const gchar *find, gint len); - -/* - * Try to convert string of length to long - */ -gboolean rspamd_strtol (const gchar *s, gsize len, glong *value); - -/* - * Try to convert string of length to unsigned long - */ -gboolean rspamd_strtoul (const gchar *s, gsize len, gulong *value); - -/** - * Try to allocate a file on filesystem (using fallocate or posix_fallocate) - * @param fd descriptor - * @param offset offset of file - * @param len length to allocate - * @return -1 in case of failure - */ -gint rspamd_fallocate (gint fd, off_t offset, off_t len); - -/** - * Return worker's control structure by its type - * @param type - * @return worker's control structure or NULL - */ -extern worker_t* get_worker_by_type (GQuark type); - -/** - * Utils for working with threads to be compatible with all glib versions - */ -typedef struct rspamd_mutex_s { -#if 
((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - GMutex mtx; -#else - GStaticMutex mtx; -#endif -} rspamd_mutex_t; - -typedef struct rspamd_rwlock_s { -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - GRWLock rwlock; -#else - GStaticRWLock rwlock; -#endif -} rspamd_rwlock_t; - - -/** - * Create new mutex - * @return mutex or NULL - */ -rspamd_mutex_t* rspamd_mutex_new (void); - -/** - * Lock mutex - * @param mtx - */ -void rspamd_mutex_lock (rspamd_mutex_t *mtx); - -/** - * Unlock mutex - * @param mtx - */ -void rspamd_mutex_unlock (rspamd_mutex_t *mtx); - -/** - * Clear rspamd mutex - * @param mtx - */ -void rspamd_mutex_free (rspamd_mutex_t *mtx); - -/** - * Create new rwloc - * @return - */ -rspamd_rwlock_t* rspamd_rwlock_new (void); - -/** - * Lock rwlock for writing - * @param mtx - */ -void rspamd_rwlock_writer_lock (rspamd_rwlock_t *mtx); - -/** - * Lock rwlock for reading - * @param mtx - */ -void rspamd_rwlock_reader_lock (rspamd_rwlock_t *mtx); - -/** - * Unlock rwlock from writing - * @param mtx - */ -void rspamd_rwlock_writer_unlock (rspamd_rwlock_t *mtx); - -/** - * Unlock rwlock from reading - * @param mtx - */ -void rspamd_rwlock_reader_unlock (rspamd_rwlock_t *mtx); - -/** - * Free rwlock - * @param mtx - */ -void rspamd_rwlock_free (rspamd_rwlock_t *mtx); - -static inline void -rspamd_cond_wait (GCond *cond, rspamd_mutex_t *mtx) -{ -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - g_cond_wait (cond, &mtx->mtx); -#else - g_cond_wait (cond, g_static_mutex_get_mutex (&mtx->mtx)); -#endif -} - -/** - * Create new named thread - * @param name name pattern - * @param func function to start - * @param data data to pass to function - * @param err error pointer - * @return new thread object that can be joined - */ -GThread* rspamd_create_thread (const gchar *name, GThreadFunc func, gpointer data, GError **err); - -/** - * Return 32bit murmur hash value for specified input - * @param in input data - * @param len length 
of the input data - * @code - * MurmurHash3 was created by Austin Appleby in 2008. The cannonical - * implementations are in C++ and placed in the public. - * - * https://sites.google.com/site/murmurhash/ - * - * Seungyoung Kim has ported it's cannonical implementation to C language - * in 2012 and published it as a part of qLibc component. - * @endcode - * @return - */ -guint32 murmur32_hash (const guint8 *in, gsize len); - -/** - * Return 32bit murmur hash value for specified input - * @param in input data - * @param len length of the input data - * @param out array of 2 guint64 variables - * @code - * MurmurHash3 was created by Austin Appleby in 2008. The cannonical - * implementations are in C++ and placed in the public. - * - * https://sites.google.com/site/murmurhash/ - * - * Seungyoung Kim has ported it's cannonical implementation to C language - * in 2012 and published it as a part of qLibc component. - * @endcode - * @return - */ -void murmur128_hash (const guint8 *in, gsize len, guint64 out[]); - -/** - * Deep copy of one hash table to another - * @param src source hash - * @param dst destination hash - * @param key_copy_func function called to copy or modify keys (or NULL) - * @param value_copy_func function called to copy or modify values (or NULL) - * @param ud user data for copy functions - */ -void rspamd_hash_table_copy (GHashTable *src, GHashTable *dst, - gpointer (*key_copy_func)(gconstpointer data, gpointer ud), - gpointer (*value_copy_func)(gconstpointer data, gpointer ud), - gpointer ud); - -/** - * Utility function to provide mem_pool copy for rspamd_hash_table_copy function - * @param data string to copy - * @param ud memory pool to use - * @return - */ -gpointer rspamd_str_pool_copy (gconstpointer data, gpointer ud); - -/** - * Parse ipv4 address with optional mask in CIDR format - * @param line cidr notation of ipv4 address - * @param ina destination address - * @param mask destination mask - * @return - */ -gboolean parse_ipmask_v4 (const 
char *line, struct in_addr *ina, int *mask); - -/** - * Read passphrase from tty - * @param buf buffer to fill with a password - * @param size size of the buffer - * @param rwflag unused flag - * @param key unused key - * @return size of password read - */ -gint rspamd_read_passphrase (gchar *buf, gint size, gint rwflag, gpointer key); - -/** - * Check whether specified ip is valid (not INADDR_ANY or INADDR_NONE) for ipv4 or ipv6 - * @param ptr pointer to struct in_addr or struct in6_addr - * @param af address family (AF_INET or AF_INET6) - * @return TRUE if the address is valid - */ -gboolean rspamd_ip_is_valid (rspamd_inet_addr_t *addr); - -/** - * Emit UCL object to gstring - * @param obj object to emit - * @param emit_type emitter type - * @param target target string - */ -void rspamd_ucl_emit_gstring (ucl_object_t *obj, enum ucl_emitter emit_type, GString *target); - -/** - * Accept from listening socket filling addr structure - * @param sock listening socket - * @param addr - * @return - */ -gint rspamd_accept_from_socket (gint sock, rspamd_inet_addr_t *addr); - -/** - * Try to parse address from string - * @param target target to fill - * @param src IP string representation - * @return TRUE if addr has been parsed - */ -gboolean rspamd_parse_inet_address (rspamd_inet_addr_t *target, const char *src); - -/** - * Returns string representation of inet address - * @param addr - * @return statically allocated string pointer (not thread safe) - */ -const char* rspamd_inet_address_to_string (rspamd_inet_addr_t *addr); - -/** - * Returns port number for the specified inet address in host byte order - * @param addr - * @return - */ -uint16_t rspamd_inet_address_get_port (rspamd_inet_addr_t *addr); - -#endif diff --git a/src/worker_util.c b/src/worker_util.c deleted file mode 100644 index d029f5dc4..000000000 --- a/src/worker_util.c +++ /dev/null @@ -1,255 +0,0 @@ -/* Copyright (c) 2010-2011, Vsevolod Stakhov - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "main.h" -#include "message.h" -#include "lua/lua_common.h" - -extern struct rspamd_main *rspamd_main; - -/** - * Return worker's control structure by its type - * @param type - * @return worker's control structure or NULL - */ -worker_t* -get_worker_by_type (GQuark type) -{ - worker_t **cur; - - cur = &workers[0]; - while (*cur) { - if (g_quark_from_string ((*cur)->name) == type) { - return *cur; - } - cur ++; - } - - return NULL; -} - -double -set_counter (const gchar *name, guint32 value) -{ - struct counter_data *cd; - double alpha; - gchar *key; - - cd = rspamd_hash_lookup (rspamd_main->counters, (gpointer) name); - - if (cd == NULL) { - cd = rspamd_mempool_alloc_shared (rspamd_main->counters->pool, sizeof (struct counter_data)); - cd->value = value; - cd->number = 0; - key = rspamd_mempool_strdup_shared (rspamd_main->counters->pool, name); - rspamd_hash_insert (rspamd_main->counters, (gpointer) key, (gpointer) cd); - } - else { - /* Calculate new value */ - rspamd_mempool_wlock_rwlock (rspamd_main->counters->lock); - - alpha = 2. / (++cd->number + 1); - cd->value = cd->value * (1. 
- alpha) + value * alpha; - - rspamd_mempool_wunlock_rwlock (rspamd_main->counters->lock); - } - - return cd->value; -} - -struct event_base * -prepare_worker (struct rspamd_worker *worker, const char *name, - rspamd_sig_handler_t sig_handler, - void (*accept_handler)(int, short, void *)) -{ - struct event_base *ev_base; - struct event *accept_event; - struct sigaction signals; - GList *cur; - gint listen_socket; - -#ifdef WITH_PROFILER - extern void _start (void), etext (void); - monstartup ((u_long) & _start, (u_long) & etext); -#endif - - gperf_profiler_init (worker->srv->cfg, name); - - worker->srv->pid = getpid (); - - ev_base = event_init (); - - init_signals (&signals, sig_handler); - sigprocmask (SIG_UNBLOCK, &signals.sa_mask, NULL); - - /* Accept all sockets */ - cur = worker->cf->listen_socks; - while (cur) { - listen_socket = GPOINTER_TO_INT (cur->data); - if (listen_socket != -1) { - accept_event = g_slice_alloc0 (sizeof (struct event)); - event_set (accept_event, listen_socket, EV_READ | EV_PERSIST, - accept_handler, worker); - event_base_set (ev_base, accept_event); - event_add (accept_event, NULL); - worker->accept_events = g_list_prepend (worker->accept_events, accept_event); - } - cur = g_list_next (cur); - } - - return ev_base; -} - -void -worker_stop_accept (struct rspamd_worker *worker) -{ - GList *cur; - struct event *event; - - /* Remove all events */ - cur = worker->accept_events; - while (cur) { - event = cur->data; - event_del (event); - cur = g_list_next (cur); - g_slice_free1 (sizeof (struct event), event); - } - - if (worker->accept_events != NULL) { - g_list_free (worker->accept_events); - } -} - -/* - * Called if all filters are processed - * @return TRUE if session should be terminated - */ -gboolean -rspamd_task_fin (void *arg) -{ - struct rspamd_task *task = (struct rspamd_task *) arg; - gint r; - GError *err = NULL; - - /* Task is already finished or skipped */ - if (task->state == WRITE_REPLY) { - if (task->fin_callback) { - 
task->fin_callback (task->fin_arg); - } - else { - rspamd_protocol_write_reply (task); - } - return TRUE; - } - - /* We processed all filters and want to process statfiles */ - if (task->state != WAIT_POST_FILTER && task->state != WAIT_PRE_FILTER) { - /* Process all statfiles */ - if (task->classify_pool == NULL) { - /* Non-threaded version */ - process_statfiles (task); - } - else { - /* Just process composites */ - make_composites (task); - } - if (task->cfg->post_filters) { - /* More to process */ - /* Special state */ - task->state = WAIT_POST_FILTER; - return FALSE; - } - - } - - /* We are on post-filter waiting state */ - if (task->state != WAIT_PRE_FILTER) { - /* Check if we have all events finished */ - task->state = WRITE_REPLY; - if (task->fin_callback) { - task->fin_callback (task->fin_arg); - } - else { - rspamd_protocol_write_reply (task); - } - } - else { - /* We were waiting for pre-filter */ - if (task->pre_result.action != METRIC_ACTION_NOACTION) { - /* Write result based on pre filters */ - task->state = WRITE_REPLY; - if (task->fin_callback) { - task->fin_callback (task->fin_arg); - } - else { - rspamd_protocol_write_reply (task); - } - return TRUE; - } - else { - task->state = WAIT_FILTER; - r = process_filters (task); - if (r == -1) { - task->last_error = "Filter processing error"; - task->error_code = RSPAMD_FILTER_ERROR; - task->state = WRITE_REPLY; - rspamd_protocol_write_reply (task); - return TRUE; - } - /* Add task to classify to classify pool */ - if (!task->is_skipped && task->classify_pool) { - register_async_thread (task->s); - g_thread_pool_push (task->classify_pool, task, &err); - if (err != NULL) { - msg_err ("cannot pull task to the pool: %s", err->message); - remove_async_thread (task->s); - g_error_free (err); - } - } - if (task->is_skipped) { - rspamd_protocol_write_reply (task); - } - else { - return FALSE; - } - } - } - - return TRUE; -} - -/* - * Called if session was restored inside fin callback - */ -void 
-rspamd_task_restore (void *arg) -{ - struct rspamd_task *task = (struct rspamd_task *) arg; - - /* Call post filters */ - if (task->state == WAIT_POST_FILTER) { - lua_call_post_filters (task); - } - task->s->wanna_die = TRUE; -} -- cgit v1.2.3