aboutsummaryrefslogtreecommitdiffstats
path: root/src/libserver
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2014-04-21 16:25:51 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2014-04-21 16:25:51 +0100
commit61555065f3d1c8badcc9573691232f1b6e42988c (patch)
tree563d5b7cb8c468530f7e79c4da0a75267b1184e1 /src/libserver
parentad5bf825b7f33bc10311673991f0cc888e69c0b1 (diff)
downloadrspamd-61555065f3d1c8badcc9573691232f1b6e42988c.tar.gz
rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.zip
Rework project structure, remove trash files.
Diffstat (limited to 'src/libserver')
-rw-r--r--src/libserver/CMakeLists.txt63
-rw-r--r--src/libserver/binlog.c579
-rw-r--r--src/libserver/binlog.h93
-rw-r--r--src/libserver/buffer.c786
-rw-r--r--src/libserver/buffer.h158
-rw-r--r--src/libserver/cfg_file.h516
-rw-r--r--src/libserver/cfg_rcl.c1471
-rw-r--r--src/libserver/cfg_rcl.h238
-rw-r--r--src/libserver/cfg_utils.c969
-rw-r--r--src/libserver/dkim.c1480
-rw-r--r--src/libserver/dkim.h207
-rw-r--r--src/libserver/dns.c151
-rw-r--r--src/libserver/dns.h60
-rw-r--r--src/libserver/dynamic_cfg.c599
-rw-r--r--src/libserver/dynamic_cfg.h66
-rw-r--r--src/libserver/events.c250
-rw-r--r--src/libserver/events.h88
-rw-r--r--src/libserver/html.c942
-rw-r--r--src/libserver/html.h226
-rw-r--r--src/libserver/proxy.c241
-rw-r--r--src/libserver/proxy.h69
-rw-r--r--src/libserver/roll_history.c212
-rw-r--r--src/libserver/roll_history.h106
-rw-r--r--src/libserver/settings.c657
-rw-r--r--src/libserver/settings.h55
-rw-r--r--src/libserver/spf.c1465
-rw-r--r--src/libserver/spf.h84
-rw-r--r--src/libserver/statfile.c927
-rw-r--r--src/libserver/statfile.h284
-rw-r--r--src/libserver/statfile_sync.c350
-rw-r--r--src/libserver/statfile_sync.h14
-rw-r--r--src/libserver/symbols_cache.c1055
-rw-r--r--src/libserver/symbols_cache.h150
-rw-r--r--src/libserver/task.c159
-rw-r--r--src/libserver/task.h165
-rw-r--r--src/libserver/url.c1620
-rw-r--r--src/libserver/url.h111
37 files changed, 16666 insertions, 0 deletions
diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt
new file mode 100644
index 000000000..bd5df18b9
--- /dev/null
+++ b/src/libserver/CMakeLists.txt
@@ -0,0 +1,63 @@
+# Librspamdserver
+# Build script for the rspamd-server library: collects the server, tokenizer
+# and classifier sources into one target, links it against its sibling
+# libraries and optionally installs it.
+
+# Sources that live in this directory (src/libserver)
+SET(LIBRSPAMDSERVERSRC
+ binlog.c
+ buffer.c
+ cfg_utils.c
+ cfg_rcl.c
+ dkim.c
+ dns.c
+ dynamic_cfg.c
+ events.c
+ html.c
+ proxy.c
+ roll_history.c
+ settings.c
+ spf.c
+ statfile.c
+ statfile_sync.c
+ symbols_cache.c
+ task.c
+ url.c)
+# Tokenizer sources are taken from the sibling directory and compiled into
+# this library
+SET(TOKENIZERSSRC ../tokenizers/tokenizers.c
+ ../tokenizers/osb.c)
+
+# Classifier sources, likewise compiled into this library
+SET(CLASSIFIERSSRC ../classifiers/classifiers.c
+ ../classifiers/bayes.c
+ ../classifiers/winnow.c)
+
+# Librspamd-server
+
+# Disabled: optional key-value storage backends
+#IF(WITH_DB)
+# LIST(APPEND LIBRSPAMDSERVERSRC kvstorage_bdb.c)
+#ENDIF(WITH_DB)
+#IF(WITH_SQLITE)
+# LIST(APPEND LIBRSPAMDSERVERSRC kvstorage_sqlite.c)
+#ENDIF(WITH_SQLITE)
+
+# LINK_TYPE is defined outside this file
+ADD_LIBRARY(rspamd-server ${LINK_TYPE} ${LIBRSPAMDSERVERSRC} ${TOKENIZERSSRC} ${CLASSIFIERSSRC})
+# The library version property is skipped for Debian builds
+IF(NOT DEBIAN_BUILD)
+SET_TARGET_PROPERTIES(rspamd-server PROPERTIES VERSION ${RSPAMD_VERSION})
+ENDIF(NOT DEBIAN_BUILD)
+SET_TARGET_PROPERTIES(rspamd-server PROPERTIES LINKER_LANGUAGE C COMPILE_FLAGS "-DRSPAMD_LIB")
+TARGET_LINK_LIBRARIES(rspamd-server rspamd-lua)
+TARGET_LINK_LIBRARIES(rspamd-server rspamd-json)
+TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb)
+TARGET_LINK_LIBRARIES(rspamd-server rspamd-util)
+TARGET_LINK_LIBRARIES(rspamd-server rdns)
+# With GCC the COMPILE_FLAGS property set above is replaced by a value that
+# repeats -DRSPAMD_LIB and adds -fno-strict-aliasing
+IF(CMAKE_COMPILER_IS_GNUCC)
+SET_TARGET_PROPERTIES(rspamd-server PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB -fno-strict-aliasing")
+ENDIF(CMAKE_COMPILER_IS_GNUCC)
+
+# Optional dependencies
+IF(WITH_DB)
+ TARGET_LINK_LIBRARIES(rspamd-server db)
+ENDIF(WITH_DB)
+
+IF(OPENSSL_FOUND)
+ TARGET_LINK_LIBRARIES(rspamd-server ${OPENSSL_LIBRARIES})
+ENDIF(OPENSSL_FOUND)
+
+# The install rule is only emitted when NO_SHARED matches "OFF"
+IF(NO_SHARED MATCHES "OFF")
+ INSTALL(TARGETS rspamd-server
+ LIBRARY DESTINATION ${LIBDIR}
+ PUBLIC_HEADER DESTINATION ${INCLUDEDIR})
+ENDIF(NO_SHARED MATCHES "OFF") \ No newline at end of file
diff --git a/src/libserver/binlog.c b/src/libserver/binlog.c
new file mode 100644
index 000000000..f085a7de0
--- /dev/null
+++ b/src/libserver/binlog.c
@@ -0,0 +1,579 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "binlog.h"
+#include "cfg_file.h"
+#include "tokenizers/tokenizers.h"
+
+#define BINLOG_SUFFIX ".binlog"
+#define BACKUP_SUFFIX ".old"
+#define VALID_MAGIC { 'r', 's', 'l' }
+#define VALID_VERSION { '1', '0' }
+
+static GHashTable *binlog_opened = NULL;
+static rspamd_mempool_t *binlog_pool = NULL;
+
+/*
+ * Write the initial layout of a new binlog to log->fd: the file header,
+ * an empty metaindex and the first (empty) index block, in that order.
+ *
+ * On success log->header, log->metaindex and log->cur_idx describe what was
+ * written. On failure both buffers are released and reset to NULL, so the
+ * log is consistently "not opened" and can neither be used nor freed twice.
+ * The file lock taken here is released on every path.
+ *
+ * @param log binlog with a valid, writable fd positioned at offset 0
+ * @return TRUE if all three parts were written, FALSE otherwise
+ */
+static gboolean
+binlog_write_header (struct rspamd_binlog *log)
+{
+	struct rspamd_binlog_header header = {
+		.magic = VALID_MAGIC,
+		.version = VALID_VERSION,
+		.padding = { '\0', '\0' },
+	};
+
+	header.create_time = time (NULL);
+	lock_file (log->fd, FALSE);
+
+	if (write (log->fd, &header, sizeof (struct rspamd_binlog_header)) == -1) {
+		msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno));
+		goto err;
+	}
+
+	memcpy (&log->header, &header, sizeof (struct rspamd_binlog_header));
+
+	/* Metaindex */
+	log->metaindex = g_malloc0 (sizeof (struct rspamd_binlog_metaindex));
+	/* Offset of the first index block: it follows the header and the metaindex */
+	log->metaindex->indexes[0] = sizeof (struct rspamd_binlog_metaindex) + sizeof (struct rspamd_binlog_header);
+
+	if (write (log->fd, log->metaindex, sizeof (struct rspamd_binlog_metaindex)) == -1) {
+		/* Report before freeing anything so that errno is still the one set by write */
+		msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno));
+		goto err;
+	}
+
+	/* First index block, initially empty */
+	log->cur_idx = g_malloc0 (sizeof (struct rspamd_index_block));
+	if (write (log->fd, log->cur_idx, sizeof (struct rspamd_index_block)) == -1) {
+		msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno));
+		goto err;
+	}
+
+	unlock_file (log->fd, FALSE);
+
+	return TRUE;
+
+err:
+	g_free (log->cur_idx);
+	log->cur_idx = NULL;
+	g_free (log->metaindex);
+	log->metaindex = NULL;
+	unlock_file (log->fd, FALSE);
+
+	return FALSE;
+}
+
+/*
+ * Validate an existing binlog and load its bookkeeping into memory.
+ *
+ * Reads the header from the current position of log->fd, checks magic and
+ * version, then reads the metaindex and the index block it designates as
+ * current. Indices read from disk are range-checked before they are used
+ * to address the in-memory arrays.
+ *
+ * @param log binlog with an fd opened for reading and positioned at offset 0
+ * @return TRUE if the file is a usable binlog, FALSE otherwise; buffers
+ *         allocated here stay attached to log on failure
+ */
+static gboolean
+binlog_check_file (struct rspamd_binlog *log)
+{
+	static gchar valid_magic[] = VALID_MAGIC, valid_version[] = VALID_VERSION;
+
+	if (read (log->fd, &log->header, sizeof (struct rspamd_binlog_header)) != sizeof (struct rspamd_binlog_header)) {
+		msg_warn ("cannot read file %s, error %d, %s", log->filename, errno, strerror (errno));
+		return FALSE;
+	}
+
+	/* Now check all fields */
+	if (memcmp (&log->header.magic, valid_magic, sizeof (valid_magic)) != 0 ||
+		memcmp (&log->header.version, valid_version, sizeof (valid_version)) != 0) {
+		msg_warn ("cannot validate file %s", log->filename);
+		return FALSE;
+	}
+
+	/* Load the metaindex */
+	if (log->metaindex == NULL) {
+		log->metaindex = g_malloc (sizeof (struct rspamd_binlog_metaindex));
+	}
+	if ((read (log->fd, log->metaindex, sizeof (struct rspamd_binlog_metaindex))) != sizeof (struct rspamd_binlog_metaindex)) {
+		msg_warn ("cannot read metaindex of file %s, error %d, %s", log->filename, errno, strerror (errno));
+		return FALSE;
+	}
+	/* last_index comes from disk and selects a slot of indexes[] below */
+	if (log->metaindex->last_index >= METAINDEX_LEN) {
+		msg_warn ("cannot validate metaindex of file %s", log->filename);
+		return FALSE;
+	}
+
+	/* Load the current index block */
+	if (log->cur_idx == NULL) {
+		log->cur_idx = g_malloc (sizeof (struct rspamd_index_block));
+	}
+	if (lseek (log->fd, log->metaindex->indexes[log->metaindex->last_index], SEEK_SET) == -1) {
+		msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno));
+		return FALSE;
+	}
+	if ((read (log->fd, log->cur_idx, sizeof (struct rspamd_index_block))) != sizeof (struct rspamd_index_block)) {
+		msg_warn ("cannot read index in file %s, error %d, %s", log->filename, errno, strerror (errno));
+		return FALSE;
+	}
+	/* A completely filled block legitimately has last_index == BINLOG_IDX_LEN */
+	if (log->cur_idx->last_index > BINLOG_IDX_LEN) {
+		msg_warn ("cannot validate index of file %s", log->filename);
+		return FALSE;
+	}
+
+	log->cur_seq = log->metaindex->last_index * BINLOG_IDX_LEN + log->cur_idx->last_index;
+	if (log->cur_idx->last_index < BINLOG_IDX_LEN) {
+		log->cur_time = log->cur_idx->indexes[log->cur_idx->last_index].time;
+	}
+	else {
+		/* Block is full: there is no slot at last_index, take the final record instead */
+		log->cur_time = log->cur_idx->indexes[BINLOG_IDX_LEN - 1].time;
+	}
+
+	return TRUE;
+
+}
+
+/*
+ * Create (or truncate) the file named by log->filename, readable and
+ * writable by the owner only, and write an empty binlog layout into it.
+ * The descriptor is stored in log->fd.
+ *
+ * @return TRUE on success, FALSE if the file cannot be created or written
+ */
+static gboolean
+binlog_create (struct rspamd_binlog *log)
+{
+	log->fd = open (log->filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR);
+
+	if (log->fd != -1) {
+		return binlog_write_header (log);
+	}
+
+	msg_info ("cannot create file %s, error %d, %s", log->filename, errno, strerror (errno));
+
+	return FALSE;
+}
+
+/*
+ * Open the existing file named by log->filename for reading and writing,
+ * store the descriptor in log->fd and validate the file as a binlog.
+ *
+ * @return TRUE if the file was opened and passed validation, FALSE otherwise
+ */
+static gboolean
+binlog_open_real (struct rspamd_binlog *log)
+{
+	log->fd = open (log->filename, O_RDWR);
+
+	if (log->fd != -1) {
+		return binlog_check_file (log);
+	}
+
+	msg_info ("cannot open file %s, error %d, %s", log->filename, errno, strerror (errno));
+
+	return FALSE;
+}
+
+
+/*
+ * Open the binlog that belongs to `path` (the file name is `path` with
+ * BINLOG_SUFFIX appended), creating it when it does not exist yet.
+ *
+ * @param pool memory pool that owns the returned structure and its file name
+ * @param path base path; NULL is rejected
+ * @param rotate_time rotation period in seconds, 0 disables time-based rotation
+ * @param rotate_jitter exclusive upper bound of the random delay added to
+ *        rotate_time; values <= 0 mean "no jitter"
+ * @return opened binlog or NULL on error (nothing is left open in that case)
+ */
+struct rspamd_binlog*
+binlog_open (rspamd_mempool_t *pool, const gchar *path, time_t rotate_time, gint rotate_jitter)
+{
+	struct rspamd_binlog *new;
+	gint len;
+	struct stat st;
+
+	if (path == NULL) {
+		return NULL;
+	}
+	len = strlen (path);
+
+	new = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_binlog));
+	new->pool = pool;
+	new->rotate_time = rotate_time;
+	new->fd = -1;
+
+	/* g_random_int_range requires end > begin, so a zero or negative bound is skipped */
+	if (rotate_time && rotate_jitter > 0) {
+		new->rotate_jitter = g_random_int_range (0, rotate_jitter);
+	}
+
+	new->filename = rspamd_mempool_alloc (pool, len + sizeof (BINLOG_SUFFIX));
+	rspamd_strlcpy (new->filename, path, len + 1);
+	rspamd_strlcpy (new->filename + len, BINLOG_SUFFIX, sizeof (BINLOG_SUFFIX));
+
+	if (stat (new->filename, &st) == -1) {
+		/* Check errno to check whether we should create this file */
+		if (errno != ENOENT) {
+			msg_err ("cannot stat file: %s, error %s", new->filename, strerror (errno));
+			return NULL;
+		}
+		/* In case of ENOENT try to create binlog */
+		if (!binlog_create (new)) {
+			goto err;
+		}
+	}
+	else if (!binlog_open_real (new)) {
+		/*
+		 * Validation leaves whatever it allocated attached to the log;
+		 * the log is discarded here, so release those buffers as well.
+		 */
+		g_free (new->metaindex);
+		new->metaindex = NULL;
+		g_free (new->cur_idx);
+		new->cur_idx = NULL;
+		goto err;
+	}
+
+	return new;
+
+err:
+	/* The structure itself belongs to the pool, but the descriptor does not */
+	if (new->fd != -1) {
+		close (new->fd);
+		new->fd = -1;
+	}
+
+	return NULL;
+}
+
+/*
+ * Release the heap buffers and the file descriptor of a binlog. The
+ * structure itself is not freed here. Fields are reset after release,
+ * which makes a repeated call harmless.
+ *
+ * @param log binlog to close, may be NULL
+ */
+void
+binlog_close (struct rspamd_binlog *log)
+{
+	if (log == NULL) {
+		return;
+	}
+
+	g_free (log->metaindex);
+	log->metaindex = NULL;
+	g_free (log->cur_idx);
+	log->cur_idx = NULL;
+
+	if (log->fd != -1) {
+		close (log->fd);
+		log->fd = -1;
+	}
+}
+
+/*
+ * GTree traversal callback: serialises one token (the tree key) as a
+ * binlog element and writes it at the current position of the binlog file.
+ *
+ * @param key token_node_t to store
+ * @param value unused
+ * @param data the struct rspamd_binlog being written
+ * @return FALSE to continue the traversal, TRUE to stop it after a write error
+ */
+static gboolean
+binlog_tree_callback (gpointer key, gpointer value, gpointer data)
+{
+	token_node_t *tok = key;
+	struct rspamd_binlog *log = data;
+	struct rspamd_binlog_element record;
+
+	record.h1 = tok->h1;
+	record.h2 = tok->h2;
+	record.value = tok->value;
+
+	if (write (log->fd, &record, sizeof (record)) != -1) {
+		return FALSE;
+	}
+
+	msg_info ("cannot write token to file: %s, error: %s", log->filename, strerror (errno));
+
+	return TRUE;
+}
+
+/*
+ * Append all tokens of `nodes` to the end of the binlog file and record
+ * the batch in the next free slot of the current index block, which is
+ * then rewritten in place.
+ *
+ * In-memory counters (cur_seq, cur_time, the block's last_index) are only
+ * advanced once the index block has been written, so a failure leaves the
+ * log at its previous revision.
+ *
+ * @param log opened binlog whose current index block has a free slot
+ * @param nodes tree whose keys are token_node_t
+ * @return TRUE if the tokens and the index were written, FALSE otherwise
+ */
+static gboolean
+write_binlog_tree (struct rspamd_binlog *log, GTree *nodes)
+{
+	off_t seek, data_end;
+	guint32 data_len;
+	struct rspamd_binlog_index *idx;
+
+	lock_file (log->fd, FALSE);
+
+	/* Seek to end of file */
+	if ((seek = lseek (log->fd, 0, SEEK_END)) == -1) {
+		unlock_file (log->fd, FALSE);
+		msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno));
+		return FALSE;
+	}
+
+	/* Now write all nodes to file */
+	g_tree_foreach (nodes, binlog_tree_callback, (gpointer)log);
+
+	/*
+	 * The traversal cannot report a failed write, so compare the file
+	 * position with the amount of data that should have been appended.
+	 */
+	data_len = g_tree_nnodes (nodes) * sizeof (struct rspamd_binlog_element);
+	data_end = lseek (log->fd, 0, SEEK_CUR);
+	if (data_end == -1 || data_end != seek + (off_t)data_len) {
+		unlock_file (log->fd, FALSE);
+		msg_info ("cannot write tokens to file: %s", log->filename);
+		return FALSE;
+	}
+
+	/* Write index */
+	idx = &log->cur_idx->indexes[log->cur_idx->last_index];
+	idx->seek = seek;
+	idx->time = (guint64)time (NULL);
+	idx->len = data_len;
+	if (lseek (log->fd, log->metaindex->indexes[log->metaindex->last_index], SEEK_SET) == -1) {
+		unlock_file (log->fd, FALSE);
+		msg_info ("cannot seek in file: %s, error: %s, seek: %L, op: insert index", log->filename,
+			strerror (errno), log->metaindex->indexes[log->metaindex->last_index]);
+		return FALSE;
+	}
+	log->cur_idx->last_index ++;
+	if (write (log->fd, log->cur_idx, sizeof (struct rspamd_index_block)) == -1) {
+		/* The slot was not persisted, give it back */
+		log->cur_idx->last_index --;
+		unlock_file (log->fd, FALSE);
+		msg_info ("cannot write index to file: %s, error: %s", log->filename, strerror (errno));
+		return FALSE;
+	}
+
+	/* Everything is on disk, publish the new revision */
+	log->cur_seq ++;
+	log->cur_time = idx->time;
+
+	unlock_file (log->fd, FALSE);
+
+	return TRUE;
+}
+
+/*
+ * Start a new index block: append an empty block to the end of the file,
+ * register its offset in the next metaindex slot, rewrite the metaindex
+ * on disk and reset the in-memory current block.
+ *
+ * The in-memory metaindex is rolled back if it cannot be persisted, and
+ * log->cur_idx is never freed here, so a failure leaves the log in the
+ * state it had before the call.
+ *
+ * @param log opened binlog whose current index block is full
+ * @return TRUE if the new block is in place, FALSE on I/O error or when the
+ *         metaindex has no free slot left
+ */
+static gboolean
+create_new_metaindex_block (struct rspamd_binlog *log)
+{
+	/* Static storage, hence zero-filled: the on-disk image of an empty block */
+	static const struct rspamd_index_block empty_block;
+	off_t seek;
+	guint64 next;
+
+	next = log->metaindex->last_index + 1;
+	if (next >= METAINDEX_LEN) {
+		/* indexes[] has METAINDEX_LEN slots, there is no room for one more block */
+		msg_info ("metaindex of file %s is full", log->filename);
+		return FALSE;
+	}
+
+	lock_file (log->fd, FALSE);
+
+	/* Seek to end of file */
+	if ((seek = lseek (log->fd, 0, SEEK_END)) == -1) {
+		unlock_file (log->fd, FALSE);
+		msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno));
+		return FALSE;
+	}
+	if (write (log->fd, &empty_block, sizeof (struct rspamd_index_block)) == -1) {
+		msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno));
+		unlock_file (log->fd, FALSE);
+		return FALSE;
+	}
+
+	/* Register the offset of the new block */
+	log->metaindex->indexes[next] = seek;
+	log->metaindex->last_index = next;
+
+	/* Overwrite all metaindexes */
+	if (lseek (log->fd, sizeof (struct rspamd_binlog_header), SEEK_SET) == -1) {
+		msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno));
+		goto rollback;
+	}
+	if (write (log->fd, log->metaindex, sizeof (struct rspamd_binlog_metaindex)) == -1) {
+		msg_info ("cannot write metaindex in file: %s, error: %s", log->filename, strerror (errno));
+		goto rollback;
+	}
+
+	bzero (log->cur_idx, sizeof (struct rspamd_index_block));
+	unlock_file (log->fd, FALSE);
+
+	return TRUE;
+
+rollback:
+	log->metaindex->indexes[next] = 0;
+	log->metaindex->last_index = next - 1;
+	unlock_file (log->fd, FALSE);
+
+	return FALSE;
+}
+
+/*
+ * Tell whether the binlog has outlived its rotation period.
+ *
+ * @param log opened binlog
+ * @return TRUE if time-based rotation is enabled (rotate_time != 0) and the
+ *         file is older than rotate_time + rotate_jitter seconds
+ */
+static gboolean
+maybe_rotate_binlog (struct rspamd_binlog *log)
+{
+	guint64 now = time (NULL), age, limit;
+
+	if (!log->rotate_time) {
+		return FALSE;
+	}
+	if (now <= log->header.create_time) {
+		/*
+		 * Creation time is not in the past (e.g. the clock was stepped back);
+		 * the unsigned subtraction below would wrap to a huge age.
+		 */
+		return FALSE;
+	}
+
+	age = now - log->header.create_time;
+	/* Sum in 64 bits so that the limit is not truncated */
+	limit = (guint64)log->rotate_time + (guint64)log->rotate_jitter;
+
+	return age > limit;
+}
+
+/*
+ * Rotate the binlog: move the current file aside under a backup name
+ * (replacing an older backup) and start a fresh, empty binlog under the
+ * original name.
+ *
+ * Nothing is destroyed unless the rename succeeded: if the file cannot be
+ * moved, the log is left opened and untouched and FALSE is returned,
+ * because re-creating the file would truncate the only copy of the data.
+ *
+ * @param log opened binlog
+ * @return TRUE if a fresh binlog is opened in place of the old one
+ */
+static gboolean
+rotate_binlog (struct rspamd_binlog *log)
+{
+	gchar *backup_name;
+	struct stat st;
+
+	lock_file (log->fd, FALSE);
+
+	/*
+	 * Format backup name.
+	 * NOTE(review): BACKUP_SUFFIX already starts with a dot, so the result
+	 * contains "..old"; kept as is because the name is visible on disk.
+	 */
+	backup_name = g_strdup_printf ("%s.%s", log->filename, BACKUP_SUFFIX);
+
+	if (stat (backup_name, &st) != -1) {
+		msg_info ("replace old %s", backup_name);
+		unlink (backup_name);
+	}
+
+	if (rename (log->filename, backup_name) == -1) {
+		msg_warn ("cannot rename file %s to %s, error %d, %s", log->filename, backup_name,
+			errno, strerror (errno));
+		g_free (backup_name);
+		unlock_file (log->fd, FALSE);
+		return FALSE;
+	}
+	g_free (backup_name);
+
+	/* The old file is out of the way, drop the state that described it */
+	g_free (log->metaindex);
+	log->metaindex = NULL;
+	g_free (log->cur_idx);
+	log->cur_idx = NULL;
+
+	/* XXX: maybe race condition here */
+	unlock_file (log->fd, FALSE);
+	close (log->fd);
+	/* Do not keep a stale descriptor around if re-creation fails */
+	log->fd = -1;
+
+	return binlog_create (log);
+
+}
+
+/*
+ * Store a set of tokens as the next revision of the binlog.
+ *
+ * The binlog is rotated first if its rotation period has expired. The
+ * tokens then go to the current index block if it has a free slot, else
+ * to a newly created index block if the metaindex has a free slot, else
+ * the binlog is rotated forcefully and the tokens start the new file.
+ *
+ * @param log opened binlog
+ * @param nodes tree whose keys are token_node_t
+ * @return TRUE if the tokens were stored, FALSE otherwise
+ */
+gboolean
+binlog_insert (struct rspamd_binlog *log, GTree *nodes)
+{
+	if (!log || !log->metaindex || !log->cur_idx || !nodes) {
+		msg_info ("improperly opened binlog: %s", log != NULL ? log->filename : "unknown");
+		return FALSE;
+	}
+
+	if (maybe_rotate_binlog (log)) {
+		if (!rotate_binlog (log)) {
+			return FALSE;
+		}
+	}
+	/* First of all try to place new tokens in current index */
+	if (log->cur_idx->last_index < BINLOG_IDX_LEN) {
+		/* All is ok */
+		return write_binlog_tree (log, nodes);
+	}
+	/* Current index table is all busy, try to allocate new index */
+
+	/*
+	 * Check metaindex free space: the new block takes slot last_index + 1,
+	 * which must still be inside indexes[METAINDEX_LEN].
+	 */
+	if (log->metaindex->last_index + 1 < METAINDEX_LEN) {
+		/* Create new index block */
+		if (!create_new_metaindex_block (log)) {
+			return FALSE;
+		}
+		return write_binlog_tree (log, nodes);
+	}
+
+	/* All binlog is filled, we need to rotate it forcefully */
+	if (!rotate_binlog (log)) {
+		return FALSE;
+	}
+
+	return write_binlog_tree (log, nodes);
+}
+
+/*
+ * Produce the change set stored under revision `from_rev`.
+ *
+ * The function is meant to be called repeatedly with increasing revisions.
+ * On the first call *rep must be NULL and a reply structure is allocated;
+ * on later calls the payload of the previous step is released and replaced.
+ * When there is nothing (more) to send, the reply structure is freed and
+ * *rep is set to NULL.
+ *
+ * @param log opened binlog
+ * @param from_rev revision to fetch
+ * @param from_time in: on the first call, the time the caller expects for
+ *        that revision; out: the time recorded for it, or 0 if the caller's
+ *        copy cannot be synchronised incrementally
+ * @param rep reply buffer; (*rep)->data and (*rep)->len describe the
+ *        change set on success
+ * @return TRUE if a change set was read, FALSE if synchronisation is
+ *         complete or failed
+ */
+gboolean
+binlog_sync (struct rspamd_binlog *log, guint64 from_rev, guint64 *from_time, GByteArray **rep)
+{
+	guint32 metaindex_num;
+	struct rspamd_index_block *idxb = NULL;
+	struct rspamd_binlog_index *idx;
+	gboolean idx_mapped = FALSE, res = TRUE, is_first = FALSE;
+
+	if (!log || !log->metaindex || !log->cur_idx) {
+		msg_info ("improperly opened binlog: %s", log != NULL ? log->filename : "unknown");
+		return FALSE;
+	}
+
+	if (*rep == NULL) {
+		/* Zero-filled so that `data` is never an uninitialised pointer */
+		*rep = g_malloc0 (sizeof (GByteArray));
+		is_first = TRUE;
+	}
+	else {
+		/* Release the payload of the previous step and do not leave it dangling */
+		g_free ((*rep)->data);
+		(*rep)->data = NULL;
+		(*rep)->len = 0;
+	}
+
+	if (from_rev == log->cur_seq) {
+		/* Last record: nothing to send, release the reply instead of leaking it */
+		g_free (*rep);
+		*rep = NULL;
+		return FALSE;
+	}
+	else if (from_rev > log->cur_seq) {
+		/* Slave has more actual copy, write this to log and abort sync */
+		msg_warn ("slave has more recent revision of statfile %s: %uL and our is: %uL", log->filename, from_rev, log->cur_seq);
+		g_free (*rep);
+		*rep = NULL;
+		*from_time = 0;
+		return FALSE;
+	}
+
+	metaindex_num = from_rev / BINLOG_IDX_LEN;
+	/* First of all try to find this revision */
+	if (metaindex_num > log->metaindex->last_index) {
+		return FALSE;
+	}
+	else if (metaindex_num != log->metaindex->last_index) {
+		/* The revision lives in an older index block, load it from the file */
+		lock_file (log->fd, FALSE);
+		idxb = g_malloc (sizeof (struct rspamd_index_block));
+		idx_mapped = TRUE;
+		if (lseek (log->fd, log->metaindex->indexes[metaindex_num], SEEK_SET) == -1) {
+			unlock_file (log->fd, FALSE);
+			msg_warn ("cannot seek file %s, error %d, %s", log->filename, errno, strerror (errno));
+			res = FALSE;
+			goto end;
+		}
+		if ((read (log->fd, idxb, sizeof (struct rspamd_index_block))) != sizeof (struct rspamd_index_block)) {
+			unlock_file (log->fd, FALSE);
+			msg_warn ("cannot read index from file %s, error %d, %s", log->filename, errno, strerror (errno));
+			res = FALSE;
+			goto end;
+		}
+		unlock_file (log->fd, FALSE);
+	}
+	else {
+		idxb = log->cur_idx;
+	}
+	/* Now check specified index */
+	idx = &idxb->indexes[from_rev % BINLOG_IDX_LEN];
+	if (is_first && idx->time != *from_time) {
+		res = FALSE;
+		*from_time = 0;
+		goto end;
+	}
+	else {
+		*from_time = idx->time;
+	}
+
+	/* Now fill reply structure */
+	(*rep)->len = idx->len;
+	/* Read result; idx->len is 32 bit wide while %uL consumes a 64 bit argument */
+	msg_info ("update from binlog '%s' from revision: %uL to revision %uL size is %uL",
+		log->filename, from_rev, log->cur_seq, (guint64)idx->len);
+	if (lseek (log->fd, idx->seek, SEEK_SET) == -1) {
+		msg_warn ("cannot seek file %s, error %d, %s", log->filename, errno, strerror (errno));
+		res = FALSE;
+		goto end;
+	}
+
+	(*rep)->data = g_malloc (idx->len);
+	if ((read (log->fd, (*rep)->data, idx->len)) != (ssize_t)idx->len) {
+		msg_warn ("cannot read file %s, error %d, %s", log->filename, errno, strerror (errno));
+		res = FALSE;
+		goto end;
+	}
+
+end:
+	if (idx_mapped) {
+		g_free (idxb);
+	}
+
+	return res;
+}
+
+/*
+ * Lazily create the module-wide state: the table of opened binlogs
+ * (keyed by pointer) and the memory pool they are allocated from.
+ *
+ * @return TRUE if both objects exist after the call, FALSE otherwise
+ */
+static gboolean
+maybe_init_static (void)
+{
+	if (binlog_opened == NULL) {
+		binlog_opened = g_hash_table_new (g_direct_hash, g_direct_equal);
+	}
+	if (binlog_opened == NULL) {
+		return FALSE;
+	}
+
+	if (binlog_pool == NULL) {
+		binlog_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ());
+	}
+	if (binlog_pool == NULL) {
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/*
+ * Record `nodes` in the binlog of a statfile, provided the statfile is
+ * configured with a binlog in master mode, and stamp the statfile with
+ * the resulting revision and time.
+ *
+ * @param ccf classifier configuration; only checked for being non-NULL
+ * @param st statfile configuration that selects the binlog
+ * @param file opened statfile whose revision is updated on success
+ * @param nodes tree whose keys are token_node_t
+ * @return TRUE if the tokens were written to the binlog, FALSE otherwise
+ */
+gboolean
+maybe_write_binlog (struct classifier_config *ccf, struct statfile *st, stat_file_t *file, GTree *nodes)
+{
+	struct rspamd_binlog *log;
+
+	if (ccf == NULL || nodes == NULL) {
+		return FALSE;
+	}
+
+	/*
+	 * Shares the lookup-or-open logic with get_binlog_by_statfile, which
+	 * also rejects a NULL statfile and statfiles without a master binlog.
+	 */
+	log = get_binlog_by_statfile (st);
+	if (log == NULL) {
+		return FALSE;
+	}
+
+	if (binlog_insert (log, nodes)) {
+		msg_info ("set new revision of statfile %s: %uL", st->symbol, log->cur_seq);
+		(void)statfile_set_revision (file, log->cur_seq, log->cur_time);
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Return the binlog that belongs to a statfile, opening it on first use
+ * and caching it in the table of opened binlogs. Only statfiles whose
+ * binlog is configured with master affinity have one.
+ *
+ * @param st statfile configuration, may be NULL
+ * @return the binlog, or NULL if the statfile has no master binlog or the
+ *         binlog cannot be opened
+ */
+struct rspamd_binlog*
+get_binlog_by_statfile (struct statfile *st)
+{
+	struct rspamd_binlog *found;
+
+	if (st == NULL || st->binlog == NULL) {
+		return NULL;
+	}
+	if (st->binlog->affinity != AFFINITY_MASTER) {
+		return NULL;
+	}
+	if (!maybe_init_static ()) {
+		return NULL;
+	}
+
+	found = g_hash_table_lookup (binlog_opened, st);
+	if (found != NULL) {
+		return found;
+	}
+
+	/* Not opened yet: the jitter bound is half of the rotation period */
+	found = binlog_open (binlog_pool, st->path, st->binlog->rotate_time, st->binlog->rotate_time / 2);
+	if (found == NULL) {
+		return NULL;
+	}
+	g_hash_table_insert (binlog_opened, st, found);
+
+	return found;
+}
diff --git a/src/libserver/binlog.h b/src/libserver/binlog.h
new file mode 100644
index 000000000..9e1a786d3
--- /dev/null
+++ b/src/libserver/binlog.h
@@ -0,0 +1,93 @@
+#ifndef RSPAMD_BINLOG_H
+#define RSPAMD_BINLOG_H
+
+#include "config.h"
+#include "main.h"
+#include "statfile.h"
+
+/* Number of records held by a single index block */
+#define BINLOG_IDX_LEN 200
+/* Number of index blocks that a single metaindex can reference */
+#define METAINDEX_LEN 1024
+
+/*
+ * File header, stored at the very beginning of a binlog file.
+ * Assumes 8-byte words: magic, version and padding take 8 bytes together,
+ * create_time takes the next 8.
+ */
+struct rspamd_binlog_header {
+ /* File signature, checked when an existing binlog is opened */
+ gchar magic[3];
+ /* Format version, checked when an existing binlog is opened */
+ gchar version[2];
+ gchar padding[3];
+ /* Time at which the file was created; used to decide on rotation */
+ guint64 create_time;
+};
+
+/*
+ * Index record: describes one batch of elements (one revision) in the file.
+ */
+struct rspamd_binlog_index {
+ /* Time at which the batch was written */
+ guint64 time;
+ /* File offset at which the batch starts */
+ guint64 seek;
+ /* Size of the batch in bytes */
+ guint32 len;
+};
+
+/*
+ * Block of index records; written to and read from the file as a whole.
+ */
+struct rspamd_index_block {
+ struct rspamd_binlog_index indexes[BINLOG_IDX_LEN];
+ /* Slot that the next record goes to; BINLOG_IDX_LEN when the block is full */
+ guint32 last_index;
+};
+
+/*
+ * Metaindex, stored right after the file header: locates the index blocks.
+ */
+struct rspamd_binlog_metaindex {
+ /* File offsets of the index blocks */
+ guint64 indexes[METAINDEX_LEN];
+ /* Position in indexes[] of the block currently being filled */
+ guint64 last_index;
+};
+
+/*
+ * A single stored token, as written to the file (packed, no padding).
+ */
+struct rspamd_binlog_element {
+ guint32 h1;
+ guint32 h2;
+ float value;
+} __attribute__((__packed__));
+
+/*
+ * In-memory state of an opened binlog file.
+ */
+struct rspamd_binlog {
+ /* Name of the binlog file (base path plus the binlog suffix) */
+ gchar *filename;
+ /* Rotation period in seconds, 0 disables time-based rotation */
+ time_t rotate_time;
+ /* Random extra delay, in seconds, added to rotate_time */
+ gint rotate_jitter;
+ /* Current revision of the binlog */
+ guint64 cur_seq;
+ /* Time associated with the current revision */
+ guint64 cur_time;
+ /* Descriptor of the binlog file */
+ gint fd;
+ /* Pool this structure and filename were allocated from */
+ rspamd_mempool_t *pool;
+
+ /* Copy of the file header */
+ struct rspamd_binlog_header header;
+ /* Heap copy of the metaindex; NULL when the binlog is not properly opened */
+ struct rspamd_binlog_metaindex *metaindex;
+ /* Heap copy of the index block being filled; NULL when not properly opened */
+ struct rspamd_index_block *cur_idx;
+};
+
+struct classifier_config;
+
+/*
+ * Open binlog at specified path with specified rotate params
+ */
+struct rspamd_binlog* binlog_open (rspamd_mempool_t *pool, const gchar *path, time_t rotate_time, gint rotate_jitter);
+
+/*
+ * Get and open binlog for specified statfile
+ */
+struct rspamd_binlog* get_binlog_by_statfile (struct statfile *st);
+
+/*
+ * Close binlog
+ */
+void binlog_close (struct rspamd_binlog *log);
+
+/*
+ * Insert new nodes inside binlog
+ */
+gboolean binlog_insert (struct rspamd_binlog *log, GTree *nodes);
+
+/*
+ * Sync binlog from specified revision
+ * @param log binlog structure
+ * @param from_rev from revision
+ * @param from_time from time
+ * @param rep a portion of changes for revision is stored here
+ * @return TRUE if there are more revisions to get and FALSE if synchronization is complete
+ */
+gboolean binlog_sync (struct rspamd_binlog *log, guint64 from_rev, guint64 *from_time, GByteArray **rep);
+
+/*
+ * Conditional write to a binlog for specified statfile
+ */
+gboolean maybe_write_binlog (struct classifier_config *ccf, struct statfile *st, stat_file_t *file, GTree *nodes);
+
+#endif
diff --git a/src/libserver/buffer.c b/src/libserver/buffer.c
new file mode 100644
index 000000000..864f2fad6
--- /dev/null
+++ b/src/libserver/buffer.c
@@ -0,0 +1,786 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "buffer.h"
+#include "main.h"
+#ifdef HAVE_SYS_SENDFILE_H
+#include <sys/sendfile.h>
+#endif
+
+#define G_DISPATCHER_ERROR dispatcher_error_quark()
+#define debug_ip(...) rspamd_conditional_debug(rspamd_main->logger, NULL, __FUNCTION__, __VA_ARGS__)
+
+static void dispatcher_cb (gint fd, short what, void *arg);
+
+/*
+ * Error domain used for every GError reported by the IO dispatcher.
+ */
+static inline GQuark
+dispatcher_error_quark (void)
+{
+	GQuark domain;
+
+	domain = g_quark_from_static_string ("g-dispatcher-error-quark");
+
+	return domain;
+}
+
+/*
+ * Push (the rest of) the file attached to the dispatcher to its socket.
+ *
+ * Three implementations are selected at compile time: FreeBSD/Darwin
+ * sendfile, Linux sendfile, and a plain write() from d->map when sendfile
+ * is not available. In every variant a partial transfer re-arms the
+ * dispatcher event for EV_WRITE; a complete transfer invokes the write
+ * callback, switches the event back to persistent EV_READ and clears
+ * d->in_sendfile.
+ *
+ * @param d dispatcher in sendfile mode
+ * @return FALSE if a user callback was invoked and asked to stop (the
+ *         dispatcher must not be touched afterwards), TRUE otherwise
+ */
+static gboolean
+sendfile_callback (rspamd_io_dispatcher_t *d)
+{
+
+	GError *err = NULL;
+
+#ifdef HAVE_SENDFILE
+# if defined(FREEBSD) || defined(DARWIN)
+	off_t off = 0;
+	#if defined(FREEBSD)
+	/* FreeBSD version */
+	if (sendfile (d->sendfile_fd, d->fd, d->offset, 0, NULL, &off, 0) != 0) {
+	#elif defined(DARWIN)
+	/* Darwin version */
+	if (sendfile (d->sendfile_fd, d->fd, d->offset, &off, NULL, 0) != 0) {
+	#endif
+		if (errno != EAGAIN) {
+			if (d->err_callback) {
+				err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno));
+				d->err_callback (err, d->user_data);
+				return FALSE;
+			}
+		}
+		else {
+			debug_ip("partially write data, retry");
+			/* Wait for other event; `off` is what this call managed to send */
+			d->offset += off;
+			event_del (d->ev);
+			event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d);
+			event_base_set (d->ev_base, d->ev);
+			event_add (d->ev, d->tv);
+		}
+	}
+	else {
+		if (d->write_callback) {
+			if (!d->write_callback (d->user_data)) {
+				debug_ip("callback set wanna_die flag, terminating");
+				return FALSE;
+			}
+		}
+		event_del (d->ev);
+		event_set (d->ev, d->fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d);
+		event_base_set (d->ev_base, d->ev);
+		event_add (d->ev, d->tv);
+		d->in_sendfile = FALSE;
+	}
+# else
+	ssize_t r;
+	/* Linux version */
+	r = sendfile (d->fd, d->sendfile_fd, &d->offset, d->file_size);
+	if (r == -1) {
+		if (errno != EAGAIN) {
+			if (d->err_callback) {
+				err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno));
+				d->err_callback (err, d->user_data);
+				return FALSE;
+			}
+		}
+		else {
+			debug_ip("partially write data, retry");
+			/* Wait for other event */
+			event_del (d->ev);
+			event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d);
+			event_base_set (d->ev_base, d->ev);
+			event_add (d->ev, d->tv);
+		}
+	}
+	/*
+	 * sendfile has already advanced d->offset by the `r` bytes it sent,
+	 * so the offset alone tells how far the transfer got; adding `r`
+	 * again would count the last chunk twice and end the transfer early.
+	 */
+	else if (d->offset < (off_t)d->file_size) {
+		debug_ip("partially write data, retry");
+		/* Wait for other event */
+		event_del (d->ev);
+		event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d);
+		event_base_set (d->ev_base, d->ev);
+		event_add (d->ev, d->tv);
+	}
+	else {
+		if (d->write_callback) {
+			if (!d->write_callback (d->user_data)) {
+				debug_ip("callback set wanna_die flag, terminating");
+				return FALSE;
+			}
+		}
+		event_del (d->ev);
+		event_set (d->ev, d->fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d);
+		event_base_set (d->ev_base, d->ev);
+		event_add (d->ev, d->tv);
+		d->in_sendfile = FALSE;
+	}
+# endif
+#else
+	ssize_t r;
+	/* No sendfile: continue from where the previous write stopped */
+	r = write (d->fd, (const gchar *)d->map + d->offset, d->file_size - d->offset);
+	if (r == -1) {
+		if (errno != EAGAIN) {
+			if (d->err_callback) {
+				err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno));
+				d->err_callback (err, d->user_data);
+				return FALSE;
+			}
+		}
+		else {
+			debug_ip("partially write data, retry");
+			/* Wait for other event */
+			event_del (d->ev);
+			event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d);
+			event_base_set (d->ev_base, d->ev);
+			event_add (d->ev, d->tv);
+		}
+	}
+	else if (r + d->offset < d->file_size) {
+		d->offset += r;
+		debug_ip("partially write data, retry");
+		/* Wait for other event */
+		event_del (d->ev);
+		event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d);
+		event_base_set (d->ev_base, d->ev);
+		event_add (d->ev, d->tv);
+	}
+	else {
+		if (d->write_callback) {
+			if (!d->write_callback (d->user_data)) {
+				debug_ip("callback set wanna_die flag, terminating");
+				return FALSE;
+			}
+		}
+		event_del (d->ev);
+		event_set (d->ev, d->fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d);
+		event_base_set (d->ev_base, d->ev);
+		event_add (d->ev, d->tv);
+		d->in_sendfile = FALSE;
+	}
+#endif
+	return TRUE;
+}
+
+/* Free space left in an input buffer, counted from its current position */
+#define BUFREMAIN(x) ((x)->data->size - ((x)->pos - (x)->data->begin))
+
+/* Queue an output buffer at the tail of the dispatcher's pending list */
+#define APPEND_OUT_BUFFER(d, buf) do { \
+	DL_APPEND((d)->out_buffers.buffers, (buf)); \
+	(d)->out_buffers.pending ++; \
+	} while (0)
+/* Unlink an output buffer from the pending list and release it */
+#define DELETE_OUT_BUFFER(d, buf) do { \
+	DL_DELETE((d)->out_buffers.buffers, (buf)); \
+	g_string_free(((buf)->data), (buf)->allocated); \
+	g_slice_free1(sizeof (struct rspamd_out_buffer_s), (buf)); \
+	(d)->out_buffers.pending --; \
+	} while (0)
+
+/*
+ * Flush the dispatcher's pending output buffers to `fd` with one writev.
+ *
+ * Buffers that were written completely are removed from the queue, a
+ * partially written buffer is trimmed. If data remains, or the socket is
+ * not ready (EAGAIN), the dispatcher event is re-armed for EV_WRITE. Once
+ * the queue is empty the write callback is invoked (if this call had data
+ * to write or was entered with is_delayed set) and the event is switched
+ * back to persistent EV_READ.
+ *
+ * @param fd descriptor to write to
+ * @param d dispatcher that owns the output queue
+ * @param is_delayed TRUE if the write callback must be invoked even when
+ *        the queue is already empty on entry
+ * @return FALSE on a write error or when a user callback asked to stop
+ *         (the dispatcher must not be touched afterwards), TRUE otherwise
+ */
+static gboolean
+write_buffers (gint fd, rspamd_io_dispatcher_t * d, gboolean is_delayed)
+{
+	GError *err = NULL;
+	struct rspamd_out_buffer_s *cur = NULL, *tmp;
+	ssize_t r;
+	struct iovec *iov;
+	guint i, len;
+	gint saved_errno;
+
+	len = d->out_buffers.pending;
+	while (len > 0) {
+		/* There is data to flush, so completion has to be reported through the write callback */
+		is_delayed = TRUE;
+		iov = g_slice_alloc (len * sizeof (struct iovec));
+		i = 0;
+		DL_FOREACH_SAFE (d->out_buffers.buffers, cur, tmp) {
+			iov[i].iov_base = cur->data->str;
+			iov[i].iov_len = cur->data->len;
+			i ++;
+		}
+		/* Now try to write the whole vector */
+		r = writev (fd, iov, len);
+		/* Keep the error of writev: releasing memory may overwrite errno */
+		saved_errno = errno;
+		g_slice_free1 (len * sizeof (struct iovec), iov);
+
+		if (r == -1 && saved_errno != EAGAIN) {
+			if (d->err_callback) {
+				err = g_error_new (G_DISPATCHER_ERROR, saved_errno, "%s", strerror (saved_errno));
+				d->err_callback (err, d->user_data);
+			}
+			/* Hard error: retrying here would spin on the same failure */
+			return FALSE;
+		}
+		else if (r > 0) {
+			/* Find pos inside buffers */
+			DL_FOREACH_SAFE (d->out_buffers.buffers, cur, tmp) {
+				if (r >= (ssize_t)cur->data->len) {
+					/* This buffer was written completely */
+					r -= cur->data->len;
+					DELETE_OUT_BUFFER (d, cur);
+				}
+				else {
+					/* This buffer was not written completely */
+					g_string_erase (cur->data, 0, r);
+					break;
+				}
+			}
+			if (d->out_buffers.pending > 0) {
+				/* Wait for other event */
+				event_del (d->ev);
+				event_set (d->ev, fd, EV_WRITE, dispatcher_cb, (void *)d);
+				event_base_set (d->ev_base, d->ev);
+				event_add (d->ev, d->tv);
+				return TRUE;
+			}
+		}
+		else if (r == 0) {
+			/* Nothing was written at all */
+			if (d->err_callback) {
+				err = g_error_new (G_DISPATCHER_ERROR, EOF, "got EOF");
+				d->err_callback (err, d->user_data);
+			}
+			/* No progress is possible, do not loop on it */
+			return FALSE;
+		}
+		else {
+			/* r == -1 and EAGAIN: socket is not ready yet */
+			debug_ip("partially write data, retry");
+			/* Wait for other event */
+			event_del (d->ev);
+			event_set (d->ev, fd, EV_WRITE, dispatcher_cb, (void *)d);
+			event_base_set (d->ev_base, d->ev);
+			event_add (d->ev, d->tv);
+			return TRUE;
+		}
+		len = d->out_buffers.pending;
+	}
+
+	if (d->out_buffers.pending == 0) {
+		/* Disable write event for this time */
+
+		debug_ip ("all buffers were written successfully");
+
+		if (is_delayed && d->write_callback) {
+			if (!d->write_callback (d->user_data)) {
+				debug_ip("callback set wanna_die flag, terminating");
+				return FALSE;
+			}
+		}
+
+		event_del (d->ev);
+		event_set (d->ev, fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d);
+		event_base_set (d->ev_base, d->ev);
+		event_add (d->ev, d->tv);
+	}
+	else {
+		/* Plan other write event */
+		event_del (d->ev);
+		event_set (d->ev, fd, EV_WRITE, dispatcher_cb, (void *)d);
+		event_base_set (d->ev_base, d->ev);
+		event_add (d->ev, d->tv);
+	}
+
+	return TRUE;
+}
+
+/**
+ * Read pending input from a descriptor into the dispatcher's input buffer and
+ * feed the buffered data to the read callback according to the current policy.
+ *
+ * @param fd descriptor to read from
+ * @param d dispatcher object
+ * @param skip_read if TRUE, read (2) is not called and only the data that is
+ * already stored in the input buffer is processed (this is how the function
+ * re-enters itself after a callback has changed the policy)
+ *
+ * The function returns right after the read callback returns FALSE or after
+ * the error callback has been invoked, without accessing `d` again.
+ */
+static void
+read_buffers (gint fd, rspamd_io_dispatcher_t * d, gboolean skip_read)
+{
+ ssize_t r;
+ GError *err = NULL;
+ f_str_t res;
+ gchar *c, *b;
+ gchar *end;
+ size_t len;
+ enum io_policy saved_policy;
+
+ if (d->wanna_die) {
+ rspamd_remove_dispatcher (d);
+ return;
+ }
+
+ /* Lazily allocate the input buffer from the pool's temporary storage */
+ if (d->in_buf == NULL) {
+ d->in_buf = rspamd_mempool_alloc_tmp (d->pool, sizeof (rspamd_buffer_t));
+ if (d->policy == BUFFER_LINE || d->policy == BUFFER_ANY) {
+ d->in_buf->data = fstralloc_tmp (d->pool, d->default_buf_size);
+ }
+ else {
+ d->in_buf->data = fstralloc_tmp (d->pool, d->nchars + 1);
+ }
+ d->in_buf->pos = d->in_buf->data->begin;
+ }
+
+ end = d->in_buf->pos;
+ len = d->in_buf->data->len;
+
+ if (BUFREMAIN (d->in_buf) == 0) {
+ /* Buffer is full, try to call callback with overflow error */
+ /* NOTE(review): without an error callback the code falls through and processes the full buffer */
+ if (d->err_callback) {
+ err = g_error_new (G_DISPATCHER_ERROR, E2BIG, "buffer overflow");
+ d->err_callback (err, d->user_data);
+ return;
+ }
+ }
+ else if (!skip_read) {
+ /* Try to read the whole buffer */
+ r = read (fd, end, BUFREMAIN (d->in_buf));
+ if (r == -1 && errno != EAGAIN) {
+ /* NOTE(review): without an error callback the failure is ignored and processing continues with r == -1 */
+ if (d->err_callback) {
+ err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno));
+ d->err_callback (err, d->user_data);
+ return;
+ }
+ }
+ else if (r == 0) {
+ /* Got EOF while we wait for data */
+#if 0
+ if (d->err_callback) {
+ err = g_error_new (G_DISPATCHER_ERROR, EOF, "got EOF");
+ d->err_callback (err, d->user_data);
+ return;
+ }
+#endif
+ /* Read returned 0, it may be shutdown or full quit */
+ if (!d->want_read) {
+ d->half_closed = TRUE;
+ /* Do not expect any read after this */
+ event_del (d->ev);
+ }
+ else {
+ if (d->err_callback) {
+ err = g_error_new (G_DISPATCHER_ERROR, EOF, "got EOF");
+ d->err_callback (err, d->user_data);
+ return;
+ }
+ }
+ }
+ else if (r == -1 && errno == EAGAIN) {
+ debug_ip("partially read data, retry");
+ return;
+ }
+ else {
+ /* Set current position in buffer */
+ d->in_buf->pos += r;
+ d->in_buf->data->len += r;
+ }
+ /* NOTE(review): the message labels every non-LINE policy (including BUFFER_ANY) as "CHARACTER" */
+ debug_ip("read %z characters, policy is %s, watermark is: %z, buffer has %z bytes", r,
+ d->policy == BUFFER_LINE ? "LINE" : "CHARACTER", d->nchars, d->in_buf->data->len);
+ }
+
+ /* Remember the policy to detect whether a callback changes it */
+ saved_policy = d->policy;
+ c = d->in_buf->data->begin;
+ end = d->in_buf->pos;
+ len = d->in_buf->data->len;
+ b = c;
+ r = 0;
+
+ switch (d->policy) {
+ case BUFFER_LINE:
+ /** Variables:
+ * b - begin of line
+ * r - current position in buffer
+ * *len - length of remaining buffer
+ * c - pointer to current position (buffer->begin + r)
+ * res - result string
+ */
+ while (r < (ssize_t)len) {
+ if (*c == '\n') {
+ res.begin = b;
+ res.len = c - b;
+ /* Strip EOL */
+ if (d->strip_eol) {
+ if (r != 0 && *(c - 1) == '\r') {
+ res.len--;
+ }
+ }
+ else {
+ /* Include EOL in reply */
+ res.len ++;
+ }
+ /* Call callback for a line */
+ if (d->read_callback) {
+ if (!d->read_callback (&res, d->user_data)) {
+ return;
+ }
+ if (d->policy != saved_policy) {
+ /* Drain buffer as policy is changed */
+ /* Note that d->in_buffer is other pointer now, so we need to reinit all pointers */
+ /* First detect how much symbols do we have */
+ if (end == c) {
+ /* In fact we read the whole buffer and change input policy, so just set current pos to begin of buffer */
+ d->in_buf->pos = d->in_buf->data->begin;
+ d->in_buf->data->len = 0;
+ }
+ else {
+ /* Otherwise we need to move buffer */
+ /* Reinit pointers */
+ len = d->in_buf->data->len - r - 1;
+ end = d->in_buf->data->begin + r + 1;
+ memmove (d->in_buf->data->begin, end, len);
+ d->in_buf->data->len = len;
+ d->in_buf->pos = d->in_buf->data->begin + len;
+ /* Process remaining buffer */
+ read_buffers (fd, d, TRUE);
+ }
+ return;
+ }
+ }
+ /* Set new begin of line */
+ b = c + 1;
+ }
+ r++;
+ c++;
+ }
+ /* Now drain remaining characters in buffer */
+ memmove (d->in_buf->data->begin, b, c - b);
+ d->in_buf->data->len = c - b;
+ d->in_buf->pos = d->in_buf->data->begin + (c - b);
+ break;
+ case BUFFER_CHARACTER:
+ /* Deliver exactly d->nchars bytes once that many are buffered (one chunk per call) */
+ r = d->nchars;
+ if ((ssize_t)len >= r) {
+ res.begin = b;
+ res.len = r;
+ c = b + r;
+ if (d->read_callback) {
+ if (!d->read_callback (&res, d->user_data)) {
+ return;
+ }
+ /* Move remaining string to begin of buffer (draining) */
+ if ((ssize_t)len > r) {
+ len -= r;
+ memmove (d->in_buf->data->begin, c, len);
+ d->in_buf->data->len = len;
+ d->in_buf->pos = d->in_buf->data->begin + len;
+ b = d->in_buf->data->begin;
+ }
+ else {
+ d->in_buf->data->len = 0;
+ d->in_buf->pos = d->in_buf->data->begin;
+ }
+ if (d->policy != saved_policy && (ssize_t)len != r) {
+ debug_ip("policy changed during callback, restart buffer's processing");
+ read_buffers (fd, d, TRUE);
+ return;
+ }
+ }
+ }
+ break;
+ case BUFFER_ANY:
+ /* Deliver everything that is currently buffered and reset the buffer */
+ res.begin = d->in_buf->data->begin;
+ res.len = len;
+
+ if (d->read_callback) {
+ /*
+ * Actually we do not want to send zero sized
+ * buffers to a read callback
+ */
+ if (! (d->want_read && res.len == 0)) {
+ if (!d->read_callback (&res, d->user_data)) {
+ return;
+ }
+ }
+ if (d->policy != saved_policy) {
+ debug_ip("policy changed during callback, restart buffer's processing");
+ read_buffers (fd, d, TRUE);
+ return;
+ }
+ }
+ d->in_buf->pos = d->in_buf->data->begin;
+ d->in_buf->data->len = 0;
+ break;
+ }
+}
+
+#undef BUFREMAIN
+
+/**
+ * Event callback installed for every dispatcher event.
+ *
+ * Exactly one of the following is handled per invocation, in this order:
+ * timeout, readable descriptor, writable descriptor.
+ *
+ * @param fd descriptor the event fired for
+ * @param what mask of EV_* flags reported by the event library
+ * @param arg dispatcher object passed as opaque pointer
+ */
+static void
+dispatcher_cb (gint fd, short what, void *arg)
+{
+	rspamd_io_dispatcher_t *d = arg;
+	GError *err = NULL;
+
+	debug_ip("in dispatcher callback, what: %d, fd: %d", (gint)what, fd);
+
+	if (what & EV_TIMEOUT) {
+		if (d->err_callback != NULL) {
+			err = g_error_new (G_DISPATCHER_ERROR, ETIMEDOUT, "IO timeout");
+			d->err_callback (err, d->user_data);
+		}
+		return;
+	}
+
+	if (what & EV_READ) {
+		read_buffers (fd, d, FALSE);
+		return;
+	}
+
+	if (!(what & EV_WRITE)) {
+		return;
+	}
+
+	if (d->in_sendfile) {
+		sendfile_callback (d);
+		return;
+	}
+
+	if (d->out_buffers.pending != 0) {
+		/* Delayed write */
+		write_buffers (fd, d, TRUE);
+		return;
+	}
+
+	/* No data to write from this point on */
+	if (d->half_closed && !d->is_restored) {
+		/* Socket is half closed and there is nothing more to write, closing connection */
+		if (d->err_callback != NULL) {
+			err = g_error_new (G_DISPATCHER_ERROR, EOF, "got EOF");
+			d->err_callback (err, d->user_data);
+		}
+		return;
+	}
+
+	/* Disable further EV_WRITE to this fd: want read again */
+	event_del (d->ev);
+	event_set (d->ev, fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d);
+	event_base_set (d->ev_base, d->ev);
+	event_add (d->ev, d->tv);
+
+	/* The restored flag is cleared only if the write callback asks to continue */
+	if (d->is_restored && d->write_callback != NULL &&
+			d->write_callback (d->user_data)) {
+		d->is_restored = FALSE;
+	}
+}
+
+
+/*
+ * Allocate a dispatcher for `fd`, give it its own memory pool and register
+ * an initial EV_WRITE event for it in `base`.
+ * Returns NULL if `fd` is -1.
+ */
+rspamd_io_dispatcher_t *
+rspamd_create_dispatcher (struct event_base *base, gint fd, enum io_policy policy,
+	dispatcher_read_callback_t read_cb, dispatcher_write_callback_t write_cb, dispatcher_err_callback_t err_cb, struct timeval *tv, void *user_data)
+{
+	rspamd_io_dispatcher_t *disp;
+
+	if (fd == -1) {
+		return NULL;
+	}
+
+	disp = g_slice_alloc0 (sizeof (rspamd_io_dispatcher_t));
+	disp->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ());
+
+	/* Keep a private copy of the timeout, if any */
+	disp->tv = NULL;
+	if (tv != NULL) {
+		disp->tv = rspamd_mempool_alloc (disp->pool, sizeof (struct timeval));
+		*disp->tv = *tv;
+	}
+
+	/* Descriptor and event base */
+	disp->fd = fd;
+	disp->ev_base = base;
+
+	/* Callbacks */
+	disp->read_callback = read_cb;
+	disp->write_callback = write_cb;
+	disp->err_callback = err_cb;
+	disp->user_data = user_data;
+
+	/* Input handling defaults */
+	disp->policy = policy;
+	disp->nchars = 0;
+	disp->strip_eol = TRUE;
+	disp->want_read = TRUE;
+	disp->default_buf_size = sysconf (_SC_PAGESIZE);
+
+	/* State flags */
+	disp->in_sendfile = FALSE;
+	disp->half_closed = FALSE;
+	disp->is_restored = FALSE;
+
+	disp->ev = rspamd_mempool_alloc0 (disp->pool, sizeof (struct event));
+	event_set (disp->ev, fd, EV_WRITE, dispatcher_cb, (void *)disp);
+	event_base_set (disp->ev_base, disp->ev);
+	event_add (disp->ev, disp->tv);
+
+	return disp;
+}
+
+/*
+ * Destroy a dispatcher: drop queued output buffers, remove its event and
+ * release its pool and the object itself. NULL is accepted and ignored.
+ */
+void
+rspamd_remove_dispatcher (rspamd_io_dispatcher_t * d)
+{
+	struct rspamd_out_buffer_s *buf, *next;
+
+	if (d == NULL) {
+		return;
+	}
+
+	DL_FOREACH_SAFE (d->out_buffers.buffers, buf, next) {
+		DELETE_OUT_BUFFER (d, buf);
+	}
+
+	event_del (d->ev);
+	rspamd_mempool_delete (d->pool);
+	g_slice_free1 (sizeof (rspamd_io_dispatcher_t), d);
+}
+
+/*
+ * Replace the input buffer with a new temporary buffer of `new_size` bytes,
+ * preserving the buffered data and the current position.
+ * The caller must ensure that `new_size` is not less than the buffered length.
+ */
+static void
+dispatcher_replace_in_buf (rspamd_io_dispatcher_t *d, size_t new_size)
+{
+	f_str_t *fresh;
+	gssize pos_offset;
+
+	fresh = fstralloc_tmp (d->pool, new_size);
+	pos_offset = d->in_buf->pos - d->in_buf->data->begin;
+	memcpy (fresh->begin, d->in_buf->data->begin, d->in_buf->data->len);
+	fresh->len = d->in_buf->data->len;
+	d->in_buf->data = fresh;
+	d->in_buf->pos = fresh->begin + pos_offset;
+}
+
+/*
+ * Change IO policy of a dispatcher.
+ *
+ * `nchars` is the watermark for BUFFER_CHARACTER policy; zero selects the
+ * default buffer size. The input buffer is only ever grown: it is replaced
+ * when its capacity is insufficient for the new policy, and the buffered data
+ * is preserved. Switching to BUFFER_LINE or BUFFER_ANY re-enables EOL
+ * stripping.
+ */
+void
+rspamd_set_dispatcher_policy (rspamd_io_dispatcher_t * d, enum io_policy policy, size_t nchars)
+{
+	if (d->policy != policy || nchars != d->nchars) {
+		d->policy = policy;
+		d->nchars = nchars ? nchars : d->default_buf_size;
+		/* Resize input buffer if needed */
+		if (policy == BUFFER_CHARACTER && nchars != 0) {
+			if (d->in_buf && d->in_buf->data->size < nchars) {
+				dispatcher_replace_in_buf (d, d->nchars + 1);
+			}
+		}
+		else if (policy == BUFFER_LINE || policy == BUFFER_ANY) {
+			/*
+			 * Compare the real capacity of the buffer, not the watermark:
+			 * otherwise a large buffer could be replaced by a smaller one and
+			 * the copy of the buffered data would overflow it, or a too small
+			 * buffer could be kept for line mode.
+			 */
+			if (d->in_buf && d->in_buf->data->size < d->default_buf_size) {
+				dispatcher_replace_in_buf (d, d->default_buf_size);
+			}
+			d->strip_eol = TRUE;
+		}
+	}
+
+	debug_ip("new input length watermark is %uz", d->nchars);
+}
+
+/*
+ * Queue `len` bytes of `data` for writing.
+ *
+ * If `len` is zero, `data` is treated as a NUL terminated string.
+ * If `allocated` is FALSE the data is copied into a buffer owned by the
+ * dispatcher. If `allocated` is TRUE the caller's memory is used in place
+ * and is not copied; the queued buffer is marked as not owned.
+ * NOTE(review): a partial write erases the written prefix from the queued
+ * string in place, so borrowed memory may be modified despite the const
+ * qualifier - confirm that callers pass writable memory.
+ *
+ * If `delayed` is FALSE the write is attempted immediately and the result of
+ * that attempt is returned; otherwise a write event is planned and TRUE is
+ * returned.
+ */
+gboolean
+rspamd_dispatcher_write (rspamd_io_dispatcher_t * d,
+	const void *data, size_t len, gboolean delayed, gboolean allocated)
+{
+	struct rspamd_out_buffer_s *newbuf;
+
+	newbuf = g_slice_alloc (sizeof (struct rspamd_out_buffer_s));
+	if (len == 0) {
+		/* Assume NULL terminated */
+		len = strlen ((const gchar *)data);
+	}
+
+	if (!allocated) {
+		newbuf->data = g_string_new_len (data, len);
+		newbuf->allocated = TRUE;
+	}
+	else {
+		newbuf->data = g_string_new (NULL);
+		/* g_string_new allocates an initial buffer: release it before borrowing caller's data */
+		g_free (newbuf->data->str);
+		newbuf->data->str = (gchar *)data;
+		newbuf->data->len = len;
+		newbuf->data->allocated_len = len;
+		newbuf->allocated = FALSE;
+	}
+
+	APPEND_OUT_BUFFER (d, newbuf);
+
+	if (!delayed) {
+		debug_ip("plan write event");
+		return write_buffers (d->fd, d, FALSE);
+	}
+	/* Otherwise plan write event */
+	event_del (d->ev);
+	event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d);
+	event_base_set (d->ev_base, d->ev);
+	event_add (d->ev, d->tv);
+
+	return TRUE;
+}
+
+/*
+ * Queue a GString for writing without copying it. `free_on_write` is stored
+ * as the ownership flag of the queued buffer. An immediate write is attempted
+ * unless `delayed` is set, in which case only a write event is planned.
+ */
+gboolean rspamd_dispatcher_write_string (rspamd_io_dispatcher_t *d,
+		GString *str,
+		gboolean delayed,
+		gboolean free_on_write)
+{
+	struct rspamd_out_buffer_s *out;
+
+	out = g_slice_alloc (sizeof (struct rspamd_out_buffer_s));
+	out->allocated = free_on_write;
+	out->data = str;
+
+	APPEND_OUT_BUFFER (d, out);
+
+	if (delayed) {
+		/* Plan write event and let the event loop do the job */
+		event_del (d->ev);
+		event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d);
+		event_base_set (d->ev_base, d->ev);
+		event_add (d->ev, d->tv);
+
+		return TRUE;
+	}
+
+	debug_ip("plan write event");
+	return write_buffers (d->fd, d, FALSE);
+}
+
+/*
+ * Start sending `len` bytes of file `fd` to the dispatcher's descriptor.
+ *
+ * The file offset is rewound to the beginning first. When sendfile is not
+ * available the file is mapped into memory instead. Returns FALSE if the
+ * file cannot be rewound or mapped, otherwise the result of the first
+ * sendfile_callback call. On a mapping failure the dispatcher is taken out
+ * of sendfile mode again, so that later write events do not use the failed
+ * mapping.
+ */
+gboolean
+rspamd_dispatcher_sendfile (rspamd_io_dispatcher_t *d, gint fd, size_t len)
+{
+	if (lseek (fd, 0, SEEK_SET) == -1) {
+		msg_warn ("lseek failed: %s", strerror (errno));
+		return FALSE;
+	}
+
+	d->offset = 0;
+	d->in_sendfile = TRUE;
+	d->sendfile_fd = fd;
+	d->file_size = len;
+
+#ifndef HAVE_SENDFILE
+	#ifdef HAVE_MMAP_NOCORE
+	if ((d->map = mmap (NULL, len, PROT_READ, MAP_SHARED | MAP_NOCORE, fd, 0)) == MAP_FAILED) {
+	#else
+	if ((d->map = mmap (NULL, len, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
+	#endif
+		/* Log first: strerror must see errno as set by mmap */
+		msg_warn ("mmap failed: %s", strerror (errno));
+		/* Do not leave the dispatcher in sendfile mode with an invalid map */
+		d->in_sendfile = FALSE;
+		return FALSE;
+	}
+#endif
+
+	return sendfile_callback (d);
+}
+
+/*
+ * Stop receiving IO events for a dispatcher and clear its restored flag,
+ * so that a later rspamd_dispatcher_restore re-arms the event.
+ */
+void
+rspamd_dispatcher_pause (rspamd_io_dispatcher_t * d)
+{
+	debug_ip ("paused dispatcher");
+
+	d->is_restored = FALSE;
+	event_del (d->ev);
+}
+
+/*
+ * Re-arm a paused dispatcher with a write event and mark it as restored.
+ * Does nothing if the dispatcher is already marked as restored.
+ */
+void
+rspamd_dispatcher_restore (rspamd_io_dispatcher_t * d)
+{
+	if (d->is_restored) {
+		return;
+	}
+
+	debug_ip ("restored dispatcher");
+
+	event_del (d->ev);
+	event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, d);
+	event_base_set (d->ev_base, d->ev);
+	event_add (d->ev, d->tv);
+
+	d->is_restored = TRUE;
+}
+
+/*
+ * Drop all queued output buffers and all temporary pool data of a dispatcher.
+ * The input buffer lives in temporary pool storage, hence it is forgotten too.
+ */
+void
+rspamd_dispacther_cleanup (rspamd_io_dispatcher_t *d)
+{
+	struct rspamd_out_buffer_s *buf, *next;
+
+	DL_FOREACH_SAFE (d->out_buffers.buffers, buf, next) {
+		DELETE_OUT_BUFFER (d, buf);
+	}
+
+	/* Release temporary data; the input buffer becomes invalid after that */
+	rspamd_mempool_cleanup_tmp (d->pool);
+	d->in_buf = NULL;
+}
+
+#undef debug_ip
+
+/*
+ * vi:ts=4
+ */
diff --git a/src/libserver/buffer.h b/src/libserver/buffer.h
new file mode 100644
index 000000000..5ed42bfb3
--- /dev/null
+++ b/src/libserver/buffer.h
@@ -0,0 +1,158 @@
+/**
+ * @file buffer.h
+ * Implements buffered IO
+ */
+
+#ifndef RSPAMD_BUFFER_H
+#define RSPAMD_BUFFER_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "fstring.h"
+
+/**
+ * Read callback: gets the data selected from the input buffer according to
+ * the current IO policy. If it returns FALSE the dispatcher stops processing
+ * the input immediately.
+ */
+typedef gboolean (*dispatcher_read_callback_t)(f_str_t *in, void *user_data);
+/**
+ * Write callback: called by the dispatcher when it has no pending output left.
+ * If it returns FALSE the dispatcher stops handling the current event.
+ */
+typedef gboolean (*dispatcher_write_callback_t)(void *user_data);
+/**
+ * Error callback: gets a newly created GError (IO error, EOF, timeout or
+ * buffer overflow). The dispatcher does not free `err` itself - presumably
+ * the callback owns it, TODO confirm.
+ */
+typedef void (*dispatcher_err_callback_t)(GError *err, void *user_data);
+
+/**
+ * Types of IO handling
+ */
+enum io_policy {
+ BUFFER_LINE, /**< call handler when we have line ready */
+ BUFFER_CHARACTER, /**< call handler when we have some characters */
+ BUFFER_ANY /**< call handler whenever we got data in buffer */
+};
+
+/**
+ * Buffer structure
+ */
+typedef struct rspamd_buffer_s {
+ f_str_t *data; /**< buffer logic */
+ gchar *pos; /**< current position */
+} rspamd_buffer_t;
+
+/**
+ * Element of the doubly linked chain of output buffers
+ */
+struct rspamd_out_buffer_s {
+ GString *data; /**< data that is waiting to be written */
+ gboolean allocated; /**< ownership flag of `data` (TRUE for data copied by the dispatcher) */
+ struct rspamd_out_buffer_s *prev, *next; /**< chain links */
+};
+
+/**
+ * IO dispatcher: buffered reader/writer bound to a single descriptor
+ */
+typedef struct rspamd_io_dispatcher_s {
+ rspamd_buffer_t *in_buf; /**< input buffer */
+ struct {
+ guint pending; /**< count of queued output buffers; zero means nothing to write */
+ struct rspamd_out_buffer_s *buffers; /**< head of the chain */
+ } out_buffers; /**< output buffers chain */
+ struct timeval *tv; /**< io timeout */
+ struct event *ev; /**< libevent io event */
+ rspamd_mempool_t *pool; /**< where to store data */
+ enum io_policy policy; /**< IO policy */
+ size_t nchars; /**< how many chars to read */
+ gint fd; /**< descriptor */
+ guint32 peer_addr; /**< address of peer for debugging */
+ gboolean wanna_die; /**< if dispatcher should be stopped */
+ dispatcher_read_callback_t read_callback; /**< read callback */
+ dispatcher_write_callback_t write_callback; /**< write callback */
+ dispatcher_err_callback_t err_callback; /**< error callback */
+ void *user_data; /**< user's data for callbacks */
+ gulong default_buf_size; /**< default size for buffering */
+ off_t offset; /**< for sendfile use */
+ size_t file_size; /**< length of data to send in sendfile mode */
+ gint sendfile_fd; /**< descriptor of the file that is being sent */
+ gboolean in_sendfile; /**< whether buffer is in sendfile mode */
+ gboolean strip_eol; /**< strip or not line ends in BUFFER_LINE policy */
+ gboolean is_restored; /**< call a callback when dispatcher is restored */
+ gboolean half_closed; /**< connection is half closed */
+ gboolean want_read; /**< whether we want to read more data */
+ struct event_base *ev_base; /**< event base for io operations */
+#ifndef HAVE_SENDFILE
+ void *map; /**< memory mapping of the file used when sendfile is not available */
+#endif
+} rspamd_io_dispatcher_t;
+
+/**
+ * Creates rspamd IO dispatcher for specified descriptor
+ * @param base event base used for IO events of this dispatcher
+ * @param fd descriptor to IO
+ * @param policy IO policy
+ * @param read_cb read callback handler
+ * @param write_cb write callback handler
+ * @param err_cb error callback handler
+ * @param tv IO timeout (copied by the dispatcher), may be NULL
+ * @param user_data pointer to user's data
+ * @return new dispatcher object or NULL in case of failure
+ */
+rspamd_io_dispatcher_t* rspamd_create_dispatcher (struct event_base *base, gint fd,
+ enum io_policy policy,
+ dispatcher_read_callback_t read_cb,
+ dispatcher_write_callback_t write_cb,
+ dispatcher_err_callback_t err_cb,
+ struct timeval *tv,
+ void *user_data);
+
+/**
+ * Set new policy for dispatcher
+ * @param d pointer to dispatcher's object
+ * @param policy IO policy
+ * @param nchars number of characters in buffer for character policy
+ * (zero selects the dispatcher's default buffer size)
+ */
+void rspamd_set_dispatcher_policy (rspamd_io_dispatcher_t *d,
+ enum io_policy policy,
+ size_t nchars);
+
+/**
+ * Write data when it would be possible
+ * @param d pointer to dispatcher's object
+ * @param data data to write
+ * @param len length of data (zero means that data is a NUL terminated string)
+ * @param delayed if TRUE only plan a write event, otherwise try to write immediately
+ * @param allocated if TRUE use caller's memory in place instead of copying it
+ * @return TRUE if the write event has been planned or the immediate write attempt succeeded
+ */
+gboolean rspamd_dispatcher_write (rspamd_io_dispatcher_t *d,
+ const void *data,
+ size_t len, gboolean delayed,
+ gboolean allocated) G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Write a GString to dispatcher
+ * @param d dispatcher object
+ * @param str string to write
+ * @param delayed delay write
+ * @param free_on_write free string after writing to a socket
+ * @return TRUE if write has been queued successfully
+ */
+gboolean rspamd_dispatcher_write_string (rspamd_io_dispatcher_t *d,
+ GString *str,
+ gboolean delayed,
+ gboolean free_on_write) G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Send specified descriptor to dispatcher
+ * @param d pointer to dispatcher's object
+ * @param fd descriptor of file (its offset is rewound to the beginning)
+ * @param len length of data
+ * @return FALSE if the file cannot be rewound or mapped, otherwise the result of the first send attempt
+ */
+gboolean rspamd_dispatcher_sendfile (rspamd_io_dispatcher_t *d, gint fd, size_t len) G_GNUC_WARN_UNUSED_RESULT;
+
+/**
+ * Pause IO events on dispatcher
+ * @param d pointer to dispatcher's object
+ */
+void rspamd_dispatcher_pause (rspamd_io_dispatcher_t *d);
+
+/**
+ * Restore IO events on dispatcher (no-op if it is already restored)
+ * @param d pointer to dispatcher's object
+ */
+void rspamd_dispatcher_restore (rspamd_io_dispatcher_t *d);
+
+/**
+ * Frees dispatcher object
+ * @param dispatcher pointer to dispatcher's object (NULL is ignored)
+ */
+void rspamd_remove_dispatcher (rspamd_io_dispatcher_t *dispatcher);
+
+/**
+ * Cleanup dispatcher freeing all temporary data
+ * (queued output buffers and the input buffer are dropped as well)
+ * @param dispatcher pointer to dispatcher's object
+ */
+void rspamd_dispacther_cleanup (rspamd_io_dispatcher_t *dispatcher);
+
+#endif
diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
new file mode 100644
index 000000000..6ecb441fd
--- /dev/null
+++ b/src/libserver/cfg_file.h
@@ -0,0 +1,516 @@
+/**
+ * @file cfg_file.h
+ * Config file parser and config routines API
+ */
+
+#ifndef CFG_FILE_H
+#define CFG_FILE_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "upstream.h"
+#include "memcached.h"
+#include "symbols_cache.h"
+#include "cfg_rcl.h"
+#include "utlist.h"
+#include "ucl.h"
+
+/* Default ports */
+#define DEFAULT_BIND_PORT 11333
+#define DEFAULT_CONTROL_PORT 11334
+/* Memcached defaults */
+#define MAX_MEMCACHED_SERVERS 4
+#define DEFAULT_MEMCACHED_PORT 11211
+/* Memcached timeouts */
+#define DEFAULT_MEMCACHED_CONNECT_TIMEOUT 1000
+/* Upstream timeouts */
+#define DEFAULT_UPSTREAM_ERROR_TIME 10
+#define DEFAULT_UPSTREAM_DEAD_TIME 300
+#define DEFAULT_UPSTREAM_MAXERRORS 10
+
+/* Forward declarations of types defined in other headers */
+struct expression;
+struct tokenizer;
+struct classifier;
+
+/* Tri-state value: undefined, true or false */
+enum { VAL_UNDEF=0, VAL_TRUE, VAL_FALSE };
+
+/**
+ * Type of time configuration parameter
+ */
+enum time_type {
+ TIME_SECONDS = 0,
+ TIME_MILLISECONDS,
+ TIME_MINUTES,
+ TIME_HOURS
+};
+/**
+ * Types of rspamd bind lines
+ */
+enum rspamd_cred_type {
+ CRED_NORMAL,
+ CRED_CONTROL,
+ CRED_LMTP,
+ CRED_DELIVERY
+};
+
+/**
+ * Regexp type: /H - header, /M - mime, /U - url /X - raw header
+ * NOTE(review): REGEXP_MESSAGE has no flag listed here - confirm which flag selects it
+ */
+enum rspamd_regexp_type {
+ REGEXP_NONE = 0,
+ REGEXP_HEADER,
+ REGEXP_MIME,
+ REGEXP_MESSAGE,
+ REGEXP_URL,
+ REGEXP_RAW_HEADER
+};
+
+/**
+ * Logging type
+ */
+enum rspamd_log_type {
+ RSPAMD_LOG_CONSOLE, /**< log to console */
+ RSPAMD_LOG_SYSLOG, /**< log to syslog */
+ RSPAMD_LOG_FILE /**< log to file */
+};
+
+/**
+ * Regexp structure
+ */
+struct rspamd_regexp {
+ enum rspamd_regexp_type type; /**< regexp type */
+ gchar *regexp_text; /**< regexp text representation */
+ GRegex *regexp; /**< glib regexp structure */
+ GRegex *raw_regexp; /**< glib regexp structure for raw matching */
+ gchar *header; /**< header name for header regexps */
+ gboolean is_test; /**< true if this expression must be tested */
+ gboolean is_raw; /**< true if this regexp is done by raw matching */
+ gboolean is_strong; /**< true if headers search must be case sensitive */
+};
+
+/**
+ * Memcached server object
+ */
+struct memcached_server {
+ struct upstream up; /**< common upstream base */
+ struct in_addr addr; /**< address of server */
+ guint16 port; /**< port to connect */
+ short alive; /**< is this server alive */
+ gint16 num; /**< number of servers in case of mirror */
+};
+
+/**
+ * script module list item
+ */
+struct script_module {
+ gchar *name; /**< name of module */
+ gchar *path; /**< path to module */
+};
+
+/**
+ * Type of lua variable
+ */
+enum lua_var_type {
+ LUA_VAR_NUM, /**< number */
+ LUA_VAR_BOOLEAN, /**< boolean */
+ LUA_VAR_STRING, /**< string */
+ LUA_VAR_FUNCTION, /**< function */
+ LUA_VAR_UNKNOWN /**< any other type */
+};
+/**
+ * Module option
+ */
+struct module_opt {
+ gchar *param; /**< parameter name */
+ gchar *value; /**< parameter value */
+ gchar *description; /**< parameter description */
+ gchar *group; /**< parameter group */
+ gpointer actual_data; /**< parsed data */
+ gboolean is_lua; /**< actually this is lua variable */
+ enum lua_var_type lua_type; /**< type of lua variable */
+};
+
+/**
+ * Named group of module options
+ */
+struct module_meta_opt {
+ gchar *name; /**< Name of meta option */
+ GList *options; /**< List of struct module_opt */
+};
+
+/**
+ * Symbol definition
+ */
+struct symbol_def {
+ gchar *name; /**< symbol name */
+ gchar *description; /**< symbol description */
+ gdouble *weight_ptr; /**< pointer to symbol's weight (storage is owned elsewhere - TODO confirm) */
+};
+
+/**
+ * Symbols group
+ */
+struct symbols_group {
+ gchar *name; /**< group name */
+ GList *symbols; /**< list of symbols (presumably struct symbol_def - TODO confirm) */
+};
+
+/**
+ * Statfile section definition
+ */
+struct statfile_section {
+ guint32 code; /**< section's code */
+ guint64 size; /**< size of section */
+ double weight; /**< weight coefficient for section */
+};
+
+/**
+ * Statfile autolearn parameters
+ */
+struct statfile_autolearn_params {
+ const gchar *metric; /**< metric name for autolearn triggering */
+ double threshold_min; /**< threshold mark */
+ double threshold_max; /**< threshold mark */
+ GList *symbols; /**< list of symbols */
+};
+
+/**
+ * Sync affinity
+ */
+enum sync_affinity {
+ AFFINITY_NONE = 0,
+ AFFINITY_MASTER,
+ AFFINITY_SLAVE
+};
+
+/**
+ * Binlog params
+ */
+struct statfile_binlog_params {
+ enum sync_affinity affinity; /**< sync affinity of this statfile */
+ time_t rotate_time; /**< binlog rotation time (units are not visible here - TODO confirm) */
+ gchar *master_addr; /**< address of master */
+ guint16 master_port; /**< port of master */
+};
+
+/**
+ * Function that normalizes a statfile score; `params` is the statfile's
+ * normalizer_data (presumably - TODO confirm against callers)
+ */
+typedef double (*statfile_normalize_func)(struct config_file *cfg, long double score, void *params);
+
+/**
+ * Statfile config definition
+ */
+struct statfile {
+ gchar *symbol; /**< symbol of statfile */
+ gchar *path; /**< filesystem pattern (with %r or %f) */
+ gchar *label; /**< label of this statfile */
+ gsize size; /**< size of statfile */
+ GList *sections; /**< list of sections in statfile */
+ struct statfile_autolearn_params *autolearn; /**< autolearn params */
+ struct statfile_binlog_params *binlog; /**< binlog params */
+ statfile_normalize_func normalizer; /**< function that is used as normaliser */
+ void *normalizer_data; /**< normalizer function params */
+ gchar *normalizer_str; /**< source string (for dump) */
+ ucl_object_t *opts; /**< other options */
+ gboolean is_spam; /**< spam flag */
+};
+
+/**
+ * Classifier config definition
+ */
+struct classifier_config {
+ GList *statfiles; /**< statfiles list */
+ GHashTable *labels; /**< statfiles with labels */
+ gchar *metric; /**< metric of this classifier */
+ struct classifier *classifier; /**< classifier interface */
+ struct tokenizer *tokenizer; /**< tokenizer used for classifier */
+ GHashTable *opts; /**< other options */
+ GList *pre_callbacks; /**< list of callbacks that are called before classification */
+ GList *post_callbacks; /**< list of callbacks that are called after classification */
+};
+
+/**
+ * Single bind configuration of a worker (element of a singly linked list)
+ */
+struct rspamd_worker_bind_conf {
+ gchar *bind_host; /**< host to bind */
+ guint16 bind_port; /**< port to bind */
+ gint ai; /**< NOTE(review): meaning is not visible here, presumably address family/info - confirm */
+ gboolean is_systemd; /**< presumably set for sockets passed by systemd - TODO confirm */
+ struct rspamd_worker_bind_conf *next; /**< next bind configuration */
+};
+
+/**
+ * Parser of a single worker parameter
+ */
+struct rspamd_worker_param_parser {
+ rspamd_rcl_handler_t handler; /**< handler function */
+ struct rspamd_rcl_struct_parser parser; /**< parser attributes */
+ const gchar *name; /**< parameter's name */
+ UT_hash_handle hh; /**< hash by name */
+};
+
+/**
+ * Set of parameter parsers for a worker type
+ */
+struct rspamd_worker_cfg_parser {
+ struct rspamd_worker_param_parser *parsers; /**< parsers hash */
+ gint type; /**< workers quark */
+ gboolean (*def_obj_parser)(const ucl_object_t *obj, gpointer ud); /**< default object parser */
+ gpointer def_ud; /**< user data passed to the default object parser (presumably - TODO confirm) */
+ UT_hash_handle hh; /**< hash by type */
+};
+
+/**
+ * Config params for rspamd worker
+ */
+struct worker_conf {
+ worker_t *worker; /**< pointer to worker type */
+ GQuark type; /**< type of worker */
+ struct rspamd_worker_bind_conf *bind_conf; /**< bind configuration */
+ guint16 count; /**< number of workers */
+ GList *listen_socks; /**< listening sockets descriptors */
+ guint32 rlimit_nofile; /**< max files limit */
+ guint32 rlimit_maxcore; /**< maximum core file size */
+ GHashTable *params; /**< params for worker */
+ GQueue *active_workers; /**< linked list of spawned workers */
+ gboolean has_socket; /**< whether we should make listening socket in main process */
+ gpointer *ctx; /**< worker's context */
+ ucl_object_t *options; /**< other worker's options */
+};
+
+/**
+ * Structure that stores all config data
+ */
+struct config_file {
+ gchar *rspamd_user; /**< user to run as */
+ gchar *rspamd_group; /**< group to run as */
+ rspamd_mempool_t *cfg_pool; /**< memory pool for config */
+ gchar *cfg_name; /**< name of config file */
+ gchar *pid_file; /**< name of pid file */
+ gchar *temp_dir; /**< dir for temp files */
+#ifdef WITH_GPERF_TOOLS
+ gchar *profile_path; /**< path for profiler output (presumably - TODO confirm) */
+#endif
+
+ gboolean no_fork; /**< if 1 do not call daemon() */
+ gboolean config_test; /**< if TRUE do only config file test */
+ gboolean raw_mode; /**< work in raw mode instead of utf one */
+ gboolean one_shot_mode; /**< rules add only one symbol */
+ gboolean check_text_attachements; /**< check text attachments as text */
+ gboolean convert_config; /**< convert config to XML format */
+ gboolean strict_protocol_headers; /**< strictly check protocol headers */
+
+ gsize max_diff; /**< maximum diff size for text parts */
+
+ enum rspamd_log_type log_type; /**< log type */
+ gint log_facility; /**< log facility in case of syslog */
+ gint log_level; /**< log level trigger */
+ gchar *log_file; /**< path to logfile in case of file logging */
+ gboolean log_buffered; /**< whether logging is buffered */
+ guint32 log_buf_size; /**< length of log buffer */
+ gchar *debug_ip_map; /**< turn on debugging for specified ip addresses */
+ gboolean log_urls; /**< whether we should log URLs */
+ GList *debug_symbols; /**< symbols to debug */
+ gboolean log_color; /**< output colors for console output */
+ gboolean log_extended; /**< log extended information */
+
+ guint32 statfile_sync_interval; /**< synchronization interval */
+ guint32 statfile_sync_timeout; /**< synchronization timeout */
+ gboolean mlock_statfile_pool; /**< use mlock (2) for locking statfiles */
+
+ struct memcached_server memcached_servers[MAX_MEMCACHED_SERVERS]; /**< memcached servers */
+ gsize memcached_servers_num; /**< number of memcached servers */
+ memc_proto_t memcached_protocol; /**< memcached protocol */
+ guint memcached_error_time; /**< memcached error time (see upstream documentation) */
+ guint memcached_dead_time; /**< memcached dead time */
+ guint memcached_maxerrors; /**< maximum number of errors */
+ guint memcached_connect_timeout; /**< connection timeout */
+
+ gboolean delivery_enable; /**< whether delivery agent is enabled */
+ gchar *deliver_host; /**< host for mail delivering */
+ struct in_addr deliver_addr; /**< its address */
+ guint16 deliver_port; /**< port for delivering */
+ guint16 deliver_family; /**< socket family for delivering */
+ gchar *deliver_agent_path; /**< deliver to pipe instead of socket */
+ gboolean deliver_lmtp; /**< use LMTP instead of SMTP */
+
+ GList *script_modules; /**< linked list of script modules to load */
+
+ GList *filters; /**< linked list of all filters */
+ GList *workers; /**< linked list of all workers params */
+ struct rspamd_worker_cfg_parser *wrk_parsers; /**< hash for worker config parsers, indexed by worker quarks */
+ gchar *filters_str; /**< string of filters */
+ ucl_object_t *rcl_obj; /**< rcl object */
+ GHashTable* metrics; /**< hash of metrics indexed by metric name */
+ GList* symbols_groups; /**< groups of symbols */
+ GList* metrics_list; /**< linked list of metrics */
+ GHashTable* metrics_symbols; /**< hash table of metrics indexed by symbol */
+ GHashTable* c_modules; /**< hash of c modules indexed by module name */
+ GHashTable* composite_symbols; /**< hash of composite symbols indexed by its name */
+ GList *classifiers; /**< list of all classifiers defined */
+ GList *statfiles; /**< list of all statfiles in config file order */
+ GHashTable *classifiers_symbols; /**< hashtable indexed by symbol name of classifiers */
+ GHashTable* cfg_params; /**< all cfg params indexed by its name in this structure */
+ GList *pre_filters; /**< list of pre-processing lua filters */
+ GList *post_filters; /**< list of post-processing lua filters */
+ gchar *dynamic_conf; /**< path to dynamic configuration */
+ GList *current_dynamic_conf; /**< currently loaded dynamic configuration */
+ GHashTable* domain_settings; /**< settings per-domains */
+ GHashTable* user_settings; /**< settings per-user */
+ gchar* domain_settings_str; /**< string representation of settings */
+ gchar* user_settings_str; /**< string representation of per-user settings (presumably - TODO confirm) */
+ gint clock_res; /**< resolution of clock used */
+
+ GList *maps; /**< maps active */
+ rspamd_mempool_t *map_pool; /**< static maps pool */
+ gdouble map_timeout; /**< maps watch timeout */
+
+ struct symbols_cache *cache; /**< symbols cache object */
+ gchar *cache_filename; /**< filename of cache file */
+ struct metric *default_metric; /**< default metric */
+
+ gchar* checksum; /**< real checksum of config file */
+ gchar* dump_checksum; /**< dump checksum of config file */
+ gpointer lua_state; /**< pointer to lua state */
+
+ gchar* rrd_file; /**< rrd file to store statistics */
+
+ gchar* history_file; /**< file to save rolling history */
+
+ gdouble dns_timeout; /**< timeout in milliseconds for waiting for dns reply */
+ guint32 dns_retransmits; /**< maximum retransmits count */
+ guint32 dns_throttling_errors; /**< maximum errors for starting resolver throttling */
+ guint32 dns_throttling_time; /**< time in seconds for DNS throttling */
+ guint32 dns_io_per_server; /**< number of sockets per DNS server */
+ GList *nameservers; /**< list of nameservers or NULL to parse resolv.conf */
+};
+
+
+/**
+ * Parse host[:port[:priority]] line
+ * @param pool memory pool (presumably used to allocate the parsed address - TODO confirm)
+ * @param str string to parse
+ * @param addr output: host address
+ * @param port output: port
+ * @param priority output: priority
+ * @return TRUE if string was parsed
+ */
+gboolean parse_host_port_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port, guint *priority);
+
+/**
+ * Parse host:port line
+ * @param pool memory pool (presumably used to allocate the parsed address - TODO confirm)
+ * @param str string to parse
+ * @param addr output: host address
+ * @param port output: port
+ * @return TRUE if string was parsed
+ */
+gboolean parse_host_port (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port);
+
+/**
+ * Parse host:priority line
+ * @param pool memory pool (presumably used to allocate the parsed address - TODO confirm)
+ * @param str string to parse
+ * @param addr output: host address
+ * @param priority output: priority
+ * @return TRUE if string was parsed
+ */
+gboolean parse_host_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint *priority);
+
+/**
+ * Parse bind credits
+ * @param cfg config file to use
+ * @param cf worker configuration that the bind line belongs to
+ * @param str line that presents bind line
+ * @return TRUE if line was successfully parsed and FALSE in case of error
+ */
+gboolean parse_bind_line (struct config_file *cfg, struct worker_conf *cf, const gchar *str);
+
+/**
+ * Init default values
+ * @param cfg config file
+ */
+void init_defaults (struct config_file *cfg);
+
+/**
+ * Free memory used by config structure
+ * @param cfg config file
+ */
+void free_config (struct config_file *cfg);
+
+/**
+ * Gets module option with specified name
+ * @param cfg config file
+ * @param module_name name of module
+ * @param opt_name name of option to get
+ * @return module value or NULL if option is not defined
+ */
+const ucl_object_t* get_module_opt (struct config_file *cfg, const gchar *module_name,
+ const gchar *opt_name);
+
+/**
+ * Parse limit
+ * @param limit string representation of limit (eg. 1M)
+ * @param len length of the limit string
+ * @return numeric value of limit
+ */
+guint64 parse_limit (const gchar *limit, guint len);
+
+/**
+ * Parse flag
+ * @param str string representation of flag (eg. 'on')
+ * @return numeric value of flag (0 or 1)
+ */
+gchar parse_flag (const gchar *str);
+
+/**
+ * Do post load actions for config
+ * @param cfg config file
+ */
+void post_load_config (struct config_file *cfg);
+
+/**
+ * Calculate checksum for config file
+ * @param cfg config file
+ * @return TRUE on success (presumably - TODO confirm against the implementation)
+ */
+gboolean get_config_checksum (struct config_file *cfg);
+
+
+/**
+ * Replace all \" with a single " in given string
+ * @param line input string (modified in place)
+ */
+void unescape_quotes (gchar *line);
+
+/*
+ * Convert comma separated string to a list of strings
+ */
+GList* parse_comma_list (rspamd_mempool_t *pool, const gchar *line);
+
+/*
+ * Return a new classifier_config structure, setting default and non-conflicting attributes
+ */
+struct classifier_config* check_classifier_conf (struct config_file *cfg, struct classifier_config *c);
+/*
+ * Return a new worker_conf structure, setting default and non-conflicting attributes
+ */
+struct worker_conf* check_worker_conf (struct config_file *cfg, struct worker_conf *c);
+/*
+ * Return a new metric structure, setting default and non-conflicting attributes
+ */
+struct metric* check_metric_conf (struct config_file *cfg, struct metric *c);
+/*
+ * Return a new statfile structure, setting default and non-conflicting attributes
+ */
+struct statfile* check_statfile_conf (struct config_file *cfg, struct statfile *c);
+
+/*
+ * Read XML configuration file
+ * NOTE(review): this header works with ucl/rcl objects, so "XML" may be
+ * outdated here - confirm against the implementation
+ */
+gboolean read_rspamd_config (struct config_file *cfg,
+ const gchar *filename, const gchar *convert_to,
+ rspamd_rcl_section_fin_t logger_fin, gpointer logger_ud);
+
+/*
+ * Register symbols of classifiers inside metrics
+ */
+void insert_classifier_symbols (struct config_file *cfg);
+
+/*
+ * Check statfiles inside a classifier
+ */
+gboolean check_classifier_statfiles (struct classifier_config *cf);
+
+/*
+ * Find classifier config by name
+ */
+struct classifier_config* find_classifier_conf (struct config_file *cfg, const gchar *name);
+
+/*
+ * Parse input `ip_list` to radix tree `tree`. Now supports only IPv4 addresses.
+ */
+gboolean rspamd_parse_ip_list (const gchar *ip_list, radix_tree_t **tree);
+
+#endif /* ifdef CFG_FILE_H */
+/*
+ * vi:ts=4
+ */
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
new file mode 100644
index 000000000..37b554dec
--- /dev/null
+++ b/src/libserver/cfg_rcl.c
@@ -0,0 +1,1471 @@
+/* Copyright (c) 2013, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cfg_rcl.h"
+#include "main.h"
+#include "settings.h"
+#include "cfg_file.h"
+#include "lua/lua_common.h"
+#include "expressions.h"
+#include "classifiers/classifiers.h"
+#include "tokenizers/tokenizers.h"
+
+/*
+ * Common section handlers
+ */
+/**
+ * Handler of the "logging" section: selects the log backend ("type"), the
+ * syslog facility or the log file name, the log level, and finally lets the
+ * default handlers of the section fill the remaining fields of cfg.
+ * @param cfg configuration being filled
+ * @param obj rcl object of the section
+ * @param ud unused
+ * @param section section description (source of default handlers)
+ * @param err error pointer filled on failure
+ * @return FALSE if any attribute is invalid, otherwise the result of default parsing
+ */
+static gboolean
+rspamd_rcl_logging_handler (struct config_file *cfg, const ucl_object_t *obj,
+	gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+	/* Both spellings of a facility are compared case-insensitively */
+	static const struct {
+		const gchar *macro_name;
+		const gchar *short_name;
+		gint facility;
+	} syslog_facilities[] = {
+		{ "LOG_AUTH", "auth", LOG_AUTH },
+		{ "LOG_CRON", "cron", LOG_CRON },
+		{ "LOG_DAEMON", "daemon", LOG_DAEMON },
+		{ "LOG_MAIL", "mail", LOG_MAIL },
+		{ "LOG_USER", "user", LOG_USER },
+		{ "LOG_LOCAL0", "local0", LOG_LOCAL0 },
+		{ "LOG_LOCAL1", "local1", LOG_LOCAL1 },
+		{ "LOG_LOCAL2", "local2", LOG_LOCAL2 },
+		{ "LOG_LOCAL3", "local3", LOG_LOCAL3 },
+		{ "LOG_LOCAL4", "local4", LOG_LOCAL4 },
+		{ "LOG_LOCAL5", "local5", LOG_LOCAL5 },
+		{ "LOG_LOCAL6", "local6", LOG_LOCAL6 },
+		{ "LOG_LOCAL7", "local7", LOG_LOCAL7 }
+	};
+	static const struct {
+		const gchar *name;
+		gint mask;
+	} log_levels[] = {
+		{ "error", G_LOG_LEVEL_ERROR | G_LOG_LEVEL_CRITICAL },
+		{ "warning", G_LOG_LEVEL_WARNING },
+		{ "info", G_LOG_LEVEL_INFO | G_LOG_LEVEL_MESSAGE },
+		{ "debug", G_LOG_LEVEL_DEBUG }
+	};
+	const ucl_object_t *elt;
+	const gchar *type_str, *facility_str, *level_str;
+	gboolean matched;
+	guint i;
+
+	elt = ucl_object_find_key (obj, "type");
+	if (elt == NULL || !ucl_object_tostring_safe (elt, &type_str)) {
+		/* No type specified */
+		msg_warn ("logging type is not specified correctly, log output to the console");
+	}
+	else if (g_ascii_strcasecmp (type_str, "file") == 0) {
+		/* File logging is useless without a file name */
+		elt = ucl_object_find_key (obj, "filename");
+		if (elt == NULL || elt->type != UCL_STRING) {
+			g_set_error (err, CFG_RCL_ERROR, ENOENT, "filename attribute must be specified for file logging type");
+			return FALSE;
+		}
+		cfg->log_type = RSPAMD_LOG_FILE;
+		cfg->log_file = rspamd_mempool_strdup (cfg->cfg_pool, ucl_object_tostring (elt));
+	}
+	else if (g_ascii_strcasecmp (type_str, "syslog") == 0) {
+		/* LOG_DAEMON stays in effect when no facility is given */
+		cfg->log_facility = LOG_DAEMON;
+		cfg->log_type = RSPAMD_LOG_SYSLOG;
+		elt = ucl_object_find_key (obj, "facility");
+		if (elt != NULL && ucl_object_tostring_safe (elt, &facility_str)) {
+			matched = FALSE;
+			for (i = 0; i < G_N_ELEMENTS (syslog_facilities); i ++) {
+				if (g_ascii_strcasecmp (facility_str, syslog_facilities[i].macro_name) == 0 ||
+					g_ascii_strcasecmp (facility_str, syslog_facilities[i].short_name) == 0) {
+					cfg->log_facility = syslog_facilities[i].facility;
+					matched = TRUE;
+					break;
+				}
+			}
+			if (!matched) {
+				g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid log facility: %s", facility_str);
+				return FALSE;
+			}
+		}
+	}
+	else if (g_ascii_strcasecmp (type_str, "stderr") == 0 ||
+			g_ascii_strcasecmp (type_str, "console") == 0) {
+		cfg->log_type = RSPAMD_LOG_CONSOLE;
+	}
+	else {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid log type: %s", type_str);
+		return FALSE;
+	}
+
+	/* Handle log level */
+	elt = ucl_object_find_key (obj, "level");
+	if (elt != NULL && ucl_object_tostring_safe (elt, &level_str)) {
+		matched = FALSE;
+		for (i = 0; i < G_N_ELEMENTS (log_levels); i ++) {
+			if (g_ascii_strcasecmp (level_str, log_levels[i].name) == 0) {
+				cfg->log_level = log_levels[i].mask;
+				matched = TRUE;
+				break;
+			}
+		}
+		if (!matched) {
+			g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid log level: %s", level_str);
+			return FALSE;
+		}
+	}
+
+	return rspamd_rcl_section_parse_defaults (section, cfg, obj, cfg, err);
+}
+
+/**
+ * Handler of the "options" section: reads user and domain settings when they
+ * are configured and then applies the default handlers of the section to cfg.
+ * @param cfg configuration being filled
+ * @param obj rcl object of the section
+ * @param ud unused
+ * @param section section description (source of default handlers)
+ * @param err error pointer filled on failure
+ * @return FALSE if some settings cannot be read, otherwise the result of default parsing
+ */
+static gboolean
+rspamd_rcl_options_handler (struct config_file *cfg, const ucl_object_t *obj,
+	gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+	const ucl_object_t *elt;
+	const gchar *settings_src;
+
+	/* Per-user settings */
+	elt = ucl_object_find_key (obj, "user_settings");
+	if (elt != NULL && ucl_object_tostring_safe (elt, &settings_src)) {
+		if (read_settings (settings_src, "Users' settings", cfg, cfg->user_settings)) {
+			cfg->user_settings_str = rspamd_mempool_strdup (cfg->cfg_pool, settings_src);
+		}
+		else {
+			g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot read settings: %s", settings_src);
+			return FALSE;
+		}
+	}
+
+	/* Per-domain settings */
+	elt = ucl_object_find_key (obj, "domain_settings");
+	if (elt != NULL && ucl_object_tostring_safe (elt, &settings_src)) {
+		if (read_settings (settings_src, "Domains settings", cfg, cfg->domain_settings)) {
+			cfg->domain_settings_str = rspamd_mempool_strdup (cfg->cfg_pool, settings_src);
+		}
+		else {
+			g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot read settings: %s", settings_src);
+			return FALSE;
+		}
+	}
+
+	return rspamd_rcl_section_parse_defaults (section, cfg, obj, cfg, err);
+}
+
+/**
+ * Comparator for g_list_find_custom: matches a symbols group against a name
+ * @param a list element (struct symbols_group)
+ * @param b group name to look for
+ * @return 0 when the names are equal ignoring ASCII case
+ */
+static gint
+rspamd_symbols_group_find_func (gconstpointer a, gconstpointer b)
+{
+	return g_ascii_strcasecmp (((const struct symbols_group *)a)->name,
+			(const gchar *)b);
+}
+
+/**
+ * Insert a symbol to the metric
+ * @param cfg configuration that owns the memory pool and the symbol tables
+ * @param metric metric to insert the symbol to
+ * @param obj symbol rcl object (either float value or an object)
+ * @param is_legacy if TRUE the symbol name is read from the "name" attribute
+ * of obj, otherwise the key of obj is used as the name
+ * @param err error pointer filled on failure
+ * @return TRUE if the symbol has been registered
+ */
+static gboolean
+rspamd_rcl_insert_symbol (struct config_file *cfg, struct metric *metric,
+	const ucl_object_t *obj, gboolean is_legacy, GError **err)
+{
+	const gchar *group = "ungrouped", *description = NULL, *sym_name;
+	gdouble symbol_score, *score_ptr;
+	const ucl_object_t *val;
+	struct symbols_group *sym_group;
+	struct symbol_def *sym_def;
+	GList *metric_list, *group_list;
+
+	/*
+	 * We allow two type of definitions:
+	 * symbol = weight
+	 * or
+	 * symbol {
+	 *   weight = ...;
+	 *   description = ...;
+	 *   group = ...;
+	 * }
+	 */
+	if (is_legacy) {
+		val = ucl_object_find_key (obj, "name");
+		/*
+		 * The name must be a real string: a non-string attribute would
+		 * otherwise yield a NULL name that is formatted and duplicated below
+		 */
+		if (val == NULL || !ucl_object_tostring_safe (val, &sym_name)) {
+			g_set_error (err, CFG_RCL_ERROR, EINVAL, "symbol name is missing");
+			return FALSE;
+		}
+	}
+	else {
+		sym_name = ucl_object_key (obj);
+	}
+	if (ucl_object_todouble_safe (obj, &symbol_score)) {
+		/* Short form: the object itself is the weight */
+		description = NULL;
+	}
+	else if (obj->type == UCL_OBJECT) {
+		val = ucl_object_find_key (obj, "weight");
+		if (val == NULL || !ucl_object_todouble_safe (val, &symbol_score)) {
+			g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid symbol score: %s", sym_name);
+			return FALSE;
+		}
+		val = ucl_object_find_key (obj, "description");
+		if (val != NULL) {
+			description = ucl_object_tostring (val);
+		}
+		val = ucl_object_find_key (obj, "group");
+		if (val != NULL) {
+			/* On conversion failure the default "ungrouped" is kept */
+			ucl_object_tostring_safe (val, &group);
+		}
+	}
+	else {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid symbol type: %s", sym_name);
+		return FALSE;
+	}
+
+	sym_def = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct symbol_def));
+	score_ptr = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (gdouble));
+
+	*score_ptr = symbol_score;
+	sym_def->weight_ptr = score_ptr;
+	sym_def->name = rspamd_mempool_strdup (cfg->cfg_pool, sym_name);
+	/* The description is not copied, it still points into the rcl object */
+	sym_def->description = (gchar *)description;
+
+	g_hash_table_insert (metric->symbols, sym_def->name, score_ptr);
+
+	if ((metric_list = g_hash_table_lookup (cfg->metrics_symbols, sym_def->name)) == NULL) {
+		metric_list = g_list_prepend (NULL, metric);
+		rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_list_free, metric_list);
+		g_hash_table_insert (cfg->metrics_symbols, sym_def->name, metric_list);
+	}
+	else {
+		/*
+		 * Slow but keep start element of list in safe: appending to a
+		 * non-empty list does not change its head, so the pointer stored
+		 * in the hash table stays valid
+		 */
+		if (!g_list_find (metric_list, metric)) {
+			metric_list = g_list_append (metric_list, metric);
+		}
+	}
+
+	/* Search for symbol group */
+	group_list = g_list_find_custom (cfg->symbols_groups, group, rspamd_symbols_group_find_func);
+	if (group_list == NULL) {
+		/* Create new group */
+		sym_group = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct symbols_group));
+		sym_group->name = rspamd_mempool_strdup (cfg->cfg_pool, group);
+		sym_group->symbols = NULL;
+		cfg->symbols_groups = g_list_prepend (cfg->symbols_groups, sym_group);
+	}
+	else {
+		sym_group = group_list->data;
+	}
+	/* Insert symbol */
+	sym_group->symbols = g_list_prepend (sym_group->symbols, sym_def);
+
+	return TRUE;
+}
+
+/**
+ * Handler of a "metric" section: finds the metric by its name or creates a new
+ * one, fills its actions and symbols, reads "grow_factor" and "subject" and
+ * registers a newly created metric in cfg.
+ * @param cfg configuration being filled
+ * @param obj rcl object of the section
+ * @param ud unused
+ * @param section unused
+ * @param err error pointer filled on failure
+ * @return TRUE if the section has been processed
+ */
+static gboolean
+rspamd_rcl_metric_handler (struct config_file *cfg, const ucl_object_t *obj,
+ gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+ const ucl_object_t *val, *cur;
+ const gchar *metric_name, *subject_name, *semicolon, *act_str;
+ struct metric *metric;
+ struct metric_action *action;
+ gdouble action_score, grow_factor;
+ gint action_value;
+ gboolean new = TRUE, have_actions = FALSE;
+ ucl_object_iter_t it = NULL;
+
+ /* A metric without a (string) name is treated as the default one */
+ val = ucl_object_find_key (obj, "name");
+ if (val == NULL || !ucl_object_tostring_safe (val, &metric_name)) {
+ metric_name = DEFAULT_METRIC;
+ }
+
+ metric = g_hash_table_lookup (cfg->metrics, metric_name);
+ if (metric == NULL) {
+ /* metric is NULL here, so a fresh structure is requested */
+ metric = check_metric_conf (cfg, metric);
+ /* NOTE(review): the name is not copied, presumably the rcl object outlives the metric - confirm */
+ metric->name = metric_name;
+ }
+ else {
+ new = FALSE;
+ }
+
+ /* Handle actions */
+ val = ucl_object_find_key (obj, "actions");
+ if (val != NULL) {
+ if (val->type != UCL_OBJECT) {
+ g_set_error (err, CFG_RCL_ERROR, EINVAL, "actions must be an object");
+ return FALSE;
+ }
+ /* Each key is an action name, each value is its score */
+ while ((cur = ucl_iterate_object (val, &it, true)) != NULL) {
+ if (!check_action_str (ucl_object_key (cur), &action_value) ||
+ !ucl_object_todouble_safe (cur, &action_score)) {
+ g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid action definition: %s", ucl_object_key (cur));
+ return FALSE;
+ }
+ action = &metric->actions[action_value];
+ action->action = action_value;
+ action->score = action_score;
+ }
+ }
+ else if (new) {
+ /* Switch to legacy mode */
+ /* "required_score" is stored as the score of the reject action */
+ val = ucl_object_find_key (obj, "required_score");
+ if (val != NULL && ucl_object_todouble_safe (val, &action_score)) {
+ action = &metric->actions[METRIC_ACTION_REJECT];
+ action->action = METRIC_ACTION_REJECT;
+ action->score = action_score;
+ have_actions = TRUE;
+ }
+ /* Legacy "action" strings contain ':' followed by the score */
+ val = ucl_object_find_key (obj, "action");
+ LL_FOREACH (val, cur) {
+ if (cur->type == UCL_STRING) {
+ act_str = ucl_object_tostring (cur);
+ semicolon = strchr (act_str, ':');
+ if (semicolon != NULL) {
+ /* The whole string, including the ":score" tail, is passed to check_action_str */
+ if (check_action_str (act_str, &action_value)) {
+ action_score = strtod (semicolon + 1, NULL);
+ action = &metric->actions[action_value];
+ action->action = action_value;
+ action->score = action_score;
+ have_actions = TRUE;
+ }
+ }
+ }
+ }
+ if (new && !have_actions) {
+ g_set_error (err, CFG_RCL_ERROR, EINVAL, "metric %s has no actions", metric_name);
+ return FALSE;
+ }
+ }
+
+ /* Handle symbols */
+ val = ucl_object_find_key (obj, "symbols");
+ if (val != NULL) {
+ if (val->type == UCL_ARRAY) {
+ /* NOTE(review): val is dereferenced right below; confirm that value.ov cannot be NULL for an empty array */
+ val = val->value.ov;
+ }
+ if (val->type != UCL_OBJECT) {
+ g_set_error (err, CFG_RCL_ERROR, EINVAL, "symbols must be an object");
+ return FALSE;
+ }
+ /* The iterator was used for actions above, so it is reset here */
+ it = NULL;
+ while ((cur = ucl_iterate_object (val, &it, true)) != NULL) {
+ if (!rspamd_rcl_insert_symbol (cfg, metric, cur, FALSE, err)) {
+ return FALSE;
+ }
+ }
+ }
+ else {
+ /* Legacy variant */
+ val = ucl_object_find_key (obj, "symbol");
+ if (val != NULL) {
+ if (val->type == UCL_ARRAY) {
+ /* NOTE(review): same unchecked dereference as for "symbols" above - confirm */
+ val = val->value.ov;
+ }
+ if (val->type != UCL_OBJECT) {
+ g_set_error (err, CFG_RCL_ERROR, EINVAL, "symbols must be an object");
+ return FALSE;
+ }
+ /* Legacy symbols carry their name in the "name" attribute */
+ LL_FOREACH (val, cur) {
+ if (!rspamd_rcl_insert_symbol (cfg, metric, cur, TRUE, err)) {
+ return FALSE;
+ }
+ }
+ }
+ else if (new) {
+ g_set_error (err, CFG_RCL_ERROR, EINVAL, "metric %s has no symbols", metric_name);
+ return FALSE;
+ }
+ }
+
+ val = ucl_object_find_key (obj, "grow_factor");
+ if (val && ucl_object_todouble_safe (val, &grow_factor)) {
+ metric->grow_factor = grow_factor;
+ }
+
+ val = ucl_object_find_key (obj, "subject");
+ if (val && ucl_object_tostring_safe (val, &subject_name)) {
+ /* The subject string is not copied either */
+ metric->subject = (gchar *)subject_name;
+ }
+
+ /* Insert the resulting metric */
+ if (new) {
+ g_hash_table_insert (cfg->metrics, (void *)metric->name, metric);
+ cfg->metrics_list = g_list_prepend (cfg->metrics_list, metric);
+ }
+
+ return TRUE;
+}
+
+/**
+ * Handler of a "worker" section: creates a worker configuration for the
+ * requested type, parses its bind sockets and default attributes, runs the
+ * worker specific parsers registered in cfg->wrk_parsers and adds the worker
+ * to cfg->workers.
+ * @param cfg configuration being filled
+ * @param obj rcl object of the section
+ * @param ud unused
+ * @param section section description (source of default handlers)
+ * @param err error pointer filled on failure
+ * @return TRUE if the worker has been configured
+ */
+static gboolean
+rspamd_rcl_worker_handler (struct config_file *cfg, const ucl_object_t *obj,
+ gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+ const ucl_object_t *val, *cur;
+ ucl_object_iter_t it = NULL;
+ const gchar *worker_type, *worker_bind;
+ GQuark qtype;
+ struct worker_conf *wrk;
+ struct rspamd_worker_cfg_parser *wparser;
+ struct rspamd_worker_param_parser *whandler;
+
+ val = ucl_object_find_key (obj, "type");
+ if (val != NULL && ucl_object_tostring_safe (val, &worker_type)) {
+ /* g_quark_try_string yields 0 for a string that has no quark yet */
+ qtype = g_quark_try_string (worker_type);
+ if (qtype != 0) {
+ wrk = check_worker_conf (cfg, NULL);
+ wrk->worker = get_worker_by_type (qtype);
+ if (wrk->worker == NULL) {
+ g_set_error (err, CFG_RCL_ERROR, EINVAL, "unknown worker type: %s", worker_type);
+ return FALSE;
+ }
+ wrk->type = qtype;
+ /* The init function is optional, its result is kept as the worker context */
+ if (wrk->worker->worker_init_func) {
+ wrk->ctx = wrk->worker->worker_init_func (cfg);
+ }
+ }
+ else {
+ g_set_error (err, CFG_RCL_ERROR, EINVAL, "unknown worker type: %s", worker_type);
+ return FALSE;
+ }
+ }
+ else {
+ g_set_error (err, CFG_RCL_ERROR, EINVAL, "undefined worker type");
+ return FALSE;
+ }
+
+ /* "bind_socket" may be a single value or an array of values */
+ val = ucl_object_find_key (obj, "bind_socket");
+ if (val != NULL) {
+ if (val->type == UCL_ARRAY) {
+ val = val->value.ov;
+ }
+ LL_FOREACH (val, cur) {
+ /* Non-string elements are silently skipped */
+ if (!ucl_object_tostring_safe (cur, &worker_bind)) {
+ continue;
+ }
+ if (!parse_bind_line (cfg, wrk, worker_bind)) {
+ g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot parse bind line: %s", worker_bind);
+ return FALSE;
+ }
+ }
+ }
+
+ /* The whole section object is kept as worker options (constness is cast away) */
+ wrk->options = (ucl_object_t *)obj;
+
+ if (!rspamd_rcl_section_parse_defaults (section, cfg, obj, wrk, err)) {
+ return FALSE;
+ }
+
+ /* Parse other attributes */
+ /* Worker specific parsers are looked up by the quark of the worker type */
+ HASH_FIND_INT (cfg->wrk_parsers, (gint *)&qtype, wparser);
+ if (wparser != NULL && obj->type == UCL_OBJECT) {
+ while ((cur = ucl_iterate_object (obj, &it, true)) != NULL) {
+ /* Keys without a registered handler are ignored here */
+ HASH_FIND_STR (wparser->parsers, ucl_object_key (cur), whandler);
+ if (whandler != NULL) {
+ if (!whandler->handler (cfg, cur, &whandler->parser, section, err)) {
+ return FALSE;
+ }
+ }
+ }
+ if (wparser->def_obj_parser != NULL) {
+ /* NOTE(review): err is not passed to def_obj_parser, so this failure path may leave err unset - confirm */
+ if (! wparser->def_obj_parser (obj, wparser->def_ud)) {
+ return FALSE;
+ }
+ }
+ }
+
+ cfg->workers = g_list_prepend (cfg->workers, wrk);
+
+ return TRUE;
+}
+
+/**
+ * Make sure that the global tables used by lua configuration exist and export
+ * the configuration itself as the `rspamd_config` global
+ * @param cfg configuration to export
+ * @param L lua state
+ */
+static void
+rspamd_rcl_set_lua_globals (struct config_file *cfg, lua_State *L)
+{
+	static const gchar *const table_names[] = {
+		"config", "metrics", "composites", "classifiers"
+	};
+	const gint ntables = (gint)G_N_ELEMENTS (table_names);
+	struct config_file **pcfg;
+	gint i;
+
+	/*
+	 * Each lookup leaves one value on the stack; a missing global is
+	 * replaced by a new empty table
+	 */
+	for (i = 0; i < ntables; i ++) {
+		lua_getglobal (L, table_names[i]);
+		if (lua_isnil (L, -1)) {
+			lua_newtable (L);
+			lua_setglobal (L, table_names[i]);
+		}
+	}
+
+	pcfg = lua_newuserdata (L, sizeof (struct config_file *));
+	lua_setclass (L, "rspamd{config}", -1);
+	*pcfg = cfg;
+	lua_setglobal (L, "rspamd_config");
+
+	/* Clear stack from globals */
+	lua_pop (L, ntables);
+}
+
+/**
+ * Handler of a "lua" attribute: loads and executes a lua file. The file is
+ * loaded from within its own directory and the previous working directory is
+ * restored afterwards.
+ * @param cfg configuration that owns the lua state and the memory pool
+ * @param obj rcl object holding the path of the lua file
+ * @param ud unused
+ * @param section unused
+ * @param err error pointer filled on failure
+ * @return TRUE if the file has been loaded and executed
+ */
+static gboolean
+rspamd_rcl_lua_handler (struct config_file *cfg, const ucl_object_t *obj,
+	gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+	const gchar *lua_src = rspamd_mempool_strdup (cfg->cfg_pool, ucl_object_tostring (obj));
+	gchar *cur_dir = NULL, *lua_dir, *lua_file, *tmp1, *tmp2;
+	lua_State *L = cfg->lua_state;
+	gboolean dir_changed = FALSE, res = FALSE;
+
+	/* dirname and basename may modify their argument, so work on copies */
+	tmp1 = g_strdup (lua_src);
+	tmp2 = g_strdup (lua_src);
+	lua_dir = dirname (tmp1);
+	lua_file = basename (tmp2);
+	if (lua_dir == NULL || lua_file == NULL) {
+		g_set_error (err, CFG_RCL_ERROR, ENOENT, "cannot find to %s: %s",
+				lua_src, strerror (errno));
+		goto out;
+	}
+
+	cur_dir = g_malloc (PATH_MAX);
+	if (getcwd (cur_dir, PATH_MAX) == NULL) {
+		/* cur_dir is not valid here, so it must not be used for chdir */
+		g_set_error (err, CFG_RCL_ERROR, ENOENT, "cannot get current directory: %s",
+				strerror (errno));
+		goto out;
+	}
+	if (chdir (lua_dir) == -1) {
+		g_set_error (err, CFG_RCL_ERROR, ENOENT, "cannot chdir to %s: %s",
+				lua_dir, strerror (errno));
+		goto out;
+	}
+	dir_changed = TRUE;
+
+	/* Load file */
+	if (luaL_loadfile (L, lua_file) != 0) {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot load lua file %s: %s",
+				lua_src, lua_tostring (L, -1));
+		/* Remove the error message from the lua stack */
+		lua_pop (L, 1);
+		goto out;
+	}
+	rspamd_rcl_set_lua_globals (cfg, L);
+	/* Now do it */
+	if (lua_pcall (L, 0, LUA_MULTRET, 0) != 0) {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot init lua file %s: %s",
+				lua_src, lua_tostring (L, -1));
+		/* Remove the error message from the lua stack */
+		lua_pop (L, 1);
+		goto out;
+	}
+	res = TRUE;
+
+out:
+	/* Go back only if the working directory has really been changed */
+	if (dir_changed && chdir (cur_dir) == -1) {
+		msg_err ("cannot chdir to %s: %s", cur_dir, strerror (errno));
+	}
+	g_free (cur_dir);
+	g_free (tmp1);
+	g_free (tmp2);
+
+	return res;
+}
+
+/**
+ * Register lua modules found by the specified path: a directory is expanded
+ * with the pattern `<path>*.lua`, anything else is registered as a single file
+ * @param cfg configuration that collects script modules
+ * @param path file or directory to add
+ * @param err error pointer filled on failure
+ * @return TRUE if the modules have been registered
+ */
+static gboolean
+rspamd_rcl_add_module_path (struct config_file *cfg, const gchar *path, GError **err)
+{
+	struct stat path_st;
+	struct script_module *mod;
+	glob_t matches;
+	gchar *glob_pattern;
+	guint idx;
+
+	if (stat (path, &path_st) == -1) {
+		g_set_error (err, CFG_RCL_ERROR, errno, "cannot stat path %s, %s", path, strerror (errno));
+		return FALSE;
+	}
+
+	if (!S_ISDIR (path_st.st_mode)) {
+		/* Handle single file */
+		mod = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct script_module));
+		mod->path = rspamd_mempool_strdup (cfg->cfg_pool, path);
+		cfg->script_modules = g_list_prepend (cfg->script_modules, mod);
+
+		return TRUE;
+	}
+
+	/* Handle directory: the pattern is appended to the path as is */
+	matches.gl_offs = 0;
+	glob_pattern = g_strconcat (path, "*.lua", NULL);
+
+	if (glob (glob_pattern, GLOB_DOOFFS, NULL, &matches) != 0) {
+		g_set_error (err, CFG_RCL_ERROR, errno, "glob failed for %s, %s", glob_pattern, strerror (errno));
+		g_free (glob_pattern);
+		return FALSE;
+	}
+
+	for (idx = 0; idx < matches.gl_pathc; idx ++) {
+		mod = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct script_module));
+		mod->path = rspamd_mempool_strdup (cfg->cfg_pool, matches.gl_pathv[idx]);
+		cfg->script_modules = g_list_prepend (cfg->script_modules, mod);
+	}
+	globfree (&matches);
+	g_free (glob_pattern);
+
+	return TRUE;
+}
+
+/**
+ * Handler of the "modules" section: accepts either an object with "path"
+ * attributes or a plain string and registers each path as a module path
+ * @param cfg configuration being filled
+ * @param obj rcl object of the section
+ * @param ud unused
+ * @param section unused
+ * @param err error pointer filled on failure
+ * @return TRUE if all paths have been registered
+ */
+static gboolean
+rspamd_rcl_modules_handler (struct config_file *cfg, const ucl_object_t *obj,
+	gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+	const ucl_object_t *elt;
+	const gchar *path_str;
+
+	if (obj->type == UCL_OBJECT) {
+		/* Walk over all "path" values, skipping those that are not strings */
+		for (elt = ucl_object_find_key (obj, "path"); elt != NULL; elt = elt->next) {
+			if (!ucl_object_tostring_safe (elt, &path_str)) {
+				continue;
+			}
+			if (!rspamd_rcl_add_module_path (cfg,
+					rspamd_mempool_strdup (cfg->cfg_pool, path_str), err)) {
+				return FALSE;
+			}
+		}
+
+		return TRUE;
+	}
+
+	if (!ucl_object_tostring_safe (obj, &path_str)) {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "module parameter has wrong type (must be an object or a string)");
+		return FALSE;
+	}
+
+	if (!rspamd_rcl_add_module_path (cfg,
+			rspamd_mempool_strdup (cfg->cfg_pool, path_str), err)) {
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/**
+ * Handler of a "statfile" section inside a classifier: creates a statfile
+ * configuration, parses binlog attributes and default attributes, attaches
+ * the statfile to the classifier and guesses the spam/ham class from the
+ * symbol name when "spam" is not set explicitly.
+ * @param cfg configuration being filled
+ * @param obj rcl object of the statfile
+ * @param ud classifier configuration (struct classifier_config) that owns the statfile
+ * @param section section description (source of default handlers)
+ * @param err error pointer filled on failure
+ * @return TRUE if the statfile has been configured
+ */
+static gboolean
+rspamd_rcl_statfile_handler (struct config_file *cfg, const ucl_object_t *obj,
+	gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+	struct classifier_config *ccf = ud;
+	const ucl_object_t *val;
+	struct statfile *st;
+	const gchar *data;
+	gdouble binlog_rotate;
+	GList *labels;
+
+	st = check_statfile_conf (cfg, NULL);
+
+	val = ucl_object_find_key (obj, "binlog");
+	if (val != NULL && ucl_object_tostring_safe (val, &data)) {
+		if (st->binlog == NULL) {
+			st->binlog = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_binlog_params));
+		}
+		/* Any value except "master" and "slave" means no affinity */
+		if (g_ascii_strcasecmp (data, "master") == 0) {
+			st->binlog->affinity = AFFINITY_MASTER;
+		}
+		else if (g_ascii_strcasecmp (data, "slave") == 0) {
+			st->binlog->affinity = AFFINITY_SLAVE;
+		}
+		else {
+			st->binlog->affinity = AFFINITY_NONE;
+		}
+		/* Parse remaining binlog attributes */
+		val = ucl_object_find_key (obj, "binlog_rotate");
+		if (val != NULL && ucl_object_todouble_safe (val, &binlog_rotate)) {
+			st->binlog->rotate_time = binlog_rotate;
+		}
+		val = ucl_object_find_key (obj, "binlog_master");
+		if (val != NULL && ucl_object_tostring_safe (val, &data)) {
+			if (!parse_host_port (cfg->cfg_pool, data, &st->binlog->master_addr, &st->binlog->master_port)) {
+				msg_err ("cannot parse master address: %s", data);
+				/* Report the failure via err as all other error paths do */
+				g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot parse master address: %s", data);
+				return FALSE;
+			}
+		}
+	}
+
+	if (!rspamd_rcl_section_parse_defaults (section, cfg, obj, st, err)) {
+		return FALSE;
+	}
+
+	ccf->statfiles = g_list_prepend (ccf->statfiles, st);
+	if (st->label != NULL) {
+		labels = g_hash_table_lookup (ccf->labels, st->label);
+		if (labels != NULL) {
+			/* Appending keeps the head stored in the hash table unchanged */
+			labels = g_list_append (labels, st);
+		}
+		else {
+			g_hash_table_insert (ccf->labels, st->label, g_list_prepend (NULL, st));
+		}
+	}
+	if (st->symbol != NULL) {
+		g_hash_table_insert (cfg->classifiers_symbols, st->symbol, st);
+	}
+	else {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "statfile must have a symbol defined");
+		return FALSE;
+	}
+
+	if (st->path == NULL) {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "statfile must have a path defined");
+		return FALSE;
+	}
+
+	/* The statfile object itself is kept as options (constness is cast away) */
+	st->opts = (ucl_object_t *)obj;
+
+	val = ucl_object_find_key (obj, "spam");
+	if (val == NULL) {
+		msg_info ("statfile %s has no explicit 'spam' setting, trying to guess by symbol", st->symbol);
+		/* "spam" is checked first, then "ham" */
+		if (rspamd_strncasestr (st->symbol, "spam", strlen (st->symbol)) != NULL) {
+			st->is_spam = TRUE;
+		}
+		else if (rspamd_strncasestr (st->symbol, "ham", strlen (st->symbol)) != NULL) {
+			st->is_spam = FALSE;
+		}
+		else {
+			g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot guess spam setting from %s", st->symbol);
+			return FALSE;
+		}
+		msg_info ("guessed that statfile with symbol %s is %s", st->symbol, st->is_spam ?
+				"spam" : "ham");
+	}
+
+	return TRUE;
+}
+
+/**
+ * Handler of a "classifier" section: reuses an already configured classifier
+ * of the same type or creates a new one, then processes statfiles, the
+ * tokenizer and generic options of the section.
+ * @param cfg configuration being filled
+ * @param obj rcl object of the section
+ * @param ud unused
+ * @param section section description, its "statfile" subsection is used for statfiles
+ * @param err error pointer filled on failure
+ * @return TRUE if the classifier has been configured
+ */
+static gboolean
+rspamd_rcl_classifier_handler (struct config_file *cfg, const ucl_object_t *obj,
+	gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+	const ucl_object_t *val, *cur;
+	ucl_object_iter_t it = NULL;
+	const gchar *key, *type;
+	struct classifier_config *ccf, *found = NULL;
+	gboolean res = TRUE;
+	struct rspamd_rcl_section *stat_section;
+	GList *cur_cl;
+
+	val = ucl_object_find_key (obj, "type");
+	if (val == NULL || !ucl_object_tostring_safe (val, &type)) {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "classifier should have type defined");
+		return FALSE;
+	}
+
+	/* Look for a classifier of the same type configured earlier */
+	cur_cl = cfg->classifiers;
+	while (cur_cl != NULL) {
+		ccf = cur_cl->data;
+		if (g_ascii_strcasecmp (ccf->classifier->name, type) == 0) {
+			found = ccf;
+			break;
+		}
+		cur_cl = g_list_next (cur_cl);
+	}
+
+	if (found == NULL) {
+		ccf = check_classifier_conf (cfg, NULL);
+		ccf->classifier = get_classifier (type);
+		/*
+		 * A configuration without a classifier must not get into
+		 * cfg->classifiers: the search loop above dereferences
+		 * ccf->classifier of every element
+		 */
+		if (ccf->classifier == NULL) {
+			g_set_error (err, CFG_RCL_ERROR, EINVAL, "unknown classifier type: %s", type);
+			return FALSE;
+		}
+	}
+	else {
+		ccf = found;
+	}
+
+	HASH_FIND_STR (section->subsections, "statfile", stat_section);
+
+	while ((val = ucl_iterate_object (obj, &it, true)) != NULL && res) {
+		key = ucl_object_key (val);
+		if (key != NULL) {
+			if (g_ascii_strcasecmp (key, "statfile") == 0) {
+				/* Several statfiles with the same key are chained via next */
+				LL_FOREACH (val, cur) {
+					res = rspamd_rcl_statfile_handler (cfg, cur, ccf, stat_section, err);
+					if (!res) {
+						return FALSE;
+					}
+				}
+			}
+			else if (g_ascii_strcasecmp (key, "type") == 0 && val->type == UCL_STRING) {
+				/* Already handled above */
+				continue;
+			}
+			else if (g_ascii_strcasecmp (key, "tokenizer") == 0 && val->type == UCL_STRING) {
+				ccf->tokenizer = get_tokenizer (ucl_object_tostring (val));
+			}
+			else {
+				/* Just insert a value of option to the hash */
+				g_hash_table_insert (ccf->opts, (gpointer)key, (gpointer)ucl_object_tostring_forced (val));
+			}
+		}
+	}
+
+	if (found == NULL) {
+		cfg->classifiers = g_list_prepend (cfg->classifiers, ccf);
+	}
+
+	return res;
+}
+
+/**
+ * Handler of a "composite" section: parses the expression of a composite
+ * symbol and stores it in cfg->composite_symbols; a composite seen for the
+ * first time is also registered as a virtual symbol.
+ * @param cfg configuration being filled
+ * @param obj rcl object of the section
+ * @param ud unused
+ * @param section unused
+ * @param err error pointer filled on failure
+ * @return TRUE if the composite has been stored
+ */
+static gboolean
+rspamd_rcl_composite_handler (struct config_file *cfg, const ucl_object_t *obj,
+	gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+	const ucl_object_t *val;
+	struct expression *expr;
+	struct rspamd_composite *composite, *old;
+	const gchar *composite_name, *composite_expression;
+	gboolean new = TRUE;
+
+	val = ucl_object_find_key (obj, "name");
+	if (val == NULL || !ucl_object_tostring_safe (val, &composite_name)) {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "composite must have a name defined");
+		return FALSE;
+	}
+
+	old = g_hash_table_lookup (cfg->composite_symbols, composite_name);
+	if (old != NULL) {
+		msg_warn ("composite %s is redefined", composite_name);
+		new = FALSE;
+	}
+
+	val = ucl_object_find_key (obj, "expression");
+	if (val == NULL || !ucl_object_tostring_safe (val, &composite_expression)) {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "composite must have an expression defined");
+		return FALSE;
+	}
+
+	if ((expr = parse_expression (cfg->cfg_pool, (gchar *)composite_expression)) == NULL) {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot parse composite expression: %s", composite_expression);
+		return FALSE;
+	}
+
+	composite = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct rspamd_composite));
+	composite->expr = expr;
+	/*
+	 * A redefinition replaces the table entry without growing the table,
+	 * so `size + 1` would be handed out again to the next new composite.
+	 * Keep the id of the replaced composite to leave ids unique.
+	 */
+	if (old != NULL) {
+		composite->id = old->id;
+	}
+	else {
+		composite->id = g_hash_table_size (cfg->composite_symbols) + 1;
+	}
+	g_hash_table_insert (cfg->composite_symbols, (gpointer)composite_name, composite);
+
+	if (new) {
+		register_virtual_symbol (&cfg->cache, composite_name, 1);
+	}
+
+	return TRUE;
+}
+
+/**
+ * Handler without own logic: it only runs the default handlers of the section
+ * and passes struct config_file as the structure to be filled
+ */
+static gboolean
+rspamd_rcl_empty_handler (struct config_file *cfg, const ucl_object_t *obj,
+	gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+	gboolean parsed;
+
+	parsed = rspamd_rcl_section_parse_defaults (section, cfg, obj, cfg, err);
+
+	return parsed;
+}
+
+/**
+ * Create a section description and link it to the hash of sections
+ * @param top hash of sections to add the new section to (keyed by name)
+ * @param name the name of the section, the pointer is stored as is
+ * @param handler handler function for all attributes
+ * @param type type of object handled by a handler
+ * @param required whether at least one of these sections is required
+ * (this value is not stored by the function)
+ * @param strict_type turn on strict check for types for this section
+ * @return newly created structure
+ */
+static inline struct rspamd_rcl_section*
+rspamd_rcl_add_section (struct rspamd_rcl_section **top,
+	const gchar *name, rspamd_rcl_handler_t handler,
+	enum ucl_type type, gboolean required, gboolean strict_type)
+{
+	struct rspamd_rcl_section *sec;
+
+	/* Zeroed allocation: all fields not set below stay empty */
+	sec = g_slice_alloc0 (sizeof (struct rspamd_rcl_section));
+	sec->strict_type = strict_type;
+	sec->type = type;
+	sec->handler = handler;
+	sec->name = name;
+
+	HASH_ADD_KEYPTR (hh, *top, sec->name, strlen (sec->name), sec);
+
+	return sec;
+}
+
+/**
+ * Create a default handler of an attribute and link it to the section
+ * @param section section that gets the handler
+ * @param name name of param, the pointer is stored as the hash key
+ * @param handler handler of param
+ * @param offset offset in a structure
+ * @param flags flags for the parser
+ * @return newly created structure
+ */
+static inline struct rspamd_rcl_default_handler_data *
+rspamd_rcl_add_default_handler (struct rspamd_rcl_section *section, const gchar *name,
+	rspamd_rcl_handler_t handler, gsize offset, gint flags)
+{
+	struct rspamd_rcl_default_handler_data *dh;
+
+	/* Zeroed allocation: all fields not set below stay empty */
+	dh = g_slice_alloc0 (sizeof (struct rspamd_rcl_default_handler_data));
+	dh->pd.flags = flags;
+	dh->pd.offset = offset;
+	dh->handler = handler;
+	dh->key = name;
+
+	HASH_ADD_KEYPTR (hh, section->default_parser, dh->key, strlen (dh->key), dh);
+
+	return dh;
+}
+
+struct rspamd_rcl_section*
+rspamd_rcl_config_init (void)
+{
+ struct rspamd_rcl_section *new = NULL, *sub, *ssub;
+
+ /* TODO: add all known rspamd sections here */
+ /**
+ * Logging section
+ */
+ sub = rspamd_rcl_add_section (&new, "logging", rspamd_rcl_logging_handler, UCL_OBJECT,
+ FALSE, TRUE);
+ /* Default handlers */
+ rspamd_rcl_add_default_handler (sub, "log_buffer", rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct config_file, log_buf_size), 0);
+ rspamd_rcl_add_default_handler (sub, "log_urls", rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET (struct config_file, log_urls), 0);
+ rspamd_rcl_add_default_handler (sub, "debug_ip", rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET (struct config_file, debug_ip_map), 0);
+ rspamd_rcl_add_default_handler (sub, "debug_symbols", rspamd_rcl_parse_struct_string_list,
+ G_STRUCT_OFFSET (struct config_file, debug_symbols), 0);
+ rspamd_rcl_add_default_handler (sub, "log_color", rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET (struct config_file, log_color), 0);
+ /**
+ * Options section
+ */
+ sub = rspamd_rcl_add_section (&new, "options", rspamd_rcl_options_handler, UCL_OBJECT,
+ FALSE, TRUE);
+ rspamd_rcl_add_default_handler (sub, "cache_file", rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET (struct config_file, cache_filename), RSPAMD_CL_FLAG_STRING_PATH);
+ rspamd_rcl_add_default_handler (sub, "dns_nameserver", rspamd_rcl_parse_struct_string_list,
+ G_STRUCT_OFFSET (struct config_file, nameservers), 0);
+ rspamd_rcl_add_default_handler (sub, "dns_timeout", rspamd_rcl_parse_struct_time,
+ G_STRUCT_OFFSET (struct config_file, dns_timeout), RSPAMD_CL_FLAG_TIME_FLOAT);
+ rspamd_rcl_add_default_handler (sub, "dns_retransmits", rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct config_file, dns_retransmits), RSPAMD_CL_FLAG_INT_32);
+ rspamd_rcl_add_default_handler (sub, "dns_sockets", rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct config_file, dns_io_per_server), RSPAMD_CL_FLAG_INT_32);
+ rspamd_rcl_add_default_handler (sub, "raw_mode", rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET (struct config_file, raw_mode), 0);
+ rspamd_rcl_add_default_handler (sub, "one_shot", rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET (struct config_file, one_shot_mode), 0);
+ rspamd_rcl_add_default_handler (sub, "check_attachements", rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET (struct config_file, check_text_attachements), 0);
+ rspamd_rcl_add_default_handler (sub, "tempdir", rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET (struct config_file, temp_dir), RSPAMD_CL_FLAG_STRING_PATH);
+ rspamd_rcl_add_default_handler (sub, "pidfile", rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET (struct config_file, pid_file), RSPAMD_CL_FLAG_STRING_PATH);
+ rspamd_rcl_add_default_handler (sub, "filters", rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET (struct config_file, filters_str), 0);
+ rspamd_rcl_add_default_handler (sub, "sync_interval", rspamd_rcl_parse_struct_time,
+ G_STRUCT_OFFSET (struct config_file, statfile_sync_interval), RSPAMD_CL_FLAG_TIME_INTEGER);
+ rspamd_rcl_add_default_handler (sub, "sync_timeout", rspamd_rcl_parse_struct_time,
+ G_STRUCT_OFFSET (struct config_file, statfile_sync_timeout), RSPAMD_CL_FLAG_TIME_INTEGER);
+ rspamd_rcl_add_default_handler (sub, "max_diff", rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct config_file, max_diff), RSPAMD_CL_FLAG_INT_SIZE);
+ rspamd_rcl_add_default_handler (sub, "map_watch_interval", rspamd_rcl_parse_struct_time,
+ G_STRUCT_OFFSET (struct config_file, map_timeout), RSPAMD_CL_FLAG_TIME_FLOAT);
+ rspamd_rcl_add_default_handler (sub, "dynamic_conf", rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET (struct config_file, dynamic_conf), 0);
+ rspamd_rcl_add_default_handler (sub, "rrd", rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET (struct config_file, rrd_file), RSPAMD_CL_FLAG_STRING_PATH);
+ rspamd_rcl_add_default_handler (sub, "history_file", rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET (struct config_file, history_file), RSPAMD_CL_FLAG_STRING_PATH);
+ rspamd_rcl_add_default_handler (sub, "use_mlock", rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET (struct config_file, mlock_statfile_pool), 0);
+ rspamd_rcl_add_default_handler (sub, "strict_protocol_headers", rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET (struct config_file, strict_protocol_headers), 0);
+
+ /**
+ * Metric section
+ */
+ sub = rspamd_rcl_add_section (&new, "metric", rspamd_rcl_metric_handler, UCL_OBJECT,
+ FALSE, TRUE);
+
+ /**
+ * Worker section
+ */
+ sub = rspamd_rcl_add_section (&new, "worker", rspamd_rcl_worker_handler, UCL_OBJECT,
+ FALSE, TRUE);
+ rspamd_rcl_add_default_handler (sub, "count", rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct worker_conf, count), RSPAMD_CL_FLAG_INT_16);
+ rspamd_rcl_add_default_handler (sub, "max_files", rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct worker_conf, rlimit_nofile), RSPAMD_CL_FLAG_INT_32);
+ rspamd_rcl_add_default_handler (sub, "max_core", rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct worker_conf, rlimit_maxcore), RSPAMD_CL_FLAG_INT_32);
+
+ /**
+ * Lua handler
+ */
+ sub = rspamd_rcl_add_section (&new, "lua", rspamd_rcl_lua_handler, UCL_STRING,
+ FALSE, TRUE);
+
+ /**
+ * Modules handler
+ */
+ sub = rspamd_rcl_add_section (&new, "modules", rspamd_rcl_modules_handler, UCL_OBJECT,
+ FALSE, FALSE);
+
+ /**
+ * Classifiers handler
+ */
+ sub = rspamd_rcl_add_section (&new, "classifier", rspamd_rcl_classifier_handler, UCL_OBJECT,
+ FALSE, TRUE);
+ ssub = rspamd_rcl_add_section (&sub->subsections, "statfile", rspamd_rcl_statfile_handler,
+ UCL_OBJECT, TRUE, TRUE);
+ rspamd_rcl_add_default_handler (ssub, "symbol", rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET (struct statfile, symbol), 0);
+ rspamd_rcl_add_default_handler (ssub, "path", rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET (struct statfile, path), RSPAMD_CL_FLAG_STRING_PATH);
+ rspamd_rcl_add_default_handler (ssub, "label", rspamd_rcl_parse_struct_string,
+ G_STRUCT_OFFSET (struct statfile, label), 0);
+ rspamd_rcl_add_default_handler (ssub, "size", rspamd_rcl_parse_struct_integer,
+ G_STRUCT_OFFSET (struct statfile, size), RSPAMD_CL_FLAG_INT_SIZE);
+ rspamd_rcl_add_default_handler (ssub, "spam", rspamd_rcl_parse_struct_boolean,
+ G_STRUCT_OFFSET (struct statfile, is_spam), 0);
+
+ /**
+ * Composites handler
+ */
+ sub = rspamd_rcl_add_section (&new, "composite", rspamd_rcl_composite_handler, UCL_OBJECT,
+ FALSE, TRUE);
+
+ return new;
+}
+
+/*
+ * Find a section by a '/'-separated path (for example "classifier/statfile").
+ *
+ * Each path component is looked up among the subsections of the section
+ * matched by the previous component; the first component is looked up in top.
+ *
+ * Returns top when path is NULL or has no components (empty string), the
+ * section named by the last component when every component matches, and
+ * NULL otherwise.
+ */
+struct rspamd_rcl_section *
+rspamd_rcl_config_get_section (struct rspamd_rcl_section *top,
+		const char *path)
+{
+	struct rspamd_rcl_section *cur, *found;
+	char **path_components;
+	guint ncomponents, i;
+
+	if (path == NULL) {
+		return top;
+	}
+
+	path_components = g_strsplit_set (path, "/", -1);
+	ncomponents = g_strv_length (path_components);
+
+	/* With zero components the loop never runs, so the answer is top itself */
+	found = top;
+	cur = top;
+
+	for (i = 0; i < ncomponents; i ++) {
+		if (cur == NULL) {
+			/* The previous section has no subsections to search in */
+			found = NULL;
+			break;
+		}
+		HASH_FIND_STR (cur, path_components[i], found);
+		if (found == NULL) {
+			break;
+		}
+		/*
+		 * Descend one level. Using found itself as the next hash head would
+		 * search the very same table again, because every element of a
+		 * uthash table shares the table of its siblings.
+		 */
+		cur = found->subsections;
+	}
+
+	g_strfreev (path_components);
+
+	return found;
+}
+
+/*
+ * Walk every known top-level section and feed the matching part of obj to
+ * the section handler. Keys of obj that no section claims are ignored.
+ * The section finaliser, when set, runs after each section regardless of
+ * whether the section was present in obj.
+ * Returns FALSE and fills err on the first failure; on success obj is
+ * remembered in cfg->rcl_obj.
+ */
+gboolean
+rspamd_read_rcl_config (struct rspamd_rcl_section *top,
+		struct config_file *cfg, const ucl_object_t *obj, GError **err)
+{
+	struct rspamd_rcl_section *sec, *next_sec;
+	const ucl_object_t *value, *elt;
+
+	if (obj->type != UCL_OBJECT) {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "top configuration must be an object");
+		return FALSE;
+	}
+
+	HASH_ITER (hh, top, sec, next_sec) {
+		value = ucl_object_find_key (obj, sec->name);
+
+		if (value != NULL) {
+			/* Enforce the declared type only for strictly typed sections */
+			if (sec->strict_type && sec->type != value->type) {
+				g_set_error (err, CFG_RCL_ERROR, EINVAL, "object in section %s has invalid type", sec->name);
+				return FALSE;
+			}
+			/* The same key may occur several times: handle each occurrence */
+			LL_FOREACH (value, elt) {
+				if (!sec->handler (cfg, elt, NULL, sec, err)) {
+					return FALSE;
+				}
+			}
+		}
+		else if (sec->required) {
+			g_set_error (err, CFG_RCL_ERROR, ENOENT, "required section %s is missing", sec->name);
+			return FALSE;
+		}
+
+		if (sec->fin != NULL) {
+			sec->fin (cfg, sec->fin_ud);
+		}
+	}
+
+	cfg->rcl_obj = (ucl_object_t *)obj;
+
+	return TRUE;
+}
+
+/*
+ * Apply the default (generic) handlers registered for a section: for every
+ * registered key that is present in obj, call its handler with ptr as the
+ * structure to fill. Keys without a registered handler are ignored.
+ * Returns FALSE as soon as one handler fails.
+ */
+gboolean rspamd_rcl_section_parse_defaults (struct rspamd_rcl_section *section,
+		struct config_file *cfg, const ucl_object_t *obj, gpointer ptr,
+		GError **err)
+{
+	struct rspamd_rcl_default_handler_data *dh, *dh_next;
+	const ucl_object_t *value;
+
+	if (obj->type != UCL_OBJECT) {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "default configuration must be an object");
+		return FALSE;
+	}
+
+	HASH_ITER (hh, section->default_parser, dh, dh_next) {
+		value = ucl_object_find_key (obj, dh->key);
+		if (value == NULL) {
+			continue;
+		}
+
+		/* The target structure is only known now, so bind it before the call */
+		dh->pd.user_struct = ptr;
+		if (!dh->handler (cfg, value, &dh->pd, section, err)) {
+			return FALSE;
+		}
+	}
+
+	return TRUE;
+}
+
+/*
+ * Generic handler: store obj as a string in the gchar* field described by
+ * ud (a struct rspamd_rcl_struct_parser). Strings are copied into the config
+ * pool; integers, floats and booleans are formatted into a 32 byte pool
+ * buffer. Any other type is rejected with EINVAL.
+ */
+gboolean
+rspamd_rcl_parse_struct_string (struct config_file *cfg, const ucl_object_t *obj,
+		gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+	struct rspamd_rcl_struct_parser *parser = ud;
+	const gsize buflen = 32;
+	gchar **dest;
+
+	dest = (gchar **)(((gchar *)parser->user_struct) + parser->offset);
+
+	if (obj->type == UCL_STRING) {
+		*dest = rspamd_mempool_strdup (cfg->cfg_pool, ucl_copy_value_trash (obj));
+		return TRUE;
+	}
+
+	if (obj->type != UCL_INT && obj->type != UCL_FLOAT && obj->type != UCL_BOOLEAN) {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert object or array to string");
+		return FALSE;
+	}
+
+	/* All remaining scalar types are printed into a fixed size buffer */
+	*dest = rspamd_mempool_alloc (cfg->cfg_pool, buflen);
+
+	if (obj->type == UCL_INT) {
+		rspamd_snprintf (*dest, buflen, "%L", obj->value.iv);
+	}
+	else if (obj->type == UCL_FLOAT) {
+		rspamd_snprintf (*dest, buflen, "%f", obj->value.dv);
+	}
+	else {
+		rspamd_snprintf (*dest, buflen, "%b", (gboolean)obj->value.iv);
+	}
+
+	return TRUE;
+}
+
+/*
+ * Generic handler: convert obj to an integer and store it in the field
+ * described by ud (a struct rspamd_rcl_struct_parser). The parser flags
+ * select the width of the target field; without a matching INT flag the
+ * target is treated as a plain gint.
+ */
+gboolean
+rspamd_rcl_parse_struct_integer (struct config_file *cfg, const ucl_object_t *obj,
+		gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+	struct rspamd_rcl_struct_parser *parser = ud;
+	gchar *field;
+	gint64 parsed;
+
+	if (!ucl_object_toint_safe (obj, &parsed)) {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer");
+		return FALSE;
+	}
+
+	field = ((gchar *)parser->user_struct) + parser->offset;
+
+	switch (parser->flags) {
+	case RSPAMD_CL_FLAG_INT_32:
+		*(gint32 *)field = parsed;
+		break;
+	case RSPAMD_CL_FLAG_INT_64:
+		*(gint64 *)field = parsed;
+		break;
+	case RSPAMD_CL_FLAG_INT_SIZE:
+		*(gsize *)field = parsed;
+		break;
+	case RSPAMD_CL_FLAG_INT_16:
+		*(gint16 *)field = parsed;
+		break;
+	default:
+		*(gint *)field = parsed;
+		break;
+	}
+
+	return TRUE;
+}
+
+/*
+ * Generic handler: convert obj to a double and store it in the gdouble
+ * field described by ud (a struct rspamd_rcl_struct_parser).
+ */
+gboolean
+rspamd_rcl_parse_struct_double (struct config_file *cfg, const ucl_object_t *obj,
+		gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+	struct rspamd_rcl_struct_parser *parser = ud;
+	gdouble *dest = (gdouble *)(((gchar *)parser->user_struct) + parser->offset);
+
+	/* The conversion writes straight into the destination field */
+	if (ucl_object_todouble_safe (obj, dest)) {
+		return TRUE;
+	}
+
+	g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to double");
+
+	return FALSE;
+}
+
+/*
+ * Generic handler: convert obj to a double and store it as a time value in
+ * the field described by ud (a struct rspamd_rcl_struct_parser).
+ *
+ * The parser flags select the representation of the target field:
+ *  - RSPAMD_CL_FLAG_TIME_TIMEVAL:  struct timeval (whole part + microseconds)
+ *  - RSPAMD_CL_FLAG_TIME_TIMESPEC: struct timespec (whole part + nanoseconds)
+ *  - RSPAMD_CL_FLAG_TIME_FLOAT:    double, stored as parsed
+ *  - RSPAMD_CL_FLAG_TIME_INTEGER:  gint, parsed value multiplied by 1000
+ *  - RSPAMD_CL_FLAG_TIME_UINT_32:  guint32, parsed value multiplied by 1000
+ *
+ * Returns FALSE with EINVAL if obj is not convertible or the flags name
+ * none of the representations above.
+ */
+gboolean
+rspamd_rcl_parse_struct_time (struct config_file *cfg, const ucl_object_t *obj,
+		gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+	struct rspamd_rcl_struct_parser *pd = ud;
+	union {
+		gint *psec;
+		guint32 *pu32;
+		gdouble *pdv;
+		struct timeval *ptv;
+		struct timespec *pts;
+	} target;
+	gdouble val;
+
+	if (!ucl_object_todouble_safe (obj, &val)) {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to double");
+		return FALSE;
+	}
+
+	if (pd->flags == RSPAMD_CL_FLAG_TIME_TIMEVAL) {
+		target.ptv = (struct timeval *)(((gchar *)pd->user_struct) + pd->offset);
+		target.ptv->tv_sec = (glong)val;
+		target.ptv->tv_usec = (val - (glong)val) * 1000000;
+	}
+	else if (pd->flags == RSPAMD_CL_FLAG_TIME_TIMESPEC) {
+		target.pts = (struct timespec *)(((gchar *)pd->user_struct) + pd->offset);
+		target.pts->tv_sec = (glong)val;
+		/* Fractional part to nanoseconds: 10^9 per unit (was 10^12 by mistake) */
+		target.pts->tv_nsec = (val - (glong)val) * 1000000000LL;
+	}
+	else if (pd->flags == RSPAMD_CL_FLAG_TIME_FLOAT) {
+		target.pdv = (double *)(((gchar *)pd->user_struct) + pd->offset);
+		*target.pdv = val;
+	}
+	else if (pd->flags == RSPAMD_CL_FLAG_TIME_INTEGER) {
+		target.psec = (gint *)(((gchar *)pd->user_struct) + pd->offset);
+		*target.psec = val * 1000;
+	}
+	else if (pd->flags == RSPAMD_CL_FLAG_TIME_UINT_32) {
+		target.pu32 = (guint32 *)(((gchar *)pd->user_struct) + pd->offset);
+		*target.pu32 = val * 1000;
+	}
+	else {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid flags to parse time value");
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/*
+ * Convert one scalar element of a list to a string owned by the config pool.
+ * On success stores the string in *out and returns TRUE; returns FALSE and
+ * leaves *out untouched when the element is not a string, integer, float or
+ * boolean.
+ */
+static gboolean
+rspamd_rcl_string_list_elt (struct config_file *cfg, const ucl_object_t *elt,
+		gchar **out)
+{
+	const gsize num_str_len = 32;
+	gchar *val;
+
+	switch (elt->type) {
+	case UCL_STRING:
+		val = rspamd_mempool_strdup (cfg->cfg_pool, ucl_copy_value_trash (elt));
+		break;
+	case UCL_INT:
+		val = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len);
+		rspamd_snprintf (val, num_str_len, "%L", elt->value.iv);
+		break;
+	case UCL_FLOAT:
+		val = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len);
+		rspamd_snprintf (val, num_str_len, "%f", elt->value.dv);
+		break;
+	case UCL_BOOLEAN:
+		val = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len);
+		rspamd_snprintf (val, num_str_len, "%b", (gboolean)elt->value.iv);
+		break;
+	default:
+		return FALSE;
+	}
+
+	*out = val;
+
+	return TRUE;
+}
+
+/*
+ * Generic handler: convert an array of scalars to strings and add them to
+ * the GList* field described by ud (a struct rspamd_rcl_struct_parser).
+ *
+ * Elements are prepended, so the resulting list holds them in the reverse
+ * order of the array. The strings live in the config pool; the list nodes
+ * are released by a pool destructor, which is registered whenever this call
+ * added at least one node, including when a later element fails to convert.
+ *
+ * Returns FALSE with EINVAL if obj is not an array or contains an element
+ * that cannot be represented as a string.
+ */
+gboolean
+rspamd_rcl_parse_struct_string_list (struct config_file *cfg, const ucl_object_t *obj,
+		gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+	struct rspamd_rcl_struct_parser *pd = ud;
+	GList **target;
+	gchar *val;
+	const ucl_object_t *cur;
+	ucl_object_iter_t iter = NULL;
+	gboolean added = FALSE, ret = TRUE;
+
+	target = (GList **)(((gchar *)pd->user_struct) + pd->offset);
+
+	if (obj->type != UCL_ARRAY) {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "an array of strings is expected");
+		return FALSE;
+	}
+
+	while ((cur = ucl_iterate_object (obj, &iter, true)) != NULL) {
+		if (!rspamd_rcl_string_list_elt (cfg, cur, &val)) {
+			g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert an object or array to string");
+			ret = FALSE;
+			break;
+		}
+		*target = g_list_prepend (*target, val);
+		added = TRUE;
+	}
+
+	/*
+	 * Nodes that were already prepended must be released with the pool even
+	 * if a later element was rejected, otherwise they would leak.
+	 */
+	if (added) {
+		rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_list_free, *target);
+	}
+
+	return ret;
+}
+
+/*
+ * Generic handler: store obj in the gboolean field described by ud
+ * (a struct rspamd_rcl_struct_parser). Booleans and integers are accepted;
+ * any non-zero value is stored as TRUE and zero as FALSE. Other types are
+ * rejected with EINVAL.
+ */
+gboolean
+rspamd_rcl_parse_struct_boolean (struct config_file *cfg, const ucl_object_t *obj,
+		gpointer ud, struct rspamd_rcl_section *section, GError **err)
+{
+	struct rspamd_rcl_struct_parser *pd = ud;
+	gboolean *target;
+
+	target = (gboolean *)(((gchar *)pd->user_struct) + pd->offset);
+
+	if (obj->type == UCL_BOOLEAN || obj->type == UCL_INT) {
+		/*
+		 * Normalise instead of assigning the 64 bit value directly: a plain
+		 * assignment truncates to the width of gboolean, so a non-zero value
+		 * whose low bits are all zero would be stored as FALSE.
+		 */
+		*target = (obj->value.iv != 0) ? TRUE : FALSE;
+	}
+	else {
+		g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert an object to boolean");
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/*
+ * Register a handler for the option `name` of workers of the given type.
+ * The per-type parser record is created on first use. If the option is
+ * already registered for this worker type, a warning is logged and the
+ * existing registration is kept.
+ */
+void
+rspamd_rcl_register_worker_option (struct config_file *cfg, gint type, const gchar *name,
+		rspamd_rcl_handler_t handler, gpointer target, gsize offset, gint flags)
+{
+	struct rspamd_worker_cfg_parser *wparser = NULL;
+	struct rspamd_worker_param_parser *param = NULL;
+
+	/* Find the parser record of this worker type or create it */
+	HASH_FIND_INT (cfg->wrk_parsers, &type, wparser);
+	if (wparser == NULL) {
+		wparser = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*wparser));
+		wparser->type = type;
+		HASH_ADD_INT (cfg->wrk_parsers, type, wparser);
+	}
+
+	HASH_FIND_STR (wparser->parsers, name, param);
+	if (param == NULL) {
+		param = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*param));
+		param->handler = handler;
+		param->name = name;
+		param->parser.user_struct = target;
+		param->parser.offset = offset;
+		param->parser.flags = flags;
+		HASH_ADD_KEYPTR (hh, wparser->parsers, name, strlen (name), param);
+	}
+	else {
+		msg_warn ("handler for parameter %s is already registered for worker type %s",
+				name, g_quark_to_string (type));
+	}
+}
+
+
+/*
+ * Set the default object parser (and its user data) for workers of the
+ * given type, creating the per-type parser record on first use. A parser
+ * set earlier for the same type is replaced.
+ */
+void
+rspamd_rcl_register_worker_parser (struct config_file *cfg, gint type,
+		gboolean (*func)(ucl_object_t *, gpointer), gpointer ud)
+{
+	struct rspamd_worker_cfg_parser *wparser = NULL;
+
+	HASH_FIND_INT (cfg->wrk_parsers, &type, wparser);
+
+	if (wparser == NULL) {
+		/* First registration for this worker type */
+		wparser = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*wparser));
+		wparser->type = type;
+		HASH_ADD_INT (cfg->wrk_parsers, type, wparser);
+	}
+
+	wparser->def_ud = ud;
+	wparser->def_obj_parser = func;
+}
diff --git a/src/libserver/cfg_rcl.h b/src/libserver/cfg_rcl.h
new file mode 100644
index 000000000..99839d1ea
--- /dev/null
+++ b/src/libserver/cfg_rcl.h
@@ -0,0 +1,238 @@
+/* Copyright (c) 2013, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CFG_RCL_H_
+#define CFG_RCL_H_
+
+#include "config.h"
+#include "ucl/include/ucl.h"
+#include "uthash.h"
+
+#define CFG_RCL_ERROR cfg_rcl_error_quark ()
+static inline GQuark
+cfg_rcl_error_quark (void)
+{
+ return g_quark_from_static_string ("cfg-rcl-error-quark");
+}
+
+struct rspamd_rcl_section;
+struct config_file;
+
+struct rspamd_rcl_struct_parser { /* tells a generic rspamd_rcl_parse_struct_* handler where and how to store a parsed value */
+ gpointer user_struct; /**< base address of the structure that is being filled */
+ goffset offset; /**< byte offset of the target field from user_struct */
+ enum { /* representation of the target field, compared for equality by the handlers */
+ RSPAMD_CL_FLAG_TIME_FLOAT = 0x1 << 0, /**< time: target is a double, stored as parsed */
+ RSPAMD_CL_FLAG_TIME_TIMEVAL = 0x1 << 1, /**< time: target is a struct timeval */
+ RSPAMD_CL_FLAG_TIME_TIMESPEC = 0x1 << 2, /**< time: target is a struct timespec */
+ RSPAMD_CL_FLAG_TIME_INTEGER = 0x1 << 3, /**< time: target is a gint, parsed value multiplied by 1000 */
+ RSPAMD_CL_FLAG_TIME_UINT_32 = 0x1 << 4, /**< time: target is a guint32, parsed value multiplied by 1000 */
+ RSPAMD_CL_FLAG_INT_16 = 0x1 << 5, /**< integer: target is a gint16 */
+ RSPAMD_CL_FLAG_INT_32 = 0x1 << 6, /**< integer: target is a gint32 */
+ RSPAMD_CL_FLAG_INT_64 = 0x1 << 7, /**< integer: target is a gint64 */
+ RSPAMD_CL_FLAG_INT_SIZE = 0x1 << 8, /**< integer: target is a gsize */
+ RSPAMD_CL_FLAG_STRING_PATH = 0x1 << 9 /**< string: set for path-like options such as pidfile or tempdir */
+ } flags; /**< the integer handler treats the target as a plain gint when no INT flag matches */
+};
+
+/**
+ * Common handler type
+ * @param cfg configuration
+ * @param obj object to parse
+ * @param ud user data (depends on section)
+ * @param section section that is being parsed
+ * @param err error object
+ * @return TRUE if a section has been parsed
+ */
+typedef gboolean (*rspamd_rcl_handler_t) (struct config_file *cfg, const ucl_object_t *obj,
+ gpointer ud, struct rspamd_rcl_section *section, GError **err);
+
+/**
+ * A handler type that is called at the end of section parsing
+ * @param cfg configuration
+ * @param ud user data
+ */
+typedef void (*rspamd_rcl_section_fin_t)(struct config_file *cfg, gpointer ud);
+
+struct rspamd_rcl_default_handler_data { /* one generic key handler registered for a section */
+ struct rspamd_rcl_struct_parser pd; /**< target description; user_struct is filled in right before the handler is called */
+ const gchar *key; /**< key that is looked up in the parsed object */
+ rspamd_rcl_handler_t handler; /**< handler invoked with &pd as user data when the key is present */
+ UT_hash_handle hh; /**< uthash handle */
+};
+
+struct rspamd_rcl_section { /* description of one configuration section and how to parse it */
+ const gchar *name; /**< name of section */
+ rspamd_rcl_handler_t handler; /**< handler of section attributes */
+ enum ucl_type type; /**< type of attribute */
+ gboolean required; /**< whether this param is required */
+ gboolean strict_type; /**< whether we need strict type */
+ UT_hash_handle hh; /**< uthash handle */
+ struct rspamd_rcl_section *subsections; /**< hash table of subsections */
+ struct rspamd_rcl_default_handler_data *default_parser; /**< generic parsing fields */
+ rspamd_rcl_section_fin_t fin; /**< called by rspamd_read_rcl_config after the section is processed, even if it was absent */
+ gpointer fin_ud; /**< opaque pointer passed to fin */
+};
+
+/**
+ * Init common sections known to rspamd
+ * @return top section
+ */
+struct rspamd_rcl_section* rspamd_rcl_config_init (void);
+
+/**
+ * Get a section specified by path; path components are separated by the '/' character
+ * @param top top section
+ * @param path '/' divided path
+ * @return the section found, or NULL if no section matches the path
+ */
+struct rspamd_rcl_section *rspamd_rcl_config_get_section (struct rspamd_rcl_section *top,
+ const char *path);
+
+/**
+ * Read RCL configuration and parse it to a config file
+ * @param top top section
+ * @param cfg target configuration
+ * @param obj object to handle
+ * @param err error pointer
+ * @return TRUE if an object can be parsed
+ */
+gboolean rspamd_read_rcl_config (struct rspamd_rcl_section *top,
+ struct config_file *cfg, const ucl_object_t *obj, GError **err);
+
+
+/**
+ * Parse default structure for a section
+ * @param section section
+ * @param cfg config file
+ * @param obj object to parse
+ * @param ptr ptr to pass
+ * @param err error ptr
+ * @return TRUE if the object has been parsed
+ */
+gboolean rspamd_rcl_section_parse_defaults (struct rspamd_rcl_section *section,
+ struct config_file *cfg, const ucl_object_t *obj, gpointer ptr,
+ GError **err);
+/**
+ * Here is a section of common handlers that accepts rcl_struct_parser
+ * which itself contains a struct pointer and the offset of a member in a
+ * specific structure
+ */
+
+/**
+ * Parse a string field of a structure
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a string value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_string (struct config_file *cfg, const ucl_object_t *obj,
+ gpointer ud, struct rspamd_rcl_section *section, GError **err);
+
+/**
+ * Parse an integer field of a structure
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_integer (struct config_file *cfg, const ucl_object_t *obj,
+ gpointer ud, struct rspamd_rcl_section *section, GError **err);
+
+
+/**
+ * Parse a float field of a structure
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_double (struct config_file *cfg, const ucl_object_t *obj,
+ gpointer ud, struct rspamd_rcl_section *section, GError **err);
+
+/**
+ * Parse a time field of a structure
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure (flags mean the exact structure used)
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_time (struct config_file *cfg, const ucl_object_t *obj,
+ gpointer ud, struct rspamd_rcl_section *section, GError **err);
+
+/**
+ * Parse a string list field of a structure presented by a GList* object
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure (the target field must be a GList *)
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_string_list (struct config_file *cfg, const ucl_object_t *obj,
+ gpointer ud, struct rspamd_rcl_section *section, GError **err);
+
+/**
+ * Parse a boolean field of a structure
+ * @param cfg config pointer
+ * @param obj object to parse
+ * @param ud struct_parser structure (the target field must be a gboolean)
+ * @param section the current section
+ * @param err error pointer
+ * @return TRUE if a value has been successfully parsed
+ */
+gboolean rspamd_rcl_parse_struct_boolean (struct config_file *cfg, const ucl_object_t *obj,
+ gpointer ud, struct rspamd_rcl_section *section, GError **err);
+
+/**
+ * Utility functions
+ */
+
+/**
+ * Register new parser for a worker type of an option with the specified name
+ * @param cfg config structure
+ * @param type type of worker (GQuark)
+ * @param name name of option
+ * @param handler handler of option
+ * @param target opaque target structure
+ * @param offset offset inside a structure
+ * @param flags parser flags stored for the handler (RSPAMD_CL_FLAG_* values)
+ */
+void rspamd_rcl_register_worker_option (struct config_file *cfg, gint type, const gchar *name,
+ rspamd_rcl_handler_t handler, gpointer target, gsize offset, gint flags);
+
+/**
+ * Register a default parser for a worker
+ * @param cfg config structure
+ * @param type type of worker (GQuark)
+ * @param func handler function
+ * @param ud userdata for handler function
+ */
+void rspamd_rcl_register_worker_parser (struct config_file *cfg, gint type,
+ gboolean (*func)(ucl_object_t *, gpointer), gpointer ud);
+#endif /* CFG_RCL_H_ */
diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c
new file mode 100644
index 000000000..2ca846ebd
--- /dev/null
+++ b/src/libserver/cfg_utils.c
@@ -0,0 +1,969 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "config.h"
+
+#include "cfg_file.h"
+#include "main.h"
+#include "filter.h"
+#include "settings.h"
+#include "classifiers/classifiers.h"
+#include "lua/lua_common.h"
+#include "kvstorage_config.h"
+#include "map.h"
+#include "dynamic_cfg.h"
+
+#define DEFAULT_SCORE 10.0
+
+#define DEFAULT_RLIMIT_NOFILE 2048
+#define DEFAULT_RLIMIT_MAXCORE 0
+#define DEFAULT_MAP_TIMEOUT 10
+
+struct rspamd_ucl_map_cbdata { /* NOTE(review): presumably the state shared by rspamd_ucl_read_cb and rspamd_ucl_fin_cb - confirm */
+ struct config_file *cfg; /* configuration this map data belongs to */
+ GString *buf; /* presumably accumulates the map content chunk by chunk - verify against rspamd_ucl_read_cb */
+};
+static gchar* rspamd_ucl_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data);
+static void rspamd_ucl_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data);
+
+/*
+ * Resolve an address given as already split "host[:port][:priority]" tokens.
+ *
+ * tokens[0] is the host; the special values "*v4" and "*v6" request the
+ * IPv4 or IPv6 wildcard address. The meaning of the following tokens depends
+ * on which output pointers are supplied:
+ *  - port != NULL:                   tokens[1] is the port, tokens[2] the priority
+ *  - port == NULL, priority != NULL: tokens[1] is the priority
+ *  - both NULL:                      tokens[1] is only passed to the resolver
+ *
+ * @param pool if not NULL, *addr is allocated from this pool; otherwise
+ *        *addr must already point to a buffer large enough for the
+ *        textual address
+ * @param tokens NULL terminated token vector with at least one element
+ * @param addr receives the textual form of the first resolved address
+ * @param port receives the numeric port if it can be parsed (may be NULL)
+ * @param priority receives the priority if it can be parsed (may be NULL)
+ * @param default_port service used when no port token is given; 0 for none
+ * @return TRUE if the host has been resolved; errno is preserved
+ */
+static gboolean
+parse_host_port_priority_strv (rspamd_mempool_t *pool, gchar **tokens,
+		gchar **addr, guint16 *port, guint *priority, guint default_port)
+{
+	gchar *err_str, portbuf[8];
+	const gchar *host, *service, *port_tok = NULL, *prio_tok = NULL;
+	struct addrinfo hints, *res;
+	gulong port_parsed, priority_parsed;
+	gint r, saved_errno = errno;
+	gboolean ret = FALSE;
+	union {
+		struct sockaddr_in v4;
+		struct sockaddr_in6 v6;
+	} addr_holder;
+
+	memset (&hints, 0, sizeof (hints));
+	hints.ai_socktype = SOCK_STREAM; /* Type of the socket */
+	hints.ai_flags = AI_NUMERICSERV;
+
+	host = tokens[0];
+
+	if (strcmp (host, "*v6") == 0) {
+		hints.ai_family = AF_INET6;
+		hints.ai_flags |= AI_PASSIVE;
+		host = NULL;
+	}
+	else if (strcmp (host, "*v4") == 0) {
+		hints.ai_family = AF_INET;
+		hints.ai_flags |= AI_PASSIVE;
+		host = NULL;
+	}
+	else {
+		hints.ai_family = AF_UNSPEC;
+	}
+
+	/*
+	 * Assign a role to each optional token. The host is kept in its own
+	 * variable so that picking the priority token cannot replace the name
+	 * that is resolved below.
+	 */
+	if (tokens[1] != NULL) {
+		if (port == NULL && priority != NULL) {
+			prio_tok = tokens[1];
+		}
+		else {
+			port_tok = tokens[1];
+			if (priority != NULL) {
+				prio_tok = tokens[2];
+			}
+		}
+	}
+
+	if (port_tok != NULL) {
+		rspamd_strlcpy (portbuf, port_tok, sizeof (portbuf));
+		service = portbuf;
+		if (port != NULL) {
+			errno = 0;
+			port_parsed = strtoul (port_tok, &err_str, 10);
+			if (*err_str != '\0' || errno != 0) {
+				msg_warn ("cannot parse port: %s, at symbol %c, error: %s", port_tok, *err_str, strerror (errno));
+				/* Not a number: let the resolver try it as a service name */
+				hints.ai_flags &= ~AI_NUMERICSERV;
+			}
+			else if (port_parsed > G_MAXUINT16) {
+				errno = ERANGE;
+				msg_warn ("cannot parse port: %s, error: %s", port_tok, strerror (errno));
+				hints.ai_flags &= ~AI_NUMERICSERV;
+			}
+			else {
+				*port = port_parsed;
+			}
+		}
+	}
+	else if (default_port != 0) {
+		rspamd_snprintf (portbuf, sizeof (portbuf), "%ud", default_port);
+		service = portbuf;
+	}
+	else {
+		service = NULL;
+	}
+
+	if (prio_tok != NULL) {
+		errno = 0;
+		priority_parsed = strtoul (prio_tok, &err_str, 10);
+		if (*err_str != '\0' || errno != 0) {
+			msg_warn ("cannot parse priority: %s, at symbol %c, error: %s", prio_tok, *err_str, strerror (errno));
+		}
+		else {
+			*priority = priority_parsed;
+		}
+	}
+
+	if ((r = getaddrinfo (host, service, &hints, &res)) == 0) {
+		/* Only the first result is used */
+		memcpy (&addr_holder, res->ai_addr, MIN (sizeof (addr_holder), res->ai_addrlen));
+		if (res->ai_family == AF_INET) {
+			if (pool != NULL) {
+				*addr = rspamd_mempool_alloc (pool, INET_ADDRSTRLEN + 1);
+			}
+			inet_ntop (res->ai_family, &addr_holder.v4.sin_addr, *addr, INET_ADDRSTRLEN + 1);
+		}
+		else {
+			if (pool != NULL) {
+				*addr = rspamd_mempool_alloc (pool, INET6_ADDRSTRLEN + 1);
+			}
+			inet_ntop (res->ai_family, &addr_holder.v6.sin6_addr, *addr, INET6_ADDRSTRLEN + 1);
+		}
+		freeaddrinfo (res);
+		ret = TRUE;
+	}
+	else {
+		msg_err ("address resolution for %s failed: %s", tokens[0], gai_strerror (r));
+	}
+
+	/* Restore errno */
+	errno = saved_errno;
+
+	return ret;
+}
+
+/*
+ * Parse a "host[:port][:priority]" string and resolve the host.
+ * See parse_host_port_priority_strv for the meaning of the arguments.
+ * Returns FALSE if str is NULL, contains no tokens or cannot be resolved.
+ */
+gboolean
+parse_host_port_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port, guint *priority)
+{
+	gchar **tokens;
+	gboolean ret = FALSE;
+
+	if (str == NULL) {
+		return FALSE;
+	}
+
+	tokens = g_strsplit_set (str, ":", 0);
+
+	if (tokens != NULL && tokens[0] != NULL) {
+		ret = parse_host_port_priority_strv (pool, tokens, addr, port, priority, 0);
+	}
+
+	/* An empty input yields an empty vector that still has to be released */
+	g_strfreev (tokens);
+
+	return ret;
+}
+
+/* Parse "host[:port]": same as parse_host_port_priority without a priority */
+gboolean
+parse_host_port (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port)
+{
+	gboolean parsed;
+
+	parsed = parse_host_port_priority (pool, str, addr, port, NULL);
+
+	return parsed;
+}
+
+/* Parse "host[:priority]": same as parse_host_port_priority without a port */
+gboolean
+parse_host_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint *priority)
+{
+	gboolean parsed;
+
+	parsed = parse_host_port_priority (pool, str, addr, NULL, priority);
+
+	return parsed;
+}
+
+/*
+ * Parse one bind line of a worker and prepend the resulting entries to
+ * cf->bind_conf. Supported forms:
+ *  - a line starting with '/' or '.': unix socket path, stored as is
+ *  - "*[:port]":     two entries, IPv4 wildcard and IPv6 wildcard
+ *  - "systemd:<n>":  socket passed by systemd, <n> is stored in cnf->ai
+ *  - "host[:port]":  a resolvable host with an optional port
+ *
+ * Returns TRUE if the line has been parsed, FALSE if str is NULL or empty,
+ * if address resolution fails, or if the systemd socket index is missing or
+ * not a number.
+ */
+gboolean
+parse_bind_line (struct config_file *cfg, struct worker_conf *cf, const gchar *str)
+{
+	struct rspamd_worker_bind_conf *cnf;
+	gchar **tokens, *tmp, *err = NULL;
+	gboolean ret = TRUE;
+
+	if (str == NULL) {
+		return FALSE;
+	}
+
+	tokens = g_strsplit_set (str, ":", 0);
+	if (!tokens || !tokens[0]) {
+		/* An empty vector is still an allocation */
+		g_strfreev (tokens);
+		return FALSE;
+	}
+
+	cnf = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_bind_conf));
+	cnf->bind_port = DEFAULT_BIND_PORT;
+	cnf->bind_host = rspamd_mempool_strdup (cfg->cfg_pool, str);
+	cnf->ai = AF_UNSPEC;
+
+	if (*tokens[0] == '/' || *tokens[0] == '.') {
+		cnf->ai = AF_UNIX;
+		LL_PREPEND (cf->bind_conf, cnf);
+	}
+	else if (strcmp (tokens[0], "*") == 0) {
+		/* We need to add two listen entries: one for ipv4 and one for ipv6 */
+		tmp = tokens[0];
+		tokens[0] = "*v4";
+		cnf->ai = AF_INET;
+		if ((ret = parse_host_port_priority_strv (cfg->cfg_pool, tokens,
+				&cnf->bind_host, &cnf->bind_port, NULL, DEFAULT_BIND_PORT))) {
+			LL_PREPEND (cf->bind_conf, cnf);
+		}
+		cnf = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_bind_conf));
+		cnf->bind_port = DEFAULT_BIND_PORT;
+		cnf->bind_host = rspamd_mempool_strdup (cfg->cfg_pool, str);
+		cnf->ai = AF_INET6;
+		tokens[0] = "*v6";
+		if ((ret &= parse_host_port_priority_strv (cfg->cfg_pool, tokens,
+				&cnf->bind_host, &cnf->bind_port, NULL, DEFAULT_BIND_PORT))) {
+			LL_PREPEND (cf->bind_conf, cnf);
+		}
+		/* Put the allocated token back so that g_strfreev releases it */
+		tokens[0] = tmp;
+	}
+	else if (strcmp (tokens[0], "systemd") == 0) {
+		/* The actual socket will be passed by systemd environment */
+		cnf->is_systemd = TRUE;
+		if (tokens[1] != NULL) {
+			cnf->ai = strtoul (tokens[1], &err, 10);
+		}
+		if (tokens[1] != NULL && (err == NULL || *err == '\0')) {
+			LL_PREPEND (cf->bind_conf, cnf);
+		}
+		else {
+			/* Missing or malformed socket index: nothing has been added */
+			msg_err ("cannot parse bind line: %s", str);
+			ret = FALSE;
+		}
+	}
+	else {
+		if ((ret = parse_host_port_priority_strv (cfg->cfg_pool, tokens,
+				&cnf->bind_host, &cnf->bind_port, NULL, DEFAULT_BIND_PORT))) {
+			LL_PREPEND (cf->bind_conf, cnf);
+		}
+	}
+
+	g_strfreev (tokens);
+
+	return ret;
+}
+
+/*
+ * Fill a configuration with the built-in defaults and create the hash
+ * tables it owns. Settings are initialised last via init_settings.
+ */
+void
+init_defaults (struct config_file *cfg)
+{
+	/* Memcached upstream */
+	cfg->memcached_protocol = TCP_TEXT;
+	cfg->memcached_maxerrors = DEFAULT_UPSTREAM_MAXERRORS;
+	cfg->memcached_dead_time = DEFAULT_UPSTREAM_DEAD_TIME;
+	cfg->memcached_error_time = DEFAULT_UPSTREAM_ERROR_TIME;
+
+	/* DNS resolver */
+	cfg->dns_timeout = 1000;
+	cfg->dns_retransmits = 5;
+	/* Throttle for 10 seconds once 20 errors have been seen */
+	cfg->dns_throttling_errors = 20;
+	cfg->dns_throttling_time = 10000;
+	/* Each DNS server gets 16 sockets */
+	cfg->dns_io_per_server = 16;
+
+	/* Statfile synchronisation */
+	cfg->statfile_sync_interval = 60000;
+	cfg->statfile_sync_timeout = 20000;
+
+	/* 20 Kb */
+	cfg->max_diff = 20480;
+
+	/* String keyed lookup tables */
+	cfg->metrics = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+	cfg->metrics_symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+	cfg->c_modules = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+	cfg->composite_symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+	cfg->classifiers_symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+	cfg->cfg_params = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+
+	cfg->map_timeout = DEFAULT_MAP_TIMEOUT;
+
+	/* Logging */
+	cfg->log_extended = TRUE;
+	cfg->log_level = G_LOG_LEVEL_WARNING;
+
+	init_settings (cfg);
+}
+
+/*
+ * Release everything owned by a configuration object: maps, the parsed ucl
+ * tree, hash tables, lists and finally the memory pool itself
+ */
+void
+free_config (struct config_file *cfg)
+{
+	GList *node;
+	struct symbols_group *group;
+
+	remove_all_maps (cfg);
+	ucl_obj_unref (cfg->rcl_obj);
+
+	/* g_hash_table_destroy removes all entries and drops one reference */
+	g_hash_table_destroy (cfg->metrics);
+	g_hash_table_destroy (cfg->c_modules);
+	g_hash_table_destroy (cfg->composite_symbols);
+	g_hash_table_destroy (cfg->cfg_params);
+	g_hash_table_destroy (cfg->metrics_symbols);
+	g_hash_table_destroy (cfg->classifiers_symbols);
+
+	/* Free per-group symbol lists first, then the list of groups */
+	for (node = cfg->symbols_groups; node != NULL; node = g_list_next (node)) {
+		group = node->data;
+		/* g_list_free accepts an empty (NULL) list */
+		g_list_free (group->symbols);
+	}
+	g_list_free (cfg->symbols_groups);
+
+	/* g_free is a no-op for NULL */
+	g_free (cfg->checksum);
+	g_list_free (cfg->classifiers);
+	g_list_free (cfg->metrics_list);
+
+	/* The pool must go last */
+	rspamd_mempool_delete (cfg->cfg_pool);
+}
+
+/*
+ * Look up option `opt_name' inside the top-level section `module_name' of
+ * the parsed configuration; returns NULL if the section or option is absent
+ */
+const ucl_object_t *
+get_module_opt (struct config_file *cfg, const gchar *module_name, const gchar *opt_name)
+{
+	const ucl_object_t *section;
+
+	section = ucl_obj_get_key (cfg->rcl_obj, module_name);
+	if (section == NULL) {
+		return NULL;
+	}
+
+	return ucl_obj_get_key (section, opt_name);
+}
+
+/*
+ * Parse a size limit: a decimal number optionally followed by a k/K, m/M or
+ * g/G suffix (multiples of 1024).
+ * @param limit string to parse
+ * @param len expected length of the numeric part when no suffix is used
+ * @return parsed value, or 0 for empty, invalid or out of range input
+ */
+guint64
+parse_limit (const gchar *limit, guint len)
+{
+	guint64 result, mult = 1;
+	gchar *end = NULL;
+
+	if (!limit || *limit == '\0' || len == 0) {
+		return 0;
+	}
+
+	errno = 0;
+	result = strtoull (limit, &end, 10);
+	if (errno == ERANGE) {
+		/* strtoull saturates on overflow, do not pass that value further */
+		msg_warn ("limit value '%s' is out of range", limit);
+		return 0;
+	}
+
+	switch (*end) {
+	case '\0':
+		break;
+	case 'k':
+	case 'K':
+		/* Kilobytes */
+		mult = 1024;
+		break;
+	case 'm':
+	case 'M':
+		/* Megabytes */
+		mult = 1048576;
+		break;
+	case 'g':
+	case 'G':
+		/* Gigabytes */
+		mult = 1073741824;
+		break;
+	default:
+		/* Garbage is tolerated only if it lies beyond the first len chars */
+		if (end - limit != (gint)len) {
+			msg_warn ("invalid limit value '%s' at position '%s'", limit, end);
+			return 0;
+		}
+		break;
+	}
+
+	if (mult > 1 && result > G_MAXUINT64 / mult) {
+		/* Multiplication would wrap around */
+		msg_warn ("limit value '%s' is out of range", limit);
+		return 0;
+	}
+
+	return result * mult;
+}
+
+/*
+ * Convert a textual boolean to a flag value (case insensitive):
+ * y/1/on/yes/true give 1, n/0/no/off/false give 0, anything else
+ * (including NULL and empty string) gives -1
+ */
+gchar
+parse_flag (const gchar *str)
+{
+	static const struct {
+		const gchar *word;
+		gchar value;
+	} known[] = {
+		{ "y", 1 }, { "1", 1 }, { "on", 1 }, { "yes", 1 }, { "true", 1 },
+		{ "n", 0 }, { "0", 0 }, { "no", 0 }, { "off", 0 }, { "false", 0 }
+	};
+	guint i;
+
+	if (!str || !*str) {
+		return -1;
+	}
+
+	for (i = 0; i < G_N_ELEMENTS (known); i ++) {
+		/* Whole-string comparison, so lengths must match as well */
+		if (g_ascii_strcasecmp (str, known[i].word) == 0) {
+			return known[i].value;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Compute MD5 checksum of the config file (cfg->cfg_name) and store it as a
+ * newly allocated string in cfg->checksum.
+ * @return TRUE on success, FALSE if the file cannot be opened, stat'ed or
+ * mapped
+ */
+gboolean
+get_config_checksum (struct config_file *cfg)
+{
+	gint fd;
+	void *map;
+	struct stat st;
+
+	/* Compute checksum for config file that should be used by xml dumper */
+	if ((fd = open (cfg->cfg_name, O_RDONLY)) == -1) {
+		msg_err ("config file %s is no longer available, cannot calculate checksum", cfg->cfg_name);
+		return FALSE;
+	}
+	/* Stat the opened descriptor, so size and content refer to the same file */
+	if (fstat (fd, &st) == -1) {
+		msg_err ("cannot stat %s: %s", cfg->cfg_name, strerror (errno));
+		close (fd);
+		return FALSE;
+	}
+
+	/* Now mmap this file to simplify reading process */
+	if ((map = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
+		msg_err ("cannot mmap %s: %s", cfg->cfg_name, strerror (errno));
+		close (fd);
+		return FALSE;
+	}
+	/* Mapping stays valid after the descriptor is closed */
+	close (fd);
+
+	/* Get checksum for a file */
+	cfg->checksum = g_compute_checksum_for_string (G_CHECKSUM_MD5, map, st.st_size);
+	munmap (map, st.st_size);
+
+	return TRUE;
+}
+/*
+ * Perform post load actions: detect clock resolution, make sure that the
+ * default metric exists, then call lua_post_load_config and
+ * init_dynamic_config
+ */
+void
+post_load_config (struct config_file *cfg)
+{
+#ifdef HAVE_CLOCK_GETTIME
+	struct timespec ts;
+	gint r;
+#endif
+	struct metric *def_metric;
+
+#ifdef HAVE_CLOCK_GETTIME
+# ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID
+	r = clock_getres (CLOCK_PROCESS_CPUTIME_ID, &ts);
+# elif defined(HAVE_CLOCK_VIRTUAL)
+	r = clock_getres (CLOCK_VIRTUAL, &ts);
+# else
+	r = clock_getres (CLOCK_REALTIME, &ts);
+# endif
+
+	if (r == 0 && ts.tv_nsec > 0 && ts.tv_nsec <= 1000000) {
+		/* Quotient is in [1, 1000000], so log10 is defined and non-negative */
+		cfg->clock_res = (gint)log10 (1000000 / ts.tv_nsec);
+		if (cfg->clock_res > 3) {
+			cfg->clock_res = 3;
+		}
+	}
+	else {
+		/*
+		 * clock_getres failed or the clock is coarser than 1 ms: avoid
+		 * division by zero and log10 (0), use the lowest resolution
+		 */
+		cfg->clock_res = 0;
+	}
+#else
+	/* For gettimeofday */
+	cfg->clock_res = 1;
+#endif
+
+	if ((def_metric = g_hash_table_lookup (cfg->metrics, DEFAULT_METRIC)) == NULL) {
+		/* No default metric in the config, create one with default reject score */
+		def_metric = check_metric_conf (cfg, NULL);
+		def_metric->name = DEFAULT_METRIC;
+		def_metric->actions[METRIC_ACTION_REJECT].score = DEFAULT_SCORE;
+		cfg->metrics_list = g_list_prepend (cfg->metrics_list, def_metric);
+		g_hash_table_insert (cfg->metrics, DEFAULT_METRIC, def_metric);
+	}
+
+	cfg->default_metric = def_metric;
+
+	/* Lua options */
+	(void)lua_post_load_config (cfg);
+	init_dynamic_config (cfg);
+}
+
+#if 0 /* Compiled out: both helpers below reference yytext and yylineno */
+void
+parse_err (const gchar *fmt, ...)
+{
+ va_list aq;
+ gchar logbuf[BUFSIZ], readbuf[32];
+ gint r;
+
+ va_start (aq, fmt);
+ rspamd_strlcpy (readbuf, yytext, sizeof (readbuf));
+
+ r = snprintf (logbuf, sizeof (logbuf), "config file parse error! line: %d, text: %s, reason: ", yylineno, readbuf);
+ r += vsnprintf (logbuf + r, sizeof (logbuf) - r, fmt, aq);
+
+ va_end (aq);
+ g_critical ("%s", logbuf); /* same as parse_warn, but logged as critical */
+}
+
+void
+parse_warn (const gchar *fmt, ...)
+{
+ va_list aq;
+ gchar logbuf[BUFSIZ], readbuf[32];
+ gint r;
+
+ va_start (aq, fmt);
+ rspamd_strlcpy (readbuf, yytext, sizeof (readbuf));
+
+ r = snprintf (logbuf, sizeof (logbuf), "config file parse warning! line: %d, text: %s, reason: ", yylineno, readbuf);
+ r += vsnprintf (logbuf + r, sizeof (logbuf) - r, fmt, aq);
+
+ va_end (aq);
+ g_warning ("%s", logbuf); /* same as parse_err, but logged as warning */
+}
+#endif /* 0 */
+
+/*
+ * Replace every \" sequence with a plain " in place; other backslashes are
+ * left untouched
+ */
+void
+unescape_quotes (gchar *line)
+{
+	gchar *cur;
+
+	for (cur = line; *cur != '\0'; cur ++) {
+		if (cur[0] == '\\' && cur[1] == '"') {
+			/* Shift the tail (with its terminating zero) over the backslash */
+			memmove (cur, cur + 1, strlen (cur + 1) + 1);
+		}
+	}
+}
+
+/*
+ * Split a comma separated line into a list of strings allocated from pool.
+ * Spaces following a comma are skipped, empty elements are ignored.
+ * Elements are prepended, so the list is in reverse order of appearance.
+ * The list itself is freed together with the pool.
+ * @return list of strings or NULL if line is NULL or has no elements
+ */
+GList *
+parse_comma_list (rspamd_mempool_t * pool, const gchar *line)
+{
+	GList *res = NULL;
+	const gchar *c, *p;
+	gchar *str;
+
+	if (line == NULL) {
+		return NULL;
+	}
+
+	c = line;
+	p = line;
+
+	for (;;) {
+		if (*p != ',' && *p != '\0') {
+			p ++;
+			continue;
+		}
+		/* End of element: either a comma or the end of line */
+		if (p > c) {
+			str = rspamd_mempool_alloc (pool, p - c + 1);
+			rspamd_strlcpy (str, c, p - c + 1);
+			res = g_list_prepend (res, str);
+		}
+		if (*p == '\0') {
+			break;
+		}
+		/* Skip comma and spaces after it */
+		p ++;
+		while (g_ascii_isspace (*p)) {
+			p ++;
+		}
+		c = p;
+	}
+
+	if (res != NULL) {
+		/* Registered after the loop as prepending changes the list head */
+		rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_list_free, res);
+	}
+
+	return res;
+}
+
+/*
+ * Return a usable classifier config: allocate a zeroed one from the config
+ * pool if c is NULL and create missing `opts' and `labels' tables (they are
+ * destroyed together with the pool)
+ */
+struct classifier_config *
+check_classifier_conf (struct config_file *cfg, struct classifier_config *c)
+{
+	struct classifier_config *conf = c;
+
+	if (conf == NULL) {
+		conf = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct classifier_config));
+	}
+
+	if (conf->opts == NULL) {
+		conf->opts = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+		rspamd_mempool_add_destructor (cfg->cfg_pool,
+			(rspamd_mempool_destruct_t) g_hash_table_destroy, conf->opts);
+	}
+
+	if (conf->labels == NULL) {
+		/* Values are GLists, released by the table itself */
+		conf->labels = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal,
+			NULL, (GDestroyNotify)g_list_free);
+		rspamd_mempool_add_destructor (cfg->cfg_pool,
+			(rspamd_mempool_destruct_t) g_hash_table_destroy, conf->labels);
+	}
+
+	return conf;
+}
+
+/*
+ * Return c, or a zeroed statfile allocated from the config pool if c is NULL
+ */
+struct statfile*
+check_statfile_conf (struct config_file *cfg, struct statfile *c)
+{
+	if (c != NULL) {
+		return c;
+	}
+
+	return rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct statfile));
+}
+
+/*
+ * Return c, or a new metric allocated from the config pool if c is NULL.
+ * A new metric gets grow factor 1.0, empty symbols and descriptions tables
+ * (destroyed together with the pool) and score -1.0 for every action
+ */
+struct metric *
+check_metric_conf (struct config_file *cfg, struct metric *c)
+{
+	struct metric *m;
+	int act;
+
+	if (c != NULL) {
+		return c;
+	}
+
+	m = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct metric));
+	m->grow_factor = 1.0;
+	m->symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+	m->descriptions = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+
+	act = METRIC_ACTION_REJECT;
+	while (act < METRIC_ACTION_MAX) {
+		m->actions[act].score = -1.0;
+		act ++;
+	}
+
+	rspamd_mempool_add_destructor (cfg->cfg_pool,
+		(rspamd_mempool_destruct_t) g_hash_table_destroy, m->symbols);
+	rspamd_mempool_add_destructor (cfg->cfg_pool,
+		(rspamd_mempool_destruct_t) g_hash_table_destroy, m->descriptions);
+
+	return m;
+}
+
+/*
+ * Return c, or a new worker config allocated from the config pool if c is
+ * NULL. A new config gets an empty params table and workers queue (both
+ * released with the pool), default rlimits and workers count equal to the
+ * number of online processors when it can be detected
+ */
+struct worker_conf *
+check_worker_conf (struct config_file *cfg, struct worker_conf *c)
+{
+#ifdef HAVE_SC_NPROCESSORS_ONLN
+	glong nproc;
+#endif
+
+	if (c == NULL) {
+		c = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct worker_conf));
+		c->params = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+		c->active_workers = g_queue_new ();
+		rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_hash_table_destroy, c->params);
+		rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_queue_free, c->active_workers);
+#ifdef HAVE_SC_NPROCESSORS_ONLN
+		/* sysconf returns -1 on error, do not use that as workers count */
+		nproc = sysconf (_SC_NPROCESSORS_ONLN);
+		c->count = nproc > 0 ? nproc : DEFAULT_WORKERS_NUM;
+#else
+		c->count = DEFAULT_WORKERS_NUM;
+#endif
+		c->rlimit_nofile = DEFAULT_RLIMIT_NOFILE;
+		c->rlimit_maxcore = DEFAULT_RLIMIT_MAXCORE;
+	}
+
+	return c;
+}
+
+
+/*
+ * Handler of the `include_map' ucl macro: data/len is the macro argument
+ * that is used as a map line, ud is the config object. Registers a map that
+ * is read by rspamd_ucl_read_cb and finished by rspamd_ucl_fin_cb.
+ * Returns the result of add_map
+ */
+static bool
+rspamd_include_map_handler (const guchar *data, gsize len, void* ud)
+{
+	struct config_file *cfg = (struct config_file *)ud;
+	struct rspamd_ucl_map_cbdata *state, **state_ref;
+	gchar *uri;
+
+	/* Zero terminated copy of the argument that lives as long as the config */
+	uri = rspamd_mempool_alloc (cfg->cfg_pool, len + 1);
+	rspamd_strlcpy (uri, data, len + 1);
+
+	/* Initial callback data has no buffer, just the config reference */
+	state = g_malloc (sizeof (struct rspamd_ucl_map_cbdata));
+	state->buf = NULL;
+	state->cfg = cfg;
+
+	state_ref = g_malloc (sizeof (struct rspamd_ucl_map_cbdata *));
+	*state_ref = state;
+
+	/* NOTE(review): state and state_ref are not released here if add_map fails - confirm ownership */
+	return add_map (cfg, uri, "ucl include", rspamd_ucl_read_cb, rspamd_ucl_fin_cb, (void **)state_ref);
+}
+
+/*
+ * Variables:
+ * $CONFDIR - configuration directory
+ * $RUNDIR - local states directory
+ * $DBDIR - databases dir
+ * $LOGDIR - logs dir
+ * $PLUGINSDIR - plugins dir
+ * $PREFIX - installation prefix
+ * $VERSION - rspamd version
+ */
+
+#define RSPAMD_CONFDIR_MACRO "CONFDIR"
+#define RSPAMD_RUNDIR_MACRO "RUNDIR"
+#define RSPAMD_DBDIR_MACRO "DBDIR"
+#define RSPAMD_LOGDIR_MACRO "LOGDIR"
+#define RSPAMD_PLUGINSDIR_MACRO "PLUGINSDIR"
+#define RSPAMD_PREFIX_MACRO "PREFIX"
+#define RSPAMD_VERSION_MACRO "VERSION"
+
+/*
+ * Register the predefined variables (paths, prefix and version) in the ucl
+ * parser
+ */
+static void
+rspamd_ucl_add_conf_variables (struct ucl_parser *parser)
+{
+	const struct {
+		const gchar *name;
+		const gchar *value;
+	} vars[] = {
+		{ RSPAMD_CONFDIR_MACRO, RSPAMD_CONFDIR },
+		{ RSPAMD_RUNDIR_MACRO, RSPAMD_RUNDIR },
+		{ RSPAMD_DBDIR_MACRO, RSPAMD_DBDIR },
+		{ RSPAMD_LOGDIR_MACRO, RSPAMD_LOGDIR },
+		{ RSPAMD_PLUGINSDIR_MACRO, RSPAMD_PLUGINSDIR },
+		{ RSPAMD_PREFIX_MACRO, RSPAMD_PREFIX },
+		{ RSPAMD_VERSION_MACRO, RVERSION }
+	};
+	guint i;
+
+	/* Same registration order as the table above */
+	for (i = 0; i < G_N_ELEMENTS (vars); i ++) {
+		ucl_parser_register_variable (parser, vars[i].name, vars[i].value);
+	}
+}
+
+/*
+ * Register rspamd specific macros in the ucl parser: `include_map' is
+ * handled by rspamd_include_map_handler with cfg as its user data
+ */
+static void
+rspamd_ucl_add_conf_macros (struct ucl_parser *parser, struct config_file *cfg)
+{
+	void *handler_ud = cfg;
+
+	ucl_parser_register_macro (parser, "include_map", rspamd_include_map_handler, handler_ud);
+}
+
+/*
+ * Read and parse the configuration file.
+ * The file is mapped, parsed by ucl into cfg->rcl_obj and then processed by
+ * the rcl sections handlers.
+ * @param cfg config object to fill
+ * @param filename path to the configuration file
+ * @param convert_to not used by this function
+ * @param logger_fin finalizer set for the "logging" section, if it exists
+ * @param logger_ud user data for logger_fin
+ * @return TRUE if config has been read and parsed
+ */
+gboolean
+read_rspamd_config (struct config_file *cfg, const gchar *filename,
+		const gchar *convert_to, rspamd_rcl_section_fin_t logger_fin,
+		gpointer logger_ud)
+{
+	struct stat st;
+	gint fd;
+	gchar *data;
+	GError *err = NULL;
+	struct rspamd_rcl_section *top, *logger;
+	struct ucl_parser *parser;
+
+	if ((fd = open (filename, O_RDONLY)) == -1) {
+		msg_err ("cannot open %s: %s", filename, strerror (errno));
+		return FALSE;
+	}
+	/* Stat the opened descriptor, so the size belongs to the mapped file */
+	if (fstat (fd, &st) == -1) {
+		msg_err ("cannot stat %s: %s", filename, strerror (errno));
+		close (fd);
+		return FALSE;
+	}
+	/* Now mmap this file to simplify reading process */
+	if ((data = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
+		msg_err ("cannot mmap %s: %s", filename, strerror (errno));
+		close (fd);
+		return FALSE;
+	}
+	close (fd);
+
+	parser = ucl_parser_new (0);
+	rspamd_ucl_add_conf_variables (parser);
+	rspamd_ucl_add_conf_macros (parser, cfg);
+	if (!ucl_parser_add_chunk (parser, data, st.st_size)) {
+		msg_err ("ucl parser error: %s", ucl_parser_get_error (parser));
+		ucl_parser_free (parser);
+		munmap (data, st.st_size);
+		return FALSE;
+	}
+	munmap (data, st.st_size);
+	cfg->rcl_obj = ucl_parser_get_object (parser);
+	ucl_parser_free (parser);
+
+	top = rspamd_rcl_config_init ();
+
+	HASH_FIND_STR(top, "logging", logger);
+	if (logger != NULL) {
+		logger->fin = logger_fin;
+		logger->fin_ud = logger_ud;
+	}
+
+	if (!rspamd_read_rcl_config (top, cfg, cfg->rcl_obj, &err)) {
+		/* A failed call does not necessarily fill the error */
+		msg_err ("rcl parse error: %s", err != NULL ? err->message : "unknown error");
+		g_clear_error (&err);
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/*
+ * GHFunc for classifiers symbols table: registers each key as a virtual
+ * symbol with weight 1.0 in the symbols cache; value is not used
+ */
+static void
+symbols_classifiers_callback (gpointer key, gpointer value, gpointer ud)
+{
+	struct config_file *config = ud;
+	const gdouble weight = 1.0;
+
+	register_virtual_symbol (&config->cache, key, weight);
+}
+
+/*
+ * Run symbols_classifiers_callback for every entry of
+ * cfg->classifiers_symbols
+ */
+void
+insert_classifier_symbols (struct config_file *cfg)
+{
+	GHashTable *symbols = cfg->classifiers_symbols;
+
+	g_hash_table_foreach (symbols, symbols_classifiers_callback, cfg);
+}
+
+/*
+ * Find classifier config by classifier name (case insensitive).
+ * Returns NULL if name is NULL or nothing matches
+ */
+struct classifier_config*
+find_classifier_conf (struct config_file *cfg, const gchar *name)
+{
+	GList *node;
+	struct classifier_config *candidate;
+
+	if (name == NULL) {
+		return NULL;
+	}
+
+	for (node = cfg->classifiers; node != NULL; node = g_list_next (node)) {
+		candidate = node->data;
+
+		if (g_ascii_strcasecmp (candidate->classifier->name, name) == 0) {
+			return candidate;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Check whether statfiles of a classifier belong to different classes.
+ * First the configured is_spam flags are compared. If they are all equal,
+ * is_spam of each statfile is reassigned by a heuristic based on its symbol
+ * name ("spam" or "ham" substring) and the comparison is repeated.
+ * Returns TRUE if at least two statfiles have different classes
+ */
+gboolean
+check_classifier_statfiles (struct classifier_config *cf)
+{
+	struct statfile *st;
+	gboolean seen_first, res = FALSE, cur_class;
+	GList *cur;
+
+	/* No statfiles at all */
+	if (cf->statfiles == NULL) {
+		return FALSE;
+	}
+
+	/* First check classes directly: compare everything with the first one */
+	st = cf->statfiles->data;
+	cur_class = st->is_spam;
+	for (cur = g_list_next (cf->statfiles); cur != NULL; cur = g_list_next (cur)) {
+		st = cur->data;
+		if (cur_class != st->is_spam) {
+			return TRUE;
+		}
+	}
+
+	/*
+	 * We have not detected any statfile that has different class, so turn
+	 * on heuristic based on symbol's name. All statfiles are processed here
+	 * even after a difference is found, as is_spam is updated on the way
+	 */
+	seen_first = FALSE;
+	for (cur = cf->statfiles; cur != NULL; cur = g_list_next (cur)) {
+		st = cur->data;
+		if (rspamd_strncasestr (st->symbol, "spam", -1) != NULL) {
+			st->is_spam = TRUE;
+		}
+		else if (rspamd_strncasestr (st->symbol, "ham", -1) != NULL) {
+			st->is_spam = FALSE;
+		}
+
+		if (!seen_first) {
+			cur_class = st->is_spam;
+			seen_first = TRUE;
+		}
+		else if (cur_class != st->is_spam) {
+			res = TRUE;
+		}
+	}
+
+	return res;
+}
+
+/*
+ * Map read callback: accumulates incoming chunks in a buffer stored in
+ * data->cur_data. The buffer is created on the first chunk and the config
+ * pointer is taken from data->prev_data. Always returns NULL
+ */
+static gchar*
+rspamd_ucl_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data)
+{
+	struct rspamd_ucl_map_cbdata *acc = data->cur_data;
+	struct rspamd_ucl_map_cbdata *old;
+
+	if (acc == NULL) {
+		/* First chunk of a new map version */
+		old = data->prev_data;
+		acc = g_malloc (sizeof (struct rspamd_ucl_map_cbdata));
+		acc->buf = g_string_sized_new (BUFSIZ);
+		acc->cfg = old->cfg;
+		data->cur_data = acc;
+	}
+
+	g_string_append_len (acc->buf, chunk, len);
+
+	/* Say not to copy any part of this buffer */
+	return NULL;
+}
+
+/*
+ * Map finish callback: releases the previous callback data, then parses the
+ * accumulated buffer if its checksum differs from the one stored in the map.
+ * The parsed object is released immediately (replacing objects is not
+ * implemented); the map checksum is updated only after a successful parse
+ */
+static void
+rspamd_ucl_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data)
+{
+	struct rspamd_ucl_map_cbdata *fresh = data->cur_data, *old = data->prev_data;
+	struct ucl_parser *parser;
+	ucl_object_t *parsed;
+	guint32 sum;
+
+	if (old != NULL) {
+		if (old->buf != NULL) {
+			g_string_free (old->buf, TRUE);
+		}
+		g_free (old);
+	}
+
+	if (fresh == NULL) {
+		msg_err ("map fin error: new data is NULL");
+		return;
+	}
+
+	sum = murmur32_hash (fresh->buf->str, fresh->buf->len);
+	if (data->map->checksum == sum) {
+		msg_info ("do not reload map %s, checksum is the same: %d", data->map->uri, sum);
+		return;
+	}
+
+	/* New data available */
+	parser = ucl_parser_new (0);
+	if (ucl_parser_add_chunk (parser, fresh->buf->str, fresh->buf->len)) {
+		parsed = ucl_parser_get_object (parser);
+		ucl_parser_free (parser);
+		/* XXX: add replace objects code */
+		ucl_object_unref (parsed);
+		data->map->checksum = sum;
+	}
+	else {
+		msg_err ("cannot parse map %s: %s", data->map->uri, ucl_parser_get_error (parser));
+		ucl_parser_free (parser);
+	}
+}
+
+/*
+ * Parse a comma separated list of ipv4 addresses with optional masks and
+ * add them to the radix tree; the tree is created on the first valid item.
+ * Items that cannot be parsed are skipped.
+ * @param ip_list list to parse, NULL is treated as an empty list
+ * @param tree pointer to the tree, *tree may be NULL initially
+ * @return TRUE if *tree exists after parsing
+ */
+gboolean
+rspamd_parse_ip_list (const gchar *ip_list, radix_tree_t **tree)
+{
+	gchar **strvec, **cur;
+	struct in_addr ina;
+	guint32 mask;
+
+	if (ip_list == NULL) {
+		return (*tree != NULL);
+	}
+
+	strvec = g_strsplit_set (ip_list, ",", 0);
+
+	for (cur = strvec; *cur != NULL; cur ++) {
+		/* XXX: handle only ipv4 addresses */
+		if (parse_ipmask_v4 (*cur, &ina, &mask)) {
+			if (*tree == NULL) {
+				*tree = radix_tree_create ();
+			}
+			radix32tree_add (*tree, htonl (ina.s_addr), mask, 1);
+		}
+	}
+
+	/* The vector and its strings are owned by us */
+	g_strfreev (strvec);
+
+	return (*tree != NULL);
+}
+
+/*
+ * vi:ts=4
+ */
diff --git a/src/libserver/dkim.c b/src/libserver/dkim.c
new file mode 100644
index 000000000..c7c8a35e1
--- /dev/null
+++ b/src/libserver/dkim.c
@@ -0,0 +1,1480 @@
+/* Copyright (c) 2010-2011, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "message.h"
+#include "dkim.h"
+#include "dns.h"
+
+/*
+ * Parser of a single dkim param value: gets the raw value as (param, len),
+ * stores the parsed result in ctx and returns FALSE on failure
+ */
+typedef gboolean (*dkim_parse_param_f) (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err);
+
+static gboolean rspamd_dkim_parse_signature (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err);
+static gboolean rspamd_dkim_parse_signalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err);
+static gboolean rspamd_dkim_parse_domain (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err);
+static gboolean rspamd_dkim_parse_canonalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err);
+static gboolean rspamd_dkim_parse_ignore (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err);
+static gboolean rspamd_dkim_parse_selector (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err);
+static gboolean rspamd_dkim_parse_hdrlist (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err);
+static gboolean rspamd_dkim_parse_version (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err);
+static gboolean rspamd_dkim_parse_timestamp (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err);
+static gboolean rspamd_dkim_parse_expiration (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err);
+static gboolean rspamd_dkim_parse_bodyhash (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err);
+static gboolean rspamd_dkim_parse_bodylength (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err);
+
+/* Parsers table indexed by DKIM_PARAM_* values; params without a dedicated parser use rspamd_dkim_parse_ignore */
+static const dkim_parse_param_f parser_funcs[] = {
+ [DKIM_PARAM_SIGNATURE] = rspamd_dkim_parse_signature,
+ [DKIM_PARAM_SIGNALG] = rspamd_dkim_parse_signalg,
+ [DKIM_PARAM_DOMAIN] = rspamd_dkim_parse_domain,
+ [DKIM_PARAM_CANONALG] = rspamd_dkim_parse_canonalg,
+ [DKIM_PARAM_QUERYMETHOD] = rspamd_dkim_parse_ignore,
+ [DKIM_PARAM_SELECTOR] = rspamd_dkim_parse_selector,
+ [DKIM_PARAM_HDRLIST] = rspamd_dkim_parse_hdrlist,
+ [DKIM_PARAM_VERSION] = rspamd_dkim_parse_version,
+ [DKIM_PARAM_IDENTITY] = rspamd_dkim_parse_ignore,
+ [DKIM_PARAM_TIMESTAMP] = rspamd_dkim_parse_timestamp,
+ [DKIM_PARAM_EXPIRATION] = rspamd_dkim_parse_expiration,
+ [DKIM_PARAM_COPIEDHDRS] = rspamd_dkim_parse_ignore,
+ [DKIM_PARAM_BODYHASH] = rspamd_dkim_parse_bodyhash,
+ [DKIM_PARAM_BODYLENGTH] = rspamd_dkim_parse_bodylength
+};
+
+struct rspamd_dkim_header { /* one distinct header name of the parsed header list */
+ gchar *name; /* header name with surrounding whitespace stripped */
+ guint count; /* how many times the name was met in the list */
+};
+
+#define DKIM_ERROR dkim_error_quark ()
+/* Error domain (quark) used for all GErrors set by the dkim code */
+GQuark
+dkim_error_quark (void)
+{
+	/* Static storage: the quark keeps a reference to this string */
+	static const gchar domain[] = "dkim-error-quark";
+
+	return g_quark_from_static_string (domain);
+}
+
+/* Parsers implementation */
+/*
+ * Parse signature param: copy base64 encoded value to the pool and decode
+ * it, storing binary data in ctx->b and its length in ctx->blen.
+ * Always returns TRUE
+ */
+static gboolean
+rspamd_dkim_parse_signature (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err)
+{
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 20))
+	guchar *tmp;
+	gsize tmp_len = 0;
+#endif
+
+	ctx->b = rspamd_mempool_alloc (ctx->pool, len + 1);
+	rspamd_strlcpy (ctx->b, param, len + 1);
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 20))
+	tmp = g_base64_decode (ctx->b, &tmp_len);
+	/*
+	 * Decoded data is binary and may contain zero bytes, so copy it as
+	 * memory; it is never longer than the encoded text
+	 */
+	memcpy (ctx->b, tmp, tmp_len);
+	g_free (tmp);
+	len = tmp_len;
+#else
+	g_base64_decode_inplace (ctx->b, &len);
+#endif
+	/* len is the decoded length in both branches */
+	ctx->blen = len;
+	return TRUE;
+}
+
+/*
+ * Parse signing algorithm: only "rsa-sha1" and "rsa-sha256" are accepted,
+ * anything else sets DKIM_SIGERROR_INVALID_A error
+ */
+static gboolean
+rspamd_dkim_parse_signalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err)
+{
+	static const gchar sha1_name[] = "rsa-sha1", sha256_name[] = "rsa-sha256";
+
+	if (len == sizeof (sha1_name) - 1 && memcmp (param, sha1_name, len) == 0) {
+		ctx->sig_alg = DKIM_SIGN_RSASHA1;
+		return TRUE;
+	}
+
+	if (len == sizeof (sha256_name) - 1 && memcmp (param, sha256_name, len) == 0) {
+		ctx->sig_alg = DKIM_SIGN_RSASHA256;
+		return TRUE;
+	}
+
+	g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_A, "invalid dkim sign algorithm");
+	return FALSE;
+}
+
+/*
+ * Store a zero terminated copy of the domain param in ctx->domain
+ * (allocated from the context pool). Always returns TRUE
+ */
+static gboolean
+rspamd_dkim_parse_domain (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err)
+{
+	const gsize bufsize = len + 1;
+
+	ctx->domain = rspamd_mempool_alloc (ctx->pool, bufsize);
+	rspamd_strlcpy (ctx->domain, param, bufsize);
+
+	return TRUE;
+}
+
+/*
+ * Parse canonicalization param that is either "<header>" or
+ * "<header>/<body>" where each part is "simple" or "relaxed".
+ * Without a slash only the header type is set. Any other value sets
+ * DKIM_SIGERROR_INVALID_A error and returns FALSE
+ */
+static gboolean
+rspamd_dkim_parse_canonalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err)
+{
+	const gchar *slash, *body;
+	gsize hdr_len, body_len;
+
+	slash = memchr (param, '/', len);
+	hdr_len = (slash != NULL) ? (gsize)(slash - param) : len;
+
+	/* Header part is mandatory */
+	if (hdr_len == 6 && memcmp (param, "simple", hdr_len) == 0) {
+		ctx->header_canon_type = DKIM_CANON_SIMPLE;
+	}
+	else if (hdr_len == 7 && memcmp (param, "relaxed", hdr_len) == 0) {
+		ctx->header_canon_type = DKIM_CANON_RELAXED;
+	}
+	else {
+		goto err;
+	}
+
+	if (slash == NULL) {
+		/* Only header type is specified */
+		return TRUE;
+	}
+
+	/* Body part is everything after the slash */
+	body = slash + 1;
+	body_len = len - (hdr_len + 1);
+	if (body_len == 6 && memcmp (body, "simple", body_len) == 0) {
+		ctx->body_canon_type = DKIM_CANON_SIMPLE;
+		return TRUE;
+	}
+	if (body_len == 7 && memcmp (body, "relaxed", body_len) == 0) {
+		ctx->body_canon_type = DKIM_CANON_RELAXED;
+		return TRUE;
+	}
+
+err:
+	g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_A, "invalid dkim canonization algorithm");
+	return FALSE;
+}
+
+/*
+ * Parser for params that are accepted but not used: does nothing and
+ * reports success
+ */
+static gboolean
+rspamd_dkim_parse_ignore (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err)
+{
+	(void)ctx;
+	(void)param;
+	(void)len;
+	(void)err;
+
+	return TRUE;
+}
+
+/*
+ * Store a zero terminated copy of the selector param in ctx->selector
+ * (allocated from the context pool). Always returns TRUE
+ */
+static gboolean
+rspamd_dkim_parse_selector (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err)
+{
+	const gsize bufsize = len + 1;
+
+	ctx->selector = rspamd_mempool_alloc (ctx->pool, bufsize);
+	rspamd_strlcpy (ctx->selector, param, bufsize);
+
+	return TRUE;
+}
+
+/*
+ * Find a header with the specified name (case insensitive) in the array of
+ * struct rspamd_dkim_header pointers.
+ * @param arr array to search in
+ * @param name header name, not necessarily zero terminated
+ * @param len length of name
+ * @return found header or NULL
+ */
+static struct rspamd_dkim_header*
+rspamd_dkim_find_header (GPtrArray *arr, const gchar *name, gsize len)
+{
+	guint i;
+	struct rspamd_dkim_header *h;
+
+	for (i = 0; i < arr->len; i ++) {
+		h = g_ptr_array_index (arr, i);
+		/*
+		 * Lengths must be equal as well, otherwise a name matches any
+		 * stored header that merely starts with it
+		 */
+		if (strlen (h->name) == len && g_ascii_strncasecmp (h->name, name, len) == 0) {
+			return h;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Pool destructor for the headers list: frees the pointer array including
+ * its segment of pointers
+ */
+static void
+rspamd_dkim_hlist_free (void *ud)
+{
+	g_ptr_array_free ((GPtrArray *)ud, TRUE);
+}
+
+/*
+ * Parse a colon separated list of signed headers into ctx->hlist.
+ * Names are stripped of surrounding whitespace, empty names are skipped and
+ * repeated names (case insensitive) are stored once with a counter.
+ * The list must contain the "from" header, otherwise
+ * DKIM_SIGERROR_INVALID_H error is set and FALSE is returned.
+ * On success the list is freed together with the context pool
+ */
+static gboolean
+rspamd_dkim_parse_hdrlist (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err)
+{
+	const gchar *c, *p, *end = param + len;
+	gchar *h;
+	gboolean from_found = FALSE;
+	guint count = 0;
+	struct rspamd_dkim_header *new;
+
+	/* Upper bound of the elements count; p == end is tested before *p */
+	for (p = param; p <= end; p ++) {
+		if (p == end || *p == ':') {
+			count ++;
+		}
+	}
+
+	/* count is at least 1 here as the end of param is always counted */
+	ctx->hlist = g_ptr_array_sized_new (count);
+
+	c = param;
+	for (p = param; p <= end; p ++) {
+		if (p != end && *p != ':') {
+			continue;
+		}
+		if (p > c) {
+			/* Strip the name first, so duplicates are detected properly */
+			h = rspamd_mempool_alloc (ctx->pool, p - c + 1);
+			rspamd_strlcpy (h, c, p - c + 1);
+			g_strstrip (h);
+
+			if (*h == '\0') {
+				/* Only whitespace between separators */
+			}
+			else if ((new = rspamd_dkim_find_header (ctx->hlist, h, strlen (h))) != NULL) {
+				new->count ++;
+			}
+			else {
+				/* Insert new header to the list */
+				new = rspamd_mempool_alloc (ctx->pool, sizeof (struct rspamd_dkim_header));
+				new->name = h;
+				new->count = 1;
+				/* Check mandatory from */
+				if (!from_found && g_ascii_strcasecmp (h, "from") == 0) {
+					from_found = TRUE;
+				}
+				g_ptr_array_add (ctx->hlist, new);
+			}
+		}
+		/* Always step over the separator, even after an empty element */
+		c = p + 1;
+	}
+
+	if (!from_found) {
+		g_ptr_array_free (ctx->hlist, TRUE);
+		/* Do not leave a dangling pointer in the context */
+		ctx->hlist = NULL;
+		g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_H, "invalid dkim header list, from header is missing");
+		return FALSE;
+	}
+
+	rspamd_mempool_add_destructor (ctx->pool, (rspamd_mempool_destruct_t)rspamd_dkim_hlist_free, ctx->hlist);
+
+	return TRUE;
+}
+
+/*
+ * Parse version param: the only accepted value is "1", anything else sets
+ * DKIM_SIGERROR_VERSION error
+ */
+static gboolean
+rspamd_dkim_parse_version (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err)
+{
+	if (len == 1 && *param == '1') {
+		ctx->ver = 1;
+		return TRUE;
+	}
+
+	g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_VERSION, "invalid dkim version");
+	return FALSE;
+}
+
+/*
+ * Parse timestamp param as an unsigned number into ctx->timestamp;
+ * sets DKIM_SIGERROR_UNKNOWN error if the value is not a number
+ */
+static gboolean
+rspamd_dkim_parse_timestamp (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err)
+{
+	gulong parsed;
+
+	if (rspamd_strtoul (param, len, &parsed)) {
+		ctx->timestamp = parsed;
+		return TRUE;
+	}
+
+	g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim timestamp");
+	return FALSE;
+}
+
+/*
+ * Parse expiration param as an unsigned number into ctx->expiration;
+ * sets DKIM_SIGERROR_UNKNOWN error if the value is not a number
+ */
+static gboolean
+rspamd_dkim_parse_expiration (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err)
+{
+	gulong parsed;
+
+	if (rspamd_strtoul (param, len, &parsed)) {
+		ctx->expiration = parsed;
+		return TRUE;
+	}
+
+	g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim expiration");
+	return FALSE;
+}
+
+/*
+ * Parse body hash param: copy base64 encoded value to the pool and decode
+ * it, storing binary data in ctx->bh and its length in ctx->bhlen.
+ * Always returns TRUE
+ */
+static gboolean
+rspamd_dkim_parse_bodyhash (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err)
+{
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 20))
+	guchar *tmp;
+	gsize tmp_len = 0;
+#endif
+
+	ctx->bh = rspamd_mempool_alloc (ctx->pool, len + 1);
+	rspamd_strlcpy (ctx->bh, param, len + 1);
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 20))
+	tmp = g_base64_decode (ctx->bh, &tmp_len);
+	/*
+	 * Decoded hash is binary and may contain zero bytes, so copy it as
+	 * memory; it is never longer than the encoded text
+	 */
+	memcpy (ctx->bh, tmp, tmp_len);
+	g_free (tmp);
+	len = tmp_len;
+#else
+	g_base64_decode_inplace (ctx->bh, &len);
+#endif
+	/* len is the decoded length in both branches */
+	ctx->bhlen = len;
+	return TRUE;
+}
+
+/*
+ * Parse body length param as an unsigned number into ctx->len;
+ * sets DKIM_SIGERROR_INVALID_L error if the value is not a number
+ */
+static gboolean
+rspamd_dkim_parse_bodylength (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err)
+{
+	gulong parsed;
+
+	if (rspamd_strtoul (param, len, &parsed)) {
+		ctx->len = parsed;
+		return TRUE;
+	}
+
+	g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_L, "invalid dkim body length");
+	return FALSE;
+}
+
+/**
+ * Create new dkim context from signature
+ * @param sig message's signature
+ * @param pool pool to allocate memory from
+ * @param err pointer to error object
+ * @return new context or NULL
+ */
+rspamd_dkim_context_t*
+rspamd_create_dkim_context (const gchar *sig, rspamd_mempool_t *pool, guint time_jitter, GError **err)
+{
+ const gchar *p, *c, *tag = NULL, *end;
+ gsize taglen;
+ gint param = DKIM_PARAM_UNKNOWN;
+ time_t now;
+ rspamd_dkim_context_t *new;
+ enum {
+ DKIM_STATE_TAG = 0,
+ DKIM_STATE_AFTER_TAG,
+ DKIM_STATE_VALUE,
+ DKIM_STATE_SKIP_SPACES = 99,
+ DKIM_STATE_ERROR = 100
+ } state, next_state;
+
+
+ new = rspamd_mempool_alloc0 (pool, sizeof (rspamd_dkim_context_t));
+ new->pool = pool;
+ new->header_canon_type = DKIM_CANON_DEFAULT;
+ new->body_canon_type = DKIM_CANON_DEFAULT;
+ new->sig_alg = DKIM_SIGN_UNKNOWN;
+ /* A simple state machine of parsing tags */
+ state = DKIM_STATE_SKIP_SPACES;
+ next_state = DKIM_STATE_TAG;
+ taglen = 0;
+ p = sig;
+ c = sig;
+ end = p + strlen (p);
+ while (p <= end) {
+ switch (state) {
+ case DKIM_STATE_TAG:
+ if (g_ascii_isspace (*p)) {
+ taglen = p - c;
+ while (*p && g_ascii_isspace (*p)) {
+ /* Skip spaces before '=' sign */
+ p ++;
+ }
+ if (*p != '=') {
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param");
+ state = DKIM_STATE_ERROR;
+ }
+ else {
+ state = DKIM_STATE_SKIP_SPACES;
+ next_state = DKIM_STATE_AFTER_TAG;
+ param = DKIM_PARAM_UNKNOWN;
+ p ++;
+ tag = c;
+ }
+ }
+ else if (*p == '=') {
+ state = DKIM_STATE_SKIP_SPACES;
+ next_state = DKIM_STATE_AFTER_TAG;
+ param = DKIM_PARAM_UNKNOWN;
+ p ++;
+ tag = c;
+ }
+ else {
+ taglen ++;
+ p ++;
+ }
+ break;
+ case DKIM_STATE_AFTER_TAG:
+ /* We got tag at tag and len at taglen */
+ switch (taglen) {
+ case 0:
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "zero length dkim param");
+ state = DKIM_STATE_ERROR;
+ break;
+ case 1:
+ /* Simple tags */
+ switch (*tag) {
+ case 'v':
+ param = DKIM_PARAM_VERSION;
+ break;
+ case 'a':
+ param = DKIM_PARAM_SIGNALG;
+ break;
+ case 'b':
+ param = DKIM_PARAM_SIGNATURE;
+ break;
+ case 'c':
+ param = DKIM_PARAM_CANONALG;
+ break;
+ case 'd':
+ param = DKIM_PARAM_DOMAIN;
+ break;
+ case 'h':
+ param = DKIM_PARAM_HDRLIST;
+ break;
+ case 'i':
+ param = DKIM_PARAM_IDENTITY;
+ break;
+ case 'l':
+ param = DKIM_PARAM_BODYLENGTH;
+ break;
+ case 'q':
+ param = DKIM_PARAM_QUERYMETHOD;
+ break;
+ case 's':
+ param = DKIM_PARAM_SELECTOR;
+ break;
+ case 't':
+ param = DKIM_PARAM_TIMESTAMP;
+ break;
+ case 'x':
+ param = DKIM_PARAM_EXPIRATION;
+ break;
+ case 'z':
+ param = DKIM_PARAM_COPIEDHDRS;
+ break;
+ default:
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param: %c", *tag);
+ state = DKIM_STATE_ERROR;
+ break;
+ }
+ break;
+ case 2:
+ if (tag[0] == 'b' && tag[1] == 'h') {
+ param = DKIM_PARAM_BODYHASH;
+ }
+ else {
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param: %c%c", tag[0], tag[1]);
+ state = DKIM_STATE_ERROR;
+ }
+ break;
+ default:
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param length: %zd", taglen);
+ state = DKIM_STATE_ERROR;
+ break;
+ }
+ if (state != DKIM_STATE_ERROR) {
+ /* Skip spaces */
+ state = DKIM_STATE_SKIP_SPACES;
+ next_state = DKIM_STATE_VALUE;
+ }
+ break;
+ case DKIM_STATE_VALUE:
+ if (*p == ';') {
+ if (param == DKIM_PARAM_UNKNOWN || !parser_funcs[param](new, c, p - c, err)) {
+ state = DKIM_STATE_ERROR;
+ }
+ else {
+ state = DKIM_STATE_SKIP_SPACES;
+ next_state = DKIM_STATE_TAG;
+ p ++;
+ taglen = 0;
+ }
+ }
+ else if (p == end) {
+ if (param == DKIM_PARAM_UNKNOWN || !parser_funcs[param](new, c, p - c + 1, err)) {
+ state = DKIM_STATE_ERROR;
+ }
+ else {
+ /* Finish processing */
+ p ++;
+ }
+ }
+ else {
+ p ++;
+ }
+ break;
+ case DKIM_STATE_SKIP_SPACES:
+ if (g_ascii_isspace (*p)) {
+ p ++;
+ }
+ else {
+ c = p;
+ state = next_state;
+ }
+ break;
+ case DKIM_STATE_ERROR:
+ if (err) {
+ msg_info ("dkim parse failed: %s", (*err)->message);
+ return NULL;
+ }
+ else {
+ msg_info ("dkim parse failed: unknown error");
+ return NULL;
+ }
+ break;
+ }
+ }
+
+ /* Now check validity of signature */
+ if (new->b == NULL) {
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_B, "b parameter missing");
+ return NULL;
+ }
+ if (new->bh == NULL) {
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_BH, "bh parameter missing");
+ return NULL;
+ }
+ if (new->domain == NULL) {
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_D, "domain parameter missing");
+ return NULL;
+ }
+ if (new->selector == NULL) {
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_S, "selector parameter missing");
+ return NULL;
+ }
+ if (new->ver == 0) {
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_V, "v parameter missing");
+ return NULL;
+ }
+ if (new->hlist == NULL) {
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_H, "h parameter missing");
+ return NULL;
+ }
+ if (new->sig_alg == DKIM_SIGN_UNKNOWN) {
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_S, "s parameter missing");
+ return NULL;
+ }
+ if (new->sig_alg == DKIM_SIGN_RSASHA1) {
+ /* Check bh length */
+ if (new->bhlen != (guint)g_checksum_type_get_length (G_CHECKSUM_SHA1)) {
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_BADSIG, "signature has incorrect length: %ud", new->bhlen);
+ return NULL;
+ }
+
+ }
+ else if (new->sig_alg == DKIM_SIGN_RSASHA256) {
+ if (new->bhlen != (guint)g_checksum_type_get_length (G_CHECKSUM_SHA256)) {
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_BADSIG, "signature has incorrect length: %ud", new->bhlen);
+ return NULL;
+ }
+ }
+ /* Check expiration */
+ now = time (NULL);
+ if (new->timestamp && now < new->timestamp && new->timestamp - now > (gint)time_jitter) {
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_FUTURE, "signature was made in future, ignoring");
+ return NULL;
+ }
+ if (new->expiration && new->expiration < now) {
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EXPIRED, "signature has expired");
+ return NULL;
+ }
+
+ /* Now create dns key to request further */
+ taglen = strlen (new->domain) + strlen (new->selector) + sizeof (DKIM_DNSKEYNAME) + 2;
+ new->dns_key = rspamd_mempool_alloc (new->pool, taglen);
+ rspamd_snprintf (new->dns_key, taglen, "%s.%s.%s", new->selector, DKIM_DNSKEYNAME, new->domain);
+
+ /* Create checksums for further operations */
+ if (new->sig_alg == DKIM_SIGN_RSASHA1) {
+ new->body_hash = g_checksum_new (G_CHECKSUM_SHA1);
+ new->headers_hash = g_checksum_new (G_CHECKSUM_SHA1);
+ }
+ else if (new->sig_alg == DKIM_SIGN_RSASHA256) {
+ new->body_hash = g_checksum_new (G_CHECKSUM_SHA256);
+ new->headers_hash = g_checksum_new (G_CHECKSUM_SHA256);
+ }
+ else {
+ g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_BADSIG, "signature has unsupported signature algorithm");
+ return NULL;
+ }
+
+ rspamd_mempool_add_destructor (new->pool, (rspamd_mempool_destruct_t)g_checksum_free, new->body_hash);
+ rspamd_mempool_add_destructor (new->pool, (rspamd_mempool_destruct_t)g_checksum_free, new->headers_hash);
+
+ return new;
+}
+
+/*
+ * State carried from rspamd_get_dkim_key() to rspamd_dkim_dns_cb() through
+ * the opaque callback argument of the DNS request.
+ */
+struct rspamd_dkim_key_cbdata {
+ rspamd_dkim_context_t *ctx; /* context whose dns_key is being looked up */
+ dkim_key_handler_f handler; /* called by rspamd_dkim_dns_cb() with the key or an error */
+ gpointer ud; /* opaque user data passed back to handler */
+};
+
+/**
+ * Build a key object from the base64 text of a key record's p= value.
+ *
+ * The text is copied into a slice-allocated buffer of keylen + 1 bytes and
+ * base64-decoded; when built with OpenSSL the decoded bytes are parsed as a
+ * DER public key and the RSA key is extracted from it.
+ *
+ * @param keydata base64 text of the key
+ * @param keylen number of characters of keydata to use
+ * @param err set to DKIM_SIGERROR_KEYFAIL when the key cannot be loaded
+ * @return new key (release with rspamd_dkim_key_free) or NULL on failure
+ */
+static rspamd_dkim_key_t*
+rspamd_dkim_make_key (const gchar *keydata, guint keylen, GError **err)
+{
+    rspamd_dkim_key_t *key = NULL;
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 20))
+    guchar *tmp;
+    gsize tmp_len = 0;
+#endif
+
+    key = g_slice_alloc0 (sizeof (rspamd_dkim_key_t));
+    key->keydata = g_slice_alloc (keylen + 1);
+    rspamd_strlcpy (key->keydata, keydata, keylen + 1);
+    /* keylen records the allocation size, it is what key_free hands back to the slice allocator */
+    key->keylen = keylen + 1;
+    key->decoded_len = keylen + 1;
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 20))
+    tmp = g_base64_decode (key->keydata, &tmp_len);
+    /*
+     * The decoded key is binary and may contain zero bytes, so it must be
+     * copied by length: a string copy would cut it at the first zero.
+     */
+    if (tmp_len > keylen) {
+        tmp_len = keylen;
+    }
+    memcpy (key->keydata, tmp, tmp_len);
+    g_free (tmp);
+    key->decoded_len = tmp_len;
+#else
+    g_base64_decode_inplace (key->keydata, &key->decoded_len);
+#endif
+#ifdef HAVE_OPENSSL
+    key->key_bio = BIO_new_mem_buf (key->keydata, key->decoded_len);
+    if (key->key_bio == NULL) {
+        g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "cannot make ssl bio from key");
+        rspamd_dkim_key_free (key);
+        return NULL;
+    }
+
+    key->key_evp = d2i_PUBKEY_bio (key->key_bio, NULL);
+    if (key->key_evp == NULL) {
+        g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "cannot extract pubkey from bio");
+        rspamd_dkim_key_free (key);
+        return NULL;
+    }
+
+    key->key_rsa = EVP_PKEY_get1_RSA (key->key_evp);
+    if (key->key_rsa == NULL) {
+        g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "cannot extract rsa key from evp key");
+        rspamd_dkim_key_free (key);
+        return NULL;
+    }
+
+#endif
+
+    return key;
+}
+
+/**
+ * Free DKIM key
+ *
+ * Releases the OpenSSL objects attached to the key (when built with
+ * OpenSSL), the key data buffer and the key structure itself.
+ *
+ * @param key key to free; NULL is ignored
+ */
+void
+rspamd_dkim_key_free (rspamd_dkim_key_t *key)
+{
+    if (key == NULL) {
+        return;
+    }
+#ifdef HAVE_OPENSSL
+    if (key->key_rsa) {
+        RSA_free (key->key_rsa);
+    }
+    /* key_evp is allocated by d2i_PUBKEY_bio and was previously leaked */
+    if (key->key_evp) {
+        EVP_PKEY_free (key->key_evp);
+    }
+    if (key->key_bio) {
+        BIO_free (key->key_bio);
+    }
+#endif
+    /* keylen holds the size the buffer was allocated with */
+    g_slice_free1 (key->keylen, key->keydata);
+    g_slice_free1 (sizeof (rspamd_dkim_key_t), key);
+}
+
+/**
+ * Find the p= value in the text of a key record and build a key from it.
+ *
+ * The text is scanned for the first "p=" pair; the value runs up to the
+ * next ';' or the end of the string.
+ *
+ * @param txt NUL terminated record text
+ * @param keylen if not NULL, receives the length of the base64 key text
+ *               when a key is returned
+ * @param err set to DKIM_SIGERROR_KEYREVOKED for an empty p= value,
+ *            DKIM_SIGERROR_KEYFAIL when no p= is present or the key
+ *            cannot be loaded
+ * @return new key or NULL
+ */
+static rspamd_dkim_key_t*
+rspamd_dkim_parse_key (const gchar *txt, gsize *keylen, GError **err)
+{
+    const gchar *c, *p, *end;
+    gint state = 0;
+    gsize len;
+    rspamd_dkim_key_t *key;
+
+    c = txt;
+    p = txt;
+    end = txt + strlen (txt);
+
+    while (p <= end) {
+        switch (state) {
+        case 0:
+            if (p != end && p[0] == 'p' && p[1] == '=') {
+                /* We got something like public key */
+                c = p + 2;
+                p = c;
+                state = 1;
+            }
+            else {
+                /* Ignore everything */
+                p ++;
+            }
+            break;
+        case 1:
+            /* State when we got p= and looking for some public key */
+            if (*p == ';' || p == end) {
+                if (p == c) {
+                    /*
+                     * An empty value means the key was revoked. The old
+                     * code skipped past this point and either fed the
+                     * separator to the key loader or reported a missing key.
+                     */
+                    g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYREVOKED, "key was revoked");
+                    return NULL;
+                }
+                len = p - c;
+                key = rspamd_dkim_make_key (c, len, err);
+                if (key != NULL && keylen != NULL) {
+                    *keylen = len;
+                }
+                return key;
+            }
+            else {
+                p ++;
+            }
+            break;
+        }
+    }
+
+    /* State 1 always returns at the end of input, so only a missing p= gets here */
+    g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "key was not found");
+
+    return NULL;
+}
+
+/*
+ * DNS callback for the key lookup: parse TXT entries of the reply until one
+ * yields a key and pass the result to the user's handler.
+ *
+ * The handler is called exactly once. It receives either a key (err is
+ * NULL) or a NULL key together with an error describing the failure; the
+ * handler owns both objects.
+ */
+static void
+rspamd_dkim_dns_cb (struct rdns_reply *reply, gpointer arg)
+{
+    struct rspamd_dkim_key_cbdata *cbdata = arg;
+    rspamd_dkim_key_t *key = NULL;
+    GError *err = NULL, *parse_err;
+    struct rdns_reply_entry *elt;
+    gsize keylen = 0;
+
+    if (reply->code != RDNS_RC_NOERROR) {
+        g_set_error (&err, DKIM_ERROR, DKIM_SIGERROR_NOKEY, "dns request to %s failed: %s", cbdata->ctx->dns_key,
+            rdns_strerror (reply->code));
+        cbdata->handler (NULL, 0, cbdata->ctx, cbdata->ud, err);
+        return;
+    }
+
+    LL_FOREACH (reply->entries, elt) {
+        if (elt->type == RDNS_REQUEST_TXT) {
+            /*
+             * Each record is parsed with its own error object: GLib does not
+             * allow setting a GError over one that is already set.
+             */
+            parse_err = NULL;
+            key = rspamd_dkim_parse_key (elt->content.txt.data, &keylen, &parse_err);
+            if (key) {
+                key->ttl = elt->ttl;
+                if (parse_err != NULL) {
+                    g_error_free (parse_err);
+                }
+                break;
+            }
+            if (parse_err != NULL) {
+                if (err == NULL) {
+                    /* Report the first failure if no record is usable */
+                    err = parse_err;
+                }
+                else {
+                    g_error_free (parse_err);
+                }
+            }
+        }
+    }
+
+    if (key != NULL && err != NULL) {
+        /* Free error as it is insignificant */
+        g_error_free (err);
+        err = NULL;
+    }
+    else if (key == NULL && err == NULL) {
+        /* Reply had no TXT entries at all: never hand over NULL key and NULL error */
+        g_set_error (&err, DKIM_ERROR, DKIM_SIGERROR_NOKEY, "no key record found for %s", cbdata->ctx->dns_key);
+    }
+    cbdata->handler (key, keylen, cbdata->ctx, cbdata->ud, err);
+}
+
+/**
+ * Start a TXT lookup of ctx->dns_key; the reply is parsed into a key and
+ * delivered to the handler.
+ * @param ctx dkim context from signature
+ * @param resolver dns resolver object
+ * @param s async session to make request
+ * @param handler function called with the parsed key or an error
+ * @param ud opaque pointer passed to the handler
+ * @return result of make_dns_request, FALSE if ctx or its dns_key is NULL
+ */
+gboolean
+rspamd_get_dkim_key (rspamd_dkim_context_t *ctx, struct rspamd_dns_resolver *resolver,
+    struct rspamd_async_session *s, dkim_key_handler_f handler, gpointer ud)
+{
+    struct rspamd_dkim_key_cbdata *lookup;
+
+    g_return_val_if_fail (ctx != NULL, FALSE);
+    g_return_val_if_fail (ctx->dns_key != NULL, FALSE);
+
+    /* Callback state is allocated from the context pool */
+    lookup = rspamd_mempool_alloc (ctx->pool, sizeof (*lookup));
+    lookup->ud = ud;
+    lookup->handler = handler;
+    lookup->ctx = ctx;
+
+    return make_dns_request (resolver, s, ctx->pool, rspamd_dkim_dns_cb, lookup, RDNS_REQUEST_TXT, ctx->dns_key);
+}
+
+/*
+ * Canonize one portion of the body with the relaxed algorithm and add it to
+ * the checksum.
+ *
+ * Runs of whitespace are replaced by a single space, whitespace before a
+ * line end is dropped, and a lone CR or LF is written as CRLF. Input is
+ * consumed until it is exhausted or the output buffer is full.
+ *
+ * @param ck checksum to update
+ * @param start in: first unprocessed byte; out: advanced past consumed input
+ * @param remain number of bytes available from *start
+ * @return TRUE if unprocessed input is left and the step must be repeated
+ */
+static gboolean
+rspamd_dkim_relaxed_body_step (GChecksum *ck, const gchar **start, guint remain)
+{
+    const gchar *h;
+    static gchar buf[BUFSIZ];
+    gchar *t;
+    guint len, inlen;
+    gboolean got_sp;
+
+    len = remain;
+    /* inlen is the exact number of free bytes left in buf */
+    inlen = sizeof (buf);
+    h = *start;
+    t = &buf[0];
+    got_sp = FALSE;
+
+    /*
+     * One input byte produces at most two output bytes (CR or LF expanded
+     * to CRLF), so requiring two free bytes per iteration keeps every write
+     * inside buf and keeps inlen from wrapping below zero.
+     */
+    while (len > 0 && inlen >= 2) {
+        if (*h == '\r' || *h == '\n') {
+            /* Ignore spaces at the end of line */
+            if (got_sp) {
+                got_sp = FALSE;
+                t --;
+                inlen ++;
+            }
+            /* Replace a single \n or \r with \r\n */
+            if (*h == '\n' && *(h - 1) != '\r') {
+                *t ++ = '\r';
+                inlen --;
+            }
+            else if (*h == '\r' && *(h + 1) != '\n') {
+                *t ++ = *h ++;
+                *t ++ = '\n';
+                inlen -= 2;
+                len --;
+                continue;
+            }
+        }
+        else if (g_ascii_isspace (*h)) {
+            if (!got_sp) {
+                /* First space of a run is kept, the others are skipped */
+                *t ++ = ' ';
+                inlen --;
+                got_sp = TRUE;
+            }
+            h ++;
+            len --;
+            continue;
+        }
+        else {
+            got_sp = FALSE;
+        }
+        *t ++ = *h ++;
+        inlen --;
+        len --;
+    }
+
+    *start = h;
+
+    if (len > 0 && t > buf && *(t - 1) == ' ' && g_ascii_isspace (*h)) {
+        /*
+         * Avoid border problems: the whitespace run continues in the next
+         * step, which starts with got_sp reset and will emit the space (or
+         * the line end) itself.
+         */
+        t --;
+    }
+    g_checksum_update (ck, (const guchar *)buf, t - buf);
+
+    /* Finished only when all input was consumed, not merely when it fit the size estimate */
+    return len > 0;
+}
+
+/*
+ * Canonize one portion of the body with the simple algorithm and add it to
+ * the checksum.
+ *
+ * Data is passed through unchanged except that a lone CR or LF is written
+ * as CRLF. Input is consumed until it is exhausted or the output buffer is
+ * full.
+ *
+ * @param ck checksum to update
+ * @param start in: first unprocessed byte; out: advanced past consumed input
+ * @param remain number of bytes available from *start
+ * @return TRUE if unprocessed input is left and the step must be repeated
+ */
+static gboolean
+rspamd_dkim_simple_body_step (GChecksum *ck, const gchar **start, guint remain)
+{
+    const gchar *h;
+    static gchar buf[BUFSIZ];
+    gchar *t;
+    guint len, inlen;
+
+    len = remain;
+    /* inlen is the exact number of free bytes left in buf */
+    inlen = sizeof (buf);
+    h = *start;
+    t = &buf[0];
+
+    /*
+     * One input byte produces at most two output bytes, so requiring two
+     * free bytes per iteration keeps every write inside buf and keeps inlen
+     * from wrapping below zero.
+     */
+    while (len > 0 && inlen >= 2) {
+        if (*h == '\r' || *h == '\n') {
+            /* Replace a single \n or \r with \r\n */
+            if (*h == '\n' && *(h - 1) != '\r') {
+                *t ++ = '\r';
+                inlen --;
+            }
+            else if (*h == '\r' && *(h + 1) != '\n') {
+                *t ++ = *h ++;
+                *t ++ = '\n';
+                inlen -= 2;
+                len --;
+                continue;
+            }
+        }
+        *t ++ = *h ++;
+        inlen --;
+        len --;
+    }
+
+    *start = h;
+
+    g_checksum_update (ck, (const guchar *)buf, t - buf);
+
+    /* Finished only when all input was consumed */
+    return len > 0;
+}
+
+/* Add the canonical form of an empty body to the body hash */
+static void
+rspamd_dkim_hash_empty_body (rspamd_dkim_context_t *ctx)
+{
+    if (ctx->body_canon_type == DKIM_CANON_SIMPLE) {
+        g_checksum_update (ctx->body_hash, CRLF, sizeof (CRLF) - 1);
+    }
+    else {
+        g_checksum_update (ctx->body_hash, "", 0);
+    }
+}
+
+/*
+ * Canonize the message body and feed it to ctx->body_hash.
+ *
+ * Extra line ends at the end of the body are stripped first; what is left
+ * is processed by the simple or relaxed step function according to
+ * ctx->body_canon_type.
+ *
+ * @param ctx dkim context
+ * @param start first byte of the body, NULL if the message has no body
+ * @param end byte after the last byte of the body
+ * @return TRUE when the body hash was updated
+ */
+static gboolean
+rspamd_dkim_canonize_body (rspamd_dkim_context_t *ctx, const gchar *start, const gchar *end)
+{
+    const gchar *p;
+
+    if (start == NULL) {
+        /*
+         * No body at all: hash it as an empty one. This used to fall
+         * through to `return FALSE`, so such messages were reported as a
+         * record error although the hash had been computed.
+         */
+        rspamd_dkim_hash_empty_body (ctx);
+        return TRUE;
+    }
+
+    /* Strip extra ending CRLF */
+    p = end - 1;
+    while (p >= start + 2) {
+        if (*p == '\n' && *(p - 1) == '\r' && *(p - 2) == '\n') {
+            p -= 2;
+        }
+        else if (*p == '\n' && *(p - 1) == '\n') {
+            p --;
+        }
+        else if (*p == '\r' && *(p - 1) == '\r') {
+            p --;
+        }
+        else {
+            break;
+        }
+    }
+    end = p + 1;
+
+    /* NOTE(review): any two-byte body is treated as empty here, not only CRLF - confirm */
+    if (end == start || end == start + 2) {
+        rspamd_dkim_hash_empty_body (ctx);
+    }
+    else if (ctx->body_canon_type == DKIM_CANON_SIMPLE) {
+        /* Simple canonization */
+        while (rspamd_dkim_simple_body_step (ctx->body_hash, &start, end - start));
+    }
+    else {
+        while (rspamd_dkim_relaxed_body_step (ctx->body_hash, &start, end - start));
+    }
+
+    return TRUE;
+}
+
+/*
+ * Update hash converting all CR and LF to CRLF
+ *
+ * Text between line ends is hashed as is; every CR, LF or CRLF found in
+ * [begin, begin + len) is hashed as a single CRLF.
+ */
+static void
+rspamd_dkim_hash_update (GChecksum *ck, const gchar *begin, gsize len)
+{
+    const gchar *p, *c, *end;
+
+    end = begin + len;
+    p = begin;
+    c = p;
+    while (p != end) {
+        if (*p == '\r') {
+            g_checksum_update (ck, c, p - c);
+            g_checksum_update (ck, CRLF, sizeof (CRLF) - 1);
+            p ++;
+            /*
+             * Look at the next byte only if it belongs to the input: when
+             * CR is the last byte, reading further is out of bounds and
+             * could move p past end so that the loop never stops.
+             */
+            if (p != end && *p == '\n') {
+                p ++;
+            }
+            c = p;
+        }
+        else if (*p == '\n') {
+            g_checksum_update (ck, c, p - c);
+            g_checksum_update (ck, CRLF, sizeof (CRLF) - 1);
+            p ++;
+            c = p;
+        }
+        else {
+            p ++;
+        }
+    }
+    /* Tail without a line end */
+    if (p != c) {
+        g_checksum_update (ck, c, p - c);
+    }
+}
+
+/*
+ * Update hash by signature value (ignoring b= tag)
+ *
+ * The header text is hashed up to and including "b=", the value of that
+ * tag is skipped, and hashing resumes at the ';' that ends it (or at the
+ * last byte when the value runs to the end). Line ends at the very end of
+ * the header are not hashed.
+ */
+static void
+rspamd_dkim_signature_update (rspamd_dkim_context_t *ctx, const gchar *begin, guint len)
+{
+    const gchar *p, *c, *end;
+    gboolean tag, skip;
+
+    end = begin + len;
+    p = begin;
+    c = begin;
+    /* tag is TRUE while scanning a tag name, FALSE inside a tag value */
+    tag = TRUE;
+    skip = FALSE;
+
+    while (p < end) {
+        if (tag && p[0] == 'b' && p[1] == '=') {
+            /* Add to signature */
+            msg_debug ("initial update hash with signature part: %*s", p - c + 2, c);
+            rspamd_dkim_hash_update (ctx->headers_hash, c, p - c + 2);
+            skip = TRUE;
+        }
+        else if (skip && (*p == ';' || p == end - 1)) {
+            skip = FALSE;
+            c = p;
+        }
+        else if (!tag && *p == ';') {
+            tag = TRUE;
+        }
+        else if (tag && *p == '=') {
+            tag = FALSE;
+        }
+        p ++;
+    }
+
+    p --;
+    /*
+     * Skip \r\n at the end. The bound is tested before the byte is read so
+     * that nothing before c is dereferenced (this also covers len == 0).
+     */
+    while (p >= c && (*p == '\r' || *p == '\n')) {
+        p --;
+    }
+
+    if (p - c + 1 > 0) {
+        msg_debug ("final update hash with signature part: %*s", p - c + 1, c);
+        rspamd_dkim_hash_update (ctx->headers_hash, c, p - c + 1);
+    }
+}
+
+/*
+ * Build the relaxed canonical form of one header and add it to the headers
+ * hash.
+ *
+ * The canonical form is: lower-cased name, ':', the value with leading
+ * whitespace removed, inner whitespace runs replaced by one space and a
+ * trailing space removed, then CRLF. For the signature header itself
+ * (is_sign) the text goes through rspamd_dkim_signature_update instead of
+ * being hashed directly.
+ *
+ * @return always TRUE
+ */
+static gboolean
+rspamd_dkim_canonize_header_relaxed (rspamd_dkim_context_t *ctx, const gchar *header, const gchar *header_name, gboolean is_sign)
+{
+    const gchar *src;
+    gchar *dst, *out;
+    guint need;
+    gboolean prev_space = FALSE, on_heap = FALSE;
+
+    /* Name, colon, value, CRLF and terminating zero */
+    need = strlen (header) + strlen (header_name) + sizeof (":" CRLF);
+    if (need <= BUFSIZ) {
+        /* Small headers are built on the stack */
+        out = g_alloca (need);
+    }
+    else {
+        out = g_malloc (need);
+        on_heap = TRUE;
+    }
+    dst = out;
+
+    /* Lower-cased name followed by colon */
+    for (src = header_name; *src != '\0'; src ++) {
+        *dst ++ = g_ascii_tolower (*src);
+    }
+    *dst ++ = ':';
+
+    /* Value without leading whitespace */
+    src = header;
+    while (g_ascii_isspace (*src)) {
+        src ++;
+    }
+
+    for (; *src != '\0'; src ++) {
+        if (g_ascii_isspace (*src)) {
+            /* Only the first whitespace of a run produces a space */
+            if (!prev_space) {
+                *dst ++ = ' ';
+                prev_space = TRUE;
+            }
+        }
+        else {
+            *dst ++ = *src;
+            prev_space = FALSE;
+        }
+    }
+
+    /* Drop the space left by trailing whitespace */
+    if (g_ascii_isspace (*(dst - 1))) {
+        dst --;
+    }
+    *dst ++ = '\r';
+    *dst ++ = '\n';
+    *dst = '\0';
+
+    if (is_sign) {
+        rspamd_dkim_signature_update (ctx, out, dst - out);
+    }
+    else {
+        msg_debug ("update signature with header: %s", out);
+        g_checksum_update (ctx->headers_hash, out, dst - out);
+    }
+
+    if (on_heap) {
+        g_free (out);
+    }
+
+    return TRUE;
+}
+
+/*
+ * One occurrence of a header found by rspamd_dkim_canonize_header_simple
+ * in the raw headers text.
+ */
+struct rspamd_dkim_sign_chunk {
+ const gchar *begin; /* first byte of the header name inside the raw headers string */
+ gsize len; /* includes the final CRLF when append_crlf is FALSE, stops before the bare LF otherwise */
+ gboolean append_crlf; /* TRUE when the header ends with LF that is not preceded by CR */
+};
+
+/*
+ * Simple header canonicalization: find occurrences of header_name in the
+ * raw headers text and hash them as they are (line ends are normalised to
+ * CRLF by rspamd_dkim_hash_update).
+ *
+ * For ordinary headers up to `count` occurrences are hashed, starting from
+ * the last one found. For the signature header (is_sign) only the first
+ * occurrence is used and it goes through rspamd_dkim_signature_update.
+ *
+ * @param headers NUL terminated raw headers text
+ * @param header_name name to look for (compared case-insensitively)
+ * @param count maximum number of occurrences to hash
+ * @return TRUE if at least one occurrence was found
+ */
+static gboolean
+rspamd_dkim_canonize_header_simple (rspamd_dkim_context_t *ctx, const gchar *headers,
+    const gchar *header_name, guint count, gboolean is_sign)
+{
+    const gchar *p, *c;
+    gint state = 0, hlen;
+    gboolean found = FALSE;
+    GArray *to_sign;
+    struct rspamd_dkim_sign_chunk chunk, *elt;
+    gint i;
+
+    /* This process is very similar to raw headers processing */
+    to_sign = g_array_sized_new (FALSE, FALSE, sizeof (struct rspamd_dkim_sign_chunk), count);
+    p = headers;
+    c = p;
+    hlen = strlen (header_name);
+
+    while (*p) {
+        switch (state) {
+        case 0:
+            /* Compare state */
+            if (*p == ':') {
+                /* Compare header's name with desired one */
+                if (p - c == hlen && g_ascii_strncasecmp (c, header_name, hlen) == 0) {
+                    /* Get value */
+                    state = 2;
+                }
+                else {
+                    /* Skip the whole header */
+                    state = 1;
+                }
+            }
+            p ++;
+            break;
+        case 1:
+            /* Skip header state: a header ends at LF not followed by folding whitespace */
+            if (*p == '\n' && !g_ascii_isspace (p[1])) {
+                /* Header is skipped */
+                state = 0;
+                c = p + 1;
+            }
+            p ++;
+            break;
+        case 2:
+            /* c contains the beginning of header */
+            if (*p == '\n' && (!g_ascii_isspace (p[1]) || p[1] == '\0')) {
+                chunk.begin = c;
+                if (*(p - 1) == '\r') {
+                    chunk.len = p - c + 1;
+                    chunk.append_crlf = FALSE;
+                }
+                else {
+                    /* Need append CRLF as linefeed is not proper */
+                    chunk.len = p - c;
+                    chunk.append_crlf = TRUE;
+                }
+                g_array_append_val (to_sign, chunk);
+                c = p + 1;
+                state = 0;
+                found = TRUE;
+            }
+            p ++;
+            break;
+        }
+    }
+
+    if (found) {
+        if (!is_sign) {
+
+            for (i = to_sign->len - 1; i >= 0 && count > 0; i --, count --) {
+                elt = &g_array_index (to_sign, struct rspamd_dkim_sign_chunk, i);
+
+                /*
+                 * Decide by the element being hashed. The old code tested
+                 * `chunk`, i.e. the last header parsed, so mixed line ends
+                 * gave a wrong length for the other occurrences.
+                 */
+                if (!elt->append_crlf) {
+                    msg_debug ("update signature with header: %*s", elt->len, elt->begin);
+                    rspamd_dkim_hash_update (ctx->headers_hash, elt->begin, elt->len);
+                }
+                else {
+                    /* Include the bare LF, it is hashed as CRLF */
+                    msg_debug ("update signature with header: %*s", elt->len + 1, elt->begin);
+                    rspamd_dkim_hash_update (ctx->headers_hash, elt->begin, elt->len + 1);
+                }
+            }
+        }
+        else {
+            elt = &g_array_index (to_sign, struct rspamd_dkim_sign_chunk, 0);
+            if (elt->append_crlf) {
+                rspamd_dkim_signature_update (ctx, elt->begin, elt->len + 1);
+            }
+            else {
+                rspamd_dkim_signature_update (ctx, elt->begin, elt->len);
+            }
+        }
+    }
+
+    g_array_free (to_sign, TRUE);
+
+    return found;
+}
+
+/*
+ * Canonize the occurrences of one header according to
+ * ctx->header_canon_type and add them to the headers hash.
+ *
+ * Simple canonicalization works on the raw headers text. Relaxed
+ * canonicalization takes the header chain from task->raw_headers: the
+ * last `count` elements of the chain are processed in reverse chain order;
+ * for the signature header (is_sig) only the first element is used.
+ *
+ * @return FALSE if the header is not present in the message
+ */
+static gboolean
+rspamd_dkim_canonize_header (rspamd_dkim_context_t *ctx, struct rspamd_task *task, const gchar *header_name,
+    guint count, gboolean is_sig)
+{
+    struct raw_header *rh, *it;
+    guint total = 0, skip;
+    GList *selected = NULL, *cur;
+
+    if (ctx->header_canon_type == DKIM_CANON_SIMPLE) {
+        return rspamd_dkim_canonize_header_simple (ctx, task->raw_headers_str, header_name, count, is_sig);
+    }
+
+    rh = g_hash_table_lookup (task->raw_headers, header_name);
+    if (rh == NULL) {
+        return FALSE;
+    }
+
+    if (is_sig) {
+        /* For signature check just use the first dkim header */
+        rspamd_dkim_canonize_header_relaxed (ctx, rh->value, header_name, is_sig);
+        return TRUE;
+    }
+
+    /* Skip leading elements so that at most `count` remain */
+    for (it = rh; it != NULL; it = it->next) {
+        total ++;
+    }
+    skip = (total > count) ? total - count : 0;
+    for (it = rh; skip > 0; skip --) {
+        it = it->next;
+    }
+
+    /* Prepending reverses the order of the remaining elements */
+    for (; it != NULL; it = it->next) {
+        selected = g_list_prepend (selected, it);
+    }
+
+    for (cur = selected; cur != NULL; cur = g_list_next (cur)) {
+        rh = cur->data;
+        if (! rspamd_dkim_canonize_header_relaxed (ctx, rh->value, header_name, is_sig)) {
+            g_list_free (selected);
+            return FALSE;
+        }
+    }
+    g_list_free (selected);
+
+    return TRUE;
+}
+
+/**
+ * Check task for dkim context using dkim key
+ *
+ * Locates the end of the headers in the raw message, hashes the body and
+ * the signed headers as required by the context, compares the body hash
+ * with the bh value of the signature and, when built with OpenSSL,
+ * verifies the signature over the headers hash with the RSA key.
+ *
+ * @param ctx dkim verify context
+ * @param key dkim key (from cache or from dns request)
+ * @param task task to check
+ * @return DKIM_CONTINUE if no check failed, DKIM_REJECT if the body hash or
+ * the signature does not match, DKIM_RECORD_ERROR if the body cannot be
+ * canonized
+ */
+gint
+rspamd_dkim_check (rspamd_dkim_context_t *ctx, rspamd_dkim_key_t *key, struct rspamd_task *task)
+{
+ const gchar *p, *headers_end = NULL, *end, *body_end;
+ gboolean got_cr = FALSE, got_crlf = FALSE, got_lf = FALSE;
+ gchar *digest;
+ gsize dlen;
+ gint res = DKIM_CONTINUE;
+ guint i;
+ struct rspamd_dkim_header *dh;
+#ifdef HAVE_OPENSSL
+ gint nid;
+#endif
+
+ /*
+ * NOTE(review): DKIM_ERROR is the GError domain passed to g_set_error
+ * elsewhere in this file, not one of the DKIM_* check results - confirm
+ * that it is the intended return value here.
+ */
+ g_return_val_if_fail (ctx != NULL, DKIM_ERROR);
+ g_return_val_if_fail (key != NULL, DKIM_ERROR);
+ g_return_val_if_fail (task->msg != NULL, DKIM_ERROR);
+
+ /* First of all find place of body */
+ p = task->msg->str;
+
+ end = task->msg->str + task->msg->len;
+
+ /*
+ * Scan for an empty line: headers_end is set to the byte following
+ * the first empty line; \r\n, \n and \r line ends (also mixed) are
+ * recognised. The loop also examines *end itself.
+ */
+ while (p <= end) {
+ /* Search for \r\n\r\n at the end of headers */
+ if (*p == '\n') {
+ if (got_cr && *(p - 1) == '\r') {
+ if (got_crlf) {
+ /* \r\n\r\n */
+ headers_end = p + 1;
+ break;
+ }
+ else if (got_lf) {
+ /* \n\r\n */
+ headers_end = p + 1;
+ break;
+ }
+ else {
+ /* Set got crlf flag */
+ got_crlf = TRUE;
+ got_cr = FALSE;
+ got_lf = FALSE;
+ }
+ }
+ else if (got_cr && *(p - 1) != '\r') {
+ /* We got CR somewhere but not right before */
+ got_cr = FALSE;
+ if (*(p - 1) == '\n') {
+ /* \r\n\n case */
+ headers_end = p + 1;
+ break;
+ }
+ got_lf = TRUE;
+ }
+ else if (got_lf && *(p - 1) == '\n') {
+ /* \n\n case */
+ headers_end = p + 1;
+ break;
+ }
+ else {
+ got_lf = TRUE;
+ }
+ }
+ else if (*p == '\r') {
+ if (got_cr && *(p - 1) == '\r') {
+ /* \r\r case */
+ headers_end = p + 1;
+ break;
+ }
+ else if (got_lf && *(p - 1) != '\n') {
+ /* Sequence is broken */
+ got_lf = FALSE;
+ got_cr = TRUE;
+ }
+ else {
+ got_cr = TRUE;
+ }
+ }
+ else {
+ /* NOTE(review): got_lf is not reset by an ordinary character - confirm this is intended */
+ got_cr = FALSE;
+ got_crlf = FALSE;
+ }
+ p ++;
+ }
+
+ /* Start canonization of body part */
+ if (headers_end) {
+ /* ctx->len == 0 means no limit; a limit larger than the body is ignored */
+ if (ctx->len == 0 || (gint)ctx->len > end - headers_end) {
+ body_end = end;
+ }
+ else {
+ /* Strip message */
+ body_end = headers_end + ctx->len;
+ }
+ }
+ else {
+ /* No empty line found: headers_end stays NULL and is passed as such */
+ body_end = end;
+ }
+ if (!rspamd_dkim_canonize_body (ctx, headers_end, body_end)) {
+ return DKIM_RECORD_ERROR;
+ }
+ /* Now canonize headers; the result of each call is not checked */
+ for (i = 0; i < ctx->hlist->len; i ++) {
+ dh = g_ptr_array_index (ctx->hlist, i);
+ rspamd_dkim_canonize_header (ctx, task, dh->name, dh->count, FALSE);
+ }
+
+ /* Canonize dkim signature */
+ rspamd_dkim_canonize_header (ctx, task, DKIM_SIGNHEADER, 1, TRUE);
+
+ /* The digest buffer has the size of the bh value of the signature */
+ dlen = ctx->bhlen;
+ digest = g_alloca (dlen);
+ g_checksum_get_digest (ctx->body_hash, digest, &dlen);
+
+ /* Check bh field */
+ if (memcmp (ctx->bh, digest, dlen) != 0) {
+ msg_debug ("bh value missmatch");
+ return DKIM_REJECT;
+ }
+
+ /* The same buffer is reused for the headers digest */
+ g_checksum_get_digest (ctx->headers_hash, digest, &dlen);
+#ifdef HAVE_OPENSSL
+ /* Check headers signature */
+
+ if (ctx->sig_alg == DKIM_SIGN_RSASHA1) {
+ nid = NID_sha1;
+ }
+ else if (ctx->sig_alg == DKIM_SIGN_RSASHA256) {
+ nid = NID_sha256;
+ }
+ else {
+ /* Not reached */
+ nid = NID_sha1;
+ }
+
+ if (RSA_verify (nid, digest, dlen, ctx->b, ctx->blen, key->key_rsa) != 1) {
+ msg_debug ("rsa verify failed");
+ res = DKIM_REJECT;
+ }
+#endif
+ /* Without HAVE_OPENSSL only the body hash has been checked at this point */
+ return res;
+}
diff --git a/src/libserver/dkim.h b/src/libserver/dkim.h
new file mode 100644
index 000000000..29ec479b7
--- /dev/null
+++ b/src/libserver/dkim.h
@@ -0,0 +1,207 @@
+/* Copyright (c) 2010-2011, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef DKIM_H_
+#define DKIM_H_
+
+#include "config.h"
+#include "event.h"
+#include "dns.h"
+#ifdef HAVE_OPENSSL
+#include <openssl/rsa.h>
+#include <openssl/engine.h>
+#endif
+
+/* Main types and definitions */
+
+#define DKIM_SIGNHEADER "DKIM-Signature"
+ /* name of the DKIM signature header, used to find the signature to verify */
+
+/* special DNS tokens */
+#define DKIM_DNSKEYNAME "_domainkey"
+ /* reserved DNS sub-zone; the key is looked up at <selector>._domainkey.<domain> */
+#define DKIM_DNSPOLICYNAME "_adsp" /* reserved DNS sub-zone */
+
+/* Canonization methods (values of header_canon_type and body_canon_type) */
+#define DKIM_CANON_UNKNOWN (-1) /* unknown method */
+#define DKIM_CANON_SIMPLE 0 /* as specified in DKIM spec */
+#define DKIM_CANON_RELAXED 1 /* as specified in DKIM spec */
+
+#define DKIM_CANON_DEFAULT DKIM_CANON_SIMPLE
+
+/* Signature methods (values of sig_alg) */
+#define DKIM_SIGN_UNKNOWN (-2) /* unknown method */
+#define DKIM_SIGN_DEFAULT (-1) /* use internal default */
+#define DKIM_SIGN_RSASHA1 0 /* an RSA-signed SHA1 digest */
+#define DKIM_SIGN_RSASHA256 1 /* an RSA-signed SHA256 digest */
+
+/*
+ * Params
+ *
+ * Tags of the signature header. The signature parser in dkim.c uses these
+ * values as indexes into its parser_funcs table (DKIM_PARAM_UNKNOWN is
+ * rejected before indexing), so presumably the values must stay in step
+ * with that table - verify before renumbering.
+ */
+#define DKIM_PARAM_UNKNOWN (-1) /* unknown */
+#define DKIM_PARAM_SIGNATURE 0 /* b */
+#define DKIM_PARAM_SIGNALG 1 /* a */
+#define DKIM_PARAM_DOMAIN 2 /* d */
+#define DKIM_PARAM_CANONALG 3 /* c */
+#define DKIM_PARAM_QUERYMETHOD 4 /* q */
+#define DKIM_PARAM_SELECTOR 5 /* s */
+#define DKIM_PARAM_HDRLIST 6 /* h */
+#define DKIM_PARAM_VERSION 7 /* v */
+#define DKIM_PARAM_IDENTITY 8 /* i */
+#define DKIM_PARAM_TIMESTAMP 9 /* t */
+#define DKIM_PARAM_EXPIRATION 10 /* x */
+#define DKIM_PARAM_COPIEDHDRS 11 /* z */
+#define DKIM_PARAM_BODYHASH 12 /* bh */
+#define DKIM_PARAM_BODYLENGTH 13 /* l */
+
+/*
+ * Errors (from OpenDKIM)
+ *
+ * Used as the code of GError objects in the DKIM_ERROR domain.
+ */
+
+#define DKIM_SIGERROR_UNKNOWN (-1) /* unknown error */
+#define DKIM_SIGERROR_OK 0 /* no error */
+#define DKIM_SIGERROR_VERSION 1 /* unsupported version */
+#define DKIM_SIGERROR_DOMAIN 2 /* invalid domain (d=/i=) */
+#define DKIM_SIGERROR_EXPIRED 3 /* signature expired */
+#define DKIM_SIGERROR_FUTURE 4 /* signature in the future */
+#define DKIM_SIGERROR_TIMESTAMPS 5 /* x= < t= */
+#define DKIM_SIGERROR_UNUSED 6 /* OBSOLETE */
+#define DKIM_SIGERROR_INVALID_HC 7 /* c= invalid (header) */
+#define DKIM_SIGERROR_INVALID_BC 8 /* c= invalid (body) */
+#define DKIM_SIGERROR_MISSING_A 9 /* a= missing */
+#define DKIM_SIGERROR_INVALID_A 10 /* a= invalid */
+#define DKIM_SIGERROR_MISSING_H 11 /* h= missing */
+#define DKIM_SIGERROR_INVALID_L 12 /* l= invalid */
+#define DKIM_SIGERROR_INVALID_Q 13 /* q= invalid */
+#define DKIM_SIGERROR_INVALID_QO 14 /* q= option invalid */
+#define DKIM_SIGERROR_MISSING_D 15 /* d= missing */
+#define DKIM_SIGERROR_EMPTY_D 16 /* d= empty */
+#define DKIM_SIGERROR_MISSING_S 17 /* s= missing */
+#define DKIM_SIGERROR_EMPTY_S 18 /* s= empty */
+#define DKIM_SIGERROR_MISSING_B 19 /* b= missing */
+#define DKIM_SIGERROR_EMPTY_B 20 /* b= empty */
+#define DKIM_SIGERROR_CORRUPT_B 21 /* b= corrupt */
+#define DKIM_SIGERROR_NOKEY 22 /* no key found in DNS (dkim.c: DNS request failed) */
+#define DKIM_SIGERROR_DNSSYNTAX 23 /* DNS reply corrupt */
+#define DKIM_SIGERROR_KEYFAIL 24 /* DNS query failed (dkim.c: no p= in record or key cannot be loaded) */
+#define DKIM_SIGERROR_MISSING_BH 25 /* bh= missing */
+#define DKIM_SIGERROR_EMPTY_BH 26 /* bh= empty */
+#define DKIM_SIGERROR_CORRUPT_BH 27 /* bh= corrupt */
+#define DKIM_SIGERROR_BADSIG 28 /* signature mismatch */
+#define DKIM_SIGERROR_SUBDOMAIN 29 /* unauthorized subdomain */
+#define DKIM_SIGERROR_MULTIREPLY 30 /* multiple records returned */
+#define DKIM_SIGERROR_EMPTY_H 31 /* h= empty */
+#define DKIM_SIGERROR_INVALID_H 32 /* h= missing req'd entries */
+#define DKIM_SIGERROR_TOOLARGE_L 33 /* l= value exceeds body size */
+#define DKIM_SIGERROR_MBSFAILED 34 /* "must be signed" failure */
+#define DKIM_SIGERROR_KEYVERSION 35 /* unknown key version */
+#define DKIM_SIGERROR_KEYUNKNOWNHASH 36 /* unknown key hash */
+#define DKIM_SIGERROR_KEYHASHMISMATCH 37 /* sig-key hash mismatch */
+#define DKIM_SIGERROR_NOTEMAILKEY 38 /* not an e-mail key */
+#define DKIM_SIGERROR_UNUSED2 39 /* OBSOLETE */
+#define DKIM_SIGERROR_KEYTYPEMISSING 40 /* key type missing */
+#define DKIM_SIGERROR_KEYTYPEUNKNOWN 41 /* key type unknown */
+#define DKIM_SIGERROR_KEYREVOKED 42 /* key revoked */
+#define DKIM_SIGERROR_KEYDECODE 43 /* key couldn't be decoded */
+#define DKIM_SIGERROR_MISSING_V 44 /* v= tag missing */
+#define DKIM_SIGERROR_EMPTY_V 45 /* v= tag empty */
+
+/* Check results (return values of rspamd_dkim_check) */
+#define DKIM_CONTINUE 0 /* continue: no check has failed */
+#define DKIM_REJECT 1 /* reject: body hash or signature mismatch */
+#define DKIM_TRYAGAIN 2 /* try again later */
+#define DKIM_NOTFOUND 3 /* requested record not found */
+#define DKIM_RECORD_ERROR 4 /* error requesting record; also returned when the body cannot be canonized */
+
+/*
+ * Verification context built from one DKIM-Signature header by
+ * rspamd_create_dkim_context.
+ */
+typedef struct rspamd_dkim_context_s {
+ rspamd_mempool_t *pool; /* pool used for dns_key and lookup state; also runs the checksum destructors */
+ gint sig_alg; /* DKIM_SIGN_* */
+ gint header_canon_type; /* DKIM_CANON_* used for headers */
+ gint body_canon_type; /* DKIM_CANON_* used for the body */
+ gsize len; /* body length limit applied by rspamd_dkim_check, 0 means the whole body */
+ gchar *domain; /* required, forms the last part of dns_key */
+ gchar *selector; /* required, forms the first part of dns_key */
+ time_t timestamp; /* signature creation time, 0 if not set */
+ time_t expiration; /* signature expiration time, 0 if not set */
+ gint8 *b; /* signature value, blen bytes, passed to RSA_verify */
+ gint8 *bh; /* body hash value, bhlen bytes, compared with the computed digest */
+ guint bhlen;
+ guint blen;
+ GPtrArray *hlist; /* signed headers: elements are struct rspamd_dkim_header (name, count) */
+ guint ver; /* signature version, 0 is treated as missing */
+ gchar *dns_key; /* "<selector>._domainkey.<domain>", name of the TXT record with the key */
+ GChecksum *headers_hash; /* checksum over canonized headers, type follows sig_alg */
+ GChecksum *body_hash; /* checksum over canonized body, type follows sig_alg */
+} rspamd_dkim_context_t;
+
+/*
+ * Public key obtained from DNS; created by the key lookup and released with
+ * rspamd_dkim_key_free.
+ */
+typedef struct rspamd_dkim_key_s {
+ guint8 *keydata; /* slice-allocated buffer: base64 text is copied here and decoded */
+ guint keylen; /* size of the keydata allocation (text length + 1), used when freeing */
+ gsize decoded_len; /* number of decoded key bytes in keydata */
+ guint ttl; /* ttl of the DNS entry the key was parsed from */
+#ifdef HAVE_OPENSSL
+ RSA *key_rsa; /* RSA key extracted from key_evp, used by RSA_verify */
+ BIO *key_bio; /* memory BIO over the decoded keydata */
+ EVP_PKEY *key_evp; /* public key parsed from key_bio */
+#endif
+}
+rspamd_dkim_key_t;
+
+struct rspamd_task;
+
+/*
+ * Callback that receives the result of rspamd_get_dkim_key.
+ *
+ * key is NULL when no key could be obtained. Err MUST be freed if it is not
+ * NULL, key is allocated by slice allocator (free it with
+ * rspamd_dkim_key_free). ctx and ud are the values given to
+ * rspamd_get_dkim_key.
+ */
+typedef void (*dkim_key_handler_f)(rspamd_dkim_key_t *key, gsize keylen, rspamd_dkim_context_t *ctx, gpointer ud, GError *err);
+
+/**
+ * Create new dkim context from signature
+ * @param sig message's signature
+ * @param pool pool to allocate memory from
+ * @param time_jitter jitter in seconds to allow time diff while checking
+ * @param err pointer to error object
+ * @return new context or NULL
+ */
+rspamd_dkim_context_t* rspamd_create_dkim_context (const gchar *sig, rspamd_mempool_t *pool, guint time_jitter, GError **err);
+
+/**
+ * Make DNS request for specified context and obtain and parse key
+ * @param ctx dkim context from signature
+ * @param resolver dns resolver object
+ * @param s async session to make request
+ * @param handler callback that receives the key or an error
+ * @param ud opaque pointer passed to the handler
+ * @return TRUE if the DNS request has been made, FALSE otherwise
+ */
+gboolean rspamd_get_dkim_key (rspamd_dkim_context_t *ctx, struct rspamd_dns_resolver *resolver,
+ struct rspamd_async_session *s, dkim_key_handler_f handler, gpointer ud);
+
+/**
+ * Check task for dkim context using dkim key
+ * @param ctx dkim verify context
+ * @param key dkim key (from cache or from dns request)
+ * @param task task to check
+ * @return one of the check results: DKIM_CONTINUE, DKIM_REJECT or
+ * DKIM_RECORD_ERROR
+ */
+gint rspamd_dkim_check (rspamd_dkim_context_t *ctx, rspamd_dkim_key_t *key, struct rspamd_task *task);
+
+/**
+ * Free DKIM key
+ * @param key key returned to a dkim_key_handler_f callback
+ */
+void rspamd_dkim_key_free (rspamd_dkim_key_t *key);
+
+#endif /* DKIM_H_ */
diff --git a/src/libserver/dns.c b/src/libserver/dns.c
new file mode 100644
index 000000000..e20cca9df
--- /dev/null
+++ b/src/libserver/dns.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2009-2013, Vsevolod Stakhov
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "dns.h"
+#include "main.h"
+#include "utlist.h"
+#include "uthash.h"
+#include "rdns_event.h"
+
+/* Resolver object: the rdns resolver plus the request settings taken from the config */
+struct rspamd_dns_resolver {
+ struct rdns_resolver *r; /* underlying rdns resolver */
+ struct event_base *ev_base; /* event base the resolver is bound to */
+ gdouble request_timeout; /* from cfg->dns_timeout, passed to every request */
+ guint max_retransmits; /* from cfg->dns_retransmits, passed to every request */
+};
+
+/* Per-request state handed to rspamd_dns_callback */
+struct rspamd_dns_request_ud {
+ struct rspamd_async_session *session; /* session the request is registered in */
+ dns_callback_type cb; /* user's callback */
+ gpointer ud; /* user's data for cb */
+ struct rdns_request *req; /* set by make_dns_request after the request is created */
+};
+
+/* Session finalizer of a DNS request: drops the reference taken by make_dns_request */
+static void
+rspamd_dns_fin_cb (gpointer arg)
+{
+    rdns_request_release ((struct rdns_request *)arg);
+}
+
+/*
+ * Reply handler given to rdns: forwards the reply to the user's callback
+ * and then removes the request's event from the session.
+ */
+static void
+rspamd_dns_callback (struct rdns_reply *reply, gpointer ud)
+{
+    struct rspamd_dns_request_ud *state = ud;
+
+    /* The user's callback runs first, the session event is removed afterwards */
+    state->cb (reply, state->ud);
+    remove_normal_event (state->session, rspamd_dns_fin_cb, state->req);
+}
+
+/*
+ * Make a DNS request and register it in the async session.
+ *
+ * The request state is allocated from `pool`. On success the request is
+ * registered as a session event with rspamd_dns_fin_cb as finalizer and an
+ * extra reference is taken for that event.
+ *
+ * Returns FALSE if the request cannot be created.
+ */
+gboolean
+make_dns_request (struct rspamd_dns_resolver *resolver,
+    struct rspamd_async_session *session, rspamd_mempool_t *pool, dns_callback_type cb,
+    gpointer ud, enum rdns_request_type type, const char *name)
+{
+    struct rspamd_dns_request_ud *state;
+    struct rdns_request *request;
+
+    state = rspamd_mempool_alloc (pool, sizeof (*state));
+    state->session = session;
+    state->cb = cb;
+    state->ud = ud;
+
+    request = rdns_make_request_full (resolver->r, rspamd_dns_callback, state,
+        resolver->request_timeout, resolver->max_retransmits, 1, name, type);
+
+    if (request == NULL) {
+        return FALSE;
+    }
+
+    register_async_event (session, (event_finalizer_t)rspamd_dns_fin_cb, request,
+        g_quark_from_static_string ("dns resolver"));
+    /* Ref event to free it only when according async event is deleted from the session */
+    rdns_request_retain (request);
+    state->req = request;
+
+    return TRUE;
+}
+
+
+/*
+ * Create the resolver, bind it to the event base and logger and configure
+ * its nameservers.
+ *
+ * Nameservers come from cfg->nameservers, each element being "address" or
+ * "address:priority"; when the list is empty /etc/resolv.conf is parsed
+ * instead. Servers that cannot be added are logged and skipped.
+ *
+ * NOTE(review): when resolv.conf cannot be parsed the resolver is returned
+ * without rdns_resolver_init being called - confirm callers cope with that.
+ */
+struct rspamd_dns_resolver *
+dns_resolver_init (rspamd_logger_t *logger, struct event_base *ev_base, struct config_file *cfg)
+{
+    GList *cur;
+    struct rspamd_dns_resolver *new;
+    gchar *begin, *p, *err;
+    gint priority;
+
+    new = g_slice_alloc0 (sizeof (struct rspamd_dns_resolver));
+    new->ev_base = ev_base;
+    new->request_timeout = cfg->dns_timeout;
+    new->max_retransmits = cfg->dns_retransmits;
+
+    new->r = rdns_resolver_new ();
+    rdns_bind_libevent (new->r, new->ev_base);
+    rdns_resolver_set_log_level (new->r, cfg->log_level);
+    rdns_resolver_set_logger (new->r, (rdns_log_function)rspamd_common_logv, logger);
+
+    if (cfg->nameservers == NULL) {
+        /* Parse resolv.conf */
+        if (!rdns_resolver_parse_resolv_conf (new->r, "/etc/resolv.conf")) {
+            msg_err ("cannot parse resolv.conf and no nameservers defined, so no ways to resolve addresses");
+            return new;
+        }
+    }
+    else {
+        for (cur = cfg->nameservers; cur != NULL; cur = g_list_next (cur)) {
+            begin = cur->data;
+            priority = 0;
+            p = strchr (begin, ':');
+            if (p != NULL) {
+                /* Split "address:priority" in place */
+                *p = '\0';
+                p ++;
+                priority = strtoul (p, &err, 10);
+                if (err != NULL && *err != '\0') {
+                    /* The old text spoke about 'm' and 's' suffixes that are not parsed here */
+                    msg_info ("bad character '%x' in nameserver priority, must be a number", *err);
+                }
+            }
+            if (!rdns_resolver_add_server (new->r, begin, 53, priority, cfg->dns_io_per_server)) {
+                msg_warn ("cannot parse ip address of nameserver: %s", begin);
+            }
+        }
+    }
+
+    rdns_resolver_init (new->r);
+
+    return new;
+}
diff --git a/src/libserver/dns.h b/src/libserver/dns.h
new file mode 100644
index 000000000..26ae71387
--- /dev/null
+++ b/src/libserver/dns.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2013, Vsevolod Stakhov
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RSPAMD_DNS_H
+#define RSPAMD_DNS_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "events.h"
+#include "logger.h"
+#include "rdns.h"
+
+struct rspamd_dns_resolver;
+
+/* Rspamd DNS API */
+
+/**
+ * Init DNS resolver, params are obtained from a config file or system file /etc/resolv.conf
+ * @param logger logger passed to the resolver's logging hook
+ * @param ev_base event base the resolver is bound to
+ * @param cfg configuration; nameservers, timeout, retransmits and log level are read from it
+ * @return newly allocated resolver object
+ */
+struct rspamd_dns_resolver *dns_resolver_init (rspamd_logger_t *logger,
+ struct event_base *ev_base, struct config_file *cfg);
+
+/**
+ * Make a DNS request
+ * @param resolver resolver object
+ * @param session async session to register event
+ * @param pool memory pool for storage
+ * @param cb callback to call on resolve completing
+ * @param ud user data for callback
+ * @param type request type
+ * @param name name string handed to the resolver together with the request type
+ * @return TRUE if request was sent.
+ */
+gboolean make_dns_request (struct rspamd_dns_resolver *resolver,
+ struct rspamd_async_session *session, rspamd_mempool_t *pool,
+ dns_callback_type cb, gpointer ud, enum rdns_request_type type, const char *name);
+
+#endif
diff --git a/src/libserver/dynamic_cfg.c b/src/libserver/dynamic_cfg.c
new file mode 100644
index 000000000..7f5e8530d
--- /dev/null
+++ b/src/libserver/dynamic_cfg.c
@@ -0,0 +1,599 @@
+/* Copyright (c) 2010-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "map.h"
+#include "filter.h"
+#include "dynamic_cfg.h"
+#include "json/jansson.h"
+
+struct dynamic_cfg_symbol { /* one symbol weight override */
+ gchar *name; /* g_strdup'ed symbol name, released with g_free */
+ gdouble value; /* weight written over the metric's symbol weight when applied */
+};
+
+struct dynamic_cfg_action { /* one action score override */
+ enum rspamd_metric_action action; /* action this entry refers to */
+ gdouble value; /* score to apply; a negative value means "not set" and is skipped */
+};
+
+struct dynamic_cfg_metric { /* all overrides for a single metric */
+ GList *symbols; /* list of struct dynamic_cfg_symbol */
+ struct dynamic_cfg_action actions[METRIC_ACTION_MAX]; /* writers in this file index it by action number */
+ gchar *name; /* g_strdup'ed metric name, used to look the metric up in cfg->metrics */
+};
+
+struct config_json_buf { /* per-map state shared by the read and fin callbacks */
+ gchar *buf; /* accumulated raw JSON text, NULL until the first chunk arrives */
+ gchar *pos; /* write position inside buf */
+ size_t buflen; /* allocated size of buf */
+ struct config_file *cfg; /* configuration the parsed rules are applied to */
+ GList *config_metrics; /* parsed list of struct dynamic_cfg_metric */
+};
+
+/**
+ * Free dynamic configuration
+ *
+ * Releases every metric of the list together with its symbols, the name
+ * strings owned by both, and finally the list links themselves.
+ * @param conf_metrics list of struct dynamic_cfg_metric, may be NULL
+ */
+static void
+dynamic_cfg_free (GList *conf_metrics)
+{
+	GList *cur, *cur_elt;
+	struct dynamic_cfg_metric *metric;
+	struct dynamic_cfg_symbol *sym;
+
+	for (cur = conf_metrics; cur != NULL; cur = g_list_next (cur)) {
+		metric = cur->data;
+		for (cur_elt = metric->symbols; cur_elt != NULL; cur_elt = g_list_next (cur_elt)) {
+			sym = cur_elt->data;
+			g_free (sym->name);
+			g_slice_free1 (sizeof (struct dynamic_cfg_symbol), sym);
+		}
+		g_list_free (metric->symbols);
+		/* The metric name is a g_strdup'ed copy owned by the metric itself */
+		g_free (metric->name);
+		g_slice_free1 (sizeof (struct dynamic_cfg_metric), metric);
+	}
+	/* g_list_free accepts NULL, so an empty configuration needs no special case */
+	g_list_free (conf_metrics);
+}
+/**
+ * Copy dynamic overrides into the live configuration
+ *
+ * For every dynamic metric that also exists in cfg->metrics, overwrite the
+ * weights of known symbols and the scores of matching actions. Symbols that
+ * are unknown to the live metric are only logged; action entries with a
+ * negative value are treated as unset and skipped.
+ * @param conf_metrics list of struct dynamic_cfg_metric
+ * @param cfg configuration to modify
+ */
+static void
+apply_dynamic_conf (GList *conf_metrics, struct config_file *cfg)
+{
+	GList *mlink, *slink;
+	struct dynamic_cfg_metric *dyn;
+	struct dynamic_cfg_symbol *dsym;
+	struct dynamic_cfg_action *dact;
+	struct metric *target;
+	struct metric_action *tact;
+	gdouble *weight;
+	gint di, ti;
+
+	for (mlink = conf_metrics; mlink != NULL; mlink = g_list_next (mlink)) {
+		dyn = mlink->data;
+		target = g_hash_table_lookup (cfg->metrics, dyn->name);
+		if (target == NULL) {
+			/* Metric is not present in the main configuration */
+			continue;
+		}
+
+		for (slink = dyn->symbols; slink != NULL; slink = g_list_next (slink)) {
+			dsym = slink->data;
+			weight = g_hash_table_lookup (target->symbols, dsym->name);
+			if (weight == NULL) {
+				msg_info ("symbol %s is not found in the main configuration", dsym->name);
+				continue;
+			}
+			*weight = dsym->value;
+		}
+
+		for (di = METRIC_ACTION_REJECT; di < METRIC_ACTION_MAX; di ++) {
+			dact = &dyn->actions[di];
+			if (dact->value < 0) {
+				continue;
+			}
+			for (ti = METRIC_ACTION_REJECT; ti < METRIC_ACTION_MAX; ti ++) {
+				tact = &target->actions[ti];
+				if (tact->action == dact->action) {
+					tact->score = dact->value;
+				}
+				/* Update required score accordingly to metric's action */
+				if (dact->action == METRIC_ACTION_REJECT) {
+					target->actions[METRIC_ACTION_REJECT].score = dact->value;
+				}
+			}
+		}
+	}
+}
+
+/* Callbacks for reading json dynamic rules */
+/**
+ * Map read callback: append one chunk of raw JSON text to the per-map buffer
+ *
+ * The buffer state is created on the first chunk and inherits the cfg pointer
+ * from data->prev_data. The buffer always keeps at least one spare byte after
+ * the stored text, because the finalisation callback NUL-terminates it in
+ * place at the current write position.
+ * @param pool memory pool (unused)
+ * @param chunk data to append
+ * @param len number of bytes in chunk
+ * @param data map callback state
+ * @return always NULL, i.e. no part of chunk has to be kept by the caller
+ */
+gchar *
+json_config_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data)
+{
+	struct config_json_buf *jb;
+	gsize used, need;
+
+	if (data->cur_data == NULL) {
+		jb = g_malloc0 (sizeof (struct config_json_buf));
+		jb->cfg = ((struct config_json_buf *)data->prev_data)->cfg;
+		data->cur_data = jb;
+	}
+	else {
+		jb = data->cur_data;
+	}
+
+	if (len <= 0) {
+		/* Nothing to store; leave the buffer untouched */
+		return NULL;
+	}
+
+	used = (jb->buf != NULL) ? (gsize)(jb->pos - jb->buf) : 0;
+	/* One extra byte is reserved for the terminating NUL written on fin */
+	need = used + (gsize)len + 1;
+
+	if (jb->buf == NULL || need > jb->buflen) {
+		jb->buflen = MAX (jb->buflen * 2, need + (gsize)len);
+		/* g_realloc behaves like g_malloc when the old pointer is NULL */
+		jb->buf = g_realloc (jb->buf, jb->buflen);
+		jb->pos = jb->buf + used;
+	}
+
+	memcpy (jb->pos, chunk, len);
+	jb->pos += len;
+
+	/* Say not to copy any part of this buffer */
+	return NULL;
+}
+
+/**
+ * Map finalisation callback: parse the accumulated JSON and apply it
+ *
+ * Frees the previous buffer state, NUL-terminates and parses the new buffer,
+ * which must be a JSON array of objects with a "metric" name and optional
+ * "symbols" / "actions" arrays of {"name", "value"} objects. The resulting
+ * list of struct dynamic_cfg_metric is applied to the configuration and
+ * stored in cfg->current_dynamic_conf. Malformed elements are logged and
+ * skipped; on a parse error the current configuration is left untouched.
+ * @param pool memory pool (unused)
+ * @param data map callback state
+ */
+void
+json_config_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data)
+{
+	struct config_json_buf *jb;
+	guint nelts, i, j, selts;
+	gint test_act, act_idx;
+	gsize used;
+	json_t *js, *cur_elt, *cur_nm, *it_val, *js_name, *js_value;
+	json_error_t je;
+	struct dynamic_cfg_metric *cur_metric;
+	struct dynamic_cfg_symbol *cur_symbol;
+	struct dynamic_cfg_action *cur_action;
+
+	if (data->prev_data) {
+		jb = data->prev_data;
+		/* Clean prev data */
+		g_free (jb->buf);
+		g_free (jb);
+	}
+
+	/* Now parse json */
+	if (data->cur_data == NULL) {
+		msg_err ("no data read");
+		return;
+	}
+	jb = data->cur_data;
+	if (jb->buf == NULL) {
+		msg_err ("no data read");
+		return;
+	}
+
+	/* Make sure there is room for the terminator before writing it */
+	used = jb->pos - jb->buf;
+	if (used >= jb->buflen) {
+		jb->buflen = used + 1;
+		jb->buf = g_realloc (jb->buf, jb->buflen);
+		jb->pos = jb->buf + used;
+	}
+	/* NULL terminate current buf */
+	*jb->pos = '\0';
+
+	js = json_loads (jb->buf, &je);
+	if (!js) {
+		msg_err ("cannot load json data: parse error %s, on line %d", je.text, je.line);
+		return;
+	}
+
+	if (!json_is_array (js)) {
+		json_decref (js);
+		msg_err ("loaded json is not an array");
+		return;
+	}
+
+	/*
+	 * NOTE(review): the list previously stored in cfg->current_dynamic_conf is
+	 * dropped here without being freed (jb is the fresh state, so
+	 * jb->config_metrics is normally NULL) - confirm ownership before freeing it.
+	 */
+	jb->cfg->current_dynamic_conf = NULL;
+	dynamic_cfg_free (jb->config_metrics);
+	jb->config_metrics = NULL;
+
+	/* Parse configuration */
+	nelts = json_array_size (js);
+	for (i = 0; i < nelts; i++) {
+		cur_elt = json_array_get (js, i);
+		if (!cur_elt || !json_is_object (cur_elt)) {
+			msg_err ("loaded json array element is not an object");
+			continue;
+		}
+
+		cur_nm = json_object_get (cur_elt, "metric");
+		if (!cur_nm || !json_is_string (cur_nm)) {
+			msg_err ("loaded json metric object element has no 'metric' attribute");
+			continue;
+		}
+		cur_metric = g_slice_alloc0 (sizeof (struct dynamic_cfg_metric));
+		/* Mark all actions as unset; a dedicated index keeps the outer loop counter intact */
+		for (act_idx = METRIC_ACTION_REJECT; act_idx < METRIC_ACTION_MAX; act_idx ++) {
+			cur_metric->actions[act_idx].value = -1.0;
+		}
+		cur_metric->name = g_strdup (json_string_value (cur_nm));
+		cur_nm = json_object_get (cur_elt, "symbols");
+		/* Parse symbols */
+		if (cur_nm && json_is_array (cur_nm)) {
+			selts = json_array_size (cur_nm);
+			for (j = 0; j < selts; j ++) {
+				it_val = json_array_get (cur_nm, j);
+				if (!it_val || !json_is_object (it_val)) {
+					continue;
+				}
+				js_name = json_object_get (it_val, "name");
+				js_value = json_object_get (it_val, "value");
+				/* A non-string name would yield a NULL string that is dereferenced later */
+				if (js_name && json_is_string (js_name) && js_value) {
+					cur_symbol = g_slice_alloc0 (sizeof (struct dynamic_cfg_symbol));
+					cur_symbol->name = g_strdup (json_string_value (js_name));
+					cur_symbol->value = json_number_value (js_value);
+					/* Insert symbol */
+					cur_metric->symbols = g_list_prepend (cur_metric->symbols, cur_symbol);
+				}
+				else {
+					msg_info ("json symbol object has no mandatory 'name' and 'value' attributes");
+				}
+			}
+		}
+		cur_nm = json_object_get (cur_elt, "actions");
+		/* Parse actions */
+		if (cur_nm && json_is_array (cur_nm)) {
+			selts = json_array_size (cur_nm);
+			for (j = 0; j < selts; j ++) {
+				it_val = json_array_get (cur_nm, j);
+				if (!it_val || !json_is_object (it_val)) {
+					continue;
+				}
+				js_name = json_object_get (it_val, "name");
+				js_value = json_object_get (it_val, "value");
+				if (js_name && json_is_string (js_name) && js_value) {
+					if (!check_action_str (json_string_value (js_name), &test_act) ||
+						test_act < 0 || test_act >= METRIC_ACTION_MAX) {
+						/* Nothing was allocated for this entry, so there is nothing to free */
+						msg_err ("unknown action: %s", json_string_value (js_name));
+						continue;
+					}
+					cur_action = &cur_metric->actions[test_act];
+					cur_action->action = test_act;
+					cur_action->value = json_number_value (js_value);
+				}
+				else {
+					msg_info ("json action object has no mandatory 'name' and 'value' attributes");
+				}
+			}
+		}
+		jb->config_metrics = g_list_prepend (jb->config_metrics, cur_metric);
+	}
+	/*
+	 * Note about thread safety: we are updating values that are gdoubles so it is not atomic in general case
+	 * but on the other hand all that data is used only in the main thread, so why it is *likely* safe
+	 * to do this task in this way without explicit lock.
+	 */
+	apply_dynamic_conf (jb->config_metrics, jb->cfg);
+
+	jb->cfg->current_dynamic_conf = jb->config_metrics;
+
+	json_decref (js);
+}
+
+/**
+ * Init dynamic configuration using map logic and specific configuration
+ *
+ * Registers cfg->dynamic_conf as a map whose content is handled by the json
+ * read/fin callbacks. Does nothing when no dynamic configuration is set.
+ * @param cfg config file
+ */
+void
+init_dynamic_config (struct config_file *cfg)
+{
+	struct config_json_buf *state, **state_ref;
+
+	if (cfg->dynamic_conf == NULL) {
+		/* No dynamic conf has been specified, so do not try to load it */
+		return;
+	}
+
+	/* Zeroed state: empty buffer, only the configuration pointer is set */
+	state = g_malloc0 (sizeof (struct config_json_buf));
+	state->cfg = cfg;
+
+	/* The map keeps a pointer to the state pointer */
+	state_ref = g_malloc (sizeof (struct config_json_buf *));
+	*state_ref = state;
+
+	/* Now try to add map with json data */
+	if (!add_map (cfg, cfg->dynamic_conf, "Dynamic configuration map", json_config_read_cb, json_config_fin_cb, (void **)state_ref)) {
+		msg_err ("cannot add map for configuration %s", cfg->dynamic_conf);
+	}
+}
+
+/**
+ * Write the dynamic rules as a JSON array to an open descriptor
+ *
+ * The descriptor is wrapped into a stdio stream; on success the stream is
+ * closed with fclose, which also closes fd. FALSE is returned only when
+ * fdopen fails, in which case fd is left open for the caller to close.
+ * @param fd descriptor to write to
+ * @param rules list of struct dynamic_cfg_metric
+ * @return TRUE if the list was written and fd was closed
+ */
+static gboolean
+dump_dynamic_list (gint fd, GList *rules)
+{
+	GList *cur, *cur_elt;
+	struct dynamic_cfg_metric *metric;
+	struct dynamic_cfg_symbol *sym;
+	struct dynamic_cfg_action *act;
+	FILE *f;
+	gint i;
+	gboolean start;
+
+	/* Open buffered stream for the descriptor */
+	if ((f = fdopen (fd, "a+")) == NULL) {
+		msg_err ("fdopen failed: %s", strerror (errno));
+		return FALSE;
+	}
+
+	if (rules) {
+		fprintf (f, "[\n");
+		cur = rules;
+		while (cur) {
+			metric = cur->data;
+			fprintf (f, "{\n \"metric\": \"%s\",\n", metric->name);
+			if (metric->symbols) {
+				fprintf (f, " \"symbols\": [\n");
+				cur_elt = metric->symbols;
+				while (cur_elt) {
+					sym = cur_elt->data;
+					cur_elt = g_list_next (cur_elt);
+					/* Every element but the last one is followed by a comma */
+					fprintf (f, " {\"name\": \"%s\",\"value\": %.2f}%s\n",
+						sym->name, sym->value, cur_elt ? "," : "");
+				}
+				/* The actions array always follows, hence the trailing comma */
+				fprintf (f, " ],\n");
+			}
+
+			fprintf (f, " \"actions\": [\n");
+			/* The separator state is per metric, otherwise the next metric starts with a comma */
+			start = TRUE;
+			for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) {
+				act = &metric->actions[i];
+				if (act->value < 0) {
+					/* Negative value marks an unset action */
+					continue;
+				}
+				/* The array is indexed by action, so the index names the action reliably */
+				fprintf (f, " %s{\"name\": \"%s\",\"value\": %.2f}\n",
+					(start ? "" : ","), str_action_metric ((enum rspamd_metric_action)i), act->value);
+				start = FALSE;
+			}
+			fprintf (f, " ]\n");
+
+			cur = g_list_next (cur);
+			if (cur) {
+				fprintf (f, "},\n");
+			}
+			else {
+				fprintf (f, "}\n]\n");
+			}
+		}
+	}
+	fclose (f);
+
+	return TRUE;
+}
+
+/**
+ * Dump dynamic configuration to the disk
+ *
+ * The rules are written to a temporary file created next to
+ * cfg->dynamic_conf and then renamed over it, so the old file stays in place
+ * until the new one is complete. The permissions of the old file are
+ * re-applied to the new one (rw-r--r-- if the old file could not be stat'ed).
+ * @param cfg config file object
+ * @return TRUE if the configuration was written, FALSE if dynamic
+ * configuration is not in use or any step failed
+ */
+gboolean
+dump_dynamic_config (struct config_file *cfg)
+{
+	struct stat st;
+	gchar *dir, pathbuf[PATH_MAX];
+	gint fd;
+
+	if (cfg->dynamic_conf == NULL || cfg->current_dynamic_conf == NULL) {
+		/* No dynamic conf has been specified, so do not try to dump it */
+		return FALSE;
+	}
+
+	dir = g_path_get_dirname (cfg->dynamic_conf);
+	if (dir == NULL) {
+		/* Inaccessible path */
+		msg_err ("invalid file: %s", cfg->dynamic_conf);
+		return FALSE;
+	}
+
+	if (stat (cfg->dynamic_conf, &st) == -1) {
+		msg_debug ("%s is unavailable: %s", cfg->dynamic_conf, strerror (errno));
+		st.st_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH;
+	}
+	if (access (dir, W_OK | R_OK) == -1) {
+		msg_warn ("%s is inaccessible: %s", dir, strerror (errno));
+		g_free (dir);
+		return FALSE;
+	}
+	rspamd_snprintf (pathbuf, sizeof (pathbuf), "%s%crconf-XXXXXX", dir, G_DIR_SEPARATOR);
+	g_free (dir);
+#ifdef HAVE_MKSTEMP
+	/* Umask is set before */
+	fd = mkstemp (pathbuf);
+#else
+	fd = g_mkstemp_full (pathbuf, O_RDWR, S_IWUSR | S_IRUSR);
+#endif
+	if (fd == -1) {
+		msg_err ("mkstemp error: %s", strerror (errno));
+
+		return FALSE;
+	}
+
+	if (!dump_dynamic_list (fd, cfg->current_dynamic_conf)) {
+		/* The stream could not be opened, so the descriptor is still ours to close */
+		close (fd);
+		unlink (pathbuf);
+		return FALSE;
+	}
+	/* From here on fd is already closed: dump_dynamic_list fclose()s its stream */
+
+	/* Replace old config; rename() overwrites the target, so it is not unlinked first */
+	if (rename (pathbuf, cfg->dynamic_conf) == -1) {
+		msg_err ("rename error: %s", strerror (errno));
+		unlink (pathbuf);
+		return FALSE;
+	}
+
+	/* Set permissions */
+	if (chmod (cfg->dynamic_conf, st.st_mode) == -1) {
+		msg_warn ("chmod failed: %s", strerror (errno));
+	}
+
+	return TRUE;
+}
+
+/**
+ * Add symbol for specified metric
+ *
+ * Updates the weight of an existing dynamic symbol or creates the symbol
+ * (and the dynamic metric, if needed), then re-applies the whole dynamic
+ * configuration. Names are compared case-insensitively.
+ * @param cfg config file object
+ * @param metric_name metric's name
+ * @param symbol symbol's name
+ * @param value value of symbol
+ * @return FALSE if dynamic configuration is disabled, TRUE otherwise
+ */
+gboolean
+add_dynamic_symbol (struct config_file *cfg, const gchar *metric_name, const gchar *symbol, gdouble value)
+{
+	GList *cur;
+	struct dynamic_cfg_metric *metric = NULL;
+	struct dynamic_cfg_symbol *sym = NULL;
+	gint i;
+
+	if (cfg->dynamic_conf == NULL) {
+		msg_info ("dynamic conf is disabled");
+		return FALSE;
+	}
+
+	cur = cfg->current_dynamic_conf;
+	while (cur) {
+		metric = cur->data;
+		if (g_ascii_strcasecmp (metric->name, metric_name) == 0) {
+			break;
+		}
+		metric = NULL;
+		cur = g_list_next (cur);
+	}
+
+	if (metric != NULL) {
+		/* Search for a symbol */
+		cur = metric->symbols;
+		while (cur) {
+			sym = cur->data;
+			if (g_ascii_strcasecmp (sym->name, symbol) == 0) {
+				sym->value = value;
+				msg_debug ("change value of symbol %s to %.2f", symbol, value);
+				break;
+			}
+			sym = NULL;
+			cur = g_list_next (cur);
+		}
+		if (sym == NULL) {
+			/* Symbol not found, insert it */
+			sym = g_slice_alloc (sizeof (struct dynamic_cfg_symbol));
+			sym->name = g_strdup (symbol);
+			sym->value = value;
+			metric->symbols = g_list_prepend (metric->symbols, sym);
+			msg_debug ("create symbol %s in metric %s", symbol, metric_name);
+		}
+	}
+	else {
+		/* Metric not found, create it */
+		metric = g_slice_alloc0 (sizeof (struct dynamic_cfg_metric));
+		/*
+		 * Zeroed actions would read as "score 0.0" and be applied to the live
+		 * metric; mark them unset with the negative sentinel instead.
+		 */
+		for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) {
+			metric->actions[i].value = -1.0;
+		}
+		sym = g_slice_alloc (sizeof (struct dynamic_cfg_symbol));
+		sym->name = g_strdup (symbol);
+		sym->value = value;
+		metric->symbols = g_list_prepend (metric->symbols, sym);
+		metric->name = g_strdup (metric_name);
+		cfg->current_dynamic_conf = g_list_prepend (cfg->current_dynamic_conf, metric);
+		msg_debug ("create metric %s for symbol %s", metric_name, symbol);
+	}
+
+	apply_dynamic_conf (cfg->current_dynamic_conf, cfg);
+
+	return TRUE;
+}
+
+
+/**
+ * Add action for specified metric
+ *
+ * Stores the score for the given action in the dynamic metric (creating the
+ * metric if needed) and re-applies the whole dynamic configuration. Metric
+ * names are compared case-insensitively.
+ * @param cfg config file object
+ * @param metric_name metric's name
+ * @param action action number, must be below METRIC_ACTION_MAX
+ * @param value score for the action
+ * @return FALSE if dynamic configuration is disabled or action is out of
+ * range, TRUE otherwise
+ */
+gboolean
+add_dynamic_action (struct config_file *cfg, const gchar *metric_name, guint action, gdouble value)
+{
+	GList *cur;
+	struct dynamic_cfg_metric *metric = NULL;
+	gint i;
+
+	if (cfg->dynamic_conf == NULL) {
+		msg_info ("dynamic conf is disabled");
+		return FALSE;
+	}
+
+	if (action >= METRIC_ACTION_MAX) {
+		/* The action is used as an array index below */
+		msg_err ("invalid action number: %d", action);
+		return FALSE;
+	}
+
+	cur = cfg->current_dynamic_conf;
+	while (cur) {
+		metric = cur->data;
+		if (g_ascii_strcasecmp (metric->name, metric_name) == 0) {
+			break;
+		}
+		metric = NULL;
+		cur = g_list_next (cur);
+	}
+
+	if (metric == NULL) {
+		/* Metric not found, create it */
+		metric = g_slice_alloc0 (sizeof (struct dynamic_cfg_metric));
+		/*
+		 * Zeroed actions would read as "score 0.0" and be applied to the live
+		 * metric; mark them unset with the negative sentinel instead.
+		 */
+		for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) {
+			metric->actions[i].value = -1.0;
+		}
+		metric->name = g_strdup (metric_name);
+		cfg->current_dynamic_conf = g_list_prepend (cfg->current_dynamic_conf, metric);
+		msg_debug ("create metric %s for action %d", metric_name, action);
+	}
+
+	/* Record which action the slot stands for; apply and dump read this field */
+	metric->actions[action].action = action;
+	metric->actions[action].value = value;
+
+	apply_dynamic_conf (cfg->current_dynamic_conf, cfg);
+
+	return TRUE;
+}
diff --git a/src/libserver/dynamic_cfg.h b/src/libserver/dynamic_cfg.h
new file mode 100644
index 000000000..b65d7aa9a
--- /dev/null
+++ b/src/libserver/dynamic_cfg.h
@@ -0,0 +1,66 @@
+/* Copyright (c) 2010-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef DYNAMIC_CFG_H_
+#define DYNAMIC_CFG_H_
+
+#include "config.h"
+#include "cfg_file.h"
+
+/**
+ * Init dynamic configuration using map logic and specific configuration
+ * Does nothing when cfg->dynamic_conf is not set.
+ * @param cfg config file
+ */
+void init_dynamic_config (struct config_file *cfg);
+
+/**
+ * Dump dynamic configuration to the disk
+ * @param cfg config file object
+ * @return TRUE if the file was written; FALSE if no dynamic configuration is
+ * set or loaded, or if writing failed
+ */
+gboolean dump_dynamic_config (struct config_file *cfg);
+
+/**
+ * Add symbol for specified metric
+ * @param cfg config file object
+ * @param metric metric's name
+ * @param symbol symbol's name
+ * @param value value of symbol
+ * @return FALSE if dynamic configuration is disabled, TRUE otherwise
+ */
+gboolean add_dynamic_symbol (struct config_file *cfg, const gchar *metric, const gchar *symbol, gdouble value);
+
+
+/**
+ * Add action for specified metric
+ * @param cfg config file object
+ * @param metric metric's name
+ * @param action action's number, used as an index into the metric's action table
+ * @param value score to set for the action
+ * @return FALSE if dynamic configuration is disabled, TRUE otherwise
+ */
+gboolean add_dynamic_action (struct config_file *cfg, const gchar *metric, guint action, gdouble value);
+
+
+#endif /* DYNAMIC_CFG_H_ */
diff --git a/src/libserver/events.c b/src/libserver/events.c
new file mode 100644
index 000000000..85843fd05
--- /dev/null
+++ b/src/libserver/events.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "events.h"
+
+/* Hash table equality: two events match when finalizer and user data are both the same */
+static gboolean
+rspamd_event_equal (gconstpointer a, gconstpointer b)
+{
+	const struct rspamd_async_event *lhs = a;
+	const struct rspamd_async_event *rhs = b;
+
+	if (lhs->fin != rhs->fin) {
+		return FALSE;
+	}
+
+	return lhs->user_data == rhs->user_data;
+}
+
+/* Hash table hash: derived from the user data pointer only */
+static guint
+rspamd_event_hash (gconstpointer a)
+{
+	return GPOINTER_TO_UINT (((const struct rspamd_async_event *)a)->user_data);
+}
+
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+/* Pool destructor adapter: release a mutex created with g_mutex_new */
+static void
+event_mutex_free (gpointer data)
+{
+	g_mutex_free ((GMutex *)data);
+}
+
+/* Pool destructor adapter: release a condition created with g_cond_new */
+static void
+event_cond_free (gpointer data)
+{
+	g_cond_free ((GCond *)data);
+}
+#endif
+
+/**
+ * Allocate and initialise an async session inside the given pool.
+ * The event table, mutex and condition variable are released by destructors
+ * registered on the pool.
+ */
+struct rspamd_async_session *
+new_async_session (rspamd_mempool_t * pool, session_finalizer_t fin,
+		event_finalizer_t restore, event_finalizer_t cleanup, void *user_data)
+{
+	struct rspamd_async_session *s;
+
+	s = rspamd_mempool_alloc (pool, sizeof (struct rspamd_async_session));
+
+	/* Callbacks and their argument */
+	s->fin = fin;
+	s->restore = restore;
+	s->cleanup = cleanup;
+	s->user_data = user_data;
+
+	/* Session state */
+	s->pool = pool;
+	s->wanna_die = FALSE;
+	s->events = g_hash_table_new (rspamd_event_hash, rspamd_event_equal);
+
+	/* Synchronisation primitives, API depends on the glib version */
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30))
+	s->mtx = g_mutex_new ();
+	s->cond = g_cond_new ();
+	rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) event_mutex_free, s->mtx);
+	rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) event_cond_free, s->cond);
+#else
+	s->mtx = rspamd_mempool_alloc (pool, sizeof (GMutex));
+	g_mutex_init (s->mtx);
+	s->cond = rspamd_mempool_alloc (pool, sizeof (GCond));
+	g_cond_init (s->cond);
+	rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_mutex_clear, s->mtx);
+	rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_cond_clear, s->cond);
+#endif
+	s->threads = 0;
+
+	rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_hash_table_destroy, s->events);
+
+	return s;
+}
+
+/**
+ * Record a pending event in the session.
+ * The event is allocated from the session pool and stored in the event
+ * table under the session mutex. A NULL session is only logged.
+ */
+void
+register_async_event (struct rspamd_async_session *session, event_finalizer_t fin, void *user_data, GQuark subsystem)
+{
+	struct rspamd_async_event *ev;
+
+	if (session == NULL) {
+		msg_info ("session is NULL");
+		return;
+	}
+
+	g_mutex_lock (session->mtx);
+
+	ev = rspamd_mempool_alloc (session->pool, sizeof (struct rspamd_async_event));
+	ev->subsystem = subsystem;
+	ev->user_data = user_data;
+	ev->fin = fin;
+
+	/* The event serves as both key and value */
+	g_hash_table_insert (session->events, ev, ev);
+
+	msg_debug ("added event: %p, pending %d events, subsystem: %s", user_data, g_hash_table_size (session->events),
+			g_quark_to_string (subsystem));
+
+	g_mutex_unlock (session->mtx);
+}
+
+/**
+ * Remove the event identified by (fin, ud) from the session.
+ * If such an event is registered it is dropped from the table and its
+ * finalizer is called; afterwards the session is checked for completion.
+ * A NULL session is only logged.
+ */
+void
+remove_normal_event (struct rspamd_async_session *session, event_finalizer_t fin, void *ud)
+{
+	struct rspamd_async_event key, *hit;
+
+	if (session == NULL) {
+		msg_info ("session is NULL");
+		return;
+	}
+
+	g_mutex_lock (session->mtx);
+
+	/* Only the fields used by hash and equality need to be filled */
+	key.user_data = ud;
+	key.fin = fin;
+	hit = g_hash_table_lookup (session->events, &key);
+
+	if (hit != NULL) {
+		g_hash_table_remove (session->events, hit);
+		msg_debug ("removed event: %p, subsystem: %s, pending %d events", ud,
+				g_quark_to_string (hit->subsystem), g_hash_table_size (session->events));
+		/* Remove event */
+		fin (ud);
+	}
+
+	g_mutex_unlock (session->mtx);
+
+	check_session_pending (session);
+}
+
+/* foreach_remove callback: run the event's finalizer (if any) and ask for removal */
+static gboolean
+rspamd_session_destroy (gpointer k, gpointer v, gpointer unused)
+{
+	struct rspamd_async_event *event = v;
+
+	if (event->fin == NULL) {
+		return TRUE;
+	}
+
+	/* Call event's finalizer */
+	event->fin (event->user_data);
+
+	return TRUE;
+}
+
+/**
+ * Forcefully finish a session.
+ * Waits until no async threads are registered, marks the session as dying,
+ * finalizes and removes every pending event and finally calls the cleanup
+ * callback, if one is set.
+ * @param session session object
+ * @return FALSE if session is NULL, TRUE otherwise
+ */
+gboolean
+destroy_session (struct rspamd_async_session *session)
+{
+	if (session == NULL) {
+		msg_info ("session is NULL");
+		return FALSE;
+	}
+
+	g_mutex_lock (session->mtx);
+	/*
+	 * g_cond_wait must be called with the mutex held (it releases it while
+	 * sleeping and re-acquires it before returning) and may wake up
+	 * spuriously, so the predicate is re-checked in a loop.
+	 */
+	while (session->threads > 0) {
+		/* Wait for conditional variable to finish processing */
+		g_cond_wait (session->cond, session->mtx);
+	}
+
+	session->wanna_die = TRUE;
+
+	g_hash_table_foreach_remove (session->events, rspamd_session_destroy, session);
+
+	/* Mutex can be destroyed here */
+	g_mutex_unlock (session->mtx);
+
+	if (session->cleanup != NULL) {
+		session->cleanup (session->user_data);
+	}
+	return TRUE;
+}
+
+/**
+ * Check whether the session can be finished and finish it if so.
+ * When the session is marked as dying and no events are pending, waits for
+ * registered async threads and calls the fin callback without the mutex
+ * held. If fin returns FALSE and a restore callback is set, restore is
+ * called and the check is repeated.
+ * @param session session object
+ * @return TRUE if the session is not finished yet, FALSE if it was finished
+ */
+gboolean
+check_session_pending (struct rspamd_async_session *session)
+{
+	g_mutex_lock (session->mtx);
+	if (session->wanna_die && g_hash_table_size (session->events) == 0) {
+		session->wanna_die = FALSE;
+		/* Loop on the predicate: g_cond_wait may return on a spurious wakeup */
+		while (session->threads > 0) {
+			/* Wait for conditional variable to finish processing */
+			g_cond_wait (session->cond, session->mtx);
+		}
+		if (session->fin != NULL) {
+			g_mutex_unlock (session->mtx);
+			if (! session->fin (session->user_data)) {
+				/* Session finished incompletely, perform restoration */
+				if (session->restore != NULL) {
+					session->restore (session->user_data);
+					/* Call pending once more */
+					return check_session_pending (session);
+				}
+				return TRUE;
+			}
+			else {
+				return FALSE;
+			}
+		}
+		g_mutex_unlock (session->mtx);
+		return FALSE;
+	}
+	g_mutex_unlock (session->mtx);
+	return TRUE;
+}
+
+
+/**
+ * Account for one more async thread working on behalf of the session
+ * @param session session object
+ */
+void
+register_async_thread (struct rspamd_async_session *session)
+{
+	/* The counter is only ever changed atomically */
+	g_atomic_int_inc (&session->threads);
+
+	msg_debug ("added thread: pending %d thread", session->threads);
+}
+
+/**
+ * Account for a finished async thread; when the last one is gone, signal the
+ * session condition variable
+ * @param session session object
+ */
+void
+remove_async_thread (struct rspamd_async_session *session)
+{
+	gboolean was_last;
+
+	was_last = g_atomic_int_dec_and_test (&session->threads);
+
+	if (was_last) {
+		/* Signal if there are any sessions waiting */
+		g_mutex_lock (session->mtx);
+		g_cond_signal (session->cond);
+		g_mutex_unlock (session->mtx);
+	}
+
+	msg_debug ("removed thread: pending %d thread", session->threads);
+}
diff --git a/src/libserver/events.h b/src/libserver/events.h
new file mode 100644
index 000000000..6728288eb
--- /dev/null
+++ b/src/libserver/events.h
@@ -0,0 +1,88 @@
+#ifndef RSPAMD_EVENTS_H
+#define RSPAMD_EVENTS_H
+
+#include "config.h"
+#include "mem_pool.h"
+
+struct rspamd_async_event;
+
+typedef void (*event_finalizer_t)(void *user_data); /* finalizer for a single event or for session restore/cleanup */
+typedef gboolean (*session_finalizer_t)(void *user_data); /* session finalizer; FALSE triggers the restore callback */
+
+struct rspamd_async_event { /* one pending event of a session */
+ GQuark subsystem; /* owner of the event, printed in debug messages */
+ event_finalizer_t fin; /* called with user_data when the event is removed or the session is destroyed */
+ void *user_data; /* argument for fin; together with fin it identifies the event */
+ guint ref; /* not referenced by events.c */
+};
+
+struct rspamd_async_session { /* set of pending events plus completion callbacks */
+ session_finalizer_t fin; /* called when the session is dying and no events remain */
+ event_finalizer_t restore; /* called when fin returns FALSE */
+ event_finalizer_t cleanup; /* called at the end of destroy_session */
+ GHashTable *events; /* pending events, each stored as both key and value */
+ void *user_data; /* argument passed to fin, restore and cleanup */
+ rspamd_mempool_t *pool; /* pool the session and its events are allocated from */
+ gboolean wanna_die; /* set by destroy_session, cleared by check_session_pending */
+ guint threads; /* number of registered async threads, changed atomically */
+ GMutex *mtx; /* held while the event table is modified */
+ GCond *cond; /* signalled when the thread counter drops to zero */
+};
+
+/**
+ * Make new async session
+ * @param pool pool to alloc memory from
+ * @param fin a callback called when no events are found in session
+ * @param restore a callback is called to restore processing of session
+ * @param cleanup a callback called when session is forcefully destroyed
+ * @param user_data abstract user data
+ * @return new session allocated from pool
+ */
+struct rspamd_async_session *new_async_session (rspamd_mempool_t *pool,
+ session_finalizer_t fin, event_finalizer_t restore,
+ event_finalizer_t cleanup, void *user_data);
+
+/**
+ * Insert new event to the session
+ * @param session session object
+ * @param fin finalizer callback
+ * @param user_data abstract user_data
+ * @param subsystem quark naming the subsystem that owns the event (used for debug logging)
+ */
+void register_async_event (struct rspamd_async_session *session,
+ event_finalizer_t fin, void *user_data, GQuark subsystem);
+
+/**
+ * Remove normal event; if an event matching (fin, ud) is registered, fin is
+ * called for it, and the session is then checked for completion
+ * @param session session object
+ * @param fin final callback
+ * @param ud user data object
+ */
+void remove_normal_event (struct rspamd_async_session *session, event_finalizer_t fin, void *ud);
+
+/**
+ * Must be called at the end of session, it calls fin functions for all pending
+ * events, removes them and then calls the session cleanup callback
+ * @return FALSE if session is NULL, TRUE otherwise
+ */
+gboolean destroy_session (struct rspamd_async_session *session);
+
+/**
+ * Check session for events pending and call fin callback if no events are pending
+ * @param session session object
+ * @return TRUE if session is not finished yet, FALSE if it has been finished
+ */
+gboolean check_session_pending (struct rspamd_async_session *session);
+
+/**
+ * Add new async thread to session
+ * @param session session object
+ */
+void register_async_thread (struct rspamd_async_session *session);
+
+/**
+ * Remove async thread from session and wake up a waiter when no threads remain
+ * @param session session object
+ */
+void remove_async_thread (struct rspamd_async_session *session);
+
+#endif /* RSPAMD_EVENTS_H */
diff --git a/src/libserver/html.c b/src/libserver/html.c
new file mode 100644
index 000000000..028c54f6c
--- /dev/null
+++ b/src/libserver/html.c
@@ -0,0 +1,942 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "util.h"
+#include "main.h"
+#include "message.h"
+#include "html.h"
+#include "url.h"
+
+static sig_atomic_t tags_sorted = 0;
+
+static struct html_tag tag_defs[] = {
+ /* W3C defined elements.
+  * Each entry is {tag id, tag name, CM_* content model flags}. The table is
+  * sorted by name (tag_cmp) at run time by add_html_node () and is looked up
+  * with bsearch () afterwards, so entries may be listed here in any order. */
+ {Tag_A, "a", (CM_INLINE)},
+ {Tag_ABBR, "abbr", (CM_INLINE)},
+ {Tag_ACRONYM, "acronym", (CM_INLINE)},
+ {Tag_ADDRESS, "address", (CM_BLOCK)},
+ {Tag_APPLET, "applet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)},
+ {Tag_AREA, "area", (CM_BLOCK | CM_EMPTY)},
+ {Tag_B, "b", (CM_INLINE)},
+ {Tag_BASE, "base", (CM_HEAD | CM_EMPTY)},
+ {Tag_BASEFONT, "basefont", (CM_INLINE | CM_EMPTY)},
+ {Tag_BDO, "bdo", (CM_INLINE)},
+ {Tag_BIG, "big", (CM_INLINE)},
+ {Tag_BLOCKQUOTE, "blockquote", (CM_BLOCK)},
+ {Tag_BODY, "body", (CM_HTML | CM_OPT | CM_OMITST)},
+ {Tag_BR, "br", (CM_INLINE | CM_EMPTY)},
+ {Tag_BUTTON, "button", (CM_INLINE)},
+ {Tag_CAPTION, "caption", (CM_TABLE)},
+ {Tag_CENTER, "center", (CM_BLOCK)},
+ {Tag_CITE, "cite", (CM_INLINE)},
+ {Tag_CODE, "code", (CM_INLINE)},
+ {Tag_COL, "col", (CM_TABLE | CM_EMPTY)},
+ {Tag_COLGROUP, "colgroup", (CM_TABLE | CM_OPT)},
+ {Tag_DD, "dd", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)},
+ {Tag_DEL, "del", (CM_INLINE | CM_BLOCK | CM_MIXED)},
+ {Tag_DFN, "dfn", (CM_INLINE)},
+ {Tag_DIR, "dir", (CM_BLOCK | CM_OBSOLETE)},
+ {Tag_DIV, "div", (CM_BLOCK)},
+ {Tag_DL, "dl", (CM_BLOCK)},
+ {Tag_DT, "dt", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)},
+ {Tag_EM, "em", (CM_INLINE)},
+ {Tag_FIELDSET, "fieldset", (CM_BLOCK)},
+ {Tag_FONT, "font", (CM_INLINE)},
+ {Tag_FORM, "form", (CM_BLOCK)},
+ {Tag_FRAME, "frame", (CM_FRAMES | CM_EMPTY)},
+ {Tag_FRAMESET, "frameset", (CM_HTML | CM_FRAMES)},
+ {Tag_H1, "h1", (CM_BLOCK | CM_HEADING)},
+ {Tag_H2, "h2", (CM_BLOCK | CM_HEADING)},
+ {Tag_H3, "h3", (CM_BLOCK | CM_HEADING)},
+ {Tag_H4, "h4", (CM_BLOCK | CM_HEADING)},
+ {Tag_H5, "h5", (CM_BLOCK | CM_HEADING)},
+ {Tag_H6, "h6", (CM_BLOCK | CM_HEADING)},
+ {Tag_HEAD, "head", (CM_HTML | CM_OPT | CM_OMITST)},
+ {Tag_HR, "hr", (CM_BLOCK | CM_EMPTY)},
+ {Tag_HTML, "html", (CM_HTML | CM_OPT | CM_OMITST)},
+ {Tag_I, "i", (CM_INLINE)},
+ {Tag_IFRAME, "iframe", (CM_INLINE)},
+ {Tag_IMG, "img", (CM_INLINE | CM_IMG | CM_EMPTY)},
+ {Tag_INPUT, "input", (CM_INLINE | CM_IMG | CM_EMPTY)},
+ {Tag_INS, "ins", (CM_INLINE | CM_BLOCK | CM_MIXED)},
+ {Tag_ISINDEX, "isindex", (CM_BLOCK | CM_EMPTY)},
+ {Tag_KBD, "kbd", (CM_INLINE)},
+ {Tag_LABEL, "label", (CM_INLINE)},
+ {Tag_LEGEND, "legend", (CM_INLINE)},
+ {Tag_LI, "li", (CM_LIST | CM_OPT | CM_NO_INDENT)},
+ {Tag_LINK, "link", (CM_HEAD | CM_EMPTY)},
+ {Tag_LISTING, "listing", (CM_BLOCK | CM_OBSOLETE)},
+ {Tag_MAP, "map", (CM_INLINE)},
+ {Tag_MENU, "menu", (CM_BLOCK | CM_OBSOLETE)},
+ {Tag_META, "meta", (CM_HEAD | CM_EMPTY)},
+ {Tag_NOFRAMES, "noframes", (CM_BLOCK | CM_FRAMES)},
+ {Tag_NOSCRIPT, "noscript", (CM_BLOCK | CM_INLINE | CM_MIXED)},
+ {Tag_OBJECT, "object", (CM_OBJECT | CM_HEAD | CM_IMG | CM_INLINE | CM_PARAM)},
+ {Tag_OL, "ol", (CM_BLOCK)},
+ {Tag_OPTGROUP, "optgroup", (CM_FIELD | CM_OPT)},
+ {Tag_OPTION, "option", (CM_FIELD | CM_OPT)},
+ {Tag_P, "p", (CM_BLOCK | CM_OPT)},
+ {Tag_PARAM, "param", (CM_INLINE | CM_EMPTY)},
+ {Tag_PLAINTEXT, "plaintext", (CM_BLOCK | CM_OBSOLETE)},
+ {Tag_PRE, "pre", (CM_BLOCK)},
+ {Tag_Q, "q", (CM_INLINE)},
+ {Tag_RB, "rb", (CM_INLINE)},
+ {Tag_RBC, "rbc", (CM_INLINE)},
+ {Tag_RP, "rp", (CM_INLINE)},
+ {Tag_RT, "rt", (CM_INLINE)},
+ {Tag_RTC, "rtc", (CM_INLINE)},
+ {Tag_RUBY, "ruby", (CM_INLINE)},
+ {Tag_S, "s", (CM_INLINE)},
+ {Tag_SAMP, "samp", (CM_INLINE)},
+ {Tag_SCRIPT, "script", (CM_HEAD | CM_MIXED | CM_BLOCK | CM_INLINE)},
+ {Tag_SELECT, "select", (CM_INLINE | CM_FIELD)},
+ {Tag_SMALL, "small", (CM_INLINE)},
+ {Tag_SPAN, "span", (CM_INLINE)},
+ {Tag_STRIKE, "strike", (CM_INLINE)},
+ {Tag_STRONG, "strong", (CM_INLINE)},
+ {Tag_STYLE, "style", (CM_HEAD)},
+ {Tag_SUB, "sub", (CM_INLINE)},
+ {Tag_SUP, "sup", (CM_INLINE)},
+ {Tag_TABLE, "table", (CM_BLOCK)},
+ {Tag_TBODY, "tbody", (CM_TABLE | CM_ROWGRP | CM_OPT)},
+ {Tag_TD, "td", (CM_ROW | CM_OPT | CM_NO_INDENT)},
+ {Tag_TEXTAREA, "textarea", (CM_INLINE | CM_FIELD)},
+ {Tag_TFOOT, "tfoot", (CM_TABLE | CM_ROWGRP | CM_OPT)},
+ {Tag_TH, "th", (CM_ROW | CM_OPT | CM_NO_INDENT)},
+ {Tag_THEAD, "thead", (CM_TABLE | CM_ROWGRP | CM_OPT)},
+ {Tag_TITLE, "title", (CM_HEAD)},
+ {Tag_TR, "tr", (CM_TABLE | CM_OPT)},
+ {Tag_TT, "tt", (CM_INLINE)},
+ {Tag_U, "u", (CM_INLINE)},
+ {Tag_UL, "ul", (CM_BLOCK)},
+ {Tag_VAR, "var", (CM_INLINE)},
+ {Tag_XMP, "xmp", (CM_BLOCK | CM_OBSOLETE)},
+ {Tag_NEXTID, "nextid", (CM_HEAD | CM_EMPTY)},
+
+ /* proprietary elements */
+ {Tag_ALIGN, "align", (CM_BLOCK)},
+ {Tag_BGSOUND, "bgsound", (CM_HEAD | CM_EMPTY)},
+ {Tag_BLINK, "blink", (CM_INLINE)},
+ {Tag_COMMENT, "comment", (CM_INLINE)},
+ {Tag_EMBED, "embed", (CM_INLINE | CM_IMG | CM_EMPTY)},
+ {Tag_ILAYER, "ilayer", (CM_INLINE)},
+ {Tag_KEYGEN, "keygen", (CM_INLINE | CM_EMPTY)},
+ {Tag_LAYER, "layer", (CM_BLOCK)},
+ {Tag_MARQUEE, "marquee", (CM_INLINE | CM_OPT)},
+ {Tag_MULTICOL, "multicol", (CM_BLOCK)},
+ {Tag_NOBR, "nobr", (CM_INLINE)},
+ {Tag_NOEMBED, "noembed", (CM_INLINE)},
+ {Tag_NOLAYER, "nolayer", (CM_BLOCK | CM_INLINE | CM_MIXED)},
+ {Tag_NOSAVE, "nosave", (CM_BLOCK)},
+ {Tag_SERVER, "server", (CM_HEAD | CM_MIXED | CM_BLOCK | CM_INLINE)},
+ {Tag_SERVLET, "servlet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)},
+ {Tag_SPACER, "spacer", (CM_INLINE | CM_EMPTY)},
+ {Tag_WBR, "wbr", (CM_INLINE | CM_EMPTY)},
+};
+
+static sig_atomic_t entities_sorted = 0; /* set once the entity tables have been sorted */
+struct _entity;
+typedef struct _entity entity;
+
+struct _entity {
+ gchar *name; /* entity name without the leading '&' and the trailing ';' */
+ uint code; /* numeric code used to match "&#...;" references */
+ gchar *replacement; /* plain text written instead of the entity; NULL means write nothing */
+};
+
+
+/*
+ * Known HTML entities as {name, numeric code, plain text replacement}.
+ * A NULL replacement means that the entity is removed from the text.
+ * The table is sorted by name at run time and searched with bsearch (),
+ * so the order of the entries here does not matter.
+ */
+static entity entities_defs[] = {
+	/*
+	** Markup pre-defined character entities
+	*/
+	{"quot", 34, "\""},
+	{"amp", 38, "&"},
+	{"apos", 39, "'"},
+	{"lt", 60, "<"},
+	{"gt", 62, ">"},
+
+	/*
+	** Latin-1 character entities
+	*/
+	{"nbsp", 160, " "},
+	{"iexcl", 161, "!"},
+	{"cent", 162, "cent"},
+	{"pound", 163, "pound"},
+	{"curren", 164, "current"},
+	{"yen", 165, "yen"},
+	{"brvbar", 166, NULL},
+	{"sect", 167, NULL},
+	{"uml", 168, "uml"},
+	{"copy", 169, "c"},
+	{"ordf", 170, NULL},
+	{"laquo", 171, "\""},
+	{"not", 172, "!"},
+	{"shy", 173, NULL},
+	{"reg", 174, "r"},
+	{"macr", 175, NULL},
+	{"deg", 176, "deg"},
+	{"plusmn", 177, "+-"},
+	{"sup2", 178, "2"},
+	{"sup3", 179, "3"},
+	{"acute", 180, NULL},
+	{"micro", 181, NULL},
+	{"para", 182, NULL},
+	{"middot", 183, "."},
+	{"cedil", 184, NULL},
+	{"sup1", 185, "1"},
+	{"ordm", 186, NULL},
+	{"raquo", 187, "\""},
+	{"frac14", 188, "1/4"},
+	{"frac12", 189, "1/2"},
+	{"frac34", 190, "3/4"},
+	{"iquest", 191, "i"},
+	{"Agrave", 192, "a"},
+	{"Aacute", 193, "a"},
+	{"Acirc", 194, "a"},
+	{"Atilde", 195, "a"},
+	{"Auml", 196, "a"},
+	{"Aring", 197, "a"},
+	{"AElig", 198, "a"},
+	{"Ccedil", 199, "c"},
+	{"Egrave", 200, "e"},
+	{"Eacute", 201, "e"},
+	{"Ecirc", 202, "e"},
+	{"Euml", 203, "e"},
+	{"Igrave", 204, "i"},
+	{"Iacute", 205, "i"},
+	{"Icirc", 206, "i"},
+	{"Iuml", 207, "i"},
+	{"ETH", 208, "e"},
+	{"Ntilde", 209, "n"},
+	{"Ograve", 210, "o"},
+	{"Oacute", 211, "o"},
+	{"Ocirc", 212, "o"},
+	{"Otilde", 213, "o"},
+	{"Ouml", 214, "o"},
+	{"times", 215, "t"},
+	{"Oslash", 216, "o"},
+	{"Ugrave", 217, "u"},
+	{"Uacute", 218, "u"},
+	{"Ucirc", 219, "u"},
+	{"Uuml", 220, "u"},
+	{"Yacute", 221, "y"},
+	{"THORN", 222, "t"},
+	{"szlig", 223, "s"},
+	{"agrave", 224, "a"},
+	{"aacute", 225, "a"},
+	{"acirc", 226, "a"},
+	{"atilde", 227, "a"},
+	{"auml", 228, "a"},
+	{"aring", 229, "a"},
+	{"aelig", 230, "a"},
+	{"ccedil", 231, "c"},
+	{"egrave", 232, "e"},
+	{"eacute", 233, "e"},
+	{"ecirc", 234, "e"},
+	{"euml", 235, "e"},
+	/* accented 'i' maps to "i", like the upper-case Igrave..Iuml entries */
+	{"igrave", 236, "i"},
+	{"iacute", 237, "i"},
+	{"icirc", 238, "i"},
+	{"iuml", 239, "i"},
+	{"eth", 240, "e"},
+	{"ntilde", 241, "n"},
+	{"ograve", 242, "o"},
+	{"oacute", 243, "o"},
+	{"ocirc", 244, "o"},
+	{"otilde", 245, "o"},
+	{"ouml", 246, "o"},
+	{"divide", 247, "/"},
+	{"oslash", 248, "/"},
+	{"ugrave", 249, "u"},
+	{"uacute", 250, "u"},
+	{"ucirc", 251, "u"},
+	{"uuml", 252, "u"},
+	{"yacute", 253, "y"},
+	{"thorn", 254, "t"},
+	{"yuml", 255, "y"},
+
+	/*
+	** Extended Entities defined in HTML 4: Symbols
+	*/
+	{"fnof", 402, "f"},
+	{"Alpha", 913, "alpha"},
+	{"Beta", 914, "beta"},
+	{"Gamma", 915, "gamma"},
+	{"Delta", 916, "delta"},
+	{"Epsilon", 917, "epsilon"},
+	{"Zeta", 918, "zeta"},
+	{"Eta", 919, "eta"},
+	{"Theta", 920, "theta"},
+	{"Iota", 921, "iota"},
+	{"Kappa", 922, "kappa"},
+	{"Lambda", 923, "lambda"},
+	{"Mu", 924, "mu"},
+	{"Nu", 925, "nu"},
+	{"Xi", 926, "xi"},
+	{"Omicron", 927, "omicron"},
+	{"Pi", 928, "pi"},
+	{"Rho", 929, "rho"},
+	{"Sigma", 931, "sigma"},
+	{"Tau", 932, "tau"},
+	{"Upsilon", 933, "upsilon"},
+	{"Phi", 934, "phi"},
+	{"Chi", 935, "chi"},
+	{"Psi", 936, "psi"},
+	{"Omega", 937, "omega"},
+	{"alpha", 945, "alpha"},
+	{"beta", 946, "beta"},
+	{"gamma", 947, "gamma"},
+	{"delta", 948, "delta"},
+	{"epsilon", 949, "epsilon"},
+	{"zeta", 950, "zeta"},
+	{"eta", 951, "eta"},
+	{"theta", 952, "theta"},
+	{"iota", 953, "iota"},
+	{"kappa", 954, "kappa"},
+	{"lambda", 955, "lambda"},
+	{"mu", 956, "mu"},
+	{"nu", 957, "nu"},
+	{"xi", 958, "xi"},
+	{"omicron", 959, "omicron"},
+	{"pi", 960, "pi"},
+	{"rho", 961, "rho"},
+	{"sigmaf", 962, "sigmaf"},
+	{"sigma", 963, "sigma"},
+	{"tau", 964, "tau"},
+	{"upsilon", 965, "upsilon"},
+	{"phi", 966, "phi"},
+	{"chi", 967, "chi"},
+	{"psi", 968, "psi"},
+	{"omega", 969, "omega"},
+	{"thetasym", 977, "thetasym"},
+	{"upsih", 978, "upsih"},
+	{"piv", 982, "piv"},
+	{"bull", 8226, "bull"},
+	{"hellip", 8230, "..."},
+	{"prime", 8242, "'"},
+	{"Prime", 8243, "'"},
+	{"oline", 8254, "-"},
+	{"frasl", 8260, NULL},
+	{"weierp", 8472, NULL},
+	{"image", 8465, NULL},
+	{"real", 8476, NULL},
+	{"trade", 8482, NULL},
+	{"alefsym", 8501, "a"},
+	{"larr", 8592, NULL},
+	{"uarr", 8593, NULL},
+	{"rarr", 8594, NULL},
+	{"darr", 8595, NULL},
+	{"harr", 8596, NULL},
+	{"crarr", 8629, NULL},
+	{"lArr", 8656, NULL},
+	{"uArr", 8657, NULL},
+	{"rArr", 8658, NULL},
+	{"dArr", 8659, NULL},
+	{"hArr", 8660, NULL},
+	{"forall", 8704, NULL},
+	{"part", 8706, NULL},
+	{"exist", 8707, NULL},
+	{"empty", 8709, NULL},
+	{"nabla", 8711, NULL},
+	{"isin", 8712, NULL},
+	{"notin", 8713, NULL},
+	{"ni", 8715, NULL},
+	{"prod", 8719, NULL},
+	{"sum", 8721, "E"},
+	{"minus", 8722, "-"},
+	{"lowast", 8727, NULL},
+	{"radic", 8730, NULL},
+	{"prop", 8733, NULL},
+	{"infin", 8734, NULL},
+	{"ang", 8736, "'"},
+	{"and", 8743, "&"},
+	{"or", 8744, "|"},
+	{"cap", 8745, NULL},
+	{"cup", 8746, NULL},
+	{"gint", 8747, NULL},
+	{"there4", 8756, NULL},
+	{"sim", 8764, NULL},
+	{"cong", 8773, NULL},
+	{"asymp", 8776, NULL},
+	{"ne", 8800, "!="},
+	{"equiv", 8801, "=="},
+	{"le", 8804, "<="},
+	{"ge", 8805, ">="},
+	{"sub", 8834, NULL},
+	{"sup", 8835, NULL},
+	{"nsub", 8836, NULL},
+	{"sube", 8838, NULL},
+	{"supe", 8839, NULL},
+	{"oplus", 8853, NULL},
+	{"otimes", 8855, NULL},
+	{"perp", 8869, NULL},
+	{"sdot", 8901, NULL},
+	{"lceil", 8968, NULL},
+	{"rceil", 8969, NULL},
+	{"lfloor", 8970, NULL},
+	{"rfloor", 8971, NULL},
+	{"lang", 9001, NULL},
+	{"rang", 9002, NULL},
+	{"loz", 9674, NULL},
+	{"spades", 9824, NULL},
+	{"clubs", 9827, NULL},
+	{"hearts", 9829, NULL},
+	{"diams", 9830, NULL},
+
+	/*
+	** Extended Entities defined in HTML 4: Special (less Markup at top)
+	*/
+	{"OElig", 338, NULL},
+	{"oelig", 339, NULL},
+	{"Scaron", 352, NULL},
+	{"scaron", 353, NULL},
+	{"Yuml", 376, NULL},
+	{"circ", 710, NULL},
+	{"tilde", 732, NULL},
+	{"ensp", 8194, NULL},
+	{"emsp", 8195, NULL},
+	{"thinsp", 8201, NULL},
+	{"zwnj", 8204, NULL},
+	{"zwj", 8205, NULL},
+	{"lrm", 8206, NULL},
+	{"rlm", 8207, NULL},
+	{"ndash", 8211, "-"},
+	{"mdash", 8212, "-"},
+	{"lsquo", 8216, "'"},
+	{"rsquo", 8217, "'"},
+	{"sbquo", 8218, "\""},
+	{"ldquo", 8220, "\""},
+	{"rdquo", 8221, "\""},
+	{"bdquo", 8222, "\""},
+	{"dagger", 8224, "T"},
+	{"Dagger", 8225, "T"},
+	{"permil", 8240, NULL},
+	{"lsaquo", 8249, "\""},
+	{"rsaquo", 8250, "\""},
+	{"euro", 8364, "E"},
+};
+
+/* Copy of entities_defs that is sorted by numeric code instead of by name */
+static entity entities_defs_num[ (G_N_ELEMENTS (entities_defs)) ];
+
+/*
+ * Comparator for qsort ()/bsearch () over struct html_tag elements:
+ * orders two tags by name, ignoring ASCII case.
+ */
+static gint
+tag_cmp (const void *m1, const void *m2)
+{
+	const gchar *lhs = ((const struct html_tag *)m1)->name;
+	const gchar *rhs = ((const struct html_tag *)m2)->name;
+
+	return g_ascii_strcasecmp (lhs, rhs);
+}
+
+/*
+ * Comparator for qsort ()/bsearch () over entity elements: orders two
+ * entities by name, ignoring ASCII case.
+ * NOTE(review): names that differ only in case (e.g. "Yuml" and "yuml")
+ * compare as equal here - confirm that this is intended.
+ */
+static gint
+entity_cmp (const void *m1, const void *m2)
+{
+	const gchar *lhs = ((const entity *)m1)->name;
+	const gchar *rhs = ((const entity *)m2)->name;
+
+	return g_ascii_strcasecmp (lhs, rhs);
+}
+
+/*
+ * Comparator for qsort ()/bsearch () over entity elements: orders two
+ * entities by numeric code.
+ * @return negative, zero or positive value if the code of m1 is less than,
+ * equal to or greater than the code of m2
+ */
+static gint
+entity_cmp_num (const void *m1, const void *m2)
+{
+	const entity *p1 = m1;
+	const entity *p2 = m2;
+
+	/*
+	 * Codes are unsigned and the search key may hold any value parsed from
+	 * the text, so compare explicitly: a subtraction could wrap around and
+	 * report the wrong order.
+	 */
+	if (p1->code < p2->code) {
+		return -1;
+	}
+	if (p1->code > p2->code) {
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Create a tree node for a single tag.
+ * @param pool memory pool used to allocate the html_node payload
+ * @param text tag text; a leading '/' marks a closing tag and a leading
+ * "?xml" marks an XML declaration. The text is modified temporarily
+ * while the tag name is looked up and is restored before return
+ * @param tag_len length of the tag text
+ * @return new GNode whose data is the html_node, or NULL if the text is
+ * empty or the tag name is not present in tag_defs
+ */
+static GNode *
+construct_html_node (rspamd_mempool_t * pool, gchar *text, gsize tag_len)
+{
+	struct html_node *html;
+	struct html_tag key, *found;
+	gchar saved;
+
+	if (text == NULL || *text == '\0') {
+		return NULL;
+	}
+
+	html = rspamd_mempool_alloc0 (pool, sizeof (struct html_node));
+
+	/* Check whether this tag is fully closed (never look before the text) */
+	if (tag_len > 0 && *(text + tag_len - 1) == '/') {
+		html->flags |= FL_CLOSED;
+	}
+
+	/* Check xml tag */
+	if (*text == '?' && g_ascii_strncasecmp (text + 1, "xml", sizeof ("xml") - 1) == 0) {
+		html->flags |= FL_XML;
+		html->tag = NULL;
+	}
+	else {
+		if (*text == '/') {
+			html->flags |= FL_CLOSING;
+			text++;
+		}
+
+		/*
+		 * Find end of tag name: the first character is always accepted,
+		 * the name then lasts while alphanumeric characters follow
+		 */
+		key.name = text;
+		if (*text != '\0') {
+			do {
+				text++;
+			} while (g_ascii_isalnum (*text));
+		}
+
+		/* Terminate the name in place for the lookup and restore it after */
+		saved = *text;
+		*text = '\0';
+		found = bsearch (&key, tag_defs, G_N_ELEMENTS (tag_defs), sizeof (struct html_tag), tag_cmp);
+		*text = saved;
+
+		/* Match tag id by tag name */
+		if (found == NULL) {
+			return NULL;
+		}
+		html->tag = found;
+	}
+
+	return g_node_new (html);
+}
+
+/*
+ * Pair a closing tag with the nearest not yet closed ancestor that has the
+ * same tag id.
+ * @param node node to check; it is destroyed when a matching ancestor is found
+ * @param cur_level current tree level, set to the parent of the matched ancestor
+ * @return TRUE if node is not a closing tag or if it has been paired,
+ * FALSE if no matching ancestor exists
+ */
+static gboolean
+check_balance (GNode * node, GNode ** cur_level)
+{
+	struct html_node *closing = node->data, *opened;
+	GNode *up;
+
+	if ((closing->flags & FL_CLOSING) == 0) {
+		return TRUE;
+	}
+
+	/* Walk up the tree until a node without data is reached */
+	for (up = node->parent; up != NULL && up->data != NULL; up = up->parent) {
+		opened = up->data;
+
+		if (opened->tag == NULL || closing->tag == NULL) {
+			continue;
+		}
+		if (opened->tag->id != closing->tag->id) {
+			continue;
+		}
+		if ((opened->flags & FL_CLOSED) != 0) {
+			continue;
+		}
+
+		opened->flags |= FL_CLOSED;
+		/* The closing node itself is not needed in the tree any more */
+		g_node_destroy (node);
+		/* Change level */
+		*cur_level = up->parent;
+
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Get tag structure by its name (binary search, ASCII case is ignored).
+ * @param name tag name
+ * @return matching entry of tag_defs or NULL if name is NULL or unknown
+ */
+struct html_tag *
+get_tag_by_name (const gchar *name)
+{
+	struct html_tag key;
+
+	if (name == NULL) {
+		return NULL;
+	}
+
+	/* bsearch () needs a sorted table and this may be the first user of it */
+	if (!tags_sorted) {
+		qsort (tag_defs, G_N_ELEMENTS (tag_defs), sizeof (struct html_tag), tag_cmp);
+		tags_sorted = 1;
+	}
+
+	key.name = name;
+
+	return bsearch (&key, tag_defs, G_N_ELEMENTS (tag_defs), sizeof (struct html_tag), tag_cmp);
+}
+
+/*
+ * Decode HTML entities in text. Text is modified in place.
+ *
+ * @param s text to decode, it is NUL terminated on return
+ * @param len optional in/out length: if it is not NULL and not zero then
+ * *len bytes of s are processed, otherwise strlen (s) is used; if it is not
+ * NULL the length of the decoded text is stored back
+ *
+ * Rules:
+ * - a known named entity or a numeric entity ("&#N;", "&#xN;", "&#oN;")
+ *   with a known code is replaced by its plain text replacement, or removed
+ *   if the replacement is NULL;
+ * - a valid numeric entity with an unknown code is removed;
+ * - anything else that starts with '&' (unknown name, malformed number,
+ *   missing ';') is copied to the output unchanged.
+ * As decoding is done in place, a replacement is truncated to the size of
+ * the input consumed so far.
+ */
+void
+decode_entitles (gchar *s, guint * len)
+{
+	guint l, rep_len, room;
+	gchar *t = s;	/* t - tortoise: write position */
+	gchar *h = s;	/* h - hare: read position */
+	gchar *e = s;	/* position of '&' of the entity being scanned */
+	gchar *end, *num, *end_ptr;
+	gulong val;
+	gint base;
+	gboolean in_entity = FALSE, keep;
+	entity *found, key;
+
+	if (s == NULL) {
+		return;
+	}
+
+	/* Both tables are searched with bsearch () and thus must be sorted */
+	if (!entities_sorted) {
+		qsort (entities_defs, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp);
+		memcpy (entities_defs_num, entities_defs, sizeof (entities_defs));
+		qsort (entities_defs_num, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp_num);
+		entities_sorted = 1;
+	}
+
+	if (len == NULL || *len == 0) {
+		l = strlen (s);
+	}
+	else {
+		l = *len;
+	}
+	end = s + l;
+
+	while (h < end) {
+		if (!in_entity) {
+			/* Out of entity */
+			if (*h == '&') {
+				in_entity = TRUE;
+				e = h;
+			}
+			else {
+				*t++ = *h;
+			}
+			h++;
+			continue;
+		}
+
+		if (*h == '&') {
+			/* Previous '&' did not start an entity: keep its text, restart */
+			memmove (t, e, h - e);
+			t += h - e;
+			e = h;
+			h++;
+			continue;
+		}
+		if (*h != ';') {
+			h++;
+			continue;
+		}
+
+		/* [e, h] is "&...;": terminate it temporarily for the lookups */
+		found = NULL;
+		keep = TRUE;
+		*h = '\0';
+		if (*(e + 1) == '#') {
+			/* Numeric entity, determine base */
+			num = e + 2;
+			if (*num == 'x' || *num == 'X') {
+				base = 16;
+				num++;
+			}
+			else if (*num == 'o' || *num == 'O') {
+				base = 8;
+				num++;
+			}
+			else {
+				base = 10;
+			}
+			val = strtoul (num, &end_ptr, base);
+			/* Require at least one digit and no trailing garbage */
+			if (end_ptr != num && *end_ptr == '\0' && val <= G_MAXUINT) {
+				key.code = val;
+				found = bsearch (&key, entities_defs_num, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp_num);
+				keep = FALSE;
+			}
+		}
+		else {
+			key.name = e + 1;
+			found = bsearch (&key, entities_defs, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp);
+			keep = (found == NULL);
+		}
+		*h = ';';
+
+		if (keep) {
+			/* Skip undecoded: copy the entity as is, ';' included */
+			memmove (t, e, h - e + 1);
+			t += h - e + 1;
+		}
+		else if (found != NULL && found->replacement != NULL) {
+			/* Never write beyond the part of input that is already read */
+			room = (h + 1) - t;
+			rep_len = strlen (found->replacement);
+			if (rep_len > room) {
+				rep_len = room;
+			}
+			memcpy (t, found->replacement, rep_len);
+			t += rep_len;
+		}
+
+		in_entity = FALSE;
+		h++;
+	}
+
+	if (in_entity) {
+		/* '&' without a closing ';': keep the rest of the text as is */
+		memmove (t, e, end - e);
+		t += end - e;
+	}
+	*t = '\0';
+
+	if (len != NULL) {
+		*len = t - s;
+	}
+}
+
+/*
+ * Compare the host of a link with the host of a URL found in the text that
+ * follows the link tag; mark href_url as phished if the hosts differ.
+ * @param task task object, its pool is used for allocations
+ * @param href_url URL taken from the tag attribute
+ * @param url_text text that follows the tag
+ * @param remain number of bytes available in url_text
+ * @param id id of the tag whose next occurrence terminates the scanned text
+ */
+static void
+check_phishing (struct rspamd_task *task, struct uri *href_url, const gchar *url_text, gsize remain, tag_id_t id)
+{
+	struct uri *new;
+	gchar *url_str;
+	const gchar *p, *c;
+	gchar tagbuf[128];
+	struct html_tag *tag;
+	gsize len = 0;
+	gint rc;
+
+	p = url_text;
+	while (len < remain) {
+		if (*p == '<') {
+			/* Check tag name; do not look behind the end of the text */
+			if (len + 1 < remain && *(p + 1) == '/') {
+				c = p + 2;
+			}
+			else {
+				c = p + 1;
+			}
+			while (len < remain) {
+				if (!g_ascii_isspace (*p) && *p != '>') {
+					p ++;
+					len ++;
+				}
+				else {
+					break;
+				}
+			}
+			tag = NULL;
+			if (p > c) {
+				/* Name is not empty, so the size passed below is positive */
+				rspamd_strlcpy (tagbuf, c, MIN ((gint)sizeof(tagbuf), p - c + 1));
+				tag = get_tag_by_name (tagbuf);
+			}
+			if (tag != NULL) {
+				if (tag->id == id) {
+					break;
+				}
+				else if (tag->id == Tag_IMG) {
+					/* We should ignore IMG tag here */
+					while (len < remain && *p != '>' && *p != '<') {
+						p ++;
+						len ++;
+					}
+					if (len < remain && *p == '>') {
+						p ++;
+					}
+
+					remain -= p - url_text;
+					url_text = p;
+					len = 0;
+					continue;
+				}
+			}
+			if (len >= remain) {
+				/* Tag name lasts till the end of the text */
+				break;
+			}
+		}
+		len ++;
+		p ++;
+	}
+
+	if (url_try_text (task->task_pool, url_text, len, NULL, NULL, &url_str, TRUE) && url_str != NULL) {
+		new = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct uri));
+		if (new != NULL) {
+			g_strstrip (url_str);
+			rc = parse_uri (new, url_str, task->task_pool);
+
+			if (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) {
+				if (g_ascii_strncasecmp (href_url->host, new->host,
+						MAX (href_url->hostlen, new->hostlen)) != 0) {
+					/* Special check for urls beginning with 'www' */
+					if (new->hostlen > 4 && href_url->hostlen > 4) {
+						p = new->host;
+						c = NULL;
+						if ((p[0] == 'w' || p[0] == 'W') &&
+								(p[1] == 'w' || p[1] == 'W') &&
+								(p[2] == 'w' || p[2] == 'W') &&
+								(p[3] == '.')) {
+							p += 4;
+							c = href_url->host;
+							len = MAX (href_url->hostlen, new->hostlen - 4);
+						}
+						else {
+							p = href_url->host;
+							if ((p[0] == 'w' || p[0] == 'W') &&
+									(p[1] == 'w' || p[1] == 'W') &&
+									(p[2] == 'w' || p[2] == 'W') &&
+									(p[3] == '.')) {
+								p += 4;
+								c = new->host;
+								len = MAX (href_url->hostlen - 4, new->hostlen);
+							}
+						}
+						/* Compare parts and check for phished hostname */
+						if (c != NULL) {
+							if (g_ascii_strncasecmp (p, c, len) != 0) {
+								href_url->is_phished = TRUE;
+								href_url->phished_url = new;
+							}
+						}
+						else {
+							href_url->is_phished = TRUE;
+							href_url->phished_url = new;
+						}
+					}
+					else {
+						href_url->is_phished = TRUE;
+						href_url->phished_url = new;
+					}
+				}
+			}
+			else {
+				msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc));
+			}
+		}
+	}
+
+}
+
+/*
+ * Extract URL from the "href" attribute of A tag or from the "src" attribute
+ * of IMG tag and insert it to the urls tree of the task. Links of A tags are
+ * also checked for phishing.
+ * @param task task object
+ * @param part text part that contains the tag (not used here)
+ * @param id tag id, only Tag_A and Tag_IMG are processed
+ * @param tag_text text of the tag
+ * @param tag_len length of the tag text
+ * @param remain number of bytes available starting from tag_text
+ */
+static void
+parse_tag_url (struct rspamd_task *task, struct mime_text_part *part, tag_id_t id,
+	gchar *tag_text, gsize tag_len, gsize remain)
+{
+	gchar *c = NULL, *p, *url_text;
+	gint len = 0, rc;
+	struct uri *url;
+	gboolean got_single_quote = FALSE, got_double_quote = FALSE;
+
+	/* For A tags search for href= and for IMG tags search for src= */
+	if (id == Tag_A) {
+		c = rspamd_strncasestr (tag_text, "href=", tag_len);
+		len = sizeof ("href=") - 1;
+	}
+	else if (id == Tag_IMG) {
+		c = rspamd_strncasestr (tag_text, "src=", tag_len);
+		len = sizeof ("src=") - 1;
+	}
+
+	if (c == NULL) {
+		return;
+	}
+
+	/* First calculate length */
+	c += len;
+	/* Skip spaces after eqsign, but stay inside of the tag */
+	while ((gsize)(c - tag_text) < tag_len && g_ascii_isspace (*c)) {
+		c++;
+	}
+	len = 0;
+	p = c;
+	while (*p && (guint)(p - tag_text) < tag_len) {
+		if (got_double_quote) {
+			if (*p == '"') {
+				break;
+			}
+			else {
+				len++;
+			}
+		}
+		else if (got_single_quote) {
+			if (*p == '\'') {
+				break;
+			}
+			else {
+				len++;
+			}
+		}
+		else if (g_ascii_isspace (*p) || *p == '>' || (*p == '/' && *(p + 1) == '>') || *p == '\r' || *p == '\n') {
+			break;
+		}
+		else {
+			if (*p == '"' && !got_single_quote) {
+				got_double_quote = !got_double_quote;
+			}
+			else if (*p == '\'' && !got_double_quote) {
+				got_single_quote = !got_single_quote;
+			}
+			else {
+				len++;
+			}
+		}
+		p++;
+	}
+
+	/* Skip the quote sign found while scanning the value */
+	if (got_single_quote || got_double_quote) {
+		c++;
+	}
+
+	if (len == 0) {
+		return;
+	}
+
+	url_text = rspamd_mempool_alloc (task->task_pool, len + 1);
+	rspamd_strlcpy (url_text, c, len + 1);
+	decode_entitles (url_text, NULL);
+
+	/* Only these prefixes are accepted */
+	if (g_ascii_strncasecmp (url_text, "http://", sizeof ("http://") - 1) != 0 &&
+		g_ascii_strncasecmp (url_text, "www", sizeof ("www") - 1) != 0 &&
+		g_ascii_strncasecmp (url_text, "ftp://", sizeof ("ftp://") - 1) != 0 &&
+		g_ascii_strncasecmp (url_text, "mailto:", sizeof ("mailto:") - 1) != 0) {
+		return;
+	}
+
+	/* Zeroed like in check_phishing, as hostlen is read right after parsing */
+	url = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct uri));
+	rc = parse_uri (url, url_text, task->task_pool);
+
+	if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST && url->hostlen != 0) {
+		/*
+		 * Check for phishing
+		 */
+		if ((p = strchr (c, '>')) != NULL && id == Tag_A) {
+			p ++;
+			/* Unsigned remain must not wrap if '>' is behind the input */
+			if ((gsize)(p - tag_text) <= remain) {
+				check_phishing (task, url, p, remain - (p - tag_text), id);
+			}
+		}
+		if (g_tree_lookup (task->urls, url) == NULL) {
+			g_tree_insert (task->urls, url, url);
+		}
+	}
+}
+
+/*
+ * Add a single node to the tags tree.
+ * The lookup tables are sorted on the first call. If the part has no tree
+ * yet, a root node is created and registered for destruction in the pool.
+ * @param task task object
+ * @param pool memory pool
+ * @param part text part that owns the tree
+ * @param tag_text text of the tag
+ * @param tag_len length of the tag text
+ * @param remain number of bytes available starting from tag_text
+ * @param cur_level current level of the tree, updated when a tag is opened
+ * or closed
+ * @return FALSE if the node cannot be constructed, if there is no parent
+ * level for a closing tag, or if STYLE, SCRIPT, OBJECT or TITLE tag is
+ * opened; TRUE otherwise
+ */
+gboolean
+add_html_node (struct rspamd_task *task, rspamd_mempool_t * pool, struct mime_text_part *part,
+	gchar *tag_text, gsize tag_len, gsize remain, GNode ** cur_level)
+{
+	GNode *new;
+	struct html_node *data;
+
+	if (!tags_sorted) {
+		qsort (tag_defs, G_N_ELEMENTS (tag_defs), sizeof (struct html_tag), tag_cmp);
+		tags_sorted = 1;
+	}
+	if (!entities_sorted) {
+		qsort (entities_defs, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp);
+		memcpy (entities_defs_num, entities_defs, sizeof (entities_defs));
+		qsort (entities_defs_num, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp_num);
+		entities_sorted = 1;
+	}
+
+	/* First call of this function */
+	if (part->html_nodes == NULL) {
+		/* Insert root node */
+		new = g_node_new (NULL);
+		*cur_level = new;
+		part->html_nodes = new;
+		rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_node_destroy, part->html_nodes);
+		/* Call once again with root node */
+		return add_html_node (task, pool, part, tag_text, tag_len, remain, cur_level);
+	}
+	else {
+		new = construct_html_node (pool, tag_text, tag_len);
+		if (new == NULL) {
+			debug_task ("cannot construct HTML node for text '%*s'", tag_len, tag_text);
+			return FALSE;
+		}
+		data = new->data;
+		if (data->tag && (data->tag->id == Tag_A || data->tag->id == Tag_IMG) && ((data->flags & FL_CLOSING) == 0)) {
+			parse_tag_url (task, part, data->tag->id, tag_text, tag_len, remain);
+		}
+
+		if (data->flags & FL_CLOSING) {
+			if (!*cur_level) {
+				debug_task ("bad parent node");
+				/* The node is not linked to the tree, so free it here */
+				g_node_destroy (new);
+				return FALSE;
+			}
+			g_node_append (*cur_level, new);
+			/* check_balance destroys the node if it finds a pair for it */
+			if (!check_balance (new, cur_level)) {
+				debug_task ("mark part as unbalanced as it has not pairable closing tags");
+				part->is_balanced = FALSE;
+			}
+		}
+		else {
+
+			g_node_append (*cur_level, new);
+			if ((data->flags & FL_CLOSED) == 0) {
+				/* Tag is opened, so the next nodes are its children */
+				*cur_level = new;
+			}
+			/* Skip some tags */
+			if (data->tag && (data->tag->id == Tag_STYLE ||
+				data->tag->id == Tag_SCRIPT ||
+				data->tag->id == Tag_OBJECT ||
+				data->tag->id == Tag_TITLE)) {
+				return FALSE;
+			}
+		}
+	}
+
+	return TRUE;
+}
+
+/*
+ * vi:ts=4
+ */
diff --git a/src/libserver/html.h b/src/libserver/html.h
new file mode 100644
index 000000000..3ea758e60
--- /dev/null
+++ b/src/libserver/html.h
@@ -0,0 +1,226 @@
+/*
+ * Functions for simple html parsing
+ */
+
+#ifndef RSPAMD_HTML_H
+#define RSPAMD_HTML_H
+
+#include "config.h"
+#include "mem_pool.h"
+
+/* Known HTML tags */
+typedef enum
+{
+ Tag_UNKNOWN, /**< Unknown tag! */
+ Tag_A, /**< A */
+ Tag_ABBR, /**< ABBR */
+ Tag_ACRONYM, /**< ACRONYM */
+ Tag_ADDRESS, /**< ADDRESS */
+ Tag_ALIGN, /**< ALIGN */
+ Tag_APPLET, /**< APPLET */
+ Tag_AREA, /**< AREA */
+ Tag_B, /**< B */
+ Tag_BASE, /**< BASE */
+ Tag_BASEFONT, /**< BASEFONT */
+ Tag_BDO, /**< BDO */
+ Tag_BGSOUND, /**< BGSOUND */
+ Tag_BIG, /**< BIG */
+ Tag_BLINK, /**< BLINK */
+ Tag_BLOCKQUOTE, /**< BLOCKQUOTE */
+ Tag_BODY, /**< BODY */
+ Tag_BR, /**< BR */
+ Tag_BUTTON, /**< BUTTON */
+ Tag_CAPTION, /**< CAPTION */
+ Tag_CENTER, /**< CENTER */
+ Tag_CITE, /**< CITE */
+ Tag_CODE, /**< CODE */
+ Tag_COL, /**< COL */
+ Tag_COLGROUP, /**< COLGROUP */
+ Tag_COMMENT, /**< COMMENT */
+ Tag_DD, /**< DD */
+ Tag_DEL, /**< DEL */
+ Tag_DFN, /**< DFN */
+ Tag_DIR, /**< DIR */
+ Tag_DIV, /**< DIV */
+ Tag_DL, /**< DL */
+ Tag_DT, /**< DT */
+ Tag_EM, /**< EM */
+ Tag_EMBED, /**< EMBED */
+ Tag_FIELDSET, /**< FIELDSET */
+ Tag_FONT, /**< FONT */
+ Tag_FORM, /**< FORM */
+ Tag_FRAME, /**< FRAME */
+ Tag_FRAMESET, /**< FRAMESET */
+ Tag_H1, /**< H1 */
+ Tag_H2, /**< H2 */
+ Tag_H3, /**< H3 */
+ Tag_H4, /**< H4 */
+ Tag_H5, /**< H5 */
+ Tag_H6, /**< H6 */
+ Tag_HEAD, /**< HEAD */
+ Tag_HR, /**< HR */
+ Tag_HTML, /**< HTML */
+ Tag_I, /**< I */
+ Tag_IFRAME, /**< IFRAME */
+ Tag_ILAYER, /**< ILAYER */
+ Tag_IMG, /**< IMG */
+ Tag_INPUT, /**< INPUT */
+ Tag_INS, /**< INS */
+ Tag_ISINDEX, /**< ISINDEX */
+ Tag_KBD, /**< KBD */
+ Tag_KEYGEN, /**< KEYGEN */
+ Tag_LABEL, /**< LABEL */
+ Tag_LAYER, /**< LAYER */
+ Tag_LEGEND, /**< LEGEND */
+ Tag_LI, /**< LI */
+ Tag_LINK, /**< LINK */
+ Tag_LISTING, /**< LISTING */
+ Tag_MAP, /**< MAP */
+ Tag_MARQUEE, /**< MARQUEE */
+ Tag_MENU, /**< MENU */
+ Tag_META, /**< META */
+ Tag_MULTICOL, /**< MULTICOL */
+ Tag_NOBR, /**< NOBR */
+ Tag_NOEMBED, /**< NOEMBED */
+ Tag_NOFRAMES, /**< NOFRAMES */
+ Tag_NOLAYER, /**< NOLAYER */
+ Tag_NOSAVE, /**< NOSAVE */
+ Tag_NOSCRIPT, /**< NOSCRIPT */
+ Tag_OBJECT, /**< OBJECT */
+ Tag_OL, /**< OL */
+ Tag_OPTGROUP, /**< OPTGROUP */
+ Tag_OPTION, /**< OPTION */
+ Tag_P, /**< P */
+ Tag_PARAM, /**< PARAM */
+ Tag_PLAINTEXT,/**< PLAINTEXT */
+ Tag_PRE, /**< PRE */
+ Tag_Q, /**< Q */
+ Tag_RB, /**< RB */
+ Tag_RBC, /**< RBC */
+ Tag_RP, /**< RP */
+ Tag_RT, /**< RT */
+ Tag_RTC, /**< RTC */
+ Tag_RUBY, /**< RUBY */
+ Tag_S, /**< S */
+ Tag_SAMP, /**< SAMP */
+ Tag_SCRIPT, /**< SCRIPT */
+ Tag_SELECT, /**< SELECT */
+ Tag_SERVER, /**< SERVER */
+ Tag_SERVLET, /**< SERVLET */
+ Tag_SMALL, /**< SMALL */
+ Tag_SPACER, /**< SPACER */
+ Tag_SPAN, /**< SPAN */
+ Tag_STRIKE, /**< STRIKE */
+ Tag_STRONG, /**< STRONG */
+ Tag_STYLE, /**< STYLE */
+ Tag_SUB, /**< SUB */
+ Tag_SUP, /**< SUP */
+ Tag_TABLE, /**< TABLE */
+ Tag_TBODY, /**< TBODY */
+ Tag_TD, /**< TD */
+ Tag_TEXTAREA, /**< TEXTAREA */
+ Tag_TFOOT, /**< TFOOT */
+ Tag_TH, /**< TH */
+ Tag_THEAD, /**< THEAD */
+ Tag_TITLE, /**< TITLE */
+ Tag_TR, /**< TR */
+ Tag_TT, /**< TT */
+ Tag_U, /**< U */
+ Tag_UL, /**< UL */
+ Tag_VAR, /**< VAR */
+ Tag_WBR, /**< WBR */
+ Tag_XMP, /**< XMP */
+ Tag_XML, /**< XML */
+ Tag_NEXTID, /**< NEXTID */
+
+ N_TAGS /**< Must be last */
+} tag_id_t;
+
+#define CM_UNKNOWN 0
+/* Elements with no content. Map to HTML specification. */
+#define CM_EMPTY (1 << 0)
+/* Elements that appear outside of "BODY". */
+#define CM_HTML (1 << 1)
+/* Elements that can appear within HEAD. */
+#define CM_HEAD (1 << 2)
+/* HTML "block" elements. */
+#define CM_BLOCK (1 << 3)
+/* HTML "inline" elements. */
+#define CM_INLINE (1 << 4)
+/* Elements that mark list item ("LI"). */
+#define CM_LIST (1 << 5)
+/* Elements that mark definition list item ("DD", "DT"). */
+#define CM_DEFLIST (1 << 6)
+/* Elements that can appear inside TABLE. */
+#define CM_TABLE (1 << 7)
+/* Used for "THEAD", "TFOOT" or "TBODY". */
+#define CM_ROWGRP (1 << 8)
+/* Used for "TD", "TH" */
+#define CM_ROW (1 << 9)
+/* Elements whose content must be protected against white space movement.
+ Includes some elements that can be found in forms. */
+#define CM_FIELD (1 << 10)
+/* Used to avoid propagating inline emphasis inside some elements
+ such as OBJECT or APPLET. */
+#define CM_OBJECT (1 << 11)
+/* Elements that allows "PARAM". */
+#define CM_PARAM (1 << 12)
+/* "FRAME", "FRAMESET", "NOFRAMES". Used in ParseFrameSet. */
+#define CM_FRAMES (1 << 13)
+/* Heading elements (h1, h2, ...). */
+#define CM_HEADING (1 << 14)
+/* Elements with an optional end tag. */
+#define CM_OPT (1 << 15)
+/* Elements that use "align" attribute for vertical position. */
+#define CM_IMG (1 << 16)
+/* Elements with inline and block model. Used to avoid calling InlineDup. */
+#define CM_MIXED (1 << 17)
+/* Elements whose content needs to be indented only if containing one
+ CM_BLOCK element. */
+#define CM_NO_INDENT (1 << 18)
+/* Elements that are obsolete (such as "dir", "menu"). */
+#define CM_OBSOLETE (1 << 19)
+/* User defined elements. Used to determine how attributes without value
+ should be printed. */
+#define CM_NEW (1 << 20)
+/* Elements that cannot be omitted. */
+#define CM_OMITST (1 << 21)
+
+/* XML tag */
+#define FL_XML (1 << 0)
+/* Closing tag */
+#define FL_CLOSING (1 << 1)
+/* Fully closed tag (e.g. <a attrs />) */
+#define FL_CLOSED (1 << 2)
+
+struct html_tag {
+ tag_id_t id; /* tag identifier */
+ const gchar *name; /* tag name, used as the lookup key */
+ gint flags; /* combination of CM_* flags */
+};
+
+struct html_node {
+ struct html_tag *tag; /* known tag definition, NULL for XML declarations */
+ gint flags; /* combination of FL_* flags */
+};
+
+/* Forward declaration */
+struct rspamd_task;
+
+/*
+ * Add a single node to the tags tree
+ */
+gboolean add_html_node (struct rspamd_task *task, rspamd_mempool_t *pool,
+ struct mime_text_part *part, gchar *tag_text, gsize tag_len, gsize remain, GNode **cur_level);
+
+/*
+ * Get tag structure by its name (binary search is used)
+ */
+struct html_tag * get_tag_by_name (const gchar *name);
+
+/*
+ * Decode HTML entities in text. Text is modified in place.
+ */
+void decode_entitles (gchar *s, guint *len);
+
+#endif
diff --git a/src/libserver/proxy.c b/src/libserver/proxy.c
new file mode 100644
index 000000000..67c7665b8
--- /dev/null
+++ b/src/libserver/proxy.c
@@ -0,0 +1,241 @@
+/* Copyright (c) 2010-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "main.h"
+#include "proxy.h"
+
+static void rspamd_proxy_backend_handler (gint fd, gshort what, gpointer data);
+static void rspamd_proxy_client_handler (gint fd, gshort what, gpointer data);
+
+static inline GQuark
+proxy_error_quark (void)
+{
+ return g_quark_from_static_string ("proxy-error");
+}
+
+/**
+ * Stop proxying: remove both events from the event loop and close both
+ * descriptors owned by the proxy. The `closed' flag makes repeated calls
+ * harmless.
+ * @param proxy proxy object
+ */
+void
+rspamd_proxy_close (rspamd_proxy_t *proxy)
+{
+    if (!proxy->closed) {
+        /*
+         * Unregister the events while the descriptors are still valid:
+         * removing an event whose descriptor is already closed makes the
+         * event backend operate on a stale (or reused) descriptor number.
+         */
+        event_del (&proxy->client_ev);
+        event_del (&proxy->backend_ev);
+
+        close (proxy->cfd);
+        close (proxy->bfd);
+        proxy->closed = TRUE;
+    }
+}
+
+/**
+ * Event handler for the client's side of the proxy.
+ *  - EV_READ: read a chunk from the client into the exchange buffer and
+ *    schedule writing it to the backend;
+ *  - EV_WRITE: write the pending part of the exchange buffer to the client
+ *    and, once everything is flushed, switch both sides back to reading;
+ *  - any other event is treated as a timeout.
+ * On an I/O error, a closed connection or a timeout the proxy is closed and
+ * err_cb is invoked (with a NULL error when the peer has just closed the
+ * connection).
+ * @param fd descriptor that triggered the event (unused, proxy->cfd is used)
+ * @param what libevent event flags
+ * @param data proxy object
+ */
+static void
+rspamd_proxy_client_handler (gint fd, gshort what, gpointer data)
+{
+    rspamd_proxy_t *proxy = data;
+    gint r;
+    GError *err = NULL;
+
+    if (what == EV_READ) {
+        /* Got data from client */
+        event_del (&proxy->client_ev);
+        r = read (proxy->cfd, proxy->buf, proxy->bufsize);
+        if (r > 0) {
+            /* Write this buffer to backend */
+            proxy->read_len = r;
+            proxy->buf_offset = 0;
+            event_del (&proxy->backend_ev);
+            event_set (&proxy->backend_ev, proxy->bfd, EV_WRITE, rspamd_proxy_backend_handler, proxy);
+            /* event_set () rebinds the event to the default base, restore ours */
+            event_base_set (proxy->base, &proxy->backend_ev);
+            event_add (&proxy->backend_ev, proxy->tv);
+        }
+        else if (r < 0) {
+            /* Error case */
+            g_set_error (&err, proxy_error_quark(), r, "Client read error: %s", strerror (errno));
+            rspamd_proxy_close (proxy);
+            proxy->err_cb (err, proxy->user_data);
+        }
+        else {
+            /* Client closes connection */
+            rspamd_proxy_close (proxy);
+            proxy->err_cb (NULL, proxy->user_data);
+        }
+    }
+    else if (what == EV_WRITE) {
+        /* Can write to client */
+        r = write (proxy->cfd, proxy->buf + proxy->buf_offset, proxy->read_len - proxy->buf_offset);
+        if (r > 0) {
+            /* We wrote something */
+            proxy->buf_offset += r;
+            if (proxy->buf_offset == proxy->read_len) {
+                /* We wrote everything, so both sides may read again */
+                event_del (&proxy->client_ev);
+                event_set (&proxy->client_ev, proxy->cfd, EV_READ, rspamd_proxy_client_handler, proxy);
+                event_base_set (proxy->base, &proxy->client_ev);
+                event_add (&proxy->client_ev, proxy->tv);
+                event_del (&proxy->backend_ev);
+                event_set (&proxy->backend_ev, proxy->bfd, EV_READ, rspamd_proxy_backend_handler, proxy);
+                event_base_set (proxy->base, &proxy->backend_ev);
+                event_add (&proxy->backend_ev, proxy->tv);
+            }
+            else {
+                /*
+                 * Plan another write event for the client. The backend event
+                 * must stay idle here: reading from the backend now would
+                 * overwrite the part of the buffer that is not written yet.
+                 */
+                event_add (&proxy->client_ev, proxy->tv);
+            }
+        }
+        else if (r < 0) {
+            /* Error case */
+            g_set_error (&err, proxy_error_quark(), r, "Client write error: %s", strerror (errno));
+            rspamd_proxy_close (proxy);
+            proxy->err_cb (err, proxy->user_data);
+        }
+        else {
+            /* Client closes connection */
+            rspamd_proxy_close (proxy);
+            proxy->err_cb (NULL, proxy->user_data);
+        }
+    }
+    else {
+        /* Got timeout */
+        g_set_error (&err, proxy_error_quark(), ETIMEDOUT, "Client timeout");
+        rspamd_proxy_close (proxy);
+        proxy->err_cb (err, proxy->user_data);
+    }
+}
+
+/**
+ * Event handler for the backend's side of the proxy.
+ *  - EV_READ: read a chunk from the backend into the exchange buffer and
+ *    schedule writing it to the client;
+ *  - EV_WRITE: write the pending part of the exchange buffer to the backend
+ *    and, once everything is flushed, switch both sides back to reading;
+ *  - any other event is treated as a timeout.
+ * On an I/O error, a closed connection or a timeout the proxy is closed and
+ * err_cb is invoked (with a NULL error when the peer has just closed the
+ * connection).
+ * @param fd descriptor that triggered the event (unused, proxy->bfd is used)
+ * @param what libevent event flags
+ * @param data proxy object
+ */
+static void
+rspamd_proxy_backend_handler (gint fd, gshort what, gpointer data)
+{
+    rspamd_proxy_t *proxy = data;
+    gint r;
+    GError *err = NULL;
+
+    if (what == EV_READ) {
+        /* Got data from backend */
+        event_del (&proxy->backend_ev);
+        r = read (proxy->bfd, proxy->buf, proxy->bufsize);
+        if (r > 0) {
+            /* Write this buffer to client */
+            proxy->read_len = r;
+            proxy->buf_offset = 0;
+            event_del (&proxy->client_ev);
+            /* The client handler writes to cfd, so wait for cfd to become writable */
+            event_set (&proxy->client_ev, proxy->cfd, EV_WRITE, rspamd_proxy_client_handler, proxy);
+            /* event_set () rebinds the event to the default base, restore ours */
+            event_base_set (proxy->base, &proxy->client_ev);
+            event_add (&proxy->client_ev, proxy->tv);
+        }
+        else if (r < 0) {
+            /* Error case */
+            g_set_error (&err, proxy_error_quark(), r, "Backend read error: %s", strerror (errno));
+            rspamd_proxy_close (proxy);
+            proxy->err_cb (err, proxy->user_data);
+        }
+        else {
+            /* Backend closes connection */
+            rspamd_proxy_close (proxy);
+            proxy->err_cb (NULL, proxy->user_data);
+        }
+    }
+    else if (what == EV_WRITE) {
+        /* Can write to backend */
+        r = write (proxy->bfd, proxy->buf + proxy->buf_offset, proxy->read_len - proxy->buf_offset);
+        if (r > 0) {
+            /* We wrote something */
+            proxy->buf_offset += r;
+            if (proxy->buf_offset == proxy->read_len) {
+                /* We wrote everything, so both sides may read again */
+                event_del (&proxy->backend_ev);
+                event_set (&proxy->backend_ev, proxy->bfd, EV_READ, rspamd_proxy_backend_handler, proxy);
+                event_base_set (proxy->base, &proxy->backend_ev);
+                event_add (&proxy->backend_ev, proxy->tv);
+                event_del (&proxy->client_ev);
+                event_set (&proxy->client_ev, proxy->cfd, EV_READ, rspamd_proxy_client_handler, proxy);
+                event_base_set (proxy->base, &proxy->client_ev);
+                event_add (&proxy->client_ev, proxy->tv);
+            }
+            else {
+                /* Plan another write event */
+                event_add (&proxy->backend_ev, proxy->tv);
+            }
+        }
+        else if (r < 0) {
+            /* Error case */
+            g_set_error (&err, proxy_error_quark(), r, "Backend write error: %s", strerror (errno));
+            rspamd_proxy_close (proxy);
+            proxy->err_cb (err, proxy->user_data);
+        }
+        else {
+            /* Backend closes connection */
+            rspamd_proxy_close (proxy);
+            proxy->err_cb (NULL, proxy->user_data);
+        }
+    }
+    else {
+        /* Got timeout */
+        g_set_error (&err, proxy_error_quark(), ETIMEDOUT, "Backend timeout");
+        rspamd_proxy_close (proxy);
+        proxy->err_cb (err, proxy->user_data);
+    }
+}
+
+/**
+ * Create new proxy between cfd and bfd and start waiting for data on both
+ * @param cfd client's socket; it is duplicated, the proxy works with the copy
+ * @param bfd backend's socket; it is duplicated as well
+ * @param pool memory pool that provides the proxy object and its buffer
+ * @param base event base where both events are registered
+ * @param bufsize size of exchange buffer
+ * @param tv timeout for events; the pointer is stored, not copied
+ * @param err_cb callback for errors or completing
+ * @param ud user data for callback
+ * @return new proxy object
+ *
+ * NOTE(review): the results of dup () are not checked here.
+ */
+rspamd_proxy_t*
+rspamd_create_proxy (gint cfd, gint bfd, rspamd_mempool_t *pool, struct event_base *base,
+        gsize bufsize, struct timeval *tv, dispatcher_err_callback_t err_cb, gpointer ud)
+{
+    rspamd_proxy_t *proxy = rspamd_mempool_alloc0 (pool, sizeof (rspamd_proxy_t));
+
+    /* Private copies of both descriptors */
+    proxy->cfd = dup (cfd);
+    proxy->bfd = dup (bfd);
+
+    /* Environment and callbacks */
+    proxy->pool = pool;
+    proxy->base = base;
+    proxy->tv = tv;
+    proxy->err_cb = err_cb;
+    proxy->user_data = ud;
+
+    /* Exchange buffer */
+    proxy->bufsize = bufsize;
+    proxy->buf = rspamd_mempool_alloc (pool, bufsize);
+
+    /* Both sides start in the reading state: client first, backend second */
+    event_set (&proxy->client_ev, proxy->cfd, EV_READ, rspamd_proxy_client_handler, proxy);
+    event_base_set (proxy->base, &proxy->client_ev);
+    event_add (&proxy->client_ev, proxy->tv);
+
+    event_set (&proxy->backend_ev, proxy->bfd, EV_READ, rspamd_proxy_backend_handler, proxy);
+    event_base_set (proxy->base, &proxy->backend_ev);
+    event_add (&proxy->backend_ev, proxy->tv);
+
+    return proxy;
+}
diff --git a/src/libserver/proxy.h b/src/libserver/proxy.h
new file mode 100644
index 000000000..c505fe83d
--- /dev/null
+++ b/src/libserver/proxy.h
@@ -0,0 +1,69 @@
+/* Copyright (c) 2010-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef PROXY_H_
+#define PROXY_H_
+
+#include "config.h"
+#include "buffer.h"
+
+/**
+ * @file proxy.h
+ * Direct asynchronous proxy implementation
+ */
+
+typedef struct rspamd_proxy_s {
+ struct event client_ev; /**< event for client's communication (read or write on cfd) */
+ struct event backend_ev; /**< event for backend communication (read or write on bfd) */
+ struct event_base *base; /**< base for event operations, both events are registered in it */
+ rspamd_mempool_t *pool; /**< memory pool that provides this object and the buffer */
+ dispatcher_err_callback_t err_cb; /**< error callback, gets NULL error when a peer closes connection */
+ struct event_base *ev_base; /**< event base; NOTE(review): not assigned by rspamd_create_proxy, see `base' */
+ gint cfd; /**< client's socket (a dup () of the descriptor passed on creation) */
+ gint bfd; /**< backend's socket (a dup () of the descriptor passed on creation) */
+ guint8 *buf; /**< exchange buffer, shared by both directions */
+ gsize bufsize; /**< buffer size */
+ gint read_len; /**< number of bytes placed to the buffer by the last read */
+ gint buf_offset; /**< offset to write: bytes of the buffer that are already written */
+ gpointer user_data; /**< user's data for callbacks */
+ struct timeval *tv; /**< timeout for communications (pointer is stored, not copied) */
+ gboolean closed; /**< whether descriptors are closed */
+} rspamd_proxy_t;
+
+/**
+ * Create new proxy between cfd and bfd
+ * @param cfd client's socket
+ * @param bfd backend's socket
+ * @param bufsize size of exchange buffer
+ * @param err_cb callback for errors or completing
+ * @param ud user data for callback
+ * @return new proxy object
+ */
+rspamd_proxy_t* rspamd_create_proxy (gint cfd, gint bfd, rspamd_mempool_t *pool,
+ struct event_base *base, gsize bufsize, struct timeval *tv,
+ dispatcher_err_callback_t err_cb, gpointer ud);
+
+void rspamd_proxy_close (rspamd_proxy_t *proxy);
+
+#endif /* PROXY_H_ */
diff --git a/src/libserver/roll_history.c b/src/libserver/roll_history.c
new file mode 100644
index 000000000..504f8ae3b
--- /dev/null
+++ b/src/libserver/roll_history.c
@@ -0,0 +1,212 @@
+/* Copyright (c) 2010-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+
+#include "config.h"
+#include "main.h"
+#include "roll_history.h"
+
+
+/**
+ * Create a roll history object in the shared memory of a pool
+ * @param pool pool for shared memory, it also provides the mutex
+ * @return new zero-filled structure or NULL if no pool is given
+ */
+struct roll_history*
+rspamd_roll_history_new (rspamd_mempool_t *pool)
+{
+    struct roll_history *history = NULL;
+
+    if (pool != NULL) {
+        history = rspamd_mempool_alloc0_shared (pool, sizeof (struct roll_history));
+        history->pool = pool;
+        history->mtx = rspamd_mempool_get_mutex (pool);
+    }
+
+    return history;
+}
+
+struct history_metric_callback_data {
+ gchar *pos; /* place in the output buffer where the next symbol name is printed */
+ gint remain; /* space left in the output buffer, counted from pos */
+};
+
+/*
+ * Hash table iterator: print the name of a symbol followed by ", " to the
+ * buffer described by user_data and advance the buffer position.
+ * Nothing is printed when no space remains.
+ */
+static void
+roll_history_symbols_callback (gpointer key, gpointer value, void *user_data)
+{
+    struct history_metric_callback_data *out = user_data;
+    struct symbol *sym = value;
+    guint written;
+
+    if (out->remain <= 0) {
+        return;
+    }
+
+    written = rspamd_snprintf (out->pos, out->remain, "%s, ", sym->name);
+    out->pos += written;
+    out->remain -= written;
+}
+
+/**
+ * Update roll history with data from task: the next row of the cyclic buffer
+ * is filled with the address, time, message id, user, default metric result,
+ * scan time and message length of the task.
+ * @param history roll history object
+ * @param task task object
+ */
+void
+rspamd_roll_history_update (struct roll_history *history, struct rspamd_task *task)
+{
+    gint row_num;
+    struct roll_history_row *row;
+    struct metric_result *metric_res;
+    struct history_metric_callback_data cbdata;
+
+    if (history->need_lock) {
+        /* Some process is getting history, so wait on a mutex */
+        rspamd_mempool_lock_mutex (history->mtx);
+        history->need_lock = FALSE;
+        rspamd_mempool_unlock_mutex (history->mtx);
+    }
+
+    /* Wrap the counter if it has reached the end, then take the next row number */
+    g_atomic_int_compare_and_exchange (&history->cur_row, HISTORY_MAX_ROWS, 0);
+#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30))
+    row_num = g_atomic_int_add (&history->cur_row, 1);
+#else
+    row_num = g_atomic_int_exchange_and_add (&history->cur_row, 1);
+#endif
+
+    if (row_num < HISTORY_MAX_ROWS) {
+        row = &history->rows[row_num];
+        /* Readers can see that the row is being rewritten */
+        row->completed = FALSE;
+    }
+    else {
+        /* Race condition */
+        history->cur_row = 0;
+        return;
+    }
+
+    /* Add information from task to roll history */
+    memcpy (&row->from_addr, &task->from_addr, sizeof (row->from_addr));
+    memcpy (&row->tv, &task->tv, sizeof (row->tv));
+
+    /* Strings */
+    rspamd_strlcpy (row->message_id, task->message_id, sizeof (row->message_id));
+    if (task->user) {
+        /* The limit must be the size of the destination, which is smaller than message_id */
+        rspamd_strlcpy (row->user, task->user, sizeof (row->user));
+    }
+    else {
+        row->user[0] = '\0';
+    }
+
+    /* Get default metric */
+    metric_res = g_hash_table_lookup (task->results, DEFAULT_METRIC);
+    if (metric_res == NULL) {
+        /* Rows are recycled, so do not leave the scores of an older message */
+        row->symbols[0] = '\0';
+        row->score = 0;
+        row->required_score = 0;
+        row->action = METRIC_ACTION_NOACTION;
+    }
+    else {
+        row->score = metric_res->score;
+        row->required_score = metric_res->metric->actions[METRIC_ACTION_REJECT].score;
+        row->action = check_metric_action (metric_res->score,
+                metric_res->metric->actions[METRIC_ACTION_REJECT].score, metric_res->metric);
+        cbdata.pos = row->symbols;
+        cbdata.remain = sizeof (row->symbols);
+        g_hash_table_foreach (metric_res->symbols, roll_history_symbols_callback, &cbdata);
+        if (cbdata.remain > 0) {
+            if (cbdata.pos - row->symbols >= 2 && cbdata.pos[-1] == ' ' && cbdata.pos[-2] == ',') {
+                /* Remove last whitespace and comma */
+                cbdata.pos[-2] = '\0';
+            }
+            else {
+                /* No symbols at all or a truncated list: just terminate it in place */
+                *cbdata.pos = '\0';
+            }
+        }
+        else {
+            /* The buffer is exhausted, make sure that it is terminated */
+            row->symbols[sizeof (row->symbols) - 1] = '\0';
+        }
+    }
+
+    row->scan_time = task->scan_milliseconds;
+    row->len = (task->msg == NULL ? 0 : task->msg->len);
+    row->completed = TRUE;
+}
+
+/**
+ * Load previously saved history from file. The file must have exactly the
+ * size of the rows array; it is read completely or not accepted at all.
+ * @param history roll history object
+ * @param filename filename to load from
+ * @return TRUE if history has been loaded
+ */
+gboolean
+rspamd_roll_history_load (struct roll_history *history, const gchar *filename)
+{
+    gint fd;
+    struct stat st;
+    guchar *dst = (guchar *)history->rows;
+    gsize remain = sizeof (history->rows);
+    gssize r;
+
+    if ((fd = open (filename, O_RDONLY)) == -1) {
+        msg_info ("cannot load history from %s: %s", filename, strerror (errno));
+        return FALSE;
+    }
+
+    /* Examine the opened descriptor, so the check and the read use the same file */
+    if (fstat (fd, &st) == -1) {
+        msg_info ("cannot load history from %s: %s", filename, strerror (errno));
+        close (fd);
+        return FALSE;
+    }
+
+    if (st.st_size < 0 || (gsize)st.st_size != sizeof (history->rows)) {
+        msg_info ("cannot load history from %s: size mismatch", filename);
+        close (fd);
+        return FALSE;
+    }
+
+    /* read () may return less than requested, so continue until all rows are read */
+    while (remain > 0) {
+        r = read (fd, dst, remain);
+        if (r == -1) {
+            if (errno == EINTR) {
+                continue;
+            }
+            msg_info ("cannot read history from %s: %s", filename, strerror (errno));
+            break;
+        }
+        else if (r == 0) {
+            msg_info ("cannot read history from %s: unexpected end of file", filename);
+            break;
+        }
+        dst += r;
+        remain -= r;
+    }
+
+    close (fd);
+
+    if (remain > 0) {
+        /* Do not keep partially loaded rows */
+        memset (history->rows, 0, sizeof (history->rows));
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+/**
+ * Save history to file. The file is created or truncated (mode 0600) and
+ * gets the raw content of the rows array.
+ * @param history roll history object
+ * @param filename filename to save to
+ * @return TRUE if history has been saved
+ */
+gboolean
+rspamd_roll_history_save (struct roll_history *history, const gchar *filename)
+{
+    gint fd;
+    const guchar *src = (const guchar *)history->rows;
+    gsize remain = sizeof (history->rows);
+    gssize r;
+
+    if ((fd = open (filename, O_WRONLY | O_CREAT | O_TRUNC, 00600)) == -1) {
+        msg_info ("cannot save history to %s: %s", filename, strerror (errno));
+        return FALSE;
+    }
+
+    /* write () may store less than requested, so continue until all rows are written */
+    while (remain > 0) {
+        r = write (fd, src, remain);
+        if (r == -1) {
+            if (errno == EINTR) {
+                continue;
+            }
+            msg_info ("cannot write history to %s: %s", filename, strerror (errno));
+            close (fd);
+            return FALSE;
+        }
+        else if (r == 0) {
+            msg_info ("cannot write history to %s: short write", filename);
+            close (fd);
+            return FALSE;
+        }
+        src += r;
+        remain -= r;
+    }
+
+    /* Deferred write errors are reported by close () */
+    if (close (fd) == -1) {
+        msg_info ("cannot write history to %s: %s", filename, strerror (errno));
+        return FALSE;
+    }
+
+    return TRUE;
+}
diff --git a/src/libserver/roll_history.h b/src/libserver/roll_history.h
new file mode 100644
index 000000000..1dff93a4f
--- /dev/null
+++ b/src/libserver/roll_history.h
@@ -0,0 +1,106 @@
+/* Copyright (c) 2010-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef ROLL_HISTORY_H_
+#define ROLL_HISTORY_H_
+
+#include "config.h"
+#include "mem_pool.h"
+
+/*
+ * Roll history is a special cycled buffer for checked messages, it is designed for writing history messages
+ * and displaying them in webui
+ */
+
+#define HISTORY_MAX_ID 100
+#define HISTORY_MAX_SYMBOLS 200
+#define HISTORY_MAX_USER 20
+#define HISTORY_MAX_ROWS 200
+
+struct rspamd_task;
+
+struct roll_history_row {
+ struct timeval tv; /* copied from task->tv */
+ gchar message_id[HISTORY_MAX_ID]; /* message id of the task, truncated to the field size */
+ gchar symbols[HISTORY_MAX_SYMBOLS]; /* names of symbols of the default metric separated by ", " */
+ gchar user[HISTORY_MAX_USER]; /* task->user or an empty string */
+#ifdef HAVE_INET_PTON
+ struct {
+ union {
+ struct in_addr in4;
+ struct in6_addr in6;
+ } d;
+ gboolean ipv6;
+ gboolean has_addr;
+ } from_addr; /* raw copy of task->from_addr, so the layouts must match */
+#else
+ struct in_addr from_addr; /* raw copy of task->from_addr */
+#endif
+ gsize len; /* length of the message or 0 if the task has no message */
+ guint scan_time; /* copied from task->scan_milliseconds */
+ gint action; /* action for the default metric or METRIC_ACTION_NOACTION */
+ gdouble score; /* score of the default metric */
+ gdouble required_score; /* score of the reject action of the default metric */
+ guint8 completed; /* FALSE while the row is being written, TRUE when it is filled */
+};
+
+struct roll_history {
+ struct roll_history_row rows[HISTORY_MAX_ROWS]; /* cyclic buffer of rows; saved and loaded as a raw block */
+ gint cur_row; /* number of the next row to write, updated atomically and wrapped at HISTORY_MAX_ROWS */
+ rspamd_mempool_t *pool; /* pool that the history is allocated from */
+ gboolean need_lock; /* when set, a writer passes through mtx once and resets the flag */
+ rspamd_mempool_mutex_t *mtx; /* mutex obtained from the pool */
+};
+
+/**
+ * Returns new roll history
+ * @param pool pool for shared memory
+ * @return new structure
+ */
+struct roll_history* rspamd_roll_history_new (rspamd_mempool_t *pool);
+
+/**
+ * Update roll history with data from task
+ * @param history roll history object
+ * @param task task object
+ */
+void rspamd_roll_history_update (struct roll_history *history, struct rspamd_task *task);
+
+/**
+ * Load previously saved history from file
+ * @param history roll history object
+ * @param filename filename to load from
+ * @return TRUE if history has been loaded
+ */
+gboolean rspamd_roll_history_load (struct roll_history *history, const gchar *filename);
+
+/**
+ * Save history to file
+ * @param history roll history object
+ * @param filename filename to save to
+ * @return TRUE if history has been saved
+ */
+gboolean rspamd_roll_history_save (struct roll_history *history, const gchar *filename);
+
+#endif /* ROLL_HISTORY_H_ */
diff --git a/src/libserver/settings.c b/src/libserver/settings.c
new file mode 100644
index 000000000..c3292c8ab
--- /dev/null
+++ b/src/libserver/settings.c
@@ -0,0 +1,657 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "cfg_file.h"
+#include "map.h"
+#include "main.h"
+#include "settings.h"
+#include "filter.h"
+#include "json/jansson.h"
+
+struct json_buf {
+ GHashTable *table; /* table that receives parsed settings (name -> settings) */
+ gchar *buf; /* buffer where chunks of json are accumulated, NULL until the first chunk */
+ gchar *pos; /* end of accumulated data inside buf */
+ size_t buflen; /* allocated size of buf */
+};
+
+/* Destructor for values of metric_actions: free each element, then the list itself */
+static void
+settings_actions_free (gpointer data)
+{
+    GList *head = data, *item;
+
+    for (item = head; item != NULL; item = g_list_next (item)) {
+        g_free (item->data);
+    }
+
+    g_list_free (head);
+}
+
+/* Release everything that a settings object owns and the object itself */
+static void
+settings_free (gpointer data)
+{
+    struct rspamd_settings *settings = data;
+    GHashTable *tables[6];
+    guint i;
+
+    /* g_free () accepts NULL, so no check is needed for the alias */
+    g_free (settings->statfile_alias);
+
+    tables[0] = settings->factors;
+    tables[1] = settings->metric_scores;
+    tables[2] = settings->reject_scores;
+    tables[3] = settings->whitelist;
+    tables[4] = settings->blacklist;
+    tables[5] = settings->metric_actions;
+
+    for (i = 0; i < G_N_ELEMENTS (tables); i ++) {
+        if (tables[i] != NULL) {
+            g_hash_table_destroy (tables[i]);
+        }
+    }
+
+    g_slice_free1 (sizeof (struct rspamd_settings), settings);
+}
+
+/*
+ * Take a reference to a settings object. When NULL is passed, a new object
+ * with empty tables and a single reference is created instead.
+ */
+static struct rspamd_settings *
+settings_ref (struct rspamd_settings *s)
+{
+    if (s != NULL) {
+        s->ref_count ++;
+        return s;
+    }
+
+    s = g_slice_alloc (sizeof (struct rspamd_settings));
+    s->ref_count = 1;
+    s->want_spam = FALSE;
+    s->statfile_alias = NULL;
+    /* All tables own their keys; values are freed by the table as well */
+    s->metric_scores = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free);
+    s->reject_scores = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free);
+    s->metric_actions = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, settings_actions_free);
+    s->factors = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free);
+    s->whitelist = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free);
+    s->blacklist = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free);
+
+    return s;
+}
+
+/* Drop a reference to a settings object and free it with the last one; NULL is ignored */
+static void
+settings_unref (struct rspamd_settings *s)
+{
+    if (s == NULL) {
+        return;
+    }
+
+    s->ref_count --;
+    if (s->ref_count > 0) {
+        return;
+    }
+
+    settings_free (s);
+}
+
+
+/**
+ * Map read callback: append a chunk of data to the buffer kept in
+ * data->cur_data. The buffer descriptor is created on the first call and
+ * shares the settings table of data->prev_data. One spare byte is always
+ * kept after the data, so the buffer can be NUL terminated later without
+ * writing past its end.
+ * @param pool memory pool (unused)
+ * @param chunk data to append
+ * @param len length of chunk
+ * @param data map callback data
+ * @return always NULL to say that no part of the chunk is left unprocessed
+ */
+gchar *
+json_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data)
+{
+    struct json_buf *jb;
+    size_t avail, off;
+
+    if (data->cur_data == NULL) {
+        jb = g_malloc (sizeof (struct json_buf));
+        jb->table = g_hash_table_ref (((struct json_buf *)data->prev_data)->table);
+        jb->buf = NULL;
+        jb->pos = NULL;
+        jb->buflen = 0;
+        data->cur_data = jb;
+    }
+    else {
+        jb = data->cur_data;
+    }
+
+    if (len <= 0) {
+        /* Nothing to append */
+        return NULL;
+    }
+
+    if (jb->buf == NULL) {
+        /* Allocate memory for buffer, including a byte for the terminator */
+        jb->buflen = (size_t)len * 2 + 1;
+        jb->buf = g_malloc (jb->buflen);
+        jb->pos = jb->buf;
+    }
+
+    off = jb->pos - jb->buf;
+    avail = jb->buflen - off;
+
+    if (avail <= (size_t)len) {
+        /* Grow when the chunk would not leave a spare byte after itself */
+        jb->buflen = MAX (jb->buflen * 2, jb->buflen + (size_t)len * 2);
+        jb->buf = g_realloc (jb->buf, jb->buflen);
+        jb->pos = jb->buf + off;
+    }
+
+    memcpy (jb->pos, chunk, len);
+    jb->pos += len;
+
+    /* Say not to copy any part of this buffer */
+    return NULL;
+}
+
+/* Store a heap copy of `value' in a string -> double table under a copy of `key' */
+static void
+settings_json_insert_score (GHashTable *table, const gchar *key, double value)
+{
+    double *score = g_malloc (sizeof (double));
+
+    *score = value;
+    g_hash_table_insert (table, g_strdup (key), score);
+}
+
+/* Copy all string members of a json object to a string -> string table */
+static void
+settings_json_read_strings (json_t *obj, GHashTable *table)
+{
+    void *it;
+    json_t *val;
+
+    for (it = json_object_iter (obj); it != NULL; it = json_object_iter_next (obj, it)) {
+        val = json_object_iter_value (it);
+        if (val && json_is_string (val)) {
+            g_hash_table_insert (table, g_strdup (json_object_iter_key (it)),
+                    g_strdup (json_string_value (val)));
+        }
+    }
+}
+
+/* Copy all numeric members of a json object to a string -> double table */
+static void
+settings_json_read_scores (json_t *obj, GHashTable *table)
+{
+    void *it;
+    json_t *val;
+
+    for (it = json_object_iter (obj); it != NULL; it = json_object_iter_next (obj, it)) {
+        val = json_object_iter_value (it);
+        if (val && json_is_number (val)) {
+            settings_json_insert_score (table, json_object_iter_key (it), json_number_value (val));
+        }
+    }
+}
+
+/* Put every non-empty string of a json array to a table as both key and value */
+static void
+settings_json_read_list (json_t *arr, GHashTable *table)
+{
+    gint i, n;
+    json_t *val;
+    const gchar *str;
+
+    n = json_array_size (arr);
+    for (i = 0; i < n; i ++) {
+        val = json_array_get (arr, i);
+        if (val && json_is_string (val)) {
+            str = json_string_value (val);
+            if (strlen (str) > 0) {
+                /* The table frees keys and values separately, so two copies are needed */
+                g_hash_table_insert (table, g_strdup (str), g_strdup (str));
+            }
+        }
+    }
+}
+
+/*
+ * Read the object that describes a single metric: "spam_score" and
+ * "reject_score" go to the score tables, other numeric members that are
+ * valid action names form the list of actions for this metric.
+ */
+static void
+settings_json_read_metric (const gchar *metric, json_t *obj, struct rspamd_settings *s)
+{
+    GList *acts = NULL;
+    struct metric_action *act;
+    const gchar *key;
+    void *it;
+    json_t *val;
+    gint action;
+
+    for (it = json_object_iter (obj); it != NULL; it = json_object_iter_next (obj, it)) {
+        val = json_object_iter_value (it);
+        if (!val || !json_is_number (val)) {
+            continue;
+        }
+
+        key = json_object_iter_key (it);
+        /* Special cases */
+        if (g_ascii_strcasecmp (key, "spam_score") == 0) {
+            settings_json_insert_score (s->metric_scores, metric, json_number_value (val));
+        }
+        else if (g_ascii_strcasecmp (key, "reject_score") == 0) {
+            settings_json_insert_score (s->reject_scores, metric, json_number_value (val));
+        }
+        else if (check_action_str (key, &action)) {
+            act = g_malloc (sizeof (struct metric_action));
+            act->action = action;
+            act->score = json_number_value (val);
+            acts = g_list_prepend (acts, act);
+        }
+    }
+
+    if (acts != NULL) {
+        g_hash_table_insert (s->metric_actions, g_strdup (metric), acts);
+    }
+}
+
+/* Read "metrics" object: a number is a metric score, an object is a set of actions */
+static void
+settings_json_read_metrics (json_t *obj, struct rspamd_settings *s)
+{
+    void *it;
+    json_t *val;
+
+    for (it = json_object_iter (obj); it != NULL; it = json_object_iter_next (obj, it)) {
+        val = json_object_iter_value (it);
+        if (val && json_is_number (val)) {
+            settings_json_insert_score (s->metric_scores, json_object_iter_key (it),
+                    json_number_value (val));
+        }
+        else if (val && json_is_object (val)) {
+            /* Assume this as actions hash */
+            settings_json_read_metric (json_object_iter_key (it), val, s);
+        }
+    }
+}
+
+/* Build a settings object from a json object that has passed validation */
+static struct rspamd_settings *
+settings_json_read_element (json_t *elt)
+{
+    struct rspamd_settings *s = settings_ref (NULL);
+    json_t *member;
+
+    /* Statfile */
+    member = json_object_get (elt, "statfile");
+    if (member != NULL && json_is_string (member)) {
+        s->statfile_alias = g_strdup (json_string_value (member));
+    }
+    /* Factors object */
+    member = json_object_get (elt, "factors");
+    if (member != NULL && json_is_object (member)) {
+        settings_json_read_strings (member, s->factors);
+    }
+    /* Metrics object */
+    member = json_object_get (elt, "metrics");
+    if (member != NULL && json_is_object (member)) {
+        settings_json_read_metrics (member, s);
+    }
+    /* Rejects object */
+    member = json_object_get (elt, "rejects");
+    if (member != NULL && json_is_object (member)) {
+        settings_json_read_scores (member, s->reject_scores);
+    }
+    /* Whitelist object */
+    member = json_object_get (elt, "whitelist");
+    if (member != NULL && json_is_array (member)) {
+        settings_json_read_list (member, s->whitelist);
+    }
+    /* Blacklist object */
+    member = json_object_get (elt, "blacklist");
+    if (member != NULL && json_is_array (member)) {
+        settings_json_read_list (member, s->blacklist);
+    }
+    /* Want spam */
+    member = json_object_get (elt, "want_spam");
+    if (member != NULL && json_is_true (member)) {
+        s->want_spam = TRUE;
+    }
+
+    return s;
+}
+
+/**
+ * Map finish callback: release the previous buffer, parse the accumulated
+ * json (an array of objects with a mandatory string "name") and place
+ * settings of each element to the table under its name. Parsing stops at the
+ * first element that is not an object or has no valid name; elements that
+ * precede it stay in the table.
+ * @param pool memory pool (unused)
+ * @param data map callback data
+ */
+void
+json_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data)
+{
+    struct json_buf *jb;
+    gint nelts, i;
+    json_t *js, *cur_elt, *cur_nm;
+    json_error_t je;
+    struct rspamd_settings *cur_settings;
+    size_t used;
+
+    if (data->prev_data) {
+        jb = data->prev_data;
+        /* Clean prev data */
+        if (jb->table) {
+            g_hash_table_unref (jb->table);
+        }
+        if (jb->buf) {
+            g_free (jb->buf);
+        }
+        g_free (jb);
+    }
+
+    /* Now parse json */
+    if (data->cur_data) {
+        jb = data->cur_data;
+    }
+    else {
+        msg_err ("no data read");
+        return;
+    }
+    if (jb->buf == NULL) {
+        msg_err ("no data read");
+        return;
+    }
+
+    /* NULL terminate current buf, growing it first if the data occupy it completely */
+    used = jb->pos - jb->buf;
+    if (used >= jb->buflen) {
+        jb->buflen = used + 1;
+        jb->buf = g_realloc (jb->buf, jb->buflen);
+        jb->pos = jb->buf + used;
+    }
+    *jb->pos = '\0';
+
+    js = json_loads (jb->buf, &je);
+    if (!js) {
+        msg_err ("cannot load json data: parse error %s, on line %d", je.text, je.line);
+        return;
+    }
+
+    if (!json_is_array (js)) {
+        json_decref (js);
+        msg_err ("loaded json is not an array");
+        return;
+    }
+
+    nelts = json_array_size (js);
+    for (i = 0; i < nelts; i++) {
+        cur_elt = json_array_get (js, i);
+        if (!cur_elt || !json_is_object (cur_elt)) {
+            json_decref (js);
+            msg_err ("loaded json is not an object");
+            return;
+        }
+        cur_nm = json_object_get (cur_elt, "name");
+        if (cur_nm == NULL || !json_is_string (cur_nm)) {
+            json_decref (js);
+            msg_err ("name is not a string or not exists");
+            return;
+        }
+
+        cur_settings = settings_json_read_element (cur_elt);
+        /* The table takes ownership of the name and of the settings */
+        g_hash_table_replace (jb->table, g_strdup (json_string_value (cur_nm)), cur_settings);
+    }
+    json_decref (js);
+}
+
+/**
+ * Register a json settings map that fills the specified table.
+ * @param path map line that is passed to add_map
+ * @param description description of the map
+ * @param cfg config where the map is registered
+ * @param table table that receives settings read from the map
+ * @return TRUE if the map has been added
+ *
+ * NOTE(review): when add_map fails, both allocations made here are not
+ * released; check whether add_map keeps the pointer before freeing them.
+ */
+gboolean
+read_settings (const gchar *path, const gchar *description, struct config_file *cfg, GHashTable * table)
+{
+    struct json_buf *initial;
+    struct json_buf **holder;
+
+    /* Initial state of the map: the target table and no buffered data */
+    initial = g_malloc (sizeof (struct json_buf));
+    holder = g_malloc (sizeof (struct json_buf *));
+
+    initial->table = table;
+    initial->buf = NULL;
+    *holder = initial;
+
+    if (add_map (cfg, path, description, json_read_cb, json_fin_cb, (void **)holder)) {
+        return TRUE;
+    }
+
+    msg_err ("cannot add map %s", path);
+    return FALSE;
+}
+
+/**
+ * Create empty tables of per-domain and per-user settings in a config.
+ * Keys are compared case insensitively; the tables free their keys and drop
+ * a reference to each settings object that is removed.
+ * @param cfg config to initialize
+ */
+void
+init_settings (struct config_file *cfg)
+{
+    GDestroyNotify drop_settings = (GDestroyNotify)settings_unref;
+
+    cfg->domain_settings = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal,
+            g_free, drop_settings);
+    cfg->user_settings = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal,
+            g_free, drop_settings);
+}
+
+/**
+ * Find settings that are applicable to a task. The address is taken from
+ * deliver-to, then from user, then from the first recipient; the part after
+ * '@' (searched in the first recipient as well) is used as a domain.
+ * @param task task object
+ * @param user_settings output: settings found by the address or NULL
+ * @param domain_settings output: settings found by the domain or NULL
+ * @return TRUE if user or domain settings are found. If the task has no
+ * address at all, FALSE is returned and the outputs are not touched.
+ */
+static gboolean
+check_setting (struct rspamd_task *task, struct rspamd_settings **user_settings, struct rspamd_settings **domain_settings)
+{
+    gchar *field = NULL, *domain = NULL;
+    gchar cmp_buf[1024];
+    gint len;
+
+    if (task->deliver_to != NULL) {
+        /* First try to use deliver-to field */
+        field = task->deliver_to;
+    }
+    else if (task->user != NULL) {
+        /* Then user field */
+        field = task->user;
+    }
+    else if (task->rcpt != NULL) {
+        /* Then first recipient */
+        field = task->rcpt->data;
+    }
+    else {
+        return FALSE;
+    }
+
+    domain = strchr (field, '@');
+    if (domain == NULL) {
+        /* First try to search in first recipient */
+        if (task->rcpt) {
+            domain = strchr (task->rcpt->data, '@');
+        }
+    }
+    if (domain != NULL) {
+        domain++;
+    }
+
+    /* First try to search per-user settings, angle brackets are not a part of the key */
+    if (*field == '<') {
+        field ++;
+    }
+    len = strcspn (field, ">");
+    rspamd_strlcpy (cmp_buf, field, MIN ((gint)sizeof (cmp_buf), len + 1));
+    *user_settings = g_hash_table_lookup (task->cfg->user_settings, cmp_buf);
+
+    if (domain != NULL) {
+        len = strcspn (domain, ">");
+        rspamd_strlcpy (cmp_buf, domain, MIN ((gint)sizeof (cmp_buf), len + 1));
+        *domain_settings = g_hash_table_lookup (task->cfg->domain_settings, cmp_buf);
+    }
+    else {
+        /* The output is examined below, so it must not be left unassigned */
+        *domain_settings = NULL;
+    }
+
+    if (*domain_settings != NULL || *user_settings != NULL) {
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+/*
+ * Check task->from against the blacklist and whitelist of settings `s`.
+ *
+ * Both the full address and the part after '@' are tried. The blacklist is
+ * consulted first.
+ *
+ * Returns TRUE when the sender is listed; *is_black is then TRUE for a
+ * blacklist hit and FALSE for a whitelist hit. Returns FALSE (leaving
+ * *is_black untouched) when the sender is empty or not listed.
+ */
+static gboolean
+check_bwhitelist (struct rspamd_task *task, struct rspamd_settings *s, gboolean *is_black)
+{
+	gchar *src_email, *src_domain, *data;
+
+	if (task->from == NULL || *task->from == '\0') {
+		return FALSE;
+	}
+
+	src_email = task->from;
+	src_domain = strchr (src_email, '@');
+	if (src_domain != NULL) {
+		src_domain ++;
+	}
+
+	data = g_hash_table_lookup (s->blacklist, src_email);
+	if (data == NULL && src_domain != NULL) {
+		data = g_hash_table_lookup (s->blacklist, src_domain);
+	}
+	if (data != NULL) {
+		*is_black = TRUE;
+		msg_info ("<%s> blacklisted as domain %s is in settings blacklist", task->message_id, data);
+		return TRUE;
+	}
+
+	data = g_hash_table_lookup (s->whitelist, src_email);
+	if (data == NULL && src_domain != NULL) {
+		data = g_hash_table_lookup (s->whitelist, src_domain);
+	}
+	if (data != NULL) {
+		*is_black = FALSE;
+		/* The original message wrongly said "blacklist" here */
+		msg_info ("<%s> whitelisted as domain %s is in settings whitelist", task->message_id, data);
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Fetch per-user / per-domain score overrides for the metric of `res`.
+ *
+ * *rscore is first set to 10.0 and then replaced by any reject score found
+ * (user settings first, then domain settings). As soon as a required score
+ * is found it is stored into *score and TRUE is returned; otherwise FALSE.
+ */
+gboolean
+check_metric_settings (struct metric_result *res, double *score, double *rscore)
+{
+	struct rspamd_settings *order[2];
+	struct metric *metric = res->metric;
+	double *found;
+	gint i;
+
+	/* User settings take precedence over domain settings */
+	order[0] = res->user_settings;
+	order[1] = res->domain_settings;
+
+	/* Hard-coded reject score used when no setting overrides it */
+	*rscore = 10.0;
+
+	for (i = 0; i < 2; i ++) {
+		if (order[i] == NULL) {
+			continue;
+		}
+
+		found = g_hash_table_lookup (order[i]->reject_scores, metric->name);
+		if (found != NULL) {
+			*rscore = *found;
+		}
+
+		found = g_hash_table_lookup (order[i]->metric_scores, metric->name);
+		if (found != NULL) {
+			*score = *found;
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Select an action for `score` from settings attached to `res`.
+ *
+ * User settings are used when present, otherwise domain settings (the two
+ * are never combined here). A black/white list hit decides immediately:
+ * reject for a blacklisted sender, no action for a whitelisted one.
+ * Otherwise the last action in the metric's list whose threshold does not
+ * exceed `score` is chosen.
+ *
+ * Returns TRUE and stores the action into *result when one was chosen.
+ */
+gboolean
+check_metric_action_settings (struct rspamd_task *task, struct metric_result *res,
+		double score, enum rspamd_metric_action *result)
+{
+	struct rspamd_settings *active;
+	struct metric_action *act;
+	GList *cur;
+	enum rspamd_metric_action chosen = METRIC_ACTION_NOACTION;
+	gboolean black, matched = FALSE;
+
+	active = res->user_settings != NULL ? res->user_settings : res->domain_settings;
+
+	if (active != NULL) {
+		/* Listed senders bypass score based selection */
+		if (check_bwhitelist (task, active, &black)) {
+			*result = black ? METRIC_ACTION_REJECT : METRIC_ACTION_NOACTION;
+			return TRUE;
+		}
+
+		for (cur = g_hash_table_lookup (active->metric_actions, res->metric->name);
+				cur != NULL; cur = g_list_next (cur)) {
+			act = cur->data;
+			if (score >= act->score) {
+				chosen = act->action;
+				matched = TRUE;
+			}
+		}
+	}
+
+	if (matched && result != NULL) {
+		*result = chosen;
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * Attach matching user/domain settings to a metric result.
+ *
+ * Each attached settings object is referenced and a destructor that
+ * unreferences it is registered in the task pool.
+ * Note that TRUE is also returned when check_setting finds nothing.
+ */
+gboolean
+apply_metric_settings (struct rspamd_task *task, struct metric *metric, struct metric_result *res)
+{
+	struct rspamd_settings *us = NULL, *ds = NULL;
+
+	if (!check_setting (task, &us, &ds)) {
+		return TRUE;
+	}
+	if (us == NULL && ds == NULL) {
+		return FALSE;
+	}
+
+	if (us != NULL) {
+		res->user_settings = settings_ref (us);
+		rspamd_mempool_add_destructor (task->task_pool,
+				(rspamd_mempool_destruct_t)settings_unref, us);
+	}
+	if (ds != NULL) {
+		/* Hold a reference for the lifetime of the task to avoid data corruption */
+		res->domain_settings = settings_ref (ds);
+		rspamd_mempool_add_destructor (task->task_pool,
+				(rspamd_mempool_destruct_t)settings_unref, ds);
+	}
+
+	return TRUE;
+}
+
+/*
+ * Look up an individual weight for `symbol`: user settings are searched
+ * first, then domain settings. On success the weight is stored into *factor
+ * and TRUE is returned.
+ */
+gboolean
+check_factor_settings (struct metric_result *res, const gchar *symbol, double *factor)
+{
+	struct rspamd_settings *order[2];
+	double *weight;
+	gint i;
+
+	order[0] = res->user_settings;
+	order[1] = res->domain_settings;
+
+	for (i = 0; i < 2; i ++) {
+		if (order[i] == NULL) {
+			continue;
+		}
+		weight = g_hash_table_lookup (order[i]->factors, symbol);
+		if (weight != NULL) {
+			*factor = *weight;
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+
+/*
+ * Return TRUE if the user or the domain settings matching this task have
+ * the want_spam flag set.
+ */
+gboolean
+check_want_spam (struct rspamd_task *task)
+{
+	struct rspamd_settings *us = NULL, *ds = NULL;
+
+	if (!check_setting (task, &us, &ds)) {
+		return FALSE;
+	}
+
+	if (us != NULL && us->want_spam) {
+		return TRUE;
+	}
+	if (ds != NULL && ds->want_spam) {
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * vi:ts=4
+ */
diff --git a/src/libserver/settings.h b/src/libserver/settings.h
new file mode 100644
index 000000000..361700094
--- /dev/null
+++ b/src/libserver/settings.h
@@ -0,0 +1,55 @@
+#ifndef RSPAMD_SETTINGS_H
+#define RSPAMD_SETTINGS_H
+
+#include "config.h"
+#include "main.h"
+
+/**
+ * Settings overrides attached to a single user or domain key.
+ */
+struct rspamd_settings {
+ GHashTable *metric_scores; /**< hash table of metric require scores for this setting */
+ GHashTable *reject_scores; /**< hash table of metric reject scores for this setting */
+ GHashTable *metric_actions; /**< hash table of metric actions for this setting */
+ GHashTable *factors; /**< hash table of new factors for this setting */
+ GHashTable *whitelist; /**< hash table of whitelist for this setting */
+ GHashTable *blacklist; /**< hash table of blacklist for this setting */
+ gchar *statfile_alias; /**< alias for statfile used */
+ gboolean want_spam; /**< if true disable rspamd checks */
+ gint ref_count; /**< reference counter */
+};
+
+
+/*
+ * Read settings from specified path
+ */
+gboolean read_settings (const gchar *path, const gchar *description, struct config_file *cfg, GHashTable *table);
+
+/*
+ * Init configuration structures for settings
+ */
+void init_settings (struct config_file *cfg);
+
+/*
+ * Check scores settings
+ */
+gboolean check_metric_settings (struct metric_result *res, double *score, double *rscore);
+
+/*
+ * Check actions settings
+ */
+gboolean check_metric_action_settings (struct rspamd_task *task, struct metric_result *res, double score, enum rspamd_metric_action *result);
+
+/*
+ * Check individual weights for settings
+ */
+gboolean check_factor_settings (struct metric_result *res, const gchar *symbol, double *factor);
+
+/*
+ * Check want_spam flag
+ */
+gboolean check_want_spam (struct rspamd_task *task);
+
+/*
+ * Search settings for metric and store pointers to settings into metric_result structure
+ */
+gboolean apply_metric_settings (struct rspamd_task *task, struct metric *metric, struct metric_result *res);
+
+#endif
diff --git a/src/libserver/spf.c b/src/libserver/spf.c
new file mode 100644
index 000000000..12f1513d4
--- /dev/null
+++ b/src/libserver/spf.c
@@ -0,0 +1,1465 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "dns.h"
+#include "spf.h"
+#include "main.h"
+#include "message.h"
+#include "filter.h"
+
+#define SPF_VER1_STR "v=spf1"
+#define SPF_VER2_STR "spf2."
+#define SPF_SCOPE_PRA "pra"
+#define SPF_SCOPE_MFROM "mfrom"
+#define SPF_ALL "all"
+#define SPF_A "a"
+#define SPF_IP4 "ip4"
+#define SPF_IP6 "ip6"
+#define SPF_PTR "ptr"
+#define SPF_MX "mx"
+#define SPF_EXISTS "exists"
+#define SPF_INCLUDE "include"
+#define SPF_REDIRECT "redirect"
+#define SPF_EXP "exp"
+
+/** SPF limits for avoiding abuse **/
+#define SPF_MAX_NESTING 10
+#define SPF_MAX_DNS_REQUESTS 30
+
+/**
+ * State machine for SPF record:
+ *
+ * spf_mech ::= +|-|~|?
+ *
+ * spf_body ::= v=spf1 <spf_command> [<spf_command>]
+ * spf_command ::= [spf_mech]all|a|<ip4>|<ip6>|ptr|mx|<exists>|<include>|<redirect>
+ *
+ * spf_domain ::= [:domain][/mask]
+ * spf_ip4 ::= ip[/mask]
+ * ip4 ::= ip4:<spf_ip4>
+ * mx ::= mx<spf_domain>
+ * a ::= a<spf_domain>
+ * ptr ::= ptr[:domain]
+ * exists ::= exists:domain
+ * include ::= include:domain
+ * redirect ::= redirect=domain
+ * exp ::= exp=domain
+ *
+ */
+
+#undef SPF_DEBUG
+
+/* Context passed to spf_record_dns_callback for one DNS request */
+struct spf_dns_cb {
+ struct spf_record *rec; /* record being resolved */
+ struct spf_addr *addr; /* address element that the reply fills in */
+ spf_action_t cur_action; /* which SPF_RESOLVE_* handling applies to the reply */
+ gboolean in_include; /* copy of rec->in_include taken when the request was made */
+};
+
+/*
+ * Abuse guard: makes the ENCLOSING function return FALSE when the record
+ * exceeded SPF_MAX_NESTING or SPF_MAX_DNS_REQUESTS. The value logged as the
+ * "limit" is the current dns_requests counter.
+ */
+#define CHECK_REC(rec) \
+do { \
+ if ((rec)->nested > SPF_MAX_NESTING || \
+ (rec)->dns_requests > SPF_MAX_DNS_REQUESTS) { \
+ msg_info ("<%s> spf recursion limit %d is reached, domain: %s", \
+ (rec)->task->message_id, (rec)->dns_requests, \
+ (rec)->sender_domain); \
+ return FALSE; \
+ } \
+} while (0) \
+
+static gboolean parse_spf_record (struct rspamd_task *task, struct spf_record *rec);
+static void start_spf_parse (struct spf_record *rec, gchar *begin, guint ttl);
+
+/*
+ * Map an optional qualifier character at the start of `elt` to a mechanism
+ * result. *need_shift tells the caller whether a qualifier was present and
+ * must be skipped; without a qualifier the result is SPF_PASS.
+ */
+static spf_mech_t
+check_spf_mech (const gchar *elt, gboolean *need_shift)
+{
+	spf_mech_t mech = SPF_PASS;
+	gboolean qualified = TRUE;
+
+	g_assert (elt != NULL);
+
+	if (*elt == '-') {
+		mech = SPF_FAIL;
+	}
+	else if (*elt == '~') {
+		mech = SPF_SOFT_FAIL;
+	}
+	else if (*elt == '?') {
+		mech = SPF_NEUTRAL;
+	}
+	else if (*elt != '+') {
+		/* No qualifier: implicit pass, nothing to skip */
+		qualified = FALSE;
+	}
+
+	*need_shift = qualified;
+
+	return mech;
+}
+
+/*
+ * Debugging function that dumps spf record in log.
+ *
+ * Plain addresses are printed as <qualifier><ip>/<mask>; list elements print
+ * their spf_string and are then dumped recursively as a separate log line.
+ * Output longer than the local buffer is truncated.
+ */
+static void
+dump_spf_record (GList *addrs)
+{
+	struct spf_addr *addr;
+	GList *cur;
+	gint r = 0, n;
+	gchar logbuf[BUFSIZ], c;
+#ifdef HAVE_INET_PTON
+	gchar ipbuf[INET6_ADDRSTRLEN];
+#else
+	struct in_addr ina;
+#endif
+
+	/* An empty list must not log uninitialised stack memory */
+	logbuf[0] = '\0';
+
+	for (cur = addrs; cur != NULL; cur = g_list_next (cur)) {
+		addr = cur->data;
+		if (!addr->is_list) {
+			switch (addr->mech) {
+			case SPF_FAIL:
+				c = '-';
+				break;
+			case SPF_SOFT_FAIL:
+				c = '~';
+				break;
+			case SPF_PASS:
+				c = '+';
+				break;
+			case SPF_NEUTRAL:
+			default:
+				/* '?' is the neutral qualifier, also used for unknown values */
+				c = '?';
+				break;
+			}
+#ifdef HAVE_INET_PTON
+			if (addr->data.normal.ipv6) {
+				inet_ntop (AF_INET6, &addr->data.normal.d.in6, ipbuf, sizeof (ipbuf));
+			}
+			else {
+				inet_ntop (AF_INET, &addr->data.normal.d.in4, ipbuf, sizeof (ipbuf));
+			}
+			n = snprintf (logbuf + r, sizeof (logbuf) - r, "%c%s/%d; ", c, ipbuf, addr->data.normal.mask);
+#else
+			ina.s_addr = addr->data.normal.d.in4.s_addr;
+			n = snprintf (logbuf + r, sizeof (logbuf) - r, "%c%s/%d; ", c, inet_ntoa (ina), addr->data.normal.mask);
+#endif
+		}
+		else {
+			n = snprintf (logbuf + r, sizeof (logbuf) - r, "%s; ", addr->spf_string);
+			dump_spf_record (addr->data.list);
+		}
+
+		/*
+		 * snprintf returns the length it WANTED to write; clamp the offset
+		 * so that the remaining size never underflows after truncation.
+		 */
+		if (n > 0) {
+			r += n;
+			if (r > (gint)sizeof (logbuf) - 1) {
+				r = sizeof (logbuf) - 1;
+			}
+		}
+	}
+	msg_info ("spf record: %s", logbuf);
+}
+
+/*
+ * Find the element of `addrs` that is `to_find` or that is a nested list
+ * containing it (at any depth). The returned link always belongs to `addrs`
+ * itself; NULL means not found.
+ */
+static GList *
+spf_addr_find (GList *addrs, gpointer to_find)
+{
+	struct spf_addr *addr;
+	GList *cur;
+
+	for (cur = addrs; cur != NULL; cur = g_list_next (cur)) {
+		addr = cur->data;
+		if (addr->is_list) {
+			if (spf_addr_find (addr->data.list, to_find) != NULL) {
+				return cur;
+			}
+		}
+		else if (cur->data == to_find) {
+			return cur;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Destructor for spf record: frees the list links of every nested
+ * (first level) list and of the top level address list. The spf_addr
+ * elements themselves are not freed here.
+ */
+static void
+spf_record_destructor (gpointer r)
+{
+	struct spf_record *rec = r;
+	struct spf_addr *addr;
+	GList *cur;
+
+	if (rec->addrs == NULL) {
+		return;
+	}
+
+	for (cur = rec->addrs; cur != NULL; cur = g_list_next (cur)) {
+		addr = cur->data;
+		if (addr->is_list && addr->data.list != NULL) {
+			g_list_free (addr->data.list);
+		}
+	}
+
+	g_list_free (rec->addrs);
+}
+
+/* Check whether a character may appear in the textual address part */
+static gboolean
+spf_ip_char_allowed (gchar ch)
+{
+#ifdef HAVE_INET_PTON
+	return g_ascii_isxdigit (ch) || ch == '.' || ch == ':';
+#else
+	return g_ascii_isdigit (ch) || ch == '.';
+#endif
+}
+
+/*
+ * Parse ":<ip>[/<mask>]" (the tail of an ip4/ip6 mechanism) into `addr`.
+ *
+ * @param begin text starting at the ':' separator
+ * @param addr address element to fill (d, ipv6, mask, parsed)
+ * @param rec record used for log messages only
+ * @return TRUE on success, FALSE on any syntax or range error
+ *
+ * The input comes from DNS, so both temporary buffers always keep a
+ * terminating NUL, and an empty mask ("ip4:1.2.3.4/") is rejected instead
+ * of being read as /0.
+ */
+static gboolean
+parse_spf_ipmask (const gchar *begin, struct spf_addr *addr, struct spf_record *rec)
+{
+	const gchar *pos;
+	gchar mask_buf[5], *p;
+	gint state = 0;
+#ifdef HAVE_INET_PTON
+	gchar ip_buf[INET6_ADDRSTRLEN];
+#else
+	gchar ip_buf[INET_ADDRSTRLEN];
+#endif
+
+	bzero (ip_buf, sizeof (ip_buf));
+	bzero (mask_buf, sizeof (mask_buf));
+	pos = begin;
+	p = ip_buf;
+
+	while (*pos) {
+		switch (state) {
+		case 0:
+			/* Require ':' */
+			if (*pos != ':') {
+				msg_info ("<%s>: spf error for domain %s: colon missing",
+						rec->task->message_id, rec->sender_domain);
+				return FALSE;
+			}
+			state = 1;
+			pos ++;
+			p = ip_buf;
+			break;
+		case 1:
+			/* Collect address characters until '/' */
+			if (*pos == '/') {
+				pos ++;
+				p = mask_buf;
+				state = 2;
+			}
+			else if (spf_ip_char_allowed (*pos)) {
+				if (p - ip_buf >= (gint)sizeof (ip_buf) - 1) {
+					/* The last byte is reserved for the terminating NUL */
+					return FALSE;
+				}
+				*p ++ = *pos ++;
+			}
+			else {
+				/* Invalid character */
+				msg_info ("<%s>: spf error for domain %s: invalid ip address",
+						rec->task->message_id, rec->sender_domain);
+				return FALSE;
+			}
+			break;
+		case 2:
+			/* Parse mask */
+			if (!g_ascii_isdigit (*pos)) {
+				return FALSE;
+			}
+			if (p - mask_buf >= (gint)sizeof (mask_buf) - 1) {
+				msg_info ("<%s>: spf error for domain %s: too long mask",
+						rec->task->message_id, rec->sender_domain);
+				return FALSE;
+			}
+			*p ++ = *pos ++;
+			break;
+		}
+	}
+
+	if (state == 2 && p == mask_buf) {
+		/* '/' without digits would otherwise be parsed as mask 0 */
+		msg_info ("<%s>: spf error for domain %s: bad ipmask value: '%s'",
+				rec->task->message_id, rec->sender_domain, begin);
+		return FALSE;
+	}
+
+#ifdef HAVE_INET_PTON
+	if (inet_pton (AF_INET, ip_buf, &addr->data.normal.d.in4) != 1) {
+		if (inet_pton (AF_INET6, ip_buf, &addr->data.normal.d.in6) == 1) {
+			addr->data.normal.ipv6 = TRUE;
+		}
+		else {
+			msg_info ("<%s>: spf error for domain %s: invalid ip address",
+					rec->task->message_id, rec->sender_domain);
+			return FALSE;
+		}
+	}
+	else {
+		addr->data.normal.ipv6 = FALSE;
+	}
+#else
+	if (!inet_aton (ip_buf, &addr->data.normal.d.in4)) {
+		return FALSE;
+	}
+#endif
+	if (state == 2) {
+		/* Explicit mask: at most 32 bits for IPv4 and 128 for IPv6 */
+		addr->data.normal.mask = strtoul (mask_buf, NULL, 10);
+		if (addr->data.normal.mask > (addr->data.normal.ipv6 ? 128 : 32)) {
+			msg_info ("<%s>: spf error for domain %s: bad ipmask value: '%s'",
+					rec->task->message_id, rec->sender_domain, begin);
+			return FALSE;
+		}
+	}
+	else {
+		/* No mask given: match a single host */
+		addr->data.normal.mask = addr->data.normal.ipv6 ? 128 : 32;
+	}
+	addr->data.normal.parsed = TRUE;
+	return TRUE;
+
+}
+
+/*
+ * Parse "[host][/mask]" as used by the a and mx mechanisms.
+ *
+ * @param task task whose pool owns the returned string
+ * @param begin text after the optional ':' separator
+ * @param addr address element; its mask is set here (32 when absent)
+ * @param rec record; rec->cur_domain is used when no host is given
+ * @return host name to resolve, or NULL if the mask is greater than 32
+ */
+static gchar *
+parse_spf_hostmask (struct rspamd_task *task, const gchar *begin, struct spf_addr *addr, struct spf_record *rec)
+{
+	gchar *host = NULL, *p, mask_buf[3];
+	gint hostlen;
+
+	bzero (mask_buf, sizeof (mask_buf));
+	if (*begin == '\0' || *begin == '/') {
+		/* Assume host as host to resolve from record */
+		host = rec->cur_domain;
+	}
+	p = strchr (begin, '/');
+	if (p != NULL) {
+		/* Extract mask */
+		rspamd_strlcpy (mask_buf, p + 1, sizeof (mask_buf));
+		addr->data.normal.mask = strtoul (mask_buf, NULL, 10);
+		if (addr->data.normal.mask > 32) {
+			msg_info ("<%s>: spf error for domain %s: too long mask",
+					rec->task->message_id, rec->sender_domain);
+			return NULL;
+		}
+		if (host == NULL) {
+			/*
+			 * rspamd_strlcpy copies at most size - 1 characters, so the
+			 * size must include the terminating NUL; otherwise the last
+			 * character of the host name is lost.
+			 */
+			hostlen = p - begin;
+			host = rspamd_mempool_alloc (task->task_pool, hostlen + 1);
+			rspamd_strlcpy (host, begin, hostlen + 1);
+		}
+	}
+	else {
+		addr->data.normal.mask = 32;
+		if (host == NULL) {
+			host = rspamd_mempool_strdup (task->task_pool, begin);
+		}
+	}
+
+	return host;
+}
+
+/*
+ * Common DNS reply handler for all requests issued by the SPF parser.
+ *
+ * `arg` is the struct spf_dns_cb created by the parse_spf_* function that
+ * made the request; cb->cur_action selects how reply entries are handled:
+ * - MX: every MX entry triggers a further A request that reuses the same cb;
+ *   A/AAAA entries fill cb->addr, or insert a copy before it when cb->addr
+ *   is already parsed;
+ * - A / AAA: the address is stored into cb->addr;
+ * - REDIRECT / INCLUDE: the TXT payload is parsed with start_spf_parse;
+ * - EXISTS: any A entry turns cb->addr into a zero length mask.
+ * On NXDOMAIN the address is set to INADDR_NONE (all 0xff for AAA).
+ * Other reply codes only update the in-flight accounting.
+ *
+ * When no requests remain in flight the record callback is invoked.
+ */
+static void
+spf_record_dns_callback (struct rdns_reply *reply, gpointer arg)
+{
+ struct spf_dns_cb *cb = arg;
+ gchar *begin;
+ struct rdns_reply_entry *elt_data;
+ GList *tmp = NULL;
+ struct rspamd_task *task;
+ struct spf_addr *new_addr;
+
+ task = cb->rec->task;
+
+ cb->rec->requests_inflight --;
+
+ if (reply->code == RDNS_RC_NOERROR) {
+ /* Add all logic for all DNS states here */
+ LL_FOREACH (reply->entries, elt_data) {
+ switch (cb->cur_action) {
+ case SPF_RESOLVE_MX:
+ if (elt_data->type == RDNS_REQUEST_MX) {
+ /* Now resolve A record for this MX */
+ if (make_dns_request (task->resolver, task->s, task->task_pool,
+ spf_record_dns_callback, (void *)cb, RDNS_REQUEST_A, elt_data->content.mx.name)) {
+ task->dns_requests ++;
+ cb->rec->requests_inflight ++;
+ }
+ }
+ else if (elt_data->type == RDNS_REQUEST_A) {
+ if (!cb->addr->data.normal.parsed) {
+ cb->addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr;
+ cb->addr->data.normal.mask = 32;
+ cb->addr->data.normal.parsed = TRUE;
+ }
+ else {
+ /* Insert one more address */
+ tmp = spf_addr_find (cb->rec->addrs, cb->addr);
+ if (tmp) {
+ new_addr = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_addr));
+ memcpy (new_addr, cb->addr, sizeof (struct spf_addr));
+ new_addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr;
+ new_addr->data.normal.parsed = TRUE;
+ cb->rec->addrs = g_list_insert_before (cb->rec->addrs, tmp, new_addr);
+ }
+ else {
+ msg_info ("<%s>: spf error for domain %s: addresses mismatch",
+ task->message_id, cb->rec->sender_domain);
+ }
+ }
+
+ }
+#ifdef HAVE_INET_PTON
+ else if (elt_data->type == RDNS_REQUEST_AAAA) {
+ if (!cb->addr->data.normal.parsed) {
+ memcpy (&cb->addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr));
+ /* NOTE(review): mask 32 on an IPv6 address (here and in the AAAA branches below) looks too broad; parse_spf_ipmask uses 128 for a single IPv6 host - confirm */
+ cb->addr->data.normal.mask = 32;
+ cb->addr->data.normal.parsed = TRUE;
+ cb->addr->data.normal.ipv6 = TRUE;
+ }
+ else {
+ /* Insert one more address */
+ tmp = spf_addr_find (cb->rec->addrs, cb->addr);
+ if (tmp) {
+ new_addr = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_addr));
+ memcpy (new_addr, cb->addr, sizeof (struct spf_addr));
+ memcpy (&new_addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr));
+ new_addr->data.normal.parsed = TRUE;
+ new_addr->data.normal.ipv6 = TRUE;
+ cb->rec->addrs = g_list_insert_before (cb->rec->addrs, tmp, new_addr);
+ }
+ else {
+ msg_info ("<%s>: spf error for domain %s: addresses mismatch",
+ task->message_id, cb->rec->sender_domain);
+ }
+ }
+
+ }
+#endif
+ break;
+ case SPF_RESOLVE_A:
+ if (elt_data->type == RDNS_REQUEST_A) {
+ /* XXX: process only one record */
+ cb->addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr;
+ /* NOTE(review): this overwrites the mask set earlier by parse_spf_hostmask - confirm it is intended */
+ cb->addr->data.normal.mask = 32;
+ cb->addr->data.normal.parsed = TRUE;
+ }
+#ifdef HAVE_INET_PTON
+ else if (elt_data->type == RDNS_REQUEST_AAAA) {
+ memcpy (&cb->addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr));
+ cb->addr->data.normal.mask = 32;
+ cb->addr->data.normal.parsed = TRUE;
+ cb->addr->data.normal.ipv6 = TRUE;
+ }
+#endif
+ break;
+#ifdef HAVE_INET_PTON
+ case SPF_RESOLVE_AAA:
+ if (elt_data->type == RDNS_REQUEST_A) {
+ /* XXX: process only one record */
+ cb->addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr;
+ cb->addr->data.normal.mask = 32;
+ cb->addr->data.normal.parsed = TRUE;
+ }
+ else if (elt_data->type == RDNS_REQUEST_AAAA) {
+ memcpy (&cb->addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr));
+ cb->addr->data.normal.mask = 32;
+ cb->addr->data.normal.parsed = TRUE;
+ cb->addr->data.normal.ipv6 = TRUE;
+ }
+#endif
+ break;
+ case SPF_RESOLVE_PTR:
+ break;
+ case SPF_RESOLVE_REDIRECT:
+ if (elt_data->type == RDNS_REQUEST_TXT) {
+ begin = elt_data->content.txt.data;
+
+ /* Outside of an include the redirect target replaces everything collected so far */
+ if (!cb->in_include && cb->rec->addrs) {
+ g_list_free (cb->rec->addrs);
+ cb->rec->addrs = NULL;
+ }
+ start_spf_parse (cb->rec, begin, elt_data->ttl);
+
+ }
+ break;
+ case SPF_RESOLVE_INCLUDE:
+ if (elt_data->type == RDNS_REQUEST_TXT) {
+ begin = elt_data->content.txt.data;
+#ifdef SPF_DEBUG
+ msg_info ("before include");
+ dump_spf_record (cb->rec->addrs);
+#endif
+ /* Parse the included record into a fresh list, then hang it on cb->addr and restore the outer list */
+ tmp = cb->rec->addrs;
+ cb->rec->addrs = NULL;
+ cb->rec->in_include = TRUE;
+ start_spf_parse (cb->rec, begin, 0);
+ cb->rec->in_include = FALSE;
+
+#ifdef SPF_DEBUG
+ msg_info ("after include");
+ dump_spf_record (cb->rec->addrs);
+#endif
+ /* Insert new list */
+ cb->addr->is_list = TRUE;
+ cb->addr->data.list = cb->rec->addrs;
+ cb->rec->addrs = tmp;
+ }
+ break;
+ case SPF_RESOLVE_EXP:
+ break;
+ case SPF_RESOLVE_EXISTS:
+ if (elt_data->type == RDNS_REQUEST_A) {
+ /* If specified address resolves, we can accept connection from every IP */
+ cb->addr->data.normal.d.in4.s_addr = INADDR_NONE;
+ cb->addr->data.normal.mask = 0;
+ }
+ break;
+ }
+ }
+ }
+ else if (reply->code == RDNS_RC_NXDOMAIN) {
+ switch (cb->cur_action) {
+ case SPF_RESOLVE_MX:
+ if (rdns_request_has_type (reply->request, RDNS_REQUEST_MX)) {
+ msg_info ("<%s>: spf error for domain %s: cannot find MX record for %s",
+ task->message_id, cb->rec->sender_domain, cb->rec->cur_domain);
+ cb->addr->data.normal.d.in4.s_addr = INADDR_NONE;
+ cb->addr->data.normal.mask = 32;
+ }
+ else {
+ msg_info ("<%s>: spf error for domain %s: cannot resolve MX record for %s",
+ task->message_id, cb->rec->sender_domain, cb->rec->cur_domain);
+ cb->addr->data.normal.d.in4.s_addr = INADDR_NONE;
+ cb->addr->data.normal.mask = 32;
+ }
+ break;
+ case SPF_RESOLVE_A:
+ if (rdns_request_has_type (reply->request, RDNS_REQUEST_A)) {
+ cb->addr->data.normal.d.in4.s_addr = INADDR_NONE;
+ cb->addr->data.normal.mask = 32;
+ }
+ break;
+#ifdef HAVE_INET_PTON
+ case SPF_RESOLVE_AAA:
+ if (rdns_request_has_type (reply->request, RDNS_REQUEST_AAAA)) {
+ memset (&cb->addr->data.normal.d.in6, 0xff, sizeof (struct in6_addr));
+ cb->addr->data.normal.mask = 32;
+ }
+ break;
+#endif
+ case SPF_RESOLVE_PTR:
+ break;
+ case SPF_RESOLVE_REDIRECT:
+ msg_info ("<%s>: spf error for domain %s: cannot resolve TXT record for %s",
+ task->message_id, cb->rec->sender_domain, cb->rec->cur_domain);
+ break;
+ case SPF_RESOLVE_INCLUDE:
+ msg_info ("<%s>: spf error for domain %s: cannot resolve TXT record for %s",
+ task->message_id, cb->rec->sender_domain, cb->rec->cur_domain);
+ break;
+ case SPF_RESOLVE_EXP:
+ break;
+ case SPF_RESOLVE_EXISTS:
+ cb->addr->data.normal.d.in4.s_addr = INADDR_NONE;
+ cb->addr->data.normal.mask = 32;
+ break;
+ }
+ }
+
+ /* Last outstanding reply: hand the record to its owner */
+ if (cb->rec->requests_inflight == 0) {
+ cb->rec->callback (cb->rec, cb->rec->task);
+ }
+}
+
+/*
+ * Handle the "a" mechanism: a[:domain][/mask].
+ *
+ * @param task task providing resolver, session and memory pool
+ * @param begin text right after the mechanism name
+ * @param rec record being parsed
+ * @param addr address element to be filled from the DNS reply
+ * @return TRUE if an A request was scheduled
+ *
+ * The domain is optional: a bare "a" or "a/24" refers to the current
+ * domain, which parse_spf_hostmask substitutes for an empty host. Any other
+ * character after the name is still rejected.
+ */
+static gboolean
+parse_spf_a (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr)
+{
+	struct spf_dns_cb *cb;
+	gchar *host;
+
+	CHECK_REC (rec);
+
+	if (begin == NULL) {
+		return FALSE;
+	}
+	if (*begin == ':') {
+		begin ++;
+	}
+	else if (*begin != '\0' && *begin != '/') {
+		/* Not an "a" mechanism at all */
+		return FALSE;
+	}
+
+	host = parse_spf_hostmask (task, begin, addr, rec);
+
+	if (!host) {
+		return FALSE;
+	}
+	rec->dns_requests ++;
+	cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb));
+	cb->rec = rec;
+	cb->addr = addr;
+	cb->cur_action = SPF_RESOLVE_A;
+	cb->in_include = rec->in_include;
+	if (make_dns_request (task->resolver, task->s, task->task_pool,
+			spf_record_dns_callback, (void *)cb, RDNS_REQUEST_A, host)) {
+		task->dns_requests ++;
+		rec->requests_inflight ++;
+		return TRUE;
+	}
+
+	return FALSE;
+
+}
+
+/*
+ * "ptr" mechanism: not implemented, always fails after logging
+ * (subject to the usual CHECK_REC limits).
+ */
+static gboolean
+parse_spf_ptr (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr)
+{
+	gboolean supported = FALSE;
+
+	CHECK_REC (rec);
+
+	msg_info ("<%s>: spf error for domain %s: ptr elements are not implemented",
+			rec->task->message_id, rec->sender_domain);
+
+	return supported;
+}
+
+/*
+ * Handle the "mx" mechanism: schedules an MX lookup for the given host (or
+ * the current domain). The address part of `addr` is zeroed and filled later
+ * by the DNS callback. Returns TRUE if the request was scheduled.
+ */
+static gboolean
+parse_spf_mx (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr)
+{
+	struct spf_dns_cb *dns_ctx;
+	gchar *mx_host;
+
+	CHECK_REC (rec);
+
+	if (begin == NULL) {
+		return FALSE;
+	}
+
+	/* The ':' separator is optional */
+	mx_host = parse_spf_hostmask (task, *begin == ':' ? begin + 1 : begin, addr, rec);
+	if (mx_host == NULL) {
+		return FALSE;
+	}
+
+	rec->dns_requests ++;
+
+	dns_ctx = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb));
+	dns_ctx->rec = rec;
+	dns_ctx->addr = addr;
+	dns_ctx->cur_action = SPF_RESOLVE_MX;
+	dns_ctx->in_include = rec->in_include;
+	memset (&addr->data.normal, 0, sizeof (addr->data.normal));
+
+	if (!make_dns_request (task->resolver, task->s, task->task_pool,
+			spf_record_dns_callback, (void *)dns_ctx, RDNS_REQUEST_MX, mx_host)) {
+		return FALSE;
+	}
+
+	task->dns_requests ++;
+	rec->requests_inflight ++;
+
+	return TRUE;
+}
+
+/*
+ * Handle the "all" mechanism. At top level it becomes 0/0 with addr_any set;
+ * inside an include it is neutralised by giving the zero address a /32 mask.
+ */
+static gboolean
+parse_spf_all (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr)
+{
+	gboolean nested = rec->in_include;
+
+	memset (&addr->data.normal.d, 0, sizeof (addr->data.normal.d));
+
+	if (!nested) {
+		addr->data.normal.mask = 0;
+		addr->data.normal.addr_any = TRUE;
+		return TRUE;
+	}
+
+	/* Ignore all record in include */
+	addr->data.normal.mask = 32;
+
+	return TRUE;
+}
+
+/* Handle "ip4:addr[/mask]"; the work is done by parse_spf_ipmask */
+static gboolean
+parse_spf_ip4 (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr)
+{
+	gboolean parsed;
+
+	CHECK_REC (rec);
+
+	parsed = parse_spf_ipmask (begin, addr, rec);
+
+	return parsed;
+}
+
+#ifdef HAVE_INET_PTON
+/* Handle "ip6:addr[/mask]"; the work is done by parse_spf_ipmask */
+static gboolean
+parse_spf_ip6 (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr)
+{
+	gboolean parsed;
+
+	CHECK_REC (rec);
+
+	parsed = parse_spf_ipmask (begin, addr, rec);
+
+	return parsed;
+}
+#endif
+
+/*
+ * Handle "include:domain": marks `addr` as an (initially empty) list and
+ * schedules a TXT lookup for the domain. Returns TRUE if the request was
+ * scheduled.
+ */
+static gboolean
+parse_spf_include (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr)
+{
+	struct spf_dns_cb *dns_ctx;
+	gchar *target;
+
+	CHECK_REC (rec);
+
+	if (begin == NULL || *begin != ':') {
+		return FALSE;
+	}
+
+	rec->dns_requests ++;
+
+	/* The list is populated by the DNS callback */
+	addr->is_list = TRUE;
+	addr->data.list = NULL;
+
+	dns_ctx = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb));
+	dns_ctx->rec = rec;
+	dns_ctx->addr = addr;
+	dns_ctx->cur_action = SPF_RESOLVE_INCLUDE;
+	dns_ctx->in_include = rec->in_include;
+
+	target = rspamd_mempool_strdup (task->task_pool, begin + 1);
+	if (!make_dns_request (task->resolver, task->s, task->task_pool,
+			spf_record_dns_callback, (void *)dns_ctx, RDNS_REQUEST_TXT, target)) {
+		return FALSE;
+	}
+
+	task->dns_requests ++;
+	rec->requests_inflight ++;
+
+	return TRUE;
+}
+
+/* "exp" modifier: accepted but ignored (subject to the CHECK_REC limits) */
+static gboolean
+parse_spf_exp (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr)
+{
+	gboolean accepted = TRUE;
+
+	CHECK_REC (rec);
+
+	msg_info ("exp record is ignored");
+
+	return accepted;
+}
+
+/*
+ * Handle "redirect=domain": schedules a TXT lookup for the target domain.
+ * Returns TRUE if the request was scheduled.
+ */
+static gboolean
+parse_spf_redirect (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr)
+{
+	struct spf_dns_cb *dns_ctx;
+	gchar *target;
+
+	CHECK_REC (rec);
+
+	if (begin == NULL || *begin != '=') {
+		return FALSE;
+	}
+
+	rec->dns_requests ++;
+
+	dns_ctx = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb));
+	dns_ctx->rec = rec;
+	dns_ctx->addr = addr;
+	dns_ctx->cur_action = SPF_RESOLVE_REDIRECT;
+	dns_ctx->in_include = rec->in_include;
+
+	target = rspamd_mempool_strdup (task->task_pool, begin + 1);
+	if (!make_dns_request (task->resolver, task->s, task->task_pool,
+			spf_record_dns_callback, (void *)dns_ctx, RDNS_REQUEST_TXT, target)) {
+		return FALSE;
+	}
+
+	task->dns_requests ++;
+	rec->requests_inflight ++;
+
+	return TRUE;
+}
+
+/*
+ * Handle "exists:domain": schedules an A lookup; the DNS callback decides
+ * the final address/mask. Returns TRUE if the request was scheduled.
+ */
+static gboolean
+parse_spf_exists (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr)
+{
+	struct spf_dns_cb *dns_ctx;
+	gchar *target;
+
+	CHECK_REC (rec);
+
+	if (begin == NULL || *begin != ':') {
+		return FALSE;
+	}
+
+	rec->dns_requests ++;
+	addr->data.normal.mask = 32;
+
+	dns_ctx = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb));
+	dns_ctx->rec = rec;
+	dns_ctx->addr = addr;
+	dns_ctx->cur_action = SPF_RESOLVE_EXISTS;
+	dns_ctx->in_include = rec->in_include;
+
+	target = rspamd_mempool_strdup (task->task_pool, begin + 1);
+	if (!make_dns_request (task->resolver, task->s, task->task_pool,
+			spf_record_dns_callback, (void *)dns_ctx, RDNS_REQUEST_A, target)) {
+		return FALSE;
+	}
+
+	task->dns_requests ++;
+	rec->requests_inflight ++;
+
+	return TRUE;
+}
+
+/*
+ * Reverse the order of dot separated labels of `ip` in place, e.g.
+ * "1.2.3.4" becomes "4.3.2.1".
+ *
+ * @param ip buffer holding the text; it is not required to be NUL terminated
+ * @param len number of characters to process
+ *
+ * Nothing is done for len <= 0 or when the text is longer than a dotted
+ * IPv4 address (the latter is logged).
+ */
+static void
+reverse_spf_ip (gchar *ip, gint len)
+{
+	gchar ipbuf[sizeof("255.255.255.255") - 1], *p, *c;
+	gint t = 0, l = len;
+
+	if (len <= 0) {
+		/* The loop below counts down from len and must never start at 0 or less */
+		return;
+	}
+
+	if (len > (gint)sizeof (ipbuf)) {
+		msg_info ("cannot reverse string of length %d", len);
+		return;
+	}
+
+	/* Labels are copied to the temporary buffer from its end towards its start */
+	p = ipbuf + len;
+	c = ip;
+	while (-- l) {
+		if (*c == '.') {
+			/* `t` characters of the finished label precede `c` */
+			memcpy (p, c - t, t);
+			*--p = '.';
+			c ++;
+			t = 0;
+			continue;
+		}
+
+		t ++;
+		c ++;
+		p --;
+	}
+
+	/* The last label (including the character not visited by the loop) goes first */
+	memcpy (p - 1, c - t, t + 1);
+
+	memcpy (ip, ipbuf, len);
+}
+
+/*
+ * Expand SPF macros found in `begin`.
+ *
+ * The input is scanned twice with the same small state machine:
+ *   state 0 - literal text, waiting for '%'
+ *   state 1 - character right after '%' ('%', '_', '-' or '{')
+ *   state 2 - macro letter inside "%{"
+ *   state 3 - modifiers ('r' or digits) up to the closing '}'
+ * The first pass validates the syntax and computes an upper bound for the
+ * length of the result, the second pass writes the expansion into a buffer
+ * allocated from the task memory pool.
+ *
+ * Returns `begin` unchanged when no complete "%{...}" macro is present or
+ * when the input is malformed (the error is logged); otherwise returns the
+ * newly allocated NUL-terminated expansion.
+ */
+static gchar *
+expand_spf_macro (struct rspamd_task *task, struct spf_record *rec, gchar *begin)
+{
+	gchar *p, *c, *new, *tmp;
+	gint len = 0, state = 0;
+#ifdef HAVE_INET_PTON
+	gchar ip_buf[INET6_ADDRSTRLEN];
+#endif
+	gboolean need_expand = FALSE;
+
+	p = begin;
+	/* Calculate length */
+	while (*p) {
+		switch (state) {
+		case 0:
+			/* Skip any character and wait for % in input */
+			if (*p == '%') {
+				state = 1;
+			}
+			else {
+				len ++;
+			}
+
+			p ++;
+			break;
+		case 1:
+			/* We got a % sign, so expect one of '%', '_', '-' or '{' */
+			if (*p == '%' || *p == '_') {
+				/* Literal '%' or a single space */
+				len ++;
+			}
+			else if (*p == '-') {
+				/* URL-encoded space: %20 */
+				len += sizeof ("%20") - 1;
+			}
+			else if (*p == '{') {
+				state = 2;
+			}
+			else {
+				/* Something unknown */
+				msg_info ("<%s>: spf error for domain %s: unknown spf element",
+						task->message_id, rec->sender_domain);
+				return begin;
+			}
+			p ++;
+			break;
+		case 2:
+			/* Read macro name */
+			switch (g_ascii_tolower (*p)) {
+			case 'i':
+				/*
+				 * Reserve room for the longest textual address. The
+				 * *_ADDRSTRLEN constants already include the trailing NUL,
+				 * hence the -1 (taking sizeof of the constant would only
+				 * yield the size of an int).
+				 */
+#ifdef HAVE_INET_PTON
+				len += INET6_ADDRSTRLEN - 1;
+#else
+				len += INET_ADDRSTRLEN - 1;
+#endif
+				break;
+			case 's':
+				len += strlen (rec->sender);
+				break;
+			case 'l':
+				len += strlen (rec->local_part);
+				break;
+			case 'o':
+				len += strlen (rec->sender_domain);
+				break;
+			case 'd':
+				len += strlen (rec->cur_domain);
+				break;
+			case 'v':
+				len += sizeof ("in-addr") - 1;
+				break;
+			case 'h':
+				/* Upper bound: the second pass copies at most this much */
+				if (task->helo) {
+					len += strlen (task->helo);
+				}
+				break;
+			default:
+				msg_info ("<%s>: spf error for domain %s: unknown or unsupported spf macro %c in %s",
+						task->message_id, rec->sender_domain, *p, begin);
+				return begin;
+			}
+			p ++;
+			state = 3;
+			break;
+		case 3:
+			/* Read modifier */
+			if (*p == '}') {
+				state = 0;
+				need_expand = TRUE;
+			}
+			else if (*p != 'r' && !g_ascii_isdigit (*p)) {
+				msg_info ("<%s>: spf error for domain %s: unknown or unsupported spf modifier %c in %s",
+						task->message_id, rec->sender_domain, *p, begin);
+				return begin;
+			}
+			p ++;
+			break;
+		}
+	}
+
+	if (!need_expand) {
+		/* No expansion needed */
+		return begin;
+	}
+
+	new = rspamd_mempool_alloc (task->task_pool, len + 1);
+
+	c = new;
+	p = begin;
+	state = 0;
+	/* From here on `len` holds the length of the last expanded macro */
+	len = 0;
+
+	/* Begin macro expansion */
+	while (*p) {
+		switch (state) {
+		case 0:
+			/* Skip any character and wait for % in input */
+			if (*p == '%') {
+				state = 1;
+			}
+			else {
+				*c = *p;
+				c ++;
+			}
+
+			p ++;
+			break;
+		case 1:
+			/* We got a % sign, so expect one of '%', '_', '-' or '{' */
+			if (*p == '%') {
+				/* "%%" is a literal percent sign */
+				*c++ = '%';
+			}
+			else if (*p == '_') {
+				/* "%_" is a single space */
+				*c++ = ' ';
+			}
+			else if (*p == '-') {
+				/* "%-" is a URL-encoded space */
+				*c++ = '%';
+				*c++ = '2';
+				*c++ = '0';
+			}
+			else if (*p == '{') {
+				state = 2;
+			}
+			else {
+				/* Something unknown */
+				msg_info ("<%s>: spf error for domain %s: unknown spf element",
+						task->message_id, rec->sender_domain);
+				return begin;
+			}
+			p ++;
+			break;
+		case 2:
+			/*
+			 * Read macro name. Reset `len` first so that a macro which
+			 * expands to nothing never leaves a stale length behind for
+			 * the 'r' modifier.
+			 */
+			len = 0;
+			switch (g_ascii_tolower (*p)) {
+			case 'i':
+#ifdef HAVE_INET_PTON
+				len = rspamd_strlcpy (ip_buf,
+						rspamd_inet_address_to_string (&task->from_addr),
+						sizeof (ip_buf));
+				/* Never copy more than the bytes reserved in the first pass */
+				if (len > (gint)sizeof (ip_buf) - 1) {
+					len = sizeof (ip_buf) - 1;
+				}
+				memcpy (c, ip_buf, len);
+#else
+				tmp = inet_ntoa (task->from_addr);
+				len = strlen (tmp);
+				memcpy (c, tmp, len);
+#endif
+				c += len;
+				break;
+			case 's':
+				len = strlen (rec->sender);
+				memcpy (c, rec->sender, len);
+				c += len;
+				break;
+			case 'l':
+				len = strlen (rec->local_part);
+				memcpy (c, rec->local_part, len);
+				c += len;
+				break;
+			case 'o':
+				len = strlen (rec->sender_domain);
+				memcpy (c, rec->sender_domain, len);
+				c += len;
+				break;
+			case 'd':
+				len = strlen (rec->cur_domain);
+				memcpy (c, rec->cur_domain, len);
+				c += len;
+				break;
+			case 'v':
+				len = sizeof ("in-addr") - 1;
+				memcpy (c, "in-addr", len);
+				c += len;
+				break;
+			case 'h':
+				/*
+				 * Only the part of HELO after '@' is used; a HELO without
+				 * '@' expands to nothing.
+				 * NOTE(review): confirm this is intended.
+				 */
+				if (task->helo) {
+					tmp = strchr (task->helo, '@');
+					if (tmp) {
+						len = strlen (tmp + 1);
+						memcpy (c, tmp + 1, len);
+						c += len;
+					}
+				}
+				break;
+			default:
+				msg_info ("<%s>: spf error for domain %s: unknown or unsupported spf macro %c in %s",
+						task->message_id, rec->sender_domain, *p, begin);
+				return begin;
+			}
+			p ++;
+			state = 3;
+			break;
+		case 3:
+			/* Read modifier */
+			if (*p == '}') {
+				state = 0;
+			}
+			else if (*p == 'r') {
+				/* Reverse the value that has just been written, if any */
+				if (len != 0) {
+					reverse_spf_ip (c - len, len);
+					len = 0;
+				}
+			}
+			else if (g_ascii_isdigit (*p)) {
+				/* XXX: try to implement domain trimming */
+			}
+			else {
+				msg_info ("<%s>: spf error for domain %s: unknown or unsupported spf macro %c in %s",
+						task->message_id, rec->sender_domain, *p, begin);
+				return begin;
+			}
+			p ++;
+			break;
+		}
+	}
+	/* Null terminate */
+	*c = '\0';
+
+	return new;
+
+}
+
+/*
+ * Allocate a new spf_addr from the task pool and initialise it for the
+ * element currently being parsed. Relies on `task`, `rec`, `begin` and
+ * `need_shift` being in scope at the point of use.
+ * No trailing semicolon after `while (0)`: the caller supplies it, so the
+ * macro behaves like a single statement.
+ */
+#define NEW_ADDR(x) do {														\
+	(x) = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_addr));		\
+	(x)->mech = check_spf_mech (rec->cur_elt, &need_shift);						\
+	(x)->spf_string = rspamd_mempool_strdup (task->task_pool, begin);			\
+	memset (&(x)->data.normal, 0, sizeof ((x)->data.normal));					\
+	(x)->data.normal.mask = 32;													\
+	(x)->is_list = FALSE;														\
+} while (0)
+
+/*
+ * Parse the element at rec->elt_num.
+ *
+ * Returns FALSE when there are no more elements or the element could not be
+ * handled; returns TRUE (and advances rec->elt_num) when the element was
+ * consumed. Addresses produced by a mechanism are prepended to rec->addrs.
+ */
+static gboolean
+parse_spf_record (struct rspamd_task *task, struct spf_record *rec)
+{
+	struct spf_addr *new = NULL;
+	gboolean need_shift, res = FALSE, bad_command = FALSE;
+	gchar *begin;
+
+	rec->cur_elt = rec->elts[rec->elt_num];
+	if (rec->cur_elt == NULL) {
+		return FALSE;
+	}
+	if (*rec->cur_elt == '\0') {
+		/* Silently skip empty elements */
+		rec->elt_num ++;
+		return TRUE;
+	}
+
+	begin = expand_spf_macro (task, rec, rec->cur_elt);
+
+	/* Step over a leading qualifier, if there is one */
+	switch (*begin) {
+	case '?':
+	case '+':
+	case '-':
+	case '~':
+		begin ++;
+		break;
+	default:
+		break;
+	}
+
+	/* Dispatch on the first letter, then match the full keyword */
+	switch (g_ascii_tolower (*begin)) {
+	case 'a':
+		/* all or a */
+		if (g_ascii_strncasecmp (begin, SPF_ALL, sizeof (SPF_ALL) - 1) == 0) {
+			NEW_ADDR (new);
+			begin += sizeof (SPF_ALL) - 1;
+			res = parse_spf_all (task, begin, rec, new);
+		}
+		else if (g_ascii_strncasecmp (begin, SPF_A, sizeof (SPF_A) - 1) == 0) {
+			NEW_ADDR (new);
+			begin += sizeof (SPF_A) - 1;
+			res = parse_spf_a (task, begin, rec, new);
+		}
+		else {
+			bad_command = TRUE;
+		}
+		break;
+	case 'i':
+		/* ip4, include or ip6 */
+		if (g_ascii_strncasecmp (begin, SPF_IP4, sizeof (SPF_IP4) - 1) == 0) {
+			NEW_ADDR (new);
+			begin += sizeof (SPF_IP4) - 1;
+			res = parse_spf_ip4 (task, begin, rec, new);
+		}
+		else if (g_ascii_strncasecmp (begin, SPF_INCLUDE, sizeof (SPF_INCLUDE) - 1) == 0) {
+			NEW_ADDR (new);
+			begin += sizeof (SPF_INCLUDE) - 1;
+			res = parse_spf_include (task, begin, rec, new);
+		}
+		else if (g_ascii_strncasecmp (begin, SPF_IP6, sizeof (SPF_IP6) - 1) == 0) {
+#ifdef HAVE_INET_PTON
+			NEW_ADDR (new);
+			begin += sizeof (SPF_IP6) - 1;
+			res = parse_spf_ip6 (task, begin, rec, new);
+#else
+			msg_info ("ignoring ip6 spf command as IPv6 is not supported: %s", begin);
+			new = NULL;
+			res = TRUE;
+			begin += sizeof (SPF_IP6) - 1;
+#endif
+		}
+		else {
+			bad_command = TRUE;
+		}
+		break;
+	case 'm':
+		/* mx */
+		if (g_ascii_strncasecmp (begin, SPF_MX, sizeof (SPF_MX) - 1) == 0) {
+			NEW_ADDR (new);
+			begin += sizeof (SPF_MX) - 1;
+			res = parse_spf_mx (task, begin, rec, new);
+		}
+		else {
+			bad_command = TRUE;
+		}
+		break;
+	case 'p':
+		/* ptr */
+		if (g_ascii_strncasecmp (begin, SPF_PTR, sizeof (SPF_PTR) - 1) == 0) {
+			NEW_ADDR (new);
+			begin += sizeof (SPF_PTR) - 1;
+			res = parse_spf_ptr (task, begin, rec, new);
+		}
+		else {
+			bad_command = TRUE;
+		}
+		break;
+	case 'e':
+		/* exp or exists */
+		if (g_ascii_strncasecmp (begin, SPF_EXP, sizeof (SPF_EXP) - 1) == 0) {
+			begin += sizeof (SPF_EXP) - 1;
+			res = parse_spf_exp (task, begin, rec, NULL);
+		}
+		else if (g_ascii_strncasecmp (begin, SPF_EXISTS, sizeof (SPF_EXISTS) - 1) == 0) {
+			NEW_ADDR (new);
+			begin += sizeof (SPF_EXISTS) - 1;
+			res = parse_spf_exists (task, begin, rec, new);
+		}
+		else {
+			bad_command = TRUE;
+		}
+		break;
+	case 'r':
+		/* redirect */
+		if (g_ascii_strncasecmp (begin, SPF_REDIRECT, sizeof (SPF_REDIRECT) - 1) == 0) {
+			begin += sizeof (SPF_REDIRECT) - 1;
+			res = parse_spf_redirect (task, begin, rec, NULL);
+		}
+		else {
+			bad_command = TRUE;
+		}
+		break;
+	case 'v':
+		/* A nested version tag is stepped over without any diagnostics */
+		if (g_ascii_strncasecmp (begin, "v=spf", sizeof ("v=spf") - 1) == 0) {
+			while (*begin && !g_ascii_isspace (*begin)) {
+				begin ++;
+			}
+		}
+		break;
+	default:
+		bad_command = TRUE;
+		break;
+	}
+
+	if (bad_command) {
+		/* `begin` is untouched on every path that sets the flag */
+		msg_info ("<%s>: spf error for domain %s: bad spf command %s",
+				task->message_id, rec->sender_domain, begin);
+	}
+
+	if (res) {
+		if (new != NULL) {
+			rec->addrs = g_list_prepend (rec->addrs, new);
+		}
+		rec->elt_num ++;
+	}
+
+	return res;
+}
+#undef NEW_ADDR
+
+/*
+ * Step `*begin` over a comma separated list of Sender ID scopes ("pra",
+ * "mfrom"). On return `*begin` points at the first character that is neither
+ * part of a known scope name nor a comma.
+ */
+static void
+parse_spf_scopes (struct spf_record *rec, gchar **begin)
+{
+	gchar *cur = *begin;
+	gboolean more = TRUE;
+
+	while (more) {
+		if (g_ascii_strncasecmp (cur, SPF_SCOPE_PRA, sizeof (SPF_SCOPE_PRA) - 1) == 0) {
+			/* XXX: Implement actual PRA check */
+			/* extract_pra_info (rec); */
+			cur += sizeof (SPF_SCOPE_PRA) - 1;
+		}
+		else if (g_ascii_strncasecmp (cur, SPF_SCOPE_MFROM, sizeof (SPF_SCOPE_MFROM) - 1) == 0) {
+			/* mfrom is the standard spf1 check */
+			cur += sizeof (SPF_SCOPE_MFROM) - 1;
+		}
+		else if (*cur == ',') {
+			cur ++;
+		}
+		else {
+			more = FALSE;
+		}
+	}
+
+	*begin = cur;
+}
+
+/*
+ * Split the space separated element list starting at `begin` and feed the
+ * elements to parse_spf_record() until it reports that it cannot continue.
+ * The split vector is released together with the task memory pool. A
+ * non-zero `ttl` is stored in the record.
+ */
+static void
+start_spf_parse_elements (struct spf_record *rec, gchar *begin, guint ttl)
+{
+	while (g_ascii_isspace (*begin)) {
+		begin ++;
+	}
+
+	rec->elts = g_strsplit_set (begin, " ", 0);
+	rec->elt_num = 0;
+	if (rec->elts) {
+		rspamd_mempool_add_destructor (rec->task->task_pool, (rspamd_mempool_destruct_t)g_strfreev, rec->elts);
+		rec->cur_elt = rec->elts[0];
+		while (parse_spf_record (rec->task, rec));
+		if (ttl != 0) {
+			rec->ttl = ttl;
+		}
+	}
+}
+
+/*
+ * Entry point for parsing a TXT record: recognises the version prefix
+ * (SPF_VER1_STR or SPF_VER2_STR followed by one version character and an
+ * optional "/scopes" part) and parses the elements that follow. Records with
+ * any other prefix are only reported at debug level.
+ */
+static void
+start_spf_parse (struct spf_record *rec, gchar *begin, guint ttl)
+{
+	/* Skip spaces */
+	while (g_ascii_isspace (*begin)) {
+		begin ++;
+	}
+
+	if (g_ascii_strncasecmp (begin, SPF_VER1_STR, sizeof (SPF_VER1_STR) - 1) == 0) {
+		begin += sizeof (SPF_VER1_STR) - 1;
+		start_spf_parse_elements (rec, begin, ttl);
+	}
+	else if (g_ascii_strncasecmp (begin, SPF_VER2_STR, sizeof (SPF_VER2_STR) - 1) == 0) {
+		begin += sizeof (SPF_VER2_STR) - 1;
+		if (*begin == '\0') {
+			/*
+			 * The record ends right after the prefix: stepping over the
+			 * version character would move past the terminating NUL.
+			 */
+			msg_info ("<%s>: spf error for domain %s: sender id is invalid",
+					rec->task->message_id, rec->sender_domain);
+			return;
+		}
+		/* Skip one version character, so now we are at the '/' of spf2.0/ */
+		begin ++;
+		if (*begin != '/') {
+			msg_info ("<%s>: spf error for domain %s: sender id is invalid",
+					rec->task->message_id, rec->sender_domain);
+		}
+		else {
+			begin ++;
+			parse_spf_scopes (rec, &begin);
+		}
+		/* Now common spf record */
+		start_spf_parse_elements (rec, begin, ttl);
+	}
+	else {
+		msg_debug ("<%s>: spf error for domain %s: bad spf record version: %*s",
+				rec->task->message_id, rec->sender_domain, sizeof (SPF_VER1_STR) - 1, begin);
+	}
+}
+
+/*
+ * DNS reply handler for the TXT request issued for a record: every entry of
+ * a successful reply is parsed as an SPF record, and the user callback is
+ * invoked once no requests remain in flight.
+ */
+static void
+spf_dns_callback (struct rdns_reply *reply, gpointer arg)
+{
+	struct spf_record *record = (struct spf_record *)arg;
+	struct rdns_reply_entry *entry;
+	gboolean ok = (reply->code == RDNS_RC_NOERROR);
+
+	/* This request is done; parsing below may start new ones */
+	record->requests_inflight --;
+
+	if (ok) {
+		LL_FOREACH (reply->entries, entry) {
+			start_spf_parse (record, entry->content.txt.data, entry->ttl);
+		}
+	}
+
+	if (record->requests_inflight != 0) {
+		return;
+	}
+
+	record->callback (record, record->task);
+}
+
+/*
+ * Return the domain used for SPF checks: the part of task->from after '@',
+ * or, when that is unavailable, the part after the last '@' of the first
+ * "From" header value. Anything from the first '>' on is cut off. The result
+ * lives in the task memory pool; NULL is returned when no domain is found.
+ */
+gchar *
+get_spf_domain (struct rspamd_task *task)
+{
+	gchar *at = NULL, *res, *tail;
+	GList *hdrs;
+
+	if (task->from != NULL) {
+		at = strchr (task->from, '@');
+	}
+
+	if (at != NULL) {
+		res = rspamd_mempool_strdup (task->task_pool, at + 1);
+	}
+	else {
+		/* Extract from header */
+		hdrs = message_get_header (task->task_pool, task->message, "From", FALSE);
+		if (hdrs == NULL) {
+			return NULL;
+		}
+
+		res = rspamd_mempool_strdup (task->task_pool, hdrs->data);
+		g_list_free (hdrs);
+
+		at = strrchr (res, '@');
+		if (at == NULL) {
+			return NULL;
+		}
+		res = rspamd_mempool_strdup (task->task_pool, at + 1);
+	}
+
+	/* Drop the closing angle bracket and whatever follows it */
+	tail = strchr (res, '>');
+	if (tail != NULL) {
+		*tail = '\0';
+	}
+
+	return res;
+}
+
+/*
+ * Start SPF resolution for a task.
+ *
+ * A record is allocated from the task pool and filled with the sender, its
+ * local part and its domain, taken from task->from or, failing that, from
+ * the "From" header. A TXT request for the domain is then issued with
+ * spf_dns_callback as the handler.
+ *
+ * Returns TRUE when the DNS request has been scheduled, FALSE when no sender
+ * domain could be determined or the request could not be made.
+ */
+gboolean
+resolve_spf (struct rspamd_task *task, spf_cb_t callback)
+{
+	struct spf_record *rec;
+	gchar *at = NULL, *bracket;
+	GList *hdrs;
+
+	rec = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct spf_record));
+	rec->task = task;
+	rec->callback = callback;
+	/* Add destructor */
+	rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)spf_record_destructor, rec);
+
+	if (task->from != NULL) {
+		at = strchr (task->from, '@');
+	}
+
+	if (at != NULL) {
+		/* Extract from data */
+		rec->sender = task->from;
+
+		rec->local_part = rspamd_mempool_strdup (task->task_pool, task->from);
+		rec->local_part[at - task->from] = '\0';
+		if (rec->local_part[0] == '<') {
+			/* Shift left by one, terminating NUL included */
+			memmove (rec->local_part, rec->local_part + 1, strlen (rec->local_part));
+		}
+		rec->cur_domain = rspamd_mempool_strdup (task->task_pool, at + 1);
+	}
+	else {
+		/* Extract from header */
+		hdrs = message_get_header (task->task_pool, task->message, "From", FALSE);
+		if (hdrs == NULL) {
+			return FALSE;
+		}
+
+		rec->cur_domain = rspamd_mempool_strdup (task->task_pool, hdrs->data);
+		g_list_free (hdrs);
+
+		at = strrchr (rec->cur_domain, '@');
+		if (at == NULL) {
+			return FALSE;
+		}
+
+		/* Split the copy in place into local part and domain */
+		rec->sender = rspamd_mempool_strdup (task->task_pool, rec->cur_domain);
+		rec->local_part = rec->cur_domain;
+		*at = '\0';
+		rec->cur_domain = at + 1;
+
+		bracket = strchr (rec->local_part, '<');
+		if (bracket != NULL) {
+			memmove (rec->local_part, bracket + 1, strlen (bracket));
+		}
+	}
+
+	bracket = strchr (rec->cur_domain, '>');
+	if (bracket != NULL) {
+		*bracket = '\0';
+	}
+	rec->sender_domain = rec->cur_domain;
+
+	if (!make_dns_request (task->resolver, task->s, task->task_pool, spf_dns_callback,
+			(void *)rec, RDNS_REQUEST_TXT, rec->cur_domain)) {
+		return FALSE;
+	}
+
+	task->dns_requests ++;
+	rec->requests_inflight ++;
+
+	return TRUE;
+}
+
+/*
+ * vi:ts=4
+ */
diff --git a/src/libserver/spf.h b/src/libserver/spf.h
new file mode 100644
index 000000000..94c613e42
--- /dev/null
+++ b/src/libserver/spf.h
@@ -0,0 +1,84 @@
+#ifndef RSPAMD_SPF_H
+#define RSPAMD_SPF_H
+
+#include "config.h"
+
+struct rspamd_task;
+struct spf_record;
+
+/* Callback invoked with the record and its task when SPF resolution is done */
+typedef void (*spf_cb_t)(struct spf_record *record, struct rspamd_task *task);
+
+/*
+ * Result attached to an SPF address (stored in spf_addr.mech).
+ * NOTE(review): presumably these map to the '-', '~', '+' and '?'
+ * qualifiers respectively - confirm against check_spf_mech().
+ */
+typedef enum spf_mech_e {
+	SPF_FAIL,
+	SPF_SOFT_FAIL,
+	SPF_PASS,
+	SPF_NEUTRAL
+} spf_mech_t;
+
+/*
+ * Kind of DNS resolution requested while processing a record.
+ * NOTE(review): the names suggest one value per SPF mechanism/modifier that
+ * needs a lookup, with SPF_RESOLVE_AAA presumably meaning AAAA records -
+ * confirm against the users of this enum.
+ */
+typedef enum spf_action_e {
+	SPF_RESOLVE_MX,
+	SPF_RESOLVE_A,
+	SPF_RESOLVE_PTR,
+	SPF_RESOLVE_AAA,
+	SPF_RESOLVE_REDIRECT,
+	SPF_RESOLVE_INCLUDE,
+	SPF_RESOLVE_EXISTS,
+	SPF_RESOLVE_EXP
+} spf_action_t;
+
+/*
+ * One parsed SPF element. `data` holds either a single address (`normal`)
+ * or a list (`list`), selected by `is_list`.
+ */
+struct spf_addr {
+	union {
+		struct {
+			union {
+				struct in_addr in4;
+#ifdef HAVE_INET_PTON
+				struct in6_addr in6;
+#endif
+			} d;							/* the address itself */
+			guint32 mask;					/* initialised to 32 for a new element */
+			gboolean ipv6;					/* presumably TRUE when d.in6 is the valid member - TODO confirm */
+			gboolean parsed;
+			gboolean addr_any;
+		} normal;
+		GList *list;
+	} data;
+	gboolean is_list;						/* FALSE for a newly created element */
+	spf_mech_t mech;						/* result of check_spf_mech() for the element */
+	gchar *spf_string;						/* copy of the element text */
+};
+
+/*
+ * State of SPF processing for one task.
+ */
+struct spf_record {
+	gchar **elts;							/* NULL-terminated vector of record elements split on spaces */
+
+	gchar *cur_elt;							/* element currently being parsed */
+	gint elt_num;							/* index of the current element in elts */
+	gint nested;
+	gint dns_requests;
+	gint requests_inflight;					/* DNS requests issued but not yet answered */
+
+	guint ttl;								/* last non-zero TTL seen in a DNS reply */
+
+	GList *addrs;							/* list of struct spf_addr, newest first */
+	gchar *cur_domain;						/* domain currently being resolved */
+	gchar *sender;							/* full sender address */
+	gchar *sender_domain;					/* domain part of the sender */
+	gchar *local_part;						/* local part of the sender */
+	struct rspamd_task *task;				/* owning task */
+	spf_cb_t callback;						/* called when no requests remain in flight */
+
+	gboolean in_include;					/* presumably set while an include is processed - TODO confirm */
+};
+
+
+/*
+ * Resolve the SPF record for the specified task and call the callback once resolution has finished, whether it succeeded or failed
+ */
+gboolean resolve_spf (struct rspamd_task *task, spf_cb_t callback);
+
+/*
+ * Get a domain for spf for specified task
+ */
+gchar *get_spf_domain (struct rspamd_task *task);
+
+
+#endif
diff --git a/src/libserver/statfile.c b/src/libserver/statfile.c
new file mode 100644
index 000000000..4c1cc13fb
--- /dev/null
+++ b/src/libserver/statfile.c
@@ -0,0 +1,927 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "statfile.h"
+#include "main.h"
+
+#define RSPAMD_STATFILE_VERSION {'1', '2'}
+#define BACKUP_SUFFIX ".old"
+
+/* Maximum number of statistics files */
+#define STATFILES_MAX 255
+static void statfile_pool_set_block_common (
+ statfile_pool_t * pool, stat_file_t * file,
+ guint32 h1, guint32 h2,
+ time_t t, double value,
+ gboolean from_now);
+
+/* lfind() comparator: case-insensitive comparison of statfile names */
+static gint
+cmpstatfile (const void *a, const void *b)
+{
+	const stat_file_t *lhs = (const stat_file_t *)a;
+	const stat_file_t *rhs = (const stat_file_t *)b;
+
+	return g_ascii_strcasecmp (lhs->filename, rhs->filename);
+}
+
+/*
+ * Header layout of statfile version 1.0. It is only needed to know how many
+ * bytes to skip when a 1.0 file is converted to version 1.2.
+ */
+struct stat_file_header_10 {
+	u_char magic[3];						/**< magic signature ('r' 's' 'd')		*/
+	u_char version[2];						/**< version of statfile				*/
+	u_char padding[3];						/**< padding							*/
+	guint64 create_time;					/**< create time (time_t->guint64)		*/
+};
+
+/*
+ * Convert a mapped statfile of version 1.0 to the current version.
+ *
+ * The old file is renamed to "<name>.old", a new file is created under the
+ * original name with a fresh header followed by everything that came after
+ * the old header, and the new file is mapped in place of the old one.
+ * On success file->fd, file->map and file->len describe the new file.
+ *
+ * Returns FALSE on any failure. If the backup cannot be created the original
+ * file is left untouched.
+ */
+static gboolean
+convert_statfile_10 (stat_file_t * file)
+{
+	gchar *backup_name;
+	struct stat st;
+	gint mmap_flags = MAP_SHARED;
+	struct stat_file_header header = {
+		.magic = {'r', 's', 'd'},
+		.version = RSPAMD_STATFILE_VERSION,
+		.padding = {0, 0, 0},
+		.revision = 0,
+		.rev_time = 0
+	};
+
+#ifdef HAVE_MMAP_NOCORE
+	mmap_flags |= MAP_NOCORE;
+#endif
+
+	/* Format backup name; BACKUP_SUFFIX already starts with a dot */
+	backup_name = g_strdup_printf ("%s%s", file->filename, BACKUP_SUFFIX);
+
+	msg_info ("convert old statfile %s to version %c.%c, backup in %s", file->filename,
+			header.version[0], header.version[1], backup_name);
+
+	if (stat (backup_name, &st) != -1) {
+		msg_info ("replace old %s", backup_name);
+		unlink (backup_name);
+	}
+
+	if (rename (file->filename, backup_name) == -1) {
+		/*
+		 * Without a backup the O_TRUNC below would destroy the only copy
+		 * of the data, so give up here.
+		 */
+		msg_info ("cannot rename %s to %s, error %d, %s", file->filename, backup_name,
+				errno, strerror (errno));
+		g_free (backup_name);
+		return FALSE;
+	}
+	g_free (backup_name);
+
+	/* XXX: maybe race condition here */
+	unlock_file (file->fd, FALSE);
+	close (file->fd);
+	if ((file->fd = open (file->filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) {
+		msg_info ("cannot create file %s, error %d, %s", file->filename, errno, strerror (errno));
+		return FALSE;
+	}
+	lock_file (file->fd, FALSE);
+	/* Now make new header and copy it to new file */
+	if (write (file->fd, &header, sizeof (header)) == -1) {
+		msg_info ("cannot write to file %s, error %d, %s", file->filename, errno, strerror (errno));
+		return FALSE;
+	}
+	/* Now write old map to new file; the old mapping still refers to the renamed file */
+	if (write (file->fd, ((u_char *)file->map + sizeof (struct stat_file_header_10)),
+			file->len - sizeof (struct stat_file_header_10)) == -1) {
+		msg_info ("cannot write to file %s, error %d, %s", file->filename, errno, strerror (errno));
+		return FALSE;
+	}
+	/* Unmap old memory and map new */
+	munmap (file->map, file->len);
+	file->len = file->len + sizeof (struct stat_file_header) - sizeof (struct stat_file_header_10);
+	if ((file->map = mmap (NULL, file->len, PROT_READ | PROT_WRITE, mmap_flags, file->fd, 0)) == MAP_FAILED) {
+		msg_info ("cannot mmap file %s, error %d, %s", file->filename, errno, strerror (errno));
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/*
+ * Validate a mapped statfile: check its size, magic and version (converting
+ * a version 1.0 file on the fly), then load the first section descriptor
+ * into file->cur_section and set file->seek_pos to the first block.
+ * Returns 0 on success and -1 on any failure.
+ */
+static gint
+statfile_pool_check (stat_file_t * file)
+{
+	struct stat_file *f;
+	gchar *magic, *ver;
+	static gchar valid_version[] = RSPAMD_STATFILE_VERSION;
+
+	if (file == NULL || file->map == NULL) {
+		return -1;
+	}
+
+	if (file->len < sizeof (struct stat_file)) {
+		msg_info ("file %s is too short to be stat file: %z", file->filename, file->len);
+		return -1;
+	}
+
+	f = (struct stat_file *)file->map;
+	magic = f->header.magic;
+	/* Check magic and version */
+	if (magic[0] != 'r' || magic[1] != 's' || magic[2] != 'd') {
+		msg_info ("file %s is invalid stat file", file->filename);
+		return -1;
+	}
+
+	/* The two version bytes follow the three magic bytes */
+	ver = magic + 3;
+
+	/* Now check version and convert old version to new one (that can be used for sync) */
+	if (ver[0] == 1 && ver[1] == 0) {
+		if (!convert_statfile_10 (file)) {
+			return -1;
+		}
+		/* Conversion replaces the mapping */
+		f = (struct stat_file *)file->map;
+	}
+	else if (memcmp (ver, valid_version, sizeof (valid_version)) != 0) {
+		/* Unknown version */
+		msg_info ("file %s has invalid version %c.%c", file->filename, '0' + ver[0], '0' + ver[1]);
+		return -1;
+	}
+
+	/* Check first section and set new offset */
+	file->cur_section.code = f->section.code;
+	file->cur_section.length = f->section.length;
+	if (file->cur_section.length * sizeof (struct stat_file_block) > file->len) {
+		msg_info ("file %s is truncated: %z, must be %z", file->filename, file->len, file->cur_section.length * sizeof (struct stat_file_block));
+		return -1;
+	}
+	file->seek_pos = sizeof (struct stat_file) - sizeof (struct stat_file_block);
+
+	return 0;
+}
+
+
+/*
+ * Create a statfile pool. The pool object itself is allocated from `pool`;
+ * the table of STATFILES_MAX file slots and the pool mutex come from a
+ * private memory pool created here.
+ */
+statfile_pool_t *
+statfile_pool_new (rspamd_mempool_t *pool, gboolean use_mlock)
+{
+	statfile_pool_t *result = rspamd_mempool_alloc0 (pool, sizeof (*result));
+
+	result->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ());
+	result->files = rspamd_mempool_alloc0 (result->pool, sizeof (stat_file_t) * STATFILES_MAX);
+	result->lock = rspamd_mempool_get_mutex (result->pool);
+	result->mlock_ok = use_mlock;
+
+	return result;
+}
+
+/*
+ * Rebuild a statfile with a different size.
+ *
+ * The existing file is renamed to "<name>.old", a new file of `size` bytes is
+ * created and opened under the original name, and every used block of the
+ * old file is re-inserted into the new one. The revision and revision time
+ * are carried over and the backup is removed afterwards.
+ *
+ * Returns the opened new statfile, or NULL on failure.
+ */
+static stat_file_t *
+statfile_pool_reindex (statfile_pool_t * pool, gchar *filename, size_t old_size, size_t size)
+{
+	gchar *backup;
+	gint fd;
+	stat_file_t *new;
+	u_char *map, *pos, *end;
+	struct stat_file_block *block;
+	struct stat_file_header *header;
+	/* Offset of the first block, as used for seek_pos of an opened file */
+	const size_t blocks_offset = sizeof (struct stat_file) - sizeof (struct stat_file_block);
+
+	if (size <
+		sizeof (struct stat_file_header) + sizeof (struct stat_file_section) + sizeof (struct stat_file_block)) {
+		msg_err ("file %s is too small to carry any statistic: %z", filename, size);
+		return NULL;
+	}
+
+	/* First of all rename old file */
+	rspamd_mempool_lock_mutex (pool->lock);
+
+	backup = g_strconcat (filename, ".old", NULL);
+	if (rename (filename, backup) == -1) {
+		msg_err ("cannot rename %s to %s: %s", filename, backup, strerror (errno));
+		g_free (backup);
+		rspamd_mempool_unlock_mutex (pool->lock);
+		return NULL;
+	}
+
+	rspamd_mempool_unlock_mutex (pool->lock);
+
+	/* Now create new file with required size */
+	if (statfile_pool_create (pool, filename, size) != 0) {
+		msg_err ("cannot create new file");
+		g_free (backup);
+		return NULL;
+	}
+	/* Now open new file and start copying */
+	fd = open (backup, O_RDONLY);
+	new = statfile_pool_open (pool, filename, size, TRUE);
+
+	if (fd == -1 || new == NULL) {
+		msg_err ("cannot open file: %s", strerror (errno));
+		if (fd != -1) {
+			/* Only the new file failed to open: do not leak the backup descriptor */
+			close (fd);
+		}
+		g_free (backup);
+		return NULL;
+	}
+
+	/* Now start reading blocks from old statfile */
+	if ((map = mmap (NULL, old_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
+		msg_err ("cannot mmap file: %s", strerror (errno));
+		close (fd);
+		g_free (backup);
+		return NULL;
+	}
+
+	/* An old file shorter than its own headers carries nothing to copy */
+	if (old_size >= blocks_offset) {
+		end = map + old_size;
+		pos = map + blocks_offset;
+		while ((size_t)(end - pos) >= sizeof (struct stat_file_block)) {
+			block = (struct stat_file_block *)pos;
+			if (block->hash1 != 0 && block->value != 0) {
+				statfile_pool_set_block_common (pool, new, block->hash1, block->hash2, 0, block->value, FALSE);
+			}
+			/* Step by the size of a block, not by the size of a pointer to it */
+			pos += sizeof (struct stat_file_block);
+		}
+
+		header = (struct stat_file_header *)map;
+		statfile_set_revision (new, header->revision, header->rev_time);
+	}
+
+	munmap (map, old_size);
+	close (fd);
+	unlink (backup);
+	g_free (backup);
+
+	return new;
+
+}
+
+/*
+ * Touch one byte per page of the mapped statfile so that its pages get
+ * loaded. Nothing is read when madvise() fails.
+ */
+static void
+statfile_preload (stat_file_t *file)
+{
+	guint8 *cur = (guint8 *)file->map;
+	guint8 *limit = (guint8 *)file->map + file->len;
+	volatile guint8 sink;
+	gsize page;
+
+	if (madvise (cur, limit - cur, MADV_SEQUENTIAL) == -1) {
+		msg_info ("madvise failed: %s", strerror (errno));
+		return;
+	}
+
+	/* Load pages of file */
+#ifdef HAVE_GETPAGESIZE
+	page = getpagesize ();
+#else
+	page = sysconf (_SC_PAGESIZE);
+#endif
+	for (; cur < limit; cur += page) {
+		/* The volatile store keeps the read from being optimised away */
+		sink = *cur;
+		(void)sink;
+	}
+}
+
+/*
+ * Open a statfile and register it in the pool.
+ *
+ * If the file is already registered, the existing entry is returned. Unless
+ * `forced` is set, a file whose on-disk size differs noticeably from `size`
+ * is reindexed to the requested size first. Otherwise the file is opened,
+ * mapped read/write, optionally locked in memory, validated and preloaded.
+ *
+ * Returns the pool entry for the file, or NULL on failure. On failure every
+ * resource acquired here (slot, descriptor, mapping) is released again.
+ */
+stat_file_t *
+statfile_pool_open (statfile_pool_t * pool, gchar *filename, size_t size, gboolean forced)
+{
+	struct stat st;
+	stat_file_t *new_file;
+
+	if ((new_file = statfile_pool_is_open (pool, filename)) != NULL) {
+		return new_file;
+	}
+
+	if (pool->opened >= STATFILES_MAX - 1) {
+		msg_err ("reached hard coded limit of statfiles opened: %d", STATFILES_MAX);
+		return NULL;
+	}
+
+	if (stat (filename, &st) == -1) {
+		msg_info ("cannot stat file %s, error %s, %d", filename, strerror (errno), errno);
+		return NULL;
+	}
+
+	rspamd_mempool_lock_mutex (pool->lock);
+	if (!forced && labs (size - st.st_size) > (long)sizeof (struct stat_file) * 2
+			&& size > sizeof (struct stat_file)) {
+		rspamd_mempool_unlock_mutex (pool->lock);
+		msg_warn ("need to reindex statfile old size: %Hz, new size: %Hz", (size_t)st.st_size, size);
+		return statfile_pool_reindex (pool, filename, st.st_size, size);
+	}
+	else if (size < sizeof (struct stat_file)) {
+		msg_err ("requested to shrink statfile to %Hz but it is too small", size);
+	}
+
+	new_file = &pool->files[pool->opened++];
+	bzero (new_file, sizeof (stat_file_t));
+	if ((new_file->fd = open (filename, O_RDWR)) == -1) {
+		msg_info ("cannot open file %s, error %d, %s", filename, errno, strerror (errno));
+		/* Give the slot back while the pool is still locked */
+		pool->opened--;
+		rspamd_mempool_unlock_mutex (pool->lock);
+		return NULL;
+	}
+
+	if ((new_file->map = mmap (NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, new_file->fd, 0)) == MAP_FAILED) {
+		/* Report before close() gets a chance to change errno */
+		msg_info ("cannot mmap file %s, error %d, %s", filename, errno, strerror (errno));
+		close (new_file->fd);
+		pool->opened--;
+		rspamd_mempool_unlock_mutex (pool->lock);
+		return NULL;
+
+	}
+
+	rspamd_strlcpy (new_file->filename, filename, sizeof (new_file->filename));
+	new_file->len = st.st_size;
+	/* Try to lock pages in RAM */
+	if (pool->mlock_ok) {
+		if (mlock (new_file->map, new_file->len) == -1) {
+			msg_warn ("mlock of statfile failed, maybe you need to increase RLIMIT_MEMLOCK limit for a process: %s", strerror (errno));
+			pool->mlock_ok = FALSE;
+		}
+	}
+	/* Acquire lock for this operation */
+	lock_file (new_file->fd, FALSE);
+	if (statfile_pool_check (new_file) == -1) {
+		/*
+		 * The check may have tried to convert the file, which replaces the
+		 * descriptor, the mapping and the length, so release whatever the
+		 * entry holds now rather than what was acquired above.
+		 */
+		if (new_file->fd != -1) {
+			unlock_file (new_file->fd, FALSE);
+			close (new_file->fd);
+		}
+		if (new_file->map != NULL && new_file->map != MAP_FAILED) {
+			munmap (new_file->map, new_file->len);
+		}
+		pool->opened--;
+		rspamd_mempool_unlock_mutex (pool->lock);
+		return NULL;
+	}
+	unlock_file (new_file->fd, FALSE);
+
+	new_file->open_time = time (NULL);
+	new_file->access_time = new_file->open_time;
+	new_file->lock = rspamd_mempool_get_mutex (pool->pool);
+
+	statfile_preload (new_file);
+
+	rspamd_mempool_unlock_mutex (pool->lock);
+
+	return statfile_pool_is_open (pool, filename);
+}
+
+/*
+ * Close a statfile: flush and unmap its mapping, close its descriptor and
+ * remove its slot from the pool table by shifting the following slots down.
+ * Returns 0 on success, -1 when the file is not registered in the pool.
+ */
+gint
+statfile_pool_close (statfile_pool_t * pool, stat_file_t * file, gboolean keep_sorted)
+{
+	stat_file_t *slot = statfile_pool_is_open (pool, file->filename);
+
+	if (slot == NULL) {
+		msg_info ("file %s is not opened", file->filename);
+		return -1;
+	}
+
+	rspamd_mempool_lock_mutex (pool->lock);
+
+	if (file->map) {
+		msg_info ("syncing statfile %s", file->filename);
+		msync (file->map, file->len, MS_ASYNC);
+		munmap (file->map, file->len);
+	}
+	if (file->fd != -1) {
+		close (file->fd);
+	}
+
+	/* One slot less; move the slots that followed the closed one down */
+	pool->opened --;
+	memmove (slot, slot + 1,
+			(pool->opened - (slot - pool->files)) * sizeof (stat_file_t));
+
+	rspamd_mempool_unlock_mutex (pool->lock);
+
+	return 0;
+}
+
+/*
+ * Create (or truncate) a statfile of roughly `size` bytes: a header, one
+ * common section descriptor and as many zeroed blocks as fit.
+ * Returns 0 on success or when the file is already opened in the pool, and
+ * -1 when `size` is too small or an I/O operation fails.
+ */
+gint
+statfile_pool_create (statfile_pool_t * pool, gchar *filename, size_t size)
+{
+	struct stat_file_header header = {
+		.magic = {'r', 's', 'd'},
+		.version = RSPAMD_STATFILE_VERSION,
+		.padding = {0, 0, 0},
+		.revision = 0,
+		.rev_time = 0,
+		.used_blocks = 0
+	};
+	struct stat_file_section section = {
+		.code = STATFILE_SECTION_COMMON,
+	};
+	struct stat_file_block block = { 0, 0, 0 };
+	gint fd = -1, ret = -1;
+	guint buflen = 0, nblocks;
+	gchar *buf = NULL;
+
+	if (statfile_pool_is_open (pool, filename) != NULL) {
+		msg_info ("file %s is already opened", filename);
+		return 0;
+	}
+
+	if (size <
+		sizeof (struct stat_file_header) + sizeof (struct stat_file_section) + sizeof (block)) {
+		msg_err ("file %s is too small to carry any statistic: %z", filename, size);
+		return -1;
+	}
+
+	rspamd_mempool_lock_mutex (pool->lock);
+	nblocks = (size - sizeof (struct stat_file_header) - sizeof (struct stat_file_section)) / sizeof (struct stat_file_block);
+	header.total_blocks = nblocks;
+
+	fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR);
+	if (fd == -1) {
+		msg_info ("cannot create file %s, error %d, %s", filename, errno, strerror (errno));
+		goto out;
+	}
+
+	rspamd_fallocate (fd, 0, sizeof (header) + sizeof (section) + sizeof (block) * nblocks);
+
+	header.create_time = (guint64) time (NULL);
+	if (write (fd, &header, sizeof (header)) == -1) {
+		msg_info ("cannot write header to file %s, error %d, %s", filename, errno, strerror (errno));
+		goto out;
+	}
+
+	section.length = (guint64) nblocks;
+	if (write (fd, &section, sizeof (section)) == -1) {
+		msg_info ("cannot write section header to file %s, error %d, %s", filename, errno, strerror (errno));
+		goto out;
+	}
+
+	/* Buffer for write 256 blocks at once */
+	if (nblocks > 256) {
+		buflen = sizeof (block) * 256;
+		buf = g_malloc0 (buflen);
+	}
+
+	/* Whole buffers while more than 256 blocks remain ... */
+	for (; nblocks > 256; nblocks -= 256) {
+		if (write (fd, buf, buflen) == -1) {
+			msg_info ("cannot write blocks buffer to file %s, error %d, %s", filename, errno, strerror (errno));
+			goto out;
+		}
+	}
+	/* ... then the remainder one block at a time */
+	for (; nblocks > 0; nblocks --) {
+		if (write (fd, &block, sizeof (block)) == -1) {
+			msg_info ("cannot write block to file %s, error %d, %s", filename, errno, strerror (errno));
+			goto out;
+		}
+	}
+
+	ret = 0;
+
+out:
+	if (fd != -1) {
+		close (fd);
+	}
+	rspamd_mempool_unlock_mutex (pool->lock);
+	/* g_free() accepts NULL */
+	g_free (buf);
+
+	return ret;
+}
+
+/*
+ * Close every statfile registered in the pool and destroy the pool's
+ * private memory pool.
+ *
+ * statfile_pool_close() removes the closed slot from the table and
+ * decrements pool->opened, so the table is drained from its end instead of
+ * being walked with an increasing index (which would skip every other
+ * file).
+ */
+void
+statfile_pool_delete (statfile_pool_t * pool)
+{
+	while (pool->opened > 0) {
+		if (statfile_pool_close (pool, &pool->files[pool->opened - 1], FALSE) != 0) {
+			/* The slot could not be closed; stop rather than spin on it */
+			break;
+		}
+	}
+	rspamd_mempool_delete (pool->pool);
+}
+
+/* Acquire the mutex of a single statfile; `pool` is not used */
+void
+statfile_pool_lock_file (statfile_pool_t * pool, stat_file_t * file)
+{
+
+	rspamd_mempool_lock_mutex (file->lock);
+}
+
+/* Release the mutex of a single statfile; `pool` is not used */
+void
+statfile_pool_unlock_file (statfile_pool_t * pool, stat_file_t * file)
+{
+
+	rspamd_mempool_unlock_mutex (file->lock);
+}
+
+/*
+ * Look up the value stored for the hash pair (h1, h2) in the current section
+ * of a statfile.
+ *
+ * The chain starts at block `h1 % section length` and spans at most
+ * CHAIN_LENGTH consecutive blocks without running past the end of the
+ * section. The file's access time is set to `now`.
+ *
+ * Returns the stored value, or 0 when the pair is not found, the file is not
+ * mapped or the current section is empty.
+ */
+double
+statfile_pool_get_block (statfile_pool_t * pool, stat_file_t * file, guint32 h1, guint32 h2, time_t now)
+{
+	struct stat_file_block *block;
+	guint i, blocknum;
+	u_char *c;
+
+
+	file->access_time = now;
+	if (!file->map) {
+		return 0;
+	}
+	if (file->cur_section.length == 0) {
+		/* Nothing can be stored in an empty section; avoid modulo by zero */
+		return 0;
+	}
+
+	blocknum = h1 % file->cur_section.length;
+	c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct stat_file_block);
+	block = (struct stat_file_block *)c;
+
+	for (i = 0; i < CHAIN_LENGTH; i++) {
+		if (i + blocknum >= file->cur_section.length) {
+			break;
+		}
+		if (block->hash1 == h1 && block->hash2 == h2) {
+			return block->value;
+		}
+		c += sizeof (struct stat_file_block);
+		block = (struct stat_file_block *)c;
+	}
+
+
+	return 0;
+}
+
+/*
+ * Store `value` for the hash pair (h1, h2) in the current section.
+ *
+ * The chain of at most CHAIN_LENGTH blocks starting at `h1 % section length`
+ * is scanned: an existing entry for the pair is updated, otherwise the first
+ * free block is taken (and the header's used_blocks counter incremented).
+ * If neither exists, the block with the smallest value seen in the chain is
+ * overwritten.
+ *
+ * When `from_now` is set, the file's access time is set to `t`. Nothing is
+ * stored when the file is not mapped or the current section is empty.
+ */
+static void
+statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, guint32 h1, guint32 h2, time_t t, double value, gboolean from_now)
+{
+	struct stat_file_block *block, *to_expire = NULL;
+	struct stat_file_header *header;
+	guint i, blocknum;
+	u_char *c;
+	double min = G_MAXDOUBLE;
+
+	if (from_now) {
+		file->access_time = t;
+	}
+	if (!file->map) {
+		return;
+	}
+	if (file->cur_section.length == 0) {
+		/* No blocks to write to; also avoids modulo by zero below */
+		return;
+	}
+
+	blocknum = h1 % file->cur_section.length;
+	header = (struct stat_file_header *)file->map;
+	c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct stat_file_block);
+	block = (struct stat_file_block *)c;
+
+	for (i = 0; i < CHAIN_LENGTH; i++) {
+		if (i + blocknum >= file->cur_section.length) {
+			/* Need to expire some block in chain */
+			msg_info ("chain %ud is full in statfile %s, starting expire", blocknum, file->filename);
+			break;
+		}
+		/* First try to find block in chain */
+		if (block->hash1 == h1 && block->hash2 == h2) {
+			block->value = value;
+			return;
+		}
+		/* Check whether we have a free block in chain */
+		if (block->hash1 == 0 && block->hash2 == 0) {
+			/* Write new block here */
+			msg_debug ("found free block %ud in chain %ud, set h1=%ud, h2=%ud", i, blocknum, h1, h2);
+			block->hash1 = h1;
+			block->hash2 = h2;
+			block->value = value;
+			header->used_blocks ++;
+
+			return;
+		}
+
+		/* Expire block with minimum value otherwise */
+		if (block->value < min) {
+			to_expire = block;
+			min = block->value;
+		}
+		c += sizeof (struct stat_file_block);
+		block = (struct stat_file_block *)c;
+	}
+
+	/* Try expire some block */
+	if (to_expire) {
+		block = to_expire;
+	}
+	else {
+		/* Expire first block in chain */
+		c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct stat_file_block);
+		block = (struct stat_file_block *)c;
+	}
+
+	block->hash1 = h1;
+	block->hash2 = h2;
+	block->value = value;
+}
+
+/*
+ * Public entry point for storing a block value: forwards to
+ * statfile_pool_set_block_common () with from_now = TRUE, so the file's
+ * access_time is set to `now` as part of the call.
+ */
+void
+statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, guint32 h1, guint32 h2, time_t now, double value)
+{
+ statfile_pool_set_block_common (pool, file, h1, h2, now, value, TRUE);
+}
+
+/*
+ * Look up an already opened statfile by name.
+ *
+ * Performs a linear search (lfind) over the pool->opened entries of
+ * pool->files, comparing with cmpstatfile against a key whose filename is a
+ * (possibly truncated) copy of `filename`.
+ *
+ * @return pointer to the matching entry inside pool->files, or NULL when no
+ * entry matches or no files are opened
+ */
+stat_file_t *
+statfile_pool_is_open (statfile_pool_t * pool, gchar *filename)
+{
+    /*
+     * Kept static as in the original code (presumably to keep the
+     * PATH_MAX-sized key off the stack); this makes the function
+     * non-reentrant.
+     */
+    static stat_file_t f;
+    size_t nfiles;
+
+    if (pool->opened <= 0) {
+        return NULL;
+    }
+
+    /*
+     * lfind () takes the element count as a size_t pointer. pool->opened is a
+     * gint, so passing its address through a cast makes lfind read past the
+     * field wherever size_t is wider than int. Use a properly typed copy.
+     */
+    nfiles = (size_t)pool->opened;
+
+    rspamd_strlcpy (f.filename, filename, sizeof (f.filename));
+
+    return lfind (&f, pool->files, &nfiles, sizeof (stat_file_t), cmpstatfile);
+}
+
+/*
+ * Return the code of the section currently selected for `file`
+ * (file->cur_section.code). `pool` is unused and `file` is not checked
+ * for NULL.
+ */
+guint32
+statfile_pool_get_section (statfile_pool_t * pool, stat_file_t * file)
+{
+
+ return file->cur_section.code;
+}
+
+/*
+ * Select the section with the given code as the current section of `file`.
+ *
+ * The search starts right after the file header when `from_begin` is set,
+ * otherwise at the header of the currently selected section. Sections are
+ * walked in file order: each one is a struct stat_file_section followed by
+ * `length` blocks (this is how statfile_pool_add_section writes them).
+ *
+ * On success file->cur_section and file->seek_pos (offset of the section's
+ * first block) are updated.
+ *
+ * @return TRUE if the section was found, FALSE otherwise (including an
+ * unmapped file or a section table that does not fit into the file)
+ */
+gboolean
+statfile_pool_set_section (statfile_pool_t * pool, stat_file_t * file, guint32 code, gboolean from_begin)
+{
+    struct stat_file_section *sec;
+    off_t cur_offset;
+    guint64 room;
+
+    if (file->map == NULL) {
+        return FALSE;
+    }
+
+    if (from_begin) {
+        cur_offset = sizeof (struct stat_file_header);
+    }
+    else {
+        cur_offset = file->seek_pos - (off_t)sizeof (struct stat_file_section);
+    }
+
+    /* Only look at section headers that lie completely inside the file */
+    while (cur_offset >= 0 &&
+            cur_offset + (off_t)sizeof (struct stat_file_section) <= (off_t)file->len) {
+        sec = (struct stat_file_section *)((gchar *)file->map + cur_offset);
+        if (sec->code == code) {
+            file->cur_section.code = code;
+            file->cur_section.length = sec->length;
+            file->seek_pos = cur_offset + sizeof (struct stat_file_section);
+            return TRUE;
+        }
+
+        /*
+         * Section length is counted in blocks, so the next header is located
+         * after this header plus length * sizeof (block) bytes. Stop if the
+         * section claims more blocks than the rest of the file can hold; this
+         * also protects the multiplication below from overflow.
+         */
+        room = (guint64)file->len - (guint64)cur_offset - sizeof (struct stat_file_section);
+        if (sec->length > room / sizeof (struct stat_file_block)) {
+            break;
+        }
+        cur_offset += sizeof (struct stat_file_section) +
+                sec->length * sizeof (struct stat_file_block);
+    }
+
+    return FALSE;
+}
+
+/*
+ * Append a new section to the end of the statfile and remap it.
+ *
+ * A section header with `code` and `length` is written at the end of the
+ * file, followed by `length` zeroed blocks. The file is then locked, the old
+ * mapping dropped, the data synced, and the enlarged file mapped again.
+ *
+ * @return TRUE on success; FALSE when seeking, writing or remapping fails.
+ * After a remap failure file->map is NULL.
+ */
+gboolean
+statfile_pool_add_section (statfile_pool_t * pool, stat_file_t * file, guint32 code, guint64 length)
+{
+    struct stat_file_section sect;
+    struct stat_file_block block = { 0, 0, 0 };
+    guint64 i;
+    void *map;
+
+    if (lseek (file->fd, 0, SEEK_END) == -1) {
+        msg_info ("cannot lseek file %s, error %d, %s", file->filename, errno, strerror (errno));
+        return FALSE;
+    }
+
+    sect.code = code;
+    sect.length = length;
+
+    if (write (file->fd, &sect, sizeof (sect)) == -1) {
+        msg_info ("cannot write block to file %s, error %d, %s", file->filename, errno, strerror (errno));
+        return FALSE;
+    }
+
+    /* Use a separate counter: `length` is still needed for the new file size */
+    for (i = 0; i < length; i ++) {
+        if (write (file->fd, &block, sizeof (block)) == -1) {
+            msg_info ("cannot write block to file %s, error %d, %s", file->filename, errno, strerror (errno));
+            return FALSE;
+        }
+    }
+
+    /* Lock statfile to remap memory */
+    statfile_pool_lock_file (pool, file);
+    munmap (file->map, file->len);
+    fsync (file->fd);
+    /* The file grew by one section header plus `length` blocks */
+    file->len += sizeof (sect) + length * sizeof (block);
+
+    map = mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED, file->fd, 0);
+    if (map == MAP_FAILED) {
+        /* mmap reports failure with MAP_FAILED, not NULL */
+        file->map = NULL;
+        msg_info ("cannot mmap file %s, error %d, %s", file->filename, errno, strerror (errno));
+        statfile_pool_unlock_file (pool, file);
+        return FALSE;
+    }
+    file->map = map;
+    statfile_pool_unlock_file (pool, file);
+
+    return TRUE;
+
+}
+
+/*
+ * Translate a section name into its numeric code.
+ *
+ * Names are matched case-insensitively (ASCII); the known names are
+ * "common", "header", "url" and "regexp".
+ *
+ * @return the matching STATFILE_SECTION_* code, or 0 for an unknown name
+ */
+guint32
+statfile_get_section_by_name (const gchar *name)
+{
+    static const struct {
+        const gchar *name;
+        guint32 code;
+    } known_sections[] = {
+        { "common", STATFILE_SECTION_COMMON },
+        { "header", STATFILE_SECTION_HEADERS },
+        { "url", STATFILE_SECTION_URLS },
+        { "regexp", STATFILE_SECTION_REGEXP }
+    };
+    guint i;
+
+    for (i = 0; i < sizeof (known_sections) / sizeof (known_sections[0]); i ++) {
+        if (g_ascii_strcasecmp (name, known_sections[i].name) == 0) {
+            return known_sections[i].code;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Write a revision number and revision time into the mapped file header.
+ *
+ * @return TRUE when the header was updated, FALSE if `file` is NULL or has
+ * no mapping
+ */
+gboolean
+statfile_set_revision (stat_file_t *file, guint64 rev, time_t time)
+{
+    struct stat_file_header *hdr;
+
+    if (file != NULL && file->map != NULL) {
+        hdr = (struct stat_file_header *)file->map;
+        hdr->revision = rev;
+        hdr->rev_time = time;
+
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+/*
+ * Increment the revision counter stored in the mapped file header.
+ * The revision time is left untouched.
+ *
+ * @return TRUE when the counter was incremented, FALSE if `file` is NULL or
+ * has no mapping
+ */
+gboolean
+statfile_inc_revision (stat_file_t *file)
+{
+    struct stat_file_header *hdr;
+
+    if (file != NULL && file->map != NULL) {
+        hdr = (struct stat_file_header *)file->map;
+        hdr->revision ++;
+
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+/*
+ * Read the revision number and revision time from the mapped file header.
+ * Either output pointer may be NULL, in which case that value is skipped.
+ *
+ * @return TRUE when the header was read, FALSE if `file` is NULL or has no
+ * mapping (outputs are not modified in that case)
+ */
+gboolean
+statfile_get_revision (stat_file_t *file, guint64 *rev, time_t *time)
+{
+    const struct stat_file_header *hdr;
+
+    if (file == NULL || file->map == NULL) {
+        return FALSE;
+    }
+
+    hdr = (const struct stat_file_header *)file->map;
+
+    if (rev) {
+        *rev = hdr->revision;
+    }
+    if (time) {
+        *time = hdr->rev_time;
+    }
+
+    return TRUE;
+}
+
+/*
+ * Return the used_blocks counter from the mapped file header.
+ *
+ * @return number of used blocks, or (guint64)-1 if `file` is NULL or has no
+ * mapping
+ */
+guint64
+statfile_get_used_blocks (stat_file_t *file)
+{
+    if (file != NULL && file->map != NULL) {
+        return ((struct stat_file_header *)file->map)->used_blocks;
+    }
+
+    return (guint64)-1;
+}
+
+/*
+ * Return the total_blocks counter from the mapped file header.
+ *
+ * A stored value of 0 is treated as an old header format: the counter is
+ * then filled in (in the mapped header itself) from the length of the
+ * currently selected section before being returned.
+ *
+ * @return total number of blocks, or (guint64)-1 if `file` is NULL or has no
+ * mapping
+ */
+guint64
+statfile_get_total_blocks (stat_file_t *file)
+{
+    struct stat_file_header *hdr;
+
+    if (file == NULL || file->map == NULL) {
+        return (guint64)-1;
+    }
+
+    hdr = (struct stat_file_header *)file->map;
+    if (hdr->total_blocks == 0) {
+        /* Old version of header: derive the value from the current section */
+        hdr->total_blocks = file->cur_section.length;
+    }
+
+    return hdr->total_blocks;
+}
+
+/*
+ * Timer callback: schedule an asynchronous flush (msync with MS_ASYNC) of
+ * every opened statfile's mapping. `ud` is the statfile pool; `fd` and
+ * `what` are unused.
+ */
+static void
+statfile_pool_invalidate_callback (gint fd, short what, void *ud)
+{
+    statfile_pool_t *pool = ud;
+    gint idx = 0;
+
+    msg_info ("invalidating %d statfiles", pool->opened);
+
+    while (idx < pool->opened) {
+        msync (pool->files[idx].map, pool->files[idx].len, MS_ASYNC);
+        idx ++;
+    }
+}
+
+
+/*
+ * Schedule the periodic invalidation (flush) timer of the pool.
+ *
+ * On the first call the timer event is allocated from the pool's memory
+ * pool, bound to statfile_pool_invalidate_callback and armed. On later calls
+ * the timer is re-armed only if it is still pending. The delay is `seconds`
+ * plus a random value from [0, jitter).
+ */
+void
+statfile_pool_plan_invalidate (statfile_pool_t *pool, time_t seconds, time_t jitter)
+{
+    if (pool->invalidate_event == NULL) {
+        /* First use: create the timer and arm it */
+        pool->invalidate_event = rspamd_mempool_alloc (pool->pool, sizeof (struct event));
+        pool->invalidate_tv.tv_sec = seconds + g_random_int_range (0, jitter);
+        pool->invalidate_tv.tv_usec = 0;
+        evtimer_set (pool->invalidate_event, statfile_pool_invalidate_callback, pool);
+        evtimer_add (pool->invalidate_event, &pool->invalidate_tv);
+        msg_info ("invalidate of statfile pool is planned in %d seconds", (gint)pool->invalidate_tv.tv_sec);
+        return;
+    }
+
+    if (evtimer_pending (pool->invalidate_event, NULL)) {
+        /* Timer is still waiting: replan it with a fresh delay */
+        pool->invalidate_tv.tv_sec = seconds + g_random_int_range (0, jitter);
+        pool->invalidate_tv.tv_usec = 0;
+        evtimer_add (pool->invalidate_event, &pool->invalidate_tv);
+    }
+}
+
+
+/*
+ * Find the statfile configured for `symbol` in a classifier and return its
+ * opened stat_file_t, opening (and optionally creating) it on demand.
+ *
+ * @param pool statfile pool
+ * @param ccf classifier config whose `statfiles` list is searched
+ * @param symbol symbol name, compared with strcmp
+ * @param st output: set to the matching statfile config, or to NULL when the
+ * symbol is not configured
+ * @param try_create create the file and retry when it cannot be opened
+ * @return opened statfile, or NULL on invalid arguments, unknown symbol, or
+ * failure to open/create the file
+ */
+stat_file_t *
+get_statfile_by_symbol (statfile_pool_t *pool, struct classifier_config *ccf,
+        const gchar *symbol, struct statfile **st, gboolean try_create)
+{
+    stat_file_t *res = NULL;
+    struct statfile *candidate;
+    GList *cur;
+
+    if (pool == NULL || ccf == NULL || symbol == NULL || st == NULL) {
+        msg_err ("invalid input arguments");
+        return NULL;
+    }
+
+    /*
+     * Reset the output first: with an empty statfile list the loop body never
+     * runs, and the caller's variable may be uninitialized.
+     */
+    *st = NULL;
+    for (cur = g_list_first (ccf->statfiles); cur != NULL; cur = g_list_next (cur)) {
+        candidate = cur->data;
+        if (strcmp (symbol, candidate->symbol) == 0) {
+            *st = candidate;
+            break;
+        }
+    }
+    if (*st == NULL) {
+        msg_info ("cannot find statfile with symbol %s", symbol);
+        return NULL;
+    }
+
+    if ((res = statfile_pool_is_open (pool, (*st)->path)) == NULL) {
+        if ((res = statfile_pool_open (pool, (*st)->path, (*st)->size, FALSE)) == NULL) {
+            msg_warn ("cannot open %s", (*st)->path);
+            if (try_create) {
+                if (statfile_pool_create (pool, (*st)->path, (*st)->size) == -1) {
+                    msg_err ("cannot create statfile %s", (*st)->path);
+                    return NULL;
+                }
+                res = statfile_pool_open (pool, (*st)->path, (*st)->size, FALSE);
+                if (res == NULL) {
+                    msg_err ("cannot open statfile %s after creation", (*st)->path);
+                }
+            }
+        }
+    }
+
+    return res;
+}
+
+/*
+ * Try to pin the mappings of all opened statfiles in memory with mlock (2).
+ *
+ * Does nothing when pool->mlock_ok is already FALSE. On the first mlock
+ * failure a warning is logged, pool->mlock_ok is cleared so that later calls
+ * do not retry, and the remaining files are left untouched.
+ */
+void
+statfile_pool_lockall (statfile_pool_t *pool)
+{
+    gint idx;
+
+    if (!pool->mlock_ok) {
+        /* Do not try to lock if mlock failed */
+        return;
+    }
+
+    for (idx = 0; idx < pool->opened; idx ++) {
+        if (mlock (pool->files[idx].map, pool->files[idx].len) != -1) {
+            continue;
+        }
+
+        msg_warn ("mlock of statfile failed, maybe you need to increase RLIMIT_MEMLOCK limit for a process: %s", strerror (errno));
+        pool->mlock_ok = FALSE;
+        return;
+    }
+}
+
diff --git a/src/libserver/statfile.h b/src/libserver/statfile.h
new file mode 100644
index 000000000..5786c4927
--- /dev/null
+++ b/src/libserver/statfile.h
@@ -0,0 +1,284 @@
+/**
+ * @file statfile.h
+ * Describes common methods for accessing statistics files and caching them in memory
+ */
+
+#ifndef RSPAMD_STATFILE_H
+#define RSPAMD_STATFILE_H
+
+#include "config.h"
+#include "mem_pool.h"
+#include "hash.h"
+
+#define CHAIN_LENGTH 128
+
+/* Section types */
+#define STATFILE_SECTION_COMMON 1
+#define STATFILE_SECTION_HEADERS 2
+#define STATFILE_SECTION_URLS 3
+#define STATFILE_SECTION_REGEXP 4
+
+#define DEFAULT_STATFILE_INVALIDATE_TIME 30
+#define DEFAULT_STATFILE_INVALIDATE_JITTER 30
+
+/**
+ * Common statfile header
+ *
+ * Located at the very beginning of the file: statfile.c accesses it by
+ * casting the start of the mmap'ed area to this structure.
+ */
+struct stat_file_header {
+ u_char magic[3]; /**< magic signature ('r' 's' 'd') */
+ u_char version[2]; /**< version of statfile */
+ u_char padding[3]; /**< padding */
+ guint64 create_time; /**< create time (time_t->guint64) */
+ guint64 revision; /**< revision number */
+ guint64 rev_time; /**< revision time */
+ guint64 used_blocks; /**< used blocks number */
+ guint64 total_blocks; /**< total number of blocks (0 is treated as "old header, not set" by statfile_get_total_blocks) */
+ u_char unused[239]; /**< some bytes that can be used in future */
+};
+
+/**
+ * Section header
+ *
+ * Written in front of the section's blocks (see statfile_pool_add_section).
+ */
+struct stat_file_section {
+ guint64 code; /**< section's code (one of STATFILE_SECTION_*) */
+ guint64 length; /**< section's length in blocks */
+};
+
+/**
+ * Block of data in statfile
+ *
+ * A block whose hash1 and hash2 are both 0 is treated as free when values
+ * are stored.
+ */
+struct stat_file_block {
+ guint32 hash1; /**< hash1 (also acts as index) */
+ guint32 hash2; /**< hash2 */
+ double value; /**< double value */
+};
+
+/**
+ * Statistic file
+ *
+ * Describes the layout of the beginning of a statfile: header, first
+ * section header, then that section's blocks.
+ * NOTE(review): blocks[1] presumably stands for a variable-length trailing
+ * array - confirm before using sizeof (struct stat_file) for size math.
+ */
+struct stat_file {
+ struct stat_file_header header; /**< header */
+ struct stat_file_section section; /**< first section */
+ struct stat_file_block blocks[1]; /**< first block of data */
+};
+
+/**
+ * Common view of statfile object
+ *
+ * In-memory state of one opened statfile.
+ */
+typedef struct stat_file_s {
+#ifdef HAVE_PATH_MAX
+ gchar filename[PATH_MAX]; /**< name of file */
+#else
+ gchar filename[MAXPATHLEN]; /**< name of file */
+#endif
+ gint fd; /**< descriptor */
+ void *map; /**< mmaped area (NULL is treated as "not mapped") */
+ off_t seek_pos; /**< current seek position: byte offset in map of the first block of the current section */
+ struct stat_file_section cur_section; /**< current section */
+ time_t open_time; /**< time when file was opened */
+ time_t access_time; /**< last access time */
+ size_t len; /**< length of file(in bytes) */
+ rspamd_mempool_mutex_t *lock; /**< mutex */
+} stat_file_t;
+
+/**
+ * Statfiles pool
+ */
+typedef struct statfile_pool_s {
+ stat_file_t *files; /**< array of opened files; the first `opened` entries are valid and are searched by name */
+ void **maps; /**< shared hash table of mmaped areas indexed by name */
+ gint opened; /**< number of opened files */
+ rspamd_mempool_t *pool; /**< memory pool object */
+ rspamd_mempool_mutex_t *lock; /**< mutex */
+ struct event *invalidate_event; /**< event for pool invalidation */
+ struct timeval invalidate_tv; /**< delay used when arming invalidate_event */
+ gboolean mlock_ok; /**< whether it is possible to use mlock (2) to avoid statfiles unloading */
+} statfile_pool_t;
+
+/* Forwarded declarations */
+struct classifier_config;
+struct statfile;
+
+/**
+ * Create new statfile pool
+ * @param pool memory pool object (presumably used for the pool's own allocations - confirm in statfile.c)
+ * @param use_mlock presumably enables locking statfiles in memory with mlock (2) - confirm in statfile.c
+ * @return statfile pool object
+ */
+statfile_pool_t* statfile_pool_new (rspamd_mempool_t *pool, gboolean use_mlock);
+
+/**
+ * Open statfile and attach it to pool
+ * @param pool statfile pool object
+ * @param filename name of statfile to open
+ * @param len length of statfile (callers pass the configured statfile size)
+ * @param forced NOTE(review): semantics not documented here - see the implementation
+ * @return opened statfile object; callers treat NULL as failure
+ */
+stat_file_t* statfile_pool_open (statfile_pool_t *pool, gchar *filename, size_t len, gboolean forced);
+
+/**
+ * Create new statfile but DOES NOT attach it to pool, use @see statfile_pool_open for attaching
+ * @param pool statfile pool object
+ * @param filename name of statfile to create
+ * @param len length of new statfile
+ * @return 0 if file was created and -1 in case of error
+ */
+gint statfile_pool_create (statfile_pool_t *pool, gchar *filename, size_t len);
+
+/**
+ * Close specified statfile
+ * @param pool statfile pool object
+ * @param file statfile to close
+ * @param keep_sorted NOTE(review): presumably keeps the pool's array of opened files ordered after removal - confirm in statfile.c
+ * @return 0 if file was closed and -1 if statfile was not opened
+ */
+gint statfile_pool_close (statfile_pool_t *pool, stat_file_t *file, gboolean keep_sorted);
+
+/**
+ * Delete statfile pool and close all attached statfiles
+ * @param pool statfile pool object
+ */
+void statfile_pool_delete (statfile_pool_t *pool);
+
+/**
+ * Try to lock all statfiles in memory (mlock (2)).
+ * After the first failure a warning is logged and further attempts are
+ * disabled through the pool's mlock_ok flag.
+ * @param pool statfile pool object
+ */
+void statfile_pool_lockall (statfile_pool_t *pool);
+
+/**
+ * Lock specified file for exclusive use (eg. learning)
+ * @param pool statfile pool object
+ * @param file statfile to lock
+ */
+void statfile_pool_lock_file (statfile_pool_t *pool, stat_file_t *file);
+
+/**
+ * Unlock specified file
+ * @param pool statfile pool object
+ * @param file statfile to unlock
+ */
+void statfile_pool_unlock_file (statfile_pool_t *pool, stat_file_t *file);
+
+/**
+ * Get block from statfile with h1 and h2 values, use time argument for current time
+ * @param pool statfile pool object
+ * @param file statfile to read from
+ * @param h1 h1 in file
+ * @param h2 h2 in file
+ * @param now current time
+ * @return block value or 0 if block is not found
+ */
+double statfile_pool_get_block (statfile_pool_t *pool, stat_file_t *file, guint32 h1, guint32 h2, time_t now);
+
+/**
+ * Set specified block in statfile
+ * @param pool statfile pool object
+ * @param file statfile to write to
+ * @param h1 h1 in file
+ * @param h2 h2 in file
+ * @param now current time (stored as the file's last access time)
+ * @param value value of block
+ */
+void statfile_pool_set_block (statfile_pool_t *pool, stat_file_t *file, guint32 h1, guint32 h2, time_t now, double value);
+
+/**
+ * Check whether statfile is opened
+ * @param pool statfile pool object
+ * @param filename name of statfile
+ * @return the opened statfile object if specified statfile is opened and NULL otherwise
+ */
+stat_file_t* statfile_pool_is_open (statfile_pool_t *pool, gchar *filename);
+
+/**
+ * Returns current statfile section
+ * @param pool statfile pool object
+ * @param file statfile object (must not be NULL)
+ * @return code of the currently selected section
+ */
+guint32 statfile_pool_get_section (statfile_pool_t *pool, stat_file_t *file);
+
+/**
+ * Go to other section of statfile
+ * @param pool statfile pool object
+ * @param file statfile object
+ * @param code code of section to seek to
+ * @param from_begin search for section from begin of file if true, otherwise start at the current section
+ * @return TRUE if section was set and FALSE otherwise
+ */
+gboolean statfile_pool_set_section (statfile_pool_t *pool, stat_file_t *file, guint32 code, gboolean from_begin);
+
+/**
+ * Add new section to statfile (appended at the end of the file, after which the file is remapped)
+ * @param pool statfile pool object
+ * @param file statfile object
+ * @param code code of the new section
+ * @param length length in blocks of new section
+ * @return TRUE if section was successfully added and FALSE in case of error
+ */
+gboolean statfile_pool_add_section (statfile_pool_t *pool, stat_file_t *file, guint32 code, guint64 length);
+
+
+/**
+ * Return code of section identified by name
+ * @param name name of section, matched case-insensitively: "common", "header", "url" or "regexp"
+ * @return code of section or 0 if name of section is unknown
+ */
+guint32 statfile_get_section_by_name (const gchar *name);
+
+/**
+ * Set statfile revision and revision time
+ * @param file statfile object
+ * @param rev number of revision
+ * @param time time of revision
+ * @return TRUE if revision was set, FALSE if file is NULL or not mapped
+ */
+gboolean statfile_set_revision (stat_file_t *file, guint64 rev, time_t time);
+
+/**
+ * Increment statfile revision (the revision time is not changed)
+ * @param file statfile object
+ * @return TRUE if revision was incremented, FALSE if file is NULL or not mapped
+ */
+gboolean statfile_inc_revision (stat_file_t *file);
+
+/**
+ * Get statfile revision and revision time
+ * @param file statfile object
+ * @param rev where to save number of revision (may be NULL)
+ * @param time where to save time of revision (may be NULL)
+ * @return TRUE if revision was saved in rev and time, FALSE if file is NULL or not mapped
+ */
+gboolean statfile_get_revision (stat_file_t *file, guint64 *rev, time_t *time);
+
+/**
+ * Get statfile used blocks
+ * @param file file to get number of used blocks
+ * @return number of used blocks or (guint64)-1 in case of error (file is NULL or not mapped)
+ */
+guint64 statfile_get_used_blocks (stat_file_t *file);
+
+/**
+ * Get statfile total blocks
+ * @param file file to get total number of blocks
+ * @return total number of blocks or (guint64)-1 in case of error (file is NULL or not mapped)
+ */
+guint64 statfile_get_total_blocks (stat_file_t *file);
+
+
+/**
+ * Plan statfile pool invalidation
+ * @param pool statfile pool object
+ * @param seconds base delay in seconds
+ * @param jitter upper bound (exclusive) of the random number of seconds added to the delay
+ */
+void statfile_pool_plan_invalidate (statfile_pool_t *pool, time_t seconds, time_t jitter);
+
+/**
+ * Get a statfile by symbol
+ * @param pool pool object
+ * @param ccf ccf classifier config
+ * @param symbol symbol to search
+ * @param st output: statfile config matching the symbol
+ * @param try_create whether we need to create statfile if it is absent
+ * @return opened statfile or NULL if the symbol is unknown or the file cannot be opened
+ */
+stat_file_t* get_statfile_by_symbol (statfile_pool_t *pool, struct classifier_config *ccf,
+ const gchar *symbol, struct statfile **st, gboolean try_create);
+
+#endif
diff --git a/src/libserver/statfile_sync.c b/src/libserver/statfile_sync.c
new file mode 100644
index 000000000..6b545af17
--- /dev/null
+++ b/src/libserver/statfile_sync.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "cfg_file.h"
+#include "tokenizers/tokenizers.h"
+#include "classifiers/classifiers.h"
+#include "statfile.h"
+#include "binlog.h"
+#include "buffer.h"
+#include "statfile_sync.h"
+
+/* States of a single sync session, as driven by sync_read () */
+enum rspamd_sync_state {
+ SYNC_STATE_GREETING, /* waiting for the first line; the sync command is sent in reply */
+ SYNC_STATE_READ_LINE, /* waiting for a revision line, or END/FAIL */
+ SYNC_STATE_READ_REV, /* waiting for the binary blocks of the announced revision */
+ SYNC_STATE_QUIT, /* session is finished and must be closed */
+};
+
+/* Context of sync process (one per watched statfile) */
+struct rspamd_sync_ctx {
+ struct statfile *st; /* statfile configuration (symbol, path, binlog settings) */
+ stat_file_t *real_statfile; /* opened statfile that receives the synced blocks */
+ statfile_pool_t *pool; /* statfile pool, used for locking while blocks are written */
+ rspamd_io_dispatcher_t *dispatcher; /* I/O dispatcher of the current session */
+ struct event_base *ev_base; /* event base for the timer and the dispatcher */
+
+ struct event tm_ev; /* periodic timer that starts sync sessions */
+
+ struct timeval interval; /* delay until the next timer shot */
+ struct timeval io_tv; /* I/O timeout handed to the dispatcher */
+ gint sock; /* socket of the current session */
+ guint32 timeout; /* I/O timeout, converted with msec_to_tv () */
+ guint32 sync_interval; /* base sync interval, converted with msec_to_tv () after jittering */
+ enum rspamd_sync_state state; /* protocol state of the current session */
+ gboolean is_busy; /* TRUE while a session is in progress */
+
+ guint64 new_rev; /* revision number parsed from the last revision line */
+ guint64 new_time; /* revision time parsed from the last revision line */
+ guint64 new_len; /* payload length in bytes parsed from the last revision line */
+};
+
+/*
+ * Log the local wall-clock time at which the next sync of `symbol` is due,
+ * i.e. the current time plus `delay` seconds. Nothing is logged when the
+ * time cannot be converted or formatted.
+ */
+static void
+log_next_sync (const gchar *symbol, time_t delay)
+{
+    gchar outstr[200];
+    time_t t;
+    struct tm *tmp;
+    gint r;
+
+    t = time (NULL) + delay;
+    tmp = localtime (&t);
+    if (tmp == NULL) {
+        return;
+    }
+
+    r = rspamd_snprintf (outstr, sizeof (outstr), "statfile_sync: next sync of %s at ", symbol);
+    if (strftime (outstr + r, sizeof (outstr) - r, "%T", tmp) != 0) {
+        /*
+         * outstr contains the symbol name, so it must never be used as the
+         * format string itself: a '%' in the name would be interpreted.
+         */
+        msg_info ("%s", outstr);
+    }
+}
+
+/*
+ * Parse one protocol line received from the sync master.
+ *
+ * A line starting with "END" or "FAIL" finishes the session: the state is
+ * switched to SYNC_STATE_QUIT, is_busy is cleared and TRUE is returned.
+ *
+ * Any other line must carry three whitespace-separated unsigned decimal
+ * numbers: revision, revision time and payload length. They are stored in
+ * ctx->new_rev, ctx->new_time and ctx->new_len. Tokens past the third one
+ * keep overwriting ctx->new_len, as the original parser did.
+ *
+ * @return TRUE for END/FAIL or when at least three numbers were read, FALSE
+ * on a conversion error or when fewer than three tokens are present (fields
+ * parsed before the failure stay modified)
+ */
+static gboolean
+parse_revision_line (struct rspamd_sync_ctx *ctx, f_str_t *in)
+{
+    guint64 *fields[3];
+    guint nparsed = 0, slot;
+    gchar *p, *end, *tok, numbuf[sizeof("18446744073709551615")];
+
+    /* First of all try to find END line */
+    if (in->len >= sizeof ("END") - 1 && memcmp (in->begin, "END", sizeof ("END") - 1) == 0) {
+        ctx->state = SYNC_STATE_QUIT;
+        ctx->is_busy = FALSE;
+        return TRUE;
+    }
+
+    /* Next check for error line */
+    if (in->len >= sizeof ("FAIL") - 1 && memcmp (in->begin, "FAIL", sizeof ("FAIL") - 1) == 0) {
+        ctx->state = SYNC_STATE_QUIT;
+        ctx->is_busy = FALSE;
+        return TRUE;
+    }
+
+    fields[0] = &ctx->new_rev;
+    fields[1] = &ctx->new_time;
+    fields[2] = &ctx->new_len;
+
+    /*
+     * Split the line into whitespace-separated tokens. Token boundaries are
+     * found by scanning to the next space or to the end of input, so a token
+     * made of a single trailing character (e.g. the "3" in "1 2 3") is
+     * handled like any other.
+     */
+    p = in->begin;
+    end = in->begin + in->len;
+    while (p < end) {
+        if (g_ascii_isspace (*p)) {
+            p ++;
+            continue;
+        }
+
+        tok = p;
+        while (p < end && !g_ascii_isspace (*p)) {
+            p ++;
+        }
+
+        slot = MIN (nparsed, 2);
+        /* Overlong tokens are truncated to the capacity of numbuf */
+        rspamd_strlcpy (numbuf, tok, MIN (p - tok + 1, (gint)sizeof (numbuf)));
+        errno = 0;
+        *fields[slot] = strtoull (numbuf, NULL, 10);
+        if (errno != 0) {
+            msg_info ("cannot parse number %s", strerror (errno));
+            return FALSE;
+        }
+        nparsed ++;
+    }
+
+    /* All three values (revision, time, length) must be present */
+    return (nparsed >= 3);
+}
+
+/*
+ * Apply a received revision payload to the statfile.
+ *
+ * The payload is interpreted as an array of struct rspamd_binlog_element;
+ * trailing bytes that do not form a whole element are ignored. Each element
+ * is stored with statfile_pool_set_block () using ctx->new_time as the
+ * access time, with the statfile locked for the whole batch.
+ *
+ * @return always TRUE
+ */
+static gboolean
+read_blocks (struct rspamd_sync_ctx *ctx, f_str_t *in)
+{
+    struct rspamd_binlog_element *first, *elt;
+    gsize nelts;
+
+    first = (struct rspamd_binlog_element *)in->begin;
+    nelts = in->len / sizeof (struct rspamd_binlog_element);
+
+    statfile_pool_lock_file (ctx->pool, ctx->real_statfile);
+    for (elt = first; (gsize)(elt - first) < nelts; elt ++) {
+        statfile_pool_set_block (ctx->pool, ctx->real_statfile, elt->h1, elt->h2, ctx->new_time, elt->value);
+    }
+    statfile_pool_unlock_file (ctx->pool, ctx->real_statfile);
+
+    return TRUE;
+}
+
+/*
+ * Finish the current sync session: close the socket, remove the dispatcher
+ * and mark the context as idle.
+ */
+static void
+sync_close_session (struct rspamd_sync_ctx *ctx)
+{
+    close (ctx->sock);
+    rspamd_remove_dispatcher (ctx->dispatcher);
+    ctx->is_busy = FALSE;
+}
+
+/*
+ * Dispatcher read callback implementing the client side of the sync
+ * protocol. `arg` is the struct rspamd_sync_ctx of the session.
+ *
+ *  - GREETING:  the first line is ignored and answered with the sync command
+ *               carrying the symbol and the local revision and time.
+ *  - READ_LINE: a revision line is parsed; a positive length switches the
+ *               dispatcher to read exactly that many bytes, END/FAIL or a
+ *               parse error closes the session.
+ *  - READ_REV:  the payload is applied to the statfile, the revision is
+ *               stored and line mode is restored.
+ *  - QUIT:      the session is closed.
+ *
+ * Empty input is skipped.
+ *
+ * @return FALSE when the session has been closed inside the callback, the
+ * result of the dispatcher write in the GREETING state, TRUE otherwise
+ */
+static gboolean
+sync_read (f_str_t * in, void *arg)
+{
+    struct rspamd_sync_ctx *ctx = arg;
+    gchar buf[256];
+    guint64 rev = 0, cmdlen;
+    time_t ti = 0;
+
+    if (in->len == 0) {
+        /* Skip empty lines */
+        return TRUE;
+    }
+
+    switch (ctx->state) {
+    case SYNC_STATE_GREETING:
+        /* The greeting itself is not inspected, just reply with the sync command */
+        statfile_get_revision (ctx->real_statfile, &rev, &ti);
+        cmdlen = rspamd_snprintf (buf, sizeof (buf), "sync %s %uL %T" CRLF, ctx->st->symbol, rev, ti);
+        ctx->state = SYNC_STATE_READ_LINE;
+        return rspamd_dispatcher_write (ctx->dispatcher, buf, cmdlen, FALSE, FALSE);
+    case SYNC_STATE_READ_LINE:
+        if (!parse_revision_line (ctx, in)) {
+            msg_info ("cannot parse line of length %z: '%*s'", in->len, (gint)in->len, in->begin);
+            sync_close_session (ctx);
+            return FALSE;
+        }
+        if (ctx->state == SYNC_STATE_QUIT) {
+            /* Server has sent END or FAIL */
+            msg_info ("sync ended for: %s", ctx->st->symbol);
+            sync_close_session (ctx);
+            /* Immediately return from callback */
+            return FALSE;
+        }
+        if (ctx->new_len > 0) {
+            /* Next read must deliver the whole payload of this revision */
+            ctx->state = SYNC_STATE_READ_REV;
+            rspamd_set_dispatcher_policy (ctx->dispatcher, BUFFER_CHARACTER, ctx->new_len);
+        }
+        break;
+    case SYNC_STATE_READ_REV:
+        /* `in` holds all blocks of the announced revision */
+        if (!read_blocks (ctx, in)) {
+            msg_info ("cannot read blocks");
+            sync_close_session (ctx);
+            return FALSE;
+        }
+        statfile_set_revision (ctx->real_statfile, ctx->new_rev, ctx->new_time);
+        msg_info ("set new revision: %uL, readed %z bytes", ctx->new_rev, in->len);
+        /* Go back to line mode for the next revision or END line */
+        ctx->state = SYNC_STATE_READ_LINE;
+        rspamd_set_dispatcher_policy (ctx->dispatcher, BUFFER_LINE, 0);
+        break;
+    case SYNC_STATE_QUIT:
+        sync_close_session (ctx);
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+/*
+ * Dispatcher error callback: log the error, mark the context as idle so
+ * that the next timer shot may start a new session, then close the socket
+ * and remove the dispatcher. `arg` is the struct rspamd_sync_ctx.
+ */
+static void
+sync_err (GError *err, void *arg)
+{
+ struct rspamd_sync_ctx *ctx = arg;
+
+ msg_info ("abnormally closing connection, error: %s", err->message);
+ ctx->is_busy = FALSE;
+ close (ctx->sock);
+ rspamd_remove_dispatcher (ctx->dispatcher);
+}
+
+
+/*
+ * Periodic timer callback that starts a sync session for one statfile.
+ * `ud` is the struct rspamd_sync_ctx; `fd` and `what` are unused.
+ *
+ * The timer is always re-armed first, with a random delay between
+ * sync_interval and twice that value. A new session is then started unless
+ * one is still in progress: a socket to the configured master is created
+ * and a line-buffered dispatcher is attached to it.
+ */
+static void
+sync_timer_callback (gint fd, short what, void *ud)
+{
+    struct rspamd_sync_ctx *ctx = ud;
+    guint32 next_ms;
+
+    /* Plan the next shot, jittered to spread synchronization in time */
+    evtimer_del (&ctx->tm_ev);
+    next_ms = g_random_int_range (ctx->sync_interval, ctx->sync_interval * 2);
+    msec_to_tv (next_ms, &ctx->interval);
+    evtimer_add (&ctx->tm_ev, &ctx->interval);
+    log_next_sync (ctx->st->symbol, ctx->interval.tv_sec);
+
+    if (!ctx->is_busy) {
+        ctx->sock = make_universal_socket (ctx->st->binlog->master_addr, ctx->st->binlog->master_port,
+                SOCK_STREAM, TRUE, FALSE, TRUE);
+        if (ctx->sock != -1) {
+            /* Create and activate the dispatcher for this session */
+            msec_to_tv (ctx->timeout, &ctx->io_tv);
+            ctx->dispatcher = rspamd_create_dispatcher (ctx->ev_base, ctx->sock, BUFFER_LINE, sync_read, NULL, sync_err, &ctx->io_tv, ctx);
+
+            ctx->state = SYNC_STATE_GREETING;
+            ctx->is_busy = TRUE;
+
+            msg_info ("starting synchronization of %s", ctx->st->symbol);
+        }
+        else {
+            msg_info ("cannot connect to %s", ctx->st->binlog->master_addr);
+        }
+    }
+    else {
+        /* Previous session has not finished yet */
+        msg_info ("syncronization process is in progress, do not start new one");
+    }
+}
+
+/*
+ * Set up periodic synchronization of one statfile from its master.
+ *
+ * Allocates a sync context from the pool's memory pool, opens the statfile
+ * (creating it if it cannot be opened) and arms a timer that fires after a
+ * random delay between the configured sync interval and twice that value.
+ *
+ * @return TRUE when the timer was armed; FALSE when no master address is
+ * configured or the statfile can be neither opened nor created
+ */
+static gboolean
+add_statfile_watch (statfile_pool_t *pool, struct statfile *st, struct config_file *cfg, struct event_base *ev_base)
+{
+    struct rspamd_sync_ctx *ctx;
+    guint32 jittered_interval;
+
+    if (st->binlog->master_addr == NULL) {
+        msg_err ("cannot add statfile watch for statfile %s: no master defined", st->symbol);
+        return FALSE;
+    }
+
+    ctx = rspamd_mempool_alloc (pool->pool, sizeof (struct rspamd_sync_ctx));
+    /*
+     * Start from a fully defined context: is_busy is tested by the timer
+     * callback and pool is used for locking when blocks are written, so
+     * neither may be left at whatever the allocator returned.
+     */
+    memset (ctx, 0, sizeof (*ctx));
+    ctx->st = st;
+    ctx->pool = pool;
+    ctx->is_busy = FALSE;
+    ctx->timeout = cfg->statfile_sync_timeout;
+    ctx->sync_interval = cfg->statfile_sync_interval;
+    ctx->ev_base = ev_base;
+    /* Add some jittering for synchronization */
+    jittered_interval = g_random_int_range (ctx->sync_interval, ctx->sync_interval * 2);
+    msec_to_tv (jittered_interval, &ctx->interval);
+
+    /* Open statfile and attach it to pool */
+    if ((ctx->real_statfile = statfile_pool_is_open (pool, st->path)) == NULL) {
+        if ((ctx->real_statfile = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) {
+            msg_warn ("cannot open %s", st->path);
+            if (statfile_pool_create (pool, st->path, st->size) == -1) {
+                msg_err ("cannot create statfile %s", st->path);
+                return FALSE;
+            }
+            ctx->real_statfile = statfile_pool_open (pool, st->path, st->size, FALSE);
+            if (ctx->real_statfile == NULL) {
+                /* Do not schedule syncs into a statfile that is not there */
+                msg_err ("cannot open statfile %s after creation", st->path);
+                return FALSE;
+            }
+        }
+    }
+
+    /* Now plan event for its future execution */
+    evtimer_set (&ctx->tm_ev, sync_timer_callback, ctx);
+    event_base_set (ctx->ev_base, &ctx->tm_ev);
+    evtimer_add (&ctx->tm_ev, &ctx->interval);
+    log_next_sync (st->symbol, ctx->interval.tv_sec);
+
+    return TRUE;
+}
+
+/*
+ * Start synchronization for every statfile configured as a slave.
+ *
+ * Walks all classifiers in `cfg` and all of their statfiles; for each
+ * statfile that has binlog settings with slave affinity a watch is added.
+ *
+ * @return TRUE when all watches were added, FALSE as soon as one fails
+ */
+gboolean
+start_statfile_sync (statfile_pool_t *pool, struct config_file *cfg, struct event_base *ev_base)
+{
+    GList *cl_iter, *st_iter;
+    struct classifier_config *cl;
+    struct statfile *st;
+
+    for (cl_iter = cfg->classifiers; cl_iter != NULL; cl_iter = g_list_next (cl_iter)) {
+        cl = cl_iter->data;
+
+        for (st_iter = cl->statfiles; st_iter != NULL; st_iter = g_list_next (st_iter)) {
+            st = st_iter->data;
+
+            /* Only statfiles with slave affinity are synchronized */
+            if (st->binlog == NULL || st->binlog->affinity != AFFINITY_SLAVE) {
+                continue;
+            }
+            if (!add_statfile_watch (pool, st, cfg, ev_base)) {
+                return FALSE;
+            }
+        }
+    }
+
+    return TRUE;
+}
diff --git a/src/libserver/statfile_sync.h b/src/libserver/statfile_sync.h
new file mode 100644
index 000000000..b3abb8b91
--- /dev/null
+++ b/src/libserver/statfile_sync.h
@@ -0,0 +1,14 @@
+#ifndef RSPAMD_STATFILE_SYNC_H
+#define RSPAMD_STATFILE_SYNC_H
+
+#include "config.h"
+#include "main.h"
+#include "statfile.h"
+#include "cfg_file.h"
+
+/*
+ * Start synchronization of statfiles. Must be called after event_init as it adds events.
+ * Every statfile whose binlog settings have slave affinity gets a periodic sync timer on ev_base.
+ * Returns FALSE as soon as a watch cannot be added for one of them, TRUE otherwise.
+ */
+gboolean start_statfile_sync (statfile_pool_t *pool, struct config_file *cfg, struct event_base *ev_base);
+
+#endif
diff --git a/src/libserver/symbols_cache.c b/src/libserver/symbols_cache.c
new file mode 100644
index 000000000..dfca57c66
--- /dev/null
+++ b/src/libserver/symbols_cache.c
@@ -0,0 +1,1055 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "util.h"
+#include "main.h"
+#include "message.h"
+#include "symbols_cache.h"
+#include "cfg_file.h"
+
+/*
+ * Multipliers used by cache_logic_cmp to combine the absolute weight, the
+ * relative frequency and the average execution time of a symbol into a
+ * single sorting score (time has a negative multiplier)
+ */
+#define WEIGHT_MULT 4.0
+#define FREQUENCY_MULT 10.0
+#define TIME_MULT -1.0
+
+/* Number of uses of the cache after which it is sorted again */
+#define MAX_USES 100
+/*
+ * Symbols cache utility functions
+ */
+
+#define MIN_CACHE 17
+
+/*
+ * Statistics recomputed by post_cache_init and read by cache_logic_cmp:
+ * the sum of frequencies of cached items and the number of used items
+ */
+static guint64 total_frequency = 0;
+static guint32 nsymbols = 0;
+
+/* Order two cache items alphabetically by their symbol names */
+gint
+cache_cmp (const void *p1, const void *p2)
+{
+	const struct cache_item *lhs = p1;
+	const struct cache_item *rhs = p2;
+
+	return strcmp (lhs->s->symbol, rhs->s->symbol);
+}
+
+/*
+ * Comparator that puts cache items into execution order: the item with
+ * the larger score sorts first.
+ *
+ * When both items have zero priority the score combines the absolute
+ * weight (the metric weight if set, the saved weight otherwise), the
+ * frequency relative to total_frequency and the average execution time.
+ * Otherwise the items are ordered strictly by absolute priority.
+ *
+ * Returns a positive value if p2 must precede p1, a negative value if p1
+ * must precede p2 and 0 if the scores are equal.
+ */
+gint
+cache_logic_cmp (const void *p1, const void *p2)
+{
+	const struct cache_item *i1 = p1, *i2 = p2;
+	double w1, w2;
+	double weight1, weight2;
+	double f1 = 0, f2 = 0;
+
+	if (i1->priority == 0 && i2->priority == 0) {
+		if (total_frequency > 0) {
+			f1 = ((double)i1->s->frequency * nsymbols) / (double)total_frequency;
+			f2 = ((double)i2->s->frequency * nsymbols) / (double)total_frequency;
+		}
+		weight1 = i1->metric_weight == 0 ? i1->s->weight : i1->metric_weight;
+		weight2 = i2->metric_weight == 0 ? i2->s->weight : i2->metric_weight;
+		/* abs() takes an int and would truncate fractional weights to zero */
+		if (weight1 < 0) {
+			weight1 = -weight1;
+		}
+		if (weight2 < 0) {
+			weight2 = -weight2;
+		}
+		w1 = weight1 * WEIGHT_MULT + f1 * FREQUENCY_MULT + i1->s->avg_time * TIME_MULT;
+		w2 = weight2 * WEIGHT_MULT + f2 * FREQUENCY_MULT + i2->s->avg_time * TIME_MULT;
+	}
+	else {
+		/* Strict sorting */
+		w1 = abs (i1->priority);
+		w2 = abs (i2->priority);
+	}
+
+	/*
+	 * Compare explicitly: converting the difference to an integer would
+	 * report scores that differ by less than one as equal
+	 */
+	if (w2 > w1) {
+		return 1;
+	}
+	if (w2 < w1) {
+		return -1;
+	}
+
+	return 0;
+}
+
+static GChecksum *
+get_mem_cksum (struct symbols_cache *cache)
+{
+ GChecksum *result;
+ GList *cur, *l;
+ struct cache_item *item;
+
+ result = g_checksum_new (G_CHECKSUM_SHA1);
+
+ l = g_list_copy (cache->negative_items);
+ l = g_list_sort (l, cache_cmp);
+ cur = g_list_first (l);
+ while (cur) {
+ item = cur->data;
+ if (item->s->symbol[0] != '\0') {
+ g_checksum_update (result, item->s->symbol, strlen (item->s->symbol));
+ }
+ cur = g_list_next (cur);
+ }
+ g_list_free (l);
+
+
+ l = g_list_copy (cache->static_items);
+ l = g_list_sort (l, cache_cmp);
+ cur = g_list_first (l);
+ while (cur) {
+ item = cur->data;
+ if (item->s->symbol[0] != '\0') {
+ g_checksum_update (result, item->s->symbol, strlen (item->s->symbol));
+ }
+ total_frequency += item->s->frequency;
+ cur = g_list_next (cur);
+ }
+ g_list_free (l);
+
+ return result;
+}
+
+/*
+ * Recompute the frequency statistics used by cache_logic_cmp and sort the
+ * negative and static item lists into logical (execution) order
+ */
+static void
+post_cache_init (struct symbols_cache *cache)
+{
+	GList *lists[2];
+	GList *node;
+	const struct cache_item *entry;
+	guint idx;
+
+	lists[0] = cache->negative_items;
+	lists[1] = cache->static_items;
+
+	total_frequency = 0;
+	nsymbols = cache->used_items;
+	for (idx = 0; idx < G_N_ELEMENTS (lists); idx ++) {
+		for (node = g_list_first (lists[idx]); node != NULL; node = g_list_next (node)) {
+			entry = node->data;
+			total_frequency += entry->s->frequency;
+		}
+	}
+
+	/* Statistics must be complete before sorting as the comparator reads them */
+	cache->negative_items = g_list_sort (cache->negative_items, cache_logic_cmp);
+	cache->static_items = g_list_sort (cache->static_items, cache_logic_cmp);
+}
+
+/* Release the memory mapping of the cache file stored in cache->map */
+static void
+unmap_cache_file (gpointer arg)
+{
+	struct symbols_cache *cache = arg;
+	gsize map_len = cache->used_items * sizeof (struct saved_cache_item);
+
+	/* A bit ugly: the length is derived from used_items instead of being stored */
+	munmap (cache->map, map_len);
+}
+
+/*
+ * Map the cache file referenced by fd into memory and point the saved data
+ * of every negative and static item at its record inside the mapping, then
+ * re-sort the cache.
+ *
+ * The descriptor is always consumed: it is closed on success, on failure
+ * and also when the cache has no items and nothing is mapped.
+ * Returns FALSE if the file cannot be mapped, TRUE otherwise.
+ */
+static gboolean
+mmap_cache_file (struct symbols_cache *cache, gint fd, rspamd_mempool_t *pool)
+{
+	guint8 *map;
+	gint i;
+	GList *cur;
+	struct cache_item *item;
+
+	if (cache->used_items == 0) {
+		/* Nothing to map, but the descriptor must not leak */
+		close (fd);
+		return TRUE;
+	}
+
+	map = mmap (NULL, cache->used_items * sizeof (struct saved_cache_item), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (map == MAP_FAILED) {
+		msg_err ("cannot mmap cache file: %d, %s", errno, strerror (errno));
+		close (fd);
+		return FALSE;
+	}
+	/* Close descriptor as it would never be used */
+	close (fd);
+	cache->map = map;
+
+	/* Records are laid out in the file as negative items followed by static ones */
+	i = 0;
+	for (cur = g_list_first (cache->negative_items); cur != NULL; cur = g_list_next (cur)) {
+		item = cur->data;
+		item->s = (struct saved_cache_item *)(map + i * sizeof (struct saved_cache_item));
+		i ++;
+	}
+	for (cur = g_list_first (cache->static_items); cur != NULL; cur = g_list_next (cur)) {
+		item = cur->data;
+		item->s = (struct saved_cache_item *)(map + i * sizeof (struct saved_cache_item));
+		i ++;
+	}
+
+	post_cache_init (cache);
+
+	return TRUE;
+}
+
+/*
+ * Write a fresh cache file: one saved_cache_item record per negative item,
+ * then one per static item, then the SHA1 checksum of the symbol names.
+ * After writing, the file is reopened and mapped with mmap_cache_file.
+ *
+ * fd must be opened for writing; it is closed by this function on every
+ * path. Returns FALSE on any write/open/mmap failure.
+ */
+static gboolean
+create_cache_file (struct symbols_cache *cache, const gchar *filename, gint fd, rspamd_mempool_t *pool)
+{
+	GChecksum *cksum;
+	u_char *digest;
+	gsize cklen;
+	GList *lists[2];
+	GList *cur;
+	struct cache_item *item;
+	guint i;
+	gboolean written = FALSE;
+
+	/* Calculate checksum */
+	cksum = get_mem_cksum (cache);
+	if (cksum == NULL) {
+		msg_err ("cannot calculate checksum for symbols");
+		close (fd);
+		return FALSE;
+	}
+
+	cklen = g_checksum_type_get_length (G_CHECKSUM_SHA1);
+	digest = g_malloc (cklen);
+	g_checksum_get_digest (cksum, digest, &cklen);
+
+	/* Now write data to file, the order must match the one used by mmap_cache_file */
+	lists[0] = cache->negative_items;
+	lists[1] = cache->static_items;
+	for (i = 0; i < G_N_ELEMENTS (lists); i ++) {
+		for (cur = g_list_first (lists[i]); cur != NULL; cur = g_list_next (cur)) {
+			item = cur->data;
+			if (write (fd, item->s, sizeof (struct saved_cache_item)) == -1) {
+				msg_err ("cannot write to file %d, %s", errno, strerror (errno));
+				goto done;
+			}
+		}
+	}
+	/* Write checksum */
+	if (write (fd, digest, cklen) == -1) {
+		msg_err ("cannot write to file %d, %s", errno, strerror (errno));
+		goto done;
+	}
+	written = TRUE;
+
+done:
+	close (fd);
+	g_checksum_free (cksum);
+	g_free (digest);
+	if (!written) {
+		return FALSE;
+	}
+
+	/* Reopen for reading */
+	if ((fd = open (filename, O_RDWR)) == -1) {
+		/* The filename argument was missing here, so %s consumed errno */
+		msg_info ("cannot open file %s, error %d, %s", filename, errno, strerror (errno));
+		return FALSE;
+	}
+
+	return mmap_cache_file (cache, fd, pool);
+}
+
+/* Kind of symbol passed to register_symbol_common */
+enum rspamd_symbol_type {
+ /* No extra flags are set on the cache item */
+ SYMBOL_TYPE_NORMAL,
+ /* The cache item is marked with is_virtual */
+ SYMBOL_TYPE_VIRTUAL,
+ /* The cache item is marked with is_callback */
+ SYMBOL_TYPE_CALLBACK
+};
+
+/*
+ * Common implementation of symbol registration.
+ *
+ * Creates the cache on first use, allocates a new item from the static
+ * pool, scales its weight by the default metric weight for `name` when the
+ * metric defines one, and prepends the item either to the static or to the
+ * negative list. With zero priority the list is selected by the sign of
+ * the resulting weight, otherwise by the sign of the priority.
+ */
+static void
+register_symbol_common (struct symbols_cache **cache, const gchar *name, double weight, gint priority,
+		symbol_func_t func, gpointer user_data, enum rspamd_symbol_type type)
+{
+	struct symbols_cache *pcache = *cache;
+	struct cache_item *item;
+	GList **target;
+	double *metric_mult = NULL;
+	gboolean to_static;
+
+	if (pcache == NULL) {
+		pcache = g_new0 (struct symbols_cache, 1);
+		*cache = pcache;
+		pcache->static_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ());
+		pcache->items_by_symbol = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+	}
+
+	item = rspamd_mempool_alloc0 (pcache->static_pool, sizeof (struct cache_item));
+	item->s = rspamd_mempool_alloc0 (pcache->static_pool, sizeof (struct saved_cache_item));
+	rspamd_strlcpy (item->s->symbol, name, sizeof (item->s->symbol));
+	item->func = func;
+	item->user_data = user_data;
+	item->priority = priority;
+
+	if (type == SYMBOL_TYPE_VIRTUAL) {
+		item->is_virtual = TRUE;
+	}
+	else if (type == SYMBOL_TYPE_CALLBACK) {
+		item->is_callback = TRUE;
+	}
+
+	/* Handle weight using default metric */
+	if (pcache->cfg && pcache->cfg->default_metric) {
+		metric_mult = g_hash_table_lookup (pcache->cfg->default_metric->symbols, name);
+	}
+	if (metric_mult != NULL) {
+		item->s->weight = weight * (*metric_mult);
+	}
+	else {
+		item->s->weight = weight;
+	}
+
+	/*
+	 * Undefined (zero) priority: the list is determined by the weight.
+	 * Otherwise negative priorities go to the negative list.
+	 */
+	if (priority == 0) {
+		to_static = item->s->weight > 0;
+	}
+	else {
+		to_static = !(priority < 0);
+	}
+	target = to_static ? &pcache->static_items : &pcache->negative_items;
+
+	pcache->used_items++;
+	g_hash_table_insert (pcache->items_by_symbol, item->s->symbol, item);
+	msg_debug ("used items: %d, added symbol: %s", pcache->used_items, name);
+	set_counter (item->s->symbol, 0);
+
+	*target = g_list_prepend (*target, item);
+}
+
+void
+register_symbol (struct symbols_cache **cache, const gchar *name, double weight,
+ symbol_func_t func, gpointer user_data)
+{
+ register_symbol_common (cache, name, weight, 0, func, user_data, SYMBOL_TYPE_NORMAL);
+}
+
+/* Register a virtual symbol: it has no handler and no user data */
+void
+register_virtual_symbol (struct symbols_cache **cache, const gchar *name, double weight)
+{
+	const gint no_priority = 0;
+
+	register_symbol_common (cache, name, weight, no_priority, NULL, NULL,
+			SYMBOL_TYPE_VIRTUAL);
+}
+
+void
+register_callback_symbol (struct symbols_cache **cache, const gchar *name, double weight,
+ symbol_func_t func, gpointer user_data)
+{
+ register_symbol_common (cache, name, weight, 0, func, user_data, SYMBOL_TYPE_CALLBACK);
+}
+
+/* Register a callback symbol that is ordered strictly by the given priority */
+void
+register_callback_symbol_priority (struct symbols_cache **cache, const gchar *name, double weight, gint priority,
+		symbol_func_t func, gpointer user_data)
+{
+	const enum rspamd_symbol_type kind = SYMBOL_TYPE_CALLBACK;
+
+	register_symbol_common (cache, name, weight, priority, func, user_data, kind);
+}
+
+/*
+ * Register a dynamic symbol whose item is allocated from dynamic_pool.
+ *
+ * Without networks the item is simply added to the list of dynamic items.
+ * With networks (a list of struct dynamic_map_item) the item is added to
+ * the list stored in the radix tree for each network: the negative tree
+ * for negative networks, the ordinary tree otherwise. Items that have a
+ * negative network are additionally added to the list of dynamic items.
+ *
+ * Fixes over the previous version:
+ *  - the symbols hash table is created if the cache is first created here;
+ *  - the network mask is computed anew for every network instead of being
+ *    shifted cumulatively, and a zero prefix no longer shifts by 32 bits;
+ *  - the mask is applied to the address in host byte order;
+ *  - saved item data is zeroed on allocation.
+ */
+void
+register_dynamic_symbol (rspamd_mempool_t *dynamic_pool, struct symbols_cache **cache,
+		const gchar *name, double weight, symbol_func_t func,
+		gpointer user_data, GList *networks)
+{
+	struct cache_item *item = NULL;
+	struct symbols_cache *pcache = *cache;
+	GList *t, *cur;
+	uintptr_t r;
+	double *w;
+	guint32 mask, addr;
+	struct dynamic_map_item *it;
+	radix_tree_t *tree;
+	gint rr;
+
+	if (pcache == NULL) {
+		pcache = g_new0 (struct symbols_cache, 1);
+		*cache = pcache;
+		pcache->static_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ());
+	}
+	if (pcache->items_by_symbol == NULL) {
+		/* Required by the insertion below */
+		pcache->items_by_symbol = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+	}
+
+	item = rspamd_mempool_alloc0 (dynamic_pool, sizeof (struct cache_item));
+	item->s = rspamd_mempool_alloc0 (dynamic_pool, sizeof (struct saved_cache_item));
+	rspamd_strlcpy (item->s->symbol, name, sizeof (item->s->symbol));
+	item->func = func;
+	item->user_data = user_data;
+	/* Handle weight using default metric */
+	if (pcache->cfg && pcache->cfg->default_metric && (w = g_hash_table_lookup (pcache->cfg->default_metric->symbols, name)) != NULL) {
+		item->s->weight = weight * (*w);
+	}
+	else {
+		item->s->weight = weight;
+	}
+	item->is_dynamic = TRUE;
+	item->priority = 0;
+
+	pcache->used_items++;
+	msg_debug ("used items: %d, added symbol: %s", pcache->used_items, name);
+	set_counter (item->s->symbol, 0);
+
+	g_hash_table_insert (pcache->items_by_symbol, item->s->symbol, item);
+
+	if (networks == NULL) {
+		pcache->dynamic_items = g_list_prepend (pcache->dynamic_items, item);
+		return;
+	}
+
+	if (pcache->dynamic_map == NULL) {
+		pcache->dynamic_map = radix_tree_create ();
+		pcache->negative_dynamic_map = radix_tree_create ();
+	}
+
+	for (cur = networks; cur != NULL; cur = g_list_next (cur)) {
+		it = cur->data;
+
+		/* Build the mask from the prefix length; shifting by 32 is undefined */
+		if (it->mask == 0) {
+			mask = 0;
+		}
+		else if (it->mask >= 32) {
+			mask = 0xFFFFFFFF;
+		}
+		else {
+			mask = 0xFFFFFFFFU << (32 - it->mask);
+		}
+		addr = ntohl (it->addr.s_addr);
+		tree = it->negative ? pcache->negative_dynamic_map : pcache->dynamic_map;
+
+		if ((r = radix32tree_find (tree, addr & mask)) != RADIX_NO_VALUE) {
+			t = (GList *)((gpointer)r);
+			t = g_list_prepend (t, item);
+			/* Replace pointers in radix tree and in destructor function */
+			rspamd_mempool_replace_destructor (dynamic_pool, (rspamd_mempool_destruct_t)g_list_free, (gpointer)r, t);
+			rr = radix32tree_replace (tree, addr, mask, (uintptr_t)t);
+			if (rr == -1) {
+				msg_warn ("cannot replace ip to tree: %s, mask %X", inet_ntoa (it->addr), mask);
+			}
+		}
+		else {
+			t = g_list_prepend (NULL, item);
+			rspamd_mempool_add_destructor (dynamic_pool, (rspamd_mempool_destruct_t)g_list_free, t);
+			rr = radix32tree_insert (tree, addr, mask, (uintptr_t)t);
+			if (rr == -1) {
+				msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (it->addr), mask);
+			}
+			else if (rr == 1) {
+				msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (it->addr), mask);
+			}
+		}
+
+		if (it->negative) {
+			/* Negative items are also inserted into the common list */
+			pcache->dynamic_items = g_list_prepend (pcache->dynamic_items, item);
+		}
+	}
+}
+
+/*
+ * Drop the list of dynamic items and both radix maps of dynamic rules,
+ * leaving the corresponding cache fields set to NULL
+ */
+void
+remove_dynamic_rules (struct symbols_cache *cache)
+{
+	/* g_list_free accepts an empty (NULL) list */
+	g_list_free (cache->dynamic_items);
+	cache->dynamic_items = NULL;
+
+	if (cache->dynamic_map != NULL) {
+		radix_tree_free (cache->dynamic_map);
+		cache->dynamic_map = NULL;
+	}
+
+	if (cache->negative_dynamic_map != NULL) {
+		radix_tree_free (cache->negative_dynamic_map);
+		cache->negative_dynamic_map = NULL;
+	}
+}
+
+/*
+ * Destructor for the whole symbols cache: unmaps the cache file, frees the
+ * item lists, the radix maps, the symbols hash table, the static pool and
+ * finally the cache structure itself.
+ */
+static void
+free_cache (gpointer arg)
+{
+	struct symbols_cache *cache = arg;
+
+	if (cache->map != NULL) {
+		unmap_cache_file (cache);
+	}
+
+	/* g_list_free accepts NULL, so the lists need no guards */
+	g_list_free (cache->static_items);
+	g_list_free (cache->negative_items);
+	g_list_free (cache->dynamic_items);
+
+	if (cache->dynamic_map) {
+		radix_tree_free (cache->dynamic_map);
+	}
+	if (cache->negative_dynamic_map) {
+		radix_tree_free (cache->negative_dynamic_map);
+	}
+	/* The table is absent if the cache was created without it */
+	if (cache->items_by_symbol != NULL) {
+		g_hash_table_destroy (cache->items_by_symbol);
+	}
+	rspamd_mempool_delete (cache->static_pool);
+
+	g_free (cache);
+}
+
+/*
+ * Open (creating or truncating) the cache file for writing and fill it
+ * from the in-memory cache with create_cache_file
+ */
+static gboolean
+recreate_cache_file (struct symbols_cache *cache, const gchar *filename, rspamd_mempool_t *pool)
+{
+	gint fd;
+
+	if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) {
+		msg_info ("cannot create file %s, error %d, %s", filename, errno, strerror (errno));
+		return FALSE;
+	}
+
+	return create_cache_file (cache, filename, fd, pool);
+}
+
+/*
+ * Initialize the symbols cache after symbols have been registered.
+ *
+ * With filename == NULL the cache is kept in memory only and is just
+ * sorted. Otherwise the cache file is created if it does not exist,
+ * recreated if it is too short to hold a checksum or if its checksum
+ * differs from the checksum of the registered symbols (unless
+ * ignore_checksum is set), and finally mapped into memory.
+ *
+ * Returns FALSE if cache is NULL or on any I/O failure.
+ *
+ * Fixes over the previous version: the recreate path taken on EINVAL from
+ * lseek no longer leaks the descriptor, the digest buffer and the checksum
+ * object; errno is saved before cleanup calls can overwrite it; the seek
+ * offset is negated as a signed off_t.
+ */
+gboolean
+init_symbols_cache (rspamd_mempool_t * pool, struct symbols_cache *cache, struct config_file *cfg,
+		const gchar *filename, gboolean ignore_checksum)
+{
+	struct stat st;
+	gint fd, saved_errno;
+	GChecksum *cksum;
+	u_char *mem_sum, *file_sum;
+	gsize cklen;
+	gboolean res;
+
+	if (cache == NULL) {
+		return FALSE;
+	}
+
+	/* Init locking */
+	cache->lock = rspamd_mempool_get_rwlock (pool);
+
+	cache->cfg = cfg;
+
+	/* Just in-memory cache */
+	if (filename == NULL) {
+		post_cache_init (cache);
+		return TRUE;
+	}
+
+	/* First of all try to stat file */
+	if (stat (filename, &st) == -1) {
+		if (errno == ENOENT) {
+			/* No file yet: create it */
+			return recreate_cache_file (cache, filename, pool);
+		}
+		msg_info ("cannot stat file %s, error %d, %s", filename, errno, strerror (errno));
+		return FALSE;
+	}
+
+	if ((fd = open (filename, O_RDWR)) == -1) {
+		msg_info ("cannot open file %s, error %d, %s", filename, errno, strerror (errno));
+		return FALSE;
+	}
+
+	if (!ignore_checksum) {
+		/* Calculate checksum */
+		cksum = get_mem_cksum (cache);
+		if (cksum == NULL) {
+			msg_err ("cannot calculate checksum for symbols");
+			close (fd);
+			return FALSE;
+		}
+
+		cklen = g_checksum_type_get_length (G_CHECKSUM_SHA1);
+		mem_sum = g_malloc (cklen);
+
+		g_checksum_get_digest (cksum, mem_sum, &cklen);
+		/* Now try to read file sum that is stored at the end of the file */
+		if (lseek (fd, -(off_t)cklen, SEEK_END) == -1) {
+			saved_errno = errno;
+			close (fd);
+			g_free (mem_sum);
+			g_checksum_free (cksum);
+			if (saved_errno == EINVAL) {
+				/* File is shorter than a checksum: create it anew */
+				msg_info ("recreate cache file");
+				return recreate_cache_file (cache, filename, pool);
+			}
+			msg_err ("cannot seek to read checksum, %d, %s", saved_errno, strerror (saved_errno));
+			return FALSE;
+		}
+		file_sum = g_malloc (cklen);
+		if (read (fd, file_sum, cklen) == -1) {
+			saved_errno = errno;
+			close (fd);
+			g_free (mem_sum);
+			g_free (file_sum);
+			g_checksum_free (cksum);
+			msg_err ("cannot read checksum, %d, %s", saved_errno, strerror (saved_errno));
+			return FALSE;
+		}
+
+		if (memcmp (file_sum, mem_sum, cklen) != 0) {
+			close (fd);
+			g_free (mem_sum);
+			g_free (file_sum);
+			g_checksum_free (cksum);
+			msg_info ("checksum mismatch, recreating file");
+			return recreate_cache_file (cache, filename, pool);
+		}
+
+		g_free (mem_sum);
+		g_free (file_sum);
+		g_checksum_free (cksum);
+	}
+	/* MMap cache file and copy saved_cache structures */
+	res = mmap_cache_file (cache, fd, pool);
+
+	rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t)free_cache, cache);
+
+	return res;
+}
+
+/*
+ * Look up the list of dynamic items registered for the sender address of
+ * the task. Returns NULL if there is no dynamic map, no valid address or
+ * no entry for the address.
+ */
+static GList *
+check_dynamic_item (struct rspamd_task *task, struct symbols_cache *cache)
+{
+#ifdef HAVE_INET_PTON
+	/* TODO: radix doesn't support ipv6 addrs */
+	return NULL;
+#else
+	uintptr_t found;
+
+	if (cache->dynamic_map == NULL || task->from_addr.s_addr == INADDR_NONE) {
+		return NULL;
+	}
+
+	found = radix32tree_find (cache->dynamic_map, ntohl (task->from_addr.s_addr));
+	if (found == RADIX_NO_VALUE) {
+		return NULL;
+	}
+
+	return (GList *)((gpointer)found);
+#endif
+}
+
+/*
+ * Tell whether `item` is listed in the negative dynamic map for the sender
+ * address of the task
+ */
+static gboolean
+check_negative_dynamic_item (struct rspamd_task *task, struct symbols_cache *cache, struct cache_item *item)
+{
+#ifdef HAVE_INET_PTON
+	/* TODO: radix doesn't support ipv6 addrs */
+	return FALSE;
+#else
+	uintptr_t found;
+
+	if (cache->negative_dynamic_map == NULL || task->from_addr.s_addr == INADDR_NONE) {
+		return FALSE;
+	}
+
+	found = radix32tree_find (cache->negative_dynamic_map, ntohl (task->from_addr.s_addr));
+	if (found == RADIX_NO_VALUE) {
+		return FALSE;
+	}
+
+	/* The tree value is a list of items, search it by pointer */
+	return g_list_find ((GList *)((gpointer)found), item) != NULL ? TRUE : FALSE;
+#endif
+}
+
+/* Tell whether `symbol` is present in the list of debug symbols of cfg */
+static gboolean
+check_debug_symbol (struct config_file *cfg, const gchar *symbol)
+{
+	GList *node;
+	const gchar *candidate;
+
+	for (node = cfg->debug_symbols; node != NULL; node = g_list_next (node)) {
+		candidate = node->data;
+		if (strcmp (symbol, candidate) == 0) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Hash table iteration callback: k is a symbol name, v points to its
+ * weight and ud is the symbols cache. The weight is stored as the metric
+ * weight of the first item with that name, negative items being searched
+ * before static ones.
+ */
+static void
+rspamd_symbols_cache_metric_cb (gpointer k, gpointer v, gpointer ud)
+{
+	struct symbols_cache *cache = (struct symbols_cache *)ud;
+	const gchar *wanted = k;
+	gdouble metric_weight = *(gdouble *)v;
+	GList *lists[2];
+	GList *node;
+	struct cache_item *entry;
+	guint idx;
+
+	lists[0] = cache->negative_items;
+	lists[1] = cache->static_items;
+
+	for (idx = 0; idx < G_N_ELEMENTS (lists); idx ++) {
+		for (node = lists[idx]; node != NULL; node = g_list_next (node)) {
+			entry = node->data;
+			if (strcmp (entry->s->symbol, wanted) == 0) {
+				entry->metric_weight = metric_weight;
+				return;
+			}
+		}
+	}
+}
+
+/*
+ * Check that every non-callback item of `items` has a weight registered in
+ * cfg->metrics_symbols; in strict mode the first missing weight is fatal
+ */
+static gboolean
+validate_cache_list_weights (GList *items, struct config_file *cfg, gboolean strict)
+{
+	GList *cur;
+	struct cache_item *item;
+
+	for (cur = items; cur != NULL; cur = g_list_next (cur)) {
+		item = cur->data;
+		if (item->is_callback ||
+				g_hash_table_lookup (cfg->metrics_symbols, item->s->symbol) != NULL) {
+			continue;
+		}
+		if (strict) {
+			msg_warn ("no weight registered for symbol %s", item->s->symbol);
+			return FALSE;
+		}
+		msg_info ("no weight registered for symbol %s", item->s->symbol);
+	}
+
+	return TRUE;
+}
+
+#ifndef GLIB_HASH_COMPAT
+/* Tell whether `items` contains an item named `symbol` */
+static gboolean
+validate_cache_has_symbol (GList *items, const gchar *symbol)
+{
+	GList *cur;
+	struct cache_item *item;
+
+	for (cur = items; cur != NULL; cur = g_list_next (cur)) {
+		item = cur->data;
+		if (strcmp (item->s->symbol, symbol) == 0) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+#endif /* GLIB_HASH_COMPAT */
+
+/*
+ * Validate the cache against the metrics of cfg:
+ *  - every non-callback item must have a weight in cfg->metrics_symbols;
+ *  - every symbol of cfg->metrics_symbols must exist in the cache
+ *    (unless compiled with GLIB_HASH_COMPAT).
+ * In strict mode a violation makes the function return FALSE, otherwise
+ * it is only logged. On success the metric weights of the default metric
+ * are applied to the items and both lists are sorted again.
+ *
+ * The list of metric symbols is now released on the strict failure path
+ * as well.
+ */
+gboolean
+validate_cache (struct symbols_cache *cache, struct config_file *cfg, gboolean strict)
+{
+#ifndef GLIB_HASH_COMPAT
+	GList *cur, *metric_symbols;
+	const gchar *sym;
+#endif
+
+	if (cache == NULL) {
+		msg_err ("empty cache is invalid");
+		return FALSE;
+	}
+
+	/* Check each symbol in a cache and find its weight definition */
+	if (!validate_cache_list_weights (cache->negative_items, cfg, strict) ||
+			!validate_cache_list_weights (cache->static_items, cfg, strict)) {
+		return FALSE;
+	}
+
+#ifndef GLIB_HASH_COMPAT
+	/* Now check each metric item and find corresponding symbol in a cache */
+	metric_symbols = g_hash_table_get_keys (cfg->metrics_symbols);
+	for (cur = metric_symbols; cur != NULL; cur = g_list_next (cur)) {
+		sym = cur->data;
+		if (validate_cache_has_symbol (cache->negative_items, sym) ||
+				validate_cache_has_symbol (cache->static_items, sym)) {
+			continue;
+		}
+		msg_warn ("symbol '%s' is registered in metric but not found in cache", sym);
+		if (strict) {
+			/* The keys list is owned by us and must not leak */
+			g_list_free (metric_symbols);
+			return FALSE;
+		}
+	}
+	g_list_free (metric_symbols);
+#endif /* GLIB_HASH_COMPAT */
+
+	/* Now adjust symbol weights according to default metric */
+	if (cfg->default_metric != NULL) {
+		g_hash_table_foreach (cfg->default_metric->symbols, rspamd_symbols_cache_metric_cb, cache);
+		/* Resort caches */
+		cache->negative_items = g_list_sort (cache->negative_items, cache_logic_cmp);
+		cache->static_items = g_list_sort (cache->static_items, cache_logic_cmp);
+	}
+
+	return TRUE;
+}
+
+/*
+ * Iteration state of call_symbol_callback, kept between calls through
+ * its `save` argument
+ */
+struct symbol_callback_data {
+ /* Which list of the cache is currently being walked */
+ enum {
+ CACHE_STATE_NEGATIVE,
+ CACHE_STATE_DYNAMIC_MAP,
+ CACHE_STATE_DYNAMIC,
+ CACHE_STATE_STATIC
+ } state;
+ /* Item selected by the last call */
+ struct cache_item *saved_item;
+ /* Current position inside the list being walked */
+ GList *list_pointer;
+};
+
+/*
+ * Select the next item of the cache and, unless it is virtual, call its
+ * handler for the task.
+ *
+ * The position is kept in *save: when *save is NULL a new state is
+ * allocated from the task pool and iteration starts, otherwise iteration
+ * continues from the stored position. Lists are visited in the order:
+ * negative items, items of the dynamic map matching the task, common
+ * dynamic items, static items.
+ *
+ * Returns TRUE when an item has been selected and FALSE when the cache is
+ * NULL or there are no more items.
+ *
+ * NOTE(review): item->func is called without a NULL check for every
+ * non-virtual item - confirm that such items always have a handler.
+ */
+gboolean
+call_symbol_callback (struct rspamd_task * task, struct symbols_cache * cache, gpointer *save)
+{
+#ifdef HAVE_CLOCK_GETTIME
+ struct timespec ts1, ts2;
+#else
+ struct timeval tv1, tv2;
+#endif
+ guint64 diff;
+ struct cache_item *item = NULL;
+ struct symbol_callback_data *s = *save;
+
+ if (s == NULL) {
+ /* First call: set up the iteration state */
+ if (cache == NULL) {
+ return FALSE;
+ }
+ if (cache->uses++ >= MAX_USES) {
+ msg_info ("resort symbols cache");
+ rspamd_mempool_wlock_rwlock (cache->lock);
+ cache->uses = 0;
+ /* Resort while having write lock */
+ post_cache_init (cache);
+ rspamd_mempool_wunlock_rwlock (cache->lock);
+ }
+ s = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct symbol_callback_data));
+ *save = s;
+ /* Start from the first non-empty list */
+ if (cache->negative_items != NULL) {
+ s->list_pointer = g_list_first (cache->negative_items);
+ s->saved_item = s->list_pointer->data;
+ s->state = CACHE_STATE_NEGATIVE;
+ }
+ else if ((s->list_pointer = check_dynamic_item (task, cache)) || cache->dynamic_items != NULL) {
+ if (s->list_pointer == NULL) {
+ s->list_pointer = g_list_first (cache->dynamic_items);
+ s->saved_item = s->list_pointer->data;
+ s->state = CACHE_STATE_DYNAMIC;
+ }
+ else {
+ s->saved_item = s->list_pointer->data;
+ s->state = CACHE_STATE_DYNAMIC_MAP;
+ }
+ }
+ else {
+ s->state = CACHE_STATE_STATIC;
+ s->list_pointer = g_list_first (cache->static_items);
+ if (s->list_pointer) {
+ s->saved_item = s->list_pointer->data;
+ }
+ else {
+ return FALSE;
+ }
+ }
+ item = s->saved_item;
+ }
+ else {
+ /* Subsequent call: advance inside the current list or switch to the next one */
+ if (cache == NULL) {
+ return FALSE;
+ }
+ switch (s->state) {
+ case CACHE_STATE_NEGATIVE:
+ s->list_pointer = g_list_next (s->list_pointer);
+ if (s->list_pointer == NULL) {
+ /* Negative items are exhausted, try dynamic ones and then static ones */
+ if ((s->list_pointer = check_dynamic_item (task, cache)) || cache->dynamic_items != NULL) {
+ if (s->list_pointer == NULL) {
+ s->list_pointer = g_list_first (cache->dynamic_items);
+ s->saved_item = s->list_pointer->data;
+ s->state = CACHE_STATE_DYNAMIC;
+ }
+ else {
+ s->saved_item = s->list_pointer->data;
+ s->state = CACHE_STATE_DYNAMIC_MAP;
+ }
+ }
+ else {
+ s->state = CACHE_STATE_STATIC;
+ s->list_pointer = g_list_first (cache->static_items);
+ if (s->list_pointer) {
+ s->saved_item = s->list_pointer->data;
+ }
+ else {
+ return FALSE;
+ }
+ }
+ }
+ else {
+ s->saved_item = s->list_pointer->data;
+ }
+ item = s->saved_item;
+ break;
+ case CACHE_STATE_DYNAMIC_MAP:
+ s->list_pointer = g_list_next (s->list_pointer);
+ if (s->list_pointer == NULL) {
+ /* Map items are exhausted, continue with common dynamic items */
+ s->list_pointer = g_list_first (cache->dynamic_items);
+ if (s->list_pointer) {
+ s->saved_item = s->list_pointer->data;
+ s->state = CACHE_STATE_DYNAMIC;
+ }
+ else {
+ s->state = CACHE_STATE_STATIC;
+ s->list_pointer = g_list_first (cache->static_items);
+ if (s->list_pointer) {
+ s->saved_item = s->list_pointer->data;
+ }
+ else {
+ return FALSE;
+ }
+ }
+ }
+ else {
+ s->saved_item = s->list_pointer->data;
+ }
+ item = s->saved_item;
+ break;
+ case CACHE_STATE_DYNAMIC:
+ s->list_pointer = g_list_next (s->list_pointer);
+ if (s->list_pointer == NULL) {
+ s->state = CACHE_STATE_STATIC;
+ s->list_pointer = g_list_first (cache->static_items);
+ if (s->list_pointer) {
+ s->saved_item = s->list_pointer->data;
+ }
+ else {
+ return FALSE;
+ }
+ }
+ else {
+ s->saved_item = s->list_pointer->data;
+ /* Skip items that are in negative map */
+ while (s->list_pointer != NULL && check_negative_dynamic_item (task, cache, s->saved_item)) {
+ s->list_pointer = g_list_next (s->list_pointer);
+ if (s->list_pointer != NULL) {
+ s->saved_item = s->list_pointer->data;
+ }
+ }
+ if (s->list_pointer == NULL) {
+ s->state = CACHE_STATE_STATIC;
+ s->list_pointer = g_list_first (cache->static_items);
+ if (s->list_pointer) {
+ s->saved_item = s->list_pointer->data;
+ }
+ else {
+ return FALSE;
+ }
+ }
+ }
+ item = s->saved_item;
+ break;
+ case CACHE_STATE_STATIC:
+ /* Next pointer */
+ s->list_pointer = g_list_next (s->list_pointer);
+ if (s->list_pointer) {
+ s->saved_item = s->list_pointer->data;
+ }
+ else {
+ return FALSE;
+ }
+ item = s->saved_item;
+ break;
+ }
+ }
+ if (!item) {
+ return FALSE;
+ }
+ if (!item->is_virtual) {
+ /* Take a timestamp before the handler runs */
+#ifdef HAVE_CLOCK_GETTIME
+# ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID
+ clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts1);
+# elif defined(HAVE_CLOCK_VIRTUAL)
+ clock_gettime (CLOCK_VIRTUAL, &ts1);
+# else
+ clock_gettime (CLOCK_REALTIME, &ts1);
+# endif
+#else
+ if (gettimeofday (&tv1, NULL) == -1) {
+ msg_warn ("gettimeofday failed: %s", strerror (errno));
+ }
+#endif
+ /* Debug logging is switched on around the handler for symbols listed in cfg->debug_symbols */
+ if (G_UNLIKELY (check_debug_symbol (task->cfg, item->s->symbol))) {
+ rspamd_log_debug (rspamd_main->logger);
+ item->func (task, item->user_data);
+ rspamd_log_nodebug (rspamd_main->logger);
+ }
+ else {
+ item->func (task, item->user_data);
+ }
+
+
+ /* Take a timestamp after the handler has returned */
+#ifdef HAVE_CLOCK_GETTIME
+# ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID
+ clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts2);
+# elif defined(HAVE_CLOCK_VIRTUAL)
+ clock_gettime (CLOCK_VIRTUAL, &ts2);
+# else
+ clock_gettime (CLOCK_REALTIME, &ts2);
+# endif
+#else
+ if (gettimeofday (&tv2, NULL) == -1) {
+ msg_warn ("gettimeofday failed: %s", strerror (errno));
+ }
+#endif
+
+ /* Elapsed time in microseconds */
+#ifdef HAVE_CLOCK_GETTIME
+ diff = (ts2.tv_sec - ts1.tv_sec) * 1000000 + (ts2.tv_nsec - ts1.tv_nsec) / 1000;
+#else
+ diff = (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec);
+#endif
+ /* The value returned by set_counter becomes the new average time of the symbol */
+ item->s->avg_time = set_counter (item->s->symbol, diff);
+ }
+
+ s->saved_item = item;
+
+ return TRUE;
+
+}
diff --git a/src/libserver/symbols_cache.h b/src/libserver/symbols_cache.h
new file mode 100644
index 000000000..bb2100fc1
--- /dev/null
+++ b/src/libserver/symbols_cache.h
@@ -0,0 +1,150 @@
+#ifndef RSPAMD_SYMBOLS_CACHE_H
+#define RSPAMD_SYMBOLS_CACHE_H
+
+#include "config.h"
+#include "radix.h"
+
+#define MAX_SYMBOL 128
+
+struct rspamd_task;
+struct config_file;
+
+typedef void (*symbol_func_t)(struct rspamd_task *task, gpointer user_data);
+
+/*
+ * Per-symbol data that is persisted: records of this structure are written
+ * to the cache file one after another and are accessed through the memory
+ * mapping of that file, so the layout defines the file format
+ */
+struct saved_cache_item {
+ /* NUL-terminated symbol name, truncated to MAX_SYMBOL bytes */
+ gchar symbol[MAX_SYMBOL];
+ /* Weight given at registration, scaled by the default metric weight if any */
+ double weight;
+ /* Contributes to the frequency part of the sorting score */
+ guint32 frequency;
+ /* Updated from set_counter after each call of the symbol handler */
+ double avg_time;
+};
+
+/* Network description passed in the `networks` list of register_dynamic_symbol */
+struct dynamic_map_item {
+ /* IPv4 address; converted with ntohl before use, i.e. stored in network byte order */
+ struct in_addr addr;
+ /* Prefix length: the bit mask is built by shifting by (32 - mask) */
+ guint32 mask;
+ /* If set the item goes to the negative dynamic map */
+ gboolean negative;
+};
+
+/* In-memory descriptor of a registered symbol */
+struct cache_item {
+ /* Static item's data */
+ struct saved_cache_item *s;
+
+ /* For dynamic rules */
+ struct dynamic_map_item *networks;
+ guint32 networks_number;
+ gboolean is_dynamic;
+
+ /* Callback data */
+ symbol_func_t func;
+ gpointer user_data;
+
+ /* Flags of virtual symbols */
+ /* Handler of a virtual item is never called */
+ gboolean is_virtual;
+ /* Callback items are not required to have a weight in metrics */
+ gboolean is_callback;
+
+ /* Priority */
+ /* Zero means undefined: the item is ordered by weight, frequency and time */
+ gint priority;
+ /* Weight from the default metric; zero means that the saved weight is used */
+ gdouble metric_weight;
+};
+
+
+/* The symbols cache: lists of registered items and the related state */
+struct symbols_cache {
+ /* Normal cache items */
+ GList *static_items;
+
+ /* Items that have negative weights */
+ GList *negative_items;
+
+ /* Radix map of dynamic rules with ip mappings */
+ radix_tree_t *dynamic_map;
+ radix_tree_t *negative_dynamic_map;
+
+ /* Common dynamic rules */
+ GList *dynamic_items;
+
+ /* Hash table for fast access */
+ GHashTable *items_by_symbol;
+
+ /* Pool that items of static and negative lists are allocated from */
+ rspamd_mempool_t *static_pool;
+
+ guint cur_items;
+ /* Number of registered items; also used to compute the size of the file mapping */
+ guint used_items;
+ /* Usage counter: the cache is sorted again when it reaches MAX_USES */
+ guint uses;
+ /* Memory mapping of the cache file or NULL */
+ gpointer map;
+ /* Write-locked while the cache is being sorted again */
+ rspamd_mempool_rwlock_t *lock;
+ struct config_file *cfg;
+};
+
+/**
+ * Initialize symbols cache and load saved data from file, must be called
+ * _after_ all symbols have been registered in the cache
+ */
+gboolean init_symbols_cache (rspamd_mempool_t *pool, struct symbols_cache *cache, struct config_file *cfg,
+ const gchar *filename, gboolean ignore_checksum);
+
+/**
+ * Register function for symbols parsing
+ * @param name name of symbol
+ * @param func pointer to handler
+ * @param user_data pointer to user_data
+ */
+void register_symbol (struct symbols_cache **cache, const gchar *name, double weight,
+ symbol_func_t func, gpointer user_data);
+
+
+/**
+ * Register virtual symbol
+ * @param name name of symbol
+ */
+void register_virtual_symbol (struct symbols_cache **cache, const gchar *name, double weight);
+
+/**
+ * Register callback function for symbols parsing
+ * @param name name of symbol
+ * @param func pointer to handler
+ * @param user_data pointer to user_data
+ */
+void register_callback_symbol (struct symbols_cache **cache, const gchar *name, double weight,
+ symbol_func_t func, gpointer user_data);
+
+/**
+ * Register function for symbols parsing with strict priority
+ * @param name name of symbol
+ * @param func pointer to handler
+ * @param user_data pointer to user_data
+ */
+void register_callback_symbol_priority (struct symbols_cache **cache, const gchar *name, double weight,
+ gint priority, symbol_func_t func, gpointer user_data);
+
+/**
+ * Register function for dynamic symbols parsing
+ * @param name name of symbol
+ * @param func pointer to handler
+ * @param user_data pointer to user_data
+ */
+void register_dynamic_symbol (rspamd_mempool_t *pool, struct symbols_cache **cache, const gchar *name,
+ double weight, symbol_func_t func,
+ gpointer user_data, GList *networks);
+
+/**
+ * Call function for cached symbol using saved callback
+ * @param task task object
+ * @param cache symbols cache
+ * @param saved_item pointer to currently saved item
+ */
+gboolean call_symbol_callback (struct rspamd_task *task, struct symbols_cache *cache, gpointer *save);
+
+/**
+ * Remove all dynamic rules from cache
+ * @param cache symbols cache
+ */
+void remove_dynamic_rules (struct symbols_cache *cache);
+
+/**
+ * Validate cache items against their weights defined in metrics
+ * @param cache symbols cache
+ * @param cfg configuration
+ * @param strict do strict checks - symbols MUST be described in metrics
+ */
+gboolean validate_cache (struct symbols_cache *cache, struct config_file *cfg, gboolean strict);
+
+
+#endif
diff --git a/src/libserver/task.c b/src/libserver/task.c
new file mode 100644
index 000000000..f389793dd
--- /dev/null
+++ b/src/libserver/task.c
@@ -0,0 +1,159 @@
+/* Copyright (c) 2014, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "task.h"
+#include "main.h"
+#include "filter.h"
+#include "message.h"
+
+/*
+ * Mempool destructor callback: releases the GList cells of the task's
+ * recipients list (task->rcpt); the list element data is left untouched.
+ */
+static void
+rcpt_destruct (void *pointer)
+{
+	struct rspamd_task *owner = pointer;
+
+	/* NULL is a valid empty GList, so no guard is required */
+	g_list_free (owner->rcpt);
+}
+
+/*
+ * Allocate a zero-filled task bound to `worker` (NULL is accepted), record the
+ * current time and attach the per-task containers to a fresh memory pool.
+ */
+struct rspamd_task *
+rspamd_task_new (struct rspamd_worker *worker)
+{
+	struct rspamd_task *task = g_slice_alloc0 (sizeof (*task));
+
+	task->worker = worker;
+	task->state = READ_MESSAGE;
+	if (worker != NULL) {
+		task->cfg = worker->srv->cfg;
+	}
+#ifdef HAVE_CLOCK_GETTIME
+# ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID
+	clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &task->ts);
+# elif defined(HAVE_CLOCK_VIRTUAL)
+	clock_gettime (CLOCK_VIRTUAL, &task->ts);
+# else
+	clock_gettime (CLOCK_REALTIME, &task->ts);
+# endif
+#endif
+	if (gettimeofday (&task->tv, NULL) == -1) {
+		msg_warn ("gettimeofday failed: %s", strerror (errno));
+	}
+
+	task->task_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ());
+
+	/* Register pool destructors for the recipients list and each container */
+	rspamd_mempool_add_destructor (task->task_pool,
+		(rspamd_mempool_destruct_t) rcpt_destruct, task);
+	/* String-keyed tables: metric results and the regexp match cache */
+	task->results = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+	rspamd_mempool_add_destructor (task->task_pool,
+		(rspamd_mempool_destruct_t) g_hash_table_destroy, task->results);
+	task->re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+	rspamd_mempool_add_destructor (task->task_pool,
+		(rspamd_mempool_destruct_t) g_hash_table_destroy, task->re_cache);
+	/* raw_headers goes through the strcase hash/equal pair instead */
+	task->raw_headers = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
+	rspamd_mempool_add_destructor (task->task_pool,
+		(rspamd_mempool_destruct_t) g_hash_table_destroy, task->raw_headers);
+	/* Trees of parsed e-mails and URLs, ordered by their compare functions */
+	task->emails = g_tree_new (compare_email_func);
+	rspamd_mempool_add_destructor (task->task_pool,
+		(rspamd_mempool_destruct_t) g_tree_destroy, task->emails);
+	task->urls = g_tree_new (compare_url_func);
+	rspamd_mempool_add_destructor (task->task_pool,
+		(rspamd_mempool_destruct_t) g_tree_destroy, task->urls);
+
+	task->sock = -1;
+	task->is_mime = TRUE;
+	task->pre_result.action = METRIC_ACTION_NOACTION;
+
+	/* Both identifiers start out pointing at the same "undef" literal */
+	task->queue_id = "undef";
+	task->message_id = task->queue_id;
+
+	return task;
+}
+
+
+/*
+ * Release a task: mime part buffers, the list cells of the auxiliary lists,
+ * the HTTP connection reference, the socket, the memory pool and finally the
+ * task structure itself. A NULL task is ignored; is_soft is currently unused.
+ */
+void
+rspamd_task_free (struct rspamd_task *task, gboolean is_soft)
+{
+	GList *node;
+	struct mime_part *mpart;
+
+	if (task == NULL) {
+		return;
+	}
+
+	debug_task ("free pointer %p", task);
+	/* Detach parts one by one, dropping each content buffer with its cell */
+	while ((node = g_list_first (task->parts)) != NULL) {
+		mpart = (struct mime_part *) node->data;
+		task->parts = g_list_remove_link (task->parts, node);
+		g_byte_array_free (mpart->content, TRUE);
+		g_list_free_1 (node);
+	}
+
+	/* g_list_free accepts NULL, so empty lists need no special casing */
+	g_list_free (task->text_parts);
+	g_list_free (task->images);
+	g_list_free (task->messages);
+	g_list_free (task->received);
+
+	if (task->http_conn != NULL) {
+		rspamd_http_connection_unref (task->http_conn);
+	}
+	if (task->sock != -1) {
+		close (task->sock);
+	}
+	rspamd_mempool_delete (task->task_pool);
+	g_slice_free1 (sizeof (struct rspamd_task), task);
+}
+
+/* gpointer-typed adapter: frees the task passed as `ud`, with is_soft = FALSE */
+void
+rspamd_task_free_hard (gpointer ud)
+{
+	/* ud carries the struct rspamd_task to destroy */
+	rspamd_task_free ((struct rspamd_task *) ud, FALSE);
+}
+
+/* gpointer-typed adapter for the soft flavour: frees `ud` with is_soft = TRUE */
+void
+rspamd_task_free_soft (gpointer ud)
+{
+	/* is_soft is not consulted by rspamd_task_free yet; pass the intended value */
+	rspamd_task_free ((struct rspamd_task *) ud, TRUE);
+}
diff --git a/src/libserver/task.h b/src/libserver/task.h
new file mode 100644
index 000000000..f8f7c89e3
--- /dev/null
+++ b/src/libserver/task.h
@@ -0,0 +1,165 @@
+/* Copyright (c) 2014, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef TASK_H_
+#define TASK_H_
+
+#include "config.h"
+#include "http.h"
+#include "events.h"
+#include "util.h"
+#include "mem_pool.h"
+#include "dns.h"
+
+enum rspamd_command { /* value type of rspamd_task.cmd */
+ CMD_CHECK,
+ CMD_SYMBOLS,
+ CMD_REPORT,
+ CMD_REPORT_IFSPAM,
+ CMD_SKIP,
+ CMD_PING,
+ CMD_PROCESS,
+ CMD_OTHER /* NOTE(review): presumably paired with rspamd_task.custom_cmd - confirm */
+};
+
+enum rspamd_metric_action { /* value type of rspamd_task.pre_result.action */
+ METRIC_ACTION_REJECT = 0,
+ METRIC_ACTION_SOFT_REJECT,
+ METRIC_ACTION_REWRITE_SUBJECT,
+ METRIC_ACTION_ADD_HEADER,
+ METRIC_ACTION_GREYLIST,
+ METRIC_ACTION_NOACTION, /* initial pre_result.action assigned by rspamd_task_new */
+ METRIC_ACTION_MAX /* NOTE(review): looks like a count/sentinel, not an action - confirm */
+};
+
+typedef gint (*protocol_reply_func)(struct rspamd_task *task); /* handler taking the task, returning gint */
+
+struct custom_command { /* pairs a command name with its handler; referenced by rspamd_task.custom_cmd */
+ const gchar *name; /* command name */
+ protocol_reply_func func; /* handler of type protocol_reply_func */
+};
+
+/**
+ * Worker task structure (created by rspamd_task_new, released by rspamd_task_free)
+ */
+struct rspamd_task {
+ struct rspamd_worker *worker; /**< pointer to worker object */
+ enum {
+ READ_MESSAGE, /**< initial state set by rspamd_task_new */
+ WAIT_PRE_FILTER,
+ WAIT_FILTER,
+ WAIT_POST_FILTER,
+ WRITE_REPLY,
+ CLOSING_CONNECTION
+ } state; /**< current session state */
+ enum rspamd_command cmd; /**< command */
+ struct custom_command *custom_cmd; /**< custom command if any */
+ gint sock; /**< socket descriptor, -1 when unset */
+ gboolean is_mime; /**< if this task is mime task (TRUE by default) */
+ gboolean is_json; /**< output is JSON */
+ gboolean allow_learn; /**< allow learning */
+ gboolean is_skipped; /**< whether message was skipped by configuration */
+
+ gchar *helo; /**< helo header value */
+ gchar *from; /**< from header value */
+ gchar *queue_id; /**< queue id if specified ("undef" initially) */
+ const gchar *message_id; /**< message id ("undef" initially) */
+ GList *rcpt; /**< recipients list */
+ guint nrcpt; /**< number of recipients */
+ rspamd_inet_addr_t from_addr; /**< from addr for a task */
+ rspamd_inet_addr_t client_addr; /**< address of connected socket */
+ gchar *deliver_to; /**< address to deliver */
+ gchar *user; /**< user to deliver */
+ gchar *subject; /**< subject (for non-mime) */
+ gchar *hostname; /**< hostname reported by MTA */
+ GString *msg; /**< message buffer */
+ struct rspamd_http_connection *http_conn; /**< HTTP server connection */
+ struct rspamd_async_session* s; /**< async session object */
+ gint parts_count; /**< mime parts count */
+ GMimeMessage *message; /**< message, parsed with GMime */
+ GMimeObject *parser_parent_part; /**< current parent part */
+ InternetAddressList *rcpts; /**< list of all recipients */
+ GList *parts; /**< list of parsed parts */
+ GList *text_parts; /**< list of text parts */
+ gchar *raw_headers_str; /**< raw headers as a string */
+ GList *received; /**< list of received headers */
+ GTree *urls; /**< tree of parsed urls */
+ GTree *emails; /**< tree of parsed emails */
+ GList *images; /**< list of images */
+ GHashTable *raw_headers; /**< hash table of raw headers */
+ GHashTable *results; /**< hash table of metric_result indexed by
+ * metric's name */
+ GHashTable *tokens; /**< hash table of tokens indexed by tokenizer
+ * pointer */
+ GList *messages; /**< list of messages that would be reported */
+ GHashTable *re_cache; /**< cache for matched or not matched regexps */
+ struct config_file *cfg; /**< pointer to config object */
+ gchar *last_error; /**< last error */
+ gint error_code; /**< code of last error */
+ rspamd_mempool_t *task_pool; /**< memory pool for task */
+#ifdef HAVE_CLOCK_GETTIME
+ struct timespec ts; /**< time of connection */
+#endif
+ struct timeval tv; /**< time of connection */
+ guint32 scan_milliseconds; /**< how many milliseconds passed */
+ gboolean pass_all_filters; /**< pass task through every rule */
+ gboolean no_log; /**< do not log or write this task to the history */
+ guint32 parser_recursion; /**< for avoiding recursion stack overflow */
+ gboolean (*fin_callback)(void *arg); /**< callback for filters finalizing */
+ void *fin_arg; /**< argument for fin callback */
+
+ guint32 dns_requests; /**< number of DNS requests per this task */
+
+ struct rspamd_dns_resolver *resolver; /**< DNS resolver */
+ struct event_base *ev_base; /**< Event base */
+
+ GThreadPool *classify_pool; /**< A pool of classify threads */
+
+ struct {
+ enum rspamd_metric_action action; /**< Action of pre filters (METRIC_ACTION_NOACTION initially) */
+ gchar *str; /**< String describing action */
+ } pre_result; /**< Result of pre-filters */
+};
+
+/**
+ * Construct new task for worker
+ */
+struct rspamd_task* rspamd_task_new (struct rspamd_worker *worker);
+/**
+ * Destroy task object: drops its HTTP connection reference, socket and memory pool
+ */
+void rspamd_task_free (struct rspamd_task *task, gboolean is_soft);
+void rspamd_task_free_hard (gpointer ud);
+void rspamd_task_free_soft (gpointer ud);
+
+/**
+ * Called if session was restored inside fin callback
+ */
+void rspamd_task_restore (void *arg);
+
+/**
+ * Called if all filters are processed
+ * @return TRUE if session should be terminated
+ */
+gboolean rspamd_task_fin (void *arg);
+
+#endif /* TASK_H_ */
diff --git a/src/libserver/url.c b/src/libserver/url.c
new file mode 100644
index 000000000..c4313e8a9
--- /dev/null
+++ b/src/libserver/url.c
@@ -0,0 +1,1620 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "url.h"
+#include "util.h"
+#include "fstring.h"
+#include "main.h"
+#include "message.h"
+#include "trie.h"
+
+#define POST_CHAR 1
+#define POST_CHAR_S "\001"
+
+/* Inclusive bounds of the TCP port range */
+#define LOWEST_PORT 0
+#define HIGHEST_PORT 65535
+
+/* Non-zero when port lies within the bounds; note: port is expanded twice */
+#define uri_port_is_valid(port) ((port) >= LOWEST_PORT && (port) <= HIGHEST_PORT)
+
+struct _proto { /* NOTE(review): no use of this struct is visible nearby; field notes are guesses from names */
+ guchar *name; /* presumably the protocol name - confirm */
+ gint port; /* presumably the default port - confirm */
+ uintptr_t *unused;
+ guint need_slashes:1; /* one-bit flag */
+ guint need_slash_after_host:1; /* one-bit flag */
+ guint free_syntax:1; /* one-bit flag */
+ guint need_ssl:1; /* one-bit flag */
+};
+
+typedef struct url_match_s { /* passed by pointer to the matcher start/end callbacks */
+ const gchar *m_begin; /* presumably start of the matched text - confirm */
+ gsize m_len; /* presumably length of the matched text - confirm */
+ const gchar *pattern; /* same name as url_matcher.pattern; presumably copied from it - confirm */
+ const gchar *prefix; /* same name as url_matcher.prefix; presumably copied from it - confirm */
+ gboolean add_prefix; /* NOTE(review): presumably whether prefix must be prepended - confirm */
+} url_match_t;
+
+#define URL_FLAG_NOHTML 0x1 /* bit for url_matcher.flags; set on "ftp." and TLD entries of matchers[] */
+#define URL_FLAG_STRICT_MATCH 0x2 /* bit for url_matcher.flags; set on TLD entries of matchers[] */
+
+struct url_matcher { /* element type of the matchers[] table */
+ const gchar *pattern; /* pattern text, e.g. "http://", "www." or ".com" in matchers[] */
+ const gchar *prefix; /* "" for full-scheme patterns; "http://" or "ftp://" for the other matchers[] entries */
+ gboolean (*start)(const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); /* start callback, e.g. url_web_start */
+ gboolean (*end)(const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match); /* end callback, e.g. url_web_end */
+ gint flags; /* 0 or an OR of URL_FLAG_* values */
+};
+
+static gboolean url_file_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+static gboolean url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+
+static gboolean url_web_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+static gboolean url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+
+static gboolean url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+static gboolean url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+
+static gboolean url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+static gboolean url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+
+struct url_matcher matchers[] = {
+ /* Common prefixes */
+ { "file://", "", url_file_start, url_file_end, 0 },
+ { "ftp://", "", url_web_start, url_web_end, 0 },
+ { "sftp://", "", url_web_start, url_web_end, 0 },
+ { "http://", "", url_web_start, url_web_end, 0 },
+ { "https://", "", url_web_start, url_web_end, 0 },
+ { "news://", "", url_web_start, url_web_end, 0 },
+ { "nntp://", "", url_web_start, url_web_end, 0 },
+ { "telnet://", "", url_web_start, url_web_end, 0 },
+ { "webcal://", "", url_web_start, url_web_end, 0 },
+ { "mailto://", "", url_email_start, url_email_end, 0 },
+ { "callto://", "", url_web_start, url_web_end, 0 },
+ { "h323:", "", url_web_start, url_web_end, 0 },
+ { "sip:", "", url_web_start, url_web_end, 0 },
+ { "www.", "http://", url_web_start, url_web_end, 0 },
+ { "ftp.", "ftp://", url_web_start, url_web_end, URL_FLAG_NOHTML },
+ /* TLD domains parts */
+ { ".ac", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ad", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ae", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".aero", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".af", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ag", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ai", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".al", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".am", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".an", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ao", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".aq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ar", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".arpa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".as", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".asia", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".at", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".au", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".aw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ax", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".az", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ba", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".be", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".biz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".br", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".by", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".bz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ca", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cat", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ch", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ci", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ck", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".co", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".com", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".coop", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".cz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".de", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".dj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".dk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".dm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".do", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".dz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ec", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".edu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ee", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".eg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".er", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".es", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".et", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".eu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".fi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".fj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".fk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".fm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".fo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".fr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ga", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ge", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gov", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".gy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".hk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".hm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".hn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".hr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ht", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".hu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".id", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ie", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".il", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".im", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".in", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".info", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".int", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".io", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".iq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ir", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".is", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".it", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".je", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".jm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".jo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".jobs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".jp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ke", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".kg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".kh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ki", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".km", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".kn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".kp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".kr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".kw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ky", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".kz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".la", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".lb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".lc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".li", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".lk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".lr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ls", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".lt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".lu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".lv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ly", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ma", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".md", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".me", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mil", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ml", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mobi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ms", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".museum", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".my", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".mz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".na", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".name", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".nc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ne", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".net", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".nf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ng", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ni", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".nl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".no", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".np", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".nr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".nu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".nz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".om", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".org", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pe", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ph", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pro", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ps", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".pw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".py", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".qa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".re", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ro", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".rs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ru", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".rw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".se", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".si", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".so", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".st", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".su", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".sz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".td", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tel", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".th", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".to", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".travel", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".tz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ua", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ug", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".uk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".us", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".uy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".uz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".va", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".vc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ve", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".vg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".vi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".vn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".vu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".wf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ws", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".xxx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".ye", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".yt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".za", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".zm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ { ".zw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+ /* Likely emails */
+ { "@", "mailto://",url_email_start, url_email_end, URL_FLAG_NOHTML }
+};
+
+ struct url_match_scanner { /* State used to detect URLs: the matcher table plus a trie built from its patterns */
+ struct url_matcher *matchers; /* matcher table; url_init() points it at the static matchers[] array */
+ gsize matchers_count; /* number of entries in matchers */
+ rspamd_trie_t *patterns; /* trie of patterns, each inserted with the index of its matcher */
+ };
+
+ struct url_match_scanner *url_scanner = NULL; /* shared scanner instance, created on first url_init() call */
+
+ static const struct _proto protocol_backends[] = { /* Per-protocol properties, indexed by enum protocol.
+  * get_protocol() binary-searches the names, so the named entries have to
+  * stay in ascending alphabetical order.
+  * NOTE(review): the column meanings come from struct _proto, which is
+  * declared elsewhere - confirm against that declaration before editing. */
+ {"file", 0, NULL, 1, 0, 0, 0},
+ {"ftp", 21, NULL, 1, 0, 0, 0},
+ {"http", 80, NULL, 1, 0, 0, 0},
+ {"https", 443, NULL, 1, 0, 0, 1},
+ {"mailto", 25, NULL, 1, 0, 0, 0},
+ /* Keep these last! The nameless entry is what the accessors read for an unknown protocol */
+ {NULL, 0, NULL, 0, 0, 1, 0}
+ };
+
+ /* Convert an ASCII hex digit to the corresponding number between 0
+ and 15. H should be a hexadecimal digit that satisfies isxdigit;
+ otherwise, the result is undefined. H is evaluated more than once,
+ so it must not have side effects. X2DIGITS_TO_NUM combines two
+ such digits (high nibble first) into one byte value. */
+ #define XDIGIT_TO_NUM(h) ((h) < 'A' ? (h) - '0' : g_ascii_toupper (h) - 'A' + 10)
+ #define X2DIGITS_TO_NUM(h1, h2) ((XDIGIT_TO_NUM (h1) << 4) + XDIGIT_TO_NUM (h2))
+ /* The reverse of the above: convert a number in the [0, 16) range to
+ the ASCII representation of the corresponding hexadecimal digit
+ (upper case for XNUM_TO_DIGIT, lower case for XNUM_TO_digit).
+ `+ 0' is there so you can't accidentally use it as an lvalue. */
+ #define XNUM_TO_DIGIT(x) ("0123456789ABCDEF"[x] + 0)
+ #define XNUM_TO_digit(x) ("0123456789abcdef"[x] + 0)
+
+ static guchar url_scanner_table[256] = { /* Character class of every byte value.
+  * Each entry is a bitmask of the IS_* flags declared right after this table
+  * and is consulted through the is_ctrl()/is_atom()/is_domain()/... macros.
+  * Sixteen entries per row, so row N covers bytes 16*N .. 16*N+15. */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 1, 1, 9, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 24,128,160,128,128,128,128,128,160,160,128,128,160,192,160,160,
+ 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,160,160, 32,128, 32,128,
+ 160, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,160,160,160,128,192,
+ 128, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,128,128,128,128, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ };
+
+ enum { /* Bit flags stored in url_scanner_table; the notes describe which bytes carry each flag there */
+ IS_CTRL = (1 << 0), /* bytes 0x00-0x1f, 0x7f and everything >= 0x80 */
+ IS_ALPHA = (1 << 1), /* ASCII letters */
+ IS_DIGIT = (1 << 2), /* '0'-'9' */
+ IS_LWSP = (1 << 3), /* tab, LF, CR and space */
+ IS_SPACE = (1 << 4), /* the space character only */
+ IS_SPECIAL = (1 << 5), /* punctuation such as '"' '(' ')' ',' '.' '/' ':' ';' '<' '>' '@' '[' '\\' ']' */
+ IS_DOMAIN = (1 << 6), /* letters, digits, '-' and '_' */
+ IS_URLSAFE = (1 << 7) /* printable punctuation, including the IS_SPECIAL set except '<' and '>' */
+ };
+
+ #define is_ctrl(x) ((url_scanner_table[(guchar)(x)] & IS_CTRL) != 0) /* the (guchar) cast keeps the index non-negative for signed chars */
+ #define is_lwsp(x) ((url_scanner_table[(guchar)(x)] & IS_LWSP) != 0)
+ #define is_atom(x) ((url_scanner_table[(guchar)(x)] & (IS_SPECIAL|IS_SPACE|IS_CTRL)) == 0) /* true when none of the three flags is set */
+ #define is_alpha(x) ((url_scanner_table[(guchar)(x)] & IS_ALPHA) != 0)
+ #define is_digit(x) ((url_scanner_table[(guchar)(x)] & IS_DIGIT) != 0)
+ #define is_domain(x) ((url_scanner_table[(guchar)(x)] & IS_DOMAIN) != 0)
+ #define is_urlsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_ALPHA|IS_DIGIT|IS_URLSAFE)) != 0) /* true when any of the three flags is set */
+
+
+ /*
+  * Translate a URI parser status code into a static, human readable message.
+  * Returns NULL when ERR is not one of the codes handled below.
+  */
+ const gchar *
+ url_strerror (enum uri_errno err)
+ {
+     const gchar *description = NULL;
+
+     switch (err) {
+     case URI_ERRNO_OK:
+         description = "Parsing went well";
+         break;
+     case URI_ERRNO_EMPTY:
+         description = "The URI string was empty";
+         break;
+     case URI_ERRNO_INVALID_PROTOCOL:
+         description = "No protocol was found";
+         break;
+     case URI_ERRNO_NO_SLASHES:
+         description = "Slashes after protocol missing";
+         break;
+     case URI_ERRNO_TOO_MANY_SLASHES:
+         description = "Too many slashes after protocol";
+         break;
+     case URI_ERRNO_TRAILING_DOTS:
+         description = "'.' after host";
+         break;
+     case URI_ERRNO_NO_HOST:
+         description = "Host part is missing";
+         break;
+     case URI_ERRNO_NO_PORT_COLON:
+         description = "':' after host without port";
+         break;
+     case URI_ERRNO_NO_HOST_SLASH:
+         description = "Slash after host missing";
+         break;
+     case URI_ERRNO_IPV6_SECURITY:
+         description = "IPv6 security bug detected";
+         break;
+     case URI_ERRNO_INVALID_PORT:
+         description = "Port number is bad";
+         break;
+     case URI_ERRNO_INVALID_PORT_RANGE:
+         description = "Port number is not within 0-65535";
+         break;
+     }
+
+     return description;
+ }
+
+ /*
+  * Length of the leading part of NAME that contains neither a POST_CHAR_S
+  * character nor '#' nor '?'.
+  */
+ static gint
+ check_uri_file (gchar *name)
+ {
+     static const gchar stop_chars[] = POST_CHAR_S "#?";
+     gint prefix_len;
+
+     prefix_len = strcspn (name, stop_chars);
+
+     return prefix_len;
+ }
+
+ /*
+  * Build the global url_scanner on first use: every matcher pattern goes into
+  * the trie under the index of its matcher. Strict matchers are inserted
+  * three times, each time followed by one of '/', ' ' or ':'.
+  * Always returns 0; later calls are no-ops.
+  */
+ static gint
+ url_init (void)
+ {
+     /* Formats appending the delimiters accepted after a strict pattern */
+     static const gchar *const strict_formats[] = { "%s/", "%s ", "%s:" };
+     gchar pattern_buf[128];
+     guint idx, fmt;
+
+     if (url_scanner != NULL) {
+         return 0;
+     }
+
+     url_scanner = g_malloc (sizeof (struct url_match_scanner));
+     url_scanner->matchers = matchers;
+     url_scanner->matchers_count = G_N_ELEMENTS (matchers);
+     url_scanner->patterns = rspamd_trie_create (TRUE);
+
+     for (idx = 0; idx < url_scanner->matchers_count; idx++) {
+         if (!(matchers[idx].flags & URL_FLAG_STRICT_MATCH)) {
+             rspamd_trie_insert (url_scanner->patterns, matchers[idx].pattern, idx);
+             continue;
+         }
+
+         /* Insert more specific patterns: "some.tld/", "some.tld " and "some.tld:" */
+         for (fmt = 0; fmt < G_N_ELEMENTS (strict_formats); fmt++) {
+             rspamd_snprintf (pattern_buf, sizeof (pattern_buf), strict_formats[fmt], matchers[idx].pattern);
+             rspamd_trie_insert (url_scanner->patterns, pattern_buf, idx);
+         }
+     }
+
+     return 0;
+ }
+
+ /*
+  * Look up the protocol whose name equals the first NAMELEN characters of
+  * NAME (ASCII case-insensitive). Returns PROTOCOL_UNKNOWN when nothing
+  * matches.
+  */
+ enum protocol
+ get_protocol (gchar *name, gint namelen)
+ {
+     /* The bounds are plain ints because they may drop below zero, which
+      * enum protocol values cannot be relied upon to represent. */
+     gint lo = 0, hi = PROTOCOL_UNKNOWN - 1;
+     /* Binary search over protocol_backends, probing the HTTP entry first
+      * since it is the most common one. */
+     enum protocol probe = PROTOCOL_HTTP;
+     guchar *candidate;
+     gint candidate_len, order;
+
+     for (; lo <= hi; probe = (lo + hi) / 2) {
+         candidate = protocol_backends[probe].name;
+         candidate_len = strlen (candidate);
+         order = g_ascii_strncasecmp (candidate, name, MIN (candidate_len, namelen));
+
+         if (order == 0) {
+             if (candidate_len == namelen) {
+                 return probe;
+             }
+
+             /* Common prefix only: the shorter of the two names sorts first */
+             order = candidate_len > namelen ? 1 : -1;
+         }
+
+         if (order > 0) {
+             hi = probe - 1;
+         }
+         else {
+             lo = probe + 1;
+         }
+     }
+
+     return PROTOCOL_UNKNOWN;
+ }
+
+
+ /* Port recorded for PROTOCOL in protocol_backends. */
+ gint
+ get_protocol_port (enum protocol protocol)
+ {
+     const struct _proto *backend = &protocol_backends[protocol];
+
+     return backend->port;
+ }
+
+ /* need_slashes setting recorded for PROTOCOL in protocol_backends. */
+ gint
+ get_protocol_need_slashes (enum protocol protocol)
+ {
+     const struct _proto *backend = &protocol_backends[protocol];
+
+     return backend->need_slashes;
+ }
+
+ /* need_slash_after_host setting recorded for PROTOCOL in protocol_backends. */
+ gint
+ get_protocol_need_slash_after_host (enum protocol protocol)
+ {
+     const struct _proto *backend = &protocol_backends[protocol];
+
+     return backend->need_slash_after_host;
+ }
+
+ /* free_syntax setting recorded for PROTOCOL in protocol_backends. */
+ gint
+ get_protocol_free_syntax (enum protocol protocol)
+ {
+     const struct _proto *backend = &protocol_backends[protocol];
+
+     return backend->free_syntax;
+ }
+
+ /*
+  * Length of the scheme name at the start of URL, or 0 when the leading run
+  * of scheme characters is empty or is not followed by ':'.
+  *
+  * RFC1738: scheme = 1*[ lowalpha | digit | "+" | "-" | "." ]
+  * (per its recommendations "upalpha" is accepted too).
+  */
+ static gint
+ get_protocol_length (const gchar *url)
+ {
+     const gchar *cursor = url;
+
+     for (;;) {
+         if (!g_ascii_isalnum (*cursor) && *cursor != '+' && *cursor != '-' && *cursor != '.') {
+             break;
+         }
+         cursor++;
+     }
+
+     if (*cursor != ':') {
+         return 0;
+     }
+
+     /* This is 0 as well when the string starts with ':' */
+     return cursor - url;
+ }
+
+
+/*
+ * Calcualte new length of unescaped hostlen
+ */
+static guint
+url_calculate_escaped_hostlen (gchar *host, guint hostlen)
+{
+ guint i, result = hostlen;
+ gchar *p = host, c;
+
+ for (i = 0; i < hostlen; i++, p++) {
+ if (*p == '%' && g_ascii_isxdigit (*(p + 1)) && g_ascii_isxdigit (*(p + 2)) && i < hostlen - 2) {
+ c = X2DIGITS_TO_NUM (*(p + 1), *(p + 2));
+ if (c != '\0') {
+ result -= 2;
+ }
+ }
+ }
+
+ return result;
+}
+
+ /*
+  * Decode the %HH sequences of S in place.
+  *
+  * Each "%HH" with two hexadecimal digits becomes the byte it encodes. A '%'
+  * that is not followed by two hex digits is copied literally, and so is
+  * "%00", because a NUL byte would truncate the C string. The result is never
+  * longer than the input; make a copy first if the original is still needed.
+  */
+ static void
+ url_unescape (gchar *s)
+ {
+     gchar *dst = s;
+     gchar *src = s;
+     gchar decoded;
+
+     for (; *src != '\0'; src++, dst++) {
+         decoded = '\0';
+
+         if (src[0] == '%' && src[1] != '\0' && src[2] != '\0' &&
+                 g_ascii_isxdigit (src[1]) && g_ascii_isxdigit (src[2])) {
+             decoded = X2DIGITS_TO_NUM (src[1], src[2]);
+         }
+
+         if (decoded == '\0') {
+             /* Ordinary character, malformed escape or %00: keep as is */
+             *dst = *src;
+             continue;
+         }
+
+         *dst = decoded;
+         /* Skip the two hex digits; the loop step skips the '%' itself */
+         src += 2;
+     }
+
+     *dst = '\0';
+ }
+
+ /*
+  * Remove, in place, every character of S for which g_ascii_isgraph() is
+  * false, closing the gaps.
+  */
+ static void
+ url_strip (gchar *s)
+ {
+     const gchar *src;
+     gchar *dst = s;
+
+     for (src = s; *src != '\0'; src++) {
+         if (g_ascii_isgraph (*src)) {
+             *dst++ = *src;
+         }
+     }
+
+     *dst = '\0';
+ }
+
+ /*
+  * Percent-encode every character of S that is_urlsafe() rejects.
+  *
+  * When nothing needs encoding the function returns S itself if
+  * ALLOW_PASSTHROUGH is non-zero, otherwise a copy made with
+  * rspamd_mempool_strdup(). In all other cases the encoded string is
+  * allocated from POOL.
+  */
+ static gchar *
+ url_escape_1 (const gchar *s, gint allow_passthrough, rspamd_mempool_t * pool)
+ {
+     const gchar *src;
+     gchar *escaped, *dst;
+     gint unsafe_count = 0;
+     gint escaped_len;
+     guchar ch;
+
+     for (src = s; *src != '\0'; src++) {
+         if (!is_urlsafe (*src)) {
+             unsafe_count++;
+         }
+     }
+
+     if (unsafe_count == 0) {
+         return allow_passthrough ? (gchar *)s : rspamd_mempool_strdup (pool, s);
+     }
+
+     /* Every unsafe character grows from one byte to three ("%HH") */
+     escaped_len = (src - s) + 2 * unsafe_count;
+     escaped = (gchar *)rspamd_mempool_alloc (pool, escaped_len + 1);
+
+     dst = escaped;
+     for (src = s; *src != '\0'; src++) {
+         if (is_urlsafe (*src)) {
+             *dst++ = *src;
+             continue;
+         }
+
+         ch = *src;
+         *dst++ = '%';
+         *dst++ = XNUM_TO_DIGIT (ch >> 4);
+         *dst++ = XNUM_TO_DIGIT (ch & 0xf);
+     }
+     *dst = '\0';
+
+     return escaped;
+ }
+
+ /* URL-escape the characters of S that is_urlsafe() rejects (see
+    url_scanner_table). The result is always a new string obtained
+    from POOL, even when nothing had to be escaped. */
+
+ gchar *
+ url_escape (const gchar *s, rspamd_mempool_t * pool)
+ {
+     gchar *escaped;
+
+     /* Passthrough disabled: the caller always gets its own copy */
+     escaped = url_escape_1 (s, 0, pool);
+
+     return escaped;
+ }
+
+ /* Decide whether the character at position P has to be written as %XX.
+    A pointer is taken rather than a single character because a '%' is
+    judged by the two characters that follow it.
+
+    Returns TRUE if the character should be escaped, FALSE otherwise. */
+
+ static inline gboolean
+ char_needs_escaping (const gchar *p)
+ {
+     if (*p != '%') {
+         return is_urlsafe (*p) ? FALSE : TRUE;
+     }
+
+     /* A '%' stays as it is only when it already starts a valid %HH sequence */
+     if (g_ascii_isxdigit (p[1]) && g_ascii_isxdigit (p[2])) {
+         return FALSE;
+     }
+
+     return TRUE;
+ }
+
+ /* Turn a %-escaped but possibly non-conformant string S into a
+    conformant %-escaped one: every character flagged by
+    char_needs_escaping() is replaced with %HH.
+
+    Returns S itself when nothing has to change, otherwise a new
+    string allocated from POOL. */
+
+ static gchar *
+ reencode_escapes (gchar *s, rspamd_mempool_t * pool)
+ {
+     const gchar *src;
+     gchar *result, *dst;
+     gint to_encode = 0;
+     gint src_len, result_len;
+     guchar ch;
+
+     /* Pass one: count what has to be encoded; src ends up on the NUL */
+     for (src = s; *src != '\0'; src++) {
+         if (char_needs_escaping (src)) {
+             to_encode++;
+         }
+     }
+
+     if (to_encode == 0) {
+         /* The string is good as it is. */
+         return s;
+     }
+
+     src_len = src - s;
+     /* Each encoding adds two characters (hex digits). */
+     result_len = src_len + 2 * to_encode;
+     result = rspamd_mempool_alloc (pool, result_len + 1);
+
+     /* Pass two: copy, expanding the flagged characters */
+     dst = result;
+     for (src = s; *src != '\0'; src++) {
+         if (!char_needs_escaping (src)) {
+             *dst++ = *src;
+             continue;
+         }
+
+         ch = *src;
+         *dst++ = '%';
+         *dst++ = XNUM_TO_DIGIT (ch >> 4);
+         *dst++ = XNUM_TO_DIGIT (ch & 0xf);
+     }
+
+     *dst = '\0';
+     return result;
+ }
+
+ /* Unescape CHR in an otherwise escaped STR. Used to selectively undo
+    the escaping of certain characters, such as "/" and ":". Works in
+    place and returns nothing. Only the upper-case form of the escape
+    (as produced by XNUM_TO_DIGIT) is recognised. */
+
+ static void
+ unescape_single_char (gchar *str, gchar chr)
+ {
+     /* Go through an unsigned byte: shifting a negative gchar would give a
+      * negative index into the hex digit table for characters >= 0x80. */
+     const guchar code = (guchar)chr;
+     const gchar c1 = XNUM_TO_DIGIT (code >> 4);
+     const gchar c2 = XNUM_TO_DIGIT (code & 0xf);
+     gchar *h = str; /* hare */
+     gchar *t = str; /* tortoise */
+
+     for (; *h; h++, t++) {
+         /* c1 is never NUL, so h[2] is only read when h[1] is a real character */
+         if (h[0] == '%' && h[1] == c1 && h[2] == c2) {
+             *t = chr;
+             h += 2;
+         }
+         else {
+             *t = *h;
+         }
+     }
+     *t = '\0';
+ }
+
+
+/*
+ * Resolve "." and ".." elements of PATH by destructively modifying
+ * PATH and return non-zero if PATH has been modified, zero otherwise.
+ */
+
+static gboolean
+path_simplify (gchar *path)
+{
+ gchar *h = path; /* hare */
+ gchar *t = path; /* tortoise */
+ gchar *beg = path; /* boundary for backing the tortoise */
+ gchar *end = path + strlen (path);
+
+ while (h < end) {
+ /* Hare should be at the beginning of a path element. */
+ if (h[0] == '.' && (h[1] == '/' || h[1] == '\0')) {
+ /* Ignore "./". */
+ h += 2;
+ }
+ else if (h[0] == '.' && h[1] == '.' && (h[2] == '/' || h[2] == '\0')) {
+ /* Handle "../" by retreating the tortoise by one path
+ element -- but not past beginning. */
+ if (t > beg) {
+ /* Move backwards until T hits the beginning of the
+ previous path element or the beginning of path. */
+ for (--t; t > beg && t[-1] != '/'; t--);
+ }
+ else {
+ /* If we're at the beginning, copy the "../" literally
+ move the beginning so a later ".." doesn't remove
+ it. */
+ beg = t + 3;
+ goto regular;
+ }
+ h += 3;
+ }
+ else {
+ regular:
+ /* A regular path element. If H hasn't advanced past T,
+ simply skip to the next path element. Otherwise, copy
+ the path element until the next slash. */
+ if (t == h) {
+ /* Skip the path element, including the slash. */
+ while (h < end && *h != '/')
+ t++, h++;
+ if (h < end)
+ t++, h++;
+ }
+ else {
+ /* Copy the path element, including the final slash. */
+ while (h < end && *h != '/')
+ *t++ = *h++;
+ if (h < end)
+ *t++ = *h++;
+ }
+ }
+ }
+
+ if (t != h)
+ *t = '\0';
+
+ return t != h;
+}
+ /*
+  * Parse URISTRING into URI.
+  *
+  * The string is first passed through reencode_escapes(). When it has no
+  * protocol prefix, or the prefix is not a known protocol, "http://" is
+  * prepended to a copy allocated from POOL and the protocol is taken to be
+  * HTTP. The user, password, host, port, data, fragment and post members
+  * are then set as pointers (and lengths) into uri->string rather than as
+  * separate copies.
+  *
+  * Returns URI_ERRNO_OK on success, otherwise the uri_errno describing the
+  * first problem found; in that case URI is only partially filled.
+  */
+ enum uri_errno
+ parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool)
+ {
+ guchar *prefix_end, *host_end, *p;
+ guchar *lbracket, *rbracket;
+ gint datalen, n, addrlen;
+ guchar *frag_or_post, *user_end, *port_end;
+
+ memset (uri, 0, sizeof (*uri));
+
+ /* Nothing to do for an empty url. */
+ if (!*uristring)
+ return URI_ERRNO_EMPTY;
+
+ uri->string = reencode_escapes (uristring, pool);
+ msg_debug ("reencoding escapes in original url: '%s'", struri (uri));
+ uri->protocollen = get_protocol_length (struri (uri));
+
+ /* Assume http as default protocol */
+ if (!uri->protocollen || (uri->protocol = get_protocol (struri (uri), uri->protocollen)) == PROTOCOL_UNKNOWN) {
+ /* Make exception for numeric urls: a string consisting only of
+ alphanumerics and ':' is rejected instead of being treated as a host */
+ p = uri->string;
+ while (*p && (g_ascii_isalnum (*p) || *p == ':')) {
+ p ++;
+ }
+ if (*p == '\0') {
+ return URI_ERRNO_INVALID_PROTOCOL;
+ }
+ p = g_strconcat ("http://", uri->string, NULL);
+ uri->string = rspamd_mempool_strdup (pool, p);
+ g_free (p);
+ uri->protocol = PROTOCOL_HTTP;
+ prefix_end = struri (uri) + 7; /* 7 == strlen ("http://") */
+ }
+ else {
+ /* Figure out whether the protocol is known */
+ msg_debug ("getting protocol from url: %d", uri->protocol);
+
+ prefix_end = struri (uri) + uri->protocollen; /* ':' */
+
+ /* Check if there's a digit after the protocol name; it is stored as ip_family. */
+ if (g_ascii_isdigit (*prefix_end)) {
+ p = struri (uri);
+ uri->ip_family = p[uri->protocollen] - '0';
+ prefix_end++;
+ }
+ if (*prefix_end != ':') {
+ msg_debug ("invalid protocol in uri");
+ return URI_ERRNO_INVALID_PROTOCOL;
+ }
+ prefix_end++;
+
+ /* Skip slashes: exactly two are required, a third one is an error */
+
+ if (prefix_end[0] == '/' && prefix_end[1] == '/') {
+ if (prefix_end[2] == '/') {
+ msg_debug ("too many '/' in uri");
+ return URI_ERRNO_TOO_MANY_SLASHES;
+ }
+
+ prefix_end += 2;
+
+ }
+ else {
+ msg_debug ("no '/' in uri");
+ return URI_ERRNO_NO_SLASHES;
+ }
+ }
+
+ if (get_protocol_free_syntax (uri->protocol)) {
+ /* Free-syntax protocols: everything after the prefix is opaque data */
+ uri->data = prefix_end;
+ uri->datalen = strlen (prefix_end);
+ return URI_ERRNO_OK;
+
+ }
+ else if (uri->protocol == PROTOCOL_FILE) {
+ datalen = check_uri_file (prefix_end);
+ frag_or_post = prefix_end + datalen;
+
+ /* Extract the fragment part. */
+ if (datalen >= 0) { /* NOTE(review): datalen comes from strcspn via check_uri_file, so the else branch looks unreachable */
+ if (*frag_or_post == '#') {
+ uri->fragment = frag_or_post + 1;
+ uri->fragmentlen = strcspn (uri->fragment, POST_CHAR_S);
+ frag_or_post = uri->fragment + uri->fragmentlen;
+ }
+ if (*frag_or_post == POST_CHAR) {
+ uri->post = frag_or_post + 1;
+ }
+ }
+ else {
+ datalen = strlen (prefix_end);
+ }
+
+ uri->data = prefix_end;
+ uri->datalen = datalen;
+
+ return URI_ERRNO_OK;
+ }
+
+ /* Isolate host */
+
+ /* Get brackets enclosing IPv6 address */
+ lbracket = strchr (prefix_end, '[');
+ if (lbracket) {
+ rbracket = strchr (lbracket, ']');
+ /* [address] is handled only inside of hostname part (surprisingly). */
+ if (rbracket && rbracket < prefix_end + strcspn (prefix_end, "/"))
+ uri->ipv6 = 1;
+ else
+ lbracket = rbracket = NULL;
+ }
+ else {
+ rbracket = NULL;
+ }
+
+ /* Possibly skip auth part */
+ host_end = prefix_end + strcspn (prefix_end, "@");
+
+ if (prefix_end + strcspn (prefix_end, "/?") > host_end && *host_end) { /* we have auth info here */
+
+ /* Allow '@' in the password component: advance to the last '@' before the first '/' or '?' */
+ while (strcspn (host_end + 1, "@") < strcspn (host_end + 1, "/?"))
+ host_end = host_end + 1 + strcspn (host_end + 1, "@");
+
+ user_end = strchr (prefix_end, ':');
+
+ if (!user_end || user_end > host_end) {
+ uri->user = prefix_end;
+ uri->userlen = host_end - prefix_end;
+ }
+ else {
+ uri->user = prefix_end;
+ uri->userlen = user_end - prefix_end;
+ uri->password = user_end + 1;
+ uri->passwordlen = host_end - user_end - 1;
+ }
+ prefix_end = host_end + 1;
+ }
+
+ if (uri->ipv6 && rbracket != NULL) {
+ host_end = rbracket + strcspn (rbracket, ":/?");
+ }
+ else {
+ host_end = prefix_end + strcspn (prefix_end, ":/?");
+ }
+
+ if (uri->ipv6) {
+ addrlen = rbracket - lbracket - 1;
+
+
+ uri->host = lbracket + 1;
+ uri->hostlen = addrlen;
+ }
+ else {
+ uri->host = prefix_end;
+ uri->hostlen = host_end - prefix_end;
+
+ /* A trailing '.' is not trimmed: it is reported as an error */
+ if (uri->hostlen && uri->host[uri->hostlen - 1] == '.')
+ return URI_ERRNO_TRAILING_DOTS;
+ }
+
+ if (*host_end == ':') { /* we have port here */
+ port_end = host_end + 1 + strcspn (host_end + 1, "/");
+
+ host_end++;
+
+ uri->port = host_end;
+ uri->portlen = port_end - host_end;
+
+ if (uri->portlen == 0)
+ return URI_ERRNO_NO_PORT_COLON;
+
+ /* We only use 8 bits for portlen so better check */
+ if ((gint)uri->portlen != port_end - host_end)
+ return URI_ERRNO_INVALID_PORT;
+
+ /* test if port is number; host_end is left at port_end afterwards */
+ for (; host_end < port_end; host_end++)
+ if (!g_ascii_isdigit (*host_end))
+ return URI_ERRNO_INVALID_PORT;
+
+ /* Check valid port value, and let show an error message
+ * about invalid url syntax. */
+ if (uri->port && uri->portlen) {
+
+ errno = 0;
+ n = strtol (uri->port, NULL, 10);
+ if (errno || !uri_port_is_valid (n))
+ return URI_ERRNO_INVALID_PORT;
+ }
+ }
+
+ if (*host_end == '/') {
+ host_end++;
+
+ }
+ else if (get_protocol_need_slash_after_host (uri->protocol) && *host_end != '?') {
+ /* The need for slash after the host component depends on the
+ * need for a host component. -- The dangerous mind of Jonah */
+ if (!uri->hostlen)
+ return URI_ERRNO_NO_HOST;
+
+ return URI_ERRNO_NO_HOST_SLASH;
+ }
+
+ /* Look for #fragment or POST_CHAR */
+ prefix_end = host_end + strcspn (host_end, "#" POST_CHAR_S);
+ uri->data = host_end;
+ uri->datalen = prefix_end - host_end;
+
+ if (*prefix_end == '#') {
+ uri->fragment = prefix_end + 1;
+ uri->fragmentlen = strcspn (uri->fragment, POST_CHAR_S);
+ prefix_end = uri->fragment + uri->fragmentlen;
+ }
+
+ if (*prefix_end == POST_CHAR) {
+ uri->post = prefix_end + 1;
+ }
+
+ convert_to_lowercase (uri->string, uri->protocollen);
+ convert_to_lowercase (uri->host, uri->hostlen);
+ /* Decode %HH sequences in host name. This is important not so much
+ to support %HH sequences in host names (which other browser
+ don't), but to support binary characters (which will have been
+ converted to %HH by reencode_escapes). */
+ if (strchr (uri->host, '%')) {
+ uri->hostlen = url_calculate_escaped_hostlen (uri->host, uri->hostlen);
+ }
+
+ url_strip (struri (uri)); /* removes every non-graphic byte from the whole string, in place */
+ url_unescape (uri->host); /* decodes %HH from the host up to the end of the string, in place */
+ /* NOTE(review): both calls above shift bytes to the left although the
+ data/fragment/post pointers were computed before - confirm whether those
+ members are expected to stay accurate. */
+
+ path_simplify (uri->data);
+
+ return URI_ERRNO_OK;
+ }
+
+ static const gchar url_braces[] = { /* Flat list of delimiter pairs:
+  * even index = opening character, following odd index = its closer. */
+ '(', ')' ,
+ '{', '}' ,
+ '[', ']' ,
+ '<', '>' ,
+ '|', '|' ,
+ '\'', '\''
+ };
+
+ /* TRUE when C is one of the opening delimiters '(' '{' '[' '<' '|' or '\''. */
+ static gboolean
+ is_open_brace (gchar c)
+ {
+     switch (c) {
+     case '(':
+     case '{':
+     case '[':
+     case '<':
+     case '|':
+     case '\'':
+         return TRUE;
+     default:
+         return FALSE;
+     }
+ }
+
+ /*
+  * Start callback of the file matcher: the match begins exactly where the
+  * pattern was found. BEGIN and END are not consulted. Always returns TRUE.
+  */
+ static gboolean
+ url_file_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+ {
+     (void)begin;
+     (void)end;
+
+     match->m_begin = pos;
+
+     return TRUE;
+ }
+ /*
+  * End callback of the file matcher: starting right after the matched
+  * pattern, extend the match over url-safe characters until END or the stop
+  * character, and store the resulting length in match->m_len.
+  *
+  * The stop character is the closing counterpart from url_braces when the
+  * path starts with an opening delimiter; otherwise it is the character that
+  * directly follows the pattern.
+  * NOTE(review): with that default a path not starting with '/' or a
+  * delimiter stops at once - confirm that this is intended.
+  */
+ static gboolean
+ url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+ {
+     const gchar *p;
+     gchar stop;
+     guint i;
+
+     p = pos + strlen (match->pattern);
+     stop = *p;
+     if (*p == '/') {
+         p ++;
+     }
+
+     /* url_braces is a flat array of (open, close) pairs, so the whole array
+      * has to be walked two entries at a time. Halving the bound as before
+      * looked at the first three pairs only and never saw '<', '|' or '\''. */
+     for (i = 0; i + 1 < G_N_ELEMENTS (url_braces); i += 2) {
+         if (*p == url_braces[i]) {
+             stop = url_braces[i + 1];
+             break;
+         }
+     }
+
+     while (p < end && *p != stop && is_urlsafe (*p)) {
+         p ++;
+     }
+
+     if (p == begin) {
+         return FALSE;
+     }
+     match->m_len = p - match->m_begin;
+
+     return TRUE;
+
+ }
+
+ /*
+  * Start callback of the TLD matchers: walk backwards from POS (where the
+  * ".tld" pattern was found) to the first character of the host name and
+  * store it in match->m_begin.
+  *
+  * Returns FALSE when no acceptable start exists: the candidate does not
+  * begin with an alphanumeric character, a '.' is first in the buffer or is
+  * not followed by an alphanumeric, or a '/' is met on the way back.
+  */
+ static gboolean
+ url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+ {
+     const gchar *cur;
+     const gchar *start;
+
+     for (cur = pos; cur >= begin; cur--) {
+         if ((!is_domain (*cur) && *cur != '.' && *cur != '/') || g_ascii_isspace (*cur)) {
+             /* Hit a delimiter: the url starts right after it */
+             start = cur + 1;
+             if (!g_ascii_isalnum (*start)) {
+                 /* Urls cannot start with strange symbols */
+                 return FALSE;
+             }
+             match->m_begin = start;
+             return TRUE;
+         }
+
+         if (cur == begin && cur != pos) {
+             /* Reached the very beginning of the buffer */
+             match->m_begin = cur;
+             return TRUE;
+         }
+
+         if (*cur == '.') {
+             if (cur == begin) {
+                 /* Urls cannot start with a dot */
+                 return FALSE;
+             }
+             if (!g_ascii_isalnum (cur[1])) {
+                 /* Invalid character right after the dot */
+                 return FALSE;
+             }
+         }
+         else if (*cur == '/') {
+             /* Urls cannot contain '/' in their body */
+             return FALSE;
+         }
+     }
+
+     return FALSE;
+ }
+
+ /*
+  * End callback of the TLD matchers. The match is accepted as it is when the
+  * pattern is followed by the end of the buffer, whitespace or ','. When it
+  * is followed by '/' or ':' the rest is delegated to url_web_end(), after
+  * skipping a leading "http://" of the match if present. Anything else is
+  * rejected.
+  */
+ static gboolean
+ url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+ {
+     const gchar *after;
+     const gchar *start;
+
+     after = pos + strlen (match->pattern);
+
+     /* A url must be finished by tld, so it must be followed by space character */
+     if (after == end || g_ascii_isspace (*after) || *after == ',') {
+         match->m_len = after - match->m_begin;
+         return TRUE;
+     }
+
+     if (*after != '/' && *after != ':') {
+         return FALSE;
+     }
+
+     /* Parse arguments, ports by normal way by url default function */
+     start = match->m_begin;
+     /* Check common prefix */
+     if (g_ascii_strncasecmp (start, "http://", sizeof ("http://") - 1) == 0) {
+         start += sizeof ("http://") - 1;
+     }
+
+     return url_web_end (begin, end, start, match);
+ }
+
+ /*
+  * Start callback of the web matchers. A match that begins with "www" or
+  * "ftp" and is not at the start of the buffer is accepted only when the
+  * preceding character is whitespace or an opening delimiter. A match
+  * starting with '.' is always rejected. On success match->m_begin is set
+  * to POS.
+  */
+ static gboolean
+ url_web_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+ {
+     gchar prev;
+
+     /* Check what we have found */
+     if (pos > begin && (g_ascii_strncasecmp (pos, "www", 3) == 0 || g_ascii_strncasecmp (pos, "ftp", 3) == 0)) {
+         prev = pos[-1];
+         if (!(is_open_brace (prev) || g_ascii_isspace (prev))) {
+             return FALSE;
+         }
+     }
+
+     if (*pos == '.') {
+         /* Urls cannot start with . */
+         return FALSE;
+     }
+
+     match->m_begin = pos;
+
+     return TRUE;
+ }
+
+/*
+ * End matcher for web-like urls: measures how far the url that starts at
+ * pos extends and stores the result in match->m_len (relative to pos).
+ *
+ * The scanner walks, in order: an optional "user@" or "user:password@"
+ * prefix, the host name (bytes with the high bit set are accepted as part of
+ * the host), an optional ":port" and an optional path or query. Trailing
+ * punctuation and closing quotes are stripped from the result.
+ *
+ * Every read of *p is guarded by p < end, so the function never looks at
+ * bytes outside of [begin, end).
+ *
+ * @param begin start of the scanned text (unused here)
+ * @param end end of the scanned text (one past the last character)
+ * @param pos start of the url; the matched pattern is expected at pos
+ * @param match match state; pattern is read, m_len is written
+ * @return TRUE if something that looks like a host follows the pattern
+ */
+static gboolean
+url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+{
+	const gchar *p, *c;
+	gchar open_brace = '\0', close_brace = '\0';
+	gint brace_stack = 0;
+	gboolean passwd = FALSE;
+	guint port, i;
+
+	p = pos + strlen (match->pattern);
+	if (p >= end) {
+		/* Nothing follows the pattern and *p must not be read */
+		return FALSE;
+	}
+
+	/*
+	 * url_braces is a flat list of open/close pairs, so walk the whole
+	 * table two elements at a time.
+	 */
+	for (i = 0; i + 1 < G_N_ELEMENTS (url_braces); i += 2) {
+		if (*p == url_braces[i]) {
+			close_brace = url_braces[i + 1];
+			open_brace = *p;
+			break;
+		}
+	}
+
+	/* find the end of the domain */
+	if (is_atom (*p)) {
+		/* might be a domain or user@domain */
+		c = p;
+		while (p < end) {
+			if (!is_atom (*p)) {
+				break;
+			}
+
+			p++;
+
+			while (p < end && is_atom (*p)) {
+				p++;
+			}
+
+			if ((p + 1) < end && *p == '.' && (is_atom (*(p + 1)) || *(p + 1) == '/')) {
+				p++;
+			}
+		}
+
+		if (p < end && *p == '@') {
+			/* It was a user name, the host starts after '@' */
+			p++;
+		}
+		else {
+			/* No user part, rescan the same text as a host name */
+			p = c;
+		}
+
+		goto domain;
+	}
+	else if (is_domain (*p) || (*p & 0x80)) {
+domain:
+		while (p < end) {
+			if (!is_domain (*p) && !(*p & 0x80)) {
+				break;
+			}
+
+			p++;
+
+			while (p < end && (is_domain (*p) || (*p & 0x80))) {
+				p++;
+			}
+
+			if ((p + 1) < end && *p == '.' && (is_domain (*(p + 1)) || *(p + 1) == '/' || (*(p + 1) & 0x80))) {
+				p++;
+			}
+		}
+	}
+	else {
+		return FALSE;
+	}
+
+	if (p < end) {
+		switch (*p) {
+		case ':': /* we either have a port or a password */
+			p++;
+
+			if (p < end && (is_digit (*p) || passwd)) {
+				port = (*p++ - '0');
+
+				while (p < end && is_digit (*p) && port < 65536) {
+					port = (port * 10) + (*p++ - '0');
+				}
+
+				if (!passwd && (port >= 65536 || (p < end && *p == '@'))) {
+					if (p < end && *p == '@') {
+						/* this must be a password? */
+						goto passwd;
+					}
+					else if (p < end) {
+						return FALSE;
+					}
+
+					p--;
+				}
+			}
+			else {
+			passwd:
+				passwd = TRUE;
+
+				while (p < end && is_atom (*p)) {
+					p++;
+				}
+
+				if ((p + 2) < end) {
+					if (*p == '@') {
+						p++;
+						if (is_domain (*p)) {
+							goto domain;
+						}
+					}
+
+					return FALSE;
+				}
+			}
+
+			if (p >= end || *p != '/') {
+				break;
+			}
+
+			/* we have a '/' so there could be a path */
+			/* fall through */
+		case '/': /* we've detected a path component to our url */
+			p++;
+			/* fall through */
+		case '?':
+			while (p < end && is_urlsafe (*p)) {
+				if (*p == open_brace) {
+					brace_stack++;
+				}
+				else if (*p == close_brace) {
+					brace_stack--;
+					if (brace_stack == -1) {
+						/* Closing brace that was opened before the url */
+						break;
+					}
+				}
+				p++;
+			}
+
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* urls are extremely unlikely to end with any
+	 * punctuation, so strip any trailing
+	 * punctuation off. Also strip off any closing
+	 * double-quotes. */
+	while (p > pos && strchr (",.:;?!-|}])\"", p[-1])) {
+		p--;
+	}
+
+	match->m_len = (p - pos);
+
+	return TRUE;
+}
+
+
+/*
+ * Start matcher for email addresses.
+ *
+ * Two kinds of pattern reach this function:
+ *  - '@' found in the middle of the text: walk backwards over domain
+ *    characters, '.' and '_' to find where the local part starts;
+ *  - any other pattern, or '@' at the very start of the text: the address
+ *    starts at pos, provided that a domain character follows the pattern.
+ *
+ * @param begin start of the scanned text
+ * @param end end of the scanned text (one past the last character)
+ * @param pos position of the matched pattern
+ * @param match match state; pattern is read, m_begin is written on success
+ * @return TRUE if an address may start here
+ */
+static gboolean
+url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+{
+	const gchar *p;
+
+	/* Check what we have found */
+	if (pos > begin && *pos == '@') {
+		/* Try to extract it with username */
+		p = pos - 1;
+		while (p > begin && (is_domain (*p) || *p == '.' || *p == '_')) {
+			p --;
+		}
+		if (!is_domain (*p) && p != pos - 1) {
+			/* Stopped on a delimiter, the name starts right after it */
+			match->m_begin = p + 1;
+			return TRUE;
+		}
+		else if (p == begin) {
+			/* The name starts together with the text */
+			match->m_begin = p;
+			return TRUE;
+		}
+	}
+	else {
+		p = pos + strlen (match->pattern);
+		/* Do not read past the buffer when the pattern ends the text */
+		if (p < end && is_domain (*p)) {
+			match->m_begin = pos;
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * End matcher for email addresses.
+ *
+ * Starting right after the matched pattern, consumes domain characters,
+ * underscores, at most one '@' in total and dots that are followed by a
+ * domain character. The length is stored relative to match->m_begin and the
+ * match is marked as needing its prefix prepended.
+ *
+ * @param begin start of the scanned text (unused here)
+ * @param end end of the scanned text (one past the last character)
+ * @param pos position of the matched pattern
+ * @param match match state; m_len and add_prefix are written
+ * @return TRUE only if an '@' is part of the candidate
+ */
+static gboolean
+url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match)
+{
+	const gchar *cur = pos + strlen (match->pattern);
+	gboolean seen_at = (*pos == '@') ? TRUE : FALSE;
+	gboolean accepted;
+
+	for (; cur < end; cur ++) {
+		accepted = is_domain (*cur) || *cur == '_' ||
+			(*cur == '@' && !seen_at) ||
+			(*cur == '.' && cur + 1 < end && is_domain (cur[1]));
+
+		if (!accepted) {
+			break;
+		}
+		if (*cur == '@') {
+			seen_at = TRUE;
+		}
+	}
+
+	match->add_prefix = TRUE;
+	match->m_len = cur - match->m_begin;
+
+	return seen_at;
+}
+
+/*
+ * Extract all urls and email addresses from a text part.
+ *
+ * The raw part (part->orig) is scanned for html parts and the processed
+ * content (part->content) otherwise. Each successfully parsed url is stored
+ * in task->urls, or in task->emails for mailto urls that have a user part,
+ * unless an equal entry is already there. The offset and the length of
+ * every accepted url are appended to part->urls_offset.
+ *
+ * @param pool memory pool used for urls and exceptions
+ * @param task task that owns the urls and emails trees
+ * @param part text part to scan
+ * @param is_html TRUE if the part is html
+ */
+void
+url_parse_text (rspamd_mempool_t * pool, struct rspamd_task *task, struct mime_text_part *part, gboolean is_html)
+{
+	gint rc;
+	gchar *url_str = NULL, *url_start, *url_end;
+	struct uri *new;
+	struct process_exception *ex;
+	gchar *p, *end, *begin;
+	gboolean usable;
+
+	if (part->orig->data == NULL || part->orig->len == 0) {
+		msg_warn ("got empty text part");
+		return;
+	}
+
+	if (url_init () == 0) {
+		if (is_html) {
+			begin = part->orig->data;
+			end = begin + part->orig->len;
+		}
+		else {
+			begin = part->content->data;
+			end = begin + part->content->len;
+		}
+
+		/* Each iteration resumes one character after the previous match */
+		for (p = begin; p < end; p = url_end + 1) {
+			if (!url_try_text (pool, p, end - p, &url_start, &url_end, &url_str, is_html)) {
+				/* No more patterns in the rest of the text */
+				break;
+			}
+			if (url_str == NULL) {
+				/* A pattern was seen but no url was extracted */
+				continue;
+			}
+
+			new = rspamd_mempool_alloc0 (pool, sizeof (struct uri));
+			ex = rspamd_mempool_alloc0 (pool, sizeof (struct process_exception));
+			if (new == NULL) {
+				continue;
+			}
+
+			g_strstrip (url_str);
+			rc = parse_uri (new, url_str, pool);
+			usable = (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) &&
+				new->hostlen > 0;
+
+			if (!usable) {
+				if (rc != URI_ERRNO_OK) {
+					msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc));
+				}
+				continue;
+			}
+
+			ex->pos = url_start - begin;
+			ex->len = url_end - url_start;
+
+			if (new->protocol == PROTOCOL_MAILTO) {
+				/* Emails without a user part are dropped */
+				if (new->userlen > 0 && !g_tree_lookup (task->emails, new)) {
+					g_tree_insert (task->emails, new, new);
+				}
+			}
+			else if (!g_tree_lookup (task->urls, new)) {
+				g_tree_insert (task->urls, new, new);
+			}
+
+			part->urls_offset = g_list_prepend (part->urls_offset, ex);
+		}
+	}
+
+	/* Handle offsets of this part */
+	if (part->urls_offset != NULL) {
+		/* Entries were prepended, restore the order of appearance */
+		part->urls_offset = g_list_reverse (part->urls_offset);
+		rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)g_list_free, part->urls_offset);
+	}
+}
+
+/*
+ * Find the first url pattern in a text and try to extract a url around it.
+ *
+ * @param pool memory pool used to allocate the extracted string
+ * @param begin start of the text
+ * @param len length of the text
+ * @param start if not NULL, receives the start of the url, or the position
+ * of the pattern when extraction failed
+ * @param fin if not NULL, receives the end of the url, or the end of the
+ * pattern when extraction failed
+ * @param url_str if not NULL, receives a NUL terminated copy of the url
+ * (with the matcher prefix prepended when the matcher asks for it), or NULL
+ * when a pattern was seen but no url could be extracted
+ * @param is_html TRUE if the text is html; a first pattern whose matcher is
+ * flagged URL_FLAG_NOHTML makes the function return FALSE in that case
+ * @return TRUE if a pattern was found (check *url_str to see whether a url
+ * was extracted), FALSE if there is nothing to look at
+ */
+gboolean
+url_try_text (rspamd_mempool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **fin, gchar **url_str, gboolean is_html)
+{
+	const gchar *end, *pos;
+	gint idx, l;
+	struct url_matcher *matcher;
+	url_match_t m;
+
+	end = begin + len;
+	if (url_init () != 0) {
+		return FALSE;
+	}
+
+	pos = rspamd_trie_lookup (url_scanner->patterns, begin, len, &idx);
+	if (pos == NULL) {
+		return FALSE;
+	}
+
+	matcher = &matchers[idx];
+	if ((matcher->flags & URL_FLAG_NOHTML) && is_html) {
+		/* Do not try to match non-html like urls in html texts */
+		return FALSE;
+	}
+
+	m.pattern = matcher->pattern;
+	m.prefix = matcher->prefix;
+	m.add_prefix = FALSE;
+
+	if (matcher->start (begin, end, pos, &m) && matcher->end (begin, end, pos, &m)) {
+		if (url_str != NULL) {
+			if (m.add_prefix) {
+				l = m.m_len + 1 + strlen (m.prefix);
+				*url_str = rspamd_mempool_alloc (pool, l);
+				rspamd_snprintf (*url_str, l, "%s%*s", m.prefix, m.m_len, m.m_begin);
+			}
+			else {
+				*url_str = rspamd_mempool_alloc (pool, m.m_len + 1);
+				memcpy (*url_str, m.m_begin, m.m_len);
+				(*url_str)[m.m_len] = '\0';
+			}
+		}
+		if (start != NULL) {
+			*start = (gchar *)m.m_begin;
+		}
+		if (fin != NULL) {
+			*fin = (gchar *)m.m_begin + m.m_len;
+		}
+	}
+	else {
+		if (url_str != NULL) {
+			*url_str = NULL;
+		}
+		if (start != NULL) {
+			*start = (gchar *)pos;
+		}
+		if (fin != NULL) {
+			/*
+			 * Skip exactly the text that was matched. The prefix is what
+			 * would be prepended to a url and says nothing about how much
+			 * input the pattern has consumed.
+			 */
+			*fin = (gchar *)pos + strlen (m.pattern);
+		}
+	}
+
+	return TRUE;
+}
+
+/*
+ * vi: ts=4
+ */
diff --git a/src/libserver/url.h b/src/libserver/url.h
new file mode 100644
index 000000000..60535ba5c
--- /dev/null
+++ b/src/libserver/url.h
@@ -0,0 +1,111 @@
+/* URL check functions */
+#ifndef URL_H
+#define URL_H
+
+#include "config.h"
+#include "mem_pool.h"
+
+struct rspamd_task;
+struct mime_text_part;
+
+/*
+ * Parsed representation of a single uri. The structure is allocated by the
+ * caller and filled by parse_uri (). Every component pointer has a matching
+ * length field below.
+ * NOTE(review): the components presumably point inside @string and are not
+ * NUL terminated on their own - confirm against parse_uri ().
+ */
+struct uri {
+ /* The start of the uri (and thus start of the protocol string). */
+ gchar *string;
+
+ /* The internal type of protocol. Can _never_ be PROTOCOL_UNKNOWN. */
+ gint protocol; /* enum protocol */
+
+ /* NOTE(review): presumably the address family of an IP literal host;
+ * it is not set anywhere in this part of the code - confirm. */
+ gint ip_family;
+
+ /* Authority components; see userlen, passwordlen, hostlen and portlen. */
+ gchar *user;
+ gchar *password;
+ gchar *host;
+ gchar *port;
+ /* @data can contain both the path and query uri fields.
+ * It can never be NULL but can have zero length. */
+ gchar *data;
+ gchar *fragment;
+ /* @post can contain some special encoded form data, used internally
+ * to make form data handling more efficient. The data is marked by
+ * POST_CHAR in the uri string. */
+ gchar *post;
+
+ /* NOTE(review): presumably the other url of a suspicious pair when
+ * is_phished is set - confirm against the code that sets it. */
+ struct uri *phished_url;
+
+ /* @protocollen should only be usable if @protocol is either
+ * PROTOCOL_USER or an uri string should be composed. */
+ guint protocollen;
+ /* Lengths of the components above. url_parse_text () treats a zero
+ * hostlen as "no usable url" and a zero userlen as "no usable email". */
+ guint userlen;
+ guint passwordlen;
+ guint hostlen;
+ guint portlen;
+ guint datalen;
+ guint fragmentlen;
+
+ /* Flags */
+ gboolean ipv6; /* URI contains IPv6 host */
+ gboolean form; /* URI originated from form */
+ gboolean is_phished; /* URI maybe phishing */
+};
+
+/*
+ * Result codes of parse_uri (); url_strerror () turns them into text.
+ * url_parse_text () accepts URI_ERRNO_OK, URI_ERRNO_NO_SLASHES and
+ * URI_ERRNO_NO_HOST_SLASH as usable results when a host was found.
+ */
+enum uri_errno {
+ URI_ERRNO_OK, /* Parsing went well */
+ URI_ERRNO_EMPTY, /* The URI string was empty */
+ URI_ERRNO_INVALID_PROTOCOL, /* No protocol was found */
+ URI_ERRNO_NO_SLASHES, /* Slashes after protocol missing */
+ URI_ERRNO_TOO_MANY_SLASHES, /* Too many slashes after protocol */
+ URI_ERRNO_TRAILING_DOTS, /* '.' after host */
+ URI_ERRNO_NO_HOST, /* Host part is missing */
+ URI_ERRNO_NO_PORT_COLON, /* ':' after host without port */
+ URI_ERRNO_NO_HOST_SLASH, /* Slash after host missing */
+ URI_ERRNO_IPV6_SECURITY, /* IPv6 security bug detected */
+ URI_ERRNO_INVALID_PORT, /* Port number is bad */
+ URI_ERRNO_INVALID_PORT_RANGE /* Port number is not within 0-65535 */
+};
+
+/*
+ * Known uri schemes; the value is kept in the protocol field of struct uri.
+ * url_parse_text () stores PROTOCOL_MAILTO uris as emails and all other
+ * uris as urls.
+ */
+enum protocol {
+ PROTOCOL_FILE,
+ PROTOCOL_FTP,
+ PROTOCOL_HTTP,
+ PROTOCOL_HTTPS,
+ PROTOCOL_MAILTO,
+ PROTOCOL_UNKNOWN
+};
+
+/* Shortcut for the full text of an uri (the string field of struct uri) */
+#define struri(uri) ((uri)->string)
+
+/*
+ * Parse urls inside text
+ * @param pool memory pool
+ * @param task task object
+ * @param part current text part
+ * @param is_html turn on html heuristic
+ */
+void url_parse_text (rspamd_mempool_t *pool, struct rspamd_task *task, struct mime_text_part *part, gboolean is_html);
+
+/*
+ * Parse a single url into an uri structure
+ * @param uri url object, must be pre allocated
+ * @param uristring text form of url
+ * @param pool memory pool
+ * @return URI_ERRNO_OK on success or the reason of the failure
+ */
+enum uri_errno parse_uri(struct uri *uri, gchar *uristring, rspamd_mempool_t *pool);
+
+/*
+ * Try to extract url from a text
+ * @param pool memory pool
+ * @param begin begin of text
+ * @param len length of text
+ * @param start storage for start position of url found (or NULL)
+ * @param end storage for end position of url found (or NULL)
+ * @param url_str storage for url string (callers should pass a valid
+ * pointer); it is set to NULL when a pattern was seen but no url could be
+ * extracted
+ * @param is_html TRUE if the text is html, matchers that are flagged as
+ * unsuitable for html are not applied in this case
+ * @return TRUE if a url pattern is found in specified text, check url_str
+ * to see whether a url was really extracted
+ */
+gboolean url_try_text (rspamd_mempool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **end, gchar **url_str, gboolean is_html);
+
+/*
+ * Return text representation of url parsing error
+ * @param err error code returned by parse_uri
+ */
+const gchar* url_strerror (enum uri_errno err);
+
+#endif