diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-04-21 16:25:51 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2014-04-21 16:25:51 +0100 |
commit | 61555065f3d1c8badcc9573691232f1b6e42988c (patch) | |
tree | 563d5b7cb8c468530f7e79c4da0a75267b1184e1 /src/libserver | |
parent | ad5bf825b7f33bc10311673991f0cc888e69c0b1 (diff) | |
download | rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.tar.gz rspamd-61555065f3d1c8badcc9573691232f1b6e42988c.zip |
Rework project structure, remove trash files.
Diffstat (limited to 'src/libserver')
37 files changed, 16666 insertions, 0 deletions
diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt new file mode 100644 index 000000000..bd5df18b9 --- /dev/null +++ b/src/libserver/CMakeLists.txt @@ -0,0 +1,63 @@
# Librspamdserver: core server sources
set(LIBRSPAMDSERVERSRC
	binlog.c
	buffer.c
	cfg_utils.c
	cfg_rcl.c
	dkim.c
	dns.c
	dynamic_cfg.c
	events.c
	html.c
	proxy.c
	roll_history.c
	settings.c
	spf.c
	statfile.c
	statfile_sync.c
	symbols_cache.c
	task.c
	url.c)

# Tokenizers and classifiers live in sibling directories but are compiled
# into the same library target.
set(TOKENIZERSSRC
	../tokenizers/tokenizers.c
	../tokenizers/osb.c)

set(CLASSIFIERSSRC
	../classifiers/classifiers.c
	../classifiers/bayes.c
	../classifiers/winnow.c)

# Librspamd-server

#IF(WITH_DB)
#	LIST(APPEND LIBRSPAMDSERVERSRC kvstorage_bdb.c)
#ENDIF(WITH_DB)
#IF(WITH_SQLITE)
#	LIST(APPEND LIBRSPAMDSERVERSRC kvstorage_sqlite.c)
#ENDIF(WITH_SQLITE)

add_library(rspamd-server ${LINK_TYPE}
	${LIBRSPAMDSERVERSRC}
	${TOKENIZERSSRC}
	${CLASSIFIERSSRC})

# The library version is only set when this is not a Debian build
if(NOT DEBIAN_BUILD)
	set_target_properties(rspamd-server PROPERTIES VERSION ${RSPAMD_VERSION})
endif()

set_target_properties(rspamd-server PROPERTIES
	LINKER_LANGUAGE C
	COMPILE_FLAGS "-DRSPAMD_LIB")

# Same libraries, same order as the original one-call-per-library form
target_link_libraries(rspamd-server
	rspamd-lua
	rspamd-json
	rspamd-cdb
	rspamd-util
	rdns)

# With GCC the compile flags are replaced by a set that also disables
# strict aliasing.
if(CMAKE_COMPILER_IS_GNUCC)
	set_target_properties(rspamd-server PROPERTIES
		COMPILE_FLAGS "-DRSPAMD_LIB -fno-strict-aliasing")
endif()

if(WITH_DB)
	target_link_libraries(rspamd-server db)
endif()

if(OPENSSL_FOUND)
	target_link_libraries(rspamd-server ${OPENSSL_LIBRARIES})
endif()

if(NO_SHARED MATCHES "OFF")
	install(TARGETS rspamd-server
		LIBRARY DESTINATION ${LIBDIR}
		PUBLIC_HEADER DESTINATION ${INCLUDEDIR})
endif()
\ No newline at end of file diff --git a/src/libserver/binlog.c b/src/libserver/binlog.c new file mode 100644 index 000000000..f085a7de0 --- /dev/null +++ b/src/libserver/binlog.c @@ -0,0 +1,579 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "binlog.h" +#include "cfg_file.h" +#include "tokenizers/tokenizers.h" + +#define BINLOG_SUFFIX ".binlog" +#define BACKUP_SUFFIX ".old" +#define VALID_MAGIC { 'r', 's', 'l' } +#define VALID_VERSION { '1', '0' } + +static GHashTable *binlog_opened = NULL; +static rspamd_mempool_t *binlog_pool = NULL; + +static gboolean +binlog_write_header (struct rspamd_binlog *log) +{ + struct rspamd_binlog_header header = { + .magic = VALID_MAGIC, + .version = VALID_VERSION, + .padding = { '\0', '\0' }, + }; + + header.create_time = time (NULL); + lock_file (log->fd, FALSE); + + if (write (log->fd, &header, sizeof (struct rspamd_binlog_header)) == -1) { + msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno)); + return FALSE; + } + + + memcpy (&log->header, &header, sizeof (struct rspamd_binlog_header)); + + /* Metaindex */ + log->metaindex = g_malloc (sizeof (struct rspamd_binlog_metaindex)); + bzero (log->metaindex, sizeof (struct rspamd_binlog_metaindex)); + /* Offset to metaindex */ + log->metaindex->indexes[0] = sizeof (struct rspamd_binlog_metaindex) + sizeof (struct rspamd_binlog_header); + + if (write (log->fd, log->metaindex, sizeof (struct rspamd_binlog_metaindex)) == -1) { + g_free (log->metaindex); + msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno)); + unlock_file (log->fd, FALSE); + return FALSE; + } + + /* Alloc, write, mmap */ + log->cur_idx = g_malloc (sizeof (struct rspamd_index_block)); + bzero (log->cur_idx, sizeof (struct rspamd_index_block)); + if (write (log->fd, log->cur_idx, sizeof (struct rspamd_index_block)) == -1) { + g_free (log->cur_idx); + msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno)); + unlock_file (log->fd, FALSE); + return FALSE; + } + + unlock_file (log->fd, FALSE); + + return TRUE; +} + +static gboolean +binlog_check_file (struct rspamd_binlog *log) +{ + static gchar valid_magic[] = VALID_MAGIC, 
valid_version[] = VALID_VERSION; + + if (read (log->fd, &log->header, sizeof (struct rspamd_binlog_header)) != sizeof (struct rspamd_binlog_header)) { + msg_warn ("cannot read file %s, error %d, %s", log->filename, errno, strerror (errno)); + return FALSE; + } + + /* Now check all fields */ + if (memcmp (&log->header.magic, valid_magic, sizeof (valid_magic)) != 0 || + memcmp (&log->header.version, valid_version, sizeof (valid_version)) != 0) { + msg_warn ("cannot validate file %s"); + return FALSE; + } + /* Now mmap metaindex and current index */ + if (log->metaindex == NULL) { + log->metaindex = g_malloc (sizeof (struct rspamd_binlog_metaindex)); + } + if ((read (log->fd, log->metaindex, sizeof (struct rspamd_binlog_metaindex))) != sizeof (struct rspamd_binlog_metaindex)) { + msg_warn ("cannot read metaindex of file %s, error %d, %s", log->filename, errno, strerror (errno)); + return FALSE; + } + /* Current index */ + if (log->cur_idx == NULL) { + log->cur_idx = g_malloc (sizeof (struct rspamd_index_block)); + } + if (lseek (log->fd, log->metaindex->indexes[log->metaindex->last_index], SEEK_SET) == -1) { + msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno)); + return FALSE; + } + if ((read (log->fd, log->cur_idx, sizeof (struct rspamd_index_block))) != sizeof (struct rspamd_index_block)) { + msg_warn ("cannot read index in file %s, error %d, %s", log->filename, errno, strerror (errno)); + return FALSE; + } + + log->cur_seq = log->metaindex->last_index * BINLOG_IDX_LEN + log->cur_idx->last_index; + log->cur_time = log->cur_idx->indexes[log->cur_idx->last_index].time; + + return TRUE; + +} + +static gboolean +binlog_create (struct rspamd_binlog *log) +{ + if ((log->fd = open (log->filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { + msg_info ("cannot create file %s, error %d, %s", log->filename, errno, strerror (errno)); + return FALSE; + } + + return binlog_write_header (log); +} + +static gboolean +binlog_open_real 
(struct rspamd_binlog *log) +{ + if ((log->fd = open (log->filename, O_RDWR)) == -1) { + msg_info ("cannot open file %s, error %d, %s", log->filename, errno, strerror (errno)); + return FALSE; + } + + return binlog_check_file (log); +} + + +struct rspamd_binlog* +binlog_open (rspamd_mempool_t *pool, const gchar *path, time_t rotate_time, gint rotate_jitter) +{ + struct rspamd_binlog *new; + gint len = strlen (path); + struct stat st; + + new = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_binlog)); + new->pool = pool; + new->rotate_time = rotate_time; + new->fd = -1; + + if (rotate_time) { + new->rotate_jitter = g_random_int_range (0, rotate_jitter); + } + + new->filename = rspamd_mempool_alloc (pool, len + sizeof (BINLOG_SUFFIX)); + rspamd_strlcpy (new->filename, path, len + 1); + rspamd_strlcpy (new->filename + len, BINLOG_SUFFIX, sizeof (BINLOG_SUFFIX)); + + if (stat (new->filename, &st) == -1) { + /* Check errno to check whether we should create this file */ + if (errno != ENOENT) { + msg_err ("cannot stat file: %s, error %s", new->filename, strerror (errno)); + return NULL; + } + else { + /* In case of ENOENT try to create binlog */ + if (!binlog_create (new)) { + return NULL; + } + } + } + else { + /* Try to open binlog */ + if (!binlog_open_real (new)) { + return NULL; + } + } + + return new; +} + +void +binlog_close (struct rspamd_binlog *log) +{ + if (log) { + if (log->metaindex) { + g_free (log->metaindex); + } + if (log->cur_idx) { + g_free (log->cur_idx); + } + close (log->fd); + } +} + +static gboolean +binlog_tree_callback (gpointer key, gpointer value, gpointer data) +{ + token_node_t *node = key; + struct rspamd_binlog *log = data; + struct rspamd_binlog_element elt; + + elt.h1 = node->h1; + elt.h2 = node->h2; + elt.value = node->value; + + if (write (log->fd, &elt, sizeof (elt)) == -1) { + msg_info ("cannot write token to file: %s, error: %s", log->filename, strerror (errno)); + return TRUE; + } + + return FALSE; +} + +static gboolean 
+write_binlog_tree (struct rspamd_binlog *log, GTree *nodes) +{ + off_t seek; + struct rspamd_binlog_index *idx; + + lock_file (log->fd, FALSE); + log->cur_seq ++; + + /* Seek to end of file */ + if ((seek = lseek (log->fd, 0, SEEK_END)) == -1) { + unlock_file (log->fd, FALSE); + msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno)); + return FALSE; + } + + /* Now write all nodes to file */ + g_tree_foreach (nodes, binlog_tree_callback, (gpointer)log); + + /* Write index */ + idx = &log->cur_idx->indexes[log->cur_idx->last_index]; + idx->seek = seek; + idx->time = (guint64)time (NULL); + log->cur_time = idx->time; + idx->len = g_tree_nnodes (nodes) * sizeof (struct rspamd_binlog_element); + if (lseek (log->fd, log->metaindex->indexes[log->metaindex->last_index], SEEK_SET) == -1) { + unlock_file (log->fd, FALSE); + msg_info ("cannot seek in file: %s, error: %s, seek: %L, op: insert index", log->filename, + strerror (errno), log->metaindex->indexes[log->metaindex->last_index]); + return FALSE; + } + log->cur_idx->last_index ++; + if (write (log->fd, log->cur_idx, sizeof (struct rspamd_index_block)) == -1) { + unlock_file (log->fd, FALSE); + msg_info ("cannot write index to file: %s, error: %s", log->filename, strerror (errno)); + return FALSE; + } + + unlock_file (log->fd, FALSE); + + return TRUE; +} + +static gboolean +create_new_metaindex_block (struct rspamd_binlog *log) +{ + off_t seek; + + lock_file (log->fd, FALSE); + + log->metaindex->last_index ++; + /* Seek to end of file */ + if ((seek = lseek (log->fd, 0, SEEK_END)) == -1) { + unlock_file (log->fd, FALSE); + msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno)); + return FALSE; + } + if (write (log->fd, log->cur_idx, sizeof (struct rspamd_index_block)) == -1) { + unlock_file (log->fd, FALSE); + g_free (log->cur_idx); + msg_warn ("cannot write file %s, error %d, %s", log->filename, errno, strerror (errno)); + return FALSE; + } + /* Offset to metaindex */ 
+ log->metaindex->indexes[log->metaindex->last_index] = seek; + /* Overwrite all metaindexes */ + if (lseek (log->fd, sizeof (struct rspamd_binlog_header), SEEK_SET) == -1) { + unlock_file (log->fd, FALSE); + msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno)); + return FALSE; + } + if (write (log->fd, log->metaindex, sizeof (struct rspamd_binlog_metaindex)) == -1) { + unlock_file (log->fd, FALSE); + msg_info ("cannot write metaindex in file: %s, error: %s", log->filename, strerror (errno)); + return FALSE; + } + bzero (log->cur_idx, sizeof (struct rspamd_index_block)); + unlock_file (log->fd, FALSE); + + return TRUE; +} + +static gboolean +maybe_rotate_binlog (struct rspamd_binlog *log) +{ + guint64 now = time (NULL); + + if (log->rotate_time && ((now - log->header.create_time) > (guint)(log->rotate_time + log->rotate_jitter))) { + return TRUE; + } + return FALSE; +} + +static gboolean +rotate_binlog (struct rspamd_binlog *log) +{ + gchar *backup_name; + struct stat st; + + lock_file (log->fd, FALSE); + + /* Unmap mapped fragments */ + if (log->metaindex) { + g_free (log->metaindex); + log->metaindex = NULL; + } + if (log->cur_idx) { + g_free (log->cur_idx); + log->cur_idx = NULL; + } + /* Format backup name */ + backup_name = g_strdup_printf ("%s.%s", log->filename, BACKUP_SUFFIX); + + if (stat (backup_name, &st) != -1) { + msg_info ("replace old %s", backup_name); + unlink (backup_name); + } + + rename (log->filename, backup_name); + g_free (backup_name); + + /* XXX: maybe race condition here */ + unlock_file (log->fd, FALSE); + close (log->fd); + + return binlog_create (log); + +} + +gboolean +binlog_insert (struct rspamd_binlog *log, GTree *nodes) +{ + off_t seek; + + if (!log || !log->metaindex || !log->cur_idx || !nodes) { + msg_info ("improperly opened binlog: %s", log != NULL ? 
log->filename : "unknown"); + return FALSE; + } + + if (maybe_rotate_binlog (log)) { + if (!rotate_binlog (log)) { + return FALSE; + } + } + /* First of all try to place new tokens in current index */ + if (log->cur_idx->last_index < BINLOG_IDX_LEN) { + /* All is ok */ + return write_binlog_tree (log, nodes); + } + /* Current index table is all busy, try to allocate new index */ + + /* Check metaindex free space */ + if (log->metaindex->last_index < METAINDEX_LEN) { + /* Create new index block */ + if ((seek = lseek (log->fd, 0, SEEK_END)) == (off_t)-1) { + msg_info ("cannot seek in file: %s, error: %s", log->filename, strerror (errno)); + return FALSE; + } + if (!create_new_metaindex_block (log)) { + return FALSE; + } + return write_binlog_tree (log, nodes); + } + + /* All binlog is filled, we need to rotate it forcefully */ + if (!rotate_binlog (log)) { + return FALSE; + } + + return write_binlog_tree (log, nodes); +} + +gboolean +binlog_sync (struct rspamd_binlog *log, guint64 from_rev, guint64 *from_time, GByteArray **rep) +{ + guint32 metaindex_num; + struct rspamd_index_block *idxb; + struct rspamd_binlog_index *idx; + gboolean idx_mapped = FALSE, res = TRUE, is_first = FALSE; + + if (!log || !log->metaindex || !log->cur_idx) { + msg_info ("improperly opened binlog: %s", log != NULL ? 
log->filename : "unknown"); + return FALSE; + } + + if (*rep == NULL) { + *rep = g_malloc (sizeof (GByteArray)); + is_first = TRUE; + } + else { + /* Unmap old fragment */ + g_free ((*rep)->data); + } + + if (from_rev == log->cur_seq) { + /* Last record */ + *rep = NULL; + return FALSE; + } + else if (from_rev > log->cur_seq) { + /* Slave has more actual copy, write this to log and abort sync */ + msg_warn ("slave has more recent revision of statfile %s: %uL and our is: %uL", log->filename, from_rev, log->cur_seq); + *rep = NULL; + *from_time = 0; + return FALSE; + } + + metaindex_num = from_rev / BINLOG_IDX_LEN; + /* First of all try to find this revision */ + if (metaindex_num > log->metaindex->last_index) { + return FALSE; + } + else if (metaindex_num != log->metaindex->last_index) { + /* Need to remap index block */ + lock_file (log->fd, FALSE); + idxb = g_malloc (sizeof (struct rspamd_index_block)); + idx_mapped = TRUE; + if (lseek (log->fd, log->metaindex->indexes[metaindex_num], SEEK_SET) == -1) { + unlock_file (log->fd, FALSE); + msg_warn ("cannot seek file %s, error %d, %s", log->filename, errno, strerror (errno)); + res = FALSE; + goto end; + } + if ((read (log->fd, idxb, sizeof (struct rspamd_index_block))) != sizeof (struct rspamd_index_block)) { + unlock_file (log->fd, FALSE); + msg_warn ("cannot read index from file %s, error %d, %s", log->filename, errno, strerror (errno)); + res = FALSE; + goto end; + } + unlock_file (log->fd, FALSE); + } + else { + idxb = log->cur_idx; + } + /* Now check specified index */ + idx = &idxb->indexes[from_rev % BINLOG_IDX_LEN]; + if (is_first && idx->time != *from_time) { + res = FALSE; + *from_time = 0; + goto end; + } + else { + *from_time = idx->time; + } + + /* Now fill reply structure */ + (*rep)->len = idx->len; + /* Read result */ + msg_info ("update from binlog '%s' from revision: %uL to revision %uL size is %uL", + log->filename, from_rev, log->cur_seq, idx->len); + if (lseek (log->fd, idx->seek, SEEK_SET) == 
-1) { + msg_warn ("cannot seek file %s, error %d, %s", log->filename, errno, strerror (errno)); + res = FALSE; + goto end; + } + + (*rep)->data = g_malloc (idx->len); + if ((read (log->fd, (*rep)->data, idx->len)) != (ssize_t)idx->len) { + msg_warn ("cannot read file %s, error %d, %s", log->filename, errno, strerror (errno)); + res = FALSE; + goto end; + } + +end: + if (idx_mapped) { + g_free (idxb); + } + + return res; +} + +static gboolean +maybe_init_static (void) +{ + if (!binlog_opened) { + binlog_opened = g_hash_table_new (g_direct_hash, g_direct_equal); + if (!binlog_opened) { + return FALSE; + } + } + + if (!binlog_pool) { + binlog_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + if (!binlog_pool) { + return FALSE; + } + } + + return TRUE; +} + +gboolean +maybe_write_binlog (struct classifier_config *ccf, struct statfile *st, stat_file_t *file, GTree *nodes) +{ + struct rspamd_binlog *log; + + if (ccf == NULL) { + return FALSE; + } + + + if (st == NULL || nodes == NULL || st->binlog == NULL || st->binlog->affinity != AFFINITY_MASTER) { + return FALSE; + } + + if (!maybe_init_static ()) { + return FALSE; + } + + if ((log = g_hash_table_lookup (binlog_opened, st)) == NULL) { + if ((log = binlog_open (binlog_pool, st->path, st->binlog->rotate_time, st->binlog->rotate_time / 2)) != NULL) { + g_hash_table_insert (binlog_opened, st, log); + } + else { + return FALSE; + } + } + + if (binlog_insert (log, nodes)) { + msg_info ("set new revision of statfile %s: %uL", st->symbol, log->cur_seq); + (void)statfile_set_revision (file, log->cur_seq, log->cur_time); + return TRUE; + } + + return FALSE; +} + +struct rspamd_binlog* +get_binlog_by_statfile (struct statfile *st) +{ + struct rspamd_binlog *log; + + if (st == NULL || st->binlog == NULL || st->binlog->affinity != AFFINITY_MASTER) { + return NULL; + } + + if (!maybe_init_static ()) { + return NULL; + } + + if ((log = g_hash_table_lookup (binlog_opened, st)) == NULL) { + if ((log = binlog_open 
(binlog_pool, st->path, st->binlog->rotate_time, st->binlog->rotate_time / 2)) != NULL) { + g_hash_table_insert (binlog_opened, st, log); + } + else { + return NULL; + } + } + + return log; +} diff --git a/src/libserver/binlog.h b/src/libserver/binlog.h new file mode 100644 index 000000000..9e1a786d3 --- /dev/null +++ b/src/libserver/binlog.h @@ -0,0 +1,93 @@ +#ifndef RSPAMD_BINLOG_H +#define RSPAMD_BINLOG_H + +#include "config.h" +#include "main.h" +#include "statfile.h" + +/* How much records are in a single index */ +#define BINLOG_IDX_LEN 200 +#define METAINDEX_LEN 1024 + +/* Assume 8 bytes words */ +struct rspamd_binlog_header { + gchar magic[3]; + gchar version[2]; + gchar padding[3]; + guint64 create_time; +}; + +struct rspamd_binlog_index { + guint64 time; + guint64 seek; + guint32 len; +}; + +struct rspamd_index_block { + struct rspamd_binlog_index indexes[BINLOG_IDX_LEN]; + guint32 last_index; +}; + +struct rspamd_binlog_metaindex { + guint64 indexes[METAINDEX_LEN]; + guint64 last_index; +}; + +struct rspamd_binlog_element { + guint32 h1; + guint32 h2; + float value; +} __attribute__((__packed__)); + +struct rspamd_binlog { + gchar *filename; + time_t rotate_time; + gint rotate_jitter; + guint64 cur_seq; + guint64 cur_time; + gint fd; + rspamd_mempool_t *pool; + + struct rspamd_binlog_header header; + struct rspamd_binlog_metaindex *metaindex; + struct rspamd_index_block *cur_idx; +}; + +struct classifier_config; + +/* + * Open binlog at specified path with specified rotate params + */ +struct rspamd_binlog* binlog_open (rspamd_mempool_t *pool, const gchar *path, time_t rotate_time, gint rotate_jitter); + +/* + * Get and open binlog for specified statfile + */ +struct rspamd_binlog* get_binlog_by_statfile (struct statfile *st); + +/* + * Close binlog + */ +void binlog_close (struct rspamd_binlog *log); + +/* + * Insert new nodes inside binlog + */ +gboolean binlog_insert (struct rspamd_binlog *log, GTree *nodes); + +/* + * Sync binlog from specified 
revision + * @param log binlog structure + * @param from_rev from revision + * @param from_time from time + * @param rep a portion of changes for revision is stored here + * @return TRUE if there are more revisions to get and FALSE if synchronization is complete + */ +gboolean binlog_sync (struct rspamd_binlog *log, guint64 from_rev, guint64 *from_time, GByteArray **rep); + +/* + * Conditional write to a binlog for specified statfile + */ +gboolean maybe_write_binlog (struct classifier_config *ccf, struct statfile *st, stat_file_t *file, GTree *nodes); + +#endif diff --git a/src/libserver/buffer.c b/src/libserver/buffer.c new file mode 100644 index 000000000..864f2fad6 --- /dev/null +++ b/src/libserver/buffer.c @@ -0,0 +1,786 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "buffer.h" +#include "main.h" +#ifdef HAVE_SYS_SENDFILE_H +#include <sys/sendfile.h> +#endif + +#define G_DISPATCHER_ERROR dispatcher_error_quark() +#define debug_ip(...) rspamd_conditional_debug(rspamd_main->logger, NULL, __FUNCTION__, __VA_ARGS__) + +static void dispatcher_cb (gint fd, short what, void *arg); + +static inline GQuark +dispatcher_error_quark (void) +{ + return g_quark_from_static_string ("g-dispatcher-error-quark"); +} + +static gboolean +sendfile_callback (rspamd_io_dispatcher_t *d) +{ + + GError *err; + +#ifdef HAVE_SENDFILE +# if defined(FREEBSD) || defined(DARWIN) + off_t off = 0; + #if defined(FREEBSD) + /* FreeBSD version */ + if (sendfile (d->sendfile_fd, d->fd, d->offset, 0, NULL, &off, 0) != 0) { + #elif defined(DARWIN) + /* Darwin version */ + if (sendfile (d->sendfile_fd, d->fd, d->offset, &off, NULL, 0) != 0) { + #endif + if (errno != EAGAIN) { + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno)); + d->err_callback (err, d->user_data); + return FALSE; + } + } + else { + debug_ip("partially write data, retry"); + /* Wait for other event */ + d->offset += off; + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + } + } + else { + if (d->write_callback) { + if (!d->write_callback (d->user_data)) { + debug_ip("callback set wanna_die flag, terminating"); + return 
FALSE; + } + } + event_del (d->ev); + event_set (d->ev, d->fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + d->in_sendfile = FALSE; + } +# else + ssize_t r; + /* Linux version */ + r = sendfile (d->fd, d->sendfile_fd, &d->offset, d->file_size); + if (r == -1) { + if (errno != EAGAIN) { + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno)); + d->err_callback (err, d->user_data); + return FALSE; + } + } + else { + debug_ip("partially write data, retry"); + /* Wait for other event */ + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + } + } + else if (r + d->offset < (ssize_t)d->file_size) { + debug_ip("partially write data, retry"); + /* Wait for other event */ + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + } + else { + if (d->write_callback) { + if (!d->write_callback (d->user_data)) { + debug_ip("callback set wanna_die flag, terminating"); + return FALSE; + } + } + event_del (d->ev); + event_set (d->ev, d->fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + d->in_sendfile = FALSE; + } +# endif +#else + ssize_t r; + r = write (d->fd, d->map, d->file_size - d->offset); + if (r == -1) { + if (errno != EAGAIN) { + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno)); + d->err_callback (err, d->user_data); + return FALSE; + } + } + else { + debug_ip("partially write data, retry"); + /* Wait for other event */ + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + } + } + else if (r + d->offset < d->file_size) { + d->offset += r; + 
debug_ip("partially write data, retry"); + /* Wait for other event */ + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + } + else { + if (d->write_callback) { + if (!d->write_callback (d->user_data)) { + debug_ip("callback set wanna_die flag, terminating"); + return FALSE; + } + } + event_del (d->ev); + event_set (d->ev, d->fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + d->in_sendfile = FALSE; + } +#endif + return TRUE; +} + +#define BUFREMAIN(x) (x)->data->size - ((x)->pos - (x)->data->begin) + +#define APPEND_OUT_BUFFER(d, buf) do { \ + DL_APPEND((d)->out_buffers.buffers, buf); \ + (d)->out_buffers.pending ++; \ + } while (0) +#define DELETE_OUT_BUFFER(d, buf) do { \ + DL_DELETE((d)->out_buffers.buffers, (buf)); \ + g_string_free((buf->data), (buf)->allocated); \ + g_slice_free1(sizeof (struct rspamd_out_buffer_s), (buf)); \ + (d)->out_buffers.pending --; \ + } while (0) + +static gboolean +write_buffers (gint fd, rspamd_io_dispatcher_t * d, gboolean is_delayed) +{ + GError *err = NULL; + struct rspamd_out_buffer_s *cur = NULL, *tmp; + ssize_t r; + struct iovec *iov; + guint i, len; + + len = d->out_buffers.pending; + while (len > 0) { + /* Unset delayed as actually we HAVE buffers to write */ + is_delayed = TRUE; + iov = g_slice_alloc (len * sizeof (struct iovec)); + i = 0; + DL_FOREACH_SAFE (d->out_buffers.buffers, cur, tmp) { + iov[i].iov_base = cur->data->str; + iov[i].iov_len = cur->data->len; + i ++; + } + /* Now try to write the whole vector */ + r = writev (fd, iov, len); + if (r == -1 && errno != EAGAIN) { + g_slice_free1 (len * sizeof (struct iovec), iov); + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno)); + d->err_callback (err, d->user_data); + return FALSE; + } + } + else if (r > 0) { + /* Find pos inside buffers */ + 
DL_FOREACH_SAFE (d->out_buffers.buffers, cur, tmp) { + if (r >= (ssize_t)cur->data->len) { + /* Mark this buffer as read */ + r -= cur->data->len; + DELETE_OUT_BUFFER (d, cur); + } + else { + /* This buffer was not written completely */ + g_string_erase (cur->data, 0, r); + break; + } + } + g_slice_free1 (len * sizeof (struct iovec), iov); + if (d->out_buffers.pending > 0) { + /* Wait for other event */ + event_del (d->ev); + event_set (d->ev, fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + return TRUE; + } + } + else if (r == 0) { + /* Got EOF while we wait for data */ + g_slice_free1 (len * sizeof (struct iovec), iov); + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, EOF, "got EOF"); + d->err_callback (err, d->user_data); + return FALSE; + } + } + else if (r == -1 && errno == EAGAIN) { + g_slice_free1 (len * sizeof (struct iovec), iov); + debug_ip("partially write data, retry"); + /* Wait for other event */ + event_del (d->ev); + event_set (d->ev, fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + return TRUE; + } + len = d->out_buffers.pending; + } + + if (d->out_buffers.pending == 0) { + /* Disable write event for this time */ + + debug_ip ("all buffers were written successfully"); + + if (is_delayed && d->write_callback) { + if (!d->write_callback (d->user_data)) { + debug_ip("callback set wanna_die flag, terminating"); + return FALSE; + } + } + + event_del (d->ev); + event_set (d->ev, fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + } + else { + /* Plan other write event */ + event_del (d->ev); + event_set (d->ev, fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + } + + return TRUE; +} + +static void +read_buffers (gint fd, rspamd_io_dispatcher_t * d, gboolean skip_read) +{ + ssize_t r; + GError 
*err = NULL; + f_str_t res; + gchar *c, *b; + gchar *end; + size_t len; + enum io_policy saved_policy; + + if (d->wanna_die) { + rspamd_remove_dispatcher (d); + return; + } + + if (d->in_buf == NULL) { + d->in_buf = rspamd_mempool_alloc_tmp (d->pool, sizeof (rspamd_buffer_t)); + if (d->policy == BUFFER_LINE || d->policy == BUFFER_ANY) { + d->in_buf->data = fstralloc_tmp (d->pool, d->default_buf_size); + } + else { + d->in_buf->data = fstralloc_tmp (d->pool, d->nchars + 1); + } + d->in_buf->pos = d->in_buf->data->begin; + } + + end = d->in_buf->pos; + len = d->in_buf->data->len; + + if (BUFREMAIN (d->in_buf) == 0) { + /* Buffer is full, try to call callback with overflow error */ + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, E2BIG, "buffer overflow"); + d->err_callback (err, d->user_data); + return; + } + } + else if (!skip_read) { + /* Try to read the whole buffer */ + r = read (fd, end, BUFREMAIN (d->in_buf)); + if (r == -1 && errno != EAGAIN) { + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, errno, "%s", strerror (errno)); + d->err_callback (err, d->user_data); + return; + } + } + else if (r == 0) { + /* Got EOF while we wait for data */ +#if 0 + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, EOF, "got EOF"); + d->err_callback (err, d->user_data); + return; + } +#endif + /* Read returned 0, it may be shutdown or full quit */ + if (!d->want_read) { + d->half_closed = TRUE; + /* Do not expect any read after this */ + event_del (d->ev); + } + else { + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, EOF, "got EOF"); + d->err_callback (err, d->user_data); + return; + } + } + } + else if (r == -1 && errno == EAGAIN) { + debug_ip("partially read data, retry"); + return; + } + else { + /* Set current position in buffer */ + d->in_buf->pos += r; + d->in_buf->data->len += r; + } + debug_ip("read %z characters, policy is %s, watermark is: %z, buffer has %z bytes", r, + d->policy == BUFFER_LINE ? 
"LINE" : "CHARACTER", d->nchars, d->in_buf->data->len); + } + + saved_policy = d->policy; + c = d->in_buf->data->begin; + end = d->in_buf->pos; + len = d->in_buf->data->len; + b = c; + r = 0; + + switch (d->policy) { + case BUFFER_LINE: + /** Variables: + * b - begin of line + * r - current position in buffer + * *len - length of remaining buffer + * c - pointer to current position (buffer->begin + r) + * res - result string + */ + while (r < (ssize_t)len) { + if (*c == '\n') { + res.begin = b; + res.len = c - b; + /* Strip EOL */ + if (d->strip_eol) { + if (r != 0 && *(c - 1) == '\r') { + res.len--; + } + } + else { + /* Include EOL in reply */ + res.len ++; + } + /* Call callback for a line */ + if (d->read_callback) { + if (!d->read_callback (&res, d->user_data)) { + return; + } + if (d->policy != saved_policy) { + /* Drain buffer as policy is changed */ + /* Note that d->in_buffer is other pointer now, so we need to reinit all pointers */ + /* First detect how much symbols do we have */ + if (end == c) { + /* In fact we read the whole buffer and change input policy, so just set current pos to begin of buffer */ + d->in_buf->pos = d->in_buf->data->begin; + d->in_buf->data->len = 0; + } + else { + /* Otherwise we need to move buffer */ + /* Reinit pointers */ + len = d->in_buf->data->len - r - 1; + end = d->in_buf->data->begin + r + 1; + memmove (d->in_buf->data->begin, end, len); + d->in_buf->data->len = len; + d->in_buf->pos = d->in_buf->data->begin + len; + /* Process remaining buffer */ + read_buffers (fd, d, TRUE); + } + return; + } + } + /* Set new begin of line */ + b = c + 1; + } + r++; + c++; + } + /* Now drain remaining characters in buffer */ + memmove (d->in_buf->data->begin, b, c - b); + d->in_buf->data->len = c - b; + d->in_buf->pos = d->in_buf->data->begin + (c - b); + break; + case BUFFER_CHARACTER: + r = d->nchars; + if ((ssize_t)len >= r) { + res.begin = b; + res.len = r; + c = b + r; + if (d->read_callback) { + if (!d->read_callback (&res, 
d->user_data)) { + return; + } + /* Move remaining string to begin of buffer (draining) */ + if ((ssize_t)len > r) { + len -= r; + memmove (d->in_buf->data->begin, c, len); + d->in_buf->data->len = len; + d->in_buf->pos = d->in_buf->data->begin + len; + b = d->in_buf->data->begin; + } + else { + d->in_buf->data->len = 0; + d->in_buf->pos = d->in_buf->data->begin; + } + if (d->policy != saved_policy && (ssize_t)len != r) { + debug_ip("policy changed during callback, restart buffer's processing"); + read_buffers (fd, d, TRUE); + return; + } + } + } + break; + case BUFFER_ANY: + res.begin = d->in_buf->data->begin; + res.len = len; + + if (d->read_callback) { + /* + * Actually we do not want to send zero sized + * buffers to a read callback + */ + if (! (d->want_read && res.len == 0)) { + if (!d->read_callback (&res, d->user_data)) { + return; + } + } + if (d->policy != saved_policy) { + debug_ip("policy changed during callback, restart buffer's processing"); + read_buffers (fd, d, TRUE); + return; + } + } + d->in_buf->pos = d->in_buf->data->begin; + d->in_buf->data->len = 0; + break; + } +} + +#undef BUFREMAIN + +static void +dispatcher_cb (gint fd, short what, void *arg) +{ + rspamd_io_dispatcher_t *d = (rspamd_io_dispatcher_t *) arg; + GError *err = NULL; + + debug_ip("in dispatcher callback, what: %d, fd: %d", (gint)what, fd); + + if ((what & EV_TIMEOUT) != 0) { + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, ETIMEDOUT, "IO timeout"); + d->err_callback (err, d->user_data); + } + } + else if ((what & EV_READ) != 0) { + read_buffers (fd, d, FALSE); + } + else if ((what & EV_WRITE) != 0) { + /* No data to write, disable further EV_WRITE to this fd */ + if (d->in_sendfile) { + sendfile_callback (d); + } + else { + if (d->out_buffers.pending == 0) { + if (d->half_closed && !d->is_restored) { + /* Socket is half closed and there is nothing more to write, closing connection */ + if (d->err_callback) { + err = g_error_new (G_DISPATCHER_ERROR, EOF, "got 
EOF"); + d->err_callback (err, d->user_data); + return; + } + } + else { + /* Want read again */ + event_del (d->ev); + event_set (d->ev, fd, EV_READ | EV_PERSIST, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + if (d->is_restored && d->write_callback) { + if (!d->write_callback (d->user_data)) { + return; + } + d->is_restored = FALSE; + } + } + } + else { + /* Delayed write */ + write_buffers (fd, d, TRUE); + } + } + } +} + + +rspamd_io_dispatcher_t * +rspamd_create_dispatcher (struct event_base *base, gint fd, enum io_policy policy, + dispatcher_read_callback_t read_cb, dispatcher_write_callback_t write_cb, dispatcher_err_callback_t err_cb, struct timeval *tv, void *user_data) +{ + rspamd_io_dispatcher_t *new; + + if (fd == -1) { + return NULL; + } + + new = g_slice_alloc0 (sizeof (rspamd_io_dispatcher_t)); + + new->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + if (tv != NULL) { + new->tv = rspamd_mempool_alloc (new->pool, sizeof (struct timeval)); + memcpy (new->tv, tv, sizeof (struct timeval)); + } + else { + new->tv = NULL; + } + new->nchars = 0; + new->in_sendfile = FALSE; + new->policy = policy; + new->read_callback = read_cb; + new->write_callback = write_cb; + new->err_callback = err_cb; + new->user_data = user_data; + new->strip_eol = TRUE; + new->half_closed = FALSE; + new->want_read = TRUE; + new->is_restored = FALSE; + new->default_buf_size = sysconf (_SC_PAGESIZE); + + new->ev = rspamd_mempool_alloc0 (new->pool, sizeof (struct event)); + new->fd = fd; + new->ev_base = base; + + event_set (new->ev, fd, EV_WRITE, dispatcher_cb, (void *)new); + event_base_set (new->ev_base, new->ev); + event_add (new->ev, new->tv); + + return new; +} + +void +rspamd_remove_dispatcher (rspamd_io_dispatcher_t * d) +{ + struct rspamd_out_buffer_s *cur, *tmp; + + if (d != NULL) { + DL_FOREACH_SAFE (d->out_buffers.buffers, cur, tmp) { + DELETE_OUT_BUFFER (d, cur); + } + event_del (d->ev); + rspamd_mempool_delete 
(d->pool); + g_slice_free1 (sizeof (rspamd_io_dispatcher_t), d); + } +} + +void +rspamd_set_dispatcher_policy (rspamd_io_dispatcher_t * d, enum io_policy policy, size_t nchars) +{ /* Change the input policy and watermark, moving already buffered input to a new buffer when needed; nchars == 0 selects default_buf_size as the watermark */ + f_str_t *tmp; + gint t; /* offset of in_buf->pos from the buffer start. NOTE(review): a pointer difference is stored in gint - confirm it cannot exceed the gint range */ + + if (d->policy != policy || nchars != d->nchars) { + d->policy = policy; + d->nchars = nchars ? nchars : d->default_buf_size; + /* Resize input buffer if needed */ + if (policy == BUFFER_CHARACTER && nchars != 0) { + if (d->in_buf && d->in_buf->data->size < nchars) { /* current buffer is smaller than the new watermark: copy its content into a buffer of nchars + 1 */ + tmp = fstralloc_tmp (d->pool, d->nchars + 1); + memcpy (tmp->begin, d->in_buf->data->begin, d->in_buf->data->len); + t = d->in_buf->pos - d->in_buf->data->begin; + tmp->len = d->in_buf->data->len; + d->in_buf->data = tmp; + d->in_buf->pos = d->in_buf->data->begin + t; + } + } + else if (policy == BUFFER_LINE || policy == BUFFER_ANY) { + if (d->in_buf && d->nchars < d->default_buf_size) { /* NOTE(review): this tests the watermark rather than in_buf->data->size as the CHARACTER branch does, and copies data->len bytes into a default_buf_size buffer - confirm len cannot exceed it */ + tmp = fstralloc_tmp (d->pool, d->default_buf_size); + memcpy (tmp->begin, d->in_buf->data->begin, d->in_buf->data->len); + t = d->in_buf->pos - d->in_buf->data->begin; + tmp->len = d->in_buf->data->len; + d->in_buf->data = tmp; + d->in_buf->pos = d->in_buf->data->begin + t; + } + d->strip_eol = TRUE; /* switching to LINE or ANY always turns EOL stripping back on */ + } + } + + debug_ip("new input length watermark is %uz", d->nchars); +} + +gboolean +rspamd_dispatcher_write (rspamd_io_dispatcher_t * d, + const void *data, size_t len, gboolean delayed, gboolean allocated) +{ /* Queue len bytes of data as a new output buffer; when delayed is FALSE the write is attempted immediately through write_buffers */ + struct rspamd_out_buffer_s *newbuf; + + newbuf = g_slice_alloc (sizeof (struct rspamd_out_buffer_s)); + if (len == 0) { + /* Assume NULL terminated */ + len = strlen ((const gchar *)data); + } + + if (!allocated) { /* build a new GString from the caller data with g_string_new_len */ + newbuf->data = g_string_new_len (data, len); + newbuf->allocated = TRUE; + } + else { /* wrap the caller pointer in a GString header without copying */ + newbuf->data = g_string_new (NULL); /* NOTE(review): the str member set up by g_string_new is overwritten on the next statement without being released - looks like a small leak, confirm */ + newbuf->data->str = (gchar *)data; + newbuf->data->len = len; + newbuf->data->allocated_len = len; + newbuf->allocated = FALSE; + } + + APPEND_OUT_BUFFER (d, newbuf); + + if (!delayed) { + debug_ip("plan write event"); + return write_buffers (d->fd, d, FALSE); + } + 
/* Otherwise plan write event */ + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + + return TRUE; +} + +gboolean rspamd_dispatcher_write_string (rspamd_io_dispatcher_t *d, + GString *str, + gboolean delayed, + gboolean free_on_write) +{ /* Queue an existing GString as an output buffer; free_on_write is stored in newbuf->allocated. Returns the write_buffers result when not delayed, TRUE once an EV_WRITE event is planned */ + struct rspamd_out_buffer_s *newbuf; + + newbuf = g_slice_alloc (sizeof (struct rspamd_out_buffer_s)); + newbuf->data = str; + newbuf->allocated = free_on_write; + + APPEND_OUT_BUFFER (d, newbuf); + + if (!delayed) { + debug_ip("plan write event"); + return write_buffers (d->fd, d, FALSE); + } + /* Otherwise plan write event */ + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, (void *)d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + + return TRUE; +} + +gboolean +rspamd_dispatcher_sendfile (rspamd_io_dispatcher_t *d, gint fd, size_t len) +{ /* Rewind fd, record the sendfile state in the dispatcher and hand over to sendfile_callback; returns FALSE when lseek or mmap fails */ + if (lseek (fd, 0, SEEK_SET) == -1) { + msg_warn ("lseek failed: %s", strerror (errno)); + return FALSE; + } + + d->offset = 0; + d->in_sendfile = TRUE; /* NOTE(review): stays TRUE when the mmap below fails and FALSE is returned - confirm callers reset it */ + d->sendfile_fd = fd; + d->file_size = len; /* when HAVE_SENDFILE is not defined the file is mapped read-only below and kept in d->map */ + +#ifndef HAVE_SENDFILE + #ifdef HAVE_MMAP_NOCORE + if ((d->map = mmap (NULL, len, PROT_READ, MAP_SHARED | MAP_NOCORE, fd, 0)) == MAP_FAILED) { + #else + if ((d->map = mmap (NULL, len, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { + #endif + msg_warn ("mmap failed: %s", strerror (errno)); + return FALSE; + } +#endif + + return sendfile_callback (d); +} + +void +rspamd_dispatcher_pause (rspamd_io_dispatcher_t * d) +{ /* Remove the dispatcher event and clear is_restored */ + debug_ip ("paused dispatcher"); + event_del (d->ev); + d->is_restored = FALSE; +} + +void +rspamd_dispatcher_restore (rspamd_io_dispatcher_t * d) +{ /* Re-arm the dispatcher with an EV_WRITE event and set is_restored; does nothing when is_restored is already set */ + if (!d->is_restored) { + debug_ip ("restored dispatcher"); + event_del (d->ev); + event_set (d->ev, d->fd, EV_WRITE, dispatcher_cb, d); + event_base_set (d->ev_base, d->ev); + event_add (d->ev, d->tv); + d->is_restored = TRUE; + } +} + +void +rspamd_dispacther_cleanup 
(rspamd_io_dispatcher_t *d) +{ + struct rspamd_out_buffer_s *cur, *tmp; + + DL_FOREACH_SAFE (d->out_buffers.buffers, cur, tmp) { + DELETE_OUT_BUFFER (d, cur); + } + /* Cleanup temporary data */ + rspamd_mempool_cleanup_tmp (d->pool); + d->in_buf = NULL; +} + +#undef debug_ip + +/* + * vi:ts=4 + */ diff --git a/src/libserver/buffer.h b/src/libserver/buffer.h new file mode 100644 index 000000000..5ed42bfb3 --- /dev/null +++ b/src/libserver/buffer.h @@ -0,0 +1,158 @@ +/** + * @file buffer.h + * Implements buffered IO + */ + +#ifndef RSPAMD_BUFFER_H +#define RSPAMD_BUFFER_H + +#include "config.h" +#include "mem_pool.h" +#include "fstring.h" + +typedef gboolean (*dispatcher_read_callback_t)(f_str_t *in, void *user_data); +typedef gboolean (*dispatcher_write_callback_t)(void *user_data); +typedef void (*dispatcher_err_callback_t)(GError *err, void *user_data); + +/** + * Types of IO handling + */ +enum io_policy { + BUFFER_LINE, /**< call handler when we have line ready */ + BUFFER_CHARACTER, /**< call handler when we have some characters */ + BUFFER_ANY /**< call handler whenever we got data in buffer */ +}; + +/** + * Buffer structure + */ +typedef struct rspamd_buffer_s { + f_str_t *data; /**< buffer logic */ + gchar *pos; /**< current position */ +} rspamd_buffer_t; + +struct rspamd_out_buffer_s { + GString *data; + gboolean allocated; + struct rspamd_out_buffer_s *prev, *next; +}; + +typedef struct rspamd_io_dispatcher_s { + rspamd_buffer_t *in_buf; /**< input buffer */ + struct { + guint pending; + struct rspamd_out_buffer_s *buffers; + } out_buffers; /**< output buffers chain */ + struct timeval *tv; /**< io timeout */ + struct event *ev; /**< libevent io event */ + rspamd_mempool_t *pool; /**< where to store data */ + enum io_policy policy; /**< IO policy */ + size_t nchars; /**< how many chars to read */ + gint fd; /**< descriptor */ + guint32 peer_addr; /**< address of peer for debugging */ + gboolean wanna_die; /**< if dispatcher should be stopped */ + 
dispatcher_read_callback_t read_callback; /**< read callback */ + dispatcher_write_callback_t write_callback; /**< write callback */ + dispatcher_err_callback_t err_callback; /**< error callback */ + void *user_data; /**< user's data for callbacks */ + gulong default_buf_size; /**< default size for buffering */ + off_t offset; /**< for sendfile use */ + size_t file_size; /**< length passed to rspamd_dispatcher_sendfile */ + gint sendfile_fd; /**< file descriptor passed to rspamd_dispatcher_sendfile */ + gboolean in_sendfile; /**< whether buffer is in sendfile mode */ + gboolean strip_eol; /**< strip or not line ends in BUFFER_LINE policy */ + gboolean is_restored; /**< call a callback when dispatcher is restored */ + gboolean half_closed; /**< connection is half closed */ + gboolean want_read; /**< whether we want to read more data */ + struct event_base *ev_base; /**< event base for io operations */ +#ifndef HAVE_SENDFILE + void *map; /**< mmap of sendfile_fd, used when HAVE_SENDFILE is not defined */ +#endif +} rspamd_io_dispatcher_t; + +/** + * Creates rspamd IO dispatcher for specified descriptor + * @param base event base the dispatcher event is attached to + * @param fd descriptor to IO + * @param policy IO policy + * @param read_cb read callback handler + * @param write_cb write callback handler + * @param err_cb error callback handler + * @param tv IO timeout (copied into the dispatcher; may be NULL) + * @param user_data pointer to user's data + * @return new dispatcher object or NULL in case of failure (fd is -1) + */ +rspamd_io_dispatcher_t* rspamd_create_dispatcher (struct event_base *base, gint fd, + enum io_policy policy, + dispatcher_read_callback_t read_cb, + dispatcher_write_callback_t write_cb, + dispatcher_err_callback_t err_cb, + struct timeval *tv, + void *user_data); + +/** + * Set new policy for dispatcher + * @param d pointer to dispatcher's object + * @param policy IO policy + * @param nchars number of characters in buffer for character policy (0 selects the default buffer size) + */ +void rspamd_set_dispatcher_policy (rspamd_io_dispatcher_t *d, + enum io_policy policy, + size_t nchars); + +/** + * Write data when it would be possible + * @param d pointer to dispatcher's object + * @param data data to write + * @param len length of data (0 means data is a NUL terminated string whose length is taken with strlen) + * @param delayed if FALSE the write is attempted immediately, otherwise a write event is planned + * @param allocated if FALSE the data is copied into a new buffer, if TRUE the pointer is stored without copying (presumably it must stay valid until written - confirm against the buffer release code) + * @return result of the immediate write attempt when not delayed, TRUE when a write event was planned + */ +gboolean 
rspamd_dispatcher_write (rspamd_io_dispatcher_t *d, + const void *data, + size_t len, gboolean delayed, + gboolean allocated) G_GNUC_WARN_UNUSED_RESULT; + +/** + * Write a GString to dispatcher + * @param d dispatcher object + * @param str string to write + * @param delayed delay write + * @param free_on_write free string after writing to a socket + * @return TRUE if write has been queued successfully + */ +gboolean rspamd_dispatcher_write_string (rspamd_io_dispatcher_t *d, + GString *str, + gboolean delayed, + gboolean free_on_write) G_GNUC_WARN_UNUSED_RESULT; + +/** + * Send specified descriptor to dispatcher + * @param d pointer to dispatcher's object + * @param fd descriptor of file + * @param len length of data + * @return FALSE if lseek (or mmap, when HAVE_SENDFILE is not defined) fails, otherwise the result of the sendfile callback + */ +gboolean rspamd_dispatcher_sendfile (rspamd_io_dispatcher_t *d, gint fd, size_t len) G_GNUC_WARN_UNUSED_RESULT; + +/** + * Pause IO events on dispatcher + * @param d pointer to dispatcher's object + */ +void rspamd_dispatcher_pause (rspamd_io_dispatcher_t *d); + +/** + * Restore IO events on dispatcher (plans a write event unless the dispatcher is already restored) + * @param d pointer to dispatcher's object + */ +void rspamd_dispatcher_restore (rspamd_io_dispatcher_t *d); + +/** + * Frees dispatcher object + * @param dispatcher pointer to dispatcher's object + */ +void rspamd_remove_dispatcher (rspamd_io_dispatcher_t *dispatcher); + +/** + * Cleanup dispatcher freeing all temporary data: queued output buffers are deleted, temporary pool data is cleaned up and the input buffer pointer is reset + * @param dispatcher pointer to dispatcher's object + */ +void rspamd_dispacther_cleanup (rspamd_io_dispatcher_t *dispatcher); + +#endif diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h new file mode 100644 index 000000000..6ecb441fd --- /dev/null +++ b/src/libserver/cfg_file.h @@ -0,0 +1,516 @@ +/** + * @file cfg_file.h + * Config file parser and config routines API + */ + +#ifndef CFG_FILE_H +#define CFG_FILE_H + +#include "config.h" +#include "mem_pool.h" +#include "upstream.h" +#include "memcached.h" +#include "symbols_cache.h" +#include "cfg_rcl.h" +#include "utlist.h" +#include "ucl.h" + +#define 
DEFAULT_BIND_PORT 11333 +#define DEFAULT_CONTROL_PORT 11334 +#define MAX_MEMCACHED_SERVERS 4 +#define DEFAULT_MEMCACHED_PORT 11211 +/* Memcached timeouts */ +#define DEFAULT_MEMCACHED_CONNECT_TIMEOUT 1000 +/* Upstream timeouts */ +#define DEFAULT_UPSTREAM_ERROR_TIME 10 +#define DEFAULT_UPSTREAM_ERROR_TIME 10 +#define DEFAULT_UPSTREAM_DEAD_TIME 300 +#define DEFAULT_UPSTREAM_MAXERRORS 10 + +struct expression; +struct tokenizer; +struct classifier; + +enum { VAL_UNDEF=0, VAL_TRUE, VAL_FALSE }; + +/** + * Type of time configuration parameter + */ +enum time_type { + TIME_SECONDS = 0, + TIME_MILLISECONDS, + TIME_MINUTES, + TIME_HOURS +}; +/** + * Types of rspamd bind lines + */ +enum rspamd_cred_type { + CRED_NORMAL, + CRED_CONTROL, + CRED_LMTP, + CRED_DELIVERY +}; + +/** + * Regexp type: /H - header, /M - mime, /U - url /X - raw header + */ +enum rspamd_regexp_type { + REGEXP_NONE = 0, + REGEXP_HEADER, + REGEXP_MIME, + REGEXP_MESSAGE, + REGEXP_URL, + REGEXP_RAW_HEADER +}; + +/** + * Logging type + */ +enum rspamd_log_type { + RSPAMD_LOG_CONSOLE, + RSPAMD_LOG_SYSLOG, + RSPAMD_LOG_FILE +}; + +/** + * Regexp structure + */ +struct rspamd_regexp { + enum rspamd_regexp_type type; /**< regexp type */ + gchar *regexp_text; /**< regexp text representation */ + GRegex *regexp; /**< glib regexp structure */ + GRegex *raw_regexp; /**< glib regexp structure for raw matching */ + gchar *header; /**< header name for header regexps */ + gboolean is_test; /**< true if this expression must be tested */ + gboolean is_raw; /**< true if this regexp is done by raw matching */ + gboolean is_strong; /**< true if headers search must be case sensitive */ +}; + +/** + * Memcached server object + */ +struct memcached_server { + struct upstream up; /**< common upstream base */ + struct in_addr addr; /**< address of server */ + guint16 port; /**< port to connect */ + short alive; /**< is this server alive */ + gint16 num; /**< number of servers in case of mirror */ +}; + +/** + * script module 
list item + */ +struct script_module { + gchar *name; /**< name of module */ + gchar *path; /**< path to module */ +}; + +/** + * Type of lua variable + */ +enum lua_var_type { + LUA_VAR_NUM, + LUA_VAR_BOOLEAN, + LUA_VAR_STRING, + LUA_VAR_FUNCTION, + LUA_VAR_UNKNOWN +}; +/** + * Module option + */ +struct module_opt { + gchar *param; /**< parameter name */ + gchar *value; /**< parameter value */ + gchar *description; /**< parameter description */ + gchar *group; /**< parameter group */ + gpointer actual_data; /**< parsed data */ + gboolean is_lua; /**< actually this is lua variable */ + enum lua_var_type lua_type; /**< type of lua variable */ +}; + +struct module_meta_opt { + gchar *name; /**< Name of meta option */ + GList *options; /**< List of struct module_opt */ +}; + +/** + * Symbol definition + */ +struct symbol_def { + gchar *name; + gchar *description; + gdouble *weight_ptr; +}; + +/** + * Symbols group + */ +struct symbols_group { + gchar *name; + GList *symbols; +}; + +/** + * Statfile section definition + */ +struct statfile_section { + guint32 code; /**< section's code */ + guint64 size; /**< size of section */ + double weight; /**< weight coefficient for section */ +}; + +/** + * Statfile autolearn parameters + */ +struct statfile_autolearn_params { + const gchar *metric; /**< metric name for autolearn triggering */ + double threshold_min; /**< threshold mark */ + double threshold_max; /**< threshold mark */ + GList *symbols; /**< list of symbols */ +}; + +/** + * Sync affinity + */ +enum sync_affinity { + AFFINITY_NONE = 0, + AFFINITY_MASTER, + AFFINITY_SLAVE +}; + +/** + * Binlog params + */ +struct statfile_binlog_params { + enum sync_affinity affinity; + time_t rotate_time; + gchar *master_addr; + guint16 master_port; +}; + +typedef double (*statfile_normalize_func)(struct config_file *cfg, long double score, void *params); + +/** + * Statfile config definition + */ +struct statfile { + gchar *symbol; /**< symbol of statfile */ + gchar *path; /**< 
filesystem pattern (with %r or %f) */ + gchar *label; /**< label of this statfile */ + gsize size; /**< size of statfile */ + GList *sections; /**< list of sections in statfile */ + struct statfile_autolearn_params *autolearn; /**< autolearn params */ + struct statfile_binlog_params *binlog; /**< binlog params */ + statfile_normalize_func normalizer; /**< function that is used as normaliser */ + void *normalizer_data; /**< normalizer function params */ + gchar *normalizer_str; /**< source string (for dump) */ + ucl_object_t *opts; /**< other options */ + gboolean is_spam; /**< spam flag */ +}; + +/** + * Classifier config definition + */ +struct classifier_config { + GList *statfiles; /**< statfiles list */ + GHashTable *labels; /**< statfiles with labels */ + gchar *metric; /**< metric of this classifier */ + struct classifier *classifier; /**< classifier interface */ + struct tokenizer *tokenizer; /**< tokenizer used for classifier */ + GHashTable *opts; /**< other options */ + GList *pre_callbacks; /**< list of callbacks that are called before classification */ + GList *post_callbacks; /**< list of callbacks that are called after classification */ +}; + +struct rspamd_worker_bind_conf { + gchar *bind_host; + guint16 bind_port; + gint ai; + gboolean is_systemd; + struct rspamd_worker_bind_conf *next; +}; + +struct rspamd_worker_param_parser { + rspamd_rcl_handler_t handler; /**< handler function */ + struct rspamd_rcl_struct_parser parser; /**< parser attributes */ + const gchar *name; /**< parameter's name */ + UT_hash_handle hh; /**< hash by name */ +}; + +struct rspamd_worker_cfg_parser { + struct rspamd_worker_param_parser *parsers; /**< parsers hash */ + gint type; /**< workers quark */ + gboolean (*def_obj_parser)(const ucl_object_t *obj, gpointer ud); /**< default object parser */ + gpointer def_ud; + UT_hash_handle hh; /**< hash by type */ +}; + +/** + * Config params for rspamd worker + */ +struct worker_conf { + worker_t *worker; /**< pointer to worker 
type */ + GQuark type; /**< type of worker */ + struct rspamd_worker_bind_conf *bind_conf; /**< bind configuration */ + guint16 count; /**< number of workers */ + GList *listen_socks; /**< listening sockets descriptors */ + guint32 rlimit_nofile; /**< max files limit */ + guint32 rlimit_maxcore; /**< maximum core file size */ + GHashTable *params; /**< params for worker */ + GQueue *active_workers; /**< linked list of spawned workers */ + gboolean has_socket; /**< whether we should make listening socket in main process */ + gpointer *ctx; /**< worker's context */ + ucl_object_t *options; /**< other worker's options */ +}; + +/** + * Structure that stores all config data + */ +struct config_file { + gchar *rspamd_user; /**< user to run as */ + gchar *rspamd_group; /**< group to run as */ + rspamd_mempool_t *cfg_pool; /**< memory pool for config */ + gchar *cfg_name; /**< name of config file */ + gchar *pid_file; /**< name of pid file */ + gchar *temp_dir; /**< dir for temp files */ +#ifdef WITH_GPERF_TOOLS + gchar *profile_path; +#endif + + gboolean no_fork; /**< if 1 do not call daemon() */ + gboolean config_test; /**< if TRUE do only config file test */ + gboolean raw_mode; /**< work in raw mode instead of utf one */ + gboolean one_shot_mode; /**< rules add only one symbol */ + gboolean check_text_attachements; /**< check text attachments as text */ + gboolean convert_config; /**< convert config to XML format */ + gboolean strict_protocol_headers; /**< strictly check protocol headers */ + + gsize max_diff; /**< maximum diff size for text parts */ + + enum rspamd_log_type log_type; /**< log type */ + gint log_facility; /**< log facility in case of syslog */ + gint log_level; /**< log level trigger */ + gchar *log_file; /**< path to logfile in case of file logging */ + gboolean log_buffered; /**< whether logging is buffered */ + guint32 log_buf_size; /**< length of log buffer */ + gchar *debug_ip_map; /**< turn on debugging for specified ip addresses */ + gboolean 
log_urls; /**< whether we should log URLs */ + GList *debug_symbols; /**< symbols to debug */ + gboolean log_color; /**< output colors for console output */ + gboolean log_extended; /**< log extended information */ + + guint32 statfile_sync_interval; /**< synchronization interval */ + guint32 statfile_sync_timeout; /**< synchronization timeout */ + gboolean mlock_statfile_pool; /**< use mlock (2) for locking statfiles */ + + struct memcached_server memcached_servers[MAX_MEMCACHED_SERVERS]; /**< memcached servers */ + gsize memcached_servers_num; /**< number of memcached servers */ + memc_proto_t memcached_protocol; /**< memcached protocol */ + guint memcached_error_time; /**< memcached error time (see upstream documentation) */ + guint memcached_dead_time; /**< memcached dead time */ + guint memcached_maxerrors; /**< maximum number of errors */ + guint memcached_connect_timeout; /**< connection timeout */ + + gboolean delivery_enable; /**< whether delivery agent is enabled */ + gchar *deliver_host; /**< host for mail delivering */ + struct in_addr deliver_addr; /**< its address */ + guint16 deliver_port; /**< port for delivering */ + guint16 deliver_family; /**< socket family for delivering */ + gchar *deliver_agent_path; /**< deliver to pipe instead of socket */ + gboolean deliver_lmtp; /**< use LMTP instead of SMTP */ + + GList *script_modules; /**< linked list of script modules to load */ + + GList *filters; /**< linked list of all filters */ + GList *workers; /**< linked list of all workers params */ + struct rspamd_worker_cfg_parser *wrk_parsers; /**< hash for worker config parsers, indexed by worker quarks */ + gchar *filters_str; /**< string of filters */ + ucl_object_t *rcl_obj; /**< rcl object */ + GHashTable* metrics; /**< hash of metrics indexed by metric name */ + GList* symbols_groups; /**< groups of symbols */ + GList* metrics_list; /**< linked list of metrics */ + GHashTable* metrics_symbols; /**< hash table of metrics indexed by symbol */ + GHashTable* 
c_modules; /**< hash of c modules indexed by module name */ + GHashTable* composite_symbols; /**< hash of composite symbols indexed by its name */ + GList *classifiers; /**< list of all classifiers defined */ + GList *statfiles; /**< list of all statfiles in config file order */ + GHashTable *classifiers_symbols; /**< hashtable indexed by symbol name of classifiers */ + GHashTable* cfg_params; /**< all cfg params indexed by its name in this structure */ + GList *pre_filters; /**< list of pre-processing lua filters */ + GList *post_filters; /**< list of post-processing lua filters */ + gchar *dynamic_conf; /**< path to dynamic configuration */ + GList *current_dynamic_conf; /**< currently loaded dynamic configuration */ + GHashTable* domain_settings; /**< settings per-domains */ + GHashTable* user_settings; /**< settings per-user */ + gchar* domain_settings_str; /**< string representation of settings */ + gchar* user_settings_str; + gint clock_res; /**< resolution of clock used */ + + GList *maps; /**< maps active */ + rspamd_mempool_t *map_pool; /**< static maps pool */ + gdouble map_timeout; /**< maps watch timeout */ + + struct symbols_cache *cache; /**< symbols cache object */ + gchar *cache_filename; /**< filename of cache file */ + struct metric *default_metric; /**< default metric */ + + gchar* checksum; /**< real checksum of config file */ + gchar* dump_checksum; /**< dump checksum of config file */ + gpointer lua_state; /**< pointer to lua state */ + + gchar* rrd_file; /**< rrd file to store statistics */ + + gchar* history_file; /**< file to save rolling history */ + + gdouble dns_timeout; /**< timeout in milliseconds for waiting for dns reply */ + guint32 dns_retransmits; /**< maximum retransmits count */ + guint32 dns_throttling_errors; /**< maximum errors for starting resolver throttling */ + guint32 dns_throttling_time; /**< time in seconds for DNS throttling */ + guint32 dns_io_per_server; /**< number of sockets per DNS server */ + GList *nameservers; 
/**< list of nameservers or NULL to parse resolv.conf */ +}; + + +/** + * Parse host[:port[:priority]] line + * @param pool memory pool + * @param str line to parse + * @param addr host address + * @param port port + * @param priority priority + * @return TRUE if string was parsed + */ +gboolean parse_host_port_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port, guint *priority); + +/** + * Parse host:port line + * @param pool memory pool + * @param str line to parse + * @param addr host address + * @param port port + * @return TRUE if string was parsed + */ +gboolean parse_host_port (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port); + +/** + * Parse host:priority line + * @param pool memory pool + * @param str line to parse + * @param addr host address + * @param priority priority + * @return TRUE if string was parsed + */ +gboolean parse_host_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint *priority); + +/** + * Parse bind credits + * @param cfg config file to use + * @param cf worker configuration + * @param str line that represents bind line + * @return 1 if line was successfully parsed and 0 in case of error + */ +gboolean parse_bind_line (struct config_file *cfg, struct worker_conf *cf, const gchar *str); + +/** + * Init default values + * @param cfg config file + */ +void init_defaults (struct config_file *cfg); + +/** + * Free memory used by config structure + * @param cfg config file + */ +void free_config (struct config_file *cfg); + +/** + * Gets module option with specified name + * @param cfg config file + * @param module_name name of module + * @param opt_name name of option to get + * @return module value or NULL if option is not defined + */ +const ucl_object_t* get_module_opt (struct config_file *cfg, const gchar *module_name, + const gchar *opt_name); + +/** + * Parse limit + * @param limit string representation of limit (eg. 1M) + * @param len presumably the length of the limit string - confirm against the implementation + * @return numeric value of limit + */ +guint64 parse_limit (const gchar *limit, guint len); + +/** + * Parse flag + * @param str string representation of flag (eg. 
'on') + * @return numeric value of flag (0 or 1) + */ +gchar parse_flag (const gchar *str); + +/** + * Do post load actions for config + * @param cfg config file + */ +void post_load_config (struct config_file *cfg); + +/** + * Calculate checksum for config file + * @param cfg config file + */ +gboolean get_config_checksum (struct config_file *cfg); + + +/** + * Replace all \" with a single " in given string + * @param line input string + */ +void unescape_quotes (gchar *line); + +/* + * Convert comma separated string to a list of strings + */ +GList* parse_comma_list (rspamd_mempool_t *pool, const gchar *line); + +/* + * Return a new classifier_config structure, setting default and non-conflicting attributes + */ +struct classifier_config* check_classifier_conf (struct config_file *cfg, struct classifier_config *c); +/* + * Return a new worker_conf structure, setting default and non-conflicting attributes + */ +struct worker_conf* check_worker_conf (struct config_file *cfg, struct worker_conf *c); +/* + * Return a new metric structure, setting default and non-conflicting attributes + */ +struct metric* check_metric_conf (struct config_file *cfg, struct metric *c); +/* + * Return a new statfile structure, setting default and non-conflicting attributes + */ +struct statfile* check_statfile_conf (struct config_file *cfg, struct statfile *c); + +/* + * Read XML configuration file + */ +gboolean read_rspamd_config (struct config_file *cfg, + const gchar *filename, const gchar *convert_to, + rspamd_rcl_section_fin_t logger_fin, gpointer logger_ud); + +/* + * Register symbols of classifiers inside metrics + */ +void insert_classifier_symbols (struct config_file *cfg); + +/* + * Check statfiles inside a classifier + */ +gboolean check_classifier_statfiles (struct classifier_config *cf); + +/* + * Find classifier config by name + */ +struct classifier_config* find_classifier_conf (struct config_file *cfg, const gchar *name); + +/* + * Parse input `ip_list` to radix tree 
`tree`. Now supports only IPv4 addresses. + */ +gboolean rspamd_parse_ip_list (const gchar *ip_list, radix_tree_t **tree); + +#endif /* ifdef CFG_FILE_H */ +/* + * vi:ts=4 + */ diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c new file mode 100644 index 000000000..37b554dec --- /dev/null +++ b/src/libserver/cfg_rcl.c @@ -0,0 +1,1471 @@ +/* Copyright (c) 2013, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "cfg_rcl.h" +#include "main.h" +#include "settings.h" +#include "cfg_file.h" +#include "lua/lua_common.h" +#include "expressions.h" +#include "classifiers/classifiers.h" +#include "tokenizers/tokenizers.h" + +/* + * Common section handlers + */ +static gboolean +rspamd_rcl_logging_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const ucl_object_t *val; + const gchar *facility, *log_type, *log_level; + + val = ucl_object_find_key (obj, "type"); + if (val != NULL && ucl_object_tostring_safe (val, &log_type)) { + if (g_ascii_strcasecmp (log_type, "file") == 0) { + /* Need to get filename */ + val = ucl_object_find_key (obj, "filename"); + if (val == NULL || val->type != UCL_STRING) { + g_set_error (err, CFG_RCL_ERROR, ENOENT, "filename attribute must be specified for file logging type"); + return FALSE; + } + cfg->log_type = RSPAMD_LOG_FILE; + cfg->log_file = rspamd_mempool_strdup (cfg->cfg_pool, ucl_object_tostring (val)); + } + else if (g_ascii_strcasecmp (log_type, "syslog") == 0) { + /* Need to get facility */ + cfg->log_facility = LOG_DAEMON; + cfg->log_type = RSPAMD_LOG_SYSLOG; + val = ucl_object_find_key (obj, "facility"); + if (val != NULL && ucl_object_tostring_safe (val, &facility)) { + if (g_ascii_strcasecmp (facility, "LOG_AUTH") == 0 || + g_ascii_strcasecmp (facility, "auth") == 0 ) { + cfg->log_facility = LOG_AUTH; + } + else if (g_ascii_strcasecmp (facility, "LOG_CRON") == 0 || + g_ascii_strcasecmp (facility, "cron") == 0 ) { + cfg->log_facility = LOG_CRON; + } + else if (g_ascii_strcasecmp (facility, "LOG_DAEMON") == 0 || + g_ascii_strcasecmp (facility, "daemon") == 0 ) { + cfg->log_facility = LOG_DAEMON; + } + else if (g_ascii_strcasecmp (facility, "LOG_MAIL") == 0 || + g_ascii_strcasecmp (facility, "mail") == 0) { + cfg->log_facility = LOG_MAIL; + } + else if (g_ascii_strcasecmp (facility, "LOG_USER") == 0 || + g_ascii_strcasecmp (facility, "user") == 0 
) { + cfg->log_facility = LOG_USER; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL0") == 0 || + g_ascii_strcasecmp (facility, "local0") == 0) { + cfg->log_facility = LOG_LOCAL0; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL1") == 0 || + g_ascii_strcasecmp (facility, "local1") == 0) { + cfg->log_facility = LOG_LOCAL1; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL2") == 0 || + g_ascii_strcasecmp (facility, "local2") == 0) { + cfg->log_facility = LOG_LOCAL2; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL3") == 0 || + g_ascii_strcasecmp (facility, "local3") == 0) { + cfg->log_facility = LOG_LOCAL3; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL4") == 0 || + g_ascii_strcasecmp (facility, "local4") == 0) { + cfg->log_facility = LOG_LOCAL4; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL5") == 0 || + g_ascii_strcasecmp (facility, "local5") == 0) { + cfg->log_facility = LOG_LOCAL5; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL6") == 0 || + g_ascii_strcasecmp (facility, "local6") == 0) { + cfg->log_facility = LOG_LOCAL6; + } + else if (g_ascii_strcasecmp (facility, "LOG_LOCAL7") == 0 || + g_ascii_strcasecmp (facility, "local7") == 0) { + cfg->log_facility = LOG_LOCAL7; + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid log facility: %s", facility); + return FALSE; + } + } + } + else if (g_ascii_strcasecmp (log_type, "stderr") == 0 || g_ascii_strcasecmp (log_type, "console") == 0) { + cfg->log_type = RSPAMD_LOG_CONSOLE; + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid log type: %s", log_type); + return FALSE; + } + } + else { + /* No type specified */ + msg_warn ("logging type is not specified correctly, log output to the console"); + } + + /* Handle log level */ + val = ucl_object_find_key (obj, "level"); + if (val != NULL && ucl_object_tostring_safe (val, &log_level)) { + if (g_ascii_strcasecmp (log_level, "error") == 0) { + cfg->log_level = G_LOG_LEVEL_ERROR | 
G_LOG_LEVEL_CRITICAL; + } + else if (g_ascii_strcasecmp (log_level, "warning") == 0) { + cfg->log_level = G_LOG_LEVEL_WARNING; + } + else if (g_ascii_strcasecmp (log_level, "info") == 0) { + cfg->log_level = G_LOG_LEVEL_INFO | G_LOG_LEVEL_MESSAGE; + } + else if (g_ascii_strcasecmp (log_level, "debug") == 0) { + cfg->log_level = G_LOG_LEVEL_DEBUG; + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid log level: %s", log_level); + return FALSE; + } + } + + return rspamd_rcl_section_parse_defaults (section, cfg, obj, cfg, err); +} + +static gboolean +rspamd_rcl_options_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const ucl_object_t *val; + const gchar *user_settings, *domain_settings; + + /* Handle user and domain settings */ + val = ucl_object_find_key (obj, "user_settings"); + if (val != NULL && ucl_object_tostring_safe (val, &user_settings)) { + if (!read_settings (user_settings, "Users' settings", cfg, cfg->user_settings)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot read settings: %s", user_settings); + return FALSE; + } + cfg->user_settings_str = rspamd_mempool_strdup (cfg->cfg_pool, user_settings); + } + + val = ucl_object_find_key (obj, "domain_settings"); + if (val != NULL && ucl_object_tostring_safe (val, &domain_settings)) { + if (!read_settings (domain_settings, "Domains settings", cfg, cfg->domain_settings)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot read settings: %s", domain_settings); + return FALSE; + } + cfg->domain_settings_str = rspamd_mempool_strdup (cfg->cfg_pool, domain_settings); + } + + return rspamd_rcl_section_parse_defaults (section, cfg, obj, cfg, err); +} + +static gint +rspamd_symbols_group_find_func (gconstpointer a, gconstpointer b) +{ + const struct symbols_group *gr = a; + const gchar *uv = b; + + return g_ascii_strcasecmp (gr->name, uv); +} + +/** + * Insert a symbol to the metric + * @param cfg + * @param metric + * 
@param obj symbol rcl object (either float value or an object) + * @param err + * @return + */ +static gboolean +rspamd_rcl_insert_symbol (struct config_file *cfg, struct metric *metric, + const ucl_object_t *obj, gboolean is_legacy, GError **err) +{ + const gchar *group = "ungrouped", *description = NULL, *sym_name; + gdouble symbol_score, *score_ptr; + const ucl_object_t *val; + struct symbols_group *sym_group; + struct symbol_def *sym_def; + GList *metric_list, *group_list; + + /* + * We allow two type of definitions: + * symbol = weight + * or + * symbol { + * weight = ...; + * description = ...; + * group = ...; + * } + */ + if (is_legacy) { + val = ucl_object_find_key (obj, "name"); + if (val == NULL) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "symbol name is missing"); + return FALSE; + } + sym_name = ucl_object_tostring (val); + } + else { + sym_name = ucl_object_key (obj); + } + if (ucl_object_todouble_safe (obj, &symbol_score)) { + description = NULL; + } + else if (obj->type == UCL_OBJECT) { + val = ucl_object_find_key (obj, "weight"); + if (val == NULL || !ucl_object_todouble_safe (val, &symbol_score)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid symbol score: %s", sym_name); + return FALSE; + } + val = ucl_object_find_key (obj, "description"); + if (val != NULL) { + description = ucl_object_tostring (val); + } + val = ucl_object_find_key (obj, "group"); + if (val != NULL) { + ucl_object_tostring_safe (val, &group); + } + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid symbol type: %s", sym_name); + return FALSE; + } + + sym_def = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct symbol_def)); + score_ptr = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (gdouble)); + + *score_ptr = symbol_score; + sym_def->weight_ptr = score_ptr; + sym_def->name = rspamd_mempool_strdup (cfg->cfg_pool, sym_name); + sym_def->description = (gchar *)description; + + g_hash_table_insert (metric->symbols, sym_def->name, score_ptr); + + if 
((metric_list = g_hash_table_lookup (cfg->metrics_symbols, sym_def->name)) == NULL) { + metric_list = g_list_prepend (NULL, metric); + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_list_free, metric_list); + g_hash_table_insert (cfg->metrics_symbols, sym_def->name, metric_list); + } + else { + /* Slow but keep start element of list in safe */ + if (!g_list_find (metric_list, metric)) { + metric_list = g_list_append (metric_list, metric); + } + } + + /* Search for symbol group */ + group_list = g_list_find_custom (cfg->symbols_groups, group, rspamd_symbols_group_find_func); + if (group_list == NULL) { + /* Create new group */ + sym_group = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct symbols_group)); + sym_group->name = rspamd_mempool_strdup (cfg->cfg_pool, group); + sym_group->symbols = NULL; + cfg->symbols_groups = g_list_prepend (cfg->symbols_groups, sym_group); + } + else { + sym_group = group_list->data; + } + /* Insert symbol */ + sym_group->symbols = g_list_prepend (sym_group->symbols, sym_def); + + return TRUE; +} + +static gboolean +rspamd_rcl_metric_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const ucl_object_t *val, *cur; + const gchar *metric_name, *subject_name, *semicolon, *act_str; + struct metric *metric; + struct metric_action *action; + gdouble action_score, grow_factor; + gint action_value; + gboolean new = TRUE, have_actions = FALSE; + ucl_object_iter_t it = NULL; + + val = ucl_object_find_key (obj, "name"); + if (val == NULL || !ucl_object_tostring_safe (val, &metric_name)) { + metric_name = DEFAULT_METRIC; + } + + metric = g_hash_table_lookup (cfg->metrics, metric_name); + if (metric == NULL) { + metric = check_metric_conf (cfg, metric); + metric->name = metric_name; + } + else { + new = FALSE; + } + + /* Handle actions */ + val = ucl_object_find_key (obj, "actions"); + if (val != NULL) { + if (val->type != UCL_OBJECT) { + 
g_set_error (err, CFG_RCL_ERROR, EINVAL, "actions must be an object"); + return FALSE; + } + while ((cur = ucl_iterate_object (val, &it, true)) != NULL) { + if (!check_action_str (ucl_object_key (cur), &action_value) || + !ucl_object_todouble_safe (cur, &action_score)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid action definition: %s", ucl_object_key (cur)); + return FALSE; + } + action = &metric->actions[action_value]; + action->action = action_value; + action->score = action_score; + } + } + else if (new) { + /* Switch to legacy mode */ + val = ucl_object_find_key (obj, "required_score"); + if (val != NULL && ucl_object_todouble_safe (val, &action_score)) { + action = &metric->actions[METRIC_ACTION_REJECT]; + action->action = METRIC_ACTION_REJECT; + action->score = action_score; + have_actions = TRUE; + } + val = ucl_object_find_key (obj, "action"); + LL_FOREACH (val, cur) { + if (cur->type == UCL_STRING) { + act_str = ucl_object_tostring (cur); + semicolon = strchr (act_str, ':'); + if (semicolon != NULL) { + if (check_action_str (act_str, &action_value)) { + action_score = strtod (semicolon + 1, NULL); + action = &metric->actions[action_value]; + action->action = action_value; + action->score = action_score; + have_actions = TRUE; + } + } + } + } + if (new && !have_actions) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "metric %s has no actions", metric_name); + return FALSE; + } + } + + /* Handle symbols */ + val = ucl_object_find_key (obj, "symbols"); + if (val != NULL) { + if (val->type == UCL_ARRAY) { + val = val->value.ov; + } + if (val->type != UCL_OBJECT) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "symbols must be an object"); + return FALSE; + } + it = NULL; + while ((cur = ucl_iterate_object (val, &it, true)) != NULL) { + if (!rspamd_rcl_insert_symbol (cfg, metric, cur, FALSE, err)) { + return FALSE; + } + } + } + else { + /* Legacy variant */ + val = ucl_object_find_key (obj, "symbol"); + if (val != NULL) { + if (val->type == UCL_ARRAY) { + 
val = val->value.ov; + } + if (val->type != UCL_OBJECT) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "symbols must be an object"); + return FALSE; + } + LL_FOREACH (val, cur) { + if (!rspamd_rcl_insert_symbol (cfg, metric, cur, TRUE, err)) { + return FALSE; + } + } + } + else if (new) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "metric %s has no symbols", metric_name); + return FALSE; + } + } + + val = ucl_object_find_key (obj, "grow_factor"); + if (val && ucl_object_todouble_safe (val, &grow_factor)) { + metric->grow_factor = grow_factor; + } + + val = ucl_object_find_key (obj, "subject"); + if (val && ucl_object_tostring_safe (val, &subject_name)) { + metric->subject = (gchar *)subject_name; + } + + /* Insert the resulting metric */ + if (new) { + g_hash_table_insert (cfg->metrics, (void *)metric->name, metric); + cfg->metrics_list = g_list_prepend (cfg->metrics_list, metric); + } + + return TRUE; +} + +static gboolean +rspamd_rcl_worker_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const ucl_object_t *val, *cur; + ucl_object_iter_t it = NULL; + const gchar *worker_type, *worker_bind; + GQuark qtype; + struct worker_conf *wrk; + struct rspamd_worker_cfg_parser *wparser; + struct rspamd_worker_param_parser *whandler; + + val = ucl_object_find_key (obj, "type"); + if (val != NULL && ucl_object_tostring_safe (val, &worker_type)) { + qtype = g_quark_try_string (worker_type); + if (qtype != 0) { + wrk = check_worker_conf (cfg, NULL); + wrk->worker = get_worker_by_type (qtype); + if (wrk->worker == NULL) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "unknown worker type: %s", worker_type); + return FALSE; + } + wrk->type = qtype; + if (wrk->worker->worker_init_func) { + wrk->ctx = wrk->worker->worker_init_func (cfg); + } + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "unknown worker type: %s", worker_type); + return FALSE; + } + } + else { + g_set_error (err, CFG_RCL_ERROR, 
EINVAL, "undefined worker type"); + return FALSE; + } + + val = ucl_object_find_key (obj, "bind_socket"); + if (val != NULL) { + if (val->type == UCL_ARRAY) { + val = val->value.ov; + } + LL_FOREACH (val, cur) { + if (!ucl_object_tostring_safe (cur, &worker_bind)) { + continue; + } + if (!parse_bind_line (cfg, wrk, worker_bind)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot parse bind line: %s", worker_bind); + return FALSE; + } + } + } + + wrk->options = (ucl_object_t *)obj; + + if (!rspamd_rcl_section_parse_defaults (section, cfg, obj, wrk, err)) { + return FALSE; + } + + /* Parse other attributes */ + HASH_FIND_INT (cfg->wrk_parsers, (gint *)&qtype, wparser); + if (wparser != NULL && obj->type == UCL_OBJECT) { + while ((cur = ucl_iterate_object (obj, &it, true)) != NULL) { + HASH_FIND_STR (wparser->parsers, ucl_object_key (cur), whandler); + if (whandler != NULL) { + if (!whandler->handler (cfg, cur, &whandler->parser, section, err)) { + return FALSE; + } + } + } + if (wparser->def_obj_parser != NULL) { + if (! 
wparser->def_obj_parser (obj, wparser->def_ud)) { + return FALSE; + } + } + } + + cfg->workers = g_list_prepend (cfg->workers, wrk); + + return TRUE; +} + +static void +rspamd_rcl_set_lua_globals (struct config_file *cfg, lua_State *L) +{ + struct config_file **pcfg; + + /* First check for global variable 'config' */ + lua_getglobal (L, "config"); + if (lua_isnil (L, -1)) { + /* Assign global table to set up attributes */ + lua_newtable (L); + lua_setglobal (L, "config"); + } + + lua_getglobal (L, "metrics"); + if (lua_isnil (L, -1)) { + lua_newtable (L); + lua_setglobal (L, "metrics"); + } + + lua_getglobal (L, "composites"); + if (lua_isnil (L, -1)) { + lua_newtable (L); + lua_setglobal (L, "composites"); + } + + lua_getglobal (L, "classifiers"); + if (lua_isnil (L, -1)) { + lua_newtable (L); + lua_setglobal (L, "classifiers"); + } + + pcfg = lua_newuserdata (L, sizeof (struct config_file *)); + lua_setclass (L, "rspamd{config}", -1); + *pcfg = cfg; + lua_setglobal (L, "rspamd_config"); + + /* Clear stack from globals */ + lua_pop (L, 4); +} + +static gboolean +rspamd_rcl_lua_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const gchar *lua_src = rspamd_mempool_strdup (cfg->cfg_pool, ucl_object_tostring (obj)); + gchar *cur_dir, *lua_dir, *lua_file, *tmp1, *tmp2; + lua_State *L = cfg->lua_state; + + tmp1 = g_strdup (lua_src); + tmp2 = g_strdup (lua_src); + lua_dir = dirname (tmp1); + lua_file = basename (tmp2); + if (lua_dir && lua_file) { + cur_dir = g_malloc (PATH_MAX); + if (getcwd (cur_dir, PATH_MAX) != NULL && chdir (lua_dir) != -1) { + /* Load file */ + if (luaL_loadfile (L, lua_file) != 0) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot load lua file %s: %s", + lua_src, lua_tostring (L, -1)); + if (chdir (cur_dir) == -1) { + msg_err ("cannot chdir to %s: %s", cur_dir, strerror (errno));; + } + g_free (cur_dir); + g_free (tmp1); + g_free (tmp2); + return FALSE; + } + 
rspamd_rcl_set_lua_globals (cfg, L); + /* Now do it */ + if (lua_pcall (L, 0, LUA_MULTRET, 0) != 0) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot init lua file %s: %s", + lua_src, lua_tostring (L, -1)); + if (chdir (cur_dir) == -1) { + msg_err ("cannot chdir to %s: %s", cur_dir, strerror (errno));; + } + g_free (cur_dir); + g_free (tmp1); + g_free (tmp2); + return FALSE; + } + } + else { + g_set_error (err, CFG_RCL_ERROR, ENOENT, "cannot chdir to %s: %s", + lua_src, strerror (errno)); + if (chdir (cur_dir) == -1) { + msg_err ("cannot chdir to %s: %s", cur_dir, strerror (errno));; + } + g_free (cur_dir); + g_free (tmp1); + g_free (tmp2); + return FALSE; + + } + if (chdir (cur_dir) == -1) { + msg_err ("cannot chdir to %s: %s", cur_dir, strerror (errno));; + } + g_free (cur_dir); + g_free (tmp1); + g_free (tmp2); + } + else { + g_set_error (err, CFG_RCL_ERROR, ENOENT, "cannot find to %s: %s", + lua_src, strerror (errno)); + return FALSE; + } + + return TRUE; +} + +static gboolean +rspamd_rcl_add_module_path (struct config_file *cfg, const gchar *path, GError **err) +{ + struct stat st; + struct script_module *cur_mod; + glob_t globbuf; + gchar *pattern; + size_t len; + guint i; + + if (stat (path, &st) == -1) { + g_set_error (err, CFG_RCL_ERROR, errno, "cannot stat path %s, %s", path, strerror (errno)); + return FALSE; + } + + /* Handle directory */ + if (S_ISDIR (st.st_mode)) { + globbuf.gl_offs = 0; + len = strlen (path) + sizeof ("*.lua"); + pattern = g_malloc (len); + snprintf (pattern, len, "%s%s", path, "*.lua"); + + if (glob (pattern, GLOB_DOOFFS, NULL, &globbuf) == 0) { + for (i = 0; i < globbuf.gl_pathc; i ++) { + cur_mod = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct script_module)); + cur_mod->path = rspamd_mempool_strdup (cfg->cfg_pool, globbuf.gl_pathv[i]); + cfg->script_modules = g_list_prepend (cfg->script_modules, cur_mod); + } + globfree (&globbuf); + g_free (pattern); + } + else { + g_set_error (err, CFG_RCL_ERROR, errno, "glob failed 
for %s, %s", pattern, strerror (errno)); + g_free (pattern); + return FALSE; + } + } + else { + /* Handle single file */ + cur_mod = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct script_module)); + cur_mod->path = rspamd_mempool_strdup (cfg->cfg_pool, path); + cfg->script_modules = g_list_prepend (cfg->script_modules, cur_mod); + } + + return TRUE; +} + +static gboolean +rspamd_rcl_modules_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const ucl_object_t *val, *cur; + const gchar *data; + + if (obj->type == UCL_OBJECT) { + val = ucl_object_find_key (obj, "path"); + + LL_FOREACH (val, cur) { + if (ucl_object_tostring_safe (cur, &data)) { + if (!rspamd_rcl_add_module_path (cfg, rspamd_mempool_strdup (cfg->cfg_pool, data), err)) { + return FALSE; + } + } + } + } + else if (ucl_object_tostring_safe (obj, &data)) { + if (!rspamd_rcl_add_module_path (cfg, rspamd_mempool_strdup (cfg->cfg_pool, data), err)) { + return FALSE; + } + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "module parameter has wrong type (must be an object or a string)"); + return FALSE; + } + + return TRUE; +} + +static gboolean +rspamd_rcl_statfile_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + struct classifier_config *ccf = ud; + const ucl_object_t *val; + struct statfile *st; + const gchar *data; + gdouble binlog_rotate; + GList *labels; + + st = check_statfile_conf (cfg, NULL); + + val = ucl_object_find_key (obj, "binlog"); + if (val != NULL && ucl_object_tostring_safe (val, &data)) { + if (st->binlog == NULL) { + st->binlog = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct statfile_binlog_params)); + } + if (g_ascii_strcasecmp (data, "master") == 0) { + st->binlog->affinity = AFFINITY_MASTER; + } + else if (g_ascii_strcasecmp (data, "slave") == 0) { + st->binlog->affinity = AFFINITY_SLAVE; + } + else { + 
st->binlog->affinity = AFFINITY_NONE; + } + /* Parse remaining binlog attributes */ + val = ucl_object_find_key (obj, "binlog_rotate"); + if (val != NULL && ucl_object_todouble_safe (val, &binlog_rotate)) { + st->binlog->rotate_time = binlog_rotate; + } + val = ucl_object_find_key (obj, "binlog_master"); + if (val != NULL && ucl_object_tostring_safe (val, &data)) { + if (!parse_host_port (cfg->cfg_pool, data, &st->binlog->master_addr, &st->binlog->master_port)) { + msg_err ("cannot parse master address: %s", data); + return FALSE; + } + } + } + + + if (rspamd_rcl_section_parse_defaults (section, cfg, obj, st, err)) { + ccf->statfiles = g_list_prepend (ccf->statfiles, st); + if (st->label != NULL) { + labels = g_hash_table_lookup (ccf->labels, st->label); + if (labels != NULL) { + labels = g_list_append (labels, st); + } + else { + g_hash_table_insert (ccf->labels, st->label, g_list_prepend (NULL, st)); + } + } + if (st->symbol != NULL) { + g_hash_table_insert (cfg->classifiers_symbols, st->symbol, st); + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "statfile must have a symbol defined"); + return FALSE; + } + + if (st->path == NULL) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "statfile must have a path defined"); + return FALSE; + } + + st->opts = (ucl_object_t *)obj; + + val = ucl_object_find_key (obj, "spam"); + if (val == NULL) { + msg_info ("statfile %s has no explicit 'spam' setting, trying to guess by symbol", st->symbol); + if (rspamd_strncasestr (st->symbol, "spam", strlen (st->symbol)) != NULL) { + st->is_spam = TRUE; + } + else if (rspamd_strncasestr (st->symbol, "ham", strlen (st->symbol)) != NULL) { + st->is_spam = FALSE; + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot guess spam setting from %s", st->symbol); + return FALSE; + } + msg_info ("guessed that statfile with symbol %s is %s", st->symbol, st->is_spam ? 
+ "spam" : "ham"); + } + return TRUE; + } + + return FALSE; +} + +static gboolean +rspamd_rcl_classifier_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const ucl_object_t *val, *cur; + ucl_object_iter_t it = NULL; + const gchar *key, *type; + struct classifier_config *ccf, *found = NULL; + gboolean res = TRUE; + struct rspamd_rcl_section *stat_section; + GList *cur_cl; + + val = ucl_object_find_key (obj, "type"); + if (val == NULL || !ucl_object_tostring_safe (val, &type)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "classifier should have type defined"); + return FALSE; + } + + cur_cl = cfg->classifiers; + while (cur_cl != NULL) { + ccf = cur_cl->data; + if (g_ascii_strcasecmp (ccf->classifier->name, type) == 0) { + found = ccf; + break; + } + cur_cl = g_list_next (cur_cl); + } + + if (found == NULL) { + ccf = check_classifier_conf (cfg, NULL); + ccf->classifier = get_classifier (type); + } + else { + ccf = found; + } + + HASH_FIND_STR (section->subsections, "statfile", stat_section); + + while ((val = ucl_iterate_object (obj, &it, true)) != NULL && res) { + key = ucl_object_key (val); + if (key != NULL) { + if (g_ascii_strcasecmp (key, "statfile") == 0) { + LL_FOREACH (val, cur) { + res = rspamd_rcl_statfile_handler (cfg, cur, ccf, stat_section, err); + if (!res) { + return FALSE; + } + } + } + else if (g_ascii_strcasecmp (key, "type") == 0 && val->type == UCL_STRING) { + continue; + } + else if (g_ascii_strcasecmp (key, "tokenizer") == 0 && val->type == UCL_STRING) { + ccf->tokenizer = get_tokenizer (ucl_object_tostring (val)); + } + else { + /* Just insert a value of option to the hash */ + g_hash_table_insert (ccf->opts, (gpointer)key, (gpointer)ucl_object_tostring_forced (val)); + } + } + } + + if (found == NULL) { + cfg->classifiers = g_list_prepend (cfg->classifiers, ccf); + } + + + return res; +} + +static gboolean +rspamd_rcl_composite_handler (struct config_file *cfg, 
const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + const ucl_object_t *val; + struct expression *expr; + struct rspamd_composite *composite; + const gchar *composite_name, *composite_expression; + gboolean new = TRUE; + + val = ucl_object_find_key (obj, "name"); + if (val == NULL || !ucl_object_tostring_safe (val, &composite_name)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "composite must have a name defined"); + return FALSE; + } + + if (g_hash_table_lookup (cfg->composite_symbols, composite_name) != NULL) { + msg_warn ("composite %s is redefined", composite_name); + new = FALSE; + } + + val = ucl_object_find_key (obj, "expression"); + if (val == NULL || !ucl_object_tostring_safe (val, &composite_expression)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "composite must have an expression defined"); + return FALSE; + } + + if ((expr = parse_expression (cfg->cfg_pool, (gchar *)composite_expression)) == NULL) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot parse composite expression: %s", composite_expression); + return FALSE; + } + + composite = rspamd_mempool_alloc (cfg->cfg_pool, sizeof (struct rspamd_composite)); + composite->expr = expr; + composite->id = g_hash_table_size (cfg->composite_symbols) + 1; + g_hash_table_insert (cfg->composite_symbols, (gpointer)composite_name, composite); + + if (new) { + register_virtual_symbol (&cfg->cache, composite_name, 1); + } + + return TRUE; +} + +/** + * Fake handler to parse default options only, uses struct cfg_file as pointer + * for default handlers + */ +static gboolean +rspamd_rcl_empty_handler (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + return rspamd_rcl_section_parse_defaults (section, cfg, obj, cfg, err); +} + +/** + * Add new section to the configuration + * @param top top section + * @param name the name of the section + * @param handler handler function for all attributes + * @param 
type type of object handled by a handler + * @param required whether at least one of these sections is required + * @param strict_type turn on strict check for types for this section + * @return newly created structure + */ +static inline struct rspamd_rcl_section* +rspamd_rcl_add_section (struct rspamd_rcl_section **top, + const gchar *name, rspamd_rcl_handler_t handler, + enum ucl_type type, gboolean required, gboolean strict_type) +{ + struct rspamd_rcl_section *new; + + new = g_slice_alloc0 (sizeof (struct rspamd_rcl_section)); + new->name = name; + new->handler = handler; + new->type = type; + new->strict_type = strict_type; + + HASH_ADD_KEYPTR (hh, *top, new->name, strlen (new->name), new); + return new; +} + +/** + * Add a default handler for a section + * @param section section pointer + * @param name name of param + * @param handler handler of param + * @param offset offset in a structure + * @param flags flags for the parser + * @return newly created structure + */ +static inline struct rspamd_rcl_default_handler_data * +rspamd_rcl_add_default_handler (struct rspamd_rcl_section *section, const gchar *name, + rspamd_rcl_handler_t handler, gsize offset, gint flags) +{ + struct rspamd_rcl_default_handler_data *new; + + new = g_slice_alloc0 (sizeof (struct rspamd_rcl_default_handler_data)); + new->key = name; + new->handler = handler; + new->pd.offset = offset; + new->pd.flags = flags; + + HASH_ADD_KEYPTR (hh, section->default_parser, new->key, strlen (new->key), new); + return new; +} + +struct rspamd_rcl_section* +rspamd_rcl_config_init (void) +{ + struct rspamd_rcl_section *new = NULL, *sub, *ssub; + + /* TODO: add all known rspamd sections here */ + /** + * Logging section + */ + sub = rspamd_rcl_add_section (&new, "logging", rspamd_rcl_logging_handler, UCL_OBJECT, + FALSE, TRUE); + /* Default handlers */ + rspamd_rcl_add_default_handler (sub, "log_buffer", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct config_file, log_buf_size), 0); + 
rspamd_rcl_add_default_handler (sub, "log_urls", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct config_file, log_urls), 0); + rspamd_rcl_add_default_handler (sub, "debug_ip", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, debug_ip_map), 0); + rspamd_rcl_add_default_handler (sub, "debug_symbols", rspamd_rcl_parse_struct_string_list, + G_STRUCT_OFFSET (struct config_file, debug_symbols), 0); + rspamd_rcl_add_default_handler (sub, "log_color", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct config_file, log_color), 0); + /** + * Options section + */ + sub = rspamd_rcl_add_section (&new, "options", rspamd_rcl_options_handler, UCL_OBJECT, + FALSE, TRUE); + rspamd_rcl_add_default_handler (sub, "cache_file", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, cache_filename), RSPAMD_CL_FLAG_STRING_PATH); + rspamd_rcl_add_default_handler (sub, "dns_nameserver", rspamd_rcl_parse_struct_string_list, + G_STRUCT_OFFSET (struct config_file, nameservers), 0); + rspamd_rcl_add_default_handler (sub, "dns_timeout", rspamd_rcl_parse_struct_time, + G_STRUCT_OFFSET (struct config_file, dns_timeout), RSPAMD_CL_FLAG_TIME_FLOAT); + rspamd_rcl_add_default_handler (sub, "dns_retransmits", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct config_file, dns_retransmits), RSPAMD_CL_FLAG_INT_32); + rspamd_rcl_add_default_handler (sub, "dns_sockets", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct config_file, dns_io_per_server), RSPAMD_CL_FLAG_INT_32); + rspamd_rcl_add_default_handler (sub, "raw_mode", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct config_file, raw_mode), 0); + rspamd_rcl_add_default_handler (sub, "one_shot", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct config_file, one_shot_mode), 0); + rspamd_rcl_add_default_handler (sub, "check_attachements", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct config_file, check_text_attachements), 0); + 
rspamd_rcl_add_default_handler (sub, "tempdir", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, temp_dir), RSPAMD_CL_FLAG_STRING_PATH); + rspamd_rcl_add_default_handler (sub, "pidfile", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, pid_file), RSPAMD_CL_FLAG_STRING_PATH); + rspamd_rcl_add_default_handler (sub, "filters", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, filters_str), 0); + rspamd_rcl_add_default_handler (sub, "sync_interval", rspamd_rcl_parse_struct_time, + G_STRUCT_OFFSET (struct config_file, statfile_sync_interval), RSPAMD_CL_FLAG_TIME_INTEGER); + rspamd_rcl_add_default_handler (sub, "sync_timeout", rspamd_rcl_parse_struct_time, + G_STRUCT_OFFSET (struct config_file, statfile_sync_timeout), RSPAMD_CL_FLAG_TIME_INTEGER); + rspamd_rcl_add_default_handler (sub, "max_diff", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct config_file, max_diff), RSPAMD_CL_FLAG_INT_SIZE); + rspamd_rcl_add_default_handler (sub, "map_watch_interval", rspamd_rcl_parse_struct_time, + G_STRUCT_OFFSET (struct config_file, map_timeout), RSPAMD_CL_FLAG_TIME_FLOAT); + rspamd_rcl_add_default_handler (sub, "dynamic_conf", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, dynamic_conf), 0); + rspamd_rcl_add_default_handler (sub, "rrd", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, rrd_file), RSPAMD_CL_FLAG_STRING_PATH); + rspamd_rcl_add_default_handler (sub, "history_file", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct config_file, history_file), RSPAMD_CL_FLAG_STRING_PATH); + rspamd_rcl_add_default_handler (sub, "use_mlock", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct config_file, mlock_statfile_pool), 0); + rspamd_rcl_add_default_handler (sub, "strict_protocol_headers", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct config_file, strict_protocol_headers), 0); + + /** + * Metric section + */ + sub = 
rspamd_rcl_add_section (&new, "metric", rspamd_rcl_metric_handler, UCL_OBJECT, + FALSE, TRUE); + + /** + * Worker section + */ + sub = rspamd_rcl_add_section (&new, "worker", rspamd_rcl_worker_handler, UCL_OBJECT, + FALSE, TRUE); + rspamd_rcl_add_default_handler (sub, "count", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct worker_conf, count), RSPAMD_CL_FLAG_INT_16); + rspamd_rcl_add_default_handler (sub, "max_files", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct worker_conf, rlimit_nofile), RSPAMD_CL_FLAG_INT_32); + rspamd_rcl_add_default_handler (sub, "max_core", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct worker_conf, rlimit_maxcore), RSPAMD_CL_FLAG_INT_32); + + /** + * Lua handler + */ + sub = rspamd_rcl_add_section (&new, "lua", rspamd_rcl_lua_handler, UCL_STRING, + FALSE, TRUE); + + /** + * Modules handler + */ + sub = rspamd_rcl_add_section (&new, "modules", rspamd_rcl_modules_handler, UCL_OBJECT, + FALSE, FALSE); + + /** + * Classifiers handler + */ + sub = rspamd_rcl_add_section (&new, "classifier", rspamd_rcl_classifier_handler, UCL_OBJECT, + FALSE, TRUE); + ssub = rspamd_rcl_add_section (&sub->subsections, "statfile", rspamd_rcl_statfile_handler, + UCL_OBJECT, TRUE, TRUE); + rspamd_rcl_add_default_handler (ssub, "symbol", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct statfile, symbol), 0); + rspamd_rcl_add_default_handler (ssub, "path", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct statfile, path), RSPAMD_CL_FLAG_STRING_PATH); + rspamd_rcl_add_default_handler (ssub, "label", rspamd_rcl_parse_struct_string, + G_STRUCT_OFFSET (struct statfile, label), 0); + rspamd_rcl_add_default_handler (ssub, "size", rspamd_rcl_parse_struct_integer, + G_STRUCT_OFFSET (struct statfile, size), RSPAMD_CL_FLAG_INT_SIZE); + rspamd_rcl_add_default_handler (ssub, "spam", rspamd_rcl_parse_struct_boolean, + G_STRUCT_OFFSET (struct statfile, is_spam), 0); + + /** + * Composites handler + */ + sub = 
rspamd_rcl_add_section (&new, "composite", rspamd_rcl_composite_handler, UCL_OBJECT, + FALSE, TRUE); + + return new; +} + +struct rspamd_rcl_section * +rspamd_rcl_config_get_section (struct rspamd_rcl_section *top, + const char *path) +{ + struct rspamd_rcl_section *cur, *found; + char **path_components; + gint ncomponents, i; + + + if (path == NULL) { + return top; + } + + path_components = g_strsplit_set (path, "/", -1); + ncomponents = g_strv_length (path_components); + + cur = top; + for (i = 0; i < ncomponents; i ++) { + if (cur == NULL) { + g_strfreev (path_components); + return NULL; + } + HASH_FIND_STR (cur, path_components[i], found); + if (found == NULL) { + g_strfreev (path_components); + return NULL; + } + cur = found; + } + + g_strfreev (path_components); + return found; +} + +gboolean +rspamd_read_rcl_config (struct rspamd_rcl_section *top, + struct config_file *cfg, const ucl_object_t *obj, GError **err) +{ + const ucl_object_t *found, *cur_obj; + struct rspamd_rcl_section *cur, *tmp; + + if (obj->type != UCL_OBJECT) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "top configuration must be an object"); + return FALSE; + } + + /* Iterate over known sections and ignore unknown ones */ + HASH_ITER (hh, top, cur, tmp) { + found = ucl_object_find_key (obj, cur->name); + if (found == NULL) { + if (cur->required) { + g_set_error (err, CFG_RCL_ERROR, ENOENT, "required section %s is missing", cur->name); + return FALSE; + } + } + else { + /* Check type */ + if (cur->strict_type) { + if (cur->type != found->type) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "object in section %s has invalid type", cur->name); + return FALSE; + } + } + LL_FOREACH (found, cur_obj) { + if (!cur->handler (cfg, cur_obj, NULL, cur, err)) { + return FALSE; + } + } + } + if (cur->fin) { + cur->fin (cfg, cur->fin_ud); + } + } + + cfg->rcl_obj = (ucl_object_t *)obj; + + return TRUE; +} + +gboolean rspamd_rcl_section_parse_defaults (struct rspamd_rcl_section *section, + struct config_file 
*cfg, const ucl_object_t *obj, gpointer ptr, + GError **err) +{ + const ucl_object_t *found; + struct rspamd_rcl_default_handler_data *cur, *tmp; + + if (obj->type != UCL_OBJECT) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "default configuration must be an object"); + return FALSE; + } + + HASH_ITER (hh, section->default_parser, cur, tmp) { + found = ucl_object_find_key (obj, cur->key); + if (found != NULL) { + cur->pd.user_struct = ptr; + if (!cur->handler (cfg, found, &cur->pd, section, err)) { + return FALSE; + } + } + } + + return TRUE; +} + +gboolean +rspamd_rcl_parse_struct_string (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + struct rspamd_rcl_struct_parser *pd = ud; + gchar **target; + const gsize num_str_len = 32; + + target = (gchar **)(((gchar *)pd->user_struct) + pd->offset); + switch (obj->type) { + case UCL_STRING: + *target = rspamd_mempool_strdup (cfg->cfg_pool, ucl_copy_value_trash (obj)); + break; + case UCL_INT: + *target = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); + rspamd_snprintf (*target, num_str_len, "%L", obj->value.iv); + break; + case UCL_FLOAT: + *target = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); + rspamd_snprintf (*target, num_str_len, "%f", obj->value.dv); + break; + case UCL_BOOLEAN: + *target = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); + rspamd_snprintf (*target, num_str_len, "%b", (gboolean)obj->value.iv); + break; + default: + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert object or array to string"); + return FALSE; + } + + return TRUE; +} + +gboolean +rspamd_rcl_parse_struct_integer (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + struct rspamd_rcl_struct_parser *pd = ud; + union { + gint *ip; + gint32 *i32p; + gint16 *i16p; + gint64 *i64p; + gsize *sp; + } target; + gint64 val; + + if (pd->flags == RSPAMD_CL_FLAG_INT_32) { + target.i32p = (gint32 
*)(((gchar *)pd->user_struct) + pd->offset); + if (!ucl_object_toint_safe (obj, &val)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer"); + return FALSE; + } + *target.i32p = val; + } + else if (pd->flags == RSPAMD_CL_FLAG_INT_64) { + target.i64p = (gint64 *)(((gchar *)pd->user_struct) + pd->offset); + if (!ucl_object_toint_safe (obj, &val)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer"); + return FALSE; + } + *target.i64p = val; + } + else if (pd->flags == RSPAMD_CL_FLAG_INT_SIZE) { + target.sp = (gsize *)(((gchar *)pd->user_struct) + pd->offset); + if (!ucl_object_toint_safe (obj, &val)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer"); + return FALSE; + } + *target.sp = val; + } + else if (pd->flags == RSPAMD_CL_FLAG_INT_16) { + target.i16p = (gint16 *)(((gchar *)pd->user_struct) + pd->offset); + if (!ucl_object_toint_safe (obj, &val)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer"); + return FALSE; + } + *target.i16p = val; + } + else { + target.ip = (gint *)(((gchar *)pd->user_struct) + pd->offset); + if (!ucl_object_toint_safe (obj, &val)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to integer"); + return FALSE; + } + *target.ip = val; + } + + return TRUE; +} + +gboolean +rspamd_rcl_parse_struct_double (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + struct rspamd_rcl_struct_parser *pd = ud; + gdouble *target; + + target = (gdouble *)(((gchar *)pd->user_struct) + pd->offset); + + if (!ucl_object_todouble_safe (obj, target)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to double"); + return FALSE; + } + + return TRUE; +} + +gboolean +rspamd_rcl_parse_struct_time (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + struct rspamd_rcl_struct_parser *pd = ud; 
+ union { + gint *psec; + guint32 *pu32; + gdouble *pdv; + struct timeval *ptv; + struct timespec *pts; + } target; + gdouble val; + + if (!ucl_object_todouble_safe (obj, &val)) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert param to double"); + return FALSE; + } + + if (pd->flags == RSPAMD_CL_FLAG_TIME_TIMEVAL) { + target.ptv = (struct timeval *)(((gchar *)pd->user_struct) + pd->offset); + target.ptv->tv_sec = (glong)val; + target.ptv->tv_usec = (val - (glong)val) * 1000000; + } + else if (pd->flags == RSPAMD_CL_FLAG_TIME_TIMESPEC) { + target.pts = (struct timespec *)(((gchar *)pd->user_struct) + pd->offset); + target.pts->tv_sec = (glong)val; + target.pts->tv_nsec = (val - (glong)val) * 1000000000000LL; + } + else if (pd->flags == RSPAMD_CL_FLAG_TIME_FLOAT) { + target.pdv = (double *)(((gchar *)pd->user_struct) + pd->offset); + *target.pdv = val; + } + else if (pd->flags == RSPAMD_CL_FLAG_TIME_INTEGER) { + target.psec = (gint *)(((gchar *)pd->user_struct) + pd->offset); + *target.psec = val * 1000; + } + else if (pd->flags == RSPAMD_CL_FLAG_TIME_UINT_32) { + target.pu32 = (guint32 *)(((gchar *)pd->user_struct) + pd->offset); + *target.pu32 = val * 1000; + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "invalid flags to parse time value"); + return FALSE; + } + + return TRUE; +} + +gboolean +rspamd_rcl_parse_struct_string_list (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + struct rspamd_rcl_struct_parser *pd = ud; + GList **target; + gchar *val; + const ucl_object_t *cur; + const gsize num_str_len = 32; + ucl_object_iter_t iter = NULL; + + target = (GList **)(((gchar *)pd->user_struct) + pd->offset); + + if (obj->type != UCL_ARRAY) { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "an array of strings is expected"); + return FALSE; + } + + while ((cur = ucl_iterate_object (obj, &iter, true)) != NULL) { + switch (cur->type) { + case UCL_STRING: + val = rspamd_mempool_strdup 
(cfg->cfg_pool, ucl_copy_value_trash (cur)); + break; + case UCL_INT: + val = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); + rspamd_snprintf (val, num_str_len, "%L", cur->value.iv); + break; + case UCL_FLOAT: + val = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); + rspamd_snprintf (val, num_str_len, "%f", cur->value.dv); + break; + case UCL_BOOLEAN: + val = rspamd_mempool_alloc (cfg->cfg_pool, num_str_len); + rspamd_snprintf (val, num_str_len, "%b", (gboolean)cur->value.iv); + break; + default: + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert an object or array to string"); + return FALSE; + } + *target = g_list_prepend (*target, val); + } + + /* Add a destructor */ + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_list_free, *target); + + return TRUE; +} + +gboolean +rspamd_rcl_parse_struct_boolean (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err) +{ + struct rspamd_rcl_struct_parser *pd = ud; + gboolean *target; + + target = (gboolean *)(((gchar *)pd->user_struct) + pd->offset); + + if (obj->type == UCL_BOOLEAN) { + *target = obj->value.iv; + } + else if (obj->type == UCL_INT) { + *target = obj->value.iv; + } + else { + g_set_error (err, CFG_RCL_ERROR, EINVAL, "cannot convert an object to boolean"); + return FALSE; + } + + return TRUE; +} + +void +rspamd_rcl_register_worker_option (struct config_file *cfg, gint type, const gchar *name, + rspamd_rcl_handler_t handler, gpointer target, gsize offset, gint flags) +{ + struct rspamd_worker_param_parser *nhandler; + struct rspamd_worker_cfg_parser *nparser; + + HASH_FIND_INT (cfg->wrk_parsers, &type, nparser); + if (nparser == NULL) { + /* Allocate new parser for this worker */ + nparser = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_cfg_parser)); + nparser->type = type; + HASH_ADD_INT (cfg->wrk_parsers, type, nparser); + } + + HASH_FIND_STR (nparser->parsers, name, nhandler); + if 
(nhandler != NULL) { + msg_warn ("handler for parameter %s is already registered for worker type %s", + name, g_quark_to_string (type)); + return; + } + nhandler = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_param_parser)); + nhandler->name = name; + nhandler->parser.flags = flags; + nhandler->parser.offset = offset; + nhandler->parser.user_struct = target; + nhandler->handler = handler; + HASH_ADD_KEYPTR (hh, nparser->parsers, name, strlen (name), nhandler); +} + + +void +rspamd_rcl_register_worker_parser (struct config_file *cfg, gint type, + gboolean (*func)(ucl_object_t *, gpointer), gpointer ud) +{ + struct rspamd_worker_cfg_parser *nparser; + HASH_FIND_INT (cfg->wrk_parsers, &type, nparser); + if (nparser == NULL) { + /* Allocate new parser for this worker */ + nparser = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_cfg_parser)); + nparser->type = type; + HASH_ADD_INT (cfg->wrk_parsers, type, nparser); + } + + nparser->def_obj_parser = func; + nparser->def_ud = ud; +} diff --git a/src/libserver/cfg_rcl.h b/src/libserver/cfg_rcl.h new file mode 100644 index 000000000..99839d1ea --- /dev/null +++ b/src/libserver/cfg_rcl.h @@ -0,0 +1,238 @@ +/* Copyright (c) 2013, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CFG_RCL_H_ +#define CFG_RCL_H_ + +#include "config.h" +#include "ucl/include/ucl.h" +#include "uthash.h" + +#define CFG_RCL_ERROR cfg_rcl_error_quark () +static inline GQuark +cfg_rcl_error_quark (void) +{ + return g_quark_from_static_string ("cfg-rcl-error-quark"); +} + +struct rspamd_rcl_section; +struct config_file; + +struct rspamd_rcl_struct_parser { + gpointer user_struct; + goffset offset; + enum { + RSPAMD_CL_FLAG_TIME_FLOAT = 0x1 << 0, + RSPAMD_CL_FLAG_TIME_TIMEVAL = 0x1 << 1, + RSPAMD_CL_FLAG_TIME_TIMESPEC = 0x1 << 2, + RSPAMD_CL_FLAG_TIME_INTEGER = 0x1 << 3, + RSPAMD_CL_FLAG_TIME_UINT_32 = 0x1 << 4, + RSPAMD_CL_FLAG_INT_16 = 0x1 << 5, + RSPAMD_CL_FLAG_INT_32 = 0x1 << 6, + RSPAMD_CL_FLAG_INT_64 = 0x1 << 7, + RSPAMD_CL_FLAG_INT_SIZE = 0x1 << 8, + RSPAMD_CL_FLAG_STRING_PATH = 0x1 << 9 + } flags; +}; + +/** + * Common handler type + * @param cfg configuration + * @param obj object to parse + * @param ud user data (depends on section) + * @param err error object + * @return TRUE if a section has been parsed + */ +typedef gboolean (*rspamd_rcl_handler_t) (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err); + +/** + * A handler type that is called at the end of section parsing + * @param cfg configuration + * @param ud user data + */ +typedef void (*rspamd_rcl_section_fin_t)(struct config_file *cfg, gpointer ud); + +struct rspamd_rcl_default_handler_data { + struct 
rspamd_rcl_struct_parser pd; + const gchar *key; + rspamd_rcl_handler_t handler; + UT_hash_handle hh; +}; + +struct rspamd_rcl_section { + const gchar *name; /**< name of section */ + rspamd_rcl_handler_t handler; /**< handler of section attributes */ + enum ucl_type type; /**< type of attribute */ + gboolean required; /**< whether this param is required */ + gboolean strict_type; /**< whether we need strict type */ + UT_hash_handle hh; /** hash handle */ + struct rspamd_rcl_section *subsections; /**< hash table of subsections */ + struct rspamd_rcl_default_handler_data *default_parser; /**< generic parsing fields */ + rspamd_rcl_section_fin_t fin; /** called at the end of section parsing */ + gpointer fin_ud; +}; + +/** + * Init common sections known to rspamd + * @return top section + */ +struct rspamd_rcl_section* rspamd_rcl_config_init (void); + +/** + * Get a section specified by path; it understands paths separated by the '/' character + * @param top top section + * @param path '/' divided path + * @return the section found, or NULL if there is no such section + */ +struct rspamd_rcl_section *rspamd_rcl_config_get_section (struct rspamd_rcl_section *top, + const char *path); + +/** + * Read RCL configuration and parse it to a config file + * @param top top section + * @param cfg target configuration + * @param obj object to handle + * @return TRUE if an object can be parsed + */ +gboolean rspamd_read_rcl_config (struct rspamd_rcl_section *top, + struct config_file *cfg, const ucl_object_t *obj, GError **err); + + +/** + * Parse default structure for a section + * @param section section + * @param cfg config file + * @param obj object to parse + * @param ptr ptr to pass + * @param err error ptr + * @return TRUE if the object has been parsed + */ +gboolean rspamd_rcl_section_parse_defaults (struct rspamd_rcl_section *section, + struct config_file *cfg, const ucl_object_t *obj, gpointer ptr, + GError **err); +/** + * Here is a section of common handlers that accept rcl_struct_parser + * which itself contains a struct 
pointer and the offset of a member in a + * specific structure + */ + +/** + * Parse a string field of a structure + * @param cfg config pointer + * @param obj object to parse + * @param ud struct_parser structure + * @param section the current section + * @param err error pointer + * @return TRUE if a string value has been successfully parsed + */ +gboolean rspamd_rcl_parse_struct_string (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err); + +/** + * Parse an integer field of a structure + * @param cfg config pointer + * @param obj object to parse + * @param ud struct_parser structure + * @param section the current section + * @param err error pointer + * @return TRUE if a value has been successfully parsed + */ +gboolean rspamd_rcl_parse_struct_integer (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err); + + +/** + * Parse a float field of a structure + * @param cfg config pointer + * @param obj object to parse + * @param ud struct_parser structure + * @param section the current section + * @param err error pointer + * @return TRUE if a value has been successfully parsed + */ +gboolean rspamd_rcl_parse_struct_double (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err); + +/** + * Parse a time field of a structure + * @param cfg config pointer + * @param obj object to parse + * @param ud struct_parser structure (flags mean the exact structure used) + * @param section the current section + * @param err error pointer + * @return TRUE if a value has been successfully parsed + */ +gboolean rspamd_rcl_parse_struct_time (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err); + +/** + * Parse a string list field of a structure presented by a GList* object + * @param cfg config pointer + * @param obj object to parse + * 
@param ud struct_parser structure (flags mean the exact structure used) + * @param section the current section + * @param err error pointer + * @return TRUE if a value has been successfully parsed + */ +gboolean rspamd_rcl_parse_struct_string_list (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err); + +/** + * Parse a boolean field of a structure + * @param cfg config pointer + * @param obj object to parse + * @param ud struct_parser structure (flags mean the exact structure used) + * @param section the current section + * @param err error pointer + * @return TRUE if a value has been successfully parsed + */ +gboolean rspamd_rcl_parse_struct_boolean (struct config_file *cfg, const ucl_object_t *obj, + gpointer ud, struct rspamd_rcl_section *section, GError **err); + +/** + * Utility functions + */ + +/** + * Register a parser for the option with the specified name for a worker type + * @param cfg config structure + * @param type type of worker (GQuark) + * @param name name of option + * @param handler handler of option + * @param target opaque target structure + * @param offset offset inside a structure + * @param flags flags for the parser + */ +void rspamd_rcl_register_worker_option (struct config_file *cfg, gint type, const gchar *name, + rspamd_rcl_handler_t handler, gpointer target, gsize offset, gint flags); + +/** + * Register a default parser for a worker + * @param cfg config structure + * @param type type of worker (GQuark) + * @param func handler function + * @param ud userdata for handler function + */ +void rspamd_rcl_register_worker_parser (struct config_file *cfg, gint type, + gboolean (*func)(ucl_object_t *, gpointer), gpointer ud); +#endif /* CFG_RCL_H_ */ diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c new file mode 100644 index 000000000..2ca846ebd --- /dev/null +++ b/src/libserver/cfg_utils.c @@ -0,0 +1,969 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include "config.h" + +#include "cfg_file.h" +#include "main.h" +#include "filter.h" +#include "settings.h" +#include "classifiers/classifiers.h" +#include "lua/lua_common.h" +#include "kvstorage_config.h" +#include "map.h" +#include "dynamic_cfg.h" + +#define DEFAULT_SCORE 10.0 + +#define DEFAULT_RLIMIT_NOFILE 2048 +#define DEFAULT_RLIMIT_MAXCORE 0 +#define DEFAULT_MAP_TIMEOUT 10 + +struct rspamd_ucl_map_cbdata { + struct config_file *cfg; + GString *buf; +}; +static gchar* rspamd_ucl_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data); +static void rspamd_ucl_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data); + +static gboolean +parse_host_port_priority_strv (rspamd_mempool_t *pool, gchar **tokens, + gchar **addr, guint16 *port, guint *priority, guint default_port) +{ + gchar *err_str, portbuf[8]; + const gchar *cur_tok, *cur_port; + struct addrinfo hints, *res; + guint port_parsed, priority_parsed, saved_errno = errno; + gint r; + union { + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } addr_holder; + + /* Now try to parse host and write address to ina */ + memset (&hints, 0, sizeof (hints)); + hints.ai_socktype = SOCK_STREAM; /* Type of the socket */ + hints.ai_flags = AI_NUMERICSERV; + + cur_tok = tokens[0]; + + if (strcmp (cur_tok, "*v6") == 0) { + hints.ai_family = AF_INET6; + hints.ai_flags |= AI_PASSIVE; + cur_tok = NULL; + } + else if (strcmp (cur_tok, "*v4") == 0) { + hints.ai_family = AF_INET; + hints.ai_flags |= AI_PASSIVE; + cur_tok = NULL; + } + else { + hints.ai_family = AF_UNSPEC; + } + + if (tokens[1] != NULL) { + /* Port part */ + rspamd_strlcpy (portbuf, tokens[1], sizeof (portbuf)); + cur_port = portbuf; + if (port != NULL) { + errno = 0; + port_parsed = strtoul (tokens[1], &err_str, 10); + if (*err_str != '\0' || errno != 0) { + msg_warn ("cannot parse port: %s, at symbol %c, error: %s", tokens[1], *err_str, strerror (errno)); + hints.ai_flags ^= AI_NUMERICSERV; + } + else if 
(port_parsed > G_MAXUINT16) { + errno = ERANGE; + msg_warn ("cannot parse port: %s, error: %s", tokens[1], *err_str, strerror (errno)); + hints.ai_flags ^= AI_NUMERICSERV; + } + else { + *port = port_parsed; + } + } + if (priority != NULL) { + if (port != NULL) { + cur_tok = tokens[2]; + } + else { + cur_tok = tokens[1]; + } + if (cur_tok != NULL) { + /* Priority part */ + errno = 0; + priority_parsed = strtoul (cur_tok, &err_str, 10); + if (*err_str != '\0' || errno != 0) { + msg_warn ("cannot parse priority: %s, at symbol %c, error: %s", tokens[1], *err_str, strerror (errno)); + } + else { + *priority = priority_parsed; + } + } + } + } + else if (default_port != 0) { + rspamd_snprintf (portbuf, sizeof (portbuf), "%ud", default_port); + cur_port = portbuf; + } + else { + cur_port = NULL; + } + + if ((r = getaddrinfo (cur_tok, cur_port, &hints, &res)) == 0) { + memcpy (&addr_holder, res->ai_addr, MIN (sizeof (addr_holder), res->ai_addrlen)); + if (res->ai_family == AF_INET) { + if (pool != NULL) { + *addr = rspamd_mempool_alloc (pool, INET_ADDRSTRLEN + 1); + } + inet_ntop (res->ai_family, &addr_holder.v4.sin_addr, *addr, INET_ADDRSTRLEN + 1); + } + else { + if (pool != NULL) { + *addr = rspamd_mempool_alloc (pool, INET6_ADDRSTRLEN + 1); + } + inet_ntop (res->ai_family, &addr_holder.v6.sin6_addr, *addr, INET6_ADDRSTRLEN + 1); + } + freeaddrinfo (res); + } + else { + msg_err ("address resolution for %s failed: %s", tokens[0], gai_strerror (r)); + goto err; + } + + /* Restore errno */ + errno = saved_errno; + return TRUE; + +err: + errno = saved_errno; + return FALSE; +} + +gboolean +parse_host_port_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port, guint *priority) +{ + gchar **tokens; + gboolean ret; + + tokens = g_strsplit_set (str, ":", 0); + if (!tokens || !tokens[0]) { + return FALSE; + } + + ret = parse_host_port_priority_strv (pool, tokens, addr, port, priority, 0); + + g_strfreev (tokens); + + return ret; +} + +gboolean 
+parse_host_port (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint16 *port) +{ + return parse_host_port_priority (pool, str, addr, port, NULL); +} + +gboolean +parse_host_priority (rspamd_mempool_t *pool, const gchar *str, gchar **addr, guint *priority) +{ + return parse_host_port_priority (pool, str, addr, NULL, priority); +} + +gboolean +parse_bind_line (struct config_file *cfg, struct worker_conf *cf, const gchar *str) +{ + struct rspamd_worker_bind_conf *cnf; + gchar **tokens, *tmp, *err; + gboolean ret = TRUE; + + if (str == NULL) { + return FALSE; + } + + tokens = g_strsplit_set (str, ":", 0); + if (!tokens || !tokens[0]) { + return FALSE; + } + + cnf = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_bind_conf)); + cnf->bind_port = DEFAULT_BIND_PORT; + cnf->bind_host = rspamd_mempool_strdup (cfg->cfg_pool, str); + cnf->ai = AF_UNSPEC; + + if (*tokens[0] == '/' || *tokens[0] == '.') { + cnf->ai = AF_UNIX; + LL_PREPEND (cf->bind_conf, cnf); + return TRUE; + } + else if (strcmp (tokens[0], "*") == 0) { + /* We need to add two listen entries: one for ipv4 and one for ipv6 */ + tmp = tokens[0]; + tokens[0] = "*v4"; + cnf->ai = AF_INET; + if ((ret = parse_host_port_priority_strv (cfg->cfg_pool, tokens, + &cnf->bind_host, &cnf->bind_port, NULL, DEFAULT_BIND_PORT))) { + LL_PREPEND (cf->bind_conf, cnf); + } + cnf = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct rspamd_worker_bind_conf)); + cnf->bind_port = DEFAULT_BIND_PORT; + cnf->bind_host = rspamd_mempool_strdup (cfg->cfg_pool, str); + cnf->ai = AF_INET6; + tokens[0] = "*v6"; + if ((ret &= parse_host_port_priority_strv (cfg->cfg_pool, tokens, + &cnf->bind_host, &cnf->bind_port, NULL, DEFAULT_BIND_PORT))) { + LL_PREPEND (cf->bind_conf, cnf); + } + tokens[0] = tmp; + } + else if (strcmp (tokens[0], "systemd") == 0) { + /* The actual socket will be passed by systemd environment */ + cnf->bind_host = rspamd_mempool_strdup (cfg->cfg_pool, str); + cnf->ai = strtoul (tokens[1], &err, 
10); + cnf->is_systemd = TRUE; + if (err == NULL || *err == '\0') { + LL_PREPEND (cf->bind_conf, cnf); + } + } + else { + if ((ret = parse_host_port_priority_strv (cfg->cfg_pool, tokens, + &cnf->bind_host, &cnf->bind_port, NULL, DEFAULT_BIND_PORT))) { + LL_PREPEND (cf->bind_conf, cnf); + } + } + + g_strfreev (tokens); + + return ret; +} + +void +init_defaults (struct config_file *cfg) +{ + + cfg->memcached_error_time = DEFAULT_UPSTREAM_ERROR_TIME; + cfg->memcached_dead_time = DEFAULT_UPSTREAM_DEAD_TIME; + cfg->memcached_maxerrors = DEFAULT_UPSTREAM_MAXERRORS; + cfg->memcached_protocol = TCP_TEXT; + + cfg->dns_timeout = 1000; + cfg->dns_retransmits = 5; + /* After 20 errors do throttling for 10 seconds */ + cfg->dns_throttling_errors = 20; + cfg->dns_throttling_time = 10000; + /* 16 sockets per DNS server */ + cfg->dns_io_per_server = 16; + + cfg->statfile_sync_interval = 60000; + cfg->statfile_sync_timeout = 20000; + + /* 20 Kb */ + cfg->max_diff = 20480; + + cfg->metrics = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + cfg->c_modules = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + cfg->composite_symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + cfg->classifiers_symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + cfg->cfg_params = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + cfg->metrics_symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + + cfg->map_timeout = DEFAULT_MAP_TIMEOUT; + + cfg->log_level = G_LOG_LEVEL_WARNING; + cfg->log_extended = TRUE; + + init_settings (cfg); + +} + +void +free_config (struct config_file *cfg) +{ + GList *cur; + struct symbols_group *gr; + + remove_all_maps (cfg); + ucl_obj_unref (cfg->rcl_obj); + g_hash_table_remove_all (cfg->metrics); + g_hash_table_unref (cfg->metrics); + g_hash_table_remove_all (cfg->c_modules); + g_hash_table_unref (cfg->c_modules); + g_hash_table_remove_all (cfg->composite_symbols); + g_hash_table_unref (cfg->composite_symbols); + 
g_hash_table_remove_all (cfg->cfg_params); + g_hash_table_unref (cfg->cfg_params); + g_hash_table_destroy (cfg->metrics_symbols); + g_hash_table_destroy (cfg->classifiers_symbols); + /* Free symbols groups */ + cur = cfg->symbols_groups; + while (cur) { + gr = cur->data; + if (gr->symbols) { + g_list_free (gr->symbols); + } + cur = g_list_next (cur); + } + if (cfg->symbols_groups) { + g_list_free (cfg->symbols_groups); + } + + if (cfg->checksum) { + g_free (cfg->checksum); + } + g_list_free (cfg->classifiers); + g_list_free (cfg->metrics_list); + rspamd_mempool_delete (cfg->cfg_pool); +} + +const ucl_object_t * +get_module_opt (struct config_file *cfg, const gchar *module_name, const gchar *opt_name) +{ + const ucl_object_t *res = NULL, *sec; + + sec = ucl_obj_get_key (cfg->rcl_obj, module_name); + if (sec != NULL) { + res = ucl_obj_get_key (sec, opt_name); + } + + return res; +} + +guint64 +parse_limit (const gchar *limit, guint len) +{ + guint64 result = 0; + const gchar *err_str; + + if (!limit || *limit == '\0' || len == 0) { + return 0; + } + + errno = 0; + result = strtoull (limit, (gchar **)&err_str, 10); + + if (*err_str != '\0') { + /* Megabytes */ + if (*err_str == 'm' || *err_str == 'M') { + result *= 1048576L; + } + /* Kilobytes */ + else if (*err_str == 'k' || *err_str == 'K') { + result *= 1024; + } + /* Gigabytes */ + else if (*err_str == 'g' || *err_str == 'G') { + result *= 1073741824L; + } + else if (len > 0 && err_str - limit != (gint)len) { + msg_warn ("invalid limit value '%s' at position '%s'", limit, err_str); + result = 0; + } + } + + return result; +} + +gchar +parse_flag (const gchar *str) +{ + guint len; + gchar c; + + if (!str || !*str) { + return -1; + } + + len = strlen (str); + + switch (len) { + case 1: + c = g_ascii_tolower (*str); + if (c == 'y' || c == '1') { + return 1; + } + else if (c == 'n' || c == '0') { + return 0; + } + break; + case 2: + if (g_ascii_strncasecmp (str, "no", len) == 0) { + return 0; + } + else if 
(g_ascii_strncasecmp (str, "on", len) == 0) { + return 1; + } + break; + case 3: + if (g_ascii_strncasecmp (str, "yes", len) == 0) { + return 1; + } + else if (g_ascii_strncasecmp (str, "off", len) == 0) { + return 0; + } + break; + case 4: + if (g_ascii_strncasecmp (str, "true", len) == 0) { + return 1; + } + break; + case 5: + if (g_ascii_strncasecmp (str, "false", len) == 0) { + return 0; + } + break; + } + + return -1; +} + +gboolean +get_config_checksum (struct config_file *cfg) +{ + gint fd; + void *map; + struct stat st; + + /* Compute checksum for config file that should be used by xml dumper */ + if ((fd = open (cfg->cfg_name, O_RDONLY)) == -1) { + msg_err ("config file %s is no longer available, cannot calculate checksum"); + return FALSE; + } + if (stat (cfg->cfg_name, &st) == -1) { + msg_err ("cannot stat %s: %s", cfg->cfg_name, strerror (errno)); + return FALSE; + } + + /* Now mmap this file to simplify reading process */ + if ((map = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { + msg_err ("cannot mmap %s: %s", cfg->cfg_name, strerror (errno)); + close (fd); + return FALSE; + } + close (fd); + + /* Get checksum for a file */ + cfg->checksum = g_compute_checksum_for_string (G_CHECKSUM_MD5, map, st.st_size); + munmap (map, st.st_size); + + return TRUE; +} +/* + * Perform post load actions + */ +void +post_load_config (struct config_file *cfg) +{ +#ifdef HAVE_CLOCK_GETTIME + struct timespec ts; +#endif + struct metric *def_metric; + +#ifdef HAVE_CLOCK_GETTIME +#ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID + clock_getres (CLOCK_PROCESS_CPUTIME_ID, &ts); +# elif defined(HAVE_CLOCK_VIRTUAL) + clock_getres (CLOCK_VIRTUAL, &ts); +# else + clock_getres (CLOCK_REALTIME, &ts); +# endif + + cfg->clock_res = (gint)log10 (1000000 / ts.tv_nsec); + if (cfg->clock_res < 0) { + cfg->clock_res = 0; + } + if (cfg->clock_res > 3) { + cfg->clock_res = 3; + } +#else + /* For gettimeofday */ + cfg->clock_res = 1; +#endif + + if ((def_metric = 
g_hash_table_lookup (cfg->metrics, DEFAULT_METRIC)) == NULL) { + def_metric = check_metric_conf (cfg, NULL); + def_metric->name = DEFAULT_METRIC; + def_metric->actions[METRIC_ACTION_REJECT].score = DEFAULT_SCORE; + cfg->metrics_list = g_list_prepend (cfg->metrics_list, def_metric); + g_hash_table_insert (cfg->metrics, DEFAULT_METRIC, def_metric); + } + + cfg->default_metric = def_metric; + + /* Lua options */ + (void)lua_post_load_config (cfg); + init_dynamic_config (cfg); +} + +#if 0 +void +parse_err (const gchar *fmt, ...) +{ + va_list aq; + gchar logbuf[BUFSIZ], readbuf[32]; + gint r; + + va_start (aq, fmt); + rspamd_strlcpy (readbuf, yytext, sizeof (readbuf)); + + r = snprintf (logbuf, sizeof (logbuf), "config file parse error! line: %d, text: %s, reason: ", yylineno, readbuf); + r += vsnprintf (logbuf + r, sizeof (logbuf) - r, fmt, aq); + + va_end (aq); + g_critical ("%s", logbuf); +} + +void +parse_warn (const gchar *fmt, ...) +{ + va_list aq; + gchar logbuf[BUFSIZ], readbuf[32]; + gint r; + + va_start (aq, fmt); + rspamd_strlcpy (readbuf, yytext, sizeof (readbuf)); + + r = snprintf (logbuf, sizeof (logbuf), "config file parse warning! 
line: %d, text: %s, reason: ", yylineno, readbuf); + r += vsnprintf (logbuf + r, sizeof (logbuf) - r, fmt, aq); + + va_end (aq); + g_warning ("%s", logbuf); +} +#endif + +void +unescape_quotes (gchar *line) +{ + gchar *c = line, *t; + + while (*c) { + if (*c == '\\' && *(c + 1) == '"') { + t = c; + while (*t) { + *t = *(t + 1); + t++; + } + } + c++; + } +} + +GList * +parse_comma_list (rspamd_mempool_t * pool, const gchar *line) +{ + GList *res = NULL; + const gchar *c, *p; + gchar *str; + + c = line; + p = c; + + while (*p) { + if (*p == ',' && *c != *p) { + str = rspamd_mempool_alloc (pool, p - c + 1); + rspamd_strlcpy (str, c, p - c + 1); + res = g_list_prepend (res, str); + /* Skip spaces */ + while (g_ascii_isspace (*(++p))); + c = p; + continue; + } + p++; + } + if (res != NULL) { + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_list_free, res); + } + + return res; +} + +struct classifier_config * +check_classifier_conf (struct config_file *cfg, struct classifier_config *c) +{ + if (c == NULL) { + c = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct classifier_config)); + } + if (c->opts == NULL) { + c->opts = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t) g_hash_table_destroy, c->opts); + } + if (c->labels == NULL) { + c->labels = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, NULL, (GDestroyNotify)g_list_free); + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t) g_hash_table_destroy, c->labels); + } + + return c; +} + +struct statfile* +check_statfile_conf (struct config_file *cfg, struct statfile *c) +{ + if (c == NULL) { + c = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct statfile)); + } + + return c; +} + +struct metric * +check_metric_conf (struct config_file *cfg, struct metric *c) +{ + int i; + if (c == NULL) { + c = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct metric)); + c->grow_factor = 1.0; + 
c->symbols = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + c->descriptions = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) { + c->actions[i].score = -1.0; + } + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t) g_hash_table_destroy, c->symbols); + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t) g_hash_table_destroy, c->descriptions); + } + + return c; +} + +struct worker_conf * +check_worker_conf (struct config_file *cfg, struct worker_conf *c) +{ + if (c == NULL) { + c = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (struct worker_conf)); + c->params = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + c->active_workers = g_queue_new (); + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_hash_table_destroy, c->params); + rspamd_mempool_add_destructor (cfg->cfg_pool, (rspamd_mempool_destruct_t)g_queue_free, c->active_workers); +#ifdef HAVE_SC_NPROCESSORS_ONLN + c->count = sysconf (_SC_NPROCESSORS_ONLN); +#else + c->count = DEFAULT_WORKERS_NUM; +#endif + c->rlimit_nofile = DEFAULT_RLIMIT_NOFILE; + c->rlimit_maxcore = DEFAULT_RLIMIT_MAXCORE; + } + + return c; +} + + +static bool +rspamd_include_map_handler (const guchar *data, gsize len, void* ud) +{ + struct config_file *cfg = (struct config_file *)ud; + struct rspamd_ucl_map_cbdata *cbdata, **pcbdata; + gchar *map_line; + + map_line = rspamd_mempool_alloc (cfg->cfg_pool, len + 1); + rspamd_strlcpy (map_line, data, len + 1); + + cbdata = g_malloc (sizeof (struct rspamd_ucl_map_cbdata)); + pcbdata = g_malloc (sizeof (struct rspamd_ucl_map_cbdata *)); + cbdata->buf = NULL; + cbdata->cfg = cfg; + *pcbdata = cbdata; + + return add_map (cfg, map_line, "ucl include", rspamd_ucl_read_cb, rspamd_ucl_fin_cb, (void **)pcbdata); +} + +/* + * Variables: + * $CONFDIR - configuration directory + * $RUNDIR - local states directory + * $DBDIR - databases dir + * $LOGDIR - 
logs dir + * $PLUGINSDIR - pluggins dir + * $PREFIX - installation prefix + * $VERSION - rspamd version + */ + +#define RSPAMD_CONFDIR_MACRO "CONFDIR" +#define RSPAMD_RUNDIR_MACRO "RUNDIR" +#define RSPAMD_DBDIR_MACRO "DBDIR" +#define RSPAMD_LOGDIR_MACRO "LOGDIR" +#define RSPAMD_PLUGINSDIR_MACRO "PLUGINSDIR" +#define RSPAMD_PREFIX_MACRO "PREFIX" +#define RSPAMD_VERSION_MACRO "VERSION" + +static void +rspamd_ucl_add_conf_variables (struct ucl_parser *parser) +{ + ucl_parser_register_variable (parser, RSPAMD_CONFDIR_MACRO, RSPAMD_CONFDIR); + ucl_parser_register_variable (parser, RSPAMD_RUNDIR_MACRO, RSPAMD_RUNDIR); + ucl_parser_register_variable (parser, RSPAMD_DBDIR_MACRO, RSPAMD_DBDIR); + ucl_parser_register_variable (parser, RSPAMD_LOGDIR_MACRO, RSPAMD_LOGDIR); + ucl_parser_register_variable (parser, RSPAMD_PLUGINSDIR_MACRO, RSPAMD_PLUGINSDIR); + ucl_parser_register_variable (parser, RSPAMD_PREFIX_MACRO, RSPAMD_PREFIX); + ucl_parser_register_variable (parser, RSPAMD_VERSION_MACRO, RVERSION); +} + +static void +rspamd_ucl_add_conf_macros (struct ucl_parser *parser, struct config_file *cfg) +{ + ucl_parser_register_macro (parser, "include_map", rspamd_include_map_handler, cfg); +} + +gboolean +read_rspamd_config (struct config_file *cfg, const gchar *filename, + const gchar *convert_to, rspamd_rcl_section_fin_t logger_fin, + gpointer logger_ud) +{ + struct stat st; + gint fd; + gchar *data; + GError *err = NULL; + struct rspamd_rcl_section *top, *logger; + gboolean res; + struct ucl_parser *parser; + + if (stat (filename, &st) == -1) { + msg_err ("cannot stat %s: %s", filename, strerror (errno)); + return FALSE; + } + if ((fd = open (filename, O_RDONLY)) == -1) { + msg_err ("cannot open %s: %s", filename, strerror (errno)); + return FALSE; + + } + /* Now mmap this file to simplify reading process */ + if ((data = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { + msg_err ("cannot mmap %s: %s", filename, strerror (errno)); + close (fd); + return 
FALSE; + } + close (fd); + + parser = ucl_parser_new (0); + rspamd_ucl_add_conf_variables (parser); + rspamd_ucl_add_conf_macros (parser, cfg); + if (!ucl_parser_add_chunk (parser, data, st.st_size)) { + msg_err ("ucl parser error: %s", ucl_parser_get_error (parser)); + ucl_parser_free (parser); + munmap (data, st.st_size); + return FALSE; + } + munmap (data, st.st_size); + cfg->rcl_obj = ucl_parser_get_object (parser); + ucl_parser_free (parser); + res = TRUE; + + if (!res) { + return FALSE; + } + + top = rspamd_rcl_config_init (); + err = NULL; + + HASH_FIND_STR(top, "logging", logger); + if (logger != NULL) { + logger->fin = logger_fin; + logger->fin_ud = logger_ud; + } + + if (!rspamd_read_rcl_config (top, cfg, cfg->rcl_obj, &err)) { + msg_err ("rcl parse error: %s", err->message); + return FALSE; + } + + return TRUE; +} + +static void +symbols_classifiers_callback (gpointer key, gpointer value, gpointer ud) +{ + struct config_file *cfg = ud; + + register_virtual_symbol (&cfg->cache, key, 1.0); +} + +void +insert_classifier_symbols (struct config_file *cfg) +{ + g_hash_table_foreach (cfg->classifiers_symbols, symbols_classifiers_callback, cfg); +} + +struct classifier_config* +find_classifier_conf (struct config_file *cfg, const gchar *name) +{ + GList *cur; + struct classifier_config *cf; + + if (name == NULL) { + return NULL; + } + + cur = cfg->classifiers; + while (cur) { + cf = cur->data; + + if (g_ascii_strcasecmp (cf->classifier->name, name) == 0) { + return cf; + } + + cur = g_list_next (cur); + } + + return NULL; +} + +gboolean +check_classifier_statfiles (struct classifier_config *cf) +{ + struct statfile *st; + gboolean has_other = FALSE, res = FALSE, cur_class; + GList *cur; + + /* First check classes directly */ + cur = cf->statfiles; + while (cur) { + st = cur->data; + if (!has_other) { + cur_class = st->is_spam; + has_other = TRUE; + } + else { + if (cur_class != st->is_spam) { + return TRUE; + } + } + + cur = g_list_next (cur); + } + + if 
(!has_other) { + /* We have only one statfile */ + return FALSE; + } + /* We have not detected any statfile that has different class, so turn on euristic based on symbol's name */ + has_other = FALSE; + cur = cf->statfiles; + while (cur) { + st = cur->data; + if (rspamd_strncasestr (st->symbol, "spam", -1) != NULL) { + st->is_spam = TRUE; + } + else if (rspamd_strncasestr (st->symbol, "ham", -1) != NULL) { + st->is_spam = FALSE; + } + + if (!has_other) { + cur_class = st->is_spam; + has_other = TRUE; + } + else { + if (cur_class != st->is_spam) { + res = TRUE; + } + } + + cur = g_list_next (cur); + } + + return res; +} + +static gchar* +rspamd_ucl_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) +{ + struct rspamd_ucl_map_cbdata *cbdata = data->cur_data, *prev; + + if (cbdata == NULL) { + cbdata = g_malloc (sizeof (struct rspamd_ucl_map_cbdata)); + prev = data->prev_data; + cbdata->buf = g_string_sized_new (BUFSIZ); + cbdata->cfg = prev->cfg; + data->cur_data = cbdata; + } + g_string_append_len (cbdata->buf, chunk, len); + + /* Say not to copy any part of this buffer */ + return NULL; +} + +static void +rspamd_ucl_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data) +{ + struct rspamd_ucl_map_cbdata *cbdata = data->cur_data, *prev = data->prev_data; + ucl_object_t *obj; + struct ucl_parser *parser; + guint32 checksum; + + if (prev != NULL) { + if (prev->buf != NULL) { + g_string_free (prev->buf, TRUE); + } + g_free (prev); + } + + if (cbdata == NULL) { + msg_err ("map fin error: new data is NULL"); + return; + } + + checksum = murmur32_hash (cbdata->buf->str, cbdata->buf->len); + if (data->map->checksum != checksum) { + /* New data available */ + parser = ucl_parser_new (0); + if (!ucl_parser_add_chunk (parser, cbdata->buf->str, cbdata->buf->len)) { + msg_err ("cannot parse map %s: %s", data->map->uri, ucl_parser_get_error (parser)); + ucl_parser_free (parser); + } + else { + obj = ucl_parser_get_object (parser); + 
ucl_parser_free (parser); + /* XXX: add replace objects code */ + ucl_object_unref (obj); + data->map->checksum = checksum; + } + } + else { + msg_info ("do not reload map %s, checksum is the same: %d", data->map->uri, checksum); + } +} + +gboolean +rspamd_parse_ip_list (const gchar *ip_list, radix_tree_t **tree) +{ + gchar **strvec, **cur; + struct in_addr ina; + guint32 mask; + + strvec = g_strsplit_set (ip_list, ",", 0); + cur = strvec; + + while (*cur != NULL) { + /* XXX: handle only ipv4 addresses */ + if (parse_ipmask_v4 (*cur, &ina, &mask)) { + if (*tree == NULL) { + *tree = radix_tree_create (); + } + radix32tree_add (*tree, htonl (ina.s_addr), mask, 1); + } + cur ++; + } + + return (*tree != NULL); +} + +/* + * vi:ts=4 + */ diff --git a/src/libserver/dkim.c b/src/libserver/dkim.c new file mode 100644 index 000000000..c7c8a35e1 --- /dev/null +++ b/src/libserver/dkim.c @@ -0,0 +1,1480 @@ +/* Copyright (c) 2010-2011, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "main.h" +#include "message.h" +#include "dkim.h" +#include "dns.h" + +/* Parser of dkim params */ +typedef gboolean (*dkim_parse_param_f) (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); + +static gboolean rspamd_dkim_parse_signature (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_signalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_domain (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_canonalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_ignore (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_selector (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_hdrlist (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_version (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_timestamp (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_expiration (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_bodyhash 
(rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); +static gboolean rspamd_dkim_parse_bodylength (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err); + + +static const dkim_parse_param_f parser_funcs[] = { + [DKIM_PARAM_SIGNATURE] = rspamd_dkim_parse_signature, + [DKIM_PARAM_SIGNALG] = rspamd_dkim_parse_signalg, + [DKIM_PARAM_DOMAIN] = rspamd_dkim_parse_domain, + [DKIM_PARAM_CANONALG] = rspamd_dkim_parse_canonalg, + [DKIM_PARAM_QUERYMETHOD] = rspamd_dkim_parse_ignore, + [DKIM_PARAM_SELECTOR] = rspamd_dkim_parse_selector, + [DKIM_PARAM_HDRLIST] = rspamd_dkim_parse_hdrlist, + [DKIM_PARAM_VERSION] = rspamd_dkim_parse_version, + [DKIM_PARAM_IDENTITY] = rspamd_dkim_parse_ignore, + [DKIM_PARAM_TIMESTAMP] = rspamd_dkim_parse_timestamp, + [DKIM_PARAM_EXPIRATION] = rspamd_dkim_parse_expiration, + [DKIM_PARAM_COPIEDHDRS] = rspamd_dkim_parse_ignore, + [DKIM_PARAM_BODYHASH] = rspamd_dkim_parse_bodyhash, + [DKIM_PARAM_BODYLENGTH] = rspamd_dkim_parse_bodylength +}; + +struct rspamd_dkim_header { + gchar *name; + guint count; +}; + +#define DKIM_ERROR dkim_error_quark () +GQuark +dkim_error_quark (void) +{ + return g_quark_from_static_string ("dkim-error-quark"); +} + +/* Parsers implementation */ +static gboolean +rspamd_dkim_parse_signature (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + ctx->b = rspamd_mempool_alloc (ctx->pool, len + 1); + rspamd_strlcpy (ctx->b, param, len + 1); +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 20)) + gchar *tmp; + gsize tmp_len = len; + tmp = g_base64_decode (ctx->b, &tmp_len); + rspamd_strlcpy (ctx->b, tmp, len + 1); + g_free (tmp); +#else + g_base64_decode_inplace (ctx->b, &len); +#endif + ctx->blen = len; + return TRUE; +} + +static gboolean +rspamd_dkim_parse_signalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + if (len == 8) { + if (memcmp (param, "rsa-sha1", len) == 0) { + ctx->sig_alg = DKIM_SIGN_RSASHA1; + return TRUE; 
+ } + } + else if (len == 10) { + if (memcmp (param, "rsa-sha256", len) == 0) { + ctx->sig_alg = DKIM_SIGN_RSASHA256; + return TRUE; + } + } + + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_A, "invalid dkim sign algorithm"); + return FALSE; +} + +static gboolean +rspamd_dkim_parse_domain (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + ctx->domain = rspamd_mempool_alloc (ctx->pool, len + 1); + rspamd_strlcpy (ctx->domain, param, len + 1); + return TRUE; +} + +static gboolean +rspamd_dkim_parse_canonalg (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + const gchar *p, *slash = NULL, *end = param + len; + gsize sl = 0; + + p = param; + while (p != end) { + if (*p == '/') { + slash = p; + break; + } + p ++; + sl ++; + } + + if (slash == NULL) { + /* Only check header */ + if (len == 6 && memcmp (param, "simple", len) == 0) { + ctx->header_canon_type = DKIM_CANON_SIMPLE; + return TRUE; + } + else if (len == 7 && memcmp (param, "relaxed", len) == 0) { + ctx->header_canon_type = DKIM_CANON_RELAXED; + return TRUE; + } + } + else { + /* First check header */ + if (sl == 6 && memcmp (param, "simple", sl) == 0) { + ctx->header_canon_type = DKIM_CANON_SIMPLE; + } + else if (sl == 7 && memcmp (param, "relaxed", sl) == 0) { + ctx->header_canon_type = DKIM_CANON_RELAXED; + } + else { + goto err; + } + /* Check body */ + len -= sl + 1; + slash ++; + if (len == 6 && memcmp (slash, "simple", len) == 0) { + ctx->body_canon_type = DKIM_CANON_SIMPLE; + return TRUE; + } + else if (len == 7 && memcmp (slash, "relaxed", len) == 0) { + ctx->body_canon_type = DKIM_CANON_RELAXED; + return TRUE; + } + } + +err: + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_A, "invalid dkim canonization algorithm"); + return FALSE; +} + +static gboolean +rspamd_dkim_parse_ignore (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + /* Just ignore unused params */ + return TRUE; +} + +static gboolean 
+rspamd_dkim_parse_selector (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + ctx->selector = rspamd_mempool_alloc (ctx->pool, len + 1); + rspamd_strlcpy (ctx->selector, param, len + 1); + return TRUE; +} + +static struct rspamd_dkim_header* +rspamd_dkim_find_header (GPtrArray *arr, const gchar *name, gsize len) +{ + guint i; + struct rspamd_dkim_header *h; + + for (i = 0; i < arr->len; i ++) { + h = g_ptr_array_index (arr, i); + if (g_ascii_strncasecmp (h->name, name, len) == 0) { + return h; + } + } + + return NULL; +} + +static void +rspamd_dkim_hlist_free (void *ud) +{ + GPtrArray *a = ud; + + g_ptr_array_free (a, TRUE); +} + +static gboolean +rspamd_dkim_parse_hdrlist (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + const gchar *c, *p, *end = param + len; + gchar *h; + gboolean from_found = FALSE; + guint count = 0; + struct rspamd_dkim_header *new; + + p = param; + while (p <= end) { + if ((*p == ':' || p == end)) { + count ++; + } + p ++; + } + + if (count > 0) { + ctx->hlist = g_ptr_array_sized_new (count); + } + else { + return FALSE; + } + + c = param; + p = param; + while (p <= end) { + if ((*p == ':' || p == end) && p - c > 0) { + if ((new = rspamd_dkim_find_header (ctx->hlist, c, p - c)) != NULL) { + new->count ++; + } + else { + /* Insert new header to the list */ + new = rspamd_mempool_alloc (ctx->pool, sizeof (struct rspamd_dkim_header)); + h = rspamd_mempool_alloc (ctx->pool, p - c + 1); + rspamd_strlcpy (h, c, p - c + 1); + g_strstrip (h); + new->name = h; + new->count = 1; + /* Check mandatory from */ + if (!from_found && g_ascii_strcasecmp (h, "from") == 0) { + from_found = TRUE; + } + g_ptr_array_add (ctx->hlist, new); + } + c = p + 1; + p ++; + } + else { + p ++; + } + } + + if (!ctx->hlist) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_H, "invalid dkim header list"); + return FALSE; + } + else { + if (!from_found) { + g_ptr_array_free (ctx->hlist, TRUE); + g_set_error (err, 
DKIM_ERROR, DKIM_SIGERROR_INVALID_H, "invalid dkim header list, from header is missing"); + return FALSE; + } + /* Reverse list */ + rspamd_mempool_add_destructor (ctx->pool, (rspamd_mempool_destruct_t)rspamd_dkim_hlist_free, ctx->hlist); + } + + return TRUE; +} + +static gboolean +rspamd_dkim_parse_version (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + if (len != 1 || *param != '1') { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_VERSION, "invalid dkim version"); + return FALSE; + } + + ctx->ver = 1; + return TRUE; +} + +static gboolean +rspamd_dkim_parse_timestamp (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + gulong val; + + if (!rspamd_strtoul (param, len, &val)) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim timestamp"); + return FALSE; + } + ctx->timestamp = val; + + return TRUE; +} + +static gboolean +rspamd_dkim_parse_expiration (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + gulong val; + + if (!rspamd_strtoul (param, len, &val)) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim expiration"); + return FALSE; + } + ctx->expiration = val; + + return TRUE; +} + +static gboolean +rspamd_dkim_parse_bodyhash (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + ctx->bh = rspamd_mempool_alloc (ctx->pool, len + 1); + rspamd_strlcpy (ctx->bh, param, len + 1); +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 20)) + gchar *tmp; + gsize tmp_len = len; + tmp = g_base64_decode (ctx->bh, &tmp_len); + rspamd_strlcpy (ctx->bh, tmp, len + 1); + g_free (tmp); +#else + g_base64_decode_inplace (ctx->bh, &len); +#endif + ctx->bhlen = len; + return TRUE; +} + +static gboolean +rspamd_dkim_parse_bodylength (rspamd_dkim_context_t* ctx, const gchar *param, gsize len, GError **err) +{ + gulong val; + + if (!rspamd_strtoul (param, len, &val)) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_INVALID_L, "invalid dkim 
body length"); + return FALSE; + } + ctx->len = val; + + return TRUE; +} + +/** + * Create new dkim context from signature + * @param sig message's signature + * @param pool pool to allocate memory from + * @param err pointer to error object + * @return new context or NULL + */ +rspamd_dkim_context_t* +rspamd_create_dkim_context (const gchar *sig, rspamd_mempool_t *pool, guint time_jitter, GError **err) +{ + const gchar *p, *c, *tag = NULL, *end; + gsize taglen; + gint param = DKIM_PARAM_UNKNOWN; + time_t now; + rspamd_dkim_context_t *new; + enum { + DKIM_STATE_TAG = 0, + DKIM_STATE_AFTER_TAG, + DKIM_STATE_VALUE, + DKIM_STATE_SKIP_SPACES = 99, + DKIM_STATE_ERROR = 100 + } state, next_state; + + + new = rspamd_mempool_alloc0 (pool, sizeof (rspamd_dkim_context_t)); + new->pool = pool; + new->header_canon_type = DKIM_CANON_DEFAULT; + new->body_canon_type = DKIM_CANON_DEFAULT; + new->sig_alg = DKIM_SIGN_UNKNOWN; + /* A simple state machine of parsing tags */ + state = DKIM_STATE_SKIP_SPACES; + next_state = DKIM_STATE_TAG; + taglen = 0; + p = sig; + c = sig; + end = p + strlen (p); + while (p <= end) { + switch (state) { + case DKIM_STATE_TAG: + if (g_ascii_isspace (*p)) { + taglen = p - c; + while (*p && g_ascii_isspace (*p)) { + /* Skip spaces before '=' sign */ + p ++; + } + if (*p != '=') { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param"); + state = DKIM_STATE_ERROR; + } + else { + state = DKIM_STATE_SKIP_SPACES; + next_state = DKIM_STATE_AFTER_TAG; + param = DKIM_PARAM_UNKNOWN; + p ++; + tag = c; + } + } + else if (*p == '=') { + state = DKIM_STATE_SKIP_SPACES; + next_state = DKIM_STATE_AFTER_TAG; + param = DKIM_PARAM_UNKNOWN; + p ++; + tag = c; + } + else { + taglen ++; + p ++; + } + break; + case DKIM_STATE_AFTER_TAG: + /* We got tag at tag and len at taglen */ + switch (taglen) { + case 0: + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "zero length dkim param"); + state = DKIM_STATE_ERROR; + break; + case 1: + /* Simple tags 
*/ + switch (*tag) { + case 'v': + param = DKIM_PARAM_VERSION; + break; + case 'a': + param = DKIM_PARAM_SIGNALG; + break; + case 'b': + param = DKIM_PARAM_SIGNATURE; + break; + case 'c': + param = DKIM_PARAM_CANONALG; + break; + case 'd': + param = DKIM_PARAM_DOMAIN; + break; + case 'h': + param = DKIM_PARAM_HDRLIST; + break; + case 'i': + param = DKIM_PARAM_IDENTITY; + break; + case 'l': + param = DKIM_PARAM_BODYLENGTH; + break; + case 'q': + param = DKIM_PARAM_QUERYMETHOD; + break; + case 's': + param = DKIM_PARAM_SELECTOR; + break; + case 't': + param = DKIM_PARAM_TIMESTAMP; + break; + case 'x': + param = DKIM_PARAM_EXPIRATION; + break; + case 'z': + param = DKIM_PARAM_COPIEDHDRS; + break; + default: + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param: %c", *tag); + state = DKIM_STATE_ERROR; + break; + } + break; + case 2: + if (tag[0] == 'b' && tag[1] == 'h') { + param = DKIM_PARAM_BODYHASH; + } + else { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param: %c%c", tag[0], tag[1]); + state = DKIM_STATE_ERROR; + } + break; + default: + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_UNKNOWN, "invalid dkim param length: %zd", taglen); + state = DKIM_STATE_ERROR; + break; + } + if (state != DKIM_STATE_ERROR) { + /* Skip spaces */ + state = DKIM_STATE_SKIP_SPACES; + next_state = DKIM_STATE_VALUE; + } + break; + case DKIM_STATE_VALUE: + if (*p == ';') { + if (param == DKIM_PARAM_UNKNOWN || !parser_funcs[param](new, c, p - c, err)) { + state = DKIM_STATE_ERROR; + } + else { + state = DKIM_STATE_SKIP_SPACES; + next_state = DKIM_STATE_TAG; + p ++; + taglen = 0; + } + } + else if (p == end) { + if (param == DKIM_PARAM_UNKNOWN || !parser_funcs[param](new, c, p - c + 1, err)) { + state = DKIM_STATE_ERROR; + } + else { + /* Finish processing */ + p ++; + } + } + else { + p ++; + } + break; + case DKIM_STATE_SKIP_SPACES: + if (g_ascii_isspace (*p)) { + p ++; + } + else { + c = p; + state = next_state; + } + break; + case 
DKIM_STATE_ERROR: + if (err) { + msg_info ("dkim parse failed: %s", (*err)->message); + return NULL; + } + else { + msg_info ("dkim parse failed: unknown error"); + return NULL; + } + break; + } + } + + /* Now check validity of signature */ + if (new->b == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_B, "b parameter missing"); + return NULL; + } + if (new->bh == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_BH, "bh parameter missing"); + return NULL; + } + if (new->domain == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_D, "domain parameter missing"); + return NULL; + } + if (new->selector == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_S, "selector parameter missing"); + return NULL; + } + if (new->ver == 0) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_V, "v parameter missing"); + return NULL; + } + if (new->hlist == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_H, "h parameter missing"); + return NULL; + } + if (new->sig_alg == DKIM_SIGN_UNKNOWN) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_EMPTY_S, "s parameter missing"); + return NULL; + } + if (new->sig_alg == DKIM_SIGN_RSASHA1) { + /* Check bh length */ + if (new->bhlen != (guint)g_checksum_type_get_length (G_CHECKSUM_SHA1)) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_BADSIG, "signature has incorrect length: %ud", new->bhlen); + return NULL; + } + + } + else if (new->sig_alg == DKIM_SIGN_RSASHA256) { + if (new->bhlen != (guint)g_checksum_type_get_length (G_CHECKSUM_SHA256)) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_BADSIG, "signature has incorrect length: %ud", new->bhlen); + return NULL; + } + } + /* Check expiration */ + now = time (NULL); + if (new->timestamp && now < new->timestamp && new->timestamp - now > (gint)time_jitter) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_FUTURE, "signature was made in future, ignoring"); + return NULL; + } + if (new->expiration && new->expiration < now) { + g_set_error (err, 
DKIM_ERROR, DKIM_SIGERROR_EXPIRED, "signature has expired"); + return NULL; + } + + /* Now create dns key to request further */ + taglen = strlen (new->domain) + strlen (new->selector) + sizeof (DKIM_DNSKEYNAME) + 2; + new->dns_key = rspamd_mempool_alloc (new->pool, taglen); + rspamd_snprintf (new->dns_key, taglen, "%s.%s.%s", new->selector, DKIM_DNSKEYNAME, new->domain); + + /* Create checksums for further operations */ + if (new->sig_alg == DKIM_SIGN_RSASHA1) { + new->body_hash = g_checksum_new (G_CHECKSUM_SHA1); + new->headers_hash = g_checksum_new (G_CHECKSUM_SHA1); + } + else if (new->sig_alg == DKIM_SIGN_RSASHA256) { + new->body_hash = g_checksum_new (G_CHECKSUM_SHA256); + new->headers_hash = g_checksum_new (G_CHECKSUM_SHA256); + } + else { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_BADSIG, "signature has unsupported signature algorithm"); + return NULL; + } + + rspamd_mempool_add_destructor (new->pool, (rspamd_mempool_destruct_t)g_checksum_free, new->body_hash); + rspamd_mempool_add_destructor (new->pool, (rspamd_mempool_destruct_t)g_checksum_free, new->headers_hash); + + return new; +} + +struct rspamd_dkim_key_cbdata { + rspamd_dkim_context_t *ctx; + dkim_key_handler_f handler; + gpointer ud; +}; + +static rspamd_dkim_key_t* +rspamd_dkim_make_key (const gchar *keydata, guint keylen, GError **err) +{ + rspamd_dkim_key_t *key = NULL; + + key = g_slice_alloc0 (sizeof (rspamd_dkim_key_t)); + key->keydata = g_slice_alloc (keylen + 1); + rspamd_strlcpy (key->keydata, keydata, keylen + 1); + key->keylen = keylen + 1; + key->decoded_len = keylen + 1; +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION < 20)) + gchar *tmp; + gsize tmp_len = keylen; + tmp = g_base64_decode (key->keydata, &tmp_len); + rspamd_strlcpy (key->keydata, tmp, keylen + 1); + g_free (tmp); + key->decoded_len = tmp_len; +#else + g_base64_decode_inplace (key->keydata, &key->decoded_len); +#endif +#ifdef HAVE_OPENSSL + key->key_bio = BIO_new_mem_buf (key->keydata, key->decoded_len); + if 
(key->key_bio == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "cannot make ssl bio from key"); + rspamd_dkim_key_free (key); + return NULL; + } + + key->key_evp = d2i_PUBKEY_bio (key->key_bio, NULL); + if (key->key_evp == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "cannot extract pubkey from bio"); + rspamd_dkim_key_free (key); + return NULL; + } + + key->key_rsa = EVP_PKEY_get1_RSA (key->key_evp); + if (key->key_rsa == NULL) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "cannot extract rsa key from evp key"); + rspamd_dkim_key_free (key); + return NULL; + } + +#endif + + return key; +} + +/** + * Free DKIM key + * @param key + */ +void +rspamd_dkim_key_free (rspamd_dkim_key_t *key) +{ +#ifdef HAVE_OPENSSL + if (key->key_rsa) { + RSA_free (key->key_rsa); + } + if (key->key_bio) { + BIO_free (key->key_bio); + } +#endif + g_slice_free1 (key->keylen, key->keydata); + g_slice_free1 (sizeof (rspamd_dkim_key_t), key); +} + +static rspamd_dkim_key_t* +rspamd_dkim_parse_key (const gchar *txt, gsize *keylen, GError **err) +{ + const gchar *c, *p, *end; + gint state = 0; + gsize len; + + c = txt; + p = txt; + end = txt + strlen (txt); + + while (p <= end) { + switch (state) { + case 0: + if (p != end && p[0] == 'p' && p[1] == '=') { + /* We got something like public key */ + c = p + 2; + p = c; + state = 1; + } + else { + /* Ignore everything */ + p ++; + } + break; + case 1: + /* State when we got p= and looking for some public key */ + if ((*p == ';' || p == end) && p > c) { + len = p - c; + return rspamd_dkim_make_key (c, len, err); + } + else { + p ++; + } + break; + } + } + + if (p - c == 0) { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYREVOKED, "key was revoked"); + } + else { + g_set_error (err, DKIM_ERROR, DKIM_SIGERROR_KEYFAIL, "key was not found"); + } + + return NULL; +} + +/* Get TXT request data and parse it */ +static void +rspamd_dkim_dns_cb (struct rdns_reply *reply, gpointer arg) +{ + struct 
rspamd_dkim_key_cbdata *cbdata = arg; + rspamd_dkim_key_t *key = NULL; + GError *err = NULL; + struct rdns_reply_entry *elt; + gsize keylen = 0; + + if (reply->code != RDNS_RC_NOERROR) { + g_set_error (&err, DKIM_ERROR, DKIM_SIGERROR_NOKEY, "dns request to %s failed: %s", cbdata->ctx->dns_key, + rdns_strerror (reply->code)); + cbdata->handler (NULL, 0, cbdata->ctx, cbdata->ud, err); + } + else { + LL_FOREACH (reply->entries, elt) { + if (elt->type == RDNS_REQUEST_TXT) { + key = rspamd_dkim_parse_key (elt->content.txt.data, &keylen, &err); + if (key) { + key->ttl = elt->ttl; + break; + } + } + } + if (key != NULL && err != NULL) { + /* Free error as it is insignificant */ + g_error_free (err); + err = NULL; + } + cbdata->handler (key, keylen, cbdata->ctx, cbdata->ud, err); + } +} + +/** + * Make DNS request for specified context and obtain and parse key + * @param ctx dkim context from signature + * @param resolver dns resolver object + * @param s async session to make request + * @return + */ +gboolean +rspamd_get_dkim_key (rspamd_dkim_context_t *ctx, struct rspamd_dns_resolver *resolver, + struct rspamd_async_session *s, dkim_key_handler_f handler, gpointer ud) +{ + struct rspamd_dkim_key_cbdata *cbdata; + + g_return_val_if_fail (ctx != NULL, FALSE); + g_return_val_if_fail (ctx->dns_key != NULL, FALSE); + + cbdata = rspamd_mempool_alloc (ctx->pool, sizeof (struct rspamd_dkim_key_cbdata)); + cbdata->ctx = ctx; + cbdata->handler = handler; + cbdata->ud = ud; + + return make_dns_request (resolver, s, ctx->pool, rspamd_dkim_dns_cb, cbdata, RDNS_REQUEST_TXT, ctx->dns_key); +} + +static gboolean +rspamd_dkim_relaxed_body_step (GChecksum *ck, const gchar **start, guint remain) +{ + const gchar *h; + static gchar buf[BUFSIZ]; + gchar *t; + guint len, inlen; + gboolean got_sp, finished = FALSE; + + if (remain > sizeof (buf)) { + len = sizeof (buf); + } + else { + len = remain; + finished = TRUE; + } + inlen = sizeof (buf) - 1; + h = *start; + t = &buf[0]; + got_sp = FALSE; 
+ + while (len && inlen) { + if (*h == '\r' || *h == '\n') { + /* Ignore spaces at the end of line */ + if (got_sp) { + got_sp = FALSE; + t --; + } + /* Replace a single \n or \r with \r\n */ + if (*h == '\n' && *(h - 1) != '\r') { + *t ++ = '\r'; + inlen --; + } + else if (*h == '\r' && *(h + 1) != '\n') { + *t ++ = *h ++; + *t ++ = '\n'; + if (inlen > 1) { + inlen -= 2; + } + else { + /* It is safe as inlen = sizeof (buf) - 1 */ + inlen = 0; + } + len --; + continue; + } + } + else if (g_ascii_isspace (*h)) { + if (got_sp) { + /* Ignore multiply spaces */ + h ++; + len --; + continue; + } + else { + *t++ = ' '; + h ++; + inlen --; + len --; + got_sp = TRUE; + continue; + } + } + else { + got_sp = FALSE; + } + *t++ = *h++; + inlen --; + len --; + } + + *start = h; + + if (!finished && *(t - 1) == ' ' && g_ascii_isspace (*h)) { + /* Avoid border problems */ + t --; + } +#if 0 + msg_debug ("update signature with buffer: %*s", t - buf, buf); +#endif + g_checksum_update (ck, buf, t - buf); + + return !finished; +} + +static gboolean +rspamd_dkim_simple_body_step (GChecksum *ck, const gchar **start, guint remain) +{ + const gchar *h; + static gchar buf[BUFSIZ]; + gchar *t; + guint len, inlen; + gboolean finished = FALSE; + + if (remain > sizeof (buf)) { + len = sizeof (buf); + } + else { + len = remain; + finished = TRUE; + } + inlen = sizeof (buf) - 1; + h = *start; + t = &buf[0]; + + while (len && inlen) { + if (*h == '\r' || *h == '\n') { + /* Replace a single \n or \r with \r\n */ + if (*h == '\n' && *(h - 1) != '\r') { + *t ++ = '\r'; + inlen --; + } + else if (*h == '\r' && *(h + 1) != '\n') { + *t ++ = *h ++; + *t ++ = '\n'; + if (inlen > 1) { + inlen -= 2; + } + else { + /* It is safe as inlen = sizeof (buf) - 1 */ + inlen = 0; + } + len --; + continue; + } + } + *t++ = *h++; + inlen --; + len --; + } + + *start = h; + +#if 0 + msg_debug ("update signature with buffer: %*s", t - buf, buf); +#endif + g_checksum_update (ck, buf, t - buf); + + return !finished; +} 
+ +static gboolean +rspamd_dkim_canonize_body (rspamd_dkim_context_t *ctx, const gchar *start, const gchar *end) +{ + const gchar *p; + + if (start == NULL) { + /* Empty body */ + if (ctx->body_canon_type == DKIM_CANON_SIMPLE) { + g_checksum_update (ctx->body_hash, CRLF, sizeof (CRLF) - 1); + } + else { + g_checksum_update (ctx->body_hash, "", 0); + } + } + else { + /* Strip extra ending CRLF */ + p = end - 1; + while (p >= start + 2) { + if (*p == '\n' && *(p - 1) == '\r' && *(p - 2) == '\n') { + p -= 2; + } + else if (*p == '\n' && *(p - 1) == '\n') { + p --; + } + else if (*p == '\r' && *(p - 1) == '\r') { + p --; + } + else { + break; + } + } + end = p + 1; + if (end == start || end == start + 2) { + /* Empty body */ + if (ctx->body_canon_type == DKIM_CANON_SIMPLE) { + g_checksum_update (ctx->body_hash, CRLF, sizeof (CRLF) - 1); + } + else { + g_checksum_update (ctx->body_hash, "", 0); + } + } + else { + if (ctx->body_canon_type == DKIM_CANON_SIMPLE) { + /* Simple canonization */ + while (rspamd_dkim_simple_body_step (ctx->body_hash, &start, end - start)); + } + else { + while (rspamd_dkim_relaxed_body_step (ctx->body_hash, &start, end - start)); + } + } + return TRUE; + } + + /* TODO: Implement relaxed algorithm */ + return FALSE; +} + +/* Update hash converting all CR and LF to CRLF */ +static void +rspamd_dkim_hash_update (GChecksum *ck, const gchar *begin, gsize len) +{ + const gchar *p, *c, *end; + + end = begin + len; + p = begin; + c = p; + while (p != end) { + if (*p == '\r') { + g_checksum_update (ck, c, p - c); + g_checksum_update (ck, CRLF, sizeof (CRLF) - 1); + p ++; + if (*p == '\n') { + p ++; + } + c = p; + } + else if (*p == '\n') { + g_checksum_update (ck, c, p - c); + g_checksum_update (ck, CRLF, sizeof (CRLF) - 1); + p ++; + c = p; + } + else { + p ++; + } + } + if (p != c) { + g_checksum_update (ck, c, p - c); + } +} + +/* Update hash by signature value (ignoring b= tag) */ +static void +rspamd_dkim_signature_update (rspamd_dkim_context_t 
*ctx, const gchar *begin, guint len) +{ + const gchar *p, *c, *end; + gboolean tag, skip; + + end = begin + len; + p = begin; + c = begin; + tag = TRUE; + skip = FALSE; + + while (p < end) { + if (tag && p[0] == 'b' && p[1] == '=') { + /* Add to signature */ + msg_debug ("initial update hash with signature part: %*s", p - c + 2, c); + rspamd_dkim_hash_update (ctx->headers_hash, c, p - c + 2); + skip = TRUE; + } + else if (skip && (*p == ';' || p == end - 1)) { + skip = FALSE; + c = p; + } + else if (!tag && *p == ';') { + tag = TRUE; + } + else if (tag && *p == '=') { + tag = FALSE; + } + p ++; + } + + p --; + /* Skip \r\n at the end */ + while ((*p == '\r' || *p == '\n') && p >= c) { + p --; + } + + if (p - c + 1 > 0) { + msg_debug ("final update hash with signature part: %*s", p - c + 1, c); + rspamd_dkim_hash_update (ctx->headers_hash, c, p - c + 1); + } +} + +static gboolean +rspamd_dkim_canonize_header_relaxed (rspamd_dkim_context_t *ctx, const gchar *header, const gchar *header_name, gboolean is_sign) +{ + const gchar *h; + gchar *t, *buf; + guint inlen; + gboolean got_sp, allocated = FALSE; + + inlen = strlen (header) + strlen (header_name) + sizeof (":" CRLF); + if (inlen > BUFSIZ) { + buf = g_malloc (inlen); + allocated = TRUE; + } + else { + /* Faster */ + buf = g_alloca (inlen); + } + + /* Name part */ + t = buf; + h = header_name; + while (*h) { + *t ++ = g_ascii_tolower (*h++); + } + *t++ = ':'; + + /* Value part */ + h = header; + /* Skip spaces at the beginning */ + while (g_ascii_isspace (*h)) { + h ++; + } + got_sp = FALSE; + + while (*h) { + if (g_ascii_isspace (*h)) { + if (got_sp) { + h ++; + continue; + } + else { + got_sp = TRUE; + *t ++ = ' '; + h ++; + continue; + } + } + else { + got_sp = FALSE; + } + *t ++ = *h ++; + } + if (g_ascii_isspace (*(t - 1))) { + t --; + } + *t++ = '\r'; + *t++ = '\n'; + *t = '\0'; + + if (!is_sign) { + msg_debug ("update signature with header: %s", buf); + g_checksum_update (ctx->headers_hash, buf, t - buf); + } 
+ else { + rspamd_dkim_signature_update (ctx, buf, t - buf); + } + + if (allocated) { + g_free (buf); + } + + return TRUE; +} + +struct rspamd_dkim_sign_chunk { + const gchar *begin; + gsize len; + gboolean append_crlf; +}; + +static gboolean +rspamd_dkim_canonize_header_simple (rspamd_dkim_context_t *ctx, const gchar *headers, + const gchar *header_name, guint count, gboolean is_sign) +{ + const gchar *p, *c; + gint state = 0, hlen; + gboolean found = FALSE; + GArray *to_sign; + struct rspamd_dkim_sign_chunk chunk, *elt; + gint i; + + /* This process is very similar to raw headers processing */ + to_sign = g_array_sized_new (FALSE, FALSE, sizeof (struct rspamd_dkim_sign_chunk), count); + p = headers; + c = p; + hlen = strlen (header_name); + + while (*p) { + switch (state) { + case 0: + /* Compare state */ + if (*p == ':') { + /* Compare header's name with desired one */ + if (p - c == hlen) { + if (g_ascii_strncasecmp (c, header_name, hlen) == 0) { + /* Get value */ + state = 2; + } + else { + /* Skip the whole header */ + state = 1; + } + } + else { + /* Skip the whole header */ + state = 1; + } + } + p ++; + break; + case 1: + /* Skip header state */ + if (*p == '\n' && !g_ascii_isspace (p[1])) { + /* Header is skipped */ + state = 0; + c = p + 1; + } + p ++; + break; + case 2: + /* c contains the beginning of header */ + if (*p == '\n' && (!g_ascii_isspace (p[1]) || p[1] == '\0')) { + chunk.begin = c; + if (*(p - 1) == '\r') { + chunk.len = p - c + 1; + chunk.append_crlf = FALSE; + } + else { + /* Need append CRLF as linefeed is not proper */ + chunk.len = p - c; + chunk.append_crlf = TRUE; + } + g_array_append_val (to_sign, chunk); + c = p + 1; + state = 0; + found = TRUE; + } + p ++; + break; + } + } + + if (found) { + if (!is_sign) { + + for (i = to_sign->len - 1; i >= 0 && count > 0; i --, count --) { + elt = &g_array_index (to_sign, struct rspamd_dkim_sign_chunk, i); + + if (!chunk.append_crlf) { + msg_debug ("update signature with header: %*s", elt->len, 
elt->begin); + rspamd_dkim_hash_update (ctx->headers_hash, elt->begin, elt->len); + } + else { + msg_debug ("update signature with header: %*s", elt->len + 1, elt->begin); + rspamd_dkim_hash_update (ctx->headers_hash, elt->begin, elt->len + 1); + } + } + } + else { + elt = &g_array_index (to_sign, struct rspamd_dkim_sign_chunk, 0); + if (elt->append_crlf) { + rspamd_dkim_signature_update (ctx, elt->begin, elt->len + 1); + } + else { + rspamd_dkim_signature_update (ctx, elt->begin, elt->len); + } + } + } + + g_array_free (to_sign, TRUE); + + return found; +} + +static gboolean +rspamd_dkim_canonize_header (rspamd_dkim_context_t *ctx, struct rspamd_task *task, const gchar *header_name, + guint count, gboolean is_sig) +{ + struct raw_header *rh, *rh_iter; + guint rh_num = 0; + GList *nh = NULL, *cur; + + if (ctx->header_canon_type == DKIM_CANON_SIMPLE) { + return rspamd_dkim_canonize_header_simple (ctx, task->raw_headers_str, header_name, count, is_sig); + } + else { + rh = g_hash_table_lookup (task->raw_headers, header_name); + if (rh) { + if (!is_sig) { + rh_iter = rh; + while (rh_iter) { + rh_num ++; + rh_iter = rh_iter->next; + } + + if (rh_num > count) { + /* Set skip count */ + rh_num -= count; + } + else { + rh_num = 0; + } + rh_iter = rh; + while (rh_num) { + rh_iter = rh_iter->next; + rh_num --; + } + /* Now insert required headers */ + while (rh_iter) { + nh = g_list_prepend (nh, rh_iter); + rh_iter = rh_iter->next; + } + cur = nh; + while (cur) { + rh = cur->data; + if (! 
rspamd_dkim_canonize_header_relaxed (ctx, rh->value, header_name, is_sig)) { + g_list_free (nh); + return FALSE; + } + cur = g_list_next (cur); + } + if (nh != NULL) { + g_list_free (nh); + } + } + else { + /* For signature check just use the first dkim header */ + rspamd_dkim_canonize_header_relaxed (ctx, rh->value, header_name, is_sig); + } + return TRUE; + } + } + + /* TODO: Implement relaxed algorithm */ + return FALSE; +} + +/** + * Check task for dkim context using dkim key + * @param ctx dkim verify context + * @param key dkim key (from cache or from dns request) + * @param task task to check + * @return + */ +gint +rspamd_dkim_check (rspamd_dkim_context_t *ctx, rspamd_dkim_key_t *key, struct rspamd_task *task) +{ + const gchar *p, *headers_end = NULL, *end, *body_end; + gboolean got_cr = FALSE, got_crlf = FALSE, got_lf = FALSE; + gchar *digest; + gsize dlen; + gint res = DKIM_CONTINUE; + guint i; + struct rspamd_dkim_header *dh; +#ifdef HAVE_OPENSSL + gint nid; +#endif + + g_return_val_if_fail (ctx != NULL, DKIM_ERROR); + g_return_val_if_fail (key != NULL, DKIM_ERROR); + g_return_val_if_fail (task->msg != NULL, DKIM_ERROR); + + /* First of all find place of body */ + p = task->msg->str; + + end = task->msg->str + task->msg->len; + + while (p <= end) { + /* Search for \r\n\r\n at the end of headers */ + if (*p == '\n') { + if (got_cr && *(p - 1) == '\r') { + if (got_crlf) { + /* \r\n\r\n */ + headers_end = p + 1; + break; + } + else if (got_lf) { + /* \n\r\n */ + headers_end = p + 1; + break; + } + else { + /* Set got crlf flag */ + got_crlf = TRUE; + got_cr = FALSE; + got_lf = FALSE; + } + } + else if (got_cr && *(p - 1) != '\r') { + /* We got CR somewhere but not right before */ + got_cr = FALSE; + if (*(p - 1) == '\n') { + /* \r\n\n case */ + headers_end = p + 1; + break; + } + got_lf = TRUE; + } + else if (got_lf && *(p - 1) == '\n') { + /* \n\n case */ + headers_end = p + 1; + break; + } + else { + got_lf = TRUE; + } + } + else if (*p == '\r') { + if 
(got_cr && *(p - 1) == '\r') { + /* \r\r case */ + headers_end = p + 1; + break; + } + else if (got_lf && *(p - 1) != '\n') { + /* Sequence is broken */ + got_lf = FALSE; + got_cr = TRUE; + } + else { + got_cr = TRUE; + } + } + else { + got_cr = FALSE; + got_crlf = FALSE; + } + p ++; + } + + /* Start canonization of body part */ + if (headers_end) { + if (ctx->len == 0 || (gint)ctx->len > end - headers_end) { + body_end = end; + } + else { + /* Strip message */ + body_end = headers_end + ctx->len; + } + } + else { + body_end = end; + } + if (!rspamd_dkim_canonize_body (ctx, headers_end, body_end)) { + return DKIM_RECORD_ERROR; + } + /* Now canonize headers */ + for (i = 0; i < ctx->hlist->len; i ++) { + dh = g_ptr_array_index (ctx->hlist, i); + rspamd_dkim_canonize_header (ctx, task, dh->name, dh->count, FALSE); + } + + /* Canonize dkim signature */ + rspamd_dkim_canonize_header (ctx, task, DKIM_SIGNHEADER, 1, TRUE); + + dlen = ctx->bhlen; + digest = g_alloca (dlen); + g_checksum_get_digest (ctx->body_hash, digest, &dlen); + + /* Check bh field */ + if (memcmp (ctx->bh, digest, dlen) != 0) { + msg_debug ("bh value missmatch"); + return DKIM_REJECT; + } + + g_checksum_get_digest (ctx->headers_hash, digest, &dlen); +#ifdef HAVE_OPENSSL + /* Check headers signature */ + + if (ctx->sig_alg == DKIM_SIGN_RSASHA1) { + nid = NID_sha1; + } + else if (ctx->sig_alg == DKIM_SIGN_RSASHA256) { + nid = NID_sha256; + } + else { + /* Not reached */ + nid = NID_sha1; + } + + if (RSA_verify (nid, digest, dlen, ctx->b, ctx->blen, key->key_rsa) != 1) { + msg_debug ("rsa verify failed"); + res = DKIM_REJECT; + } +#endif + return res; +} diff --git a/src/libserver/dkim.h b/src/libserver/dkim.h new file mode 100644 index 000000000..29ec479b7 --- /dev/null +++ b/src/libserver/dkim.h @@ -0,0 +1,207 @@ +/* Copyright (c) 2010-2011, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#ifndef DKIM_H_ +#define DKIM_H_ + +#include "config.h" +#include "event.h" +#include "dns.h" +#ifdef HAVE_OPENSSL +#include <openssl/rsa.h> +#include <openssl/engine.h> +#endif + +/* Main types and definitions */ + +#define DKIM_SIGNHEADER "DKIM-Signature" + /* DKIM signature header */ + +/* special DNS tokens */ +#define DKIM_DNSKEYNAME "_domainkey" + /* reserved DNS sub-zone */ +#define DKIM_DNSPOLICYNAME "_adsp" /* reserved DNS sub-zone */ + +/* Canonization methods */ +#define DKIM_CANON_UNKNOWN (-1) /* unknown method */ +#define DKIM_CANON_SIMPLE 0 /* as specified in DKIM spec */ +#define DKIM_CANON_RELAXED 1 /* as specified in DKIM spec */ + +#define DKIM_CANON_DEFAULT DKIM_CANON_SIMPLE + +/* Signature methods */ +#define DKIM_SIGN_UNKNOWN (-2) /* unknown method */ +#define DKIM_SIGN_DEFAULT (-1) /* use internal default */ +#define DKIM_SIGN_RSASHA1 0 /* an RSA-signed SHA1 digest */ +#define DKIM_SIGN_RSASHA256 1 /* an RSA-signed SHA256 digest */ + +/* Params */ +#define DKIM_PARAM_UNKNOWN (-1) /* unknown */ +#define DKIM_PARAM_SIGNATURE 0 /* b */ +#define DKIM_PARAM_SIGNALG 1 /* a */ +#define DKIM_PARAM_DOMAIN 2 /* d */ +#define DKIM_PARAM_CANONALG 3 /* c */ +#define DKIM_PARAM_QUERYMETHOD 4 /* q */ +#define DKIM_PARAM_SELECTOR 5 /* s */ +#define DKIM_PARAM_HDRLIST 6 /* h */ +#define DKIM_PARAM_VERSION 7 /* v */ +#define DKIM_PARAM_IDENTITY 8 /* i */ +#define DKIM_PARAM_TIMESTAMP 9 /* t */ +#define DKIM_PARAM_EXPIRATION 10 /* x */ +#define DKIM_PARAM_COPIEDHDRS 11 /* z */ +#define DKIM_PARAM_BODYHASH 12 /* bh */ +#define DKIM_PARAM_BODYLENGTH 13 /* l */ + +/* Errors (from OpenDKIM) */ + +#define DKIM_SIGERROR_UNKNOWN (-1) /* unknown error */ +#define DKIM_SIGERROR_OK 0 /* no error */ +#define DKIM_SIGERROR_VERSION 1 /* unsupported version */ +#define DKIM_SIGERROR_DOMAIN 2 /* invalid domain (d=/i=) */ +#define DKIM_SIGERROR_EXPIRED 3 /* signature expired */ +#define DKIM_SIGERROR_FUTURE 4 /* signature in the future */ +#define 
DKIM_SIGERROR_TIMESTAMPS 5 /* x= < t= */ +#define DKIM_SIGERROR_UNUSED 6 /* OBSOLETE */ +#define DKIM_SIGERROR_INVALID_HC 7 /* c= invalid (header) */ +#define DKIM_SIGERROR_INVALID_BC 8 /* c= invalid (body) */ +#define DKIM_SIGERROR_MISSING_A 9 /* a= missing */ +#define DKIM_SIGERROR_INVALID_A 10 /* a= invalid */ +#define DKIM_SIGERROR_MISSING_H 11 /* h= missing */ +#define DKIM_SIGERROR_INVALID_L 12 /* l= invalid */ +#define DKIM_SIGERROR_INVALID_Q 13 /* q= invalid */ +#define DKIM_SIGERROR_INVALID_QO 14 /* q= option invalid */ +#define DKIM_SIGERROR_MISSING_D 15 /* d= missing */ +#define DKIM_SIGERROR_EMPTY_D 16 /* d= empty */ +#define DKIM_SIGERROR_MISSING_S 17 /* s= missing */ +#define DKIM_SIGERROR_EMPTY_S 18 /* s= empty */ +#define DKIM_SIGERROR_MISSING_B 19 /* b= missing */ +#define DKIM_SIGERROR_EMPTY_B 20 /* b= empty */ +#define DKIM_SIGERROR_CORRUPT_B 21 /* b= corrupt */ +#define DKIM_SIGERROR_NOKEY 22 /* no key found in DNS */ +#define DKIM_SIGERROR_DNSSYNTAX 23 /* DNS reply corrupt */ +#define DKIM_SIGERROR_KEYFAIL 24 /* DNS query failed */ +#define DKIM_SIGERROR_MISSING_BH 25 /* bh= missing */ +#define DKIM_SIGERROR_EMPTY_BH 26 /* bh= empty */ +#define DKIM_SIGERROR_CORRUPT_BH 27 /* bh= corrupt */ +#define DKIM_SIGERROR_BADSIG 28 /* signature mismatch */ +#define DKIM_SIGERROR_SUBDOMAIN 29 /* unauthorized subdomain */ +#define DKIM_SIGERROR_MULTIREPLY 30 /* multiple records returned */ +#define DKIM_SIGERROR_EMPTY_H 31 /* h= empty */ +#define DKIM_SIGERROR_INVALID_H 32 /* h= missing req'd entries */ +#define DKIM_SIGERROR_TOOLARGE_L 33 /* l= value exceeds body size */ +#define DKIM_SIGERROR_MBSFAILED 34 /* "must be signed" failure */ +#define DKIM_SIGERROR_KEYVERSION 35 /* unknown key version */ +#define DKIM_SIGERROR_KEYUNKNOWNHASH 36 /* unknown key hash */ +#define DKIM_SIGERROR_KEYHASHMISMATCH 37 /* sig-key hash mismatch */ +#define DKIM_SIGERROR_NOTEMAILKEY 38 /* not an e-mail key */ +#define DKIM_SIGERROR_UNUSED2 39 /* OBSOLETE */ +#define 
DKIM_SIGERROR_KEYTYPEMISSING 40 /* key type missing */ +#define DKIM_SIGERROR_KEYTYPEUNKNOWN 41 /* key type unknown */ +#define DKIM_SIGERROR_KEYREVOKED 42 /* key revoked */ +#define DKIM_SIGERROR_KEYDECODE 43 /* key couldn't be decoded */ +#define DKIM_SIGERROR_MISSING_V 44 /* v= tag missing */ +#define DKIM_SIGERROR_EMPTY_V 45 /* v= tag empty */ + +/* Check results */ +#define DKIM_CONTINUE 0 /* continue */ +#define DKIM_REJECT 1 /* reject */ +#define DKIM_TRYAGAIN 2 /* try again later */ +#define DKIM_NOTFOUND 3 /* requested record not found */ +#define DKIM_RECORD_ERROR 4 /* error requesting record */ + +typedef struct rspamd_dkim_context_s { + rspamd_mempool_t *pool; + gint sig_alg; + gint header_canon_type; + gint body_canon_type; + gsize len; + gchar *domain; + gchar *selector; + time_t timestamp; + time_t expiration; + gint8 *b; + gint8 *bh; + guint bhlen; + guint blen; + GPtrArray *hlist; + guint ver; + gchar *dns_key; + GChecksum *headers_hash; + GChecksum *body_hash; +} rspamd_dkim_context_t; + +typedef struct rspamd_dkim_key_s { + guint8 *keydata; + guint keylen; + gsize decoded_len; + guint ttl; +#ifdef HAVE_OPENSSL + RSA *key_rsa; + BIO *key_bio; + EVP_PKEY *key_evp; +#endif +} +rspamd_dkim_key_t; + +struct rspamd_task; + +/* Err MUST be freed if it is not NULL, key is allocated by slice allocator */ +typedef void (*dkim_key_handler_f)(rspamd_dkim_key_t *key, gsize keylen, rspamd_dkim_context_t *ctx, gpointer ud, GError *err); + +/** + * Create new dkim context from signature + * @param sig message's signature + * @param pool pool to allocate memory from + * @param time_jitter jitter in seconds to allow time diff while checking + * @param err pointer to error object + * @return new context or NULL + */ +rspamd_dkim_context_t* rspamd_create_dkim_context (const gchar *sig, rspamd_mempool_t *pool, guint time_jitter, GError **err); + +/** + * Make DNS request for specified context and obtain and parse key + * @param ctx dkim context from signature + * 
@param resolver dns resolver object + * @param s async session to make request + * @return + */ +gboolean rspamd_get_dkim_key (rspamd_dkim_context_t *ctx, struct rspamd_dns_resolver *resolver, + struct rspamd_async_session *s, dkim_key_handler_f handler, gpointer ud); + +/** + * Check task for dkim context using dkim key + * @param ctx dkim verify context + * @param key dkim key (from cache or from dns request) + * @param task task to check + * @return + */ +gint rspamd_dkim_check (rspamd_dkim_context_t *ctx, rspamd_dkim_key_t *key, struct rspamd_task *task); + +/** + * Free DKIM key + * @param key + */ +void rspamd_dkim_key_free (rspamd_dkim_key_t *key); + +#endif /* DKIM_H_ */ diff --git a/src/libserver/dns.c b/src/libserver/dns.c new file mode 100644 index 000000000..e20cca9df --- /dev/null +++ b/src/libserver/dns.c @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2009-2013, Vsevolod Stakhov + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "dns.h" +#include "main.h" +#include "utlist.h" +#include "uthash.h" +#include "rdns_event.h" + +struct rspamd_dns_resolver { + struct rdns_resolver *r; + struct event_base *ev_base; + gdouble request_timeout; + guint max_retransmits; +}; + +struct rspamd_dns_request_ud { + struct rspamd_async_session *session; + dns_callback_type cb; + gpointer ud; + struct rdns_request *req; +}; + +static void +rspamd_dns_fin_cb (gpointer arg) +{ + struct rdns_request *req = arg; + + rdns_request_release (req); +} + +static void +rspamd_dns_callback (struct rdns_reply *reply, gpointer ud) +{ + struct rspamd_dns_request_ud *reqdata = ud; + + reqdata->cb (reply, reqdata->ud); + + remove_normal_event (reqdata->session, rspamd_dns_fin_cb, reqdata->req); +} + +gboolean +make_dns_request (struct rspamd_dns_resolver *resolver, + struct rspamd_async_session *session, rspamd_mempool_t *pool, dns_callback_type cb, + gpointer ud, enum rdns_request_type type, const char *name) +{ + struct rdns_request *req; + struct rspamd_dns_request_ud *reqdata; + + reqdata = rspamd_mempool_alloc (pool, sizeof (struct rspamd_dns_request_ud)); + reqdata->session = session; + reqdata->cb = cb; + reqdata->ud = ud; + + req = rdns_make_request_full (resolver->r, rspamd_dns_callback, reqdata, + resolver->request_timeout, resolver->max_retransmits, 1, name, type); + + if (req != NULL) { + register_async_event (session, (event_finalizer_t)rspamd_dns_fin_cb, req, + 
g_quark_from_static_string ("dns resolver")); + /* Ref event to free it only when according async event is deleted from the session */ + rdns_request_retain (req); + reqdata->req = req; + } + else { + return FALSE; + } + + return TRUE; +} + + +struct rspamd_dns_resolver * +dns_resolver_init (rspamd_logger_t *logger, struct event_base *ev_base, struct config_file *cfg) +{ + GList *cur; + struct rspamd_dns_resolver *new; + gchar *begin, *p, *err; + gint priority; + + new = g_slice_alloc0 (sizeof (struct rspamd_dns_resolver)); + new->ev_base = ev_base; + new->request_timeout = cfg->dns_timeout; + new->max_retransmits = cfg->dns_retransmits; + + new->r = rdns_resolver_new (); + rdns_bind_libevent (new->r, new->ev_base); + rdns_resolver_set_log_level (new->r, cfg->log_level); + rdns_resolver_set_logger (new->r, (rdns_log_function)rspamd_common_logv, logger); + + if (cfg->nameservers == NULL) { + /* Parse resolv.conf */ + if (!rdns_resolver_parse_resolv_conf (new->r, "/etc/resolv.conf")) { + msg_err ("cannot parse resolv.conf and no nameservers defined, so no ways to resolve addresses"); + return new; + } + } + else { + cur = cfg->nameservers; + while (cur) { + begin = cur->data; + p = strchr (begin, ':'); + if (p != NULL) { + *p = '\0'; + p ++; + priority = strtoul (p, &err, 10); + if (err != NULL && *err != '\0') { + msg_info ("bad character '%x', must be 'm' or 's' or a numeric priority", *err); + } + } + else { + priority = 0; + } + if (!rdns_resolver_add_server (new->r, begin, 53, priority, cfg->dns_io_per_server)) { + msg_warn ("cannot parse ip address of nameserver: %s", begin); + cur = g_list_next (cur); + continue; + } + + cur = g_list_next (cur); + } + + } + + rdns_resolver_init (new->r); + + return new; +} diff --git a/src/libserver/dns.h b/src/libserver/dns.h new file mode 100644 index 000000000..26ae71387 --- /dev/null +++ b/src/libserver/dns.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2013, Vsevolod Stakhov + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RSPAMD_DNS_H +#define RSPAMD_DNS_H + +#include "config.h" +#include "mem_pool.h" +#include "events.h" +#include "logger.h" +#include "rdns.h" + +struct rspamd_dns_resolver; + +/* Rspamd DNS API */ + +/** + * Init DNS resolver, params are obtained from a config file or system file /etc/resolv.conf + */ +struct rspamd_dns_resolver *dns_resolver_init (rspamd_logger_t *logger, + struct event_base *ev_base, struct config_file *cfg); + +/** + * Make a DNS request + * @param resolver resolver object + * @param session async session to register event + * @param pool memory pool for storage + * @param cb callback to call on resolve completing + * @param ud user data for callback + * @param type request type + * @param ... 
string or ip address based on a request type + * @return TRUE if request was sent. + */ +gboolean make_dns_request (struct rspamd_dns_resolver *resolver, + struct rspamd_async_session *session, rspamd_mempool_t *pool, + dns_callback_type cb, gpointer ud, enum rdns_request_type type, const char *name); + +#endif diff --git a/src/libserver/dynamic_cfg.c b/src/libserver/dynamic_cfg.c new file mode 100644 index 000000000..7f5e8530d --- /dev/null +++ b/src/libserver/dynamic_cfg.c @@ -0,0 +1,599 @@ +/* Copyright (c) 2010-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "main.h" +#include "map.h" +#include "filter.h" +#include "dynamic_cfg.h" +#include "json/jansson.h" + +struct dynamic_cfg_symbol { + gchar *name; + gdouble value; +}; + +struct dynamic_cfg_action { + enum rspamd_metric_action action; + gdouble value; +}; + +struct dynamic_cfg_metric { + GList *symbols; + struct dynamic_cfg_action actions[METRIC_ACTION_MAX]; + gchar *name; +}; + +struct config_json_buf { + gchar *buf; + gchar *pos; + size_t buflen; + struct config_file *cfg; + GList *config_metrics; +}; + +/** + * Free dynamic configuration + * @param conf_metrics + */ +static void +dynamic_cfg_free (GList *conf_metrics) +{ + GList *cur, *cur_elt; + struct dynamic_cfg_metric *metric; + struct dynamic_cfg_symbol *sym; + + if (conf_metrics) { + cur = conf_metrics; + while (cur) { + metric = cur->data; + if (metric->symbols) { + cur_elt = metric->symbols; + while (cur_elt) { + sym = cur_elt->data; + g_free (sym->name); + g_slice_free1 (sizeof (struct dynamic_cfg_symbol), sym); + cur_elt = g_list_next (cur_elt); + } + g_list_free (metric->symbols); + } + g_slice_free1 (sizeof (struct dynamic_cfg_metric), metric); + cur = g_list_next (cur); + } + g_list_free (conf_metrics); + } +} +/** + * Apply configuration to the specified configuration + * @param conf_metrics + * @param cfg + */ +static void +apply_dynamic_conf (GList *conf_metrics, struct config_file *cfg) +{ + GList *cur, *cur_elt; + struct dynamic_cfg_metric *metric; + struct dynamic_cfg_symbol *sym; + struct dynamic_cfg_action *act; + struct metric *real_metric; + struct metric_action *real_act; + gdouble *w; + gint i, j; + + cur = conf_metrics; + while (cur) { + metric = cur->data; + if ((real_metric = g_hash_table_lookup (cfg->metrics, metric->name)) != NULL) { + cur_elt = metric->symbols; + while (cur_elt) { + sym = cur_elt->data; + if ((w = g_hash_table_lookup (real_metric->symbols, sym->name)) != NULL) { + *w = sym->value; + } + else { + msg_info ("symbol %s is not 
found in the main configuration", sym->name); + } + cur_elt = g_list_next (cur_elt); + } + + for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) { + act = &metric->actions[i]; + if (act->value < 0) { + continue; + } + for (j = METRIC_ACTION_REJECT; j < METRIC_ACTION_MAX; j ++) { + real_act = &real_metric->actions[j]; + if (real_act->action == act->action) { + real_act->score = act->value; + } + /* Update required score accordingly to metric's action */ + if (act->action == METRIC_ACTION_REJECT) { + real_metric->actions[METRIC_ACTION_REJECT].score = act->value; + } + } + } + } + cur = g_list_next (cur); + } +} + +/* Callbacks for reading json dynamic rules */ +gchar * +json_config_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) +{ + struct config_json_buf *jb; + gint free, off; + + if (data->cur_data == NULL) { + jb = g_malloc (sizeof (struct config_json_buf)); + jb->cfg = ((struct config_json_buf *)data->prev_data)->cfg; + jb->buf = NULL; + jb->pos = NULL; + jb->config_metrics = NULL; + data->cur_data = jb; + } + else { + jb = data->cur_data; + } + + if (jb->buf == NULL) { + /* Allocate memory for buffer */ + jb->buflen = len * 2; + jb->buf = g_malloc (jb->buflen); + jb->pos = jb->buf; + } + + off = jb->pos - jb->buf; + free = jb->buflen - off; + + if (free < len) { + jb->buflen = MAX (jb->buflen * 2, jb->buflen + len * 2); + jb->buf = g_realloc (jb->buf, jb->buflen); + jb->pos = jb->buf + off; + } + + memcpy (jb->pos, chunk, len); + jb->pos += len; + + /* Say not to copy any part of this buffer */ + return NULL; +} + +void +json_config_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data) +{ + struct config_json_buf *jb; + guint nelts, i, j, selts; + gint test_act; + json_t *js, *cur_elt, *cur_nm, *it_val; + json_error_t je; + struct dynamic_cfg_metric *cur_metric; + struct dynamic_cfg_symbol *cur_symbol; + struct dynamic_cfg_action *cur_action; + + if (data->prev_data) { + jb = data->prev_data; + /* Clean prev 
data */ + if (jb->buf) { + g_free (jb->buf); + } + g_free (jb); + } + + /* Now parse json */ + if (data->cur_data) { + jb = data->cur_data; + } + else { + msg_err ("no data read"); + return; + } + if (jb->buf == NULL) { + msg_err ("no data read"); + return; + } + /* NULL terminate current buf */ + *jb->pos = '\0'; + + js = json_loads (jb->buf, &je); + if (!js) { + msg_err ("cannot load json data: parse error %s, on line %d", je.text, je.line); + return; + } + + if (!json_is_array (js)) { + json_decref (js); + msg_err ("loaded json is not an array"); + return; + } + + jb->cfg->current_dynamic_conf = NULL; + dynamic_cfg_free (jb->config_metrics); + jb->config_metrics = NULL; + + /* Parse configuration */ + nelts = json_array_size (js); + for (i = 0; i < nelts; i++) { + cur_elt = json_array_get (js, i); + if (!cur_elt || !json_is_object (cur_elt)) { + msg_err ("loaded json array element is not an object"); + continue; + } + + cur_nm = json_object_get (cur_elt, "metric"); + if (!cur_nm || !json_is_string (cur_nm)) { + msg_err ("loaded json metric object element has no 'metric' attribute"); + continue; + } + cur_metric = g_slice_alloc0 (sizeof (struct dynamic_cfg_metric)); + for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i ++) { + cur_metric->actions[i].value = -1.0; + } + cur_metric->name = g_strdup (json_string_value (cur_nm)); + cur_nm = json_object_get (cur_elt, "symbols"); + /* Parse symbols */ + if (cur_nm && json_is_array (cur_nm)) { + selts = json_array_size (cur_nm); + for (j = 0; j < selts; j ++) { + it_val = json_array_get (cur_nm, j); + if (it_val && json_is_object (it_val)) { + if (json_object_get (it_val, "name") && json_object_get (it_val, "value")) { + cur_symbol = g_slice_alloc0 (sizeof (struct dynamic_cfg_symbol)); + cur_symbol->name = g_strdup (json_string_value (json_object_get (it_val, "name"))); + cur_symbol->value = json_number_value (json_object_get (it_val, "value")); + /* Insert symbol */ + cur_metric->symbols = g_list_prepend 
(cur_metric->symbols, cur_symbol); + } + else { + msg_info ("json symbol object has no mandatory 'name' and 'value' attributes"); + } + } + } + } + cur_nm = json_object_get (cur_elt, "actions"); + /* Parse actions */ + if (cur_nm && json_is_array (cur_nm)) { + selts = json_array_size (cur_nm); + for (j = 0; j < selts; j ++) { + it_val = json_array_get (cur_nm, j); + if (it_val && json_is_object (it_val)) { + if (json_object_get (it_val, "name") && json_object_get (it_val, "value")) { + if (!check_action_str (json_string_value (json_object_get (it_val, "name")), &test_act)) { + msg_err ("unknown action: %s", json_string_value (json_object_get (it_val, "name"))); + g_slice_free1 (sizeof (struct dynamic_cfg_action), cur_action); + continue; + } + cur_action = &cur_metric->actions[test_act]; + cur_action->action = test_act; + cur_action->value = json_number_value (json_object_get (it_val, "value")); + } + else { + msg_info ("json symbol object has no mandatory 'name' and 'value' attributes"); + } + } + } + } + jb->config_metrics = g_list_prepend (jb->config_metrics, cur_metric); + } + /* + * Note about thread safety: we are updating values that are gdoubles so it is not atomic in general case + * but on the other hand all that data is used only in the main thread, so why it is *likely* safe + * to do this task in this way without explicit lock. 
+ */ + apply_dynamic_conf (jb->config_metrics, jb->cfg); + + jb->cfg->current_dynamic_conf = jb->config_metrics; + + json_decref (js); +} + +/** + * Init dynamic configuration using map logic and specific configuration + * @param cfg config file + */ +void +init_dynamic_config (struct config_file *cfg) +{ + struct config_json_buf *jb, **pjb; + + if (cfg->dynamic_conf == NULL) { + /* No dynamic conf has been specified, so do not try to load it */ + return; + } + + /* Now try to add map with json data */ + jb = g_malloc0 (sizeof (struct config_json_buf)); + pjb = g_malloc (sizeof (struct config_json_buf *)); + jb->buf = NULL; + jb->cfg = cfg; + *pjb = jb; + if (!add_map (cfg, cfg->dynamic_conf, "Dynamic configuration map", json_config_read_cb, json_config_fin_cb, (void **)pjb)) { + msg_err ("cannot add map for configuration %s", cfg->dynamic_conf); + } +} + +static gboolean +dump_dynamic_list (gint fd, GList *rules) +{ + GList *cur, *cur_elt; + struct dynamic_cfg_metric *metric; + struct dynamic_cfg_symbol *sym; + struct dynamic_cfg_action *act; + FILE *f; + gint i; + gboolean start = TRUE; + + /* Open buffered stream for the descriptor */ + if ((f = fdopen (fd, "a+")) == NULL) { + msg_err ("fdopen failed: %s", strerror (errno)); + return FALSE; + } + + + if (rules) { + fprintf (f, "[\n"); + cur = rules; + while (cur) { + metric = cur->data; + fprintf (f, "{\n \"metric\": \"%s\",\n", metric->name); + if (metric->symbols) { + fprintf (f, " \"symbols\": [\n"); + cur_elt = metric->symbols; + while (cur_elt) { + sym = cur_elt->data; + cur_elt = g_list_next (cur_elt); + if (cur_elt) { + fprintf (f, " {\"name\": \"%s\",\"value\": %.2f},\n", sym->name, sym->value); + } + else { + fprintf (f, " {\"name\": \"%s\",\"value\": %.2f}\n", sym->name, sym->value); + } + } + if (metric->actions) { + fprintf (f, " ],\n"); + } + else { + fprintf (f, " ]\n"); + } + } + + if (metric->actions) { + fprintf (f, " \"actions\": [\n"); + for (i = METRIC_ACTION_REJECT; i < METRIC_ACTION_MAX; i 
++) { + act = &metric->actions[i]; + if (act->value < 0) { + continue; + } + fprintf (f, " %s{\"name\": \"%s\",\"value\": %.2f}\n", + (start ? "" : ","), str_action_metric (act->action), act->value); + if (start) { + start = FALSE; + } + } + fprintf (f, " ]\n"); + } + cur = g_list_next (cur); + if (cur) { + fprintf (f, "},\n"); + } + else { + fprintf (f, "}\n]\n"); + } + } + } + fclose (f); + + return TRUE; +} + +/** + * Dump dynamic configuration to the disk + * @param cfg + * @return + */ +gboolean +dump_dynamic_config (struct config_file *cfg) +{ + struct stat st; + gchar *dir, pathbuf[PATH_MAX]; + gint fd; + + if (cfg->dynamic_conf == NULL || cfg->current_dynamic_conf == NULL) { + /* No dynamic conf has been specified, so do not try to dump it */ + return FALSE; + } + + dir = g_path_get_dirname (cfg->dynamic_conf); + if (dir == NULL) { + /* Inaccessible path */ + if (dir != NULL) { + g_free (dir); + } + msg_err ("invalid file: %s", cfg->dynamic_conf); + return FALSE; + } + + if (stat (cfg->dynamic_conf, &st) == -1) { + msg_debug ("%s is unavailable: %s", cfg->dynamic_conf, strerror (errno)); + st.st_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH; + } + if (access (dir, W_OK | R_OK) == -1) { + msg_warn ("%s is inaccessible: %s", dir, strerror (errno)); + g_free (dir); + return FALSE; + } + rspamd_snprintf (pathbuf, sizeof (pathbuf), "%s%crconf-XXXXXX", dir, G_DIR_SEPARATOR); + g_free (dir); +#ifdef HAVE_MKSTEMP + /* Umask is set before */ + fd = mkstemp (pathbuf); +#else + fd = g_mkstemp_full (pathbuf, O_RDWR, S_IWUSR | S_IRUSR); +#endif + if (fd == -1) { + msg_err ("mkstemp error: %s", strerror (errno)); + + return FALSE; + } + + if (!dump_dynamic_list (fd, cfg->current_dynamic_conf)) { + close (fd); + unlink (pathbuf); + return FALSE; + } + + (void)unlink (cfg->dynamic_conf); + + /* Rename old config */ + if (rename (pathbuf, cfg->dynamic_conf) == -1) { + msg_err ("rename error: %s", strerror (errno)); + close (fd); + unlink (pathbuf); + return FALSE; + } + /* Set 
permissions */ + + if (chmod (cfg->dynamic_conf, st.st_mode) == -1) { + msg_warn ("chmod failed: %s", strerror (errno)); + } + + close (fd); + return TRUE; +} + +/** + * Add symbol for specified metric + * @param cfg config file object + * @param metric metric's name + * @param symbol symbol's name + * @param value value of symbol + * @return + */ +gboolean +add_dynamic_symbol (struct config_file *cfg, const gchar *metric_name, const gchar *symbol, gdouble value) +{ + GList *cur; + struct dynamic_cfg_metric *metric = NULL; + struct dynamic_cfg_symbol *sym = NULL; + + if (cfg->dynamic_conf == NULL) { + msg_info ("dynamic conf is disabled"); + return FALSE; + } + + cur = cfg->current_dynamic_conf; + while (cur) { + metric = cur->data; + if (g_ascii_strcasecmp (metric->name, metric_name) == 0) { + break; + } + metric = NULL; + cur = g_list_next (cur); + } + + if (metric != NULL) { + /* Search for a symbol */ + cur = metric->symbols; + while (cur) { + sym = cur->data; + if (g_ascii_strcasecmp (sym->name, symbol) == 0) { + sym->value = value; + msg_debug ("change value of action %s to %.2f", symbol, value); + break; + } + sym = NULL; + cur = g_list_next (cur); + } + if (sym == NULL) { + /* Symbol not found, insert it */ + sym = g_slice_alloc (sizeof (struct dynamic_cfg_symbol)); + sym->name = g_strdup (symbol); + sym->value = value; + metric->symbols = g_list_prepend (metric->symbols, sym); + msg_debug ("create symbol %s in metric %s", symbol, metric_name); + } + } + else { + /* Metric not found, create it */ + metric = g_slice_alloc0 (sizeof (struct dynamic_cfg_metric)); + sym = g_slice_alloc (sizeof (struct dynamic_cfg_symbol)); + sym->name = g_strdup (symbol); + sym->value = value; + metric->symbols = g_list_prepend (metric->symbols, sym); + metric->name = g_strdup (metric_name); + cfg->current_dynamic_conf = g_list_prepend (cfg->current_dynamic_conf, metric); + msg_debug ("create metric %s for symbol %s", metric_name, symbol); + } + + apply_dynamic_conf 
(cfg->current_dynamic_conf, cfg); + + return TRUE; +} + + +/** + * Add action for specified metric + * @param cfg config file object + * @param metric metric's name + * @param action action's name + * @param value value of symbol + * @return + */ +gboolean +add_dynamic_action (struct config_file *cfg, const gchar *metric_name, guint action, gdouble value) +{ + GList *cur; + struct dynamic_cfg_metric *metric = NULL; + + if (cfg->dynamic_conf == NULL) { + msg_info ("dynamic conf is disabled"); + return FALSE; + } + + cur = cfg->current_dynamic_conf; + while (cur) { + metric = cur->data; + if (g_ascii_strcasecmp (metric->name, metric_name) == 0) { + break; + } + metric = NULL; + cur = g_list_next (cur); + } + + if (metric != NULL) { + /* Search for an action */ + metric->actions[action].value = value; + } + else { + /* Metric not found, create it */ + metric = g_slice_alloc0 (sizeof (struct dynamic_cfg_metric)); + metric->actions[action].value = value; + metric->name = g_strdup (metric_name); + cfg->current_dynamic_conf = g_list_prepend (cfg->current_dynamic_conf, metric); + msg_debug ("create metric %s for action %d", metric_name, action); + } + + apply_dynamic_conf (cfg->current_dynamic_conf, cfg); + + return TRUE; +} diff --git a/src/libserver/dynamic_cfg.h b/src/libserver/dynamic_cfg.h new file mode 100644 index 000000000..b65d7aa9a --- /dev/null +++ b/src/libserver/dynamic_cfg.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2010-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef DYNAMIC_CFG_H_ +#define DYNAMIC_CFG_H_ + +#include "config.h" +#include "cfg_file.h" + +/** + * Init dynamic configuration using map logic and specific configuration + * @param cfg config file + */ +void init_dynamic_config (struct config_file *cfg); + +/** + * Dump dynamic configuration to the disk + * @param cfg + * @return + */ +gboolean dump_dynamic_config (struct config_file *cfg); + +/** + * Add symbol for specified metric + * @param cfg config file object + * @param metric metric's name + * @param symbol symbol's name + * @param value value of symbol + * @return + */ +gboolean add_dynamic_symbol (struct config_file *cfg, const gchar *metric, const gchar *symbol, gdouble value); + + +/** + * Add action for specified metric + * @param cfg config file object + * @param metric metric's name + * @param action action's name + * @param value value of symbol + * @return + */ +gboolean add_dynamic_action (struct config_file *cfg, const gchar *metric, guint action, gdouble value); + + +#endif /* DYNAMIC_CFG_H_ */ diff --git a/src/libserver/events.c b/src/libserver/events.c new file mode 100644 index 000000000..85843fd05 --- /dev/null +++ b/src/libserver/events.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "main.h" +#include "events.h" + +static gboolean +rspamd_event_equal (gconstpointer a, gconstpointer b) +{ + const struct rspamd_async_event *ev1 = a, *ev2 = b; + + if (ev1->fin == ev2->fin) { + return ev1->user_data == ev2->user_data; + } + + return FALSE; +} + +static guint +rspamd_event_hash (gconstpointer a) +{ + const struct rspamd_async_event *ev = a; + + return GPOINTER_TO_UINT (ev->user_data); +} + +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) +static void +event_mutex_free (gpointer data) +{ + GMutex *mtx = data; + + g_mutex_free (mtx); +} + +static void +event_cond_free (gpointer data) +{ + GCond *cond = data; + + g_cond_free (cond); +} +#endif + +struct rspamd_async_session * +new_async_session (rspamd_mempool_t * pool, session_finalizer_t fin, + event_finalizer_t restore, event_finalizer_t cleanup, void *user_data) +{ + struct rspamd_async_session *new; + + new = rspamd_mempool_alloc (pool, sizeof (struct rspamd_async_session)); + new->pool = pool; + new->fin = fin; + new->restore = restore; + new->cleanup = cleanup; + new->user_data = user_data; + new->wanna_die = FALSE; + new->events = g_hash_table_new (rspamd_event_hash, rspamd_event_equal); +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION <= 30)) + new->mtx = g_mutex_new (); + new->cond = g_cond_new (); + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) event_mutex_free, new->mtx); + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) event_cond_free, new->cond); +#else + new->mtx = rspamd_mempool_alloc (pool, sizeof (GMutex)); + g_mutex_init (new->mtx); + new->cond = rspamd_mempool_alloc (pool, sizeof (GCond)); + g_cond_init (new->cond); + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_mutex_clear, new->mtx); + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_cond_clear, new->cond); +#endif + new->threads = 0; + + rspamd_mempool_add_destructor (pool, 
(rspamd_mempool_destruct_t) g_hash_table_destroy, new->events); + + return new; +} + +void +register_async_event (struct rspamd_async_session *session, event_finalizer_t fin, void *user_data, GQuark subsystem) +{ + struct rspamd_async_event *new; + + if (session == NULL) { + msg_info ("session is NULL"); + return; + } + + g_mutex_lock (session->mtx); + new = rspamd_mempool_alloc (session->pool, sizeof (struct rspamd_async_event)); + new->fin = fin; + new->user_data = user_data; + new->subsystem = subsystem; + + g_hash_table_insert (session->events, new, new); + + msg_debug ("added event: %p, pending %d events, subsystem: %s", user_data, g_hash_table_size (session->events), + g_quark_to_string (subsystem)); + + g_mutex_unlock (session->mtx); +} + +void +remove_normal_event (struct rspamd_async_session *session, event_finalizer_t fin, void *ud) +{ + struct rspamd_async_event search_ev, *found_ev; + + if (session == NULL) { + msg_info ("session is NULL"); + return; + } + + g_mutex_lock (session->mtx); + /* Search for event */ + search_ev.fin = fin; + search_ev.user_data = ud; + if ((found_ev = g_hash_table_lookup (session->events, &search_ev)) != NULL) { + g_hash_table_remove (session->events, found_ev); + msg_debug ("removed event: %p, subsystem: %s, pending %d events", ud, + g_quark_to_string (found_ev->subsystem), g_hash_table_size (session->events)); + /* Remove event */ + fin (ud); + } + g_mutex_unlock (session->mtx); + + check_session_pending (session); +} + +static gboolean +rspamd_session_destroy (gpointer k, gpointer v, gpointer unused) +{ + struct rspamd_async_event *ev = v; + + /* Call event's finalizer */ + if (ev->fin != NULL) { + ev->fin (ev->user_data); + } + + return TRUE; +} + +gboolean +destroy_session (struct rspamd_async_session *session) +{ + if (session == NULL) { + msg_info ("session is NULL"); + return FALSE; + } + + g_mutex_lock (session->mtx); + if (session->threads > 0) { + /* Wait for conditional variable to finish processing */ + 
g_mutex_unlock (session->mtx); + g_cond_wait (session->cond, session->mtx); + } + + session->wanna_die = TRUE; + + g_hash_table_foreach_remove (session->events, rspamd_session_destroy, session); + + /* Mutex can be destroyed here */ + g_mutex_unlock (session->mtx); + + if (session->cleanup != NULL) { + session->cleanup (session->user_data); + } + return TRUE; +} + +gboolean +check_session_pending (struct rspamd_async_session *session) +{ + g_mutex_lock (session->mtx); + if (session->wanna_die && g_hash_table_size (session->events) == 0) { + session->wanna_die = FALSE; + if (session->threads > 0) { + /* Wait for conditional variable to finish processing */ + g_cond_wait (session->cond, session->mtx); + } + if (session->fin != NULL) { + g_mutex_unlock (session->mtx); + if (! session->fin (session->user_data)) { + /* Session finished incompletely, perform restoration */ + if (session->restore != NULL) { + session->restore (session->user_data); + /* Call pending once more */ + return check_session_pending (session); + } + return TRUE; + } + else { + return FALSE; + } + } + g_mutex_unlock (session->mtx); + return FALSE; + } + g_mutex_unlock (session->mtx); + return TRUE; +} + + +/** + * Add new async thread to session + * @param session session object + */ +void +register_async_thread (struct rspamd_async_session *session) +{ + g_atomic_int_inc (&session->threads); + msg_debug ("added thread: pending %d thread", session->threads); +} + +/** + * Remove async thread from session and check whether session can be terminated + * @param session session object + */ +void +remove_async_thread (struct rspamd_async_session *session) +{ + if (g_atomic_int_dec_and_test (&session->threads)) { + /* Signal if there are any sessions waiting */ + g_mutex_lock (session->mtx); + g_cond_signal (session->cond); + g_mutex_unlock (session->mtx); + } + msg_debug ("removed thread: pending %d thread", session->threads); +} diff --git a/src/libserver/events.h b/src/libserver/events.h new file 
mode 100644 index 000000000..6728288eb --- /dev/null +++ b/src/libserver/events.h @@ -0,0 +1,88 @@ +#ifndef RSPAMD_EVENTS_H +#define RSPAMD_EVENTS_H + +#include "config.h" +#include "mem_pool.h" + +struct rspamd_async_event; + +typedef void (*event_finalizer_t)(void *user_data); +typedef gboolean (*session_finalizer_t)(void *user_data); + +struct rspamd_async_event { + GQuark subsystem; + event_finalizer_t fin; + void *user_data; + guint ref; +}; + +struct rspamd_async_session { + session_finalizer_t fin; + event_finalizer_t restore; + event_finalizer_t cleanup; + GHashTable *events; + void *user_data; + rspamd_mempool_t *pool; + gboolean wanna_die; + guint threads; + GMutex *mtx; + GCond *cond; +}; + +/** + * Make new async session + * @param pool pool to alloc memory from + * @param fin a callback called when no events are found in session + * @param restore a callback is called to restore processing of session + * @param cleanup a callback called when session is forcefully destroyed + * @param user_data abstract user data + * @return newly created session object + */ +struct rspamd_async_session *new_async_session (rspamd_mempool_t *pool, + session_finalizer_t fin, event_finalizer_t restore, + event_finalizer_t cleanup, void *user_data); + +/** + * Insert new event to the session + * @param session session object + * @param fin finalizer callback + * @param user_data abstract user_data + * @param subsystem quark naming the subsystem that registers the event (used for logging) + */ +void register_async_event (struct rspamd_async_session *session, + event_finalizer_t fin, void *user_data, GQuark subsystem); + +/** + * Remove normal event, call its finalizer and check whether the session can be finished + * @param session session object + * @param fin final callback + * @param ud user data object + */ +void remove_normal_event (struct rspamd_async_session *session, event_finalizer_t fin, void *ud); + +/** + * Must be called at the end of session, it calls fin functions for all events that are + * still registered and then the cleanup callback of the session + * @param session session object + * @return TRUE if the session was destroyed and FALSE if session is NULL + */ +gboolean destroy_session (struct 
rspamd_async_session *session); + +/** + * Check session for events pending and call fin callback if no events are pending + * @param session session object + * @return TRUE if session still has pending work, FALSE if nothing is pending any more + */ +gboolean check_session_pending (struct rspamd_async_session *session); + +/** + * Add new async thread to session + * @param session session object + */ +void register_async_thread (struct rspamd_async_session *session); + +/** + * Remove async thread from session and check whether session can be terminated + * @param session session object + */ +void remove_async_thread (struct rspamd_async_session *session); + +#endif /* RSPAMD_EVENTS_H */ diff --git a/src/libserver/html.c b/src/libserver/html.c new file mode 100644 index 000000000..028c54f6c --- /dev/null +++ b/src/libserver/html.c @@ -0,0 +1,942 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "util.h" +#include "main.h" +#include "message.h" +#include "html.h" +#include "url.h" + +static sig_atomic_t tags_sorted = 0; + +static struct html_tag tag_defs[] = { + /* W3C defined elements */ + {Tag_A, "a", (CM_INLINE)}, + {Tag_ABBR, "abbr", (CM_INLINE)}, + {Tag_ACRONYM, "acronym", (CM_INLINE)}, + {Tag_ADDRESS, "address", (CM_BLOCK)}, + {Tag_APPLET, "applet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)}, + {Tag_AREA, "area", (CM_BLOCK | CM_EMPTY)}, + {Tag_B, "b", (CM_INLINE)}, + {Tag_BASE, "base", (CM_HEAD | CM_EMPTY)}, + {Tag_BASEFONT, "basefont", (CM_INLINE | CM_EMPTY)}, + {Tag_BDO, "bdo", (CM_INLINE)}, + {Tag_BIG, "big", (CM_INLINE)}, + {Tag_BLOCKQUOTE, "blockquote", (CM_BLOCK)}, + {Tag_BODY, "body", (CM_HTML | CM_OPT | CM_OMITST)}, + {Tag_BR, "br", (CM_INLINE | CM_EMPTY)}, + {Tag_BUTTON, "button", (CM_INLINE)}, + {Tag_CAPTION, "caption", (CM_TABLE)}, + {Tag_CENTER, "center", (CM_BLOCK)}, + {Tag_CITE, "cite", (CM_INLINE)}, + {Tag_CODE, "code", (CM_INLINE)}, + {Tag_COL, "col", (CM_TABLE | CM_EMPTY)}, + {Tag_COLGROUP, "colgroup", (CM_TABLE | CM_OPT)}, + {Tag_DD, "dd", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)}, + {Tag_DEL, "del", (CM_INLINE | CM_BLOCK | CM_MIXED)}, + {Tag_DFN, "dfn", (CM_INLINE)}, + {Tag_DIR, "dir", (CM_BLOCK | CM_OBSOLETE)}, + {Tag_DIV, "div", (CM_BLOCK)}, + {Tag_DL, "dl", (CM_BLOCK)}, + {Tag_DT, "dt", (CM_DEFLIST | CM_OPT | CM_NO_INDENT)}, + {Tag_EM, "em", (CM_INLINE)}, + {Tag_FIELDSET, "fieldset", 
(CM_BLOCK)}, + {Tag_FONT, "font", (CM_INLINE)}, + {Tag_FORM, "form", (CM_BLOCK)}, + {Tag_FRAME, "frame", (CM_FRAMES | CM_EMPTY)}, + {Tag_FRAMESET, "frameset", (CM_HTML | CM_FRAMES)}, + {Tag_H1, "h1", (CM_BLOCK | CM_HEADING)}, + {Tag_H2, "h2", (CM_BLOCK | CM_HEADING)}, + {Tag_H3, "h3", (CM_BLOCK | CM_HEADING)}, + {Tag_H4, "h4", (CM_BLOCK | CM_HEADING)}, + {Tag_H5, "h5", (CM_BLOCK | CM_HEADING)}, + {Tag_H6, "h6", (CM_BLOCK | CM_HEADING)}, + {Tag_HEAD, "head", (CM_HTML | CM_OPT | CM_OMITST)}, + {Tag_HR, "hr", (CM_BLOCK | CM_EMPTY)}, + {Tag_HTML, "html", (CM_HTML | CM_OPT | CM_OMITST)}, + {Tag_I, "i", (CM_INLINE)}, + {Tag_IFRAME, "iframe", (CM_INLINE)}, + {Tag_IMG, "img", (CM_INLINE | CM_IMG | CM_EMPTY)}, + {Tag_INPUT, "input", (CM_INLINE | CM_IMG | CM_EMPTY)}, + {Tag_INS, "ins", (CM_INLINE | CM_BLOCK | CM_MIXED)}, + {Tag_ISINDEX, "isindex", (CM_BLOCK | CM_EMPTY)}, + {Tag_KBD, "kbd", (CM_INLINE)}, + {Tag_LABEL, "label", (CM_INLINE)}, + {Tag_LEGEND, "legend", (CM_INLINE)}, + {Tag_LI, "li", (CM_LIST | CM_OPT | CM_NO_INDENT)}, + {Tag_LINK, "link", (CM_HEAD | CM_EMPTY)}, + {Tag_LISTING, "listing", (CM_BLOCK | CM_OBSOLETE)}, + {Tag_MAP, "map", (CM_INLINE)}, + {Tag_MENU, "menu", (CM_BLOCK | CM_OBSOLETE)}, + {Tag_META, "meta", (CM_HEAD | CM_EMPTY)}, + {Tag_NOFRAMES, "noframes", (CM_BLOCK | CM_FRAMES)}, + {Tag_NOSCRIPT, "noscript", (CM_BLOCK | CM_INLINE | CM_MIXED)}, + {Tag_OBJECT, "object", (CM_OBJECT | CM_HEAD | CM_IMG | CM_INLINE | CM_PARAM)}, + {Tag_OL, "ol", (CM_BLOCK)}, + {Tag_OPTGROUP, "optgroup", (CM_FIELD | CM_OPT)}, + {Tag_OPTION, "option", (CM_FIELD | CM_OPT)}, + {Tag_P, "p", (CM_BLOCK | CM_OPT)}, + {Tag_PARAM, "param", (CM_INLINE | CM_EMPTY)}, + {Tag_PLAINTEXT, "plaintext", (CM_BLOCK | CM_OBSOLETE)}, + {Tag_PRE, "pre", (CM_BLOCK)}, + {Tag_Q, "q", (CM_INLINE)}, + {Tag_RB, "rb", (CM_INLINE)}, + {Tag_RBC, "rbc", (CM_INLINE)}, + {Tag_RP, "rp", (CM_INLINE)}, + {Tag_RT, "rt", (CM_INLINE)}, + {Tag_RTC, "rtc", (CM_INLINE)}, + {Tag_RUBY, "ruby", (CM_INLINE)}, + {Tag_S, "s", 
(CM_INLINE)}, + {Tag_SAMP, "samp", (CM_INLINE)}, + {Tag_SCRIPT, "script", (CM_HEAD | CM_MIXED | CM_BLOCK | CM_INLINE)}, + {Tag_SELECT, "select", (CM_INLINE | CM_FIELD)}, + {Tag_SMALL, "small", (CM_INLINE)}, + {Tag_SPAN, "span", (CM_INLINE)}, + {Tag_STRIKE, "strike", (CM_INLINE)}, + {Tag_STRONG, "strong", (CM_INLINE)}, + {Tag_STYLE, "style", (CM_HEAD)}, + {Tag_SUB, "sub", (CM_INLINE)}, + {Tag_SUP, "sup", (CM_INLINE)}, + {Tag_TABLE, "table", (CM_BLOCK)}, + {Tag_TBODY, "tbody", (CM_TABLE | CM_ROWGRP | CM_OPT)}, + {Tag_TD, "td", (CM_ROW | CM_OPT | CM_NO_INDENT)}, + {Tag_TEXTAREA, "textarea", (CM_INLINE | CM_FIELD)}, + {Tag_TFOOT, "tfoot", (CM_TABLE | CM_ROWGRP | CM_OPT)}, + {Tag_TH, "th", (CM_ROW | CM_OPT | CM_NO_INDENT)}, + {Tag_THEAD, "thead", (CM_TABLE | CM_ROWGRP | CM_OPT)}, + {Tag_TITLE, "title", (CM_HEAD)}, + {Tag_TR, "tr", (CM_TABLE | CM_OPT)}, + {Tag_TT, "tt", (CM_INLINE)}, + {Tag_U, "u", (CM_INLINE)}, + {Tag_UL, "ul", (CM_BLOCK)}, + {Tag_VAR, "var", (CM_INLINE)}, + {Tag_XMP, "xmp", (CM_BLOCK | CM_OBSOLETE)}, + {Tag_NEXTID, "nextid", (CM_HEAD | CM_EMPTY)}, + + /* proprietary elements */ + {Tag_ALIGN, "align", (CM_BLOCK)}, + {Tag_BGSOUND, "bgsound", (CM_HEAD | CM_EMPTY)}, + {Tag_BLINK, "blink", (CM_INLINE)}, + {Tag_COMMENT, "comment", (CM_INLINE)}, + {Tag_EMBED, "embed", (CM_INLINE | CM_IMG | CM_EMPTY)}, + {Tag_ILAYER, "ilayer", (CM_INLINE)}, + {Tag_KEYGEN, "keygen", (CM_INLINE | CM_EMPTY)}, + {Tag_LAYER, "layer", (CM_BLOCK)}, + {Tag_MARQUEE, "marquee", (CM_INLINE | CM_OPT)}, + {Tag_MULTICOL, "multicol", (CM_BLOCK)}, + {Tag_NOBR, "nobr", (CM_INLINE)}, + {Tag_NOEMBED, "noembed", (CM_INLINE)}, + {Tag_NOLAYER, "nolayer", (CM_BLOCK | CM_INLINE | CM_MIXED)}, + {Tag_NOSAVE, "nosave", (CM_BLOCK)}, + {Tag_SERVER, "server", (CM_HEAD | CM_MIXED | CM_BLOCK | CM_INLINE)}, + {Tag_SERVLET, "servlet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)}, + {Tag_SPACER, "spacer", (CM_INLINE | CM_EMPTY)}, + {Tag_WBR, "wbr", (CM_INLINE | CM_EMPTY)}, +}; + +static sig_atomic_t 
entities_sorted = 0; +struct _entity; +typedef struct _entity entity; + +struct _entity { + gchar *name; + uint code; + gchar *replacement; +}; + + +static entity entities_defs[] = { + /* + ** Markup pre-defined character entities + */ + {"quot", 34, "\""}, + {"amp", 38, "&"}, + {"apos", 39, "'"}, + {"lt", 60, "<"}, + {"gt", 62, ">"}, + + /* + ** Latin-1 character entities + */ + {"nbsp", 160, " "}, + {"iexcl", 161, "!"}, + {"cent", 162, "cent"}, + {"pound", 163, "pound"}, + {"curren", 164, "current"}, + {"yen", 165, "yen"}, + {"brvbar", 166, NULL}, + {"sect", 167, NULL}, + {"uml", 168, "uml"}, + {"copy", 169, "c"}, + {"ordf", 170, NULL}, + {"laquo", 171, "\""}, + {"not", 172, "!"}, + {"shy", 173, NULL}, + {"reg", 174, "r"}, + {"macr", 175, NULL}, + {"deg", 176, "deg"}, + {"plusmn", 177, "+-"}, + {"sup2", 178, "2"}, + {"sup3", 179, "3"}, + {"acute", 180, NULL}, + {"micro", 181, NULL}, + {"para", 182, NULL}, + {"middot", 183, "."}, + {"cedil", 184, NULL}, + {"sup1", 185, "1"}, + {"ordm", 186, NULL}, + {"raquo", 187, "\""}, + {"frac14", 188, "1/4"}, + {"frac12", 189, "1/2"}, + {"frac34", 190, "3/4"}, + {"iquest", 191, "i"}, + {"Agrave", 192, "a"}, + {"Aacute", 193, "a"}, + {"Acirc", 194, "a"}, + {"Atilde", 195, "a"}, + {"Auml", 196, "a"}, + {"Aring", 197, "a"}, + {"AElig", 198, "a"}, + {"Ccedil", 199, "c"}, + {"Egrave", 200, "e"}, + {"Eacute", 201, "e"}, + {"Ecirc", 202, "e"}, + {"Euml", 203, "e"}, + {"Igrave", 204, "i"}, + {"Iacute", 205, "i"}, + {"Icirc", 206, "i"}, + {"Iuml", 207, "i"}, + {"ETH", 208, "e"}, + {"Ntilde", 209, "n"}, + {"Ograve", 210, "o"}, + {"Oacute", 211, "o"}, + {"Ocirc", 212, "o"}, + {"Otilde", 213, "o"}, + {"Ouml", 214, "o"}, + {"times", 215, "t"}, + {"Oslash", 216, "o"}, + {"Ugrave", 217, "u"}, + {"Uacute", 218, "u"}, + {"Ucirc", 219, "u"}, + {"Uuml", 220, "u"}, + {"Yacute", 221, "y"}, + {"THORN", 222, "t"}, + {"szlig", 223, "s"}, + {"agrave", 224, "a"}, + {"aacute", 225, "a"}, + {"acirc", 226, "a"}, + {"atilde", 227, "a"}, + {"auml", 228, 
"a"}, + {"aring", 229, "a"}, + {"aelig", 230, "a"}, + {"ccedil", 231, "c"}, + {"egrave", 232, "e"}, + {"eacute", 233, "e"}, + {"ecirc", 234, "e"}, + {"euml", 235, "e"}, + {"igrave", 236, "e"}, + {"iacute", 237, "e"}, + {"icirc", 238, "e"}, + {"iuml", 239, "e"}, + {"eth", 240, "e"}, + {"ntilde", 241, "n"}, + {"ograve", 242, "o"}, + {"oacute", 243, "o"}, + {"ocirc", 244, "o"}, + {"otilde", 245, "o"}, + {"ouml", 246, "o"}, + {"divide", 247, "/"}, + {"oslash", 248, "/"}, + {"ugrave", 249, "u"}, + {"uacute", 250, "u"}, + {"ucirc", 251, "u"}, + {"uuml", 252, "u"}, + {"yacute", 253, "y"}, + {"thorn", 254, "t"}, + {"yuml", 255, "y"}, + + /* + ** Extended Entities defined in HTML 4: Symbols + */ + {"fnof", 402, "f"}, + {"Alpha", 913, "alpha"}, + {"Beta", 914, "beta"}, + {"Gamma", 915, "gamma"}, + {"Delta", 916, "delta"}, + {"Epsilon", 917, "epsilon"}, + {"Zeta", 918, "zeta"}, + {"Eta", 919, "eta"}, + {"Theta", 920, "theta"}, + {"Iota", 921, "iota"}, + {"Kappa", 922, "kappa"}, + {"Lambda", 923, "lambda"}, + {"Mu", 924, "mu"}, + {"Nu", 925, "nu"}, + {"Xi", 926, "xi"}, + {"Omicron", 927, "omicron"}, + {"Pi", 928, "pi"}, + {"Rho", 929, "rho"}, + {"Sigma", 931, "sigma"}, + {"Tau", 932, "tau"}, + {"Upsilon", 933, "upsilon"}, + {"Phi", 934, "phi"}, + {"Chi", 935, "chi"}, + {"Psi", 936, "psi"}, + {"Omega", 937, "omega"}, + {"alpha", 945, "alpha"}, + {"beta", 946, "beta"}, + {"gamma", 947, "gamma"}, + {"delta", 948, "delta"}, + {"epsilon", 949, "epsilon"}, + {"zeta", 950, "zeta"}, + {"eta", 951, "eta"}, + {"theta", 952, "theta"}, + {"iota", 953, "iota"}, + {"kappa", 954, "kappa"}, + {"lambda", 955, "lambda"}, + {"mu", 956, "mu"}, + {"nu", 957, "nu"}, + {"xi", 958, "xi"}, + {"omicron", 959, "omicron"}, + {"pi", 960, "pi"}, + {"rho", 961, "rho"}, + {"sigmaf", 962, "sigmaf"}, + {"sigma", 963, "sigma"}, + {"tau", 964, "tau"}, + {"upsilon", 965, "upsilon"}, + {"phi", 966, "phi"}, + {"chi", 967, "chi"}, + {"psi", 968, "psi"}, + {"omega", 969, "omega"}, + {"thetasym", 977, "thetasym"}, + 
{"upsih", 978, "upsih"}, + {"piv", 982, "piv"}, + {"bull", 8226, "bull"}, + {"hellip", 8230, "..."}, + {"prime", 8242, "'"}, + {"Prime", 8243, "'"}, + {"oline", 8254, "-"}, + {"frasl", 8260, NULL}, + {"weierp", 8472, NULL}, + {"image", 8465, NULL}, + {"real", 8476, NULL}, + {"trade", 8482, NULL}, + {"alefsym", 8501, "a"}, + {"larr", 8592, NULL}, + {"uarr", 8593, NULL}, + {"rarr", 8594, NULL}, + {"darr", 8595, NULL}, + {"harr", 8596, NULL}, + {"crarr", 8629, NULL}, + {"lArr", 8656, NULL}, + {"uArr", 8657, NULL}, + {"rArr", 8658, NULL}, + {"dArr", 8659, NULL}, + {"hArr", 8660, NULL}, + {"forall", 8704, NULL}, + {"part", 8706, NULL}, + {"exist", 8707, NULL}, + {"empty", 8709, NULL}, + {"nabla", 8711, NULL}, + {"isin", 8712, NULL}, + {"notin", 8713, NULL}, + {"ni", 8715, NULL}, + {"prod", 8719, NULL}, + {"sum", 8721, "E"}, + {"minus", 8722, "-"}, + {"lowast", 8727, NULL}, + {"radic", 8730, NULL}, + {"prop", 8733, NULL}, + {"infin", 8734, NULL}, + {"ang", 8736, "'"}, + {"and", 8743, "&"}, + {"or", 8744, "|"}, + {"cap", 8745, NULL}, + {"cup", 8746, NULL}, + {"gint", 8747, NULL}, + {"there4", 8756, NULL}, + {"sim", 8764, NULL}, + {"cong", 8773, NULL}, + {"asymp", 8776, NULL}, + {"ne", 8800, "!="}, + {"equiv", 8801, "=="}, + {"le", 8804, "<="}, + {"ge", 8805, ">="}, + {"sub", 8834, NULL}, + {"sup", 8835, NULL}, + {"nsub", 8836, NULL}, + {"sube", 8838, NULL}, + {"supe", 8839, NULL}, + {"oplus", 8853, NULL}, + {"otimes", 8855, NULL}, + {"perp", 8869, NULL}, + {"sdot", 8901, NULL}, + {"lceil", 8968, NULL}, + {"rceil", 8969, NULL}, + {"lfloor", 8970, NULL}, + {"rfloor", 8971, NULL}, + {"lang", 9001, NULL}, + {"rang", 9002, NULL}, + {"loz", 9674, NULL}, + {"spades", 9824, NULL}, + {"clubs", 9827, NULL}, + {"hearts", 9829, NULL}, + {"diams", 9830, NULL}, + + /* + ** Extended Entities defined in HTML 4: Special (less Markup at top) + */ + {"OElig", 338, NULL}, + {"oelig", 339, NULL}, + {"Scaron", 352, NULL}, + {"scaron", 353, NULL}, + {"Yuml", 376, NULL}, + {"circ", 710, NULL}, + 
{"tilde", 732, NULL}, + {"ensp", 8194, NULL}, + {"emsp", 8195, NULL}, + {"thinsp", 8201, NULL}, + {"zwnj", 8204, NULL}, + {"zwj", 8205, NULL}, + {"lrm", 8206, NULL}, + {"rlm", 8207, NULL}, + {"ndash", 8211, "-"}, + {"mdash", 8212, "-"}, + {"lsquo", 8216, "'"}, + {"rsquo", 8217, "'"}, + {"sbquo", 8218, "\""}, + {"ldquo", 8220, "\""}, + {"rdquo", 8221, "\""}, + {"bdquo", 8222, "\""}, + {"dagger", 8224, "T"}, + {"Dagger", 8225, "T"}, + {"permil", 8240, NULL}, + {"lsaquo", 8249, "\""}, + {"rsaquo", 8250, "\""}, + {"euro", 8364, "E"}, +}; + +static entity entities_defs_num[ (G_N_ELEMENTS (entities_defs)) ]; + +static gint +tag_cmp (const void *m1, const void *m2) +{ + const struct html_tag *p1 = m1; + const struct html_tag *p2 = m2; + + return g_ascii_strcasecmp (p1->name, p2->name); +} + +static gint +entity_cmp (const void *m1, const void *m2) +{ + const entity *p1 = m1; + const entity *p2 = m2; + + return g_ascii_strcasecmp (p1->name, p2->name); +} + +static gint +entity_cmp_num (const void *m1, const void *m2) +{ + const entity *p1 = m1; + const entity *p2 = m2; + + return p1->code - p2->code; +} + +static GNode * +construct_html_node (rspamd_mempool_t * pool, gchar *text, gsize tag_len) +{ + struct html_node *html; + GNode *n = NULL; + struct html_tag key, *found; + gchar t; + + if (text == NULL || *text == '\0') { + return NULL; + } + + html = rspamd_mempool_alloc0 (pool, sizeof (struct html_node)); + + /* Check whether this tag is fully closed */ + if (*(text + tag_len - 1) == '/') { + html->flags |= FL_CLOSED; + } + + /* Check xml tag */ + if (*text == '?' 
&& g_ascii_strncasecmp (text + 1, "xml", sizeof ("xml") - 1) == 0) { + html->flags |= FL_XML; + html->tag = NULL; + } + else { + if (*text == '/') { + html->flags |= FL_CLOSING; + text++; + } + + /* Find end of tag name */ + key.name = text; + while (*text && g_ascii_isalnum (*(++text))); + + t = *text; + *text = '\0'; + + /* Match tag id by tag name */ + if ((found = bsearch (&key, tag_defs, G_N_ELEMENTS (tag_defs), sizeof (struct html_tag), tag_cmp)) != NULL) { + *text = t; + html->tag = found; + } + else { + *text = t; + return NULL; + } + } + + n = g_node_new (html); + + return n; +} + +static gboolean +check_balance (GNode * node, GNode ** cur_level) +{ + struct html_node *arg = node->data, *tmp; + GNode *cur; + + if (arg->flags & FL_CLOSING) { + /* First of all check whether this tag is closing tag for parent node */ + cur = node->parent; + while (cur && cur->data) { + tmp = cur->data; + if ((tmp->tag && arg->tag) && tmp->tag->id == arg->tag->id && (tmp->flags & FL_CLOSED) == 0) { + tmp->flags |= FL_CLOSED; + /* Destroy current node as we find corresponding parent node */ + g_node_destroy (node); + /* Change level */ + *cur_level = cur->parent; + return TRUE; + } + cur = cur->parent; + } + } + else { + return TRUE; + } + + return FALSE; +} + +struct html_tag * +get_tag_by_name (const gchar *name) +{ + struct html_tag key; + + key.name = name; + + return bsearch (&key, tag_defs, G_N_ELEMENTS (tag_defs), sizeof (struct html_tag), tag_cmp); +} + +/* Decode HTML entitles in text */ +void +decode_entitles (gchar *s, guint * len) +{ + guint l, rep_len; + gchar *t = s; /* t - tortoise */ + gchar *h = s; /* h - hare */ + gchar *e = s; + gchar *end_ptr; + gint state = 0, val, base; + entity *found, key; + + if (len == NULL || *len == 0) { + l = strlen (s); + } + else { + l = *len; + } + + while (h - s < (gint)l) { + switch (state) { + /* Out of entitle */ + case 0: + if (*h == '&') { + state = 1; + e = h; + h++; + continue; + } + else { + *t = *h; + h++; + t++; + } + 
break; + case 1: + if (*h == ';') { + /* Determine base */ + /* First find in entities table */ + + key.name = e + 1; + *h = '\0'; + if (*(e + 1) != '#' && (found = bsearch (&key, entities_defs, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp)) != NULL) { + if (found->replacement) { + rep_len = strlen (found->replacement); + memcpy (t, found->replacement, rep_len); + t += rep_len; + } + } + else { + if (*(e + 2) == 'x' || *(e + 2) == 'X') { + base = 16; + } + else if (*(e + 2) == 'o' || *(e + 2) == 'O') { + base = 8; + } + else { + base = 10; + } + if (base == 10) { + val = strtoul ((e + 2), &end_ptr, base); + } + else { + val = strtoul ((e + 3), &end_ptr, base); + } + if (end_ptr != NULL && *end_ptr != '\0') { + /* Skip undecoded */ + t = h; + } + else { + /* Search for a replacement */ + key.code = val; + found = bsearch (&key, entities_defs_num, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp_num); + if (found) { + if (found->replacement) { + rep_len = strlen (found->replacement); + memcpy (t, found->replacement, rep_len); + t += rep_len; + } + } + } + } + *h = ';'; + state = 0; + } + h++; + break; + } + } + *t = '\0'; + + if (len != NULL) { + *len = t - s; + } +} + +static void +check_phishing (struct rspamd_task *task, struct uri *href_url, const gchar *url_text, gsize remain, tag_id_t id) +{ + struct uri *new; + gchar *url_str; + const gchar *p, *c; + gchar tagbuf[128]; + struct html_tag *tag; + gsize len = 0; + gint rc; + + p = url_text; + while (len < remain) { + if (*p == '<') { + /* Check tag name */ + if (*(p + 1) == '/') { + c = p + 2; + } + else { + c = p + 1; + } + while (len < remain) { + if (!g_ascii_isspace (*p) && *p != '>') { + p ++; + len ++; + } + else { + break; + } + } + rspamd_strlcpy (tagbuf, c, MIN ((gint)sizeof(tagbuf), p - c + 1)); + if ((tag = get_tag_by_name (tagbuf)) != NULL) { + if (tag->id == id) { + break; + } + else if (tag->id == Tag_IMG) { + /* We should ignore IMG tag here */ + while (len < remain && *p != 
'>' && *p != '<') { + p ++; + len ++; + } + if (*p == '>' && len < remain) { + p ++; + } + + remain -= p - url_text; + url_text = p; + len = 0; + continue; + } + } + } + len ++; + p ++; + } + + if (url_try_text (task->task_pool, url_text, len, NULL, NULL, &url_str, TRUE) && url_str != NULL) { + new = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct uri)); + if (new != NULL) { + g_strstrip (url_str); + rc = parse_uri (new, url_str, task->task_pool); + + if (rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) { + if (g_ascii_strncasecmp (href_url->host, new->host, + MAX (href_url->hostlen, new->hostlen)) != 0) { + /* Special check for urls beginning with 'www' */ + if (new->hostlen > 4 && href_url->hostlen > 4) { + p = new->host; + c = NULL; + if ((p[0] == 'w' || p[0] == 'W') && + (p[1] == 'w' || p[1] == 'W') && + (p[2] == 'w' || p[2] == 'W') && + (p[3] == '.')) { + p += 4; + c = href_url->host; + len = MAX (href_url->hostlen, new->hostlen - 4); + } + else { + p = href_url->host; + if ((p[0] == 'w' || p[0] == 'W') && + (p[1] == 'w' || p[1] == 'W') && + (p[2] == 'w' || p[2] == 'W') && + (p[3] == '.')) { + p += 4; + c = new->host; + len = MAX (href_url->hostlen - 4, new->hostlen); + } + } + /* Compare parts and check for phished hostname */ + if (c != NULL) { + if (g_ascii_strncasecmp (p, c, len) != 0) { + href_url->is_phished = TRUE; + href_url->phished_url = new; + } + } + else { + href_url->is_phished = TRUE; + href_url->phished_url = new; + } + } + else { + href_url->is_phished = TRUE; + href_url->phished_url = new; + } + } + } + else { + msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc)); + } + } + } + +} + +static void +parse_tag_url (struct rspamd_task *task, struct mime_text_part *part, tag_id_t id, + gchar *tag_text, gsize tag_len, gsize remain) +{ + gchar *c = NULL, *p, *url_text; + gint len, rc; + struct uri *url; + gboolean got_single_quote = FALSE, got_double_quote = FALSE; + + /* For A tags search 
for href= and for IMG tags search for src= */ + if (id == Tag_A) { + c = rspamd_strncasestr (tag_text, "href=", tag_len); + len = sizeof ("href=") - 1; + } + else if (id == Tag_IMG) { + c = rspamd_strncasestr (tag_text, "src=", tag_len); + len = sizeof ("src=") - 1; + } + + if (c != NULL) { + /* First calculate length */ + c += len; + /* Skip spaces after eqsign */ + while (g_ascii_isspace (*c)) { + c++; + } + len = 0; + p = c; + while (*p && (guint)(p - tag_text) < tag_len) { + if (got_double_quote) { + if (*p == '"') { + break; + } + else { + len++; + } + } + else if (got_single_quote) { + if (*p == '\'') { + break; + } + else { + len++; + } + } + else if (g_ascii_isspace (*p) || *p == '>' || (*p == '/' && *(p + 1) == '>') || *p == '\r' || *p == '\n') { + break; + } + else { + if (*p == '"' && !got_single_quote) { + got_double_quote = !got_double_quote; + } + else if (*p == '\'' && !got_double_quote) { + got_single_quote = !got_single_quote; + } + else { + len++; + } + } + p++; + } + + if (got_single_quote || got_double_quote) { + c++; + } + + if (len == 0) { + return; + } + + url_text = rspamd_mempool_alloc (task->task_pool, len + 1); + rspamd_strlcpy (url_text, c, len + 1); + decode_entitles (url_text, NULL); + + if (g_ascii_strncasecmp (url_text, "http://", sizeof ("http://") - 1) != 0 && + g_ascii_strncasecmp (url_text, "www", sizeof ("www") - 1) != 0 && + g_ascii_strncasecmp (url_text, "ftp://", sizeof ("ftp://") - 1) != 0 && + g_ascii_strncasecmp (url_text, "mailto:", sizeof ("mailto:") - 1) != 0) { + return; + } + + url = rspamd_mempool_alloc (task->task_pool, sizeof (struct uri)); + rc = parse_uri (url, url_text, task->task_pool); + + if (rc != URI_ERRNO_EMPTY && rc != URI_ERRNO_NO_HOST && url->hostlen != 0) { + /* + * Check for phishing + */ + if ((p = strchr (c, '>')) != NULL && id == Tag_A) { + p ++; + check_phishing (task, url, p, remain - (p - tag_text), id); + } + if (g_tree_lookup (task->urls, url) == NULL) { + g_tree_insert (task->urls, url, url); 
+ } + } + } +} + +gboolean +add_html_node (struct rspamd_task *task, rspamd_mempool_t * pool, struct mime_text_part *part, + gchar *tag_text, gsize tag_len, gsize remain, GNode ** cur_level) +{ + GNode *new; + struct html_node *data; + + if (!tags_sorted) { + qsort (tag_defs, G_N_ELEMENTS (tag_defs), sizeof (struct html_tag), tag_cmp); + tags_sorted = 1; + } + if (!entities_sorted) { + qsort (entities_defs, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp); + memcpy (entities_defs_num, entities_defs, sizeof (entities_defs)); + qsort (entities_defs_num, G_N_ELEMENTS (entities_defs), sizeof (entity), entity_cmp_num); + entities_sorted = 1; + } + + /* First call of this function */ + if (part->html_nodes == NULL) { + /* Insert root node */ + new = g_node_new (NULL); + *cur_level = new; + part->html_nodes = new; + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t) g_node_destroy, part->html_nodes); + /* Call once again with root node */ + return add_html_node (task, pool, part, tag_text, tag_len, remain, cur_level); + } + else { + new = construct_html_node (pool, tag_text, tag_len); + if (new == NULL) { + debug_task ("cannot construct HTML node for text '%*s'", tag_len, tag_text); + return FALSE; + } + data = new->data; + if (data->tag && (data->tag->id == Tag_A || data->tag->id == Tag_IMG) && ((data->flags & FL_CLOSING) == 0)) { + parse_tag_url (task, part, data->tag->id, tag_text, tag_len, remain); + } + + if (data->flags & FL_CLOSING) { + if (!*cur_level) { + debug_task ("bad parent node"); + return FALSE; + } + g_node_append (*cur_level, new); + if (!check_balance (new, cur_level)) { + debug_task ("mark part as unbalanced as it has not pairable closing tags"); + part->is_balanced = FALSE; + } + } + else { + + g_node_append (*cur_level, new); + if ((data->flags & FL_CLOSED) == 0) { + *cur_level = new; + } + /* Skip some tags */ + if (data->tag && (data->tag->id == Tag_STYLE || + data->tag->id == Tag_SCRIPT || + data->tag->id == Tag_OBJECT || + 
data->tag->id == Tag_TITLE)) { + return FALSE; + } + } + } + + return TRUE; +} + +/* + * vi:ts=4 + */ diff --git a/src/libserver/html.h b/src/libserver/html.h new file mode 100644 index 000000000..3ea758e60 --- /dev/null +++ b/src/libserver/html.h @@ -0,0 +1,226 @@ +/* + * Functions for simple html parsing + */ + +#ifndef RSPAMD_HTML_H +#define RSPAMD_HTML_H + +#include "config.h" +#include "mem_pool.h" + +/* Known HTML tags */ +typedef enum +{ + Tag_UNKNOWN, /**< Unknown tag! */ + Tag_A, /**< A */ + Tag_ABBR, /**< ABBR */ + Tag_ACRONYM, /**< ACRONYM */ + Tag_ADDRESS, /**< ADDRESS */ + Tag_ALIGN, /**< ALIGN */ + Tag_APPLET, /**< APPLET */ + Tag_AREA, /**< AREA */ + Tag_B, /**< B */ + Tag_BASE, /**< BASE */ + Tag_BASEFONT, /**< BASEFONT */ + Tag_BDO, /**< BDO */ + Tag_BGSOUND, /**< BGSOUND */ + Tag_BIG, /**< BIG */ + Tag_BLINK, /**< BLINK */ + Tag_BLOCKQUOTE, /**< BLOCKQUOTE */ + Tag_BODY, /**< BODY */ + Tag_BR, /**< BR */ + Tag_BUTTON, /**< BUTTON */ + Tag_CAPTION, /**< CAPTION */ + Tag_CENTER, /**< CENTER */ + Tag_CITE, /**< CITE */ + Tag_CODE, /**< CODE */ + Tag_COL, /**< COL */ + Tag_COLGROUP, /**< COLGROUP */ + Tag_COMMENT, /**< COMMENT */ + Tag_DD, /**< DD */ + Tag_DEL, /**< DEL */ + Tag_DFN, /**< DFN */ + Tag_DIR, /**< DIR */ + Tag_DIV, /**< DIF */ + Tag_DL, /**< DL */ + Tag_DT, /**< DT */ + Tag_EM, /**< EM */ + Tag_EMBED, /**< EMBED */ + Tag_FIELDSET, /**< FIELDSET */ + Tag_FONT, /**< FONT */ + Tag_FORM, /**< FORM */ + Tag_FRAME, /**< FRAME */ + Tag_FRAMESET, /**< FRAMESET */ + Tag_H1, /**< H1 */ + Tag_H2, /**< H2 */ + Tag_H3, /**< H3 */ + Tag_H4, /**< H4 */ + Tag_H5, /**< H5 */ + Tag_H6, /**< H6 */ + Tag_HEAD, /**< HEAD */ + Tag_HR, /**< HR */ + Tag_HTML, /**< HTML */ + Tag_I, /**< I */ + Tag_IFRAME, /**< IFRAME */ + Tag_ILAYER, /**< ILAYER */ + Tag_IMG, /**< IMG */ + Tag_INPUT, /**< INPUT */ + Tag_INS, /**< INS */ + Tag_ISINDEX, /**< ISINDEX */ + Tag_KBD, /**< KBD */ + Tag_KEYGEN, /**< KEYGEN */ + Tag_LABEL, /**< LABEL */ + Tag_LAYER, /**< LAYER */ + 
Tag_LEGEND, /**< LEGEND */ + Tag_LI, /**< LI */ + Tag_LINK, /**< LINK */ + Tag_LISTING, /**< LISTING */ + Tag_MAP, /**< MAP */ + Tag_MARQUEE, /**< MARQUEE */ + Tag_MENU, /**< MENU */ + Tag_META, /**< META */ + Tag_MULTICOL, /**< MULTICOL */ + Tag_NOBR, /**< NOBR */ + Tag_NOEMBED, /**< NOEMBED */ + Tag_NOFRAMES, /**< NOFRAMES */ + Tag_NOLAYER, /**< NOLAYER */ + Tag_NOSAVE, /**< NOSAVE */ + Tag_NOSCRIPT, /**< NOSCRIPT */ + Tag_OBJECT, /**< OBJECT */ + Tag_OL, /**< OL */ + Tag_OPTGROUP, /**< OPTGROUP */ + Tag_OPTION, /**< OPTION */ + Tag_P, /**< P */ + Tag_PARAM, /**< PARAM */ + Tag_PLAINTEXT,/**< PLAINTEXT */ + Tag_PRE, /**< PRE */ + Tag_Q, /**< Q */ + Tag_RB, /**< RB */ + Tag_RBC, /**< RBC */ + Tag_RP, /**< RP */ + Tag_RT, /**< RT */ + Tag_RTC, /**< RTC */ + Tag_RUBY, /**< RUBY */ + Tag_S, /**< S */ + Tag_SAMP, /**< SAMP */ + Tag_SCRIPT, /**< SCRIPT */ + Tag_SELECT, /**< SELECT */ + Tag_SERVER, /**< SERVER */ + Tag_SERVLET, /**< SERVLET */ + Tag_SMALL, /**< SMALL */ + Tag_SPACER, /**< SPACER */ + Tag_SPAN, /**< SPAN */ + Tag_STRIKE, /**< STRIKE */ + Tag_STRONG, /**< STRONG */ + Tag_STYLE, /**< STYLE */ + Tag_SUB, /**< SUB */ + Tag_SUP, /**< SUP */ + Tag_TABLE, /**< TABLE */ + Tag_TBODY, /**< TBODY */ + Tag_TD, /**< TD */ + Tag_TEXTAREA, /**< TEXTAREA */ + Tag_TFOOT, /**< TFOOT */ + Tag_TH, /**< TH */ + Tag_THEAD, /**< THEAD */ + Tag_TITLE, /**< TITLE */ + Tag_TR, /**< TR */ + Tag_TT, /**< TT */ + Tag_U, /**< U */ + Tag_UL, /**< UL */ + Tag_VAR, /**< VAR */ + Tag_WBR, /**< WBR */ + Tag_XMP, /**< XMP */ + Tag_XML, /**< XML */ + Tag_NEXTID, /**< NEXTID */ + + N_TAGS /**< Must be last */ +} tag_id_t; + +#define CM_UNKNOWN 0 +/* Elements with no content. Map to HTML specification. */ +#define CM_EMPTY (1 << 0) +/* Elements that appear outside of "BODY". */ +#define CM_HTML (1 << 1) +/* Elements that can appear within HEAD. */ +#define CM_HEAD (1 << 2) +/* HTML "block" elements. */ +#define CM_BLOCK (1 << 3) +/* HTML "inline" elements. 
*/ +#define CM_INLINE (1 << 4) +/* Elements that mark list item ("LI"). */ +#define CM_LIST (1 << 5) +/* Elements that mark definition list item ("DD", "DT"). */ +#define CM_DEFLIST (1 << 6) +/* Elements that can appear inside TABLE. */ +#define CM_TABLE (1 << 7) +/* Used for "THEAD", "TFOOT" or "TBODY". */ +#define CM_ROWGRP (1 << 8) +/* Used for "TD", "TH" */ +#define CM_ROW (1 << 9) +/* Elements whose content must be protected against white space movement. + Includes some elements that can be found in forms. */ +#define CM_FIELD (1 << 10) +/* Used to avoid propagating inline emphasis inside some elements + such as OBJECT or APPLET. */ +#define CM_OBJECT (1 << 11) +/* Elements that allow "PARAM". */ +#define CM_PARAM (1 << 12) +/* "FRAME", "FRAMESET", "NOFRAMES". Used in ParseFrameSet. */ +#define CM_FRAMES (1 << 13) +/* Heading elements (h1, h2, ...). */ +#define CM_HEADING (1 << 14) +/* Elements with an optional end tag. */ +#define CM_OPT (1 << 15) +/* Elements that use "align" attribute for vertical position. */ +#define CM_IMG (1 << 16) +/* Elements with inline and block model. Used to avoid calling InlineDup. */ +#define CM_MIXED (1 << 17) +/* Elements whose content needs to be indented only if containing one + CM_BLOCK element. */ +#define CM_NO_INDENT (1 << 18) +/* Elements that are obsolete (such as "dir", "menu"). */ +#define CM_OBSOLETE (1 << 19) +/* User defined elements. Used to determine how attributes without value + should be printed. */ +#define CM_NEW (1 << 20) +/* Elements that cannot be omitted. */ +#define CM_OMITST (1 << 21) + +/* XML tag */ +#define FL_XML (1 << 0) +/* Closing tag */ +#define FL_CLOSING (1 << 1) +/* Fully closed tag (e.g. 
<a attrs />) */ +#define FL_CLOSED (1 << 2) + +/* Static description of a known tag: its id, name and CM_* content model flags */ +struct html_tag { + tag_id_t id; + const gchar *name; + gint flags; +}; + +/* Node of the tags tree: tag definition (NULL for xml tag) and FL_* flags */ +struct html_node { + struct html_tag *tag; + gint flags; +}; + +/* Forward declaration */ +struct rspamd_task; + +/* + * Add a single node to the tags tree + */ +gboolean add_html_node (struct rspamd_task *task, rspamd_mempool_t *pool, + struct mime_text_part *part, gchar *tag_text, gsize tag_len, gsize remain, GNode **cur_level); + +/* + * Get tag structure by its name (binary search is used) + */ +struct html_tag * get_tag_by_name (const gchar *name); + +/* + * Decode HTML entities in text. Text is modified in place. + * If len is NULL or points to 0 the length of s is taken with strlen; + * if len is not NULL it receives the length of decoded text. + */ +void decode_entitles (gchar *s, guint *len); + +#endif diff --git a/src/libserver/proxy.c b/src/libserver/proxy.c new file mode 100644 index 000000000..67c7665b8 --- /dev/null +++ b/src/libserver/proxy.c @@ -0,0 +1,241 @@ +/* Copyright (c) 2010-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "main.h" +#include "proxy.h" + +static void rspamd_proxy_backend_handler (gint fd, gshort what, gpointer data); +static void rspamd_proxy_client_handler (gint fd, gshort what, gpointer data); + +static inline GQuark +proxy_error_quark (void) +{ + return g_quark_from_static_string ("proxy-error"); +} + +void +rspamd_proxy_close (rspamd_proxy_t *proxy) +{ + if (!proxy->closed) { + close (proxy->cfd); + close (proxy->bfd); + + event_del (&proxy->client_ev); + event_del (&proxy->backend_ev); + proxy->closed = TRUE; + } +} + +static void +rspamd_proxy_client_handler (gint fd, gshort what, gpointer data) +{ + rspamd_proxy_t *proxy = data; + gint r; + GError *err = NULL; + + if (what == EV_READ) { + /* Got data from client */ + event_del (&proxy->client_ev); + r = read (proxy->cfd, proxy->buf, proxy->bufsize); + if (r > 0) { + /* Write this buffer to backend */ + proxy->read_len = r; + proxy->buf_offset = 0; + event_del (&proxy->backend_ev); + event_set (&proxy->backend_ev, proxy->bfd, EV_WRITE, rspamd_proxy_backend_handler, proxy); + event_add (&proxy->backend_ev, proxy->tv); + } + else { + /* Error case or zero reply */ + if (r < 0) { + /* Error case */ + g_set_error (&err, proxy_error_quark(), r, "Client read error: %s", strerror (errno)); + rspamd_proxy_close (proxy); + proxy->err_cb (err, proxy->user_data); + } + else { + /* Client closes connection */ + rspamd_proxy_close (proxy); + proxy->err_cb (NULL, proxy->user_data); + } + } 
+ } + else if (what == EV_WRITE) { + /* Can write to client */ + r = write (proxy->cfd, proxy->buf + proxy->buf_offset, proxy->read_len - proxy->buf_offset); + if (r > 0) { + /* We wrote something */ + proxy->buf_offset +=r; + if (proxy->buf_offset == proxy->read_len) { + /* We wrote everything */ + event_del (&proxy->client_ev); + event_set (&proxy->client_ev, proxy->cfd, EV_READ, rspamd_proxy_client_handler, proxy); + event_add (&proxy->client_ev, proxy->tv); + event_del (&proxy->backend_ev); + event_set (&proxy->backend_ev, proxy->bfd, EV_READ, rspamd_proxy_backend_handler, proxy); + event_add (&proxy->backend_ev, proxy->tv); + } + else { + /* Plan another write event */ + event_add (&proxy->backend_ev, proxy->tv); + } + } + else { + /* Error case or zero reply */ + if (r < 0) { + /* Error case */ + g_set_error (&err, proxy_error_quark(), r, "Client write error: %s", strerror (errno)); + rspamd_proxy_close (proxy); + proxy->err_cb (err, proxy->user_data); + } + else { + /* Client closes connection */ + rspamd_proxy_close (proxy); + proxy->err_cb (NULL, proxy->user_data); + } + } + } + else { + /* Got timeout */ + g_set_error (&err, proxy_error_quark(), ETIMEDOUT, "Client timeout"); + rspamd_proxy_close (proxy); + proxy->err_cb (err, proxy->user_data); + } +} + +static void +rspamd_proxy_backend_handler (gint fd, gshort what, gpointer data) +{ + rspamd_proxy_t *proxy = data; + gint r; + GError *err = NULL; + + if (what == EV_READ) { + /* Got data from backend */ + event_del (&proxy->backend_ev); + r = read (proxy->bfd, proxy->buf, proxy->bufsize); + if (r > 0) { + /* Write this buffer to client */ + proxy->read_len = r; + proxy->buf_offset = 0; + event_del (&proxy->client_ev); + event_set (&proxy->client_ev, proxy->bfd, EV_WRITE, rspamd_proxy_client_handler, proxy); + event_add (&proxy->client_ev, proxy->tv); + } + else { + /* Error case or zero reply */ + if (r < 0) { + /* Error case */ + g_set_error (&err, proxy_error_quark(), r, "Backend read error: %s", 
strerror (errno)); + rspamd_proxy_close (proxy); + proxy->err_cb (err, proxy->user_data); + } + else { + /* Client closes connection */ + rspamd_proxy_close (proxy); + proxy->err_cb (NULL, proxy->user_data); + } + } + } + else if (what == EV_WRITE) { + /* Can write to backend */ + r = write (proxy->bfd, proxy->buf + proxy->buf_offset, proxy->read_len - proxy->buf_offset); + if (r > 0) { + /* We wrote something */ + proxy->buf_offset +=r; + if (proxy->buf_offset == proxy->read_len) { + /* We wrote everything */ + event_del (&proxy->backend_ev); + event_set (&proxy->backend_ev, proxy->bfd, EV_READ, rspamd_proxy_backend_handler, proxy); + event_add (&proxy->backend_ev, proxy->tv); + event_del (&proxy->client_ev); + event_set (&proxy->client_ev, proxy->cfd, EV_READ, rspamd_proxy_client_handler, proxy); + event_add (&proxy->client_ev, proxy->tv); + } + else { + /* Plan another write event */ + event_add (&proxy->backend_ev, proxy->tv); + } + } + else { + /* Error case or zero reply */ + if (r < 0) { + /* Error case */ + g_set_error (&err, proxy_error_quark(), r, "Backend write error: %s", strerror (errno)); + rspamd_proxy_close (proxy); + proxy->err_cb (err, proxy->user_data); + } + else { + /* Client closes connection */ + rspamd_proxy_close (proxy); + proxy->err_cb (NULL, proxy->user_data); + } + } + } + else { + /* Got timeout */ + g_set_error (&err, proxy_error_quark(), ETIMEDOUT, "Client timeout"); + rspamd_proxy_close (proxy); + proxy->err_cb (err, proxy->user_data); + } +} + +/** + * Create new proxy between cfd and bfd + * @param cfd client's socket + * @param bfd backend's socket + * @param bufsize size of exchange buffer + * @param err_cb callback for erorrs or completing + * @param ud user data for callback + * @return new proxy object + */ +rspamd_proxy_t* +rspamd_create_proxy (gint cfd, gint bfd, rspamd_mempool_t *pool, struct event_base *base, + gsize bufsize, struct timeval *tv, dispatcher_err_callback_t err_cb, gpointer ud) +{ + rspamd_proxy_t *new; + + 
new = rspamd_mempool_alloc0 (pool, sizeof (rspamd_proxy_t)); + + new->cfd = dup (cfd); + new->bfd = dup (bfd); + new->pool = pool; + new->base = base; + new->bufsize = bufsize; + new->buf = rspamd_mempool_alloc (pool, bufsize); + new->err_cb = err_cb; + new->user_data = ud; + new->tv = tv; + + /* Set client's and backend's interfaces to read events */ + event_set (&new->client_ev, new->cfd, EV_READ, rspamd_proxy_client_handler, new); + event_base_set (new->base, &new->client_ev); + event_add (&new->client_ev, new->tv); + + event_set (&new->backend_ev, new->bfd, EV_READ, rspamd_proxy_backend_handler, new); + event_base_set (new->base, &new->backend_ev); + event_add (&new->backend_ev, new->tv); + + return new; +} diff --git a/src/libserver/proxy.h b/src/libserver/proxy.h new file mode 100644 index 000000000..c505fe83d --- /dev/null +++ b/src/libserver/proxy.h @@ -0,0 +1,69 @@ +/* Copyright (c) 2010-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef PROXY_H_ +#define PROXY_H_ + +#include "config.h" +#include "buffer.h" + +/** + * @file proxy.h + * Direct asynchronous proxy implementation + */ + +typedef struct rspamd_proxy_s { + struct event client_ev; /**< event for client's communication */ + struct event backend_ev; /**< event for backend communication */ + struct event_base *base; /**< base for event operations */ + rspamd_mempool_t *pool; /**< memory pool */ + dispatcher_err_callback_t err_cb; /**< error callback */ + struct event_base *ev_base; /**< event base */ + gint cfd; /**< client's socket */ + gint bfd; /**< backend's socket */ + guint8 *buf; /**< exchange buffer */ + gsize bufsize; /**< buffer size */ + gint read_len; /**< read length */ + gint buf_offset; /**< offset to write */ + gpointer user_data; /**< user's data for callbacks */ + struct timeval *tv; /**< timeout for communications */ + gboolean closed; /**< whether descriptors are closed */ +} rspamd_proxy_t; + +/** + * Create new proxy between cfd and bfd + * @param cfd client's socket + * @param bfd backend's socket + * @param bufsize size of exchange buffer + * @param err_cb callback for errors or completion + * @param ud user data for callback + * @return new proxy object + */ +rspamd_proxy_t* rspamd_create_proxy (gint cfd, gint bfd, rspamd_mempool_t *pool, + struct event_base *base, gsize bufsize, struct timeval *tv, + dispatcher_err_callback_t err_cb, gpointer ud); + +void rspamd_proxy_close (rspamd_proxy_t *proxy); 
+ +#endif /* PROXY_H_ */ diff --git a/src/libserver/roll_history.c b/src/libserver/roll_history.c new file mode 100644 index 000000000..504f8ae3b --- /dev/null +++ b/src/libserver/roll_history.c @@ -0,0 +1,212 @@ +/* Copyright (c) 2010-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + + +#include "config.h" +#include "main.h" +#include "roll_history.h" + + +/** + * Returns new roll history + * @param pool pool for shared memory + * @return new structure + */ +struct roll_history* +rspamd_roll_history_new (rspamd_mempool_t *pool) +{ + struct roll_history *new; + + if (pool == NULL) { + return NULL; + } + + new = rspamd_mempool_alloc0_shared (pool, sizeof (struct roll_history)); + new->pool = pool; + new->mtx = rspamd_mempool_get_mutex (pool); + + return new; +} + +struct history_metric_callback_data { + gchar *pos; + gint remain; +}; + +static void +roll_history_symbols_callback (gpointer key, gpointer value, void *user_data) +{ + struct history_metric_callback_data *cb = user_data; + struct symbol *s = value; + guint wr; + + if (cb->remain > 0) { + wr = rspamd_snprintf (cb->pos, cb->remain, "%s, ", s->name); + cb->pos += wr; + cb->remain -= wr; + } +} + +/** + * Update roll history with data from task + * @param history roll history object + * @param task task object + */ +void +rspamd_roll_history_update (struct roll_history *history, struct rspamd_task *task) +{ + gint row_num; + struct roll_history_row *row; + struct metric_result *metric_res; + struct history_metric_callback_data cbdata; + + if (history->need_lock) { + /* Some process is getting history, so wait on a mutex */ + rspamd_mempool_lock_mutex (history->mtx); + history->need_lock = FALSE; + rspamd_mempool_unlock_mutex (history->mtx); + } + + /* First of all obtain check and obtain row number */ + g_atomic_int_compare_and_exchange (&history->cur_row, HISTORY_MAX_ROWS, 0); +#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) + row_num = g_atomic_int_add (&history->cur_row, 1); +#else + row_num = g_atomic_int_exchange_and_add (&history->cur_row, 1); +#endif + + if (row_num < HISTORY_MAX_ROWS) { + row = &history->rows[row_num]; + row->completed = FALSE; + } + else { + /* Race condition */ + history->cur_row = 0; + return; + } + + /* Add information from task to roll 
history */ + memcpy (&row->from_addr, &task->from_addr, sizeof (row->from_addr)); + memcpy (&row->tv, &task->tv, sizeof (row->tv)); + + /* Strings */ + rspamd_strlcpy (row->message_id, task->message_id, sizeof (row->message_id)); + if (task->user) { + rspamd_strlcpy (row->user, task->user, sizeof (row->message_id)); + } + else { + row->user[0] = '\0'; + } + + /* Get default metric */ + metric_res = g_hash_table_lookup (task->results, DEFAULT_METRIC); + if (metric_res == NULL) { + row->symbols[0] = '\0'; + row->action = METRIC_ACTION_NOACTION; + } + else { + row->score = metric_res->score; + row->required_score = metric_res->metric->actions[METRIC_ACTION_REJECT].score; + row->action = check_metric_action (metric_res->score, + metric_res->metric->actions[METRIC_ACTION_REJECT].score, metric_res->metric); + cbdata.pos = row->symbols; + cbdata.remain = sizeof (row->symbols); + g_hash_table_foreach (metric_res->symbols, roll_history_symbols_callback, &cbdata); + if (cbdata.remain > 0) { + /* Remove last whitespace and comma */ + *cbdata.pos-- = '\0'; + *cbdata.pos-- = '\0'; + *cbdata.pos = '\0'; + } + } + + row->scan_time = task->scan_milliseconds; + row->len = (task->msg == NULL ? 
0 : task->msg->len); + row->completed = TRUE; +} + +/** + * Load previously saved history from file + * @param history roll history object + * @param filename filename to load from + * @return TRUE if history has been loaded + */ +gboolean +rspamd_roll_history_load (struct roll_history *history, const gchar *filename) +{ + gint fd; + struct stat st; + + if (stat (filename, &st) == -1) { + msg_info ("cannot load history from %s: %s", filename, strerror (errno)); + return FALSE; + } + + if (st.st_size != sizeof (history->rows)) { + msg_info ("cannot load history from %s: size mismatch", filename); + return FALSE; + } + + if ((fd = open (filename, O_RDONLY)) == -1) { + msg_info ("cannot load history from %s: %s", filename, strerror (errno)); + return FALSE; + } + + if (read (fd, history->rows, sizeof (history->rows)) == -1) { + close (fd); + msg_info ("cannot read history from %s: %s", filename, strerror (errno)); + return FALSE; + } + + close (fd); + + return TRUE; +} + +/** + * Save history to file + * @param history roll history object + * @param filename filename to load from + * @return TRUE if history has been saved + */ +gboolean +rspamd_roll_history_save (struct roll_history *history, const gchar *filename) +{ + gint fd; + + if ((fd = open (filename, O_WRONLY | O_CREAT | O_TRUNC, 00600)) == -1) { + msg_info ("cannot save history to %s: %s", filename, strerror (errno)); + return FALSE; + } + + if (write (fd, history->rows, sizeof (history->rows)) == -1) { + close (fd); + msg_info ("cannot write history to %s: %s", filename, strerror (errno)); + return FALSE; + } + + close (fd); + + return TRUE; +} diff --git a/src/libserver/roll_history.h b/src/libserver/roll_history.h new file mode 100644 index 000000000..1dff93a4f --- /dev/null +++ b/src/libserver/roll_history.h @@ -0,0 +1,106 @@ +/* Copyright (c) 2010-2012, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#ifndef ROLL_HISTORY_H_ +#define ROLL_HISTORY_H_ + +#include "config.h" +#include "mem_pool.h" + +/* + * Roll history is a special cyclic buffer for checked messages, it is designed for writing history messages + * and displaying them in webui + */ + +#define HISTORY_MAX_ID 100 +#define HISTORY_MAX_SYMBOLS 200 +#define HISTORY_MAX_USER 20 +#define HISTORY_MAX_ROWS 200 + +struct rspamd_task; + +struct roll_history_row { + struct timeval tv; + gchar message_id[HISTORY_MAX_ID]; + gchar symbols[HISTORY_MAX_SYMBOLS]; + gchar user[HISTORY_MAX_USER]; +#ifdef HAVE_INET_PTON + struct { + union { + struct in_addr in4; + struct in6_addr in6; + } d; + gboolean ipv6; + gboolean has_addr; + } from_addr; +#else + struct in_addr from_addr; +#endif + gsize len; + guint scan_time; + gint action; + gdouble score; + gdouble required_score; + guint8 completed; +}; + +struct roll_history { + struct roll_history_row rows[HISTORY_MAX_ROWS]; + gint cur_row; + rspamd_mempool_t *pool; + gboolean need_lock; + rspamd_mempool_mutex_t *mtx; +}; + +/** + * Returns new roll history + * @param pool pool for shared memory + * @return new structure + */ +struct roll_history* rspamd_roll_history_new (rspamd_mempool_t *pool); + +/** + * Update roll history with data from task + * @param history roll history object + * @param task task object + */ +void rspamd_roll_history_update (struct roll_history *history, struct rspamd_task *task); + +/** + * Load previously saved history from file + * @param history roll history object + * @param filename filename to load from + * @return TRUE if history has been loaded + */ +gboolean rspamd_roll_history_load (struct roll_history *history, const gchar *filename); + +/** + * Save history to file + * @param history roll history object + * @param filename filename to save to + * @return TRUE if history has been saved + */ +gboolean rspamd_roll_history_save (struct roll_history *history, const gchar *filename); + +#endif /* ROLL_HISTORY_H_ */ diff --git 
a/src/libserver/settings.c b/src/libserver/settings.c new file mode 100644 index 000000000..c3292c8ab --- /dev/null +++ b/src/libserver/settings.c @@ -0,0 +1,657 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "cfg_file.h" +#include "map.h" +#include "main.h" +#include "settings.h" +#include "filter.h" +#include "json/jansson.h" + +struct json_buf { + GHashTable *table; + gchar *buf; + gchar *pos; + size_t buflen; +}; + +static void +settings_actions_free (gpointer data) +{ + GList *cur = data; + + while (cur) { + g_free (cur->data); + cur = g_list_next (cur); + } + + g_list_free ((GList *)data); +} + +static void +settings_free (gpointer data) +{ + struct rspamd_settings *s = data; + + if (s->statfile_alias) { + g_free (s->statfile_alias); + } + if (s->factors) { + g_hash_table_destroy (s->factors); + } + if (s->metric_scores) { + g_hash_table_destroy (s->metric_scores); + } + if (s->reject_scores) { + g_hash_table_destroy (s->reject_scores); + } + if (s->whitelist) { + g_hash_table_destroy (s->whitelist); + } + if (s->blacklist) { + g_hash_table_destroy (s->blacklist); + } + if (s->metric_actions) { + g_hash_table_destroy (s->metric_actions); + } + + g_slice_free1 (sizeof (struct rspamd_settings), s); +} + +static struct rspamd_settings * +settings_ref (struct rspamd_settings *s) +{ + if (s == NULL) { + s = g_slice_alloc (sizeof (struct rspamd_settings)); + s->metric_scores = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free); + s->reject_scores = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free); + s->metric_actions = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, settings_actions_free); + s->factors = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free); + s->whitelist = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free); + s->blacklist = g_hash_table_new_full (rspamd_str_hash, rspamd_str_equal, g_free, g_free); + s->statfile_alias = NULL; + s->want_spam = FALSE; + s->ref_count = 1; + } + else { + s->ref_count ++; + } + + return s; +} + +static void +settings_unref (struct rspamd_settings *s) +{ + if (s != NULL) { + 
s->ref_count --; + if (s->ref_count <= 0) { + settings_free (s); + } + } +} + + +gchar * +json_read_cb (rspamd_mempool_t * pool, gchar * chunk, gint len, struct map_cb_data *data) +{ + struct json_buf *jb; + size_t free, off; + + if (data->cur_data == NULL) { + jb = g_malloc (sizeof (struct json_buf)); + jb->table = g_hash_table_ref (((struct json_buf *)data->prev_data)->table); + jb->buf = NULL; + jb->pos = NULL; + data->cur_data = jb; + } + else { + jb = data->cur_data; + } + + if (jb->buf == NULL) { + /* Allocate memory for buffer */ + jb->buflen = len * 2; + jb->buf = g_malloc (jb->buflen); + jb->pos = jb->buf; + } + + off = jb->pos - jb->buf; + free = jb->buflen - off; + + if ((gint)free < len) { + jb->buflen = MAX (jb->buflen * 2, jb->buflen + len * 2); + jb->buf = g_realloc (jb->buf, jb->buflen); + jb->pos = jb->buf + off; + } + + memcpy (jb->pos, chunk, len); + jb->pos += len; + + /* Say not to copy any part of this buffer */ + return NULL; +} + +void +json_fin_cb (rspamd_mempool_t * pool, struct map_cb_data *data) +{ + struct json_buf *jb; + gint nelts, i, n, j; + json_t *js, *cur_elt, *cur_nm, *it_val, *act_it, *act_value; + json_error_t je; + struct metric_action *new_act; + struct rspamd_settings *cur_settings; + GList *cur_act; + gchar *cur_name; + void *json_it; + double *score; + + if (data->prev_data) { + jb = data->prev_data; + /* Clean prev data */ + if (jb->table) { + g_hash_table_unref (jb->table); + } + if (jb->buf) { + g_free (jb->buf); + } + g_free (jb); + } + + /* Now parse json */ + if (data->cur_data) { + jb = data->cur_data; + } + else { + msg_err ("no data read"); + return; + } + if (jb->buf == NULL) { + msg_err ("no data read"); + return; + } + /* NULL terminate current buf */ + *jb->pos = '\0'; + + js = json_loads (jb->buf, &je); + if (!js) { + msg_err ("cannot load json data: parse error %s, on line %d", je.text, je.line); + return; + } + + if (!json_is_array (js)) { + json_decref (js); + msg_err ("loaded json is not an array"); + 
return; + } + + nelts = json_array_size (js); + for (i = 0; i < nelts; i++) { + cur_settings = settings_ref (NULL); + + cur_elt = json_array_get (js, i); + if (!cur_elt || !json_is_object (cur_elt)) { + json_decref (js); + msg_err ("loaded json is not an object"); + settings_unref (cur_settings); + return; + } + cur_nm = json_object_get (cur_elt, "name"); + if (cur_nm == NULL || !json_is_string (cur_nm)) { + json_decref (js); + msg_err ("name is not a string or not exists"); + settings_unref (cur_settings); + return; + } + cur_name = g_strdup (json_string_value (cur_nm)); + /* Now check other settings */ + /* Statfile */ + cur_nm = json_object_get (cur_elt, "statfile"); + if (cur_nm != NULL && json_is_string (cur_nm)) { + cur_settings->statfile_alias = g_strdup (json_string_value (cur_nm)); + } + /* Factors object */ + cur_nm = json_object_get (cur_elt, "factors"); + if (cur_nm != NULL && json_is_object (cur_nm)) { + json_it = json_object_iter (cur_nm); + while (json_it) { + it_val = json_object_iter_value (json_it); + if (it_val && json_is_string (it_val)) { + g_hash_table_insert (cur_settings->factors, g_strdup (json_object_iter_key (json_it)), g_strdup (json_string_value (it_val))); + } + json_it = json_object_iter_next (cur_nm, json_it); + } + } + /* Metrics object */ + cur_nm = json_object_get (cur_elt, "metrics"); + if (cur_nm != NULL && json_is_object (cur_nm)) { + json_it = json_object_iter (cur_nm); + while (json_it) { + it_val = json_object_iter_value (json_it); + if (it_val && json_is_number (it_val)) { + score = g_malloc (sizeof (double)); + *score = json_number_value (it_val); + g_hash_table_insert (cur_settings->metric_scores, + g_strdup (json_object_iter_key (json_it)), score); + } + else if (it_val && json_is_object (it_val)) { + /* Assume this as actions hash */ + cur_act = NULL; + act_it = json_object_iter (it_val); + while (act_it) { + act_value = json_object_iter_value (act_it); + + if (act_value && json_is_number (act_value)) { + /* Special 
cases */ + if (g_ascii_strcasecmp (json_object_iter_key (act_it), "spam_score") == 0) { + score = g_malloc (sizeof (double)); + *score = json_number_value (act_value); + g_hash_table_insert (cur_settings->metric_scores, + g_strdup (json_object_iter_key (json_it)), score); + } + else if (g_ascii_strcasecmp (json_object_iter_key (act_it), "reject_score") == 0) { + score = g_malloc (sizeof (double)); + *score = json_number_value (act_value); + g_hash_table_insert (cur_settings->reject_scores, + g_strdup (json_object_iter_key (json_it)), score); + } + else if (check_action_str (json_object_iter_key (act_it), &j)) { + new_act = g_malloc (sizeof (struct metric_action)); + new_act->action = j; + new_act->score = json_number_value (act_value); + cur_act = g_list_prepend (cur_act, new_act); + } + } + act_it = json_object_iter_next (it_val, act_it); + } + if (cur_act != NULL) { + g_hash_table_insert (cur_settings->metric_actions, + g_strdup (json_object_iter_key (json_it)), cur_act); + cur_act = NULL; + } + } + json_it = json_object_iter_next (cur_nm, json_it); + } + } + /* Rejects object */ + cur_nm = json_object_get (cur_elt, "rejects"); + if (cur_nm != NULL && json_is_object (cur_nm)) { + json_it = json_object_iter (cur_nm); + while (json_it) { + it_val = json_object_iter_value (json_it); + if (it_val && json_is_number (it_val)) { + score = g_malloc (sizeof (double)); + *score = json_number_value (it_val); + g_hash_table_insert (cur_settings->reject_scores, g_strdup (json_object_iter_key (json_it)), + score); + } + json_it = json_object_iter_next(cur_nm, json_it); + } + } + /* Whitelist object */ + cur_nm = json_object_get (cur_elt, "whitelist"); + if (cur_nm != NULL && json_is_array (cur_nm)) { + n = json_array_size(cur_nm); + for(j = 0; j < n; j++) { + it_val = json_array_get(cur_nm, j); + if (it_val && json_is_string (it_val)) { + if (strlen (json_string_value (it_val)) > 0) { + g_hash_table_insert (cur_settings->whitelist, + g_strdup (json_string_value (it_val)), 
g_strdup (json_string_value (it_val))); + } + } + + } + } + /* Blacklist object */ + cur_nm = json_object_get (cur_elt, "blacklist"); + if (cur_nm != NULL && json_is_array (cur_nm)) { + n = json_array_size(cur_nm); + for(j = 0; j < n; j++) { + it_val = json_array_get(cur_nm, j); + if (it_val && json_is_string (it_val)) { + if (strlen (json_string_value (it_val)) > 0) { + g_hash_table_insert (cur_settings->blacklist, + g_strdup (json_string_value (it_val)), g_strdup (json_string_value (it_val))); + } + } + + } + } + /* Want spam */ + cur_nm = json_object_get (cur_elt, "want_spam"); + if (cur_nm != NULL) { + if (json_is_true (cur_nm)) { + cur_settings->want_spam = TRUE; + } + } + g_hash_table_replace (((struct json_buf *)data->cur_data)->table, cur_name, cur_settings); + } + json_decref (js); +} + +gboolean +read_settings (const gchar *path, const gchar *description, struct config_file *cfg, GHashTable * table) +{ + struct json_buf *jb = g_malloc (sizeof (struct json_buf)), **pjb; + + pjb = g_malloc (sizeof (struct json_buf *)); + + jb->table = table; + jb->buf = NULL; + *pjb = jb; + + if (!add_map (cfg, path, description, json_read_cb, json_fin_cb, (void **)pjb)) { + msg_err ("cannot add map %s", path); + return FALSE; + } + + return TRUE; +} + +void +init_settings (struct config_file *cfg) +{ + cfg->domain_settings = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, + g_free, (GDestroyNotify)settings_unref); + cfg->user_settings = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, + g_free, (GDestroyNotify)settings_unref); +} + +static gboolean +check_setting (struct rspamd_task *task, struct rspamd_settings **user_settings, struct rspamd_settings **domain_settings) +{ + gchar *field = NULL, *domain = NULL; + gchar cmp_buf[1024]; + gint len; + + if (task->deliver_to != NULL) { + /* First try to use deliver-to field */ + field = task->deliver_to; + } + else if (task->user != NULL) { + /* Then user field */ + field = task->user; + } + 
else if (task->rcpt != NULL) { + /* Then first recipient */ + field = task->rcpt->data; + } + else { + return FALSE; + } + + domain = strchr (field, '@'); + if (domain == NULL) { + /* First try to search in first recipient */ + if (task->rcpt) { + domain = strchr (task->rcpt->data, '@'); + } + } + if (domain != NULL) { + domain++; + } + + /* First try to search per-user settings */ + if (field != NULL) { + if (*field == '<') { + field ++; + } + len = strcspn (field, ">"); + rspamd_strlcpy (cmp_buf, field, MIN ((gint)sizeof (cmp_buf), len + 1)); + *user_settings = g_hash_table_lookup (task->cfg->user_settings, cmp_buf); + } + if (domain != NULL) { + len = strcspn (domain, ">"); + rspamd_strlcpy (cmp_buf, domain, MIN ((gint)sizeof (cmp_buf), len + 1)); + *domain_settings = g_hash_table_lookup (task->cfg->domain_settings, cmp_buf); + } + + if (*domain_settings != NULL || *user_settings != NULL) { + return TRUE; + } + + return FALSE; +} + +static gboolean +check_bwhitelist (struct rspamd_task *task, struct rspamd_settings *s, gboolean *is_black) +{ + gchar *src_email = NULL, *src_domain = NULL, *data; + + if (task->from != NULL && *task->from != '\0') { + src_email = task->from; + } else { + return FALSE; + } + + src_domain = strchr (src_email, '@'); + if(src_domain != NULL) { + src_domain++; + } + + if ((((data = g_hash_table_lookup (s->blacklist, src_email)) != NULL) || + ( (src_domain != NULL) && ((data = g_hash_table_lookup (s->blacklist, src_domain)) != NULL)) )) { + *is_black = TRUE; + msg_info ("<%s> blacklisted as domain %s is in settings blacklist", task->message_id, data); + return TRUE; + } + if ((((data = g_hash_table_lookup (s->whitelist, src_email)) != NULL) || + ( (src_domain != NULL) && ((data = g_hash_table_lookup (s->whitelist, src_domain)) != NULL)) )) { + *is_black = FALSE; + msg_info ("<%s> whitelisted as domain %s is in settings blacklist", task->message_id, data); + return TRUE; + } + return FALSE; +} + +gboolean +check_metric_settings (struct 
metric_result *res, double *score, double *rscore) +{ + struct rspamd_settings *us = res->user_settings, *ds = res->domain_settings; + double *sc, *rs; + struct metric *metric = res->metric; + + /* XXX: what the fuck is that? */ + *rscore = 10.0; + + if (us != NULL) { + if ((rs = g_hash_table_lookup (us->reject_scores, metric->name)) != NULL) { + *rscore = *rs; + } + if ((sc = g_hash_table_lookup (us->metric_scores, metric->name)) != NULL) { + *score = *sc; + return TRUE; + } + /* Now check in domain settings */ + if (ds && ((rs = g_hash_table_lookup (ds->reject_scores, metric->name)) != NULL)) { + *rscore = *rs; + } + if (ds && (sc = g_hash_table_lookup (ds->metric_scores, metric->name)) != NULL) { + *score = *sc; + return TRUE; + } + } + else if (ds != NULL) { + if ((rs = g_hash_table_lookup (ds->reject_scores, metric->name)) != NULL) { + *rscore = *rs; + } + if ((sc = g_hash_table_lookup (ds->metric_scores, metric->name)) != NULL) { + *score = *sc; + return TRUE; + } + } + + return FALSE; +} + +gboolean +check_metric_action_settings (struct rspamd_task *task, struct metric_result *res, + double score, enum rspamd_metric_action *result) +{ + struct rspamd_settings *us = res->user_settings, *ds = res->domain_settings; + struct metric_action *act, *sel = NULL; + GList *cur; + enum rspamd_metric_action r = METRIC_ACTION_NOACTION; + gboolean black; + + if (us != NULL) { + /* Check whitelist and set appropriate action for whitelisted users */ + if (check_bwhitelist(task, us, &black)) { + if (black) { + *result = METRIC_ACTION_REJECT; + } + else { + *result = METRIC_ACTION_NOACTION; + } + return TRUE; + } + if ((cur = g_hash_table_lookup (us->metric_actions, res->metric->name)) != NULL) { + while (cur) { + act = cur->data; + if (score >= act->score) { + r = act->action; + sel = act; + } + cur = g_list_next (cur); + } + } + } + else if (ds != NULL) { + /* Check whitelist and set appropriate action for whitelisted users */ + if (check_bwhitelist(task, ds, &black)) { + if 
(black) { + *result = METRIC_ACTION_REJECT; + } + else { + *result = METRIC_ACTION_NOACTION; + } + return TRUE; + } + if ((cur = g_hash_table_lookup (ds->metric_actions, res->metric->name)) != NULL) { + while (cur) { + act = cur->data; + if (score >= act->score) { + r = act->action; + sel = act; + } + cur = g_list_next (cur); + } + } + } + + if (sel != NULL && result != NULL) { + *result = r; + return TRUE; + } + + return FALSE; +} + +gboolean +apply_metric_settings (struct rspamd_task *task, struct metric *metric, struct metric_result *res) +{ + struct rspamd_settings *us = NULL, *ds = NULL; + + if (check_setting (task, &us, &ds)) { + if (us != NULL || ds != NULL) { + if (us != NULL) { + res->user_settings = settings_ref (us); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)settings_unref, + us); + } + if (ds != NULL) { + /* Need to ref hash table to avoid occasional data corruption */ + res->domain_settings = settings_ref (ds); + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)settings_unref, + ds); + } + } + else { + return FALSE; + } + } + + return TRUE; +} + +gboolean +check_factor_settings (struct metric_result *res, const gchar *symbol, double *factor) +{ + double *fc; + + if (res->user_settings != NULL) { + /* First search in user's settings */ + if ((fc = g_hash_table_lookup (res->user_settings->factors, symbol)) != NULL) { + *factor = *fc; + return TRUE; + } + /* Now check in domain settings */ + if (res->domain_settings && (fc = g_hash_table_lookup (res->domain_settings->factors, symbol)) != NULL) { + *factor = *fc; + return TRUE; + } + } + else if (res->domain_settings != NULL) { + if ((fc = g_hash_table_lookup (res->domain_settings->factors, symbol)) != NULL) { + *factor = *fc; + return TRUE; + } + } + + return FALSE; + +} + + +gboolean +check_want_spam (struct rspamd_task *task) +{ + struct rspamd_settings *us = NULL, *ds = NULL; + + if (check_setting (task, &us, &ds)) { + if (us != NULL) { + /* 
First search in user's settings */ + if (us->want_spam) { + return TRUE; + } + /* Now check in domain settings */ + if (ds && ds->want_spam) { + return TRUE; + } + } + else if (ds != NULL) { + if (ds->want_spam) { + return TRUE; + } + } + } + + return FALSE; +} + +/* + * vi:ts=4 + */ diff --git a/src/libserver/settings.h b/src/libserver/settings.h new file mode 100644 index 000000000..361700094 --- /dev/null +++ b/src/libserver/settings.h @@ -0,0 +1,55 @@ +#ifndef RSPAMD_SETTINGS_H +#define RSPAMD_SETTINGS_H + +#include "config.h" +#include "main.h" + +struct rspamd_settings { + GHashTable *metric_scores; /**< hash table of metric require scores for this setting */ + GHashTable *reject_scores; /**< hash table of metric reject scores for this setting */ + GHashTable *metric_actions; /**< hash table of metric actions for this setting */ + GHashTable *factors; /**< hash table of new factors for this setting */ + GHashTable *whitelist; /**< hash table of whitelist for this setting */ + GHashTable *blacklist; /**< hash table of whitelist for this setting */ + gchar *statfile_alias; /**< alias for statfile used */ + gboolean want_spam; /**< if true disable rspamd checks */ + gint ref_count; /**< reference counter */ +}; + + +/* + * Read settings from specified path + */ +gboolean read_settings (const gchar *path, const gchar *description, struct config_file *cfg, GHashTable *table); + +/* + * Init configuration structures for settings + */ +void init_settings (struct config_file *cfg); + +/* + * Check scores settings + */ +gboolean check_metric_settings (struct metric_result *res, double *score, double *rscore); + +/* + * Check actions settings + */ +gboolean check_metric_action_settings (struct rspamd_task *task, struct metric_result *res, double score, enum rspamd_metric_action *result); + +/* + * Check individual weights for settings + */ +gboolean check_factor_settings (struct metric_result *res, const gchar *symbol, double *factor); + +/* + * Check want_spam flag + 
*/ +gboolean check_want_spam (struct rspamd_task *task); + +/* + * Search settings for metric and store pointers to settings into metric_result structure + */ +gboolean apply_metric_settings (struct rspamd_task *task, struct metric *metric, struct metric_result *res); + +#endif diff --git a/src/libserver/spf.c b/src/libserver/spf.c new file mode 100644 index 000000000..12f1513d4 --- /dev/null +++ b/src/libserver/spf.c @@ -0,0 +1,1465 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "dns.h" +#include "spf.h" +#include "main.h" +#include "message.h" +#include "filter.h" + +#define SPF_VER1_STR "v=spf1" +#define SPF_VER2_STR "spf2." 
+#define SPF_SCOPE_PRA "pra" +#define SPF_SCOPE_MFROM "mfrom" +#define SPF_ALL "all" +#define SPF_A "a" +#define SPF_IP4 "ip4" +#define SPF_IP6 "ip6" +#define SPF_PTR "ptr" +#define SPF_MX "mx" +#define SPF_EXISTS "exists" +#define SPF_INCLUDE "include" +#define SPF_REDIRECT "redirect" +#define SPF_EXP "exp" + +/** SPF limits for avoiding abuse **/ +#define SPF_MAX_NESTING 10 +#define SPF_MAX_DNS_REQUESTS 30 + +/** + * State machine for SPF record: + * + * spf_mech ::= +|-|~|? + * + * spf_body ::= spf=v1 <spf_command> [<spf_command>] + * spf_command ::= [spf_mech]all|a|<ip4>|<ip6>|ptr|mx|<exists>|<include>|<redirect> + * + * spf_domain ::= [:domain][/mask] + * spf_ip4 ::= ip[/mask] + * ip4 ::= ip4:<spf_ip4> + * mx ::= mx<spf_domain> + * a ::= a<spf_domain> + * ptr ::= ptr[:domain] + * exists ::= exists:domain + * include ::= include:domain + * redirect ::= redirect:domain + * exp ::= exp:domain + * + */ + +#undef SPF_DEBUG + +struct spf_dns_cb { + struct spf_record *rec; + struct spf_addr *addr; + spf_action_t cur_action; + gboolean in_include; +}; + +#define CHECK_REC(rec) \ +do { \ + if ((rec)->nested > SPF_MAX_NESTING || \ + (rec)->dns_requests > SPF_MAX_DNS_REQUESTS) { \ + msg_info ("<%s> spf recursion limit %d is reached, domain: %s", \ + (rec)->task->message_id, (rec)->dns_requests, \ + (rec)->sender_domain); \ + return FALSE; \ + } \ +} while (0) \ + +static gboolean parse_spf_record (struct rspamd_task *task, struct spf_record *rec); +static void start_spf_parse (struct spf_record *rec, gchar *begin, guint ttl); + +/* Determine spf mech */ +static spf_mech_t +check_spf_mech (const gchar *elt, gboolean *need_shift) +{ + g_assert (elt != NULL); + + *need_shift = TRUE; + + switch (*elt) { + case '-': + return SPF_FAIL; + case '~': + return SPF_SOFT_FAIL; + case '+': + return SPF_PASS; + case '?': + return SPF_NEUTRAL; + default: + *need_shift = FALSE; + return SPF_PASS; + } +} + +/* Debugging function that dumps spf record in log */ +static void 
+dump_spf_record (GList *addrs) +{ + struct spf_addr *addr; + GList *cur; + gint r = 0; + gchar logbuf[BUFSIZ], c; +#ifdef HAVE_INET_PTON + gchar ipbuf[INET6_ADDRSTRLEN]; +#else + struct in_addr ina; +#endif + + cur = addrs; + + while (cur) { + addr = cur->data; + if (!addr->is_list) { + switch (addr->mech) { + case SPF_FAIL: + c = '-'; + break; + case SPF_SOFT_FAIL: + case SPF_NEUTRAL: + c = '~'; + break; + case SPF_PASS: + c = '+'; + break; + } +#ifdef HAVE_INET_PTON + if (addr->data.normal.ipv6) { + inet_ntop (AF_INET6, &addr->data.normal.d.in6, ipbuf, sizeof (ipbuf)); + + } + else { + inet_ntop (AF_INET, &addr->data.normal.d.in4, ipbuf, sizeof (ipbuf)); + } + r += snprintf (logbuf + r, sizeof (logbuf) - r, "%c%s/%d; ", c, ipbuf, addr->data.normal.mask); +#else + ina.s_addr = addr->data.normal.d.in4.s_addr; + r += snprintf (logbuf + r, sizeof (logbuf) - r, "%c%s/%d; ", c, inet_ntoa (ina), addr->data.normal.mask); +#endif + } + else { + r += snprintf (logbuf + r, sizeof (logbuf) - r, "%s; ", addr->spf_string); + dump_spf_record (addr->data.list); + } + cur = g_list_next (cur); + } + msg_info ("spf record: %s", logbuf); +} + +/* Find position of address inside addrs list */ +static GList * +spf_addr_find (GList *addrs, gpointer to_find) +{ + struct spf_addr *addr; + GList *cur, *res = NULL; + + cur = addrs; + while (cur) { + addr = cur->data; + if (addr->is_list) { + if ((res = spf_addr_find (addr->data.list, to_find)) != NULL) { + return cur; + } + } + else { + if (cur->data == to_find) { + return cur; + } + } + cur = g_list_next (cur); + } + + return res; +} + +/* + * Destructor for spf record + */ +static void +spf_record_destructor (gpointer r) +{ + struct spf_record *rec = r; + GList *cur; + struct spf_addr *addr; + + if (rec->addrs) { + cur = rec->addrs; + while (cur) { + addr = cur->data; + if (addr->is_list && addr->data.list != NULL) { + g_list_free (addr->data.list); + } + cur = g_list_next (cur); + } + g_list_free (rec->addrs); + } +} + +static gboolean 
+parse_spf_ipmask (const gchar *begin, struct spf_addr *addr, struct spf_record *rec) +{ + const gchar *pos; + gchar mask_buf[5] = {'\0'}, *p; + gint state = 0, dots = 0; +#ifdef HAVE_INET_PTON + gchar ip_buf[INET6_ADDRSTRLEN]; +#else + gchar ip_buf[INET_ADDRSTRLEN]; +#endif + + bzero (ip_buf, sizeof (ip_buf)); + bzero (mask_buf, sizeof (mask_buf)); + pos = begin; + p = ip_buf; + + while (*pos) { + switch (state) { + case 0: + /* Require ':' */ + if (*pos != ':') { + msg_info ("<%s>: spf error for domain %s: semicolon missing", + rec->task->message_id, rec->sender_domain); + return FALSE; + } + state = 1; + pos ++; + p = ip_buf; + dots = 0; + break; + case 1: +#ifdef HAVE_INET_PTON + if (p - ip_buf >= (gint)sizeof (ip_buf)) { + return FALSE; + } + if (g_ascii_isxdigit (*pos)) { + *p ++ = *pos ++; + } + else if (*pos == '.' || *pos == ':') { + *p ++ = *pos ++; + dots ++; + } +#else + /* Begin parse ip */ + if (p - ip_buf >= (gint)sizeof (ip_buf) || dots > 3) { + return FALSE; + } + if (g_ascii_isdigit (*pos)) { + *p ++ = *pos ++; + } + else if (*pos == '.') { + *p ++ = *pos ++; + dots ++; + } +#endif + else if (*pos == '/') { + pos ++; + p = mask_buf; + state = 2; + } + else { + /* Invalid character */ + msg_info ("<%s>: spf error for domain %s: invalid ip address", + rec->task->message_id, rec->sender_domain); + return FALSE; + } + break; + case 2: + /* Parse mask */ + if (p - mask_buf >= (gint)sizeof (mask_buf)) { + msg_info ("<%s>: spf error for domain %s: too long mask", + rec->task->message_id, rec->sender_domain); + return FALSE; + } + if (g_ascii_isdigit (*pos)) { + *p ++ = *pos ++; + } + else { + return FALSE; + } + break; + } + } + +#ifdef HAVE_INET_PTON + if (inet_pton (AF_INET, ip_buf, &addr->data.normal.d.in4) != 1) { + if (inet_pton (AF_INET6, ip_buf, &addr->data.normal.d.in6) == 1) { + addr->data.normal.ipv6 = TRUE; + } + else { + msg_info ("<%s>: spf error for domain %s: invalid ip address", + rec->task->message_id, rec->sender_domain); + return 
FALSE; + } + } + else { + addr->data.normal.ipv6 = FALSE; + } +#else + if (!inet_aton (ip_buf, &addr->data.normal.d.in4)) { + return FALSE; + } +#endif + if (state == 2) { + /* Also parse mask */ + if (!addr->data.normal.ipv6) { + addr->data.normal.mask = strtoul (mask_buf, NULL, 10); + if (addr->data.normal.mask > 32) { + msg_info ("<%s>: spf error for domain %s: bad ipmask value: '%s'", + rec->task->message_id, rec->sender_domain, begin); + return FALSE; + } + } + else { + addr->data.normal.mask = strtoul (mask_buf, NULL, 10); + if (addr->data.normal.mask > 128) { + msg_info ("<%s>: spf error for domain %s: bad ipmask value: '%s'", + rec->task->message_id, rec->sender_domain, begin); + return FALSE; + } + } + } + else { + addr->data.normal.mask = addr->data.normal.ipv6 ? 128 : 32; + } + addr->data.normal.parsed = TRUE; + return TRUE; + +} + +static gchar * +parse_spf_hostmask (struct rspamd_task *task, const gchar *begin, struct spf_addr *addr, struct spf_record *rec) +{ + gchar *host = NULL, *p, mask_buf[3]; + gint hostlen; + + bzero (mask_buf, sizeof (mask_buf)); + if (*begin == '\0' || *begin == '/') { + /* Assume host as host to resolve from record */ + host = rec->cur_domain; + } + p = strchr (begin, '/'); + if (p != NULL) { + /* Extract mask */ + rspamd_strlcpy (mask_buf, p + 1, sizeof (mask_buf)); + addr->data.normal.mask = strtoul (mask_buf, NULL, 10); + if (addr->data.normal.mask > 32) { + msg_info ("<%s>: spf error for domain %s: too long mask", + rec->task->message_id, rec->sender_domain); + return FALSE; + } + if (host == NULL) { + hostlen = p - begin; + host = rspamd_mempool_alloc (task->task_pool, hostlen); + rspamd_strlcpy (host, begin, hostlen); + } + } + else { + addr->data.normal.mask = 32; + if (host == NULL) { + host = rspamd_mempool_strdup (task->task_pool, begin); + } + } + + return host; +} + +static void +spf_record_dns_callback (struct rdns_reply *reply, gpointer arg) +{ + struct spf_dns_cb *cb = arg; + gchar *begin; + struct 
rdns_reply_entry *elt_data; + GList *tmp = NULL; + struct rspamd_task *task; + struct spf_addr *new_addr; + + task = cb->rec->task; + + cb->rec->requests_inflight --; + + if (reply->code == RDNS_RC_NOERROR) { + /* Add all logic for all DNS states here */ + LL_FOREACH (reply->entries, elt_data) { + switch (cb->cur_action) { + case SPF_RESOLVE_MX: + if (elt_data->type == RDNS_REQUEST_MX) { + /* Now resolve A record for this MX */ + if (make_dns_request (task->resolver, task->s, task->task_pool, + spf_record_dns_callback, (void *)cb, RDNS_REQUEST_A, elt_data->content.mx.name)) { + task->dns_requests ++; + cb->rec->requests_inflight ++; + } + } + else if (elt_data->type == RDNS_REQUEST_A) { + if (!cb->addr->data.normal.parsed) { + cb->addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr; + cb->addr->data.normal.mask = 32; + cb->addr->data.normal.parsed = TRUE; + } + else { + /* Insert one more address */ + tmp = spf_addr_find (cb->rec->addrs, cb->addr); + if (tmp) { + new_addr = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_addr)); + memcpy (new_addr, cb->addr, sizeof (struct spf_addr)); + new_addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr; + new_addr->data.normal.parsed = TRUE; + cb->rec->addrs = g_list_insert_before (cb->rec->addrs, tmp, new_addr); + } + else { + msg_info ("<%s>: spf error for domain %s: addresses mismatch", + task->message_id, cb->rec->sender_domain); + } + } + + } +#ifdef HAVE_INET_PTON + else if (elt_data->type == RDNS_REQUEST_AAAA) { + if (!cb->addr->data.normal.parsed) { + memcpy (&cb->addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr)); + cb->addr->data.normal.mask = 32; + cb->addr->data.normal.parsed = TRUE; + cb->addr->data.normal.ipv6 = TRUE; + } + else { + /* Insert one more address */ + tmp = spf_addr_find (cb->rec->addrs, cb->addr); + if (tmp) { + new_addr = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_addr)); + memcpy (new_addr, cb->addr, sizeof (struct 
spf_addr)); + memcpy (&new_addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr)); + new_addr->data.normal.parsed = TRUE; + new_addr->data.normal.ipv6 = TRUE; + cb->rec->addrs = g_list_insert_before (cb->rec->addrs, tmp, new_addr); + } + else { + msg_info ("<%s>: spf error for domain %s: addresses mismatch", + task->message_id, cb->rec->sender_domain); + } + } + + } +#endif + break; + case SPF_RESOLVE_A: + if (elt_data->type == RDNS_REQUEST_A) { + /* XXX: process only one record */ + cb->addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr; + cb->addr->data.normal.mask = 32; + cb->addr->data.normal.parsed = TRUE; + } +#ifdef HAVE_INET_PTON + else if (elt_data->type == RDNS_REQUEST_AAAA) { + memcpy (&cb->addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr)); + cb->addr->data.normal.mask = 32; + cb->addr->data.normal.parsed = TRUE; + cb->addr->data.normal.ipv6 = TRUE; + } +#endif + break; +#ifdef HAVE_INET_PTON + case SPF_RESOLVE_AAA: + if (elt_data->type == RDNS_REQUEST_A) { + /* XXX: process only one record */ + cb->addr->data.normal.d.in4.s_addr = elt_data->content.a.addr.s_addr; + cb->addr->data.normal.mask = 32; + cb->addr->data.normal.parsed = TRUE; + } + else if (elt_data->type == RDNS_REQUEST_AAAA) { + memcpy (&cb->addr->data.normal.d.in6, &elt_data->content.aaa.addr, sizeof (struct in6_addr)); + cb->addr->data.normal.mask = 32; + cb->addr->data.normal.parsed = TRUE; + cb->addr->data.normal.ipv6 = TRUE; + } +#endif + break; + case SPF_RESOLVE_PTR: + break; + case SPF_RESOLVE_REDIRECT: + if (elt_data->type == RDNS_REQUEST_TXT) { + begin = elt_data->content.txt.data; + + if (!cb->in_include && cb->rec->addrs) { + g_list_free (cb->rec->addrs); + cb->rec->addrs = NULL; + } + start_spf_parse (cb->rec, begin, elt_data->ttl); + + } + break; + case SPF_RESOLVE_INCLUDE: + if (elt_data->type == RDNS_REQUEST_TXT) { + begin = elt_data->content.txt.data; +#ifdef SPF_DEBUG + msg_info ("before include"); + 
dump_spf_record (cb->rec->addrs); +#endif + tmp = cb->rec->addrs; + cb->rec->addrs = NULL; + cb->rec->in_include = TRUE; + start_spf_parse (cb->rec, begin, 0); + cb->rec->in_include = FALSE; + +#ifdef SPF_DEBUG + msg_info ("after include"); + dump_spf_record (cb->rec->addrs); +#endif + /* Insert new list */ + cb->addr->is_list = TRUE; + cb->addr->data.list = cb->rec->addrs; + cb->rec->addrs = tmp; + } + break; + case SPF_RESOLVE_EXP: + break; + case SPF_RESOLVE_EXISTS: + if (elt_data->type == RDNS_REQUEST_A) { + /* If specified address resolves, we can accept connection from every IP */ + cb->addr->data.normal.d.in4.s_addr = INADDR_NONE; + cb->addr->data.normal.mask = 0; + } + break; + } + } + } + else if (reply->code == RDNS_RC_NXDOMAIN) { + switch (cb->cur_action) { + case SPF_RESOLVE_MX: + if (rdns_request_has_type (reply->request, RDNS_REQUEST_MX)) { + msg_info ("<%s>: spf error for domain %s: cannot find MX record for %s", + task->message_id, cb->rec->sender_domain, cb->rec->cur_domain); + cb->addr->data.normal.d.in4.s_addr = INADDR_NONE; + cb->addr->data.normal.mask = 32; + } + else { + msg_info ("<%s>: spf error for domain %s: cannot resolve MX record for %s", + task->message_id, cb->rec->sender_domain, cb->rec->cur_domain); + cb->addr->data.normal.d.in4.s_addr = INADDR_NONE; + cb->addr->data.normal.mask = 32; + } + break; + case SPF_RESOLVE_A: + if (rdns_request_has_type (reply->request, RDNS_REQUEST_A)) { + cb->addr->data.normal.d.in4.s_addr = INADDR_NONE; + cb->addr->data.normal.mask = 32; + } + break; +#ifdef HAVE_INET_PTON + case SPF_RESOLVE_AAA: + if (rdns_request_has_type (reply->request, RDNS_REQUEST_AAAA)) { + memset (&cb->addr->data.normal.d.in6, 0xff, sizeof (struct in6_addr)); + cb->addr->data.normal.mask = 32; + } + break; +#endif + case SPF_RESOLVE_PTR: + break; + case SPF_RESOLVE_REDIRECT: + msg_info ("<%s>: spf error for domain %s: cannot resolve TXT record for %s", + task->message_id, cb->rec->sender_domain, cb->rec->cur_domain); + break; + 
case SPF_RESOLVE_INCLUDE: + msg_info ("<%s>: spf error for domain %s: cannot resolve TXT record for %s", + task->message_id, cb->rec->sender_domain, cb->rec->cur_domain); + break; + case SPF_RESOLVE_EXP: + break; + case SPF_RESOLVE_EXISTS: + cb->addr->data.normal.d.in4.s_addr = INADDR_NONE; + cb->addr->data.normal.mask = 32; + break; + } + } + + if (cb->rec->requests_inflight == 0) { + cb->rec->callback (cb->rec, cb->rec->task); + } +} + +static gboolean +parse_spf_a (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + struct spf_dns_cb *cb; + gchar *host; + + CHECK_REC (rec); + + if (begin == NULL || *begin != ':') { + return FALSE; + } + begin ++; + + host = parse_spf_hostmask (task, begin, addr, rec); + + if (!host) { + return FALSE; + } + rec->dns_requests ++; + cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb)); + cb->rec = rec; + cb->addr = addr; + cb->cur_action = SPF_RESOLVE_A; + cb->in_include = rec->in_include; + if (make_dns_request (task->resolver, task->s, task->task_pool, + spf_record_dns_callback, (void *)cb, RDNS_REQUEST_A, host)) { + task->dns_requests ++; + rec->requests_inflight ++; + return TRUE; + } + + return FALSE; + +} + +static gboolean +parse_spf_ptr (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + CHECK_REC (rec); + + msg_info ("<%s>: spf error for domain %s: ptr elements are not implemented", + rec->task->message_id, rec->sender_domain); + return FALSE; +} + +static gboolean +parse_spf_mx (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + struct spf_dns_cb *cb; + gchar *host; + + CHECK_REC (rec); + + if (begin == NULL) { + return FALSE; + } + if (*begin == ':') { + begin ++; + } + + host = parse_spf_hostmask (task, begin, addr, rec); + + if (!host) { + return FALSE; + } + rec->dns_requests ++; + cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb)); + 
cb->rec = rec; + cb->addr = addr; + memset (&addr->data.normal, 0, sizeof (addr->data.normal)); + cb->cur_action = SPF_RESOLVE_MX; + cb->in_include = rec->in_include; + if (make_dns_request (task->resolver, task->s, task->task_pool, + spf_record_dns_callback, (void *)cb, RDNS_REQUEST_MX, host)) { + task->dns_requests ++; + rec->requests_inflight ++; + + return TRUE; + } + + return FALSE; +} + +static gboolean +parse_spf_all (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + /* All is 0/0 */ + memset (&addr->data.normal.d, 0, sizeof (addr->data.normal.d)); + if (rec->in_include) { + /* Ignore all record in include */ + addr->data.normal.mask = 32; + } + else { + addr->data.normal.mask = 0; + addr->data.normal.addr_any = TRUE; + } + + return TRUE; +} + +static gboolean +parse_spf_ip4 (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + /* ip4:addr[/mask] */ + + CHECK_REC (rec); + return parse_spf_ipmask (begin, addr, rec); +} + +#ifdef HAVE_INET_PTON +static gboolean +parse_spf_ip6 (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + /* ip6:addr[/mask] */ + + CHECK_REC (rec); + return parse_spf_ipmask (begin, addr, rec); +} +#endif + +static gboolean +parse_spf_include (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + struct spf_dns_cb *cb; + gchar *domain; + + CHECK_REC (rec); + + if (begin == NULL || *begin != ':') { + return FALSE; + } + begin ++; + rec->dns_requests ++; + + cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb)); + cb->rec = rec; + cb->addr = addr; + cb->cur_action = SPF_RESOLVE_INCLUDE; + cb->in_include = rec->in_include; + addr->is_list = TRUE; + addr->data.list = NULL; + domain = rspamd_mempool_strdup (task->task_pool, begin); + if (make_dns_request (task->resolver, task->s, task->task_pool, + spf_record_dns_callback, (void *)cb, 
RDNS_REQUEST_TXT, domain)) { + task->dns_requests ++; + rec->requests_inflight ++; + + return TRUE; + } + + + return FALSE; +} + +static gboolean +parse_spf_exp (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + CHECK_REC (rec); + + msg_info ("exp record is ignored"); + return TRUE; +} + +static gboolean +parse_spf_redirect (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + struct spf_dns_cb *cb; + gchar *domain; + + CHECK_REC (rec); + + if (begin == NULL || *begin != '=') { + return FALSE; + } + begin ++; + rec->dns_requests ++; + + cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb)); + cb->rec = rec; + cb->addr = addr; + cb->cur_action = SPF_RESOLVE_REDIRECT; + cb->in_include = rec->in_include; + domain = rspamd_mempool_strdup (task->task_pool, begin); + if (make_dns_request (task->resolver, task->s, task->task_pool, + spf_record_dns_callback, (void *)cb, RDNS_REQUEST_TXT, domain)) { + task->dns_requests ++; + rec->requests_inflight ++; + + return TRUE; + } + + return FALSE; +} + +static gboolean +parse_spf_exists (struct rspamd_task *task, const gchar *begin, struct spf_record *rec, struct spf_addr *addr) +{ + struct spf_dns_cb *cb; + gchar *host; + + CHECK_REC (rec); + + if (begin == NULL || *begin != ':') { + return FALSE; + } + begin ++; + rec->dns_requests ++; + + addr->data.normal.mask = 32; + cb = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_dns_cb)); + cb->rec = rec; + cb->addr = addr; + cb->cur_action = SPF_RESOLVE_EXISTS; + cb->in_include = rec->in_include; + host = rspamd_mempool_strdup (task->task_pool, begin); + + if (make_dns_request (task->resolver, task->s, task->task_pool, + spf_record_dns_callback, (void *)cb, RDNS_REQUEST_A, host)) { + task->dns_requests ++; + rec->requests_inflight ++; + + return TRUE; + } + + return FALSE; +} + +static void +reverse_spf_ip (gchar *ip, gint len) +{ + gchar 
ipbuf[sizeof("255.255.255.255") - 1], *p, *c; + gint t = 0, l = len; + + if (len > (gint)sizeof (ipbuf)) { + msg_info ("cannot reverse string of length %d", len); + return; + } + + p = ipbuf + len; + c = ip; + while (-- l) { + if (*c == '.') { + memcpy (p, c - t, t); + *--p = '.'; + c ++; + t = 0; + continue; + } + + t ++; + c ++; + p --; + } + + memcpy (p - 1, c - t, t + 1); + + memcpy (ip, ipbuf, len); +} + +static gchar * +expand_spf_macro (struct rspamd_task *task, struct spf_record *rec, gchar *begin) +{ + gchar *p, *c, *new, *tmp; + gint len = 0, slen = 0, state = 0; +#ifdef HAVE_INET_PTON + gchar ip_buf[INET6_ADDRSTRLEN]; +#endif + gboolean need_expand = FALSE; + + p = begin; + /* Calculate length */ + while (*p) { + switch (state) { + case 0: + /* Skip any character and wait for % in input */ + if (*p == '%') { + state = 1; + } + else { + len ++; + } + + slen ++; + p ++; + break; + case 1: + /* We got % sign, so we should whether wait for { or for - or for _ or for % */ + if (*p == '%' || *p == '-') { + /* Just a single % sign or space */ + len ++; + } + else if (*p == '_') { + /* %20 */ + len += sizeof ("%20") - 1; + } + else if (*p == '{') { + state = 2; + } + else { + /* Something unknown */ + msg_info ("<%s>: spf error for domain %s: unknown spf element", + task->message_id, rec->sender_domain); + return begin; + } + p ++; + slen ++; + break; + case 2: + /* Read macro name */ + switch (g_ascii_tolower (*p)) { + case 'i': +#ifdef HAVE_INET_PTON + len += sizeof (INET6_ADDRSTRLEN) - 1; +#else + len += sizeof (INET_ADDRSTRLEN) - 1; +#endif + break; + case 's': + len += strlen (rec->sender); + break; + case 'l': + len += strlen (rec->local_part); + break; + case 'o': + len += strlen (rec->sender_domain); + break; + case 'd': + len += strlen (rec->cur_domain); + break; + case 'v': + len += sizeof ("in-addr") - 1; + break; + case 'h': + if (task->helo) { + len += strlen (task->helo); + } + break; + default: + msg_info ("<%s>: spf error for domain %s: unknown 
or unsupported spf macro %c in %s", + task->message_id, rec->sender_domain, *p, begin); + return begin; + } + p ++; + slen ++; + state = 3; + break; + case 3: + /* Read modifier */ + if (*p == '}') { + state = 0; + need_expand = TRUE; + } + else if (*p != 'r' && !g_ascii_isdigit (*p)) { + msg_info ("<%s>: spf error for domain %s: unknown or unsupported spf modifier %c in %s", + task->message_id, rec->sender_domain, *p, begin); + return begin; + } + p ++; + slen ++; + break; + } + } + + if (!need_expand) { + /* No expansion needed */ + return begin; + } + + new = rspamd_mempool_alloc (task->task_pool, len + 1); + + c = new; + p = begin; + state = 0; + /* Begin macro expansion */ + + while (*p) { + switch (state) { + case 0: + /* Skip any character and wait for % in input */ + if (*p == '%') { + state = 1; + } + else { + *c = *p; + c ++; + } + + p ++; + break; + case 1: + /* We got % sign, so we should whether wait for { or for - or for _ or for % */ + if (*p == '%') { + /* Just a single % sign or space */ + *c++ = '%'; + } + else if (*p == '-') { + *c++ = ' '; + } + else if (*p == '_') { + /* %20 */ + *c++ = '%'; + *c++ = '2'; + *c++ = '0'; + } + else if (*p == '{') { + state = 2; + } + else { + /* Something unknown */ + msg_info ("<%s>: spf error for domain %s: unknown spf element", + task->message_id, rec->sender_domain); + return begin; + } + p ++; + break; + case 2: + /* Read macro name */ + switch (g_ascii_tolower (*p)) { + case 'i': +#ifdef HAVE_INET_PTON + len = rspamd_strlcpy (ip_buf, + rspamd_inet_address_to_string (&task->from_addr), + sizeof (ip_buf)); + memcpy (c, ip_buf, len); +#else + tmp = inet_ntoa (task->from_addr); + len = strlen (tmp); + memcpy (c, tmp, len); +#endif + c += len; + break; + case 's': + len = strlen (rec->sender); + memcpy (c, rec->sender, len); + c += len; + break; + case 'l': + len = strlen (rec->local_part); + memcpy (c, rec->local_part, len); + c += len; + break; + case 'o': + len = strlen (rec->sender_domain); + memcpy (c, 
rec->sender_domain, len); + c += len; + break; + case 'd': + len = strlen (rec->cur_domain); + memcpy (c, rec->cur_domain, len); + c += len; + break; + case 'v': + len = sizeof ("in-addr") - 1; + memcpy (c, "in-addr", len); + c += len; + break; + case 'h': + if (task->helo) { + tmp = strchr (task->helo, '@'); + if (tmp) { + len = strlen (tmp + 1); + memcpy (c, tmp + 1, len); + c += len; + } + } + break; + default: + msg_info ("<%s>: spf error for domain %s: unknown or unsupported spf macro %c in %s", + task->message_id, rec->sender_domain, *p, begin); + return begin; + } + p ++; + state = 3; + break; + case 3: + /* Read modifier */ + if (*p == '}') { + state = 0; + } + else if (*p == 'r' && len != 0) { + reverse_spf_ip (c - len, len); + len = 0; + } + else if (g_ascii_isdigit (*p)) { + /*XXX: try to implement domain strimming */ + } + else { + msg_info ("<%s>: spf error for domain %s: unknown or unsupported spf macro %c in %s", + task->message_id, rec->sender_domain, *p, begin); + return begin; + } + p ++; + break; + } + } + /* Null terminate */ + *c = '\0'; + + return new; + +} + +#define NEW_ADDR(x) do { \ + (x) = rspamd_mempool_alloc (task->task_pool, sizeof (struct spf_addr)); \ + (x)->mech = check_spf_mech (rec->cur_elt, &need_shift); \ + (x)->spf_string = rspamd_mempool_strdup (task->task_pool, begin); \ + memset (&(x)->data.normal, 0, sizeof ((x)->data.normal)); \ + (x)->data.normal.mask = 32; \ + (x)->is_list = FALSE; \ +} while (0); + +/* Read current element and try to parse record */ +static gboolean +parse_spf_record (struct rspamd_task *task, struct spf_record *rec) +{ + struct spf_addr *new = NULL; + gboolean need_shift, res = FALSE; + gchar *begin; + + rec->cur_elt = rec->elts[rec->elt_num]; + if (rec->cur_elt == NULL) { + return FALSE; + } + else if (*rec->cur_elt == '\0') { + /* Silently skip empty elements */ + rec->elt_num ++; + return TRUE; + } + else { + begin = expand_spf_macro (task, rec, rec->cur_elt); + if (*begin == '?' 
|| *begin == '+' || *begin == '-' || *begin == '~') { + begin ++; + } + + + /* Now check what we have */ + switch (g_ascii_tolower (*begin)) { + case 'a': + /* all or a */ + if (g_ascii_strncasecmp (begin, SPF_ALL, sizeof (SPF_ALL) - 1) == 0) { + NEW_ADDR (new); + begin += sizeof (SPF_ALL) - 1; + res = parse_spf_all (task, begin, rec, new); + } + else if (g_ascii_strncasecmp (begin, SPF_A, sizeof (SPF_A) - 1) == 0) { + NEW_ADDR (new); + begin += sizeof (SPF_A) - 1; + res = parse_spf_a (task, begin, rec, new); + } + else { + msg_info ("<%s>: spf error for domain %s: bad spf command %s", + task->message_id, rec->sender_domain, begin); + } + break; + case 'i': + /* include or ip4 */ + if (g_ascii_strncasecmp (begin, SPF_IP4, sizeof (SPF_IP4) - 1) == 0) { + NEW_ADDR (new); + begin += sizeof (SPF_IP4) - 1; + res = parse_spf_ip4 (task, begin, rec, new); + } + else if (g_ascii_strncasecmp (begin, SPF_INCLUDE, sizeof (SPF_INCLUDE) - 1) == 0) { + NEW_ADDR (new); + begin += sizeof (SPF_INCLUDE) - 1; + res = parse_spf_include (task, begin, rec, new); + } + else if (g_ascii_strncasecmp (begin, SPF_IP6, sizeof (SPF_IP6) - 1) == 0) { +#ifdef HAVE_INET_PTON + NEW_ADDR (new); + begin += sizeof (SPF_IP6) - 1; + res = parse_spf_ip6 (task, begin, rec, new); +#else + msg_info ("ignoring ip6 spf command as IPv6 is not supported: %s", begin); + new = NULL; + res = TRUE; + begin += sizeof (SPF_IP6) - 1; +#endif + } + else { + msg_info ("<%s>: spf error for domain %s: bad spf command %s", + task->message_id, rec->sender_domain, begin); + } + break; + case 'm': + /* mx */ + if (g_ascii_strncasecmp (begin, SPF_MX, sizeof (SPF_MX) - 1) == 0) { + NEW_ADDR (new); + begin += sizeof (SPF_MX) - 1; + res = parse_spf_mx (task, begin, rec, new); + } + else { + msg_info ("<%s>: spf error for domain %s: bad spf command %s", + task->message_id, rec->sender_domain, begin); + } + break; + case 'p': + /* ptr */ + if (g_ascii_strncasecmp (begin, SPF_PTR, sizeof (SPF_PTR) - 1) == 0) { + NEW_ADDR (new); + 
begin += sizeof (SPF_PTR) - 1; + res = parse_spf_ptr (task, begin, rec, new); + } + else { + msg_info ("<%s>: spf error for domain %s: bad spf command %s", + task->message_id, rec->sender_domain, begin); + } + break; + case 'e': + /* exp or exists */ + if (g_ascii_strncasecmp (begin, SPF_EXP, sizeof (SPF_EXP) - 1) == 0) { + begin += sizeof (SPF_EXP) - 1; + res = parse_spf_exp (task, begin, rec, NULL); + } + else if (g_ascii_strncasecmp (begin, SPF_EXISTS, sizeof (SPF_EXISTS) - 1) == 0) { + NEW_ADDR (new); + begin += sizeof (SPF_EXISTS) - 1; + res = parse_spf_exists (task, begin, rec, new); + } + else { + msg_info ("<%s>: spf error for domain %s: bad spf command %s", + task->message_id, rec->sender_domain, begin); + } + break; + case 'r': + /* redirect */ + if (g_ascii_strncasecmp (begin, SPF_REDIRECT, sizeof (SPF_REDIRECT) - 1) == 0) { + begin += sizeof (SPF_REDIRECT) - 1; + res = parse_spf_redirect (task, begin, rec, NULL); + } + else { + msg_info ("<%s>: spf error for domain %s: bad spf command %s", + task->message_id, rec->sender_domain, begin); + } + break; + case 'v': + if (g_ascii_strncasecmp (begin, "v=spf", sizeof ("v=spf") - 1) == 0) { + /* Skip this element till the end of record */ + while (*begin && !g_ascii_isspace (*begin)) { + begin ++; + } + } + break; + default: + msg_info ("<%s>: spf error for domain %s: bad spf command %s", + task->message_id, rec->sender_domain, begin); + break; + } + if (res) { + if (new != NULL) { + rec->addrs = g_list_prepend (rec->addrs, new); + } + rec->elt_num ++; + } + } + + return res; +} +#undef NEW_ADDR + +static void +parse_spf_scopes (struct spf_record *rec, gchar **begin) +{ + for (;;) { + if (g_ascii_strncasecmp (*begin, SPF_SCOPE_PRA, sizeof (SPF_SCOPE_PRA) - 1) == 0) { + *begin += sizeof (SPF_SCOPE_PRA) - 1; + /* XXX: Implement actual PRA check */ + /* extract_pra_info (rec); */ + continue; + } + else if (g_ascii_strncasecmp (*begin, SPF_SCOPE_MFROM, sizeof (SPF_SCOPE_MFROM) - 1) == 0) { + /* mfrom is standart 
spf1 check */ + *begin += sizeof (SPF_SCOPE_MFROM) - 1; + continue; + } + else if (**begin != ',') { + break; + } + (*begin) ++; + } +} + +static void +start_spf_parse (struct spf_record *rec, gchar *begin, guint ttl) +{ + /* Skip spaces */ + while (g_ascii_isspace (*begin)) { + begin ++; + } + + if (g_ascii_strncasecmp (begin, SPF_VER1_STR, sizeof (SPF_VER1_STR) - 1) == 0) { + begin += sizeof (SPF_VER1_STR) - 1; + while (g_ascii_isspace (*begin) && *begin) { + begin ++; + } + rec->elts = g_strsplit_set (begin, " ", 0); + rec->elt_num = 0; + if (rec->elts) { + rspamd_mempool_add_destructor (rec->task->task_pool, (rspamd_mempool_destruct_t)g_strfreev, rec->elts); + rec->cur_elt = rec->elts[0]; + while (parse_spf_record (rec->task, rec)); + if (ttl != 0) { + rec->ttl = ttl; + } + } + } + else if (g_ascii_strncasecmp (begin, SPF_VER2_STR, sizeof (SPF_VER2_STR) - 1) == 0) { + /* Skip one number of record, so no we are here spf2.0/ */ + begin += sizeof (SPF_VER2_STR); + if (*begin != '/') { + msg_info ("<%s>: spf error for domain %s: sender id is invalid", + rec->task->message_id, rec->sender_domain); + } + else { + begin ++; + parse_spf_scopes (rec, &begin); + } + /* Now common spf record */ + while (g_ascii_isspace (*begin) && *begin) { + begin ++; + } + rec->elts = g_strsplit_set (begin, " ", 0); + rec->elt_num = 0; + if (rec->elts) { + rspamd_mempool_add_destructor (rec->task->task_pool, (rspamd_mempool_destruct_t)g_strfreev, rec->elts); + rec->cur_elt = rec->elts[0]; + while (parse_spf_record (rec->task, rec)); + if (ttl != 0) { + rec->ttl = ttl; + } + } + } + else { + msg_debug ("<%s>: spf error for domain %s: bad spf record version: %*s", + rec->task->message_id, rec->sender_domain, sizeof (SPF_VER1_STR) - 1, begin); + } +} + +static void +spf_dns_callback (struct rdns_reply *reply, gpointer arg) +{ + struct spf_record *rec = arg; + struct rdns_reply_entry *elt; + + rec->requests_inflight --; + if (reply->code == RDNS_RC_NOERROR) { + LL_FOREACH (reply->entries, 
elt) { + start_spf_parse (rec, elt->content.txt.data, elt->ttl); + } + } + + if (rec->requests_inflight == 0) { + rec->callback (rec, rec->task); + } +} + +gchar * +get_spf_domain (struct rspamd_task *task) +{ + gchar *domain, *res = NULL; + GList *domains; + + if (task->from && (domain = strchr (task->from, '@')) != NULL && *domain == '@') { + res = rspamd_mempool_strdup (task->task_pool, domain + 1); + if ((domain = strchr (res, '>')) != NULL) { + *domain = '\0'; + } + } + else { + /* Extract from header */ + domains = message_get_header (task->task_pool, task->message, "From", FALSE); + + if (domains != NULL) { + res = rspamd_mempool_strdup (task->task_pool, domains->data); + + if ((domain = strrchr (res, '@')) == NULL) { + g_list_free (domains); + return NULL; + } + res = rspamd_mempool_strdup (task->task_pool, domain + 1); + g_list_free (domains); + + if ((domain = strchr (res, '>')) != NULL) { + *domain = '\0'; + } + } + } + + return res; +} + +gboolean +resolve_spf (struct rspamd_task *task, spf_cb_t callback) +{ + struct spf_record *rec; + gchar *domain; + GList *domains; + + rec = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct spf_record)); + rec->task = task; + rec->callback = callback; + /* Add destructor */ + rspamd_mempool_add_destructor (task->task_pool, (rspamd_mempool_destruct_t)spf_record_destructor, rec); + + /* Extract from data */ + if (task->from && (domain = strchr (task->from, '@')) != NULL && *domain == '@') { + rec->sender = task->from; + + rec->local_part = rspamd_mempool_strdup (task->task_pool, task->from); + *(rec->local_part + (domain - task->from)) = '\0'; + if (*rec->local_part == '<') { + memmove (rec->local_part, rec->local_part + 1, strlen (rec->local_part)); + } + rec->cur_domain = rspamd_mempool_strdup (task->task_pool, domain + 1); + if ((domain = strchr (rec->cur_domain, '>')) != NULL) { + *domain = '\0'; + } + rec->sender_domain = rec->cur_domain; + + if (make_dns_request (task->resolver, task->s, task->task_pool, 
spf_dns_callback, + (void *)rec, RDNS_REQUEST_TXT, rec->cur_domain)) { + task->dns_requests ++; + rec->requests_inflight ++; + return TRUE; + } + } + else { + /* Extract from header */ + domains = message_get_header (task->task_pool, task->message, "From", FALSE); + + if (domains != NULL) { + rec->cur_domain = rspamd_mempool_strdup (task->task_pool, domains->data); + g_list_free (domains); + + if ((domain = strrchr (rec->cur_domain, '@')) == NULL) { + return FALSE; + } + rec->sender = rspamd_mempool_strdup (task->task_pool, rec->cur_domain); + rec->local_part = rec->cur_domain; + *domain = '\0'; + rec->cur_domain = domain + 1; + + if ((domain = strchr (rec->local_part, '<')) != NULL) { + memmove (rec->local_part, domain + 1, strlen (domain)); + } + + if ((domain = strchr (rec->cur_domain, '>')) != NULL) { + *domain = '\0'; + } + rec->sender_domain = rec->cur_domain; + if (make_dns_request (task->resolver, task->s, task->task_pool, + spf_dns_callback, (void *)rec, RDNS_REQUEST_TXT, rec->cur_domain)) { + task->dns_requests ++; + rec->requests_inflight ++; + return TRUE; + } + } + } + + return FALSE; +} + +/* + * vi:ts=4 + */ diff --git a/src/libserver/spf.h b/src/libserver/spf.h new file mode 100644 index 000000000..94c613e42 --- /dev/null +++ b/src/libserver/spf.h @@ -0,0 +1,84 @@ +#ifndef RSPAMD_SPF_H +#define RSPAMD_SPF_H + +#include "config.h" + +struct rspamd_task; +struct spf_record; + +typedef void (*spf_cb_t)(struct spf_record *record, struct rspamd_task *task); + +typedef enum spf_mech_e { + SPF_FAIL, + SPF_SOFT_FAIL, + SPF_PASS, + SPF_NEUTRAL +} spf_mech_t; + +typedef enum spf_action_e { + SPF_RESOLVE_MX, + SPF_RESOLVE_A, + SPF_RESOLVE_PTR, + SPF_RESOLVE_AAA, + SPF_RESOLVE_REDIRECT, + SPF_RESOLVE_INCLUDE, + SPF_RESOLVE_EXISTS, + SPF_RESOLVE_EXP +} spf_action_t; + +struct spf_addr { + union { + struct { + union { + struct in_addr in4; +#ifdef HAVE_INET_PTON + struct in6_addr in6; +#endif + } d; + guint32 mask; + gboolean ipv6; + gboolean parsed; + gboolean 
addr_any; + } normal; + GList *list; + } data; + gboolean is_list; + spf_mech_t mech; + gchar *spf_string; +}; + +struct spf_record { + gchar **elts; + + gchar *cur_elt; + gint elt_num; + gint nested; + gint dns_requests; + gint requests_inflight; + + guint ttl; + + GList *addrs; + gchar *cur_domain; + gchar *sender; + gchar *sender_domain; + gchar *local_part; + struct rspamd_task *task; + spf_cb_t callback; + + gboolean in_include; +}; + + +/* + * Resolve spf record for specified task and call a callback after resolution fails/succeed + */ +gboolean resolve_spf (struct rspamd_task *task, spf_cb_t callback); + +/* + * Get a domain for spf for specified task + */ +gchar *get_spf_domain (struct rspamd_task *task); + + +#endif diff --git a/src/libserver/statfile.c b/src/libserver/statfile.c new file mode 100644 index 000000000..4c1cc13fb --- /dev/null +++ b/src/libserver/statfile.c @@ -0,0 +1,927 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "statfile.h" +#include "main.h" + +#define RSPAMD_STATFILE_VERSION {'1', '2'} +#define BACKUP_SUFFIX ".old" + +/* Maximum number of statistics files */ +#define STATFILES_MAX 255 +static void statfile_pool_set_block_common ( + statfile_pool_t * pool, stat_file_t * file, + guint32 h1, guint32 h2, + time_t t, double value, + gboolean from_now); + +static gint +cmpstatfile (const void *a, const void *b) +{ + const stat_file_t *s1 = a, *s2 = b; + + return g_ascii_strcasecmp (s1->filename, s2->filename); +} + +/* Convert statfile version 1.0 to statfile version 1.2, saving backup */ +struct stat_file_header_10 { + u_char magic[3]; /**< magic signature ('r' 's' 'd') */ + u_char version[2]; /**< version of statfile */ + u_char padding[3]; /**< padding */ + guint64 create_time; /**< create time (time_t->guint64) */ +}; + +static gboolean +convert_statfile_10 (stat_file_t * file) +{ + gchar *backup_name; + struct stat st; + struct stat_file_header header = { + .magic = {'r', 's', 'd'}, + .version = RSPAMD_STATFILE_VERSION, + .padding = {0, 0, 0}, + .revision = 0, + .rev_time = 0 + }; + + + /* Format backup name */ + backup_name = g_strdup_printf ("%s.%s", file->filename, BACKUP_SUFFIX); + + msg_info ("convert old statfile %s to version %c.%c, backup in %s", file->filename, + header.version[0], header.version[1], backup_name); + + if (stat (backup_name, &st) != -1) { + msg_info ("replace old %s", backup_name); + unlink (backup_name); + 
} + + rename (file->filename, backup_name); + g_free (backup_name); + + /* XXX: maybe race condition here */ + unlock_file (file->fd, FALSE); + close (file->fd); + if ((file->fd = open (file->filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { + msg_info ("cannot create file %s, error %d, %s", file->filename, errno, strerror (errno)); + return FALSE; + } + lock_file (file->fd, FALSE); + /* Now make new header and copy it to new file */ + if (write (file->fd, &header, sizeof (header)) == -1) { + msg_info ("cannot write to file %s, error %d, %s", file->filename, errno, strerror (errno)); + return FALSE; + } + /* Now write old map to new file */ + if (write (file->fd, ((u_char *)file->map + sizeof (struct stat_file_header_10)), + file->len - sizeof (struct stat_file_header_10)) == -1) { + msg_info ("cannot write to file %s, error %d, %s", file->filename, errno, strerror (errno)); + return FALSE; + } + /* Unmap old memory and map new */ + munmap (file->map, file->len); + file->len = file->len + sizeof (struct stat_file_header) - sizeof (struct stat_file_header_10); +#ifdef HAVE_MMAP_NOCORE + if ((file->map = mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NOCORE, file->fd, 0)) == MAP_FAILED) { +#else + if ((file->map = mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED, file->fd, 0)) == MAP_FAILED) { +#endif + msg_info ("cannot mmap file %s, error %d, %s", file->filename, errno, strerror (errno)); + return FALSE; + } + + return TRUE; +} + +/* Check whether specified file is statistic file and calculate its len in blocks */ +static gint +statfile_pool_check (stat_file_t * file) +{ + struct stat_file *f; + gchar *c; + static gchar valid_version[] = RSPAMD_STATFILE_VERSION; + + + if (!file || !file->map) { + return -1; + } + + if (file->len < sizeof (struct stat_file)) { + msg_info ("file %s is too short to be stat file: %z", file->filename, file->len); + return -1; + } + + f = (struct stat_file *)file->map; + c = f->header.magic; + 
/* Check magic and version */ + if (*c++ != 'r' || *c++ != 's' || *c++ != 'd') { + msg_info ("file %s is invalid stat file", file->filename); + return -1; + } + /* Now check version and convert old version to new one (that can be used for sync */ + if (*c == 1 && *(c + 1) == 0) { + if (!convert_statfile_10 (file)) { + return -1; + } + f = (struct stat_file *)file->map; + } + else if (memcmp (c, valid_version, sizeof (valid_version)) != 0) { + /* Unknown version */ + msg_info ("file %s has invalid version %c.%c", file->filename, '0' + *c, '0' + *(c + 1)); + return -1; + } + + /* Check first section and set new offset */ + file->cur_section.code = f->section.code; + file->cur_section.length = f->section.length; + if (file->cur_section.length * sizeof (struct stat_file_block) > file->len) { + msg_info ("file %s is truncated: %z, must be %z", file->filename, file->len, file->cur_section.length * sizeof (struct stat_file_block)); + return -1; + } + file->seek_pos = sizeof (struct stat_file) - sizeof (struct stat_file_block); + + return 0; +} + + +statfile_pool_t * +statfile_pool_new (rspamd_mempool_t *pool, gboolean use_mlock) +{ + statfile_pool_t *new; + + new = rspamd_mempool_alloc0 (pool, sizeof (statfile_pool_t)); + new->pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + new->files = rspamd_mempool_alloc0 (new->pool, STATFILES_MAX * sizeof (stat_file_t)); + new->lock = rspamd_mempool_get_mutex (new->pool); + new->mlock_ok = use_mlock; + + return new; +} + +static stat_file_t * +statfile_pool_reindex (statfile_pool_t * pool, gchar *filename, size_t old_size, size_t size) +{ + gchar *backup; + gint fd; + stat_file_t *new; + u_char *map, *pos; + struct stat_file_block *block; + struct stat_file_header *header; + + if (size < + sizeof (struct stat_file_header) + sizeof (struct stat_file_section) + sizeof (block)) { + msg_err ("file %s is too small to carry any statistic: %z", filename, size); + return NULL; + } + + /* First of all rename old file */ + 
rspamd_mempool_lock_mutex (pool->lock); + + backup = g_strconcat (filename, ".old", NULL); + if (rename (filename, backup) == -1) { + msg_err ("cannot rename %s to %s: %s", filename, backup, strerror (errno)); + g_free (backup); + rspamd_mempool_unlock_mutex (pool->lock); + return NULL; + } + + rspamd_mempool_unlock_mutex (pool->lock); + + /* Now create new file with required size */ + if (statfile_pool_create (pool, filename, size) != 0) { + msg_err ("cannot create new file"); + g_free (backup); + return NULL; + } + /* Now open new file and start copying */ + fd = open (backup, O_RDONLY); + new = statfile_pool_open (pool, filename, size, TRUE); + + if (fd == -1 || new == NULL) { + msg_err ("cannot open file: %s", strerror (errno)); + g_free (backup); + return NULL; + } + + /* Now start reading blocks from old statfile */ + if ((map = mmap (NULL, old_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { + msg_err ("cannot mmap file: %s", strerror (errno)); + close (fd); + g_free (backup); + return NULL; + } + + pos = map + (sizeof (struct stat_file) - sizeof (struct stat_file_block)); + while (old_size - (pos - map) >= sizeof (struct stat_file_block)) { + block = (struct stat_file_block *)pos; + if (block->hash1 != 0 && block->value != 0) { + statfile_pool_set_block_common (pool, new, block->hash1, block->hash2, 0, block->value, FALSE); + } + pos += sizeof (block); + } + + header = (struct stat_file_header *)map; + statfile_set_revision (new, header->revision, header->rev_time); + + munmap (map, old_size); + close (fd); + unlink (backup); + g_free (backup); + + return new; + +} + +/* + * Pre-load mmaped file into memory + */ +static void +statfile_preload (stat_file_t *file) +{ + guint8 *pos, *end; + volatile guint8 t; + gsize size; + + pos = (guint8 *)file->map; + end = (guint8 *)file->map + file->len; + + if (madvise (pos, end - pos, MADV_SEQUENTIAL) == -1) { + msg_info ("madvise failed: %s", strerror (errno)); + } + else { + /* Load pages of file */ +#ifdef 
HAVE_GETPAGESIZE + size = getpagesize (); +#else + size = sysconf (_SC_PAGESIZE); +#endif + while (pos < end) { + t = *pos; + (void)t; + pos += size; + } + } +} + +stat_file_t * +statfile_pool_open (statfile_pool_t * pool, gchar *filename, size_t size, gboolean forced) +{ + struct stat st; + stat_file_t *new_file; + + if ((new_file = statfile_pool_is_open (pool, filename)) != NULL) { + return new_file; + } + + if (pool->opened >= STATFILES_MAX - 1) { + msg_err ("reached hard coded limit of statfiles opened: %d", STATFILES_MAX); + return NULL; + } + + if (stat (filename, &st) == -1) { + msg_info ("cannot stat file %s, error %s, %d", filename, strerror (errno), errno); + return NULL; + } + + rspamd_mempool_lock_mutex (pool->lock); + if (!forced && labs (size - st.st_size) > (long)sizeof (struct stat_file) * 2 + && size > sizeof (struct stat_file)) { + rspamd_mempool_unlock_mutex (pool->lock); + msg_warn ("need to reindex statfile old size: %Hz, new size: %Hz", (size_t)st.st_size, size); + return statfile_pool_reindex (pool, filename, st.st_size, size); + } + else if (size < sizeof (struct stat_file)) { + msg_err ("requested to shrink statfile to %Hz but it is too small", size); + } + + new_file = &pool->files[pool->opened++]; + bzero (new_file, sizeof (stat_file_t)); + if ((new_file->fd = open (filename, O_RDWR)) == -1) { + msg_info ("cannot open file %s, error %d, %s", filename, errno, strerror (errno)); + rspamd_mempool_unlock_mutex (pool->lock); + pool->opened--; + return NULL; + } + + if ((new_file->map = mmap (NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, new_file->fd, 0)) == MAP_FAILED) { + close (new_file->fd); + rspamd_mempool_unlock_mutex (pool->lock); + msg_info ("cannot mmap file %s, error %d, %s", filename, errno, strerror (errno)); + pool->opened--; + return NULL; + + } + + rspamd_strlcpy (new_file->filename, filename, sizeof (new_file->filename)); + new_file->len = st.st_size; + /* Try to lock pages in RAM */ + if (pool->mlock_ok) { + if (mlock 
(new_file->map, new_file->len) == -1) { + msg_warn ("mlock of statfile failed, maybe you need to increase RLIMIT_MEMLOCK limit for a process: %s", strerror (errno)); + pool->mlock_ok = FALSE; + } + } + /* Acquire lock for this operation */ + lock_file (new_file->fd, FALSE); + if (statfile_pool_check (new_file) == -1) { + pool->opened--; + rspamd_mempool_unlock_mutex (pool->lock); + unlock_file (new_file->fd, FALSE); + munmap (new_file->map, st.st_size); + return NULL; + } + unlock_file (new_file->fd, FALSE); + + new_file->open_time = time (NULL); + new_file->access_time = new_file->open_time; + new_file->lock = rspamd_mempool_get_mutex (pool->pool); + + statfile_preload (new_file); + + rspamd_mempool_unlock_mutex (pool->lock); + + return statfile_pool_is_open (pool, filename); +} + +gint +statfile_pool_close (statfile_pool_t * pool, stat_file_t * file, gboolean keep_sorted) +{ + stat_file_t *pos; + + if ((pos = statfile_pool_is_open (pool, file->filename)) == NULL) { + msg_info ("file %s is not opened", file->filename); + return -1; + } + + rspamd_mempool_lock_mutex (pool->lock); + + if (file->map) { + msg_info ("syncing statfile %s", file->filename); + msync (file->map, file->len, MS_ASYNC); + munmap (file->map, file->len); + } + if (file->fd != -1) { + close (file->fd); + } + /* Move the remain statfiles */ + memmove (pos, ((guint8 *)pos) + sizeof (stat_file_t), + (--pool->opened - (pos - pool->files)) * sizeof (stat_file_t)); + + rspamd_mempool_unlock_mutex (pool->lock); + + return 0; +} + +gint +statfile_pool_create (statfile_pool_t * pool, gchar *filename, size_t size) +{ + struct stat_file_header header = { + .magic = {'r', 's', 'd'}, + .version = RSPAMD_STATFILE_VERSION, + .padding = {0, 0, 0}, + .revision = 0, + .rev_time = 0, + .used_blocks = 0 + }; + struct stat_file_section section = { + .code = STATFILE_SECTION_COMMON, + }; + struct stat_file_block block = { 0, 0, 0 }; + gint fd; + guint buflen = 0, nblocks; + gchar *buf = NULL; + + if 
(statfile_pool_is_open (pool, filename) != NULL) { + msg_info ("file %s is already opened", filename); + return 0; + } + + if (size < + sizeof (struct stat_file_header) + sizeof (struct stat_file_section) + sizeof (block)) { + msg_err ("file %s is too small to carry any statistic: %z", filename, size); + return -1; + } + + rspamd_mempool_lock_mutex (pool->lock); + nblocks = (size - sizeof (struct stat_file_header) - sizeof (struct stat_file_section)) / sizeof (struct stat_file_block); + header.total_blocks = nblocks; + + if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { + msg_info ("cannot create file %s, error %d, %s", filename, errno, strerror (errno)); + rspamd_mempool_unlock_mutex (pool->lock); + return -1; + } + + rspamd_fallocate (fd, 0, sizeof (header) + sizeof (section) + sizeof (block) * nblocks); + + header.create_time = (guint64) time (NULL); + if (write (fd, &header, sizeof (header)) == -1) { + msg_info ("cannot write header to file %s, error %d, %s", filename, errno, strerror (errno)); + close (fd); + rspamd_mempool_unlock_mutex (pool->lock); + return -1; + } + + section.length = (guint64) nblocks; + if (write (fd, §ion, sizeof (section)) == -1) { + msg_info ("cannot write section header to file %s, error %d, %s", filename, errno, strerror (errno)); + close (fd); + rspamd_mempool_unlock_mutex (pool->lock); + return -1; + } + + /* Buffer for write 256 blocks at once */ + if (nblocks > 256) { + buflen = sizeof (block) * 256; + buf = g_malloc0 (buflen); + } + + while (nblocks) { + if (nblocks > 256) { + /* Just write buffer */ + if (write (fd, buf, buflen) == -1) { + msg_info ("cannot write blocks buffer to file %s, error %d, %s", filename, errno, strerror (errno)); + close (fd); + rspamd_mempool_unlock_mutex (pool->lock); + g_free (buf); + return -1; + } + nblocks -= 256; + } + else { + if (write (fd, &block, sizeof (block)) == -1) { + msg_info ("cannot write block to file %s, error %d, %s", filename, errno, strerror 
(errno)); + close (fd); + if (buf) { + g_free (buf); + } + rspamd_mempool_unlock_mutex (pool->lock); + return -1; + } + nblocks --; + } + } + + close (fd); + rspamd_mempool_unlock_mutex (pool->lock); + + if (buf) { + g_free (buf); + } + + return 0; +} + +void +statfile_pool_delete (statfile_pool_t * pool) +{ + gint i; + + for (i = 0; i < pool->opened; i++) { + statfile_pool_close (pool, &pool->files[i], FALSE); + } + rspamd_mempool_delete (pool->pool); +} + +void +statfile_pool_lock_file (statfile_pool_t * pool, stat_file_t * file) +{ + + rspamd_mempool_lock_mutex (file->lock); +} + +void +statfile_pool_unlock_file (statfile_pool_t * pool, stat_file_t * file) +{ + + rspamd_mempool_unlock_mutex (file->lock); +} + +double +statfile_pool_get_block (statfile_pool_t * pool, stat_file_t * file, guint32 h1, guint32 h2, time_t now) +{ + struct stat_file_block *block; + guint i, blocknum; + u_char *c; + + + file->access_time = now; + if (!file->map) { + return 0; + } + + blocknum = h1 % file->cur_section.length; + c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct stat_file_block); + block = (struct stat_file_block *)c; + + for (i = 0; i < CHAIN_LENGTH; i++) { + if (i + blocknum >= file->cur_section.length) { + break; + } + if (block->hash1 == h1 && block->hash2 == h2) { + return block->value; + } + c += sizeof (struct stat_file_block); + block = (struct stat_file_block *)c; + } + + + return 0; +} + +static void +statfile_pool_set_block_common (statfile_pool_t * pool, stat_file_t * file, guint32 h1, guint32 h2, time_t t, double value, gboolean from_now) +{ + struct stat_file_block *block, *to_expire = NULL; + struct stat_file_header *header; + guint i, blocknum; + u_char *c; + double min = G_MAXDOUBLE; + + if (from_now) { + file->access_time = t; + } + if (!file->map) { + return; + } + + blocknum = h1 % file->cur_section.length; + header = (struct stat_file_header *)file->map; + c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct 
stat_file_block); + block = (struct stat_file_block *)c; + + for (i = 0; i < CHAIN_LENGTH; i++) { + if (i + blocknum >= file->cur_section.length) { + /* Need to expire some block in chain */ + msg_info ("chain %ud is full in statfile %s, starting expire", blocknum, file->filename); + break; + } + /* First try to find block in chain */ + if (block->hash1 == h1 && block->hash2 == h2) { + block->value = value; + return; + } + /* Check whether we have a free block in chain */ + if (block->hash1 == 0 && block->hash2 == 0) { + /* Write new block here */ + msg_debug ("found free block %ud in chain %ud, set h1=%ud, h2=%ud", i, blocknum, h1, h2); + block->hash1 = h1; + block->hash2 = h2; + block->value = value; + header->used_blocks ++; + + return; + } + + /* Expire block with minimum value otherwise */ + if (block->value < min) { + to_expire = block; + min = block->value; + } + c += sizeof (struct stat_file_block); + block = (struct stat_file_block *)c; + } + + /* Try expire some block */ + if (to_expire) { + block = to_expire; + } + else { + /* Expire first block in chain */ + c = (u_char *) file->map + file->seek_pos + blocknum * sizeof (struct stat_file_block); + block = (struct stat_file_block *)c; + } + + block->hash1 = h1; + block->hash2 = h2; + block->value = value; +} + +void +statfile_pool_set_block (statfile_pool_t * pool, stat_file_t * file, guint32 h1, guint32 h2, time_t now, double value) +{ + statfile_pool_set_block_common (pool, file, h1, h2, now, value, TRUE); +} + +stat_file_t * +statfile_pool_is_open (statfile_pool_t * pool, gchar *filename) +{ + static stat_file_t f, *ret; + rspamd_strlcpy (f.filename, filename, sizeof (f.filename)); + ret = lfind (&f, pool->files, (size_t *)&pool->opened, sizeof (stat_file_t), cmpstatfile); + return ret; +} + +guint32 +statfile_pool_get_section (statfile_pool_t * pool, stat_file_t * file) +{ + + return file->cur_section.code; +} + +gboolean +statfile_pool_set_section (statfile_pool_t * pool, stat_file_t * file, guint32 
code, gboolean from_begin) +{ + struct stat_file_section *sec; + off_t cur_offset; + + + /* Try to find section */ + if (from_begin) { + cur_offset = sizeof (struct stat_file_header); + } + else { + cur_offset = file->seek_pos - sizeof (struct stat_file_section); + } + while (cur_offset < (off_t)file->len) { + sec = (struct stat_file_section *)((gchar *)file->map + cur_offset); + if (sec->code == code) { + file->cur_section.code = code; + file->cur_section.length = sec->length; + file->seek_pos = cur_offset + sizeof (struct stat_file_section); + return TRUE; + } + cur_offset += sec->length; + } + + return FALSE; +} + +gboolean +statfile_pool_add_section (statfile_pool_t * pool, stat_file_t * file, guint32 code, guint64 length) +{ + struct stat_file_section sect; + struct stat_file_block block = { 0, 0, 0 }; + + if (lseek (file->fd, 0, SEEK_END) == -1) { + msg_info ("cannot lseek file %s, error %d, %s", file->filename, errno, strerror (errno)); + return FALSE; + } + + sect.code = code; + sect.length = length; + + if (write (file->fd, §, sizeof (sect)) == -1) { + msg_info ("cannot write block to file %s, error %d, %s", file->filename, errno, strerror (errno)); + return FALSE; + } + + while (length--) { + if (write (file->fd, &block, sizeof (block)) == -1) { + msg_info ("cannot write block to file %s, error %d, %s", file->filename, errno, strerror (errno)); + return FALSE; + } + } + + /* Lock statfile to remap memory */ + statfile_pool_lock_file (pool, file); + munmap (file->map, file->len); + fsync (file->fd); + file->len += length; + + if ((file->map = mmap (NULL, file->len, PROT_READ | PROT_WRITE, MAP_SHARED, file->fd, 0)) == NULL) { + msg_info ("cannot mmap file %s, error %d, %s", file->filename, errno, strerror (errno)); + return FALSE; + } + statfile_pool_unlock_file (pool, file); + + return TRUE; + +} + +guint32 +statfile_get_section_by_name (const gchar *name) +{ + if (g_ascii_strcasecmp (name, "common") == 0) { + return STATFILE_SECTION_COMMON; + } + else if 
(g_ascii_strcasecmp (name, "header") == 0) { + return STATFILE_SECTION_HEADERS; + } + else if (g_ascii_strcasecmp (name, "url") == 0) { + return STATFILE_SECTION_URLS; + } + else if (g_ascii_strcasecmp (name, "regexp") == 0) { + return STATFILE_SECTION_REGEXP; + } + + return 0; +} + +gboolean +statfile_set_revision (stat_file_t *file, guint64 rev, time_t time) +{ + struct stat_file_header *header; + + if (file == NULL || file->map == NULL) { + return FALSE; + } + + header = (struct stat_file_header *)file->map; + + header->revision = rev; + header->rev_time = time; + + return TRUE; +} + +gboolean +statfile_inc_revision (stat_file_t *file) +{ + struct stat_file_header *header; + + if (file == NULL || file->map == NULL) { + return FALSE; + } + + header = (struct stat_file_header *)file->map; + + header->revision ++; + + return TRUE; +} + +gboolean +statfile_get_revision (stat_file_t *file, guint64 *rev, time_t *time) +{ + struct stat_file_header *header; + + if (file == NULL || file->map == NULL) { + return FALSE; + } + + header = (struct stat_file_header *)file->map; + + if (rev != NULL) { + *rev = header->revision; + } + if (time != NULL) { + *time = header->rev_time; + } + + return TRUE; +} + +guint64 +statfile_get_used_blocks (stat_file_t *file) +{ + struct stat_file_header *header; + + if (file == NULL || file->map == NULL) { + return (guint64)-1; + } + + header = (struct stat_file_header *)file->map; + + return header->used_blocks; +} + +guint64 +statfile_get_total_blocks (stat_file_t *file) +{ + struct stat_file_header *header; + + if (file == NULL || file->map == NULL) { + return (guint64)-1; + } + + header = (struct stat_file_header *)file->map; + + /* If total blocks is 0 we have old version of header, so set total blocks correctly */ + if (header->total_blocks == 0) { + header->total_blocks = file->cur_section.length; + } + + return header->total_blocks; +} + +static void +statfile_pool_invalidate_callback (gint fd, short what, void *ud) +{ + 
statfile_pool_t *pool = ud; + stat_file_t *file; + gint i; + + msg_info ("invalidating %d statfiles", pool->opened); + + for (i = 0; i < pool->opened; i ++) { + file = &pool->files[i]; + msync (file->map, file->len, MS_ASYNC); + } + +} + + +void +statfile_pool_plan_invalidate (statfile_pool_t *pool, time_t seconds, time_t jitter) +{ + gboolean pending; + + + if (pool->invalidate_event != NULL) { + pending = evtimer_pending (pool->invalidate_event, NULL); + if (pending) { + /* Replan event */ + pool->invalidate_tv.tv_sec = seconds + g_random_int_range (0, jitter); + pool->invalidate_tv.tv_usec = 0; + evtimer_add (pool->invalidate_event, &pool->invalidate_tv); + } + } + else { + pool->invalidate_event = rspamd_mempool_alloc (pool->pool, sizeof (struct event)); + pool->invalidate_tv.tv_sec = seconds + g_random_int_range (0, jitter); + pool->invalidate_tv.tv_usec = 0; + evtimer_set (pool->invalidate_event, statfile_pool_invalidate_callback, pool); + evtimer_add (pool->invalidate_event, &pool->invalidate_tv); + msg_info ("invalidate of statfile pool is planned in %d seconds", (gint)pool->invalidate_tv.tv_sec); + } +} + + +stat_file_t * +get_statfile_by_symbol (statfile_pool_t *pool, struct classifier_config *ccf, + const gchar *symbol, struct statfile **st, gboolean try_create) +{ + stat_file_t *res = NULL; + GList *cur; + + if (pool == NULL || ccf == NULL || symbol == NULL) { + msg_err ("invalid input arguments"); + return NULL; + } + + cur = g_list_first (ccf->statfiles); + while (cur) { + *st = cur->data; + if (strcmp (symbol, (*st)->symbol) == 0) { + break; + } + *st = NULL; + cur = g_list_next (cur); + } + if (*st == NULL) { + msg_info ("cannot find statfile with symbol %s", symbol); + return NULL; + } + + if ((res = statfile_pool_is_open (pool, (*st)->path)) == NULL) { + if ((res = statfile_pool_open (pool, (*st)->path, (*st)->size, FALSE)) == NULL) { + msg_warn ("cannot open %s", (*st)->path); + if (try_create) { + if (statfile_pool_create (pool, (*st)->path, 
(*st)->size) == -1) { + msg_err ("cannot create statfile %s", (*st)->path); + return NULL; + } + res = statfile_pool_open (pool, (*st)->path, (*st)->size, FALSE); + if (res == NULL) { + msg_err ("cannot open statfile %s after creation", (*st)->path); + } + } + } + } + + return res; +} + +void +statfile_pool_lockall (statfile_pool_t *pool) +{ + stat_file_t *file; + gint i; + + if (pool->mlock_ok) { + for (i = 0; i < pool->opened; i ++) { + file = &pool->files[i]; + if (mlock (file->map, file->len) == -1) { + msg_warn ("mlock of statfile failed, maybe you need to increase RLIMIT_MEMLOCK limit for a process: %s", strerror (errno)); + pool->mlock_ok = FALSE; + return; + } + } + } + /* Do not try to lock if mlock failed */ +} + diff --git a/src/libserver/statfile.h b/src/libserver/statfile.h new file mode 100644 index 000000000..5786c4927 --- /dev/null +++ b/src/libserver/statfile.h @@ -0,0 +1,284 @@ +/** + * @file statfile.h + * Describes common methods for accessing statistics files and caching them in memory + */ + +#ifndef RSPAMD_STATFILE_H +#define RSPAMD_STATFILE_H + +#include "config.h" +#include "mem_pool.h" +#include "hash.h" + +#define CHAIN_LENGTH 128 + +/* Section types */ +#define STATFILE_SECTION_COMMON 1 +#define STATFILE_SECTION_HEADERS 2 +#define STATFILE_SECTION_URLS 3 +#define STATFILE_SECTION_REGEXP 4 + +#define DEFAULT_STATFILE_INVALIDATE_TIME 30 +#define DEFAULT_STATFILE_INVALIDATE_JITTER 30 + +/** + * Common statfile header + */ +struct stat_file_header { + u_char magic[3]; /**< magic signature ('r' 's' 'd') */ + u_char version[2]; /**< version of statfile */ + u_char padding[3]; /**< padding */ + guint64 create_time; /**< create time (time_t->guint64) */ + guint64 revision; /**< revision number */ + guint64 rev_time; /**< revision time */ + guint64 used_blocks; /**< used blocks number */ + guint64 total_blocks; /**< total number of blocks */ + u_char unused[239]; /**< some bytes that can be used in future */ +}; + +/** + * Section header + */ 
+struct stat_file_section { + guint64 code; /**< section's code */ + guint64 length; /**< section's length in blocks */ +}; + +/** + * Block of data in statfile + */ +struct stat_file_block { + guint32 hash1; /**< hash1 (also acts as index) */ + guint32 hash2; /**< hash2 */ + double value; /**< double value */ +}; + +/** + * Statistic file + */ +struct stat_file { + struct stat_file_header header; /**< header */ + struct stat_file_section section; /**< first section */ + struct stat_file_block blocks[1]; /**< first block of data */ +}; + +/** + * Common view of statfile object + */ +typedef struct stat_file_s { +#ifdef HAVE_PATH_MAX + gchar filename[PATH_MAX]; /**< name of file */ +#else + gchar filename[MAXPATHLEN]; /**< name of file */ +#endif + gint fd; /**< descriptor */ + void *map; /**< mmaped area */ + off_t seek_pos; /**< current seek position */ + struct stat_file_section cur_section; /**< current section */ + time_t open_time; /**< time when file was opened */ + time_t access_time; /**< last access time */ + size_t len; /**< length of file(in bytes) */ + rspamd_mempool_mutex_t *lock; /**< mutex */ +} stat_file_t; + +/** + * Statfiles pool + */ +typedef struct statfile_pool_s { + stat_file_t *files; /**< hash table of opened files indexed by name */ + void **maps; /**< shared hash table of mmaped areas indexed by name */ + gint opened; /**< number of opened files */ + rspamd_mempool_t *pool; /**< memory pool object */ + rspamd_mempool_mutex_t *lock; /**< mutex */ + struct event *invalidate_event; /**< event for pool invalidation */ + struct timeval invalidate_tv; + gboolean mlock_ok; /**< whether it is possible to use mlock (2) to avoid statfiles unloading */ +} statfile_pool_t; + +/* Forwarded declarations */ +struct classifier_config; +struct statfile; + +/** + * Create new statfile pool + * @param max_size maximum size + * @return statfile pool object + */ +statfile_pool_t* statfile_pool_new (rspamd_mempool_t *pool, gboolean use_mlock); + +/** + * Open 
statfile and attach it to pool + * @param pool statfile pool object + * @param filename name of statfile to open + * @return 0 if specified statfile is attached and -1 in case of error + */ +stat_file_t* statfile_pool_open (statfile_pool_t *pool, gchar *filename, size_t len, gboolean forced); + +/** + * Create new statfile but DOES NOT attach it to pool, use @see statfile_pool_open for attaching + * @param pool statfile pool object + * @param filename name of statfile to create + * @param len length of new statfile + * @return 0 if file was created and -1 in case of error + */ +gint statfile_pool_create (statfile_pool_t *pool, gchar *filename, size_t len); + +/** + * Close specified statfile + * @param pool statfile pool object + * @param filename name of statfile to close + * @param remove_hash remove filename from opened files hash also + * @return 0 if file was closed and -1 if statfile was not opened + */ +gint statfile_pool_close (statfile_pool_t *pool, stat_file_t *file, gboolean keep_sorted); + +/** + * Delete statfile pool and close all attached statfiles + * @param pool statfile pool object + */ +void statfile_pool_delete (statfile_pool_t *pool); + +/** + * Try to lock all statfiles in memory + * @param pool statfile pool object + */ +void statfile_pool_lockall (statfile_pool_t *pool); + +/** + * Lock specified file for exclusive use (eg. 
learning) + * @param pool statfile pool object + * @param filename name of statfile + */ +void statfile_pool_lock_file (statfile_pool_t *pool, stat_file_t *file); + +/** + * Unlock specified file + * @param pool statfile pool object + * @param filename name of statfile + */ +void statfile_pool_unlock_file (statfile_pool_t *pool, stat_file_t *file); + +/** + * Get block from statfile with h1 and h2 values, use time argument for current time + * @param pool statfile pool object + * @param filename name of statfile + * @param h1 h1 in file + * @param h2 h2 in file + * @param now current time + * @return block value or 0 if block is not found + */ +double statfile_pool_get_block (statfile_pool_t *pool, stat_file_t *file, guint32 h1, guint32 h2, time_t now); + +/** + * Set specified block in statfile + * @param pool statfile pool object + * @param filename name of statfile + * @param h1 h1 in file + * @param h2 h2 in file + * @param now current time + * @param value value of block + */ +void statfile_pool_set_block (statfile_pool_t *pool, stat_file_t *file, guint32 h1, guint32 h2, time_t now, double value); + +/** + * Check whether statfile is opened + * @param pool statfile pool object + * @param filename name of statfile + * @return TRUE if specified statfile is opened and FALSE otherwise + */ +stat_file_t* statfile_pool_is_open (statfile_pool_t *pool, gchar *filename); + +/** + * Returns current statfile section + * @param pool statfile pool object + * @param filename name of statfile + * @return code of section or 0 if file is not opened + */ +guint32 statfile_pool_get_section (statfile_pool_t *pool, stat_file_t *file); + +/** + * Go to other section of statfile + * @param pool statfile pool object + * @param filename name of statfile + * @param code code of section to seek to + * @param from_begin search for section from begin of file if true + * @return TRUE if section was set and FALSE otherwise + */ +gboolean statfile_pool_set_section (statfile_pool_t *pool, 
stat_file_t *file, guint32 code, gboolean from_begin); + +/** + * Add new section to statfile + * @param pool statfile pool object + * @param filename name of statfile + * @param code code of section to seek to + * @param length length in blocks of new section + * @return TRUE if section was successfully added and FALSE in case of error + */ +gboolean statfile_pool_add_section (statfile_pool_t *pool, stat_file_t *file, guint32 code, guint64 length); + + +/** + * Return code of section identified by name + * @param name name of section + * @return code of section or 0 if name of section is unknown + */ +guint32 statfile_get_section_by_name (const gchar *name); + +/** + * Set statfile revision and revision time + * @param filename name of statfile + * @param revision number of revision + * @param time time of revision + * @return TRUE if revision was set + */ +gboolean statfile_set_revision (stat_file_t *file, guint64 rev, time_t time); + +/** + * Increment statfile revision and revision time + * @param filename name of statfile + * @param time time of revision + * @return TRUE if revision was set + */ +gboolean statfile_inc_revision (stat_file_t *file); + +/** + * Set statfile revision and revision time + * @param filename name of statfile + * @param revision saved number of revision + * @param time saved time of revision + * @return TRUE if revision was saved in rev and time + */ +gboolean statfile_get_revision (stat_file_t *file, guint64 *rev, time_t *time); + +/** + * Get statfile used blocks + * @param file file to get number of used blocks + * @return number of used blocks or (guint64)-1 in case of error + */ +guint64 statfile_get_used_blocks (stat_file_t *file); + +/** + * Get statfile total blocks + * @param file file to get number of used blocks + * @return number of used blocks or (guint64)-1 in case of error + */ +guint64 statfile_get_total_blocks (stat_file_t *file); + + +/** + * Plan statfile pool invalidation + */ +void statfile_pool_plan_invalidate 
(statfile_pool_t *pool, time_t seconds, time_t jitter); + +/** + * Get a statfile by symbol + * @param pool pool object + * @param ccf ccf classifier config + * @param symbol symbol to search + * @param st statfile to get + * @param try_create whether we need to create statfile if it is absent + */ +stat_file_t* get_statfile_by_symbol (statfile_pool_t *pool, struct classifier_config *ccf, + const gchar *symbol, struct statfile **st, gboolean try_create); + +#endif diff --git a/src/libserver/statfile_sync.c b/src/libserver/statfile_sync.c new file mode 100644 index 000000000..6b545af17 --- /dev/null +++ b/src/libserver/statfile_sync.c @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "cfg_file.h" +#include "tokenizers/tokenizers.h" +#include "classifiers/classifiers.h" +#include "statfile.h" +#include "binlog.h" +#include "buffer.h" +#include "statfile_sync.h" + +enum rspamd_sync_state { + SYNC_STATE_GREETING, + SYNC_STATE_READ_LINE, + SYNC_STATE_READ_REV, + SYNC_STATE_QUIT, +}; + +/* Context of sync process */ +struct rspamd_sync_ctx { + struct statfile *st; + stat_file_t *real_statfile; + statfile_pool_t *pool; + rspamd_io_dispatcher_t *dispatcher; + struct event_base *ev_base; + + struct event tm_ev; + + struct timeval interval; + struct timeval io_tv; + gint sock; + guint32 timeout; + guint32 sync_interval; + enum rspamd_sync_state state; + gboolean is_busy; + + guint64 new_rev; + guint64 new_time; + guint64 new_len; +}; + +static void +log_next_sync (const gchar *symbol, time_t delay) +{ + gchar outstr[200]; + time_t t; + struct tm *tmp; + gint r; + + t = time(NULL); + t += delay; + tmp = localtime(&t); + + if (tmp) { + r = rspamd_snprintf (outstr, sizeof (outstr), "statfile_sync: next sync of %s at ", symbol); + if ((r = strftime(outstr + r, sizeof(outstr) - r, "%T", tmp)) != 0) { + msg_info (outstr); + } + } +} + +static gboolean +parse_revision_line (struct rspamd_sync_ctx *ctx, f_str_t *in) +{ + guint i, state = 0; + gchar *p, *c, numbuf[sizeof("18446744073709551615")]; + guint64 *val; + + /* First of all try to find END line */ + if (in->len >= sizeof ("END") - 1 && memcmp (in->begin, "END", sizeof ("END") - 1) == 0) { + ctx->state = SYNC_STATE_QUIT; + ctx->is_busy = FALSE; + return TRUE; + } + + /* Next check for error line */ + if (in->len >= sizeof ("FAIL") - 1 && memcmp (in->begin, "FAIL", sizeof ("FAIL") - 1) == 0) { + ctx->state = SYNC_STATE_QUIT; + ctx->is_busy = FALSE; + return TRUE; + } + + /* Now try to extract 3 numbers from string: revision, time and length */ + p = in->begin; + val = &ctx->new_rev; + for (i = 0; i < in->len; i ++, p ++) { + if (g_ascii_isspace (*p) || i == in->len - 
1) { + if (state == 1) { + if (i == in->len - 1) { + /* One more character */ + p ++; + } + rspamd_strlcpy (numbuf, c, MIN (p - c + 1, (gint)sizeof (numbuf))); + errno = 0; + *val = strtoull (numbuf, NULL, 10); + if (errno != 0) { + msg_info ("cannot parse number %s", strerror (errno)); + return FALSE; + } + state = 2; + } + } + else { + if (state == 0) { + c = p; + state = 1; + } + else if (state == 2) { + if (val == &ctx->new_rev) { + val = &ctx->new_time; + } + else if (val == &ctx->new_time) { + val = &ctx->new_len; + } + c = p; + state = 1; + } + } + } + + /* Current value must be len value and its value must not be 0 */ + return ((val == &ctx->new_len)); +} + +static gboolean +read_blocks (struct rspamd_sync_ctx *ctx, f_str_t *in) +{ + struct rspamd_binlog_element *elt; + guint i; + + statfile_pool_lock_file (ctx->pool, ctx->real_statfile); + elt = (struct rspamd_binlog_element *)in->begin; + for (i = 0; i < in->len / sizeof (struct rspamd_binlog_element); i ++, elt ++) { + statfile_pool_set_block (ctx->pool, ctx->real_statfile, elt->h1, elt->h2, ctx->new_time, elt->value); + } + statfile_pool_unlock_file (ctx->pool, ctx->real_statfile); + + return TRUE; +} + +static gboolean +sync_read (f_str_t * in, void *arg) +{ + struct rspamd_sync_ctx *ctx = arg; + gchar buf[256]; + guint64 rev = 0; + time_t ti = 0; + + if (in->len == 0) { + /* Skip empty lines */ + return TRUE; + } + switch (ctx->state) { + case SYNC_STATE_GREETING: + /* Skip greeting line and write sync command */ + /* Write initial data */ + statfile_get_revision (ctx->real_statfile, &rev, &ti); + rev = rspamd_snprintf (buf, sizeof (buf), "sync %s %uL %T" CRLF, ctx->st->symbol, rev, ti); + ctx->state = SYNC_STATE_READ_LINE; + return rspamd_dispatcher_write (ctx->dispatcher, buf, rev, FALSE, FALSE); + break; + case SYNC_STATE_READ_LINE: + /* Try to parse line from server */ + if (!parse_revision_line (ctx, in)) { + msg_info ("cannot parse line of length %z: '%*s'", in->len, (gint)in->len, in->begin); + 
close (ctx->sock); + rspamd_remove_dispatcher (ctx->dispatcher); + ctx->is_busy = FALSE; + return FALSE; + } + else if (ctx->state != SYNC_STATE_QUIT) { + if (ctx->new_len > 0) { + ctx->state = SYNC_STATE_READ_REV; + rspamd_set_dispatcher_policy (ctx->dispatcher, BUFFER_CHARACTER, ctx->new_len); + } + } + else { + /* Quit this session */ + msg_info ("sync ended for: %s", ctx->st->symbol); + close (ctx->sock); + rspamd_remove_dispatcher (ctx->dispatcher); + ctx->is_busy = FALSE; + /* Immediately return from callback */ + return FALSE; + } + break; + case SYNC_STATE_READ_REV: + /* In now contains all blocks of specified revision, so we can read them directly */ + if (!read_blocks (ctx, in)) { + msg_info ("cannot read blocks"); + close (ctx->sock); + rspamd_remove_dispatcher (ctx->dispatcher); + ctx->is_busy = FALSE; + return FALSE; + } + statfile_set_revision (ctx->real_statfile, ctx->new_rev, ctx->new_time); + msg_info ("set new revision: %uL, readed %z bytes", ctx->new_rev, in->len); + /* Now try to read other revision or END line */ + ctx->state = SYNC_STATE_READ_LINE; + rspamd_set_dispatcher_policy (ctx->dispatcher, BUFFER_LINE, 0); + break; + case SYNC_STATE_QUIT: + close (ctx->sock); + rspamd_remove_dispatcher (ctx->dispatcher); + ctx->is_busy = FALSE; + return FALSE; + } + + return TRUE; +} + +static void +sync_err (GError *err, void *arg) +{ + struct rspamd_sync_ctx *ctx = arg; + + msg_info ("abnormally closing connection, error: %s", err->message); + ctx->is_busy = FALSE; + close (ctx->sock); + rspamd_remove_dispatcher (ctx->dispatcher); +} + + +static void +sync_timer_callback (gint fd, short what, void *ud) +{ + struct rspamd_sync_ctx *ctx = ud; + guint32 jittered_interval; + + /* Plan new event */ + evtimer_del (&ctx->tm_ev); + /* Add some jittering for synchronization */ + jittered_interval = g_random_int_range (ctx->sync_interval, ctx->sync_interval * 2); + msec_to_tv (jittered_interval, &ctx->interval); + evtimer_add (&ctx->tm_ev, &ctx->interval); + 
log_next_sync (ctx->st->symbol, ctx->interval.tv_sec); + + if (ctx->is_busy) { + /* Sync is in progress */ + msg_info ("syncronization process is in progress, do not start new one"); + return; + } + + if ((ctx->sock = make_universal_socket (ctx->st->binlog->master_addr, ctx->st->binlog->master_port, + SOCK_STREAM, TRUE, FALSE, TRUE)) == -1) { + msg_info ("cannot connect to %s", ctx->st->binlog->master_addr); + return; + } + /* Now create and activate dispatcher */ + msec_to_tv (ctx->timeout, &ctx->io_tv); + ctx->dispatcher = rspamd_create_dispatcher (ctx->ev_base, ctx->sock, BUFFER_LINE, sync_read, NULL, sync_err, &ctx->io_tv, ctx); + + ctx->state = SYNC_STATE_GREETING; + ctx->is_busy = TRUE; + + msg_info ("starting synchronization of %s", ctx->st->symbol); + +} + +static gboolean +add_statfile_watch (statfile_pool_t *pool, struct statfile *st, struct config_file *cfg, struct event_base *ev_base) +{ + struct rspamd_sync_ctx *ctx; + guint32 jittered_interval; + + if (st->binlog->master_addr != NULL) { + ctx = rspamd_mempool_alloc (pool->pool, sizeof (struct rspamd_sync_ctx)); + ctx->st = st; + ctx->timeout = cfg->statfile_sync_timeout; + ctx->sync_interval = cfg->statfile_sync_interval; + ctx->ev_base = ev_base; + /* Add some jittering for synchronization */ + jittered_interval = g_random_int_range (ctx->sync_interval, ctx->sync_interval * 2); + msec_to_tv (jittered_interval, &ctx->interval); + /* Open statfile and attach it to pool */ + if ((ctx->real_statfile = statfile_pool_is_open (pool, st->path)) == NULL) { + if ((ctx->real_statfile = statfile_pool_open (pool, st->path, st->size, FALSE)) == NULL) { + msg_warn ("cannot open %s", st->path); + if (statfile_pool_create (pool, st->path, st->size) == -1) { + msg_err ("cannot create statfile %s", st->path); + return FALSE; + } + ctx->real_statfile = statfile_pool_open (pool, st->path, st->size, FALSE); + } + } + /* Now plan event for it's future executing */ + evtimer_set (&ctx->tm_ev, sync_timer_callback, ctx); + 
event_base_set (ctx->ev_base, &ctx->tm_ev); + evtimer_add (&ctx->tm_ev, &ctx->interval); + log_next_sync (st->symbol, ctx->interval.tv_sec); + } + else { + msg_err ("cannot add statfile watch for statfile %s: no master defined", st->symbol); + return FALSE; + } + + return TRUE; +} + +gboolean +start_statfile_sync (statfile_pool_t *pool, struct config_file *cfg, struct event_base *ev_base) +{ + GList *cur, *l; + struct classifier_config *cl; + struct statfile *st; + + /* + * First of all walk through all classifiers and find those statfiles + * for which we should do sync (slave affinity) + */ + cur = cfg->classifiers; + while (cur) { + cl = cur->data; + l = cl->statfiles; + while (l) { + st = l->data; + if (st->binlog != NULL && st->binlog->affinity == AFFINITY_SLAVE) { + if (!add_statfile_watch (pool, st, cfg, ev_base)) { + return FALSE; + } + } + l = g_list_next (l); + } + cur = g_list_next (cur); + } + + return TRUE; +} diff --git a/src/libserver/statfile_sync.h b/src/libserver/statfile_sync.h new file mode 100644 index 000000000..b3abb8b91 --- /dev/null +++ b/src/libserver/statfile_sync.h @@ -0,0 +1,14 @@ +#ifndef RSPAMD_STATFILE_SYNC_H +#define RSPAMD_STATFILE_SYNC_H + +#include "config.h" +#include "main.h" +#include "statfile.h" +#include "cfg_file.h" + +/* + * Start synchronization of statfiles. Must be called after event_init as it adds events + */ +gboolean start_statfile_sync (statfile_pool_t *pool, struct config_file *cfg, struct event_base *ev_base); + +#endif diff --git a/src/libserver/symbols_cache.c b/src/libserver/symbols_cache.c new file mode 100644 index 000000000..dfca57c66 --- /dev/null +++ b/src/libserver/symbols_cache.c @@ -0,0 +1,1055 @@ +/* + * Copyright (c) 2009-2012, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "util.h" +#include "main.h" +#include "message.h" +#include "symbols_cache.h" +#include "cfg_file.h" + +#define WEIGHT_MULT 4.0 +#define FREQUENCY_MULT 10.0 +#define TIME_MULT -1.0 + +/* After which number of messages try to resort cache */ +#define MAX_USES 100 +/* + * Symbols cache utility functions + */ + +#define MIN_CACHE 17 + +static guint64 total_frequency = 0; +static guint32 nsymbols = 0; + +gint +cache_cmp (const void *p1, const void *p2) +{ + const struct cache_item *i1 = p1, *i2 = p2; + + return strcmp (i1->s->symbol, i2->s->symbol); +} + +gint +cache_logic_cmp (const void *p1, const void *p2) +{ + const struct cache_item *i1 = p1, *i2 = p2; + double w1, w2; + double weight1, weight2; + double f1 = 0, f2 = 0; + + if (i1->priority == 0 && i2->priority == 0) { + if (total_frequency > 0) { + f1 = ((double)i1->s->frequency * nsymbols) / (double)total_frequency; + f2 = ((double)i2->s->frequency * nsymbols) / (double)total_frequency; + } + weight1 = i1->metric_weight == 0 ? i1->s->weight : i1->metric_weight; + weight2 = i2->metric_weight == 0 ? 
i2->s->weight : i2->metric_weight; + w1 = abs (weight1) * WEIGHT_MULT + f1 * FREQUENCY_MULT + i1->s->avg_time * TIME_MULT; + w2 = abs (weight2) * WEIGHT_MULT + f2 * FREQUENCY_MULT + i2->s->avg_time * TIME_MULT; + } + else { + /* Strict sorting */ + w1 = abs (i1->priority); + w2 = abs (i2->priority); + } + + return (gint)w2 - w1; +} + +static GChecksum * +get_mem_cksum (struct symbols_cache *cache) +{ + GChecksum *result; + GList *cur, *l; + struct cache_item *item; + + result = g_checksum_new (G_CHECKSUM_SHA1); + + l = g_list_copy (cache->negative_items); + l = g_list_sort (l, cache_cmp); + cur = g_list_first (l); + while (cur) { + item = cur->data; + if (item->s->symbol[0] != '\0') { + g_checksum_update (result, item->s->symbol, strlen (item->s->symbol)); + } + cur = g_list_next (cur); + } + g_list_free (l); + + + l = g_list_copy (cache->static_items); + l = g_list_sort (l, cache_cmp); + cur = g_list_first (l); + while (cur) { + item = cur->data; + if (item->s->symbol[0] != '\0') { + g_checksum_update (result, item->s->symbol, strlen (item->s->symbol)); + } + total_frequency += item->s->frequency; + cur = g_list_next (cur); + } + g_list_free (l); + + return result; +} + +/* Sort items in logical order */ +static void +post_cache_init (struct symbols_cache *cache) +{ + GList *cur; + struct cache_item *item; + + total_frequency = 0; + nsymbols = cache->used_items; + cur = g_list_first (cache->negative_items); + while (cur) { + item = cur->data; + total_frequency += item->s->frequency; + cur = g_list_next (cur); + } + cur = g_list_first (cache->static_items); + while (cur) { + item = cur->data; + total_frequency += item->s->frequency; + cur = g_list_next (cur); + } + + cache->negative_items = g_list_sort (cache->negative_items, cache_logic_cmp); + cache->static_items = g_list_sort (cache->static_items, cache_logic_cmp); +} + +/* Unmap cache file */ +static void +unmap_cache_file (gpointer arg) +{ + struct symbols_cache *cache = arg; + + /* A bit ugly usage */ + 
munmap (cache->map, cache->used_items * sizeof (struct saved_cache_item)); +} + +static gboolean +mmap_cache_file (struct symbols_cache *cache, gint fd, rspamd_mempool_t *pool) +{ + guint8 *map; + gint i; + GList *cur; + struct cache_item *item; + + if (cache->used_items > 0) { + map = mmap (NULL, cache->used_items * sizeof (struct saved_cache_item), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (map == MAP_FAILED) { + msg_err ("cannot mmap cache file: %d, %s", errno, strerror (errno)); + close (fd); + return FALSE; + } + /* Close descriptor as it would never be used */ + close (fd); + cache->map = map; + /* Now free old values for saved cache items and fill them with mmapped ones */ + i = 0; + cur = g_list_first (cache->negative_items); + while (cur) { + item = cur->data; + item->s = (struct saved_cache_item *)(map + i * sizeof (struct saved_cache_item)); + cur = g_list_next (cur); + i ++; + } + cur = g_list_first (cache->static_items); + while (cur) { + item = cur->data; + item->s = (struct saved_cache_item *)(map + i * sizeof (struct saved_cache_item)); + cur = g_list_next (cur); + i ++; + } + + post_cache_init (cache); + } + + return TRUE; +} + +/* Fd must be opened for writing, after creating file is mmapped */ +static gboolean +create_cache_file (struct symbols_cache *cache, const gchar *filename, gint fd, rspamd_mempool_t *pool) +{ + GChecksum *cksum; + u_char *digest; + gsize cklen; + GList *cur; + struct cache_item *item; + + /* Calculate checksum */ + cksum = get_mem_cksum (cache); + if (cksum == NULL) { + msg_err ("cannot calculate checksum for symbols"); + close (fd); + return FALSE; + } + + cklen = g_checksum_type_get_length (G_CHECKSUM_SHA1); + digest = g_malloc (cklen); + + g_checksum_get_digest (cksum, digest, &cklen); + /* Now write data to file */ + cur = g_list_first (cache->negative_items); + while (cur) { + item = cur->data; + if (write (fd, item->s, sizeof (struct saved_cache_item)) == -1) { + msg_err ("cannot write to file %d, %s", errno, 
strerror (errno)); + close (fd); + g_checksum_free (cksum); + g_free (digest); + return FALSE; + } + cur = g_list_next (cur); + } + cur = g_list_first (cache->static_items); + while (cur) { + item = cur->data; + if (write (fd, item->s, sizeof (struct saved_cache_item)) == -1) { + msg_err ("cannot write to file %d, %s", errno, strerror (errno)); + close (fd); + g_checksum_free (cksum); + g_free (digest); + return FALSE; + } + cur = g_list_next (cur); + } + /* Write checksum */ + if (write (fd, digest, cklen) == -1) { + msg_err ("cannot write to file %d, %s", errno, strerror (errno)); + close (fd); + g_checksum_free (cksum); + g_free (digest); + return FALSE; + } + + close (fd); + g_checksum_free (cksum); + g_free (digest); + /* Reopen for reading */ + if ((fd = open (filename, O_RDWR)) == -1) { + msg_info ("cannot open file %s, error %d, %s", errno, strerror (errno)); + return FALSE; + } + + return mmap_cache_file (cache, fd, pool); +} + +enum rspamd_symbol_type { + SYMBOL_TYPE_NORMAL, + SYMBOL_TYPE_VIRTUAL, + SYMBOL_TYPE_CALLBACK +}; + +static void +register_symbol_common (struct symbols_cache **cache, const gchar *name, double weight, gint priority, + symbol_func_t func, gpointer user_data, enum rspamd_symbol_type type) +{ + struct cache_item *item = NULL; + struct symbols_cache *pcache = *cache; + GList **target; + double *w; + + if (*cache == NULL) { + pcache = g_new0 (struct symbols_cache, 1); + *cache = pcache; + pcache->static_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + pcache->items_by_symbol = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + } + + item = rspamd_mempool_alloc0 (pcache->static_pool, sizeof (struct cache_item)); + item->s = rspamd_mempool_alloc0 (pcache->static_pool, sizeof (struct saved_cache_item)); + rspamd_strlcpy (item->s->symbol, name, sizeof (item->s->symbol)); + item->func = func; + item->user_data = user_data; + item->priority = priority; + + switch (type) { + case SYMBOL_TYPE_NORMAL: + break; + case 
SYMBOL_TYPE_VIRTUAL: + item->is_virtual = TRUE; + break; + case SYMBOL_TYPE_CALLBACK: + item->is_callback = TRUE; + break; + } + + /* Handle weight using default metric */ + if (pcache->cfg && pcache->cfg->default_metric && (w = g_hash_table_lookup (pcache->cfg->default_metric->symbols, name)) != NULL) { + item->s->weight = weight * (*w); + } + else { + item->s->weight = weight; + } + + /* If we have undefined priority determine list according to weight */ + if (priority == 0) { + if (item->s->weight > 0) { + target = &(*cache)->static_items; + } + else { + target = &(*cache)->negative_items; + } + } + else { + /* Items with more priority are called before items with less priority */ + if (priority < 0) { + target = &(*cache)->negative_items; + } + else { + target = &(*cache)->static_items; + } + } + + pcache->used_items++; + g_hash_table_insert (pcache->items_by_symbol, item->s->symbol, item); + msg_debug ("used items: %d, added symbol: %s", (*cache)->used_items, name); + set_counter (item->s->symbol, 0); + + *target = g_list_prepend (*target, item); +} + +void +register_symbol (struct symbols_cache **cache, const gchar *name, double weight, + symbol_func_t func, gpointer user_data) +{ + register_symbol_common (cache, name, weight, 0, func, user_data, SYMBOL_TYPE_NORMAL); +} + +void +register_virtual_symbol (struct symbols_cache **cache, const gchar *name, double weight) +{ + register_symbol_common (cache, name, weight, 0, NULL, NULL, SYMBOL_TYPE_VIRTUAL); +} + +void +register_callback_symbol (struct symbols_cache **cache, const gchar *name, double weight, + symbol_func_t func, gpointer user_data) +{ + register_symbol_common (cache, name, weight, 0, func, user_data, SYMBOL_TYPE_CALLBACK); +} + +void +register_callback_symbol_priority (struct symbols_cache **cache, const gchar *name, double weight, gint priority, + symbol_func_t func, gpointer user_data) +{ + register_symbol_common (cache, name, weight, priority, func, user_data, SYMBOL_TYPE_CALLBACK); +} + +void 
+register_dynamic_symbol (rspamd_mempool_t *dynamic_pool, struct symbols_cache **cache, + const gchar *name, double weight, symbol_func_t func, + gpointer user_data, GList *networks) +{ + struct cache_item *item = NULL; + struct symbols_cache *pcache = *cache; + GList *t, *cur; + uintptr_t r; + double *w; + guint32 mask = 0xFFFFFFFF; + struct dynamic_map_item *it; + gint rr; + + if (*cache == NULL) { + pcache = g_new0 (struct symbols_cache, 1); + *cache = pcache; + pcache->static_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + } + + item = rspamd_mempool_alloc0 (dynamic_pool, sizeof (struct cache_item)); + item->s = rspamd_mempool_alloc (dynamic_pool, sizeof (struct saved_cache_item)); + rspamd_strlcpy (item->s->symbol, name, sizeof (item->s->symbol)); + item->func = func; + item->user_data = user_data; + /* Handle weight using default metric */ + if (pcache->cfg && pcache->cfg->default_metric && (w = g_hash_table_lookup (pcache->cfg->default_metric->symbols, name)) != NULL) { + item->s->weight = weight * (*w); + } + else { + item->s->weight = weight; + } + item->is_dynamic = TRUE; + item->priority = 0; + + pcache->used_items++; + msg_debug ("used items: %d, added symbol: %s", (*cache)->used_items, name); + set_counter (item->s->symbol, 0); + + g_hash_table_insert (pcache->items_by_symbol, item->s->symbol, item); + + if (networks == NULL) { + pcache->dynamic_items = g_list_prepend (pcache->dynamic_items, item); + } + else { + if (pcache->dynamic_map == NULL) { + pcache->dynamic_map = radix_tree_create (); + pcache->negative_dynamic_map = radix_tree_create (); + } + cur = networks; + while (cur) { + it = cur->data; + mask = mask << (32 - it->mask); + r = ntohl (it->addr.s_addr & mask); + if (it->negative) { + /* For negatve items insert into list and into negative cache map */ + if ((r = radix32tree_find (pcache->negative_dynamic_map, r)) != RADIX_NO_VALUE) { + t = (GList *)((gpointer)r); + t = g_list_prepend (t, item); + /* Replace pointers in radix 
tree and in destructor function */ + rspamd_mempool_replace_destructor (dynamic_pool, (rspamd_mempool_destruct_t)g_list_free, (gpointer)r, t); + rr = radix32tree_replace (pcache->negative_dynamic_map, ntohl (it->addr.s_addr), mask, (uintptr_t)t); + if (rr == -1) { + msg_warn ("cannot replace ip to tree: %s, mask %X", inet_ntoa (it->addr), mask); + } + } + else { + t = g_list_prepend (NULL, item); + rspamd_mempool_add_destructor (dynamic_pool, (rspamd_mempool_destruct_t)g_list_free, t); + rr = radix32tree_insert (pcache->negative_dynamic_map, ntohl (it->addr.s_addr), mask, (uintptr_t)t); + if (rr == -1) { + msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (it->addr), mask); + } + else if (rr == 1) { + msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (it->addr), mask); + } + } + /* Insert into list */ + pcache->dynamic_items = g_list_prepend (pcache->dynamic_items, item); + } + else { + if ((r = radix32tree_find (pcache->dynamic_map, r)) != RADIX_NO_VALUE) { + t = (GList *)((gpointer)r); + t = g_list_prepend (t, item); + /* Replace pointers in radix tree and in destructor function */ + rspamd_mempool_replace_destructor (dynamic_pool, (rspamd_mempool_destruct_t)g_list_free, (gpointer)r, t); + rr = radix32tree_replace (pcache->dynamic_map, ntohl (it->addr.s_addr), mask, (uintptr_t)t); + if (rr == -1) { + msg_warn ("cannot replace ip to tree: %s, mask %X", inet_ntoa (it->addr), mask); + } + } + else { + t = g_list_prepend (NULL, item); + rspamd_mempool_add_destructor (dynamic_pool, (rspamd_mempool_destruct_t)g_list_free, t); + rr = radix32tree_insert (pcache->dynamic_map, ntohl (it->addr.s_addr), mask, (uintptr_t)t); + if (rr == -1) { + msg_warn ("cannot insert ip to tree: %s, mask %X", inet_ntoa (it->addr), mask); + } + else if (rr == 1) { + msg_warn ("ip %s, mask %X, value already exists", inet_ntoa (it->addr), mask); + } + } + } + cur = g_list_next (cur); + } + } +} + +void +remove_dynamic_rules (struct symbols_cache *cache) +{ + if 
(cache->dynamic_items) { + g_list_free (cache->dynamic_items); + cache->dynamic_items = NULL; + } + + if (cache->dynamic_map) { + radix_tree_free (cache->dynamic_map); + cache->dynamic_map = NULL; + } + if (cache->negative_dynamic_map) { + radix_tree_free (cache->negative_dynamic_map); + cache->negative_dynamic_map = NULL; + } +} + +static void +free_cache (gpointer arg) +{ + struct symbols_cache *cache = arg; + + if (cache->map != NULL) { + unmap_cache_file (cache); + } + + if (cache->static_items) { + g_list_free (cache->static_items); + } + if (cache->negative_items) { + g_list_free (cache->negative_items); + } + if (cache->dynamic_items) { + g_list_free (cache->dynamic_items); + } + if (cache->dynamic_map) { + radix_tree_free (cache->dynamic_map); + } + if (cache->negative_dynamic_map) { + radix_tree_free (cache->negative_dynamic_map); + } + g_hash_table_destroy (cache->items_by_symbol); + rspamd_mempool_delete (cache->static_pool); + + g_free (cache); +} + +gboolean +init_symbols_cache (rspamd_mempool_t * pool, struct symbols_cache *cache, struct config_file *cfg, + const gchar *filename, gboolean ignore_checksum) +{ + struct stat st; + gint fd; + GChecksum *cksum; + u_char *mem_sum, *file_sum; + gsize cklen; + gboolean res; + + if (cache == NULL) { + return FALSE; + } + + /* Init locking */ + cache->lock = rspamd_mempool_get_rwlock (pool); + + cache->cfg = cfg; + + /* Just in-memory cache */ + if (filename == NULL) { + post_cache_init (cache); + return TRUE; + } + + /* First of all try to stat file */ + if (stat (filename, &st) == -1) { + /* Check errno */ + if (errno == ENOENT) { + /* Try to create file */ + if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { + msg_info ("cannot create file %s, error %d, %s", filename, errno, strerror (errno)); + return FALSE; + } + else { + return create_cache_file (cache, filename, fd, pool); + } + } + else { + msg_info ("cannot stat file %s, error %d, %s", filename, errno, strerror (errno)); 
+ return FALSE; + } + } + else { + if ((fd = open (filename, O_RDWR)) == -1) { + msg_info ("cannot open file %s, error %d, %s", filename, errno, strerror (errno)); + return FALSE; + } + } + + if (!ignore_checksum) { + /* Calculate checksum */ + cksum = get_mem_cksum (cache); + if (cksum == NULL) { + msg_err ("cannot calculate checksum for symbols"); + close (fd); + return FALSE; + } + + cklen = g_checksum_type_get_length (G_CHECKSUM_SHA1); + mem_sum = g_malloc (cklen); + + g_checksum_get_digest (cksum, mem_sum, &cklen); + /* Now try to read file sum */ + if (lseek (fd, -(cklen), SEEK_END) == -1) { + if (errno == EINVAL) { + /* Try to create file */ + msg_info ("recreate cache file"); + if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { + msg_info ("cannot create file %s, error %d, %s", filename, errno, strerror (errno)); + return FALSE; + } + else { + return create_cache_file (cache, filename, fd, pool); + } + } + close (fd); + g_free (mem_sum); + g_checksum_free (cksum); + msg_err ("cannot seek to read checksum, %d, %s", errno, strerror (errno)); + return FALSE; + } + file_sum = g_malloc (cklen); + if (read (fd, file_sum, cklen) == -1) { + close (fd); + g_free (mem_sum); + g_free (file_sum); + g_checksum_free (cksum); + msg_err ("cannot read checksum, %d, %s", errno, strerror (errno)); + return FALSE; + } + + if (memcmp (file_sum, mem_sum, cklen) != 0) { + close (fd); + g_free (mem_sum); + g_free (file_sum); + g_checksum_free (cksum); + msg_info ("checksum mismatch, recreating file"); + /* Reopen with rw permissions */ + if ((fd = open (filename, O_RDWR | O_TRUNC | O_CREAT, S_IWUSR | S_IRUSR)) == -1) { + msg_info ("cannot create file %s, error %d, %s", filename, errno, strerror (errno)); + return FALSE; + } + else { + return create_cache_file (cache, filename, fd, pool); + } + } + + g_free (mem_sum); + g_free (file_sum); + g_checksum_free (cksum); + } + /* MMap cache file and copy saved_cache structures */ + res = mmap_cache_file 
(cache, fd, pool); + + rspamd_mempool_add_destructor (pool, (rspamd_mempool_destruct_t)free_cache, cache); + + return res; +} + +static GList * +check_dynamic_item (struct rspamd_task *task, struct symbols_cache *cache) +{ +#ifdef HAVE_INET_PTON + /* TODO: radix doesn't support ipv6 addrs */ + return NULL; +#else + GList *res = NULL; + uintptr_t r; + if (cache->dynamic_map != NULL && task->from_addr.s_addr != INADDR_NONE) { + if ((r = radix32tree_find (cache->dynamic_map, ntohl (task->from_addr.s_addr))) != RADIX_NO_VALUE) { + res = (GList *)((gpointer)r); + return res; + } + else { + return NULL; + } + } + return res; +#endif +} + +static gboolean +check_negative_dynamic_item (struct rspamd_task *task, struct symbols_cache *cache, struct cache_item *item) +{ + +#ifdef HAVE_INET_PTON + /* TODO: radix doesn't support ipv6 addrs */ + return FALSE; +#else + GList *res = NULL; + uintptr_t r; + + if (cache->negative_dynamic_map != NULL && task->from_addr.s_addr != INADDR_NONE) { + if ((r = radix32tree_find (cache->negative_dynamic_map, ntohl (task->from_addr.s_addr))) != RADIX_NO_VALUE) { + res = (GList *)((gpointer)r); + while (res) { + if (res->data == (gpointer)item) { + return TRUE; + } + res = g_list_next (res); + } + } + } + return FALSE; +#endif + +} + +static gboolean +check_debug_symbol (struct config_file *cfg, const gchar *symbol) +{ + GList *cur; + + cur = cfg->debug_symbols; + while (cur) { + if (strcmp (symbol, (const gchar *)cur->data) == 0) { + return TRUE; + } + cur = g_list_next (cur); + } + + return FALSE; +} + +static void +rspamd_symbols_cache_metric_cb (gpointer k, gpointer v, gpointer ud) +{ + struct symbols_cache *cache = (struct symbols_cache *)ud; + GList *cur; + const gchar *sym = k; + gdouble weight = *(gdouble *)v; + struct cache_item *item; + + cur = cache->negative_items; + while (cur) { + item = cur->data; + if (strcmp (item->s->symbol, sym) == 0) { + item->metric_weight = weight; + return; + } + cur = g_list_next (cur); + } + cur = 
cache->static_items; + while (cur) { + item = cur->data; + if (strcmp (item->s->symbol, sym) == 0) { + item->metric_weight = weight; + return; + } + cur = g_list_next (cur); + } +} + +gboolean +validate_cache (struct symbols_cache *cache, struct config_file *cfg, gboolean strict) +{ + struct cache_item *item; + GList *cur, *p, *metric_symbols; + gboolean res; + + if (cache == NULL) { + msg_err ("empty cache is invalid"); + return FALSE; + } + + /* Check each symbol in a cache and find its weight definition */ + cur = cache->negative_items; + while (cur) { + item = cur->data; + if (!item->is_callback) { + if (g_hash_table_lookup (cfg->metrics_symbols, item->s->symbol) == NULL) { + if (strict) { + msg_warn ("no weight registered for symbol %s", item->s->symbol); + return FALSE; + } + else { + msg_info ("no weight registered for symbol %s", item->s->symbol); + } + } + } + cur = g_list_next (cur); + } + cur = cache->static_items; + while (cur) { + item = cur->data; + if (!item->is_callback) { + if (g_hash_table_lookup (cfg->metrics_symbols, item->s->symbol) == NULL) { + if (strict) { + msg_warn ("no weight registered for symbol %s", item->s->symbol); + return FALSE; + } + else { + msg_info ("no weight registered for symbol %s", item->s->symbol); + } + } + } + cur = g_list_next (cur); + } +#ifndef GLIB_HASH_COMPAT + /* Now check each metric item and find corresponding symbol in a cache */ + metric_symbols = g_hash_table_get_keys (cfg->metrics_symbols); + cur = metric_symbols; + while (cur) { + res = FALSE; + p = cache->negative_items; + while (p) { + item = p->data; + if (strcmp (item->s->symbol, cur->data) == 0) { + res = TRUE; + break; + } + p = g_list_next (p); + } + if (!res) { + p = cache->static_items; + while (p) { + item = p->data; + if (strcmp (item->s->symbol, cur->data) == 0) { + res = TRUE; + break; + } + p = g_list_next (p); + } + } + if (!res) { + msg_warn ("symbol '%s' is registered in metric but not found in cache", cur->data); + if (strict) { + return 
FALSE; + } + } + cur = g_list_next (cur); + } + g_list_free (metric_symbols); +#endif /* GLIB_COMPAT */ + + /* Now adjust symbol weights according to default metric */ + if (cfg->default_metric != NULL) { + g_hash_table_foreach (cfg->default_metric->symbols, rspamd_symbols_cache_metric_cb, cache); + /* Resort caches */ + cache->negative_items = g_list_sort (cache->negative_items, cache_logic_cmp); + cache->static_items = g_list_sort (cache->static_items, cache_logic_cmp); + } + + return TRUE; +} + +struct symbol_callback_data { + enum { + CACHE_STATE_NEGATIVE, + CACHE_STATE_DYNAMIC_MAP, + CACHE_STATE_DYNAMIC, + CACHE_STATE_STATIC + } state; + struct cache_item *saved_item; + GList *list_pointer; +}; + +gboolean +call_symbol_callback (struct rspamd_task * task, struct symbols_cache * cache, gpointer *save) +{ +#ifdef HAVE_CLOCK_GETTIME + struct timespec ts1, ts2; +#else + struct timeval tv1, tv2; +#endif + guint64 diff; + struct cache_item *item = NULL; + struct symbol_callback_data *s = *save; + + if (s == NULL) { + if (cache == NULL) { + return FALSE; + } + if (cache->uses++ >= MAX_USES) { + msg_info ("resort symbols cache"); + rspamd_mempool_wlock_rwlock (cache->lock); + cache->uses = 0; + /* Resort while having write lock */ + post_cache_init (cache); + rspamd_mempool_wunlock_rwlock (cache->lock); + } + s = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct symbol_callback_data)); + *save = s; + if (cache->negative_items != NULL) { + s->list_pointer = g_list_first (cache->negative_items); + s->saved_item = s->list_pointer->data; + s->state = CACHE_STATE_NEGATIVE; + } + else if ((s->list_pointer = check_dynamic_item (task, cache)) || cache->dynamic_items != NULL) { + if (s->list_pointer == NULL) { + s->list_pointer = g_list_first (cache->dynamic_items); + s->saved_item = s->list_pointer->data; + s->state = CACHE_STATE_DYNAMIC; + } + else { + s->saved_item = s->list_pointer->data; + s->state = CACHE_STATE_DYNAMIC_MAP; + } + } + else { + s->state = 
CACHE_STATE_STATIC; + s->list_pointer = g_list_first (cache->static_items); + if (s->list_pointer) { + s->saved_item = s->list_pointer->data; + } + else { + return FALSE; + } + } + item = s->saved_item; + } + else { + if (cache == NULL) { + return FALSE; + } + switch (s->state) { + case CACHE_STATE_NEGATIVE: + s->list_pointer = g_list_next (s->list_pointer); + if (s->list_pointer == NULL) { + if ((s->list_pointer = check_dynamic_item (task, cache)) || cache->dynamic_items != NULL) { + if (s->list_pointer == NULL) { + s->list_pointer = g_list_first (cache->dynamic_items); + s->saved_item = s->list_pointer->data; + s->state = CACHE_STATE_DYNAMIC; + } + else { + s->saved_item = s->list_pointer->data; + s->state = CACHE_STATE_DYNAMIC_MAP; + } + } + else { + s->state = CACHE_STATE_STATIC; + s->list_pointer = g_list_first (cache->static_items); + if (s->list_pointer) { + s->saved_item = s->list_pointer->data; + } + else { + return FALSE; + } + } + } + else { + s->saved_item = s->list_pointer->data; + } + item = s->saved_item; + break; + case CACHE_STATE_DYNAMIC_MAP: + s->list_pointer = g_list_next (s->list_pointer); + if (s->list_pointer == NULL) { + s->list_pointer = g_list_first (cache->dynamic_items); + if (s->list_pointer) { + s->saved_item = s->list_pointer->data; + s->state = CACHE_STATE_DYNAMIC; + } + else { + s->state = CACHE_STATE_STATIC; + s->list_pointer = g_list_first (cache->static_items); + if (s->list_pointer) { + s->saved_item = s->list_pointer->data; + } + else { + return FALSE; + } + } + } + else { + s->saved_item = s->list_pointer->data; + } + item = s->saved_item; + break; + case CACHE_STATE_DYNAMIC: + s->list_pointer = g_list_next (s->list_pointer); + if (s->list_pointer == NULL) { + s->state = CACHE_STATE_STATIC; + s->list_pointer = g_list_first (cache->static_items); + if (s->list_pointer) { + s->saved_item = s->list_pointer->data; + } + else { + return FALSE; + } + } + else { + s->saved_item = s->list_pointer->data; + /* Skip items that are in 
negative map */ + while (s->list_pointer != NULL && check_negative_dynamic_item (task, cache, s->saved_item)) { + s->list_pointer = g_list_next (s->list_pointer); + if (s->list_pointer != NULL) { + s->saved_item = s->list_pointer->data; + } + } + if (s->list_pointer == NULL) { + s->state = CACHE_STATE_STATIC; + s->list_pointer = g_list_first (cache->static_items); + if (s->list_pointer) { + s->saved_item = s->list_pointer->data; + } + else { + return FALSE; + } + } + } + item = s->saved_item; + break; + case CACHE_STATE_STATIC: + /* Next pointer */ + s->list_pointer = g_list_next (s->list_pointer); + if (s->list_pointer) { + s->saved_item = s->list_pointer->data; + } + else { + return FALSE; + } + item = s->saved_item; + break; + } + } + if (!item) { + return FALSE; + } + if (!item->is_virtual) { +#ifdef HAVE_CLOCK_GETTIME +# ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID + clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts1); +# elif defined(HAVE_CLOCK_VIRTUAL) + clock_gettime (CLOCK_VIRTUAL, &ts1); +# else + clock_gettime (CLOCK_REALTIME, &ts1); +# endif +#else + if (gettimeofday (&tv1, NULL) == -1) { + msg_warn ("gettimeofday failed: %s", strerror (errno)); + } +#endif + if (G_UNLIKELY (check_debug_symbol (task->cfg, item->s->symbol))) { + rspamd_log_debug (rspamd_main->logger); + item->func (task, item->user_data); + rspamd_log_nodebug (rspamd_main->logger); + } + else { + item->func (task, item->user_data); + } + + +#ifdef HAVE_CLOCK_GETTIME +# ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID + clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts2); +# elif defined(HAVE_CLOCK_VIRTUAL) + clock_gettime (CLOCK_VIRTUAL, &ts2); +# else + clock_gettime (CLOCK_REALTIME, &ts2); +# endif +#else + if (gettimeofday (&tv2, NULL) == -1) { + msg_warn ("gettimeofday failed: %s", strerror (errno)); + } +#endif + +#ifdef HAVE_CLOCK_GETTIME + diff = (ts2.tv_sec - ts1.tv_sec) * 1000000 + (ts2.tv_nsec - ts1.tv_nsec) / 1000; +#else + diff = (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec); +#endif + 
item->s->avg_time = set_counter (item->s->symbol, diff); + } + + s->saved_item = item; + + return TRUE; + +} diff --git a/src/libserver/symbols_cache.h b/src/libserver/symbols_cache.h new file mode 100644 index 000000000..bb2100fc1 --- /dev/null +++ b/src/libserver/symbols_cache.h @@ -0,0 +1,150 @@ +#ifndef RSPAMD_SYMBOLS_CACHE_H +#define RSPAMD_SYMBOLS_CACHE_H + +#include "config.h" +#include "radix.h" + +#define MAX_SYMBOL 128 + +struct rspamd_task; +struct config_file; + +typedef void (*symbol_func_t)(struct rspamd_task *task, gpointer user_data); + +struct saved_cache_item { + gchar symbol[MAX_SYMBOL]; + double weight; + guint32 frequency; + double avg_time; +}; + +struct dynamic_map_item { + struct in_addr addr; + guint32 mask; + gboolean negative; +}; + +struct cache_item { + /* Static item's data */ + struct saved_cache_item *s; + + /* For dynamic rules */ + struct dynamic_map_item *networks; + guint32 networks_number; + gboolean is_dynamic; + + /* Callback data */ + symbol_func_t func; + gpointer user_data; + + /* Flags of virtual symbols */ + gboolean is_virtual; + gboolean is_callback; + + /* Priority */ + gint priority; + gdouble metric_weight; +}; + + +struct symbols_cache { + /* Normal cache items */ + GList *static_items; + + /* Items that have negative weights */ + GList *negative_items; + + /* Radix map of dynamic rules with ip mappings */ + radix_tree_t *dynamic_map; + radix_tree_t *negative_dynamic_map; + + /* Common dynamic rules */ + GList *dynamic_items; + + /* Hash table for fast access */ + GHashTable *items_by_symbol; + + rspamd_mempool_t *static_pool; + + guint cur_items; + guint used_items; + guint uses; + gpointer map; + rspamd_mempool_rwlock_t *lock; + struct config_file *cfg; +}; + +/** + * Load symbols cache from file, must be called _after_ init_symbols_cache + */ +gboolean init_symbols_cache (rspamd_mempool_t *pool, struct symbols_cache *cache, struct config_file *cfg, + const gchar *filename, gboolean ignore_checksum); + +/** + * 
Register function for symbols parsing + * @param name name of symbol + * @param func pointer to handler + * @param user_data pointer to user_data + */ +void register_symbol (struct symbols_cache **cache, const gchar *name, double weight, + symbol_func_t func, gpointer user_data); + + +/** + * Register virtual symbol + * @param name name of symbol + */ +void register_virtual_symbol (struct symbols_cache **cache, const gchar *name, double weight); + +/** + * Register callback function for symbols parsing + * @param name name of symbol + * @param func pointer to handler + * @param user_data pointer to user_data + */ +void register_callback_symbol (struct symbols_cache **cache, const gchar *name, double weight, + symbol_func_t func, gpointer user_data); + +/** + * Register function for symbols parsing with strict priority + * @param name name of symbol + * @param func pointer to handler + * @param user_data pointer to user_data + */ +void register_callback_symbol_priority (struct symbols_cache **cache, const gchar *name, double weight, + gint priority, symbol_func_t func, gpointer user_data); + +/** + * Register function for dynamic symbols parsing + * @param name name of symbol + * @param func pointer to handler + * @param user_data pointer to user_data + */ +void register_dynamic_symbol (rspamd_mempool_t *pool, struct symbols_cache **cache, const gchar *name, + double weight, symbol_func_t func, + gpointer user_data, GList *networks); + +/** + * Call function for cached symbol using saved callback + * @param task task object + * @param cache symbols cache + * @param save pointer to currently saved item + */ +gboolean call_symbol_callback (struct rspamd_task *task, struct symbols_cache *cache, gpointer *save); + +/** + * Remove all dynamic rules from cache + * @param cache symbols cache + */ +void remove_dynamic_rules (struct symbols_cache *cache); + +/** + * Validate cache items against their weights defined in metrics + * @param cache symbols cache + * @param 
cfg configuration + * @param strict do strict checks - symbols MUST be described in metrics + */ +gboolean validate_cache (struct symbols_cache *cache, struct config_file *cfg, gboolean strict); + + +#endif diff --git a/src/libserver/task.c b/src/libserver/task.c new file mode 100644 index 000000000..f389793dd --- /dev/null +++ b/src/libserver/task.c @@ -0,0 +1,159 @@ +/* Copyright (c) 2014, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "task.h" +#include "main.h" +#include "filter.h" +#include "message.h" + +/* + * Destructor for recipients list in a task + */ +static void +rcpt_destruct (void *pointer) +{ + struct rspamd_task *task = (struct rspamd_task *) pointer; + + if (task->rcpt) { + g_list_free (task->rcpt); + } +} + +/* + * Create new task + */ +struct rspamd_task * +rspamd_task_new (struct rspamd_worker *worker) +{ + struct rspamd_task *new_task; + + new_task = g_slice_alloc0 (sizeof (struct rspamd_task)); + + new_task->worker = worker; + new_task->state = READ_MESSAGE; + if (worker) { + new_task->cfg = worker->srv->cfg; + } +#ifdef HAVE_CLOCK_GETTIME +# ifdef HAVE_CLOCK_PROCESS_CPUTIME_ID + clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &new_task->ts); +# elif defined(HAVE_CLOCK_VIRTUAL) + clock_gettime (CLOCK_VIRTUAL, &new_task->ts); +# else + clock_gettime (CLOCK_REALTIME, &new_task->ts); +# endif +#endif + if (gettimeofday (&new_task->tv, NULL) == -1) { + msg_warn ("gettimeofday failed: %s", strerror (errno)); + } + + new_task->task_pool = rspamd_mempool_new (rspamd_mempool_suggest_size ()); + + /* Add destructor for recipients list (it would be better to use anonymous function here */ + rspamd_mempool_add_destructor (new_task->task_pool, + (rspamd_mempool_destruct_t) rcpt_destruct, new_task); + new_task->results = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + rspamd_mempool_add_destructor (new_task->task_pool, + (rspamd_mempool_destruct_t) g_hash_table_destroy, + new_task->results); + new_task->re_cache = g_hash_table_new (rspamd_str_hash, rspamd_str_equal); + rspamd_mempool_add_destructor (new_task->task_pool, + (rspamd_mempool_destruct_t) g_hash_table_destroy, + new_task->re_cache); + new_task->raw_headers = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal); + rspamd_mempool_add_destructor (new_task->task_pool, + (rspamd_mempool_destruct_t) g_hash_table_destroy, + new_task->raw_headers); + new_task->emails = g_tree_new (compare_email_func); + 
rspamd_mempool_add_destructor (new_task->task_pool, + (rspamd_mempool_destruct_t) g_tree_destroy, + new_task->emails); + new_task->urls = g_tree_new (compare_url_func); + rspamd_mempool_add_destructor (new_task->task_pool, + (rspamd_mempool_destruct_t) g_tree_destroy, + new_task->urls); + new_task->sock = -1; + new_task->is_mime = TRUE; + new_task->pre_result.action = METRIC_ACTION_NOACTION; + + new_task->message_id = new_task->queue_id = "undef"; + + return new_task; +} + + +/* + * Free all structures of worker_task + */ +void +rspamd_task_free (struct rspamd_task *task, gboolean is_soft) +{ + GList *part; + struct mime_part *p; + + if (task) { + debug_task ("free pointer %p", task); + while ((part = g_list_first (task->parts))) { + task->parts = g_list_remove_link (task->parts, part); + p = (struct mime_part *) part->data; + g_byte_array_free (p->content, TRUE); + g_list_free_1 (part); + } + if (task->text_parts) { + g_list_free (task->text_parts); + } + if (task->images) { + g_list_free (task->images); + } + if (task->messages) { + g_list_free (task->messages); + } + if (task->received) { + g_list_free (task->received); + } + if (task->http_conn != NULL) { + rspamd_http_connection_unref (task->http_conn); + } + if (task->sock != -1) { + close (task->sock); + } + rspamd_mempool_delete (task->task_pool); + g_slice_free1 (sizeof (struct rspamd_task), task); + } +} + +void +rspamd_task_free_hard (gpointer ud) +{ + struct rspamd_task *task = ud; + + rspamd_task_free (task, FALSE); +} + +void +rspamd_task_free_soft (gpointer ud) +{ + struct rspamd_task *task = ud; + + rspamd_task_free (task, FALSE); +} diff --git a/src/libserver/task.h b/src/libserver/task.h new file mode 100644 index 000000000..f8f7c89e3 --- /dev/null +++ b/src/libserver/task.h @@ -0,0 +1,165 @@ +/* Copyright (c) 2014, Vsevolod Stakhov + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+#ifndef TASK_H_
+#define TASK_H_
+
+#include "config.h"
+#include "http.h"
+#include "events.h"
+#include "util.h"
+#include "mem_pool.h"
+#include "dns.h"
+/**
+ * Command requested for a task; stored in rspamd_task.cmd
+ */
+enum rspamd_command {
+	CMD_CHECK,
+	CMD_SYMBOLS,
+	CMD_REPORT,
+	CMD_REPORT_IFSPAM,
+	CMD_SKIP,
+	CMD_PING,
+	CMD_PROCESS,
+	CMD_OTHER
+};
+/**
+ * Action attached to a message; used for rspamd_task.pre_result.action.
+ * METRIC_ACTION_MAX is the last enumerator (presumably a count/sentinel
+ * rather than a real action -- confirm against the users of this enum).
+ */
+enum rspamd_metric_action {
+	METRIC_ACTION_REJECT = 0,
+	METRIC_ACTION_SOFT_REJECT,
+	METRIC_ACTION_REWRITE_SUBJECT,
+	METRIC_ACTION_ADD_HEADER,
+	METRIC_ACTION_GREYLIST,
+	METRIC_ACTION_NOACTION,
+	METRIC_ACTION_MAX
+};
+/**
+ * Handler type for a custom command: receives the task and returns gint.
+ * NOTE(review): struct rspamd_task is defined only further down in this
+ * header; unless one of the includes above already declares the tag, the
+ * name used here has prototype scope only -- confirm that a forward
+ * declaration is in effect.
+ */
+typedef gint (*protocol_reply_func)(struct rspamd_task *task);
+/**
+ * Custom command: a name paired with the handler that serves it
+ */
+struct custom_command {
+	const gchar *name; /**< command name */
+	protocol_reply_func func; /**< handler for this command */
+};
+
+/**
+ * Worker task structure
+ */
+struct rspamd_task {
+	struct rspamd_worker *worker; /**< pointer to worker object */
+	enum {
+		READ_MESSAGE,
+		WAIT_PRE_FILTER,
+		WAIT_FILTER,
+		WAIT_POST_FILTER,
+		WRITE_REPLY,
+		CLOSING_CONNECTION
+	} state; /**< current session state */
+	enum rspamd_command cmd; /**< command */
+	struct custom_command *custom_cmd; /**< custom command if any */
+	gint sock; /**< socket descriptor */
+	gboolean is_mime; /**< if this task is mime task */
+	gboolean is_json; /**< output is JSON */
+	gboolean allow_learn; /**< allow learning */
+	gboolean is_skipped; /**< whether message was skipped by configuration */
+
+	gchar *helo; /**< helo header value */
+	gchar *from; /**< from header value */
+	gchar *queue_id; /**< queue id if specified */
+	const gchar *message_id; /**< message id */
+	GList *rcpt; /**< recipients list */
+	guint nrcpt; /**< number of recipients */
+	rspamd_inet_addr_t from_addr; /**< from addr for a task */
+	rspamd_inet_addr_t client_addr; /**< address of connected socket */
+	gchar *deliver_to; /**< address to deliver */
+	gchar *user; /**< user to deliver */
+	gchar *subject; /**< subject (for non-mime) */
+	gchar *hostname; /**< hostname reported by MTA */
+	GString *msg; /**< message buffer */
+	struct rspamd_http_connection *http_conn; /**< HTTP server connection */
+	struct rspamd_async_session* s; /**< async session object */
+	gint parts_count; /**< mime parts count */
+	GMimeMessage *message; /**< message, parsed with GMime */
+	GMimeObject *parser_parent_part; /**< current parent part */
+	InternetAddressList *rcpts; /**< list of all recipients */
+	GList *parts; /**< list of parsed parts */
+	GList *text_parts; /**< list of text parts */
+	gchar *raw_headers_str; /**< raw headers string */
+	GList *received; /**< list of received headers */
+	GTree *urls; /**< tree of parsed urls */
+	GTree *emails; /**< tree of parsed emails */
+	GList *images; /**< list of images */
+	GHashTable *raw_headers; /**< hash table of raw headers */
+	GHashTable *results; /**< hash table of metric_result indexed by
+	 * metric's name */
+	GHashTable *tokens; /**< hash table of tokens indexed by tokenizer
+	 * pointer */
+	GList *messages; /**< list of messages that would be reported */
+	GHashTable *re_cache; /**< cache for matched or not matched regexps */
+	struct config_file *cfg; /**< pointer to config object */
+	gchar *last_error; /**< last error */
+	gint error_code; /**< code of last error */
+	rspamd_mempool_t *task_pool; /**< memory pool for task */
+#ifdef HAVE_CLOCK_GETTIME
+	struct timespec ts; /**< time of connection */
+#endif
+	struct timeval tv; /**< time of connection */
+	guint32 scan_milliseconds; /**< how many milliseconds passed */
+	gboolean pass_all_filters; /**< pass task through every rule */
+	gboolean no_log; /**< do not log or write this task to the history */
+	guint32 parser_recursion; /**< for avoiding recursion stack overflow */
+	gboolean (*fin_callback)(void *arg); /**< callback for filters finalizing */
+	void *fin_arg; /**< argument for fin callback */
+
+	guint32 dns_requests; /**< number of DNS requests per this task */
+
+	struct rspamd_dns_resolver *resolver; /**< DNS resolver */
+	struct event_base *ev_base; /**< Event base */
+
+	GThreadPool *classify_pool; /**< A pool of classify threads */
+
+	struct {
+		enum rspamd_metric_action action; /**< Action of pre filters */
+		gchar *str; /**< String describing action */
+	} pre_result; /**< Result of pre-filters */
+};
+
+/**
+ * Construct new task for worker
+ * @param worker worker the task is created for
+ * @return pointer to the new task
+ */
+struct rspamd_task* rspamd_task_new (struct rspamd_worker *worker);
+/**
+ * Destroy task object and remove its IO dispatcher if it exists
+ * @param task task to destroy
+ * @param is_soft selects soft or hard destruction; the exact difference is
+ * not visible in this header -- see the implementation
+ */
+void rspamd_task_free (struct rspamd_task *task, gboolean is_soft);
+void rspamd_task_free_hard (gpointer ud); /**< gpointer-typed variant; presumably rspamd_task_free with is_soft == FALSE -- confirm */
+void rspamd_task_free_soft (gpointer ud); /**< gpointer-typed variant; presumably rspamd_task_free with is_soft == TRUE -- confirm */
+
+/**
+ * Called if session was restored inside fin callback
+ * @param arg opaque pointer, presumably the struct rspamd_task -- confirm
+ */
+void rspamd_task_restore (void *arg);
+
+/**
+ * Called if all filters are processed; has the same signature as
+ * rspamd_task.fin_callback
+ * @param arg opaque pointer, presumably the struct rspamd_task -- confirm
+ * @return TRUE if session should be terminated
+ */
+gboolean rspamd_task_fin (void *arg);
+
+#endif /* TASK_H_ */
diff --git a/src/libserver/url.c b/src/libserver/url.c
new file mode 100644
index 000000000..c4313e8a9
--- /dev/null
+++ b/src/libserver/url.c
@@ -0,0 +1,1620 @@
+/*
+ * Copyright (c) 2009-2012, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.
IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "url.h"
+#include "util.h"
+#include "fstring.h"
+#include "main.h"
+#include "message.h"
+#include "trie.h"
+/* POST_CHAR_S is POST_CHAR (byte value 1) spelled as a one-character string */
+#define POST_CHAR 1
+#define POST_CHAR_S "\001"
+
+/* Tcp port range */
+#define LOWEST_PORT 0
+#define HIGHEST_PORT 65535
+/* True when port lies in [LOWEST_PORT, HIGHEST_PORT]; evaluates port twice */
+#define uri_port_is_valid(port) \
+	(LOWEST_PORT <= (port) && (port) <= HIGHEST_PORT)
+/* Description of one URI scheme; instances live in the protocol_backends table */
+struct _proto {
+	guchar *name; /* scheme name; NULL in the terminating table entry */
+	gint port; /* port number listed for the scheme */
+	uintptr_t *unused; /* always NULL in protocol_backends */
+	guint need_slashes:1; /* set for every named scheme in protocol_backends */
+	guint need_slash_after_host:1; /* never set in protocol_backends */
+	guint free_syntax:1; /* set only in the terminating table entry */
+	guint need_ssl:1; /* set only for "https" */
+};
+/* Match state handed to the start/end callbacks of a url_matcher */
+typedef struct url_match_s {
+	const gchar *m_begin; /* presumably the start of the matched text -- confirm */
+	gsize m_len; /* presumably the length of the matched text -- confirm */
+	const gchar *pattern;
+	const gchar *prefix;
+	gboolean add_prefix;
+} url_match_t;
+/*
+ * Bits for url_matcher.flags. For URL_FLAG_STRICT_MATCH entries url_init
+ * registers the pattern only with a trailing '/', ' ' or ':' appended.
+ * The handling of URL_FLAG_NOHTML is not visible in this part of the file.
+ */
+#define URL_FLAG_NOHTML 0x1
+#define URL_FLAG_STRICT_MATCH 0x2
+/* One row of the matchers table */
+struct url_matcher {
+	const gchar *pattern; /* text inserted into the scanner trie by url_init */
+	const gchar *prefix; /* "" for full-scheme patterns, otherwise a scheme such as "http://" */
+	gboolean (*start)(const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+	gboolean (*end)(const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+	gint flags; /* combination of URL_FLAG_* bits */
+};
+/* Forward declarations of the start/end callbacks referenced by the matchers table */
+static gboolean url_file_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+static gboolean url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+
+static gboolean url_web_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+static gboolean url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+
+static gboolean url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+static gboolean url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+
+static gboolean url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+static gboolean url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match);
+/*
+ * Table of URL start patterns. Each row is
+ * { pattern, prefix, start callback, end callback, flags }.
+ * url_init inserts the patterns into the scanner trie, passing the row
+ * index as the third argument of rspamd_trie_insert.
+ */
+struct url_matcher matchers[] = {
+	/* Common prefixes */
+	{ "file://", "", url_file_start, url_file_end, 0 },
+	{ "ftp://", "", url_web_start, url_web_end, 0 },
+	{ "sftp://", "", url_web_start, url_web_end, 0 },
+	{ "http://", "", url_web_start, url_web_end, 0 },
+	{ "https://", "", url_web_start, url_web_end, 0 },
+	{ "news://", "", url_web_start, url_web_end, 0 },
+	{ "nntp://", "", url_web_start, url_web_end, 0 },
+	{ "telnet://", "", url_web_start, url_web_end, 0 },
+	{ "webcal://", "", url_web_start, url_web_end, 0 },
+	{ "mailto://", "", url_email_start, url_email_end, 0 },
+	{ "callto://", "", url_web_start, url_web_end, 0 },
+	{ "h323:", "", url_web_start, url_web_end, 0 },
+	{ "sip:", "", url_web_start, url_web_end, 0 },
+	{ "www.", "http://", url_web_start, url_web_end, 0 },
+	{ "ftp.", "ftp://", url_web_start, url_web_end, URL_FLAG_NOHTML },
+	/* TLD domains parts */
+	{ ".ac", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".ad", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".ae", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".aero", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".af", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".ag", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".ai", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".al",
"http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".am", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".an", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ao", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".aq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ar", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".arpa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".as", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".asia", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".at", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".au", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".aw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ax", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".az", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ba", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".be", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bi", "http://", url_tld_start, url_tld_end, 
URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".biz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".br", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".by", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".bz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ca", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cat", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ch", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ci", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ck", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + 
{ ".cl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".co", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".com", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".coop", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".cz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".de", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".dj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".dk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".dm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".do", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".dz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ec", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".edu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ee", "http://", url_tld_start, 
url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".eg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".er", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".es", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".et", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".eu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".fr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ga", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ge", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | 
URL_FLAG_STRICT_MATCH }, + { ".gn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gov", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".gy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".hk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".hm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".hn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".hr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ht", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".hu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".id", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ie", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".il", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".im", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".in", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".info", 
"http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".int", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".io", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".iq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ir", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".is", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".it", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".je", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".jm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".jo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".jobs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".jp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ke", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ki", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".km", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ky", "http://", url_tld_start, url_tld_end, 
URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".kz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".la", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".li", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ls", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".lv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ly", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ma", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".md", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".me", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mil", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ml", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + 
{ ".mm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mo", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mobi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mq", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ms", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".museum", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".my", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".mz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".na", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".name", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ne", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".net", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nf", "http://", url_tld_start, 
url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ng", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ni", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".no", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".np", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".nz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".om", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".org", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pe", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ph", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pro", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | 
URL_FLAG_STRICT_MATCH }, + { ".ps", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".pw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".py", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".qa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".re", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ro", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".rs", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".ru", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".rw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sa", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sb", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sd", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".se", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sh", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".si", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sm", "http://", 
url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".so", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".st", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".su", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".sz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".td", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tel", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".th", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tj", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tl", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".tn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH }, + { ".to", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | 
URL_FLAG_STRICT_MATCH },
+	{ ".tp", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".tr", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".travel", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".tt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".tv", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".tw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".tz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".ua", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".ug", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".uk", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".us", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".uy", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".uz", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".va", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".vc", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".ve", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".vg", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".vi", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".vn", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".vu", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".wf", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".ws", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".xxx", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".ye", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".yt", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".za", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".zm", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	{ ".zw", "http://", url_tld_start, url_tld_end, URL_FLAG_NOHTML | URL_FLAG_STRICT_MATCH },
+	/* Likely emails */
+	{ "@", "mailto://",url_email_start, url_email_end, URL_FLAG_NOHTML }
+};
+/* Scanner state: the matcher table, its length and the trie built from it */
+struct url_match_scanner {
+	struct url_matcher *matchers; /* set to the matchers array by url_init */
+	gsize matchers_count; /* number of rows in matchers */
+	rspamd_trie_t *patterns; /* trie holding every registered pattern */
+};
+/* Global scanner instance; stays NULL until url_init creates it */
+struct url_match_scanner *url_scanner = NULL;
+/*
+ * Known schemes. Columns follow struct _proto: name, port, unused,
+ * need_slashes, need_slash_after_host, free_syntax, need_ssl.
+ */
+static const struct _proto protocol_backends[] = {
+	{"file", 0, NULL, 1, 0, 0, 0},
+	{"ftp", 21, NULL, 1, 0, 0, 0},
+	{"http", 80, NULL, 1, 0, 0, 0},
+	{"https", 443, NULL, 1, 0, 0, 1},
+	{"mailto", 25, NULL, 1, 0, 0, 0},
+	/* Keep these last! */
+	{NULL, 0, NULL, 0, 0, 1, 0}
+};
+
+/* Convert an ASCII hex digit to the corresponding number between 0
+ and 15. H should be a hexadecimal digit that satisfies isxdigit;
+ otherwise, the result is undefined. X2DIGITS_TO_NUM combines two
+ such digits, H1 being the high nibble. */
+#define XDIGIT_TO_NUM(h) ((h) < 'A' ? (h) - '0' : g_ascii_toupper (h) - 'A' + 10)
+#define X2DIGITS_TO_NUM(h1, h2) ((XDIGIT_TO_NUM (h1) << 4) + XDIGIT_TO_NUM (h2))
+/* The reverse of the above: convert a number in the [0, 16) range to
+ the ASCII representation of the corresponding hexadecimal digit.
+ `+ 0' is there so you can't accidentally use it as an lvalue.
+ XNUM_TO_DIGIT yields upper-case digits, XNUM_TO_digit lower-case ones. */
+#define XNUM_TO_DIGIT(x) ("0123456789ABCDEF"[x] + 0)
+#define XNUM_TO_digit(x) ("0123456789abcdef"[x] + 0)
+/*
+ * Character class table indexed by byte value. Each entry is a bit mask of
+ * the IS_* flags declared below; every entry from 0x80 upwards holds
+ * IS_CTRL only.
+ */
+static guchar url_scanner_table[256] = {
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 1, 1, 9, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	24,128,160,128,128,128,128,128,160,160,128,128,160,192,160,160,
+	68, 68, 68, 68, 68, 68, 68, 68, 68, 68,160,160, 32,128, 32,128,
+	160, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+	66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,160,160,160,128,192,
+	128, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+	66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,128,128,128,128, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+};
+/* Bit flags stored in url_scanner_table, one bit per character class */
+enum {
+	IS_CTRL = (1 << 0),
+	IS_ALPHA = (1 << 1),
+	IS_DIGIT = (1 << 2),
+	IS_LWSP = (1 << 3),
+	IS_SPACE = (1 << 4),
+	IS_SPECIAL = (1 << 5),
+	IS_DOMAIN = (1 << 6),
+	IS_URLSAFE = (1 << 7)
+};
+/*
+ * Character class tests: x is cast to guchar before indexing the table.
+ * is_atom is true when none of IS_SPECIAL, IS_SPACE, IS_CTRL is set;
+ * is_urlsafe is true when any of IS_ALPHA, IS_DIGIT, IS_URLSAFE is set.
+ */
+#define is_ctrl(x) ((url_scanner_table[(guchar)(x)] & IS_CTRL) != 0)
+#define is_lwsp(x) ((url_scanner_table[(guchar)(x)] & IS_LWSP) != 0)
+#define is_atom(x) ((url_scanner_table[(guchar)(x)] & (IS_SPECIAL|IS_SPACE|IS_CTRL)) == 0)
+#define is_alpha(x) ((url_scanner_table[(guchar)(x)] & IS_ALPHA) != 0)
+#define is_digit(x) ((url_scanner_table[(guchar)(x)] & IS_DIGIT) != 0)
+#define is_domain(x) ((url_scanner_table[(guchar)(x)] & IS_DOMAIN) != 0)
+#define is_urlsafe(x) ((url_scanner_table[(guchar)(x)] & (IS_ALPHA|IS_DIGIT|IS_URLSAFE)) != 0)
+
+/**
+ * Translate a URI parser error code into a static description string
+ * @param err error code
+ * @return constant string for the codes listed below, NULL for any other value
+ */
+const gchar *
+url_strerror (enum uri_errno err)
+{
+	switch (err) {
+	case URI_ERRNO_OK:
+		return "Parsing went well";
+	case URI_ERRNO_EMPTY:
+		return "The URI string was empty";
+	case URI_ERRNO_INVALID_PROTOCOL:
+		return "No protocol was found";
+	case URI_ERRNO_NO_SLASHES:
+		return "Slashes after protocol missing";
+	case URI_ERRNO_TOO_MANY_SLASHES:
+		return "Too many slashes after protocol";
+	case URI_ERRNO_TRAILING_DOTS:
+		return "'.' after host";
+	case URI_ERRNO_NO_HOST:
+		return "Host part is missing";
+	case URI_ERRNO_NO_PORT_COLON:
+		return "':' after host without port";
+	case URI_ERRNO_NO_HOST_SLASH:
+		return "Slash after host missing";
+	case URI_ERRNO_IPV6_SECURITY:
+		return "IPv6 security bug detected";
+	case URI_ERRNO_INVALID_PORT:
+		return "Port number is bad";
+	case URI_ERRNO_INVALID_PORT_RANGE:
+		return "Port number is not within 0-65535";
+	}
+	return NULL;
+}
+/**
+ * Measure the leading part of name that contains none of POST_CHAR, '#', '?'
+ * @param name NUL-terminated string to scan
+ * @return offset of the first such character, or the string length if none
+ */
+static gint
+check_uri_file (gchar *name)
+{
+	static const gchar chars[] = POST_CHAR_S "#?";
+
+	return strcspn (name, chars);
+}
+/**
+ * Create the global url_scanner on first call and fill its trie from the
+ * matchers table; later calls find url_scanner set and do nothing.
+ * @return always 0
+ */
+static gint
+url_init (void)
+{
+	guint i;
+	gchar patbuf[128];
+
+	if (url_scanner == NULL) {
+		url_scanner = g_malloc (sizeof (struct url_match_scanner));
+		url_scanner->matchers = matchers;
+		url_scanner->matchers_count = G_N_ELEMENTS (matchers);
+		url_scanner->patterns = rspamd_trie_create (TRUE);
+		for (i = 0; i < url_scanner->matchers_count; i ++) {
+			if (matchers[i].flags & URL_FLAG_STRICT_MATCH) {
+				/* Insert more specific patterns: the bare pattern is not registered.
+				 * NOTE(review): patbuf is a stack buffer reused for each insert;
+				 * confirm that rspamd_trie_insert copies the pattern. */
+
+				/* some.tld/ */
+				rspamd_snprintf (patbuf, sizeof (patbuf), "%s/", matchers[i].pattern);
+				rspamd_trie_insert (url_scanner->patterns, patbuf, i);
+				/* some.tld followed by a space */
+				rspamd_snprintf (patbuf, sizeof (patbuf), "%s ", matchers[i].pattern);
+				rspamd_trie_insert (url_scanner->patterns, patbuf, i);
+				/* some.tld: */
+				rspamd_snprintf (patbuf, sizeof (patbuf), "%s:", matchers[i].pattern);
+				rspamd_trie_insert (url_scanner->patterns, patbuf, i);
+			}
+			else {
+				rspamd_trie_insert (url_scanner->patterns, matchers[i].pattern, i);
+			}
+		}
+	}
+
+	return 0;
+}
+
+enum protocol
+get_protocol (gchar *name, gint namelen)
+{
+	/* These are really enum protocol values but can take on negative
+	 * values and
since 0 <= -1 for enum values it's better to use clean + * integer type. */ + gint start, end; + enum protocol protocol; + guchar *pname; + gint pnamelen, minlen, compare; + + /* Almost dichotomic search is used here */ + /* Starting at the HTTP entry which is the most common that will make + * file and NNTP the next entries checked and amongst the third checks + * are proxy and FTP. */ + start = 0; + end = PROTOCOL_UNKNOWN - 1; + protocol = PROTOCOL_HTTP; + + while (start <= end) { + pname = protocol_backends[protocol].name; + pnamelen = strlen (pname); + minlen = MIN (pnamelen, namelen); + compare = g_ascii_strncasecmp (pname, name, minlen); + + if (compare == 0) { + if (pnamelen == namelen) + return protocol; + + /* If the current protocol name is longer than the + * protocol name being searched for move @end else move + * @start. */ + compare = pnamelen > namelen ? 1 : -1; + } + + if (compare > 0) + end = protocol - 1; + else + start = protocol + 1; + + protocol = (start + end) / 2; + } + + return PROTOCOL_UNKNOWN; +} + + +gint +get_protocol_port (enum protocol protocol) +{ + return protocol_backends[protocol].port; +} + +gint +get_protocol_need_slashes (enum protocol protocol) +{ + return protocol_backends[protocol].need_slashes; +} + +gint +get_protocol_need_slash_after_host (enum protocol protocol) +{ + return protocol_backends[protocol].need_slash_after_host; +} + +gint +get_protocol_free_syntax (enum protocol protocol) +{ + return protocol_backends[protocol].free_syntax; +} + +static gint +get_protocol_length (const gchar *url) +{ + gchar *end = (gchar *)url; + + /* Seek the end of the protocol name if any. */ + /* RFC1738: + * scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] + * (but per its recommendations we accept "upalpha" too) */ + while (g_ascii_isalnum (*end) || *end == '+' || *end == '-' || *end == '.') + end++; + + /* Also return 0 if there's no protocol name (@end == @url). */ + return (*end == ':') ? 
end - url : 0; +} + + +/* + * Calcualte new length of unescaped hostlen + */ +static guint +url_calculate_escaped_hostlen (gchar *host, guint hostlen) +{ + guint i, result = hostlen; + gchar *p = host, c; + + for (i = 0; i < hostlen; i++, p++) { + if (*p == '%' && g_ascii_isxdigit (*(p + 1)) && g_ascii_isxdigit (*(p + 2)) && i < hostlen - 2) { + c = X2DIGITS_TO_NUM (*(p + 1), *(p + 2)); + if (c != '\0') { + result -= 2; + } + } + } + + return result; +} + +/* URL-unescape the string S. + + This is done by transforming the sequences "%HH" to the character + represented by the hexadecimal digits HH. If % is not followed by + two hexadecimal digits, it is inserted literally. + + The transformation is done in place. If you need the original + string intact, make a copy before calling this function. */ + +static void +url_unescape (gchar *s) +{ + gchar *t = s; /* t - tortoise */ + gchar *h = s; /* h - hare */ + + for (; *h; h++, t++) { + if (*h != '%') { + copychar: + *t = *h; + } + else { + gchar c; + /* Do nothing if '%' is not followed by two hex digits. */ + if (!h[1] || !h[2] || !(g_ascii_isxdigit (h[1]) && g_ascii_isxdigit (h[2]))) + goto copychar; + c = X2DIGITS_TO_NUM (h[1], h[2]); + /* Don't unescape %00 because there is no way to insert it + * into a C string without effectively truncating it. 
*/ + if (c == '\0') + goto copychar; + *t = c; + h += 2; + } + } + *t = '\0'; +} + +static void +url_strip (gchar *s) +{ + gchar *t = s; /* t - tortoise */ + gchar *h = s; /* h - hare */ + + while (*h) { + if (g_ascii_isgraph (*h)) { + *t = *h; + t++; + } + h++; + } + *t = '\0'; +} + +static gchar * +url_escape_1 (const gchar *s, gint allow_passthrough, rspamd_mempool_t * pool) +{ + const gchar *p1; + gchar *p2, *newstr; + gint newlen; + gint addition = 0; + + for (p1 = s; *p1; p1++) + if (!is_urlsafe (*p1)) { + addition += 2; /* Two more characters (hex digits) */ + } + + if (!addition) { + if (allow_passthrough) { + return (gchar *)s; + } + else { + return rspamd_mempool_strdup (pool, s); + } + } + + newlen = (p1 - s) + addition; + newstr = (gchar *)rspamd_mempool_alloc (pool, newlen + 1); + + p1 = s; + p2 = newstr; + while (*p1) { + /* Quote the characters that match the test mask. */ + if (!is_urlsafe (*p1)) { + guchar c = *p1++; + *p2++ = '%'; + *p2++ = XNUM_TO_DIGIT (c >> 4); + *p2++ = XNUM_TO_DIGIT (c & 0xf); + } + else + *p2++ = *p1++; + } + *p2 = '\0'; + + return newstr; +} + +/* URL-escape the unsafe characters (see urlchr_table) in a given + string, returning a freshly allocated string. */ + +gchar * +url_escape (const gchar *s, rspamd_mempool_t * pool) +{ + return url_escape_1 (s, 0, pool); +} + +/* Decide whether the gchar at position P needs to be encoded. (It is + not enough to pass a single gchar *P because the function may need + to inspect the surrounding context.) + + Return 1 if the gchar should be escaped as %XX, 0 otherwise. */ + +static inline gboolean +char_needs_escaping (const gchar *p) +{ + if (*p == '%') { + if (g_ascii_isxdigit (*(p + 1)) && g_ascii_isxdigit (*(p + 2))) { + return FALSE; + } + else { + return TRUE; + } + } + else if (! is_urlsafe (*p)) { + return TRUE; + } + return FALSE; +} + +/* Translate a %-escaped (but possibly non-conformant) input string S + into a %-escaped (and conformant) output string. 
+*/ + +static gchar * +reencode_escapes (gchar *s, rspamd_mempool_t * pool) +{ + const gchar *p1; + gchar *newstr, *p2; + gint oldlen, newlen; + + gint encode_count = 0; + + /* First pass: inspect the string to see if there's anything to do, + and to calculate the new length. */ + for (p1 = s; *p1; p1++) + if (char_needs_escaping (p1)) + ++encode_count; + + if (!encode_count) { + /* The string is good as it is. */ + return s; + } + + oldlen = p1 - s; + /* Each encoding adds two characters (hex digits). */ + newlen = oldlen + 2 * encode_count; + newstr = rspamd_mempool_alloc (pool, newlen + 1); + + /* Second pass: copy the string to the destination address, encoding + chars when needed. */ + p1 = s; + p2 = newstr; + + while (*p1) + if (char_needs_escaping (p1)) { + guchar c = *p1++; + *p2++ = '%'; + *p2++ = XNUM_TO_DIGIT (c >> 4); + *p2++ = XNUM_TO_DIGIT (c & 0xf); + } + else { + *p2++ = *p1++; + } + + *p2 = '\0'; + return newstr; +} + +/* Unescape CHR in an otherwise escaped STR. Used to selectively + escaping of certain characters, such as "/" and ":". Returns a + count of unescaped chars. */ + +static void +unescape_single_char (gchar *str, gchar chr) +{ + const gchar c1 = XNUM_TO_DIGIT (chr >> 4); + const gchar c2 = XNUM_TO_DIGIT (chr & 0xf); + gchar *h = str; /* hare */ + gchar *t = str; /* tortoise */ + + for (; *h; h++, t++) { + if (h[0] == '%' && h[1] == c1 && h[2] == c2) { + *t = chr; + h += 2; + } + else { + *t = *h; + } + } + *t = '\0'; +} + + +/* + * Resolve "." and ".." elements of PATH by destructively modifying + * PATH and return non-zero if PATH has been modified, zero otherwise. + */ + +static gboolean +path_simplify (gchar *path) +{ + gchar *h = path; /* hare */ + gchar *t = path; /* tortoise */ + gchar *beg = path; /* boundary for backing the tortoise */ + gchar *end = path + strlen (path); + + while (h < end) { + /* Hare should be at the beginning of a path element. */ + if (h[0] == '.' && (h[1] == '/' || h[1] == '\0')) { + /* Ignore "./". 
*/ + h += 2; + } + else if (h[0] == '.' && h[1] == '.' && (h[2] == '/' || h[2] == '\0')) { + /* Handle "../" by retreating the tortoise by one path + element -- but not past beginning. */ + if (t > beg) { + /* Move backwards until T hits the beginning of the + previous path element or the beginning of path. */ + for (--t; t > beg && t[-1] != '/'; t--); + } + else { + /* If we're at the beginning, copy the "../" literally + move the beginning so a later ".." doesn't remove + it. */ + beg = t + 3; + goto regular; + } + h += 3; + } + else { + regular: + /* A regular path element. If H hasn't advanced past T, + simply skip to the next path element. Otherwise, copy + the path element until the next slash. */ + if (t == h) { + /* Skip the path element, including the slash. */ + while (h < end && *h != '/') + t++, h++; + if (h < end) + t++, h++; + } + else { + /* Copy the path element, including the final slash. */ + while (h < end && *h != '/') + *t++ = *h++; + if (h < end) + *t++ = *h++; + } + } + } + + if (t != h) + *t = '\0'; + + return t != h; +} + +enum uri_errno +parse_uri (struct uri *uri, gchar *uristring, rspamd_mempool_t * pool) +{ + guchar *prefix_end, *host_end, *p; + guchar *lbracket, *rbracket; + gint datalen, n, addrlen; + guchar *frag_or_post, *user_end, *port_end; + + memset (uri, 0, sizeof (*uri)); + + /* Nothing to do for an empty url. 
*/ + if (!*uristring) + return URI_ERRNO_EMPTY; + + uri->string = reencode_escapes (uristring, pool); + msg_debug ("reencoding escapes in original url: '%s'", struri (uri)); + uri->protocollen = get_protocol_length (struri (uri)); + + /* Assume http as default protocol */ + if (!uri->protocollen || (uri->protocol = get_protocol (struri (uri), uri->protocollen)) == PROTOCOL_UNKNOWN) { + /* Make exception for numeric urls */ + p = uri->string; + while (*p && (g_ascii_isalnum (*p) || *p == ':')) { + p ++; + } + if (*p == '\0') { + return URI_ERRNO_INVALID_PROTOCOL; + } + p = g_strconcat ("http://", uri->string, NULL); + uri->string = rspamd_mempool_strdup (pool, p); + g_free (p); + uri->protocol = PROTOCOL_HTTP; + prefix_end = struri (uri) + 7; + } + else { + /* Figure out whether the protocol is known */ + msg_debug ("getting protocol from url: %d", uri->protocol); + + prefix_end = struri (uri) + uri->protocollen; /* ':' */ + + /* Check if there's a digit after the protocol name. */ + if (g_ascii_isdigit (*prefix_end)) { + p = struri (uri); + uri->ip_family = p[uri->protocollen] - '0'; + prefix_end++; + } + if (*prefix_end != ':') { + msg_debug ("invalid protocol in uri"); + return URI_ERRNO_INVALID_PROTOCOL; + } + prefix_end++; + + /* Skip slashes */ + + if (prefix_end[0] == '/' && prefix_end[1] == '/') { + if (prefix_end[2] == '/') { + msg_debug ("too many '/' in uri"); + return URI_ERRNO_TOO_MANY_SLASHES; + } + + prefix_end += 2; + + } + else { + msg_debug ("no '/' in uri"); + return URI_ERRNO_NO_SLASHES; + } + } + + if (get_protocol_free_syntax (uri->protocol)) { + uri->data = prefix_end; + uri->datalen = strlen (prefix_end); + return URI_ERRNO_OK; + + } + else if (uri->protocol == PROTOCOL_FILE) { + datalen = check_uri_file (prefix_end); + frag_or_post = prefix_end + datalen; + + /* Extract the fragment part. 
*/ + if (datalen >= 0) { + if (*frag_or_post == '#') { + uri->fragment = frag_or_post + 1; + uri->fragmentlen = strcspn (uri->fragment, POST_CHAR_S); + frag_or_post = uri->fragment + uri->fragmentlen; + } + if (*frag_or_post == POST_CHAR) { + uri->post = frag_or_post + 1; + } + } + else { + datalen = strlen (prefix_end); + } + + uri->data = prefix_end; + uri->datalen = datalen; + + return URI_ERRNO_OK; + } + + /* Isolate host */ + + /* Get brackets enclosing IPv6 address */ + lbracket = strchr (prefix_end, '['); + if (lbracket) { + rbracket = strchr (lbracket, ']'); + /* [address] is handled only inside of hostname part (surprisingly). */ + if (rbracket && rbracket < prefix_end + strcspn (prefix_end, "/")) + uri->ipv6 = 1; + else + lbracket = rbracket = NULL; + } + else { + rbracket = NULL; + } + + /* Possibly skip auth part */ + host_end = prefix_end + strcspn (prefix_end, "@"); + + if (prefix_end + strcspn (prefix_end, "/?") > host_end && *host_end) { /* we have auth info here */ + + /* Allow '@' in the password component */ + while (strcspn (host_end + 1, "@") < strcspn (host_end + 1, "/?")) + host_end = host_end + 1 + strcspn (host_end + 1, "@"); + + user_end = strchr (prefix_end, ':'); + + if (!user_end || user_end > host_end) { + uri->user = prefix_end; + uri->userlen = host_end - prefix_end; + } + else { + uri->user = prefix_end; + uri->userlen = user_end - prefix_end; + uri->password = user_end + 1; + uri->passwordlen = host_end - user_end - 1; + } + prefix_end = host_end + 1; + } + + if (uri->ipv6 && rbracket != NULL) { + host_end = rbracket + strcspn (rbracket, ":/?"); + } + else { + host_end = prefix_end + strcspn (prefix_end, ":/?"); + } + + if (uri->ipv6) { + addrlen = rbracket - lbracket - 1; + + + uri->host = lbracket + 1; + uri->hostlen = addrlen; + } + else { + uri->host = prefix_end; + uri->hostlen = host_end - prefix_end; + + /* Trim trailing '.'s */ + if (uri->hostlen && uri->host[uri->hostlen - 1] == '.') + return URI_ERRNO_TRAILING_DOTS; + } + 
+ if (*host_end == ':') { /* we have port here */ + port_end = host_end + 1 + strcspn (host_end + 1, "/"); + + host_end++; + + uri->port = host_end; + uri->portlen = port_end - host_end; + + if (uri->portlen == 0) + return URI_ERRNO_NO_PORT_COLON; + + /* We only use 8 bits for portlen so better check */ + if ((gint)uri->portlen != port_end - host_end) + return URI_ERRNO_INVALID_PORT; + + /* test if port is number */ + for (; host_end < port_end; host_end++) + if (!g_ascii_isdigit (*host_end)) + return URI_ERRNO_INVALID_PORT; + + /* Check valid port value, and let show an error message + * about invalid url syntax. */ + if (uri->port && uri->portlen) { + + errno = 0; + n = strtol (uri->port, NULL, 10); + if (errno || !uri_port_is_valid (n)) + return URI_ERRNO_INVALID_PORT; + } + } + + if (*host_end == '/') { + host_end++; + + } + else if (get_protocol_need_slash_after_host (uri->protocol) && *host_end != '?') { + /* The need for slash after the host component depends on the + * need for a host component. -- The dangerous mind of Jonah */ + if (!uri->hostlen) + return URI_ERRNO_NO_HOST; + + return URI_ERRNO_NO_HOST_SLASH; + } + + /* Look for #fragment or POST_CHAR */ + prefix_end = host_end + strcspn (host_end, "#" POST_CHAR_S); + uri->data = host_end; + uri->datalen = prefix_end - host_end; + + if (*prefix_end == '#') { + uri->fragment = prefix_end + 1; + uri->fragmentlen = strcspn (uri->fragment, POST_CHAR_S); + prefix_end = uri->fragment + uri->fragmentlen; + } + + if (*prefix_end == POST_CHAR) { + uri->post = prefix_end + 1; + } + + convert_to_lowercase (uri->string, uri->protocollen); + convert_to_lowercase (uri->host, uri->hostlen); + /* Decode %HH sequences in host name. This is important not so much + to support %HH sequences in host names (which other browser + don't), but to support binary characters (which will have been + converted to %HH by reencode_escapes). 
*/ + if (strchr (uri->host, '%')) { + uri->hostlen = url_calculate_escaped_hostlen (uri->host, uri->hostlen); + } + + url_strip (struri (uri)); + url_unescape (uri->host); + + path_simplify (uri->data); + + return URI_ERRNO_OK; +} + +static const gchar url_braces[] = { + '(', ')' , + '{', '}' , + '[', ']' , + '<', '>' , + '|', '|' , + '\'', '\'' +}; + +static gboolean +is_open_brace (gchar c) +{ + if (c == '(' || + c == '{' || + c == '[' || + c == '<' || + c == '|' || + c == '\'') { + return TRUE; + } + + return FALSE; +} + +static gboolean +url_file_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + match->m_begin = pos; + return TRUE; +} +static gboolean +url_file_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + const gchar *p; + gchar stop; + guint i; + + p = pos + strlen (match->pattern); + stop = *p; + if (*p == '/') { + p ++; + } + + for (i = 0; i < G_N_ELEMENTS (url_braces) / 2; i += 2) { + if (*p == url_braces[i]) { + stop = url_braces[i + 1]; + break; + } + } + + while (p < end && *p != stop && is_urlsafe (*p)) { + p ++; + } + + if (p == begin) { + return FALSE; + } + match->m_len = p - match->m_begin; + + return TRUE; + +} + +static gboolean +url_tld_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + const gchar *p = pos; + + /* Try to find the start of the url by finding any non-urlsafe character or whitespace/punctuation */ + while (p >= begin) { + if ((!is_domain (*p) && *p != '.' 
&& *p != '/') || g_ascii_isspace (*p)) { + p ++; + if (!g_ascii_isalnum (*p)) { + /* Urls cannot start with strange symbols */ + return FALSE; + } + match->m_begin = p; + return TRUE; + } + else if (p == begin && p != pos) { + match->m_begin = p; + return TRUE; + } + else if (*p == '.') { + if (p == begin) { + /* Urls cannot start with a dot */ + return FALSE; + } + if (!g_ascii_isalnum (p[1])) { + /* Wrong we have an invalid character after dot */ + return FALSE; + } + } + else if (*p == '/') { + /* Urls cannot contain '/' in their body */ + return FALSE; + } + p --; + } + + return FALSE; +} + +static gboolean +url_tld_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + const gchar *p; + + /* A url must be finished by tld, so it must be followed by space character */ + p = pos + strlen (match->pattern); + if (p == end || g_ascii_isspace (*p) || *p == ',') { + match->m_len = p - match->m_begin; + return TRUE; + } + else if (*p == '/' || *p == ':') { + /* Parse arguments, ports by normal way by url default function */ + p = match->m_begin; + /* Check common prefix */ + if (g_ascii_strncasecmp (p, "http://", sizeof ("http://") - 1) == 0) { + return url_web_end (begin, end, match->m_begin + sizeof ("http://") - 1, match); + } + else { + return url_web_end (begin, end, match->m_begin, match); + } + + } + return FALSE; +} + +static gboolean +url_web_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + /* Check what we have found */ + if (pos > begin && (g_ascii_strncasecmp (pos, "www", 3) == 0 || g_ascii_strncasecmp (pos, "ftp", 3) == 0)) { + if (!is_open_brace (*(pos - 1)) && !g_ascii_isspace (*(pos - 1))) { + return FALSE; + } + } + if (*pos == '.') { + /* Urls cannot start with . 
*/ + return FALSE; + } + match->m_begin = pos; + + return TRUE; +} + +static gboolean +url_web_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + const gchar *p, *c; + gchar open_brace = '\0', close_brace = '\0'; + gint brace_stack = 0; + gboolean passwd = FALSE; + guint port, i; + + p = pos + strlen (match->pattern); + for (i = 0; i < G_N_ELEMENTS (url_braces) / 2; i += 2) { + if (*p == url_braces[i]) { + close_brace = url_braces[i + 1]; + open_brace = *p; + break; + } + } + + /* find the end of the domain */ + if (is_atom (*p)) { + /* might be a domain or user@domain */ + c = p; + while (p < end) { + if (!is_atom (*p)) { + break; + } + + p++; + + while (p < end && is_atom (*p)) { + p++; + } + + if ((p + 1) < end && *p == '.' && (is_atom (*(p + 1)) || *(p + 1) == '/')) { + p++; + } + } + + if (*p != '@') { + p = c; + } + else { + p++; + } + + goto domain; + } + else if (is_domain (*p) || (*p & 0x80)) { +domain: + while (p < end) { + if (!is_domain (*p) && !(*p & 0x80)) { + break; + } + + p++; + + while (p < end && (is_domain (*p) || (*p & 0x80))) { + p++; + } + + if ((p + 1) < end && *p == '.' && (is_domain (*(p + 1)) || *(p + 1) == '/' || (*(p + 1) & 0x80))) { + p++; + } + } + } + else { + return FALSE; + } + + if (p < end) { + switch (*p) { + case ':': /* we either have a port or a password */ + p++; + + if (is_digit (*p) || passwd) { + port = (*p++ - '0'); + + while (p < end && is_digit (*p) && port < 65536) { + port = (port * 10) + (*p++ - '0'); + } + + if (!passwd && (port >= 65536 || *p == '@')) { + if (p < end && *p == '@') { + /* this must be a password? 
*/ + goto passwd; + } + else if (p < end) { + return FALSE; + } + + p--; + } + } + else { + passwd: + passwd = TRUE; + c = p; + + while (p < end && is_atom (*p)) { + p++; + } + + if ((p + 2) < end) { + if (*p == '@') { + p++; + if (is_domain (*p)) { + goto domain; + } + } + + return FALSE; + } + } + + if (p >= end || *p != '/') { + break; + } + + /* we have a '/' so there could be a path - fall through */ + case '/': /* we've detected a path component to our url */ + p++; + case '?': + while (p < end && is_urlsafe (*p)) { + if (*p == open_brace) { + brace_stack++; + } + else if (*p == close_brace) { + brace_stack--; + if (brace_stack == -1) { + break; + } + } + p++; + } + + break; + default: + break; + } + } + + /* urls are extremely unlikely to end with any + * punctuation, so strip any trailing + * punctuation off. Also strip off any closing + * double-quotes. */ + while (p > pos && strchr (",.:;?!-|}])\"", p[-1])) { + p--; + } + + match->m_len = (p - pos); + + return TRUE; +} + + +static gboolean +url_email_start (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + const gchar *p; + /* Check what we have found */ + if (pos > begin && *pos == '@') { + /* Try to extract it with username */ + p = pos - 1; + while (p > begin && (is_domain (*p) || *p == '.' || *p == '_')) { + p --; + } + if (!is_domain (*p) && p != pos - 1) { + match->m_begin = p + 1; + return TRUE; + } + else if (p == begin) { + match->m_begin = p; + return TRUE; + } + } + else { + p = pos + strlen (match->pattern); + if (is_domain (*p)) { + match->m_begin = pos; + return TRUE; + } + } + return FALSE; +} + +static gboolean +url_email_end (const gchar *begin, const gchar *end, const gchar *pos, url_match_t *match) +{ + const gchar *p; + gboolean got_at = FALSE; + + p = pos + strlen (match->pattern); + if (*pos == '@') { + got_at = TRUE; + } + + while (p < end && (is_domain (*p) || *p == '_' + || (*p == '@' && !got_at) || + (*p == '.' 
&& p + 1 < end && is_domain (*(p + 1))))) { + if (*p == '@') { + got_at = TRUE; + } + p ++; + } + match->m_len = p - match->m_begin; + match->add_prefix = TRUE; + return got_at; +} + +void +url_parse_text (rspamd_mempool_t * pool, struct rspamd_task *task, struct mime_text_part *part, gboolean is_html) +{ + gint rc; + gchar *url_str = NULL, *url_start, *url_end; + struct uri *new; + struct process_exception *ex; + gchar *p, *end, *begin; + + + if (!part->orig->data || part->orig->len == 0) { + msg_warn ("got empty text part"); + return; + } + + if (url_init () == 0) { + if (is_html) { + begin = part->orig->data; + end = begin + part->orig->len; + p = begin; + } + else { + begin = part->content->data; + end = begin + part->content->len; + p = begin; + } + while (p < end) { + if (url_try_text (pool, p, end - p, &url_start, &url_end, &url_str, is_html)) { + if (url_str != NULL) { + new = rspamd_mempool_alloc0 (pool, sizeof (struct uri)); + ex = rspamd_mempool_alloc0 (pool, sizeof (struct process_exception)); + if (new != NULL) { + g_strstrip (url_str); + rc = parse_uri (new, url_str, pool); + if ((rc == URI_ERRNO_OK || rc == URI_ERRNO_NO_SLASHES || rc == URI_ERRNO_NO_HOST_SLASH) && + new->hostlen > 0) { + ex->pos = url_start - begin; + ex->len = url_end - url_start; + if (new->protocol == PROTOCOL_MAILTO) { + if (new->userlen > 0) { + if (!g_tree_lookup (task->emails, new)) { + g_tree_insert (task->emails, new, new); + } + } + } + else { + if (!g_tree_lookup (task->urls, new)) { + g_tree_insert (task->urls, new, new); + } + } + part->urls_offset = g_list_prepend (part->urls_offset, ex); + } + else if (rc != URI_ERRNO_OK) { + msg_info ("extract of url '%s' failed: %s", url_str, url_strerror (rc)); + } + } + } + } + else { + break; + } + p = url_end + 1; + } + } + /* Handle offsets of this part */ + if (part->urls_offset != NULL) { + part->urls_offset = g_list_reverse (part->urls_offset); + rspamd_mempool_add_destructor (task->task_pool, 
(rspamd_mempool_destruct_t)g_list_free, part->urls_offset); + } +} + +gboolean +url_try_text (rspamd_mempool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **fin, gchar **url_str, gboolean is_html) +{ + const gchar *end, *pos; + gint idx, l; + struct url_matcher *matcher; + url_match_t m; + + end = begin + len; + if (url_init () == 0) { + if ((pos = rspamd_trie_lookup (url_scanner->patterns, begin, len, &idx)) == NULL) { + return FALSE; + } + else { + matcher = &matchers[idx]; + if ((matcher->flags & URL_FLAG_NOHTML) && is_html) { + /* Do not try to match non-html like urls in html texts */ + return FALSE; + } + m.pattern = matcher->pattern; + m.prefix = matcher->prefix; + m.add_prefix = FALSE; + if (matcher->start (begin, end, pos, &m) && matcher->end (begin, end, pos, &m)) { + if (m.add_prefix) { + l = m.m_len + 1 + strlen (m.prefix); + *url_str = rspamd_mempool_alloc (pool, l); + rspamd_snprintf (*url_str, l, "%s%*s", m.prefix, m.m_len, m.m_begin); + } + else { + *url_str = rspamd_mempool_alloc (pool, m.m_len + 1); + memcpy (*url_str, m.m_begin, m.m_len); + (*url_str)[m.m_len] = '\0'; + } + if (start != NULL) { + *start = (gchar *)m.m_begin; + } + if (fin != NULL) { + *fin = (gchar *)m.m_begin + m.m_len; + } + } + else { + *url_str = NULL; + if (start != NULL) { + *start = (gchar *)pos; + } + if (fin != NULL) { + *fin = (gchar *)pos + strlen (m.prefix); + } + } + + return TRUE; + } + } + + return FALSE; +} + +/* + * vi: ts=4 + */ diff --git a/src/libserver/url.h b/src/libserver/url.h new file mode 100644 index 000000000..60535ba5c --- /dev/null +++ b/src/libserver/url.h @@ -0,0 +1,111 @@ +/* URL check functions */ +#ifndef URL_H +#define URL_H + +#include "config.h" +#include "mem_pool.h" + +struct rspamd_task; +struct mime_text_part; + +struct uri { + /* The start of the uri (and thus start of the protocol string). */ + gchar *string; + + /* The internal type of protocol. Can _never_ be PROTOCOL_UNKNOWN. 
*/ + gint protocol; /* enum protocol */ + + gint ip_family; + + gchar *user; + gchar *password; + gchar *host; + gchar *port; + /* @data can contain both the path and query uri fields. + * It can never be NULL but can have zero length. */ + gchar *data; + gchar *fragment; + /* @post can contain some special encoded form data, used internally + * to make form data handling more efficient. The data is marked by + * POST_CHAR in the uri string. */ + gchar *post; + + struct uri *phished_url; + + /* @protocollen should only be usable if @protocol is either + * PROTOCOL_USER or an uri string should be composed. */ + guint protocollen; + guint userlen; + guint passwordlen; + guint hostlen; + guint portlen; + guint datalen; + guint fragmentlen; + + /* Flags */ + gboolean ipv6; /* URI contains IPv6 host */ + gboolean form; /* URI originated from form */ + gboolean is_phished; /* URI maybe phishing */ +}; + +enum uri_errno { + URI_ERRNO_OK, /* Parsing went well */ + URI_ERRNO_EMPTY, /* The URI string was empty */ + URI_ERRNO_INVALID_PROTOCOL, /* No protocol was found */ + URI_ERRNO_NO_SLASHES, /* Slashes after protocol missing */ + URI_ERRNO_TOO_MANY_SLASHES, /* Too many slashes after protocol */ + URI_ERRNO_TRAILING_DOTS, /* '.' 
after host */ + URI_ERRNO_NO_HOST, /* Host part is missing */ + URI_ERRNO_NO_PORT_COLON, /* ':' after host without port */ + URI_ERRNO_NO_HOST_SLASH, /* Slash after host missing */ + URI_ERRNO_IPV6_SECURITY, /* IPv6 security bug detected */ + URI_ERRNO_INVALID_PORT, /* Port number is bad */ + URI_ERRNO_INVALID_PORT_RANGE /* Port number is not within 0-65535 */ +}; + +enum protocol { + PROTOCOL_FILE, + PROTOCOL_FTP, + PROTOCOL_HTTP, + PROTOCOL_HTTPS, + PROTOCOL_MAILTO, + PROTOCOL_UNKNOWN +}; + +#define struri(uri) ((uri)->string) + +/* + * Parse urls inside text + * @param pool memory pool + * @param task task object + * @param part current text part + * @param is_html turn on html euristic + */ +void url_parse_text (rspamd_mempool_t *pool, struct rspamd_task *task, struct mime_text_part *part, gboolean is_html); + +/* + * Parse a single url into an uri structure + * @param pool memory pool + * @param uristring text form of url + * @param uri url object, must be pre allocated + */ +enum uri_errno parse_uri(struct uri *uri, gchar *uristring, rspamd_mempool_t *pool); + +/* + * Try to extract url from a text + * @param pool memory pool + * @param begin begin of text + * @param len length of text + * @param start storage for start position of url found (or NULL) + * @param end storage for end position of url found (or NULL) + * @param url_str storage for url string(or NULL) + * @return TRUE if url is found in specified text + */ +gboolean url_try_text (rspamd_mempool_t *pool, const gchar *begin, gsize len, gchar **start, gchar **end, gchar **url_str, gboolean is_html); + +/* + * Return text representation of url parsing error + */ +const gchar* url_strerror (enum uri_errno err); + +#endif |