From: Vsevolod Stakhov Date: Fri, 19 Dec 2014 14:35:31 +0000 (+0000) Subject: Start total rework of fuzzy_storage. X-Git-Tag: 0.8.0~68 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=b26afb57c80333a9b5a68a31d613bed32e2c6796;p=rspamd.git Start total rework of fuzzy_storage. --- diff --git a/CMakeLists.txt b/CMakeLists.txt index d599973a8..154e503f2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -404,11 +404,6 @@ ELSE(NOT LUA_FOUND) INCLUDE_DIRECTORIES("${LUA_INCLUDE_DIR}") ENDIF(NOT LUA_FOUND) -IF(ENABLE_SQLITE MATCHES "ON") - # Find optional sqlite3 support - ProcessPackage(SQLITE sqlite3>=3.6.0) -ENDIF(ENABLE_SQLITE MATCHES "ON") - #Check for openssl (required for dkim) IF(OPENSSL_FOUND) SET(HAVE_OPENSSL 1) @@ -566,30 +561,7 @@ IF(ENABLE_URL_INCLUDE MATCHES "ON") ENDIF(LIBFETCH_LIBRARY) ENDIF(ENABLE_URL_INCLUDE MATCHES "ON") -# Find liblmdb -FIND_LIBRARY(LIBLMDB_LIBRARY NAMES lmdb PATHS PATH_SUFFIXES lib64 lib - PATHS - ~/Library/Frameworks - /Library/Frameworks - /usr/local - /usr - /sw - /opt/local - /opt/csw - /opt - DOC "Path where the liblmdb library can be found") -IF(LIBLMDB_LIBRARY) - FIND_PATH(LIBLMDB_INCLUDE lmdb.h PATHS /opt/include - /usr/include - /usr/local/include - DOC "Path where the lmdb header files can be found") - - GET_FILENAME_COMPONENT(LIBLMDB_PATH "${LIBLMDB_LIBRARY}" PATH) - INCLUDE_DIRECTORIES("${LIBLMDB_INCLUDE}") - LINK_DIRECTORIES("${LIBLMDB_PATH}") -ELSE(LIBLMDB_LIBRARY) - MESSAGE(FATAL "Liblmdb is required for rspamd") -ENDIF(LIBLMDB_LIBRARY) +ProcessPackage(SQLITE sqlite3>=3.6.0) # Static build diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7914175d0..2c7783acf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -109,7 +109,6 @@ ENDIF(HAVE_LIBEVENT2) IF(WITH_DB) TARGET_LINK_LIBRARIES(rspamd db) ENDIF(WITH_DB) -TARGET_LINK_LIBRARIES(rspamd lmdb) IF(OPENSSL_FOUND) TARGET_LINK_LIBRARIES(rspamd ${OPENSSL_LIBRARIES}) diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c index b1f313556..41e844ff2 100644 --- a/src/fuzzy_storage.c +++ b/src/fuzzy_storage.c @@ -38,8 +38,7 @@ #include "bloom.h" #include "map.h" #include "fuzzy_storage.h" - -#include +#include "fuzzy_backend.h" /* This number is used as limit while comparing two fuzzy hashes, this value can vary from 0 to 100 */ #define LEV_LIMIT 99 @@ -95,16 +94,8 @@ struct rspamd_fuzzy_storage_ctx { radix_compressed_t *update_ips; gchar *update_map; struct event_base *ev_base; - gboolean legacy; - /* Legacy portions */ - rspamd_rwlock_t *tree_lock; - rspamd_mutex_t *update_mtx; - GCond *update_cond; - GThread *update_thread; - - /* lmdb interface */ - MDB_env *env; + struct rspamd_fuzzy_backend *backend; }; struct rspamd_legacy_fuzzy_node { @@ -116,13 +107,10 @@ struct rspamd_legacy_fuzzy_node { struct fuzzy_session { struct rspamd_worker *worker; - union { - struct legacy_fuzzy_cmd legacy; - struct rspamd_fuzzy_cmd current; - } cmd; + struct rspamd_fuzzy_cmd cmd; gint fd; - u_char *pos; guint64 time; + gboolean legacy; rspamd_inet_addr_t addr; struct rspamd_fuzzy_storage_ctx *ctx; }; @@ -135,154 +123,6 @@ rspamd_fuzzy_quark(void) return g_quark_from_static_string ("fuzzy-storage"); } -static void -legacy_fuzzy_node_free (gpointer n) -{ - struct rspamd_legacy_fuzzy_node *node = (struct rspamd_legacy_fuzzy_node *)n; - - g_slice_free1 (sizeof (struct rspamd_legacy_fuzzy_node), node); -} - -/** - * Expire nodes from list (need to be called in tree write lock) - * @param to_expire nodes that should be removed (if judy it is an array of nodes, - * and it is array of GList * otherwise) - * @param expired_num number of elements to expire - * @param ctx context - */ -static void -legacy_expire_nodes (gpointer *to_expire, gint expired_num, - struct rspamd_fuzzy_storage_ctx *ctx) -{ - gint i; - struct rspamd_legacy_fuzzy_node *node; - - for (i = 0; i < expired_num; i++) { - node = (struct rspamd_legacy_fuzzy_node *)to_expire[i]; - if (node->time != INVALID_NODE_TIME) { - server_stat->fuzzy_hashes_expired++; - } - server_stat->fuzzy_hashes--; - rspamd_bloom_del (bf, node->h.hash_pipe); - g_hash_table_remove (static_hash, &node->h); - } -} - -static gpointer -rspamd_fuzzy_storage_sync_cb (gpointer ud) -{ - static const int max_expired = 8192; - struct rspamd_worker *wrk = ud; - gint fd, expired_num = 0; - gchar *filename, header[4]; - struct rspamd_legacy_fuzzy_node *node; - gpointer *nodes_expired = NULL; - guint64 expire, now; - struct rspamd_fuzzy_storage_ctx *ctx; - GHashTableIter iter; - - ctx = wrk->ctx; - - for (;;) { - - rspamd_mutex_lock (ctx->update_mtx); - - /* Check for modifications */ - while (mods < ctx->max_mods && !wanna_die) { - rspamd_cond_wait (ctx->update_cond, ctx->update_mtx); - } - - msg_info ("syncing fuzzy hash storage"); - if (ctx->legacy) { - filename = ctx->hashfile; - if (filename == NULL ) { - rspamd_mutex_unlock (ctx->update_mtx); - if (wanna_die) { - return NULL; - } - continue; - } - expire = ctx->expire; - - if ((fd = open (filename, O_WRONLY | O_TRUNC | O_CREAT, - S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH)) == -1) { - msg_err ( - "cannot create hash file %s: %s", filename, - strerror (errno)); - rspamd_mutex_unlock (ctx->update_mtx); - if (wanna_die) { - return NULL; - } - continue; - } - - (void) rspamd_file_lock (fd, FALSE); - - now = (guint64) time (NULL ); - - /* Fill header */ - memcpy (header, FUZZY_FILE_MAGIC, 3); - header[3] = (gchar) CURRENT_FUZZY_VERSION; - if (write (fd, header, sizeof(header)) == -1) { - msg_err ( - "cannot write file %s while writing header: %s", - filename, - strerror (errno)); - goto end; - } - - rspamd_rwlock_reader_lock (ctx->tree_lock); - g_hash_table_iter_init (&iter, static_hash); - - while (g_hash_table_iter_next (&iter, NULL, (void **)&node)) { - if (node->time == INVALID_NODE_TIME || - now - node->time > expire) { - if (nodes_expired == NULL) { - nodes_expired = g_malloc ( - max_expired * sizeof (gpointer)); - } - - if (expired_num < max_expired) { - nodes_expired[expired_num++] = node; - } - continue; - } - if (write (fd, node, sizeof (struct rspamd_legacy_fuzzy_node)) - == -1) { - msg_err ("cannot write file %s: %s", filename, - strerror (errno)); - goto end; - } - } - rspamd_rwlock_reader_unlock (ctx->tree_lock); - - /* Now try to expire some nodes */ - if (expired_num > 0) { - rspamd_rwlock_writer_lock (ctx->tree_lock); - legacy_expire_nodes (nodes_expired, expired_num, ctx); - rspamd_rwlock_writer_unlock (ctx->tree_lock); - } - mods = 0; - end: - if (nodes_expired != NULL) { - g_free (nodes_expired); - } - (void) rspamd_file_unlock (fd, FALSE); - close (fd); - } - else { - mdb_env_sync (ctx->env, 0); - } - - rspamd_mutex_unlock (ctx->update_mtx); - if (wanna_die) { - break; - } - } - - return NULL; -} - static void sigterm_handler (void *arg) { @@ -290,10 +130,6 @@ sigterm_handler (void *arg) struct rspamd_fuzzy_storage_ctx *ctx; ctx = worker->ctx; - rspamd_mutex_lock (ctx->update_mtx); - mods = ctx->max_mods + 1; - g_cond_signal (ctx->update_cond); - rspamd_mutex_unlock (ctx->update_mtx); } /* @@ -306,371 +142,8 @@ sigusr2_handler (void *arg) struct rspamd_fuzzy_storage_ctx *ctx; ctx = worker->ctx; - rspamd_mutex_lock (ctx->update_mtx); - mods = ctx->max_mods + 1; - g_cond_signal (ctx->update_cond); - rspamd_mutex_unlock (ctx->update_mtx); } -static gboolean -legacy_read_db (struct rspamd_worker *wrk) -{ - gint r, fd, version = 0; - struct stat st; - gchar *filename, header[4]; - gboolean touch_stat = TRUE; - struct rspamd_legacy_fuzzy_node *node; - struct rspamd_fuzzy_storage_ctx *ctx = wrk->ctx; - struct { - gint32 value; - guint64 time; - rspamd_fuzzy_t h; - } legacy_node; - - if (server_stat->fuzzy_hashes != 0) { - touch_stat = FALSE; - } - - filename = ctx->hashfile; - if (filename == NULL) { - return FALSE; - } - - if ((fd = open (filename, O_RDONLY)) == -1) { - msg_err ("cannot open hash file %s: %s", filename, strerror (errno)); - return FALSE; - } - - (void)rspamd_file_lock (fd, FALSE); - - fstat (fd, &st); - - /* First of all try to read magic and version number */ - if ((r = read (fd, header, sizeof (header))) == sizeof (header)) { - if (memcmp (header, FUZZY_FILE_MAGIC, sizeof (header) - 1) == 0) { - /* We have version in last byte of header */ - version = (gint)header[3]; - if (version > CURRENT_FUZZY_VERSION) { - msg_err ("unsupported version of fuzzy hash file: %d", version); - close (fd); - return FALSE; - } - msg_info ( - "reading fuzzy hashes storage file of version %d of size %d", - version, - (gint)(st.st_size - - sizeof (header)) / sizeof (struct rspamd_legacy_fuzzy_node)); - } - else { - /* Old version */ - version = 0; - msg_info ( - "got old version of fuzzy hashes storage, it would be converted to new version %d automatically", - CURRENT_FUZZY_VERSION); - /* Rewind file */ - (void)lseek (fd, 0, SEEK_SET); - } - } - - for (;; ) { - node = g_slice_alloc (sizeof (struct rspamd_legacy_fuzzy_node)); - if (version == 0) { - r = read (fd, &legacy_node, sizeof (legacy_node)); - if (r != sizeof (legacy_node)) { - break; - } - node->value = legacy_node.value; - node->time = legacy_node.time; - memcpy (&node->h, &legacy_node.h, sizeof (rspamd_fuzzy_t)); - node->flag = 0; - } - else { - r = read (fd, node, sizeof (struct rspamd_legacy_fuzzy_node)); - if (r != sizeof (struct rspamd_legacy_fuzzy_node)) { - break; - } - } - g_hash_table_insert (static_hash, &node->h, node); - rspamd_bloom_add (bf, node->h.hash_pipe); - if (touch_stat) { - server_stat->fuzzy_hashes++; - } - } - - (void)rspamd_file_unlock (fd, FALSE); - close (fd); - - if (r > 0) { - msg_warn ("ignore garbage at the end of file, length of garbage: %d", - r); - } - else if (r == -1) { - msg_err ("cannot open read file %s: %s", filename, strerror (errno)); - return FALSE; - } - - return TRUE; -} - -static inline struct rspamd_legacy_fuzzy_node * -legacy_check_node (GQueue *hash, rspamd_fuzzy_t *s, gint update_value, - guint64 time, struct rspamd_fuzzy_storage_ctx *ctx) -{ - struct rspamd_legacy_fuzzy_node *h; - - h = g_hash_table_lookup (static_hash, s); - if (h != NULL) { - if (h->time == INVALID_NODE_TIME) { - /* Node is expired */ - return NULL; - } - else if (update_value == 0 && time - h->time > ctx->expire) { - h->time = INVALID_NODE_TIME; - server_stat->fuzzy_hashes_expired++; - return NULL; - } - else if (h->h.block_size== s->block_size) { - msg_debug ("fuzzy hash was found in tree"); - if (update_value) { - h->value += update_value; - } - return h; - } - } - - return NULL; -} - -static gint -legacy_check_cmd (struct legacy_fuzzy_cmd *cmd, - gint *flag, - guint64 time, - struct rspamd_fuzzy_storage_ctx *ctx) -{ - rspamd_fuzzy_t s; - struct rspamd_legacy_fuzzy_node *h; - - - if (!rspamd_bloom_check (bf, cmd->hash)) { - return 0; - } - - memcpy (s.hash_pipe, cmd->hash, sizeof (s.hash_pipe)); - s.block_size = cmd->blocksize; - - rspamd_rwlock_reader_lock (ctx->tree_lock); - h = legacy_check_node (NULL, &s, 0, time, ctx); - rspamd_rwlock_reader_unlock (ctx->tree_lock); - - if (h == NULL) { - return 0; - } - else { - *flag = h->flag; - return h->value; - } -} - -static struct rspamd_legacy_fuzzy_node * -legacy_add_node (struct legacy_fuzzy_cmd *cmd, - guint64 time, - struct rspamd_fuzzy_storage_ctx *ctx) -{ - struct rspamd_legacy_fuzzy_node *h; - - h = g_slice_alloc (sizeof (struct rspamd_legacy_fuzzy_node)); - memcpy (&h->h.hash_pipe, &cmd->hash, sizeof (cmd->hash)); - h->h.block_size = cmd->blocksize; - h->time = time; - h->value = cmd->value; - h->flag = cmd->flag; - g_hash_table_insert (static_hash, &h->h, h); - rspamd_bloom_add (bf, cmd->hash); - - return h; -} - -static gboolean -legacy_update_hash (struct legacy_fuzzy_cmd *cmd, - guint64 time, - struct rspamd_fuzzy_storage_ctx *ctx) -{ - rspamd_fuzzy_t s; - struct rspamd_legacy_fuzzy_node *n; - - memcpy (s.hash_pipe, cmd->hash, sizeof (s.hash_pipe)); - s.block_size = cmd->blocksize; - mods++; - - rspamd_rwlock_writer_lock (ctx->tree_lock); - n = legacy_check_node (NULL, &s, cmd->value, time, ctx); - if (n == NULL) { - /* Bloom false positive */ - n = legacy_add_node (cmd, time, ctx); - } - rspamd_rwlock_writer_unlock (ctx->tree_lock); - - if (n != NULL) { - n->time = time; - return TRUE; - } - return FALSE; -} - -static gboolean -legacy_write_cmd (struct legacy_fuzzy_cmd *cmd, - guint64 time, - struct rspamd_fuzzy_storage_ctx *ctx) -{ - if (rspamd_bloom_check (bf, cmd->hash)) { - if (legacy_update_hash (cmd, time, ctx)) { - return TRUE; - } - } - - rspamd_rwlock_writer_lock (ctx->tree_lock); - legacy_add_node (cmd, time, ctx); - rspamd_rwlock_writer_unlock (ctx->tree_lock); - - mods++; - server_stat->fuzzy_hashes++; - msg_info ("fuzzy hash was successfully added"); - - return TRUE; -} - -static gboolean -legacy_delete_hash (GQueue *hash, rspamd_fuzzy_t *s, - struct rspamd_fuzzy_storage_ctx *ctx) -{ - gboolean res = FALSE; - - rspamd_rwlock_writer_lock (ctx->tree_lock); - if (g_hash_table_remove (static_hash, s)) { - rspamd_bloom_del (bf, s->hash_pipe); - msg_info ("fuzzy hash was successfully deleted"); - server_stat->fuzzy_hashes--; - mods++; - } - rspamd_rwlock_writer_unlock (ctx->tree_lock); - - return res; - -} - -static gboolean -legacy_delete_cmd (struct legacy_fuzzy_cmd *cmd, - guint64 time, - struct rspamd_fuzzy_storage_ctx *ctx) -{ - rspamd_fuzzy_t s; - - if (!rspamd_bloom_check (bf, cmd->hash)) { - return FALSE; - } - - memcpy (s.hash_pipe, cmd->hash, sizeof (s.hash_pipe)); - s.block_size = cmd->blocksize; - - return legacy_delete_hash (NULL, &s, ctx); -} - -/** - * Checks the client's address for update commands permission - */ -static gboolean -check_fuzzy_client (struct fuzzy_session *session) -{ - if (session->ctx->update_ips != NULL) { - if (radix_find_compressed_addr (session->ctx->update_ips, - &session->addr) == RADIX_NO_VALUE) { - return FALSE; - } - } - - return TRUE; -} - -#define LEGACY_CMD_PROCESS(x) \ - do { \ - if (legacy_ ## x ## _cmd (&session->cmd.legacy, session->time, \ - session->worker->ctx)) { \ - if (sendto (session->fd, "OK" CRLF, sizeof ("OK" CRLF) - 1, 0, \ - &session->addr.addr.sa, session->addr.slen) == -1) { \ - msg_err ("error while writing reply: %s", strerror (errno)); \ - } \ - } \ - else { \ - if (sendto (session->fd, "ERR" CRLF, sizeof ("ERR" CRLF) - 1, 0, \ - &session->addr.addr.sa, session->addr.slen) == -1) { \ - msg_err ("error while writing reply: %s", strerror (errno)); \ - } \ - } \ - } while (0) - -static void -legacy_fuzzy_cmd (struct fuzzy_session *session) -{ - gint r, flag = 0; - gchar buf[64]; - - switch (session->cmd.legacy.cmd) { - case FUZZY_CHECK: - r = legacy_check_cmd (&session->cmd.legacy, - &flag, - session->time, - session->worker->ctx); - if (r != 0) { - r = rspamd_snprintf (buf, sizeof (buf), "OK %d %d" CRLF, r, flag); - if (sendto (session->fd, buf, r, 0, - &session->addr.addr.sa, session->addr.slen) == -1) { - msg_err ("error while writing reply: %s", strerror (errno)); - } - } - else { - if (sendto (session->fd, "ERR" CRLF, sizeof ("ERR" CRLF) - 1, 0, - &session->addr.addr.sa, session->addr.slen) == -1) { - msg_err ("error while writing reply: %s", strerror (errno)); - } - } - break; - case FUZZY_WRITE: - if (!check_fuzzy_client (session)) { - msg_info ("try to insert a hash from an untrusted address"); - if (sendto (session->fd, "UNAUTH" CRLF, sizeof ("UNAUTH" CRLF) - 1, - 0, - &session->addr.addr.sa, session->addr.slen) == -1) { - msg_err ("error while writing reply: %s", strerror (errno)); - } - } - else { - LEGACY_CMD_PROCESS (write); - } - break; - case FUZZY_DEL: - if (!check_fuzzy_client (session)) { - msg_info ("try to delete a hash from an untrusted address"); - if (sendto (session->fd, "UNAUTH" CRLF, sizeof ("UNAUTH" CRLF) - 1, - 0, - &session->addr.addr.sa, session->addr.slen) == -1) { - msg_err ("error while writing reply: %s", strerror (errno)); - } - } - else { - LEGACY_CMD_PROCESS (delete); - } - break; - default: - if (sendto (session->fd, "ERR" CRLF, sizeof ("ERR" CRLF) - 1, 0, - &session->addr.addr.sa, session->addr.slen) == -1) { - msg_err ("error while writing reply: %s", strerror (errno)); - } - break; - } -} - -#undef LEGACY_CMD_PROCESS - /* * MDB Interface */ @@ -678,120 +151,9 @@ legacy_fuzzy_cmd (struct fuzzy_session *session) static void rspamd_fuzzy_process_command (struct fuzzy_session *session) { - struct rspamd_fuzzy_cmd *cmd = &session->cmd.current; struct rspamd_fuzzy_reply rep; - MDB_dbi db; - MDB_txn *txn; - MDB_val k, v; guint64 value; int rc, match = 0, i; - - if (cmd->cmd == FUZZY_CHECK) { - mdb_txn_begin (session->ctx->env, NULL, MDB_RDONLY, &txn); - if ((rc = mdb_dbi_open (txn, NULL, MDB_INTEGERKEY, &db)) != 0) { - msg_err ("cannot open db: %s", mdb_strerror (rc)); - } - else { - for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { - k.mv_data = &cmd->sh.hashes[i]; - k.mv_size = sizeof (cmd->sh.hashes[0]); - if (mdb_get (txn, db, &k, &v) == 0) { - match ++; - } - } - - rep.code = 0; - rep.prob = (gdouble)match / (gdouble)RSPAMD_SHINGLE_SIZE; - if (sendto (session->fd, &rep, sizeof (rep), 0, - &session->addr.addr.sa, session->addr.slen) == -1) { - msg_err ("error while writing reply: %s", strerror (errno)); - } - } - mdb_dbi_close (session->ctx->env, db); - mdb_txn_abort (txn); - } - else { - mdb_txn_begin (session->ctx->env, NULL, 0, &txn); - if ((rc = mdb_dbi_open (txn, NULL, MDB_INTEGERKEY, &db)) != 0) { - msg_err ("cannot open db: %s", mdb_strerror (rc)); - } - else { - if (cmd->cmd == FUZZY_WRITE) { - for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { - k.mv_data = &cmd->sh.hashes[i]; - k.mv_size = sizeof (cmd->sh.hashes[0]); - if (mdb_get (txn, db, &k, &v) == 0) { - value = *(guint64 *)&v; - ++value; - v.mv_data = &value; - v.mv_size = sizeof (value); - mdb_put (txn, db, &k, &v, 0); - } - else { - value = 1; - v.mv_data = &value; - v.mv_size = sizeof (value); - mdb_put (txn, db, &k, &v, 0); - } - } - rep.code = 0; - rep.prob = value; - if (sendto (session->fd, &rep, sizeof (rep), 0, - &session->addr.addr.sa, session->addr.slen) == -1) { - msg_err ("error while writing reply: %s", strerror (errno)); - } - } - else { - /* Delete command */ - for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) { - k.mv_data = &cmd->sh.hashes[i]; - k.mv_size = sizeof (cmd->sh.hashes[0]); - if (mdb_get (txn, db, &k, &v) == 0) { - value = *(guint64 *)&v; - mdb_del (txn, db, &k, &v); - } - } - rep.code = 0; - rep.prob = value; - if (sendto (session->fd, &rep, sizeof (rep), 0, - &session->addr.addr.sa, session->addr.slen) == -1) { - msg_err ("error while writing reply: %s", strerror (errno)); - } - - } - } - mdb_dbi_close (session->ctx->env, db); - mdb_txn_commit (txn); - } -} - -static gboolean -rspamd_fuzzy_storage_open_db (struct rspamd_fuzzy_storage_ctx *ctx, GError **err) -{ - gchar *dir; - gint rc; - - if (ctx->hashfile == NULL) { - g_set_error (err, rspamd_fuzzy_quark(), 500, "Cannot work without file"); - return FALSE; - } - - dir = g_path_get_dirname (ctx->hashfile); - if (dir == NULL || access (dir, W_OK) == -1) { - g_set_error (err, rspamd_fuzzy_quark(), errno, "Cannot access directory: %s", - strerror (errno)); - return FALSE; - } - - mdb_env_create (&ctx->env); - - if ((rc = mdb_env_open (ctx->env, dir, MDB_NOSYNC, 0600)) != 0) { - g_set_error (err, rspamd_fuzzy_quark(), errno, "Cannot open mdb_env: %s", - mdb_strerror (rc)); - return FALSE; - } - - return TRUE; } /* @@ -816,14 +178,13 @@ accept_fuzzy_socket (gint fd, short what, void *arg) ctx = worker->ctx; session.worker = worker; session.fd = fd; - session.pos = buf; session.addr.slen = sizeof (session.addr.addr); session.ctx = worker->ctx; session.time = (guint64)time (NULL); /* Got some data */ if (what == EV_READ) { - while ((r = recvfrom (fd, session.pos, sizeof (buf), 0, + while ((r = recvfrom (fd, buf, sizeof (buf), 0, &session.addr.addr.sa, &session.addr.slen)) == -1) { if (errno == EINTR) { continue; @@ -834,40 +195,14 @@ accept_fuzzy_socket (gint fd, short what, void *arg) return; } session.addr.af = session.addr.addr.sa.sa_family; - if (r == sizeof (struct legacy_fuzzy_cmd) && ctx->legacy) { - /* Assume that the whole command was read */ - legacy_fuzzy_cmd (&session); - } - else if (r == sizeof (legacy_cmd) && ctx->legacy) { - /* Process requests from old rspamd */ - memcpy (&legacy_cmd, session.pos, sizeof (legacy_cmd)); - session.cmd.legacy.cmd = legacy_cmd.cmd; - session.cmd.legacy.blocksize = legacy_cmd.blocksize; - session.cmd.legacy.value = legacy_cmd.value; - session.cmd.legacy.flag = 0; - memcpy (session.cmd.legacy.hash, legacy_cmd.hash, - sizeof (legacy_cmd.hash)); - legacy_fuzzy_cmd (&session); + if (r == sizeof (struct legacy_fuzzy_cmd)) { + /* Old command */ } - else if (r == sizeof (struct rspamd_fuzzy_cmd) && !ctx->legacy) { - /* We have the second version of request */ - memcpy (&session.cmd.current, buf, sizeof (session.cmd.current)); - if (session.cmd.current.size == RSPAMD_SHINGLE_SIZE && - session.cmd.current.version == RSPAMD_FUZZY_VERSION) { - rspamd_fuzzy_process_command (&session); - } - else { - rep.code = EINVAL; - rep.prob = 0.0; - if (sendto (session.fd, &rep, sizeof (rep), 0, - &session.addr.addr.sa, session.addr.slen) == -1) { - msg_err ("error while writing reply: %s", strerror (errno)); - } - } + else if (r == sizeof (struct rspamd_fuzzy_cmd)) { + /* New command */ } else { /* Discard input */ - } } } @@ -887,9 +222,7 @@ sync_callback (gint fd, short what, void *arg) tmv.tv_usec = 0; evtimer_add (&tev, &tmv); - rspamd_mutex_lock (ctx->update_mtx); - g_cond_signal (ctx->update_cond); - rspamd_mutex_unlock (ctx->update_mtx); + /* Call backend sync */ } gpointer @@ -921,10 +254,6 @@ init_fuzzy (struct rspamd_config *cfg) rspamd_rcl_parse_struct_string, ctx, G_STRUCT_OFFSET (struct rspamd_fuzzy_storage_ctx, hashfile), 0); - rspamd_rcl_register_worker_option (cfg, type, "legacy", - rspamd_rcl_parse_struct_boolean, ctx, - G_STRUCT_OFFSET (struct rspamd_fuzzy_storage_ctx, legacy), 0); - /* Legacy options */ rspamd_rcl_register_worker_option (cfg, type, "max_mods", rspamd_rcl_parse_struct_integer, ctx, @@ -981,32 +310,10 @@ start_fuzzy (struct rspamd_worker *worker) sigh->post_handler = sigterm_handler; sigh->handler_data = worker; - if (ctx->legacy) { - ctx->tree_lock = rspamd_rwlock_new (); - ctx->update_mtx = rspamd_mutex_new (); -#if ((GLIB_MAJOR_VERSION == 2) && (GLIB_MINOR_VERSION > 30)) - ctx->update_cond = g_malloc0 (sizeof (GCond)); - g_cond_init (ctx->update_cond); -#else - ctx->update_cond = g_cond_new (); -#endif - static_hash = g_hash_table_new_full (rspamd_fuzzy_hash, rspamd_fuzzy_equal, - NULL, legacy_fuzzy_node_free); - - /* Init bloom filter */ - bf = rspamd_bloom_create (2000000L, RSPAMD_DEFAULT_BLOOM_HASHES); - /* Try to read hashes from file */ - if (!legacy_read_db (worker)) { - msg_err ( - "cannot read hashes file, it can be created after save procedure"); - } - } - else { - if (!rspamd_fuzzy_storage_open_db (ctx, &err)) { - msg_err (err->message); - g_error_free (err); - exit (EXIT_FAILURE); - } + if ((ctx->backend = rspamd_fuzzy_backend_open (ctx->hashfile, &err)) == NULL) { + msg_err (err->message); + g_error_free (err); + exit (EXIT_FAILURE); } /* Timer event */ @@ -1033,24 +340,8 @@ start_fuzzy (struct rspamd_worker *worker) /* Maps events */ rspamd_map_watch (worker->srv->cfg, ctx->ev_base); - ctx->update_thread = rspamd_create_thread ("fuzzy update", - rspamd_fuzzy_storage_sync_cb, - worker, - &err); - if (ctx->update_thread == NULL) { - msg_err ("error creating update thread: %s", err->message); - } - event_base_loop (ctx->ev_base, 0); - if (ctx->update_thread != NULL) { - g_thread_join (ctx->update_thread); - } - - if (!ctx->legacy) { - mdb_env_close (ctx->env); - } - rspamd_log_close (rspamd_main->logger); exit (EXIT_SUCCESS); } diff --git a/src/fuzzy_storage.h b/src/fuzzy_storage.h index 9db779c33..037f21a79 100644 --- a/src/fuzzy_storage.h +++ b/src/fuzzy_storage.h @@ -24,8 +24,13 @@ struct legacy_fuzzy_cmd { struct rspamd_fuzzy_cmd { guint8 version; guint8 cmd; - guint16 size; - struct rspamd_shingle sh; + guint16 shingles_count; + gchar digest[64]; +}; + +struct rspamd_fuzzy_shingle_cmd { + struct rspamd_fuzzy_cmd basic; + struct rspamd_shingle sgl; }; struct rspamd_fuzzy_reply { diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt index 02713af79..8696da7ba 100644 --- a/src/libserver/CMakeLists.txt +++ b/src/libserver/CMakeLists.txt @@ -8,6 +8,7 @@ SET(LIBRSPAMDSERVERSRC dns.c dynamic_cfg.c events.c + fuzzy_backend.c html.c protocol.c proxy.c @@ -44,6 +45,7 @@ SET_TARGET_PROPERTIES(rspamd-server PROPERTIES LINKER_LANGUAGE C COMPILE_FLAGS " TARGET_LINK_LIBRARIES(rspamd-server rspamd-lua) TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb) TARGET_LINK_LIBRARIES(rspamd-server rspamd-util) +TARGET_LINK_LIBRARIES(rspamd-server sqlite3) TARGET_LINK_LIBRARIES(rspamd-server rdns) IF(CMAKE_COMPILER_IS_GNUCC) SET_TARGET_PROPERTIES(rspamd-server PROPERTIES COMPILE_FLAGS "-DRSPAMD_LIB -fno-strict-aliasing") diff --git a/src/libserver/fuzzy_backend.c b/src/libserver/fuzzy_backend.c new file mode 100644 index 000000000..17f0eb802 --- /dev/null +++ b/src/libserver/fuzzy_backend.c @@ -0,0 +1,72 @@ +/* Copyright (c) 2014, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "main.h" +#include "fuzzy_backend.h" +#include "fuzzy_storage.h" + + +struct rspamd_fuzzy_backend; + +struct +rspamd_fuzzy_backend* rspamd_fuzzy_backend_open (const gchar *path, + GError **err) +{ + +} + +struct rspamd_fuzzy_reply +rspamd_fuzzy_backend_check (struct rspamd_fuzzy_backend *backend, + const struct rspamd_fuzzy_cmd *cmd) +{ + +} + +gboolean +rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend, + const struct rspamd_fuzzy_cmd *cmd) +{ + +} + + +gboolean +rspamd_fuzzy_backend_del (struct rspamd_fuzzy_backend *backend, + const struct rspamd_fuzzy_cmd *cmd) +{ + +} + +gboolean +rspamd_fuzzy_backend_sync (struct rspamd_fuzzy_backend *backend) +{ + +} + + +void +rspamd_fuzzy_backend_close (struct rspamd_fuzzy_backend *backend) +{ + +} diff --git a/src/libserver/fuzzy_backend.h b/src/libserver/fuzzy_backend.h new file mode 100644 index 000000000..5322eff50 --- /dev/null +++ b/src/libserver/fuzzy_backend.h @@ -0,0 +1,84 @@ +/* Copyright (c) 2014, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef FUZZY_BACKEND_H_ +#define FUZZY_BACKEND_H_ + +#include "config.h" +#include "fuzzy_storage.h" + + +struct rspamd_fuzzy_backend; + +/** + * Open fuzzy backend + * @param path file to open (legacy file will be converted automatically) + * @param err error pointer + * @return backend structure or NULL + */ +struct rspamd_fuzzy_backend* rspamd_fuzzy_backend_open (const gchar *path, + GError **err); + +/** + * Check specified fuzzy in the backend + * @param backend + * @param cmd + * @return reply with probability and weight + */ +struct rspamd_fuzzy_reply rspamd_fuzzy_backend_check ( + struct rspamd_fuzzy_backend *backend, + const struct rspamd_fuzzy_cmd *cmd); + +/** + * Add digest to the database + * @param backend + * @param cmd + * @return + */ +gboolean rspamd_fuzzy_backend_add ( + struct rspamd_fuzzy_backend *backend, + const struct rspamd_fuzzy_cmd *cmd); + +/** + * Delete digest from the database + * @param backend + * @param cmd + * @return + */ +gboolean rspamd_fuzzy_backend_del ( + struct rspamd_fuzzy_backend *backend, + const struct rspamd_fuzzy_cmd *cmd); + +/** + * Sync storage + * @param backend + * @return + */ +gboolean rspamd_fuzzy_backend_sync (struct rspamd_fuzzy_backend *backend); + +/** + * Close storage + * @param backend + */ +void rspamd_fuzzy_backend_close (struct rspamd_fuzzy_backend *backend); + +#endif /* FUZZY_BACKEND_H_ */