From: Vsevolod Stakhov Date: Fri, 25 Sep 2015 15:57:50 +0000 (+0100) Subject: Start fuzzy_merge command. X-Git-Tag: 1.0.3~14 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=67f20b684d802fd534c9d43e163d2b52a6cdddd6;p=rspamd.git Start fuzzy_merge command. --- diff --git a/src/libutil/sqlite_utils.c b/src/libutil/sqlite_utils.c index e58921a1a..0dea2edc2 100644 --- a/src/libutil/sqlite_utils.c +++ b/src/libutil/sqlite_utils.c @@ -273,7 +273,7 @@ rspamd_sqlite3_open_or_create (rspamd_mempool_t *pool, const gchar *path, const rspamd_snprintf (lock_path, sizeof (lock_path), "%s.lock", path); - if (access (path, R_OK) == -1) { + if (access (path, R_OK) == -1 && create_sql != NULL) { flags |= SQLITE_OPEN_CREATE; create = TRUE; } diff --git a/src/rspamadm/CMakeLists.txt b/src/rspamadm/CMakeLists.txt index b5988972d..336ffd156 100644 --- a/src/rspamadm/CMakeLists.txt +++ b/src/rspamadm/CMakeLists.txt @@ -6,7 +6,7 @@ SET(RSPAMADMSRC rspamadm.c commands.c pw.c keypair.c configtest.c ${CMAKE_SOURCE_DIR}/src/lua_worker.c ${CMAKE_SOURCE_DIR}/src/smtp_proxy.c ${CMAKE_SOURCE_DIR}/src/worker.c - ${CMAKE_SOURCE_DIR}/src/http_proxy.c) + ${CMAKE_SOURCE_DIR}/src/http_proxy.c fuzzy_merge.c) ADD_EXECUTABLE(rspamadm ${RSPAMADMSRC}) TARGET_LINK_LIBRARIES(rspamadm rspamd-server) diff --git a/src/rspamadm/commands.c b/src/rspamadm/commands.c index 703d1ac08..8df1c9ed8 100644 --- a/src/rspamadm/commands.c +++ b/src/rspamadm/commands.c @@ -26,12 +26,14 @@ extern struct rspamadm_command pw_command; extern struct rspamadm_command keypair_command; extern struct rspamadm_command configtest_command; +extern struct rspamadm_command fuzzy_merge_command; const struct rspamadm_command *commands[] = { &help_command, &pw_command, &keypair_command, &configtest_command, + &fuzzy_merge_command, NULL }; diff --git a/src/rspamadm/fuzzy_merge.c b/src/rspamadm/fuzzy_merge.c new file mode 100644 index 000000000..278f84424 --- /dev/null +++ b/src/rspamadm/fuzzy_merge.c @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2015, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "rspamadm.h" +#include "sqlite_utils.h" + +static gchar *target = NULL; +static gchar **sources = NULL; + +static void rspamadm_fuzzy_merge (gint argc, gchar **argv); +static const char *rspamadm_fuzzy_merge_help (gboolean full_help); + +struct rspamadm_command fuzzy_merge_command = { + .name = "fuzzy_merge", + .flags = 0, + .help = rspamadm_fuzzy_merge_help, + .run = rspamadm_fuzzy_merge +}; + +static GOptionEntry entries[] = { + {"source", 's', 0, G_OPTION_ARG_STRING_ARRAY, &sources, + "Source for merge (can be repeated)", NULL}, + {"destination", 'd', 0, G_OPTION_ARG_STRING, &target, + "Destination db", NULL}, + {NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL} +}; + +static const gchar *create_tables_sql = + "BEGIN;" + "CREATE TABLE digests(" + "id INTEGER PRIMARY KEY," + "flag INTEGER NOT NULL," + "digest TEXT NOT NULL," + "value INTEGER," + "time INTEGER);" + "CREATE TABLE shingles(" + "value INTEGER NOT NULL," + "number INTEGER NOT NULL," + "digest_id INTEGER REFERENCES digests(id) ON DELETE CASCADE " + "ON UPDATE CASCADE);" + "CREATE UNIQUE INDEX IF NOT EXISTS d ON digests(digest);" + "CREATE INDEX IF NOT EXISTS t ON digests(time);" + "CREATE UNIQUE INDEX IF NOT EXISTS s ON shingles(value, number);" + "COMMIT;"; +static const gchar *select_digests_sql = + "SELECT * FROM digests;"; +static const gchar *select_shingles_sql = + "SELECT * FROM shingles;"; + +enum statement_idx { + TRANSACTION_START = 0, + TRANSACTION_COMMIT, + TRANSACTION_ROLLBACK, + INSERT, + UPDATE, + INSERT_SHINGLE, + CHECK, + CHECK_SHINGLE, + COUNT, + STMAX +}; + +static struct rspamd_sqlite3_prstmt prepared_stmts[STMAX] = { + [TRANSACTION_START] = { + .idx = TRANSACTION_START, + .sql = "BEGIN IMMEDIATE TRANSACTION;", + .args = "", + .stmt = NULL, + .result = SQLITE_DONE, + .ret = "" + }, + [TRANSACTION_COMMIT] = { + .idx = TRANSACTION_COMMIT, + .sql = "COMMIT;", + .args = "", + .stmt = NULL, + .result = SQLITE_DONE, + .ret = "" + }, + [TRANSACTION_ROLLBACK] = { + .idx = TRANSACTION_ROLLBACK, + .sql = "ROLLBACK;", + .args = "", + .stmt = NULL, + .result = SQLITE_DONE, + .ret = "" + }, + [INSERT] = { + .idx = INSERT, + .sql = "INSERT INTO digests(flag, digest, value, time) VALUES" + "(?1, ?2, ?3, ?4);", + .args = "SBII", + .stmt = NULL, + .result = SQLITE_DONE, + .ret = "" + }, + [INSERT_SHINGLE] = { + .idx = INSERT_SHINGLE, + .sql = "INSERT OR REPLACE INTO shingles(value, number, digest_id) " + "VALUES (?1, ?2, ?3);", + .args = "III", + .stmt = NULL, + .result = SQLITE_DONE, + .ret = "" + }, + [UPDATE] = { + .idx = UPDATE, + .sql = "UPDATE digests SET value = value + ?1 WHERE " + "digest==?2;", + .args = "IB", + .stmt = NULL, + .result = SQLITE_DONE, + .ret = "" + }, + [CHECK] = { + .idx = CHECK, + .sql = "SELECT value, time, flag FROM digests WHERE digest==?1;", + .args = "B", + .stmt = NULL, + .result = SQLITE_ROW, + .ret = "" + }, + [CHECK_SHINGLE] = { + .idx = CHECK_SHINGLE, + .sql = "SELECT digest_id FROM shingles WHERE value=?1 AND number=?2", + .args = "IS", + .stmt = NULL, + .result = SQLITE_ROW, + .ret = "" + }, + [COUNT] = { + .idx = COUNT, + .sql = "SELECT COUNT(*) FROM digests;", + .args = "", + .stmt = NULL, + .result = SQLITE_ROW, + .ret = "I" + }, +}; + +static const char * +rspamadm_fuzzy_merge_help (gboolean full_help) +{ + const char *help_str; + + if (full_help) { + help_str = "Merge multiple sources of fuzzy hashes db into a single destination\n\n" + "Usage: rspamadm fuzzy_merge -s source1 [-s source2 ...] -d destination\n" + "Where options are:\n\n" + "-s: source db for merge\n" + "-d: destination db for merge\n" + "--help: shows available options and commands"; + } + else { + help_str = "Create encryption key pairs"; + } + + return help_str; +} + +enum op_type { + OP_INSERT = 0, + OP_UPDATE, + OP_INSERT_SHINGLE, +}; +struct fuzzy_merge_op { + enum op_type op; + union { + struct { + guint flag; + gint64 value; + guchar digest[64]; + gint64 tm; + } dgst; + struct { + guint number; + gint64 value; + gint64 dgst; + } shgl; + } data; +}; + +static void +rspamadm_fuzzy_merge (gint argc, gchar **argv) +{ + GOptionContext *context; + GError *error = NULL; + sqlite3 *dest_db; + GPtrArray *source_dbs; + GArray *prstmt; + GArray *ops; + rspamd_mempool_t *pool; + guint i, nsrc; + guint64 old_count, new_count, inserted = 0, updated = 0; + sqlite3_stmt *stmt; + + context = g_option_context_new ( + "fuzzy_merge - merge fuzzy databases"); + g_option_context_set_summary (context, + "Summary:\n Rspamd administration utility version " + RVERSION + "\n Release id: " + RID); + g_option_context_add_main_entries (context, entries, NULL); + + if (!g_option_context_parse (context, &argc, &argv, &error)) { + fprintf (stderr, "option parsing failed: %s\n", error->message); + g_error_free (error); + exit (1); + } + + if (target == NULL || sources == NULL || sources[0] == NULL) { + fprintf (stderr, "no sources or no destination has been specified\n"); + exit (1); + } + + pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "fuzzy_merge"); + dest_db = rspamd_sqlite3_open_or_create (pool, target, create_tables_sql, + &error); + + if (dest_db == NULL) { + fprintf (stderr, "cannot open destination: %s\n", error->message); + g_error_free (error); + exit (1); + } + + prstmt = rspamd_sqlite3_init_prstmt (dest_db, prepared_stmts, + STMAX, &error); + + if (prstmt == NULL) { + fprintf (stderr, "cannot init prepared statements: %s\n", error->message); + g_error_free (error); + exit (1); + } + + rspamd_sqlite3_run_prstmt (pool, dest_db, prstmt, COUNT, &old_count); + + nsrc = g_strv_length (sources); + source_dbs = g_ptr_array_sized_new (nsrc); + + for (i = 0; i < nsrc; i++) { + sqlite3 *src; + + src = rspamd_sqlite3_open_or_create (pool, sources[i], NULL, &error); + + if (src == NULL) { + fprintf (stderr, "cannot open source %s: %s\n", sources[i], + error->message); + g_error_free (error); + exit (1); + } + + g_ptr_array_add (source_dbs, src); + } + + for (i = 0; i < nsrc; i++) { + /* Select all digests */ + } +} \ No newline at end of file