source.dussan.org Git - rspamd.git/commitdiff
Start fuzzy_merge command.
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 25 Sep 2015 15:57:50 +0000 (16:57 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 25 Sep 2015 15:57:50 +0000 (16:57 +0100)
src/libutil/sqlite_utils.c
src/rspamadm/CMakeLists.txt
src/rspamadm/commands.c
src/rspamadm/fuzzy_merge.c [new file with mode: 0644]

index e58921a1a474d3e448ccd67a5e52dd0854082bdd..0dea2edc298ded8e9b73fc1be5e9c266c3643a15 100644 (file)
@@ -273,7 +273,7 @@ rspamd_sqlite3_open_or_create (rspamd_mempool_t *pool, const gchar *path, const
 
        rspamd_snprintf (lock_path, sizeof (lock_path), "%s.lock", path);
 
-       if (access (path, R_OK) == -1) {
+       if (access (path, R_OK) == -1 && create_sql != NULL) {
                flags |= SQLITE_OPEN_CREATE;
                create = TRUE;
        }
index b5988972db063ec33b6fe37d48fdb1966c4214c2..336ffd15659bad1aec9898b58ce39310c49c29fe 100644 (file)
@@ -6,7 +6,7 @@ SET(RSPAMADMSRC rspamadm.c commands.c pw.c keypair.c configtest.c
         ${CMAKE_SOURCE_DIR}/src/lua_worker.c
         ${CMAKE_SOURCE_DIR}/src/smtp_proxy.c
         ${CMAKE_SOURCE_DIR}/src/worker.c
-        ${CMAKE_SOURCE_DIR}/src/http_proxy.c)
+        ${CMAKE_SOURCE_DIR}/src/http_proxy.c fuzzy_merge.c)
 
 ADD_EXECUTABLE(rspamadm ${RSPAMADMSRC})
 TARGET_LINK_LIBRARIES(rspamadm rspamd-server)
index 703d1ac08564b9dd4947975b825887c753a6b6fd..8df1c9ed869f3d998bcddfef81b434e157b91913 100644 (file)
 extern struct rspamadm_command pw_command;
 extern struct rspamadm_command keypair_command;
 extern struct rspamadm_command configtest_command;
+extern struct rspamadm_command fuzzy_merge_command;
 
 const struct rspamadm_command *commands[] = {
        &help_command,
        &pw_command,
        &keypair_command,
        &configtest_command,
+       &fuzzy_merge_command,
        NULL
 };
 
diff --git a/src/rspamadm/fuzzy_merge.c b/src/rspamadm/fuzzy_merge.c
new file mode 100644 (file)
index 0000000..278f844
--- /dev/null
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2015, Vsevolod Stakhov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *      * Redistributions of source code must retain the above copyright
+ *        notice, this list of conditions and the following disclaimer.
+ *      * Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimer in the
+ *        documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "rspamadm.h"
+#include "sqlite_utils.h"
+
+/* Destination database path; set by the option parser from -d/--destination */
+static gchar *target = NULL;
+/* Source database paths; set by the option parser, one entry per -s/--source */
+static gchar **sources = NULL;
+
+/* Command callbacks, defined later in this file and wired into fuzzy_merge_command */
+static void rspamadm_fuzzy_merge (gint argc, gchar **argv);
+static const char *rspamadm_fuzzy_merge_help (gboolean full_help);
+
+/*
+ * Descriptor of the `fuzzy_merge` subcommand. It has external linkage because
+ * commands.c declares it `extern` and lists it in its `commands` array.
+ */
+struct rspamadm_command fuzzy_merge_command = {
+               .name = "fuzzy_merge",
+               .flags = 0,
+               .help = rspamadm_fuzzy_merge_help,
+               .run = rspamadm_fuzzy_merge
+};
+
+/*
+ * Command line options for fuzzy_merge: `-s` stores into `sources`,
+ * `-d` stores into `target`. The all-NULL entry terminates the table.
+ */
+static GOptionEntry entries[] = {
+               {"source", 's', 0, G_OPTION_ARG_STRING_ARRAY, &sources,
+                               "Source for merge (can be repeated)",                    NULL},
+               {"destination", 'd', 0, G_OPTION_ARG_STRING, &target,
+                               "Destination db",     NULL},
+               {NULL,  0,   0, G_OPTION_ARG_NONE, NULL, NULL, NULL}
+};
+
+/*
+ * Schema script passed to rspamd_sqlite3_open_or_create () for the destination
+ * database: a `digests` table, a `shingles` table referencing digests(id)
+ * with cascading delete/update, and the indexes on both.
+ */
+static const gchar *create_tables_sql =
+                               "BEGIN;"
+                               "CREATE TABLE digests("
+                               "id INTEGER PRIMARY KEY,"
+                               "flag INTEGER NOT NULL,"
+                               "digest TEXT NOT NULL,"
+                               "value INTEGER,"
+                               "time INTEGER);"
+                               "CREATE TABLE shingles("
+                               "value INTEGER NOT NULL,"
+                               "number INTEGER NOT NULL,"
+                               "digest_id INTEGER REFERENCES digests(id) ON DELETE CASCADE "
+                               "ON UPDATE CASCADE);"
+                               "CREATE UNIQUE INDEX IF NOT EXISTS d ON digests(digest);"
+                               "CREATE INDEX IF NOT EXISTS t ON digests(time);"
+                               "CREATE UNIQUE INDEX IF NOT EXISTS s ON shingles(value, number);"
+                               "COMMIT;";
+/* Full-table reads; not referenced by the code visible in this commit */
+static const gchar *select_digests_sql =
+                               "SELECT * FROM digests;";
+static const gchar *select_shingles_sql =
+                               "SELECT * FROM shingles;";
+
+/*
+ * Indexes into the `prepared_stmts` table below. STMAX is not a statement:
+ * it is the number of entries and sizes the table.
+ */
+enum statement_idx {
+       TRANSACTION_START = 0,
+       TRANSACTION_COMMIT,
+       TRANSACTION_ROLLBACK,
+       INSERT,
+       UPDATE,
+       INSERT_SHINGLE,
+       CHECK,
+       CHECK_SHINGLE,
+       COUNT,
+       STMAX
+};
+
+/*
+ * Statements prepared against the destination database, one per
+ * `enum statement_idx` value. Entries use designated initializers, so the
+ * written order (INSERT_SHINGLE appears before UPDATE) does not have to match
+ * the enum order.
+ *
+ * NOTE(review): `.args` and `.ret` look like per-parameter / per-column type
+ * format strings consumed by sqlite_utils; their exact letters are not defined
+ * in this file - confirm against sqlite_utils before relying on them.
+ */
+static struct rspamd_sqlite3_prstmt prepared_stmts[STMAX] = {
+               [TRANSACTION_START] = {
+                               .idx = TRANSACTION_START,
+                               .sql = "BEGIN IMMEDIATE TRANSACTION;",
+                               .args = "",
+                               .stmt = NULL,
+                               .result = SQLITE_DONE,
+                               .ret = ""
+               },
+               [TRANSACTION_COMMIT] = {
+                               .idx = TRANSACTION_COMMIT,
+                               .sql = "COMMIT;",
+                               .args = "",
+                               .stmt = NULL,
+                               .result = SQLITE_DONE,
+                               .ret = ""
+               },
+               [TRANSACTION_ROLLBACK] = {
+                               .idx = TRANSACTION_ROLLBACK,
+                               .sql = "ROLLBACK;",
+                               .args = "",
+                               .stmt = NULL,
+                               .result = SQLITE_DONE,
+                               .ret = ""
+               },
+               /* Adds a new digest row: flag, digest, value, time */
+               [INSERT] = {
+                               .idx = INSERT,
+                               .sql = "INSERT INTO digests(flag, digest, value, time) VALUES"
+                                               "(?1, ?2, ?3, ?4);",
+                               .args = "SBII",
+                               .stmt = NULL,
+                               .result = SQLITE_DONE,
+                               .ret = ""
+               },
+               /* Adds or overwrites a shingle row: value, number, digest_id */
+               [INSERT_SHINGLE] = {
+                               .idx = INSERT_SHINGLE,
+                               .sql = "INSERT OR REPLACE INTO shingles(value, number, digest_id) "
+                                               "VALUES (?1, ?2, ?3);",
+                               .args = "III",
+                               .stmt = NULL,
+                               .result = SQLITE_DONE,
+                               .ret = ""
+               },
+               /* Increments the stored value of an existing digest by ?1 */
+               [UPDATE] = {
+                               .idx = UPDATE,
+                               .sql = "UPDATE digests SET value = value + ?1 WHERE "
+                                               "digest==?2;",
+                               .args = "IB",
+                               .stmt = NULL,
+                               .result = SQLITE_DONE,
+                               .ret = ""
+               },
+               /*
+                * NOTE(review): this query selects three columns and expects a row,
+                * yet `.ret` is empty - presumably the columns are not handed back
+                * to the caller; verify before using the statement.
+                */
+               [CHECK] = {
+                               .idx = CHECK,
+                               .sql = "SELECT value, time, flag FROM digests WHERE digest==?1;",
+                               .args = "B",
+                               .stmt = NULL,
+                               .result = SQLITE_ROW,
+                               .ret = ""
+               },
+               /*
+                * NOTE(review): same concern as CHECK - a row is expected but `.ret`
+                * is empty. Also `.args` here is "IS" while INSERT_SHINGLE binds the
+                * same value/number pair with "II"; confirm which is intended.
+                */
+               [CHECK_SHINGLE] = {
+                               .idx = CHECK_SHINGLE,
+                               .sql = "SELECT digest_id FROM shingles WHERE value=?1 AND number=?2",
+                               .args = "IS",
+                               .stmt = NULL,
+                               .result = SQLITE_ROW,
+                               .ret = ""
+               },
+               /* Number of rows in digests; the only entry with a non-empty `.ret` */
+               [COUNT] = {
+                               .idx = COUNT,
+                               .sql = "SELECT COUNT(*) FROM digests;",
+                               .args = "",
+                               .stmt = NULL,
+                               .result = SQLITE_ROW,
+                               .ret = "I"
+               },
+};
+
+/**
+ * Returns the help text of the fuzzy_merge command.
+ *
+ * @param full_help TRUE to get the full usage text, FALSE to get the short
+ * one-line description
+ * @return pointer to a string literal; the caller must not free it
+ */
+static const char *
+rspamadm_fuzzy_merge_help (gboolean full_help)
+{
+       const char *help_str;
+
+       if (full_help) {
+               help_str = "Merge multiple sources of fuzzy hashes db into a single destination\n\n"
+                               "Usage: rspamadm fuzzy_merge -s source1 [-s source2 ...] -d destination\n"
+                               "Where options are:\n\n"
+                               "-s: source db for merge\n"
+                               "-d: destination db for merge\n"
+                               "--help: shows available options and commands";
+       }
+       else {
+               /*
+                * The short text used to describe key pair creation, which is
+                * unrelated to this command.
+                */
+               help_str = "Merge fuzzy databases";
+       }
+
+       return help_str;
+}
+
+/* Kind of a pending merge operation; selects how `fuzzy_merge_op.data` is read */
+enum op_type {
+       OP_INSERT = 0,
+       OP_UPDATE,
+       OP_INSERT_SHINGLE,
+};
+/*
+ * One pending merge operation. Not used by the code visible in this commit.
+ * NOTE(review): presumably `data.dgst` goes with OP_INSERT/OP_UPDATE and
+ * `data.shgl` with OP_INSERT_SHINGLE - confirm once the merge loop exists.
+ */
+struct fuzzy_merge_op {
+       enum op_type op;
+       union {
+               /* Fields mirroring the columns of the `digests` table */
+               struct {
+                       guint flag;
+                       gint64 value;
+                       guchar digest[64];
+                       gint64 tm;
+               } dgst;
+               /* Fields mirroring the columns of the `shingles` table */
+               struct {
+                       guint number;
+                       gint64 value;
+                       gint64 dgst;
+               } shgl;
+       } data;
+};
+
+/**
+ * Entry point of `rspamadm fuzzy_merge`.
+ *
+ * Parses the -s/-d options, opens the destination database (creating it from
+ * `create_tables_sql` when needed), prepares the statements, reads the current
+ * number of digests and opens every source database. The merge itself is not
+ * implemented yet: the final loop is still a placeholder.
+ *
+ * Every failure is reported on stderr and ends the process with exit (1).
+ *
+ * @param argc number of arguments of the command
+ * @param argv arguments of the command, handed to the GLib option parser
+ */
+static void
+rspamadm_fuzzy_merge (gint argc, gchar **argv)
+{
+       GOptionContext *context;
+       GError *error = NULL;
+       sqlite3 *dest_db;
+       GPtrArray *source_dbs;
+       GArray *prstmt;
+       rspamd_mempool_t *pool;
+       guint i, nsrc;
+       /* Zeroed because the outcome of the COUNT query below is not checked */
+       guint64 old_count = 0;
+
+       context = g_option_context_new (
+                       "fuzzy_merge - merge fuzzy databases");
+       g_option_context_set_summary (context,
+                       "Summary:\n  Rspamd administration utility version "
+                                       RVERSION
+                                       "\n  Release id: "
+                                       RID);
+       g_option_context_add_main_entries (context, entries, NULL);
+
+       if (!g_option_context_parse (context, &argc, &argv, &error)) {
+               fprintf (stderr, "option parsing failed: %s\n", error->message);
+               g_error_free (error);
+               exit (1);
+       }
+
+       /* Parsed values now live in `target` and `sources`; the parser is done */
+       g_option_context_free (context);
+
+       if (target == NULL || sources == NULL || sources[0] == NULL) {
+               fprintf (stderr, "no sources or no destination has been specified\n");
+               exit (1);
+       }
+
+       pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "fuzzy_merge");
+       dest_db = rspamd_sqlite3_open_or_create (pool, target, create_tables_sql,
+                       &error);
+
+       if (dest_db == NULL) {
+               fprintf (stderr, "cannot open destination: %s\n", error->message);
+               g_error_free (error);
+               exit (1);
+       }
+
+       prstmt = rspamd_sqlite3_init_prstmt (dest_db, prepared_stmts,
+                       STMAX, &error);
+
+       if (prstmt == NULL) {
+               fprintf (stderr, "cannot init prepared statements: %s\n", error->message);
+               g_error_free (error);
+               exit (1);
+       }
+
+       rspamd_sqlite3_run_prstmt (pool, dest_db, prstmt, COUNT, &old_count);
+
+       nsrc = g_strv_length (sources);
+       source_dbs = g_ptr_array_sized_new (nsrc);
+
+       /* Sources are opened without a creation script: a missing one is an error */
+       for (i = 0; i < nsrc; i++) {
+               sqlite3 *src;
+
+               src = rspamd_sqlite3_open_or_create (pool, sources[i], NULL, &error);
+
+               if (src == NULL) {
+                       fprintf (stderr, "cannot open source %s: %s\n", sources[i],
+                                       error->message);
+                       g_error_free (error);
+                       exit (1);
+               }
+
+               g_ptr_array_add (source_dbs, src);
+       }
+
+       /*
+        * TODO: the merge is not implemented yet. The database handles, the
+        * prepared statements and the pool are not released here either.
+        */
+       for (i = 0; i < nsrc; i++) {
+               /* Select all digests */
+       }
+}
\ No newline at end of file