about | summary | refs | log | tree | commit | diff | stats
path: root/src/libserver/fuzzy_backend.c
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-09-01 14:38:20 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-09-01 14:38:20 +0100
commit 3cd8eaf2a2be74b215bf1e901d8675e51f8e0833 (patch)
tree 53648a2d7bc5478781085a203798fce018e24609 /src/libserver/fuzzy_backend.c
parent 99c2eb287a15649c0dac67065b0254c2a94a768f (diff)
download rspamd-3cd8eaf2a2be74b215bf1e901d8675e51f8e0833.tar.gz
rspamd-3cd8eaf2a2be74b215bf1e901d8675e51f8e0833.zip
[Rework] Reorganize fuzzy backend structure
Diffstat (limited to 'src/libserver/fuzzy_backend.c')
-rw-r--r-- src/libserver/fuzzy_backend.c 1039
1 files changed, 2 insertions, 1037 deletions
diff --git a/src/libserver/fuzzy_backend.c b/src/libserver/fuzzy_backend.c
index 463fdd1f4..5c3683466 100644
--- a/src/libserver/fuzzy_backend.c
+++ b/src/libserver/fuzzy_backend.c
@@ -13,1042 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
#include "config.h"
-#include "rspamd.h"
#include "fuzzy_backend.h"
-#include "unix-std.h"
-
-#include <sqlite3.h>
-#include "libutil/sqlite_utils.h"
-
-struct rspamd_fuzzy_backend {
- sqlite3 *db;
- char *path;
- gchar id[MEMPOOL_UID_LEN];
- gsize count;
- gsize expired;
- rspamd_mempool_t *pool;
-};
-
-static const gdouble sql_sleep_time = 0.1;
-static const guint max_retries = 10;
-
-#define msg_err_fuzzy_backend(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
- backend->pool->tag.tagname, backend->pool->tag.uid, \
- G_STRFUNC, \
- __VA_ARGS__)
-#define msg_warn_fuzzy_backend(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
- backend->pool->tag.tagname, backend->pool->tag.uid, \
- G_STRFUNC, \
- __VA_ARGS__)
-#define msg_info_fuzzy_backend(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \
- backend->pool->tag.tagname, backend->pool->tag.uid, \
- G_STRFUNC, \
- __VA_ARGS__)
-#define msg_debug_fuzzy_backend(...) rspamd_default_log_function (G_LOG_LEVEL_DEBUG, \
- backend->pool->tag.tagname, backend->pool->tag.uid, \
- G_STRFUNC, \
- __VA_ARGS__)
-
-static const char *create_tables_sql =
- "BEGIN;"
- "CREATE TABLE IF NOT EXISTS digests("
- " id INTEGER PRIMARY KEY,"
- " flag INTEGER NOT NULL,"
- " digest TEXT NOT NULL,"
- " value INTEGER,"
- " time INTEGER);"
- "CREATE TABLE IF NOT EXISTS shingles("
- " value INTEGER NOT NULL,"
- " number INTEGER NOT NULL,"
- " digest_id INTEGER REFERENCES digests(id) ON DELETE CASCADE "
- " ON UPDATE CASCADE);"
- "CREATE TABLE IF NOT EXISTS sources("
- " name TEXT UNIQUE,"
- " version INTEGER,"
- " last INTEGER);"
- "CREATE UNIQUE INDEX IF NOT EXISTS d ON digests(digest);"
- "CREATE INDEX IF NOT EXISTS t ON digests(time);"
- "CREATE INDEX IF NOT EXISTS dgst_id ON shingles(digest_id);"
- "CREATE UNIQUE INDEX IF NOT EXISTS s ON shingles(value, number);"
- "COMMIT;";
-#if 0
-static const char *create_index_sql =
- "BEGIN;"
- "CREATE UNIQUE INDEX IF NOT EXISTS d ON digests(digest);"
- "CREATE INDEX IF NOT EXISTS t ON digests(time);"
- "CREATE INDEX IF NOT EXISTS dgst_id ON shingles(digest_id);"
- "CREATE UNIQUE INDEX IF NOT EXISTS s ON shingles(value, number);"
- "COMMIT;";
-#endif
-enum rspamd_fuzzy_statement_idx {
- RSPAMD_FUZZY_BACKEND_TRANSACTION_START = 0,
- RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT,
- RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK,
- RSPAMD_FUZZY_BACKEND_INSERT,
- RSPAMD_FUZZY_BACKEND_UPDATE,
- RSPAMD_FUZZY_BACKEND_UPDATE_FLAG,
- RSPAMD_FUZZY_BACKEND_INSERT_SHINGLE,
- RSPAMD_FUZZY_BACKEND_CHECK,
- RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE,
- RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID,
- RSPAMD_FUZZY_BACKEND_DELETE,
- RSPAMD_FUZZY_BACKEND_COUNT,
- RSPAMD_FUZZY_BACKEND_EXPIRE,
- RSPAMD_FUZZY_BACKEND_VACUUM,
- RSPAMD_FUZZY_BACKEND_DELETE_ORPHANED,
- RSPAMD_FUZZY_BACKEND_ADD_SOURCE,
- RSPAMD_FUZZY_BACKEND_VERSION,
- RSPAMD_FUZZY_BACKEND_SET_VERSION,
- RSPAMD_FUZZY_BACKEND_MAX
-};
-static struct rspamd_fuzzy_stmts {
- enum rspamd_fuzzy_statement_idx idx;
- const gchar *sql;
- const gchar *args;
- sqlite3_stmt *stmt;
- gint result;
-} prepared_stmts[RSPAMD_FUZZY_BACKEND_MAX] =
-{
- {
- .idx = RSPAMD_FUZZY_BACKEND_TRANSACTION_START,
- .sql = "BEGIN TRANSACTION;",
- .args = "",
- .stmt = NULL,
- .result = SQLITE_DONE
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT,
- .sql = "COMMIT;",
- .args = "",
- .stmt = NULL,
- .result = SQLITE_DONE
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK,
- .sql = "ROLLBACK;",
- .args = "",
- .stmt = NULL,
- .result = SQLITE_DONE
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_INSERT,
- .sql = "INSERT INTO digests(flag, digest, value, time) VALUES"
- "(?1, ?2, ?3, strftime('%s','now'));",
- .args = "SDI",
- .stmt = NULL,
- .result = SQLITE_DONE
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_UPDATE,
- .sql = "UPDATE digests SET value = value + ?1, time = strftime('%s','now') WHERE "
- "digest==?2;",
- .args = "ID",
- .stmt = NULL,
- .result = SQLITE_DONE
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_UPDATE_FLAG,
- .sql = "UPDATE digests SET value = ?1, flag = ?2, time = strftime('%s','now') WHERE "
- "digest==?3;",
- .args = "IID",
- .stmt = NULL,
- .result = SQLITE_DONE
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_INSERT_SHINGLE,
- .sql = "INSERT OR REPLACE INTO shingles(value, number, digest_id) "
- "VALUES (?1, ?2, ?3);",
- .args = "III",
- .stmt = NULL,
- .result = SQLITE_DONE
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_CHECK,
- .sql = "SELECT value, time, flag FROM digests WHERE digest==?1;",
- .args = "D",
- .stmt = NULL,
- .result = SQLITE_ROW
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE,
- .sql = "SELECT digest_id FROM shingles WHERE value=?1 AND number=?2",
- .args = "IS",
- .stmt = NULL,
- .result = SQLITE_ROW
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID,
- .sql = "SELECT digest, value, time, flag FROM digests WHERE id=?1",
- .args = "I",
- .stmt = NULL,
- .result = SQLITE_ROW
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_DELETE,
- .sql = "DELETE FROM digests WHERE digest==?1;",
- .args = "D",
- .stmt = NULL,
- .result = SQLITE_DONE
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_COUNT,
- .sql = "SELECT COUNT(*) FROM digests;",
- .args = "",
- .stmt = NULL,
- .result = SQLITE_ROW
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_EXPIRE,
- .sql = "DELETE FROM digests WHERE id IN (SELECT id FROM digests WHERE time < ?1 LIMIT ?2);",
- .args = "II",
- .stmt = NULL,
- .result = SQLITE_DONE
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_VACUUM,
- .sql = "VACUUM;",
- .args = "",
- .stmt = NULL,
- .result = SQLITE_DONE
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_DELETE_ORPHANED,
- .sql = "DELETE FROM shingles WHERE value=?1 AND number=?2;",
- .args = "II",
- .stmt = NULL,
- .result = SQLITE_DONE
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_ADD_SOURCE,
- .sql = "INSERT OR IGNORE INTO sources(name, version, last) VALUES (?1, ?2, ?3);",
- .args = "TII",
- .stmt = NULL,
- .result = SQLITE_DONE
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_VERSION,
- .sql = "SELECT version FROM sources WHERE name=?1;",
- .args = "T",
- .stmt = NULL,
- .result = SQLITE_ROW
- },
- {
- .idx = RSPAMD_FUZZY_BACKEND_SET_VERSION,
- .sql = "INSERT OR REPLACE INTO sources (name, version, last) VALUES (?3, ?1, ?2);",
- .args = "IIT",
- .stmt = NULL,
- .result = SQLITE_DONE
- },
-};
-
-static GQuark
-rspamd_fuzzy_backend_quark(void)
-{
- return g_quark_from_static_string ("fuzzy-storage-backend");
-}
-
-static gboolean
-rspamd_fuzzy_backend_prepare_stmts (struct rspamd_fuzzy_backend *bk, GError **err)
-{
- int i;
-
- for (i = 0; i < RSPAMD_FUZZY_BACKEND_MAX; i ++) {
- if (prepared_stmts[i].stmt != NULL) {
- /* Skip already prepared statements */
- continue;
- }
- if (sqlite3_prepare_v2 (bk->db, prepared_stmts[i].sql, -1,
- &prepared_stmts[i].stmt, NULL) != SQLITE_OK) {
- g_set_error (err, rspamd_fuzzy_backend_quark (),
- -1, "Cannot initialize prepared sql `%s`: %s",
- prepared_stmts[i].sql, sqlite3_errmsg (bk->db));
-
- return FALSE;
- }
- }
-
- return TRUE;
-}
-
-static int
-rspamd_fuzzy_backend_cleanup_stmt (struct rspamd_fuzzy_backend *backend,
- int idx)
-{
- sqlite3_stmt *stmt;
-
- if (idx < 0 || idx >= RSPAMD_FUZZY_BACKEND_MAX) {
-
- return -1;
- }
-
- msg_debug_fuzzy_backend ("reseting `%s`", prepared_stmts[idx].sql);
- stmt = prepared_stmts[idx].stmt;
- sqlite3_clear_bindings (stmt);
- sqlite3_reset (stmt);
-
- return SQLITE_OK;
-}
-
-static int
-rspamd_fuzzy_backend_run_stmt (struct rspamd_fuzzy_backend *backend,
- gboolean auto_cleanup,
- int idx, ...)
-{
- int retcode;
- va_list ap;
- sqlite3_stmt *stmt;
- int i;
- const char *argtypes;
- guint retries = 0;
- struct timespec ts;
-
- if (idx < 0 || idx >= RSPAMD_FUZZY_BACKEND_MAX) {
-
- return -1;
- }
-
- stmt = prepared_stmts[idx].stmt;
- g_assert ((int)prepared_stmts[idx].idx == idx);
-
- if (stmt == NULL) {
- if ((retcode = sqlite3_prepare_v2 (backend->db, prepared_stmts[idx].sql, -1,
- &prepared_stmts[idx].stmt, NULL)) != SQLITE_OK) {
- msg_err_fuzzy_backend ("Cannot initialize prepared sql `%s`: %s",
- prepared_stmts[idx].sql, sqlite3_errmsg (backend->db));
-
- return retcode;
- }
- stmt = prepared_stmts[idx].stmt;
- }
-
- msg_debug_fuzzy_backend ("executing `%s` %s auto cleanup",
- prepared_stmts[idx].sql, auto_cleanup ? "with" : "without");
- argtypes = prepared_stmts[idx].args;
- sqlite3_clear_bindings (stmt);
- sqlite3_reset (stmt);
- va_start (ap, idx);
-
- for (i = 0; argtypes[i] != '\0'; i++) {
- switch (argtypes[i]) {
- case 'T':
- sqlite3_bind_text (stmt, i + 1, va_arg (ap, const char*), -1,
- SQLITE_STATIC);
- break;
- case 'I':
- sqlite3_bind_int64 (stmt, i + 1, va_arg (ap, gint64));
- break;
- case 'S':
- sqlite3_bind_int (stmt, i + 1, va_arg (ap, gint));
- break;
- case 'D':
- /* Special case for digests variable */
- sqlite3_bind_text (stmt, i + 1, va_arg (ap, const char*), 64,
- SQLITE_STATIC);
- break;
- }
- }
-
- va_end (ap);
-
-retry:
- retcode = sqlite3_step (stmt);
-
- if (retcode == prepared_stmts[idx].result) {
- retcode = SQLITE_OK;
- }
- else {
- if ((retcode == SQLITE_BUSY ||
- retcode == SQLITE_LOCKED) && retries++ < max_retries) {
- double_to_ts (sql_sleep_time, &ts);
- nanosleep (&ts, NULL);
- goto retry;
- }
-
- msg_debug_fuzzy_backend ("failed to execute query %s: %d, %s", prepared_stmts[idx].sql,
- retcode, sqlite3_errmsg (backend->db));
- }
-
- if (auto_cleanup) {
- sqlite3_clear_bindings (stmt);
- sqlite3_reset (stmt);
- }
-
- return retcode;
-}
-
-static void
-rspamd_fuzzy_backend_close_stmts (struct rspamd_fuzzy_backend *bk)
-{
- int i;
-
- for (i = 0; i < RSPAMD_FUZZY_BACKEND_MAX; i++) {
- if (prepared_stmts[i].stmt != NULL) {
- sqlite3_finalize (prepared_stmts[i].stmt);
- prepared_stmts[i].stmt = NULL;
- }
- }
-
- return;
-}
-
-static gboolean
-rspamd_fuzzy_backend_run_sql (const gchar *sql, struct rspamd_fuzzy_backend *bk,
- GError **err)
-{
- guint retries = 0;
- struct timespec ts;
- gint ret;
-
- do {
- ret = sqlite3_exec (bk->db, sql, NULL, NULL, NULL);
- double_to_ts (sql_sleep_time, &ts);
- } while (ret == SQLITE_BUSY && retries++ < max_retries &&
- nanosleep (&ts, NULL) == 0);
-
- if (ret != SQLITE_OK) {
- g_set_error (err, rspamd_fuzzy_backend_quark (),
- -1, "Cannot execute raw sql `%s`: %s",
- sql, sqlite3_errmsg (bk->db));
- return FALSE;
- }
-
- return TRUE;
-}
-
-static struct rspamd_fuzzy_backend *
-rspamd_fuzzy_backend_open_db (const gchar *path, GError **err)
-{
- struct rspamd_fuzzy_backend *bk;
- rspamd_cryptobox_hash_state_t st;
- guchar hash_out[rspamd_cryptobox_HASHBYTES];
-
- g_assert (path != NULL);
-
- bk = g_slice_alloc (sizeof (*bk));
- bk->path = g_strdup (path);
- bk->expired = 0;
- bk->pool = rspamd_mempool_new (rspamd_mempool_suggest_size (), "fuzzy_backend");
- bk->db = rspamd_sqlite3_open_or_create (bk->pool, bk->path,
- create_tables_sql, 1, err);
-
- if (bk->db == NULL) {
- rspamd_fuzzy_backend_close (bk);
-
- return NULL;
- }
-
- if (!rspamd_fuzzy_backend_prepare_stmts (bk, err)) {
- rspamd_fuzzy_backend_close (bk);
-
- return NULL;
- }
-
- /* Set id for the backend */
- rspamd_cryptobox_hash_init (&st, NULL, 0);
- rspamd_cryptobox_hash_update (&st, path, strlen (path));
- rspamd_cryptobox_hash_final (&st, hash_out);
- rspamd_snprintf (bk->id, sizeof (bk->id), "%xs", hash_out);
- memcpy (bk->pool->tag.uid, bk->id, sizeof (bk->pool->tag.uid));
-
- return bk;
-}
-
-struct rspamd_fuzzy_backend *
-rspamd_fuzzy_backend_open (const gchar *path,
- gboolean vacuum,
- GError **err)
-{
- struct rspamd_fuzzy_backend *backend;
-
- if (path == NULL) {
- g_set_error (err, rspamd_fuzzy_backend_quark (),
- ENOENT, "Path has not been specified");
- return NULL;
- }
-
- /* Open database */
- if ((backend = rspamd_fuzzy_backend_open_db (path, err)) == NULL) {
- return NULL;
- }
-
- if (rspamd_fuzzy_backend_run_stmt (backend, FALSE, RSPAMD_FUZZY_BACKEND_COUNT)
- == SQLITE_OK) {
- backend->count = sqlite3_column_int64 (
- prepared_stmts[RSPAMD_FUZZY_BACKEND_COUNT].stmt, 0);
- }
-
- rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_COUNT);
-
- return backend;
-}
-
-static gint
-rspamd_fuzzy_backend_int64_cmp (const void *a, const void *b)
-{
- gint64 ia = *(gint64 *)a, ib = *(gint64 *)b;
-
- return (ia - ib);
-}
-
-struct rspamd_fuzzy_reply
-rspamd_fuzzy_backend_check (struct rspamd_fuzzy_backend *backend,
- const struct rspamd_fuzzy_cmd *cmd, gint64 expire)
-{
- struct rspamd_fuzzy_reply rep = {0, 0, 0, 0.0};
- const struct rspamd_fuzzy_shingle_cmd *shcmd;
- int rc;
- gint64 timestamp;
- gint64 shingle_values[RSPAMD_SHINGLE_SIZE], i, sel_id, cur_id,
- cur_cnt, max_cnt;
-
- if (backend == NULL) {
- return rep;
- }
-
- /* Try direct match first of all */
- rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_TRANSACTION_START);
- rc = rspamd_fuzzy_backend_run_stmt (backend, FALSE,
- RSPAMD_FUZZY_BACKEND_CHECK,
- cmd->digest);
-
- if (rc == SQLITE_OK) {
- timestamp = sqlite3_column_int64 (
- prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt, 1);
- if (time (NULL) - timestamp > expire) {
- /* Expire element */
- msg_debug_fuzzy_backend ("requested hash has been expired");
- }
- else {
- rep.value = sqlite3_column_int64 (
- prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt, 0);
- rep.prob = 1.0;
- rep.flag = sqlite3_column_int (
- prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt, 2);
- }
- }
- else if (cmd->shingles_count > 0) {
- /* Fuzzy match */
-
- rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK);
- shcmd = (const struct rspamd_fuzzy_shingle_cmd *)cmd;
-
- for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
- rc = rspamd_fuzzy_backend_run_stmt (backend, FALSE,
- RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE,
- shcmd->sgl.hashes[i], i);
- if (rc == SQLITE_OK) {
- shingle_values[i] = sqlite3_column_int64 (
- prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE].stmt,
- 0);
- }
- else {
- shingle_values[i] = -1;
- }
- msg_debug_fuzzy_backend ("looking for shingle %L -> %L: %d", i,
- shcmd->sgl.hashes[i], rc);
- }
-
- rspamd_fuzzy_backend_cleanup_stmt (backend,
- RSPAMD_FUZZY_BACKEND_CHECK_SHINGLE);
-
- qsort (shingle_values, RSPAMD_SHINGLE_SIZE, sizeof (gint64),
- rspamd_fuzzy_backend_int64_cmp);
- sel_id = -1;
- cur_id = -1;
- cur_cnt = 0;
- max_cnt = 0;
-
- for (i = 0; i < RSPAMD_SHINGLE_SIZE; i ++) {
- if (shingle_values[i] == -1) {
- continue;
- }
-
- /* We have some value here, so we need to check it */
- if (shingle_values[i] == cur_id) {
- cur_cnt ++;
- }
- else {
- cur_id = shingle_values[i];
- if (cur_cnt >= max_cnt) {
- max_cnt = cur_cnt;
- sel_id = cur_id;
- }
- cur_cnt = 0;
- }
- }
-
- if (cur_cnt > max_cnt) {
- max_cnt = cur_cnt;
- }
-
- if (sel_id != -1) {
- /* We have some id selected here */
- rep.prob = (float)max_cnt / (float)RSPAMD_SHINGLE_SIZE;
-
- if (rep.prob > 0.5) {
- msg_debug_fuzzy_backend (
- "found fuzzy hash with probability %.2f",
- rep.prob);
- rc = rspamd_fuzzy_backend_run_stmt (backend, FALSE,
- RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID, sel_id);
- if (rc == SQLITE_OK) {
- timestamp = sqlite3_column_int64 (
- prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt,
- 2);
- if (time (NULL) - timestamp > expire) {
- /* Expire element */
- msg_debug_fuzzy_backend (
- "requested hash has been expired");
- rep.prob = 0.0;
- }
- else {
- rep.value = sqlite3_column_int64 (
- prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt,
- 1);
- rep.flag = sqlite3_column_int (
- prepared_stmts[RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID].stmt,
- 3);
- }
- }
- }
- else {
- /* Otherwise we assume that as error */
- rep.value = 0;
- }
-
- rspamd_fuzzy_backend_cleanup_stmt (backend,
- RSPAMD_FUZZY_BACKEND_GET_DIGEST_BY_ID);
- }
- }
-
- rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK);
- rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT);
-
- return rep;
-}
-
-gboolean
-rspamd_fuzzy_backend_prepare_update (struct rspamd_fuzzy_backend *backend,
- const gchar *source)
-{
- gint rc;
-
- if (backend == NULL) {
- return FALSE;
- }
-
- rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_TRANSACTION_START);
-
- if (rc != SQLITE_OK) {
- msg_warn_fuzzy_backend ("cannot start transaction for updates: %s",
- sqlite3_errmsg (backend->db));
- return FALSE;
- }
-
- return TRUE;
-}
-
-gboolean
-rspamd_fuzzy_backend_add (struct rspamd_fuzzy_backend *backend,
- const struct rspamd_fuzzy_cmd *cmd)
-{
- int rc, i;
- gint64 id, flag;
- const struct rspamd_fuzzy_shingle_cmd *shcmd;
-
- if (backend == NULL) {
- return FALSE;
- }
-
- rc = rspamd_fuzzy_backend_run_stmt (backend, FALSE,
- RSPAMD_FUZZY_BACKEND_CHECK,
- cmd->digest);
-
- if (rc == SQLITE_OK) {
- /* Check flag */
- flag = sqlite3_column_int64 (
- prepared_stmts[RSPAMD_FUZZY_BACKEND_CHECK].stmt,
- 2);
- rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK);
-
- if (flag == cmd->flag) {
- /* We need to increase weight */
- rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_UPDATE,
- (gint64) cmd->value,
- cmd->digest);
- if (rc != SQLITE_OK) {
- msg_warn_fuzzy_backend ("cannot update hash to %d -> "
- "%*xs: %s", (gint) cmd->flag,
- (gint) sizeof (cmd->digest), cmd->digest,
- sqlite3_errmsg (backend->db));
- }
- }
- else {
- /* We need to relearn actually */
-
- rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_UPDATE_FLAG,
- (gint64) cmd->value,
- (gint64) cmd->flag,
- cmd->digest);
-
- if (rc != SQLITE_OK) {
- msg_warn_fuzzy_backend ("cannot update hash to %d -> "
- "%*xs: %s", (gint) cmd->flag,
- (gint) sizeof (cmd->digest), cmd->digest,
- sqlite3_errmsg (backend->db));
- }
- }
- }
- else {
- rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK);
- rc = rspamd_fuzzy_backend_run_stmt (backend, FALSE,
- RSPAMD_FUZZY_BACKEND_INSERT,
- (gint) cmd->flag,
- cmd->digest,
- (gint64) cmd->value);
-
- if (rc == SQLITE_OK) {
- if (cmd->shingles_count > 0) {
- id = sqlite3_last_insert_rowid (backend->db);
- shcmd = (const struct rspamd_fuzzy_shingle_cmd *) cmd;
-
- for (i = 0; i < RSPAMD_SHINGLE_SIZE; i++) {
- rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_INSERT_SHINGLE,
- shcmd->sgl.hashes[i], (gint64)i, id);
- msg_debug_fuzzy_backend ("add shingle %d -> %L: %L",
- i,
- shcmd->sgl.hashes[i],
- id);
-
- if (rc != SQLITE_OK) {
- msg_warn_fuzzy_backend ("cannot add shingle %d -> "
- "%L: %L: %s", i,
- shcmd->sgl.hashes[i],
- id, sqlite3_errmsg (backend->db));
- }
- }
- }
- }
- else {
- msg_warn_fuzzy_backend ("cannot add hash to %d -> "
- "%*xs: %s", (gint)cmd->flag,
- (gint)sizeof (cmd->digest), cmd->digest,
- sqlite3_errmsg (backend->db));
- }
-
- rspamd_fuzzy_backend_cleanup_stmt (backend,
- RSPAMD_FUZZY_BACKEND_INSERT);
- }
-
- return (rc == SQLITE_OK);
-}
-
-gboolean
-rspamd_fuzzy_backend_finish_update (struct rspamd_fuzzy_backend *backend,
- const gchar *source, gboolean version_bump)
-{
- gint rc = SQLITE_OK, wal_frames, wal_checkpointed, ver;
-
- /* Get and update version */
- if (version_bump) {
- ver = rspamd_fuzzy_backend_version (backend, source);
- ++ver;
-
- rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_SET_VERSION,
- (gint64)ver, (gint64)time (NULL), source);
- }
-
- if (rc == SQLITE_OK) {
- rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT);
-
- if (rc != SQLITE_OK) {
- msg_warn_fuzzy_backend ("cannot commit updates: %s",
- sqlite3_errmsg (backend->db));
- rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK);
- return FALSE;
- }
- else {
- if (!rspamd_sqlite3_sync (backend->db, &wal_frames, &wal_checkpointed)) {
- msg_warn_fuzzy_backend ("cannot commit checkpoint: %s",
- sqlite3_errmsg (backend->db));
- }
- else if (wal_checkpointed > 0) {
- msg_info_fuzzy_backend ("total number of frames in the wal file: "
- "%d, checkpointed: %d", wal_frames, wal_checkpointed);
- }
- }
- }
- else {
- msg_warn_fuzzy_backend ("cannot update version for %s: %s", source,
- sqlite3_errmsg (backend->db));
- rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK);
- return FALSE;
- }
-
- return TRUE;
-}
-
-gboolean
-rspamd_fuzzy_backend_del (struct rspamd_fuzzy_backend *backend,
- const struct rspamd_fuzzy_cmd *cmd)
-{
- int rc = -1;
-
- if (backend == NULL) {
- return FALSE;
- }
-
- rc = rspamd_fuzzy_backend_run_stmt (backend, FALSE,
- RSPAMD_FUZZY_BACKEND_CHECK,
- cmd->digest);
-
- if (rc == SQLITE_OK) {
- rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK);
-
- rc = rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_DELETE,
- cmd->digest);
- if (rc != SQLITE_OK) {
- msg_warn_fuzzy_backend ("cannot update hash to %d -> "
- "%*xs: %s", (gint) cmd->flag,
- (gint) sizeof (cmd->digest), cmd->digest,
- sqlite3_errmsg (backend->db));
- }
- }
- else {
- /* Hash is missing */
- rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_CHECK);
- }
-
- return (rc == SQLITE_OK);
-}
-
-gboolean
-rspamd_fuzzy_backend_sync (struct rspamd_fuzzy_backend *backend,
- gint64 expire,
- gboolean clean_orphaned)
-{
- struct orphaned_shingle_elt {
- gint64 value;
- gint64 number;
- };
-
- /* Do not do more than 5k ops per step */
- const guint64 max_changes = 5000;
- gboolean ret = FALSE;
- gint64 expire_lim, expired;
- gint rc, i, orphaned_cnt = 0;
- GError *err = NULL;
- static const gchar orphaned_shingles[] = "SELECT shingles.value,shingles.number "
- "FROM shingles "
- "LEFT JOIN digests ON "
- "shingles.digest_id=digests.id WHERE "
- "digests.id IS NULL;";
- sqlite3_stmt *stmt;
- GArray *orphaned;
- struct orphaned_shingle_elt orphaned_elt, *pelt;
-
-
- if (backend == NULL) {
- return FALSE;
- }
-
- /* Perform expire */
- if (expire > 0) {
- expire_lim = time (NULL) - expire;
-
- if (expire_lim > 0) {
- ret = rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_TRANSACTION_START);
-
- if (ret == SQLITE_OK) {
-
- rc = rspamd_fuzzy_backend_run_stmt (backend, FALSE,
- RSPAMD_FUZZY_BACKEND_EXPIRE, expire_lim, max_changes);
-
- if (rc == SQLITE_OK) {
- expired = sqlite3_changes (backend->db);
-
- if (expired > 0) {
- backend->expired += expired;
- msg_info_fuzzy_backend ("expired %L hashes", expired);
- }
- }
- else {
- msg_warn_fuzzy_backend (
- "cannot execute expired statement: %s",
- sqlite3_errmsg (backend->db));
- }
-
- rspamd_fuzzy_backend_cleanup_stmt (backend,
- RSPAMD_FUZZY_BACKEND_EXPIRE);
-
- ret = rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT);
-
- if (ret != SQLITE_OK) {
- rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK);
- }
- }
- if (ret != SQLITE_OK) {
- msg_warn_fuzzy_backend ("cannot expire db: %s",
- sqlite3_errmsg (backend->db));
- }
- }
- }
-
- /* Cleanup database */
- if (clean_orphaned) {
- ret = rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_TRANSACTION_START);
-
- if (ret == SQLITE_OK) {
- if ((rc = sqlite3_prepare_v2 (backend->db,
- orphaned_shingles,
- -1,
- &stmt,
- NULL)) != SQLITE_OK) {
- msg_warn_fuzzy_backend ("cannot cleanup shingles: %s",
- sqlite3_errmsg (backend->db));
- }
- else {
- orphaned = g_array_new (FALSE,
- FALSE,
- sizeof (struct orphaned_shingle_elt));
-
- while (sqlite3_step (stmt) == SQLITE_ROW) {
- orphaned_elt.value = sqlite3_column_int64 (stmt, 0);
- orphaned_elt.number = sqlite3_column_int64 (stmt, 1);
- g_array_append_val (orphaned, orphaned_elt);
-
- if (orphaned->len > max_changes) {
- break;
- }
- }
-
- sqlite3_finalize (stmt);
- orphaned_cnt = orphaned->len;
-
- if (orphaned_cnt > 0) {
- msg_info_fuzzy_backend (
- "going to delete %ud orphaned shingles",
- orphaned_cnt);
- /* Need to delete orphaned elements */
- for (i = 0; i < (gint) orphaned_cnt; i++) {
- pelt = &g_array_index (orphaned,
- struct orphaned_shingle_elt,
- i);
- rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_DELETE_ORPHANED,
- pelt->value, pelt->number);
- }
- }
-
-
- g_array_free (orphaned, TRUE);
- }
-
- ret = rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_TRANSACTION_COMMIT);
-
- if (ret == SQLITE_OK) {
- msg_info_fuzzy_backend (
- "deleted %ud orphaned shingles",
- orphaned_cnt);
- }
- else {
- msg_warn_fuzzy_backend (
- "cannot synchronize fuzzy backend: %e",
- err);
- rspamd_fuzzy_backend_run_stmt (backend, TRUE,
- RSPAMD_FUZZY_BACKEND_TRANSACTION_ROLLBACK);
- }
- }
- }
-
- return ret;
-}
-
-
-void
-rspamd_fuzzy_backend_close (struct rspamd_fuzzy_backend *backend)
-{
- if (backend != NULL) {
- if (backend->db != NULL) {
- rspamd_fuzzy_backend_close_stmts (backend);
- sqlite3_close (backend->db);
- }
-
- if (backend->path != NULL) {
- g_free (backend->path);
- }
-
- if (backend->pool) {
- rspamd_mempool_delete (backend->pool);
- }
-
- g_slice_free1 (sizeof (*backend), backend);
- }
-}
-
-
-gsize
-rspamd_fuzzy_backend_count (struct rspamd_fuzzy_backend *backend)
-{
- if (backend) {
- if (rspamd_fuzzy_backend_run_stmt (backend, FALSE,
- RSPAMD_FUZZY_BACKEND_COUNT) == SQLITE_OK) {
- backend->count = sqlite3_column_int64 (
- prepared_stmts[RSPAMD_FUZZY_BACKEND_COUNT].stmt, 0);
- }
-
- rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_COUNT);
-
- return backend->count;
- }
-
- return 0;
-}
-
-gint
-rspamd_fuzzy_backend_version (struct rspamd_fuzzy_backend *backend,
- const gchar *source)
-{
- gint ret = -1;
-
- if (backend) {
- if (rspamd_fuzzy_backend_run_stmt (backend, FALSE,
- RSPAMD_FUZZY_BACKEND_VERSION, source) == SQLITE_OK) {
- ret = sqlite3_column_int64 (
- prepared_stmts[RSPAMD_FUZZY_BACKEND_VERSION].stmt, 0);
- }
-
- rspamd_fuzzy_backend_cleanup_stmt (backend, RSPAMD_FUZZY_BACKEND_VERSION);
- }
-
- return ret;
-}
-
-gsize
-rspamd_fuzzy_backend_expired (struct rspamd_fuzzy_backend *backend)
-{
- return backend != NULL ? backend->expired : 0;
-}
-
-const gchar *
-rspamd_fuzzy_backend_id (struct rspamd_fuzzy_backend *backend)
-{
- return backend != NULL ? backend->id : 0;
-}
+#include "fuzzy_backend_sqlite.h"