aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVsevolod Stakhov <vsevolod@highsecure.ru>2015-09-28 14:15:46 +0100
committerVsevolod Stakhov <vsevolod@highsecure.ru>2015-09-28 14:15:46 +0100
commit216c44d0bc3b23ce4cdee9eda4131c8aff1b0494 (patch)
treecedfe93fa42c8a09669a495a8093a855f6406fe7
parent415a2279a0f3ee9a820a55d90b08dd25e6d5c7d9 (diff)
downloadrspamd-216c44d0bc3b23ce4cdee9eda4131c8aff1b0494.tar.gz
rspamd-216c44d0bc3b23ce4cdee9eda4131c8aff1b0494.zip
Print merge stats.
-rw-r--r--src/rspamadm/fuzzy_merge.c46
1 files changed, 38 insertions, 8 deletions
diff --git a/src/rspamadm/fuzzy_merge.c b/src/rspamadm/fuzzy_merge.c
index b8533e7b3..f65dd8653 100644
--- a/src/rspamadm/fuzzy_merge.c
+++ b/src/rspamadm/fuzzy_merge.c
@@ -302,6 +302,8 @@ rspamadm_fuzzy_merge (gint argc, gchar **argv)
for (i = 0; i < nsrc; i++) {
const guchar *digest;
+ guint64 nsrc_ops = 0, ndup_dst = 0, ndup_other = 0, nupdated = 0,
+ nsrc_shingles = 0;
src = g_ptr_array_index (source_dbs, i);
@@ -349,16 +351,28 @@ rspamadm_fuzzy_merge (gint argc, gchar **argv)
if ((op = g_hash_table_lookup (unique_ops, nop)) == NULL) {
g_ptr_array_add (ops, nop);
g_hash_table_insert (unique_ops, nop, nop);
+ nupdated ++;
}
else {
if (op->data.dgst.value < nop->data.dgst.value) {
op->data.dgst.value = nop->data.dgst.value;
+ op->data.dgst.tm = nop->data.dgst.tm;
+ nupdated ++;
}
+ else {
+ ndup_other ++;
+ }
+ g_slice_free1 (sizeof (*nop), nop);
}
}
+ else {
+ ndup_dst ++;
+ }
}
else {
- /* Digest has not been found in the destination db, insert it */
+ /* Digest has not been found, but maybe we have the same in other
+ * sources ?
+ */
nop = g_slice_alloc (sizeof (*nop));
nop->op = OP_INSERT;
memcpy (nop->digest, digest,
@@ -374,16 +388,20 @@ rspamadm_fuzzy_merge (gint argc, gchar **argv)
g_hash_table_insert (unique_ops, nop, nop);
g_hash_table_insert (digests_id, &nop->data.dgst.id,
nop);
+ nsrc_ops ++;
}
else {
- continue;
+ if (op->data.dgst.value < nop->data.dgst.value) {
+ op->data.dgst.value = nop->data.dgst.value;
+ op->data.dgst.tm = nop->data.dgst.tm;
+ op->data.dgst.tm = nop->data.dgst.tm;
+ nupdated++;
+ }
+ else {
+ ndup_other++;
+ }
+ g_slice_free1 (sizeof (*nop), nop);
}
-
- /*
- * If we have no digest registered, we also need to check
- * shingles associated with this digest
- */
-
}
}
@@ -410,6 +428,7 @@ rspamadm_fuzzy_merge (gint argc, gchar **argv)
nop->data.shgl.value = sqlite3_column_int64 (shgl_stmt,
0);
g_ptr_array_add (ops, nop);
+ nsrc_shingles ++;
}
}
@@ -421,6 +440,17 @@ rspamadm_fuzzy_merge (gint argc, gchar **argv)
exit (1);
}
+ if (!quiet) {
+ rspamd_printf ("processed %s: %L new hashes, %L duplicate hashes (other sources), "
+ "%L duplicate hashes (destination), %L hashes to update, "
+ "%L shingles to insert\n\n",
+ sources[i],
+ nsrc_ops,
+ ndup_other,
+ ndup_dst,
+ nupdated,
+ nsrc_shingles);
+ }
/* Cleanup */
g_hash_table_unref (digests_id);
sqlite3_finalize (stmt);