From fec137a7cccd626ce248f619011b2570f75438f8 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 23 Feb 2015 14:29:04 +0000 Subject: Fuzzy check uses already normalized words. --- src/plugins/fuzzy_check.c | 37 ++----------------------------------- 1 file changed, 2 insertions(+), 35 deletions(-) diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c index 2dd05a109..b1c24c6b8 100644 --- a/src/plugins/fuzzy_check.c +++ b/src/plugins/fuzzy_check.c @@ -49,7 +49,6 @@ #include "main.h" #include "blake2.h" #include "ottery.h" -#include "libstemmer.h" #define DEFAULT_SYMBOL "R_FUZZY_HASH" #define DEFAULT_UPSTREAM_ERROR_TIME 10 @@ -534,50 +533,18 @@ fuzzy_io_fin (void *ud) close (session->fd); } -static void -fuzzy_g_array_destructor (gpointer a) -{ - GArray *ar = (GArray *)a; - - g_array_free (ar, TRUE); -} - static GArray * fuzzy_preprocess_words (struct mime_text_part *part, rspamd_mempool_t *pool) { GArray *res; - struct sb_stemmer *stem; - rspamd_fstring_t *w, stw; - const guchar *r; - guint i; if (!part->is_utf || !part->language || part->language[0] == '\0') { res = part->words; } else { - /* Lemmatize words */ - stem = sb_stemmer_new (part->language, "UTF_8"); - if (stem == NULL) { - msg_debug ("cannot lemmatize %s language", part->language); - res = part->words; - } - else { - res = g_array_sized_new (FALSE, FALSE, sizeof (rspamd_fstring_t), - part->words->len); - for (i = 0; i < part->words->len; i ++) { - w = &g_array_index (part->words, rspamd_fstring_t, i); - r = sb_stemmer_stem (stem, w->begin, w->len); - if (r != NULL) { - stw.begin = rspamd_mempool_strdup (pool, r); - stw.len = strlen (r); - rspamd_str_lc (stw.begin, stw.len); - g_array_append_val (res, stw); - } - } - rspamd_mempool_add_destructor (pool, fuzzy_g_array_destructor, res); - sb_stemmer_delete (stem); - } + res = part->normalized_words; } + return res; } -- cgit v1.2.3