about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-05-13 13:23:32 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2017-05-13 13:23:32 +0100
commit eb0ec2feb63afdf7d16babbcc82f59950233e0ee (patch)
tree fd24ced0c68f52d3f51cccc73a098379420310b2 /src
parent 2cebc2f8b4e75135e0842af5af44bc1f9f891a27 (diff)
download rspamd-eb0ec2feb63afdf7d16babbcc82f59950233e0ee.tar.gz
rspamd-eb0ec2feb63afdf7d16babbcc82f59950233e0ee.zip
[Feature] Reuse stemmers in the cache
Diffstat (limited to 'src')
-rw-r--r-- src/libmime/message.c 29
1 files changed, 21 insertions, 8 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 9cf611e79..0cf18e0a4 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -204,11 +204,29 @@ rspamd_extract_words (struct rspamd_task *task,
guint i, nlen, total_len = 0, short_len = 0;
#ifdef WITH_SNOWBALL
+ static GHashTable *stemmers = NULL;
+
if (part->language && part->language[0] != '\0' && IS_PART_UTF (part)) {
- stem = sb_stemmer_new (part->language, "UTF_8");
+
+ if (!stemmers) {
+ stemmers = g_hash_table_new (rspamd_strcase_hash,
+ rspamd_strcase_equal);
+ }
+
+ stem = g_hash_table_lookup (stemmers, part->language);
+
if (stem == NULL) {
- msg_debug_task ("<%s> cannot create lemmatizer for %s language",
- task->message_id, part->language);
+
+ stem = sb_stemmer_new (part->language, "UTF_8");
+
+ if (stem == NULL) {
+ msg_debug_task ("<%s> cannot create lemmatizer for %s language",
+ task->message_id, part->language);
+ }
+ else {
+ g_hash_table_insert (stemmers, g_strdup (part->language),
+ stem);
+ }
}
}
#endif
@@ -284,11 +302,6 @@ rspamd_extract_words (struct rspamd_task *task,
}
}
}
-#ifdef WITH_SNOWBALL
- if (stem != NULL) {
- sb_stemmer_delete (stem);
- }
-#endif
if (part->normalized_words && part->normalized_words->len) {
gdouble *avg_len_p, *short_len_p;