about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-04-01 17:48:23 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2015-04-01 17:48:50 +0100
commit 02429b4f2358919d69a885256547dc366ee45af9 (patch)
tree ad12c937699330b2280222061a0ad68dee2a9b5d /src
parent 9a01296f34403c5949c167b0062943ff473d11d7 (diff)
download rspamd-02429b4f2358919d69a885256547dc366ee45af9.tar.gz
rspamd-02429b4f2358919d69a885256547dc366ee45af9.zip
Fix words normalization.
Diffstat (limited to 'src')
-rw-r--r-- src/libmime/message.c 44
1 file changed, 24 insertions, 20 deletions
diff --git a/src/libmime/message.c b/src/libmime/message.c
index b963ac455..2eac86ed2 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1181,6 +1181,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
rspamd_fstring_t *w, stw;
const guchar *r;
guint i;
+ GArray *tmp;
if (part->language && part->language[0] != '\0' && part->is_utf) {
stem = sb_stemmer_new (part->language, "UTF_8");
@@ -1191,32 +1192,35 @@ rspamd_normalize_text_part (struct rspamd_task *task,
}
/* Ugly workaround */
- part->normalized_words = rspamd_tokenize_text (part->content->data,
+ tmp = rspamd_tokenize_text (part->content->data,
part->content->len, part->is_utf, task->cfg->min_word_len,
part->urls_offset, FALSE);
- for (i = 0; i < part->words->len; i ++) {
- w = &g_array_index (part->words, rspamd_fstring_t, i);
- if (stem) {
- r = sb_stemmer_stem (stem, w->begin, w->len);
- }
+ if (tmp) {
+ for (i = 0; i < tmp->len; i ++) {
+ w = &g_array_index (tmp, rspamd_fstring_t, i);
+ if (stem) {
+ r = sb_stemmer_stem (stem, w->begin, w->len);
+ }
- if (stem == NULL || r == NULL) {
- stw.begin = rspamd_mempool_fstrdup (task->task_pool, w);
- stw.len = w->len;
- }
- else {
- stw.begin = rspamd_mempool_strdup (task->task_pool, r);
- stw.len = strlen (r);
- }
+ if (stem == NULL || r == NULL) {
+ stw.begin = rspamd_mempool_fstrdup (task->task_pool, w);
+ stw.len = w->len;
+ }
+ else {
+ stw.begin = rspamd_mempool_strdup (task->task_pool, r);
+ stw.len = strlen (r);
+ }
- if (part->is_utf) {
- rspamd_str_lc_utf8 (stw.begin, stw.len);
- }
- else {
- rspamd_str_lc (stw.begin, stw.len);
+ if (part->is_utf) {
+ rspamd_str_lc_utf8 (stw.begin, stw.len);
+ }
+ else {
+ rspamd_str_lc (stw.begin, stw.len);
+ }
+ g_array_append_val (part->normalized_words, stw);
}
- g_array_append_val (part->normalized_words, stw);
+ g_array_free (tmp, TRUE);
}
if (stem != NULL) {