struct sb_stemmer *stem = NULL;
rspamd_fstring_t *w, stw;
const guchar *r;
- guint i;
+ guint i, nlen;
GArray *tmp;
if (part->language && part->language[0] != '\0' && part->is_utf) {
r = sb_stemmer_stem (stem, w->begin, w->len);
}
- if (stem == NULL || r == NULL) {
- stw.begin = rspamd_mempool_fstrdup (task->task_pool, w);
- stw.len = w->len;
- }
- else {
- stw.begin = rspamd_mempool_strdup (task->task_pool, r);
- stw.len = strlen (r);
+ if (stem != NULL && r != NULL) {
+ nlen = strlen (r);
+ nlen = MIN (nlen, stw.len);
+ memcpy (stw.begin, r, nlen);
+ stw.len = nlen;
}
if (part->is_utf) {
else {
rspamd_str_lc (stw.begin, stw.len);
}
- g_array_append_val (part->normalized_words, stw);
}
- g_array_free (tmp, TRUE);
+ part->normalized_words = tmp;
}
if (stem != NULL) {
token->begin = buf->begin;
token->len = 0;
}
+ *cur = token->begin;
}
token->len = 0;
token->begin = "exception";
token->len = sizeof ("exception") - 1;
state = skip_exception;
+ continue;
}
else if (g_unichar_isgraph (uc) && !g_unichar_ispunct (uc)) {
state = feed_token;
func = rspamd_tokenizer_get_word;
}
- res = g_array_new (FALSE, FALSE, sizeof (rspamd_fstring_t));
+ res = g_array_sized_new (FALSE, FALSE, sizeof (rspamd_fstring_t), 128);
while (func (&buf, &pos, &token, &cur, is_utf, &l)) {
if (min_len > 0 && l < min_len) {