if (!IS_PART_EMPTY (part) && part->words != NULL) {
if (compat) {
tok->tokenizer->tokenize_func (cf, task->task_pool,
- part->words, tok->tokens, IS_PART_UTF (part));
+ part->words, tok->tokens, IS_PART_UTF (part), NULL);
}
else {
tok->tokenizer->tokenize_func (cf, task->task_pool,
- part->normalized_words, tok->tokens, IS_PART_UTF (part));
+ part->normalized_words, tok->tokens, IS_PART_UTF (part), NULL);
}
}
}
task->task_pool,
words,
tok->tokens,
- TRUE);
+ TRUE,
+ "SUBJECT");
g_array_free (words, TRUE);
}
}
rspamd_mempool_t * pool,
GArray * input,
GTree * tree,
- gboolean is_utf)
+ gboolean is_utf,
+ const gchar *prefix)
{
rspamd_token_t *new = NULL;
rspamd_fstring_t *token;
struct rspamd_osb_tokenizer_config *osb_cf;
- guint64 *hashpipe, cur;
+ guint64 *hashpipe, cur, seed;
guint32 h1, h2;
guint processed = 0, i, w, window_size;
window_size = osb_cf->window_size;
+ if (prefix) {
+ seed = XXH64 (prefix, strlen (prefix), osb_cf->seed);
+ }
+ else {
+ seed = osb_cf->seed;
+ }
+
hashpipe = g_alloca (window_size * sizeof (hashpipe[0]));
memset (hashpipe, 0xfe, window_size * sizeof (hashpipe[0]));
else {
rspamd_cryptobox_siphash ((guchar *)&cur, token->begin,
token->len, osb_cf->sk);
+
+ if (prefix) {
+ cur ^= seed;
+ }
}
}
rspamd_mempool_t *pool,
GArray *words,
GTree *result,
- gboolean is_utf);
+ gboolean is_utf,
+ const gchar *prefix);
};
/* Compare two token nodes */
rspamd_mempool_t *pool,
GArray *input,
GTree *tokens,
- gboolean is_utf);
+ gboolean is_utf,
+ const gchar *prefix);
/*
 * Declaration only; the definition is not part of this excerpt.
 * Takes a tokenizer configuration `cf` and a `gsize *len` argument and
 * returns an untyped pointer.
 * NOTE(review): presumably the returned pointer is the OSB tokenizer
 * configuration data and `*len` receives its size in bytes; confirm against
 * the definition before relying on this.
 */
gpointer rspamd_tokenizer_osb_get_config (struct rspamd_tokenizer_config *cf,
gsize *len);