const char *word = ucl_object_tolstring (w, &wlen);
const char *saved;
+#ifdef WITH_HYPERSCAN
+ rspamd_multipattern_add_pattern_len (d->stop_words[cat].mp,
+ word, wlen,
+ RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8
+ |RSPAMD_MULTIPATTERN_RE);
+#else
rspamd_multipattern_add_pattern_len (d->stop_words[cat].mp,
word, wlen,
RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
+#endif
nelt->stop_words ++;
nstop ++;
/* Map from ngramm in ucs32 to GPtrArray of rspamd_language_elt */
for (i = 0; i < RSPAMD_LANGUAGE_MAX; i ++) {
ret->trigramms[i] = kh_init (rspamd_trigram_hash);
+#ifdef WITH_HYPERSCAN
+ ret->stop_words[i].mp = rspamd_multipattern_create (
+ RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8|
+ RSPAMD_MULTIPATTERN_RE);
+#else
ret->stop_words[i].mp = rspamd_multipattern_create (
RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
+#endif
+
ret->stop_words[i].ranges = g_array_new (FALSE, FALSE,
sizeof (struct rspamd_stop_word_range));
}
}
rspamd_config_add_symbol (cfg, composite_name, score,
- description, group, FALSE, FALSE,
+ description, group,
+ 0,
+ ucl_object_get_priority (obj) + 1,
1);
elt = ucl_object_lookup (obj, "groups");
m.flags = flags;
rspamd_multipattern_add_pattern (url_scanner->search_trie, p,
- RSPAMD_MULTIPATTERN_TLD | RSPAMD_MULTIPATTERN_ICASE);
+ RSPAMD_MULTIPATTERN_TLD|RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
m.pattern = rspamd_multipattern_get_pattern (url_scanner->search_trie,
rspamd_multipattern_get_npatterns (url_scanner->search_trie) - 1);
m.patlen = strlen (m.pattern);
url_scanner->matchers = g_array_sized_new (FALSE, TRUE,
sizeof (struct url_matcher), 13000);
url_scanner->search_trie = rspamd_multipattern_create_sized (13000,
- RSPAMD_MULTIPATTERN_TLD | RSPAMD_MULTIPATTERN_ICASE);
+ RSPAMD_MULTIPATTERN_TLD|RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
}
else {
url_scanner->matchers = g_array_sized_new (FALSE, TRUE,
sizeof (struct url_matcher), 128);
url_scanner->search_trie = rspamd_multipattern_create_sized (128,
- RSPAMD_MULTIPATTERN_TLD | RSPAMD_MULTIPATTERN_ICASE);
+ RSPAMD_MULTIPATTERN_TLD|RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
}
rspamd_url_add_static_matchers (url_scanner);