#ifdef WITH_HYPERSCAN
hs_database_t *hs_db;
hs_scratch_t *hs_scratch;
+ gint *hs_ids;
+ guint nhs;
#endif
};
if (re_class->hs_scratch) {
hs_free_scratch (re_class->hs_scratch);
}
+ if (re_class->hs_ids) {
+ g_free (re_class->hs_ids);
+ }
#endif
g_slice_free1 (sizeof (*re_class), re_class);
}
}
static guint
-rspamd_re_cache_process_pcre (struct rspamd_re_cache *cache,
+rspamd_re_cache_process_pcre (struct rspamd_re_runtime *rt,
rspamd_regexp_t *re, const guchar *in, gsize len,
gboolean is_raw, gboolean is_multiple)
{
len = strlen (in);
}
- if (cache->max_re_data > 0 && len > cache->max_re_data) {
- len = cache->max_re_data;
+ if (rt->cache->max_re_data > 0 && len > rt->cache->max_re_data) {
+ len = rt->cache->max_re_data;
}
while (rspamd_regexp_search (re,
return r;
}
+#ifdef WITH_HYPERSCAN
+struct rspamd_re_hyperscan_cbdata { /* user data passed through hs_scan to rspamd_re_cache_hyperscan_cb */
+ struct rspamd_re_runtime *rt; /* runtime whose checked bits and results the callback updates */
+ const guchar *in; /* start of the scanned buffer; the callback applies match offsets to it */
+ rspamd_regexp_t *re; /* regexp the callback hands to the PCRE re-check */
+};
+/*
+ * Match callback passed to hs_scan by rspamd_re_cache_process_regexp_data.
+ * `id` is used as the index into rt->checked and rt->results, `from` and `to`
+ * are applied as offsets into cbdata->in, and `ud` carries the
+ * struct rspamd_re_hyperscan_cbdata of the current scan.
+ * Marks `id` as checked, adds the match count to rt->results[id] and always
+ * returns 0.
+ * NOTE(review): presumably a zero return lets hyperscan continue scanning -
+ * confirm against the match_event_handler documentation.
+ */
+static gint
+rspamd_re_cache_hyperscan_cb (unsigned int id,
+ unsigned long long from,
+ unsigned long long to,
+ unsigned int flags,
+ void *ud)
+{
+ struct rspamd_re_hyperscan_cbdata *cbdata = ud;
+ struct rspamd_re_runtime *rt;
+ guint ret;
+ rspamd_regexp_t *re;
+
+ rt = cbdata->rt;
+ re = cbdata->re;
+
+ if (flags & HS_FLAG_PREFILTER) {
+ /*
+ * We need to match the corresponding pcre first: the span
+ * [in + from, in + to) is re-run through PCRE with is_raw = FALSE and
+ * is_multiple = TRUE, and the PCRE match count becomes the result.
+ * NOTE(review): the regexp used here is cbdata->re while the result is
+ * accounted under `id` - confirm both always name the same expression.
+ * NOTE(review): verify that hyperscan really reports HS_FLAG_PREFILTER
+ * through the callback `flags` argument.
+ */
+ ret = rspamd_re_cache_process_pcre (rt,
+ re,
+ cbdata->in + from,
+ to - from,
+ FALSE,
+ TRUE);
+ }
+ else {
+ ret = 1; /* a hit without the prefilter flag counts as exactly one match */
+ }
+
+ setbit (rt->checked, id);
+ rt->results[id] += ret; /* accumulates over repeated callbacks for the same id */
+
+ return 0;
+}
+#endif
+/*
+ * Matches `re` against the buffer `in` of `len` bytes and records the outcome
+ * in rt->results and rt->checked under the regexp's cache id.
+ * Without WITH_HYPERSCAN, or when the cache element's match_type is
+ * RSPAMD_RE_CACHE_PCRE, the work is delegated to rspamd_re_cache_process_pcre
+ * (is_raw and is_multiple are only forwarded on that path).
+ * Otherwise the buffer is scanned once with the hyperscan database of the
+ * regexp's class, and every id listed in the class ends up marked as checked.
+ * Returns the PCRE match count, or on the hyperscan path rt->results[re_id]
+ * after the scan (0 when hs_scan does not return HS_SUCCESS).
+ * NOTE(review): results are stored without the 0xFF clamp that the removed
+ * caller code applied - confirm the element type of rt->results can hold them.
+ */
+static guint
+rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt,
+ rspamd_regexp_t *re,
+ const guchar *in, gsize len,
+ gboolean is_raw, gboolean is_multiple)
+{
+ struct rspamd_re_cache_elt *elt;
+ struct rspamd_re_class *re_class;
+ guint64 re_id;
+ guint ret, i;
+
+ re_id = rspamd_regexp_get_cache_id (re);
+ elt = g_ptr_array_index (rt->cache->re, re_id);
+ (void)i; /* i is only used inside the WITH_HYPERSCAN branch below */
+
+#ifndef WITH_HYPERSCAN
+ ret = rspamd_re_cache_process_pcre (rt, re, in, len, is_raw, is_multiple);
+ setbit (rt->checked, re_id);
+ rt->results[re_id] = ret;
+#else
+ struct rspamd_re_hyperscan_cbdata cbdata;
+
+ if (elt->match_type == RSPAMD_RE_CACHE_PCRE) {
+ ret = rspamd_re_cache_process_pcre (rt, re, in, len, is_raw, is_multiple);
+ setbit (rt->checked, re_id);
+ rt->results[re_id] = ret;
+ }
+ else {
+ if (len == 0) { /* zero length: measure `in` as a NUL-terminated string */
+ len = strlen (in);
+ }
+
+ if (rt->cache->max_re_data > 0 && len > rt->cache->max_re_data) {
+ len = rt->cache->max_re_data; /* a positive max_re_data caps the scanned bytes */
+ }
+
+ re_class = rspamd_regexp_get_class (re);
+ g_assert (re_class->hs_scratch != NULL);
+ g_assert (re_class->hs_db != NULL);
+
+ /* Go through hyperscan API: the callback receives &cbdata as its user data */
+ cbdata.in = in;
+ cbdata.re = re;
+ cbdata.rt = rt;
+
+ if ((hs_scan (re_class->hs_db, in, len, 0, re_class->hs_scratch,
+ rspamd_re_cache_hyperscan_cb, &cbdata)) != HS_SUCCESS) {
+ ret = 0;
+ }
+ else {
+ ret = rt->results[re_id]; /* read before the loop below reuses re_id */
+ }
+
+ /*
+ * Every id of this class that is still not marked as checked gets a
+ * zero result and is marked as checked, so the whole class is settled.
+ */
+ for (i = 0; i < re_class->nhs; i++) {
+ re_id = re_class->hs_ids[i];
+
+ if (!isset (rt->checked, re_id)) {
+ rt->results[re_id] = 0;
+ setbit (rt->checked, re_id);
+ }
+ }
+ }
+#endif
+
+ return ret;
+}
/*
* Calculates the specified regexp for the specified class if it's not calculated
*/
struct rspamd_re_runtime *rt,
rspamd_regexp_t *re,
struct rspamd_re_class *re_class,
- guint64 re_id,
gboolean is_strong,
gboolean is_multiple)
{
gboolean raw = FALSE;
struct mime_text_part *part;
struct rspamd_url *url;
+
gpointer k, v;
gsize len;
/* Match re */
if (in) {
- ret += rspamd_re_cache_process_pcre (rt->cache, re, in,
+ ret += rspamd_re_cache_process_regexp_data (rt, re, in,
strlen (in), raw, is_multiple);
debug_task ("checking header %s regexp: %s -> %d",
re_class->type_data,
rspamd_regexp_get_pattern (re), ret);
-
- if (!is_multiple && ret) {
- break;
- }
}
cur = g_list_next (cur);
raw = TRUE;
in = task->raw_headers_content.begin;
len = task->raw_headers_content.len;
- ret = rspamd_re_cache_process_pcre (rt->cache, re, in,
+ ret = rspamd_re_cache_process_regexp_data (rt, re, in,
len, raw, is_multiple);
debug_task ("checking allheader regexp: %s -> %d",
rspamd_regexp_get_pattern (re), ret);
}
if (len > 0) {
- ret += rspamd_re_cache_process_pcre (rt->cache, re, in,
+ ret += rspamd_re_cache_process_regexp_data (rt, re, in,
len, raw, is_multiple);
debug_task ("checking mime regexp: %s -> %d",
rspamd_regexp_get_pattern (re), ret);
-
- if (!is_multiple && ret) {
- break;
- }
}
}
break;
g_hash_table_iter_init (&it, task->urls);
while (g_hash_table_iter_next (&it, &k, &v)) {
- if (ret && !is_multiple) {
- break;
- }
-
url = v;
in = url->string;
len = url->urllen;
raw = FALSE;
- ret += rspamd_re_cache_process_pcre (rt->cache, re, in,
+ ret += rspamd_re_cache_process_regexp_data (rt, re, in,
len, raw, is_multiple);
}
g_hash_table_iter_init (&it, task->emails);
while (g_hash_table_iter_next (&it, &k, &v)) {
- if (ret && !is_multiple) {
- break;
- }
-
url = v;
in = url->string;
len = url->urllen;
raw = FALSE;
- ret += rspamd_re_cache_process_pcre (rt->cache, re, in,
+ ret += rspamd_re_cache_process_regexp_data (rt, re, in,
len, raw, is_multiple);
}
in = task->msg.begin;
len = task->msg.len;
- ret = rspamd_re_cache_process_pcre (rt->cache, re, in,
+ ret = rspamd_re_cache_process_regexp_data (rt, re, in,
len, raw, is_multiple);
debug_task ("checking rawbody regexp: %s -> %d",
rspamd_regexp_get_pattern (re), ret);
break;
}
- setbit (rt->checked, re_id);
- rt->results[re_id] = ret > 0xFF ? 0xFF : ret;
-
return ret;
}
if (isset (rt->checked, re_id)) {
/* Fast path */
- return rt->results[re_id];
+ if (is_multiple) {
+ return rt->results[re_id];
+ }
+ else {
+ return rt->results[re_id] ? 1 : 0;
+ }
}
else {
/* Slow path */
return 0;
}
- return rspamd_re_cache_exec_re (task, rt, re, re_class, re_id,
+ return rspamd_re_cache_exec_re (task, rt, re, re_class,
is_strong, is_multiple);
}
elt->match_type = RSPAMD_RE_CACHE_HYPERSCAN;
}
- g_free (hs_ids);
+ re_class->hs_ids = hs_ids;
+ re_class->nhs = n;
}
else {
msg_err_re_cache ("invalid hyperscan hash file '%s'",