Browse Source

[Feature] Add more words regexp classes

tags/1.8.3
Vsevolod Stakhov 5 years ago
parent
commit
36c874383c
3 changed files with 46 additions and 5 deletions
  1. src/libmime/mime_expressions.c (+8, -0)
  2. src/libserver/re_cache.c (+36, -5)
  3. src/libserver/re_cache.h (+2, -0)

+ 8
- 0
src/libmime/mime_expressions.c View File

@@ -243,6 +243,14 @@ rspamd_parse_long_option (const gchar *start, gsize len,
ret = TRUE;
a->type = RSPAMD_RE_WORDS;
}
else if (TYPE_CHECK (start, "raw_words", len)) {
ret = TRUE;
a->type = RSPAMD_RE_RAWWORDS;
}
else if (TYPE_CHECK (start, "stem_words", len)) {
ret = TRUE;
a->type = RSPAMD_RE_STEMWORDS;
}
else if (TYPE_CHECK (start, "selector", len)) {
ret = TRUE;
a->type = RSPAMD_RE_SELECTOR;

+ 36
- 5
src/libserver/re_cache.c View File

@@ -1222,8 +1222,11 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
}
break;
case RSPAMD_RE_WORDS:
case RSPAMD_RE_STEMWORDS:
case RSPAMD_RE_RAWWORDS:
if (task->text_parts->len > 0) {
cnt = 0;
raw = FALSE;

PTR_ARRAY_FOREACH (task->text_parts, i, part) {
if (part->utf_words) {
@@ -1241,22 +1244,50 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
guint j;
rspamd_stat_token_t *tok;


if (part->utf_words) {
for (j = 0; j < part->utf_words->len; j ++) {
tok = &g_array_index (part->utf_words,
rspamd_stat_token_t, j);

if (tok->flags & RSPAMD_STAT_TOKEN_FLAG_UTF) {
scvec[cnt] = tok->normalized.begin;
lenvec[cnt++] = tok->normalized.len;
if (tok->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT) {
if (!(tok->flags & RSPAMD_STAT_TOKEN_FLAG_UTF)) {
if (!re_class->has_utf8) {
raw = TRUE;
}
else {
continue; /* Skip */
}
}
}
else {
continue; /* Skip non text */
}

if (re_class->type == RSPAMD_RE_RAWWORDS) {
if (tok->original.len > 0) {
scvec[cnt] = tok->original.begin;
lenvec[cnt++] = tok->original.len;
}
}
else if (re_class->type == RSPAMD_RE_WORDS) {
if (tok->normalized.len > 0) {
scvec[cnt] = tok->normalized.begin;
lenvec[cnt++] = tok->normalized.len;
}
}
else {
/* Stemmed words */
if (tok->stemmed.len > 0) {
scvec[cnt] = tok->stemmed.begin;
lenvec[cnt++] = tok->stemmed.len;
}
}
}
}
}

ret = rspamd_re_cache_process_regexp_data (rt, re,
task, scvec, lenvec, cnt, TRUE);
task, scvec, lenvec, cnt, raw);

msg_debug_re_task ("checking sa words regexp: %s -> %d",
rspamd_regexp_get_pattern (re), ret);

+ 2
- 0
src/libserver/re_cache.h View File

@@ -36,6 +36,8 @@ enum rspamd_re_type {
RSPAMD_RE_SABODY, /* body in SA */
RSPAMD_RE_SARAWBODY, /* rawbody in SA */
RSPAMD_RE_WORDS, /* normalized words */
RSPAMD_RE_RAWWORDS, /* raw words */
RSPAMD_RE_STEMWORDS, /* stemmed words */
RSPAMD_RE_SELECTOR, /* use lua selector to process regexp */
RSPAMD_RE_MAX
};

Loading…
Cancel
Save