diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-07-15 14:23:17 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2021-07-15 14:23:17 +0100 |
commit | 5f80e2e32e9e82c769b3f28be29cff4f62d9ecce (patch) | |
tree | a18428dc67bae8a113a293b6406cecf76097a336 | |
parent | b2c36feea701c6685d83b8c4e6282fe7e307609d (diff) | |
download | rspamd-5f80e2e32e9e82c769b3f28be29cff4f62d9ecce.tar.gz rspamd-5f80e2e32e9e82c769b3f28be29cff4f62d9ecce.zip |
[Rework] Slightly improve old regexp API
-rw-r--r-- | src/libutil/regexp.c | 49 | ||||
-rw-r--r-- | src/libutil/regexp.h | 34 |
2 files changed, 49 insertions, 34 deletions
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c index 93b8125e4..6ec5b4d39 100644 --- a/src/libutil/regexp.c +++ b/src/libutil/regexp.c @@ -306,10 +306,10 @@ rspamd_regexp_post_process (rspamd_regexp_t *r) } rspamd_regexp_t* -rspamd_regexp_new (const gchar *pattern, const gchar *flags, +rspamd_regexp_new_len (const gchar *pattern, gsize len, const gchar *flags, GError **err) { - const gchar *start = pattern, *end, *flags_str = NULL; + const gchar *start = pattern, *end = start + len, *flags_str = NULL; gchar *err_str; rspamd_regexp_t *res; gboolean explicit_utf = FALSE; @@ -331,7 +331,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags, return NULL; } - if (flags == NULL) { + if (flags == NULL && start < end) { /* We need to parse pattern and detect flags set */ if (*start == '/') { sep = '/'; @@ -347,14 +347,13 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags, rspamd_flags |= RSPAMD_REGEXP_FLAG_FULL_MATCH; } - if (sep == '\0' || g_ascii_isalnum (sep)) { + if (g_ascii_isalnum (sep)) { /* We have no flags, no separators and just use all line as expr */ start = pattern; - end = start + strlen (pattern); rspamd_flags &= ~RSPAMD_REGEXP_FLAG_FULL_MATCH; } else { - end = strrchr (pattern, sep); + end = rspamd_memrchr(pattern, sep, len); if (end == NULL || end <= start) { g_set_error (err, rspamd_regexp_quark(), EINVAL, @@ -370,7 +369,6 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags, /* Strictly check all flags */ strict_flags = TRUE; start = pattern; - end = pattern + strlen (pattern); flags_str = flags; } @@ -384,7 +382,7 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags, #endif if (flags_str != NULL) { - while (*flags_str) { + while (flags_str < end) { switch (*flags_str) { case 'i': regexp_flags |= PCRE_FLAG(CASELESS); @@ -535,9 +533,16 @@ fin: return res; } +rspamd_regexp_t * +rspamd_regexp_new (const gchar *pattern, const gchar *flags, + GError **err) +{ + return rspamd_regexp_new_len(pattern, strlen(pattern), flags, err); +} + #ifndef WITH_PCRE2 gboolean -rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len, +rspamd_regexp_search (const rspamd_regexp_t *re, const gchar *text, gsize len, const gchar **start, const gchar **end, gboolean raw, GArray *captures) { @@ -672,7 +677,7 @@ rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len, #else /* PCRE 2 version */ gboolean -rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len, +rspamd_regexp_search (const rspamd_regexp_t *re, const gchar *text, gsize len, const gchar **start, const gchar **end, gboolean raw, GArray *captures) { @@ -788,7 +793,7 @@ rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len, #endif const char* -rspamd_regexp_get_pattern (rspamd_regexp_t *re) +rspamd_regexp_get_pattern (const rspamd_regexp_t *re) { g_assert (re != NULL); @@ -808,7 +813,7 @@ rspamd_regexp_set_flags (rspamd_regexp_t *re, guint new_flags) } guint -rspamd_regexp_get_flags (rspamd_regexp_t *re) +rspamd_regexp_get_flags (const rspamd_regexp_t *re) { g_assert (re != NULL); @@ -816,7 +821,7 @@ rspamd_regexp_get_flags (rspamd_regexp_t *re) } guint -rspamd_regexp_get_pcre_flags (rspamd_regexp_t *re) +rspamd_regexp_get_pcre_flags (const rspamd_regexp_t *re) { g_assert (re != NULL); @@ -824,7 +829,7 @@ rspamd_regexp_get_pcre_flags (rspamd_regexp_t *re) } gint -rspamd_regexp_get_nbackrefs (rspamd_regexp_t *re) +rspamd_regexp_get_nbackrefs (const rspamd_regexp_t *re) { g_assert (re != NULL); @@ -832,7 +837,7 @@ rspamd_regexp_get_nbackrefs (rspamd_regexp_t *re) } gint -rspamd_regexp_get_ncaptures (rspamd_regexp_t *re) +rspamd_regexp_get_ncaptures (const rspamd_regexp_t *re) { g_assert (re != NULL); @@ -840,7 +845,7 @@ rspamd_regexp_get_ncaptures (rspamd_regexp_t *re) } guint -rspamd_regexp_get_maxhits (rspamd_regexp_t *re) +rspamd_regexp_get_maxhits (const rspamd_regexp_t *re) { g_assert (re != NULL); @@ -860,7 +865,7 @@ rspamd_regexp_set_maxhits (rspamd_regexp_t *re, guint new_maxhits) } guint64 -rspamd_regexp_get_cache_id (rspamd_regexp_t *re) +rspamd_regexp_get_cache_id (const rspamd_regexp_t *re) { g_assert (re != NULL); @@ -880,7 +885,7 @@ rspamd_regexp_set_cache_id (rspamd_regexp_t *re, guint64 id) } gboolean -rspamd_regexp_match (rspamd_regexp_t *re, const gchar *text, gsize len, +rspamd_regexp_match (const rspamd_regexp_t *re, const gchar *text, gsize len, gboolean raw) { const gchar *start = NULL, *end = NULL; @@ -888,7 +893,7 @@ rspamd_regexp_match (rspamd_regexp_t *re, const gchar *text, gsize len, g_assert (re != NULL); g_assert (text != NULL); - if (len == 0 && text != NULL) { + if (len == 0) { len = strlen (text); } @@ -926,7 +931,7 @@ rspamd_regexp_set_ud (rspamd_regexp_t *re, gpointer ud) } gpointer -rspamd_regexp_get_ud (rspamd_regexp_t *re) +rspamd_regexp_get_ud (const rspamd_regexp_t *re) { g_assert (re != NULL); @@ -1169,7 +1174,7 @@ rspamd_regexp_library_init (struct rspamd_config *cfg) } gpointer -rspamd_regexp_get_id (rspamd_regexp_t *re) +rspamd_regexp_get_id (const rspamd_regexp_t *re) { g_assert (re != NULL); @@ -1177,7 +1182,7 @@ rspamd_regexp_get_id (rspamd_regexp_t *re) } gpointer -rspamd_regexp_get_class (rspamd_regexp_t *re) +rspamd_regexp_get_class (const rspamd_regexp_t *re) { g_assert (re != NULL); diff --git a/src/libutil/regexp.h b/src/libutil/regexp.h index 128edd761..1e98b7b3c 100644 --- a/src/libutil/regexp.h +++ b/src/libutil/regexp.h @@ -61,6 +61,16 @@ rspamd_regexp_t *rspamd_regexp_new (const gchar *pattern, const gchar *flags, GError **err); /** + * Create new rspamd regexp + * @param pattern regexp pattern + * @param flags flags (may be enclosed inside pattern) + * @param err error pointer set if compilation failed + * @return new regexp object + */ +rspamd_regexp_t *rspamd_regexp_new_len (const gchar *pattern, gsize len, const gchar *flags, + GError **err); + +/** * Search the specified regexp in the text * @param re * @param text @@ -71,7 +81,7 @@ rspamd_regexp_t *rspamd_regexp_new (const gchar *pattern, const gchar *flags, * @param captures array of captured strings of type rspamd_fstring_capture or NULL * @return */ -gboolean rspamd_regexp_search (rspamd_regexp_t *re, +gboolean rspamd_regexp_search (const rspamd_regexp_t *re, const gchar *text, gsize len, const gchar **start, const gchar **end, gboolean raw, GArray *captures); @@ -84,7 +94,7 @@ gboolean rspamd_regexp_search (rspamd_regexp_t *re, * @param len * @return */ -gboolean rspamd_regexp_match (rspamd_regexp_t *re, +gboolean rspamd_regexp_match (const rspamd_regexp_t *re, const gchar *text, gsize len, gboolean raw); /** @@ -110,31 +120,31 @@ void rspamd_regexp_set_ud (rspamd_regexp_t *re, gpointer ud); * @param re regexp object * @return opaque pointer */ -gpointer rspamd_regexp_get_ud (rspamd_regexp_t *re); +gpointer rspamd_regexp_get_ud (const rspamd_regexp_t *re); /** * Get regexp ID suitable for hashing * @param re * @return */ -gpointer rspamd_regexp_get_id (rspamd_regexp_t *re); +gpointer rspamd_regexp_get_id (const rspamd_regexp_t *re); /** * Get pattern for the specified regexp object * @param re * @return */ -const char *rspamd_regexp_get_pattern (rspamd_regexp_t *re); +const char *rspamd_regexp_get_pattern (const rspamd_regexp_t *re); /** * Get PCRE flags for the regexp */ -guint rspamd_regexp_get_pcre_flags (rspamd_regexp_t *re); +guint rspamd_regexp_get_pcre_flags (const rspamd_regexp_t *re); /** * Get rspamd flags for the regexp */ -guint rspamd_regexp_get_flags (rspamd_regexp_t *re); +guint rspamd_regexp_get_flags (const rspamd_regexp_t *re); /** * Set rspamd flags for the regexp @@ -144,7 +154,7 @@ guint rspamd_regexp_set_flags (rspamd_regexp_t *re, guint new_flags); /** * Set regexp maximum hits */ -guint rspamd_regexp_get_maxhits (rspamd_regexp_t *re); +guint rspamd_regexp_get_maxhits (const rspamd_regexp_t *re); /** * Get regexp maximum hits @@ -154,17 +164,17 @@ guint rspamd_regexp_set_maxhits (rspamd_regexp_t *re, guint new_maxhits); /** * Returns number of backreferences in a regexp */ -gint rspamd_regexp_get_nbackrefs (rspamd_regexp_t *re); +gint rspamd_regexp_get_nbackrefs (const rspamd_regexp_t *re); /** * Returns number of capture groups in a regexp */ -gint rspamd_regexp_get_ncaptures (rspamd_regexp_t *re); +gint rspamd_regexp_get_ncaptures (const rspamd_regexp_t *re); /** * Returns cache id for a regexp */ -guint64 rspamd_regexp_get_cache_id (rspamd_regexp_t *re); +guint64 rspamd_regexp_get_cache_id (const rspamd_regexp_t *re); /** * Sets cache id for a regexp @@ -174,7 +184,7 @@ guint64 rspamd_regexp_set_cache_id (rspamd_regexp_t *re, guint64 id); /** * Get regexp class for the re object */ -gpointer rspamd_regexp_get_class (rspamd_regexp_t *re); +gpointer rspamd_regexp_get_class (const rspamd_regexp_t *re); /** * Set regexp class for the re object |