You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

regexp.h 6.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef REGEXP_H_
  17. #define REGEXP_H_
  18. #include "config.h"
  19. #ifndef WITH_PCRE2
  20. #define PCRE_FLAG(x) G_PASTE(PCRE_, x) /* without WITH_PCRE2: paste the PCRE_ prefix onto x */
  21. #else
  22. #ifndef PCRE2_CODE_UNIT_WIDTH
  23. #define PCRE2_CODE_UNIT_WIDTH 8 /* default value, used only if not already defined */
  24. #endif
  25. #define PCRE_FLAG(x) G_PASTE(PCRE2_, x) /* with WITH_PCRE2: paste the PCRE2_ prefix onto x */
  26. #endif
  27. #define RSPAMD_INVALID_ID ((uint64_t) -1LL) /* all 64 bits set */
  28. #define RSPAMD_REGEXP_FLAG_RAW (1 << 1) /* flag bits start at bit 1; bit 0 is not defined in this header */
  29. #define RSPAMD_REGEXP_FLAG_NOOPT (1 << 2)
  30. #define RSPAMD_REGEXP_FLAG_FULL_MATCH (1 << 3)
  31. #define RSPAMD_REGEXP_FLAG_PCRE_ONLY (1 << 4)
  32. #define RSPAMD_REGEXP_FLAG_DISABLE_JIT (1 << 5)
  33. #define RSPAMD_REGEXP_FLAG_UTF (1 << 6)
  34. #define RSPAMD_REGEXP_FLAG_LEFTMOST (1 << 7)
  35. #ifdef __cplusplus
  36. extern "C" {
  37. #endif
  38. struct rspamd_config;
  39. typedef struct rspamd_regexp_s rspamd_regexp_t;
  40. struct rspamd_regexp_cache;
  41. struct rspamd_re_capture {
  42. const char *p; /* presumably points at the start of the captured text -- TODO confirm */
  43. gsize len; /* presumably the length of the captured text -- TODO confirm */
  44. };
  45. /**
  46. * Create new rspamd regexp
  47. * @param pattern regexp pattern
  48. * @param flags flags (may be enclosed inside pattern)
  49. * @param err error pointer set if compilation failed
  50. * @return new regexp object
  51. */
  52. rspamd_regexp_t *rspamd_regexp_new(const gchar *pattern, const gchar *flags,
  53. GError **err);
  54. /**
  55. * Create new rspamd regexp from a pattern passed together with a length
  56. * @param pattern regexp pattern (`len` is presumably its length in bytes)
  57. * @param flags flags (may be enclosed inside pattern)
  58. * @param err error pointer set if compilation failed
  59. * @return new regexp object
  60. */
  61. rspamd_regexp_t *rspamd_regexp_new_len(const gchar *pattern, gsize len, const gchar *flags,
  62. GError **err);
  63. /**
  64. * Search the specified regexp in the text
  65. * @param re
  66. * @param text
  67. * @param len
  68. * @param start position of start of match
  69. * @param end position of end of match
  70. * @param raw
  71. * @param captures array of captured strings (presumably of type struct rspamd_re_capture) or NULL
  72. * @return
  73. */
  74. gboolean rspamd_regexp_search(const rspamd_regexp_t *re,
  75. const gchar *text, gsize len,
  76. const gchar **start, const gchar **end, gboolean raw,
  77. GArray *captures);
  78. /**
  79. * Exact match of the specified text against the regexp
  80. * @param re
  81. * @param text
  82. * @param len
  83. * @return
  84. */
  85. gboolean rspamd_regexp_match(const rspamd_regexp_t *re,
  86. const gchar *text, gsize len, gboolean raw);
  87. /**
  88. * Increase refcount for a regexp object
  89. */
  90. rspamd_regexp_t *rspamd_regexp_ref(rspamd_regexp_t *re);
  91. /**
  92. * Unref regexp object
  93. * @param re
  94. */
  95. void rspamd_regexp_unref(rspamd_regexp_t *re);
  96. /**
  97. * Set auxiliary userdata for the specified regexp
  98. * @param re regexp object
  99. * @param ud opaque pointer
  100. */
  101. void rspamd_regexp_set_ud(rspamd_regexp_t *re, gpointer ud);
  102. /**
  103. * Get userdata for a regexp object
  104. * @param re regexp object
  105. * @return opaque pointer
  106. */
  107. gpointer rspamd_regexp_get_ud(const rspamd_regexp_t *re);
  108. /**
  109. * Get regexp ID suitable for hashing
  110. * @param re
  111. * @return
  112. */
  113. gpointer rspamd_regexp_get_id(const rspamd_regexp_t *re);
  114. /**
  115. * Get pattern for the specified regexp object
  116. * @param re
  117. * @return
  118. */
  119. const char *rspamd_regexp_get_pattern(const rspamd_regexp_t *re);
  120. /**
  121. * Get PCRE flags for the regexp
  122. */
  123. guint rspamd_regexp_get_pcre_flags(const rspamd_regexp_t *re);
  124. /**
  125. * Get rspamd flags for the regexp
  126. */
  127. guint rspamd_regexp_get_flags(const rspamd_regexp_t *re);
  128. /**
  129. * Set rspamd flags for the regexp
  130. */
  131. guint rspamd_regexp_set_flags(rspamd_regexp_t *re, guint new_flags);
  132. /**
  133. * Get regexp maximum hits
  134. */
  135. guint rspamd_regexp_get_maxhits(const rspamd_regexp_t *re);
  136. /**
  137. * Set regexp maximum hits
  138. */
  139. guint rspamd_regexp_set_maxhits(rspamd_regexp_t *re, guint new_maxhits);
  140. /**
  141. * Returns cache id for a regexp
  142. */
  143. uint64_t rspamd_regexp_get_cache_id(const rspamd_regexp_t *re);
  144. /**
  145. * Sets cache id for a regexp
  146. */
  147. uint64_t rspamd_regexp_set_cache_id(rspamd_regexp_t *re, uint64_t id);
  148. /**
  149. * Returns match limit for a regexp
  150. */
  151. gsize rspamd_regexp_get_match_limit(const rspamd_regexp_t *re);
  152. /**
  153. * Sets match limit for a regexp
  154. */
  155. gsize rspamd_regexp_set_match_limit(rspamd_regexp_t *re, gsize lim);
  156. /**
  157. * Get regexp class for the re object
  158. */
  159. gpointer rspamd_regexp_get_class(const rspamd_regexp_t *re);
  160. /**
  161. * Set regexp class for the re object
  162. * @return old re class value
  163. */
  164. gpointer rspamd_regexp_set_class(rspamd_regexp_t *re, gpointer re_class);
  165. /**
  166. * Create new regexp cache
  167. * @return
  168. */
  169. struct rspamd_regexp_cache *rspamd_regexp_cache_new(void);
  170. /**
  171. * Query rspamd cache for a specified regexp
  172. * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe)
  173. * @param pattern
  174. * @param flags
  175. * @return
  176. */
  177. rspamd_regexp_t *rspamd_regexp_cache_query(struct rspamd_regexp_cache *cache,
  178. const gchar *pattern,
  179. const gchar *flags);
  180. /**
  181. * Create or get cached regexp from the specified cache
  182. * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe)
  183. * @param pattern regexp pattern
  184. * @param flags flags (may be enclosed inside pattern)
  185. * @param err error pointer set if compilation failed
  186. * @return new regexp object
  187. */
  188. rspamd_regexp_t *rspamd_regexp_cache_create(struct rspamd_regexp_cache *cache,
  189. const gchar *pattern,
  190. const gchar *flags, GError **err);
  191. /**
  192. * Remove regexp from the cache
  193. * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe)
  194. * @param re re to remove
  195. * @return TRUE if a regexp has been removed
  196. */
  197. gboolean rspamd_regexp_cache_remove(struct rspamd_regexp_cache *cache,
  198. rspamd_regexp_t *re);
  199. /**
  200. * Destroy regexp cache and unref all elements inside it
  201. * @param cache
  202. */
  203. void rspamd_regexp_cache_destroy(struct rspamd_regexp_cache *cache);
  204. /**
  205. * Return the value for regexp hash based on its ID
  206. * @param a
  207. * @return
  208. */
  209. uint32_t rspamd_regexp_hash(gconstpointer a);
  210. /**
  211. * Compare two regexp objects based on their IDs
  212. * @param a
  213. * @param b
  214. * @return
  215. */
  216. gboolean rspamd_regexp_equal(gconstpointer a, gconstpointer b);
  217. /**
  218. * Acts like memcmp but for regexp
  219. */
  220. gint rspamd_regexp_cmp(gconstpointer a, gconstpointer b);
  221. /**
  222. * Initialize superglobal regexp cache and library
  223. */
  224. void rspamd_regexp_library_init(struct rspamd_config *cfg);
  225. /**
  226. * Create regexp from glob
  227. * @param gl glob pattern (`sz` is presumably its length in bytes)
  228. * @param err
  229. * @return
  230. */
  231. rspamd_regexp_t *rspamd_regexp_from_glob(const gchar *gl, gsize sz, GError **err);
  232. #ifdef __cplusplus
  233. }
  234. #endif
  235. #endif /* REGEXP_H_ */