Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

regexp.h 7.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef REGEXP_H_
  17. #define REGEXP_H_
  18. #include "config.h"
  19. #ifndef WITH_PCRE2
  20. #define PCRE_FLAG(x) G_PASTE(PCRE_, x)
  21. #else
  22. #ifndef PCRE2_CODE_UNIT_WIDTH
  23. #define PCRE2_CODE_UNIT_WIDTH 8
  24. #endif
  25. #define PCRE_FLAG(x) G_PASTE(PCRE2_, x)
  26. #endif
  27. #define RSPAMD_INVALID_ID ((guint64)-1LL)
  28. #define RSPAMD_REGEXP_FLAG_RAW (1 << 1)
  29. #define RSPAMD_REGEXP_FLAG_NOOPT (1 << 2)
  30. #define RSPAMD_REGEXP_FLAG_FULL_MATCH (1 << 3)
  31. #define RSPAMD_REGEXP_FLAG_PCRE_ONLY (1 << 4)
  32. #define RSPAMD_REGEXP_FLAG_DISABLE_JIT (1 << 5)
  33. #define RSPAMD_REGEXP_FLAG_UTF (1 << 6)
  34. #define RSPAMD_REGEXP_FLAG_LEFTMOST (1 << 7)
  35. #ifdef __cplusplus
  36. extern "C" {
  37. #endif
  38. struct rspamd_config;
  39. typedef struct rspamd_regexp_s rspamd_regexp_t;
  40. struct rspamd_regexp_cache;
  41. struct rspamd_re_capture { /* One captured substring, stored as a pointer plus a size */
  42. const char *p; /* presumably the start of the captured text - TODO confirm against the implementation */
  43. gsize len; /* presumably the capture length counted from p - TODO confirm */
  44. };
  45. /**
  46. * Create new rspamd regexp
  47. * @param pattern regexp pattern
  48. * @param flags flags (may be enclosed inside pattern)
  49. * @param err error pointer set if compilation failed
  50. * @return new regexp object
  51. */
  52. rspamd_regexp_t *rspamd_regexp_new (const gchar *pattern, const gchar *flags,
  53. GError **err);
  54. /**
  55. * Create new rspamd regexp from a pattern with an explicitly given length (len)
  56. * @param pattern regexp pattern
  57. * @param flags flags (may be enclosed inside pattern)
  58. * @param err error pointer set if compilation failed
  59. * @return new regexp object
  60. */
  61. rspamd_regexp_t *rspamd_regexp_new_len (const gchar *pattern, gsize len, const gchar *flags,
  62. GError **err);
  63. /**
  64. * Search the specified regexp in the text
  65. * @param re
  66. * @param text
  67. * @param len
  68. * @param start position of start of match
  69. * @param end position of end of match
  70. * @param raw
  71. * @param captures array of captured strings of type struct rspamd_re_capture or NULL
  72. * @return
  73. */
  74. gboolean rspamd_regexp_search (const rspamd_regexp_t *re,
  75. const gchar *text, gsize len,
  76. const gchar **start, const gchar **end, gboolean raw,
  77. GArray *captures);
  78. /**
  79. * Exact match of the specified text against the regexp
  80. * @param re
  81. * @param text
  82. * @param len
  83. * @return
  84. */
  85. gboolean rspamd_regexp_match (const rspamd_regexp_t *re,
  86. const gchar *text, gsize len, gboolean raw);
  87. /**
  88. * Increase refcount for a regexp object
  89. */
  90. rspamd_regexp_t *rspamd_regexp_ref (rspamd_regexp_t *re);
  91. /**
  92. * Unref regexp object
  93. * @param re
  94. */
  95. void rspamd_regexp_unref (rspamd_regexp_t *re);
  96. /**
  97. * Set auxiliary userdata for the specified regexp
  98. * @param re regexp object
  99. * @param ud opaque pointer
  100. */
  101. void rspamd_regexp_set_ud (rspamd_regexp_t *re, gpointer ud);
  102. /**
  103. * Get userdata for a regexp object
  104. * @param re regexp object
  105. * @return opaque pointer
  106. */
  107. gpointer rspamd_regexp_get_ud (const rspamd_regexp_t *re);
  108. /**
  109. * Get regexp ID suitable for hashing
  110. * @param re
  111. * @return
  112. */
  113. gpointer rspamd_regexp_get_id (const rspamd_regexp_t *re);
  114. /**
  115. * Get pattern for the specified regexp object
  116. * @param re
  117. * @return
  118. */
  119. const char *rspamd_regexp_get_pattern (const rspamd_regexp_t *re);
  120. /**
  121. * Get PCRE flags for the regexp
  122. */
  123. guint rspamd_regexp_get_pcre_flags (const rspamd_regexp_t *re);
  124. /**
  125. * Get rspamd flags for the regexp
  126. */
  127. guint rspamd_regexp_get_flags (const rspamd_regexp_t *re);
  128. /**
  129. * Set rspamd flags for the regexp
  130. */
  131. guint rspamd_regexp_set_flags (rspamd_regexp_t *re, guint new_flags);
  132. /**
  133. * Get regexp maximum hits
  134. */
  135. guint rspamd_regexp_get_maxhits (const rspamd_regexp_t *re);
  136. /**
  137. * Set regexp maximum hits
  138. */
  139. guint rspamd_regexp_set_maxhits (rspamd_regexp_t *re, guint new_maxhits);
  140. /**
  141. * Returns cache id for a regexp
  142. */
  143. guint64 rspamd_regexp_get_cache_id (const rspamd_regexp_t *re);
  144. /**
  145. * Sets cache id for a regexp
  146. */
  147. guint64 rspamd_regexp_set_cache_id (rspamd_regexp_t *re, guint64 id);
  148. /**
  149. * Returns match limit for a regexp
  150. */
  151. gsize rspamd_regexp_get_match_limit (const rspamd_regexp_t *re);
  152. /**
  153. * Sets match limit for a regexp
  154. */
  155. gsize rspamd_regexp_set_match_limit (rspamd_regexp_t *re, gsize lim);
  156. /**
  157. * Get regexp class for the re object
  158. */
  159. gpointer rspamd_regexp_get_class (const rspamd_regexp_t *re);
  160. /**
  161. * Set regexp class for the re object
  162. * @return old re class value
  163. */
  164. gpointer rspamd_regexp_set_class (rspamd_regexp_t *re, gpointer re_class);
  165. /**
  166. * Create new regexp cache
  167. * @return
  168. */
  169. struct rspamd_regexp_cache *rspamd_regexp_cache_new (void);
  170. /**
  171. * Query rspamd cache for a specified regexp
  172. * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe)
  173. * @param pattern
  174. * @param flags
  175. * @return
  176. */
  177. rspamd_regexp_t *rspamd_regexp_cache_query (struct rspamd_regexp_cache *cache,
  178. const gchar *pattern,
  179. const gchar *flags);
  180. /**
  181. * Insert item to the cache using custom pattern and flags
  182. * @param cache
  183. * @param pattern
  184. * @param flags
  185. * @param re
  186. */
  187. void rspamd_regexp_cache_insert (struct rspamd_regexp_cache *cache,
  188. const gchar *pattern,
  189. const gchar *flags, rspamd_regexp_t *re);
  190. /**
  191. * Create or get cached regexp from the specified cache
  192. * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe)
  193. * @param pattern regexp pattern
  194. * @param flags flags (may be enclosed inside pattern)
  195. * @param err error pointer set if compilation failed
  196. * @return new regexp object
  197. */
  198. rspamd_regexp_t *rspamd_regexp_cache_create (struct rspamd_regexp_cache *cache,
  199. const gchar *pattern,
  200. const gchar *flags, GError **err);
  201. /**
  202. * Remove regexp from the cache
  203. * @param cache regexp cache. if NULL, the superglobal cache is used (*not* thread-safe)
  204. * @param re re to remove
  205. * @return TRUE if a regexp has been removed
  206. */
  207. gboolean rspamd_regexp_cache_remove (struct rspamd_regexp_cache *cache,
  208. rspamd_regexp_t *re);
  209. /**
  210. * Destroy regexp cache and unref all elements inside it
  211. * @param cache
  212. */
  213. void rspamd_regexp_cache_destroy (struct rspamd_regexp_cache *cache);
  214. /**
  215. * Return the value for regexp hash based on its ID
  216. * @param a
  217. * @return
  218. */
  219. guint32 rspamd_regexp_hash (gconstpointer a);
  220. /**
  221. * Compare two regexp objects based on their IDs
  222. * @param a
  223. * @param b
  224. * @return
  225. */
  226. gboolean rspamd_regexp_equal (gconstpointer a, gconstpointer b);
  227. /**
  228. * Acts like memcmp but for regexp
  229. */
  230. gint rspamd_regexp_cmp (gconstpointer a, gconstpointer b);
  231. /**
  232. * Initialize superglobal regexp cache and library
  233. */
  234. void rspamd_regexp_library_init (struct rspamd_config *cfg);
  235. /**
  236. * Create regexp from glob
  237. * @param gl
  238. * @param err
  239. * @return
  240. */
  241. rspamd_regexp_t *rspamd_regexp_from_glob (const gchar *gl, gsize sz, GError **err);
  242. #ifdef __cplusplus
  243. }
  244. #endif
  245. #endif /* REGEXP_H_ */