You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

re_cache.h 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef RSPAMD_RE_CACHE_H
  17. #define RSPAMD_RE_CACHE_H
  18. #include "config.h"
  19. #include "libutil/regexp.h"
  20. #ifdef __cplusplus
  21. extern "C" {
  22. #endif
  23. struct rspamd_re_cache; /* forward declaration only: the layout is not defined in this header */
  24. struct rspamd_re_runtime; /* forward declaration only: returned by rspamd_re_cache_runtime_new() */
  25. struct rspamd_task; /* forward declaration only: taken by rspamd_re_cache_process() */
  26. struct rspamd_config; /* forward declaration only: taken by rspamd_re_cache_init() */
  27. enum rspamd_re_type { /* regexp type tag taken by rspamd_re_cache_add() and rspamd_re_cache_process() */
  28. RSPAMD_RE_HEADER,
  29. RSPAMD_RE_RAWHEADER,
  30. RSPAMD_RE_ALLHEADER,
  31. RSPAMD_RE_MIMEHEADER,
  32. RSPAMD_RE_MIME,
  33. RSPAMD_RE_RAWMIME,
  34. RSPAMD_RE_URL,
  35. RSPAMD_RE_EMAIL,
  36. RSPAMD_RE_BODY, /* "full" in SA (NOTE(review): SA presumably means SpamAssassin - confirm) */
  37. RSPAMD_RE_SABODY, /* "body" in SA */
  38. RSPAMD_RE_SARAWBODY, /* "rawbody" in SA */
  39. RSPAMD_RE_WORDS, /* normalized words */
  40. RSPAMD_RE_RAWWORDS, /* raw words */
  41. RSPAMD_RE_STEMWORDS, /* stemmed words */
  42. RSPAMD_RE_SELECTOR, /* use lua selector to process regexp */
  43. RSPAMD_RE_MAX /* last enumerator: its value equals the number of types listed above */
  44. };
  45. struct rspamd_re_cache_stat { /* counters exposed read-only through rspamd_re_cache_get_stat() */
  46. uint64_t bytes_scanned;
  47. uint64_t bytes_scanned_pcre; /* NOTE(review): presumably the part of the scanned bytes handled by PCRE - confirm */
  48. guint regexp_checked;
  49. guint regexp_matched;
  50. guint regexp_total;
  51. guint regexp_fast_cached; /* NOTE(review): the meaning of "fast cached" is not visible in this header - confirm */
  52. };
  53. /**
  54. * Create the persistent re_cache structure and initialize it
  55. * @return pointer to the cache object (NOTE(review): presumably released with rspamd_re_cache_unref() - confirm) */
  56. struct rspamd_re_cache *rspamd_re_cache_new(void);
  57. /**
  58. * Add the existing regexp to the cache
  59. * @param cache cache object
  60. * @param re regexp object
  61. * @param type type of object (one of enum rspamd_re_type)
  62. * @param type_data associated data with the type (e.g. header name)
  63. * @param datalen length of the data pointed to by type_data
  64. * @param lua_cbref optional lua callback reference for matching purposes
  65. * @return regexp object pointer (NOTE(review): whether this is always `re` itself is not visible in this header - confirm) */
  66. rspamd_regexp_t *
  67. rspamd_re_cache_add(struct rspamd_re_cache *cache, rspamd_regexp_t *re,
  68. enum rspamd_re_type type,
  69. gconstpointer type_data, gsize datalen,
  70. gint lua_cbref);
  71. /**
  72. * Replace regexp in the cache with another regexp
  73. * @param cache cache object
  74. * @param what regexp to be replaced
  75. * @param with regexp object that takes the place of `what`
  76. */
  77. void rspamd_re_cache_replace(struct rspamd_re_cache *cache,
  78. rspamd_regexp_t *what,
  79. rspamd_regexp_t *with);
  80. /**
  81. * Initialize and optimize the re cache structure `cache`; `cfg` is the configuration object handed to the initialization
  82. */
  83. void rspamd_re_cache_init(struct rspamd_re_cache *cache,
  84. struct rspamd_config *cfg);
  85. enum rspamd_hyperscan_status { /* status returned by rspamd_re_cache_is_hs_loaded() and rspamd_re_cache_load_hyperscan() */
  86. RSPAMD_HYPERSCAN_UNKNOWN = 0, /* explicitly 0, so a zero-initialized status reads as UNKNOWN */
  87. RSPAMD_HYPERSCAN_UNSUPPORTED,
  88. RSPAMD_HYPERSCAN_LOADED_PARTIAL,
  89. RSPAMD_HYPERSCAN_LOADED_FULL,
  90. RSPAMD_HYPERSCAN_LOAD_ERROR,
  91. };
  92. /**
  93. * Returns the hyperscan load status of the cache (an enum value, not a plain boolean)
  94. * @param cache cache object to query
  95. * @return one of the enum rspamd_hyperscan_status values
  96. */
  97. enum rspamd_hyperscan_status rspamd_re_cache_is_hs_loaded(struct rspamd_re_cache *cache);
  98. /**
  99. * Get runtime data for a cache
  100. * @return runtime object; rspamd_re_cache_runtime_destroy() is the declared way to destroy it */
  101. struct rspamd_re_runtime *rspamd_re_cache_runtime_new(struct rspamd_re_cache *cache);
  102. /**
  103. * Get runtime statistics
  104. * @return read-only pointer to the statistics counters of the runtime `rt` */
  105. const struct rspamd_re_cache_stat *
  106. rspamd_re_cache_get_stat(struct rspamd_re_runtime *rt);
  107. /**
  108. * Process regexp runtime and return the result for a specific regexp
  109. * @param task task object
  110. * @param re regexp object
  111. * @param type type of object (one of enum rspamd_re_type)
  112. * @param type_data associated data with the type (e.g. header name)
  113. * @param datalen length of the data pointed to by type_data
  114. * @param is_strong use case sensitive match when looking for headers
  115. * @return result for `re` as a gint (NOTE(review): exact meaning of the value is not visible in this header - confirm)
  116. */
  117. gint rspamd_re_cache_process(struct rspamd_task *task,
  118. rspamd_regexp_t *re,
  119. enum rspamd_re_type type,
  120. gconstpointer type_data,
  121. gsize datalen,
  122. gboolean is_strong);
  123. int rspamd_re_cache_process_ffi(void *ptask, /* untyped pointer; NOTE(review): presumably the task object of rspamd_re_cache_process() - confirm */
  124. void *pre, /* untyped pointer; NOTE(review): presumably the regexp object - confirm */
  125. int type, /* plain int; NOTE(review): presumably an enum rspamd_re_type value - confirm */
  126. void *type_data, /* note: there is no datalen parameter here, unlike rspamd_re_cache_process() */
  127. int is_strong); /* plain int where rspamd_re_cache_process() takes a gboolean */
  128. /**
  129. * Destroy runtime data
  130. * @param rt runtime object (the type returned by rspamd_re_cache_runtime_new()) */
  131. void rspamd_re_cache_runtime_destroy(struct rspamd_re_runtime *rt);
  132. /**
  133. * Unref re cache (counterpart of rspamd_re_cache_ref())
  134. * @param cache cache object whose reference is dropped */
  135. void rspamd_re_cache_unref(struct rspamd_re_cache *cache);
  136. /**
  137. * Retain reference to re cache (counterpart of rspamd_re_cache_unref())
  138. * @return cache object pointer (NOTE(review): presumably the same `cache` that was passed in - confirm) */
  139. struct rspamd_re_cache *rspamd_re_cache_ref(struct rspamd_re_cache *cache);
  140. /**
  141. * Set limit for all regular expressions in the cache, returns previous limit
  142. * @param limit new limit (NOTE(review): its unit and the meaning of 0 are not visible in this header - confirm) */
  143. guint rspamd_re_cache_set_limit(struct rspamd_re_cache *cache, guint limit);
  144. /**
  145. * Convert re type to a human readable string (constant one)
  146. * @return constant string for `type`; it is const-qualified, so callers must not modify it */
  147. const gchar *rspamd_re_cache_type_to_string(enum rspamd_re_type type);
  148. /**
  149. * Convert re type string to the type enum
  150. * @param str type name (NOTE(review): the result for an unrecognized name is not visible in this header - confirm) */
  151. enum rspamd_re_type rspamd_re_cache_type_from_string(const char *str);
  152. struct ev_loop; /* forward declaration only: needed for the event_loop parameter below */
  153. /**
  154. * Compile expressions to the hyperscan tree and store in the `cache_dir`
  155. * @param cb callback taking (ncompiled, err, cbd); NOTE(review): presumably invoked with the `cbd` given here once compilation ends - confirm */
  156. gint rspamd_re_cache_compile_hyperscan(struct rspamd_re_cache *cache,
  157. const char *cache_dir,
  158. gdouble max_time,
  159. gboolean silent,
  160. struct ev_loop *event_loop,
  161. void (*cb)(guint ncompiled, GError *err, void *cbd),
  162. void *cbd);
  163. /**
  164. * Returns TRUE if the specified file is valid hyperscan cache
  165. * @param err GError output pointer (NOTE(review): presumably filled in when the file is rejected - confirm) */
  166. gboolean rspamd_re_cache_is_valid_hyperscan_file(struct rspamd_re_cache *cache,
  167. const char *path,
  168. gboolean silent,
  169. gboolean try_load,
  170. GError **err);
  171. /**
  172. * Loads all hyperscan regexps precompiled
  173. * @return load status as enum rspamd_hyperscan_status (note: try_load is a C `bool` here, unlike the gboolean flags used elsewhere in this header) */
  174. enum rspamd_hyperscan_status rspamd_re_cache_load_hyperscan(
  175. struct rspamd_re_cache *cache,
  176. const char *cache_dir, bool try_load);
  177. /**
  178. * Registers lua selector in the cache
  179. * @param ref reference registered for the selector named `sname` (NOTE(review): presumably a Lua registry reference - confirm) */
  180. void rspamd_re_cache_add_selector(struct rspamd_re_cache *cache,
  181. const gchar *sname, gint ref);
  182. #ifdef __cplusplus
  183. }
  184. #endif
  185. #endif