You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

multipattern.h 4.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef SRC_LIBUTIL_MULTIPATTERN_H_
  17. #define SRC_LIBUTIL_MULTIPATTERN_H_
  18. #include "config.h"
  19. /**
  20. * @file multipattern.h
  21. *
  22. * This file defines a structure that acts as a transparent bridge between
  23. * hyperscan and ac-trie
  24. */
  25. #ifdef __cplusplus
  26. extern "C" {
  27. #endif
  28. enum rspamd_multipattern_flags { /* bit flags: every non-zero value is a distinct single bit, so values can be OR-ed together */
  29. RSPAMD_MULTIPATTERN_DEFAULT = 0, /* no flag bits set */
  30. RSPAMD_MULTIPATTERN_ICASE = (1 << 0),
  31. RSPAMD_MULTIPATTERN_UTF8 = (1 << 1),
  32. RSPAMD_MULTIPATTERN_TLD = (1 << 2),
  33. /* Not supported by acism. NOTE(review): this header does not show whether the remark covers only GLOB or every flag below it -- confirm */
  34. RSPAMD_MULTIPATTERN_GLOB = (1 << 3),
  35. RSPAMD_MULTIPATTERN_RE = (1 << 4),
  36. RSPAMD_MULTIPATTERN_DOTALL = (1 << 5),
  37. RSPAMD_MULTIPATTERN_SINGLEMATCH = (1 << 6),
  38. RSPAMD_MULTIPATTERN_NO_START = (1 << 7),
  39. };
  40. struct rspamd_multipattern; /* forward declaration only; the structure is not defined in this header */
  41. struct rspamd_cryptobox_library_ctx; /* NOTE(review): not referenced by any declaration visible in this header -- confirm it is still needed */
  42. /**
  43. * Callback type called on pattern match
  44. * @param mp multipattern structure
  45. * @param strnum number of pattern matched
      * @param match_start NOTE(review): presumably the offset in the text where the match starts; it was previously undocumented -- confirm
  46. * @param match_pos position in the text (previously documented under the non-existent name `textpos`); NOTE(review): presumably the end of the match -- confirm
  47. * @param text input text
  48. * @param len length of input text
  49. * @param context userdata
  50. * @return if 0 then search for another pattern, otherwise return this value to caller
  51. */
  52. typedef gint (*rspamd_multipattern_cb_t)(struct rspamd_multipattern *mp,
  53. guint strnum,
  54. gint match_start,
  55. gint match_pos,
  56. const gchar *text,
  57. gsize len,
  58. void *context);
  59. /**
  60. * Init multipattern library and set the appropriate cache dir
  61. * @param cache_dir cache directory to set; NOTE(review): whether NULL is accepted is not visible in this header -- confirm
  62. */
  63. void rspamd_multipattern_library_init(const gchar *cache_dir);
  64. /**
  65. * Creates empty multipattern structure
  66. * @param flags value of enum rspamd_multipattern_flags for the new structure
  67. * @return pointer to the new multipattern structure; NOTE(review): presumably released with rspamd_multipattern_destroy() -- confirm
  68. */
  69. struct rspamd_multipattern *rspamd_multipattern_create(
  70. enum rspamd_multipattern_flags flags);
  71. /**
  72. * Creates multipattern with preallocated number of patterns to speed up loading
  73. * @param reserved number of patterns to preallocate space for
  74. * @param flags value of enum rspamd_multipattern_flags for the new structure
  75. * @return pointer to the new multipattern structure; NOTE(review): presumably released with rspamd_multipattern_destroy() -- confirm
  76. */
  77. struct rspamd_multipattern *rspamd_multipattern_create_sized(guint reserved,
  78. enum rspamd_multipattern_flags flags);
  79. /**
  80. * Creates new multipattern structure from a vector of patterns
  81. * @param patterns vector of null-terminated strings
  82. * @param npatterns number of patterns in the vector
  83. * @param flags flags applied to all patterns
  84. * @return new multipattern structure; NOTE(review): whether the result is already compiled is not visible in this header -- confirm
  85. */
  86. struct rspamd_multipattern *rspamd_multipattern_create_full(
  87. const gchar **patterns,
  88. guint npatterns,
  89. enum rspamd_multipattern_flags flags);
  90. /**
  91. * Adds new pattern to match engine from zero-terminated string
  92. * @param mp multipattern structure to add the pattern to
  93. * @param pattern zero-terminated pattern string
      * @param flags NOTE(review): presumably bits of enum rspamd_multipattern_flags for this pattern; it was previously undocumented -- confirm
  94. */
  95. void rspamd_multipattern_add_pattern(struct rspamd_multipattern *mp,
  96. const gchar *pattern, gint flags);
  97. /**
  98. * Adds new pattern from arbitrary string
  99. * @param mp multipattern structure to add the pattern to
  100. * @param pattern pattern string; NOTE(review): presumably need not be zero-terminated since the length is passed separately -- confirm
  101. * @param patlen length of the pattern
  102. * @param flags NOTE(review): presumably bits of enum rspamd_multipattern_flags for this pattern -- confirm
  103. */
  104. void rspamd_multipattern_add_pattern_len(struct rspamd_multipattern *mp,
  105. const gchar *pattern, gsize patlen, gint flags);
  106. #define RSPAMD_MULTIPATTERN_COMPILE_NO_FS (0x1u << 0u) /* single flag bit (value 1); NOTE(review): presumably for the `flags` argument of rspamd_multipattern_compile() -- confirm */
  107. /**
  108. * Compiles multipattern structure
  109. * @param mp multipattern structure to compile
      * @param flags NOTE(review): presumably a combination of RSPAMD_MULTIPATTERN_COMPILE_* bits; it was previously undocumented -- confirm
      * @param err GError out-parameter; NOTE(review): presumably set when compilation fails -- confirm
  110. * @return gboolean result; NOTE(review): presumably TRUE on success and FALSE on failure -- confirm
  111. */
  112. gboolean rspamd_multipattern_compile(struct rspamd_multipattern *mp,
  113. int flags,
  114. GError **err);
  115. /**
  116. * Looks up patterns in a text using the specified callback function
  117. * @param mp multipattern structure to search with
  118. * @param in input text
  119. * @param len length of the input text
  120. * @param cb if callback returns non-zero, then search is terminated and that value is returned
  121. * @param ud callback data
      * @param pnfound NOTE(review): presumably receives the number of matches found; it was previously undocumented, and whether NULL is accepted is not visible here -- confirm
  122. * @return the non-zero callback value if the search was terminated by the callback; NOTE(review): the value returned otherwise is not visible in this header -- confirm
  123. */
  124. gint rspamd_multipattern_lookup(struct rspamd_multipattern *mp,
  125. const gchar *in, gsize len, rspamd_multipattern_cb_t cb,
  126. gpointer ud, guint *pnfound);
  127. /**
  128. * Get pattern string from multipattern identified by index
  129. * @param mp multipattern structure
  130. * @param index index of the pattern; NOTE(review): presumably zero-based and below rspamd_multipattern_get_npatterns() -- confirm
  131. * @return pattern string; NOTE(review): behaviour for an out-of-range index is not visible in this header -- confirm
  132. */
  133. const gchar *rspamd_multipattern_get_pattern(struct rspamd_multipattern *mp,
  134. guint index);
  135. /**
  136. * Returns number of patterns in a multipattern matcher
  137. * @param mp multipattern structure
  138. * @return number of patterns in `mp`
  139. */
  140. guint rspamd_multipattern_get_npatterns(struct rspamd_multipattern *mp);
  141. /**
  142. * Destroys multipattern structure
  143. * @param mp multipattern structure to destroy; NOTE(review): whether NULL is accepted is not visible in this header -- confirm
  144. */
  145. void rspamd_multipattern_destroy(struct rspamd_multipattern *mp);
  146. /**
  147. * Returns TRUE if hyperscan is supported
  148. * @return TRUE if hyperscan is supported; NOTE(review): whether this reflects build-time or run-time support is not visible in this header -- confirm
  149. */
  150. gboolean rspamd_multipattern_has_hyperscan(void);
  151. #ifdef __cplusplus
  152. }
  153. #endif
  154. #endif /* SRC_LIBUTIL_MULTIPATTERN_H_ */