You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

stat_api.h 4.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. /*
  2. * Copyright 2024 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef STAT_API_H_
  17. #define STAT_API_H_
  18. #include "config.h"
  19. #include "task.h"
  20. #include "lua/lua_common.h"
  21. #include "contrib/libev/ev.h"
  22. #ifdef __cplusplus
  23. extern "C" {
  24. #endif
  25. /**
  26. * @file stat_api.h
  27. * High level statistics API
  28. */
  /*
   * Token flag bits. Every macro expands to an unsigned constant with exactly
   * one bit set (bits 0..13), so the values can be combined with bitwise OR
   * and tested with bitwise AND.
   * NOTE(review): presumably these are stored in the `flags` member of
   * rspamd_stat_token_t; confirm against the tokenizer code.
   */
  #define RSPAMD_STAT_TOKEN_FLAG_TEXT             (1u << 0)
  #define RSPAMD_STAT_TOKEN_FLAG_META             (1u << 1)
  #define RSPAMD_STAT_TOKEN_FLAG_LUA_META         (1u << 2)
  #define RSPAMD_STAT_TOKEN_FLAG_EXCEPTION        (1u << 3)
  #define RSPAMD_STAT_TOKEN_FLAG_HEADER           (1u << 4)
  #define RSPAMD_STAT_TOKEN_FLAG_UNIGRAM          (1u << 5)
  #define RSPAMD_STAT_TOKEN_FLAG_UTF              (1u << 6)
  #define RSPAMD_STAT_TOKEN_FLAG_NORMALISED       (1u << 7)
  #define RSPAMD_STAT_TOKEN_FLAG_STEMMED          (1u << 8)
  #define RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE   (1u << 9)
  #define RSPAMD_STAT_TOKEN_FLAG_STOP_WORD        (1u << 10)
  #define RSPAMD_STAT_TOKEN_FLAG_SKIPPED          (1u << 11)
  #define RSPAMD_STAT_TOKEN_FLAG_INVISIBLE_SPACES (1u << 12)
  #define RSPAMD_STAT_TOKEN_FLAG_EMOJI            (1u << 13)
  43. typedef struct rspamd_stat_token_s {
  44. rspamd_ftok_t original; /* utf8 raw */
  45. rspamd_ftok_unicode_t unicode; /* array of unicode characters, normalized, lowercased */
  46. rspamd_ftok_t normalized; /* normalized and lowercased utf8 */
  47. rspamd_ftok_t stemmed; /* stemmed utf8 */
  48. guint flags;
  49. } rspamd_stat_token_t;
  50. #define RSPAMD_TOKEN_VALUE_TYPE float
  51. typedef struct token_node_s {
  52. uint64_t data;
  53. guint window_idx;
  54. guint flags;
  55. rspamd_stat_token_t *t1;
  56. rspamd_stat_token_t *t2;
  57. RSPAMD_TOKEN_VALUE_TYPE values[0];
  58. } rspamd_token_t;
  /* Forward declaration only: the layout of the statistics context is not exposed by this header. */
  struct rspamd_stat_ctx;
  /**
   * Outcome of a statistics processing call.
   * The numeric values are spelled out explicitly so that they stay stable
   * if enumerators are ever reordered.
   */
  typedef enum rspamd_stat_result_e {
      RSPAMD_STAT_PROCESS_ERROR = 0,   /* error */
      RSPAMD_STAT_PROCESS_DELAYED = 1, /* an additional job is needed to finish processing */
      RSPAMD_STAT_PROCESS_OK = 2       /* all processed */
  } rspamd_stat_result_t;
  /**
   * Initialise statistics modules
   * @param cfg configuration to initialise from
   * @param ev_base libev event loop (NOTE(review): how the loop is used is not
   *                visible in this header - confirm against the implementation)
   */
  void rspamd_stat_init(struct rspamd_config *cfg,
                        struct ev_loop *ev_base);
  /**
   * Finalize statistics.
   * Takes no arguments and returns nothing.
   */
  void rspamd_stat_close(void);
  /**
   * Tokenize task
   * @param st_ctx statistics context
   * @param task task to tokenize
   */
  void rspamd_stat_process_tokenize(struct rspamd_stat_ctx *st_ctx,
                                    struct rspamd_task *task);
  87. /**
  88. * Classify the task specified and insert symbols if needed
  89. * @param task
  90. * @param L lua state
  91. * @param err error returned
  92. * @return TRUE if task has been classified
  93. */
  94. rspamd_stat_result_t rspamd_stat_classify(struct rspamd_task *task,
  95. lua_State *L, guint stage, GError **err);
  96. /**
  97. * Check if a task should be learned and set the appropriate flags for it
  98. * @param task
  99. * @return
  100. */
  101. gboolean rspamd_stat_check_autolearn(struct rspamd_task *task);
  102. /**
  103. * Learn task as spam or ham, task must be processed prior to this call
  104. * @param task task to learn
  105. * @param spam if TRUE learn spam, otherwise learn ham
  106. * @param L lua state
  107. * @param classifier NULL to learn all classifiers, name to learn a specific one
  108. * @param err error returned
  109. * @return TRUE if task has been learned
  110. */
  111. rspamd_stat_result_t rspamd_stat_learn(struct rspamd_task *task,
  112. gboolean spam, lua_State *L, const gchar *classifier,
  113. guint stage,
  114. GError **err);
  115. /**
  116. * Get the overall statistics for all statfile backends
  117. * @param cfg configuration
  118. * @param total_learns the total number of learns is stored here
  119. * @return array of statistical information
  120. */
  121. rspamd_stat_result_t rspamd_stat_statistics(struct rspamd_task *task,
  122. struct rspamd_config *cfg,
  123. uint64_t *total_learns,
  124. ucl_object_t **res);
  /**
   * Unload statistics.
   * NOTE(review): how this differs from rspamd_stat_close() is not visible in
   * this header - confirm against the implementation.
   */
  void rspamd_stat_unload(void);
  126. #ifdef __cplusplus
  127. }
  128. #endif
  129. #endif /* STAT_API_H_ */