Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

chartable.c 28KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782
  1. /*-
  2. * Copyright 2016 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
/***MODULE:chartable
 * rspamd module that inserts a symbol for words that mix characters from different scripts
 *
 * Allowed options:
 * - symbol (string): symbol to insert (default: 'R_MIXED_CHARSET')
 * - url_symbol (string): symbol registered for the URL check (default: 'R_MIXED_CHARSET_URL')
 * - threshold (double): value that would be used as threshold in expression characters_changed / total_characters
 *   (e.g. if threshold is 0.1 then charset change should occur more often than once in 10 symbols), default: 0.1
 * - max_word_len (integer): words longer than this number of symbols are not scored (default: 10)
 */
  24. #include "config.h"
  25. #include "libmime/message.h"
  26. #include "rspamd.h"
  27. #include "libstat/stat_api.h"
  28. #include "libmime/lang_detection.h"
  29. #include "unicode/utf8.h"
  30. #include "unicode/uchar.h"
  31. #define DEFAULT_SYMBOL "R_MIXED_CHARSET"
  32. #define DEFAULT_URL_SYMBOL "R_MIXED_CHARSET_URL"
  33. #define DEFAULT_THRESHOLD 0.1
  34. #define msg_err_chartable(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
  35. "chartable", task->task_pool->tag.uid, \
  36. G_STRFUNC, \
  37. __VA_ARGS__)
  38. #define msg_warn_chartable(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
  39. "chartable", task->task_pool->tag.uid, \
  40. G_STRFUNC, \
  41. __VA_ARGS__)
  42. #define msg_info_chartable(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \
  43. "chartable", task->task_pool->tag.uid, \
  44. G_STRFUNC, \
  45. __VA_ARGS__)
  46. #define msg_debug_chartable(...) rspamd_conditional_debug_fast (NULL, task->from_addr, \
  47. rspamd_chartable_log_id, "chartable", task->task_pool->tag.uid, \
  48. G_STRFUNC, \
  49. __VA_ARGS__)
  50. INIT_LOG_MODULE(chartable)
  51. /* Initialization */
  52. gint chartable_module_init (struct rspamd_config *cfg, struct module_ctx **ctx);
  53. gint chartable_module_config (struct rspamd_config *cfg, bool validate);
  54. gint chartable_module_reconfig (struct rspamd_config *cfg);
  55. module_t chartable_module = {
  56. "chartable",
  57. chartable_module_init,
  58. chartable_module_config,
  59. chartable_module_reconfig,
  60. NULL,
  61. RSPAMD_MODULE_VER,
  62. (guint)-1,
  63. };
  64. struct chartable_ctx {
  65. struct module_ctx ctx;
  66. const gchar *symbol;
  67. const gchar *url_symbol;
  68. double threshold;
  69. guint max_word_len;
  70. };
  71. static inline struct chartable_ctx *
  72. chartable_get_context (struct rspamd_config *cfg)
  73. {
  74. return (struct chartable_ctx *)g_ptr_array_index (cfg->c_modules,
  75. chartable_module.ctx_offset);
  76. }
  77. static void chartable_symbol_callback (struct rspamd_task *task,
  78. struct rspamd_symcache_item *item,
  79. void *unused);
  80. static void chartable_url_symbol_callback (struct rspamd_task *task,
  81. struct rspamd_symcache_item *item,
  82. void *unused);
  83. gint
  84. chartable_module_init (struct rspamd_config *cfg, struct module_ctx **ctx)
  85. {
  86. struct chartable_ctx *chartable_module_ctx;
  87. chartable_module_ctx = rspamd_mempool_alloc0 (cfg->cfg_pool,
  88. sizeof (*chartable_module_ctx));
  89. chartable_module_ctx->max_word_len = 10;
  90. *ctx = (struct module_ctx *)chartable_module_ctx;
  91. return 0;
  92. }
  93. gint
  94. chartable_module_config (struct rspamd_config *cfg, bool validate)
  95. {
  96. const ucl_object_t *value;
  97. gint res = TRUE;
  98. struct chartable_ctx *chartable_module_ctx = chartable_get_context (cfg);
  99. if (!rspamd_config_is_module_enabled (cfg, "chartable")) {
  100. return TRUE;
  101. }
  102. if ((value =
  103. rspamd_config_get_module_opt (cfg, "chartable", "symbol")) != NULL) {
  104. chartable_module_ctx->symbol = ucl_obj_tostring (value);
  105. }
  106. else {
  107. chartable_module_ctx->symbol = DEFAULT_SYMBOL;
  108. }
  109. if ((value =
  110. rspamd_config_get_module_opt (cfg, "chartable", "url_symbol")) != NULL) {
  111. chartable_module_ctx->url_symbol = ucl_obj_tostring (value);
  112. }
  113. else {
  114. chartable_module_ctx->url_symbol = DEFAULT_URL_SYMBOL;
  115. }
  116. if ((value =
  117. rspamd_config_get_module_opt (cfg, "chartable", "threshold")) != NULL) {
  118. if (!ucl_obj_todouble_safe (value, &chartable_module_ctx->threshold)) {
  119. msg_warn_config ("invalid numeric value");
  120. chartable_module_ctx->threshold = DEFAULT_THRESHOLD;
  121. }
  122. }
  123. else {
  124. chartable_module_ctx->threshold = DEFAULT_THRESHOLD;
  125. }
  126. if ((value =
  127. rspamd_config_get_module_opt (cfg, "chartable", "max_word_len")) != NULL) {
  128. chartable_module_ctx->max_word_len = ucl_object_toint (value);
  129. }
  130. else {
  131. chartable_module_ctx->threshold = DEFAULT_THRESHOLD;
  132. }
  133. rspamd_symcache_add_symbol (cfg->cache,
  134. chartable_module_ctx->symbol,
  135. 0,
  136. chartable_symbol_callback,
  137. NULL,
  138. SYMBOL_TYPE_NORMAL,
  139. -1);
  140. rspamd_symcache_add_symbol (cfg->cache,
  141. chartable_module_ctx->url_symbol,
  142. 0,
  143. chartable_url_symbol_callback,
  144. NULL,
  145. SYMBOL_TYPE_NORMAL,
  146. -1);
  147. msg_info_config ("init internal chartable module");
  148. return res;
  149. }
  150. gint
  151. chartable_module_reconfig (struct rspamd_config *cfg)
  152. {
  153. return chartable_module_config (cfg, false);
  154. }
  155. static gint latin_confusable[] = {
  156. 0x02028, 0x02029, 0x01680, 0x02000, 0x02001, 0x02002, 0x02003, 0x02004, 0x02005, 0x02006,
  157. 0x02008, 0x02009, 0x0200a, 0x0205f, 0x000a0, 0x02007, 0x0202f, 0x007fa, 0x0fe4d, 0x0fe4e,
  158. 0x0fe4f, 0x02010, 0x02011, 0x02012, 0x02013, 0x0fe58, 0x006d4, 0x02043, 0x002d7, 0x02212,
  159. 0x02796, 0x02cba, 0x0060d, 0x0066b, 0x0201a, 0x000b8, 0x0a4f9, 0x0037e, 0x00903, 0x00a83,
  160. 0x0ff1a, 0x00589, 0x00703, 0x00704, 0x016ec, 0x0fe30, 0x01803, 0x01809, 0x0205a, 0x005c3,
  161. 0x002f8, 0x0a789, 0x02236, 0x002d0, 0x0a4fd, 0x0ff01, 0x001c3, 0x02d51, 0x00294, 0x00241,
  162. 0x0097d, 0x013ae, 0x0a6eb, 0x1d16d, 0x02024, 0x00701, 0x00702, 0x0a60e, 0x10a50, 0x00660,
  163. 0x006f0, 0x0a4f8, 0x0055d, 0x0ff07, 0x02018, 0x02019, 0x0201b, 0x02032, 0x02035, 0x0055a,
  164. 0x005f3, 0x00060, 0x01fef, 0x0ff40, 0x000b4, 0x00384, 0x01ffd, 0x01fbd, 0x01fbf, 0x01ffe,
  165. 0x002b9, 0x00374, 0x002c8, 0x002ca, 0x002cb, 0x002f4, 0x002bb, 0x002bd, 0x002bc, 0x002be,
  166. 0x0a78c, 0x005d9, 0x007f4, 0x007f5, 0x0144a, 0x016cc, 0x16f51, 0x16f52, 0x0ff3b, 0x02768,
  167. 0x02772, 0x03014, 0x0fd3e, 0x0ff3d, 0x02769, 0x02773, 0x03015, 0x0fd3f, 0x02774, 0x1d114,
  168. 0x02775, 0x0204e, 0x0066d, 0x02217, 0x1031f, 0x01735, 0x02041, 0x02215, 0x02044, 0x02571,
  169. 0x027cb, 0x029f8, 0x1d23a, 0x031d3, 0x03033, 0x02cc6, 0x030ce, 0x04e3f, 0x02f03, 0x0ff3c,
  170. 0x0fe68, 0x02216, 0x027cd, 0x029f5, 0x029f9, 0x1d20f, 0x1d23b, 0x031d4, 0x04e36, 0x02f02,
  171. 0x0a778, 0x002c4, 0x002c6, 0x016ed, 0x02795, 0x1029b, 0x02039, 0x0276e, 0x002c2, 0x1d236,
  172. 0x01438, 0x016b2, 0x01400, 0x02e40, 0x030a0, 0x0a4ff, 0x0203a, 0x0276f, 0x002c3, 0x1d237,
  173. 0x01433, 0x16f3f, 0x02053, 0x002dc, 0x01fc0, 0x0223c, 0x1d7d0, 0x1d7da, 0x1d7e4, 0x1d7ee,
  174. 0x1d7f8, 0x0a75a, 0x001a7, 0x003e8, 0x0a644, 0x014bf, 0x0a6ef, 0x1d206, 0x1d7d1, 0x1d7db,
  175. 0x1d7e5, 0x1d7ef, 0x1d7f9, 0x0a7ab, 0x0021c, 0x001b7, 0x0a76a, 0x02ccc, 0x00417, 0x004e0,
  176. 0x16f3b, 0x118ca, 0x1d7d2, 0x1d7dc, 0x1d7e6, 0x1d7f0, 0x1d7fa, 0x013ce, 0x118af, 0x1d7d3,
  177. 0x1d7dd, 0x1d7e7, 0x1d7f1, 0x1d7fb, 0x001bc, 0x118bb, 0x1d7d4, 0x1d7de, 0x1d7e8, 0x1d7f2,
  178. 0x1d7fc, 0x02cd2, 0x00431, 0x013ee, 0x118d5, 0x1d212, 0x1d7d5, 0x1d7df, 0x1d7e9, 0x1d7f3,
  179. 0x1d7fd, 0x104d2, 0x118c6, 0x00b03, 0x009ea, 0x00a6a, 0x1e8cb, 0x1d7d6, 0x1d7e0, 0x1d7ea,
  180. 0x1d7f4, 0x1d7fe, 0x00223, 0x00222, 0x1031a, 0x00a67, 0x00b68, 0x009ed, 0x00d6d, 0x1d7d7,
  181. 0x1d7e1, 0x1d7eb, 0x1d7f5, 0x1d7ff, 0x0a76e, 0x02cca, 0x118cc, 0x118ac, 0x118d6, 0x0237a,
  182. 0x0ff41, 0x1d41a, 0x1d44e, 0x1d482, 0x1d4b6, 0x1d4ea, 0x1d51e, 0x1d552, 0x1d586, 0x1d5ba,
  183. 0x1d5ee, 0x1d622, 0x1d656, 0x1d68a, 0x00251, 0x003b1, 0x1d6c2, 0x1d6fc, 0x1d736, 0x1d770,
  184. 0x1d7aa, 0x00430, 0x0ff21, 0x1d400, 0x1d434, 0x1d468, 0x1d49c, 0x1d4d0, 0x1d504, 0x1d538,
  185. 0x1d56c, 0x1d5a0, 0x1d5d4, 0x1d608, 0x1d63c, 0x1d670, 0x00391, 0x1d6a8, 0x1d6e2, 0x1d71c,
  186. 0x1d756, 0x1d790, 0x00410, 0x013aa, 0x015c5, 0x0a4ee, 0x16f40, 0x102a0, 0x1d41b, 0x1d44f,
  187. 0x1d483, 0x1d4b7, 0x1d4eb, 0x1d51f, 0x1d553, 0x1d587, 0x1d5bb, 0x1d5ef, 0x1d623, 0x1d657,
  188. 0x1d68b, 0x00184, 0x0042c, 0x013cf, 0x015af, 0x0ff22, 0x0212c, 0x1d401, 0x1d435, 0x1d469,
  189. 0x1d4d1, 0x1d505, 0x1d539, 0x1d56d, 0x1d5a1, 0x1d5d5, 0x1d609, 0x1d63d, 0x1d671, 0x0a7b4,
  190. 0x00392, 0x1d6a9, 0x1d6e3, 0x1d71d, 0x1d757, 0x1d791, 0x00412, 0x013f4, 0x015f7, 0x0a4d0,
  191. 0x10282, 0x102a1, 0x10301, 0x0ff43, 0x0217d, 0x1d41c, 0x1d450, 0x1d484, 0x1d4b8, 0x1d4ec,
  192. 0x1d520, 0x1d554, 0x1d588, 0x1d5bc, 0x1d5f0, 0x1d624, 0x1d658, 0x1d68c, 0x01d04, 0x003f2,
  193. 0x02ca5, 0x00441, 0x0abaf, 0x1043d, 0x1f74c, 0x118f2, 0x118e9, 0x0ff23, 0x0216d, 0x02102,
  194. 0x0212d, 0x1d402, 0x1d436, 0x1d46a, 0x1d49e, 0x1d4d2, 0x1d56e, 0x1d5a2, 0x1d5d6, 0x1d60a,
  195. 0x1d63e, 0x1d672, 0x003f9, 0x02ca4, 0x00421, 0x013df, 0x0a4da, 0x102a2, 0x10302, 0x10415,
  196. 0x1051c, 0x0217e, 0x02146, 0x1d41d, 0x1d451, 0x1d485, 0x1d4b9, 0x1d4ed, 0x1d521, 0x1d555,
  197. 0x1d589, 0x1d5bd, 0x1d5f1, 0x1d625, 0x1d659, 0x1d68d, 0x00501, 0x013e7, 0x0146f, 0x0a4d2,
  198. 0x0216e, 0x02145, 0x1d403, 0x1d437, 0x1d46b, 0x1d49f, 0x1d4d3, 0x1d507, 0x1d53b, 0x1d56f,
  199. 0x1d5a3, 0x1d5d7, 0x1d60b, 0x1d63f, 0x1d673, 0x013a0, 0x015de, 0x015ea, 0x0a4d3, 0x0212e,
  200. 0x0ff45, 0x0212f, 0x02147, 0x1d41e, 0x1d452, 0x1d486, 0x1d4ee, 0x1d522, 0x1d556, 0x1d58a,
  201. 0x1d5be, 0x1d5f2, 0x1d626, 0x1d65a, 0x1d68e, 0x0ab32, 0x00435, 0x004bd, 0x022ff, 0x0ff25,
  202. 0x02130, 0x1d404, 0x1d438, 0x1d46c, 0x1d4d4, 0x1d508, 0x1d53c, 0x1d570, 0x1d5a4, 0x1d5d8,
  203. 0x1d60c, 0x1d640, 0x1d674, 0x00395, 0x1d6ac, 0x1d6e6, 0x1d720, 0x1d75a, 0x1d794, 0x00415,
  204. 0x02d39, 0x013ac, 0x0a4f0, 0x118a6, 0x118ae, 0x10286, 0x1d41f, 0x1d453, 0x1d487, 0x1d4bb,
  205. 0x1d4ef, 0x1d523, 0x1d557, 0x1d58b, 0x1d5bf, 0x1d5f3, 0x1d627, 0x1d65b, 0x1d68f, 0x0ab35,
  206. 0x0a799, 0x0017f, 0x01e9d, 0x00584, 0x1d213, 0x02131, 0x1d405, 0x1d439, 0x1d46d, 0x1d4d5,
  207. 0x1d509, 0x1d53d, 0x1d571, 0x1d5a5, 0x1d5d9, 0x1d60d, 0x1d641, 0x1d675, 0x0a798, 0x003dc,
  208. 0x1d7ca, 0x015b4, 0x0a4dd, 0x118c2, 0x118a2, 0x10287, 0x102a5, 0x10525, 0x0ff47, 0x0210a,
  209. 0x1d420, 0x1d454, 0x1d488, 0x1d4f0, 0x1d524, 0x1d558, 0x1d58c, 0x1d5c0, 0x1d5f4, 0x1d628,
  210. 0x1d65c, 0x1d690, 0x00261, 0x01d83, 0x0018d, 0x00581, 0x1d406, 0x1d43a, 0x1d46e, 0x1d4a2,
  211. 0x1d4d6, 0x1d50a, 0x1d53e, 0x1d572, 0x1d5a6, 0x1d5da, 0x1d60e, 0x1d642, 0x1d676, 0x0050c,
  212. 0x013c0, 0x013f3, 0x0a4d6, 0x0ff48, 0x0210e, 0x1d421, 0x1d489, 0x1d4bd, 0x1d4f1, 0x1d525,
  213. 0x1d559, 0x1d58d, 0x1d5c1, 0x1d5f5, 0x1d629, 0x1d65d, 0x1d691, 0x004bb, 0x00570, 0x013c2,
  214. 0x0ff28, 0x0210b, 0x0210c, 0x0210d, 0x1d407, 0x1d43b, 0x1d46f, 0x1d4d7, 0x1d573, 0x1d5a7,
  215. 0x1d5db, 0x1d60f, 0x1d643, 0x1d677, 0x00397, 0x1d6ae, 0x1d6e8, 0x1d722, 0x1d75c, 0x1d796,
  216. 0x02c8e, 0x0041d, 0x013bb, 0x0157c, 0x0a4e7, 0x102cf, 0x002db, 0x02373, 0x0ff49, 0x02170,
  217. 0x02139, 0x02148, 0x1d422, 0x1d456, 0x1d48a, 0x1d4be, 0x1d4f2, 0x1d526, 0x1d55a, 0x1d58e,
  218. 0x1d5c2, 0x1d5f6, 0x1d62a, 0x1d65e, 0x1d692, 0x00131, 0x1d6a4, 0x0026a, 0x00269, 0x003b9,
  219. 0x01fbe, 0x0037a, 0x1d6ca, 0x1d704, 0x1d73e, 0x1d778, 0x1d7b2, 0x00456, 0x0a647, 0x004cf,
  220. 0x0ab75, 0x013a5, 0x118c3, 0x0ff4a, 0x02149, 0x1d423, 0x1d457, 0x1d48b, 0x1d4bf, 0x1d4f3,
  221. 0x1d527, 0x1d55b, 0x1d58f, 0x1d5c3, 0x1d5f7, 0x1d62b, 0x1d65f, 0x1d693, 0x003f3, 0x00458,
  222. 0x0ff2a, 0x1d409, 0x1d43d, 0x1d471, 0x1d4a5, 0x1d4d9, 0x1d50d, 0x1d541, 0x1d575, 0x1d5a9,
  223. 0x1d5dd, 0x1d611, 0x1d645, 0x1d679, 0x0a7b2, 0x0037f, 0x00408, 0x013ab, 0x0148d, 0x0a4d9,
  224. 0x1d424, 0x1d458, 0x1d48c, 0x1d4c0, 0x1d4f4, 0x1d528, 0x1d55c, 0x1d590, 0x1d5c4, 0x1d5f8,
  225. 0x1d62c, 0x1d660, 0x1d694, 0x0212a, 0x0ff2b, 0x1d40a, 0x1d43e, 0x1d472, 0x1d4a6, 0x1d4da,
  226. 0x1d50e, 0x1d542, 0x1d576, 0x1d5aa, 0x1d5de, 0x1d612, 0x1d646, 0x1d67a, 0x0039a, 0x1d6b1,
  227. 0x1d6eb, 0x1d725, 0x1d75f, 0x1d799, 0x02c94, 0x0041a, 0x013e6, 0x016d5, 0x0a4d7, 0x10518,
  228. 0x005c0, 0x0007c, 0x02223, 0x023fd, 0x0ffe8, 0x00031, 0x00661, 0x006f1, 0x10320, 0x1e8c7,
  229. 0x1d7cf, 0x1d7d9, 0x1d7e3, 0x1d7ed, 0x1d7f7, 0x00049, 0x0ff29, 0x02160, 0x02110, 0x02111,
  230. 0x1d408, 0x1d43c, 0x1d470, 0x1d4d8, 0x1d540, 0x1d574, 0x1d5a8, 0x1d5dc, 0x1d610, 0x1d644,
  231. 0x1d678, 0x00196, 0x0ff4c, 0x0217c, 0x02113, 0x1d425, 0x1d459, 0x1d48d, 0x1d4c1, 0x1d4f5,
  232. 0x1d529, 0x1d55d, 0x1d591, 0x1d5c5, 0x1d5f9, 0x1d62d, 0x1d661, 0x1d695, 0x001c0, 0x00399,
  233. 0x1d6b0, 0x1d6ea, 0x1d724, 0x1d75e, 0x1d798, 0x02c92, 0x00406, 0x004c0, 0x005d5, 0x005df,
  234. 0x00627, 0x1ee00, 0x1ee80, 0x0fe8e, 0x0fe8d, 0x007ca, 0x02d4f, 0x016c1, 0x0a4f2, 0x16f28,
  235. 0x1028a, 0x10309, 0x1d22a, 0x0216c, 0x02112, 0x1d40b, 0x1d43f, 0x1d473, 0x1d4db, 0x1d50f,
  236. 0x1d543, 0x1d577, 0x1d5ab, 0x1d5df, 0x1d613, 0x1d647, 0x1d67b, 0x02cd0, 0x013de, 0x014aa,
  237. 0x0a4e1, 0x16f16, 0x118a3, 0x118b2, 0x1041b, 0x10526, 0x0ff2d, 0x0216f, 0x02133, 0x1d40c,
  238. 0x1d440, 0x1d474, 0x1d4dc, 0x1d510, 0x1d544, 0x1d578, 0x1d5ac, 0x1d5e0, 0x1d614, 0x1d648,
  239. 0x1d67c, 0x0039c, 0x1d6b3, 0x1d6ed, 0x1d727, 0x1d761, 0x1d79b, 0x003fa, 0x02c98, 0x0041c,
  240. 0x013b7, 0x015f0, 0x016d6, 0x0a4df, 0x102b0, 0x10311, 0x1d427, 0x1d45b, 0x1d48f, 0x1d4c3,
  241. 0x1d4f7, 0x1d52b, 0x1d55f, 0x1d593, 0x1d5c7, 0x1d5fb, 0x1d62f, 0x1d663, 0x1d697, 0x00578,
  242. 0x0057c, 0x0ff2e, 0x02115, 0x1d40d, 0x1d441, 0x1d475, 0x1d4a9, 0x1d4dd, 0x1d511, 0x1d579,
  243. 0x1d5ad, 0x1d5e1, 0x1d615, 0x1d649, 0x1d67d, 0x0039d, 0x1d6b4, 0x1d6ee, 0x1d728, 0x1d762,
  244. 0x1d79c, 0x02c9a, 0x0a4e0, 0x10513, 0x00c02, 0x00c82, 0x00d02, 0x00d82, 0x00966, 0x00a66,
  245. 0x00ae6, 0x00be6, 0x00c66, 0x00ce6, 0x00d66, 0x00e50, 0x00ed0, 0x01040, 0x00665, 0x006f5,
  246. 0x0ff4f, 0x02134, 0x1d428, 0x1d45c, 0x1d490, 0x1d4f8, 0x1d52c, 0x1d560, 0x1d594, 0x1d5c8,
  247. 0x1d5fc, 0x1d630, 0x1d664, 0x1d698, 0x01d0f, 0x01d11, 0x0ab3d, 0x003bf, 0x1d6d0, 0x1d70a,
  248. 0x1d744, 0x1d77e, 0x1d7b8, 0x003c3, 0x1d6d4, 0x1d70e, 0x1d748, 0x1d782, 0x1d7bc, 0x02c9f,
  249. 0x0043e, 0x010ff, 0x00585, 0x005e1, 0x00647, 0x1ee24, 0x1ee64, 0x1ee84, 0x0feeb, 0x0feec,
  250. 0x0feea, 0x0fee9, 0x006be, 0x0fbac, 0x0fbad, 0x0fbab, 0x0fbaa, 0x006c1, 0x0fba8, 0x0fba9,
  251. 0x0fba7, 0x0fba6, 0x006d5, 0x00d20, 0x0101d, 0x104ea, 0x118c8, 0x118d7, 0x1042c, 0x00030,
  252. 0x007c0, 0x009e6, 0x00b66, 0x03007, 0x114d0, 0x118e0, 0x1d7ce, 0x1d7d8, 0x1d7e2, 0x1d7ec,
  253. 0x1d7f6, 0x0ff2f, 0x1d40e, 0x1d442, 0x1d476, 0x1d4aa, 0x1d4de, 0x1d512, 0x1d546, 0x1d57a,
  254. 0x1d5ae, 0x1d5e2, 0x1d616, 0x1d64a, 0x1d67e, 0x0039f, 0x1d6b6, 0x1d6f0, 0x1d72a, 0x1d764,
  255. 0x1d79e, 0x02c9e, 0x0041e, 0x00555, 0x02d54, 0x012d0, 0x00b20, 0x104c2, 0x0a4f3, 0x118b5,
  256. 0x10292, 0x102ab, 0x10404, 0x10516, 0x02374, 0x0ff50, 0x1d429, 0x1d45d, 0x1d491, 0x1d4c5,
  257. 0x1d4f9, 0x1d52d, 0x1d561, 0x1d595, 0x1d5c9, 0x1d5fd, 0x1d631, 0x1d665, 0x1d699, 0x003c1,
  258. 0x003f1, 0x1d6d2, 0x1d6e0, 0x1d70c, 0x1d71a, 0x1d746, 0x1d754, 0x1d780, 0x1d78e, 0x1d7ba,
  259. 0x1d7c8, 0x02ca3, 0x00440, 0x0ff30, 0x02119, 0x1d40f, 0x1d443, 0x1d477, 0x1d4ab, 0x1d4df,
  260. 0x1d513, 0x1d57b, 0x1d5af, 0x1d5e3, 0x1d617, 0x1d64b, 0x1d67f, 0x003a1, 0x1d6b8, 0x1d6f2,
  261. 0x1d72c, 0x1d766, 0x1d7a0, 0x02ca2, 0x00420, 0x013e2, 0x0146d, 0x0a4d1, 0x10295, 0x1d42a,
  262. 0x1d45e, 0x1d492, 0x1d4c6, 0x1d4fa, 0x1d52e, 0x1d562, 0x1d596, 0x1d5ca, 0x1d5fe, 0x1d632,
  263. 0x1d666, 0x1d69a, 0x0051b, 0x00563, 0x00566, 0x0211a, 0x1d410, 0x1d444, 0x1d478, 0x1d4ac,
  264. 0x1d4e0, 0x1d514, 0x1d57c, 0x1d5b0, 0x1d5e4, 0x1d618, 0x1d64c, 0x1d680, 0x02d55, 0x1d42b,
  265. 0x1d45f, 0x1d493, 0x1d4c7, 0x1d4fb, 0x1d52f, 0x1d563, 0x1d597, 0x1d5cb, 0x1d5ff, 0x1d633,
  266. 0x1d667, 0x1d69b, 0x0ab47, 0x0ab48, 0x01d26, 0x02c85, 0x00433, 0x0ab81, 0x1d216, 0x0211b,
  267. 0x0211c, 0x0211d, 0x1d411, 0x1d445, 0x1d479, 0x1d4e1, 0x1d57d, 0x1d5b1, 0x1d5e5, 0x1d619,
  268. 0x1d64d, 0x1d681, 0x001a6, 0x013a1, 0x013d2, 0x104b4, 0x01587, 0x0a4e3, 0x16f35, 0x0ff53,
  269. 0x1d42c, 0x1d460, 0x1d494, 0x1d4c8, 0x1d4fc, 0x1d530, 0x1d564, 0x1d598, 0x1d5cc, 0x1d600,
  270. 0x1d634, 0x1d668, 0x1d69c, 0x0a731, 0x001bd, 0x00455, 0x0abaa, 0x118c1, 0x10448, 0x0ff33,
  271. 0x1d412, 0x1d446, 0x1d47a, 0x1d4ae, 0x1d4e2, 0x1d516, 0x1d54a, 0x1d57e, 0x1d5b2, 0x1d5e6,
  272. 0x1d61a, 0x1d64e, 0x1d682, 0x00405, 0x0054f, 0x013d5, 0x013da, 0x0a4e2, 0x16f3a, 0x10296,
  273. 0x10420, 0x1d42d, 0x1d461, 0x1d495, 0x1d4c9, 0x1d4fd, 0x1d531, 0x1d565, 0x1d599, 0x1d5cd,
  274. 0x1d601, 0x1d635, 0x1d669, 0x1d69d, 0x022a4, 0x027d9, 0x1f768, 0x0ff34, 0x1d413, 0x1d447,
  275. 0x1d47b, 0x1d4af, 0x1d4e3, 0x1d517, 0x1d54b, 0x1d57f, 0x1d5b3, 0x1d5e7, 0x1d61b, 0x1d64f,
  276. 0x1d683, 0x003a4, 0x1d6bb, 0x1d6f5, 0x1d72f, 0x1d769, 0x1d7a3, 0x02ca6, 0x00422, 0x013a2,
  277. 0x0a4d4, 0x16f0a, 0x118bc, 0x10297, 0x102b1, 0x10315, 0x1d42e, 0x1d462, 0x1d496, 0x1d4ca,
  278. 0x1d4fe, 0x1d532, 0x1d566, 0x1d59a, 0x1d5ce, 0x1d602, 0x1d636, 0x1d66a, 0x1d69e, 0x0a79f,
  279. 0x01d1c, 0x0ab4e, 0x0ab52, 0x0028b, 0x003c5, 0x1d6d6, 0x1d710, 0x1d74a, 0x1d784, 0x1d7be,
  280. 0x0057d, 0x104f6, 0x118d8, 0x0222a, 0x022c3, 0x1d414, 0x1d448, 0x1d47c, 0x1d4b0, 0x1d4e4,
  281. 0x1d518, 0x1d54c, 0x1d580, 0x1d5b4, 0x1d5e8, 0x1d61c, 0x1d650, 0x1d684, 0x0054d, 0x01200,
  282. 0x104ce, 0x0144c, 0x0a4f4, 0x16f42, 0x118b8, 0x02228, 0x022c1, 0x0ff56, 0x02174, 0x1d42f,
  283. 0x1d463, 0x1d497, 0x1d4cb, 0x1d4ff, 0x1d533, 0x1d567, 0x1d59b, 0x1d5cf, 0x1d603, 0x1d637,
  284. 0x1d66b, 0x1d69f, 0x01d20, 0x003bd, 0x1d6ce, 0x1d708, 0x1d742, 0x1d77c, 0x1d7b6, 0x00475,
  285. 0x005d8, 0x11706, 0x0aba9, 0x118c0, 0x1d20d, 0x00667, 0x006f7, 0x02164, 0x1d415, 0x1d449,
  286. 0x1d47d, 0x1d4b1, 0x1d4e5, 0x1d519, 0x1d54d, 0x1d581, 0x1d5b5, 0x1d5e9, 0x1d61d, 0x1d651,
  287. 0x1d685, 0x00474, 0x02d38, 0x013d9, 0x0142f, 0x0a6df, 0x0a4e6, 0x16f08, 0x118a0, 0x1051d,
  288. 0x0026f, 0x1d430, 0x1d464, 0x1d498, 0x1d4cc, 0x1d500, 0x1d534, 0x1d568, 0x1d59c, 0x1d5d0,
  289. 0x1d604, 0x1d638, 0x1d66c, 0x1d6a0, 0x01d21, 0x00461, 0x0051d, 0x00561, 0x1170a, 0x1170e,
  290. 0x1170f, 0x0ab83, 0x118ef, 0x118e6, 0x1d416, 0x1d44a, 0x1d47e, 0x1d4b2, 0x1d4e6, 0x1d51a,
  291. 0x1d54e, 0x1d582, 0x1d5b6, 0x1d5ea, 0x1d61e, 0x1d652, 0x1d686, 0x0051c, 0x013b3, 0x013d4,
  292. 0x0a4ea, 0x0166e, 0x000d7, 0x0292b, 0x0292c, 0x02a2f, 0x0ff58, 0x02179, 0x1d431, 0x1d465,
  293. 0x1d499, 0x1d4cd, 0x1d501, 0x1d535, 0x1d569, 0x1d59d, 0x1d5d1, 0x1d605, 0x1d639, 0x1d66d,
  294. 0x1d6a1, 0x00445, 0x01541, 0x0157d, 0x0166d, 0x02573, 0x10322, 0x118ec, 0x0ff38, 0x02169,
  295. 0x1d417, 0x1d44b, 0x1d47f, 0x1d4b3, 0x1d4e7, 0x1d51b, 0x1d54f, 0x1d583, 0x1d5b7, 0x1d5eb,
  296. 0x1d61f, 0x1d653, 0x1d687, 0x0a7b3, 0x003a7, 0x1d6be, 0x1d6f8, 0x1d732, 0x1d76c, 0x1d7a6,
  297. 0x02cac, 0x00425, 0x02d5d, 0x016b7, 0x0a4eb, 0x10290, 0x102b4, 0x10317, 0x10527, 0x00263,
  298. 0x01d8c, 0x0ff59, 0x1d432, 0x1d466, 0x1d49a, 0x1d4ce, 0x1d502, 0x1d536, 0x1d56a, 0x1d59e,
  299. 0x1d5d2, 0x1d606, 0x1d63a, 0x1d66e, 0x1d6a2, 0x0028f, 0x01eff, 0x0ab5a, 0x003b3, 0x0213d,
  300. 0x1d6c4, 0x1d6fe, 0x1d738, 0x1d772, 0x1d7ac, 0x00443, 0x004af, 0x010e7, 0x118dc, 0x0ff39,
  301. 0x1d418, 0x1d44c, 0x1d480, 0x1d4b4, 0x1d4e8, 0x1d51c, 0x1d550, 0x1d584, 0x1d5b8, 0x1d5ec,
  302. 0x1d620, 0x1d654, 0x1d688, 0x003a5, 0x003d2, 0x1d6bc, 0x1d6f6, 0x1d730, 0x1d76a, 0x1d7a4,
  303. 0x02ca8, 0x00423, 0x004ae, 0x013a9, 0x013bd, 0x0a4ec, 0x16f43, 0x118a4, 0x102b2, 0x1d433,
  304. 0x1d467, 0x1d49b, 0x1d4cf, 0x1d503, 0x1d537, 0x1d56b, 0x1d59f, 0x1d5d3, 0x1d607, 0x1d63b,
  305. 0x1d66f, 0x1d6a3, 0x01d22, 0x0ab93, 0x118c4, 0x102f5, 0x118e5, 0x0ff3a, 0x02124, 0x02128,
  306. 0x1d419, 0x1d44d, 0x1d481, 0x1d4b5, 0x1d4e9, 0x1d585, 0x1d5b9, 0x1d5ed, 0x1d621, 0x1d655,
  307. 0x1d689, 0x00396, 0x1d6ad, 0x1d6e7, 0x1d721, 0x1d75b, 0x1d795, 0x013c3, 0x0a4dc, 0x118a9,
  308. };
  309. GHashTable *latin_confusable_ht = NULL;
  310. static gboolean
  311. rspamd_can_alias_latin (gint ch)
  312. {
  313. if (latin_confusable_ht == NULL) {
  314. guint i;
  315. /* Build hash table */
  316. latin_confusable_ht = g_hash_table_new (g_int_hash, g_int_equal);
  317. for (i = 0; i < G_N_ELEMENTS (latin_confusable); i ++) {
  318. g_hash_table_insert(latin_confusable_ht, &latin_confusable[i],
  319. GINT_TO_POINTER (-1));
  320. }
  321. }
  322. return g_hash_table_lookup (latin_confusable_ht, &ch) != NULL;
  323. }
  324. static gdouble
  325. rspamd_chartable_process_word_utf (struct rspamd_task *task,
  326. rspamd_stat_token_t *w,
  327. gboolean is_url,
  328. guint *ncap,
  329. struct chartable_ctx *chartable_module_ctx,
  330. const gchar *lang,
  331. gboolean ignore_diacritics)
  332. {
  333. const UChar32 *p, *end;
  334. gdouble badness = 0.0;
  335. UChar32 uc;
  336. UBlockCode sc;
  337. guint cat;
  338. gint last_is_latin = -1;
  339. guint same_script_count = 0, nsym = 0, nspecial = 0;
  340. enum {
  341. start_process = 0,
  342. got_alpha,
  343. got_digit,
  344. got_unknown,
  345. } state = start_process, prev_state = start_process;
  346. p = w->unicode.begin;
  347. end = p + w->unicode.len;
  348. /* We assume that w is normalized */
  349. while (p < end) {
  350. uc = *p++;
  351. if (((gint32)uc) < 0) {
  352. break;
  353. }
  354. sc = ublock_getCode (uc);
  355. cat = u_charType (uc);
  356. if (!ignore_diacritics) {
  357. if (cat == U_NON_SPACING_MARK ||
  358. (sc == UBLOCK_LATIN_1_SUPPLEMENT) ||
  359. (sc == UBLOCK_LATIN_EXTENDED_A) ||
  360. (sc == UBLOCK_LATIN_EXTENDED_ADDITIONAL) ||
  361. (sc == UBLOCK_LATIN_EXTENDED_B) ||
  362. (sc == UBLOCK_COMBINING_DIACRITICAL_MARKS)) {
  363. nspecial++;
  364. }
  365. }
  366. if (u_isalpha (uc)) {
  367. if (sc <= UBLOCK_COMBINING_DIACRITICAL_MARKS ||
  368. sc == UBLOCK_LATIN_EXTENDED_ADDITIONAL) {
  369. /*
  370. * Assume all latin, IPA, diacritic and space modifiers
  371. * characters as basic latin
  372. */
  373. sc = UBLOCK_BASIC_LATIN;
  374. }
  375. if (sc != UBLOCK_BASIC_LATIN && u_isupper (uc)) {
  376. if (ncap) {
  377. (*ncap) ++;
  378. }
  379. }
  380. if (state == got_digit) {
  381. /* Penalize digit -> alpha translations */
  382. if (!is_url && sc != UBLOCK_BASIC_LATIN &&
  383. prev_state != start_process) {
  384. badness += 0.25;
  385. }
  386. }
  387. else if (state == got_alpha) {
  388. /* Check script */
  389. if (same_script_count > 0) {
  390. if (sc != UBLOCK_BASIC_LATIN && last_is_latin) {
  391. if (rspamd_can_alias_latin (uc)) {
  392. badness += 1.0 / (gdouble)same_script_count;
  393. }
  394. last_is_latin = 0;
  395. same_script_count = 1;
  396. }
  397. else {
  398. same_script_count ++;
  399. }
  400. }
  401. else {
  402. last_is_latin = sc == UBLOCK_BASIC_LATIN;
  403. same_script_count = 1;
  404. }
  405. }
  406. prev_state = state;
  407. state = got_alpha;
  408. }
  409. else if (u_isdigit (uc)) {
  410. if (state != got_digit) {
  411. prev_state = state;
  412. }
  413. state = got_digit;
  414. same_script_count = 0;
  415. }
  416. else {
  417. /* We don't care about unknown characters here */
  418. if (state != got_unknown) {
  419. prev_state = state;
  420. }
  421. state = got_unknown;
  422. same_script_count = 0;
  423. }
  424. nsym ++;
  425. }
  426. if (nspecial > 0) {
  427. if (!ignore_diacritics) {
  428. /* Count diacritics */
  429. badness += nspecial;
  430. }
  431. else if (nspecial > 1) {
  432. badness += (nspecial - 1.0) / 2.0;
  433. }
  434. }
  435. /* Try to avoid FP for long words */
  436. if (nsym > chartable_module_ctx->max_word_len) {
  437. badness = 0;
  438. }
  439. else {
  440. if (badness > 4.0) {
  441. badness = 4.0;
  442. }
  443. }
  444. msg_debug_chartable ("word %*s, badness: %.2f",
  445. (gint)w->normalized.len, w->normalized.begin,
  446. badness);
  447. return badness;
  448. }
  449. static gdouble
  450. rspamd_chartable_process_word_ascii (struct rspamd_task *task,
  451. rspamd_stat_token_t *w,
  452. gboolean is_url,
  453. struct chartable_ctx *chartable_module_ctx)
  454. {
  455. const guchar *p, *end;
  456. gdouble badness = 0.0;
  457. enum {
  458. ascii = 1,
  459. non_ascii
  460. } sc, last_sc;
  461. gint same_script_count = 0, seen_alpha = FALSE;
  462. enum {
  463. start_process = 0,
  464. got_alpha,
  465. got_digit,
  466. got_unknown,
  467. } state = start_process;
  468. p = w->normalized.begin;
  469. end = p + w->normalized.len;
  470. last_sc = 0;
  471. if (w->normalized.len > chartable_module_ctx->max_word_len) {
  472. return 0.0;
  473. }
  474. /* We assume that w is normalized */
  475. while (p < end) {
  476. if (g_ascii_isalpha (*p) || *p > 0x7f) {
  477. if (state == got_digit) {
  478. /* Penalize digit -> alpha translations */
  479. if (seen_alpha && !is_url && !g_ascii_isxdigit (*p)) {
  480. badness += 0.25;
  481. }
  482. }
  483. else if (state == got_alpha) {
  484. /* Check script */
  485. sc = (*p > 0x7f) ? ascii : non_ascii;
  486. if (same_script_count > 0) {
  487. if (sc != last_sc) {
  488. badness += 1.0 / (gdouble)same_script_count;
  489. last_sc = sc;
  490. same_script_count = 1;
  491. }
  492. else {
  493. same_script_count ++;
  494. }
  495. }
  496. else {
  497. last_sc = sc;
  498. same_script_count = 1;
  499. }
  500. }
  501. seen_alpha = TRUE;
  502. state = got_alpha;
  503. }
  504. else if (g_ascii_isdigit (*p)) {
  505. state = got_digit;
  506. same_script_count = 0;
  507. }
  508. else {
  509. /* We don't care about unknown characters here */
  510. state = got_unknown;
  511. same_script_count = 0;
  512. }
  513. p ++;
  514. }
  515. if (badness > 4.0) {
  516. badness = 4.0;
  517. }
  518. msg_debug_chartable ("word %*s, badness: %.2f",
  519. (gint)w->normalized.len, w->normalized.begin,
  520. badness);
  521. return badness;
  522. }
  523. static gboolean
  524. rspamd_chartable_process_part (struct rspamd_task *task,
  525. struct rspamd_mime_text_part *part,
  526. struct chartable_ctx *chartable_module_ctx,
  527. gboolean ignore_diacritics)
  528. {
  529. rspamd_stat_token_t *w;
  530. guint i, ncap = 0;
  531. gdouble cur_score = 0.0;
  532. if (part == NULL || part->utf_words == NULL ||
  533. part->utf_words->len == 0 || part->nwords == 0) {
  534. return FALSE;
  535. }
  536. for (i = 0; i < part->utf_words->len; i++) {
  537. w = &g_array_index (part->utf_words, rspamd_stat_token_t, i);
  538. if ((w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT)) {
  539. if (w->flags & RSPAMD_STAT_TOKEN_FLAG_UTF) {
  540. cur_score += rspamd_chartable_process_word_utf (task, w, FALSE,
  541. &ncap, chartable_module_ctx, part->language, ignore_diacritics);
  542. }
  543. else {
  544. cur_score += rspamd_chartable_process_word_ascii (task, w,
  545. FALSE, chartable_module_ctx);
  546. }
  547. }
  548. }
  549. /*
  550. * TODO: perhaps, we should do this analysis somewhere else and get
  551. * something like: <SYM_SC><SYM_SC><SYM_SC> representing classes for all
  552. * symbols in the text
  553. */
  554. part->capital_letters += ncap;
  555. cur_score /= (gdouble)part->nwords;
  556. if (cur_score > 1.0) {
  557. cur_score = 1.0;
  558. }
  559. if (cur_score > chartable_module_ctx->threshold) {
  560. rspamd_task_insert_result (task, chartable_module_ctx->symbol,
  561. cur_score, NULL);
  562. return TRUE;
  563. }
  564. return FALSE;
  565. }
  566. static void
  567. chartable_symbol_callback (struct rspamd_task *task,
  568. struct rspamd_symcache_item *item,
  569. void *unused)
  570. {
  571. guint i;
  572. struct rspamd_mime_text_part *part;
  573. struct chartable_ctx *chartable_module_ctx = chartable_get_context (task->cfg);
  574. const gchar *language = NULL;
  575. gboolean ignore_diacritics = FALSE, seen_violated_part = FALSE;
  576. /* Check if we have parts with diacritic symbols language */
  577. PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) {
  578. if (part->languages && part->languages->len > 0) {
  579. struct rspamd_lang_detector_res *lang =
  580. (struct rspamd_lang_detector_res *)g_ptr_array_index (part->languages, 0);
  581. gint flags;
  582. flags = rspamd_language_detector_elt_flags (lang->elt);
  583. if (flags & RS_LANGUAGE_DIACRITICS) {
  584. ignore_diacritics = TRUE;
  585. }
  586. }
  587. if (rspamd_chartable_process_part (task, part, chartable_module_ctx,
  588. ignore_diacritics)) {
  589. seen_violated_part = TRUE;
  590. }
  591. }
  592. if (MESSAGE_FIELD (task, text_parts)->len == 0) {
  593. /* No text parts, assume that we should ignore diacritics checks for metatokens */
  594. ignore_diacritics = TRUE;
  595. }
  596. if (task->meta_words != NULL && task->meta_words->len > 0) {
  597. rspamd_stat_token_t *w;
  598. gdouble cur_score = 0;
  599. gsize arlen = task->meta_words->len;
  600. for (i = 0; i < arlen; i++) {
  601. w = &g_array_index (task->meta_words, rspamd_stat_token_t, i);
  602. cur_score += rspamd_chartable_process_word_utf (task, w, FALSE,
  603. NULL, chartable_module_ctx, language, ignore_diacritics);
  604. }
  605. cur_score /= (gdouble)arlen;
  606. if (cur_score > 1.0) {
  607. cur_score = 1.0;
  608. }
  609. if (cur_score > chartable_module_ctx->threshold) {
  610. if (!seen_violated_part) {
  611. /* Further penalise */
  612. if (cur_score > 0.25) {
  613. cur_score = 0.25;
  614. }
  615. }
  616. rspamd_task_insert_result (task, chartable_module_ctx->symbol,
  617. cur_score, "subject");
  618. }
  619. }
  620. rspamd_symcache_finalize_item (task, item);
  621. }
  622. static void
  623. chartable_url_symbol_callback (struct rspamd_task *task,
  624. struct rspamd_symcache_item *item,
  625. void *unused)
  626. {
  627. /* XXX: TODO: unbreak module once URLs unicode project is over */
  628. #if 0
  629. struct rspamd_url *u;
  630. GHashTableIter it;
  631. gpointer k, v;
  632. rspamd_stat_token_t w;
  633. gdouble cur_score = 0.0;
  634. struct chartable_ctx *chartable_module_ctx = chartable_get_context (task->cfg);
  635. g_hash_table_iter_init (&it, task->urls);
  636. while (g_hash_table_iter_next (&it, &k, &v)) {
  637. u = v;
  638. if (cur_score > 2.0) {
  639. cur_score = 2.0;
  640. break;
  641. }
  642. if (u->hostlen > 0) {
  643. w.stemmed.begin = u->host;
  644. w.stemmed.len = u->hostlen;
  645. if (g_utf8_validate (w.stemmed.begin, w.stemmed.len, NULL)) {
  646. cur_score += rspamd_chartable_process_word_utf (task, &w,
  647. TRUE, NULL, chartable_module_ctx);
  648. }
  649. else {
  650. cur_score += rspamd_chartable_process_word_ascii (task, &w,
  651. TRUE, chartable_module_ctx);
  652. }
  653. }
  654. }
  655. g_hash_table_iter_init (&it, task->emails);
  656. while (g_hash_table_iter_next (&it, &k, &v)) {
  657. u = v;
  658. if (cur_score > 2.0) {
  659. cur_score = 2.0;
  660. break;
  661. }
  662. if (u->hostlen > 0) {
  663. w.stemmed.begin = u->host;
  664. w.stemmed.len = u->hostlen;
  665. if (g_utf8_validate (w.stemmed.begin, w.stemmed.len, NULL)) {
  666. cur_score += rspamd_chartable_process_word_utf (task, &w,
  667. TRUE, NULL, chartable_module_ctx);
  668. }
  669. else {
  670. cur_score += rspamd_chartable_process_word_ascii (task, &w,
  671. TRUE, chartable_module_ctx);
  672. }
  673. }
  674. }
  675. if (cur_score > chartable_module_ctx->threshold) {
  676. rspamd_task_insert_result (task, chartable_module_ctx->symbol,
  677. cur_score, NULL);
  678. }
  679. #endif
  680. rspamd_symcache_finalize_item (task, item);
  681. }