#define RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE (1u << 9)
#define RSPAMD_STAT_TOKEN_FLAG_STOP_WORD (1u << 9)
#define RSPAMD_STAT_TOKEN_FLAG_SKIPPED (1u << 10)
+#define RSPAMD_STAT_TOKEN_FLAG_INVISIBLE_SPACES (1u << 11)
typedef struct rspamd_stat_token_s {
rspamd_ftok_t original; /* utf8 raw */
while (i < srclen) {
U16_NEXT_UNSAFE (src, i, t);
- *d++ = u_tolower (t);
+
+ if (u_isgraph (t)) {
+ *d++ = u_tolower (t);
+ }
+ else {
+ /* Invisible spaces ! */
+ tok->flags |= RSPAMD_STAT_TOKEN_FLAG_INVISIBLE_SPACES;
+ }
}
tok->unicode.begin = dest;