Просмотр исходного кода

[Feature] Speed up is_ascii function

tags/2.3
Vsevolod Stakhov 4 года назад
Родитель
Сommit
cec3e89b04
2 изменённых файла: 73 добавления и 34 удаления
  1. 72
    1
      src/libutil/str_util.c
  2. 1
    33
      src/libutil/str_util.h

+ 72
- 1
src/libutil/str_util.c Просмотреть файл

@@ -3324,4 +3324,75 @@ rspamd_string_len_split (const gchar *in, gsize len, const gchar *spill,
}

return res;
}
}

#include <string.h>

#if defined(__x86_64__)
#include <x86intrin.h>
#endif

/**
 * Portable check for bytes with the high bit (0x80) set.
 *
 * The input is consumed 16 bytes per iteration as two independent 64-bit
 * words that are OR-ed into accumulators; the remaining (< 16) bytes are
 * OR-ed one by one.
 *
 * @param beg start of the buffer; no particular alignment is required
 * @param len number of bytes to examine
 * @return TRUE if at least one byte is >= 0x80, FALSE otherwise
 *         (including the case of len == 0)
 */
static inline gboolean
rspamd_str_has_8bit_u64 (const guchar *beg, gsize len)
{
	guint8 orb = 0;

	if (len >= 16) {
		const guchar *nextd = beg + sizeof (guint64);
		guint64 n1 = 0, n2 = 0;

		do {
			guint64 w1, w2;

			/*
			 * `beg` has no alignment guarantee, so dereferencing it as
			 * `const guint64 *` is undefined behaviour (misaligned access
			 * and aliasing violation). memcpy is the well-defined way to
			 * load the words; compilers turn it into a plain load where
			 * the target allows unaligned access.
			 */
			memcpy (&w1, beg, sizeof (w1));
			memcpy (&w2, nextd, sizeof (w2));
			n1 |= w1;
			n2 |= w2;
			beg += 16;
			nextd += 16;
			len -= 16;
		} while (len >= 16);

		/*
		 * Idea from Benny Halevy <bhalevy@scylladb.com>
		 * - 7-th bit set   ==> orb = !(non-zero) - 1 = 0 - 1 = 0xFF
		 * - 7-th bit clear ==> orb = !0 - 1          = 1 - 1 = 0x00
		 */
		orb = !((n1 | n2) & 0x8080808080808080ULL) - 1;
	}

	/* Fold the tail bytes into the same accumulator */
	while (len--) {
		orb |= *beg++;
	}

	return orb >= 0x80;
}

/**
 * Checks whether a buffer contains at least one byte with the high bit set
 * (value >= 0x80).
 *
 * On x86_64 the input is scanned 32 bytes per iteration using two 128-bit
 * unaligned loads; whatever remains (always the whole input on other
 * targets) is passed to rspamd_str_has_8bit_u64.
 *
 * @param beg start of the buffer; no particular alignment is required
 * @param len number of bytes to examine
 * @return TRUE if such a byte is found, FALSE otherwise
 */
gboolean
rspamd_str_has_8bit (const guchar *beg, gsize len)
{
#if defined(__x86_64__)
	if (len >= 32) {
		const guchar *nextd = beg + 16;

		__m128i n1 = _mm_set1_epi8 (0), n2;

		n2 = n1;

		while (len >= 32) {
			/*
			 * _mm_loadu_si128 is SSE2 and therefore always available on
			 * x86_64; _mm_lddqu_si128 requires SSE3, which the
			 * __x86_64__ guard alone does not guarantee.
			 */
			__m128i xmm1 = _mm_loadu_si128 ((const __m128i *)beg);
			__m128i xmm2 = _mm_loadu_si128 ((const __m128i *)nextd);

			n1 = _mm_or_si128 (n1, xmm1);
			n2 = _mm_or_si128 (n2, xmm2);

			beg += 32;
			nextd += 32;
			len -= 32;
		}

		n1 = _mm_or_si128 (n1, n2);

		/*
		 * movemask gathers the most significant bit of every byte lane,
		 * so a non-zero mask means some input byte was >= 0x80
		 */
		if (_mm_movemask_epi8 (n1)) {
			return TRUE;
		}
	}
#endif

	/* Fewer than 32 bytes are left here on x86_64 */
	return rspamd_str_has_8bit_u64 (beg, len);
}

+ 1
- 33
src/libutil/str_util.h Просмотреть файл

@@ -440,39 +440,7 @@ gsize rspamd_memspn (const gchar *s, const gchar *e, gsize len);
*/
#define rspamd_is_aligned(p, n) (((uintptr_t)(p) & ((uintptr_t)(n) - 1)) == 0)
#define rspamd_is_aligned_as(p, v) rspamd_is_aligned(p, _Alignof(__typeof((v))))

/**
 * Checks whether a buffer contains a byte greater than 127.
 *
 * When `beg` is aligned as an unsigned long, whole words are tested with
 * rspamd_str_hasmore and only the trailing partial word is checked byte by
 * byte; otherwise the whole buffer is checked byte by byte.
 *
 * @param beg start of the buffer
 * @param len number of bytes to examine
 * @return TRUE if such a byte is found, FALSE otherwise
 */
static inline gboolean
rspamd_str_has_8bit (const guchar *beg, gsize len)
{
	unsigned long *word;
	gsize nwords, remaining = len;

	if (rspamd_is_aligned_as (beg, *word)) {
		word = (unsigned long *) beg;
		nwords = len / sizeof (*word);
		remaining = len % sizeof (*word);

		/* Word-at-a-time scan over the aligned prefix */
		while (nwords-- > 0) {
			if (rspamd_str_hasmore (*word, 127)) {
				return TRUE;
			}

			word++;
		}

		/* Continue with the bytes that do not fill a whole word */
		beg = (const guchar *) word;
	}

	while (remaining-- > 0) {
		if (*beg++ > 127) {
			return TRUE;
		}
	}

	return FALSE;
}
/**
 * Checks whether a buffer contains at least one byte with the high bit set
 * (value > 127)
 * @param beg start of the buffer
 * @param len number of bytes to check
 * @return TRUE if such a byte is found, FALSE otherwise
 */
gboolean rspamd_str_has_8bit (const guchar *beg, gsize len);

struct UConverter;


Загрузка…
Отмена
Сохранить