@@ -1294,6 +1294,7 @@ ADD_SUBDIRECTORY(contrib/lua-lpeg) | |||
ADD_SUBDIRECTORY(contrib/t1ha) | |||
ADD_SUBDIRECTORY(contrib/libev) | |||
ADD_SUBDIRECTORY(contrib/kann) | |||
ADD_SUBDIRECTORY(contrib/fastutf8) | |||
IF (NOT WITH_LUAJIT) | |||
ADD_SUBDIRECTORY(contrib/lua-bit) |
@@ -0,0 +1,13 @@ | |||
SET(UTFSRC ${CMAKE_CURRENT_SOURCE_DIR}/fastutf8.c) | |||
IF(HAVE_AVX2) | |||
SET(UTFSRC ${UTFSRC} ${CMAKE_CURRENT_SOURCE_DIR}/avx2.c) | |||
MESSAGE(STATUS "UTF8: AVX2 support is added") | |||
ENDIF() | |||
IF(HAVE_SSE41) | |||
SET(UTFSRC ${UTFSRC} ${CMAKE_CURRENT_SOURCE_DIR}/sse41.c) | |||
MESSAGE(STATUS "UTF8: SSE41 support is added") | |||
ENDIF() | |||
CONFIGURE_FILE(platform_config.h.in platform_config.h) | |||
ADD_LIBRARY(rspamd-fastutf8 STATIC ${UTFSRC}) |
@@ -0,0 +1,22 @@ | |||
MIT License | |||
Copyright (c) 2019 Yibo Cai | |||
Copyright (c) 2019 Vsevolod Stakhov | |||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||
of this software and associated documentation files (the "Software"), to deal | |||
in the Software without restriction, including without limitation the rights | |||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
copies of the Software, and to permit persons to whom the Software is | |||
furnished to do so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. |
@@ -0,0 +1,314 @@ | |||
/* | |||
* MIT License | |||
* | |||
* Copyright (c) 2019 Yibo Cai | |||
* Copyright (c) 2019 Vsevolod Stakhov | |||
* Permission is hereby granted, free of charge, to any person obtaining a copy | |||
* of this software and associated documentation files (the "Software"), to deal | |||
* in the Software without restriction, including without limitation the rights | |||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
* copies of the Software, and to permit persons to whom the Software is | |||
* furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be included in all | |||
* copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
* SOFTWARE. | |||
*/ | |||
#include "config.h" | |||
#include "fastutf8.h" | |||
#include "platform_config.h" | |||
#ifndef __clang__ | |||
#pragma GCC push_options | |||
#pragma GCC target("avx2") | |||
#endif | |||
#ifndef __SSE2__ | |||
#define __SSE2__ | |||
#endif | |||
#ifndef __SSE__ | |||
#define __SSE__ | |||
#endif | |||
#ifndef __SSE4_2__ | |||
#define __SSE4_2__ | |||
#endif | |||
#ifndef __SSE4_1__ | |||
#define __SSE4_1__ | |||
#endif | |||
#ifndef __SSEE3__ | |||
#define __SSEE3__ | |||
#endif | |||
#ifndef __AVX__ | |||
#define __AVX__ | |||
#endif | |||
#ifndef __AVX2__ | |||
#define __AVX2__ | |||
#endif | |||
#include <immintrin.h> | |||
/* | |||
* Map high nibble of "First Byte" to legal character length minus 1 | |||
* 0x00 ~ 0xBF --> 0 | |||
* 0xC0 ~ 0xDF --> 1 | |||
* 0xE0 ~ 0xEF --> 2 | |||
* 0xF0 ~ 0xFF --> 3 | |||
*/ | |||
static const int8_t _first_len_tbl[] = { | |||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3, | |||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3, | |||
}; | |||
/* Map "First Byte" to 8-th item of range table (0xC2 ~ 0xF4) */ | |||
static const int8_t _first_range_tbl[] = { | |||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, | |||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, | |||
}; | |||
/* | |||
* Range table, map range index to min and max values | |||
* Index 0 : 00 ~ 7F (First Byte, ascii) | |||
* Index 1,2,3: 80 ~ BF (Second, Third, Fourth Byte) | |||
* Index 4 : A0 ~ BF (Second Byte after E0) | |||
* Index 5 : 80 ~ 9F (Second Byte after ED) | |||
* Index 6 : 90 ~ BF (Second Byte after F0) | |||
* Index 7 : 80 ~ 8F (Second Byte after F4) | |||
* Index 8 : C2 ~ F4 (First Byte, non ascii) | |||
* Index 9~15 : illegal: i >= 127 && i <= -128 | |||
*/ | |||
static const int8_t _range_min_tbl[] = { | |||
0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80, | |||
0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, | |||
0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80, | |||
0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, | |||
}; | |||
static const int8_t _range_max_tbl[] = { | |||
0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F, | |||
0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, | |||
0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F, | |||
0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, | |||
}; | |||
/* | |||
* Tables for fast handling of four special First Bytes(E0,ED,F0,F4), after | |||
* which the Second Byte are not 80~BF. It contains "range index adjustment". | |||
* +------------+---------------+------------------+----------------+ | |||
* | First Byte | original range| range adjustment | adjusted range | | |||
* +------------+---------------+------------------+----------------+ | |||
* | E0 | 2 | 2 | 4 | | |||
* +------------+---------------+------------------+----------------+ | |||
* | ED | 2 | 3 | 5 | | |||
* +------------+---------------+------------------+----------------+ | |||
* | F0 | 3 | 3 | 6 | | |||
* +------------+---------------+------------------+----------------+ | |||
* | F4 | 4 | 4 | 8 | | |||
* +------------+---------------+------------------+----------------+ | |||
*/ | |||
/* index1 -> E0, index14 -> ED */ | |||
static const int8_t _df_ee_tbl[] = { | |||
0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, | |||
0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, | |||
}; | |||
/* index1 -> F0, index5 -> F4 */ | |||
static const int8_t _ef_fe_tbl[] = { | |||
0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |||
0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |||
}; | |||
static inline __m256i push_last_byte_of_a_to_b(__m256i a, __m256i b) | |||
__attribute__((__target__("avx2"))); | |||
static inline __m256i push_last_byte_of_a_to_b(__m256i a, __m256i b) | |||
{ | |||
return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 15); | |||
} | |||
static inline __m256i push_last_2bytes_of_a_to_b(__m256i a, __m256i b) | |||
__attribute__((__target__("avx2"))); | |||
static inline __m256i push_last_2bytes_of_a_to_b(__m256i a, __m256i b) | |||
{ | |||
return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 14); | |||
} | |||
static inline __m256i push_last_3bytes_of_a_to_b(__m256i a, __m256i b) | |||
__attribute__((__target__("avx2"))); | |||
static inline __m256i push_last_3bytes_of_a_to_b(__m256i a, __m256i b) | |||
{ | |||
return _mm256_alignr_epi8(b, _mm256_permute2x128_si256(a, b, 0x21), 13); | |||
} | |||
off_t rspamd_fast_utf8_validate_avx2 (const unsigned char *data, size_t len) | |||
__attribute__((__target__("avx2"))); | |||
/* 5x faster than naive method */ | |||
/* Return 0 - success, -1 - error, >0 - first error char(if RET_ERR_IDX = 1) */ | |||
off_t rspamd_fast_utf8_validate_avx2 (const unsigned char *data, size_t len) | |||
{ | |||
off_t err_pos = 1; | |||
if (len >= 32) { | |||
__m256i prev_input = _mm256_set1_epi8 (0); | |||
__m256i prev_first_len = _mm256_set1_epi8 (0); | |||
/* Cached tables */ | |||
const __m256i first_len_tbl = | |||
_mm256_lddqu_si256 ((const __m256i *) _first_len_tbl); | |||
const __m256i first_range_tbl = | |||
_mm256_lddqu_si256 ((const __m256i *) _first_range_tbl); | |||
const __m256i range_min_tbl = | |||
_mm256_lddqu_si256 ((const __m256i *) _range_min_tbl); | |||
const __m256i range_max_tbl = | |||
_mm256_lddqu_si256 ((const __m256i *) _range_max_tbl); | |||
const __m256i df_ee_tbl = | |||
_mm256_lddqu_si256 ((const __m256i *) _df_ee_tbl); | |||
const __m256i ef_fe_tbl = | |||
_mm256_lddqu_si256 ((const __m256i *) _ef_fe_tbl); | |||
__m256i error = _mm256_set1_epi8 (0); | |||
while (len >= 32) { | |||
const __m256i input = _mm256_lddqu_si256 ((const __m256i *) data); | |||
/* high_nibbles = input >> 4 */ | |||
const __m256i high_nibbles = | |||
_mm256_and_si256 (_mm256_srli_epi16 (input, 4), _mm256_set1_epi8 (0x0F)); | |||
/* first_len = legal character length minus 1 */ | |||
/* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */ | |||
/* first_len = first_len_tbl[high_nibbles] */ | |||
__m256i first_len = _mm256_shuffle_epi8 (first_len_tbl, high_nibbles); | |||
/* First Byte: set range index to 8 for bytes within 0xC0 ~ 0xFF */ | |||
/* range = first_range_tbl[high_nibbles] */ | |||
__m256i range = _mm256_shuffle_epi8 (first_range_tbl, high_nibbles); | |||
/* Second Byte: set range index to first_len */ | |||
/* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */ | |||
/* range |= (first_len, prev_first_len) << 1 byte */ | |||
range = _mm256_or_si256 ( | |||
range, push_last_byte_of_a_to_b (prev_first_len, first_len)); | |||
/* Third Byte: set range index to saturate_sub(first_len, 1) */ | |||
/* 0 for 00~7F, 0 for C0~DF, 1 for E0~EF, 2 for F0~FF */ | |||
__m256i tmp1, tmp2; | |||
/* tmp1 = saturate_sub(first_len, 1) */ | |||
tmp1 = _mm256_subs_epu8 (first_len, _mm256_set1_epi8 (1)); | |||
/* tmp2 = saturate_sub(prev_first_len, 1) */ | |||
tmp2 = _mm256_subs_epu8 (prev_first_len, _mm256_set1_epi8 (1)); | |||
/* range |= (tmp1, tmp2) << 2 bytes */ | |||
range = _mm256_or_si256 (range, push_last_2bytes_of_a_to_b (tmp2, tmp1)); | |||
/* Fourth Byte: set range index to saturate_sub(first_len, 2) */ | |||
/* 0 for 00~7F, 0 for C0~DF, 0 for E0~EF, 1 for F0~FF */ | |||
/* tmp1 = saturate_sub(first_len, 2) */ | |||
tmp1 = _mm256_subs_epu8 (first_len, _mm256_set1_epi8 (2)); | |||
/* tmp2 = saturate_sub(prev_first_len, 2) */ | |||
tmp2 = _mm256_subs_epu8 (prev_first_len, _mm256_set1_epi8 (2)); | |||
/* range |= (tmp1, tmp2) << 3 bytes */ | |||
range = _mm256_or_si256 (range, push_last_3bytes_of_a_to_b (tmp2, tmp1)); | |||
/* | |||
* Now we have below range indices caluclated | |||
* Correct cases: | |||
* - 8 for C0~FF | |||
* - 3 for 1st byte after F0~FF | |||
* - 2 for 1st byte after E0~EF or 2nd byte after F0~FF | |||
* - 1 for 1st byte after C0~DF or 2nd byte after E0~EF or | |||
* 3rd byte after F0~FF | |||
* - 0 for others | |||
* Error cases: | |||
* 9,10,11 if non ascii First Byte overlaps | |||
* E.g., F1 80 C2 90 --> 8 3 10 2, where 10 indicates error | |||
*/ | |||
/* Adjust Second Byte range for special First Bytes(E0,ED,F0,F4) */ | |||
/* Overlaps lead to index 9~15, which are illegal in range table */ | |||
__m256i shift1, pos, range2; | |||
/* shift1 = (input, prev_input) << 1 byte */ | |||
shift1 = push_last_byte_of_a_to_b (prev_input, input); | |||
pos = _mm256_sub_epi8 (shift1, _mm256_set1_epi8 (0xEF)); | |||
/* | |||
* shift1: | EF F0 ... FE | FF 00 ... ... DE | DF E0 ... EE | | |||
* pos: | 0 1 15 | 16 17 239| 240 241 255| | |||
* pos-240: | 0 0 0 | 0 0 0 | 0 1 15 | | |||
* pos+112: | 112 113 127| >= 128 | >= 128 | | |||
*/ | |||
tmp1 = _mm256_subs_epu8 (pos, _mm256_set1_epi8 ((char)240)); | |||
range2 = _mm256_shuffle_epi8 (df_ee_tbl, tmp1); | |||
tmp2 = _mm256_adds_epu8 (pos, _mm256_set1_epi8 (112)); | |||
range2 = _mm256_add_epi8 (range2, _mm256_shuffle_epi8 (ef_fe_tbl, tmp2)); | |||
range = _mm256_add_epi8 (range, range2); | |||
/* Load min and max values per calculated range index */ | |||
__m256i minv = _mm256_shuffle_epi8 (range_min_tbl, range); | |||
__m256i maxv = _mm256_shuffle_epi8 (range_max_tbl, range); | |||
/* Check value range */ | |||
error = _mm256_cmpgt_epi8(minv, input); | |||
error = _mm256_or_si256(error, _mm256_cmpgt_epi8(input, maxv)); | |||
/* 5% performance drop from this conditional branch */ | |||
if (!_mm256_testz_si256(error, error)) { | |||
break; | |||
} | |||
prev_input = input; | |||
prev_first_len = first_len; | |||
data += 32; | |||
len -= 32; | |||
err_pos += 32; | |||
} | |||
/* Error in first 16 bytes */ | |||
if (err_pos == 1) { | |||
goto do_naive; | |||
} | |||
/* Find previous token (not 80~BF) */ | |||
int32_t token4 = _mm256_extract_epi32 (prev_input, 7); | |||
const int8_t *token = (const int8_t *) &token4; | |||
int lookahead = 0; | |||
if (token[3] > (int8_t) 0xBF) { | |||
lookahead = 1; | |||
} | |||
else if (token[2] > (int8_t) 0xBF) { | |||
lookahead = 2; | |||
} | |||
else if (token[1] > (int8_t) 0xBF) { | |||
lookahead = 3; | |||
} | |||
data -= lookahead; | |||
len += lookahead; | |||
err_pos -= lookahead; | |||
} | |||
/* Check remaining bytes with naive method */ | |||
do_naive: | |||
if (len > 0) { | |||
off_t err_pos2 = rspamd_fast_utf8_validate_ref (data, len); | |||
if (err_pos2) { | |||
return err_pos + err_pos2 - 1; | |||
} | |||
} | |||
return 0; | |||
} | |||
#ifndef __clang__ | |||
#pragma GCC pop_options | |||
#endif | |||
@@ -0,0 +1,158 @@ | |||
/* | |||
* MIT License | |||
* | |||
* Copyright (c) 2019 Yibo Cai | |||
* Copyright (c) 2019 Vsevolod Stakhov | |||
* Permission is hereby granted, free of charge, to any person obtaining a copy | |||
* of this software and associated documentation files (the "Software"), to deal | |||
* in the Software without restriction, including without limitation the rights | |||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
* copies of the Software, and to permit persons to whom the Software is | |||
* furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be included in all | |||
* copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
* SOFTWARE. | |||
*/ | |||
#include "fastutf8.h" | |||
#include "platform_config.h" | |||
/* | |||
* http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94 | |||
* | |||
* Table 3-7. Well-Formed UTF-8 Byte Sequences | |||
* | |||
* +--------------------+------------+-------------+------------+-------------+ | |||
* | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte | | |||
* +--------------------+------------+-------------+------------+-------------+ | |||
* | U+0000..U+007F | 00..7F | | | | | |||
* +--------------------+------------+-------------+------------+-------------+ | |||
* | U+0080..U+07FF | C2..DF | 80..BF | | | | |||
* +--------------------+------------+-------------+------------+-------------+ | |||
* | U+0800..U+0FFF | E0 | A0..BF | 80..BF | | | |||
* +--------------------+------------+-------------+------------+-------------+ | |||
* | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | | | |||
* +--------------------+------------+-------------+------------+-------------+ | |||
* | U+D000..U+D7FF | ED | 80..9F | 80..BF | | | |||
* +--------------------+------------+-------------+------------+-------------+ | |||
* | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | | | |||
* +--------------------+------------+-------------+------------+-------------+ | |||
* | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF | | |||
* +--------------------+------------+-------------+------------+-------------+ | |||
* | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF | | |||
* +--------------------+------------+-------------+------------+-------------+ | |||
* | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF | | |||
* +--------------------+------------+-------------+------------+-------------+ | |||
*/ | |||
/* Return 0 - success, >0 - index (1 based) of first error char */ | |||
off_t | |||
rspamd_fast_utf8_validate_ref (const unsigned char *data, size_t len) | |||
{ | |||
off_t err_pos = 1; | |||
while (len) { | |||
int bytes; | |||
const unsigned char byte1 = data[0]; | |||
/* 00..7F */ | |||
if (byte1 <= 0x7F) { | |||
bytes = 1; | |||
/* C2..DF, 80..BF */ | |||
} | |||
else if (len >= 2 && byte1 >= 0xC2 && byte1 <= 0xDF && | |||
(signed char) data[1] <= (signed char) 0xBF) { | |||
bytes = 2; | |||
} | |||
else if (len >= 3) { | |||
const unsigned char byte2 = data[1]; | |||
/* Is byte2, byte3 between 0x80 ~ 0xBF */ | |||
const int byte2_ok = (signed char) byte2 <= (signed char) 0xBF; | |||
const int byte3_ok = (signed char) data[2] <= (signed char) 0xBF; | |||
if (byte2_ok && byte3_ok && | |||
/* E0, A0..BF, 80..BF */ | |||
((byte1 == 0xE0 && byte2 >= 0xA0) || | |||
/* E1..EC, 80..BF, 80..BF */ | |||
(byte1 >= 0xE1 && byte1 <= 0xEC) || | |||
/* ED, 80..9F, 80..BF */ | |||
(byte1 == 0xED && byte2 <= 0x9F) || | |||
/* EE..EF, 80..BF, 80..BF */ | |||
(byte1 >= 0xEE && byte1 <= 0xEF))) { | |||
bytes = 3; | |||
} | |||
else if (len >= 4) { | |||
/* Is byte4 between 0x80 ~ 0xBF */ | |||
const int byte4_ok = (signed char) data[3] <= (signed char) 0xBF; | |||
if (byte2_ok && byte3_ok && byte4_ok && | |||
/* F0, 90..BF, 80..BF, 80..BF */ | |||
((byte1 == 0xF0 && byte2 >= 0x90) || | |||
/* F1..F3, 80..BF, 80..BF, 80..BF */ | |||
(byte1 >= 0xF1 && byte1 <= 0xF3) || | |||
/* F4, 80..8F, 80..BF, 80..BF */ | |||
(byte1 == 0xF4 && byte2 <= 0x8F))) { | |||
bytes = 4; | |||
} | |||
else { | |||
return err_pos; | |||
} | |||
} | |||
else { | |||
return err_pos; | |||
} | |||
} | |||
else { | |||
return err_pos; | |||
} | |||
len -= bytes; | |||
err_pos += bytes; | |||
data += bytes; | |||
} | |||
return 0; | |||
} | |||
/* Prototypes */ | |||
#ifdef HAVE_SSSE3 | |||
extern off_t rspamd_fast_utf8_validate_sse41 (const unsigned char *data, size_t len); | |||
#endif | |||
#ifdef HAVE_AVX2 | |||
extern off_t rspamd_fast_utf8_validate_avx2 (const unsigned char *data, size_t len); | |||
#endif | |||
static off_t (*validate_func) (const unsigned char *data, size_t len) = | |||
rspamd_fast_utf8_validate_ref; | |||
void | |||
rspamd_fast_utf8_library_init (unsigned flags) | |||
{ | |||
#ifdef HAVE_SSSE3 | |||
if (flags & RSPAMD_FAST_UTF8_FLAG_SSE41) { | |||
validate_func = rspamd_fast_utf8_validate_sse41; | |||
} | |||
#endif | |||
#ifdef HAVE_AVX2 | |||
if (flags & RSPAMD_FAST_UTF8_FLAG_AVX2) { | |||
validate_func = rspamd_fast_utf8_validate_avx2; | |||
} | |||
#endif | |||
} | |||
off_t | |||
rspamd_fast_utf8_validate (const unsigned char *data, size_t len) | |||
{ | |||
return validate_func (data, len); | |||
} |
@@ -0,0 +1,59 @@ | |||
/* | |||
* MIT License | |||
* | |||
* Copyright (c) 2019 Yibo Cai | |||
* Copyright (c) 2019 Vsevolod Stakhov | |||
* Permission is hereby granted, free of charge, to any person obtaining a copy | |||
* of this software and associated documentation files (the "Software"), to deal | |||
* in the Software without restriction, including without limitation the rights | |||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
* copies of the Software, and to permit persons to whom the Software is | |||
* furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be included in all | |||
* copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
* SOFTWARE. | |||
*/ | |||
#ifndef RSPAMD_FASTUTF8_H | |||
#define RSPAMD_FASTUTF8_H | |||
#include <sys/types.h> | |||
#include <stdbool.h> | |||
#include <stdint.h> | |||
enum rspamd_fast_utf8_cpu_flags { | |||
RSPAMD_FAST_UTF8_FLAG_SSE41 = 1u << 0u, | |||
RSPAMD_FAST_UTF8_FLAG_AVX2 = 1u << 1u, | |||
}; | |||
/** | |||
* Called to init codecs | |||
* @param flags | |||
*/ | |||
void rspamd_fast_utf8_library_init (unsigned flags); | |||
/** | |||
* Called to validate input using fast codec | |||
* @param data | |||
* @param len | |||
* @return | |||
*/ | |||
off_t rspamd_fast_utf8_validate (const unsigned char *data, size_t len); | |||
/** | |||
* Use plain C implementation | |||
* @param data | |||
* @param len | |||
* @return | |||
*/ | |||
off_t rspamd_fast_utf8_validate_ref (const unsigned char *data, size_t len); | |||
#endif |
@@ -0,0 +1,12 @@ | |||
#ifndef PLATFORM_H_CONFIG | |||
#define PLATFORM_H_CONFIG | |||
#define ARCH "${ARCH}" | |||
#define CMAKE_ARCH_${ARCH} 1 | |||
#ifdef __x86_64__ | |||
#cmakedefine HAVE_AVX2 1 | |||
#cmakedefine HAVE_SSE41 1 | |||
#endif | |||
#endif |
@@ -0,0 +1,272 @@ | |||
/* | |||
* MIT License | |||
* | |||
* Copyright (c) 2019 Yibo Cai | |||
* Copyright (c) 2019 Vsevolod Stakhov | |||
* Permission is hereby granted, free of charge, to any person obtaining a copy | |||
* of this software and associated documentation files (the "Software"), to deal | |||
* in the Software without restriction, including without limitation the rights | |||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
* copies of the Software, and to permit persons to whom the Software is | |||
* furnished to do so, subject to the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be included in all | |||
* copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
* SOFTWARE. | |||
*/ | |||
#include "config.h" | |||
#include "fastutf8.h" | |||
#include "platform_config.h" | |||
#ifndef __clang__ | |||
#pragma GCC push_options | |||
#pragma GCC target("sse4.1") | |||
#endif | |||
#ifndef __SSE2__ | |||
#define __SSE2__ | |||
#endif | |||
#ifndef __SSE__ | |||
#define __SSE__ | |||
#endif | |||
#ifndef __SSEE3__ | |||
#define __SSEE3__ | |||
#endif | |||
#ifndef __SSE4_1__ | |||
#define __SSE4_1__ | |||
#endif | |||
#include <smmintrin.h> | |||
/* | |||
* Map high nibble of "First Byte" to legal character length minus 1 | |||
* 0x00 ~ 0xBF --> 0 | |||
* 0xC0 ~ 0xDF --> 1 | |||
* 0xE0 ~ 0xEF --> 2 | |||
* 0xF0 ~ 0xFF --> 3 | |||
*/ | |||
static const int8_t _first_len_tbl[] = { | |||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3, | |||
}; | |||
/* Map "First Byte" to 8-th item of range table (0xC2 ~ 0xF4) */ | |||
static const int8_t _first_range_tbl[] = { | |||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, | |||
}; | |||
/* | |||
* Range table, map range index to min and max values | |||
* Index 0 : 00 ~ 7F (First Byte, ascii) | |||
* Index 1,2,3: 80 ~ BF (Second, Third, Fourth Byte) | |||
* Index 4 : A0 ~ BF (Second Byte after E0) | |||
* Index 5 : 80 ~ 9F (Second Byte after ED) | |||
* Index 6 : 90 ~ BF (Second Byte after F0) | |||
* Index 7 : 80 ~ 8F (Second Byte after F4) | |||
* Index 8 : C2 ~ F4 (First Byte, non ascii) | |||
* Index 9~15 : illegal: i >= 127 && i <= -128 | |||
*/ | |||
static const int8_t _range_min_tbl[] = { | |||
0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80, | |||
0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, | |||
}; | |||
static const int8_t _range_max_tbl[] = { | |||
0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F, | |||
0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, | |||
}; | |||
/* | |||
* Tables for fast handling of four special First Bytes(E0,ED,F0,F4), after | |||
* which the Second Byte are not 80~BF. It contains "range index adjustment". | |||
* +------------+---------------+------------------+----------------+ | |||
* | First Byte | original range| range adjustment | adjusted range | | |||
* +------------+---------------+------------------+----------------+ | |||
* | E0 | 2 | 2 | 4 | | |||
* +------------+---------------+------------------+----------------+ | |||
* | ED | 2 | 3 | 5 | | |||
* +------------+---------------+------------------+----------------+ | |||
* | F0 | 3 | 3 | 6 | | |||
* +------------+---------------+------------------+----------------+ | |||
* | F4 | 4 | 4 | 8 | | |||
* +------------+---------------+------------------+----------------+ | |||
*/ | |||
/* index1 -> E0, index14 -> ED */ | |||
static const int8_t _df_ee_tbl[] = { | |||
0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, | |||
}; | |||
/* index1 -> F0, index5 -> F4 */ | |||
static const int8_t _ef_fe_tbl[] = { | |||
0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |||
}; | |||
off_t | |||
rspamd_fast_utf8_validate_sse41 (const unsigned char *data, size_t len) | |||
__attribute__((__target__("sse4.1"))); | |||
/* Return 0 - success, >0 - first error char(if RET_ERR_IDX = 1) */ | |||
off_t | |||
rspamd_fast_utf8_validate_sse41 (const unsigned char *data, size_t len) | |||
{ | |||
off_t err_pos = 1; | |||
if (len >= 16) { | |||
__m128i prev_input = _mm_set1_epi8 (0); | |||
__m128i prev_first_len = _mm_set1_epi8 (0); | |||
/* Cached tables */ | |||
const __m128i first_len_tbl = | |||
_mm_lddqu_si128 ((const __m128i *) _first_len_tbl); | |||
const __m128i first_range_tbl = | |||
_mm_lddqu_si128 ((const __m128i *) _first_range_tbl); | |||
const __m128i range_min_tbl = | |||
_mm_lddqu_si128 ((const __m128i *) _range_min_tbl); | |||
const __m128i range_max_tbl = | |||
_mm_lddqu_si128 ((const __m128i *) _range_max_tbl); | |||
const __m128i df_ee_tbl = | |||
_mm_lddqu_si128 ((const __m128i *) _df_ee_tbl); | |||
const __m128i ef_fe_tbl = | |||
_mm_lddqu_si128 ((const __m128i *) _ef_fe_tbl); | |||
__m128i error = _mm_set1_epi8 (0); | |||
while (len >= 16) { | |||
const __m128i input = _mm_lddqu_si128 ((const __m128i *) data); | |||
/* high_nibbles = input >> 4 */ | |||
const __m128i high_nibbles = | |||
_mm_and_si128 (_mm_srli_epi16 (input, 4), _mm_set1_epi8 (0x0F)); | |||
/* first_len = legal character length minus 1 */ | |||
/* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */ | |||
/* first_len = first_len_tbl[high_nibbles] */ | |||
__m128i first_len = _mm_shuffle_epi8 (first_len_tbl, high_nibbles); | |||
/* First Byte: set range index to 8 for bytes within 0xC0 ~ 0xFF */ | |||
/* range = first_range_tbl[high_nibbles] */ | |||
__m128i range = _mm_shuffle_epi8 (first_range_tbl, high_nibbles); | |||
/* Second Byte: set range index to first_len */ | |||
/* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */ | |||
/* range |= (first_len, prev_first_len) << 1 byte */ | |||
range = _mm_or_si128 ( | |||
range, _mm_alignr_epi8(first_len, prev_first_len, 15)); | |||
/* Third Byte: set range index to saturate_sub(first_len, 1) */ | |||
/* 0 for 00~7F, 0 for C0~DF, 1 for E0~EF, 2 for F0~FF */ | |||
__m128i tmp1, tmp2; | |||
/* tmp1 = saturate_sub(first_len, 1) */ | |||
tmp1 = _mm_subs_epu8 (first_len, _mm_set1_epi8 (1)); | |||
/* tmp2 = saturate_sub(prev_first_len, 1) */ | |||
tmp2 = _mm_subs_epu8 (prev_first_len, _mm_set1_epi8 (1)); | |||
/* range |= (tmp1, tmp2) << 2 bytes */ | |||
range = _mm_or_si128 (range, _mm_alignr_epi8(tmp1, tmp2, 14)); | |||
/* Fourth Byte: set range index to saturate_sub(first_len, 2) */ | |||
/* 0 for 00~7F, 0 for C0~DF, 0 for E0~EF, 1 for F0~FF */ | |||
/* tmp1 = saturate_sub(first_len, 2) */ | |||
tmp1 = _mm_subs_epu8 (first_len, _mm_set1_epi8 (2)); | |||
/* tmp2 = saturate_sub(prev_first_len, 2) */ | |||
tmp2 = _mm_subs_epu8 (prev_first_len, _mm_set1_epi8 (2)); | |||
/* range |= (tmp1, tmp2) << 3 bytes */ | |||
range = _mm_or_si128 (range, _mm_alignr_epi8(tmp1, tmp2, 13)); | |||
/* | |||
* Now we have below range indices caluclated | |||
* Correct cases: | |||
* - 8 for C0~FF | |||
* - 3 for 1st byte after F0~FF | |||
* - 2 for 1st byte after E0~EF or 2nd byte after F0~FF | |||
* - 1 for 1st byte after C0~DF or 2nd byte after E0~EF or | |||
* 3rd byte after F0~FF | |||
* - 0 for others | |||
* Error cases: | |||
* 9,10,11 if non ascii First Byte overlaps | |||
* E.g., F1 80 C2 90 --> 8 3 10 2, where 10 indicates error | |||
*/ | |||
/* Adjust Second Byte range for special First Bytes(E0,ED,F0,F4) */ | |||
/* Overlaps lead to index 9~15, which are illegal in range table */ | |||
__m128i shift1, pos, range2; | |||
/* shift1 = (input, prev_input) << 1 byte */ | |||
shift1 = _mm_alignr_epi8(input, prev_input, 15); | |||
pos = _mm_sub_epi8 (shift1, _mm_set1_epi8 (0xEF)); | |||
/* | |||
* shift1: | EF F0 ... FE | FF 00 ... ... DE | DF E0 ... EE | | |||
* pos: | 0 1 15 | 16 17 239| 240 241 255| | |||
* pos-240: | 0 0 0 | 0 0 0 | 0 1 15 | | |||
* pos+112: | 112 113 127| >= 128 | >= 128 | | |||
*/ | |||
tmp1 = _mm_subs_epu8 (pos, _mm_set1_epi8 ((char)240)); | |||
range2 = _mm_shuffle_epi8 (df_ee_tbl, tmp1); | |||
tmp2 = _mm_adds_epu8 (pos, _mm_set1_epi8 (112)); | |||
range2 = _mm_add_epi8 (range2, _mm_shuffle_epi8 (ef_fe_tbl, tmp2)); | |||
range = _mm_add_epi8 (range, range2); | |||
/* Load min and max values per calculated range index */ | |||
__m128i minv = _mm_shuffle_epi8 (range_min_tbl, range); | |||
__m128i maxv = _mm_shuffle_epi8 (range_max_tbl, range); | |||
/* Check value range */ | |||
error = _mm_cmplt_epi8(input, minv); | |||
error = _mm_or_si128(error, _mm_cmpgt_epi8(input, maxv)); | |||
/* 5% performance drop from this conditional branch */ | |||
if (!_mm_testz_si128(error, error)) { | |||
break; | |||
} | |||
prev_input = input; | |||
prev_first_len = first_len; | |||
data += 16; | |||
len -= 16; | |||
err_pos += 16; | |||
} | |||
/* Error in first 16 bytes */ | |||
if (err_pos == 1) { | |||
goto do_naive; | |||
} | |||
/* Find previous token (not 80~BF) */ | |||
int32_t token4 = _mm_extract_epi32 (prev_input, 3); | |||
const int8_t *token = (const int8_t *) &token4; | |||
int lookahead = 0; | |||
if (token[3] > (int8_t) 0xBF) { | |||
lookahead = 1; | |||
} | |||
else if (token[2] > (int8_t) 0xBF) { | |||
lookahead = 2; | |||
} | |||
else if (token[1] > (int8_t) 0xBF) { | |||
lookahead = 3; | |||
} | |||
data -= lookahead; | |||
len += lookahead; | |||
err_pos -= lookahead; | |||
} | |||
do_naive: | |||
if (len > 0) { | |||
off_t err_pos2 = rspamd_fast_utf8_validate_ref (data, len); | |||
if (err_pos2) { | |||
return err_pos + err_pos2 - 1; | |||
} | |||
} | |||
return 0; | |||
} | |||
#ifndef __clang__ | |||
#pragma GCC pop_options | |||
#endif |
@@ -184,6 +184,7 @@ TARGET_LINK_LIBRARIES(rspamd-server rspamd-cdb) | |||
TARGET_LINK_LIBRARIES(rspamd-server rspamd-lpeg) | |||
TARGET_LINK_LIBRARIES(rspamd-server lcbtrie) | |||
TARGET_LINK_LIBRARIES(rspamd-server rspamd-zstd) | |||
TARGET_LINK_LIBRARIES(rspamd-server rspamd-fastutf8) | |||
IF (ENABLE_CLANG_PLUGIN MATCHES "ON") | |||
ADD_DEPENDENCIES(rspamd-server rspamd-clang) |
@@ -7,19 +7,19 @@ SET(BASE64SRC ${CMAKE_CURRENT_SOURCE_DIR}/base64/ref.c | |||
IF(HAVE_AVX2) | |||
SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/avx2.S) | |||
SET(BASE64SRC ${BASE64SRC} ${CMAKE_CURRENT_SOURCE_DIR}/base64/avx2.c) | |||
MESSAGE(STATUS "AVX2 support is added") | |||
MESSAGE(STATUS "Cryptobox: AVX2 support is added (chacha20, avx2)") | |||
ENDIF(HAVE_AVX2) | |||
IF(HAVE_AVX) | |||
SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/avx.S) | |||
MESSAGE(STATUS "AVX support is added") | |||
MESSAGE(STATUS "Cryptobox: AVX support is added (chacha20)") | |||
ENDIF(HAVE_AVX) | |||
IF(HAVE_SSE2) | |||
SET(CHACHASRC ${CHACHASRC} ${CMAKE_CURRENT_SOURCE_DIR}/chacha20/sse2.S) | |||
MESSAGE(STATUS "SSE2 support is added") | |||
MESSAGE(STATUS "Cryptobox: SSE2 support is added (chacha20)") | |||
ENDIF(HAVE_SSE2) | |||
IF(HAVE_SSE42) | |||
SET(BASE64SRC ${BASE64SRC} ${CMAKE_CURRENT_SOURCE_DIR}/base64/sse42.c) | |||
MESSAGE(STATUS "SSE42 support is added") | |||
MESSAGE(STATUS "Cryptobox: SSE42 support is added (base64)") | |||
ENDIF(HAVE_SSE42) | |||
CONFIGURE_FILE(platform_config.h.in platform_config.h) |