1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
// file included directly
// Classify each (previous byte, current byte) pair with three 16-entry nibble
// lookups and keep only the flag bits on which all three lookups agree.
//
//   input  the 64 bytes currently being validated
//   prev1  companion vector produced by prev<1>() at the call site
//          (presumably `input` delayed by one byte -- prev<> is defined
//          elsewhere, confirm there)
//
// Returns, per byte, the bitwise AND of the three table entries selected by
// the high nibble of prev1, the low nibble of prev1 and the high nibble of
// input.
// NOTE(review): the meaning of the individual flag bits stored in the tables
// is not spelled out in this file; confirm against the table's origin.
simdutf_really_inline __m512i check_special_cases(__m512i input,
                                                  const __m512i prev1) {
  const __m512i nibble_mask = _mm512_set1_epi8(0x0f);

  // Nibble indices. The right shift operates on 16-bit lanes, so bits of the
  // neighbouring byte spill into the top of each byte; the 0x0f mask removes
  // them (and keeps bit 7 clear, so the shuffle never zeroes an entry).
  const __m512i prev1_hi_idx =
      _mm512_and_si512(_mm512_srli_epi16(prev1, 4), nibble_mask);
  const __m512i prev1_lo_idx = _mm512_and_si512(prev1, nibble_mask);
  const __m512i input_hi_idx =
      _mm512_and_si512(_mm512_srli_epi16(input, 4), nibble_mask);

  // Each 16-byte table is repeated four times because _mm512_shuffle_epi8
  // performs its lookup independently inside every 128-bit lane.
  const __m512i prev1_hi_table = _mm512_setr_epi64(
      0x0202020202020202, 0x4915012180808080,
      0x0202020202020202, 0x4915012180808080,
      0x0202020202020202, 0x4915012180808080,
      0x0202020202020202, 0x4915012180808080);
  const __m512i prev1_lo_table = _mm512_setr_epi64(
      0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb,
      0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb,
      0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb,
      0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb);
  const __m512i input_hi_table = _mm512_setr_epi64(
      0x101010101010101, 0x1010101babaaee6,
      0x101010101010101, 0x1010101babaaee6,
      0x101010101010101, 0x1010101babaaee6,
      0x101010101010101, 0x1010101babaaee6);

  const __m512i prev1_hi_flags =
      _mm512_shuffle_epi8(prev1_hi_table, prev1_hi_idx);
  const __m512i prev1_lo_flags =
      _mm512_shuffle_epi8(prev1_lo_table, prev1_lo_idx);
  const __m512i input_hi_flags =
      _mm512_shuffle_epi8(input_hi_table, input_hi_idx);

  // imm8 = 128 (only truth-table bit 7 set) computes a AND b AND c.
  return _mm512_ternarylogic_epi64(prev1_hi_flags, prev1_lo_flags,
                                   input_hi_flags, 128);
}
// Fold the "this position must hold the 3rd or 4th byte of a sequence"
// information into the special-case flags `sc`.
//
//   input       the 64 bytes currently being validated
//   prev_input  the block passed alongside `input` to prev<N>()
//   sc          result of check_special_cases() for `input`
//
// Returns (must_be_3rd_or_4th AND 0x80) XOR sc, byte by byte.
// NOTE(review): prev<2>/prev<3> are defined elsewhere; they presumably yield
// `input` delayed by two and three bytes respectively -- confirm there.
simdutf_really_inline __m512i check_multibyte_lengths(const __m512i input,
                                                      const __m512i prev_input,
                                                      const __m512i sc) {
  const __m512i two_back = prev<2>(input, prev_input);
  const __m512i three_back = prev<3>(input, prev_input);

  // Saturating subtraction leaves a nonzero byte only where the source byte
  // is at least 0b111_____ (two back) or 0b1111____ (three back).
  const __m512i follows_111 =
      _mm512_subs_epu8(two_back, _mm512_set1_epi8(0b11100000u - 1));
  const __m512i follows_1111 =
      _mm512_subs_epu8(three_back, _mm512_set1_epi8(0b11110000u - 1));

  // Adding 0x7f moves "nonzero" into bit 7: zero becomes 0x7f, anything
  // nonzero becomes >= 0x80.
  const __m512i must_be_3rd_or_4th =
      _mm512_adds_epu8(_mm512_set1_epi8(char(0x7f)),
                       _mm512_or_si512(follows_111, follows_1111));

  // imm8 = 0b1101010 computes (a AND b) XOR c in one instruction; it stands
  // in for an explicit AND with 0x80 followed by an XOR with sc.
  const __m512i bit7 = _mm512_set1_epi8(char(0x80));
  return _mm512_ternarylogic_epi32(must_be_3rd_or_4th, bit7, sc, 0b1101010);
}
//
// Report whether a 64-byte block ends in the middle of a multibyte character,
// e.g. a 4-byte character that starts only 3 bytes before the end.
//
// The result is nonzero exactly when one of the last three bytes is a lead
// byte whose sequence cannot finish inside the block:
//   byte 61 >= 0b1111____   (needs three more bytes)
//   byte 62 >= 0b111_____   (needs at least two more bytes)
//   byte 63 >= 0b11______   (needs at least one more byte)
//
simdutf_really_inline __m512i is_incomplete(const __m512i input) {
  // Per-byte thresholds for a saturating subtraction. Bytes 0..60 are 0xff,
  // so they can never produce a nonzero difference; the top three bytes of
  // the last 64-bit word hold 0xef, 0xdf and 0xbf (bytes 61, 62 and 63).
  const __m512i largest_complete = _mm512_setr_epi64(
      0xffffffffffffffff, 0xffffffffffffffff,
      0xffffffffffffffff, 0xffffffffffffffff,
      0xffffffffffffffff, 0xffffffffffffffff,
      0xffffffffffffffff, 0xbfdfefffffffffff);
  const __m512i overshoot = _mm512_subs_epu8(input, largest_complete);
  return overshoot;
}
// Streaming UTF-8 validator state, fed one 64-byte block at a time through
// check_next_input(). Call check_eof() after the final block, then errors().
struct avx512_utf8_checker {
  // Accumulated error flags; any nonzero byte means invalid UTF-8 was seen.
  __m512i error{};
  // The most recent non-ASCII block that was validated. The ASCII fast path
  // in check_next_input() leaves this untouched.
  __m512i prev_input_block{};
  // is_incomplete() of that same block; nonzero when it ended in the middle
  // of a multibyte character. Consulted by the ASCII fast path and at EOF.
  __m512i prev_incomplete{};

  //
  // Validate `input`, using `prev_input` as the block that precedes it, and
  // merge any problems found into `error`.
  //
  simdutf_really_inline void check_utf8_bytes(const __m512i input,
                                              const __m512i prev_input) {
    const __m512i prev1 = prev<1>(input, prev_input);
    const __m512i special = check_special_cases(input, prev1);
    const __m512i found = check_multibyte_lengths(input, prev_input, special);
    error = _mm512_or_si512(found, error);
  }

  // At end of input the only problem left to detect is a multibyte character
  // that was cut short by the last non-ASCII block; fold that into `error`.
  simdutf_really_inline void check_eof() {
    error = _mm512_or_si512(error, prev_incomplete);
  }

  // Process the next 64-byte block. Returns true when the block is pure
  // ASCII (no byte has bit 7 set), false otherwise.
  simdutf_really_inline bool check_next_input(const __m512i input) {
    const __m512i bit7 = _mm512_set1_epi8(char(0x80));
    // One mask bit per byte, set where that byte has its high bit set.
    const __mmask64 non_ascii = _mm512_test_epi8_mask(input, bit7);
    if (non_ascii != 0) {
      check_utf8_bytes(input, prev_input_block);
      prev_incomplete = is_incomplete(input);
      prev_input_block = input;
      return false;
    }
    // ASCII fast path: an all-ASCII block cannot complete a character left
    // unfinished by the previous non-ASCII block, so that is an error.
    error = _mm512_or_si512(error, prev_incomplete);
    return true;
  }

  // True when any error has been recorded so far. Call check_eof() first
  // once the whole input has been fed in.
  simdutf_really_inline bool errors() const {
    return _mm512_test_epi8_mask(error, error) != 0;
  }
}; // struct avx512_utf8_checker
|