1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
const char32_t *lasx_validate_utf32le(const char32_t *input, size_t size) {
const char32_t *end = input + size;
// Performance degradation when memory address is not 32-byte aligned
while (((uint64_t)input & 0x1F) && input < end) {
uint32_t word = *input++;
if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) {
return nullptr;
}
}
__m256i offset = __lasx_xvreplgr2vr_w(uint32_t(0xffff2000));
__m256i standardoffsetmax = __lasx_xvreplgr2vr_w(uint32_t(0xfffff7ff));
__m256i standardmax = __lasx_xvldi(-2288); /*0x10ffff*/
__m256i currentmax = __lasx_xvldi(0x0);
__m256i currentoffsetmax = __lasx_xvldi(0x0);
while (input + 8 < end) {
__m256i in = __lasx_xvld(reinterpret_cast<const uint32_t *>(input), 0);
currentmax = __lasx_xvmax_wu(in, currentmax);
// 0xD8__ + 0x2000 = 0xF8__ => 0xF8__ > 0xF7FF
currentoffsetmax =
__lasx_xvmax_wu(__lasx_xvadd_w(in, offset), currentoffsetmax);
input += 8;
}
__m256i is_zero =
__lasx_xvxor_v(__lasx_xvmax_wu(currentmax, standardmax), standardmax);
if (__lasx_xbnz_v(is_zero)) {
return nullptr;
}
is_zero = __lasx_xvxor_v(__lasx_xvmax_wu(currentoffsetmax, standardoffsetmax),
standardoffsetmax);
if (__lasx_xbnz_v(is_zero)) {
return nullptr;
}
return input;
}
/**
 * LASX (256-bit) pre-validation of a UTF-32LE buffer with error reporting.
 *
 * A code unit is invalid when it is greater than 0x10FFFF (TOO_LARGE) or when
 * it lies in the surrogate range 0xD800..0xDFFF (SURROGATE).
 *
 * The position carried by the returned result is exact only for errors found
 * by the scalar prologue. When the vector loop detects an error, the reported
 * position is the index of the first code unit of the offending 8-unit block,
 * and TOO_LARGE is tested before SURROGATE, so the reported code need not
 * belong to the first invalid unit of that block. On SUCCESS the position is
 * the number of code units actually examined, which can be up to 8 less than
 * `size`. The caller presumably rescans from the reported position with a
 * scalar routine whenever it differs from `size` -- confirm against the call
 * site.
 *
 * @param input pointer to the first code unit of the buffer
 * @param size  number of char32_t code units in the buffer
 * @return result holding the error code and a code-unit index relative to
 *         `input`, as described above
 */
const result lasx_validate_utf32le_with_errors(const char32_t *input,
                                               size_t size) {
  const char32_t *start = input;
  const char32_t *end = input + size;
  // Scalar prologue: vector loads degrade in performance when the address is
  // not 32-byte aligned, so validate one code unit at a time until `input`
  // reaches a 32-byte boundary (or the buffer is exhausted).
  while ((reinterpret_cast<uintptr_t>(input) & 0x1F) != 0 && input < end) {
    const uint32_t word = *input;
    if (word > 0x10FFFF) {
      return result(error_code::TOO_LARGE, input - start);
    }
    if (word >= 0xD800 && word <= 0xDFFF) {
      return result(error_code::SURROGATE, input - start);
    }
    input++;
  }
  const __m256i offset = __lasx_xvreplgr2vr_w(uint32_t(0xffff2000));
  const __m256i standardoffsetmax = __lasx_xvreplgr2vr_w(uint32_t(0xfffff7ff));
  const __m256i standardmax = __lasx_xvldi(-2288); /*0x10ffff*/
  // Running per-lane unsigned maxima. Because the loop returns as soon as a
  // limit is exceeded, all earlier blocks are known to be clean, so a
  // violation seen in the accumulators always stems from the current block.
  __m256i currentmax = __lasx_xvldi(0x0);
  __m256i currentoffsetmax = __lasx_xvldi(0x0);
  // `end - input > 8` is the same condition as `input + 8 < end`, but it never
  // forms a pointer beyond one-past-the-end of the buffer (which would be
  // undefined behaviour when fewer than 8 code units remain).
  while (end - input > 8) {
    const __m256i in =
        __lasx_xvld(reinterpret_cast<const uint32_t *>(input), 0);
    currentmax = __lasx_xvmax_wu(in, currentmax);
    // Adding 0xFFFF2000 (mod 2^32) maps the surrogate range 0xD800..0xDFFF
    // onto 0xFFFFF800..0xFFFFFFFF, i.e. strictly above 0xFFFFF7FF.
    currentoffsetmax =
        __lasx_xvmax_wu(__lasx_xvadd_w(in, offset), currentoffsetmax);
    // max(x, limit) ^ limit is non-zero in a lane exactly when x > limit.
    __m256i is_zero =
        __lasx_xvxor_v(__lasx_xvmax_wu(currentmax, standardmax), standardmax);
    if (__lasx_xbnz_v(is_zero)) {
      // Position is the start of the block, not the exact offending unit.
      return result(error_code::TOO_LARGE, input - start);
    }
    is_zero =
        __lasx_xvxor_v(__lasx_xvmax_wu(currentoffsetmax, standardoffsetmax),
                       standardoffsetmax);
    if (__lasx_xbnz_v(is_zero)) {
      // Position is the start of the block, not the exact offending unit.
      return result(error_code::SURROGATE, input - start);
    }
    input += 8;
  }
  return result(error_code::SUCCESS, input - start);
}
|