blob: c25675e3e91cf526692cc3e359127f59ec789af0 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
|
std::pair<const char *, char32_t *>
sse_convert_latin1_to_utf32(const char *buf, size_t len,
char32_t *utf32_output) {
const char *end = buf + len;
while (end - buf >= 16) {
// Load 16 Latin1 characters (16 bytes) into a 128-bit register
__m128i in = _mm_loadu_si128((__m128i *)buf);
// Shift input to process next 4 bytes
__m128i in_shifted1 = _mm_srli_si128(in, 4);
__m128i in_shifted2 = _mm_srli_si128(in, 8);
__m128i in_shifted3 = _mm_srli_si128(in, 12);
// expand 8-bit to 32-bit unit
__m128i out1 = _mm_cvtepu8_epi32(in);
__m128i out2 = _mm_cvtepu8_epi32(in_shifted1);
__m128i out3 = _mm_cvtepu8_epi32(in_shifted2);
__m128i out4 = _mm_cvtepu8_epi32(in_shifted3);
_mm_storeu_si128((__m128i *)utf32_output, out1);
_mm_storeu_si128((__m128i *)(utf32_output + 4), out2);
_mm_storeu_si128((__m128i *)(utf32_output + 8), out3);
_mm_storeu_si128((__m128i *)(utf32_output + 12), out4);
utf32_output += 16;
buf += 16;
}
return std::make_pair(buf, utf32_output);
}
|