aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/simdutf/src/icelake/icelake_convert_utf32_to_latin1.inl.cpp
blob: 1e7e4296e7a6a3b12891dea583f0e3df8e2bd4d2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
// file included directly
size_t icelake_convert_utf32_to_latin1(const char32_t *buf, size_t len,
                                       char *latin1_output) {
  const char32_t *end = buf + len;
  __m512i v_0xFF = _mm512_set1_epi32(0xff);
  __m512i shufmask = _mm512_set_epi8(
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60,
      56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0);
  while (end - buf >= 16) {
    __m512i in = _mm512_loadu_si512((__m512i *)buf);
    if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) {
      return 0;
    }
    _mm_storeu_si128(
        (__m128i *)latin1_output,
        _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in)));
    latin1_output += 16;
    buf += 16;
  }
  if (buf < end) {
    uint16_t mask = uint16_t((1 << (end - buf)) - 1);
    __m512i in = _mm512_maskz_loadu_epi32(mask, buf);
    if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) {
      return 0;
    }
    _mm_mask_storeu_epi8(
        latin1_output, mask,
        _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in)));
  }
  return len;
}

std::pair<result, char *>
icelake_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len,
                                            char *latin1_output) {
  const char32_t *end = buf + len;
  const char32_t *start = buf;
  __m512i v_0xFF = _mm512_set1_epi32(0xff);
  __m512i shufmask = _mm512_set_epi8(
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60,
      56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0);
  while (end - buf >= 16) {
    __m512i in = _mm512_loadu_si512((__m512i *)buf);
    if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) {
      while (uint32_t(*buf) <= 0xff) {
        *latin1_output++ = uint8_t(*buf++);
      }
      return std::make_pair(result(error_code::TOO_LARGE, buf - start),
                            latin1_output);
    }
    _mm_storeu_si128(
        (__m128i *)latin1_output,
        _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in)));
    latin1_output += 16;
    buf += 16;
  }
  if (buf < end) {
    uint16_t mask = uint16_t((1 << (end - buf)) - 1);
    __m512i in = _mm512_maskz_loadu_epi32(mask, buf);
    if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) {
      while (uint32_t(*buf) <= 0xff) {
        *latin1_output++ = uint8_t(*buf++);
      }
      return std::make_pair(result(error_code::TOO_LARGE, buf - start),
                            latin1_output);
    }
    _mm_mask_storeu_epi8(
        latin1_output, mask,
        _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in)));
  }
  return std::make_pair(result(error_code::SUCCESS, len), latin1_output);
}