aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/simdutf/src/lasx/lasx_convert_latin1_to_utf16.cpp
blob: a784d364e9d6c114d9373846a4ca846bc50fcfb0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
// Converts Latin-1 bytes to UTF-16 code units (low byte = input byte, high
// byte = 0) using LASX (256-bit) and LSX (128-bit) vector instructions.
//
//   buf          - start of the Latin-1 input.
//   len          - number of input bytes available at buf.
//   utf16_output - destination; one char16_t is written per consumed byte.
//
// Returns the input and output positions reached. Once the output pointer is
// 32-byte aligned, only whole 32-byte and 16-byte blocks are consumed, so up
// to 15 trailing input bytes can be left unconverted; presumably the caller
// finishes them with a scalar routine (not visible here).
std::pair<const char *, char16_t *>
lasx_convert_latin1_to_utf16le(const char *buf, size_t len,
                               char16_t *utf16_output) {
  const char *end = buf + len;

  // Performance degradation when memory address is not 32-byte aligned, so
  // convert one code unit at a time until the output pointer is aligned or
  // the input is exhausted.
  while ((reinterpret_cast<uint64_t>(utf16_output) & 0x1F) && buf < end) {
    *utf16_output++ = char16_t(uint8_t(*buf));
    buf++;
  }

  // Compare the remaining length instead of computing `buf + 32`: forming a
  // pointer further than one past the end of the input is undefined
  // behaviour and could wrap around for buffers at the top of the address
  // space.
  while (end - buf >= 32) {
    __m256i in8 = __lasx_xvld(reinterpret_cast<const uint8_t *>(buf), 0);

    // Zero-extend the lower 16 input bytes to sixteen 16-bit lanes.
    __m256i inlow = __lasx_vext2xv_hu_bu(in8);
    // Move the upper 16 input bytes into the lower half so they can be
    // widened the same way.
    __m256i in8_high = __lasx_xvpermi_q(in8, in8, 0b00000001);
    __m256i inhigh = __lasx_vext2xv_hu_bu(in8_high);
    // The second store is 32 bytes (16 code units) after the first.
    __lasx_xvst(inlow, reinterpret_cast<uint16_t *>(utf16_output), 0);
    __lasx_xvst(inhigh, reinterpret_cast<uint16_t *>(utf16_output), 32);

    utf16_output += 32;
    buf += 32;
  }

  // At most one 16-byte block can remain after the loop above.
  if (end - buf >= 16) {
    __m128i zero = __lsx_vldi(0);
    __m128i in8 = __lsx_vld(reinterpret_cast<const uint8_t *>(buf), 0);

    // Interleave each input byte with a zero byte to widen it to 16 bits.
    __m128i inlow = __lsx_vilvl_b(zero, in8);
    __m128i inhigh = __lsx_vilvh_b(zero, in8);
    __lsx_vst(inlow, reinterpret_cast<uint16_t *>(utf16_output), 0);
    __lsx_vst(inhigh, reinterpret_cast<uint16_t *>(utf16_output), 16);

    utf16_output += 16;
    buf += 16;
  }
  return std::make_pair(buf, utf16_output);
}

// Converts Latin-1 bytes to byte-swapped UTF-16 code units (each stored
// char16_t value is the input byte shifted left by 8) using LASX (256-bit)
// and LSX (128-bit) vector instructions.
//
//   buf          - start of the Latin-1 input.
//   len          - number of input bytes available at buf.
//   utf16_output - destination; one char16_t is written per consumed byte.
//
// Returns the input and output positions reached. Once the output pointer is
// 32-byte aligned, only whole 32-byte and 16-byte blocks are consumed, so up
// to 15 trailing input bytes can be left unconverted; presumably the caller
// finishes them with a scalar routine (not visible here).
std::pair<const char *, char16_t *>
lasx_convert_latin1_to_utf16be(const char *buf, size_t len,
                               char16_t *utf16_output) {
  const char *end = buf + len;

  // Convert one code unit at a time until the output pointer is 32-byte
  // aligned or the input is exhausted. Going through uint8_t first keeps a
  // signed `char` from sign-extending into the bits above the shifted byte.
  while ((reinterpret_cast<uint64_t>(utf16_output) & 0x1F) && buf < end) {
    *utf16_output++ = char16_t(uint16_t(uint8_t(*buf++)) << 8);
  }

  __m256i zero = __lasx_xvldi(0);
  // Compare the remaining length instead of computing `buf + 32`: forming a
  // pointer further than one past the end of the input is undefined
  // behaviour and could wrap around for buffers at the top of the address
  // space.
  while (end - buf >= 32) {
    __m256i in8 = __lasx_xvld(reinterpret_cast<const uint8_t *>(buf), 0);

    // Reorder the four 64-bit quarters to 0,2,1,3 so that the per-128-bit-lane
    // interleaves below yield input bytes 0..15 and 16..31 in order.
    __m256i in8_shuf = __lasx_xvpermi_d(in8, 0b11011000);

    // Interleave with zero so the input byte lands in the upper byte of each
    // 16-bit lane.
    __m256i inlow = __lasx_xvilvl_b(in8_shuf, zero);
    __m256i inhigh = __lasx_xvilvh_b(in8_shuf, zero);
    // The second store is 32 bytes (16 code units) after the first.
    __lasx_xvst(inlow, reinterpret_cast<uint16_t *>(utf16_output), 0);
    __lasx_xvst(inhigh, reinterpret_cast<uint16_t *>(utf16_output), 32);
    utf16_output += 32;
    buf += 32;
  }

  // At most one 16-byte block can remain after the loop above.
  if (end - buf >= 16) {
    __m128i zero_128 = __lsx_vldi(0);
    __m128i in8 = __lsx_vld(reinterpret_cast<const uint8_t *>(buf), 0);

    __m128i inlow = __lsx_vilvl_b(in8, zero_128);
    __m128i inhigh = __lsx_vilvh_b(in8, zero_128);
    __lsx_vst(inlow, reinterpret_cast<uint16_t *>(utf16_output), 0);
    __lsx_vst(inhigh, reinterpret_cast<uint16_t *>(utf16_output), 16);
    utf16_output += 16;
    buf += 16;
  }

  return std::make_pair(buf, utf16_output);
}