aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/simdutf/src/westmere/internal/write_v_u16_11bits_to_utf8.cpp
blob: 718b1140d4b9264329bcfcf10ac274459e0a165f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
/*
 * reads a vector of uint16 values
 * bits after 11th are ignored
 * first 11 bits are encoded into utf8
 * !important! utf8_output must have at least 16 writable bytes
 */

inline void write_v_u16_11bits_to_utf8(const __m128i v_u16, char *&utf8_output,
                                       const __m128i one_byte_bytemask,
                                       const uint16_t one_byte_bitmask) {
  // 0b1100_0000_1000_0000
  const __m128i v_c080 = _mm_set1_epi16((int16_t)0xc080);
  // 0b0001_1111_0000_0000
  const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00);
  // 0b0000_0000_0011_1111
  const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f);

  // 1. prepare 2-byte values
  // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
  // expected output   : [110a|aaaa|10bb|bbbb] x 8

  // t0 = [000a|aaaa|bbbb|bb00]
  const __m128i t0 = _mm_slli_epi16(v_u16, 2);
  // t1 = [000a|aaaa|0000|0000]
  const __m128i t1 = _mm_and_si128(t0, v_1f00);
  // t2 = [0000|0000|00bb|bbbb]
  const __m128i t2 = _mm_and_si128(v_u16, v_003f);
  // t3 = [000a|aaaa|00bb|bbbb]
  const __m128i t3 = _mm_or_si128(t1, t2);
  // t4 = [110a|aaaa|10bb|bbbb]
  const __m128i t4 = _mm_or_si128(t3, v_c080);

  // 2. merge ASCII and 2-byte codewords
  const __m128i utf8_unpacked = _mm_blendv_epi8(t4, v_u16, one_byte_bytemask);

  // 3. prepare bitmask for 8-bit lookup
  //    one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - MSB, a
  //    - LSB)
  const uint16_t m0 = one_byte_bitmask & 0x5555;      // m0 = 0h0g0f0e0d0c0b0a
  const uint16_t m1 = static_cast<uint16_t>(m0 >> 7); // m1 = 00000000h0g0f0e0
  const uint8_t m2 = static_cast<uint8_t>((m0 | m1) & 0xff); // m2 = hdgcfbea
  // 4. pack the bytes
  const uint8_t *row =
      &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
  const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1));
  const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle);

  // 5. store bytes
  _mm_storeu_si128((__m128i *)utf8_output, utf8_packed);

  // 6. adjust pointers
  utf8_output += row[0];
}

inline void write_v_u16_11bits_to_utf8(const __m128i v_u16, char *&utf8_output,
                                       const __m128i v_0000,
                                       const __m128i v_ff80) {
  // no bits set above 7th bit
  const __m128i one_byte_bytemask =
      _mm_cmpeq_epi16(_mm_and_si128(v_u16, v_ff80), v_0000);
  const uint16_t one_byte_bitmask =
      static_cast<uint16_t>(_mm_movemask_epi8(one_byte_bytemask));

  write_v_u16_11bits_to_utf8(v_u16, utf8_output, one_byte_bytemask,
                             one_byte_bitmask);
}