aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/simdutf/src/scalar/utf32_to_latin1/utf32_to_latin1.h
blob: f09bc9d1d9a7ec4896fddde799d91f621698c5c2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#ifndef SIMDUTF_UTF32_TO_LATIN1_H
#define SIMDUTF_UTF32_TO_LATIN1_H

namespace simdutf {
namespace scalar {
namespace {
namespace utf32_to_latin1 {

inline size_t convert(const char32_t *buf, size_t len, char *latin1_output) {
  const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
  char *start = latin1_output;
  uint32_t utf32_char;
  size_t pos = 0;
  uint32_t too_large = 0;

  while (pos < len) {
    utf32_char = (uint32_t)data[pos];
    too_large |= utf32_char;
    *latin1_output++ = (char)(utf32_char & 0xFF);
    pos++;
  }
  if ((too_large & 0xFFFFFF00) != 0) {
    return 0;
  }
  return latin1_output - start;
}

inline result convert_with_errors(const char32_t *buf, size_t len,
                                  char *latin1_output) {
  const uint32_t *data = reinterpret_cast<const uint32_t *>(buf);
  char *start{latin1_output};
  size_t pos = 0;
  while (pos < len) {
    if (pos + 2 <=
        len) { // if it is safe to read 8 more bytes, check that they are Latin1
      uint64_t v;
      ::memcpy(&v, data + pos, sizeof(uint64_t));
      if ((v & 0xFFFFFF00FFFFFF00) == 0) {
        *latin1_output++ = char(buf[pos]);
        *latin1_output++ = char(buf[pos + 1]);
        pos += 2;
        continue;
      }
    }
    uint32_t utf32_char = data[pos];
    if ((utf32_char & 0xFFFFFF00) ==
        0) { // Check if the character can be represented in Latin-1
      *latin1_output++ = (char)(utf32_char & 0xFF);
      pos++;
    } else {
      return result(error_code::TOO_LARGE, pos);
    };
  }
  return result(error_code::SUCCESS, latin1_output - start);
}

} // namespace utf32_to_latin1
} // unnamed namespace
} // namespace scalar
} // namespace simdutf

#endif