#ifndef SIMDUTF_UTF16_TO_LATIN1_H #define SIMDUTF_UTF16_TO_LATIN1_H namespace simdutf { namespace scalar { namespace { namespace utf16_to_latin1 { #include // for std::memcpy template inline size_t convert(const char16_t *buf, size_t len, char *latin_output) { if (len == 0) { return 0; } const uint16_t *data = reinterpret_cast(buf); size_t pos = 0; char *current_write = latin_output; uint16_t word = 0; uint16_t too_large = 0; while (pos < len) { word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; too_large |= word; *current_write++ = char(word & 0xFF); pos++; } if ((too_large & 0xFF00) != 0) { return 0; } return current_write - latin_output; } template inline result convert_with_errors(const char16_t *buf, size_t len, char *latin_output) { if (len == 0) { return result(error_code::SUCCESS, 0); } const uint16_t *data = reinterpret_cast(buf); size_t pos = 0; char *start{latin_output}; uint16_t word; while (pos < len) { if (pos + 16 <= len) { // if it is safe to read 32 more bytes, check that // they are Latin1 uint64_t v1, v2, v3, v4; ::memcpy(&v1, data + pos, sizeof(uint64_t)); ::memcpy(&v2, data + pos + 4, sizeof(uint64_t)); ::memcpy(&v3, data + pos + 8, sizeof(uint64_t)); ::memcpy(&v4, data + pos + 12, sizeof(uint64_t)); if (!match_system(big_endian)) { v1 = (v1 >> 8) | (v1 << (64 - 8)); } if (!match_system(big_endian)) { v2 = (v2 >> 8) | (v2 << (64 - 8)); } if (!match_system(big_endian)) { v3 = (v3 >> 8) | (v3 << (64 - 8)); } if (!match_system(big_endian)) { v4 = (v4 >> 8) | (v4 << (64 - 8)); } if (((v1 | v2 | v3 | v4) & 0xFF00FF00FF00FF00) == 0) { size_t final_pos = pos + 16; while (pos < final_pos) { *latin_output++ = !match_system(big_endian) ? char(utf16::swap_bytes(data[pos])) : char(data[pos]); pos++; } continue; } } word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos]; if ((word & 0xFF00) == 0) { *latin_output++ = char(word & 0xFF); pos++; } else { return result(error_code::TOO_LARGE, pos); } } return result(error_code::SUCCESS, latin_output - start); } } // namespace utf16_to_latin1 } // unnamed namespace } // namespace scalar } // namespace simdutf #endif