// Converts Latin-1 bytes to UTF-32 code units, 16 input bytes per iteration.
//
// Every input byte is zero-extended to a 32-bit value (unsigned widening,
// first 8 -> 16 bits, then 16 -> 32 bits) and written to utf32_output.
// Only whole 16-byte blocks are handled: when fewer than 16 bytes remain the
// loop stops and the remaining bytes are not converted by this function.
//
// buf           start of the input bytes
// len           number of input bytes available at buf
// utf32_output  destination; receives one char32_t per converted byte
//
// Returns a pair holding the input pointer just past the last converted byte
// and the output pointer just past the last written code unit.
std::pair<const char *, char32_t *>
arm_convert_latin1_to_utf32(const char *buf, size_t len,
                            char32_t *utf32_output) {
  // One past the last byte that belongs to a complete 16-byte block.
  const char *const blocks_end = buf + (len - len % 16);

  for (; buf != blocks_end; buf += 16, utf32_output += 16) {
    uint32_t *const dst = reinterpret_cast<uint32_t *>(utf32_output);
    const uint8x16_t bytes = vld1q_u8(reinterpret_cast<const uint8_t *>(buf));

    // Bytes 0..7: widen to 16 bits, then to two vectors of four 32-bit lanes.
    const uint16x8_t lower_half = vmovl_u8(vget_low_u8(bytes));
    vst1q_u32(dst, vmovl_u16(vget_low_u16(lower_half)));
    vst1q_u32(dst + 4, vmovl_u16(vget_high_u16(lower_half)));

    // Bytes 8..15: same treatment for the upper half of the register.
    const uint16x8_t upper_half = vmovl_u8(vget_high_u8(bytes));
    vst1q_u32(dst + 8, vmovl_u16(vget_low_u16(upper_half)));
    vst1q_u32(dst + 12, vmovl_u16(vget_high_u16(upper_half)));
  }

  return {buf, utf32_output};
}