You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

conversion.cxx 2.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. #include <algorithm>
  2. #include <string>
  3. #include <cstring>
  4. #include <cctype>
  5. #include <clocale>
  6. #include "unicode/utf8.h"
  7. #include "conversion.hxx"
  8. #ifdef _WIN32
  9. #define strdup _strdup
  10. #endif
  11. using namespace std;
  12. namespace replxx {
  namespace locale {

  /**
   * Lower-case every character of `s_` in place.
   *
   * Each byte is passed to std::tolower as an `unsigned char`: handing it a
   * negative `char` (any byte >= 0x80 where `char` is signed) is undefined
   * behaviour.
   */
  void to_lower( std::string& s_ ) {
    std::transform(
      s_.begin(), s_.end(), s_.begin(),
      []( unsigned char c ) { return static_cast<char>( std::tolower( c ) ); }
    );
  }

  /**
   * Report whether the effective LC_CTYPE locale name mentions "8859".
   *
   * If the current LC_CTYPE locale is "C" (compared case-insensitively), the
   * environment's locale is activated temporarily so that its name can be
   * inspected. The locale that was active on entry is restored before
   * returning.
   *
   * @return true when the lower-cased locale name contains "8859",
   *         false otherwise (including when the locale cannot be queried).
   */
  bool is_8bit_encoding( void ) {
    // setlocale() may return a null pointer; never build a std::string from it.
    const char* const current = std::setlocale( LC_CTYPE, nullptr );
    if ( current == nullptr ) {
      return false;
    }
    // Copy immediately: later setlocale() calls may invalidate the returned buffer.
    const std::string origLC( current );
    std::string lc( origLC );
    to_lower( lc );
    if ( lc == "c" ) {
      std::setlocale( LC_CTYPE, "" );
    }
    const char* const effective = std::setlocale( LC_CTYPE, nullptr );
    lc = ( effective != nullptr ) ? effective : "";
    // Put back whatever locale the caller had before we looked.
    std::setlocale( LC_CTYPE, origLC.c_str() );
    to_lower( lc );
    return lc.find( "8859" ) != std::string::npos;
  }

  /// Result of is_8bit_encoding(), computed once when this variable is initialized.
  bool is8BitEncoding( is_8bit_encoding() );

  }
  35. ConversionResult copyString8to32(char32_t* dst, int dstSize, int& dstCount, const char* src) {
  36. ConversionResult res = ConversionResult::conversionOK;
  37. if ( ! locale::is8BitEncoding ) {
  38. auto sourceStart = reinterpret_cast<const unsigned char*>(src);
  39. auto slen = strlen(src);
  40. auto targetStart = reinterpret_cast<UChar32*>(dst);
  41. int i = 0, j = 0;
  42. while (i < slen && j < dstSize) {
  43. UChar32 uc;
  44. auto prev_i = i;
  45. U8_NEXT (sourceStart, i, slen, uc);
  46. if (uc <= 0) {
  47. if (U8_IS_LEAD (sourceStart[prev_i])) {
  48. auto lead_byte = sourceStart[prev_i];
  49. auto trailing_bytes = (((uint8_t)(lead_byte)>=0xc2)+
  50. ((uint8_t)(lead_byte)>=0xe0)+
  51. ((uint8_t)(lead_byte)>=0xf0));
  52. if (trailing_bytes + i > slen) {
  53. return ConversionResult::sourceExhausted;
  54. }
  55. }
  56. /* Replace with 0xFFFD */
  57. uc = 0x0000FFFD;
  58. }
  59. targetStart[j++] = uc;
  60. }
  61. dstCount = j;
  62. if (j < dstSize) {
  63. targetStart[j] = 0;
  64. }
  65. } else {
  66. for ( dstCount = 0; ( dstCount < dstSize ) && src[dstCount]; ++ dstCount ) {
  67. dst[dstCount] = src[dstCount];
  68. }
  69. }
  70. return res;
  71. }
  72. ConversionResult copyString8to32(char32_t* dst, int dstSize, int& dstCount, const char8_t* src) {
  73. return copyString8to32(
  74. dst, dstSize, dstCount, reinterpret_cast<const char*>(src)
  75. );
  76. }
  77. int copyString32to8(
  78. char* dst, int dstSize, const char32_t* src, int srcSize
  79. ) {
  80. int resCount = 0;
  81. if ( ! locale::is8BitEncoding ) {
  82. int j = 0;
  83. UBool is_error = 0;
  84. for (auto i = 0; i < srcSize; i ++) {
  85. U8_APPEND ((uint8_t *)dst, j, dstSize, src[i], is_error);
  86. if (is_error) {
  87. break;
  88. }
  89. }
  90. if (!is_error) {
  91. resCount = j;
  92. if (j < dstSize) {
  93. dst[j] = '\0';
  94. }
  95. }
  96. } else {
  97. int i( 0 );
  98. for ( i = 0; ( i < dstSize ) && ( i < srcSize ) && src[i]; ++ i ) {
  99. dst[i] = static_cast<char>( src[i] );
  100. }
  101. resCount = i;
  102. if ( i < dstSize ) {
  103. dst[i] = 0;
  104. }
  105. }
  106. return resCount;
  107. }
  108. }