aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/replxx/src/conversion.cxx
blob: f629f910e4f5a68177ca7253f7bb7c55afb2c000 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#include <algorithm>
#include <string>
#include <cstring>
#include <cctype>
#include <clocale>

#include "unicode/utf8.h"
#include "conversion.hxx"

#ifdef _WIN32
#define strdup _strdup
#endif

using namespace std;

namespace replxx {

namespace locale {

/*
 * Lower-case every character of s_ in place with the C library's tolower().
 *
 * Each character is widened through unsigned char before the call: handing
 * tolower() a negative value (any byte >= 0x80 on platforms where plain char
 * is signed) is undefined behaviour.
 */
void to_lower( std::string& s_ ) {
	std::transform(
		s_.begin(), s_.end(), s_.begin(),
		[]( unsigned char ch_ ) { return static_cast<char>( std::tolower( ch_ ) ); }
	);
}

/*
 * Report whether the LC_CTYPE locale name contains "8859".
 *
 * If the current LC_CTYPE locale is "C" (compared case-insensitively), the
 * environment's locale is activated temporarily with setlocale( LC_CTYPE, "" )
 * so that its name can be inspected. The locale that was active on entry is
 * always restored before returning.
 *
 * setlocale() may return a null pointer; in that case the function answers
 * false instead of constructing a std::string from a null pointer.
 */
bool is_8bit_encoding( void ) {
	const char* queried = setlocale( LC_CTYPE, nullptr );
	if ( queried == nullptr ) {
		return ( false );
	}
	/* Copy right away: later setlocale() calls may invalidate the returned buffer. */
	const std::string origLC( queried );
	std::string lc( origLC );
	to_lower( lc );
	if ( lc == "c" ) {
		setlocale( LC_CTYPE, "" );
	}
	queried = setlocale( LC_CTYPE, nullptr );
	lc = ( queried != nullptr ) ? queried : "";
	/* Put back whatever was active when we were called. */
	setlocale( LC_CTYPE, origLC.c_str() );
	to_lower( lc );
	return ( lc.find( "8859" ) != std::string::npos );
}

/* Result of is_8bit_encoding(), computed once when this namespace-scope variable is initialised. */
bool is8BitEncoding = is_8bit_encoding();

}

/*
 * Convert a NUL-terminated byte string into 32-bit code units.
 *
 * When locale::is8BitEncoding is false, src is decoded as UTF-8 with U8_NEXT
 * and every sequence that fails to decode is replaced by U+FFFD. Otherwise
 * each source byte becomes exactly one output element.
 *
 * dst      - output buffer.
 * dstSize  - capacity of dst in elements; at most this many are stored.
 * dstCount - receives the number of elements stored. It is NOT written when
 *            sourceExhausted is returned.
 * src      - NUL-terminated input.
 *
 * Returns conversionOK, or sourceExhausted when a UTF-8 lead byte announces
 * more trailing bytes than remain in src.
 *
 * In the UTF-8 branch a terminating 0 is stored after the output when there
 * is room for it; the 8-bit branch stores no terminator.
 */
ConversionResult copyString8to32(char32_t* dst, int dstSize, int& dstCount, const char* src) {
	ConversionResult res = ConversionResult::conversionOK;
	if ( ! locale::is8BitEncoding ) {
		auto sourceStart = reinterpret_cast<const unsigned char*>(src);
		/* Keep the length signed so it compares cleanly with the int indices below. */
		const int slen = static_cast<int>(strlen(src));
		auto targetStart = reinterpret_cast<UChar32*>(dst);
		int i = 0, j = 0;

		while (i < slen && j < dstSize) {
			UChar32 uc;
			const int prev_i = i;
			U8_NEXT (sourceStart, i, slen, uc);

			if (uc <= 0) {
				if (U8_IS_LEAD (sourceStart[prev_i])) {
					const unsigned char lead_byte = sourceStart[prev_i];
					/* Number of trail bytes this lead byte promises (1, 2 or 3). */
					const int trailing_bytes = (lead_byte >= 0xc2) +
											   (lead_byte >= 0xe0) +
											   (lead_byte >= 0xf0);

					/*
					 * Measure from the lead byte itself, not from `i`: U8_NEXT has
					 * already advanced `i` past the lead byte and possibly past some
					 * trail bytes, so using `i` would count those bytes twice and
					 * report a malformed-but-complete sequence as truncated input.
					 */
					if (prev_i + 1 + trailing_bytes > slen) {
						return ConversionResult::sourceExhausted;
					}
				}

				/* Replace with 0xFFFD */
				uc = 0x0000FFFD;
			}
			targetStart[j++] = uc;
		}

		dstCount = j;

		if (j < dstSize) {
			targetStart[j] = 0;
		}
	} else {
		for ( dstCount = 0; ( dstCount < dstSize ) && src[dstCount]; ++ dstCount ) {
			/* Go through unsigned char so bytes >= 0x80 are not sign-extended to 0xFFFFFFxx. */
			dst[dstCount] = static_cast<unsigned char>( src[dstCount] );
		}
	}
	return res;
}

/*
 * char8_t flavour of copyString8to32: reinterprets src as a const char* and
 * forwards every argument unchanged to the const char* overload.
 */
ConversionResult copyString8to32(char32_t* dst, int dstSize, int& dstCount, const char8_t* src) {
	const char* bytes = reinterpret_cast<const char*>(src);
	return copyString8to32(dst, dstSize, dstCount, bytes);
}

/*
 * Convert srcSize 32-bit code units from src into bytes stored in dst.
 *
 * When locale::is8BitEncoding is false, every code unit is encoded as UTF-8
 * with U8_APPEND. If any code unit cannot be stored (U8_APPEND reports an
 * error, or dst is already full) the conversion is abandoned and 0 is
 * returned; dst may then hold a partial, unterminated result.
 *
 * Otherwise each code unit is narrowed to a single char; copying stops at the
 * first zero code unit or when either buffer is exhausted.
 *
 * dst     - output buffer.
 * dstSize - capacity of dst in bytes; nothing is written at or beyond it.
 * src     - input code units.
 * srcSize - number of elements available in src.
 *
 * Returns the number of bytes stored. A terminating '\0' is added after them
 * only when there is room for it.
 */
int copyString32to8(
		char* dst, int dstSize, const char32_t* src, int srcSize
) {
	int resCount = 0;

	if ( ! locale::is8BitEncoding ) {
		uint8_t* const out = reinterpret_cast<uint8_t*>(dst);
		int j = 0;
		UBool is_error = 0;

		for (int i = 0; i < srcSize; ++i) {
			/*
			 * U8_APPEND checks the capacity only for multi-byte sequences; a
			 * single-byte code point is stored unconditionally. Refuse to append
			 * once the buffer is full so that path cannot write past dst.
			 */
			if (j >= dstSize) {
				is_error = 1;
				break;
			}

			U8_APPEND (out, j, dstSize, src[i], is_error);

			if (is_error) {
				break;
			}
		}

		if (!is_error) {
			resCount = j;

			if (j < dstSize) {
				dst[j] = '\0';
			}
		}
	} else {
		int i( 0 );
		for ( i = 0; ( i < dstSize ) && ( i < srcSize ) && src[i]; ++ i ) {
			dst[i] = static_cast<char>( src[i] );
		}
		resCount = i;
		if ( i < dstSize ) {
			dst[i] = 0;
		}
	}

	return resCount;
}

}