You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

punycode.c 6.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. /*
  2. * Copyright (c) 2014, Vsevolod Stakhov
  3. * Copyright (c) 2004, 2006, 2007, 2008 Kungliga Tekniska Högskolan
  4. * (Royal Institute of Technology, Stockholm, Sweden).
  5. * All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. *
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. *
  14. * 2. Redistributions in binary form must reproduce the above copyright
  15. * notice, this list of conditions and the following disclaimer in the
  16. * documentation and/or other materials provided with the distribution.
  17. *
  18. * 3. Neither the name of the Institute nor the names of its contributors
  19. * may be used to endorse or promote products derived from this software
  20. * without specific prior written permission.
  21. *
  22. * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
  23. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25. * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
  26. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32. * SUCH DAMAGE.
  33. */
  34. #include "dns_private.h"
  /*
   * Punycode encoder parameters.  The values are the Bootstring parameters
   * that RFC 3492 (section 5) assigns to Punycode.  Note that `event_loop`
   * holds the value the RFC calls "base": 36, which is also the number of
   * characters in the digit alphabet (a-z followed by 0-9).
   */
  static const unsigned int event_loop = 36;    /* "base" in RFC 3492 */
  static const unsigned int t_min = 1;          /* lowest digit threshold */
  static const unsigned int t_max = 26;         /* highest digit threshold */
  static const unsigned int skew = 38;          /* bias adaptation skew */
  static const unsigned int damp = 700;         /* damping of the first adaptation */
  static const unsigned int initial_n = 128;    /* first non-basic code point */
  static const unsigned int initial_bias = 72;  /* bias before any adaptation */
  /* Punycode utility */

  /*
   * Translate a digit value into the character that represents it:
   * values 0..25 map to 'a'..'z' and values 26..35 map to '0'..'9'.
   * The value is used directly as a table index and is not range-checked,
   * so callers must pass a value below 36.
   */
  static unsigned int
  digit (unsigned n)
  {
      static const char alphabet[] =
          "abcdefghijklmnopqrstuvwxyz"
          "0123456789";
      const char ch = alphabet[n];

      return ch;
  }
  49. static unsigned int
  50. adapt (unsigned int delta, unsigned int numpoints, int first)
  51. {
  52. unsigned int k;
  53. if (first) {
  54. delta = delta / damp;
  55. }
  56. else {
  57. delta /= 2;
  58. }
  59. delta += delta / numpoints;
  60. k = 0;
  61. while (delta > ((event_loop - t_min) * t_max) / 2) {
  62. delta /= event_loop - t_min;
  63. k += event_loop;
  64. }
  65. return k + (((event_loop - t_min + 1) * delta) / (delta + skew));
  66. }
  67. /**
  68. * Convert an UCS4 string to a puny-coded DNS label string suitable
  69. * when combined with delimiters and other labels for DNS lookup.
  70. *
  71. * @param in an UCS4 string to convert
  72. * @param in_len the length of in.
  73. * @param out the resulting puny-coded string. The string is not NULL
  74. * terminated.
  75. * @param out_len before processing out_len should be the length of
  76. * the out variable, after processing it will be the length of the out
  77. * string.
  78. *
  79. * @return returns 0 on success, an wind error code otherwise
  80. */
  81. bool
  82. rdns_punycode_label_toascii (const uint32_t *in, size_t in_len, char *out,
  83. size_t *out_len)
  84. {
  85. unsigned int n = initial_n;
  86. unsigned int delta = 0;
  87. unsigned int bias = initial_bias;
  88. unsigned int h = 0;
  89. unsigned int b;
  90. unsigned int i;
  91. unsigned int o = 0;
  92. unsigned int m;
  93. for (i = 0; i < in_len; ++i) {
  94. if (in[i] < 0x80) {
  95. ++h;
  96. if (o >= *out_len) {
  97. return false;
  98. }
  99. out[o++] = in[i];
  100. }
  101. }
  102. b = h;
  103. if (b > 0) {
  104. if (o >= *out_len) {
  105. return false;
  106. }
  107. out[o++] = 0x2D;
  108. }
  109. /* is this string punycoded */
  110. if (h < in_len) {
  111. if (o + 4 >= *out_len) {
  112. return false;
  113. }
  114. memmove (out + 4, out, o);
  115. memcpy (out, "xn--", 4);
  116. o += 4;
  117. }
  118. while (h < in_len) {
  119. m = (unsigned int) -1;
  120. for (i = 0; i < in_len; ++i) {
  121. if (in[i] < m && in[i] >= n) {
  122. m = in[i];
  123. }
  124. }
  125. delta += (m - n) * (h + 1);
  126. n = m;
  127. for (i = 0; i < in_len; ++i) {
  128. if (in[i] < n) {
  129. ++delta;
  130. }
  131. else if (in[i] == n) {
  132. unsigned int q = delta;
  133. unsigned int k;
  134. for (k = event_loop;; k += event_loop) {
  135. unsigned int t;
  136. if (k <= bias) {
  137. t = t_min;
  138. }
  139. else if (k >= bias + t_max) {
  140. t = t_max;
  141. }
  142. else {
  143. t = k - bias;
  144. }
  145. if (q < t) {
  146. break;
  147. }
  148. if (o >= *out_len) {
  149. return -1;
  150. }
  151. out[o++] = digit (t + ((q - t) % (event_loop - t)));
  152. q = (q - t) / (event_loop - t);
  153. }
  154. if (o >= *out_len) {
  155. return -1;
  156. }
  157. out[o++] = digit (q);
  158. /* output */
  159. bias = adapt (delta, h + 1, h == b);
  160. delta = 0;
  161. ++h;
  162. }
  163. }
  164. ++delta;
  165. ++n;
  166. }
  167. *out_len = o;
  168. return true;
  169. }
  /*
   * Decode a single UTF-8 sequence (1 to 4 bytes) starting at *pp.
   *
   * On success the decoded value is stored in *out, *pp is advanced past
   * the sequence, *remain is reduced by the number of bytes consumed and 0
   * is returned.  -1 is returned, with *pp, *out and *remain untouched,
   * when the lead byte is not a valid 1-4 byte lead, when a multi-byte
   * sequence needs more bytes than *remain, or when a continuation byte
   * does not have the form 10xxxxxx.  No check is made for overlong
   * encodings or for the range of the decoded value.
   */
  static int
  utf8toutf32 (const unsigned char **pp, uint32_t *out, size_t *remain)
  {
      const unsigned char *cursor = *pp;
      const unsigned lead = cursor[0];
      unsigned value;
      size_t seq_len;
      size_t idx;

      /* Classify the lead byte: sequence length and payload bits */
      if ((lead & 0x80) == 0) {
          seq_len = 1;
          value = lead;
      }
      else if ((lead & 0xE0) == 0xC0) {
          seq_len = 2;
          value = lead & 0x1F;
      }
      else if ((lead & 0xF0) == 0xE0) {
          seq_len = 3;
          value = lead & 0x0F;
      }
      else if ((lead & 0xF8) == 0xF0) {
          seq_len = 4;
          value = lead & 0x07;
      }
      else {
          return -1;
      }

      /* Only multi-byte sequences are checked against the remaining length */
      if (seq_len > 1 && *remain < seq_len) {
          return -1;
      }

      /* Fold in the continuation bytes, six payload bits at a time */
      for (idx = 1; idx < seq_len; idx++) {
          const unsigned cont = cursor[idx];

          if ((cont & 0xC0) != 0x80) {
              return -1;
          }
          value = (value << 6) | (cont & 0x3F);
      }

      *out = value;
      *pp = cursor + seq_len;
      *remain -= seq_len;

      return 0;
  }
  /**
   * Convert an UTF-8 string to an UCS4 string.
   *
   * The input is decoded twice: a first pass validates it and counts the
   * code points, a second pass fills a buffer of exactly that many
   * elements (at least one element is allocated, so empty input succeeds
   * with a length of 0).
   *
   * @param in an UTF-8 string to convert.
   * @param in_len the length of in, in bytes
   * @param out on success receives a buffer obtained from malloc () that
   * holds the resulting UCS4 string; this function never frees it, so the
   * caller has to release it with free (). Not modified on failure.
   * @param out_len on success receives the number of code points stored
   * in *out. Not modified on failure.
   *
   * @return returns 0 on success, -1 if the input is not decodable or
   * memory cannot be allocated
   * @ingroup wind
   */
  int
  rdns_utf8_to_ucs4 (const char *in, size_t in_len, uint32_t **out, size_t *out_len)
  {
      const unsigned char *p;
      size_t remain = in_len, olen = 0;
      int ret;
      uint32_t *res;

      /* First pass: validate the input and count the code points */
      p = (const unsigned char *)in;
      while (remain > 0) {
          uint32_t u;

          ret = utf8toutf32 (&p, &u, &remain);
          if (ret != 0) {
              return ret;
          }
          olen ++;
      }

      /* Guard the size computation below against wrap-around */
      if (olen > SIZE_MAX / sizeof (*res)) {
          return -1;
      }

      /*
       * malloc (0) is allowed to return NULL, which would make empty input
       * look like an allocation failure; always request at least one element.
       */
      res = malloc ((olen > 0 ? olen : 1) * sizeof (*res));
      if (res == NULL) {
          return -1;
      }

      /* Second pass: the input is known to be valid, so just store it */
      p = (const unsigned char *)in;
      remain = in_len;
      olen = 0;
      while (remain > 0) {
          uint32_t u;

          (void)utf8toutf32 (&p, &u, &remain);
          res[olen++] = u;
      }

      *out_len = olen;
      *out = res;

      return 0;
  }