You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

css_util.cxx 3.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. /*-
  2. * Copyright 2021 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "css_util.hxx"
  17. #include "css.hxx"
  18. #include <unicode/utf8.h>
  19. namespace rspamd::css {
  20. std::string_view unescape_css(rspamd_mempool_t *pool,
  21. const std::string_view &sv)
  22. {
  23. auto *nspace = reinterpret_cast<char *>(rspamd_mempool_alloc(pool, sv.length()));
  24. auto *d = nspace;
  25. auto nleft = sv.length();
  26. enum {
  27. normal = 0,
  28. quoted,
  29. escape,
  30. skip_spaces,
  31. } state = normal;
  32. char quote_char, prev_c = 0;
  33. auto escape_offset = 0, i = 0;
  34. #define MAYBE_CONSUME_CHAR(c) \
  35. do { \
  36. if ((c) == '"' || (c) == '\'') { \
  37. state = quoted; \
  38. quote_char = (c); \
  39. nleft--; \
  40. *d++ = (c); \
  41. } \
  42. else if ((c) == '\\') { \
  43. escape_offset = i; \
  44. state = escape; \
  45. } \
  46. else { \
  47. state = normal; \
  48. nleft--; \
  49. *d++ = g_ascii_tolower(c); \
  50. } \
  51. } while (0)
  52. for (const auto c: sv) {
  53. if (nleft == 0) {
  54. msg_err_css("cannot unescape css: truncated buffer of size %d",
  55. (int) sv.length());
  56. break;
  57. }
  58. switch (state) {
  59. case normal:
  60. MAYBE_CONSUME_CHAR(c);
  61. break;
  62. case quoted:
  63. if (c == quote_char) {
  64. if (prev_c != '\\') {
  65. state = normal;
  66. }
  67. }
  68. prev_c = c;
  69. nleft--;
  70. *d++ = c;
  71. break;
  72. case escape:
  73. if (!g_ascii_isxdigit(c)) {
  74. if (i > escape_offset + 1) {
  75. /* Try to decode an escape */
  76. const auto *escape_start = &sv[escape_offset + 1];
  77. unsigned long val;
  78. if (!rspamd_xstrtoul(escape_start, i - escape_offset - 1, &val)) {
  79. msg_debug_css("invalid broken escape found at pos %d",
  80. escape_offset);
  81. }
  82. else {
  83. if (val < 0x80) {
  84. /* Trivial case: ascii character */
  85. *d++ = (unsigned char) g_ascii_tolower(val);
  86. nleft--;
  87. }
  88. else {
  89. UChar32 uc = val;
  90. auto off = 0;
  91. UTF8_APPEND_CHAR_SAFE((uint8_t *) d, off,
  92. sv.length(), u_tolower(uc));
  93. d += off;
  94. nleft -= off;
  95. }
  96. }
  97. }
  98. else {
  99. /* Empty escape, ignore it */
  100. msg_debug_css("invalid empty escape found at pos %d",
  101. escape_offset);
  102. }
  103. if (nleft <= 0) {
  104. msg_err_css("cannot unescape css: truncated buffer of size %d",
  105. (int) sv.length());
  106. }
  107. else {
  108. /* Escape is done, advance forward */
  109. if (g_ascii_isspace(c)) {
  110. state = skip_spaces;
  111. }
  112. else {
  113. MAYBE_CONSUME_CHAR(c);
  114. }
  115. }
  116. }
  117. break;
  118. case skip_spaces:
  119. if (!g_ascii_isspace(c)) {
  120. MAYBE_CONSUME_CHAR(c);
  121. }
  122. /* Ignore spaces */
  123. break;
  124. }
  125. i++;
  126. }
  127. return std::string_view{nspace, sv.size() - nleft};
  128. }
  129. }// namespace rspamd::css
  130. /* C API */
  131. const char *rspamd_css_unescape(rspamd_mempool_t *pool,
  132. const unsigned char *begin,
  133. gsize len,
  134. gsize *outlen)
  135. {
  136. auto sv = rspamd::css::unescape_css(pool, {(const char *) begin, len});
  137. const auto *v = sv.begin();
  138. if (outlen) {
  139. *outlen = sv.size();
  140. }
  141. return v;
  142. }