You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

css_selector.cxx 7.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. /*-
  2. * Copyright 2021 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "css_selector.hxx"
  17. #include "css.hxx"
  18. #include "libserver/html/html.hxx"
  19. #include "fmt/core.h"
  20. #define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
  21. #include "doctest/doctest.h"
  22. namespace rspamd::css {
  23. auto process_selector_tokens(rspamd_mempool_t *pool,
  24. blocks_gen_functor &&next_token_functor)
  25. -> selectors_vec
  26. {
  27. selectors_vec ret;
  28. bool can_continue = true;
  29. enum class selector_process_state {
  30. selector_parse_start = 0,
  31. selector_expect_ident,
  32. selector_ident_consumed,
  33. selector_ignore_attribute,
  34. selector_ignore_function,
  35. selector_ignore_combination
  36. } state = selector_process_state::selector_parse_start;
  37. std::unique_ptr<css_selector> cur_selector;
  38. while (can_continue) {
  39. const auto &next_tok = next_token_functor();
  40. if (next_tok.tag == css_consumed_block::parser_tag_type::css_component) {
  41. const auto &parser_tok = next_tok.get_token_or_empty();
  42. if (state == selector_process_state::selector_parse_start) {
  43. /*
  44. * At the beginning of the parsing we can expect either
  45. * delim or an ident, everything else is discarded for now
  46. */
  47. msg_debug_css("start consume selector");
  48. switch (parser_tok.type) {
  49. case css_parser_token::token_type::delim_token: {
  50. auto delim_c = parser_tok.get_delim();
  51. if (delim_c == '.') {
  52. cur_selector = std::make_unique<css_selector>(
  53. css_selector::selector_type::SELECTOR_CLASS);
  54. state = selector_process_state::selector_expect_ident;
  55. }
  56. else if (delim_c == '#') {
  57. cur_selector = std::make_unique<css_selector>(
  58. css_selector::selector_type::SELECTOR_ID);
  59. state = selector_process_state::selector_expect_ident;
  60. }
  61. else if (delim_c == '*') {
  62. cur_selector = std::make_unique<css_selector>(
  63. css_selector::selector_type::SELECTOR_ALL);
  64. state = selector_process_state::selector_ident_consumed;
  65. }
  66. break;
  67. }
  68. case css_parser_token::token_type::ident_token: {
  69. auto tag_id = html::html_tag_by_name(parser_tok.get_string_or_default(""));
  70. if (tag_id) {
  71. cur_selector = std::make_unique<css_selector>(tag_id.value());
  72. }
  73. state = selector_process_state::selector_ident_consumed;
  74. break;
  75. }
  76. case css_parser_token::token_type::hash_token:
  77. cur_selector = std::make_unique<css_selector>(
  78. css_selector::selector_type::SELECTOR_ID);
  79. cur_selector->value =
  80. parser_tok.get_string_or_default("");
  81. state = selector_process_state::selector_ident_consumed;
  82. break;
  83. default:
  84. msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected start",
  85. next_tok.token_type_str());
  86. can_continue = false;
  87. break;
  88. }
  89. }
  90. else if (state == selector_process_state::selector_expect_ident) {
  91. /*
  92. * We got something like a selector start, so we expect
  93. * a plain ident
  94. */
  95. if (parser_tok.type == css_parser_token::token_type::ident_token && cur_selector) {
  96. cur_selector->value = parser_tok.get_string_or_default("");
  97. state = selector_process_state::selector_ident_consumed;
  98. }
  99. else {
  100. msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected ident",
  101. next_tok.token_type_str());
  102. can_continue = false;
  103. }
  104. }
  105. else if (state == selector_process_state::selector_ident_consumed) {
  106. if (parser_tok.type == css_parser_token::token_type::comma_token && cur_selector) {
  107. /* Got full selector, attach it to the vector and go further */
  108. msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
  109. ret.push_back(std::move(cur_selector));
  110. state = selector_process_state::selector_parse_start;
  111. }
  112. else if (parser_tok.type == css_parser_token::token_type::semicolon_token) {
  113. /* TODO: implement adjustments */
  114. state = selector_process_state::selector_ignore_function;
  115. }
  116. else if (parser_tok.type == css_parser_token::token_type::osqbrace_token) {
  117. /* TODO: implement attributes checks */
  118. state = selector_process_state::selector_ignore_attribute;
  119. }
  120. else {
  121. /* TODO: implement selectors combinations */
  122. state = selector_process_state::selector_ignore_combination;
  123. }
  124. }
  125. else {
  126. /* Ignore state; ignore all till ',' token or eof token */
  127. if (parser_tok.type == css_parser_token::token_type::comma_token && cur_selector) {
  128. /* Got full selector, attach it to the vector and go further */
  129. ret.push_back(std::move(cur_selector));
  130. state = selector_process_state::selector_parse_start;
  131. }
  132. else {
  133. auto debug_str = parser_tok.get_string_or_default("");
  134. msg_debug_css("ignore token %*s", (int) debug_str.size(),
  135. debug_str.data());
  136. }
  137. }
  138. }
  139. else {
  140. /* End of parsing */
  141. if (state == selector_process_state::selector_ident_consumed && cur_selector) {
  142. msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
  143. ret.push_back(std::move(cur_selector));
  144. }
  145. else {
  146. msg_debug_css("not attached selector, state: %d", static_cast<int>(state));
  147. }
  148. can_continue = false;
  149. }
  150. }
  151. return ret; /* copy elision */
  152. }
  153. auto css_selector::debug_str() const -> std::string
  154. {
  155. std::string ret;
  156. if (type == selector_type::SELECTOR_ID) {
  157. ret += "#";
  158. }
  159. else if (type == selector_type::SELECTOR_CLASS) {
  160. ret += ".";
  161. }
  162. else if (type == selector_type::SELECTOR_ALL) {
  163. ret = "*";
  164. return ret;
  165. }
  166. std::visit([&](auto arg) -> void {
  167. using T = std::decay_t<decltype(arg)>;
  168. if constexpr (std::is_same_v<T, tag_id_t>) {
  169. ret += fmt::format("tag: {}", static_cast<int>(arg));
  170. }
  171. else {
  172. ret += arg;
  173. }
  174. },
  175. value);
  176. return ret;
  177. }
  178. TEST_SUITE("css")
  179. {
  180. TEST_CASE("simple css selectors")
  181. {
  182. const std::vector<std::pair<const char *, std::vector<css_selector::selector_type>>> cases{
  183. {"em", {css_selector::selector_type::SELECTOR_TAG}},
  184. {"*", {css_selector::selector_type::SELECTOR_ALL}},
  185. {".class", {css_selector::selector_type::SELECTOR_CLASS}},
  186. {"#id", {css_selector::selector_type::SELECTOR_ID}},
  187. {"em,.class,#id", {css_selector::selector_type::SELECTOR_TAG, css_selector::selector_type::SELECTOR_CLASS, css_selector::selector_type::SELECTOR_ID}},
  188. };
  189. auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
  190. "css", 0);
  191. for (const auto &c: cases) {
  192. auto res = process_selector_tokens(pool,
  193. get_selectors_parser_functor(pool, c.first));
  194. CHECK(c.second.size() == res.size());
  195. for (auto i = 0; i < c.second.size(); i++) {
  196. CHECK(res[i]->type == c.second[i]);
  197. }
  198. }
  199. rspamd_mempool_delete(pool);
  200. }
  201. }
  202. }// namespace rspamd::css