You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

css_selector.cxx 6.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. /*-
  2. * Copyright 2021 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "css_selector.hxx"
  17. #include "css.hxx"
  18. #include "fmt/core.h"
  19. #define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
  20. #include "doctest/doctest.h"
  21. namespace rspamd::css {
  22. auto process_selector_tokens(rspamd_mempool_t *pool,
  23. blocks_gen_functor &&next_token_functor)
  24. -> selectors_vec
  25. {
  26. selectors_vec ret;
  27. bool can_continue = true;
  28. enum class selector_process_state {
  29. selector_parse_start = 0,
  30. selector_expect_ident,
  31. selector_ident_consumed,
  32. selector_ignore_attribute,
  33. selector_ignore_function,
  34. selector_ignore_combination
  35. } state = selector_process_state::selector_parse_start;
  36. std::unique_ptr<css_selector> cur_selector;
  37. while (can_continue) {
  38. const auto &next_tok = next_token_functor();
  39. if (next_tok.tag == css_consumed_block::parser_tag_type::css_component) {
  40. const auto &parser_tok = next_tok.get_token_or_empty();
  41. if (state == selector_process_state::selector_parse_start) {
  42. /*
  43. * At the beginning of the parsing we can expect either
  44. * delim or an ident, everything else is discarded for now
  45. */
  46. msg_debug_css("start consume selector");
  47. switch (parser_tok.type) {
  48. case css_parser_token::token_type::delim_token: {
  49. auto delim_c = parser_tok.get_delim();
  50. if (delim_c == '.') {
  51. cur_selector = std::make_unique<css_selector>(
  52. css_selector::selector_type::SELECTOR_CLASS);
  53. state = selector_process_state::selector_expect_ident;
  54. }
  55. else if (delim_c == '#') {
  56. cur_selector = std::make_unique<css_selector>(
  57. css_selector::selector_type::SELECTOR_ID);
  58. state = selector_process_state::selector_expect_ident;
  59. }
  60. else if (delim_c == '*') {
  61. cur_selector = std::make_unique<css_selector>(
  62. css_selector::selector_type::SELECTOR_ALL);
  63. state = selector_process_state::selector_ident_consumed;
  64. }
  65. break;
  66. }
  67. case css_parser_token::token_type::ident_token:
  68. cur_selector = std::make_unique<css_selector>(
  69. css_selector::selector_type::SELECTOR_ELEMENT);
  70. cur_selector->value = parser_tok.get_string_or_default("");
  71. state = selector_process_state::selector_ident_consumed;
  72. break;
  73. case css_parser_token::token_type::hash_token:
  74. cur_selector = std::make_unique<css_selector>(
  75. css_selector::selector_type::SELECTOR_ID);
  76. cur_selector->value =
  77. parser_tok.get_string_or_default("").substr(1);
  78. state = selector_process_state::selector_ident_consumed;
  79. break;
  80. default:
  81. msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected start",
  82. next_tok.token_type_str());
  83. can_continue = false;
  84. break;
  85. }
  86. }
  87. else if (state == selector_process_state::selector_expect_ident) {
  88. /*
  89. * We got something like a selector start, so we expect
  90. * a plain ident
  91. */
  92. if (parser_tok.type == css_parser_token::token_type::ident_token && cur_selector) {
  93. cur_selector->value = parser_tok.get_string_or_default("");
  94. state = selector_process_state::selector_ident_consumed;
  95. }
  96. else {
  97. msg_debug_css("cannot consume more of a selector, invalid parser token: %s; expected ident",
  98. next_tok.token_type_str());
  99. can_continue = false;
  100. }
  101. }
  102. else if (state == selector_process_state::selector_ident_consumed) {
  103. if (parser_tok.type == css_parser_token::token_type::comma_token) {
  104. /* Got full selector, attach it to the vector and go further */
  105. msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
  106. ret.push_back(std::move(cur_selector));
  107. state = selector_process_state::selector_parse_start;
  108. }
  109. else if (parser_tok.type == css_parser_token::token_type::semicolon_token) {
  110. /* TODO: implement adjustments */
  111. state = selector_process_state::selector_ignore_function;
  112. }
  113. else if (parser_tok.type == css_parser_token::token_type::osqbrace_token) {
  114. /* TODO: implement attributes checks */
  115. state = selector_process_state::selector_ignore_attribute;
  116. }
  117. else {
  118. /* TODO: implement selectors combinations */
  119. state = selector_process_state::selector_ignore_combination;
  120. }
  121. }
  122. else {
  123. /* Ignore state; ignore all till ',' token or eof token */
  124. if (parser_tok.type == css_parser_token::token_type::comma_token) {
  125. /* Got full selector, attach it to the vector and go further */
  126. ret.push_back(std::move(cur_selector));
  127. state = selector_process_state::selector_parse_start;
  128. }
  129. else {
  130. auto debug_str = parser_tok.get_string_or_default("");
  131. msg_debug_css("ignore token %*s", (int)debug_str.size(),
  132. debug_str.data());
  133. }
  134. }
  135. }
  136. else {
  137. /* End of parsing */
  138. if (state == selector_process_state::selector_ident_consumed && cur_selector) {
  139. msg_debug_css("attached selector: %s", cur_selector->debug_str().c_str());
  140. ret.push_back(std::move(cur_selector));
  141. }
  142. can_continue = false;
  143. }
  144. }
  145. return ret; /* copy elision */
  146. }
  147. auto
  148. css_selector::debug_str() const -> std::string
  149. {
  150. std::string ret;
  151. if (type == selector_type::SELECTOR_ID) {
  152. ret += "#";
  153. }
  154. else if (type == selector_type::SELECTOR_CLASS) {
  155. ret += ".";
  156. }
  157. else if (type == selector_type::SELECTOR_ALL) {
  158. ret = "*";
  159. return ret;
  160. }
  161. std::visit([&](auto arg) -> void {
  162. using T = std::decay_t<decltype(arg)>;
  163. if constexpr (std::is_same_v<T, tag_id_t>) {
  164. ret += fmt::format("tag: {}", static_cast<int>(arg));
  165. }
  166. else {
  167. ret += arg;
  168. }
  169. }, value);
  170. return ret;
  171. }
  172. TEST_SUITE("css selectors") {
  173. TEST_CASE("simple css selectors") {
  174. const std::vector<std::pair<const char *, std::vector<css_selector::selector_type>>> cases{
  175. {"em", {css_selector::selector_type::SELECTOR_ELEMENT}},
  176. {"*", {css_selector::selector_type::SELECTOR_ALL}},
  177. {".class", {css_selector::selector_type::SELECTOR_CLASS}},
  178. {"#id", {css_selector::selector_type::SELECTOR_ID}},
  179. {"em,.class,#id", {css_selector::selector_type::SELECTOR_ELEMENT,
  180. css_selector::selector_type::SELECTOR_CLASS,
  181. css_selector::selector_type::SELECTOR_ID}},
  182. };
  183. auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(),
  184. "css", 0);
  185. for (const auto &c : cases) {
  186. auto res = process_selector_tokens(pool,
  187. get_selectors_parser_functor(pool, c.first));
  188. CHECK(c.second.size() == res.size());
  189. for (auto i = 0; i < c.second.size(); i ++) {
  190. CHECK(res[i]->type == c.second[i]);
  191. }
  192. }
  193. rspamd_mempool_delete(pool);
  194. }
  195. }
  196. }