You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

css_tokeniser.hxx 4.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. /*-
  2. * Copyright 2021 Vsevolod Stakhov
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #pragma once
  17. #ifndef RSPAMD_CSS_TOKENISER_HXX
  18. #define RSPAMD_CSS_TOKENISER_HXX
  #include <algorithm>
  #include <cstdint>
  #include <functional>
  #include <list>
  #include <string>
  #include <string_view>
  #include <type_traits>
  #include <utility>
  #include <variant>
  #include "mem_pool.h"
  25. namespace rspamd::css {
  /* Empty payload type: the value_type alternative of tokens that carry no data */
  struct css_parser_token_placeholder {};

  /*
   * One token of CSS input: a kind (token_type), a typed payload (value) and
   * numeric modifiers (flags, dimension_type).
   *
   * String payloads are std::string_view, so a token does not own its text:
   * the buffer the view points to must stay alive while the token is used.
   *
   * Declaring the move operations suppresses the implicit copy operations,
   * so tokens can be moved but not copied.
   */
  struct css_parser_token {
      /* Kind of the token */
      enum class token_type : std::uint8_t {
          whitespace_token,
          ident_token,
          function_token,
          at_keyword_token,
          hash_token,
          string_token,
          number_token,
          url_token,
          cdo_token, /* xml open comment */
          cdc_token, /* xml close comment */
          delim_token,
          obrace_token, /* ( */
          ebrace_token, /* ) */
          osqbrace_token, /* [ */
          esqbrace_token, /* ] */
          ocurlbrace_token, /* { */
          ecurlbrace_token, /* } */
          comma_token,
          colon_token,
          semicolon_token,
          eof_token,
      };

      /*
       * Unit attached to a numeric token.
       * NOTE(review): dim_wv/dim_wh presumably mean the CSS `vw`/`vh` units - confirm.
       */
      enum class dim_type : std::uint8_t {
          dim_px = 0,
          dim_em,
          dim_rem,
          dim_ex,
          dim_wv,
          dim_wh,
          dim_vmax,
          dim_vmin,
          dim_pt,
          dim_cm,
          dim_mm,
          dim_in,
          dim_pc,
          dim_max, /* Last enumerator, not a unit of its own */
      };

      /*
       * Bit values stored in `flags`.
       * constexpr (hence implicitly inline in C++17) so that odr-using them,
       * e.g. binding one to a const reference, needs no out-of-class definition.
       */
      static constexpr std::uint8_t default_flags = 0;
      static constexpr std::uint8_t flag_bad_string = (1u << 0u);
      static constexpr std::uint8_t number_dimension = (1u << 1u);
      static constexpr std::uint8_t number_percent = (1u << 2u);
      static constexpr std::uint8_t flag_bad_dimension = (1u << 3u);

      using value_type = std::variant<std::string_view, /* For strings and string like tokens */
              char, /* For delimiters (might need to move to unicode point) */
              double, /* For numeric stuff */
              css_parser_token_placeholder /* For general no token stuff */
      >;

      /* Typed storage */
      value_type value;

      /* Not set by the constructor; starts at 0 so it is never read uninitialised */
      int lineno = 0;

      token_type type;
      std::uint8_t flags = default_flags;
      /*
       * Not set by the constructor; starts at dim_px so it is never read uninitialised.
       * NOTE(review): presumably only meaningful once adjust_dim() has set
       * number_dimension - confirm against the implementation.
       */
      dim_type dimension_type = dim_type::dim_px;

      css_parser_token() = delete;
      /* Builds a token of kind `type` holding `value`; everything else keeps its default */
      explicit css_parser_token(token_type type, const value_type &value) :
              value(value), type(type) {}
      css_parser_token(css_parser_token &&other) = default;
      auto operator=(css_parser_token &&other) -> css_parser_token& = default;

      /* Defined out of line */
      auto adjust_dim(const css_parser_token &dim_token) -> bool;

      /*
       * Textual payload of the token.
       * - string payload: that view;
       * - char payload: a one-character view of the char stored inside this
       *   token, so the result must not outlive the token;
       * - anything else: `def`.
       */
      auto get_string_or_default(const std::string_view &def) const -> std::string_view {
          if (const auto *str = std::get_if<std::string_view>(&value)) {
              return *str;
          }
          if (const auto *ch = std::get_if<char>(&value)) {
              return std::string_view(ch, 1);
          }

          return def;
      }

      /* Delimiter character of the token, or (char)-1 if the payload is not a char */
      auto get_delim() const -> char {
          if (const auto *ch = std::get_if<char>(&value)) {
              return *ch;
          }

          return static_cast<char>(-1);
      }

      /*
       * Numeric payload, divided by 100 when the number_percent flag is set;
       * `def` if the payload is not a number.
       */
      auto get_number_or_default(double def) const -> double {
          const auto *num = std::get_if<double>(&value);

          if (num == nullptr) {
              return def;
          }

          return (flags & number_percent) ? *num / 100.0 : *num;
      }

      /*
       * Same as get_number_or_default(), with a numeric result clamped to [0, 1].
       * `def` is returned unclamped when the payload is not a number.
       */
      auto get_normal_number_or_default(double def) const -> double {
          if (!std::holds_alternative<double>(value)) {
              return def;
          }

          return std::clamp(get_number_or_default(def), 0.0, 1.0);
      }

      /* Debugging routines */
      constexpr auto get_token_type() -> const char *;
      /* This function might be slow */
      auto debug_token_str() -> std::string;
  };
  135. static auto css_parser_eof_token(void) -> const css_parser_token & {
  136. static css_parser_token eof_tok {
  137. css_parser_token::token_type::eof_token,
  138. css_parser_token_placeholder()
  139. };
  140. return eof_tok;
  141. }
  142. /* Ensure that parser tokens are simple enough */
  143. /*
  144. * compiler must implement P0602 "variant and optional should propagate copy/move triviality"
  145. * This is broken on gcc < 8!
  146. */
  147. static_assert(std::is_trivially_copyable_v<css_parser_token>);
  148. class css_tokeniser {
  149. public:
  150. css_tokeniser() = delete;
  151. css_tokeniser(rspamd_mempool_t *pool, const std::string_view &sv) :
  152. input(sv), offset(0), pool(pool) {}
  153. auto next_token(void) -> struct css_parser_token;
  154. auto get_offset(void) const { return offset; }
  155. auto pushback_token(struct css_parser_token &&t) const -> void {
  156. backlog.push_back(std::forward<css_parser_token>(t));
  157. }
  158. private:
  159. std::string_view input;
  160. std::size_t offset;
  161. rspamd_mempool_t *pool;
  162. mutable std::list<css_parser_token> backlog;
  163. auto consume_number() -> struct css_parser_token;
  164. auto consume_ident() -> struct css_parser_token;
  165. };
  166. }
  167. #endif //RSPAMD_CSS_TOKENISER_HXX