Browse Source

[Project] Css: Projected a parser

tags/3.0
Vsevolod Stakhov 3 years ago
parent
commit
ab34f88895

+ 135
- 9
src/libserver/css/css_parser.cxx View File

@@ -16,11 +16,64 @@

#include "css_parser.hxx"
#include "css_tokeniser.hxx"
#include <vector>
#include <unicode/utf8.h>


namespace rspamd::css {

/*
* Represents a consumed token by a parser
*/
struct css_consumed_block {
enum class parser_tag_type : std::uint8_t {
css_top_block,
css_qualified_rule,
css_at_rule,
css_simple_block,
css_function,
css_component
};

using consumed_block_ptr = std::unique_ptr<css_consumed_block>;

parser_tag_type tag;
std::variant<std::monostate,
std::vector<consumed_block_ptr>,
css_parser_token> content;

css_consumed_block() = delete;

css_consumed_block(parser_tag_type tag) : tag(tag) {
if (tag == parser_tag_type::css_top_block ||
tag == parser_tag_type::css_qualified_rule ||
tag == parser_tag_type::css_simple_block) {
/* Pre-allocate content for known vector blocks */
content = std::vector<consumed_block_ptr>(4);
}
}
/* Construct a block from a single lexer token (for trivial blocks) */
explicit css_consumed_block(parser_tag_type tag, css_parser_token &&tok) :
tag(tag), content(std::move(tok)) {}

/* Attach a new block to the compound block, consuming block inside */
auto attach_block(consumed_block_ptr &&block) -> bool {
if (content.index() == 0) {
/* Switch from monostate */
content = std::vector<consumed_block_ptr>(1);
}
else if (content.index() == 2) {
/* A single component, cannot attach a block ! */
return false;
}

std::get<std::vector<consumed_block_ptr>>(content)
.push_back(std::move(block));

return true;
}
};

class css_parser {
public:
css_parser(void) = delete; /* Require mempool to be set for logging */
@@ -31,10 +84,10 @@ public:
/*
 * Hands the parsed style sheet over to the caller.
 *
 * When the parser state is parse_done, the state is reset to initial_state
 * and the style sheet is moved out of the parser. In any other state the
 * stored parse error is returned as the unexpected value.
 */
auto get_object_maybe(void) -> tl::expected<std::unique_ptr<css_style_sheet>, css_parse_error> {
	if (state == parser_state::parse_done) {
		state = parser_state::initial_state;
		/* Exactly one return per path: a second statement after it is unreachable */
		return std::move(style_object);
	}

	return tl::make_unexpected(error);
}

private:
@@ -93,17 +146,90 @@ bool css_parser::consume_input(const std::string_view &sv)
bool eof = false;
css_tokeniser css_tokeniser(pool, sv);

while (!eof) {
auto consumed_blocks = std::make_unique<css_consumed_block>(
css_consumed_block::parser_tag_type::css_top_block);
auto rec_level = 0;
const auto max_rec = 20;

/*
 * Consumer for a component value: reads one token from the tokeniser and
 * raises the `eof` flag when that token is the end-of-input token.
 * Returns false, storing PARSE_ERROR_BAD_NESTING in `error`, when the
 * nesting counter goes above max_rec; returns true otherwise.
 * NOTE(review): `top` is not used in the visible body, and token types other
 * than eof_token are read and dropped - presumably a stub; confirm.
 */
auto component_value_consumer = [&](std::unique_ptr<css_consumed_block> &top) -> bool {

/* Each call counts as one nesting level; on this failure path the counter is not decremented */
if (++rec_level > max_rec) {
error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
return false;
}

auto next_token = css_tokeniser.next_token();

/* Top level parser */
switch (next_token.type) {
case css_parser_token::token_type::eof_token:
eof = true;

}

/* Leave the nesting level entered above */
--rec_level;

return true;
};

/*
 * Consumer for a qualified rule: creates a css_qualified_rule block, reads
 * tokens until end of input or until a nested consumer fails, and finally
 * attaches the new block to `top` when `top` is the top-level block.
 * Returns false, storing PARSE_ERROR_BAD_NESTING in `error`, when the
 * nesting counter goes above max_rec; otherwise returns the result of the
 * token loop.
 */
auto qualified_rule_consumer = [&](std::unique_ptr<css_consumed_block> &top) -> bool {
/* Each call counts as one nesting level; on this failure path the counter is not decremented */
if (++rec_level > max_rec) {
msg_err_css("max nesting reached, ignore style");
error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
return false;
}

auto ret = true;
auto block = std::make_unique<css_consumed_block>(
css_consumed_block::parser_tag_type::css_qualified_rule);

/* Token types that are not listed in the switch are read and skipped */
while (ret && !eof) {
auto &&next_token = css_tokeniser.next_token();
switch (next_token.type) {
case css_parser_token::token_type::eof_token:
eof = true;
break;
case css_parser_token::token_type::ident_token:
case css_parser_token::token_type::hash_token:
/* Consume allowed complex tokens as a rule preamble */
/*
 * NOTE(review): component_value_consumer fetches its own token from the
 * tokeniser, so the token read above is not passed to it - confirm that
 * dropping it is intended.
 */
ret = component_value_consumer(block);
break;
case css_parser_token::token_type::cdo_token:
case css_parser_token::token_type::cdc_token:
if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
/* Ignore */
ret = true;
}
else {
/* NOTE(review): empty branch - handling of these tokens outside the top block is presumably still to be written */

}
break;
};
}

/* The result of attach_block is not checked here */
if (ret) {
if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
top->attach_block(std::move(block));
}
}

/* Leave the nesting level entered above */
--rec_level;

return ret;
};

/*
 * Selects the consumer lambda for the current parser state:
 * qualified_rule_consumer is returned for parser_state::initial_state.
 * NOTE(review): the default branch only logs and has no return statement,
 * and `next_token` is not declared in this lambda nor in the visible part of
 * the enclosing function - these lines look like leftovers of an earlier
 * token loop; confirm and remove or complete them.
 */
auto get_parser_consumer = [&]() -> auto {
switch (state) {
case parser_state::initial_state:
/* Top level qualified parser */
return qualified_rule_consumer;
/* Not reachable: placed after the return above */
break;
default:
/* Ignore tokens */
msg_debug_css("got token: %s", next_token.debug_token_str().c_str());
}
};

/* Keep consuming until the input is exhausted or a consumer reports a failure */
while (!eof) {
	/* Pick the consumer lambda that matches the current parser state */
	auto state_consumer = get_parser_consumer();
	const auto consumed_ok = state_consumer(consumed_blocks);

	if (!consumed_ok) {
		break;
	}
}

+ 1
- 0
src/libserver/css/css_tokeniser.hxx View File

@@ -90,6 +90,7 @@ struct css_parser_token {
css_parser_token() = delete;
explicit css_parser_token(token_type type, const value_type &value) :
value(value), type(type) {}
css_parser_token(css_parser_token &&other) = default;
auto adjust_dim(const css_parser_token &dim_token) -> bool;

/* Debugging routines */

+ 1
- 0
src/libserver/css/parse_error.hxx View File

@@ -30,6 +30,7 @@ namespace rspamd::css {
enum class css_parse_error_type {
PARSE_ERROR_UNKNOWN_OPTION,
PARSE_ERROR_INVALID_SYNTAX,
PARSE_ERROR_BAD_NESTING,
PARSE_ERROR_NYI,
PARSE_ERROR_UNKNOWN_ERROR,
};

Loading…
Cancel
Save