Browse Source

[Project] Add some methods for css parser

tags/3.0
Vsevolod Stakhov 3 years ago
parent
commit
427f887936

+ 1
- 0
src/libserver/css/CMakeLists.txt View File

@@ -14,6 +14,7 @@ SET(LIBCSSSRC "${CMAKE_CURRENT_SOURCE_DIR}/css.cxx"
"${CMAKE_CURRENT_SOURCE_DIR}/css_property.cxx"
"${CMAKE_CURRENT_SOURCE_DIR}/css_value.cxx"
"${CMAKE_CURRENT_SOURCE_DIR}/css_selector.cxx"
"${CMAKE_CURRENT_SOURCE_DIR}/css_parser.cxx"
"${RAGEL_ragel_css_selector_parser_OUTPUTS}"
"${RAGEL_ragel_css_rule_parser_OUTPUTS}"
PARENT_SCOPE)

+ 2
- 0
src/libserver/css/css.cxx View File

@@ -29,6 +29,8 @@ rspamd_css_parse_style (const guchar *begin, gsize len, GError **err)

namespace rspamd::css {

INIT_LOG_MODULE_PUBLIC(css);

class css_style_sheet::impl {

};

+ 4
- 1
src/libserver/css/css.h View File

@@ -18,13 +18,16 @@
#define RSPAMD_CSS_H

#include "config.h"
#include "mem_pool.h"

#ifdef __cplusplus
extern "C" {
#endif
typedef void * rspamd_css;

rspamd_css rspamd_css_parse_style (const guchar *begin, gsize len, GError **err);
rspamd_css rspamd_css_parse_style (rspamd_mempool_t *pool,
const guchar *begin,
gsize len, GError **err);
#ifdef __cplusplus
}
#endif

+ 12
- 0
src/libserver/css/css.hxx View File

@@ -18,9 +18,21 @@

#include <string>
#include <memory>
#include "logger.h"

namespace rspamd::css {

extern unsigned int rspamd_css_log_id;

#define msg_debug_css(...) rspamd_conditional_debug_fast (NULL, NULL, \
rspamd_css_log_id, "css", pool->tag.uid, \
G_STRFUNC, \
__VA_ARGS__)
#define msg_err_css(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
"css", pool->tag.uid, \
G_STRFUNC, \
__VA_ARGS__)

class css_style_sheet {
public:
css_style_sheet();

+ 238
- 0
src/libserver/css/css_parser.cxx View File

@@ -0,0 +1,238 @@
/*-
* Copyright 2021 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "css_parser.hxx"
#include <unicode/utf8.h>


namespace rspamd::css {

class css_parser {
public:
css_parser(void) = delete; /* Require mempool to be set for logging */
explicit css_parser(rspamd_mempool_t *pool) : pool (pool) {}

bool consume_input(const std::string_view &sv);

auto get_object_maybe(void) -> tl::expected<std::unique_ptr<css_style_sheet>, css_parse_error> {
if (state == parser_state::parse_done) {
state = parser_state::initial_state;
return std::move (style_object);
}

return tl::make_unexpected (error);
}

private:
enum class parser_state {
initial_state,
skip_spaces,
parse_selector,
ignore_selector, /* e.g. media or namespace */
parse_done,
};
parser_state state = parser_state::initial_state;
std::unique_ptr<css_style_sheet> style_object;
css_parse_error error;
rspamd_mempool_t *pool;

/* Helper parser methods */
bool need_unescape(const std::string_view &sv);

std::string_view unescape_css(const std::string_view &sv);
};

/*
* Find if we need to unescape css
*/
bool
css_parser::need_unescape(const std::string_view &sv)
{
bool in_quote = false;
char quote_char, prev_c = 0;

for (const auto c : sv) {
if (!in_quote) {
if (c == '"' || c == '\'') {
in_quote = true;
quote_char = c;
}
else if (c == '\\') {
return true;
}
}
else {
if (c == quote_char) {
if (prev_c != '\\') {
in_quote = false;
}
}
prev_c = c;
}
}

return false;
}

/*
* Unescape css escapes
* \20AC : must be followed by a space if the next character is one of a-f, A-F, 0-9
* \0020AC : must be 6 digits long, no space needed (but can be included)
*/
std::string_view
css_parser::unescape_css(const std::string_view &sv)
{
auto *nspace = reinterpret_cast<char *>(rspamd_mempool_alloc(pool, sv.length ()));
auto *d = nspace;
auto nleft = sv.length ();

enum {
normal = 0,
quoted,
escape,
skip_spaces,
} state = normal;

char quote_char, prev_c = 0;
auto escape_offset = 0, i = 0;

#define MAYBE_CONSUME_CHAR(c) do { \
if (c == '"' || c == '\'') { \
state = quoted; \
quote_char = c; \
nleft--; \
*d++ = c; \
} \
else if (c == '\\') { \
escape_offset = i; \
state = escape; \
} \
else { \
state = normal; \
nleft--; \
*d++ = c; \
} \
} while (0)

for (const auto c : sv) {
if (nleft == 0) {
msg_err_css("cannot unescape css: truncated buffer of size %d",
(int)sv.length());
break;
}
switch (state) {
case normal:
MAYBE_CONSUME_CHAR(c);
break;
case quoted:
if (c == quote_char) {
if (prev_c != '\\') {
state = normal;
}
}
prev_c = c;
nleft --;
*d++ = c;
break;
case escape:
if (!g_ascii_isxdigit(c)) {
if (i > escape_offset + 1) {
/* Try to decode an escape */
const auto *escape_start = &sv[escape_offset + 1];
unsigned long val;

if (!rspamd_xstrtoul (escape_start, i - escape_offset - 1, &val)) {
msg_debug_css("invalid broken escape found at pos %d",
escape_offset);
}
else {
if (val < 0x1f) {
/* Trivial case: ascii character */
*d++ = (unsigned char)val;
nleft --;
}
else {
UChar32 uc = val;
auto off = d - nspace;
UTF8_APPEND_CHAR_SAFE((uint8_t *) d, off,
sv.length (), uc);
d = nspace + off;
nleft = sv.length () - off;
}
}
}
else {
/* Empty escape, ignore it */
msg_debug_css("invalid empty escape found at pos %d",
escape_offset);
}

if (nleft > 0) {
msg_err_css("cannot unescape css: truncated buffer of size %d",
(int)sv.length());
}
else {
/* Escape is done, advance forward */
if (g_ascii_isspace (c)) {
state = skip_spaces;
}
else {
MAYBE_CONSUME_CHAR(c);
}
}
}
break;
case skip_spaces:
if (!g_ascii_isspace(c)) {
MAYBE_CONSUME_CHAR(c);
}
/* Ignore spaces */
break;
}

i ++;
}

return std::string_view{nspace, sv.size() - nleft};
};

bool css_parser::consume_input(const std::string_view &sv)
{
auto our_sv = sv;

if (need_unescape(sv)) {
our_sv = unescape_css(sv);
msg_debug_css("unescaped css: input size %d, unescaped size %d",
(int)sv.size(), (int)our_sv.size());
}

return true;
}

/*
* Wrapper for the parser
*/
auto parse_css(rspamd_mempool_t *pool, const std::string_view &st) ->
tl::expected<std::unique_ptr<css_style_sheet>,css_parse_error>
{
css_parser parser(pool);

parser.consume_input(st);

return parser.get_object_maybe();
}

}

+ 34
- 0
src/libserver/css/css_parser.hxx View File

@@ -0,0 +1,34 @@
/*-
* Copyright 2021 Vsevolod Stakhov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef RSPAMD_CSS_PARSER_HXX
#define RSPAMD_CSS_PARSER_HXX

#include "css.hxx"
#include "parse_error.hxx"
#include "contrib/expected/expected.hpp"
#include "logger.h"

namespace rspamd::css {

INIT_LOG_MODULE(chartable)

auto parse_css (rspamd_mempool_t *pool, const std::string_view &st) ->
tl::expected<std::unique_ptr<css_style_sheet>,css_parse_error>;

}

#endif //RSPAMD_CSS_PARSER_HXX

+ 2
- 1
src/libserver/css/parse_error.hxx View File

@@ -34,13 +34,14 @@ enum class css_parse_error_type {
};

struct css_parse_error {
css_parse_error_type type;
css_parse_error_type type = css_parse_error_type::PARSE_ERROR_UNKNOWN_ERROR;
std::optional<std::string> description;

explicit css_parse_error (css_parse_error_type type, const std::string &description) :
type(type), description(description) {}
explicit css_parse_error (css_parse_error_type type) :
type(type) {}
css_parse_error() = default;
};

}

Loading…
Cancel
Save