diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-07-23 17:13:36 +0100 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2016-07-23 17:14:45 +0100 |
commit | f5dcf4b8a4a6a9881d95e4d4b1edd4c27c077d08 (patch) | |
tree | c18b5037178bc3f9b0de97f2df35d3507497a7b0 /src/ragel | |
parent | c9733c168687f1b0bf843adbfdcd3a1d586f099b (diff) | |
download | rspamd-f5dcf4b8a4a6a9881d95e4d4b1edd4c27c077d08.tar.gz rspamd-f5dcf4b8a4a6a9881d95e4d4b1edd4c27c077d08.zip |
[Feature] Create a dedicated parser to strip newlines
Issue: #744
Diffstat (limited to 'src/ragel')
-rw-r--r-- | src/ragel/newlines_strip.rl | 82 |
1 files changed, 82 insertions, 0 deletions
diff --git a/src/ragel/newlines_strip.rl b/src/ragel/newlines_strip.rl
new file mode 100644
index 000000000..a2f5620bd
--- /dev/null
+++ b/src/ragel/newlines_strip.rl
@@ -0,0 +1,114 @@
+%%{
+  machine newlines_strip;
+
+  # Leaving two or more line breaks (blanks may sit between them): emit one
+  # newline unless one was already emitted since the last word.
+  action Double_CRLF {
+    if (!crlf_added) {
+      g_byte_array_append (data, (const guint8 *)"\n", 1);
+    }
+
+    crlf_added = TRUE;
+    c = p;
+  }
+
+  # Leaving a run of blanks: the whole run becomes a single space.
+  action WSP {
+    g_byte_array_append (data, (const guint8 *)" ", 1);
+    c = p;
+  }
+
+  # Entering a word (a run of non-space bytes): mark where it starts.
+  action Text_Start {
+    crlf_added = FALSE;
+    c = p;
+  }
+
+  # Leaving a word: copy it out and remember its last byte, which Line_CRLF
+  # inspects to decide whether the following break is kept.
+  action Text_End {
+    if (p > c) {
+      g_byte_array_append (data, (const guint8 *)c, p - c);
+      last_c = *(p - 1);
+    }
+
+    c = p;
+  }
+
+  # Leaving a line break matched by TEXT: unless a newline was already
+  # emitted, keep the break as a newline only for HTML input or when the last
+  # copied byte is ASCII punctuation. The break is always counted and the
+  # current input pointer is recorded.
+  action Line_CRLF {
+    if (!crlf_added) {
+      if (is_html || g_ascii_ispunct (last_c)) {
+        g_byte_array_append (data, (const guint8 *)"\n", 1);
+        crlf_added = TRUE;
+      }
+    }
+
+    (*newlines_count)++;
+    g_ptr_array_add (newlines, (gpointer)p);
+    c = p;
+  }
+
+  # Form feed is part of the builtin space class, so ^space below rejects it.
+  # It is listed here because otherwise no transition accepts it and the
+  # machine stops at the first form feed, dropping the rest of the input.
+  WSP   = " " | "\t" | "\v" | "\f";
+  CRLF = ("\r" . "\n") | ( "\r" ) | ("\n");
+  DOUBLE_CRLF = (CRLF <: (WSP* CRLF)+) %Double_CRLF;
+  ANY_CRLF = CRLF | DOUBLE_CRLF;
+  LINE_ELT = ((WSP+ %WSP)** :> ((^space)+) >Text_Start %Text_End <: (WSP+ %WSP)**);
+  LINE = LINE_ELT+;
+  TEXT = ANY_CRLF** . (LINE <: ANY_CRLF %Line_CRLF)+ | LINE | ANY_CRLF %Line_CRLF;
+
+  main := TEXT;
+}%%
+
+#include <glib.h>
+
+%% write data;
+
+/**
+ * Copy the text in [begin, pe) into data with its line breaks stripped.
+ *
+ * Words are copied verbatim and each run of blanks becomes one space. A run
+ * of two or more line breaks becomes a single "\n". A single line break is
+ * kept as "\n" only when is_html is set or the last copied byte is ASCII
+ * punctuation; otherwise it is dropped.
+ *
+ * @param begin first byte of the input
+ * @param pe one past the last byte of the input
+ * @param data output array; bytes are appended to it
+ * @param is_html keep single line breaks as "\n"
+ * @param newlines_count incremented for each line break handled by Line_CRLF;
+ * dereferenced unconditionally
+ * @param newlines receives the input pointer recorded for each such break
+ */
+void
+rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe,
+	GByteArray *data, gboolean is_html, guint *newlines_count,
+	GPtrArray *newlines)
+{
+	const gchar *c, *p, *eof;
+	gint last_c = -1; /* last byte copied by Text_End; -1 until then */
+	gint cs = 0;
+	gboolean crlf_added = FALSE;
+
+	c = begin;
+	p = begin;
+	eof = pe;
+
+	%% write init;
+	%% write exec;
+
+	/*
+	 * Flush the bytes between c and p that no action copied.
+	 * NOTE(review): if the machine stops before pe, the input after p is
+	 * not copied.
+	 */
+	if (p > c) {
+		g_byte_array_append (data, (const guint8 *)c, p - c);
+	}
+}