aboutsummaryrefslogtreecommitdiffstats
path: root/src/ragel
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-07-23 17:13:36 +0100
committer Vsevolod Stakhov <vsevolod@highsecure.ru> 2016-07-23 17:14:45 +0100
commit f5dcf4b8a4a6a9881d95e4d4b1edd4c27c077d08 (patch)
tree c18b5037178bc3f9b0de97f2df35d3507497a7b0 /src/ragel
parent c9733c168687f1b0bf843adbfdcd3a1d586f099b (diff)
download rspamd-f5dcf4b8a4a6a9881d95e4d4b1edd4c27c077d08.tar.gz
rspamd-f5dcf4b8a4a6a9881d95e4d4b1edd4c27c077d08.zip
[Feature] Create a dedicated parser to strip newlines
Issue: #744
Diffstat (limited to 'src/ragel')
-rw-r--r-- src/ragel/newlines_strip.rl 82
1 files changed, 82 insertions, 0 deletions
diff --git a/src/ragel/newlines_strip.rl b/src/ragel/newlines_strip.rl
new file mode 100644
index 000000000..a2f5620bd
--- /dev/null
+++ b/src/ragel/newlines_strip.rl
@@ -0,0 +1,82 @@
+%%{
+ machine newlines_strip;
+
+ action Double_CRLF { /* Attached as the leaving action of DOUBLE_CRLF (a run of line breaks): append one "\n" to data unless crlf_added is already set, then mark it set. */
+ if (!crlf_added) {
+ g_byte_array_append (data, (const guint8 *)"\n", 1);
+ c = p;
+ }
+
+ crlf_added = TRUE; /* suppresses further "\n" output until Text_Start clears the flag */
+ c = p; /* c marks the start of input not yet copied to data; skip past the breaks */
+ }
+
+ action WSP { /* Attached as the leaving action of each WSP+ run in LINE_ELT: append a single space to data in place of the run. */
+ g_byte_array_append (data, (const guint8 *)" ", 1);
+ c = p; /* the whitespace itself is never copied; move the copy start past it */
+ }
+
+ action Text_Start { /* Attached as the entering action of a run of non-space characters in LINE_ELT. */
+ crlf_added = FALSE; /* new text seen, so the next line break may emit "\n" again */
+ c = p; /* remember where this text run begins so Text_End can copy it */
+ }
+
+ action Text_End { /* Attached as the leaving action of a run of non-space characters: copy the bytes from c up to p into data. */
+ if (p > c) {
+ g_byte_array_append (data, (const guint8 *)c, p - c);
+ last_c = *(p - 1); /* final byte of the copied run; Line_CRLF tests it for punctuation */
+ }
+
+ c = p; /* everything before p has now been handled */
+ }
+
+ action Line_CRLF { /* Attached as the leaving action of ANY_CRLF in TEXT: optionally emit "\n", then count the break and record its position. */
+ if (!crlf_added) {
+ if (is_html || g_ascii_ispunct (last_c)) { /* keep the break only for HTML input or when the last copied byte is ASCII punctuation */
+ g_byte_array_append (data, (const guint8 *)"\n", 1);
+ crlf_added = TRUE;
+ }
+ }
+
+ (*newlines_count)++; /* incremented on every firing, whether or not "\n" was emitted */
+ g_ptr_array_add (newlines, (gpointer)p); /* stores the current input pointer p, not an offset into data */
+ c = p;
+ }
+
+
+ WSP = " " | "\t" | "\v"; # one whitespace character: space, tab or vertical tab
+ CRLF = ("\r" . "\n") | ( "\r" ) | ("\n"); # one line break: CR LF, bare CR or bare LF
+ DOUBLE_CRLF = (CRLF <: (WSP* CRLF)+) %Double_CRLF; # a break followed by one or more further breaks, each optionally preceded by WSP; fires Double_CRLF on leaving
+ ANY_CRLF = CRLF | DOUBLE_CRLF; # a single break or a run of breaks
+ LINE_ELT = ((WSP+ %WSP)** :> ((^space)+) >Text_Start %Text_End <: (WSP+ %WSP)**); # optional WSP runs, a run of non-space characters bracketed by Text_Start and Text_End, then optional WSP runs; each WSP run fires WSP
+ LINE = LINE_ELT+; # one or more elements with no line break between them
+ TEXT = ANY_CRLF** . (LINE <: ANY_CRLF %Line_CRLF)+ | LINE | ANY_CRLF %Line_CRLF; # three alternatives: optional leading breaks then break-terminated lines, a lone line, or a lone break
+
+ main := TEXT; # entry point of the machine
+}%%
+
+#include <glib.h>
+
+%% write data;
+/* Run the newlines_strip machine over the input and append the result to data: text runs are copied, whitespace runs become one space, and line breaks are emitted as "\n" only under the conditions coded in the Double_CRLF and Line_CRLF actions. */
+void
+rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe, /* begin: first input byte; pe: end of input, also used as eof below */
+ GByteArray *data, gboolean is_html, guint *newlines_count, /* data: output buffer, appended to; is_html: read by Line_CRLF; newlines_count: incremented by Line_CRLF */
+ GPtrArray *newlines) /* receives the input pointer recorded by each Line_CRLF firing */
+{
+ const gchar *c, *p, *eof; /* c: start of input not yet copied; p and eof are read by the Ragel-generated code */
+ gint last_c = -1; /* last byte copied by Text_End; stays -1 until some text has been copied */
+ gint cs = 0; /* Ragel current-state variable; NOTE(review): presumably reassigned by the write init expansion below */
+ gboolean crlf_added = FALSE; /* set once a "\n" has been emitted, cleared by Text_Start */
+
+ c = begin;
+ p = begin;
+ eof = pe; /* the whole input is passed in one buffer, so end of buffer is end of input */
+ /* The two Ragel directives below expand into the state initialisation and the scanning loop that runs the actions defined in the machine. */
+ %% write init;
+ %% write exec;
+
+ if (p > c) { /* flush any bytes between c and p that no action copied */
+ g_byte_array_append (data, (const guint8 *)c, p - c);
+ }
+}