source.dussan.org Git - rspamd.git/commitdiff
[Fix] Html: Fix processing of fjlig entity
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 14 Jun 2019 10:50:41 +0000 (11:50 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 14 Jun 2019 10:50:41 +0000 (11:50 +0100)
TODO: Make a generic fix

src/libserver/html.c
src/libserver/html_entities.h
test/lua/unit/html.lua

index fcc6e568463cad70a9301e9a661ef5da51296be9..ca4ad5d38b35d3f3f74e151fa5c183fb8f920a14 100644 (file)
@@ -222,14 +222,16 @@ rspamd_html_library_init (void)
                                G_N_ELEMENTS (entities_defs));
 
                for (i = 0; i < G_N_ELEMENTS (entities_defs); i++) {
-                       k = kh_put (entity_by_number, html_entity_by_number,
-                                       entities_defs[i].code, &rc);
-                       kh_val (html_entity_by_number, k) = entities_defs[i].replacement;
+                       if (entities_defs[i].code != 0) {
+                               k = kh_put (entity_by_number, html_entity_by_number,
+                                               entities_defs[i].code, &rc);
+                               kh_val (html_entity_by_number, k) = entities_defs[i].replacement;
+                       }
 
                        k = kh_put (entity_by_name, html_entity_by_name,
                                        entities_defs[i].name, &rc);
                        kh_val (html_entity_by_name, k) = entities_defs[i].replacement;
-       }
+               }
 
                html_color_by_name = kh_init (color_by_name);
                kh_resize (color_by_name, html_color_by_name,
@@ -238,7 +240,7 @@ rspamd_html_library_init (void)
                rspamd_ftok_t *keys;
 
                keys = g_malloc0 (sizeof (rspamd_ftok_t) *
-                               G_N_ELEMENTS (html_colornames));
+                                                 G_N_ELEMENTS (html_colornames));
 
                for (i = 0; i < G_N_ELEMENTS (html_colornames); i ++) {
                        struct html_color c;
index c6155664b5ee29ee221b137094fc9a8507654b88..8b323e9b388d9302de21cec6e532ee843ad981e5 100644 (file)
@@ -1722,7 +1722,7 @@ static entity entities_defs[] = {
                {"die", 168, "\xc2\xa8"},
                {"ngt", 8815, "\xe2\x89\xaf"},
                {"vcy", 1074, "\xd0\xb2"},
-               {"fjlig", 102, "\x66\x6a"},
+               {"fjlig", 0, "\x66\x6a"},
                {"submult", 10945, "\xe2\xab\x81"},
                {"ubrcy", 1118, "\xd1\x9e"},
                {"ovbar", 9021, "\xe2\x8c\xbd"},
index 68ee7d1eb74298aeb7cd7d3e716c6338f2e2840e..79d55502eb4cce80110499e780fb68f024fc124e 100644 (file)
@@ -4,6 +4,9 @@ context("HTML processing", function()
 
   test("Extract text from HTML", function()
     local cases = {
+      -- Entities
+      {[[<html><body>.&#102;&#105;&#114;&#101;&#98;&#97;&#115;&#101;&#97;&#112;&#112;.&#99;&#111;&#109;</body></html>]],
+       [[.firebaseapp.com]]},
       {[[
 <!DOCTYPE html>
 <html lang="en">