ソースを参照

* Fix URL length when passing URLs to the normalizer

TODO: fix the HTML parsing regexp (it currently does not work)
tags/0.2.7
Vsevolod Stakhov 15年前
コミット
f1e17a0d63
2個のファイルの変更7行の追加9行の削除
  1. 2
    2
      test/rspamd_url_test.c
  2. 5
    7
      url.c

+ 2
- 2
test/rspamd_url_test.c ファイルの表示

@@ -31,10 +31,10 @@ rspamd_url_test_func ()

text = g_byte_array_new();
text->data = (gchar *)test_text;
text->len = sizeof (test_text);
text->len = strlen (test_text);
html = g_byte_array_new();
html->data = (gchar *)test_html;
html->len = sizeof (test_html);
html->len = strlen (test_html);
bzero (&task, sizeof (task));
TAILQ_INIT (&task.urls);

+ 5
- 7
url.c ファイルの表示

@@ -32,10 +32,8 @@ struct _proto {
unsigned int need_ssl:1;
};

static const char *html_url = "((?:href\\s*=\\s*)|(?:archive\\s*=\\s*)|(?:code\\s*=\\s*)|(?:codebase\\s*=\\s*)|(?:src\\s*=\\s*)|(?:cite\\s*=\\s*)"
"|(:?background\\s*=\\s*)|(?:pluginspage\\s*=\\s*)|(?:pluginurl\\s*=\\s*)|(?:action\\s*=\\s*)|(?:dynsrc\\s*=\\s*)|(?:longdesc\\s*=\\s*)|(?:lowsrc\\s*=\\s*)|(?:usemap\\s*=\\s*))"
"\\\"?([^>\"<]+)\\\"?";
static const char *text_url = "((?:mailto\\:|(?:news|(?:ht|f)tp(?:s?))\\://){1}[^ ]+)";
static const char *html_url = "((?:href\\s*=\\s*)?([^>\"<]+))?";
static const char *text_url = "(https?://[^ ]+)";

static short url_initialized = 0;
GRegex *text_re, *html_re;
@@ -906,7 +904,7 @@ url_parse_text (struct worker_task *task, GByteArray *content)
else {
msg_debug ("url_parse_text: cannot find url pattern in given string");
}
} while (rc > 0);
} while (rc);
}
}

@@ -926,7 +924,7 @@ url_parse_html (struct worker_task *task, GByteArray *content)
if (rc) {
if (g_match_info_matches (info)) {
g_match_info_fetch_pos (info, 0, &start, &pos);
url_str = g_match_info_fetch (info, 3);
url_str = g_match_info_fetch (info, 2);
msg_debug ("url_parse_html: extracted string with regexp: '%s'", url_str);
if (url_str != NULL) {
new = g_malloc (sizeof (struct uri));
@@ -947,6 +945,6 @@ url_parse_html (struct worker_task *task, GByteArray *content)
else {
msg_debug ("url_parse_html: cannot find url pattern in given string");
}
} while (rc > 0);
} while (rc);
}
}

読み込み中…
キャンセル
保存