1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
#include <sys/types.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <syslog.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include "../src/config.h"
#include "../src/main.h"
#include "../src/cfg_file.h"
#include "../src/url.h"
#include "tests.h"
/*
 * Plain-text fixture handed to url_parse_text() by rspamd_url_test_func().
 * It is a list of newline-terminated lines mixing schemeless host names,
 * dotted-number strings, explicit schemes, ports, trailing punctuation and
 * e-mail-like fragments. The test asserts that exactly 39 URLs are found in
 * this text, so changing any line here most likely requires updating that
 * expected count as well.
 * NOTE(review): several lines occur twice in a row; whether the repetition
 * is intentional cannot be told from this file - confirm before removing.
 */
const char *test_text =
"www.schemeless.ru\n"
"www.schemeless.rus\n"
" as ftp.schemeless.ru dasd \n"
"ftp12.schemeless.ru\n"
"ftpsearch.schemeless.ru\n"
"schemeless.ru\n"
"www.schemeless.microsoft\n"
"1.2.3.4\n"
"1.2.3.4/a\n"
"1.2.3\n"
"1.2.3.4.5\n"
"www.schemeless.ru,\n"
"www.schemeless.ru.\n"
"http://www.schemed.ru.\n"
"http://www.schemed.ru.\n"
"http://www.bolinfest.com/targetalert/'\n"
"http://www.bolinfest.com/targetalert/'';\n"
"https://www.schemed.ru.\n"
"ufps://www.schemed.ru.\n"
"http://ported.ru:8080\n"
"http://ported.ru:8080\n"
"http://1.2.3.4\n"
"http://1.2.3.4:80\n"
"1.2.3.4:80\n"
"www.a9.com\n"
"www.a-9.com\n"
"http://www.schemed.ru/a.txt:\n"
"http://www.schemed.ru/a.txt'\n"
"http://www.schemed.ru/a.txt\"\n"
"http://www.schemed.ru/a.txt>\n"
"http://www.schemed.ru/a=3&b=4\n"
"http://spam.ru/bad=user@domain.com\n"
"http://spam.ru/bad=user@domain.com\n"
"http://spam.ru user@domain.com\n"
"http://a.foto.radikal.ru/0604/de7793c6ca62.jpg\n"
"http://a.foto.radikal.ru/0604/de7793c6ca62.jpg\n"
"schemeless.gz\n"
"schemeless.jp\n"
"schemeless.ua\n"
"schemeless.gz/a\n"
"mysql.so\n"
"http://mysql.so\n"
"3com.com\n"
"lj-user.livejournal.com\n"
"http://lj-user.livejournal.com\n"
"http://vsem.ru?action;\n";
/*
 * HTML fixture handed to url_parse_html() by rspamd_url_test_func().
 * It holds a single anchor whose href (http://microsoft.com) differs from
 * the URL-looking link text inside the tag; the test asserts that exactly
 * one URL is reported for this fragment.
 */
const char *test_html = "<some_tag>This is test file with <a href=\"http://microsoft.com\">http://TesT.com/././?%45%46%20 url</a></some_tag>";
/* Function for using in glib test suite */
void
rspamd_url_test_func ()
{
GByteArray *text, *html;
struct worker_task task;
struct uri *url;
int i = 0;
text = g_byte_array_new();
text->data = (gchar *)test_text;
text->len = strlen (test_text);
html = g_byte_array_new();
html->data = (gchar *)test_html;
html->len = strlen (test_html);
bzero (&task, sizeof (task));
TAILQ_INIT (&task.urls);
task.task_pool = memory_pool_new (8192);
g_test_timer_start ();
g_test_message ("Testing text URL regexp parser");
url_parse_text (&task, text);
TAILQ_FOREACH (url, &task.urls, next) {
msg_debug ("Found url: %s, hostname: %s, data: %s", struri (url), url->host, url->data);
i ++;
}
while (!TAILQ_EMPTY (&task.urls)) {
url = TAILQ_FIRST (&task.urls);
TAILQ_REMOVE (&task.urls, url, next);
}
g_assert (i == 39);
msg_debug ("Time elapsed: %.2f", g_test_timer_elapsed ());
i = 0;
g_test_timer_start ();
g_test_message ("Testing html URL regexp parser");
url_parse_html (&task, html);
TAILQ_FOREACH (url, &task.urls, next) {
msg_debug ("Found url: %s, hostname: %s, data: %s", struri (url), url->host, url->data);
i ++;
}
while (!TAILQ_EMPTY (&task.urls)) {
url = TAILQ_FIRST (&task.urls);
TAILQ_REMOVE (&task.urls, url, next);
}
g_assert (i == 1);
msg_debug ("Time elapsed: %.2f", g_test_timer_elapsed ());
}
|