Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

rspamd_url_test.c 3.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. #include <sys/types.h>
  2. #include <sys/time.h>
  3. #include <sys/wait.h>
  4. #include <sys/param.h>
  5. #include <netinet/in.h>
  6. #include <arpa/inet.h>
  7. #include <netdb.h>
  8. #include <syslog.h>
  9. #include <fcntl.h>
  10. #include <stdlib.h>
  11. #include <string.h>
  12. #include "../src/config.h"
  13. #include "../src/main.h"
  14. #include "../src/cfg_file.h"
  15. #include "../src/url.h"
  16. #include "tests.h"
  17. const char *test_text =
  18. "www.schemeless.ru\n"
  19. "www.schemeless.rus\n"
  20. " as ftp.schemeless.ru dasd \n"
  21. "ftp12.schemeless.ru\n"
  22. "ftpsearch.schemeless.ru\n"
  23. "schemeless.ru\n"
  24. "www.schemeless.microsoft\n"
  25. "1.2.3.4\n"
  26. "1.2.3.4/a\n"
  27. "1.2.3\n"
  28. "1.2.3.4.5\n"
  29. "www.schemeless.ru,\n"
  30. "www.schemeless.ru.\n"
  31. "http://www.schemed.ru.\n"
  32. "http://www.schemed.ru.\n"
  33. "http://www.bolinfest.com/targetalert/'\n"
  34. "http://www.bolinfest.com/targetalert/'';\n"
  35. "https://www.schemed.ru.\n"
  36. "ufps://www.schemed.ru.\n"
  37. "http://ported.ru:8080\n"
  38. "http://ported.ru:8080\n"
  39. "http://1.2.3.4\n"
  40. "http://1.2.3.4:80\n"
  41. "1.2.3.4:80\n"
  42. "www.a9.com\n"
  43. "www.a-9.com\n"
  44. "http://www.schemed.ru/a.txt:\n"
  45. "http://www.schemed.ru/a.txt'\n"
  46. "http://www.schemed.ru/a.txt\"\n"
  47. "http://www.schemed.ru/a.txt>\n"
  48. "http://www.schemed.ru/a=3&b=4\n"
  49. "http://spam.ru/bad=user@domain.com\n"
  50. "http://spam.ru/bad=user@domain.com\n"
  51. "http://spam.ru user@domain.com\n"
  52. "http://a.foto.radikal.ru/0604/de7793c6ca62.jpg\n"
  53. "http://a.foto.radikal.ru/0604/de7793c6ca62.jpg\n"
  54. "schemeless.gz\n"
  55. "schemeless.jp\n"
  56. "schemeless.ua\n"
  57. "schemeless.gz/a\n"
  58. "mysql.so\n"
  59. "http://mysql.so\n"
  60. "3com.com\n"
  61. "lj-user.livejournal.com\n"
  62. "http://lj-user.livejournal.com\n"
  63. "http://vsem.ru?action;\n";
  64. const char *test_html = "<some_tag>This is test file with <a href=\"http://microsoft.com\">http://TesT.com/././?%45%46%20 url</a></some_tag>";
  65. /* Function for using in glib test suite */
  66. void
  67. rspamd_url_test_func ()
  68. {
  69. GByteArray *text, *html;
  70. struct worker_task task;
  71. struct uri *url;
  72. int i = 0;
  73. text = g_byte_array_new();
  74. text->data = (gchar *)test_text;
  75. text->len = strlen (test_text);
  76. html = g_byte_array_new();
  77. html->data = (gchar *)test_html;
  78. html->len = strlen (test_html);
  79. bzero (&task, sizeof (task));
  80. TAILQ_INIT (&task.urls);
  81. task.task_pool = memory_pool_new (8192);
  82. g_test_timer_start ();
  83. g_test_message ("Testing text URL regexp parser");
  84. url_parse_text (&task, text);
  85. TAILQ_FOREACH (url, &task.urls, next) {
  86. msg_debug ("Found url: %s, hostname: %s, data: %s", struri (url), url->host, url->data);
  87. i ++;
  88. }
  89. while (!TAILQ_EMPTY (&task.urls)) {
  90. url = TAILQ_FIRST (&task.urls);
  91. TAILQ_REMOVE (&task.urls, url, next);
  92. }
  93. g_assert (i == 39);
  94. msg_debug ("Time elapsed: %.2f", g_test_timer_elapsed ());
  95. i = 0;
  96. g_test_timer_start ();
  97. g_test_message ("Testing html URL regexp parser");
  98. url_parse_html (&task, html);
  99. TAILQ_FOREACH (url, &task.urls, next) {
  100. msg_debug ("Found url: %s, hostname: %s, data: %s", struri (url), url->host, url->data);
  101. i ++;
  102. }
  103. while (!TAILQ_EMPTY (&task.urls)) {
  104. url = TAILQ_FIRST (&task.urls);
  105. TAILQ_REMOVE (&task.urls, url, next);
  106. }
  107. g_assert (i == 1);
  108. msg_debug ("Time elapsed: %.2f", g_test_timer_elapsed ());
  109. }