Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

url_extracter.c 4.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  #include <sys/types.h>
  #include <sys/time.h>
  #include <sys/wait.h>
  #include <sys/param.h>
  #include <netinet/in.h>
  #include <arpa/inet.h>
  #include <netdb.h>
  #include <syslog.h>
  #include <fcntl.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <gmime/gmime.h>
  #include "../config.h"
  #include "../main.h"
  #include "../cfg_file.h"
  #include "../url.h"
  17. static void
  18. mime_foreach_callback (GMimeObject *part, gpointer user_data)
  19. {
  20. struct worker_task *task = (struct worker_task *)user_data;
  21. struct mime_part *mime_part;
  22. GMimeContentType *type;
  23. GMimeDataWrapper *wrapper;
  24. GMimeStream *part_stream;
  25. GByteArray *part_content;
  26. GMimeMessage *message;
  27. /* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */
  28. /* find out what class 'part' is... */
  29. if (GMIME_IS_MESSAGE_PART (part)) {
  30. /* message/rfc822 or message/news */
  31. printf ("Message part found\n");
  32. /* g_mime_message_foreach_part() won't descend into
  33. child message parts, so if we want to count any
  34. subparts of this child message, we'll have to call
  35. g_mime_message_foreach_part() again here. */
  36. message = g_mime_message_part_get_message ((GMimeMessagePart *) part);
  37. g_mime_message_foreach_part (message, mime_foreach_callback, task);
  38. g_object_unref (message);
  39. } else if (GMIME_IS_MESSAGE_PARTIAL (part)) {
  40. /* message/partial */
  41. printf ("Message/partial part found\n");
  42. /* this is an incomplete message part, probably a
  43. large message that the sender has broken into
  44. smaller parts and is sending us bit by bit. we
  45. could save some info about it so that we could
  46. piece this back together again once we get all the
  47. parts? */
  48. } else if (GMIME_IS_MULTIPART (part)) {
  49. /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */
  50. /* we'll get to finding out if this is a signed/encrypted multipart later... */
  51. } else if (GMIME_IS_PART (part)) {
  52. printf ("Normal part found\n");
  53. /* a normal leaf part, could be text/plain or image/jpeg etc */
  54. wrapper = g_mime_part_get_content_object (GMIME_PART (part));
  55. if (wrapper != NULL) {
  56. part_stream = g_mime_stream_mem_new ();
  57. printf ("Get new wrapper object for normal part\n");
  58. if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) {
  59. printf ("Write wrapper to stream\n");
  60. part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream));
  61. type = (GMimeContentType *)g_mime_part_get_content_type (GMIME_PART (part));
  62. mime_part = g_malloc (sizeof (struct mime_part));
  63. mime_part->type = type;
  64. mime_part->content = part_content;
  65. TAILQ_INSERT_TAIL (&task->parts, mime_part, next);
  66. if (g_mime_content_type_is_type (type, "text", "html")) {
  67. printf ("Found text/html part\n");
  68. url_parse_html (task, part_content);
  69. }
  70. else if (g_mime_content_type_is_type (type, "text", "plain")) {
  71. printf ("Found text/plain part\n");
  72. url_parse_text (task, part_content);
  73. }
  74. }
  75. }
  76. } else {
  77. g_assert_not_reached ();
  78. }
  79. }
  80. int
  81. main (int argc, char **argv)
  82. {
  83. GMimeMessage *message;
  84. GMimeParser *parser;
  85. GMimeStream *stream;
  86. struct worker_task task;
  87. struct uri *url;
  88. char *buf = NULL;
  89. size_t pos = 0, size = 65535;
  90. g_mem_set_vtable(glib_mem_profiler_table);
  91. g_mime_init (0);
  92. /* Preallocate buffer */
  93. buf = g_malloc (size);
  94. while (!feof (stdin)) {
  95. *(buf + pos) = getchar ();
  96. pos ++;
  97. if (pos == size) {
  98. size *= 2;
  99. buf = g_realloc (buf, size);
  100. }
  101. }
  102. stream = g_mime_stream_mem_new_with_buffer (buf, pos);
  103. /* create a new parser object to parse the stream */
  104. parser = g_mime_parser_new_with_stream (stream);
  105. /* unref the stream (parser owns a ref, so this object does not actually get free'd until we destroy the parser) */
  106. g_object_unref (stream);
  107. /* parse the message from the stream */
  108. message = g_mime_parser_construct_message (parser);
  109. task.message = message;
  110. TAILQ_INIT (&task.urls);
  111. TAILQ_INIT (&task.parts);
  112. /* free the parser (and the stream) */
  113. g_object_unref (parser);
  114. g_mime_message_foreach_part (message, mime_foreach_callback, &task);
  115. TAILQ_FOREACH (url, &task.urls, next) {
  116. printf ("Found url: %s, hostname: %s, data: %s\n", struri (url), url->host, url->data);
  117. }
  118. }