]> source.dussan.org Git - rspamd.git/commitdiff
* Add utility for extracting urls from message
author Vsevolod Stakhov <vsevolod@rambler-co.ru>
Wed, 10 Sep 2008 13:58:54 +0000 (17:58 +0400)
committer Vsevolod Stakhov <vsevolod@rambler-co.ru>
Wed, 10 Sep 2008 13:58:54 +0000 (17:58 +0400)
* Rework build system

Makefile.in
configure
test/Makefile.in
utils/Makefile.in [new file with mode: 0644]
utils/url_extracter.c [new file with mode: 0644]
worker.c

index bbc443cd258a86a2565d94275a3c4dd281b0b9a7..8dc6e597fa337158d4d1d0a6f486497a7d7c6fa6 100644 (file)
@@ -1,11 +1,11 @@
-.PHONY: perl clean test
+.PHONY: perl clean $(SUBDIRS) 
 
 all: perl $(TARGETS)
 
 perl: perl/Makefile
        cd perl && make && cd ..
 
-perl/Makefile: perl/Makefile.PL
+perl/Makefile:
        cd perl && perl Makefile.PL && cd ..
 
 memctest: upstream.c memcached.c memcached-test.c
@@ -22,22 +22,18 @@ install: $(EXEC)
        #$(INSTALL) -m0644 rspamd.conf.sample $(PREFIX)/etc
        $(MKDIR) -o $(RSPAMD_USER) -g $(RSPAMD_GROUP) /var/run/rspamd
 
-clean:
+clean: clean-subdirs perl/Makefile
        rm -f *.o $(EXEC) *.core
        rm -f cfg_lex.c cfg_yacc.c cfg_yacc.h
        cd perl && make clean && cd ..
-       cd test && make clean && cd ..
 
-dist-clean: clean
+dist-clean: clean dist-clean-subdirs
        rm -f Makefile
        rm -f test/Makefile
        rm -f config.log
        rm -f md5.h md5.c strlcpy.h strlcpy.c queue.h config.h modules.c modules.h
        cd perl && rm -f Makefile.old && rm -f Makefile.PL && cd ..
 
-test:
-       cd test && make
-
 creategroup:
        @echo "Create group $(RSPAMD_GROUP) before installing!" 
 
index e7d0aad411a9ba018de05908680cdbf33d2f893b..9031313a9c3f771848607a9e7ad37a87bfccef71 100755 (executable)
--- a/configure
+++ b/configure
@@ -37,7 +37,7 @@ GROUP=postfix
 INSTALL="/usr/bin/install -v"
 MKDIR="/usr/bin/install -v -d"
 MANPATH="${PREFIX}/share/man"
-SUBDIRS="test"
+SUBDIRS="test utils"
 
 MAKEFILE="Makefile"
 MAKEFILE_IN="Makefile.in"
@@ -486,14 +486,18 @@ EXEC=$EXEC
 RSPAMD_USER=$USER
 RSPAMD_GROUP=$GROUP
 # All target dependenses
-TARGETS=$TARGETS
+TARGETS=$TARGETS $SUBDIRS
 # Common dependenses
 DEPS=$DEPS
 # Path to install manual page
 MANPATH=$MANPATH
+# Subdirectories
+SUBDIRS=$SUBDIRS
 
 END
        # Write subdirs makefiles
+       clean_target="clean-subdirs: "
+       dist_clean_target="dist-clean-subdirs: "
        for sub in $SUBDIRS ; do
                cp $MAKEFILE $sub/$MAKEFILE
                saved_pwd=`pwd`
@@ -514,8 +518,28 @@ ${_sub_obj}: ${_sub_src}
 END
                done
                cd $saved_pwd
+               dist_clean_target="$dist_clean_target ${sub}-dist-clean"
+               clean_target="$clean_target ${sub}-clean"
        done
+
+       # Write IN file
        cat $MAKEFILE_IN >> $MAKEFILE
+
+       # Process clean targets for all subdirs
+       echo $dist_clean_target >> $MAKEFILE
+       echo $clean_target >> $MAKEFILE
+       for sub in $SUBDIRS ; do
+               cat >> $MAKEFILE << END
+${sub}-clean:
+       cd ${sub} && make clean && cd ..
+${sub}-dist-clean:
+       cd ${sub} && make dist-clean && cd ..
+${sub}:
+       cd ${sub} && make && cd ..
+
+END
+       done
+
        # Write build targets to makefile
        cat >> $MAKEFILE << END
 ${EXEC}: \$(OBJECTS)
index d7b6957c865f64407471c52980b6d320122f932d..1d46e2e2f9e8a1383077c66e9155cfbda11ce83a 100644 (file)
@@ -11,3 +11,6 @@ run_test: rspamd_test_suite
 
 clean:
        rm -f *.o rspamd_test_suite *.core rspamd_test.xml
+
+dist-clean: clean
+       rm -f Makefile
diff --git a/utils/Makefile.in b/utils/Makefile.in
new file mode 100644 (file)
index 0000000..86f3eab
--- /dev/null
@@ -0,0 +1,12 @@
+.PHONY: all clean dist-clean
+# Default target: build the url extraction utility.
+all: url_extracter
+# Link this directory's objects with url.o and util.o from the parent directory.
+url_extracter: $(OBJECTS) ../url.o ../util.o
+       $(CC) $(PTHREAD_LDFLAGS) $(LDFLAGS) $(OBJECTS) ../url.o ../util.o $(LIBS) -o url_extracter
+# Remove object files, the binary and core dumps.
+clean:
+       rm -f *.o url_extracter *.core
+# Like clean, but also remove the Makefile itself.
+dist-clean: clean
+       rm -f Makefile
diff --git a/utils/url_extracter.c b/utils/url_extracter.c
new file mode 100644 (file)
index 0000000..dc2138e
--- /dev/null
@@ -0,0 +1,140 @@
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/param.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <stdlib.h>
+
+#include <gmime/gmime.h>
+
+#include "../config.h"
+#include "../main.h"
+#include "../cfg_file.h"
+#include "../url.h"
+
+/*
+ * Callback for g_mime_message_foreach_part (): processes one MIME part.
+ *   part      - the part node currently being visited
+ *   user_data - the struct worker_task that collects parts and urls
+ * Leaf parts are written to a memory stream and appended to task->parts;
+ * text/html and text/plain content is passed to url_parse_html () or
+ * url_parse_text (). Progress messages are printed to stdout.
+ */
+static void
+mime_foreach_callback (GMimeObject *part, gpointer user_data)
+{
+       struct worker_task *task = (struct worker_task *)user_data;
+       struct mime_part *mime_part;
+       GMimeContentType *type;
+       GMimeDataWrapper *wrapper;
+       GMimeStream *part_stream;
+       GByteArray *part_content;
+       GMimeMessage *message;
+
+       if (GMIME_IS_MESSAGE_PART (part)) {
+               /* message/rfc822 or message/news */
+               printf ("Message part found\n");
+               /* g_mime_message_foreach_part() won't descend into child message parts, so walk the embedded message here */
+               message = g_mime_message_part_get_message ((GMimeMessagePart *) part);
+               if (message != NULL) {
+                       g_mime_message_foreach_part (message, mime_foreach_callback, task);
+                       g_object_unref (message);
+               }
+       } else if (GMIME_IS_MESSAGE_PARTIAL (part)) {
+               /* message/partial: one fragment of a message that the sender has split up */
+               printf ("Message/partial part found\n");
+               /* fragments are not pieced back together, so nothing is extracted from them */
+       } else if (GMIME_IS_MULTIPART (part)) {
+               /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */
+               /* nothing is done for the container itself */
+       } else if (GMIME_IS_PART (part)) {
+               printf ("Normal part found\n");
+               /* a normal leaf part, could be text/plain or image/jpeg etc */
+               wrapper = g_mime_part_get_content_object (GMIME_PART (part));
+               if (wrapper == NULL) {
+                       return;
+               }
+               part_stream = g_mime_stream_mem_new ();
+               printf ("Get new wrapper object for normal part\n");
+               if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) == -1) {
+                       /* nothing refers to the stream yet, so release it instead of leaking it */
+                       g_object_unref (part_stream);
+                       return;
+               }
+               printf ("Write wrapper to stream\n");
+               /* part_stream is not released: part_content comes from it and is stored in the task (presumably the stream owns the array - confirm) */
+               part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream));
+               type = (GMimeContentType *)g_mime_part_get_content_type (GMIME_PART (part));
+               mime_part = g_malloc (sizeof (struct mime_part));
+               mime_part->type = type;
+               mime_part->content = part_content;
+               TAILQ_INSERT_TAIL (&task->parts, mime_part, next);
+               if (g_mime_content_type_is_type (type, "text", "html")) {
+                       printf ("Found text/html part\n");
+                       url_parse_html (task, part_content);
+               }
+               else if (g_mime_content_type_is_type (type, "text", "plain")) {
+                       printf ("Found text/plain part\n");
+                       url_parse_text (task, part_content);
+               }
+       } else {
+               g_assert_not_reached ();
+       }
+}
+
+
+/*
+ * Read a whole message from stdin, parse it and print every url found in it.
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE when no message could be constructed.
+ */
+int
+main (int argc, char **argv)
+{
+       GMimeMessage *message;
+       GMimeParser *parser;
+       GMimeStream *stream;
+       struct worker_task task;
+       struct uri *url;
+       char *buf = NULL;
+       size_t pos = 0, size = 65535;
+       int c;
+
+       g_mem_set_vtable(glib_mem_profiler_table);
+       g_mime_init (0);
+       /* Preallocate buffer; it is doubled whenever it fills up */
+       buf = g_malloc (size);
+       /* Compare getchar ()'s int result with EOF before storing it, so the EOF marker never ends up in the buffer */
+       while ((c = getchar ()) != EOF) {
+               if (pos == size) {
+                       size *= 2;
+                       buf = g_realloc (buf, size);
+               }
+               buf[pos ++] = (char)c;
+       }
+
+       stream = g_mime_stream_mem_new_with_buffer (buf, pos);
+       parser = g_mime_parser_new_with_stream (stream);
+       /* unref the stream (parser owns a ref, so this object does not actually get free'd until we destroy the parser) */
+       g_object_unref (stream);
+       /* parse the message from the stream */
+       message = g_mime_parser_construct_message (parser);
+       /* free the parser (and the stream) */
+       g_object_unref (parser);
+       if (message == NULL) {
+               fprintf (stderr, "Cannot parse message from stdin\n");
+               return EXIT_FAILURE;
+       }
+       task.message = message;
+       TAILQ_INIT (&task.urls);
+       TAILQ_INIT (&task.parts);
+       g_mime_message_foreach_part (message, mime_foreach_callback, &task);
+       TAILQ_FOREACH (url, &task.urls, next) {
+               printf ("Found url: %s, hostname: %s, data: %s\n", struri (url), url->host, url->data);
+       }
+       return EXIT_SUCCESS;
+}
index baf7479ffee272d861df132e84770d8474d4019c..34a096629898dd1d95940b9f0ba25ae16948f2be 100644 (file)
--- a/worker.c
+++ b/worker.c
@@ -384,8 +384,6 @@ process_message (struct worker_task *task)
        stream = g_mime_stream_mem_new_with_buffer (task->msg->buf->begin, task->msg->buf->len);
        /* create a new parser object to parse the stream */
        parser = g_mime_parser_new_with_stream (stream);
-       /* create a new parser object to parse the stream */
-       parser = g_mime_parser_new_with_stream (stream);
 
        /* unref the stream (parser owns a ref, so this object does not actually get free'd until we destroy the parser) */
        g_object_unref (stream);