From fe7ebd5be9d1352f7a3727bfbfabb6453321e269 Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov
Date: Wed, 10 Sep 2008 17:58:54 +0400
Subject: [PATCH] * Add utility for extracting urls from message * Rework build system

---
 Makefile.in           |  12 ++--
 configure             |  28 ++++++++-
 test/Makefile.in      |   3 +
 utils/Makefile.in     |  12 ++++
 utils/url_extracter.c | 170 ++++++++++++++++++++++++++++++++++++++++++
 worker.c              |   2 -
 6 files changed, 215 insertions(+), 12 deletions(-)
 create mode 100644 utils/Makefile.in
 create mode 100644 utils/url_extracter.c

diff --git a/Makefile.in b/Makefile.in
index bbc443cd2..8dc6e597f 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,11 +1,11 @@
-.PHONY: perl clean test
+.PHONY: perl clean $(SUBDIRS)
 
 all: perl $(TARGETS)
 
 perl: perl/Makefile
 	cd perl && make && cd ..
 
-perl/Makefile: perl/Makefile.PL
+perl/Makefile:
 	cd perl && perl Makefile.PL && cd ..
 
 memctest: upstream.c memcached.c memcached-test.c
@@ -22,22 +22,18 @@ install: $(EXEC)
 	#$(INSTALL) -m0644 rspamd.conf.sample $(PREFIX)/etc
 	$(MKDIR) -o $(RSPAMD_USER) -g $(RSPAMD_GROUP) /var/run/rspamd
 
-clean:
+clean: clean-subdirs perl/Makefile
 	rm -f *.o $(EXEC) *.core
 	rm -f cfg_lex.c cfg_yacc.c cfg_yacc.h
 	cd perl && make clean && cd ..
-	cd test && make clean && cd ..
 
-dist-clean: clean
+dist-clean: clean dist-clean-subdirs
 	rm -f Makefile
 	rm -f test/Makefile
 	rm -f config.log
 	rm -f md5.h md5.c strlcpy.h strlcpy.c queue.h config.h modules.c modules.h
 	cd perl && rm -f Makefile.old && rm -f Makefile.PL && cd ..
 
-test:
-	cd test && make
-
 creategroup:
 	@echo "Create group $(RSPAMD_GROUP) before installing!"
 
diff --git a/configure b/configure
index e7d0aad41..9031313a9 100755
--- a/configure
+++ b/configure
@@ -37,7 +37,7 @@ GROUP=postfix
 INSTALL="/usr/bin/install -v"
 MKDIR="/usr/bin/install -v -d"
 MANPATH="${PREFIX}/share/man"
-SUBDIRS="test"
+SUBDIRS="test utils"
 
 MAKEFILE="Makefile"
 MAKEFILE_IN="Makefile.in"
@@ -486,14 +486,18 @@ EXEC=$EXEC
 RSPAMD_USER=$USER
 RSPAMD_GROUP=$GROUP
 # All target dependenses
-TARGETS=$TARGETS
+TARGETS=$TARGETS $SUBDIRS
 # Common dependenses
 DEPS=$DEPS
 # Path to install manual page
 MANPATH=$MANPATH
+# Subdirectories
+SUBDIRS=$SUBDIRS
 
 END
 	# Write subdirs makefiles
+	clean_target="clean-subdirs: "
+	dist_clean_target="dist-clean-subdirs: "
 	for sub in $SUBDIRS ; do
 		cp $MAKEFILE $sub/$MAKEFILE
 		saved_pwd=`pwd`
@@ -514,8 +518,28 @@ ${_sub_obj}: ${_sub_src}
 END
 		done
 		cd $saved_pwd
+		dist_clean_target="$dist_clean_target ${sub}-dist-clean"
+		clean_target="$clean_target ${sub}-clean"
 	done
+
+	# Write IN file
 	cat $MAKEFILE_IN >> $MAKEFILE
+
+	# Process clean targets for all subdirs
+	echo $dist_clean_target >> $MAKEFILE
+	echo $clean_target >> $MAKEFILE
+	for sub in $SUBDIRS ; do
+		cat >> $MAKEFILE << END
+${sub}-clean:
+	cd ${sub} && make clean && cd ..
+${sub}-dist-clean:
+	cd ${sub} && make dist-clean && cd ..
+${sub}:
+	cd ${sub} && make && cd ..
+
+END
+	done
+
 	# Write build targets to makefile
 	cat >> $MAKEFILE << END
 ${EXEC}: \$(OBJECTS)
diff --git a/test/Makefile.in b/test/Makefile.in
index d7b6957c8..1d46e2e2f 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -11,3 +11,6 @@ run_test: rspamd_test_suite
 
 clean:
 	rm -f *.o rspamd_test_suite *.core rspamd_test.xml
+
+dist-clean: clean
+	rm -f Makefile
diff --git a/utils/Makefile.in b/utils/Makefile.in
new file mode 100644
index 000000000..86f3eab60
--- /dev/null
+++ b/utils/Makefile.in
@@ -0,0 +1,12 @@
+.PHONY: all clean dist-clean
+
+all: url_extracter
+
+url_extracter: $(OBJECTS) ../url.o ../util.o
+	$(CC) $(PTHREAD_LDFLAGS) $(LDFLAGS) $(OBJECTS) ../url.o ../util.o $(LIBS) -o url_extracter
+
+clean:
+	rm -f *.o url_extracter *.core
+
+dist-clean: clean
+	rm -f Makefile
diff --git a/utils/url_extracter.c b/utils/url_extracter.c
new file mode 100644
index 000000000..dc2138e6f
--- /dev/null
+++ b/utils/url_extracter.c
@@ -0,0 +1,170 @@
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "../config.h"
+#include "../main.h"
+#include "../cfg_file.h"
+#include "../url.h"
+
+/*
+ * Callback for g_mime_message_foreach_part(): descends into embedded
+ * messages, appends every decoded leaf part to task->parts and passes
+ * text/html and text/plain parts to the url parser.
+ * user_data is the struct worker_task that collects the results.
+ */
+static void
+mime_foreach_callback (GMimeObject *part, gpointer user_data)
+{
+	struct worker_task *task = (struct worker_task *)user_data;
+	struct mime_part *mime_part;
+	GMimeContentType *type;
+	GMimeDataWrapper *wrapper;
+	GMimeStream *part_stream;
+	GByteArray *part_content;
+	GMimeMessage *message;
+
+	/* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */
+
+	/* find out what class 'part' is... */
+	if (GMIME_IS_MESSAGE_PART (part)) {
+		/* message/rfc822 or message/news */
+		printf ("Message part found\n");
+
+		/* g_mime_message_foreach_part() won't descend into
+		   child message parts, so if we want to count any
+		   subparts of this child message, we'll have to call
+		   g_mime_message_foreach_part() again here. */
+
+		message = g_mime_message_part_get_message ((GMimeMessagePart *) part);
+		/* get_message() may hand back NULL; there is nothing to walk or release then */
+		if (message != NULL) {
+			g_mime_message_foreach_part (message, mime_foreach_callback, task);
+			g_object_unref (message);
+		}
+	} else if (GMIME_IS_MESSAGE_PARTIAL (part)) {
+		/* message/partial */
+		printf ("Message/partial part found\n");
+
+		/* this is an incomplete message part, probably a
+		   large message that the sender has broken into
+		   smaller parts and is sending us bit by bit. we
+		   could save some info about it so that we could
+		   piece this back together again once we get all the
+		   parts? */
+	} else if (GMIME_IS_MULTIPART (part)) {
+		/* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */
+
+		/* we'll get to finding out if this is a signed/encrypted multipart later... */
+	} else if (GMIME_IS_PART (part)) {
+		printf ("Normal part found\n");
+		/* a normal leaf part, could be text/plain or image/jpeg etc */
+		wrapper = g_mime_part_get_content_object (GMIME_PART (part));
+		if (wrapper != NULL) {
+			part_stream = g_mime_stream_mem_new ();
+			printf ("Get new wrapper object for normal part\n");
+			if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) {
+				printf ("Write wrapper to stream\n");
+				part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream));
+				type = (GMimeContentType *)g_mime_part_get_content_type (GMIME_PART (part));
+				mime_part = g_malloc (sizeof (struct mime_part));
+				mime_part->type = type;
+				mime_part->content = part_content;
+				TAILQ_INSERT_TAIL (&task->parts, mime_part, next);
+				if (g_mime_content_type_is_type (type, "text", "html")) {
+					printf ("Found text/html part\n");
+					url_parse_html (task, part_content);
+				}
+				else if (g_mime_content_type_is_type (type, "text", "plain")) {
+					printf ("Found text/plain part\n");
+					url_parse_text (task, part_content);
+				}
+			}
+			else {
+				/* nothing refers to the stream when decoding failed, so release it;
+				   on success it stays alive because mime_part->content is its byte array */
+				g_object_unref (part_stream);
+			}
+		}
+	} else {
+		g_assert_not_reached ();
+	}
+}
+
+
+/*
+ * Reads a whole mail message from stdin, parses it with GMime and prints
+ * every url collected from its parts.  Returns 0 on success and 1 when
+ * the input cannot be parsed into a message.
+ */
+int
+main (int argc, char **argv)
+{
+	GMimeMessage *message;
+	GMimeParser *parser;
+	GMimeStream *stream;
+	struct worker_task task = { 0 };	/* fields not set below stay zeroed */
+	struct uri *url;
+	char *buf = NULL;
+	size_t pos = 0, size = 65535;
+	int c;
+
+	g_mem_set_vtable(glib_mem_profiler_table);
+	g_mime_init (0);
+
+	/* Preallocate buffer */
+	buf = g_malloc (size);
+
+	/* test getchar() itself: feof() turns true only after a read has failed,
+	   so looping on it appends the EOF marker to the message as a bogus byte */
+	while ((c = getchar ()) != EOF) {
+		*(buf + pos) = (char)c;
+		pos ++;
+		if (pos == size) {
+			size *= 2;
+			buf = g_realloc (buf, size);
+		}
+	}
+
+	stream = g_mime_stream_mem_new_with_buffer (buf, pos);
+	/* the memory stream keeps its own copy of the data */
+	g_free (buf);
+	/* create a new parser object to parse the stream */
+	parser = g_mime_parser_new_with_stream (stream);
+
+	/* unref the stream (parser owns a ref, so this object does not actually get free'd until we destroy the parser) */
+	g_object_unref (stream);
+
+	/* parse the message from the stream */
+	message = g_mime_parser_construct_message (parser);
+
+	/* free the parser (and the stream) */
+	g_object_unref (parser);
+
+	if (message == NULL) {
+		fprintf (stderr, "Cannot parse message from stdin\n");
+		return 1;
+	}
+
+	task.message = message;
+	TAILQ_INIT (&task.urls);
+	TAILQ_INIT (&task.parts);
+
+	g_mime_message_foreach_part (message, mime_foreach_callback, &task);
+
+	TAILQ_FOREACH (url, &task.urls, next) {
+		printf ("Found url: %s, hostname: %s, data: %s\n", struri (url), url->host, url->data);
+	}
+
+	return 0;
+}
diff --git a/worker.c b/worker.c
index baf7479ff..34a096629 100644
--- a/worker.c
+++ b/worker.c
@@ -384,8 +384,6 @@ process_message (struct worker_task *task)
 	stream = g_mime_stream_mem_new_with_buffer (task->msg->buf->begin, task->msg->buf->len);
 	/* create a new parser object to parse the stream */
 	parser = g_mime_parser_new_with_stream (stream);
-	/* create a new parser object to parse the stream */
-	parser = g_mime_parser_new_with_stream (stream);
 
 	/* unref the stream (parser owns a ref, so this object does not actually get free'd until we destroy the parser) */
 	g_object_unref (stream);
-- 
2.39.5