aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vsevolod Stakhov <vsevolod@rambler-co.ru> 2008-09-10 17:58:54 +0400
committer Vsevolod Stakhov <vsevolod@rambler-co.ru> 2008-09-10 17:58:54 +0400
commit fe7ebd5be9d1352f7a3727bfbfabb6453321e269 (patch)
tree 3888171e8e16362cecbefca6ad6548243ba9a8b9
parent 57e765ce78c6b9746cddab4c3415dc386552151f (diff)
download rspamd-fe7ebd5be9d1352f7a3727bfbfabb6453321e269.tar.gz
rspamd-fe7ebd5be9d1352f7a3727bfbfabb6453321e269.zip
* Add utility for extracting urls from message
* Rework build system
-rw-r--r-- Makefile.in 12
-rwxr-xr-x configure 28
-rw-r--r-- test/Makefile.in 3
-rw-r--r-- utils/Makefile.in 12
-rw-r--r-- utils/url_extracter.c 140
-rw-r--r-- worker.c 2
6 files changed, 185 insertions, 12 deletions
diff --git a/Makefile.in b/Makefile.in
index bbc443cd2..8dc6e597f 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,11 +1,11 @@
-.PHONY: perl clean test
+.PHONY: perl clean $(SUBDIRS)
all: perl $(TARGETS)
perl: perl/Makefile
cd perl && make && cd ..
-perl/Makefile: perl/Makefile.PL
+perl/Makefile:
cd perl && perl Makefile.PL && cd ..
memctest: upstream.c memcached.c memcached-test.c
@@ -22,22 +22,18 @@ install: $(EXEC)
#$(INSTALL) -m0644 rspamd.conf.sample $(PREFIX)/etc
$(MKDIR) -o $(RSPAMD_USER) -g $(RSPAMD_GROUP) /var/run/rspamd
-clean:
+clean: clean-subdirs perl/Makefile
rm -f *.o $(EXEC) *.core
rm -f cfg_lex.c cfg_yacc.c cfg_yacc.h
cd perl && make clean && cd ..
- cd test && make clean && cd ..
-dist-clean: clean
+dist-clean: clean dist-clean-subdirs
rm -f Makefile
rm -f test/Makefile
rm -f config.log
rm -f md5.h md5.c strlcpy.h strlcpy.c queue.h config.h modules.c modules.h
cd perl && rm -f Makefile.old && rm -f Makefile.PL && cd ..
-test:
- cd test && make
-
creategroup:
@echo "Create group $(RSPAMD_GROUP) before installing!"
diff --git a/configure b/configure
index e7d0aad41..9031313a9 100755
--- a/configure
+++ b/configure
@@ -37,7 +37,7 @@ GROUP=postfix
INSTALL="/usr/bin/install -v"
MKDIR="/usr/bin/install -v -d"
MANPATH="${PREFIX}/share/man"
-SUBDIRS="test"
+SUBDIRS="test utils"
MAKEFILE="Makefile"
MAKEFILE_IN="Makefile.in"
@@ -486,14 +486,18 @@ EXEC=$EXEC
RSPAMD_USER=$USER
RSPAMD_GROUP=$GROUP
# All target dependenses
-TARGETS=$TARGETS
+TARGETS=$TARGETS $SUBDIRS
# Common dependenses
DEPS=$DEPS
# Path to install manual page
MANPATH=$MANPATH
+# Subdirectories
+SUBDIRS=$SUBDIRS
END
# Write subdirs makefiles
+ clean_target="clean-subdirs: "
+ dist_clean_target="dist-clean-subdirs: "
for sub in $SUBDIRS ; do
cp $MAKEFILE $sub/$MAKEFILE
saved_pwd=`pwd`
@@ -514,8 +518,28 @@ ${_sub_obj}: ${_sub_src}
END
done
cd $saved_pwd
+ dist_clean_target="$dist_clean_target ${sub}-dist-clean"
+ clean_target="$clean_target ${sub}-clean"
done
+
+ # Write IN file
cat $MAKEFILE_IN >> $MAKEFILE
+
+ # Process clean targets for all subdirs
+ echo $dist_clean_target >> $MAKEFILE
+ echo $clean_target >> $MAKEFILE
+ for sub in $SUBDIRS ; do
+ cat >> $MAKEFILE << END
+${sub}-clean:
+ cd ${sub} && make clean && cd ..
+${sub}-dist-clean:
+ cd ${sub} && make dist-clean && cd ..
+${sub}:
+ cd ${sub} && make && cd ..
+
+END
+ done
+
# Write build targets to makefile
cat >> $MAKEFILE << END
${EXEC}: \$(OBJECTS)
diff --git a/test/Makefile.in b/test/Makefile.in
index d7b6957c8..1d46e2e2f 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -11,3 +11,6 @@ run_test: rspamd_test_suite
clean:
rm -f *.o rspamd_test_suite *.core rspamd_test.xml
+
+dist-clean: clean
+ rm -f Makefile
diff --git a/utils/Makefile.in b/utils/Makefile.in
new file mode 100644
index 000000000..86f3eab60
--- /dev/null
+++ b/utils/Makefile.in
@@ -0,0 +1,12 @@
+# Links url_extracter from $(OBJECTS) plus ../url.o and ../util.o
+.PHONY: all clean dist-clean
+all: url_extracter
+
+url_extracter: $(OBJECTS) ../url.o ../util.o
+	$(CC) $(PTHREAD_LDFLAGS) $(LDFLAGS) $(OBJECTS) ../url.o ../util.o $(LIBS) -o url_extracter
+
+clean:
+	rm -f *.o url_extracter *.core
+
+dist-clean: clean
+	rm -f Makefile
diff --git a/utils/url_extracter.c b/utils/url_extracter.c
new file mode 100644
index 000000000..dc2138e6f
--- /dev/null
+++ b/utils/url_extracter.c
@@ -0,0 +1,140 @@
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/param.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <stdlib.h>
+
+#include <gmime/gmime.h>
+
+#include "../config.h"
+#include "../main.h"
+#include "../cfg_file.h"
+#include "../url.h"
+
+/*
+ * Callback for g_mime_message_foreach_part(): print what kind of part was found,
+ * store every leaf part in task->parts and run the URL parsers on text/html and
+ * text/plain content.  user_data must point to the struct worker_task to fill.
+ */
+static void
+mime_foreach_callback (GMimeObject *part, gpointer user_data)
+{
+	struct worker_task *task = (struct worker_task *)user_data;
+	struct mime_part *mime_part;
+	GMimeContentType *type;
+	GMimeDataWrapper *wrapper;
+	GMimeMessage *message;
+	GMimeStream *part_stream;
+	GByteArray *part_content;
+
+	if (GMIME_IS_MESSAGE_PART (part)) {
+		/* message/rfc822 or message/news */
+		printf ("Message part found\n");
+
+		/* g_mime_message_foreach_part() won't descend into child message
+		   parts, so walk the embedded message explicitly. */
+		message = g_mime_message_part_get_message ((GMimeMessagePart *) part);
+		if (message != NULL) {
+			g_mime_message_foreach_part (message, mime_foreach_callback, task);
+			g_object_unref (message);
+		}
+	} else if (GMIME_IS_MESSAGE_PARTIAL (part)) {
+		/* message/partial: one fragment of a message that the sender split
+		   into pieces; reassembly is not attempted here. */
+		printf ("Message/partial part found\n");
+	} else if (GMIME_IS_MULTIPART (part)) {
+		/* multipart/mixed, multipart/alternative, multipart/related, etc:
+		   nothing to do for the container itself. */
+	} else if (GMIME_IS_PART (part)) {
+		/* a normal leaf part, could be text/plain or image/jpeg etc */
+		printf ("Normal part found\n");
+		wrapper = g_mime_part_get_content_object (GMIME_PART (part));
+		if (wrapper == NULL) {
+			return;
+		}
+		part_stream = g_mime_stream_mem_new ();
+		printf ("Get new wrapper object for normal part\n");
+		if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) == -1) {
+			/* nothing was stored, so the stream is not needed any more */
+			g_object_unref (part_stream);
+			return;
+		}
+		printf ("Write wrapper to stream\n");
+		/* part_stream is kept alive on purpose: the byte array fetched below
+		   belongs to the stream and is stored in the task's part list */
+		part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream));
+		type = (GMimeContentType *)g_mime_part_get_content_type (GMIME_PART (part));
+		mime_part = g_malloc (sizeof (struct mime_part));
+		mime_part->type = type;
+		mime_part->content = part_content;
+		TAILQ_INSERT_TAIL (&task->parts, mime_part, next);
+		if (g_mime_content_type_is_type (type, "text", "html")) {
+			printf ("Found text/html part\n");
+			url_parse_html (task, part_content);
+		}
+		else if (g_mime_content_type_is_type (type, "text", "plain")) {
+			printf ("Found text/plain part\n");
+			url_parse_text (task, part_content);
+		}
+	} else {
+		g_assert_not_reached ();
+	}
+}
+
+
+/* Read a whole message from stdin, parse it with GMime and print the URLs
+   found in it.  Returns 0 on success, 1 if no message could be parsed. */
+int
+main (int argc, char **argv)
+{
+	GMimeMessage *message;
+	GMimeParser *parser;
+	GMimeStream *stream;
+	/* zeroed so that members this tool never sets are not left indeterminate */
+	struct worker_task task = {0};
+	struct uri *url;
+	char *buf;
+	size_t pos = 0, size = 65535;
+	int c;
+
+	g_mem_set_vtable(glib_mem_profiler_table);
+	g_mime_init (0);
+
+	/* Preallocate buffer and grow it by doubling while reading stdin */
+	buf = g_malloc (size);
+	/* test getchar() itself: looping on !feof() would also store the EOF marker */
+	while ((c = getchar ()) != EOF) {
+		buf[pos ++] = (char)c;
+		if (pos == size) {
+			size *= 2;
+			buf = g_realloc (buf, size);
+		}
+	}
+
+	/* the memory stream takes a copy of the data, so buf can be released */
+	stream = g_mime_stream_mem_new_with_buffer (buf, pos);
+	g_free (buf);
+	parser = g_mime_parser_new_with_stream (stream);
+	/* parser owns a ref, so the stream lives until the parser is destroyed */
+	g_object_unref (stream);
+	message = g_mime_parser_construct_message (parser);
+	g_object_unref (parser);
+	if (message == NULL) {
+		fprintf (stderr, "Cannot parse message\n");
+		return 1;
+	}
+	task.message = message;
+	TAILQ_INIT (&task.urls);
+	TAILQ_INIT (&task.parts);
+	g_mime_message_foreach_part (message, mime_foreach_callback, &task);
+	TAILQ_FOREACH (url, &task.urls, next) {
+		printf ("Found url: %s, hostname: %s, data: %s\n", struri (url), url->host, url->data);
+	}
+	return 0;
+}
diff --git a/worker.c b/worker.c
index baf7479ff..34a096629 100644
--- a/worker.c
+++ b/worker.c
@@ -384,8 +384,6 @@ process_message (struct worker_task *task)
stream = g_mime_stream_mem_new_with_buffer (task->msg->buf->begin, task->msg->buf->len);
/* create a new parser object to parse the stream */
parser = g_mime_parser_new_with_stream (stream);
- /* create a new parser object to parse the stream */
- parser = g_mime_parser_new_with_stream (stream);
/* unref the stream (parser owns a ref, so this object does not actually get free'd until we destroy the parser) */
g_object_unref (stream);