From: Vsevolod Stakhov Date: Mon, 7 Sep 2009 16:11:48 +0000 (+0400) Subject: * Add JSON settings parser X-Git-Tag: 0.2.7~22 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=9af09b7467a4b9b7b2c9fbf1390b481e71bb07c6;p=rspamd.git * Add JSON settings parser --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 50355c61e..7be951bed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -356,6 +356,7 @@ SET(RSPAMDSRC src/modules.c src/view.c src/map.c src/bloom.c + src/settings.c src/symbols_cache.c src/fuzzy_storage.c) @@ -366,6 +367,8 @@ IF(ENABLE_LUA MATCHES "ON") ADD_SUBDIRECTORY(src/lua) ENDIF(ENABLE_LUA MATCHES "ON") +ADD_SUBDIRECTORY(src/json) + SET(TOKENIZERSSRC src/tokenizers/tokenizers.c src/tokenizers/osb.c) @@ -471,6 +474,7 @@ IF(LIBUTIL_LIBRARY) TARGET_LINK_LIBRARIES(rspamd util) ENDIF(LIBUTIL_LIBRARY) TARGET_LINK_LIBRARIES(rspamd event) +TARGET_LINK_LIBRARIES(rspamd rspamd_json) TARGET_LINK_LIBRARIES(rspamd ${GLIB2_LIBRARIES}) TARGET_LINK_LIBRARIES(rspamd ${GMIME2_LIBRARIES}) diff --git a/src/cfg_file.h b/src/cfg_file.h index 2b67f8c72..3e932fc10 100644 --- a/src/cfg_file.h +++ b/src/cfg_file.h @@ -227,6 +227,8 @@ struct config_file { GHashTable* cfg_params; /**< all cfg params indexed by its name in this structure */ int clock_res; /**< resolution of clock used */ GList *views; /**< views */ + GHashTable* domain_settings; /**< settings per-domains */ + GHashTable* user_settings; /**< settings per-user */ }; /** diff --git a/src/json/CMakeLists.txt b/src/json/CMakeLists.txt new file mode 100644 index 000000000..e0f33c502 --- /dev/null +++ b/src/json/CMakeLists.txt @@ -0,0 +1,9 @@ +# Json support makefile +SET(JSONSRC dump.c + hashtable.c + load.c + strbuffer.c + utf.c + value.c) + +ADD_LIBRARY(rspamd_json STATIC ${JSONSRC}) diff --git a/src/json/dump.c b/src/json/dump.c new file mode 100644 index 000000000..31ad41586 --- /dev/null +++ b/src/json/dump.c @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2009 Petri Lehtinen + * + * Jansson is free software; you 
can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#include "jansson.h" +#include "strbuffer.h" + +typedef int (*dump_func) (const char *buffer, int size, void *data); + +struct string { + char *buffer; + int length; + int size; +}; + +static int +dump_to_strbuffer (const char *buffer, int size, void *data) +{ + return strbuffer_append_bytes ((strbuffer_t *) data, buffer, size); +} + +static int +dump_to_file (const char *buffer, int size, void *data) +{ + FILE *dest = (FILE *) data; + if (fwrite (buffer, size, 1, dest) != 1) + return -1; + return 0; +} + +/* 256 spaces (the maximum indentation size) */ +static char whitespace[] = + " "; + +static int +dump_indent (uint32_t flags, int depth, dump_func dump, void *data) +{ + if (JSON_INDENT (flags) > 0) { + int i, ws_count = JSON_INDENT (flags); + + if (dump ("\n", 1, data)) + return -1; + + for (i = 0; i < depth; i++) { + if (dump (whitespace, ws_count, data)) + return -1; + } + } + return 0; +} + +static int +dump_string (const char *str, dump_func dump, void *data) +{ + const char *end; + + if (dump ("\"", 1, data)) + return -1; + + end = str; + while (1) { + const char *text; + char seq[7]; + int length; + + while (*end && *end != '\\' && *end != '"' && (*end < 0 || *end > 0x1F)) + end++; + + if (end != str) { + if (dump (str, end - str, data)) + return -1; + } + + if (!*end) + break; + + /* handle \, ", and control codes */ + length = 2; + switch (*end) { + case '\\': + text = "\\\\"; + break; + case '\"': + text = "\\\""; + break; + case '\b': + text = "\\b"; + break; + case '\f': + text = "\\f"; + break; + case '\n': + text = "\\n"; + break; + case '\r': + text = "\\r"; + break; + case '\t': + text = "\\t"; + break; + default: + { + sprintf (seq, "\\u00%02x", *end); + text = seq; + length = 6; + break; + } + } + + if (dump (text, length, data)) + return -1; + + end++; + str = end; + } + + return dump ("\"", 1, data); +} + +static int +do_dump (const 
json_t * json, uint32_t flags, int depth, + dump_func dump, void *data) +{ + switch (json_typeof (json)) { + case JSON_NULL: + return dump ("null", 4, data); + + case JSON_TRUE: + return dump ("true", 4, data); + + case JSON_FALSE: + return dump ("false", 5, data); + + case JSON_INTEGER: + { + char *buffer; + int size, ret; + + size = asprintf (&buffer, "%d", json_integer_value (json)); + if (size == -1) + return -1; + + ret = dump (buffer, size, data); + g_free (buffer); + return ret; + } + + case JSON_REAL: + { + char *buffer; + int size, ret; + + size = asprintf (&buffer, "%.17f", json_real_value (json)); + if (size == -1) + return -1; + + ret = dump (buffer, size, data); + g_free (buffer); + return ret; + } + + case JSON_STRING: + return dump_string (json_string_value (json), dump, data); + + case JSON_ARRAY: + { + int i; + int n = json_array_size (json); + + if (dump ("[", 1, data)) + return -1; + if (n == 0) + return dump ("]", 1, data); + if (dump_indent (flags, depth + 1, dump, data)) + return -1; + + for (i = 0; i < n; ++i) { + if (do_dump (json_array_get (json, i), flags, depth + 1, + dump, data)) + return -1; + + if (i < n - 1) { + if (dump (",", 1, data) || + dump_indent (flags, depth + 1, dump, data)) + return -1; + } + else { + if (dump_indent (flags, depth, dump, data)) + return -1; + } + } + return dump ("]", 1, data); + } + + case JSON_OBJECT: + { + void *iter = json_object_iter ((json_t *) json); + + if (dump ("{", 1, data)) + return -1; + if (!iter) + return dump ("}", 1, data); + if (dump_indent (flags, depth + 1, dump, data)) + return -1; + + while (iter) { + void *next = + json_object_iter_next ((json_t *) json, iter); + + dump_string (json_object_iter_key (iter), dump, data); + if (dump (": ", 2, data) || + do_dump (json_object_iter_value (iter), flags, depth + 1, + dump, data)) + return -1; + + if (next) { + if (dump (",", 1, data) || + dump_indent (flags, depth + 1, dump, data)) + return -1; + } + else { + if (dump_indent (flags, depth, 
dump, data)) + return -1; + } + + iter = next; + } + return dump ("}", 1, data); + } + + default: + /* not reached */ + return -1; + } +} + + +char * +json_dumps (const json_t * json, uint32_t flags) +{ + strbuffer_t strbuff; + char *result; + + if (!json_is_array (json) && !json_is_object (json)) + return NULL; + + if (strbuffer_init (&strbuff)) + return NULL; + + if (do_dump (json, flags, 0, dump_to_strbuffer, (void *)&strbuff)) + return NULL; + + if (dump_to_strbuffer ("\n", 1, (void *)&strbuff)) + return NULL; + + result = strdup (strbuffer_value (&strbuff)); + strbuffer_close (&strbuff); + + return result; +} + +int +json_dumpf (const json_t * json, FILE * output, uint32_t flags) +{ + if (!json_is_array (json) && !json_is_object (json)) + return -1; + + if (do_dump (json, flags, 0, dump_to_file, (void *)output)) + return -1; + return dump_to_file ("\n", 1, (void *)output); +} + +int +json_dump_file (const json_t * json, const char *path, uint32_t flags) +{ + int result; + + FILE *output = fopen (path, "w"); + if (!output) + return -1; + + result = json_dumpf (json, output, flags); + + fclose (output); + return result; +} diff --git a/src/json/hashtable.c b/src/json/hashtable.c new file mode 100644 index 000000000..b3ea0d62a --- /dev/null +++ b/src/json/hashtable.c @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2009 Petri Lehtinen + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. 
+ */ + +#include "../config.h" +#include "hashtable.h" + +typedef struct hashtable_list list_t; +typedef struct hashtable_pair pair_t; +typedef struct hashtable_bucket bucket_t; + +#define container_of(ptr_, type_, member_) \ + ((type_ *)((char *)ptr_ - (size_t)&((type_ *)0)->member_)) + +#define list_to_pair(list_) container_of(list_, pair_t, list) + +static inline void +list_init (list_t * list) +{ + list->next = list; + list->prev = list; +} + +static inline void +list_insert (list_t * list, list_t * node) +{ + node->next = list; + node->prev = list->prev; + list->prev->next = node; + list->prev = node; +} + +static inline void +list_remove (list_t * list) +{ + list->prev->next = list->next; + list->next->prev = list->prev; +} + +static inline int +bucket_is_empty (hashtable_t * hashtable, bucket_t * bucket) +{ + return bucket->first == &hashtable->list && bucket->first == bucket->last; +} + +static void +insert_to_bucket (hashtable_t * hashtable, bucket_t * bucket, list_t * list) +{ + if (bucket_is_empty (hashtable, bucket)) { + list_insert (&hashtable->list, list); + bucket->first = bucket->last = list; + } + else { + list_insert (bucket->first, list); + bucket->first = list; + } +} + +static unsigned int primes[] = { + 5, 13, 23, 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, + 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469, + 12582917, 25165843, 50331653, 100663319, 201326611, 402653189, + 805306457, 1610612741 +}; + +static const unsigned int num_primes = sizeof (primes) / sizeof (unsigned int); + +static inline unsigned int +num_buckets (hashtable_t * hashtable) +{ + return primes[hashtable->num_buckets]; +} + + +static pair_t * +hashtable_find_pair (hashtable_t * hashtable, bucket_t * bucket, + const void *key, unsigned int hash) +{ + list_t *list; + pair_t *pair; + + if (bucket_is_empty (hashtable, bucket)) + return NULL; + + list = bucket->first; + while (1) { + pair = list_to_pair (list); + if (pair->hash == hash && 
hashtable->cmp_keys (pair->key, key)) + return pair; + + if (list == bucket->last) + break; + + list = list->next; + } + + return NULL; +} + +/* returns 0 on success, -1 if key was not found */ +static int +hashtable_do_del (hashtable_t * hashtable, const void *key, unsigned int hash) +{ + pair_t *pair; + bucket_t *bucket; + unsigned int index; + + index = hash % num_buckets (hashtable); + bucket = &hashtable->buckets[index]; + + pair = hashtable_find_pair (hashtable, bucket, key, hash); + if (!pair) + return -1; + + if (&pair->list == bucket->first && &pair->list == bucket->last) + bucket->first = bucket->last = &hashtable->list; + + else if (&pair->list == bucket->first) + bucket->first = pair->list.next; + + else if (&pair->list == bucket->last) + bucket->last = pair->list.prev; + + list_remove (&pair->list); + + if (hashtable->free_key) + hashtable->free_key (pair->key); + if (hashtable->free_value) + hashtable->free_value (pair->value); + + g_free (pair); + hashtable->size--; + + return 0; +} + +static int +hashtable_do_rehash (hashtable_t * hashtable) +{ + list_t *list, *next; + pair_t *pair; + unsigned int i, index, new_size; + + g_free (hashtable->buckets); + + hashtable->num_buckets++; + new_size = num_buckets (hashtable); + + hashtable->buckets = g_malloc (new_size * sizeof (bucket_t)); + if (!hashtable->buckets) + return -1; + + for (i = 0; i < num_buckets (hashtable); i++) { + hashtable->buckets[i].first = hashtable->buckets[i].last = + &hashtable->list; + } + + list = hashtable->list.next; + list_init (&hashtable->list); + + for (; list != &hashtable->list; list = next) { + next = list->next; + pair = list_to_pair (list); + index = pair->hash % new_size; + insert_to_bucket (hashtable, &hashtable->buckets[index], &pair->list); + } + + return 0; +} + + +hashtable_t * +hashtable_create (key_hash_fn hash_key, key_cmp_fn cmp_keys, + free_fn free_key, free_fn free_value) +{ + hashtable_t *hashtable = g_malloc (sizeof (hashtable_t)); + if (!hashtable) + 
return NULL; + + if (hashtable_init (hashtable, hash_key, cmp_keys, free_key, free_value)) { + g_free (hashtable); + return NULL; + } + + return hashtable; +} + +void +hashtable_destroy (hashtable_t * hashtable) +{ + hashtable_close (hashtable); + g_free (hashtable); +} + +int +hashtable_init (hashtable_t * hashtable, + key_hash_fn hash_key, key_cmp_fn cmp_keys, + free_fn free_key, free_fn free_value) +{ + unsigned int i; + + hashtable->size = 0; + hashtable->num_buckets = 0; /* index to primes[] */ + hashtable->buckets = g_malloc (num_buckets (hashtable) * sizeof (bucket_t)); + if (!hashtable->buckets) + return -1; + + list_init (&hashtable->list); + + hashtable->hash_key = hash_key; + hashtable->cmp_keys = cmp_keys; + hashtable->free_key = free_key; + hashtable->free_value = free_value; + + for (i = 0; i < num_buckets (hashtable); i++) { + hashtable->buckets[i].first = hashtable->buckets[i].last = + &hashtable->list; + } + + return 0; +} + +void +hashtable_close (hashtable_t * hashtable) +{ + list_t *list, *next; + pair_t *pair; + for (list = hashtable->list.next; list != &hashtable->list; list = next) { + next = list->next; + pair = list_to_pair (list); + if (hashtable->free_key) + hashtable->free_key (pair->key); + if (hashtable->free_value) + hashtable->free_value (pair->value); + g_free (pair); + } + + g_free (hashtable->buckets); +} + +int +hashtable_set (hashtable_t * hashtable, void *key, void *value) +{ + pair_t *pair; + bucket_t *bucket; + unsigned int hash, index; + + hash = hashtable->hash_key (key); + + /* if the key already exists, delete it */ + hashtable_do_del (hashtable, key, hash); + + /* rehash if the load ratio exceeds 1 */ + if (hashtable->size >= num_buckets (hashtable)) + if (hashtable_do_rehash (hashtable)) + return -1; + + pair = g_malloc (sizeof (pair_t)); + if (!pair) + return -1; + + pair->key = key; + pair->value = value; + pair->hash = hash; + list_init (&pair->list); + + index = hash % num_buckets (hashtable); + bucket = 
&hashtable->buckets[index]; + + insert_to_bucket (hashtable, bucket, &pair->list); + + hashtable->size++; + return 0; +} + +void * +hashtable_get (hashtable_t * hashtable, const void *key) +{ + pair_t *pair; + unsigned int hash; + bucket_t *bucket; + + hash = hashtable->hash_key (key); + bucket = &hashtable->buckets[hash % num_buckets (hashtable)]; + + pair = hashtable_find_pair (hashtable, bucket, key, hash); + if (!pair) + return NULL; + + return pair->value; +} + +int +hashtable_del (hashtable_t * hashtable, const void *key) +{ + unsigned int hash = hashtable->hash_key (key); + return hashtable_do_del (hashtable, key, hash); +} + +void * +hashtable_iter (hashtable_t * hashtable) +{ + return hashtable_iter_next (hashtable, &hashtable->list); +} + +void * +hashtable_iter_next (hashtable_t * hashtable, void *iter) +{ + list_t *list = (list_t *) iter; + if (list->next == &hashtable->list) + return NULL; + return list->next; +} + +void * +hashtable_iter_key (void *iter) +{ + pair_t *pair = list_to_pair ((list_t *) iter); + return pair->key; +} + +void * +hashtable_iter_value (void *iter) +{ + pair_t *pair = list_to_pair ((list_t *) iter); + return pair->value; +} diff --git a/src/json/hashtable.h b/src/json/hashtable.h new file mode 100644 index 000000000..30e52daa8 --- /dev/null +++ b/src/json/hashtable.h @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2009 Petri Lehtinen + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. 
+ */ + +#ifndef HASHTABLE_H +#define HASHTABLE_H + +#include "../config.h" + +typedef unsigned int (*key_hash_fn)(const void *key); +typedef int (*key_cmp_fn)(const void *key1, const void *key2); +typedef void (*free_fn)(void *key); + +struct hashtable_list { + struct hashtable_list *prev; + struct hashtable_list *next; +}; + +struct hashtable_pair { + void *key; + void *value; + unsigned int hash; + struct hashtable_list list; +}; + +/* first/last delimit this bucket's run of nodes inside hashtable.list; both point at the list head when the bucket is empty */ +struct hashtable_bucket { + struct hashtable_list *first; + struct hashtable_list *last; +}; + +typedef struct hashtable { + unsigned int size; /* number of stored pairs */ + struct hashtable_bucket *buckets; + unsigned int num_buckets; /* index to primes[] */ + struct hashtable_list list; + + key_hash_fn hash_key; + key_cmp_fn cmp_keys; /* returns non-zero for equal keys */ + free_fn free_key; + free_fn free_value; +} hashtable_t; + +/** + * hashtable_create - Create a hashtable object + * + * @hash_key: The key hashing function + * @cmp_keys: The key compare function. Returns non-zero for equal and + * zero for unequal keys + * @free_key: If non-NULL, called for a key that is no longer referenced. + * @free_value: If non-NULL, called for a value that is no longer referenced. + * + * Returns a new hashtable object that should be freed with + * hashtable_destroy when it's no longer used, or NULL on failure (out + * of memory). + */ +hashtable_t *hashtable_create(key_hash_fn hash_key, key_cmp_fn cmp_keys, + free_fn free_key, free_fn free_value); + +/** + * hashtable_destroy - Destroy a hashtable object + * + * @hashtable: The hashtable + * + * Destroys a hashtable created with hashtable_create(). + */ +void hashtable_destroy(hashtable_t *hashtable); + +/** + * hashtable_init - Initialize a hashtable object + * + * @hashtable: The (statically allocated) hashtable object + * @hash_key: The key hashing function + * @cmp_keys: The key compare function.
Returns non-zero for equal and + * zero for unequal keys + * @free_key: If non-NULL, called for a key that is no longer referenced. + * @free_value: If non-NULL, called for a value that is no longer referenced. + * + * Initializes a statically allocated hashtable object. The object + * should be cleared with hashtable_close when it's no longer used. + * + * Returns 0 on success, -1 on error (out of memory). + */ +int hashtable_init(hashtable_t *hashtable, + key_hash_fn hash_key, key_cmp_fn cmp_keys, + free_fn free_key, free_fn free_value); + +/** + * hashtable_close - Release all resources used by a hashtable object + * + * @hashtable: The hashtable + * + * Destroys a statically allocated hashtable object. + */ +void hashtable_close(hashtable_t *hashtable); + +/** + * hashtable_set - Add/modify value in hashtable + * + * @hashtable: The hashtable object + * @key: The key + * @value: The value + * + * If a pair with an equal key already exists, it is removed first (its + * old key and value are released through free_key and free_value) and + * the new key/value pair takes its place. + * + * Key and value are "stolen" in the sense that the hashtable frees them + * automatically when they are no longer used. The freeing is + * accomplished by calling the free_key and free_value functions that were + * supplied to hashtable_create() or hashtable_init(). In case one or both + * of the free functions are NULL, the corresponding item is not "stolen". + * + * Returns 0 on success, -1 on failure (out of memory). + */ +int hashtable_set(hashtable_t *hashtable, void *key, void *value); + +/** + * hashtable_get - Get a value associated with a key + * + * @hashtable: The hashtable object + * @key: The key + * + * Returns value if it is found, or NULL otherwise. + */ +void *hashtable_get(hashtable_t *hashtable, const void *key); + +/** + * hashtable_del - Remove a value from the hashtable + * + * @hashtable: The hashtable object + * @key: The key + * + * Returns 0 on success, or -1 if the key was not found.
+ */ +int hashtable_del(hashtable_t *hashtable, const void *key); + +/** + * hashtable_iter - Iterate over hashtable + * + * @hashtable: The hashtable object + * + * Returns an opaque iterator to the first element in the hashtable, + * or NULL if the hashtable is empty. + * The iterator should be passed to hashtable_iter_* functions. + * The hashtable items are not iterated over in any particular order. + * + * There's no need to free the iterator in any way. The iterator is + * valid as long as the item that is referenced by the iterator is not + * deleted. Other values may be added or deleted. In particular, + * hashtable_iter_next() may be called on an iterator, and after that + * the key/value pair pointed to by the old iterator may be deleted. + */ +void *hashtable_iter(hashtable_t *hashtable); + +/** + * hashtable_iter_next - Advance an iterator + * + * @hashtable: The hashtable object + * @iter: The iterator + * + * Returns a new iterator pointing to the next element in the + * hashtable or NULL if the whole hashtable has been iterated over. + */ +void *hashtable_iter_next(hashtable_t *hashtable, void *iter); + +/** + * hashtable_iter_key - Retrieve the key pointed to by an iterator + * + * @iter: The iterator + */ +void *hashtable_iter_key(void *iter); + +/** + * hashtable_iter_value - Retrieve the value pointed to by an iterator + * + * @iter: The iterator + */ +void *hashtable_iter_value(void *iter); + +#endif diff --git a/src/json/jansson.h b/src/json/jansson.h new file mode 100644 index 000000000..6a2047db3 --- /dev/null +++ b/src/json/jansson.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2009 Petri Lehtinen + * + * Jansson is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details.
+ */ + +#ifndef JANSSON_H +#define JANSSON_H + +#include "../config.h" + +/* types */ + +typedef enum { + JSON_OBJECT, + JSON_ARRAY, + JSON_STRING, + JSON_INTEGER, + JSON_REAL, + JSON_TRUE, + JSON_FALSE, + JSON_NULL +} json_type; + +typedef struct { + json_type type; + unsigned long refcount; +} json_t; + +#define json_typeof(json) ((json)->type) +#define json_is_object(json) (json && json_typeof(json) == JSON_OBJECT) +#define json_is_array(json) (json && json_typeof(json) == JSON_ARRAY) +#define json_is_string(json) (json && json_typeof(json) == JSON_STRING) +#define json_is_integer(json) (json && json_typeof(json) == JSON_INTEGER) +#define json_is_real(json) (json && json_typeof(json) == JSON_REAL) +#define json_is_number(json) (json_is_integer(json) || json_is_real(json)) +#define json_is_true(json) (json && json_typeof(json) == JSON_TRUE) +#define json_is_false(json) (json && json_typeof(json) == JSON_FALSE) +#define json_is_boolean(json) (json_is_true(json) || json_is_false(json)) +#define json_is_null(json) (json && json_typeof(json) == JSON_NULL) + +/* construction, destruction, reference counting */ + +json_t *json_object(void); +json_t *json_array(void); +json_t *json_string(const char *value); +json_t *json_integer(int value); +json_t *json_real(double value); +json_t *json_true(void); +json_t *json_false(void); +json_t *json_null(void); + +static inline json_t *json_incref(json_t *json) +{ + if(json) + ++json->refcount; + return json; +} + +/* do not call json_delete directly */ +void json_delete(json_t *json); + +static inline void json_decref(json_t *json) +{ + if(json && --json->refcount == 0) + json_delete(json); +} + + +/* getters, setters, manipulation */ + +json_t *json_object_get(const json_t *object, const char *key); +int json_object_set_new(json_t *object, const char *key, json_t *value); +int json_object_del(json_t *object, const char *key); +void *json_object_iter(json_t *object); +void *json_object_iter_next(json_t *object, void *iter); 
+const char *json_object_iter_key(void *iter); +json_t *json_object_iter_value(void *iter); + +static inline +int json_object_set(json_t *object, const char *key, json_t *value) +{ + return json_object_set_new(object, key, json_incref(value)); +} + +unsigned int json_array_size(const json_t *array); +json_t *json_array_get(const json_t *array, unsigned int index); +int json_array_set_new(json_t *array, unsigned int index, json_t *value); +int json_array_append_new(json_t *array, json_t *value); + +static inline +int json_array_set(json_t *array, unsigned int index, json_t *value) +{ + return json_array_set_new(array, index, json_incref(value)); +} + +static inline +int json_array_append(json_t *array, json_t *value) +{ + return json_array_append_new(array, json_incref(value)); +} + + +const char *json_string_value(const json_t *json); +int json_integer_value(const json_t *json); +double json_real_value(const json_t *json); +double json_number_value(const json_t *json); + + +/* loading, printing */ + +#define JSON_ERROR_TEXT_LENGTH 160 + +typedef struct { + char text[JSON_ERROR_TEXT_LENGTH]; + int line; +} json_error_t; + +json_t *json_loads(const char *input, json_error_t *error); +json_t *json_loadf(FILE *input, json_error_t *error); +json_t *json_load_file(const char *path, json_error_t *error); + +#define JSON_INDENT(n) (n & 0xFF) + +char *json_dumps(const json_t *json, uint32_t flags); +int json_dumpf(const json_t *json, FILE *output, uint32_t flags); +int json_dump_file(const json_t *json, const char *path, uint32_t flags); + +#endif diff --git a/src/json/jansson_private.h b/src/json/jansson_private.h new file mode 100644 index 000000000..ad8419aa1 --- /dev/null +++ b/src/json/jansson_private.h @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2009 Petri Lehtinen + * + * Jansson is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. 
+ */ + +#ifndef JANSSON_PRIVATE_H +#define JANSSON_PRIVATE_H + +int json_object_set_nocheck(json_t *json, const char *key, json_t *value); +json_t *json_string_nocheck(const char *value); + + +#endif diff --git a/src/json/load.c b/src/json/load.c new file mode 100644 index 000000000..6c6cda59e --- /dev/null +++ b/src/json/load.c @@ -0,0 +1,867 @@ +/* + * Copyright (c) 2009 Petri Lehtinen + * + * Jansson is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#include "jansson.h" +#include "jansson_private.h" +#include "strbuffer.h" +#include "utf.h" + +#define TOKEN_INVALID -1 +#define TOKEN_EOF 0 +#define TOKEN_STRING 256 +#define TOKEN_INTEGER 257 +#define TOKEN_REAL 258 +#define TOKEN_TRUE 259 +#define TOKEN_FALSE 260 +#define TOKEN_NULL 261 + +/* read one byte from stream, return EOF on end of file */ +typedef int (*get_func) (void *data); + +/* return non-zero if end of file has been reached */ +typedef int (*eof_func) (void *data); + +typedef struct { + get_func get; + eof_func eof; + void *data; + int stream_pos; + char buffer[5]; + int buffer_pos; +} stream_t; + + +typedef struct { + stream_t stream; + strbuffer_t saved_text; + int token; + int line, column; + union { + char *string; + int integer; + double real; + } value; +} lex_t; + + +/*** error reporting ***/ + +static void +error_init (json_error_t * error) +{ + if (error) { + error->text[0] = '\0'; + error->line = -1; + } +} + +static void +error_set (json_error_t * error, const lex_t * lex, const char *msg, ...) 
+{ + va_list ap; + char text[JSON_ERROR_TEXT_LENGTH]; + + if (!error || error->text[0] != '\0') { + /* error already set */ + return; + } + + va_start (ap, msg); + vsnprintf (text, JSON_ERROR_TEXT_LENGTH, msg, ap); + va_end (ap); + + if (lex) { + const char *saved_text = strbuffer_value (&lex->saved_text); + error->line = lex->line; + if (saved_text && saved_text[0]) { + if (lex->saved_text.length <= 20) { + snprintf (error->text, JSON_ERROR_TEXT_LENGTH, + "%s near '%s'", text, saved_text); + } + else + snprintf (error->text, JSON_ERROR_TEXT_LENGTH, "%s", text); + } + else { + snprintf (error->text, JSON_ERROR_TEXT_LENGTH, + "%s near end of file", text); + } + } + else { + error->line = -1; + snprintf (error->text, JSON_ERROR_TEXT_LENGTH, "%s", text); + } +} + + +/*** lexical analyzer ***/ + +void +stream_init (stream_t * stream, get_func get, eof_func eof, void *data) +{ + stream->get = get; + stream->eof = eof; + stream->data = data; + stream->stream_pos = 0; + stream->buffer[0] = '\0'; + stream->buffer_pos = 0; +} + +static char +stream_get (stream_t * stream, json_error_t * error) +{ + char c; + + if (!stream->buffer[stream->buffer_pos]) { + stream->buffer[0] = stream->get (stream->data); + stream->buffer_pos = 0; + + c = stream->buffer[0]; + + if (c == EOF && stream->eof (stream->data)) + return EOF; + + if (c < 0) { + /* multi-byte UTF-8 sequence */ + int i, count; + + count = utf8_check_first (c); + if (!count) + goto out; + + g_assert (count >= 2); + + for (i = 1; i < count; i++) + stream->buffer[i] = stream->get (stream->data); + + if (!utf8_check_full (stream->buffer, count)) + goto out; + + stream->stream_pos += count; + stream->buffer[count] = '\0'; + } + else { + stream->buffer[1] = '\0'; + stream->stream_pos++; + } + } + + return stream->buffer[stream->buffer_pos++]; + + out: + error_set (error, NULL, "unable to decode byte 0x%x at position %d", + (unsigned char)c, stream->stream_pos); + + stream->buffer[0] = EOF; + stream->buffer[1] = '\0'; + 
stream->buffer_pos = 1; + + return EOF; +} + +static void +stream_unget (stream_t * stream, char c) +{ + g_assert (stream->buffer_pos > 0); + stream->buffer_pos--; + g_assert (stream->buffer[stream->buffer_pos] == c); +} + + +static int +lex_get (lex_t * lex, json_error_t * error) +{ + return stream_get (&lex->stream, error); +} + +static int +lex_eof (lex_t * lex) +{ + return lex->stream.eof (lex->stream.data); +} + +static void +lex_save (lex_t * lex, char c) +{ + strbuffer_append_byte (&lex->saved_text, c); +} + +static int +lex_get_save (lex_t * lex, json_error_t * error) +{ + char c = stream_get (&lex->stream, error); + lex_save (lex, c); + return c; +} + +static void +lex_unget_unsave (lex_t * lex, char c) +{ + char d; + stream_unget (&lex->stream, c); + d = strbuffer_pop (&lex->saved_text); + g_assert (c == d); +} + +static void +lex_save_cached (lex_t * lex) +{ + while (lex->stream.buffer[lex->stream.buffer_pos] != '\0') { + lex_save (lex, lex->stream.buffer[lex->stream.buffer_pos]); + lex->stream.buffer_pos++; + } +} + +/* assumes that str points to 'u' plus at least 4 valid hex digits */ +static int +decode_unicode_escape (const char *str) +{ + int i; + int value = 0; + + g_assert (str[0] == 'u'); + + for (i = 1; i <= 4; i++) { + char c = str[i]; + value <<= 4; + if (g_ascii_isdigit (c)) + value += c - '0'; + else if (g_ascii_islower (c)) + value += c - 'a' + 10; + else if (g_ascii_isupper (c)) + value += c - 'A' + 10; + else + g_assert (0); + } + + return value; +} + +static void +lex_scan_string (lex_t * lex, json_error_t * error) +{ + char c; + const char *p; + char *t; + int i; + + lex->value.string = NULL; + lex->token = TOKEN_INVALID; + + /* skip the " */ + c = lex_get_save (lex, error); + + while (c != '"') { + if (c == EOF) { + if (lex_eof (lex)) + error_set (error, lex, "premature end of input"); + goto out; + } + + else if (0 <= c && c <= 0x1F) { + /* control character */ + lex_unget_unsave (lex, c); + if (c == '\n') + error_set (error, lex, 
"unexpected newline", c); + else + error_set (error, lex, "control character 0x%x", c); + goto out; + } + + else if (c == '\\') { + c = lex_get_save (lex, error); + if (c == 'u') { + c = lex_get_save (lex, error); + for (i = 0; i < 4; i++) { + if (!g_ascii_isxdigit (c)) { + lex_unget_unsave (lex, c); + error_set (error, lex, "invalid escape"); + goto out; + } + c = lex_get_save (lex, error); + } + } + else if (c == '"' || c == '\\' || c == '/' || c == 'b' || + c == 'f' || c == 'n' || c == 'r' || c == 't') + c = lex_get_save (lex, error); + else { + lex_unget_unsave (lex, c); + error_set (error, lex, "invalid escape"); + goto out; + } + } + else + c = lex_get_save (lex, error); + } + + /* the actual value is at most of the same length as the source + string, because: + - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte + - a single \uXXXX escape (length 6) is converted to at most 3 bytes + - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair + are converted to 4 bytes + */ + lex->value.string = g_malloc (lex->saved_text.length + 1); + if (!lex->value.string) { + /* this is not very nice, since TOKEN_INVALID is returned */ + goto out; + } + + /* the target */ + t = lex->value.string; + + /* + 1 to skip the " */ + p = strbuffer_value (&lex->saved_text) + 1; + + while (*p != '"') { + if (*p == '\\') { + p++; + if (*p == 'u') { + char buffer[4]; + int length; + int value; + + value = decode_unicode_escape (p); + p += 5; + + if (0xD800 <= value && value <= 0xDBFF) { + /* surrogate pair */ + if (*p == '\\' && *(p + 1) == 'u') { + int value2 = decode_unicode_escape (++p); + p += 5; + + if (0xDC00 <= value2 && value2 <= 0xDFFF) { + /* valid second surrogate */ + value = ((value - 0xD800) << 10) + + (value2 - 0xDC00) + 0x10000; + } + else { + /* invalid second surrogate */ + error_set (error, lex, + "invalid Unicode '\\u%04X\\u%04X'", + value, value2); + goto out; + } + } + else { + /* no second surrogate */ + error_set (error, lex, "invalid 
Unicode '\\u%04X'", + value); + goto out; + } + } + else if (0xDC00 <= value && value <= 0xDFFF) { + error_set (error, lex, "invalid Unicode '\\u%04X'", value); + goto out; + } + else if (value == 0) { + error_set (error, lex, "\\u0000 is not allowed"); + goto out; + } + + if (utf8_encode (value, buffer, &length)) + g_assert (0); + + memcpy (t, buffer, length); + t += length; + } + else { + switch (*p) { + case '"': + case '\\': + case '/': + *t = *p; + break; + case 'b': + *t = '\b'; + break; + case 'f': + *t = '\f'; + break; + case 'n': + *t = '\n'; + break; + case 'r': + *t = '\r'; + break; + case 't': + *t = '\t'; + break; + default: + g_assert (0); + } + t++; + p++; + } + } + else + *(t++) = *(p++); + } + *t = '\0'; + lex->token = TOKEN_STRING; + return; + + out: + g_free (lex->value.string); +} + +static void +lex_scan_number (lex_t * lex, char c, json_error_t * error) +{ + const char *saved_text; + char *end; + + lex->token = TOKEN_INVALID; + + if (c == '-') + c = lex_get_save (lex, error); + + if (c == '0') { + c = lex_get_save (lex, error); + if (g_ascii_isdigit (c)) { + lex_unget_unsave (lex, c); + goto out; + } + } + else { /* c != '0' */ + + c = lex_get_save (lex, error); + while (g_ascii_isdigit (c)) + c = lex_get_save (lex, error); + } + + if (c != '.' 
&& c != 'E' && c != 'e') { + lex_unget_unsave (lex, c); + lex->token = TOKEN_INTEGER; + + saved_text = strbuffer_value (&lex->saved_text); + lex->value.integer = strtol (saved_text, &end, 10); + g_assert (end == saved_text + lex->saved_text.length); + + return; + } + + if (c == '.') { + c = lex_get (lex, error); + if (!g_ascii_isdigit (c)) + goto out; + lex_save (lex, c); + + c = lex_get_save (lex, error); + while (g_ascii_isdigit (c)) + c = lex_get_save (lex, error); + } + + if (c == 'E' || c == 'e') { + c = lex_get_save (lex, error); + if (c == '+' || c == '-') + c = lex_get_save (lex, error); + + if (!g_ascii_isdigit (c)) { + lex_unget_unsave (lex, c); + goto out; + } + + c = lex_get_save (lex, error); + while (g_ascii_isdigit (c)) + c = lex_get_save (lex, error); + } + + lex_unget_unsave (lex, c); + lex->token = TOKEN_REAL; + + saved_text = strbuffer_value (&lex->saved_text); + lex->value.real = strtod (saved_text, &end); + g_assert (end == saved_text + lex->saved_text.length); + + out: + return; +} + +static int +lex_scan (lex_t * lex, json_error_t * error) +{ + char c; + + strbuffer_clear (&lex->saved_text); + + if (lex->token == TOKEN_STRING) { + g_free (lex->value.string); + lex->value.string = NULL; + } + + c = lex_get (lex, error); + while (c == ' ' || c == '\t' || c == '\n' || c == '\r') { + if (c == '\n') + lex->line++; + + c = lex_get (lex, error); + } + + if (c == EOF) { + if (lex_eof (lex)) + lex->token = TOKEN_EOF; + else + lex->token = TOKEN_INVALID; + goto out; + } + + lex_save (lex, c); + + if (c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',') + lex->token = c; + + else if (c == '"') + lex_scan_string (lex, error); + + else if (g_ascii_isdigit (c) || c == '-') + lex_scan_number (lex, c, error); + + else if (g_ascii_isupper (c) || g_ascii_islower (c)) { + /* eat up the whole identifier for clearer error messages */ + const char *saved_text; + + c = lex_get_save (lex, error); + while (g_ascii_isupper (c) || g_ascii_islower (c)) 
+ c = lex_get_save (lex, error); + lex_unget_unsave (lex, c); + + saved_text = strbuffer_value (&lex->saved_text); + + if (strcmp (saved_text, "true") == 0) + lex->token = TOKEN_TRUE; + else if (strcmp (saved_text, "false") == 0) + lex->token = TOKEN_FALSE; + else if (strcmp (saved_text, "null") == 0) + lex->token = TOKEN_NULL; + else + lex->token = TOKEN_INVALID; + } + + else { + /* save the rest of the input UTF-8 sequence to get an error + message of valid UTF-8 */ + lex_save_cached (lex); + lex->token = TOKEN_INVALID; + } + + out: + return lex->token; +} + +static char * +lex_steal_string (lex_t * lex) +{ + char *result = NULL; + if (lex->token == TOKEN_STRING) { + result = lex->value.string; + lex->value.string = NULL; + } + return result; +} + +static int +lex_init (lex_t * lex, get_func get, eof_func eof, void *data) +{ + stream_init (&lex->stream, get, eof, data); + if (strbuffer_init (&lex->saved_text)) + return -1; + + lex->token = TOKEN_INVALID; + lex->line = 1; + + return 0; +} + +static void +lex_close (lex_t * lex) +{ + if (lex->token == TOKEN_STRING) + g_free (lex->value.string); + strbuffer_close (&lex->saved_text); +} + + +/*** parser ***/ + +static json_t *parse_value (lex_t * lex, json_error_t * error); + +static json_t * +parse_object (lex_t * lex, json_error_t * error) +{ + json_t *object = json_object (); + if (!object) + return NULL; + + lex_scan (lex, error); + if (lex->token == '}') + return object; + + while (1) { + char *key; + json_t *value; + + if (lex->token != TOKEN_STRING) { + error_set (error, lex, "string or '}' expected"); + goto error; + } + + key = lex_steal_string (lex); + if (!key) + return NULL; + + lex_scan (lex, error); + if (lex->token != ':') { + g_free (key); + error_set (error, lex, "':' expected"); + goto error; + } + + lex_scan (lex, error); + value = parse_value (lex, error); + if (!value) { + g_free (key); + goto error; + } + + if (json_object_set_nocheck (object, key, value)) { + g_free (key); + json_decref 
(value); + goto error; + } + + json_decref (value); + g_free (key); + + lex_scan (lex, error); + if (lex->token != ',') + break; + + lex_scan (lex, error); + } + + if (lex->token != '}') { + error_set (error, lex, "'}' expected"); + goto error; + } + + return object; + + error: + json_decref (object); + return NULL; +} + +static json_t * +parse_array (lex_t * lex, json_error_t * error) +{ + json_t *array = json_array (); + if (!array) + return NULL; + + lex_scan (lex, error); + if (lex->token == ']') + return array; + + while (lex->token) { + json_t *elem = parse_value (lex, error); + if (!elem) + goto error; + + if (json_array_append (array, elem)) { + json_decref (elem); + goto error; + } + json_decref (elem); + + lex_scan (lex, error); + if (lex->token != ',') + break; + + lex_scan (lex, error); + } + + if (lex->token != ']') { + error_set (error, lex, "']' expected"); + goto error; + } + + return array; + + error: + json_decref (array); + return NULL; +} + +static json_t * +parse_value (lex_t * lex, json_error_t * error) +{ + json_t *json; + + switch (lex->token) { + case TOKEN_STRING:{ + json = json_string_nocheck (lex->value.string); + break; + } + + case TOKEN_INTEGER:{ + json = json_integer (lex->value.integer); + break; + } + + case TOKEN_REAL:{ + json = json_real (lex->value.real); + break; + } + + case TOKEN_TRUE: + json = json_true (); + break; + + case TOKEN_FALSE: + json = json_false (); + break; + + case TOKEN_NULL: + json = json_null (); + break; + + case '{': + json = parse_object (lex, error); + break; + + case '[': + json = parse_array (lex, error); + break; + + case TOKEN_INVALID: + error_set (error, lex, "invalid token"); + return NULL; + + default: + error_set (error, lex, "unexpected token"); + return NULL; + } + + if (!json) + return NULL; + + return json; +} + +json_t * +parse_json (lex_t * lex, json_error_t * error) +{ + error_init (error); + + lex_scan (lex, error); + if (lex->token != '[' && lex->token != '{') { + error_set (error, lex, 
"'[' or '{' expected"); + return NULL; + } + + return parse_value (lex, error); +} + +typedef struct { + const char *data; + int pos; +} string_data_t; + +static int +string_get (void *data) +{ + char c; + string_data_t *stream = (string_data_t *) data; + c = stream->data[stream->pos]; + if (c == '\0') + return EOF; + else { + stream->pos++; + return c; + } +} + +static int +string_eof (void *data) +{ + string_data_t *stream = (string_data_t *) data; + return (stream->data[stream->pos] == '\0'); +} + +json_t * +json_loads (const char *string, json_error_t * error) +{ + lex_t lex; + json_t *result; + + string_data_t stream_data = { + .data = string, + .pos = 0 + }; + + if (lex_init (&lex, string_get, string_eof, (void *)&stream_data)) + return NULL; + + result = parse_json (&lex, error); + if (!result) + goto out; + + lex_scan (&lex, error); + if (lex.token != TOKEN_EOF) { + error_set (error, &lex, "end of file expected"); + json_decref (result); + result = NULL; + } + + out: + lex_close (&lex); + return result; +} + +json_t * +json_loadf (FILE * input, json_error_t * error) +{ + lex_t lex; + json_t *result; + + if (lex_init (&lex, (get_func) fgetc, (eof_func) feof, input)) + return NULL; + + result = parse_json (&lex, error); + if (!result) + goto out; + + lex_scan (&lex, error); + if (lex.token != TOKEN_EOF) { + error_set (error, &lex, "end of file expected"); + json_decref (result); + result = NULL; + } + + out: + lex_close (&lex); + return result; +} + +json_t * +json_load_file (const char *path, json_error_t * error) +{ + json_t *result; + FILE *fp; + + fp = fopen (path, "r"); + if (!fp) { + error_set (error, NULL, "unable to open %s: %s", + path, strerror (errno)); + return NULL; + } + + result = json_loadf (fp, error); + + fclose (fp); + return result; +} diff --git a/src/json/strbuffer.c b/src/json/strbuffer.c new file mode 100644 index 000000000..8ba7faa9a --- /dev/null +++ b/src/json/strbuffer.c @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2009 Petri Lehtinen + 
* + * Jansson is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#include "../config.h" +#include "strbuffer.h" +#include "util.h" + +#define STRBUFFER_MIN_SIZE 16 +#define STRBUFFER_FACTOR 2 + +int +strbuffer_init (strbuffer_t * strbuff) +{ + strbuff->size = STRBUFFER_MIN_SIZE; + strbuff->length = 0; + + strbuff->value = g_malloc (strbuff->size); + if (!strbuff->value) + return -1; + + /* initialize to empty */ + strbuff->value[0] = '\0'; + return 0; +} + +void +strbuffer_close (strbuffer_t * strbuff) +{ + g_free (strbuff->value); + strbuff->size = 0; + strbuff->length = 0; + strbuff->value = NULL; +} + +void +strbuffer_clear (strbuffer_t * strbuff) +{ + strbuff->length = 0; + strbuff->value[0] = '\0'; +} + +const char * +strbuffer_value (const strbuffer_t * strbuff) +{ + return strbuff->value; +} + +char * +strbuffer_steal_value (strbuffer_t * strbuff) +{ + char *result = strbuff->value; + strbuffer_init (strbuff); + return result; +} + +int +strbuffer_append (strbuffer_t * strbuff, const char *string) +{ + return strbuffer_append_bytes (strbuff, string, strlen (string)); +} + +int +strbuffer_append_byte (strbuffer_t * strbuff, char byte) +{ + return strbuffer_append_bytes (strbuff, &byte, 1); +} + +int +strbuffer_append_bytes (strbuffer_t * strbuff, const char *data, int size) +{ + if (strbuff->length + size >= strbuff->size) { + strbuff->size = max (strbuff->size * STRBUFFER_FACTOR, + strbuff->length + size + 1); + + strbuff->value = realloc (strbuff->value, strbuff->size); + if (!strbuff->value) + return -1; + } + + memcpy (strbuff->value + strbuff->length, data, size); + strbuff->length += size; + strbuff->value[strbuff->length] = '\0'; + + return 0; +} + +char +strbuffer_pop (strbuffer_t * strbuff) +{ + if (strbuff->length > 0) { + char c = strbuff->value[--strbuff->length]; + strbuff->value[strbuff->length] = '\0'; + return c; + } + else + return '\0'; +} diff --git 
a/src/json/strbuffer.h b/src/json/strbuffer.h
new file mode 100644
index 000000000..4afefa9ff
--- /dev/null
+++ b/src/json/strbuffer.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2009 Petri Lehtinen
+ *
+ * Jansson is free software; you can redistribute it and/or modify
+ * it under the terms of the MIT license. See LICENSE for details.
+ */
+
+#ifndef STRBUFFER_H
+#define STRBUFFER_H
+
+typedef struct {
+    char *value;  /* NUL-terminated contents */
+    int length;   /* bytes used */
+    int size;     /* bytes allocated */
+} strbuffer_t;
+
+int strbuffer_init(strbuffer_t *strbuff);   /* start with an empty buffer; 0 on success, -1 on failure */
+void strbuffer_close(strbuffer_t *strbuff); /* free the storage and reset all fields */
+
+void strbuffer_clear(strbuffer_t *strbuff); /* truncate to "" without releasing the storage */
+
+const char *strbuffer_value(const strbuffer_t *strbuff); /* borrowed pointer to the contents */
+char *strbuffer_steal_value(strbuffer_t *strbuff);       /* return the contents and re-initialize the buffer */
+
+int strbuffer_append(strbuffer_t *strbuff, const char *string); /* append a NUL-terminated string */
+int strbuffer_append_byte(strbuffer_t *strbuff, char byte);     /* append one byte */
+int strbuffer_append_bytes(strbuffer_t *strbuff, const char *data, int size); /* append size bytes; 0 on success, -1 on failure */
+
+char strbuffer_pop(strbuffer_t *strbuff); /* remove and return the last byte, '\0' if empty */
+
+#endif
diff --git a/src/json/utf.c b/src/json/utf.c
new file mode 100644
index 000000000..c9cfeaaef
--- /dev/null
+++ b/src/json/utf.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2009 Petri Lehtinen
+ *
+ * Jansson is free software; you can redistribute it and/or modify
+ * it under the terms of the MIT license. See LICENSE for details.
+ */
+
+#include "../config.h"
+
+/*
+ * Write the UTF-8 form of codepoint into buffer (1 to 4 bytes) and store
+ * the number of bytes written in *size. Returns 0 on success and -1 for
+ * a negative codepoint or one above 0x10FFFF.
+ */
+int
+utf8_encode (int codepoint, char *buffer, int *size)
+{
+	int len, k;
+
+	if (codepoint < 0 || codepoint > 0x10FFFF)
+		return -1;
+
+	if (codepoint < 0x80) {
+		buffer[0] = (char)codepoint;
+		*size = 1;
+		return 0;
+	}
+
+	/* the lead byte carries the sequence length and the top bits */
+	if (codepoint < 0x800) {
+		len = 2;
+		buffer[0] = (char)(0xC0 | (codepoint >> 6));
+	}
+	else if (codepoint < 0x10000) {
+		len = 3;
+		buffer[0] = (char)(0xE0 | (codepoint >> 12));
+	}
+	else {
+		len = 4;
+		buffer[0] = (char)(0xF0 | (codepoint >> 18));
+	}
+
+	/* continuation bytes take six bits each, filled from the tail */
+	for (k = len - 1; k >= 1; k--) {
+		buffer[k] = (char)(0x80 | (codepoint & 0x3F));
+		codepoint >>= 6;
+	}
+
+	*size = len;
+	return 0;
+}
+
+/*
+ * Classify the first byte of a sequence: returns the sequence length
+ * (1 to 4) it announces, or 0 if the byte cannot start a sequence.
+ */
+int
+utf8_check_first (char byte)
+{
+	unsigned char lead = (unsigned char)byte;
+
+	if (lead < 0x80)
+		return 1;		/* plain ASCII */
+	if (lead < 0xC2)
+		return 0;		/* continuation byte, or overlong 0xC0 / 0xC1 */
+	if (lead < 0xE0)
+		return 2;
+	if (lead < 0xF0)
+		return 3;
+	if (lead < 0xF5)
+		return 4;
+
+	/* 0xF5 and above: restricted or invalid */
+	return 0;
+}
+
+/*
+ * Validate one multi-byte sequence of size bytes (2 to 4) at buffer.
+ * Returns 1 if it is well formed, 0 otherwise.
+ */
+int
+utf8_check_full (const char *buffer, int size)
+{
+	int cp, min_cp, k;
+	unsigned char lead = (unsigned char)buffer[0];
+
+	switch (size) {
+	case 2:
+		cp = lead & 0x1F;
+		min_cp = 0x80;
+		break;
+	case 3:
+		cp = lead & 0xF;
+		min_cp = 0x800;
+		break;
+	case 4:
+		cp = lead & 0x7;
+		min_cp = 0x10000;
+		break;
+	default:
+		return 0;
+	}
+
+	for (k = 1; k < size; k++) {
+		unsigned char cont = (unsigned char)buffer[k];
+
+		/* every following byte must look like 10xxxxxx */
+		if ((cont & 0xC0) != 0x80)
+			return 0;
+
+		cp = (cp << 6) | (cont & 0x3F);
+	}
+
+	if (cp > 0x10FFFF)
+		return 0;		/* outside the Unicode range */
+	if (cp >= 0xD800 && cp <= 0xDFFF)
+		return 0;		/* UTF-16 surrogate half */
+	if (cp < min_cp)
+		return 0;		/* overlong encoding */
+
+	return 1;
+}
+
+/*
+ * Validate a whole string. A length of -1 means "use strlen".
+ * Returns 1 if every sequence is valid, 0 otherwise.
+ */
+int
+utf8_check_string (const char *string, int length)
+{
+	int pos = 0;
+
+	if (length == -1)
+		length = strlen (string);
+
+	while (pos < length) {
+		int seq = utf8_check_first (string[pos]);
+
+		if (seq == 0)
+			return 0;
+
+		if (seq > 1) {
+			if (pos + seq > length)
+				return 0;
+			if (!utf8_check_full (string + pos, seq))
+				return 0;
+		}
+
+		pos += seq;
+	}
+
+	return 1;
+}
diff --git a/src/json/utf.h b/src/json/utf.h
new file mode 100644
index 000000000..75d7b6eb8
--- /dev/null
+++ b/src/json/utf.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2009 Petri Lehtinen
+ *
+ * Jansson is free software; you can redistribute it and/or modify
+ * it under the terms
 of the MIT license. See LICENSE for details.
+ */
+
+#ifndef UTF_H
+#define UTF_H
+
+int utf8_encode(int codepoint, char *buffer, int *size); /* writes 1-4 bytes, count in *size; 0 on success, -1 if out of range */
+
+int utf8_check_first(char byte);                   /* sequence length (1-4) announced by a lead byte, 0 if invalid */
+int utf8_check_full(const char *buffer, int size); /* 1 if the size-byte (2-4) sequence is valid, else 0 */
+
+int utf8_check_string(const char *string, int length); /* 1 if the whole string is valid; length -1 means use strlen */
+
+#endif
diff --git a/src/json/util.h b/src/json/util.h
new file mode 100644
index 000000000..66066c52c
--- /dev/null
+++ b/src/json/util.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright (c) 2009 Petri Lehtinen
+ *
+ * Jansson is free software; you can redistribute it and/or modify
+ * it under the terms of the MIT license. See LICENSE for details.
+ */
+
+#ifndef UTIL_H
+#define UTIL_H
+
+#define max(a, b) ((a) > (b) ? (a) : (b)) /* the larger argument is evaluated twice: pass side-effect-free expressions */
+
+#endif
diff --git a/src/json/value.c b/src/json/value.c
new file mode 100644
index 000000000..9270f1f76
--- /dev/null
+++ b/src/json/value.c
@@ -0,0 +1,504 @@
+/*
+ * Copyright (c) 2009 Petri Lehtinen
+ *
+ * Jansson is free software; you can redistribute it and/or modify
+ * it under the terms of the MIT license. See LICENSE for details.
+ */
+
+
+#include "jansson.h"
+#include "hashtable.h"
+#include "jansson_private.h"
+#include "utf.h"
+#include "util.h"
+
+/* Recover the enclosing structure from a pointer to one of its members. */
+#define container_of(ptr_, type_, member_)  \
+	((type_ *)((char *)ptr_ - (size_t)&((type_ *)0)->member_))
+
+typedef struct {
+	json_t json;
+	hashtable_t hashtable;
+} json_object_t;
+
+typedef struct {
+	json_t json;
+	unsigned int size;		/* slots allocated in table */
+	unsigned int entries;	/* slots in use */
+	json_t **table;
+} json_array_t;
+
+typedef struct {
+	json_t json;
+	char *value;
+} json_string_t;
+
+typedef struct {
+	json_t json;
+	double value;
+} json_real_t;
+
+typedef struct {
+	json_t json;
+	int value;
+} json_integer_t;
+
+#define json_to_object(json_)  container_of(json_, json_object_t, json)
+#define json_to_array(json_)   container_of(json_, json_array_t, json)
+#define json_to_string(json_)  container_of(json_, json_string_t, json)
+#define json_to_real(json_)    container_of(json_, json_real_t, json)
+#define json_to_integer(json_) container_of(json_, json_integer_t, json)
+
+/* Common header setup: every new value starts with one reference. */
+static inline void
+json_init (json_t * json, json_type type)
+{
+	json->type = type;
+	json->refcount = 1;
+}
+
+
+/*** object ***/
+
+/* String hash for object keys: hash = hash * 33 + c, seeded with 5381. */
+static unsigned int
+hash_string (const void *key)
+{
+	const char *str = (const char *)key;
+	unsigned int hash = 5381;
+	unsigned int c;
+
+	while ((c = (unsigned int)*str)) {
+		hash = ((hash << 5) + hash) + c;
+		str++;
+	}
+
+	return hash;
+}
+
+/* Key comparison callback for the hashtable: non-zero when equal. */
+static int
+string_equal (const void *key1, const void *key2)
+{
+	return strcmp ((const char *)key1, (const char *)key2) == 0;
+}
+
+/* Value destructor callback for the hashtable. */
+static void
+value_decref (void *value)
+{
+	json_decref ((json_t *) value);
+}
+
+/* Create an empty object; the hashtable frees keys with g_free. */
+json_t *
+json_object (void)
+{
+	json_object_t *object = g_malloc (sizeof (json_object_t));
+	if (!object)
+		return NULL;
+	json_init (&object->json, JSON_OBJECT);
+
+	if (hashtable_init (&object->hashtable, hash_string, string_equal,
+						g_free, value_decref)) {
+		g_free (object);
+		return NULL;
+	}
+	return &object->json;
+}
+
+static void
+json_delete_object (json_object_t * object)
+{
+	hashtable_close (&object->hashtable);
+	g_free (object);
+}
+
+/* Look up key; returns NULL if json is not an object. */
+json_t *
+json_object_get (const json_t * json, const char *key)
+{
+	json_object_t *object;
+
+	if (!json_is_object (json))
+		return NULL;
+
+	object = json_to_object (json);
+	return hashtable_get (&object->hashtable, key);
+}
+
+/*
+ * Store value under key without validating the key as UTF-8.
+ * The caller's reference to value is taken over: it ends up in the
+ * object on success and is dropped on every failure path.
+ * Returns 0 on success, -1 on error.
+ */
+int
+json_object_set_new_nocheck (json_t * json, const char *key, json_t * value)
+{
+	json_object_t *object;
+
+	if (!value)
+		return -1;
+
+	if (!key || !json_is_object (json)) {
+		/* the reference was handed to us, so a bad key must not leak it */
+		json_decref (value);
+		return -1;
+	}
+	object = json_to_object (json);
+
+	/* keys are released with g_free (see json_object), so the copy has
+	   to come from the glib allocator as well */
+	if (hashtable_set (&object->hashtable, g_strdup (key), value)) {
+		json_decref (value);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Same as json_object_set_new_nocheck, but the caller keeps its reference. */
+int
+json_object_set_nocheck (json_t * json, const char *key, json_t * value)
+{
+	return json_object_set_new_nocheck (json, key, json_incref (value));
+}
+
+/*
+ * Store value under key after checking that the key is valid UTF-8.
+ * Takes over the caller's reference to value. Returns 0 or -1.
+ */
+int
+json_object_set_new (json_t * json, const char *key, json_t * value)
+{
+	/* utf8_check_string runs strlen on the key, so reject NULL first */
+	if (!key || !utf8_check_string (key, -1)) {
+		json_decref (value);
+		return -1;
+	}
+
+	return json_object_set_new_nocheck (json, key, value);
+}
+
+/* Remove key from the object; -1 if json is not an object. */
+int
+json_object_del (json_t * json, const char *key)
+{
+	json_object_t *object;
+
+	if (!json_is_object (json))
+		return -1;
+
+	object = json_to_object (json);
+	return hashtable_del (&object->hashtable, key);
+}
+
+/* Start iterating over an object; NULL if json is not an object. */
+void *
+json_object_iter (json_t * json)
+{
+	json_object_t *object;
+
+	if (!json_is_object (json))
+		return NULL;
+
+	object = json_to_object (json);
+	return hashtable_iter (&object->hashtable);
+}
+
+/* Advance an iterator obtained from json_object_iter. */
+void *
+json_object_iter_next (json_t * json, void *iter)
+{
+	json_object_t *object;
+
+	if (!json_is_object (json) || iter == NULL)
+		return NULL;
+
+	object = json_to_object (json);
+	return hashtable_iter_next (&object->hashtable, iter);
+}
+
+/* Key at the iterator position, NULL for a NULL iterator. */
+const char *
+json_object_iter_key (void *iter)
+{
+	if (!iter)
+		return NULL;
+
+	return (const char *)hashtable_iter_key (iter);
+}
+
+/* Value at the iterator position, NULL for a NULL iterator. */
+json_t *
+json_object_iter_value (void *iter)
+{
+	if (!iter)
+		return NULL;
+
+	return (json_t *) hashtable_iter_value (iter);
+}
+
+
+/*** array ***/
+
+/* Create an empty array; the element table is allocated on first append. */
+json_t *
+json_array (void)
+{
+	json_array_t *array = g_malloc (sizeof (json_array_t));
+	if (!array)
+		return NULL;
+	json_init (&array->json, JSON_ARRAY);
+
+	array->entries = 0;
+	array->size = 0;
+	array->table = NULL;
+
+	return &array->json;
+}
+
+static void
+json_delete_array (json_array_t * array)
+{
+	unsigned int i;
+
+	for (i = 0; i < array->entries; i++)
+		json_decref (array->table[i]);
+
+	g_free (array->table);
+	g_free (array);
+}
+
+/* Number of elements; 0 if json is not an array. */
+unsigned int
+json_array_size (const json_t * json)
+{
+	if (!json_is_array (json))
+		return 0;
+
+	return json_to_array (json)->entries;
+}
+
+/* Element at index; NULL if json is not an array or index is out of range. */
+json_t *
+json_array_get (const json_t * json, unsigned int index)
+{
+	json_array_t *array;
+	if (!json_is_array (json))
+		return NULL;
+	array = json_to_array (json);
+
+	if (index >= array->entries)
+		return NULL;
+
+	return array->table[index];
+}
+
+/*
+ * Replace the element at index, taking over the caller's reference to
+ * value. Returns 0 on success, -1 on error.
+ */
+int
+json_array_set_new (json_t * json, unsigned int index, json_t * value)
+{
+	json_array_t *array;
+
+	if (!value)
+		return -1;
+
+	if (!json_is_array (json)) {
+		json_decref (value);
+		return -1;
+	}
+	array = json_to_array (json);
+
+	if (index >= array->entries) {
+		json_decref (value);
+		return -1;
+	}
+
+	json_decref (array->table[index]);
+	array->table[index] = value;
+
+	return 0;
+}
+
+/*
+ * Append value, taking over the caller's reference to it.
+ * Returns 0 on success, -1 on error; a failed append leaves the
+ * existing elements untouched.
+ */
+int
+json_array_append_new (json_t * json, json_t * value)
+{
+	json_array_t *array;
+
+	if (!value)
+		return -1;
+
+	if (!json_is_array (json)) {
+		json_decref (value);
+		return -1;
+	}
+	array = json_to_array (json);
+
+	if (array->entries == array->size) {
+		unsigned int new_size = max (8, array->size * 2);
+		/* the table is released with g_free, so grow it through glib;
+		   table and size are only updated once the new block exists */
+		json_t **new_table = g_try_realloc (array->table,
+											new_size * sizeof (json_t *));
+		if (!new_table) {
+			json_decref (value);
+			return -1;
+		}
+
+		array->table = new_table;
+		array->size = new_size;
+	}
+
+	array->table[array->entries] = value;
+	array->entries++;
+
+	return 0;
+}
+
+
+/*** string ***/
+
+/* Create a string value from a copy of value, without UTF-8 validation. */
+json_t *
+json_string_nocheck (const char *value)
+{
+	json_string_t *string;
+
+	if (!value)
+		return NULL;
+
+	string = g_malloc (sizeof (json_string_t));
+	if (!string)
+		return NULL;
+	json_init (&string->json, JSON_STRING);
+
+	/* json_delete_string releases the copy with g_free */
+	string->value = g_strdup (value);
+	if (!string->value) {
+		g_free (string);
+		return NULL;
+	}
+
+	return &string->json;
+}
+
+/* Create a string value; NULL if value is NULL or not valid UTF-8. */
+json_t *
+json_string (const char *value)
+{
+	if (!value || !utf8_check_string (value, -1))
+		return NULL;
+
+	return json_string_nocheck (value);
+}
+
+/* Borrowed pointer to the text; NULL if json is not a string. */
+const char *
+json_string_value (const json_t * json)
+{
+	if (!json_is_string (json))
+		return NULL;
+
+	return json_to_string (json)->value;
+}
+
+static void
+json_delete_string (json_string_t * string)
+{
+	g_free (string->value);
+	g_free (string);
+}
+
+
+/*** integer ***/
+
+json_t *
+json_integer (int value)
+{
+	json_integer_t *integer = g_malloc (sizeof (json_integer_t));
+	if (!integer)
+		return NULL;
+	json_init (&integer->json, JSON_INTEGER);
+
+	integer->value = value;
+	return &integer->json;
+}
+
+/* Stored integer; 0 if json is not an integer. */
+int
+json_integer_value (const json_t * json)
+{
+	if (!json_is_integer (json))
+		return 0;
+
+	return json_to_integer (json)->value;
+}
+
+static void
+json_delete_integer (json_integer_t * integer)
+{
+	g_free (integer);
+}
+
+
+/*** real ***/
+
+json_t *
+json_real (double value)
+{
+	json_real_t *real = g_malloc (sizeof (json_real_t));
+	if (!real)
+		return NULL;
+	json_init (&real->json, JSON_REAL);
+
+	real->value = value;
+	return &real->json;
+}
+
+/* Stored double; 0 if json is not a real. */
+double
+json_real_value (const json_t * json)
+{
+	if (!json_is_real (json))
+		return 0;
+
+	return json_to_real (json)->value;
+}
+
+static void
+json_delete_real (json_real_t * real)
+{
+	g_free (real);
+}
+
+
+/*** number ***/
+
+/* Value of an integer or a real as a double; 0.0 for any other type. */
+double
+json_number_value (const json_t * json)
+{
+	if (json_is_integer (json))
+		return json_integer_value (json);
+	else if (json_is_real (json))
+		return json_real_value (json);
+	else
+		return 0.0;
+}
+
+
+/*** simple values ***/
+
+/* true, false and null are static singletons handed out with a new reference */
+
+json_t *
+json_true (void)
+{
+	static json_t the_true = {
+		.type = JSON_TRUE,
+		.refcount = 1
+	};
+	return json_incref (&the_true);
+}
+
+
+json_t *
+json_false (void)
+{
+	static json_t the_false = {
+		.type = JSON_FALSE,
+		.refcount = 1
+	};
+	return json_incref (&the_false);
+}
+
+
+json_t *
+json_null (void)
+{
+	static json_t the_null = {
+		.type = JSON_NULL,
+		.refcount = 1
+	};
+	return json_incref (&the_null);
+}
+
+
+/*** deletion ***/
+
+/* Free a value according to its type. */
+void
+json_delete (json_t * json)
+{
+	if (json_is_object (json))
+		json_delete_object (json_to_object (json));
+
+	else if (json_is_array (json))
+		json_delete_array (json_to_array (json));
+
+	else if (json_is_string (json))
+		json_delete_string (json_to_string (json));
+
+	else if (json_is_integer (json))
+		json_delete_integer (json_to_integer (json));
+
+	else if (json_is_real (json))
+		json_delete_real (json_to_real (json));
+
+	/* json_delete is not called for true, false or null */
+}
diff --git a/src/settings.c b/src/settings.c
new file mode 100644
index 000000000..f05f59b45
--- /dev/null
+++ b/src/settings.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2009, Rambler media
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Rambler media ''AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.
IN NO EVENT SHALL Rambler BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "cfg_file.h" +#include "map.h" +#include "main.h" +#include "settings.h" +#include "json/jansson.h" + +struct json_buf { + GHashTable *table; + u_char *buf; + u_char *pos; + size_t buflen; +}; + +static void +settings_free (gpointer data) +{ + struct rspamd_settings *s = data; + + if (s->statfile_alias) { + g_free (s->statfile_alias); + } + if (s->factors) { + g_hash_table_destroy (s->factors); + } + if (s->metric_scores) { + g_hash_table_destroy (s->metric_scores); + } + g_free (s); +} + + +u_char* +json_read_cb (memory_pool_t *pool, u_char *chunk, size_t len, struct map_cb_data *data) +{ + struct json_buf *jb; + size_t free, off; + + if (data->cur_data == NULL) { + jb = g_malloc (sizeof (struct json_buf)); + jb->table = ((struct json_buf *)data->prev_data)->table; + jb->buf = NULL; + jb->pos = NULL; + data->cur_data = jb; + } + else { + jb = data->cur_data; + } + + if (jb->buf == NULL) { + /* Allocate memory for buffer */ + jb->pos = jb->buf; + jb->buflen = len * 2; + jb->buf = g_malloc (jb->buflen); + } + + off = jb->pos - jb->buf; + free = jb->buflen - off; + + if (free < len) { + jb->buflen = MAX (free * 2, len * 2); + jb->buf = g_realloc (jb->buf, jb->buflen); + jb->pos = jb->buf + off; + } + + memcpy (chunk, jb->pos, len); + jb->pos += len; + + /* Say not to copy any part of this buffer */ + return NULL; +} + +void +json_fin_cb (memory_pool_t *pool, struct map_cb_data *data) +{ + struct json_buf *jb; + int nelts, i; + 
json_t *js, *cur_elt, *cur_nm, *it_val; + json_error_t je; + struct rspamd_settings *cur_settings; + char *cur_name; + void *json_it; + double *score; + + if (data->prev_data) { + jb = data->prev_data; + /* Clean prev data */ + if (jb->table) { + g_hash_table_remove_all (jb->table); + } + if (jb->buf) { + g_free (jb->buf); + } + g_free (jb->buf); + } + + /* Now parse json */ + /* NULL terminate current buf */ + *jb->pos = '\0'; + + js = json_loads (jb->buf, &je); + if (!js) { + msg_err ("json_fin_cb: cannot load json data: parse error %s, on line %d", je.text, je.line); + return; + } + + if (!json_is_array (js)) { + json_decref (js); + msg_err ("json_fin_cb: loaded json is not an array"); + return; + } + + nelts = json_array_size (js); + for (i = 0; i < nelts; i ++) { + cur_settings = g_malloc (sizeof (struct rspamd_settings)); + cur_settings->metric_scores = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free); + cur_settings->factors = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free); + cur_settings->statfile_alias = NULL; + cur_settings->want_spam = FALSE; + + cur_elt = json_array_get (js, i); + if (!cur_elt || !json_is_object (cur_elt)) { + json_decref (js); + msg_err ("json_fin_cb: loaded json is not an object"); + return; + } + cur_nm = json_object_get (cur_elt, "name"); + if (cur_nm == NULL || !json_is_string (cur_nm)) { + json_decref (js); + msg_err ("json_fin_cb: name is not a string or not exists"); + return; + } + cur_name = g_strdup (json_string_value (cur_nm)); + /* Now check other settings */ + /* Statfile */ + cur_nm = json_object_get (cur_elt, "statfile"); + if (cur_nm != NULL && json_is_string (cur_nm)) { + cur_settings->statfile_alias = g_strdup (json_string_value (cur_nm)); + } + /* Factors object */ + cur_nm = json_object_get (cur_elt, "factors"); + if (cur_nm != NULL && json_is_object (cur_nm)) { + json_it = json_object_iter (cur_nm); + while (json_it) { + it_val = json_object_iter_value (json_it); + if (it_val && 
json_is_string (it_val)) { + g_hash_table_insert (cur_settings->factors, g_strdup (json_object_iter_key (json_it)), + g_strdup (json_string_value (it_val))); + } + json_it = json_object_iter_next(cur_nm, json_it); + } + } + /* Metrics object */ + cur_nm = json_object_get (cur_elt, "metrics"); + if (cur_nm != NULL && json_is_object (cur_nm)) { + json_it = json_object_iter (cur_nm); + while (json_it) { + it_val = json_object_iter_value (json_it); + if (it_val && json_is_number (it_val)) { + score = g_malloc (sizeof (double)); + *score = json_number_value (it_val); + g_hash_table_insert (cur_settings->factors, g_strdup (json_object_iter_key (json_it)), + score); + } + json_it = json_object_iter_next(cur_nm, json_it); + } + } + /* Want spam */ + cur_nm = json_object_get (cur_elt, "want_spam"); + if (cur_nm != NULL) { + if (json_is_true (cur_nm)) { + cur_settings->want_spam = TRUE; + } + } + g_hash_table_insert (((struct json_buf*)data->cur_data)->table, cur_name, cur_settings); + } + json_decref (js); +} + +gboolean +read_settings (const char *path, struct config_file *cfg, GHashTable *table) +{ + struct json_buf *jb = g_malloc (sizeof (struct json_buf)); + + jb->table = table; + jb->buf = NULL; + + if (!add_map (path, json_read_cb, json_fin_cb, (void **)&jb)) { + msg_err ("read_settings: cannot add map %s", path); + return FALSE; + } + + return TRUE; +} + +void +init_settings (struct config_file *cfg) +{ + cfg->domain_settings = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, g_free, settings_free); + cfg->user_settings = g_hash_table_new_full (rspamd_strcase_hash, rspamd_strcase_equal, g_free, settings_free); +} + +/* + * vi:ts=4 + */ diff --git a/src/settings.h b/src/settings.h new file mode 100644 index 000000000..8d2214ae1 --- /dev/null +++ b/src/settings.h @@ -0,0 +1,17 @@ +#ifndef RSPAMD_SETTINGS_H +#define RSPAMD_SETTINGS_H + +#include "config.h" +#include "main.h" + +struct rspamd_settings { + GHashTable *metric_scores; /**< hash table of 
metric scores for this setting */ + GHashTable *factors; /**< hash table of new factors for this setting */ + char *statfile_alias; /**< alias for statfile used */ + gboolean want_spam; /**< if true disable rspamd checks */ +}; + + +int read_settings (const char *path, struct config_file *cfg, GHashTable *table); + +#endif