From 4cd7b6979919dad2dba90c096eb8581bb4266632 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Tue, 28 Apr 2015 14:39:25 +0100 Subject: [PATCH] Import fixes from libucl. --- contrib/libucl/ucl.h | 12 +++ contrib/libucl/ucl_hash.c | 104 ++++++++++++++++++----- contrib/libucl/ucl_internal.h | 1 + contrib/libucl/ucl_parser.c | 153 ++++++++++++++++------------------ contrib/libucl/ucl_util.c | 20 ++++- 5 files changed, 185 insertions(+), 105 deletions(-) diff --git a/contrib/libucl/ucl.h b/contrib/libucl/ucl.h index 823ac8d3b..7262c8e11 100644 --- a/contrib/libucl/ucl.h +++ b/contrib/libucl/ucl.h @@ -145,6 +145,7 @@ typedef enum ucl_emitter { * UCL still has to perform copying implicitly. */ typedef enum ucl_parser_flags { + UCL_PARSER_DEFAULT = 0x0, /**< No special flags */ UCL_PARSER_KEY_LOWERCASE = 0x1, /**< Convert all keys to lower case */ UCL_PARSER_ZEROCOPY = 0x2, /**< Parse input in zero-copy mode if possible */ UCL_PARSER_NO_TIME = 0x4, /**< Do not parse time and treat time values as strings */ @@ -155,6 +156,7 @@ typedef enum ucl_parser_flags { * String conversion flags, that are used in #ucl_object_fromstring_common function. */ typedef enum ucl_string_flags { + UCL_STRING_RAW = 0x0, /**< Treat string as is */ UCL_STRING_ESCAPE = 0x1, /**< Perform JSON escape */ UCL_STRING_TRIM = 0x2, /**< Trim leading and trailing whitespaces */ UCL_STRING_PARSE_BOOLEAN = 0x4, /**< Parse passed string and detect boolean */ @@ -630,6 +632,16 @@ UCL_EXTERN const ucl_object_t* ucl_object_find_keyl (const ucl_object_t *obj, UCL_EXTERN const ucl_object_t *ucl_lookup_path (const ucl_object_t *obj, const char *path); +/** + * Return object identified by object notation string using arbitrary delimiter + * @param obj object to search in + * @param path dot.notation.path to the path to lookup. May use numeric .index on arrays + * @param sep the separator to use in place of . (in case keys have . 
in them) + * @return object matched the specified path or NULL if path is not found + */ +UCL_EXTERN const ucl_object_t *ucl_lookup_path_char (const ucl_object_t *obj, + const char *path, char sep); + /** * Returns a key of an object as a NULL terminated string * @param obj CL object diff --git a/contrib/libucl/ucl_hash.c b/contrib/libucl/ucl_hash.c index 760952e8d..ddddf1dd0 100644 --- a/contrib/libucl/ucl_hash.c +++ b/contrib/libucl/ucl_hash.c @@ -26,6 +26,8 @@ #include "khash.h" #include "kvec.h" +#include <time.h> + struct ucl_hash_elt { const ucl_object_t *obj; size_t ar_idx; @@ -37,10 +39,58 @@ struct ucl_hash_struct { bool caseless; }; +static uint64_t +ucl_hash_seed (void) +{ + static uint64_t seed; + + if (seed == 0) { + /* Not very random but can be useful for our purposes */ + seed = time (NULL); + } + + return seed; +} + +static const unsigned char lc_map[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 
0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff +}; + static inline uint32_t ucl_hash_func (const ucl_object_t *o) { - return XXH32 (o->key, o->keylen, 0xdeadbeef); + return XXH64 (o->key, o->keylen, ucl_hash_seed ()); } static inline int @@ -59,29 +109,43 @@ KHASH_INIT (ucl_hash_node, const ucl_object_t *, struct ucl_hash_elt, 1, static inline uint32_t ucl_hash_caseless_func (const ucl_object_t *o) { - void *xxh = XXH32_init (0xdeadbeef); - char hash_buf[64], *c; - const char *p; - ssize_t remain = o->keylen; - - p = o->key; - c = &hash_buf[0]; - - while (remain > 0) { - *c++ = tolower (*p++); - - if (c - &hash_buf[0] == sizeof (hash_buf)) { - XXH32_update (xxh, hash_buf, sizeof (hash_buf)); - c = &hash_buf[0]; - } - remain --; + unsigned len = o->keylen; + unsigned leftover = o->keylen % 4; + unsigned fp, i; + const uint8_t* s = (const uint8_t*)o->key; + union { + struct { + unsigned char c1, c2, c3, c4; + } c; + uint32_t pp; + } u; + XXH64_state_t st; + + fp = len - leftover; + XXH64_reset (&st, ucl_hash_seed ()); + + for (i = 0; i != fp; i += 4) { + u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3]; + u.c.c1 = lc_map[u.c.c1]; + u.c.c2 = lc_map[u.c.c2]; + u.c.c3 = lc_map[u.c.c3]; + u.c.c4 = lc_map[u.c.c4]; + XXH64_update (&st, &u.pp, sizeof (u)); } - if (c - &hash_buf[0] != 0) { - XXH32_update (xxh, hash_buf, c - &hash_buf[0]); + u.pp = 0; + switch (leftover) { + case 3: + u.c.c3 = lc_map[(unsigned char)s[i++]]; + case 2: + u.c.c2 = lc_map[(unsigned char)s[i++]]; + case 1: + u.c.c1 = lc_map[(unsigned char)s[i]]; + 
XXH64_update (&st, &u.pp, leftover); + break; } - return XXH32_digest (xxh); + return XXH64_digest (&st); } static inline int diff --git a/contrib/libucl/ucl_internal.h b/contrib/libucl/ucl_internal.h index 7528750ff..0bb225148 100644 --- a/contrib/libucl/ucl_internal.h +++ b/contrib/libucl/ucl_internal.h @@ -193,6 +193,7 @@ struct ucl_parser { int flags; ucl_object_t *top_obj; ucl_object_t *cur_obj; + ucl_object_t *trash_objs; char *cur_file; struct ucl_macro *macroes; struct ucl_stack *stack; diff --git a/contrib/libucl/ucl_parser.c b/contrib/libucl/ucl_parser.c index 8036229d4..4b5005be6 100644 --- a/contrib/libucl/ucl_parser.c +++ b/contrib/libucl/ucl_parser.c @@ -1001,7 +1001,7 @@ ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont, */ static bool ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, - bool *next_key, bool *end_of_object, bool *skip_value) + bool *next_key, bool *end_of_object) { const unsigned char *p, *c = NULL, *end, *t; const char *key = NULL; @@ -1220,9 +1220,12 @@ ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, ucl_parser_append_elt (parser, container, tobj, nobj); } else if (priold > prinew) { - ucl_object_unref (nobj); - *skip_value = true; - return true; + /* + * We add this new object to a list of trash objects just to ensure + * that it won't come to any real object + * XXX: rather inefficient approach + */ + DL_APPEND (parser->trash_objs, nobj); } else { ucl_hash_replace (container, tobj, nobj); @@ -1236,7 +1239,6 @@ ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, parser->stack->obj->value.ov = container; parser->cur_obj = nobj; - *skip_value = false; return true; } @@ -1249,8 +1251,7 @@ ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, */ static bool ucl_parse_string_value (struct ucl_parser *parser, - struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape, - bool skip_value) + struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape) 
{ const unsigned char *p; enum { @@ -1397,8 +1398,7 @@ ucl_get_value_object (struct ucl_parser *parser) * @return */ static bool -ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk, - bool skip_value) +ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) { const unsigned char *p, *c; ucl_object_t *obj = NULL; @@ -1431,18 +1431,16 @@ ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk, return false; } - if (!skip_value) { - obj = ucl_get_value_object (parser); - str_len = chunk->pos - c - 2; - obj->type = UCL_STRING; - if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, - &obj->trash_stack[UCL_TRASH_VALUE], - &obj->value.sv, str_len, need_unescape, false, - var_expand)) == -1) { - return false; - } - obj->len = str_len; + obj = ucl_get_value_object (parser); + str_len = chunk->pos - c - 2; + obj->type = UCL_STRING; + if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, + &obj->trash_stack[UCL_TRASH_VALUE], + &obj->value.sv, str_len, need_unescape, false, + var_expand)) == -1) { + return false; } + obj->len = str_len; parser->state = UCL_STATE_AFTER_VALUE; p = chunk->pos; @@ -1450,13 +1448,11 @@ ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk, return true; break; case '{': - if (!skip_value) { - obj = ucl_get_value_object (parser); - /* We have a new object */ - obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level); - if (obj == NULL) { - return false; - } + obj = ucl_get_value_object (parser); + /* We have a new object */ + obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level); + if (obj == NULL) { + return false; } ucl_chunk_skipc (chunk, p); @@ -1464,13 +1460,11 @@ ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk, return true; break; case '[': - if (!skip_value) { - obj = ucl_get_value_object (parser); - /* We have a new array */ - obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level); - if (obj == NULL) { - return false; - } + 
obj = ucl_get_value_object (parser); + /* We have a new array */ + obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level); + if (obj == NULL) { + return false; } ucl_chunk_skipc (chunk, p); @@ -1511,16 +1505,14 @@ ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk, return false; } - if (!skip_value) { - obj->type = UCL_STRING; - if ((str_len = ucl_copy_or_store_ptr (parser, c, - &obj->trash_stack[UCL_TRASH_VALUE], - &obj->value.sv, str_len - 1, false, - false, var_expand)) == -1) { - return false; - } - obj->len = str_len; + obj->type = UCL_STRING; + if ((str_len = ucl_copy_or_store_ptr (parser, c, + &obj->trash_stack[UCL_TRASH_VALUE], + &obj->value.sv, str_len - 1, false, + false, var_expand)) == -1) { + return false; } + obj->len = str_len; parser->state = UCL_STATE_AFTER_VALUE; @@ -1531,7 +1523,7 @@ ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk, /* Fallback to ordinary strings */ default: parse_string: - if (obj == NULL && !skip_value) { + if (obj == NULL) { obj = ucl_get_value_object (parser); } @@ -1550,7 +1542,7 @@ parse_string: } if (!ucl_parse_string_value (parser, chunk, &var_expand, - &need_unescape, skip_value)) { + &need_unescape)) { return false; } /* Cut trailing spaces */ @@ -1570,16 +1562,14 @@ parse_string: obj->type = UCL_NULL; } else if (!ucl_maybe_parse_boolean (obj, c, str_len)) { - if (!skip_value) { - obj->type = UCL_STRING; - if ((str_len = ucl_copy_or_store_ptr (parser, c, - &obj->trash_stack[UCL_TRASH_VALUE], - &obj->value.sv, str_len, need_unescape, - false, var_expand)) == -1) { - return false; - } - obj->len = str_len; + obj->type = UCL_STRING; + if ((str_len = ucl_copy_or_store_ptr (parser, c, + &obj->trash_stack[UCL_TRASH_VALUE], + &obj->value.sv, str_len, need_unescape, + false, var_expand)) == -1) { + return false; } + obj->len = str_len; } parser->state = UCL_STATE_AFTER_VALUE; p = chunk->pos; @@ -1599,8 +1589,7 @@ parse_string: * @return */ static bool -ucl_parse_after_value 
(struct ucl_parser *parser, struct ucl_chunk *chunk, - bool skip_value) +ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk) { const unsigned char *p; bool got_sep = false; @@ -1630,31 +1619,29 @@ ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk, &parser->err); return false; } - if (!skip_value) { - if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) || - (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) { + if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) || + (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) { + + /* Pop all nested objects from a stack */ + st = parser->stack; + parser->stack = st->next; + UCL_FREE (sizeof (struct ucl_stack), st); - /* Pop all nested objects from a stack */ + while (parser->stack != NULL) { st = parser->stack; + if (st->next == NULL || st->next->level == st->level) { + break; + } parser->stack = st->next; UCL_FREE (sizeof (struct ucl_stack), st); - - while (parser->stack != NULL) { - st = parser->stack; - if (st->next == NULL || st->next->level == st->level) { - break; - } - parser->stack = st->next; - UCL_FREE (sizeof (struct ucl_stack), st); - } - } - else { - ucl_set_err (parser, UCL_ESYNTAX, - "unexpected terminating symbol detected", - &parser->err); - return false; } } + else { + ucl_set_err (parser, UCL_ESYNTAX, + "unexpected terminating symbol detected", + &parser->err); + return false; + } if (parser->stack == NULL) { /* Ignore everything after a top object */ @@ -1892,7 +1879,7 @@ ucl_state_machine (struct ucl_parser *parser) unsigned char *macro_escaped; size_t macro_len = 0; struct ucl_macro *macro = NULL; - bool next_key = false, end_of_object = false, skip_value = false, ret; + bool next_key = false, end_of_object = false, ret; if (parser->top_obj == NULL) { if (*chunk->pos == '[') { @@ -1960,8 +1947,7 @@ ucl_state_machine (struct ucl_parser *parser) parser->state = UCL_STATE_ERROR; return false; } - if (!ucl_parse_key (parser, chunk, &next_key, 
&end_of_object, - &skip_value)) { + if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) { parser->prev_state = parser->state; parser->state = UCL_STATE_ERROR; return false; @@ -1991,7 +1977,7 @@ ucl_state_machine (struct ucl_parser *parser) break; case UCL_STATE_VALUE: /* We need to check what we do have */ - if (!ucl_parse_value (parser, chunk, skip_value)) { + if (!ucl_parse_value (parser, chunk)) { parser->prev_state = parser->state; parser->state = UCL_STATE_ERROR; return false; @@ -2000,14 +1986,12 @@ ucl_state_machine (struct ucl_parser *parser) p = chunk->pos; break; case UCL_STATE_AFTER_VALUE: - if (!ucl_parse_after_value (parser, chunk, skip_value)) { + if (!ucl_parse_after_value (parser, chunk)) { parser->prev_state = parser->state; parser->state = UCL_STATE_ERROR; return false; } - skip_value = false; - if (parser->stack != NULL) { if (parser->stack->obj->type == UCL_OBJECT) { parser->state = UCL_STATE_KEY; @@ -2073,6 +2057,11 @@ ucl_state_machine (struct ucl_parser *parser) macro->ud); UCL_FREE (macro_len + 1, macro_escaped); } + + /* + * Chunk can be modified within macro handler + */ + chunk = parser->chunks; p = chunk->pos; if (macro_args) { ucl_object_unref (macro_args); diff --git a/contrib/libucl/ucl_util.c b/contrib/libucl/ucl_util.c index e770bd50a..d976b0d71 100644 --- a/contrib/libucl/ucl_util.c +++ b/contrib/libucl/ucl_util.c @@ -429,6 +429,7 @@ ucl_parser_free (struct ucl_parser *parser) struct ucl_chunk *chunk, *ctmp; struct ucl_pubkey *key, *ktmp; struct ucl_variable *var, *vtmp; + ucl_object_t *tr, *trtmp; if (parser == NULL) { return; @@ -457,6 +458,9 @@ ucl_parser_free (struct ucl_parser *parser) free (var->var); UCL_FREE (sizeof (struct ucl_variable), var); } + LL_FOREACH_SAFE (parser->trash_objs, tr, trtmp) { + ucl_object_unref (tr); + } if (parser->err != NULL) { utstring_free (parser->err); @@ -1795,6 +1799,12 @@ ucl_object_iterate_free (ucl_object_iter_t it) const ucl_object_t * ucl_lookup_path (const ucl_object_t *top, 
const char *path_in) { + return ucl_lookup_path_char (top, path_in, '.'); +} + + +const ucl_object_t * +ucl_lookup_path_char (const ucl_object_t *top, const char *path_in, const char sep) { const ucl_object_t *o = NULL, *found; const char *p, *c; char *err_str; @@ -1808,20 +1818,20 @@ ucl_lookup_path (const ucl_object_t *top, const char *path_in) { p = path_in; /* Skip leading dots */ - while (*p == '.') { + while (*p == sep) { p ++; } c = p; while (*p != '\0') { p ++; - if (*p == '.' || *p == '\0') { + if (*p == sep || *p == '\0') { if (p > c) { switch (top->type) { case UCL_ARRAY: /* Key should be an int */ index = strtoul (c, &err_str, 10); - if (err_str != NULL && (*err_str != '.' && *err_str != '\0')) { + if (err_str != NULL && (*err_str != sep && *err_str != '\0')) { return NULL; } o = ucl_array_find_index (top, index); @@ -1916,6 +1926,10 @@ ucl_object_new_userdata (ucl_userdata_dtor dtor, ucl_userdata_emitter emitter) ucl_type_t ucl_object_type (const ucl_object_t *obj) { + if (obj == NULL) { + return UCL_NULL; + } + return obj->type; } -- 2.39.5