diff options
author | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-03-21 15:01:31 +0000 |
---|---|---|
committer | Vsevolod Stakhov <vsevolod@highsecure.ru> | 2018-03-21 15:03:16 +0000 |
commit | 15cc0164347a8397b324709b0ef288bd421f24cc (patch) | |
tree | 7d8807117524ff60cf9ca04e3d7dd08a61cb05e4 /contrib | |
parent | ad66f32f1358d30d28e5d28a61b71349f158027c (diff) | |
download | rspamd-15cc0164347a8397b324709b0ef288bd421f24cc.tar.gz rspamd-15cc0164347a8397b324709b0ef288bd421f24cc.zip |
[Feature] Support single quotes in config
Issue: #2080
Diffstat (limited to 'contrib')
-rw-r--r-- | contrib/libucl/README.md | 397 | ||||
-rw-r--r-- | contrib/libucl/ucl.h | 87 | ||||
-rw-r--r-- | contrib/libucl/ucl_emitter.c | 12 | ||||
-rw-r--r-- | contrib/libucl/ucl_emitter_utils.c | 86 | ||||
-rw-r--r-- | contrib/libucl/ucl_internal.h | 56 | ||||
-rw-r--r-- | contrib/libucl/ucl_parser.c | 102 | ||||
-rw-r--r-- | contrib/libucl/ucl_util.c | 63 |
7 files changed, 753 insertions, 50 deletions
diff --git a/contrib/libucl/README.md b/contrib/libucl/README.md new file mode 100644 index 000000000..146143d13 --- /dev/null +++ b/contrib/libucl/README.md @@ -0,0 +1,397 @@ +# LIBUCL + +[![CircleCI](https://circleci.com/gh/vstakhov/libucl.svg?style=svg)](https://circleci.com/gh/vstakhov/libucl) +[![Coverity](https://scan.coverity.com/projects/4138/badge.svg)](https://scan.coverity.com/projects/4138) +[![Coverage Status](https://coveralls.io/repos/github/vstakhov/libucl/badge.svg?branch=master)](https://coveralls.io/github/vstakhov/libucl?branch=master) + +**Table of Contents** *generated with [DocToc](http://doctoc.herokuapp.com/)* + +- [Introduction](#introduction) +- [Basic structure](#basic-structure) +- [Improvements to the json notation](#improvements-to-the-json-notation) + - [General syntax sugar](#general-syntax-sugar) + - [Automatic arrays creation](#automatic-arrays-creation) + - [Named keys hierarchy](#named-keys-hierarchy) + - [Convenient numbers and booleans](#convenient-numbers-and-booleans) +- [General improvements](#general-improvements) + - [Comments](#comments) + - [Macros support](#macros-support) + - [Variables support](#variables-support) + - [Multiline strings](#multiline-strings) + - [Single quoted strings](#single-quoted-strings) +- [Emitter](#emitter) +- [Validation](#validation) +- [Performance](#performance) +- [Conclusion](#conclusion) + +## Introduction + +This document describes the main features and principles of the configuration +language called `UCL` - universal configuration language. + +If you are looking for the libucl API documentation you can find it at [this page](doc/api.md). + +## Basic structure + +UCL is heavily infused by `nginx` configuration as the example of a convenient configuration +system. However, UCL is fully compatible with `JSON` format and is able to parse json files. 
+For example, you can write the same configuration in the following ways: + +* in nginx like: + +```nginx +param = value; +section { + param = value; + param1 = value1; + flag = true; + number = 10k; + time = 0.2s; + string = "something"; + subsection { + host = { + host = "hostname"; + port = 900; + } + host = { + host = "hostname"; + port = 901; + } + } +} +``` + +* or in JSON: + +```json +{ + "param": "value", + "param1": "value1", + "flag": true, + "subsection": { + "host": [ + { + "host": "hostname", + "port": 900 + }, + { + "host": "hostname", + "port": 901 + } + ] + } +} +``` + +## Improvements to the json notation. + +There are various things that make ucl configuration more convenient for editing than strict json: + +### General syntax sugar + +* Braces are not necessary to enclose a top object: it is automatically treated as an object: + +```json +"key": "value" +``` +is equal to: +```json +{"key": "value"} +``` + +* There is no requirement of quotes for strings and keys, moreover, `:` may be replaced `=` or even be skipped for objects: + +```nginx +key = value; +section { + key = value; +} +``` +is equal to: +```json +{ + "key": "value", + "section": { + "key": "value" + } +} +``` + +* No commas mess: you can safely place a comma or semicolon for the last element in an array or an object: + +```json +{ + "key1": "value", + "key2": "value", +} +``` +### Automatic arrays creation + +* Non-unique keys in an object are allowed and are automatically converted to the arrays internally: + +```json +{ + "key": "value1", + "key": "value2" +} +``` +is converted to: +```json +{ + "key": ["value1", "value2"] +} +``` + +### Named keys hierarchy + +UCL accepts named keys and organize them into objects hierarchy internally. 
Here is an example of this process: +```nginx +section "blah" { + key = value; +} +section foo { + key = value; +} +``` + +is converted to the following object: + +```nginx +section { + blah { + key = value; + } + foo { + key = value; + } +} +``` + +Plain definitions may be more complex and contain more than a single level of nested objects: + +```nginx +section "blah" "foo" { + key = value; +} +``` + +is presented as: + +```nginx +section { + blah { + foo { + key = value; + } + } +} +``` + +### Convenient numbers and booleans + +* Numbers can have suffixes to specify standard multipliers: + + `[kKmMgG]` - standard 10 base multipliers (so `1k` is translated to 1000) + + `[kKmMgG]b` - 2 power multipliers (so `1kb` is translated to 1024) + + `[s|min|d|w|y]` - time multipliers, all time values are translated to float number of seconds, for example `10min` is translated to 600.0 and `10ms` is translated to 0.01 +* Hexadecimal integers can be used by `0x` prefix, for example `key = 0xff`. However, floating point values can use decimal base only. +* Booleans can be specified as `true` or `yes` or `on` and `false` or `no` or `off`. +* It is still possible to treat numbers and booleans as strings by enclosing them in double quotes. + +## General improvements + +### Comments + +UCL supports different style of comments: + +* single line: `#` +* multiline: `/* ... */` + +Multiline comments may be nested: +```c +# Sample single line comment +/* + some comment + /* nested comment */ + end of comment +*/ +``` + +### Macros support + +UCL supports external macros both multiline and single line ones: +```nginx +.macro_name "sometext"; +.macro_name { + Some long text + .... +}; +``` + +Moreover, each macro can accept an optional list of arguments in braces. 
These +arguments themselves are the UCL object that is parsed and passed to a macro as +options: + +```nginx +.macro_name(param=value) "something"; +.macro_name(param={key=value}) "something"; +.macro_name(.include "params.conf") "something"; +.macro_name(#this is multiline macro +param = [value1, value2]) "something"; +.macro_name(key="()") "something"; +``` + +UCL also provides a convenient `include` macro to load content from other files +to the current UCL object. This macro accepts either path to file: + +```nginx +.include "/full/path.conf" +.include "./relative/path.conf" +.include "${CURDIR}/path.conf" +``` + +or URL (if ucl is built with url support provided by either `libcurl` or `libfetch`): + + .include "http://example.com/file.conf" + +`.include` macro supports a set of options: + +* `try` (default: **false**) - if this option is `true` then UCL treats errors on loading of +this file as non-fatal. For example, such a file can be absent but it won't stop the parsing +of the top-level document. +* `sign` (default: **false**) - if this option is `true` UCL loads and checks the signature for +a file from path named `<FILEPATH>.sig`. Trusted public keys should be provided for UCL API after +parser is created but before any configurations are parsed. +* `glob` (default: **false**) - if this option is `true` UCL treats the filename as GLOB pattern and loads +all files that match the specified pattern (normally the format of patterns is defined in `glob` manual page +for your operating system). This option is meaningless for URL includes. +* `url` (default: **true**) - allow URL includes. +* `path` (default: empty) - A UCL_ARRAY of directories to search for the include file. +Search ends after the first match, unless `glob` is true, then all matches are included. +* `prefix` (default false) - Put included contents inside an object, instead +of loading them into the root. 
If no `key` is provided, one is automatically generated based on each file's basename() +* `key` (default: <empty string>) - Key to load contents of include into. If +the key already exists, it must be the correct type +* `target` (default: object) - Specify if the `prefix` `key` should be an +object or an array. +* `priority` (default: 0) - specify priority for the include (see below). +* `duplicate` (default: 'append') - specify the policy for resolving duplicates: + - `append` - default strategy, if we have new object of higher priority then it replaces old one, if we have new object with less priority it is ignored completely, and if we have two duplicate objects with the same priority then we have a multi-value key (implicit array) + - `merge` - if we have object or array, then new keys are merged inside, if we have a plain object then an implicit array is formed (regardless of priorities) + - `error` - create error on duplicate keys and stop parsing + - `rewrite` - always rewrite an old value with new one (ignoring priorities) + +Priorities are used by UCL parser to manage the policy of objects rewriting during including other files +as follows: + +* If we have two objects with the same priority then we form an implicit array +* If a new object has bigger priority then we overwrite an old one +* If a new object has lower priority then we ignore it + +By default, the priority of top-level object is set to zero (lowest priority). Currently, +you can define up to 16 priorities (from 0 to 15). Includes with bigger priorities will +rewrite keys from the objects with lower priorities as specified by the policy. + +### Variables support + +UCL supports variables in input. Variables are registered by a user of the UCL parser and can be presented in the following forms: + +* `${VARIABLE}` +* `$VARIABLE` + +UCL currently does not support nested variables. 
To escape variables one could use double dollar signs: + +* `$${VARIABLE}` is converted to `${VARIABLE}` +* `$$VARIABLE` is converted to `$VARIABLE` + +However, if no valid variables are found in a string, no expansion will be performed (and `$$` thus remains unchanged). This may be subject +to change in future libucl releases. + +### Multiline strings + +UCL can handle multiline strings as well as single line ones. It uses shell/perl like notation for such objects: +``` +key = <<EOD +some text +splitted to +lines +EOD +``` + +In this example `key` will be interpreted as the following string: `some text\nsplitted to\nlines`. +Here are some rules for this syntax: + +* Multiline terminator must start just after `<<` symbols and it must consist of capital letters only (e.g. `<<eof` or `<< EOF` won't work); +* Terminator must end with a single newline character (and no spaces are allowed between terminator and newline character); +* To finish multiline string you need to include a terminator string just after newline and followed by a newline (no spaces or other characters are allowed as well); +* The initial and the final newlines are not inserted to the resulting string, but you can still specify newlines at the beginning and at the end of a value, for example: + +``` +key <<EOD + +some +text + +EOD +``` + +### Single quoted strings + +It is possible to use single quoted strings to simplify escaping rules. All values passed in single quoted strings are *NOT* escaped, with two exceptions: a single `'` character just after a `\` character (which is read as a literal `'`), and a newline character just after a `\` character, which is ignored. 
+ +``` +key = 'value'; # Read as value +key = 'value\n\'; # Read as value\n\ +key = 'value\''; # Read as value' +key = 'value\ +bla'; # Read as valuebla +``` + +## Emitter + +Each UCL object can be serialized to one of the four supported formats: + +* `JSON` - canonic json notation (with spaces indented structure); +* `Compacted JSON` - compact json notation (without spaces or newlines); +* `Configuration` - nginx like notation; +* `YAML` - yaml inlined notation. + +## Validation + +UCL allows validation of objects. It uses the same schema that is used for json: [json schema v4](http://json-schema.org). UCL supports the full set of json schema with the exception of remote references. This feature is unlikely to be useful for configuration objects. Of course, a schema definition can be in UCL format instead of JSON that simplifies schemas writing. Moreover, since UCL supports multiple values for keys in an object it is possible to specify generic integer constraints `maxValues` and `minValues` to define the limits of values count in a single key. UCL currently is not absolutely strict about validation schemas themselves, therefore UCL users should supply valid schemas (as it is defined in json-schema draft v4) to ensure that the input objects are validated properly. + +## Performance + +Are UCL parser and emitter fast enough? Well, there are some numbers. +I got a 19Mb file that consists of ~700 thousand lines of json (obtained via +http://www.json-generator.com/). Then I checked jansson library that performs json +parsing and emitting and compared it with UCL. Here are results: + +``` +jansson: parsed json in 1.3899 seconds +jansson: emitted object in 0.2609 seconds + +ucl: parsed input in 0.6649 seconds +ucl: emitted config in 0.2423 seconds +ucl: emitted json in 0.2329 seconds +ucl: emitted compact json in 0.1811 seconds +ucl: emitted yaml in 0.2489 seconds +``` + +So far, UCL seems to be significantly faster than jansson on parsing and slightly faster on emitting. 
Moreover, +UCL compiled with optimizations (-O3) performs significantly faster: +``` +ucl: parsed input in 0.3002 seconds +ucl: emitted config in 0.1174 seconds +ucl: emitted json in 0.1174 seconds +ucl: emitted compact json in 0.0991 seconds +ucl: emitted yaml in 0.1354 seconds +``` + +You can do your own benchmarks by running `make check` in libucl top directory. + +## Conclusion + +UCL has clear design that should be very convenient for reading and writing. At the same time it is compatible with +JSON language and therefore can be used as a simple JSON parser. Macro logic provides an ability to extend configuration +language (for example by including some lua code) and comments allow to disable or enable the parts of a configuration +quickly. diff --git a/contrib/libucl/ucl.h b/contrib/libucl/ucl.h index 8f6d6beb6..812b89477 100644 --- a/contrib/libucl/ucl.h +++ b/contrib/libucl/ucl.h @@ -177,7 +177,8 @@ typedef enum ucl_string_flags { } ucl_string_flags_t; /** - * Basic flags for an object + * Basic flags for an object (can use up to 12 bits as higher 4 bits are used + * for priorities) */ typedef enum ucl_object_flags { UCL_OBJECT_ALLOCATED_KEY = (1 << 0), /**< An object has key allocated internally */ @@ -187,7 +188,8 @@ typedef enum ucl_object_flags { UCL_OBJECT_MULTILINE = (1 << 4), /**< String should be displayed as multiline string */ UCL_OBJECT_MULTIVALUE = (1 << 5), /**< Object is a key with multiple values */ UCL_OBJECT_INHERITED = (1 << 6), /**< Object has been inherited from another */ - UCL_OBJECT_BINARY = (1 << 7) /**< Object contains raw binary data */ + UCL_OBJECT_BINARY = (1 << 7), /**< Object contains raw binary data */ + UCL_OBJECT_SQUOTED = (1 << 8) /**< Object has been enclosed in single quotes */ } ucl_object_flags_t; /** @@ -541,6 +543,13 @@ UCL_EXTERN ucl_object_t* ucl_array_pop_last (ucl_object_t *top); UCL_EXTERN ucl_object_t* ucl_array_pop_first (ucl_object_t *top); /** + * Return size of the array `top` + * @param top object to get 
size from (must be of type UCL_ARRAY) + * @return size of the array + */ +UCL_EXTERN unsigned int ucl_array_size (const ucl_object_t *top); + +/** * Return object identified by index of the array `top` * @param top object to get a key from (must be of type UCL_ARRAY) * @param index array index to return @@ -919,7 +928,7 @@ struct ucl_parser; UCL_EXTERN struct ucl_parser* ucl_parser_new (int flags); /** - * Sets the default priority for the parser applied to chunks that does not + * Sets the default priority for the parser applied to chunks that do not * specify priority explicitly * @param parser parser object * @param prio default priority (0 .. 16) @@ -928,6 +937,14 @@ UCL_EXTERN struct ucl_parser* ucl_parser_new (int flags); UCL_EXTERN bool ucl_parser_set_default_priority (struct ucl_parser *parser, unsigned prio); /** + * Gets the default priority for the parser applied to chunks that do not + * specify priority explicitly + * @param parser parser object + * @return true default priority (0 .. 
16), -1 for failure + */ +UCL_EXTERN int ucl_parser_get_default_priority (struct ucl_parser *parser); + +/** * Register new handler for a macro * @param parser parser object * @param macro macro name (without leading dot) @@ -1004,6 +1021,16 @@ UCL_EXTERN bool ucl_parser_add_chunk_priority (struct ucl_parser *parser, const unsigned char *data, size_t len, unsigned priority); /** + * Insert new chunk to a parser (must have previously processed data with an existing top object) + * @param parser parser structure + * @param data the pointer to the beginning of a chunk + * @param len the length of a chunk + * @return true if chunk has been added and false in case of error + */ +UCL_EXTERN bool ucl_parser_insert_chunk (struct ucl_parser *parser, + const unsigned char *data, size_t len); + +/** * Full version of ucl_add_chunk with priority and duplicate strategy * @param parser parser structure * @param data the pointer to the beginning of a chunk @@ -1026,7 +1053,7 @@ UCL_EXTERN bool ucl_parser_add_chunk_full (struct ucl_parser *parser, * @return true if string has been added and false in case of error */ UCL_EXTERN bool ucl_parser_add_string (struct ucl_parser *parser, - const char *data,size_t len); + const char *data, size_t len); /** * Load ucl object from a string @@ -1063,6 +1090,20 @@ UCL_EXTERN bool ucl_parser_add_file_priority (struct ucl_parser *parser, const char *filename, unsigned priority); /** + * Load and add data from a file + * @param parser parser structure + * @param filename the name of file + * @param priority the desired priority of a chunk (only 4 least significant bits + * are considered for this parameter) + * @param strat Merge strategy to use while parsing this file + * @param parse_type Parser type to use while parsing this file + * @return true if chunk has been added and false in case of error + */ +UCL_EXTERN bool ucl_parser_add_file_full (struct ucl_parser *parser, const char *filename, + unsigned priority, enum ucl_duplicate_strategy 
strat, + enum ucl_parse_type parse_type); + +/** * Load and add data from a file descriptor * @param parser parser structure * @param filename the name of file @@ -1085,6 +1126,21 @@ UCL_EXTERN bool ucl_parser_add_fd_priority (struct ucl_parser *parser, int fd, unsigned priority); /** + * Load and add data from a file descriptor + * @param parser parser structure + * @param filename the name of file + * @param err if *err is NULL it is set to parser error + * @param priority the desired priority of a chunk (only 4 least significant bits + * are considered for this parameter) + * @param strat Merge strategy to use while parsing this file + * @param parse_type Parser type to use while parsing this file + * @return true if chunk has been added and false in case of error + */ +UCL_EXTERN bool ucl_parser_add_fd_full (struct ucl_parser *parser, int fd, + unsigned priority, enum ucl_duplicate_strategy strat, + enum ucl_parse_type parse_type); + +/** * Provide a UCL_ARRAY of paths to search for include files. The object is * copied so caller must unref the object. 
* @param parser parser structure @@ -1103,6 +1159,29 @@ UCL_EXTERN bool ucl_set_include_path (struct ucl_parser *parser, UCL_EXTERN ucl_object_t* ucl_parser_get_object (struct ucl_parser *parser); /** + * Get the current stack object as stack accessor function for use in macro + * functions (refcount is increased) + * @param parser parser object + * @param depth depth of stack to retrieve (top is 0) + * @return current stack object or NULL + */ +UCL_EXTERN ucl_object_t* ucl_parser_get_current_stack_object (struct ucl_parser *parser, unsigned int depth); + +/** + * Peek at the character at the current chunk position + * @param parser parser structure + * @return current chunk position character + */ +UCL_EXTERN unsigned char ucl_parser_chunk_peek (struct ucl_parser *parser); + +/** + * Skip the character at the current chunk position + * @param parser parser structure + * @return success boolean + */ +UCL_EXTERN bool ucl_parser_chunk_skip (struct ucl_parser *parser); + +/** * Get the error string if parsing has been failed * @param parser parser object * @return error description diff --git a/contrib/libucl/ucl_emitter.c b/contrib/libucl/ucl_emitter.c index a15cd08cf..4f4465dfb 100644 --- a/contrib/libucl/ucl_emitter.c +++ b/contrib/libucl/ucl_emitter.c @@ -424,8 +424,16 @@ ucl_emitter_common_elt (struct ucl_emitter_context *ctx, break; case UCL_STRING: ucl_emitter_print_key (print_key, ctx, obj, compact); - if (ctx->id == UCL_EMIT_CONFIG && ucl_maybe_long_string (obj)) { - ucl_elt_string_write_multiline (obj->value.sv, obj->len, ctx); + if (ctx->id == UCL_EMIT_CONFIG) { + if (ucl_maybe_long_string (obj)) { + ucl_elt_string_write_multiline (obj->value.sv, obj->len, ctx); + } else { + if (obj->flags & UCL_OBJECT_SQUOTED) { + ucl_elt_string_write_squoted (obj->value.sv, obj->len, ctx); + } else { + ucl_elt_string_write_json (obj->value.sv, obj->len, ctx); + } + } } else { ucl_elt_string_write_json (obj->value.sv, obj->len, ctx); diff --git 
a/contrib/libucl/ucl_emitter_utils.c b/contrib/libucl/ucl_emitter_utils.c index 6c46a8e49..04940a902 100644 --- a/contrib/libucl/ucl_emitter_utils.c +++ b/contrib/libucl/ucl_emitter_utils.c @@ -151,6 +151,40 @@ ucl_elt_string_write_json (const char *str, size_t size, } void +ucl_elt_string_write_squoted (const char *str, size_t size, + struct ucl_emitter_context *ctx) +{ + const char *p = str, *c = str; + size_t len = 0; + const struct ucl_emitter_functions *func = ctx->func; + + func->ucl_emitter_append_character ('\'', 1, func->ud); + + while (size) { + if (*p == '\'') { + if (len > 0) { + func->ucl_emitter_append_len (c, len, func->ud); + } + + len = 0; + c = ++p; + func->ucl_emitter_append_len ("\\\'", 2, func->ud); + } + else { + p ++; + len ++; + } + size --; + } + + if (len > 0) { + func->ucl_emitter_append_len (c, len, func->ud); + } + + func->ucl_emitter_append_character ('\'', 1, func->ud); +} + +void ucl_elt_string_write_multiline (const char *str, size_t size, struct ucl_emitter_context *ctx) { @@ -207,18 +241,15 @@ ucl_utstring_append_double (double val, void *ud) UT_string *buf = ud; const double delta = 0.0000001; - if (isfinite (val)) { - if (val == (double) (int) val) { - utstring_printf (buf, "%.1lf", val); - } else if (fabs (val - (double) (int) val) < delta) { - /* Write at maximum precision */ - utstring_printf (buf, "%.*lg", DBL_DIG, val); - } else { - utstring_printf (buf, "%lf", val); - } + if (val == (double)(int)val) { + utstring_printf (buf, "%.1lf", val); + } + else if (fabs (val - (double)(int)val) < delta) { + /* Write at maximum precision */ + utstring_printf (buf, "%.*lg", DBL_DIG, val); } else { - utstring_append_len (buf, "null", 5); + utstring_printf (buf, "%lf", val); } return 0; @@ -265,19 +296,15 @@ ucl_file_append_double (double val, void *ud) FILE *fp = ud; const double delta = 0.0000001; - if (isfinite (val)) { - if (val == (double) (int) val) { - fprintf (fp, "%.1lf", val); - } else if (fabs (val - (double) (int) val) < 
delta) { - /* Write at maximum precision */ - fprintf (fp, "%.*lg", DBL_DIG, val); - } else { - fprintf (fp, "%lf", val); - } + if (val == (double)(int)val) { + fprintf (fp, "%.1lf", val); + } + else if (fabs (val - (double)(int)val) < delta) { + /* Write at maximum precision */ + fprintf (fp, "%.*lg", DBL_DIG, val); } else { - /* Encode as null */ - fprintf (fp, "null"); + fprintf (fp, "%lf", val); } return 0; @@ -343,18 +370,15 @@ ucl_fd_append_double (double val, void *ud) const double delta = 0.0000001; char nbuf[64]; - if (isfinite (val)) { - if (val == (double) (int) val) { - snprintf (nbuf, sizeof (nbuf), "%.1lf", val); - } else if (fabs (val - (double) (int) val) < delta) { - /* Write at maximum precision */ - snprintf (nbuf, sizeof (nbuf), "%.*lg", DBL_DIG, val); - } else { - snprintf (nbuf, sizeof (nbuf), "%lf", val); - } + if (val == (double)(int)val) { + snprintf (nbuf, sizeof (nbuf), "%.1lf", val); + } + else if (fabs (val - (double)(int)val) < delta) { + /* Write at maximum precision */ + snprintf (nbuf, sizeof (nbuf), "%.*lg", DBL_DIG, val); } else { - memcpy (nbuf, "null", 5); + snprintf (nbuf, sizeof (nbuf), "%lf", val); } return write (fd, nbuf, strlen (nbuf)); diff --git a/contrib/libucl/ucl_internal.h b/contrib/libucl/ucl_internal.h index 11b71de70..d60581037 100644 --- a/contrib/libucl/ucl_internal.h +++ b/contrib/libucl/ucl_internal.h @@ -63,7 +63,9 @@ #include <sys/stat.h> #endif #ifdef HAVE_SYS_PARAM_H -#include <sys/param.h> +# ifndef _WIN32 +# include <sys/param.h> +# endif #endif #ifdef HAVE_LIMITS_H @@ -76,7 +78,9 @@ #include <errno.h> #endif #ifdef HAVE_UNISTD_H -#include <unistd.h> +# ifndef _WIN32 +# include <unistd.h> +# endif #endif #ifdef HAVE_CTYPE_H #include <ctype.h> @@ -87,13 +91,40 @@ #ifdef HAVE_STRING_H #include <string.h> #endif +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif + +#if defined(_MSC_VER) +/* Windows hacks */ +#include <BaseTsd.h> +typedef SSIZE_T ssize_t; +#define strdup _strdup +#define snprintf _snprintf 
+#define vsnprintf _vsnprintf +#define strcasecmp _stricmp +#define strncasecmp _strnicmp +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#if _MSC_VER >= 1900 +#include <../ucrt/stdlib.h> +#else +#include <../include/stdlib.h> +#endif +#ifndef PATH_MAX +#define PATH_MAX _MAX_PATH +#endif + +/* Dirname, basename implementations */ + + +#endif #include "utlist.h" #include "utstring.h" #include "uthash.h" #include "ucl.h" #include "ucl_hash.h" -#include "xxhash.h" #ifdef HAVE_OPENSSL #include <openssl/evp.h> @@ -228,6 +259,13 @@ struct ucl_object_userdata { */ size_t ucl_unescape_json_string (char *str, size_t len); + +/** + * Unescape single quoted string inplace + * @param str + */ +size_t ucl_unescape_squoted_string (char *str, size_t len); + /** * Handle include macro * @param data include data @@ -321,7 +359,7 @@ ucl_create_err (UT_string **err, const char *fmt, ...) } #ifdef UCL_FATAL_ERRORS - abort (); + assert (0); #endif } @@ -432,6 +470,16 @@ ucl_emit_get_standard_context (enum ucl_emitter emit_type); void ucl_elt_string_write_json (const char *str, size_t size, struct ucl_emitter_context *ctx); + +/** + * Serialize string as single quoted string + * @param str string to emit + * @param buf target buffer + */ +void +ucl_elt_string_write_squoted (const char *str, size_t size, + struct ucl_emitter_context *ctx); + /** * Write multiline string using `EOD` as string terminator * @param str diff --git a/contrib/libucl/ucl_parser.c b/contrib/libucl/ucl_parser.c index e82b5da51..fedd5065c 100644 --- a/contrib/libucl/ucl_parser.c +++ b/contrib/libucl/ucl_parser.c @@ -557,13 +557,15 @@ ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst, * @param need_unescape need to unescape source (and copy it) * @param need_lowercase need to lowercase value (and copy) * @param need_expand need to expand variables (and copy as well) + * @param unescape_squote unescape single quoted string * @return output length 
(excluding \0 symbol) */ static inline ssize_t ucl_copy_or_store_ptr (struct ucl_parser *parser, const unsigned char *src, unsigned char **dst, const char **dst_const, size_t in_len, - bool need_unescape, bool need_lowercase, bool need_expand) + bool need_unescape, bool need_lowercase, bool need_expand, + bool unescape_squote) { ssize_t ret = -1, tret; unsigned char *tmp; @@ -586,8 +588,14 @@ ucl_copy_or_store_ptr (struct ucl_parser *parser, } if (need_unescape) { - ret = ucl_unescape_json_string (*dst, ret); + if (!unescape_squote) { + ret = ucl_unescape_json_string (*dst, ret); + } + else { + ret = ucl_unescape_squoted_string (*dst, ret); + } } + if (need_expand) { tmp = *dst; tret = ret; @@ -962,7 +970,10 @@ ucl_lex_number (struct ucl_parser *parser, */ static bool ucl_lex_json_string (struct ucl_parser *parser, - struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand) + struct ucl_chunk *chunk, + bool *need_unescape, + bool *ucl_escape, + bool *var_expand) { const unsigned char *p = chunk->pos; unsigned char c; @@ -1033,6 +1044,50 @@ ucl_lex_json_string (struct ucl_parser *parser, return false; } +/** + * Process single quoted string + * @param parser + * @param chunk + * @param need_unescape + * @return + */ +static bool +ucl_lex_squoted_string (struct ucl_parser *parser, + struct ucl_chunk *chunk, bool *need_unescape) +{ + const unsigned char *p = chunk->pos; + unsigned char c; + + while (p < chunk->end) { + c = *p; + if (c == '\\') { + ucl_chunk_skipc (chunk, p); + + if (p >= chunk->end) { + ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character", + &parser->err); + return false; + } + else { + ucl_chunk_skipc (chunk, p); + } + + *need_unescape = true; + continue; + } + else if (c == '\'') { + ucl_chunk_skipc (chunk, p); + return true; + } + + ucl_chunk_skipc (chunk, p); + } + + ucl_set_err (parser, UCL_ESYNTAX, "no quote at the end of single quoted string", + &parser->err); + return false; +} + static void 
ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont, ucl_object_t *top, @@ -1369,7 +1424,8 @@ ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, /* Create a new object */ nobj = ucl_object_new_full (UCL_NULL, parser->chunks->priority); keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY], - &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false); + &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, + false, false); if (keylen == -1) { ucl_object_unref (nobj); return false; @@ -1565,7 +1621,7 @@ ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) const unsigned char *p, *c; ucl_object_t *obj = NULL; unsigned int stripped_spaces; - int str_len; + ssize_t str_len; bool need_unescape = false, ucl_escape = false, var_expand = false; p = chunk->pos; @@ -1603,13 +1659,41 @@ ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE], &obj->value.sv, str_len, need_unescape, false, - var_expand)) == -1) { + var_expand, false)) == -1) { + return false; + } + + obj->len = str_len; + parser->state = UCL_STATE_AFTER_VALUE; + + return true; + break; + case '\'': + ucl_chunk_skipc (chunk, p); + + if (!ucl_lex_squoted_string (parser, chunk, &need_unescape)) { + return false; + } + + obj = ucl_parser_get_container (parser); + if (!obj) { + return false; + } + + str_len = chunk->pos - c - 2; + obj->type = UCL_STRING; + obj->flags |= UCL_OBJECT_SQUOTED; + + if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, + &obj->trash_stack[UCL_TRASH_VALUE], + &obj->value.sv, str_len, need_unescape, false, + var_expand, true)) == -1) { return false; } + obj->len = str_len; parser->state = UCL_STATE_AFTER_VALUE; - p = chunk->pos; return true; break; @@ -1695,7 +1779,7 @@ ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) if ((str_len = ucl_copy_or_store_ptr (parser, c, 
&obj->trash_stack[UCL_TRASH_VALUE], &obj->value.sv, str_len - 1, false, - false, var_expand)) == -1) { + false, var_expand, false)) == -1) { return false; } obj->len = str_len; @@ -1753,7 +1837,7 @@ parse_string: if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE], &obj->value.sv, str_len, need_unescape, - false, var_expand)) == -1) { + false, var_expand, false)) == -1) { return false; } obj->len = str_len; diff --git a/contrib/libucl/ucl_util.c b/contrib/libucl/ucl_util.c index fba3df920..c6c07ba87 100644 --- a/contrib/libucl/ucl_util.c +++ b/contrib/libucl/ucl_util.c @@ -431,6 +431,69 @@ ucl_unescape_json_string (char *str, size_t len) return (t - str); } +size_t +ucl_unescape_squoted_string (char *str, size_t len) +{ + char *t = str, *h = str; + + if (len <= 1) { + return len; + } + + /* t is target (tortoise), h is source (hare) */ + + while (len) { + if (*h == '\\') { + h ++; + + if (len == 1) { + /* + * If \ is last, then do not try to go further + * Issue: #74 + */ + len --; + *t++ = '\\'; + continue; + } + + switch (*h) { + case '\'': + *t++ = '\''; + break; + case '\n': + /* Ignore \<newline> style stuff */ + break; + case '\r': + /* Ignore \r and the following \n if needed */ + if (len > 1 && h[1] == '\n') { + h ++; + len --; + } + break; + default: + /* Ignore \ */ + *t++ = '\\'; + *t++ = *h; + break; + } + + h ++; + len --; + } + else { + *t++ = *h++; + } + + if (len > 0) { + len --; + } + } + + *t = '\0'; + + return (t - str); +} + char * ucl_copy_key_trash (const ucl_object_t *obj) { |