You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ucl_parser.c 59KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696
  1. /* Copyright (c) 2013, Vsevolod Stakhov
  2. * All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions are met:
  6. * * Redistributions of source code must retain the above copyright
  7. * notice, this list of conditions and the following disclaimer.
  8. * * Redistributions in binary form must reproduce the above copyright
  9. * notice, this list of conditions and the following disclaimer in the
  10. * documentation and/or other materials provided with the distribution.
  11. *
  12. * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
  13. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  14. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  15. * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
  16. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  17. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  18. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  19. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  20. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  21. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  22. */
  23. #include "ucl.h"
  24. #include "ucl_internal.h"
  25. #include "ucl_chartable.h"
  26. /**
  27. * @file ucl_parser.c
  28. * The implementation of ucl parser
  29. */
  /**
   * Snapshot of a parsing cursor.
   * The field names match the chunk counters maintained by ucl_chunk_skipc
   * (line, column, remain, pos); the code that saves and restores this state is
   * outside this section of the file.
   */
  struct ucl_parser_saved_state {
      /* Line counter at the moment of the snapshot */
      unsigned int line;
      /* Column counter at the moment of the snapshot */
      unsigned int column;
      /* Number of input bytes still unread */
      size_t remain;
      /* Read position inside the input buffer */
      const unsigned char *pos;
  };
  /**
   * Consume exactly one input character.
   * Advances both the caller's cursor and the chunk's own position, decrements
   * the chunk's remaining-bytes counter and keeps line/column in sync: a '\n'
   * increments the line and resets the column to 0, any other character
   * increments the column.
   * The macro performs no bounds check; the caller must ensure that `p` points
   * at a readable character and that `remain` is not zero.
   * @param chunk pointer to the chunk being read (evaluated more than once)
   * @param p modifiable lvalue cursor into the chunk (evaluated more than once)
   */
  #define ucl_chunk_skipc(chunk, p) do { \
      if (*(p) == '\n') { \
          (chunk)->line ++; \
          (chunk)->column = 0; \
      } \
      else { \
          (chunk)->column ++; \
      } \
      (p) ++; \
      (chunk)->pos ++; \
      (chunk)->remain --; \
  } while (0)
  53. static inline void
  54. ucl_set_err (struct ucl_parser *parser, int code, const char *str, UT_string **err)
  55. {
  56. const char *fmt_string, *filename;
  57. struct ucl_chunk *chunk = parser->chunks;
  58. if (parser->cur_file) {
  59. filename = parser->cur_file;
  60. }
  61. else {
  62. filename = "<unknown>";
  63. }
  64. if (chunk->pos < chunk->end) {
  65. if (isgraph (*chunk->pos)) {
  66. fmt_string = "error while parsing %s: "
  67. "line: %d, column: %d - '%s', character: '%c'";
  68. }
  69. else {
  70. fmt_string = "error while parsing %s: "
  71. "line: %d, column: %d - '%s', character: '0x%02x'";
  72. }
  73. ucl_create_err (err, fmt_string,
  74. filename, chunk->line, chunk->column,
  75. str, *chunk->pos);
  76. }
  77. else {
  78. ucl_create_err (err, "error while parsing %s: at the end of chunk: %s",
  79. filename, str);
  80. }
  81. parser->err_code = code;
  82. }
  83. static void
  84. ucl_save_comment (struct ucl_parser *parser, const char *begin, size_t len)
  85. {
  86. ucl_object_t *nobj;
  87. if (len > 0 && begin != NULL) {
  88. nobj = ucl_object_fromstring_common (begin, len, 0);
  89. if (parser->last_comment) {
  90. /* We need to append data to an existing object */
  91. DL_APPEND (parser->last_comment, nobj);
  92. }
  93. else {
  94. parser->last_comment = nobj;
  95. }
  96. }
  97. }
  98. static void
  99. ucl_attach_comment (struct ucl_parser *parser, ucl_object_t *obj, bool before)
  100. {
  101. if (parser->last_comment) {
  102. ucl_object_insert_key (parser->comments, parser->last_comment,
  103. (const char *)&obj, sizeof (void *), true);
  104. if (before) {
  105. parser->last_comment->flags |= UCL_OBJECT_INHERITED;
  106. }
  107. parser->last_comment = NULL;
  108. }
  109. }
  110. /**
  111. * Skip all comments from the current pos resolving nested and multiline comments
  112. * @param parser
  113. * @return
  114. */
  115. static bool
  116. ucl_skip_comments (struct ucl_parser *parser)
  117. {
  118. struct ucl_chunk *chunk = parser->chunks;
  119. const unsigned char *p, *beg = NULL;
  120. int comments_nested = 0;
  121. bool quoted = false;
  122. p = chunk->pos;
  123. start:
  124. if (chunk->remain > 0 && *p == '#') {
  125. if (parser->state != UCL_STATE_SCOMMENT &&
  126. parser->state != UCL_STATE_MCOMMENT) {
  127. beg = p;
  128. while (p < chunk->end) {
  129. if (*p == '\n') {
  130. if (parser->flags & UCL_PARSER_SAVE_COMMENTS) {
  131. ucl_save_comment (parser, beg, p - beg);
  132. beg = NULL;
  133. }
  134. ucl_chunk_skipc (chunk, p);
  135. goto start;
  136. }
  137. ucl_chunk_skipc (chunk, p);
  138. }
  139. }
  140. }
  141. else if (chunk->remain >= 2 && *p == '/') {
  142. if (p[1] == '*') {
  143. beg = p;
  144. ucl_chunk_skipc (chunk, p);
  145. comments_nested ++;
  146. ucl_chunk_skipc (chunk, p);
  147. while (p < chunk->end) {
  148. if (*p == '"' && *(p - 1) != '\\') {
  149. quoted = !quoted;
  150. }
  151. if (!quoted) {
  152. if (*p == '*') {
  153. ucl_chunk_skipc (chunk, p);
  154. if (*p == '/') {
  155. comments_nested --;
  156. if (comments_nested == 0) {
  157. if (parser->flags & UCL_PARSER_SAVE_COMMENTS) {
  158. ucl_save_comment (parser, beg, p - beg + 1);
  159. beg = NULL;
  160. }
  161. ucl_chunk_skipc (chunk, p);
  162. goto start;
  163. }
  164. }
  165. ucl_chunk_skipc (chunk, p);
  166. }
  167. else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
  168. comments_nested ++;
  169. ucl_chunk_skipc (chunk, p);
  170. ucl_chunk_skipc (chunk, p);
  171. continue;
  172. }
  173. }
  174. ucl_chunk_skipc (chunk, p);
  175. }
  176. if (comments_nested != 0) {
  177. ucl_set_err (parser, UCL_ENESTED,
  178. "unfinished multiline comment", &parser->err);
  179. return false;
  180. }
  181. }
  182. }
  183. if (beg && p > beg && (parser->flags & UCL_PARSER_SAVE_COMMENTS)) {
  184. ucl_save_comment (parser, beg, p - beg);
  185. }
  186. return true;
  187. }
  /**
   * Return the multiplier for a numeric suffix character.
   * Recognises 'k', 'm' and 'g' in either case.
   * @param c suffix character
   * @param is_bytes if true use powers of 1024, otherwise powers of 1000
   * @return the multiplier, or 1 for an unknown character
   */
  static inline unsigned long
  ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
      long decimal, binary;

      switch (tolower (c)) {
      case 'k':
          decimal = 1000;
          binary = 1024;
          break;
      case 'm':
          decimal = 1000 * 1000;
          binary = 1024 * 1024;
          break;
      case 'g':
          decimal = 1000 * 1000 * 1000;
          binary = 1024 * 1024 * 1024;
          break;
      default:
          return 1;
      }

      return is_bytes ? binary : decimal;
  }
  /**
   * Return the number of seconds represented by a time suffix character.
   * Recognises, in either case: 'm' (minute), 'h' (hour), 'd' (day),
   * 'w' (week) and 'y' (year, counted as 365 days).
   * @param c suffix character
   * @return seconds per unit, or 1 for an unknown character
   */
  static inline double
  ucl_lex_time_multiplier (const unsigned char c) {
      static const struct {
          char c;
          double mult;
      } multipliers[] = {
          {'m', 60},
          {'h', 60 * 60},
          {'d', 60 * 60 * 24},
          {'w', 60 * 60 * 24 * 7},
          /* 365 days; the week factor must not be applied to years */
          {'y', 60 * 60 * 24 * 365}
      };
      size_t i;

      for (i = 0; i < sizeof (multipliers) / sizeof (multipliers[0]); i ++) {
          if (tolower (c) == multipliers[i].c) {
              return multipliers[i].mult;
          }
      }

      return 1;
  }
  241. /**
  242. * Return true if a character is a end of an atom
  243. * @param c
  244. * @return
  245. */
  246. static inline bool
  247. ucl_lex_is_atom_end (const unsigned char c)
  248. {
  249. return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
  250. }
  /**
   * Tell whether two consecutive characters open a comment.
   * @param c1 current character
   * @param c2 character following c1 (only examined when c1 is '/')
   * @return true for '#' or for the pair '/' '*'
   */
  static inline bool
  ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
  {
      return c1 == '#' || (c1 == '/' && c2 == '*');
  }
  264. /**
  265. * Check variable found
  266. * @param parser
  267. * @param ptr
  268. * @param remain
  269. * @param out_len
  270. * @param strict
  271. * @param found
  272. * @return
  273. */
  274. static inline const char *
  275. ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
  276. size_t *out_len, bool strict, bool *found)
  277. {
  278. struct ucl_variable *var;
  279. unsigned char *dst;
  280. size_t dstlen;
  281. bool need_free = false;
  282. LL_FOREACH (parser->variables, var) {
  283. if (strict) {
  284. if (remain == var->var_len) {
  285. if (memcmp (ptr, var->var, var->var_len) == 0) {
  286. *out_len += var->value_len;
  287. *found = true;
  288. return (ptr + var->var_len);
  289. }
  290. }
  291. }
  292. else {
  293. if (remain >= var->var_len) {
  294. if (memcmp (ptr, var->var, var->var_len) == 0) {
  295. *out_len += var->value_len;
  296. *found = true;
  297. return (ptr + var->var_len);
  298. }
  299. }
  300. }
  301. }
  302. /* XXX: can only handle ${VAR} */
  303. if (!(*found) && parser->var_handler != NULL && strict) {
  304. /* Call generic handler */
  305. if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
  306. parser->var_data)) {
  307. *found = true;
  308. if (need_free) {
  309. free (dst);
  310. }
  311. return (ptr + remain);
  312. }
  313. }
  314. return ptr;
  315. }
  /**
   * Examine the text that follows a '$' and account for its output size.
   * Three shapes are recognised:
   *  - a braced name, looked up strictly; if it is unknown the '$' and '{'
   *    are counted as literal text, and if the brace is never closed only the
   *    '$' is counted here (the '{' is counted by the caller, which resumes at
   *    the returned pointer);
   *  - a second '$', which stands for one literal dollar sign;
   *  - anything else, looked up as a non-strict (prefix) variable name; if it
   *    is unknown the '$' is counted as literal text.
   * @param parser parser holding the variables
   * @param ptr first byte after the '$'
   * @param remain number of bytes available at ptr (may be 0)
   * @param out_len running output length, updated for the consumed input
   * @param vars_found set to true when a variable was resolved
   * @return position from which the caller should continue scanning
   */
  static const char *
  ucl_check_variable (struct ucl_parser *parser, const char *ptr,
          size_t remain, size_t *out_len, bool *vars_found)
  {
      const char *p, *end, *ret = ptr;
      bool found = false;
      bool closed = false;

      if (remain > 0 && *ptr == '{') {
          /* We need to match the variable enclosed in braces */
          p = ptr + 1;
          end = ptr + remain;

          while (p < end) {
              if (*p == '}') {
                  closed = true;
                  ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1,
                          out_len, true, &found);
                  if (found) {
                      /* {} must be excluded actually */
                      ret ++;
                      *vars_found = true;
                  }
                  else {
                      *out_len += 2;
                  }
                  break;
              }
              p ++;
          }

          if (!closed) {
              /* Unterminated brace: the '$' is emitted literally */
              (*out_len) ++;
          }
      }
      else if (remain == 0 || *ptr != '$') {
          /* Not count escaped dollar sign */
          ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
          if (found) {
              *vars_found = true;
          }
          else {
              (*out_len) ++;
          }
      }
      else {
          ret ++;
          (*out_len) ++;
      }

      return ret;
  }
  370. /**
  371. * Expand a single variable
  372. * @param parser
  373. * @param ptr
  374. * @param remain
  375. * @param dest
  376. * @return
  377. */
  378. static const char *
  379. ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
  380. size_t remain, unsigned char **dest)
  381. {
  382. unsigned char *d = *dest, *dst;
  383. const char *p = ptr + 1, *ret;
  384. struct ucl_variable *var;
  385. size_t dstlen;
  386. bool need_free = false;
  387. bool found = false;
  388. bool strict = false;
  389. ret = ptr + 1;
  390. remain --;
  391. if (*p == '$') {
  392. *d++ = *p++;
  393. *dest = d;
  394. return p;
  395. }
  396. else if (*p == '{') {
  397. p ++;
  398. strict = true;
  399. ret += 2;
  400. remain -= 2;
  401. }
  402. LL_FOREACH (parser->variables, var) {
  403. if (remain >= var->var_len) {
  404. if (memcmp (p, var->var, var->var_len) == 0) {
  405. memcpy (d, var->value, var->value_len);
  406. ret += var->var_len;
  407. d += var->value_len;
  408. found = true;
  409. break;
  410. }
  411. }
  412. }
  413. if (!found) {
  414. if (strict && parser->var_handler != NULL) {
  415. if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
  416. parser->var_data)) {
  417. memcpy (d, dst, dstlen);
  418. ret += dstlen;
  419. d += remain;
  420. found = true;
  421. }
  422. }
  423. /* Leave variable as is */
  424. if (!found) {
  425. if (strict) {
  426. /* Copy '${' */
  427. memcpy (d, ptr, 2);
  428. d += 2;
  429. ret --;
  430. }
  431. else {
  432. memcpy (d, ptr, 1);
  433. d ++;
  434. }
  435. }
  436. }
  437. *dest = d;
  438. return ret;
  439. }
  440. /**
  441. * Expand variables in string
  442. * @param parser
  443. * @param dst
  444. * @param src
  445. * @param in_len
  446. * @return
  447. */
  448. static ssize_t
  449. ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
  450. const char *src, size_t in_len)
  451. {
  452. const char *p, *end = src + in_len;
  453. unsigned char *d;
  454. size_t out_len = 0;
  455. bool vars_found = false;
  456. if (parser->flags & UCL_PARSER_DISABLE_MACRO) {
  457. *dst = NULL;
  458. return in_len;
  459. }
  460. p = src;
  461. while (p != end) {
  462. if (*p == '$') {
  463. p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
  464. }
  465. else {
  466. p ++;
  467. out_len ++;
  468. }
  469. }
  470. if (!vars_found) {
  471. /* Trivial case */
  472. *dst = NULL;
  473. return in_len;
  474. }
  475. *dst = UCL_ALLOC (out_len + 1);
  476. if (*dst == NULL) {
  477. return in_len;
  478. }
  479. d = *dst;
  480. p = src;
  481. while (p != end) {
  482. if (*p == '$') {
  483. p = ucl_expand_single_variable (parser, p, end - p, &d);
  484. }
  485. else {
  486. *d++ = *p++;
  487. }
  488. }
  489. *d = '\0';
  490. return out_len;
  491. }
  492. /**
  493. * Store or copy pointer to the trash stack
  494. * @param parser parser object
  495. * @param src src string
  496. * @param dst destination buffer (trash stack pointer)
  497. * @param dst_const const destination pointer (e.g. value of object)
  498. * @param in_len input length
  499. * @param need_unescape need to unescape source (and copy it)
  500. * @param need_lowercase need to lowercase value (and copy)
  501. * @param need_expand need to expand variables (and copy as well)
  502. * @return output length (excluding \0 symbol)
  503. */
  504. static inline ssize_t
  505. ucl_copy_or_store_ptr (struct ucl_parser *parser,
  506. const unsigned char *src, unsigned char **dst,
  507. const char **dst_const, size_t in_len,
  508. bool need_unescape, bool need_lowercase, bool need_expand)
  509. {
  510. ssize_t ret = -1, tret;
  511. unsigned char *tmp;
  512. if (need_unescape || need_lowercase ||
  513. (need_expand && parser->variables != NULL) ||
  514. !(parser->flags & UCL_PARSER_ZEROCOPY)) {
  515. /* Copy string */
  516. *dst = UCL_ALLOC (in_len + 1);
  517. if (*dst == NULL) {
  518. ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for a string",
  519. &parser->err);
  520. return false;
  521. }
  522. if (need_lowercase) {
  523. ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
  524. }
  525. else {
  526. ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
  527. }
  528. if (need_unescape) {
  529. ret = ucl_unescape_json_string (*dst, ret);
  530. }
  531. if (need_expand) {
  532. tmp = *dst;
  533. tret = ret;
  534. ret = ucl_expand_variable (parser, dst, tmp, ret);
  535. if (*dst == NULL) {
  536. /* Nothing to expand */
  537. *dst = tmp;
  538. ret = tret;
  539. }
  540. else {
  541. /* Free unexpanded value */
  542. UCL_FREE (in_len + 1, tmp);
  543. }
  544. }
  545. *dst_const = *dst;
  546. }
  547. else {
  548. *dst_const = src;
  549. ret = in_len;
  550. }
  551. return ret;
  552. }
  553. /**
  554. * Create and append an object at the specified level
  555. * @param parser
  556. * @param is_array
  557. * @param level
  558. * @return
  559. */
  560. static inline ucl_object_t *
  561. ucl_parser_add_container (ucl_object_t *obj, struct ucl_parser *parser,
  562. bool is_array, int level)
  563. {
  564. struct ucl_stack *st;
  565. if (!is_array) {
  566. if (obj == NULL) {
  567. obj = ucl_object_new_full (UCL_OBJECT, parser->chunks->priority);
  568. }
  569. else {
  570. obj->type = UCL_OBJECT;
  571. }
  572. if (obj->value.ov == NULL) {
  573. obj->value.ov = ucl_hash_create (parser->flags & UCL_PARSER_KEY_LOWERCASE);
  574. }
  575. parser->state = UCL_STATE_KEY;
  576. }
  577. else {
  578. if (obj == NULL) {
  579. obj = ucl_object_new_full (UCL_ARRAY, parser->chunks->priority);
  580. }
  581. else {
  582. obj->type = UCL_ARRAY;
  583. }
  584. parser->state = UCL_STATE_VALUE;
  585. }
  586. st = UCL_ALLOC (sizeof (struct ucl_stack));
  587. if (st == NULL) {
  588. ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for an object",
  589. &parser->err);
  590. ucl_object_unref (obj);
  591. return NULL;
  592. }
  593. st->obj = obj;
  594. st->level = level;
  595. LL_PREPEND (parser->stack, st);
  596. parser->cur_obj = obj;
  597. return obj;
  598. }
  599. int
  600. ucl_maybe_parse_number (ucl_object_t *obj,
  601. const char *start, const char *end, const char **pos,
  602. bool allow_double, bool number_bytes, bool allow_time)
  603. {
  604. const char *p = start, *c = start;
  605. char *endptr;
  606. bool got_dot = false, got_exp = false, need_double = false,
  607. is_time = false, valid_start = false, is_hex = false,
  608. is_neg = false;
  609. double dv = 0;
  610. int64_t lv = 0;
  611. if (*p == '-') {
  612. is_neg = true;
  613. c ++;
  614. p ++;
  615. }
  616. while (p < end) {
  617. if (is_hex && isxdigit (*p)) {
  618. p ++;
  619. }
  620. else if (isdigit (*p)) {
  621. valid_start = true;
  622. p ++;
  623. }
  624. else if (!is_hex && (*p == 'x' || *p == 'X')) {
  625. is_hex = true;
  626. allow_double = false;
  627. c = p + 1;
  628. }
  629. else if (allow_double) {
  630. if (p == c) {
  631. /* Empty digits sequence, not a number */
  632. *pos = start;
  633. return EINVAL;
  634. }
  635. else if (*p == '.') {
  636. if (got_dot) {
  637. /* Double dots, not a number */
  638. *pos = start;
  639. return EINVAL;
  640. }
  641. else {
  642. got_dot = true;
  643. need_double = true;
  644. p ++;
  645. }
  646. }
  647. else if (*p == 'e' || *p == 'E') {
  648. if (got_exp) {
  649. /* Double exp, not a number */
  650. *pos = start;
  651. return EINVAL;
  652. }
  653. else {
  654. got_exp = true;
  655. need_double = true;
  656. p ++;
  657. if (p >= end) {
  658. *pos = start;
  659. return EINVAL;
  660. }
  661. if (!isdigit (*p) && *p != '+' && *p != '-') {
  662. /* Wrong exponent sign */
  663. *pos = start;
  664. return EINVAL;
  665. }
  666. else {
  667. p ++;
  668. }
  669. }
  670. }
  671. else {
  672. /* Got the end of the number, need to check */
  673. break;
  674. }
  675. }
  676. else {
  677. break;
  678. }
  679. }
  680. if (!valid_start) {
  681. *pos = start;
  682. return EINVAL;
  683. }
  684. errno = 0;
  685. if (need_double) {
  686. dv = strtod (c, &endptr);
  687. }
  688. else {
  689. if (is_hex) {
  690. lv = strtoimax (c, &endptr, 16);
  691. }
  692. else {
  693. lv = strtoimax (c, &endptr, 10);
  694. }
  695. }
  696. if (errno == ERANGE) {
  697. *pos = start;
  698. return ERANGE;
  699. }
  700. /* Now check endptr */
  701. if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') {
  702. p = endptr;
  703. goto set_obj;
  704. }
  705. if (endptr < end && endptr != start) {
  706. p = endptr;
  707. switch (*p) {
  708. case 'm':
  709. case 'M':
  710. case 'g':
  711. case 'G':
  712. case 'k':
  713. case 'K':
  714. if (end - p >= 2) {
  715. if (p[1] == 's' || p[1] == 'S') {
  716. /* Milliseconds */
  717. if (!need_double) {
  718. need_double = true;
  719. dv = lv;
  720. }
  721. is_time = true;
  722. if (p[0] == 'm' || p[0] == 'M') {
  723. dv /= 1000.;
  724. }
  725. else {
  726. dv *= ucl_lex_num_multiplier (*p, false);
  727. }
  728. p += 2;
  729. goto set_obj;
  730. }
  731. else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
  732. /* Bytes */
  733. if (need_double) {
  734. need_double = false;
  735. lv = dv;
  736. }
  737. lv *= ucl_lex_num_multiplier (*p, true);
  738. p += 2;
  739. goto set_obj;
  740. }
  741. else if (ucl_lex_is_atom_end (p[1])) {
  742. if (need_double) {
  743. dv *= ucl_lex_num_multiplier (*p, false);
  744. }
  745. else {
  746. lv *= ucl_lex_num_multiplier (*p, number_bytes);
  747. }
  748. p ++;
  749. goto set_obj;
  750. }
  751. else if (allow_time && end - p >= 3) {
  752. if (tolower (p[0]) == 'm' &&
  753. tolower (p[1]) == 'i' &&
  754. tolower (p[2]) == 'n') {
  755. /* Minutes */
  756. if (!need_double) {
  757. need_double = true;
  758. dv = lv;
  759. }
  760. is_time = true;
  761. dv *= 60.;
  762. p += 3;
  763. goto set_obj;
  764. }
  765. }
  766. }
  767. else {
  768. if (need_double) {
  769. dv *= ucl_lex_num_multiplier (*p, false);
  770. }
  771. else {
  772. lv *= ucl_lex_num_multiplier (*p, number_bytes);
  773. }
  774. p ++;
  775. goto set_obj;
  776. }
  777. break;
  778. case 'S':
  779. case 's':
  780. if (allow_time &&
  781. (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
  782. if (!need_double) {
  783. need_double = true;
  784. dv = lv;
  785. }
  786. p ++;
  787. is_time = true;
  788. goto set_obj;
  789. }
  790. break;
  791. case 'h':
  792. case 'H':
  793. case 'd':
  794. case 'D':
  795. case 'w':
  796. case 'W':
  797. case 'Y':
  798. case 'y':
  799. if (allow_time &&
  800. (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
  801. if (!need_double) {
  802. need_double = true;
  803. dv = lv;
  804. }
  805. is_time = true;
  806. dv *= ucl_lex_time_multiplier (*p);
  807. p ++;
  808. goto set_obj;
  809. }
  810. break;
  811. case '\t':
  812. case ' ':
  813. while (p < end && ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) {
  814. p++;
  815. }
  816. if (ucl_lex_is_atom_end(*p))
  817. goto set_obj;
  818. break;
  819. }
  820. }
  821. else if (endptr == end) {
  822. /* Just a number at the end of chunk */
  823. p = endptr;
  824. goto set_obj;
  825. }
  826. *pos = c;
  827. return EINVAL;
  828. set_obj:
  829. if (obj != NULL) {
  830. if (allow_double && (need_double || is_time)) {
  831. if (!is_time) {
  832. obj->type = UCL_FLOAT;
  833. }
  834. else {
  835. obj->type = UCL_TIME;
  836. }
  837. obj->value.dv = is_neg ? (-dv) : dv;
  838. }
  839. else {
  840. obj->type = UCL_INT;
  841. obj->value.iv = is_neg ? (-lv) : lv;
  842. }
  843. }
  844. *pos = p;
  845. return 0;
  846. }
  847. /**
  848. * Parse possible number
  849. * @param parser
  850. * @param chunk
  851. * @param obj
  852. * @return true if a number has been parsed
  853. */
  854. static bool
  855. ucl_lex_number (struct ucl_parser *parser,
  856. struct ucl_chunk *chunk, ucl_object_t *obj)
  857. {
  858. const unsigned char *pos;
  859. int ret;
  860. ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
  861. true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
  862. if (ret == 0) {
  863. chunk->remain -= pos - chunk->pos;
  864. chunk->column += pos - chunk->pos;
  865. chunk->pos = pos;
  866. return true;
  867. }
  868. else if (ret == ERANGE) {
  869. ucl_set_err (parser, UCL_ESYNTAX, "numeric value out of range",
  870. &parser->err);
  871. }
  872. return false;
  873. }
  874. /**
  875. * Parse quoted string with possible escapes
  876. * @param parser
  877. * @param chunk
  878. * @param need_unescape
  879. * @param ucl_escape
  880. * @param var_expand
  881. * @return true if a string has been parsed
  882. */
  883. static bool
  884. ucl_lex_json_string (struct ucl_parser *parser,
  885. struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
  886. {
  887. const unsigned char *p = chunk->pos;
  888. unsigned char c;
  889. int i;
  890. while (p < chunk->end) {
  891. c = *p;
  892. if (c < 0x1F) {
  893. /* Unmasked control character */
  894. if (c == '\n') {
  895. ucl_set_err (parser, UCL_ESYNTAX, "unexpected newline",
  896. &parser->err);
  897. }
  898. else {
  899. ucl_set_err (parser, UCL_ESYNTAX, "unexpected control character",
  900. &parser->err);
  901. }
  902. return false;
  903. }
  904. else if (c == '\\') {
  905. ucl_chunk_skipc (chunk, p);
  906. c = *p;
  907. if (p >= chunk->end) {
  908. ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character",
  909. &parser->err);
  910. return false;
  911. }
  912. else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
  913. if (c == 'u') {
  914. ucl_chunk_skipc (chunk, p);
  915. for (i = 0; i < 4 && p < chunk->end; i ++) {
  916. if (!isxdigit (*p)) {
  917. ucl_set_err (parser, UCL_ESYNTAX, "invalid utf escape",
  918. &parser->err);
  919. return false;
  920. }
  921. ucl_chunk_skipc (chunk, p);
  922. }
  923. if (p >= chunk->end) {
  924. ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character",
  925. &parser->err);
  926. return false;
  927. }
  928. }
  929. else {
  930. ucl_chunk_skipc (chunk, p);
  931. }
  932. }
  933. *need_unescape = true;
  934. *ucl_escape = true;
  935. continue;
  936. }
  937. else if (c == '"') {
  938. ucl_chunk_skipc (chunk, p);
  939. return true;
  940. }
  941. else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
  942. *ucl_escape = true;
  943. }
  944. else if (c == '$') {
  945. *var_expand = true;
  946. }
  947. ucl_chunk_skipc (chunk, p);
  948. }
  949. ucl_set_err (parser, UCL_ESYNTAX, "no quote at the end of json string",
  950. &parser->err);
  951. return false;
  952. }
  953. static void
  954. ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont,
  955. ucl_object_t *top,
  956. ucl_object_t *elt)
  957. {
  958. ucl_object_t *nobj;
  959. if ((parser->flags & UCL_PARSER_NO_IMPLICIT_ARRAYS) == 0) {
  960. /* Implicit array */
  961. top->flags |= UCL_OBJECT_MULTIVALUE;
  962. DL_APPEND (top, elt);
  963. parser->stack->obj->len ++;
  964. }
  965. else {
  966. if ((top->flags & UCL_OBJECT_MULTIVALUE) != 0) {
  967. /* Just add to the explicit array */
  968. ucl_array_append (top, elt);
  969. }
  970. else {
  971. /* Convert to an array */
  972. nobj = ucl_object_typed_new (UCL_ARRAY);
  973. nobj->key = top->key;
  974. nobj->keylen = top->keylen;
  975. nobj->flags |= UCL_OBJECT_MULTIVALUE;
  976. ucl_array_append (nobj, top);
  977. ucl_array_append (nobj, elt);
  978. ucl_hash_replace (cont, top, nobj);
  979. }
  980. }
  981. }
  982. bool
  983. ucl_parser_process_object_element (struct ucl_parser *parser, ucl_object_t *nobj)
  984. {
  985. ucl_hash_t *container;
  986. ucl_object_t *tobj;
  987. container = parser->stack->obj->value.ov;
  988. tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj));
  989. if (tobj == NULL) {
  990. container = ucl_hash_insert_object (container, nobj,
  991. parser->flags & UCL_PARSER_KEY_LOWERCASE);
  992. nobj->prev = nobj;
  993. nobj->next = NULL;
  994. parser->stack->obj->len ++;
  995. }
  996. else {
  997. unsigned priold = ucl_object_get_priority (tobj),
  998. prinew = ucl_object_get_priority (nobj);
  999. switch (parser->chunks->strategy) {
  1000. case UCL_DUPLICATE_APPEND:
  1001. /*
  1002. * The logic here is the following:
  1003. *
  1004. * - if we have two objects with the same priority, then we form an
  1005. * implicit or explicit array
  1006. * - if a new object has bigger priority, then we overwrite an old one
  1007. * - if a new object has lower priority, then we ignore it
  1008. */
  1009. /* Special case for inherited objects */
  1010. if (tobj->flags & UCL_OBJECT_INHERITED) {
  1011. prinew = priold + 1;
  1012. }
  1013. if (priold == prinew) {
  1014. ucl_parser_append_elt (parser, container, tobj, nobj);
  1015. }
  1016. else if (priold > prinew) {
  1017. /*
  1018. * We add this new object to a list of trash objects just to ensure
  1019. * that it won't come to any real object
  1020. * XXX: rather inefficient approach
  1021. */
  1022. DL_APPEND (parser->trash_objs, nobj);
  1023. }
  1024. else {
  1025. ucl_hash_replace (container, tobj, nobj);
  1026. ucl_object_unref (tobj);
  1027. }
  1028. break;
  1029. case UCL_DUPLICATE_REWRITE:
  1030. /* We just rewrite old values regardless of priority */
  1031. ucl_hash_replace (container, tobj, nobj);
  1032. ucl_object_unref (tobj);
  1033. break;
  1034. case UCL_DUPLICATE_ERROR:
  1035. ucl_create_err (&parser->err, "error while parsing %s: "
  1036. "line: %d, column: %d: duplicate element for key '%s' "
  1037. "has been found",
  1038. parser->cur_file ? parser->cur_file : "<unknown>",
  1039. parser->chunks->line, parser->chunks->column, nobj->key);
  1040. return false;
  1041. case UCL_DUPLICATE_MERGE:
  1042. /*
  1043. * Here we do have some old object so we just push it on top of objects stack
  1044. */
  1045. if (tobj->type == UCL_OBJECT || tobj->type == UCL_ARRAY) {
  1046. ucl_object_unref (nobj);
  1047. nobj = tobj;
  1048. }
  1049. else {
  1050. /* For other types we create implicit array as usual */
  1051. ucl_parser_append_elt (parser, container, tobj, nobj);
  1052. }
  1053. break;
  1054. }
  1055. }
  1056. parser->stack->obj->value.ov = container;
  1057. parser->cur_obj = nobj;
  1058. ucl_attach_comment (parser, nobj, false);
  1059. return true;
  1060. }
  1061. /**
  1062. * Parse a key in an object
  1063. * @param parser
  1064. * @param chunk
  1065. * @param next_key
  1066. * @param end_of_object
  1067. * @return true if a key has been parsed
  1068. */
  1069. static bool
  1070. ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk,
  1071. bool *next_key, bool *end_of_object)
  1072. {
  1073. const unsigned char *p, *c = NULL, *end, *t;
  1074. const char *key = NULL;
  1075. bool got_quote = false, got_eq = false, got_semicolon = false,
  1076. need_unescape = false, ucl_escape = false, var_expand = false,
  1077. got_content = false, got_sep = false;
  1078. ucl_object_t *nobj;
  1079. ssize_t keylen;
  1080. p = chunk->pos;
  1081. if (*p == '.') {
  1082. /* It is macro actually */
  1083. if (!(parser->flags & UCL_PARSER_DISABLE_MACRO)) {
  1084. ucl_chunk_skipc (chunk, p);
  1085. }
  1086. parser->prev_state = parser->state;
  1087. parser->state = UCL_STATE_MACRO_NAME;
  1088. *end_of_object = false;
  1089. return true;
  1090. }
  1091. while (p < chunk->end) {
  1092. /*
  1093. * A key must start with alpha, number, '/' or '_' and end with space character
  1094. */
  1095. if (c == NULL) {
  1096. if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
  1097. if (!ucl_skip_comments (parser)) {
  1098. return false;
  1099. }
  1100. p = chunk->pos;
  1101. }
  1102. else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
  1103. ucl_chunk_skipc (chunk, p);
  1104. }
  1105. else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
  1106. /* The first symbol */
  1107. c = p;
  1108. ucl_chunk_skipc (chunk, p);
  1109. got_content = true;
  1110. }
  1111. else if (*p == '"') {
  1112. /* JSON style key */
  1113. c = p + 1;
  1114. got_quote = true;
  1115. got_content = true;
  1116. ucl_chunk_skipc (chunk, p);
  1117. }
  1118. else if (*p == '}') {
  1119. /* We have actually end of an object */
  1120. *end_of_object = true;
  1121. return true;
  1122. }
  1123. else if (*p == '.') {
  1124. ucl_chunk_skipc (chunk, p);
  1125. parser->prev_state = parser->state;
  1126. parser->state = UCL_STATE_MACRO_NAME;
  1127. return true;
  1128. }
  1129. else {
  1130. /* Invalid identifier */
  1131. ucl_set_err (parser, UCL_ESYNTAX, "key must begin with a letter",
  1132. &parser->err);
  1133. return false;
  1134. }
  1135. }
  1136. else {
  1137. /* Parse the body of a key */
  1138. if (!got_quote) {
  1139. if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
  1140. got_content = true;
  1141. ucl_chunk_skipc (chunk, p);
  1142. }
  1143. else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
  1144. end = p;
  1145. break;
  1146. }
  1147. else {
  1148. ucl_set_err (parser, UCL_ESYNTAX, "invalid character in a key",
  1149. &parser->err);
  1150. return false;
  1151. }
  1152. }
  1153. else {
  1154. /* We need to parse json like quoted string */
  1155. if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
  1156. return false;
  1157. }
  1158. /* Always escape keys obtained via json */
  1159. end = chunk->pos - 1;
  1160. p = chunk->pos;
  1161. break;
  1162. }
  1163. }
  1164. }
  1165. if (p >= chunk->end && got_content) {
  1166. ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err);
  1167. return false;
  1168. }
  1169. else if (!got_content) {
  1170. return true;
  1171. }
  1172. *end_of_object = false;
  1173. /* We are now at the end of the key, need to parse the rest */
  1174. while (p < chunk->end) {
  1175. if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
  1176. ucl_chunk_skipc (chunk, p);
  1177. }
  1178. else if (*p == '=') {
  1179. if (!got_eq && !got_semicolon) {
  1180. ucl_chunk_skipc (chunk, p);
  1181. got_eq = true;
  1182. }
  1183. else {
  1184. ucl_set_err (parser, UCL_ESYNTAX, "unexpected '=' character",
  1185. &parser->err);
  1186. return false;
  1187. }
  1188. }
  1189. else if (*p == ':') {
  1190. if (!got_eq && !got_semicolon) {
  1191. ucl_chunk_skipc (chunk, p);
  1192. got_semicolon = true;
  1193. }
  1194. else {
  1195. ucl_set_err (parser, UCL_ESYNTAX, "unexpected ':' character",
  1196. &parser->err);
  1197. return false;
  1198. }
  1199. }
  1200. else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
  1201. /* Check for comment */
  1202. if (!ucl_skip_comments (parser)) {
  1203. return false;
  1204. }
  1205. p = chunk->pos;
  1206. }
  1207. else {
  1208. /* Start value */
  1209. break;
  1210. }
  1211. }
  1212. if (p >= chunk->end && got_content) {
  1213. ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err);
  1214. return false;
  1215. }
  1216. got_sep = got_semicolon || got_eq;
  1217. if (!got_sep) {
  1218. /*
  1219. * Maybe we have more keys nested, so search for termination character.
  1220. * Possible choices:
  1221. * 1) key1 key2 ... keyN [:=] value <- we treat that as error
  1222. * 2) key1 ... keyN {} or [] <- we treat that as nested objects
  1223. * 3) key1 value[;,\n] <- we treat that as linear object
  1224. */
  1225. t = p;
  1226. *next_key = false;
  1227. while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
  1228. t ++;
  1229. }
  1230. /* Check first non-space character after a key */
  1231. if (*t != '{' && *t != '[') {
  1232. while (t < chunk->end) {
  1233. if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
  1234. break;
  1235. }
  1236. else if (*t == '{' || *t == '[') {
  1237. *next_key = true;
  1238. break;
  1239. }
  1240. t ++;
  1241. }
  1242. }
  1243. }
  1244. /* Create a new object */
  1245. nobj = ucl_object_new_full (UCL_NULL, parser->chunks->priority);
  1246. keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
  1247. &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
  1248. if (keylen == -1) {
  1249. ucl_object_unref (nobj);
  1250. return false;
  1251. }
  1252. else if (keylen == 0) {
  1253. ucl_set_err (parser, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
  1254. ucl_object_unref (nobj);
  1255. return false;
  1256. }
  1257. nobj->key = key;
  1258. nobj->keylen = keylen;
  1259. if (!ucl_parser_process_object_element (parser, nobj)) {
  1260. return false;
  1261. }
  1262. if (ucl_escape) {
  1263. nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
  1264. }
  1265. return true;
  1266. }
  1267. /**
  1268. * Parse a cl string
  1269. * @param parser
  1270. * @param chunk
  1271. * @param var_expand
  1272. * @param need_unescape
  1273. * @return true if a key has been parsed
  1274. */
  1275. static bool
  1276. ucl_parse_string_value (struct ucl_parser *parser,
  1277. struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
  1278. {
  1279. const unsigned char *p;
  1280. enum {
  1281. UCL_BRACE_ROUND = 0,
  1282. UCL_BRACE_SQUARE,
  1283. UCL_BRACE_FIGURE
  1284. };
  1285. int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
  1286. p = chunk->pos;
  1287. while (p < chunk->end) {
  1288. /* Skip pairs of figure braces */
  1289. if (*p == '{') {
  1290. braces[UCL_BRACE_FIGURE][0] ++;
  1291. }
  1292. else if (*p == '}') {
  1293. braces[UCL_BRACE_FIGURE][1] ++;
  1294. if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
  1295. /* This is not a termination symbol, continue */
  1296. ucl_chunk_skipc (chunk, p);
  1297. continue;
  1298. }
  1299. }
  1300. /* Skip pairs of square braces */
  1301. else if (*p == '[') {
  1302. braces[UCL_BRACE_SQUARE][0] ++;
  1303. }
  1304. else if (*p == ']') {
  1305. braces[UCL_BRACE_SQUARE][1] ++;
  1306. if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
  1307. /* This is not a termination symbol, continue */
  1308. ucl_chunk_skipc (chunk, p);
  1309. continue;
  1310. }
  1311. }
  1312. else if (*p == '$') {
  1313. *var_expand = true;
  1314. }
  1315. else if (*p == '\\') {
  1316. *need_unescape = true;
  1317. ucl_chunk_skipc (chunk, p);
  1318. if (p < chunk->end) {
  1319. ucl_chunk_skipc (chunk, p);
  1320. }
  1321. continue;
  1322. }
  1323. if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
  1324. break;
  1325. }
  1326. ucl_chunk_skipc (chunk, p);
  1327. }
  1328. return true;
  1329. }
  1330. /**
  1331. * Parse multiline string ending with \n{term}\n
  1332. * @param parser
  1333. * @param chunk
  1334. * @param term
  1335. * @param term_len
  1336. * @param beg
  1337. * @param var_expand
  1338. * @return size of multiline string or 0 in case of error
  1339. */
  1340. static int
  1341. ucl_parse_multiline_string (struct ucl_parser *parser,
  1342. struct ucl_chunk *chunk, const unsigned char *term,
  1343. int term_len, unsigned char const **beg,
  1344. bool *var_expand)
  1345. {
  1346. const unsigned char *p, *c, *tend;
  1347. bool newline = false;
  1348. int len = 0;
  1349. p = chunk->pos;
  1350. c = p;
  1351. while (p < chunk->end) {
  1352. if (newline) {
  1353. if (chunk->end - p < term_len) {
  1354. return 0;
  1355. }
  1356. else if (memcmp (p, term, term_len) == 0) {
  1357. tend = p + term_len;
  1358. if (*tend != '\n' && *tend != ';' && *tend != ',') {
  1359. /* Incomplete terminator */
  1360. ucl_chunk_skipc (chunk, p);
  1361. continue;
  1362. }
  1363. len = p - c;
  1364. chunk->remain -= term_len;
  1365. chunk->pos = p + term_len;
  1366. chunk->column = term_len;
  1367. *beg = c;
  1368. break;
  1369. }
  1370. }
  1371. if (*p == '\n') {
  1372. newline = true;
  1373. }
  1374. else {
  1375. if (*p == '$') {
  1376. *var_expand = true;
  1377. }
  1378. newline = false;
  1379. }
  1380. ucl_chunk_skipc (chunk, p);
  1381. }
  1382. return len;
  1383. }
  1384. static inline ucl_object_t*
  1385. ucl_parser_get_container (struct ucl_parser *parser)
  1386. {
  1387. ucl_object_t *t, *obj = NULL;
  1388. if (parser == NULL || parser->stack == NULL || parser->stack->obj == NULL) {
  1389. return NULL;
  1390. }
  1391. if (parser->stack->obj->type == UCL_ARRAY) {
  1392. /* Object must be allocated */
  1393. obj = ucl_object_new_full (UCL_NULL, parser->chunks->priority);
  1394. t = parser->stack->obj;
  1395. if (!ucl_array_append (t, obj)) {
  1396. ucl_object_unref (obj);
  1397. return NULL;
  1398. }
  1399. parser->cur_obj = obj;
  1400. ucl_attach_comment (parser, obj, false);
  1401. }
  1402. else {
  1403. /* Object has been already allocated */
  1404. obj = parser->cur_obj;
  1405. }
  1406. return obj;
  1407. }
  1408. /**
  1409. * Handle value data
  1410. * @param parser
  1411. * @param chunk
  1412. * @return
  1413. */
  1414. static bool
  1415. ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
  1416. {
  1417. const unsigned char *p, *c;
  1418. ucl_object_t *obj = NULL;
  1419. unsigned int stripped_spaces;
  1420. int str_len;
  1421. bool need_unescape = false, ucl_escape = false, var_expand = false;
  1422. p = chunk->pos;
  1423. /* Skip any spaces and comments */
  1424. if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
  1425. (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
  1426. while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
  1427. ucl_chunk_skipc (chunk, p);
  1428. }
  1429. if (!ucl_skip_comments (parser)) {
  1430. return false;
  1431. }
  1432. p = chunk->pos;
  1433. }
  1434. while (p < chunk->end) {
  1435. c = p;
  1436. switch (*p) {
  1437. case '"':
  1438. ucl_chunk_skipc (chunk, p);
  1439. if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape,
  1440. &var_expand)) {
  1441. return false;
  1442. }
  1443. obj = ucl_parser_get_container (parser);
  1444. str_len = chunk->pos - c - 2;
  1445. obj->type = UCL_STRING;
  1446. if ((str_len = ucl_copy_or_store_ptr (parser, c + 1,
  1447. &obj->trash_stack[UCL_TRASH_VALUE],
  1448. &obj->value.sv, str_len, need_unescape, false,
  1449. var_expand)) == -1) {
  1450. return false;
  1451. }
  1452. obj->len = str_len;
  1453. parser->state = UCL_STATE_AFTER_VALUE;
  1454. p = chunk->pos;
  1455. return true;
  1456. break;
  1457. case '{':
  1458. obj = ucl_parser_get_container (parser);
  1459. /* We have a new object */
  1460. obj = ucl_parser_add_container (obj, parser, false, parser->stack->level);
  1461. if (obj == NULL) {
  1462. return false;
  1463. }
  1464. ucl_chunk_skipc (chunk, p);
  1465. return true;
  1466. break;
  1467. case '[':
  1468. obj = ucl_parser_get_container (parser);
  1469. /* We have a new array */
  1470. obj = ucl_parser_add_container (obj, parser, true, parser->stack->level);
  1471. if (obj == NULL) {
  1472. return false;
  1473. }
  1474. ucl_chunk_skipc (chunk, p);
  1475. return true;
  1476. break;
  1477. case ']':
  1478. /* We have the array ending */
  1479. if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
  1480. parser->state = UCL_STATE_AFTER_VALUE;
  1481. return true;
  1482. }
  1483. else {
  1484. goto parse_string;
  1485. }
  1486. break;
  1487. case '<':
  1488. obj = ucl_parser_get_container (parser);
  1489. /* We have something like multiline value, which must be <<[A-Z]+\n */
  1490. if (chunk->end - p > 3) {
  1491. if (memcmp (p, "<<", 2) == 0) {
  1492. p += 2;
  1493. /* We allow only uppercase characters in multiline definitions */
  1494. while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
  1495. p ++;
  1496. }
  1497. if (*p =='\n') {
  1498. /* Set chunk positions and start multiline parsing */
  1499. c += 2;
  1500. chunk->remain -= p - c;
  1501. chunk->pos = p + 1;
  1502. chunk->column = 0;
  1503. chunk->line ++;
  1504. if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
  1505. p - c, &c, &var_expand)) == 0) {
  1506. ucl_set_err (parser, UCL_ESYNTAX,
  1507. "unterminated multiline value", &parser->err);
  1508. return false;
  1509. }
  1510. obj->type = UCL_STRING;
  1511. obj->flags |= UCL_OBJECT_MULTILINE;
  1512. if ((str_len = ucl_copy_or_store_ptr (parser, c,
  1513. &obj->trash_stack[UCL_TRASH_VALUE],
  1514. &obj->value.sv, str_len - 1, false,
  1515. false, var_expand)) == -1) {
  1516. return false;
  1517. }
  1518. obj->len = str_len;
  1519. parser->state = UCL_STATE_AFTER_VALUE;
  1520. return true;
  1521. }
  1522. }
  1523. }
  1524. /* Fallback to ordinary strings */
  1525. default:
  1526. parse_string:
  1527. if (obj == NULL) {
  1528. obj = ucl_parser_get_container (parser);
  1529. }
  1530. /* Parse atom */
  1531. if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
  1532. if (!ucl_lex_number (parser, chunk, obj)) {
  1533. if (parser->state == UCL_STATE_ERROR) {
  1534. return false;
  1535. }
  1536. }
  1537. else {
  1538. parser->state = UCL_STATE_AFTER_VALUE;
  1539. return true;
  1540. }
  1541. /* Fallback to normal string */
  1542. }
  1543. if (!ucl_parse_string_value (parser, chunk, &var_expand,
  1544. &need_unescape)) {
  1545. return false;
  1546. }
  1547. /* Cut trailing spaces */
  1548. stripped_spaces = 0;
  1549. while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
  1550. UCL_CHARACTER_WHITESPACE)) {
  1551. stripped_spaces ++;
  1552. }
  1553. str_len = chunk->pos - c - stripped_spaces;
  1554. if (str_len <= 0) {
  1555. ucl_set_err (parser, UCL_ESYNTAX, "string value must not be empty",
  1556. &parser->err);
  1557. return false;
  1558. }
  1559. else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
  1560. obj->len = 0;
  1561. obj->type = UCL_NULL;
  1562. }
  1563. else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
  1564. obj->type = UCL_STRING;
  1565. if ((str_len = ucl_copy_or_store_ptr (parser, c,
  1566. &obj->trash_stack[UCL_TRASH_VALUE],
  1567. &obj->value.sv, str_len, need_unescape,
  1568. false, var_expand)) == -1) {
  1569. return false;
  1570. }
  1571. obj->len = str_len;
  1572. }
  1573. parser->state = UCL_STATE_AFTER_VALUE;
  1574. p = chunk->pos;
  1575. return true;
  1576. break;
  1577. }
  1578. }
  1579. return true;
  1580. }
  1581. /**
  1582. * Handle after value data
  1583. * @param parser
  1584. * @param chunk
  1585. * @return
  1586. */
  1587. static bool
  1588. ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
  1589. {
  1590. const unsigned char *p;
  1591. bool got_sep = false;
  1592. struct ucl_stack *st;
  1593. p = chunk->pos;
  1594. while (p < chunk->end) {
  1595. if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
  1596. /* Skip whitespaces */
  1597. ucl_chunk_skipc (chunk, p);
  1598. }
  1599. else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
  1600. /* Skip comment */
  1601. if (!ucl_skip_comments (parser)) {
  1602. return false;
  1603. }
  1604. /* Treat comment as a separator */
  1605. got_sep = true;
  1606. p = chunk->pos;
  1607. }
  1608. else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
  1609. if (*p == '}' || *p == ']') {
  1610. if (parser->stack == NULL) {
  1611. ucl_set_err (parser, UCL_ESYNTAX,
  1612. "end of array or object detected without corresponding start",
  1613. &parser->err);
  1614. return false;
  1615. }
  1616. if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
  1617. (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
  1618. /* Pop all nested objects from a stack */
  1619. st = parser->stack;
  1620. parser->stack = st->next;
  1621. UCL_FREE (sizeof (struct ucl_stack), st);
  1622. if (parser->cur_obj) {
  1623. ucl_attach_comment (parser, parser->cur_obj, true);
  1624. }
  1625. while (parser->stack != NULL) {
  1626. st = parser->stack;
  1627. if (st->next == NULL || st->next->level == st->level) {
  1628. break;
  1629. }
  1630. parser->stack = st->next;
  1631. parser->cur_obj = st->obj;
  1632. UCL_FREE (sizeof (struct ucl_stack), st);
  1633. }
  1634. }
  1635. else {
  1636. ucl_set_err (parser, UCL_ESYNTAX,
  1637. "unexpected terminating symbol detected",
  1638. &parser->err);
  1639. return false;
  1640. }
  1641. if (parser->stack == NULL) {
  1642. /* Ignore everything after a top object */
  1643. return true;
  1644. }
  1645. else {
  1646. ucl_chunk_skipc (chunk, p);
  1647. }
  1648. got_sep = true;
  1649. }
  1650. else {
  1651. /* Got a separator */
  1652. got_sep = true;
  1653. ucl_chunk_skipc (chunk, p);
  1654. }
  1655. }
  1656. else {
  1657. /* Anything else */
  1658. if (!got_sep) {
  1659. ucl_set_err (parser, UCL_ESYNTAX, "delimiter is missing",
  1660. &parser->err);
  1661. return false;
  1662. }
  1663. return true;
  1664. }
  1665. }
  1666. return true;
  1667. }
  1668. static bool
  1669. ucl_skip_macro_as_comment (struct ucl_parser *parser,
  1670. struct ucl_chunk *chunk)
  1671. {
  1672. const unsigned char *p, *c;
  1673. enum {
  1674. macro_skip_start = 0,
  1675. macro_has_symbols,
  1676. macro_has_obrace,
  1677. macro_has_quote,
  1678. macro_has_backslash,
  1679. macro_has_sqbrace,
  1680. macro_save
  1681. } state = macro_skip_start, prev_state = macro_skip_start;
  1682. p = chunk->pos;
  1683. c = chunk->pos;
  1684. while (p < chunk->end) {
  1685. switch (state) {
  1686. case macro_skip_start:
  1687. if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
  1688. state = macro_has_symbols;
  1689. }
  1690. else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
  1691. state = macro_save;
  1692. continue;
  1693. }
  1694. ucl_chunk_skipc (chunk, p);
  1695. break;
  1696. case macro_has_symbols:
  1697. if (*p == '{') {
  1698. state = macro_has_sqbrace;
  1699. }
  1700. else if (*p == '(') {
  1701. state = macro_has_obrace;
  1702. }
  1703. else if (*p == '"') {
  1704. state = macro_has_quote;
  1705. }
  1706. else if (*p == '\n') {
  1707. state = macro_save;
  1708. continue;
  1709. }
  1710. ucl_chunk_skipc (chunk, p);
  1711. break;
  1712. case macro_has_obrace:
  1713. if (*p == '\\') {
  1714. prev_state = state;
  1715. state = macro_has_backslash;
  1716. }
  1717. else if (*p == ')') {
  1718. state = macro_has_symbols;
  1719. }
  1720. ucl_chunk_skipc (chunk, p);
  1721. break;
  1722. case macro_has_sqbrace:
  1723. if (*p == '\\') {
  1724. prev_state = state;
  1725. state = macro_has_backslash;
  1726. }
  1727. else if (*p == '}') {
  1728. state = macro_save;
  1729. }
  1730. ucl_chunk_skipc (chunk, p);
  1731. break;
  1732. case macro_has_quote:
  1733. if (*p == '\\') {
  1734. prev_state = state;
  1735. state = macro_has_backslash;
  1736. }
  1737. else if (*p == '"') {
  1738. state = macro_save;
  1739. }
  1740. ucl_chunk_skipc (chunk, p);
  1741. break;
  1742. case macro_has_backslash:
  1743. state = prev_state;
  1744. ucl_chunk_skipc (chunk, p);
  1745. break;
  1746. case macro_save:
  1747. if (parser->flags & UCL_PARSER_SAVE_COMMENTS) {
  1748. ucl_save_comment (parser, c, p - c);
  1749. }
  1750. return true;
  1751. }
  1752. }
  1753. return false;
  1754. }
  1755. /**
  1756. * Handle macro data
  1757. * @param parser
  1758. * @param chunk
  1759. * @param marco
  1760. * @param macro_start
  1761. * @param macro_len
  1762. * @return
  1763. */
  1764. static bool
  1765. ucl_parse_macro_value (struct ucl_parser *parser,
  1766. struct ucl_chunk *chunk, struct ucl_macro *macro,
  1767. unsigned char const **macro_start, size_t *macro_len)
  1768. {
  1769. const unsigned char *p, *c;
  1770. bool need_unescape = false, ucl_escape = false, var_expand = false;
  1771. p = chunk->pos;
  1772. switch (*p) {
  1773. case '"':
  1774. /* We have macro value encoded in quotes */
  1775. c = p;
  1776. ucl_chunk_skipc (chunk, p);
  1777. if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
  1778. return false;
  1779. }
  1780. *macro_start = c + 1;
  1781. *macro_len = chunk->pos - c - 2;
  1782. p = chunk->pos;
  1783. break;
  1784. case '{':
  1785. /* We got a multiline macro body */
  1786. ucl_chunk_skipc (chunk, p);
  1787. /* Skip spaces at the beginning */
  1788. while (p < chunk->end) {
  1789. if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
  1790. ucl_chunk_skipc (chunk, p);
  1791. }
  1792. else {
  1793. break;
  1794. }
  1795. }
  1796. c = p;
  1797. while (p < chunk->end) {
  1798. if (*p == '}') {
  1799. break;
  1800. }
  1801. ucl_chunk_skipc (chunk, p);
  1802. }
  1803. *macro_start = c;
  1804. *macro_len = p - c;
  1805. ucl_chunk_skipc (chunk, p);
  1806. break;
  1807. default:
  1808. /* Macro is not enclosed in quotes or braces */
  1809. c = p;
  1810. while (p < chunk->end) {
  1811. if (ucl_lex_is_atom_end (*p)) {
  1812. break;
  1813. }
  1814. ucl_chunk_skipc (chunk, p);
  1815. }
  1816. *macro_start = c;
  1817. *macro_len = p - c;
  1818. break;
  1819. }
  1820. /* We are at the end of a macro */
  1821. /* Skip ';' and space characters and return to previous state */
  1822. while (p < chunk->end) {
  1823. if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
  1824. break;
  1825. }
  1826. ucl_chunk_skipc (chunk, p);
  1827. }
  1828. return true;
  1829. }
  1830. /**
  1831. * Parse macro arguments as UCL object
  1832. * @param parser parser structure
  1833. * @param chunk the current data chunk
  1834. * @return
  1835. */
  1836. static ucl_object_t *
  1837. ucl_parse_macro_arguments (struct ucl_parser *parser,
  1838. struct ucl_chunk *chunk)
  1839. {
  1840. ucl_object_t *res = NULL;
  1841. struct ucl_parser *params_parser;
  1842. int obraces = 1, ebraces = 0, state = 0;
  1843. const unsigned char *p, *c;
  1844. size_t args_len = 0;
  1845. struct ucl_parser_saved_state saved;
  1846. saved.column = chunk->column;
  1847. saved.line = chunk->line;
  1848. saved.pos = chunk->pos;
  1849. saved.remain = chunk->remain;
  1850. p = chunk->pos;
  1851. if (*p != '(' || chunk->remain < 2) {
  1852. return NULL;
  1853. }
  1854. /* Set begin and start */
  1855. ucl_chunk_skipc (chunk, p);
  1856. c = p;
  1857. while ((p) < (chunk)->end) {
  1858. switch (state) {
  1859. case 0:
  1860. /* Parse symbols and check for '(', ')' and '"' */
  1861. if (*p == '(') {
  1862. obraces ++;
  1863. }
  1864. else if (*p == ')') {
  1865. ebraces ++;
  1866. }
  1867. else if (*p == '"') {
  1868. state = 1;
  1869. }
  1870. /* Check pairing */
  1871. if (obraces == ebraces) {
  1872. state = 99;
  1873. }
  1874. else {
  1875. args_len ++;
  1876. }
  1877. /* Check overflow */
  1878. if (chunk->remain == 0) {
  1879. goto restore_chunk;
  1880. }
  1881. ucl_chunk_skipc (chunk, p);
  1882. break;
  1883. case 1:
  1884. /* We have quote character, so skip all but quotes */
  1885. if (*p == '"' && *(p - 1) != '\\') {
  1886. state = 0;
  1887. }
  1888. if (chunk->remain == 0) {
  1889. goto restore_chunk;
  1890. }
  1891. args_len ++;
  1892. ucl_chunk_skipc (chunk, p);
  1893. break;
  1894. case 99:
  1895. /*
  1896. * We have read the full body of arguments, so we need to parse and set
  1897. * object from that
  1898. */
  1899. params_parser = ucl_parser_new (parser->flags);
  1900. if (!ucl_parser_add_chunk (params_parser, c, args_len)) {
  1901. ucl_set_err (parser, UCL_ESYNTAX, "macro arguments parsing error",
  1902. &parser->err);
  1903. }
  1904. else {
  1905. res = ucl_parser_get_object (params_parser);
  1906. }
  1907. ucl_parser_free (params_parser);
  1908. return res;
  1909. break;
  1910. }
  1911. }
  1912. return res;
  1913. restore_chunk:
  1914. chunk->column = saved.column;
  1915. chunk->line = saved.line;
  1916. chunk->pos = saved.pos;
  1917. chunk->remain = saved.remain;
  1918. return NULL;
  1919. }
  1920. #define SKIP_SPACES_COMMENTS(parser, chunk, p) do { \
  1921. while ((p) < (chunk)->end) { \
  1922. if (!ucl_test_character (*(p), UCL_CHARACTER_WHITESPACE_UNSAFE)) { \
  1923. if ((chunk)->remain >= 2 && ucl_lex_is_comment ((p)[0], (p)[1])) { \
  1924. if (!ucl_skip_comments (parser)) { \
  1925. return false; \
  1926. } \
  1927. p = (chunk)->pos; \
  1928. } \
  1929. break; \
  1930. } \
  1931. ucl_chunk_skipc (chunk, p); \
  1932. } \
  1933. } while(0)
  1934. /**
  1935. * Handle the main states of rcl parser
  1936. * @param parser parser structure
  1937. * @return true if chunk has been parsed and false in case of error
  1938. */
  1939. static bool
  1940. ucl_state_machine (struct ucl_parser *parser)
  1941. {
  1942. ucl_object_t *obj, *macro_args;
  1943. struct ucl_chunk *chunk = parser->chunks;
  1944. const unsigned char *p, *c = NULL, *macro_start = NULL;
  1945. unsigned char *macro_escaped;
  1946. size_t macro_len = 0;
  1947. struct ucl_macro *macro = NULL;
  1948. bool next_key = false, end_of_object = false, ret;
  1949. if (parser->top_obj == NULL) {
  1950. parser->state = UCL_STATE_INIT;
  1951. }
  1952. p = chunk->pos;
  1953. while (chunk->pos < chunk->end) {
  1954. switch (parser->state) {
  1955. case UCL_STATE_INIT:
  1956. /*
  1957. * At the init state we can either go to the parse array or object
  1958. * if we got [ or { correspondingly or can just treat new data as
  1959. * a key of newly created object
  1960. */
  1961. if (!ucl_skip_comments (parser)) {
  1962. parser->prev_state = parser->state;
  1963. parser->state = UCL_STATE_ERROR;
  1964. return false;
  1965. }
  1966. else {
  1967. /* Skip any spaces */
  1968. while (p < chunk->end && ucl_test_character (*p,
  1969. UCL_CHARACTER_WHITESPACE_UNSAFE)) {
  1970. ucl_chunk_skipc (chunk, p);
  1971. }
  1972. p = chunk->pos;
  1973. if (*p == '[') {
  1974. parser->state = UCL_STATE_VALUE;
  1975. ucl_chunk_skipc (chunk, p);
  1976. }
  1977. else {
  1978. parser->state = UCL_STATE_KEY;
  1979. if (*p == '{') {
  1980. ucl_chunk_skipc (chunk, p);
  1981. }
  1982. }
  1983. if (parser->top_obj == NULL) {
  1984. if (parser->state == UCL_STATE_VALUE) {
  1985. obj = ucl_parser_add_container (NULL, parser, true, 0);
  1986. }
  1987. else {
  1988. obj = ucl_parser_add_container (NULL, parser, false, 0);
  1989. }
  1990. if (obj == NULL) {
  1991. return false;
  1992. }
  1993. parser->top_obj = obj;
  1994. parser->cur_obj = obj;
  1995. }
  1996. }
  1997. break;
  1998. case UCL_STATE_KEY:
  1999. /* Skip any spaces */
  2000. while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
  2001. ucl_chunk_skipc (chunk, p);
  2002. }
  2003. if (*p == '}') {
  2004. /* We have the end of an object */
  2005. parser->state = UCL_STATE_AFTER_VALUE;
  2006. continue;
  2007. }
  2008. if (parser->stack == NULL) {
  2009. /* No objects are on stack, but we want to parse a key */
  2010. ucl_set_err (parser, UCL_ESYNTAX, "top object is finished but the parser "
  2011. "expects a key", &parser->err);
  2012. parser->prev_state = parser->state;
  2013. parser->state = UCL_STATE_ERROR;
  2014. return false;
  2015. }
  2016. if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
  2017. parser->prev_state = parser->state;
  2018. parser->state = UCL_STATE_ERROR;
  2019. return false;
  2020. }
  2021. if (end_of_object) {
  2022. p = chunk->pos;
  2023. parser->state = UCL_STATE_AFTER_VALUE;
  2024. continue;
  2025. }
  2026. else if (parser->state != UCL_STATE_MACRO_NAME) {
  2027. if (next_key && parser->stack->obj->type == UCL_OBJECT) {
  2028. /* Parse more keys and nest objects accordingly */
  2029. obj = ucl_parser_add_container (parser->cur_obj, parser, false,
  2030. parser->stack->level + 1);
  2031. if (obj == NULL) {
  2032. return false;
  2033. }
  2034. }
  2035. else {
  2036. parser->state = UCL_STATE_VALUE;
  2037. }
  2038. }
  2039. else {
  2040. c = chunk->pos;
  2041. }
  2042. p = chunk->pos;
  2043. break;
  2044. case UCL_STATE_VALUE:
  2045. /* We need to check what we do have */
  2046. if (!parser->cur_obj || !ucl_parse_value (parser, chunk)) {
  2047. parser->prev_state = parser->state;
  2048. parser->state = UCL_STATE_ERROR;
  2049. return false;
  2050. }
  2051. /* State is set in ucl_parse_value call */
  2052. p = chunk->pos;
  2053. break;
  2054. case UCL_STATE_AFTER_VALUE:
  2055. if (!ucl_parse_after_value (parser, chunk)) {
  2056. parser->prev_state = parser->state;
  2057. parser->state = UCL_STATE_ERROR;
  2058. return false;
  2059. }
  2060. if (parser->stack != NULL) {
  2061. if (parser->stack->obj->type == UCL_OBJECT) {
  2062. parser->state = UCL_STATE_KEY;
  2063. }
  2064. else {
  2065. /* Array */
  2066. parser->state = UCL_STATE_VALUE;
  2067. }
  2068. }
  2069. else {
  2070. /* Skip everything at the end */
  2071. return true;
  2072. }
  2073. p = chunk->pos;
  2074. break;
  2075. case UCL_STATE_MACRO_NAME:
  2076. if (parser->flags & UCL_PARSER_DISABLE_MACRO) {
  2077. if (!ucl_skip_macro_as_comment (parser, chunk)) {
  2078. /* We have invalid macro */
  2079. ucl_create_err (&parser->err,
  2080. "error on line %d at column %d: invalid macro",
  2081. chunk->line,
  2082. chunk->column);
  2083. parser->state = UCL_STATE_ERROR;
  2084. return false;
  2085. }
  2086. else {
  2087. p = chunk->pos;
  2088. parser->state = parser->prev_state;
  2089. }
  2090. }
  2091. else {
  2092. if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) &&
  2093. *p != '(') {
  2094. ucl_chunk_skipc (chunk, p);
  2095. }
  2096. else {
  2097. if (p - c > 0) {
  2098. /* We got macro name */
  2099. macro_len = (size_t) (p - c);
  2100. HASH_FIND (hh, parser->macroes, c, macro_len, macro);
  2101. if (macro == NULL) {
  2102. ucl_create_err (&parser->err,
  2103. "error on line %d at column %d: "
  2104. "unknown macro: '%.*s', character: '%c'",
  2105. chunk->line,
  2106. chunk->column,
  2107. (int) (p - c),
  2108. c,
  2109. *chunk->pos);
  2110. parser->state = UCL_STATE_ERROR;
  2111. return false;
  2112. }
  2113. /* Now we need to skip all spaces */
  2114. SKIP_SPACES_COMMENTS(parser, chunk, p);
  2115. parser->state = UCL_STATE_MACRO;
  2116. }
  2117. else {
  2118. /* We have invalid macro name */
  2119. ucl_create_err (&parser->err,
  2120. "error on line %d at column %d: invalid macro name",
  2121. chunk->line,
  2122. chunk->column);
  2123. parser->state = UCL_STATE_ERROR;
  2124. return false;
  2125. }
  2126. }
  2127. }
  2128. break;
  2129. case UCL_STATE_MACRO:
  2130. if (*chunk->pos == '(') {
  2131. macro_args = ucl_parse_macro_arguments (parser, chunk);
  2132. p = chunk->pos;
  2133. if (macro_args) {
  2134. SKIP_SPACES_COMMENTS(parser, chunk, p);
  2135. }
  2136. }
  2137. else {
  2138. macro_args = NULL;
  2139. }
  2140. if (!ucl_parse_macro_value (parser, chunk, macro,
  2141. &macro_start, &macro_len)) {
  2142. parser->prev_state = parser->state;
  2143. parser->state = UCL_STATE_ERROR;
  2144. return false;
  2145. }
  2146. macro_len = ucl_expand_variable (parser, &macro_escaped,
  2147. macro_start, macro_len);
  2148. parser->state = parser->prev_state;
  2149. if (macro_escaped == NULL) {
  2150. if (macro->is_context) {
  2151. ret = macro->h.context_handler (macro_start, macro_len,
  2152. macro_args,
  2153. parser->top_obj,
  2154. macro->ud);
  2155. }
  2156. else {
  2157. ret = macro->h.handler (macro_start, macro_len, macro_args,
  2158. macro->ud);
  2159. }
  2160. }
  2161. else {
  2162. if (macro->is_context) {
  2163. ret = macro->h.context_handler (macro_escaped, macro_len,
  2164. macro_args,
  2165. parser->top_obj,
  2166. macro->ud);
  2167. }
  2168. else {
  2169. ret = macro->h.handler (macro_escaped, macro_len, macro_args,
  2170. macro->ud);
  2171. }
  2172. UCL_FREE (macro_len + 1, macro_escaped);
  2173. }
  2174. /*
  2175. * Chunk can be modified within macro handler
  2176. */
  2177. chunk = parser->chunks;
  2178. p = chunk->pos;
  2179. if (macro_args) {
  2180. ucl_object_unref (macro_args);
  2181. }
  2182. if (!ret) {
  2183. return false;
  2184. }
  2185. break;
  2186. default:
  2187. ucl_set_err (parser, UCL_EINTERNAL,
  2188. "internal error: parser is in an unknown state", &parser->err);
  2189. parser->state = UCL_STATE_ERROR;
  2190. return false;
  2191. }
  2192. }
  2193. if (parser->last_comment) {
  2194. if (parser->cur_obj) {
  2195. ucl_attach_comment (parser, parser->cur_obj, true);
  2196. }
  2197. else if (parser->stack && parser->stack->obj) {
  2198. ucl_attach_comment (parser, parser->stack->obj, true);
  2199. }
  2200. else if (parser->top_obj) {
  2201. ucl_attach_comment (parser, parser->top_obj, true);
  2202. }
  2203. else {
  2204. ucl_object_unref (parser->last_comment);
  2205. }
  2206. }
  2207. return true;
  2208. }
  2209. struct ucl_parser*
  2210. ucl_parser_new (int flags)
  2211. {
  2212. struct ucl_parser *parser;
  2213. parser = UCL_ALLOC (sizeof (struct ucl_parser));
  2214. if (parser == NULL) {
  2215. return NULL;
  2216. }
  2217. memset (parser, 0, sizeof (struct ucl_parser));
  2218. ucl_parser_register_macro (parser, "include", ucl_include_handler, parser);
  2219. ucl_parser_register_macro (parser, "try_include", ucl_try_include_handler, parser);
  2220. ucl_parser_register_macro (parser, "includes", ucl_includes_handler, parser);
  2221. ucl_parser_register_macro (parser, "priority", ucl_priority_handler, parser);
  2222. ucl_parser_register_macro (parser, "load", ucl_load_handler, parser);
  2223. ucl_parser_register_context_macro (parser, "inherit", ucl_inherit_handler, parser);
  2224. parser->flags = flags;
  2225. parser->includepaths = NULL;
  2226. if (flags & UCL_PARSER_SAVE_COMMENTS) {
  2227. parser->comments = ucl_object_typed_new (UCL_OBJECT);
  2228. }
  2229. /* Initial assumption about filevars */
  2230. ucl_parser_set_filevars (parser, NULL, false);
  2231. return parser;
  2232. }
  2233. bool
  2234. ucl_parser_set_default_priority (struct ucl_parser *parser, unsigned prio)
  2235. {
  2236. if (parser == NULL) {
  2237. return false;
  2238. }
  2239. parser->default_priority = prio;
  2240. return true;
  2241. }
  2242. void
  2243. ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
  2244. ucl_macro_handler handler, void* ud)
  2245. {
  2246. struct ucl_macro *new;
  2247. if (macro == NULL || handler == NULL) {
  2248. return;
  2249. }
  2250. new = UCL_ALLOC (sizeof (struct ucl_macro));
  2251. if (new == NULL) {
  2252. return;
  2253. }
  2254. memset (new, 0, sizeof (struct ucl_macro));
  2255. new->h.handler = handler;
  2256. new->name = strdup (macro);
  2257. new->ud = ud;
  2258. HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
  2259. }
  2260. void
  2261. ucl_parser_register_context_macro (struct ucl_parser *parser, const char *macro,
  2262. ucl_context_macro_handler handler, void* ud)
  2263. {
  2264. struct ucl_macro *new;
  2265. if (macro == NULL || handler == NULL) {
  2266. return;
  2267. }
  2268. new = UCL_ALLOC (sizeof (struct ucl_macro));
  2269. if (new == NULL) {
  2270. return;
  2271. }
  2272. memset (new, 0, sizeof (struct ucl_macro));
  2273. new->h.context_handler = handler;
  2274. new->name = strdup (macro);
  2275. new->ud = ud;
  2276. new->is_context = true;
  2277. HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
  2278. }
  2279. void
  2280. ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
  2281. const char *value)
  2282. {
  2283. struct ucl_variable *new = NULL, *cur;
  2284. if (var == NULL) {
  2285. return;
  2286. }
  2287. /* Find whether a variable already exists */
  2288. LL_FOREACH (parser->variables, cur) {
  2289. if (strcmp (cur->var, var) == 0) {
  2290. new = cur;
  2291. break;
  2292. }
  2293. }
  2294. if (value == NULL) {
  2295. if (new != NULL) {
  2296. /* Remove variable */
  2297. DL_DELETE (parser->variables, new);
  2298. free (new->var);
  2299. free (new->value);
  2300. UCL_FREE (sizeof (struct ucl_variable), new);
  2301. }
  2302. else {
  2303. /* Do nothing */
  2304. return;
  2305. }
  2306. }
  2307. else {
  2308. if (new == NULL) {
  2309. new = UCL_ALLOC (sizeof (struct ucl_variable));
  2310. if (new == NULL) {
  2311. return;
  2312. }
  2313. memset (new, 0, sizeof (struct ucl_variable));
  2314. new->var = strdup (var);
  2315. new->var_len = strlen (var);
  2316. new->value = strdup (value);
  2317. new->value_len = strlen (value);
  2318. DL_APPEND (parser->variables, new);
  2319. }
  2320. else {
  2321. free (new->value);
  2322. new->value = strdup (value);
  2323. new->value_len = strlen (value);
  2324. }
  2325. }
  2326. }
  2327. void
  2328. ucl_parser_set_variables_handler (struct ucl_parser *parser,
  2329. ucl_variable_handler handler, void *ud)
  2330. {
  2331. parser->var_handler = handler;
  2332. parser->var_data = ud;
  2333. }
  2334. bool
  2335. ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data,
  2336. size_t len, unsigned priority, enum ucl_duplicate_strategy strat,
  2337. enum ucl_parse_type parse_type)
  2338. {
  2339. struct ucl_chunk *chunk;
  2340. if (parser == NULL) {
  2341. return false;
  2342. }
  2343. if (data == NULL) {
  2344. ucl_create_err (&parser->err, "invalid chunk added");
  2345. return false;
  2346. }
  2347. if (len == 0) {
  2348. parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority);
  2349. return true;
  2350. }
  2351. if (parser->state != UCL_STATE_ERROR) {
  2352. chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
  2353. if (chunk == NULL) {
  2354. ucl_create_err (&parser->err, "cannot allocate chunk structure");
  2355. return false;
  2356. }
  2357. chunk->begin = data;
  2358. chunk->remain = len;
  2359. chunk->pos = chunk->begin;
  2360. chunk->end = chunk->begin + len;
  2361. chunk->line = 1;
  2362. chunk->column = 0;
  2363. chunk->priority = priority;
  2364. chunk->strategy = strat;
  2365. chunk->parse_type = parse_type;
  2366. LL_PREPEND (parser->chunks, chunk);
  2367. parser->recursion ++;
  2368. if (parser->recursion > UCL_MAX_RECURSION) {
  2369. ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
  2370. parser->recursion);
  2371. return false;
  2372. }
  2373. switch (parse_type) {
  2374. default:
  2375. case UCL_PARSE_UCL:
  2376. return ucl_state_machine (parser);
  2377. case UCL_PARSE_MSGPACK:
  2378. return ucl_parse_msgpack (parser);
  2379. }
  2380. }
  2381. ucl_create_err (&parser->err, "a parser is in an invalid state");
  2382. return false;
  2383. }
  2384. bool
  2385. ucl_parser_add_chunk_priority (struct ucl_parser *parser,
  2386. const unsigned char *data, size_t len, unsigned priority)
  2387. {
  2388. /* We dereference parser, so this check is essential */
  2389. if (parser == NULL) {
  2390. return false;
  2391. }
  2392. return ucl_parser_add_chunk_full (parser, data, len,
  2393. priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL);
  2394. }
  2395. bool
  2396. ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
  2397. size_t len)
  2398. {
  2399. if (parser == NULL) {
  2400. return false;
  2401. }
  2402. return ucl_parser_add_chunk_full (parser, data, len,
  2403. parser->default_priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL);
  2404. }
  2405. bool
  2406. ucl_parser_add_string_priority (struct ucl_parser *parser, const char *data,
  2407. size_t len, unsigned priority)
  2408. {
  2409. if (data == NULL) {
  2410. ucl_create_err (&parser->err, "invalid string added");
  2411. return false;
  2412. }
  2413. if (len == 0) {
  2414. len = strlen (data);
  2415. }
  2416. return ucl_parser_add_chunk_priority (parser,
  2417. (const unsigned char *)data, len, priority);
  2418. }
  2419. bool
  2420. ucl_parser_add_string (struct ucl_parser *parser, const char *data,
  2421. size_t len)
  2422. {
  2423. if (parser == NULL) {
  2424. return false;
  2425. }
  2426. return ucl_parser_add_string_priority (parser,
  2427. (const unsigned char *)data, len, parser->default_priority);
  2428. }
  2429. bool
  2430. ucl_set_include_path (struct ucl_parser *parser, ucl_object_t *paths)
  2431. {
  2432. if (parser == NULL || paths == NULL) {
  2433. return false;
  2434. }
  2435. if (parser->includepaths == NULL) {
  2436. parser->includepaths = ucl_object_copy (paths);
  2437. }
  2438. else {
  2439. ucl_object_unref (parser->includepaths);
  2440. parser->includepaths = ucl_object_copy (paths);
  2441. }
  2442. if (parser->includepaths == NULL) {
  2443. return false;
  2444. }
  2445. return true;
  2446. }